25'ten fazla konu seçemezsiniz. Konular bir harf veya rakamla başlamalı, kısa çizgiler ('-') içerebilir ve en fazla 35 karakter uzunluğunda olabilir.
 
 
 

291 satır
7.9 KiB

  1. // Copyright 2016 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package storage
  15. import (
  16. "errors"
  17. "fmt"
  18. "hash/crc32"
  19. "io"
  20. "io/ioutil"
  21. "net/http"
  22. "net/url"
  23. "reflect"
  24. "strconv"
  25. "strings"
  26. "cloud.google.com/go/internal/trace"
  27. "golang.org/x/net/context"
  28. "google.golang.org/api/googleapi"
  29. )
  30. var crc32cTable = crc32.MakeTable(crc32.Castagnoli)
  31. // NewReader creates a new Reader to read the contents of the
  32. // object.
  33. // ErrObjectNotExist will be returned if the object is not found.
  34. //
  35. // The caller must call Close on the returned Reader when done reading.
  36. func (o *ObjectHandle) NewReader(ctx context.Context) (*Reader, error) {
  37. return o.NewRangeReader(ctx, 0, -1)
  38. }
  39. // NewRangeReader reads part of an object, reading at most length bytes
  40. // starting at the given offset. If length is negative, the object is read
  41. // until the end.
  42. func (o *ObjectHandle) NewRangeReader(ctx context.Context, offset, length int64) (r *Reader, err error) {
  43. ctx = trace.StartSpan(ctx, "cloud.google.com/go/storage.Object.NewRangeReader")
  44. defer func() { trace.EndSpan(ctx, err) }()
  45. if err := o.validate(); err != nil {
  46. return nil, err
  47. }
  48. if offset < 0 {
  49. return nil, fmt.Errorf("storage: invalid offset %d < 0", offset)
  50. }
  51. if o.conds != nil {
  52. if err := o.conds.validate("NewRangeReader"); err != nil {
  53. return nil, err
  54. }
  55. }
  56. u := &url.URL{
  57. Scheme: "https",
  58. Host: "storage.googleapis.com",
  59. Path: fmt.Sprintf("/%s/%s", o.bucket, o.object),
  60. RawQuery: conditionsQuery(o.gen, o.conds),
  61. }
  62. verb := "GET"
  63. if length == 0 {
  64. verb = "HEAD"
  65. }
  66. req, err := http.NewRequest(verb, u.String(), nil)
  67. if err != nil {
  68. return nil, err
  69. }
  70. req = withContext(req, ctx)
  71. if o.userProject != "" {
  72. req.Header.Set("X-Goog-User-Project", o.userProject)
  73. }
  74. if o.readCompressed {
  75. req.Header.Set("Accept-Encoding", "gzip")
  76. }
  77. if err := setEncryptionHeaders(req.Header, o.encryptionKey, false); err != nil {
  78. return nil, err
  79. }
  80. // Define a function that initiates a Read with offset and length, assuming we
  81. // have already read seen bytes.
  82. reopen := func(seen int64) (*http.Response, error) {
  83. start := offset + seen
  84. if length < 0 && start > 0 {
  85. req.Header.Set("Range", fmt.Sprintf("bytes=%d-", start))
  86. } else if length > 0 {
  87. // The end character isn't affected by how many bytes we've seen.
  88. req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", start, offset+length-1))
  89. }
  90. var res *http.Response
  91. err = runWithRetry(ctx, func() error {
  92. res, err = o.c.hc.Do(req)
  93. if err != nil {
  94. return err
  95. }
  96. if res.StatusCode == http.StatusNotFound {
  97. res.Body.Close()
  98. return ErrObjectNotExist
  99. }
  100. if res.StatusCode < 200 || res.StatusCode > 299 {
  101. body, _ := ioutil.ReadAll(res.Body)
  102. res.Body.Close()
  103. return &googleapi.Error{
  104. Code: res.StatusCode,
  105. Header: res.Header,
  106. Body: string(body),
  107. }
  108. }
  109. if start > 0 && length != 0 && res.StatusCode != http.StatusPartialContent {
  110. res.Body.Close()
  111. return errors.New("storage: partial request not satisfied")
  112. }
  113. return nil
  114. })
  115. if err != nil {
  116. return nil, err
  117. }
  118. return res, nil
  119. }
  120. res, err := reopen(0)
  121. if err != nil {
  122. return nil, err
  123. }
  124. var size int64 // total size of object, even if a range was requested.
  125. if res.StatusCode == http.StatusPartialContent {
  126. cr := strings.TrimSpace(res.Header.Get("Content-Range"))
  127. if !strings.HasPrefix(cr, "bytes ") || !strings.Contains(cr, "/") {
  128. return nil, fmt.Errorf("storage: invalid Content-Range %q", cr)
  129. }
  130. size, err = strconv.ParseInt(cr[strings.LastIndex(cr, "/")+1:], 10, 64)
  131. if err != nil {
  132. return nil, fmt.Errorf("storage: invalid Content-Range %q", cr)
  133. }
  134. } else {
  135. size = res.ContentLength
  136. }
  137. remain := res.ContentLength
  138. body := res.Body
  139. if length == 0 {
  140. remain = 0
  141. body.Close()
  142. body = emptyBody
  143. }
  144. var (
  145. checkCRC bool
  146. crc uint32
  147. )
  148. // Even if there is a CRC header, we can't compute the hash on partial data.
  149. if remain == size {
  150. crc, checkCRC = parseCRC32c(res)
  151. }
  152. return &Reader{
  153. body: body,
  154. size: size,
  155. remain: remain,
  156. contentType: res.Header.Get("Content-Type"),
  157. contentEncoding: res.Header.Get("Content-Encoding"),
  158. cacheControl: res.Header.Get("Cache-Control"),
  159. wantCRC: crc,
  160. checkCRC: checkCRC,
  161. reopen: reopen,
  162. }, nil
  163. }
  164. func parseCRC32c(res *http.Response) (uint32, bool) {
  165. const prefix = "crc32c="
  166. for _, spec := range res.Header["X-Goog-Hash"] {
  167. if strings.HasPrefix(spec, prefix) {
  168. c, err := decodeUint32(spec[len(prefix):])
  169. if err == nil {
  170. return c, true
  171. }
  172. }
  173. }
  174. return 0, false
  175. }
  176. var emptyBody = ioutil.NopCloser(strings.NewReader(""))
  177. // Reader reads a Cloud Storage object.
  178. // It implements io.Reader.
  179. //
  180. // Typically, a Reader computes the CRC of the downloaded content and compares it to
  181. // the stored CRC, returning an error from Read if there is a mismatch. This integrity check
  182. // is skipped if transcoding occurs. See https://cloud.google.com/storage/docs/transcoding.
  183. type Reader struct {
  184. body io.ReadCloser
  185. seen, remain, size int64
  186. contentType string
  187. contentEncoding string
  188. cacheControl string
  189. checkCRC bool // should we check the CRC?
  190. wantCRC uint32 // the CRC32c value the server sent in the header
  191. gotCRC uint32 // running crc
  192. checkedCRC bool // did we check the CRC? (For tests.)
  193. reopen func(seen int64) (*http.Response, error)
  194. }
  195. // Close closes the Reader. It must be called when done reading.
  196. func (r *Reader) Close() error {
  197. return r.body.Close()
  198. }
  199. func (r *Reader) Read(p []byte) (int, error) {
  200. n, err := r.readWithRetry(p)
  201. if r.remain != -1 {
  202. r.remain -= int64(n)
  203. }
  204. if r.checkCRC {
  205. r.gotCRC = crc32.Update(r.gotCRC, crc32cTable, p[:n])
  206. // Check CRC here. It would be natural to check it in Close, but
  207. // everybody defers Close on the assumption that it doesn't return
  208. // anything worth looking at.
  209. if r.remain == 0 { // Only check if we have Content-Length.
  210. r.checkedCRC = true
  211. if r.gotCRC != r.wantCRC {
  212. return n, fmt.Errorf("storage: bad CRC on read: got %d, want %d",
  213. r.gotCRC, r.wantCRC)
  214. }
  215. }
  216. }
  217. return n, err
  218. }
  219. func (r *Reader) readWithRetry(p []byte) (int, error) {
  220. n := 0
  221. for len(p[n:]) > 0 {
  222. m, err := r.body.Read(p[n:])
  223. n += m
  224. r.seen += int64(m)
  225. if !shouldRetryRead(err) {
  226. return n, err
  227. }
  228. // Read failed, but we will try again. Send a ranged read request that takes
  229. // into account the number of bytes we've already seen.
  230. res, err := r.reopen(r.seen)
  231. if err != nil {
  232. // reopen already retries
  233. return n, err
  234. }
  235. r.body.Close()
  236. r.body = res.Body
  237. }
  238. return n, nil
  239. }
  240. func shouldRetryRead(err error) bool {
  241. if err == nil {
  242. return false
  243. }
  244. return strings.HasSuffix(err.Error(), "INTERNAL_ERROR") && strings.Contains(reflect.TypeOf(err).String(), "http2")
  245. }
  246. // Size returns the size of the object in bytes.
  247. // The returned value is always the same and is not affected by
  248. // calls to Read or Close.
  249. func (r *Reader) Size() int64 {
  250. return r.size
  251. }
  252. // Remain returns the number of bytes left to read, or -1 if unknown.
  253. func (r *Reader) Remain() int64 {
  254. return r.remain
  255. }
  256. // ContentType returns the content type of the object.
  257. func (r *Reader) ContentType() string {
  258. return r.contentType
  259. }
  260. // ContentEncoding returns the content encoding of the object.
  261. func (r *Reader) ContentEncoding() string {
  262. return r.contentEncoding
  263. }
  264. // CacheControl returns the cache control of the object.
  265. func (r *Reader) CacheControl() string {
  266. return r.cacheControl
  267. }