You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

239 lines
7.1 KiB

  1. // Copyright 2015 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bigquery
  15. import (
  16. "context"
  17. "errors"
  18. "fmt"
  19. "reflect"
  20. "cloud.google.com/go/internal/trace"
  21. bq "google.golang.org/api/bigquery/v2"
  22. )
// An Inserter does streaming inserts into a BigQuery table.
// It is safe for concurrent use.
//
// An Inserter is obtained from Table.Inserter. Its exported fields are
// options that are copied into every insert request the Inserter builds.
type Inserter struct {
	// t is the table that rows are inserted into; its ProjectID, DatasetID
	// and TableID address the insert call.
	t *Table

	// SkipInvalidRows causes rows containing invalid data to be silently
	// ignored. The default value is false, which causes the entire request to
	// fail if there is an attempt to insert an invalid row.
	SkipInvalidRows bool

	// IgnoreUnknownValues causes values not matching the schema to be ignored.
	// The default value is false, which causes records containing such values
	// to be treated as invalid records.
	IgnoreUnknownValues bool

	// A TableTemplateSuffix allows Inserters to create tables automatically.
	//
	// Experimental: this option is experimental and may be modified or removed in future versions,
	// regardless of any other documented package stability guarantees.
	//
	// When you specify a suffix, the table you upload data to
	// will be used as a template for creating a new table, with the same schema,
	// called <table> + <suffix>.
	//
	// More information is available at
	// https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables
	TableTemplateSuffix string
}
  48. // Inserter returns an Inserter that can be used to append rows to t.
  49. // The returned Inserter may optionally be further configured before its Put method is called.
  50. //
  51. // To stream rows into a date-partitioned table at a particular date, add the
  52. // $yyyymmdd suffix to the table name when constructing the Table.
  53. func (t *Table) Inserter() *Inserter {
  54. return &Inserter{t: t}
  55. }
  56. // Uploader calls Inserter.
  57. // Deprecated: use Table.Inserter instead.
  58. func (t *Table) Uploader() *Inserter { return t.Inserter() }
  59. // Put uploads one or more rows to the BigQuery service.
  60. //
  61. // If src is ValueSaver, then its Save method is called to produce a row for uploading.
  62. //
  63. // If src is a struct or pointer to a struct, then a schema is inferred from it
  64. // and used to create a StructSaver. The InsertID of the StructSaver will be
  65. // empty.
  66. //
  67. // If src is a slice of ValueSavers, structs, or struct pointers, then each
  68. // element of the slice is treated as above, and multiple rows are uploaded.
  69. //
  70. // Put returns a PutMultiError if one or more rows failed to be uploaded.
  71. // The PutMultiError contains a RowInsertionError for each failed row.
  72. //
  73. // Put will retry on temporary errors (see
  74. // https://cloud.google.com/bigquery/troubleshooting-errors). This can result
  75. // in duplicate rows if you do not use insert IDs. Also, if the error persists,
  76. // the call will run indefinitely. Pass a context with a timeout to prevent
  77. // hanging calls.
  78. func (u *Inserter) Put(ctx context.Context, src interface{}) (err error) {
  79. ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Inserter.Put")
  80. defer func() { trace.EndSpan(ctx, err) }()
  81. savers, err := valueSavers(src)
  82. if err != nil {
  83. return err
  84. }
  85. return u.putMulti(ctx, savers)
  86. }
  87. func valueSavers(src interface{}) ([]ValueSaver, error) {
  88. saver, ok, err := toValueSaver(src)
  89. if err != nil {
  90. return nil, err
  91. }
  92. if ok {
  93. return []ValueSaver{saver}, nil
  94. }
  95. srcVal := reflect.ValueOf(src)
  96. if srcVal.Kind() != reflect.Slice {
  97. return nil, fmt.Errorf("%T is not a ValueSaver, struct, struct pointer, or slice", src)
  98. }
  99. var savers []ValueSaver
  100. for i := 0; i < srcVal.Len(); i++ {
  101. s := srcVal.Index(i).Interface()
  102. saver, ok, err := toValueSaver(s)
  103. if err != nil {
  104. return nil, err
  105. }
  106. if !ok {
  107. return nil, fmt.Errorf("src[%d] has type %T, which is not a ValueSaver, struct or struct pointer", i, s)
  108. }
  109. savers = append(savers, saver)
  110. }
  111. return savers, nil
  112. }
  113. // Make a ValueSaver from x, which must implement ValueSaver already
  114. // or be a struct or pointer to struct.
  115. func toValueSaver(x interface{}) (ValueSaver, bool, error) {
  116. if _, ok := x.(StructSaver); ok {
  117. return nil, false, errors.New("bigquery: use &StructSaver, not StructSaver")
  118. }
  119. var insertID string
  120. // Handle StructSavers specially so we can infer the schema if necessary.
  121. if ss, ok := x.(*StructSaver); ok && ss.Schema == nil {
  122. x = ss.Struct
  123. insertID = ss.InsertID
  124. // Fall through so we can infer the schema.
  125. }
  126. if saver, ok := x.(ValueSaver); ok {
  127. return saver, ok, nil
  128. }
  129. v := reflect.ValueOf(x)
  130. // Support Put with []interface{}
  131. if v.Kind() == reflect.Interface {
  132. v = v.Elem()
  133. }
  134. if v.Kind() == reflect.Ptr {
  135. v = v.Elem()
  136. }
  137. if v.Kind() != reflect.Struct {
  138. return nil, false, nil
  139. }
  140. schema, err := inferSchemaReflectCached(v.Type())
  141. if err != nil {
  142. return nil, false, err
  143. }
  144. return &StructSaver{
  145. Struct: x,
  146. InsertID: insertID,
  147. Schema: schema,
  148. }, true, nil
  149. }
  150. func (u *Inserter) putMulti(ctx context.Context, src []ValueSaver) error {
  151. req, err := u.newInsertRequest(src)
  152. if err != nil {
  153. return err
  154. }
  155. if req == nil {
  156. return nil
  157. }
  158. call := u.t.c.bqs.Tabledata.InsertAll(u.t.ProjectID, u.t.DatasetID, u.t.TableID, req)
  159. call = call.Context(ctx)
  160. setClientHeader(call.Header())
  161. var res *bq.TableDataInsertAllResponse
  162. err = runWithRetry(ctx, func() (err error) {
  163. res, err = call.Do()
  164. return err
  165. })
  166. if err != nil {
  167. return err
  168. }
  169. return handleInsertErrors(res.InsertErrors, req.Rows)
  170. }
  171. func (u *Inserter) newInsertRequest(savers []ValueSaver) (*bq.TableDataInsertAllRequest, error) {
  172. if savers == nil { // If there are no rows, do nothing.
  173. return nil, nil
  174. }
  175. req := &bq.TableDataInsertAllRequest{
  176. TemplateSuffix: u.TableTemplateSuffix,
  177. IgnoreUnknownValues: u.IgnoreUnknownValues,
  178. SkipInvalidRows: u.SkipInvalidRows,
  179. }
  180. for _, saver := range savers {
  181. row, insertID, err := saver.Save()
  182. if err != nil {
  183. return nil, err
  184. }
  185. if insertID == "" {
  186. insertID = randomIDFn()
  187. }
  188. m := make(map[string]bq.JsonValue)
  189. for k, v := range row {
  190. m[k] = bq.JsonValue(v)
  191. }
  192. req.Rows = append(req.Rows, &bq.TableDataInsertAllRequestRows{
  193. InsertId: insertID,
  194. Json: m,
  195. })
  196. }
  197. return req, nil
  198. }
  199. func handleInsertErrors(ierrs []*bq.TableDataInsertAllResponseInsertErrors, rows []*bq.TableDataInsertAllRequestRows) error {
  200. if len(ierrs) == 0 {
  201. return nil
  202. }
  203. var errs PutMultiError
  204. for _, e := range ierrs {
  205. if int(e.Index) > len(rows) {
  206. return fmt.Errorf("internal error: unexpected row index: %v", e.Index)
  207. }
  208. rie := RowInsertionError{
  209. InsertID: rows[e.Index].InsertId,
  210. RowIndex: int(e.Index),
  211. }
  212. for _, errp := range e.Errors {
  213. rie.Errors = append(rie.Errors, bqToError(errp))
  214. }
  215. errs = append(errs, rie)
  216. }
  217. return errs
  218. }
// Uploader is an obsolete name for Inserter.
//
// It is declared as a type alias, so Uploader and Inserter denote the same
// type and can be used interchangeably.
type Uploader = Inserter