You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

138 lines
4.7 KiB

  1. // Copyright 2016 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bigquery
  15. import (
  16. "io"
  17. bq "google.golang.org/api/bigquery/v2"
  18. )
  // A ReaderSource is a source for a load operation that gets
  // data from an io.Reader.
  //
  // When a ReaderSource is part of a LoadConfig obtained via Job.Config,
  // its internal io.Reader will be nil, so it cannot be used for a
  // subsequent load operation.
  type ReaderSource struct {
  	// r is the reader that supplies the data to load. It is set by
  	// NewReaderSource and handed back by populateLoadConfig.
  	r io.Reader
  	// FileConfig is embedded so that the options describing the data being
  	// read can be set directly on the ReaderSource.
  	FileConfig
  }
  29. // NewReaderSource creates a ReaderSource from an io.Reader. You may
  30. // optionally configure properties on the ReaderSource that describe the
  31. // data being read, before passing it to Table.LoaderFrom.
  32. func NewReaderSource(r io.Reader) *ReaderSource {
  33. return &ReaderSource{r: r}
  34. }
  35. func (r *ReaderSource) populateLoadConfig(lc *bq.JobConfigurationLoad) io.Reader {
  36. r.FileConfig.populateLoadConfig(lc)
  37. return r.r
  38. }
  // FileConfig contains configuration options that pertain to files, typically
  // text files that require interpretation to be used as a BigQuery table. A
  // file may live in Google Cloud Storage (see GCSReference), or it may be
  // loaded into a table by wrapping an io.Reader in a ReaderSource and passing
  // it to Table.LoaderFrom.
  type FileConfig struct {
  	// SourceFormat is the format of the data to be read.
  	// Allowed values are: Avro, CSV, DatastoreBackup, JSON, ORC, and Parquet. The default is CSV.
  	SourceFormat DataFormat
  	// AutoDetect indicates whether the options and schema should be
  	// inferred automatically for CSV and JSON sources.
  	AutoDetect bool
  	// MaxBadRecords is the maximum number of bad records that will be ignored
  	// when reading data.
  	MaxBadRecords int64
  	// IgnoreUnknownValues causes values not matching the schema to be
  	// tolerated. Unknown values are ignored. For CSV this ignores extra values
  	// at the end of a line. For JSON this ignores named values that do not
  	// match any column name. If this field is not set, records containing
  	// unknown values are treated as bad records. The MaxBadRecords field can
  	// be used to customize how bad records are handled.
  	IgnoreUnknownValues bool
  	// Schema describes the data. It is required when reading CSV or JSON data,
  	// unless the data is being loaded into a table that already exists.
  	Schema Schema
  	// CSVOptions holds additional options for CSV files. It is embedded, so
  	// its fields are accessed directly on the FileConfig.
  	CSVOptions
  }
  66. func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) {
  67. conf.SkipLeadingRows = fc.SkipLeadingRows
  68. conf.SourceFormat = string(fc.SourceFormat)
  69. conf.Autodetect = fc.AutoDetect
  70. conf.AllowJaggedRows = fc.AllowJaggedRows
  71. conf.AllowQuotedNewlines = fc.AllowQuotedNewlines
  72. conf.Encoding = string(fc.Encoding)
  73. conf.FieldDelimiter = fc.FieldDelimiter
  74. conf.IgnoreUnknownValues = fc.IgnoreUnknownValues
  75. conf.MaxBadRecords = fc.MaxBadRecords
  76. if fc.Schema != nil {
  77. conf.Schema = fc.Schema.toBQ()
  78. }
  79. conf.Quote = fc.quote()
  80. }
  81. func bqPopulateFileConfig(conf *bq.JobConfigurationLoad, fc *FileConfig) {
  82. fc.SourceFormat = DataFormat(conf.SourceFormat)
  83. fc.AutoDetect = conf.Autodetect
  84. fc.MaxBadRecords = conf.MaxBadRecords
  85. fc.IgnoreUnknownValues = conf.IgnoreUnknownValues
  86. fc.Schema = bqToSchema(conf.Schema)
  87. fc.SkipLeadingRows = conf.SkipLeadingRows
  88. fc.AllowJaggedRows = conf.AllowJaggedRows
  89. fc.AllowQuotedNewlines = conf.AllowQuotedNewlines
  90. fc.Encoding = Encoding(conf.Encoding)
  91. fc.FieldDelimiter = conf.FieldDelimiter
  92. fc.CSVOptions.setQuote(conf.Quote)
  93. }
  94. func (fc *FileConfig) populateExternalDataConfig(conf *bq.ExternalDataConfiguration) {
  95. format := fc.SourceFormat
  96. if format == "" {
  97. // Format must be explicitly set for external data sources.
  98. format = CSV
  99. }
  100. conf.Autodetect = fc.AutoDetect
  101. conf.IgnoreUnknownValues = fc.IgnoreUnknownValues
  102. conf.MaxBadRecords = fc.MaxBadRecords
  103. conf.SourceFormat = string(format)
  104. if fc.Schema != nil {
  105. conf.Schema = fc.Schema.toBQ()
  106. }
  107. if format == CSV {
  108. fc.CSVOptions.populateExternalDataConfig(conf)
  109. }
  110. }
  // Encoding specifies the character encoding of data to be loaded into BigQuery.
  // See https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.encoding
  // for more details about how this is used.
  //
  // The value is converted to a plain string when it is copied into a load job
  // configuration.
  type Encoding string
  // Supported Encoding values. The names contain underscores rather than
  // following Go's MixedCaps convention; they are exported identifiers, so
  // renaming them would break callers.
  const (
  	// UTF_8 specifies the UTF-8 encoding type.
  	UTF_8 Encoding = "UTF-8"
  	// ISO_8859_1 specifies the ISO-8859-1 encoding type.
  	ISO_8859_1 Encoding = "ISO-8859-1"
  )