|
- // Copyright 2016 Google LLC
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
-
- package bigquery
-
- import (
- "io"
-
- bq "google.golang.org/api/bigquery/v2"
- )
-
// A ReaderSource is a source for a load operation that gets
// data from an io.Reader.
//
// When a ReaderSource is part of a LoadConfig obtained via Job.Config,
// its internal io.Reader will be nil, so it cannot be used for a
// subsequent load operation.
type ReaderSource struct {
	// r is the stream the load data is read from. It is set by
	// NewReaderSource and handed back by populateLoadConfig.
	r io.Reader
	// FileConfig is embedded, so the options describing the data can be
	// set directly on the ReaderSource.
	FileConfig
}
-
- // NewReaderSource creates a ReaderSource from an io.Reader. You may
- // optionally configure properties on the ReaderSource that describe the
- // data being read, before passing it to Table.LoaderFrom.
- func NewReaderSource(r io.Reader) *ReaderSource {
- return &ReaderSource{r: r}
- }
-
- func (r *ReaderSource) populateLoadConfig(lc *bq.JobConfigurationLoad) io.Reader {
- r.FileConfig.populateLoadConfig(lc)
- return r.r
- }
-
// FileConfig contains configuration options that pertain to files, typically
// text files that require interpretation to be used as a BigQuery table. A
// file may live in Google Cloud Storage (see GCSReference), or it may be
// loaded into a table by passing a ReaderSource to Table.LoaderFrom.
type FileConfig struct {
	// SourceFormat is the format of the data to be read.
	// Allowed values are: CSV, Avro, Parquet, JSON, DatastoreBackup. The default is CSV.
	SourceFormat DataFormat

	// AutoDetect indicates whether the options and schema should be
	// inferred automatically for CSV and JSON sources.
	AutoDetect bool

	// MaxBadRecords is the maximum number of bad records that will be ignored
	// when reading data.
	MaxBadRecords int64

	// IgnoreUnknownValues causes values not matching the schema to be
	// tolerated. Unknown values are ignored. For CSV this ignores extra values
	// at the end of a line. For JSON this ignores named values that do not
	// match any column name. If this field is not set, records containing
	// unknown values are treated as bad records. The MaxBadRecords field can
	// be used to customize how bad records are handled.
	IgnoreUnknownValues bool

	// Schema describes the data. It is required when reading CSV or JSON data,
	// unless the data is being loaded into a table that already exists.
	Schema Schema

	// CSVOptions holds additional options for CSV files. It is embedded, so
	// its fields can be set directly on the FileConfig.
	CSVOptions
}
-
- func (fc *FileConfig) populateLoadConfig(conf *bq.JobConfigurationLoad) {
- conf.SkipLeadingRows = fc.SkipLeadingRows
- conf.SourceFormat = string(fc.SourceFormat)
- conf.Autodetect = fc.AutoDetect
- conf.AllowJaggedRows = fc.AllowJaggedRows
- conf.AllowQuotedNewlines = fc.AllowQuotedNewlines
- conf.Encoding = string(fc.Encoding)
- conf.FieldDelimiter = fc.FieldDelimiter
- conf.IgnoreUnknownValues = fc.IgnoreUnknownValues
- conf.MaxBadRecords = fc.MaxBadRecords
- if fc.Schema != nil {
- conf.Schema = fc.Schema.toBQ()
- }
- conf.Quote = fc.quote()
- }
-
- func bqPopulateFileConfig(conf *bq.JobConfigurationLoad, fc *FileConfig) {
- fc.SourceFormat = DataFormat(conf.SourceFormat)
- fc.AutoDetect = conf.Autodetect
- fc.MaxBadRecords = conf.MaxBadRecords
- fc.IgnoreUnknownValues = conf.IgnoreUnknownValues
- fc.Schema = bqToSchema(conf.Schema)
- fc.SkipLeadingRows = conf.SkipLeadingRows
- fc.AllowJaggedRows = conf.AllowJaggedRows
- fc.AllowQuotedNewlines = conf.AllowQuotedNewlines
- fc.Encoding = Encoding(conf.Encoding)
- fc.FieldDelimiter = conf.FieldDelimiter
- fc.CSVOptions.setQuote(conf.Quote)
- }
-
- func (fc *FileConfig) populateExternalDataConfig(conf *bq.ExternalDataConfiguration) {
- format := fc.SourceFormat
- if format == "" {
- // Format must be explicitly set for external data sources.
- format = CSV
- }
- conf.Autodetect = fc.AutoDetect
- conf.IgnoreUnknownValues = fc.IgnoreUnknownValues
- conf.MaxBadRecords = fc.MaxBadRecords
- conf.SourceFormat = string(format)
- if fc.Schema != nil {
- conf.Schema = fc.Schema.toBQ()
- }
- if format == CSV {
- fc.CSVOptions.populateExternalDataConfig(conf)
- }
- }
-
// Encoding specifies the character encoding of data to be loaded into BigQuery.
// The values declared in this file are UTF_8 and ISO_8859_1.
// See https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.encoding
// for more details about how this is used.
type Encoding string
-
// Character encodings that can be used as an Encoding value.
//
// NOTE(review): the underscore names depart from Go's MixedCaps convention;
// they are exported, so renaming them would break existing callers.
const (
	// UTF_8 selects the "UTF-8" character encoding.
	UTF_8 Encoding = "UTF-8"
	// ISO_8859_1 selects the "ISO-8859-1" character encoding.
	ISO_8859_1 Encoding = "ISO-8859-1"
)
|