// Copyright 2016 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package bigquery

import (
	"context"
	"io"

	"cloud.google.com/go/internal/trace"
	bq "google.golang.org/api/bigquery/v2"
)

// LoadConfig holds the configuration for a load job.
type LoadConfig struct {
	// Src is the source from which data will be loaded.
	Src LoadSource

	// Dst is the table into which the data will be loaded.
	Dst *Table

	// CreateDisposition specifies the circumstances under which the destination
	// table will be created. The default is CreateIfNeeded.
	CreateDisposition TableCreateDisposition

	// WriteDisposition specifies how existing data in the destination table is
	// treated. The default is WriteAppend.
	WriteDisposition TableWriteDisposition

	// The labels associated with this job.
	Labels map[string]string

	// If non-nil, the destination table is partitioned by time.
	TimePartitioning *TimePartitioning

	// Clustering specifies the data clustering configuration for the
	// destination table.
	Clustering *Clustering

	// Custom encryption configuration (e.g., Cloud KMS keys).
	DestinationEncryptionConfig *EncryptionConfig

	// Allows the schema of the destination table to be updated as a side effect
	// of the load job.
	SchemaUpdateOptions []string

	// For Avro-based loads, controls whether logical type annotations are used.
	// See https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-avro#logical_types
	// for additional information.
	UseAvroLogicalTypes bool
}
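
// A hedged usage sketch (not part of the original file; identifiers are
// placeholders): Loader embeds LoadConfig, so these fields are normally set
// on a Loader obtained from Table.LoaderFrom before calling Run.
//
//	loader := table.LoaderFrom(src)
//	loader.CreateDisposition = bigquery.CreateNever
//	loader.WriteDisposition = bigquery.WriteTruncate
//	loader.Labels = map[string]string{"origin": "nightly-batch"}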

// toBQ converts the LoadConfig to its BigQuery API representation, returning
// the job configuration along with any media to be uploaded.
func (l *LoadConfig) toBQ() (*bq.JobConfiguration, io.Reader) {
	config := &bq.JobConfiguration{
		Labels: l.Labels,
		Load: &bq.JobConfigurationLoad{
			CreateDisposition:                  string(l.CreateDisposition),
			WriteDisposition:                   string(l.WriteDisposition),
			DestinationTable:                   l.Dst.toBQ(),
			TimePartitioning:                   l.TimePartitioning.toBQ(),
			Clustering:                         l.Clustering.toBQ(),
			DestinationEncryptionConfiguration: l.DestinationEncryptionConfig.toBQ(),
			SchemaUpdateOptions:                l.SchemaUpdateOptions,
			UseAvroLogicalTypes:                l.UseAvroLogicalTypes,
		},
	}
	media := l.Src.populateLoadConfig(config.Load)
	return config, media
}

// bqToLoadConfig converts a BigQuery API job configuration into a LoadConfig,
// reconstructing the load source from the job's source URIs.
func bqToLoadConfig(q *bq.JobConfiguration, c *Client) *LoadConfig {
	lc := &LoadConfig{
		Labels:                      q.Labels,
		CreateDisposition:           TableCreateDisposition(q.Load.CreateDisposition),
		WriteDisposition:            TableWriteDisposition(q.Load.WriteDisposition),
		Dst:                         bqToTable(q.Load.DestinationTable, c),
		TimePartitioning:            bqToTimePartitioning(q.Load.TimePartitioning),
		Clustering:                  bqToClustering(q.Load.Clustering),
		DestinationEncryptionConfig: bqToEncryptionConfig(q.Load.DestinationEncryptionConfiguration),
		SchemaUpdateOptions:         q.Load.SchemaUpdateOptions,
		UseAvroLogicalTypes:         q.Load.UseAvroLogicalTypes,
	}
	var fc *FileConfig
	if len(q.Load.SourceUris) == 0 {
		s := NewReaderSource(nil)
		fc = &s.FileConfig
		lc.Src = s
	} else {
		s := NewGCSReference(q.Load.SourceUris...)
		fc = &s.FileConfig
		lc.Src = s
	}
	bqPopulateFileConfig(q.Load, fc)
	return lc
}

// A Loader loads data from Google Cloud Storage or from an io.Reader into a
// BigQuery table.
type Loader struct {
	JobIDConfig
	LoadConfig
	c *Client
}

// A LoadSource represents a source of data that can be loaded into
// a BigQuery table.
//
// This package defines two LoadSources: GCSReference, for Google Cloud Storage
// objects, and ReaderSource, for data read from an io.Reader.
type LoadSource interface {
	// populates config, returns media
	populateLoadConfig(*bq.JobConfigurationLoad) io.Reader
}
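
// A hedged sketch of the ReaderSource path (file and table names are
// placeholders): any io.Reader, such as an open file, can serve as the
// load source.
//
//	f, err := os.Open("data.csv")
//	if err != nil {
//		// handle error
//	}
//	rs := bigquery.NewReaderSource(f)
//	rs.SourceFormat = bigquery.CSV
//	loader := table.LoaderFrom(rs)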

// LoaderFrom returns a Loader which can be used to load data into a BigQuery table.
// The returned Loader may optionally be further configured before its Run method is called.
// See GCSReference and ReaderSource for additional configuration options that
// affect loading.
func (t *Table) LoaderFrom(src LoadSource) *Loader {
	return &Loader{
		c: t.c,
		LoadConfig: LoadConfig{
			Src: src,
			Dst: t,
		},
	}
}
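
// A hedged end-to-end sketch (bucket, dataset, and table identifiers are
// placeholders): load a CSV object from Cloud Storage, overwriting any
// existing table data.
//
//	gcsRef := bigquery.NewGCSReference("gs://my-bucket/data.csv")
//	gcsRef.SkipLeadingRows = 1
//	loader := client.Dataset("my_dataset").Table("my_table").LoaderFrom(gcsRef)
//	loader.WriteDisposition = bigquery.WriteTruncate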

// Run initiates a load job.
func (l *Loader) Run(ctx context.Context) (j *Job, err error) {
	ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Load.Run")
	defer func() { trace.EndSpan(ctx, err) }()
	job, media := l.newJob()
	return l.c.insertJob(ctx, job, media)
}
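
// Run only starts the job; a hedged sketch of waiting for completion (error
// handling elided):
//
//	job, err := loader.Run(ctx)
//	if err != nil {
//		// handle error
//	}
//	status, err := job.Wait(ctx)
//	if err != nil {
//		// handle error
//	}
//	if status.Err() != nil {
//		// the job completed, but the load itself failed
//	}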

// newJob assembles the bq.Job for this load, attaching a job reference
// derived from the Loader's JobIDConfig.
func (l *Loader) newJob() (*bq.Job, io.Reader) {
	config, media := l.LoadConfig.toBQ()
	return &bq.Job{
		JobReference:  l.JobIDConfig.createJobRef(l.c),
		Configuration: config,
	}, media
}