You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

142 lines
4.4 KiB

  1. // Copyright 2016 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bigquery
  15. import (
  16. "io"
  17. "cloud.google.com/go/internal/trace"
  18. "golang.org/x/net/context"
  19. bq "google.golang.org/api/bigquery/v2"
  20. )
  21. // LoadConfig holds the configuration for a load job.
  22. type LoadConfig struct {
  23. // Src is the source from which data will be loaded.
  24. Src LoadSource
  25. // Dst is the table into which the data will be loaded.
  26. Dst *Table
  27. // CreateDisposition specifies the circumstances under which the destination table will be created.
  28. // The default is CreateIfNeeded.
  29. CreateDisposition TableCreateDisposition
  30. // WriteDisposition specifies how existing data in the destination table is treated.
  31. // The default is WriteAppend.
  32. WriteDisposition TableWriteDisposition
  33. // The labels associated with this job.
  34. Labels map[string]string
  35. // If non-nil, the destination table is partitioned by time.
  36. TimePartitioning *TimePartitioning
  37. // Custom encryption configuration (e.g., Cloud KMS keys).
  38. DestinationEncryptionConfig *EncryptionConfig
  39. // Allows the schema of the destination table to be updated as a side effect of
  40. // the load job.
  41. SchemaUpdateOptions []string
  42. }
  43. func (l *LoadConfig) toBQ() (*bq.JobConfiguration, io.Reader) {
  44. config := &bq.JobConfiguration{
  45. Labels: l.Labels,
  46. Load: &bq.JobConfigurationLoad{
  47. CreateDisposition: string(l.CreateDisposition),
  48. WriteDisposition: string(l.WriteDisposition),
  49. DestinationTable: l.Dst.toBQ(),
  50. TimePartitioning: l.TimePartitioning.toBQ(),
  51. DestinationEncryptionConfiguration: l.DestinationEncryptionConfig.toBQ(),
  52. SchemaUpdateOptions: l.SchemaUpdateOptions,
  53. },
  54. }
  55. media := l.Src.populateLoadConfig(config.Load)
  56. return config, media
  57. }
  58. func bqToLoadConfig(q *bq.JobConfiguration, c *Client) *LoadConfig {
  59. lc := &LoadConfig{
  60. Labels: q.Labels,
  61. CreateDisposition: TableCreateDisposition(q.Load.CreateDisposition),
  62. WriteDisposition: TableWriteDisposition(q.Load.WriteDisposition),
  63. Dst: bqToTable(q.Load.DestinationTable, c),
  64. TimePartitioning: bqToTimePartitioning(q.Load.TimePartitioning),
  65. DestinationEncryptionConfig: bqToEncryptionConfig(q.Load.DestinationEncryptionConfiguration),
  66. SchemaUpdateOptions: q.Load.SchemaUpdateOptions,
  67. }
  68. var fc *FileConfig
  69. if len(q.Load.SourceUris) == 0 {
  70. s := NewReaderSource(nil)
  71. fc = &s.FileConfig
  72. lc.Src = s
  73. } else {
  74. s := NewGCSReference(q.Load.SourceUris...)
  75. fc = &s.FileConfig
  76. lc.Src = s
  77. }
  78. bqPopulateFileConfig(q.Load, fc)
  79. return lc
  80. }
  81. // A Loader loads data from Google Cloud Storage into a BigQuery table.
  82. type Loader struct {
  83. JobIDConfig
  84. LoadConfig
  85. c *Client
  86. }
  87. // A LoadSource represents a source of data that can be loaded into
  88. // a BigQuery table.
  89. //
  90. // This package defines two LoadSources: GCSReference, for Google Cloud Storage
  91. // objects, and ReaderSource, for data read from an io.Reader.
  92. type LoadSource interface {
  93. // populates config, returns media
  94. populateLoadConfig(*bq.JobConfigurationLoad) io.Reader
  95. }
  96. // LoaderFrom returns a Loader which can be used to load data into a BigQuery table.
  97. // The returned Loader may optionally be further configured before its Run method is called.
  98. // See GCSReference and ReaderSource for additional configuration options that
  99. // affect loading.
  100. func (t *Table) LoaderFrom(src LoadSource) *Loader {
  101. return &Loader{
  102. c: t.c,
  103. LoadConfig: LoadConfig{
  104. Src: src,
  105. Dst: t,
  106. },
  107. }
  108. }
  109. // Run initiates a load job.
  110. func (l *Loader) Run(ctx context.Context) (j *Job, err error) {
  111. ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Load.Run")
  112. defer func() { trace.EndSpan(ctx, err) }()
  113. job, media := l.newJob()
  114. return l.c.insertJob(ctx, job, media)
  115. }
  116. func (l *Loader) newJob() (*bq.Job, io.Reader) {
  117. config, media := l.LoadConfig.toBQ()
  118. return &bq.Job{
  119. JobReference: l.JobIDConfig.createJobRef(l.c),
  120. Configuration: config,
  121. }, media
  122. }