You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

261 lines
6.8 KiB

  1. // Copyright 2015 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bigquery
  15. import (
  16. "strings"
  17. "testing"
  18. "time"
  19. "cloud.google.com/go/internal/testutil"
  20. "github.com/google/go-cmp/cmp"
  21. "github.com/google/go-cmp/cmp/cmpopts"
  22. bq "google.golang.org/api/bigquery/v2"
  23. )
  24. func defaultLoadJob() *bq.Job {
  25. return &bq.Job{
  26. JobReference: &bq.JobReference{JobId: "RANDOM", ProjectId: "client-project-id"},
  27. Configuration: &bq.JobConfiguration{
  28. Load: &bq.JobConfigurationLoad{
  29. DestinationTable: &bq.TableReference{
  30. ProjectId: "client-project-id",
  31. DatasetId: "dataset-id",
  32. TableId: "table-id",
  33. },
  34. SourceUris: []string{"uri"},
  35. },
  36. },
  37. }
  38. }
  39. func stringFieldSchema() *FieldSchema {
  40. return &FieldSchema{Name: "fieldname", Type: StringFieldType}
  41. }
  42. func nestedFieldSchema() *FieldSchema {
  43. return &FieldSchema{
  44. Name: "nested",
  45. Type: RecordFieldType,
  46. Schema: Schema{stringFieldSchema()},
  47. }
  48. }
  49. func bqStringFieldSchema() *bq.TableFieldSchema {
  50. return &bq.TableFieldSchema{
  51. Name: "fieldname",
  52. Type: "STRING",
  53. }
  54. }
  55. func bqNestedFieldSchema() *bq.TableFieldSchema {
  56. return &bq.TableFieldSchema{
  57. Name: "nested",
  58. Type: "RECORD",
  59. Fields: []*bq.TableFieldSchema{bqStringFieldSchema()},
  60. }
  61. }
  62. func TestLoad(t *testing.T) {
  63. defer fixRandomID("RANDOM")()
  64. c := &Client{projectID: "client-project-id"}
  65. testCases := []struct {
  66. dst *Table
  67. src LoadSource
  68. jobID string
  69. location string
  70. config LoadConfig
  71. want *bq.Job
  72. }{
  73. {
  74. dst: c.Dataset("dataset-id").Table("table-id"),
  75. src: NewGCSReference("uri"),
  76. want: defaultLoadJob(),
  77. },
  78. {
  79. dst: c.Dataset("dataset-id").Table("table-id"),
  80. src: NewGCSReference("uri"),
  81. location: "loc",
  82. want: func() *bq.Job {
  83. j := defaultLoadJob()
  84. j.JobReference.Location = "loc"
  85. return j
  86. }(),
  87. },
  88. {
  89. dst: c.Dataset("dataset-id").Table("table-id"),
  90. jobID: "ajob",
  91. config: LoadConfig{
  92. CreateDisposition: CreateNever,
  93. WriteDisposition: WriteTruncate,
  94. Labels: map[string]string{"a": "b"},
  95. TimePartitioning: &TimePartitioning{Expiration: 1234 * time.Millisecond},
  96. DestinationEncryptionConfig: &EncryptionConfig{KMSKeyName: "keyName"},
  97. SchemaUpdateOptions: []string{"ALLOW_FIELD_ADDITION"},
  98. },
  99. src: NewGCSReference("uri"),
  100. want: func() *bq.Job {
  101. j := defaultLoadJob()
  102. j.Configuration.Labels = map[string]string{"a": "b"}
  103. j.Configuration.Load.CreateDisposition = "CREATE_NEVER"
  104. j.Configuration.Load.WriteDisposition = "WRITE_TRUNCATE"
  105. j.Configuration.Load.TimePartitioning = &bq.TimePartitioning{
  106. Type: "DAY",
  107. ExpirationMs: 1234,
  108. }
  109. j.Configuration.Load.DestinationEncryptionConfiguration = &bq.EncryptionConfiguration{KmsKeyName: "keyName"}
  110. j.JobReference = &bq.JobReference{
  111. JobId: "ajob",
  112. ProjectId: "client-project-id",
  113. }
  114. j.Configuration.Load.SchemaUpdateOptions = []string{"ALLOW_FIELD_ADDITION"}
  115. return j
  116. }(),
  117. },
  118. {
  119. dst: c.Dataset("dataset-id").Table("table-id"),
  120. src: func() *GCSReference {
  121. g := NewGCSReference("uri")
  122. g.MaxBadRecords = 1
  123. g.AllowJaggedRows = true
  124. g.AllowQuotedNewlines = true
  125. g.IgnoreUnknownValues = true
  126. return g
  127. }(),
  128. want: func() *bq.Job {
  129. j := defaultLoadJob()
  130. j.Configuration.Load.MaxBadRecords = 1
  131. j.Configuration.Load.AllowJaggedRows = true
  132. j.Configuration.Load.AllowQuotedNewlines = true
  133. j.Configuration.Load.IgnoreUnknownValues = true
  134. return j
  135. }(),
  136. },
  137. {
  138. dst: c.Dataset("dataset-id").Table("table-id"),
  139. src: func() *GCSReference {
  140. g := NewGCSReference("uri")
  141. g.Schema = Schema{
  142. stringFieldSchema(),
  143. nestedFieldSchema(),
  144. }
  145. return g
  146. }(),
  147. want: func() *bq.Job {
  148. j := defaultLoadJob()
  149. j.Configuration.Load.Schema = &bq.TableSchema{
  150. Fields: []*bq.TableFieldSchema{
  151. bqStringFieldSchema(),
  152. bqNestedFieldSchema(),
  153. }}
  154. return j
  155. }(),
  156. },
  157. {
  158. dst: c.Dataset("dataset-id").Table("table-id"),
  159. src: func() *GCSReference {
  160. g := NewGCSReference("uri")
  161. g.SkipLeadingRows = 1
  162. g.SourceFormat = JSON
  163. g.Encoding = UTF_8
  164. g.FieldDelimiter = "\t"
  165. g.Quote = "-"
  166. return g
  167. }(),
  168. want: func() *bq.Job {
  169. j := defaultLoadJob()
  170. j.Configuration.Load.SkipLeadingRows = 1
  171. j.Configuration.Load.SourceFormat = "NEWLINE_DELIMITED_JSON"
  172. j.Configuration.Load.Encoding = "UTF-8"
  173. j.Configuration.Load.FieldDelimiter = "\t"
  174. hyphen := "-"
  175. j.Configuration.Load.Quote = &hyphen
  176. return j
  177. }(),
  178. },
  179. {
  180. dst: c.Dataset("dataset-id").Table("table-id"),
  181. src: NewGCSReference("uri"),
  182. want: func() *bq.Job {
  183. j := defaultLoadJob()
  184. // Quote is left unset in GCSReference, so should be nil here.
  185. j.Configuration.Load.Quote = nil
  186. return j
  187. }(),
  188. },
  189. {
  190. dst: c.Dataset("dataset-id").Table("table-id"),
  191. src: func() *GCSReference {
  192. g := NewGCSReference("uri")
  193. g.ForceZeroQuote = true
  194. return g
  195. }(),
  196. want: func() *bq.Job {
  197. j := defaultLoadJob()
  198. empty := ""
  199. j.Configuration.Load.Quote = &empty
  200. return j
  201. }(),
  202. },
  203. {
  204. dst: c.Dataset("dataset-id").Table("table-id"),
  205. src: func() *ReaderSource {
  206. r := NewReaderSource(strings.NewReader("foo"))
  207. r.SkipLeadingRows = 1
  208. r.SourceFormat = JSON
  209. r.Encoding = UTF_8
  210. r.FieldDelimiter = "\t"
  211. r.Quote = "-"
  212. return r
  213. }(),
  214. want: func() *bq.Job {
  215. j := defaultLoadJob()
  216. j.Configuration.Load.SourceUris = nil
  217. j.Configuration.Load.SkipLeadingRows = 1
  218. j.Configuration.Load.SourceFormat = "NEWLINE_DELIMITED_JSON"
  219. j.Configuration.Load.Encoding = "UTF-8"
  220. j.Configuration.Load.FieldDelimiter = "\t"
  221. hyphen := "-"
  222. j.Configuration.Load.Quote = &hyphen
  223. return j
  224. }(),
  225. },
  226. }
  227. for i, tc := range testCases {
  228. loader := tc.dst.LoaderFrom(tc.src)
  229. loader.JobID = tc.jobID
  230. loader.Location = tc.location
  231. tc.config.Src = tc.src
  232. tc.config.Dst = tc.dst
  233. loader.LoadConfig = tc.config
  234. got, _ := loader.newJob()
  235. checkJob(t, i, got, tc.want)
  236. jc, err := bqToJobConfig(got.Configuration, c)
  237. if err != nil {
  238. t.Fatalf("#%d: %v", i, err)
  239. }
  240. diff := testutil.Diff(jc.(*LoadConfig), &loader.LoadConfig,
  241. cmp.AllowUnexported(Table{}, Client{}),
  242. cmpopts.IgnoreUnexported(ReaderSource{}))
  243. if diff != "" {
  244. t.Errorf("#%d: (got=-, want=+:\n%s", i, diff)
  245. }
  246. }
  247. }