You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

299 lines
7.8 KiB

  1. // Copyright 2015 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bigquery
  15. import (
  16. "strings"
  17. "testing"
  18. "time"
  19. "cloud.google.com/go/internal/testutil"
  20. "github.com/google/go-cmp/cmp"
  21. "github.com/google/go-cmp/cmp/cmpopts"
  22. bq "google.golang.org/api/bigquery/v2"
  23. )
  24. func defaultLoadJob() *bq.Job {
  25. return &bq.Job{
  26. JobReference: &bq.JobReference{JobId: "RANDOM", ProjectId: "client-project-id"},
  27. Configuration: &bq.JobConfiguration{
  28. Load: &bq.JobConfigurationLoad{
  29. DestinationTable: &bq.TableReference{
  30. ProjectId: "client-project-id",
  31. DatasetId: "dataset-id",
  32. TableId: "table-id",
  33. },
  34. SourceUris: []string{"uri"},
  35. },
  36. },
  37. }
  38. }
  39. func stringFieldSchema() *FieldSchema {
  40. return &FieldSchema{Name: "fieldname", Type: StringFieldType}
  41. }
  42. func nestedFieldSchema() *FieldSchema {
  43. return &FieldSchema{
  44. Name: "nested",
  45. Type: RecordFieldType,
  46. Schema: Schema{stringFieldSchema()},
  47. }
  48. }
  49. func bqStringFieldSchema() *bq.TableFieldSchema {
  50. return &bq.TableFieldSchema{
  51. Name: "fieldname",
  52. Type: "STRING",
  53. }
  54. }
  55. func bqNestedFieldSchema() *bq.TableFieldSchema {
  56. return &bq.TableFieldSchema{
  57. Name: "nested",
  58. Type: "RECORD",
  59. Fields: []*bq.TableFieldSchema{bqStringFieldSchema()},
  60. }
  61. }
  62. func TestLoad(t *testing.T) {
  63. defer fixRandomID("RANDOM")()
  64. c := &Client{projectID: "client-project-id"}
  65. testCases := []struct {
  66. dst *Table
  67. src LoadSource
  68. jobID string
  69. location string
  70. config LoadConfig
  71. want *bq.Job
  72. }{
  73. {
  74. dst: c.Dataset("dataset-id").Table("table-id"),
  75. src: NewGCSReference("uri"),
  76. want: defaultLoadJob(),
  77. },
  78. {
  79. dst: c.Dataset("dataset-id").Table("table-id"),
  80. src: NewGCSReference("uri"),
  81. location: "loc",
  82. want: func() *bq.Job {
  83. j := defaultLoadJob()
  84. j.JobReference.Location = "loc"
  85. return j
  86. }(),
  87. },
  88. {
  89. dst: c.Dataset("dataset-id").Table("table-id"),
  90. jobID: "ajob",
  91. config: LoadConfig{
  92. CreateDisposition: CreateNever,
  93. WriteDisposition: WriteTruncate,
  94. Labels: map[string]string{"a": "b"},
  95. TimePartitioning: &TimePartitioning{Expiration: 1234 * time.Millisecond},
  96. Clustering: &Clustering{Fields: []string{"cfield1"}},
  97. DestinationEncryptionConfig: &EncryptionConfig{KMSKeyName: "keyName"},
  98. SchemaUpdateOptions: []string{"ALLOW_FIELD_ADDITION"},
  99. },
  100. src: NewGCSReference("uri"),
  101. want: func() *bq.Job {
  102. j := defaultLoadJob()
  103. j.Configuration.Labels = map[string]string{"a": "b"}
  104. j.Configuration.Load.CreateDisposition = "CREATE_NEVER"
  105. j.Configuration.Load.WriteDisposition = "WRITE_TRUNCATE"
  106. j.Configuration.Load.TimePartitioning = &bq.TimePartitioning{
  107. Type: "DAY",
  108. ExpirationMs: 1234,
  109. }
  110. j.Configuration.Load.Clustering = &bq.Clustering{
  111. Fields: []string{"cfield1"},
  112. }
  113. j.Configuration.Load.DestinationEncryptionConfiguration = &bq.EncryptionConfiguration{KmsKeyName: "keyName"}
  114. j.JobReference = &bq.JobReference{
  115. JobId: "ajob",
  116. ProjectId: "client-project-id",
  117. }
  118. j.Configuration.Load.SchemaUpdateOptions = []string{"ALLOW_FIELD_ADDITION"}
  119. return j
  120. }(),
  121. },
  122. {
  123. dst: c.Dataset("dataset-id").Table("table-id"),
  124. src: func() *GCSReference {
  125. g := NewGCSReference("uri")
  126. g.MaxBadRecords = 1
  127. g.AllowJaggedRows = true
  128. g.AllowQuotedNewlines = true
  129. g.IgnoreUnknownValues = true
  130. return g
  131. }(),
  132. want: func() *bq.Job {
  133. j := defaultLoadJob()
  134. j.Configuration.Load.MaxBadRecords = 1
  135. j.Configuration.Load.AllowJaggedRows = true
  136. j.Configuration.Load.AllowQuotedNewlines = true
  137. j.Configuration.Load.IgnoreUnknownValues = true
  138. return j
  139. }(),
  140. },
  141. {
  142. dst: c.Dataset("dataset-id").Table("table-id"),
  143. src: func() *GCSReference {
  144. g := NewGCSReference("uri")
  145. g.Schema = Schema{
  146. stringFieldSchema(),
  147. nestedFieldSchema(),
  148. }
  149. return g
  150. }(),
  151. want: func() *bq.Job {
  152. j := defaultLoadJob()
  153. j.Configuration.Load.Schema = &bq.TableSchema{
  154. Fields: []*bq.TableFieldSchema{
  155. bqStringFieldSchema(),
  156. bqNestedFieldSchema(),
  157. }}
  158. return j
  159. }(),
  160. },
  161. {
  162. dst: c.Dataset("dataset-id").Table("table-id"),
  163. src: func() *GCSReference {
  164. g := NewGCSReference("uri")
  165. g.SkipLeadingRows = 1
  166. g.SourceFormat = JSON
  167. g.Encoding = UTF_8
  168. g.FieldDelimiter = "\t"
  169. g.Quote = "-"
  170. return g
  171. }(),
  172. want: func() *bq.Job {
  173. j := defaultLoadJob()
  174. j.Configuration.Load.SkipLeadingRows = 1
  175. j.Configuration.Load.SourceFormat = "NEWLINE_DELIMITED_JSON"
  176. j.Configuration.Load.Encoding = "UTF-8"
  177. j.Configuration.Load.FieldDelimiter = "\t"
  178. hyphen := "-"
  179. j.Configuration.Load.Quote = &hyphen
  180. return j
  181. }(),
  182. },
  183. {
  184. dst: c.Dataset("dataset-id").Table("table-id"),
  185. src: NewGCSReference("uri"),
  186. want: func() *bq.Job {
  187. j := defaultLoadJob()
  188. // Quote is left unset in GCSReference, so should be nil here.
  189. j.Configuration.Load.Quote = nil
  190. return j
  191. }(),
  192. },
  193. {
  194. dst: c.Dataset("dataset-id").Table("table-id"),
  195. src: func() *GCSReference {
  196. g := NewGCSReference("uri")
  197. g.ForceZeroQuote = true
  198. return g
  199. }(),
  200. want: func() *bq.Job {
  201. j := defaultLoadJob()
  202. empty := ""
  203. j.Configuration.Load.Quote = &empty
  204. return j
  205. }(),
  206. },
  207. {
  208. dst: c.Dataset("dataset-id").Table("table-id"),
  209. src: func() *ReaderSource {
  210. r := NewReaderSource(strings.NewReader("foo"))
  211. r.SkipLeadingRows = 1
  212. r.SourceFormat = JSON
  213. r.Encoding = UTF_8
  214. r.FieldDelimiter = "\t"
  215. r.Quote = "-"
  216. return r
  217. }(),
  218. want: func() *bq.Job {
  219. j := defaultLoadJob()
  220. j.Configuration.Load.SourceUris = nil
  221. j.Configuration.Load.SkipLeadingRows = 1
  222. j.Configuration.Load.SourceFormat = "NEWLINE_DELIMITED_JSON"
  223. j.Configuration.Load.Encoding = "UTF-8"
  224. j.Configuration.Load.FieldDelimiter = "\t"
  225. hyphen := "-"
  226. j.Configuration.Load.Quote = &hyphen
  227. return j
  228. }(),
  229. },
  230. {
  231. dst: c.Dataset("dataset-id").Table("table-id"),
  232. src: func() *GCSReference {
  233. g := NewGCSReference("uri")
  234. g.SourceFormat = Avro
  235. return g
  236. }(),
  237. config: LoadConfig{
  238. UseAvroLogicalTypes: true,
  239. },
  240. want: func() *bq.Job {
  241. j := defaultLoadJob()
  242. j.Configuration.Load.SourceFormat = "AVRO"
  243. j.Configuration.Load.UseAvroLogicalTypes = true
  244. return j
  245. }(),
  246. },
  247. {
  248. dst: c.Dataset("dataset-id").Table("table-id"),
  249. src: func() *ReaderSource {
  250. r := NewReaderSource(strings.NewReader("foo"))
  251. r.SourceFormat = Avro
  252. return r
  253. }(),
  254. config: LoadConfig{
  255. UseAvroLogicalTypes: true,
  256. },
  257. want: func() *bq.Job {
  258. j := defaultLoadJob()
  259. j.Configuration.Load.SourceUris = nil
  260. j.Configuration.Load.SourceFormat = "AVRO"
  261. j.Configuration.Load.UseAvroLogicalTypes = true
  262. return j
  263. }(),
  264. },
  265. }
  266. for i, tc := range testCases {
  267. loader := tc.dst.LoaderFrom(tc.src)
  268. loader.JobID = tc.jobID
  269. loader.Location = tc.location
  270. tc.config.Src = tc.src
  271. tc.config.Dst = tc.dst
  272. loader.LoadConfig = tc.config
  273. got, _ := loader.newJob()
  274. checkJob(t, i, got, tc.want)
  275. jc, err := bqToJobConfig(got.Configuration, c)
  276. if err != nil {
  277. t.Fatalf("#%d: %v", i, err)
  278. }
  279. diff := testutil.Diff(jc.(*LoadConfig), &loader.LoadConfig,
  280. cmp.AllowUnexported(Table{}, Client{}),
  281. cmpopts.IgnoreUnexported(ReaderSource{}))
  282. if diff != "" {
  283. t.Errorf("#%d: (got=-, want=+:\n%s", i, diff)
  284. }
  285. }
  286. }