You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

630 lines
20 KiB

  1. // Copyright 2015 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bigquery
  15. import (
  16. "context"
  17. "errors"
  18. "fmt"
  19. "time"
  20. "cloud.google.com/go/internal/optional"
  21. "cloud.google.com/go/internal/trace"
  22. bq "google.golang.org/api/bigquery/v2"
  23. )
// A Table is a reference to a BigQuery table.
type Table struct {
	// ProjectID, DatasetID and TableID may be omitted if the Table is the destination for a query.
	// In this case the result will be stored in an ephemeral table.
	ProjectID string
	DatasetID string
	// TableID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_).
	// The maximum length is 1,024 characters.
	TableID string
	// c is the client through which the methods of Table issue their
	// requests to the BigQuery service.
	c *Client
}
// TableMetadata contains information about a BigQuery table.
type TableMetadata struct {
	// The following fields can be set when creating a table.

	// The user-friendly name for the table.
	Name string
	// The user-friendly description of the table.
	Description string
	// The table schema. If provided on create, ViewQuery must be empty.
	Schema Schema
	// The query to use for a view. If provided on create, Schema must be nil.
	ViewQuery string
	// Use Legacy SQL for the view query.
	// At most one of UseLegacySQL and UseStandardSQL can be true.
	UseLegacySQL bool
	// Use Standard SQL for the view query. The default.
	// At most one of UseLegacySQL and UseStandardSQL can be true.
	// Deprecated: use UseLegacySQL.
	UseStandardSQL bool
	// If non-nil, the table is partitioned by time.
	TimePartitioning *TimePartitioning
	// Clustering specifies the data clustering configuration for the table.
	Clustering *Clustering
	// The time when this table expires. If set, this table will expire at the
	// specified time. Expired tables will be deleted and their storage
	// reclaimed. The zero value is ignored.
	ExpirationTime time.Time
	// User-provided labels.
	Labels map[string]string
	// Information about a table stored outside of BigQuery.
	ExternalDataConfig *ExternalDataConfig
	// Custom encryption configuration (e.g., Cloud KMS keys).
	EncryptionConfig *EncryptionConfig

	// All the fields below are read-only. Setting any of them on a
	// TableMetadata passed to Table.Create results in an error.

	FullID string // An opaque ID uniquely identifying the table.
	// The type of the table; see the TableType constants.
	Type TableType
	// The time the table was created, as reported by the service.
	CreationTime time.Time
	// The time the table was last modified, as reported by the service.
	LastModifiedTime time.Time
	// The size of the table in bytes.
	// This does not include data that is being buffered during a streaming insert.
	NumBytes int64
	// The number of bytes in the table considered "long-term storage" for reduced
	// billing purposes. See https://cloud.google.com/bigquery/pricing#long-term-storage
	// for more information.
	NumLongTermBytes int64
	// The number of rows of data in this table.
	// This does not include data that is being buffered during a streaming insert.
	NumRows uint64
	// Contains information regarding this table's streaming buffer, if one is
	// present. This field will be nil if the table is not being streamed to or if
	// there is no data in the streaming buffer.
	StreamingBuffer *StreamingBuffer
	// ETag is the ETag obtained when reading metadata. Pass it to Table.Update to
	// ensure that the metadata hasn't changed since it was read.
	ETag string
}
// TableCreateDisposition specifies the circumstances under which the
// destination table will be created.
// Default is CreateIfNeeded.
type TableCreateDisposition string

const (
	// CreateIfNeeded will create the table if it does not already exist.
	// Tables are created atomically on successful completion of a job.
	CreateIfNeeded TableCreateDisposition = "CREATE_IF_NEEDED"
	// CreateNever ensures the table must already exist and will not be
	// automatically created.
	CreateNever TableCreateDisposition = "CREATE_NEVER"
)
// TableWriteDisposition specifies how existing data in a destination table is treated.
// Default is WriteAppend.
type TableWriteDisposition string

const (
	// WriteAppend will append to any existing data in the destination table.
	// Data is appended atomically on successful completion of a job.
	WriteAppend TableWriteDisposition = "WRITE_APPEND"
	// WriteTruncate overwrites the existing data in the destination table.
	// Data is overwritten atomically on successful completion of a job.
	WriteTruncate TableWriteDisposition = "WRITE_TRUNCATE"
	// WriteEmpty fails writes if the destination table already contains data.
	WriteEmpty TableWriteDisposition = "WRITE_EMPTY"
)
// TableType is the type of table.
type TableType string

const (
	// RegularTable is a regular table.
	RegularTable TableType = "TABLE"
	// ViewTable is a table type describing that the table is a view. See more
	// information at https://cloud.google.com/bigquery/docs/views.
	ViewTable TableType = "VIEW"
	// ExternalTable is a table type describing that the table is an external
	// table (also known as a federated data source). See more information at
	// https://cloud.google.com/bigquery/external-data-sources.
	ExternalTable TableType = "EXTERNAL"
)
// TimePartitioning describes the time-based date partitioning on a table.
// For more information see: https://cloud.google.com/bigquery/docs/creating-partitioned-tables.
type TimePartitioning struct {
	// The amount of time to keep the storage for a partition.
	// If the duration is empty (0), the data in the partitions do not expire.
	// The value is sent to the service in whole milliseconds.
	Expiration time.Duration
	// If empty, the table is partitioned by pseudo column '_PARTITIONTIME'; if set, the
	// table is partitioned by this field. The field must be a top-level TIMESTAMP or
	// DATE field. Its mode must be NULLABLE or REQUIRED.
	Field string
	// If true, queries that reference this table must include a filter (e.g. a WHERE predicate)
	// that can be used for partition elimination.
	RequirePartitionFilter bool
}
  141. func (p *TimePartitioning) toBQ() *bq.TimePartitioning {
  142. if p == nil {
  143. return nil
  144. }
  145. return &bq.TimePartitioning{
  146. Type: "DAY",
  147. ExpirationMs: int64(p.Expiration / time.Millisecond),
  148. Field: p.Field,
  149. RequirePartitionFilter: p.RequirePartitionFilter,
  150. }
  151. }
  152. func bqToTimePartitioning(q *bq.TimePartitioning) *TimePartitioning {
  153. if q == nil {
  154. return nil
  155. }
  156. return &TimePartitioning{
  157. Expiration: time.Duration(q.ExpirationMs) * time.Millisecond,
  158. Field: q.Field,
  159. RequirePartitionFilter: q.RequirePartitionFilter,
  160. }
  161. }
// Clustering governs the organization of data within a partitioned table.
// For more information, see https://cloud.google.com/bigquery/docs/clustered-tables
type Clustering struct {
	// Fields is passed unchanged to the service as the clustering fields.
	// NOTE(review): presumably these are names of columns in the table's
	// schema; confirm against the documentation linked above.
	Fields []string
}
  167. func (c *Clustering) toBQ() *bq.Clustering {
  168. if c == nil {
  169. return nil
  170. }
  171. return &bq.Clustering{
  172. Fields: c.Fields,
  173. }
  174. }
  175. func bqToClustering(q *bq.Clustering) *Clustering {
  176. if q == nil {
  177. return nil
  178. }
  179. return &Clustering{
  180. Fields: q.Fields,
  181. }
  182. }
// EncryptionConfig configures customer-managed encryption on tables.
type EncryptionConfig struct {
	// Describes the Cloud KMS encryption key that will be used to protect
	// the destination BigQuery table. The BigQuery Service Account associated with your
	// project requires access to this encryption key.
	KMSKeyName string
}
  190. func (e *EncryptionConfig) toBQ() *bq.EncryptionConfiguration {
  191. if e == nil {
  192. return nil
  193. }
  194. return &bq.EncryptionConfiguration{
  195. KmsKeyName: e.KMSKeyName,
  196. }
  197. }
  198. func bqToEncryptionConfig(q *bq.EncryptionConfiguration) *EncryptionConfig {
  199. if q == nil {
  200. return nil
  201. }
  202. return &EncryptionConfig{
  203. KMSKeyName: q.KmsKeyName,
  204. }
  205. }
// StreamingBuffer holds information about the streaming buffer.
// It is read-only: it is populated when table metadata is fetched and
// cannot be set when creating a table.
type StreamingBuffer struct {
	// A lower-bound estimate of the number of bytes currently in the streaming
	// buffer.
	EstimatedBytes uint64
	// A lower-bound estimate of the number of rows currently in the streaming
	// buffer.
	EstimatedRows uint64
	// The time of the oldest entry in the streaming buffer.
	OldestEntryTime time.Time
}
  217. func (t *Table) toBQ() *bq.TableReference {
  218. return &bq.TableReference{
  219. ProjectId: t.ProjectID,
  220. DatasetId: t.DatasetID,
  221. TableId: t.TableID,
  222. }
  223. }
  224. // FullyQualifiedName returns the ID of the table in projectID:datasetID.tableID format.
  225. func (t *Table) FullyQualifiedName() string {
  226. return fmt.Sprintf("%s:%s.%s", t.ProjectID, t.DatasetID, t.TableID)
  227. }
  228. // implicitTable reports whether Table is an empty placeholder, which signifies that a new table should be created with an auto-generated Table ID.
  229. func (t *Table) implicitTable() bool {
  230. return t.ProjectID == "" && t.DatasetID == "" && t.TableID == ""
  231. }
  232. // Create creates a table in the BigQuery service.
  233. // Pass in a TableMetadata value to configure the table.
  234. // If tm.View.Query is non-empty, the created table will be of type VIEW.
  235. // If no ExpirationTime is specified, the table will never expire.
  236. // After table creation, a view can be modified only if its table was initially created
  237. // with a view.
  238. func (t *Table) Create(ctx context.Context, tm *TableMetadata) (err error) {
  239. ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Table.Create")
  240. defer func() { trace.EndSpan(ctx, err) }()
  241. table, err := tm.toBQ()
  242. if err != nil {
  243. return err
  244. }
  245. table.TableReference = &bq.TableReference{
  246. ProjectId: t.ProjectID,
  247. DatasetId: t.DatasetID,
  248. TableId: t.TableID,
  249. }
  250. req := t.c.bqs.Tables.Insert(t.ProjectID, t.DatasetID, table).Context(ctx)
  251. setClientHeader(req.Header())
  252. _, err = req.Do()
  253. return err
  254. }
  255. func (tm *TableMetadata) toBQ() (*bq.Table, error) {
  256. t := &bq.Table{}
  257. if tm == nil {
  258. return t, nil
  259. }
  260. if tm.Schema != nil && tm.ViewQuery != "" {
  261. return nil, errors.New("bigquery: provide Schema or ViewQuery, not both")
  262. }
  263. t.FriendlyName = tm.Name
  264. t.Description = tm.Description
  265. t.Labels = tm.Labels
  266. if tm.Schema != nil {
  267. t.Schema = tm.Schema.toBQ()
  268. }
  269. if tm.ViewQuery != "" {
  270. if tm.UseStandardSQL && tm.UseLegacySQL {
  271. return nil, errors.New("bigquery: cannot provide both UseStandardSQL and UseLegacySQL")
  272. }
  273. t.View = &bq.ViewDefinition{Query: tm.ViewQuery}
  274. if tm.UseLegacySQL {
  275. t.View.UseLegacySql = true
  276. } else {
  277. t.View.UseLegacySql = false
  278. t.View.ForceSendFields = append(t.View.ForceSendFields, "UseLegacySql")
  279. }
  280. } else if tm.UseLegacySQL || tm.UseStandardSQL {
  281. return nil, errors.New("bigquery: UseLegacy/StandardSQL requires ViewQuery")
  282. }
  283. t.TimePartitioning = tm.TimePartitioning.toBQ()
  284. t.Clustering = tm.Clustering.toBQ()
  285. if !validExpiration(tm.ExpirationTime) {
  286. return nil, fmt.Errorf("invalid expiration time: %v.\n"+
  287. "Valid expiration times are after 1678 and before 2262", tm.ExpirationTime)
  288. }
  289. if !tm.ExpirationTime.IsZero() && tm.ExpirationTime != NeverExpire {
  290. t.ExpirationTime = tm.ExpirationTime.UnixNano() / 1e6
  291. }
  292. if tm.ExternalDataConfig != nil {
  293. edc := tm.ExternalDataConfig.toBQ()
  294. t.ExternalDataConfiguration = &edc
  295. }
  296. t.EncryptionConfiguration = tm.EncryptionConfig.toBQ()
  297. if tm.FullID != "" {
  298. return nil, errors.New("cannot set FullID on create")
  299. }
  300. if tm.Type != "" {
  301. return nil, errors.New("cannot set Type on create")
  302. }
  303. if !tm.CreationTime.IsZero() {
  304. return nil, errors.New("cannot set CreationTime on create")
  305. }
  306. if !tm.LastModifiedTime.IsZero() {
  307. return nil, errors.New("cannot set LastModifiedTime on create")
  308. }
  309. if tm.NumBytes != 0 {
  310. return nil, errors.New("cannot set NumBytes on create")
  311. }
  312. if tm.NumLongTermBytes != 0 {
  313. return nil, errors.New("cannot set NumLongTermBytes on create")
  314. }
  315. if tm.NumRows != 0 {
  316. return nil, errors.New("cannot set NumRows on create")
  317. }
  318. if tm.StreamingBuffer != nil {
  319. return nil, errors.New("cannot set StreamingBuffer on create")
  320. }
  321. if tm.ETag != "" {
  322. return nil, errors.New("cannot set ETag on create")
  323. }
  324. return t, nil
  325. }
  326. // Metadata fetches the metadata for the table.
  327. func (t *Table) Metadata(ctx context.Context) (md *TableMetadata, err error) {
  328. ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Table.Metadata")
  329. defer func() { trace.EndSpan(ctx, err) }()
  330. req := t.c.bqs.Tables.Get(t.ProjectID, t.DatasetID, t.TableID).Context(ctx)
  331. setClientHeader(req.Header())
  332. var table *bq.Table
  333. err = runWithRetry(ctx, func() (err error) {
  334. table, err = req.Do()
  335. return err
  336. })
  337. if err != nil {
  338. return nil, err
  339. }
  340. return bqToTableMetadata(table)
  341. }
  342. func bqToTableMetadata(t *bq.Table) (*TableMetadata, error) {
  343. md := &TableMetadata{
  344. Description: t.Description,
  345. Name: t.FriendlyName,
  346. Type: TableType(t.Type),
  347. FullID: t.Id,
  348. Labels: t.Labels,
  349. NumBytes: t.NumBytes,
  350. NumLongTermBytes: t.NumLongTermBytes,
  351. NumRows: t.NumRows,
  352. ExpirationTime: unixMillisToTime(t.ExpirationTime),
  353. CreationTime: unixMillisToTime(t.CreationTime),
  354. LastModifiedTime: unixMillisToTime(int64(t.LastModifiedTime)),
  355. ETag: t.Etag,
  356. EncryptionConfig: bqToEncryptionConfig(t.EncryptionConfiguration),
  357. }
  358. if t.Schema != nil {
  359. md.Schema = bqToSchema(t.Schema)
  360. }
  361. if t.View != nil {
  362. md.ViewQuery = t.View.Query
  363. md.UseLegacySQL = t.View.UseLegacySql
  364. }
  365. md.TimePartitioning = bqToTimePartitioning(t.TimePartitioning)
  366. md.Clustering = bqToClustering(t.Clustering)
  367. if t.StreamingBuffer != nil {
  368. md.StreamingBuffer = &StreamingBuffer{
  369. EstimatedBytes: t.StreamingBuffer.EstimatedBytes,
  370. EstimatedRows: t.StreamingBuffer.EstimatedRows,
  371. OldestEntryTime: unixMillisToTime(int64(t.StreamingBuffer.OldestEntryTime)),
  372. }
  373. }
  374. if t.ExternalDataConfiguration != nil {
  375. edc, err := bqToExternalDataConfig(t.ExternalDataConfiguration)
  376. if err != nil {
  377. return nil, err
  378. }
  379. md.ExternalDataConfig = edc
  380. }
  381. return md, nil
  382. }
  383. // Delete deletes the table.
  384. func (t *Table) Delete(ctx context.Context) (err error) {
  385. ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Table.Delete")
  386. defer func() { trace.EndSpan(ctx, err) }()
  387. req := t.c.bqs.Tables.Delete(t.ProjectID, t.DatasetID, t.TableID).Context(ctx)
  388. setClientHeader(req.Header())
  389. return req.Do()
  390. }
  391. // Read fetches the contents of the table.
  392. func (t *Table) Read(ctx context.Context) *RowIterator {
  393. return t.read(ctx, fetchPage)
  394. }
// read returns a RowIterator over t that obtains its pages with pf.
// Read calls it with fetchPage; taking the fetcher as a parameter allows a
// different pageFetcher to be substituted.
func (t *Table) read(ctx context.Context, pf pageFetcher) *RowIterator {
	return newRowIterator(ctx, t, pf)
}
// NeverExpire is a sentinel value used to remove a table's expiration time.
// It is one nanosecond before the zero time.Time, so it is distinct from
// the zero value, which means "leave the expiration unchanged".
var NeverExpire = time.Time{}.Add(-1)
  400. // Update modifies specific Table metadata fields.
  401. func (t *Table) Update(ctx context.Context, tm TableMetadataToUpdate, etag string) (md *TableMetadata, err error) {
  402. ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Table.Update")
  403. defer func() { trace.EndSpan(ctx, err) }()
  404. bqt, err := tm.toBQ()
  405. if err != nil {
  406. return nil, err
  407. }
  408. call := t.c.bqs.Tables.Patch(t.ProjectID, t.DatasetID, t.TableID, bqt).Context(ctx)
  409. setClientHeader(call.Header())
  410. if etag != "" {
  411. call.Header().Set("If-Match", etag)
  412. }
  413. var res *bq.Table
  414. if err := runWithRetry(ctx, func() (err error) {
  415. res, err = call.Do()
  416. return err
  417. }); err != nil {
  418. return nil, err
  419. }
  420. return bqToTableMetadata(res)
  421. }
  422. func (tm *TableMetadataToUpdate) toBQ() (*bq.Table, error) {
  423. t := &bq.Table{}
  424. forceSend := func(field string) {
  425. t.ForceSendFields = append(t.ForceSendFields, field)
  426. }
  427. if tm.Description != nil {
  428. t.Description = optional.ToString(tm.Description)
  429. forceSend("Description")
  430. }
  431. if tm.Name != nil {
  432. t.FriendlyName = optional.ToString(tm.Name)
  433. forceSend("FriendlyName")
  434. }
  435. if tm.Schema != nil {
  436. t.Schema = tm.Schema.toBQ()
  437. forceSend("Schema")
  438. }
  439. if tm.EncryptionConfig != nil {
  440. t.EncryptionConfiguration = tm.EncryptionConfig.toBQ()
  441. }
  442. if !validExpiration(tm.ExpirationTime) {
  443. return nil, fmt.Errorf("invalid expiration time: %v.\n"+
  444. "Valid expiration times are after 1678 and before 2262", tm.ExpirationTime)
  445. }
  446. if tm.ExpirationTime == NeverExpire {
  447. t.NullFields = append(t.NullFields, "ExpirationTime")
  448. } else if !tm.ExpirationTime.IsZero() {
  449. t.ExpirationTime = tm.ExpirationTime.UnixNano() / 1e6
  450. forceSend("ExpirationTime")
  451. }
  452. if tm.TimePartitioning != nil {
  453. t.TimePartitioning = tm.TimePartitioning.toBQ()
  454. t.TimePartitioning.ForceSendFields = []string{"RequirePartitionFilter"}
  455. if tm.TimePartitioning.Expiration == 0 {
  456. t.TimePartitioning.NullFields = []string{"ExpirationMs"}
  457. }
  458. }
  459. if tm.ViewQuery != nil {
  460. t.View = &bq.ViewDefinition{
  461. Query: optional.ToString(tm.ViewQuery),
  462. ForceSendFields: []string{"Query"},
  463. }
  464. }
  465. if tm.UseLegacySQL != nil {
  466. if t.View == nil {
  467. t.View = &bq.ViewDefinition{}
  468. }
  469. t.View.UseLegacySql = optional.ToBool(tm.UseLegacySQL)
  470. t.View.ForceSendFields = append(t.View.ForceSendFields, "UseLegacySql")
  471. }
  472. labels, forces, nulls := tm.update()
  473. t.Labels = labels
  474. t.ForceSendFields = append(t.ForceSendFields, forces...)
  475. t.NullFields = append(t.NullFields, nulls...)
  476. return t, nil
  477. }
  478. // validExpiration ensures a specified time is either the sentinel NeverExpire,
  479. // the zero value, or within the defined range of UnixNano. Internal
  480. // represetations of expiration times are based upon Time.UnixNano. Any time
  481. // before 1678 or after 2262 cannot be represented by an int64 and is therefore
  482. // undefined and invalid. See https://godoc.org/time#Time.UnixNano.
  483. func validExpiration(t time.Time) bool {
  484. return t == NeverExpire || t.IsZero() || time.Unix(0, t.UnixNano()).Equal(t)
  485. }
// TableMetadataToUpdate is used when updating a table's metadata.
// Only non-nil fields will be updated.
type TableMetadataToUpdate struct {
	// The user-friendly description of this table.
	Description optional.String
	// The user-friendly name for this table.
	Name optional.String
	// The table's schema.
	// When updating a schema, you can add columns but not remove them.
	Schema Schema
	// The table's encryption configuration. When calling Update, ensure that
	// all mutable fields of EncryptionConfig are populated.
	EncryptionConfig *EncryptionConfig
	// The time when this table expires. To remove a table's expiration,
	// set ExpirationTime to NeverExpire. The zero value is ignored.
	ExpirationTime time.Time
	// The query to use for a view.
	ViewQuery optional.String
	// Use Legacy SQL for the view query.
	UseLegacySQL optional.Bool
	// TimePartitioning allows modification of certain aspects of partition
	// configuration such as partition expiration and whether partition
	// filtration is required at query time. When calling Update, ensure
	// that all mutable fields of TimePartitioning are populated.
	TimePartitioning *TimePartitioning
	// labelUpdater provides the SetLabel and DeleteLabel methods, which
	// record label changes to apply on Update.
	labelUpdater
}
  513. // labelUpdater contains common code for updating labels.
  514. type labelUpdater struct {
  515. setLabels map[string]string
  516. deleteLabels map[string]bool
  517. }
  518. // SetLabel causes a label to be added or modified on a call to Update.
  519. func (u *labelUpdater) SetLabel(name, value string) {
  520. if u.setLabels == nil {
  521. u.setLabels = map[string]string{}
  522. }
  523. u.setLabels[name] = value
  524. }
  525. // DeleteLabel causes a label to be deleted on a call to Update.
  526. func (u *labelUpdater) DeleteLabel(name string) {
  527. if u.deleteLabels == nil {
  528. u.deleteLabels = map[string]bool{}
  529. }
  530. u.deleteLabels[name] = true
  531. }
  532. func (u *labelUpdater) update() (labels map[string]string, forces, nulls []string) {
  533. if u.setLabels == nil && u.deleteLabels == nil {
  534. return nil, nil, nil
  535. }
  536. labels = map[string]string{}
  537. for k, v := range u.setLabels {
  538. labels[k] = v
  539. }
  540. if len(labels) == 0 && len(u.deleteLabels) > 0 {
  541. forces = []string{"Labels"}
  542. }
  543. for l := range u.deleteLabels {
  544. nulls = append(nulls, "Labels."+l)
  545. }
  546. return labels, forces, nulls
  547. }