You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

532 lines
16 KiB

  1. // Copyright 2015 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bigquery
import (
	"errors"
	"fmt"
	"sort"
	"time"

	"cloud.google.com/go/internal/optional"
	"cloud.google.com/go/internal/trace"
	"golang.org/x/net/context"
	bq "google.golang.org/api/bigquery/v2"
)
  24. // A Table is a reference to a BigQuery table.
  25. type Table struct {
  26. // ProjectID, DatasetID and TableID may be omitted if the Table is the destination for a query.
  27. // In this case the result will be stored in an ephemeral table.
  28. ProjectID string
  29. DatasetID string
  30. // TableID must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_).
  31. // The maximum length is 1,024 characters.
  32. TableID string
  33. c *Client
  34. }
// TableMetadata contains information about a BigQuery table.
//
// The first group of fields may be supplied when creating a table; the second
// group is populated when metadata is read back, and setting any of those
// fields on create is rejected with an error.
type TableMetadata struct {
	// The following fields can be set when creating a table.

	// The user-friendly name for the table.
	Name string

	// The user-friendly description of the table.
	Description string

	// The table schema. If provided on create, ViewQuery must be empty.
	Schema Schema

	// The query to use for a view. If provided on create, Schema must be nil.
	ViewQuery string

	// Use Legacy SQL for the view query.
	// At most one of UseLegacySQL and UseStandardSQL can be true.
	// Setting it requires ViewQuery to be non-empty.
	UseLegacySQL bool

	// Use Standard SQL for the view query. The default.
	// At most one of UseLegacySQL and UseStandardSQL can be true.
	// Setting it requires ViewQuery to be non-empty.
	//
	// Deprecated: use UseLegacySQL.
	UseStandardSQL bool

	// If non-nil, the table is partitioned by time.
	TimePartitioning *TimePartitioning

	// The time when this table expires. If not set, the table will persist
	// indefinitely. Expired tables will be deleted and their storage reclaimed.
	ExpirationTime time.Time

	// User-provided labels.
	Labels map[string]string

	// Information about a table stored outside of BigQuery.
	ExternalDataConfig *ExternalDataConfig

	// Custom encryption configuration (e.g., Cloud KMS keys).
	EncryptionConfig *EncryptionConfig

	// All the fields below are read-only.

	FullID string // An opaque ID uniquely identifying the table.
	// The kind of table (see TableType).
	Type TableType
	// The time the table was created, as reported by the service.
	CreationTime time.Time
	// The time the table was last modified, as reported by the service.
	LastModifiedTime time.Time

	// The size of the table in bytes.
	// This does not include data that is being buffered during a streaming insert.
	NumBytes int64

	// The number of rows of data in this table.
	// This does not include data that is being buffered during a streaming insert.
	NumRows uint64

	// Contains information regarding this table's streaming buffer, if one is
	// present. This field will be nil if the table is not being streamed to or if
	// there is no data in the streaming buffer.
	StreamingBuffer *StreamingBuffer

	// ETag is the ETag obtained when reading metadata. Pass it to Table.Update to
	// ensure that the metadata hasn't changed since it was read.
	ETag string
}
  83. // TableCreateDisposition specifies the circumstances under which destination table will be created.
  84. // Default is CreateIfNeeded.
  85. type TableCreateDisposition string
  86. const (
  87. // CreateIfNeeded will create the table if it does not already exist.
  88. // Tables are created atomically on successful completion of a job.
  89. CreateIfNeeded TableCreateDisposition = "CREATE_IF_NEEDED"
  90. // CreateNever ensures the table must already exist and will not be
  91. // automatically created.
  92. CreateNever TableCreateDisposition = "CREATE_NEVER"
  93. )
  94. // TableWriteDisposition specifies how existing data in a destination table is treated.
  95. // Default is WriteAppend.
  96. type TableWriteDisposition string
  97. const (
  98. // WriteAppend will append to any existing data in the destination table.
  99. // Data is appended atomically on successful completion of a job.
  100. WriteAppend TableWriteDisposition = "WRITE_APPEND"
  101. // WriteTruncate overrides the existing data in the destination table.
  102. // Data is overwritten atomically on successful completion of a job.
  103. WriteTruncate TableWriteDisposition = "WRITE_TRUNCATE"
  104. // WriteEmpty fails writes if the destination table already contains data.
  105. WriteEmpty TableWriteDisposition = "WRITE_EMPTY"
  106. )
  107. // TableType is the type of table.
  108. type TableType string
  109. const (
  110. RegularTable TableType = "TABLE"
  111. ViewTable TableType = "VIEW"
  112. ExternalTable TableType = "EXTERNAL"
  113. )
  114. // TimePartitioning describes the time-based date partitioning on a table.
  115. // For more information see: https://cloud.google.com/bigquery/docs/creating-partitioned-tables.
  116. type TimePartitioning struct {
  117. // The amount of time to keep the storage for a partition.
  118. // If the duration is empty (0), the data in the partitions do not expire.
  119. Expiration time.Duration
  120. // If empty, the table is partitioned by pseudo column '_PARTITIONTIME'; if set, the
  121. // table is partitioned by this field. The field must be a top-level TIMESTAMP or
  122. // DATE field. Its mode must be NULLABLE or REQUIRED.
  123. Field string
  124. }
  125. func (p *TimePartitioning) toBQ() *bq.TimePartitioning {
  126. if p == nil {
  127. return nil
  128. }
  129. return &bq.TimePartitioning{
  130. Type: "DAY",
  131. ExpirationMs: int64(p.Expiration / time.Millisecond),
  132. Field: p.Field,
  133. }
  134. }
  135. func bqToTimePartitioning(q *bq.TimePartitioning) *TimePartitioning {
  136. if q == nil {
  137. return nil
  138. }
  139. return &TimePartitioning{
  140. Expiration: time.Duration(q.ExpirationMs) * time.Millisecond,
  141. Field: q.Field,
  142. }
  143. }
  144. // EncryptionConfig configures customer-managed encryption on tables.
  145. type EncryptionConfig struct {
  146. // Describes the Cloud KMS encryption key that will be used to protect
  147. // destination BigQuery table. The BigQuery Service Account associated with your
  148. // project requires access to this encryption key.
  149. KMSKeyName string
  150. }
  151. func (e *EncryptionConfig) toBQ() *bq.EncryptionConfiguration {
  152. if e == nil {
  153. return nil
  154. }
  155. return &bq.EncryptionConfiguration{
  156. KmsKeyName: e.KMSKeyName,
  157. }
  158. }
  159. func bqToEncryptionConfig(q *bq.EncryptionConfiguration) *EncryptionConfig {
  160. if q == nil {
  161. return nil
  162. }
  163. return &EncryptionConfig{
  164. KMSKeyName: q.KmsKeyName,
  165. }
  166. }
  167. // StreamingBuffer holds information about the streaming buffer.
  168. type StreamingBuffer struct {
  169. // A lower-bound estimate of the number of bytes currently in the streaming
  170. // buffer.
  171. EstimatedBytes uint64
  172. // A lower-bound estimate of the number of rows currently in the streaming
  173. // buffer.
  174. EstimatedRows uint64
  175. // The time of the oldest entry in the streaming buffer.
  176. OldestEntryTime time.Time
  177. }
  178. func (t *Table) toBQ() *bq.TableReference {
  179. return &bq.TableReference{
  180. ProjectId: t.ProjectID,
  181. DatasetId: t.DatasetID,
  182. TableId: t.TableID,
  183. }
  184. }
  185. // FullyQualifiedName returns the ID of the table in projectID:datasetID.tableID format.
  186. func (t *Table) FullyQualifiedName() string {
  187. return fmt.Sprintf("%s:%s.%s", t.ProjectID, t.DatasetID, t.TableID)
  188. }
  189. // implicitTable reports whether Table is an empty placeholder, which signifies that a new table should be created with an auto-generated Table ID.
  190. func (t *Table) implicitTable() bool {
  191. return t.ProjectID == "" && t.DatasetID == "" && t.TableID == ""
  192. }
  193. // Create creates a table in the BigQuery service.
  194. // Pass in a TableMetadata value to configure the table.
  195. // If tm.View.Query is non-empty, the created table will be of type VIEW.
  196. // Expiration can only be set during table creation.
  197. // After table creation, a view can be modified only if its table was initially created
  198. // with a view.
  199. func (t *Table) Create(ctx context.Context, tm *TableMetadata) (err error) {
  200. ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Table.Create")
  201. defer func() { trace.EndSpan(ctx, err) }()
  202. table, err := tm.toBQ()
  203. if err != nil {
  204. return err
  205. }
  206. table.TableReference = &bq.TableReference{
  207. ProjectId: t.ProjectID,
  208. DatasetId: t.DatasetID,
  209. TableId: t.TableID,
  210. }
  211. req := t.c.bqs.Tables.Insert(t.ProjectID, t.DatasetID, table).Context(ctx)
  212. setClientHeader(req.Header())
  213. _, err = req.Do()
  214. return err
  215. }
  216. func (tm *TableMetadata) toBQ() (*bq.Table, error) {
  217. t := &bq.Table{}
  218. if tm == nil {
  219. return t, nil
  220. }
  221. if tm.Schema != nil && tm.ViewQuery != "" {
  222. return nil, errors.New("bigquery: provide Schema or ViewQuery, not both")
  223. }
  224. t.FriendlyName = tm.Name
  225. t.Description = tm.Description
  226. t.Labels = tm.Labels
  227. if tm.Schema != nil {
  228. t.Schema = tm.Schema.toBQ()
  229. }
  230. if tm.ViewQuery != "" {
  231. if tm.UseStandardSQL && tm.UseLegacySQL {
  232. return nil, errors.New("bigquery: cannot provide both UseStandardSQL and UseLegacySQL")
  233. }
  234. t.View = &bq.ViewDefinition{Query: tm.ViewQuery}
  235. if tm.UseLegacySQL {
  236. t.View.UseLegacySql = true
  237. } else {
  238. t.View.UseLegacySql = false
  239. t.View.ForceSendFields = append(t.View.ForceSendFields, "UseLegacySql")
  240. }
  241. } else if tm.UseLegacySQL || tm.UseStandardSQL {
  242. return nil, errors.New("bigquery: UseLegacy/StandardSQL requires ViewQuery")
  243. }
  244. t.TimePartitioning = tm.TimePartitioning.toBQ()
  245. if !tm.ExpirationTime.IsZero() {
  246. t.ExpirationTime = tm.ExpirationTime.UnixNano() / 1e6
  247. }
  248. if tm.ExternalDataConfig != nil {
  249. edc := tm.ExternalDataConfig.toBQ()
  250. t.ExternalDataConfiguration = &edc
  251. }
  252. t.EncryptionConfiguration = tm.EncryptionConfig.toBQ()
  253. if tm.FullID != "" {
  254. return nil, errors.New("cannot set FullID on create")
  255. }
  256. if tm.Type != "" {
  257. return nil, errors.New("cannot set Type on create")
  258. }
  259. if !tm.CreationTime.IsZero() {
  260. return nil, errors.New("cannot set CreationTime on create")
  261. }
  262. if !tm.LastModifiedTime.IsZero() {
  263. return nil, errors.New("cannot set LastModifiedTime on create")
  264. }
  265. if tm.NumBytes != 0 {
  266. return nil, errors.New("cannot set NumBytes on create")
  267. }
  268. if tm.NumRows != 0 {
  269. return nil, errors.New("cannot set NumRows on create")
  270. }
  271. if tm.StreamingBuffer != nil {
  272. return nil, errors.New("cannot set StreamingBuffer on create")
  273. }
  274. if tm.ETag != "" {
  275. return nil, errors.New("cannot set ETag on create")
  276. }
  277. return t, nil
  278. }
  279. // Metadata fetches the metadata for the table.
  280. func (t *Table) Metadata(ctx context.Context) (md *TableMetadata, err error) {
  281. ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Table.Metadata")
  282. defer func() { trace.EndSpan(ctx, err) }()
  283. req := t.c.bqs.Tables.Get(t.ProjectID, t.DatasetID, t.TableID).Context(ctx)
  284. setClientHeader(req.Header())
  285. var table *bq.Table
  286. err = runWithRetry(ctx, func() (err error) {
  287. table, err = req.Do()
  288. return err
  289. })
  290. if err != nil {
  291. return nil, err
  292. }
  293. return bqToTableMetadata(table)
  294. }
  295. func bqToTableMetadata(t *bq.Table) (*TableMetadata, error) {
  296. md := &TableMetadata{
  297. Description: t.Description,
  298. Name: t.FriendlyName,
  299. Type: TableType(t.Type),
  300. FullID: t.Id,
  301. Labels: t.Labels,
  302. NumBytes: t.NumBytes,
  303. NumRows: t.NumRows,
  304. ExpirationTime: unixMillisToTime(t.ExpirationTime),
  305. CreationTime: unixMillisToTime(t.CreationTime),
  306. LastModifiedTime: unixMillisToTime(int64(t.LastModifiedTime)),
  307. ETag: t.Etag,
  308. EncryptionConfig: bqToEncryptionConfig(t.EncryptionConfiguration),
  309. }
  310. if t.Schema != nil {
  311. md.Schema = bqToSchema(t.Schema)
  312. }
  313. if t.View != nil {
  314. md.ViewQuery = t.View.Query
  315. md.UseLegacySQL = t.View.UseLegacySql
  316. }
  317. md.TimePartitioning = bqToTimePartitioning(t.TimePartitioning)
  318. if t.StreamingBuffer != nil {
  319. md.StreamingBuffer = &StreamingBuffer{
  320. EstimatedBytes: t.StreamingBuffer.EstimatedBytes,
  321. EstimatedRows: t.StreamingBuffer.EstimatedRows,
  322. OldestEntryTime: unixMillisToTime(int64(t.StreamingBuffer.OldestEntryTime)),
  323. }
  324. }
  325. if t.ExternalDataConfiguration != nil {
  326. edc, err := bqToExternalDataConfig(t.ExternalDataConfiguration)
  327. if err != nil {
  328. return nil, err
  329. }
  330. md.ExternalDataConfig = edc
  331. }
  332. return md, nil
  333. }
  334. // Delete deletes the table.
  335. func (t *Table) Delete(ctx context.Context) (err error) {
  336. ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Table.Delete")
  337. defer func() { trace.EndSpan(ctx, err) }()
  338. req := t.c.bqs.Tables.Delete(t.ProjectID, t.DatasetID, t.TableID).Context(ctx)
  339. setClientHeader(req.Header())
  340. return req.Do()
  341. }
  342. // Read fetches the contents of the table.
  343. func (t *Table) Read(ctx context.Context) *RowIterator {
  344. return t.read(ctx, fetchPage)
  345. }
  346. func (t *Table) read(ctx context.Context, pf pageFetcher) *RowIterator {
  347. return newRowIterator(ctx, t, pf)
  348. }
  349. // Update modifies specific Table metadata fields.
  350. func (t *Table) Update(ctx context.Context, tm TableMetadataToUpdate, etag string) (md *TableMetadata, err error) {
  351. ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Table.Update")
  352. defer func() { trace.EndSpan(ctx, err) }()
  353. bqt := tm.toBQ()
  354. call := t.c.bqs.Tables.Patch(t.ProjectID, t.DatasetID, t.TableID, bqt).Context(ctx)
  355. setClientHeader(call.Header())
  356. if etag != "" {
  357. call.Header().Set("If-Match", etag)
  358. }
  359. var res *bq.Table
  360. if err := runWithRetry(ctx, func() (err error) {
  361. res, err = call.Do()
  362. return err
  363. }); err != nil {
  364. return nil, err
  365. }
  366. return bqToTableMetadata(res)
  367. }
  368. func (tm *TableMetadataToUpdate) toBQ() *bq.Table {
  369. t := &bq.Table{}
  370. forceSend := func(field string) {
  371. t.ForceSendFields = append(t.ForceSendFields, field)
  372. }
  373. if tm.Description != nil {
  374. t.Description = optional.ToString(tm.Description)
  375. forceSend("Description")
  376. }
  377. if tm.Name != nil {
  378. t.FriendlyName = optional.ToString(tm.Name)
  379. forceSend("FriendlyName")
  380. }
  381. if tm.Schema != nil {
  382. t.Schema = tm.Schema.toBQ()
  383. forceSend("Schema")
  384. }
  385. if !tm.ExpirationTime.IsZero() {
  386. t.ExpirationTime = tm.ExpirationTime.UnixNano() / 1e6
  387. forceSend("ExpirationTime")
  388. }
  389. if tm.ViewQuery != nil {
  390. t.View = &bq.ViewDefinition{
  391. Query: optional.ToString(tm.ViewQuery),
  392. ForceSendFields: []string{"Query"},
  393. }
  394. }
  395. if tm.UseLegacySQL != nil {
  396. if t.View == nil {
  397. t.View = &bq.ViewDefinition{}
  398. }
  399. t.View.UseLegacySql = optional.ToBool(tm.UseLegacySQL)
  400. t.View.ForceSendFields = append(t.View.ForceSendFields, "UseLegacySql")
  401. }
  402. labels, forces, nulls := tm.update()
  403. t.Labels = labels
  404. t.ForceSendFields = append(t.ForceSendFields, forces...)
  405. t.NullFields = append(t.NullFields, nulls...)
  406. return t
  407. }
  408. // TableMetadataToUpdate is used when updating a table's metadata.
  409. // Only non-nil fields will be updated.
  410. type TableMetadataToUpdate struct {
  411. // The user-friendly description of this table.
  412. Description optional.String
  413. // The user-friendly name for this table.
  414. Name optional.String
  415. // The table's schema.
  416. // When updating a schema, you can add columns but not remove them.
  417. Schema Schema
  418. // The time when this table expires.
  419. ExpirationTime time.Time
  420. // The query to use for a view.
  421. ViewQuery optional.String
  422. // Use Legacy SQL for the view query.
  423. UseLegacySQL optional.Bool
  424. labelUpdater
  425. }
  426. // labelUpdater contains common code for updating labels.
  427. type labelUpdater struct {
  428. setLabels map[string]string
  429. deleteLabels map[string]bool
  430. }
  431. // SetLabel causes a label to be added or modified on a call to Update.
  432. func (u *labelUpdater) SetLabel(name, value string) {
  433. if u.setLabels == nil {
  434. u.setLabels = map[string]string{}
  435. }
  436. u.setLabels[name] = value
  437. }
  438. // DeleteLabel causes a label to be deleted on a call to Update.
  439. func (u *labelUpdater) DeleteLabel(name string) {
  440. if u.deleteLabels == nil {
  441. u.deleteLabels = map[string]bool{}
  442. }
  443. u.deleteLabels[name] = true
  444. }
  445. func (u *labelUpdater) update() (labels map[string]string, forces, nulls []string) {
  446. if u.setLabels == nil && u.deleteLabels == nil {
  447. return nil, nil, nil
  448. }
  449. labels = map[string]string{}
  450. for k, v := range u.setLabels {
  451. labels[k] = v
  452. }
  453. if len(labels) == 0 && len(u.deleteLabels) > 0 {
  454. forces = []string{"Labels"}
  455. }
  456. for l := range u.deleteLabels {
  457. nulls = append(nulls, "Labels."+l)
  458. }
  459. return labels, forces, nulls
  460. }