You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

537 lines
16 KiB

  1. // Copyright 2015 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bigquery
  15. import (
  16. "context"
  17. "errors"
  18. "fmt"
  19. "time"
  20. "cloud.google.com/go/internal/optional"
  21. "cloud.google.com/go/internal/trace"
  22. bq "google.golang.org/api/bigquery/v2"
  23. "google.golang.org/api/iterator"
  24. )
// Dataset is a reference to a BigQuery dataset.
// It holds the identifiers of the dataset together with the client that is
// used for the API calls made through the handle.
type Dataset struct {
	ProjectID string  // ID of the project containing the dataset.
	DatasetID string  // ID of the dataset within that project.
	c         *Client // Client used to issue requests for this dataset.
}
// DatasetMetadata contains information about a BigQuery dataset.
type DatasetMetadata struct {
	// These fields can be set when creating a dataset.
	Name                   string            // The user-friendly name for this dataset.
	Description            string            // The user-friendly description of this dataset.
	Location               string            // The geo location of the dataset.
	DefaultTableExpiration time.Duration     // The default expiration time for new tables.
	Labels                 map[string]string // User-provided labels.
	Access                 []*AccessEntry    // Access permissions.

	// These fields are read-only. Converting metadata for Dataset.Create
	// fails with an error if any of them is set to a non-zero value.
	CreationTime     time.Time // Creation timestamp reported by the service.
	LastModifiedTime time.Time // When the dataset or any of its tables were modified.
	FullID           string    // The full dataset ID in the form projectID:datasetID.

	// ETag is the ETag obtained when reading metadata. Pass it to Dataset.Update to
	// ensure that the metadata hasn't changed since it was read.
	ETag string
}
// DatasetMetadataToUpdate is used when updating a dataset's metadata.
// Only non-nil fields will be updated.
type DatasetMetadataToUpdate struct {
	Description optional.String // The user-friendly description of this dataset.
	Name        optional.String // The user-friendly name for this dataset.

	// DefaultTableExpiration is the default expiration time for new tables.
	// If set to time.Duration(0), new tables never expire.
	DefaultTableExpiration optional.Duration

	// The entire access list. It is not possible to replace individual entries.
	// A non-nil but empty list is sent as an explicit null for the field.
	Access []*AccessEntry

	// labelUpdater supplies the label changes; its update method provides the
	// labels and the force-send/null field names merged into the request.
	labelUpdater
}
  60. // Dataset creates a handle to a BigQuery dataset in the client's project.
  61. func (c *Client) Dataset(id string) *Dataset {
  62. return c.DatasetInProject(c.projectID, id)
  63. }
  64. // DatasetInProject creates a handle to a BigQuery dataset in the specified project.
  65. func (c *Client) DatasetInProject(projectID, datasetID string) *Dataset {
  66. return &Dataset{
  67. ProjectID: projectID,
  68. DatasetID: datasetID,
  69. c: c,
  70. }
  71. }
  72. // Create creates a dataset in the BigQuery service. An error will be returned if the
  73. // dataset already exists. Pass in a DatasetMetadata value to configure the dataset.
  74. func (d *Dataset) Create(ctx context.Context, md *DatasetMetadata) (err error) {
  75. ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Dataset.Create")
  76. defer func() { trace.EndSpan(ctx, err) }()
  77. ds, err := md.toBQ()
  78. if err != nil {
  79. return err
  80. }
  81. ds.DatasetReference = &bq.DatasetReference{DatasetId: d.DatasetID}
  82. // Use Client.Location as a default.
  83. if ds.Location == "" {
  84. ds.Location = d.c.Location
  85. }
  86. call := d.c.bqs.Datasets.Insert(d.ProjectID, ds).Context(ctx)
  87. setClientHeader(call.Header())
  88. _, err = call.Do()
  89. return err
  90. }
  91. func (dm *DatasetMetadata) toBQ() (*bq.Dataset, error) {
  92. ds := &bq.Dataset{}
  93. if dm == nil {
  94. return ds, nil
  95. }
  96. ds.FriendlyName = dm.Name
  97. ds.Description = dm.Description
  98. ds.Location = dm.Location
  99. ds.DefaultTableExpirationMs = int64(dm.DefaultTableExpiration / time.Millisecond)
  100. ds.Labels = dm.Labels
  101. var err error
  102. ds.Access, err = accessListToBQ(dm.Access)
  103. if err != nil {
  104. return nil, err
  105. }
  106. if !dm.CreationTime.IsZero() {
  107. return nil, errors.New("bigquery: Dataset.CreationTime is not writable")
  108. }
  109. if !dm.LastModifiedTime.IsZero() {
  110. return nil, errors.New("bigquery: Dataset.LastModifiedTime is not writable")
  111. }
  112. if dm.FullID != "" {
  113. return nil, errors.New("bigquery: Dataset.FullID is not writable")
  114. }
  115. if dm.ETag != "" {
  116. return nil, errors.New("bigquery: Dataset.ETag is not writable")
  117. }
  118. return ds, nil
  119. }
  120. func accessListToBQ(a []*AccessEntry) ([]*bq.DatasetAccess, error) {
  121. var q []*bq.DatasetAccess
  122. for _, e := range a {
  123. a, err := e.toBQ()
  124. if err != nil {
  125. return nil, err
  126. }
  127. q = append(q, a)
  128. }
  129. return q, nil
  130. }
  131. // Delete deletes the dataset. Delete will fail if the dataset is not empty.
  132. func (d *Dataset) Delete(ctx context.Context) (err error) {
  133. return d.deleteInternal(ctx, false)
  134. }
  135. // DeleteWithContents deletes the dataset, as well as contained resources.
  136. func (d *Dataset) DeleteWithContents(ctx context.Context) (err error) {
  137. return d.deleteInternal(ctx, true)
  138. }
  139. func (d *Dataset) deleteInternal(ctx context.Context, deleteContents bool) (err error) {
  140. ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Dataset.Delete")
  141. defer func() { trace.EndSpan(ctx, err) }()
  142. call := d.c.bqs.Datasets.Delete(d.ProjectID, d.DatasetID).Context(ctx).DeleteContents(deleteContents)
  143. setClientHeader(call.Header())
  144. return call.Do()
  145. }
  146. // Metadata fetches the metadata for the dataset.
  147. func (d *Dataset) Metadata(ctx context.Context) (md *DatasetMetadata, err error) {
  148. ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Dataset.Metadata")
  149. defer func() { trace.EndSpan(ctx, err) }()
  150. call := d.c.bqs.Datasets.Get(d.ProjectID, d.DatasetID).Context(ctx)
  151. setClientHeader(call.Header())
  152. var ds *bq.Dataset
  153. if err := runWithRetry(ctx, func() (err error) {
  154. ds, err = call.Do()
  155. return err
  156. }); err != nil {
  157. return nil, err
  158. }
  159. return bqToDatasetMetadata(ds)
  160. }
  161. func bqToDatasetMetadata(d *bq.Dataset) (*DatasetMetadata, error) {
  162. dm := &DatasetMetadata{
  163. CreationTime: unixMillisToTime(d.CreationTime),
  164. LastModifiedTime: unixMillisToTime(d.LastModifiedTime),
  165. DefaultTableExpiration: time.Duration(d.DefaultTableExpirationMs) * time.Millisecond,
  166. Description: d.Description,
  167. Name: d.FriendlyName,
  168. FullID: d.Id,
  169. Location: d.Location,
  170. Labels: d.Labels,
  171. ETag: d.Etag,
  172. }
  173. for _, a := range d.Access {
  174. e, err := bqToAccessEntry(a, nil)
  175. if err != nil {
  176. return nil, err
  177. }
  178. dm.Access = append(dm.Access, e)
  179. }
  180. return dm, nil
  181. }
  182. // Update modifies specific Dataset metadata fields.
  183. // To perform a read-modify-write that protects against intervening reads,
  184. // set the etag argument to the DatasetMetadata.ETag field from the read.
  185. // Pass the empty string for etag for a "blind write" that will always succeed.
  186. func (d *Dataset) Update(ctx context.Context, dm DatasetMetadataToUpdate, etag string) (md *DatasetMetadata, err error) {
  187. ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Dataset.Update")
  188. defer func() { trace.EndSpan(ctx, err) }()
  189. ds, err := dm.toBQ()
  190. if err != nil {
  191. return nil, err
  192. }
  193. call := d.c.bqs.Datasets.Patch(d.ProjectID, d.DatasetID, ds).Context(ctx)
  194. setClientHeader(call.Header())
  195. if etag != "" {
  196. call.Header().Set("If-Match", etag)
  197. }
  198. var ds2 *bq.Dataset
  199. if err := runWithRetry(ctx, func() (err error) {
  200. ds2, err = call.Do()
  201. return err
  202. }); err != nil {
  203. return nil, err
  204. }
  205. return bqToDatasetMetadata(ds2)
  206. }
  207. func (dm *DatasetMetadataToUpdate) toBQ() (*bq.Dataset, error) {
  208. ds := &bq.Dataset{}
  209. forceSend := func(field string) {
  210. ds.ForceSendFields = append(ds.ForceSendFields, field)
  211. }
  212. if dm.Description != nil {
  213. ds.Description = optional.ToString(dm.Description)
  214. forceSend("Description")
  215. }
  216. if dm.Name != nil {
  217. ds.FriendlyName = optional.ToString(dm.Name)
  218. forceSend("FriendlyName")
  219. }
  220. if dm.DefaultTableExpiration != nil {
  221. dur := optional.ToDuration(dm.DefaultTableExpiration)
  222. if dur == 0 {
  223. // Send a null to delete the field.
  224. ds.NullFields = append(ds.NullFields, "DefaultTableExpirationMs")
  225. } else {
  226. ds.DefaultTableExpirationMs = int64(dur / time.Millisecond)
  227. }
  228. }
  229. if dm.Access != nil {
  230. var err error
  231. ds.Access, err = accessListToBQ(dm.Access)
  232. if err != nil {
  233. return nil, err
  234. }
  235. if len(ds.Access) == 0 {
  236. ds.NullFields = append(ds.NullFields, "Access")
  237. }
  238. }
  239. labels, forces, nulls := dm.update()
  240. ds.Labels = labels
  241. ds.ForceSendFields = append(ds.ForceSendFields, forces...)
  242. ds.NullFields = append(ds.NullFields, nulls...)
  243. return ds, nil
  244. }
  245. // Table creates a handle to a BigQuery table in the dataset.
  246. // To determine if a table exists, call Table.Metadata.
  247. // If the table does not already exist, use Table.Create to create it.
  248. func (d *Dataset) Table(tableID string) *Table {
  249. return &Table{ProjectID: d.ProjectID, DatasetID: d.DatasetID, TableID: tableID, c: d.c}
  250. }
  251. // Tables returns an iterator over the tables in the Dataset.
  252. func (d *Dataset) Tables(ctx context.Context) *TableIterator {
  253. it := &TableIterator{
  254. ctx: ctx,
  255. dataset: d,
  256. }
  257. it.pageInfo, it.nextFunc = iterator.NewPageInfo(
  258. it.fetch,
  259. func() int { return len(it.tables) },
  260. func() interface{} { b := it.tables; it.tables = nil; return b })
  261. return it
  262. }
// A TableIterator is an iterator over Tables.
type TableIterator struct {
	ctx      context.Context    // Context used for the list requests.
	dataset  *Dataset           // Dataset whose tables are listed.
	tables   []*Table           // Fetched tables not yet returned by Next.
	pageInfo *iterator.PageInfo // Pagination state exposed through PageInfo.
	nextFunc func() error       // Advances the iterator; set up by iterator.NewPageInfo.
}
  271. // Next returns the next result. Its second return value is Done if there are
  272. // no more results. Once Next returns Done, all subsequent calls will return
  273. // Done.
  274. func (it *TableIterator) Next() (*Table, error) {
  275. if err := it.nextFunc(); err != nil {
  276. return nil, err
  277. }
  278. t := it.tables[0]
  279. it.tables = it.tables[1:]
  280. return t, nil
  281. }
  282. // PageInfo supports pagination. See the google.golang.org/api/iterator package for details.
  283. func (it *TableIterator) PageInfo() *iterator.PageInfo { return it.pageInfo }
  284. // for testing
  285. var listTables = func(it *TableIterator, pageSize int, pageToken string) (*bq.TableList, error) {
  286. call := it.dataset.c.bqs.Tables.List(it.dataset.ProjectID, it.dataset.DatasetID).
  287. PageToken(pageToken).
  288. Context(it.ctx)
  289. setClientHeader(call.Header())
  290. if pageSize > 0 {
  291. call.MaxResults(int64(pageSize))
  292. }
  293. var res *bq.TableList
  294. err := runWithRetry(it.ctx, func() (err error) {
  295. res, err = call.Do()
  296. return err
  297. })
  298. return res, err
  299. }
  300. func (it *TableIterator) fetch(pageSize int, pageToken string) (string, error) {
  301. res, err := listTables(it, pageSize, pageToken)
  302. if err != nil {
  303. return "", err
  304. }
  305. for _, t := range res.Tables {
  306. it.tables = append(it.tables, bqToTable(t.TableReference, it.dataset.c))
  307. }
  308. return res.NextPageToken, nil
  309. }
  310. func bqToTable(tr *bq.TableReference, c *Client) *Table {
  311. if tr == nil {
  312. return nil
  313. }
  314. return &Table{
  315. ProjectID: tr.ProjectId,
  316. DatasetID: tr.DatasetId,
  317. TableID: tr.TableId,
  318. c: c,
  319. }
  320. }
  321. // Datasets returns an iterator over the datasets in a project.
  322. // The Client's project is used by default, but that can be
  323. // changed by setting ProjectID on the returned iterator before calling Next.
  324. func (c *Client) Datasets(ctx context.Context) *DatasetIterator {
  325. return c.DatasetsInProject(ctx, c.projectID)
  326. }
  327. // DatasetsInProject returns an iterator over the datasets in the provided project.
  328. //
  329. // Deprecated: call Client.Datasets, then set ProjectID on the returned iterator.
  330. func (c *Client) DatasetsInProject(ctx context.Context, projectID string) *DatasetIterator {
  331. it := &DatasetIterator{
  332. ctx: ctx,
  333. c: c,
  334. ProjectID: projectID,
  335. }
  336. it.pageInfo, it.nextFunc = iterator.NewPageInfo(
  337. it.fetch,
  338. func() int { return len(it.items) },
  339. func() interface{} { b := it.items; it.items = nil; return b })
  340. return it
  341. }
// DatasetIterator iterates over the datasets in a project.
type DatasetIterator struct {
	// ListHidden causes hidden datasets to be listed when set to true.
	// Set before the first call to Next.
	ListHidden bool

	// Filter restricts the datasets returned by label. The filter syntax is described in
	// https://cloud.google.com/bigquery/docs/labeling-datasets#filtering_datasets_using_labels
	// Set before the first call to Next.
	Filter string

	// The project ID of the listed datasets.
	// Set before the first call to Next.
	ProjectID string

	ctx      context.Context    // Context used for the list requests.
	c        *Client            // Client used to issue requests and bound to returned datasets.
	pageInfo *iterator.PageInfo // Pagination state exposed through PageInfo.
	nextFunc func() error       // Advances the iterator; set up by iterator.NewPageInfo.
	items    []*Dataset         // Fetched datasets not yet returned by Next.
}
  360. // PageInfo supports pagination. See the google.golang.org/api/iterator package for details.
  361. func (it *DatasetIterator) PageInfo() *iterator.PageInfo { return it.pageInfo }
  362. // Next returns the next Dataset. Its second return value is iterator.Done if
  363. // there are no more results. Once Next returns Done, all subsequent calls will
  364. // return Done.
  365. func (it *DatasetIterator) Next() (*Dataset, error) {
  366. if err := it.nextFunc(); err != nil {
  367. return nil, err
  368. }
  369. item := it.items[0]
  370. it.items = it.items[1:]
  371. return item, nil
  372. }
  373. // for testing
  374. var listDatasets = func(it *DatasetIterator, pageSize int, pageToken string) (*bq.DatasetList, error) {
  375. call := it.c.bqs.Datasets.List(it.ProjectID).
  376. Context(it.ctx).
  377. PageToken(pageToken).
  378. All(it.ListHidden)
  379. setClientHeader(call.Header())
  380. if pageSize > 0 {
  381. call.MaxResults(int64(pageSize))
  382. }
  383. if it.Filter != "" {
  384. call.Filter(it.Filter)
  385. }
  386. var res *bq.DatasetList
  387. err := runWithRetry(it.ctx, func() (err error) {
  388. res, err = call.Do()
  389. return err
  390. })
  391. return res, err
  392. }
  393. func (it *DatasetIterator) fetch(pageSize int, pageToken string) (string, error) {
  394. res, err := listDatasets(it, pageSize, pageToken)
  395. if err != nil {
  396. return "", err
  397. }
  398. for _, d := range res.Datasets {
  399. it.items = append(it.items, &Dataset{
  400. ProjectID: d.DatasetReference.ProjectId,
  401. DatasetID: d.DatasetReference.DatasetId,
  402. c: it.c,
  403. })
  404. }
  405. return res.NextPageToken, nil
  406. }
// An AccessEntry describes the permissions that an entity has on a dataset.
// Entity is used for every EntityType except ViewEntity, which uses View.
type AccessEntry struct {
	Role       AccessRole // The role of the entity
	EntityType EntityType // The type of entity
	Entity     string     // The entity (individual or group) granted access
	View       *Table     // The view granted access (EntityType must be ViewEntity)
}
// AccessRole is the level of access to grant to a dataset.
// Its string value is sent to the API as the role of an access entry.
type AccessRole string

const (
	// OwnerRole is the OWNER AccessRole.
	OwnerRole AccessRole = "OWNER"
	// ReaderRole is the READER AccessRole.
	ReaderRole AccessRole = "READER"
	// WriterRole is the WRITER AccessRole.
	WriterRole AccessRole = "WRITER"
)
  424. // EntityType is the type of entity in an AccessEntry.
  425. type EntityType int
  426. const (
  427. // DomainEntity is a domain (e.g. "example.com").
  428. DomainEntity EntityType = iota + 1
  429. // GroupEmailEntity is an email address of a Google Group.
  430. GroupEmailEntity
  431. // UserEmailEntity is an email address of an individual user.
  432. UserEmailEntity
  433. // SpecialGroupEntity is a special group: one of projectOwners, projectReaders, projectWriters or
  434. // allAuthenticatedUsers.
  435. SpecialGroupEntity
  436. // ViewEntity is a BigQuery view.
  437. ViewEntity
  438. )
  439. func (e *AccessEntry) toBQ() (*bq.DatasetAccess, error) {
  440. q := &bq.DatasetAccess{Role: string(e.Role)}
  441. switch e.EntityType {
  442. case DomainEntity:
  443. q.Domain = e.Entity
  444. case GroupEmailEntity:
  445. q.GroupByEmail = e.Entity
  446. case UserEmailEntity:
  447. q.UserByEmail = e.Entity
  448. case SpecialGroupEntity:
  449. q.SpecialGroup = e.Entity
  450. case ViewEntity:
  451. q.View = e.View.toBQ()
  452. default:
  453. return nil, fmt.Errorf("bigquery: unknown entity type %d", e.EntityType)
  454. }
  455. return q, nil
  456. }
  457. func bqToAccessEntry(q *bq.DatasetAccess, c *Client) (*AccessEntry, error) {
  458. e := &AccessEntry{Role: AccessRole(q.Role)}
  459. switch {
  460. case q.Domain != "":
  461. e.Entity = q.Domain
  462. e.EntityType = DomainEntity
  463. case q.GroupByEmail != "":
  464. e.Entity = q.GroupByEmail
  465. e.EntityType = GroupEmailEntity
  466. case q.UserByEmail != "":
  467. e.Entity = q.UserByEmail
  468. e.EntityType = UserEmailEntity
  469. case q.SpecialGroup != "":
  470. e.Entity = q.SpecialGroup
  471. e.EntityType = SpecialGroupEntity
  472. case q.View != nil:
  473. e.View = c.DatasetInProject(q.View.ProjectId, q.View.DatasetId).Table(q.View.TableId)
  474. e.EntityType = ViewEntity
  475. default:
  476. return nil, errors.New("bigquery: invalid access value")
  477. }
  478. return e, nil
  479. }