You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

329 lines
12 KiB

  1. // Copyright 2015 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bigquery
  15. import (
  16. "context"
  17. "errors"
  18. "cloud.google.com/go/internal/trace"
  19. bq "google.golang.org/api/bigquery/v2"
  20. )
  21. // QueryConfig holds the configuration for a query job.
  22. type QueryConfig struct {
  23. // Dst is the table into which the results of the query will be written.
  24. // If this field is nil, a temporary table will be created.
  25. Dst *Table
  26. // The query to execute. See https://cloud.google.com/bigquery/query-reference for details.
  27. Q string
  28. // DefaultProjectID and DefaultDatasetID specify the dataset to use for unqualified table names in the query.
  29. // If DefaultProjectID is set, DefaultDatasetID must also be set.
  30. DefaultProjectID string
  31. DefaultDatasetID string
  32. // TableDefinitions describes data sources outside of BigQuery.
  33. // The map keys may be used as table names in the query string.
  34. //
  35. // When a QueryConfig is returned from Job.Config, the map values
  36. // are always of type *ExternalDataConfig.
  37. TableDefinitions map[string]ExternalData
  38. // CreateDisposition specifies the circumstances under which the destination table will be created.
  39. // The default is CreateIfNeeded.
  40. CreateDisposition TableCreateDisposition
  41. // WriteDisposition specifies how existing data in the destination table is treated.
  42. // The default is WriteEmpty.
  43. WriteDisposition TableWriteDisposition
  44. // DisableQueryCache prevents results being fetched from the query cache.
  45. // If this field is false, results are fetched from the cache if they are available.
  46. // The query cache is a best-effort cache that is flushed whenever tables in the query are modified.
  47. // Cached results are only available when TableID is unspecified in the query's destination Table.
  48. // For more information, see https://cloud.google.com/bigquery/querying-data#querycaching
  49. DisableQueryCache bool
  50. // DisableFlattenedResults prevents results being flattened.
  51. // If this field is false, results from nested and repeated fields are flattened.
  52. // DisableFlattenedResults implies AllowLargeResults
  53. // For more information, see https://cloud.google.com/bigquery/docs/data#nested
  54. DisableFlattenedResults bool
  55. // AllowLargeResults allows the query to produce arbitrarily large result tables.
  56. // The destination must be a table.
  57. // When using this option, queries will take longer to execute, even if the result set is small.
  58. // For additional limitations, see https://cloud.google.com/bigquery/querying-data#largequeryresults
  59. AllowLargeResults bool
  60. // Priority specifies the priority with which to schedule the query.
  61. // The default priority is InteractivePriority.
  62. // For more information, see https://cloud.google.com/bigquery/querying-data#batchqueries
  63. Priority QueryPriority
  64. // MaxBillingTier sets the maximum billing tier for a Query.
  65. // Queries that have resource usage beyond this tier will fail (without
  66. // incurring a charge). If this field is zero, the project default will be used.
  67. MaxBillingTier int
  68. // MaxBytesBilled limits the number of bytes billed for
  69. // this job. Queries that would exceed this limit will fail (without incurring
  70. // a charge).
  71. // If this field is less than 1, the project default will be
  72. // used.
  73. MaxBytesBilled int64
  74. // UseStandardSQL causes the query to use standard SQL. The default.
  75. // Deprecated: use UseLegacySQL.
  76. UseStandardSQL bool
  77. // UseLegacySQL causes the query to use legacy SQL.
  78. UseLegacySQL bool
  79. // Parameters is a list of query parameters. The presence of parameters
  80. // implies the use of standard SQL.
  81. // If the query uses positional syntax ("?"), then no parameter may have a name.
  82. // If the query uses named syntax ("@p"), then all parameters must have names.
  83. // It is illegal to mix positional and named syntax.
  84. Parameters []QueryParameter
  85. // TimePartitioning specifies time-based partitioning
  86. // for the destination table.
  87. TimePartitioning *TimePartitioning
  88. // Clustering specifies the data clustering configuration for the destination table.
  89. Clustering *Clustering
  90. // The labels associated with this job.
  91. Labels map[string]string
  92. // If true, don't actually run this job. A valid query will return a mostly
  93. // empty response with some processing statistics, while an invalid query will
  94. // return the same error it would if it wasn't a dry run.
  95. //
  96. // Query.Read will fail with dry-run queries. Call Query.Run instead, and then
  97. // call LastStatus on the returned job to get statistics. Calling Status on a
  98. // dry-run job will fail.
  99. DryRun bool
  100. // Custom encryption configuration (e.g., Cloud KMS keys).
  101. DestinationEncryptionConfig *EncryptionConfig
  102. // Allows the schema of the destination table to be updated as a side effect of
  103. // the query job.
  104. SchemaUpdateOptions []string
  105. }
  106. func (qc *QueryConfig) toBQ() (*bq.JobConfiguration, error) {
  107. qconf := &bq.JobConfigurationQuery{
  108. Query: qc.Q,
  109. CreateDisposition: string(qc.CreateDisposition),
  110. WriteDisposition: string(qc.WriteDisposition),
  111. AllowLargeResults: qc.AllowLargeResults,
  112. Priority: string(qc.Priority),
  113. MaximumBytesBilled: qc.MaxBytesBilled,
  114. TimePartitioning: qc.TimePartitioning.toBQ(),
  115. Clustering: qc.Clustering.toBQ(),
  116. DestinationEncryptionConfiguration: qc.DestinationEncryptionConfig.toBQ(),
  117. SchemaUpdateOptions: qc.SchemaUpdateOptions,
  118. }
  119. if len(qc.TableDefinitions) > 0 {
  120. qconf.TableDefinitions = make(map[string]bq.ExternalDataConfiguration)
  121. }
  122. for name, data := range qc.TableDefinitions {
  123. qconf.TableDefinitions[name] = data.toBQ()
  124. }
  125. if qc.DefaultProjectID != "" || qc.DefaultDatasetID != "" {
  126. qconf.DefaultDataset = &bq.DatasetReference{
  127. DatasetId: qc.DefaultDatasetID,
  128. ProjectId: qc.DefaultProjectID,
  129. }
  130. }
  131. if tier := int64(qc.MaxBillingTier); tier > 0 {
  132. qconf.MaximumBillingTier = &tier
  133. }
  134. f := false
  135. if qc.DisableQueryCache {
  136. qconf.UseQueryCache = &f
  137. }
  138. if qc.DisableFlattenedResults {
  139. qconf.FlattenResults = &f
  140. // DisableFlattenResults implies AllowLargeResults.
  141. qconf.AllowLargeResults = true
  142. }
  143. if qc.UseStandardSQL && qc.UseLegacySQL {
  144. return nil, errors.New("bigquery: cannot provide both UseStandardSQL and UseLegacySQL")
  145. }
  146. if len(qc.Parameters) > 0 && qc.UseLegacySQL {
  147. return nil, errors.New("bigquery: cannot provide both Parameters (implying standard SQL) and UseLegacySQL")
  148. }
  149. ptrue := true
  150. pfalse := false
  151. if qc.UseLegacySQL {
  152. qconf.UseLegacySql = &ptrue
  153. } else {
  154. qconf.UseLegacySql = &pfalse
  155. }
  156. if qc.Dst != nil && !qc.Dst.implicitTable() {
  157. qconf.DestinationTable = qc.Dst.toBQ()
  158. }
  159. for _, p := range qc.Parameters {
  160. qp, err := p.toBQ()
  161. if err != nil {
  162. return nil, err
  163. }
  164. qconf.QueryParameters = append(qconf.QueryParameters, qp)
  165. }
  166. return &bq.JobConfiguration{
  167. Labels: qc.Labels,
  168. DryRun: qc.DryRun,
  169. Query: qconf,
  170. }, nil
  171. }
  172. func bqToQueryConfig(q *bq.JobConfiguration, c *Client) (*QueryConfig, error) {
  173. qq := q.Query
  174. qc := &QueryConfig{
  175. Labels: q.Labels,
  176. DryRun: q.DryRun,
  177. Q: qq.Query,
  178. CreateDisposition: TableCreateDisposition(qq.CreateDisposition),
  179. WriteDisposition: TableWriteDisposition(qq.WriteDisposition),
  180. AllowLargeResults: qq.AllowLargeResults,
  181. Priority: QueryPriority(qq.Priority),
  182. MaxBytesBilled: qq.MaximumBytesBilled,
  183. UseLegacySQL: qq.UseLegacySql == nil || *qq.UseLegacySql,
  184. TimePartitioning: bqToTimePartitioning(qq.TimePartitioning),
  185. Clustering: bqToClustering(qq.Clustering),
  186. DestinationEncryptionConfig: bqToEncryptionConfig(qq.DestinationEncryptionConfiguration),
  187. SchemaUpdateOptions: qq.SchemaUpdateOptions,
  188. }
  189. qc.UseStandardSQL = !qc.UseLegacySQL
  190. if len(qq.TableDefinitions) > 0 {
  191. qc.TableDefinitions = make(map[string]ExternalData)
  192. }
  193. for name, qedc := range qq.TableDefinitions {
  194. edc, err := bqToExternalDataConfig(&qedc)
  195. if err != nil {
  196. return nil, err
  197. }
  198. qc.TableDefinitions[name] = edc
  199. }
  200. if qq.DefaultDataset != nil {
  201. qc.DefaultProjectID = qq.DefaultDataset.ProjectId
  202. qc.DefaultDatasetID = qq.DefaultDataset.DatasetId
  203. }
  204. if qq.MaximumBillingTier != nil {
  205. qc.MaxBillingTier = int(*qq.MaximumBillingTier)
  206. }
  207. if qq.UseQueryCache != nil && !*qq.UseQueryCache {
  208. qc.DisableQueryCache = true
  209. }
  210. if qq.FlattenResults != nil && !*qq.FlattenResults {
  211. qc.DisableFlattenedResults = true
  212. }
  213. if qq.DestinationTable != nil {
  214. qc.Dst = bqToTable(qq.DestinationTable, c)
  215. }
  216. for _, qp := range qq.QueryParameters {
  217. p, err := bqToQueryParameter(qp)
  218. if err != nil {
  219. return nil, err
  220. }
  221. qc.Parameters = append(qc.Parameters, p)
  222. }
  223. return qc, nil
  224. }
  225. // QueryPriority specifies a priority with which a query is to be executed.
  226. type QueryPriority string
  227. const (
  228. // BatchPriority specifies that the query should be scheduled with the
  229. // batch priority. BigQuery queues each batch query on your behalf, and
  230. // starts the query as soon as idle resources are available, usually within
  231. // a few minutes. If BigQuery hasn't started the query within 24 hours,
  232. // BigQuery changes the job priority to interactive. Batch queries don't
  233. // count towards your concurrent rate limit, which can make it easier to
  234. // start many queries at once.
  235. //
  236. // More information can be found at https://cloud.google.com/bigquery/docs/running-queries#batchqueries.
  237. BatchPriority QueryPriority = "BATCH"
  238. // InteractivePriority specifies that the query should be scheduled with
  239. // interactive priority, which means that the query is executed as soon as
  240. // possible. Interactive queries count towards your concurrent rate limit
  241. // and your daily limit. It is the default priority with which queries get
  242. // executed.
  243. //
  244. // More information can be found at https://cloud.google.com/bigquery/docs/running-queries#queries.
  245. InteractivePriority QueryPriority = "INTERACTIVE"
  246. )
  247. // A Query queries data from a BigQuery table. Use Client.Query to create a Query.
  248. type Query struct {
  249. JobIDConfig
  250. QueryConfig
  251. client *Client
  252. }
  253. // Query creates a query with string q.
  254. // The returned Query may optionally be further configured before its Run method is called.
  255. func (c *Client) Query(q string) *Query {
  256. return &Query{
  257. client: c,
  258. QueryConfig: QueryConfig{Q: q},
  259. }
  260. }
  261. // Run initiates a query job.
  262. func (q *Query) Run(ctx context.Context) (j *Job, err error) {
  263. ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Query.Run")
  264. defer func() { trace.EndSpan(ctx, err) }()
  265. job, err := q.newJob()
  266. if err != nil {
  267. return nil, err
  268. }
  269. j, err = q.client.insertJob(ctx, job, nil)
  270. if err != nil {
  271. return nil, err
  272. }
  273. return j, nil
  274. }
  275. func (q *Query) newJob() (*bq.Job, error) {
  276. config, err := q.QueryConfig.toBQ()
  277. if err != nil {
  278. return nil, err
  279. }
  280. return &bq.Job{
  281. JobReference: q.JobIDConfig.createJobRef(q.client),
  282. Configuration: config,
  283. }, nil
  284. }
  285. // Read submits a query for execution and returns the results via a RowIterator.
  286. // It is a shorthand for Query.Run followed by Job.Read.
  287. func (q *Query) Read(ctx context.Context) (*RowIterator, error) {
  288. job, err := q.Run(ctx)
  289. if err != nil {
  290. return nil, err
  291. }
  292. return job.Read(ctx)
  293. }