You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

308 lines
10 KiB

  1. // Copyright 2015 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bigquery
  15. import (
  16. "errors"
  17. "cloud.google.com/go/internal/trace"
  18. "golang.org/x/net/context"
  19. bq "google.golang.org/api/bigquery/v2"
  20. )
  21. // QueryConfig holds the configuration for a query job.
  22. type QueryConfig struct {
  23. // Dst is the table into which the results of the query will be written.
  24. // If this field is nil, a temporary table will be created.
  25. Dst *Table
  26. // The query to execute. See https://cloud.google.com/bigquery/query-reference for details.
  27. Q string
  28. // DefaultProjectID and DefaultDatasetID specify the dataset to use for unqualified table names in the query.
  29. // If DefaultProjectID is set, DefaultDatasetID must also be set.
  30. DefaultProjectID string
  31. DefaultDatasetID string
  32. // TableDefinitions describes data sources outside of BigQuery.
  33. // The map keys may be used as table names in the query string.
  34. //
  35. // When a QueryConfig is returned from Job.Config, the map values
  36. // are always of type *ExternalDataConfig.
  37. TableDefinitions map[string]ExternalData
  38. // CreateDisposition specifies the circumstances under which the destination table will be created.
  39. // The default is CreateIfNeeded.
  40. CreateDisposition TableCreateDisposition
  41. // WriteDisposition specifies how existing data in the destination table is treated.
  42. // The default is WriteEmpty.
  43. WriteDisposition TableWriteDisposition
  44. // DisableQueryCache prevents results being fetched from the query cache.
  45. // If this field is false, results are fetched from the cache if they are available.
  46. // The query cache is a best-effort cache that is flushed whenever tables in the query are modified.
  47. // Cached results are only available when TableID is unspecified in the query's destination Table.
  48. // For more information, see https://cloud.google.com/bigquery/querying-data#querycaching
  49. DisableQueryCache bool
  50. // DisableFlattenedResults prevents results being flattened.
  51. // If this field is false, results from nested and repeated fields are flattened.
  52. // DisableFlattenedResults implies AllowLargeResults
  53. // For more information, see https://cloud.google.com/bigquery/docs/data#nested
  54. DisableFlattenedResults bool
  55. // AllowLargeResults allows the query to produce arbitrarily large result tables.
  56. // The destination must be a table.
  57. // When using this option, queries will take longer to execute, even if the result set is small.
  58. // For additional limitations, see https://cloud.google.com/bigquery/querying-data#largequeryresults
  59. AllowLargeResults bool
  60. // Priority specifies the priority with which to schedule the query.
  61. // The default priority is InteractivePriority.
  62. // For more information, see https://cloud.google.com/bigquery/querying-data#batchqueries
  63. Priority QueryPriority
  64. // MaxBillingTier sets the maximum billing tier for a Query.
  65. // Queries that have resource usage beyond this tier will fail (without
  66. // incurring a charge). If this field is zero, the project default will be used.
  67. MaxBillingTier int
  68. // MaxBytesBilled limits the number of bytes billed for
  69. // this job. Queries that would exceed this limit will fail (without incurring
  70. // a charge).
  71. // If this field is less than 1, the project default will be
  72. // used.
  73. MaxBytesBilled int64
  74. // UseStandardSQL causes the query to use standard SQL. The default.
  75. // Deprecated: use UseLegacySQL.
  76. UseStandardSQL bool
  77. // UseLegacySQL causes the query to use legacy SQL.
  78. UseLegacySQL bool
  79. // Parameters is a list of query parameters. The presence of parameters
  80. // implies the use of standard SQL.
  81. // If the query uses positional syntax ("?"), then no parameter may have a name.
  82. // If the query uses named syntax ("@p"), then all parameters must have names.
  83. // It is illegal to mix positional and named syntax.
  84. Parameters []QueryParameter
  85. // TimePartitioning specifies time-based partitioning
  86. // for the destination table.
  87. TimePartitioning *TimePartitioning
  88. // The labels associated with this job.
  89. Labels map[string]string
  90. // If true, don't actually run this job. A valid query will return a mostly
  91. // empty response with some processing statistics, while an invalid query will
  92. // return the same error it would if it wasn't a dry run.
  93. //
  94. // Query.Read will fail with dry-run queries. Call Query.Run instead, and then
  95. // call LastStatus on the returned job to get statistics. Calling Status on a
  96. // dry-run job will fail.
  97. DryRun bool
  98. // Custom encryption configuration (e.g., Cloud KMS keys).
  99. DestinationEncryptionConfig *EncryptionConfig
  100. // Allows the schema of the destination table to be updated as a side effect of
  101. // the query job.
  102. SchemaUpdateOptions []string
  103. }
  104. func (qc *QueryConfig) toBQ() (*bq.JobConfiguration, error) {
  105. qconf := &bq.JobConfigurationQuery{
  106. Query: qc.Q,
  107. CreateDisposition: string(qc.CreateDisposition),
  108. WriteDisposition: string(qc.WriteDisposition),
  109. AllowLargeResults: qc.AllowLargeResults,
  110. Priority: string(qc.Priority),
  111. MaximumBytesBilled: qc.MaxBytesBilled,
  112. TimePartitioning: qc.TimePartitioning.toBQ(),
  113. DestinationEncryptionConfiguration: qc.DestinationEncryptionConfig.toBQ(),
  114. SchemaUpdateOptions: qc.SchemaUpdateOptions,
  115. }
  116. if len(qc.TableDefinitions) > 0 {
  117. qconf.TableDefinitions = make(map[string]bq.ExternalDataConfiguration)
  118. }
  119. for name, data := range qc.TableDefinitions {
  120. qconf.TableDefinitions[name] = data.toBQ()
  121. }
  122. if qc.DefaultProjectID != "" || qc.DefaultDatasetID != "" {
  123. qconf.DefaultDataset = &bq.DatasetReference{
  124. DatasetId: qc.DefaultDatasetID,
  125. ProjectId: qc.DefaultProjectID,
  126. }
  127. }
  128. if tier := int64(qc.MaxBillingTier); tier > 0 {
  129. qconf.MaximumBillingTier = &tier
  130. }
  131. f := false
  132. if qc.DisableQueryCache {
  133. qconf.UseQueryCache = &f
  134. }
  135. if qc.DisableFlattenedResults {
  136. qconf.FlattenResults = &f
  137. // DisableFlattenResults implies AllowLargeResults.
  138. qconf.AllowLargeResults = true
  139. }
  140. if qc.UseStandardSQL && qc.UseLegacySQL {
  141. return nil, errors.New("bigquery: cannot provide both UseStandardSQL and UseLegacySQL")
  142. }
  143. if len(qc.Parameters) > 0 && qc.UseLegacySQL {
  144. return nil, errors.New("bigquery: cannot provide both Parameters (implying standard SQL) and UseLegacySQL")
  145. }
  146. ptrue := true
  147. pfalse := false
  148. if qc.UseLegacySQL {
  149. qconf.UseLegacySql = &ptrue
  150. } else {
  151. qconf.UseLegacySql = &pfalse
  152. }
  153. if qc.Dst != nil && !qc.Dst.implicitTable() {
  154. qconf.DestinationTable = qc.Dst.toBQ()
  155. }
  156. for _, p := range qc.Parameters {
  157. qp, err := p.toBQ()
  158. if err != nil {
  159. return nil, err
  160. }
  161. qconf.QueryParameters = append(qconf.QueryParameters, qp)
  162. }
  163. return &bq.JobConfiguration{
  164. Labels: qc.Labels,
  165. DryRun: qc.DryRun,
  166. Query: qconf,
  167. }, nil
  168. }
  169. func bqToQueryConfig(q *bq.JobConfiguration, c *Client) (*QueryConfig, error) {
  170. qq := q.Query
  171. qc := &QueryConfig{
  172. Labels: q.Labels,
  173. DryRun: q.DryRun,
  174. Q: qq.Query,
  175. CreateDisposition: TableCreateDisposition(qq.CreateDisposition),
  176. WriteDisposition: TableWriteDisposition(qq.WriteDisposition),
  177. AllowLargeResults: qq.AllowLargeResults,
  178. Priority: QueryPriority(qq.Priority),
  179. MaxBytesBilled: qq.MaximumBytesBilled,
  180. UseLegacySQL: qq.UseLegacySql == nil || *qq.UseLegacySql,
  181. TimePartitioning: bqToTimePartitioning(qq.TimePartitioning),
  182. DestinationEncryptionConfig: bqToEncryptionConfig(qq.DestinationEncryptionConfiguration),
  183. SchemaUpdateOptions: qq.SchemaUpdateOptions,
  184. }
  185. qc.UseStandardSQL = !qc.UseLegacySQL
  186. if len(qq.TableDefinitions) > 0 {
  187. qc.TableDefinitions = make(map[string]ExternalData)
  188. }
  189. for name, qedc := range qq.TableDefinitions {
  190. edc, err := bqToExternalDataConfig(&qedc)
  191. if err != nil {
  192. return nil, err
  193. }
  194. qc.TableDefinitions[name] = edc
  195. }
  196. if qq.DefaultDataset != nil {
  197. qc.DefaultProjectID = qq.DefaultDataset.ProjectId
  198. qc.DefaultDatasetID = qq.DefaultDataset.DatasetId
  199. }
  200. if qq.MaximumBillingTier != nil {
  201. qc.MaxBillingTier = int(*qq.MaximumBillingTier)
  202. }
  203. if qq.UseQueryCache != nil && !*qq.UseQueryCache {
  204. qc.DisableQueryCache = true
  205. }
  206. if qq.FlattenResults != nil && !*qq.FlattenResults {
  207. qc.DisableFlattenedResults = true
  208. }
  209. if qq.DestinationTable != nil {
  210. qc.Dst = bqToTable(qq.DestinationTable, c)
  211. }
  212. for _, qp := range qq.QueryParameters {
  213. p, err := bqToQueryParameter(qp)
  214. if err != nil {
  215. return nil, err
  216. }
  217. qc.Parameters = append(qc.Parameters, p)
  218. }
  219. return qc, nil
  220. }
  221. // QueryPriority specifies a priority with which a query is to be executed.
  222. type QueryPriority string
  223. const (
  224. BatchPriority QueryPriority = "BATCH"
  225. InteractivePriority QueryPriority = "INTERACTIVE"
  226. )
  227. // A Query queries data from a BigQuery table. Use Client.Query to create a Query.
  228. type Query struct {
  229. JobIDConfig
  230. QueryConfig
  231. client *Client
  232. }
  233. // Query creates a query with string q.
  234. // The returned Query may optionally be further configured before its Run method is called.
  235. func (c *Client) Query(q string) *Query {
  236. return &Query{
  237. client: c,
  238. QueryConfig: QueryConfig{Q: q},
  239. }
  240. }
  241. // Run initiates a query job.
  242. func (q *Query) Run(ctx context.Context) (j *Job, err error) {
  243. ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Query.Run")
  244. defer func() { trace.EndSpan(ctx, err) }()
  245. job, err := q.newJob()
  246. if err != nil {
  247. return nil, err
  248. }
  249. j, err = q.client.insertJob(ctx, job, nil)
  250. if err != nil {
  251. return nil, err
  252. }
  253. return j, nil
  254. }
  255. func (q *Query) newJob() (*bq.Job, error) {
  256. config, err := q.QueryConfig.toBQ()
  257. if err != nil {
  258. return nil, err
  259. }
  260. return &bq.Job{
  261. JobReference: q.JobIDConfig.createJobRef(q.client),
  262. Configuration: config,
  263. }, nil
  264. }
  265. // Read submits a query for execution and returns the results via a RowIterator.
  266. // It is a shorthand for Query.Run followed by Job.Read.
  267. func (q *Query) Read(ctx context.Context) (*RowIterator, error) {
  268. job, err := q.Run(ctx)
  269. if err != nil {
  270. return nil, err
  271. }
  272. return job.Read(ctx)
  273. }