You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

823 lines
24 KiB

  1. // Copyright 2015 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bigquery
  15. import (
  16. "errors"
  17. "fmt"
  18. "math/rand"
  19. "os"
  20. "sync"
  21. "time"
  22. "cloud.google.com/go/internal"
  23. "cloud.google.com/go/internal/trace"
  24. gax "github.com/googleapis/gax-go"
  25. "golang.org/x/net/context"
  26. bq "google.golang.org/api/bigquery/v2"
  27. "google.golang.org/api/googleapi"
  28. "google.golang.org/api/iterator"
  29. )
  30. // A Job represents an operation which has been submitted to BigQuery for processing.
  31. type Job struct {
  32. c *Client
  33. projectID string
  34. jobID string
  35. location string
  36. config *bq.JobConfiguration
  37. lastStatus *JobStatus
  38. }
  // JobFromID creates a Job which refers to an existing BigQuery job. The job
  // need not have been created by this package. For example, the job may have
  // been created in the BigQuery console.
  //
  // For jobs whose location is other than "US" or "EU", set Client.Location or use
  // JobFromIDLocation.
  //
  // JobFromID is equivalent to calling JobFromIDLocation with c.Location.
  func (c *Client) JobFromID(ctx context.Context, id string) (*Job, error) {
  	return c.JobFromIDLocation(ctx, id, c.Location)
  }
  48. // JobFromIDLocation creates a Job which refers to an existing BigQuery job. The job
  49. // need not have been created by this package (for example, it may have
  50. // been created in the BigQuery console), but it must exist in the specified location.
  51. func (c *Client) JobFromIDLocation(ctx context.Context, id, location string) (j *Job, err error) {
  52. ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.JobFromIDLocation")
  53. defer func() { trace.EndSpan(ctx, err) }()
  54. bqjob, err := c.getJobInternal(ctx, id, location, "configuration", "jobReference", "status", "statistics")
  55. if err != nil {
  56. return nil, err
  57. }
  58. return bqToJob(bqjob, c)
  59. }
  // ID returns the job's ID, as recorded from the job reference when the Job
  // was built.
  func (j *Job) ID() string {
  	return j.jobID
  }
  // Location returns the job's location, as recorded from the job reference
  // when the Job was built.
  func (j *Job) Location() string {
  	return j.location
  }
  // State is one of a sequence of states that a Job progresses through as it is processed.
  type State int

  // The possible values of State. Pending, Running and Done are the values that
  // the service's "PENDING", "RUNNING" and "DONE" job states are mapped to.
  const (
  	StateUnspecified State = iota // used only as a default in JobIterator
  	Pending                       // the service reports the job as "PENDING"
  	Running                       // the service reports the job as "RUNNING"
  	Done                          // the service reports the job as "DONE"
  )
  // JobStatus contains the current State of a job, and errors encountered while processing that job.
  type JobStatus struct {
  	// State is the job's state as of the time the status was retrieved.
  	State State

  	// err is set only when State is Done and the service reported an error
  	// result for the job. It is exposed through the Err method.
  	err error

  	// All errors encountered during the running of the job.
  	// Not all Errors are fatal, so errors here do not necessarily mean that the job has completed or was unsuccessful.
  	Errors []*Error

  	// Statistics about the job. It is nil if no statistics have been recorded.
  	Statistics *JobStatistics
  }
  // JobConfig contains configuration information for a job. It is implemented by
  // *CopyConfig, *ExtractConfig, *LoadConfig and *QueryConfig.
  type JobConfig interface {
  	// isJobConfig is an unexported marker method; because it is unexported,
  	// only types declared in this package can implement JobConfig.
  	isJobConfig()
  }

  // The marker methods below make the four configuration types satisfy JobConfig.
  func (*CopyConfig) isJobConfig()    {}
  func (*ExtractConfig) isJobConfig() {}
  func (*LoadConfig) isJobConfig()    {}
  func (*QueryConfig) isJobConfig()   {}
  // Config returns the configuration information for j.
  //
  // Both results are nil when j has no recorded configuration, or when the
  // configuration is not a copy, extract, load or query configuration.
  func (j *Job) Config() (JobConfig, error) {
  	return bqToJobConfig(j.config, j.c)
  }
  99. func bqToJobConfig(q *bq.JobConfiguration, c *Client) (JobConfig, error) {
  100. switch {
  101. case q == nil:
  102. return nil, nil
  103. case q.Copy != nil:
  104. return bqToCopyConfig(q, c), nil
  105. case q.Extract != nil:
  106. return bqToExtractConfig(q, c), nil
  107. case q.Load != nil:
  108. return bqToLoadConfig(q, c), nil
  109. case q.Query != nil:
  110. return bqToQueryConfig(q, c)
  111. default:
  112. return nil, nil
  113. }
  114. }
  // JobIDConfig describes how to create an ID for a job.
  type JobIDConfig struct {
  	// JobID is the ID to use for the job. If empty, a random job ID will be generated.
  	JobID string

  	// If AddJobIDSuffix is true, then a random string will be appended to JobID,
  	// separated from it by a dash. It has no effect when JobID is empty, since
  	// the whole ID is random in that case.
  	AddJobIDSuffix bool

  	// Location is the location for the job. If empty, the client's Location is used.
  	Location string
  }
  124. // createJobRef creates a JobReference.
  125. func (j *JobIDConfig) createJobRef(c *Client) *bq.JobReference {
  126. // We don't check whether projectID is empty; the server will return an
  127. // error when it encounters the resulting JobReference.
  128. loc := j.Location
  129. if loc == "" { // Use Client.Location as a default.
  130. loc = c.Location
  131. }
  132. jr := &bq.JobReference{ProjectId: c.projectID, Location: loc}
  133. if j.JobID == "" {
  134. jr.JobId = randomIDFn()
  135. } else if j.AddJobIDSuffix {
  136. jr.JobId = j.JobID + "-" + randomIDFn()
  137. } else {
  138. jr.JobId = j.JobID
  139. }
  140. return jr
  141. }
  // alphanum is the alphabet from which random IDs are drawn.
  const alphanum = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"

  // As of August 2017, the BigQuery service uses 27 alphanumeric characters for
  // suffixes.
  const randomIDLen = 27

  var (
  	// rngMu guards rng while an ID is being generated.
  	rngMu sync.Mutex
  	// rng is seeded from the start-up time mixed with the process ID.
  	rng = rand.New(rand.NewSource(time.Now().UnixNano() ^ int64(os.Getpid())))
  )

  // For testing.
  var randomIDFn = randomID

  // randomID returns a string of randomIDLen characters drawn from alphanum.
  // This is used for both job IDs and insert IDs.
  func randomID() string {
  	id := make([]byte, randomIDLen)

  	rngMu.Lock()
  	defer rngMu.Unlock()
  	for i := range id {
  		id[i] = alphanum[rng.Intn(len(alphanum))]
  	}
  	return string(id)
  }
  // Done reports whether the job has completed, that is, whether s.State is Done.
  // After Done returns true, the Err method will return an error if the job completed unsuccessfully.
  func (s *JobStatus) Done() bool {
  	return s.State == Done
  }
  // Err returns the error that caused the job to complete unsuccessfully (if any).
  // It is nil unless the job was Done and the service reported an error result
  // when the status was recorded.
  func (s *JobStatus) Err() error {
  	return s.err
  }
  171. // Status retrieves the current status of the job from BigQuery. It fails if the Status could not be determined.
  172. func (j *Job) Status(ctx context.Context) (js *JobStatus, err error) {
  173. ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Job.Status")
  174. defer func() { trace.EndSpan(ctx, err) }()
  175. bqjob, err := j.c.getJobInternal(ctx, j.jobID, j.location, "status", "statistics")
  176. if err != nil {
  177. return nil, err
  178. }
  179. if err := j.setStatus(bqjob.Status); err != nil {
  180. return nil, err
  181. }
  182. j.setStatistics(bqjob.Statistics, j.c)
  183. return j.lastStatus, nil
  184. }
  // LastStatus returns the most recently retrieved status of the job. The status is
  // retrieved when a new job is created, or when JobFromID or Job.Status is called.
  // Call Job.Status to get the most up-to-date information about a job.
  //
  // LastStatus makes no service call; the result is nil if no status has been
  // recorded for the job.
  func (j *Job) LastStatus() *JobStatus {
  	return j.lastStatus
  }
  191. // Cancel requests that a job be cancelled. This method returns without waiting for
  192. // cancellation to take effect. To check whether the job has terminated, use Job.Status.
  193. // Cancelled jobs may still incur costs.
  194. func (j *Job) Cancel(ctx context.Context) error {
  195. // Jobs.Cancel returns a job entity, but the only relevant piece of
  196. // data it may contain (the status of the job) is unreliable. From the
  197. // docs: "This call will return immediately, and the client will need
  198. // to poll for the job status to see if the cancel completed
  199. // successfully". So it would be misleading to return a status.
  200. call := j.c.bqs.Jobs.Cancel(j.projectID, j.jobID).
  201. Location(j.location).
  202. Fields(). // We don't need any of the response data.
  203. Context(ctx)
  204. setClientHeader(call.Header())
  205. return runWithRetry(ctx, func() error {
  206. _, err := call.Do()
  207. return err
  208. })
  209. }
  210. // Wait blocks until the job or the context is done. It returns the final status
  211. // of the job.
  212. // If an error occurs while retrieving the status, Wait returns that error. But
  213. // Wait returns nil if the status was retrieved successfully, even if
  214. // status.Err() != nil. So callers must check both errors. See the example.
  215. func (j *Job) Wait(ctx context.Context) (js *JobStatus, err error) {
  216. ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Job.Wait")
  217. defer func() { trace.EndSpan(ctx, err) }()
  218. if j.isQuery() {
  219. // We can avoid polling for query jobs.
  220. if _, err := j.waitForQuery(ctx, j.projectID); err != nil {
  221. return nil, err
  222. }
  223. // Note: extra RPC even if you just want to wait for the query to finish.
  224. js, err := j.Status(ctx)
  225. if err != nil {
  226. return nil, err
  227. }
  228. return js, nil
  229. }
  230. // Non-query jobs must poll.
  231. err = internal.Retry(ctx, gax.Backoff{}, func() (stop bool, err error) {
  232. js, err = j.Status(ctx)
  233. if err != nil {
  234. return true, err
  235. }
  236. if js.Done() {
  237. return true, nil
  238. }
  239. return false, nil
  240. })
  241. if err != nil {
  242. return nil, err
  243. }
  244. return js, nil
  245. }
  // Read fetches the results of a query job.
  // If j is not a query job, Read returns an error.
  //
  // Read waits for the query to complete before returning an iterator over
  // the rows of the job's destination table.
  func (j *Job) Read(ctx context.Context) (ri *RowIterator, err error) {
  	ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Job.Read")
  	// err is a named result so the span records whatever error is returned.
  	defer func() { trace.EndSpan(ctx, err) }()
  	return j.read(ctx, j.waitForQuery, fetchPage)
  }
  253. func (j *Job) read(ctx context.Context, waitForQuery func(context.Context, string) (Schema, error), pf pageFetcher) (*RowIterator, error) {
  254. if !j.isQuery() {
  255. return nil, errors.New("bigquery: cannot read from a non-query job")
  256. }
  257. destTable := j.config.Query.DestinationTable
  258. // The destination table should only be nil if there was a query error.
  259. projectID := j.projectID
  260. if destTable != nil && projectID != destTable.ProjectId {
  261. return nil, fmt.Errorf("bigquery: job project ID is %q, but destination table's is %q", projectID, destTable.ProjectId)
  262. }
  263. schema, err := waitForQuery(ctx, projectID)
  264. if err != nil {
  265. return nil, err
  266. }
  267. if destTable == nil {
  268. return nil, errors.New("bigquery: query job missing destination table")
  269. }
  270. dt := bqToTable(destTable, j.c)
  271. it := newRowIterator(ctx, dt, pf)
  272. it.Schema = schema
  273. return it, nil
  274. }
  275. // waitForQuery waits for the query job to complete and returns its schema.
  276. func (j *Job) waitForQuery(ctx context.Context, projectID string) (Schema, error) {
  277. // Use GetQueryResults only to wait for completion, not to read results.
  278. call := j.c.bqs.Jobs.GetQueryResults(projectID, j.jobID).Location(j.location).Context(ctx).MaxResults(0)
  279. setClientHeader(call.Header())
  280. backoff := gax.Backoff{
  281. Initial: 1 * time.Second,
  282. Multiplier: 2,
  283. Max: 60 * time.Second,
  284. }
  285. var res *bq.GetQueryResultsResponse
  286. err := internal.Retry(ctx, backoff, func() (stop bool, err error) {
  287. res, err = call.Do()
  288. if err != nil {
  289. return !retryableError(err), err
  290. }
  291. if !res.JobComplete { // GetQueryResults may return early without error; retry.
  292. return false, nil
  293. }
  294. return true, nil
  295. })
  296. if err != nil {
  297. return nil, err
  298. }
  299. return bqToSchema(res.Schema), nil
  300. }
  // JobStatistics contains statistics about a job.
  type JobStatistics struct {
  	// CreationTime, StartTime and EndTime are converted from the
  	// millisecond timestamps reported by the service.
  	CreationTime time.Time
  	StartTime    time.Time
  	EndTime      time.Time

  	// TotalBytesProcessed is copied from the service's job statistics.
  	TotalBytesProcessed int64

  	// Details holds the job-type-specific statistics. It is nil when the
  	// service reported no extract, load or query statistics.
  	Details Statistics
  }
  // Statistics is one of ExtractStatistics, LoadStatistics or QueryStatistics.
  // The interface is implemented by pointers to those types.
  type Statistics interface {
  	// implementsStatistics is an unexported marker method; because it is
  	// unexported, only types declared in this package can implement Statistics.
  	implementsStatistics()
  }
  // ExtractStatistics contains statistics about an extract job.
  // *ExtractStatistics implements Statistics.
  type ExtractStatistics struct {
  	// The number of files per destination URI or URI pattern specified in the
  	// extract configuration. These values will be in the same order as the
  	// URIs specified in the 'destinationUris' field.
  	DestinationURIFileCounts []int64
  }
  // LoadStatistics contains statistics about a load job.
  // *LoadStatistics implements Statistics.
  type LoadStatistics struct {
  	// The number of bytes of source data in a load job.
  	InputFileBytes int64

  	// The number of source files in a load job.
  	InputFiles int64

  	// Size of the loaded data in bytes. Note that while a load job is in the
  	// running state, this value may change.
  	OutputBytes int64

  	// The number of rows imported in a load job. Note that while an import job is
  	// in the running state, this value may change.
  	OutputRows int64
  }
  // QueryStatistics contains statistics about a query job.
  // *QueryStatistics implements Statistics.
  type QueryStatistics struct {
  	// Billing tier for the job.
  	BillingTier int64

  	// Whether the query result was fetched from the query cache.
  	CacheHit bool

  	// The type of query statement, if valid.
  	StatementType string

  	// Total bytes billed for the job.
  	TotalBytesBilled int64

  	// Total bytes processed for the job.
  	TotalBytesProcessed int64

  	// Describes execution plan for the query.
  	QueryPlan []*ExplainQueryStage

  	// The number of rows affected by a DML statement. Present only for DML
  	// statements INSERT, UPDATE or DELETE.
  	NumDMLAffectedRows int64

  	// Describes a timeline of job execution.
  	Timeline []*QueryTimelineSample

  	// ReferencedTables: [Output-only, Experimental] Referenced tables for
  	// the job. Queries that reference more than 50 tables will not have a
  	// complete list.
  	ReferencedTables []*Table

  	// The schema of the results. Present only for successful dry run of
  	// non-legacy SQL queries.
  	Schema Schema

  	// Slot-milliseconds consumed by this query job.
  	SlotMillis int64

  	// Standard SQL: list of undeclared query parameter names detected during a
  	// dry run validation.
  	UndeclaredQueryParameterNames []string

  	// DDL target table.
  	DDLTargetTable *Table

  	// DDL Operation performed on the target table. Used to report how the
  	// query impacted the DDL target table.
  	DDLOperationPerformed string
  }
  // ExplainQueryStage describes one stage of a query.
  //
  // Duration fields are converted from the millisecond counts reported by the
  // service, and StartTime and EndTime from its millisecond timestamps.
  type ExplainQueryStage struct {
  	// CompletedParallelInputs: Number of parallel input segments completed.
  	CompletedParallelInputs int64

  	// ComputeAvg: Duration the average shard spent on CPU-bound tasks.
  	ComputeAvg time.Duration

  	// ComputeMax: Duration the slowest shard spent on CPU-bound tasks.
  	ComputeMax time.Duration

  	// Relative amount of the total time the average shard spent on CPU-bound tasks.
  	ComputeRatioAvg float64

  	// Relative amount of the total time the slowest shard spent on CPU-bound tasks.
  	ComputeRatioMax float64

  	// EndTime: Stage end time.
  	EndTime time.Time

  	// Unique ID for stage within plan.
  	ID int64

  	// InputStages: IDs for stages that are inputs to this stage.
  	InputStages []int64

  	// Human-readable name for stage.
  	Name string

  	// ParallelInputs: Number of parallel input segments to be processed.
  	ParallelInputs int64

  	// ReadAvg: Duration the average shard spent reading input.
  	ReadAvg time.Duration

  	// ReadMax: Duration the slowest shard spent reading input.
  	ReadMax time.Duration

  	// Relative amount of the total time the average shard spent reading input.
  	ReadRatioAvg float64

  	// Relative amount of the total time the slowest shard spent reading input.
  	ReadRatioMax float64

  	// Number of records read into the stage.
  	RecordsRead int64

  	// Number of records written by the stage.
  	RecordsWritten int64

  	// ShuffleOutputBytes: Total number of bytes written to shuffle.
  	ShuffleOutputBytes int64

  	// ShuffleOutputBytesSpilled: Total number of bytes written to shuffle
  	// and spilled to disk.
  	ShuffleOutputBytesSpilled int64

  	// StartTime: Stage start time.
  	StartTime time.Time

  	// Current status for the stage.
  	Status string

  	// List of operations within the stage in dependency order (approximately
  	// chronological).
  	Steps []*ExplainQueryStep

  	// WaitAvg: Duration the average shard spent waiting to be scheduled.
  	WaitAvg time.Duration

  	// WaitMax: Duration the slowest shard spent waiting to be scheduled.
  	WaitMax time.Duration

  	// Relative amount of the total time the average shard spent waiting to be scheduled.
  	WaitRatioAvg float64

  	// Relative amount of the total time the slowest shard spent waiting to be scheduled.
  	WaitRatioMax float64

  	// WriteAvg: Duration the average shard spent on writing output.
  	WriteAvg time.Duration

  	// WriteMax: Duration the slowest shard spent on writing output.
  	WriteMax time.Duration

  	// Relative amount of the total time the average shard spent on writing output.
  	WriteRatioAvg float64

  	// Relative amount of the total time the slowest shard spent on writing output.
  	WriteRatioMax float64
  }
  // ExplainQueryStep describes one step of a query stage.
  // Steps are listed in ExplainQueryStage.Steps.
  type ExplainQueryStep struct {
  	// Machine-readable operation type.
  	Kind string

  	// Human-readable stage descriptions.
  	Substeps []string
  }
  // QueryTimelineSample represents a sample of execution statistics at a point in time.
  type QueryTimelineSample struct {
  	// Total number of units currently being processed by workers, represented as largest value since last sample.
  	ActiveUnits int64

  	// Total parallel units of work completed by this query.
  	CompletedUnits int64

  	// Time elapsed since start of query execution. Converted from the
  	// millisecond count reported by the service.
  	Elapsed time.Duration

  	// Total parallel units of work remaining for the active stages.
  	PendingUnits int64

  	// Cumulative slot-milliseconds consumed by the query.
  	SlotMillis int64
  }
  // The marker methods below make the three statistics types satisfy Statistics.
  func (*ExtractStatistics) implementsStatistics() {}
  func (*LoadStatistics) implementsStatistics()    {}
  func (*QueryStatistics) implementsStatistics()   {}
  456. // Jobs lists jobs within a project.
  457. func (c *Client) Jobs(ctx context.Context) *JobIterator {
  458. it := &JobIterator{
  459. ctx: ctx,
  460. c: c,
  461. ProjectID: c.projectID,
  462. }
  463. it.pageInfo, it.nextFunc = iterator.NewPageInfo(
  464. it.fetch,
  465. func() int { return len(it.items) },
  466. func() interface{} { b := it.items; it.items = nil; return b })
  467. return it
  468. }
  // JobIterator iterates over jobs in a project.
  // The exported fields are read each time a page of jobs is fetched.
  type JobIterator struct {
  	ProjectID string // Project ID of the jobs to list. Default is the client's project.
  	AllUsers  bool   // Whether to list jobs owned by all users in the project, or just the current caller.
  	State     State  // List only jobs in the given state. Defaults to all states.

  	ctx      context.Context    // context used for every list request
  	c        *Client            // client that issues the list requests
  	pageInfo *iterator.PageInfo // pagination state, returned by PageInfo
  	nextFunc func() error       // called by Next before it takes an item from the buffer
  	items    []*Job             // jobs fetched but not yet returned by Next
  }
  // PageInfo returns the iterator's pagination state, which is created by
  // iterator.NewPageInfo in Client.Jobs. See the google.golang.org/api/iterator
  // package for how to use it.
  func (it *JobIterator) PageInfo() *iterator.PageInfo { return it.pageInfo }
  481. func (it *JobIterator) Next() (*Job, error) {
  482. if err := it.nextFunc(); err != nil {
  483. return nil, err
  484. }
  485. item := it.items[0]
  486. it.items = it.items[1:]
  487. return item, nil
  488. }
  489. func (it *JobIterator) fetch(pageSize int, pageToken string) (string, error) {
  490. var st string
  491. switch it.State {
  492. case StateUnspecified:
  493. st = ""
  494. case Pending:
  495. st = "pending"
  496. case Running:
  497. st = "running"
  498. case Done:
  499. st = "done"
  500. default:
  501. return "", fmt.Errorf("bigquery: invalid value for JobIterator.State: %d", it.State)
  502. }
  503. req := it.c.bqs.Jobs.List(it.ProjectID).
  504. Context(it.ctx).
  505. PageToken(pageToken).
  506. Projection("full").
  507. AllUsers(it.AllUsers)
  508. if st != "" {
  509. req.StateFilter(st)
  510. }
  511. setClientHeader(req.Header())
  512. if pageSize > 0 {
  513. req.MaxResults(int64(pageSize))
  514. }
  515. res, err := req.Do()
  516. if err != nil {
  517. return "", err
  518. }
  519. for _, j := range res.Jobs {
  520. job, err := convertListedJob(j, it.c)
  521. if err != nil {
  522. return "", err
  523. }
  524. it.items = append(it.items, job)
  525. }
  526. return res.NextPageToken, nil
  527. }
  // convertListedJob builds a Job from one entry of a job-list response by
  // passing the entry's reference, configuration, status and statistics to
  // bqToJob2.
  func convertListedJob(j *bq.JobListJobs, c *Client) (*Job, error) {
  	return bqToJob2(j.JobReference, j.Configuration, j.Status, j.Statistics, c)
  }
  531. func (c *Client) getJobInternal(ctx context.Context, jobID, location string, fields ...googleapi.Field) (*bq.Job, error) {
  532. var job *bq.Job
  533. call := c.bqs.Jobs.Get(c.projectID, jobID).Context(ctx)
  534. if location != "" {
  535. call = call.Location(location)
  536. }
  537. if len(fields) > 0 {
  538. call = call.Fields(fields...)
  539. }
  540. setClientHeader(call.Header())
  541. err := runWithRetry(ctx, func() (err error) {
  542. job, err = call.Do()
  543. return err
  544. })
  545. if err != nil {
  546. return nil, err
  547. }
  548. return job, nil
  549. }
  // bqToJob builds a Job from a full job resource by passing its reference,
  // configuration, status and statistics to bqToJob2.
  func bqToJob(q *bq.Job, c *Client) (*Job, error) {
  	return bqToJob2(q.JobReference, q.Configuration, q.Status, q.Statistics, c)
  }
  553. func bqToJob2(qr *bq.JobReference, qc *bq.JobConfiguration, qs *bq.JobStatus, qt *bq.JobStatistics, c *Client) (*Job, error) {
  554. j := &Job{
  555. projectID: qr.ProjectId,
  556. jobID: qr.JobId,
  557. location: qr.Location,
  558. c: c,
  559. }
  560. j.setConfig(qc)
  561. if err := j.setStatus(qs); err != nil {
  562. return nil, err
  563. }
  564. j.setStatistics(qt, c)
  565. return j, nil
  566. }
  567. func (j *Job) setConfig(config *bq.JobConfiguration) {
  568. if config == nil {
  569. return
  570. }
  571. j.config = config
  572. }
  // isQuery reports whether j has a recorded configuration with a non-nil
  // Query section.
  func (j *Job) isQuery() bool {
  	return j.config != nil && j.config.Query != nil
  }
  576. var stateMap = map[string]State{"PENDING": Pending, "RUNNING": Running, "DONE": Done}
  577. func (j *Job) setStatus(qs *bq.JobStatus) error {
  578. if qs == nil {
  579. return nil
  580. }
  581. state, ok := stateMap[qs.State]
  582. if !ok {
  583. return fmt.Errorf("unexpected job state: %v", qs.State)
  584. }
  585. j.lastStatus = &JobStatus{
  586. State: state,
  587. err: nil,
  588. }
  589. if err := bqToError(qs.ErrorResult); state == Done && err != nil {
  590. j.lastStatus.err = err
  591. }
  592. for _, ep := range qs.Errors {
  593. j.lastStatus.Errors = append(j.lastStatus.Errors, bqToError(ep))
  594. }
  595. return nil
  596. }
  597. func (j *Job) setStatistics(s *bq.JobStatistics, c *Client) {
  598. if s == nil || j.lastStatus == nil {
  599. return
  600. }
  601. js := &JobStatistics{
  602. CreationTime: unixMillisToTime(s.CreationTime),
  603. StartTime: unixMillisToTime(s.StartTime),
  604. EndTime: unixMillisToTime(s.EndTime),
  605. TotalBytesProcessed: s.TotalBytesProcessed,
  606. }
  607. switch {
  608. case s.Extract != nil:
  609. js.Details = &ExtractStatistics{
  610. DestinationURIFileCounts: []int64(s.Extract.DestinationUriFileCounts),
  611. }
  612. case s.Load != nil:
  613. js.Details = &LoadStatistics{
  614. InputFileBytes: s.Load.InputFileBytes,
  615. InputFiles: s.Load.InputFiles,
  616. OutputBytes: s.Load.OutputBytes,
  617. OutputRows: s.Load.OutputRows,
  618. }
  619. case s.Query != nil:
  620. var names []string
  621. for _, qp := range s.Query.UndeclaredQueryParameters {
  622. names = append(names, qp.Name)
  623. }
  624. var tables []*Table
  625. for _, tr := range s.Query.ReferencedTables {
  626. tables = append(tables, bqToTable(tr, c))
  627. }
  628. js.Details = &QueryStatistics{
  629. BillingTier: s.Query.BillingTier,
  630. CacheHit: s.Query.CacheHit,
  631. DDLTargetTable: bqToTable(s.Query.DdlTargetTable, c),
  632. DDLOperationPerformed: s.Query.DdlOperationPerformed,
  633. StatementType: s.Query.StatementType,
  634. TotalBytesBilled: s.Query.TotalBytesBilled,
  635. TotalBytesProcessed: s.Query.TotalBytesProcessed,
  636. NumDMLAffectedRows: s.Query.NumDmlAffectedRows,
  637. QueryPlan: queryPlanFromProto(s.Query.QueryPlan),
  638. Schema: bqToSchema(s.Query.Schema),
  639. SlotMillis: s.Query.TotalSlotMs,
  640. Timeline: timelineFromProto(s.Query.Timeline),
  641. ReferencedTables: tables,
  642. UndeclaredQueryParameterNames: names,
  643. }
  644. }
  645. j.lastStatus.Statistics = js
  646. }
  647. func queryPlanFromProto(stages []*bq.ExplainQueryStage) []*ExplainQueryStage {
  648. var res []*ExplainQueryStage
  649. for _, s := range stages {
  650. var steps []*ExplainQueryStep
  651. for _, p := range s.Steps {
  652. steps = append(steps, &ExplainQueryStep{
  653. Kind: p.Kind,
  654. Substeps: p.Substeps,
  655. })
  656. }
  657. res = append(res, &ExplainQueryStage{
  658. CompletedParallelInputs: s.CompletedParallelInputs,
  659. ComputeAvg: time.Duration(s.ComputeMsAvg) * time.Millisecond,
  660. ComputeMax: time.Duration(s.ComputeMsMax) * time.Millisecond,
  661. ComputeRatioAvg: s.ComputeRatioAvg,
  662. ComputeRatioMax: s.ComputeRatioMax,
  663. EndTime: time.Unix(0, s.EndMs*1e6),
  664. ID: s.Id,
  665. InputStages: s.InputStages,
  666. Name: s.Name,
  667. ParallelInputs: s.ParallelInputs,
  668. ReadAvg: time.Duration(s.ReadMsAvg) * time.Millisecond,
  669. ReadMax: time.Duration(s.ReadMsMax) * time.Millisecond,
  670. ReadRatioAvg: s.ReadRatioAvg,
  671. ReadRatioMax: s.ReadRatioMax,
  672. RecordsRead: s.RecordsRead,
  673. RecordsWritten: s.RecordsWritten,
  674. ShuffleOutputBytes: s.ShuffleOutputBytes,
  675. ShuffleOutputBytesSpilled: s.ShuffleOutputBytesSpilled,
  676. StartTime: time.Unix(0, s.StartMs*1e6),
  677. Status: s.Status,
  678. Steps: steps,
  679. WaitAvg: time.Duration(s.WaitMsAvg) * time.Millisecond,
  680. WaitMax: time.Duration(s.WaitMsMax) * time.Millisecond,
  681. WaitRatioAvg: s.WaitRatioAvg,
  682. WaitRatioMax: s.WaitRatioMax,
  683. WriteAvg: time.Duration(s.WriteMsAvg) * time.Millisecond,
  684. WriteMax: time.Duration(s.WriteMsMax) * time.Millisecond,
  685. WriteRatioAvg: s.WriteRatioAvg,
  686. WriteRatioMax: s.WriteRatioMax,
  687. })
  688. }
  689. return res
  690. }
  691. func timelineFromProto(timeline []*bq.QueryTimelineSample) []*QueryTimelineSample {
  692. var res []*QueryTimelineSample
  693. for _, s := range timeline {
  694. res = append(res, &QueryTimelineSample{
  695. ActiveUnits: s.ActiveUnits,
  696. CompletedUnits: s.CompletedUnits,
  697. Elapsed: time.Duration(s.ElapsedMs) * time.Millisecond,
  698. PendingUnits: s.PendingUnits,
  699. SlotMillis: s.TotalSlotMs,
  700. })
  701. }
  702. return res
  703. }