You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

398 lines
12 KiB

  1. // Copyright 2015 Google LLC
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bigquery
  15. import (
  16. "errors"
  17. "fmt"
  18. "reflect"
  19. "cloud.google.com/go/internal/atomiccache"
  20. bq "google.golang.org/api/bigquery/v2"
  21. )
  22. // Schema describes the fields in a table or query result.
  23. type Schema []*FieldSchema
  24. type FieldSchema struct {
  25. // The field name.
  26. // Must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_),
  27. // and must start with a letter or underscore.
  28. // The maximum length is 128 characters.
  29. Name string
  30. // A description of the field. The maximum length is 16,384 characters.
  31. Description string
  32. // Whether the field may contain multiple values.
  33. Repeated bool
  34. // Whether the field is required. Ignored if Repeated is true.
  35. Required bool
  36. // The field data type. If Type is Record, then this field contains a nested schema,
  37. // which is described by Schema.
  38. Type FieldType
  39. // Describes the nested schema if Type is set to Record.
  40. Schema Schema
  41. }
  42. func (fs *FieldSchema) toBQ() *bq.TableFieldSchema {
  43. tfs := &bq.TableFieldSchema{
  44. Description: fs.Description,
  45. Name: fs.Name,
  46. Type: string(fs.Type),
  47. }
  48. if fs.Repeated {
  49. tfs.Mode = "REPEATED"
  50. } else if fs.Required {
  51. tfs.Mode = "REQUIRED"
  52. } // else leave as default, which is interpreted as NULLABLE.
  53. for _, f := range fs.Schema {
  54. tfs.Fields = append(tfs.Fields, f.toBQ())
  55. }
  56. return tfs
  57. }
  58. func (s Schema) toBQ() *bq.TableSchema {
  59. var fields []*bq.TableFieldSchema
  60. for _, f := range s {
  61. fields = append(fields, f.toBQ())
  62. }
  63. return &bq.TableSchema{Fields: fields}
  64. }
  65. func bqToFieldSchema(tfs *bq.TableFieldSchema) *FieldSchema {
  66. fs := &FieldSchema{
  67. Description: tfs.Description,
  68. Name: tfs.Name,
  69. Repeated: tfs.Mode == "REPEATED",
  70. Required: tfs.Mode == "REQUIRED",
  71. Type: FieldType(tfs.Type),
  72. }
  73. for _, f := range tfs.Fields {
  74. fs.Schema = append(fs.Schema, bqToFieldSchema(f))
  75. }
  76. return fs
  77. }
  78. func bqToSchema(ts *bq.TableSchema) Schema {
  79. if ts == nil {
  80. return nil
  81. }
  82. var s Schema
  83. for _, f := range ts.Fields {
  84. s = append(s, bqToFieldSchema(f))
  85. }
  86. return s
  87. }
  // FieldType is the data type of a schema field, expressed as the type name
  // string that is placed in the API's TableFieldSchema.Type.
  type FieldType string

  const (
  	// StringFieldType is the STRING field type.
  	StringFieldType FieldType = "STRING"
  	// BytesFieldType is the BYTES field type.
  	BytesFieldType FieldType = "BYTES"
  	// IntegerFieldType is the INTEGER field type.
  	IntegerFieldType FieldType = "INTEGER"
  	// FloatFieldType is the FLOAT field type.
  	FloatFieldType FieldType = "FLOAT"
  	// BooleanFieldType is the BOOLEAN field type.
  	BooleanFieldType FieldType = "BOOLEAN"
  	// TimestampFieldType is the TIMESTAMP field type.
  	TimestampFieldType FieldType = "TIMESTAMP"
  	// RecordFieldType is the RECORD field type; a field of this type carries
  	// a nested schema.
  	RecordFieldType FieldType = "RECORD"
  	// DateFieldType is the DATE field type.
  	DateFieldType FieldType = "DATE"
  	// TimeFieldType is the TIME field type.
  	TimeFieldType FieldType = "TIME"
  	// DateTimeFieldType is the DATETIME field type.
  	DateTimeFieldType FieldType = "DATETIME"
  	// NumericFieldType is the NUMERIC field type.
  	NumericFieldType FieldType = "NUMERIC"
  )
  // Sentinel errors returned by schema inference.
  var (
  	// errBadNullable: the "nullable" tag option was used on a type that does not accept it.
  	errBadNullable = errors.New(`bigquery: use "nullable" only for []byte and struct pointers; for all other types, use a NullXXX type`)
  	// errInvalidFieldName: a struct field has a name that is not acceptable.
  	errInvalidFieldName = errors.New("bigquery: invalid name of field in struct")
  	// errNoStruct: the value to infer from is neither a struct nor a pointer to one.
  	errNoStruct = errors.New("bigquery: can only infer schema from struct or pointer to struct")
  	// errUnsupportedFieldType: a struct field has a Go type with no schema mapping.
  	errUnsupportedFieldType = errors.New("bigquery: unsupported type of field in struct")
  )

  // typeOfByteSlice is the reflect.Type of []byte, used to tell []byte apart
  // from other slice types.
  var typeOfByteSlice = reflect.TypeOf([]byte(nil))
  109. // InferSchema tries to derive a BigQuery schema from the supplied struct value.
  110. // Each exported struct field is mapped to a field in the schema.
  111. //
  112. // The following BigQuery types are inferred from the corresponding Go types.
  113. // (This is the same mapping as that used for RowIterator.Next.) Fields inferred
  114. // from these types are marked required (non-nullable).
  115. //
  116. // STRING string
  117. // BOOL bool
  118. // INTEGER int, int8, int16, int32, int64, uint8, uint16, uint32
  119. // FLOAT float32, float64
  120. // BYTES []byte
  121. // TIMESTAMP time.Time
  122. // DATE civil.Date
  123. // TIME civil.Time
  124. // DATETIME civil.DateTime
  125. // NUMERIC *big.Rat
  126. //
  127. // The big.Rat type supports numbers of arbitrary size and precision. Values
  128. // will be rounded to 9 digits after the decimal point before being transmitted
  129. // to BigQuery. See https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#numeric-type
  130. // for more on NUMERIC.
  131. //
  132. // A Go slice or array type is inferred to be a BigQuery repeated field of the
  133. // element type. The element type must be one of the above listed types.
  134. //
  135. // Nullable fields are inferred from the NullXXX types, declared in this package:
  136. //
  137. // STRING NullString
  138. // BOOL NullBool
  139. // INTEGER NullInt64
  140. // FLOAT NullFloat64
  141. // TIMESTAMP NullTimestamp
  142. // DATE NullDate
  143. // TIME NullTime
  144. // DATETIME NullDateTime
  145. //
  146. // For a nullable BYTES field, use the type []byte and tag the field "nullable" (see below).
  147. // For a nullable NUMERIC field, use the type *big.Rat and tag the field "nullable".
  148. //
  149. // A struct field that is of struct type is inferred to be a required field of type
  150. // RECORD with a schema inferred recursively. For backwards compatibility, a field of
  151. // type pointer to struct is also inferred to be required. To get a nullable RECORD
  152. // field, use the "nullable" tag (see below).
  153. //
  154. // InferSchema returns an error if any of the examined fields is of type uint,
  155. // uint64, uintptr, map, interface, complex64, complex128, func, or chan. Future
  156. // versions may handle these cases without error.
  157. //
  158. // Recursively defined structs are also disallowed.
  159. //
  160. // Struct fields may be tagged in a way similar to the encoding/json package.
  161. // A tag of the form
  162. // bigquery:"name"
  163. // uses "name" instead of the struct field name as the BigQuery field name.
  164. // A tag of the form
  165. // bigquery:"-"
  166. // omits the field from the inferred schema.
  167. // The "nullable" option marks the field as nullable (not required). It is only
  168. // needed for []byte, *big.Rat and pointer-to-struct fields, and cannot appear on other
  169. // fields. In this example, the Go name of the field is retained:
  170. // bigquery:",nullable"
  171. func InferSchema(st interface{}) (Schema, error) {
  172. return inferSchemaReflectCached(reflect.TypeOf(st))
  173. }
  174. // TODO(jba): replace with sync.Map for Go 1.9.
  175. var schemaCache atomiccache.Cache
  176. type cacheVal struct {
  177. schema Schema
  178. err error
  179. }
  180. func inferSchemaReflectCached(t reflect.Type) (Schema, error) {
  181. cv := schemaCache.Get(t, func() interface{} {
  182. s, err := inferSchemaReflect(t)
  183. return cacheVal{s, err}
  184. }).(cacheVal)
  185. return cv.schema, cv.err
  186. }
  187. func inferSchemaReflect(t reflect.Type) (Schema, error) {
  188. rec, err := hasRecursiveType(t, nil)
  189. if err != nil {
  190. return nil, err
  191. }
  192. if rec {
  193. return nil, fmt.Errorf("bigquery: schema inference for recursive type %s", t)
  194. }
  195. return inferStruct(t)
  196. }
  197. func inferStruct(t reflect.Type) (Schema, error) {
  198. switch t.Kind() {
  199. case reflect.Ptr:
  200. if t.Elem().Kind() != reflect.Struct {
  201. return nil, errNoStruct
  202. }
  203. t = t.Elem()
  204. fallthrough
  205. case reflect.Struct:
  206. return inferFields(t)
  207. default:
  208. return nil, errNoStruct
  209. }
  210. }
  211. // inferFieldSchema infers the FieldSchema for a Go type
  212. func inferFieldSchema(rt reflect.Type, nullable bool) (*FieldSchema, error) {
  213. // Only []byte and struct pointers can be tagged nullable.
  214. if nullable && !(rt == typeOfByteSlice || rt.Kind() == reflect.Ptr && rt.Elem().Kind() == reflect.Struct) {
  215. return nil, errBadNullable
  216. }
  217. switch rt {
  218. case typeOfByteSlice:
  219. return &FieldSchema{Required: !nullable, Type: BytesFieldType}, nil
  220. case typeOfGoTime:
  221. return &FieldSchema{Required: true, Type: TimestampFieldType}, nil
  222. case typeOfDate:
  223. return &FieldSchema{Required: true, Type: DateFieldType}, nil
  224. case typeOfTime:
  225. return &FieldSchema{Required: true, Type: TimeFieldType}, nil
  226. case typeOfDateTime:
  227. return &FieldSchema{Required: true, Type: DateTimeFieldType}, nil
  228. case typeOfRat:
  229. return &FieldSchema{Required: !nullable, Type: NumericFieldType}, nil
  230. }
  231. if ft := nullableFieldType(rt); ft != "" {
  232. return &FieldSchema{Required: false, Type: ft}, nil
  233. }
  234. if isSupportedIntType(rt) || isSupportedUintType(rt) {
  235. return &FieldSchema{Required: true, Type: IntegerFieldType}, nil
  236. }
  237. switch rt.Kind() {
  238. case reflect.Slice, reflect.Array:
  239. et := rt.Elem()
  240. if et != typeOfByteSlice && (et.Kind() == reflect.Slice || et.Kind() == reflect.Array) {
  241. // Multi dimensional slices/arrays are not supported by BigQuery
  242. return nil, errUnsupportedFieldType
  243. }
  244. if nullableFieldType(et) != "" {
  245. // Repeated nullable types are not supported by BigQuery.
  246. return nil, errUnsupportedFieldType
  247. }
  248. f, err := inferFieldSchema(et, false)
  249. if err != nil {
  250. return nil, err
  251. }
  252. f.Repeated = true
  253. f.Required = false
  254. return f, nil
  255. case reflect.Ptr:
  256. if rt.Elem().Kind() != reflect.Struct {
  257. return nil, errUnsupportedFieldType
  258. }
  259. fallthrough
  260. case reflect.Struct:
  261. nested, err := inferStruct(rt)
  262. if err != nil {
  263. return nil, err
  264. }
  265. return &FieldSchema{Required: !nullable, Type: RecordFieldType, Schema: nested}, nil
  266. case reflect.String:
  267. return &FieldSchema{Required: !nullable, Type: StringFieldType}, nil
  268. case reflect.Bool:
  269. return &FieldSchema{Required: !nullable, Type: BooleanFieldType}, nil
  270. case reflect.Float32, reflect.Float64:
  271. return &FieldSchema{Required: !nullable, Type: FloatFieldType}, nil
  272. default:
  273. return nil, errUnsupportedFieldType
  274. }
  275. }
  276. // inferFields extracts all exported field types from struct type.
  277. func inferFields(rt reflect.Type) (Schema, error) {
  278. var s Schema
  279. fields, err := fieldCache.Fields(rt)
  280. if err != nil {
  281. return nil, err
  282. }
  283. for _, field := range fields {
  284. var nullable bool
  285. for _, opt := range field.ParsedTag.([]string) {
  286. if opt == nullableTagOption {
  287. nullable = true
  288. break
  289. }
  290. }
  291. f, err := inferFieldSchema(field.Type, nullable)
  292. if err != nil {
  293. return nil, err
  294. }
  295. f.Name = field.Name
  296. s = append(s, f)
  297. }
  298. return s, nil
  299. }
  // isSupportedIntType reports whether t is an int type that can be properly
  // represented by the BigQuery INTEGER/INT64 type. Every signed integer kind
  // qualifies.
  func isSupportedIntType(t reflect.Type) bool {
  	k := t.Kind()
  	return k == reflect.Int ||
  		k == reflect.Int8 ||
  		k == reflect.Int16 ||
  		k == reflect.Int32 ||
  		k == reflect.Int64
  }
  // isSupportedUintType reports whether t is a uint type that can be properly
  // represented by the BigQuery INTEGER/INT64 type. Only uint8, uint16 and
  // uint32 kinds qualify; uint, uint64 and uintptr do not.
  func isSupportedUintType(t reflect.Type) bool {
  	switch t.Kind() {
  	case reflect.Uint8, reflect.Uint16, reflect.Uint32:
  		return true
  	default:
  		return false
  	}
  }
  // typeList is a linked list of reflect.Types.
  // A nil *typeList is the empty list.
  type typeList struct {
  	t    reflect.Type
  	next *typeList
  }

  // has reports whether t appears anywhere in the list starting at l.
  // It is safe to call on a nil receiver, in which case it returns false.
  func (l *typeList) has(t reflect.Type) bool {
  	for node := l; node != nil; node = node.next {
  		if node.t == t {
  			return true
  		}
  	}
  	return false
  }
  334. // hasRecursiveType reports whether t or any type inside t refers to itself, directly or indirectly,
  335. // via exported fields. (Schema inference ignores unexported fields.)
  336. func hasRecursiveType(t reflect.Type, seen *typeList) (bool, error) {
  337. for t.Kind() == reflect.Ptr || t.Kind() == reflect.Slice || t.Kind() == reflect.Array {
  338. t = t.Elem()
  339. }
  340. if t.Kind() != reflect.Struct {
  341. return false, nil
  342. }
  343. if seen.has(t) {
  344. return true, nil
  345. }
  346. fields, err := fieldCache.Fields(t)
  347. if err != nil {
  348. return false, err
  349. }
  350. seen = &typeList{t, seen}
  351. // Because seen is a linked list, additions to it from one field's
  352. // recursive call will not affect the value for subsequent fields' calls.
  353. for _, field := range fields {
  354. ok, err := hasRecursiveType(field.Type, seen)
  355. if err != nil {
  356. return false, err
  357. }
  358. if ok {
  359. return true, nil
  360. }
  361. }
  362. return false, nil
  363. }