You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

250 lines
6.4 KiB

  1. /*
  2. Copyright 2016 Google LLC
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package bigtable
  14. import (
  15. "bytes"
  16. "fmt"
  17. btpb "google.golang.org/genproto/googleapis/bigtable/v2"
  18. )
  19. // A Row is returned by ReadRows. The map is keyed by column family (the prefix
  20. // of the column name before the colon). The values are the returned ReadItems
  21. // for that column family in the order returned by Read.
  22. type Row map[string][]ReadItem
  23. // Key returns the row's key, or "" if the row is empty.
  24. func (r Row) Key() string {
  25. for _, items := range r {
  26. if len(items) > 0 {
  27. return items[0].Row
  28. }
  29. }
  30. return ""
  31. }
  32. // A ReadItem is returned by Read. A ReadItem contains data from a specific row and column.
  33. type ReadItem struct {
  34. Row, Column string
  35. Timestamp Timestamp
  36. Value []byte
  37. }
  // rrState identifies the current position of the read-rows state machine.
  type rrState int64

  const (
  	// newRow: no row is being assembled; the next chunk must start one.
  	newRow = rrState(iota)
  	// rowInProgress: at least one cell of the current row is complete, but
  	// the row has not been committed yet.
  	rowInProgress
  	// cellInProgress: a cell value split across chunks is still being
  	// accumulated.
  	cellInProgress
  )
  45. // chunkReader handles cell chunks from the read rows response and combines
  46. // them into full Rows.
  47. type chunkReader struct {
  48. state rrState
  49. curKey []byte
  50. curFam string
  51. curQual []byte
  52. curTS int64
  53. curVal []byte
  54. curRow Row
  55. lastKey string
  56. }
  57. // newChunkReader returns a new chunkReader for handling read rows responses.
  58. func newChunkReader() *chunkReader {
  59. return &chunkReader{state: newRow}
  60. }
  61. // Process takes a cell chunk and returns a new Row if the given chunk
  62. // completes a Row, or nil otherwise.
  63. func (cr *chunkReader) Process(cc *btpb.ReadRowsResponse_CellChunk) (Row, error) {
  64. var row Row
  65. switch cr.state {
  66. case newRow:
  67. if err := cr.validateNewRow(cc); err != nil {
  68. return nil, err
  69. }
  70. cr.curRow = make(Row)
  71. cr.curKey = cc.RowKey
  72. cr.curFam = cc.FamilyName.Value
  73. cr.curQual = cc.Qualifier.Value
  74. cr.curTS = cc.TimestampMicros
  75. row = cr.handleCellValue(cc)
  76. case rowInProgress:
  77. if err := cr.validateRowInProgress(cc); err != nil {
  78. return nil, err
  79. }
  80. if cc.GetResetRow() {
  81. cr.resetToNewRow()
  82. return nil, nil
  83. }
  84. if cc.FamilyName != nil {
  85. cr.curFam = cc.FamilyName.Value
  86. }
  87. if cc.Qualifier != nil {
  88. cr.curQual = cc.Qualifier.Value
  89. }
  90. cr.curTS = cc.TimestampMicros
  91. row = cr.handleCellValue(cc)
  92. case cellInProgress:
  93. if err := cr.validateCellInProgress(cc); err != nil {
  94. return nil, err
  95. }
  96. if cc.GetResetRow() {
  97. cr.resetToNewRow()
  98. return nil, nil
  99. }
  100. row = cr.handleCellValue(cc)
  101. }
  102. return row, nil
  103. }
  104. // Close must be called after all cell chunks from the response
  105. // have been processed. An error will be returned if the reader is
  106. // in an invalid state, in which case the error should be propagated to the caller.
  107. func (cr *chunkReader) Close() error {
  108. if cr.state != newRow {
  109. return fmt.Errorf("invalid state for end of stream %q", cr.state)
  110. }
  111. return nil
  112. }
  113. // handleCellValue returns a Row if the cell value includes a commit, otherwise nil.
  114. func (cr *chunkReader) handleCellValue(cc *btpb.ReadRowsResponse_CellChunk) Row {
  115. if cc.ValueSize > 0 {
  116. // ValueSize is specified so expect a split value of ValueSize bytes
  117. if cr.curVal == nil {
  118. cr.curVal = make([]byte, 0, cc.ValueSize)
  119. }
  120. cr.curVal = append(cr.curVal, cc.Value...)
  121. cr.state = cellInProgress
  122. } else {
  123. // This cell is either the complete value or the last chunk of a split
  124. if cr.curVal == nil {
  125. cr.curVal = cc.Value
  126. } else {
  127. cr.curVal = append(cr.curVal, cc.Value...)
  128. }
  129. cr.finishCell()
  130. if cc.GetCommitRow() {
  131. return cr.commitRow()
  132. }
  133. cr.state = rowInProgress
  134. }
  135. return nil
  136. }
  137. func (cr *chunkReader) finishCell() {
  138. ri := ReadItem{
  139. Row: string(cr.curKey),
  140. Column: string(cr.curFam) + ":" + string(cr.curQual),
  141. Timestamp: Timestamp(cr.curTS),
  142. Value: cr.curVal,
  143. }
  144. cr.curRow[cr.curFam] = append(cr.curRow[cr.curFam], ri)
  145. cr.curVal = nil
  146. }
  147. func (cr *chunkReader) commitRow() Row {
  148. row := cr.curRow
  149. cr.lastKey = cr.curRow.Key()
  150. cr.resetToNewRow()
  151. return row
  152. }
  153. func (cr *chunkReader) resetToNewRow() {
  154. cr.curKey = nil
  155. cr.curFam = ""
  156. cr.curQual = nil
  157. cr.curVal = nil
  158. cr.curRow = nil
  159. cr.curTS = 0
  160. cr.state = newRow
  161. }
  162. func (cr *chunkReader) validateNewRow(cc *btpb.ReadRowsResponse_CellChunk) error {
  163. if cc.GetResetRow() {
  164. return fmt.Errorf("reset_row not allowed between rows")
  165. }
  166. if cc.RowKey == nil || cc.FamilyName == nil || cc.Qualifier == nil {
  167. return fmt.Errorf("missing key field for new row %v", cc)
  168. }
  169. if cr.lastKey != "" && cr.lastKey >= string(cc.RowKey) {
  170. return fmt.Errorf("out of order row key: %q, %q", cr.lastKey, string(cc.RowKey))
  171. }
  172. return nil
  173. }
  174. func (cr *chunkReader) validateRowInProgress(cc *btpb.ReadRowsResponse_CellChunk) error {
  175. if err := cr.validateRowStatus(cc); err != nil {
  176. return err
  177. }
  178. if cc.RowKey != nil && !bytes.Equal(cc.RowKey, cr.curKey) {
  179. return fmt.Errorf("received new row key %q during existing row %q", cc.RowKey, cr.curKey)
  180. }
  181. if cc.FamilyName != nil && cc.Qualifier == nil {
  182. return fmt.Errorf("family name %q specified without a qualifier", cc.FamilyName)
  183. }
  184. return nil
  185. }
  186. func (cr *chunkReader) validateCellInProgress(cc *btpb.ReadRowsResponse_CellChunk) error {
  187. if err := cr.validateRowStatus(cc); err != nil {
  188. return err
  189. }
  190. if cr.curVal == nil {
  191. return fmt.Errorf("no cached cell while CELL_IN_PROGRESS %v", cc)
  192. }
  193. if cc.GetResetRow() == false && cr.isAnyKeyPresent(cc) {
  194. return fmt.Errorf("cell key components found while CELL_IN_PROGRESS %v", cc)
  195. }
  196. return nil
  197. }
  198. func (cr *chunkReader) isAnyKeyPresent(cc *btpb.ReadRowsResponse_CellChunk) bool {
  199. return cc.RowKey != nil ||
  200. cc.FamilyName != nil ||
  201. cc.Qualifier != nil ||
  202. cc.TimestampMicros != 0
  203. }
  204. // Validate a RowStatus, commit or reset, if present.
  205. func (cr *chunkReader) validateRowStatus(cc *btpb.ReadRowsResponse_CellChunk) error {
  206. // Resets can't be specified with any other part of a cell
  207. if cc.GetResetRow() && (cr.isAnyKeyPresent(cc) ||
  208. cc.Value != nil ||
  209. cc.ValueSize != 0 ||
  210. cc.Labels != nil) {
  211. return fmt.Errorf("reset must not be specified with other fields %v", cc)
  212. }
  213. if cc.GetCommitRow() && cc.ValueSize > 0 {
  214. return fmt.Errorf("commit row found in between chunks in a cell")
  215. }
  216. return nil
  217. }