You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

1332 lines
36 KiB

  1. /*
  2. Copyright 2015 Google LLC
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. /*
  14. Package bttest contains test helpers for working with the bigtable package.
  15. To use a Server, create it, and then connect to it with no security:
  16. (The project/instance values are ignored.)
  17. srv, err := bttest.NewServer("localhost:0")
  18. ...
  19. conn, err := grpc.Dial(srv.Addr, grpc.WithInsecure())
  20. ...
  21. client, err := bigtable.NewClient(ctx, proj, instance,
  22. option.WithGRPCConn(conn))
  23. ...
  24. */
  25. package bttest // import "cloud.google.com/go/bigtable/bttest"
  26. import (
  27. "encoding/binary"
  28. "fmt"
  29. "log"
  30. "math/rand"
  31. "net"
  32. "regexp"
  33. "sort"
  34. "strings"
  35. "sync"
  36. "time"
  37. "bytes"
  38. emptypb "github.com/golang/protobuf/ptypes/empty"
  39. "github.com/golang/protobuf/ptypes/wrappers"
  40. "github.com/google/btree"
  41. "golang.org/x/net/context"
  42. btapb "google.golang.org/genproto/googleapis/bigtable/admin/v2"
  43. btpb "google.golang.org/genproto/googleapis/bigtable/v2"
  44. statpb "google.golang.org/genproto/googleapis/rpc/status"
  45. "google.golang.org/grpc"
  46. "google.golang.org/grpc/codes"
  47. "google.golang.org/grpc/status"
  48. )
  49. const (
  50. // MilliSeconds field of the minimum valid Timestamp.
  51. minValidMilliSeconds = 0
  52. // MilliSeconds field of the max valid Timestamp.
  53. maxValidMilliSeconds = int64(time.Millisecond) * 253402300800
  54. )
  55. // Server is an in-memory Cloud Bigtable fake.
  56. // It is unauthenticated, and only a rough approximation.
  57. type Server struct {
  58. Addr string
  59. l net.Listener
  60. srv *grpc.Server
  61. s *server
  62. }
  63. // server is the real implementation of the fake.
  64. // It is a separate and unexported type so the API won't be cluttered with
  65. // methods that are only relevant to the fake's implementation.
  66. type server struct {
  67. mu sync.Mutex
  68. tables map[string]*table // keyed by fully qualified name
  69. gcc chan int // set when gcloop starts, closed when server shuts down
  70. // Any unimplemented methods will cause a panic.
  71. btapb.BigtableTableAdminServer
  72. btpb.BigtableServer
  73. }
  74. // NewServer creates a new Server.
  75. // The Server will be listening for gRPC connections, without TLS,
  76. // on the provided address. The resolved address is named by the Addr field.
  77. func NewServer(laddr string, opt ...grpc.ServerOption) (*Server, error) {
  78. l, err := net.Listen("tcp", laddr)
  79. if err != nil {
  80. return nil, err
  81. }
  82. s := &Server{
  83. Addr: l.Addr().String(),
  84. l: l,
  85. srv: grpc.NewServer(opt...),
  86. s: &server{
  87. tables: make(map[string]*table),
  88. },
  89. }
  90. btapb.RegisterBigtableTableAdminServer(s.srv, s.s)
  91. btpb.RegisterBigtableServer(s.srv, s.s)
  92. go s.srv.Serve(s.l)
  93. return s, nil
  94. }
  95. // Close shuts down the server.
  96. func (s *Server) Close() {
  97. s.s.mu.Lock()
  98. if s.s.gcc != nil {
  99. close(s.s.gcc)
  100. }
  101. s.s.mu.Unlock()
  102. s.srv.Stop()
  103. s.l.Close()
  104. }
  105. func (s *server) CreateTable(ctx context.Context, req *btapb.CreateTableRequest) (*btapb.Table, error) {
  106. tbl := req.Parent + "/tables/" + req.TableId
  107. s.mu.Lock()
  108. if _, ok := s.tables[tbl]; ok {
  109. s.mu.Unlock()
  110. return nil, status.Errorf(codes.AlreadyExists, "table %q already exists", tbl)
  111. }
  112. s.tables[tbl] = newTable(req)
  113. s.mu.Unlock()
  114. return &btapb.Table{Name: tbl}, nil
  115. }
  116. func (s *server) ListTables(ctx context.Context, req *btapb.ListTablesRequest) (*btapb.ListTablesResponse, error) {
  117. res := &btapb.ListTablesResponse{}
  118. prefix := req.Parent + "/tables/"
  119. s.mu.Lock()
  120. for tbl := range s.tables {
  121. if strings.HasPrefix(tbl, prefix) {
  122. res.Tables = append(res.Tables, &btapb.Table{Name: tbl})
  123. }
  124. }
  125. s.mu.Unlock()
  126. return res, nil
  127. }
  128. func (s *server) GetTable(ctx context.Context, req *btapb.GetTableRequest) (*btapb.Table, error) {
  129. tbl := req.Name
  130. s.mu.Lock()
  131. tblIns, ok := s.tables[tbl]
  132. s.mu.Unlock()
  133. if !ok {
  134. return nil, status.Errorf(codes.NotFound, "table %q not found", tbl)
  135. }
  136. return &btapb.Table{
  137. Name: tbl,
  138. ColumnFamilies: toColumnFamilies(tblIns.columnFamilies()),
  139. }, nil
  140. }
  141. func (s *server) DeleteTable(ctx context.Context, req *btapb.DeleteTableRequest) (*emptypb.Empty, error) {
  142. s.mu.Lock()
  143. defer s.mu.Unlock()
  144. if _, ok := s.tables[req.Name]; !ok {
  145. return nil, status.Errorf(codes.NotFound, "table %q not found", req.Name)
  146. }
  147. delete(s.tables, req.Name)
  148. return &emptypb.Empty{}, nil
  149. }
  150. func (s *server) ModifyColumnFamilies(ctx context.Context, req *btapb.ModifyColumnFamiliesRequest) (*btapb.Table, error) {
  151. tblName := req.Name[strings.LastIndex(req.Name, "/")+1:]
  152. s.mu.Lock()
  153. tbl, ok := s.tables[req.Name]
  154. s.mu.Unlock()
  155. if !ok {
  156. return nil, status.Errorf(codes.NotFound, "table %q not found", req.Name)
  157. }
  158. tbl.mu.Lock()
  159. defer tbl.mu.Unlock()
  160. for _, mod := range req.Modifications {
  161. if create := mod.GetCreate(); create != nil {
  162. if _, ok := tbl.families[mod.Id]; ok {
  163. return nil, status.Errorf(codes.AlreadyExists, "family %q already exists", mod.Id)
  164. }
  165. newcf := &columnFamily{
  166. name: req.Name + "/columnFamilies/" + mod.Id,
  167. order: tbl.counter,
  168. gcRule: create.GcRule,
  169. }
  170. tbl.counter++
  171. tbl.families[mod.Id] = newcf
  172. } else if mod.GetDrop() {
  173. if _, ok := tbl.families[mod.Id]; !ok {
  174. return nil, fmt.Errorf("can't delete unknown family %q", mod.Id)
  175. }
  176. delete(tbl.families, mod.Id)
  177. } else if modify := mod.GetUpdate(); modify != nil {
  178. if _, ok := tbl.families[mod.Id]; !ok {
  179. return nil, fmt.Errorf("no such family %q", mod.Id)
  180. }
  181. newcf := &columnFamily{
  182. name: req.Name + "/columnFamilies/" + mod.Id,
  183. gcRule: modify.GcRule,
  184. }
  185. // assume that we ALWAYS want to replace by the new setting
  186. // we may need partial update through
  187. tbl.families[mod.Id] = newcf
  188. }
  189. }
  190. s.needGC()
  191. return &btapb.Table{
  192. Name: tblName,
  193. ColumnFamilies: toColumnFamilies(tbl.families),
  194. Granularity: btapb.Table_TimestampGranularity(btapb.Table_MILLIS),
  195. }, nil
  196. }
  197. func (s *server) DropRowRange(ctx context.Context, req *btapb.DropRowRangeRequest) (*emptypb.Empty, error) {
  198. s.mu.Lock()
  199. defer s.mu.Unlock()
  200. tbl, ok := s.tables[req.Name]
  201. if !ok {
  202. return nil, status.Errorf(codes.NotFound, "table %q not found", req.Name)
  203. }
  204. if req.GetDeleteAllDataFromTable() {
  205. tbl.rows = btree.New(btreeDegree)
  206. } else {
  207. // Delete rows by prefix.
  208. prefixBytes := req.GetRowKeyPrefix()
  209. if prefixBytes == nil {
  210. return nil, fmt.Errorf("missing row key prefix")
  211. }
  212. prefix := string(prefixBytes)
  213. // The BTree does not specify what happens if rows are deleted during
  214. // iteration, and it provides no "delete range" method.
  215. // So we collect the rows first, then delete them one by one.
  216. var rowsToDelete []*row
  217. tbl.rows.AscendGreaterOrEqual(btreeKey(prefix), func(i btree.Item) bool {
  218. r := i.(*row)
  219. if strings.HasPrefix(r.key, prefix) {
  220. rowsToDelete = append(rowsToDelete, r)
  221. return true
  222. } else {
  223. return false // stop iteration
  224. }
  225. })
  226. for _, r := range rowsToDelete {
  227. tbl.rows.Delete(r)
  228. }
  229. }
  230. return &emptypb.Empty{}, nil
  231. }
  232. // This is a private alpha release of Cloud Bigtable replication. This feature
  233. // is not currently available to most Cloud Bigtable customers. This feature
  234. // might be changed in backward-incompatible ways and is not recommended for
  235. // production use. It is not subject to any SLA or deprecation policy.
  236. func (s *server) GenerateConsistencyToken(ctx context.Context, req *btapb.GenerateConsistencyTokenRequest) (*btapb.GenerateConsistencyTokenResponse, error) {
  237. // Check that the table exists.
  238. _, ok := s.tables[req.Name]
  239. if !ok {
  240. return nil, status.Errorf(codes.NotFound, "table %q not found", req.Name)
  241. }
  242. return &btapb.GenerateConsistencyTokenResponse{
  243. ConsistencyToken: "TokenFor-" + req.Name,
  244. }, nil
  245. }
  246. // This is a private alpha release of Cloud Bigtable replication. This feature
  247. // is not currently available to most Cloud Bigtable customers. This feature
  248. // might be changed in backward-incompatible ways and is not recommended for
  249. // production use. It is not subject to any SLA or deprecation policy.
  250. func (s *server) CheckConsistency(ctx context.Context, req *btapb.CheckConsistencyRequest) (*btapb.CheckConsistencyResponse, error) {
  251. // Check that the table exists.
  252. _, ok := s.tables[req.Name]
  253. if !ok {
  254. return nil, status.Errorf(codes.NotFound, "table %q not found", req.Name)
  255. }
  256. // Check this is the right token.
  257. if req.ConsistencyToken != "TokenFor-"+req.Name {
  258. return nil, status.Errorf(codes.InvalidArgument, "token %q not valid", req.ConsistencyToken)
  259. }
  260. // Single cluster instances are always consistent.
  261. return &btapb.CheckConsistencyResponse{
  262. Consistent: true,
  263. }, nil
  264. }
  265. func (s *server) ReadRows(req *btpb.ReadRowsRequest, stream btpb.Bigtable_ReadRowsServer) error {
  266. s.mu.Lock()
  267. tbl, ok := s.tables[req.TableName]
  268. s.mu.Unlock()
  269. if !ok {
  270. return status.Errorf(codes.NotFound, "table %q not found", req.TableName)
  271. }
  272. // Rows to read can be specified by a set of row keys and/or a set of row ranges.
  273. // Output is a stream of sorted, de-duped rows.
  274. tbl.mu.RLock()
  275. rowSet := make(map[string]*row)
  276. addRow := func(i btree.Item) bool {
  277. r := i.(*row)
  278. rowSet[r.key] = r
  279. return true
  280. }
  281. if req.Rows != nil &&
  282. len(req.Rows.RowKeys)+len(req.Rows.RowRanges) > 0 {
  283. // Add the explicitly given keys
  284. for _, key := range req.Rows.RowKeys {
  285. k := string(key)
  286. if i := tbl.rows.Get(btreeKey(k)); i != nil {
  287. addRow(i)
  288. }
  289. }
  290. // Add keys from row ranges
  291. for _, rr := range req.Rows.RowRanges {
  292. var start, end string
  293. switch sk := rr.StartKey.(type) {
  294. case *btpb.RowRange_StartKeyClosed:
  295. start = string(sk.StartKeyClosed)
  296. case *btpb.RowRange_StartKeyOpen:
  297. start = string(sk.StartKeyOpen) + "\x00"
  298. }
  299. switch ek := rr.EndKey.(type) {
  300. case *btpb.RowRange_EndKeyClosed:
  301. end = string(ek.EndKeyClosed) + "\x00"
  302. case *btpb.RowRange_EndKeyOpen:
  303. end = string(ek.EndKeyOpen)
  304. }
  305. switch {
  306. case start == "" && end == "":
  307. tbl.rows.Ascend(addRow) // all rows
  308. case start == "":
  309. tbl.rows.AscendLessThan(btreeKey(end), addRow)
  310. case end == "":
  311. tbl.rows.AscendGreaterOrEqual(btreeKey(start), addRow)
  312. default:
  313. tbl.rows.AscendRange(btreeKey(start), btreeKey(end), addRow)
  314. }
  315. }
  316. } else {
  317. // Read all rows
  318. tbl.rows.Ascend(addRow)
  319. }
  320. tbl.mu.RUnlock()
  321. rows := make([]*row, 0, len(rowSet))
  322. for _, r := range rowSet {
  323. rows = append(rows, r)
  324. }
  325. sort.Sort(byRowKey(rows))
  326. limit := int(req.RowsLimit)
  327. count := 0
  328. for _, r := range rows {
  329. if limit > 0 && count >= limit {
  330. return nil
  331. }
  332. streamed, err := streamRow(stream, r, req.Filter)
  333. if err != nil {
  334. return err
  335. }
  336. if streamed {
  337. count++
  338. }
  339. }
  340. return nil
  341. }
  342. // streamRow filters the given row and sends it via the given stream.
  343. // Returns true if at least one cell matched the filter and was streamed, false otherwise.
  344. func streamRow(stream btpb.Bigtable_ReadRowsServer, r *row, f *btpb.RowFilter) (bool, error) {
  345. r.mu.Lock()
  346. nr := r.copy()
  347. r.mu.Unlock()
  348. r = nr
  349. if !filterRow(f, r) {
  350. return false, nil
  351. }
  352. rrr := &btpb.ReadRowsResponse{}
  353. families := r.sortedFamilies()
  354. for _, fam := range families {
  355. for _, colName := range fam.colNames {
  356. cells := fam.cells[colName]
  357. if len(cells) == 0 {
  358. continue
  359. }
  360. // TODO(dsymonds): Apply transformers.
  361. for _, cell := range cells {
  362. rrr.Chunks = append(rrr.Chunks, &btpb.ReadRowsResponse_CellChunk{
  363. RowKey: []byte(r.key),
  364. FamilyName: &wrappers.StringValue{Value: fam.name},
  365. Qualifier: &wrappers.BytesValue{Value: []byte(colName)},
  366. TimestampMicros: cell.ts,
  367. Value: cell.value,
  368. })
  369. }
  370. }
  371. }
  372. // We can't have a cell with just COMMIT set, which would imply a new empty cell.
  373. // So modify the last cell to have the COMMIT flag set.
  374. if len(rrr.Chunks) > 0 {
  375. rrr.Chunks[len(rrr.Chunks)-1].RowStatus = &btpb.ReadRowsResponse_CellChunk_CommitRow{CommitRow: true}
  376. }
  377. return true, stream.Send(rrr)
  378. }
  379. // filterRow modifies a row with the given filter. Returns true if at least one cell from the row matches,
  380. // false otherwise.
  381. func filterRow(f *btpb.RowFilter, r *row) bool {
  382. if f == nil {
  383. return true
  384. }
  385. // Handle filters that apply beyond just including/excluding cells.
  386. switch f := f.Filter.(type) {
  387. case *btpb.RowFilter_BlockAllFilter:
  388. return !f.BlockAllFilter
  389. case *btpb.RowFilter_PassAllFilter:
  390. return f.PassAllFilter
  391. case *btpb.RowFilter_Chain_:
  392. for _, sub := range f.Chain.Filters {
  393. if !filterRow(sub, r) {
  394. return false
  395. }
  396. }
  397. return true
  398. case *btpb.RowFilter_Interleave_:
  399. srs := make([]*row, 0, len(f.Interleave.Filters))
  400. for _, sub := range f.Interleave.Filters {
  401. sr := r.copy()
  402. filterRow(sub, sr)
  403. srs = append(srs, sr)
  404. }
  405. // merge
  406. // TODO(dsymonds): is this correct?
  407. r.families = make(map[string]*family)
  408. for _, sr := range srs {
  409. for _, fam := range sr.families {
  410. f := r.getOrCreateFamily(fam.name, fam.order)
  411. for colName, cs := range fam.cells {
  412. f.cells[colName] = append(f.cellsByColumn(colName), cs...)
  413. }
  414. }
  415. }
  416. var count int
  417. for _, fam := range r.families {
  418. for _, cs := range fam.cells {
  419. sort.Sort(byDescTS(cs))
  420. count += len(cs)
  421. }
  422. }
  423. return count > 0
  424. case *btpb.RowFilter_CellsPerColumnLimitFilter:
  425. lim := int(f.CellsPerColumnLimitFilter)
  426. for _, fam := range r.families {
  427. for col, cs := range fam.cells {
  428. if len(cs) > lim {
  429. fam.cells[col] = cs[:lim]
  430. }
  431. }
  432. }
  433. return true
  434. case *btpb.RowFilter_Condition_:
  435. if filterRow(f.Condition.PredicateFilter, r.copy()) {
  436. if f.Condition.TrueFilter == nil {
  437. return false
  438. }
  439. return filterRow(f.Condition.TrueFilter, r)
  440. }
  441. if f.Condition.FalseFilter == nil {
  442. return false
  443. }
  444. return filterRow(f.Condition.FalseFilter, r)
  445. case *btpb.RowFilter_RowKeyRegexFilter:
  446. pat := string(f.RowKeyRegexFilter)
  447. rx, err := regexp.Compile(pat)
  448. if err != nil {
  449. log.Printf("Bad rowkey_regex_filter pattern %q: %v", pat, err)
  450. return false
  451. }
  452. if !rx.MatchString(r.key) {
  453. return false
  454. }
  455. case *btpb.RowFilter_CellsPerRowLimitFilter:
  456. // Grab the first n cells in the row.
  457. lim := int(f.CellsPerRowLimitFilter)
  458. for _, fam := range r.families {
  459. for _, col := range fam.colNames {
  460. cs := fam.cells[col]
  461. if len(cs) > lim {
  462. fam.cells[col] = cs[:lim]
  463. lim = 0
  464. } else {
  465. lim -= len(cs)
  466. }
  467. }
  468. }
  469. return true
  470. case *btpb.RowFilter_CellsPerRowOffsetFilter:
  471. // Skip the first n cells in the row.
  472. offset := int(f.CellsPerRowOffsetFilter)
  473. for _, fam := range r.families {
  474. for _, col := range fam.colNames {
  475. cs := fam.cells[col]
  476. if len(cs) > offset {
  477. fam.cells[col] = cs[offset:]
  478. offset = 0
  479. return true
  480. } else {
  481. fam.cells[col] = cs[:0]
  482. offset -= len(cs)
  483. }
  484. }
  485. }
  486. return true
  487. }
  488. // Any other case, operate on a per-cell basis.
  489. cellCount := 0
  490. for _, fam := range r.families {
  491. for colName, cs := range fam.cells {
  492. fam.cells[colName] = filterCells(f, fam.name, colName, cs)
  493. cellCount += len(fam.cells[colName])
  494. }
  495. }
  496. return cellCount > 0
  497. }
  498. func filterCells(f *btpb.RowFilter, fam, col string, cs []cell) []cell {
  499. var ret []cell
  500. for _, cell := range cs {
  501. if includeCell(f, fam, col, cell) {
  502. cell = modifyCell(f, cell)
  503. ret = append(ret, cell)
  504. }
  505. }
  506. return ret
  507. }
  508. func modifyCell(f *btpb.RowFilter, c cell) cell {
  509. if f == nil {
  510. return c
  511. }
  512. // Consider filters that may modify the cell contents
  513. switch f.Filter.(type) {
  514. case *btpb.RowFilter_StripValueTransformer:
  515. return cell{ts: c.ts}
  516. default:
  517. return c
  518. }
  519. }
  520. func includeCell(f *btpb.RowFilter, fam, col string, cell cell) bool {
  521. if f == nil {
  522. return true
  523. }
  524. // TODO(dsymonds): Implement many more filters.
  525. switch f := f.Filter.(type) {
  526. case *btpb.RowFilter_CellsPerColumnLimitFilter:
  527. // Don't log, row-level filter
  528. return true
  529. case *btpb.RowFilter_RowKeyRegexFilter:
  530. // Don't log, row-level filter
  531. return true
  532. case *btpb.RowFilter_StripValueTransformer:
  533. // Don't log, cell-modifying filter
  534. return true
  535. default:
  536. log.Printf("WARNING: don't know how to handle filter of type %T (ignoring it)", f)
  537. return true
  538. case *btpb.RowFilter_FamilyNameRegexFilter:
  539. pat := string(f.FamilyNameRegexFilter)
  540. rx, err := regexp.Compile(pat)
  541. if err != nil {
  542. log.Printf("Bad family_name_regex_filter pattern %q: %v", pat, err)
  543. return false
  544. }
  545. return rx.MatchString(fam)
  546. case *btpb.RowFilter_ColumnQualifierRegexFilter:
  547. pat := string(f.ColumnQualifierRegexFilter)
  548. rx, err := regexp.Compile(pat)
  549. if err != nil {
  550. log.Printf("Bad column_qualifier_regex_filter pattern %q: %v", pat, err)
  551. return false
  552. }
  553. return rx.MatchString(col)
  554. case *btpb.RowFilter_ValueRegexFilter:
  555. pat := string(f.ValueRegexFilter)
  556. rx, err := regexp.Compile(pat)
  557. if err != nil {
  558. log.Printf("Bad value_regex_filter pattern %q: %v", pat, err)
  559. return false
  560. }
  561. return rx.Match(cell.value)
  562. case *btpb.RowFilter_ColumnRangeFilter:
  563. if fam != f.ColumnRangeFilter.FamilyName {
  564. return false
  565. }
  566. // Start qualifier defaults to empty string closed
  567. inRangeStart := func() bool { return col >= "" }
  568. switch sq := f.ColumnRangeFilter.StartQualifier.(type) {
  569. case *btpb.ColumnRange_StartQualifierOpen:
  570. inRangeStart = func() bool { return col > string(sq.StartQualifierOpen) }
  571. case *btpb.ColumnRange_StartQualifierClosed:
  572. inRangeStart = func() bool { return col >= string(sq.StartQualifierClosed) }
  573. }
  574. // End qualifier defaults to no upper boundary
  575. inRangeEnd := func() bool { return true }
  576. switch eq := f.ColumnRangeFilter.EndQualifier.(type) {
  577. case *btpb.ColumnRange_EndQualifierClosed:
  578. inRangeEnd = func() bool { return col <= string(eq.EndQualifierClosed) }
  579. case *btpb.ColumnRange_EndQualifierOpen:
  580. inRangeEnd = func() bool { return col < string(eq.EndQualifierOpen) }
  581. }
  582. return inRangeStart() && inRangeEnd()
  583. case *btpb.RowFilter_TimestampRangeFilter:
  584. // Lower bound is inclusive and defaults to 0, upper bound is exclusive and defaults to infinity.
  585. return cell.ts >= f.TimestampRangeFilter.StartTimestampMicros &&
  586. (f.TimestampRangeFilter.EndTimestampMicros == 0 || cell.ts < f.TimestampRangeFilter.EndTimestampMicros)
  587. case *btpb.RowFilter_ValueRangeFilter:
  588. v := cell.value
  589. // Start value defaults to empty string closed
  590. inRangeStart := func() bool { return bytes.Compare(v, []byte{}) >= 0 }
  591. switch sv := f.ValueRangeFilter.StartValue.(type) {
  592. case *btpb.ValueRange_StartValueOpen:
  593. inRangeStart = func() bool { return bytes.Compare(v, sv.StartValueOpen) > 0 }
  594. case *btpb.ValueRange_StartValueClosed:
  595. inRangeStart = func() bool { return bytes.Compare(v, sv.StartValueClosed) >= 0 }
  596. }
  597. // End value defaults to no upper boundary
  598. inRangeEnd := func() bool { return true }
  599. switch ev := f.ValueRangeFilter.EndValue.(type) {
  600. case *btpb.ValueRange_EndValueClosed:
  601. inRangeEnd = func() bool { return bytes.Compare(v, ev.EndValueClosed) <= 0 }
  602. case *btpb.ValueRange_EndValueOpen:
  603. inRangeEnd = func() bool { return bytes.Compare(v, ev.EndValueOpen) < 0 }
  604. }
  605. return inRangeStart() && inRangeEnd()
  606. }
  607. }
  608. func (s *server) MutateRow(ctx context.Context, req *btpb.MutateRowRequest) (*btpb.MutateRowResponse, error) {
  609. s.mu.Lock()
  610. tbl, ok := s.tables[req.TableName]
  611. s.mu.Unlock()
  612. if !ok {
  613. return nil, status.Errorf(codes.NotFound, "table %q not found", req.TableName)
  614. }
  615. fs := tbl.columnFamilies()
  616. r := tbl.mutableRow(string(req.RowKey))
  617. r.mu.Lock()
  618. defer r.mu.Unlock()
  619. if err := applyMutations(tbl, r, req.Mutations, fs); err != nil {
  620. return nil, err
  621. }
  622. return &btpb.MutateRowResponse{}, nil
  623. }
  624. func (s *server) MutateRows(req *btpb.MutateRowsRequest, stream btpb.Bigtable_MutateRowsServer) error {
  625. s.mu.Lock()
  626. tbl, ok := s.tables[req.TableName]
  627. s.mu.Unlock()
  628. if !ok {
  629. return status.Errorf(codes.NotFound, "table %q not found", req.TableName)
  630. }
  631. res := &btpb.MutateRowsResponse{Entries: make([]*btpb.MutateRowsResponse_Entry, len(req.Entries))}
  632. fs := tbl.columnFamilies()
  633. for i, entry := range req.Entries {
  634. r := tbl.mutableRow(string(entry.RowKey))
  635. r.mu.Lock()
  636. code, msg := int32(codes.OK), ""
  637. if err := applyMutations(tbl, r, entry.Mutations, fs); err != nil {
  638. code = int32(codes.Internal)
  639. msg = err.Error()
  640. }
  641. res.Entries[i] = &btpb.MutateRowsResponse_Entry{
  642. Index: int64(i),
  643. Status: &statpb.Status{Code: code, Message: msg},
  644. }
  645. r.mu.Unlock()
  646. }
  647. return stream.Send(res)
  648. }
  649. func (s *server) CheckAndMutateRow(ctx context.Context, req *btpb.CheckAndMutateRowRequest) (*btpb.CheckAndMutateRowResponse, error) {
  650. s.mu.Lock()
  651. tbl, ok := s.tables[req.TableName]
  652. s.mu.Unlock()
  653. if !ok {
  654. return nil, status.Errorf(codes.NotFound, "table %q not found", req.TableName)
  655. }
  656. res := &btpb.CheckAndMutateRowResponse{}
  657. fs := tbl.columnFamilies()
  658. r := tbl.mutableRow(string(req.RowKey))
  659. r.mu.Lock()
  660. defer r.mu.Unlock()
  661. // Figure out which mutation to apply.
  662. whichMut := false
  663. if req.PredicateFilter == nil {
  664. // Use true_mutations iff row contains any cells.
  665. whichMut = !r.isEmpty()
  666. } else {
  667. // Use true_mutations iff any cells in the row match the filter.
  668. // TODO(dsymonds): This could be cheaper.
  669. nr := r.copy()
  670. filterRow(req.PredicateFilter, nr)
  671. whichMut = !nr.isEmpty()
  672. }
  673. res.PredicateMatched = whichMut
  674. muts := req.FalseMutations
  675. if whichMut {
  676. muts = req.TrueMutations
  677. }
  678. if err := applyMutations(tbl, r, muts, fs); err != nil {
  679. return nil, err
  680. }
  681. return res, nil
  682. }
  683. // applyMutations applies a sequence of mutations to a row.
  684. // fam should be a snapshot of the keys of tbl.families.
  685. // It assumes r.mu is locked.
  686. func applyMutations(tbl *table, r *row, muts []*btpb.Mutation, fs map[string]*columnFamily) error {
  687. for _, mut := range muts {
  688. switch mut := mut.Mutation.(type) {
  689. default:
  690. return fmt.Errorf("can't handle mutation type %T", mut)
  691. case *btpb.Mutation_SetCell_:
  692. set := mut.SetCell
  693. if _, ok := fs[set.FamilyName]; !ok {
  694. return fmt.Errorf("unknown family %q", set.FamilyName)
  695. }
  696. ts := set.TimestampMicros
  697. if ts == -1 { // bigtable.ServerTime
  698. ts = newTimestamp()
  699. }
  700. if !tbl.validTimestamp(ts) {
  701. return fmt.Errorf("invalid timestamp %d", ts)
  702. }
  703. fam := set.FamilyName
  704. col := string(set.ColumnQualifier)
  705. newCell := cell{ts: ts, value: set.Value}
  706. f := r.getOrCreateFamily(fam, fs[fam].order)
  707. f.cells[col] = appendOrReplaceCell(f.cellsByColumn(col), newCell)
  708. case *btpb.Mutation_DeleteFromColumn_:
  709. del := mut.DeleteFromColumn
  710. if _, ok := fs[del.FamilyName]; !ok {
  711. return fmt.Errorf("unknown family %q", del.FamilyName)
  712. }
  713. fam := del.FamilyName
  714. col := string(del.ColumnQualifier)
  715. if _, ok := r.families[fam]; ok {
  716. cs := r.families[fam].cells[col]
  717. if del.TimeRange != nil {
  718. tsr := del.TimeRange
  719. if !tbl.validTimestamp(tsr.StartTimestampMicros) {
  720. return fmt.Errorf("invalid timestamp %d", tsr.StartTimestampMicros)
  721. }
  722. if !tbl.validTimestamp(tsr.EndTimestampMicros) {
  723. return fmt.Errorf("invalid timestamp %d", tsr.EndTimestampMicros)
  724. }
  725. // Find half-open interval to remove.
  726. // Cells are in descending timestamp order,
  727. // so the predicates to sort.Search are inverted.
  728. si, ei := 0, len(cs)
  729. if tsr.StartTimestampMicros > 0 {
  730. ei = sort.Search(len(cs), func(i int) bool { return cs[i].ts < tsr.StartTimestampMicros })
  731. }
  732. if tsr.EndTimestampMicros > 0 {
  733. si = sort.Search(len(cs), func(i int) bool { return cs[i].ts < tsr.EndTimestampMicros })
  734. }
  735. if si < ei {
  736. copy(cs[si:], cs[ei:])
  737. cs = cs[:len(cs)-(ei-si)]
  738. }
  739. } else {
  740. cs = nil
  741. }
  742. if len(cs) == 0 {
  743. delete(r.families[fam].cells, col)
  744. colNames := r.families[fam].colNames
  745. i := sort.Search(len(colNames), func(i int) bool { return colNames[i] >= col })
  746. if i < len(colNames) && colNames[i] == col {
  747. r.families[fam].colNames = append(colNames[:i], colNames[i+1:]...)
  748. }
  749. if len(r.families[fam].cells) == 0 {
  750. delete(r.families, fam)
  751. }
  752. } else {
  753. r.families[fam].cells[col] = cs
  754. }
  755. }
  756. case *btpb.Mutation_DeleteFromRow_:
  757. r.families = make(map[string]*family)
  758. case *btpb.Mutation_DeleteFromFamily_:
  759. fampre := mut.DeleteFromFamily.FamilyName
  760. delete(r.families, fampre)
  761. }
  762. }
  763. return nil
  764. }
  765. func maxTimestamp(x, y int64) int64 {
  766. if x > y {
  767. return x
  768. }
  769. return y
  770. }
  771. func newTimestamp() int64 {
  772. ts := time.Now().UnixNano() / 1e3
  773. ts -= ts % 1000 // round to millisecond granularity
  774. return ts
  775. }
  776. func appendOrReplaceCell(cs []cell, newCell cell) []cell {
  777. replaced := false
  778. for i, cell := range cs {
  779. if cell.ts == newCell.ts {
  780. cs[i] = newCell
  781. replaced = true
  782. break
  783. }
  784. }
  785. if !replaced {
  786. cs = append(cs, newCell)
  787. }
  788. sort.Sort(byDescTS(cs))
  789. return cs
  790. }
  791. func (s *server) ReadModifyWriteRow(ctx context.Context, req *btpb.ReadModifyWriteRowRequest) (*btpb.ReadModifyWriteRowResponse, error) {
  792. s.mu.Lock()
  793. tbl, ok := s.tables[req.TableName]
  794. s.mu.Unlock()
  795. if !ok {
  796. return nil, status.Errorf(codes.NotFound, "table %q not found", req.TableName)
  797. }
  798. fs := tbl.columnFamilies()
  799. rowKey := string(req.RowKey)
  800. r := tbl.mutableRow(rowKey)
  801. resultRow := newRow(rowKey) // copy of updated cells
  802. // This must be done before the row lock, acquired below, is released.
  803. r.mu.Lock()
  804. defer r.mu.Unlock()
  805. // Assume all mutations apply to the most recent version of the cell.
  806. // TODO(dsymonds): Verify this assumption and document it in the proto.
  807. for _, rule := range req.Rules {
  808. if _, ok := fs[rule.FamilyName]; !ok {
  809. return nil, fmt.Errorf("unknown family %q", rule.FamilyName)
  810. }
  811. fam := rule.FamilyName
  812. col := string(rule.ColumnQualifier)
  813. isEmpty := false
  814. f := r.getOrCreateFamily(fam, fs[fam].order)
  815. cs := f.cells[col]
  816. isEmpty = len(cs) == 0
  817. ts := newTimestamp()
  818. var newCell, prevCell cell
  819. if !isEmpty {
  820. cells := r.families[fam].cells[col]
  821. prevCell = cells[0]
  822. // ts is the max of now or the prev cell's timestamp in case the
  823. // prev cell is in the future
  824. ts = maxTimestamp(ts, prevCell.ts)
  825. }
  826. switch rule := rule.Rule.(type) {
  827. default:
  828. return nil, fmt.Errorf("unknown RMW rule oneof %T", rule)
  829. case *btpb.ReadModifyWriteRule_AppendValue:
  830. newCell = cell{ts: ts, value: append(prevCell.value, rule.AppendValue...)}
  831. case *btpb.ReadModifyWriteRule_IncrementAmount:
  832. var v int64
  833. if !isEmpty {
  834. prevVal := prevCell.value
  835. if len(prevVal) != 8 {
  836. return nil, fmt.Errorf("increment on non-64-bit value")
  837. }
  838. v = int64(binary.BigEndian.Uint64(prevVal))
  839. }
  840. v += rule.IncrementAmount
  841. var val [8]byte
  842. binary.BigEndian.PutUint64(val[:], uint64(v))
  843. newCell = cell{ts: ts, value: val[:]}
  844. }
  845. // Store the new cell
  846. f.cells[col] = appendOrReplaceCell(f.cellsByColumn(col), newCell)
  847. // Store a copy for the result row
  848. resultFamily := resultRow.getOrCreateFamily(fam, fs[fam].order)
  849. resultFamily.cellsByColumn(col) // create the column
  850. resultFamily.cells[col] = []cell{newCell} // overwrite the cells
  851. }
  852. // Build the response using the result row
  853. res := &btpb.Row{
  854. Key: req.RowKey,
  855. Families: make([]*btpb.Family, len(resultRow.families)),
  856. }
  857. for i, family := range resultRow.sortedFamilies() {
  858. res.Families[i] = &btpb.Family{
  859. Name: family.name,
  860. Columns: make([]*btpb.Column, len(family.colNames)),
  861. }
  862. for j, colName := range family.colNames {
  863. res.Families[i].Columns[j] = &btpb.Column{
  864. Qualifier: []byte(colName),
  865. Cells: []*btpb.Cell{{
  866. TimestampMicros: family.cells[colName][0].ts,
  867. Value: family.cells[colName][0].value,
  868. }},
  869. }
  870. }
  871. }
  872. return &btpb.ReadModifyWriteRowResponse{Row: res}, nil
  873. }
  874. func (s *server) SampleRowKeys(req *btpb.SampleRowKeysRequest, stream btpb.Bigtable_SampleRowKeysServer) error {
  875. s.mu.Lock()
  876. tbl, ok := s.tables[req.TableName]
  877. s.mu.Unlock()
  878. if !ok {
  879. return status.Errorf(codes.NotFound, "table %q not found", req.TableName)
  880. }
  881. tbl.mu.RLock()
  882. defer tbl.mu.RUnlock()
  883. // The return value of SampleRowKeys is very loosely defined. Return at least the
  884. // final row key in the table and choose other row keys randomly.
  885. var offset int64
  886. var err error
  887. i := 0
  888. tbl.rows.Ascend(func(it btree.Item) bool {
  889. row := it.(*row)
  890. if i == tbl.rows.Len()-1 || rand.Int31n(100) == 0 {
  891. resp := &btpb.SampleRowKeysResponse{
  892. RowKey: []byte(row.key),
  893. OffsetBytes: offset,
  894. }
  895. err = stream.Send(resp)
  896. if err != nil {
  897. return false
  898. }
  899. }
  900. offset += int64(row.size())
  901. i++
  902. return true
  903. })
  904. return err
  905. }
  906. // needGC is invoked whenever the server needs gcloop running.
  907. func (s *server) needGC() {
  908. s.mu.Lock()
  909. if s.gcc == nil {
  910. s.gcc = make(chan int)
  911. go s.gcloop(s.gcc)
  912. }
  913. s.mu.Unlock()
  914. }
  915. func (s *server) gcloop(done <-chan int) {
  916. const (
  917. minWait = 500 // ms
  918. maxWait = 1500 // ms
  919. )
  920. for {
  921. // Wait for a random time interval.
  922. d := time.Duration(minWait+rand.Intn(maxWait-minWait)) * time.Millisecond
  923. select {
  924. case <-time.After(d):
  925. case <-done:
  926. return // server has been closed
  927. }
  928. // Do a GC pass over all tables.
  929. var tables []*table
  930. s.mu.Lock()
  931. for _, tbl := range s.tables {
  932. tables = append(tables, tbl)
  933. }
  934. s.mu.Unlock()
  935. for _, tbl := range tables {
  936. tbl.gc()
  937. }
  938. }
  939. }
  940. type table struct {
  941. mu sync.RWMutex
  942. counter uint64 // increment by 1 when a new family is created
  943. families map[string]*columnFamily // keyed by plain family name
  944. rows *btree.BTree // indexed by row key
  945. }
  946. const btreeDegree = 16
  947. func newTable(ctr *btapb.CreateTableRequest) *table {
  948. fams := make(map[string]*columnFamily)
  949. c := uint64(0)
  950. if ctr.Table != nil {
  951. for id, cf := range ctr.Table.ColumnFamilies {
  952. fams[id] = &columnFamily{
  953. name: ctr.Parent + "/columnFamilies/" + id,
  954. order: c,
  955. gcRule: cf.GcRule,
  956. }
  957. c++
  958. }
  959. }
  960. return &table{
  961. families: fams,
  962. counter: c,
  963. rows: btree.New(btreeDegree),
  964. }
  965. }
  966. func (t *table) validTimestamp(ts int64) bool {
  967. if ts <= minValidMilliSeconds || ts >= maxValidMilliSeconds {
  968. return false
  969. }
  970. // Assume millisecond granularity is required.
  971. return ts%1000 == 0
  972. }
  973. func (t *table) columnFamilies() map[string]*columnFamily {
  974. cp := make(map[string]*columnFamily)
  975. t.mu.RLock()
  976. for fam, cf := range t.families {
  977. cp[fam] = cf
  978. }
  979. t.mu.RUnlock()
  980. return cp
  981. }
  982. func (t *table) mutableRow(key string) *row {
  983. bkey := btreeKey(key)
  984. // Try fast path first.
  985. t.mu.RLock()
  986. i := t.rows.Get(bkey)
  987. t.mu.RUnlock()
  988. if i != nil {
  989. return i.(*row)
  990. }
  991. // We probably need to create the row.
  992. t.mu.Lock()
  993. defer t.mu.Unlock()
  994. i = t.rows.Get(bkey)
  995. if i != nil {
  996. return i.(*row)
  997. }
  998. r := newRow(key)
  999. t.rows.ReplaceOrInsert(r)
  1000. return r
  1001. }
  1002. func (t *table) gc() {
  1003. // This method doesn't add or remove rows, so we only need a read lock for the table.
  1004. t.mu.RLock()
  1005. defer t.mu.RUnlock()
  1006. // Gather GC rules we'll apply.
  1007. rules := make(map[string]*btapb.GcRule) // keyed by "fam"
  1008. for fam, cf := range t.families {
  1009. if cf.gcRule != nil {
  1010. rules[fam] = cf.gcRule
  1011. }
  1012. }
  1013. if len(rules) == 0 {
  1014. return
  1015. }
  1016. t.rows.Ascend(func(i btree.Item) bool {
  1017. r := i.(*row)
  1018. r.mu.Lock()
  1019. r.gc(rules)
  1020. r.mu.Unlock()
  1021. return true
  1022. })
  1023. }
  1024. type byRowKey []*row
  1025. func (b byRowKey) Len() int { return len(b) }
  1026. func (b byRowKey) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
  1027. func (b byRowKey) Less(i, j int) bool { return b[i].key < b[j].key }
  1028. type row struct {
  1029. key string
  1030. mu sync.Mutex
  1031. families map[string]*family // keyed by family name
  1032. }
  1033. func newRow(key string) *row {
  1034. return &row{
  1035. key: key,
  1036. families: make(map[string]*family),
  1037. }
  1038. }
  1039. // copy returns a copy of the row.
  1040. // Cell values are aliased.
  1041. // r.mu should be held.
  1042. func (r *row) copy() *row {
  1043. nr := newRow(r.key)
  1044. for _, fam := range r.families {
  1045. nr.families[fam.name] = &family{
  1046. name: fam.name,
  1047. order: fam.order,
  1048. colNames: fam.colNames,
  1049. cells: make(map[string][]cell),
  1050. }
  1051. for col, cs := range fam.cells {
  1052. // Copy the []cell slice, but not the []byte inside each cell.
  1053. nr.families[fam.name].cells[col] = append([]cell(nil), cs...)
  1054. }
  1055. }
  1056. return nr
  1057. }
  1058. // isEmpty returns true if a row doesn't contain any cell
  1059. func (r *row) isEmpty() bool {
  1060. for _, fam := range r.families {
  1061. for _, cs := range fam.cells {
  1062. if len(cs) > 0 {
  1063. return false
  1064. }
  1065. }
  1066. }
  1067. return true
  1068. }
  1069. // sortedFamilies returns a column family set
  1070. // sorted in ascending creation order in a row.
  1071. func (r *row) sortedFamilies() []*family {
  1072. var families []*family
  1073. for _, fam := range r.families {
  1074. families = append(families, fam)
  1075. }
  1076. sort.Sort(byCreationOrder(families))
  1077. return families
  1078. }
  1079. func (r *row) getOrCreateFamily(name string, order uint64) *family {
  1080. if _, ok := r.families[name]; !ok {
  1081. r.families[name] = &family{
  1082. name: name,
  1083. order: order,
  1084. cells: make(map[string][]cell),
  1085. }
  1086. }
  1087. return r.families[name]
  1088. }
  1089. // gc applies the given GC rules to the row.
  1090. // r.mu should be held.
  1091. func (r *row) gc(rules map[string]*btapb.GcRule) {
  1092. for _, fam := range r.families {
  1093. rule, ok := rules[fam.name]
  1094. if !ok {
  1095. continue
  1096. }
  1097. for col, cs := range fam.cells {
  1098. r.families[fam.name].cells[col] = applyGC(cs, rule)
  1099. }
  1100. }
  1101. }
  1102. // size returns the total size of all cell values in the row.
  1103. func (r *row) size() int {
  1104. size := 0
  1105. for _, fam := range r.families {
  1106. for _, cells := range fam.cells {
  1107. for _, cell := range cells {
  1108. size += len(cell.value)
  1109. }
  1110. }
  1111. }
  1112. return size
  1113. }
  1114. // Less implements btree.Less.
  1115. func (r *row) Less(i btree.Item) bool {
  1116. return r.key < i.(*row).key
  1117. }
  1118. // btreeKey returns a row for use as a key into the BTree.
  1119. func btreeKey(s string) *row { return &row{key: s} }
  1120. func (r *row) String() string {
  1121. return r.key
  1122. }
  1123. var gcTypeWarn sync.Once
  1124. // applyGC applies the given GC rule to the cells.
  1125. func applyGC(cells []cell, rule *btapb.GcRule) []cell {
  1126. switch rule := rule.Rule.(type) {
  1127. default:
  1128. // TODO(dsymonds): Support GcRule_Intersection_
  1129. gcTypeWarn.Do(func() {
  1130. log.Printf("Unsupported GC rule type %T", rule)
  1131. })
  1132. case *btapb.GcRule_Union_:
  1133. for _, sub := range rule.Union.Rules {
  1134. cells = applyGC(cells, sub)
  1135. }
  1136. return cells
  1137. case *btapb.GcRule_MaxAge:
  1138. // Timestamps are in microseconds.
  1139. cutoff := time.Now().UnixNano() / 1e3
  1140. cutoff -= rule.MaxAge.Seconds * 1e6
  1141. cutoff -= int64(rule.MaxAge.Nanos) / 1e3
  1142. // The slice of cells in in descending timestamp order.
  1143. // This sort.Search will return the index of the first cell whose timestamp is chronologically before the cutoff.
  1144. si := sort.Search(len(cells), func(i int) bool { return cells[i].ts < cutoff })
  1145. if si < len(cells) {
  1146. log.Printf("bttest: GC MaxAge(%v) deleted %d cells.", rule.MaxAge, len(cells)-si)
  1147. }
  1148. return cells[:si]
  1149. case *btapb.GcRule_MaxNumVersions:
  1150. n := int(rule.MaxNumVersions)
  1151. if len(cells) > n {
  1152. cells = cells[:n]
  1153. }
  1154. return cells
  1155. }
  1156. return cells
  1157. }
  1158. type family struct {
  1159. name string // Column family name
  1160. order uint64 // Creation order of column family
  1161. colNames []string // Column names are sorted in lexicographical ascending order
  1162. cells map[string][]cell // Keyed by column name; cells are in descending timestamp order
  1163. }
  1164. type byCreationOrder []*family
  1165. func (b byCreationOrder) Len() int { return len(b) }
  1166. func (b byCreationOrder) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
  1167. func (b byCreationOrder) Less(i, j int) bool { return b[i].order < b[j].order }
  1168. // cellsByColumn adds the column name to colNames set if it does not exist
  1169. // and returns all cells within a column
  1170. func (f *family) cellsByColumn(name string) []cell {
  1171. if _, ok := f.cells[name]; !ok {
  1172. f.colNames = append(f.colNames, name)
  1173. sort.Strings(f.colNames)
  1174. }
  1175. return f.cells[name]
  1176. }
  1177. type cell struct {
  1178. ts int64
  1179. value []byte
  1180. }
  1181. type byDescTS []cell
  1182. func (b byDescTS) Len() int { return len(b) }
  1183. func (b byDescTS) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
  1184. func (b byDescTS) Less(i, j int) bool { return b[i].ts > b[j].ts }
  1185. type columnFamily struct {
  1186. name string
  1187. order uint64 // Creation order of column family
  1188. gcRule *btapb.GcRule
  1189. }
  1190. func (c *columnFamily) proto() *btapb.ColumnFamily {
  1191. return &btapb.ColumnFamily{
  1192. GcRule: c.gcRule,
  1193. }
  1194. }
  1195. func toColumnFamilies(families map[string]*columnFamily) map[string]*btapb.ColumnFamily {
  1196. fs := make(map[string]*btapb.ColumnFamily)
  1197. for k, v := range families {
  1198. fs[k] = v.proto()
  1199. }
  1200. return fs
  1201. }