You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

1243 lines
31 KiB

  1. // Copyright 2013 The Go Authors. All rights reserved.
  2. //
  3. // Use of this source code is governed by a BSD-style
  4. // license that can be found in the LICENSE file or at
  5. // https://developers.google.com/open-source/licenses/bsd.
// Redis keys and types:
//
// maxPackageId string: next id to assign
// ids hash maps import path to package id
// pkg:<id> hash
//      terms: space separated search terms
//      path: import path
//      synopsis: synopsis
//      gob: snappy compressed gob encoded doc.Package
//      score: document search score
//      etag: etag of the stored doc.Package
//      kind: p=package, c=command, d=directory with no go files
//      crawl: Unix time for next crawl
// index:<term> set: package ids for given search term
// index:import:<path> set: packages with import path
// index:project:<root> set: packages in project with root
// block set: packages to block
// popular zset: package id, score
// popular:0 string: scaled base time for popular scores
// nextCrawl zset: package id, Unix time for next crawl
// newCrawl set: new paths to crawl
// badCrawl set: paths that returned error when crawling.
  27. // Package database manages storage for GoPkgDoc.
  28. package database
  29. import (
  30. "bytes"
  31. "encoding/gob"
  32. "errors"
  33. "fmt"
  34. "log"
  35. "math"
  36. "net/url"
  37. "os"
  38. "path"
  39. "sort"
  40. "strconv"
  41. "strings"
  42. "time"
  43. "unicode/utf8"
  44. "github.com/garyburd/redigo/redis"
  45. "github.com/golang/snappy"
  46. "golang.org/x/net/context"
  47. "google.golang.org/appengine"
  48. "google.golang.org/appengine/search"
  49. "github.com/golang/gddo/doc"
  50. "github.com/golang/gddo/gosrc"
  51. )
// Database provides access to the Redis-backed package store. Its methods
// obtain Redis connections from Pool.
type Database struct {
	// Pool hands out Redis connections. New sets it to a *redis.Pool; any
	// value with a matching Get method can be substituted.
	Pool interface {
		Get() redis.Conn
	}
}
  57. // Package represents the content of a package both for the search index and
  58. // for the HTML template. It implements the search.FieldLoadSaver interface
  59. // to customize the Rank function in the search index.
  60. type Package struct {
  61. Name string `json:"name,omitempty"`
  62. Path string `json:"path"`
  63. ImportCount int `json:"import_count"`
  64. Synopsis string `json:"synopsis,omitempty"`
  65. Fork bool `json:"fork,omitempty"`
  66. Stars int `json:"stars,omitempty"`
  67. Score float64 `json:"score,omitempty"`
  68. }
  69. type byPath []Package
  70. func (p byPath) Len() int { return len(p) }
  71. func (p byPath) Less(i, j int) bool { return p[i].Path < p[j].Path }
  72. func (p byPath) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
// Configuration variables (and default values of flags).
var (
	RedisServer      = "redis://127.0.0.1:6379" // URL of Redis server
	RedisIdleTimeout = 250 * time.Second        // Close Redis connections after remaining idle for this duration.
	RedisLog         = false                    // Log database commands
)
  79. func dialDb() (c redis.Conn, err error) {
  80. u, err := url.Parse(RedisServer)
  81. if err != nil {
  82. return nil, err
  83. }
  84. defer func() {
  85. if err != nil && c != nil {
  86. c.Close()
  87. }
  88. }()
  89. c, err = redis.Dial("tcp", u.Host)
  90. if err != nil {
  91. return
  92. }
  93. if RedisLog {
  94. l := log.New(os.Stderr, "", log.LstdFlags)
  95. c = redis.NewLoggingConn(c, l, "")
  96. }
  97. if u.User != nil {
  98. if pw, ok := u.User.Password(); ok {
  99. if _, err = c.Do("AUTH", pw); err != nil {
  100. return
  101. }
  102. }
  103. }
  104. return
  105. }
  106. // New creates a database configured from command line flags.
  107. func New() (*Database, error) {
  108. pool := &redis.Pool{
  109. Dial: dialDb,
  110. MaxIdle: 10,
  111. IdleTimeout: RedisIdleTimeout,
  112. }
  113. c := pool.Get()
  114. if c.Err() != nil {
  115. return nil, c.Err()
  116. }
  117. c.Close()
  118. return &Database{Pool: pool}, nil
  119. }
  120. // Exists returns true if package with import path exists in the database.
  121. func (db *Database) Exists(path string) (bool, error) {
  122. c := db.Pool.Get()
  123. defer c.Close()
  124. return redis.Bool(c.Do("HEXISTS", "ids", path))
  125. }
// putScript stores a package in Redis. Its arguments are, in order: path,
// synopsis, score, gob, terms, etag, kind and nextCrawl.
//
// The script looks up the package id for path in the ids hash, allocating a
// new id from maxPackageId when the path is unknown. If etag is non-empty
// and equals the 'clone' field of the package hash, terms and score are
// cleared. The old and new term lists are then tallied so that only changed
// index:<term> sets are touched: a term found only in the old list (tally 1)
// is removed from its set, a term found only in the new list (tally 2) is
// added, and other tallies leave the set alone. The path is removed from
// badCrawl and newCrawl. Unless nextCrawl is '0', the crawl time is written
// to the nextCrawl zset and to the 'crawl' field of the package hash.
// Finally the remaining fields of pkg:<id> are written with HMSET.
var putScript = redis.NewScript(0, `
local path = ARGV[1]
local synopsis = ARGV[2]
local score = ARGV[3]
local gob = ARGV[4]
local terms = ARGV[5]
local etag = ARGV[6]
local kind = ARGV[7]
local nextCrawl = ARGV[8]
local id = redis.call('HGET', 'ids', path)
if not id then
id = redis.call('INCR', 'maxPackageId')
redis.call('HSET', 'ids', path, id)
end
if etag ~= '' and etag == redis.call('HGET', 'pkg:' .. id, 'clone') then
terms = ''
score = 0
end
local update = {}
for term in string.gmatch(redis.call('HGET', 'pkg:' .. id, 'terms') or '', '([^ ]+)') do
update[term] = 1
end
for term in string.gmatch(terms, '([^ ]+)') do
update[term] = (update[term] or 0) + 2
end
for term, x in pairs(update) do
if x == 1 then
redis.call('SREM', 'index:' .. term, id)
elseif x == 2 then
redis.call('SADD', 'index:' .. term, id)
end
end
redis.call('SREM', 'badCrawl', path)
redis.call('SREM', 'newCrawl', path)
if nextCrawl ~= '0' then
redis.call('ZADD', 'nextCrawl', nextCrawl, id)
redis.call('HSET', 'pkg:' .. id, 'crawl', nextCrawl)
end
return redis.call('HMSET', 'pkg:' .. id, 'path', path, 'synopsis', synopsis, 'score', score, 'gob', gob, 'terms', terms, 'etag', etag, 'kind', kind)
`)
// addCrawlScript adds each argument to the newCrawl set, skipping paths that
// already have an entry in the ids hash and paths that are members of the
// badCrawl set.
var addCrawlScript = redis.NewScript(0, `
for i=1,#ARGV do
local pkg = ARGV[i]
if redis.call('HEXISTS', 'ids', pkg) == 0 and redis.call('SISMEMBER', 'badCrawl', pkg) == 0 then
redis.call('SADD', 'newCrawl', pkg)
end
end
`)
  174. func (db *Database) AddNewCrawl(importPath string) error {
  175. if !gosrc.IsValidRemotePath(importPath) {
  176. return errors.New("bad path")
  177. }
  178. c := db.Pool.Get()
  179. defer c.Close()
  180. _, err := addCrawlScript.Do(c, importPath)
  181. return err
  182. }
// bgCtx returns the context passed to the search index helpers (PutIndex,
// deleteIndex, updateImportsIndex) by Put and Delete.
var bgCtx = appengine.BackgroundContext // replaced by tests
  184. // Put adds the package documentation to the database.
  185. func (db *Database) Put(pdoc *doc.Package, nextCrawl time.Time, hide bool) error {
  186. c := db.Pool.Get()
  187. defer c.Close()
  188. score := 0.0
  189. if !hide {
  190. score = documentScore(pdoc)
  191. }
  192. terms := documentTerms(pdoc, score)
  193. var gobBuf bytes.Buffer
  194. if err := gob.NewEncoder(&gobBuf).Encode(pdoc); err != nil {
  195. return err
  196. }
  197. gobBytes := snappy.Encode(nil, gobBuf.Bytes())
  198. // Truncate large documents.
  199. if len(gobBytes) > 400000 {
  200. pdocNew := *pdoc
  201. pdoc = &pdocNew
  202. pdoc.Truncated = true
  203. pdoc.Vars = nil
  204. pdoc.Funcs = nil
  205. pdoc.Types = nil
  206. pdoc.Consts = nil
  207. pdoc.Examples = nil
  208. gobBuf.Reset()
  209. if err := gob.NewEncoder(&gobBuf).Encode(pdoc); err != nil {
  210. return err
  211. }
  212. gobBytes = snappy.Encode(nil, gobBuf.Bytes())
  213. }
  214. kind := "p"
  215. switch {
  216. case pdoc.Name == "":
  217. kind = "d"
  218. case pdoc.IsCmd:
  219. kind = "c"
  220. }
  221. t := int64(0)
  222. if !nextCrawl.IsZero() {
  223. t = nextCrawl.Unix()
  224. }
  225. // Get old version of the package to extract its imports.
  226. // If the package does not exist, both oldDoc and err will be nil.
  227. old, _, err := db.getDoc(c, pdoc.ImportPath)
  228. if err != nil {
  229. return err
  230. }
  231. _, err = putScript.Do(c, pdoc.ImportPath, pdoc.Synopsis, score, gobBytes, strings.Join(terms, " "), pdoc.Etag, kind, t)
  232. if err != nil {
  233. return err
  234. }
  235. id, n, err := pkgIDAndImportCount(c, pdoc.ImportPath)
  236. if err != nil {
  237. return err
  238. }
  239. ctx := bgCtx()
  240. if score > 0 {
  241. if err := PutIndex(ctx, pdoc, id, score, n); err != nil {
  242. log.Printf("Cannot put %q in index: %v", pdoc.ImportPath, err)
  243. }
  244. if old != nil {
  245. if err := updateImportsIndex(c, ctx, old, pdoc); err != nil {
  246. return err
  247. }
  248. }
  249. } else {
  250. if err := deleteIndex(ctx, id); err != nil {
  251. return err
  252. }
  253. }
  254. if nextCrawl.IsZero() {
  255. // Skip crawling related packages if this is not a full save.
  256. return nil
  257. }
  258. paths := make(map[string]bool)
  259. for _, p := range pdoc.Imports {
  260. if gosrc.IsValidRemotePath(p) {
  261. paths[p] = true
  262. }
  263. }
  264. for _, p := range pdoc.TestImports {
  265. if gosrc.IsValidRemotePath(p) {
  266. paths[p] = true
  267. }
  268. }
  269. for _, p := range pdoc.XTestImports {
  270. if gosrc.IsValidRemotePath(p) {
  271. paths[p] = true
  272. }
  273. }
  274. if pdoc.ImportPath != pdoc.ProjectRoot && pdoc.ProjectRoot != "" {
  275. paths[pdoc.ProjectRoot] = true
  276. }
  277. for _, p := range pdoc.Subdirectories {
  278. paths[pdoc.ImportPath+"/"+p] = true
  279. }
  280. args := make([]interface{}, 0, len(paths))
  281. for p := range paths {
  282. args = append(args, p)
  283. }
  284. _, err = addCrawlScript.Do(c, args...)
  285. return err
  286. }
  287. // pkgIDAndImportCount returns the ID and import count of a specified package.
  288. func pkgIDAndImportCount(c redis.Conn, path string) (id string, numImported int, err error) {
  289. numImported, err = redis.Int(c.Do("SCARD", "index:import:"+path))
  290. if err != nil {
  291. return
  292. }
  293. id, err = redis.String(c.Do("HGET", "ids", path))
  294. if err == redis.ErrNil {
  295. return "", 0, nil
  296. }
  297. return id, numImported, nil
  298. }
  299. func updateImportsIndex(c redis.Conn, ctx context.Context, oldDoc, newDoc *doc.Package) error {
  300. // Create a map to store any import change since last time we indexed the package.
  301. changes := make(map[string]bool)
  302. for _, p := range oldDoc.Imports {
  303. if gosrc.IsValidRemotePath(p) {
  304. changes[p] = true
  305. }
  306. }
  307. for _, p := range newDoc.Imports {
  308. if gosrc.IsValidRemotePath(p) {
  309. delete(changes, p)
  310. }
  311. }
  312. // For each import change, re-index that package with updated NumImported.
  313. // In practice this should not happen often and when it does, the changes are
  314. // likely to be a small amount.
  315. for p, _ := range changes {
  316. id, n, err := pkgIDAndImportCount(c, p)
  317. if err != nil {
  318. return err
  319. }
  320. if id != "" {
  321. PutIndex(ctx, nil, id, -1, n)
  322. }
  323. }
  324. return nil
  325. }
// setNextCrawlScript records the next crawl time (ARGV[2]) for the package
// with import path ARGV[1], both in the nextCrawl zset and in the 'crawl'
// field of the package hash. It returns false without writing anything when
// the path has no entry in the ids hash.
var setNextCrawlScript = redis.NewScript(0, `
local path = ARGV[1]
local nextCrawl = ARGV[2]
local id = redis.call('HGET', 'ids', path)
if not id then
return false
end
redis.call('ZADD', 'nextCrawl', nextCrawl, id)
redis.call('HSET', 'pkg:' .. id, 'crawl', nextCrawl)
`)
  336. // SetNextCrawl sets the next crawl time for a package.
  337. func (db *Database) SetNextCrawl(path string, t time.Time) error {
  338. c := db.Pool.Get()
  339. defer c.Close()
  340. _, err := setNextCrawlScript.Do(c, path, t.Unix())
  341. return err
  342. }
// bumpCrawlScript sets the crawl time to now. To avoid continuously crawling
// frequently updated repositories, the crawl is scheduled in the future.
//
// Arguments are the project root and the current Unix time. For every
// package id in index:project:<root> the script:
//   - sets the 'crawl' field of the package hash to now when the field is
//     missing, zero, or later than now;
//   - computes a candidate next crawl time of now+7200 seconds for kind 'p'
//     and now+86400 seconds for every other kind, and writes it to the
//     nextCrawl zset when the package has no score there or the candidate is
//     earlier than the current score.
//
// The nextCrawl local declared before the loop is shadowed by the one
// declared inside the loop and is never read.
var bumpCrawlScript = redis.NewScript(0, `
local root = ARGV[1]
local now = tonumber(ARGV[2])
local nextCrawl = now + 7200
local pkgs = redis.call('SORT', 'index:project:' .. root, 'GET', '#')
for i=1,#pkgs do
local v = redis.call('HMGET', 'pkg:' .. pkgs[i], 'crawl', 'kind')
local t = tonumber(v[1] or 0)
if t == 0 or now < t then
redis.call('HSET', 'pkg:' .. pkgs[i], 'crawl', now)
end
local nextCrawl = now + 86400
if v[2] == 'p' then
nextCrawl = now + 7200
end
t = tonumber(redis.call('ZSCORE', 'nextCrawl', pkgs[i]) or 0)
if t == 0 or nextCrawl < t then
redis.call('ZADD', 'nextCrawl', nextCrawl, pkgs[i])
end
end
`)
  366. func (db *Database) BumpCrawl(projectRoot string) error {
  367. c := db.Pool.Get()
  368. defer c.Close()
  369. _, err := bumpCrawlScript.Do(c, normalizeProjectRoot(projectRoot), time.Now().Unix())
  370. return err
  371. }
// getDocScript gets the package documentation and update time for the
// specified path. If path is "-", then the oldest document is returned,
// that is, the first member of the nextCrawl zset.
//
// The reply is the pair {gob, nextCrawl}. The crawl time is taken from the
// 'crawl' field of the package hash, falling back to the package's score in
// the nextCrawl zset and then to 0. The script returns false when the path
// is unknown, when nextCrawl is empty for "-", or when the package hash has
// no 'gob' field.
var getDocScript = redis.NewScript(0, `
local path = ARGV[1]
local id
if path == '-' then
local r = redis.call('ZRANGE', 'nextCrawl', 0, 0)
if not r or #r == 0 then
return false
end
id = r[1]
else
id = redis.call('HGET', 'ids', path)
if not id then
return false
end
end
local gob = redis.call('HGET', 'pkg:' .. id, 'gob')
if not gob then
return false
end
local nextCrawl = redis.call('HGET', 'pkg:' .. id, 'crawl')
if not nextCrawl then
nextCrawl = redis.call('ZSCORE', 'nextCrawl', id)
if not nextCrawl then
nextCrawl = 0
end
end
return {gob, nextCrawl}
`)
  402. func (db *Database) getDoc(c redis.Conn, path string) (*doc.Package, time.Time, error) {
  403. r, err := redis.Values(getDocScript.Do(c, path))
  404. if err == redis.ErrNil {
  405. return nil, time.Time{}, nil
  406. } else if err != nil {
  407. return nil, time.Time{}, err
  408. }
  409. var p []byte
  410. var t int64
  411. if _, err := redis.Scan(r, &p, &t); err != nil {
  412. return nil, time.Time{}, err
  413. }
  414. p, err = snappy.Decode(nil, p)
  415. if err != nil {
  416. return nil, time.Time{}, err
  417. }
  418. var pdoc doc.Package
  419. if err := gob.NewDecoder(bytes.NewReader(p)).Decode(&pdoc); err != nil {
  420. return nil, time.Time{}, err
  421. }
  422. nextCrawl := pdoc.Updated
  423. if t != 0 {
  424. nextCrawl = time.Unix(t, 0).UTC()
  425. }
  426. return &pdoc, nextCrawl, err
  427. }
// getSubdirsScript lists the packages of a project. Each argument is a
// candidate project root; the candidates are tried in order and the reply
// for the first root whose index:project:<root> set yields a non-empty
// result is returned. The reply is a flat list of (path, synopsis, kind)
// triples sorted alphabetically by path.
var getSubdirsScript = redis.NewScript(0, `
local reply
for i = 1,#ARGV do
reply = redis.call('SORT', 'index:project:' .. ARGV[i], 'ALPHA', 'BY', 'pkg:*->path', 'GET', 'pkg:*->path', 'GET', 'pkg:*->synopsis', 'GET', 'pkg:*->kind')
if #reply > 0 then
break
end
end
return reply
`)
  438. func (db *Database) getSubdirs(c redis.Conn, path string, pdoc *doc.Package) ([]Package, error) {
  439. var reply interface{}
  440. var err error
  441. switch {
  442. case isStandardPackage(path):
  443. reply, err = getSubdirsScript.Do(c, "go")
  444. case pdoc != nil:
  445. reply, err = getSubdirsScript.Do(c, pdoc.ProjectRoot)
  446. default:
  447. var roots []interface{}
  448. projectRoot := path
  449. for i := 0; i < 5; i++ {
  450. roots = append(roots, projectRoot)
  451. if j := strings.LastIndex(projectRoot, "/"); j < 0 {
  452. break
  453. } else {
  454. projectRoot = projectRoot[:j]
  455. }
  456. }
  457. reply, err = getSubdirsScript.Do(c, roots...)
  458. }
  459. values, err := redis.Values(reply, err)
  460. if err != nil {
  461. return nil, err
  462. }
  463. var subdirs []Package
  464. prefix := path + "/"
  465. for len(values) > 0 {
  466. var pkg Package
  467. var kind string
  468. values, err = redis.Scan(values, &pkg.Path, &pkg.Synopsis, &kind)
  469. if err != nil {
  470. return nil, err
  471. }
  472. if (kind == "p" || kind == "c") && strings.HasPrefix(pkg.Path, prefix) {
  473. subdirs = append(subdirs, pkg)
  474. }
  475. }
  476. return subdirs, err
  477. }
  478. // Get gets the package documentation and sub-directories for the the given
  479. // import path.
  480. func (db *Database) Get(path string) (*doc.Package, []Package, time.Time, error) {
  481. c := db.Pool.Get()
  482. defer c.Close()
  483. pdoc, nextCrawl, err := db.getDoc(c, path)
  484. if err != nil {
  485. return nil, nil, time.Time{}, err
  486. }
  487. if pdoc != nil {
  488. // fixup for speclal "-" path.
  489. path = pdoc.ImportPath
  490. }
  491. subdirs, err := db.getSubdirs(c, path, pdoc)
  492. if err != nil {
  493. return nil, nil, time.Time{}, err
  494. }
  495. return pdoc, subdirs, nextCrawl, nil
  496. }
  497. func (db *Database) GetDoc(path string) (*doc.Package, time.Time, error) {
  498. c := db.Pool.Get()
  499. defer c.Close()
  500. return db.getDoc(c, path)
  501. }
// deleteScript removes the package with import path ARGV[1] from Redis. It
// removes the package id from the index:<term> set of each stored term,
// from the nextCrawl and popular zsets, removes the path from newCrawl,
// deletes the pkg:<id> hash and finally deletes the path from the ids hash.
// It returns false when the path has no entry in the ids hash.
var deleteScript = redis.NewScript(0, `
local path = ARGV[1]
local id = redis.call('HGET', 'ids', path)
if not id then
return false
end
for term in string.gmatch(redis.call('HGET', 'pkg:' .. id, 'terms') or '', '([^ ]+)') do
redis.call('SREM', 'index:' .. term, id)
end
redis.call('ZREM', 'nextCrawl', id)
redis.call('SREM', 'newCrawl', path)
redis.call('ZREM', 'popular', id)
redis.call('DEL', 'pkg:' .. id)
return redis.call('HDEL', 'ids', path)
`)
  517. // Delete deletes the documentation for the given import path.
  518. func (db *Database) Delete(path string) error {
  519. c := db.Pool.Get()
  520. defer c.Close()
  521. ctx := bgCtx()
  522. id, err := redis.String(c.Do("HGET", "ids", path))
  523. if err == redis.ErrNil {
  524. return nil
  525. }
  526. if err != nil {
  527. return err
  528. }
  529. if err := deleteIndex(ctx, id); err != nil {
  530. return err
  531. }
  532. _, err = deleteScript.Do(c, path)
  533. return err
  534. }
  535. func packages(reply interface{}, all bool) ([]Package, error) {
  536. values, err := redis.Values(reply, nil)
  537. if err != nil {
  538. return nil, err
  539. }
  540. result := make([]Package, 0, len(values)/3)
  541. for len(values) > 0 {
  542. var pkg Package
  543. var kind string
  544. values, err = redis.Scan(values, &pkg.Path, &pkg.Synopsis, &kind)
  545. if err != nil {
  546. return nil, err
  547. }
  548. if !all && kind == "d" {
  549. continue
  550. }
  551. if pkg.Path == "C" {
  552. pkg.Synopsis = "Package C is a \"pseudo-package\" used to access the C namespace from a cgo source file."
  553. }
  554. result = append(result, pkg)
  555. }
  556. return result, nil
  557. }
  558. func (db *Database) getPackages(key string, all bool) ([]Package, error) {
  559. c := db.Pool.Get()
  560. defer c.Close()
  561. reply, err := c.Do("SORT", key, "ALPHA", "BY", "pkg:*->path", "GET", "pkg:*->path", "GET", "pkg:*->synopsis", "GET", "pkg:*->kind")
  562. if err != nil {
  563. return nil, err
  564. }
  565. return packages(reply, all)
  566. }
  567. func (db *Database) GoIndex() ([]Package, error) {
  568. return db.getPackages("index:project:go", false)
  569. }
  570. func (db *Database) GoSubrepoIndex() ([]Package, error) {
  571. return db.getPackages("index:project:subrepo", false)
  572. }
  573. func (db *Database) Index() ([]Package, error) {
  574. return db.getPackages("index:all:", false)
  575. }
  576. func (db *Database) Project(projectRoot string) ([]Package, error) {
  577. return db.getPackages("index:project:"+normalizeProjectRoot(projectRoot), true)
  578. }
  579. func (db *Database) AllPackages() ([]Package, error) {
  580. c := db.Pool.Get()
  581. defer c.Close()
  582. values, err := redis.Values(c.Do("SORT", "nextCrawl", "DESC", "BY", "pkg:*->score", "GET", "pkg:*->path", "GET", "pkg:*->kind"))
  583. if err != nil {
  584. return nil, err
  585. }
  586. result := make([]Package, 0, len(values)/2)
  587. for len(values) > 0 {
  588. var pkg Package
  589. var kind string
  590. values, err = redis.Scan(values, &pkg.Path, &kind)
  591. if err != nil {
  592. return nil, err
  593. }
  594. if kind == "d" {
  595. continue
  596. }
  597. result = append(result, pkg)
  598. }
  599. return result, nil
  600. }
// packagesScript returns a flat list of (path, synopsis, kind) triples, one
// per argument and in argument order. For a path without an entry in the
// ids hash the synopsis is '' and the kind is 'u'.
var packagesScript = redis.NewScript(0, `
local result = {}
for i = 1,#ARGV do
local path = ARGV[i]
local synopsis = ''
local kind = 'u'
local id = redis.call('HGET', 'ids', path)
if id then
synopsis = redis.call('HGET', 'pkg:' .. id, 'synopsis')
kind = redis.call('HGET', 'pkg:' .. id, 'kind')
end
result[#result+1] = path
result[#result+1] = synopsis
result[#result+1] = kind
end
return result
`)
  618. func (db *Database) Packages(paths []string) ([]Package, error) {
  619. var args []interface{}
  620. for _, p := range paths {
  621. args = append(args, p)
  622. }
  623. c := db.Pool.Get()
  624. defer c.Close()
  625. reply, err := packagesScript.Do(c, args...)
  626. if err != nil {
  627. return nil, err
  628. }
  629. pkgs, err := packages(reply, true)
  630. sort.Sort(byPath(pkgs))
  631. return pkgs, err
  632. }
  633. func (db *Database) ImporterCount(path string) (int, error) {
  634. c := db.Pool.Get()
  635. defer c.Close()
  636. return redis.Int(c.Do("SCARD", "index:import:"+path))
  637. }
  638. func (db *Database) Importers(path string) ([]Package, error) {
  639. return db.getPackages("index:import:"+path, false)
  640. }
  641. func (db *Database) Block(root string) error {
  642. c := db.Pool.Get()
  643. defer c.Close()
  644. if _, err := c.Do("SADD", "block", root); err != nil {
  645. return err
  646. }
  647. keys, err := redis.Strings(c.Do("HKEYS", "ids"))
  648. if err != nil {
  649. return err
  650. }
  651. for _, key := range keys {
  652. if key == root || strings.HasPrefix(key, root) && key[len(root)] == '/' {
  653. if _, err := deleteScript.Do(c, key); err != nil {
  654. return err
  655. }
  656. }
  657. }
  658. return nil
  659. }
// isBlockedScript reports whether the path ARGV[1] is blocked. The path is
// rebuilt one slash-separated element at a time and each resulting prefix
// is tested for membership in the block set. The script returns 1 on the
// first match and 0 when no prefix is a member.
var isBlockedScript = redis.NewScript(0, `
local path = ''
for s in string.gmatch(ARGV[1], '[^/]+') do
path = path .. s
if redis.call('SISMEMBER', 'block', path) == 1 then
return 1
end
path = path .. '/'
end
return 0
`)
  671. func (db *Database) IsBlocked(path string) (bool, error) {
  672. c := db.Pool.Get()
  673. defer c.Close()
  674. return redis.Bool(isBlockedScript.Do(c, path))
  675. }
  676. type queryResult struct {
  677. Path string
  678. Synopsis string
  679. Score float64
  680. }
  681. type byScore []*queryResult
  682. func (p byScore) Len() int { return len(p) }
  683. func (p byScore) Less(i, j int) bool { return p[j].Score < p[i].Score }
  684. func (p byScore) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
  685. func (db *Database) Query(q string) ([]Package, error) {
  686. terms := parseQuery(q)
  687. if len(terms) == 0 {
  688. return nil, nil
  689. }
  690. c := db.Pool.Get()
  691. defer c.Close()
  692. n, err := redis.Int(c.Do("INCR", "maxQueryId"))
  693. if err != nil {
  694. return nil, err
  695. }
  696. id := "tmp:query-" + strconv.Itoa(n)
  697. args := []interface{}{id}
  698. for _, term := range terms {
  699. args = append(args, "index:"+term)
  700. }
  701. c.Send("SINTERSTORE", args...)
  702. c.Send("SORT", id, "DESC", "BY", "nosort", "GET", "pkg:*->path", "GET", "pkg:*->synopsis", "GET", "pkg:*->score")
  703. c.Send("DEL", id)
  704. c.Flush()
  705. c.Receive() // SINTERSTORE
  706. values, err := redis.Values(c.Receive()) // SORT
  707. if err != nil {
  708. return nil, err
  709. }
  710. c.Receive() // DEL
  711. var queryResults []*queryResult
  712. if err := redis.ScanSlice(values, &queryResults, "Path", "Synopsis", "Score"); err != nil {
  713. return nil, err
  714. }
  715. for _, qr := range queryResults {
  716. c.Send("SCARD", "index:import:"+qr.Path)
  717. }
  718. c.Flush()
  719. for _, qr := range queryResults {
  720. importCount, err := redis.Int(c.Receive())
  721. if err != nil {
  722. return nil, err
  723. }
  724. qr.Score *= math.Log(float64(10 + importCount))
  725. if isStandardPackage(qr.Path) {
  726. if strings.HasSuffix(qr.Path, q) {
  727. // Big bump for exact match on standard package name.
  728. qr.Score *= 10000
  729. } else {
  730. qr.Score *= 1.2
  731. }
  732. }
  733. if q == path.Base(qr.Path) {
  734. qr.Score *= 1.1
  735. }
  736. }
  737. sort.Sort(byScore(queryResults))
  738. pkgs := make([]Package, len(queryResults))
  739. for i, qr := range queryResults {
  740. pkgs[i].Path = qr.Path
  741. pkgs[i].Synopsis = qr.Synopsis
  742. }
  743. return pkgs, nil
  744. }
// PackageInfo describes one stored package as passed to the callback of
// Database.Do.
type PackageInfo struct {
	PDoc  *doc.Package // decoded package documentation
	Score float64      // value of the "score" field of the package hash
	Kind  string       // value of the "kind" field of the package hash
	Size  int          // combined byte length of the stored path, compressed gob, terms and synopsis
}
  751. // Do executes function f for each document in the database.
  752. func (db *Database) Do(f func(*PackageInfo) error) error {
  753. c := db.Pool.Get()
  754. defer c.Close()
  755. cursor := 0
  756. c.Send("SCAN", cursor, "MATCH", "pkg:*")
  757. c.Flush()
  758. for {
  759. // Receive previous SCAN.
  760. values, err := redis.Values(c.Receive())
  761. if err != nil {
  762. return err
  763. }
  764. var keys [][]byte
  765. if _, err := redis.Scan(values, &cursor, &keys); err != nil {
  766. return err
  767. }
  768. if cursor == 0 {
  769. break
  770. }
  771. for _, key := range keys {
  772. c.Send("HMGET", key, "gob", "score", "kind", "path", "terms", "synopis")
  773. }
  774. c.Send("SCAN", cursor, "MATCH", "pkg:*")
  775. c.Flush()
  776. for _ = range keys {
  777. values, err := redis.Values(c.Receive())
  778. if err != nil {
  779. return err
  780. }
  781. var (
  782. pi PackageInfo
  783. p []byte
  784. path string
  785. terms string
  786. synopsis string
  787. )
  788. if _, err := redis.Scan(values, &p, &pi.Score, &pi.Kind, &path, &terms, &synopsis); err != nil {
  789. return err
  790. }
  791. if p == nil {
  792. continue
  793. }
  794. pi.Size = len(path) + len(p) + len(terms) + len(synopsis)
  795. p, err = snappy.Decode(nil, p)
  796. if err != nil {
  797. return fmt.Errorf("snappy decoding %s: %v", path, err)
  798. }
  799. if err := gob.NewDecoder(bytes.NewReader(p)).Decode(&pi.PDoc); err != nil {
  800. return fmt.Errorf("gob decoding %s: %v", path, err)
  801. }
  802. if err := f(&pi); err != nil {
  803. return fmt.Errorf("func %s: %v", path, err)
  804. }
  805. }
  806. }
  807. return nil
  808. }
// importGraphScript returns the 'synopsis' and 'terms' fields of the
// package with import path ARGV[1], or false when the path has no entry in
// the ids hash.
var importGraphScript = redis.NewScript(0, `
local path = ARGV[1]
local id = redis.call('HGET', 'ids', path)
if not id then
return false
end
return redis.call('HMGET', 'pkg:' .. id, 'synopsis', 'terms')
`)
// DepLevel specifies the level of dependencies to show in an import graph.
type DepLevel int

// Levels are ordered from most to least inclusive; ImportGraph compares
// them with >=.
const (
	ShowAllDeps      DepLevel = iota // show all dependencies
	HideStandardDeps                 // don't show dependencies of standard libraries
	HideStandardAll                  // don't show standard libraries at all
)
  824. func (db *Database) ImportGraph(pdoc *doc.Package, level DepLevel) ([]Package, [][2]int, error) {
  825. // This breadth-first traversal of the package's dependencies uses the
  826. // Redis pipeline as queue. Links to packages with invalid import paths are
  827. // only included for the root package.
  828. c := db.Pool.Get()
  829. defer c.Close()
  830. if err := importGraphScript.Load(c); err != nil {
  831. return nil, nil, err
  832. }
  833. nodes := []Package{{Path: pdoc.ImportPath, Synopsis: pdoc.Synopsis}}
  834. edges := [][2]int{}
  835. index := map[string]int{pdoc.ImportPath: 0}
  836. for _, path := range pdoc.Imports {
  837. if level >= HideStandardAll && isStandardPackage(path) {
  838. continue
  839. }
  840. j := len(nodes)
  841. index[path] = j
  842. edges = append(edges, [2]int{0, j})
  843. nodes = append(nodes, Package{Path: path})
  844. importGraphScript.Send(c, path)
  845. }
  846. for i := 1; i < len(nodes); i++ {
  847. c.Flush()
  848. r, err := redis.Values(c.Receive())
  849. if err == redis.ErrNil {
  850. continue
  851. } else if err != nil {
  852. return nil, nil, err
  853. }
  854. var synopsis, terms string
  855. if _, err := redis.Scan(r, &synopsis, &terms); err != nil {
  856. return nil, nil, err
  857. }
  858. nodes[i].Synopsis = synopsis
  859. for _, term := range strings.Fields(terms) {
  860. if strings.HasPrefix(term, "import:") {
  861. path := term[len("import:"):]
  862. if level >= HideStandardDeps && isStandardPackage(path) {
  863. continue
  864. }
  865. j, ok := index[path]
  866. if !ok {
  867. j = len(nodes)
  868. index[path] = j
  869. nodes = append(nodes, Package{Path: path})
  870. importGraphScript.Send(c, path)
  871. }
  872. edges = append(edges, [2]int{i, j})
  873. }
  874. }
  875. }
  876. return nodes, edges, nil
  877. }
  878. func (db *Database) PutGob(key string, value interface{}) error {
  879. var buf bytes.Buffer
  880. if err := gob.NewEncoder(&buf).Encode(value); err != nil {
  881. return err
  882. }
  883. c := db.Pool.Get()
  884. defer c.Close()
  885. _, err := c.Do("SET", "gob:"+key, buf.Bytes())
  886. return err
  887. }
  888. func (db *Database) GetGob(key string, value interface{}) error {
  889. c := db.Pool.Get()
  890. defer c.Close()
  891. p, err := redis.Bytes(c.Do("GET", "gob:"+key))
  892. if err == redis.ErrNil {
  893. return nil
  894. } else if err != nil {
  895. return err
  896. }
  897. return gob.NewDecoder(bytes.NewReader(p)).Decode(value)
  898. }
// incrementPopularScoreScript adds to the popularity score of a package.
// Arguments are the import path, the increment n and the scaled time t.
//
// Scores are stored relative to the base time kept in popular:0 (0 when
// unset): the increment is multiplied by f = exp(t - t0) before it is added
// to the popular zset. Once f exceeds 10 the scores are rebased: popular:0
// is set to t, every score is multiplied by 1/f and members whose score is
// 0.05 or less are removed. The script does nothing for a path without an
// entry in the ids hash.
var incrementPopularScoreScript = redis.NewScript(0, `
local path = ARGV[1]
local n = ARGV[2]
local t = ARGV[3]
local id = redis.call('HGET', 'ids', path)
if not id then
return
end
local t0 = redis.call('GET', 'popular:0') or '0'
local f = math.exp(tonumber(t) - tonumber(t0))
redis.call('ZINCRBY', 'popular', tonumber(n) * f, id)
if f > 10 then
redis.call('SET', 'popular:0', t)
redis.call('ZUNIONSTORE', 'popular', 1, 'popular', 'WEIGHTS', 1.0 / f)
redis.call('ZREMRANGEBYSCORE', 'popular', '-inf', 0.05)
end
`)
  916. const popularHalfLife = time.Hour * 24 * 7
  917. func (db *Database) incrementPopularScoreInternal(path string, delta float64, t time.Time) error {
  918. // nt = n0 * math.Exp(-lambda * t)
  919. // lambda = math.Ln2 / thalf
  920. c := db.Pool.Get()
  921. defer c.Close()
  922. const lambda = math.Ln2 / float64(popularHalfLife)
  923. scaledTime := lambda * float64(t.Sub(time.Unix(1257894000, 0)))
  924. _, err := incrementPopularScoreScript.Do(c, path, delta, scaledTime)
  925. return err
  926. }
  927. func (db *Database) IncrementPopularScore(path string) error {
  928. return db.incrementPopularScoreInternal(path, 1, time.Now())
  929. }
// popularScript returns a flat list of (path, synopsis, kind) triples for
// the members of the popular zset at ranks 0 through ARGV[1], highest score
// first.
var popularScript = redis.NewScript(0, `
local stop = ARGV[1]
local ids = redis.call('ZREVRANGE', 'popular', '0', stop)
local result = {}
for i=1,#ids do
local values = redis.call('HMGET', 'pkg:' .. ids[i], 'path', 'synopsis', 'kind')
result[#result+1] = values[1]
result[#result+1] = values[2]
result[#result+1] = values[3]
end
return result
`)
  942. func (db *Database) Popular(count int) ([]Package, error) {
  943. c := db.Pool.Get()
  944. defer c.Close()
  945. reply, err := popularScript.Do(c, count-1)
  946. if err != nil {
  947. return nil, err
  948. }
  949. pkgs, err := packages(reply, false)
  950. return pkgs, err
  951. }
// popularWithScoreScript returns a flat list of triples for every member of
// the popular zset, highest score first. Each triple holds the package
// path, the popularity score in the position where other scripts return
// the synopsis, and the fixed kind 'p'.
var popularWithScoreScript = redis.NewScript(0, `
local ids = redis.call('ZREVRANGE', 'popular', '0', -1, 'WITHSCORES')
local result = {}
for i=1,#ids,2 do
result[#result+1] = redis.call('HGET', 'pkg:' .. ids[i], 'path')
result[#result+1] = ids[i+1]
result[#result+1] = 'p'
end
return result
`)
  962. func (db *Database) PopularWithScores() ([]Package, error) {
  963. c := db.Pool.Get()
  964. defer c.Close()
  965. reply, err := popularWithScoreScript.Do(c)
  966. if err != nil {
  967. return nil, err
  968. }
  969. pkgs, err := packages(reply, false)
  970. return pkgs, err
  971. }
  972. func (db *Database) PopNewCrawl() (string, bool, error) {
  973. c := db.Pool.Get()
  974. defer c.Close()
  975. var subdirs []Package
  976. path, err := redis.String(c.Do("SPOP", "newCrawl"))
  977. switch {
  978. case err == redis.ErrNil:
  979. err = nil
  980. path = ""
  981. case err == nil:
  982. subdirs, err = db.getSubdirs(c, path, nil)
  983. }
  984. return path, len(subdirs) > 0, err
  985. }
  986. func (db *Database) AddBadCrawl(path string) error {
  987. c := db.Pool.Get()
  988. defer c.Close()
  989. _, err := c.Do("SADD", "badCrawl", path)
  990. return err
  991. }
// incrementCounterScript updates the decaying counter stored at
// 'counter:<ARGV[1]>'.
//
// Arguments:
//
//	ARGV[1]  counter name (appended to the 'counter:' key prefix)
//	ARGV[2]  amount n to add
//	ARGV[3]  current scaled time t
//	ARGV[4]  value passed to EXPIRE for the key
//
// The counter is stored as a JSON object with fields n and t. If one already
// exists, its n is scaled by math.exp(stored t - ARGV[3]) before the new
// amount is added. The updated {n, t} is written back, the key's expiry is
// reset, and the new n is returned as a string.
var incrementCounterScript = redis.NewScript(0, `
    local key = 'counter:' .. ARGV[1]
    local n = tonumber(ARGV[2])
    local t = tonumber(ARGV[3])
    local exp = tonumber(ARGV[4])

    local counter = redis.call('GET', key)
    if counter then
        counter = cjson.decode(counter)
        n = n + counter.n * math.exp(counter.t - t)
    end

    redis.call('SET', key, cjson.encode({n = n; t = t}))
    redis.call('EXPIRE', key, exp)
    return tostring(n)
`)
  1006. const counterHalflife = time.Hour
  1007. func (db *Database) incrementCounterInternal(key string, delta float64, t time.Time) (float64, error) {
  1008. // nt = n0 * math.Exp(-lambda * t)
  1009. // lambda = math.Ln2 / thalf
  1010. c := db.Pool.Get()
  1011. defer c.Close()
  1012. const lambda = math.Ln2 / float64(counterHalflife)
  1013. scaledTime := lambda * float64(t.Sub(time.Unix(1257894000, 0)))
  1014. return redis.Float64(incrementCounterScript.Do(c, key, delta, scaledTime, (4*counterHalflife)/time.Second))
  1015. }
  1016. func (db *Database) IncrementCounter(key string, delta float64) (float64, error) {
  1017. return db.incrementCounterInternal(key, delta, time.Now())
  1018. }
  1019. // Reindex gets all the packages in database and put them into the search index.
  1020. // This will update the search index with the path, synopsis, score, import counts
  1021. // of all the packages in the database.
  1022. func (db *Database) Reindex(ctx context.Context) error {
  1023. c := db.Pool.Get()
  1024. defer c.Close()
  1025. idx, err := search.Open("packages")
  1026. if err != nil {
  1027. return fmt.Errorf("database: failed to open packages: %v", err)
  1028. }
  1029. npkgs := 0
  1030. for {
  1031. // Get 200 packages from the nextCrawl set each time. Use npkgs as a cursor
  1032. // to store the current position we actually indexed. Retry from the cursor
  1033. // position if we received a timeout error from app engine.
  1034. values, err := redis.Values(c.Do(
  1035. "SORT", "nextCrawl",
  1036. "LIMIT", strconv.Itoa(npkgs), "200",
  1037. "GET", "pkg:*->path",
  1038. "GET", "pkg:*->synopsis",
  1039. "GET", "pkg:*->score",
  1040. ))
  1041. if err != nil {
  1042. return err
  1043. }
  1044. if len(values) == 0 {
  1045. break // all done
  1046. }
  1047. // The Search API should support put in batches of up to 200 documents,
  1048. // the Go version of this API does not support this yet.
  1049. // TODO(shantuo): Put packages in batch operations.
  1050. for ; len(values) > 0; npkgs++ {
  1051. var pdoc doc.Package
  1052. var score float64
  1053. values, err = redis.Scan(values, &pdoc.ImportPath, &pdoc.Synopsis, &score)
  1054. if err != nil {
  1055. return err
  1056. }
  1057. // There are some corrupted data in our current database
  1058. // that causes an error when putting the package into the
  1059. // search index which only supports UTF8 encoding.
  1060. if !utf8.ValidString(pdoc.Synopsis) {
  1061. pdoc.Synopsis = ""
  1062. }
  1063. id, n, err := pkgIDAndImportCount(c, pdoc.ImportPath)
  1064. if err != nil {
  1065. return err
  1066. }
  1067. if _, err := idx.Put(ctx, id, &Package{
  1068. Path: pdoc.ImportPath,
  1069. Synopsis: pdoc.Synopsis,
  1070. Score: score,
  1071. ImportCount: n,
  1072. }); err != nil {
  1073. if appengine.IsTimeoutError(err) {
  1074. log.Printf("App Engine timeout: %v. Continue...", err)
  1075. break
  1076. }
  1077. return fmt.Errorf("Failed to put index %s: %v", id, err)
  1078. }
  1079. }
  1080. }
  1081. log.Printf("%d packages are reindexed", npkgs)
  1082. return nil
  1083. }