|
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242 |
- // Copyright 2013 The Go Authors. All rights reserved.
- //
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file or at
- // https://developers.google.com/open-source/licenses/bsd.
-
- // Redis keys and types:
- //
- // maxPackageId string: next id to assign
- // ids hash maps import path to package id
- // pkg:<id> hash
- // terms: space separated search terms
- // path: import path
- // synopsis: synopsis
- // gob: snappy compressed gob encoded doc.Package
- // score: document search score
- // etag:
- // kind: p=package, c=command, d=directory with no go files
- // index:<term> set: package ids for given search term
- // index:import:<path> set: packages with import path
- // index:project:<root> set: packages in project with root
- // block set: packages to block
- // popular zset: package id, score
- // popular:0 string: scaled base time for popular scores
- // nextCrawl zset: package id, Unix time for next crawl
- // newCrawl set: new paths to crawl
- // badCrawl set: paths that returned error when crawling.
-
- // Package database manages storage for GoPkgDoc.
- package database
-
- import (
- "bytes"
- "encoding/gob"
- "errors"
- "fmt"
- "log"
- "math"
- "net/url"
- "os"
- "path"
- "sort"
- "strconv"
- "strings"
- "time"
- "unicode/utf8"
-
- "github.com/garyburd/redigo/redis"
- "github.com/golang/snappy"
- "golang.org/x/net/context"
- "google.golang.org/appengine"
- "google.golang.org/appengine/search"
-
- "github.com/golang/gddo/doc"
- "github.com/golang/gddo/gosrc"
- )
-
- type Database struct {
- Pool interface {
- Get() redis.Conn
- }
- }
-
// Package is the summary of a package that is shared by the search index and
// the HTML templates. The original documentation notes that the type also
// implements search.FieldLoadSaver in order to customize the Rank function of
// the search index; those methods are not defined next to the type.
type Package struct {
	Name        string  `json:"name,omitempty"`
	Path        string  `json:"path"`
	ImportCount int     `json:"import_count"`
	Synopsis    string  `json:"synopsis,omitempty"`
	Fork        bool    `json:"fork,omitempty"`
	Stars       int     `json:"stars,omitempty"`
	Score       float64 `json:"score,omitempty"`
}
-
- type byPath []Package
-
- func (p byPath) Len() int { return len(p) }
- func (p byPath) Less(i, j int) bool { return p[i].Path < p[j].Path }
- func (p byPath) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
-
// Configuration variables (and default values of flags).
var (
	// RedisServer is the URL of the Redis server.
	RedisServer = "redis://127.0.0.1:6379"

	// RedisIdleTimeout is how long a Redis connection may remain idle before
	// it is closed.
	RedisIdleTimeout = 250 * time.Second

	// RedisLog enables logging of database commands.
	RedisLog = false
)
-
- func dialDb() (c redis.Conn, err error) {
- u, err := url.Parse(RedisServer)
- if err != nil {
- return nil, err
- }
-
- defer func() {
- if err != nil && c != nil {
- c.Close()
- }
- }()
-
- c, err = redis.Dial("tcp", u.Host)
- if err != nil {
- return
- }
-
- if RedisLog {
- l := log.New(os.Stderr, "", log.LstdFlags)
- c = redis.NewLoggingConn(c, l, "")
- }
-
- if u.User != nil {
- if pw, ok := u.User.Password(); ok {
- if _, err = c.Do("AUTH", pw); err != nil {
- return
- }
- }
- }
- return
- }
-
- // New creates a database configured from command line flags.
- func New() (*Database, error) {
- pool := &redis.Pool{
- Dial: dialDb,
- MaxIdle: 10,
- IdleTimeout: RedisIdleTimeout,
- }
-
- c := pool.Get()
- if c.Err() != nil {
- return nil, c.Err()
- }
- c.Close()
-
- return &Database{Pool: pool}, nil
- }
-
- // Exists returns true if package with import path exists in the database.
- func (db *Database) Exists(path string) (bool, error) {
- c := db.Pool.Get()
- defer c.Close()
- return redis.Bool(c.Do("HEXISTS", "ids", path))
- }
-
- var putScript = redis.NewScript(0, `
- local path = ARGV[1]
- local synopsis = ARGV[2]
- local score = ARGV[3]
- local gob = ARGV[4]
- local terms = ARGV[5]
- local etag = ARGV[6]
- local kind = ARGV[7]
- local nextCrawl = ARGV[8]
-
- local id = redis.call('HGET', 'ids', path)
- if not id then
- id = redis.call('INCR', 'maxPackageId')
- redis.call('HSET', 'ids', path, id)
- end
-
- if etag ~= '' and etag == redis.call('HGET', 'pkg:' .. id, 'clone') then
- terms = ''
- score = 0
- end
-
- local update = {}
- for term in string.gmatch(redis.call('HGET', 'pkg:' .. id, 'terms') or '', '([^ ]+)') do
- update[term] = 1
- end
-
- for term in string.gmatch(terms, '([^ ]+)') do
- update[term] = (update[term] or 0) + 2
- end
-
- for term, x in pairs(update) do
- if x == 1 then
- redis.call('SREM', 'index:' .. term, id)
- elseif x == 2 then
- redis.call('SADD', 'index:' .. term, id)
- end
- end
-
- redis.call('SREM', 'badCrawl', path)
- redis.call('SREM', 'newCrawl', path)
-
- if nextCrawl ~= '0' then
- redis.call('ZADD', 'nextCrawl', nextCrawl, id)
- redis.call('HSET', 'pkg:' .. id, 'crawl', nextCrawl)
- end
-
- return redis.call('HMSET', 'pkg:' .. id, 'path', path, 'synopsis', synopsis, 'score', score, 'gob', gob, 'terms', terms, 'etag', etag, 'kind', kind)
- `)
-
- var addCrawlScript = redis.NewScript(0, `
- for i=1,#ARGV do
- local pkg = ARGV[i]
- if redis.call('HEXISTS', 'ids', pkg) == 0 and redis.call('SISMEMBER', 'badCrawl', pkg) == 0 then
- redis.call('SADD', 'newCrawl', pkg)
- end
- end
- `)
-
- func (db *Database) AddNewCrawl(importPath string) error {
- if !gosrc.IsValidRemotePath(importPath) {
- return errors.New("bad path")
- }
- c := db.Pool.Get()
- defer c.Close()
- _, err := addCrawlScript.Do(c, importPath)
- return err
- }
-
- var bgCtx = appengine.BackgroundContext // replaced by tests
-
- // Put adds the package documentation to the database.
- func (db *Database) Put(pdoc *doc.Package, nextCrawl time.Time, hide bool) error {
- c := db.Pool.Get()
- defer c.Close()
-
- score := 0.0
- if !hide {
- score = documentScore(pdoc)
- }
- terms := documentTerms(pdoc, score)
-
- var gobBuf bytes.Buffer
- if err := gob.NewEncoder(&gobBuf).Encode(pdoc); err != nil {
- return err
- }
-
- gobBytes := snappy.Encode(nil, gobBuf.Bytes())
-
- // Truncate large documents.
- if len(gobBytes) > 400000 {
- pdocNew := *pdoc
- pdoc = &pdocNew
- pdoc.Truncated = true
- pdoc.Vars = nil
- pdoc.Funcs = nil
- pdoc.Types = nil
- pdoc.Consts = nil
- pdoc.Examples = nil
- gobBuf.Reset()
- if err := gob.NewEncoder(&gobBuf).Encode(pdoc); err != nil {
- return err
- }
- gobBytes = snappy.Encode(nil, gobBuf.Bytes())
- }
-
- kind := "p"
- switch {
- case pdoc.Name == "":
- kind = "d"
- case pdoc.IsCmd:
- kind = "c"
- }
-
- t := int64(0)
- if !nextCrawl.IsZero() {
- t = nextCrawl.Unix()
- }
-
- // Get old version of the package to extract its imports.
- // If the package does not exist, both oldDoc and err will be nil.
- old, _, err := db.getDoc(c, pdoc.ImportPath)
- if err != nil {
- return err
- }
-
- _, err = putScript.Do(c, pdoc.ImportPath, pdoc.Synopsis, score, gobBytes, strings.Join(terms, " "), pdoc.Etag, kind, t)
- if err != nil {
- return err
- }
-
- id, n, err := pkgIDAndImportCount(c, pdoc.ImportPath)
- if err != nil {
- return err
- }
- ctx := bgCtx()
-
- if score > 0 {
- if err := PutIndex(ctx, pdoc, id, score, n); err != nil {
- log.Printf("Cannot put %q in index: %v", pdoc.ImportPath, err)
- }
-
- if old != nil {
- if err := updateImportsIndex(c, ctx, old, pdoc); err != nil {
- return err
- }
- }
- } else {
- if err := deleteIndex(ctx, id); err != nil {
- return err
- }
- }
-
- if nextCrawl.IsZero() {
- // Skip crawling related packages if this is not a full save.
- return nil
- }
-
- paths := make(map[string]bool)
- for _, p := range pdoc.Imports {
- if gosrc.IsValidRemotePath(p) {
- paths[p] = true
- }
- }
- for _, p := range pdoc.TestImports {
- if gosrc.IsValidRemotePath(p) {
- paths[p] = true
- }
- }
- for _, p := range pdoc.XTestImports {
- if gosrc.IsValidRemotePath(p) {
- paths[p] = true
- }
- }
- if pdoc.ImportPath != pdoc.ProjectRoot && pdoc.ProjectRoot != "" {
- paths[pdoc.ProjectRoot] = true
- }
- for _, p := range pdoc.Subdirectories {
- paths[pdoc.ImportPath+"/"+p] = true
- }
-
- args := make([]interface{}, 0, len(paths))
- for p := range paths {
- args = append(args, p)
- }
- _, err = addCrawlScript.Do(c, args...)
- return err
- }
-
- // pkgIDAndImportCount returns the ID and import count of a specified package.
- func pkgIDAndImportCount(c redis.Conn, path string) (id string, numImported int, err error) {
- numImported, err = redis.Int(c.Do("SCARD", "index:import:"+path))
- if err != nil {
- return
- }
- id, err = redis.String(c.Do("HGET", "ids", path))
- if err == redis.ErrNil {
- return "", 0, nil
- }
- return id, numImported, nil
- }
-
- func updateImportsIndex(c redis.Conn, ctx context.Context, oldDoc, newDoc *doc.Package) error {
- // Create a map to store any import change since last time we indexed the package.
- changes := make(map[string]bool)
- for _, p := range oldDoc.Imports {
- if gosrc.IsValidRemotePath(p) {
- changes[p] = true
- }
- }
- for _, p := range newDoc.Imports {
- if gosrc.IsValidRemotePath(p) {
- delete(changes, p)
- }
- }
-
- // For each import change, re-index that package with updated NumImported.
- // In practice this should not happen often and when it does, the changes are
- // likely to be a small amount.
- for p, _ := range changes {
- id, n, err := pkgIDAndImportCount(c, p)
- if err != nil {
- return err
- }
- if id != "" {
- PutIndex(ctx, nil, id, -1, n)
- }
- }
- return nil
- }
-
- var setNextCrawlScript = redis.NewScript(0, `
- local path = ARGV[1]
- local nextCrawl = ARGV[2]
-
- local id = redis.call('HGET', 'ids', path)
- if not id then
- return false
- end
-
- redis.call('ZADD', 'nextCrawl', nextCrawl, id)
- redis.call('HSET', 'pkg:' .. id, 'crawl', nextCrawl)
- `)
-
- // SetNextCrawl sets the next crawl time for a package.
- func (db *Database) SetNextCrawl(path string, t time.Time) error {
- c := db.Pool.Get()
- defer c.Close()
- _, err := setNextCrawlScript.Do(c, path, t.Unix())
- return err
- }
-
- // bumpCrawlScript sets the crawl time to now. To avoid continuously crawling
- // frequently updated repositories, the crawl is scheduled in the future.
- var bumpCrawlScript = redis.NewScript(0, `
- local root = ARGV[1]
- local now = tonumber(ARGV[2])
- local nextCrawl = now + 7200
- local pkgs = redis.call('SORT', 'index:project:' .. root, 'GET', '#')
-
- for i=1,#pkgs do
- local v = redis.call('HMGET', 'pkg:' .. pkgs[i], 'crawl', 'kind')
- local t = tonumber(v[1] or 0)
- if t == 0 or now < t then
- redis.call('HSET', 'pkg:' .. pkgs[i], 'crawl', now)
- end
- local nextCrawl = now + 86400
- if v[2] == 'p' then
- nextCrawl = now + 7200
- end
- t = tonumber(redis.call('ZSCORE', 'nextCrawl', pkgs[i]) or 0)
- if t == 0 or nextCrawl < t then
- redis.call('ZADD', 'nextCrawl', nextCrawl, pkgs[i])
- end
- end
- `)
-
- func (db *Database) BumpCrawl(projectRoot string) error {
- c := db.Pool.Get()
- defer c.Close()
- _, err := bumpCrawlScript.Do(c, normalizeProjectRoot(projectRoot), time.Now().Unix())
- return err
- }
-
- // getDocScript gets the package documentation and update time for the
- // specified path. If path is "-", then the oldest document is returned.
- var getDocScript = redis.NewScript(0, `
- local path = ARGV[1]
-
- local id
- if path == '-' then
- local r = redis.call('ZRANGE', 'nextCrawl', 0, 0)
- if not r or #r == 0 then
- return false
- end
- id = r[1]
- else
- id = redis.call('HGET', 'ids', path)
- if not id then
- return false
- end
- end
-
- local gob = redis.call('HGET', 'pkg:' .. id, 'gob')
- if not gob then
- return false
- end
-
- local nextCrawl = redis.call('HGET', 'pkg:' .. id, 'crawl')
- if not nextCrawl then
- nextCrawl = redis.call('ZSCORE', 'nextCrawl', id)
- if not nextCrawl then
- nextCrawl = 0
- end
- end
-
- return {gob, nextCrawl}
- `)
-
- func (db *Database) getDoc(c redis.Conn, path string) (*doc.Package, time.Time, error) {
- r, err := redis.Values(getDocScript.Do(c, path))
- if err == redis.ErrNil {
- return nil, time.Time{}, nil
- } else if err != nil {
- return nil, time.Time{}, err
- }
-
- var p []byte
- var t int64
-
- if _, err := redis.Scan(r, &p, &t); err != nil {
- return nil, time.Time{}, err
- }
-
- p, err = snappy.Decode(nil, p)
- if err != nil {
- return nil, time.Time{}, err
- }
-
- var pdoc doc.Package
- if err := gob.NewDecoder(bytes.NewReader(p)).Decode(&pdoc); err != nil {
- return nil, time.Time{}, err
- }
-
- nextCrawl := pdoc.Updated
- if t != 0 {
- nextCrawl = time.Unix(t, 0).UTC()
- }
-
- return &pdoc, nextCrawl, err
- }
-
- var getSubdirsScript = redis.NewScript(0, `
- local reply
- for i = 1,#ARGV do
- reply = redis.call('SORT', 'index:project:' .. ARGV[i], 'ALPHA', 'BY', 'pkg:*->path', 'GET', 'pkg:*->path', 'GET', 'pkg:*->synopsis', 'GET', 'pkg:*->kind')
- if #reply > 0 then
- break
- end
- end
- return reply
- `)
-
- func (db *Database) getSubdirs(c redis.Conn, path string, pdoc *doc.Package) ([]Package, error) {
- var reply interface{}
- var err error
-
- switch {
- case isStandardPackage(path):
- reply, err = getSubdirsScript.Do(c, "go")
- case pdoc != nil:
- reply, err = getSubdirsScript.Do(c, pdoc.ProjectRoot)
- default:
- var roots []interface{}
- projectRoot := path
- for i := 0; i < 5; i++ {
- roots = append(roots, projectRoot)
- if j := strings.LastIndex(projectRoot, "/"); j < 0 {
- break
- } else {
- projectRoot = projectRoot[:j]
- }
- }
- reply, err = getSubdirsScript.Do(c, roots...)
- }
-
- values, err := redis.Values(reply, err)
- if err != nil {
- return nil, err
- }
-
- var subdirs []Package
- prefix := path + "/"
-
- for len(values) > 0 {
- var pkg Package
- var kind string
- values, err = redis.Scan(values, &pkg.Path, &pkg.Synopsis, &kind)
- if err != nil {
- return nil, err
- }
- if (kind == "p" || kind == "c") && strings.HasPrefix(pkg.Path, prefix) {
- subdirs = append(subdirs, pkg)
- }
- }
-
- return subdirs, err
- }
-
- // Get gets the package documentation and sub-directories for the the given
- // import path.
- func (db *Database) Get(path string) (*doc.Package, []Package, time.Time, error) {
- c := db.Pool.Get()
- defer c.Close()
-
- pdoc, nextCrawl, err := db.getDoc(c, path)
- if err != nil {
- return nil, nil, time.Time{}, err
- }
-
- if pdoc != nil {
- // fixup for speclal "-" path.
- path = pdoc.ImportPath
- }
-
- subdirs, err := db.getSubdirs(c, path, pdoc)
- if err != nil {
- return nil, nil, time.Time{}, err
- }
- return pdoc, subdirs, nextCrawl, nil
- }
-
- func (db *Database) GetDoc(path string) (*doc.Package, time.Time, error) {
- c := db.Pool.Get()
- defer c.Close()
- return db.getDoc(c, path)
- }
-
- var deleteScript = redis.NewScript(0, `
- local path = ARGV[1]
-
- local id = redis.call('HGET', 'ids', path)
- if not id then
- return false
- end
-
- for term in string.gmatch(redis.call('HGET', 'pkg:' .. id, 'terms') or '', '([^ ]+)') do
- redis.call('SREM', 'index:' .. term, id)
- end
-
- redis.call('ZREM', 'nextCrawl', id)
- redis.call('SREM', 'newCrawl', path)
- redis.call('ZREM', 'popular', id)
- redis.call('DEL', 'pkg:' .. id)
- return redis.call('HDEL', 'ids', path)
- `)
-
- // Delete deletes the documentation for the given import path.
- func (db *Database) Delete(path string) error {
- c := db.Pool.Get()
- defer c.Close()
-
- ctx := bgCtx()
- id, err := redis.String(c.Do("HGET", "ids", path))
- if err == redis.ErrNil {
- return nil
- }
- if err != nil {
- return err
- }
- if err := deleteIndex(ctx, id); err != nil {
- return err
- }
-
- _, err = deleteScript.Do(c, path)
- return err
- }
-
- func packages(reply interface{}, all bool) ([]Package, error) {
- values, err := redis.Values(reply, nil)
- if err != nil {
- return nil, err
- }
- result := make([]Package, 0, len(values)/3)
- for len(values) > 0 {
- var pkg Package
- var kind string
- values, err = redis.Scan(values, &pkg.Path, &pkg.Synopsis, &kind)
- if err != nil {
- return nil, err
- }
- if !all && kind == "d" {
- continue
- }
- if pkg.Path == "C" {
- pkg.Synopsis = "Package C is a \"pseudo-package\" used to access the C namespace from a cgo source file."
- }
- result = append(result, pkg)
- }
- return result, nil
- }
-
- func (db *Database) getPackages(key string, all bool) ([]Package, error) {
- c := db.Pool.Get()
- defer c.Close()
- reply, err := c.Do("SORT", key, "ALPHA", "BY", "pkg:*->path", "GET", "pkg:*->path", "GET", "pkg:*->synopsis", "GET", "pkg:*->kind")
- if err != nil {
- return nil, err
- }
- return packages(reply, all)
- }
-
- func (db *Database) GoIndex() ([]Package, error) {
- return db.getPackages("index:project:go", false)
- }
-
- func (db *Database) GoSubrepoIndex() ([]Package, error) {
- return db.getPackages("index:project:subrepo", false)
- }
-
- func (db *Database) Index() ([]Package, error) {
- return db.getPackages("index:all:", false)
- }
-
- func (db *Database) Project(projectRoot string) ([]Package, error) {
- return db.getPackages("index:project:"+normalizeProjectRoot(projectRoot), true)
- }
-
- func (db *Database) AllPackages() ([]Package, error) {
- c := db.Pool.Get()
- defer c.Close()
- values, err := redis.Values(c.Do("SORT", "nextCrawl", "DESC", "BY", "pkg:*->score", "GET", "pkg:*->path", "GET", "pkg:*->kind"))
- if err != nil {
- return nil, err
- }
- result := make([]Package, 0, len(values)/2)
- for len(values) > 0 {
- var pkg Package
- var kind string
- values, err = redis.Scan(values, &pkg.Path, &kind)
- if err != nil {
- return nil, err
- }
- if kind == "d" {
- continue
- }
- result = append(result, pkg)
- }
- return result, nil
- }
-
- var packagesScript = redis.NewScript(0, `
- local result = {}
- for i = 1,#ARGV do
- local path = ARGV[i]
- local synopsis = ''
- local kind = 'u'
- local id = redis.call('HGET', 'ids', path)
- if id then
- synopsis = redis.call('HGET', 'pkg:' .. id, 'synopsis')
- kind = redis.call('HGET', 'pkg:' .. id, 'kind')
- end
- result[#result+1] = path
- result[#result+1] = synopsis
- result[#result+1] = kind
- end
- return result
- `)
-
- func (db *Database) Packages(paths []string) ([]Package, error) {
- var args []interface{}
- for _, p := range paths {
- args = append(args, p)
- }
- c := db.Pool.Get()
- defer c.Close()
- reply, err := packagesScript.Do(c, args...)
- if err != nil {
- return nil, err
- }
- pkgs, err := packages(reply, true)
- sort.Sort(byPath(pkgs))
- return pkgs, err
- }
-
- func (db *Database) ImporterCount(path string) (int, error) {
- c := db.Pool.Get()
- defer c.Close()
- return redis.Int(c.Do("SCARD", "index:import:"+path))
- }
-
- func (db *Database) Importers(path string) ([]Package, error) {
- return db.getPackages("index:import:"+path, false)
- }
-
- func (db *Database) Block(root string) error {
- c := db.Pool.Get()
- defer c.Close()
- if _, err := c.Do("SADD", "block", root); err != nil {
- return err
- }
- keys, err := redis.Strings(c.Do("HKEYS", "ids"))
- if err != nil {
- return err
- }
- for _, key := range keys {
- if key == root || strings.HasPrefix(key, root) && key[len(root)] == '/' {
- if _, err := deleteScript.Do(c, key); err != nil {
- return err
- }
- }
- }
- return nil
- }
-
- var isBlockedScript = redis.NewScript(0, `
- local path = ''
- for s in string.gmatch(ARGV[1], '[^/]+') do
- path = path .. s
- if redis.call('SISMEMBER', 'block', path) == 1 then
- return 1
- end
- path = path .. '/'
- end
- return 0
- `)
-
- func (db *Database) IsBlocked(path string) (bool, error) {
- c := db.Pool.Get()
- defer c.Close()
- return redis.Bool(isBlockedScript.Do(c, path))
- }
-
// queryResult is one row of a search reply. The field names are referenced
// by name when the reply is scanned, so they must stay in step with Query.
type queryResult struct {
	Path     string
	Synopsis string
	Score    float64
}
-
- type byScore []*queryResult
-
- func (p byScore) Len() int { return len(p) }
- func (p byScore) Less(i, j int) bool { return p[j].Score < p[i].Score }
- func (p byScore) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
-
- func (db *Database) Query(q string) ([]Package, error) {
- terms := parseQuery(q)
- if len(terms) == 0 {
- return nil, nil
- }
- c := db.Pool.Get()
- defer c.Close()
- n, err := redis.Int(c.Do("INCR", "maxQueryId"))
- if err != nil {
- return nil, err
- }
- id := "tmp:query-" + strconv.Itoa(n)
-
- args := []interface{}{id}
- for _, term := range terms {
- args = append(args, "index:"+term)
- }
- c.Send("SINTERSTORE", args...)
- c.Send("SORT", id, "DESC", "BY", "nosort", "GET", "pkg:*->path", "GET", "pkg:*->synopsis", "GET", "pkg:*->score")
- c.Send("DEL", id)
- c.Flush()
- c.Receive() // SINTERSTORE
- values, err := redis.Values(c.Receive()) // SORT
- if err != nil {
- return nil, err
- }
- c.Receive() // DEL
-
- var queryResults []*queryResult
- if err := redis.ScanSlice(values, &queryResults, "Path", "Synopsis", "Score"); err != nil {
- return nil, err
- }
-
- for _, qr := range queryResults {
- c.Send("SCARD", "index:import:"+qr.Path)
- }
- c.Flush()
-
- for _, qr := range queryResults {
- importCount, err := redis.Int(c.Receive())
- if err != nil {
- return nil, err
- }
-
- qr.Score *= math.Log(float64(10 + importCount))
-
- if isStandardPackage(qr.Path) {
- if strings.HasSuffix(qr.Path, q) {
- // Big bump for exact match on standard package name.
- qr.Score *= 10000
- } else {
- qr.Score *= 1.2
- }
- }
-
- if q == path.Base(qr.Path) {
- qr.Score *= 1.1
- }
- }
-
- sort.Sort(byScore(queryResults))
-
- pkgs := make([]Package, len(queryResults))
- for i, qr := range queryResults {
- pkgs[i].Path = qr.Path
- pkgs[i].Synopsis = qr.Synopsis
- }
-
- return pkgs, nil
- }
-
- type PackageInfo struct {
- PDoc *doc.Package
- Score float64
- Kind string
- Size int
- }
-
- // Do executes function f for each document in the database.
- func (db *Database) Do(f func(*PackageInfo) error) error {
- c := db.Pool.Get()
- defer c.Close()
- cursor := 0
- c.Send("SCAN", cursor, "MATCH", "pkg:*")
- c.Flush()
- for {
- // Receive previous SCAN.
- values, err := redis.Values(c.Receive())
- if err != nil {
- return err
- }
- var keys [][]byte
- if _, err := redis.Scan(values, &cursor, &keys); err != nil {
- return err
- }
- if cursor == 0 {
- break
- }
- for _, key := range keys {
- c.Send("HMGET", key, "gob", "score", "kind", "path", "terms", "synopis")
- }
- c.Send("SCAN", cursor, "MATCH", "pkg:*")
- c.Flush()
- for _ = range keys {
- values, err := redis.Values(c.Receive())
- if err != nil {
- return err
- }
-
- var (
- pi PackageInfo
- p []byte
- path string
- terms string
- synopsis string
- )
-
- if _, err := redis.Scan(values, &p, &pi.Score, &pi.Kind, &path, &terms, &synopsis); err != nil {
- return err
- }
-
- if p == nil {
- continue
- }
-
- pi.Size = len(path) + len(p) + len(terms) + len(synopsis)
-
- p, err = snappy.Decode(nil, p)
- if err != nil {
- return fmt.Errorf("snappy decoding %s: %v", path, err)
- }
-
- if err := gob.NewDecoder(bytes.NewReader(p)).Decode(&pi.PDoc); err != nil {
- return fmt.Errorf("gob decoding %s: %v", path, err)
- }
- if err := f(&pi); err != nil {
- return fmt.Errorf("func %s: %v", path, err)
- }
- }
- }
- return nil
- }
-
- var importGraphScript = redis.NewScript(0, `
- local path = ARGV[1]
-
- local id = redis.call('HGET', 'ids', path)
- if not id then
- return false
- end
-
- return redis.call('HMGET', 'pkg:' .. id, 'synopsis', 'terms')
- `)
-
// DepLevel specifies the level of dependencies to show in an import graph.
type DepLevel int

// The levels are ordered from most to least detail.
const (
	// ShowAllDeps shows all dependencies.
	ShowAllDeps DepLevel = iota
	// HideStandardDeps doesn't show dependencies of standard libraries.
	HideStandardDeps
	// HideStandardAll doesn't show standard libraries at all.
	HideStandardAll
)
-
- func (db *Database) ImportGraph(pdoc *doc.Package, level DepLevel) ([]Package, [][2]int, error) {
-
- // This breadth-first traversal of the package's dependencies uses the
- // Redis pipeline as queue. Links to packages with invalid import paths are
- // only included for the root package.
-
- c := db.Pool.Get()
- defer c.Close()
- if err := importGraphScript.Load(c); err != nil {
- return nil, nil, err
- }
-
- nodes := []Package{{Path: pdoc.ImportPath, Synopsis: pdoc.Synopsis}}
- edges := [][2]int{}
- index := map[string]int{pdoc.ImportPath: 0}
-
- for _, path := range pdoc.Imports {
- if level >= HideStandardAll && isStandardPackage(path) {
- continue
- }
- j := len(nodes)
- index[path] = j
- edges = append(edges, [2]int{0, j})
- nodes = append(nodes, Package{Path: path})
- importGraphScript.Send(c, path)
- }
-
- for i := 1; i < len(nodes); i++ {
- c.Flush()
- r, err := redis.Values(c.Receive())
- if err == redis.ErrNil {
- continue
- } else if err != nil {
- return nil, nil, err
- }
- var synopsis, terms string
- if _, err := redis.Scan(r, &synopsis, &terms); err != nil {
- return nil, nil, err
- }
- nodes[i].Synopsis = synopsis
- for _, term := range strings.Fields(terms) {
- if strings.HasPrefix(term, "import:") {
- path := term[len("import:"):]
- if level >= HideStandardDeps && isStandardPackage(path) {
- continue
- }
- j, ok := index[path]
- if !ok {
- j = len(nodes)
- index[path] = j
- nodes = append(nodes, Package{Path: path})
- importGraphScript.Send(c, path)
- }
- edges = append(edges, [2]int{i, j})
- }
- }
- }
- return nodes, edges, nil
- }
-
- func (db *Database) PutGob(key string, value interface{}) error {
- var buf bytes.Buffer
- if err := gob.NewEncoder(&buf).Encode(value); err != nil {
- return err
- }
- c := db.Pool.Get()
- defer c.Close()
- _, err := c.Do("SET", "gob:"+key, buf.Bytes())
- return err
- }
-
- func (db *Database) GetGob(key string, value interface{}) error {
- c := db.Pool.Get()
- defer c.Close()
- p, err := redis.Bytes(c.Do("GET", "gob:"+key))
- if err == redis.ErrNil {
- return nil
- } else if err != nil {
- return err
- }
- return gob.NewDecoder(bytes.NewReader(p)).Decode(value)
- }
-
- var incrementPopularScoreScript = redis.NewScript(0, `
- local path = ARGV[1]
- local n = ARGV[2]
- local t = ARGV[3]
-
- local id = redis.call('HGET', 'ids', path)
- if not id then
- return
- end
-
- local t0 = redis.call('GET', 'popular:0') or '0'
- local f = math.exp(tonumber(t) - tonumber(t0))
- redis.call('ZINCRBY', 'popular', tonumber(n) * f, id)
- if f > 10 then
- redis.call('SET', 'popular:0', t)
- redis.call('ZUNIONSTORE', 'popular', 1, 'popular', 'WEIGHTS', 1.0 / f)
- redis.call('ZREMRANGEBYSCORE', 'popular', '-inf', 0.05)
- end
- `)
-
// popularHalfLife is the half-life used when scaling time for popularity
// scores: one week.
const popularHalfLife = 7 * 24 * time.Hour
-
- func (db *Database) incrementPopularScoreInternal(path string, delta float64, t time.Time) error {
- // nt = n0 * math.Exp(-lambda * t)
- // lambda = math.Ln2 / thalf
- c := db.Pool.Get()
- defer c.Close()
- const lambda = math.Ln2 / float64(popularHalfLife)
- scaledTime := lambda * float64(t.Sub(time.Unix(1257894000, 0)))
- _, err := incrementPopularScoreScript.Do(c, path, delta, scaledTime)
- return err
- }
-
- func (db *Database) IncrementPopularScore(path string) error {
- return db.incrementPopularScoreInternal(path, 1, time.Now())
- }
-
- var popularScript = redis.NewScript(0, `
- local stop = ARGV[1]
- local ids = redis.call('ZREVRANGE', 'popular', '0', stop)
- local result = {}
- for i=1,#ids do
- local values = redis.call('HMGET', 'pkg:' .. ids[i], 'path', 'synopsis', 'kind')
- result[#result+1] = values[1]
- result[#result+1] = values[2]
- result[#result+1] = values[3]
- end
- return result
- `)
-
- func (db *Database) Popular(count int) ([]Package, error) {
- c := db.Pool.Get()
- defer c.Close()
- reply, err := popularScript.Do(c, count-1)
- if err != nil {
- return nil, err
- }
- pkgs, err := packages(reply, false)
- return pkgs, err
- }
-
- var popularWithScoreScript = redis.NewScript(0, `
- local ids = redis.call('ZREVRANGE', 'popular', '0', -1, 'WITHSCORES')
- local result = {}
- for i=1,#ids,2 do
- result[#result+1] = redis.call('HGET', 'pkg:' .. ids[i], 'path')
- result[#result+1] = ids[i+1]
- result[#result+1] = 'p'
- end
- return result
- `)
-
- func (db *Database) PopularWithScores() ([]Package, error) {
- c := db.Pool.Get()
- defer c.Close()
- reply, err := popularWithScoreScript.Do(c)
- if err != nil {
- return nil, err
- }
- pkgs, err := packages(reply, false)
- return pkgs, err
- }
-
- func (db *Database) PopNewCrawl() (string, bool, error) {
- c := db.Pool.Get()
- defer c.Close()
-
- var subdirs []Package
-
- path, err := redis.String(c.Do("SPOP", "newCrawl"))
- switch {
- case err == redis.ErrNil:
- err = nil
- path = ""
- case err == nil:
- subdirs, err = db.getSubdirs(c, path, nil)
- }
- return path, len(subdirs) > 0, err
- }
-
- func (db *Database) AddBadCrawl(path string) error {
- c := db.Pool.Get()
- defer c.Close()
- _, err := c.Do("SADD", "badCrawl", path)
- return err
- }
-
- var incrementCounterScript = redis.NewScript(0, `
- local key = 'counter:' .. ARGV[1]
- local n = tonumber(ARGV[2])
- local t = tonumber(ARGV[3])
- local exp = tonumber(ARGV[4])
-
- local counter = redis.call('GET', key)
- if counter then
- counter = cjson.decode(counter)
- n = n + counter.n * math.exp(counter.t - t)
- end
-
- redis.call('SET', key, cjson.encode({n = n; t = t}))
- redis.call('EXPIRE', key, exp)
- return tostring(n)
- `)
-
// counterHalflife is the half-life of the decaying counters: one hour.
const counterHalflife = 60 * time.Minute
-
- func (db *Database) incrementCounterInternal(key string, delta float64, t time.Time) (float64, error) {
- // nt = n0 * math.Exp(-lambda * t)
- // lambda = math.Ln2 / thalf
- c := db.Pool.Get()
- defer c.Close()
- const lambda = math.Ln2 / float64(counterHalflife)
- scaledTime := lambda * float64(t.Sub(time.Unix(1257894000, 0)))
- return redis.Float64(incrementCounterScript.Do(c, key, delta, scaledTime, (4*counterHalflife)/time.Second))
- }
-
- func (db *Database) IncrementCounter(key string, delta float64) (float64, error) {
- return db.incrementCounterInternal(key, delta, time.Now())
- }
-
- // Reindex gets all the packages in database and put them into the search index.
- // This will update the search index with the path, synopsis, score, import counts
- // of all the packages in the database.
- func (db *Database) Reindex(ctx context.Context) error {
- c := db.Pool.Get()
- defer c.Close()
-
- idx, err := search.Open("packages")
- if err != nil {
- return fmt.Errorf("database: failed to open packages: %v", err)
- }
- npkgs := 0
- for {
- // Get 200 packages from the nextCrawl set each time. Use npkgs as a cursor
- // to store the current position we actually indexed. Retry from the cursor
- // position if we received a timeout error from app engine.
- values, err := redis.Values(c.Do(
- "SORT", "nextCrawl",
- "LIMIT", strconv.Itoa(npkgs), "200",
- "GET", "pkg:*->path",
- "GET", "pkg:*->synopsis",
- "GET", "pkg:*->score",
- ))
- if err != nil {
- return err
- }
- if len(values) == 0 {
- break // all done
- }
-
- // The Search API should support put in batches of up to 200 documents,
- // the Go version of this API does not support this yet.
- // TODO(shantuo): Put packages in batch operations.
- for ; len(values) > 0; npkgs++ {
- var pdoc doc.Package
- var score float64
- values, err = redis.Scan(values, &pdoc.ImportPath, &pdoc.Synopsis, &score)
- if err != nil {
- return err
- }
- // There are some corrupted data in our current database
- // that causes an error when putting the package into the
- // search index which only supports UTF8 encoding.
- if !utf8.ValidString(pdoc.Synopsis) {
- pdoc.Synopsis = ""
- }
- id, n, err := pkgIDAndImportCount(c, pdoc.ImportPath)
- if err != nil {
- return err
- }
- if _, err := idx.Put(ctx, id, &Package{
- Path: pdoc.ImportPath,
- Synopsis: pdoc.Synopsis,
- Score: score,
- ImportCount: n,
- }); err != nil {
- if appengine.IsTimeoutError(err) {
- log.Printf("App Engine timeout: %v. Continue...", err)
- break
- }
- return fmt.Errorf("Failed to put index %s: %v", id, err)
- }
- }
- }
- log.Printf("%d packages are reindexed", npkgs)
- return nil
- }
|