// Copyright 2013 The Go Authors. All rights reserved. // // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file or at // https://developers.google.com/open-source/licenses/bsd. package database import ( "path" "regexp" "strings" "unicode" "github.com/golang/gddo/doc" "github.com/golang/gddo/gosrc" ) func isStandardPackage(path string) bool { return strings.Index(path, ".") < 0 } func isTermSep(r rune) bool { return unicode.IsSpace(r) || r != '.' && unicode.IsPunct(r) || unicode.IsSymbol(r) } func normalizeProjectRoot(projectRoot string) string { if projectRoot == "" { return "go" } return projectRoot } var synonyms = map[string]string{ "redis": "redisdb", // append db to avoid stemming to 'red' "rand": "random", "postgres": "postgresql", "mongo": "mongodb", } func term(s string) string { s = strings.ToLower(s) if x, ok := synonyms[s]; ok { s = x } // Trim the trailing period at the end of any sentence. return stem(strings.TrimSuffix(s, ".")) } var httpPat = regexp.MustCompile(`https?://\S+`) func collectSynopsisTerms(terms map[string]bool, synopsis string) { synopsis = httpPat.ReplaceAllLiteralString(synopsis, "") fields := strings.FieldsFunc(synopsis, isTermSep) for i := range fields { fields[i] = strings.ToLower(fields[i]) } // Ignore boilerplate in the following common patterns: // Package foo ... // Command foo ... // Package foo implements ... (and provides, contains) // The foo package ... // The foo package implements ... // The foo command ... checkPackageVerb := false switch { case len(fields) >= 1 && fields[0] == "package": fields = fields[1:] checkPackageVerb = true case len(fields) >= 1 && fields[0] == "command": fields = fields[1:] case len(fields) >= 3 && fields[0] == "the" && fields[2] == "package": fields[2] = fields[1] fields = fields[2:] checkPackageVerb = true case len(fields) >= 3 && fields[0] == "the" && fields[2] == "command": fields[2] = fields[1] fields = fields[2:] } if checkPackageVerb && len(fields) >= 2 && (fields[1] == "implements" || fields[1] == "provides" || fields[1] == "contains") { fields[1] = fields[0] fields = fields[1:] } for _, s := range fields { if !stopWord[s] { terms[term(s)] = true } } } func termSlice(terms map[string]bool) []string { result := make([]string, 0, len(terms)) for term := range terms { result = append(result, term) } return result } func documentTerms(pdoc *doc.Package, score float64) []string { terms := make(map[string]bool) // Project root projectRoot := normalizeProjectRoot(pdoc.ProjectRoot) terms["project:"+projectRoot] = true if strings.HasPrefix(pdoc.ImportPath, "golang.org/x/") { terms["project:subrepo"] = true } // Imports for _, path := range pdoc.Imports { if gosrc.IsValidPath(path) { terms["import:"+path] = true } } if score > 0 { for _, term := range parseQuery(pdoc.ImportPath) { terms[term] = true } if !isStandardPackage(pdoc.ImportPath) { terms["all:"] = true for _, term := range parseQuery(pdoc.ProjectName) { terms[term] = true } for _, term := range parseQuery(pdoc.Name) { terms[term] = true } } // Synopsis collectSynopsisTerms(terms, pdoc.Synopsis) } return termSlice(terms) } // vendorPat matches the path of a vendored package. var vendorPat = regexp.MustCompile( // match directories used by tools to vendor packages. `/(?:_?third_party|vendors|Godeps/_workspace/src)/` + // match a domain name. `[^./]+\.[^/]+`) func documentScore(pdoc *doc.Package) float64 { if pdoc.Name == "" || pdoc.Status != gosrc.Active || len(pdoc.Errors) > 0 || strings.HasSuffix(pdoc.ImportPath, ".go") || strings.HasPrefix(pdoc.ImportPath, "gist.github.com/") || strings.HasSuffix(pdoc.ImportPath, "/internal") || strings.Contains(pdoc.ImportPath, "/internal/") || vendorPat.MatchString(pdoc.ImportPath) { return 0 } for _, p := range pdoc.Imports { if strings.HasSuffix(p, ".go") { return 0 } } r := 1.0 if pdoc.IsCmd { if pdoc.Doc == "" { // Do not include command in index if it does not have documentation. return 0 } if !importsGoPackages(pdoc) { // Penalize commands that don't use the "go/*" packages. r *= 0.9 } } else { if !pdoc.Truncated && len(pdoc.Consts) == 0 && len(pdoc.Vars) == 0 && len(pdoc.Funcs) == 0 && len(pdoc.Types) == 0 && len(pdoc.Examples) == 0 { // Do not include package in index if it does not have exports. return 0 } if pdoc.Doc == "" { // Penalty for no documentation. r *= 0.95 } if path.Base(pdoc.ImportPath) != pdoc.Name { // Penalty for last element of path != package name. r *= 0.9 } for i := 0; i < strings.Count(pdoc.ImportPath[len(pdoc.ProjectRoot):], "/"); i++ { // Penalty for deeply nested packages. r *= 0.99 } if strings.Index(pdoc.ImportPath[len(pdoc.ProjectRoot):], "/src/") > 0 { r *= 0.95 } for _, p := range pdoc.Imports { if vendorPat.MatchString(p) { // Penalize packages that import vendored packages. r *= 0.1 break } } } return r } func parseQuery(q string) []string { var terms []string q = strings.ToLower(q) for _, s := range strings.FieldsFunc(q, isTermSep) { if !stopWord[s] { terms = append(terms, term(s)) } } return terms } func importsGoPackages(pdoc *doc.Package) bool { for _, m := range pdoc.Imports { if strings.HasPrefix(m, "go/") { return true } } return false }