|
- // Copyright 2013 The Go Authors. All rights reserved.
- //
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file or at
- // https://developers.google.com/open-source/licenses/bsd.
-
- // Package gosrc fetches Go package source code from version control services.
- package gosrc
-
- import (
- "encoding/xml"
- "errors"
- "fmt"
- "io"
- "net/http"
- "path"
- "regexp"
- "strings"
- "time"
- )
-
// ExpiresAfter is the age (two 365-day years) after which a package with no
// commits and no imports expires.
const ExpiresAfter = 24 * time.Hour * 365 * 2
-
// File represents a single file of a package directory.
type File struct {
	// Name is the file name, without any directory component.
	Name string

	// Data holds the contents of the file.
	Data []byte

	// BrowseURL is the location of the file on the version control
	// service's website.
	BrowseURL string
}
-
// DirectoryStatus classifies the activity of a directory's repository.
type DirectoryStatus int

// Known directory statuses. Active is the zero value.
const (
	// Active is the default status.
	Active DirectoryStatus = 0

	// DeadEndFork marks forks with no commits.
	DeadEndFork DirectoryStatus = 1

	// QuickFork marks forks with less than 3 commits, all within a week
	// from creation.
	QuickFork DirectoryStatus = 2

	// NoRecentCommits marks repositories with no commits for ExpiresAfter.
	NoRecentCommits DirectoryStatus = 3

	// Inactive marks repositories with no commits for ExpiresAfter and no
	// imports. This is a status derived from NoRecentCommits and the imports
	// count information in the db.
	Inactive DirectoryStatus = 4
)
-
- // Directory describes a directory on a version control service.
- type Directory struct {
- // The import path for this package.
- ImportPath string
-
- // Import path of package after resolving go-import meta tags, if any.
- ResolvedPath string
-
- // Import path prefix for all packages in the project.
- ProjectRoot string
-
- // Name of the project.
- ProjectName string
-
- // Project home page.
- ProjectURL string
-
- // Version control system: git, hg, bzr, ...
- VCS string
-
- // Version control: active or should be suppressed.
- Status DirectoryStatus
-
- // Cache validation tag. This tag is not necessarily an HTTP entity tag.
- // The tag is "" if there is no meaningful cache validation for the VCS.
- Etag string
-
- // Files.
- Files []*File
-
- // Subdirectories, not guaranteed to contain Go code.
- Subdirectories []string
-
- // Location of directory on version control service website.
- BrowseURL string
-
- // Format specifier for link to source line. It must contain one %s (file URL)
- // followed by one %d (source line number), or be empty string if not available.
- // Example: "%s#L%d".
- LineFmt string
-
- // Whether the repository of this directory is a fork of another one.
- Fork bool
-
- // How many stars (for a GitHub project) or followers (for a BitBucket
- // project) the repository of this directory has.
- Stars int
- }
-
// Project represents a repository.
type Project struct {
	// Description is the description text of the repository.
	Description string
}
-
// NotFoundError indicates that the directory or presentation was not found.
type NotFoundError struct {
	// Message is a diagnostic message describing why the directory was not
	// found.
	Message string

	// Redirect specifies the path where the package can be found.
	Redirect string
}

// Error implements the error interface by returning the diagnostic message.
func (e NotFoundError) Error() string {
	msg := e.Message
	return msg
}
-
- // IsNotFound returns true if err is of type NotFoundError.
- func IsNotFound(err error) bool {
- _, ok := err.(NotFoundError)
- return ok
- }
-
// RemoteError wraps an error together with the host it is associated with.
type RemoteError struct {
	// Host is the host associated with the error.
	Host string

	// err is the wrapped error; Error reports its message.
	err error
}

// Error implements the error interface by returning the message of the
// wrapped error. The host is not part of the message.
func (e *RemoteError) Error() string {
	cause := e.err
	return cause.Error()
}
-
- type NotModifiedError struct {
- Since time.Time
- Status DirectoryStatus
- }
-
- func (e NotModifiedError) Error() string {
- msg := "package not modified"
- if !e.Since.IsZero() {
- msg += fmt.Sprintf(" since %s", e.Since.Format(time.RFC1123))
- }
- if e.Status == QuickFork {
- msg += " (package is a quick fork)"
- }
- return msg
- }
-
// errNoMatch is the sentinel error returned when an import path is not
// recognized; Get converts it into a NotFoundError.
var errNoMatch = errors.New("no match")
-
- // service represents a source code control service.
- type service struct {
- pattern *regexp.Regexp
- prefix string
- get func(*http.Client, map[string]string, string) (*Directory, error)
- getPresentation func(*http.Client, map[string]string) (*Presentation, error)
- getProject func(*http.Client, map[string]string) (*Project, error)
- }
-
- var services []*service
-
- func addService(s *service) {
- if s.prefix == "" {
- services = append(services, s)
- } else {
- services = append([]*service{s}, services...)
- }
- }
-
- func (s *service) match(importPath string) (map[string]string, error) {
- if !strings.HasPrefix(importPath, s.prefix) {
- return nil, nil
- }
- m := s.pattern.FindStringSubmatch(importPath)
- if m == nil {
- if s.prefix != "" {
- return nil, NotFoundError{Message: "Import path prefix matches known service, but regexp does not."}
- }
- return nil, nil
- }
- match := map[string]string{"importPath": importPath}
- for i, n := range s.pattern.SubexpNames() {
- if n != "" {
- match[n] = m[i]
- }
- }
- return match, nil
- }
-
// importMeta represents the values in a go-import meta tag. The three fields
// are the whitespace-separated fields of the tag's content attribute, in
// order.
type importMeta struct {
	projectRoot string // import path prefix of the project
	vcs         string // version control system
	repo        string // repository URL
}
-
// sourceMeta represents the values in a go-source meta tag. The four fields
// are the whitespace-separated fields of the tag's content attribute, in
// order.
type sourceMeta struct {
	projectRoot  string // import path prefix of the project
	projectURL   string // project home page
	dirTemplate  string // URL template for a directory
	fileTemplate string // URL template for a file and source line
}
-
// isHTTPURL reports whether s starts with the "https://" or "http://"
// scheme. The comparison is case-sensitive.
func isHTTPURL(s string) bool {
	for _, scheme := range [...]string{"https://", "http://"} {
		if strings.HasPrefix(s, scheme) {
			return true
		}
	}
	return false
}
-
// replaceDir expands the directory placeholders of the template s.
//
// dir is first stripped of leading and trailing slashes. Every "{dir}" is
// then replaced by the stripped directory, and every "{/dir}" by the stripped
// directory preceded by a slash, or by the empty string when the directory is
// empty.
//
// Both placeholders are expanded in a single pass over s, so text inserted
// for one placeholder is never scanned again: a directory name that happens
// to contain "{/dir}" is inserted literally rather than expanded a second
// time.
func replaceDir(s string, dir string) string {
	dir = strings.Trim(dir, "/")
	slashDir := ""
	if dir != "" {
		slashDir = "/" + dir
	}
	return strings.NewReplacer("{dir}", dir, "{/dir}", slashDir).Replace(s)
}
-
// attrValue returns the value of the first attribute in attrs whose local
// name equals name, ignoring case. It returns "" when there is no such
// attribute.
func attrValue(attrs []xml.Attr, name string) string {
	for i := range attrs {
		if attr := &attrs[i]; strings.EqualFold(attr.Name.Local, name) {
			return attr.Value
		}
	}
	return ""
}
-
- func fetchMeta(client *http.Client, importPath string) (scheme string, im *importMeta, sm *sourceMeta, redir bool, err error) {
- uri := importPath
- if !strings.Contains(uri, "/") {
- // Add slash for root of domain.
- uri = uri + "/"
- }
- uri = uri + "?go-get=1"
-
- c := httpClient{client: client}
- scheme = "https"
- resp, err := c.get(scheme + "://" + uri)
- if err != nil || resp.StatusCode != 200 {
- if err == nil {
- resp.Body.Close()
- }
- scheme = "http"
- resp, err = c.get(scheme + "://" + uri)
- if err != nil {
- return scheme, nil, nil, false, err
- }
- }
- defer resp.Body.Close()
- im, sm, redir, err = parseMeta(scheme, importPath, resp.Body)
- return scheme, im, sm, redir, err
- }
-
- var refreshToGodocPat = regexp.MustCompile(`(?i)^\d+; url=https?://godoc\.org/`)
-
- func parseMeta(scheme, importPath string, r io.Reader) (im *importMeta, sm *sourceMeta, redir bool, err error) {
- errorMessage := "go-import meta tag not found"
-
- d := xml.NewDecoder(r)
- d.Strict = false
- metaScan:
- for {
- t, tokenErr := d.Token()
- if tokenErr != nil {
- break metaScan
- }
- switch t := t.(type) {
- case xml.EndElement:
- if strings.EqualFold(t.Name.Local, "head") {
- break metaScan
- }
- case xml.StartElement:
- if strings.EqualFold(t.Name.Local, "body") {
- break metaScan
- }
- if !strings.EqualFold(t.Name.Local, "meta") {
- continue metaScan
- }
- if strings.EqualFold(attrValue(t.Attr, "http-equiv"), "refresh") {
- // Check for http-equiv refresh back to godoc.org.
- redir = refreshToGodocPat.MatchString(attrValue(t.Attr, "content"))
- continue metaScan
- }
- nameAttr := attrValue(t.Attr, "name")
- if nameAttr != "go-import" && nameAttr != "go-source" {
- continue metaScan
- }
- fields := strings.Fields(attrValue(t.Attr, "content"))
- if len(fields) < 1 {
- continue metaScan
- }
- projectRoot := fields[0]
- if !strings.HasPrefix(importPath, projectRoot) ||
- !(len(importPath) == len(projectRoot) || importPath[len(projectRoot)] == '/') {
- // Ignore if root is not a prefix of the path. This allows a
- // site to use a single error page for multiple repositories.
- continue metaScan
- }
- switch nameAttr {
- case "go-import":
- if len(fields) != 3 {
- errorMessage = "go-import meta tag content attribute does not have three fields"
- continue metaScan
- }
- if im != nil {
- im = nil
- errorMessage = "more than one go-import meta tag found"
- break metaScan
- }
- im = &importMeta{
- projectRoot: projectRoot,
- vcs: fields[1],
- repo: fields[2],
- }
- case "go-source":
- if sm != nil {
- // Ignore extra go-source meta tags.
- continue metaScan
- }
- if len(fields) != 4 {
- continue metaScan
- }
- sm = &sourceMeta{
- projectRoot: projectRoot,
- projectURL: fields[1],
- dirTemplate: fields[2],
- fileTemplate: fields[3],
- }
- }
- }
- }
- if im == nil {
- return nil, nil, redir, NotFoundError{Message: fmt.Sprintf("%s at %s://%s", errorMessage, scheme, importPath)}
- }
- if sm != nil && sm.projectRoot != im.projectRoot {
- sm = nil
- }
- return im, sm, redir, nil
- }
-
- // getVCSDirFn is called by getDynamic to fetch source using VCS commands. The
- // default value here does nothing. If the code is not built for App Engine,
- // then getvCSDirFn is set getVCSDir, the function that actually does the work.
- var getVCSDirFn = func(client *http.Client, m map[string]string, etag string) (*Directory, error) {
- return nil, errNoMatch
- }
-
- // getDynamic gets a directory from a service that is not statically known.
- func getDynamic(client *http.Client, importPath, etag string) (*Directory, error) {
- metaProto, im, sm, redir, err := fetchMeta(client, importPath)
- if err != nil {
- return nil, err
- }
-
- if im.projectRoot != importPath {
- var imRoot *importMeta
- metaProto, imRoot, _, redir, err = fetchMeta(client, im.projectRoot)
- if err != nil {
- return nil, err
- }
- if *imRoot != *im {
- return nil, NotFoundError{Message: "project root mismatch."}
- }
- }
-
- // clonePath is the repo URL from import meta tag, with the "scheme://" prefix removed.
- // It should be used for cloning repositories.
- // repo is the repo URL from import meta tag, with the "scheme://" prefix removed, and
- // a possible ".vcs" suffix trimmed.
- i := strings.Index(im.repo, "://")
- if i < 0 {
- return nil, NotFoundError{Message: "bad repo URL: " + im.repo}
- }
- proto := im.repo[:i]
- clonePath := im.repo[i+len("://"):]
- repo := strings.TrimSuffix(clonePath, "."+im.vcs)
- dirName := importPath[len(im.projectRoot):]
-
- resolvedPath := repo + dirName
- dir, err := getStatic(client, resolvedPath, etag)
- if err == errNoMatch {
- resolvedPath = repo + "." + im.vcs + dirName
- match := map[string]string{
- "dir": dirName,
- "importPath": importPath,
- "clonePath": clonePath,
- "repo": repo,
- "scheme": proto,
- "vcs": im.vcs,
- }
- dir, err = getVCSDirFn(client, match, etag)
- }
- if err != nil || dir == nil {
- return nil, err
- }
-
- dir.ImportPath = importPath
- dir.ProjectRoot = im.projectRoot
- dir.ResolvedPath = resolvedPath
- dir.ProjectName = path.Base(im.projectRoot)
- if !redir {
- dir.ProjectURL = metaProto + "://" + im.projectRoot
- }
-
- if sm == nil {
- return dir, nil
- }
-
- if isHTTPURL(sm.projectURL) {
- dir.ProjectURL = sm.projectURL
- }
-
- if isHTTPURL(sm.dirTemplate) {
- dir.BrowseURL = replaceDir(sm.dirTemplate, dirName)
- }
-
- // TODO: Refactor this to be simpler, implement the go-source meta tag spec fully.
- if isHTTPURL(sm.fileTemplate) {
- fileTemplate := replaceDir(sm.fileTemplate, dirName)
- if strings.Contains(fileTemplate, "{file}") {
- cut := strings.LastIndex(fileTemplate, "{file}") + len("{file}") // Cut point is right after last {file} section.
- switch hash := strings.Index(fileTemplate, "#"); {
- case hash == -1: // If there's no '#', place cut at the end.
- cut = len(fileTemplate)
- case hash > cut: // If a '#' comes after last {file}, use it as cut point.
- cut = hash
- }
- head, tail := fileTemplate[:cut], fileTemplate[cut:]
- for _, f := range dir.Files {
- f.BrowseURL = strings.Replace(head, "{file}", f.Name, -1)
- }
-
- if strings.Contains(tail, "{line}") {
- s := strings.Replace(tail, "%", "%%", -1)
- s = strings.Replace(s, "{line}", "%d", 1)
- dir.LineFmt = "%s" + s
- }
- }
- }
-
- return dir, nil
- }
-
- // getStatic gets a diretory from a statically known service. getStatic
- // returns errNoMatch if the import path is not recognized.
- func getStatic(client *http.Client, importPath, etag string) (*Directory, error) {
- for _, s := range services {
- if s.get == nil {
- continue
- }
- match, err := s.match(importPath)
- if err != nil {
- return nil, err
- }
- if match != nil {
- dir, err := s.get(client, match, etag)
- if dir != nil {
- dir.ImportPath = importPath
- dir.ResolvedPath = importPath
- }
- return dir, err
- }
- }
- return nil, errNoMatch
- }
-
- func Get(client *http.Client, importPath string, etag string) (dir *Directory, err error) {
- switch {
- case localPath != "":
- dir, err = getLocal(importPath)
- case IsGoRepoPath(importPath):
- dir, err = getStandardDir(client, importPath, etag)
- case IsValidRemotePath(importPath):
- dir, err = getStatic(client, importPath, etag)
- if err == errNoMatch {
- dir, err = getDynamic(client, importPath, etag)
- }
- default:
- err = errNoMatch
- }
-
- if err == errNoMatch {
- err = NotFoundError{Message: "Import path not valid:"}
- }
-
- return dir, err
- }
-
- // GetPresentation gets a presentation from the the given path.
- func GetPresentation(client *http.Client, importPath string) (*Presentation, error) {
- ext := path.Ext(importPath)
- if ext != ".slide" && ext != ".article" {
- return nil, NotFoundError{Message: "unknown file extension."}
- }
-
- importPath, file := path.Split(importPath)
- importPath = strings.TrimSuffix(importPath, "/")
- for _, s := range services {
- if s.getPresentation == nil {
- continue
- }
- match, err := s.match(importPath)
- if err != nil {
- return nil, err
- }
- if match != nil {
- match["file"] = file
- return s.getPresentation(client, match)
- }
- }
- return nil, NotFoundError{Message: "path does not match registered service"}
- }
-
- // GetProject gets information about a repository.
- func GetProject(client *http.Client, importPath string) (*Project, error) {
- for _, s := range services {
- if s.getProject == nil {
- continue
- }
- match, err := s.match(importPath)
- if err != nil {
- return nil, err
- }
- if match != nil {
- return s.getProject(client, match)
- }
- }
- return nil, NotFoundError{Message: "path does not match registered service"}
- }
|