Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.
 
 
 

529 rader
14 KiB

  1. // Copyright 2013 The Go Authors. All rights reserved.
  2. //
  3. // Use of this source code is governed by a BSD-style
  4. // license that can be found in the LICENSE file or at
  5. // https://developers.google.com/open-source/licenses/bsd.
  6. // Package gosrc fetches Go package source code from version control services.
  7. package gosrc
  8. import (
  9. "encoding/xml"
  10. "errors"
  11. "fmt"
  12. "io"
  13. "net/http"
  14. "path"
  15. "regexp"
  16. "strings"
  17. "time"
  18. )
// ExpiresAfter is the inactivity threshold (two 365-day years): a package
// with no commits for this long and no imports expires.
const ExpiresAfter = 2 * 365 * 24 * time.Hour
// File represents a single file of a directory on a version control service.
type File struct {
	// Name is the file name with no directory.
	Name string

	// Data holds the contents of the file.
	Data []byte

	// BrowseURL is the location of the file on the version control service
	// website.
	BrowseURL string
}
// DirectoryStatus classifies the activity of the repository a directory
// belongs to.
type DirectoryStatus int

const (
	// Active is the zero value of DirectoryStatus.
	Active DirectoryStatus = iota

	// DeadEndFork marks forks with no commits.
	DeadEndFork

	// QuickFork marks forks with less than 3 commits, all within a week from
	// creation.
	QuickFork

	// NoRecentCommits marks repositories with no commits for ExpiresAfter.
	NoRecentCommits

	// Inactive means no commits for ExpiresAfter and no imports.
	// This is a status derived from NoRecentCommits and the imports count
	// information in the db.
	Inactive
)
// Directory describes a directory on a version control service.
type Directory struct {
	// ImportPath is the import path for this package.
	ImportPath string

	// ResolvedPath is the import path of the package after resolving
	// go-import meta tags, if any.
	ResolvedPath string

	// ProjectRoot is the import path prefix for all packages in the project.
	ProjectRoot string

	// ProjectName is the name of the project.
	ProjectName string

	// ProjectURL is the project home page.
	ProjectURL string

	// VCS names the version control system: git, hg, bzr, ...
	VCS string

	// Status tells whether the directory is active or should be suppressed
	// (see DirectoryStatus).
	Status DirectoryStatus

	// Etag is the cache validation tag. This tag is not necessarily an HTTP
	// entity tag. The tag is "" if there is no meaningful cache validation
	// for the VCS.
	Etag string

	// Files lists the files of the directory.
	Files []*File

	// Subdirectories lists subdirectories, not guaranteed to contain Go code.
	Subdirectories []string

	// BrowseURL is the location of the directory on the version control
	// service website.
	BrowseURL string

	// LineFmt is the format specifier for a link to a source line. It must
	// contain one %s (file URL) followed by one %d (source line number), or
	// be the empty string if not available.
	// Example: "%s#L%d".
	LineFmt string

	// Fork reports whether the repository of this directory is a fork of
	// another one.
	Fork bool

	// Stars is how many stars (for a GitHub project) or followers (for a
	// BitBucket project) the repository of this directory has.
	Stars int
}
// Project represents a repository.
type Project struct {
	// Description is the description text of the repository.
	Description string
}
  78. // NotFoundError indicates that the directory or presentation was not found.
  79. type NotFoundError struct {
  80. // Diagnostic message describing why the directory was not found.
  81. Message string
  82. // Redirect specifies the path where package can be found.
  83. Redirect string
  84. }
  85. func (e NotFoundError) Error() string {
  86. return e.Message
  87. }
  88. // IsNotFound returns true if err is of type NotFoundError.
  89. func IsNotFound(err error) bool {
  90. _, ok := err.(NotFoundError)
  91. return ok
  92. }
  93. type RemoteError struct {
  94. Host string
  95. err error
  96. }
  97. func (e *RemoteError) Error() string {
  98. return e.err.Error()
  99. }
  100. type NotModifiedError struct {
  101. Since time.Time
  102. Status DirectoryStatus
  103. }
  104. func (e NotModifiedError) Error() string {
  105. msg := "package not modified"
  106. if !e.Since.IsZero() {
  107. msg += fmt.Sprintf(" since %s", e.Since.Format(time.RFC1123))
  108. }
  109. if e.Status == QuickFork {
  110. msg += " (package is a quick fork)"
  111. }
  112. return msg
  113. }
// errNoMatch is the sentinel error signalling that an import path was not
// recognized; Get converts it into a NotFoundError before returning.
var errNoMatch = errors.New("no match")
// service represents a source code control service.
type service struct {
	// pattern is matched against the import path; its named subexpressions
	// become entries of the match map passed to the callbacks below.
	pattern *regexp.Regexp

	// prefix must be a prefix of the import path for the service to be
	// considered. It may be empty.
	prefix string

	// get fetches a directory given the match map and an etag string.
	// Services with a nil get are skipped by getStatic.
	get func(*http.Client, map[string]string, string) (*Directory, error)

	// getPresentation fetches a presentation given the match map.
	// Services with a nil getPresentation are skipped by GetPresentation.
	getPresentation func(*http.Client, map[string]string) (*Presentation, error)

	// getProject fetches project information given the match map.
	// Services with a nil getProject are skipped by GetProject.
	getProject func(*http.Client, map[string]string) (*Project, error)
}
// services is the list of registered services. It is filled by addService and
// scanned in order by getStatic, GetPresentation and GetProject.
var services []*service
  124. func addService(s *service) {
  125. if s.prefix == "" {
  126. services = append(services, s)
  127. } else {
  128. services = append([]*service{s}, services...)
  129. }
  130. }
  131. func (s *service) match(importPath string) (map[string]string, error) {
  132. if !strings.HasPrefix(importPath, s.prefix) {
  133. return nil, nil
  134. }
  135. m := s.pattern.FindStringSubmatch(importPath)
  136. if m == nil {
  137. if s.prefix != "" {
  138. return nil, NotFoundError{Message: "Import path prefix matches known service, but regexp does not."}
  139. }
  140. return nil, nil
  141. }
  142. match := map[string]string{"importPath": importPath}
  143. for i, n := range s.pattern.SubexpNames() {
  144. if n != "" {
  145. match[n] = m[i]
  146. }
  147. }
  148. return match, nil
  149. }
// importMeta represents the values in a go-import meta tag. parseMeta fills
// the fields from the three whitespace-separated fields of the tag's content
// attribute, in order.
type importMeta struct {
	projectRoot string // first field: import path prefix of the project
	vcs         string // second field: version control system name
	repo        string // third field: repository URL, including its scheme
}
// sourceMeta represents the values in a go-source meta tag. parseMeta fills
// the fields from the four whitespace-separated fields of the tag's content
// attribute, in order.
type sourceMeta struct {
	projectRoot  string // first field: import path prefix of the project
	projectURL   string // second field: project home page
	dirTemplate  string // third field: URL template for a directory ({dir}, {/dir})
	fileTemplate string // fourth field: URL template for a file and line ({file}, {line})
}
  163. func isHTTPURL(s string) bool {
  164. return strings.HasPrefix(s, "https://") || strings.HasPrefix(s, "http://")
  165. }
  166. func replaceDir(s string, dir string) string {
  167. slashDir := ""
  168. dir = strings.Trim(dir, "/")
  169. if dir != "" {
  170. slashDir = "/" + dir
  171. }
  172. s = strings.Replace(s, "{dir}", dir, -1)
  173. s = strings.Replace(s, "{/dir}", slashDir, -1)
  174. return s
  175. }
  176. func attrValue(attrs []xml.Attr, name string) string {
  177. for _, a := range attrs {
  178. if strings.EqualFold(a.Name.Local, name) {
  179. return a.Value
  180. }
  181. }
  182. return ""
  183. }
  184. func fetchMeta(client *http.Client, importPath string) (scheme string, im *importMeta, sm *sourceMeta, redir bool, err error) {
  185. uri := importPath
  186. if !strings.Contains(uri, "/") {
  187. // Add slash for root of domain.
  188. uri = uri + "/"
  189. }
  190. uri = uri + "?go-get=1"
  191. c := httpClient{client: client}
  192. scheme = "https"
  193. resp, err := c.get(scheme + "://" + uri)
  194. if err != nil || resp.StatusCode != 200 {
  195. if err == nil {
  196. resp.Body.Close()
  197. }
  198. scheme = "http"
  199. resp, err = c.get(scheme + "://" + uri)
  200. if err != nil {
  201. return scheme, nil, nil, false, err
  202. }
  203. }
  204. defer resp.Body.Close()
  205. im, sm, redir, err = parseMeta(scheme, importPath, resp.Body)
  206. return scheme, im, sm, redir, err
  207. }
// refreshToGodocPat matches, case-insensitively, the content of an http-equiv
// refresh meta tag that redirects to godoc.org, e.g.
// "0; url=https://godoc.org/...".
var refreshToGodocPat = regexp.MustCompile(`(?i)^\d+; url=https?://godoc\.org/`)
// parseMeta scans the HTML document read from r for go-import and go-source
// meta tags that apply to importPath.
//
// Scanning stops at the end of the head element, at the start of the body
// element, or at the first tokenizer error. Tags whose project root is not a
// path prefix of importPath are ignored.
//
// On success it returns the go-import values in im, the go-source values in
// sm (nil if absent or if its project root differs from im's), and redir,
// which reports whether the last http-equiv refresh meta tag seen points back
// to godoc.org. If no usable go-import tag is found, or more than one is
// found, the error is a NotFoundError whose message mentions scheme and
// importPath; redir is still returned in that case.
func parseMeta(scheme, importPath string, r io.Reader) (im *importMeta, sm *sourceMeta, redir bool, err error) {
	// errorMessage is refined as the scan learns why no tag could be used.
	errorMessage := "go-import meta tag not found"

	d := xml.NewDecoder(r)
	// Non-strict mode lets the XML decoder tolerate typical HTML input.
	d.Strict = false

metaScan:
	for {
		t, tokenErr := d.Token()
		if tokenErr != nil {
			// End of input or unparsable markup: work with what was found.
			break metaScan
		}
		switch t := t.(type) {
		case xml.EndElement:
			if strings.EqualFold(t.Name.Local, "head") {
				break metaScan
			}
		case xml.StartElement:
			if strings.EqualFold(t.Name.Local, "body") {
				break metaScan
			}
			if !strings.EqualFold(t.Name.Local, "meta") {
				continue metaScan
			}
			if strings.EqualFold(attrValue(t.Attr, "http-equiv"), "refresh") {
				// Check for http-equiv refresh back to godoc.org.
				redir = refreshToGodocPat.MatchString(attrValue(t.Attr, "content"))
				continue metaScan
			}
			nameAttr := attrValue(t.Attr, "name")
			if nameAttr != "go-import" && nameAttr != "go-source" {
				continue metaScan
			}
			fields := strings.Fields(attrValue(t.Attr, "content"))
			if len(fields) < 1 {
				continue metaScan
			}
			projectRoot := fields[0]
			if !strings.HasPrefix(importPath, projectRoot) ||
				!(len(importPath) == len(projectRoot) || importPath[len(projectRoot)] == '/') {
				// Ignore if root is not a prefix of the path. This allows a
				// site to use a single error page for multiple repositories.
				continue metaScan
			}
			switch nameAttr {
			case "go-import":
				if len(fields) != 3 {
					errorMessage = "go-import meta tag content attribute does not have three fields"
					continue metaScan
				}
				if im != nil {
					// A second applicable go-import tag makes the result
					// ambiguous: discard the first and stop scanning.
					im = nil
					errorMessage = "more than one go-import meta tag found"
					break metaScan
				}
				im = &importMeta{
					projectRoot: projectRoot,
					vcs:         fields[1],
					repo:        fields[2],
				}
			case "go-source":
				if sm != nil {
					// Ignore extra go-source meta tags.
					continue metaScan
				}
				if len(fields) != 4 {
					// Malformed go-source tags are skipped silently.
					continue metaScan
				}
				sm = &sourceMeta{
					projectRoot:  projectRoot,
					projectURL:   fields[1],
					dirTemplate:  fields[2],
					fileTemplate: fields[3],
				}
			}
		}
	}
	if im == nil {
		return nil, nil, redir, NotFoundError{Message: fmt.Sprintf("%s at %s://%s", errorMessage, scheme, importPath)}
	}
	// A go-source tag is only honored when it describes the same project root
	// as the go-import tag.
	if sm != nil && sm.projectRoot != im.projectRoot {
		sm = nil
	}
	return im, sm, redir, nil
}
// getVCSDirFn is called by getDynamic to fetch source using VCS commands. The
// default value here does nothing and returns errNoMatch. If the code is not
// built for App Engine, then getVCSDirFn is set to getVCSDir, the function
// that actually does the work.
var getVCSDirFn = func(client *http.Client, m map[string]string, etag string) (*Directory, error) {
	return nil, errNoMatch
}
// getDynamic gets a directory from a service that is not statically known.
//
// It fetches the go-import (and optional go-source) meta tags for importPath,
// verifies them against the tags served at the project root, and then
// resolves the repository either through a statically known service
// (getStatic) or, if none matches, through getVCSDirFn. The returned
// directory is annotated with import path, project and browse information.
// A nil directory with a nil error is returned when the fetcher yields
// neither a directory nor an error.
func getDynamic(client *http.Client, importPath, etag string) (*Directory, error) {
	metaProto, im, sm, redir, err := fetchMeta(client, importPath)
	if err != nil {
		return nil, err
	}

	if im.projectRoot != importPath {
		// The import path is below the project root: the root itself must
		// serve an identical go-import tag. metaProto and redir are taken
		// from the root's response from here on.
		var imRoot *importMeta
		metaProto, imRoot, _, redir, err = fetchMeta(client, im.projectRoot)
		if err != nil {
			return nil, err
		}
		if *imRoot != *im {
			return nil, NotFoundError{Message: "project root mismatch."}
		}
	}

	// clonePath is the repo URL from import meta tag, with the "scheme://" prefix removed.
	// It should be used for cloning repositories.
	// repo is the repo URL from import meta tag, with the "scheme://" prefix removed, and
	// a possible ".vcs" suffix trimmed.
	i := strings.Index(im.repo, "://")
	if i < 0 {
		return nil, NotFoundError{Message: "bad repo URL: " + im.repo}
	}
	proto := im.repo[:i]
	clonePath := im.repo[i+len("://"):]
	repo := strings.TrimSuffix(clonePath, "."+im.vcs)
	// dirName is the part of the import path below the project root; it is
	// either empty or starts with "/".
	dirName := importPath[len(im.projectRoot):]

	resolvedPath := repo + dirName
	dir, err := getStatic(client, resolvedPath, etag)
	if err == errNoMatch {
		// No statically known service handles the repository: fall back to
		// fetching with VCS commands.
		resolvedPath = repo + "." + im.vcs + dirName
		match := map[string]string{
			"dir":        dirName,
			"importPath": importPath,
			"clonePath":  clonePath,
			"repo":       repo,
			"scheme":     proto,
			"vcs":        im.vcs,
		}
		dir, err = getVCSDirFn(client, match, etag)
	}
	if err != nil || dir == nil {
		return nil, err
	}

	dir.ImportPath = importPath
	dir.ProjectRoot = im.projectRoot
	dir.ResolvedPath = resolvedPath
	dir.ProjectName = path.Base(im.projectRoot)
	if !redir {
		// Only link to the project root page when it does not redirect back
		// to godoc.org.
		dir.ProjectURL = metaProto + "://" + im.projectRoot
	}

	if sm == nil {
		return dir, nil
	}

	// Apply the go-source meta tag; only http(s) URLs are accepted.
	if isHTTPURL(sm.projectURL) {
		dir.ProjectURL = sm.projectURL
	}

	if isHTTPURL(sm.dirTemplate) {
		dir.BrowseURL = replaceDir(sm.dirTemplate, dirName)
	}

	// TODO: Refactor this to be simpler, implement the go-source meta tag spec fully.
	if isHTTPURL(sm.fileTemplate) {
		fileTemplate := replaceDir(sm.fileTemplate, dirName)
		if strings.Contains(fileTemplate, "{file}") {
			// Split the template into a head (the file URL) and a tail (the
			// line anchor) at the cut point.
			cut := strings.LastIndex(fileTemplate, "{file}") + len("{file}") // Cut point is right after last {file} section.
			switch hash := strings.Index(fileTemplate, "#"); {
			case hash == -1: // If there's no '#', place cut at the end.
				cut = len(fileTemplate)
			case hash > cut: // If a '#' comes after last {file}, use it as cut point.
				cut = hash
			}
			head, tail := fileTemplate[:cut], fileTemplate[cut:]
			for _, f := range dir.Files {
				f.BrowseURL = strings.Replace(head, "{file}", f.Name, -1)
			}

			if strings.Contains(tail, "{line}") {
				// Escape literal '%' so the tail can serve as a format
				// string, then turn the first {line} into %d.
				s := strings.Replace(tail, "%", "%%", -1)
				s = strings.Replace(s, "{line}", "%d", 1)
				dir.LineFmt = "%s" + s
			}
		}
	}

	return dir, nil
}
  383. // getStatic gets a diretory from a statically known service. getStatic
  384. // returns errNoMatch if the import path is not recognized.
  385. func getStatic(client *http.Client, importPath, etag string) (*Directory, error) {
  386. for _, s := range services {
  387. if s.get == nil {
  388. continue
  389. }
  390. match, err := s.match(importPath)
  391. if err != nil {
  392. return nil, err
  393. }
  394. if match != nil {
  395. dir, err := s.get(client, match, etag)
  396. if dir != nil {
  397. dir.ImportPath = importPath
  398. dir.ResolvedPath = importPath
  399. }
  400. return dir, err
  401. }
  402. }
  403. return nil, errNoMatch
  404. }
  405. func Get(client *http.Client, importPath string, etag string) (dir *Directory, err error) {
  406. switch {
  407. case localPath != "":
  408. dir, err = getLocal(importPath)
  409. case IsGoRepoPath(importPath):
  410. dir, err = getStandardDir(client, importPath, etag)
  411. case IsValidRemotePath(importPath):
  412. dir, err = getStatic(client, importPath, etag)
  413. if err == errNoMatch {
  414. dir, err = getDynamic(client, importPath, etag)
  415. }
  416. default:
  417. err = errNoMatch
  418. }
  419. if err == errNoMatch {
  420. err = NotFoundError{Message: "Import path not valid:"}
  421. }
  422. return dir, err
  423. }
  424. // GetPresentation gets a presentation from the the given path.
  425. func GetPresentation(client *http.Client, importPath string) (*Presentation, error) {
  426. ext := path.Ext(importPath)
  427. if ext != ".slide" && ext != ".article" {
  428. return nil, NotFoundError{Message: "unknown file extension."}
  429. }
  430. importPath, file := path.Split(importPath)
  431. importPath = strings.TrimSuffix(importPath, "/")
  432. for _, s := range services {
  433. if s.getPresentation == nil {
  434. continue
  435. }
  436. match, err := s.match(importPath)
  437. if err != nil {
  438. return nil, err
  439. }
  440. if match != nil {
  441. match["file"] = file
  442. return s.getPresentation(client, match)
  443. }
  444. }
  445. return nil, NotFoundError{Message: "path does not match registered service"}
  446. }
  447. // GetProject gets information about a repository.
  448. func GetProject(client *http.Client, importPath string) (*Project, error) {
  449. for _, s := range services {
  450. if s.getProject == nil {
  451. continue
  452. }
  453. match, err := s.match(importPath)
  454. if err != nil {
  455. return nil, err
  456. }
  457. if match != nil {
  458. return s.getProject(client, match)
  459. }
  460. }
  461. return nil, NotFoundError{Message: "path does not match registered service"}
  462. }