A VCS repository archival tool
Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

105 linhas
4.2 KiB

  1. import codearchiver.core
  2. import codearchiver.subprocess
  3. import datetime
  4. import functools
  5. import logging
  6. import os.path
  7. import shutil
  8. import subprocess
  9. _logger = logging.getLogger(__name__)
  10. class GitIndex(codearchiver.core.Index):
  11. fields = [
  12. codearchiver.core.IndexField(key = 'Based on bundle', required = False, repeatable = True),
  13. codearchiver.core.IndexField(key = 'Ref', required = True, repeatable = True),
  14. codearchiver.core.IndexField(key = 'Root commit', required = True, repeatable = True),
  15. codearchiver.core.IndexField(key = 'Commit', required = True, repeatable = True),
  16. ]
  17. class Git(codearchiver.core.Module):
  18. name = 'git'
  19. @staticmethod
  20. def matches(inputUrl):
  21. return inputUrl.url.endswith('.git')
  22. def __init__(self, *args, extraBranches = {}, **kwargs):
  23. super().__init__(*args, **kwargs)
  24. self._extraBranches = extraBranches
  25. def process(self):
  26. directory = self._url.rsplit('/', 1)[1]
  27. if os.path.exists(directory):
  28. _logger.fatal(f'{directory!r} already exists')
  29. raise FileExistsError(f'{directory!r} already exists')
  30. startTime = datetime.datetime.utcnow()
  31. if self._id is None:
  32. self._id = f'git_{self._url.replace("/", "_")}_{startTime:%Y%m%dT%H%M%SZ}'
  33. bundle = f'{self._id}.bundle'
  34. if os.path.exists(bundle):
  35. _logger.fatal(f'{bundle!r} already exists')
  36. raise FileExistsError(f'{bundle!r} already exists')
  37. _logger.info(f'Cloning {self._url} into {directory}')
  38. codearchiver.subprocess.run_with_log(['git', 'clone', '--verbose', '--progress', '--mirror', self._url, directory], env = {**os.environ, 'GIT_TERMINAL_PROMPT': '0'})
  39. if self._extraBranches:
  40. for branch, commit in self._extraBranches.items():
  41. _logger.info(f'Fetching commit {commit} as {branch}')
  42. r = codearchiver.subprocess.run_with_log(['git', 'fetch', '--verbose', '--progress', 'origin', commit], cwd = directory, check = False)
  43. if r.returncode == 0:
  44. r2 = codearchiver.subprocess.run_with_log(['git', 'update-ref', f'refs/codearchiver/{branch}', commit, ''], cwd = directory, check = False)
  45. if r2.returncode != 0:
  46. _logger.error(f'Failed to update-ref refs/codearchiver/{branch} to {commit}')
  47. else:
  48. _logger.error(f'Failed to fetch {commit}')
  49. _logger.info(f'Collecting repository metadata for index')
  50. _, refs = codearchiver.subprocess.run_with_log(['git', 'show-ref'], cwd = directory)
  51. _, commits = codearchiver.subprocess.run_with_log(['git', 'log', '--reflog', '--all', '--format=format:%H% P'], cwd = directory)
  52. commits = list(map(functools.partial(str.split, sep = ' '), commits.splitlines()))
  53. rootCommits = [c[0] for c in commits if len(c) == 1]
  54. # Check whether there are relevant prior bundles to create an incremental one
  55. # Collect their commits shared with this clone (else `git bundle` complains about 'bad object')
  56. commitSet = set(c[0] for c in commits) # For fast lookup
  57. oldCommits = {} # dict to keep the order reasonable
  58. basedOnBundles = {} # ditto
  59. if self._storage:
  60. for oldBundle in self._storage.search_indices([('Root commit', c) for c in rootCommits]):
  61. if not oldBundle.startswith('git_'): #TODO Is there a more generic and elegant approach?
  62. continue
  63. _logger.info(f'Previous bundle: {oldBundle!r}')
  64. with self._storage.open_index(oldBundle) as fp:
  65. idx = GitIndex.deserialise(fp)
  66. for key, value in idx:
  67. if key == 'Commit' and value in commitSet:
  68. oldCommits[value] = True
  69. basedOnBundles[oldBundle] = True
  70. _logger.info(f'Bundling into {bundle}')
  71. codearchiver.subprocess.run_with_log(['git', 'bundle', 'create', '--progress', f'../{bundle}', '--stdin', '--reflog', '--all'], cwd = directory, input = ''.join(f'^{commit}\n' for commit in oldCommits).encode('ascii'))
  72. _logger.info(f'Removing clone')
  73. shutil.rmtree(directory)
  74. index = GitIndex()
  75. for oldBundle in basedOnBundles:
  76. index.append('Based on bundle', oldBundle)
  77. for line in refs.splitlines():
  78. index.append('Ref', line)
  79. for commitHash, *parents in commits:
  80. if commitHash not in oldCommits:
  81. index.append('Commit', commitHash)
  82. if not parents:
  83. index.append('Root commit', commitHash)
  84. return codearchiver.core.Result(id = self._id, files = [(bundle, index)])
  85. def __repr__(self):
  86. return f'{type(self).__module__}.{type(self).__name__}({self._inputUrl!r}, extraBranches = {self._extraBranches!r})'