A VCS repository archival tool
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

125 lignes
5.6 KiB

  1. import codearchiver.core
  2. import codearchiver.subprocess
  3. import functools
  4. import hashlib
  5. import logging
  6. import os.path
  7. import shutil
  8. import subprocess
  9. _logger = logging.getLogger(__name__)
  10. class GitMetadata(codearchiver.core.Metadata):
  11. fields = (
  12. codearchiver.core.MetadataField(key = 'Git version', required = True, repeatable = False),
  13. codearchiver.core.MetadataField(key = 'Based on bundle', required = False, repeatable = True),
  14. codearchiver.core.MetadataField(key = 'Ref', required = True, repeatable = True),
  15. codearchiver.core.MetadataField(key = 'Root commit', required = True, repeatable = True),
  16. codearchiver.core.MetadataField(key = 'Commit', required = False, repeatable = True),
  17. )
  18. class Git(codearchiver.core.Module):
  19. name = 'git'
  20. MetadataClass = GitMetadata
  21. @staticmethod
  22. def matches(inputUrl):
  23. return inputUrl.url.endswith('.git')
  24. def __init__(self, *args, extraBranches = {}, **kwargs):
  25. super().__init__(*args, **kwargs)
  26. self._extraBranches = extraBranches
  27. def process(self):
  28. directory = self._url.rsplit('/', 1)[1]
  29. if os.path.exists(directory):
  30. _logger.fatal(f'{directory!r} already exists')
  31. raise FileExistsError(f'{directory!r} already exists')
  32. bundle = f'{self._id}.bundle'
  33. if os.path.exists(bundle):
  34. _logger.fatal(f'{bundle!r} already exists')
  35. raise FileExistsError(f'{bundle!r} already exists')
  36. _, gitVersion, _ = codearchiver.subprocess.run_with_log(['git', '--version'])
  37. if not gitVersion.startswith('git version ') or not gitVersion.endswith('\n') or gitVersion[12:-1].strip('0123456789.') != '':
  38. raise RuntimeError(f'Unexpected output from `git --version`: {gitVersion!r}')
  39. gitVersion = gitVersion[12:-1]
  40. _logger.info(f'Cloning {self._url} into {directory}')
  41. codearchiver.subprocess.run_with_log(['git', 'clone', '--verbose', '--progress', '--mirror', self._url, directory], env = {**os.environ, 'GIT_TERMINAL_PROMPT': '0'})
  42. if self._extraBranches:
  43. for branch, commit in self._extraBranches.items():
  44. _logger.info(f'Fetching commit {commit} as {branch}')
  45. r, _, _ = codearchiver.subprocess.run_with_log(['git', 'fetch', '--verbose', '--progress', 'origin', commit], cwd = directory, check = False)
  46. if r == 0:
  47. r2, _, _ = codearchiver.subprocess.run_with_log(['git', 'update-ref', f'refs/codearchiver/{branch}', commit, ''], cwd = directory, check = False)
  48. if r2 != 0:
  49. _logger.error(f'Failed to update-ref refs/codearchiver/{branch} to {commit}')
  50. else:
  51. _logger.error(f'Failed to fetch {commit}')
  52. # This leaves over a FETCH_HEAD file, but git-bundle does not care about that, so it can safely be ignored.
  53. _logger.info('Collecting repository metadata')
  54. _, refs, _ = codearchiver.subprocess.run_with_log(['git', 'show-ref'], cwd = directory)
  55. _, commits, _ = codearchiver.subprocess.run_with_log(['git', 'log', '--reflog', '--all', '--format=format:%H% P'], cwd = directory)
  56. commits = list(map(functools.partial(str.split, sep = ' '), commits.splitlines()))
  57. rootCommits = [c[0] for c in commits if len(c) == 1]
  58. # Check whether there are relevant prior bundles to create an incremental one
  59. # Collect their commits shared with this clone (else `git bundle` complains about 'bad object')
  60. commitSet = set(c[0] for c in commits) # For fast lookup
  61. oldCommits = {} # dict to keep the order reasonable
  62. basedOnBundles = {} # ditto
  63. if self._storage:
  64. for oldBundle in self._storage.search_metadata([('Root commit', c) for c in rootCommits]):
  65. if not oldBundle.startswith('git_'): #TODO Is there a more generic and elegant approach?
  66. continue
  67. _logger.info(f'Previous bundle: {oldBundle!r}')
  68. with self._storage.open_metadata(oldBundle) as fp:
  69. idx = GitMetadata.deserialise(fp)
  70. for key, value in idx:
  71. if key == 'Commit' and value in commitSet:
  72. oldCommits[value] = True
  73. basedOnBundles[oldBundle] = True
  74. _logger.info(f'Bundling into {bundle}')
  75. status , _, stderr = codearchiver.subprocess.run_with_log(['git', 'bundle', 'create', '--progress', f'../{bundle}', '--stdin', '--reflog', '--all'], cwd = directory, input = ''.join(f'^{commit}\n' for commit in oldCommits).encode('ascii'), check = False)
  76. if status == 128 and stderr == 'fatal: Refusing to create empty bundle.\n':
  77. # Manually write an empty bundle instead
  78. # Cf. Documentation/technical/bundle-format.txt and Documentation/technical/pack-format.txt in git's repository for details on the formats
  79. _logger.info('Writing empty bundle directly instead')
  80. with open(bundle, 'wb') as fp:
  81. fp.write(b'# v2 git bundle\n') # bundle signature
  82. fp.write(b'\n') # bundle end of prerequisites and refs
  83. packdata = b'PACK' # pack signature
  84. packdata += b'\0\0\0\x02' # pack version
  85. packdata += b'\0\0\0\0' # pack number of objects
  86. fp.write(packdata)
  87. fp.write(hashlib.sha1(packdata).digest()) # pack checksum trailer
  88. elif status != 0:
  89. raise RuntimeError(f'git bundle creation returned with non-zero exit status {status}.')
  90. _logger.info(f'Removing clone')
  91. shutil.rmtree(directory)
  92. metadata = self.create_metadata(bundle)
  93. metadata.append('Git version', gitVersion)
  94. for oldBundle in basedOnBundles:
  95. metadata.append('Based on bundle', oldBundle)
  96. for line in refs.splitlines():
  97. metadata.append('Ref', line)
  98. for commitHash, *parents in commits:
  99. if commitHash not in oldCommits:
  100. metadata.append('Commit', commitHash)
  101. if not parents:
  102. metadata.append('Root commit', commitHash)
  103. return codearchiver.core.Result(id = self._id, files = [(bundle, metadata)])
  104. def __repr__(self):
  105. return f'{type(self).__module__}.{type(self).__name__}({self._inputUrl!r}, extraBranches = {self._extraBranches!r})'