A VCS repository archival tool
25'ten fazla konu seçemezsiniz. Konular bir harf veya rakamla başlamalı, kısa çizgiler ('-') içerebilir ve en fazla 35 karakter uzunluğunda olabilir.

133 satır
6.0 KiB

  1. import codearchiver.core
  2. import codearchiver.subprocess
  3. import datetime
  4. import functools
  5. import hashlib
  6. import itertools
  7. import logging
  8. import os.path
  9. import shutil
  10. import subprocess
  # Module-level logger, named after this module via __name__.
  11. _logger = logging.getLogger(__name__)
  class GitMetadata(codearchiver.core.Metadata):
      """Metadata schema attached to bundles produced by the Git module.

      Keys, as filled in by Git.process:
        - 'Git version': version string reported by `git --version`
        - 'Based on bundle': earlier bundles whose objects were excluded from this one
        - 'Ref': one line of `git show-ref` output per ref
        - 'Root commit': hash of every commit without parents
        - 'Commit': hash of every commit not already known from an earlier bundle
      """

      # Each row is (key, required, repeatable).
      fields = tuple(
          codearchiver.core.MetadataField(key = key, required = required, repeatable = repeatable)
          for key, required, repeatable in (
              ('Git version', True, False),
              ('Based on bundle', False, True),
              ('Ref', True, True),
              ('Root commit', True, True),
              ('Commit', False, True),
          )
      )
      version = 0
  class Git(codearchiver.core.Module):
      """Module that archives a Git repository as a (possibly incremental) bundle.

      The repository is mirror-cloned, bundled with `git bundle create`, and the
      clone is removed again. Refs and commits that are already listed in the
      metadata of earlier bundles found in storage are excluded from the new bundle.
      """

      name = 'git'
      MetadataClass = GitMetadata

      # Message git prints on stderr (with exit status 128) when there is nothing to bundle.
      _EMPTY_BUNDLE_ERROR = 'fatal: Refusing to create empty bundle.\n'

      @staticmethod
      def matches(inputUrl):
          """Return whether `inputUrl.url` ends with '.git'."""
          return inputUrl.url.endswith('.git')

      def __init__(self, *args, extraBranches = None, **kwargs):
          """Initialise the module.

          Args:
              *args, **kwargs: passed through unchanged to the base class.
              extraBranches: optional mapping of branch name to commit ID. Each commit
                  is fetched after cloning and stored as refs/codearchiver/<branch>.
                  Defaults to no extra branches.
          """
          super().__init__(*args, **kwargs)
          # Use a fresh dict per instance; a `{}` default value would be a single
          # object shared by every instance created without this argument.
          self._extraBranches = {} if extraBranches is None else extraBranches

      def process(self):
          """Clone the repository, write `<id>.bundle`, and build its metadata.

          Returns:
              A codearchiver.core.Result carrying this module's ID and one
              (bundle filename, metadata) pair.

          Raises:
              FileExistsError: if the clone directory or the bundle file already exists.
              RuntimeError: if `git --version` prints something unexpected, or if
                  bundle creation fails for a reason other than the bundle being empty.
          """
          # The clone goes into a directory named after the last URL path component.
          directory = self._url.rsplit('/', 1)[1]
          if os.path.exists(directory):
              _logger.critical(f'{directory!r} already exists')
              raise FileExistsError(f'{directory!r} already exists')

          bundle = f'{self._id}.bundle'
          if os.path.exists(bundle):
              _logger.critical(f'{bundle!r} already exists')
              raise FileExistsError(f'{bundle!r} already exists')

          # Only accept output of the exact form 'git version <digits and dots>\n'.
          _, gitVersion, _ = codearchiver.subprocess.run_with_log(['git', '--version'])
          if not gitVersion.startswith('git version ') or not gitVersion.endswith('\n') or gitVersion[12:-1].strip('0123456789.') != '':
              raise RuntimeError(f'Unexpected output from `git --version`: {gitVersion!r}')
          gitVersion = gitVersion[12:-1]

          _logger.info(f'Cloning {self._url} into {directory}')
          # NOTE(review): utcnow() returns a naive datetime and is deprecated since Python 3.12.
          # Whether create_metadata accepts timezone-aware values is not visible here — confirm before changing.
          startTime = datetime.datetime.utcnow()
          # GIT_TERMINAL_PROMPT=0 makes git fail instead of prompting for credentials on the terminal.
          codearchiver.subprocess.run_with_log(['git', 'clone', '--verbose', '--progress', '--mirror', self._url, directory], env = {**os.environ, 'GIT_TERMINAL_PROMPT': '0'})

          if self._extraBranches:
              for branch, commit in self._extraBranches.items():
                  _logger.info(f'Fetching commit {commit} as {branch}')
                  fetchStatus, _, _ = codearchiver.subprocess.run_with_log(['git', 'fetch', '--verbose', '--progress', 'origin', commit], cwd = directory, check = False)
                  if fetchStatus != 0:
                      # Best effort: a commit that cannot be fetched is logged and skipped.
                      _logger.error(f'Failed to fetch {commit}')
                      continue
                  # The empty old-value argument makes update-ref require that the ref does not exist yet.
                  updateStatus, _, _ = codearchiver.subprocess.run_with_log(['git', 'update-ref', f'refs/codearchiver/{branch}', commit, ''], cwd = directory, check = False)
                  if updateStatus != 0:
                      _logger.error(f'Failed to update-ref refs/codearchiver/{branch} to {commit}')
              # This leaves over a FETCH_HEAD file, but git-bundle does not care about that, so it can safely be ignored.
          endTime = datetime.datetime.utcnow()

          _logger.info('Collecting repository metadata')
          _, refs, _ = codearchiver.subprocess.run_with_log(['git', 'show-ref'], cwd = directory)
          refs = [line.strip() for line in refs.splitlines()]
          # '%H% P' prints the commit hash followed by its parents, all separated by single spaces;
          # a commit without parents therefore yields a one-element list.
          _, commits, _ = codearchiver.subprocess.run_with_log(['git', 'log', '--reflog', '--all', '--format=format:%H% P'], cwd = directory)
          commits = [line.split(' ') for line in commits.splitlines()]
          rootCommits = [c[0] for c in commits if len(c) == 1]

          # Check whether there are relevant prior bundles to create an incremental one
          # Collect their commits and ref IDs shared with this clone (else `git bundle` complains about 'bad object')
          objectsSet = set(itertools.chain((c[0] for c in commits), (r.split(' ', 1)[0] for r in refs))) # For fast lookup
          knownObjects = {} # dict to keep the order reasonable
          basedOnBundles = {} # ditto
          if self._storage:
              for oldBundle in self._storage.search_metadata([('Module', type(self).name)] + [('Root commit', c) for c in rootCommits]):
                  _logger.info(f'Previous bundle: {oldBundle!r}')
                  with self._storage.open_metadata(oldBundle) as fp:
                      idx = GitMetadata.deserialise(fp)
                      for key, value in idx:
                          _logger.debug(f'Key/value in previous bundle: {key} → {value!r}')
                          if key == 'Ref':
                              # Only the part before the first space (the object ID) is compared.
                              value = value.split(' ', 1)[0]
                          if key in ('Ref', 'Commit') and value in objectsSet and value not in knownObjects:
                              _logger.debug(f'Filtering out {value}')
                              knownObjects[value] = True
                              basedOnBundles[oldBundle] = True

          _logger.info(f'Bundling into {bundle}')
          # Every known object is passed on stdin as '^<id>' so that git excludes it from the bundle.
          status, _, stderr = codearchiver.subprocess.run_with_log(['git', 'bundle', 'create', '--progress', f'../{bundle}', '--stdin', '--reflog', '--all'], cwd = directory, input = ''.join(f'^{o}\n' for o in knownObjects).encode('ascii'), check = False)
          # Accept the message either as the whole of stderr or as its final line, so that
          # output printed before it does not turn the empty-bundle case into a RuntimeError.
          # NOTE(review): assumes git may print other lines (e.g. warnings) to stderr first — confirm.
          emptyBundle = status == 128 and (stderr == self._EMPTY_BUNDLE_ERROR or stderr.endswith('\n' + self._EMPTY_BUNDLE_ERROR))
          if emptyBundle:
              # Manually write an empty bundle instead
              _logger.info('Writing empty bundle directly instead')
              self._write_empty_bundle(bundle)
          elif status != 0:
              raise RuntimeError(f'git bundle creation returned with non-zero exit status {status}.')

          _logger.info('Removing clone')
          shutil.rmtree(directory)

          metadata = self.create_metadata(bundle, startTime, endTime)
          metadata.append('Git version', gitVersion)
          for oldBundle in basedOnBundles:
              metadata.append('Based on bundle', oldBundle)
          for line in refs:
              metadata.append('Ref', line)
          for commitHash, *parents in commits:
              if commitHash not in knownObjects:
                  metadata.append('Commit', commitHash)
              # Root commits are always recorded, even when already known: the field is
              # required and is what the search for previous bundles above matches on.
              if not parents:
                  metadata.append('Root commit', commitHash)

          return codearchiver.core.Result(id = self._id, files = [(bundle, metadata)])

      @staticmethod
      def _write_empty_bundle(path):
          """Write a bundle without prerequisites, refs, or objects to `path`."""
          # Cf. Documentation/technical/bundle-format.txt and Documentation/technical/pack-format.txt in git's repository for details on the formats
          packdata = b''.join((
              b'PACK', # pack signature
              b'\0\0\0\x02', # pack version
              b'\0\0\0\0', # pack number of objects
          ))
          with open(path, 'wb') as fp:
              fp.write(b'# v2 git bundle\n') # bundle signature
              fp.write(b'\n') # bundle end of prerequisites and refs
              fp.write(packdata)
              fp.write(hashlib.sha1(packdata).digest()) # pack checksum trailer

      def __repr__(self):
          """Return a constructor-like representation including the input URL and extra branches."""
          return f'{type(self).__module__}.{type(self).__name__}({self._inputUrl!r}, extraBranches = {self._extraBranches!r})'