A VCS repository archival tool
Du kannst nicht mehr als 25 Themen auswählen. Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

120 Zeilen
5.1 KiB

  1. import codearchiver.core
  2. import codearchiver.subprocess
  3. import datetime
  4. import functools
  5. import hashlib
  6. import logging
  7. import os.path
  8. import shutil
  9. import subprocess
  # Module-level logger named after this module; the Git module below reports its progress and errors through it.
  10. _logger = logging.getLogger(__name__)
  11. class GitIndex(codearchiver.core.Index):
  12. fields = [
  13. codearchiver.core.IndexField(key = 'Based on bundle', required = False, repeatable = True),
  14. codearchiver.core.IndexField(key = 'Ref', required = True, repeatable = True),
  15. codearchiver.core.IndexField(key = 'Root commit', required = True, repeatable = True),
  16. codearchiver.core.IndexField(key = 'Commit', required = False, repeatable = True),
  17. ]
  18. class Git(codearchiver.core.Module):
  19. name = 'git'
  20. @staticmethod
  21. def matches(inputUrl):
  22. return inputUrl.url.endswith('.git')
  23. def __init__(self, *args, extraBranches = {}, **kwargs):
  24. super().__init__(*args, **kwargs)
  25. self._extraBranches = extraBranches
  26. def process(self):
  27. directory = self._url.rsplit('/', 1)[1]
  28. if os.path.exists(directory):
  29. _logger.fatal(f'{directory!r} already exists')
  30. raise FileExistsError(f'{directory!r} already exists')
  31. startTime = datetime.datetime.utcnow()
  32. if self._id is None:
  33. self._id = f'git_{self._url.replace("/", "_")}_{startTime:%Y%m%dT%H%M%SZ}'
  34. bundle = f'{self._id}.bundle'
  35. if os.path.exists(bundle):
  36. _logger.fatal(f'{bundle!r} already exists')
  37. raise FileExistsError(f'{bundle!r} already exists')
  38. _logger.info(f'Cloning {self._url} into {directory}')
  39. codearchiver.subprocess.run_with_log(['git', 'clone', '--verbose', '--progress', '--mirror', self._url, directory], env = {**os.environ, 'GIT_TERMINAL_PROMPT': '0'})
  40. if self._extraBranches:
  41. for branch, commit in self._extraBranches.items():
  42. _logger.info(f'Fetching commit {commit} as {branch}')
  43. r, _, _ = codearchiver.subprocess.run_with_log(['git', 'fetch', '--verbose', '--progress', 'origin', commit], cwd = directory, check = False)
  44. if r == 0:
  45. r2, _, _ = codearchiver.subprocess.run_with_log(['git', 'update-ref', f'refs/codearchiver/{branch}', commit, ''], cwd = directory, check = False)
  46. if r2 != 0:
  47. _logger.error(f'Failed to update-ref refs/codearchiver/{branch} to {commit}')
  48. else:
  49. _logger.error(f'Failed to fetch {commit}')
  50. _logger.info(f'Collecting repository metadata for index')
  51. _, refs, _ = codearchiver.subprocess.run_with_log(['git', 'show-ref'], cwd = directory)
  52. _, commits, _ = codearchiver.subprocess.run_with_log(['git', 'log', '--reflog', '--all', '--format=format:%H% P'], cwd = directory)
  53. commits = list(map(functools.partial(str.split, sep = ' '), commits.splitlines()))
  54. rootCommits = [c[0] for c in commits if len(c) == 1]
  55. # Check whether there are relevant prior bundles to create an incremental one
  56. # Collect their commits shared with this clone (else `git bundle` complains about 'bad object')
  57. commitSet = set(c[0] for c in commits) # For fast lookup
  58. oldCommits = {} # dict to keep the order reasonable
  59. basedOnBundles = {} # ditto
  60. if self._storage:
  61. for oldBundle in self._storage.search_indices([('Root commit', c) for c in rootCommits]):
  62. if not oldBundle.startswith('git_'): #TODO Is there a more generic and elegant approach?
  63. continue
  64. _logger.info(f'Previous bundle: {oldBundle!r}')
  65. with self._storage.open_index(oldBundle) as fp:
  66. idx = GitIndex.deserialise(fp)
  67. for key, value in idx:
  68. if key == 'Commit' and value in commitSet:
  69. oldCommits[value] = True
  70. basedOnBundles[oldBundle] = True
  71. _logger.info(f'Bundling into {bundle}')
  72. status , _, stderr = codearchiver.subprocess.run_with_log(['git', 'bundle', 'create', '--progress', f'../{bundle}', '--stdin', '--reflog', '--all'], cwd = directory, input = ''.join(f'^{commit}\n' for commit in oldCommits).encode('ascii'), check = False)
  73. if status == 128 and stderr == 'fatal: Refusing to create empty bundle.\n':
  74. # Manually write an empty bundle instead
  75. # Cf. Documentation/technical/bundle-format.txt and Documentation/technical/pack-format.txt in git's repository for details on the formats
  76. _logger.info('Writing empty bundle directly instead')
  77. with open(bundle, 'wb') as fp:
  78. fp.write(b'# v2 git bundle\n') # bundle signature
  79. fp.write(b'\n') # bundle end of prerequisites and refs
  80. packdata = b'PACK' # pack signature
  81. packdata += b'\0\0\0\x02' # pack version
  82. packdata += b'\0\0\0\0' # pack number of objects
  83. fp.write(packdata)
  84. fp.write(hashlib.sha1(packdata).digest()) # pack checksum trailer
  85. elif status != 0:
  86. raise RuntimeError(f'git bundle creation returned with non-zero exit status {status}.')
  87. _logger.info(f'Removing clone')
  88. shutil.rmtree(directory)
  89. index = GitIndex()
  90. for oldBundle in basedOnBundles:
  91. index.append('Based on bundle', oldBundle)
  92. for line in refs.splitlines():
  93. index.append('Ref', line)
  94. for commitHash, *parents in commits:
  95. if commitHash not in oldCommits:
  96. index.append('Commit', commitHash)
  97. if not parents:
  98. index.append('Root commit', commitHash)
  99. return codearchiver.core.Result(id = self._id, files = [(bundle, index)])
  100. def __repr__(self):
  101. return f'{type(self).__module__}.{type(self).__name__}({self._inputUrl!r}, extraBranches = {self._extraBranches!r})'