A VCS repository archival tool
Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

82 рядки
2.9 KiB

  1. import codearchiver.core
  2. import codearchiver.subprocess
  3. import datetime
  4. import functools
  5. import logging
  6. import os.path
  7. import shutil
  8. import subprocess
# Module-level logger named after this module; the Git module below uses it
# for progress, error and fatal messages.
logger = logging.getLogger(__name__)
  10. class GitIndex(codearchiver.core.Index):
  11. fields = [
  12. codearchiver.core.IndexField(key = 'Ref', required = True, repeatable = True),
  13. codearchiver.core.IndexField(key = 'Root commit', required = True, repeatable = True),
  14. codearchiver.core.IndexField(key = 'Commit', required = True, repeatable = True),
  15. ]
  16. class Git(codearchiver.core.Module):
  17. name = 'git'
  18. @staticmethod
  19. def matches(inputUrl):
  20. return inputUrl.url.endswith('.git')
  21. def __init__(self, *args, extraBranches = {}, **kwargs):
  22. super().__init__(*args, **kwargs)
  23. self._extraBranches = extraBranches
  24. def process(self):
  25. directory = self._url.rsplit('/', 1)[1]
  26. if os.path.exists(directory):
  27. logger.fatal(f'{directory!r} already exists')
  28. return
  29. startTime = datetime.datetime.utcnow()
  30. if self._id is None:
  31. self._id = f'git_{self._url.replace("/", "_")}_{startTime:%Y%m%dT%H%M%SZ}'
  32. bundle = f'{self._id}.bundle'
  33. if os.path.exists(bundle):
  34. logger.fatal(f'{bundle!r} already exists')
  35. return
  36. logger.info(f'Cloning {self._url} into {directory}')
  37. codearchiver.subprocess.run_with_log(['git', 'clone', '--verbose', '--mirror', self._url, directory])
  38. if self._extraBranches:
  39. for branch, commit in self._extraBranches.items():
  40. logger.info(f'Fetching commit {commit} as {branch}')
  41. r = codearchiver.subprocess.run_with_log(['git', 'fetch', '--verbose', '--progress', 'origin', commit], cwd = directory, check = False)
  42. if r.returncode == 0:
  43. r2 = codearchiver.subprocess.run_with_log(['git', 'update-ref', f'refs/codearchiver/{branch}', commit, ''], cwd = directory, check = False)
  44. if r2.returncode != 0:
  45. logger.error(f'Failed to update-ref refs/codearchiver/{branch} to {commit}')
  46. else:
  47. logger.error(f'Failed to fetch {commit}')
  48. logger.info(f'Bundling into {bundle}')
  49. codearchiver.subprocess.run_with_log(['git', 'bundle', 'create', '--progress', f'../{bundle}', '--all'], cwd = directory)
  50. logger.info(f'Collecting repository metadata for index')
  51. _, refs = codearchiver.subprocess.run_with_log(['git', 'show-ref'], cwd = directory)
  52. _, commits = codearchiver.subprocess.run_with_log(['git', 'log', '--reflog', '--format=format:%H% P'], cwd = directory)
  53. logger.info(f'Removing clone')
  54. shutil.rmtree(directory)
  55. index = GitIndex()
  56. for line in refs.splitlines():
  57. index.append('Ref', line)
  58. for commitHash, *parents in map(functools.partial(str.split, sep = ' '), commits.splitlines()):
  59. index.append('Commit', commitHash)
  60. if not parents:
  61. index.append('Root commit', commitHash)
  62. return codearchiver.core.Result(id = self._id, files = [(bundle, index)])
  63. def __repr__(self):
  64. return f'{type(self).__module__}.{type(self).__name__}({self._inputUrl!r}, extraBranches = {self._extraBranches!r})'