A VCS repository archival tool
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

82 lines
3.0 KiB

  1. import codearchiver.core
  2. import codearchiver.subprocess
  3. import datetime
  4. import functools
  5. import logging
  6. import os.path
  7. import shutil
  8. import subprocess
  9. logger = logging.getLogger(__name__)
  10. class GitIndex(codearchiver.core.Index):
  11. fields = [
  12. codearchiver.core.IndexField(key = 'Ref', required = True, repeatable = True),
  13. codearchiver.core.IndexField(key = 'Root commit', required = True, repeatable = True),
  14. codearchiver.core.IndexField(key = 'Commit', required = True, repeatable = True),
  15. ]
  16. class Git(codearchiver.core.Module):
  17. name = 'git'
  18. @staticmethod
  19. def matches(inputUrl):
  20. return inputUrl.url.endswith('.git')
  21. def __init__(self, *args, extraBranches = {}, **kwargs):
  22. super().__init__(*args, **kwargs)
  23. self._extraBranches = extraBranches
  24. def process(self):
  25. directory = self._url.rsplit('/', 1)[1]
  26. if os.path.exists(directory):
  27. logger.fatal(f'{directory!r} already exists')
  28. raise FileExistsError(f'{directory!r} already exists')
  29. startTime = datetime.datetime.utcnow()
  30. if self._id is None:
  31. self._id = f'git_{self._url.replace("/", "_")}_{startTime:%Y%m%dT%H%M%SZ}'
  32. bundle = f'{self._id}.bundle'
  33. if os.path.exists(bundle):
  34. logger.fatal(f'{bundle!r} already exists')
  35. raise FileExistsError(f'{bundle!r} already exists')
  36. logger.info(f'Cloning {self._url} into {directory}')
  37. codearchiver.subprocess.run_with_log(['git', 'clone', '--verbose', '--mirror', self._url, directory])
  38. if self._extraBranches:
  39. for branch, commit in self._extraBranches.items():
  40. logger.info(f'Fetching commit {commit} as {branch}')
  41. r = codearchiver.subprocess.run_with_log(['git', 'fetch', '--verbose', '--progress', 'origin', commit], cwd = directory, check = False)
  42. if r.returncode == 0:
  43. r2 = codearchiver.subprocess.run_with_log(['git', 'update-ref', f'refs/codearchiver/{branch}', commit, ''], cwd = directory, check = False)
  44. if r2.returncode != 0:
  45. logger.error(f'Failed to update-ref refs/codearchiver/{branch} to {commit}')
  46. else:
  47. logger.error(f'Failed to fetch {commit}')
  48. logger.info(f'Bundling into {bundle}')
  49. codearchiver.subprocess.run_with_log(['git', 'bundle', 'create', '--progress', f'../{bundle}', '--all'], cwd = directory)
  50. logger.info(f'Collecting repository metadata for index')
  51. _, refs = codearchiver.subprocess.run_with_log(['git', 'show-ref'], cwd = directory)
  52. _, commits = codearchiver.subprocess.run_with_log(['git', 'log', '--reflog', '--format=format:%H% P'], cwd = directory)
  53. logger.info(f'Removing clone')
  54. shutil.rmtree(directory)
  55. index = GitIndex()
  56. for line in refs.splitlines():
  57. index.append('Ref', line)
  58. for commitHash, *parents in map(functools.partial(str.split, sep = ' '), commits.splitlines()):
  59. index.append('Commit', commitHash)
  60. if not parents:
  61. index.append('Root commit', commitHash)
  62. return codearchiver.core.Result(id = self._id, files = [(bundle, index)])
  63. def __repr__(self):
  64. return f'{type(self).__module__}.{type(self).__name__}({self._inputUrl!r}, extraBranches = {self._extraBranches!r})'