|
- import codearchiver.core
- import codearchiver.subprocess
- import datetime
- import functools
- import logging
- import os.path
- import shutil
- import subprocess
-
-
- _logger = logging.getLogger(__name__)  # Module-level logger named after this module; Git.process() reports its progress and errors through it
-
-
class GitIndex(codearchiver.core.Index):
    """Index schema for the metadata stored alongside a Git bundle.

    All four fields are repeatable; only 'Based on bundle' is optional.
    """

    # Built from a compact (key, required) table; order of declaration is preserved.
    fields = [
        codearchiver.core.IndexField(key = fieldKey, required = isRequired, repeatable = True)
        for fieldKey, isRequired in (
            ('Based on bundle', False),
            ('Ref', True),
            ('Root commit', True),
            ('Commit', True),
        )
    ]
-
-
class Git(codearchiver.core.Module):
    """Module that archives a Git repository as a (possibly incremental) bundle.

    The repository is mirror-cloned into the current working directory,
    optionally extended with extra commits pinned under ``refs/codearchiver/``,
    packed with ``git bundle create``, and described by a :class:`GitIndex`.
    """

    name = 'git'

    @staticmethod
    def matches(inputUrl):
        """Return True if ``inputUrl.url`` ends with ``.git``."""
        return inputUrl.url.endswith('.git')

    def __init__(self, *args, extraBranches = None, **kwargs):
        """Initialise the module.

        Positional and remaining keyword arguments are forwarded unchanged to
        the parent class.

        extraBranches: optional mapping of branch name -> commit ID. Each commit
            is fetched after cloning and stored as ``refs/codearchiver/<branch>``.
            Defaults to no extra branches.
        """
        super().__init__(*args, **kwargs)
        # Use a None sentinel rather than a `{}` default so instances never share one dict object.
        self._extraBranches = {} if extraBranches is None else extraBranches

    def process(self):
        """Clone the repository, bundle it, and return the result with its index.

        Returns:
            A ``codearchiver.core.Result`` carrying this module's ID and a single
            ``(bundle filename, GitIndex)`` pair.

        Raises:
            FileExistsError: if the clone directory or the bundle file already exists.
        """
        # The clone directory is the last path component of the URL.
        directory = self._url.rsplit('/', 1)[1]
        if os.path.exists(directory):
            _logger.critical(f'{directory!r} already exists')
            raise FileExistsError(f'{directory!r} already exists')

        # Timezone-aware UTC timestamp; formats identically to the deprecated utcnow().
        startTime = datetime.datetime.now(datetime.timezone.utc)
        if self._id is None:
            self._id = f'git_{self._url.replace("/", "_")}_{startTime:%Y%m%dT%H%M%SZ}'
        bundle = f'{self._id}.bundle'
        if os.path.exists(bundle):
            _logger.critical(f'{bundle!r} already exists')
            raise FileExistsError(f'{bundle!r} already exists')

        _logger.info(f'Cloning {self._url} into {directory}')
        # GIT_TERMINAL_PROMPT=0 stops git from prompting for credentials on the terminal.
        codearchiver.subprocess.run_with_log(
            ['git', 'clone', '--verbose', '--progress', '--mirror', self._url, directory],
            env = {**os.environ, 'GIT_TERMINAL_PROMPT': '0'},
        )

        if self._extraBranches:
            for branch, commit in self._extraBranches.items():
                _logger.info(f'Fetching commit {commit} as {branch}')
                # Best effort: a failure here is logged but does not abort the archival.
                r, _ = codearchiver.subprocess.run_with_log(
                    ['git', 'fetch', '--verbose', '--progress', 'origin', commit],
                    cwd = directory,
                    check = False,
                )
                if r == 0:
                    # The empty old-value argument makes update-ref refuse to overwrite an existing ref.
                    r2, _ = codearchiver.subprocess.run_with_log(
                        ['git', 'update-ref', f'refs/codearchiver/{branch}', commit, ''],
                        cwd = directory,
                        check = False,
                    )
                    if r2 != 0:
                        _logger.error(f'Failed to update-ref refs/codearchiver/{branch} to {commit}')
                else:
                    _logger.error(f'Failed to fetch {commit}')

        _logger.info('Collecting repository metadata for index')
        _, refs = codearchiver.subprocess.run_with_log(['git', 'show-ref'], cwd = directory)
        # '%H% P' prints the commit hash followed by its parents; the space is only
        # emitted when there are parents, so root commits produce a single token.
        _, commitLog = codearchiver.subprocess.run_with_log(
            ['git', 'log', '--reflog', '--all', '--format=format:%H% P'],
            cwd = directory,
        )
        commits = [line.split(' ') for line in commitLog.splitlines()]
        rootCommits = [c[0] for c in commits if len(c) == 1]

        # Check whether there are relevant prior bundles to create an incremental one
        # Collect their commits shared with this clone (else `git bundle` complains about 'bad object')
        commitSet = {c[0] for c in commits}  # For fast lookup
        oldCommits = {}  # dict to keep the order reasonable
        basedOnBundles = {}  # ditto
        if self._storage:
            for oldBundle in self._storage.search_indices([('Root commit', c) for c in rootCommits]):
                if not oldBundle.startswith('git_'):  #TODO Is there a more generic and elegant approach?
                    continue
                _logger.info(f'Previous bundle: {oldBundle!r}')
                with self._storage.open_index(oldBundle) as fp:
                    idx = GitIndex.deserialise(fp)
                    for key, value in idx:
                        if key == 'Commit' and value in commitSet:
                            oldCommits[value] = True
                            # Only bundles that actually share a commit count as a base.
                            basedOnBundles[oldBundle] = True

        _logger.info(f'Bundling into {bundle}')
        # Commits already present in earlier bundles are passed on stdin as '^<hash>' exclusions.
        excludedCommits = ''.join(f'^{commit}\n' for commit in oldCommits).encode('ascii')
        codearchiver.subprocess.run_with_log(
            ['git', 'bundle', 'create', '--progress', f'../{bundle}', '--stdin', '--reflog', '--all'],
            cwd = directory,
            input = excludedCommits,
        )

        _logger.info('Removing clone')
        shutil.rmtree(directory)

        index = GitIndex()
        for oldBundle in basedOnBundles:
            index.append('Based on bundle', oldBundle)
        for line in refs.splitlines():
            index.append('Ref', line)
        for commitHash, *parents in commits:
            # Only commits that are new in this bundle are listed as 'Commit' ...
            if commitHash not in oldCommits:
                index.append('Commit', commitHash)
            # ... but root commits are always recorded, as they are what prior bundles are searched by.
            if not parents:
                index.append('Root commit', commitHash)

        return codearchiver.core.Result(id = self._id, files = [(bundle, index)])

    def __repr__(self):
        """Return a constructor-like representation including the extra branches."""
        return f'{type(self).__module__}.{type(self).__name__}({self._inputUrl!r}, extraBranches = {self._extraBranches!r})'
|