From 1355db62356023c827b535b565367e257c47848e Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Thu, 30 Mar 2023 00:07:29 +0000 Subject: [PATCH] Reduce memory usage by deleting potentially big objects when they're no longer needed --- codearchiver/modules/git.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/codearchiver/modules/git.py b/codearchiver/modules/git.py index e0105c6..8b008ef 100644 --- a/codearchiver/modules/git.py +++ b/codearchiver/modules/git.py @@ -188,8 +188,11 @@ class Git(codearchiver.core.Module): _logger.info(f'Bundling into {bundle}') cmd = ['git', 'bundle', 'create', '--progress', f'../{bundle}', '--stdin', '--reflog', '--all'] objectsToExclude = baseOids & commitsAndTags + del commitsAndTags input = ''.join(f'^{o}\n' for o in objectsToExclude).encode('ascii') + del objectsToExclude status, _, stderr = codearchiver.subprocess.run_with_log(cmd, cwd = directory, input = input, check = False) + del input if status == 128 and (stderr == 'fatal: Refusing to create empty bundle.\n' or stderr.endswith('\nfatal: Refusing to create empty bundle.\n')): # Manually write an empty bundle instead # Cf. Documentation/technical/bundle-format.txt and Documentation/technical/pack-format.txt in git's repository for details on the formats @@ -246,6 +249,7 @@ class Git(codearchiver.core.Module): with open('tmp.idx', 'rb') as fp: _, index, _ = codearchiver.subprocess.run_with_log(['git', 'show-index'], input = fp) indexObjectIds = {oid for offset, oid, _ in map(lambda l: l.rstrip('\n').split(' ', 2), index.splitlines()) if int(offset) < bundlePackSize} + del index try: indexObjects = {oid: objects[oid] for oid in indexObjectIds} except KeyError as e: @@ -258,6 +262,7 @@ class Git(codearchiver.core.Module): _, commitsWithSubmodules, _ = codearchiver.subprocess.run_with_log(['git', 'log', '--format=format:%H', '--diff-filter=d', '--all', '--', '.gitmodules'], cwd = directory) if commitsWithSubmodules: _logger.warning('Submodules found but extraction not supported') + del commitsWithSubmodules # Ensure that all commits and tags included in the temporary metadata made it into the pack, else data may be lost! indexCommitsAndTags = {oid for oid, otype in indexObjects.items() if otype in ('commit', 'tag')} @@ -268,6 +273,7 @@ class Git(codearchiver.core.Module): # Already added to metadata earlier continue metadata.append('Object', f'{oid} {otype}') + del indexObjects, indexCommitsAndTags # Bundling completed without issues; wait for depended-on bundles, add them to the metadata, then replace own temporary metadata if self._storage: