Browse Source

Reduce memory usage by deleting potentially big objects when they're no longer needed

tags/v1.1^0
JustAnotherArchivist 1 year ago
parent
commit
1355db6235
1 changed file with 6 additions and 0 deletions
  1. +6
    -0
      codearchiver/modules/git.py

+ 6
- 0
codearchiver/modules/git.py View File

@@ -188,8 +188,11 @@ class Git(codearchiver.core.Module):
_logger.info(f'Bundling into {bundle}')
cmd = ['git', 'bundle', 'create', '--progress', f'../{bundle}', '--stdin', '--reflog', '--all']
objectsToExclude = baseOids & commitsAndTags
del commitsAndTags
input = ''.join(f'^{o}\n' for o in objectsToExclude).encode('ascii')
del objectsToExclude
status, _, stderr = codearchiver.subprocess.run_with_log(cmd, cwd = directory, input = input, check = False)
del input
if status == 128 and (stderr == 'fatal: Refusing to create empty bundle.\n' or stderr.endswith('\nfatal: Refusing to create empty bundle.\n')):
# Manually write an empty bundle instead
# Cf. Documentation/technical/bundle-format.txt and Documentation/technical/pack-format.txt in git's repository for details on the formats
@@ -246,6 +249,7 @@ class Git(codearchiver.core.Module):
with open('tmp.idx', 'rb') as fp:
_, index, _ = codearchiver.subprocess.run_with_log(['git', 'show-index'], input = fp)
indexObjectIds = {oid for offset, oid, _ in map(lambda l: l.rstrip('\n').split(' ', 2), index.splitlines()) if int(offset) < bundlePackSize}
del index
try:
indexObjects = {oid: objects[oid] for oid in indexObjectIds}
except KeyError as e:
@@ -258,6 +262,7 @@ class Git(codearchiver.core.Module):
_, commitsWithSubmodules, _ = codearchiver.subprocess.run_with_log(['git', 'log', '--format=format:%H', '--diff-filter=d', '--all', '--', '.gitmodules'], cwd = directory)
if commitsWithSubmodules:
_logger.warning('Submodules found but extraction not supported')
del commitsWithSubmodules

# Ensure that all commits and tags included in the temporary metadata made it into the pack, else data may be lost!
indexCommitsAndTags = {oid for oid, otype in indexObjects.items() if otype in ('commit', 'tag')}
@@ -268,6 +273,7 @@ class Git(codearchiver.core.Module):
# Already added to metadata earlier
continue
metadata.append('Object', f'{oid} {otype}')
del indexObjects, indexCommitsAndTags

# Bundling completed without issues; wait for depended-on bundles, add them to the metadata, then replace own temporary metadata
if self._storage:


Loading…
Cancel
Save