#!/usr/bin/env python3
# Read file sizes from stdin, print sum on stdout
# Accepted formats on stdin:
#  - '1.2 MB', interpreted as 1200000 bytes
#  - '1.2 MiB', interpreted as 1.20 * 1024 * 1024 = 1258291.2 bytes
#  - '1.2M', interpreted as MiB
#  - '123', interpreted as 123 bytes
# Known units: B; kB, MB, GB, TB, PB, EB; KiB, MiB, GiB, TiB, PiB, EiB;
# K, M, G, T, P, E
# In all cases, whitespace is optional, can be any width, and may contain
# tabs. Leading and trailing whitespace is also permitted.
# All numbers are cast to an integer (truncating toward zero) before
# summation.
# Lines with an unknown unit or an unparseable number are reported on stderr
# and skipped.
# Output is by default formatted as IEC (i.e. KiB, MiB, etc.). EiB is the
# largest output unit; bigger totals are printed as more than 1024 EiB.

import decimal
import fractions
import math
import re
import sys


# Multiplier (in bytes) for every unit accepted on input.
units = {
    'B': 1,

    # SI prefixes
    'kB': 1000,
    'MB': 1000**2,
    'GB': 1000**3,
    'TB': 1000**4,
    'PB': 1000**5,
    'EB': 1000**6,

    # IEC
    'KiB': 1024,
    'MiB': 1024**2,
    'GiB': 1024**3,
    'TiB': 1024**4,
    'PiB': 1024**5,
    'EiB': 1024**6,

    # Short IEC
    'K': 1024,
    'M': 1024**2,
    'G': 1024**3,
    'T': 1024**4,
    'P': 1024**5,
    'E': 1024**6,
}

# Output units, indexed by the power of 1024 they represent.
_IEC_UNITS = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB')

_WHITESPACE = re.compile(r'\s+')
_FIRST_LETTER = re.compile(r'[a-zA-Z]')


def parse_size(line):
    """Return the size described by *line* as a whole number of bytes.

    All whitespace is ignored. Everything from the first ASCII letter onwards
    is the unit (default: 'B'); everything before it is the number.
    Fractional byte counts are truncated toward zero.

    Raises:
        ValueError: with the message 'unknown unit' or 'unparseable number'.
    """
    compact = _WHITESPACE.sub('', line)
    letter = _FIRST_LETTER.search(compact)
    if letter is None:
        text, unit = compact, 'B'
    else:
        text, unit = compact[:letter.start()], compact[letter.start():]

    if unit not in units:
        raise ValueError('unknown unit')

    try:
        value = decimal.Decimal(text)
    except decimal.InvalidOperation:
        raise ValueError('unparseable number') from None
    if not value.is_finite():
        raise ValueError('unparseable number')

    # Decimal parsing plus Fraction arithmetic is exact, so neither large
    # integers nor decimal fractions pick up binary floating-point error
    # before the truncation.
    return math.trunc(fractions.Fraction(value) * units[unit])


def sum_sizes(lines, warn=None):
    """Return the sum in bytes of all parseable sizes in *lines*.

    Lines that cannot be parsed are reported on *warn* (default: the current
    sys.stderr) and otherwise ignored.
    """
    if warn is None:
        warn = sys.stderr
    total = 0
    for line in lines:
        try:
            total += parse_size(line)
        except ValueError as error:
            # Drop the line terminator so the warning is not followed by an
            # empty line.
            shown = line.rstrip('\r\n')
            print(f'Skipping line with {error}: {shown}', file=warn)
    return total


def format_size(size):
    """Format the integer byte count *size* using IEC units.

    Values below 1 KiB (including zero) are printed as an exact number of
    bytes; larger magnitudes are printed with two decimals in the largest
    fitting unit, capped at EiB.
    """
    # floor(log2(|size|)) // 10 is the power of 1024, computed on integers so
    # it cannot be off by one through float rounding. The clamps cover
    # size == 0 (bit_length() == 0) and totals of 1024 EiB or more.
    exponent = (abs(size).bit_length() - 1) // 10
    exponent = max(0, min(exponent, len(_IEC_UNITS) - 1))
    if exponent == 0:
        return f'{size} B'
    return f'{size / 1024**exponent:.2f} {_IEC_UNITS[exponent]}'


def main():
    """Sum the sizes read from stdin and print the formatted total."""
    print(format_size(sum_sizes(sys.stdin)))


if __name__ == '__main__':
    main()