#!/usr/bin/env python3
# Read file sizes from stdin, print sum on stdout
# Accepted formats on stdin:
# - '1.2 MB', interpreted as 1200000 bytes
# - '1.2 MiB', interpreted as 1.20 * 1024 * 1024 = 1258291.2 bytes
# - '1.2M', interpreted as MiB
# - '123', interpreted as 123 bytes
# Known units: B; kB, MB, GB, TB, PB, EB; KiB, MiB, GiB, TiB, PiB, EiB; K, M, G, T, P, E
# In all cases, whitespace is optional, can be any width, and may contain tabs.
# Leading and trailing whitespace is also permitted.
# All numbers are cast to an integer before summation.
# Output is by default formatted as IEC (i.e. KiB, MiB, etc.).

import math
import re
import sys


# Multiplier (in bytes) for every unit accepted on input.
units = {
    'B': 1,
    # SI prefixes
    'kB': 1000,
    'MB': 1000**2,
    'GB': 1000**3,
    'TB': 1000**4,
    'PB': 1000**5,
    'EB': 1000**6,
    # IEC
    'KiB': 1024,
    'MiB': 1024**2,
    'GiB': 1024**3,
    'TiB': 1024**4,
    'PiB': 1024**5,
    'EiB': 1024**6,
    # Short IEC
    'K': 1024,
    'M': 1024**2,
    'G': 1024**3,
    'T': 1024**4,
    'P': 1024**5,
    'E': 1024**6,
}

# Output units, indexed by the power of 1024 they represent.
_IEC_UNITS = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB')

_WHITESPACE = re.compile(r'\s+')
_FIRST_LETTER = re.compile(r'[a-zA-Z]')


def parse_size(line: str) -> int:
    """Parse one input line such as '1.2 MiB' into a whole number of bytes.

    All whitespace (including whitespace inside the value) is discarded, then
    the text is split at the first ASCII letter into a number and a unit. A
    line without any letter is taken to be a plain byte count.

    Raises:
        ValueError: with the message 'unknown unit' or 'unparseable number'.
            The unit is validated before the number.
    """
    compact = _WHITESPACE.sub('', line.strip())

    match = _FIRST_LETTER.search(compact)
    if match is None:
        number, unit = compact, 'B'
    else:
        number, unit = compact[:match.start()], compact[match.start():]

    if unit not in units:
        raise ValueError('unknown unit')

    try:
        # Pure ASCII-digit strings stay integers so large byte counts are
        # exact; everything else (sign, decimal point, ...) goes via float.
        # An empty number falls into int('') and is rejected there.
        if number.strip('0123456789') == '':
            value = int(number)
        else:
            value = float(number)
    except ValueError:
        raise ValueError('unparseable number') from None

    scaled = value * units[unit]
    # A decimal string that is too large parses to inf (or overflows to inf
    # when scaled); int(inf) would raise OverflowError, so reject it here.
    if isinstance(scaled, float) and not math.isfinite(scaled):
        raise ValueError('unparseable number')
    return int(scaled)


def format_size(total: int) -> str:
    """Format a byte count using IEC units (KiB, MiB, ...).

    Totals with a magnitude below 1024 are printed as plain bytes; larger
    ones are scaled and printed with two decimals. Totals of 1024**7 and
    above are still expressed in EiB, the largest known unit.
    """
    # Special case: zero has no meaningful order of magnitude.
    if total == 0:
        return '0 B'

    # Exact integer floor(log1024(|total|)): each power of 1024 is 10 bits.
    # This avoids floating-point rounding in math.log, and the clamp avoids
    # indexing past the end of the unit list.
    exponent = (abs(total).bit_length() - 1) // 10
    exponent = min(exponent, len(_IEC_UNITS) - 1)

    unit = _IEC_UNITS[exponent]
    if exponent == 0:
        return f'{total} {unit}'
    return f'{total / 1024**exponent:.2f} {unit}'


def total_size(lines) -> int:
    """Sum the sizes found in an iterable of lines.

    Lines that cannot be parsed are reported on stderr and skipped.
    """
    total = 0
    for line in lines:
        try:
            total += parse_size(line)
        except ValueError as err:
            orig_line = line.rstrip('\r\n')
            print(f'Skipping line with {err}: {orig_line}', file=sys.stderr)
    return total


def main() -> None:
    """Read sizes from stdin and print their sum on stdout."""
    print(format_size(total_size(sys.stdin)))


if __name__ == '__main__':
    main()