|
- #!/usr/bin/env python3
- # Read file sizes from stdin, print sum on stdout
- # Accepted formats on stdin:
- # - '1.2 MB', interpreted as 1200000 bytes
- # - '1.2 MiB', interpreted as 1.20 * 1024 * 1024 = 1258291.2 bytes
- # - '1.2M', interpreted as MiB
- # - '123', interpreted as 123 bytes
- # Known units: B; kB, MB, GB, TB, PB, EB; KiB, MiB, GiB, TiB, PiB, EiB; K, M, G, T, P, E
- # In all cases, whitespace is optional, can be any width, and may contain tabs. Leading and trailing whitespace is also permitted.
- # All numbers are cast to an integer before summation.
- # Output is by default formatted as IEC (i.e. KiB, MiB, etc.).
-
- import math
- import re
- import sys
-
-
# Multiplier, in bytes, for every unit suffix accepted on input.
units = {
    'B': 1,

    # SI prefixes
    'kB': 1000,
    'MB': 1000**2,
    'GB': 1000**3,
    'TB': 1000**4,
    'PB': 1000**5,
    'EB': 1000**6,

    # IEC
    'KiB': 1024,
    'MiB': 1024**2,
    'GiB': 1024**3,
    'TiB': 1024**4,
    'PiB': 1024**5,
    'EiB': 1024**6,

    # Short IEC
    'K': 1024,
    'M': 1024**2,
    'G': 1024**3,
    'T': 1024**4,
    'P': 1024**5,
    'E': 1024**6,
}

# Output units in ascending order; each entry is 1024 times the previous one.
IEC_UNITS = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB')

# Compiled once because they are applied to every input line.
_WHITESPACE = re.compile(r'\s+')
_FIRST_LETTER = re.compile(r'[a-zA-Z]')


def parse_size(line):
    """Convert one input line such as '1.2 MiB' into a whole number of bytes.

    All whitespace is ignored. Everything from the first ASCII letter onwards
    is taken as the unit; a line without letters is interpreted as bytes.

    Returns:
        The size in bytes as an int (fractions are truncated towards zero).

    Raises:
        KeyError: the unit is not a key of ``units``.
        ValueError: the numeric part cannot be parsed, or is too large to be
            represented as a finite float.
    """
    compact = _WHITESPACE.sub('', line)
    letter = _FIRST_LETTER.search(compact)
    if letter:
        number, unit = compact[:letter.start()], compact[letter.start():]
    else:
        number, unit = compact, 'B'

    # Resolve the unit first so an unknown unit is reported in preference to
    # an unparseable number.
    factor = units[unit]

    try:
        # Integers are parsed exactly; going through float would silently
        # lose precision above 2**53.
        value = int(number)
    except ValueError:
        value = float(number)
        if not math.isfinite(value):
            # float() overflows to inf for very long digit strings, and
            # int(inf) would raise OverflowError further down.
            raise ValueError(f'number out of range: {number!r}') from None
        if value.is_integer():
            # Keep the multiplication below in exact integer arithmetic.
            value = int(value)
    return int(value * factor)


def format_size(size):
    """Format a byte count using IEC units, e.g. 1536 -> '1.50 KiB'.

    Values below 1024 in magnitude are printed as plain bytes; everything
    else is printed with two decimals. Sizes beyond the EiB range are still
    expressed in EiB instead of failing.
    """
    magnitude = abs(size)
    exponent = 0
    # Integer division avoids float-logarithm rounding at exact powers of
    # 1024, and the bound keeps the index inside IEC_UNITS.
    while magnitude >= 1024 and exponent < len(IEC_UNITS) - 1:
        magnitude //= 1024
        exponent += 1
    unit = IEC_UNITS[exponent]
    if exponent == 0:
        return f'{size} {unit}'
    return f'{size / 1024**exponent:.2f} {unit}'


def main(stdin=None, stdout=None, stderr=None):
    """Sum the sizes read from stdin and print the formatted total.

    Lines that cannot be parsed are reported on stderr and skipped. The
    stream parameters default to the process's standard streams.
    """
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout
    stderr = sys.stderr if stderr is None else stderr

    total = 0
    for line in stdin:
        try:
            total += parse_size(line)
        except KeyError:
            print(f'Skipping line with unknown unit: {line}', file=stderr)
        except ValueError:
            print(f'Skipping line with unparseable number: {line}', file=stderr)
    print(format_size(total), file=stdout)


if __name__ == '__main__':
    main()
|