diff --git a/cdx-chunk b/cdx-chunk
new file mode 100755
index 0000000..af3265a
--- /dev/null
+++ b/cdx-chunk
@@ -0,0 +1,51 @@
+#!/bin/bash
+if [[ $# -ne 2 ]]
+then
+	echo 'Usage: cdx-chunk CDXFILE SIZE' >&2
+	echo 'Returns offsets at which to split the WARC corresponding to CDXFILE such that each chunk is about SIZE bytes large.' >&2
+	echo 'CDXFILE must be a IA-style modern CDX file (CDX N b a m s k r M S V g) for a single WARC file. DO NOT PASS ITEM CDX FILES!' >&2
+	echo 'If CDXFILE ends with .gz, it is automatically decompressed. A dash may be passed to read from stdin, in which case it must be decompressed already.' >&2
+	echo 'SIZE is an integer, optionally with a trailing M or G to designate MiB or GiB, respectively.' >&2
+	echo 'The output is one integer per line, which designates the offset at which a new chunk begins. For example, if the first line is 1042, the first 1042 bytes are one chunk and the 1043rd byte begins the second chunk.' >&2
+	echo 'Note that chunks may be much bigger than SIZE if there are large records in the WARC.' >&2
+	exit 1
+fi
+
+file="$1"
+declare -i size
+if [[ "$2" == *M ]]
+then
+	size=$((${2::-1} * 1024 * 1024))
+elif [[ "$2" == *G ]]
+then
+	size=$((${2::-1} * 1024 * 1024 * 1024))
+else
+	size=$2
+fi
+if [[ ${size} -eq 0 ]]
+then
+	echo "Error: invalid size" >&2
+	exit 1
+fi
+
+{
+	if [[ "${file}" == '-' ]]
+	then
+		cat
+	elif [[ "${file}" == *.gz ]]
+	then
+		# Try to use zstdcat if available since it has much better performance.
+		if command -v zstdcat &>/dev/null
+		then
+			zstdcat "${file}"
+		else
+			zcat "${file}"
+		fi
+	else
+		cat "${file}"
+	fi
+} | \
+  tail -n+2 | \
+  awk '{ print $10 }' | \
+  sort -n | \
+  awk -v size=${size} '($1 - lastBoundary) >= size { print; lastBoundary = $1; }'