Browse Source

Add b64grep

master
JustAnotherArchivist 2 years ago
parent
commit
0cb61f4dae
1 changed files with 107 additions and 0 deletions
  1. +107
    -0
      b64grep

+ 107
- 0
b64grep View File

@@ -0,0 +1,107 @@
#!/bin/bash
set -euo pipefail

# Help: shown when called without arguments or with -h/--help as the first argument.
# The text goes to stderr and the script exits with status 1.
if (( $# == 0 )) || [[ "$1" == '-h' ]] || [[ "$1" == '--help' ]]; then
  # One printf argument per output line; the empty strings produce the blank separator lines.
  printf '%s\n' \
    'Usage: b64grep [OPTIONS] PATTERN [PATTERN...]' \
    '' \
    'Options: --ignore-case (-i), --invert-match (-v)' \
    '' \
    'grep that handles base64-encoding: `b64grep foo` matches any input line on stdin that contains a base64-encoded string whose decoded data contains foo.' \
    'Note that b64grep does not actually decode the base64 data or even check that the input is base64. It merely transforms the search string(s) into the appropriate patterns.' \
    'Only fixed-string searches are supported, i.e. --fixed-strings is implied. --ignore-case and --invert-match function as with grep.' \
    'Patterns must be at least two characters long. Due to how base64 works, short patterns are inherently unreliable, so use of long patterns is advised.' \
    'However, --ignore-case incurs a significant performance impact when used with long patterns.' \
    >&2
  exit 1
fi

# Option parsing
#
# parseOptions ARG...
#   Scans the leading arguments for --ignore-case, --invert-match and bundled short
#   flags (-i, -v, -iv, ...), stopping at the first argument that is none of these.
#   Sets the globals ignoreCase / invertMatch to 'yes' when the respective option is
#   seen (they are left untouched otherwise) and stores the number of consumed
#   arguments in the global optCount so that the caller can shift them away.
parseOptions() {
  optCount=0
  # Loop on the argument count instead of looping forever: once every argument has
  # been consumed, "$1" is unset and expanding it would abort the script under `set -u`.
  while (( $# > 0 )); do
    if [[ "$1" == '--ignore-case' ]]; then
      ignoreCase=yes
    elif [[ "$1" == '--invert-match' ]]; then
      invertMatch=yes
    elif [[ "$1" =~ ^-[iv]+$ ]]; then
      # Bundled short options; each letter may appear any number of times
      if [[ "$1" == *i* ]]; then ignoreCase=yes; fi
      if [[ "$1" == *v* ]]; then invertMatch=yes; fi
    else
      # First non-option argument: everything from here on is a pattern
      break
    fi
    shift
    optCount=$((optCount + 1))
  done
  return 0
}

ignoreCase=
invertMatch=
parseOptions "$@"
shift "${optCount}"

# Options were given but nothing is left to search for
if (( optCount > 0 && $# == 0 )); then
  echo 'b64grep: no PATTERN given' >&2
  echo 'Usage: b64grep [OPTIONS] PATTERN [PATTERN...]' >&2
  exit 1
fi

# Handle --ignore-case
#
# expandCase PATTERN...
#   Appends every combination of lowercase and uppercase characters of each PATTERN to
#   the global array `patterns`, starting with the all-lowercase form.
#   Algorithm inspired by https://www.geeksforgeeks.org/permute-string-changing-case/
#   Only the positions whose character actually has distinct lower/upper forms are
#   enumerated, so the work grows with the number of such characters rather than with
#   the total pattern length.
#   Returns 1 (after printing a message to stderr) if a pattern has so many cased
#   characters that the number of combinations cannot be represented.
expandCase() {
  local pattern lower upper variant
  local -a casedPos
  local j k mask nCased total pos
  for pattern in "$@"; do
    lower="${pattern,,}"
    upper="${pattern^^}"

    # Collect the indices of the characters that have two case forms
    casedPos=()
    for (( j = 0; j < ${#lower}; ++j )); do
      if [[ "${lower:j:1}" != "${upper:j:1}" ]]; then
        casedPos+=("${j}")
      fi
    done
    nCased=${#casedPos[@]}

    # Bash arithmetic uses fixed-width integers without overflow checks; a shift this
    # large would yield a bogus combination count.
    if (( nCased >= 63 )); then
      printf 'b64grep: too many case variants for pattern: %s\n' "${pattern}" >&2
      return 1
    fi
    total=$((1 << nCased))

    # Bit k of mask decides whether the k-th cased character is uppercased
    for (( mask = 0; mask < total; ++mask )); do
      variant="${lower}"
      for (( k = 0; k < nCased; ++k )); do
        if (( (mask >> k) & 1 )); then
          pos=${casedPos[k]}
          variant="${variant:0:pos}${upper:pos:1}${variant:pos+1}"
        fi
      done
      patterns+=("${variant}")
    done
  done
  return 0
}

patterns=()
if [[ "${ignoreCase}" ]]; then
  expandCase "$@" || exit 1
else
  patterns=("$@")
fi

# The core: transform pattern to base64...
# When a string X of length L bytes is contained in base64-encoded data D, it will be 4*L/3 characters in D.
# Depending on the position in D, it may however be encoded in three different ways, and the beginning of the occurrence may be shifted by two or four bits on either end.
# This means that the first four and last four bits of the search string cannot be easily used for searches because they may get mixed with the preceding/following data.
# In this implementation, those bits are simply discarded. This is why patterns must be at least two characters (since that means there are two or more base64 chars entirely determined by the pattern).
# A more clever way would be to generate character classes for the corresponding bits. However, the benefit of this is marginal because it would still not take shifts into account.
#
# b64Fragments PATTERN
#   Appends the three base64 fragments of PATTERN (one per possible byte alignment) to
#   the global array `b64patterns`.
#   Returns 1 without modifying `b64patterns` if encoding fails or if PATTERN is too
#   short to yield a non-empty fragment for every alignment (a message is printed to
#   stderr in the latter case).
b64Fragments() {
  local pattern="$1"
  local prefix b64
  local -a fragments=()
  # The 'x' prefixes shift the pattern by one and two bytes to produce the other two alignments
  for prefix in '' x xx; do
    # base64 implementations may wrap long output over several lines; an embedded
    # newline would make grep treat the pieces as separate patterns, so strip them.
    b64="$(printf '%s' "${prefix}${pattern}" | base64 | tr -d '\n')" || return 1

    # Drop the padding together with the last character, which is only partially
    # determined by the pattern
    if [[ "${b64}" == *== ]]; then
      b64="${b64::-3}"
    elif [[ "${b64}" == *= ]]; then
      b64="${b64::-2}"
    fi

    # Drop the characters that encode the prefix, including the one shared with the
    # first bits of the pattern
    if [[ "${prefix}" == x ]]; then
      b64="${b64:2}"
    elif [[ "${prefix}" == xx ]]; then
      b64="${b64:3}"
    fi

    # An empty fragment would make grep match every line
    if [[ -z "${b64}" ]]; then
      printf 'b64grep: pattern too short (must be at least two characters): %s\n' "${pattern}" >&2
      return 1
    fi
    fragments+=("${b64}")
  done
  b64patterns+=("${fragments[@]}")
  return 0
}

b64patterns=()
for pattern in "${patterns[@]}"; do
  b64Fragments "${pattern}" || exit 1
done

# Build the grep argument list as an array so that each fragment stays a single argument
grepArgs=(--fixed-strings)
if [[ -n "${invertMatch}" ]]; then
  grepArgs+=(--invert-match)
fi
for fragment in "${b64patterns[@]}"; do
  # One -e per fragment: a line matches if it contains any of them
  grepArgs+=(-e "${fragment}")
done

# Replace this shell with grep, which reads the data from stdin
exec grep "${grepArgs[@]}"

Loading…
Cancel
Save