|
# Print usage information to stderr and terminate the script.
#
# $1 - exit status to terminate with (0 for -h/--help, non-zero for misuse).
function usage_exit {
    echo 'Usage: dedupe FILE1 FILE2' >&2
    echo >&2
    echo 'Prints all lines from FILE2 that do not appear in FILE1, in the order of FILE2.' >&2
    echo "WARNING: FILE1 has to be read into memory fully, and memory use scales with about a factor 40 of FILE1's size. If your files are sorted, use comm instead." >&2
    # Quote the status so an empty/unset argument fails loudly instead of
    # silently expanding to a bare `exit`.
    exit "$1"
}
|
|
|
|
|
|
|
|
# Show help when explicitly requested; otherwise require exactly two file arguments.
if [[ "$1" == '-h' || "$1" == '--help' ]]; then usage_exit 0; fi
if [[ $# -ne 2 ]]; then usage_exit 1; fi

# Implementation notes (benchmarked alternatives kept below for reference):
# Perl seems to be ~30 % faster than AWK for this, but grep is ~2-3 times faster than Perl.
# AWK uses the least memory, Perl about 1.5 times as much, grep twice as much (as AWK).
#awk 'NR==FNR { s[$0]=1; next; } !($0 in s)' "$1" "$2"
#perl -ne 'if (@ARGV == 1) { $seen{$_}=1; } else { print $_ if !(exists $seen{$_}); }' "$1" "$2"
# -F: treat each line of FILE1 as a fixed string, -x: match whole lines only,
# -v: invert (keep non-matching lines of FILE2), -f: read patterns from FILE1.
# NOTE(review): grep exits with status 1 when no lines are selected (i.e. FILE2
# is entirely contained in FILE1) — callers checking $? should expect that.
grep -F -x -v -f "$1" "$2"