diff --git a/wiki-recursive-extract-normalise b/wiki-recursive-extract-normalise
index dd51fea..ca357c2 100755
--- a/wiki-recursive-extract-normalise
+++ b/wiki-recursive-extract-normalise
@@ -3,7 +3,7 @@
 # Everything that looks like a social media link (including YouTube) is run through social-media-extract-profile-link.
 # Everything else is run through website-extract-social-media.
 # This is done recursively until no new links are discovered anymore.
-# The output is further fed through url-normalise before and during processing to avoid equivalent but slightly different duplicates.
+# The output is further fed through url-normalise before and during processing to avoid equivalent but slightly different duplicates, and the output is deduplicated within each section at the end.
 
 verbose=
 while [[ $# -gt 0 ]]
@@ -80,4 +80,4 @@ do
 			done
 		done
 	fi
-done
+done | mawk -W interactive '! /^\*/ { print; } /^\*/ && !seen[$0]++ { print; } /^==/ { delete seen; }'