Browse Source

Catch urljoin exceptions (e.g. invalid IPv6)

master
JustAnotherArchivist 10 months ago
parent
commit
7f0809270b
1 changed file with 1 addition and 1 deletion
  1. +1
    -1
      html-extract-stupid

+ 1
- 1
html-extract-stupid View File

@@ -42,5 +42,5 @@

# Combine base and values to get absolute URLs
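# If multiple base tags are present, each one is honoured in turn: it is resolved against the current base and then replaces it. This violates the HTML spec, which uses only the first base element with an href.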
python3 -c 'import os, sys, urllib.parse; base = None'$'\n''try:'$'\n'' for l in map(str.strip, sys.stdin):'$'\n'' tag, value = l.split(" ", 1)'$'\n'' tag = tag.lower()'$'\n'' if base:'$'\n'' value = urllib.parse.urljoin(base, value)'$'\n'' if tag == "base":'$'\n'' base = value'$'\n'' continue'$'\n'' print(f"{tag} {value}")'$'\n''except BrokenPipeError:'$'\n'' os.dup2(os.open(os.devnull, os.O_WRONLY), sys.stdout.fileno()); sys.exit(1)'
python3 -c 'import os, sys, urllib.parse; base = None'$'\n''try:'$'\n'' for l in map(str.strip, sys.stdin):'$'\n'' tag, value = l.split(" ", 1)'$'\n'' tag = tag.lower()'$'\n'' if base:'$'\n'' try:'$'\n'' value = urllib.parse.urljoin(base, value)'$'\n'' except ValueError as e:'$'\n'' print(f"Could not merge {base = }, {l = }: {type(e)} {e}", file = sys.stderr)'$'\n'' continue'$'\n'' if tag == "base":'$'\n'' base = value'$'\n'' continue'$'\n'' print(f"{tag} {value}")'$'\n''except BrokenPipeError:'$'\n'' os.dup2(os.open(os.devnull, os.O_WRONLY), sys.stdout.fileno()); sys.exit(1)'
}

Loading…
Cancel
Save