Parcourir la source

Fix random BrokenPipeError on exiting Python processes

Just catching the BrokenPipeError is not sufficient. There might still be data in the stdout buffer, which then causes the following error when Python shuts down and flushes it:

	Exception ignored in: <_io.TextIOWrapper name='<stdout>' mode='w' encoding='utf-8'>
	BrokenPipeError: [Errno 32] Broken pipe

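To handle this, redirect stdout to /dev/null in the exception handler before exiting, as recommended by the Python documentation's note on SIGPIPE, so that the flush at interpreter shutdown has nowhere to fail: https://docs.python.org/3.11/library/signal.html#note-on-sigpipe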
master
JustAnotherArchivist il y a 1 an
Parent
révision
1977b237e5
1 fichier modifié avec 3 ajouts et 3 suppressions
  1. +3
    -3
      html-extract-stupid

+ 3
- 3
html-extract-stupid Voir le fichier

@@ -35,12 +35,12 @@
grep -Pva '^[a-zA-Z]+ $' |

# img srcset splitting
python3 -c 'import re, sys'$'\n''for l in map(str.strip, sys.stdin):'$'\n'' try:'$'\n'' tag, value = l.split(" ", 1)'$'\n'' tag = tag.lower()'$'\n'' if tag != "imgsrcset":'$'\n'' print(l); continue'$'\n'' for url in re.split(r"\s+\d+[wx]\s*(?:,\s*|$)|,\s+", value.strip()):'$'\n'' if url: print(f"img {url}")'$'\n'' except BrokenPipeError: break' |
python3 -c 'import os, re, sys'$'\n''try:'$'\n'' for l in map(str.strip, sys.stdin):'$'\n'' tag, value = l.split(" ", 1)'$'\n'' tag = tag.lower()'$'\n'' if tag != "imgsrcset":'$'\n'' print(l); continue'$'\n'' for url in re.split(r"\s+\d+[wx]\s*(?:,\s*|$)|,\s+", value.strip()):'$'\n'' if url: print(f"img {url}")'$'\n''except BrokenPipeError:'$'\n'' os.dup2(os.open(os.devnull, os.O_WRONLY), sys.stdout.fileno()); sys.exit(1)' |

# Decode HTML references
python3 -c 'import html, sys'$'\n''for l in sys.stdin:'$'\n'' try: print(html.unescape(l.strip()))'$'\n'' except BrokenPipeError: break' |
python3 -c 'import html, os, sys'$'\n''try:'$'\n'' for l in sys.stdin:'$'\n'' print(html.unescape(l.strip()))'$'\n''except BrokenPipeError:'$'\n'' os.dup2(os.open(os.devnull, os.O_WRONLY), sys.stdout.fileno()); sys.exit(1)' |

# Combine base and values to get absolute URLs
# If multiple base tags are present, they all get respected. This violates the HTML specs.
python3 -c 'import sys, urllib.parse; base = None'$'\n''for l in map(str.strip, sys.stdin):'$'\n'' tag, value = l.split(" ", 1)'$'\n'' tag = tag.lower()'$'\n'' if base:'$'\n'' value = urllib.parse.urljoin(base, value)'$'\n'' if tag == "base":'$'\n'' base = value'$'\n'' continue'$'\n'' try: print(f"{tag} {value}")'$'\n'' except BrokenPipeError: break'
python3 -c 'import os, sys, urllib.parse; base = None'$'\n''try:'$'\n'' for l in map(str.strip, sys.stdin):'$'\n'' tag, value = l.split(" ", 1)'$'\n'' tag = tag.lower()'$'\n'' if base:'$'\n'' value = urllib.parse.urljoin(base, value)'$'\n'' if tag == "base":'$'\n'' base = value'$'\n'' continue'$'\n'' print(f"{tag} {value}")'$'\n''except BrokenPipeError:'$'\n'' os.dup2(os.open(os.devnull, os.O_WRONLY), sys.stdout.fileno()); sys.exit(1)'
}

Chargement…
Annuler
Enregistrer