From 1977b237e51e63240f7e6db15289b1c32fed51da Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist <JustAnotherArchivist@users.noreply.github.com>
Date: Tue, 7 Feb 2023 20:48:54 +0000
Subject: [PATCH] Fix random BrokenPipeError on exiting Python processes

Catching BrokenPipeError alone is not sufficient. Data may still be sitting in the stdout buffer, and when Python flushes it during interpreter shutdown the write to the closed pipe fails again, producing the following error:

	Exception ignored in: <_io.TextIOWrapper name='<stdout>' mode='w' encoding='utf-8'>
	BrokenPipeError: [Errno 32] Broken pipe

To handle this, redirect stdout to /dev/null in the exception handler before exiting with status 1, so that the flush at shutdown goes to /dev/null instead of the closed pipe: https://docs.python.org/3.11/library/signal.html#note-on-sigpipe
---
 html-extract-stupid | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/html-extract-stupid b/html-extract-stupid
index 9127322..725fdaf 100755
--- a/html-extract-stupid
+++ b/html-extract-stupid
@@ -35,12 +35,12 @@
 	grep -Pva '^[a-zA-Z]+ $' |
 
 	# img srcset splitting
-	python3 -c 'import re, sys'$'\n''for l in map(str.strip, sys.stdin):'$'\n'' try:'$'\n''  tag, value = l.split(" ", 1)'$'\n''  tag = tag.lower()'$'\n''  if tag != "imgsrcset":'$'\n''   print(l); continue'$'\n''  for url in re.split(r"\s+\d+[wx]\s*(?:,\s*|$)|,\s+", value.strip()):'$'\n''   if url: print(f"img {url}")'$'\n'' except BrokenPipeError: break' |
+	python3 -c 'import os, re, sys'$'\n''try:'$'\n'' for l in map(str.strip, sys.stdin):'$'\n''  tag, value = l.split(" ", 1)'$'\n''  tag = tag.lower()'$'\n''  if tag != "imgsrcset":'$'\n''   print(l); continue'$'\n''  for url in re.split(r"\s+\d+[wx]\s*(?:,\s*|$)|,\s+", value.strip()):'$'\n''   if url: print(f"img {url}")'$'\n''except BrokenPipeError:'$'\n'' os.dup2(os.open(os.devnull, os.O_WRONLY), sys.stdout.fileno()); sys.exit(1)' |
 
 	# Decode HTML references
-	python3 -c 'import html, sys'$'\n''for l in sys.stdin:'$'\n'' try: print(html.unescape(l.strip()))'$'\n'' except BrokenPipeError: break' |
+	python3 -c 'import html, os, sys'$'\n''try:'$'\n'' for l in sys.stdin:'$'\n''  print(html.unescape(l.strip()))'$'\n''except BrokenPipeError:'$'\n'' os.dup2(os.open(os.devnull, os.O_WRONLY), sys.stdout.fileno()); sys.exit(1)' |
 
 	# Combine base and values to get absolute URLs
 	# If multiple base tags are present, they all get respected. This violates the HTML specs.
-	python3 -c 'import sys, urllib.parse; base = None'$'\n''for l in map(str.strip, sys.stdin):'$'\n'' tag, value = l.split(" ", 1)'$'\n'' tag = tag.lower()'$'\n'' if base:'$'\n''  value = urllib.parse.urljoin(base, value)'$'\n'' if tag == "base":'$'\n''  base = value'$'\n''  continue'$'\n'' try: print(f"{tag} {value}")'$'\n'' except BrokenPipeError: break'
+	python3 -c 'import os, sys, urllib.parse; base = None'$'\n''try:'$'\n'' for l in map(str.strip, sys.stdin):'$'\n''  tag, value = l.split(" ", 1)'$'\n''  tag = tag.lower()'$'\n''  if base:'$'\n''   value = urllib.parse.urljoin(base, value)'$'\n''  if tag == "base":'$'\n''   base = value'$'\n''   continue'$'\n''  print(f"{tag} {value}")'$'\n''except BrokenPipeError:'$'\n'' os.dup2(os.open(os.devnull, os.O_WRONLY), sys.stdout.fileno()); sys.exit(1)'
 }