From 6e5a019d9e994181da41a4b2d8916ad90af12168 Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist
Date: Tue, 27 Jul 2021 03:33:51 +0000
Subject: [PATCH] Always decode stdin with surrogateescape to avoid breaking on binary input

---
 youtube-extract | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube-extract b/youtube-extract
index 3cef573..ff926b7 100755
--- a/youtube-extract
+++ b/youtube-extract
@@ -110,7 +110,8 @@ matchers = [
 for e in matchers:
 	e[0] = re.compile(e[0])
 
-for origLine in sys.stdin:
+for origLine in sys.stdin.buffer:
+	origLine = origLine.decode('utf-8', 'surrogateescape')
 	origLine = origLine.strip()
 	line = re.sub(r'https?://', '//', origLine)
 	line = domainPattern.sub('/www.youtube.com/', line)