#!/usr/bin/env python3
# Take input filenames over stdin, write filenames to stdout only if they're
# for a wanted video
#
# Args: file containing just the wanted video IDs
#
# A filename's video ID is the 11 characters (from the set -_A-Za-z0-9) found
# between a "-" and a "." in the name; when several such segments exist, the
# last one is used. Everything is handled as raw bytes so filenames that are
# not valid text in any particular encoding pass through untouched.

import re
import sys
from typing import Iterable, Iterator, Optional, Sequence, Set

VIDEO_ID_LENGTH = 11

# Compiled once at import time; the capture group is the ID itself, without
# the leading "-" and trailing "." that delimit it in a filename.
_VIDEO_ID_RE = re.compile(rb"-([-_A-Za-z0-9]{11})\.")


def load_wanted_video_ids(fname) -> Set[bytes]:
    """Read the wanted video IDs from *fname*, one ID per line.

    Line endings (CR and/or LF) are stripped. Blank lines are ignored so a
    trailing empty line does not abort the run.

    Raises:
        ValueError: if a non-blank line is not exactly 11 bytes long. This is
            a real exception rather than an ``assert`` so the check still
            runs under ``python -O``.
        OSError: if the file cannot be opened or read.
    """
    wanted = set()
    with open(fname, "rb") as f:
        for line_number, line in enumerate(f, start=1):
            video_id = line.rstrip(b"\r\n")
            if not video_id:
                continue
            if len(video_id) != VIDEO_ID_LENGTH:
                raise ValueError(
                    f"{fname}:{line_number}: expected a "
                    f"{VIDEO_ID_LENGTH}-byte video ID, got {video_id!r}"
                )
            wanted.add(video_id)
    return wanted


def extract_video_id(filename: bytes) -> Optional[bytes]:
    """Return the video ID embedded in *filename*, or None if there is none.

    When the name contains more than one "-<11 chars>." segment, the last
    one wins.
    """
    # With a single capture group, findall() returns the group contents.
    candidates = _VIDEO_ID_RE.findall(filename)
    return candidates[-1] if candidates else None


def filter_wanted(lines: Iterable[bytes], wanted_video_ids: Set[bytes]) -> Iterator[bytes]:
    """Yield each filename from *lines* whose video ID is in *wanted_video_ids*.

    Trailing CR/LF bytes are stripped from every line before matching, and
    the yielded filenames do not carry a line ending. Filenames with no
    recognisable video ID are skipped: they cannot belong to a wanted video.
    """
    for line in lines:
        filename = line.rstrip(b"\r\n")
        if extract_video_id(filename) in wanted_video_ids:
            yield filename


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the filter; return the process exit status.

    *argv* is the argument list without the program name and defaults to
    ``sys.argv[1:]``. Only the first argument is used.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        print(f"usage: {sys.argv[0]} WANTED_VIDEO_IDS_FILE < filenames", file=sys.stderr)
        return 2

    wanted_video_ids = load_wanted_video_ids(args[0])
    out = sys.stdout.buffer
    for filename in filter_wanted(sys.stdin.buffer, wanted_video_ids):
        out.write(filename + b"\n")
    return 0


if __name__ == "__main__":
    sys.exit(main())