|
|
@@ -29,32 +29,32 @@ assert mode == 'massage' |
|
|
|
|
|
|
|
|
|
|
|
noisePattern = '|'.join([ |
|
|
|
r'^//www\.youtube\.com/s/(desktop|player)/[0-9a-f]+/', |
|
|
|
r'^//www\.youtube\.com/s/gaming/emoji/', |
|
|
|
r'^//www\.youtube\.com/redirect\?event=channel_banner&', |
|
|
|
r'^//www\.youtube\.com/redirect\?(?=(.*&)?event=video_description(&|$))(?!(.*&)?v=)', |
|
|
|
r'^//www\.youtube\.com/yts/', |
|
|
|
r'^//www\.youtube\.com/img/', |
|
|
|
r'^//www\.youtube\.com/youtubei/', |
|
|
|
r'^//www\.youtube\.com/ads(/|$)', |
|
|
|
r'^//www\.youtube\.com/creators(/|$)', |
|
|
|
r'^//www\.youtube\.com/(player|iframe)_api(\?|$)', |
|
|
|
r'^//www\.youtube\.com/error(_204)?/?\?', |
|
|
|
r'^//www\.youtube\.com/(about|t|howyoutubeworks)([/?]|$)', |
|
|
|
r'^//www\.youtube\.com/results/?(\?|$)', |
|
|
|
r'^//www\.youtube\.com/premium/?\?', |
|
|
|
r'^//www\.youtube\.com/new([/?]|$)', |
|
|
|
r'^//www\.youtube\.com/?(\?|$)', |
|
|
|
r'^//www\.youtube\.com/embed/("|%22|' r"'|%27" r')(%20)?(\+|%3B)', # JS extraction stuff |
|
|
|
r'^//www\.youtube\.com/service_ajax$', |
|
|
|
r'^//www\.youtube\.com/watch(\?v=)?$', |
|
|
|
r'^//consent\.(youtube|google)\.com/', |
|
|
|
r'^//www\.youtube\.com/(c|user|channel|watch(_popup)?(\.php)?|embed|e|v|redirect|(my_videos_)?upload)(%[23]F|/)?$', # Miscellaneous crap |
|
|
|
r'//www\.youtube\.com/s/(desktop|player)/[0-9a-f]+/', |
|
|
|
r'//www\.youtube\.com/s/gaming/emoji/', |
|
|
|
r'//www\.youtube\.com/redirect\?event=channel_banner&', |
|
|
|
r'//www\.youtube\.com/redirect\?(?=(\S*&)?event=video_description(&|$))(?!(\S*&)?v=)', |
|
|
|
r'//www\.youtube\.com/yts/', |
|
|
|
r'//www\.youtube\.com/img/', |
|
|
|
r'//www\.youtube\.com/youtubei/', |
|
|
|
r'//www\.youtube\.com/ads(/|$)', |
|
|
|
r'//www\.youtube\.com/creators(/|$)', |
|
|
|
r'//www\.youtube\.com/(player|iframe)_api(\?|$)', |
|
|
|
r'//www\.youtube\.com/error(_204)?/?\?', |
|
|
|
r'//www\.youtube\.com/(about|t|howyoutubeworks)([/?]|$)', |
|
|
|
r'//www\.youtube\.com/results/?(\?|$)', |
|
|
|
r'//www\.youtube\.com/premium/?\?', |
|
|
|
r'//www\.youtube\.com/new([/?]|$)', |
|
|
|
r'//www\.youtube\.com/?(\?|$)', |
|
|
|
r'//www\.youtube\.com/embed/("|%22|' r"'|%27" r')(%20)?(\+|%3B)', # JS extraction stuff |
|
|
|
r'//www\.youtube\.com/service_ajax$', |
|
|
|
r'//www\.youtube\.com/watch(\?v=)?$', |
|
|
|
r'//consent\.(youtube|google)\.com/', |
|
|
|
r'//www\.youtube\.com/(c|user|channel|watch(_popup)?(\.php)?|embed|e|v|redirect|(my_videos_)?upload)(%[23]F|/)?$', # Miscellaneous crap |
|
|
|
]) |
|
|
|
|
|
|
|
channelPattern = '|'.join([ |
|
|
|
r'/www\.youtube\.com/c/[^/?&=.]+', |
|
|
|
r'/www\.youtube\.com/user/[^/?&=.]+', |
|
|
|
r'/www\.youtube\.com/c/[^/?&=.\s]+', |
|
|
|
r'/www\.youtube\.com/user/[^/?&=.\s]+', |
|
|
|
r'/www\.youtube\.com/channel/UC[0-9A-Za-z_-]{22}', |
|
|
|
r'/www\.youtube\.com/[^/?&=.\s]+(?=/?(\s|$))', |
|
|
|
]) |
|
|
@@ -63,26 +63,26 @@ channelPattern = '|'.join([ |
|
|
|
# If necessary, use lookahead assertions to match further stuff after the video ID. |
|
|
|
videoPattern = '|'.join([ |
|
|
|
# Normal watch URL |
|
|
|
r'/www\.youtube\.com/watch(_popup)?(\.php)?/?\?(.*&)?v=[0-9A-Za-z_-]{11}', |
|
|
|
r'/www\.youtube\.com/watch(_popup)?(\.php)?/?\?(\S*&)?v=[0-9A-Za-z_-]{11}', |
|
|
|
r'/www\.youtube\.com/watch/[0-9A-Za-z_-]{11}', |
|
|
|
# Embeds |
|
|
|
r'/www\.youtube\.com/e(mbed)?/(?!videoseries\?)[0-9A-Za-z_-]{11}', |
|
|
|
r'/www\.youtube\.com/embed/?\?(.*&)?v=[0-9A-Za-z_-]{11}', |
|
|
|
r'/www\.youtube\.com/embed/?\?(\S*&)?v=[0-9A-Za-z_-]{11}', |
|
|
|
# Shortener |
|
|
|
r'/(?i:youtu\.be)(:\d+)?/[0-9A-Za-z_-]{11}', |
|
|
|
# Old (Flash) embeds |
|
|
|
r'/www\.youtube\.com/v/[0-9A-Za-z_-]{11}', |
|
|
|
# Redirects from links in video descriptions |
|
|
|
r'/www\.youtube\.com/redirect\?(.*&)?v=[0-9A-Za-z_-]{11}(?=&|$)', |
|
|
|
r'/www\.youtube\.com/redirect\?(\S*&)?v=[0-9A-Za-z_-]{11}(?=&|$)', |
|
|
|
# Tracking and other crap |
|
|
|
r'/www\.youtube\.com/(ptracking|set_awesome)\?(.*&)?video_id=[0-9A-Za-z_-]{11}', |
|
|
|
r'/www\.youtube\.com/api/timedtext\?(.*&)?v=[0-9A-Za-z_-]{11}', |
|
|
|
r'/www\.youtube\.com/(my_videos_)?edit\?(.*&)?video_id=[0-9A-Za-z_-]{11}', |
|
|
|
r'/www\.youtube\.com/(all_comments|attribution|cthru|get_endscreen|livestreaming/dashboard)\?(.*&)?v=[0-9A-Za-z_-]{11}', |
|
|
|
r'/www\.youtube\.com/(ptracking|set_awesome)\?(\S*&)?video_id=[0-9A-Za-z_-]{11}', |
|
|
|
r'/www\.youtube\.com/api/timedtext\?(\S*&)?v=[0-9A-Za-z_-]{11}', |
|
|
|
r'/www\.youtube\.com/(my_videos_)?edit\?(\S*&)?video_id=[0-9A-Za-z_-]{11}', |
|
|
|
r'/www\.youtube\.com/(all_comments|attribution|cthru|get_endscreen|livestreaming/dashboard)\?(\S*&)?v=[0-9A-Za-z_-]{11}', |
|
|
|
# Generic v parameter on watch URLs including with percent encoding; this covers e.g. google.com/url?... or the oEmbed |
|
|
|
r'/watch/?\?(.*&)?v=[0-9A-Za-z_-]{11}', |
|
|
|
r'/watch/?\?(\S*&)?v=[0-9A-Za-z_-]{11}', |
|
|
|
# Generic v parameter on anything |
|
|
|
r'[?&]v=[0-9A-Za-z_-]{11}(?=&|$)', |
|
|
|
r'[?&]v=[0-9A-Za-z_-]{11}(?=&|\s|$)', |
|
|
|
]) |
|
|
|
|
|
|
|
|
|
|
@@ -96,13 +96,13 @@ matchers = [ |
|
|
|
[noisePattern, False, lambda m: None], |
|
|
|
[channelPattern, True, lambda m: 'https://www.youtube.com/' + m[0].split('/', 2)[-1].rstrip('/')], |
|
|
|
[videoPattern, True, lambda m: f'https://www.youtube.com/watch?v={m[0][-11:]}'], |
|
|
|
[r'/www\.youtube\.com/(?:playlist|embed(?:/videoseries|/\+lastest|/playlist)?/?)\?(?:.*&)?list=UU([0-9A-Za-z_-]+)', True, lambda m: f'https://www.youtube.com/channel/UC{m[1]}'], |
|
|
|
[r'/www\.youtube\.com/(?:playlist|embed(?:/videoseries|/\+lastest|/playlist)?/?)\?(?:.*&)?list=((PL|FL|RD|OL)[0-9A-Za-z_-]+)', True, lambda m: f'https://www.youtube.com/playlist?list={m[1]}'], |
|
|
|
[r'/www\.youtube\.com/embed/?\?(?=(?:.*&)?listType=user_uploads(?:&|$))(?:.*&)?list=([^&]+)', True, lambda m: f'https://www.youtube.com/user/{m[1]}'], |
|
|
|
[r'/www\.youtube\.com/rss/user/([^/?]+)', True, lambda m: f'https://www.youtube.com/user/{m[1]}'], |
|
|
|
[r'/www\.youtube\.com/(?:subscription_center\?(?:.*&)?add_user=|subscribe_widget\?(?:.*&)?p=|profile\?(?:.*&)?user=)([^/=&]+)(?=(&|$))', True, lambda m: f'https://www.youtube.com/user/{m[1]}'], |
|
|
|
[r'/www\.youtube\.com/feeds/videos\.xml\?(?:.*&)?channel_id=(UC[0-9A-Za-z_-]+)', True, lambda m: f'https://www.youtube.com/channel/{m[1]}'], |
|
|
|
[r'/www\.youtube\.com(?:/view_play_list\?(?:.*&)?p=|/playlist\?(?:.*&)?list=)([0-9A-F]{16})(?=(&|$))', True, lambda m: f'https://www.youtube.com/playlist?list=PL{m[1]}'], |
|
|
|
[r'/www\.youtube\.com/(?:playlist|embed(?:/videoseries|/\+lastest|/playlist)?/?)\?(?:\S*&)?list=UU([0-9A-Za-z_-]+)', True, lambda m: f'https://www.youtube.com/channel/UC{m[1]}'], |
|
|
|
[r'/www\.youtube\.com/(?:playlist|embed(?:/videoseries|/\+lastest|/playlist)?/?)\?(?:\S*&)?list=((PL|FL|RD|OL)[0-9A-Za-z_-]+)', True, lambda m: f'https://www.youtube.com/playlist?list={m[1]}'], |
|
|
|
[r'/www\.youtube\.com/embed/?\?(?=(?:\S*&)?listType=user_uploads(?:&|$))(?:\S*&)?list=([^&\s]+)', True, lambda m: f'https://www.youtube.com/user/{m[1]}'], |
|
|
|
[r'/www\.youtube\.com/rss/user/([^/?\s]+)', True, lambda m: f'https://www.youtube.com/user/{m[1]}'], |
|
|
|
[r'/www\.youtube\.com/(?:subscription_center\?(?:\S*&)?add_user=|subscribe_widget\?(?:\S*&)?p=|profile\?(?:\S*&)?user=)([^/=&\s]+)(?=(&|\s|$))', True, lambda m: f'https://www.youtube.com/user/{m[1]}'], |
|
|
|
[r'/www\.youtube\.com/feeds/videos\.xml\?(?:\S*&)?channel_id=(UC[0-9A-Za-z_-]+)', True, lambda m: f'https://www.youtube.com/channel/{m[1]}'], |
|
|
|
[r'/www\.youtube\.com(?:/view_play_list\?(?:\S*&)?p=|/playlist\?(?:.*&)?list=)([0-9A-F]{16})(?=(&|\s|$))', True, lambda m: f'https://www.youtube.com/playlist?list=PL{m[1]}'], |
|
|
|
[r'/i\.ytimg\.com/vi/([0-9A-Za-z_-]{11})/', True, lambda m: f'https://www.youtube.com/watch?v={m[1]}'], |
|
|
|
] |
|
|
|
|
|
|
@@ -112,23 +112,21 @@ for e in matchers: |
|
|
|
|
|
|
|
for origLine in sys.stdin: |
|
|
|
origLine = origLine.strip() |
|
|
|
line = re.sub(r'^https?://', '//', origLine) |
|
|
|
line = re.sub(r'https?://', '//', origLine) |
|
|
|
line = domainPattern.sub('/www.youtube.com/', line) |
|
|
|
candidates = re.split(r'\s+', line) |
|
|
|
hadMatches = False |
|
|
|
for candidate in candidates: |
|
|
|
for pattern, paramSearch, f in matchers: |
|
|
|
results = set() |
|
|
|
for m in itertools.chain((x for x in pattern.finditer(candidate)), (x for x in pattern.finditer(percentdecode(candidate))) if paramSearch else ()): |
|
|
|
hadMatches = True |
|
|
|
r = f(m) |
|
|
|
if r in results: |
|
|
|
continue |
|
|
|
results.add(r) |
|
|
|
if r is None: |
|
|
|
break |
|
|
|
print(r) |
|
|
|
if None in results: |
|
|
|
for pattern, paramSearch, f in matchers: |
|
|
|
results = set() |
|
|
|
for m in itertools.chain((x for x in pattern.finditer(line)), (x for x in pattern.finditer(percentdecode(line))) if paramSearch else ()): |
|
|
|
hadMatches = True |
|
|
|
r = f(m) |
|
|
|
if r in results: |
|
|
|
continue |
|
|
|
results.add(r) |
|
|
|
if r is None: |
|
|
|
break |
|
|
|
print(r) |
|
|
|
if None in results: |
|
|
|
break |
|
|
|
if not hadMatches: |
|
|
|
print(origLine, file = sys.stderr) |