Browse Source

Tighten patterns for user and custom channel URLs so they can handle HTML input more easily

master
JustAnotherArchivist 2 years ago
parent
commit
e48fb9d1b6
1 changed file with 6 additions and 6 deletions
  1. +6
    -6
      youtube-extract

+ 6
- 6
youtube-extract View File

@@ -53,10 +53,10 @@ noisePattern = '|'.join([
])

# Regex alternation for YouTube channel-style URL paths on www.youtube.com:
# custom URLs (/c/...), user URLs (/user/...), channel IDs (/channel/UC...),
# and bare top-level names.
# NOTE(review): this span seems to come from a diff view whose +/- markers were
# stripped, so superseded and replacement alternatives are probably interleaved
# below — confirm against the real file which entries are live. As written,
# alternatives are tried left to right, so the looser entries listed first
# would take precedence over the tightened ones that follow them.
channelPattern = '|'.join([
# /c/<name>: name is one or more chars other than / ? & = . or whitespace.
r'/www\.youtube\.com/c/[^/?&=.\s]+',
# /user/<name>: same character exclusions as the /c/ entry above.
r'/www\.youtube\.com/user/[^/?&=.\s]+',
# /c/<name>, additionally stopping at " ' and > (HTML attribute/tag delimiters).
r'''/www\.youtube\.com/c/[^/?&=."'>\s]+''',
# /user/<name>: name restricted to 1-20 ASCII letters or digits.
r'/www\.youtube\.com/user/[A-Za-z0-9]{1,20}',
# /channel/UC followed by exactly 22 characters from [0-9A-Za-z_-].
r'/www\.youtube\.com/channel/UC[0-9A-Za-z_-]{22}',
# Bare /<name>: only accepted when followed by an optional slash and then
# whitespace or `$` (checked by lookahead, so not part of the match).
r'/www\.youtube\.com/[^/?&=.\s]+(?=/?(\s|$))',
# Bare /<name>, also stopping at " ' and >; the lookahead additionally accepts
# one of those characters as the terminator.
r'''/www\.youtube\.com/[^/?&=."'>\s]+(?=/?(\s|["'>]|$))''',
])

# Make sure that the last 11 chars of the match are always the video ID (because Python's re doesn't support \K).
@@ -98,9 +98,9 @@ matchers = [
[videoPattern, True, lambda m: f'https://www.youtube.com/watch?v={m[0][-11:]}'],
[r'/www\.youtube\.com/(?:playlist|watch|embed(?:/videoseries|/\+lastest|/playlist)?/?)\?(?:\S*&)?list=UU([0-9A-Za-z_-]+)', True, lambda m: f'https://www.youtube.com/channel/UC{m[1]}'],
[r'/www\.youtube\.com/(?:playlist|watch|embed(?:/videoseries|/\+lastest|/playlist)?/?)\?(?:\S*&)?list=((PL|FL|RD|OL)[0-9A-Za-z_-]+)', True, lambda m: f'https://www.youtube.com/playlist?list={m[1]}'],
[r'/www\.youtube\.com/embed/?\?(?=(?:\S*&)?listType=user_uploads(?:&|$))(?:\S*&)?list=([^&\s]+)', True, lambda m: f'https://www.youtube.com/user/{m[1]}'],
[r'/www\.youtube\.com/rss/user/([^/?\s]+)', True, lambda m: f'https://www.youtube.com/user/{m[1]}'],
[r'/www\.youtube\.com/(?:subscription_center\?(?:\S*&)?add_user=|subscribe_widget\?(?:\S*&)?p=|profile\?(?:\S*&)?user=)([^/=&\s]+)(?=(&|\s|$))', True, lambda m: f'https://www.youtube.com/user/{m[1]}'],
[r'/www\.youtube\.com/embed/?\?(?=(?:\S*&)?listType=user_uploads(?:&|$))(?:\S*&)?list=([A-Za-z0-9]{1,20})', True, lambda m: f'https://www.youtube.com/user/{m[1]}'],
[r'/www\.youtube\.com/rss/user/([A-Za-z0-9]{1,20})', True, lambda m: f'https://www.youtube.com/user/{m[1]}'],
[r'/www\.youtube\.com/(?:subscription_center\?(?:\S*&)?add_user=|subscribe_widget\?(?:\S*&)?p=|profile\?(?:\S*&)?user=)([A-Za-z0-9]{1,20})', True, lambda m: f'https://www.youtube.com/user/{m[1]}'],
[r'/www\.youtube\.com/feeds/videos\.xml\?(?:\S*&)?channel_id=(UC[0-9A-Za-z_-]+)', True, lambda m: f'https://www.youtube.com/channel/{m[1]}'],
[r'/www\.youtube\.com(?:/view_play_list\?(?:\S*&)?p=|/playlist\?(?:.*&)?list=)([0-9A-F]{16})(?=(&|\s|$))', True, lambda m: f'https://www.youtube.com/playlist?list=PL{m[1]}'],
[r'/(?i:i\.ytimg\.com|img\.youtube\.com)(?::\d+)?/vi/([0-9A-Za-z_-]{11})/', True, lambda m: f'https://www.youtube.com/watch?v={m[1]}'],


Loading…
Cancel
Save