|
@@ -56,7 +56,7 @@ channelPattern = '|'.join([ |
|
|
r'/www\.youtube\.com/c/[^/?&=.]+', |
|
|
r'/www\.youtube\.com/c/[^/?&=.]+', |
|
|
r'/www\.youtube\.com/user/[^/?&=.]+', |
|
|
r'/www\.youtube\.com/user/[^/?&=.]+', |
|
|
r'/www\.youtube\.com/channel/UC[0-9A-Za-z_-]{22}', |
|
|
r'/www\.youtube\.com/channel/UC[0-9A-Za-z_-]{22}', |
|
|
r'/www\.youtube\.com/[^/?&=.]+(?=/?$)', |
|
|
|
|
|
|
|
|
r'/www\.youtube\.com/[^/?&=.\s]+(?=/?(\s|$))', |
|
|
]) |
|
|
]) |
|
|
|
|
|
|
|
|
# Make sure that the last 11 chars of the match are always the video ID (because Python's re doesn't support \K). |
|
|
# Make sure that the last 11 chars of the match are always the video ID (because Python's re doesn't support \K). |
|
@@ -114,19 +114,21 @@ for origLine in sys.stdin: |
|
|
origLine = origLine.strip() |
|
|
origLine = origLine.strip() |
|
|
line = re.sub(r'^https?://', '//', origLine) |
|
|
line = re.sub(r'^https?://', '//', origLine) |
|
|
line = domainPattern.sub('/www.youtube.com/', line) |
|
|
line = domainPattern.sub('/www.youtube.com/', line) |
|
|
|
|
|
candidates = re.split(r'\s+', line) |
|
|
hadMatches = False |
|
|
hadMatches = False |
|
|
for pattern, paramSearch, f in matchers: |
|
|
|
|
|
results = set() |
|
|
|
|
|
for m in itertools.chain((x for x in pattern.finditer(line)), (x for x in pattern.finditer(percentdecode(line))) if paramSearch else ()): |
|
|
|
|
|
hadMatches = True |
|
|
|
|
|
r = f(m) |
|
|
|
|
|
if r in results: |
|
|
|
|
|
continue |
|
|
|
|
|
results.add(r) |
|
|
|
|
|
if r is None: |
|
|
|
|
|
|
|
|
for candidate in candidates: |
|
|
|
|
|
for pattern, paramSearch, f in matchers: |
|
|
|
|
|
results = set() |
|
|
|
|
|
for m in itertools.chain((x for x in pattern.finditer(candidate)), (x for x in pattern.finditer(percentdecode(candidate))) if paramSearch else ()): |
|
|
|
|
|
hadMatches = True |
|
|
|
|
|
r = f(m) |
|
|
|
|
|
if r in results: |
|
|
|
|
|
continue |
|
|
|
|
|
results.add(r) |
|
|
|
|
|
if r is None: |
|
|
|
|
|
break |
|
|
|
|
|
print(r) |
|
|
|
|
|
if None in results: |
|
|
break |
|
|
break |
|
|
print(r) |
|
|
|
|
|
if None in results: |
|
|
|
|
|
break |
|
|
|
|
|
if not hadMatches: |
|
|
if not hadMatches: |
|
|
print(origLine, file = sys.stderr) |
|
|
print(origLine, file = sys.stderr) |