|
@@ -103,15 +103,7 @@ matchers = [ |
|
|
|
|
|
|
|
|
# Compile pattern and generate one for parameters if desired |
|
|
# Compile pattern and generate one for parameters if desired |
|
|
for e in matchers: |
|
|
for e in matchers: |
|
|
pattern, paramSearch, f = e |
|
|
|
|
|
e[0] = re.compile(pattern) |
|
|
|
|
|
if paramSearch: |
|
|
|
|
|
p2 = pattern.replace('//', '/{1,2}').replace('/', '(/|%2F)').replace(r'\?', r'(\?|%3F)') |
|
|
|
|
|
p2 = re.sub(r'(?<!\(\?):', '(:|%3A)', p2) |
|
|
|
|
|
p2 = re.sub(r'(?<!\(\?)=', '(=|%3D)', p2) |
|
|
|
|
|
e[1] = re.compile(p2.replace('&', '(&|%26)')) |
|
|
|
|
|
else: |
|
|
|
|
|
e[1] = None |
|
|
|
|
|
|
|
|
e[0] = re.compile(e[0]) |
|
|
|
|
|
|
|
|
# Only one slash before so it still matches inside URLs when slashes were collapsed. |
|
|
# Only one slash before so it still matches inside URLs when slashes were collapsed. |
|
|
domainPattern = re.compile(r'/(www\.)?youtube\.(com|de|fr|co\.uk|it|es|at|pt|gr|hu|ro|pl|dk|no|se|fi|ee|lt|lv|ru|by|cz|sk|si|rs|hr|ca)/') |
|
|
domainPattern = re.compile(r'/(www\.)?youtube\.(com|de|fr|co\.uk|it|es|at|pt|gr|hu|ro|pl|dk|no|se|fi|ee|lt|lv|ru|by|cz|sk|si|rs|hr|ca)/') |
|
@@ -121,11 +113,11 @@ for origLine in sys.stdin: |
|
|
line = re.sub(r'^https?://', '//', origLine) |
|
|
line = re.sub(r'^https?://', '//', origLine) |
|
|
line = domainPattern.sub('/www.youtube.com/', line) |
|
|
line = domainPattern.sub('/www.youtube.com/', line) |
|
|
hadMatches = False |
|
|
hadMatches = False |
|
|
for pattern1, pattern2, f in matchers: |
|
|
|
|
|
|
|
|
for pattern, paramSearch, f in matchers: |
|
|
results = set() |
|
|
results = set() |
|
|
for m, encoded in itertools.chain(((x, False) for x in pattern1.finditer(line)), ((x, True) for x in pattern2.finditer(line)) if pattern2 else ()): |
|
|
|
|
|
|
|
|
for m in itertools.chain((x for x in pattern.finditer(line)), (x for x in pattern.finditer(percentdecode(line))) if paramSearch else ()): |
|
|
hadMatches = True |
|
|
hadMatches = True |
|
|
r = f(m if not encoded else [percentdecode(x) if x else x for x in itertools.chain((m[0],), m.groups())]) |
|
|
|
|
|
|
|
|
r = f(m) |
|
|
if r in results: |
|
|
if r in results: |
|
|
continue |
|
|
continue |
|
|
results.add(r) |
|
|
results.add(r) |
|
|