@@ -2,22 +2,32 @@ from time import sleep | |||||
from typing import Dict | from typing import Dict | ||||
from json import loads | from json import loads | ||||
from switchable_request import get | |||||
backend = "http3" | |||||
langcodes = {"Afar": "aa", "Abkhazian": "ab", "Afrikaans": "af", "Akan": "ak", "all": "all", "Amharic": "am", "Aragonese": "an", "Arabic": "ar", "Aramaic": "arc", "Algerian Arabic": "arq", "Assamese": "as", "American Sign Language": "ase", "Asturian": "ast", "Avaric": "av", "Aymara": "ay", "Azerbaijani": "az", "Bashkir": "ba", "Belarusian": "be", "Bulgarian": "bg", "Bihari": "bh", "Bislama": "bi", "Bangla": "bn", "Tibetan": "bo", "Breton": "br", "Bosnian": "bs", "Catalan": "ca", "Cebuano": "ceb", "Choctaw": "cho", "Cherokee": "chr", "Corsican": "co", "Czech": "cs", "Church Slavic": "cu", "Welsh": "cy", "Danish": "da", "Danish (Denmark)": "da-DK", "German": "de", "German (Austria)": "de-AT", "German (Switzerland)": "de-CH", "German (Germany)": "de-DE", "Divehi": "dv", "Dzongkha": "dz", "Ewe": "ee", "Greek": "el", "English": "en", "English (United Arab Emirates)": "en-AE", "English (Canada)": "en-CA", "English (United Kingdom)": "en-GB", "English (Ireland)": "en-IE", "English (India)": "en-IN", "English (United States)": "en-US", "Esperanto": "eo", "Spanish": "es", "Spanish (Latin America)": "es-419", "Spanish (Argentina)": "es-AR", "Spanish (Chile)": "es-CL", "Spanish (Colombia)": "es-CO", "Spanish (Costa Rica)": "es-CR", "Spanish (Spain)": "es-ES", "Spanish (Mexico)": "es-MX", "Spanish (Nicaragua)": "es-NI", "Spanish (United States)": "es-US", "Estonian": "et", "Basque": "eu", "Persian": "fa", "Persian (Afghanistan)": "fa-AF", "Persian (Iran)": "fa-IR", "Fulah": "ff", "Finnish": "fi", "Filipino": "fil", "Fijian": "fj", "Faroese": "fo", "French": "fr", "French (Belgium)": "fr-BE", "French (Canada)": "fr-CA", "French (Switzerland)": "fr-CH", "French (France)": "fr-FR", "Western Frisian": "fy", "Irish": "ga", "Scottish Gaelic": "gd", "Galician": "gl", "Guarani": "gn", "Swiss German": "gsw", "Gujarati": "gu", "Hausa": "ha", "Hakka Chinese": "hak", "Hakka Chinese (Taiwan)": "hak-TW", "Hindi": "hi-Latn", "Hmong": "hmn", "Croatian": "hr", "Haitian Creole": "ht", 
"Hungarian": "hu", "Armenian": "hy", "Interlingua": "ia", "Indonesian": "id", "Interlingue": "ie", "Igbo": "ig", "Sichuan Yi": "ii", "Inupiaq": "ik", "Icelandic": "is", "Italian": "it", "Italian (Italy)": "it-IT", "Inuktitut": "iu", "Hebrew": "iw", "Japanese": "ja", "Javanese": "jv", "Georgian": "ka", "Kazakh": "kk", "Kalaallisut": "kl", "Khmer": "km", "Kannada": "kn", "Korean": "ko", "Korean (South Korea)": "ko-KR", "Kanuri": "kr", "Kashmiri": "ks", "Kurdish": "ku", "Kyrgyz": "ky", "Latin": "la", "Luxembourgish": "lb", "Lingala": "ln", "Lao": "lo", "Lithuanian": "lt", "Mizo": "lus", "Latvian": "lv", "Masai": "mas", "Malagasy": "mg", "Maori": "mi", "Miscellaneous languages": "mis", "Macedonian": "mk", "Malayalam": "ml", "Mongolian": "mn", "Manipuri": "mni", "Moldavian": "mo", "Marathi": "mr", "Malay": "ms", "Maltese": "mt", "Burmese": "my", "Nauru": "na", "Min Nan Chinese": "nan", "Min Nan Chinese (Taiwan)": "nan-TW", "Nepali": "ne", "Dutch": "nl", "Dutch (Belgium)": "nl-BE", "Dutch (Netherlands)": "nl-NL", "Norwegian Nynorsk": "nn", "Norwegian": "no", "not": "not", "Navajo": "nv", "Occitan": "oc", "Oromo": "om", "Odia": "or", "Punjabi": "pa", "Polish": "pl", "Polish (Poland)": "pl-PL", "Pashto": "ps", "Portuguese": "pt", "Portuguese (Brazil)": "pt-BR", "Portuguese (Portugal)": "pt-PT", "Quechua": "qu", "Romansh": "rm", "Rundi": "rn", "Romanian": "ro", "Romanian (Moldova)": "ro-MD", "Russian": "ru-Latn", "Russian (Russia)": "ru-RU", "Kinyarwanda": "rw", "Sanskrit": "sa", "Sardinian": "sc", "Sicilian": "scn", "Scots": "sco", "Sindhi": "sd", "Sherdukpen": "sdp", "Northern Sami": "se", "Sango": "sg", "Serbo-Croatian": "sh", "Sinhala": "si", "Slovak": "sk", "Slovenian": "sl", "Samoan": "sm", "Shona": "sn", "Somali": "so", "Albanian": "sq", "Serbian": "sr", "Serbian (Cyrillic)": "sr-Cyrl", "Serbian (Latin)": "sr-Latn", "Swati": "ss", "Southern Sotho": "st", "Sundanese": "su", "Swedish": "sv", "Swahili": "sw", "Tamil": "ta", "Telugu": "te", "Tajik": "tg", "Thai": "th", 
"Tigrinya": "ti", "Turkmen": "tk", "Tagalog": "tl", "Klingon": "tlh", "Tswana": "tn", "Tongan": "to", "Turkish": "tr", "Turkish (Turkey)": "tr-TR", "Tsonga": "ts", "Tatar": "tt", "Twi": "tw", "Ukrainian": "uk", "Urdu": "ur", "Uzbek": "uz", "Vietnamese": "vi", "Volap\\xFCk": "vo", "Wolof": "wo", "Xhosa": "xh", "Yiddish": "yi", "Yoruba": "yo", "Cantonese": "yue", "Cantonese (Hong Kong)": "yue-HK", "Chinese": "zh", "Chinese (China)": "zh-CN", "Chinese (Hong Kong)": "zh-HK", "Chinese (Simplified)": "zh-Hans", "Chinese (Simplified, China)": "zh-Hans-CN", "Chinese (Simplified, Singapore)": "zh-Hans-SG", "Chinese (Traditional)": "zh-Hant", "Chinese (Traditional, Hong Kong)": "zh-Hant-HK", "Chinese (Traditional, Taiwan)": "zh-Hant-TW", "Chinese (Singapore)": "zh-SG", "Chinese (Taiwan)": "zh-TW", "Zulu": "zu", "Hiri Motu": "ho", "Tok Pisin": "tpi", "Voro": "vor"} | langcodes = {"Afar": "aa", "Abkhazian": "ab", "Afrikaans": "af", "Akan": "ak", "all": "all", "Amharic": "am", "Aragonese": "an", "Arabic": "ar", "Aramaic": "arc", "Algerian Arabic": "arq", "Assamese": "as", "American Sign Language": "ase", "Asturian": "ast", "Avaric": "av", "Aymara": "ay", "Azerbaijani": "az", "Bashkir": "ba", "Belarusian": "be", "Bulgarian": "bg", "Bihari": "bh", "Bislama": "bi", "Bangla": "bn", "Tibetan": "bo", "Breton": "br", "Bosnian": "bs", "Catalan": "ca", "Cebuano": "ceb", "Choctaw": "cho", "Cherokee": "chr", "Corsican": "co", "Czech": "cs", "Church Slavic": "cu", "Welsh": "cy", "Danish": "da", "Danish (Denmark)": "da-DK", "German": "de", "German (Austria)": "de-AT", "German (Switzerland)": "de-CH", "German (Germany)": "de-DE", "Divehi": "dv", "Dzongkha": "dz", "Ewe": "ee", "Greek": "el", "English": "en", "English (United Arab Emirates)": "en-AE", "English (Canada)": "en-CA", "English (United Kingdom)": "en-GB", "English (Ireland)": "en-IE", "English (India)": "en-IN", "English (United States)": "en-US", "Esperanto": "eo", "Spanish": "es", "Spanish (Latin America)": "es-419", "Spanish 
(Argentina)": "es-AR", "Spanish (Chile)": "es-CL", "Spanish (Colombia)": "es-CO", "Spanish (Costa Rica)": "es-CR", "Spanish (Spain)": "es-ES", "Spanish (Mexico)": "es-MX", "Spanish (Nicaragua)": "es-NI", "Spanish (United States)": "es-US", "Estonian": "et", "Basque": "eu", "Persian": "fa", "Persian (Afghanistan)": "fa-AF", "Persian (Iran)": "fa-IR", "Fulah": "ff", "Finnish": "fi", "Filipino": "fil", "Fijian": "fj", "Faroese": "fo", "French": "fr", "French (Belgium)": "fr-BE", "French (Canada)": "fr-CA", "French (Switzerland)": "fr-CH", "French (France)": "fr-FR", "Western Frisian": "fy", "Irish": "ga", "Scottish Gaelic": "gd", "Galician": "gl", "Guarani": "gn", "Swiss German": "gsw", "Gujarati": "gu", "Hausa": "ha", "Hakka Chinese": "hak", "Hakka Chinese (Taiwan)": "hak-TW", "Hindi": "hi-Latn", "Hmong": "hmn", "Croatian": "hr", "Haitian Creole": "ht", "Hungarian": "hu", "Armenian": "hy", "Interlingua": "ia", "Indonesian": "id", "Interlingue": "ie", "Igbo": "ig", "Sichuan Yi": "ii", "Inupiaq": "ik", "Icelandic": "is", "Italian": "it", "Italian (Italy)": "it-IT", "Inuktitut": "iu", "Hebrew": "iw", "Japanese": "ja", "Javanese": "jv", "Georgian": "ka", "Kazakh": "kk", "Kalaallisut": "kl", "Khmer": "km", "Kannada": "kn", "Korean": "ko", "Korean (South Korea)": "ko-KR", "Kanuri": "kr", "Kashmiri": "ks", "Kurdish": "ku", "Kyrgyz": "ky", "Latin": "la", "Luxembourgish": "lb", "Lingala": "ln", "Lao": "lo", "Lithuanian": "lt", "Mizo": "lus", "Latvian": "lv", "Masai": "mas", "Malagasy": "mg", "Maori": "mi", "Miscellaneous languages": "mis", "Macedonian": "mk", "Malayalam": "ml", "Mongolian": "mn", "Manipuri": "mni", "Moldavian": "mo", "Marathi": "mr", "Malay": "ms", "Maltese": "mt", "Burmese": "my", "Nauru": "na", "Min Nan Chinese": "nan", "Min Nan Chinese (Taiwan)": "nan-TW", "Nepali": "ne", "Dutch": "nl", "Dutch (Belgium)": "nl-BE", "Dutch (Netherlands)": "nl-NL", "Norwegian Nynorsk": "nn", "Norwegian": "no", "not": "not", "Navajo": "nv", "Occitan": "oc", "Oromo": "om", 
"Odia": "or", "Punjabi": "pa", "Polish": "pl", "Polish (Poland)": "pl-PL", "Pashto": "ps", "Portuguese": "pt", "Portuguese (Brazil)": "pt-BR", "Portuguese (Portugal)": "pt-PT", "Quechua": "qu", "Romansh": "rm", "Rundi": "rn", "Romanian": "ro", "Romanian (Moldova)": "ro-MD", "Russian": "ru-Latn", "Russian (Russia)": "ru-RU", "Kinyarwanda": "rw", "Sanskrit": "sa", "Sardinian": "sc", "Sicilian": "scn", "Scots": "sco", "Sindhi": "sd", "Sherdukpen": "sdp", "Northern Sami": "se", "Sango": "sg", "Serbo-Croatian": "sh", "Sinhala": "si", "Slovak": "sk", "Slovenian": "sl", "Samoan": "sm", "Shona": "sn", "Somali": "so", "Albanian": "sq", "Serbian": "sr", "Serbian (Cyrillic)": "sr-Cyrl", "Serbian (Latin)": "sr-Latn", "Swati": "ss", "Southern Sotho": "st", "Sundanese": "su", "Swedish": "sv", "Swahili": "sw", "Tamil": "ta", "Telugu": "te", "Tajik": "tg", "Thai": "th", "Tigrinya": "ti", "Turkmen": "tk", "Tagalog": "tl", "Klingon": "tlh", "Tswana": "tn", "Tongan": "to", "Turkish": "tr", "Turkish (Turkey)": "tr-TR", "Tsonga": "ts", "Tatar": "tt", "Twi": "tw", "Ukrainian": "uk", "Urdu": "ur", "Uzbek": "uz", "Vietnamese": "vi", "Volap\\xFCk": "vo", "Wolof": "wo", "Xhosa": "xh", "Yiddish": "yi", "Yoruba": "yo", "Cantonese": "yue", "Cantonese (Hong Kong)": "yue-HK", "Chinese": "zh", "Chinese (China)": "zh-CN", "Chinese (Hong Kong)": "zh-HK", "Chinese (Simplified)": "zh-Hans", "Chinese (Simplified, China)": "zh-Hans-CN", "Chinese (Simplified, Singapore)": "zh-Hans-SG", "Chinese (Traditional)": "zh-Hant", "Chinese (Traditional, Hong Kong)": "zh-Hant-HK", "Chinese (Traditional, Taiwan)": "zh-Hant-TW", "Chinese (Singapore)": "zh-SG", "Chinese (Taiwan)": "zh-TW", "Zulu": "zu", "Hiri Motu": "ho", "Tok Pisin": "tpi", "Voro": "vor"} | ||||
def getmetadata(mysession, vid): | |||||
def getmetadata(mysession, vid, allheaders): | |||||
global backend | |||||
params = ( | params = ( | ||||
("v", vid), | ("v", vid), | ||||
) | ) | ||||
while True: | while True: | ||||
wpage = mysession.get("https://www.youtube.com/watch", params=params) | |||||
wpage = get("https://www.youtube.com/watch", params=params, mysession=mysession, backend=backend, http3headers=allheaders) | |||||
if not """</div><div id="content" class=" content-alignment" role="main"><p class='largeText'>Sorry for the interruption. We have been receiving a large volume of requests from your network.</p> | if not """</div><div id="content" class=" content-alignment" role="main"><p class='largeText'>Sorry for the interruption. We have been receiving a large volume of requests from your network.</p> | ||||
<p>To continue with your YouTube experience, please fill out the form below.</p>""" in wpage.text and not wpage.status_code == 429 and 'window["ytInitialPlayerResponse"] = ' in wpage.text and 'window["ytInitialData"] = ' in wpage.text: | <p>To continue with your YouTube experience, please fill out the form below.</p>""" in wpage.text and not wpage.status_code == 429 and 'window["ytInitialPlayerResponse"] = ' in wpage.text and 'window["ytInitialData"] = ' in wpage.text: | ||||
break | break | ||||
else: | else: | ||||
print("Captcha detected, waiting 30 seconds") | |||||
sleep(30) | |||||
if backend == "requests": | |||||
backend = "http3" | |||||
print("Captcha detected, switching discovery to HTTP3/QUIC") | |||||
else: | |||||
print("Captcha detected, waiting 30 seconds") | |||||
sleep(30) | |||||
wptext = wpage.text | wptext = wpage.text | ||||
@@ -38,6 +38,10 @@ from time import sleep | |||||
# https://docs.python.org/3/library/html.parser.html | # https://docs.python.org/3/library/html.parser.html | ||||
from html.parser import HTMLParser | from html.parser import HTMLParser | ||||
backend = "http3" | |||||
from switchable_request import get | |||||
class MyHTMLParser(HTMLParser): | class MyHTMLParser(HTMLParser): | ||||
def __init__(self): | def __init__(self): | ||||
HTMLParser.__init__(self) | HTMLParser.__init__(self) | ||||
@@ -79,7 +83,8 @@ class MyHTMLParser(HTMLParser): | |||||
elif self.get_starttag_text() and self.get_starttag_text().startswith('<div id="original-video-title"'): | elif self.get_starttag_text() and self.get_starttag_text().startswith('<div id="original-video-title"'): | ||||
self.inittitle += data | self.inittitle += data | ||||
def subprrun(mysession, langcode, vid, mode, needforcemetadata, needforcecaptions): | |||||
def subprrun(mysession, langcode, vid, mode, needforcemetadata, needforcecaptions, allheaders): | |||||
global backend | |||||
if mode == "forceedit-metadata": | if mode == "forceedit-metadata": | ||||
while needforcemetadata[langcode] == None: #extra logic | while needforcemetadata[langcode] == None: #extra logic | ||||
print("Awaiting forcemetadata") | print("Awaiting forcemetadata") | ||||
@@ -114,7 +119,7 @@ def subprrun(mysession, langcode, vid, mode, needforcemetadata, needforcecaption | |||||
("o", "U") | ("o", "U") | ||||
) | ) | ||||
page = mysession.get("https://www.youtube.com/timedtext_editor", params=pparams) | |||||
page = get("https://www.youtube.com/timedtext_editor", params=pparams, mysession=mysession, backend=backend, http3headers=allheaders) | |||||
elif mode == "forceedit-metadata": | elif mode == "forceedit-metadata": | ||||
pparams = ( | pparams = ( | ||||
("v", vid), | ("v", vid), | ||||
@@ -124,7 +129,7 @@ def subprrun(mysession, langcode, vid, mode, needforcemetadata, needforcecaption | |||||
('tab', 'metadata') | ('tab', 'metadata') | ||||
) | ) | ||||
page = mysession.get("https://www.youtube.com/timedtext_editor", params=pparams) | |||||
page = get("https://www.youtube.com/timedtext_editor", params=pparams, mysession=mysession, backend=backend, http3headers=allheaders) | |||||
elif mode == "forceedit-captions": | elif mode == "forceedit-captions": | ||||
pparams = ( | pparams = ( | ||||
("v", vid), | ("v", vid), | ||||
@@ -137,13 +142,17 @@ def subprrun(mysession, langcode, vid, mode, needforcemetadata, needforcecaption | |||||
("o", "U") | ("o", "U") | ||||
) | ) | ||||
page = mysession.get("https://www.youtube.com/timedtext_editor", params=pparams) | |||||
page = get("https://www.youtube.com/timedtext_editor", params=pparams, mysession=mysession, backend=backend, http3headers=allheaders) | |||||
if not "accounts.google.com" in page.url and page.status_code != 429 and 'Subtitles/CC' in page.text and 'Title & description' in page.text: | if not "accounts.google.com" in page.url and page.status_code != 429 and 'Subtitles/CC' in page.text and 'Title & description' in page.text: | ||||
break | break | ||||
else: | else: | ||||
print("[Retrying in 30 seconds for rate limit or login failure] Please supply authentication cookie information in config.json or environment variables. See README.md for more information.") | |||||
sleep(30) | |||||
if backend == "requests": | |||||
backend = "http3" | |||||
print("Rate limit or login failure, switching export to HTTP3/QUIC...") | |||||
else: | |||||
print("[Retrying in 30 seconds for rate limit or login failure] Please supply authentication cookie information in config.json or environment variables. See README.md for more information.") | |||||
sleep(30) | |||||
except: | except: | ||||
print("Error in request, retrying in 5 seconds...") | print("Error in request, retrying in 5 seconds...") | ||||
sleep(5) | sleep(5) | ||||
@@ -0,0 +1,54 @@ | |||||
import asyncio | |||||
from typing import cast | |||||
from urllib.parse import urlparse | |||||
from aioquic.h3.connection import H3_ALPN | |||||
from aioquic.asyncio.client import connect | |||||
from aioquic.quic.configuration import QuicConfiguration | |||||
from http3_base import HttpClient, prepare_response, perform_http_request | |||||
class HTTP3Response:
    """Small response object exposing a requests-like surface.

    Attributes set by the constructor:
        url:         the URL supplied by the caller ("" when not supplied).
        content:     the raw response body as received.
        text:        ``content`` decoded with the default codec, or "" when
                     the body cannot be decoded.
        headers:     response headers with names and values decoded to str.
        status_code: integer value of the ``:status`` header, or 200 when
                     that header is missing or not numeric.
        ok:          True when ``status_code`` is below 400.
    """

    def __init__(self, input, url="") -> None:
        """Build the response from a ``(headers, content)`` pair.

        ``input`` is a 2-tuple: a mapping of bytes header names to bytes
        values, and the body bytes.  ``url`` is optional and is stored
        as-is so callers that inspect ``response.url`` keep working.
        """
        headers, content = input
        self.url = url
        self.content = content
        try:
            self.text = content.decode()
        except UnicodeDecodeError:
            # Non-text (or non-UTF-8) body: keep the raw bytes in
            # ``content`` and expose an empty string as text.
            print("Text decoding error")
            self.text = ""
        self.headers = {k.decode(): v.decode() for k, v in headers.items()}
        try:
            self.status_code = int(headers[b":status"])
        except (KeyError, ValueError):
            print("Status code not included as header, defaulting to 200")
            self.status_code = 200
        self.ok = self.status_code < 400
async def main(address, headers=None):
    """Fetch ``address`` over HTTP/3 and return an ``HTTP3Response``.

    ``address`` is the full request URL.  The connection is opened to the
    URL's host on the URL's explicit port, falling back to 443 when the URL
    does not name one.  ``headers`` is an optional dict of extra request
    headers.

    The returned response has its ``url`` attribute set to ``address``
    (the requested URL; no redirect handling is performed here).
    """
    parsed = urlparse(address)
    configuration = QuicConfiguration(is_client=True, alpn_protocols=H3_ALPN)
    async with connect(
        # hostname/port instead of netloc: netloc may carry ":port",
        # which is not a resolvable host name.
        parsed.hostname,
        port=parsed.port or 443,
        configuration=configuration,
        create_protocol=HttpClient,
    ) as client:
        client = cast(HttpClient, client)
        events = await perform_http_request(client=client, url=address, headers=headers)
        response = HTTP3Response(prepare_response(events))
    # Set as an attribute so callers checking ``response.url`` do not fail.
    response.url = address
    return response
def get(url, headers=None, params=None):
    """Perform a blocking HTTP/3 GET and return the response object.

    ``params`` may be a mapping or a sequence of ``(key, value)`` pairs; it
    is percent-encoded and appended to ``url`` as the query string.
    ``headers`` is an optional dict of extra request headers.

    A fresh event loop is created for the call and closed afterwards.
    """
    # Local import keeps this block self-contained; the module only imports
    # urlparse at file level.
    from urllib.parse import urlencode

    query = urlencode(params) if params else ""
    if query:
        # Extend an existing query string instead of adding a second "?".
        separator = "&" if "?" in url else "?"
        url = url + separator + query

    loop = asyncio.new_event_loop()
    try:
        return loop.run_until_complete(
            main(url, headers={} if headers is None else headers)
        )
    finally:
        # Release the loop's resources; one loop is created per request.
        loop.close()
@@ -0,0 +1,158 @@ | |||||
import asyncio | |||||
import logging | |||||
import time | |||||
from collections import deque | |||||
from typing import Deque, Dict, Optional | |||||
from urllib.parse import urlparse | |||||
import aioquic | |||||
from aioquic.asyncio.protocol import QuicConnectionProtocol | |||||
from aioquic.h3.connection import H3Connection | |||||
from aioquic.h3.events import ( | |||||
DataReceived, | |||||
H3Event, | |||||
HeadersReceived, | |||||
PushPromiseReceived, | |||||
) | |||||
from aioquic.quic.events import QuicEvent | |||||
logger = logging.getLogger("client") | |||||
USER_AGENT = "aioquic/" + aioquic.__version__ | |||||
class URL:
    """Split a URL string into the parts used for the request pseudo-headers.

    Attributes:
        authority: the network location (``netloc``) of the URL.
        full_path: the path plus ``?query`` when a query is present; an
                   empty path is normalised to ``/``.
        scheme:    the URL scheme.
    """

    def __init__(self, url: str) -> None:
        parsed = urlparse(url)

        self.authority = parsed.netloc
        # ":path" must not be empty, so a URL without a path maps to "/".
        self.full_path = parsed.path or "/"
        if parsed.query:
            self.full_path += "?" + parsed.query
        self.scheme = parsed.scheme
class HttpRequest:
    """Plain container describing one outgoing request.

    Attributes:
        method:  HTTP method name, e.g. "GET".
        url:     the parsed target URL.
        content: request body bytes (empty by default).
        headers: extra request headers; each instance gets its own dict
                 when none is supplied.
    """

    def __init__(
        self, method: str, url: "URL", content: bytes = b"", headers: Optional[Dict] = None
    ) -> None:
        # Avoid a shared mutable default: a fresh dict per request.
        if headers is None:
            headers = {}

        self.content = content
        self.headers = headers
        self.method = method
        self.url = url
class HttpClient(QuicConnectionProtocol):
    """QUIC connection protocol that issues HTTP/3 requests.

    Events for each request stream are collected in a deque and handed to
    the awaiting caller once the stream has ended.  Server push events are
    collected per push id in ``self.pushes``.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

        # push_id -> events received for that push
        self.pushes: Dict[int, Deque[H3Event]] = {}
        # stream_id -> events received so far for an in-flight request
        self._request_events: Dict[int, Deque[H3Event]] = {}
        # stream_id -> future resolved with the events when the stream ends
        self._request_waiter: Dict[int, asyncio.Future[Deque[H3Event]]] = {}
        self._http = H3Connection(self._quic)

    async def get(self, url: str, headers: Optional[Dict] = None) -> Deque[H3Event]:
        """
        Perform a GET request.
        """
        return await self._request(
            HttpRequest(
                method="GET",
                url=URL(url),
                headers={} if headers is None else headers,
            )
        )

    async def post(
        self, url: str, data: bytes, headers: Optional[Dict] = None
    ) -> Deque[H3Event]:
        """
        Perform a POST request.
        """
        return await self._request(
            HttpRequest(
                method="POST",
                url=URL(url),
                content=data,
                headers={} if headers is None else headers,
            )
        )

    def http_event_received(self, event: H3Event) -> None:
        """Route an HTTP/3 event to its request or push queue."""
        if isinstance(event, (HeadersReceived, DataReceived)):
            stream_id = event.stream_id
            if stream_id in self._request_events:
                # http
                self._request_events[event.stream_id].append(event)
                if event.stream_ended:
                    # Stream complete: wake the caller with all its events.
                    request_waiter = self._request_waiter.pop(stream_id)
                    request_waiter.set_result(self._request_events.pop(stream_id))

            elif event.push_id in self.pushes:
                # push
                self.pushes[event.push_id].append(event)

        elif isinstance(event, PushPromiseReceived):
            self.pushes[event.push_id] = deque()
            self.pushes[event.push_id].append(event)

    def quic_event_received(self, event: QuicEvent) -> None:
        """Feed a QUIC event to the HTTP layer and dispatch what it yields."""
        # pass event to the HTTP layer
        if self._http is not None:
            for http_event in self._http.handle_event(event):
                self.http_event_received(http_event)

    async def _request(self, request: HttpRequest) -> Deque[H3Event]:
        """Send ``request`` on a new stream and wait for its events."""
        stream_id = self._quic.get_next_available_stream_id()

        extra_headers = [
            (k.lower().encode(), v.encode()) for (k, v) in request.headers.items()
        ]
        header_list = [
            (b":method", request.method.encode()),
            (b":scheme", request.url.scheme.encode()),
            (b":authority", request.url.authority.encode()),
            (b":path", request.url.full_path.encode()),
        ]
        # Only send the default user-agent when the caller did not provide
        # one, so the header is never sent twice.
        if not any(name == b"user-agent" for name, _ in extra_headers):
            header_list.append((b"user-agent", USER_AGENT.encode()))
        header_list.extend(extra_headers)

        self._http.send_headers(stream_id=stream_id, headers=header_list)
        self._http.send_data(stream_id=stream_id, data=request.content, end_stream=True)

        # Register the waiter before transmitting so the response events
        # always find their queue.
        waiter = self._loop.create_future()
        self._request_events[stream_id] = deque()
        self._request_waiter[stream_id] = waiter
        self.transmit()

        return await asyncio.shield(waiter)
async def perform_http_request(
    client: HttpClient,
    url: str,
    headers: Optional[dict] = None,
) -> Deque[H3Event]:
    """Issue a GET for ``url`` through ``client`` and return its events.

    ``headers`` is forwarded to ``client.get`` when it is non-empty.  After
    the response arrives, the body size and transfer rate are logged at
    INFO level.  The value returned is exactly what ``client.get`` returned.
    """
    method = "GET"

    # perform request
    start = time.time()
    if headers:
        http_events = await client.get(url, headers=headers)
    else:
        http_events = await client.get(url)
    elapsed = time.time() - start

    # print speed
    octets = sum(
        len(http_event.data)
        for http_event in http_events
        if isinstance(http_event, DataReceived)
    )
    # Clock resolution can make elapsed 0; avoid dividing by zero in that case.
    mbps = octets * 8 / elapsed / 1000000 if elapsed > 0 else 0.0
    logger.info(
        "Response received for %s %s : %d bytes in %.1f s (%.3f Mbps)",
        method,
        urlparse(url).path,
        octets,
        elapsed,
        mbps,
    )
    return http_events
def prepare_response(
    http_events: "Deque[H3Event]"
) -> tuple:
    """Collapse a request's events into a ``(headers, body)`` pair.

    Returns a 2-tuple: a dict built from every ``HeadersReceived`` event's
    header pairs (a later pair with the same name replaces an earlier one),
    and the concatenation of every ``DataReceived`` event's data, in the
    order the events appear.
    """
    body_chunks = []
    headers = {}
    for http_event in http_events:
        if isinstance(http_event, HeadersReceived):
            headers.update(http_event.headers)
        elif isinstance(http_event, DataReceived):
            body_chunks.append(http_event.data)
    return headers, b"".join(body_chunks)
@@ -1,4 +1,5 @@ | |||||
requests | requests | ||||
beautifulsoup4 | beautifulsoup4 | ||||
html5lib | html5lib | ||||
youtube_dl | |||||
youtube_dl | |||||
aioquic |
@@ -0,0 +1,7 @@ | |||||
import http3 | |||||
def get(url: str, params: tuple = (), backend="requests", mysession=None, http3headers=None):
    """Perform a GET through the selected backend and return its response.

    backend == "requests": calls ``mysession.get(url, params=params)``.
    backend == "http3":    calls ``http3.get`` with ``http3headers`` as the
                           request headers (an empty dict when omitted).

    Raises:
        ValueError: when ``backend`` is neither "requests" nor "http3".
    """
    if backend == "requests":
        # params must go by keyword: Session.get only takes url positionally.
        return mysession.get(url, params=params)
    if backend == "http3":
        request_headers = {} if http3headers is None else http3headers
        return http3.get(url, headers=request_headers, params=params)
    raise ValueError("Unknown backend: " + repr(backend))
@@ -9,7 +9,7 @@ from os.path import isfile | |||||
from json import loads | from json import loads | ||||
# https://github.com/ArchiveTeam/tencent-weibo-grab/blob/9bae5f9747e014db9227821a9c11557267967023/pipeline.py | # https://github.com/ArchiveTeam/tencent-weibo-grab/blob/9bae5f9747e014db9227821a9c11557267967023/pipeline.py | ||||
VERSION = "20200924.07" | |||||
VERSION = "20200924.10" | |||||
TRACKER_ID = "ext-yt-communitycontribs" | TRACKER_ID = "ext-yt-communitycontribs" | ||||
TRACKER_HOST = "trackerproxy.meo.ws" | TRACKER_HOST = "trackerproxy.meo.ws" | ||||
@@ -64,6 +64,7 @@ if not (cookies["HSID"] and cookies["SSID"] and cookies["SID"]): | |||||
assert False | assert False | ||||
mysession = requests.session() | mysession = requests.session() | ||||
allheaders = {"cookie": "HSID="+cookies["HSID"]+"; SSID="+cookies["SSID"]+"; SID="+cookies["SID"], "Accept-Language": "en-US",} | |||||
mysession.headers.update({"cookie": "HSID="+cookies["HSID"]+"; SSID="+cookies["SSID"]+"; SID="+cookies["SID"], "Accept-Language": "en-US",}) | mysession.headers.update({"cookie": "HSID="+cookies["HSID"]+"; SSID="+cookies["SSID"]+"; SID="+cookies["SID"], "Accept-Language": "en-US",}) | ||||
validationtest = mysession.get("https://www.youtube.com/timedtext_editor?action_mde_edit_form=1&v=1iNTtHUwvq4&lang=en&bl=vmp&ui=hd&ref=player&tab=captions&o=U") | validationtest = mysession.get("https://www.youtube.com/timedtext_editor?action_mde_edit_form=1&v=1iNTtHUwvq4&lang=en&bl=vmp&ui=hd&ref=player&tab=captions&o=U") | ||||
@@ -109,11 +110,12 @@ def threadrunner(): | |||||
elif task == "discovery": | elif task == "discovery": | ||||
while True: | while True: | ||||
try: | try: | ||||
info = getmetadata(mysession, str(vid).strip()) | |||||
info = getmetadata(mysession, str(vid).strip(), allheaders) | |||||
break | break | ||||
except BaseException as e: | except BaseException as e: | ||||
print(e) | print(e) | ||||
print("Error in retrieving information, waiting 30 seconds and trying again") | print("Error in retrieving information, waiting 30 seconds and trying again") | ||||
#raise | |||||
sleep(30) | sleep(30) | ||||
if info[0] or info[1]: # ccenabled or creditdata | if info[0] or info[1]: # ccenabled or creditdata | ||||
if not isdir("out/"+str(vid).strip()): | if not isdir("out/"+str(vid).strip()): | ||||
@@ -143,11 +145,11 @@ def threadrunner(): | |||||
jobs.put(("submitdiscovery", playldisc, tracker.ItemType.Playlist)) | jobs.put(("submitdiscovery", playldisc, tracker.ItemType.Playlist)) | ||||
elif task == "subtitles": | elif task == "subtitles": | ||||
subprrun(mysession, args, vid, "default", needforcemetadata, needforcecaptions) | |||||
subprrun(mysession, args, vid, "default", needforcemetadata, needforcecaptions, allheaders) | |||||
elif task == "subtitles-forceedit-captions": | elif task == "subtitles-forceedit-captions": | ||||
subprrun(mysession, args, vid, "forceedit-captions", needforcemetadata, needforcecaptions) | |||||
subprrun(mysession, args, vid, "forceedit-captions", needforcemetadata, needforcecaptions, allheaders) | |||||
elif task == "subtitles-forceedit-metadata": | elif task == "subtitles-forceedit-metadata": | ||||
subprrun(mysession, args, vid, "forceedit-metadata", needforcemetadata, needforcecaptions) | |||||
subprrun(mysession, args, vid, "forceedit-metadata", needforcemetadata, needforcecaptions, allheaders) | |||||
elif task == "channel": | elif task == "channel": | ||||
try: | try: | ||||
y = ydl.extract_info("https://www.youtube.com/channel/"+desit.split(":", 1)[1], download=False) | y = ydl.extract_info("https://www.youtube.com/channel/"+desit.split(":", 1)[1], download=False) | ||||