Browse Source

Redirect support

http3
tech234a 3 years ago
parent
commit
9e8b7a6ecf
3 changed files with 24 additions and 6 deletions
  1. +0
    -1
      export.py
  2. +23
    -4
      http3.py
  3. +1
    -1
      tracker.py

+ 0
- 1
export.py View File

@@ -155,7 +155,6 @@ def subprrun(mysession, langcode, vid, mode, needforcemetadata, needforcecaption
sleep(30)
except:
print("Error in request, retrying in 5 seconds...")
raise
sleep(5)

inttext = page.text


+ 23
- 4
http3.py View File

@@ -5,10 +5,11 @@ from aioquic.h3.connection import H3_ALPN
from aioquic.asyncio.client import connect
from aioquic.quic.configuration import QuicConfiguration
from http3_base import HttpClient, prepare_response, perform_http_request
from urllib.parse import urlparse

class HTTP3Response:
def __init__(self, input) -> None:
headers, content = input
headers, content, url, redirect = input
self.content = content
try:
self.text = content.decode()
@@ -20,6 +21,7 @@ class HTTP3Response:
self.headers[k.decode()] = v.decode()
try:
self.status_code = int(headers[b":status"])
self.url = url
except:
print("Status code not included as header, defaulting to 200")
self.status_code = 200
@@ -37,7 +39,7 @@ async def main(address, headers={}):

events = await perform_http_request(client=client, url=address, headers=headers)

return HTTP3Response(prepare_response(events))
return prepare_response(events)

def get(url, headers={}, params={}):
plist = []
@@ -50,5 +52,22 @@ def get(url, headers={}, params={}):
else:
pstring = ""
#print(url+pstring)
loop = asyncio.new_event_loop()
return loop.run_until_complete(main(url+pstring, headers=headers))
redirect = False
url = url+pstring
while True:
#print(url)
loop = asyncio.new_event_loop()
oheaders, ocontent = loop.run_until_complete(main(url, headers=headers))
statuscode = int(oheaders[b":status"])
if statuscode >= 300 and statuscode < 400 and b"location" in oheaders.keys():
#print("Redirection")
redirect = True
origurl = url
parsedorig = urlparse(origurl)
url = oheaders[b"location"].decode()
parsednew = urlparse(url)
if not parsednew.scheme and not parsednew.netloc:
url = parsedorig.scheme + "://" + parsedorig.netloc + url
else:
break
return HTTP3Response((oheaders, ocontent, url+pstring, redirect))

+ 1
- 1
tracker.py View File

@@ -9,7 +9,7 @@ from os.path import isfile
from json import loads

# https://github.com/ArchiveTeam/tencent-weibo-grab/blob/9bae5f9747e014db9227821a9c11557267967023/pipeline.py
VERSION = "20200924.10"
VERSION = "20200924.11"

TRACKER_ID = "ext-yt-communitycontribs"
TRACKER_HOST = "trackerproxy.meo.ws"


Loading…
Cancel
Save