Browse Source

Move default headers to qwarc.const

master
JustAnotherArchivist 3 years ago
parent
commit
d3437bde19
3 changed files with 9 additions and 4 deletions
  1. +1
    -4
      qwarc/__init__.py
  2. +2
    -0
      qwarc/aiohttp.py
  3. +6
    -0
      qwarc/const.py

+ 1
- 4
qwarc/__init__.py View File

@@ -263,14 +263,11 @@ class QWARC:
raise

async def run(self, loop):
headers = [('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0')] #TODO: Move elsewhere

for i in range(self._concurrency):
session = _aiohttp.ClientSession(
connector = qwarc.aiohttp.TCPConnector(loop = loop),
request_class = qwarc.aiohttp.ClientRequest,
response_class = qwarc.aiohttp.ClientResponse,
skip_auto_headers = ['Accept-Encoding'],
loop = loop
)
self._sessions.append(session)
@@ -332,7 +329,7 @@ class QWARC:
raise

session = self._freeSessions.popleft()
- item = self._make_item(itemType, itemValue, session, headers)
+ item = self._make_item(itemType, itemValue, session, DEFAULT_HEADERS)
task = asyncio.ensure_future(item.process())
#TODO: Is there a better way to add custom information to a task/coroutine object?
task.taskType = 'process'


+ 2
- 0
qwarc/aiohttp.py View File

@@ -72,6 +72,8 @@ class TCPConnector(aiohttp.connector.TCPConnector):


class ClientRequest(aiohttp.client_reqrep.ClientRequest):
DEFAULT_HEADERS = {}

def send(self, connection):
connection.protocol.reset_raw_data()
return super().send(connection)


+ 6
- 0
qwarc/const.py View File

@@ -25,3 +25,9 @@ ACTION_FOLLOW_OR_SUCCESS = 3

ACTION_RETRIES_EXCEEDED = 4
'''This request failed repeatedly and exceeded the retry limit.'''

DEFAULT_HEADERS = [
('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'),
('Accept', '*/*'),
]
'''The default HTTP headers sent on every request if not overridden'''

Loading…
Cancel
Save