@@ -1,7 +1,7 @@ | |||||
FROM python:3.7-alpine3.14 | FROM python:3.7-alpine3.14 | ||||
RUN \ | RUN \ | ||||
apk add --no-cache gcc libc-dev libffi-dev \ | apk add --no-cache gcc libc-dev libffi-dev \ | ||||
&& pip install --no-cache-dir telethon 'cryptg<0.3' | |||||
&& pip install --no-cache-dir telethon 'cryptg<0.3' tqdm | |||||
COPY telegram-dl.py / | COPY telegram-dl.py / | ||||
VOLUME ["/data/"] | VOLUME ["/data/"] | ||||
WORKDIR /data | WORKDIR /data | ||||
@@ -4,7 +4,9 @@ Usage: `telegram-dl.py URL [URL...]` | |||||
All URLs must be messages in the same channel. The output consists of writing each message to a file `{channelName}_{messageId}.json`. If the message has any attachment (photo, video, file, etc.), it gets downloaded as well. The filename for that is chosen by Telethon; if it's a file attachment, the literal path as presented by Telegram is used, and if it's a photo or video, a name is made up. | All URLs must be messages in the same channel. The output consists of writing each message to a file `{channelName}_{messageId}.json`. If the message has any attachment (photo, video, file, etc.), it gets downloaded as well. The filename for that is chosen by Telethon; if it's a file attachment, the literal path as presented by Telegram is used, and if it's a photo or video, a name is made up. | ||||
The only dependency of the script is Telethon (though you probably want cryptg for performance reasons). For authentication (always required), specify the `TELEGRAM_API_ID`, `TELEGRAM_API_HASH`, and `TELEGRAM_BOT_TOKEN` environment variables. The session is stored in a file `telegram-dl.session`; keeping this between executions is not critical but recommended. | |||||
The only mandatory dependency of the script is Telethon. You probably want cryptg as well for performance reasons when downloading larger files. With tqdm installed, you get a progress bar for each download. | |||||
For authentication (always required), specify the `TELEGRAM_API_ID`, `TELEGRAM_API_HASH`, and `TELEGRAM_BOT_TOKEN` environment variables. The session is stored in a file `telegram-dl.session`; keeping this between executions is not critical but recommended. | |||||
A `Dockerfile` is provided for running with Docker. Simply provide the URL(s) as arguments on running the image. Data is written to the `/data` volume, which you may want to mount from the host machine instead. | A `Dockerfile` is provided for running with Docker. Simply provide the URL(s) as arguments on running the image. Data is written to the `/data` volume, which you may want to mount from the host machine instead. | ||||
@@ -2,11 +2,16 @@ | |||||
import asyncio | import asyncio | ||||
import base64 | import base64 | ||||
import datetime | import datetime | ||||
import functools | |||||
import json | import json | ||||
import os | import os | ||||
import re | import re | ||||
import sys | import sys | ||||
import telethon | import telethon | ||||
try: | |||||
import tqdm | |||||
except ImportError: | |||||
tqdm = None | |||||
API_ID = os.environ['TELEGRAM_API_ID'] | API_ID = os.environ['TELEGRAM_API_ID'] | ||||
@@ -23,6 +28,13 @@ def stuff_to_json(o): | |||||
raise TypeError(f'Object of type {type(o)} is not JSON serializable') | raise TypeError(f'Object of type {type(o)} is not JSON serializable') | ||||
def download_callback(current, total, bar = None):
    """Forward a download progress report to a progress bar.

    Meant to be handed to `client.download_media` as its `progress_callback`,
    with `bar` pre-bound (e.g. via `functools.partial`).

    Parameters:
        current: Absolute progress reported so far.
        total: Expected final value; written to the bar on every call.
        bar: Progress bar object exposing `total`, `n` and `update(delta)`
            (a `tqdm.tqdm` instance in this script). `None` turns the
            callback into a no-op, e.g. when tqdm is not installed.

    Returns:
        None.
    """
    if bar is None:
        # No progress bar in use; nothing to report.
        return
    bar.total = total #FIXME: Accesses undocumented attribute of tqdm
    # The callback receives an absolute position, whereas update() is given an
    # increment, so pass the distance from the bar's current counter.
    bar.update(current - bar.n) #FIXME: https://github.com/tqdm/tqdm/issues/1264
async def main(): | async def main(): | ||||
# Parse URLs | # Parse URLs | ||||
targets = [] | targets = [] | ||||
@@ -43,15 +55,26 @@ async def main(): | |||||
# Let's go... | # Let's go... | ||||
client = telethon.TelegramClient('telegram-dl', API_ID, API_HASH) | client = telethon.TelegramClient('telegram-dl', API_ID, API_HASH) | ||||
print('Connecting', file = sys.stderr) | |||||
await client.start(bot_token = BOT_TOKEN) | await client.start(bot_token = BOT_TOKEN) | ||||
print('Fetching messages', file = sys.stderr) | |||||
messages = await client.get_messages(channelName, ids = ids) | messages = await client.get_messages(channelName, ids = ids) | ||||
for message in messages: | for message in messages: | ||||
if not message: | if not message: | ||||
continue | continue | ||||
print(f'Processing message {message.id}', file = sys.stderr) | |||||
with open(f'{channelName}_{message.id}.json', 'x') as fp: | with open(f'{channelName}_{message.id}.json', 'x') as fp: | ||||
json.dump(message.to_dict(), fp, default = stuff_to_json) | json.dump(message.to_dict(), fp, default = stuff_to_json) | ||||
await client.download_media(message) | |||||
if message.media and tqdm: | |||||
bar = tqdm.tqdm(unit = 'iB', unit_divisor = 1024, unit_scale = True) | |||||
else: | |||||
bar = None | |||||
try: | |||||
await client.download_media(message, progress_callback = functools.partial(download_callback, bar = bar)) | |||||
finally: | |||||
if bar is not None: | |||||
bar.close() | |||||
asyncio.run(main()) | asyncio.run(main()) |