diff --git a/Dockerfile b/Dockerfile index 32789e5..67e0604 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ FROM python:3.7-alpine3.14 RUN \ apk add --no-cache gcc libc-dev libffi-dev \ - && pip install --no-cache-dir telethon 'cryptg<0.3' + && pip install --no-cache-dir telethon 'cryptg<0.3' tqdm COPY telegram-dl.py / VOLUME ["/data/"] WORKDIR /data diff --git a/README.md b/README.md index 1002687..e53bae9 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,9 @@ Usage: `telegram-dl.py URL [URL...]` All URLs must be messages in the same channel. The output consists of writing each message to a file `{channelName}_{messageId}.json`. If the message has any attachment (photo, video, file, etc.), it gets downloaded as well. The filename for that is chosen by Telethon; if it's a file attachment, the literal path as presented by Telegram is used, and if it's a photo or video, a name is made up. -The only dependency of the script is Telethon (though you probably want cryptg for performance reasons). For authentication (always required), specify the `TELEGRAM_API_ID`, `TELEGRAM_API_HASH`, and `TELEGRAM_BOT_TOKEN` environment variables. The session is stored in a file `telegram-dl.session`; keeping this between executions is not critical but recommended. +The only mandatory dependency of the script is Telethon. You probably want cryptg as well for performance reasons when downloading larger files. With tqdm installed, you get a progress bar for each download. + +For authentication (always required), specify the `TELEGRAM_API_ID`, `TELEGRAM_API_HASH`, and `TELEGRAM_BOT_TOKEN` environment variables. The session is stored in a file `telegram-dl.session`; keeping this between executions is not critical but recommended. A `Dockerfile` is provided for running with Docker. Simply provide the URL(s) as arguments on running the image. Data is written to the `/data` volume, which you may want to mount from the host machine instead. 
def download_callback(current, total, bar = None):
    """Move a progress bar to the absolute position ``current``.

    The script binds ``bar`` with ``functools.partial`` and passes the result
    to ``client.download_media`` as its ``progress_callback``.

    Args:
        current: Absolute progress value reported by the caller
            (presumably bytes received so far -- confirm against Telethon).
        total: Overall size reported by the caller; copied onto the bar on
            every call.
        bar: Object exposing ``total``, ``n`` and ``update()`` (a tqdm bar in
            this script), or ``None`` to make the callback a no-op.

    Returns:
        None.
    """
    if bar is not None:
        # FIXME: Accesses undocumented attribute of tqdm
        bar.total = total
        # The bar is advanced by the difference between the reported position
        # and the bar's own counter.
        # FIXME: https://github.com/tqdm/tqdm/issues/1264
        step = current - bar.n
        bar.update(step)