@@ -0,0 +1,8 @@ | |||||
# Image for running telegram-dl.py. Output is written to the /data volume.
FROM python:3.7-alpine3.14

# The compiler and headers are only needed while pip builds native extensions,
# so they are installed as a virtual package group and removed again in the
# same layer (removing them in a later layer would not shrink the image).
# The libffi runtime library is added explicitly so that it survives the
# removal of libffi-dev.
RUN apk add --no-cache libffi \
 && apk add --no-cache --virtual .build-deps \
        gcc \
        libc-dev \
        libffi-dev \
 && pip install --no-cache-dir telethon 'cryptg<0.3' \
 && apk del .build-deps

COPY telegram-dl.py /

# The script writes its files relative to the working directory, so /data is
# both the declared volume and the working directory.
VOLUME ["/data/"]
WORKDIR /data

ENTRYPOINT ["python3", "/telegram-dl.py"]
@@ -0,0 +1,12 @@ | |||||
telegram-dl is a little script for retrieving content on Telegram. | |||||
Usage: `telegram-dl.py URL [URL...]` | |||||
All URLs must point to messages in the same channel. Each message is written to a file `{channelName}_{messageId}.json`. If the message has an attachment (photo, video, file, etc.), it is downloaded as well. The attachment's filename is chosen by Telethon: for a file attachment, the literal path as presented by Telegram is used; for a photo or video, a name is made up.
The only dependency of the script is Telethon (though you probably want cryptg for performance reasons). For authentication (always required), specify the `TELEGRAM_API_ID`, `TELEGRAM_API_HASH`, and `TELEGRAM_BOT_TOKEN` environment variables. The session is stored in a file `telegram-dl.session`; keeping this between executions is not critical but recommended. | |||||
A `Dockerfile` is provided for running with Docker. Simply pass the URL(s) as arguments when running the image. Data is written to the `/data` volume, which you may want to mount from the host machine instead.
```
docker build -t telegram-dl:latest .
docker run --rm -v "$(pwd)"/data:/data -e TELEGRAM_API_ID=... -e TELEGRAM_API_HASH=... -e TELEGRAM_BOT_TOKEN=... telegram-dl https://t.me/telegram/178
```
@@ -0,0 +1,57 @@ | |||||
#!/usr/bin/env python3 | |||||
import asyncio | |||||
import base64 | |||||
import datetime | |||||
import json | |||||
import os | |||||
import re | |||||
import sys | |||||
import telethon | |||||
# Credentials are mandatory: a missing variable raises KeyError at import time.
API_ID, API_HASH, BOT_TOKEN = (
    os.environ[name]
    for name in ('TELEGRAM_API_ID', 'TELEGRAM_API_HASH', 'TELEGRAM_BOT_TOKEN')
)

# Matches http(s)://t.me/<channel>/<message id>, with an optional "s/" path
# prefix before the channel name; captures the groups "channel" and "message".
URL_PATTERN = re.compile(
    r'^https?://t\.me/(?:s/)?'
    r'(?P<channel>[^/]+)/'
    r'(?P<message>\d+)$'
)
def stuff_to_json(o):
    """
    Fallback serialiser passed to json.dump as `default`.

    datetime.datetime values become their ISO 8601 string and bytes values
    become the text 'binary data: ' followed by their base64 encoding.

    Raises TypeError for any other type.
    """
    # Reject unsupported types up front so the conversions below read linearly.
    if not isinstance(o, (datetime.datetime, bytes)):
        raise TypeError(f'Object of type {type(o)} is not JSON serializable')
    if isinstance(o, bytes):
        return f'binary data: {base64.b64encode(o).decode("ascii")}'
    return o.isoformat()
async def main():
    """
    Fetch the Telegram messages named on the command line and store them.

    Every argument in sys.argv[1:] must match URL_PATTERN, and all of them
    must name the same channel. Each message returned is written to
    `{channelName}_{messageId}.json` in the current working directory, and
    client.download_media() is then called for it.

    Exits with status 1, after printing a message to stderr, if an argument
    is not a recognised URL, if no URLs were given, or if the URLs name
    different channels. Raises FileExistsError if an output JSON file already
    exists, because the file is opened in exclusive-creation mode.
    """
    # Parse URLs
    targets = []
    for url in sys.argv[1:]:
        m = URL_PATTERN.match(url)
        if not m:
            print(f'Error: {url} is not a recognised Telegram URL', file = sys.stderr)
            sys.exit(1)
        targets.append((m['channel'], int(m['message'])))
    if not targets:
        print('Usage: telegram-dl.py URL [URL...]', file = sys.stderr)
        sys.exit(1)
    channelName = targets[0][0]
    if any(channel != channelName for channel, _ in targets[1:]):
        print('Error: all URLs must be of the same channel', file = sys.stderr)
        sys.exit(1)
    ids = [messageId for _, messageId in targets]

    # Let's go...
    client = telethon.TelegramClient('telegram-dl', API_ID, API_HASH)
    try:
        await client.start(bot_token = BOT_TOKEN)
        messages = await client.get_messages(channelName, ids = ids)
        for message in messages:
            if not message:
                # Presumably a placeholder for an ID that could not be fetched; skip it.
                continue
            # Mode 'x' refuses to overwrite an existing file.
            with open(f'{channelName}_{message.id}.json', 'x') as fp:
                json.dump(message.to_dict(), fp, default = stuff_to_json)
            await client.download_media(message)
    finally:
        # Always disconnect, including on errors, so the client is closed
        # cleanly before the event loop shuts down.
        await client.disconnect()
# Script entry point: run the main() coroutine to completion on a new event loop.
asyncio.run(main())