@@ -0,0 +1,82 @@ | |||
# Stage 1 ("builder"): build the tcp-closer Debian package from source.
# The resulting .deb is copied into the runtime stage with COPY --from=builder.
# NOTE(review): Debian stretch is end-of-life; confirm its package mirrors are
# still reachable before relying on this stage.
FROM debian:stretch AS builder

# Toolchain and headers needed to configure, compile and package tcp-closer.
RUN apt-get update && apt-get dist-upgrade -y && apt-get install -y \
        build-essential \
        cmake \
        devscripts \
        dpkg-dev \
        git \
        libmnl-dev \
    && rm -rf /var/lib/apt/lists/*

RUN git clone https://github.com/kristrev/tcp_closer.git /tcp_closer

# WORKDIR creates the out-of-tree build directory when it does not exist yet.
WORKDIR /tcp_closer/src/build

# Configure against the parent source directory, then build the package.
RUN cmake .. \
    && make package

# Print every file under the build directory into the build log.
RUN find . -type f
# Stage 2: runtime image shared by every ArchiveBot service.
# /entrypoint.sh is started under tini and receives the container command as
# its arguments.
FROM python:3.6-stretch

# OS packages, one per line and sorted; duplicates from the previous list were
# removed. sudo is listed explicitly because the sudoers drop-in below and
# entrypoint.sh both rely on it.
RUN apt-get update && apt-get dist-upgrade -y && apt-get install -y \
        autoconf \
        automake \
        autossh \
        build-essential \
        bundler \
        curl \
        ffmpeg \
        fontconfig-config \
        fonts-dejavu-core \
        git \
        libffi-dev \
        libfontconfig1 \
        libjpeg-dev \
        libjpeg-turbo-progs \
        libjpeg62-turbo \
        libjpeg62-turbo-dev \
        libmnl0 \
        libsqlite3-dev \
        libssl-dev \
        libtool \
        libxml2-dev \
        libxslt-dev \
        libzmq3-dev \
        lsof \
        pkg-config \
        python3 \
        rsync \
        sudo \
        tmux \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Install tcp-closer from the package built in the builder stage.
COPY --from=builder /tcp_closer/src/build/tcp-closer-0.1.1-Linux.deb /tcp-closer.deb
RUN dpkg -i /tcp-closer.deb

# Create the unprivileged "ab" user (uid/gid 1337) and the "psudo" group that
# the sudoers drop-in below refers to.
RUN groupadd -g 1337 ab \
    && groupadd -r psudo \
    && useradd -rm -d /home/ab -s /bin/bash -g ab -G psudo -u 1337 ab

# Python packages installed ahead of pipeline/requirements.txt.
RUN pip install --no-cache-dir websockets requests

# Fetch youtube-dl. -f makes curl fail on an HTTP error instead of saving the
# error page as the executable.
RUN curl -fL https://yt-dl.org/downloads/latest/youtube-dl -o /usr/local/bin/youtube-dl \
    && chmod a+rx /usr/local/bin/youtube-dl

# Output directory, owned by the unprivileged user.
RUN mkdir /data \
    && chown ab:ab /data

# Install tini, used as the init process in ENTRYPOINT.
ENV TINI_VERSION=v0.19.0
ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini
RUN chmod 0755 /tini

# Passwordless sudo for members of psudo. This grant is not revoked later in
# this file: entrypoint.sh uses sudo at runtime to start tcp-closer.
RUN echo "%psudo ALL=(ALL:ALL) NOPASSWD: ALL" > /etc/sudoers.d/psudo \
    && chmod 0440 /etc/sudoers.d/psudo

USER ab

# Clone ArchiveBot, including its submodules, into the user's home.
RUN git clone --recursive https://github.com/ArchiveTeam/ArchiveBot.git /home/ab/ArchiveBot
WORKDIR /home/ab/ArchiveBot

# Apply the local patch to the pipeline's shared_config.py.
COPY config.patch /home/ab/config.patch
RUN patch /home/ab/ArchiveBot/pipeline/archivebot/shared_config.py /home/ab/config.patch

# Point the pipeline's wpull and youtube-dl entries at the system-wide
# executables. The youtube-dl entry shipped in the repository is removed first.
RUN ln -s /usr/local/bin/wpull /home/ab/ArchiveBot/pipeline/wpull \
    && rm /home/ab/ArchiveBot/pipeline/youtube-dl \
    && ln -s /usr/local/bin/youtube-dl /home/ab/ArchiveBot/pipeline/youtube-dl

# Use the OpenSSL configuration shipped in the repository's ops directory.
ENV OPENSSL_CONF=/home/ab/ArchiveBot/ops/openssl-less-secure.cnf

# Install Ruby gems for the repository root and for plumbing/.
RUN bundle install
WORKDIR /home/ab/ArchiveBot/plumbing
RUN bundle install
WORKDIR /home/ab/ArchiveBot

# Install the pipeline's Python requirements as root.
USER root
RUN pip install --no-cache-dir -r pipeline/requirements.txt

# Copy in the entrypoint and make sure it is executable regardless of the
# file mode it had in the build context.
COPY entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh

# Run as the unprivileged user.
USER ab
ENTRYPOINT [ "/tini", "--", "/entrypoint.sh" ]
@@ -0,0 +1,21 @@ | |||
--- shared_config.py.orig 2022-02-24 20:30:36.509450521 +0100 | |||
@@ -1,12 +1,17 @@ | |||
import os | |||
import yaml | |||
+try: | |||
+ from yaml import CLoader as Loader, CDumper as Dumper | |||
+except ImportError: | |||
+ from yaml import Loader, Dumper | |||
+ | |||
def config(): | |||
my_dir = os.path.dirname(__file__) | |||
config_file = os.path.join(my_dir, '../../lib/shared_config.yml') | |||
with open(config_file, 'r') as f: | |||
- return yaml.load(f.read()) | |||
+ return yaml.load(f.read(), Loader=Loader) | |||
def log_channel(): | |||
c = config() |
@@ -0,0 +1,70 @@ | |||
# Development stack for ArchiveBot. Every service built from "." uses the same
# image; `command` is passed to the image's entrypoint to select the service.
# depends_on makes `docker compose up <service>` also start the backing
# services named in that service's environment.
version: "3.9"

services:
  redis:
    image: "redis:alpine"

  couchdb:
    image: "couchdb:2"

  bot:
    build: .
    command: "bot"
    depends_on:
      - redis
      - couchdb
    environment:
      - IRC_URL=ircs://irc.hackint.org:6697
      - REDIS_URL=redis://redis:6379/0
      # Quoted so the '#' can never be read as the start of a YAML comment.
      - "IRC_CHANNEL=#archivebot-rewby-dev"
      - IRC_NICK=ArchiveBotRewbyDev
      - COUCHDB_URL=http://couchdb:5984

  cogs:
    build: .
    command: "cogs"
    depends_on:
      - redis
      - couchdb
    environment:
      - REDIS_URL=redis://redis:6379/0
      - COUCHDB_URL=http://couchdb:5984

  firehose:
    build: .
    command: "firehose"
    depends_on:
      - redis
    environment:
      - REDIS_URL=redis://redis:6379/0

  websocket:
    build: .
    command: "websocket"
    depends_on:
      - redis
      - firehose
    ports:
      - "4568:4568"
    environment:
      - REDIS_URL=redis://redis:6379/0
      - FIREHOSE_SOCKET_URL=tcp://firehose:12345

  dashboard:
    build: .
    command: "dashboard"
    depends_on:
      - redis
    ports:
      - "8080:8080"
    environment:
      - REDIS_URL=redis://redis:6379/0

  uploader:
    build: .
    command: "uploader"
    volumes:
      - warcs:/data
    environment:
      - RSYNC_URL=rsync://at-offload.hawc.eu/abtest/

  pipeline:
    build: .
    command: "pipeline"
    depends_on:
      - redis
    volumes:
      - warcs:/data
    environment:
      - REDIS_URL=redis://redis:6379/0
      - PIPELINE_CONCURRENT=10
      - PIPELINE_PREFIX=testpipeline

  analyzer:
    build: .
    command: "analyzer"
    depends_on:
      - redis
    environment:
      - REDIS_URL=redis://redis:6379/0

  trimmer:
    build: .
    command: "trimmer"
    depends_on:
      - redis
    environment:
      - REDIS_URL=redis://redis:6379/0

# Named volume mounted at /data by both the pipeline and the uploader.
volumes:
  warcs:
@@ -0,0 +1,92 @@ | |||
#!/bin/bash
# Container entrypoint for the ArchiveBot image.
#
# Usage: entrypoint.sh <service>
#   <service> is one of: bot, cogs, firehose, dashboard, websocket, pipeline,
#   uploader, analyzer, trimmer.
#
# When COUCHDB_URL is set, the script first waits for CouchDB to answer and
# uploads the databases and design documents, then starts the service.
#
# NOTE(review): xtrace (-x) prints every expanded command, which includes the
# CouchDB credentials passed on the command lines below. Consider dropping -x
# outside of debugging.
set -exuo pipefail

if [ -z "${COUCHDB_URL+x}" ]; then
  echo "Skipping couchdb init"
else
  # Poll CouchDB every 5 seconds until it answers 200, for at most 300 seconds.
  # The URL is passed as a positional argument so its contents are never
  # interpreted as part of the inner shell program.
  timeout 300 bash -c '
    until [ "$(curl -s -o /dev/null -w "%{http_code}" "$1")" = "200" ]; do
      sleep 5
    done
  ' _ "$COUCHDB_URL"

  # Credentials are kept in an array so "user:password" reaches curl as one
  # argument without literal quote characters.
  couchdb_curl_args=()
  if [ -n "${COUCHDB_USER+x}" ]; then
    couchdb_curl_args=(-u "$COUCHDB_USER:$COUCHDB_PASSWORD")
  fi

  # PUT to CouchDB with the optional credentials; extra arguments go to curl.
  # curl runs without -f, so an HTTP error response does not abort the script.
  # NOTE(review): presumably this lets re-runs against an already-initialised
  # database continue — confirm.
  couchdb_put() {
    curl -s ${couchdb_curl_args[@]+"${couchdb_curl_args[@]}"} -X PUT "$@"
  }

  pushd "db/design_docs"
  couchdb_put "$COUCHDB_URL/_users"
  couchdb_put "$COUCHDB_URL/archivebot"
  couchdb_put "$COUCHDB_URL/archivebot_logs"
  for design_doc in archive_urls ignore_patterns jobs user_agents; do
    # Drop the lines containing _rev before uploading the design document.
    grep -v _rev "$design_doc.json" > "/tmp/$design_doc.json"
    couchdb_put "$COUCHDB_URL/archivebot/_design/$design_doc" -d "@/tmp/$design_doc.json"
  done
  popd
fi

export FINISHED_WARCS_DIR=${FINISHED_WARCS_DIR:-/data/}

# Single-process services are started with exec so they replace this shell and
# receive the signals forwarded by the init process directly.
case "${1:-}" in
  "bot")
    cd bot
    couchdb_args=()
    if [ -n "${COUCHDB_USER+x}" ]; then
      couchdb_args=(--db-credentials "$COUCHDB_USER:$COUCHDB_PASSWORD")
    fi
    exec bundle exec ruby bot.rb \
      -s "$IRC_URL" \
      -r "$REDIS_URL" \
      -c "$IRC_CHANNEL" \
      -n "$IRC_NICK" \
      --db "$COUCHDB_URL/archivebot" ${couchdb_args[@]+"${couchdb_args[@]}"}
    ;;
  "cogs")
    couchdb_args=()
    if [ -n "${COUCHDB_USER+x}" ]; then
      couchdb_args=(
        --db-credentials "$COUCHDB_USER:$COUCHDB_PASSWORD"
        --log-db-credentials "$COUCHDB_USER:$COUCHDB_PASSWORD"
      )
    fi
    exec bundle exec ruby cogs/start.rb \
      -r "$REDIS_URL" \
      --db "$COUCHDB_URL/archivebot" \
      --log-db "$COUCHDB_URL/archivebot_logs" ${couchdb_args[@]+"${couchdb_args[@]}"}
    ;;
  "firehose")
    export UPDATES_CHANNEL=updates
    export FIREHOSE_SOCKET_URL=tcp://0.0.0.0:12345
    plumbing/updates-listener | plumbing/log-firehose
    ;;
  "dashboard")
    exec bundle exec ruby dashboard/app.rb -u http://0.0.0.0:8080 -r "$REDIS_URL"
    ;;
  "websocket")
    plumbing/firehose-client | python3 dashboard/websocket.py
    ;;
  "pipeline")
    cd pipeline
    export PIPELINE_NAME="${PIPELINE_NAME:-${PIPELINE_PREFIX}-$(hostname -s)}"
    export NO_SCREEN=1
    # Start one tcp-closer per address family in the background.
    sudo /usr/sbin/tcp-closer -4 --dport 443 --idle_time 21601000 --last_recv_limit 43200000 --interval 300 &
    sudo /usr/sbin/tcp-closer -6 --dport 443 --idle_time 21601000 --last_recv_limit 43200000 --interval 300 &
    exec run-pipeline3 pipeline.py --disable-web-server \
      --concurrent "$PIPELINE_CONCURRENT" "$PIPELINE_NAME"
    ;;
  "uploader")
    exec python ./uploader/uploader.py "$FINISHED_WARCS_DIR"
    ;;
  "analyzer")
    export UPDATES_CHANNEL=updates
    cd plumbing
    exec ./analyzer
    ;;
  "trimmer")
    export UPDATES_CHANNEL=updates
    cd plumbing
    exec ./trimmer > /dev/null
    ;;
  *)
    # Fail loudly instead of exiting 0 without starting anything.
    echo "Unknown or missing service '${1:-}'; expected one of: bot, cogs, firehose, dashboard, websocket, pipeline, uploader, analyzer, trimmer" >&2
    exit 1
    ;;
esac