|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103 |
#!/bin/bash
#
# Launcher script: optionally initialises CouchDB, makes sure the shared
# upload-queue directory exists, then runs the service named by the first
# argument.
#
# Usage:
#   <script> bot|cogs|firehose|dashboard|websocket|pipeline|uploader|analyzer|trimmer
#
# Environment read by this script:
#   COUCHDB_URL          If set, databases and design documents are created
#                        before the service starts; also passed to bot/cogs.
#   COUCHDB_USER         If set, COUCHDB_USER:COUCHDB_PASSWORD is passed as
#   COUCHDB_PASSWORD     credentials to curl and to bot/cogs.
#   SHARED_WARCS_DIR     Defaults to /data/.
#   IRC_URL, IRC_CHANNEL, IRC_NICK, REDIS_URL              (bot/cogs/dashboard)
#   PIPELINE_NAME, PIPELINE_PREFIX, PIPELINE_CONCURRENT    (pipeline)
#
# NOTE(review): `set -x` traces every command, including the ones that carry
# the CouchDB credentials, so they end up in the trace output.
set -exuo pipefail

# Kill the whole process group (background helpers included) when this script
# is interrupted, terminated or exits.
# https://stackoverflow.com/a/2173421
trap "trap - SIGTERM && kill -- -$$" SIGINT SIGTERM EXIT

# curl authentication arguments. Kept in an array so the credentials reach
# curl as exactly one argument, with no literal quote characters and no word
# splitting on whitespace in the password.
couchdb_curl_args=()

# Issues a silent HTTP PUT via curl. All arguments (URL first, then any extra
# curl options) are appended after "-X PUT".
couchdb_put() {
  # The ${arr[@]+...} form keeps `set -u` happy on bash versions that treat
  # an empty array as unset.
  curl -s ${couchdb_curl_args[@]+"${couchdb_curl_args[@]}"} -X PUT "$@"
}

# Appends "<flag> user:password" to the global array service_args for every
# flag given, but only when COUCHDB_USER is set.
add_db_credentials() {
  [[ -n "${COUCHDB_USER+x}" ]] || return 0
  local flag
  for flag in "$@"; do
    service_args+=("$flag" "${COUCHDB_USER}:${COUCHDB_PASSWORD}")
  done
}

if [[ -z "${COUCHDB_URL+x}" ]]; then
  echo "Skipping couchdb init"
else
  # Poll every 5 seconds until the server answers HTTP 200; give up (and,
  # through `set -e`, abort the script) after 300 seconds. The URL is handed
  # over as a positional argument so the inner shell never parses it as code.
  timeout 300 bash -c '
    until [[ "$(curl -s -o /dev/null -w "%{http_code}" "$1")" == "200" ]]; do
      sleep 5
    done
  ' _ "$COUCHDB_URL"

  if [[ -n "${COUCHDB_USER+x}" ]]; then
    couchdb_curl_args=(-u "${COUCHDB_USER}:${COUCHDB_PASSWORD}")
  fi

  design_docs=(archive_urls ignore_patterns jobs user_agents)
  # Private scratch directory instead of fixed, predictable names in /tmp.
  design_doc_tmp="$(mktemp -d)"

  pushd "db/design_docs"
  for database in _users archivebot archivebot_logs; do
    couchdb_put "$COUCHDB_URL/$database"
  done
  # Drop every line mentioning _rev before uploading the design documents.
  for doc in "${design_docs[@]}"; do
    grep -v _rev "$doc.json" > "$design_doc_tmp/$doc.json"
  done
  for doc in "${design_docs[@]}"; do
    couchdb_put "$COUCHDB_URL/archivebot/_design/$doc" \
      -d "@$design_doc_tmp/$doc.json"
  done
  popd

  rm -rf -- "$design_doc_tmp"
fi

export SHARED_WARCS_DIR="${SHARED_WARCS_DIR:-/data/}"
mkdir -pv "$SHARED_WARCS_DIR/upload-queue/"

service="${1?usage: $0 bot|cogs|firehose|dashboard|websocket|pipeline|uploader|analyzer|trimmer}"

case "$service" in
  "bot")
    cd bot
    service_args=(
      -s "$IRC_URL"
      -r "$REDIS_URL"
      -c "$IRC_CHANNEL"
      -n "$IRC_NICK"
      --db "$COUCHDB_URL/archivebot"
    )
    add_db_credentials --db-credentials
    bundle exec ruby bot.rb "${service_args[@]}"
    ;;
  "cogs")
    service_args=(
      -r "$REDIS_URL"
      --db "$COUCHDB_URL/archivebot"
      --log-db "$COUCHDB_URL/archivebot_logs"
    )
    add_db_credentials --db-credentials --log-db-credentials
    bundle exec ruby cogs/start.rb "${service_args[@]}"
    ;;
  "firehose")
    export UPDATES_CHANNEL=updates
    export FIREHOSE_SOCKET_URL=tcp://0.0.0.0:12345
    plumbing/updates-listener | plumbing/log-firehose
    ;;
  "dashboard")
    bundle exec ruby dashboard/app.rb -u http://0.0.0.0:8080 -r "$REDIS_URL"
    ;;
  "websocket")
    plumbing/firehose-client | python3 dashboard/websocket.py
    ;;
  "pipeline")
    cd pipeline
    export PIPELINE_NAME="${PIPELINE_NAME:-${PIPELINE_PREFIX}-$(hostname -s)}"
    # Keep only the first 30 characters of the pipeline name.
    export PIPELINE_NAME="${PIPELINE_NAME:0:30}"
    export NO_SCREEN=1
    # Assign first, export second, so a failing mktemp aborts the script
    # instead of leaving STAGING_WARCS_DIR empty.
    STAGING_WARCS_DIR="$(mktemp -p "$SHARED_WARCS_DIR" -d staging-XXXXXXXXXX)"
    export STAGING_WARCS_DIR
    export FINISHED_WARCS_DIR="/local-staging/"

    # Background helpers; the EXIT trap above takes them down with the script.
    tcp_closer_args=(
      --dport 443
      --idle_time 21601000
      --last_recv_limit 43200000
      --interval 300
    )
    for ip_version in -4 -6; do
      sudo /usr/sbin/tcp-closer "$ip_version" "${tcp_closer_args[@]}" &
    done
    /stager.sh "$FINISHED_WARCS_DIR" "$STAGING_WARCS_DIR" \
      "$SHARED_WARCS_DIR/upload-queue/" &

    run-pipeline3 pipeline.py --disable-web-server \
      --concurrent "$PIPELINE_CONCURRENT" "$PIPELINE_NAME"

    # After the pipeline returns, wait on both directories, then remove the
    # per-run staging directory.
    /wait-empty.sh "$FINISHED_WARCS_DIR"
    /wait-empty.sh "$STAGING_WARCS_DIR"
    rmdir "$STAGING_WARCS_DIR"
    ;;
  "uploader")
    python ./uploader/uploader.py "$SHARED_WARCS_DIR/upload-queue/"
    ;;
  "analyzer")
    export UPDATES_CHANNEL=updates
    cd plumbing
    ./analyzer
    ;;
  "trimmer")
    export UPDATES_CHANNEL=updates
    cd plumbing
    ./trimmer > /dev/null
    ;;
  *)
    # Previously an unrecognised name fell through without any output; keep
    # falling through, but say so.
    printf 'No service named "%s"; nothing started.\n' "$service" >&2
    ;;
esac
-
|