#!/usr/bin/env python
import base64
import json
import logging
import os
import pathlib
import shutil
import time
import urllib.parse

import click
import minio
import requests

logging.basicConfig(level=logging.DEBUG)
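
# This tool watches an upload queue: each item directory is claimed by moving
# it into a work directory, its contents are uploaded to a MinIO target
# assigned by the dispatcher, and the resulting item URL is submitted to the
# backfeed endpoint.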


@click.group()
def sender():
    pass


def watch_pass(input_directory: pathlib.Path, work_directory: pathlib.Path, ia_collection: str, ia_item_title: str,
               ia_item_prefix: str, ia_item_date: str, project: str, dispatcher: str, delete: bool, backfeed_key: str):
    # Claim at most one queued item per pass by moving it into the work
    # directory, then process it. Returns True if an item was handled,
    # False if the queue was empty.
    for original_directory in input_directory.iterdir():
        if original_directory.is_dir():
            original_name = original_directory.name
            new_directory = work_directory.joinpath(original_name)
            try:
                original_directory.rename(new_directory)
            except FileNotFoundError:
                # Another worker likely claimed this item first; skip it.
                logging.warning(f"Unable to move item {original_directory}")
                continue
            # single is a click Command object, so invoke its underlying
            # function rather than the command itself.
            single.callback(new_directory, ia_collection, ia_item_title, ia_item_prefix, ia_item_date, project,
                            dispatcher, delete, backfeed_key)
            return True
    return False


@sender.command()
@click.option('--input-directory', envvar='UPLOAD_QUEUE_DIR', default="/data/upload-queue",
              type=click.Path(exists=True, path_type=pathlib.Path))
@click.option('--work-directory', envvar='UPLOADER_WORKING_DIR', default="/data/uploader-work",
              type=click.Path(exists=True, path_type=pathlib.Path))
@click.option('--ia-collection', envvar='IA_COLLECTION', required=True)
@click.option('--ia-item-title', envvar='IA_ITEM_TITLE', required=True)
@click.option('--ia-item-prefix', envvar='IA_ITEM_PREFIX', required=True)
@click.option('--ia-item-date', envvar='IA_ITEM_DATE', required=True)
@click.option('--project', envvar='PROJECT', required=True)
@click.option('--dispatcher', envvar='DISPATCHER', required=True)
@click.option('--delete/--no-delete', default=False)
@click.option('--backfeed-key', envvar='BACKFEED_KEY', required=True)
def watch(input_directory: pathlib.Path, work_directory: pathlib.Path, ia_collection: str, ia_item_title: str,
          ia_item_prefix: str, ia_item_date: str, project: str, dispatcher: str, delete: bool, backfeed_key: str):
    # Poll the queue forever, sleeping briefly whenever it is empty.
    while True:
        if not watch_pass(input_directory, work_directory, ia_collection, ia_item_title, ia_item_prefix, ia_item_date,
                          project, dispatcher, delete, backfeed_key):
            time.sleep(10)


@sender.command()
@click.option('--item-directory', type=click.Path(exists=True, path_type=pathlib.Path), required=True)
@click.option('--ia-collection', envvar='IA_COLLECTION', required=True)
@click.option('--ia-item-title', envvar='IA_ITEM_TITLE', required=True)
@click.option('--ia-item-prefix', envvar='IA_ITEM_PREFIX', required=True)
@click.option('--ia-item-date', envvar='IA_ITEM_DATE', required=True)
@click.option('--project', envvar='PROJECT', required=True)
@click.option('--dispatcher', envvar='DISPATCHER', required=True)
@click.option('--delete/--no-delete', default=False)
@click.option('--backfeed-key', envvar='BACKFEED_KEY', required=True)
def single(item_directory: pathlib.Path, ia_collection: str, ia_item_title: str, ia_item_prefix: str, ia_item_date: str,
           project: str, dispatcher: str, delete: bool, backfeed_key: str):
    logging.info(f"Processing item {item_directory}...")
    meta_json_loc = item_directory.joinpath('__upload_meta.json')
    if meta_json_loc.exists():
        raise Exception(f"Metadata file already exists at {meta_json_loc}; item appears to have been processed before")
    meta_json = {
        "IA_COLLECTION": ia_collection,
        "IA_ITEM_TITLE": f"{ia_item_title} {item_directory.name}",
        "IA_ITEM_DATE": ia_item_date,
        "IA_ITEM_NAME": f"{ia_item_prefix}{item_directory.name}",
        "PROJECT": project,
    }
    with open(meta_json_loc, 'w') as f:
        f.write(json.dumps(meta_json))
    logging.info("Wrote metadata json.")
    # Materialize the file list: a bare glob generator would be exhausted by
    # the size pass below and leave nothing for the upload loop.
    files = [file for file in item_directory.glob("**/*") if file.is_file()]
    total_size = sum(os.path.getsize(file) for file in files)
    logging.info(f"Item size is {total_size} bytes.")
    while True:
        try:
            r = requests.get(f"{dispatcher}/offload_target", params=meta_json)
            if r.status_code == 200:
                data = r.json()
                url = data["url"]
                break
            raise Exception(f"Invalid status code {r.status_code}: {r.text}")
        except Exception as e:
            logging.warning(f"Unable to fetch target: {e}")
            # Back off before retrying so a down dispatcher is not hammered.
            time.sleep(30)
    logging.info(f"Assigned target {url}")
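    # The dispatcher hands out targets as minio+http(s) URLs, with the access
    # and secret keys carried in the URL's userinfo portion.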
    parsed_url = urllib.parse.urlparse(url)
    bf_item = None
    if parsed_url.scheme in ("minio+http", "minio+https"):
        secure = (parsed_url.scheme == "minio+https")
        ep = parsed_url.hostname
        if parsed_url.port is not None:
            ep = f"{ep}:{parsed_url.port}"
        client = minio.Minio(endpoint=ep, access_key=parsed_url.username, secret_key=parsed_url.password,
                             secure=secure)
        # The per-item bucket may not exist yet on the target; create it if
        # needed so the upload loop below does not retry forever.
        if not client.bucket_exists(item_directory.name):
            client.make_bucket(item_directory.name)
        for file in files:
            rel_file = file.relative_to(item_directory)
            while True:
                try:
                    logging.info(f"Uploading file {rel_file}...")
                    client.fput_object(bucket_name=item_directory.name, object_name=str(rel_file),
                                       file_path=str(file))
                    break
                except Exception as e:
                    logging.error(f"Failed to upload: {e}")
                    time.sleep(30)
        # urlparse results are immutable named tuples, so derive the backfeed
        # URL with _replace rather than assigning to .path.
        item_path = f"{parsed_url.path.rstrip('/')}/{item_directory.name}"
        new_url = urllib.parse.urlunparse(parsed_url._replace(path=item_path))
        logging.info(f"Constructed backfeed url: {new_url}")
        # b64encode takes and returns bytes; decode so it interpolates cleanly.
        encoded_url = base64.b64encode(new_url.encode("UTF-8")).decode("ASCII")
        bf_item = f"{project}:{parsed_url.hostname}:{encoded_url}"
    else:
        raise Exception(f"Unable to upload, don't understand url: {url}")

    if bf_item is None:
        raise Exception("Unable to create backfeed item")

    while True:
        try:
            resp = requests.post(f"https://legacy-api.arpa.li/backfeed/legacy/{backfeed_key}",
                                 params={"skipbloom": "1", "delimiter": "\n"}, data=str(bf_item).encode("UTF-8"))
            if resp.status_code == 200:
                break
            logging.warning(f"Failed to submit to backfeed {resp.status_code}: {resp.text}")
        except Exception as e:
            logging.warning(f"Failed to submit to backfeed: {e}")
        time.sleep(30)

    if delete:
        shutil.rmtree(item_directory)


if __name__ == '__main__':
    sender()
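
# Example invocation (endpoint and metadata values are illustrative):
#   IA_COLLECTION=examplecol IA_ITEM_TITLE='Example Title' IA_ITEM_PREFIX=example- \
#   IA_ITEM_DATE=2024-01 PROJECT=example DISPATCHER=https://dispatcher.example \
#   BACKFEED_KEY=... ./sender.py watch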