@@ -0,0 +1,320 @@ | |||
# Created by https://www.toptal.com/developers/gitignore/api/intellij+all,pycharm+all,python | |||
# Edit at https://www.toptal.com/developers/gitignore?templates=intellij+all,pycharm+all,python | |||
### Intellij+all ### | |||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider | |||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 | |||
# User-specific stuff | |||
.idea/**/workspace.xml | |||
.idea/**/tasks.xml | |||
.idea/**/usage.statistics.xml | |||
.idea/**/dictionaries | |||
.idea/**/shelf | |||
# AWS User-specific | |||
.idea/**/aws.xml | |||
# Generated files | |||
.idea/**/contentModel.xml | |||
# Sensitive or high-churn files | |||
.idea/**/dataSources/ | |||
.idea/**/dataSources.ids | |||
.idea/**/dataSources.local.xml | |||
.idea/**/sqlDataSources.xml | |||
.idea/**/dynamic.xml | |||
.idea/**/uiDesigner.xml | |||
.idea/**/dbnavigator.xml | |||
# Gradle | |||
.idea/**/gradle.xml | |||
.idea/**/libraries | |||
# Gradle and Maven with auto-import | |||
# When using Gradle or Maven with auto-import, you should exclude module files, | |||
# since they will be recreated, and may cause churn. Uncomment if using | |||
# auto-import. | |||
# .idea/artifacts | |||
# .idea/compiler.xml | |||
# .idea/jarRepositories.xml | |||
# .idea/modules.xml | |||
# .idea/*.iml | |||
# .idea/modules | |||
# *.iml | |||
# *.ipr | |||
# CMake | |||
cmake-build-*/ | |||
# Mongo Explorer plugin | |||
.idea/**/mongoSettings.xml | |||
# File-based project format | |||
*.iws | |||
# IntelliJ | |||
out/ | |||
# mpeltonen/sbt-idea plugin | |||
.idea_modules/ | |||
# JIRA plugin | |||
atlassian-ide-plugin.xml | |||
# Cursive Clojure plugin | |||
.idea/replstate.xml | |||
# SonarLint plugin | |||
.idea/sonarlint/ | |||
# Crashlytics plugin (for Android Studio and IntelliJ) | |||
com_crashlytics_export_strings.xml | |||
crashlytics.properties | |||
crashlytics-build.properties | |||
fabric.properties | |||
# Editor-based Rest Client | |||
.idea/httpRequests | |||
# Android studio 3.1+ serialized cache file | |||
.idea/caches/build_file_checksums.ser | |||
### Intellij+all Patch ### | |||
# Ignore everything but code style settings and run configurations | |||
# that are supposed to be shared within teams. | |||
.idea/* | |||
!.idea/codeStyles | |||
!.idea/runConfigurations | |||
### PyCharm+all ### | |||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider | |||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 | |||
# User-specific stuff | |||
# AWS User-specific | |||
# Generated files | |||
# Sensitive or high-churn files | |||
# Gradle | |||
# Gradle and Maven with auto-import | |||
# When using Gradle or Maven with auto-import, you should exclude module files, | |||
# since they will be recreated, and may cause churn. Uncomment if using | |||
# auto-import. | |||
# .idea/artifacts | |||
# .idea/compiler.xml | |||
# .idea/jarRepositories.xml | |||
# .idea/modules.xml | |||
# .idea/*.iml | |||
# .idea/modules | |||
# *.iml | |||
# *.ipr | |||
# CMake | |||
# Mongo Explorer plugin | |||
# File-based project format | |||
# IntelliJ | |||
# mpeltonen/sbt-idea plugin | |||
# JIRA plugin | |||
# Cursive Clojure plugin | |||
# SonarLint plugin | |||
# Crashlytics plugin (for Android Studio and IntelliJ) | |||
# Editor-based Rest Client | |||
# Android studio 3.1+ serialized cache file | |||
### PyCharm+all Patch ### | |||
# Ignore everything but code style settings and run configurations | |||
# that are supposed to be shared within teams. | |||
### Python ### | |||
# Byte-compiled / optimized / DLL files | |||
__pycache__/ | |||
*.py[cod] | |||
*$py.class | |||
# C extensions | |||
*.so | |||
# Distribution / packaging | |||
.Python | |||
build/ | |||
develop-eggs/ | |||
dist/ | |||
downloads/ | |||
eggs/ | |||
.eggs/ | |||
lib/ | |||
lib64/ | |||
parts/ | |||
sdist/ | |||
var/ | |||
wheels/ | |||
share/python-wheels/ | |||
*.egg-info/ | |||
.installed.cfg | |||
*.egg | |||
MANIFEST | |||
# PyInstaller | |||
# Usually these files are written by a Python script from a template
# before PyInstaller builds the exe, so as to inject the date and other information into it.
*.manifest | |||
*.spec | |||
# Installer logs | |||
pip-log.txt | |||
pip-delete-this-directory.txt | |||
# Unit test / coverage reports | |||
htmlcov/ | |||
.tox/ | |||
.nox/ | |||
.coverage | |||
.coverage.* | |||
.cache | |||
nosetests.xml | |||
coverage.xml | |||
*.cover | |||
*.py,cover | |||
.hypothesis/ | |||
.pytest_cache/ | |||
cover/ | |||
# Translations | |||
*.mo | |||
*.pot | |||
# Django stuff: | |||
*.log | |||
local_settings.py | |||
db.sqlite3 | |||
db.sqlite3-journal | |||
# Flask stuff: | |||
instance/ | |||
.webassets-cache | |||
# Scrapy stuff: | |||
.scrapy | |||
# Sphinx documentation | |||
docs/_build/ | |||
# PyBuilder | |||
.pybuilder/ | |||
target/ | |||
# Jupyter Notebook | |||
.ipynb_checkpoints | |||
# IPython | |||
profile_default/ | |||
ipython_config.py | |||
# pyenv | |||
# For a library or package, you might want to ignore these files since the code is | |||
# intended to run in multiple environments; otherwise, check them in: | |||
# .python-version | |||
# pipenv | |||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | |||
# However, in case of collaboration, if having platform-specific dependencies or dependencies | |||
# having no cross-platform support, pipenv may install dependencies that don't work, or not | |||
# install all needed dependencies. | |||
#Pipfile.lock | |||
# poetry | |||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. | |||
# This is especially recommended for binary packages to ensure reproducibility, and is more | |||
# commonly ignored for libraries. | |||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control | |||
#poetry.lock | |||
# pdm | |||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. | |||
#pdm.lock | |||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it | |||
# in version control. | |||
# https://pdm.fming.dev/#use-with-ide | |||
.pdm.toml | |||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm | |||
__pypackages__/ | |||
# Celery stuff | |||
celerybeat-schedule | |||
celerybeat.pid | |||
# SageMath parsed files | |||
*.sage.py | |||
# Environments | |||
.env | |||
.venv | |||
env/ | |||
venv/ | |||
ENV/ | |||
env.bak/ | |||
venv.bak/ | |||
# Spyder project settings | |||
.spyderproject | |||
.spyproject | |||
# Rope project settings | |||
.ropeproject | |||
# mkdocs documentation | |||
/site | |||
# mypy | |||
.mypy_cache/ | |||
.dmypy.json | |||
dmypy.json | |||
# Pyre type checker | |||
.pyre/ | |||
# pytype static type analyzer | |||
.pytype/ | |||
# Cython debug symbols | |||
cython_debug/ | |||
# PyCharm | |||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can | |||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore | |||
# and can be added to the global gitignore or merged into this file. For a more nuclear | |||
# option (not recommended) you can uncomment the following to ignore the entire idea folder. | |||
#.idea/ | |||
### Python Patch ### | |||
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration | |||
poetry.toml | |||
# ruff | |||
.ruff_cache/ | |||
# LSP config files | |||
pyrightconfig.json | |||
# End of https://www.toptal.com/developers/gitignore/api/intellij+all,pycharm+all,python | |||
/venv/ |
@@ -0,0 +1,107 @@ | |||
import json | |||
import os | |||
import pathlib | |||
import shutil | |||
import time | |||
import urllib.parse | |||
import click | |||
import logging | |||
import requests | |||
import minio | |||
# Configure the root logger once at import time so that every logging.* call
# in this module (including DEBUG-level records) is emitted.
logging.basicConfig(
    level=logging.DEBUG,
)
@click.group()
def sender():
    """Upload item directories to the storage target assigned by a dispatcher.

    Use `watch` to poll a queue directory continuously, or `single` to
    process one item directory.
    """
    # The group itself does no work; click shows the docstring above as the
    # description in `--help` and dispatches to the sub-commands.
def watch_pass(input_directory: pathlib.Path, work_directory: pathlib.Path, ia_collection: str, ia_item_title: str, ia_item_prefix: str, ia_item_date: str, project: str, dispatcher: str) -> bool:
    """Claim and process at most one item directory from the queue.

    Scans ``input_directory`` for sub-directories. The first one that can be
    moved into ``work_directory`` is handed to ``single`` for upload.

    Args:
        input_directory: Directory whose sub-directories are queued items.
            A plain ``str`` path is accepted as well.
        work_directory: Directory the claimed item is moved into before it
            is processed. A plain ``str`` path is accepted as well.
        ia_collection, ia_item_title, ia_item_prefix, ia_item_date, project,
        dispatcher: Passed through unchanged to ``single``.

    Returns:
        ``True`` if an item was moved and processed, ``False`` if nothing in
        the queue could be claimed on this pass.
    """
    # click.Path() yields plain strings unless told otherwise, so normalise
    # here instead of relying on every caller to pass pathlib.Path objects.
    input_directory = pathlib.Path(input_directory)
    work_directory = pathlib.Path(work_directory)

    for original_directory in input_directory.iterdir():
        if not original_directory.is_dir():
            continue

        new_directory = work_directory.joinpath(original_directory.name)
        try:
            # str() keeps shutil.move working on interpreters where it does
            # not accept path-like sources.
            shutil.move(str(original_directory), str(new_directory))
        except FileNotFoundError:
            # The entry vanished between listing and moving it. There is
            # nothing at new_directory to process, so try the next entry.
            logging.warning(f"Unable to move item {original_directory}")
            continue

        # ``single`` is decorated with @sender.command(), which makes it a
        # click Command; calling that object would start CLI argument
        # parsing. The undecorated function is available as ``.callback``.
        process_item = getattr(single, "callback", single)
        process_item(new_directory, ia_collection, ia_item_title, ia_item_prefix, ia_item_date, project, dispatcher)
        return True

    return False
@sender.command()
@click.option('--input-directory', envvar='UPLOAD_QUEUE_DIR', default="/data/upload-queue", type=click.Path(exists=True))
@click.option('--work-directory', envvar='UPLOADER_WORKING_DIR', default="/data/uploader-work", type=click.Path(exists=True))
@click.option('--ia-collection', envvar='IA_COLLECTION')
@click.option('--ia-item-title', envvar='IA_ITEM_TITLE')
@click.option('--ia-item-prefix', envvar='IA_ITEM_PREFIX')
@click.option('--ia-item-date', envvar='IA_ITEM_DATE')
@click.option('--project', envvar='PROJECT')
@click.option('--dispatcher', envvar='DISPATCHER')
def watch(input_directory: pathlib.Path, work_directory: pathlib.Path, ia_collection: str, ia_item_title: str, ia_item_prefix: str, ia_item_date: str, project: str, dispatcher: str):
    """Poll the input directory forever and upload each item directory found."""
    # click.Path(exists=True) delivers plain strings; convert them once so
    # the path methods used downstream (.iterdir(), .joinpath()) exist.
    queue_dir = pathlib.Path(input_directory)
    work_dir = pathlib.Path(work_directory)

    # Runs until the process is terminated. Only back off when a pass found
    # nothing to do, so a backlog is drained without pauses between items.
    while True:
        handled = watch_pass(queue_dir, work_dir, ia_collection, ia_item_title, ia_item_prefix, ia_item_date, project, dispatcher)
        if not handled:
            time.sleep(10)
@sender.command()
@click.option('--item-directory', type=click.Path(exists=True))
@click.option('--ia-collection', envvar='IA_COLLECTION')
@click.option('--ia-item-title', envvar='IA_ITEM_TITLE')
@click.option('--ia-item-prefix', envvar='IA_ITEM_PREFIX')
@click.option('--ia-item-date', envvar='IA_ITEM_DATE')
@click.option('--project', envvar='PROJECT')
@click.option('--dispatcher', envvar='DISPATCHER')
def single(item_directory: pathlib.Path, ia_collection: str, ia_item_title: str, ia_item_prefix: str, ia_item_date: str, project: str, dispatcher: str):
    """Write upload metadata for one item directory and upload its files."""
    # Steps:
    #   1. Write __upload_meta.json into the item directory.
    #   2. Ask the dispatcher (GET {dispatcher}/offload_target) for a target
    #      URL, retrying until it answers with HTTP 200.
    #   3. Upload every file under the item directory to that target. Only
    #      minio+http:// and minio+https:// URLs are understood.
    # Raises Exception if the metadata file already exists or the target URL
    # has an unsupported scheme.

    # click.Path() delivers a plain string; the code below needs Path methods.
    item_directory = pathlib.Path(item_directory)
    logging.info(f"Processing item {item_directory}...")

    meta_json_loc = item_directory.joinpath('__upload_meta.json')
    if meta_json_loc.exists():
        raise Exception("META JSON EXISTS WTF")
    meta_json = {
        "IA_COLLECTION": ia_collection,
        "IA_ITEM_TITLE": f"{ia_item_title} {item_directory.name}",
        "IA_ITEM_DATE": ia_item_date,
        "IA_ITEM_NAME": f"{ia_item_prefix}{item_directory.name}",
        "PROJECT": project,
    }
    with open(meta_json_loc, 'w') as f:
        json.dump(meta_json, f)
    logging.info("Wrote metadata json.")

    # Materialise the listing: glob() returns a one-shot iterator, and the
    # files are needed twice (size accounting, then upload). Directories are
    # dropped because they are neither payload bytes nor uploadable objects.
    # The metadata file written above is part of this listing.
    files = [path for path in item_directory.glob("**/*") if path.is_file()]
    total_size = sum(os.path.getsize(path) for path in files)
    logging.info(f"Item size is {total_size} bytes.")

    url = None
    while True:
        try:
            r = requests.get(f"{dispatcher}/offload_target", params=meta_json)
            if r.status_code != 200:
                raise Exception(f"Invalid status code {r.status_code}: {r.text}")
            url = r.json()["url"]
            break
        except Exception as e:
            # Deliberately broad: any failure (network, bad status, malformed
            # body) means "no target yet", so log it and try again.
            logging.warning(f"Unable to fetch target: {e}")
            # Pause between attempts so a down dispatcher is not hammered
            # in a tight loop.
            time.sleep(5)
    logging.info(f"Assigned target {url}")

    parsed_url = urllib.parse.urlparse(url)
    if parsed_url.scheme not in ("minio+http", "minio+https"):
        raise Exception(f"Unable to upload, don't understand url: {url}")

    secure = (parsed_url.scheme == "minio+https")
    ep = parsed_url.hostname
    if parsed_url.port is not None:
        ep = f"{ep}:{parsed_url.port}"
    client = minio.Minio(endpoint=ep, access_key=parsed_url.username, secret_key=parsed_url.password, secure=secure)
    for file in files:
        rel_file = file.relative_to(item_directory)
        logging.info(f"Uploading file {rel_file}...")
        # Object names are strings with '/' separators; the bucket is named
        # after the item directory.
        client.fput_object(bucket_name=item_directory.name, object_name=rel_file.as_posix(), file_path=str(file))
# Run the click command group only when executed as a script, not on import.
if "__main__" == __name__:
    sender()
@@ -0,0 +1,3 @@ | |||
click | |||
requests | |||
minio |