Não pode escolher mais do que 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.
 
 

215 linhas
9.1 KiB

  1. #!/usr/bin/env python
  2. import base64
  3. import copy
  4. import datetime
  5. import json
  6. import os
  7. import pathlib
  8. import shutil
  9. import time
  10. import urllib.parse
  11. from typing import Optional
  12. import click
  13. import logging
  14. import requests
  15. import minio
  16. from progress import Progress
  17. logging.basicConfig(level=logging.INFO)
  18. BACKFEED_DELIM = "\n"
  19. @click.group()
  20. def sender():
  21. pass
  22. def watch_pass(input_directory: pathlib.Path, work_directory: pathlib.Path, ia_collection: str, ia_item_title: str,
  23. ia_item_prefix: str, ia_item_date: str, project: str, dispatcher: str, delete: bool, backfeed_key: str):
  24. logging.info("Checking for new items...")
  25. for original_directory in input_directory.iterdir():
  26. if original_directory.is_dir():
  27. original_name = original_directory.name
  28. new_directory = work_directory.joinpath(original_name)
  29. try:
  30. try:
  31. original_directory.rename(new_directory)
  32. except FileNotFoundError:
  33. logging.warning(f"Unable to move item {original_directory}")
  34. single_impl(new_directory, ia_collection, ia_item_title, ia_item_prefix, ia_item_date, project, dispatcher,
  35. delete, backfeed_key)
  36. return True
  37. finally:
  38. # If we exit somehow without deleting, move it back. Likely ctrl+c.
  39. if new_directory.exists():
  40. if len(list(new_directory.iterdir())) > 0:
  41. logging.warning("Stopped upload but files remain, moving back to queue...")
  42. try:
  43. new_directory.rename(original_directory)
  44. except FileNotFoundError:
  45. logging.warning(f"Unable to move item {new_directory}")
  46. return False
  47. @sender.command()
  48. @click.option('--input-directory', envvar='UPLOAD_QUEUE_DIR', default="/data/upload-queue",
  49. type=click.Path(exists=True))
  50. @click.option('--work-directory', envvar='UPLOADER_WORKING_DIR', default="/data/uploader-work",
  51. type=click.Path(exists=True))
  52. @click.option('--ia-collection', envvar='IA_COLLECTION', required=True)
  53. @click.option('--ia-item-title', envvar='IA_ITEM_TITLE', required=True)
  54. @click.option('--ia-item-prefix', envvar='IA_ITEM_PREFIX', required=True)
  55. @click.option('--ia-item-date', envvar='IA_ITEM_DATE', required=False)
  56. @click.option('--project', envvar='PROJECT', required=True)
  57. @click.option('--dispatcher', envvar='DISPATCHER', required=True)
  58. @click.option('--delete/--no-delete', envvar='DELETE', default=False)
  59. @click.option('--backfeed-key', envvar='BACKFEED_KEY', required=True)
  60. def watch(input_directory: pathlib.Path, work_directory: pathlib.Path, ia_collection: str, ia_item_title: str,
  61. ia_item_prefix: str, ia_item_date: str, project: str, dispatcher: str, delete: bool, backfeed_key: str):
  62. if not isinstance(input_directory, pathlib.Path):
  63. input_directory = pathlib.Path(input_directory)
  64. if not isinstance(work_directory, pathlib.Path):
  65. work_directory = pathlib.Path(work_directory)
  66. while True:
  67. if not watch_pass(input_directory, work_directory, ia_collection, ia_item_title, ia_item_prefix, ia_item_date,
  68. project, dispatcher, delete, backfeed_key):
  69. logging.info("No item found, sleeping...")
  70. time.sleep(10)
  71. @sender.command()
  72. @click.option('--item-directory', type=click.Path(exists=True), required=True)
  73. @click.option('--ia-collection', envvar='IA_COLLECTION', required=True)
  74. @click.option('--ia-item-title', envvar='IA_ITEM_TITLE', required=True)
  75. @click.option('--ia-item-prefix', envvar='IA_ITEM_PREFIX', required=True)
  76. @click.option('--ia-item-date', envvar='IA_ITEM_DATE', required=False)
  77. @click.option('--project', envvar='PROJECT', required=True)
  78. @click.option('--dispatcher', envvar='DISPATCHER', required=True)
  79. @click.option('--delete/--no-delete', envvar='DELETE', default=False)
  80. @click.option('--backfeed-key', envvar='BACKFEED_KEY', required=True)
  81. def single(item_directory: pathlib.Path, ia_collection: str, ia_item_title: str, ia_item_prefix: str,
  82. ia_item_date: Optional[str], project: str, dispatcher: str, delete: bool, backfeed_key: str):
  83. single_impl(item_directory, ia_collection, ia_item_title, ia_item_prefix, ia_item_date, project, dispatcher, delete,
  84. backfeed_key)
  85. def single_impl(item_directory: pathlib.Path, ia_collection: str, ia_item_title: str, ia_item_prefix: str,
  86. ia_item_date: Optional[str], project: str, dispatcher: str, delete: bool, backfeed_key: str):
  87. if not isinstance(item_directory, pathlib.Path):
  88. item_directory = pathlib.Path(item_directory)
  89. logging.info(f"Processing item {item_directory}...")
  90. if ia_item_date is None:
  91. s = item_directory.name.split("_")
  92. if len(s) > 0:
  93. ds = s[0]
  94. try:
  95. d = datetime.datetime.strptime(ds, "%Y%m%d%H%M%S")
  96. ia_item_date = d.strftime("%Y-%m")
  97. except ValueError:
  98. pass
  99. meta_json_loc = item_directory.joinpath('__upload_meta.json')
  100. if meta_json_loc.exists():
  101. raise Exception("META JSON EXISTS WTF")
  102. meta_json = {
  103. "IA_COLLECTION": ia_collection,
  104. "IA_ITEM_TITLE": f"{ia_item_title} {item_directory.name}",
  105. "IA_ITEM_DATE": ia_item_date,
  106. "IA_ITEM_NAME": f"{ia_item_prefix}{item_directory.name}",
  107. "PROJECT": project,
  108. }
  109. with open(meta_json_loc, 'w') as f:
  110. f.write(json.dumps(meta_json))
  111. logging.info("Wrote metadata json.")
  112. total_size = 0
  113. files = list(item_directory.glob("**/*"))
  114. for item in files:
  115. total_size = total_size + os.path.getsize(item)
  116. logging.info(f"Item size is {total_size} bytes across {len(files)} files.")
  117. meta_json["SIZE_HINT"] = str(total_size)
  118. while True:
  119. try:
  120. r = requests.get(f"{dispatcher}/offload_target", params=meta_json)
  121. if r.status_code == 200:
  122. data = r.json()
  123. url = data["url"]
  124. break
  125. else:
  126. raise Exception(f"Invalid status code {r.status_code}: {r.text}")
  127. except Exception:
  128. logging.exception("Unable to fetch target")
  129. time.sleep(30)
  130. logging.info(f"Assigned target {url}")
  131. parsed_url = urllib.parse.urlparse(url)
  132. if parsed_url.scheme == "minio+http" or parsed_url.scheme == "minio+https":
  133. secure = (parsed_url.scheme == "minio+https")
  134. ep = parsed_url.hostname
  135. if parsed_url.port is not None:
  136. ep = f"{ep}:{parsed_url.port}"
  137. client = minio.Minio(endpoint=ep, access_key=parsed_url.username, secret_key=parsed_url.password, secure=secure)
  138. bucket_name = item_directory.name.replace("_", "-")
  139. logging.info("Making bucket...")
  140. while True:
  141. try:
  142. if client.bucket_exists(bucket_name=bucket_name):
  143. raise Exception("Bucket already exists!")
  144. client.make_bucket(bucket_name=bucket_name)
  145. break
  146. except Exception:
  147. logging.exception("Failed to make bucket")
  148. time.sleep(30)
  149. logging.info("Starting uploads...")
  150. for file in files:
  151. rel_file = file.relative_to(item_directory)
  152. while True:
  153. try:
  154. logging.info(f"Uploading file {rel_file}...")
  155. client.fput_object(bucket_name=bucket_name, object_name=str(rel_file), file_path=file,
  156. progress=Progress())
  157. break
  158. except minio.error.MinioException:
  159. logging.exception("Failed to upload")
  160. time.sleep(30)
  161. except Exception:
  162. logging.exception("Failed to upload")
  163. time.sleep(30)
  164. item_data = { "url": url, "item_name": item_directory.name, "bucket_name": bucket_name }
  165. bf_item_part = base64.urlsafe_b64encode(str(json.dumps(item_data)).encode("UTF-8")).decode("UTF-8")
  166. bf_item = f"{project}:{parsed_url.hostname}:{bf_item_part}"
  167. else:
  168. raise Exception("Unable to upload, don't understand url: {url}")
  169. if bf_item is None:
  170. raise Exception("Unable to create backfeed item")
  171. if backfeed_key == "SKIPBF":
  172. logging.warning(f"Skipping backfeed! Would have submitted: {bf_item}")
  173. else:
  174. while True:
  175. u = f"https://legacy-api.arpa.li/backfeed/legacy/{backfeed_key}"
  176. logging.info(f"Attempting to submit bf item {bf_item} to {u}...")
  177. resp = requests.post(u, params={"skipbloom": "1", "delimiter": BACKFEED_DELIM},
  178. data=f"{bf_item}{BACKFEED_DELIM}".encode("UTF-8"))
  179. if resp.status_code == 200:
  180. break
  181. logging.warning(f"Failed to submit to backfeed {resp.status_code}: {resp.text}")
  182. time.sleep(30)
  183. logging.info("Backfeed submit complete!")
  184. if delete:
  185. logging.info("Removing item...")
  186. shutil.rmtree(item_directory)
  187. logging.info("Upload complete!")
# Script entry point: dispatch to the click command group (watch / single).
if __name__ == '__main__':
    sender()