archiving community contributions on YouTube: unpublished captions, title and description translations and caption credits
Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. import requests
  2. from time import sleep
  3. from os import mkdir
  4. from json import dumps
  5. from shutil import make_archive, rmtree
  6. from discovery import getmetadata
  7. from export import getsubs
  8. WORKER_VERSION = 1
  9. SERVER_BASE_URL = "http://localhost:5000"
  10. # Get a worker ID
  11. while True:
  12. params = (
  13. ("worker_version", WORKER_VERSION),
  14. )
  15. idrequest = requests.get(SERVER_BASE_URL+"/worker/getID", params=params)
  16. if idrequest.status_code == 200:
  17. WORKER_ID = idrequest.text
  18. break
  19. else:
  20. print("Error in retrieving ID, will attempt again in 10 minutes")
  21. sleep(600)
  22. while True:
  23. try:
  24. mkdir("out")
  25. except:
  26. pass
  27. recvids = set()
  28. recchans = set()
  29. recmixes = set()
  30. recplayl = set()
  31. # Get a batch ID
  32. while True:
  33. params = (
  34. ("id", WORKER_ID),
  35. ("worker_version", WORKER_VERSION),
  36. )
  37. batchrequest = requests.get(SERVER_BASE_URL+"/worker/getBatch", params=params)
  38. if batchrequest.status_code == 200:
  39. batchinfo = batchrequest.json()
  40. break
  41. else:
  42. print("Error in retrieving batch assignment, will attempt again in 10 minutes")
  43. sleep(600)
  44. print("Received batch ID:", batchinfo["batchID"], "Content:", batchinfo["content"])
  45. # Process the batch
  46. batchcontent = requests.get(batchinfo["content"]).text.split("\n")
  47. for item in batchcontent:
  48. print("Video ID:", str(item).strip())
  49. while True:
  50. try:
  51. info = getmetadata(str(item).strip())
  52. break
  53. except BaseException as e:
  54. print(e)
  55. print("Error in retrieving information, waiting 10 minutes")
  56. sleep(600)
  57. # Add any discovered videos
  58. recvids.update(info[2])
  59. recchans.update(info[3])
  60. recmixes.update(info[4])
  61. recplayl.update(info[5])
  62. if info[0] or info[1]: # ccenabled or creditdata
  63. mkdir("out/"+str(item).strip())
  64. if info[1]: # creditdata
  65. open("out/"+str(item).strip()+"/"+str(item).strip()+"_published_credits.json", "w").write(dumps(info[1]))
  66. if info[0]: #ccenabled
  67. while True:
  68. gsres = False
  69. try:
  70. gsres = getsubs(str(item).strip())
  71. except BaseException as e:
  72. print(e)
  73. if gsres:
  74. break
  75. else:
  76. print("Error in retrieving subtitles, waiting 10 minutes")
  77. sleep(600)
  78. # TODO: put the data somewhere...
  79. # TODO: put the discoveries somewhere...
  80. open("out/discoveries.json", "w").write(dumps({"recvids": sorted(recvids), "recchans": sorted(recchans), "recmixes": sorted(recmixes), "recplayl": sorted(recplayl)}))
  81. make_archive("out.zip", "zip", "out") #check this
  82. while True:
  83. try:
  84. uploadr = requests.post("https://transfersh.com/"+str(batchinfo["batchID"])+".zip", data=open("out.zip"))
  85. if uploadr.status_code == 200:
  86. resulturl = uploadr.text
  87. break
  88. except BaseException as e:
  89. print(e)
  90. print("Encountered error in uploading results... retrying in 10 minutes")
  91. sleep(600)
  92. # Report the batch as complete (I can't think of a fail condition except for a worker exiting...)
  93. # TODO: handle worker exit
  94. while True:
  95. params = (
  96. ("id", WORKER_ID),
  97. ("worker_version", WORKER_VERSION),
  98. ("batchID", batchinfo["batchID"]),
  99. ("randomKey", batchinfo["randomKey"]),
  100. ("status", "c"),
  101. ("resulturl", resulturl),
  102. )
  103. statusrequest = requests.get(SERVER_BASE_URL+"/worker/updateStatus", params=params)
  104. if statusrequest.status_code == 200 and statusrequest.text == "Success":
  105. break
  106. else:
  107. print("Error in reporting success, will attempt again in 10 minutes")
  108. sleep(600)
  109. # TODO: clear the output directory
  110. rmtree("out")