@@ -11,12 +11,14 @@ import tracker
 from youtube_dl import YoutubeDL
-from shutil import make_archive, rmtree
+from shutil import rmtree
 from queue import Queue
 from gc import collect
+from datetime import timedelta, datetime
 from discovery import getmetadata
 from export import subprrun
@@ -25,6 +27,8 @@ jobs = Queue()
 langcnt = {}
+lasttask = datetime.min
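+#lasttask starts at the earliest representable datetime so the first tracker request is not delayed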
 try:
     mkdir("out")
 except:
@@ -91,16 +95,16 @@ class GracefulKiller:
         signal.signal(signal.SIGINT, self.exit_gracefully)
         signal.signal(signal.SIGTERM, self.exit_gracefully)
-    def exit_gracefully(self,signum, frame):
+    def exit_gracefully(self, signum, frame):
+        print("Graceful exit process initiated, stopping all tasks...")
         self.kill_now = True
 gkiller = GracefulKiller()
-#TODO: zipping, completion of subtitles (return value), limit task retrieval count
 #microtasks
 def threadrunner(jobs: Queue):
     global langcnt
+    global lasttask
     ydl = YoutubeDL({"extract_flat": "in_playlist", "simulate": True, "skip_download": True, "quiet": True, "cookiefile": "cookies.txt", "source_address": "0.0.0.0", "call_home": False})
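+    #the options above do a flat, metadata-only extraction; "0.0.0.0" makes youtube-dl bind to an IPv4 source address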
     while not gkiller.kill_now:
         if not jobs.empty():
@@ -140,10 +144,10 @@ def threadrunner(jobs: Queue):
jobs.put(("submitdiscovery", playldisc, tracker.ItemType.Playlist))
jobs.put(("submitdiscovery", playldisc, tracker.ItemType.Playlist))
elif task == "subtitles":
elif task == "subtitles":
subprrun(jobs, mysession, args, vid, "default")
langcnt[vid] += 1
if langcnt[vid] >= 19 5:
pass #complete(? )
retval = subprrun(jobs, mysession, args, vid, "default")
langcnt[vid] += retval
if langcnt[vid] >= 58 5:
jobs.put(("complete", None, "video:"+vid) )
elif task == "subtitles-forceedit-captions":
elif task == "subtitles-forceedit-captions":
subprrun(jobs, mysession, args, vid, "forceedit-captions")
subprrun(jobs, mysession, args, vid, "forceedit-captions")
elif task == "subtitles-forceedit-metadata":
elif task == "subtitles-forceedit-metadata":
@@ -163,84 +167,91 @@ def threadrunner(jobs: Queue):
if ":" in args:
if ":" in args:
if args.split(":", 1)[0] == "video":
if args.split(":", 1)[0] == "video":
#check if dir is empty, make zip if needed
#check if dir is empty, make zip if needed
if isfile("directory/"+args.split(":", 1)[1]+".zip"):
size = getsize("directory/"+args.split(":", 1)[1]+".zip")
#get a target
targetloc = None
while not targetloc:
targetloc = tracker.request_upload_target()
if targetloc:
break
else:
print("Waiting 5 minutes...")
sleep(300)
if targetloc.startswith("rsync"):
system("rsync -rltv --timeout=300 --contimeout=300 --progress --bwlimit 0 --recursive --partial --partial-dir .rsync-tmp --min-size 1 --no-compress --compress-level 0 --files-from=- directory/ "+targetloc)
elif targetloc.startswith("http"):
for filzip in listdir("directory"):
if filzip.endswith(".zip"):
system("curl -F "+filzip+"=@directory/"+filzip+" "+targetloc)
tracker.mark_item_as_done(args, size)
if isdir("out/"+args.split(":", 1)[1]):
if not listdir("out/"+args.split(":", 1)[1]):
rmdir("out/"+args.split(":", 1)[1])
else:
#zip it up
if not isdir("directory/"+args.split(":", 1)[1]):
mkdir("directory/"+args.split(":", 1)[1])
while not isfile("directory/"+args.split(":", 1)[1]+"/"+args.split(":", 1)[1]+".zip"):
print("Attempting to zip item...")
system("zip -r directory/"+args.split(":", 1)[1]+"/"+args.split(":", 1)[1]+".zip out/"+args.split(":", 1)[1])
+                            #get a target
+                            targetloc = None
+                            while not targetloc:
+                                targetloc = tracker.request_upload_target()
+                                if targetloc:
+                                    break
+                                else:
+                                    print("Waiting 5 minutes...")
+                                    sleep(300)
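+                            #the tracker hands back either an rsync destination or an HTTP endpoint; upload the zip accordingly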
+                            if targetloc.startswith("rsync"):
+                                system("rsync -rltv --timeout=300 --contimeout=300 --progress --bwlimit 0 --recursive --partial --partial-dir .rsync-tmp --min-size 1 --no-compress --compress-level 0 --files-from=- directory/"+args.split(":", 1)[1]+"/ "+targetloc)
+                            elif targetloc.startswith("http"):
+                                system("curl -F "+args.split(":", 1)[1]+".zip=@directory/"+args.split(":", 1)[1]+"/"+args.split(":", 1)[1]+".zip "+targetloc)
+                            size = getsize("directory/"+args.split(":", 1)[1]+"/"+args.split(":", 1)[1]+".zip")
+                            #cleanup
+                            try:
+                                del langcnt[args.split(":", 1)[1]]
+                                rmtree("directory/"+args.split(":", 1)[1]+"/")
+                                rmdir("directory/"+args.split(":", 1)[1]+"/")
+                                rmtree("out/"+args.split(":", 1)[1]+"/")
+                                rmdir("out/"+args.split(":", 1)[1]+"/")
+                            except:
+                                pass
+                            tracker.mark_item_as_done(args, size)
             jobs.task_done()
         else:
             # get a new task from tracker
-            collect() #cleanup
-            desit = tracker.request_item_from_tracker()
-            if desit:
-                if desit.split(":", 1)[0] == "video":
-                    jobs.put(("discovery", desit.split(":", 1)[1], None))
-                elif desit.split(":", 1)[0] == "channel":
-                    jobs.put(("channel", None, desit.split(":", 1)[1]))
-                elif desit.split(":", 1)[0] == "playlist":
-                    jobs.put(("playlist", None, desit.split(":", 1)[1]))
-                else:
-                    print("Ignoring item for now", desit)
-            else:
-                print("Ignoring item for now", desit)
+            if datetime.now() - lasttask > timedelta(seconds=15): #only retrieve a task every 15 seconds to allow queue to build up
+                collect() #cleanup
+                desit = tracker.request_item_from_tracker()
+                print("New task:", desit)
+                if desit:
+                    if desit.split(":", 1)[0] == "video":
+                        lasttask = datetime.now()
+                        jobs.put(("discovery", desit.split(":", 1)[1], None))
+                    elif desit.split(":", 1)[0] == "channel":
+                        lasttask = datetime.now()
+                        jobs.put(("channel", None, desit.split(":", 1)[1]))
+                    elif desit.split(":", 1)[0] == "playlist":
+                        lasttask = datetime.now()
+                        jobs.put(("playlist", None, desit.split(":", 1)[1]))
+                    else:
+                        print("Ignoring item for now", desit)
+                else:
+                    print("Ignoring item for now", desit)
-def prrun():
-    print("Video ID:", str(item).strip())
-while not gkiller.kill_now:
-    sleep(1)
-threads = []
-for i in range(50):
-    runthread = Thread(target=prrun)
-    runthread.start()
-    threads.append(runthread)
-    del runthread
-for x in threads:
-    x.join()
-    threads.remove(x)
-    del x
-for fol in listdir("out"): #remove empty folders
-    try:
-        if isdir("out/"+fol):
-            rmdir("out/"+fol)
-    except:
-        pass
-#https://stackoverflow.com/a/11968881
-for fol in listdir("out"):
-    if isdir("out/"+fol):
-        make_archive("directory/"+fol, "zip", "out/"+fol)
+threads = []
+#start with 1 thread, give it a 5 second head start
+runthread = Thread(target=threadrunner, args=(jobs,))
+runthread.start()
+threads.append(runthread)
+del runthread
+sleep(5)
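+#the head start lets the first worker fetch a task from the tracker and seed the queue before the other threads start polling it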
+#now create the other 49 threads
+for i in range(49):
+    runthread = Thread(target=threadrunner, args=(jobs,))
+    runthread.start()
+    threads.append(runthread)
+    del runthread
+#https://stackoverflow.com/a/11968881
+for x in threads:
+    x.join()
+    threads.remove(x)
+    del x
+# clear the output directories
+rmtree("out")
+rmtree("directory")
+print("Exiting...")