From a9aa24409f887feb4cf058e9a1dbb32990b08bcd Mon Sep 17 00:00:00 2001 From: tech234a <46801700+tech234a@users.noreply.github.com> Date: Wed, 23 Sep 2020 16:22:59 -0400 Subject: [PATCH] Fix exception on marking as complete --- tracker.py | 2 +- worker.py | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/tracker.py b/tracker.py index 55ea317..90da312 100644 --- a/tracker.py +++ b/tracker.py @@ -9,7 +9,7 @@ from os.path import isfile from json import loads # https://github.com/ArchiveTeam/tencent-weibo-grab/blob/9bae5f9747e014db9227821a9c11557267967023/pipeline.py -VERSION = "20200923.01" +VERSION = "20200923.02" TRACKER_ID = "ext-yt-communitycontribs" TRACKER_HOST = "trackerproxy.meo.ws" diff --git a/worker.py b/worker.py index bb1508d..a134c95 100644 --- a/worker.py +++ b/worker.py @@ -21,6 +21,7 @@ from discovery import getmetadata from export import subprrun batchcontent = [] +actualitems = [] HEROKU = False if isfile("../Procfile"): @@ -53,6 +54,7 @@ def batchfunc(): print("Ignoring item for now", desit) batchcontent.append(desit.split(":", 1)[1]) + actualitems.append(desit) def submitfunc(submitqueue): while not submitqueue.empty(): @@ -179,6 +181,7 @@ while not gkiller.kill_now: pass batchcontent.clear() + actualitems.clear() # Get a batch ID batchthreads = [] @@ -310,11 +313,12 @@ while not gkiller.kill_now: system("curl -F "+filzip+"=@directory/"+filzip+" "+targetloc) # Report the batch as complete - for itemb in batchcontent: - if isfile("directory/"+itemb.split(":", 1)[1]+".zip"): - size = getsize("directory/"+itemb.split(":", 1)[1]+".zip") - else: - size = 0 + for itemb in actualitems: + size = 0 + if ":" in itemb: + if itemb.split(":", 1)[0] == "video": + if isfile("directory/"+itemb.split(":", 1)[1]+".zip"): + size = getsize("directory/"+itemb.split(":", 1)[1]+".zip") tracker.mark_item_as_done(itemb, size) # clear the output directories