소스 검색

Try to improve channel and playlist collection

microtasks
tech234a 3 년 전
부모
커밋
0087135911
1개의 변경된 파일35개의 추가작업 그리고 43개의 파일을 삭제
  1. +35
    -43
      worker.py

+ 35
- 43
worker.py 파일 보기

@@ -25,8 +25,26 @@ from export import subprrun
batchcontent = [] batchcontent = []


def batchfunc(): def batchfunc():
while len(batchcontent) < 500:
batchcontent.append(tracker.request_item_from_tracker())
ydl = YoutubeDL({"extract_flat": "in_playlist", "simulate": True, "skip_download": True, "quiet": True, "cookiefile": "cookies.txt", "source_address": "0.0.0.0", "call_home": False})
while len(jobs) < 501:
desit = tracker.request_item_from_tracker()
if desit:
if desit.split(":", 1)[0] == "video":
jobs.put(desit.split(":", 1)[1])
elif desit.split(":", 1)[0] == "channel":
y = ydl.extract_info("https://www.youtube.com/channel/"+desit.split(":", 1)[1], download=False)
for itemyv in y["entries"]:
tracker.add_item_to_tracker(tracker.ItemType.Video, itemyv["id"])
elif desit.split(":", 1)[0] == "playlist":
y = ydl.extract_info("https://www.youtube.com/playlist?list="+desit.split(":", 1)[1], download=False)
for itemyvp in y["entries"]:
tracker.add_item_to_tracker(tracker.ItemType.Video, itemyvp["id"])
else:
print("Ignoring item for now", desit)
else:
print("Ignoring item for now", desit)
batchcontent.append(desit.split(":", 1)[1])


def submitfunc(submitqueue): def submitfunc(submitqueue):
while not submitqueue.empty(): while not submitqueue.empty():
@@ -113,33 +131,6 @@ def prrun():
#raise #raise
sleep(30) sleep(30)


ydl = YoutubeDL({"extract_flat": "in_playlist", "simulate": True, "skip_download": True, "quiet": True, "cookiefile": "cookies.txt", "source_address": "0.0.0.0", "call_home": False})
for chaninfo in set(info[3]):
if chaninfo not in recchans:
while True:
try:
y = ydl.extract_info("https://www.youtube.com/channel/"+chaninfo, download=False)
recchans.add(chaninfo)
break
except:
sleep(30)
sleep(5) #prevent error 429
for itemyv in y["entries"]:
recvids.add(itemyv["id"])

for playlinfo in set(info[5]):
if playlinfo not in recplayl:
while True:
try:
y = ydl.extract_info("https://www.youtube.com/playlist?list="+playlinfo, download=False)
recplayl.add(playlinfo)
break
except:
sleep(30)
sleep(5) #prevent error 429
for itemyvp in y["entries"]:
recvids.add(itemyvp["id"])

# Add any discovered videos # Add any discovered videos
recvids.update(info[2]) recvids.update(info[2])
recchans.update(info[3]) recchans.update(info[3])
@@ -188,14 +179,7 @@ while not gkiller.kill_now:
#for ir in range(501): #for ir in range(501):
# batchcontent.append(tracker.request_item_from_tracker()) # batchcontent.append(tracker.request_item_from_tracker())


for desit in batchcontent:
if desit:
if desit.split(":", 1)[0] == "video":
jobs.put(desit.split(":", 1)[1])
else:
print("Ignoring item for now", desit)
else:
print("Ignoring item for now", desit)


threads = [] threads = []


@@ -213,22 +197,30 @@ while not gkiller.kill_now:
print("Sending discoveries to tracker...") print("Sending discoveries to tracker...")


submitjobs = Queue() submitjobs = Queue()
#don't send channels and playlists as those have already been converted for video IDs
#IDK how to handle mixes so send them for now
print(len(recvids))
#IDK how to handle mixes so just send them for now
print("Videos:", len(recvids))
for itemvid in recvids: for itemvid in recvids:
submitjobs.put((tracker.ItemType.Video, itemvid)) submitjobs.put((tracker.ItemType.Video, itemvid))


print(len(recmixes))
print("Channels:", len(recchans))
for itemchan in recchans:
submitjobs.put((tracker.ItemType.Channel, itemchan))

print("Mix Playlists:", len(recmixes))
for itemmix in recmixes: for itemmix in recmixes:
submitjobs.put((tracker.ItemType.MixPlaylist, itemmix)) submitjobs.put((tracker.ItemType.MixPlaylist, itemmix))


print("Playlists:", len(recplayl))
for itemplayl in recplayl:
submitjobs.put((tracker.ItemType.Playlist, itemplayl))

#open("out/discoveries.json", "w").write(dumps({"recvids": sorted(recvids), "recchans": sorted(recchans), "recmixes": sorted(recmixes), "recplayl": sorted(recplayl)})) #open("out/discoveries.json", "w").write(dumps({"recvids": sorted(recvids), "recchans": sorted(recchans), "recmixes": sorted(recmixes), "recplayl": sorted(recplayl)}))
#clear #clear
recvids.clear() recvids.clear()
# recchans.clear()
recchans.clear()
recmixes.clear() recmixes.clear()
# recplayl.clear()
recplayl.clear()


submitthreads = [] submitthreads = []




불러오는 중...
취소
저장