Pārlūkot izejas kodu

Reduce exceptions, limit threads

pull/3/head
tech234a pirms 3 gadiem
vecāks
revīzija
0a29b95e6e
2 mainītis faili ar 49 papildinājumiem un 23 dzēšanām
  1. +35
    -14
      discovery.py
  2. +14
    -9
      worker.py

+ 35
- 14
discovery.py Parādīt failu

@@ -1,3 +1,4 @@
from typing import Dict
import requests import requests
from json import loads from json import loads
@@ -24,6 +25,9 @@ def getmetadata(vid):
recmixes = set() recmixes = set()
recplayl = set() recplayl = set()
ccenabled = False #default values
creditdata = {}
for line in wptext.splitlines(): for line in wptext.splitlines():
if line.strip().startswith('window["ytInitialPlayerResponse"] = '): if line.strip().startswith('window["ytInitialPlayerResponse"] = '):
initplay = loads(line.split('window["ytInitialPlayerResponse"] = ', 1)[1].strip()[:-1]) initplay = loads(line.split('window["ytInitialPlayerResponse"] = ', 1)[1].strip()[:-1])
@@ -33,26 +37,35 @@ def getmetadata(vid):
return False, {}, recvids, recchans, recmixes, recplayl return False, {}, recvids, recchans, recmixes, recplayl
if "endscreen" in initplay.keys(): if "endscreen" in initplay.keys():
for el in initplay["endscreen"]["endscreenRenderer"]:
if "endscreenRenderer" in initplay["endscreen"].keys():
for el in initplay["endscreen"]["endscreenRenderer"]:
elint = el["endscreenElementRenderer"]
if type(el) == Dict:
elint = el["endscreenElementRenderer"]
if elint["style"] == "VIDEO":
recvids.add(elint["endpoint"]["watchEndpoint"]["videoId"])
if "endscreenElementRenderer" in el.keys():
if elint["style"] == "VIDEO":
recvids.add(elint["endpoint"]["watchEndpoint"]["videoId"])
elif elint["style"] == "CHANNEL":
recchans.add(elint["endpoint"]["browseEndpoint"]["browseId"])
elif elint["style"] == "CHANNEL":
try:
recchans.add(elint["endpoint"]["browseEndpoint"]["browseId"])
except:
print("Channel endscreen error")
raise
elif elint["style"] == "PLAYLIST":
recvids.add(elint["endpoint"]["watchEndpoint"]["videoId"])
recplayl.add(elint["endpoint"]["watchEndpint"]["playlistId"])
elif elint["style"] == "PLAYLIST":
recvids.add(elint["endpoint"]["watchEndpoint"]["videoId"])
recplayl.add(elint["endpoint"]["watchEndpint"]["playlistId"])
if "captions" in initplay.keys(): if "captions" in initplay.keys():
ccenabled = "contribute" in initplay["captions"]["playerCaptionsRenderer"] ccenabled = "contribute" in initplay["captions"]["playerCaptionsRenderer"]
else: else:
ccenabled = False # if captions information is not present, community contributions are not enabled ccenabled = False # if captions information is not present, community contributions are not enabled
recchans.add(initplay["videoDetails"]["channelId"])
if "videoDetails" in initplay.keys():
if "channelId" in initplay["videoDetails"].keys():
recchans.add(initplay["videoDetails"]["channelId"])
elif line.strip().startswith('window["ytInitialData"] = '): elif line.strip().startswith('window["ytInitialData"] = '):
initdata = loads(line.split('window["ytInitialData"] = ', 1)[1].strip()[:-1]) initdata = loads(line.split('window["ytInitialData"] = ', 1)[1].strip()[:-1])
if "contents" in initdata.keys(): #prevent exception if "contents" in initdata.keys(): #prevent exception
@@ -66,14 +79,20 @@ def getmetadata(vid):
try: try:
recchans.add(recmd["compactVideoRenderer"]["channelId"]) recchans.add(recmd["compactVideoRenderer"]["channelId"])
except KeyError as e: except KeyError as e:
print("Channel extract error")
try:
recchans.add(recmd["compactVideoRenderer"]["longBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"])
except KeyError as e:
print("Channel extract error")
#raise
#print("Unable to extract channel:") #print("Unable to extract channel:")
#print(recmd["compactVideoRenderer"]) #print(recmd["compactVideoRenderer"])
elif "compactPlaylistRenderer" in recmd.keys(): elif "compactPlaylistRenderer" in recmd.keys():
recplayl.add(recmd["compactPlaylistRenderer"]["playlistId"]) recplayl.add(recmd["compactPlaylistRenderer"]["playlistId"])
recvids.add(recmd["compactPlaylistRenderer"]["navigationEndpoint"]["watchEndpoint"]["videoId"])
recchans.add(recmd["compactPlaylistRenderer"]["shortBylineText"]["navigationEndpoint"]["browseEndpoint"]["browseId"])
if "navigationEndpoint" in recmd["compactPlaylistRenderer"].keys():
recvids.add(recmd["compactPlaylistRenderer"]["navigationEndpoint"]["watchEndpoint"]["videoId"])
if "navigationEndpoint" in recmd["compactPlaylistRenderer"]["shortBylineText"].keys():
recchans.add(recmd["compactPlaylistRenderer"]["shortBylineText"]["navigationEndpoint"]["browseEndpoint"]["browseId"])
elif "compactRadioRenderer" in recmd.keys(): #mix playlist elif "compactRadioRenderer" in recmd.keys(): #mix playlist
recmixes.add(recmd["compactRadioRenderer"]["playlistId"]) recmixes.add(recmd["compactRadioRenderer"]["playlistId"])
@@ -95,7 +114,9 @@ def getmetadata(vid):
creditdata[desl].append({"name": itemint["runs"][0]["text"], "channel": itemint["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"]}) creditdata[desl].append({"name": itemint["runs"][0]["text"], "channel": itemint["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"]})
except KeyError as e: except KeyError as e:
print("Metadata key error")
#print("Video does not have credits")
pass
#raise
#print(e) #print(e)
if initplay and initdata: if initplay and initdata:


+ 14
- 9
worker.py Parādīt failu

@@ -1,6 +1,7 @@
import requests import requests
from time import sleep from time import sleep
from os import mkdir from os import mkdir
from os.path import isdir
from json import dumps from json import dumps
import threading import threading


@@ -28,6 +29,7 @@ class batchthread(threading.Thread):
except BaseException as e: except BaseException as e:
print(e) print(e)
print("Error in retrieving information, waiting 30 seconds") print("Error in retrieving information, waiting 30 seconds")
raise
sleep(30) sleep(30)


# Add any discovered videos # Add any discovered videos
@@ -37,7 +39,8 @@ class batchthread(threading.Thread):
recplayl.update(info[5]) recplayl.update(info[5])


if info[0] or info[1]: # ccenabled or creditdata if info[0] or info[1]: # ccenabled or creditdata
mkdir("out/"+str(item).strip())
if not isdir("out/"+str(item).strip()):
mkdir("out/"+str(item).strip())


if info[1]: # creditdata if info[1]: # creditdata
open("out/"+str(item).strip()+"/"+str(item).strip()+"_published_credits.json", "w").write(dumps(info[1])) open("out/"+str(item).strip()+"/"+str(item).strip()+"_published_credits.json", "w").write(dumps(info[1]))
@@ -103,15 +106,17 @@ while True:
# Process the batch # Process the batch
batchcontent = requests.get(batchinfo["content"]).text.split("\n") batchcontent = requests.get(batchinfo["content"]).text.split("\n")



threads = [] threads = []
for item in batchcontent:
runthread = batchthread(name = item)
runthread.start()
threads.append(runthread)
while batchcontent:
while len(threads) <= 50 and batchcontent:
item = batchcontent.pop(0)
runthread = batchthread(name = item)
runthread.start()
threads.append(runthread)


for x in threads:
x.join()
for x in threads:
x.join()
threads.remove(x)


#https://stackoverflow.com/a/11968881 #https://stackoverflow.com/a/11968881


@@ -119,7 +124,7 @@ while True:
# TODO: put the discoveries somewhere... # TODO: put the discoveries somewhere...
open("out/discoveries.json", "w").write(dumps({"recvids": sorted(recvids), "recchans": sorted(recchans), "recmixes": sorted(recmixes), "recplayl": sorted(recplayl)})) open("out/discoveries.json", "w").write(dumps({"recvids": sorted(recvids), "recchans": sorted(recchans), "recmixes": sorted(recmixes), "recplayl": sorted(recplayl)}))


make_archive("out.zip", "zip", "out") #check this
make_archive("out", "zip", "out") #check this


# while True: # while True:
# try: # try:


Notiek ielāde…
Atcelt
Saglabāt