diff --git a/discovery.py b/discovery.py
index c6ad4a5..bf3c24c 100644
--- a/discovery.py
+++ b/discovery.py
@@ -123,34 +123,11 @@ def getmetadata(mysession, vid, ccenabledonly=False):
except BaseException as e:
print(e)
print("Exception in discovery, continuing anyway")
-
- creditdata = {}
-
- if not ccenabledonly:
- try:
- mdinfo = initdata["contents"]["twoColumnWatchNextResults"]["results"]["results"]["contents"][1]["videoSecondaryInfoRenderer"]["metadataRowContainer"]["metadataRowContainerRenderer"]["rows"]
- for item in mdinfo:
- if item["metadataRowRenderer"]["title"]["simpleText"].startswith("Caption author"): #the request to /watch needs to be in English for this to work
- try:
- desl = langcodes[item["metadataRowRenderer"]["title"]["simpleText"].split("(", 1)[1][:-1]]
- except KeyError as e:
- #print(e)
- print("Language code conversion error, using language name")
- desl = item["metadataRowRenderer"]["title"]["simpleText"].split("(", 1)[1][:-1]
- creditdata[desl] = []
- for itemint in item["metadataRowRenderer"]["contents"]:
- creditdata[desl].append({"name": itemint["runs"][0]["text"], "channel": itemint["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"]})
-
- except KeyError as e:
- #print("Video does not have credits")
- pass
- #raise
- #print(e)
if initplay and (initdata or ccenabledonly):
break
- return ccenabled, creditdata, recvids, recchans, recmixes, recplayl
+ return ccenabled, recvids, recchans, recmixes, recplayl
if __name__ == "__main__":
from sys import argv
diff --git a/tracker.py b/tracker.py
index f3782bf..a78ec20 100644
--- a/tracker.py
+++ b/tracker.py
@@ -9,7 +9,7 @@ from os.path import isfile
from json import loads
# https://github.com/ArchiveTeam/tencent-weibo-grab/blob/9bae5f9747e014db9227821a9c11557267967023/pipeline.py
-VERSION = "20201002.01"
+VERSION = "20201017.01"
TRACKER_ID = "ext-yt-communitycontribs"
TRACKER_HOST = "trackerproxy.meo.ws"
diff --git a/worker.py b/worker.py
index bf9738c..40904bb 100644
--- a/worker.py
+++ b/worker.py
@@ -3,9 +3,9 @@ import requests
from time import sleep
from os import mkdir, rmdir, listdir, system, environ
from os.path import isdir, isfile, getsize
-from json import dumps, loads
+from json import loads
-from youtube_channel import main
+from youtube_channel import process_channel
import signal
@@ -87,7 +87,6 @@ open("cookies.txt", "w").write("""# HTTP Cookie File
del cookies
validationtimes = 0
-shouldgetjob = True
#Graceful Shutdown
class GracefulKiller:
@@ -102,20 +101,8 @@ class GracefulKiller:
gkiller = GracefulKiller()
-#REMOVED PANIC MECHANISM!
-"""
-enres = getmetadata(mysession, "IjJKfe-0Ty0", True)[0]
-if not enres:
- print("Community Contribution discovery has been disabled for this account, please report this on our Discord as this may have caused some videos to be incorrectly marked as having community contributions disabled.")
- shouldgetjob = False
- gkiller.kill_now = True #exit the script
-
-del enres
-"""
-
#microtasks
def threadrunner():
- global shouldgetjob
global validationtimes
jobs = Queue()
ydl = YoutubeDL({"extract_flat": "in_playlist", "simulate": True, "skip_download": True, "quiet": True, "cookiefile": "cookies.txt", "source_address": "0.0.0.0", "call_home": False})
@@ -124,7 +111,6 @@ def threadrunner():
task, vid, args = jobs.get()
if task == "submitdiscovery":
tracker.add_item_to_tracker(args, vid)
- #pass
elif task == "discovery":
while True:
@@ -135,11 +121,9 @@ def threadrunner():
print(e)
print("Error in retrieving information, waiting 30 seconds and trying again")
sleep(30)
- if info[0] or info[1]: # ccenabled or creditdata
+ if info[0]: # ccenabled
if not isdir("out/"+str(vid).strip()):
mkdir("out/"+str(vid).strip())
- if info[1]:
- open("out/"+str(vid).strip()+"/"+str(vid).strip()+"_published_credits.json", "w").write(dumps(info[1]))
if info[0]:
for langcode in langs:
@@ -153,17 +137,14 @@ def threadrunner():
jobs.put(("complete", None, "video:"+vid))
- for videodisc in info[2]:
+ for videodisc in info[1]:
jobs.put(("submitdiscovery", videodisc, tracker.ItemType.Video))
- for channeldisc in info[3]:
+ for channeldisc in info[2]:
jobs.put(("submitdiscovery", channeldisc, tracker.ItemType.Channel))
- for mixdisc in info[4]:
+ for mixdisc in info[3]:
jobs.put(("submitdiscovery", mixdisc, tracker.ItemType.MixPlaylist))
- for playldisc in info[5]:
+ for playldisc in info[4]:
jobs.put(("submitdiscovery", playldisc, tracker.ItemType.Playlist))
-
- #jobs.put(("complete", None, "video:"+vid))
- #pass
elif task == "subtitles":
subprrun(mysession, args, vid, "default", needforcemetadata, needforcecaptions)
@@ -178,7 +159,7 @@ def threadrunner():
jobs.put(("submitdiscovery", itemyv["id"], tracker.ItemType.Video))
#channel created playlists
- y = main(desit.split(":", 1)[1])
+ y = process_channel(desit.split(":", 1)[1])
for itemyv in y["playlists"]:
jobs.put(("submitdiscovery", itemyv, tracker.ItemType.Playlist))
for itemyv in y["channels"]:
@@ -200,17 +181,14 @@ def threadrunner():
elif task == "mixplaylist":
try:
wptext = mysession.get("https://www.youtube.com/watch?v=jNQXAC9IVRw&list="+desit.split(":", 1)[1]).text
- #chanl = set()
+
#channel handling not needed here because we will get it from the video
for line in wptext.splitlines():
if line.strip().startswith('window["ytInitialData"] = '):
initdata = loads(line.split('window["ytInitialData"] = ', 1)[1].strip()[:-1])
for itemyvp in initdata["contents"]["twoColumnWatchNextResults"]["playlist"]["playlist"]["contents"]:
jobs.put(("submitdiscovery", itemyvp["playlistPanelVideoRenderer"]["videoId"], tracker.ItemType.Video))
- #chanl.add(itemyvp["playlistPanelVideoRenderer"]["longBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"])
-
- #for itemn in chanl:
- # jobs.put(("submitdiscovery", itemn, tracker.ItemType.Channel))
+
jobs.put(("complete", None, "mixplaylist:"+args))
except:
print("Mix Playlist error, ignoring but not marking as complete...", "https://www.youtube.com/watch?v=jNQXAC9IVRw&list="+desit.split(":", 1)[1])
@@ -241,10 +219,17 @@ def threadrunner():
print("Waiting 5 minutes...")
sleep(300)
- if targetloc.startswith("rsync"):
- system("rsync -rltv --timeout=300 --contimeout=300 --progress --bwlimit 0 --recursive --partial --partial-dir .rsync-tmp --min-size 1 --no-compress --compress-level 0 directory/"+args.split(":", 1)[1]+"/ "+targetloc)
- elif targetloc.startswith("http"):
- system("curl -F "+args.split(":", 1)[1]+".zip=@directory/"+args.split(":", 1)[1]+"/"+args.split(":", 1)[1]+".zip "+targetloc)
+ while True:
+ if targetloc.startswith("rsync"):
+ exitinfo = system("rsync -rltv --timeout=300 --contimeout=300 --progress --bwlimit 0 --recursive --partial --partial-dir .rsync-tmp --min-size 1 --no-compress --compress-level 0 directory/"+args.split(":", 1)[1]+"/ "+targetloc)
+ elif targetloc.startswith("http"):
+ exitinfo = system("curl -F "+args.split(":", 1)[1]+".zip=@directory/"+args.split(":", 1)[1]+"/"+args.split(":", 1)[1]+".zip "+targetloc)
+
+ if exitinfo == 0: # note that on Unix this isn't necessarily the exit code but it's still 0 upon successful exit
+ break
+ else:
+ print("Error in sending data to target, waiting 30 seconds and trying again.")
+ sleep(30)
size = getsize("directory/"+args.split(":", 1)[1]+"/"+args.split(":", 1)[1]+".zip")
@@ -264,60 +249,44 @@ def threadrunner():
# get a new task from tracker
collect() #cleanup
- #Protection Mechanism Disarmed
- """
- #check that the account has community contributions enabled every 50th item
- validationtimes += 1
- if not validationtimes % 50:
- enres = getmetadata(mysession, "IjJKfe-0Ty0", True)[0]
- if not enres:
- print("Community Contribution discovery has been disabled for this account, please report this on our Discord as this may have caused some videos to be incorrectly marked as having community contributions disabled.")
- shouldgetjob = False
- gkiller.kill_now = True #exit the script
- del enres
- """
-
- if shouldgetjob:
- desit = tracker.request_item_from_tracker()
- print("New task:", desit)
-
- if desit:
- if desit.split(":", 1)[0] == "video":
- needforcemetadata = {'ab': None, 'aa': None, 'af': None, 'sq': None, 'ase': None, 'am': None, 'ar': None, 'arc': None, 'hy': None, 'as': None, 'ay': None, 'az': None, 'bn': None, 'ba': None, 'eu': None, 'be': None, 'bh': None, 'bi': None, 'bs': None, 'br': None,
- 'bg': None, 'yue': None, 'yue-HK': None, 'ca': None, 'chr': None, 'zh-CN': None, 'zh-HK': None, 'zh-Hans': None, 'zh-SG': None, 'zh-TW': None, 'zh-Hant': None, 'cho': None, 'co': None, 'hr': None, 'cs': None, 'da': None, 'nl': None,
- 'nl-BE': None, 'nl-NL': None, 'dz': None, 'en': None, 'en-CA': None, 'en-IN': None, 'en-IE': None, 'en-GB': None, 'en-US': None, 'eo': None, 'et': None, 'fo': None, 'fj': None, 'fil': None, 'fi': None, 'fr': None, 'fr-BE': None,
- 'fr-CA': None, 'fr-FR': None, 'fr-CH': None, 'ff': None, 'gl': None, 'ka': None, 'de': None, 'de-AT': None, 'de-DE': None, 'de-CH': None, 'el': None, 'kl': None, 'gn': None, 'gu': None, 'ht': None, 'hak': None, 'hak-TW': None, 'ha': None,
- 'iw': None, 'hi': None, 'hi-Latn': None, 'ho': None, 'hu': None, 'is': None, 'ig': None, 'id': None, 'ia': None, 'ie': None, 'iu': None, 'ik': None, 'ga': None, 'it': None, 'ja': None, 'jv': None, 'kn': None, 'ks': None, 'kk': None, 'km': None, 'rw': None,
- 'tlh': None, 'ko': None, 'ku': None, 'ky': None, 'lo': None, 'la': None, 'lv': None, 'ln': None, 'lt': None, 'lb': None, 'mk': None, 'mg': None, 'ms': None, 'ml': None, 'mt': None, 'mni': None, 'mi': None, 'mr': None, 'mas': None, 'nan': None,
- 'nan-TW': None, 'lus': None, 'mo': None, 'mn': None, 'my': None, 'na': None, 'nv': None, 'ne': None, 'no': None, 'oc': None, 'or': None, 'om': None, 'ps': None, 'fa': None, 'fa-AF': None, 'fa-IR': None, 'pl': None, 'pt': None, 'pt-BR': None,
- 'pt-PT': None, 'pa': None, 'qu': None, 'ro': None, 'rm': None, 'rn': None, 'ru': None, 'ru-Latn': None, 'sm': None, 'sg': None, 'sa': None, 'sc': None, 'gd': None, 'sr': None, 'sr-Cyrl': None, 'sr-Latn': None, 'sh': None, 'sdp': None, 'sn': None,
- 'scn': None, 'sd': None, 'si': None, 'sk': None, 'sl': None, 'so': None, 'st': None, 'es': None, 'es-419': None, 'es-MX': None, 'es-ES': None, 'es-US': None, 'su': None, 'sw': None, 'ss': None, 'sv': None, 'tl': None, 'tg': None, 'ta': None,
- 'tt': None, 'te': None, 'th': None, 'bo': None, 'ti': None, 'tpi': None, 'to': None, 'ts': None, 'tn': None, 'tr': None, 'tk': None, 'tw': None, 'uk': None, 'ur': None, 'uz': None, 'vi': None, 'vo': None, 'vor': None, 'cy': None, 'fy': None, 'wo': None,
- 'xh': None, 'yi': None, 'yo': None, 'zu': None}
- needforcecaptions = {'ab': None, 'aa': None, 'af': None, 'sq': None, 'ase': None, 'am': None, 'ar': None, 'arc': None, 'hy': None, 'as': None, 'ay': None, 'az': None, 'bn': None, 'ba': None, 'eu': None, 'be': None, 'bh': None, 'bi': None, 'bs': None, 'br': None,
- 'bg': None, 'yue': None, 'yue-HK': None, 'ca': None, 'chr': None, 'zh-CN': None, 'zh-HK': None, 'zh-Hans': None, 'zh-SG': None, 'zh-TW': None, 'zh-Hant': None, 'cho': None, 'co': None, 'hr': None, 'cs': None, 'da': None, 'nl': None,
- 'nl-BE': None, 'nl-NL': None, 'dz': None, 'en': None, 'en-CA': None, 'en-IN': None, 'en-IE': None, 'en-GB': None, 'en-US': None, 'eo': None, 'et': None, 'fo': None, 'fj': None, 'fil': None, 'fi': None, 'fr': None, 'fr-BE': None,
- 'fr-CA': None, 'fr-FR': None, 'fr-CH': None, 'ff': None, 'gl': None, 'ka': None, 'de': None, 'de-AT': None, 'de-DE': None, 'de-CH': None, 'el': None, 'kl': None, 'gn': None, 'gu': None, 'ht': None, 'hak': None, 'hak-TW': None, 'ha': None,
- 'iw': None, 'hi': None, 'hi-Latn': None, 'ho': None, 'hu': None, 'is': None, 'ig': None, 'id': None, 'ia': None, 'ie': None, 'iu': None, 'ik': None, 'ga': None, 'it': None, 'ja': None, 'jv': None, 'kn': None, 'ks': None, 'kk': None, 'km': None, 'rw': None,
- 'tlh': None, 'ko': None, 'ku': None, 'ky': None, 'lo': None, 'la': None, 'lv': None, 'ln': None, 'lt': None, 'lb': None, 'mk': None, 'mg': None, 'ms': None, 'ml': None, 'mt': None, 'mni': None, 'mi': None, 'mr': None, 'mas': None, 'nan': None,
- 'nan-TW': None, 'lus': None, 'mo': None, 'mn': None, 'my': None, 'na': None, 'nv': None, 'ne': None, 'no': None, 'oc': None, 'or': None, 'om': None, 'ps': None, 'fa': None, 'fa-AF': None, 'fa-IR': None, 'pl': None, 'pt': None, 'pt-BR': None,
- 'pt-PT': None, 'pa': None, 'qu': None, 'ro': None, 'rm': None, 'rn': None, 'ru': None, 'ru-Latn': None, 'sm': None, 'sg': None, 'sa': None, 'sc': None, 'gd': None, 'sr': None, 'sr-Cyrl': None, 'sr-Latn': None, 'sh': None, 'sdp': None, 'sn': None,
- 'scn': None, 'sd': None, 'si': None, 'sk': None, 'sl': None, 'so': None, 'st': None, 'es': None, 'es-419': None, 'es-MX': None, 'es-ES': None, 'es-US': None, 'su': None, 'sw': None, 'ss': None, 'sv': None, 'tl': None, 'tg': None, 'ta': None,
- 'tt': None, 'te': None, 'th': None, 'bo': None, 'ti': None, 'tpi': None, 'to': None, 'ts': None, 'tn': None, 'tr': None, 'tk': None, 'tw': None, 'uk': None, 'ur': None, 'uz': None, 'vi': None, 'vo': None, 'vor': None, 'cy': None, 'fy': None, 'wo': None,
- 'xh': None, 'yi': None, 'yo': None, 'zu': None}
- jobs.put(("discovery", desit.split(":", 1)[1], None))
- elif desit.split(":", 1)[0] == "channel":
- jobs.put(("channel", None, desit.split(":", 1)[1]))
- elif desit.split(":", 1)[0] == "playlist":
- jobs.put(("playlist", None, desit.split(":", 1)[1]))
- elif desit.split(":", 1)[0] == "mixplaylist":
- jobs.put(("mixplaylist", None, desit.split(":", 1)[1]))
- else:
- print("Ignoring item for now", desit)
+ desit = tracker.request_item_from_tracker()
+ print("New task:", desit)
+
+ if desit:
+ if desit.split(":", 1)[0] == "video":
+ needforcemetadata = {'ab': None, 'aa': None, 'af': None, 'sq': None, 'ase': None, 'am': None, 'ar': None, 'arc': None, 'hy': None, 'as': None, 'ay': None, 'az': None, 'bn': None, 'ba': None, 'eu': None, 'be': None, 'bh': None, 'bi': None, 'bs': None, 'br': None,
+ 'bg': None, 'yue': None, 'yue-HK': None, 'ca': None, 'chr': None, 'zh-CN': None, 'zh-HK': None, 'zh-Hans': None, 'zh-SG': None, 'zh-TW': None, 'zh-Hant': None, 'cho': None, 'co': None, 'hr': None, 'cs': None, 'da': None, 'nl': None,
+ 'nl-BE': None, 'nl-NL': None, 'dz': None, 'en': None, 'en-CA': None, 'en-IN': None, 'en-IE': None, 'en-GB': None, 'en-US': None, 'eo': None, 'et': None, 'fo': None, 'fj': None, 'fil': None, 'fi': None, 'fr': None, 'fr-BE': None,
+ 'fr-CA': None, 'fr-FR': None, 'fr-CH': None, 'ff': None, 'gl': None, 'ka': None, 'de': None, 'de-AT': None, 'de-DE': None, 'de-CH': None, 'el': None, 'kl': None, 'gn': None, 'gu': None, 'ht': None, 'hak': None, 'hak-TW': None, 'ha': None,
+ 'iw': None, 'hi': None, 'hi-Latn': None, 'ho': None, 'hu': None, 'is': None, 'ig': None, 'id': None, 'ia': None, 'ie': None, 'iu': None, 'ik': None, 'ga': None, 'it': None, 'ja': None, 'jv': None, 'kn': None, 'ks': None, 'kk': None, 'km': None, 'rw': None,
+ 'tlh': None, 'ko': None, 'ku': None, 'ky': None, 'lo': None, 'la': None, 'lv': None, 'ln': None, 'lt': None, 'lb': None, 'mk': None, 'mg': None, 'ms': None, 'ml': None, 'mt': None, 'mni': None, 'mi': None, 'mr': None, 'mas': None, 'nan': None,
+ 'nan-TW': None, 'lus': None, 'mo': None, 'mn': None, 'my': None, 'na': None, 'nv': None, 'ne': None, 'no': None, 'oc': None, 'or': None, 'om': None, 'ps': None, 'fa': None, 'fa-AF': None, 'fa-IR': None, 'pl': None, 'pt': None, 'pt-BR': None,
+ 'pt-PT': None, 'pa': None, 'qu': None, 'ro': None, 'rm': None, 'rn': None, 'ru': None, 'ru-Latn': None, 'sm': None, 'sg': None, 'sa': None, 'sc': None, 'gd': None, 'sr': None, 'sr-Cyrl': None, 'sr-Latn': None, 'sh': None, 'sdp': None, 'sn': None,
+ 'scn': None, 'sd': None, 'si': None, 'sk': None, 'sl': None, 'so': None, 'st': None, 'es': None, 'es-419': None, 'es-MX': None, 'es-ES': None, 'es-US': None, 'su': None, 'sw': None, 'ss': None, 'sv': None, 'tl': None, 'tg': None, 'ta': None,
+ 'tt': None, 'te': None, 'th': None, 'bo': None, 'ti': None, 'tpi': None, 'to': None, 'ts': None, 'tn': None, 'tr': None, 'tk': None, 'tw': None, 'uk': None, 'ur': None, 'uz': None, 'vi': None, 'vo': None, 'vor': None, 'cy': None, 'fy': None, 'wo': None,
+ 'xh': None, 'yi': None, 'yo': None, 'zu': None}
+ needforcecaptions = {'ab': None, 'aa': None, 'af': None, 'sq': None, 'ase': None, 'am': None, 'ar': None, 'arc': None, 'hy': None, 'as': None, 'ay': None, 'az': None, 'bn': None, 'ba': None, 'eu': None, 'be': None, 'bh': None, 'bi': None, 'bs': None, 'br': None,
+ 'bg': None, 'yue': None, 'yue-HK': None, 'ca': None, 'chr': None, 'zh-CN': None, 'zh-HK': None, 'zh-Hans': None, 'zh-SG': None, 'zh-TW': None, 'zh-Hant': None, 'cho': None, 'co': None, 'hr': None, 'cs': None, 'da': None, 'nl': None,
+ 'nl-BE': None, 'nl-NL': None, 'dz': None, 'en': None, 'en-CA': None, 'en-IN': None, 'en-IE': None, 'en-GB': None, 'en-US': None, 'eo': None, 'et': None, 'fo': None, 'fj': None, 'fil': None, 'fi': None, 'fr': None, 'fr-BE': None,
+ 'fr-CA': None, 'fr-FR': None, 'fr-CH': None, 'ff': None, 'gl': None, 'ka': None, 'de': None, 'de-AT': None, 'de-DE': None, 'de-CH': None, 'el': None, 'kl': None, 'gn': None, 'gu': None, 'ht': None, 'hak': None, 'hak-TW': None, 'ha': None,
+ 'iw': None, 'hi': None, 'hi-Latn': None, 'ho': None, 'hu': None, 'is': None, 'ig': None, 'id': None, 'ia': None, 'ie': None, 'iu': None, 'ik': None, 'ga': None, 'it': None, 'ja': None, 'jv': None, 'kn': None, 'ks': None, 'kk': None, 'km': None, 'rw': None,
+ 'tlh': None, 'ko': None, 'ku': None, 'ky': None, 'lo': None, 'la': None, 'lv': None, 'ln': None, 'lt': None, 'lb': None, 'mk': None, 'mg': None, 'ms': None, 'ml': None, 'mt': None, 'mni': None, 'mi': None, 'mr': None, 'mas': None, 'nan': None,
+ 'nan-TW': None, 'lus': None, 'mo': None, 'mn': None, 'my': None, 'na': None, 'nv': None, 'ne': None, 'no': None, 'oc': None, 'or': None, 'om': None, 'ps': None, 'fa': None, 'fa-AF': None, 'fa-IR': None, 'pl': None, 'pt': None, 'pt-BR': None,
+ 'pt-PT': None, 'pa': None, 'qu': None, 'ro': None, 'rm': None, 'rn': None, 'ru': None, 'ru-Latn': None, 'sm': None, 'sg': None, 'sa': None, 'sc': None, 'gd': None, 'sr': None, 'sr-Cyrl': None, 'sr-Latn': None, 'sh': None, 'sdp': None, 'sn': None,
+ 'scn': None, 'sd': None, 'si': None, 'sk': None, 'sl': None, 'so': None, 'st': None, 'es': None, 'es-419': None, 'es-MX': None, 'es-ES': None, 'es-US': None, 'su': None, 'sw': None, 'ss': None, 'sv': None, 'tl': None, 'tg': None, 'ta': None,
+ 'tt': None, 'te': None, 'th': None, 'bo': None, 'ti': None, 'tpi': None, 'to': None, 'ts': None, 'tn': None, 'tr': None, 'tk': None, 'tw': None, 'uk': None, 'ur': None, 'uz': None, 'vi': None, 'vo': None, 'vor': None, 'cy': None, 'fy': None, 'wo': None,
+ 'xh': None, 'yi': None, 'yo': None, 'zu': None}
+ jobs.put(("discovery", desit.split(":", 1)[1], None))
+ elif desit.split(":", 1)[0] == "channel":
+ jobs.put(("channel", None, desit.split(":", 1)[1]))
+ elif desit.split(":", 1)[0] == "playlist":
+ jobs.put(("playlist", None, desit.split(":", 1)[1]))
+ elif desit.split(":", 1)[0] == "mixplaylist":
+ jobs.put(("mixplaylist", None, desit.split(":", 1)[1]))
else:
print("Ignoring item for now", desit)
else:
- break
+ print("Ignoring item for now", desit)
else:
break
@@ -340,8 +309,4 @@ for x in threads:
threads.remove(x)
del x
-if not shouldgetjob:
- print("PROTECTION MECHANISM #3 WAS SOMEHOW TRIGERRED")
- print("Community Contribution discovery has been disabled for this account, please report this on our Discord as this may have caused some videos to be incorrectly marked as having community contributions disabled.")
-
-print("Exiting...")
+print("Exiting...")
\ No newline at end of file
diff --git a/youtube_channel.py b/youtube_channel.py
index 57ffbbe..c5d2395 100644
--- a/youtube_channel.py
+++ b/youtube_channel.py
@@ -1,25 +1,39 @@
from requests import session
-from youtube_util import getinitialdata, fullyexpand
-
-# TODO: Rate limit detection, HTTP3?
+from youtube_util import getinitialdata, fullyexpand, getapikey, getlver
+from time import sleep
mysession = session()
#extract latest version automatically
-try:
- lver = getinitialdata(mysession.get("https://www.youtube.com/").text)["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"]
-except:
- lver = "2.20201002.02.01"
+homepage = mysession.get("https://www.youtube.com/").text
+
+API_KEY = getapikey(homepage)
+
+params = (
+ ('key', API_KEY),
+)
-#print(lver)
-mysession.headers.update({"x-youtube-client-name": "1", "x-youtube-client-version": lver, "Accept-Language": "en-US"})
+API_VERSION = getlver(getinitialdata(homepage))
-def main(channelid: str):
+continuationheaders = {"x-youtube-client-name": "1", "x-youtube-client-version": API_VERSION, "Accept-Language": "en-US"}
+
+del homepage
+
+def process_channel(channelid: str):
playlists = set()
shelfres = set()
channellist = set()
# PLAYLISTS
- initdata = getinitialdata(mysession.get("https://www.youtube.com/channel/"+str(channelid)+"/playlists").text)
+ data = {"context":{"client":{"hl":"en","gl":"US","clientName":"WEB","clientVersion":API_VERSION}},"browseId":channelid,"params":"EglwbGF5bGlzdHM%3D"}
+ while True:
+ initdata = mysession.post("https://www.youtube.com/youtubei/v1/browse", params=params, json=data)
+ if initdata.status_code == 200:
+ initdata = initdata.json()
+ break
+ else:
+ print("Non-200 API status code, waiting 30 seconds before retrying...")
+ sleep(30)
+
CHANNELS_ID = 0
PLAYLISTS_ID = 0
@@ -42,7 +56,7 @@ def main(channelid: str):
if "shelfRenderer" in itemint.keys():
shelfres.add(itemint["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"])
elif "gridRenderer" in itemint.keys():
- playlistsint = fullyexpand(itemint["gridRenderer"])["items"]
+ playlistsint = fullyexpand(itemint["gridRenderer"], mysession, continuationheaders)["items"]
for playlist in playlistsint:
playlists.add(playlist["gridPlaylistRenderer"]["playlistId"])
@@ -50,8 +64,18 @@ def main(channelid: str):
channellist.add(playlist["gridPlaylistRenderer"]["shortBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"])
for item in shelfres:
- shelfiteminitdata = getinitialdata(mysession.get("https://www.youtube.com/"+str(item)).text)
- playlistsint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][PLAYLISTS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"])["items"]
+ while True:
+ shelfintp = mysession.get("https://www.youtube.com/"+str(item))
+ if not """
Sorry for the interruption. We have been receiving a large volume of requests from your network.
+
+
To continue with your YouTube experience, please fill out the form below.
""" in shelfintp.text and shelfintp.status_code == 200:
+ break
+ else:
+ print("Non-200 status code, waiting 30 seconds before retrying...")
+ sleep(30)
+
+ shelfiteminitdata = getinitialdata(shelfintp.text)
+ playlistsint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][PLAYLISTS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"], mysession, continuationheaders)["items"]
for playlist in playlistsint:
playlists.add(playlist["gridPlaylistRenderer"]["playlistId"])
@@ -61,7 +85,16 @@ def main(channelid: str):
# CHANNELS
cshelfres = set()
- initdata = getinitialdata(mysession.get("https://www.youtube.com/channel/"+str(channelid)+"/channels").text)
+ # CHANNELS (fetched through the youtubei browse API)
+ data = {"context":{"client":{"hl":"en","gl":"US","clientName":"WEB","clientVersion":API_VERSION}},"browseId":channelid,"params":"EghjaGFubmVscw%3D%3D"}
+ while True:
+ initdata = mysession.post("https://www.youtube.com/youtubei/v1/browse", params=params, json=data)
+ if initdata.status_code == 200:
+ initdata = initdata.json()
+ break
+ else:
+ print("Non-200 API status code, waiting 30 seconds before retrying...")
+ sleep(30)
shelflist = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"]
@@ -70,14 +103,24 @@ def main(channelid: str):
if "shelfRenderer" in itemint.keys():
cshelfres.add(itemint["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"])
elif "gridRenderer" in itemint.keys():
- chanlistint = fullyexpand(itemint["gridRenderer"])["items"]
+ chanlistint = fullyexpand(itemint["gridRenderer"], mysession, continuationheaders)["items"]
for channel in chanlistint:
channellist.add(channel["gridChannelRenderer"]["channelId"])
for item in cshelfres:
- shelfiteminitdata = getinitialdata(mysession.get("https://www.youtube.com/"+str(item)).text)
- chanlistint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"])["items"]
+ while True:
+ shelfintc = mysession.get("https://www.youtube.com/"+str(item))
+ if not """
Sorry for the interruption. We have been receiving a large volume of requests from your network.
+
+
To continue with your YouTube experience, please fill out the form below.
""" in shelfintc.text and shelfintc.status_code == 200:
+ break
+ else:
+ print("Non-200 status code, waiting 30 seconds before retrying...")
+ sleep(30)
+
+ shelfiteminitdata = getinitialdata(shelfintc.text)
+ chanlistint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"], mysession, continuationheaders)["items"]
for channel in chanlistint:
channellist.add(channel["gridChannelRenderer"]["channelId"])
@@ -89,7 +132,7 @@ if __name__ == "__main__":
chanl = argv
chanl.pop(0)
for channel in chanl:
- print(main(channel))
+ print(process_channel(channel))
# SAMPLES:
# UCqj7Cz7revf5maW9g5pgNcg lots of playlists
@@ -103,4 +146,4 @@ if __name__ == "__main__":
# UCJOh5FKisc0hUlEeWFBlD-w no subscriptions, plenty of featured channels
-# UC7fjJERoGTs_eOKk-nn7RMw fair number of featured channels
+# UC7fjJERoGTs_eOKk-nn7RMw fair number of featured channels
\ No newline at end of file
diff --git a/youtube_util.py b/youtube_util.py
index 54fe2cf..81d466a 100644
--- a/youtube_util.py
+++ b/youtube_util.py
@@ -1,6 +1,8 @@
-from requests import session
from json import loads
from urllib.parse import unquote
+from time import sleep
+
+import requests
def getinitialdata(html: str):
for line in html.splitlines():
@@ -8,19 +10,27 @@ def getinitialdata(html: str):
return loads(line.split('window["ytInitialData"] = ', 1)[1].strip()[:-1])
return {}
-mysession = session()
+def getapikey(html: str):
+ return html.split('"INNERTUBE_API_KEY":"', 1)[-1].split('"', 1)[0]
+
#extract latest version automatically
-try:
- lver = getinitialdata(mysession.get("https://www.youtube.com/").text)["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"]
-except:
- lver = "2.20201002.02.01"
-mysession.headers.update({"x-youtube-client-name": "1", "x-youtube-client-version": lver, "Accept-Language": "en-US"})
+def getlver(initialdata: dict):
+ try:
+ return initialdata["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"]
+ except:
+ return "2.20201016.02.00"
-def fullyexpand(inputdict: dict):
+def fullyexpand(inputdict: dict, mysession: requests.session, continuationheaders: dict):
lastrequestj = inputdict
while "continuations" in lastrequestj.keys():
- lastrequest = mysession.get("https://www.youtube.com/browse_ajax?continuation="+unquote(lastrequestj["continuations"][0]["nextContinuationData"]["continuation"]))
+ while True:
+ lastrequest = mysession.get("https://www.youtube.com/browse_ajax?continuation="+unquote(lastrequestj["continuations"][0]["nextContinuationData"]["continuation"]), headers=continuationheaders)
+ if lastrequest.status_code == 200:
+ break
+ else:
+ print("Non-200 API status code, waiting 30 seconds before retrying...")
+ sleep(30)
lastrequestj = lastrequest.json()[1]["response"]["continuationContents"]["gridContinuation"]
inputdict["items"].extend(lastrequestj["items"])
- return inputdict
+ return inputdict
\ No newline at end of file