Check that certain strings are in the page

3 years ago · eb9bda9ab4
--- a/discovery.py
+++ b/discovery.py
@@ -13,7 +13,7 @@ def getmetadata(mysession, vid):
        wpage = mysession.get("https://www.youtube.com/watch", params=params)
        if not """</div><div id="content" class="  content-alignment" role="main"><p class='largeText'>Sorry for the interruption. We have been receiving a large volume of requests from your network.</p>

-<p>To continue with your YouTube experience, please fill out the form below.</p>""" in wpage.text and not wpage.status_code == 429:
+<p>To continue with your YouTube experience, please fill out the form below.</p>""" in wpage.text and not wpage.status_code == 429 and 'window["ytInitialPlayerResponse"] = ' in wpage.text and 'window["ytInitialData"] = ' in wpage.text:
            break
        else:
            print("Captcha detected, waiting 30 seconds")
--- a/export.py
+++ b/export.py
@@ -139,7 +139,7 @@ def subprrun(mysession, langcode, vid, mode, needforcemetadata, needforcecaption

                page = mysession.get("https://www.youtube.com/timedtext_editor", params=pparams)

-           if not "accounts.google.com" in page.url and page.status_code != 429:
+           if not "accounts.google.com" in page.url and page.status_code != 429 and 'Subtitles/CC' in page.text and 'Title &amp; description' in page.text:
                break
            else:
                print("[Retrying in 30 seconds for rate limit or login failure] Please supply authentication cookie information in config.json or environment variables. See README.md for more information.")
--- a/tracker.py
+++ b/tracker.py
@@ -9,7 +9,7 @@ from os.path import isfile
 from json import loads

 # https://github.com/ArchiveTeam/tencent-weibo-grab/blob/9bae5f9747e014db9227821a9c11557267967023/pipeline.py
-VERSION = "20200924.06"
+VERSION = "20200924.07"

 TRACKER_ID = "ext-yt-communitycontribs"
 TRACKER_HOST = "trackerproxy.meo.ws"