Browse Source

Check that certain strings are in the page

pull/8/head
tech234a 3 years ago
parent
commit
eb9bda9ab4
3 changed files with 3 additions and 3 deletions
  1. discovery.py (+1, -1)
  2. export.py (+1, -1)
  3. tracker.py (+1, -1)

+ 1
- 1
discovery.py View File

@@ -13,7 +13,7 @@ def getmetadata(mysession, vid):
wpage = mysession.get("https://www.youtube.com/watch", params=params)
if not """</div><div id="content" class=" content-alignment" role="main"><p class='largeText'>Sorry for the interruption. We have been receiving a large volume of requests from your network.</p>
- <p>To continue with your YouTube experience, please fill out the form below.</p>""" in wpage.text and not wpage.status_code == 429:
+ <p>To continue with your YouTube experience, please fill out the form below.</p>""" in wpage.text and not wpage.status_code == 429 and 'window["ytInitialPlayerResponse"] = ' in wpage.text and 'window["ytInitialData"] = ' in wpage.text:
break
else:
print("Captcha detected, waiting 30 seconds")


+ 1
- 1
export.py View File

@@ -139,7 +139,7 @@ def subprrun(mysession, langcode, vid, mode, needforcemetadata, needforcecaption

page = mysession.get("https://www.youtube.com/timedtext_editor", params=pparams)

- if not "accounts.google.com" in page.url and page.status_code != 429:
+ if not "accounts.google.com" in page.url and page.status_code != 429 and 'Subtitles/CC' in page.text and 'Title &amp; description' in page.text:
break
else:
print("[Retrying in 30 seconds for rate limit or login failure] Please supply authentication cookie information in config.json or environment variables. See README.md for more information.")


+ 1
- 1
tracker.py View File

@@ -9,7 +9,7 @@ from os.path import isfile
from json import loads

# https://github.com/ArchiveTeam/tencent-weibo-grab/blob/9bae5f9747e014db9227821a9c11557267967023/pipeline.py
- VERSION = "20200924.06"
+ VERSION = "20200924.07"

TRACKER_ID = "ext-yt-communitycontribs"
TRACKER_HOST = "trackerproxy.meo.ws"


Loading…
Cancel
Save