From 664eb02e71bff455747621f418c9b40a9f7c08d4 Mon Sep 17 00:00:00 2001 From: tech234a <46801700+tech234a@users.noreply.github.com> Date: Mon, 21 Sep 2020 17:13:45 -0400 Subject: [PATCH] If community-updated metadata is available retrieve it, if captions are published, retrieve the community version too --- export.py | 72 +++++++++++++++++++++++++++++++++++++++++-------------- worker.py | 2 +- 2 files changed, 55 insertions(+), 19 deletions(-) diff --git a/export.py b/export.py index 30c31d3..396e0e3 100644 --- a/export.py +++ b/export.py @@ -74,27 +74,63 @@ class MyHTMLParser(HTMLParser): def subprrun(jobs, mysession): while not jobs.empty(): collect() #cleanup memory - langcode, vid = jobs.get() + langcode, vid, mode = jobs.get() vid = vid.strip() print(langcode, vid) - pparams = ( - ("v", vid), - ("lang", langcode), - ("action_mde_edit_form", 1), - ("bl", "vmp"), - ("ui", "hd"), - ("tab", "captions"), - ("o", "U") - ) - - page = mysession.get("https://www.youtube.com/timedtext_editor", params=pparams) + + if mode == "default": + pparams = ( + ("v", vid), + ("lang", langcode), + ("action_mde_edit_form", 1), + ("bl", "vmp"), + ("ui", "hd"), + ("tab", "captions"), + ("o", "U") + ) + + page = mysession.get("https://www.youtube.com/timedtext_editor", params=pparams) + elif mode == "forceedit-metadata": + pparams = ( + ("v", vid), + ("lang", langcode), + ("action_mde_edit_form", 1), + ('forceedit', 'metadata'), + ('tab', 'metadata') + ) + + page = mysession.get("https://www.youtube.com/timedtext_editor", params=pparams) + elif mode == "forceedit-captions": + pparams = ( + ("v", vid), + ("lang", langcode), + ("action_mde_edit_form", 1), + ("bl", "vmp"), + ("ui", "hd"), + ('forceedit', 'captions'), + ("tab", "captions"), + ("o", "U") + ) + + page = mysession.get("https://www.youtube.com/timedtext_editor", params=pparams) assert not "accounts.google.com" in page.url, "Please supply authentication cookie information in config.json. See README.md for more information." inttext = page.text del page - if 'id="reject-captions-button"' in inttext or 'id="reject-metadata-button"' in inttext or 'data-state="published"' in inttext or 'title="The video owner already provided subtitles/CC"' in inttext: #quick way of checking if this page is worth parsing + filestring = "" + if "forceedit" in mode: + filestring = "_community" + + if not "forceedit" in mode: + if '&forceedit=metadata&tab=metadata">See latest' in inttext: + jobs.put((langcode, vid, "forceedit-metadata")) + + if '