Browse Source

Fix org listings not including archived repos

Although the 'All' listing currently excludes only archived repos, this is a more general solution that should keep working even if the category display changes further, as long as they don't redesign the repository list yet again.
master
JustAnotherArchivist 3 weeks ago
parent
commit
dfd01567cb
1 changed files with 26 additions and 13 deletions
  1. +26
    -13
      github-list-repos

+ 26
- 13
github-list-repos View File

@@ -1,4 +1,5 @@
#!/usr/bin/env python3
import collections
import html
import logging
import re
@@ -47,7 +48,7 @@ def p(repoName):
for user in users:
r = get(f'https://github.com/{user}')
if '<div id="org-repositories"' in r.text:
# Organisation, complete list under /orgs/ with ?page=2 pagination
# Organisation: archived repositories don't appear in the paginated /orgs/ listing, so we also need to iterate over all the 'type' parameters
if mode == NAME_OPTION:
musername = re.search(r'<meta property="profile:username" content="([^"]*)" />', r.text)
if not musername:
@@ -60,18 +61,30 @@ for user in users:
print(html.unescape(musername.group(1).strip().replace('\n', ' ').replace('\r', ' ')))
print(html.unescape(mfullname.group(1).strip().replace('\n', ' ').replace('\r', ' ')))
sys.exit(0)
r = get(f'https://github.com/orgs/{user}/repositories')
page = 1
while True:
for m in re.finditer(r'<a itemprop="name codeRepository"\s(?:[^>]*\s)?data-hovercard-url="/([^/>"]+/[^/>"]+)/hovercard"', r.text):
p(m.group(1))
for m in re.finditer(r'<a data-testid="listitem-title-link"\s(?:[^>]*\s)?href="/([^/>"]+/[^/>"]+)"', r.text):
p(m.group(1))
if '<a class="next_page"' not in r.text and '<a rel="next"' not in r.text:
# End of pagination
break
page += 1
r = get(f'https://github.com/orgs/{user}/repositories?page={page}')
types = collections.deque()
types.append('')
seen = set()
def maybe_p(repoName):
if repoName not in seen:
p(repoName)
seen.add(repoName)
while types:
type_ = types.popleft()
j = '&' if type_ else '?'
r = get(f'https://github.com/orgs/{user}/repositories{type_}')
if not type_:
types.extend(x.split('"')[1] for x in re.findall(r'href="\?type=[^"]*', r.text))
page = 1
while True:
for m in re.finditer(r'<a itemprop="name codeRepository"\s(?:[^>]*\s)?data-hovercard-url="/([^/>"]+/[^/>"]+)/hovercard"', r.text):
maybe_p(m.group(1))
for m in re.finditer(r'<a data-testid="listitem-title-link"\s(?:[^>]*\s)?href="/([^/>"]+/[^/>"]+)"', r.text):
maybe_p(m.group(1))
if '<a class="next_page"' not in r.text and '<a rel="next"' not in r.text:
# End of pagination
break
page += 1
r = get(f'https://github.com/orgs/{user}/repositories{type_}{j}page={page}')
else:
# User, ?tab=repositories + cursor pagination
if mode == NAME_OPTION:


Loading…
Cancel
Save