6 Commits

Author SHA1 Message Date
  JustAnotherArchivist 5486b6a224 Add --grafana-eta 3 years ago
  JustAnotherArchivist 885001aab6 Fix --no-table help text (leftover from pre-pure-Python) 3 years ago
  JustAnotherArchivist fd8d502266 Nit: replace double quotes with single quotes 3 years ago
  JustAnotherArchivist ff096bf8dd Fix display of zero timestamps 3 years ago
  JustAnotherArchivist db3e79b9c9 Print repository URLs instead of names 3 years ago
  JustAnotherArchivist 22744fe908 Add script for listing repos of a user or group on GitLab.com 3 years ago
3 changed files with 135 additions and 44 deletions
  1. archivebot-jobs (+93, -42)
  2. github-list-repos (+2, -2)
  3. gitlab.com-list-repos (+40, -0)

archivebot-jobs (+93, -42)

@@ -12,21 +12,22 @@ import urllib.request
 
 # Column definitions
 columns = {
-    'jobid': (lambda job, pipelines: job["job_data"]["ident"], ()),
-    'url': (lambda job, pipelines: job["job_data"]["url"], ('truncatable',)),
-    'user': (lambda job, pipelines: job["job_data"]["started_by"], ()),
-    'pipenick': (lambda job, pipelines: pipelines[job["job_data"]["pipeline_id"]] if job["job_data"]["pipeline_id"] in pipelines else "unknown", ()),
-    'queued': (lambda job, pipelines: job["job_data"]["queued_at"], ('date', 'numeric')),
-    'started': (lambda job, pipelines: job["job_data"]["started_at"], ('date', 'numeric')),
-    'last active': (lambda job, pipelines: int(job["ts"]), ('date', 'coloured', 'numeric')),
-    'dl urls': (lambda job, pipelines: job["job_data"]["items_downloaded"], ('numeric',)),
-    'dl size': (lambda job, pipelines: job["job_data"]["bytes_downloaded"], ('size', 'numeric')),
-    'queue': (lambda job, pipelines: job["job_data"]["items_queued"] - job["job_data"]["items_downloaded"], ('numeric',)),
-    'eta': (lambda job, pipelines: int((curTime := time.time()) + (job["job_data"]["items_queued"] - job["job_data"]["items_downloaded"]) / (job["job_data"]["items_downloaded"] / (curTime - job["job_data"]["started_at"]))) if job["job_data"]["items_downloaded"] > 0 else 0, ('date', 'numeric')),
-    'con': (lambda job, pipelines: job["job_data"]["concurrency"], ('numeric',)),
-    'delay min': (lambda job, pipelines: int(job["job_data"]["delay_min"]), ('hidden', 'numeric')),
-    'delay max': (lambda job, pipelines: int(job["job_data"]["delay_max"]), ('hidden', 'numeric')),
-    'delay': (lambda job, pipelines: str(int(job["job_data"]["delay_min"])) + '-' + str(int(job["job_data"]["delay_max"])) if job["job_data"]["delay_min"] != job["job_data"]["delay_max"] else str(int(job["job_data"]["delay_min"])), ()),
+    'jobid': (lambda job, pipelines: job['job_data']['ident'], ()),
+    'url': (lambda job, pipelines: job['job_data']['url'], ('truncatable',)),
+    'user': (lambda job, pipelines: job['job_data']['started_by'], ()),
+    'pipenick': (lambda job, pipelines: pipelines[job['job_data']['pipeline_id']] if job['job_data']['pipeline_id'] in pipelines else 'unknown', ()),
+    'queued': (lambda job, pipelines: job['job_data']['queued_at'], ('date', 'numeric')),
+    'started': (lambda job, pipelines: job['job_data']['started_at'], ('date', 'numeric')),
+    'last active': (lambda job, pipelines: int(job['ts']), ('date', 'coloured', 'numeric')),
+    'dl urls': (lambda job, pipelines: job['job_data']['items_downloaded'], ('numeric',)),
+    'dl size': (lambda job, pipelines: job['job_data']['bytes_downloaded'], ('size', 'numeric')),
+    'queue': (lambda job, pipelines: job['job_data']['items_queued'] - job['job_data']['items_downloaded'], ('numeric',)),
+    'eta': (lambda job, pipelines: int((curTime := time.time()) + (job['job_data']['items_queued'] - job['job_data']['items_downloaded']) / (job['job_data']['items_downloaded'] / (curTime - job['job_data']['started_at']))) if job['job_data']['items_downloaded'] > 0 else 0, ('date', 'numeric')),
+    # Overwritten below if --grafana-eta is given
+    'con': (lambda job, pipelines: job['job_data']['concurrency'], ('numeric',)),
+    'delay min': (lambda job, pipelines: int(job['job_data']['delay_min']), ('hidden', 'numeric')),
+    'delay max': (lambda job, pipelines: int(job['job_data']['delay_max']), ('hidden', 'numeric')),
+    'delay': (lambda job, pipelines: str(int(job['job_data']['delay_min'])) + '-' + str(int(job['job_data']['delay_max'])) if job['job_data']['delay_min'] != job['job_data']['delay_max'] else str(int(job['job_data']['delay_min'])), ()),
 }
 defaultSort = 'jobid'
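
The eta column projects a finish time by assuming the job's lifetime-average download rate continues until the remaining queue is empty. The same arithmetic, unrolled with illustrative numbers:

    import time

    now = time.time()
    started_at = now - 3600      # illustrative: job has been running for 1 hour
    items_downloaded = 7200      # illustrative: 2 URLs/s on average so far
    items_queued = 10800

    rate = items_downloaded / (now - started_at)   # 2.0 items/s
    remaining = items_queued - items_downloaded    # 3600 items
    eta = int(now + remaining / rate) if items_downloaded > 0 else 0
    # remaining / rate = 1800 s, so the projected finish is 30 minutes from now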

@@ -38,13 +39,13 @@ if any('truncatable' in colDef[1] and any(x in colDef[1] for x in ('date', 'colo
 # Filter function
 def make_field_filter(column, op, value, caseSensitive = True):
     compFunc = {
-        "=": lambda a, b: a == b,
-        "<": lambda a, b: a < b,
-        ">": lambda a, b: a > b,
-        "^": lambda a, b: a.startswith(b),
-        "*": lambda a, b: b in a,
-        "$": lambda a, b: a.endswith(b),
-        "~": lambda a, b: re.search(b, a) is not None,
+        '=': lambda a, b: a == b,
+        '<': lambda a, b: a < b,
+        '>': lambda a, b: a > b,
+        '^': lambda a, b: a.startswith(b),
+        '*': lambda a, b: b in a,
+        '$': lambda a, b: a.endswith(b),
+        '~': lambda a, b: re.search(b, a) is not None,
     }[op]
     transform = {
         True: (lambda x: x),
@@ -64,12 +65,12 @@ class FilterAction(argparse.Action):
             setattr(namespace, self.dest, lambda job: eval(func, {}, {'job': job}))
             return
         global columns
-        match = re.match(r"^(?P<column>[A-Za-z ]+)(?P<op>[=<>^*$~])(?P<value>.*)$", values[0])
+        match = re.match(r'^(?P<column>[A-Za-z ]+)(?P<op>[=<>^*$~])(?P<value>.*)$', values[0])
         if not match:
             parser.error('Invalid filter')
         filterDict = match.groupdict()
-        filterDict["column"] = filterDict["column"].lower()
-        assert filterDict["column"] in columns
+        filterDict['column'] = filterDict['column'].lower()
+        assert filterDict['column'] in columns
         if 'numeric' in columns[filterDict['column']][1]:
             filterDict['value'] = float(filterDict['value'])
         if 'date' in columns[filterDict['column']][1] and filterDict['value'] < 0:
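
For reference, a filter is COLUMN, a one-character operator (=, <, >, ^, *, $ or ~), then the value; column names may contain spaces. A quick check of the regex above (the sample filter string is made up):

    import re

    m = re.match(r'^(?P<column>[A-Za-z ]+)(?P<op>[=<>^*$~])(?P<value>.*)$', 'dl size>1000000')
    print(m.groupdict())
    # {'column': 'dl size', 'op': '>', 'value': '1000000'}
    # The column is then lower-cased, and values for 'numeric' columns are cast to float.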
@@ -115,9 +116,10 @@ parser.add_argument('--mode', choices = ('table', 'dashboard-regex', 'con-d-comm
     ' atdash: print the URL for displaying the matched jobs on atdash',
 ]))
 parser.add_argument('--no-colours', '--no-colors', action = 'store_true', help = "Don't colourise the last activity column if it's been a while. (Table mode only)")
-parser.add_argument('--no-table', action = 'store_true', help = 'Raw output without feeding through column(1); columns are separated by tabs. (Table mode only)')
+parser.add_argument('--no-table', action = 'store_true', help = 'Raw non-columnised output; columns are separated by tabs. (Table mode only)')
 parser.add_argument('--no-truncate', action = 'store_true', help = 'Disable truncating long values if the terminal width would be exceeded. (Table mode without --no-table only)')
 parser.add_argument('--dates', action = 'store_true', help = 'Print dates instead of elapsed times for queued/started/last active columns. (Table mode only)')
+parser.add_argument('--grafana-eta', action = 'store_true', help = 'Enable fetching data from Grafana for a better ETA on long-running jobs. (Table mode only)')
 parser.add_argument('--replace-concurrency', nargs = 1, metavar = 'CON', type = int, help = 'Replace the delay values with the specified ones. (con-d-commands mode only)')
 parser.add_argument('--replace-delay', nargs = 2, metavar = ('MIN', 'MAX'), type = int, help = 'Replace the delay values with the specified ones. (con-d-commands mode only)')
 parser.add_argument('--format', help = 'Output format for the format mode; this must be a Python format string and can use any column name in lower-case with spaces replaced by underscores; e.g. "{url} {last_active}". (Format mode only)')
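
Assuming the script is run as ./archivebot-jobs and the filter option is registered as --filter with the FilterAction above (only args.filter is visible in this diff, so these spellings are illustrative), invocations might look like:

    ./archivebot-jobs --no-table --dates
    ./archivebot-jobs --grafana-eta --filter 'queue>1000000'
    ./archivebot-jobs --mode format --format '{url} {last_active}'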
@@ -151,7 +153,7 @@ pipelinedata = fetch('http://dashboard.at.ninjawedding.org/pipelines')
 currentTime = time.time()
 
 # Process
-pipelines = {p["id"]: p["nickname"] for p in pipelinedata["pipelines"]}
+pipelines = {p['id']: p['nickname'] for p in pipelinedata['pipelines']}
 
 jobs = []
 for job in jobdata:
@@ -168,6 +170,53 @@ if args.filter:
 if not jobs:
     sys.exit(0)
 
+# Retrieve Grafana ETA if appropriate
+if args.grafana_eta and args.mode == 'table':
+    def populate_grafana_eta(jobs):
+        idents = {job['jobid'] for job in jobs}
+        if not idents:
+            return
+
+        # Request
+        for i, timeFilter in enumerate(('time>=now()-10m', 'time>=now()-24h-10m AND time<=now()-24h+10m')):
+            req = urllib.request.Request('https://atdash.meo.ws/api/datasources/proxy/1/query?db=ateam&epoch=s')
+            req.add_header('Content-Type', 'application/x-www-form-urlencoded')
+            query = 'SELECT mean("items_queued")-mean("items_downloaded") FROM "grabsite" WHERE ('
+            query += ' OR '.join(f""""ident"='{job["jobid"]}'""" for job in jobs)
+            query += ')'
+            query += f' AND {timeFilter}'
+            query += ' GROUP BY time(1m), * fill(none)'
+            query = f'q={urllib.parse.quote(query)}'
+            req.data = query.encode('utf-8')
+            with urllib.request.urlopen(req) as f:
+                if f.getcode() != 200:
+                    raise RuntimeError('Could not fetch Grafana data')
+                if i == 0:
+                    dataNow = json.load(f)
+                else:
+                    data24hAgo = json.load(f)
+
+        # Restructure data
+        dataNow = {x['tags']['ident']: x['values'][-1] for x in dataNow['results'][0]['series']}
+        data24hAgo = {x['tags']['ident']: x['values'][len(x['values']) // 2] for x in data24hAgo['results'][0]['series']}
+
+        # Calculate ETA
+        for job in jobs:
+            if job['jobid'] not in dataNow or job['jobid'] not in data24hAgo: # Job not started yet 24 hours ago or no datapoint for whatever reason
+                job['eta'] = 0
+                continue
+            nowTs, nowTodo = dataNow[job['jobid']]
+            prevTs, prevTodo = data24hAgo[job['jobid']]
+            if nowTodo < 0 or prevTodo < 0: # Negative queue size due to AB's buggy queue counting
+                job['eta'] = 0
+                continue
+            if nowTodo >= prevTodo: # Queue hasn't shrunk
+                job['eta'] = 0
+                continue
+            job['eta'] = nowTs + nowTodo / ((prevTodo - nowTodo) / (nowTs - prevTs))
+
+    populate_grafana_eta(jobs)
+
 # Sort
 class reversor: # https://stackoverflow.com/a/56842689
     def __init__(self, obj):
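
Instead of the lifetime average used by the default eta column, this block rates a job by how much its outstanding queue shrank over the last 24 hours. A worked example of the final formula with illustrative datapoints:

    nowTs, nowTodo = 1600086400, 100000    # queue size now (illustrative)
    prevTs, prevTodo = 1600000000, 532000  # queue size ~24 h earlier (illustrative)

    rate = (prevTodo - nowTodo) / (nowTs - prevTs)  # 432000 items / 86400 s = 5.0 items/s
    eta = nowTs + nowTodo / rate                    # 100000 / 5.0 = 20000 s, i.e. ~5.6 h after nowTs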
@@ -210,24 +259,26 @@ elif args.mode == 'atdash':
 def render_date(ts, coloured = False):
     global args, currentTime
     diff = currentTime - ts
-    colourStr = f"\x1b[{0 if diff < 6 * 3600 else 7};31m" if coloured and diff >= 300 else ""
-    colourEndStr = "\x1b[0m" if colourStr else ""
+    colourStr = f'\x1b[{0 if diff < 6 * 3600 else 7};31m' if coloured and diff >= 300 else ''
+    colourEndStr = '\x1b[0m' if colourStr else ''
+    if ts == 0:
+        return 'N/A'
     if args.dates:
-        return (colourStr, datetime.datetime.fromtimestamp(ts).isoformat(sep = " "), colourEndStr)
+        return (colourStr, datetime.datetime.fromtimestamp(ts).isoformat(sep = ' '), colourEndStr)
     if diff < -86400:
-        return (colourStr, f"in {-diff // 86400:.0f}d {(-diff % 86400) // 3600:.0f}h", colourEndStr)
+        return (colourStr, f'in {-diff // 86400:.0f}d {(-diff % 86400) // 3600:.0f}h', colourEndStr)
     elif diff < -60:
-        return (colourStr, "in " + (f"{-diff // 3600:.0f}h " if diff <= -3600 else "") + f"{(-diff % 3600) // 60:.0f}mn", colourEndStr)
+        return (colourStr, 'in ' + (f'{-diff // 3600:.0f}h ' if diff <= -3600 else '') + f'{(-diff % 3600) // 60:.0f}mn', colourEndStr)
     elif diff < 0:
-        return "in <1 min"
+        return 'in <1 min'
     elif diff == 0:
-        return "now"
+        return 'now'
     elif diff < 60:
-        return "<1 min ago"
+        return '<1 min ago'
     elif diff < 86400:
-        return (colourStr, (f"{diff // 3600:.0f}h " if diff >= 3600 else "") + f"{(diff % 3600) // 60:.0f}mn ago", colourEndStr)
+        return (colourStr, (f'{diff // 3600:.0f}h ' if diff >= 3600 else '') + f'{(diff % 3600) // 60:.0f}mn ago', colourEndStr)
     else:
-        return (colourStr, f"{diff // 86400:.0f}d {(diff % 86400) // 3600:.0f}h ago", colourEndStr)
+        return (colourStr, f'{diff // 86400:.0f}d {(diff % 86400) // 3600:.0f}h ago', colourEndStr)
 
 def render_size(size):
     units = ('B', 'KiB', 'MiB', 'GiB', 'TiB')
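
With the zero-timestamp check added above, render_date returns 'N/A' for ts == 0 instead of rendering the epoch as a decades-long elapsed time. A few expected outputs, derived from the code (assuming args.dates is unset and colouring is off):

    # render_date(0)                   -> 'N/A'
    # render_date(currentTime - 45)    -> '<1 min ago'
    # render_date(currentTime - 5400)  -> ('', '1h 30mn ago', '')
    # render_date(currentTime + 7200)  -> ('', 'in 2h 0mn', '')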
@@ -238,12 +289,12 @@ def render_size(size):
 
 renderers = {}
 for column, (_, columnAttr) in columns.items():
-    if "date" in columnAttr:
-        if "coloured" in columnAttr:
+    if 'date' in columnAttr:
+        if 'coloured' in columnAttr:
             renderers[column] = lambda x: render_date(x, coloured = not args.no_colours)
         else:
             renderers[column] = render_date
-    elif "size" in columnAttr:
+    elif 'size' in columnAttr:
         renderers[column] = render_size
     elif isinstance(jobs[0][column], (int, float)):
         renderers[column] = str
@@ -286,9 +337,9 @@ if not args.no_table and not args.no_truncate:
 
 # Print
 output = []
-output.append(tuple(column.upper() for column in columns if "hidden" not in columns[column][1]))
+output.append(tuple(column.upper() for column in columns if 'hidden' not in columns[column][1]))
 for job in jobs:
-    output.append(tuple(job[column] for column in columns if "hidden" not in columns[column][1]))
+    output.append(tuple(job[column] for column in columns if 'hidden' not in columns[column][1]))
 
 if not args.no_table:
     widths = tuple(max(len(field) if isinstance(field, str) else len(field[1]) for field in column) for column in zip(*output))
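
The width computation measures coloured cells by their visible middle element, since render_date wraps them as (prefix, text, suffix) tuples. A tiny illustration with made-up rows:

    rows = [
        ('JOBID', 'LAST ACTIVE'),
        ('abc123', ('\x1b[0;31m', '2h 5mn ago', '\x1b[0m')),
    ]
    widths = tuple(max(len(f) if isinstance(f, str) else len(f[1]) for f in col) for col in zip(*rows))
    print(widths)  # (6, 11)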


github-list-repos (+2, -2)

@@ -28,7 +28,7 @@ for user in users:
         page = 1
         while True:
             for m in re.finditer(r'<a itemprop="name codeRepository"\s(?:[^>]*\s)?data-hovercard-url="/([^/>"]+/[^/>"]+)/hovercard"', r.text):
-                print(m.group(1))
+                print(f'https://github.com/{m.group(1)}')
             if '<a class="next_page"' not in r.text:
                 # End of pagination
                 break
@@ -39,7 +39,7 @@ for user in users:
         r = get(f'https://github.com/{user}?tab=repositories')
         while True:
             for m in re.finditer(r'<a href="/([^/>"]+/[^/>"]+)" itemprop="name codeRepository"(\s[^>]*)?>', r.text):
-                print(m.group(1))
+                print(f'https://github.com/{m.group(1)}')
             if not (m := re.search(r'<a\s(?:[^>]*\s)?href="https://github\.com/[^/?"]+\?after=([^&]+)&amp;tab=repositories"(?:\s[^>]*)?>', r.text)):
                 # End of pagination
                 break
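
A minimal check of the repository-link extraction together with the new full-URL output (the HTML snippet is illustrative, not captured from GitHub):

    import re

    html = '<a itemprop="name codeRepository" data-hovercard-url="/example-user/example-repo/hovercard">'
    for m in re.finditer(r'<a itemprop="name codeRepository"\s(?:[^>]*\s)?data-hovercard-url="/([^/>"]+/[^/>"]+)/hovercard"', html):
        print(f'https://github.com/{m.group(1)}')
    # https://github.com/example-user/example-repo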


gitlab.com-list-repos (+40, -0)

@@ -0,0 +1,40 @@
+#!/bin/bash
+set -euo pipefail
+
+if [[ $# -ne 1 || "$1" == '-h' || "$1" == '--help' ]]
+then
+    echo "Usage: $0 USER_OR_GROUP_NAME" >&2
+    exit 1
+fi
+name="$1"
+
+scriptpath="$(cd "$(dirname "$0")"; pwd -P)"
+
+page="$("${scriptpath}/curl-ua" firefox -s "https://gitlab.com/${name}")"
+if grep -q -F '<a data-target="div#activity" data-action="activity" data-toggle="tab" href="' <<<"${page}"
+then
+    echo "User" >&2
+    url="https://gitlab.com/api/v4/users/${name}/projects?per_page=100"
+elif grep -q -F '<li class="home active"><a title="Group details" href="' <<<"${page}"
+then
+    echo "Group" >&2
+    url="https://gitlab.com/api/v4/groups/${name}/projects?per_page=100&include_subgroups=true"
+else
+    echo "Error: unknown page type" >&2
+    exit 1
+fi
+
+{
+    pipe=$(mktemp -u); mkfifo "${pipe}"; exec 3<>"${pipe}"; rm "${pipe}"; unset pipe # For the disgusting HTTP header treatment
+    "${scriptpath}/curl-ua" firefox -sv "${url}" 2> >(grep '^< x-total-pages: ' | sed 's,^.*: ,,' | tr -d '\r' >&3)
+    declare -i nPages=$(head -1 <&3)
+    if [[ ${nPages} -ge 2 ]]
+    then
+        for ((page=2; page<=${nPages}; ++page))
+        do
+            echo -n $'\r'"Requesting page ${page} of ${nPages}" >&2
+            "${scriptpath}/curl-ua" firefox -s "${url}&page=${page}"
+        done
+        echo >&2
+    fi
+} | grep -Po '"path_with_namespace":\s*"\K[^"]+' | sed 's,^,https://gitlab.com/,'
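
The brace block captures GitLab's x-total-pages response header through an anonymous FIFO on file descriptor 3 while the response bodies flow down the pipe to the grep. The same idea expressed in Python for comparison (a sketch; the username is illustrative):

    import json
    import urllib.request

    url = 'https://gitlab.com/api/v4/users/example-user/projects?per_page=100'
    with urllib.request.urlopen(url) as f:
        nPages = int(f.headers.get('x-total-pages', '1'))  # pagination count from the response headers
        projects = json.load(f)
    for page in range(2, nPages + 1):
        with urllib.request.urlopen(f'{url}&page={page}') as f:
            projects += json.load(f)
    print('\n'.join('https://gitlab.com/' + p['path_with_namespace'] for p in projects))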
