6 Commits

Author                SHA1        Message                                                         Date
JustAnotherArchivist  5486b6a224  Add --grafana-eta                                               3 years ago
JustAnotherArchivist  885001aab6  Fix --no-table help text (leftover from pre-pure-Python)       3 years ago
JustAnotherArchivist  fd8d502266  Nit: replace double quotes with single quotes                  3 years ago
JustAnotherArchivist  ff096bf8dd  Fix display of zero timestamps                                  3 years ago
JustAnotherArchivist  db3e79b9c9  Print repository URLs instead of names                          3 years ago
JustAnotherArchivist  22744fe908  Add script for listing repos of a user or group on GitLab.com  3 years ago
3 files changed, 135 insertions(+), 44 deletions(-)

archivebot-jobs         +93  -42
github-list-repos        +2   -2
gitlab.com-list-repos   +40   -0

archivebot-jobs  +93 -42

@@ -12,21 +12,22 @@ import urllib.request
 
 # Column definitions
 columns = {
-    'jobid': (lambda job, pipelines: job["job_data"]["ident"], ()),
-    'url': (lambda job, pipelines: job["job_data"]["url"], ('truncatable',)),
-    'user': (lambda job, pipelines: job["job_data"]["started_by"], ()),
-    'pipenick': (lambda job, pipelines: pipelines[job["job_data"]["pipeline_id"]] if job["job_data"]["pipeline_id"] in pipelines else "unknown", ()),
-    'queued': (lambda job, pipelines: job["job_data"]["queued_at"], ('date', 'numeric')),
-    'started': (lambda job, pipelines: job["job_data"]["started_at"], ('date', 'numeric')),
-    'last active': (lambda job, pipelines: int(job["ts"]), ('date', 'coloured', 'numeric')),
-    'dl urls': (lambda job, pipelines: job["job_data"]["items_downloaded"], ('numeric',)),
-    'dl size': (lambda job, pipelines: job["job_data"]["bytes_downloaded"], ('size', 'numeric')),
-    'queue': (lambda job, pipelines: job["job_data"]["items_queued"] - job["job_data"]["items_downloaded"], ('numeric',)),
-    'eta': (lambda job, pipelines: int((curTime := time.time()) + (job["job_data"]["items_queued"] - job["job_data"]["items_downloaded"]) / (job["job_data"]["items_downloaded"] / (curTime - job["job_data"]["started_at"]))) if job["job_data"]["items_downloaded"] > 0 else 0, ('date', 'numeric')),
-    'con': (lambda job, pipelines: job["job_data"]["concurrency"], ('numeric',)),
-    'delay min': (lambda job, pipelines: int(job["job_data"]["delay_min"]), ('hidden', 'numeric')),
-    'delay max': (lambda job, pipelines: int(job["job_data"]["delay_max"]), ('hidden', 'numeric')),
-    'delay': (lambda job, pipelines: str(int(job["job_data"]["delay_min"])) + '-' + str(int(job["job_data"]["delay_max"])) if job["job_data"]["delay_min"] != job["job_data"]["delay_max"] else str(int(job["job_data"]["delay_min"])), ()),
+    'jobid': (lambda job, pipelines: job['job_data']['ident'], ()),
+    'url': (lambda job, pipelines: job['job_data']['url'], ('truncatable',)),
+    'user': (lambda job, pipelines: job['job_data']['started_by'], ()),
+    'pipenick': (lambda job, pipelines: pipelines[job['job_data']['pipeline_id']] if job['job_data']['pipeline_id'] in pipelines else 'unknown', ()),
+    'queued': (lambda job, pipelines: job['job_data']['queued_at'], ('date', 'numeric')),
+    'started': (lambda job, pipelines: job['job_data']['started_at'], ('date', 'numeric')),
+    'last active': (lambda job, pipelines: int(job['ts']), ('date', 'coloured', 'numeric')),
+    'dl urls': (lambda job, pipelines: job['job_data']['items_downloaded'], ('numeric',)),
+    'dl size': (lambda job, pipelines: job['job_data']['bytes_downloaded'], ('size', 'numeric')),
+    'queue': (lambda job, pipelines: job['job_data']['items_queued'] - job['job_data']['items_downloaded'], ('numeric',)),
+    'eta': (lambda job, pipelines: int((curTime := time.time()) + (job['job_data']['items_queued'] - job['job_data']['items_downloaded']) / (job['job_data']['items_downloaded'] / (curTime - job['job_data']['started_at']))) if job['job_data']['items_downloaded'] > 0 else 0, ('date', 'numeric')),
+    # Overwritten below if --grafana-eta is given
+    'con': (lambda job, pipelines: job['job_data']['concurrency'], ('numeric',)),
+    'delay min': (lambda job, pipelines: int(job['job_data']['delay_min']), ('hidden', 'numeric')),
+    'delay max': (lambda job, pipelines: int(job['job_data']['delay_max']), ('hidden', 'numeric')),
+    'delay': (lambda job, pipelines: str(int(job['job_data']['delay_min'])) + '-' + str(int(job['job_data']['delay_max'])) if job['job_data']['delay_min'] != job['job_data']['delay_max'] else str(int(job['job_data']['delay_min'])), ()),
 }
 defaultSort = 'jobid'
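
Note: the 'eta' column above extrapolates from the job's average download rate since it started. A minimal sketch of that formula with made-up numbers (none of these values come from real job data):

    import time

    started_at = time.time() - 3600      # hypothetical: job started an hour ago
    items_downloaded = 9000
    items_queued = 12000

    rate = items_downloaded / (time.time() - started_at)  # average items/s since start, ~2.5
    remaining = items_queued - items_downloaded           # 3000 items still queued
    eta = int(time.time() + remaining / rate)             # Unix timestamp ~20 minutes from now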

@@ -38,13 +39,13 @@ if any('truncatable' in colDef[1] and any(x in colDef[1] for x in ('date', 'colo
 # Filter function
 def make_field_filter(column, op, value, caseSensitive = True):
     compFunc = {
-        "=": lambda a, b: a == b,
-        "<": lambda a, b: a < b,
-        ">": lambda a, b: a > b,
-        "^": lambda a, b: a.startswith(b),
-        "*": lambda a, b: b in a,
-        "$": lambda a, b: a.endswith(b),
-        "~": lambda a, b: re.search(b, a) is not None,
+        '=': lambda a, b: a == b,
+        '<': lambda a, b: a < b,
+        '>': lambda a, b: a > b,
+        '^': lambda a, b: a.startswith(b),
+        '*': lambda a, b: b in a,
+        '$': lambda a, b: a.endswith(b),
+        '~': lambda a, b: re.search(b, a) is not None,
     }[op]
     transform = {
         True: (lambda x: x),
@@ -64,12 +65,12 @@ class FilterAction(argparse.Action):
             setattr(namespace, self.dest, lambda job: eval(func, {}, {'job': job}))
             return
         global columns
-        match = re.match(r"^(?P<column>[A-Za-z ]+)(?P<op>[=<>^*$~])(?P<value>.*)$", values[0])
+        match = re.match(r'^(?P<column>[A-Za-z ]+)(?P<op>[=<>^*$~])(?P<value>.*)$', values[0])
         if not match:
             parser.error('Invalid filter')
         filterDict = match.groupdict()
-        filterDict["column"] = filterDict["column"].lower()
-        assert filterDict["column"] in columns
+        filterDict['column'] = filterDict['column'].lower()
+        assert filterDict['column'] in columns
         if 'numeric' in columns[filterDict['column']][1]:
             filterDict['value'] = float(filterDict['value'])
         if 'date' in columns[filterDict['column']][1] and filterDict['value'] < 0:
@@ -115,9 +116,10 @@ parser.add_argument('--mode', choices = ('table', 'dashboard-regex', 'con-d-comm
     ' atdash: print the URL for displaying the matched jobs on atdash',
   ]))
 parser.add_argument('--no-colours', '--no-colors', action = 'store_true', help = "Don't colourise the last activity column if it's been a while. (Table mode only)")
-parser.add_argument('--no-table', action = 'store_true', help = 'Raw output without feeding through column(1); columns are separated by tabs. (Table mode only)')
+parser.add_argument('--no-table', action = 'store_true', help = 'Raw non-columnised output; columns are separated by tabs. (Table mode only)')
 parser.add_argument('--no-truncate', action = 'store_true', help = 'Disable truncating long values if the terminal width would be exceeded. (Table mode without --no-table only)')
 parser.add_argument('--dates', action = 'store_true', help = 'Print dates instead of elapsed times for queued/started/last active columns. (Table mode only)')
+parser.add_argument('--grafana-eta', action = 'store_true', help = 'Enable fetching data from Grafana for a better ETA on long-running jobs. (Table mode only)')
 parser.add_argument('--replace-concurrency', nargs = 1, metavar = 'CON', type = int, help = 'Replace the delay values with the specified ones. (con-d-commands mode only)')
 parser.add_argument('--replace-delay', nargs = 2, metavar = ('MIN', 'MAX'), type = int, help = 'Replace the delay values with the specified ones. (con-d-commands mode only)')
 parser.add_argument('--format', help = 'Output format for the format mode; this must be a Python format string and can use any column name in lower-case with spaces replaced by underscores; e.g. "{url} {last_active}". (Format mode only)')
@@ -151,7 +153,7 @@ pipelinedata = fetch('http://dashboard.at.ninjawedding.org/pipelines')
 currentTime = time.time()
 
 # Process
-pipelines = {p["id"]: p["nickname"] for p in pipelinedata["pipelines"]}
+pipelines = {p['id']: p['nickname'] for p in pipelinedata['pipelines']}
 
 jobs = []
 for job in jobdata:
@@ -168,6 +170,53 @@ if args.filter:
 if not jobs:
     sys.exit(0)
 
+# Retrieve Grafana ETA if appropriate
+if args.grafana_eta and args.mode == 'table':
+    def populate_grafana_eta(jobs):
+        idents = {job['jobid'] for job in jobs}
+        if not idents:
+            return
+
+        # Request
+        for i, timeFilter in enumerate(('time>=now()-10m', 'time>=now()-24h-10m AND time<=now()-24h+10m')):
+            req = urllib.request.Request('https://atdash.meo.ws/api/datasources/proxy/1/query?db=ateam&epoch=s')
+            req.add_header('Content-Type', 'application/x-www-form-urlencoded')
+            query = 'SELECT mean("items_queued")-mean("items_downloaded") FROM "grabsite" WHERE ('
+            query += ' OR '.join(f""""ident"='{job["jobid"]}'""" for job in jobs)
+            query += ')'
+            query += f' AND {timeFilter}'
+            query += ' GROUP BY time(1m), * fill(none)'
+            query = f'q={urllib.parse.quote(query)}'
+            req.data = query.encode('utf-8')
+            with urllib.request.urlopen(req) as f:
+                if f.getcode() != 200:
+                    raise RuntimeError('Could not fetch Grafana data')
+                if i == 0:
+                    dataNow = json.load(f)
+                else:
+                    data24hAgo = json.load(f)
+
+        # Restructure data
+        dataNow = {x['tags']['ident']: x['values'][-1] for x in dataNow['results'][0]['series']}
+        data24hAgo = {x['tags']['ident']: x['values'][len(x['values']) // 2] for x in data24hAgo['results'][0]['series']}
+
+        # Calculate ETA
+        for job in jobs:
+            if job['jobid'] not in dataNow or job['jobid'] not in data24hAgo: # Job not started yet 24 hours ago or no datapoint for whatever reason
+                job['eta'] = 0
+                continue
+            nowTs, nowTodo = dataNow[job['jobid']]
+            prevTs, prevTodo = data24hAgo[job['jobid']]
+            if nowTodo < 0 or prevTodo < 0: # Negative queue size due to AB's buggy queue counting
+                job['eta'] = 0
+                continue
+            if nowTodo >= prevTodo: # Queue hasn't shrunk
+                job['eta'] = 0
+                continue
+            job['eta'] = nowTs + nowTodo / ((prevTodo - nowTodo) / (nowTs - prevTs))
+
+    populate_grafana_eta(jobs)
+
 # Sort
 class reversor: # https://stackoverflow.com/a/56842689
     def __init__(self, obj):
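
Note: unlike the per-job average in the 'eta' column, the Grafana path compares the queue backlog now against the backlog 24 hours earlier, so long-running jobs whose early download rate is no longer representative get a more realistic estimate. A rough worked example with invented datapoints (not real Grafana output):

    nowTs, nowTodo = 1_700_000_000, 100_000     # hypothetical backlog now
    prevTs, prevTodo = nowTs - 86_400, 150_000  # hypothetical backlog 24 h earlier

    shrink_rate = (prevTodo - nowTodo) / (nowTs - prevTs)  # ~0.58 items/s over the last day
    eta = nowTs + nowTodo / shrink_rate                    # 172,800 s, i.e. ~2 days from now
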
@@ -210,24 +259,26 @@ elif args.mode == 'atdash':
 def render_date(ts, coloured = False):
     global args, currentTime
     diff = currentTime - ts
-    colourStr = f"\x1b[{0 if diff < 6 * 3600 else 7};31m" if coloured and diff >= 300 else ""
-    colourEndStr = "\x1b[0m" if colourStr else ""
+    colourStr = f'\x1b[{0 if diff < 6 * 3600 else 7};31m' if coloured and diff >= 300 else ''
+    colourEndStr = '\x1b[0m' if colourStr else ''
+    if ts == 0:
+        return 'N/A'
     if args.dates:
-        return (colourStr, datetime.datetime.fromtimestamp(ts).isoformat(sep = " "), colourEndStr)
+        return (colourStr, datetime.datetime.fromtimestamp(ts).isoformat(sep = ' '), colourEndStr)
     if diff < -86400:
-        return (colourStr, f"in {-diff // 86400:.0f}d {(-diff % 86400) // 3600:.0f}h", colourEndStr)
+        return (colourStr, f'in {-diff // 86400:.0f}d {(-diff % 86400) // 3600:.0f}h', colourEndStr)
     elif diff < -60:
-        return (colourStr, "in " + (f"{-diff // 3600:.0f}h " if diff <= -3600 else "") + f"{(-diff % 3600) // 60:.0f}mn", colourEndStr)
+        return (colourStr, 'in ' + (f'{-diff // 3600:.0f}h ' if diff <= -3600 else '') + f'{(-diff % 3600) // 60:.0f}mn', colourEndStr)
     elif diff < 0:
-        return "in <1 min"
+        return 'in <1 min'
     elif diff == 0:
-        return "now"
+        return 'now'
     elif diff < 60:
-        return "<1 min ago"
+        return '<1 min ago'
     elif diff < 86400:
-        return (colourStr, (f"{diff // 3600:.0f}h " if diff >= 3600 else "") + f"{(diff % 3600) // 60:.0f}mn ago", colourEndStr)
+        return (colourStr, (f'{diff // 3600:.0f}h ' if diff >= 3600 else '') + f'{(diff % 3600) // 60:.0f}mn ago', colourEndStr)
     else:
-        return (colourStr, f"{diff // 86400:.0f}d {(diff % 86400) // 3600:.0f}h ago", colourEndStr)
+        return (colourStr, f'{diff // 86400:.0f}d {(diff % 86400) // 3600:.0f}h ago', colourEndStr)
 
 def render_size(size):
     units = ('B', 'KiB', 'MiB', 'GiB', 'TiB')
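
Note: render_date returns either a plain string or a (colourStr, text, colourEndStr) tuple that the table code unpacks later. A few hypothetical spot-checks of its branches (invented inputs, colours left at the default of off):

    render_date(0)                    -> 'N/A'                # the zero-timestamp fix in this commit
    render_date(currentTime - 90)     -> ('', '1mn ago', '')
    render_date(currentTime - 90000)  -> ('', '1d 1h ago', '')
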
@@ -238,12 +289,12 @@ def render_size(size):
 
 renderers = {}
 for column, (_, columnAttr) in columns.items():
-    if "date" in columnAttr:
-        if "coloured" in columnAttr:
+    if 'date' in columnAttr:
+        if 'coloured' in columnAttr:
             renderers[column] = lambda x: render_date(x, coloured = not args.no_colours)
         else:
             renderers[column] = render_date
-    elif "size" in columnAttr:
+    elif 'size' in columnAttr:
         renderers[column] = render_size
     elif isinstance(jobs[0][column], (int, float)):
         renderers[column] = str
@@ -286,9 +337,9 @@ if not args.no_table and not args.no_truncate:
 
 # Print
 output = []
-output.append(tuple(column.upper() for column in columns if "hidden" not in columns[column][1]))
+output.append(tuple(column.upper() for column in columns if 'hidden' not in columns[column][1]))
 for job in jobs:
-    output.append(tuple(job[column] for column in columns if "hidden" not in columns[column][1]))
+    output.append(tuple(job[column] for column in columns if 'hidden' not in columns[column][1]))
 
 if not args.no_table:
     widths = tuple(max(len(field) if isinstance(field, str) else len(field[1]) for field in column) for column in zip(*output))
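
Note: in the widths computation, tuple fields are measured via len(field[1]) so that ANSI escape sequences around coloured text don't inflate the column width. A tiny sketch with an invented field value:

    field = ('\x1b[0;31m', '5h 3mn ago', '\x1b[0m')  # (colourStr, text, colourEndStr)
    print(len(field[1]))  # 10: only the visible text counts toward the column width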


github-list-repos  +2 -2

@@ -28,7 +28,7 @@ for user in users:
     page = 1
     while True:
         for m in re.finditer(r'<a itemprop="name codeRepository"\s(?:[^>]*\s)?data-hovercard-url="/([^/>"]+/[^/>"]+)/hovercard"', r.text):
-            print(m.group(1))
+            print(f'https://github.com/{m.group(1)}')
         if '<a class="next_page"' not in r.text:
             # End of pagination
             break
@@ -39,7 +39,7 @@ for user in users:
     r = get(f'https://github.com/{user}?tab=repositories')
     while True:
         for m in re.finditer(r'<a href="/([^/>"]+/[^/>"]+)" itemprop="name codeRepository"(\s[^>]*)?>', r.text):
-            print(m.group(1))
+            print(f'https://github.com/{m.group(1)}')
         if not (m := re.search(r'<a\s(?:[^>]*\s)?href="https://github\.com/[^/?"]+\?after=([^&]+)&amp;tab=repositories"(?:\s[^>]*)?>', r.text)):
             # End of pagination
             break
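
Note: both loops now print full URLs instead of bare user/repo paths. A toy check of the second regex against a hypothetical HTML fragment (not real GitHub markup):

    import re

    html = '<a href="/example-user/example-repo" itemprop="name codeRepository">example-repo</a>'
    for m in re.finditer(r'<a href="/([^/>"]+/[^/>"]+)" itemprop="name codeRepository"(\s[^>]*)?>', html):
        print(f'https://github.com/{m.group(1)}')  # https://github.com/example-user/example-repo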


gitlab.com-list-repos  +40 -0

@@ -0,0 +1,40 @@
+#!/bin/bash
+set -euo pipefail
+
+if [[ $# -ne 1 || "$1" == '-h' || "$1" == '--help' ]]
+then
+    echo "Usage: $0 USER_OR_GROUP_NAME" >&2
+    exit 1
+fi
+name="$1"
+
+scriptpath="$(cd "$(dirname "$0")"; pwd -P)"
+
+page="$("${scriptpath}/curl-ua" firefox -s "https://gitlab.com/${name}")"
+if grep -q -F '<a data-target="div#activity" data-action="activity" data-toggle="tab" href="' <<<"${page}"
+then
+    echo "User" >&2
+    url="https://gitlab.com/api/v4/users/${name}/projects?per_page=100"
+elif grep -q -F '<li class="home active"><a title="Group details" href="' <<<"${page}"
+then
+    echo "Group" >&2
+    url="https://gitlab.com/api/v4/groups/${name}/projects?per_page=100&include_subgroups=true"
+else
+    echo "Error: unknown page type" >&2
+    exit 1
+fi
+
+{
+    pipe=$(mktemp -u); mkfifo "${pipe}"; exec 3<>"${pipe}"; rm "${pipe}"; unset pipe # For the disgusting HTTP header treatment
+    "${scriptpath}/curl-ua" firefox -sv "${url}" 2> >(grep '^< x-total-pages: ' | sed 's,^.*: ,,' | tr -d '\r' >&3)
+    declare -i nPages=$(head -1 <&3)
+    if [[ ${nPages} -ge 2 ]]
+    then
+        for ((page=2; page<=${nPages}; ++page))
+        do
+            echo -n $'\r'"Requesting page ${page} of ${nPages}" >&2
+            "${scriptpath}/curl-ua" firefox -s "${url}&page=${page}"
+        done
+        echo >&2
+    fi
+} | grep -Po '"path_with_namespace":\s*"\K[^"]+' | sed 's,^,https://gitlab.com/,'
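
Note: the mkfifo/exec dance exists only to capture the x-total-pages response header on fd 3 while the body streams on to the grep pipeline. A rough Python equivalent of the pagination logic, as a sketch only (stdlib-only assumption; 'example-user' is hypothetical, and this bypasses the script's curl-ua helper):

    import json
    import urllib.request

    url = 'https://gitlab.com/api/v4/users/example-user/projects?per_page=100'
    page = 1
    while True:
        with urllib.request.urlopen(f'{url}&page={page}') as f:
            nPages = int(f.headers.get('x-total-pages', '1'))  # the header the FIFO captures above
            for project in json.load(f):
                print(f"https://gitlab.com/{project['path_with_namespace']}")
        if page >= nPages:
            break
        page += 1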
