|
- #!/usr/bin/env python3
- import datetime
- import json
- import os
- import re
- import requests
- import sys
- import time
-
-
# Matches an ``appVersion: '<digits and dots>'`` assignment; group 1 captures
# the version string. Only referenced by the disabled (commented-out)
# profile-page step inside retrieve().
APP_VERSION_PATTERN = re.compile(r"appVersion\s*:\s*'([0-9.]+)'")
-
-
def log(msg):
    """Write ``msg`` to stderr, prefixed with the current UTC timestamp.

    The prefix is an ISO 8601 timestamp followed by a literal ``Z``, e.g.
    ``2020-07-01T12:34:56.789012Z message``.
    """
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # time and drop the tzinfo so isoformat() does not append '+00:00' in
    # front of the explicit 'Z' suffix.
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    print(f'{now.isoformat()}Z {msg}', file=sys.stderr)
-
-
def new_session():
    """Create a ``requests`` session with browser-like default headers.

    The returned session also carries a private attribute holding the time of
    the most recent request (initially 0); fetch() reads and updates it to
    throttle requests.
    """
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0',
        'Accept': 'application/json',
        'Accept-Language': 'en-US,en;q=0.5',
        'DNT': '1',
    }
    sess = requests.Session()
    # Throttle state is stashed directly on the session object so that it
    # travels with the session; 0 means "no request made yet".
    sess.__JAADockerHubScriptLastReqTime = 0
    sess.headers.update(browser_headers)
    return sess
-
-
def fetch(session, url, **kwargs):
    """GET ``url`` through ``session``, leaving at least 0.5 s between requests.

    Args:
        session: A session created by new_session(); its private
            last-request timestamp attribute is read and updated here.
        url: The URL to retrieve.
        **kwargs: Passed through to ``session.get``. A ``timeout`` of 60
            seconds is applied unless the caller supplies one, so a stalled
            connection cannot hang the script indefinitely.

    Returns:
        The response object, guaranteed to have status code 200.

    Raises:
        RuntimeError: If the response status code is not 200.
    """
    min_interval = 0.5
    kwargs.setdefault('timeout', 60)

    # Sleep only for the remainder of the interval since the last request.
    # The cap guards against an oversized wait if the wall clock jumped
    # backwards between two calls.
    remaining = session.__JAADockerHubScriptLastReqTime + min_interval - time.time()
    if remaining > 0:
        time.sleep(min(remaining, min_interval))
    # Record the time *after* sleeping, i.e. when the request is really sent.
    session.__JAADockerHubScriptLastReqTime = time.time()

    log(f'Fetching {url}')
    r = session.get(url, **kwargs)
    # Only an exact 200 counts as success; this also covers every 4xx/5xx.
    if r.status_code != 200:
        raise RuntimeError(f'Failed to fetch {url}')
    return r
-
-
def _write_json(path, obj):
    """Serialise ``obj`` to ``path`` as tab-indented JSON (UTF-8)."""
    with open(path, 'w', encoding='utf-8') as fp:
        json.dump(obj, fp, indent='\t')


def _fetch_all_pages(session, url):
    """Return the concatenated ``results`` of a paginated v2 API listing.

    Starts at ``url`` and follows each page's ``next`` link until it is None.
    """
    results = []
    while url is not None:
        page = fetch(session, url).json()
        results.extend(page['results'])
        url = page['next']
    return results


def retrieve(profile):
    """Download the Docker Hub metadata of ``profile`` into ``./<profile>/``.

    Files written (all JSON is tab-indented):
        <profile>/user.json
        <profile>/repositories.json
        <profile>/repositories/<name>/info.json
        <profile>/repositories/<name>/Dockerfile      (only if non-empty)
        <profile>/repositories/<name>/sources.json
        <profile>/repositories/<name>/tags.json
        <profile>/repositories/<name>/tags/<tag>.info.json
        <profile>/repositories/<name>/tags/<tag>.images.json

    Args:
        profile: Docker Hub user or organisation name.

    Raises:
        ValueError: If the profile, or a namespace, repository or tag name
            returned by the API, contains a slash. These values are used as
            path components, so this is checked unconditionally (a plain
            ``assert`` would be stripped under ``python -O``).
        FileExistsError: From ``os.mkdir`` if an output directory already
            exists.
        RuntimeError: From fetch() when a request does not return HTTP 200.
    """
    if '/' in profile:
        raise ValueError(f'profile name contains slashes: {profile!r}')
    log(f'Retrieving Docker Hub user {profile}')

    session = new_session()

    os.mkdir(profile)

    # Fetch profile page to get app version
    #r = fetch(session, f'https://hub.docker.com/u/{profile}')
    #m = APP_VERSION_PATTERN.search(r.text)
    #if not m:
    #    raise RuntimeError('Failed to extract app version')
    #session.headers.update({'X-DOCKER-API-CLIENT': f'docker-hub/{m.group(1)}'})
    ## ONLY API REQUESTS FROM THIS POINT

    # Get user or org info
    r = fetch(session, f'https://hub.docker.com/v2/users/{profile}/')
    _write_json(f'{profile}/user.json', r.json())

    # Get repository list
    repositories = _fetch_all_pages(
        session,
        f'https://hub.docker.com/v2/repositories/{profile}/?page_size=100&page=1&ordering=last_updated',
    )
    _write_json(f'{profile}/repositories.json', repositories)

    os.mkdir(f'{profile}/repositories')

    for repository in repositories:
        namespace, name = repository['namespace'], repository['name']
        if '/' in namespace or '/' in name:
            raise ValueError(f'namespace and/or name contain slashes: {namespace!r}, {name!r}')
        repo_dir = f'{profile}/repositories/{name}'
        repo_url = f'https://hub.docker.com/v2/repositories/{namespace}/{name}'
        os.mkdir(repo_dir)

        # Get general repo info (more detailed than the list above)
        r = fetch(session, f'{repo_url}/')
        _write_json(f'{repo_dir}/info.json', r.json())

        # Get Dockerfile (if non-empty)
        r = fetch(session, f'{repo_url}/dockerfile/')
        contents = r.json()['contents']
        if contents:
            # Explicit UTF-8: a Dockerfile may contain non-ASCII text, which
            # would fail to encode under a non-UTF-8 locale default.
            with open(f'{repo_dir}/Dockerfile', 'w', encoding='utf-8') as fp:
                fp.write(contents)

        # Get source info. This endpoint paginates differently from the v2
        # listings: items are under 'objects', the link under meta.next.
        sourceObjects = []
        r = fetch(session, 'https://hub.docker.com/api/build/v1/source/', params={'image': f'{namespace}/{name}'})
        while True:
            o = r.json()
            sourceObjects.extend(o['objects'])
            if o['meta']['next'] is None:
                break
            #TODO: Find an example that uses this
            r = fetch(session, o['meta']['next'])
        _write_json(f'{repo_dir}/sources.json', sourceObjects)

        # Get tags
        tags = _fetch_all_pages(session, f'{repo_url}/tags/?page_size=100&page=1&ordering=last_updated')
        _write_json(f'{repo_dir}/tags.json', tags)

        # Get data for each tag
        os.mkdir(f'{repo_dir}/tags')

        for tag in tags:
            tagname = tag['name']
            if '/' in tagname:
                raise ValueError(f'tag contains slashes: {tagname!r}')

            r = fetch(session, f'{repo_url}/tags/{tagname}/')
            _write_json(f'{repo_dir}/tags/{tagname}.info.json', r.json())

            r = fetch(session, f'{repo_url}/tags/{tagname}/images')
            _write_json(f'{repo_dir}/tags/{tagname}.images.json', r.json())
    log(f'Done with {profile}')
-
-
def main():
    """Retrieve every profile named on the command line and/or on stdin.

    Each command-line argument is a profile name, except ``-``, which
    requests that profile names also be read from stdin, one per line.
    Stdin is processed only after all command-line profiles are done,
    regardless of where ``-`` appears among the arguments.
    """
    readStdin = False
    for profile in sys.argv[1:]:
        if profile == '-':
            readStdin = True
            continue
        retrieve(profile)
    if readStdin:
        for line in sys.stdin:
            profile = line.strip()
            # Skip blank lines: an empty profile name would otherwise make
            # retrieve() fail in os.mkdir('') and abort the whole run.
            if not profile:
                continue
            retrieve(profile)


if __name__ == '__main__':
    main()
|