Browse Source

Add ia-files-xml-to-jsonl

master
JustAnotherArchivist 2 years ago
parent
commit
0198b221a8
1 changed files with 17 additions and 0 deletions
  1. +17
    -0
      ia-files-xml-to-jsonl

+ 17
- 0
ia-files-xml-to-jsonl View File

@@ -0,0 +1,17 @@
#!/usr/bin/env python3
import json
import sys
import xml.etree.ElementTree


# Reads a `<files>` XML document from stdin and writes one JSON object per
# `<file>` element to stdout (JSON Lines). Each object merges the element's
# XML attributes with its direct children (child tag -> child text).
#
# Validation raises ValueError rather than using `assert`, because asserts are
# removed when Python runs with -O and malformed input would then be converted
# silently (with later keys overwriting earlier ones).


def file_element_to_record(file):
    """Flatten one ``<file>`` element into a plain dict.

    The result holds the element's attributes first, followed by one entry
    per direct child mapping the child's tag to its text. A child without
    text yields ``None`` (serialised as JSON ``null``).

    Raises:
        ValueError: if the element is not a ``<file>``, if two children share
            a tag, or if a child tag collides with an attribute name.
    """
    if file.tag != 'file':
        raise ValueError('unexpected element {!r}, expected \'file\''.format(file.tag))

    children_tags = [child.tag for child in file]
    # A repeated tag would silently overwrite the earlier value in the dict.
    if len(children_tags) != len(set(children_tags)):
        raise ValueError('duplicate children')
    # One check suffices: "an attribute name is a child tag" and "a child tag
    # is an attribute name" are the same condition (the key sets intersect).
    if any(tag in file.attrib for tag in children_tags):
        raise ValueError('attribute found in children')

    record = dict(file.attrib)
    record.update((child.tag, child.text) for child in file)
    return record


def iter_records(xml_text):
    """Yield one dict per ``<file>`` child of the ``<files>`` root in *xml_text*.

    Records are produced lazily, so records preceding an invalid ``<file>``
    are still yielded before the error is raised.

    Raises:
        ValueError: if the root element is not ``<files>`` or a child fails
            the checks in :func:`file_element_to_record`.
        xml.etree.ElementTree.ParseError: if *xml_text* is not well-formed XML.
    """
    root = xml.etree.ElementTree.fromstring(xml_text)
    if root.tag != 'files':
        raise ValueError('unexpected root element {!r}, expected \'files\''.format(root.tag))
    for file in root:
        yield file_element_to_record(file)


def main():
    """Convert the XML document on stdin to JSON Lines on stdout."""
    for record in iter_records(sys.stdin.read()):
        print(json.dumps(record))


if __name__ == '__main__':
    main()

Loading…
Cancel
Save