Browse Source

Filter out lines without an attribute value

master
JustAnotherArchivist 1 year ago
parent
commit
c9bf3a93af
1 changed file with 3 additions and 0 deletions
  1. +3
    -0
      html-extract-stupid

+ 3
- 0
html-extract-stupid View File

@@ -31,6 +31,9 @@
# Remove quotes from attribute values
# A line of the form `<letters> <quote><value><quote>` becomes `<letters> <value>`.
# The quote may be ' or ", and the `\2` backreference requires the closing quote
# to be the same character as the opening one. `,` is the s/// delimiter.
# Lines that do not match are printed unchanged (perl -p prints every line).
perl -pe "s,^([a-zA-Z]+) (['\"])(.*)\2$,\1 \3," |

# Filter out lines without an attribute value
# These are lines made up only of ASCII letters followed by a single trailing
# space. `-v` inverts the match so such lines are dropped; `-P` selects
# Perl-compatible regex syntax.
# The srcset stage further down strips each line and then unpacks
# `l.split(" ", 1)` into two names, which cannot succeed on such a line.
grep -Pv '^[a-zA-Z]+ $' |

# img srcset splitting
#
# Reads "<tag> <value>" lines on stdin, stripping surrounding whitespace from
# each line. Lines whose tag (compared case-insensitively) is not "imgsrcset"
# are printed as they are. An imgsrcset value is split into its candidate URLs,
# dropping the width/density descriptors ("480w", "2x", "1.5x"), and every
# non-empty URL is printed as "img <url>".
#
# The program is passed as a single multi-line, single-quoted argument so that
# the indentation Python depends on is spelled out explicitly in the source.
# The Python code must therefore not contain any single-quote characters.
python3 -c '
import re, sys

for l in map(str.strip, sys.stdin):
    try:
        tag, value = l.split(" ", 1)
        tag = tag.lower()
        if tag != "imgsrcset":
            print(l)
            continue
        # Split on a descriptor (integer or fractional number followed by w or x,
        # optionally followed by a comma), or on a comma followed by whitespace.
        for url in re.split(r"\s+\d+(?:\.\d+)?[wx]\s*(?:,\s*|$)|,\s+", value.strip()):
            if url:
                print(f"img {url}")
    except BrokenPipeError:
        # The downstream reader closed the pipe; stop processing input.
        break
' |



Loading…
Cancel
Save