#!/usr/bin/python3
"""Regression tests for the ``urldecode`` filter.

Each case pipes raw bytes into the compiled binary and compares what it
writes to standard output with the expected decoding.  Run the script
directly: it prints nothing when every case passes and raises
``AssertionError`` on the first failing case.
"""
import io
import itertools
import subprocess


# Binary under test, relative to the directory the script is run from.
URLDECODE_COMMAND = ('./.make-and-exec-binaries/urldecode',)

# Same value as BUFFER_SIZE in urldecode.c, so that the boundary cases in
# main() place an escape sequence right at the edge of one read chunk.
BUFFER_SIZE = 1024 * 300

# Byte values that are valid hexadecimal digits in a percent-escape.
HEX_DIGITS = frozenset(b'0123456789ABCDEFabcdef')


def test(input, output, command=URLDECODE_COMMAND):
    """Run *command* with *input* on stdin and require *output* on stdout.

    Args:
        input: Bytes fed to the process on standard input.
        output: Bytes the process is expected to write to standard output.
        command: Argument vector of the program to run; defaults to the
            urldecode binary.

    Raises:
        AssertionError: If the process writes anything to standard error,
            exits with a non-zero status, or produces different output.
    """
    # stderr must be piped explicitly; otherwise it is inherited from the
    # parent and the captured value is always None, making the check a no-op.
    result = subprocess.run(
        list(command),
        input=input,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if result.stderr:
        raise AssertionError(f'unexpected stderr output: {result.stderr!r}')
    if result.returncode != 0:
        raise AssertionError(f'exit status {result.returncode}')
    if result.stdout != output:
        # Dump both sides in three-byte groups (the length of one escape)
        # so that a mismatch is easy to locate by eye.
        for label, blob in (('actual', result.stdout), ('expected', output)):
            print(f'{label}:')
            for start in range(0, len(blob), 3):
                print(repr(blob[start:start + 3]))
        raise AssertionError('stdout differs from the expected output')


# This is a helper, not a pytest test case; keep pytest from collecting it.
test.__test__ = False


def main():
    """Run every urldecode test case in order."""
    test(b'', b'')
    test(b'a', b'a')
    test(b'%', b'%')
    test(b'%2', b'%2')
    test(b'%20', b' ')
    test(b'%gg', b'%gg')
    # NOTE: '%' followed by non-ASCII bytes (e.g. b'%\xc3\xa4') is part of
    # the exhaustive malformed-escape case below.

    # Every byte value, written with upper-case and lower-case hex digits.
    for value in range(256):
        test(f'%{value:02X}'.encode('ascii'), bytes([value]))
        test(f'%{value:02x}'.encode('ascii'), bytes([value]))

    # Every '%XY' where X or Y is not a hex digit must pass through as-is.
    malformed = b''.join(
        bytes((0x25, first, second))
        for first, second in itertools.product(range(256), repeat=2)
        if first not in HEX_DIGITS or second not in HEX_DIGITS
    )
    test(malformed, malformed)

    # Complete, invalid and truncated escapes placed around the chunk edge.
    boundary_cases = (
        (b'%25', b'%'),
        (b'%gg', b'%gg'),
        (b'%', b'%'),
        (b'%2', b'%2'),
        (b'%g', b'%g'),
    )
    for offset in range(-3, 1):
        padding = b'a' * (BUFFER_SIZE + offset)
        for encoded, decoded in boundary_cases:
            test(padding + encoded, padding + decoded)

    # Runs of literal '%' directly in front of a valid escape.
    for count in range(10):
        test(b'%' * count + b'%20', b'%' * count + b' ')


if __name__ == '__main__':
    main()
/*
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
*/

// OrIdow6, late November 2021


// stdin - a urlencoded string
// stdout - the decoded string
//
// A '%' that is not followed by two hexadecimal digits is copied to the
// output unchanged, together with whatever followed it.
// Exit status is EXIT_FAILURE on allocation, read or write errors.

#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

// Parenthesised so the expansion stays a single operand in any expression.
#define BUFFER_SIZE (1024 * 300)

// Position of the decoder relative to a "%XY" escape sequence.
enum decode_state {
    STATE_LITERAL,     // not inside an escape
    STATE_PERCENT,     // consumed '%', waiting for the first hex digit
    STATE_FIRST_DIGIT  // consumed '%' and the first hex digit
};

// Decoder state that has to survive from one input chunk to the next.
struct decoder {
    enum decode_state state;
    uint8_t high_nibble;  // numeric value of the first hex digit
    uint8_t high_raw;     // the first hex digit exactly as it was read
};

// https://stackoverflow.com/questions/10324/convert-a-hexadecimal-string-to-an-integer-efficiently-in-c
// Maps a byte to its hexadecimal value, or -1 if it is not a hex digit.
// Could be faster if you had a table for 16-bit integers, depending on caching
static const int8_t hextable[256] = {
    /* 0x00 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    /* 0x10 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    /* 0x20 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    /* 0x30 */  0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
    /* 0x40 */ -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    /* 0x50 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    /* 0x60 */ -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    /* 0x70 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    /* 0x80 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    /* 0x90 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    /* 0xa0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    /* 0xb0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    /* 0xc0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    /* 0xd0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    /* 0xe0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    /* 0xf0 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
};

// Decode in_len bytes from in into out, updating dec; returns bytes written.
// out must have room for in_len + 2 bytes: resolving an escape left pending
// by the previous chunk can emit '%' and the first digit on top of the
// bytes consumed from this chunk.
static size_t decode_chunk(struct decoder* dec, const uint8_t* in,
                           size_t in_len, uint8_t* out) {
    size_t outp = 0;

    for (size_t p = 0; p < in_len; p++) {
        const uint8_t c = in[p];
        const int8_t value = hextable[c];

        switch (dec->state) {
        case STATE_LITERAL:
            if (c == '%') {
                dec->state = STATE_PERCENT;
            } else {
                out[outp++] = c;
            }
            break;

        case STATE_PERCENT:
            if (value >= 0) {
                dec->high_nibble = (uint8_t)value;
                dec->high_raw = c;
                dec->state = STATE_FIRST_DIGIT;
            } else {
                // Not an escape after all: emit the pending '%' literally.
                out[outp++] = '%';
                if (c != '%') {
                    out[outp++] = c;
                    dec->state = STATE_LITERAL;
                }
                // else: this '%' may start an escape itself, so the state
                // remains STATE_PERCENT.
            }
            break;

        case STATE_FIRST_DIGIT:
            if (value >= 0) {
                out[outp++] = (uint8_t)(dec->high_nibble << 4 | value);
                dec->state = STATE_LITERAL;
            } else {
                // Incomplete escape: emit '%' and the first digit as read.
                out[outp++] = '%';
                out[outp++] = dec->high_raw;
                if (c != '%') {
                    out[outp++] = c;
                    dec->state = STATE_LITERAL;
                } else {
                    dec->state = STATE_PERCENT;
                }
            }
            break;
        }
    }

    return outp;
}

// Write the bytes of an escape cut short by end of input; returns 0 to 2.
static size_t decode_finish(const struct decoder* dec, uint8_t* out) {
    size_t outp = 0;

    if (dec->state != STATE_LITERAL) {
        out[outp++] = '%';
    }
    if (dec->state == STATE_FIRST_DIGIT) {
        out[outp++] = dec->high_raw;
    }
    return outp;
}

// Write len bytes to stdout; returns non-zero only if all were accepted.
static int write_all(const uint8_t* buf, size_t len) {
    return len == 0 || fwrite(buf, 1, len, stdout) == len;
}

int main(int argc, char** argv) {
    (void)argc;
    (void)argv;

    uint8_t* inbuf = malloc(BUFFER_SIZE);
    uint8_t* outbuf = malloc(BUFFER_SIZE + 2);
    if (inbuf == NULL || outbuf == NULL) {
        perror("urldecode: malloc");
        free(inbuf);
        free(outbuf);
        return EXIT_FAILURE;
    }

    struct decoder dec = { STATE_LITERAL, 0, 0 };
    int status = EXIT_SUCCESS;

    while (1) {
        size_t readS = fread(inbuf, 1, BUFFER_SIZE, stdin);
        if (readS > 0) {
            size_t outp = decode_chunk(&dec, inbuf, readS, outbuf);
            if (!write_all(outbuf, outp)) {
                perror("urldecode: write");
                status = EXIT_FAILURE;
                break;
            }
        }
        // A short read means end of input or a read error.
        if (readS < (size_t)BUFFER_SIZE) {
            break;
        }
    }

    if (status == EXIT_SUCCESS && ferror(stdin)) {
        perror("urldecode: read");
        status = EXIT_FAILURE;
    }

    if (status == EXIT_SUCCESS) {
        size_t tail = decode_finish(&dec, outbuf);
        // Buffered data may only fail to be written at flush time.
        if (!write_all(outbuf, tail) || fflush(stdout) != 0) {
            perror("urldecode: write");
            status = EXIT_FAILURE;
        }
    }

    free(inbuf);
    free(outbuf);
    return status;
}