Browse Source

Add URL/percent decoding tool

urldecode.c is entirely written by OrIdow6 except for one bug fix (char → uint8_t in the mallocs) and whitespace changes. The test suite is by JAA.

Co-authored-by: OrIdow6 <68304414+OrIdow6@users.noreply.github.com>
master
JustAnotherArchivist 2 years ago
parent
commit
bf5e065a0f
3 changed files with 157 additions and 0 deletions
  1. +42
    -0
      .urldecode-test
  2. +1
    -0
      urldecode
  3. +114
    -0
      urldecode.c

+ 42
- 0
.urldecode-test View File

@@ -0,0 +1,42 @@
#!/usr/bin/python3
import io
import itertools
import subprocess


def test(input, output):
    """Run the urldecode binary on `input` and assert that it prints exactly `output`.

    Both arguments are bytes objects: `input` is fed to the process on stdin,
    `output` is the expected content of its stdout. The process must also
    write nothing to stderr and exit with status 0.
    On a mismatch, the actual and the expected output are printed in 3-byte
    slices before the final assertion fails.
    """
    p = subprocess.Popen(
        ['./.make-and-exec-binaries/urldecode'],
        text = False,
        stdin = subprocess.PIPE,
        stdout = subprocess.PIPE,
        # Without a pipe here, communicate() returns None for stderr and the
        # assertion below could never fail.
        stderr = subprocess.PIPE,
    )
    stdout, stderr = p.communicate(input)
    assert not stderr, stderr
    # communicate() waits for termination, so returncode is set here; a
    # negative value means the process was killed by a signal.
    assert p.returncode == 0, p.returncode
    if stdout != output:
        # Dump actual, then expected, in 3-byte slices to make diffs readable.
        for i in range(0, len(stdout), 3):
            print(repr(stdout[i : i+3]))
        for i in range(0, len(output), 3):
            print(repr(output[i : i+3]))
    assert stdout == output


# Same value as BUFFER_SIZE in urldecode.c; the boundary cases below place
# escape sequences around this offset in the input.
BUFFER_SIZE = 1024 * 300


# Basic cases: empty input, plain text, truncated, valid and invalid escapes.
for encoded, decoded in (
    (b'', b''),
    (b'a', b'a'),
    (b'%', b'%'),
    (b'%2', b'%2'),
    (b'%20', b' '),
    (b'%gg', b'%gg'),
    #(b'%\xc3\xa4', b'%\xc3\xa4'),
):
    test(encoded, decoded)

# Every byte value, written with uppercase and with lowercase hex digits.
for value in range(256):
    expected = bytes([value])
    test(f'%{value:02X}'.encode('ascii'), expected)
    test(f'%{value:02x}'.encode('ascii'), expected)

hexbytes = tuple(f'{value:02x}' for value in range(256)) # ('00', '01', .., 'ff')


def _is_not_hex_digit(byte_hex):
    """True if the byte (given as two lowercase hex chars) is not one of 0-9, A-F, a-f."""
    return byte_hex < '30' or '39' < byte_hex < '41' or '46' < byte_hex < '61' or '66' < byte_hex


# All three-byte sequences '%' X Y where X or Y is not a hex digit, concatenated
# in ascending (X, Y) order; the test expects them to pass through unchanged.
data = bytes.fromhex(''.join(
    '25' + first + second
    for first in hexbytes
    for second in hexbytes
    if _is_not_hex_digit(first) or _is_not_hex_digit(second)
))
test(data, data)

# Escapes (complete, invalid and truncated) positioned around offset BUFFER_SIZE.
for offset in range(-3, 1):
    prefix = b'a' * (BUFFER_SIZE + offset)
    for encoded, decoded in (
        (b'%25', b'%'),
        (b'%gg', b'%gg'),
        (b'%', b'%'),
        (b'%2', b'%2'),
        (b'%g', b'%g'),
    ):
        test(prefix + encoded, prefix + decoded)

# Runs of literal percent signs directly in front of a valid escape.
for count in range(10):
    percents = b'%' * count
    test(percents + b'%20', percents + b' ')

+ 1
- 0
urldecode View File

@@ -0,0 +1 @@
.make-and-exec

+ 114
- 0
urldecode.c View File

@@ -0,0 +1,114 @@
/*
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

// OrIdow6, late November 2021


// stdin - a urlencoded string
// stdout - the decoded string

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

// Number of bytes read from stdin per chunk.
// Parenthesized so the expansion stays a single operand in any expression
// (e.g. `x / BUFFER_SIZE` or `x % BUFFER_SIZE`).
#define BUFFER_SIZE (1024 * 300)


// Percent-decodes stdin to stdout.
//
// Every "%XY" where X and Y are hex digits (either case) is replaced by the
// byte with that value. Anything else, including a '%' not followed by two
// hex digits and a truncated escape at end of input, is copied through
// unchanged.
//
// Input is processed in chunks of BUFFER_SIZE bytes; an escape that straddles
// a chunk boundary is handled by carrying the parser state across chunks.
//
// Returns EXIT_SUCCESS on success, EXIT_FAILURE if memory allocation, reading
// stdin, or writing stdout fails (a message is printed to stderr).
int main(int argc, char** argv) {
	(void)argc;
	(void)argv;

	// https://stackoverflow.com/questions/10324/convert-a-hexadecimal-string-to-an-integer-efficiently-in-c
	// Indexed by byte value: '0'-'9' map to 0-9, 'A'-'F' and 'a'-'f' map to
	// 10-15, every other byte maps to -1.
	// Could be faster if you had a table for 16-bit integers, depending on caching
	static const int8_t hextable[] = {
		-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
		-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
		-1,-1, 0,1,2,3,4,5,6,7,8,9,-1,-1,-1,-1,-1,-1,-1,10,11,12,13,14,15,-1,
		-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
		-1,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
		-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
		-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
		-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
		-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
		-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
		-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
	};

	// Parser states; the numeric values are the number of input bytes
	// ('%' and possibly one hex digit) currently held back, minus one
	// for the literal state.
	enum {
		STATE_LITERAL = -1,       // not inside an escape
		STATE_AFTER_PERCENT = 0,  // seen '%'
		STATE_AFTER_DIGIT = 1     // seen '%' and one hex digit
	};

	const size_t bufsize = (BUFFER_SIZE);
	uint8_t* inbuf = malloc(bufsize);
	// Up to two held-back bytes from the previous chunk ('%' and one digit)
	// may be emitted in addition to one output byte per input byte.
	uint8_t* outbuf = malloc(bufsize + 2);
	size_t outp = 0;
	int state = STATE_LITERAL;
	int8_t digita = 0;        // value of the first hex digit
	uint8_t digita_real = 0;  // the first hex digit as it appeared in the input
	int status = EXIT_FAILURE;

	if (inbuf == NULL || outbuf == NULL) {
		fputs("urldecode: out of memory\n", stderr);
		goto cleanup;
	}

	while (1) {
		size_t readS = fread(inbuf, 1, bufsize, stdin);
		if (readS == 0) {
			break;
		}

		for (size_t p = 0; p < readS; p++) {
			const uint8_t c = inbuf[p];

			switch (state) {
			case STATE_LITERAL:
				if (c == '%') {
					state = STATE_AFTER_PERCENT;
				} else {
					outbuf[outp++] = c;
				}
				break;

			case STATE_AFTER_PERCENT:
				digita = hextable[c];
				if (digita >= 0) {
					digita_real = c;
					state = STATE_AFTER_DIGIT;
				} else {
					// Not an escape: emit the held-back '%'.
					outbuf[outp++] = '%';
					if (c != '%') {
						outbuf[outp++] = c;
						state = STATE_LITERAL;
					}
					// else: this '%' may start a new escape, state is unchanged
				}
				break;

			default: { // STATE_AFTER_DIGIT
				const int8_t digitb = hextable[c];
				if (digitb >= 0) {
					outbuf[outp++] = (uint8_t)((digita << 4) | digitb);
					state = STATE_LITERAL;
				} else {
					// Not an escape: emit the held-back '%' and digit.
					outbuf[outp++] = '%';
					outbuf[outp++] = digita_real;
					if (c != '%') {
						outbuf[outp++] = c;
						state = STATE_LITERAL;
					} else {
						state = STATE_AFTER_PERCENT;
					}
				}
				break;
			}
			}
		}

		if (outp > 0 && fwrite(outbuf, 1, outp, stdout) != outp) {
			perror("urldecode: write error");
			goto cleanup;
		}
		outp = 0;

		// A short read means end of input or a read error; both end the loop.
		if (readS < bufsize) {
			break;
		}
	}

	if (ferror(stdin)) {
		perror("urldecode: read error");
		goto cleanup;
	}

	// Input ended in the middle of an escape: emit the held-back bytes as is.
	if (state != STATE_LITERAL && fputc('%', stdout) == EOF) {
		perror("urldecode: write error");
		goto cleanup;
	}
	if (state == STATE_AFTER_DIGIT && fputc(digita_real, stdout) == EOF) {
		perror("urldecode: write error");
		goto cleanup;
	}

	// stdout is buffered; flush explicitly so that a failed write is
	// reflected in the exit status instead of being lost at exit.
	if (fflush(stdout) != 0) {
		perror("urldecode: write error");
		goto cleanup;
	}

	status = EXIT_SUCCESS;

cleanup:
	free(outbuf);
	free(inbuf);
	return status;
}

Loading…
Cancel
Save