# Patch header of the commit that introduced this module (kept for reference):
#   From 65254119d6cc4fb8f68f76d35fe05e792ba0a492 Mon Sep 17 00:00:00 2001
#   From: JustAnotherArchivist
#   Date: Tue, 26 Dec 2017 23:10:51 +0000
#   Subject: [PATCH] Initial commit
#   parse.py | 293 insertions(+), 1 file changed, create mode 100644
#   diff --git a/parse.py b/parse.py (new file, index 0000000..387077c)
"""Evaluate obfuscated JavaScript arithmetic built from ``[]``, ``+``, ``!`` and ``()``.

The module contains:

* ``JSBool`` / ``JSInt`` / ``JSString``: tiny value types that emulate the
  JavaScript coercion rules needed for ``+``, ``-`` and ``*``.
* ``parse``: turns an expression such as ``+((!+[]+!![]+[])+(+!![]))`` into a
  tree of ``Item`` objects.
* ``evaluate``: reduces such a tree to a single JS value.
* ``crack``: extracts the expressions from a challenge page (the script that
  fills in the ``jschl-answer`` form field) and computes the expected answer.
"""
import enum
import functools
import operator
import re


class ParsingError(Exception):
    """Raised by ``parse`` when the expression is syntactically invalid."""


class EvaluationError(Exception):
    """Raised when a parsed expression cannot be evaluated."""


class StrEnum(enum.Enum):
    """Enum whose ``repr`` equals its ``str`` (e.g. ``Type.group``)."""

    def __repr__(self):
        return str(self)


class Type(StrEnum):
    """Kind of a parsed item."""

    group = 1  # a parenthesised sub-expression
    brackets = 2  # a literal ``[]``


class Modifier(StrEnum):
    """Unary prefix operator in front of an item."""

    negate = 1  # ``!``
    plus = 2  # unary ``+``


class JSBool:
    """JavaScript boolean with the coercions needed for ``+``, ``-`` and ``*``."""

    def __init__(self, value):
        self.value = bool(value)

    def __add__(self, other):
        if isinstance(other, (JSBool, JSInt)):  # bool + bool/int -> int (addition)
            return JSInt(self) + JSInt(other)
        elif isinstance(other, JSString):  # bool + string -> string (concatenation)
            return JSString(self) + other
        else:
            return NotImplemented

    def __sub__(self, other):
        if isinstance(other, (JSBool, JSInt, JSString)):  # bool - bool/int/string -> int (subtraction)
            return JSInt(self) - JSInt(other)
        else:
            return NotImplemented

    def __mul__(self, other):
        if isinstance(other, (JSBool, JSInt, JSString)):  # bool * bool/int/string -> int (multiplication)
            return JSInt(self) * JSInt(other)
        else:
            return NotImplemented

    def __eq__(self, other):
        return isinstance(other, JSBool) and self.value == other.value

    def __hash__(self):
        # Defined alongside __eq__ so instances stay usable in sets and as dict keys.
        return hash((JSBool, self.value))

    def __bool__(self):
        return self.value

    def __int__(self):
        return int(self.value)

    def __str__(self):
        return str(self.value).lower()  # 'True' in Python is 'true' in JS

    def __repr__(self):
        return 'JSBool({!r})'.format(self.value)


class JSInt:
    """JavaScript integer number.

    The constructor accepts anything ``int()`` accepts, including the other
    JS* types.  A ``JSString`` that is not numeric makes ``int()`` raise
    ``ValueError`` (there is no NaN emulation).
    """

    def __init__(self, value):
        self.value = int(value)

    def __add__(self, other):
        if isinstance(other, JSInt):  # int + int -> int (addition)
            return JSInt(self.value + other.value)
        elif isinstance(other, JSBool):  # int + bool -> int (addition)
            return self + JSInt(other)
        elif isinstance(other, JSString):  # int + string -> string (concatenation)
            return JSString(self) + other
        else:
            return NotImplemented

    def __sub__(self, other):
        if isinstance(other, JSInt):  # int - int -> int (subtraction)
            return JSInt(self.value - other.value)
        elif isinstance(other, (JSBool, JSString)):  # int - bool/string -> int (subtraction)
            return self - JSInt(other)
        else:
            return NotImplemented

    def __mul__(self, other):
        if isinstance(other, JSInt):  # int * int -> int (multiplication)
            return JSInt(self.value * other.value)
        elif isinstance(other, (JSBool, JSString)):  # int * bool/string -> int (multiplication)
            return self * JSInt(other)
        else:
            return NotImplemented

    def __eq__(self, other):
        return isinstance(other, JSInt) and self.value == other.value

    def __hash__(self):
        # Defined alongside __eq__ so instances stay usable in sets and as dict keys.
        return hash((JSInt, self.value))

    def __bool__(self):
        return self.value != 0  # Any value other than zero is considered 'true'

    def __int__(self):
        return self.value

    def __str__(self):
        return str(self.value)

    def __repr__(self):
        return 'JSInt({!r})'.format(self.value)


class JSString:
    """JavaScript string; ``+`` concatenates, ``-`` and ``*`` coerce to int."""

    def __init__(self, value):
        self.value = str(value)

    def __add__(self, other):
        if isinstance(other, JSString):  # string + string -> string (concatenation)
            return JSString(self.value + other.value)
        elif isinstance(other, (JSInt, JSBool)):  # string + int/bool -> string (concatenation)
            return self + JSString(other)
        else:
            return NotImplemented

    def __sub__(self, other):
        if isinstance(other, (JSBool, JSInt, JSString)):  # string - bool/int/string -> int (subtraction)
            return JSInt(self) - JSInt(other)
        else:
            return NotImplemented

    def __mul__(self, other):
        if isinstance(other, (JSBool, JSInt, JSString)):  # string * bool/int/string -> int (multiplication)
            return JSInt(self) * JSInt(other)
        else:
            return NotImplemented

    def __eq__(self, other):
        return isinstance(other, JSString) and self.value == other.value

    def __hash__(self):
        # Defined alongside __eq__ so instances stay usable in sets and as dict keys.
        return hash((JSString, self.value))

    def __bool__(self):
        return self.value != ''  # Any non-empty string is considered 'true'

    def __int__(self):
        if self.value == '':
            return 0  # The empty string converts to zero
        return int(self.value)

    def __str__(self):
        return self.value

    def __repr__(self):
        return 'JSString({!r})'.format(self.value)


# Value of a literal ``[]`` preceded by the given prefix modifiers (in source
# order, so ``(Modifier.plus, Modifier.negate)`` is ``+![]``).  Sequences that
# are not listed are rejected by Item.evaluate with an EvaluationError.
_itemModifierToResultMapping = {
    (): JSString(''),
    (Modifier.plus,): JSInt(0),
    (Modifier.negate,): JSBool(False),
    # (Modifier.plus, Modifier.plus): syntax error
    (Modifier.plus, Modifier.negate): JSInt(0),
    (Modifier.negate, Modifier.plus): JSBool(True),
    (Modifier.negate, Modifier.negate): JSBool(True),
    # (Modifier.plus, Modifier.plus, Modifier.plus): syntax error
    # (Modifier.plus, Modifier.plus, Modifier.negate): syntax error
    (Modifier.plus, Modifier.negate, Modifier.plus): JSInt(1),
    (Modifier.plus, Modifier.negate, Modifier.negate): JSInt(1),
    # (Modifier.negate, Modifier.plus, Modifier.plus): syntax error
    (Modifier.negate, Modifier.plus, Modifier.negate): JSBool(True),
    (Modifier.negate, Modifier.negate, Modifier.plus): JSBool(False),
    (Modifier.negate, Modifier.negate, Modifier.negate): JSBool(False),
}


class Item:
    """One node of the parse tree: a literal ``[]`` or a parenthesised group.

    Attributes:
        type: ``Type.brackets`` or ``Type.group``.
        modifiers: list of ``Modifier`` values preceding the item, in source order.
        values: list of child ``Item`` objects for groups, ``None`` for brackets.
    """

    def __init__(self, type, modifiers, values=None):
        if type not in (Type.group, Type.brackets):
            raise ValueError('type must be Type.group or Type.brackets')
        # Copy into a list: a non-iterable raises TypeError here, a one-shot
        # iterator is not consumed by the validation below, and later changes
        # to the caller's list cannot leak into this item.
        modifiers = list(modifiers)
        if not all(x in (Modifier.negate, Modifier.plus) for x in modifiers):
            raise ValueError('modifiers must be an iterable that can only contain Modifier.negate or Modifier.plus')
        if values is not None and type != Type.group:
            raise ValueError('values can only be specified for group items')
        if type == Type.group and values is None:
            raise ValueError('values are required for group items')
        self.type = type
        self.modifiers = modifiers
        self.values = values

    def evaluate(self, evaluateFunction=None):
        """Return the JS value of this item.

        Argument: evaluateFunction (callable taking ``(values, modifiers)``),
        required for group items and used to evaluate the children.

        Raises: ValueError if a group is evaluated without a callable;
        EvaluationError if a brackets item has an unknown modifier sequence.
        """
        if self.type == Type.group:
            if evaluateFunction is None or not callable(evaluateFunction):
                raise ValueError('must specify a callable evaluateFunction when evaluating a group item')
            return evaluateFunction(self.values, self.modifiers)
        try:
            return _itemModifierToResultMapping[tuple(self.modifiers)]
        except KeyError:
            raise EvaluationError('Unrecognised modifier pattern {!r}'.format(self.modifiers)) from None

    def __eq__(self, other):
        return (
            isinstance(other, Item)
            and self.type == other.type
            and self.modifiers == other.modifiers
            and self.values == other.values
        )

    # Items hold mutable lists, so they are deliberately unhashable.
    __hash__ = None

    def __repr__(self):
        suffix = ', values = {!r}'.format(self.values) if self.values is not None else ''
        return 'Item({!r}, {!r}{})'.format(self.type, self.modifiers, suffix)


def parse(s):
    '''
    Parse expression s into a tree of Items.

    Argument: s (string), the expression to evaluate

    Returns: tree (list of Items)

    Raises: ParsingError on an unbalanced parenthesis or on a '[' that is not
    immediately followed by ']'.

    Characters other than + ! ( ) [ ] are ignored.
    '''

    items = []  # Items of the group currently being parsed
    modifiers = []  # Prefix modifiers collected for the next item
    frames = []  # (items, modifiers) of the enclosing groups, innermost last
    finishedItem = False  # True directly after an item, where '+' means addition
    pos = 0
    length = len(s)
    while pos < length:
        char = s[pos]
        if char == '+':
            if not finishedItem:
                modifiers.append(Modifier.plus)
            # else: addition, nothing to do
        elif char == '!':
            finishedItem = False
            modifiers.append(Modifier.negate)
        elif char == '(':
            finishedItem = False
            # The pending modifiers belong to the group as a whole; park them
            # together with the outer item list until the matching ')'.
            frames.append((items, modifiers))
            items = []
            modifiers = []
        elif char == ')':
            if not frames:
                raise ParsingError('Encountered ) without matching (')
            children = items
            items, groupModifiers = frames.pop()
            items.append(Item(type=Type.group, modifiers=groupModifiers, values=children))
            modifiers = []
            finishedItem = True
        elif char == '[':
            if pos + 1 >= length:
                raise ParsingError('Unexpected end of input at position {}; expected ]'.format(pos + 1))
            if s[pos + 1] != ']':
                raise ParsingError('Invalid byte found at position {}; expected ] but got {}'.format(pos + 1, s[pos + 1]))
            # End of modifier sequence
            items.append(Item(type=Type.brackets, modifiers=modifiers))
            # Start a fresh list so a later group on this level cannot inherit
            # the modifiers that belonged to this item.
            modifiers = []
            pos += 1  # Skip over closing bracket
            finishedItem = True
        pos += 1
    if frames:
        raise ParsingError('Encountered ( without matching )')
    return items


def evaluate(tree, modifiers=None):
    '''
    Evaluate a tree produced by parse.

    Arguments: tree (iterable of Items), modifiers (iterable of Modifiers
    preceding the group the tree belongs to, or None)

    Returns: JSBool, JSInt or JSString

    Raises: EvaluationError if tree is empty or contains an unknown modifier
    pattern.
    '''
    values = [item.evaluate(evaluate) for item in tree]
    if not values:
        raise EvaluationError('Cannot evaluate an empty expression')
    # Concatenation or addition, but this is all handled in the JS* classes
    result = functools.reduce(operator.add, values)
    # Prefix operators bind right-to-left: in '+!(x)' the '!' is applied first.
    for modifier in reversed(list(modifiers or ())):
        if modifier == Modifier.plus:
            result = JSInt(result)
        else:
            result = JSBool(not result)
    return result


# Matches the complete challenge script.  Named groups:
#   parent / child      - object and key that hold the running value
#   initialExpression   - expression the value is initialised with
# The value assigned to t.innerHTML is accepted as any double-quoted string.
_challengePattern = re.compile(
    r'setTimeout\(function\(\)\{\s+var\s+s,t,o,p,b,r,e,a,k,i,n,g,f,'
    r'\s*(?P<parent>[a-zA-Z]+)=\{"(?P<child>[a-zA-Z]+)":(?P<initialExpression>[^}]+)\};'
    r"\s*t\s*=\s*document\.createElement\('div'\);"
    r'\s*t\.innerHTML="[^"]*";'
    r'\s*t\s*=\s*t\.firstChild\.href;\s*r\s*=\s*t\.match\(/https\?:\\/\\//\)\[0\];'
    r'\s*t\s*=\s*t\.substr\(r\.length\);\s*t\s*=\s*t\.substr\(0,t\.length-1\);'
    r"\s*a\s*=\s*document\.getElementById\('jschl-answer'\);"
    r"\s*f\s*=\s*document\.getElementById\('challenge-form'\);"
    r'\s*;((?P=parent)\.(?P=child)\s*[*+-]=\s*[^;]+\s*;\s*)+'
    r"a\.value\s*=\s*parseInt\((?P=parent)\.(?P=child),\s*10\)\s*\+\s*t\.length;\s*';\s*121'"
    r'\s*f\.action\s*\+=\s*location\.hash;'
    r'\s*f\.submit\(\);'
    r'\s*\},\s*4000\);'
)

_operators = {'*': operator.mul, '+': operator.add, '-': operator.sub}


def crack(url, html):
    '''
    Compute the answer the challenge script in html would submit.

    Arguments: url (string), the http(s) URL of the page; html (string), the
    page source

    Returns: JSInt with the answer, or None if html does not contain a
    recognised challenge script

    Raises: ValueError if url is not an http(s) URL; ParsingError or
    EvaluationError if an expression in the script cannot be handled.
    '''
    m = _challengePattern.search(html)
    if not m:
        return None
    d = m.groupdict()
    result = evaluate(parse(d['initialExpression']))
    stepPattern = re.compile(
        re.escape(d['parent']) + r'\.' + re.escape(d['child'])
        + r'\s*(?P<operator>[*+-])=\s*(?P<expression>[^;]+)'
    )
    # Only look at the matched script so unrelated page content cannot
    # contribute extra operations.
    for step in stepPattern.finditer(m.group(0)):
        operand = evaluate(parse(step.group('expression')))
        result = _operators[step.group('operator')](result, operand)
    # The script adds the length of the page's host part (href of '/' minus
    # the scheme and the trailing slash).
    hostMatch = re.match(r'https?://([^/?#]+)', url)
    if hostMatch is None:
        raise ValueError('url must be an http(s) URL, got {!r}'.format(url))
    domain = hostMatch.group(1)
    # JSInt(...) mirrors the parseInt(..., 10) call of the script.
    return JSInt(result) + JSInt(len(domain))