|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293 |
- import enum
- import functools
- import operator
- import re
-
-
class ParsingError(Exception):
    '''Raised by parse when an expression is malformed, e.g. a ) without matching (.'''
-
-
class EvaluationError(Exception):
    '''Raised when a parsed item cannot be evaluated, e.g. for an unrecognised modifier pattern.'''
-
-
class StrEnum(enum.Enum):
    '''Enum base class whose repr() is identical to its str().'''

    def __repr__(self):
        # Delegate to str() so that members look the same in repr() output
        # (e.g. inside a printed list) as they do when printed directly.
        return str(self)
-
-
class Type(StrEnum):
    '''The kinds of item that parse produces.'''

    # A parenthesised sub-expression: ( ... )
    group = 1
    # An empty pair of square brackets: []
    brackets = 2
-
-
class Modifier(StrEnum):
    '''The unary operators that can precede an item.'''

    # Logical not: !
    negate = 1
    # Unary plus: +
    plus = 2
-
-
class JSBool:
    '''
    Stand-in for a JavaScript boolean.

    The wrapped Python bool is stored in self.value. The arithmetic operators accept
    JSBool, JSInt and JSString operands and return NotImplemented for anything else.
    '''

    def __init__(self, value):
        # Anything with a truth value is accepted, including the other JS* objects
        self.value = bool(value)

    def __add__(self, other):
        # bool + bool/int -> int (addition)
        if isinstance(other, (JSBool, JSInt)):
            return JSInt(self) + JSInt(other)
        # bool + string -> string (concatenation)
        if isinstance(other, JSString):
            return JSString(self) + other
        return NotImplemented

    def __sub__(self, other):
        # bool - bool/int/string -> int (subtraction)
        if not isinstance(other, (JSBool, JSInt, JSString)):
            return NotImplemented
        return JSInt(self) - JSInt(other)

    def __mul__(self, other):
        # bool * bool/int/string -> int (multiplication)
        if not isinstance(other, (JSBool, JSInt, JSString)):
            return NotImplemented
        return JSInt(self) * JSInt(other)

    def __eq__(self, other):
        # Only another JSBool can be equal; there is no cross-type equality
        if not isinstance(other, JSBool):
            return False
        return self.value == other.value

    def __bool__(self):
        return self.value

    def __int__(self):
        return int(self.value)

    def __str__(self):
        # 'True' in Python is 'true' in JS
        return str(self.value).lower()

    def __repr__(self):
        return 'JSBool({!r})'.format(self.value)
-
-
class JSInt:
    '''
    Stand-in for a JavaScript integer number.

    The wrapped Python int is stored in self.value. The arithmetic operators accept
    JSInt, JSBool and JSString operands and return NotImplemented for anything else.
    '''

    def __init__(self, value):
        # int() also converts the other JS* objects through their __int__ methods
        self.value = int(value)

    def __add__(self, other):
        # int + int -> int (addition)
        if isinstance(other, JSInt):
            return JSInt(self.value + other.value)
        # int + bool -> int (addition)
        if isinstance(other, JSBool):
            return self + JSInt(other)
        # int + string -> string (concatenation)
        if isinstance(other, JSString):
            return JSString(self) + other
        return NotImplemented

    def __sub__(self, other):
        # int - int -> int (subtraction)
        if isinstance(other, JSInt):
            return JSInt(self.value - other.value)
        # int - bool/string -> int (subtraction)
        if isinstance(other, (JSBool, JSString)):
            return self - JSInt(other)
        return NotImplemented

    def __mul__(self, other):
        # int * int -> int (multiplication)
        if isinstance(other, JSInt):
            return JSInt(self.value * other.value)
        # int * bool/string -> int (multiplication)
        if isinstance(other, (JSBool, JSString)):
            return self * JSInt(other)
        return NotImplemented

    def __eq__(self, other):
        # Only another JSInt can be equal; there is no cross-type equality
        if not isinstance(other, JSInt):
            return False
        return self.value == other.value

    def __bool__(self):
        # Any value other than zero is considered 'true'
        return self.value != 0

    def __int__(self):
        return self.value

    def __str__(self):
        return str(self.value)

    def __repr__(self):
        return 'JSInt({!r})'.format(self.value)
-
-
class JSString:
    '''
    Stand-in for a JavaScript string.

    The wrapped Python str is stored in self.value. The arithmetic operators accept
    JSString, JSInt and JSBool operands and return NotImplemented for anything else.
    '''

    def __init__(self, value):
        # str() also converts the other JS* objects through their __str__ methods
        self.value = str(value)

    def __add__(self, other):
        # string + string -> string (concatenation)
        if isinstance(other, JSString):
            return JSString(self.value + other.value)
        # string + int/bool -> string (concatenation)
        if isinstance(other, (JSInt, JSBool)):
            return self + JSString(other)
        return NotImplemented

    def __sub__(self, other):
        # string - bool/int/string -> int (subtraction)
        if not isinstance(other, (JSBool, JSInt, JSString)):
            return NotImplemented
        return JSInt(self) - JSInt(other)

    def __mul__(self, other):
        # string * bool/int/string -> int (multiplication)
        if not isinstance(other, (JSBool, JSInt, JSString)):
            return NotImplemented
        return JSInt(self) * JSInt(other)

    def __eq__(self, other):
        # Only another JSString can be equal; there is no cross-type equality
        if not isinstance(other, JSString):
            return False
        return self.value == other.value

    def __bool__(self):
        # Any non-empty string is considered 'true'
        return self.value != ''

    def __int__(self):
        # The empty string converts to zero; everything else goes through int()
        return 0 if self.value == '' else int(self.value)

    def __str__(self):
        return self.value

    def __repr__(self):
        return 'JSString({!r})'.format(self.value)
-
-
# Lookup table used when evaluating a brackets item: the key is the exact sequence of
# modifiers in front of the [] (leftmost modifier first), the value is the result.
# Sequences that are not listed make Item.evaluate raise an EvaluationError.
_itemModifierToResultMapping = {
    # No modifier: a bare [] is represented as the empty string
    (): JSString(''),

    # One modifier
    (Modifier.plus,): JSInt(0),
    (Modifier.negate,): JSBool(False),

    # Two modifiers
    #(Modifier.plus, Modifier.plus): syntax error
    (Modifier.plus, Modifier.negate): JSInt(0),
    (Modifier.negate, Modifier.plus): JSBool(True),
    (Modifier.negate, Modifier.negate): JSBool(True),

    # Three modifiers
    #(Modifier.plus, Modifier.plus, Modifier.plus): syntax error
    #(Modifier.plus, Modifier.plus, Modifier.negate): syntax error
    (Modifier.plus, Modifier.negate, Modifier.plus): JSInt(1),
    (Modifier.plus, Modifier.negate, Modifier.negate): JSInt(1),
    #(Modifier.negate, Modifier.plus, Modifier.plus): syntax error
    (Modifier.negate, Modifier.plus, Modifier.negate): JSBool(True),
    (Modifier.negate, Modifier.negate, Modifier.plus): JSBool(False),
    (Modifier.negate, Modifier.negate, Modifier.negate): JSBool(False),
}
-
-
class Item:
    '''
    One node of a parsed expression.

    Attributes:
      type: Type.group for a parenthesised sub-expression or Type.brackets for []
      modifiers: iterable of Modifiers that precede the item
      values: the items inside a group; None for brackets items
    '''

    def __init__(self, type, modifiers, values = None):
        if type not in (Type.group, Type.brackets):
            raise ValueError('type must be Type.group or Type.brackets')
        iter(modifiers) # Test whether modifiers is an iterable, and let the potential TypeError bubble up
        for modifier in modifiers:
            if modifier not in (Modifier.negate, Modifier.plus):
                raise ValueError('modifiers must be an iterable that can only contain Modifier.negate or Modifier.plus')
        # Brackets items must not carry values
        if values is not None and type != Type.group:
            raise ValueError('values can only be specified for group items')
        # Group items must carry values
        if type == Type.group and values is None:
            raise ValueError('values are required for group items')
        self.type = type
        self.modifiers = modifiers
        self.values = values

    def evaluate(self, evaluateFunction = None):
        '''
        Evaluate this item.

        Argument: evaluateFunction (callable), called as evaluateFunction(values, modifiers) for group items; not used for brackets items

        Returns: the result of evaluateFunction for group items, or the entry of _itemModifierToResultMapping for brackets items

        Raises: ValueError if a group item is evaluated without a callable evaluateFunction, EvaluationError if a brackets item has an unlisted modifier sequence
        '''

        if self.type != Type.group:
            # Brackets item: the result depends only on the modifier sequence
            try:
                return _itemModifierToResultMapping[tuple(self.modifiers)]
            except KeyError:
                raise EvaluationError('Unrecognised modifier pattern {!r}'.format(self.modifiers))
        if not callable(evaluateFunction): # Also covers None
            raise ValueError('must specify a callable evaluateFunction when evaluating a group item')
        return evaluateFunction(self.values, self.modifiers)

    def __eq__(self, other):
        if not isinstance(other, Item):
            return False
        return self.type == other.type and self.modifiers == other.modifiers and self.values == other.values

    def __repr__(self):
        # The values part is only shown when there are values, i.e. for group items
        valuesPart = ''
        if self.values is not None:
            valuesPart = ', values = {!r}'.format(self.values)
        return 'Item({!r}, {!r}{})'.format(self.type, self.modifiers, valuesPart)
-
-
def parse(s):
    '''
    Parse expression s into a tree of Items.

    Only the characters + ! ( ) [ ] are interpreted; every other character is skipped.
    A + or ! in front of an item becomes a modifier of that item; a + directly after a
    finished item is a binary addition and leaves no trace in the tree (evaluation adds
    all items of a level together anyway).

    Argument: s (string), the expression to evaluate

    Returns: tree (list of Items)

    Raises: ParsingError on a ) without matching (, on a ( that is never closed, and on a [ that is not directly followed by ]
    '''

    # One entry per open nesting level; the last entry belongs to the innermost level.
    itemStack = [[]] # Items finished so far
    modifierStack = [[]] # Modifiers collected for the next item
    finishedItem = False # True directly after an item, i.e. when a + means addition
    pos = 0
    length = len(s)
    while pos < length:
        char = s[pos]
        if char == '+':
            if not finishedItem:
                modifierStack[-1].append(Modifier.plus)
            # else: addition, nothing to record
            # Either way an operand has to follow, so a subsequent + is a unary plus
            finishedItem = False
        elif char == '!':
            finishedItem = False
            modifierStack[-1].append(Modifier.negate)
        elif char == '(':
            finishedItem = False
            itemStack.append([])
            modifierStack.append([])
        elif char == ')':
            if len(itemStack) == 1:
                raise ParsingError('Encountered ) without matching (')
            values = itemStack.pop()
            modifierStack.pop() # Modifiers inside the group that were not followed by an item are dropped
            # The modifiers collected on the outer level belong to the group as a whole
            itemStack[-1].append(Item(type = Type.group, modifiers = modifierStack[-1], values = values))
            modifierStack[-1] = []
            finishedItem = True
        elif char == '[':
            if pos + 1 >= length:
                raise ParsingError('Unexpected end of expression at position {}; expected ]'.format(pos + 1))
            if s[pos + 1] != ']':
                raise ParsingError('Invalid byte found at position {}; expected ] but got {}'.format(pos + 1, s[pos + 1]))
            # End of modifier sequence
            itemStack[-1].append(Item(type = Type.brackets, modifiers = modifierStack[-1]))
            # Start a fresh list on the stack itself, so that a later group on this level
            # does not pick up the modifiers of this item
            modifierStack[-1] = []
            pos += 1 # Skip over closing bracket
            finishedItem = True
        pos += 1
    if len(itemStack) > 1:
        raise ParsingError('Encountered ( without matching )')
    return itemStack[0]
-
-
def evaluate(tree, modifiers = None):
    '''
    Evaluate a tree of Items as produced by parse.

    The items are evaluated one by one and their results are combined with +; whether
    that is an addition or a concatenation is decided by the JS* classes. Afterwards
    the modifiers are applied to the combined result from right to left, as unary
    operators are in JavaScript.

    Arguments:
      tree (list of Items), the items of one nesting level
      modifiers (iterable of Modifiers or None), the modifiers in front of the group, leftmost first

    Returns: the combined result (a JS* object for trees produced by parse)

    Raises: EvaluationError if tree is empty or if modifiers contains anything other than Modifier.plus and Modifier.negate
    '''

    if not tree:
        # Checked up front: a StopIteration from an exhausted iterator would be swallowed
        # by an enclosing map/reduce and silently cut the outer evaluation short.
        raise EvaluationError('Cannot evaluate an empty expression')
    values = [item.evaluate(evaluate) for item in tree]
    result = functools.reduce(operator.add, values) # Concatenation or addition, but this is all handled in the JS* classes
    for modifier in reversed(list(modifiers or ())):
        if modifier == Modifier.plus:
            result = JSInt(result) # Unary plus converts to a number
        elif modifier == Modifier.negate:
            result = JSBool(not result) # Logical not of the truth value
        else:
            raise EvaluationError('Unrecognised modifier {!r}'.format(modifier))
    return result
-
-
def crack(url, html):
    '''
    Compute the answer to the JavaScript challenge embedded in html.

    The challenge script initialises a value parent.child from an expression, modifies
    it with a series of *=, += and -= statements and finally computes
    parseInt(parent.child, 10) + t.length, where t is the host part of the page URL.
    This function replays those steps with parse and evaluate.

    Arguments:
      url (string), the URL of the page; must start with http:// or https:// followed by the host
      html (string), the source of the page

    Returns: the answer as a JSInt, or None if html does not contain the expected challenge script

    Raises: ValueError if no host can be extracted from url or if the challenge result cannot be converted to an integer; ParsingError or EvaluationError if an expression cannot be processed
    '''

    challenge = re.search(r'setTimeout\(function\(\)\{\s+var\s+s,t,o,p,b,r,e,a,k,i,n,g,f,\s*(?P<parent>[a-zA-Z]+)=\{"(?P<child>[a-zA-Z]+)":(?P<initialExpression>[^}]+)\};' +
                          r'\s*t\s*=\s*document\.createElement\(\'div\'\);' +
                          r'\s*t\.innerHTML="<a href=\'/\'>x</a>";' +
                          r'\s*t\s*=\s*t\.firstChild\.href;\s*r\s*=\s*t\.match\(/https\?:\\/\\//\)\[0\];' +
                          r'\s*t\s*=\s*t\.substr\(r\.length\);\s*t\s*=\s*t\.substr\(0,t\.length-1\);' +
                          r'\s*a\s*=\s*document\.getElementById\(\'jschl-answer\'\);' +
                          r'\s*f\s*=\s*document\.getElementById\(\'challenge-form\'\);' +
                          r'\s*;((?P=parent)\.(?P=child)\s*[*+-]=\s*[^;]+\s*;\s*)+a\.value\s*=\s*parseInt\((?P=parent)\.(?P=child),\s*10\)\s*\+\s*t\.length;\s*\';\s*121\'' +
                          r'\s*f\.action\s*\+=\s*location\.hash;' +
                          r'\s*f\.submit\(\);' +
                          r'\s*\},\s*4000\);', html)
    if not challenge:
        return None
    d = challenge.groupdict()
    operators = {'*': operator.mul, '+': operator.add, '-': operator.sub}
    result = evaluate(parse(d['initialExpression']))
    # Replay every 'parent.child <op>= <expression>' statement in order of appearance
    for step in re.finditer(d['parent'] + r'\.' + d['child'] + r'\s*(?P<operator>[*+-])=\s*(?P<expression>[^;]+)', html):
        result = operators[step.group('operator')](result, evaluate(parse(step.group('expression'))))
    # The slash after the host is not required, so that e.g. 'http://example.org' works as well
    host = re.search(r'^https?://([^/]+)', url)
    if not host:
        raise ValueError('Cannot extract the host from URL {!r}'.format(url))
    # The script applies parseInt before adding the length of the host; without the
    # conversion a string result would be concatenated with the length instead.
    return JSInt(result) + JSInt(len(host.group(1)))
|