#!/usr/bin/env python
#
#  `de_obf_helper.py`
#
#  Helper for deobfuscation of Javascript code.
#
#  Author: follower@rancidbacon.com
#
#  Copyright: 2005
#
#  License: GPL 2.0
#
#  Version: 0.1.0
#
#  MOTD: Are you all *insane*???
#
#
#  Example usage: (Using Google Maps code)
#
#  Note: All code must be pretty-printed in the files.
#
#  * Copy file `maps.1.js`
#
#  * Execute ('-d' = generate documentation):
#
#      ./de_obf_helper.py -d maps.1.js > maps.1.html
#
#  * Generates or updates these files:
#
#      maps.1.html  funcs-maps.1.js.txt  maps.1.js-linenums.html
#
#  * Edit `funcs-maps.1.js.txt`, the format is:
#
#      class/function name<tab>args<tab>deobfuscated name<tab>description
#
#    e.g.:
#
#      Y<tab>Vc<tab>XSLT<tab>Handles XSLT processing
#
#  * Re-run above '-d' command to refresh documentation.
#
#  * Then, when target code has been modified and obfuscated differently:
#
#    * Copy file `maps.2.js`
#
#    * Execute ('-u' = upgrade functions/class metadata to new names):
#
#        ./de_obf_helper.py -u maps.1.js maps.2.js
#
#    * Generates or updates:
#
#        funcs-maps.2.js.txt
#
#    * Execute:
#
#        ./de_obf_helper.py -d maps.2.js > maps.2.html
#
#      and documentation matching the most recent obfuscation is generated.
#
#  Note: The "upgrade" step is not perfect, it uses various heuristics to
#        attempt to identify the same function/class in each file.
#
#  Note: This code is very rough, with hard coded stuff all over the place,
#        but it does the job for me so I figured I'd throw it up on the net.
#        Perhaps a little too literally... :-)
#
#  NOTE(review): the HTML markup inside the string literals of this file was
#  lost at some point (the tags had been stripped, leaving format strings
#  whose placeholder count no longer matched their arguments).  The markup
#  below is a reconstruction derived from the argument lists; confirm it
#  against an original copy if one is available.
#
#  The code deliberately avoids syntax that exists only in Python 2 or only
#  in Python 3 (no print statement, no `with`, no generator expressions).
#

import os
import re
import sys
import shutil
import difflib
import textwrap

try:
    import sets  # Only exists on Python 2; no longer required by this module.
except ImportError:
    sets = None


RE_FUNCTION_DEF = re.compile(r"function\s+?(.+?)\((.*?)\)")
#RE_FUNCTION_DEF = re.compile("^function\s+?(.+?)\((.*?)\)", re.DOTALL | re.MULTILINE)

RE_METHOD_DEF = re.compile(r"^(\w+?)\.([\w.]+?)=function\((.*?)\)",
                           re.MULTILINE)


# TODO: Make this all lazy, but cached.

class SourcePropertyMixin(object):
    # TODO: Abstract out more functionality?
    """
    Adds a read-only `source` property to classes that store the complete
    file text in `_source` and their own extent in `startIdx`/`endIdx`.
    """

    def _getSource(self):
        """
        Return the slice of the file text covered by this item.
        """
        return self._source[self.startIdx:self.endIdx]

    source = property(_getSource, doc="Source text of this item.")


class Function(SourcePropertyMixin, object):
    # TODO: Rename? & Class-ify?
    """
    A top-level Javascript function (or constructor) plus the user supplied
    metadata (deobfuscated name, description) attached to it.
    """

    def __init__(self, functionName, functionArgs, functionNewName = "",
                 functionDescription = "", text=None, lineNumber = None,
                 startIdx = None, endIdx = None, source = None):
        """
        * `functionName` - name as it appears in the (obfuscated) source.
        * `functionArgs` - raw text between the parentheses of the header.
        * `functionNewName` - deobfuscated name, "" when unknown.
        * `functionDescription` - free text description.
        * `text` - the matched header text, e.g. "function A(x,y)".
        * `lineNumber` - 1-based line of the header within the file.
        * `startIdx`, `endIdx` - offsets of the definition within `source`.
        * `source` - complete text of the file the function was found in.
        """
        self.name = functionName
        self.args = functionArgs
        self.newName = functionNewName
        self.description = functionDescription

        self.text = text
        self.lineNumber = lineNumber

        # Offset of function source within the source file.
        self.startIdx = startIdx
        self.endIdx = endIdx
        self._source = source

        self.methods = []

        self.otherName = "" # TODO: Handle this better. Key by file name?

    def update(self, source):
        """
        Copy the non-empty metadata of another Function onto this one.

        * `source` another Function instance.
        """
        #if source.name == self.name:
        for attr in ["args", "newName", "description"]: #TODO:Improve this?
            newValue = getattr(source, attr)
            if newValue:
                # Empty values never overwrite existing data.
                setattr(self, attr, newValue)
        #else:
        #    raise Exception("Functions are not of the same name")

    def asExportString(self):
        """
        Return the tab delimited metadata line for this function.
        """
        return "\t".join([self.name, self.args, self.newName,
                          self.description])

    def _getSignature(self):
        """
        Return the list of method names, in the order they were found.
        """
        return [m.name for m in self.methods]

    signature = property(_getSignature,
                         doc="Method names used to match functions.")


# TODO: Make this static?
def functionFromString(metaData):
    """
    Build a Function from one line of a metadata file.

    * metaData - Tab delimited funcName, funcArgs, funcNewName, funcDesc

    Missing trailing fields default to "" and any tab beyond the third is
    kept as part of the description, so hand edited lines do not raise.
    """
    fields = metaData.split("\t", 3)
    fields.extend([""] * (4 - len(fields)))
    funcName, funcArgs, funcNewName, funcDesc = fields

    return Function(funcName, funcArgs, funcNewName, funcDesc)


class Method(SourcePropertyMixin, object):
    """
    A method assigned to a known Function, i.e. `Parent.name=function(...)`
    or `Parent.prototype.name=function(...)`.
    """

    def __init__(self, methodName, methodArgs, text=None, lineNumber = None,
                 startIdx = None, endIdx = None, source = None):
        """
        * `methodName` - dotted name after the parent; a "prototype." part
                         is removed.
        * `methodArgs` - raw text between the parentheses of the header.
        * `text`, `lineNumber`, `startIdx`, `endIdx`, `source` - as for
          Function.
        """
        # TODO: Record if 'prototype'?
        self.name = methodName.replace("prototype.", "")
        self.args = methodArgs

        self.text = text
        self.lineNumber = lineNumber

        # Offset of function source within the source file.
        self.startIdx = startIdx
        self.endIdx = endIdx
        self._source = source


def _readFile(filename):
    """
    Return the complete content of `filename`, closing the file afterwards.
    """
    fh = open(filename)
    try:
        return fh.read()
    finally:
        fh.close()


def _writeFile(filename, content):
    """
    Replace the content of `filename` with `content`.
    """
    fh = open(filename, "w")
    try:
        fh.write(content)
    finally:
        fh.close()


def exportFuncs(filename, funcs):
    """
    Merge the metadata file `filename` into `funcs` and write it back.

    * `filename` - metadata file; a `.bak` copy is made when it exists.
    * `funcs` - dictionary of Function instances keyed by name, updated in
                place.  Entries only present in the file are added to it.

    Lines are written sorted by name so successive exports can be diffed.
    """
    if os.path.exists(filename):
        shutil.copy(filename, "%s.bak" % filename)

        for line in _readFile(filename).split("\n"):
            # Only the line terminator is removed: trailing tabs are
            # significant because they delimit empty fields.
            line = line.rstrip("\r")
            if not line.strip():
                continue

            funcMetaData = functionFromString(line)

            if funcMetaData.name in funcs:
                funcs[funcMetaData.name].update(funcMetaData)
            else:
                funcs[funcMetaData.name] = funcMetaData

    names = list(funcs.keys())
    names.sort()

    _writeFile(filename,
               "".join(["%s\n" % funcs[name].asExportString()
                        for name in names]))


#BASE_RE_FUNCTION_REFERENCES = "([\w.])+?\s*?=\s*?%s\((.+?)\)"
#BASE_RE_FUNCTION_REFERENCES = "^[^\n]*?([\w.])+?\s*?=\s*(.*?)\s*%s\((.*?)\).*?$"
# TODO: Check this matches all references correctly.
#BASE_RE_FUNCTION_REFERENCES = "^.*?\W%s(\((.*?)\).*?$|($|\W.*?$))"
BASE_RE_FUNCTION_REFERENCES = r"^.*?\W%s\((.*?)\).*?$"
#BASE_RE_FUNCTION_REFERENCES = "^.*?\W%s(\((.*?)\).*?$|[\s;]*?$)"

def getReferences(source, funcName):
    """
    Return every line of `source` that calls `funcName`.

    Matches that start with "function" (i.e. the definition itself) are
    left out.  The name is escaped because Javascript identifiers may
    contain `$`, which is a regular expression metacharacter.
    """
    re_functionReference = re.compile(
        BASE_RE_FUNCTION_REFERENCES % re.escape(funcName), re.MULTILINE)

    return [match.group(0)
            for match in re_functionReference.finditer(source)
            if not match.group(0).startswith("function")]


# NOTE(review): reconstructed markup -- level, optional id attribute,
# class, content, level.
FORMAT_HEADING = '<h%d%s class="%s">%s</h%d>'

def heading(level, klass, content, id_ = None):
    """
    Return a heading element of the given `level` and CSS class `klass`,
    carrying an `id` attribute when `id_` is given.
    """
    idAttr = ""
    if id_:
        idAttr = ' id="%s"' % id_

    return FORMAT_HEADING % (level, idAttr, klass, content, level)


def paragraph(klass, content):
    """
    Return `content` wrapped in a paragraph with CSS class `klass`.
    """
    # NOTE(review): reconstructed markup.
    return '<p class="%s">%s</p>' % (klass, content)


def href(url, text):
    """
    Return a link to `url` labelled `text`.
    """
    # NOTE(review): reconstructed markup.
    return '<a href="%s">%s</a>' % (url, text)


def table(datalines, columns):
    """
    Lay `datalines` out as a one row table of at most `columns` cells,
    filling each cell top to bottom before starting the next.
    """
    parts = []

    # NOTE(review): reconstructed markup.
    parts.append('<table width="100%"><tr>')

    # Round up, and never use 0: a zero step would never advance `count`.
    numPerCol = max(1, (len(datalines) + columns - 1) // columns)

    count = 0
    while count < len(datalines):
        parts.append('<td width="%d%%" valign="top">%s</td>' %
                     (100 // columns,
                      "<br />\n".join(datalines[count:count+numPerCol])))
        count += numPerCol

    parts.append('</tr></table>')

    return "\n".join(parts)


# TODO: Find the proper one...
def escape(text):
    """
    Escape `&` and `<` so `text` can be embedded in HTML.
    """
    return text.replace("&", "&amp;").replace("<", "&lt;")


def referenceList(func, references):
    """
    Return an HTML list of (at most five) distinct lines calling `func`.

    When `func` has a deobfuscated name, occurrences of the original name
    are shown under the new name.
    """
    MAX_REFS_TO_DISPLAY = 5

    parts = []

    parts.append(heading(3, "references", "References:"))

    # Remove duplicates but keep first-seen order so output is repeatable.
    seen = {}
    uniqueReferences = []
    for reference in references:
        reference = reference.strip()
        if reference not in seen:
            seen[reference] = True
            uniqueReferences.append(reference)

    if func.newName:
        # TODO: Check this substitution handles all cases.
        re_name = re.compile(r"(\W|^)%s(\W|$)" % re.escape(func.name))
        displayName = func.newName.replace("~","")
        # A callable is used so backslashes in the new name stay literal.
        substitute = lambda m: m.group(1) + displayName + m.group(2)
    else:
        re_name = None

    # NOTE(review): reconstructed markup.
    parts.append("<ul>")

    for reference in uniqueReferences[:MAX_REFS_TO_DISPLAY]:
        if re_name is not None:
            reference = re_name.sub(substitute, reference)
        parts.append('<li>%s</li>' % escape(reference))

    if len(uniqueReferences) > MAX_REFS_TO_DISPLAY:
        parts.append('<li>...</li>')

    parts.append("</ul>")

    return "\n".join(parts)


def formatFunction(f, sourceURL):
    """
    Return `(tocEntry, section)` for the Function `f`.

    * `tocEntry` - link to the section, for the table of contents.
    * `section` - HTML describing the function, its args and its methods.
    * `sourceURL` - URL of the line numbered source the heading links to.
    """
    parts = []

    name = f.newName.replace("~","") or f.name

    # Original name in square brackets, linked to its line in the source.
    sourceLink = href("%s#%s" % (sourceURL, f.lineNumber), f.name)
    parts.append(heading(2, "fname", '%s [%s]' % (name, sourceLink), name))

    parts.append(heading(3, "section", "Args:"))
    parts.append(paragraph("args", f.args.replace(",",", "))) #TODO: Change formatting.

    parts.append(paragraph("desc", f.description))

    if f.methods:
        parts.append(heading(3, "section", "Methods:"))
        for m in f.methods:
            parts.append(paragraph("method", "%s(%s)" % (m.name, m.args)))

    return (href("#%s" % name, name) , "\n".join(parts))


def _lineNumberAt(source, idx):
    """
    Return the 1-based number of the line containing offset `idx`.
    """
    return source.count("\n", 0, idx) + 1


def extractFunctionInfo(source):
    """
    Return a dictionary of Function instances, keyed by name, for every
    function definition found in `source`, with their methods attached.

    Headers that are not followed by a balanced `{...}` block are skipped.
    A later definition replaces an earlier one of the same name.
    """
    funcs = {}

    for match in RE_FUNCTION_DEF.finditer(source):
        funcName, args = match.groups()

        # The match position is used rather than searching for the header
        # text, which would find the first of several identical headers.
        startIdx = match.start()

        try:
            dummy, endIdx = getBlockOffsets(source, startIdx)
        except ValueError:
            continue

        funcs[funcName] = Function(funcName, args, text=match.group(0),
                                   lineNumber=_lineNumberAt(source, startIdx),
                                   startIdx=startIdx, endIdx=endIdx,
                                   source=source)

    #print "Functions found: %d" % (len(funcs))
    #print funcs.keys()

    for match in RE_METHOD_DEF.finditer(source):
        parent, methodName, args = match.groups()

        func = funcs.get(parent)
        if func is None:
            # It's adding to builtin object...
            #print parent #TODO: Output this...
            continue

        startIdx = match.start()

        try:
            dummy, endIdx = getBlockOffsets(source, startIdx)
        except ValueError:
            continue

        func.methods.append(Method(methodName, args, text=match.group(0),
                                   lineNumber=_lineNumberAt(source, startIdx),
                                   startIdx=startIdx, endIdx=endIdx,
                                   source=source))

    return funcs


def getBlockOffsets(source, startIdx):
    """
    Return `(start, end)` of the first `{...}` block at or after `startIdx`.

    `start` is the offset of the opening brace and `end` is one past the
    matching closing brace.  Raises ValueError when there is no opening
    brace or the braces are unbalanced.

    Braces inside strings, regular expressions and comments are counted
    like any other brace.
    """
    startIdx = idx = source.index("{", startIdx)

    depth = 1
    while depth != 0:
        nextCloseIdx = source.find("}", idx + 1)
        if nextCloseIdx == -1:
            raise ValueError("unbalanced braces after offset %d" % startIdx)

        # Only an opening brace *before* the next closing brace matters.
        nextOpenIdx = source.find("{", idx + 1, nextCloseIdx)
        if nextOpenIdx != -1:
            idx = nextOpenIdx
            depth += 1
        else:
            idx = nextCloseIdx
            depth -= 1

    return (startIdx, idx + 1)


def formatLine(lineNumber, text):
    """
    Return `text` escaped and preceded by an anchor named `lineNumber`.
    """
    # NOTE(review): reconstructed markup; the anchor name is what the
    # "#<line>" links produced by formatFunction() point at.
    return '<a name="%s"></a>%s' % (lineNumber, escape(text))


def generateNumberedSource(sourceFilename, outputFilename):
    """
    Write `sourceFilename` to `outputFilename` as HTML with one anchor per
    line, numbered from 1 to agree with `Function.lineNumber`.

    A `.bak` copy of an existing output file is made first.
    """
    parts = []

    # NOTE(review): reconstructed markup.
    parts.append("<html><body><pre>")

    fh = open(sourceFilename)
    try:
        for lineIndex, text in enumerate(fh):
            parts.append(formatLine(lineIndex + 1, text))
    finally:
        fh.close()

    parts.append("</pre></body></html>")

    if os.path.exists(outputFilename):
        shutil.copy(outputFilename, "%s.bak" % outputFilename)

    _writeFile(outputFilename, "".join(parts))


def _emit(text):
    """
    Write `text` and a newline to standard output.
    """
    sys.stdout.write("%s\n" % text)


def generateDocumentation(source, funcs, sourceURL):
    """
    Write the HTML reference for `funcs` to standard output.

    * `source` - text of the Javascript file, searched for references.
    * `funcs` - dictionary of Function instances keyed by name.
    * `sourceURL` - URL of the line numbered source ("...-linenums.html").
    """
    # NOTE(review): reconstructed markup.  The original also printed a
    # block here whose content was lost (presumably a style sheet).
    # NOTE(review): the title says "Google Maps" and the body "Google
    # Calendar"; both are hard coded in the original.
    _emit("""<html><head>
<title>Google Maps Classes and Functions Reference</title>
""")

    _emit("</head><body>")

    sourceName = sourceURL.replace("-linenums.html","")

    _emit("""
<h1>Google Calendar Classes and Functions Reference</h1>

<p>Rough initial version.</p>

<p>Original name (as listed in %s) is given in square brackets and linked to the original source. Meta data is from <a href="%s">Google Calendar meta data file</a>. Generated by Javascript deobfuscation helper.</p>

""" % (sourceName, "funcs-%s.txt" % sourceName))

    sections = []
    toc = []

    # The (unique) original name breaks ties so that the Function objects
    # themselves are never compared.
    sortedFuncs = [(f.newName.lower() or f.name.lower(), f.name, f)
                   for f in funcs.values()]
    sortedFuncs.sort()

    for dummy, dummyName, f in sortedFuncs:
        tocEntry, section = formatFunction(f, sourceURL)
        toc.append(tocEntry)
        sections.append(section)

        references = getReferences(source, f.name)
        if references:
            sections.append(referenceList(f, references))

    _emit("<hr />")

    _emit(table(toc, 4))

    _emit("<hr />")

    _emit("\n".join(sections))

    _emit("</body></html>")


def usage():
    """
    Print the command line syntax and exit.
    """
    # NOTE(review): the argument placeholders were lost; reconstructed from
    # the way sys.argv is read by main().
    _emit("Usage: %s (-d <source file> | "
          "-u <old source file> <new source file>)" % sys.argv[0])
    raise SystemExit


# TODO: Make this method of Function?
def getArgsSig(f):
    """
    Return the list of argument names of `f` ([] when it takes none).

    Based on function/constructor args?
    """
    if f.args:
        fargs = f.args.split(",")
    else:
        fargs = []

    return fargs


def findSignatureMatches(targetFunction, allFunctions):
    """
    Return the members of the list `allFunctions` that look like
    `targetFunction`.

    Candidates are narrowed down step by step: identical method names,
    then equal argument count, then similar source, then similar source of
    the first method.  When nothing has identical method names a single
    close match of the method names is accepted instead.

    The result holds exactly one item when the match is unambiguous.
    """
    # TODO: Refactor copy & pasting...
    # TODO: Cache any of this stuff?

    # Method signatures
    # This is strongest match so we not check anything else?
    sigMatches = [mf for mf in allFunctions
                  if mf.signature == targetFunction.signature]

    # Constructor/function arg count signatures
    if len(sigMatches) > 1:
        numArgs = len(getArgsSig(targetFunction))
        # A new list is built: removing items from the list being iterated
        # skips the item after each removal.
        sigMatches = [mf for mf in sigMatches
                      if len(getArgsSig(mf)) == numArgs]

    # Constructor/function source code match
    if len(sigMatches) > 1:
        matchSources = [mf.source for mf in sigMatches]
        closeSources = difflib.get_close_matches(targetFunction.source,
                                                 matchSources, n = 2)
        if len(closeSources) == 1:
            sigMatches = [sigMatches[matchSources.index(closeSources[0])]]
        # TODO: Handle multiple closematches?

    # First method source code match
    if (len(sigMatches) > 1) and targetFunction.methods:
        # TODO: Check methods are present.
        # TODO: Check against all methods?
        matchSources = [mf.methods[0].source for mf in sigMatches]
        closeSources = difflib.get_close_matches(
            targetFunction.methods[0].source, matchSources, n = 2)
        if len(closeSources) == 1:
            sigMatches = [sigMatches[matchSources.index(closeSources[0])]]
        # TODO: Handle multiple close matches?

    # Close method signatures
    if not sigMatches:
        allSigs = [mf.signature for mf in allFunctions]
        closeSigs = difflib.get_close_matches(targetFunction.signature,
                                              allSigs, n = 2)
        if len(closeSigs) == 1:
            sigMatches = [allFunctions[allSigs.index(closeSigs[0])]]
        # TODO: Handle multiple close matches?

    return sigMatches


def _upgrade(funcs1, funcs2):
    """
    Copy metadata from the old functions `funcs1` onto the functions of
    `funcs2` they are matched with, and report what could not be matched.
    """
    allFuncs1 = list(funcs1.values())
    allFuncs2 = list(funcs2.values())

    # Every match removes a candidate, which may make other functions
    # unambiguous, so repeat until a whole pass matches nothing.
    itemMatched = True
    while allFuncs1 and allFuncs2 and itemMatched:
        itemMatched = False
        for f in allFuncs1[:]:
            sigMatches = findSignatureMatches(f, allFuncs2)
            if len(sigMatches) == 1:
                itemMatched = True
                matchedF = sigMatches[0]
                allFuncs2.remove(matchedF)
                allFuncs1.remove(f)
                matchedF.update(f)
                # TODO: Allow for manual confirmation.
                #print "\n------------------------------"
                #print f.name, matchedF.name, f.newName
                #print f.source
                #print matchedF.source

    if allFuncs1 and not allFuncs2:
        _emit("%d items exist in old file but not in new file." %
              len(allFuncs1))

    if allFuncs2 and not allFuncs1:
        _emit("%d items exist in new file but not in old file." %
              len(allFuncs2))

    if allFuncs1 and allFuncs2:
        _emit("%d items in old file and "
              "%d items in new file not matched." % (len(allFuncs1),
                                                     len(allFuncs2)))


def main(argv):
    """
    Run the tool with the command line `argv` (including program name).
    """
    sourceFilename2 = None

    try:
        option = argv[1]
        if option == "-d":
            sourceFilename1 = argv[2]
        elif option == "-u":
            sourceFilename1 = argv[2]
            sourceFilename2 = argv[3]
        else:
            usage()
    except IndexError:
        usage()

    source1 = _readFile(sourceFilename1)
    funcs1 = extractFunctionInfo(source1)
    exportFuncs("funcs-%s.txt" % sourceFilename1, funcs1)

    if option == "-d": # "d"ocument
        linenumsFilename = "%s-linenums.html" % sourceFilename1
        generateNumberedSource(sourceFilename1, linenumsFilename)
        generateDocumentation(source1, funcs1, linenumsFilename)

    elif option == "-u": # "u"pgrade
        source2 = _readFile(sourceFilename2)
        funcs2 = extractFunctionInfo(source2)

        _upgrade(funcs1, funcs2)

        exportFuncs("funcs-%s.txt" % sourceFilename2, funcs2)


if __name__ == "__main__":
    main(sys.argv)