[java2python] r164 committed - adds selector grammer and fiddling.

1 view
Skip to first unread message

codesite...@google.com

unread,
Jul 17, 2010, 7:02:14 AM7/17/10
to java2pyth...@googlegroups.com
Revision: 164
Author: troy.melhase
Date: Sat Jul 17 04:01:55 2010
Log: adds selector grammer and fiddling.
http://code.google.com/p/java2python/source/detail?r=164

Added:
/branches/0.5/java2python/compiler/transformer.py
/branches/0.5/java2python/lang/Selector.g
Modified:
/branches/0.5/bin/j2py
/branches/0.5/java2python/__init__.py
/branches/0.5/java2python/compiler/__init__.py
/branches/0.5/java2python/compiler/block.py
/branches/0.5/java2python/lang
/branches/0.5/java2python/lang/Makefile
/branches/0.5/java2python/lang/base.py
/branches/0.5/java2python/lib/__init__.py

=======================================
--- /dev/null
+++ /branches/0.5/java2python/compiler/transformer.py Sat Jul 17 04:01:55 2010
@@ -0,0 +1,165 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+
+# works:
+# * match any token
+# E match any token of type E
+# E F match any F token that is a descendant of an E token
+# E > F match any F token that is a child of an E token
+# E, F match any E token and any F token
+# E + F match any F token immediately preceded by a sibling token E
+# E[n] match any n-th child of E token
+
+# E:first-child match any E token when E is the first child of its parent
+# E[fu] match any E token with the 'fu' attribute set (any value)
+# E[fu="bar"] match any E token with the 'fu' attribute is exactly equal to 'bar'
+
+
+##
+# Configuration-based tree transformer.
+from java2python.lang import tokens
+from java2python.lang import SelectorLexer, SelectorParser
+
+
+from antlr3 import ANTLRStringStream, CommonTokenStream, Lexer, Parser
+from antlr3.tree import CommonTreeAdaptor, CommonTree
+
+
+
+def parseSelector(text):
+ stream = ANTLRStringStream(text)
+ lexer = SelectorLexer.SelectorLexer(stream)
+ tokenStream = CommonTokenStream(lexer)
+ parser = SelectorParser.SelectorParser(tokenStream)
+ val = parser.selector()
+ return [val.tree] #.children
+ #return [val.tree] + val.tree.children
+
+## mini selector language?
+
+s1 = "METHOD_CALL DOT DOT (IDENT IDENT)"
+s1 = [tokens.METHOD_CALL, tokens.DOT, tokens.DOT, []]
+s1 = "" # [method, dot, dot, [ident, ident]]
+selectors = (
+ [tokens.METHOD_CALL, ],
+ [tokens.METHOD_CALL, tokens.DOT, tokens.DOT, tokens.IDENT, ],
+ )
+
+class Transformer(object):
+ def __init__(self, configs=()):
+ self.configs = configs
+
+ def __call__(self, tree):
+ for selector in selectors:
+ if self.match(selector, tree):
+ print '######### match:: ', selector
+
+ def match(self, selector, tree):
+ if not selector:
+ return True # false?
+ first, more = selector[0], selector[1:]
+ print '######', first, more
+ if first == tree.type:
+ if more:
+ return self.match(more, tree.children[0])
+ return True
+ for node in tree.children:
+ if self.match(selector, node):
+ return True
+ def blah():
+ for node in tree.children:
+ #title = 'transform{0}'.format(tokens.title(tokens.map[node.type]))
+ #method = getattr(self, title, None)
+ #if method:
+ #method(node)
+ self(node)
+
+
+ def __transformMethodCall(self, node):
+ if node.children:
+ child = node.children[0]
+ if child.type == tokens.DOT:
+ if child.children:
+ gchild = child.children[0]
+ if gchild.type == tokens.DOT:
+ if len(gchild.children) == 2:
+ if gchild.children[0].type == tokens.IDENT and \
+ gchild.children[1].type == tokens.IDENT:
+ print '########## method call:', node, child.text,
+ print gchild.text, gchild.children
+
+
+
+def selectorTokenNames():
+ mapping = {}
+ for name in SelectorParser.tokenNames:
+ value = getattr(SelectorParser, name, None)
+ if value:
+ mapping[value] = name
+ return mapping
+
+selectorTokenNames = selectorTokenNames()
+
+
+def selectorDump(root, i=0):
+ token = root.token
+ typeName = selectorTokenNames[token.type]
+ text = token.text
+ if text != typeName:
+ print '{0}{1}:{2}'.format(' '*i, typeName, text)
+ else:
+ print '{0}{1}'.format(' '*i, typeName)
+ for child in root.children:
+ selectorDump(child, i+1)
+
+
+if __name__ == '__main__':
+ import sys
+ from java2python.config import Config
+ from java2python.compiler import buildAST
+
+ #source = open(sys.argv[1]).read()
+ #tree = buildAST(source, Config(()))
+ #tree.dump(sys.stdout)
+
+
+ print '0:'
+ for tree in parseSelector('*'):
+ selectorDump(tree)
+ print
+
+ print '1:'
+ for tree in parseSelector('FU'):
+ selectorDump(tree)
+ print
+
+ for tree in parseSelector('FU BAR BAZ'):
+ print '2:'
+ selectorDump(tree)
+ print
+
+
+ for tree in parseSelector('PAR > CHILD'):
+ print '4:'
+ selectorDump(tree)
+ print
+
+
+ for tree in parseSelector('AA BB[1] CC[2]'):
+ print '6:'
+ selectorDump(tree)
+ print
+
+
+ for tree in parseSelector('FIRST["r1"] SEC["r2"] ZZ[333] * YY'):
+ print '7:'
+ selectorDump(tree)
+ print
+
+
+
+ for tree in (): # parseSelector('DD["zz"] BB["text what"] ZZ[4] > AA[3] + YY[33]'):
+ print '8:'
+ selectorDump(tree)
+ print
=======================================
--- /dev/null
+++ /branches/0.5/java2python/lang/Selector.g Sat Jul 17 04:01:55 2010
@@ -0,0 +1,112 @@
+grammar Selector;
+
+
+options {
+ backtrack=true;
+ memoize=true;
+ language=Python;
+ output=AST;
+ ASTLabelType=CommonTree;
+}
+
+tokens {
+COMMA = ',' ;
+DQUOTE = '"' ;
+EQ = '=' ;
+GT = '>' ;
+LSQUARE = '[' ;
+PLUS = '+' ;
+RSQUARE = ']' ;
+STAR = '*' ;
+
+
+NTH;
+MATCH;
+MATCH_CHILD;
+MATCH_DESCEND;
+MATCH_NTH;
+MATCH_OP;
+MATCH_SIBLING;
+MATCH_STAR;
+MATCH_TEXT;
+MATCH_TYPE;
+OP;
+SELECTOR;
+TEXT;
+}
+
+
+selector
+ : expression+
+ -> ^(SELECTOR expression+)
+ ;
+
+
+expression
+ : primary
+ -> ^(primary)
+ | primary optail
+ -> ^(MATCH_OP primary optail)
+ | primary subtail
+ -> ^(MATCH_NTH primary subtail)
+ ;
+
+
+primary
+ : STAR -> ^(MATCH_STAR[$STAR])
+ | TYPE -> ^(MATCH_TYPE[$TYPE])
+ ;
+
+
+optail
+ : (GT expression*)
+ -> ^(OP['>'] expression*)
+ | (PLUS expression*)
+ -> ^(OP['+'] expression*)
+ ;
+
+
+subtail
+ : LSQUARE index RSQUARE expression*
+ -> ^(NTH[$index.text] expression*)
+ | LSQUARE text RSQUARE expression*
+ -> ^(TEXT[$text.text] expression*)
+ ;
+
+
+index : INDEX;
+text : LITERAL;
+
+
+// lexer
+
+
+LLETTERS : ('a'..'z');
+ULETTERS : ('A'..'Z');
+HASH : '#';
+USCORE : '_';
+
+
+fragment TYPE_START : ULETTERS+ ;
+fragment TYPE_PART : TYPE_START | USCORE ;
+TYPE : TYPE_START (TYPE_PART)* ;
+LITERAL : '"' ( ESCAPE_SEQUENCE | ~('\\'|'"') )* '"' ;
+
+fragment
+ESCAPE_SEQUENCE
+ : '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
+ | UNICODE_ESCAPE
+ | OCTAL_ESCAPE
+ ;
+
+fragment
+OCTAL_ESCAPE
+ : '\\' ('0'..'3') ('0'..'7') ('0'..'7')
+ | '\\' ('0'..'7') ('0'..'7')
+ | '\\' ('0'..'7')
+ ;
+
+fragment UNICODE_ESCAPE : '\\' 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT ;
+fragment HEX_DIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ;
+INDEX : ('0' | '1'..'9' '0'..'9'*);
+WS : (' '|'\r'|'\t'|'\u000C'|'\n') { $channel = HIDDEN };
=======================================
--- /branches/0.5/bin/j2py Wed Jul 14 12:10:27 2010
+++ /branches/0.5/bin/j2py Sat Jul 17 04:01:55 2010
@@ -8,7 +8,7 @@
from os.path import basename, splitext
from time import time

-from java2python.compiler import Module, buildAST
+from java2python.compiler import Module, buildAST, transformAST
from java2python.config import Config
from java2python.lib.colortools import colors

@@ -84,20 +84,25 @@
print 'IOError: %s.' % (msg, )
return code

+ config = Config(configs)
timed['comp']
try:
- tree = buildAST(source)
+ tree = buildAST(source, config)
except (Exception, ), exc:
exception('exception while parsing')
return 1
timed['comp_finish']

- module = Module(Config(configs))
- module.name = splitext(basename(filein))[0] if filein != '-' else '<stdin>'
timed['xform']
- module.walk(tree)
+ transformAST(tree, config)
timed['xform_finish']

+ timed['visit']
+ module = Module(config)
+ module.name = splitext(basename(filein))[0] if filein != '-' else '<stdin>'
+ module.walk(tree)
+ timed['visit_finish']
+
timed['encode']
source = unicode(module)
timed['encode_finish']
@@ -127,7 +132,8 @@
info('Generated source has valid syntax.')

info('Parse: %.4f seconds', timed['comp_finish'] - timed['comp'])
- info('Transform: %.4f seconds', timed['xform_finish'] - timed['xform'])
+ info('Visit: %.4f seconds', timed['visit_finish'] - timed['visit'])
+ #info('Transform: %.4f seconds', timed['xform_finish'] - timed['xform'])
info('Encode: %.4f seconds', timed['encode_finish'] - timed['encode'])
info('Total: %.4f seconds', timed['overall_finish'] - timed['overall'])
return 0
=======================================
--- /branches/0.5/java2python/__init__.py Thu Jul 1 15:12:49 2010
+++ /branches/0.5/java2python/__init__.py Sat Jul 17 04:01:55 2010
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+##
+# Top-level package marker for java2python.
+#
=======================================
--- /branches/0.5/java2python/compiler/__init__.py Wed Jul 14 12:11:11 2010
+++ /branches/0.5/java2python/compiler/__init__.py Sat Jul 17 04:01:55 2010
@@ -2,12 +2,13 @@
# -*- coding: utf-8 -*-

from java2python.compiler.block import Module
+from java2python.compiler.transformer import Transformer
from java2python.lang import (
Lexer, Parser, LocalSourceStream, LocalTokenStream, LocalTreeAdaptor
)


-def buildAST(source, configs=(), debug=False):
+def buildAST(source, config=None, debug=False):
sourceStream = LocalSourceStream(source)
sourceLexer = Lexer(sourceStream)
tokenStream = LocalTokenStream(sourceLexer)
@@ -30,6 +31,11 @@
return returnScope.tree


+def transformAST(tree, config):
+ return # bah
+ transformer = Transformer(config)
+ transformer(tree)
+

if __name__ == '__main__':
import sys
=======================================
--- /branches/0.5/java2python/compiler/block.py Wed Jul 14 12:10:27 2010
+++ /branches/0.5/java2python/compiler/block.py Sat Jul 17 04:01:55 2010
@@ -10,10 +10,10 @@
# Each of the base classes depends on the behavior of its counterpart.
# This means they're very tightly coupled and that the classes are not
# very reusable. The module split does allow for grouping of related
-# methods and does hide some of the more cluttered code.
+# methods and does hide some of the cluttered code.
#
# The template base class defines a factory for creating new block
-# instances. This was necessary to avoid lookups to the global
+# instances. This was necessary to avoid lookups to the module
# namespace for classes that weren't there. The factory also makes
# creating block instances a bit easier.
#
=======================================
--- /branches/0.5/java2python/lang/Makefile Wed Jul 14 11:32:04 2010
+++ /branches/0.5/java2python/lang/Makefile Sat Jul 17 04:01:55 2010
@@ -1,7 +1,7 @@
.PHONY: all clean
.SILENT: clean

-all: JavaParser.py JavaLexer.py
+all: JavaParser.py JavaLexer.py SelectorParser.py SelectorLexer.py


JavaParser.py: Java.g
@@ -12,11 +12,20 @@
antlr3 Java.g


+SelectorParser.py: Selector.g
+ antlr3 Selector.g
+
+SelectorLexer.py: Selector.g
+ antlr3 Selector.g
+
+
clean:
rm -f *.pyo
rm -f *.pyc
- rm -rf JavaParser.py
- rm -rf JavaLexer.py
- rm -rf Java.tokens
- rm -rf JavaTreeParser.tokens
- rm -rf Java__.g
+ rm -f *Parser.py
+ rm -f *Lexer.py
+ rm -f *.tokens
+
+
+
+
=======================================
--- /branches/0.5/java2python/lang/base.py Wed Jul 14 11:32:04 2010
+++ /branches/0.5/java2python/lang/base.py Sat Jul 17 04:01:55 2010
@@ -32,20 +32,17 @@


class Tokens(object):
- _m = None
+ """ Tokens -> simplifies token id-name and name-id mapping. """

def __init__(self):
- self.cache = {}
+ self.cache, self.parserModule = {}, None

def __getattr__(self, name):
return getattr(self.module, name)

@property
def commentTypes(self):
- ## perfomance optimization:
- return (181, 182, )
- ## should be:
- #return (self.module.COMMENT, self.module.LINE_COMMENT)
+ return (self.module.COMMENT, self.module.LINE_COMMENT, )

@property
def methodTypes(self):
@@ -63,11 +60,11 @@

@property
def module(self):
- m = self._m
- if m is None:
+ module = self.parserModule
+ if module is None:
import java2python.lang.JavaParser as module
- self._m = m = module
- return m
+ self.parserModule = module
+ return module

@staticmethod
def title(name):
@@ -233,6 +230,14 @@
node.lexer = getattr(self, 'lexer', None)
return node

+ @property
+ def parserTokens(self):
+ return self.parser.input.tokens[self.tokenStartIndex:self.tokenStopIndex]
+
+ def select(self, selector):
+ pass
+
+

class LocalTreeAdaptor(CommonTreeAdaptor):
treeType = LocalTree
=======================================
--- /branches/0.5/java2python/lib/__init__.py Wed Jul 7 22:26:47 2010
+++ /branches/0.5/java2python/lib/__init__.py Sat Jul 17 04:01:55 2010
@@ -1,44 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-from functools import partial
-from itertools import count, dropwhile
-from operator import not_
-from string import Template
-from sys import _getframe as getframe
-
-
-def expression(left='', right='', format='', **kwds):
- """ Sugar for creating a formatting value dictionary.
-
- """
- return dict(left=left, right=right, format=format, **kwds)
-
-
-passExpr = partial(expression, left='pass', right='', format='$left')
-
-
-def parameter(ident='', type='', modifiers='', variadic='', format='$ident', **kwds):
- if variadic:
- format = '*' + format
- return dict(
- ident=ident,
- type=type,
- modifiers=modifiers,
- variadic=variadic,
- format=format,
- param=True,
- )
-
-
-clsParam = partial(parameter, ident='cls', type='object')
-selfParam = partial(parameter, ident='self', type='object')
-
-
-def variable(ident='', cls=False, local=False, **kwds):
- """ Sugar for creating a property dictionary representing a
- variable
- """
- return dict(ident=ident, cls=cls, local=local, **kwds)


def getModule(name, reloaded=False):
@@ -71,73 +32,16 @@
return getattr(mod, itemname)


-def maybeAttr(obj, name, default=None):
- """ Returns named attribute or default.
-
- """
- return getattr(obj, name, default)
-
-
-def trimStrings(strings):
- """ Removes empty strings from the end of given sequence.
-
- @return list of strings without trailing empty strings
- """
- return list(reversed(list(dropwhile(not_, reversed(strings)))))
-
-
-def formatFloat(value):
- """ Turns a java float into a syntactically correct python float.
-
- """
- if value.startswith('.'):
- value = '0' + value
- if value.endswith(('f', 'd')):
- value = value[:-1]
- elif value.endswith(('l', 'L')):
- value = value[:-1] + 'L'
- return value
-
-
-nameCounter = count(0).next
-
-
-
-
-def iterAttrs(obj):
- for k in dir(obj):
- yield k, getattr(obj, k)
-
-
-def formatParameter(p):
- return Template(p['format']).substitute(p)
-
-
-def ruleName(depth=0, *paths):
- path = '/'.join(str(p) for p in paths)
- return getframe(1+depth).f_code.co_name + ('/' + path if path else '')
-
-
-def ruleNames(depth=15):
- return [ruleName(n) for n in range(depth+1)][1:]
-
-
-class Formats:
+class FS(object):
l = '{left}'
r = '{right}'
- c = '{center}'
- t = '{type}'
- tr = t + '(' + r + ')'
+ c = ':'
+ lc = l + c
lr = l + r
- dlr = '.' + lr
lsr = l + ' ' + r
- cond = l + ' if ' + c + ' else ' + r
- args = '(' + r + ')'
- largs = l + args
- assign = l + ' = ' + r
- tassign = l + ' = ' + t + '()'
- instance = 'isinstance(' + l + ', (' + t + ', ))'
-
+ lsrc = lsr + c
+
+ #instance = 'isinstance(' + l + ', (' + t + ', ))'
@classmethod
def op(cls, op):
if op == '>>>':
@@ -145,21 +49,3 @@
if op == '>>>=':
return '{left} = bsr({left}, {right})'
return cls.l + ' ' + op + ' ' + cls.r
-
-
-def isInt(v):
- return isinstance(v, (int, ))
-
-
-## the new
-
-class FS(object):
- l = '{left}'
- r = '{right}'
- c = ':'
- lc = l + c
- lr = l + r
- lsr = l + ' ' + r
- lsrc = lsr + c
-
-

Reply all
Reply to author
Forward
0 new messages