# -*- Mode: Python -*-
# This uses Jason Evans' Parsing.py LR(1) module.
# http://www.canonware.com/Parsing/
#
# a parser for the BNF-like python meta-grammar.
#
# use the builtin python tokenizer on the grammar,
# just like python does when building itself.
# grammar grammar grammar grammar grammar....
import Parsing
import sys
# Short alias for the builtin isinstance.  Nothing in the visible part of
# this file calls it.
is_a = isinstance
# Common base for all terminals declared below: a Parsing.Token that also
# remembers the text it was built from.
class token(Parsing.Token):

    def __init__(self, parser, val):
        # `parser` is handed straight to the base class; `val` is kept
        # unchanged so the reduce methods can read it back as `.val`.
        Parsing.Token.__init__(self, parser)
        self.val = val


# Shorthand used as the base class in the terminal declarations.
T = token
# Terminal built by parser.translate_token for tokenizer 'NAME' tokens.
# The string in the class body is a grammar directive; keep it verbatim.
class t_IDENT(T):
    "%token IDENT"
# Terminal built by parser.translate_token for tokenizer 'STRING' tokens.
# The string in the class body is a grammar directive; keep it verbatim.
class t_STRING(T):
    "%token STRING"
# Terminal for the ':' operator (see parser.op_toks).
# The string in the class body is a grammar directive; keep it verbatim.
class t_COLON(T):
    "%token COLON"
# Terminal for the '(' operator (see parser.op_toks).
# The string in the class body is a grammar directive; keep it verbatim.
class t_LPAREN(T):
    "%token LPAREN"
# Terminal for the ')' operator (see parser.op_toks).
# The string in the class body is a grammar directive; keep it verbatim.
class t_RPAREN(T):
    "%token RPAREN"
# Terminal for the '[' operator (see parser.op_toks).
# The string in the class body is a grammar directive; keep it verbatim.
class t_LBRACKET(T):
    "%token LBRACKET"
# Terminal for the ']' operator (see parser.op_toks).
# The string in the class body is a grammar directive; keep it verbatim.
class t_RBRACKET(T):
    "%token RBRACKET"
# Terminal for the '*' operator (see parser.op_toks).
# The string in the class body is a grammar directive; keep it verbatim.
class t_STAR(T):
    "%token STAR"
# Terminal for the '+' operator (see parser.op_toks).
# The string in the class body is a grammar directive; keep it verbatim.
class t_PLUS(T):
    "%token PLUS"
# Terminal for the '|' operator (see parser.op_toks).
# The string in the class body is a grammar directive; keep it verbatim.
class t_VBAR(T):
    "%token VBAR"
# Terminal built by parser.translate_token for tokenizer 'NEWLINE' tokens.
# The string in the class body is a grammar directive; keep it verbatim.
class t_NEWLINE(T):
    "%token NEWLINE"
# Shorthand used as the base class of the nonterminals declared below.
NT = Parsing.Nonterm
# Start symbol: a sequence of one or more `rule`s.  Its value is the
# concatenation of the lists produced by the individual rules.
# The strings in the class and method bodies are grammar directives.
class rules(NT):
    "%start"

    def reduce_0(self, *args):
        "%reduce rule"
        first_rule = args[0]
        self.val = first_rule.val

    def reduce_1(self, *args):
        "%reduce rules rule"
        collected = args[0].val
        latest = args[1].val
        self.val = collected + latest
# One line of the grammar.  Its value is always a list so that `rules` can
# concatenate: a single (name, alternatives) pair for a real rule, or an
# empty list for a lone NEWLINE.
# The strings in the class and method bodies are grammar directives.
class rule(NT):
    "%nonterm"

    def reduce_0(self, *args):
        "%reduce IDENT COLON alts NEWLINE"
        rule_name = args[0].val
        body = args[2].val
        self.val = [(rule_name, body)]

    def reduce_1(self, *args):
        "%reduce NEWLINE"
        # Nothing but a NEWLINE token: contributes no rule.
        self.val = []
# Tail of an alternation: zero or more "| items" groups.  The value is []
# when there is no '|' at all, otherwise ('or', [items, items, ...]) with
# one entry per '|' seen so far.
# The strings in the class and method bodies are grammar directives.
class alts2(NT):
    "%nonterm"

    def r_0(self, *args):
        "%reduce alts2 VBAR items"
        earlier = args[0].val
        branch = args[2].val
        # `earlier` is [] for the first '|', or an ('or', branches) pair
        # whose second slot holds the branches gathered so far.
        gathered = earlier[1] if len(earlier) else []
        self.val = ('or', gathered + [branch])

    def r_1(self, *args):
        "%reduce"
        self.val = []
# A full alternation: a first `items` sequence followed by an `alts2` tail.
# With an empty tail the value is just the first sequence; otherwise it is
# ('or', [first, *tail branches]).
# The strings in the class and method bodies are grammar directives.
class alts(NT):
    "%nonterm"

    def r_0(self, *args):
        "%reduce items alts2"
        head = args[0].val
        tail = args[1].val
        self.val = ('or', [head] + tail[1]) if len(tail) else head
# One or more `item`s in sequence.  Each item's value is already a list, so
# the sequence is built by list concatenation.
# The strings in the class and method bodies are grammar directives.
class items(NT):
    "%nonterm"

    def reduce_0(self, *args):
        "%reduce item"
        single = args[0]
        self.val = single.val

    def reduce_1(self, *args):
        "%reduce items item"
        prefix = args[0].val
        appended = args[1].val
        self.val = prefix + appended
# A single item: either an `optional` or a `kleene`.  The child's value is
# wrapped in a one-element list so that `items` can concatenate.
# The strings in the class and method bodies are grammar directives.
class item(NT):
    "%nonterm"

    def reduce_0(self, *args):
        "%reduce optional"
        inner = args[0].val
        self.val = [inner]

    def reduce_1(self, *args):
        "%reduce kleene"
        inner = args[0].val
        self.val = [inner]
# The atoms of the grammar: an identifier (value: its token value), a
# parenthesised group (value: the group's value), or a string literal
# (value: ('lit', token value)).
# The strings in the class and method bodies are grammar directives.
class element(NT):
    "%nonterm"

    def reduce_0(self, *args):
        "%reduce IDENT"
        ident = args[0]
        self.val = ident.val

    def reduce_1(self, *args):
        "%reduce group"
        grouped = args[0]
        self.val = grouped.val

    def reduce_2(self, *args):
        "%reduce STRING"
        literal = args[0].val
        self.val = ('lit', literal)
# An element with an optional repetition suffix.  A bare element keeps its
# own value; '*' wraps it as ('star', value) and '+' as ('plus', value).
# The strings in the class and method bodies are grammar directives.
class kleene(NT):
    "%nonterm"

    def reduce_0(self, *args):
        "%reduce element"
        bare = args[0]
        self.val = bare.val

    def reduce_1(self, *args):
        "%reduce element STAR"
        repeated = args[0].val
        self.val = ('star', repeated)

    def reduce_2(self, *args):
        "%reduce element PLUS"
        repeated = args[0].val
        self.val = ('plus', repeated)
# A parenthesised alternation.  The parentheses are dropped: the value is
# simply the value of the enclosed `alts`.
# The strings in the class and method bodies are grammar directives.
class group(NT):
    "%nonterm"

    def reduce_0(self, *args):
        "%reduce LPAREN alts RPAREN"
        enclosed = args[1]
        self.val = enclosed.val
# A bracketed alternation.  The value is ('optional', value of the enclosed
# `alts`).
# The strings in the class and method bodies are grammar directives.
class optional(NT):
    "%nonterm"

    def reduce_0(self, *args):
        "%reduce LBRACKET alts RBRACKET"
        enclosed = args[1].val
        self.val = ('optional', enclosed)
# LR driver for the meta-grammar.  It consumes (token_name, text) pairs as
# produced by tokenize_grammar, converts each one into one of the t_*
# terminals and feeds it to the Parsing.Lr machinery.
# (Documented with comments rather than docstrings because docstrings in
# this module double as grammar directives.)
class parser(Parsing.Lr):

    def scan(self, stream):
        # Feed every (code, val) pair from `stream` to the parser and return
        # the value of the parse result.
        #
        # `stream` is any iterable of (token_name, text) pairs.  Input ends
        # at the first 'ENDMARKER' pair, or when the iterable runs out; in
        # both cases end-of-input is signalled exactly once.
        #
        # Errors raised by translate_token (ValueError, KeyError) and by the
        # Parsing module propagate to the caller.
        for code, val in stream:
            if code == 'ENDMARKER':
                break
            self.token(self.translate_token(code, val))
        self.eoi()
        # NOTE(review): this reaches into Parsing.Lr's private stack;
        # presumably slot [1][0] holds the reduced start symbol once eoi()
        # has returned -- confirm against the Parsing module.
        r = self._stack[1][0]
        return r.val

    # Maps the text of an 'OP' token to the terminal class that represents it.
    op_toks = {
        ':': t_COLON,
        '(': t_LPAREN,
        ')': t_RPAREN,
        '[': t_LBRACKET,
        ']': t_RBRACKET,
        '+': t_PLUS,
        '*': t_STAR,
        '|': t_VBAR,
    }

    def translate_token(self, code, val):
        # Translate a token from the tokenize module into one acceptable to
        # this parser.
        #
        # `code` is the tokenizer's token name and `val` the token text.
        # Returns a new t_* instance bound to this parser.
        # Raises KeyError for an 'OP' whose text is not in op_toks, and
        # ValueError for any other token name that is not handled.
        if code == 'NAME':
            return t_IDENT(self, val)
        if code == 'STRING':
            return t_STRING(self, val)
        if code == 'OP':
            return self.op_toks[val](self, val)
        if code == 'NEWLINE':
            return t_NEWLINE(self, val)
        raise ValueError('unexpected token %s %r' % (code, val))
def tokenize_grammar(filename):
    # Generate (token_name, text) pairs for the grammar file `filename`,
    # using Python's own tokenizer.
    #
    # Layout-only tokens are dropped: comments, 'NL' (see the tokenize docs
    # to grok the 'NL' token) and INDENT together with its matching DEDENT,
    # since the grammar has no block structure.  The final pair is the
    # tokenizer's 'ENDMARKER'.
    #
    # The file is read and tokenized completely on the first request, so the
    # handle is closed before any pair is handed out, even if the consumer
    # stops early.  It is opened in text mode so the tokenizer receives str
    # lines on both Python 2 and Python 3.
    #
    # Errors from open() and from the tokenizer propagate to the caller.
    # (Documented with comments rather than a docstring because docstrings
    # in this module double as grammar directives.)
    import tokenize
    skipped = ('COMMENT', 'NL', 'INDENT', 'DEDENT')
    with open(filename) as f:
        tokens = list(tokenize.generate_tokens(f.readline))
    for tok in tokens:
        # tok[0] is the numeric token type, tok[1] the token text.
        name = tokenize.tok_name[tok[0]]
        if name not in skipped:
            yield (name, tok[1])
# Parser specification, built once at import time.  This module itself
# (sys.modules[__name__]) is handed to Parsing.Spec, which is expected to
# collect the "%token" / "%nonterm" / "%reduce" directives from the classes
# defined in it.
# NOTE(review): "meta.pickle" is a relative path, so it presumably resolves
# against the current working directory, and it is presumably a cache of the
# generated tables -- confirm against the Parsing documentation.
# The commented-out keyword arguments are debugging switches.
spec = Parsing.Spec (
sys.modules[__name__],
pickleFile="meta.pickle",
#skinny=False,
#logFile="meta.log",
#graphFile="meta.dot",
#verbose=True
)
def parse_grammar(filename):
    # Parse the grammar file `filename` and return the value produced by
    # parser.scan: the token stream from tokenize_grammar is run through a
    # fresh parser built on the module-level `spec`.
    # (Comment rather than docstring: docstrings in this module double as
    # grammar directives.)
    driver = parser(spec)
    # Set driver.verbose = True here to trace the parse.
    token_stream = tokenize_grammar(filename)
    return driver.scan(token_stream)
# Command-line entry point: pretty-print the parsed form of the grammar file
# named by the first argument, or print a usage hint when none is given.
if __name__ == '__main__':
    from pprint import pprint as pp
    if len(sys.argv) < 2:
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print('Usage: %s <Grammar>' % (sys.argv[0],))
        print(' try the python grammar from the source distribution:')
        print(' e.g., "Python-2.6.4/Grammar/Grammar"')
    else:
        pp(parse_grammar(sys.argv[1]))