import lexer import gen_parser import gen_irken keywords = [ 'and', 'as', 'assert', 'break', 'class', 'continue', 'def', 'del', 'elif', 'else', 'except', 'exec', 'finally', 'for', 'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'not', 'or', 'pass', 'print', 'raise', 'return', 'try', 'while', 'with', 'yield' ] literals = [ ('%', 'percent'), ('&', 'ampersand'), ('(', 'lparen'), (')', 'rparen'), ('*', 'splat'), ('**', 'splatsplat'), ('+', 'plus'), (',', 'comma'), ('-', 'minus'), ('.', 'dot'), ('/', 'slash'), ('//', 'slashslash'), (':', 'colon'), (';', 'semicolon'), ('<<', 'lshift'), ('=', 'equals'), ('>>', 'rshift'), ('@', 'atsign'), ('[', 'lbracket'), (']', 'rbracket'), ('^', 'caret'), ('`', 'backquote'), ('{', 'lbrace'), ('|', 'vbar'), ('}', 'rbrace'), ('~', 'tilde'), ] def safe (lit): r = [] for ch in lit: if ch in ".()[]*+|?": r.append ('\\') r.append (ch) return ''.join (r) def make_alt (choices): return '(' + '|'.join (choices) + ')' augassign = make_alt (r'\+ - \* / % & \| \^ << >> \*\* //'.split()) + '=' comp_op = make_alt (['<','>','==','>=','<=','<>','!=']) string = make_alt ([r'"([^\\\n"]|(\\.))*"', r"'([^\\\n']|(\\.))*'"]) lexicon = [ (r'[ \t]+', 'whitespace'), (r'[ \t]*#[^\n]*\n', 'comment'), (r'[\n]', 'NEWLINE'), # put keywords before NAME to resolve ambiguity. ] + [(k,k) for k in keywords] + [ (r'[A-Za-z_][A-Za-z0-9_]*', 'NAME'), (r'[0-9]+', 'NUMBER'), (string, 'STRING'), (comp_op, 'COMP_OP'), (augassign, 'AUGASSIGN'), ] + [(safe(k),v) for (k,v) in literals] lit_map = {} for k, v in literals: lit_map[k] = v for k in keywords: lit_map[k] = k def make_lexer(): m = lexer.lexer (lexicon) #m.read (open ("../nodes.py")) m.gen_irken (open ("lexstep.scm", 'wb')) def make_parser (path): import meta import os from pprint import pprint as pp base, ext = os.path.splitext (path) g = meta.parse_grammar (path) t = gen_parser.translator (g, lits=lit_map) t.gen() #pp (t.rules) #pp (t.terminals) t.emit_python (base) import sys if len(sys.argv) > 1: base = sys.argv[1] else: base = 't0' # XXX check the timestamp on lexer.py vs lexstep.scm #make_lexer() make_parser (base + '.g') exec ("import %s" % (base,)) spec = eval ("%s.spec" % (base,)) tables = gen_irken.build_tables (spec) file = open ('%s.scm' % (base,), 'wb') gen_irken.gen_irken (file, tables)