;;; -*- Mode: Irken -*- (include "lib/core.scm") (include "lib/pair.scm") (include "lib/alist.scm") (include "lib/string.scm") (include "lib/frb.scm") (include "lib/symbol.scm") (include "lib/io.scm") (include "parse/lexstep.scm") (include "lib/lexer.scm") ;; parser tables (include "parse/t2.scm") (datatype item (:t symbol (range) string) (:nt symbol (range) (list (item))) ) (datatype stack (:empty) (:elem (item) int (stack)) ) ;; this isn't very modular. yet. I'd like to get a generator-based parse going on here. ;; might even obviate the need for tracking position in the AST. [since lexer position ;; can propagate to the current parse error]. (define (parse file) (let ((token-gen (make-lex-generator file)) (paren-stack (list:nil)) (indents (list:cons 0 (list:nil))) (start-of-line #t) (held-token eof-token) (tok eof-token) ) (define get-indent ;; XXX handle or disallow tabs (token:t 'whitespace str _) -> (string-length str) ;; non-whitespace at the front of a line (token:t _ _ _) -> 0) (define (get-top-indent) (match indents with () -> 0 (indent . _) -> indent)) (define (next-token0) ;; process (i.e., filter/synthesize) the token stream (let loop () (cond ((not (eq? held-token eof-token)) (set! tok held-token) (set! held-token eof-token)) (else (set! tok (token-gen)) ;;(print "token-gen: ") (printn tok) )) ;;(print "next-token loop ") (printn start-of-line) (if start-of-line ;; in this state we might emit INDENT/DEDENT (match tok with (token:t sym val range) -> (let ((this-indent (get-indent tok)) (top-indent (get-top-indent))) (set! start-of-line #f) (set! held-token tok) (cond ((> this-indent top-indent) (set! indents (list:cons this-indent indents)) (token:t 'INDENT "" range)) ((< this-indent top-indent) (set! indents (cdr indents)) ;; go around again, might be more DEDENT (set! start-of-line #t) (token:t 'DEDENT "" range)) (else (loop))))) ;; in the middle of a line somewhere (match tok with (token:t 'NEWLINE _ _) -> (match paren-stack with () -> (begin (set! start-of-line #t) tok) _ -> (loop)) (token:t 'whitespace _ _) -> (loop) (token:t 'comment _ _) -> (loop) (token:t _ _ _) -> tok )) )) (define (next-token) (let ((t (next-token0))) (print-string "next-token: ") (printn t) t)) (let ((stack (stack:empty))) (define (get-state) (match stack with (stack:empty) -> 0 (stack:elem _ state _) -> state )) (define (lookup-action state kind) (let loop ((l actions[state])) (match l with (action-list:nil) -> (error "missing action?") (action-list:cons tkind action tl) -> (if (eq? terminals[tkind] kind) action (loop tl))))) (define (lookup-goto state nt) (let loop ((l goto[state])) (match l with (goto-list:nil) -> (error "missing goto?") (goto-list:cons nt0 new-state tl) -> (if (eq? nt0 nt) new-state (loop tl))))) (define (pop-n n) (let loop ((n n) (result (list:nil))) (if (= n 0) result (loop (- n 1) (list:cons (pop) result))))) (define (push item state) (set! stack (stack:elem item state stack))) (define (pop) (match stack with (stack:elem item _ rest) -> (begin (set! stack rest) item) (stack:empty) -> (error "stack underflow"))) (define (get-range args) (let loop ((args args) (l0 -1) (p0 -1) (l1 -1) (p1 -1)) (define test-range -1 tl (range:t l2 p2 l3 p3) -> (loop tl l2 p2 l3 p3) _ tl (range:t l2 p2 l3 p3) -> (loop tl l0 p0 l3 p3) _ tl (range:f) -> (loop tl l0 p0 l1 p1) ) (match l0 args with -1 () -> (range:f) _ () -> (range:t l0 p0 l1 p1) _ ((item:t _ r _) . tl) -> (test-range l0 tl r) _ ((item:nt _ r _) . tl) -> (test-range l0 tl r) ))) (let loop ((tok (next-token))) (cond ((eq? tok eof-token) (pop) (pop)) (else (print-string "token: ") (printn tok) (print-string "state: ") (printn (get-state)) ;;(print "indentation: ") (printn indentation) (vcase token tok ((:t kind val range) (let ((a (lookup-action (get-state) kind))) (vcase action a ((:shift state) (push (item:t kind range val) state) (loop (next-token))) ((:reduce plen nt) (let ((args (pop-n plen)) (next-state (lookup-goto (get-state) nt))) (push (item:nt non-terminals[nt] (get-range args) args) next-state)) (loop tok))) ))) ))) ))) (define indent 0 -> #t n -> (begin (print-string " ") (indent (- n 1)))) (define (print-parse-tree t) (let loop0 ((d 0) (t t)) (indent d) (match t with (item:t sym range str) -> (begin (print range) (print-string " ") (print sym) (print-string " ") (printn str)) (item:nt sym range items) -> (begin (print range) (print-string " ") (printn sym) (let loop1 ((l items)) (match l with () -> #u (hd . tl) -> (begin (loop0 (+ d 1) hd) (loop1 tl))))) ))) ;; print a parse tree out in a way that facilitates writing patterns for it. ;; XXX would be much easier to read if pretty-printed (define ppt (item:nt sym range items) -> (begin (print-string "(item:nt ") (print sym) (print-string " ") (ppt-list items) (print-string ")")) (item:t sym range str) -> (begin (print-string "(item:t ") (print sym) (print-string " \"") (print-string str) (print-string "\")")) ) (define (ppt-list l) (print-string "(") (ppt-list2 l)) (define ppt-list2 () -> (print-string ")") (hd . tl) -> (begin (ppt hd) (print-string " ") (ppt-list2 tl)) ) (datatype formal (:var string) ;;(:var-with-default string (expr)) ) (datatype literal (:int int) (:string string) (:none) ) (datatype params (:literal (literal)) (:varref string) (:function string (list (formal))) ;; (:primapp string) (:unparsed symbol) (:for (list (formal))) (:none) ) (define (perror where x) (print-string "decode error in ") (print-string where) (print-string ": ") (printn x) (error "decode error")) (define p-operator (item:nt _ _ ((item:t kind _ data))) -> data (item:t _ _ data) -> data x -> (perror "p-operator" x)) (define NR (range:f)) (define (make-varref name) {t='varref p=(params:varref name) subs='() range=NR}) (define p-binary-splat e () -> e e (op arg (item:nt _ _ splat)) -> {t='primapp p=(params:primapp (p-operator op)) subs=(LIST e (p-binary-splat (p-expr arg) splat)) range=NR} e x -> (perror "p-binary-splat" x) ) (define p-binary (e (item:nt _ _ splat)) -> (p-binary-splat (p-expr e) splat) x -> (perror "p-binary" x)) (define p-power (arg0 trailer (item:nt _ _ splat)) -> (p-binary-splat (p-trailer-splat (p-expr arg0) trailer) splat) x -> (perror "p-power" x)) (define p-factor (unary f) -> {t='primapp p=(params:primapp (p-operator unary)) subs=(LIST (p-expr f)) range=NR} (power) -> (p-expr power) x -> (perror "p-factor" x)) (define p-trailer-splat exp0 (item:nt _ _ ()) -> exp0 exp0 (item:nt _ _ (trailer splat)) -> (p-trailer-splat (p-trailer exp0 trailer) splat) exp0 x -> (perror "p-trailer-splat" x) ) (define pass-node {t='pass p=(params:none) subs='() range=NR}) (define (literal-string s r) {t='literal p=(params:literal (literal:string s)) subs='() range=r}) (define p-trailer exp0 (item:nt _ _ ((item:t 'lparen _ _) arglist _)) -> {t='call p=(params:none) range=NR subs=(list:cons exp0 (p-arglist arglist))} exp0 (item:nt _ _ ((item:t 'lbracket _ _) exp1 _)) -> {t='primapp p=(params:primapp "__getitem__") range=NR subs=(LIST exp0 (p-expr exp1))} exp0 (item:nt _ _ ((item:t 'dot _ _) (item:t 'NAME nr name))) -> {t='primapp p=(params:primapp "__getattr__") range=NR subs=(LIST exp0 (literal-string name nr))} exp0 x -> (perror "p-trailer" x) ) (define (p-formals formals) (define p-formals0 () -> (list:nil) (_ (item:t _ _ name) (item:nt _ _ splat)) -> (list:cons (formal:var name) (p-formals0 splat)) x -> (perror "p-formals0" x)) (match formals with ((item:nt _ _ ((item:t _ _ name0) (item:nt _ _ splat) _))) -> (list:cons (formal:var name0) (p-formals0 splat)) () -> (list:nil) x -> (perror "p-formals" x))) (define p-funcdef ;; 'def' NAME '(' ')' ':' (_ (item:t _ _ name) _ (item:nt _ _ formals) _ _ (item:nt _ _ body)) -> {t='function p=(params:function name (p-formals formals)) subs=(LIST (p-suite body)) range=NR} x -> (perror "p-funcdef" x)) (define p-lambda (_ (item:nt _ _ formals) _ body) -> {t='function p=(params:function "lambda" (p-formals formals)) subs=(LIST (p-expr body)) range=NR} x -> (perror "p-lambda" x)) (define sequence () -> {t='sequence p=(params:none) subs='() range=NR} (a) -> a l -> {t='sequence p=(params:none) subs=l range=NR} ) ;; (define p-sequence ;; acc () -> (sequence (reverse acc)) ;; acc (_ item (item:nt _ _ splat)) -> (p-sequence (list:cons (p-expr item) acc) splat) ;; acc x -> (perror "p-sequence" x)) (define p-testlist (test0 (item:nt _ _ splat) _) -> (p-sequence (LIST (p-expr test0)) splat) x -> (perror "p-testlist" x) ) (define p-simple-stmt (small (item:nt _ _ splat) _ _) -> (p-sequence (LIST (p-expr small)) splat) x -> (perror "p-simple-stmt" x) ) ;; this will parse any expr like this: (',' )* where ',' is a wildcard (define p-splat acc () -> (reverse acc) acc (_ item (item:nt _ _ splat)) -> (p-splat (list:cons (p-expr item) acc) splat) acc x -> (perror "p-splat" x) ) (define (p-sequence acc exp) (sequence (p-splat acc exp))) (define p-arglist (item:nt _ _ ()) -> '() (item:nt _ _ ((item:nt 'arglist _ (arg0 (item:nt _ _ splat) _)))) -> (p-splat (LIST (p-expr arg0)) splat) x -> (perror "arglist" x) ) (define p-argument ((item:nt _ _ ()) arg) -> (p-expr arg) ((item:nt _ _ (name _)) arg) -> (perror "named arguments not yet implemented" name) x -> (perror "p-argument" x) ) (define p-stmt+ (exp0) -> (LIST (p-expr exp0)) (exp0 (item:nt _ _ plus)) -> (list:cons (p-expr exp0) (p-stmt+ plus)) x -> (perror "p-stmt+" x)) (define p-suite ;; suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT (stmt) -> (p-expr stmt) (_ _ (item:nt _ _ stmts) _) -> (sequence (p-stmt+ stmts)) x -> (perror "p-suite" x)) (define p-return ;; return_stmt: 'return' [testlist] (_ (item:nt _ _ ())) -> {t='return p=(params:none) subs='() range=NR} (_ (item:nt _ _ ((item:nt _ _ val)))) -> {t='return p=(params:none) subs=(LIST (p-testlist val)) range=NR} x -> (perror "p-return" x)) (define p-raise ;; return_stmt: 'raise' [testlist] (_ (item:nt _ _ ())) -> {t='raise p=(params:none) subs=(LIST pass-node) range=NR} (_ (item:nt _ _ ((item:nt _ _ val)))) -> {t='raise p=(params:none) subs=(LIST (p-testlist val)) range=NR} x -> (perror "p-raise" x)) (define p-elif-splat () -> '() ;; ('elif' test ':' suite)* (_ test _ (item:nt _ _ body) (item:nt _ _ splat)) -> (append (LIST (p-expr test) (p-suite body)) (p-elif-splat splat)) x -> (perror "p-elif-splat" x)) (define p-else () -> pass-node (_ _ (item:nt _ _ body)) -> (p-suite body) x -> (perror "p-else" x)) (define p-if-stmt ;; if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] (_ test _ (item:nt _ _ body) (item:nt _ _ splat) (item:nt _ _ else)) ;; urgh, this is a mess. should try to turn it into a ternary-if, or a cond, or something. ;; probably the cleanest way is to pass down to p-elif-splat -> {t='if p=(params:none) subs=(append (LIST (p-expr test) (p-suite body)) (append (p-elif-splat splat) (LIST (p-else else)))) range=NR} x -> (perror "p-if-stmt" x)) (define p-while-stmt ;; while_stmt: 'while' test ':' suite ['else' ':' suite] (_ test _ (item:nt _ _ body) (item:nt _ _ else)) -> {t='while p=(params:none) subs=(LIST (p-expr test) (p-suite body) (p-else else)) range=NR} x -> (perror "p-while-stmt" x)) (define p-for-stmt ;; for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] (_ (item:nt _ _ vars) _ (item:nt _ _ src) _ (item:nt _ _ body) (item:nt _ _ else)) -> {t='for p=(params:none) subs=(LIST (p-testlist vars) (p-testlist src) (p-suite body) (p-else else)) range=NR} x -> (perror "p-for-stmt" x) ) (define p-list () -> (list:nil) (x . y) -> (list:cons (p-expr x) (p-list y)) ) (define p-not-test (a) -> (p-expr a) (not a) -> {t='primapp p=(params:primapp "not") subs=(LIST (p-expr a)) range=NR} x -> (perror "p-not-test" x) ) (define p-one (a) -> (p-expr a) x -> (perror "p-one" x)) (define p-simple ((item:t 'break _ _)) -> {t='break p=(params:none) subs='() range=NR} ((item:t 'pass _ _)) -> {t='pass p=(params:none) subs='() range=NR} ((item:t 'continue _ _)) -> {t='continue p=(params:none) subs='() range=NR} x -> (perror "p-simple" x)) (define (strip-quotes s) (substring s 1 (- (string-length s) 1))) (define p-string+ (item:nt _ _ ((item:t _ _ s))) -> (LIST (strip-quotes s)) (item:nt _ _ ((item:t _ _ s) splat)) -> (list:cons (strip-quotes s) (p-string+ splat)) x -> (perror "p-string+" x)) (define p-atom ((item:t 'NUMBER r val)) -> {t='literal subs='() p=(params:literal (literal:int (string->int val))) range=r } ((item:t 'NAME r val)) -> {t='varref subs='() p=(params:varref val) range=r } (string+) -> {t='literal subs='() p=(params:literal (literal:string (string-concat (p-string+ string+)))) range=NR } x -> (perror "p-atom" x)) (define (p-file-input l) (let loop ((acc (list:nil)) (l l)) (match l with () -> (sequence (reverse acc)) ((item:nt _ _ ((item:t 'NEWLINE _ _))) (item:nt _ _ splat)) -> (loop acc splat) ;; ignore NEWLINE tokens ((item:nt _ _ (item0)) (item:nt _ _ splat)) -> (loop (list:cons (p-expr item0) acc) splat) x -> (perror "p-file-input" x)) )) (define parse-table (alist/make ('expr p-binary) ('xor_expr p-binary) ('and_expr p-binary) ('shift_expr p-binary) ('arith_expr p-binary) ('term p-binary) ('comparison p-binary) ('or_test p-binary) ('and_test p-binary) ('factor p-factor) ('power p-power) ('test p-one) ('not_test p-not-test) ('lambdef p-lambda) ('testlist p-testlist) ('exprlist p-testlist) ('expr_stmt p-binary) ('small_stmt p-one) ('simple_stmt p-simple-stmt) ('stmt p-one) ('file_input p-file-input) ('compound_stmt p-one) ('funcdef p-funcdef) ('suite p-suite) ('flow_stmt p-one) ('if_stmt p-if-stmt) ('while_stmt p-while-stmt) ('for_stmt p-for-stmt) ('break_stmt p-simple) ('continue_stmt p-simple) ('pass_stmt p-simple) ('raise_stmt p-raise) ('return_stmt p-return) ('atom p-atom) ('argument p-argument) )) ;; XXX this is mis-named. is a dispatcher for any node, not only the production. (define p-expr (item:t kind r val) -> {t='unparsed p=(params:unparsed kind) subs=(LIST (literal-string val r)) range=r} (item:nt kind r val) -> (match (alist/lookup parse-table kind) with ;; not in the table, mark it as unparsed (maybe:no) -> {t='unparsed p=(params:unparsed kind) subs=(p-list val) range=r} ;; in the table - parse it and attach a range (maybe:yes fun) -> (let ((n0 (fun val))) (%rset/range n0 r) n0) )) (define (pprint-node n d) ;;(print n.range) ;;(print-string "\t") (indent d) (print n.t) (print-string " ") (printn n.p) (for-each (lambda (n) (pprint-node n (+ d 1))) n.subs) ) (define (parse-file path) (parse (file/open-read path))) ;(include "vm/vm.scm") (datatype lenv (:rib (list (formal)) (lenv)) (:nil) ) (datatype insn (:literal (literal) (cont)) ;; (:return int) ;; return register (:primop string (list int) (cont)) ;; (:test int (insn) (insn) (cont)) ;; (:jump int (cont)) ;; (:close string (insn) (cont)) ;; (:varref int int (cont)) ;; (:new-env int (cont)) ;; (:store-tuple int int int int (cont)) ;; (:invoke int int (cont)) ;; (:invoke-tail int int (cont)) ;; (:global string (cont)) ;; ) (datatype cont (:k int (list int) (insn)) ) (define (max a b) (if (> a b) a b)) (define (register-allocator) (let ((max-reg -1)) (define (allocate free) (let loop ((i 0)) (if (member? i free =) (loop (+ i 1)) (begin (set! max-reg (max i max-reg)) i)))) (define (get-max) max-reg) {allocate = allocate get-max = get-max} )) (define the-register-allocator (register-allocator)) (define (cont free generator) (let ((reg (the-register-allocator.allocate free))) (cont:k reg free (generator reg)))) (define (dead free k) (cont:k -1 free k)) (define k/free (cont:k _ free _) -> free) (define k/target (cont:k target _ _) -> target) (define (compile tail? node lenv k) (if tail? (set! k (cont (k/free k) gen-return)) #u) (match node.t node.p with 'return (params:none) -> (c-return node.subs lenv k) 'literal (params:literal val) -> (c-literal val k) 'sequence (params:none) -> (c-sequence tail? node.subs lenv k) 'if (params:none) -> (c-conditional tail? node lenv k) 'function (params:function name formals) -> (c-function name formals node.subs lenv k) 'varref (params:varref name) -> (c-varref name lenv k) 'primapp (params:primapp name) -> (c-primapp name node.subs lenv k) 'call (params:none) -> (c-call tail? node.subs lenv k) 'pass (params:none) -> (c-literal (literal:none) k) _ _ -> (error node.t) )) (define (c-return subs lenv k) (match subs with () -> (insn:literal (literal:none) k) (val) -> (compile #t val lenv k) _ -> (error "multiple return values") )) (define (c-literal val k) (insn:literal val k)) (define (c-sequence tail? nodes lenv k) (match nodes with () -> (error "empty sequence?") (exp) -> (compile tail? exp lenv k) (hd . tl) -> (compile #f hd lenv (dead (k/free k) (c-sequence tail? tl lenv k))) )) (define (c-primapp prim args lenv k) (c-primargs prim args lenv k)) (define (c-primargs prim args lenv k) (collect-primargs args '() lenv k (lambda (regs) (insn:primop prim regs k)))) (define (collect-primargs args regs lenv k ck) (match args with () -> (ck regs) (hd . tl) -> (compile #f hd lenv (cont (append (k/free k) regs) (lambda (reg) (collect-primargs tl (cons reg regs) lenv k ck)))) )) (define (c-conditional tail? exp lenv k) (match exp.subs with (test then else) -> (compile #f test lenv (cont (k/free k) (lambda (reg) (insn:test reg (compile tail? then lenv (cont (k/free k) (lambda (reg) (insn:jump reg k)))) (compile tail? else lenv (cont (k/free k) (lambda (reg) (insn:jump reg k)))) k)) )) _ -> (error "c-conditional") )) (define search-rib name _ () -> (maybe:no) name i ((formal:var fname) . tl) -> (if (string=? fname name) (maybe:yes i) (search-rib name (+ i 1) tl)) ) (define lexical-address name _ (lenv:nil) -> (:global name) name d (lenv:rib formals lenv) -> (match (search-rib name 0 formals) with (maybe:yes i) -> (:pair d i) (maybe:no) -> (lexical-address name (+ d 1) lenv) )) (define (c-varref name lenv k) (match (lexical-address name 0 lenv) with (:pair depth index) -> (insn:varref depth index k) (:global name) -> (insn:global name k) )) (define extend-lenv () lenv -> lenv ;; don't extend with an empty rib fs lenv -> (lenv:rib fs lenv) ) (define (c-function name formals body lenv k) ;; XXX should verify len(body)==1 (insn:close name (compile #t (car body) (extend-lenv formals lenv) (cont '() gen-return)) k)) (define (c-call tail? subs lenv k) (let ((gen-invoke (if tail? gen-invoke-tail gen-invoke))) (match subs with (fun . args) -> (letrec ((make-application (lambda (args-reg) (compile #f fun lenv (cont (cons args-reg (k/free k)) (lambda (closure-reg) (gen-invoke closure-reg args-reg k))))))) (if (> (length args) 0) (compile-args args lenv (cont (k/free k) make-application)) (make-application -1))) () -> (error "c-call: no function?") ))) (define (compile-args args lenv k) (match args with () -> (insn:new-env 0 k) _ -> (let ((nargs (length args))) (insn:new-env nargs (cont (k/free k) (lambda (tuple-reg) (compile-store-args 0 1 nargs args tuple-reg (cons tuple-reg (k/free k)) lenv k))))) )) (define (compile-store-args i offset nargs args tuple-reg free-regs lenv k) (compile #f (car args) lenv (cont free-regs (lambda (arg-reg) (insn:store-tuple offset arg-reg tuple-reg i (if (< (+ i 1) nargs) (dead free-regs (compile-store-args (+ i 1) offset nargs (cdr args) tuple-reg free-regs lenv k)) k)))) )) (define (gen-return reg) (insn:return reg)) (define (gen-invoke closure-reg args-reg k) (insn:invoke closure-reg args-reg k)) (define (gen-invoke-tail closure-reg args-reg k) (insn:invoke-tail closure-reg args-reg k)) (define (print-insn insn d) (define (print-line print-info k) (match k with (cont:k target free k0) -> (begin (newline) (indent d) (print target) ;;(print-string " ") ;;(print free) (print-string " ") (print-info) (print-insn k0 d) ))) (define (ps x) (print x) (print-string " ")) (match insn with (insn:literal lit k) -> (print-line (lambda () (print-string "lit ") (print lit)) k) (insn:return target) -> (begin (newline) (indent d) (print-string "- ret ") (print target)) (insn:primop prim args k) -> (print-line (lambda () (print-string "prim ") (ps prim) (ps args)) k) (insn:test reg then else k) -> (print-line (lambda () (print-string "test ") (print reg) (print-insn then (+ d 1)) (print-insn else (+ d 1))) k) (insn:jump reg k) -> (print-line (lambda () (print-string "jmp ") (print reg)) k) (insn:close name body k) -> (print-line (lambda () (print-string "close ") (print name) (print-insn body (+ d 1))) k) (insn:varref d i k) -> (print-line (lambda () (print-string "ref ") (ps d) (ps i)) k) (insn:store-tuple o a t i k) -> (print-line (lambda () (print-string "stor ") (ps o) (ps a) (ps t) (ps i)) k) (insn:invoke c a k) -> (print-line (lambda () (print-string "invoke ") (ps c) (ps a)) k) (insn:invoke-tail c a k) -> (print-line (lambda () (print-string "tail ") (ps c) (ps a)) k) (insn:new-env n k) -> (print-line (lambda () (print-string "env ") (ps n)) k) (insn:global name k) -> (print-line (lambda () (print-string "glbl ") (ps name)) k) )) (let ((path (if (> sys.argc 1) sys.argv[1] "tests/parse_2.py")) (ast (parse-file path))) (printn ast) (print-parse-tree ast) (ppt ast) (newline) (let ((root (p-expr ast))) (pprint-node root 0) (print-string "insns:") (let ((insns (compile #t root (lenv:nil) (cont (list:nil) gen-return)))) (print-insn insns 1) (newline) ) ))