;; -*- Mode: Irken -*-
;; test parser for a simple predicate language.
;; the grammar is defined in parse/t1.g, which generates parse/t1.scm
;; the python lexer is defined in parse/lexer.py, which generates parse/lexstep.scm
;; the sample file is in tests/parse_0.py
;;
;; the lexer produces tokens with attached line/column ranges, which are ignored here.
(include "lib/core.scm")
(include "lib/pair.scm")
(include "lib/string.scm")
(include "lib/io.scm")
(include "parse/lexstep.scm")
(include "lib/lexer.scm")
;; parser tables
(include "parse/t1.scm")
;; stack = (:elem item state stack) | (:empty)
;; item = (:nt kind (list (item 'a))) | (:t symbol)
;; args = (:cons item args) | (:nil)
;; state = int
;; kind = symbol
(datatype item
(:nt symbol (range) (list (item)))
(:t symbol (range) string)
)
(datatype stack
(:empty)
(:elem (item) int (stack))
)
(define (parse path)
(let ((file (file/open-read path))
(token-gen (make-lex-generator file))
(paren-stack '())
(indents (LIST 0))
(start-of-line #t)
(held-token eof-token)
(tok eof-token)
)
(define get-indent
;; XXX handle or disallow tabs
(token:t 'whitespace str _) -> (string-length str)
;; non-whitespace at the front of a line
(token:t _ _ _ ) -> 0)
(define (get-top-indent)
(match indents with
() -> 0
(indent . _) -> indent))
(define (next-token)
;; process (i.e., filter/synthesize) the token stream
(let loop ()
(cond ((not (eq? held-token eof-token))
(set! tok held-token)
(set! held-token eof-token))
(else
(set! tok (token-gen))
;;(print "token-gen: ") (printn tok)
))
;;(print "next-token loop ") (printn start-of-line)
(if start-of-line
;; in this state we might emit INDENT/DEDENT
(match tok with
(token:t sym val range)
-> (let ((this-indent (get-indent tok))
(top-indent (get-top-indent)))
(set! start-of-line #f)
(set! held-token tok)
(cond ((> this-indent top-indent)
(set! indents (list:cons this-indent indents))
(token:t 'INDENT "" range))
((< this-indent top-indent)
(set! indents (cdr indents))
;; go around again, might be more DEDENT
(set! start-of-line #t)
(token:t 'DEDENT "" range))
(else
(loop)))))
;; in the middle of a line somewhere
(match tok with
(token:t 'NEWLINE _ _)
-> (match paren-stack with
() -> (begin (set! start-of-line #t) tok)
_ -> (loop))
(token:t 'whitespace _ _) -> (loop)
(token:t 'comment _ _) -> (loop)
(token:t _ _ _) -> tok
))
))
;; (define (next-token)
;; (let ((t (next-token0)))
;; (print-string "next-token: ") (printn t)
;; t))
(let ((stack (stack:empty)))
(define (get-state)
(match stack with
(stack:empty) -> 0
(stack:elem _ state _) -> state
))
(define (lookup-action state kind)
(let loop ((l actions[state]))
(match l with
(action-list:nil)
-> (error "missing action?")
(action-list:cons tkind action tl)
-> (if (eq? terminals[tkind] kind)
action
(loop tl)))))
(define (lookup-goto state nt)
(let loop ((l goto[state]))
(match l with
(goto-list:nil)
-> (error "missing goto?")
(goto-list:cons nt0 new-state tl)
-> (if (eq? nt0 nt)
new-state
(loop tl)))))
(define (pop-n n)
(let loop ((n n) (result (list:nil)))
(if (= n 0)
result
(loop (- n 1) (list:cons (pop) result)))))
(define (push item state)
(set! stack (stack:elem item state stack)))
(define (pop)
(match stack with
(stack:elem item _ rest) -> (begin (set! stack rest) item)
(stack:empty) -> (error "stack underflow")))
(define (get-range args)
(let loop ((args args) (l0 -1) (p0 -1) (l1 -1) (p1 -1))
(define test-range
-1 tl (range:t l2 p2 l3 p3) -> (loop tl l2 p2 l3 p3)
_ tl (range:t l2 p2 l3 p3) -> (loop tl l0 p0 l3 p3)
_ tl (range:f) -> (loop tl l0 p0 l1 p1)
)
(match l0 args with
-1 () -> (range:f)
_ () -> (range:t l0 p0 l1 p1)
_ ((item:t _ r _) . tl) -> (test-range l0 tl r)
_ ((item:nt _ r _) . tl) -> (test-range l0 tl r)
)))
(let loop ((tok (next-token)))
(cond ((eq? tok eof-token) (pop) (pop))
(else
;(print-string "token: ") (printn tok)
;(print-string "state: ") (printn (get-state))
(vcase token tok
((:t kind val range)
(let ((a (lookup-action (get-state) kind)))
(vcase action a
((:shift state)
(push (item:t kind range val) state)
(loop (next-token)))
((:reduce plen nt)
(let ((args (pop-n plen))
(next-state (lookup-goto (get-state) nt)))
(push (item:nt non-terminals[nt] (get-range args) args) next-state))
(loop tok)))
)))
)))
)))
(define (print-parse-tree t)
(define (indent n)
(let loop ((n n))
(cond ((= n 0) #t)
(else
(print-string " ")
(loop (- n 1))))))
(let loop0 ((d 0)
(t t))
(indent d)
(vcase item t
((:t sym range str)
(print sym) (print-string " ") (printn str))
((:nt sym range items)
(printn sym)
(let loop1 ((l items))
(vcase list l
((:nil) #u)
((:cons item tail)
(loop0 (+ d 1) item)
(loop1 tail)))))))
)
;; print a parse tree out in a way that facilitates writing patterns for it.
(define ppt
(item:nt sym range items) -> (begin (print-string "(item:nt ") (print sym) (print-string " ") (ppt-list items) (print-string ")"))
(item:t sym range str) -> (begin (print-string "(item:t ") (print sym) (print-string " \"") (print-string str) (print-string "\")"))
)
(define (ppt-list l)
(print-string "(")
(ppt-list2 l))
(define ppt-list2
() -> (print-string ")")
(hd . tl) -> (begin (ppt hd) (print-string " ") (ppt-list2 tl))
)
;; AST
(datatype atom
(:name string)
(:number int)
(:string string)
)
(datatype expr
(:pred string (list (expr)))
(:atom (atom))
)
;; parse-tree->AST
(define (p-file-input l)
(let loop ((acc (list:nil))
(l l))
(match l with
() -> acc
((item:nt _ _ ((item:t 'NEWLINE _ _))) (item:nt _ _ splat)) -> (loop acc splat) ;; ignore NEWLINE tokens
((item:nt _ _ (item0)) (item:nt _ _ splat)) -> (loop (list:cons (p-expr item0) acc) splat)
_ -> (error "p-file-input"))
))
(define p-atom
(item:t 'NAME _ x) -> (atom:name x)
(item:t 'NUMBER _ x) -> (atom:number (string->int x))
(item:t 'STRING _ x) -> (atom:string x)
_ -> (error "p-atom")
)
(define p-list
(item:nt _ _ ((item:t 'comma _ _) expr recur)) -> (list:cons (p-expr expr) (p-list recur))
(item:nt _ _ ()) -> (list:nil)
_ -> (error "p-list2")
)
(define p-args
(item:nt _ _ ((item:nt 'list _ (expr rest)))) -> (list:cons (p-expr expr) (p-list rest))
(item:nt _ _ ()) -> (list:nil)
_ -> (error "p-args")
)
(define p-expr2
(item:nt 'predicate _ ((item:t 'NAME _ name) _ args _)) -> (expr:pred name (p-args args))
(item:nt 'atom _ (x)) -> (expr:atom (p-atom x))
_ -> (error "p-expr2")
)
(define p-expr
(item:nt 'expr _ (x)) -> (p-expr2 x)
_ -> (error "p-expr")
)
(define start
(item:nt 'file _ val) -> (p-file-input val)
_ -> (error "start"))
;; an unparser
(define print-expr
(expr:pred name args) -> (begin (print-string name) (print-string " (") (print-args args) (print-string ")") #u)
(expr:atom atom) -> (print-atom atom))
(define print-args
(arg0) -> (print-expr arg0)
(arg0 . rest) -> (begin (print-expr arg0) (print-string ", ") (print-args rest))
() -> (begin (print-string "") #u)
)
(define print-atom
(atom:string s) -> (begin (print-string s) #u) ;; quotes are included
(atom:number n) -> (begin (print-string (int->string n)) #u)
(atom:name s) -> (begin (print-string s) #u)
)
(let ((t (if (> sys.argc 1) (parse sys.argv[1]) (parse "tests/parse_0.py"))))
(printn t)
(print-parse-tree t)
;;(ppt t)
(terpri)
(let ((parsed (start t)))
;;(printn parsed)
(let loop ((l parsed))
(match l with
() -> #f
(hd . tl) -> (begin (print-expr hd) (terpri) (loop tl))
))
))