;; -*- Mode: Irken -*- (include "lib/core.scm") (include "lib/pair.scm") (include "lib/string.scm") (include "lib/io.scm") (include "lib/os.scm") (include "lib/alist.scm") (include "lib/aa_map.scm") (include "lib/symbol.scm") ;; XXX consider rewriting with more experience (datatype field (:t symbol sexp) ) ;; an s-expression datatype. (datatype sexp (:list (list sexp)) (:symbol symbol) (:string string) (:char char) (:bool bool) (:int int) (:undef) (:vector (list sexp)) (:record (list field)) (:cons symbol symbol) ;; constructor ':' syntax (:attr sexp symbol) ;; attribute '.' syntax ) ;; In retrospect, I think it may have been a mistake to embed 'list' ;; into sexp. It forces all sexp-handling code to cover two cases, ;; often triggering the need for an auxiliary function. Might be ;; cleaner to just have (sexp:nil) and (sexp:cons)... ;; idea: how about a set of macros, similar to the format macro, ;; to make sexps easier to build? worth it? ;; similar to the list macro. think of this as the 'list' function ;; for s-expressions. (defmacro sexp (sexp) -> (sexp:list '()) (sexp x ...) -> (sexp:list (LIST x ...)) ) (define (char-class char-list) (let ((v (make-vector 256 #f))) (define (in-class? ch) v[(char->ascii ch)]) (let loop ((l char-list)) (match l with () -> in-class? (hd . tl) -> (begin (set! v[(char->ascii hd)] #t) (loop tl)) )))) (define hex-map (literal (alist/make (#\0 0) (#\1 1) (#\2 2) (#\3 3) (#\4 4) (#\5 5) (#\6 6) (#\7 7) (#\8 8) (#\9 9) (#\a 10) (#\b 11) (#\c 12) (#\d 13) (#\e 14) (#\f 15) (#\A 10) (#\B 11) (#\C 12) (#\D 13) (#\E 14) (#\F 15) ))) (define dec-map (literal (alist/make (#\0 0) (#\1 1) (#\2 2) (#\3 3) (#\4 4) (#\5 5) (#\6 6) (#\7 7) (#\8 8) (#\9 9) ))) (define oct-map (literal (alist/make (#\0 0) (#\1 1) (#\2 2) (#\3 3) (#\4 4) (#\5 5) (#\6 6) (#\7 7) ))) (define whitespace '(#\space #\tab #\newline #\return)) (define delimiters (string->list "()[]{}:")) (define letters (string->list "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")) (define all-delimiters (append whitespace delimiters)) (define digits (string->list "0123456789")) (define whitespace? (char-class '(#\space #\tab #\newline #\return))) (define delim? (char-class all-delimiters)) (define digit? (char-class digits)) (define letter? (char-class letters)) (define field? (char-class (cons #\- (append letters digits)))) (define (reader read-char) (let ((char #\eof)) ;; one-character buffer (define (peek) (if (eq? char #\eof) (set! char (read-char)) #u) char) (define (next) (let ((result char)) ;;(print result) (set! char (read-char)) result)) (define (skip-peek) (next) (peek) ) (define (skip-comment) (let loop ((ch (next))) (match ch with #\return -> #u #\newline -> #u _ -> (loop (skip-peek))))) (define (skip-whitespace) (let loop ((ch (peek))) (cond ((eq? ch #\eof) #u) ((eq? ch #\;) (skip-comment) (loop (peek))) ((whitespace? ch) (loop (skip-peek))) (else #u)))) ;; very tricky, using a state machine (define (read-atom) (let ((state 0) (dot-count 0) (ch #\0)) (let loop ((result '())) (set! ch (peek)) (if (eq? ch #\.) (set! dot-count (+ dot-count 1))) (set! state (match state with 0 -> (cond ((eq? ch #\eof) 4) ((eq? ch #\-) 1) ((digit? ch) 2) ((delim? ch) 7) (else 3)) 1 -> (cond ((eq? ch #\eof) 5) ((delim? ch) 5) ((digit? ch) 2) (else 3)) 2 -> (cond ((eq? ch #\eof) 6) ((delim? ch) 6) ((digit? ch) 2) (else 3)) 3 -> (cond ((eq? ch #\eof) 5) ((delim? ch) 5) (else 3)) _ -> (impossible))) (cond ((< state 4) (loop (list:cons (next) result))) ;; non-final ((= state 4) (error "unexpected end-of-file")) ;; error final (else ;; all other finals: 5,6,7 ;; single-character - for #\A (if (= state 7) (set! result (list:cons (next) result))) (:atom (list->string (reverse result)) ;; result string (= state 6) ;; number? (length result) ;; #chars dot-count)))))) ;; #dots (define (dotted-symbol s n) ;; handle dots in a symbol ;; as a special case, allow all-dots symbols (like '...) through unscathed (if (= n (string-length s)) (sexp:symbol (string->symbol s)) (let loop ((parts (reverse (string-split s #\.)))) ;; a.b.c => (get (get a b) c) (match parts with ;; c b a (base) -> (sexp:symbol (string->symbol base)) (attr . rest) -> (sexp:attr (loop rest) (string->symbol attr)) () -> (impossible) )))) (define (read-symbol) (match (read-atom) with (:atom sym #t _ _) -> (error1 "expected symbol" sym) (:atom sym #f _ 0) -> (string->symbol sym) (:atom sym #f _ _) -> (error1 "no dots allowed in constructor names" sym) )) (define (read1) (skip-whitespace) (let ((ch (peek))) (match ch with #\eof -> (error "unexpected end of file") #\( -> (sexp:list (read-list)) #\{ -> (read-record) #\" -> (read-string) #\' -> (begin (next) (sexp (sexp:symbol 'quote) (read))) #\, -> (begin (next) (sexp (sexp:symbol 'comma) (read))) #\: -> (begin (next) (sexp:cons 'nil (read-symbol))) #\# -> (begin (next) (set! ch (peek)) (match ch with #\\ -> (begin (next) ;; skip backslash (match (read-atom) with (:atom atom _ 1 _) -> (sexp:char (string-ref atom 0)) (:atom "newline" _ _ _) -> (sexp:char #\newline) (:atom "space" _ _ _) -> (sexp:char #\space) (:atom "return" _ _ _) -> (sexp:char #\return) (:atom "tab" _ _ _) -> (sexp:char #\tab) (:atom "eof" _ _ _) -> (sexp:char #\eof) (:atom "nul" _ _ _) -> (sexp:char #\nul) x -> (error1 "bad character constant" x) )) ;; Bb #\X -> (begin (next) (sexp:int (read-hex-int))) #\x -> (begin (next) (sexp:int (read-hex-int))) #\O -> (begin (next) (sexp:int (read-oct-int))) #\o -> (begin (next) (sexp:int (read-oct-int))) #\T -> (begin (next) (sexp:bool #t)) #\t -> (begin (next) (sexp:bool #t)) #\F -> (begin (next) (sexp:bool #f)) #\f -> (begin (next) (sexp:bool #f)) #\U -> (begin (next) (sexp:undef)) #\u -> (begin (next) (sexp:undef)) #\( -> (sexp:vector (read-list)) x -> (error1 "syntax error" x) )) #\) -> (error "unexpected close-paren") _ -> (match (read-atom) with (:atom chars #t n _) -> (sexp:int (read-int chars n)) (:atom chars #f _ 0) -> (sexp:symbol (string->symbol chars)) (:atom chars #f _ n) -> (dotted-symbol chars n) ) ) ) ) (define (read) (let ((result (read1))) (skip-whitespace) (let ((ch (peek))) (match ch with ;; postfix array-reference syntax #\[ -> (let ((index (read-array-index))) ;; primops take a parameter---------V (sexp (sexp:symbol '%array-ref) (sexp:bool #f) result index)) ;; infix colon syntax #\: -> (begin (next) (match result (read) with (sexp:symbol dt) (sexp:symbol alt) -> (sexp:cons dt alt) ;; not forcing (sexp:symbol) on might allow 'builtin method calls'... ;;ob (sexp:cons 'nil method) -> (sexp:attr (sexp:attr ob 'o) method) ob (sexp:cons 'nil method) -> (sexp (sexp:symbol '%method) (sexp:symbol method) ob) ;; object : type syntax ob type -> (sexp (sexp:symbol '%typed) ob type))) ;;x y -> (error1 "colon syntax" (:pair x y)))) ;; infix 'get' syntax (i.e., attribute access) ;; XXX this is disabled because it breaks symbols like '... ;; so we'll probably need to do the same hack as the python version ;;#\. -> (begin (next) (sexp:attr result (read-symbol))) _ -> result )))) (define (read-array-index) (next) ;; skip open-left-bracket (let ((exp (read))) (skip-whitespace) (if (eq? (peek) #\]) (begin (next) exp) (error "expected closing ]/} character")))) (define (read-hex-digit ch) (match (alist/lookup hex-map ch) with (maybe:no) -> (error "bad hex digit") (maybe:yes num) -> num)) (define (read-hex-code) (let ((n0 (read-hex-digit (next))) (n1 (read-hex-digit (next)))) (ascii->char (+ (<< n0 8) n1)))) (define (read-string) (next) ;; throw away the opening quote (let loop ((ch (peek)) (result '())) (match ch with #\" -> (begin (next) ;; throw away the close-quote (sexp:string (list->string (reverse result)))) #\\ -> (begin ;; ignore this backslash, read the next char (next) (set! ch (next)) (match ch with #\x -> (loop (peek) (list:cons (read-hex-code) result)) #\X -> (loop (peek) (list:cons (read-hex-code) result)) #\r -> (loop (peek) (list:cons #\return result)) #\n -> (loop (peek) (list:cons #\newline result)) #\t -> (loop (peek) (list:cons #\tab result)) #\" -> (loop (peek) (list:cons #\" result)) #\\ -> (loop (peek) (list:cons #\\ result)) _ -> (error1 "bad backslash escape in string" result) )) _ -> (loop (skip-peek) (list:cons ch result)) ))) (define (read-list) ;; throw away the open paren (next) (let loop ((result '())) (skip-whitespace) (let ((ch (peek))) (if (eq? ch #\)) ;; throw away the paren (begin (next) (reverse result)) (let ((exp (read))) ;; XXX should I check for here? (loop (list:cons exp result))) )))) (define (read-record) ;; { label=value label=value ...} (next) (let loop ((result '())) (skip-whitespace) (let ((p (peek))) (if (eq? p #\}) (begin (next) (sexp:record (reverse result))) (let ((name (read-name))) (cond ((eq? name '...) (loop (list:cons (field:t name (sexp:bool #f)) result))) (else (skip-whitespace) (if (not (eq? (peek) #\=)) (error1 "expected '=' in record literal" result) (begin (next) (let ((val (read))) (loop (list:cons (field:t name val) result)))))))) )))) (define (read-name) (let loop ((result '()) (ch (peek)) (dots #f)) (cond ((or (field? ch) (eq? ch #\.)) (loop (list:cons ch result) (skip-peek) #f)) (else (string->symbol (list->string (reverse result))))))) (define (read-int s n) (let ((neg? (eq? (string-ref s 0) #\-)) (start (if neg? 1 0))) (let loop ((i start) (r 0)) (if (= i n) (if neg? (- 0 r) r) (match (alist/lookup dec-map (string-ref s i)) with (maybe:no) -> (error1 "bad decimal digit?" s) (maybe:yes digit) -> (loop (+ i 1) (+ (* r 10) digit))) )))) (define (read-hex-int) (let ((neg? (eq? (peek) #\-))) (if neg? (begin (next) #u)) (let loop ((r 0) (ch (peek))) (match (alist/lookup hex-map ch) with (maybe:yes digit) -> (loop (+ (* r 16) digit) (skip-peek)) (maybe:no) -> (if neg? (- 0 r) r))))) (define (read-oct-int) (let ((neg? (eq? (peek) #\-))) (if neg? (begin (next) #u)) (let loop ((r 0) (ch (peek))) (match (alist/lookup oct-map ch) with (maybe:yes digit) -> (loop (+ (* r 8) digit) (skip-peek)) (maybe:no) -> (if neg? (- 0 r) r))))) (define (read-include path result) ;; cons the forms from this file onto result, in reverse order... (append (reverse (read-file path)) result)) (define (read-all) (let loop ((result '())) (skip-whitespace) (if (eq? (peek) #\eof) (reverse result) (let ((form (read))) (match form with (sexp:list ((sexp:symbol 'include) (sexp:string path))) -> (loop (read-include path result)) _ -> (loop (list:cons form result))))))) (read-all) )) (define (read-file path) (print-string "reading file ") (printn path) (let ((file (file/open-read path))) (reader (lambda () (file/read-char file))))) (define (read-string s) (reader (string-reader s))) (define sexp->symbol (sexp:symbol s) -> s x -> (error1 "sexp->symbol" x)) (define sexp->int (sexp:int n) -> n x -> (error1 "sexp->int" x)) ;; utility functions (define field=? (field:t sa va) (field:t sb vb) -> (and (eq? sa sb) (sexp=? va vb))) ;; XXX consider eq? shortcut (define sexp=? (sexp:undef) (sexp:undef) -> #t (sexp:symbol a) (sexp:symbol b) -> (eq? a b) (sexp:bool a) (sexp:bool b) -> (eq? a b) (sexp:int a) (sexp:int b) -> (= a b) (sexp:string a) (sexp:string b) -> (string=? a b) (sexp:char a) (sexp:char b) -> (char=? a b) (sexp:list l0) (sexp:list l1) -> (every2? sexp=? l0 l1) (sexp:vector a) (sexp:vector b) -> (every2? sexp=? a b) (sexp:record a) (sexp:record b) -> (every2? field=? a b) (sexp:cons a0 a1) (sexp:cons b0 b1) -> (and (eq? a0 b0) (eq? a1 b1)) (sexp:attr a0 a1) (sexp:attr b0 b1) -> (and (sexp=? a0 b0) (eq? a1 b1)) _ _ -> #f ) (define (sexp1 sym rest) ;; build an s-expression with at the front followed by (sexp:list (list:cons (sexp:symbol sym) rest))) (define repr-field (field:t '... _) -> "..." (field:t name val) -> (format (sym name) "=" (p repr val))) (define repr (sexp:list ((sexp:symbol 'quote) x)) -> (format "'" (repr x)) (sexp:list l) -> (format "(" (join repr " " l) ")") (sexp:symbol s) -> (format (sym s)) (sexp:string s) -> (format "\"" s "\"") ;; XXX escape backslashes... (sexp:char ch) -> (format "#\\" (char ch)) (sexp:bool #t) -> "#t" (sexp:bool #f) -> "#f" (sexp:int n) -> (format (int n)) (sexp:undef) -> "#u" (sexp:vector v) -> (format "#(" (join repr " " v) ")") (sexp:record fl) -> (format "{" (join repr-field " " fl) "}") (sexp:cons dt c) -> (format (if (eq? dt 'nil) "" (symbol->string dt)) ":" (sym c)) (sexp:attr lhs a) -> (format (p repr lhs) "." (sym a)) ) (define indent 0 -> #t n -> (begin (print-string " ") (indent (- n 1)))) (define pp-size-field (field:t name val) -> (+ (+ (string-length (symbol->string name)) 1) (pp-size val))) (define pp-size (sexp:list l) -> (foldr + (+ 1 (length l)) (map pp-size l)) (sexp:symbol s) -> (string-length (symbol->string s)) (sexp:string s) -> (+ 2 (string-length s)) ;; escaped backslashes! (sexp:char ch) -> (string-length (repr (sexp:char ch))) (sexp:bool _) -> 2 (sexp:int n) -> (string-length (int->string n)) (sexp:undef) -> 2 (sexp:vector v) -> (foldr + (+ 2 (length v)) (map pp-size v)) (sexp:record fl) -> (foldr + (+ (length fl) 1) (map pp-size-field fl)) (sexp:cons dt c) -> (+ 1 (+ (string-length (symbol->string dt)) (string-length (symbol->string c)))) (sexp:attr lhs a) -> (+ 1 (+ (pp-size lhs) (string-length (symbol->string a)))) ) (define (pp d exp) (let ((size (pp-size exp))) (if (< size 80) (print-string (repr exp)) (match exp with (sexp:list ()) -> (print-string "()") (sexp:list (hd . tl)) -> (begin (print-string "(") (pp d hd) (for-each (lambda (x) (newline) (indent (+ d 1)) (pp (+ d 1) x)) tl) (print-string ")")) ;; XXX complete for vector & record. _ -> (print-string (repr exp)))))) (define (test-file) (let ((t (read-file (if (> sys.argc 1) sys.argv[1] "lib/core.scm")))) ;; (printn t) ;;(for-each (lambda (x) (printn x) (pp 0 x) (newline)) t) (printn t) (for-each (lambda (x) (pp 0 x) (newline)) t) #u )) ;(test-file)