;; assumes the presence of a 'step' function, generated by parse/lexer.py, ;; that looks like this: ; (define (step ch state) ;; (char->ascii (string-ref dfa[state] (char->ascii ch)))) (datatype range ;; range within a source file ;; (:t int int int int) ;; undefined range (:f) ) (datatype token ;; (:t symbol string (range)) ) (define eof-token (token:t 'eof "eof" (range:t 0 0 0 0))) (define (lex producer consumer) ;; producer gives us characters ;; consumer takes tokens (let ((action 'not-final) (state 0) ;; current char's position (line 1) (pos 1) ;; previous char's position (lline 1) (lpos 1) ;; start of last token (tline 1) (tpos 1) ) (define (next-char) ;; manage line/pos (let ((ch (producer))) (set! lline line) (set! lpos pos) (cond ((char=? ch #\newline) (set! line (+ line 1)) (set! pos 1)) (else (set! pos (+ pos 1)))) ch)) (define (final? action) (not (eq? action 'not-final))) (let loop ((ch (next-char)) (last 'not-final) (current (list:nil))) (cond ((char=? ch #\eof) ;; parser seems to require an extra NEWLINE in here... (consumer (token:t 'NEWLINE "\n" (range:t tline tpos line pos))) (consumer (token:t '<$> "<$>" (range:t tline tpos line pos))) #t) (else (set! state (step ch state)) (set! action finals[state]) (cond ((and (not (final? last)) (final? action)) ;; we've entered a new final state (loop (next-char) action (list:cons ch current))) ((and (final? last) (not (final? action))) ;; we've left a final state - longest match - emit token (consumer (token:t last (list->string (reverse current)) (range:t tline tpos lline lpos))) (set! state 0) (set! tline lline) (set! tpos lpos) (loop ch 'not-final (list:nil))) (else ;; accumulate this character (loop (next-char) action (list:cons ch current))))))) )) (define (make-lex-generator file) (define (producer) (file/read-char file)) (make-generator (lambda (consumer) (lex producer consumer) (let forever () (consumer eof-token) (forever)) )))