;; assumes the presence of a 'step' function, generated by parse/lexer.py,
;; that looks like this: 
;  (define (step ch state)
;;   (char->ascii (string-ref dfa[state] (char->ascii ch))))

(datatype range
  ;; range within a source file
  ;; <line0> <pos0> <line1> <pos1>
  (:t int int int int)
  ;; undefined range
  (:f)
  )

(datatype token
  ;;  <kind> <value> <range>
  (:t symbol string (range))
  )

(define eof-token (token:t 'eof "eof" (range:t 0 0 0 0)))

(define (lex producer consumer)
  ;; producer gives us characters
  ;; consumer takes tokens

  (let ((action 'not-final)
	(state 0)
	;; current char's position
	(line 1) 
	(pos 1)
	;; previous char's position
	(lline 1)
	(lpos 1)
	;; start of last token
	(tline 1)
	(tpos 1)
	)
    
    (define (next-char)
      ;; manage line/pos
      (let ((ch (producer)))
	(set! lline line)
	(set! lpos pos)
	(cond ((char=? ch #\newline)
	       (set! line (+ line 1))
	       (set! pos 1))
	      (else
	       (set! pos (+ pos 1))))
	ch))

    (define (final? action)
      (not (eq? action 'not-final)))

    (let loop ((ch (next-char))
	       (last 'not-final)
	       (current (list:nil)))
      (cond ((char=? ch #\eof)
	     ;; parser seems to require an extra NEWLINE in here...
	     (consumer (token:t 'NEWLINE "\n" (range:t tline tpos line pos)))
	     (consumer (token:t '<$> "<$>" (range:t tline tpos line pos))) #t)
	    (else
	     (set! state (step ch state))
	     (set! action finals[state])
	     (cond ((and (not (final? last)) (final? action))
		    ;; we've entered a new final state
		    (loop (next-char) action (list:cons ch current)))
		   ((and (final? last) (not (final? action)))
		    ;; we've left a final state - longest match - emit token
		    (consumer (token:t last (list->string (reverse current)) (range:t tline tpos lline lpos)))
		    (set! state 0)
		    (set! tline lline)
		    (set! tpos lpos)
		    (loop ch 'not-final (list:nil)))
		   (else
		    ;; accumulate this character
		    (loop (next-char) action (list:cons ch current)))))))
      ))

(define (make-lex-generator file)

  (define (producer)
    (file/read-char file))

  (make-generator
   (lambda (consumer)
     (lex producer consumer)
     (let forever ()
       (consumer eof-token)
       (forever))
     )))