;;; synfl.el --- syntax-directed font locking for Emacs

;; Copyright (C) 2004 Stephen J. Turnbull

;; Author: Stephen J. Turnbull <stephen@xemacs.org>
;; Created: 21 September 2002
;; Keywords: lisp, maint

;; This file is part of synfl.
;; It is not part of XEmacs.

;; synfl is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License, version 2, as
;; published by the Free Software Foundation.

;; synfl is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with Emacs; see the file COPYING.  If not, write to the Free
;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
;; 02111-1307, USA.

;;; Synched up with: Not in GNU Emacs
 
;;; Commentary:

;; This package provides a syntax-directed font lock facility for Emacs.  It
;; provides a specification for a grammar data structure, and functions to
;; generate and execute an LR parser.  It is hoped that this will be faster
;; and more robust than the current scheme.

;; The synfl parser is a program which takes a synfl grammar and a token
;; stream as arguments, and returns a "parse".

;; A "synfl grammar" is a data structure representing a context-free grammar
;; plus annotations to the rules for use in syntax-directed transformation.
;; This is version 1.0.  Future versions are likely to be extensions rather
;; than backward-incompatible changes.

;; A token stream is a data structure representing a stream of typed objects,
;; whose types correspond to the terminals of the grammar.  This is version
;; 0.5.  Backward-incompatible changes should be expected.

;; A "parse" is the output of the parser, and might be a parse tree, a syntax
;; tree, or something else.  Currently parses are completely implementation-
;; defined.  An appropriate abstraction needs to be designed.

;; Grammars

;; A synfl grammar is named by a non-keyword symbol.  Keyword symbols are
;; reserved for the use of the synfl implementation.  Grammars should avoid
;; using them wherever possible.  The components of the grammar are attached
;; as properties to the name symbol.  The following components are required.

;; :synfl-grammar-version The version of the grammar data structure.  A list
;;		whose car is the keyword :synfl-grammar and whose cdr is a
;;		list of integers denoting the version.
;; :terminals	The list of Lisp symbols representing terminal symbols of the
;;		grammar.
;; :nonterminals The list of Lisp symbols representing non-terminal
;;		symbols of the grammar.
;; :start-symbol A nonterminal indicating the start symbol.
;; :productions	The list of productions of the grammar.
;;		Each production is a list.  The first element must be a
;;		non-terminal grammar symbol.  The second element is a list of
;;		grammar symbols, and may be `nil'.  Remaining elements are
;;		reserved for annotations, whose API depends on those for
;;		streams and "parses".
;; :action-table The parser action table produced by the LR parser generator.
;; :goto-table	The parser goto table produced by the LR parser generator.

;; A token stream is a Lisp symbol.  The public methods available for
;; a token stream are `synfl-stream-next' and `synfl-stream-rewind'.
;; #### There should be a constructor.
;; The implementation must provide one or more constructors, and set the
;; following properties on the stream:
;;
;; :synfl-stream-version The version of the stream data structure.  A list
;;		whose car is the keyword :synfl-token-stream and whose cdr
;;		is a list of integers denoting the version.
;; :iterate	A function of a stream, returning a token.
;; :rewind	A function of a stream, reinitializing it.
;; :token-type	A function of a token, returning a terminal symbol.
;; :token-value	A function of a token, returning a Lisp Object.
;; :token-text	A function of a token, returning a string.
;;
;; The stream may have other accessors defined by the implementation.

;; A token is an opaque object generated by applying `synfl-stream-next'
;; to a token stream.  The following accessors are defined for a token,
;; taking the token and the stream that generated it as arguments:
;;
;;   `synfl-token-type'		Returns a terminal symbol from a grammar.
;;   `synfl-token-value'	Returns the value of a token.
;;   `synfl-token-text'		Returns the lexical text of the token.

;; Bugs/TODO

;; 1.  In the current implementation, names of symbols have to be unique
;; ACROSS grammars as well as within them.  I think the way to fix this is
;; to map names of grammar symbols to unique Lisp symbols.  Currently we
;; just use the name symbol as the grammar symbol.
;; 2.  Fix style of symbol, token, production function to be like grammar.
;; 3.  Consider making the *-p functions a condition-case around *-check-*.
;; 4.  Hide the internal representations of symbols.
;; 5.  A symbol is nonterminal if it has a non-nil :productions property (a
;;     list of productions, don't need rhs), terminal otherwise.

;;; Change Log: see separate ChangeLog.

;;; Code:

;; Initialize tracing

(require 'synfl-trace)
;; currently defined trace features:
;;   parser-state-entry:	parser state on entry to main loop body
;;   parser-state-exit:	parser state at exit from main loop body
;;   debug-closure:	set of items at each iteration of building closure
;;   debug-slr:		insertions into action and goto tables
;;   next-token:	synfl-stream-next
;;   token-type:	synfl-token-type
;;   point:		point after last token lexed
(setq synfl-trace t)

;;; Variables:

;; Version information
;; Versions are lists of non-negative integers.
(defconst synfl-grammar-version '(1 0)
  "Version of the grammar specification.")



;; Grammar API

;; accessor defsubsts for grammars or used in grammar functions

(defsubst synfl-grammar-terminals (grammar)
  "Return the terminal symbols (in internal representation) of GRAMMAR."
  (get grammar :terminals))

(defsubst synfl-grammar-nonterminals (grammar)
  "Return the nonterminal symbols (in internal representation) of GRAMMAR."
  (get grammar :nonterminals))

(defsubst synfl-grammar-symbols (grammar)
  "Return all symbols (in internal representation) of GRAMMAR."
  (append (get grammar :terminals) (get grammar :nonterminals)))

(defsubst synfl-grammar-productions (grammar)
  "Return the productions (in internal representation) of GRAMMAR."
  (get grammar :productions))

(defsubst synfl-grammar-start-symbol (grammar)
  "Return the start symbol (in internal representation) of GRAMMAR."
  (get grammar :start-symbol))

(defsubst synfl-grammar-action-table (grammar)
  "Return the action table for GRAMMAR.

Generate it from the productions if needed."
  (unless (get grammar :action-table) (synfl-generate-parser grammar))
  (get grammar :action-table))

(defsubst synfl-grammar-goto-table (grammar)
  "Return the goto table for GRAMMAR.

Generate it from the productions if needed."
  (unless (get grammar :goto-table) (synfl-generate-parser grammar))
  (get grammar :goto-table))

(defsubst synfl-grammar-state-count (grammar)
  "Return the number of states in the parser for GRAMMAR.

Generate the tables if needed."
  (length (cdr (synfl-grammar-action-table grammar))))

(defsubst synfl-grammar-timestamp (grammar)
  (get grammar :timestamp))

(defsubst synfl-grammar (object)
  (car (get object :synfl-grammar)))

(defsubst synfl-grammar-symbol (grammar symbol-name)
  "Return the existing internal representation of SYMBOL-NAME in GRAMMAR."
  symbol-name)

;; #### currently symbols are their own names.
;; this means symbols must be unique across grammars
(defsubst synfl-grammar-make-symbol (grammar name)
  "Return an internal representation for NAME in GRAMMAR.

NAME should not already be used.  Keywords are reserved."
  ;; #### implement interning and error-checking
  (if (or (not (symbolp name))
	  (and (keywordp name) (not (eq name :start))))
      (error 'wrong-type-argument 'symbol name)
    name))

(defsubst synfl-name (object)
  "Return the external representation of internal OBJECT in its grammar.

Do not change it.  It may be the object itself in some implementations."
  (cond ((synfl-grammar-p object) (get object :name))
	((synfl-symbol-p object) object)
	((synfl-production-p object) object)
	(t (error 'wrong-type-argument "synfl object type" object))))



;; Grammar functions

(defvar synfl-grammars nil
  "Alist equivalence mapping synfl grammar names to grammar objects.")


(defun synfl-find-grammar (name-or-grammar)
  "Return the grammar object corresponding to NAME-OR-GRAMMAR.

NAME-OR-GRAMMAR must be a symbol.  If there is no such grammar return `nil'."

  (check-type name-or-grammar symbol "not a grammar name or grammar")
  (if (synfl-grammar-p name-or-grammar)
      name-or-grammar
    (cdr (assq name-or-grammar synfl-grammars))))


(put 'synfl-make-grammar 'lisp-indent-function 1)

(defun synfl-make-grammar (name terminal nonterminal start production)
  "Declare symbol NAME to be a synfl grammar.

If the list of symbols TERMINAL is non-nil, initialize NAME with them.
If the list of symbols NONTERMINAL is non-nil, initialize NAME with them.
Set the start symbol of NAME to START.
If the list of lists PRODUCTION is non-nil, initialize NAME with them.

Each list in PRODUCTION must have the form (LHS RHS SIDE-EFFECT), where
  LHS         is the name of a nonterminal of GRAMMAR
  RHS         is a list of names of symbols of GRAMMAR
  SIDE-EFFECT is a function of 5 arguments: PARSE-TREE OBJECT STATE TOKEN STACK
              (cf. `synfl-production-side-effect'); `nil' is an abbreviation
              for #'ignore
              PARSE-TREE is a synfl parse tree
              OBJECT may be a synfl parser state or a synfl grammar production
              STATE is a synfl parser state
              TOKEN is a synfl token
              STACK is the synfl parser stack (for debugging)."

  ;; argument sanity
  (unless (and (symbolp name) (not (keywordp name)))
    (error 'wrong-type-argument 'symbol name))
  (when (synfl-find-grammar name)
    (error 'args-out-of-range "grammar name exists" name))
  (unless (memq start nonterminal)
    (error 'wrong-type-argument (format "nonterminal of %s" name) start))

  (let ((grammar (gensym)))
    (add-to-list 'synfl-grammars (cons name grammar))
    (put grammar :type (list :synfl-grammar synfl-grammar-version))
    (put grammar :name name)
    (put grammar :terminals (mapcar (lambda (x) (synfl-make-terminal x grammar))
				   terminal))
    (put grammar :nonterminals (mapcar (lambda (x)
					(synfl-make-nonterminal x grammar))
				      nonterminal))
    ;; must come after interning names of nonterminals
    (put grammar :start-symbol (synfl-grammar-symbol grammar start))
    (put grammar :productions (mapcar (lambda (x)
				       (synfl-make-production x grammar))
				     production))
    (put grammar :time-stamp (current-time))))


(defun synfl-copy-grammar (grammar)
  "Return an uninterned copy of the internal representation of GRAMMAR.

Useful for making augmented grammars and the like."

  (let ((source (synfl-find-grammar grammar))
	(target (gensym)))
    (flet ((cc (key)
	     (put target key (get source key))))
      (cc :type)
      (cc :name)
      (cc :terminals)
      (cc :nonterminals)
      (cc :start-symbol)
      (cc :productions)
      (cc :time-stamp))
    target))


(defun synfl-grammar-p (object)
  "Return non-nil if object is a valid synfl grammar."
  (and (symbolp object)
       (let ((version (get object :type)))
	 (and (consp version) (eq (car version) :synfl-grammar)))))


(defun synfl-check-grammar (object)
  "Signal an error if object is not a valid synfl grammar.

Currently no recursive checks are done."
  (unless (and (symbolp object)
	       (get object :type)
	       (get object :terminals)
	       (get object :nonterminals)
	       (get object :productions))
    (if (symbolp object) (remprop object :type))
    (error 'wrong-type-argument 'synfl-grammar object)))



;; Symbol functions

(defun synfl-make-terminal (name grammar)
  "Return internal representation of NAME as a terminal symbol of GRAMMAR."
  (let ((symbol (synfl-grammar-make-symbol grammar name)))
    (put symbol :type :terminal)
    (put symbol :synfl-grammar (list grammar synfl-grammar-version))
    (push symbol (get grammar :terminals))
    symbol))


(defun synfl-terminal-p (object)
  "Return non-nil if object is a terminal of a synfl grammar."
  (and (symbolp object)
       (eq (get object :type) :terminal)))


(defun synfl-check-terminal (object)
  "Signal an error if object is not a terminal of a synfl grammar."
  (unless (and (synfl-terminal-p object)
	       (let ((grammar (synfl-grammar object)))
		 (and (synfl-grammar-p grammar)
		      (memq object (get grammar :terminals)))))
    (error 'wrong-type-argument 'synfl-terminal object)))


(defun synfl-make-nonterminal (name grammar)
  "Return internal representation of NAME as a nonterminal symbol of GRAMMAR."
  (let ((symbol (synfl-grammar-make-symbol grammar name)))
    (put symbol :type :nonterminal)
    (put symbol :synfl-grammar (list grammar synfl-grammar-version))
    (push symbol (get grammar :nonterminals))
    symbol))


(defun synfl-nonterminal-p (object)
  "Return non-nil if object is a nonterminal of a synfl grammar."
  (and (symbolp object)
       (eq (get object :type) :nonterminal)))


(defun synfl-check-nonterminal (object)
  "Signal an error if object is not a nonterminal of a synfl grammar."
  (unless (and (synfl-nonterminal-p object)
	       (let ((grammar (synfl-grammar object)))
		 (and (synfl-grammar-p grammar)
		      (memq object (get grammar :nonterminals)))))
    (error 'wrong-type-argument 'synfl-nonterminal object)))


(defun synfl-symbol-p (object)
  "Return non-nil if object is a symbol of a synfl grammar."
  (or (synfl-terminal-p object) (synfl-nonterminal-p object)))


(defun synfl-check-symbol (object)
  "Signal an error if object is not a symbol of a synfl grammar."
  (unless (and (synfl-symbol-p object)
	       (let ((grammar (synfl-grammar object)))
		 (and (synfl-grammar-p grammar)
		      (memq object (append (get grammar :terminals)
					   (get grammar :nonterminals))))))
    (error 'wrong-type-argument 'synfl-symbol-p object)))



;; Production functions

(defun synfl-make-production (list grammar)
  "Validate LIST as a production of GRAMMAR, and return LIST."
  ;; transform external names to internal representation
  (setq list (list (synfl-grammar-symbol grammar (nth 0 list))
		   (mapcar (lambda (x) (synfl-grammar-symbol grammar x))
			   (nth 1 list))
		   (or (nth 2 list) #'ignore)))
  (synfl-check-nonterminal (nth 0 list))
  (mapc #'synfl-check-symbol (nth 1 list))
  (let ((side-effect (nth 2 list)))
    (unless (or (null side-effect) (functionp side-effect))
      (error 'invalid-function "side effect must be function" side-effect)))
  list)


(defsubst synfl-production-lhs (production)
  "Return the LHS of PRODUCTION."
  (nth 0 production))


(defsubst synfl-production-rhs (production)
  "Return the RHS of PRODUCTION."
  (nth 1 production))


(defsubst synfl-production-side-effect (production)
  "Return the side-effect of PRODUCTION.

A side effect is an object that is functionp and accepts five arguments:
a synfl parse tree, an object that is either a state or a production, a
state, a token, and a parser stack."
  (nth 2 production))


(defun synfl-check-production (object)
  "Signal an error if OBJECT is not a production."
  (unless (synfl-nonterminal-p (synfl-production-lhs object))
    (error 'wrong-type-argument
	   'synfl-production object "lhs not a nonterminal"))
  (mapcar (lambda (x) (unless (synfl-symbol-p x)
			(error 'wrong-type-argument
			       'synfl-production object "not a symbol" x)))
	  (synfl-production-rhs object))
  (let ((grammar (synfl-grammar (synfl-production-lhs object))))
    (mapcar (lambda (x)
	      (unless (eq grammar (synfl-grammar x))
		(error 'wrong-type-argument
		       'synfl-production object "mismatched grammars")))
	    (synfl-production-rhs object)))
  (let ((side-effect (synfl-production-side-effect object)))
    (or (null side-effect)
	(functionp side-effect)
	;; #### is there a way to check functions for number of arguments?
	(error 'wrong-type-argument
	       'synfl-production object
	       "side-effect not a function" side-effect))))


(defun synfl-production-p (object)
  "Return t if OBJECT is a synfl production, otherwise nil."
  (condition-case nil
      (progn
	(synfl-check-production object)
	t)
    (wrong-type-argument nil)))



;; Token stream API
;; #### this API needs a constructor

(defun synfl-stream-rewind (stream)
  (funcall (get stream :rewind) stream))


(defun synfl-stream-next (stream)
  "Return the next token from token stream STREAM."
  (let ((token (funcall (get stream :iterate) stream)))
    (synfl-trace '(next-token) "TOKEN: %S\n" token)
    (synfl-trace '(next-token)
      "  (type: %S)\n" (if (eq token :eof)
			   :eof
			 (extent-property token :synfl-token)))
    token))



;; Token accessors

;; It would be nice if the accessors could infer the stream type from the
;; token but this would require specifying how stream types are attached to
;; different types of object.  For objects with properties, such as symbols
;; and extents, a dedicated property could be used, but CL defstructs (for
;; example) might have a specified layout that needs to be respected.

(defun synfl-token-type (token stream)
  "Return the type of TOKEN produced by STREAM."
  (let ((type (funcall (get stream :token-type) token)))
    (synfl-trace '(token-type) "TYPE: %S\n" type)
    type))


(defun synfl-token-value (token stream)
  "Return the implementation-dependent value of TOKEN produced by STREAM.

May return `nil' to mean unavailable."
  (funcall (get stream :token-value) token))


(defun synfl-token-text (token stream)
  "Return the original text of TOKEN produced by STREAM.

May return `nil' to mean unavailable."
  (funcall (get stream :token-text) token))



;; parse API

;; There's nothing here yet.



;; Parser functions

(defsubst synfl-handle-length (production)
  "Return the length of the RHS (handle) of PRODUCTION."
  (length (synfl-production-rhs production)))


;; Algorithm 4.7 of the Dragon Book
(defun synfl-lr-parse (stream grammar node-class)
  "Parse STREAM according to GRAMMAR, returning a tree built of NODE-CLASS.

STREAM is a synfl token stream, GRAMMAR a synfl grammar, and NODE-CLASS a
synfl node class record.

If a syntax error is found, return a cons of :error with a list of error
descriptors (does not signal a Lisp error)."

  (setq grammar (synfl-find-grammar grammar))
  ;; note that "tokens" are actually already wrapped in nodes
  ;; this should be efficient in the sense that we are going to have to
  ;; design trees and streams to cooperate with each other anyway, so we
  ;; should usually be able to arrange for them to have a common
  ;; representation
  ;; eg in the terminal stream example, we could copy the grammar symbol's
  ;; attributes to a new gensym and put the node properties on it
  ;; eg in the lock-c application, a token is an extent, and can directly
  ;; carry the node properties
  (flet ((next ()
	   (let* ((raw-token (synfl-stream-next stream))
		  ;; wrap the token in a node
		  (node (funcall (synfl-node-constructor node-class)
				 raw-token)))
	     (funcall (synfl-node-symbol-mutator node-class) node raw-token)
	     node))
	 (type (token) (synfl-token-type token stream))
	 (make-tree (nonterminal children)
	   (funcall (synfl-node-tree-constructor node-class)
		    (funcall (synfl-node-constructor node-class) nonterminal)
		    children))
	 (effect (node object s token stack)
	   (funcall (synfl-production-side-effect object)
		    node object s token stack)))
    (let* ((actions (synfl-grammar-action-table grammar))
	   (gotos (synfl-grammar-goto-table grammar))
	   (stack (list 0))
	   (token (next))
	   (msgctr 0)
	   action verb object s)
      (catch 'result
	(while t
	  (setq action (synfl-action actions (type token) (car stack))
		verb (car action)
		object (cdr action))
	  (synfl-trace '(parser-state-entry)
	    "-> [%S %S %S %S %S %S]\n"
	    ;; nb some of the objects on the stack may be _really_ big
	    token (type token) stack s verb
	    (if (listp object) (list (car object) (cadr object)) object))
	  ;; #### this must go!
	  (when (and (synfl-trace-p '(point))
		     (> (point) msgctr))
	    (setq msgctr (+ msgctr 1000))
	    (message "point: %d" (point)))
	  (cond ((eq verb :shift)	; object is a state
		 (push token stack)
		 (push object stack)
		 (setq s object		; top of stack
		       token (next)))
		((eq verb :reduce)	; object is a production
		 (let ((node (make-tree
			      (synfl-production-lhs object)
			      (let ((i (1- (* 2 (synfl-handle-length object))))
				    (result nil))
				(while (> i 0)
				  (push (nth i stack) result)
				  (decf i 2))
				result))))
		   ;; maybe this should be at the end (node is constructed,
		   ;; but s and stack haven't been fixed up yet)?
		   (effect node object s token stack)
		   (setq stack (nthcdr (* 2 (synfl-handle-length object))
				       stack))
		   (setq s (synfl-goto gotos
				       (synfl-production-lhs object)
				       (car stack)))
		   (push node stack)
		   (push s stack)))
		((eq verb :accept)
		 (throw 'result (nth 1 stack)))
		;; could use error productions here
		(t (throw 'result
			  (synfl-parse-error object s token stack))))
	  ;; nb some of the objects on the stack may be _really_ big
	  (synfl-trace '(parser-state-exit)
	    "<- %S %S\n" token stack)
	  )))))



;; Representation of the tables
;; #### the following makes no sense at construction time, when you don't
;; know how big a vector is needed; use an alist instead.  We can optimize
;; this later.

;; From the Dragon Book, pp. 244-246 "Compaction of LR Parsing Tables,"
;; a space-saving representation of the action table can be based on an
;; array of pointers to alists associating terminals to actions.  So let's
;; make states be non-negative integers, with 0 conventionally the initial
;; state.  Values in the action tables are conses of verbs and objects.

;; Goto tables can be compactly expressed as a mapping from nonterminals
;; to a list of state pairs, representing transitions.  Since nonterminals
;; are currently represented as symbols, an alist whose keys are nonterminals
;; and whose values are alists seems reasonable.

;; The `cdr's on tables (to skip leading atoms) could be omitted and probably
;; would be more efficient.
;; #### :accept and :undefined never need OBJECT, :error might not.
(defun synfl-set-action (table terminal state verb object)
  "Set the action in TABLE for TERMINAL and STATE from VERB and OBJECT.

Implementation details:
TABLE is a list whose car is :action-table (a meaningful place-holder) and
whose cdr is an alist with states for keys.  The value associated to each
state is an alist with terminals for keys and values which are a cons of
the verb and the object."
  ;; this assq is incorrect with a bignum's worth of states.  ;-)
  (let ((newentry (cons terminal (cons verb object)))
	(subtable (assq state (cdr table))))
    (if subtable
	(let ((entry (assq terminal subtable)))
	  (if (and entry (not (equal entry newentry)))
	      (error 'invalid-change "parser conflict"
		     entry newentry state terminal)
	    (setcdr subtable (cons newentry (cdr subtable)))))
      ;; link in a new element of the alist on states
      (setcdr table (cons (cons state
				;; value is an alist with one entry
				(list newentry))
			  ;; tail of the alist on states
			  (cdr table))))))


(defun synfl-set-goto (table nonterminal from to)
  "Set TABLE to goto state TO on NONTERMINAL in state FROM."
  (let ((subtable (assq nonterminal (cdr table))))
    (if subtable
	(if (let ((el (assq from subtable)))
	      (and el (not (eq (cdr el) to))))
	    (error 'invalid-change "already in goto table" nonterminal from)
	  (setcdr subtable (cons (cons from to) (cdr subtable))))
      (setcdr table (cons (cons nonterminal
				(list (cons from to)))
			  (cdr table))))))


(defun synfl-action (action-table terminal state)
  "Return the action from ACTION-TABLE for STATE and TERMINAL.

This is a cons whose car is the verb, and may be :shift, :reduce, :accept,
:error, or :undefined.  :undefined should only be encountered when generating
the table.  The cdr is the object, a state if the verb is :shift, and a
production if it is :reduce.  Other verbs give implementation-dependent
objects.

Callers must check for `nil' return, which is implicitly an error."
  (cdr (assq terminal (cdr (assq state (cdr action-table))))))


(defun synfl-action-verb (action-table terminal state)
  "Return the verb from ACTION-TABLE for STATE and TERMINAL.

This may be :shift, :reduce, :accept, :error, or :undefined.
:undefined should only be encountered when generating the table."
  (car (synfl-action action-table terminal state)))


(defun synfl-action-object (action-table terminal state)
  "Return the object from ACTION-TABLE for STATE and TERMINAL.

This is a state if the verb is :shift, and a production if it is :reduce.
Other verbs give implementation-dependent objects."
  (cdr (synfl-action action-table terminal state)))


(defun synfl-goto (goto-table nonterminal state)
  "Return the goto state for STATE and NONTERMINAL according to GOTO-TABLE.

Callers must check for `nil' return, which is implicitly an error."
  (cdr (assq state (cdr (assq nonterminal (cdr goto-table))))))


;; #### does this function make any sense?
(defun synfl-parse-error (object state token stack)
  "Return (not signal) a syntax error, which is a cons with car :error.

The argument OBJECT may be a state or a production."
  (list :error object state token stack))



;; Set of items contructions

;; An item is represented as a vector [processed production position lookahead]
;; where production = the production being represented
;;       position   = current position ("dot") in the production
;;       lookahead  = lookahead used in canonical LR and LALR parsers

;; #### Perhaps it would be useful to use an indirect representation with
;; items being symbols, and their values (or a property) a vector as above.
;; However, this would require checking for existence of a given item in
;; some complex way.

;; making items

(defsubst synfl-make-item (production position)
  (vector production position nil))

;; item accessors

(defsubst synfl-item-production (item) (aref item 0))
(defsubst synfl-item-position (item) (aref item 1))
;; lookahead is not used in LR(0) calculations
(defsubst synfl-item-lookahead (item) (aref item 2))
(defsubst synfl-item-next (item)
  "Return the first element of the continuation of ITEM."
  (nth (synfl-item-position item) (synfl-production-rhs
				   (synfl-item-production item))))
(defsubst synfl-item-nonterminal (item)
  "Return the first element of the continuation of ITEM if nonterminal."
  (let ((next (synfl-item-next item)))
    (when (synfl-nonterminal-p next) next)))

;; item mutators

(defsubst synfl-item-set-production (item value) (aset item 0 value))
(defsubst synfl-item-set-position (item value) (aset item 1 value))
(defsubst synfl-item-set-lookahead (item value) (aset item 2 value))

;; first and follow functions

(defsubst synfl-first-map (grammar)
  (get (synfl-find-grammar grammar) :first-alist))
(defsubst synfl-initialize-first-map (grammar)
  (setq grammar (synfl-find-grammar grammar))
  (put grammar :first-alist (mapcar (lambda (x) (list x x))
				    (synfl-grammar-terminals grammar))))
(defsubst synfl-add-to-first-map (grammar symbol terminal)
  "Add to GRAMMAR's first map an association of SYMBOL to TERMINAL."
  (setq grammar (synfl-find-grammar grammar))
  (let ((map (assq symbol (synfl-first-map grammar))))
    (if map
	(setcdr map (cons terminal (cdr map)))
      (put grammar :first-alist (cons (list symbol terminal)
				      (synfl-first-map grammar))))))

(defsubst synfl-follow-map (grammar)
  (get (synfl-find-grammar grammar) :follow-alist))
(defsubst synfl-initialize-follow-map (grammar map)
  (put (synfl-find-grammar grammar) :follow-alist map))
(defsubst synfl-add-to-follow-map (grammar pair)
  "Add to GRAMMAR's follow map an association for PAIR (a cons)."
  (setq grammar (synfl-find-grammar grammar))
  (put grammar :follow-alist (cons pair (synfl-follow-map grammar))))


;; basic set of items operations

;; Dragon Book, p. 222
(defun synfl-compute-closure (set)
  "Return the closure of set of items SET."
  (if (null set)
      nil
    (let ((productions (synfl-grammar-productions
			(synfl-grammar (synfl-production-lhs
					(synfl-item-production (car set))))))
	  (added (make-hash-table :test #'equal))
	  (notdone t))
      ;; mark everything in set as added
      (mapc (lambda (x) (puthash x t added)) set)
      (while notdone
	(setq notdone nil)
	;; uncomment this to follow the argument on pp. 222-3 of ASU
	;; #### currently this is ugly to do with `synfl-trace'
	;; maybe change all the synfl-*-print functions to return strings?
	;;(synfl-trace '(debug-closure) "tracing closure")
	;;(let ((c (reverse set)))
	;;  (princ "[")
	;;  (synfl-item-print (car c))
	;;  (setq c (cdr c))
	;;  (while c
	;;    (princ "\n ")
	;;    (synfl-item-print (car c))
	;;    (setq c (cdr c)))
	;;  (princ "]\n"))
	(mapc (lambda (item)
		(let ((nonterminal (synfl-item-nonterminal item)))
		  ;; optimization
		  (when nonterminal
		    (mapc (lambda (production)
			    ;; the body won't be executed if (null nonterminal)
			    (when (eq nonterminal
				      (synfl-production-lhs production))
			      (let ((newitem (synfl-make-item production 0)))
				(when (not (gethash newitem added))
				  (push newitem set)
				  (puthash newitem t added)
				  (setq notdone t)))))
			  productions))))
	      set))
      set)))


;; Dragon Book, p. 224
(defun synfl-compute-goto (set symbol)
  "Return the goto set for SET and SYMBOL."
  (flet ((increment-position (item)
	   (synfl-item-set-position item (1+ (synfl-item-position item)))
	   item))
    (synfl-compute-closure
     (delq nil (mapcar (lambda (item)
			 (if (eq (synfl-item-next item) symbol)
			     (increment-position (copy-sequence item))
			   nil))
		       set)))))


;; Dragon Book, p. 224
(defun synfl-compute-sets-of-slr-items (grammar)
  "Compute the collection of sets of LR(0) items for GRAMMAR."
  (let* ((grammar (synfl-copy-grammar grammar))
	 (start (synfl-make-nonterminal :start grammar))
	 (oldstart (synfl-grammar-start-symbol grammar))
	 (newproduction (list start (list oldstart) nil))
	 (collection (list (synfl-compute-closure
			    (list (synfl-make-item newproduction 0)))))
	 (added (make-hash-table :test #'equal))
	 (i -1)
	 (notdone t))
    (put grammar :start-symbol start)
    (push newproduction (get grammar :productions))
    ;; collection is initialized to a singleton
    ;; it is guaranteed that the initial state is 0
    (puthash (car collection) (setq i (1+ i)) added)
    (while notdone
      (setq notdone nil)
      (mapc (lambda (set)
	      (mapc (lambda (symbol)
		      (let ((goto (synfl-compute-goto set symbol)))
			(when (and goto (not (gethash goto added)))
			  (puthash goto (setq i (1+ i)) added)
			  (push goto collection)
			  (setq notdone t))))
		    (synfl-grammar-symbols grammar)))
	    collection))
    added))
    

;; this should actually be folded into the sets of items construction
;; above?
;; Algorithm 4.8, Dragon Book, p. 227
(defun synfl-generate-slr-tables (grammar)
  "Generate the simple LR parsing tables for GRAMMAR."

  (setq grammar (synfl-find-grammar grammar))

  (let ((terminals (synfl-grammar-terminals grammar))
	(nonterminals (synfl-grammar-nonterminals grammar))
	(start-symbol (synfl-grammar-start-symbol grammar))
	(collection (synfl-compute-sets-of-slr-items grammar))
	(atable (list :action-table))
	(gtable (list :goto-table)))
    (synfl-trace '(debug-slr)
      "Number of states is %d\n" (hash-table-count collection))
    (maphash
     (lambda (set state)
       ;; generate the action table for the STATE corresponding to SET
       (mapcar
	(lambda (item)
	  (synfl-item-print item)
	  (terpri)
	  (let* ((production (synfl-item-production item))
		 (lhs (synfl-production-lhs production))
		 (rhs (synfl-production-rhs production)))
	    (cond
	     ((= (synfl-item-position item) (length rhs))
	      ;; Rule 2c
	      (when (and (eq :start lhs) (eq start-symbol (car rhs)))
		(synfl-trace '(debug-slr)
		  "-> setting accept action on start production\n")
		(synfl-set-action atable :eof state :accept nil))
	      ;; Rule 2b
	      (mapc (lambda (symbol)
		      (synfl-set-action atable symbol state
					:reduce production)
		      (synfl-trace '(debug-slr)
			"-> setting reduction to %S on %S\n" lhs symbol))
		    (synfl-follow lhs)))
	     (t
	      (let ((symbol (synfl-item-next item)))
		;; Rule 2a
		(when (synfl-terminal-p symbol)
		  (synfl-trace '(debug-slr)
		    "-> shifting %S\n" (synfl-name symbol))
		  (synfl-set-action atable symbol state :shift
				    (gethash (synfl-compute-goto set symbol)
					     collection)))
		;; Rule 5
		(when (and (eq :start lhs)
			   (eq 0 (synfl-item-position item))
			   (not (= 0 state)))
		  (error 'invalid-state
			 "initial state should be 0" state)))))))
	set)
       (mapc (lambda (terminal)
	       (when (not (synfl-action-verb atable terminal state))
		 ;; Rule 4
		 (synfl-set-action atable terminal state :error nil)))
	     (cons :eof terminals))
       ;; generate the goto table for the STATE	corresponding to SET
       (mapc (lambda (nonterminal)
	       (let ((goto (synfl-compute-goto set nonterminal)))
		 (if goto
		     ;; Rule 3
		     (progn
		       (synfl-trace '(debug-slr)
			 "-> setting goto for %S\n" nonterminal)
		       (synfl-set-goto gtable nonterminal state
				       (gethash goto collection)))
		   ;; Rule 4
		   (synfl-set-goto gtable nonterminal set :error))))
	     nonterminals))
     collection)
    ;; #### invent an API
    (put grammar :action-table atable)
    (put grammar :goto-table gtable)))


(defun synfl-generate-parser (grammar)
  "Generate action and goto tables for GRAMMAR.

Put them in the :action-table and :goto-table properties of GRAMMAR."
  (synfl-generate-slr-tables grammar))


;; FIRST is defined in the Dragon Book, sec. 4.4, p. 188.
;; It is a map from a grammar symbol to the set of terminals that
;; can come first in a string derived from that symbol.
;; The map is represented as a list of (symbol . FIRST(symbol)).
(defun synfl-first (thing)
  "Get the FIRST set for THING."
  (cond ((symbolp thing)
	 (when thing
	   (cdr (assq thing (synfl-first-map (synfl-grammar thing))))))
	((listp thing)
	 (let ((first nil))
	   (while thing
	     (setq first (append (delq nil (synfl-first (car thing)))
				 first))
	     (if (not (memq nil (synfl-first (car thing))))
		 (setq thing nil)
	       (setq thing (cdr thing))
	       (unless thing (setq first (cons nil first)))))
	   first))
	(t (error 'wrong-type-argument "first not defined for" thing))))


(defun synfl-generate-first (grammar)
  "Generate the FIRST function for the symbols of GRAMMAR."
  (setq grammar (synfl-find-grammar grammar))
  (synfl-check-grammar grammar)
  (let ((productions (synfl-grammar-productions grammar))
	(in-play t))
    ;; initialize first from Rule 1
    (synfl-initialize-first-map grammar)
    ;; handle rule 2
    (mapcar (lambda (p)
	      (when (null (synfl-production-rhs p))
		(synfl-add-to-first-map grammar (synfl-production-lhs p) nil)
		(setq productions (delete p productions))))
	    productions)
    ;; handle rule 3
    (while in-play
      (setq in-play nil)
      (mapcar (lambda (p)
		(let* ((rhs (synfl-production-rhs p))
		       (lhs (synfl-production-lhs p))
		       (first (synfl-first lhs)))
		  (while rhs
		    (let ((more (synfl-first (car rhs))))
		      (mapcar (lambda (x)
				(unless (or (null x) (memq x first))
				  (synfl-add-to-first-map grammar lhs x)
				  (setq in-play t)))
			      more)
		      (if (not (memq nil more))
			  (setq rhs nil)
			(setq rhs (cdr rhs))
			(unless (or rhs (memq nil first))
			  (setq in-play t)
			  (synfl-add-to-first-map grammar lhs nil)))))))
	      productions))))


;; FOLLOW is defined in the Dragon Book, sec. 4.4, p. 188.
(defun synfl-follow (nonterminal)
  "Get the FOLLOW set for NONTERMINAL."
  (cdr (assq nonterminal (synfl-follow-map (synfl-grammar nonterminal)))))


(defun synfl-generate-follow (grammar)
  "Generate the FOLLOW function for the nonterminal symbols of GRAMMAR."
  (setq grammar (synfl-find-grammar grammar))
  (synfl-check-grammar grammar)
  (let* ((productions (synfl-grammar-productions grammar)))
    (flet ((put-follow (symbol set)
	     (let* ((follow (synfl-follow symbol))
		    (pair (cons symbol follow)))
	       (synfl-initialize-follow-map
		grammar
		(cons (cons symbol (union set follow))
		      (delete pair (synfl-follow-map grammar)))))))
      ;; initialize follow from Rule 1
      (synfl-initialize-follow-map grammar
				   ;; #### move this into
				   ;; synfl-initialize-follow-map
				   (list
				    (list
				     (synfl-grammar-start-symbol grammar)
				     :eof)))
      ;; handle Rule 2
      (mapc (lambda (p)
	      (let* ((rhs (synfl-production-rhs p))
		     sym)
		(while rhs
		  (setq sym (car rhs))
		  (setq rhs (cdr rhs))
		  (when rhs
		    (put-follow sym (delq nil (synfl-first rhs)))))))
	    productions)
      ;; handle Rule 3
      (let ((in-play t)
	    old-follow)
	(while in-play
	  (setq in-play nil)
	  (mapc (lambda (p)
		  (let* ((rhs (synfl-production-rhs p))
			 (lhs (synfl-production-lhs p))
			 sym)
		    (while rhs
		      (setq sym (car rhs))
		      (setq old-follow (synfl-follow sym))
		      (setq rhs (cdr rhs))
		      (when (or (not rhs) (memq nil (synfl-first rhs)))
			(put-follow sym (synfl-follow lhs))
			(when (or (set-difference old-follow
						  (synfl-follow sym))
				  (set-difference (synfl-follow sym)
						  old-follow))
			  (setq in-play t))))))
		productions))))))



;; debugging and inspection functions
;; See also synfl-trace.el (maybe these should move there?)
;; #### these should probably all return strings rather than do a `princ'
;; for better interface with `synfl-trace'

(defun synfl-grammar-print (name)
  "Pretty-print the contents of the grammar with name NAME using `princ'."

  (let* ((grammar (synfl-find-grammar name))
	 (terminals (synfl-grammar-terminals grammar))
	 (nonterminals (synfl-grammar-nonterminals grammar))
	 (start (synfl-grammar-start-symbol grammar))
	 (productions (synfl-grammar-productions grammar)))
    (princ (format "Grammar object:\t\t%S\n\n" grammar))
    (princ (format "Terminal symbols:\t%S" (synfl-name (car terminals))))
    (mapc (lambda (x) (princ (format " %S" (synfl-name x)))) (cdr terminals))
    (princ "\n\n")
    (princ (format "Nonterminal symbols:\t%S" (synfl-name start)))
    (mapc (lambda (x) (princ (format " %S" (synfl-name x))))
	  (delq start nonterminals))
    (princ "\n\n")
    (princ "Productions:\n")
    (let ((i 0))
      (mapc (lambda (x)
	      (setq i (1+ i))
	      (princ (format "%-3d " i))
	      (synfl-production-print x t)
	      (terpri))
	    productions))))


(defun synfl-production-print (production &optional annotate)
  "Pretty-print a synfl grammar production using `princ'.

If optional argument ANNOTATE is non-`nil', describe side effects."

  (let* ((side-effect (synfl-production-side-effect production))
	 (p (format "%S ->" (synfl-production-lhs production)))
	 (se (when side-effect (format "[%S]" side-effect))))
    (mapc (lambda (y) (setq p (concat p (format " %S" y))))
	  (synfl-production-rhs production))
    (princ p)
    (when (and annotate se)
      (princ (make-string (- 72 (length p) (length se)) ?\ ))
      (princ se))))
    

(defun synfl-tables-print (grammar)
  "Pretty-print the compiled tables in the Dragon Book style using `princ'."

  (let* ((object (synfl-find-grammar grammar))
	 (terminals (synfl-grammar-terminals object))
	 (nonterminals (synfl-grammar-nonterminals object))
	 (productions (synfl-grammar-productions object))
	 (atable (synfl-grammar-action-table object))
	 (gtable (synfl-grammar-goto-table object))
	 (m0 (1+ (length terminals)))	; includes EOF marker
	 ;;(m1 (length nonterminals))
	 (n (synfl-grammar-state-count object))
	 (i 0)
	 (ht (make-hash-table :test #'equal :size (length productions))))
    ;; this might be a useful thing to have in general
    (mapc (lambda (x)
	    (setq i (1+ i))
	    (puthash x i ht))
	  productions)
    (princ "Parser tables:\n\tACTION")
    (princ (make-string m0 ?\t))
    (princ "GOTO\nSTATE")
    (mapc (lambda (x) (princ "\t") (princ x)) terminals)
    (princ "\t:eof")
    (mapc (lambda (x) (princ "\t") (princ x)) nonterminals)
    (terpri)
    (setq i 0)
    (while (< i n)
      (princ i)
      (mapc (lambda (x)
	      (let ((a (synfl-action atable x i)))
		(princ "\t")
		(when a
		  (cond ((eq (car a) :shift) (princ (format "s%S" (cdr a))))
			((eq (car a) :reduce)
			 (princ (format "r%S" (gethash (cdr a) ht))))
			((eq (car a) :accept) (princ "acc"))
			((eq (car a) :error)
			 ;; for Dragon Book style this clutters things up
			 ;;(princ (format "e%S" (cdr a))))
			 )
			(t (princ (format "Oops! %S" a)))))))
	    (append terminals (list :eof)))
      (mapc (lambda (x)
	      (let ((a (synfl-goto gtable x i)))
		(princ "\t")
		(when a (princ a))))
	    nonterminals)
      (terpri)
      (setq i (1+ i)))))

(defun synfl-item-print (item)
  (let* ((production (synfl-item-production item))
	 (lhs (synfl-production-lhs production))
	 (rhs (synfl-production-rhs production))
	 (i (synfl-item-position item)))
    (princ (format "%S ->" lhs))
    (while (> i 0)
      (princ (format " %S" (car rhs)))
      (setq rhs (cdr rhs)
	    i (1- i)))
    (princ " :dot")
    (while rhs
      (princ (format " %S" (car rhs)))
      (setq rhs (cdr rhs)))))


;; #### NO-REWIND is not currently useful since there is no seek operation.
;; Get rid of it?
(defun synfl-stream-print (stream &optional no-rewind)
  "Print description of each token in STREAM using `princ'.

Starts from current position.  Rewinds STREAM when done.
Optional argument NO-REWIND suppresses rewinding."
  (princ "Stream details:\n")
  (let ((token nil))
    (while (not (eq token :eof))
      (setq token (synfl-stream-next stream))
      (princ (if (eq token :eof)
		 :eof
	       (list token
		     :type
		     (synfl-token-type token stream)
		     :value
		     (synfl-token-value token stream)
		     :text
		     (synfl-token-text token stream))))
      (terpri)))
  (unless no-rewind (synfl-stream-rewind stream)))


(defun synfl-collection-print (collection)
  "Print COLLECTION using `princ'.

COLLECTION is a collection of sets of LR items, typically generated with
a function like `synfl-compute-sets-of-slr-items'."
  (princ "LR collection of sets of items:\n")
  (maphash (lambda (set state)
	     (princ (format "State %d:\n" state))
	     (mapcar (lambda (item)
		       (princ " ")
		       (synfl-item-print item)
		       (terpri))
		     (reverse set)))
	   collection))

(provide 'synfl)

;;; synfl.el ends here
