Initial revision

author: Vern Paxson <vern@ee.lbl.gov> 1987-11-08 22:24:44 +0000
committer: Vern Paxson <vern@ee.lbl.gov> 1987-11-08 22:24:44 +0000
commit: 2cc578462372baa1b85936749946608d7f36415f (patch)
tree: b103972512328042bbc4e7541616e88369619b2c
12 files changed, 5540 insertions, 0 deletions
diff --git a/ccl.c b/ccl.c
new file mode 100644
index 0000000..fa15c02
--- /dev/null
+++ b/ccl.c
@@ -0,0 +1,98 @@
+/* lexccl - routines for character classes */
+
+/*
+ * Copyright (c) University of California, 1987
+ */
+
+#include "flexdef.h"
+
+/* ccladd - add a single character to a ccl
+ *
+ * synopsis
+ *    int cclp;
+ *    char ch;
+ *    ccladd( cclp, ch );
+ */
+ccladd( cclp, ch )
+int cclp;
+char ch;
+
+    {
+    int ind, len, newpos, i;
+
+    len = ccllen[cclp];
+    ind = cclmap[cclp];
+
+    /* check to see if the character is already in the ccl */
+
+    for ( i = 0; i < len; ++i )
+	if ( ccltbl[ind + i] == ch )
+	    return;
+
+    newpos = ind + len;
+
+    if ( newpos >= current_max_ccl_tbl_size )
+	{
+	current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
+
+	++num_reallocs;
+
+	ccltbl = reallocate_character_array( ccltbl, current_max_ccl_tbl_size );
+	}
+
+    ccllen[cclp] = len + 1;
+    ccltbl[newpos] = ch;
+    }
+
+
+/* cclinit - make an empty ccl
+ *
+ * synopsis
+ *    int cclinit();
+ *    new_ccl = cclinit();
+ */
+int cclinit()
+
+    {
+    if ( ++lastccl >= current_maxccls )
+	{
+	current_maxccls += MAXCCLS_INCREMENT;
+
+	++num_reallocs;
+
+	cclmap = reallocate_integer_array( cclmap, current_maxccls );
+	ccllen = reallocate_integer_array( ccllen, current_maxccls );
+	cclng = reallocate_integer_array( cclng, current_maxccls );
+	}
+
+    if ( lastccl == 1 )
+	/* we're making the first ccl */
+	cclmap[lastccl] = 0;
+
+    else
+	/* the new pointer is just past the end of the last ccl.  Since
+	 * the cclmap points to the \first/ character of a ccl, adding the
+	 * length of the ccl to the cclmap pointer will produce a cursor
+	 * to the first free space
+	 */
+	cclmap[lastccl] = cclmap[lastccl - 1] + ccllen[lastccl - 1];
+
+    ccllen[lastccl] = 0;
+    cclng[lastccl] = 0;	/* ccl's start out life un-negated */
+
+    return ( lastccl );
+    }
+
+
+/* cclnegate - negate a ccl
+ *
+ * synopsis
+ *    int cclp;
+ *    cclnegate( ccl );
+ */
+cclnegate( cclp )
+int cclp;
+
+    {
+    cclng[cclp] = 1;
+    }
diff --git a/dfa.c b/dfa.c
new file mode 100644
index 0000000..d709df8
--- /dev/null
+++ b/dfa.c
@@ -0,0 +1,460 @@
+/* lexdfa - DFA construction routines */
+
+/*
+ * Copyright (c) University of California, 1987
+ */
+
+#include "flexdef.h"
+
+/* epsclosure - construct the epsilon closure of a set of ndfa states
+ *
+ * synopsis
+ *    int t[current_max_dfa_size], numstates, accset[accnum + 1], nacc;
+ *    int hashval;
+ *    int *epsclosure();
+ *    t = epsclosure( t, &numstates, accset, &nacc, &hashval );
+ *
+ * NOTES
+ *    the epsilon closure is the set of all states reachable by an arbitrary
+ *  number of epsilon transitions which themselves do not have epsilon
+ *  transitions going out, unioned with the set of states which have non-null
+ *  accepting numbers.  t is an array of size numstates of nfa state numbers.
+ *  Upon return, t holds the epsilon closure and numstates is updated.  accset
+ *  holds a list of the accepting numbers, and the size of accset is given
+ *  by nacc.  t may be subjected to reallocation if it is not large enough
+ *  to hold the epsilon closure.
+ *
+ *    hashval is the hash value for the dfa corresponding to the state set
+ */
+int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr )
+int *t, *ns_addr, accset[], *nacc_addr, *hv_addr;
+
+    {
+    register int stkpos, ns, tsp;
+    int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
+    int stkend, nstate;
+    static int did_stk_init = false, *stk; 
+
+#define MARK_STATE(state) \
+	trans1[state] = trans1[state] - MARKER_DIFFERENCE;
+
+#define IS_MARKED(state) (trans1[state] < 0)
+
+#define UNMARK_STATE(state) \
+	trans1[state] = trans1[state] + MARKER_DIFFERENCE;
+
+#define CHECK_ACCEPT(state) \
+	{ \
+	nfaccnum = accptnum[state]; \
+	if ( nfaccnum != NIL ) \
+	    accset[++nacc] = nfaccnum; \
+	}
+
+#define DO_REALLOCATION \
+	{ \
+	current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
+	++num_reallocs; \
+	t = reallocate_integer_array( t, current_max_dfa_size ); \
+	stk = reallocate_integer_array( stk, current_max_dfa_size ); \
+	} \
+
+#define PUT_ON_STACK(state) \
+	{ \
+	if ( ++stkend >= current_max_dfa_size ) \
+	    DO_REALLOCATION \
+	stk[stkend] = state; \
+	MARK_STATE(state) \
+	}
+
+#define ADD_STATE(state) \
+	{ \
+	if ( ++numstates >= current_max_dfa_size ) \
+	    DO_REALLOCATION \
+	t[numstates] = state; \
+	hashval = hashval + state; \
+	}
+
+#define STACK_STATE(state) \
+	{ \
+	PUT_ON_STACK(state) \
+	CHECK_ACCEPT(state) \
+	if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
+	    ADD_STATE(state) \
+	}
+
+    if ( ! did_stk_init )
+	{
+	stk = allocate_integer_array( current_max_dfa_size );
+	did_stk_init = true;
+	}
+
+    nacc = stkend = hashval = 0;
+
+    for ( nstate = 1; nstate <= numstates; ++nstate )
+	{
+	ns = t[nstate];
+
+	/* the state could be marked if we've already pushed it onto
+	 * the stack
+	 */
+	if ( ! IS_MARKED(ns) )
+	    PUT_ON_STACK(ns)
+
+	CHECK_ACCEPT(ns)
+	hashval = hashval + ns;
+	}
+
+    for ( stkpos = 1; stkpos <= stkend; ++stkpos )
+	{
+	ns = stk[stkpos];
+	transsym = transchar[ns];
+
+	if ( transsym == SYM_EPSILON )
+	    {
+	    tsp = trans1[ns] + MARKER_DIFFERENCE;
+
+	    if ( tsp != NO_TRANSITION )
+		{
+		if ( ! IS_MARKED(tsp) )
+		    STACK_STATE(tsp)
+
+		tsp = trans2[ns];
+
+		if ( tsp != NO_TRANSITION )
+		    if ( ! IS_MARKED(tsp) )
+			STACK_STATE(tsp)
+		}
+	    }
+	}
+
+    /* clear out "visit" markers */
+
+    for ( stkpos = 1; stkpos <= stkend; ++stkpos )
+	{
+	if ( IS_MARKED(stk[stkpos]) )
+	    {
+	    UNMARK_STATE(stk[stkpos])
+	    }
+	else
+	    lexfatal( "consistency check failed in epsclosure()" );
+	}
+
+    *ns_addr = numstates;
+    *hv_addr = hashval;
+    *nacc_addr = nacc;
+
+    return ( t );
+    }
+
+
+
+/* increase_max_dfas - increase the maximum number of DFAs */
+
+increase_max_dfas()
+
+    {
+    int old_max = current_max_dfas;
+
+    current_max_dfas += MAX_DFAS_INCREMENT;
+
+    ++num_reallocs;
+
+    base = reallocate_integer_array( base, current_max_dfas );
+    def = reallocate_integer_array( def, current_max_dfas );
+    dfasiz = reallocate_integer_array( dfasiz, current_max_dfas );
+    accsiz = reallocate_integer_array( accsiz, current_max_dfas );
+    dhash = reallocate_integer_array( dhash, current_max_dfas );
+    todo = reallocate_integer_array( todo, current_max_dfas );
+    dss = reallocate_integer_pointer_array( dss, current_max_dfas );
+    dfaacc = reallocate_integer_pointer_array( dfaacc, current_max_dfas );
+
+    /* fix up todo queue */
+    if ( todo_next < todo_head )
+	{ /* queue was wrapped around the end */
+	register int i;
+
+	for ( i = 0; i < todo_next; ++i )
+	    todo[old_max + i] = todo[i];
+	
+	todo_next += old_max;
+	}
+    }
+
+
+/* snstods - converts a set of ndfa states into a dfa state
+ *
+ * synopsis
+ *    int sns[numstates], numstates, newds, accset[accnum + 1], nacc, hashval;
+ *    int snstods();
+ *    is_new_state = snstods( sns, numstates, accset, nacc, hashval, &newds );
+ *
+ * on return, the dfa state number is in newds.
+ */
+int snstods( sns, numstates, accset, nacc, hashval, newds_addr )
+int sns[], numstates, accset[], nacc, hashval, *newds_addr;
+
+    {
+    int didsort = 0;
+    register int i, j;
+    int newds, *oldsns;
+    char *malloc();
+
+    for ( i = 1; i <= lastdfa; ++i )
+	if ( hashval == dhash[i] )
+	    {
+	    if ( numstates == dfasiz[i] )
+		{
+		oldsns = dss[i];
+
+		if ( ! didsort )
+		    {
+		    /* we sort the states in sns so we can compare it to
+		     * oldsns quickly.  we use bubble because there probably
+		     * aren't very many states
+		     */
+		    bubble( sns, numstates );
+		    didsort = 1;
+		    }
+
+		for ( j = 1; j <= numstates; ++j )
+		    if ( sns[j] != oldsns[j] )
+			break;
+
+		if ( j > numstates )
+		    {
+		    ++dfaeql;
+		    *newds_addr = i;
+		    return ( 0 );
+		    }
+
+		++hshcol;
+		}
+
+	    else
+		++hshsave;
+	    }
+
+    /* make a new dfa */
+
+    if ( ++lastdfa >= current_max_dfas )
+	increase_max_dfas();
+
+    newds = lastdfa;
+
+    if ( ! (dss[newds] = (int *) malloc( (unsigned) ((numstates + 1) * sizeof( int )) )) )
+	lexfatal( "dynamic memory failure in snstods()" );
+
+    /* if we haven't already sorted the states in sns, we do so now, so that
+     * future comparisons with it can be made quickly
+     */
+
+    if ( ! didsort )
+	bubble( sns, numstates );
+
+    for ( i = 1; i <= numstates; ++i )
+	dss[newds][i] = sns[i];
+
+    dfasiz[newds] = numstates;
+    dhash[newds] = hashval;
+
+    if ( nacc == 0 )
+	{
+	dfaacc[newds] = 0;
+	accsiz[newds] = 0;
+	}
+
+    else if ( reject )
+	{
+	/* we sort the accepting set in increasing order so the disambiguating
+	 * rule that the first rule listed is considered match in the event of
+	 * ties will work.  We use a bubble sort since the list is probably
+	 * quite small.
+	 */
+
+	bubble( accset, nacc );
+
+	if ( ! (dfaacc[newds] =
+		(int *) malloc( (unsigned) ((nacc + 1) * sizeof( int )) )) )
+	    lexfatal( "dynamic memory failure in snstods()" );
+
+	/* save the accepting set for later */
+	for ( i = 1; i <= nacc; ++i )
+	    dfaacc[newds][i] = accset[i];
+
+	accsiz[newds] = nacc;
+	}
+
+    else
+	{ /* find lowest numbered rule so the disambiguating rule will work */
+	j = accnum + 1;
+
+	for ( i = 1; i <= nacc; ++i )
+	    if ( accset[i] < j )
+		j = accset[i];
+
+	dfaacc[newds] = (int *) j;
+	}
+
+    *newds_addr = newds;
+
+    return ( 1 );
+    }
+
+
+/* symfollowset - follow the symbol transitions one step
+ *
+ * synopsis
+ *    int ds[current_max_dfa_size], dsize, transsym;
+ *    int nset[current_max_dfa_size], numstates;
+ *    numstates = symfollowset( ds, dsize, transsym, nset );
+ */
+int symfollowset( ds, dsize, transsym, nset )
+int ds[], dsize, transsym, nset[];
+
+    {
+    int ns, tsp, sym, i, j, lenccl, ch, numstates;
+    int ccllist;
+
+    numstates = 0;
+
+    for ( i = 1; i <= dsize; ++i )
+	{ /* for each nfa state ns in the state set of ds */
+	ns = ds[i];
+	sym = transchar[ns];
+	tsp = trans1[ns];
+
+	if ( sym < 0 )
+	    { /* it's a character class */
+	    sym = -sym;
+	    ccllist = cclmap[sym];
+	    lenccl = ccllen[sym];
+
+	    if ( cclng[sym] )
+		{
+		for ( j = 0; j < lenccl; ++j )
+		    { /* loop through negated character class */
+		    ch = ccltbl[ccllist + j];
+
+		    if ( ch > transsym )
+			break;	/* transsym isn't in negated ccl */
+
+		    else if ( ch == transsym )
+			/* next 2 */ goto bottom;
+		    }
+
+		/* didn't find transsym in ccl */
+		nset[++numstates] = tsp;
+		}
+
+	    else
+		for ( j = 0; j < lenccl; ++j )
+		    {
+		    ch = ccltbl[ccllist + j];
+
+		    if ( ch > transsym )
+			break;
+
+		    else if ( ch == transsym )
+			{
+			nset[++numstates] = tsp;
+			break;
+			}
+		    }
+	    }
+
+	else if ( sym >= 'A' && sym <= 'Z' && caseins )
+	    lexfatal( "consistency check failed in symfollowset" );
+
+	else if ( sym == SYM_EPSILON )
+	    { /* do nothing */
+	    }
+
+	else if ( ecgroup[sym] == transsym )
+	    nset[++numstates] = tsp;
+
+bottom:
+	;
+	}
+
+    return ( numstates );
+    }
+
+
+/* sympartition - partition characters with same out-transitions
+ *
+ * synopsis
+ *    integer ds[current_max_dfa_size], numstates, duplist[numecs];
+ *    symlist[numecs];
+ *    sympartition( ds, numstates, symlist, duplist );
+ */
+sympartition( ds, numstates, symlist, duplist )
+int ds[], numstates, duplist[];
+int symlist[];
+
+    {
+    int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
+
+    /* partitioning is done by creating equivalence classes for those
+     * characters which have out-transitions from the given state.  Thus
+     * we are really creating equivalence classes of equivalence classes.
+     */
+
+    for ( i = 1; i <= numecs; ++i )
+	{ /* initialize equivalence class list */
+	duplist[i] = i - 1;
+	dupfwd[i] = i + 1;
+	}
+
+    duplist[1] = NIL;
+    dupfwd[numecs] = NIL;
+
+    for ( i = 1; i <= numstates; ++i )
+	{
+	ns = ds[i];
+	tch = transchar[ns];
+
+	if ( tch != SYM_EPSILON )
+	    {
+	    if ( tch < -lastccl || tch > CSIZE )
+		lexfatal( "bad transition character detected in sympartition()" );
+
+	    if ( tch > 0 )
+		{ /* character transition */
+		mkechar( ecgroup[tch], dupfwd, duplist );
+		symlist[ecgroup[tch]] = 1;
+		}
+
+	    else
+		{ /* character class */
+		tch = -tch;
+
+		lenccl = ccllen[tch];
+		cclp = cclmap[tch];
+		mkeccl( ccltbl + cclp, lenccl, dupfwd, duplist, numecs );
+
+		if ( cclng[tch] )
+		    {
+		    j = 0;
+
+		    for ( k = 0; k < lenccl; ++k )
+			{
+			ich = ccltbl[cclp + k];
+
+			for ( ++j; j < ich; ++j )
+			    symlist[j] = 1;
+			}
+
+		    for ( ++j; j <= numecs; ++j )
+			symlist[j] = 1;
+		    }
+
+		else
+		    for ( k = 0; k < lenccl; ++k )
+			{
+			ich = ccltbl[cclp + k];
+			symlist[ich] = 1;
+			}
+		}
+	    }
+	}
+    }
diff --git a/ecs.c b/ecs.c
new file mode 100644
index 0000000..2a60c9b
--- /dev/null
+++ b/ecs.c
@@ -0,0 +1,190 @@
+/* lexecs - equivalence class routines */
+
+/*
+ * Copyright (c) University of California, 1987
+ */
+
+#include "flexdef.h"
+
+/* ccl2ecl - convert character classes to set of equivalence classes
+ *
+ * synopsis
+ *    ccl2ecl();
+ */
+ccl2ecl()
+
+    {
+    int i, ich, newlen, cclp, ccls, cclmec;
+
+    for ( i = 1; i <= lastccl; ++i )
+	{
+	/* we loop through each character class, and for each character
+	 * in the class, add the character's equivalence class to the
+	 * new "character" class we are creating.  Thus when we are all
+	 * done, character classes will really consist of collections
+	 * of equivalence classes
+	 */
+
+	newlen = 0;
+	cclp = cclmap[i];
+
+	for ( ccls = 0; ccls < ccllen[i]; ++ccls )
+	    {
+	    ich = ccltbl[cclp + ccls];
+	    cclmec = ecgroup[ich];
+	    if ( cclmec > 0 )
+		{
+		ccltbl[cclp + newlen] = cclmec;
+		++newlen;
+		}
+	    }
+
+	ccllen[i] = newlen;
+	}
+    }
+
+
+/* cre8ecs - associate equivalence class numbers with class members
+ *
+ * synopsis
+ *    int cre8ecs();
+ *    number of classes = cre8ecs( fwd, bck, num );
+ *
+ *  fwd is the forward linked-list of equivalence class members.  bck
+ *  is the backward linked-list, and num is the number of class members.
+ *  Returned is the number of classes.
+ */
+int cre8ecs( fwd, bck, num )
+int fwd[], bck[], num;
+
+    {
+    int i, j, numcl;
+
+    numcl = 0;
+
+    /* create equivalence class numbers.  From now on, abs( bck(x) )
+     * is the equivalence class number for object x.  If bck(x)
+     * is positive, then x is the representative of its equivalence
+     * class.
+     */
+
+    for ( i = 1; i <= num; ++i )
+	if ( bck[i] == NIL )
+	    {
+	    bck[i] = ++numcl;
+	    for ( j = fwd[i]; j != NIL; j = fwd[j] )
+		bck[j] = -numcl;
+	    }
+
+    return ( numcl );
+    }
+
+
+/* mkeccl - update equivalence classes based on character class xtions
+ *
+ * synopsis
+ *    char ccls[];
+ *    int lenccl, fwd[llsiz], bck[llsiz], llsiz;
+ *    mkeccl( ccls, lenccl, fwd, bck, llsiz );
+ *
+ * where ccls contains the elements of the character class, lenccl is the
+ * number of elements in the ccl, fwd is the forward link-list of equivalent
+ * characters, bck is the backward link-list, and llsiz size of the link-list
+ */
+mkeccl( ccls, lenccl, fwd, bck, llsiz )
+char ccls[];
+int lenccl, fwd[], bck[], llsiz;
+
+    {
+    int cclp, oldec, newec;
+    int cclm, i, j;
+
+    /* note that it doesn't matter whether or not the character class is
+     * negated.  The same results will be obtained in either case.
+     */
+
+    cclp = 0;
+
+    while ( cclp < lenccl )
+	{
+	cclm = ccls[cclp];
+	oldec = bck[cclm];
+	newec = cclm;
+
+	j = cclp + 1;
+
+	for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] )
+	    { /* look for the symbol in the character class */
+	    for ( ; j < lenccl && ccls[j] <= i; ++j )
+		if ( ccls[j] == i )
+		    {
+		    /* we found an old companion of cclm in the ccl.
+		     * link it into the new equivalence class and flag it as
+		     * having been processed
+		     */
+
+		    bck[i] = newec;
+		    fwd[newec] = i;
+		    newec = i;
+		    ccls[j] = -i;	/* set flag so we don't reprocess */
+
+		    /* get next equivalence class member */
+		    /* next 2 */ goto next_pt;
+		    }
+
+	    /* symbol isn't in character class.  Put it in the old equivalence
+	     * class
+	     */
+
+	    bck[i] = oldec;
+
+	    if ( oldec != NIL )
+		fwd[oldec] = i;
+
+	    oldec = i;
+next_pt:
+	    ;
+	    }
+
+	if ( bck[cclm] != NIL || oldec != bck[cclm] )
+	    {
+	    bck[cclm] = NIL;
+	    fwd[oldec] = NIL;
+	    }
+
+	fwd[newec] = NIL;
+
+	/* find next ccl member to process */
+
+	for ( ++cclp; ccls[cclp] < 0 && cclp < lenccl; ++cclp )
+	    {
+	    /* reset "doesn't need processing" flag */
+	    ccls[cclp] = -ccls[cclp];
+	    }
+	}
+    }
+
+
+/* mkechar - create equivalence class for single character
+ *
+ * synopsis
+ *    int tch, fwd[], bck[];
+ *    mkechar( tch, fwd, bck );
+ */
+mkechar( tch, fwd, bck )
+int tch, fwd[], bck[];
+
+    {
+    /* if until now the character has been a proper subset of
+     * an equivalence class, break it away to create a new ec
+     */
+
+    if ( fwd[tch] != NIL )
+	bck[fwd[tch]] = bck[tch];
+
+    if ( bck[tch] != NIL )
+	fwd[bck[tch]] = fwd[tch];
+
+    fwd[tch] = NIL;
+    bck[tch] = NIL;
+    }
diff --git a/flexdef.h b/flexdef.h
new file mode 100644
index 0000000..b41b649
--- /dev/null
+++ b/flexdef.h
@@ -0,0 +1,429 @@
+/*
+ *  Symbol definitions for flex.
+ *
+ * modification history
+ * --------------------
+ * 02a   vp  27jun86  .translated into C/FTL
+ */
+
+/*
+ * Copyright (c) University of California, 1987
+ */
+
+#include <stdio.h>
+
+
+/* maximum line length we'll have to deal with */
+#define MAXLINE BUFSIZ
+
+/* maximum size of file name */
+#define FILENAMESIZE 1024
+
+#define min(x,y) (x < y ? x : y)
+#define max(x,y) (x > y ? x : y)
+
+#define true 1
+#define false 0
+
+
+#ifndef DEFAULT_SKELETON_FILE
+#define DEFAULT_SKELETON_FILE "flex.skel"
+#endif
+
+/* maximum number of characters per line recognized by Fortran compiler */
+#define DATALINEWIDTH 72
+
+/* string to indent Fortran data statements with */
+#define DATAINDENTSTR "      "
+/* width of dataindent string in Fortran columns */
+#define DATAINDENTWIDTH 6
+
+/* number of data items per line for -f output */
+#define NUMDATAITEMS 10
+
+/* number of lines of data in -f output before inserting a blank line for
+ * readability.
+ */
+#define NUMDATALINES 10
+
+/* returns true if an nfa state has an epsilon out-transition slot
+ * that can be used.  This definition is currently not used.
+ */
+#define FREE_EPSILON(state) \
+	(transchar[state] == SYM_EPSILON && \
+	 trans2[state] == NO_TRANSITION && \
+	 finalst[state] != state)
+
+/* returns true if an nfa state has an epsilon out-transition character
+ * and both slots are free
+ */
+#define SUPER_FREE_EPSILON(state) \
+	(transchar[state] == SYM_EPSILON && \
+	 trans1[state] == NO_TRANSITION) \
+
+/* maximum number of NFA states that can comprise a DFA state.  It's real
+ * big because if there's a lot of rules, the initial state will have a
+ * huge epsilon closure.
+ */
+#define INITIAL_MAX_DFA_SIZE 750
+#define MAX_DFA_SIZE_INCREMENT 750
+
+/* array names to be used in generated machine.  They're short because
+ * we write out one data statement (which names the array) for each element
+ * in the array.
+ */
+
+#define ALIST 'l'	/* points to list of rules accepted for a state */
+#define ACCEPT 'a'	/* list of rules accepted for a state */
+#define ECARRAY 'e'	/* maps input characters to equivalence classes */
+#define MATCHARRAY 'm'	/* maps equivalence classes to meta-equivalence classes */
+#define BASEARRAY 'b'	/* "base" array */
+#define DEFARRAY 'd'	/* "default" array */
+#define NEXTARRAY 'n'	/* "next" array */
+#define CHECKARRAY 'c'	/* "check" array */
+
+/* NIL must be 0.  If not, its special meaning when making equivalence classes
+ * (it marks the representative of a given e.c.) will be unidentifiable
+ */
+#define NIL 0
+
+#define JAM -1	/* to mark a missing DFA transition */
+#define NO_TRANSITION NIL
+#define UNIQUE -1	/* marks a symbol as an e.c. representative */
+#define INFINITY -1	/* for x{5,} constructions */
+
+/* size of input alphabet - should be size of ASCII set */
+#define CSIZE 127
+
+#define INITIAL_MAXCCLS 100	/* max number of unique character classes */
+#define MAXCCLS_INCREMENT 100
+
+/* size of table holding members of character classes */
+#define INITIAL_MAX_CCL_TBL_SIZE 500
+#define MAX_CCL_TBL_SIZE_INCREMENT 250
+
+#define INITIAL_MNS 2000	/* default maximum number of nfa states */
+#define MNS_INCREMENT 1000	/* amount to bump above by if it's not enough */
+
+#define INITIAL_MAX_DFAS 1000	/* default maximum number of dfa states */
+#define MAX_DFAS_INCREMENT 1000
+
+#define JAMSTATE -32766	/* marks a reference to the state that always jams */
+
+/* enough so that if it's subtracted from an NFA state number, the result
+ * is guarenteed to be negative
+ */
+#define MARKER_DIFFERENCE 32000
+#define MAXIMUM_MNS 31999
+
+/* maximum number of nxt/chk pairs for non-templates */
+#define INITIAL_MAX_XPAIRS 2000
+#define MAX_XPAIRS_INCREMENT 2000
+
+/* maximum number of nxt/chk pairs needed for templates */
+#define INITIAL_MAX_TEMPLATE_XPAIRS 2500
+#define MAX_TEMPLATE_XPAIRS_INCREMENT 2500
+
+#define SYM_EPSILON 0	/* to mark transitions on the symbol epsilon */
+
+#define INITIAL_MAX_SCS 40	/* maximum number of start conditions */
+#define MAX_SCS_INCREMENT 40	/* amount to bump by if it's not enough */
+
+#define ONE_STACK_SIZE 500	/* stack of states with only one out-transition */
+#define SAME_TRANS -1	/* transition is the same as "default" entry for state */
+
+/* the following percentages are used to tune table compression:
+
+ * the percentage the number of out-transitions a state must be of the
+ * number of equivalence classes in order to be considered for table
+ * compaction by using protos
+ */
+#define PROTO_SIZE_PERCENTAGE 15
+
+/* the percentage the number of homogeneous out-transitions of a state
+ * must be of the number of total out-transitions of the state in order
+ * that the state's transition table is first compared with a potential 
+ * template of the most common out-transition instead of with the first
+ * proto in the proto queue
+ */
+#define CHECK_COM_PERCENTAGE 50
+
+/* the percentage the number of differences between a state's transition
+ * table and the proto it was first compared with must be of the total
+ * number of out-transitions of the state in order to keep the first
+ * proto as a good match and not search any further
+ */
+#define FIRST_MATCH_DIFF_PERCENTAGE 10
+
+/* the percentage the number of differences between a state's transition
+ * table and the most similar proto must be of the state's total number
+ * of out-transitions to use the proto as an acceptable close match
+ */
+#define ACCEPTABLE_DIFF_PERCENTAGE 50
+
+/* the percentage the number of homogenous out-transitions of a state
+ * must be of the number of total out-transitions of the state in order
+ * to consider making a template from the state
+ */
+#define TEMPLATE_SAME_PERCENTAGE 60
+
+/* the percentage the number of differences between a state's transition
+ * table and the most similar proto must be of the state's total number
+ * of out-transitions to create a new proto from the state
+ */
+#define NEW_PROTO_DIFF_PERCENTAGE 20
+
+/* the percentage the total number of out-transitions of a state must be
+ * of the number of equivalence classes in order to consider trying to
+ * fit the transition table into "holes" inside the nxt/chk table.
+ */
+#define INTERIOR_FIT_PERCENTAGE 15
+
+/* size of region set aside to cache the complete transition table of
+ * protos on the proto queue to enable quick comparisons
+ */
+#define PROT_SAVE_SIZE 2000
+
+#define MSP 50	/* maximum number of saved protos (protos on the proto queue) */
+
+/* number that, if used to subscript an array, has a good chance of producing
+ * an error; should be small enough to fit into a short
+ */
+#define BAD_SUBSCRIPT -32767
+
+
+/* Declarations for global variables. */
+
+/* variables for symbol tables:
+ * sctbl - start-condition symbol table
+ * ndtbl - name-definition symbol table
+ * ccltab - character class text symbol table
+ */
+
+struct hash_entry
+    {
+    struct hash_entry *prev, *next;
+    char *name;
+    char *val;
+    } ;
+
+typedef struct hash_entry *hash_table[];
+
+#define NAME_TABLE_HASH_SIZE 101
+#define START_COND_HASH_SIZE 101
+#define CCL_HASH_SIZE 101
+
+extern struct hash_entry *ndtbl[NAME_TABLE_HASH_SIZE]; 
+extern struct hash_entry *sctbl[START_COND_HASH_SIZE];
+extern struct hash_entry *ccltab[CCL_HASH_SIZE];
+
+
+/* variables for flags:
+ * printstats - if true (-v), dump statistics
+ * syntaxerror - true if a syntax error has been found
+ * eofseen - true if we've seen an eof in the input file
+ * ddebug - if true (-d), make a "debug" scanner
+ * trace - if true (-T), trace processing
+ * spprdflt - if true (-s), suppress the default rule
+ * interactive - if true (-I), generate an interactive scanner
+ * caseins - if true (-i), generate a case-insensitive scanner
+ * useecs - if true (-ce flag), use equivalence classes
+ * fulltbl - if true (-cf flag), don't compress the DFA state table
+ * usemecs - if true (-cm flag), use meta-equivalence classes
+ * reject - if true (-r flag), generate tables for REJECT macro
+ */
+
+extern int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt;
+extern int interactive, caseins, genftl, useecs, fulltbl, usemecs, reject;
+
+
+/* variables used in the flex input routines:
+ * datapos - characters on current output line
+ * dataline - number of contiguous lines of data in current data
+ *    statement.  Used to generate readable -f output
+ * skelfile - fd of the skeleton file
+ * yyin - input file
+ * infilename - name of input file
+ * linenum - current input line number
+ */
+
+extern int datapos, dataline, linenum;
+extern FILE *skelfile, *yyin;
+extern char *infilename;
+
+
+/* variables for stack of states having only one out-transition:
+ * onestate - state number
+ * onesym - transition symbol
+ * onenext - target state
+ * onedef - default base entry
+ * onesp - stack pointer
+ */
+
+extern int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE];
+extern int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp;
+
+
+/* variables for nfa machine data:
+ * current_mns - current maximum on number of NFA states
+ * accnum - number of the last accepting state
+ * firstst - physically the first state of a fragment
+ * lastst - last physical state of fragment
+ * finalst - last logical state of fragment
+ * transchar - transition character
+ * trans1 - transition state
+ * trans2 - 2nd transition state for epsilons
+ * accptnum - accepting number
+ * lastnfa - last nfa state number created
+ */
+
+extern int current_mns;
+extern int accnum, *firstst, *lastst, *finalst, *transchar;
+extern int *trans1, *trans2, *accptnum, lastnfa;
+
+
+/* variables for protos:
+ * numtemps - number of templates created
+ * numprots - number of protos created
+ * protprev - backlink to a more-recently used proto
+ * protnext - forward link to a less-recently used proto
+ * prottbl - base/def table entry for proto
+ * protcomst - common state of proto
+ * firstprot - number of the most recently used proto
+ * lastprot - number of the least recently used proto
+ * protsave contains the entire state array for protos
+ */
+
+extern int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP];
+extern int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE];
+
+
+/* variables for managing equivalence classes:
+ * numecs - number of equivalence classes
+ * nextecm - forward link of Equivalenc Class members
+ * ecgroup - class number or backward link of EC members
+ * nummecs - number of meta-equivalence classes (used to compress
+ *   templates)
+ * tecfwd - forward link of meta-equivalence classes members
+ * tecbck - backward link of MEC's
+ */
+
+extern int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs;
+extern int tecfwd[CSIZE + 1], tecbck[CSIZE + 1];
+
+
+/* variables for start conditions:
+ * lastsc - last start condition created
+ * current_max_scs - current limit on number of start conditions
+ * scset - set of rules active in start condition
+ * scbol - set of rules active only at the beginning of line in a s.c.
+ * scxclu - true if start condition is exclusive
+ * actvsc - stack of active start conditions for the current rule
+ */
+
+extern int lastsc, current_max_scs, *scset, *scbol, *scxclu, *actvsc;
+
+
+/* variables for dfa machine data:
+ * current_max_dfa_size - current maximum number of NFA states in DFA
+ * current_max_xpairs - current maximum number of non-template xtion pairs
+ * current_max_template_xpairs - current maximum number of template pairs
+ * current_max_dfas - current maximum number DFA states
+ * lastdfa - last dfa state number created
+ * nxt - state to enter upon reading character
+ * chk - check value to see if "nxt" applies
+ * tnxt - internal nxt table for templates
+ * base - offset into "nxt" for given state
+ * def - where to go if "chk" disallows "nxt" entry
+ * tblend - last "nxt/chk" table entry being used
+ * firstfree - first empty entry in "nxt/chk" table
+ * dss - nfa state set for each dfa
+ * dfasiz - size of nfa state set for each dfa
+ * dfaacc - accepting set for each dfa state (or accepting number, if
+ *    -r is not given)
+ * accsiz - size of accepting set for each dfa state
+ * dhash - dfa state hash value
+ * todo - queue of DFAs still to be processed
+ * todo_head - head of todo queue
+ * todo_next - next available entry on todo queue
+ * numas - number of DFA accepting states created; note that this
+ *    is not necessarily the same value as accnum, which is the analogous
+ *    value for the NFA
+ * numsnpairs - number of state/nextstate transition pairs
+ * jambase - position in base/def where the default jam table starts
+ * jamstate - state number corresponding to "jam" state
+ */
+
+extern int current_max_dfa_size, current_max_xpairs;
+extern int current_max_template_xpairs, current_max_dfas;
+extern int lastdfa, lasttemp, *nxt, *chk, *tnxt;
+extern int *base, *def, tblend, firstfree, **dss, *dfasiz, **dfaacc;
+extern int *accsiz, *dhash, *todo, todo_head, todo_next, numas;
+extern int numsnpairs, jambase, jamstate;
+
+
+/* variables for ccl information:
+ * lastccl - ccl index of the last created ccl
+ * current_maxccls - current limit on the maximum number of unique ccl's
+ * cclmap - maps a ccl index to its set pointer
+ * ccllen - gives the length of a ccl
+ * cclng - true for a given ccl if the ccl is negated
+ * cclreuse - counts how many times a ccl is re-used
+ * current_max_ccl_tbl_size - current limit on number of characters needed
+ *	to represent the unique ccl's
+ * ccltbl - holds the characters in each ccl - indexed by cclmap
+ */
+
+extern int lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse;
+extern int current_max_ccl_tbl_size;
+extern char *ccltbl;
+
+
+/* variables for miscellaneous information:
+ * starttime - real-time when we started
+ * endtime - real-time when we ended
+ * nmstr - last NAME scanned by the scanner
+ * sectnum - section number currently being parsed
+ * nummt - number of empty nxt/chk table entries
+ * hshcol - number of hash collisions detected by snstods
+ * dfaeql - number of times a newly created dfa was equal to an old one
+ * numeps - number of epsilon NFA states created
+ * eps2 - number of epsilon states which have 2 out-transitions
+ * num_reallocs - number of times it was necessary to realloc() a group
+ *		  of arrays
+ * tmpuses - number of DFA states that chain to templates
+ * totnst - total number of NFA states used to make DFA states
+ * peakpairs - peak number of transition pairs we had to store internally
+ * numuniq - number of unique transitions
+ * numdup - number of duplicate transitions
+ * hshsave - number of hash collisions saved by checking number of states
+ */
+
+extern char *starttime, *endtime, nmstr[MAXLINE];
+extern int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs;
+extern int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave;
+
+char *allocate_array(), *reallocate_array();
+
+#define allocate_integer_array(size) \
+	(int *) allocate_array( size, sizeof( int ) )
+
+#define reallocate_integer_array(array,size) \
+	(int *) reallocate_array( (char *) array, size, sizeof( int ) )
+
+#define allocate_integer_pointer_array(size) \
+	(int **) allocate_array( size, sizeof( int * ) )
+
+#define reallocate_integer_pointer_array(array,size) \
+	(int **) reallocate_array( (char *) array, size, sizeof( int * ) )
+
+#define allocate_character_array(size) allocate_array( size, sizeof( char ) )
+
+#define reallocate_character_array(array,size) \
+	reallocate_array( array, size, sizeof( char ) )
+
+
+/* used to communicate between scanner and parser.  The type should really
+ * be YYSTYPE, but we can't easily get our hands on it.
+ */
+extern int yylval;
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..d0a7ae1
--- /dev/null
+++ b/main.c
@@ -0,0 +1,507 @@
+/* flex - tool to generate fast lexical analyzers
+ *
+ * Copyright (c) University of California, 1987
+ *
+ *
+ * ver   date  who remarks
+ * ---   ----  --- -------------------------------------------------------
+ * 04a 27Jun86 VP  .translated from Ratfor into C
+ * 01a 22Aug83 VP  .written.  Original version by Jef Poskanzer.
+ */
+
+
+#include "flexdef.h"
+
+
+/* these globals are all defined and commented in flexdef.h */
+int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt;
+int interactive, caseins, genftl, useecs, fulltbl, usemecs, reject;
+int datapos, dataline, linenum;
+FILE *skelfile = NULL;
+char *infilename = NULL;
+int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE];
+int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp;
+int current_mns;
+int accnum, *firstst, *lastst, *finalst, *transchar;
+int *trans1, *trans2, *accptnum, lastnfa;
+int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP];
+int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE];
+int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1];
+int tecbck[CSIZE + 1];
+int lastsc, current_max_scs, *scset, *scbol, *scxclu, *actvsc;
+int current_max_dfa_size, current_max_xpairs;
+int current_max_template_xpairs, current_max_dfas;
+int lastdfa, *nxt, *chk, *tnxt;
+int *base, *def, tblend, firstfree, numtemps, **dss, *dfasiz, **dfaacc;
+int *accsiz, *dhash, *todo, todo_head, todo_next, numas;
+int numsnpairs, jambase, jamstate;
+int lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse;
+int current_max_ccl_tbl_size;
+char *ccltbl;
+char *starttime, *endtime, nmstr[MAXLINE];
+int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs;
+int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave;
+
+
+/* flex - main program
+ *
+ * synopsis (from the shell)
+ *    flex [-v] [file ...]
+ */
+
+main( argc, argv )
+int argc;
+char **argv;
+
+    {
+    lexinit( argc, argv );
+    readin();
+
+    if ( ! syntaxerror )
+	{
+	/* convert the ndfa to a dfa */
+	ntod();
+
+	/* generate the ratfor state transition tables from the dfa */
+	gentabs();
+	}
+
+    /* note, lexend does not return.  It exits with its argument as status. */
+
+    lexend( 0 );
+    }
+
+
+/* lexend - terminate flex
+ *
+ * synopsis
+ *    int status;
+ *    lexend( status );
+ *
+ *    status is exit status.
+ *
+ * note
+ *    This routine does not return.
+ */
+
+lexend( status )
+int status;
+
+    {
+    int tblsiz;
+    char *gettime();
+
+    if ( skelfile != NULL )
+	(void) fclose( skelfile );
+
+    if ( printstats )
+	{
+	endtime = gettime();
+
+	fprintf( stderr, "flex usage statistics:\n" );
+	fprintf( stderr, "  started at %s, finished at %s\n",
+		 starttime, endtime );
+
+	if ( ! genftl )
+	    fprintf( stderr, "  Ratfor scanner generated\n" );
+
+	fprintf( stderr, "  %d/%d NFA states\n", lastnfa, current_mns );
+	fprintf( stderr, "  %d/%d DFA states (%d words)\n", lastdfa,
+			 current_max_dfas, totnst );
+	fprintf( stderr, "  %d rules\n", accnum );
+	fprintf( stderr, "  %d/%d start conditions\n", lastsc,
+			 current_max_scs );
+	fprintf( stderr, "  %d epsilon states, %d double epsilon states\n",
+		 numeps, eps2 );
+
+	if ( lastccl == 0 )
+	    fprintf( stderr, "  no character classes\n" );
+	else
+	    fprintf( stderr,
+	"  %d/%d character classes needed %d/%d words of storage, %d reused\n",
+		     lastccl, current_maxccls,
+		     cclmap[lastccl] + ccllen[lastccl] - 1,
+		     current_max_ccl_tbl_size, cclreuse );
+
+	fprintf( stderr, "  %d state/nextstate pairs created\n", numsnpairs );
+	fprintf( stderr, "  %d/%d unique/duplicate transitions\n",
+		 numuniq, numdup );
+
+	if ( fulltbl )
+	    {
+	    tblsiz = lastdfa * numecs;
+	    fprintf( stderr, "  %d table entries\n", tblsiz );
+	    }
+
+	else
+	    {
+	    tblsiz = 2 * (lastdfa + numtemps) + 2 * tblend;
+
+	    fprintf( stderr, "  %d/%d base/def entries created\n",
+		     lastdfa + numtemps, current_max_dfas );
+	    fprintf( stderr, "  %d/%d (peak %d) nxt/chk entries created\n",
+		     tblend, current_max_xpairs, peakpairs );
+	    fprintf( stderr,
+		     "  %d/%d (peak %d) template nxt/chk entries created\n",
+		     numtemps * nummecs, current_max_template_xpairs,
+		     numtemps * numecs );
+	    fprintf( stderr, "  %d empty table entries\n", nummt );
+	    fprintf( stderr, "  %d protos created\n", numprots );
+	    fprintf( stderr, "  %d templates created, %d uses\n",
+		     numtemps, tmpuses );
+	    }
+
+	if ( useecs )
+	    {
+	    tblsiz = tblsiz + CSIZE;
+	    fprintf( stderr, "  %d/%d equivalence classes created\n",
+		     numecs, CSIZE );
+	    }
+
+	if ( usemecs )
+	    {
+	    tblsiz = tblsiz + numecs;
+	    fprintf( stderr, "  %d/%d meta-equivalence classes created\n",
+		     nummecs, CSIZE );
+	    }
+
+	fprintf( stderr, "  %d (%d saved) hash collisions, %d DFAs equal\n",
+		 hshcol, hshsave, dfaeql );
+	fprintf( stderr, "  %d sets of reallocations needed\n", num_reallocs );
+	fprintf( stderr, "  %d total table entries needed\n", tblsiz );
+	}
+
+    exit( status );
+    }
+
+
+/* lexinit - initialize flex
+ *
+ * synopsis
+ *    int argc;
+ *    char **argv;
+ *    lexinit( argc, argv );
+ */
+
+lexinit( argc, argv )
+int argc;
+char **argv;
+
+    {
+    int i;
+    char *arg, *skelname = DEFAULT_SKELETON_FILE, *gettime(), clower();
+    int sawcmpflag, use_stdout;
+
+    printstats = syntaxerror = trace = spprdflt = interactive = caseins = false;
+    ddebug = fulltbl = reject = false;
+    usemecs = genftl = useecs = true;
+
+    sawcmpflag = false;
+    use_stdout = false;
+
+    /* read flags */
+    for ( --argc, ++argv; argc ; --argc, ++argv )
+	{
+	if ( argv[0][0] != '-' || argv[0][1] == '\0' )
+	    break;
+
+	arg = argv[0];
+
+	for ( i = 1; arg[i] != '\0'; ++i )
+	    switch ( arg[i] )
+		{
+		case 'c':
+		    if ( i != 1 )
+			lexerror( "-c flag must be given separately" );
+
+		    if ( ! sawcmpflag )
+			{
+			useecs = false;
+			usemecs = false;
+			fulltbl = false;
+			sawcmpflag = true;
+			}
+
+		    for ( ++i; arg[i] != '\0'; ++i )
+			switch ( clower( arg[i] ) )
+			    {
+			    case 'e':
+				useecs = true;
+				break;
+
+			    case 'f':
+				fulltbl = true;
+				break;
+
+			    case 'm':
+				usemecs = true;
+				break;
+
+			    default:
+				lerrif( "unknown -c option %c",
+					(int) arg[i] );
+				break;
+			    }
+		    
+		    goto get_next_arg;
+
+		case 'd':
+		    ddebug = true;
+		    break;
+
+		case 'f':
+		    useecs = usemecs = false;
+		    fulltbl = true;
+		    break;
+
+		case 'I':
+		    interactive = true;
+		    break;
+
+		case 'i':
+		    caseins = true;
+		    break;
+
+		case 'l':
+		    use_stdout = false;
+		    break;
+
+		case 'n':
+		    printstats = false;
+		    break;
+
+		case 'r':
+		    reject = true;
+		    break;
+
+		case 'S':
+		    if ( i != 1 )
+			lexerror( "-S flag must be given separately" );
+
+		    skelname = arg + i + 1;
+		    goto get_next_arg;
+
+		case 's':
+		    spprdflt = true;
+		    break;
+
+		case 't':
+		    use_stdout = true;
+		    break;
+
+		case 'T':
+		    trace = true;
+		    break;
+
+		case 'v':
+		    printstats = true;
+		    break;
+
+		default:
+		    lerrif( "unknown flag %c", (int) arg[i] );
+		    break;
+		}
+
+get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */
+	;
+	}
+
+    if ( fulltbl && usemecs )
+	lexerror( "full table and -cm don't make sense together" );
+
+    if ( fulltbl && interactive )
+	lexerror( "full table and -I are (currently) incompatible" );
+
+    if ( ! use_stdout )
+	{
+	FILE *prev_stdout = freopen( "lex.yy.c", "w", stdout );
+
+	if ( prev_stdout == NULL )
+	    lexerror( "could not create lex.yy.c" );
+	}
+
+    if ( argc )
+	{
+	if ( argc > 1 )
+	    lexerror( "extraneous argument(s) given" );
+
+	yyin = fopen( infilename = argv[0], "r" );
+
+	if ( yyin == NULL )
+	    lerrsf( "can't open %s", argv[0] );
+	}
+
+    else
+	yyin = stdin;
+
+    lastccl = 0;
+    lastsc = 0;
+
+    /* initialize the statistics */
+    starttime = gettime();
+
+    if ( (skelfile = fopen( skelname, "r" )) == NULL )
+	lerrsf( "can't open skeleton file %s", skelname );
+
+    lastdfa = lastnfa = accnum = numas = numsnpairs = tmpuses = 0;
+    numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0;
+    numuniq = numdup = hshsave = eofseen = datapos = dataline = 0;
+    onesp = numprots = 0;
+
+    linenum = sectnum = 1;
+    firstprot = NIL;
+
+    /* used in mkprot() so that the first proto goes in slot 1
+     * of the proto queue
+     */
+    lastprot = 1;
+
+    if ( useecs )
+	{
+	/* set up doubly-linked equivalence classes */
+	ecgroup[1] = NIL;
+
+	for ( i = 2; i <= CSIZE; ++i )
+	    {
+	    ecgroup[i] = i - 1;
+	    nextecm[i - 1] = i;
+	    }
+
+	nextecm[CSIZE] = NIL;
+	}
+
+    else
+	{ /* put everything in its own equivalence class */
+	for ( i = 1; i <= CSIZE; ++i )
+	    {
+	    ecgroup[i] = i;
+	    nextecm[i] = BAD_SUBSCRIPT;	/* to catch errors */
+	    }
+	}
+
+    set_up_initial_allocations();
+    }
+
+
+/* readin - read in the rules section of the input file(s)
+ *
+ * synopsis
+ *    readin();
+ */
+readin()
+
+    {
+    if ( genftl )
+	{
+	fputs( "#define YYDEFAULTACTION ", stdout );
+
+	if ( spprdflt )
+	    fputs( "YYFATALERROR( \"flex scanner jammed\" )", stdout );
+	else
+	    fputs( "ECHO", stdout );
+
+	fputs( ";\n", stdout );
+
+	if ( ddebug )
+	    puts( "#define LEX_DEBUG" );
+	if ( useecs )
+	    puts( "#define LEX_USE_ECS" );
+	if ( usemecs )
+	    puts( "#define LEX_USE_MECS" );
+	if ( interactive )
+	    puts( "#define LEX_INTERACTIVE_SCANNER" );
+	if ( reject )
+	    puts( "#define LEX_REJECT_ENABLED" );
+	if ( fulltbl )
+	    puts( "#define LEX_FULL_TABLE" );
+	}
+
+    else
+	{
+	fputs( "define(YYDEFAULTACTION,", stdout );
+
+	if ( spprdflt )
+	    fputs( "call error( \"flex scanner jammed\" )", stdout );
+	else
+	    fputs( "ECHO", stdout );
+
+	fputs( ")\n", stdout );
+
+	if ( ddebug )
+	    puts( "define(LEX_DEBUG,)" );
+	if ( useecs )
+	    puts( "define(LEX_USE_ECS,)" );
+	if ( usemecs )
+	    puts( "define(LEX_USE_MECS,)" );
+	if ( reject )
+	    puts( "define(LEX_REJECT_ENABLED,)" );
+	if ( fulltbl )
+	    puts( "define(LEX_FULL_TABLE,)" );
+	}
+
+    skelout();
+
+    line_directive_out();
+
+    if ( yyparse() )
+	lerrif( "fatal parse error at line %d", linenum );
+
+    if ( useecs )
+	{
+	numecs = cre8ecs( nextecm, ecgroup, CSIZE );
+	ccl2ecl();
+	}
+
+    else
+	numecs = CSIZE;
+    }
+
+
+
+/* set_up_initial_allocations - allocate memory for internal tables */
+
+set_up_initial_allocations()
+
+    {
+    current_mns = INITIAL_MNS;
+    firstst = allocate_integer_array( current_mns );
+    lastst = allocate_integer_array( current_mns );
+    finalst = allocate_integer_array( current_mns );
+    transchar = allocate_integer_array( current_mns );
+    trans1 = allocate_integer_array( current_mns );
+    trans2 = allocate_integer_array( current_mns );
+    accptnum = allocate_integer_array( current_mns );
+
+    current_max_scs = INITIAL_MAX_SCS;
+    scset = allocate_integer_array( current_max_scs );
+    scbol = allocate_integer_array( current_max_scs );
+    scxclu = allocate_integer_array( current_max_scs );
+    actvsc = allocate_integer_array( current_max_scs );
+
+    current_maxccls = INITIAL_MAXCCLS;
+    cclmap = allocate_integer_array( current_maxccls );
+    ccllen = allocate_integer_array( current_maxccls );
+    cclng = allocate_integer_array( current_maxccls );
+
+    current_max_ccl_tbl_size = INITIAL_MAX_CCL_TBL_SIZE;
+    ccltbl = allocate_character_array( current_max_ccl_tbl_size );
+
+    current_max_dfa_size = INITIAL_MAX_DFA_SIZE;
+
+    current_max_xpairs = INITIAL_MAX_XPAIRS;
+    nxt = allocate_integer_array( current_max_xpairs );
+    chk = allocate_integer_array( current_max_xpairs );
+
+    current_max_template_xpairs = INITIAL_MAX_TEMPLATE_XPAIRS;
+    tnxt = allocate_integer_array( current_max_template_xpairs );
+
+    current_max_dfas = INITIAL_MAX_DFAS;
+    base = allocate_integer_array( current_max_dfas );
+    def = allocate_integer_array( current_max_dfas );
+    dfasiz = allocate_integer_array( current_max_dfas );
+    accsiz = allocate_integer_array( current_max_dfas );
+    dhash = allocate_integer_array( current_max_dfas );
+    todo = allocate_integer_array( current_max_dfas );
+    dss = allocate_integer_pointer_array( current_max_dfas );
+    dfaacc = allocate_integer_pointer_array( current_max_dfas );
+    }
diff --git a/misc.c b/misc.c
new file mode 100644
index 0000000..3364e4c
--- /dev/null
+++ b/misc.c
@@ -0,0 +1,646 @@
+/* lexmisc - miscellaneous flex routines */
+
+/*
+ * Copyright (c) University of California, 1987
+ */
+
+#include <ctype.h>
+#include "flexdef.h"
+
+char *malloc(), *realloc();
+
+
+/* allocate_array - allocate memory for an integer array of the given size */
+
+char *allocate_array( size, element_size )
+int size, element_size;
+
+    {
+    register char *mem = malloc( (unsigned) (element_size * size) );
+
+    if ( mem == NULL )
+	lexfatal( "memory allocation failed in allocate_array()" );
+
+    return ( mem );
+    }
+
+
+/* bubble - bubble sort an integer array in increasing order
+ *
+ * synopsis
+ *   int v[n], n;
+ *   bubble( v, n );
+ *
+ * description
+ *   sorts the first n elements of array v and replaces them in
+ *   increasing order.
+ *
+ * passed
+ *   v - the array to be sorted
+ *   n - the number of elements of 'v' to be sorted */
+
+bubble( v, n )
+int v[], n;
+
+    {
+    register int i, j, k;
+
+    for ( i = n; i > 1; --i )
+	for ( j = 1; j < i; ++j )
+	    if ( v[j] > v[j + 1] )	/* compare */
+		{
+		k = v[j];	/* exchange */
+		v[j] = v[j + 1];
+		v[j + 1] = k;
+		}
+    }
+
+
+/* clower - replace upper-case letter to lower-case
+ *
+ * synopsis:
+ *    char clower(), c;
+ *    c = clower( c );
+ */
+
+char clower( c )
+register char c;
+
+    {
+    return ( isupper(c) ? tolower(c) : c );
+    }
+
+
+/* copy_string - returns a dynamically allocated copy of a string
+ *
+ * synopsis
+ *    char *str, *copy, *copy_string();
+ *    copy = copy_string( str );
+ */
+
+char *copy_string( str )
+register char *str;
+
+    {
+    register char *c;
+    char *copy;
+
+    /* find length */
+    for ( c = str; *c; ++c )
+	;
+
+    copy = malloc( (unsigned) ((c - str + 1) * sizeof( char )) );
+
+    if ( copy == NULL )
+	lexfatal( "dynamic memory failure in copy_string()" );
+
+    for ( c = copy; (*c++ = *str++); )
+	;
+    
+    return ( copy );
+    }
+
+
+/* cshell - shell sort a character array in increasing order
+ *
+ * synopsis
+ *
+ *   char v[n];
+ *   int n;
+ *   cshell( v, n );
+ *
+ * description
+ *   does a shell sort of the first n elements of array v.
+ *
+ * passed
+ *   v - array to be sorted
+ *   n - number of elements of v to be sorted
+ */
+cshell( v, n )
+char v[];
+int n;
+
+    {
+    int gap, i, j, jg;
+    char k;
+
+    for ( gap = n / 2; gap > 0; gap = gap / 2 )
+	for ( i = gap; i < n; ++i )
+	    for ( j = i - gap; j >= 0; j = j - gap )
+		{
+		jg = j + gap;
+
+		if ( v[j] <= v[jg] )
+		    break;
+
+		k = v[j];
+		v[j] = v[jg];
+		v[jg] = k;
+		}
+    }
+
+
+/* dataend - finish up a block of data declarations
+ *
+ * synopsis
+ *    dataend();
+ */
+dataend()
+
+    {
+    if ( datapos > 0 )
+	dataflush();
+
+    if ( genftl )
+	/* add terminator for initialization */
+	puts( "    } ;\n" );
+
+    dataline = 0;
+    }
+
+
+
+/* dataflush - flush generated data statements
+ *
+ * synopsis
+ *    dataflush();
+ */
+dataflush()
+
+    {
+    putchar( '\n' );
+
+    if ( genftl )
+	{
+	if ( ++dataline >= NUMDATALINES )
+	    {
+	    /* put out a blank line so that the table is grouped into
+	     * large blocks that enable the user to find elements easily
+	     */
+	    putchar( '\n' );
+	    dataline = 0;
+	    }
+	}
+
+    /* reset the number of characters written on the current line */
+    datapos = 0;
+    }
+
+
+/* gettime - return current time
+ *
+ * synopsis
+ *    char *gettime(), *time_str;
+ *    time_str = gettime();
+ */
+
+/* include sys/types.h to use time_t and make lint happy */
+
+#include <sys/types.h>
+
+char *gettime()
+
+    {
+    time_t t, time();
+    char *result, *ctime(), *copy_string();
+
+    t = time( (long *) 0 );
+
+    result = copy_string( ctime( &t ) );
+
+    /* get rid of trailing newline */
+    result[24] = '\0';
+
+    return ( result );
+    }
+
+
+/* lerrif - report an error message formatted with one integer argument
+ *
+ * synopsis
+ *    char msg[];
+ *    int arg;
+ *    lerrif( msg, arg );
+ */
+
+lerrif( msg, arg )
+char msg[];
+int arg;
+
+    {
+    char errmsg[MAXLINE];
+    (void) sprintf( errmsg, msg, arg );
+    lexerror( errmsg );
+    }
+
+
+/* lerrsf - report an error message formatted with one string argument
+ *
+ * synopsis
+ *    char msg[], arg[];
+ *    lerrsf( msg, arg );
+ */
+
+lerrsf( msg, arg )
+char msg[], arg[];
+
+    {
+    char errmsg[MAXLINE];
+    (void) sprintf( errmsg, msg, arg );
+    lexerror( errmsg );
+    }
+
+
+/* lexerror - report an error message and terminate
+ *
+ * synopsis
+ *    char msg[];
+ *    lexerror( msg );
+ */
+
+lexerror( msg )
+char msg[];
+
+    {
+    fprintf( stderr, "flex: %s\n", msg );
+    lexend( 1 );
+    }
+
+
+/* lexfatal - report a fatal error message and terminate
+ *
+ * synopsis
+ *    char msg[];
+ *    lexfatal( msg );
+ */
+
+lexfatal( msg )
+char msg[];
+
+    {
+    fprintf( stderr, "flex: fatal internal error %s\n", msg );
+    lexend( 1 );
+    }
+
+
+/* line_directive_out - spit out a "# line" statement */
+
+line_directive_out()
+
+    {
+    if ( infilename ) 
+        printf( "# line %d \"%s\"\n", linenum, infilename );
+    }
+
+
+/* mk2data - generate a data statement for a two-dimensional array
+ *
+ * synopsis
+ *    char name;
+ *    int row, column, value;
+ *    mk2data( name, row, column, value );
+ *
+ *  generates a data statement initializing "name(row, column)" to "value"
+ *  Note that name is only a character; NOT a string.  If we're generating
+ *  FTL (-f flag), "name", "row", and "column" get ignored.
+ */
+mk2data( name, row, column, value )
+char name;
+int row, column, value;
+
+    {
+    int datalen;
+    static char dindent[] = DATAINDENTSTR;
+
+    if ( genftl )
+	{
+	if ( datapos >= NUMDATAITEMS )
+	    {
+	    putchar( ',' );
+	    dataflush();
+	    }
+
+	if ( datapos == 0 )
+	    /* indent */
+	    fputs( "    ", stdout );
+
+	else
+	    putchar( ',' );
+
+	++datapos;
+
+	printf( "%5d", value );
+	}
+
+    else
+	{
+	/* figure out length of data statement to be written.  7 is the constant
+	 * overhead of a one character name, '(', ',',  and ')' to delimit
+	 * the array reference, a '/' and a '/' to delimit the value, and
+	 * room for a blank or a comma between this data statement and the
+	 * previous one
+	 */
+
+	datalen = 7 + numdigs( row ) + numdigs( column ) + numdigs( value );
+
+	if ( datalen + datapos >= DATALINEWIDTH | datapos == 0 )
+	    {
+	    if ( datapos != 0 )
+		dataflush();
+
+	    /* precede data statement with '%' so rat4 preprocessor doesn't have
+	     * to bother looking at it -- speed hack
+	     */
+	    printf( "%%%sdata ", dindent );
+
+	    /* 4 is the constant overhead of writing out the word "DATA" */
+	    datapos = DATAINDENTWIDTH + 4 + datalen;
+	    }
+
+	else
+	    {
+	    putchar( ',' );
+	    datapos = datapos + datalen;
+	    }
+
+	printf( "%c(%d,%d)/%d/", name, row, column, value );
+	}
+    }
+
+
+/* mkdata - generate a data statement
+ *
+ * synopsis
+ *    char name;
+ *    int arrayelm, value;
+ *    mkdata( name, arrayelm, value );
+ *
+ *  generates a data statement initializing "name(arrayelm)" to "value"
+ *  Note that name is only a character; NOT a string.  If we're generating
+ *  FTL (-f flag), "name" and "arrayelm" get ignored.
+ */
+mkdata( name, arrayelm, value )
+char name;
+int arrayelm, value;
+
+    {
+    int datalen;
+    static char dindent[] = DATAINDENTSTR;
+
+    if ( genftl )
+	{
+	if ( datapos >= NUMDATAITEMS )
+	    {
+	    putchar( ',' );
+	    dataflush();
+	    }
+
+	if ( datapos == 0 )
+	    /* indent */
+	    fputs( "    ", stdout );
+
+	else
+	    putchar( ',' );
+
+	++datapos;
+
+	printf( "%5d", value );
+	}
+
+    else
+	{
+	/* figure out length of data statement to be written.  6 is the constant
+	 * overhead of a one character name, '(' and ')' to delimit the array
+	 * reference, a '/' and a '/' to delimit the value, and room for a
+	 * blank or a comma between this data statement and the previous one
+	 */
+
+	datalen = 6 + numdigs( arrayelm ) + numdigs( value );
+
+	if ( datalen + datapos >= DATALINEWIDTH | datapos == 0 )
+	    {
+	    if ( datapos != 0 )
+		dataflush();
+
+	    /* precede data statement with '%' so rat4 preprocessor doesn't have
+	     * to bother looking at it -- speed hack
+	     */
+	    printf( "%%%sdata ", dindent );
+
+	    /* 4 is the constant overhead of writing out the word "DATA" */
+	    datapos = DATAINDENTWIDTH + 4 + datalen;
+	    }
+
+	else
+	    {
+	    putchar( ',' );
+	    datapos = datapos + datalen;
+	    }
+
+	printf( "%c(%d)/%d/", name, arrayelm, value );
+	}
+    }
+
+
+/* myctoi - return the integer represented by a string of digits
+ *
+ * synopsis
+ *    char array[];
+ *    int val, myctoi();
+ *    val = myctoi( array );
+ *
+ */
+
+int myctoi( array )
+char array[];
+
+    {
+    int val = 0;
+
+    (void) sscanf( array, "%d", &val );
+
+    return ( val );
+    }
+
+
+/* myesc - return character corresponding to escape sequence
+ *
+ * synopsis
+ *    char array[], c, myesc();
+ *    c = myesc( array );
+ *
+ */
+
+char myesc( array )
+char array[];
+
+    {
+    switch ( array[1] )
+	{
+	case 'n': return ( '\n' );
+	case 't': return ( '\t' );
+	case 'f': return ( '\f' );
+	case 'r': return ( '\r' );
+	case 'b': return ( '\b' );
+
+	case '0':
+	    if ( isdigit(array[2]) )
+		{ /* \0<octal> */
+		char c, esc_char;
+		register int sptr = 2;
+
+		while ( isdigit(array[sptr]) )
+		    /* don't increment inside loop control because the
+		     * macro will expand it to two increments!  (Not a
+		     * problem with the C version of the macro)
+		     */
+		    ++sptr;
+
+		c = array[sptr];
+		array[sptr] = '\0';
+
+		esc_char = otoi( array + 2 );
+		array[sptr] = c;
+
+		if ( esc_char == '\0' )
+		    {
+		    synerr( "escape sequence for null not allowed" );
+		    return ( 1 );
+		    }
+
+		return ( esc_char );
+		}
+
+	    else
+		{
+		synerr( "escape sequence for null not allowed" );
+		return ( 1 );
+		}
+
+#ifdef NOTDEF
+	case '^':
+	    {
+	    register char next_char = array[2];
+
+	    if ( next_char == '?' )
+		return ( 0x7f );
+	    
+	    else if ( next_char >= 'A' && next_char <= 'Z' )
+		return ( next_char - 'A' + 1 );
+    
+	    else if ( next_char >= 'a' && next_char <= 'z' )
+		return ( next_char - 'z' + 1 );
+    
+	    synerr( "illegal \\^ escape sequence" );
+
+	    return ( 1 );
+	    }
+#endif
+	}
+    
+    return ( array[1] );
+    }
+
+
+/* numdigs - number of digits (includes leading sign) in number
+ *
+ * synopsis
+ *    int numdigs, x;
+ *    num = numdigs( x );
+ */
+int numdigs( x )
+int x;
+
+    {
+    if ( x < 0 )
+	{
+	/* the only negative numbers we expect to encounter are very
+	 * small ones
+	 */
+	if ( x < -9 )
+	    lexfatal( "assumption of small negative numbers botched in numdigs()" );
+
+	return ( 2 );
+	}
+
+    if ( x < 10 )
+	return ( 1 );
+    else if ( x < 100 )
+	return ( 2 );
+    else if ( x < 1000 )
+	return ( 3 );
+    else if ( x < 10000 )
+	return ( 4 );
+    else if ( x < 100000 )
+	return ( 5 );
+    else
+	return ( 6 );
+    }
+
+
+/* otoi - convert an octal digit string to an integer value
+ *
+ * synopsis:
+ *    int val, otoi();
+ *    char str[];
+ *    val = otoi( str );
+ */
+
+int otoi( str )
+char str[];
+
+    {
+#ifdef FTLSOURCE
+    fortran int gctoi()
+    int dummy = 1;
+
+    return ( gctoi( str, dummy, 8 ) );
+#else
+    int result;
+
+    (void) sscanf( str, "%o", &result );
+
+    return ( result );
+#endif
+    }
+
+
+
+
+/* reallocate_array - increase the size of a dynamic array */
+
+char *reallocate_array( array, size, element_size )
+char *array;
+int size, element_size;
+
+    {
+    register char *new_array = realloc( array,
+					(unsigned) (size * element_size ));
+
+    if ( new_array == NULL )
+	lexfatal( "attempt to increase array size failed" );
+    
+    return ( new_array );
+    }
+
+
+/* skelout - write out one section of the lexskel file
+ *
+ * synopsis
+ *    skelout();
+ *
+ * DESCRIPTION
+ *    Copies from skelfile to stdout until a line beginning with "%%" or
+ *    EOF is found.
+ */
+skelout()
+
+    {
+    char buf[MAXLINE];
+
+    while ( fgets( buf, MAXLINE, skelfile ) != NULL )
+	if ( buf[0] == '%' && buf[1] == '%' )
+	    break;
+	else
+	    fputs( buf, stdout );
+    }
diff --git a/nfa.c b/nfa.c
new file mode 100644
index 0000000..d514ce1
--- /dev/null
+++ b/nfa.c
@@ -0,0 +1,542 @@
+/* lexnfa - NFA construction routines */
+
+/*
+ * Copyright (c) University of California, 1987
+ */
+
+#include "flexdef.h"
+
+/* add_accept - add an accepting state to a machine
+ *
+ * synopsis
+ *
+ *   add_accept( mach, headcnt, trailcnt );
+ *
+ * the global ACCNUM is incremented and the new value becomes mach's
+ * accepting number.  if headcnt or trailcnt is non-zero then the machine
+ * recognizes a pattern with trailing context.  headcnt is the number of
+ * characters in the matched part of the pattern, or zero if the matched
+ * part has variable length.  trailcnt is the number of trailing context
+ * characters in the pattern, or zero if the trailing context has variable
+ * length.
+ */
+add_accept( mach, headcnt, trailcnt )
+int mach, headcnt, trailcnt;
+
+    {
+    int astate;
+
+    printf( "case %d:\n", ++accnum );
+
+    if ( headcnt > 0 || trailcnt > 0 )
+	{ /* do trailing context magic to not match the trailing characters */
+	printf( "YYDOBEFORESCAN; /* undo effects of setting up yytext */\n" );
+
+	if ( headcnt > 0 )
+	    {
+	    if ( ! genftl || headcnt > 1 )
+		printf( "yycbufp = yybbufp + %d;\n",
+			genftl ? headcnt - 1 : headcnt );
+	    else
+		printf( "yycbufp = yybbufp;\n" );
+	    }
+
+	else
+	    printf( "yycbufp -= %d;\n", trailcnt );
+
+	printf( "YYDOBEFOREACTION; /* set up yytext again */\n" );
+	}
+
+    line_directive_out();
+
+    /* hang the accepting number off an epsilon state.  if it is associated
+     * with a state that has a non-epsilon out-transition, then the state
+     * will accept BEFORE it makes that transition, i.e. one character too soon
+     */
+
+    if ( transchar[finalst[mach]] == SYM_EPSILON )
+	accptnum[finalst[mach]] = accnum;
+
+    else
+	{
+	astate = mkstate( SYM_EPSILON );
+	accptnum[astate] = accnum;
+	mach = link_machines( mach, astate );
+	}
+    }
+
+
+/* copysingl - make a given number of copies of a singleton machine
+ *
+ * synopsis
+ *
+ *   newsng = copysingl( singl, num );
+ *
+ *     newsng - a new singleton composed of num copies of singl
+ *     singl  - a singleton machine
+ *     num    - the number of copies of singl to be present in newsng
+ */
+int copysingl( singl, num )
+int singl, num;
+
+    {
+    int copy, i;
+
+    copy = mkstate( SYM_EPSILON );
+
+    for ( i = 1; i <= num; ++i )
+	copy = link_machines( copy, dupmachine( singl ) );
+
+    return ( copy );
+    }
+
+
+/* dumpnfa - debugging routine to write out an nfa
+ *
+ * synopsis
+ *    int state1;
+ *    dumpnfa( state1 );
+ */
+dumpnfa( state1 )
+int state1;
+
+    {
+    int sym, tsp1, tsp2, anum, ns;
+
+    fprintf( stderr, "\n\n********** beginning dump of nfa with start state %d\n",
+	     state1 );
+
+    /* we probably should loop starting at firstst[state1] and going to
+     * lastst[state1], but they're not maintained properly when we "or"
+     * all of the rules together.  So we use our knowledge that the machine
+     * starts at state 1 and ends at lastnfa.
+     */
+
+    /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
+    for ( ns = 1; ns <= lastnfa; ++ns )
+	{
+	fprintf( stderr, "state # %4d\t", ns );
+
+	sym = transchar[ns];
+	tsp1 = trans1[ns];
+	tsp2 = trans2[ns];
+	anum = accptnum[ns];
+
+	fprintf( stderr, "%3d:  %4d, %4d", sym, tsp1, tsp2 );
+
+	if ( anum != NIL )
+	    fprintf( stderr, "  [%d]", anum );
+
+	fprintf( stderr, "\n" );
+	}
+
+    fprintf( stderr, "********** end of dump\n" );
+    }
+
+
+/* dupmachine - make a duplicate of a given machine
+ *
+ * synopsis
+ *
+ *   copy = dupmachine( mach );
+ *
+ *     copy - holds duplicate of mach
+ *     mach - machine to be duplicated
+ *
+ * note that the copy of mach is NOT an exact duplicate; rather, all the
+ * transition states values are adjusted so that the copy is self-contained,
+ * as the original should have been.
+ *
+ * also note that the original MUST be contiguous, with its low and high
+ * states accessible by the arrays firstst and lastst
+ */
+int dupmachine( mach )
+int mach;
+
+    {
+    int i, state, init, last = lastst[mach], state_offset;
+
+    for ( i = firstst[mach]; i <= last; ++i )
+	{
+	state = mkstate( transchar[i] );
+
+	if ( trans1[i] != NO_TRANSITION )
+	    {
+	    mkxtion( finalst[state], trans1[i] + state - i );
+
+	    if ( transchar[i] == SYM_EPSILON && trans2[i] != NO_TRANSITION )
+		mkxtion( finalst[state], trans2[i] + state - i );
+	    }
+
+	accptnum[state] = accptnum[i];
+	}
+
+    state_offset = state - i + 1;
+
+    init = mach + state_offset;
+    firstst[init] = firstst[mach] + state_offset;
+    finalst[init] = finalst[mach] + state_offset;
+    lastst[init] = lastst[mach] + state_offset;
+
+    return ( init );
+    }
+
+
+/* link_machines - connect two machines together
+ *
+ * synopsis
+ *
+ *   new = link_machines( first, last );
+ *
+ *     new    - a machine constructed by connecting first to last
+ *     first  - the machine whose successor is to be last
+ *     last   - the machine whose predecessor is to be first
+ *
+ * note: this routine concatenates the machine first with the machine
+ *  last to produce a machine new which will pattern-match first first
+ *  and then last, and will fail if either of the sub-patterns fails.
+ *  FIRST is set to new by the operation.  last is unmolested.
+ */
+int link_machines( first, last )
+int first, last;
+
+    {
+    if ( first == NIL )
+	return ( last );
+
+    else if ( last == NIL )
+	return ( first );
+
+    else
+	{
+	mkxtion( finalst[first], last );
+	finalst[first] = finalst[last];
+	lastst[first] = max( lastst[first], lastst[last] );
+	firstst[first] = min( firstst[first], firstst[last] );
+
+	return ( first );
+	}
+    }
+
+
+/* mkbranch - make a machine that branches to two machines
+ *
+ * synopsis
+ *
+ *   branch = mkbranch( first, second );
+ *
+ *     branch - a machine which matches either first's pattern or second's
+ *     first, second - machines whose patterns are to be or'ed (the | operator)
+ *
+ * note that first and second are NEITHER destroyed by the operation.  Also,
+ * the resulting machine CANNOT be used with any other "mk" operation except
+ * more mkbranch's.  Compare with mkor()
+ */
+int mkbranch( first, second )
+int first, second;
+
+    {
+    int eps;
+
+    if ( first == NO_TRANSITION )
+	return ( second );
+
+    else if ( second == NO_TRANSITION )
+	return ( first );
+
+    eps = mkstate( SYM_EPSILON );
+
+    mkxtion( eps, first );
+    mkxtion( eps, second );
+
+    return ( eps );
+    }
+
+
+/* mkclos - convert a machine into a closure
+ *
+ * synopsis
+ *   new = mkclos( state );
+ *
+ *     new - a new state which matches the closure of "state"
+ */
+int mkclos( state )
+int state;
+
+    {
+    return ( mkopt( mkposcl( state ) ) );
+    }
+
+
+/* mkopt - make a machine optional
+ *
+ * synopsis
+ *
+ *   new = mkopt( mach );
+ *
+ *     new  - a machine which optionally matches whatever mach matched
+ *     mach - the machine to make optional
+ *
+ * notes:
+ *     1. mach must be the last machine created
+ *     2. mach is destroyed by the call
+ */
+int mkopt( mach )
+int mach;
+
+    {
+    int eps;
+
+    if ( ! SUPER_FREE_EPSILON(finalst[mach]) )
+	{
+	eps = mkstate( SYM_EPSILON );
+	mach = link_machines( mach, eps );
+	}
+
+    /* can't skimp on the following if FREE_EPSILON(mach) is true because
+     * some state interior to "mach" might point back to the beginning
+     * for a closure
+     */
+    eps = mkstate( SYM_EPSILON );
+    mach = link_machines( eps, mach );
+
+    mkxtion( mach, finalst[mach] );
+
+    return ( mach );
+    }
+
+
+/* mkor - make a machine that matches either one of two machines
+ *
+ * synopsis
+ *
+ *   new = mkor( first, second );
+ *
+ *     new - a machine which matches either first's pattern or second's
+ *     first, second - machines whose patterns are to be or'ed (the | operator)
+ *
+ * note that first and second are both destroyed by the operation
+ * the code is rather convoluted because an attempt is made to minimize
+ * the number of epsilon states needed
+ */
+int mkor( first, second )
+int first, second;
+
+    {
+    int eps, orend;
+
+    if ( first == NIL )
+	return ( second );
+
+    else if ( second == NIL )
+	return ( first );
+
+    else
+	{
+	/* see comment in mkopt() about why we can't use the first state
+	 * of "first" or "second" if they satisfy "FREE_EPSILON"
+	 */
+	eps = mkstate( SYM_EPSILON );
+
+	first = link_machines( eps, first );
+
+	mkxtion( first, second );
+
+	if ( SUPER_FREE_EPSILON(finalst[first]) &&
+	     accptnum[finalst[first]] == NIL )
+	    {
+	    orend = finalst[first];
+	    mkxtion( finalst[second], orend );
+	    }
+
+	else if ( SUPER_FREE_EPSILON(finalst[second]) &&
+		  accptnum[finalst[second]] == NIL )
+	    {
+	    orend = finalst[second];
+	    mkxtion( finalst[first], orend );
+	    }
+
+	else
+	    {
+	    eps = mkstate( SYM_EPSILON );
+
+	    first = link_machines( first, eps );
+	    orend = finalst[first];
+
+	    mkxtion( finalst[second], orend );
+	    }
+	}
+
+    finalst[first] = orend;
+    return ( first );
+    }
+
+
+/* mkposcl - convert a machine into a positive closure
+ *
+ * synopsis
+ *   new = mkposcl( state );
+ *
+ *    new - a machine matching the positive closure of "state"
+ */
+int mkposcl( state )
+int state;
+
+    {
+    int eps;
+
+    if ( SUPER_FREE_EPSILON(finalst[state]) )
+	{
+	mkxtion( finalst[state], state );
+	return ( state );
+	}
+
+    else
+	{
+	eps = mkstate( SYM_EPSILON );
+	mkxtion( eps, state );
+	return ( link_machines( state, eps ) );
+	}
+    }
+
+
+/* mkrep - make a replicated machine
+ *
+ * synopsis
+ *   new = mkrep( mach, lb, ub );
+ *
+ *    new - a machine that matches whatever "mach" matched from "lb"
+ *          number of times to "ub" number of times
+ *
+ * note
+ *   if "ub" is INFINITY then "new" matches "lb" or more occurances of "mach"
+ */
+int mkrep( mach, lb, ub )
+int mach, lb, ub;
+
+    {
+    int base, tail, copy, i;
+
+    base = copysingl( mach, lb - 1 );
+
+    if ( ub == INFINITY )
+	{
+	copy = dupmachine( mach );
+	mach = link_machines( mach, link_machines( base, mkclos( copy ) ) );
+	}
+
+    else
+	{
+	tail = mkstate( SYM_EPSILON );
+
+	for ( i = lb; i < ub; ++i )
+	    {
+	    copy = dupmachine( mach );
+	    tail = mkopt( link_machines( copy, tail ) );
+	    }
+
+	mach = link_machines( mach, link_machines( base, tail ) );
+	}
+
+    return ( mach );
+    }
+
+
+/* mkstate - create a state with a transition on a given symbol
+ *
+ * synopsis
+ *
+ *   state = mkstate( sym );
+ *
+ *     state - a new state matching sym
+ *     sym   - the symbol the new state is to have an out-transition on
+ *
+ * note that this routine makes new states in ascending order through the
+ * state array (and increments LASTNFA accordingly).  The routine DUPMACHINE
+ * relies on machines being made in ascending order and that they are
+ * CONTIGUOUS.  Change it and you will have to rewrite DUPMACHINE (kludge
+ * that it admittedly is)
+ */
+int mkstate( sym )
+int sym;
+
+    {
+    if ( ++lastnfa >= current_mns )
+	{
+	if ( (current_mns += MNS_INCREMENT) >= MAXIMUM_MNS )
+	    lerrif( "input rules are too complicated (>= %d NFA states)",
+		    current_mns );
+	
+	++num_reallocs;
+
+	transchar = reallocate_integer_array( transchar, current_mns );
+	trans1 = reallocate_integer_array( trans1, current_mns );
+	trans2 = reallocate_integer_array( trans2, current_mns );
+	accptnum = reallocate_integer_array( accptnum, current_mns );
+	firstst = reallocate_integer_array( firstst, current_mns );
+	finalst = reallocate_integer_array( finalst, current_mns );
+	lastst = reallocate_integer_array( lastst, current_mns );
+	}
+
+    transchar[lastnfa] = sym;
+    trans1[lastnfa] = NO_TRANSITION;
+    trans2[lastnfa] = NO_TRANSITION;
+    accptnum[lastnfa] = NIL;
+    firstst[lastnfa] = lastnfa;
+    finalst[lastnfa] = lastnfa;
+    lastst[lastnfa] = lastnfa;
+
+    /* fix up equivalence classes base on this transition.  Note that any
+     * character which has its own transition gets its own equivalence class.
+     * Thus only characters which are only in character classes have a chance
+     * at being in the same equivalence class.  E.g. "a|b" puts 'a' and 'b'
+     * into two different equivalence classes.  "[ab]" puts them in the same
+     * equivalence class (barring other differences elsewhere in the input).
+     */
+
+    if ( sym < 0 )
+	{
+	/* we don't have to update the equivalence classes since that was
+	 * already done when the ccl was created for the first time
+	 */
+	}
+
+    else if ( sym == SYM_EPSILON )
+	++numeps;
+
+    else
+	{
+	if ( useecs )
+	    mkechar( sym, nextecm, ecgroup );
+	}
+
+    return ( lastnfa );
+    }
+
+
+/* mkxtion - make a transition from one state to another
+ *
+ * synopsis
+ *
+ *   mkxtion( statefrom, stateto );
+ *
+ *     statefrom - the state from which the transition is to be made
+ *     stateto   - the state to which the transition is to be made
+ */
+mkxtion( statefrom, stateto )
+int statefrom, stateto;
+
+    {
+    if ( trans1[statefrom] == NO_TRANSITION )
+	trans1[statefrom] = stateto;
+
+    else if ( (transchar[statefrom] != SYM_EPSILON) ||
+	      (trans2[statefrom] != NO_TRANSITION) )
+	lexfatal( "found too many transitions in mkxtion()" );
+
+    else
+	{ /* second out-transition for an epsilon state */
+	++eps2;
+	trans2[statefrom] = stateto;
+	}
+    }
diff --git a/parse.y b/parse.y
new file mode 100644
index 0000000..b5cb379
--- /dev/null
+++ b/parse.y
@@ -0,0 +1,473 @@
+/* lexparse.y - parser for flex input */
+
+/*
+ * Copyright (c) University of California, 1987
+ */
+
+%token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL
+
+%{
+#include "flexdef.h"
+
+int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen;
+int trlcontxt, xcluflg, cclsorted, varlength;
+char clower();
+
+static int madeany = false;  /* whether we've made the '.' character class */
+
+%}
+
+%%
+goal            :  initlex sect1 sect1end sect2
+		;
+
+initlex         :
+			{
+			/* initialize for processing rules */
+
+			/* create default DFA start condition */
+			scinstal( "0", false );
+			}
+		;
+			
+sect1		:  sect1 startconddecl WHITESPACE namelist1 '\n'
+		|
+		|  error '\n'
+			{ synerr( "unknown error processing section 1" ); }
+		;
+
+sect1end	:  SECTEND
+		;
+
+startconddecl   :  SCDECL
+			{
+			/* these productions are separate from the s1object
+			 * rule because the semantics must be done before
+			 * we parse the remainder of an s1object
+			 */
+
+			xcluflg = false;
+			}
+		
+		|  XSCDECL
+			{ xcluflg = true; }
+		;
+
+namelist1	:  namelist1 WHITESPACE NAME
+			{ scinstal( nmstr, xcluflg ); }
+
+		|  NAME
+			{ scinstal( nmstr, xcluflg ); }
+
+		|  error
+                        { synerr( "bad start condition list" ); }
+		;
+
+sect2           :  sect2 initforrule lexrule '\n'
+		|
+		;
+
+initforrule     :
+			{
+			/* initialize for a parse of one rule */
+			trlcontxt = varlength = false;
+			trailcnt = headcnt = rulelen = 0;
+			}
+		;
+
+lexrule         :  scon '^' re eol 
+                        {
+			pat = link_machines( $3, $4 );
+			add_accept( pat, headcnt, trailcnt );
+
+			for ( i = 1; i <= actvp; ++i )
+			    scbol[actvsc[i]] = mkbranch( scbol[actvsc[i]], pat );
+			}
+
+		|  scon re eol 
+                        {
+			pat = link_machines( $2, $3 );
+			add_accept( pat, headcnt, trailcnt );
+
+			for ( i = 1; i <= actvp; ++i )
+			    scset[actvsc[i]] = mkbranch( scset[actvsc[i]], pat );
+			}
+
+                |  '^' re eol 
+			{
+			pat = link_machines( $2, $3 );
+			add_accept( pat, headcnt, trailcnt );
+
+			/* add to all non-exclusive start conditions,
+			 * including the default (0) start condition
+			 */
+
+			for ( i = 1; i <= lastsc; ++i )
+			    if ( ! scxclu[i] )
+				scbol[i] = mkbranch( scbol[i], pat );
+			}
+
+                |  re eol 
+			{
+			pat = link_machines( $1, $2 );
+			add_accept( pat, headcnt, trailcnt );
+
+			for ( i = 1; i <= lastsc; ++i )
+			    if ( ! scxclu[i] )
+				scset[i] = mkbranch( scset[i], pat );
+			}
+
+                |  error
+			{ synerr( "unrecognized rule" ); }
+		;
+
+scon            :  '<' namelist2 '>'
+		;
+
+namelist2       :  namelist2 ',' NAME
+                        {
+			if ( (scnum = sclookup( nmstr )) == 0 )
+			    synerr( "undeclared start condition" );
+
+			else
+			    actvsc[++actvp] = scnum;
+			}
+
+		|  NAME
+			{
+			if ( (scnum = sclookup( nmstr )) == 0 )
+			    synerr( "undeclared start condition" );
+			else
+			    actvsc[actvp = 1] = scnum;
+			}
+
+		|  error
+			{ synerr( "bad start condition list" ); }
+		;
+
+eol             :  '$'
+                        {
+			if ( trlcontxt )
+			    {
+			    synerr( "trailing context used twice" );
+			    $$ = mkstate( SYM_EPSILON );
+			    }
+			else
+			    {
+			    trlcontxt = true;
+
+			    if ( ! varlength )
+				headcnt = rulelen;
+
+			    ++rulelen;
+			    trailcnt = 1;
+
+			    eps = mkstate( SYM_EPSILON );
+			    $$ = link_machines( eps, mkstate( '\n' ) );
+			    }
+			}
+
+		|
+		        {
+		        $$ = mkstate( SYM_EPSILON );
+
+			if ( trlcontxt )
+			    {
+			    if ( varlength && headcnt == 0 )
+				/* both head and trail are variable-length */
+				synerr( "illegal trailing context" );
+
+			    else
+				trailcnt = rulelen;
+			    }
+		        }
+		;
+
+re              :  re '|' series
+                        {
+			varlength = true;
+
+			$$ = mkor( $1, $3 );
+			}
+
+		|  re2 series
+			{ $$ = link_machines( $1, $2 ); }
+
+		|  series
+			{ $$ = $1; }
+		;
+
+
+re2		:  re '/'
+			{
+			/* this rule is separate from the others for "re" so
+			 * that the reduction will occur before the trailing
+			 * series is parsed
+			 */
+
+			if ( trlcontxt )
+			    synerr( "trailing context used twice" );
+			else
+			    trlcontxt = true;
+
+			if ( varlength )
+			    /* the trailing context had better be fixed-length */
+			    varlength = false;
+			else
+			    headcnt = rulelen;
+
+			rulelen = 0;
+			$$ = $1;
+			}
+		;
+
+series          :  series singleton
+                        {
+			/* this is where concatenation of adjacent patterns
+			 * gets done
+			 */
+			$$ = link_machines( $1, $2 );
+			}
+
+		|  singleton
+			{ $$ = $1; }
+		;
+
+singleton       :  singleton '*'
+                        {
+			varlength = true;
+
+			$$ = mkclos( $1 );
+			}
+			
+		|  singleton '+'
+			{
+			varlength = true;
+
+			$$ = mkposcl( $1 );
+			}
+
+		|  singleton '?'
+			{
+			varlength = true;
+
+			$$ = mkopt( $1 );
+			}
+
+		|  singleton '{' NUMBER ',' NUMBER '}'
+			{
+			varlength = true;
+
+			if ( $3 > $5 || $3 <= 0 )
+			    {
+			    synerr( "bad iteration values" );
+			    $$ = $1;
+			    }
+			else
+			    $$ = mkrep( $1, $3, $5 );
+			}
+				
+		|  singleton '{' NUMBER ',' '}'
+			{
+			varlength = true;
+
+			if ( $3 <= 0 )
+			    {
+			    synerr( "iteration value must be positive" );
+			    $$ = $1;
+			    }
+
+			else
+			    $$ = mkrep( $1, $3, INFINITY );
+			}
+
+		|  singleton '{' NUMBER '}'
+			{
+			rulelen = rulelen + $3;
+
+			if ( $3 <= 0 )
+			    {
+			    synerr( "iteration value must be positive" );
+			    $$ = $1;
+			    }
+
+			else
+			    $$ = link_machines( $1, copysingl( $1, $3 - 1 ) );
+			}
+
+		|  '.'
+			{
+			if ( ! madeany )
+			    {
+			    /* create the '.' character class */
+			    anyccl = cclinit();
+			    ccladd( anyccl, '\n' );
+			    cclnegate( anyccl );
+
+			    if ( useecs )
+				mkeccl( ccltbl + cclmap[anyccl],
+					ccllen[anyccl], nextecm,
+					ecgroup, CSIZE );
+			    
+			    madeany = true;
+			    }
+
+			++rulelen;
+
+			$$ = mkstate( -anyccl );
+			}
+
+		|  fullccl
+			{
+			if ( ! cclsorted )
+			    /* sort characters for fast searching.  We use a
+			     * shell sort since this list could be large.
+			     */
+			    cshell( ccltbl + cclmap[$1], ccllen[$1] );
+
+			if ( useecs )
+			    mkeccl( ccltbl + cclmap[$1], ccllen[$1],
+				    nextecm, ecgroup, CSIZE );
+				     
+			++rulelen;
+
+			$$ = mkstate( -$1 );
+			}
+
+		|  PREVCCL
+			{
+			++rulelen;
+
+			$$ = mkstate( -$1 );
+			}
+
+		|  '"' string '"'
+			{ $$ = $2; }
+
+		|  '(' re ')'
+			{ $$ = $2; }
+
+		|  CHAR
+			{
+			++rulelen;
+
+			if ( $1 == '\0' )
+			    synerr( "null in rule" );
+
+			if ( caseins && $1 >= 'A' && $1 <= 'Z' )
+			    $1 = clower( $1 );
+
+			$$ = mkstate( $1 );
+			}
+		;
+
+fullccl		:  '[' ccl ']'
+			{ $$ = $2; }
+
+		|  '[' '^' ccl ']'
+			{
+			/* *Sigh* - to be compatible Unix lex, negated ccls
+			 * match newlines
+			 */
+#ifdef NOTDEF
+			ccladd( $3, '\n' ); /* negated ccls don't match '\n' */
+			cclsorted = false; /* because we added the newline */
+#endif
+			cclnegate( $3 );
+			$$ = $3;
+			}
+		;
+
+ccl             :  ccl CHAR '-' CHAR
+                        {
+			if ( $2 > $4 )
+			    synerr( "negative range in character class" );
+
+			else
+			    {
+			    if ( caseins )
+				{
+				if ( $2 >= 'A' && $2 <= 'Z' )
+				    $2 = clower( $2 );
+				if ( $4 >= 'A' && $4 <= 'Z' )
+				    $4 = clower( $4 );
+				}
+
+			    for ( i = $2; i <= $4; ++i )
+			        ccladd( $1, i );
+
+			    /* keep track if this ccl is staying in alphabetical
+			     * order
+			     */
+			    cclsorted = cclsorted && ($2 > lastchar);
+			    lastchar = $4;
+			    }
+			
+			$$ = $1;
+			}
+
+		|  ccl CHAR
+		        {
+			if ( caseins )
+			    if ( $2 >= 'A' && $2 <= 'Z' )
+				$2 = clower( $2 );
+
+			ccladd( $1, $2 );
+			cclsorted = cclsorted && ($2 > lastchar);
+			lastchar = $2;
+			$$ = $1;
+			}
+
+		|
+			{
+			cclsorted = true;
+			lastchar = 0;
+			$$ = cclinit();
+			}
+		;
+
+string		:  string CHAR
+                        {
+			if ( caseins )
+			    if ( $2 >= 'A' && $2 <= 'Z' )
+				$2 = clower( $2 );
+
+			++rulelen;
+
+			$$ = link_machines( $1, mkstate( $2 ) );
+			}
+
+		|
+			{ $$ = mkstate( SYM_EPSILON ); }
+		;
+
+%%
+
+/* synerr - report a syntax error
+ *
+ * synopsis
+ *    char str[];
+ *    synerr( str );
+ */
+
+synerr( str )
+char str[];
+
+    {
+    syntaxerror = true;
+    fprintf( stderr, "Syntax error at line %d:  %s\n", linenum, str );
+    }
+
+
+/* yyerror - eat up an error message from the parser
+ *
+ * synopsis
+ *    char msg[];
+ *    yyerror( msg );
+ */
+
+yyerror( msg )
+char msg[];
+
+    {
+    }
diff --git a/scan.l b/scan.l
new file mode 100644
index 0000000..5f344dc
--- /dev/null
+++ b/scan.l
@@ -0,0 +1,370 @@
+/* flexscan.l - scanner for flex input */
+
+/*
+ * Copyright (c) University of California, 1987
+ */
+
+%{
+#include "flexdef.h"
+#include "strings.h"
+#include "y.tab.h"
+
+#undef YYDECL
+#define YYDECL \
+	int lexscan()
+
+#undef yywrap
+#define yywrap(result) \
+	{ \
+	if ( ! did_second_skelout ) \
+		skelout(); \
+	result = 1; \
+	}
+
+#define RETURNCHAR \
+	yylval = yytext[0]; \
+	return ( CHAR );
+
+#define RETURNNAME \
+	(void) strcpy( nmstr, yytext ); \
+	return ( NAME );
+
+#define PUT_BACK_STRING(str, start) \
+	for ( i = strlen( str ) - 1; i >= start; --i ) \
+	    unput(str[i])
+%}
+
+%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
+%x FIRSTCCL CCL ACTION RECOVER BRACEERROR C_COMMENT ACTION_COMMENT
+%x ACTION_STRING PERCENT_BRACE_ACTION
+
+WS		[ \t]+
+
+OPTWS		[ \t]*
+
+NAME		[a-z_][a-z_0-9]*
+
+SCNAME		{NAME}
+
+ESCSEQ		\\([^^\n]|"^".|0[0-9]{1,3})
+
+%%
+    static int bracelevel;
+    int i, cclval;
+    char nmdef[MAXLINE], myesc();
+    static int didadef, did_second_skelout = false;
+
+^{WS}.*\n		++linenum; ECHO; /* indented code */
+^#.*\n			++linenum; ECHO; /* either a Ratfor comment or a CPP directive */
+^"/*"			ECHO; BEGIN(C_COMMENT);
+^"%s"(tart)?		return ( SCDECL );
+^"%x"			return ( XSCDECL );
+^"%{".*\n		++linenum; line_directive_out(); BEGIN(CODEBLOCK);
+{WS}			return ( WHITESPACE );
+
+^"%%".*			{
+			sectnum = 2;
+			skelout();
+			line_directive_out();
+			BEGIN(SECT2PROLOG);
+			return ( SECTEND );
+			}
+
+^"%"[^sx{%].*\n		{
+			fprintf( stderr,
+			     "old-style lex command at line %d ignored:\n\t%s",
+				 linenum, yytext );
+			++linenum;
+			}
+
+^{NAME}			{
+			(void) strcpy( nmstr, yytext );
+			didadef = false;
+			BEGIN(PICKUPDEF);
+			}
+
+{SCNAME}		RETURNNAME;
+^{OPTWS}\n		++linenum; /* allows blank lines in section 1 */
+\n			++linenum; return ( '\n' );
+.			synerr( "illegal character" ); BEGIN(RECOVER);
+
+
+<C_COMMENT>"*/"		ECHO; BEGIN(0);
+<C_COMMENT>"*/".*\n	++linenum; ECHO; BEGIN(0);
+<C_COMMENT>[^*\n]+	ECHO;
+<C_COMMENT>"*"		ECHO;
+<C_COMMENT>\n		++linenum; ECHO;
+
+<CODEBLOCK>^"%}".*\n	++linenum; BEGIN(0);
+<CODEBLOCK>.*\n		++linenum; ECHO;
+
+<PICKUPDEF>{WS}		/* separates name and definition */
+
+<PICKUPDEF>[^ \t\n].*	{
+			(void) strcpy( nmdef, yytext );
+
+			for ( i = strlen( nmdef ) - 1;
+			      i >= 0 &&
+			      nmdef[i] == ' ' || nmdef[i] == '\t';
+			      --i )
+			    ;
+
+			nmdef[i + 1] = '\0';
+
+                        ndinstal( nmstr, nmdef );
+			didadef = true;
+			}
+
+<PICKUPDEF>\n		{
+			if ( ! didadef )
+			    synerr( "incomplete name definition" );
+			BEGIN(0);
+			++linenum;
+			}
+
+<RECOVER>.*\n		++linenum; RETURNNAME;
+
+
+<SECT2PROLOG>.*\n/[^ \t\n]	{
+			++linenum;
+			ECHO;
+			skelout();
+			did_second_skelout = true;
+			BEGIN(SECT2);
+			}
+
+<SECT2PROLOG>.*\n	++linenum; ECHO;
+
+<SECT2>^{OPTWS}\n	++linenum; /* allow blank lines in section 2 */
+<SECT2>^{WS}.*\n	{
+			synerr( "indented code found outside of action" );
+			++linenum;
+			}
+<SECT2>"<"		BEGIN(SC); return ( '<' );
+<SECT2>^"^"		return ( '^' );
+<SECT2>\"		BEGIN(QUOTE); return ( '"' );
+<SECT2>"{"/[0-9]		BEGIN(NUM); return ( '{' );
+<SECT2>"{"[^0-9\n][^}\n]*	BEGIN(BRACEERROR);
+<SECT2>"$"/[ \t\n]	return ( '$' );
+
+<SECT2>{WS}"%{"		{
+			bracelevel = 1;
+			BEGIN(PERCENT_BRACE_ACTION);
+			return ( '\n' );
+			}
+<SECT2>{WS}"|".*\n	++linenum; return ( '\n' );
+
+<SECT2>{WS}	|
+<SECT2>{OPTWS}/\n	{
+			bracelevel = 0;
+			BEGIN(ACTION);
+			return ( '\n' );
+			}
+
+<SECT2>^{OPTWS}\n	++linenum; return ( '\n' );
+
+<SECT2>^"%%".*		{
+			/* guarentee that the SECT3 rule will have something
+			 * to match
+			 */
+			yyless(1);
+			sectnum = 3;
+			BEGIN(SECT3);
+			return ( EOF ); /* to stop the parser */
+			}
+
+<SECT2>"["([^\\\]\n]|{ESCSEQ})+"]"	{
+			(void) strcpy( nmstr, yytext );
+
+			/* check to see if we've already encountered this ccl */
+			if ( (cclval = ccllookup( nmstr )) )
+			    {
+			    yylval = cclval;
+			    ++cclreuse;
+			    return ( PREVCCL );
+			    }
+			else
+			    {
+			    /* we fudge a bit.  We know that this ccl will
+			     * soon be numbered as lastccl + 1 by cclinit
+			     */
+			    cclinstal( nmstr, lastccl + 1 );
+
+			    /* push back everything but the leading bracket
+			     * so the ccl can be rescanned
+			     */
+			    PUT_BACK_STRING(nmstr, 1);
+
+			    BEGIN(FIRSTCCL);
+			    return ( '[' );
+			    }
+			}
+
+<SECT2>"{"{NAME}"}"	{
+			register char *nmdefptr;
+			char *ndlookup();
+
+			(void) strcpy( nmstr, yytext );
+			nmstr[yyleng - 1] = '\0';  /* chop trailing brace */
+
+			/* lookup from "nmstr + 1" to chop leading brace */
+			if ( ! (nmdefptr = ndlookup( nmstr + 1 )) )
+			    synerr( "undefined {name}" );
+
+			else
+			    { /* push back name surrounded by ()'s */
+			    unput(')');
+			    PUT_BACK_STRING(nmdefptr, 0);
+			    unput('(');
+			    }
+			}
+
+<SECT2>[/|*+?.()]	return ( yytext[0] );
+<SECT2>.		RETURNCHAR;
+<SECT2>\n		++linenum; return ( '\n' );
+
+
+<SC>","			return ( ',' );
+<SC>">"			BEGIN(SECT2); return ( '>' );
+<SC>">"/"^"		BEGIN(CARETISBOL); return ( '>' );
+<SC>{SCNAME}		RETURNNAME;
+<SC>.			synerr( "bad start condition name" );
+
+<CARETISBOL>"^"		BEGIN(SECT2); return ( '^' );
+
+
+<QUOTE>[^"\n]		RETURNCHAR;
+<QUOTE>\"		BEGIN(SECT2); return ( '"' );
+
+<QUOTE>\n		{
+			synerr( "missing quote" );
+			BEGIN(SECT2);
+			++linenum;
+			return ( '"' );
+			}
+
+
+<FIRSTCCL>"^"/[^-\n]	BEGIN(CCL); return ( '^' );
+<FIRSTCCL>"^"/-		return ( '^' );
+<FIRSTCCL>-		BEGIN(CCL); yylval = '-'; return ( CHAR );
+<FIRSTCCL>.		BEGIN(CCL); RETURNCHAR;
+
+<CCL>-/[^\]\n]		return ( '-' );
+<CCL>[^\]\n]		RETURNCHAR;
+<CCL>"]"			BEGIN(SECT2); return ( ']' );
+
+
+<NUM>[0-9]+		{
+			yylval = myctoi( yytext );
+			return ( NUMBER );
+			}
+
+<NUM>","			return ( ',' );
+<NUM>"}"			BEGIN(SECT2); return ( '}' );
+
+<NUM>.			{
+			synerr( "bad character inside {}'s" );
+			BEGIN(SECT2);
+			return ( '}' );
+			}
+
+<NUM>\n			{
+			synerr( "missing }" );
+			BEGIN(SECT2);
+			++linenum;
+			return ( '}' );
+			}
+
+
+<BRACEERROR>"}"		synerr( "bad name in {}'s" ); BEGIN(SECT2);
+<BRACEERROR>\n		synerr( "missing }" ); ++linenum; BEGIN(SECT2);
+
+
+<PERCENT_BRACE_ACTION>{OPTWS}"%}".*	bracelevel = 0;
+<PERCENT_BRACE_ACTION>.*		ECHO;
+<PERCENT_BRACE_ACTION>\n		{
+			++linenum;
+			ECHO;
+			if ( bracelevel == 0 )
+			    {
+			    if ( genftl )
+				puts( "\tbreak;" );
+			    BEGIN(SECT2);
+			    }
+			}
+
+<ACTION>"{"		ECHO; ++bracelevel;
+<ACTION>"}"		ECHO; --bracelevel;
+<ACTION>[^{}"'/\n]+	ECHO;
+<ACTION>"/*"		ECHO; BEGIN(ACTION_COMMENT);
+<ACTION>"'"([^'\\\n]|\\.)*"'"	ECHO; /* character constant */
+<ACTION>\"		ECHO; BEGIN(ACTION_STRING);
+<ACTION>\n		{
+			++linenum;
+			ECHO;
+			if ( bracelevel == 0 )
+			    {
+			    if ( genftl )
+				puts( "\tbreak;" );
+			    BEGIN(SECT2);
+			    }
+			}
+<ACTION>.		ECHO;
+
+<ACTION_COMMENT>"*/"	ECHO; BEGIN(ACTION);
+<ACTION_COMMENT>[^*\n]+	ECHO;
+<ACTION_COMMENT>"*"	ECHO;
+<ACTION_COMMENT>\n	++linenum; ECHO;
+<ACTION_COMMENT>.	ECHO;
+
+<ACTION_STRING>[^"\\\n]+	ECHO;
+<ACTION_STRING>\\.	ECHO;
+<ACTION_STRING>\n	++linenum; ECHO;
+<ACTION_STRING>\"	ECHO; BEGIN(ACTION);
+<ACTION_STRING>.	ECHO;
+
+
+<SECT2,QUOTE,CCL>{ESCSEQ}	{
+			yylval = myesc( yytext );
+			return ( CHAR );
+			}
+
+<FIRSTCCL>{ESCSEQ}	{
+			yylval = myesc( yytext );
+			BEGIN(CCL);
+			return ( CHAR );
+			}
+
+
+<SECT3>.|\n		{
+			register int numchars;
+
+			/* black magic - we know the names of a lex scanner's
+			 * internal variables.  We cap the input buffer with
+			 * an end-of-string and dump it to the output.
+			 */
+			YYDOBEFORESCAN;	/* recover from setting up yytext */
+
+			yychbuf[yyebufp + 1] = '\0';
+
+			/* ignore the first character; it's the second '%'
+			 * put back by the yyless(1) above
+			 */
+			fputs( yychbuf + yycbufp + 1, stdout );
+
+			/* if we don't do this, the data written by write()
+			 * can get overwritten when stdout is finally flushed
+			 */
+			(void) fflush( stdout );
+
+			while ( (numchars = read( fileno(yyin), yychbuf,
+						  YYBUFMAX )) > 0 )
+			    (void) write( fileno(stdout), yychbuf, numchars );
+	
+			if ( numchars < 0 )
+			    lexerror( "fatal read error in section 3" );
+
+			return ( EOF );
+			}
+
+%%
diff --git a/sym.c b/sym.c
new file mode 100644
index 0000000..af50831
--- /dev/null
+++ b/sym.c
@@ -0,0 +1,291 @@
+/* lexsym - symbol table routines */
+
+/*
+ * Copyright (c) University of California, 1987
+ */
+
+#include "flexdef.h"
+
+struct hash_entry *ndtbl[NAME_TABLE_HASH_SIZE];
+struct hash_entry *sctbl[START_COND_HASH_SIZE];
+struct hash_entry *ccltab[CCL_HASH_SIZE];
+
+
+/* addsym - add symbol and definition to symbol table
+ *
+ * synopsis
+ *    char sym[], def[];
+ *    hash_table table;
+ *    int table_size;
+ *    -1/0 = addsym( sym, def, table, table_size );
+ *
+ * -1 is returned if the symbol already exists, and the change not made.
+ */
+
+int addsym( sym, def, table, table_size )
+register char sym[];
+char def[];
+hash_table table;
+int table_size;
+
+    {
+    int hash_val = hashfunct( sym, table_size );
+    register struct hash_entry *entry = table[hash_val];
+    register struct hash_entry *new_entry;
+    register struct hash_entry *successor;
+    char *malloc();
+
+    while ( entry )
+	{
+	if ( ! strcmp( sym, entry->name ) )
+	    { /* entry already exists */
+	    return ( -1 );
+	    }
+	
+	entry = entry->next;
+	}
+    
+    /* create new entry */
+    new_entry = (struct hash_entry *) malloc( sizeof( struct hash_entry ) );
+
+    if ( new_entry == NULL )
+	lexfatal( "symbol table memory allocation failed" );
+
+    if ( (successor = table[hash_val]) )
+	{
+	new_entry->next = successor;
+	successor->prev = new_entry;
+	}
+    else
+	new_entry->next = NULL;
+
+    new_entry->prev = NULL;
+    new_entry->name = sym;
+    new_entry->val = def;
+
+    table[hash_val] = new_entry;
+
+    return ( 0 );
+    }
+
+
+/* cclinstal - save the text of a character class
+ *
+ * synopsis
+ *    char ccltxt[];
+ *    int cclnum;
+ *    cclinstal( ccltxt, cclnum );
+ */
+cclinstal( ccltxt, cclnum )
+char ccltxt[];
+int cclnum;
+
+    {
+    /* we don't bother checking the return status because we are not called
+     * unless the symbol is new
+     */
+    char *copy_string();
+
+    (void) addsym( copy_string( ccltxt ), (char *) cclnum,
+		   ccltab, CCL_HASH_SIZE );
+    }
+
+
+/* ccllookup - lookup the number associated with character class text
+ *
+ * synopsis
+ *    char ccltxt[];
+ *    int ccllookup, cclval;
+ *    cclval/0 = ccllookup( ccltxt );
+ */
+int ccllookup( ccltxt )
+char ccltxt[];
+
+    {
+    char *getdef();
+
+    return ( (int) getdef( ccltxt, ccltab, CCL_HASH_SIZE ) );
+    }
+
+
+/* findsym - find symbol in symbol table
+ *
+ * synopsis
+ *    char sym[];
+ *    hash_table table;
+ *    int table_size;
+ *    struct hash_entry *entry, *findsym();
+ *    entry = findsym( sym, table, table_size );
+ */
+
+struct hash_entry *findsym( sym, table, table_size )
+register char sym[];
+hash_table table;
+int table_size;
+
+    {
+    register struct hash_entry *entry = table[hashfunct( sym, table_size )];
+
+    while ( entry )
+	{
+	if ( ! strcmp( sym, entry->name ) )
+	    return ( entry );
+	entry = entry->next;
+	}
+
+    return ( NULL );
+    }
+
+
+/* getdef - get symbol definition from symbol table
+ *
+ * synopsis
+ *    char sym[];
+ *    hash_table table;
+ *    int table_size;
+ *    char *def, *getdef();
+ *    def = getdef( sym, table, table_size );
+ */
+
+char *getdef( sym, table, table_size )
+register char sym[];
+hash_table table;
+int table_size;
+
+    {
+    register struct hash_entry *entry = findsym( sym, table, table_size );
+
+    if ( entry )
+	return ( entry->val );
+    
+    return ( NULL );
+    }
+
+    
+/* hashfunct - compute the hash value for "str" and hash size "hash_size"
+ *
+ * synopsis
+ *    char str[];
+ *    int hash_size, hash_val;
+ *    hash_val = hashfunct( str, hash_size );
+ */
+
+int hashfunct( str, hash_size )
+register char str[];
+int hash_size;
+
+    {
+    register int hashval;
+    register int locstr;
+
+    hashval = 0;
+    locstr = 0;
+
+    while ( str[locstr] )
+	hashval = ((hashval << 1) + str[locstr++]) % hash_size;
+
+    return ( hashval );
+    }
+
+
+/* ndinstal - install a name definition
+ *
+ * synopsis
+ *    char nd[], def[];
+ *    ndinstal( nd, def );
+ */
+ndinstal( nd, def )
+char nd[], def[];
+
+    {
+    char *copy_string();
+
+    if ( addsym( copy_string( nd ), copy_string( def ),
+		 ndtbl, NAME_TABLE_HASH_SIZE ) )
+	synerr( "name defined twice" );
+    }
+
+
+/* ndlookup - lookup a name definition
+ *
+ * synopsis
+ *    char nd[], *def;
+ *    char *ndlookup();
+ *    def/NULL = ndlookup( nd );
+ */
+char *ndlookup( nd )
+char nd[];
+
+    {
+    char *getdef();
+
+    return ( getdef( nd, ndtbl, NAME_TABLE_HASH_SIZE ) );
+    }
+
+
+/* scinstal - make a start condition
+ *
+ * synopsis
+ *    char str[];
+ *    int xcluflg;
+ *    scinstal( str, xcluflg );
+ *
+ * NOTE
+ *    the start condition is Exclusive if xcluflg is true
+ */
+scinstal( str, xcluflg )
+char str[];
+int xcluflg;
+
+    {
+    char *copy_string();
+
+    if ( genftl )
+	{
+	/* bit of a hack.  We know how the default start-condition is
+	 * declared, and don't put out a define for it, because it
+	 * would come out as "#define 0 1"
+	 */
+
+	if ( strcmp( str, "0" ) )
+	    printf( "#define %s %d\n", str, lastsc * 2 );
+	}
+
+    else
+	printf( "define(YYLEX_SC_%s,%d)\n", str, lastsc * 2 );
+
+    if ( ++lastsc >= current_max_scs )
+	{
+	current_max_scs += MAX_SCS_INCREMENT;
+
+	++num_reallocs;
+
+	scset = reallocate_integer_array( scset, current_max_scs );
+	scbol = reallocate_integer_array( scbol, current_max_scs );
+	scxclu = reallocate_integer_array( scxclu, current_max_scs );
+	actvsc = reallocate_integer_array( actvsc, current_max_scs );
+	}
+
+    if ( addsym( copy_string( str ), (char *) lastsc,
+	 sctbl, START_COND_HASH_SIZE ) )
+	lerrsf( "start condition %s declared twice", str );
+
+    scset[lastsc] = mkstate( SYM_EPSILON );
+    scbol[lastsc] = mkstate( SYM_EPSILON );
+    scxclu[lastsc] = xcluflg;
+    }
+
+
+/* sclookup - lookup the number associated with a start condition
+ *
+ * synopsis
+ *    char str[], scnum;
+ *    int sclookup;
+ *    scnum/0 = sclookup( str );
+ */
+int sclookup( str )
+char str[];
+
+    {
+    return ( (int) getdef( str, sctbl, START_COND_HASH_SIZE ) );
+    }
diff --git a/tblcmp.c b/tblcmp.c
new file mode 100644
index 0000000..ae9bfd7
--- /dev/null
+++ b/tblcmp.c
@@ -0,0 +1,1324 @@
+/* lexcmp - table compression routines */
+
+/*
+ * Copyright (c) University of California, 1987
+ */
+
+#include "flexdef.h"
+
+/* bldtbl - build table entries for dfa state
+ *
+ * synopsis
+ *   int state[numecs], statenum, totaltrans, comstate, comfreq;
+ *   bldtbl( state, statenum, totaltrans, comstate, comfreq );
+ *
+ * State is the statenum'th dfa state.  It is indexed by equivalence class and
+ * gives the number of the state to enter for a given equivalence class.
+ * totaltrans is the total number of transitions out of the state.  Comstate
+ * is that state which is the destination of the most transitions out of State.
+ * Comfreq is how many transitions there are out of State to Comstate.
+ *
+ * A note on terminology:
+ *    "protos" are transition tables which have a high probability of
+ * either being redundant (a state processed later will have an identical
+ * transition table) or nearly redundant (a state processed later will have
+ * many of the same out-transitions).  A "most recently used" queue of
+ * protos is kept around with the hope that most states will find a proto
+ * which is similar enough to be usable, and therefore compacting the
+ * output tables.
+ *    "templates" are a special type of proto.  If a transition table is
+ * homogenous or nearly homogenous (all transitions go to the same destination)
+ * then the odds are good that future states will also go to the same destination
+ * state on basically the same character set.  These homogenous states are
+ * so common when dealing with large rule sets that they merit special
+ * attention.  If the transition table were simply made into a proto, then
+ * (typically) each subsequent, similar state will differ from the proto
+ * for two out-transitions.  One of these out-transitions will be that
+ * character on which the proto does not go to the common destination,
+ * and one will be that character on which the state does not go to the
+ * common destination.  Templates, on the other hand, go to the common
+ * state on EVERY transition character, and therefore cost only one
+ * difference.
+ */
+bldtbl( state, statenum, totaltrans, comstate, comfreq )
+int state[], statenum, totaltrans, comstate, comfreq;
+
+    {
+    int extptr, extrct[2][CSIZE + 1];
+    int mindiff, minprot, i, d;
+    int checkcom;
+
+    /* If extptr is 0 then the first array of extrct holds the result of the
+     * "best difference" to date, which is those transitions which occur in
+     * "state" but not in the proto which, to date, has the fewest differences
+     * between itself and "state".  If extptr is 1 then the second array of
+     * extrct hold the best difference.  The two arrays are toggled
+     * between so that the best difference to date can be kept around and
+     * also a difference just created by checking against a candidate "best"
+     * proto.
+     */
+
+    extptr = 0;
+
+    /* if the state has too few out-transitions, don't bother trying to
+     * compact its tables
+     */
+
+    if ( (totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE) )
+	mkentry( state, numecs, statenum, JAMSTATE, totaltrans );
+
+    else
+	{
+	/* checkcom is true if we should only check "state" against
+	 * protos which have the same "comstate" value
+	 */
+
+	checkcom = comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE;
+
+	minprot = firstprot;
+	mindiff = totaltrans;
+
+	if ( checkcom )
+	    {
+	    /* find first proto which has the same "comstate" */
+	    for ( i = firstprot; i != NIL; i = protnext[i] )
+		if ( protcomst[i] == comstate )
+		    {
+		    minprot = i;
+		    mindiff = tbldiff( state, minprot, extrct[extptr] );
+		    break;
+		    }
+	    }
+
+	else
+	    {
+	    /* since we've decided that the most common destination out
+	     * of "state" does not occur with a high enough frequency,
+	     * we set the "comstate" to zero, assuring that if this state
+	     * is entered into the proto list, it will not be considered
+	     * a template.
+	     */
+	    comstate = 0;
+
+	    if ( firstprot != NIL )
+		{
+		minprot = firstprot;
+		mindiff = tbldiff( state, minprot, extrct[extptr] );
+		}
+	    }
+
+	/* we now have the first interesting proto in "minprot".  If
+	 * it matches within the tolerances set for the first proto,
+	 * we don't want to bother scanning the rest of the proto list
+	 * to see if we have any other reasonable matches.
+	 */
+
+	if ( mindiff * 100 > totaltrans * FIRST_MATCH_DIFF_PERCENTAGE )
+	    { /* not a good enough match.  Scan the rest of the protos */
+	    for ( i = minprot; i != NIL; i = protnext[i] )
+		{
+		d = tbldiff( state, i, extrct[1 - extptr] );
+		if ( d < mindiff )
+		    {
+		    extptr = 1 - extptr;
+		    mindiff = d;
+		    minprot = i;
+		    }
+		}
+	    }
+
+	/* check if the proto we've decided on as our best bet is close
+	 * enough to the state we want to match to be usable
+	 */
+
+	if ( mindiff * 100 > totaltrans * ACCEPTABLE_DIFF_PERCENTAGE )
+	    {
+	    /* no good.  If the state is homogeneous enough, we make a
+	     * template out of it.  Otherwise, we make a proto.
+	     */
+
+	    if ( comfreq * 100 >= totaltrans * TEMPLATE_SAME_PERCENTAGE )
+		mktemplate( state, statenum, comstate );
+
+	    else
+		{
+		mkprot( state, statenum, comstate );
+		mkentry( state, numecs, statenum, JAMSTATE, totaltrans );
+		}
+	    }
+
+	else
+	    { /* use the proto */
+	    mkentry( extrct[extptr], numecs, statenum,
+		     prottbl[minprot], mindiff );
+
+	    /* if this state was sufficiently different from the proto
+	     * we built it from, make it, too, a proto
+	     */
+
+	    if ( mindiff * 100 >= totaltrans * NEW_PROTO_DIFF_PERCENTAGE )
+		mkprot( state, statenum, comstate );
+
+	    /* since mkprot added a new proto to the proto queue, it's possible
+	     * that "minprot" is no longer on the proto queue (if it happened
+	     * to have been the last entry, it would have been bumped off).
+	     * If it's not there, then the new proto took its physical place
+	     * (though logically the new proto is at the beginning of the
+	     * queue), so in that case the following call will do nothing.
+	     */
+
+	    mv2front( minprot );
+	    }
+	}
+    }
+
+
+/* cmptmps - compress template table entries
+ *
+ * synopsis
+ *    cmptmps();
+ *
+ *  template tables are compressed by using the 'template equivalence
+ *  classes', which are collections of transition character equivalence
+ *  classes which always appear together in templates - really meta-equivalence
+ *  classes.  until this point, the tables for templates have been stored
+ *  up at the top end of the nxt array; they will now be compressed and have
+ *  table entries made for them.
+ */
+cmptmps()
+
+    {
+    int tmpstorage[CSIZE + 1];
+    register int *tmp = tmpstorage, i, j;
+    int totaltrans, trans;
+
+    peakpairs = numtemps * numecs + tblend;
+
+    if ( usemecs )
+	{
+	/* create equivalence classes base on data gathered on template
+	 * transitions
+	 */
+
+	nummecs = cre8ecs( tecfwd, tecbck, numecs );
+	}
+    
+    else
+	nummecs = numecs;
+
+    if ( lastdfa + numtemps + 1 >= current_max_dfas )
+	increase_max_dfas();
+
+    /* loop through each template */
+
+    for ( i = 1; i <= numtemps; ++i )
+	{
+	totaltrans = 0;	/* number of non-jam transitions out of this template */
+
+	for ( j = 1; j <= numecs; ++j )
+	    {
+	    trans = tnxt[numecs * i + j];
+
+	    if ( usemecs )
+		{
+		/* the absolute value of tecbck is the meta-equivalence class
+		 * of a given equivalence class, as set up by cre8ecs
+		 */
+		if ( tecbck[j] > 0 )
+		    {
+		    tmp[tecbck[j]] = trans;
+
+		    if ( trans > 0 )
+			++totaltrans;
+		    }
+		}
+
+	    else
+		{
+		tmp[j] = trans;
+
+		if ( trans > 0 )
+		    ++totaltrans;
+		}
+	    }
+
+	/* it is assumed (in a rather subtle way) in the skeleton that
+	 * if we're using meta-equivalence classes, the def[] entry for
+	 * all templates is the jam template, i.e. templates never default
+	 * to other non-jam table entries (e.g. another template)
+	 */
+
+	/* leave room for the jam-state after the last real state */
+	mkentry( tmp, nummecs, lastdfa + i + 1, JAMSTATE, totaltrans );
+	}
+    }
+
+
+
+/* expand_nxt_chk - expand the next check arrays */
+
+expand_nxt_chk()
+
+    {
+    register int old_max = current_max_xpairs;
+
+    current_max_xpairs += MAX_XPAIRS_INCREMENT;
+
+    ++num_reallocs;
+
+    nxt = reallocate_integer_array( nxt, current_max_xpairs );
+    chk = reallocate_integer_array( chk, current_max_xpairs );
+
+    bzero( (char *) (chk + old_max),
+	   MAX_XPAIRS_INCREMENT * sizeof( int ) / sizeof( char ) );
+    }
+
+
+/* gentabs - generate data statements for the transition tables
+ *
+ * synopsis
+ *    gentabs();
+ */
+gentabs()
+
+    {
+    int i, j, k, numrows, *accset, nacc, *acc_array;
+    char clower();
+
+    /* *everything* is done in terms of arrays starting at 1, so provide
+     * a null entry for the zero element of all FTL arrays
+     */
+    static char ftl_long_decl[] = "static long int %c[%d] =\n    {   0,\n";
+    static char ftl_short_decl[] = "static short int %c[%d] =\n    {   0,\n";
+    static char ftl_char_decl[] = "static char %c[%d] =\n    {   0,\n";
+
+    acc_array = allocate_integer_array( current_max_dfas );
+    nummt = 0;
+
+    if ( fulltbl )
+	jambase = lastdfa + 1;	/* home of "jam" pseudo-state */
+
+    printf( "#define YYJAM %d\n", jamstate );
+    printf( "#define YYJAMBASE %d\n", jambase );
+
+    if ( usemecs )
+	printf( "#define YYTEMPLATE %d\n", lastdfa + 2 );
+
+#ifdef NOTDEF
+/* unsupported code */
+    if ( ! genftl )
+	{ /* ratfor scanner */
+	static char vardata[] = "%%%sdata %s/%d/\n";
+	static char dindent[] = DATAINDENTSTR;
+	static char arydecl[] = "integer %c(%d)\n";
+	static char ary2decl[] = "integer %c(%d,%d)\n";
+
+	skelout();
+
+	if ( reject )
+	    {
+	    /* write out the pointers into the accepting lists for each state,
+	     * and the accepting lists
+	     */
+
+	    /* alist needs to be lastdfa + 2 because we tell where a state's
+	     * accepting list ends by checking the beginning of the next state,
+	     * and there's an entry in alist for the default, "jam" pseudo-state
+	     * (this latter entry is needed because states jam by making
+	     * a transition to the state; see the flex skeleton.  By the way,
+	     * I *think* we could get rid of the jam state entirely by
+	     * slight modification of the skeleton ...)
+	     */
+
+	    printf( arydecl, ALIST, lastdfa + 2 );
+
+	    printf( arydecl, ACCEPT, max( numas, 1 ) );
+	    }
+
+	else
+	    printf( arydecl, ALIST, lastdfa + 1 );
+
+	if ( useecs )
+	    printf( arydecl, ECARRAY, CSIZE );
+	if ( usemecs )
+	    printf( arydecl, MATCHARRAY, numecs );
+
+	if ( fulltbl )
+	    {
+	    printf( ary2decl, NEXTARRAY, lastdfa, numecs );
+	    printf( vardata, dindent, "yyjam", 0 );
+	    }
+
+	else
+	    {
+	    printf( arydecl, BASEARRAY, lastdfa + numtemps );
+	    printf( arydecl, DEFARRAY, lastdfa + numtemps );
+	    printf( arydecl, NEXTARRAY, tblend );
+	    printf( arydecl, CHECKARRAY, tblend );
+
+	    printf( vardata, dindent, "yyjam", jambase );
+
+	    /* the first template begins right after the default jam table,
+	     * which itself begins right after the last dfa
+	     */
+
+	    printf( vardata, dindent, "yytmp", lastdfa + 2 );
+	    }
+	}
+#endif NOTDEF
+
+    if ( reject )
+	{
+	/* write out accepting list and pointer list
+	 * first we generate the ACCEPT array.  In the process, we compute
+	 * the indices that will go into the ALIST array, and save the
+	 * indices in the dfaacc array
+	 */
+
+	if ( genftl )
+	    printf( accnum > 127 ? ftl_short_decl : ftl_char_decl,
+		    ACCEPT, max( numas, 1 ) + 1 );
+
+	j = 1;	/* index into ACCEPT array */
+
+	for ( i = 1; i <= lastdfa; ++i )
+	    {
+	    acc_array[i] = j;
+
+	    if ( accsiz[i] != 0 )
+		{
+		accset = dfaacc[i];
+		nacc = accsiz[i];
+
+		if ( trace )
+		    fprintf( stderr, "state # %d accepts: ", i );
+
+		for ( k = 1; k <= nacc; ++k )
+		    {
+		    mkdata( ACCEPT, j++, accset[k] );
+
+		    if ( trace )
+			{
+			fprintf( stderr, "[%d]", accset[k] );
+
+			if ( k < nacc )
+			    fputs( ", ", stderr );
+			else
+			    putc( '\n', stderr );
+			}
+		    }
+		}
+	    }
+
+	/* add accepting number for the "jam" state */
+	acc_array[i] = j;
+
+	dataend();
+	}
+    
+    else
+	{
+	for ( i = 1; i <= lastdfa; ++i )
+	    acc_array[i] = (int) dfaacc[i];
+	
+	acc_array[i] = 0; /* add (null) accepting number for jam state */
+	}
+
+    /* spit out ALIST array.  If we're doing "reject", it'll be pointers
+     * into the ACCEPT array.  Otherwise it's actual accepting numbers.
+     * In either case, we just dump the numbers.
+     */
+
+    if ( genftl )
+	{
+	/* "lastdfa + 2" is the size of ALIST; includes room for FTL arrays
+	 * beginning at 0 and for "jam" state
+	 */
+	k = lastdfa + 2;
+
+	if ( reject )
+	    /* we put a "cap" on the table associating lists of accepting
+	     * numbers with state numbers.  This is needed because we tell
+	     * where the end of an accepting list is by looking at where
+	     * the list for the next state starts.
+	     */
+	    ++k;
+
+	printf( ((reject && numas > 126) || accnum > 127) ?
+		ftl_short_decl : ftl_char_decl, ALIST, k );
+	}
+
+    for ( i = 1; i <= lastdfa; ++i )
+	{
+	mkdata( ALIST, i, acc_array[i] );
+
+	if ( ! reject && trace && acc_array[i] )
+	    fprintf( stderr, "state # %d accepts: [%d]\n", i, acc_array[i] );
+	}
+
+    /* add entry for "jam" state */
+    mkdata( ALIST, i, acc_array[i] );
+
+    if ( reject )
+	/* add "cap" for the list */
+	mkdata( ALIST, i + 1, acc_array[i] );
+
+    dataend();
+
+    if ( useecs )
+	{
+	/* write out equivalence classes */
+
+	if ( genftl )
+	    printf( ftl_char_decl, ECARRAY, CSIZE + 1 );
+
+	for ( i = 1; i <= CSIZE; ++i )
+	    {
+	    if ( caseins && (i >= 'A') && (i <= 'Z') )
+		ecgroup[i] = ecgroup[clower( i )];
+
+	    ecgroup[i] = abs( ecgroup[i] );
+	    mkdata( ECARRAY, i, ecgroup[i] );
+	    }
+
+	dataend();
+
+	if ( trace )
+	    {
+	    fputs( "\n\nEquivalence Classes:\n\n", stderr );
+
+	    numrows = (CSIZE + 1) / 8;
+
+	    for ( j = 1; j <= numrows; ++j )
+		{
+		for ( i = j; i <= CSIZE; i = i + numrows )
+		    {
+		    if ( i >= 1 && i <= 31 )
+			fprintf( stderr, "^%c = %-2d",
+				 'A' + i - 1, ecgroup[i] );
+
+		    else if ( i >= 32 && i <= 126 )
+			fprintf( stderr, " %c = %-2d", i, ecgroup[i] );
+
+		    else if ( i == 127 )
+			fprintf( stderr, "^@ = %-2d", ecgroup[i] );
+
+		    else
+			fprintf( stderr, "\nSomething Weird: %d = %d\n", i,
+				 ecgroup[i] );
+
+		    putc( '\t', stderr );
+		    }
+
+		putc( '\n', stderr );
+		}
+	    }
+	}
+
+    if ( usemecs )
+	{
+	/* write out meta-equivalence classes (used to index templates with) */
+
+	if ( trace )
+	    fputs( "\n\nMeta-Equivalence Classes:\n", stderr );
+
+	if ( genftl )
+	    printf( ftl_char_decl, MATCHARRAY, numecs + 1 );
+
+	for ( i = 1; i <= numecs; ++i )
+	    {
+	    if ( trace )
+		fprintf( stderr, "%d = %d\n", i, abs( tecbck[i] ) );
+
+	    mkdata( MATCHARRAY, i, abs( tecbck[i] ) );
+	    }
+
+	dataend();
+	}
+
+    if ( ! fulltbl )
+	{
+	int total_states = lastdfa + numtemps;
+
+	if ( genftl )
+	    printf( tblend > 32766 ? ftl_long_decl : ftl_short_decl,
+		    BASEARRAY, total_states + 1 );
+
+	for ( i = 1; i <= lastdfa; ++i )
+	    {
+	    register int d = def[i];
+
+	    if ( base[i] == JAMSTATE )
+		base[i] = jambase;
+
+	    if ( d == JAMSTATE )
+		def[i] = jamstate;
+
+	    else if ( d < 0 )
+		{
+		/* template reference */
+		++tmpuses;
+		def[i] = lastdfa - d + 1;
+		}
+
+	    mkdata( BASEARRAY, i, base[i] );
+	    }
+
+	/* generate jam state's base index */
+	mkdata( BASEARRAY, i, base[i] );
+
+	for ( ++i /* skip jam state */; i <= total_states; ++i )
+	    {
+	    mkdata( BASEARRAY, i, base[i] );
+	    def[i] = jamstate;
+	    }
+
+	dataend();
+
+	if ( genftl )
+	    printf( tblend > 32766 ? ftl_long_decl : ftl_short_decl,
+		    DEFARRAY, total_states + 1 );
+
+	for ( i = 1; i <= total_states; ++i )
+	    mkdata( DEFARRAY, i, def[i] );
+
+	dataend();
+
+	if ( genftl )
+	    printf( lastdfa > 32766 ? ftl_long_decl : ftl_short_decl,
+		    NEXTARRAY, tblend + 1 );
+
+	for ( i = 1; i <= tblend; ++i )
+	    {
+	    if ( nxt[i] == 0 )
+		nxt[i] = jamstate;	/* new state is the JAM state */
+
+	    mkdata( NEXTARRAY, i, nxt[i] );
+	    }
+
+	dataend();
+
+	if ( genftl )
+	    printf( lastdfa > 32766 ? ftl_long_decl : ftl_short_decl,
+		    CHECKARRAY, tblend + 1 );
+
+	for ( i = 1; i <= tblend; ++i )
+	    {
+	    if ( chk[i] == 0 )
+		++nummt;
+
+	    mkdata( CHECKARRAY, i, chk[i] );
+	    }
+
+	dataend();
+	}
+
+    skelout();
+
+    /* copy remainder of input to output */
+
+    line_directive_out();
+    (void) lexscan(); /* copy remainder of input to output */
+    }
+
+
+/* inittbl - initialize transition tables
+ *
+ * synopsis
+ *   inittbl();
+ *
+ * Initializes "firstfree" to be one beyond the end of the table.  Initializes
+ * all "chk" entries to be zero.  Note that templates are built in their
+ * own tbase/tdef tables.  They are shifted down to be contiguous
+ * with the non-template entries during table generation.
+ */
+inittbl()
+
+    {
+    register int i;
+
+    bzero( (char *) chk, current_max_xpairs * sizeof( int ) / sizeof( char ) );
+
+    tblend = 0;
+    firstfree = tblend + 1;
+    numtemps = 0;
+
+    if ( usemecs )
+	{
+	/* set up doubly-linked meta-equivalence classes
+	 * these are sets of equivalence classes which all have identical
+	 * transitions out of TEMPLATES
+	 */
+
+	tecbck[1] = NIL;
+
+	for ( i = 2; i <= numecs; ++i )
+	    {
+	    tecbck[i] = i - 1;
+	    tecfwd[i - 1] = i;
+	    }
+
+	tecfwd[numecs] = NIL;
+	}
+    }
+
+
+/* mkdeftbl - make the default, "jam" table entries
+ *
+ * synopsis
+ *   mkdeftbl();
+ */
+mkdeftbl()
+
+    {
+    int i;
+
+    jamstate = lastdfa + 1;
+
+    if ( tblend + numecs > current_max_xpairs )
+	expand_nxt_chk();
+
+    for ( i = 1; i <= numecs; ++i )
+	{
+	nxt[tblend + i] = 0;
+	chk[tblend + i] = jamstate;
+	}
+
+    jambase = tblend;
+
+    base[jamstate] = jambase;
+
+    /* should generate a run-time array bounds check if
+     * ever used as a default
+     */
+    def[jamstate] = BAD_SUBSCRIPT;
+
+    tblend += numecs;
+    ++numtemps;
+    }
+
+
+/* mkentry - create base/def and nxt/chk entries for transition array
+ *
+ * synopsis
+ *   int state[numchars + 1], numchars, statenum, deflink, totaltrans;
+ *   mkentry( state, numchars, statenum, deflink, totaltrans );
+ *
+ * "state" is a transition array "numchars" characters in size, "statenum"
+ * is the offset to be used into the base/def tables, and "deflink" is the
+ * entry to put in the "def" table entry.  If "deflink" is equal to
+ * "JAMSTATE", then no attempt will be made to fit zero entries of "state"
+ * (i.e. jam entries) into the table.  It is assumed that by linking to
+ * "JAMSTATE" they will be taken care of.  In any case, entries in "state"
+ * marking transitions to "SAME_TRANS" are treated as though they will be
+ * taken care of by whereever "deflink" points.  "totaltrans" is the total
+ * number of transitions out of the state.  If it is below a certain threshold,
+ * the tables are searched for an interior spot that will accomodate the
+ * state array.
+ */
+mkentry( state, numchars, statenum, deflink, totaltrans )
+register int *state;
+int numchars, statenum, deflink, totaltrans;
+
+    {
+    register int minec, maxec, i, baseaddr;
+    int tblbase, tbllast;
+
+    if ( totaltrans == 0 )
+	{ /* there are no out-transitions */
+	if ( deflink == JAMSTATE )
+	    base[statenum] = JAMSTATE;
+	else
+	    base[statenum] = 0;
+
+	def[statenum] = deflink;
+	return;
+	}
+
+    for ( minec = 1; minec <= numchars; ++minec )
+	{
+	if ( state[minec] != SAME_TRANS )
+	    if ( state[minec] != 0 || deflink != JAMSTATE )
+		break;
+	}
+
+    if ( totaltrans == 1 )
+	{
+	/* there's only one out-transition.  Save it for later to fill
+	 * in holes in the tables.
+	 */
+	stack1( statenum, minec, state[minec], deflink );
+	return;
+	}
+
+    for ( maxec = numchars; maxec > 0; --maxec )
+	{
+	if ( state[maxec] != SAME_TRANS )
+	    if ( state[maxec] != 0 || deflink != JAMSTATE )
+		break;
+	}
+
+    /* Whether we try to fit the state table in the middle of the table
+     * entries we have already generated, or if we just take the state
+     * table at the end of the nxt/chk tables, we must make sure that we
+     * have a valid base address (i.e. non-negative).  Note that not only are
+     * negative base addresses dangerous at run-time (because indexing the
+     * next array with one and a low-valued character might generate an
+     * array-out-of-bounds error message), but at compile-time negative
+     * base addresses denote TEMPLATES.
+     */
+
+    /* find the first transition of state that we need to worry about. */
+    if ( totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE )
+	{ /* attempt to squeeze it into the middle of the tabls */
+	baseaddr = firstfree;
+
+	while ( baseaddr < minec )
+	    {
+	    /* using baseaddr would result in a negative base address below
+	     * find the next free slot
+	     */
+	    for ( ++baseaddr; chk[baseaddr] != 0; ++baseaddr )
+		;
+	    }
+
+	if ( baseaddr + maxec - minec >= current_max_xpairs )
+	    expand_nxt_chk();
+
+	for ( i = minec; i <= maxec; ++i )
+	    if ( state[i] != SAME_TRANS )
+		if ( state[i] != 0 || deflink != JAMSTATE )
+		    if ( chk[baseaddr + i - minec] != 0 )
+			{ /* baseaddr unsuitable - find another */
+			for ( ++baseaddr;
+			      baseaddr < current_max_xpairs &&
+			      chk[baseaddr] != 0;
+			      ++baseaddr )
+			    ;
+
+			if ( baseaddr + maxec - minec >= current_max_xpairs )
+			    expand_nxt_chk();
+
+			/* reset the loop counter so we'll start all
+			 * over again next time it's incremented
+			 */
+
+			i = minec - 1;
+			}
+	}
+
+    else
+	{
+	/* ensure that the base address we eventually generate is
+	 * non-negative
+	 */
+	baseaddr = max( tblend + 1, minec );
+	}
+
+    tblbase = baseaddr - minec;
+    tbllast = tblbase + maxec;
+
+    if ( tbllast >= current_max_xpairs )
+	expand_nxt_chk();
+
+    base[statenum] = tblbase;
+    def[statenum] = deflink;
+
+    for ( i = minec; i <= maxec; ++i )
+	if ( state[i] != SAME_TRANS )
+	    if ( state[i] != 0 || deflink != JAMSTATE )
+		{
+		nxt[tblbase + i] = state[i];
+		chk[tblbase + i] = statenum;
+		}
+
+    if ( baseaddr == firstfree )
+	/* find next free slot in tables */
+	for ( ++firstfree; chk[firstfree] != 0; ++firstfree )
+	    ;
+
+    tblend = max( tblend, tbllast );
+    }
+
+
+/* mk1tbl - create table entries for a state (or state fragment) which
+ *            has only one out-transition
+ *
+ * synopsis
+ *   int state, sym, onenxt, onedef;
+ *   mk1tbl( state, sym, onenxt, onedef );
+ */
+mk1tbl( state, sym, onenxt, onedef )
+int state, sym, onenxt, onedef;
+
+    {
+    if ( firstfree < sym )
+	firstfree = sym;
+
+    while ( chk[firstfree] != 0 )
+	if ( ++firstfree >= current_max_xpairs )
+	    expand_nxt_chk();
+
+    base[state] = firstfree - sym;
+    def[state] = onedef;
+    chk[firstfree] = state;
+    nxt[firstfree] = onenxt;
+
+    if ( firstfree > tblend )
+	{
+	tblend = firstfree++;
+
+	if ( firstfree >= current_max_xpairs )
+	    expand_nxt_chk();
+	}
+    }
+
+
+/* mkprot - create new proto entry
+ *
+ * synopsis
+ *   int state[], statenum, comstate;
+ *   mkprot( state, statenum, comstate );
+ */
+mkprot( state, statenum, comstate )
+int state[], statenum, comstate;
+
+    {
+    int i, slot, tblbase;
+
+    if ( ++numprots >= MSP || numecs * numprots >= PROT_SAVE_SIZE )
+	{
+	/* gotta make room for the new proto by dropping last entry in
+	 * the queue
+	 */
+	slot = lastprot;
+	lastprot = protprev[lastprot];
+	protnext[lastprot] = NIL;
+	}
+
+    else
+	slot = numprots;
+
+    protnext[slot] = firstprot;
+
+    if ( firstprot != NIL )
+	protprev[firstprot] = slot;
+
+    firstprot = slot;
+    prottbl[slot] = statenum;
+    protcomst[slot] = comstate;
+
+    /* copy state into save area so it can be compared with rapidly */
+    tblbase = numecs * (slot - 1);
+
+    for ( i = 1; i <= numecs; ++i )
+	protsave[tblbase + i] = state[i];
+    }
+
+
+/* mktemplate - create a template entry based on a state, and connect the state
+ *              to it
+ *
+ * synopsis
+ *   int state[], statenum, comstate, totaltrans;
+ *   mktemplate( state, statenum, comstate, totaltrans );
+ */
+mktemplate( state, statenum, comstate )
+int state[], statenum, comstate;
+
+    {
+    int i, numdiff, tmpbase, tmp[CSIZE + 1];
+    char transset[CSIZE + 1];
+    int tsptr;
+
+    ++numtemps;
+
+    tsptr = 0;
+
+    /* calculate where we will temporarily store the transition table
+     * of the template in the tnxt[] array.  The final transition table
+     * gets created by cmptmps()
+     */
+
+    tmpbase = numtemps * numecs;
+
+    if ( tmpbase + numecs >= current_max_template_xpairs )
+	{
+	current_max_template_xpairs += MAX_TEMPLATE_XPAIRS_INCREMENT;
+
+	++num_reallocs;
+
+	tnxt = reallocate_integer_array( tnxt, current_max_template_xpairs );
+	}
+
+    for ( i = 1; i <= numecs; ++i )
+	if ( state[i] == 0 )
+	    tnxt[tmpbase + i] = 0;
+	else
+	    {
+	    transset[tsptr++] = i;
+	    tnxt[tmpbase + i] = comstate;
+	    }
+
+    if ( usemecs )
+	mkeccl( transset, tsptr, tecfwd, tecbck, numecs );
+
+    mkprot( tnxt + tmpbase, -numtemps, comstate );
+
+    /* we rely on the fact that mkprot adds things to the beginning
+     * of the proto queue
+     */
+
+    numdiff = tbldiff( state, firstprot, tmp );
+    mkentry( tmp, numecs, statenum, -numtemps, numdiff );
+    }
+
+
+/* mv2front - move proto queue element to front of queue
+ *
+ * synopsis
+ *   int qelm;
+ *   mv2front( qelm );
+ */
+mv2front( qelm )
+int qelm;
+
+    {
+    if ( firstprot != qelm )
+	{
+	if ( qelm == lastprot )
+	    lastprot = protprev[lastprot];
+
+	protnext[protprev[qelm]] = protnext[qelm];
+
+	if ( protnext[qelm] != NIL )
+	    protprev[protnext[qelm]] = protprev[qelm];
+
+	protprev[qelm] = NIL;
+	protnext[qelm] = firstprot;
+	protprev[firstprot] = qelm;
+	firstprot = qelm;
+	}
+    }
+
+
+/* ntod - convert an ndfa to a dfa
+ *
+ * synopsis
+ *    ntod();
+ *
+ *  creates the dfa corresponding to the ndfa we've constructed.  the
+ *  dfa starts out in state #1.
+ */
+ntod()
+
+    {
+    int *accset, ds, nacc, newds;
+    int duplist[CSIZE + 1], sym, hashval, numstates, dsize;
+    int targfreq[CSIZE + 1], targstate[CSIZE + 1], state[CSIZE + 1];
+    int *nset, *dset;
+    int targptr, totaltrans, i, comstate, comfreq, targ;
+    int *epsclosure(), snstods(), symlist[CSIZE + 1];
+
+    accset = allocate_integer_array( accnum + 1 );
+    nset = allocate_integer_array( current_max_dfa_size );
+
+    todo_head = todo_next = 0;
+
+#define ADD_QUEUE_ELEMENT(element) \
+	if ( ++element >= current_max_dfas ) \
+	    { /* check for queue overflowing */ \
+	    if ( todo_head == 0 ) \
+		increase_max_dfas(); \
+	    else \
+		element = 0; \
+	    }
+
+#define NEXT_QUEUE_ELEMENT(element) ((element + 1) % (current_max_dfas + 1))
+
+    for ( i = 0; i <= CSIZE; ++i )
+	{
+	duplist[i] = NIL;
+	symlist[i] = false;
+	}
+
+    for ( i = 0; i <= accnum; ++i )
+	accset[i] = NIL;
+
+    if ( trace )
+	{
+	dumpnfa( scset[1] );
+	fputs( "\n\nDFA Dump:\n\n", stderr );
+	}
+
+    inittbl();
+
+    if ( genftl )
+	skelout();
+
+    if ( fulltbl )
+	{
+	if ( genftl )
+	    {
+	    /* declare it "short" because it's a real long-shot that that
+	     * won't be large enough
+	     */
+	    printf( "static short int %c[][%d] =\n    {\n", NEXTARRAY,
+		    numecs + 1 );
+
+	    /* generate 0 entries for state #0 */
+	    for ( i = 0; i <= numecs; ++i )
+		mk2data( NEXTARRAY, 0, 0, 0 );
+
+	    /* force ',' and dataflush() next call to mk2data */
+	    datapos = NUMDATAITEMS;
+
+	    /* force extra blank line next dataflush() */
+	    dataline = NUMDATALINES;
+	    }
+	}
+
+    /* create the first states */
+
+    for ( i = 1; i <= lastsc * 2; ++i )
+	{
+	numstates = 1;
+
+	/* for each start condition, make one state for the case when
+	 * we're at the beginning of the line (the '%' operator) and
+	 * one for the case when we're not
+	 */
+	if ( i % 2 == 1 )
+	    nset[numstates] = scset[(i / 2) + 1];
+	else
+	    nset[numstates] = mkbranch( scbol[i / 2], scset[i / 2] );
+
+	nset = epsclosure( nset, &numstates, accset, &nacc, &hashval );
+
+	if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) )
+	    {
+	    numas = numas + nacc;
+	    totnst = totnst + numstates;
+
+	    todo[todo_next] = ds;
+	    ADD_QUEUE_ELEMENT(todo_next);
+	    }
+	}
+
+    while ( todo_head != todo_next )
+	{
+	targptr = 0;
+	totaltrans = 0;
+
+	for ( i = 1; i <= numecs; ++i )
+	    state[i] = 0;
+
+	ds = todo[todo_head];
+	todo_head = NEXT_QUEUE_ELEMENT(todo_head);
+
+	dset = dss[ds];
+	dsize = dfasiz[ds];
+
+	if ( trace )
+	    fprintf( stderr, "state # %d:\n", ds );
+
+	sympartition( dset, dsize, symlist, duplist );
+
+	for ( sym = 1; sym <= numecs; ++sym )
+	    {
+	    if ( symlist[sym] )
+		{
+		symlist[sym] = 0;
+
+		if ( duplist[sym] == NIL )
+		    { /* symbol has unique out-transitions */
+		    numstates = symfollowset( dset, dsize, sym, nset );
+		    nset = epsclosure( nset, &numstates, accset,
+				       &nacc, &hashval );
+
+		    if ( snstods( nset, numstates, accset,
+				  nacc, hashval, &newds ) )
+			{
+			totnst = totnst + numstates;
+			todo[todo_next] = newds;
+			ADD_QUEUE_ELEMENT(todo_next);
+			numas = numas + nacc;
+			}
+
+		    state[sym] = newds;
+
+		    if ( trace )
+			fprintf( stderr, "\t%d\t%d\n", sym, newds );
+
+		    targfreq[++targptr] = 1;
+		    targstate[targptr] = newds;
+		    ++numuniq;
+		    }
+
+		else
+		    {
+		    /* sym's equivalence class has the same transitions
+		     * as duplist(sym)'s equivalence class
+		     */
+		    targ = state[duplist[sym]];
+		    state[sym] = targ;
+
+		    if ( trace )
+			fprintf( stderr, "\t%d\t%d\n", sym, targ );
+
+		    /* update frequency count for destination state */
+
+		    i = 0;
+		    while ( targstate[++i] != targ )
+			;
+
+		    ++targfreq[i];
+		    ++numdup;
+		    }
+
+		++totaltrans;
+		duplist[sym] = NIL;
+		}
+	    }
+
+	numsnpairs = numsnpairs + totaltrans;
+
+	if ( caseins && ! useecs )
+	    {
+	    register int j;
+
+	    for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j )
+		state[i] = state[j];
+	    }
+
+	if ( fulltbl )
+	    {
+	    if ( genftl )
+		{
+		/* supply array's 0-element */
+		mk2data( NEXTARRAY, 0, 0, 0 );
+
+		for ( i = 1; i <= numecs; ++i )
+		    mk2data( NEXTARRAY, 0, 0, state[i] );
+
+		/* force ',' and dataflush() next call to mk2data */
+		datapos = NUMDATAITEMS;
+
+		/* force extra blank line next dataflush() */
+		dataline = NUMDATALINES;
+		}
+
+	    else
+		{
+		for ( i = 1; i <= numecs; ++i )
+		    mk2data( NEXTARRAY, ds, i, state[i] );
+		}
+	    }
+
+	else
+	    {
+	    /* determine which destination state is the most common, and
+	     * how many transitions to it there are
+	     */
+
+	    comfreq = 0;
+	    comstate = 0;
+
+	    for ( i = 1; i <= targptr; ++i )
+		if ( targfreq[i] > comfreq )
+		    {
+		    comfreq = targfreq[i];
+		    comstate = targstate[i];
+		    }
+
+	    bldtbl( state, ds, totaltrans, comstate, comfreq );
+	    }
+	}
+
+    if ( fulltbl )
+	dataend();
+
+    else
+	{
+	cmptmps();  /* create compressed template entries */
+
+	/* create tables for all the states with only one out-transition */
+	while ( onesp > 0 )
+	    {
+	    mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp],
+		    onedef[onesp] );
+	    --onesp;
+	    }
+
+	mkdeftbl();
+	}
+    }
+
+
+/* stack1 - save states with only one out-transition to be processed later
+ *
+ * synopsis
+ *   int statenum, sym, nextstate, deflink;
+ *   stack1( statenum, sym, nextstate, deflink );
+ *
+ * if there's room for another state one the "one-transition" stack, the
+ * state is pushed onto it, to be processed later by mk1tbl.  If there's
+ * no room, we process the sucker right now.
+ */
+stack1( statenum, sym, nextstate, deflink )
+int statenum, sym, nextstate, deflink;
+
+    {
+    if ( onesp >= ONE_STACK_SIZE )
+	mk1tbl( statenum, sym, nextstate, deflink );
+
+    else
+	{
+	++onesp;
+	onestate[onesp] = statenum;
+	onesym[onesp] = sym;
+	onenext[onesp] = nextstate;
+	onedef[onesp] = deflink;
+	}
+    }
+
+
+/* tbldiff - compute differences between two state tables
+ *
+ * synopsis
+ *   int state[], pr, ext[];
+ *   int tbldiff, numdifferences;
+ *   numdifferences = tbldiff( state, pr, ext )
+ *
+ * "state" is the state array which is to be extracted from the pr'th
+ * proto.  "pr" is both the number of the proto we are extracting from
+ * and an index into the save area where we can find the proto's complete
+ * state table.  Each entry in "state" which differs from the corresponding
+ * entry of "pr" will appear in "ext".
+ * Entries which are the same in both "state" and "pr" will be marked
+ * as transitions to "SAME_TRANS" in "ext".  The total number of differences
+ * between "state" and "pr" is returned as function value.  Note that this
+ * number is "numecs" minus the number of "SAME_TRANS" entries in "ext".
+ */
+int tbldiff( state, pr, ext )
+int state[], pr, ext[];
+
+    {
+    register int i, *sp = state, *ep = ext, *protp;
+    register int numdiff = 0;
+
+    protp = &protsave[numecs * (pr - 1)];
+
+    for ( i = numecs; i > 0; --i )
+	{
+	if ( *++protp == *++sp )
+	    *++ep = SAME_TRANS;
+	else
+	    {
+	    *++ep = *sp;
+	    ++numdiff;
+	    }
+	}
+
+    return ( numdiff );
+    }
diff --git a/yylex.c b/yylex.c
new file mode 100644
index 0000000..f4300aa
--- /dev/null
+++ b/yylex.c
@@ -0,0 +1,210 @@
+#include "flexdef.h"
+#include "y.tab.h"
+
+/*
+ * Copyright (c) University of California, 1987
+ */
+
+/* yylex - scan for a regular expression token
+ *
+ * synopsis
+ *
+ *   token = yylex();
+ *
+ *     token - return token found
+ */
+int yylex()
+
+    {
+    int toktype;
+    static int beglin = false;
+
+    if ( eofseen )
+	toktype = EOF;
+    else
+	toktype = lexscan();
+
+    if ( toktype == EOF )
+	{
+	eofseen = 1;
+
+	if ( sectnum == 1 )
+	    {
+	    synerr( "unexpected EOF" );
+	    sectnum = 2;
+	    toktype = SECTEND;
+	    }
+
+	else if ( sectnum == 2 )
+	    {
+	    sectnum = 3;
+	    toktype = SECTEND;
+	    }
+
+	else
+	    toktype = 0;
+	}
+
+    if ( trace )
+	{
+	if ( beglin )
+	    {
+	    fprintf( stderr, "%d\t", accnum + 1 );
+	    beglin = 0;
+	    }
+
+	switch ( toktype )
+	    {
+	    case '<':
+	    case '>':
+	    case '^':
+	    case '$':
+	    case '"':
+	    case '[':
+	    case ']':
+	    case '{':
+	    case '}':
+	    case '|':
+	    case '(':
+	    case ')':
+	    case '-':
+	    case '/':
+	    case '\\':
+	    case '?':
+	    case '.':
+	    case '*':
+	    case '+':
+	    case ',':
+		(void) putc( toktype, stderr );
+		break;
+
+	    case '\n':
+		(void) putc( '\n', stderr );
+
+		if ( sectnum == 2 )
+		    beglin = 1;
+
+		break;
+
+	    case SCDECL:
+		fputs( "%s", stderr );
+		break;
+
+	    case XSCDECL:
+		fputs( "%x", stderr );
+		break;
+
+	    case WHITESPACE:
+		(void) putc( ' ', stderr );
+		break;
+
+	    case SECTEND:
+		fputs( "%%\n", stderr );
+
+		/* we set beglin to be true so we'll start
+		 * writing out numbers as we echo rules.  lexscan() has
+		 * already assigned sectnum
+		 */
+
+		if ( sectnum == 2 )
+		    beglin = 1;
+
+		break;
+
+	    case NAME:
+		fprintf( stderr, "'%s'", nmstr );
+		break;
+
+	    case CHAR:
+		switch ( yylval )
+		    {
+		    case '<':
+		    case '>':
+		    case '^':
+		    case '$':
+		    case '"':
+		    case '[':
+		    case ']':
+		    case '{':
+		    case '}':
+		    case '|':
+		    case '(':
+		    case ')':
+		    case '-':
+		    case '/':
+		    case '\\':
+		    case '?':
+		    case '.':
+		    case '*':
+		    case '+':
+		    case ',':
+			fprintf( stderr, "\\%c", yylval );
+			break;
+
+		    case 1:
+		    case 2:
+		    case 3:
+		    case 4:
+		    case 5:
+		    case 6:
+		    case 7:
+		    case 8:
+		    case 9:
+		    case 10:
+		    case 11:
+		    case 12:
+		    case 13:
+		    case 14:
+		    case 15:
+		    case 16:
+		    case 17:
+		    case 18:
+		    case 19:
+		    case 20:
+		    case 21:
+		    case 22:
+		    case 23:
+		    case 24:
+		    case 25:
+		    case 26:
+		    case 27:
+		    case 28:
+		    case 29:
+		    case 30:
+		    case 31:
+			fprintf( stderr, "^%c", 'A' + yylval - 1 );
+			break;
+
+		    case 127:
+			(void) putc( '^', stderr );
+			(void) putc( '@', stderr );
+			break;
+
+		    default:
+			(void) putc( yylval, stderr );
+			break;
+		    }
+			
+		break;
+
+	    case NUMBER:
+		fprintf( stderr, "%d", yylval );
+		break;
+
+	    case PREVCCL:
+		fprintf( stderr, "[%d]", yylval );
+		break;
+
+	    case 0:
+		fprintf( stderr, "End Marker" );
+		break;
+
+	    default:
+		fprintf( stderr, "*Something Weird* - tok: %d val: %d\n",
+			 toktype, yylval );
+		break;
+	    }
+	}
+	    
+    return ( toktype );
+    }
author	Vern Paxson <vern@ee.lbl.gov>	1987-11-08 22:24:44 +0000
committer	Vern Paxson <vern@ee.lbl.gov>	1987-11-08 22:24:44 +0000
commit	2cc578462372baa1b85936749946608d7f36415f (patch)
tree	b103972512328042bbc4e7541616e88369619b2c