diff options
author | Vern Paxson <vern@ee.lbl.gov> | 1987-11-08 22:24:44 +0000 |
---|---|---|
committer | Vern Paxson <vern@ee.lbl.gov> | 1987-11-08 22:24:44 +0000 |
commit | 2cc578462372baa1b85936749946608d7f36415f (patch) | |
tree | b103972512328042bbc4e7541616e88369619b2c |
Initial revision
-rw-r--r-- | ccl.c | 98 | ||||
-rw-r--r-- | dfa.c | 460 | ||||
-rw-r--r-- | ecs.c | 190 | ||||
-rw-r--r-- | flexdef.h | 429 | ||||
-rw-r--r-- | main.c | 507 | ||||
-rw-r--r-- | misc.c | 646 | ||||
-rw-r--r-- | nfa.c | 542 | ||||
-rw-r--r-- | parse.y | 473 | ||||
-rw-r--r-- | scan.l | 370 | ||||
-rw-r--r-- | sym.c | 291 | ||||
-rw-r--r-- | tblcmp.c | 1324 | ||||
-rw-r--r-- | yylex.c | 210 |
12 files changed, 5540 insertions, 0 deletions
@@ -0,0 +1,98 @@ +/* lexccl - routines for character classes */ + +/* + * Copyright (c) University of California, 1987 + */ + +#include "flexdef.h" + +/* ccladd - add a single character to a ccl + * + * synopsis + * int cclp; + * char ch; + * ccladd( cclp, ch ); + */ +ccladd( cclp, ch ) +int cclp; +char ch; + + { + int ind, len, newpos, i; + + len = ccllen[cclp]; + ind = cclmap[cclp]; + + /* check to see if the character is already in the ccl */ + + for ( i = 0; i < len; ++i ) + if ( ccltbl[ind + i] == ch ) + return; + + newpos = ind + len; + + if ( newpos >= current_max_ccl_tbl_size ) + { + current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT; + + ++num_reallocs; + + ccltbl = reallocate_character_array( ccltbl, current_max_ccl_tbl_size ); + } + + ccllen[cclp] = len + 1; + ccltbl[newpos] = ch; + } + + +/* cclinit - make an empty ccl + * + * synopsis + * int cclinit(); + * new_ccl = cclinit(); + */ +int cclinit() + + { + if ( ++lastccl >= current_maxccls ) + { + current_maxccls += MAXCCLS_INCREMENT; + + ++num_reallocs; + + cclmap = reallocate_integer_array( cclmap, current_maxccls ); + ccllen = reallocate_integer_array( ccllen, current_maxccls ); + cclng = reallocate_integer_array( cclng, current_maxccls ); + } + + if ( lastccl == 1 ) + /* we're making the first ccl */ + cclmap[lastccl] = 0; + + else + /* the new pointer is just past the end of the last ccl. Since + * the cclmap points to the \first/ character of a ccl, adding the + * length of the ccl to the cclmap pointer will produce a cursor + * to the first free space + */ + cclmap[lastccl] = cclmap[lastccl - 1] + ccllen[lastccl - 1]; + + ccllen[lastccl] = 0; + cclng[lastccl] = 0; /* ccl's start out life un-negated */ + + return ( lastccl ); + } + + +/* cclnegate - negate a ccl + * + * synopsis + * int cclp; + * cclnegate( ccl ); + */ +cclnegate( cclp ) +int cclp; + + { + cclng[cclp] = 1; + } @@ -0,0 +1,460 @@ +/* lexdfa - DFA construction routines */ + +/* + * Copyright (c) University of California, 1987 + */ + +#include "flexdef.h" + +/* epsclosure - construct the epsilon closure of a set of ndfa states + * + * synopsis + * int t[current_max_dfa_size], numstates, accset[accnum + 1], nacc; + * int hashval; + * int *epsclosure(); + * t = epsclosure( t, &numstates, accset, &nacc, &hashval ); + * + * NOTES + * the epsilon closure is the set of all states reachable by an arbitrary + * number of epsilon transitions which themselves do not have epsilon + * transitions going out, unioned with the set of states which have non-null + * accepting numbers. t is an array of size numstates of nfa state numbers. + * Upon return, t holds the epsilon closure and numstates is updated. accset + * holds a list of the accepting numbers, and the size of accset is given + * by nacc. t may be subjected to reallocation if it is not large enough + * to hold the epsilon closure. + * + * hashval is the hash value for the dfa corresponding to the state set + */ +int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr ) +int *t, *ns_addr, accset[], *nacc_addr, *hv_addr; + + { + register int stkpos, ns, tsp; + int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum; + int stkend, nstate; + static int did_stk_init = false, *stk; + +#define MARK_STATE(state) \ + trans1[state] = trans1[state] - MARKER_DIFFERENCE; + +#define IS_MARKED(state) (trans1[state] < 0) + +#define UNMARK_STATE(state) \ + trans1[state] = trans1[state] + MARKER_DIFFERENCE; + +#define CHECK_ACCEPT(state) \ + { \ + nfaccnum = accptnum[state]; \ + if ( nfaccnum != NIL ) \ + accset[++nacc] = nfaccnum; \ + } + +#define DO_REALLOCATION \ + { \ + current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \ + ++num_reallocs; \ + t = reallocate_integer_array( t, current_max_dfa_size ); \ + stk = reallocate_integer_array( stk, current_max_dfa_size ); \ + } \ + +#define PUT_ON_STACK(state) \ + { \ + if ( ++stkend >= current_max_dfa_size ) \ + DO_REALLOCATION \ + stk[stkend] = state; \ + MARK_STATE(state) \ + } + +#define ADD_STATE(state) \ + { \ + if ( ++numstates >= current_max_dfa_size ) \ + DO_REALLOCATION \ + t[numstates] = state; \ + hashval = hashval + state; \ + } + +#define STACK_STATE(state) \ + { \ + PUT_ON_STACK(state) \ + CHECK_ACCEPT(state) \ + if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \ + ADD_STATE(state) \ + } + + if ( ! did_stk_init ) + { + stk = allocate_integer_array( current_max_dfa_size ); + did_stk_init = true; + } + + nacc = stkend = hashval = 0; + + for ( nstate = 1; nstate <= numstates; ++nstate ) + { + ns = t[nstate]; + + /* the state could be marked if we've already pushed it onto + * the stack + */ + if ( ! IS_MARKED(ns) ) + PUT_ON_STACK(ns) + + CHECK_ACCEPT(ns) + hashval = hashval + ns; + } + + for ( stkpos = 1; stkpos <= stkend; ++stkpos ) + { + ns = stk[stkpos]; + transsym = transchar[ns]; + + if ( transsym == SYM_EPSILON ) + { + tsp = trans1[ns] + MARKER_DIFFERENCE; + + if ( tsp != NO_TRANSITION ) + { + if ( ! IS_MARKED(tsp) ) + STACK_STATE(tsp) + + tsp = trans2[ns]; + + if ( tsp != NO_TRANSITION ) + if ( ! IS_MARKED(tsp) ) + STACK_STATE(tsp) + } + } + } + + /* clear out "visit" markers */ + + for ( stkpos = 1; stkpos <= stkend; ++stkpos ) + { + if ( IS_MARKED(stk[stkpos]) ) + { + UNMARK_STATE(stk[stkpos]) + } + else + lexfatal( "consistency check failed in epsclosure()" ); + } + + *ns_addr = numstates; + *hv_addr = hashval; + *nacc_addr = nacc; + + return ( t ); + } + + + +/* increase_max_dfas - increase the maximum number of DFAs */ + +increase_max_dfas() + + { + int old_max = current_max_dfas; + + current_max_dfas += MAX_DFAS_INCREMENT; + + ++num_reallocs; + + base = reallocate_integer_array( base, current_max_dfas ); + def = reallocate_integer_array( def, current_max_dfas ); + dfasiz = reallocate_integer_array( dfasiz, current_max_dfas ); + accsiz = reallocate_integer_array( accsiz, current_max_dfas ); + dhash = reallocate_integer_array( dhash, current_max_dfas ); + todo = reallocate_integer_array( todo, current_max_dfas ); + dss = reallocate_integer_pointer_array( dss, current_max_dfas ); + dfaacc = reallocate_integer_pointer_array( dfaacc, current_max_dfas ); + + /* fix up todo queue */ + if ( todo_next < todo_head ) + { /* queue was wrapped around the end */ + register int i; + + for ( i = 0; i < todo_next; ++i ) + todo[old_max + i] = todo[i]; + + todo_next += old_max; + } + } + + +/* snstods - converts a set of ndfa states into a dfa state + * + * synopsis + * int sns[numstates], numstates, newds, accset[accnum + 1], nacc, hashval; + * int snstods(); + * is_new_state = snstods( sns, numstates, accset, nacc, hashval, &newds ); + * + * on return, the dfa state number is in newds. + */ +int snstods( sns, numstates, accset, nacc, hashval, newds_addr ) +int sns[], numstates, accset[], nacc, hashval, *newds_addr; + + { + int didsort = 0; + register int i, j; + int newds, *oldsns; + char *malloc(); + + for ( i = 1; i <= lastdfa; ++i ) + if ( hashval == dhash[i] ) + { + if ( numstates == dfasiz[i] ) + { + oldsns = dss[i]; + + if ( ! didsort ) + { + /* we sort the states in sns so we can compare it to + * oldsns quickly. we use bubble because there probably + * aren't very many states + */ + bubble( sns, numstates ); + didsort = 1; + } + + for ( j = 1; j <= numstates; ++j ) + if ( sns[j] != oldsns[j] ) + break; + + if ( j > numstates ) + { + ++dfaeql; + *newds_addr = i; + return ( 0 ); + } + + ++hshcol; + } + + else + ++hshsave; + } + + /* make a new dfa */ + + if ( ++lastdfa >= current_max_dfas ) + increase_max_dfas(); + + newds = lastdfa; + + if ( ! (dss[newds] = (int *) malloc( (unsigned) ((numstates + 1) * sizeof( int )) )) ) + lexfatal( "dynamic memory failure in snstods()" ); + + /* if we haven't already sorted the states in sns, we do so now, so that + * future comparisons with it can be made quickly + */ + + if ( ! didsort ) + bubble( sns, numstates ); + + for ( i = 1; i <= numstates; ++i ) + dss[newds][i] = sns[i]; + + dfasiz[newds] = numstates; + dhash[newds] = hashval; + + if ( nacc == 0 ) + { + dfaacc[newds] = 0; + accsiz[newds] = 0; + } + + else if ( reject ) + { + /* we sort the accepting set in increasing order so the disambiguating + * rule that the first rule listed is considered match in the event of + * ties will work. We use a bubble sort since the list is probably + * quite small. + */ + + bubble( accset, nacc ); + + if ( ! (dfaacc[newds] = + (int *) malloc( (unsigned) ((nacc + 1) * sizeof( int )) )) ) + lexfatal( "dynamic memory failure in snstods()" ); + + /* save the accepting set for later */ + for ( i = 1; i <= nacc; ++i ) + dfaacc[newds][i] = accset[i]; + + accsiz[newds] = nacc; + } + + else + { /* find lowest numbered rule so the disambiguating rule will work */ + j = accnum + 1; + + for ( i = 1; i <= nacc; ++i ) + if ( accset[i] < j ) + j = accset[i]; + + dfaacc[newds] = (int *) j; + } + + *newds_addr = newds; + + return ( 1 ); + } + + +/* symfollowset - follow the symbol transitions one step + * + * synopsis + * int ds[current_max_dfa_size], dsize, transsym; + * int nset[current_max_dfa_size], numstates; + * numstates = symfollowset( ds, dsize, transsym, nset ); + */ +int symfollowset( ds, dsize, transsym, nset ) +int ds[], dsize, transsym, nset[]; + + { + int ns, tsp, sym, i, j, lenccl, ch, numstates; + int ccllist; + + numstates = 0; + + for ( i = 1; i <= dsize; ++i ) + { /* for each nfa state ns in the state set of ds */ + ns = ds[i]; + sym = transchar[ns]; + tsp = trans1[ns]; + + if ( sym < 0 ) + { /* it's a character class */ + sym = -sym; + ccllist = cclmap[sym]; + lenccl = ccllen[sym]; + + if ( cclng[sym] ) + { + for ( j = 0; j < lenccl; ++j ) + { /* loop through negated character class */ + ch = ccltbl[ccllist + j]; + + if ( ch > transsym ) + break; /* transsym isn't in negated ccl */ + + else if ( ch == transsym ) + /* next 2 */ goto bottom; + } + + /* didn't find transsym in ccl */ + nset[++numstates] = tsp; + } + + else + for ( j = 0; j < lenccl; ++j ) + { + ch = ccltbl[ccllist + j]; + + if ( ch > transsym ) + break; + + else if ( ch == transsym ) + { + nset[++numstates] = tsp; + break; + } + } + } + + else if ( sym >= 'A' && sym <= 'Z' && caseins ) + lexfatal( "consistency check failed in symfollowset" ); + + else if ( sym == SYM_EPSILON ) + { /* do nothing */ + } + + else if ( ecgroup[sym] == transsym ) + nset[++numstates] = tsp; + +bottom: + ; + } + + return ( numstates ); + } + + +/* sympartition - partition characters with same out-transitions + * + * synopsis + * integer ds[current_max_dfa_size], numstates, duplist[numecs]; + * symlist[numecs]; + * sympartition( ds, numstates, symlist, duplist ); + */ +sympartition( ds, numstates, symlist, duplist ) +int ds[], numstates, duplist[]; +int symlist[]; + + { + int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich; + + /* partitioning is done by creating equivalence classes for those + * characters which have out-transitions from the given state. Thus + * we are really creating equivalence classes of equivalence classes. + */ + + for ( i = 1; i <= numecs; ++i ) + { /* initialize equivalence class list */ + duplist[i] = i - 1; + dupfwd[i] = i + 1; + } + + duplist[1] = NIL; + dupfwd[numecs] = NIL; + + for ( i = 1; i <= numstates; ++i ) + { + ns = ds[i]; + tch = transchar[ns]; + + if ( tch != SYM_EPSILON ) + { + if ( tch < -lastccl || tch > CSIZE ) + lexfatal( "bad transition character detected in sympartition()" ); + + if ( tch > 0 ) + { /* character transition */ + mkechar( ecgroup[tch], dupfwd, duplist ); + symlist[ecgroup[tch]] = 1; + } + + else + { /* character class */ + tch = -tch; + + lenccl = ccllen[tch]; + cclp = cclmap[tch]; + mkeccl( ccltbl + cclp, lenccl, dupfwd, duplist, numecs ); + + if ( cclng[tch] ) + { + j = 0; + + for ( k = 0; k < lenccl; ++k ) + { + ich = ccltbl[cclp + k]; + + for ( ++j; j < ich; ++j ) + symlist[j] = 1; + } + + for ( ++j; j <= numecs; ++j ) + symlist[j] = 1; + } + + else + for ( k = 0; k < lenccl; ++k ) + { + ich = ccltbl[cclp + k]; + symlist[ich] = 1; + } + } + } + } + } @@ -0,0 +1,190 @@ +/* lexecs - equivalence class routines */ + +/* + * Copyright (c) University of California, 1987 + */ + +#include "flexdef.h" + +/* ccl2ecl - convert character classes to set of equivalence classes + * + * synopsis + * ccl2ecl(); + */ +ccl2ecl() + + { + int i, ich, newlen, cclp, ccls, cclmec; + + for ( i = 1; i <= lastccl; ++i ) + { + /* we loop through each character class, and for each character + * in the class, add the character's equivalence class to the + * new "character" class we are creating. Thus when we are all + * done, character classes will really consist of collections + * of equivalence classes + */ + + newlen = 0; + cclp = cclmap[i]; + + for ( ccls = 0; ccls < ccllen[i]; ++ccls ) + { + ich = ccltbl[cclp + ccls]; + cclmec = ecgroup[ich]; + if ( cclmec > 0 ) + { + ccltbl[cclp + newlen] = cclmec; + ++newlen; + } + } + + ccllen[i] = newlen; + } + } + + +/* cre8ecs - associate equivalence class numbers with class members + * + * synopsis + * int cre8ecs(); + * number of classes = cre8ecs( fwd, bck, num ); + * + * fwd is the forward linked-list of equivalence class members. bck + * is the backward linked-list, and num is the number of class members. + * Returned is the number of classes. + */ +int cre8ecs( fwd, bck, num ) +int fwd[], bck[], num; + + { + int i, j, numcl; + + numcl = 0; + + /* create equivalence class numbers. From now on, abs( bck(x) ) + * is the equivalence class number for object x. If bck(x) + * is positive, then x is the representative of its equivalence + * class. + */ + + for ( i = 1; i <= num; ++i ) + if ( bck[i] == NIL ) + { + bck[i] = ++numcl; + for ( j = fwd[i]; j != NIL; j = fwd[j] ) + bck[j] = -numcl; + } + + return ( numcl ); + } + + +/* mkeccl - update equivalence classes based on character class xtions + * + * synopsis + * char ccls[]; + * int lenccl, fwd[llsiz], bck[llsiz], llsiz; + * mkeccl( ccls, lenccl, fwd, bck, llsiz ); + * + * where ccls contains the elements of the character class, lenccl is the + * number of elements in the ccl, fwd is the forward link-list of equivalent + * characters, bck is the backward link-list, and llsiz size of the link-list + */ +mkeccl( ccls, lenccl, fwd, bck, llsiz ) +char ccls[]; +int lenccl, fwd[], bck[], llsiz; + + { + int cclp, oldec, newec; + int cclm, i, j; + + /* note that it doesn't matter whether or not the character class is + * negated. The same results will be obtained in either case. + */ + + cclp = 0; + + while ( cclp < lenccl ) + { + cclm = ccls[cclp]; + oldec = bck[cclm]; + newec = cclm; + + j = cclp + 1; + + for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] ) + { /* look for the symbol in the character class */ + for ( ; j < lenccl && ccls[j] <= i; ++j ) + if ( ccls[j] == i ) + { + /* we found an old companion of cclm in the ccl. + * link it into the new equivalence class and flag it as + * having been processed + */ + + bck[i] = newec; + fwd[newec] = i; + newec = i; + ccls[j] = -i; /* set flag so we don't reprocess */ + + /* get next equivalence class member */ + /* next 2 */ goto next_pt; + } + + /* symbol isn't in character class. Put it in the old equivalence + * class + */ + + bck[i] = oldec; + + if ( oldec != NIL ) + fwd[oldec] = i; + + oldec = i; +next_pt: + ; + } + + if ( bck[cclm] != NIL || oldec != bck[cclm] ) + { + bck[cclm] = NIL; + fwd[oldec] = NIL; + } + + fwd[newec] = NIL; + + /* find next ccl member to process */ + + for ( ++cclp; ccls[cclp] < 0 && cclp < lenccl; ++cclp ) + { + /* reset "doesn't need processing" flag */ + ccls[cclp] = -ccls[cclp]; + } + } + } + + +/* mkechar - create equivalence class for single character + * + * synopsis + * int tch, fwd[], bck[]; + * mkechar( tch, fwd, bck ); + */ +mkechar( tch, fwd, bck ) +int tch, fwd[], bck[]; + + { + /* if until now the character has been a proper subset of + * an equivalence class, break it away to create a new ec + */ + + if ( fwd[tch] != NIL ) + bck[fwd[tch]] = bck[tch]; + + if ( bck[tch] != NIL ) + fwd[bck[tch]] = fwd[tch]; + + fwd[tch] = NIL; + bck[tch] = NIL; + } diff --git a/flexdef.h b/flexdef.h new file mode 100644 index 0000000..b41b649 --- /dev/null +++ b/flexdef.h @@ -0,0 +1,429 @@ +/* + * Symbol definitions for flex. + * + * modification history + * -------------------- + * 02a vp 27jun86 .translated into C/FTL + */ + +/* + * Copyright (c) University of California, 1987 + */ + +#include <stdio.h> + + +/* maximum line length we'll have to deal with */ +#define MAXLINE BUFSIZ + +/* maximum size of file name */ +#define FILENAMESIZE 1024 + +#define min(x,y) (x < y ? x : y) +#define max(x,y) (x > y ? x : y) + +#define true 1 +#define false 0 + + +#ifndef DEFAULT_SKELETON_FILE +#define DEFAULT_SKELETON_FILE "flex.skel" +#endif + +/* maximum number of characters per line recognized by Fortran compiler */ +#define DATALINEWIDTH 72 + +/* string to indent Fortran data statements with */ +#define DATAINDENTSTR " " +/* width of dataindent string in Fortran columns */ +#define DATAINDENTWIDTH 6 + +/* number of data items per line for -f output */ +#define NUMDATAITEMS 10 + +/* number of lines of data in -f output before inserting a blank line for + * readability. + */ +#define NUMDATALINES 10 + +/* returns true if an nfa state has an epsilon out-transition slot + * that can be used. This definition is currently not used. + */ +#define FREE_EPSILON(state) \ + (transchar[state] == SYM_EPSILON && \ + trans2[state] == NO_TRANSITION && \ + finalst[state] != state) + +/* returns true if an nfa state has an epsilon out-transition character + * and both slots are free + */ +#define SUPER_FREE_EPSILON(state) \ + (transchar[state] == SYM_EPSILON && \ + trans1[state] == NO_TRANSITION) \ + +/* maximum number of NFA states that can comprise a DFA state. It's real + * big because if there's a lot of rules, the initial state will have a + * huge epsilon closure. + */ +#define INITIAL_MAX_DFA_SIZE 750 +#define MAX_DFA_SIZE_INCREMENT 750 + +/* array names to be used in generated machine. They're short because + * we write out one data statement (which names the array) for each element + * in the array. + */ + +#define ALIST 'l' /* points to list of rules accepted for a state */ +#define ACCEPT 'a' /* list of rules accepted for a state */ +#define ECARRAY 'e' /* maps input characters to equivalence classes */ +#define MATCHARRAY 'm' /* maps equivalence classes to meta-equivalence classes */ +#define BASEARRAY 'b' /* "base" array */ +#define DEFARRAY 'd' /* "default" array */ +#define NEXTARRAY 'n' /* "next" array */ +#define CHECKARRAY 'c' /* "check" array */ + +/* NIL must be 0. If not, its special meaning when making equivalence classes + * (it marks the representative of a given e.c.) will be unidentifiable + */ +#define NIL 0 + +#define JAM -1 /* to mark a missing DFA transition */ +#define NO_TRANSITION NIL +#define UNIQUE -1 /* marks a symbol as an e.c. representative */ +#define INFINITY -1 /* for x{5,} constructions */ + +/* size of input alphabet - should be size of ASCII set */ +#define CSIZE 127 + +#define INITIAL_MAXCCLS 100 /* max number of unique character classes */ +#define MAXCCLS_INCREMENT 100 + +/* size of table holding members of character classes */ +#define INITIAL_MAX_CCL_TBL_SIZE 500 +#define MAX_CCL_TBL_SIZE_INCREMENT 250 + +#define INITIAL_MNS 2000 /* default maximum number of nfa states */ +#define MNS_INCREMENT 1000 /* amount to bump above by if it's not enough */ + +#define INITIAL_MAX_DFAS 1000 /* default maximum number of dfa states */ +#define MAX_DFAS_INCREMENT 1000 + +#define JAMSTATE -32766 /* marks a reference to the state that always jams */ + +/* enough so that if it's subtracted from an NFA state number, the result + * is guarenteed to be negative + */ +#define MARKER_DIFFERENCE 32000 +#define MAXIMUM_MNS 31999 + +/* maximum number of nxt/chk pairs for non-templates */ +#define INITIAL_MAX_XPAIRS 2000 +#define MAX_XPAIRS_INCREMENT 2000 + +/* maximum number of nxt/chk pairs needed for templates */ +#define INITIAL_MAX_TEMPLATE_XPAIRS 2500 +#define MAX_TEMPLATE_XPAIRS_INCREMENT 2500 + +#define SYM_EPSILON 0 /* to mark transitions on the symbol epsilon */ + +#define INITIAL_MAX_SCS 40 /* maximum number of start conditions */ +#define MAX_SCS_INCREMENT 40 /* amount to bump by if it's not enough */ + +#define ONE_STACK_SIZE 500 /* stack of states with only one out-transition */ +#define SAME_TRANS -1 /* transition is the same as "default" entry for state */ + +/* the following percentages are used to tune table compression: + + * the percentage the number of out-transitions a state must be of the + * number of equivalence classes in order to be considered for table + * compaction by using protos + */ +#define PROTO_SIZE_PERCENTAGE 15 + +/* the percentage the number of homogeneous out-transitions of a state + * must be of the number of total out-transitions of the state in order + * that the state's transition table is first compared with a potential + * template of the most common out-transition instead of with the first + * proto in the proto queue + */ +#define CHECK_COM_PERCENTAGE 50 + +/* the percentage the number of differences between a state's transition + * table and the proto it was first compared with must be of the total + * number of out-transitions of the state in order to keep the first + * proto as a good match and not search any further + */ +#define FIRST_MATCH_DIFF_PERCENTAGE 10 + +/* the percentage the number of differences between a state's transition + * table and the most similar proto must be of the state's total number + * of out-transitions to use the proto as an acceptable close match + */ +#define ACCEPTABLE_DIFF_PERCENTAGE 50 + +/* the percentage the number of homogenous out-transitions of a state + * must be of the number of total out-transitions of the state in order + * to consider making a template from the state + */ +#define TEMPLATE_SAME_PERCENTAGE 60 + +/* the percentage the number of differences between a state's transition + * table and the most similar proto must be of the state's total number + * of out-transitions to create a new proto from the state + */ +#define NEW_PROTO_DIFF_PERCENTAGE 20 + +/* the percentage the total number of out-transitions of a state must be + * of the number of equivalence classes in order to consider trying to + * fit the transition table into "holes" inside the nxt/chk table. + */ +#define INTERIOR_FIT_PERCENTAGE 15 + +/* size of region set aside to cache the complete transition table of + * protos on the proto queue to enable quick comparisons + */ +#define PROT_SAVE_SIZE 2000 + +#define MSP 50 /* maximum number of saved protos (protos on the proto queue) */ + +/* number that, if used to subscript an array, has a good chance of producing + * an error; should be small enough to fit into a short + */ +#define BAD_SUBSCRIPT -32767 + + +/* Declarations for global variables. */ + +/* variables for symbol tables: + * sctbl - start-condition symbol table + * ndtbl - name-definition symbol table + * ccltab - character class text symbol table + */ + +struct hash_entry + { + struct hash_entry *prev, *next; + char *name; + char *val; + } ; + +typedef struct hash_entry *hash_table[]; + +#define NAME_TABLE_HASH_SIZE 101 +#define START_COND_HASH_SIZE 101 +#define CCL_HASH_SIZE 101 + +extern struct hash_entry *ndtbl[NAME_TABLE_HASH_SIZE]; +extern struct hash_entry *sctbl[START_COND_HASH_SIZE]; +extern struct hash_entry *ccltab[CCL_HASH_SIZE]; + + +/* variables for flags: + * printstats - if true (-v), dump statistics + * syntaxerror - true if a syntax error has been found + * eofseen - true if we've seen an eof in the input file + * ddebug - if true (-d), make a "debug" scanner + * trace - if true (-T), trace processing + * spprdflt - if true (-s), suppress the default rule + * interactive - if true (-I), generate an interactive scanner + * caseins - if true (-i), generate a case-insensitive scanner + * useecs - if true (-ce flag), use equivalence classes + * fulltbl - if true (-cf flag), don't compress the DFA state table + * usemecs - if true (-cm flag), use meta-equivalence classes + * reject - if true (-r flag), generate tables for REJECT macro + */ + +extern int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt; +extern int interactive, caseins, genftl, useecs, fulltbl, usemecs, reject; + + +/* variables used in the flex input routines: + * datapos - characters on current output line + * dataline - number of contiguous lines of data in current data + * statement. Used to generate readable -f output + * skelfile - fd of the skeleton file + * yyin - input file + * infilename - name of input file + * linenum - current input line number + */ + +extern int datapos, dataline, linenum; +extern FILE *skelfile, *yyin; +extern char *infilename; + + +/* variables for stack of states having only one out-transition: + * onestate - state number + * onesym - transition symbol + * onenext - target state + * onedef - default base entry + * onesp - stack pointer + */ + +extern int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE]; +extern int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; + + +/* variables for nfa machine data: + * current_mns - current maximum on number of NFA states + * accnum - number of the last accepting state + * firstst - physically the first state of a fragment + * lastst - last physical state of fragment + * finalst - last logical state of fragment + * transchar - transition character + * trans1 - transition state + * trans2 - 2nd transition state for epsilons + * accptnum - accepting number + * lastnfa - last nfa state number created + */ + +extern int current_mns; +extern int accnum, *firstst, *lastst, *finalst, *transchar; +extern int *trans1, *trans2, *accptnum, lastnfa; + + +/* variables for protos: + * numtemps - number of templates created + * numprots - number of protos created + * protprev - backlink to a more-recently used proto + * protnext - forward link to a less-recently used proto + * prottbl - base/def table entry for proto + * protcomst - common state of proto + * firstprot - number of the most recently used proto + * lastprot - number of the least recently used proto + * protsave contains the entire state array for protos + */ + +extern int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP]; +extern int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; + + +/* variables for managing equivalence classes: + * numecs - number of equivalence classes + * nextecm - forward link of Equivalenc Class members + * ecgroup - class number or backward link of EC members + * nummecs - number of meta-equivalence classes (used to compress + * templates) + * tecfwd - forward link of meta-equivalence classes members + * tecbck - backward link of MEC's + */ + +extern int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs; +extern int tecfwd[CSIZE + 1], tecbck[CSIZE + 1]; + + +/* variables for start conditions: + * lastsc - last start condition created + * current_max_scs - current limit on number of start conditions + * scset - set of rules active in start condition + * scbol - set of rules active only at the beginning of line in a s.c. + * scxclu - true if start condition is exclusive + * actvsc - stack of active start conditions for the current rule + */ + +extern int lastsc, current_max_scs, *scset, *scbol, *scxclu, *actvsc; + + +/* variables for dfa machine data: + * current_max_dfa_size - current maximum number of NFA states in DFA + * current_max_xpairs - current maximum number of non-template xtion pairs + * current_max_template_xpairs - current maximum number of template pairs + * current_max_dfas - current maximum number DFA states + * lastdfa - last dfa state number created + * nxt - state to enter upon reading character + * chk - check value to see if "nxt" applies + * tnxt - internal nxt table for templates + * base - offset into "nxt" for given state + * def - where to go if "chk" disallows "nxt" entry + * tblend - last "nxt/chk" table entry being used + * firstfree - first empty entry in "nxt/chk" table + * dss - nfa state set for each dfa + * dfasiz - size of nfa state set for each dfa + * dfaacc - accepting set for each dfa state (or accepting number, if + * -r is not given) + * accsiz - size of accepting set for each dfa state + * dhash - dfa state hash value + * todo - queue of DFAs still to be processed + * todo_head - head of todo queue + * todo_next - next available entry on todo queue + * numas - number of DFA accepting states created; note that this + * is not necessarily the same value as accnum, which is the analogous + * value for the NFA + * numsnpairs - number of state/nextstate transition pairs + * jambase - position in base/def where the default jam table starts + * jamstate - state number corresponding to "jam" state + */ + +extern int current_max_dfa_size, current_max_xpairs; +extern int current_max_template_xpairs, current_max_dfas; +extern int lastdfa, lasttemp, *nxt, *chk, *tnxt; +extern int *base, *def, tblend, firstfree, **dss, *dfasiz, **dfaacc; +extern int *accsiz, *dhash, *todo, todo_head, todo_next, numas; +extern int numsnpairs, jambase, jamstate; + + +/* variables for ccl information: + * lastccl - ccl index of the last created ccl + * current_maxccls - current limit on the maximum number of unique ccl's + * cclmap - maps a ccl index to its set pointer + * ccllen - gives the length of a ccl + * cclng - true for a given ccl if the ccl is negated + * cclreuse - counts how many times a ccl is re-used + * current_max_ccl_tbl_size - current limit on number of characters needed + * to represent the unique ccl's + * ccltbl - holds the characters in each ccl - indexed by cclmap + */ + +extern int lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse; +extern int current_max_ccl_tbl_size; +extern char *ccltbl; + + +/* variables for miscellaneous information: + * starttime - real-time when we started + * endtime - real-time when we ended + * nmstr - last NAME scanned by the scanner + * sectnum - section number currently being parsed + * nummt - number of empty nxt/chk table entries + * hshcol - number of hash collisions detected by snstods + * dfaeql - number of times a newly created dfa was equal to an old one + * numeps - number of epsilon NFA states created + * eps2 - number of epsilon states which have 2 out-transitions + * num_reallocs - number of times it was necessary to realloc() a group + * of arrays + * tmpuses - number of DFA states that chain to templates + * totnst - total number of NFA states used to make DFA states + * peakpairs - peak number of transition pairs we had to store internally + * numuniq - number of unique transitions + * numdup - number of duplicate transitions + * hshsave - number of hash collisions saved by checking number of states + */ + +extern char *starttime, *endtime, nmstr[MAXLINE]; +extern int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; +extern int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; + +char *allocate_array(), *reallocate_array(); + +#define allocate_integer_array(size) \ + (int *) allocate_array( size, sizeof( int ) ) + +#define reallocate_integer_array(array,size) \ + (int *) reallocate_array( (char *) array, size, sizeof( int ) ) + +#define allocate_integer_pointer_array(size) \ + (int **) allocate_array( size, sizeof( int * ) ) + +#define reallocate_integer_pointer_array(array,size) \ + (int **) reallocate_array( (char *) array, size, sizeof( int * ) ) + +#define allocate_character_array(size) allocate_array( size, sizeof( char ) ) + +#define reallocate_character_array(array,size) \ + reallocate_array( array, size, sizeof( char ) ) + + +/* used to communicate between scanner and parser. The type should really + * be YYSTYPE, but we can't easily get our hands on it. + */ +extern int yylval; @@ -0,0 +1,507 @@ +/* flex - tool to generate fast lexical analyzers + * + * Copyright (c) University of California, 1987 + * + * + * ver date who remarks + * --- ---- --- ------------------------------------------------------- + * 04a 27Jun86 VP .translated from Ratfor into C + * 01a 22Aug83 VP .written. Original version by Jef Poskanzer. + */ + + +#include "flexdef.h" + + +/* these globals are all defined and commented in flexdef.h */ +int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt; +int interactive, caseins, genftl, useecs, fulltbl, usemecs, reject; +int datapos, dataline, linenum; +FILE *skelfile = NULL; +char *infilename = NULL; +int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE]; +int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; +int current_mns; +int accnum, *firstst, *lastst, *finalst, *transchar; +int *trans1, *trans2, *accptnum, lastnfa; +int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP]; +int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; +int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1]; +int tecbck[CSIZE + 1]; +int lastsc, current_max_scs, *scset, *scbol, *scxclu, *actvsc; +int current_max_dfa_size, current_max_xpairs; +int current_max_template_xpairs, current_max_dfas; +int lastdfa, *nxt, *chk, *tnxt; +int *base, *def, tblend, firstfree, numtemps, **dss, *dfasiz, **dfaacc; +int *accsiz, *dhash, *todo, todo_head, todo_next, numas; +int numsnpairs, jambase, jamstate; +int lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse; +int current_max_ccl_tbl_size; +char *ccltbl; +char *starttime, *endtime, nmstr[MAXLINE]; +int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; +int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; + + +/* flex - main program + * + * synopsis (from the shell) + * flex [-v] [file ...] + */ + +main( argc, argv ) +int argc; +char **argv; + + { + lexinit( argc, argv ); + readin(); + + if ( ! syntaxerror ) + { + /* convert the ndfa to a dfa */ + ntod(); + + /* generate the ratfor state transition tables from the dfa */ + gentabs(); + } + + /* note, lexend does not return. It exits with its argument as status. */ + + lexend( 0 ); + } + + +/* lexend - terminate flex + * + * synopsis + * int status; + * lexend( status ); + * + * status is exit status. + * + * note + * This routine does not return. + */ + +lexend( status ) +int status; + + { + int tblsiz; + char *gettime(); + + if ( skelfile != NULL ) + (void) fclose( skelfile ); + + if ( printstats ) + { + endtime = gettime(); + + fprintf( stderr, "flex usage statistics:\n" ); + fprintf( stderr, " started at %s, finished at %s\n", + starttime, endtime ); + + if ( ! genftl ) + fprintf( stderr, " Ratfor scanner generated\n" ); + + fprintf( stderr, " %d/%d NFA states\n", lastnfa, current_mns ); + fprintf( stderr, " %d/%d DFA states (%d words)\n", lastdfa, + current_max_dfas, totnst ); + fprintf( stderr, " %d rules\n", accnum ); + fprintf( stderr, " %d/%d start conditions\n", lastsc, + current_max_scs ); + fprintf( stderr, " %d epsilon states, %d double epsilon states\n", + numeps, eps2 ); + + if ( lastccl == 0 ) + fprintf( stderr, " no character classes\n" ); + else + fprintf( stderr, + " %d/%d character classes needed %d/%d words of storage, %d reused\n", + lastccl, current_maxccls, + cclmap[lastccl] + ccllen[lastccl] - 1, + current_max_ccl_tbl_size, cclreuse ); + + fprintf( stderr, " %d state/nextstate pairs created\n", numsnpairs ); + fprintf( stderr, " %d/%d unique/duplicate transitions\n", + numuniq, numdup ); + + if ( fulltbl ) + { + tblsiz = lastdfa * numecs; + fprintf( stderr, " %d table entries\n", tblsiz ); + } + + else + { + tblsiz = 2 * (lastdfa + numtemps) + 2 * tblend; + + fprintf( stderr, " %d/%d base/def entries created\n", + lastdfa + numtemps, current_max_dfas ); + fprintf( stderr, " %d/%d (peak %d) nxt/chk entries created\n", + tblend, current_max_xpairs, peakpairs ); + fprintf( stderr, + " %d/%d (peak %d) template nxt/chk entries created\n", + numtemps * nummecs, current_max_template_xpairs, + numtemps * numecs ); + fprintf( stderr, " %d empty table entries\n", nummt ); + fprintf( stderr, " %d protos created\n", numprots ); + fprintf( stderr, " %d templates created, %d uses\n", + numtemps, tmpuses ); + } + + if ( useecs ) + { + tblsiz = tblsiz + CSIZE; + fprintf( stderr, " %d/%d equivalence classes created\n", + numecs, CSIZE ); + } + + if ( usemecs ) + { + tblsiz = tblsiz + numecs; + fprintf( stderr, " %d/%d meta-equivalence classes created\n", + nummecs, CSIZE ); + } + + fprintf( stderr, " %d (%d saved) hash collisions, %d DFAs equal\n", + hshcol, hshsave, dfaeql ); + fprintf( stderr, " %d sets of reallocations needed\n", num_reallocs ); + fprintf( stderr, " %d total table entries needed\n", tblsiz ); + } + + exit( status ); + } + + +/* lexinit - initialize flex + * + * synopsis + * int argc; + * char **argv; + * lexinit( argc, argv ); + */ + +lexinit( argc, argv ) +int argc; +char **argv; + + { + int i; + char *arg, *skelname = DEFAULT_SKELETON_FILE, *gettime(), clower(); + int sawcmpflag, use_stdout; + + printstats = syntaxerror = trace = spprdflt = interactive = caseins = false; + ddebug = fulltbl = reject = false; + usemecs = genftl = useecs = true; + + sawcmpflag = false; + use_stdout = false; + + /* read flags */ + for ( --argc, ++argv; argc ; --argc, ++argv ) + { + if ( argv[0][0] != '-' || argv[0][1] == '\0' ) + break; + + arg = argv[0]; + + for ( i = 1; arg[i] != '\0'; ++i ) + switch ( arg[i] ) + { + case 'c': + if ( i != 1 ) + lexerror( "-c flag must be given separately" ); + + if ( ! sawcmpflag ) + { + useecs = false; + usemecs = false; + fulltbl = false; + sawcmpflag = true; + } + + for ( ++i; arg[i] != '\0'; ++i ) + switch ( clower( arg[i] ) ) + { + case 'e': + useecs = true; + break; + + case 'f': + fulltbl = true; + break; + + case 'm': + usemecs = true; + break; + + default: + lerrif( "unknown -c option %c", + (int) arg[i] ); + break; + } + + goto get_next_arg; + + case 'd': + ddebug = true; + break; + + case 'f': + useecs = usemecs = false; + fulltbl = true; + break; + + case 'I': + interactive = true; + break; + + case 'i': + caseins = true; + break; + + case 'l': + use_stdout = false; + break; + + case 'n': + printstats = false; + break; + + case 'r': + reject = true; + break; + + case 'S': + if ( i != 1 ) + lexerror( "-S flag must be given separately" ); + + skelname = arg + i + 1; + goto get_next_arg; + + case 's': + spprdflt = true; + break; + + case 't': + use_stdout = true; + break; + + case 'T': + trace = true; + break; + + case 'v': + printstats = true; + break; + + default: + lerrif( "unknown flag %c", (int) arg[i] ); + break; + } + +get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ + ; + } + + if ( fulltbl && usemecs ) + lexerror( "full table and -cm don't make sense together" ); + + if ( fulltbl && interactive ) + lexerror( "full table and -I are (currently) incompatible" ); + + if ( ! use_stdout ) + { + FILE *prev_stdout = freopen( "lex.yy.c", "w", stdout ); + + if ( prev_stdout == NULL ) + lexerror( "could not create lex.yy.c" ); + } + + if ( argc ) + { + if ( argc > 1 ) + lexerror( "extraneous argument(s) given" ); + + yyin = fopen( infilename = argv[0], "r" ); + + if ( yyin == NULL ) + lerrsf( "can't open %s", argv[0] ); + } + + else + yyin = stdin; + + lastccl = 0; + lastsc = 0; + + /* initialize the statistics */ + starttime = gettime(); + + if ( (skelfile = fopen( skelname, "r" )) == NULL ) + lerrsf( "can't open skeleton file %s", skelname ); + + lastdfa = lastnfa = accnum = numas = numsnpairs = tmpuses = 0; + numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0; + numuniq = numdup = hshsave = eofseen = datapos = dataline = 0; + onesp = numprots = 0; + + linenum = sectnum = 1; + firstprot = NIL; + + /* used in mkprot() so that the first proto goes in slot 1 + * of the proto queue + */ + lastprot = 1; + + if ( useecs ) + { + /* set up doubly-linked equivalence classes */ + ecgroup[1] = NIL; + + for ( i = 2; i <= CSIZE; ++i ) + { + ecgroup[i] = i - 1; + nextecm[i - 1] = i; + } + + nextecm[CSIZE] = NIL; + } + + else + { /* put everything in its own equivalence class */ + for ( i = 1; i <= CSIZE; ++i ) + { + ecgroup[i] = i; + nextecm[i] = BAD_SUBSCRIPT; /* to catch errors */ + } + } + + set_up_initial_allocations(); + } + + +/* readin - read in the rules section of the input file(s) + * + * synopsis + * readin(); + */ +readin() + + { + if ( genftl ) + { + fputs( "#define YYDEFAULTACTION ", stdout ); + + if ( spprdflt ) + fputs( "YYFATALERROR( \"flex scanner jammed\" )", stdout ); + else + fputs( "ECHO", stdout ); + + fputs( ";\n", stdout ); + + if ( ddebug ) + puts( "#define LEX_DEBUG" ); + if ( useecs ) + puts( "#define LEX_USE_ECS" ); + if ( usemecs ) + puts( "#define LEX_USE_MECS" ); + if ( interactive ) + puts( "#define LEX_INTERACTIVE_SCANNER" ); + if ( reject ) + puts( "#define LEX_REJECT_ENABLED" ); + if ( fulltbl ) + puts( "#define LEX_FULL_TABLE" ); + } + + else + { + fputs( "define(YYDEFAULTACTION,", stdout ); + + if ( spprdflt ) + fputs( "call error( \"flex scanner jammed\" )", stdout ); + else + fputs( "ECHO", stdout ); + + fputs( ")\n", stdout ); + + if ( ddebug ) + puts( "define(LEX_DEBUG,)" ); + if ( useecs ) + puts( "define(LEX_USE_ECS,)" ); + if ( usemecs ) + puts( "define(LEX_USE_MECS,)" ); + if ( reject ) + puts( "define(LEX_REJECT_ENABLED,)" ); + if ( fulltbl ) + puts( "define(LEX_FULL_TABLE,)" ); + } + + skelout(); + + line_directive_out(); + + if ( yyparse() ) + lerrif( "fatal parse error at line %d", linenum ); + + if ( useecs ) + { + numecs = cre8ecs( nextecm, ecgroup, CSIZE ); + ccl2ecl(); + } + + else + numecs = CSIZE; + } + + + +/* set_up_initial_allocations - allocate memory for internal tables */ + +set_up_initial_allocations() + + { + current_mns = INITIAL_MNS; + firstst = allocate_integer_array( current_mns ); + lastst = allocate_integer_array( current_mns ); + finalst = allocate_integer_array( current_mns ); + transchar = allocate_integer_array( current_mns ); + trans1 = allocate_integer_array( current_mns ); + trans2 = allocate_integer_array( current_mns ); + accptnum = allocate_integer_array( current_mns ); + + current_max_scs = INITIAL_MAX_SCS; + scset = allocate_integer_array( current_max_scs ); + scbol = allocate_integer_array( current_max_scs ); + scxclu = allocate_integer_array( current_max_scs ); + actvsc = allocate_integer_array( current_max_scs ); + + current_maxccls = INITIAL_MAXCCLS; + cclmap = allocate_integer_array( current_maxccls ); + ccllen = allocate_integer_array( current_maxccls ); + cclng = allocate_integer_array( current_maxccls ); + + current_max_ccl_tbl_size = INITIAL_MAX_CCL_TBL_SIZE; + ccltbl = allocate_character_array( current_max_ccl_tbl_size ); + + current_max_dfa_size = INITIAL_MAX_DFA_SIZE; + + current_max_xpairs = INITIAL_MAX_XPAIRS; + nxt = allocate_integer_array( current_max_xpairs ); + chk = allocate_integer_array( current_max_xpairs ); + + current_max_template_xpairs = INITIAL_MAX_TEMPLATE_XPAIRS; + tnxt = allocate_integer_array( current_max_template_xpairs ); + + current_max_dfas = INITIAL_MAX_DFAS; + base = allocate_integer_array( current_max_dfas ); + def = allocate_integer_array( current_max_dfas ); + dfasiz = allocate_integer_array( current_max_dfas ); + accsiz = allocate_integer_array( current_max_dfas ); + dhash = allocate_integer_array( current_max_dfas ); + todo = allocate_integer_array( current_max_dfas ); + dss = allocate_integer_pointer_array( current_max_dfas ); + dfaacc = allocate_integer_pointer_array( current_max_dfas ); + } @@ -0,0 +1,646 @@ +/* lexmisc - miscellaneous flex routines */ + +/* + * Copyright (c) University of California, 1987 + */ + +#include <ctype.h> +#include "flexdef.h" + +char *malloc(), *realloc(); + + +/* allocate_array - allocate memory for an integer array of the given size */ + +char *allocate_array( size, element_size ) +int size, element_size; + + { + register char *mem = malloc( (unsigned) (element_size * size) ); + + if ( mem == NULL ) + lexfatal( "memory allocation failed in allocate_array()" ); + + return ( mem ); + } + + +/* bubble - bubble sort an integer array in increasing order + * + * synopsis + * int v[n], n; + * bubble( v, n ); + * + * description + * sorts the first n elements of array v and replaces them in + * increasing order. + * + * passed + * v - the array to be sorted + * n - the number of elements of 'v' to be sorted */ + +bubble( v, n ) +int v[], n; + + { + register int i, j, k; + + for ( i = n; i > 1; --i ) + for ( j = 1; j < i; ++j ) + if ( v[j] > v[j + 1] ) /* compare */ + { + k = v[j]; /* exchange */ + v[j] = v[j + 1]; + v[j + 1] = k; + } + } + + +/* clower - replace upper-case letter to lower-case + * + * synopsis: + * char clower(), c; + * c = clower( c ); + */ + +char clower( c ) +register char c; + + { + return ( isupper(c) ? tolower(c) : c ); + } + + +/* copy_string - returns a dynamically allocated copy of a string + * + * synopsis + * char *str, *copy, *copy_string(); + * copy = copy_string( str ); + */ + +char *copy_string( str ) +register char *str; + + { + register char *c; + char *copy; + + /* find length */ + for ( c = str; *c; ++c ) + ; + + copy = malloc( (unsigned) ((c - str + 1) * sizeof( char )) ); + + if ( copy == NULL ) + lexfatal( "dynamic memory failure in copy_string()" ); + + for ( c = copy; (*c++ = *str++); ) + ; + + return ( copy ); + } + + +/* cshell - shell sort a character array in increasing order + * + * synopsis + * + * char v[n]; + * int n; + * cshell( v, n ); + * + * description + * does a shell sort of the first n elements of array v. + * + * passed + * v - array to be sorted + * n - number of elements of v to be sorted + */ +cshell( v, n ) +char v[]; +int n; + + { + int gap, i, j, jg; + char k; + + for ( gap = n / 2; gap > 0; gap = gap / 2 ) + for ( i = gap; i < n; ++i ) + for ( j = i - gap; j >= 0; j = j - gap ) + { + jg = j + gap; + + if ( v[j] <= v[jg] ) + break; + + k = v[j]; + v[j] = v[jg]; + v[jg] = k; + } + } + + +/* dataend - finish up a block of data declarations + * + * synopsis + * dataend(); + */ +dataend() + + { + if ( datapos > 0 ) + dataflush(); + + if ( genftl ) + /* add terminator for initialization */ + puts( " } ;\n" ); + + dataline = 0; + } + + + +/* dataflush - flush generated data statements + * + * synopsis + * dataflush(); + */ +dataflush() + + { + putchar( '\n' ); + + if ( genftl ) + { + if ( ++dataline >= NUMDATALINES ) + { + /* put out a blank line so that the table is grouped into + * large blocks that enable the user to find elements easily + */ + putchar( '\n' ); + dataline = 0; + } + } + + /* reset the number of characters written on the current line */ + datapos = 0; + } + + +/* gettime - return current time + * + * synopsis + * char *gettime(), *time_str; + * time_str = gettime(); + */ + +/* include sys/types.h to use time_t and make lint happy */ + +#include <sys/types.h> + +char *gettime() + + { + time_t t, time(); + char *result, *ctime(), *copy_string(); + + t = time( (long *) 0 ); + + result = copy_string( ctime( &t ) ); + + /* get rid of trailing newline */ + result[24] = '\0'; + + return ( result ); + } + + +/* lerrif - report an error message formatted with one integer argument + * + * synopsis + * char msg[]; + * int arg; + * lerrif( msg, arg ); + */ + +lerrif( msg, arg ) +char msg[]; +int arg; + + { + char errmsg[MAXLINE]; + (void) sprintf( errmsg, msg, arg ); + lexerror( errmsg ); + } + + +/* lerrsf - report an error message formatted with one string argument + * + * synopsis + * char msg[], arg[]; + * lerrsf( msg, arg ); + */ + +lerrsf( msg, arg ) +char msg[], arg[]; + + { + char errmsg[MAXLINE]; + (void) sprintf( errmsg, msg, arg ); + lexerror( errmsg ); + } + + +/* lexerror - report an error message and terminate + * + * synopsis + * char msg[]; + * lexerror( msg ); + */ + +lexerror( msg ) +char msg[]; + + { + fprintf( stderr, "flex: %s\n", msg ); + lexend( 1 ); + } + + +/* lexfatal - report a fatal error message and terminate + * + * synopsis + * char msg[]; + * lexfatal( msg ); + */ + +lexfatal( msg ) +char msg[]; + + { + fprintf( stderr, "flex: fatal internal error %s\n", msg ); + lexend( 1 ); + } + + +/* line_directive_out - spit out a "# line" statement */ + +line_directive_out() + + { + if ( infilename ) + printf( "# line %d \"%s\"\n", linenum, infilename ); + } + + +/* mk2data - generate a data statement for a two-dimensional array + * + * synopsis + * char name; + * int row, column, value; + * mk2data( name, row, column, value ); + * + * generates a data statement initializing "name(row, column)" to "value" + * Note that name is only a character; NOT a string. If we're generating + * FTL (-f flag), "name", "row", and "column" get ignored. + */ +mk2data( name, row, column, value ) +char name; +int row, column, value; + + { + int datalen; + static char dindent[] = DATAINDENTSTR; + + if ( genftl ) + { + if ( datapos >= NUMDATAITEMS ) + { + putchar( ',' ); + dataflush(); + } + + if ( datapos == 0 ) + /* indent */ + fputs( " ", stdout ); + + else + putchar( ',' ); + + ++datapos; + + printf( "%5d", value ); + } + + else + { + /* figure out length of data statement to be written. 7 is the constant + * overhead of a one character name, '(', ',', and ')' to delimit + * the array reference, a '/' and a '/' to delimit the value, and + * room for a blank or a comma between this data statement and the + * previous one + */ + + datalen = 7 + numdigs( row ) + numdigs( column ) + numdigs( value ); + + if ( datalen + datapos >= DATALINEWIDTH | datapos == 0 ) + { + if ( datapos != 0 ) + dataflush(); + + /* precede data statement with '%' so rat4 preprocessor doesn't have + * to bother looking at it -- speed hack + */ + printf( "%%%sdata ", dindent ); + + /* 4 is the constant overhead of writing out the word "DATA" */ + datapos = DATAINDENTWIDTH + 4 + datalen; + } + + else + { + putchar( ',' ); + datapos = datapos + datalen; + } + + printf( "%c(%d,%d)/%d/", name, row, column, value ); + } + } + + +/* mkdata - generate a data statement + * + * synopsis + * char name; + * int arrayelm, value; + * mkdata( name, arrayelm, value ); + * + * generates a data statement initializing "name(arrayelm)" to "value" + * Note that name is only a character; NOT a string. If we're generating + * FTL (-f flag), "name" and "arrayelm" get ignored. + */ +mkdata( name, arrayelm, value ) +char name; +int arrayelm, value; + + { + int datalen; + static char dindent[] = DATAINDENTSTR; + + if ( genftl ) + { + if ( datapos >= NUMDATAITEMS ) + { + putchar( ',' ); + dataflush(); + } + + if ( datapos == 0 ) + /* indent */ + fputs( " ", stdout ); + + else + putchar( ',' ); + + ++datapos; + + printf( "%5d", value ); + } + + else + { + /* figure out length of data statement to be written. 6 is the constant + * overhead of a one character name, '(' and ')' to delimit the array + * reference, a '/' and a '/' to delimit the value, and room for a + * blank or a comma between this data statement and the previous one + */ + + datalen = 6 + numdigs( arrayelm ) + numdigs( value ); + + if ( datalen + datapos >= DATALINEWIDTH | datapos == 0 ) + { + if ( datapos != 0 ) + dataflush(); + + /* precede data statement with '%' so rat4 preprocessor doesn't have + * to bother looking at it -- speed hack + */ + printf( "%%%sdata ", dindent ); + + /* 4 is the constant overhead of writing out the word "DATA" */ + datapos = DATAINDENTWIDTH + 4 + datalen; + } + + else + { + putchar( ',' ); + datapos = datapos + datalen; + } + + printf( "%c(%d)/%d/", name, arrayelm, value ); + } + } + + +/* myctoi - return the integer represented by a string of digits + * + * synopsis + * char array[]; + * int val, myctoi(); + * val = myctoi( array ); + * + */ + +int myctoi( array ) +char array[]; + + { + int val = 0; + + (void) sscanf( array, "%d", &val ); + + return ( val ); + } + + +/* myesc - return character corresponding to escape sequence + * + * synopsis + * char array[], c, myesc(); + * c = myesc( array ); + * + */ + +char myesc( array ) +char array[]; + + { + switch ( array[1] ) + { + case 'n': return ( '\n' ); + case 't': return ( '\t' ); + case 'f': return ( '\f' ); + case 'r': return ( '\r' ); + case 'b': return ( '\b' ); + + case '0': + if ( isdigit(array[2]) ) + { /* \0<octal> */ + char c, esc_char; + register int sptr = 2; + + while ( isdigit(array[sptr]) ) + /* don't increment inside loop control because the + * macro will expand it to two increments! (Not a + * problem with the C version of the macro) + */ + ++sptr; + + c = array[sptr]; + array[sptr] = '\0'; + + esc_char = otoi( array + 2 ); + array[sptr] = c; + + if ( esc_char == '\0' ) + { + synerr( "escape sequence for null not allowed" ); + return ( 1 ); + } + + return ( esc_char ); + } + + else + { + synerr( "escape sequence for null not allowed" ); + return ( 1 ); + } + +#ifdef NOTDEF + case '^': + { + register char next_char = array[2]; + + if ( next_char == '?' ) + return ( 0x7f ); + + else if ( next_char >= 'A' && next_char <= 'Z' ) + return ( next_char - 'A' + 1 ); + + else if ( next_char >= 'a' && next_char <= 'z' ) + return ( next_char - 'z' + 1 ); + + synerr( "illegal \\^ escape sequence" ); + + return ( 1 ); + } +#endif + } + + return ( array[1] ); + } + + +/* numdigs - number of digits (includes leading sign) in number + * + * synopsis + * int numdigs, x; + * num = numdigs( x ); + */ +int numdigs( x ) +int x; + + { + if ( x < 0 ) + { + /* the only negative numbers we expect to encounter are very + * small ones + */ + if ( x < -9 ) + lexfatal( "assumption of small negative numbers botched in numdigs()" ); + + return ( 2 ); + } + + if ( x < 10 ) + return ( 1 ); + else if ( x < 100 ) + return ( 2 ); + else if ( x < 1000 ) + return ( 3 ); + else if ( x < 10000 ) + return ( 4 ); + else if ( x < 100000 ) + return ( 5 ); + else + return ( 6 ); + } + + +/* otoi - convert an octal digit string to an integer value + * + * synopsis: + * int val, otoi(); + * char str[]; + * val = otoi( str ); + */ + +int otoi( str ) +char str[]; + + { +#ifdef FTLSOURCE + fortran int gctoi() + int dummy = 1; + + return ( gctoi( str, dummy, 8 ) ); +#else + int result; + + (void) sscanf( str, "%o", &result ); + + return ( result ); +#endif + } + + + + +/* reallocate_array - increase the size of a dynamic array */ + +char *reallocate_array( array, size, element_size ) +char *array; +int size, element_size; + + { + register char *new_array = realloc( array, + (unsigned) (size * element_size )); + + if ( new_array == NULL ) + lexfatal( "attempt to increase array size failed" ); + + return ( new_array ); + } + + +/* skelout - write out one section of the lexskel file + * + * synopsis + * skelout(); + * + * DESCRIPTION + * Copies from skelfile to stdout until a line beginning with "%%" or + * EOF is found. + */ +skelout() + + { + char buf[MAXLINE]; + + while ( fgets( buf, MAXLINE, skelfile ) != NULL ) + if ( buf[0] == '%' && buf[1] == '%' ) + break; + else + fputs( buf, stdout ); + } @@ -0,0 +1,542 @@ +/* lexnfa - NFA construction routines */ + +/* + * Copyright (c) University of California, 1987 + */ + +#include "flexdef.h" + +/* add_accept - add an accepting state to a machine + * + * synopsis + * + * add_accept( mach, headcnt, trailcnt ); + * + * the global ACCNUM is incremented and the new value becomes mach's + * accepting number. if headcnt or trailcnt is non-zero then the machine + * recognizes a pattern with trailing context. headcnt is the number of + * characters in the matched part of the pattern, or zero if the matched + * part has variable length. trailcnt is the number of trailing context + * characters in the pattern, or zero if the trailing context has variable + * length. + */ +add_accept( mach, headcnt, trailcnt ) +int mach, headcnt, trailcnt; + + { + int astate; + + printf( "case %d:\n", ++accnum ); + + if ( headcnt > 0 || trailcnt > 0 ) + { /* do trailing context magic to not match the trailing characters */ + printf( "YYDOBEFORESCAN; /* undo effects of setting up yytext */\n" ); + + if ( headcnt > 0 ) + { + if ( ! genftl || headcnt > 1 ) + printf( "yycbufp = yybbufp + %d;\n", + genftl ? headcnt - 1 : headcnt ); + else + printf( "yycbufp = yybbufp;\n" ); + } + + else + printf( "yycbufp -= %d;\n", trailcnt ); + + printf( "YYDOBEFOREACTION; /* set up yytext again */\n" ); + } + + line_directive_out(); + + /* hang the accepting number off an epsilon state. if it is associated + * with a state that has a non-epsilon out-transition, then the state + * will accept BEFORE it makes that transition, i.e. one character too soon + */ + + if ( transchar[finalst[mach]] == SYM_EPSILON ) + accptnum[finalst[mach]] = accnum; + + else + { + astate = mkstate( SYM_EPSILON ); + accptnum[astate] = accnum; + mach = link_machines( mach, astate ); + } + } + + +/* copysingl - make a given number of copies of a singleton machine + * + * synopsis + * + * newsng = copysingl( singl, num ); + * + * newsng - a new singleton composed of num copies of singl + * singl - a singleton machine + * num - the number of copies of singl to be present in newsng + */ +int copysingl( singl, num ) +int singl, num; + + { + int copy, i; + + copy = mkstate( SYM_EPSILON ); + + for ( i = 1; i <= num; ++i ) + copy = link_machines( copy, dupmachine( singl ) ); + + return ( copy ); + } + + +/* dumpnfa - debugging routine to write out an nfa + * + * synopsis + * int state1; + * dumpnfa( state1 ); + */ +dumpnfa( state1 ) +int state1; + + { + int sym, tsp1, tsp2, anum, ns; + + fprintf( stderr, "\n\n********** beginning dump of nfa with start state %d\n", + state1 ); + + /* we probably should loop starting at firstst[state1] and going to + * lastst[state1], but they're not maintained properly when we "or" + * all of the rules together. So we use our knowledge that the machine + * starts at state 1 and ends at lastnfa. + */ + + /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */ + for ( ns = 1; ns <= lastnfa; ++ns ) + { + fprintf( stderr, "state # %4d\t", ns ); + + sym = transchar[ns]; + tsp1 = trans1[ns]; + tsp2 = trans2[ns]; + anum = accptnum[ns]; + + fprintf( stderr, "%3d: %4d, %4d", sym, tsp1, tsp2 ); + + if ( anum != NIL ) + fprintf( stderr, " [%d]", anum ); + + fprintf( stderr, "\n" ); + } + + fprintf( stderr, "********** end of dump\n" ); + } + + +/* dupmachine - make a duplicate of a given machine + * + * synopsis + * + * copy = dupmachine( mach ); + * + * copy - holds duplicate of mach + * mach - machine to be duplicated + * + * note that the copy of mach is NOT an exact duplicate; rather, all the + * transition states values are adjusted so that the copy is self-contained, + * as the original should have been. + * + * also note that the original MUST be contiguous, with its low and high + * states accessible by the arrays firstst and lastst + */ +int dupmachine( mach ) +int mach; + + { + int i, state, init, last = lastst[mach], state_offset; + + for ( i = firstst[mach]; i <= last; ++i ) + { + state = mkstate( transchar[i] ); + + if ( trans1[i] != NO_TRANSITION ) + { + mkxtion( finalst[state], trans1[i] + state - i ); + + if ( transchar[i] == SYM_EPSILON && trans2[i] != NO_TRANSITION ) + mkxtion( finalst[state], trans2[i] + state - i ); + } + + accptnum[state] = accptnum[i]; + } + + state_offset = state - i + 1; + + init = mach + state_offset; + firstst[init] = firstst[mach] + state_offset; + finalst[init] = finalst[mach] + state_offset; + lastst[init] = lastst[mach] + state_offset; + + return ( init ); + } + + +/* link_machines - connect two machines together + * + * synopsis + * + * new = link_machines( first, last ); + * + * new - a machine constructed by connecting first to last + * first - the machine whose successor is to be last + * last - the machine whose predecessor is to be first + * + * note: this routine concatenates the machine first with the machine + * last to produce a machine new which will pattern-match first first + * and then last, and will fail if either of the sub-patterns fails. + * FIRST is set to new by the operation. last is unmolested. + */ +int link_machines( first, last ) +int first, last; + + { + if ( first == NIL ) + return ( last ); + + else if ( last == NIL ) + return ( first ); + + else + { + mkxtion( finalst[first], last ); + finalst[first] = finalst[last]; + lastst[first] = max( lastst[first], lastst[last] ); + firstst[first] = min( firstst[first], firstst[last] ); + + return ( first ); + } + } + + +/* mkbranch - make a machine that branches to two machines + * + * synopsis + * + * branch = mkbranch( first, second ); + * + * branch - a machine which matches either first's pattern or second's + * first, second - machines whose patterns are to be or'ed (the | operator) + * + * note that first and second are NEITHER destroyed by the operation. Also, + * the resulting machine CANNOT be used with any other "mk" operation except + * more mkbranch's. Compare with mkor() + */ +int mkbranch( first, second ) +int first, second; + + { + int eps; + + if ( first == NO_TRANSITION ) + return ( second ); + + else if ( second == NO_TRANSITION ) + return ( first ); + + eps = mkstate( SYM_EPSILON ); + + mkxtion( eps, first ); + mkxtion( eps, second ); + + return ( eps ); + } + + +/* mkclos - convert a machine into a closure + * + * synopsis + * new = mkclos( state ); + * + * new - a new state which matches the closure of "state" + */ +int mkclos( state ) +int state; + + { + return ( mkopt( mkposcl( state ) ) ); + } + + +/* mkopt - make a machine optional + * + * synopsis + * + * new = mkopt( mach ); + * + * new - a machine which optionally matches whatever mach matched + * mach - the machine to make optional + * + * notes: + * 1. mach must be the last machine created + * 2. mach is destroyed by the call + */ +int mkopt( mach ) +int mach; + + { + int eps; + + if ( ! SUPER_FREE_EPSILON(finalst[mach]) ) + { + eps = mkstate( SYM_EPSILON ); + mach = link_machines( mach, eps ); + } + + /* can't skimp on the following if FREE_EPSILON(mach) is true because + * some state interior to "mach" might point back to the beginning + * for a closure + */ + eps = mkstate( SYM_EPSILON ); + mach = link_machines( eps, mach ); + + mkxtion( mach, finalst[mach] ); + + return ( mach ); + } + + +/* mkor - make a machine that matches either one of two machines + * + * synopsis + * + * new = mkor( first, second ); + * + * new - a machine which matches either first's pattern or second's + * first, second - machines whose patterns are to be or'ed (the | operator) + * + * note that first and second are both destroyed by the operation + * the code is rather convoluted because an attempt is made to minimize + * the number of epsilon states needed + */ +int mkor( first, second ) +int first, second; + + { + int eps, orend; + + if ( first == NIL ) + return ( second ); + + else if ( second == NIL ) + return ( first ); + + else + { + /* see comment in mkopt() about why we can't use the first state + * of "first" or "second" if they satisfy "FREE_EPSILON" + */ + eps = mkstate( SYM_EPSILON ); + + first = link_machines( eps, first ); + + mkxtion( first, second ); + + if ( SUPER_FREE_EPSILON(finalst[first]) && + accptnum[finalst[first]] == NIL ) + { + orend = finalst[first]; + mkxtion( finalst[second], orend ); + } + + else if ( SUPER_FREE_EPSILON(finalst[second]) && + accptnum[finalst[second]] == NIL ) + { + orend = finalst[second]; + mkxtion( finalst[first], orend ); + } + + else + { + eps = mkstate( SYM_EPSILON ); + + first = link_machines( first, eps ); + orend = finalst[first]; + + mkxtion( finalst[second], orend ); + } + } + + finalst[first] = orend; + return ( first ); + } + + +/* mkposcl - convert a machine into a positive closure + * + * synopsis + * new = mkposcl( state ); + * + * new - a machine matching the positive closure of "state" + */ +int mkposcl( state ) +int state; + + { + int eps; + + if ( SUPER_FREE_EPSILON(finalst[state]) ) + { + mkxtion( finalst[state], state ); + return ( state ); + } + + else + { + eps = mkstate( SYM_EPSILON ); + mkxtion( eps, state ); + return ( link_machines( state, eps ) ); + } + } + + +/* mkrep - make a replicated machine + * + * synopsis + * new = mkrep( mach, lb, ub ); + * + * new - a machine that matches whatever "mach" matched from "lb" + * number of times to "ub" number of times + * + * note + * if "ub" is INFINITY then "new" matches "lb" or more occurances of "mach" + */ +int mkrep( mach, lb, ub ) +int mach, lb, ub; + + { + int base, tail, copy, i; + + base = copysingl( mach, lb - 1 ); + + if ( ub == INFINITY ) + { + copy = dupmachine( mach ); + mach = link_machines( mach, link_machines( base, mkclos( copy ) ) ); + } + + else + { + tail = mkstate( SYM_EPSILON ); + + for ( i = lb; i < ub; ++i ) + { + copy = dupmachine( mach ); + tail = mkopt( link_machines( copy, tail ) ); + } + + mach = link_machines( mach, link_machines( base, tail ) ); + } + + return ( mach ); + } + + +/* mkstate - create a state with a transition on a given symbol + * + * synopsis + * + * state = mkstate( sym ); + * + * state - a new state matching sym + * sym - the symbol the new state is to have an out-transition on + * + * note that this routine makes new states in ascending order through the + * state array (and increments LASTNFA accordingly). The routine DUPMACHINE + * relies on machines being made in ascending order and that they are + * CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge + * that it admittedly is) + */ +int mkstate( sym ) +int sym; + + { + if ( ++lastnfa >= current_mns ) + { + if ( (current_mns += MNS_INCREMENT) >= MAXIMUM_MNS ) + lerrif( "input rules are too complicated (>= %d NFA states)", + current_mns ); + + ++num_reallocs; + + transchar = reallocate_integer_array( transchar, current_mns ); + trans1 = reallocate_integer_array( trans1, current_mns ); + trans2 = reallocate_integer_array( trans2, current_mns ); + accptnum = reallocate_integer_array( accptnum, current_mns ); + firstst = reallocate_integer_array( firstst, current_mns ); + finalst = reallocate_integer_array( finalst, current_mns ); + lastst = reallocate_integer_array( lastst, current_mns ); + } + + transchar[lastnfa] = sym; + trans1[lastnfa] = NO_TRANSITION; + trans2[lastnfa] = NO_TRANSITION; + accptnum[lastnfa] = NIL; + firstst[lastnfa] = lastnfa; + finalst[lastnfa] = lastnfa; + lastst[lastnfa] = lastnfa; + + /* fix up equivalence classes base on this transition. Note that any + * character which has its own transition gets its own equivalence class. + * Thus only characters which are only in character classes have a chance + * at being in the same equivalence class. E.g. "a|b" puts 'a' and 'b' + * into two different equivalence classes. "[ab]" puts them in the same + * equivalence class (barring other differences elsewhere in the input). + */ + + if ( sym < 0 ) + { + /* we don't have to update the equivalence classes since that was + * already done when the ccl was created for the first time + */ + } + + else if ( sym == SYM_EPSILON ) + ++numeps; + + else + { + if ( useecs ) + mkechar( sym, nextecm, ecgroup ); + } + + return ( lastnfa ); + } + + +/* mkxtion - make a transition from one state to another + * + * synopsis + * + * mkxtion( statefrom, stateto ); + * + * statefrom - the state from which the transition is to be made + * stateto - the state to which the transition is to be made + */ +mkxtion( statefrom, stateto ) +int statefrom, stateto; + + { + if ( trans1[statefrom] == NO_TRANSITION ) + trans1[statefrom] = stateto; + + else if ( (transchar[statefrom] != SYM_EPSILON) || + (trans2[statefrom] != NO_TRANSITION) ) + lexfatal( "found too many transitions in mkxtion()" ); + + else + { /* second out-transition for an epsilon state */ + ++eps2; + trans2[statefrom] = stateto; + } + } @@ -0,0 +1,473 @@ +/* lexparse.y - parser for flex input */ + +/* + * Copyright (c) University of California, 1987 + */ + +%token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL + +%{ +#include "flexdef.h" + +int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen; +int trlcontxt, xcluflg, cclsorted, varlength; +char clower(); + +static int madeany = false; /* whether we've made the '.' character class */ + +%} + +%% +goal : initlex sect1 sect1end sect2 + ; + +initlex : + { + /* initialize for processing rules */ + + /* create default DFA start condition */ + scinstal( "0", false ); + } + ; + +sect1 : sect1 startconddecl WHITESPACE namelist1 '\n' + | + | error '\n' + { synerr( "unknown error processing section 1" ); } + ; + +sect1end : SECTEND + ; + +startconddecl : SCDECL + { + /* these productions are separate from the s1object + * rule because the semantics must be done before + * we parse the remainder of an s1object + */ + + xcluflg = false; + } + + | XSCDECL + { xcluflg = true; } + ; + +namelist1 : namelist1 WHITESPACE NAME + { scinstal( nmstr, xcluflg ); } + + | NAME + { scinstal( nmstr, xcluflg ); } + + | error + { synerr( "bad start condition list" ); } + ; + +sect2 : sect2 initforrule lexrule '\n' + | + ; + +initforrule : + { + /* initialize for a parse of one rule */ + trlcontxt = varlength = false; + trailcnt = headcnt = rulelen = 0; + } + ; + +lexrule : scon '^' re eol + { + pat = link_machines( $3, $4 ); + add_accept( pat, headcnt, trailcnt ); + + for ( i = 1; i <= actvp; ++i ) + scbol[actvsc[i]] = mkbranch( scbol[actvsc[i]], pat ); + } + + | scon re eol + { + pat = link_machines( $2, $3 ); + add_accept( pat, headcnt, trailcnt ); + + for ( i = 1; i <= actvp; ++i ) + scset[actvsc[i]] = mkbranch( scset[actvsc[i]], pat ); + } + + | '^' re eol + { + pat = link_machines( $2, $3 ); + add_accept( pat, headcnt, trailcnt ); + + /* add to all non-exclusive start conditions, + * including the default (0) start condition + */ + + for ( i = 1; i <= lastsc; ++i ) + if ( ! scxclu[i] ) + scbol[i] = mkbranch( scbol[i], pat ); + } + + | re eol + { + pat = link_machines( $1, $2 ); + add_accept( pat, headcnt, trailcnt ); + + for ( i = 1; i <= lastsc; ++i ) + if ( ! scxclu[i] ) + scset[i] = mkbranch( scset[i], pat ); + } + + | error + { synerr( "unrecognized rule" ); } + ; + +scon : '<' namelist2 '>' + ; + +namelist2 : namelist2 ',' NAME + { + if ( (scnum = sclookup( nmstr )) == 0 ) + synerr( "undeclared start condition" ); + + else + actvsc[++actvp] = scnum; + } + + | NAME + { + if ( (scnum = sclookup( nmstr )) == 0 ) + synerr( "undeclared start condition" ); + else + actvsc[actvp = 1] = scnum; + } + + | error + { synerr( "bad start condition list" ); } + ; + +eol : '$' + { + if ( trlcontxt ) + { + synerr( "trailing context used twice" ); + $$ = mkstate( SYM_EPSILON ); + } + else + { + trlcontxt = true; + + if ( ! varlength ) + headcnt = rulelen; + + ++rulelen; + trailcnt = 1; + + eps = mkstate( SYM_EPSILON ); + $$ = link_machines( eps, mkstate( '\n' ) ); + } + } + + | + { + $$ = mkstate( SYM_EPSILON ); + + if ( trlcontxt ) + { + if ( varlength && headcnt == 0 ) + /* both head and trail are variable-length */ + synerr( "illegal trailing context" ); + + else + trailcnt = rulelen; + } + } + ; + +re : re '|' series + { + varlength = true; + + $$ = mkor( $1, $3 ); + } + + | re2 series + { $$ = link_machines( $1, $2 ); } + + | series + { $$ = $1; } + ; + + +re2 : re '/' + { + /* this rule is separate from the others for "re" so + * that the reduction will occur before the trailing + * series is parsed + */ + + if ( trlcontxt ) + synerr( "trailing context used twice" ); + else + trlcontxt = true; + + if ( varlength ) + /* the trailing context had better be fixed-length */ + varlength = false; + else + headcnt = rulelen; + + rulelen = 0; + $$ = $1; + } + ; + +series : series singleton + { + /* this is where concatenation of adjacent patterns + * gets done + */ + $$ = link_machines( $1, $2 ); + } + + | singleton + { $$ = $1; } + ; + +singleton : singleton '*' + { + varlength = true; + + $$ = mkclos( $1 ); + } + + | singleton '+' + { + varlength = true; + + $$ = mkposcl( $1 ); + } + + | singleton '?' + { + varlength = true; + + $$ = mkopt( $1 ); + } + + | singleton '{' NUMBER ',' NUMBER '}' + { + varlength = true; + + if ( $3 > $5 || $3 <= 0 ) + { + synerr( "bad iteration values" ); + $$ = $1; + } + else + $$ = mkrep( $1, $3, $5 ); + } + + | singleton '{' NUMBER ',' '}' + { + varlength = true; + + if ( $3 <= 0 ) + { + synerr( "iteration value must be positive" ); + $$ = $1; + } + + else + $$ = mkrep( $1, $3, INFINITY ); + } + + | singleton '{' NUMBER '}' + { + rulelen = rulelen + $3; + + if ( $3 <= 0 ) + { + synerr( "iteration value must be positive" ); + $$ = $1; + } + + else + $$ = link_machines( $1, copysingl( $1, $3 - 1 ) ); + } + + | '.' + { + if ( ! madeany ) + { + /* create the '.' character class */ + anyccl = cclinit(); + ccladd( anyccl, '\n' ); + cclnegate( anyccl ); + + if ( useecs ) + mkeccl( ccltbl + cclmap[anyccl], + ccllen[anyccl], nextecm, + ecgroup, CSIZE ); + + madeany = true; + } + + ++rulelen; + + $$ = mkstate( -anyccl ); + } + + | fullccl + { + if ( ! cclsorted ) + /* sort characters for fast searching. We use a + * shell sort since this list could be large. + */ + cshell( ccltbl + cclmap[$1], ccllen[$1] ); + + if ( useecs ) + mkeccl( ccltbl + cclmap[$1], ccllen[$1], + nextecm, ecgroup, CSIZE ); + + ++rulelen; + + $$ = mkstate( -$1 ); + } + + | PREVCCL + { + ++rulelen; + + $$ = mkstate( -$1 ); + } + + | '"' string '"' + { $$ = $2; } + + | '(' re ')' + { $$ = $2; } + + | CHAR + { + ++rulelen; + + if ( $1 == '\0' ) + synerr( "null in rule" ); + + if ( caseins && $1 >= 'A' && $1 <= 'Z' ) + $1 = clower( $1 ); + + $$ = mkstate( $1 ); + } + ; + +fullccl : '[' ccl ']' + { $$ = $2; } + + | '[' '^' ccl ']' + { + /* *Sigh* - to be compatible Unix lex, negated ccls + * match newlines + */ +#ifdef NOTDEF + ccladd( $3, '\n' ); /* negated ccls don't match '\n' */ + cclsorted = false; /* because we added the newline */ +#endif + cclnegate( $3 ); + $$ = $3; + } + ; + +ccl : ccl CHAR '-' CHAR + { + if ( $2 > $4 ) + synerr( "negative range in character class" ); + + else + { + if ( caseins ) + { + if ( $2 >= 'A' && $2 <= 'Z' ) + $2 = clower( $2 ); + if ( $4 >= 'A' && $4 <= 'Z' ) + $4 = clower( $4 ); + } + + for ( i = $2; i <= $4; ++i ) + ccladd( $1, i ); + + /* keep track if this ccl is staying in alphabetical + * order + */ + cclsorted = cclsorted && ($2 > lastchar); + lastchar = $4; + } + + $$ = $1; + } + + | ccl CHAR + { + if ( caseins ) + if ( $2 >= 'A' && $2 <= 'Z' ) + $2 = clower( $2 ); + + ccladd( $1, $2 ); + cclsorted = cclsorted && ($2 > lastchar); + lastchar = $2; + $$ = $1; + } + + | + { + cclsorted = true; + lastchar = 0; + $$ = cclinit(); + } + ; + +string : string CHAR + { + if ( caseins ) + if ( $2 >= 'A' && $2 <= 'Z' ) + $2 = clower( $2 ); + + ++rulelen; + + $$ = link_machines( $1, mkstate( $2 ) ); + } + + | + { $$ = mkstate( SYM_EPSILON ); } + ; + +%% + +/* synerr - report a syntax error + * + * synopsis + * char str[]; + * synerr( str ); + */ + +synerr( str ) +char str[]; + + { + syntaxerror = true; + fprintf( stderr, "Syntax error at line %d: %s\n", linenum, str ); + } + + +/* yyerror - eat up an error message from the parser + * + * synopsis + * char msg[]; + * yyerror( msg ); + */ + +yyerror( msg ) +char msg[]; + + { + } @@ -0,0 +1,370 @@ +/* flexscan.l - scanner for flex input */ + +/* + * Copyright (c) University of California, 1987 + */ + +%{ +#include "flexdef.h" +#include "strings.h" +#include "y.tab.h" + +#undef YYDECL +#define YYDECL \ + int lexscan() + +#undef yywrap +#define yywrap(result) \ + { \ + if ( ! did_second_skelout ) \ + skelout(); \ + result = 1; \ + } + +#define RETURNCHAR \ + yylval = yytext[0]; \ + return ( CHAR ); + +#define RETURNNAME \ + (void) strcpy( nmstr, yytext ); \ + return ( NAME ); + +#define PUT_BACK_STRING(str, start) \ + for ( i = strlen( str ) - 1; i >= start; --i ) \ + unput(str[i]) +%} + +%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE +%x FIRSTCCL CCL ACTION RECOVER BRACEERROR C_COMMENT ACTION_COMMENT +%x ACTION_STRING PERCENT_BRACE_ACTION + +WS [ \t]+ + +OPTWS [ \t]* + +NAME [a-z_][a-z_0-9]* + +SCNAME {NAME} + +ESCSEQ \\([^^\n]|"^".|0[0-9]{1,3}) + +%% + static int bracelevel; + int i, cclval; + char nmdef[MAXLINE], myesc(); + static int didadef, did_second_skelout = false; + +^{WS}.*\n ++linenum; ECHO; /* indented code */ +^#.*\n ++linenum; ECHO; /* either a Ratfor comment or a CPP directive */ +^"/*" ECHO; BEGIN(C_COMMENT); +^"%s"(tart)? return ( SCDECL ); +^"%x" return ( XSCDECL ); +^"%{".*\n ++linenum; line_directive_out(); BEGIN(CODEBLOCK); +{WS} return ( WHITESPACE ); + +^"%%".* { + sectnum = 2; + skelout(); + line_directive_out(); + BEGIN(SECT2PROLOG); + return ( SECTEND ); + } + +^"%"[^sx{%].*\n { + fprintf( stderr, + "old-style lex command at line %d ignored:\n\t%s", + linenum, yytext ); + ++linenum; + } + +^{NAME} { + (void) strcpy( nmstr, yytext ); + didadef = false; + BEGIN(PICKUPDEF); + } + +{SCNAME} RETURNNAME; +^{OPTWS}\n ++linenum; /* allows blank lines in section 1 */ +\n ++linenum; return ( '\n' ); +. synerr( "illegal character" ); BEGIN(RECOVER); + + +<C_COMMENT>"*/" ECHO; BEGIN(0); +<C_COMMENT>"*/".*\n ++linenum; ECHO; BEGIN(0); +<C_COMMENT>[^*\n]+ ECHO; +<C_COMMENT>"*" ECHO; +<C_COMMENT>\n ++linenum; ECHO; + +<CODEBLOCK>^"%}".*\n ++linenum; BEGIN(0); +<CODEBLOCK>.*\n ++linenum; ECHO; + +<PICKUPDEF>{WS} /* separates name and definition */ + +<PICKUPDEF>[^ \t\n].* { + (void) strcpy( nmdef, yytext ); + + for ( i = strlen( nmdef ) - 1; + i >= 0 && + nmdef[i] == ' ' || nmdef[i] == '\t'; + --i ) + ; + + nmdef[i + 1] = '\0'; + + ndinstal( nmstr, nmdef ); + didadef = true; + } + +<PICKUPDEF>\n { + if ( ! didadef ) + synerr( "incomplete name definition" ); + BEGIN(0); + ++linenum; + } + +<RECOVER>.*\n ++linenum; RETURNNAME; + + +<SECT2PROLOG>.*\n/[^ \t\n] { + ++linenum; + ECHO; + skelout(); + did_second_skelout = true; + BEGIN(SECT2); + } + +<SECT2PROLOG>.*\n ++linenum; ECHO; + +<SECT2>^{OPTWS}\n ++linenum; /* allow blank lines in section 2 */ +<SECT2>^{WS}.*\n { + synerr( "indented code found outside of action" ); + ++linenum; + } +<SECT2>"<" BEGIN(SC); return ( '<' ); +<SECT2>^"^" return ( '^' ); +<SECT2>\" BEGIN(QUOTE); return ( '"' ); +<SECT2>"{"/[0-9] BEGIN(NUM); return ( '{' ); +<SECT2>"{"[^0-9\n][^}\n]* BEGIN(BRACEERROR); +<SECT2>"$"/[ \t\n] return ( '$' ); + +<SECT2>{WS}"%{" { + bracelevel = 1; + BEGIN(PERCENT_BRACE_ACTION); + return ( '\n' ); + } +<SECT2>{WS}"|".*\n ++linenum; return ( '\n' ); + +<SECT2>{WS} | +<SECT2>{OPTWS}/\n { + bracelevel = 0; + BEGIN(ACTION); + return ( '\n' ); + } + +<SECT2>^{OPTWS}\n ++linenum; return ( '\n' ); + +<SECT2>^"%%".* { + /* guarentee that the SECT3 rule will have something + * to match + */ + yyless(1); + sectnum = 3; + BEGIN(SECT3); + return ( EOF ); /* to stop the parser */ + } + +<SECT2>"["([^\\\]\n]|{ESCSEQ})+"]" { + (void) strcpy( nmstr, yytext ); + + /* check to see if we've already encountered this ccl */ + if ( (cclval = ccllookup( nmstr )) ) + { + yylval = cclval; + ++cclreuse; + return ( PREVCCL ); + } + else + { + /* we fudge a bit. We know that this ccl will + * soon be numbered as lastccl + 1 by cclinit + */ + cclinstal( nmstr, lastccl + 1 ); + + /* push back everything but the leading bracket + * so the ccl can be rescanned + */ + PUT_BACK_STRING(nmstr, 1); + + BEGIN(FIRSTCCL); + return ( '[' ); + } + } + +<SECT2>"{"{NAME}"}" { + register char *nmdefptr; + char *ndlookup(); + + (void) strcpy( nmstr, yytext ); + nmstr[yyleng - 1] = '\0'; /* chop trailing brace */ + + /* lookup from "nmstr + 1" to chop leading brace */ + if ( ! (nmdefptr = ndlookup( nmstr + 1 )) ) + synerr( "undefined {name}" ); + + else + { /* push back name surrounded by ()'s */ + unput(')'); + PUT_BACK_STRING(nmdefptr, 0); + unput('('); + } + } + +<SECT2>[/|*+?.()] return ( yytext[0] ); +<SECT2>. RETURNCHAR; +<SECT2>\n ++linenum; return ( '\n' ); + + +<SC>"," return ( ',' ); +<SC>">" BEGIN(SECT2); return ( '>' ); +<SC>">"/"^" BEGIN(CARETISBOL); return ( '>' ); +<SC>{SCNAME} RETURNNAME; +<SC>. synerr( "bad start condition name" ); + +<CARETISBOL>"^" BEGIN(SECT2); return ( '^' ); + + +<QUOTE>[^"\n] RETURNCHAR; +<QUOTE>\" BEGIN(SECT2); return ( '"' ); + +<QUOTE>\n { + synerr( "missing quote" ); + BEGIN(SECT2); + ++linenum; + return ( '"' ); + } + + +<FIRSTCCL>"^"/[^-\n] BEGIN(CCL); return ( '^' ); +<FIRSTCCL>"^"/- return ( '^' ); +<FIRSTCCL>- BEGIN(CCL); yylval = '-'; return ( CHAR ); +<FIRSTCCL>. BEGIN(CCL); RETURNCHAR; + +<CCL>-/[^\]\n] return ( '-' ); +<CCL>[^\]\n] RETURNCHAR; +<CCL>"]" BEGIN(SECT2); return ( ']' ); + + +<NUM>[0-9]+ { + yylval = myctoi( yytext ); + return ( NUMBER ); + } + +<NUM>"," return ( ',' ); +<NUM>"}" BEGIN(SECT2); return ( '}' ); + +<NUM>. { + synerr( "bad character inside {}'s" ); + BEGIN(SECT2); + return ( '}' ); + } + +<NUM>\n { + synerr( "missing }" ); + BEGIN(SECT2); + ++linenum; + return ( '}' ); + } + + +<BRACEERROR>"}" synerr( "bad name in {}'s" ); BEGIN(SECT2); +<BRACEERROR>\n synerr( "missing }" ); ++linenum; BEGIN(SECT2); + + +<PERCENT_BRACE_ACTION>{OPTWS}"%}".* bracelevel = 0; +<PERCENT_BRACE_ACTION>.* ECHO; +<PERCENT_BRACE_ACTION>\n { + ++linenum; + ECHO; + if ( bracelevel == 0 ) + { + if ( genftl ) + puts( "\tbreak;" ); + BEGIN(SECT2); + } + } + +<ACTION>"{" ECHO; ++bracelevel; +<ACTION>"}" ECHO; --bracelevel; +<ACTION>[^{}"'/\n]+ ECHO; +<ACTION>"/*" ECHO; BEGIN(ACTION_COMMENT); +<ACTION>"'"([^'\\\n]|\\.)*"'" ECHO; /* character constant */ +<ACTION>\" ECHO; BEGIN(ACTION_STRING); +<ACTION>\n { + ++linenum; + ECHO; + if ( bracelevel == 0 ) + { + if ( genftl ) + puts( "\tbreak;" ); + BEGIN(SECT2); + } + } +<ACTION>. ECHO; + +<ACTION_COMMENT>"*/" ECHO; BEGIN(ACTION); +<ACTION_COMMENT>[^*\n]+ ECHO; +<ACTION_COMMENT>"*" ECHO; +<ACTION_COMMENT>\n ++linenum; ECHO; +<ACTION_COMMENT>. ECHO; + +<ACTION_STRING>[^"\\\n]+ ECHO; +<ACTION_STRING>\\. ECHO; +<ACTION_STRING>\n ++linenum; ECHO; +<ACTION_STRING>\" ECHO; BEGIN(ACTION); +<ACTION_STRING>. ECHO; + + +<SECT2,QUOTE,CCL>{ESCSEQ} { + yylval = myesc( yytext ); + return ( CHAR ); + } + +<FIRSTCCL>{ESCSEQ} { + yylval = myesc( yytext ); + BEGIN(CCL); + return ( CHAR ); + } + + +<SECT3>.|\n { + register int numchars; + + /* black magic - we know the names of a lex scanner's + * internal variables. We cap the input buffer with + * an end-of-string and dump it to the output. + */ + YYDOBEFORESCAN; /* recover from setting up yytext */ + + yychbuf[yyebufp + 1] = '\0'; + + /* ignore the first character; it's the second '%' + * put back by the yyless(1) above + */ + fputs( yychbuf + yycbufp + 1, stdout ); + + /* if we don't do this, the data written by write() + * can get overwritten when stdout is finally flushed + */ + (void) fflush( stdout ); + + while ( (numchars = read( fileno(yyin), yychbuf, + YYBUFMAX )) > 0 ) + (void) write( fileno(stdout), yychbuf, numchars ); + + if ( numchars < 0 ) + lexerror( "fatal read error in section 3" ); + + return ( EOF ); + } + +%% @@ -0,0 +1,291 @@ +/* lexsym - symbol table routines */ + +/* + * Copyright (c) University of California, 1987 + */ + +#include "flexdef.h" + +struct hash_entry *ndtbl[NAME_TABLE_HASH_SIZE]; +struct hash_entry *sctbl[START_COND_HASH_SIZE]; +struct hash_entry *ccltab[CCL_HASH_SIZE]; + + +/* addsym - add symbol and definition to symbol table + * + * synopsis + * char sym[], def[]; + * hash_table table; + * int table_size; + * -1/0 = addsym( sym, def, table, table_size ); + * + * -1 is returned if the symbol already exists, and the change not made. + */ + +int addsym( sym, def, table, table_size ) +register char sym[]; +char def[]; +hash_table table; +int table_size; + + { + int hash_val = hashfunct( sym, table_size ); + register struct hash_entry *entry = table[hash_val]; + register struct hash_entry *new_entry; + register struct hash_entry *successor; + char *malloc(); + + while ( entry ) + { + if ( ! strcmp( sym, entry->name ) ) + { /* entry already exists */ + return ( -1 ); + } + + entry = entry->next; + } + + /* create new entry */ + new_entry = (struct hash_entry *) malloc( sizeof( struct hash_entry ) ); + + if ( new_entry == NULL ) + lexfatal( "symbol table memory allocation failed" ); + + if ( (successor = table[hash_val]) ) + { + new_entry->next = successor; + successor->prev = new_entry; + } + else + new_entry->next = NULL; + + new_entry->prev = NULL; + new_entry->name = sym; + new_entry->val = def; + + table[hash_val] = new_entry; + + return ( 0 ); + } + + +/* cclinstal - save the text of a character class + * + * synopsis + * char ccltxt[]; + * int cclnum; + * cclinstal( ccltxt, cclnum ); + */ +cclinstal( ccltxt, cclnum ) +char ccltxt[]; +int cclnum; + + { + /* we don't bother checking the return status because we are not called + * unless the symbol is new + */ + char *copy_string(); + + (void) addsym( copy_string( ccltxt ), (char *) cclnum, + ccltab, CCL_HASH_SIZE ); + } + + +/* ccllookup - lookup the number associated with character class text + * + * synopsis + * char ccltxt[]; + * int ccllookup, cclval; + * cclval/0 = ccllookup( ccltxt ); + */ +int ccllookup( ccltxt ) +char ccltxt[]; + + { + char *getdef(); + + return ( (int) getdef( ccltxt, ccltab, CCL_HASH_SIZE ) ); + } + + +/* findsym - find symbol in symbol table + * + * synopsis + * char sym[]; + * hash_table table; + * int table_size; + * struct hash_entry *entry, *findsym(); + * entry = findsym( sym, table, table_size ); + */ + +struct hash_entry *findsym( sym, table, table_size ) +register char sym[]; +hash_table table; +int table_size; + + { + register struct hash_entry *entry = table[hashfunct( sym, table_size )]; + + while ( entry ) + { + if ( ! strcmp( sym, entry->name ) ) + return ( entry ); + entry = entry->next; + } + + return ( NULL ); + } + + +/* getdef - get symbol definition from symbol table + * + * synopsis + * char sym[]; + * hash_table table; + * int table_size; + * char *def, *getdef(); + * def = getdef( sym, table, table_size ); + */ + +char *getdef( sym, table, table_size ) +register char sym[]; +hash_table table; +int table_size; + + { + register struct hash_entry *entry = findsym( sym, table, table_size ); + + if ( entry ) + return ( entry->val ); + + return ( NULL ); + } + + +/* hashfunct - compute the hash value for "str" and hash size "hash_size" + * + * synopsis + * char str[]; + * int hash_size, hash_val; + * hash_val = hashfunct( str, hash_size ); + */ + +int hashfunct( str, hash_size ) +register char str[]; +int hash_size; + + { + register int hashval; + register int locstr; + + hashval = 0; + locstr = 0; + + while ( str[locstr] ) + hashval = ((hashval << 1) + str[locstr++]) % hash_size; + + return ( hashval ); + } + + +/* ndinstal - install a name definition + * + * synopsis + * char nd[], def[]; + * ndinstal( nd, def ); + */ +ndinstal( nd, def ) +char nd[], def[]; + + { + char *copy_string(); + + if ( addsym( copy_string( nd ), copy_string( def ), + ndtbl, NAME_TABLE_HASH_SIZE ) ) + synerr( "name defined twice" ); + } + + +/* ndlookup - lookup a name definition + * + * synopsis + * char nd[], *def; + * char *ndlookup(); + * def/NULL = ndlookup( nd ); + */ +char *ndlookup( nd ) +char nd[]; + + { + char *getdef(); + + return ( getdef( nd, ndtbl, NAME_TABLE_HASH_SIZE ) ); + } + + +/* scinstal - make a start condition + * + * synopsis + * char str[]; + * int xcluflg; + * scinstal( str, xcluflg ); + * + * NOTE + * the start condition is Exclusive if xcluflg is true + */ +scinstal( str, xcluflg ) +char str[]; +int xcluflg; + + { + char *copy_string(); + + if ( genftl ) + { + /* bit of a hack. We know how the default start-condition is + * declared, and don't put out a define for it, because it + * would come out as "#define 0 1" + */ + + if ( strcmp( str, "0" ) ) + printf( "#define %s %d\n", str, lastsc * 2 ); + } + + else + printf( "define(YYLEX_SC_%s,%d)\n", str, lastsc * 2 ); + + if ( ++lastsc >= current_max_scs ) + { + current_max_scs += MAX_SCS_INCREMENT; + + ++num_reallocs; + + scset = reallocate_integer_array( scset, current_max_scs ); + scbol = reallocate_integer_array( scbol, current_max_scs ); + scxclu = reallocate_integer_array( scxclu, current_max_scs ); + actvsc = reallocate_integer_array( actvsc, current_max_scs ); + } + + if ( addsym( copy_string( str ), (char *) lastsc, + sctbl, START_COND_HASH_SIZE ) ) + lerrsf( "start condition %s declared twice", str ); + + scset[lastsc] = mkstate( SYM_EPSILON ); + scbol[lastsc] = mkstate( SYM_EPSILON ); + scxclu[lastsc] = xcluflg; + } + + +/* sclookup - lookup the number associated with a start condition + * + * synopsis + * char str[], scnum; + * int sclookup; + * scnum/0 = sclookup( str ); + */ +int sclookup( str ) +char str[]; + + { + return ( (int) getdef( str, sctbl, START_COND_HASH_SIZE ) ); + } diff --git a/tblcmp.c b/tblcmp.c new file mode 100644 index 0000000..ae9bfd7 --- /dev/null +++ b/tblcmp.c @@ -0,0 +1,1324 @@ +/* lexcmp - table compression routines */ + +/* + * Copyright (c) University of California, 1987 + */ + +#include "flexdef.h" + +/* bldtbl - build table entries for dfa state + * + * synopsis + * int state[numecs], statenum, totaltrans, comstate, comfreq; + * bldtbl( state, statenum, totaltrans, comstate, comfreq ); + * + * State is the statenum'th dfa state. It is indexed by equivalence class and + * gives the number of the state to enter for a given equivalence class. + * totaltrans is the total number of transitions out of the state. Comstate + * is that state which is the destination of the most transitions out of State. + * Comfreq is how many transitions there are out of State to Comstate. + * + * A note on terminology: + * "protos" are transition tables which have a high probability of + * either being redundant (a state processed later will have an identical + * transition table) or nearly redundant (a state processed later will have + * many of the same out-transitions). A "most recently used" queue of + * protos is kept around with the hope that most states will find a proto + * which is similar enough to be usable, and therefore compacting the + * output tables. + * "templates" are a special type of proto. If a transition table is + * homogenous or nearly homogenous (all transitions go to the same destination) + * then the odds are good that future states will also go to the same destination + * state on basically the same character set. These homogenous states are + * so common when dealing with large rule sets that they merit special + * attention. If the transition table were simply made into a proto, then + * (typically) each subsequent, similar state will differ from the proto + * for two out-transitions. One of these out-transitions will be that + * character on which the proto does not go to the common destination, + * and one will be that character on which the state does not go to the + * common destination. Templates, on the other hand, go to the common + * state on EVERY transition character, and therefore cost only one + * difference. + */ +bldtbl( state, statenum, totaltrans, comstate, comfreq ) +int state[], statenum, totaltrans, comstate, comfreq; + + { + int extptr, extrct[2][CSIZE + 1]; + int mindiff, minprot, i, d; + int checkcom; + + /* If extptr is 0 then the first array of extrct holds the result of the + * "best difference" to date, which is those transitions which occur in + * "state" but not in the proto which, to date, has the fewest differences + * between itself and "state". If extptr is 1 then the second array of + * extrct hold the best difference. The two arrays are toggled + * between so that the best difference to date can be kept around and + * also a difference just created by checking against a candidate "best" + * proto. + */ + + extptr = 0; + + /* if the state has too few out-transitions, don't bother trying to + * compact its tables + */ + + if ( (totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE) ) + mkentry( state, numecs, statenum, JAMSTATE, totaltrans ); + + else + { + /* checkcom is true if we should only check "state" against + * protos which have the same "comstate" value + */ + + checkcom = comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE; + + minprot = firstprot; + mindiff = totaltrans; + + if ( checkcom ) + { + /* find first proto which has the same "comstate" */ + for ( i = firstprot; i != NIL; i = protnext[i] ) + if ( protcomst[i] == comstate ) + { + minprot = i; + mindiff = tbldiff( state, minprot, extrct[extptr] ); + break; + } + } + + else + { + /* since we've decided that the most common destination out + * of "state" does not occur with a high enough frequency, + * we set the "comstate" to zero, assuring that if this state + * is entered into the proto list, it will not be considered + * a template. + */ + comstate = 0; + + if ( firstprot != NIL ) + { + minprot = firstprot; + mindiff = tbldiff( state, minprot, extrct[extptr] ); + } + } + + /* we now have the first interesting proto in "minprot". If + * it matches within the tolerances set for the first proto, + * we don't want to bother scanning the rest of the proto list + * to see if we have any other reasonable matches. + */ + + if ( mindiff * 100 > totaltrans * FIRST_MATCH_DIFF_PERCENTAGE ) + { /* not a good enough match. Scan the rest of the protos */ + for ( i = minprot; i != NIL; i = protnext[i] ) + { + d = tbldiff( state, i, extrct[1 - extptr] ); + if ( d < mindiff ) + { + extptr = 1 - extptr; + mindiff = d; + minprot = i; + } + } + } + + /* check if the proto we've decided on as our best bet is close + * enough to the state we want to match to be usable + */ + + if ( mindiff * 100 > totaltrans * ACCEPTABLE_DIFF_PERCENTAGE ) + { + /* no good. If the state is homogeneous enough, we make a + * template out of it. Otherwise, we make a proto. + */ + + if ( comfreq * 100 >= totaltrans * TEMPLATE_SAME_PERCENTAGE ) + mktemplate( state, statenum, comstate ); + + else + { + mkprot( state, statenum, comstate ); + mkentry( state, numecs, statenum, JAMSTATE, totaltrans ); + } + } + + else + { /* use the proto */ + mkentry( extrct[extptr], numecs, statenum, + prottbl[minprot], mindiff ); + + /* if this state was sufficiently different from the proto + * we built it from, make it, too, a proto + */ + + if ( mindiff * 100 >= totaltrans * NEW_PROTO_DIFF_PERCENTAGE ) + mkprot( state, statenum, comstate ); + + /* since mkprot added a new proto to the proto queue, it's possible + * that "minprot" is no longer on the proto queue (if it happened + * to have been the last entry, it would have been bumped off). + * If it's not there, then the new proto took its physical place + * (though logically the new proto is at the beginning of the + * queue), so in that case the following call will do nothing. + */ + + mv2front( minprot ); + } + } + } + + +/* cmptmps - compress template table entries + * + * synopsis + * cmptmps(); + * + * template tables are compressed by using the 'template equivalence + * classes', which are collections of transition character equivalence + * classes which always appear together in templates - really meta-equivalence + * classes. until this point, the tables for templates have been stored + * up at the top end of the nxt array; they will now be compressed and have + * table entries made for them. + */ +cmptmps() + + { + int tmpstorage[CSIZE + 1]; + register int *tmp = tmpstorage, i, j; + int totaltrans, trans; + + peakpairs = numtemps * numecs + tblend; + + if ( usemecs ) + { + /* create equivalence classes base on data gathered on template + * transitions + */ + + nummecs = cre8ecs( tecfwd, tecbck, numecs ); + } + + else + nummecs = numecs; + + if ( lastdfa + numtemps + 1 >= current_max_dfas ) + increase_max_dfas(); + + /* loop through each template */ + + for ( i = 1; i <= numtemps; ++i ) + { + totaltrans = 0; /* number of non-jam transitions out of this template */ + + for ( j = 1; j <= numecs; ++j ) + { + trans = tnxt[numecs * i + j]; + + if ( usemecs ) + { + /* the absolute value of tecbck is the meta-equivalence class + * of a given equivalence class, as set up by cre8ecs + */ + if ( tecbck[j] > 0 ) + { + tmp[tecbck[j]] = trans; + + if ( trans > 0 ) + ++totaltrans; + } + } + + else + { + tmp[j] = trans; + + if ( trans > 0 ) + ++totaltrans; + } + } + + /* it is assumed (in a rather subtle way) in the skeleton that + * if we're using meta-equivalence classes, the def[] entry for + * all templates is the jam template, i.e. templates never default + * to other non-jam table entries (e.g. another template) + */ + + /* leave room for the jam-state after the last real state */ + mkentry( tmp, nummecs, lastdfa + i + 1, JAMSTATE, totaltrans ); + } + } + + + +/* expand_nxt_chk - expand the next check arrays */ + +expand_nxt_chk() + + { + register int old_max = current_max_xpairs; + + current_max_xpairs += MAX_XPAIRS_INCREMENT; + + ++num_reallocs; + + nxt = reallocate_integer_array( nxt, current_max_xpairs ); + chk = reallocate_integer_array( chk, current_max_xpairs ); + + bzero( (char *) (chk + old_max), + MAX_XPAIRS_INCREMENT * sizeof( int ) / sizeof( char ) ); + } + + +/* gentabs - generate data statements for the transition tables + * + * synopsis + * gentabs(); + */ +gentabs() + + { + int i, j, k, numrows, *accset, nacc, *acc_array; + char clower(); + + /* *everything* is done in terms of arrays starting at 1, so provide + * a null entry for the zero element of all FTL arrays + */ + static char ftl_long_decl[] = "static long int %c[%d] =\n { 0,\n"; + static char ftl_short_decl[] = "static short int %c[%d] =\n { 0,\n"; + static char ftl_char_decl[] = "static char %c[%d] =\n { 0,\n"; + + acc_array = allocate_integer_array( current_max_dfas ); + nummt = 0; + + if ( fulltbl ) + jambase = lastdfa + 1; /* home of "jam" pseudo-state */ + + printf( "#define YYJAM %d\n", jamstate ); + printf( "#define YYJAMBASE %d\n", jambase ); + + if ( usemecs ) + printf( "#define YYTEMPLATE %d\n", lastdfa + 2 ); + +#ifdef NOTDEF +/* unsupported code */ + if ( ! genftl ) + { /* ratfor scanner */ + static char vardata[] = "%%%sdata %s/%d/\n"; + static char dindent[] = DATAINDENTSTR; + static char arydecl[] = "integer %c(%d)\n"; + static char ary2decl[] = "integer %c(%d,%d)\n"; + + skelout(); + + if ( reject ) + { + /* write out the pointers into the accepting lists for each state, + * and the accepting lists + */ + + /* alist needs to be lastdfa + 2 because we tell where a state's + * accepting list ends by checking the beginning of the next state, + * and there's an entry in alist for the default, "jam" pseudo-state + * (this latter entry is needed because states jam by making + * a transition to the state; see the flex skeleton. By the way, + * I *think* we could get rid of the jam state entirely by + * slight modification of the skeleton ...) + */ + + printf( arydecl, ALIST, lastdfa + 2 ); + + printf( arydecl, ACCEPT, max( numas, 1 ) ); + } + + else + printf( arydecl, ALIST, lastdfa + 1 ); + + if ( useecs ) + printf( arydecl, ECARRAY, CSIZE ); + if ( usemecs ) + printf( arydecl, MATCHARRAY, numecs ); + + if ( fulltbl ) + { + printf( ary2decl, NEXTARRAY, lastdfa, numecs ); + printf( vardata, dindent, "yyjam", 0 ); + } + + else + { + printf( arydecl, BASEARRAY, lastdfa + numtemps ); + printf( arydecl, DEFARRAY, lastdfa + numtemps ); + printf( arydecl, NEXTARRAY, tblend ); + printf( arydecl, CHECKARRAY, tblend ); + + printf( vardata, dindent, "yyjam", jambase ); + + /* the first template begins right after the default jam table, + * which itself begins right after the last dfa + */ + + printf( vardata, dindent, "yytmp", lastdfa + 2 ); + } + } +#endif NOTDEF + + if ( reject ) + { + /* write out accepting list and pointer list + * first we generate the ACCEPT array. In the process, we compute + * the indices that will go into the ALIST array, and save the + * indices in the dfaacc array + */ + + if ( genftl ) + printf( accnum > 127 ? ftl_short_decl : ftl_char_decl, + ACCEPT, max( numas, 1 ) + 1 ); + + j = 1; /* index into ACCEPT array */ + + for ( i = 1; i <= lastdfa; ++i ) + { + acc_array[i] = j; + + if ( accsiz[i] != 0 ) + { + accset = dfaacc[i]; + nacc = accsiz[i]; + + if ( trace ) + fprintf( stderr, "state # %d accepts: ", i ); + + for ( k = 1; k <= nacc; ++k ) + { + mkdata( ACCEPT, j++, accset[k] ); + + if ( trace ) + { + fprintf( stderr, "[%d]", accset[k] ); + + if ( k < nacc ) + fputs( ", ", stderr ); + else + putc( '\n', stderr ); + } + } + } + } + + /* add accepting number for the "jam" state */ + acc_array[i] = j; + + dataend(); + } + + else + { + for ( i = 1; i <= lastdfa; ++i ) + acc_array[i] = (int) dfaacc[i]; + + acc_array[i] = 0; /* add (null) accepting number for jam state */ + } + + /* spit out ALIST array. If we're doing "reject", it'll be pointers + * into the ACCEPT array. Otherwise it's actual accepting numbers. + * In either case, we just dump the numbers. + */ + + if ( genftl ) + { + /* "lastdfa + 2" is the size of ALIST; includes room for FTL arrays + * beginning at 0 and for "jam" state + */ + k = lastdfa + 2; + + if ( reject ) + /* we put a "cap" on the table associating lists of accepting + * numbers with state numbers. This is needed because we tell + * where the end of an accepting list is by looking at where + * the list for the next state starts. + */ + ++k; + + printf( ((reject && numas > 126) || accnum > 127) ? + ftl_short_decl : ftl_char_decl, ALIST, k ); + } + + for ( i = 1; i <= lastdfa; ++i ) + { + mkdata( ALIST, i, acc_array[i] ); + + if ( ! reject && trace && acc_array[i] ) + fprintf( stderr, "state # %d accepts: [%d]\n", i, acc_array[i] ); + } + + /* add entry for "jam" state */ + mkdata( ALIST, i, acc_array[i] ); + + if ( reject ) + /* add "cap" for the list */ + mkdata( ALIST, i + 1, acc_array[i] ); + + dataend(); + + if ( useecs ) + { + /* write out equivalence classes */ + + if ( genftl ) + printf( ftl_char_decl, ECARRAY, CSIZE + 1 ); + + for ( i = 1; i <= CSIZE; ++i ) + { + if ( caseins && (i >= 'A') && (i <= 'Z') ) + ecgroup[i] = ecgroup[clower( i )]; + + ecgroup[i] = abs( ecgroup[i] ); + mkdata( ECARRAY, i, ecgroup[i] ); + } + + dataend(); + + if ( trace ) + { + fputs( "\n\nEquivalence Classes:\n\n", stderr ); + + numrows = (CSIZE + 1) / 8; + + for ( j = 1; j <= numrows; ++j ) + { + for ( i = j; i <= CSIZE; i = i + numrows ) + { + if ( i >= 1 && i <= 31 ) + fprintf( stderr, "^%c = %-2d", + 'A' + i - 1, ecgroup[i] ); + + else if ( i >= 32 && i <= 126 ) + fprintf( stderr, " %c = %-2d", i, ecgroup[i] ); + + else if ( i == 127 ) + fprintf( stderr, "^@ = %-2d", ecgroup[i] ); + + else + fprintf( stderr, "\nSomething Weird: %d = %d\n", i, + ecgroup[i] ); + + putc( '\t', stderr ); + } + + putc( '\n', stderr ); + } + } + } + + if ( usemecs ) + { + /* write out meta-equivalence classes (used to index templates with) */ + + if ( trace ) + fputs( "\n\nMeta-Equivalence Classes:\n", stderr ); + + if ( genftl ) + printf( ftl_char_decl, MATCHARRAY, numecs + 1 ); + + for ( i = 1; i <= numecs; ++i ) + { + if ( trace ) + fprintf( stderr, "%d = %d\n", i, abs( tecbck[i] ) ); + + mkdata( MATCHARRAY, i, abs( tecbck[i] ) ); + } + + dataend(); + } + + if ( ! fulltbl ) + { + int total_states = lastdfa + numtemps; + + if ( genftl ) + printf( tblend > 32766 ? ftl_long_decl : ftl_short_decl, + BASEARRAY, total_states + 1 ); + + for ( i = 1; i <= lastdfa; ++i ) + { + register int d = def[i]; + + if ( base[i] == JAMSTATE ) + base[i] = jambase; + + if ( d == JAMSTATE ) + def[i] = jamstate; + + else if ( d < 0 ) + { + /* template reference */ + ++tmpuses; + def[i] = lastdfa - d + 1; + } + + mkdata( BASEARRAY, i, base[i] ); + } + + /* generate jam state's base index */ + mkdata( BASEARRAY, i, base[i] ); + + for ( ++i /* skip jam state */; i <= total_states; ++i ) + { + mkdata( BASEARRAY, i, base[i] ); + def[i] = jamstate; + } + + dataend(); + + if ( genftl ) + printf( tblend > 32766 ? ftl_long_decl : ftl_short_decl, + DEFARRAY, total_states + 1 ); + + for ( i = 1; i <= total_states; ++i ) + mkdata( DEFARRAY, i, def[i] ); + + dataend(); + + if ( genftl ) + printf( lastdfa > 32766 ? ftl_long_decl : ftl_short_decl, + NEXTARRAY, tblend + 1 ); + + for ( i = 1; i <= tblend; ++i ) + { + if ( nxt[i] == 0 ) + nxt[i] = jamstate; /* new state is the JAM state */ + + mkdata( NEXTARRAY, i, nxt[i] ); + } + + dataend(); + + if ( genftl ) + printf( lastdfa > 32766 ? ftl_long_decl : ftl_short_decl, + CHECKARRAY, tblend + 1 ); + + for ( i = 1; i <= tblend; ++i ) + { + if ( chk[i] == 0 ) + ++nummt; + + mkdata( CHECKARRAY, i, chk[i] ); + } + + dataend(); + } + + skelout(); + + /* copy remainder of input to output */ + + line_directive_out(); + (void) lexscan(); /* copy remainder of input to output */ + } + + +/* inittbl - initialize transition tables + * + * synopsis + * inittbl(); + * + * Initializes "firstfree" to be one beyond the end of the table. Initializes + * all "chk" entries to be zero. Note that templates are built in their + * own tbase/tdef tables. They are shifted down to be contiguous + * with the non-template entries during table generation. + */ +inittbl() + + { + register int i; + + bzero( (char *) chk, current_max_xpairs * sizeof( int ) / sizeof( char ) ); + + tblend = 0; + firstfree = tblend + 1; + numtemps = 0; + + if ( usemecs ) + { + /* set up doubly-linked meta-equivalence classes + * these are sets of equivalence classes which all have identical + * transitions out of TEMPLATES + */ + + tecbck[1] = NIL; + + for ( i = 2; i <= numecs; ++i ) + { + tecbck[i] = i - 1; + tecfwd[i - 1] = i; + } + + tecfwd[numecs] = NIL; + } + } + + +/* mkdeftbl - make the default, "jam" table entries + * + * synopsis + * mkdeftbl(); + */ +mkdeftbl() + + { + int i; + + jamstate = lastdfa + 1; + + if ( tblend + numecs > current_max_xpairs ) + expand_nxt_chk(); + + for ( i = 1; i <= numecs; ++i ) + { + nxt[tblend + i] = 0; + chk[tblend + i] = jamstate; + } + + jambase = tblend; + + base[jamstate] = jambase; + + /* should generate a run-time array bounds check if + * ever used as a default + */ + def[jamstate] = BAD_SUBSCRIPT; + + tblend += numecs; + ++numtemps; + } + + +/* mkentry - create base/def and nxt/chk entries for transition array + * + * synopsis + * int state[numchars + 1], numchars, statenum, deflink, totaltrans; + * mkentry( state, numchars, statenum, deflink, totaltrans ); + * + * "state" is a transition array "numchars" characters in size, "statenum" + * is the offset to be used into the base/def tables, and "deflink" is the + * entry to put in the "def" table entry. If "deflink" is equal to + * "JAMSTATE", then no attempt will be made to fit zero entries of "state" + * (i.e. jam entries) into the table. It is assumed that by linking to + * "JAMSTATE" they will be taken care of. In any case, entries in "state" + * marking transitions to "SAME_TRANS" are treated as though they will be + * taken care of by whereever "deflink" points. "totaltrans" is the total + * number of transitions out of the state. If it is below a certain threshold, + * the tables are searched for an interior spot that will accomodate the + * state array. + */ +mkentry( state, numchars, statenum, deflink, totaltrans ) +register int *state; +int numchars, statenum, deflink, totaltrans; + + { + register int minec, maxec, i, baseaddr; + int tblbase, tbllast; + + if ( totaltrans == 0 ) + { /* there are no out-transitions */ + if ( deflink == JAMSTATE ) + base[statenum] = JAMSTATE; + else + base[statenum] = 0; + + def[statenum] = deflink; + return; + } + + for ( minec = 1; minec <= numchars; ++minec ) + { + if ( state[minec] != SAME_TRANS ) + if ( state[minec] != 0 || deflink != JAMSTATE ) + break; + } + + if ( totaltrans == 1 ) + { + /* there's only one out-transition. Save it for later to fill + * in holes in the tables. + */ + stack1( statenum, minec, state[minec], deflink ); + return; + } + + for ( maxec = numchars; maxec > 0; --maxec ) + { + if ( state[maxec] != SAME_TRANS ) + if ( state[maxec] != 0 || deflink != JAMSTATE ) + break; + } + + /* Whether we try to fit the state table in the middle of the table + * entries we have already generated, or if we just take the state + * table at the end of the nxt/chk tables, we must make sure that we + * have a valid base address (i.e. non-negative). Note that not only are + * negative base addresses dangerous at run-time (because indexing the + * next array with one and a low-valued character might generate an + * array-out-of-bounds error message), but at compile-time negative + * base addresses denote TEMPLATES. + */ + + /* find the first transition of state that we need to worry about. */ + if ( totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE ) + { /* attempt to squeeze it into the middle of the tabls */ + baseaddr = firstfree; + + while ( baseaddr < minec ) + { + /* using baseaddr would result in a negative base address below + * find the next free slot + */ + for ( ++baseaddr; chk[baseaddr] != 0; ++baseaddr ) + ; + } + + if ( baseaddr + maxec - minec >= current_max_xpairs ) + expand_nxt_chk(); + + for ( i = minec; i <= maxec; ++i ) + if ( state[i] != SAME_TRANS ) + if ( state[i] != 0 || deflink != JAMSTATE ) + if ( chk[baseaddr + i - minec] != 0 ) + { /* baseaddr unsuitable - find another */ + for ( ++baseaddr; + baseaddr < current_max_xpairs && + chk[baseaddr] != 0; + ++baseaddr ) + ; + + if ( baseaddr + maxec - minec >= current_max_xpairs ) + expand_nxt_chk(); + + /* reset the loop counter so we'll start all + * over again next time it's incremented + */ + + i = minec - 1; + } + } + + else + { + /* ensure that the base address we eventually generate is + * non-negative + */ + baseaddr = max( tblend + 1, minec ); + } + + tblbase = baseaddr - minec; + tbllast = tblbase + maxec; + + if ( tbllast >= current_max_xpairs ) + expand_nxt_chk(); + + base[statenum] = tblbase; + def[statenum] = deflink; + + for ( i = minec; i <= maxec; ++i ) + if ( state[i] != SAME_TRANS ) + if ( state[i] != 0 || deflink != JAMSTATE ) + { + nxt[tblbase + i] = state[i]; + chk[tblbase + i] = statenum; + } + + if ( baseaddr == firstfree ) + /* find next free slot in tables */ + for ( ++firstfree; chk[firstfree] != 0; ++firstfree ) + ; + + tblend = max( tblend, tbllast ); + } + + +/* mk1tbl - create table entries for a state (or state fragment) which + * has only one out-transition + * + * synopsis + * int state, sym, onenxt, onedef; + * mk1tbl( state, sym, onenxt, onedef ); + */ +mk1tbl( state, sym, onenxt, onedef ) +int state, sym, onenxt, onedef; + + { + if ( firstfree < sym ) + firstfree = sym; + + while ( chk[firstfree] != 0 ) + if ( ++firstfree >= current_max_xpairs ) + expand_nxt_chk(); + + base[state] = firstfree - sym; + def[state] = onedef; + chk[firstfree] = state; + nxt[firstfree] = onenxt; + + if ( firstfree > tblend ) + { + tblend = firstfree++; + + if ( firstfree >= current_max_xpairs ) + expand_nxt_chk(); + } + } + + +/* mkprot - create new proto entry + * + * synopsis + * int state[], statenum, comstate; + * mkprot( state, statenum, comstate ); + */ +mkprot( state, statenum, comstate ) +int state[], statenum, comstate; + + { + int i, slot, tblbase; + + if ( ++numprots >= MSP || numecs * numprots >= PROT_SAVE_SIZE ) + { + /* gotta make room for the new proto by dropping last entry in + * the queue + */ + slot = lastprot; + lastprot = protprev[lastprot]; + protnext[lastprot] = NIL; + } + + else + slot = numprots; + + protnext[slot] = firstprot; + + if ( firstprot != NIL ) + protprev[firstprot] = slot; + + firstprot = slot; + prottbl[slot] = statenum; + protcomst[slot] = comstate; + + /* copy state into save area so it can be compared with rapidly */ + tblbase = numecs * (slot - 1); + + for ( i = 1; i <= numecs; ++i ) + protsave[tblbase + i] = state[i]; + } + + +/* mktemplate - create a template entry based on a state, and connect the state + * to it + * + * synopsis + * int state[], statenum, comstate, totaltrans; + * mktemplate( state, statenum, comstate, totaltrans ); + */ +mktemplate( state, statenum, comstate ) +int state[], statenum, comstate; + + { + int i, numdiff, tmpbase, tmp[CSIZE + 1]; + char transset[CSIZE + 1]; + int tsptr; + + ++numtemps; + + tsptr = 0; + + /* calculate where we will temporarily store the transition table + * of the template in the tnxt[] array. The final transition table + * gets created by cmptmps() + */ + + tmpbase = numtemps * numecs; + + if ( tmpbase + numecs >= current_max_template_xpairs ) + { + current_max_template_xpairs += MAX_TEMPLATE_XPAIRS_INCREMENT; + + ++num_reallocs; + + tnxt = reallocate_integer_array( tnxt, current_max_template_xpairs ); + } + + for ( i = 1; i <= numecs; ++i ) + if ( state[i] == 0 ) + tnxt[tmpbase + i] = 0; + else + { + transset[tsptr++] = i; + tnxt[tmpbase + i] = comstate; + } + + if ( usemecs ) + mkeccl( transset, tsptr, tecfwd, tecbck, numecs ); + + mkprot( tnxt + tmpbase, -numtemps, comstate ); + + /* we rely on the fact that mkprot adds things to the beginning + * of the proto queue + */ + + numdiff = tbldiff( state, firstprot, tmp ); + mkentry( tmp, numecs, statenum, -numtemps, numdiff ); + } + + +/* mv2front - move proto queue element to front of queue + * + * synopsis + * int qelm; + * mv2front( qelm ); + */ +mv2front( qelm ) +int qelm; + + { + if ( firstprot != qelm ) + { + if ( qelm == lastprot ) + lastprot = protprev[lastprot]; + + protnext[protprev[qelm]] = protnext[qelm]; + + if ( protnext[qelm] != NIL ) + protprev[protnext[qelm]] = protprev[qelm]; + + protprev[qelm] = NIL; + protnext[qelm] = firstprot; + protprev[firstprot] = qelm; + firstprot = qelm; + } + } + + +/* ntod - convert an ndfa to a dfa + * + * synopsis + * ntod(); + * + * creates the dfa corresponding to the ndfa we've constructed. the + * dfa starts out in state #1. + */ +ntod() + + { + int *accset, ds, nacc, newds; + int duplist[CSIZE + 1], sym, hashval, numstates, dsize; + int targfreq[CSIZE + 1], targstate[CSIZE + 1], state[CSIZE + 1]; + int *nset, *dset; + int targptr, totaltrans, i, comstate, comfreq, targ; + int *epsclosure(), snstods(), symlist[CSIZE + 1]; + + accset = allocate_integer_array( accnum + 1 ); + nset = allocate_integer_array( current_max_dfa_size ); + + todo_head = todo_next = 0; + +#define ADD_QUEUE_ELEMENT(element) \ + if ( ++element >= current_max_dfas ) \ + { /* check for queue overflowing */ \ + if ( todo_head == 0 ) \ + increase_max_dfas(); \ + else \ + element = 0; \ + } + +#define NEXT_QUEUE_ELEMENT(element) ((element + 1) % (current_max_dfas + 1)) + + for ( i = 0; i <= CSIZE; ++i ) + { + duplist[i] = NIL; + symlist[i] = false; + } + + for ( i = 0; i <= accnum; ++i ) + accset[i] = NIL; + + if ( trace ) + { + dumpnfa( scset[1] ); + fputs( "\n\nDFA Dump:\n\n", stderr ); + } + + inittbl(); + + if ( genftl ) + skelout(); + + if ( fulltbl ) + { + if ( genftl ) + { + /* declare it "short" because it's a real long-shot that that + * won't be large enough + */ + printf( "static short int %c[][%d] =\n {\n", NEXTARRAY, + numecs + 1 ); + + /* generate 0 entries for state #0 */ + for ( i = 0; i <= numecs; ++i ) + mk2data( NEXTARRAY, 0, 0, 0 ); + + /* force ',' and dataflush() next call to mk2data */ + datapos = NUMDATAITEMS; + + /* force extra blank line next dataflush() */ + dataline = NUMDATALINES; + } + } + + /* create the first states */ + + for ( i = 1; i <= lastsc * 2; ++i ) + { + numstates = 1; + + /* for each start condition, make one state for the case when + * we're at the beginning of the line (the '%' operator) and + * one for the case when we're not + */ + if ( i % 2 == 1 ) + nset[numstates] = scset[(i / 2) + 1]; + else + nset[numstates] = mkbranch( scbol[i / 2], scset[i / 2] ); + + nset = epsclosure( nset, &numstates, accset, &nacc, &hashval ); + + if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) ) + { + numas = numas + nacc; + totnst = totnst + numstates; + + todo[todo_next] = ds; + ADD_QUEUE_ELEMENT(todo_next); + } + } + + while ( todo_head != todo_next ) + { + targptr = 0; + totaltrans = 0; + + for ( i = 1; i <= numecs; ++i ) + state[i] = 0; + + ds = todo[todo_head]; + todo_head = NEXT_QUEUE_ELEMENT(todo_head); + + dset = dss[ds]; + dsize = dfasiz[ds]; + + if ( trace ) + fprintf( stderr, "state # %d:\n", ds ); + + sympartition( dset, dsize, symlist, duplist ); + + for ( sym = 1; sym <= numecs; ++sym ) + { + if ( symlist[sym] ) + { + symlist[sym] = 0; + + if ( duplist[sym] == NIL ) + { /* symbol has unique out-transitions */ + numstates = symfollowset( dset, dsize, sym, nset ); + nset = epsclosure( nset, &numstates, accset, + &nacc, &hashval ); + + if ( snstods( nset, numstates, accset, + nacc, hashval, &newds ) ) + { + totnst = totnst + numstates; + todo[todo_next] = newds; + ADD_QUEUE_ELEMENT(todo_next); + numas = numas + nacc; + } + + state[sym] = newds; + + if ( trace ) + fprintf( stderr, "\t%d\t%d\n", sym, newds ); + + targfreq[++targptr] = 1; + targstate[targptr] = newds; + ++numuniq; + } + + else + { + /* sym's equivalence class has the same transitions + * as duplist(sym)'s equivalence class + */ + targ = state[duplist[sym]]; + state[sym] = targ; + + if ( trace ) + fprintf( stderr, "\t%d\t%d\n", sym, targ ); + + /* update frequency count for destination state */ + + i = 0; + while ( targstate[++i] != targ ) + ; + + ++targfreq[i]; + ++numdup; + } + + ++totaltrans; + duplist[sym] = NIL; + } + } + + numsnpairs = numsnpairs + totaltrans; + + if ( caseins && ! useecs ) + { + register int j; + + for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j ) + state[i] = state[j]; + } + + if ( fulltbl ) + { + if ( genftl ) + { + /* supply array's 0-element */ + mk2data( NEXTARRAY, 0, 0, 0 ); + + for ( i = 1; i <= numecs; ++i ) + mk2data( NEXTARRAY, 0, 0, state[i] ); + + /* force ',' and dataflush() next call to mk2data */ + datapos = NUMDATAITEMS; + + /* force extra blank line next dataflush() */ + dataline = NUMDATALINES; + } + + else + { + for ( i = 1; i <= numecs; ++i ) + mk2data( NEXTARRAY, ds, i, state[i] ); + } + } + + else + { + /* determine which destination state is the most common, and + * how many transitions to it there are + */ + + comfreq = 0; + comstate = 0; + + for ( i = 1; i <= targptr; ++i ) + if ( targfreq[i] > comfreq ) + { + comfreq = targfreq[i]; + comstate = targstate[i]; + } + + bldtbl( state, ds, totaltrans, comstate, comfreq ); + } + } + + if ( fulltbl ) + dataend(); + + else + { + cmptmps(); /* create compressed template entries */ + + /* create tables for all the states with only one out-transition */ + while ( onesp > 0 ) + { + mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp], + onedef[onesp] ); + --onesp; + } + + mkdeftbl(); + } + } + + +/* stack1 - save states with only one out-transition to be processed later + * + * synopsis + * int statenum, sym, nextstate, deflink; + * stack1( statenum, sym, nextstate, deflink ); + * + * if there's room for another state one the "one-transition" stack, the + * state is pushed onto it, to be processed later by mk1tbl. If there's + * no room, we process the sucker right now. + */ +stack1( statenum, sym, nextstate, deflink ) +int statenum, sym, nextstate, deflink; + + { + if ( onesp >= ONE_STACK_SIZE ) + mk1tbl( statenum, sym, nextstate, deflink ); + + else + { + ++onesp; + onestate[onesp] = statenum; + onesym[onesp] = sym; + onenext[onesp] = nextstate; + onedef[onesp] = deflink; + } + } + + +/* tbldiff - compute differences between two state tables + * + * synopsis + * int state[], pr, ext[]; + * int tbldiff, numdifferences; + * numdifferences = tbldiff( state, pr, ext ) + * + * "state" is the state array which is to be extracted from the pr'th + * proto. "pr" is both the number of the proto we are extracting from + * and an index into the save area where we can find the proto's complete + * state table. Each entry in "state" which differs from the corresponding + * entry of "pr" will appear in "ext". + * Entries which are the same in both "state" and "pr" will be marked + * as transitions to "SAME_TRANS" in "ext". The total number of differences + * between "state" and "pr" is returned as function value. Note that this + * number is "numecs" minus the number of "SAME_TRANS" entries in "ext". + */ +int tbldiff( state, pr, ext ) +int state[], pr, ext[]; + + { + register int i, *sp = state, *ep = ext, *protp; + register int numdiff = 0; + + protp = &protsave[numecs * (pr - 1)]; + + for ( i = numecs; i > 0; --i ) + { + if ( *++protp == *++sp ) + *++ep = SAME_TRANS; + else + { + *++ep = *sp; + ++numdiff; + } + } + + return ( numdiff ); + } @@ -0,0 +1,210 @@ +#include "flexdef.h" +#include "y.tab.h" + +/* + * Copyright (c) University of California, 1987 + */ + +/* yylex - scan for a regular expression token + * + * synopsis + * + * token = yylex(); + * + * token - return token found + */ +int yylex() + + { + int toktype; + static int beglin = false; + + if ( eofseen ) + toktype = EOF; + else + toktype = lexscan(); + + if ( toktype == EOF ) + { + eofseen = 1; + + if ( sectnum == 1 ) + { + synerr( "unexpected EOF" ); + sectnum = 2; + toktype = SECTEND; + } + + else if ( sectnum == 2 ) + { + sectnum = 3; + toktype = SECTEND; + } + + else + toktype = 0; + } + + if ( trace ) + { + if ( beglin ) + { + fprintf( stderr, "%d\t", accnum + 1 ); + beglin = 0; + } + + switch ( toktype ) + { + case '<': + case '>': + case '^': + case '$': + case '"': + case '[': + case ']': + case '{': + case '}': + case '|': + case '(': + case ')': + case '-': + case '/': + case '\\': + case '?': + case '.': + case '*': + case '+': + case ',': + (void) putc( toktype, stderr ); + break; + + case '\n': + (void) putc( '\n', stderr ); + + if ( sectnum == 2 ) + beglin = 1; + + break; + + case SCDECL: + fputs( "%s", stderr ); + break; + + case XSCDECL: + fputs( "%x", stderr ); + break; + + case WHITESPACE: + (void) putc( ' ', stderr ); + break; + + case SECTEND: + fputs( "%%\n", stderr ); + + /* we set beglin to be true so we'll start + * writing out numbers as we echo rules. lexscan() has + * already assigned sectnum + */ + + if ( sectnum == 2 ) + beglin = 1; + + break; + + case NAME: + fprintf( stderr, "'%s'", nmstr ); + break; + + case CHAR: + switch ( yylval ) + { + case '<': + case '>': + case '^': + case '$': + case '"': + case '[': + case ']': + case '{': + case '}': + case '|': + case '(': + case ')': + case '-': + case '/': + case '\\': + case '?': + case '.': + case '*': + case '+': + case ',': + fprintf( stderr, "\\%c", yylval ); + break; + + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 8: + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + case 16: + case 17: + case 18: + case 19: + case 20: + case 21: + case 22: + case 23: + case 24: + case 25: + case 26: + case 27: + case 28: + case 29: + case 30: + case 31: + fprintf( stderr, "^%c", 'A' + yylval - 1 ); + break; + + case 127: + (void) putc( '^', stderr ); + (void) putc( '@', stderr ); + break; + + default: + (void) putc( yylval, stderr ); + break; + } + + break; + + case NUMBER: + fprintf( stderr, "%d", yylval ); + break; + + case PREVCCL: + fprintf( stderr, "[%d]", yylval ); + break; + + case 0: + fprintf( stderr, "End Marker" ); + break; + + default: + fprintf( stderr, "*Something Weird* - tok: %d val: %d\n", + toktype, yylval ); + break; + } + } + + return ( toktype ); + } |