From 2cc578462372baa1b85936749946608d7f36415f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 8 Nov 1987 22:24:44 +0000 Subject: Initial revision --- ccl.c | 98 +++++ dfa.c | 460 +++++++++++++++++++++ ecs.c | 190 +++++++++ flexdef.h | 429 ++++++++++++++++++++ main.c | 507 +++++++++++++++++++++++ misc.c | 646 ++++++++++++++++++++++++++++++ nfa.c | 542 +++++++++++++++++++++++++ parse.y | 473 ++++++++++++++++++++++ scan.l | 370 +++++++++++++++++ sym.c | 291 ++++++++++++++ tblcmp.c | 1324 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ yylex.c | 210 ++++++++++ 12 files changed, 5540 insertions(+) create mode 100644 ccl.c create mode 100644 dfa.c create mode 100644 ecs.c create mode 100644 flexdef.h create mode 100644 main.c create mode 100644 misc.c create mode 100644 nfa.c create mode 100644 parse.y create mode 100644 scan.l create mode 100644 sym.c create mode 100644 tblcmp.c create mode 100644 yylex.c diff --git a/ccl.c b/ccl.c new file mode 100644 index 0000000..fa15c02 --- /dev/null +++ b/ccl.c @@ -0,0 +1,98 @@ +/* lexccl - routines for character classes */ + +/* + * Copyright (c) University of California, 1987 + */ + +#include "flexdef.h" + +/* ccladd - add a single character to a ccl + * + * synopsis + * int cclp; + * char ch; + * ccladd( cclp, ch ); + */ +ccladd( cclp, ch ) +int cclp; +char ch; + + { + int ind, len, newpos, i; + + len = ccllen[cclp]; + ind = cclmap[cclp]; + + /* check to see if the character is already in the ccl */ + + for ( i = 0; i < len; ++i ) + if ( ccltbl[ind + i] == ch ) + return; + + newpos = ind + len; + + if ( newpos >= current_max_ccl_tbl_size ) + { + current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT; + + ++num_reallocs; + + ccltbl = reallocate_character_array( ccltbl, current_max_ccl_tbl_size ); + } + + ccllen[cclp] = len + 1; + ccltbl[newpos] = ch; + } + + +/* cclinit - make an empty ccl + * + * synopsis + * int cclinit(); + * new_ccl = cclinit(); + */ +int cclinit() + + { + if ( ++lastccl >= 
current_maxccls ) + { + current_maxccls += MAXCCLS_INCREMENT; + + ++num_reallocs; + + cclmap = reallocate_integer_array( cclmap, current_maxccls ); + ccllen = reallocate_integer_array( ccllen, current_maxccls ); + cclng = reallocate_integer_array( cclng, current_maxccls ); + } + + if ( lastccl == 1 ) + /* we're making the first ccl */ + cclmap[lastccl] = 0; + + else + /* the new pointer is just past the end of the last ccl. Since + * the cclmap points to the \first/ character of a ccl, adding the + * length of the ccl to the cclmap pointer will produce a cursor + * to the first free space + */ + cclmap[lastccl] = cclmap[lastccl - 1] + ccllen[lastccl - 1]; + + ccllen[lastccl] = 0; + cclng[lastccl] = 0; /* ccl's start out life un-negated */ + + return ( lastccl ); + } + + +/* cclnegate - negate a ccl + * + * synopsis + * int cclp; + * cclnegate( ccl ); + */ +cclnegate( cclp ) +int cclp; + + { + cclng[cclp] = 1; + } diff --git a/dfa.c b/dfa.c new file mode 100644 index 0000000..d709df8 --- /dev/null +++ b/dfa.c @@ -0,0 +1,460 @@ +/* lexdfa - DFA construction routines */ + +/* + * Copyright (c) University of California, 1987 + */ + +#include "flexdef.h" + +/* epsclosure - construct the epsilon closure of a set of ndfa states + * + * synopsis + * int t[current_max_dfa_size], numstates, accset[accnum + 1], nacc; + * int hashval; + * int *epsclosure(); + * t = epsclosure( t, &numstates, accset, &nacc, &hashval ); + * + * NOTES + * the epsilon closure is the set of all states reachable by an arbitrary + * number of epsilon transitions which themselves do not have epsilon + * transitions going out, unioned with the set of states which have non-null + * accepting numbers. t is an array of size numstates of nfa state numbers. + * Upon return, t holds the epsilon closure and numstates is updated. accset + * holds a list of the accepting numbers, and the size of accset is given + * by nacc. 
t may be subjected to reallocation if it is not large enough + * to hold the epsilon closure. + * + * hashval is the hash value for the dfa corresponding to the state set + */ +int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr ) +int *t, *ns_addr, accset[], *nacc_addr, *hv_addr; + + { + register int stkpos, ns, tsp; + int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum; + int stkend, nstate; + static int did_stk_init = false, *stk; + +#define MARK_STATE(state) \ + trans1[state] = trans1[state] - MARKER_DIFFERENCE; + +#define IS_MARKED(state) (trans1[state] < 0) + +#define UNMARK_STATE(state) \ + trans1[state] = trans1[state] + MARKER_DIFFERENCE; + +#define CHECK_ACCEPT(state) \ + { \ + nfaccnum = accptnum[state]; \ + if ( nfaccnum != NIL ) \ + accset[++nacc] = nfaccnum; \ + } + +#define DO_REALLOCATION \ + { \ + current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \ + ++num_reallocs; \ + t = reallocate_integer_array( t, current_max_dfa_size ); \ + stk = reallocate_integer_array( stk, current_max_dfa_size ); \ + } \ + +#define PUT_ON_STACK(state) \ + { \ + if ( ++stkend >= current_max_dfa_size ) \ + DO_REALLOCATION \ + stk[stkend] = state; \ + MARK_STATE(state) \ + } + +#define ADD_STATE(state) \ + { \ + if ( ++numstates >= current_max_dfa_size ) \ + DO_REALLOCATION \ + t[numstates] = state; \ + hashval = hashval + state; \ + } + +#define STACK_STATE(state) \ + { \ + PUT_ON_STACK(state) \ + CHECK_ACCEPT(state) \ + if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \ + ADD_STATE(state) \ + } + + if ( ! did_stk_init ) + { + stk = allocate_integer_array( current_max_dfa_size ); + did_stk_init = true; + } + + nacc = stkend = hashval = 0; + + for ( nstate = 1; nstate <= numstates; ++nstate ) + { + ns = t[nstate]; + + /* the state could be marked if we've already pushed it onto + * the stack + */ + if ( ! 
IS_MARKED(ns) ) + PUT_ON_STACK(ns) + + CHECK_ACCEPT(ns) + hashval = hashval + ns; + } + + for ( stkpos = 1; stkpos <= stkend; ++stkpos ) + { + ns = stk[stkpos]; + transsym = transchar[ns]; + + if ( transsym == SYM_EPSILON ) + { + tsp = trans1[ns] + MARKER_DIFFERENCE; + + if ( tsp != NO_TRANSITION ) + { + if ( ! IS_MARKED(tsp) ) + STACK_STATE(tsp) + + tsp = trans2[ns]; + + if ( tsp != NO_TRANSITION ) + if ( ! IS_MARKED(tsp) ) + STACK_STATE(tsp) + } + } + } + + /* clear out "visit" markers */ + + for ( stkpos = 1; stkpos <= stkend; ++stkpos ) + { + if ( IS_MARKED(stk[stkpos]) ) + { + UNMARK_STATE(stk[stkpos]) + } + else + lexfatal( "consistency check failed in epsclosure()" ); + } + + *ns_addr = numstates; + *hv_addr = hashval; + *nacc_addr = nacc; + + return ( t ); + } + + + +/* increase_max_dfas - increase the maximum number of DFAs */ + +increase_max_dfas() + + { + int old_max = current_max_dfas; + + current_max_dfas += MAX_DFAS_INCREMENT; + + ++num_reallocs; + + base = reallocate_integer_array( base, current_max_dfas ); + def = reallocate_integer_array( def, current_max_dfas ); + dfasiz = reallocate_integer_array( dfasiz, current_max_dfas ); + accsiz = reallocate_integer_array( accsiz, current_max_dfas ); + dhash = reallocate_integer_array( dhash, current_max_dfas ); + todo = reallocate_integer_array( todo, current_max_dfas ); + dss = reallocate_integer_pointer_array( dss, current_max_dfas ); + dfaacc = reallocate_integer_pointer_array( dfaacc, current_max_dfas ); + + /* fix up todo queue */ + if ( todo_next < todo_head ) + { /* queue was wrapped around the end */ + register int i; + + for ( i = 0; i < todo_next; ++i ) + todo[old_max + i] = todo[i]; + + todo_next += old_max; + } + } + + +/* snstods - converts a set of ndfa states into a dfa state + * + * synopsis + * int sns[numstates], numstates, newds, accset[accnum + 1], nacc, hashval; + * int snstods(); + * is_new_state = snstods( sns, numstates, accset, nacc, hashval, &newds ); + * + * on return, the dfa 
state number is in newds. + */ +int snstods( sns, numstates, accset, nacc, hashval, newds_addr ) +int sns[], numstates, accset[], nacc, hashval, *newds_addr; + + { + int didsort = 0; + register int i, j; + int newds, *oldsns; + char *malloc(); + + for ( i = 1; i <= lastdfa; ++i ) + if ( hashval == dhash[i] ) + { + if ( numstates == dfasiz[i] ) + { + oldsns = dss[i]; + + if ( ! didsort ) + { + /* we sort the states in sns so we can compare it to + * oldsns quickly. we use bubble because there probably + * aren't very many states + */ + bubble( sns, numstates ); + didsort = 1; + } + + for ( j = 1; j <= numstates; ++j ) + if ( sns[j] != oldsns[j] ) + break; + + if ( j > numstates ) + { + ++dfaeql; + *newds_addr = i; + return ( 0 ); + } + + ++hshcol; + } + + else + ++hshsave; + } + + /* make a new dfa */ + + if ( ++lastdfa >= current_max_dfas ) + increase_max_dfas(); + + newds = lastdfa; + + if ( ! (dss[newds] = (int *) malloc( (unsigned) ((numstates + 1) * sizeof( int )) )) ) + lexfatal( "dynamic memory failure in snstods()" ); + + /* if we haven't already sorted the states in sns, we do so now, so that + * future comparisons with it can be made quickly + */ + + if ( ! didsort ) + bubble( sns, numstates ); + + for ( i = 1; i <= numstates; ++i ) + dss[newds][i] = sns[i]; + + dfasiz[newds] = numstates; + dhash[newds] = hashval; + + if ( nacc == 0 ) + { + dfaacc[newds] = 0; + accsiz[newds] = 0; + } + + else if ( reject ) + { + /* we sort the accepting set in increasing order so the disambiguating + * rule that the first rule listed is considered match in the event of + * ties will work. We use a bubble sort since the list is probably + * quite small. + */ + + bubble( accset, nacc ); + + if ( ! 
(dfaacc[newds] = + (int *) malloc( (unsigned) ((nacc + 1) * sizeof( int )) )) ) + lexfatal( "dynamic memory failure in snstods()" ); + + /* save the accepting set for later */ + for ( i = 1; i <= nacc; ++i ) + dfaacc[newds][i] = accset[i]; + + accsiz[newds] = nacc; + } + + else + { /* find lowest numbered rule so the disambiguating rule will work */ + j = accnum + 1; + + for ( i = 1; i <= nacc; ++i ) + if ( accset[i] < j ) + j = accset[i]; + + dfaacc[newds] = (int *) j; + } + + *newds_addr = newds; + + return ( 1 ); + } + + +/* symfollowset - follow the symbol transitions one step + * + * synopsis + * int ds[current_max_dfa_size], dsize, transsym; + * int nset[current_max_dfa_size], numstates; + * numstates = symfollowset( ds, dsize, transsym, nset ); + */ +int symfollowset( ds, dsize, transsym, nset ) +int ds[], dsize, transsym, nset[]; + + { + int ns, tsp, sym, i, j, lenccl, ch, numstates; + int ccllist; + + numstates = 0; + + for ( i = 1; i <= dsize; ++i ) + { /* for each nfa state ns in the state set of ds */ + ns = ds[i]; + sym = transchar[ns]; + tsp = trans1[ns]; + + if ( sym < 0 ) + { /* it's a character class */ + sym = -sym; + ccllist = cclmap[sym]; + lenccl = ccllen[sym]; + + if ( cclng[sym] ) + { + for ( j = 0; j < lenccl; ++j ) + { /* loop through negated character class */ + ch = ccltbl[ccllist + j]; + + if ( ch > transsym ) + break; /* transsym isn't in negated ccl */ + + else if ( ch == transsym ) + /* next 2 */ goto bottom; + } + + /* didn't find transsym in ccl */ + nset[++numstates] = tsp; + } + + else + for ( j = 0; j < lenccl; ++j ) + { + ch = ccltbl[ccllist + j]; + + if ( ch > transsym ) + break; + + else if ( ch == transsym ) + { + nset[++numstates] = tsp; + break; + } + } + } + + else if ( sym >= 'A' && sym <= 'Z' && caseins ) + lexfatal( "consistency check failed in symfollowset" ); + + else if ( sym == SYM_EPSILON ) + { /* do nothing */ + } + + else if ( ecgroup[sym] == transsym ) + nset[++numstates] = tsp; + +bottom: + ; + } + + return ( 
numstates ); + } + + +/* sympartition - partition characters with same out-transitions + * + * synopsis + * integer ds[current_max_dfa_size], numstates, duplist[numecs]; + * symlist[numecs]; + * sympartition( ds, numstates, symlist, duplist ); + */ +sympartition( ds, numstates, symlist, duplist ) +int ds[], numstates, duplist[]; +int symlist[]; + + { + int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich; + + /* partitioning is done by creating equivalence classes for those + * characters which have out-transitions from the given state. Thus + * we are really creating equivalence classes of equivalence classes. + */ + + for ( i = 1; i <= numecs; ++i ) + { /* initialize equivalence class list */ + duplist[i] = i - 1; + dupfwd[i] = i + 1; + } + + duplist[1] = NIL; + dupfwd[numecs] = NIL; + + for ( i = 1; i <= numstates; ++i ) + { + ns = ds[i]; + tch = transchar[ns]; + + if ( tch != SYM_EPSILON ) + { + if ( tch < -lastccl || tch > CSIZE ) + lexfatal( "bad transition character detected in sympartition()" ); + + if ( tch > 0 ) + { /* character transition */ + mkechar( ecgroup[tch], dupfwd, duplist ); + symlist[ecgroup[tch]] = 1; + } + + else + { /* character class */ + tch = -tch; + + lenccl = ccllen[tch]; + cclp = cclmap[tch]; + mkeccl( ccltbl + cclp, lenccl, dupfwd, duplist, numecs ); + + if ( cclng[tch] ) + { + j = 0; + + for ( k = 0; k < lenccl; ++k ) + { + ich = ccltbl[cclp + k]; + + for ( ++j; j < ich; ++j ) + symlist[j] = 1; + } + + for ( ++j; j <= numecs; ++j ) + symlist[j] = 1; + } + + else + for ( k = 0; k < lenccl; ++k ) + { + ich = ccltbl[cclp + k]; + symlist[ich] = 1; + } + } + } + } + } diff --git a/ecs.c b/ecs.c new file mode 100644 index 0000000..2a60c9b --- /dev/null +++ b/ecs.c @@ -0,0 +1,190 @@ +/* lexecs - equivalence class routines */ + +/* + * Copyright (c) University of California, 1987 + */ + +#include "flexdef.h" + +/* ccl2ecl - convert character classes to set of equivalence classes + * + * synopsis + * ccl2ecl(); + */ +ccl2ecl() + + { + 
int i, ich, newlen, cclp, ccls, cclmec; + + for ( i = 1; i <= lastccl; ++i ) + { + /* we loop through each character class, and for each character + * in the class, add the character's equivalence class to the + * new "character" class we are creating. Thus when we are all + * done, character classes will really consist of collections + * of equivalence classes + */ + + newlen = 0; + cclp = cclmap[i]; + + for ( ccls = 0; ccls < ccllen[i]; ++ccls ) + { + ich = ccltbl[cclp + ccls]; + cclmec = ecgroup[ich]; + if ( cclmec > 0 ) + { + ccltbl[cclp + newlen] = cclmec; + ++newlen; + } + } + + ccllen[i] = newlen; + } + } + + +/* cre8ecs - associate equivalence class numbers with class members + * + * synopsis + * int cre8ecs(); + * number of classes = cre8ecs( fwd, bck, num ); + * + * fwd is the forward linked-list of equivalence class members. bck + * is the backward linked-list, and num is the number of class members. + * Returned is the number of classes. + */ +int cre8ecs( fwd, bck, num ) +int fwd[], bck[], num; + + { + int i, j, numcl; + + numcl = 0; + + /* create equivalence class numbers. From now on, abs( bck(x) ) + * is the equivalence class number for object x. If bck(x) + * is positive, then x is the representative of its equivalence + * class. 
+ */ + + for ( i = 1; i <= num; ++i ) + if ( bck[i] == NIL ) + { + bck[i] = ++numcl; + for ( j = fwd[i]; j != NIL; j = fwd[j] ) + bck[j] = -numcl; + } + + return ( numcl ); + } + + +/* mkeccl - update equivalence classes based on character class xtions + * + * synopsis + * char ccls[]; + * int lenccl, fwd[llsiz], bck[llsiz], llsiz; + * mkeccl( ccls, lenccl, fwd, bck, llsiz ); + * + * where ccls contains the elements of the character class, lenccl is the + * number of elements in the ccl, fwd is the forward link-list of equivalent + * characters, bck is the backward link-list, and llsiz size of the link-list + */ +mkeccl( ccls, lenccl, fwd, bck, llsiz ) +char ccls[]; +int lenccl, fwd[], bck[], llsiz; + + { + int cclp, oldec, newec; + int cclm, i, j; + + /* note that it doesn't matter whether or not the character class is + * negated. The same results will be obtained in either case. + */ + + cclp = 0; + + while ( cclp < lenccl ) + { + cclm = ccls[cclp]; + oldec = bck[cclm]; + newec = cclm; + + j = cclp + 1; + + for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] ) + { /* look for the symbol in the character class */ + for ( ; j < lenccl && ccls[j] <= i; ++j ) + if ( ccls[j] == i ) + { + /* we found an old companion of cclm in the ccl. + * link it into the new equivalence class and flag it as + * having been processed + */ + + bck[i] = newec; + fwd[newec] = i; + newec = i; + ccls[j] = -i; /* set flag so we don't reprocess */ + + /* get next equivalence class member */ + /* next 2 */ goto next_pt; + } + + /* symbol isn't in character class. 
Put it in the old equivalence + * class + */ + + bck[i] = oldec; + + if ( oldec != NIL ) + fwd[oldec] = i; + + oldec = i; +next_pt: + ; + } + + if ( bck[cclm] != NIL || oldec != bck[cclm] ) + { + bck[cclm] = NIL; + fwd[oldec] = NIL; + } + + fwd[newec] = NIL; + + /* find next ccl member to process */ + + for ( ++cclp; ccls[cclp] < 0 && cclp < lenccl; ++cclp ) + { + /* reset "doesn't need processing" flag */ + ccls[cclp] = -ccls[cclp]; + } + } + } + + +/* mkechar - create equivalence class for single character + * + * synopsis + * int tch, fwd[], bck[]; + * mkechar( tch, fwd, bck ); + */ +mkechar( tch, fwd, bck ) +int tch, fwd[], bck[]; + + { + /* if until now the character has been a proper subset of + * an equivalence class, break it away to create a new ec + */ + + if ( fwd[tch] != NIL ) + bck[fwd[tch]] = bck[tch]; + + if ( bck[tch] != NIL ) + fwd[bck[tch]] = fwd[tch]; + + fwd[tch] = NIL; + bck[tch] = NIL; + } diff --git a/flexdef.h b/flexdef.h new file mode 100644 index 0000000..b41b649 --- /dev/null +++ b/flexdef.h @@ -0,0 +1,429 @@ +/* + * Symbol definitions for flex. + * + * modification history + * -------------------- + * 02a vp 27jun86 .translated into C/FTL + */ + +/* + * Copyright (c) University of California, 1987 + */ + +#include + + +/* maximum line length we'll have to deal with */ +#define MAXLINE BUFSIZ + +/* maximum size of file name */ +#define FILENAMESIZE 1024 + +#define min(x,y) (x < y ? x : y) +#define max(x,y) (x > y ? 
/* smaller / larger of two values.  Arguments and result are fully
 * parenthesized so that operands such as "a | b" keep their meaning; each
 * argument may still be evaluated twice, so it must not have side effects.
 */
#define min(x,y) ((x) < (y) ? (x) : (y))
#define max(x,y) ((x) > (y) ? (x) : (y))

#define true 1
#define false 0


/* name of the skeleton file; may be overridden when compiling */
#ifndef DEFAULT_SKELETON_FILE
#define DEFAULT_SKELETON_FILE "flex.skel"
#endif

/* maximum number of characters per line recognized by Fortran compiler */
#define DATALINEWIDTH 72

/* string to indent Fortran data statements with
 *
 * NOTE(review): DATAINDENTWIDTH below says 6 columns, but the literal as
 * seen in this copy of the file is a single blank - presumably runs of
 * blanks were collapsed in transit; confirm against the upstream source.
 */
#define DATAINDENTSTR " "
/* width of dataindent string in Fortran columns */
#define DATAINDENTWIDTH 6

/* number of data items per line for -f output */
#define NUMDATAITEMS 10

/* number of lines of data in -f output before inserting a blank line for
 * readability.
 */
#define NUMDATALINES 10

/* returns true if an nfa state has an epsilon out-transition slot
 * that can be used.  This definition is currently not used.
 */
#define FREE_EPSILON(state) \
        (transchar[state] == SYM_EPSILON && \
         trans2[state] == NO_TRANSITION && \
         finalst[state] != (state))

/* returns true if an nfa state has an epsilon out-transition character
 * and both slots are free
 */
#define SUPER_FREE_EPSILON(state) \
        (transchar[state] == SYM_EPSILON && \
         trans1[state] == NO_TRANSITION)

/* maximum number of NFA states that can comprise a DFA state.  It's real
 * big because if there's a lot of rules, the initial state will have a
 * huge epsilon closure.
 */
#define INITIAL_MAX_DFA_SIZE 750
#define MAX_DFA_SIZE_INCREMENT 750

/* array names to be used in generated machine.  They're short because
 * we write out one data statement (which names the array) for each element
 * in the array.
 */

#define ALIST 'l'	/* points to list of rules accepted for a state */
#define ACCEPT 'a'	/* list of rules accepted for a state */
#define ECARRAY 'e'	/* maps input characters to equivalence classes */
#define MATCHARRAY 'm'	/* maps equivalence classes to meta-equivalence classes */
#define BASEARRAY 'b'	/* "base" array */
#define DEFARRAY 'd'	/* "default" array */
#define NEXTARRAY 'n'	/* "next" array */
#define CHECKARRAY 'c'	/* "check" array */
If not, its special meaning when making equivalence classes + * (it marks the representative of a given e.c.) will be unidentifiable + */ +#define NIL 0 + +#define JAM -1 /* to mark a missing DFA transition */ +#define NO_TRANSITION NIL +#define UNIQUE -1 /* marks a symbol as an e.c. representative */ +#define INFINITY -1 /* for x{5,} constructions */ + +/* size of input alphabet - should be size of ASCII set */ +#define CSIZE 127 + +#define INITIAL_MAXCCLS 100 /* max number of unique character classes */ +#define MAXCCLS_INCREMENT 100 + +/* size of table holding members of character classes */ +#define INITIAL_MAX_CCL_TBL_SIZE 500 +#define MAX_CCL_TBL_SIZE_INCREMENT 250 + +#define INITIAL_MNS 2000 /* default maximum number of nfa states */ +#define MNS_INCREMENT 1000 /* amount to bump above by if it's not enough */ + +#define INITIAL_MAX_DFAS 1000 /* default maximum number of dfa states */ +#define MAX_DFAS_INCREMENT 1000 + +#define JAMSTATE -32766 /* marks a reference to the state that always jams */ + +/* enough so that if it's subtracted from an NFA state number, the result + * is guaranteed to be negative + */ +#define MARKER_DIFFERENCE 32000 +#define MAXIMUM_MNS 31999 + +/* maximum number of nxt/chk pairs for non-templates */ +#define INITIAL_MAX_XPAIRS 2000 +#define MAX_XPAIRS_INCREMENT 2000 + +/* maximum number of nxt/chk pairs needed for templates */ +#define INITIAL_MAX_TEMPLATE_XPAIRS 2500 +#define MAX_TEMPLATE_XPAIRS_INCREMENT 2500 + +#define SYM_EPSILON 0 /* to mark transitions on the symbol epsilon */ + +#define INITIAL_MAX_SCS 40 /* maximum number of start conditions */ +#define MAX_SCS_INCREMENT 40 /* amount to bump by if it's not enough */ + +#define ONE_STACK_SIZE 500 /* stack of states with only one out-transition */ +#define SAME_TRANS -1 /* transition is the same as "default" entry for state */ + +/* the following percentages are used to tune table compression: + + * the percentage the number of out-transitions a state must be of the + * number 
of equivalence classes in order to be considered for table + * compaction by using protos + */ +#define PROTO_SIZE_PERCENTAGE 15 + +/* the percentage the number of homogeneous out-transitions of a state + * must be of the number of total out-transitions of the state in order + * that the state's transition table is first compared with a potential + * template of the most common out-transition instead of with the first + * proto in the proto queue + */ +#define CHECK_COM_PERCENTAGE 50 + +/* the percentage the number of differences between a state's transition + * table and the proto it was first compared with must be of the total + * number of out-transitions of the state in order to keep the first + * proto as a good match and not search any further + */ +#define FIRST_MATCH_DIFF_PERCENTAGE 10 + +/* the percentage the number of differences between a state's transition + * table and the most similar proto must be of the state's total number + * of out-transitions to use the proto as an acceptable close match + */ +#define ACCEPTABLE_DIFF_PERCENTAGE 50 + +/* the percentage the number of homogenous out-transitions of a state + * must be of the number of total out-transitions of the state in order + * to consider making a template from the state + */ +#define TEMPLATE_SAME_PERCENTAGE 60 + +/* the percentage the number of differences between a state's transition + * table and the most similar proto must be of the state's total number + * of out-transitions to create a new proto from the state + */ +#define NEW_PROTO_DIFF_PERCENTAGE 20 + +/* the percentage the total number of out-transitions of a state must be + * of the number of equivalence classes in order to consider trying to + * fit the transition table into "holes" inside the nxt/chk table. 
+ */ +#define INTERIOR_FIT_PERCENTAGE 15 + +/* size of region set aside to cache the complete transition table of + * protos on the proto queue to enable quick comparisons + */ +#define PROT_SAVE_SIZE 2000 + +#define MSP 50 /* maximum number of saved protos (protos on the proto queue) */ + +/* number that, if used to subscript an array, has a good chance of producing + * an error; should be small enough to fit into a short + */ +#define BAD_SUBSCRIPT -32767 + + +/* Declarations for global variables. */ + +/* variables for symbol tables: + * sctbl - start-condition symbol table + * ndtbl - name-definition symbol table + * ccltab - character class text symbol table + */ + +struct hash_entry + { + struct hash_entry *prev, *next; + char *name; + char *val; + } ; + +typedef struct hash_entry *hash_table[]; + +#define NAME_TABLE_HASH_SIZE 101 +#define START_COND_HASH_SIZE 101 +#define CCL_HASH_SIZE 101 + +extern struct hash_entry *ndtbl[NAME_TABLE_HASH_SIZE]; +extern struct hash_entry *sctbl[START_COND_HASH_SIZE]; +extern struct hash_entry *ccltab[CCL_HASH_SIZE]; + + +/* variables for flags: + * printstats - if true (-v), dump statistics + * syntaxerror - true if a syntax error has been found + * eofseen - true if we've seen an eof in the input file + * ddebug - if true (-d), make a "debug" scanner + * trace - if true (-T), trace processing + * spprdflt - if true (-s), suppress the default rule + * interactive - if true (-I), generate an interactive scanner + * caseins - if true (-i), generate a case-insensitive scanner + * useecs - if true (-ce flag), use equivalence classes + * fulltbl - if true (-cf flag), don't compress the DFA state table + * usemecs - if true (-cm flag), use meta-equivalence classes + * reject - if true (-r flag), generate tables for REJECT macro + */ + +extern int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt; +extern int interactive, caseins, genftl, useecs, fulltbl, usemecs, reject; + + +/* variables used in the flex input routines: 
+ * datapos - characters on current output line + * dataline - number of contiguous lines of data in current data + * statement. Used to generate readable -f output + * skelfile - fd of the skeleton file + * yyin - input file + * infilename - name of input file + * linenum - current input line number + */ + +extern int datapos, dataline, linenum; +extern FILE *skelfile, *yyin; +extern char *infilename; + + +/* variables for stack of states having only one out-transition: + * onestate - state number + * onesym - transition symbol + * onenext - target state + * onedef - default base entry + * onesp - stack pointer + */ + +extern int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE]; +extern int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; + + +/* variables for nfa machine data: + * current_mns - current maximum on number of NFA states + * accnum - number of the last accepting state + * firstst - physically the first state of a fragment + * lastst - last physical state of fragment + * finalst - last logical state of fragment + * transchar - transition character + * trans1 - transition state + * trans2 - 2nd transition state for epsilons + * accptnum - accepting number + * lastnfa - last nfa state number created + */ + +extern int current_mns; +extern int accnum, *firstst, *lastst, *finalst, *transchar; +extern int *trans1, *trans2, *accptnum, lastnfa; + + +/* variables for protos: + * numtemps - number of templates created + * numprots - number of protos created + * protprev - backlink to a more-recently used proto + * protnext - forward link to a less-recently used proto + * prottbl - base/def table entry for proto + * protcomst - common state of proto + * firstprot - number of the most recently used proto + * lastprot - number of the least recently used proto + * protsave contains the entire state array for protos + */ + +extern int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP]; +extern int protcomst[MSP], firstprot, lastprot, 
protsave[PROT_SAVE_SIZE]; + + +/* variables for managing equivalence classes: + * numecs - number of equivalence classes + * nextecm - forward link of Equivalence Class members + * ecgroup - class number or backward link of EC members + * nummecs - number of meta-equivalence classes (used to compress + * templates) + * tecfwd - forward link of meta-equivalence class members + * tecbck - backward link of MEC's + */ + +extern int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs; +extern int tecfwd[CSIZE + 1], tecbck[CSIZE + 1]; + + +/* variables for start conditions: + * lastsc - last start condition created + * current_max_scs - current limit on number of start conditions + * scset - set of rules active in start condition + * scbol - set of rules active only at the beginning of line in a s.c. + * scxclu - true if start condition is exclusive + * actvsc - stack of active start conditions for the current rule + */ + +extern int lastsc, current_max_scs, *scset, *scbol, *scxclu, *actvsc; + + +/* variables for dfa machine data: + * current_max_dfa_size - current maximum number of NFA states in DFA + * current_max_xpairs - current maximum number of non-template xtion pairs + * current_max_template_xpairs - current maximum number of template pairs + * current_max_dfas - current maximum number of DFA states + * lastdfa - last dfa state number created + * nxt - state to enter upon reading character + * chk - check value to see if "nxt" applies + * tnxt - internal nxt table for templates + * base - offset into "nxt" for given state + * def - where to go if "chk" disallows "nxt" entry + * tblend - last "nxt/chk" table entry being used + * firstfree - first empty entry in "nxt/chk" table + * dss - nfa state set for each dfa + * dfasiz - size of nfa state set for each dfa + * dfaacc - accepting set for each dfa state (or accepting number, if + * -r is not given) + * accsiz - size of accepting set for each dfa state + * dhash - dfa state hash value + * todo - queue of DFAs 
still to be processed + * todo_head - head of todo queue + * todo_next - next available entry on todo queue + * numas - number of DFA accepting states created; note that this + * is not necessarily the same value as accnum, which is the analogous + * value for the NFA + * numsnpairs - number of state/nextstate transition pairs + * jambase - position in base/def where the default jam table starts + * jamstate - state number corresponding to "jam" state + */ + +extern int current_max_dfa_size, current_max_xpairs; +extern int current_max_template_xpairs, current_max_dfas; +extern int lastdfa, lasttemp, *nxt, *chk, *tnxt; +extern int *base, *def, tblend, firstfree, **dss, *dfasiz, **dfaacc; +extern int *accsiz, *dhash, *todo, todo_head, todo_next, numas; +extern int numsnpairs, jambase, jamstate; + + +/* variables for ccl information: + * lastccl - ccl index of the last created ccl + * current_maxccls - current limit on the maximum number of unique ccl's + * cclmap - maps a ccl index to its set pointer + * ccllen - gives the length of a ccl + * cclng - true for a given ccl if the ccl is negated + * cclreuse - counts how many times a ccl is re-used + * current_max_ccl_tbl_size - current limit on number of characters needed + * to represent the unique ccl's + * ccltbl - holds the characters in each ccl - indexed by cclmap + */ + +extern int lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse; +extern int current_max_ccl_tbl_size; +extern char *ccltbl; + + +/* variables for miscellaneous information: + * starttime - real-time when we started + * endtime - real-time when we ended + * nmstr - last NAME scanned by the scanner + * sectnum - section number currently being parsed + * nummt - number of empty nxt/chk table entries + * hshcol - number of hash collisions detected by snstods + * dfaeql - number of times a newly created dfa was equal to an old one + * numeps - number of epsilon NFA states created + * eps2 - number of epsilon states which have 2 
out-transitions + * num_reallocs - number of times it was necessary to realloc() a group + * of arrays + * tmpuses - number of DFA states that chain to templates + * totnst - total number of NFA states used to make DFA states + * peakpairs - peak number of transition pairs we had to store internally + * numuniq - number of unique transitions + * numdup - number of duplicate transitions + * hshsave - number of hash collisions saved by checking number of states + */ + +extern char *starttime, *endtime, nmstr[MAXLINE]; +extern int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; +extern int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; + +char *allocate_array(), *reallocate_array(); + +#define allocate_integer_array(size) \ + (int *) allocate_array( size, sizeof( int ) ) + +#define reallocate_integer_array(array,size) \ + (int *) reallocate_array( (char *) array, size, sizeof( int ) ) + +#define allocate_integer_pointer_array(size) \ + (int **) allocate_array( size, sizeof( int * ) ) + +#define reallocate_integer_pointer_array(array,size) \ + (int **) reallocate_array( (char *) array, size, sizeof( int * ) ) + +#define allocate_character_array(size) allocate_array( size, sizeof( char ) ) + +#define reallocate_character_array(array,size) \ + reallocate_array( array, size, sizeof( char ) ) + + +/* used to communicate between scanner and parser. The type should really + * be YYSTYPE, but we can't easily get our hands on it. + */ +extern int yylval; diff --git a/main.c b/main.c new file mode 100644 index 0000000..d0a7ae1 --- /dev/null +++ b/main.c @@ -0,0 +1,507 @@ +/* flex - tool to generate fast lexical analyzers + * + * Copyright (c) University of California, 1987 + * + * + * ver date who remarks + * --- ---- --- ------------------------------------------------------- + * 04a 27Jun86 VP .translated from Ratfor into C + * 01a 22Aug83 VP .written. Original version by Jef Poskanzer. 
+ */ + + +#include "flexdef.h" + + +/* these globals are all defined and commented in flexdef.h */ +int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt; +int interactive, caseins, genftl, useecs, fulltbl, usemecs, reject; +int datapos, dataline, linenum; +FILE *skelfile = NULL; +char *infilename = NULL; +int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE]; +int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; +int current_mns; +int accnum, *firstst, *lastst, *finalst, *transchar; +int *trans1, *trans2, *accptnum, lastnfa; +int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP]; +int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; +int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1]; +int tecbck[CSIZE + 1]; +int lastsc, current_max_scs, *scset, *scbol, *scxclu, *actvsc; +int current_max_dfa_size, current_max_xpairs; +int current_max_template_xpairs, current_max_dfas; +int lastdfa, *nxt, *chk, *tnxt; +int *base, *def, tblend, firstfree, numtemps, **dss, *dfasiz, **dfaacc; +int *accsiz, *dhash, *todo, todo_head, todo_next, numas; +int numsnpairs, jambase, jamstate; +int lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse; +int current_max_ccl_tbl_size; +char *ccltbl; +char *starttime, *endtime, nmstr[MAXLINE]; +int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; +int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; + + +/* flex - main program + * + * synopsis (from the shell) + * flex [-v] [file ...] + */ + +main( argc, argv ) +int argc; +char **argv; + + { + lexinit( argc, argv ); + readin(); + + if ( ! syntaxerror ) + { + /* convert the ndfa to a dfa */ + ntod(); + + /* generate the ratfor state transition tables from the dfa */ + gentabs(); + } + + /* note, lexend does not return. It exits with its argument as status. */ + + lexend( 0 ); + } + + +/* lexend - terminate flex + * + * synopsis + * int status; + * lexend( status ); + * + * status is exit status. 
+ * + * note + * This routine does not return. + */ + +lexend( status ) +int status; + + { + int tblsiz; + char *gettime(); + + if ( skelfile != NULL ) + (void) fclose( skelfile ); + + if ( printstats ) + { + endtime = gettime(); + + fprintf( stderr, "flex usage statistics:\n" ); + fprintf( stderr, " started at %s, finished at %s\n", + starttime, endtime ); + + if ( ! genftl ) + fprintf( stderr, " Ratfor scanner generated\n" ); + + fprintf( stderr, " %d/%d NFA states\n", lastnfa, current_mns ); + fprintf( stderr, " %d/%d DFA states (%d words)\n", lastdfa, + current_max_dfas, totnst ); + fprintf( stderr, " %d rules\n", accnum ); + fprintf( stderr, " %d/%d start conditions\n", lastsc, + current_max_scs ); + fprintf( stderr, " %d epsilon states, %d double epsilon states\n", + numeps, eps2 ); + + if ( lastccl == 0 ) + fprintf( stderr, " no character classes\n" ); + else + fprintf( stderr, + " %d/%d character classes needed %d/%d words of storage, %d reused\n", + lastccl, current_maxccls, + cclmap[lastccl] + ccllen[lastccl] - 1, + current_max_ccl_tbl_size, cclreuse ); + + fprintf( stderr, " %d state/nextstate pairs created\n", numsnpairs ); + fprintf( stderr, " %d/%d unique/duplicate transitions\n", + numuniq, numdup ); + + if ( fulltbl ) + { + tblsiz = lastdfa * numecs; + fprintf( stderr, " %d table entries\n", tblsiz ); + } + + else + { + tblsiz = 2 * (lastdfa + numtemps) + 2 * tblend; + + fprintf( stderr, " %d/%d base/def entries created\n", + lastdfa + numtemps, current_max_dfas ); + fprintf( stderr, " %d/%d (peak %d) nxt/chk entries created\n", + tblend, current_max_xpairs, peakpairs ); + fprintf( stderr, + " %d/%d (peak %d) template nxt/chk entries created\n", + numtemps * nummecs, current_max_template_xpairs, + numtemps * numecs ); + fprintf( stderr, " %d empty table entries\n", nummt ); + fprintf( stderr, " %d protos created\n", numprots ); + fprintf( stderr, " %d templates created, %d uses\n", + numtemps, tmpuses ); + } + + if ( useecs ) + { + tblsiz = tblsiz + 
CSIZE; + fprintf( stderr, " %d/%d equivalence classes created\n", + numecs, CSIZE ); + } + + if ( usemecs ) + { + tblsiz = tblsiz + numecs; + fprintf( stderr, " %d/%d meta-equivalence classes created\n", + nummecs, CSIZE ); + } + + fprintf( stderr, " %d (%d saved) hash collisions, %d DFAs equal\n", + hshcol, hshsave, dfaeql ); + fprintf( stderr, " %d sets of reallocations needed\n", num_reallocs ); + fprintf( stderr, " %d total table entries needed\n", tblsiz ); + } + + exit( status ); + } + + +/* lexinit - initialize flex + * + * synopsis + * int argc; + * char **argv; + * lexinit( argc, argv ); + */ + +lexinit( argc, argv ) +int argc; +char **argv; + + { + int i; + char *arg, *skelname = DEFAULT_SKELETON_FILE, *gettime(), clower(); + int sawcmpflag, use_stdout; + + printstats = syntaxerror = trace = spprdflt = interactive = caseins = false; + ddebug = fulltbl = reject = false; + usemecs = genftl = useecs = true; + + sawcmpflag = false; + use_stdout = false; + + /* read flags */ + for ( --argc, ++argv; argc ; --argc, ++argv ) + { + if ( argv[0][0] != '-' || argv[0][1] == '\0' ) + break; + + arg = argv[0]; + + for ( i = 1; arg[i] != '\0'; ++i ) + switch ( arg[i] ) + { + case 'c': + if ( i != 1 ) + lexerror( "-c flag must be given separately" ); + + if ( ! 
sawcmpflag ) + { + useecs = false; + usemecs = false; + fulltbl = false; + sawcmpflag = true; + } + + for ( ++i; arg[i] != '\0'; ++i ) + switch ( clower( arg[i] ) ) + { + case 'e': + useecs = true; + break; + + case 'f': + fulltbl = true; + break; + + case 'm': + usemecs = true; + break; + + default: + lerrif( "unknown -c option %c", + (int) arg[i] ); + break; + } + + goto get_next_arg; + + case 'd': + ddebug = true; + break; + + case 'f': + useecs = usemecs = false; + fulltbl = true; + break; + + case 'I': + interactive = true; + break; + + case 'i': + caseins = true; + break; + + case 'l': + use_stdout = false; + break; + + case 'n': + printstats = false; + break; + + case 'r': + reject = true; + break; + + case 'S': + if ( i != 1 ) + lexerror( "-S flag must be given separately" ); + + skelname = arg + i + 1; + goto get_next_arg; + + case 's': + spprdflt = true; + break; + + case 't': + use_stdout = true; + break; + + case 'T': + trace = true; + break; + + case 'v': + printstats = true; + break; + + default: + lerrif( "unknown flag %c", (int) arg[i] ); + break; + } + +get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ + ; + } + + if ( fulltbl && usemecs ) + lexerror( "full table and -cm don't make sense together" ); + + if ( fulltbl && interactive ) + lexerror( "full table and -I are (currently) incompatible" ); + + if ( ! 
use_stdout ) + { + FILE *prev_stdout = freopen( "lex.yy.c", "w", stdout ); + + if ( prev_stdout == NULL ) + lexerror( "could not create lex.yy.c" ); + } + + if ( argc ) + { + if ( argc > 1 ) + lexerror( "extraneous argument(s) given" ); + + yyin = fopen( infilename = argv[0], "r" ); + + if ( yyin == NULL ) + lerrsf( "can't open %s", argv[0] ); + } + + else + yyin = stdin; + + lastccl = 0; + lastsc = 0; + + /* initialize the statistics */ + starttime = gettime(); + + if ( (skelfile = fopen( skelname, "r" )) == NULL ) + lerrsf( "can't open skeleton file %s", skelname ); + + lastdfa = lastnfa = accnum = numas = numsnpairs = tmpuses = 0; + numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0; + numuniq = numdup = hshsave = eofseen = datapos = dataline = 0; + onesp = numprots = 0; + + linenum = sectnum = 1; + firstprot = NIL; + + /* used in mkprot() so that the first proto goes in slot 1 + * of the proto queue + */ + lastprot = 1; + + if ( useecs ) + { + /* set up doubly-linked equivalence classes */ + ecgroup[1] = NIL; + + for ( i = 2; i <= CSIZE; ++i ) + { + ecgroup[i] = i - 1; + nextecm[i - 1] = i; + } + + nextecm[CSIZE] = NIL; + } + + else + { /* put everything in its own equivalence class */ + for ( i = 1; i <= CSIZE; ++i ) + { + ecgroup[i] = i; + nextecm[i] = BAD_SUBSCRIPT; /* to catch errors */ + } + } + + set_up_initial_allocations(); + } + + +/* readin - read in the rules section of the input file(s) + * + * synopsis + * readin(); + */ +readin() + + { + if ( genftl ) + { + fputs( "#define YYDEFAULTACTION ", stdout ); + + if ( spprdflt ) + fputs( "YYFATALERROR( \"flex scanner jammed\" )", stdout ); + else + fputs( "ECHO", stdout ); + + fputs( ";\n", stdout ); + + if ( ddebug ) + puts( "#define LEX_DEBUG" ); + if ( useecs ) + puts( "#define LEX_USE_ECS" ); + if ( usemecs ) + puts( "#define LEX_USE_MECS" ); + if ( interactive ) + puts( "#define LEX_INTERACTIVE_SCANNER" ); + if ( reject ) + puts( "#define LEX_REJECT_ENABLED" ); + if ( fulltbl ) + 
puts( "#define LEX_FULL_TABLE" ); + } + + else + { + fputs( "define(YYDEFAULTACTION,", stdout ); + + if ( spprdflt ) + fputs( "call error( \"flex scanner jammed\" )", stdout ); + else + fputs( "ECHO", stdout ); + + fputs( ")\n", stdout ); + + if ( ddebug ) + puts( "define(LEX_DEBUG,)" ); + if ( useecs ) + puts( "define(LEX_USE_ECS,)" ); + if ( usemecs ) + puts( "define(LEX_USE_MECS,)" ); + if ( reject ) + puts( "define(LEX_REJECT_ENABLED,)" ); + if ( fulltbl ) + puts( "define(LEX_FULL_TABLE,)" ); + } + + skelout(); + + line_directive_out(); + + if ( yyparse() ) + lerrif( "fatal parse error at line %d", linenum ); + + if ( useecs ) + { + numecs = cre8ecs( nextecm, ecgroup, CSIZE ); + ccl2ecl(); + } + + else + numecs = CSIZE; + } + + + +/* set_up_initial_allocations - allocate memory for internal tables */ + +set_up_initial_allocations() + + { + current_mns = INITIAL_MNS; + firstst = allocate_integer_array( current_mns ); + lastst = allocate_integer_array( current_mns ); + finalst = allocate_integer_array( current_mns ); + transchar = allocate_integer_array( current_mns ); + trans1 = allocate_integer_array( current_mns ); + trans2 = allocate_integer_array( current_mns ); + accptnum = allocate_integer_array( current_mns ); + + current_max_scs = INITIAL_MAX_SCS; + scset = allocate_integer_array( current_max_scs ); + scbol = allocate_integer_array( current_max_scs ); + scxclu = allocate_integer_array( current_max_scs ); + actvsc = allocate_integer_array( current_max_scs ); + + current_maxccls = INITIAL_MAXCCLS; + cclmap = allocate_integer_array( current_maxccls ); + ccllen = allocate_integer_array( current_maxccls ); + cclng = allocate_integer_array( current_maxccls ); + + current_max_ccl_tbl_size = INITIAL_MAX_CCL_TBL_SIZE; + ccltbl = allocate_character_array( current_max_ccl_tbl_size ); + + current_max_dfa_size = INITIAL_MAX_DFA_SIZE; + + current_max_xpairs = INITIAL_MAX_XPAIRS; + nxt = allocate_integer_array( current_max_xpairs ); + chk = allocate_integer_array( 
current_max_xpairs ); + + current_max_template_xpairs = INITIAL_MAX_TEMPLATE_XPAIRS; + tnxt = allocate_integer_array( current_max_template_xpairs ); + + current_max_dfas = INITIAL_MAX_DFAS; + base = allocate_integer_array( current_max_dfas ); + def = allocate_integer_array( current_max_dfas ); + dfasiz = allocate_integer_array( current_max_dfas ); + accsiz = allocate_integer_array( current_max_dfas ); + dhash = allocate_integer_array( current_max_dfas ); + todo = allocate_integer_array( current_max_dfas ); + dss = allocate_integer_pointer_array( current_max_dfas ); + dfaacc = allocate_integer_pointer_array( current_max_dfas ); + } diff --git a/misc.c b/misc.c new file mode 100644 index 0000000..3364e4c --- /dev/null +++ b/misc.c @@ -0,0 +1,646 @@ +/* lexmisc - miscellaneous flex routines */ + +/* + * Copyright (c) University of California, 1987 + */ + +#include +#include "flexdef.h" + +char *malloc(), *realloc(); + + +/* allocate_array - allocate memory for an integer array of the given size */ + +char *allocate_array( size, element_size ) +int size, element_size; + + { + register char *mem = malloc( (unsigned) (element_size * size) ); + + if ( mem == NULL ) + lexfatal( "memory allocation failed in allocate_array()" ); + + return ( mem ); + } + + +/* bubble - bubble sort an integer array in increasing order + * + * synopsis + * int v[n], n; + * bubble( v, n ); + * + * description + * sorts the first n elements of array v and replaces them in + * increasing order. 
+ * + * passed + * v - the array to be sorted + * n - the number of elements of 'v' to be sorted */ + +bubble( v, n ) +int v[], n; + + { + register int i, j, k; + + for ( i = n; i > 1; --i ) + for ( j = 1; j < i; ++j ) + if ( v[j] > v[j + 1] ) /* compare */ + { + k = v[j]; /* exchange */ + v[j] = v[j + 1]; + v[j + 1] = k; + } + } + + +/* clower - replace upper-case letter to lower-case + * + * synopsis: + * char clower(), c; + * c = clower( c ); + */ + +char clower( c ) +register char c; + + { + return ( isupper(c) ? tolower(c) : c ); + } + + +/* copy_string - returns a dynamically allocated copy of a string + * + * synopsis + * char *str, *copy, *copy_string(); + * copy = copy_string( str ); + */ + +char *copy_string( str ) +register char *str; + + { + register char *c; + char *copy; + + /* find length */ + for ( c = str; *c; ++c ) + ; + + copy = malloc( (unsigned) ((c - str + 1) * sizeof( char )) ); + + if ( copy == NULL ) + lexfatal( "dynamic memory failure in copy_string()" ); + + for ( c = copy; (*c++ = *str++); ) + ; + + return ( copy ); + } + + +/* cshell - shell sort a character array in increasing order + * + * synopsis + * + * char v[n]; + * int n; + * cshell( v, n ); + * + * description + * does a shell sort of the first n elements of array v. 
+ * + * passed + * v - array to be sorted + * n - number of elements of v to be sorted + */ +cshell( v, n ) +char v[]; +int n; + + { + int gap, i, j, jg; + char k; + + for ( gap = n / 2; gap > 0; gap = gap / 2 ) + for ( i = gap; i < n; ++i ) + for ( j = i - gap; j >= 0; j = j - gap ) + { + jg = j + gap; + + if ( v[j] <= v[jg] ) + break; + + k = v[j]; + v[j] = v[jg]; + v[jg] = k; + } + } + + +/* dataend - finish up a block of data declarations + * + * synopsis + * dataend(); + */ +dataend() + + { + if ( datapos > 0 ) + dataflush(); + + if ( genftl ) + /* add terminator for initialization */ + puts( " } ;\n" ); + + dataline = 0; + } + + + +/* dataflush - flush generated data statements + * + * synopsis + * dataflush(); + */ +dataflush() + + { + putchar( '\n' ); + + if ( genftl ) + { + if ( ++dataline >= NUMDATALINES ) + { + /* put out a blank line so that the table is grouped into + * large blocks that enable the user to find elements easily + */ + putchar( '\n' ); + dataline = 0; + } + } + + /* reset the number of characters written on the current line */ + datapos = 0; + } + + +/* gettime - return current time + * + * synopsis + * char *gettime(), *time_str; + * time_str = gettime(); + */ + +/* include sys/types.h to use time_t and make lint happy */ + +#include + +char *gettime() + + { + time_t t, time(); + char *result, *ctime(), *copy_string(); + + t = time( (long *) 0 ); + + result = copy_string( ctime( &t ) ); + + /* get rid of trailing newline */ + result[24] = '\0'; + + return ( result ); + } + + +/* lerrif - report an error message formatted with one integer argument + * + * synopsis + * char msg[]; + * int arg; + * lerrif( msg, arg ); + */ + +lerrif( msg, arg ) +char msg[]; +int arg; + + { + char errmsg[MAXLINE]; + (void) sprintf( errmsg, msg, arg ); + lexerror( errmsg ); + } + + +/* lerrsf - report an error message formatted with one string argument + * + * synopsis + * char msg[], arg[]; + * lerrsf( msg, arg ); + */ + +lerrsf( msg, arg ) +char msg[], 
arg[]; + + { + char errmsg[MAXLINE]; + (void) sprintf( errmsg, msg, arg ); + lexerror( errmsg ); + } + + +/* lexerror - report an error message and terminate + * + * synopsis + * char msg[]; + * lexerror( msg ); + */ + +lexerror( msg ) +char msg[]; + + { + fprintf( stderr, "flex: %s\n", msg ); + lexend( 1 ); + } + + +/* lexfatal - report a fatal error message and terminate + * + * synopsis + * char msg[]; + * lexfatal( msg ); + */ + +lexfatal( msg ) +char msg[]; + + { + fprintf( stderr, "flex: fatal internal error %s\n", msg ); + lexend( 1 ); + } + + +/* line_directive_out - spit out a "# line" statement */ + +line_directive_out() + + { + if ( infilename ) + printf( "# line %d \"%s\"\n", linenum, infilename ); + } + + +/* mk2data - generate a data statement for a two-dimensional array + * + * synopsis + * char name; + * int row, column, value; + * mk2data( name, row, column, value ); + * + * generates a data statement initializing "name(row, column)" to "value" + * Note that name is only a character; NOT a string. If we're generating + * FTL (-f flag), "name", "row", and "column" get ignored. + */ +mk2data( name, row, column, value ) +char name; +int row, column, value; + + { + int datalen; + static char dindent[] = DATAINDENTSTR; + + if ( genftl ) + { + if ( datapos >= NUMDATAITEMS ) + { + putchar( ',' ); + dataflush(); + } + + if ( datapos == 0 ) + /* indent */ + fputs( " ", stdout ); + + else + putchar( ',' ); + + ++datapos; + + printf( "%5d", value ); + } + + else + { + /* figure out length of data statement to be written. 
7 is the constant + * overhead of a one character name, '(', ',', and ')' to delimit + * the array reference, a '/' and a '/' to delimit the value, and + * room for a blank or a comma between this data statement and the + * previous one + */ + + datalen = 7 + numdigs( row ) + numdigs( column ) + numdigs( value ); + + if ( datalen + datapos >= DATALINEWIDTH | datapos == 0 ) + { + if ( datapos != 0 ) + dataflush(); + + /* precede data statement with '%' so rat4 preprocessor doesn't have + * to bother looking at it -- speed hack + */ + printf( "%%%sdata ", dindent ); + + /* 4 is the constant overhead of writing out the word "DATA" */ + datapos = DATAINDENTWIDTH + 4 + datalen; + } + + else + { + putchar( ',' ); + datapos = datapos + datalen; + } + + printf( "%c(%d,%d)/%d/", name, row, column, value ); + } + } + + +/* mkdata - generate a data statement + * + * synopsis + * char name; + * int arrayelm, value; + * mkdata( name, arrayelm, value ); + * + * generates a data statement initializing "name(arrayelm)" to "value" + * Note that name is only a character; NOT a string. If we're generating + * FTL (-f flag), "name" and "arrayelm" get ignored. + */ +mkdata( name, arrayelm, value ) +char name; +int arrayelm, value; + + { + int datalen; + static char dindent[] = DATAINDENTSTR; + + if ( genftl ) + { + if ( datapos >= NUMDATAITEMS ) + { + putchar( ',' ); + dataflush(); + } + + if ( datapos == 0 ) + /* indent */ + fputs( " ", stdout ); + + else + putchar( ',' ); + + ++datapos; + + printf( "%5d", value ); + } + + else + { + /* figure out length of data statement to be written. 
6 is the constant + * overhead of a one character name, '(' and ')' to delimit the array + * reference, a '/' and a '/' to delimit the value, and room for a + * blank or a comma between this data statement and the previous one + */ + + datalen = 6 + numdigs( arrayelm ) + numdigs( value ); + + if ( datalen + datapos >= DATALINEWIDTH | datapos == 0 ) + { + if ( datapos != 0 ) + dataflush(); + + /* precede data statement with '%' so rat4 preprocessor doesn't have + * to bother looking at it -- speed hack + */ + printf( "%%%sdata ", dindent ); + + /* 4 is the constant overhead of writing out the word "DATA" */ + datapos = DATAINDENTWIDTH + 4 + datalen; + } + + else + { + putchar( ',' ); + datapos = datapos + datalen; + } + + printf( "%c(%d)/%d/", name, arrayelm, value ); + } + } + + +/* myctoi - return the integer represented by a string of digits + * + * synopsis + * char array[]; + * int val, myctoi(); + * val = myctoi( array ); + * + */ + +int myctoi( array ) +char array[]; + + { + int val = 0; + + (void) sscanf( array, "%d", &val ); + + return ( val ); + } + + +/* myesc - return character corresponding to escape sequence + * + * synopsis + * char array[], c, myesc(); + * c = myesc( array ); + * + */ + +char myesc( array ) +char array[]; + + { + switch ( array[1] ) + { + case 'n': return ( '\n' ); + case 't': return ( '\t' ); + case 'f': return ( '\f' ); + case 'r': return ( '\r' ); + case 'b': return ( '\b' ); + + case '0': + if ( isdigit(array[2]) ) + { /* \0 */ + char c, esc_char; + register int sptr = 2; + + while ( isdigit(array[sptr]) ) + /* don't increment inside loop control because the + * macro will expand it to two increments! 
(Not a + * problem with the C version of the macro) + */ + ++sptr; + + c = array[sptr]; + array[sptr] = '\0'; + + esc_char = otoi( array + 2 ); + array[sptr] = c; + + if ( esc_char == '\0' ) + { + synerr( "escape sequence for null not allowed" ); + return ( 1 ); + } + + return ( esc_char ); + } + + else + { + synerr( "escape sequence for null not allowed" ); + return ( 1 ); + } + +#ifdef NOTDEF + case '^': + { + register char next_char = array[2]; + + if ( next_char == '?' ) + return ( 0x7f ); + + else if ( next_char >= 'A' && next_char <= 'Z' ) + return ( next_char - 'A' + 1 ); + + else if ( next_char >= 'a' && next_char <= 'z' ) + return ( next_char - 'z' + 1 ); + + synerr( "illegal \\^ escape sequence" ); + + return ( 1 ); + } +#endif + } + + return ( array[1] ); + } + + +/* numdigs - number of digits (includes leading sign) in number + * + * synopsis + * int numdigs, x; + * num = numdigs( x ); + */ +int numdigs( x ) +int x; + + { + if ( x < 0 ) + { + /* the only negative numbers we expect to encounter are very + * small ones + */ + if ( x < -9 ) + lexfatal( "assumption of small negative numbers botched in numdigs()" ); + + return ( 2 ); + } + + if ( x < 10 ) + return ( 1 ); + else if ( x < 100 ) + return ( 2 ); + else if ( x < 1000 ) + return ( 3 ); + else if ( x < 10000 ) + return ( 4 ); + else if ( x < 100000 ) + return ( 5 ); + else + return ( 6 ); + } + + +/* otoi - convert an octal digit string to an integer value + * + * synopsis: + * int val, otoi(); + * char str[]; + * val = otoi( str ); + */ + +int otoi( str ) +char str[]; + + { +#ifdef FTLSOURCE + fortran int gctoi() + int dummy = 1; + + return ( gctoi( str, dummy, 8 ) ); +#else + int result; + + (void) sscanf( str, "%o", &result ); + + return ( result ); +#endif + } + + + + +/* reallocate_array - increase the size of a dynamic array */ + +char *reallocate_array( array, size, element_size ) +char *array; +int size, element_size; + + { + register char *new_array = realloc( array, + (unsigned) (size * 
element_size )); + + if ( new_array == NULL ) + lexfatal( "attempt to increase array size failed" ); + + return ( new_array ); + } + + +/* skelout - write out one section of the lexskel file + * + * synopsis + * skelout(); + * + * DESCRIPTION + * Copies from skelfile to stdout until a line beginning with "%%" or + * EOF is found. + */ +skelout() + + { + char buf[MAXLINE]; + + while ( fgets( buf, MAXLINE, skelfile ) != NULL ) + if ( buf[0] == '%' && buf[1] == '%' ) + break; + else + fputs( buf, stdout ); + } diff --git a/nfa.c b/nfa.c new file mode 100644 index 0000000..d514ce1 --- /dev/null +++ b/nfa.c @@ -0,0 +1,542 @@ +/* lexnfa - NFA construction routines */ + +/* + * Copyright (c) University of California, 1987 + */ + +#include "flexdef.h" + +/* add_accept - add an accepting state to a machine + * + * synopsis + * + * add_accept( mach, headcnt, trailcnt ); + * + * the global ACCNUM is incremented and the new value becomes mach's + * accepting number. if headcnt or trailcnt is non-zero then the machine + * recognizes a pattern with trailing context. headcnt is the number of + * characters in the matched part of the pattern, or zero if the matched + * part has variable length. trailcnt is the number of trailing context + * characters in the pattern, or zero if the trailing context has variable + * length. + */ +add_accept( mach, headcnt, trailcnt ) +int mach, headcnt, trailcnt; + + { + int astate; + + printf( "case %d:\n", ++accnum ); + + if ( headcnt > 0 || trailcnt > 0 ) + { /* do trailing context magic to not match the trailing characters */ + printf( "YYDOBEFORESCAN; /* undo effects of setting up yytext */\n" ); + + if ( headcnt > 0 ) + { + if ( ! genftl || headcnt > 1 ) + printf( "yycbufp = yybbufp + %d;\n", + genftl ? 
headcnt - 1 : headcnt ); + else + printf( "yycbufp = yybbufp;\n" ); + } + + else + printf( "yycbufp -= %d;\n", trailcnt ); + + printf( "YYDOBEFOREACTION; /* set up yytext again */\n" ); + } + + line_directive_out(); + + /* hang the accepting number off an epsilon state. if it is associated + * with a state that has a non-epsilon out-transition, then the state + * will accept BEFORE it makes that transition, i.e. one character too soon + */ + + if ( transchar[finalst[mach]] == SYM_EPSILON ) + accptnum[finalst[mach]] = accnum; + + else + { + astate = mkstate( SYM_EPSILON ); + accptnum[astate] = accnum; + mach = link_machines( mach, astate ); + } + } + + +/* copysingl - make a given number of copies of a singleton machine + * + * synopsis + * + * newsng = copysingl( singl, num ); + * + * newsng - a new singleton composed of num copies of singl + * singl - a singleton machine + * num - the number of copies of singl to be present in newsng + */ +int copysingl( singl, num ) +int singl, num; + + { + int copy, i; + + copy = mkstate( SYM_EPSILON ); + + for ( i = 1; i <= num; ++i ) + copy = link_machines( copy, dupmachine( singl ) ); + + return ( copy ); + } + + +/* dumpnfa - debugging routine to write out an nfa + * + * synopsis + * int state1; + * dumpnfa( state1 ); + */ +dumpnfa( state1 ) +int state1; + + { + int sym, tsp1, tsp2, anum, ns; + + fprintf( stderr, "\n\n********** beginning dump of nfa with start state %d\n", + state1 ); + + /* we probably should loop starting at firstst[state1] and going to + * lastst[state1], but they're not maintained properly when we "or" + * all of the rules together. So we use our knowledge that the machine + * starts at state 1 and ends at lastnfa. 
+ */ + + /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */ + for ( ns = 1; ns <= lastnfa; ++ns ) + { + fprintf( stderr, "state # %4d\t", ns ); + + sym = transchar[ns]; + tsp1 = trans1[ns]; + tsp2 = trans2[ns]; + anum = accptnum[ns]; + + fprintf( stderr, "%3d: %4d, %4d", sym, tsp1, tsp2 ); + + if ( anum != NIL ) + fprintf( stderr, " [%d]", anum ); + + fprintf( stderr, "\n" ); + } + + fprintf( stderr, "********** end of dump\n" ); + } + + +/* dupmachine - make a duplicate of a given machine + * + * synopsis + * + * copy = dupmachine( mach ); + * + * copy - holds duplicate of mach + * mach - machine to be duplicated + * + * note that the copy of mach is NOT an exact duplicate; rather, all the + * transition states values are adjusted so that the copy is self-contained, + * as the original should have been. + * + * also note that the original MUST be contiguous, with its low and high + * states accessible by the arrays firstst and lastst + */ +int dupmachine( mach ) +int mach; + + { + int i, state, init, last = lastst[mach], state_offset; + + for ( i = firstst[mach]; i <= last; ++i ) + { + state = mkstate( transchar[i] ); + + if ( trans1[i] != NO_TRANSITION ) + { + mkxtion( finalst[state], trans1[i] + state - i ); + + if ( transchar[i] == SYM_EPSILON && trans2[i] != NO_TRANSITION ) + mkxtion( finalst[state], trans2[i] + state - i ); + } + + accptnum[state] = accptnum[i]; + } + + state_offset = state - i + 1; + + init = mach + state_offset; + firstst[init] = firstst[mach] + state_offset; + finalst[init] = finalst[mach] + state_offset; + lastst[init] = lastst[mach] + state_offset; + + return ( init ); + } + + +/* link_machines - connect two machines together + * + * synopsis + * + * new = link_machines( first, last ); + * + * new - a machine constructed by connecting first to last + * first - the machine whose successor is to be last + * last - the machine whose predecessor is to be first + * + * note: this routine concatenates the machine first with the 
machine + * last to produce a machine new which will pattern-match first first + * and then last, and will fail if either of the sub-patterns fails. + * FIRST is set to new by the operation. last is unmolested. + */ +int link_machines( first, last ) +int first, last; + + { + if ( first == NIL ) + return ( last ); + + else if ( last == NIL ) + return ( first ); + + else + { + mkxtion( finalst[first], last ); + finalst[first] = finalst[last]; + lastst[first] = max( lastst[first], lastst[last] ); + firstst[first] = min( firstst[first], firstst[last] ); + + return ( first ); + } + } + + +/* mkbranch - make a machine that branches to two machines + * + * synopsis + * + * branch = mkbranch( first, second ); + * + * branch - a machine which matches either first's pattern or second's + * first, second - machines whose patterns are to be or'ed (the | operator) + * + * note that first and second are NEITHER destroyed by the operation. Also, + * the resulting machine CANNOT be used with any other "mk" operation except + * more mkbranch's. Compare with mkor() + */ +int mkbranch( first, second ) +int first, second; + + { + int eps; + + if ( first == NO_TRANSITION ) + return ( second ); + + else if ( second == NO_TRANSITION ) + return ( first ); + + eps = mkstate( SYM_EPSILON ); + + mkxtion( eps, first ); + mkxtion( eps, second ); + + return ( eps ); + } + + +/* mkclos - convert a machine into a closure + * + * synopsis + * new = mkclos( state ); + * + * new - a new state which matches the closure of "state" + */ +int mkclos( state ) +int state; + + { + return ( mkopt( mkposcl( state ) ) ); + } + + +/* mkopt - make a machine optional + * + * synopsis + * + * new = mkopt( mach ); + * + * new - a machine which optionally matches whatever mach matched + * mach - the machine to make optional + * + * notes: + * 1. mach must be the last machine created + * 2. mach is destroyed by the call + */ +int mkopt( mach ) +int mach; + + { + int eps; + + if ( ! 
SUPER_FREE_EPSILON(finalst[mach]) ) + { + eps = mkstate( SYM_EPSILON ); + mach = link_machines( mach, eps ); + } + + /* can't skimp on the following if FREE_EPSILON(mach) is true because + * some state interior to "mach" might point back to the beginning + * for a closure + */ + eps = mkstate( SYM_EPSILON ); + mach = link_machines( eps, mach ); + + mkxtion( mach, finalst[mach] ); + + return ( mach ); + } + + +/* mkor - make a machine that matches either one of two machines + * + * synopsis + * + * new = mkor( first, second ); + * + * new - a machine which matches either first's pattern or second's + * first, second - machines whose patterns are to be or'ed (the | operator) + * + * note that first and second are both destroyed by the operation + * the code is rather convoluted because an attempt is made to minimize + * the number of epsilon states needed + */ +int mkor( first, second ) +int first, second; + + { + int eps, orend; + + if ( first == NIL ) + return ( second ); + + else if ( second == NIL ) + return ( first ); + + else + { + /* see comment in mkopt() about why we can't use the first state + * of "first" or "second" if they satisfy "FREE_EPSILON" + */ + eps = mkstate( SYM_EPSILON ); + + first = link_machines( eps, first ); + + mkxtion( first, second ); + + if ( SUPER_FREE_EPSILON(finalst[first]) && + accptnum[finalst[first]] == NIL ) + { + orend = finalst[first]; + mkxtion( finalst[second], orend ); + } + + else if ( SUPER_FREE_EPSILON(finalst[second]) && + accptnum[finalst[second]] == NIL ) + { + orend = finalst[second]; + mkxtion( finalst[first], orend ); + } + + else + { + eps = mkstate( SYM_EPSILON ); + + first = link_machines( first, eps ); + orend = finalst[first]; + + mkxtion( finalst[second], orend ); + } + } + + finalst[first] = orend; + return ( first ); + } + + +/* mkposcl - convert a machine into a positive closure + * + * synopsis + * new = mkposcl( state ); + * + * new - a machine matching the positive closure of "state" + */ +int mkposcl( 
state ) +int state; + + { + int eps; + + if ( SUPER_FREE_EPSILON(finalst[state]) ) + { + mkxtion( finalst[state], state ); + return ( state ); + } + + else + { + eps = mkstate( SYM_EPSILON ); + mkxtion( eps, state ); + return ( link_machines( state, eps ) ); + } + } + + +/* mkrep - make a replicated machine + * + * synopsis + * new = mkrep( mach, lb, ub ); + * + * new - a machine that matches whatever "mach" matched from "lb" + * number of times to "ub" number of times + * + * note + * if "ub" is INFINITY then "new" matches "lb" or more occurances of "mach" + */ +int mkrep( mach, lb, ub ) +int mach, lb, ub; + + { + int base, tail, copy, i; + + base = copysingl( mach, lb - 1 ); + + if ( ub == INFINITY ) + { + copy = dupmachine( mach ); + mach = link_machines( mach, link_machines( base, mkclos( copy ) ) ); + } + + else + { + tail = mkstate( SYM_EPSILON ); + + for ( i = lb; i < ub; ++i ) + { + copy = dupmachine( mach ); + tail = mkopt( link_machines( copy, tail ) ); + } + + mach = link_machines( mach, link_machines( base, tail ) ); + } + + return ( mach ); + } + + +/* mkstate - create a state with a transition on a given symbol + * + * synopsis + * + * state = mkstate( sym ); + * + * state - a new state matching sym + * sym - the symbol the new state is to have an out-transition on + * + * note that this routine makes new states in ascending order through the + * state array (and increments LASTNFA accordingly). The routine DUPMACHINE + * relies on machines being made in ascending order and that they are + * CONTIGUOUS. 
Change it and you will have to rewrite DUPMACHINE (kludge + * that it admittedly is) + */ +int mkstate( sym ) +int sym; + + { + if ( ++lastnfa >= current_mns ) + { + if ( (current_mns += MNS_INCREMENT) >= MAXIMUM_MNS ) + lerrif( "input rules are too complicated (>= %d NFA states)", + current_mns ); + + ++num_reallocs; + + transchar = reallocate_integer_array( transchar, current_mns ); + trans1 = reallocate_integer_array( trans1, current_mns ); + trans2 = reallocate_integer_array( trans2, current_mns ); + accptnum = reallocate_integer_array( accptnum, current_mns ); + firstst = reallocate_integer_array( firstst, current_mns ); + finalst = reallocate_integer_array( finalst, current_mns ); + lastst = reallocate_integer_array( lastst, current_mns ); + } + + transchar[lastnfa] = sym; + trans1[lastnfa] = NO_TRANSITION; + trans2[lastnfa] = NO_TRANSITION; + accptnum[lastnfa] = NIL; + firstst[lastnfa] = lastnfa; + finalst[lastnfa] = lastnfa; + lastst[lastnfa] = lastnfa; + + /* fix up equivalence classes base on this transition. Note that any + * character which has its own transition gets its own equivalence class. + * Thus only characters which are only in character classes have a chance + * at being in the same equivalence class. E.g. "a|b" puts 'a' and 'b' + * into two different equivalence classes. "[ab]" puts them in the same + * equivalence class (barring other differences elsewhere in the input). 
+ */ + + if ( sym < 0 ) + { + /* we don't have to update the equivalence classes since that was + * already done when the ccl was created for the first time + */ + } + + else if ( sym == SYM_EPSILON ) + ++numeps; + + else + { + if ( useecs ) + mkechar( sym, nextecm, ecgroup ); + } + + return ( lastnfa ); + } + + +/* mkxtion - make a transition from one state to another + * + * synopsis + * + * mkxtion( statefrom, stateto ); + * + * statefrom - the state from which the transition is to be made + * stateto - the state to which the transition is to be made + */ +mkxtion( statefrom, stateto ) +int statefrom, stateto; + + { + if ( trans1[statefrom] == NO_TRANSITION ) + trans1[statefrom] = stateto; + + else if ( (transchar[statefrom] != SYM_EPSILON) || + (trans2[statefrom] != NO_TRANSITION) ) + lexfatal( "found too many transitions in mkxtion()" ); + + else + { /* second out-transition for an epsilon state */ + ++eps2; + trans2[statefrom] = stateto; + } + } diff --git a/parse.y b/parse.y new file mode 100644 index 0000000..b5cb379 --- /dev/null +++ b/parse.y @@ -0,0 +1,473 @@ +/* lexparse.y - parser for flex input */ + +/* + * Copyright (c) University of California, 1987 + */ + +%token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL + +%{ +#include "flexdef.h" + +int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen; +int trlcontxt, xcluflg, cclsorted, varlength; +char clower(); + +static int madeany = false; /* whether we've made the '.' 
character class */ + +%} + +%% +goal : initlex sect1 sect1end sect2 + ; + +initlex : + { + /* initialize for processing rules */ + + /* create default DFA start condition */ + scinstal( "0", false ); + } + ; + +sect1 : sect1 startconddecl WHITESPACE namelist1 '\n' + | + | error '\n' + { synerr( "unknown error processing section 1" ); } + ; + +sect1end : SECTEND + ; + +startconddecl : SCDECL + { + /* these productions are separate from the s1object + * rule because the semantics must be done before + * we parse the remainder of an s1object + */ + + xcluflg = false; + } + + | XSCDECL + { xcluflg = true; } + ; + +namelist1 : namelist1 WHITESPACE NAME + { scinstal( nmstr, xcluflg ); } + + | NAME + { scinstal( nmstr, xcluflg ); } + + | error + { synerr( "bad start condition list" ); } + ; + +sect2 : sect2 initforrule lexrule '\n' + | + ; + +initforrule : + { + /* initialize for a parse of one rule */ + trlcontxt = varlength = false; + trailcnt = headcnt = rulelen = 0; + } + ; + +lexrule : scon '^' re eol + { + pat = link_machines( $3, $4 ); + add_accept( pat, headcnt, trailcnt ); + + for ( i = 1; i <= actvp; ++i ) + scbol[actvsc[i]] = mkbranch( scbol[actvsc[i]], pat ); + } + + | scon re eol + { + pat = link_machines( $2, $3 ); + add_accept( pat, headcnt, trailcnt ); + + for ( i = 1; i <= actvp; ++i ) + scset[actvsc[i]] = mkbranch( scset[actvsc[i]], pat ); + } + + | '^' re eol + { + pat = link_machines( $2, $3 ); + add_accept( pat, headcnt, trailcnt ); + + /* add to all non-exclusive start conditions, + * including the default (0) start condition + */ + + for ( i = 1; i <= lastsc; ++i ) + if ( ! scxclu[i] ) + scbol[i] = mkbranch( scbol[i], pat ); + } + + | re eol + { + pat = link_machines( $1, $2 ); + add_accept( pat, headcnt, trailcnt ); + + for ( i = 1; i <= lastsc; ++i ) + if ( ! 
scxclu[i] ) + scset[i] = mkbranch( scset[i], pat ); + } + + | error + { synerr( "unrecognized rule" ); } + ; + +scon : '<' namelist2 '>' + ; + +namelist2 : namelist2 ',' NAME + { + if ( (scnum = sclookup( nmstr )) == 0 ) + synerr( "undeclared start condition" ); + + else + actvsc[++actvp] = scnum; + } + + | NAME + { + if ( (scnum = sclookup( nmstr )) == 0 ) + synerr( "undeclared start condition" ); + else + actvsc[actvp = 1] = scnum; + } + + | error + { synerr( "bad start condition list" ); } + ; + +eol : '$' + { + if ( trlcontxt ) + { + synerr( "trailing context used twice" ); + $$ = mkstate( SYM_EPSILON ); + } + else + { + trlcontxt = true; + + if ( ! varlength ) + headcnt = rulelen; + + ++rulelen; + trailcnt = 1; + + eps = mkstate( SYM_EPSILON ); + $$ = link_machines( eps, mkstate( '\n' ) ); + } + } + + | + { + $$ = mkstate( SYM_EPSILON ); + + if ( trlcontxt ) + { + if ( varlength && headcnt == 0 ) + /* both head and trail are variable-length */ + synerr( "illegal trailing context" ); + + else + trailcnt = rulelen; + } + } + ; + +re : re '|' series + { + varlength = true; + + $$ = mkor( $1, $3 ); + } + + | re2 series + { $$ = link_machines( $1, $2 ); } + + | series + { $$ = $1; } + ; + + +re2 : re '/' + { + /* this rule is separate from the others for "re" so + * that the reduction will occur before the trailing + * series is parsed + */ + + if ( trlcontxt ) + synerr( "trailing context used twice" ); + else + trlcontxt = true; + + if ( varlength ) + /* the trailing context had better be fixed-length */ + varlength = false; + else + headcnt = rulelen; + + rulelen = 0; + $$ = $1; + } + ; + +series : series singleton + { + /* this is where concatenation of adjacent patterns + * gets done + */ + $$ = link_machines( $1, $2 ); + } + + | singleton + { $$ = $1; } + ; + +singleton : singleton '*' + { + varlength = true; + + $$ = mkclos( $1 ); + } + + | singleton '+' + { + varlength = true; + + $$ = mkposcl( $1 ); + } + + | singleton '?' 
+ { + varlength = true; + + $$ = mkopt( $1 ); + } + + | singleton '{' NUMBER ',' NUMBER '}' + { + varlength = true; + + if ( $3 > $5 || $3 <= 0 ) + { + synerr( "bad iteration values" ); + $$ = $1; + } + else + $$ = mkrep( $1, $3, $5 ); + } + + | singleton '{' NUMBER ',' '}' + { + varlength = true; + + if ( $3 <= 0 ) + { + synerr( "iteration value must be positive" ); + $$ = $1; + } + + else + $$ = mkrep( $1, $3, INFINITY ); + } + + | singleton '{' NUMBER '}' + { + rulelen = rulelen + $3; + + if ( $3 <= 0 ) + { + synerr( "iteration value must be positive" ); + $$ = $1; + } + + else + $$ = link_machines( $1, copysingl( $1, $3 - 1 ) ); + } + + | '.' + { + if ( ! madeany ) + { + /* create the '.' character class */ + anyccl = cclinit(); + ccladd( anyccl, '\n' ); + cclnegate( anyccl ); + + if ( useecs ) + mkeccl( ccltbl + cclmap[anyccl], + ccllen[anyccl], nextecm, + ecgroup, CSIZE ); + + madeany = true; + } + + ++rulelen; + + $$ = mkstate( -anyccl ); + } + + | fullccl + { + if ( ! cclsorted ) + /* sort characters for fast searching. We use a + * shell sort since this list could be large. 
/* ccl - the body of a character class: a possibly empty sequence of single
 * characters and ranges.  The semantic value is the ccl number returned by
 * cclinit().  cclsorted stays true only while every element added is
 * greater than lastchar, i.e. while the class is being built in ascending
 * order.
 */
ccl             :  ccl CHAR '-' CHAR
			{
			/* fold case BEFORE checking the bounds, so that the
			 * check sees the same end points the loop below
			 * uses.  Checking first rejects a range such as
			 * [a-Z] that is fine once folded, and lets [Z-a]
			 * through only to add nothing at all.
			 */
			if ( caseins )
			    {
			    if ( $2 >= 'A' && $2 <= 'Z' )
				$2 = clower( $2 );
			    if ( $4 >= 'A' && $4 <= 'Z' )
				$4 = clower( $4 );
			    }

			if ( $2 > $4 )
			    synerr( "negative range in character class" );

			else
			    {
			    for ( i = $2; i <= $4; ++i )
			        ccladd( $1, i );

			    /* keep track if this ccl is staying in alphabetical
			     * order
			     */
			    cclsorted = cclsorted && ($2 > lastchar);
			    lastchar = $4;
			    }

			$$ = $1;
			}

		|  ccl CHAR
			{
			if ( caseins )
			    if ( $2 >= 'A' && $2 <= 'Z' )
				$2 = clower( $2 );

			ccladd( $1, $2 );
			cclsorted = cclsorted && ($2 > lastchar);
			lastchar = $2;
			$$ = $1;
			}

		|
			{
			/* empty so far: start a new ccl */
			cclsorted = true;
			lastchar = 0;
			$$ = cclinit();
			}
		;
the parser + * + * synopsis + * char msg[]; + * yyerror( msg ); + */ + +yyerror( msg ) +char msg[]; + + { + } diff --git a/scan.l b/scan.l new file mode 100644 index 0000000..5f344dc --- /dev/null +++ b/scan.l @@ -0,0 +1,370 @@ +/* flexscan.l - scanner for flex input */ + +/* + * Copyright (c) University of California, 1987 + */ + +%{ +#include "flexdef.h" +#include "strings.h" +#include "y.tab.h" + +#undef YYDECL +#define YYDECL \ + int lexscan() + +#undef yywrap +#define yywrap(result) \ + { \ + if ( ! did_second_skelout ) \ + skelout(); \ + result = 1; \ + } + +#define RETURNCHAR \ + yylval = yytext[0]; \ + return ( CHAR ); + +#define RETURNNAME \ + (void) strcpy( nmstr, yytext ); \ + return ( NAME ); + +#define PUT_BACK_STRING(str, start) \ + for ( i = strlen( str ) - 1; i >= start; --i ) \ + unput(str[i]) +%} + +%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE +%x FIRSTCCL CCL ACTION RECOVER BRACEERROR C_COMMENT ACTION_COMMENT +%x ACTION_STRING PERCENT_BRACE_ACTION + +WS [ \t]+ + +OPTWS [ \t]* + +NAME [a-z_][a-z_0-9]* + +SCNAME {NAME} + +ESCSEQ \\([^^\n]|"^".|0[0-9]{1,3}) + +%% + static int bracelevel; + int i, cclval; + char nmdef[MAXLINE], myesc(); + static int didadef, did_second_skelout = false; + +^{WS}.*\n ++linenum; ECHO; /* indented code */ +^#.*\n ++linenum; ECHO; /* either a Ratfor comment or a CPP directive */ +^"/*" ECHO; BEGIN(C_COMMENT); +^"%s"(tart)? 
<PICKUPDEF>[^ \t\n].*	{
			/* the text of a name definition: everything from the
			 * first non-blank character to the end of the line
			 */
			(void) strcpy( nmdef, yytext );

			/* strip trailing blanks and tabs.  The bounds test
			 * has to guard both character comparisons; since
			 * "&&" binds tighter than "||", leaving out the
			 * parentheses would let the tab test run without it.
			 */
			for ( i = strlen( nmdef ) - 1;
			      i >= 0 &&
			      (nmdef[i] == ' ' || nmdef[i] == '\t');
			      --i )
			    ;

			nmdef[i + 1] = '\0';

			ndinstal( nmstr, nmdef );
			didadef = true;
			}
didadef ) + synerr( "incomplete name definition" ); + BEGIN(0); + ++linenum; + } + +.*\n ++linenum; RETURNNAME; + + +.*\n/[^ \t\n] { + ++linenum; + ECHO; + skelout(); + did_second_skelout = true; + BEGIN(SECT2); + } + +.*\n ++linenum; ECHO; + +^{OPTWS}\n ++linenum; /* allow blank lines in section 2 */ +^{WS}.*\n { + synerr( "indented code found outside of action" ); + ++linenum; + } +"<" BEGIN(SC); return ( '<' ); +^"^" return ( '^' ); +\" BEGIN(QUOTE); return ( '"' ); +"{"/[0-9] BEGIN(NUM); return ( '{' ); +"{"[^0-9\n][^}\n]* BEGIN(BRACEERROR); +"$"/[ \t\n] return ( '$' ); + +{WS}"%{" { + bracelevel = 1; + BEGIN(PERCENT_BRACE_ACTION); + return ( '\n' ); + } +{WS}"|".*\n ++linenum; return ( '\n' ); + +{WS} | +{OPTWS}/\n { + bracelevel = 0; + BEGIN(ACTION); + return ( '\n' ); + } + +^{OPTWS}\n ++linenum; return ( '\n' ); + +^"%%".* { + /* guarentee that the SECT3 rule will have something + * to match + */ + yyless(1); + sectnum = 3; + BEGIN(SECT3); + return ( EOF ); /* to stop the parser */ + } + +"["([^\\\]\n]|{ESCSEQ})+"]" { + (void) strcpy( nmstr, yytext ); + + /* check to see if we've already encountered this ccl */ + if ( (cclval = ccllookup( nmstr )) ) + { + yylval = cclval; + ++cclreuse; + return ( PREVCCL ); + } + else + { + /* we fudge a bit. We know that this ccl will + * soon be numbered as lastccl + 1 by cclinit + */ + cclinstal( nmstr, lastccl + 1 ); + + /* push back everything but the leading bracket + * so the ccl can be rescanned + */ + PUT_BACK_STRING(nmstr, 1); + + BEGIN(FIRSTCCL); + return ( '[' ); + } + } + +"{"{NAME}"}" { + register char *nmdefptr; + char *ndlookup(); + + (void) strcpy( nmstr, yytext ); + nmstr[yyleng - 1] = '\0'; /* chop trailing brace */ + + /* lookup from "nmstr + 1" to chop leading brace */ + if ( ! (nmdefptr = ndlookup( nmstr + 1 )) ) + synerr( "undefined {name}" ); + + else + { /* push back name surrounded by ()'s */ + unput(')'); + PUT_BACK_STRING(nmdefptr, 0); + unput('('); + } + } + +[/|*+?.()] return ( yytext[0] ); +. 
RETURNCHAR; +\n ++linenum; return ( '\n' ); + + +"," return ( ',' ); +">" BEGIN(SECT2); return ( '>' ); +">"/"^" BEGIN(CARETISBOL); return ( '>' ); +{SCNAME} RETURNNAME; +. synerr( "bad start condition name" ); + +"^" BEGIN(SECT2); return ( '^' ); + + +[^"\n] RETURNCHAR; +\" BEGIN(SECT2); return ( '"' ); + +\n { + synerr( "missing quote" ); + BEGIN(SECT2); + ++linenum; + return ( '"' ); + } + + +"^"/[^-\n] BEGIN(CCL); return ( '^' ); +"^"/- return ( '^' ); +- BEGIN(CCL); yylval = '-'; return ( CHAR ); +. BEGIN(CCL); RETURNCHAR; + +-/[^\]\n] return ( '-' ); +[^\]\n] RETURNCHAR; +"]" BEGIN(SECT2); return ( ']' ); + + +[0-9]+ { + yylval = myctoi( yytext ); + return ( NUMBER ); + } + +"," return ( ',' ); +"}" BEGIN(SECT2); return ( '}' ); + +. { + synerr( "bad character inside {}'s" ); + BEGIN(SECT2); + return ( '}' ); + } + +\n { + synerr( "missing }" ); + BEGIN(SECT2); + ++linenum; + return ( '}' ); + } + + +"}" synerr( "bad name in {}'s" ); BEGIN(SECT2); +\n synerr( "missing }" ); ++linenum; BEGIN(SECT2); + + +{OPTWS}"%}".* bracelevel = 0; +.* ECHO; +\n { + ++linenum; + ECHO; + if ( bracelevel == 0 ) + { + if ( genftl ) + puts( "\tbreak;" ); + BEGIN(SECT2); + } + } + +"{" ECHO; ++bracelevel; +"}" ECHO; --bracelevel; +[^{}"'/\n]+ ECHO; +"/*" ECHO; BEGIN(ACTION_COMMENT); +"'"([^'\\\n]|\\.)*"'" ECHO; /* character constant */ +\" ECHO; BEGIN(ACTION_STRING); +\n { + ++linenum; + ECHO; + if ( bracelevel == 0 ) + { + if ( genftl ) + puts( "\tbreak;" ); + BEGIN(SECT2); + } + } +. ECHO; + +"*/" ECHO; BEGIN(ACTION); +[^*\n]+ ECHO; +"*" ECHO; +\n ++linenum; ECHO; +. ECHO; + +[^"\\\n]+ ECHO; +\\. ECHO; +\n ++linenum; ECHO; +\" ECHO; BEGIN(ACTION); +. ECHO; + + +{ESCSEQ} { + yylval = myesc( yytext ); + return ( CHAR ); + } + +{ESCSEQ} { + yylval = myesc( yytext ); + BEGIN(CCL); + return ( CHAR ); + } + + +.|\n { + register int numchars; + + /* black magic - we know the names of a lex scanner's + * internal variables. 
We cap the input buffer with + * an end-of-string and dump it to the output. + */ + YYDOBEFORESCAN; /* recover from setting up yytext */ + + yychbuf[yyebufp + 1] = '\0'; + + /* ignore the first character; it's the second '%' + * put back by the yyless(1) above + */ + fputs( yychbuf + yycbufp + 1, stdout ); + + /* if we don't do this, the data written by write() + * can get overwritten when stdout is finally flushed + */ + (void) fflush( stdout ); + + while ( (numchars = read( fileno(yyin), yychbuf, + YYBUFMAX )) > 0 ) + (void) write( fileno(stdout), yychbuf, numchars ); + + if ( numchars < 0 ) + lexerror( "fatal read error in section 3" ); + + return ( EOF ); + } + +%% diff --git a/sym.c b/sym.c new file mode 100644 index 0000000..af50831 --- /dev/null +++ b/sym.c @@ -0,0 +1,291 @@ +/* lexsym - symbol table routines */ + +/* + * Copyright (c) University of California, 1987 + */ + +#include "flexdef.h" + +struct hash_entry *ndtbl[NAME_TABLE_HASH_SIZE]; +struct hash_entry *sctbl[START_COND_HASH_SIZE]; +struct hash_entry *ccltab[CCL_HASH_SIZE]; + + +/* addsym - add symbol and definition to symbol table + * + * synopsis + * char sym[], def[]; + * hash_table table; + * int table_size; + * -1/0 = addsym( sym, def, table, table_size ); + * + * -1 is returned if the symbol already exists, and the change not made. + */ + +int addsym( sym, def, table, table_size ) +register char sym[]; +char def[]; +hash_table table; +int table_size; + + { + int hash_val = hashfunct( sym, table_size ); + register struct hash_entry *entry = table[hash_val]; + register struct hash_entry *new_entry; + register struct hash_entry *successor; + char *malloc(); + + while ( entry ) + { + if ( ! 
strcmp( sym, entry->name ) ) + { /* entry already exists */ + return ( -1 ); + } + + entry = entry->next; + } + + /* create new entry */ + new_entry = (struct hash_entry *) malloc( sizeof( struct hash_entry ) ); + + if ( new_entry == NULL ) + lexfatal( "symbol table memory allocation failed" ); + + if ( (successor = table[hash_val]) ) + { + new_entry->next = successor; + successor->prev = new_entry; + } + else + new_entry->next = NULL; + + new_entry->prev = NULL; + new_entry->name = sym; + new_entry->val = def; + + table[hash_val] = new_entry; + + return ( 0 ); + } + + +/* cclinstal - save the text of a character class + * + * synopsis + * char ccltxt[]; + * int cclnum; + * cclinstal( ccltxt, cclnum ); + */ +cclinstal( ccltxt, cclnum ) +char ccltxt[]; +int cclnum; + + { + /* we don't bother checking the return status because we are not called + * unless the symbol is new + */ + char *copy_string(); + + (void) addsym( copy_string( ccltxt ), (char *) cclnum, + ccltab, CCL_HASH_SIZE ); + } + + +/* ccllookup - lookup the number associated with character class text + * + * synopsis + * char ccltxt[]; + * int ccllookup, cclval; + * cclval/0 = ccllookup( ccltxt ); + */ +int ccllookup( ccltxt ) +char ccltxt[]; + + { + char *getdef(); + + return ( (int) getdef( ccltxt, ccltab, CCL_HASH_SIZE ) ); + } + + +/* findsym - find symbol in symbol table + * + * synopsis + * char sym[]; + * hash_table table; + * int table_size; + * struct hash_entry *entry, *findsym(); + * entry = findsym( sym, table, table_size ); + */ + +struct hash_entry *findsym( sym, table, table_size ) +register char sym[]; +hash_table table; +int table_size; + + { + register struct hash_entry *entry = table[hashfunct( sym, table_size )]; + + while ( entry ) + { + if ( ! 
/* hashfunct - compute the hash value for "str" and hash size "hash_size"
 *
 * synopsis
 *     char str[];
 *     int hash_size, hash_val;
 *     hash_val = hashfunct( str, hash_size );
 *
 * The value is built one character at a time as
 * ((hash << 1) + character) % hash_size, so the result is always in the
 * range 0 .. hash_size - 1.  hash_size is the modulus and must be positive.
 */

int hashfunct( str, hash_size )
register char str[];
int hash_size;

    {
    register int hashval;
    register int locstr;

    hashval = 0;
    locstr = 0;

    /* each character is added as an unsigned value: where plain char is
     * signed, a character with its high bit set would otherwise make the
     * sum, and with it the result of %, negative
     */
    while ( str[locstr] )
	hashval = ((hashval << 1) + (unsigned char) str[locstr++]) % hash_size;

    return ( hashval );
    }
We know how the default start-condition is + * declared, and don't put out a define for it, because it + * would come out as "#define 0 1" + */ + + if ( strcmp( str, "0" ) ) + printf( "#define %s %d\n", str, lastsc * 2 ); + } + + else + printf( "define(YYLEX_SC_%s,%d)\n", str, lastsc * 2 ); + + if ( ++lastsc >= current_max_scs ) + { + current_max_scs += MAX_SCS_INCREMENT; + + ++num_reallocs; + + scset = reallocate_integer_array( scset, current_max_scs ); + scbol = reallocate_integer_array( scbol, current_max_scs ); + scxclu = reallocate_integer_array( scxclu, current_max_scs ); + actvsc = reallocate_integer_array( actvsc, current_max_scs ); + } + + if ( addsym( copy_string( str ), (char *) lastsc, + sctbl, START_COND_HASH_SIZE ) ) + lerrsf( "start condition %s declared twice", str ); + + scset[lastsc] = mkstate( SYM_EPSILON ); + scbol[lastsc] = mkstate( SYM_EPSILON ); + scxclu[lastsc] = xcluflg; + } + + +/* sclookup - lookup the number associated with a start condition + * + * synopsis + * char str[], scnum; + * int sclookup; + * scnum/0 = sclookup( str ); + */ +int sclookup( str ) +char str[]; + + { + return ( (int) getdef( str, sctbl, START_COND_HASH_SIZE ) ); + } diff --git a/tblcmp.c b/tblcmp.c new file mode 100644 index 0000000..ae9bfd7 --- /dev/null +++ b/tblcmp.c @@ -0,0 +1,1324 @@ +/* lexcmp - table compression routines */ + +/* + * Copyright (c) University of California, 1987 + */ + +#include "flexdef.h" + +/* bldtbl - build table entries for dfa state + * + * synopsis + * int state[numecs], statenum, totaltrans, comstate, comfreq; + * bldtbl( state, statenum, totaltrans, comstate, comfreq ); + * + * State is the statenum'th dfa state. It is indexed by equivalence class and + * gives the number of the state to enter for a given equivalence class. + * totaltrans is the total number of transitions out of the state. Comstate + * is that state which is the destination of the most transitions out of State. 
+ * Comfreq is how many transitions there are out of State to Comstate. + * + * A note on terminology: + * "protos" are transition tables which have a high probability of + * either being redundant (a state processed later will have an identical + * transition table) or nearly redundant (a state processed later will have + * many of the same out-transitions). A "most recently used" queue of + * protos is kept around with the hope that most states will find a proto + * which is similar enough to be usable, and therefore compacting the + * output tables. + * "templates" are a special type of proto. If a transition table is + * homogenous or nearly homogenous (all transitions go to the same destination) + * then the odds are good that future states will also go to the same destination + * state on basically the same character set. These homogenous states are + * so common when dealing with large rule sets that they merit special + * attention. If the transition table were simply made into a proto, then + * (typically) each subsequent, similar state will differ from the proto + * for two out-transitions. One of these out-transitions will be that + * character on which the proto does not go to the common destination, + * and one will be that character on which the state does not go to the + * common destination. Templates, on the other hand, go to the common + * state on EVERY transition character, and therefore cost only one + * difference. + */ +bldtbl( state, statenum, totaltrans, comstate, comfreq ) +int state[], statenum, totaltrans, comstate, comfreq; + + { + int extptr, extrct[2][CSIZE + 1]; + int mindiff, minprot, i, d; + int checkcom; + + /* If extptr is 0 then the first array of extrct holds the result of the + * "best difference" to date, which is those transitions which occur in + * "state" but not in the proto which, to date, has the fewest differences + * between itself and "state". 
If extptr is 1 then the second array of + * extrct hold the best difference. The two arrays are toggled + * between so that the best difference to date can be kept around and + * also a difference just created by checking against a candidate "best" + * proto. + */ + + extptr = 0; + + /* if the state has too few out-transitions, don't bother trying to + * compact its tables + */ + + if ( (totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE) ) + mkentry( state, numecs, statenum, JAMSTATE, totaltrans ); + + else + { + /* checkcom is true if we should only check "state" against + * protos which have the same "comstate" value + */ + + checkcom = comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE; + + minprot = firstprot; + mindiff = totaltrans; + + if ( checkcom ) + { + /* find first proto which has the same "comstate" */ + for ( i = firstprot; i != NIL; i = protnext[i] ) + if ( protcomst[i] == comstate ) + { + minprot = i; + mindiff = tbldiff( state, minprot, extrct[extptr] ); + break; + } + } + + else + { + /* since we've decided that the most common destination out + * of "state" does not occur with a high enough frequency, + * we set the "comstate" to zero, assuring that if this state + * is entered into the proto list, it will not be considered + * a template. + */ + comstate = 0; + + if ( firstprot != NIL ) + { + minprot = firstprot; + mindiff = tbldiff( state, minprot, extrct[extptr] ); + } + } + + /* we now have the first interesting proto in "minprot". If + * it matches within the tolerances set for the first proto, + * we don't want to bother scanning the rest of the proto list + * to see if we have any other reasonable matches. + */ + + if ( mindiff * 100 > totaltrans * FIRST_MATCH_DIFF_PERCENTAGE ) + { /* not a good enough match. 
Scan the rest of the protos */ + for ( i = minprot; i != NIL; i = protnext[i] ) + { + d = tbldiff( state, i, extrct[1 - extptr] ); + if ( d < mindiff ) + { + extptr = 1 - extptr; + mindiff = d; + minprot = i; + } + } + } + + /* check if the proto we've decided on as our best bet is close + * enough to the state we want to match to be usable + */ + + if ( mindiff * 100 > totaltrans * ACCEPTABLE_DIFF_PERCENTAGE ) + { + /* no good. If the state is homogeneous enough, we make a + * template out of it. Otherwise, we make a proto. + */ + + if ( comfreq * 100 >= totaltrans * TEMPLATE_SAME_PERCENTAGE ) + mktemplate( state, statenum, comstate ); + + else + { + mkprot( state, statenum, comstate ); + mkentry( state, numecs, statenum, JAMSTATE, totaltrans ); + } + } + + else + { /* use the proto */ + mkentry( extrct[extptr], numecs, statenum, + prottbl[minprot], mindiff ); + + /* if this state was sufficiently different from the proto + * we built it from, make it, too, a proto + */ + + if ( mindiff * 100 >= totaltrans * NEW_PROTO_DIFF_PERCENTAGE ) + mkprot( state, statenum, comstate ); + + /* since mkprot added a new proto to the proto queue, it's possible + * that "minprot" is no longer on the proto queue (if it happened + * to have been the last entry, it would have been bumped off). + * If it's not there, then the new proto took its physical place + * (though logically the new proto is at the beginning of the + * queue), so in that case the following call will do nothing. + */ + + mv2front( minprot ); + } + } + } + + +/* cmptmps - compress template table entries + * + * synopsis + * cmptmps(); + * + * template tables are compressed by using the 'template equivalence + * classes', which are collections of transition character equivalence + * classes which always appear together in templates - really meta-equivalence + * classes. 
until this point, the tables for templates have been stored + * up at the top end of the nxt array; they will now be compressed and have + * table entries made for them. + */ +cmptmps() + + { + int tmpstorage[CSIZE + 1]; + register int *tmp = tmpstorage, i, j; + int totaltrans, trans; + + peakpairs = numtemps * numecs + tblend; + + if ( usemecs ) + { + /* create equivalence classes base on data gathered on template + * transitions + */ + + nummecs = cre8ecs( tecfwd, tecbck, numecs ); + } + + else + nummecs = numecs; + + if ( lastdfa + numtemps + 1 >= current_max_dfas ) + increase_max_dfas(); + + /* loop through each template */ + + for ( i = 1; i <= numtemps; ++i ) + { + totaltrans = 0; /* number of non-jam transitions out of this template */ + + for ( j = 1; j <= numecs; ++j ) + { + trans = tnxt[numecs * i + j]; + + if ( usemecs ) + { + /* the absolute value of tecbck is the meta-equivalence class + * of a given equivalence class, as set up by cre8ecs + */ + if ( tecbck[j] > 0 ) + { + tmp[tecbck[j]] = trans; + + if ( trans > 0 ) + ++totaltrans; + } + } + + else + { + tmp[j] = trans; + + if ( trans > 0 ) + ++totaltrans; + } + } + + /* it is assumed (in a rather subtle way) in the skeleton that + * if we're using meta-equivalence classes, the def[] entry for + * all templates is the jam template, i.e. templates never default + * to other non-jam table entries (e.g. 
another template) + */ + + /* leave room for the jam-state after the last real state */ + mkentry( tmp, nummecs, lastdfa + i + 1, JAMSTATE, totaltrans ); + } + } + + + +/* expand_nxt_chk - expand the next check arrays */ + +expand_nxt_chk() + + { + register int old_max = current_max_xpairs; + + current_max_xpairs += MAX_XPAIRS_INCREMENT; + + ++num_reallocs; + + nxt = reallocate_integer_array( nxt, current_max_xpairs ); + chk = reallocate_integer_array( chk, current_max_xpairs ); + + bzero( (char *) (chk + old_max), + MAX_XPAIRS_INCREMENT * sizeof( int ) / sizeof( char ) ); + } + + +/* gentabs - generate data statements for the transition tables + * + * synopsis + * gentabs(); + */ +gentabs() + + { + int i, j, k, numrows, *accset, nacc, *acc_array; + char clower(); + + /* *everything* is done in terms of arrays starting at 1, so provide + * a null entry for the zero element of all FTL arrays + */ + static char ftl_long_decl[] = "static long int %c[%d] =\n { 0,\n"; + static char ftl_short_decl[] = "static short int %c[%d] =\n { 0,\n"; + static char ftl_char_decl[] = "static char %c[%d] =\n { 0,\n"; + + acc_array = allocate_integer_array( current_max_dfas ); + nummt = 0; + + if ( fulltbl ) + jambase = lastdfa + 1; /* home of "jam" pseudo-state */ + + printf( "#define YYJAM %d\n", jamstate ); + printf( "#define YYJAMBASE %d\n", jambase ); + + if ( usemecs ) + printf( "#define YYTEMPLATE %d\n", lastdfa + 2 ); + +#ifdef NOTDEF +/* unsupported code */ + if ( ! 
genftl ) + { /* ratfor scanner */ + static char vardata[] = "%%%sdata %s/%d/\n"; + static char dindent[] = DATAINDENTSTR; + static char arydecl[] = "integer %c(%d)\n"; + static char ary2decl[] = "integer %c(%d,%d)\n"; + + skelout(); + + if ( reject ) + { + /* write out the pointers into the accepting lists for each state, + * and the accepting lists + */ + + /* alist needs to be lastdfa + 2 because we tell where a state's + * accepting list ends by checking the beginning of the next state, + * and there's an entry in alist for the default, "jam" pseudo-state + * (this latter entry is needed because states jam by making + * a transition to the state; see the flex skeleton. By the way, + * I *think* we could get rid of the jam state entirely by + * slight modification of the skeleton ...) + */ + + printf( arydecl, ALIST, lastdfa + 2 ); + + printf( arydecl, ACCEPT, max( numas, 1 ) ); + } + + else + printf( arydecl, ALIST, lastdfa + 1 ); + + if ( useecs ) + printf( arydecl, ECARRAY, CSIZE ); + if ( usemecs ) + printf( arydecl, MATCHARRAY, numecs ); + + if ( fulltbl ) + { + printf( ary2decl, NEXTARRAY, lastdfa, numecs ); + printf( vardata, dindent, "yyjam", 0 ); + } + + else + { + printf( arydecl, BASEARRAY, lastdfa + numtemps ); + printf( arydecl, DEFARRAY, lastdfa + numtemps ); + printf( arydecl, NEXTARRAY, tblend ); + printf( arydecl, CHECKARRAY, tblend ); + + printf( vardata, dindent, "yyjam", jambase ); + + /* the first template begins right after the default jam table, + * which itself begins right after the last dfa + */ + + printf( vardata, dindent, "yytmp", lastdfa + 2 ); + } + } +#endif NOTDEF + + if ( reject ) + { + /* write out accepting list and pointer list + * first we generate the ACCEPT array. In the process, we compute + * the indices that will go into the ALIST array, and save the + * indices in the dfaacc array + */ + + if ( genftl ) + printf( accnum > 127 ? 
ftl_short_decl : ftl_char_decl, + ACCEPT, max( numas, 1 ) + 1 ); + + j = 1; /* index into ACCEPT array */ + + for ( i = 1; i <= lastdfa; ++i ) + { + acc_array[i] = j; + + if ( accsiz[i] != 0 ) + { + accset = dfaacc[i]; + nacc = accsiz[i]; + + if ( trace ) + fprintf( stderr, "state # %d accepts: ", i ); + + for ( k = 1; k <= nacc; ++k ) + { + mkdata( ACCEPT, j++, accset[k] ); + + if ( trace ) + { + fprintf( stderr, "[%d]", accset[k] ); + + if ( k < nacc ) + fputs( ", ", stderr ); + else + putc( '\n', stderr ); + } + } + } + } + + /* add accepting number for the "jam" state */ + acc_array[i] = j; + + dataend(); + } + + else + { + for ( i = 1; i <= lastdfa; ++i ) + acc_array[i] = (int) dfaacc[i]; + + acc_array[i] = 0; /* add (null) accepting number for jam state */ + } + + /* spit out ALIST array. If we're doing "reject", it'll be pointers + * into the ACCEPT array. Otherwise it's actual accepting numbers. + * In either case, we just dump the numbers. + */ + + if ( genftl ) + { + /* "lastdfa + 2" is the size of ALIST; includes room for FTL arrays + * beginning at 0 and for "jam" state + */ + k = lastdfa + 2; + + if ( reject ) + /* we put a "cap" on the table associating lists of accepting + * numbers with state numbers. This is needed because we tell + * where the end of an accepting list is by looking at where + * the list for the next state starts. + */ + ++k; + + printf( ((reject && numas > 126) || accnum > 127) ? + ftl_short_decl : ftl_char_decl, ALIST, k ); + } + + for ( i = 1; i <= lastdfa; ++i ) + { + mkdata( ALIST, i, acc_array[i] ); + + if ( ! 
reject && trace && acc_array[i] ) + fprintf( stderr, "state # %d accepts: [%d]\n", i, acc_array[i] ); + } + + /* add entry for "jam" state */ + mkdata( ALIST, i, acc_array[i] ); + + if ( reject ) + /* add "cap" for the list */ + mkdata( ALIST, i + 1, acc_array[i] ); + + dataend(); + + if ( useecs ) + { + /* write out equivalence classes */ + + if ( genftl ) + printf( ftl_char_decl, ECARRAY, CSIZE + 1 ); + + for ( i = 1; i <= CSIZE; ++i ) + { + if ( caseins && (i >= 'A') && (i <= 'Z') ) + ecgroup[i] = ecgroup[clower( i )]; + + ecgroup[i] = abs( ecgroup[i] ); + mkdata( ECARRAY, i, ecgroup[i] ); + } + + dataend(); + + if ( trace ) + { + fputs( "\n\nEquivalence Classes:\n\n", stderr ); + + numrows = (CSIZE + 1) / 8; + + for ( j = 1; j <= numrows; ++j ) + { + for ( i = j; i <= CSIZE; i = i + numrows ) + { + if ( i >= 1 && i <= 31 ) + fprintf( stderr, "^%c = %-2d", + 'A' + i - 1, ecgroup[i] ); + + else if ( i >= 32 && i <= 126 ) + fprintf( stderr, " %c = %-2d", i, ecgroup[i] ); + + else if ( i == 127 ) + fprintf( stderr, "^@ = %-2d", ecgroup[i] ); + + else + fprintf( stderr, "\nSomething Weird: %d = %d\n", i, + ecgroup[i] ); + + putc( '\t', stderr ); + } + + putc( '\n', stderr ); + } + } + } + + if ( usemecs ) + { + /* write out meta-equivalence classes (used to index templates with) */ + + if ( trace ) + fputs( "\n\nMeta-Equivalence Classes:\n", stderr ); + + if ( genftl ) + printf( ftl_char_decl, MATCHARRAY, numecs + 1 ); + + for ( i = 1; i <= numecs; ++i ) + { + if ( trace ) + fprintf( stderr, "%d = %d\n", i, abs( tecbck[i] ) ); + + mkdata( MATCHARRAY, i, abs( tecbck[i] ) ); + } + + dataend(); + } + + if ( ! fulltbl ) + { + int total_states = lastdfa + numtemps; + + if ( genftl ) + printf( tblend > 32766 ? 
ftl_long_decl : ftl_short_decl, + BASEARRAY, total_states + 1 ); + + for ( i = 1; i <= lastdfa; ++i ) + { + register int d = def[i]; + + if ( base[i] == JAMSTATE ) + base[i] = jambase; + + if ( d == JAMSTATE ) + def[i] = jamstate; + + else if ( d < 0 ) + { + /* template reference */ + ++tmpuses; + def[i] = lastdfa - d + 1; + } + + mkdata( BASEARRAY, i, base[i] ); + } + + /* generate jam state's base index */ + mkdata( BASEARRAY, i, base[i] ); + + for ( ++i /* skip jam state */; i <= total_states; ++i ) + { + mkdata( BASEARRAY, i, base[i] ); + def[i] = jamstate; + } + + dataend(); + + if ( genftl ) + printf( tblend > 32766 ? ftl_long_decl : ftl_short_decl, + DEFARRAY, total_states + 1 ); + + for ( i = 1; i <= total_states; ++i ) + mkdata( DEFARRAY, i, def[i] ); + + dataend(); + + if ( genftl ) + printf( lastdfa > 32766 ? ftl_long_decl : ftl_short_decl, + NEXTARRAY, tblend + 1 ); + + for ( i = 1; i <= tblend; ++i ) + { + if ( nxt[i] == 0 ) + nxt[i] = jamstate; /* new state is the JAM state */ + + mkdata( NEXTARRAY, i, nxt[i] ); + } + + dataend(); + + if ( genftl ) + printf( lastdfa > 32766 ? ftl_long_decl : ftl_short_decl, + CHECKARRAY, tblend + 1 ); + + for ( i = 1; i <= tblend; ++i ) + { + if ( chk[i] == 0 ) + ++nummt; + + mkdata( CHECKARRAY, i, chk[i] ); + } + + dataend(); + } + + skelout(); + + /* copy remainder of input to output */ + + line_directive_out(); + (void) lexscan(); /* copy remainder of input to output */ + } + + +/* inittbl - initialize transition tables + * + * synopsis + * inittbl(); + * + * Initializes "firstfree" to be one beyond the end of the table. Initializes + * all "chk" entries to be zero. Note that templates are built in their + * own tbase/tdef tables. They are shifted down to be contiguous + * with the non-template entries during table generation. 
+ */ +inittbl() + + { + register int i; + + bzero( (char *) chk, current_max_xpairs * sizeof( int ) / sizeof( char ) ); + + tblend = 0; + firstfree = tblend + 1; + numtemps = 0; + + if ( usemecs ) + { + /* set up doubly-linked meta-equivalence classes + * these are sets of equivalence classes which all have identical + * transitions out of TEMPLATES + */ + + tecbck[1] = NIL; + + for ( i = 2; i <= numecs; ++i ) + { + tecbck[i] = i - 1; + tecfwd[i - 1] = i; + } + + tecfwd[numecs] = NIL; + } + } + + +/* mkdeftbl - make the default, "jam" table entries + * + * synopsis + * mkdeftbl(); + */ +mkdeftbl() + + { + int i; + + jamstate = lastdfa + 1; + + if ( tblend + numecs > current_max_xpairs ) + expand_nxt_chk(); + + for ( i = 1; i <= numecs; ++i ) + { + nxt[tblend + i] = 0; + chk[tblend + i] = jamstate; + } + + jambase = tblend; + + base[jamstate] = jambase; + + /* should generate a run-time array bounds check if + * ever used as a default + */ + def[jamstate] = BAD_SUBSCRIPT; + + tblend += numecs; + ++numtemps; + } + + +/* mkentry - create base/def and nxt/chk entries for transition array + * + * synopsis + * int state[numchars + 1], numchars, statenum, deflink, totaltrans; + * mkentry( state, numchars, statenum, deflink, totaltrans ); + * + * "state" is a transition array "numchars" characters in size, "statenum" + * is the offset to be used into the base/def tables, and "deflink" is the + * entry to put in the "def" table entry. If "deflink" is equal to + * "JAMSTATE", then no attempt will be made to fit zero entries of "state" + * (i.e. jam entries) into the table. It is assumed that by linking to + * "JAMSTATE" they will be taken care of. In any case, entries in "state" + * marking transitions to "SAME_TRANS" are treated as though they will be + * taken care of by whereever "deflink" points. "totaltrans" is the total + * number of transitions out of the state. 
If it is below a certain threshold, + * the tables are searched for an interior spot that will accomodate the + * state array. + */ +mkentry( state, numchars, statenum, deflink, totaltrans ) +register int *state; +int numchars, statenum, deflink, totaltrans; + + { + register int minec, maxec, i, baseaddr; + int tblbase, tbllast; + + if ( totaltrans == 0 ) + { /* there are no out-transitions */ + if ( deflink == JAMSTATE ) + base[statenum] = JAMSTATE; + else + base[statenum] = 0; + + def[statenum] = deflink; + return; + } + + for ( minec = 1; minec <= numchars; ++minec ) + { + if ( state[minec] != SAME_TRANS ) + if ( state[minec] != 0 || deflink != JAMSTATE ) + break; + } + + if ( totaltrans == 1 ) + { + /* there's only one out-transition. Save it for later to fill + * in holes in the tables. + */ + stack1( statenum, minec, state[minec], deflink ); + return; + } + + for ( maxec = numchars; maxec > 0; --maxec ) + { + if ( state[maxec] != SAME_TRANS ) + if ( state[maxec] != 0 || deflink != JAMSTATE ) + break; + } + + /* Whether we try to fit the state table in the middle of the table + * entries we have already generated, or if we just take the state + * table at the end of the nxt/chk tables, we must make sure that we + * have a valid base address (i.e. non-negative). Note that not only are + * negative base addresses dangerous at run-time (because indexing the + * next array with one and a low-valued character might generate an + * array-out-of-bounds error message), but at compile-time negative + * base addresses denote TEMPLATES. + */ + + /* find the first transition of state that we need to worry about. 
*/ + if ( totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE ) + { /* attempt to squeeze it into the middle of the tabls */ + baseaddr = firstfree; + + while ( baseaddr < minec ) + { + /* using baseaddr would result in a negative base address below + * find the next free slot + */ + for ( ++baseaddr; chk[baseaddr] != 0; ++baseaddr ) + ; + } + + if ( baseaddr + maxec - minec >= current_max_xpairs ) + expand_nxt_chk(); + + for ( i = minec; i <= maxec; ++i ) + if ( state[i] != SAME_TRANS ) + if ( state[i] != 0 || deflink != JAMSTATE ) + if ( chk[baseaddr + i - minec] != 0 ) + { /* baseaddr unsuitable - find another */ + for ( ++baseaddr; + baseaddr < current_max_xpairs && + chk[baseaddr] != 0; + ++baseaddr ) + ; + + if ( baseaddr + maxec - minec >= current_max_xpairs ) + expand_nxt_chk(); + + /* reset the loop counter so we'll start all + * over again next time it's incremented + */ + + i = minec - 1; + } + } + + else + { + /* ensure that the base address we eventually generate is + * non-negative + */ + baseaddr = max( tblend + 1, minec ); + } + + tblbase = baseaddr - minec; + tbllast = tblbase + maxec; + + if ( tbllast >= current_max_xpairs ) + expand_nxt_chk(); + + base[statenum] = tblbase; + def[statenum] = deflink; + + for ( i = minec; i <= maxec; ++i ) + if ( state[i] != SAME_TRANS ) + if ( state[i] != 0 || deflink != JAMSTATE ) + { + nxt[tblbase + i] = state[i]; + chk[tblbase + i] = statenum; + } + + if ( baseaddr == firstfree ) + /* find next free slot in tables */ + for ( ++firstfree; chk[firstfree] != 0; ++firstfree ) + ; + + tblend = max( tblend, tbllast ); + } + + +/* mk1tbl - create table entries for a state (or state fragment) which + * has only one out-transition + * + * synopsis + * int state, sym, onenxt, onedef; + * mk1tbl( state, sym, onenxt, onedef ); + */ +mk1tbl( state, sym, onenxt, onedef ) +int state, sym, onenxt, onedef; + + { + if ( firstfree < sym ) + firstfree = sym; + + while ( chk[firstfree] != 0 ) + if ( ++firstfree >= 
current_max_xpairs ) + expand_nxt_chk(); + + base[state] = firstfree - sym; + def[state] = onedef; + chk[firstfree] = state; + nxt[firstfree] = onenxt; + + if ( firstfree > tblend ) + { + tblend = firstfree++; + + if ( firstfree >= current_max_xpairs ) + expand_nxt_chk(); + } + } + + +/* mkprot - create new proto entry + * + * synopsis + * int state[], statenum, comstate; + * mkprot( state, statenum, comstate ); + */ +mkprot( state, statenum, comstate ) +int state[], statenum, comstate; + + { + int i, slot, tblbase; + + if ( ++numprots >= MSP || numecs * numprots >= PROT_SAVE_SIZE ) + { + /* gotta make room for the new proto by dropping last entry in + * the queue + */ + slot = lastprot; + lastprot = protprev[lastprot]; + protnext[lastprot] = NIL; + } + + else + slot = numprots; + + protnext[slot] = firstprot; + + if ( firstprot != NIL ) + protprev[firstprot] = slot; + + firstprot = slot; + prottbl[slot] = statenum; + protcomst[slot] = comstate; + + /* copy state into save area so it can be compared with rapidly */ + tblbase = numecs * (slot - 1); + + for ( i = 1; i <= numecs; ++i ) + protsave[tblbase + i] = state[i]; + } + + +/* mktemplate - create a template entry based on a state, and connect the state + * to it + * + * synopsis + * int state[], statenum, comstate, totaltrans; + * mktemplate( state, statenum, comstate, totaltrans ); + */ +mktemplate( state, statenum, comstate ) +int state[], statenum, comstate; + + { + int i, numdiff, tmpbase, tmp[CSIZE + 1]; + char transset[CSIZE + 1]; + int tsptr; + + ++numtemps; + + tsptr = 0; + + /* calculate where we will temporarily store the transition table + * of the template in the tnxt[] array. 
The final transition table + * gets created by cmptmps() + */ + + tmpbase = numtemps * numecs; + + if ( tmpbase + numecs >= current_max_template_xpairs ) + { + current_max_template_xpairs += MAX_TEMPLATE_XPAIRS_INCREMENT; + + ++num_reallocs; + + tnxt = reallocate_integer_array( tnxt, current_max_template_xpairs ); + } + + for ( i = 1; i <= numecs; ++i ) + if ( state[i] == 0 ) + tnxt[tmpbase + i] = 0; + else + { + transset[tsptr++] = i; + tnxt[tmpbase + i] = comstate; + } + + if ( usemecs ) + mkeccl( transset, tsptr, tecfwd, tecbck, numecs ); + + mkprot( tnxt + tmpbase, -numtemps, comstate ); + + /* we rely on the fact that mkprot adds things to the beginning + * of the proto queue + */ + + numdiff = tbldiff( state, firstprot, tmp ); + mkentry( tmp, numecs, statenum, -numtemps, numdiff ); + } + + +/* mv2front - move proto queue element to front of queue + * + * synopsis + * int qelm; + * mv2front( qelm ); + */ +mv2front( qelm ) +int qelm; + + { + if ( firstprot != qelm ) + { + if ( qelm == lastprot ) + lastprot = protprev[lastprot]; + + protnext[protprev[qelm]] = protnext[qelm]; + + if ( protnext[qelm] != NIL ) + protprev[protnext[qelm]] = protprev[qelm]; + + protprev[qelm] = NIL; + protnext[qelm] = firstprot; + protprev[firstprot] = qelm; + firstprot = qelm; + } + } + + +/* ntod - convert an ndfa to a dfa + * + * synopsis + * ntod(); + * + * creates the dfa corresponding to the ndfa we've constructed. the + * dfa starts out in state #1. 
+ */ +ntod() + + { + int *accset, ds, nacc, newds; + int duplist[CSIZE + 1], sym, hashval, numstates, dsize; + int targfreq[CSIZE + 1], targstate[CSIZE + 1], state[CSIZE + 1]; + int *nset, *dset; + int targptr, totaltrans, i, comstate, comfreq, targ; + int *epsclosure(), snstods(), symlist[CSIZE + 1]; + + accset = allocate_integer_array( accnum + 1 ); + nset = allocate_integer_array( current_max_dfa_size ); + + todo_head = todo_next = 0; + +#define ADD_QUEUE_ELEMENT(element) \ + if ( ++element >= current_max_dfas ) \ + { /* check for queue overflowing */ \ + if ( todo_head == 0 ) \ + increase_max_dfas(); \ + else \ + element = 0; \ + } + +#define NEXT_QUEUE_ELEMENT(element) ((element + 1) % (current_max_dfas + 1)) + + for ( i = 0; i <= CSIZE; ++i ) + { + duplist[i] = NIL; + symlist[i] = false; + } + + for ( i = 0; i <= accnum; ++i ) + accset[i] = NIL; + + if ( trace ) + { + dumpnfa( scset[1] ); + fputs( "\n\nDFA Dump:\n\n", stderr ); + } + + inittbl(); + + if ( genftl ) + skelout(); + + if ( fulltbl ) + { + if ( genftl ) + { + /* declare it "short" because it's a real long-shot that that + * won't be large enough + */ + printf( "static short int %c[][%d] =\n {\n", NEXTARRAY, + numecs + 1 ); + + /* generate 0 entries for state #0 */ + for ( i = 0; i <= numecs; ++i ) + mk2data( NEXTARRAY, 0, 0, 0 ); + + /* force ',' and dataflush() next call to mk2data */ + datapos = NUMDATAITEMS; + + /* force extra blank line next dataflush() */ + dataline = NUMDATALINES; + } + } + + /* create the first states */ + + for ( i = 1; i <= lastsc * 2; ++i ) + { + numstates = 1; + + /* for each start condition, make one state for the case when + * we're at the beginning of the line (the '%' operator) and + * one for the case when we're not + */ + if ( i % 2 == 1 ) + nset[numstates] = scset[(i / 2) + 1]; + else + nset[numstates] = mkbranch( scbol[i / 2], scset[i / 2] ); + + nset = epsclosure( nset, &numstates, accset, &nacc, &hashval ); + + if ( snstods( nset, numstates, accset, nacc, 
hashval, &ds ) ) + { + numas = numas + nacc; + totnst = totnst + numstates; + + todo[todo_next] = ds; + ADD_QUEUE_ELEMENT(todo_next); + } + } + + while ( todo_head != todo_next ) + { + targptr = 0; + totaltrans = 0; + + for ( i = 1; i <= numecs; ++i ) + state[i] = 0; + + ds = todo[todo_head]; + todo_head = NEXT_QUEUE_ELEMENT(todo_head); + + dset = dss[ds]; + dsize = dfasiz[ds]; + + if ( trace ) + fprintf( stderr, "state # %d:\n", ds ); + + sympartition( dset, dsize, symlist, duplist ); + + for ( sym = 1; sym <= numecs; ++sym ) + { + if ( symlist[sym] ) + { + symlist[sym] = 0; + + if ( duplist[sym] == NIL ) + { /* symbol has unique out-transitions */ + numstates = symfollowset( dset, dsize, sym, nset ); + nset = epsclosure( nset, &numstates, accset, + &nacc, &hashval ); + + if ( snstods( nset, numstates, accset, + nacc, hashval, &newds ) ) + { + totnst = totnst + numstates; + todo[todo_next] = newds; + ADD_QUEUE_ELEMENT(todo_next); + numas = numas + nacc; + } + + state[sym] = newds; + + if ( trace ) + fprintf( stderr, "\t%d\t%d\n", sym, newds ); + + targfreq[++targptr] = 1; + targstate[targptr] = newds; + ++numuniq; + } + + else + { + /* sym's equivalence class has the same transitions + * as duplist(sym)'s equivalence class + */ + targ = state[duplist[sym]]; + state[sym] = targ; + + if ( trace ) + fprintf( stderr, "\t%d\t%d\n", sym, targ ); + + /* update frequency count for destination state */ + + i = 0; + while ( targstate[++i] != targ ) + ; + + ++targfreq[i]; + ++numdup; + } + + ++totaltrans; + duplist[sym] = NIL; + } + } + + numsnpairs = numsnpairs + totaltrans; + + if ( caseins && ! 
useecs ) + { + register int j; + + for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j ) + state[i] = state[j]; + } + + if ( fulltbl ) + { + if ( genftl ) + { + /* supply array's 0-element */ + mk2data( NEXTARRAY, 0, 0, 0 ); + + for ( i = 1; i <= numecs; ++i ) + mk2data( NEXTARRAY, 0, 0, state[i] ); + + /* force ',' and dataflush() next call to mk2data */ + datapos = NUMDATAITEMS; + + /* force extra blank line next dataflush() */ + dataline = NUMDATALINES; + } + + else + { + for ( i = 1; i <= numecs; ++i ) + mk2data( NEXTARRAY, ds, i, state[i] ); + } + } + + else + { + /* determine which destination state is the most common, and + * how many transitions to it there are + */ + + comfreq = 0; + comstate = 0; + + for ( i = 1; i <= targptr; ++i ) + if ( targfreq[i] > comfreq ) + { + comfreq = targfreq[i]; + comstate = targstate[i]; + } + + bldtbl( state, ds, totaltrans, comstate, comfreq ); + } + } + + if ( fulltbl ) + dataend(); + + else + { + cmptmps(); /* create compressed template entries */ + + /* create tables for all the states with only one out-transition */ + while ( onesp > 0 ) + { + mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp], + onedef[onesp] ); + --onesp; + } + + mkdeftbl(); + } + } + + +/* stack1 - save states with only one out-transition to be processed later + * + * synopsis + * int statenum, sym, nextstate, deflink; + * stack1( statenum, sym, nextstate, deflink ); + * + * if there's room for another state one the "one-transition" stack, the + * state is pushed onto it, to be processed later by mk1tbl. If there's + * no room, we process the sucker right now. 
+ */ +stack1( statenum, sym, nextstate, deflink ) +int statenum, sym, nextstate, deflink; + + { + if ( onesp >= ONE_STACK_SIZE ) + mk1tbl( statenum, sym, nextstate, deflink ); + + else + { + ++onesp; + onestate[onesp] = statenum; + onesym[onesp] = sym; + onenext[onesp] = nextstate; + onedef[onesp] = deflink; + } + } + + +/* tbldiff - compute differences between two state tables + * + * synopsis + * int state[], pr, ext[]; + * int tbldiff, numdifferences; + * numdifferences = tbldiff( state, pr, ext ) + * + * "state" is the state array which is to be extracted from the pr'th + * proto. "pr" is both the number of the proto we are extracting from + * and an index into the save area where we can find the proto's complete + * state table. Each entry in "state" which differs from the corresponding + * entry of "pr" will appear in "ext". + * Entries which are the same in both "state" and "pr" will be marked + * as transitions to "SAME_TRANS" in "ext". The total number of differences + * between "state" and "pr" is returned as function value. Note that this + * number is "numecs" minus the number of "SAME_TRANS" entries in "ext". 
+ */ +int tbldiff( state, pr, ext ) +int state[], pr, ext[]; + + { + register int i, *sp = state, *ep = ext, *protp; + register int numdiff = 0; + + protp = &protsave[numecs * (pr - 1)]; + + for ( i = numecs; i > 0; --i ) + { + if ( *++protp == *++sp ) + *++ep = SAME_TRANS; + else + { + *++ep = *sp; + ++numdiff; + } + } + + return ( numdiff ); + } diff --git a/yylex.c b/yylex.c new file mode 100644 index 0000000..f4300aa --- /dev/null +++ b/yylex.c @@ -0,0 +1,210 @@ +#include "flexdef.h" +#include "y.tab.h" + +/* + * Copyright (c) University of California, 1987 + */ + +/* yylex - scan for a regular expression token + * + * synopsis + * + * token = yylex(); + * + * token - return token found + */ +int yylex() + + { + int toktype; + static int beglin = false; + + if ( eofseen ) + toktype = EOF; + else + toktype = lexscan(); + + if ( toktype == EOF ) + { + eofseen = 1; + + if ( sectnum == 1 ) + { + synerr( "unexpected EOF" ); + sectnum = 2; + toktype = SECTEND; + } + + else if ( sectnum == 2 ) + { + sectnum = 3; + toktype = SECTEND; + } + + else + toktype = 0; + } + + if ( trace ) + { + if ( beglin ) + { + fprintf( stderr, "%d\t", accnum + 1 ); + beglin = 0; + } + + switch ( toktype ) + { + case '<': + case '>': + case '^': + case '$': + case '"': + case '[': + case ']': + case '{': + case '}': + case '|': + case '(': + case ')': + case '-': + case '/': + case '\\': + case '?': + case '.': + case '*': + case '+': + case ',': + (void) putc( toktype, stderr ); + break; + + case '\n': + (void) putc( '\n', stderr ); + + if ( sectnum == 2 ) + beglin = 1; + + break; + + case SCDECL: + fputs( "%s", stderr ); + break; + + case XSCDECL: + fputs( "%x", stderr ); + break; + + case WHITESPACE: + (void) putc( ' ', stderr ); + break; + + case SECTEND: + fputs( "%%\n", stderr ); + + /* we set beglin to be true so we'll start + * writing out numbers as we echo rules. 
lexscan() has + * already assigned sectnum + */ + + if ( sectnum == 2 ) + beglin = 1; + + break; + + case NAME: + fprintf( stderr, "'%s'", nmstr ); + break; + + case CHAR: + switch ( yylval ) + { + case '<': + case '>': + case '^': + case '$': + case '"': + case '[': + case ']': + case '{': + case '}': + case '|': + case '(': + case ')': + case '-': + case '/': + case '\\': + case '?': + case '.': + case '*': + case '+': + case ',': + fprintf( stderr, "\\%c", yylval ); + break; + + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 8: + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + case 16: + case 17: + case 18: + case 19: + case 20: + case 21: + case 22: + case 23: + case 24: + case 25: + case 26: + case 27: + case 28: + case 29: + case 30: + case 31: + fprintf( stderr, "^%c", 'A' + yylval - 1 ); + break; + + case 127: + (void) putc( '^', stderr ); + (void) putc( '@', stderr ); + break; + + default: + (void) putc( yylval, stderr ); + break; + } + + break; + + case NUMBER: + fprintf( stderr, "%d", yylval ); + break; + + case PREVCCL: + fprintf( stderr, "[%d]", yylval ); + break; + + case 0: + fprintf( stderr, "End Marker" ); + break; + + default: + fprintf( stderr, "*Something Weird* - tok: %d val: %d\n", + toktype, yylval ); + break; + } + } + + return ( toktype ); + } -- cgit v1.2.3 From c58120445fe8edf709bbb987a3d665f6d5201b55 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 13 Feb 1988 11:00:46 +0000 Subject: Beta Release. 
--- ccl.c | 15 +- dfa.c | 40 +++-- ecs.c | 16 +- flexdef.h | 85 ++++++++-- main.c | 182 +++++++++++--------- misc.c | 304 +++++++++++++-------------------- nfa.c | 58 +++++-- parse.y | 16 +- scan.l | 118 +++++++------ sym.c | 38 +++-- tblcmp.c | 570 +++++++++++++++++++++++++++++++++++++++++++++----------------- yylex.c | 15 +- 12 files changed, 911 insertions(+), 546 deletions(-) diff --git a/ccl.c b/ccl.c index fa15c02..a99241b 100644 --- a/ccl.c +++ b/ccl.c @@ -1,7 +1,15 @@ -/* lexccl - routines for character classes */ +/* flexccl - routines for character classes */ /* - * Copyright (c) University of California, 1987 + * Copyright (c) 1987, the University of California + * + * The United States Government has rights in this work pursuant to + * contract no. DE-AC03-76SF00098 between the United States Department of + * Energy and the University of California. + * + * This program may be redistributed. Enhancements and derivative works + * may be created provided the new works, if made available to the general + * public, are made available for use by anyone. */ #include "flexdef.h" @@ -13,6 +21,7 @@ * char ch; * ccladd( cclp, ch ); */ + ccladd( cclp, ch ) int cclp; char ch; @@ -51,6 +60,7 @@ char ch; * int cclinit(); * new_ccl = cclinit(); */ + int cclinit() { @@ -90,6 +100,7 @@ int cclinit() * int cclp; * cclnegate( ccl ); */ + cclnegate( cclp ) int cclp; diff --git a/dfa.c b/dfa.c index d709df8..e5cc9a2 100644 --- a/dfa.c +++ b/dfa.c @@ -1,7 +1,15 @@ -/* lexdfa - DFA construction routines */ +/* flexdfa - DFA construction routines */ /* - * Copyright (c) University of California, 1987 + * Copyright (c) 1987, the University of California + * + * The United States Government has rights in this work pursuant to + * contract no. DE-AC03-76SF00098 between the United States Department of + * Energy and the University of California. + * + * This program may be redistributed. 
Enhancements and derivative works + * may be created provided the new works, if made available to the general + * public, are made available for use by anyone. */ #include "flexdef.h" @@ -26,6 +34,7 @@ * * hashval is the hash value for the dfa corresponding to the state set */ + int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr ) int *t, *ns_addr, accset[], *nacc_addr, *hv_addr; @@ -136,7 +145,7 @@ int *t, *ns_addr, accset[], *nacc_addr, *hv_addr; UNMARK_STATE(stk[stkpos]) } else - lexfatal( "consistency check failed in epsclosure()" ); + flexfatal( "consistency check failed in epsclosure()" ); } *ns_addr = numstates; @@ -166,7 +175,7 @@ increase_max_dfas() dhash = reallocate_integer_array( dhash, current_max_dfas ); todo = reallocate_integer_array( todo, current_max_dfas ); dss = reallocate_integer_pointer_array( dss, current_max_dfas ); - dfaacc = reallocate_integer_pointer_array( dfaacc, current_max_dfas ); + dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas ); /* fix up todo queue */ if ( todo_next < todo_head ) @@ -190,6 +199,7 @@ increase_max_dfas() * * on return, the dfa state number is in newds. */ + int snstods( sns, numstates, accset, nacc, hashval, newds_addr ) int sns[], numstates, accset[], nacc, hashval, *newds_addr; @@ -242,7 +252,7 @@ int sns[], numstates, accset[], nacc, hashval, *newds_addr; newds = lastdfa; if ( ! (dss[newds] = (int *) malloc( (unsigned) ((numstates + 1) * sizeof( int )) )) ) - lexfatal( "dynamic memory failure in snstods()" ); + flexfatal( "dynamic memory failure in snstods()" ); /* if we haven't already sorted the states in sns, we do so now, so that * future comparisons with it can be made quickly @@ -259,7 +269,7 @@ int sns[], numstates, accset[], nacc, hashval, *newds_addr; if ( nacc == 0 ) { - dfaacc[newds] = 0; + dfaacc[newds].dfaacc_state = 0; accsiz[newds] = 0; } @@ -273,13 +283,15 @@ int sns[], numstates, accset[], nacc, hashval, *newds_addr; bubble( accset, nacc ); - if ( ! 
(dfaacc[newds] = - (int *) malloc( (unsigned) ((nacc + 1) * sizeof( int )) )) ) - lexfatal( "dynamic memory failure in snstods()" ); + dfaacc[newds].dfaacc_state = + (int) malloc( (unsigned) ((nacc + 1) * sizeof( int )) ); + + if ( ! dfaacc[newds].dfaacc_state ) + flexfatal( "dynamic memory failure in snstods()" ); /* save the accepting set for later */ for ( i = 1; i <= nacc; ++i ) - dfaacc[newds][i] = accset[i]; + dfaacc[newds].dfaacc_set[i] = accset[i]; accsiz[newds] = nacc; } @@ -292,7 +304,7 @@ int sns[], numstates, accset[], nacc, hashval, *newds_addr; if ( accset[i] < j ) j = accset[i]; - dfaacc[newds] = (int *) j; + dfaacc[newds].dfaacc_state = j; } *newds_addr = newds; @@ -308,6 +320,7 @@ int sns[], numstates, accset[], nacc, hashval, *newds_addr; * int nset[current_max_dfa_size], numstates; * numstates = symfollowset( ds, dsize, transsym, nset ); */ + int symfollowset( ds, dsize, transsym, nset ) int ds[], dsize, transsym, nset[]; @@ -363,7 +376,7 @@ int ds[], dsize, transsym, nset[]; } else if ( sym >= 'A' && sym <= 'Z' && caseins ) - lexfatal( "consistency check failed in symfollowset" ); + flexfatal( "consistency check failed in symfollowset" ); else if ( sym == SYM_EPSILON ) { /* do nothing */ @@ -387,6 +400,7 @@ bottom: * symlist[numecs]; * sympartition( ds, numstates, symlist, duplist ); */ + sympartition( ds, numstates, symlist, duplist ) int ds[], numstates, duplist[]; int symlist[]; @@ -416,7 +430,7 @@ int symlist[]; if ( tch != SYM_EPSILON ) { if ( tch < -lastccl || tch > CSIZE ) - lexfatal( "bad transition character detected in sympartition()" ); + flexfatal( "bad transition character detected in sympartition()" ); if ( tch > 0 ) { /* character transition */ diff --git a/ecs.c b/ecs.c index 2a60c9b..8c01338 100644 --- a/ecs.c +++ b/ecs.c @@ -1,7 +1,15 @@ -/* lexecs - equivalence class routines */ +/* flexecs - equivalence class routines */ /* - * Copyright (c) University of California, 1987 + * Copyright (c) 1987, the University of California + 
* + * The United States Government has rights in this work pursuant to + * contract no. DE-AC03-76SF00098 between the United States Department of + * Energy and the University of California. + * + * This program may be redistributed. Enhancements and derivative works + * may be created provided the new works, if made available to the general + * public, are made available for use by anyone. */ #include "flexdef.h" @@ -11,6 +19,7 @@ * synopsis * ccl2ecl(); */ + ccl2ecl() { @@ -54,6 +63,7 @@ ccl2ecl() * is the backward linked-list, and num is the number of class members. * Returned is the number of classes. */ + int cre8ecs( fwd, bck, num ) int fwd[], bck[], num; @@ -91,6 +101,7 @@ int fwd[], bck[], num; * number of elements in the ccl, fwd is the forward link-list of equivalent * characters, bck is the backward link-list, and llsiz size of the link-list */ + mkeccl( ccls, lenccl, fwd, bck, llsiz ) char ccls[]; int lenccl, fwd[], bck[], llsiz; @@ -171,6 +182,7 @@ next_pt: * int tch, fwd[], bck[]; * mkechar( tch, fwd, bck ); */ + mkechar( tch, fwd, bck ) int tch, fwd[], bck[]; diff --git a/flexdef.h b/flexdef.h index b41b649..d668a03 100644 --- a/flexdef.h +++ b/flexdef.h @@ -1,17 +1,35 @@ /* - * Symbol definitions for flex. + * Definitions for flex. * * modification history * -------------------- - * 02a vp 27jun86 .translated into C/FTL + * 02b kg, vp 30sep87 .added definitions for fast scanner; misc. cleanup + * 02a vp 27jun86 .translated into C/FTL */ /* - * Copyright (c) University of California, 1987 + * Copyright (c) 1987, the University of California + * + * The United States Government has rights in this work pursuant to + * contract no. DE-AC03-76SF00098 between the United States Department of + * Energy and the University of California. + * + * This program may be redistributed. Enhancements and derivative works + * may be created provided the new works, if made available to the general + * public, are made available for use by anyone. 
*/ #include +#ifdef SV +#include +#define bzero(s, n) memset((char *)(s), '\000', (unsigned)(n)) +#else +#include +#endif + +char *sprintf(); /* keep lint happy */ + /* maximum line length we'll have to deal with */ #define MAXLINE BUFSIZ @@ -30,13 +48,21 @@ #define DEFAULT_SKELETON_FILE "flex.skel" #endif -/* maximum number of characters per line recognized by Fortran compiler */ -#define DATALINEWIDTH 72 +#ifndef FAST_SKELETON_FILE +#define FAST_SKELETON_FILE "flex.fastskel" +#endif + +/* special nxt[] action number for the "at the end of the input buffer" state */ +/* note: -1 is already taken by YY_NEW_FILE */ +#define END_OF_BUFFER_ACTION -3 +/* action number for default action for fast scanners */ +#define DEFAULT_ACTION -2 -/* string to indent Fortran data statements with */ -#define DATAINDENTSTR " " -/* width of dataindent string in Fortran columns */ -#define DATAINDENTWIDTH 6 +/* special chk[] values marking the slots taking by end-of-buffer and action + * numbers + */ +#define EOB_POSITION -1 +#define ACTION_POSITION -2 /* number of data items per line for -f output */ #define NUMDATAITEMS 10 @@ -46,6 +72,9 @@ */ #define NUMDATALINES 10 +/* transition_struct_out() definitions */ +#define TRANS_STRUCT_PRINT_LENGTH 15 + /* returns true if an nfa state has an epsilon out-transition slot * that can be used. This definition is currently not used. */ @@ -186,11 +215,22 @@ #define MSP 50 /* maximum number of saved protos (protos on the proto queue) */ +/* maximum number of out-transitions a state can have that we'll rummage + * around through the interior of the internal fast table looking for a + * spot for it + */ +#define MAX_XTIONS_FOR_FULL_INTERIOR_FIT 4 + /* number that, if used to subscript an array, has a good chance of producing * an error; should be small enough to fit into a short */ #define BAD_SUBSCRIPT -32767 +/* absolute value of largest number that can be stored in a short, with a + * bit of slop thrown in for general paranoia. 
+ */ +#define MAX_SHORT 32766 + /* Declarations for global variables. */ @@ -231,10 +271,13 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; * fulltbl - if true (-cf flag), don't compress the DFA state table * usemecs - if true (-cm flag), use meta-equivalence classes * reject - if true (-r flag), generate tables for REJECT macro + * fullspd - if true (-F flag), use Jacobson method of table representation + * gen_line_dirs - if true (i.e., no -L flag), generate #line directives */ extern int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt; -extern int interactive, caseins, genftl, useecs, fulltbl, usemecs, reject; +extern int interactive, caseins, useecs, fulltbl, usemecs, reject; +extern int fullspd, gen_line_dirs; /* variables used in the flex input routines: @@ -243,13 +286,16 @@ extern int interactive, caseins, genftl, useecs, fulltbl, usemecs, reject; * statement. Used to generate readable -f output * skelfile - fd of the skeleton file * yyin - input file + * temp_action_file - temporary file to hold actions + * action_file_name - name of the temporary file * infilename - name of input file * linenum - current input line number */ extern int datapos, dataline, linenum; -extern FILE *skelfile, *yyin; +extern FILE *skelfile, *yyin, *temp_action_file; extern char *infilename; +extern char *action_file_name; /* variables for stack of states having only one out-transition: @@ -352,15 +398,21 @@ extern int lastsc, current_max_scs, *scset, *scbol, *scxclu, *actvsc; * numsnpairs - number of state/nextstate transition pairs * jambase - position in base/def where the default jam table starts * jamstate - state number corresponding to "jam" state + * end_of_buffer_state - end-of-buffer dfa state number */ extern int current_max_dfa_size, current_max_xpairs; extern int current_max_template_xpairs, current_max_dfas; extern int lastdfa, lasttemp, *nxt, *chk, *tnxt; -extern int *base, *def, tblend, firstfree, **dss, *dfasiz, **dfaacc; +extern int *base, *def, 
tblend, firstfree, **dss, *dfasiz; +extern union dfaacc_union + { + int *dfaacc_set; + int dfaacc_state; + } *dfaacc; extern int *accsiz, *dhash, *todo, todo_head, todo_next, numas; extern int numsnpairs, jambase, jamstate; - +extern int end_of_buffer_state; /* variables for ccl information: * lastccl - ccl index of the last created ccl @@ -414,9 +466,16 @@ char *allocate_array(), *reallocate_array(); #define allocate_integer_pointer_array(size) \ (int **) allocate_array( size, sizeof( int * ) ) +#define allocate_dfaacc_union(size) \ + (union dfaacc_union *) \ + allocate_array( size, sizeof( union dfaacc_union ) ) + #define reallocate_integer_pointer_array(array,size) \ (int **) reallocate_array( (char *) array, size, sizeof( int * ) ) +#define reallocate_dfaacc_union(array, size) \ + (union dfaacc_union *) reallocate_array( (char *) array, size, sizeof( union dfaacc_union ) ) + #define allocate_character_array(size) allocate_array( size, sizeof( char ) ) #define reallocate_character_array(array,size) \ diff --git a/main.c b/main.c index d0a7ae1..aff4dc4 100644 --- a/main.c +++ b/main.c @@ -1,21 +1,31 @@ /* flex - tool to generate fast lexical analyzers * - * Copyright (c) University of California, 1987 * + * Copyright (c) 1987, the University of California + * + * The United States Government has rights in this work pursuant to + * contract no. DE-AC03-76SF00098 between the United States Department of + * Energy and the University of California. + * + * This program may be redistributed. Enhancements and derivative works + * may be created provided the new works, if made available to the general + * public, are made available for use by anyone. * - * ver date who remarks - * --- ---- --- ------------------------------------------------------- - * 04a 27Jun86 VP .translated from Ratfor into C - * 01a 22Aug83 VP .written. Original version by Jef Poskanzer. 
+ * + * ver date who remarks + * --- ---- ------ ------------------------------------------------------- + * 04b 30sep87 kg, vp .implemented (part of) Van Jacobson's fast scanner design + * 04a 27jun86 vp .translated from Ratfor into C + * 01a 22aug83 vp .written. Original version by Jef Poskanzer. */ - #include "flexdef.h" /* these globals are all defined and commented in flexdef.h */ int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt; -int interactive, caseins, genftl, useecs, fulltbl, usemecs, reject; +int interactive, caseins, useecs, fulltbl, usemecs, reject; +int fullspd, gen_line_dirs; int datapos, dataline, linenum; FILE *skelfile = NULL; char *infilename = NULL; @@ -32,7 +42,8 @@ int lastsc, current_max_scs, *scset, *scbol, *scxclu, *actvsc; int current_max_dfa_size, current_max_xpairs; int current_max_template_xpairs, current_max_dfas; int lastdfa, *nxt, *chk, *tnxt; -int *base, *def, tblend, firstfree, numtemps, **dss, *dfasiz, **dfaacc; +int *base, *def, tblend, firstfree, numtemps, **dss, *dfasiz; +union dfaacc_union *dfaacc; int *accsiz, *dhash, *todo, todo_head, todo_next, numas; int numsnpairs, jambase, jamstate; int lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse; @@ -41,6 +52,9 @@ char *ccltbl; char *starttime, *endtime, nmstr[MAXLINE]; int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; +FILE *temp_action_file; +int end_of_buffer_state; +char *action_file_name = "/tmp/flexXXXXXX"; /* flex - main program @@ -54,7 +68,8 @@ int argc; char **argv; { - lexinit( argc, argv ); + flexinit( argc, argv ); + readin(); if ( ! syntaxerror ) @@ -62,21 +77,21 @@ char **argv; /* convert the ndfa to a dfa */ ntod(); - /* generate the ratfor state transition tables from the dfa */ - gentabs(); + /* generate the C state transition tables from the DFA */ + make_tables(); } - /* note, lexend does not return. It exits with its argument as status. 
*/ + /* note, flexend does not return. It exits with its argument as status. */ - lexend( 0 ); + flexend( 0 ); } -/* lexend - terminate flex +/* flexend - terminate flex * * synopsis * int status; - * lexend( status ); + * flexend( status ); * * status is exit status. * @@ -84,7 +99,7 @@ char **argv; * This routine does not return. */ -lexend( status ) +flexend( status ) int status; { @@ -94,6 +109,9 @@ int status; if ( skelfile != NULL ) (void) fclose( skelfile ); + (void) fclose( temp_action_file ); + (void) unlink( action_file_name ); + if ( printstats ) { endtime = gettime(); @@ -102,9 +120,6 @@ int status; fprintf( stderr, " started at %s, finished at %s\n", starttime, endtime ); - if ( ! genftl ) - fprintf( stderr, " Ratfor scanner generated\n" ); - fprintf( stderr, " %d/%d NFA states\n", lastnfa, current_mns ); fprintf( stderr, " %d/%d DFA states (%d words)\n", lastdfa, current_max_dfas, totnst ); @@ -175,26 +190,25 @@ int status; } -/* lexinit - initialize flex +/* flexinit - initialize flex * * synopsis * int argc; * char **argv; - * lexinit( argc, argv ); + * flexinit( argc, argv ); */ -lexinit( argc, argv ) +flexinit( argc, argv ) int argc; char **argv; { - int i; - char *arg, *skelname = DEFAULT_SKELETON_FILE, *gettime(), clower(); - int sawcmpflag, use_stdout; + int i, sawcmpflag, use_stdout; + char *arg, *skelname = NULL, *gettime(), clower(), *mktemp(); printstats = syntaxerror = trace = spprdflt = interactive = caseins = false; - ddebug = fulltbl = reject = false; - usemecs = genftl = useecs = true; + ddebug = fulltbl = reject = fullspd = false; + gen_line_dirs = usemecs = useecs = true; sawcmpflag = false; use_stdout = false; @@ -212,7 +226,7 @@ char **argv; { case 'c': if ( i != 1 ) - lexerror( "-c flag must be given separately" ); + flexerror( "-c flag must be given separately" ); if ( ! 
sawcmpflag ) { @@ -262,21 +276,22 @@ char **argv; caseins = true; break; - case 'l': - use_stdout = false; - break; - - case 'n': - printstats = false; + case 'L': + gen_line_dirs = false; break; case 'r': reject = true; break; + case 'F': + fullspd = true; + useecs = usemecs = false; + break; + case 'S': if ( i != 1 ) - lexerror( "-S flag must be given separately" ); + flexerror( "-S flag must be given separately" ); skelname = arg + i + 1; goto get_next_arg; @@ -307,23 +322,41 @@ get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ } if ( fulltbl && usemecs ) - lexerror( "full table and -cm don't make sense together" ); + flexerror( "full table and -cm don't make sense together" ); if ( fulltbl && interactive ) - lexerror( "full table and -I are (currently) incompatible" ); + flexerror( "full table and -I are (currently) incompatible" ); + + if ( (fulltbl || fullspd) && reject ) + flexerror( "reject (-r) cannot be used with -f or -F" ); + + if ( fulltbl && fullspd ) + flexerror( "full table and -F are mutually exclusive" ); + + if ( ! skelname ) + { + static char skeleton_name_storage[400]; + + skelname = skeleton_name_storage; + + if ( fullspd || fulltbl ) + (void) strcpy( skelname, FAST_SKELETON_FILE ); + else + (void) strcpy( skelname, DEFAULT_SKELETON_FILE ); + } if ( ! 
use_stdout ) { FILE *prev_stdout = freopen( "lex.yy.c", "w", stdout ); if ( prev_stdout == NULL ) - lexerror( "could not create lex.yy.c" ); + flexerror( "could not create lex.yy.c" ); } if ( argc ) { if ( argc > 1 ) - lexerror( "extraneous argument(s) given" ); + flexerror( "extraneous argument(s) given" ); yyin = fopen( infilename = argv[0], "r" ); @@ -343,6 +376,11 @@ get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ if ( (skelfile = fopen( skelname, "r" )) == NULL ) lerrsf( "can't open skeleton file %s", skelname ); + (void) mktemp( action_file_name ); + + if ( (temp_action_file = fopen( action_file_name, "w" )) == NULL ) + lerrsf( "can't open temporary action file %s", action_file_name ); + lastdfa = lastnfa = accnum = numas = numsnpairs = tmpuses = 0; numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0; numuniq = numdup = hshsave = eofseen = datapos = dataline = 0; @@ -388,60 +426,35 @@ get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ * synopsis * readin(); */ + readin() { - if ( genftl ) - { - fputs( "#define YYDEFAULTACTION ", stdout ); - - if ( spprdflt ) - fputs( "YYFATALERROR( \"flex scanner jammed\" )", stdout ); - else - fputs( "ECHO", stdout ); - - fputs( ";\n", stdout ); - - if ( ddebug ) - puts( "#define LEX_DEBUG" ); - if ( useecs ) - puts( "#define LEX_USE_ECS" ); - if ( usemecs ) - puts( "#define LEX_USE_MECS" ); - if ( interactive ) - puts( "#define LEX_INTERACTIVE_SCANNER" ); - if ( reject ) - puts( "#define LEX_REJECT_ENABLED" ); - if ( fulltbl ) - puts( "#define LEX_FULL_TABLE" ); - } + fputs( "#define YY_DEFAULT_ACTION ", stdout ); + if ( spprdflt ) + fputs( "YY_FATAL_ERROR( \"flex scanner jammed\" )", stdout ); else - { - fputs( "define(YYDEFAULTACTION,", stdout ); - - if ( spprdflt ) - fputs( "call error( \"flex scanner jammed\" )", stdout ); - else - fputs( "ECHO", stdout ); + fputs( "ECHO", stdout ); - fputs( ")\n", stdout ); + fputs( ";\n", stdout ); - if ( ddebug 
) - puts( "define(LEX_DEBUG,)" ); - if ( useecs ) - puts( "define(LEX_USE_ECS,)" ); - if ( usemecs ) - puts( "define(LEX_USE_MECS,)" ); - if ( reject ) - puts( "define(LEX_REJECT_ENABLED,)" ); - if ( fulltbl ) - puts( "define(LEX_FULL_TABLE,)" ); - } + if ( ddebug ) + puts( "#define FLEX_DEBUG" ); + if ( useecs ) + puts( "#define FLEX_USE_ECS" ); + if ( usemecs ) + puts( "#define FLEX_USE_MECS" ); + if ( interactive ) + puts( "#define FLEX_INTERACTIVE_SCANNER" ); + if ( reject ) + puts( "#define FLEX_REJECT_ENABLED" ); + if ( fulltbl ) + puts( "#define FLEX_FULL_TABLE" ); skelout(); - line_directive_out(); + line_directive_out( stdout ); if ( yyparse() ) lerrif( "fatal parse error at line %d", linenum ); @@ -454,6 +467,7 @@ readin() else numecs = CSIZE; + } @@ -503,5 +517,5 @@ set_up_initial_allocations() dhash = allocate_integer_array( current_max_dfas ); todo = allocate_integer_array( current_max_dfas ); dss = allocate_integer_pointer_array( current_max_dfas ); - dfaacc = allocate_integer_pointer_array( current_max_dfas ); + dfaacc = allocate_dfaacc_union( current_max_dfas ); } diff --git a/misc.c b/misc.c index 3364e4c..157c3d3 100644 --- a/misc.c +++ b/misc.c @@ -1,7 +1,15 @@ -/* lexmisc - miscellaneous flex routines */ +/* flexmisc - miscellaneous flex routines */ /* - * Copyright (c) University of California, 1987 + * Copyright (c) 1987, the University of California + * + * The United States Government has rights in this work pursuant to + * contract no. DE-AC03-76SF00098 between the United States Department of + * Energy and the University of California. + * + * This program may be redistributed. Enhancements and derivative works + * may be created provided the new works, if made available to the general + * public, are made available for use by anyone. 
*/ #include @@ -10,6 +18,27 @@ char *malloc(), *realloc(); +/* action_out - write the actions from the temporary file to lex.yy.c + * + * synopsis + * action_out(); + * + * Copies the action file up to %% (or end-of-file) to lex.yy.c + */ + +action_out() + + { + char buf[MAXLINE]; + + while ( fgets( buf, MAXLINE, temp_action_file ) != NULL ) + if ( buf[0] == '%' && buf[1] == '%' ) + break; + else + fputs( buf, stdout ); + } + + /* allocate_array - allocate memory for an integer array of the given size */ char *allocate_array( size, element_size ) @@ -19,7 +48,7 @@ int size, element_size; register char *mem = malloc( (unsigned) (element_size * size) ); if ( mem == NULL ) - lexfatal( "memory allocation failed in allocate_array()" ); + flexfatal( "memory allocation failed in allocate_array()" ); return ( mem ); } @@ -92,7 +121,7 @@ register char *str; copy = malloc( (unsigned) ((c - str + 1) * sizeof( char )) ); if ( copy == NULL ) - lexfatal( "dynamic memory failure in copy_string()" ); + flexfatal( "dynamic memory failure in copy_string()" ); for ( c = copy; (*c++ = *str++); ) ; @@ -151,9 +180,8 @@ dataend() if ( datapos > 0 ) dataflush(); - if ( genftl ) - /* add terminator for initialization */ - puts( " } ;\n" ); + /* add terminator for initialization */ + puts( " } ;\n" ); dataline = 0; } @@ -170,23 +198,19 @@ dataflush() { putchar( '\n' ); - if ( genftl ) + if ( ++dataline >= NUMDATALINES ) { - if ( ++dataline >= NUMDATALINES ) - { - /* put out a blank line so that the table is grouped into - * large blocks that enable the user to find elements easily - */ - putchar( '\n' ); - dataline = 0; - } + /* put out a blank line so that the table is grouped into + * large blocks that enable the user to find elements easily + */ + putchar( '\n' ); + dataline = 0; } /* reset the number of characters written on the current line */ datapos = 0; } - /* gettime - return current time * * synopsis @@ -230,7 +254,7 @@ int arg; { char errmsg[MAXLINE]; (void) sprintf( errmsg, msg, 
arg ); - lexerror( errmsg ); + flexerror( errmsg ); } @@ -246,199 +270,115 @@ char msg[], arg[]; { char errmsg[MAXLINE]; + (void) sprintf( errmsg, msg, arg ); - lexerror( errmsg ); + flexerror( errmsg ); } -/* lexerror - report an error message and terminate +/* flexerror - report an error message and terminate * * synopsis * char msg[]; - * lexerror( msg ); + * flexerror( msg ); */ -lexerror( msg ) +flexerror( msg ) char msg[]; { fprintf( stderr, "flex: %s\n", msg ); - lexend( 1 ); + flexend( 1 ); } -/* lexfatal - report a fatal error message and terminate +/* flexfatal - report a fatal error message and terminate * * synopsis * char msg[]; - * lexfatal( msg ); + * flexfatal( msg ); */ -lexfatal( msg ) +flexfatal( msg ) char msg[]; { fprintf( stderr, "flex: fatal internal error %s\n", msg ); - lexend( 1 ); + flexend( 1 ); } /* line_directive_out - spit out a "# line" statement */ -line_directive_out() +line_directive_out( output_file_name ) +FILE *output_file_name; { - if ( infilename ) - printf( "# line %d \"%s\"\n", linenum, infilename ); + if ( infilename && gen_line_dirs ) + fprintf( output_file_name, "# line %d \"%s\"\n", linenum, infilename ); } /* mk2data - generate a data statement for a two-dimensional array * * synopsis - * char name; - * int row, column, value; - * mk2data( name, row, column, value ); + * int value; + * mk2data( value ); * - * generates a data statement initializing "name(row, column)" to "value" - * Note that name is only a character; NOT a string. If we're generating - * FTL (-f flag), "name", "row", and "column" get ignored. 
+ * generates a data statement initializing the current 2-D array to "value" */ -mk2data( name, row, column, value ) -char name; -int row, column, value; +mk2data( value ) +int value; { - int datalen; - static char dindent[] = DATAINDENTSTR; - - if ( genftl ) + if ( datapos >= NUMDATAITEMS ) { - if ( datapos >= NUMDATAITEMS ) - { - putchar( ',' ); - dataflush(); - } - - if ( datapos == 0 ) - /* indent */ - fputs( " ", stdout ); - - else - putchar( ',' ); - - ++datapos; - - printf( "%5d", value ); + putchar( ',' ); + dataflush(); } - else - { - /* figure out length of data statement to be written. 7 is the constant - * overhead of a one character name, '(', ',', and ')' to delimit - * the array reference, a '/' and a '/' to delimit the value, and - * room for a blank or a comma between this data statement and the - * previous one - */ - - datalen = 7 + numdigs( row ) + numdigs( column ) + numdigs( value ); - - if ( datalen + datapos >= DATALINEWIDTH | datapos == 0 ) - { - if ( datapos != 0 ) - dataflush(); - - /* precede data statement with '%' so rat4 preprocessor doesn't have - * to bother looking at it -- speed hack - */ - printf( "%%%sdata ", dindent ); + if ( datapos == 0 ) + /* indent */ + fputs( " ", stdout ); - /* 4 is the constant overhead of writing out the word "DATA" */ - datapos = DATAINDENTWIDTH + 4 + datalen; - } + else + putchar( ',' ); - else - { - putchar( ',' ); - datapos = datapos + datalen; - } + ++datapos; - printf( "%c(%d,%d)/%d/", name, row, column, value ); - } + printf( "%5d", value ); } /* mkdata - generate a data statement * * synopsis - * char name; - * int arrayelm, value; - * mkdata( name, arrayelm, value ); + * int value; + * mkdata( value ); * - * generates a data statement initializing "name(arrayelm)" to "value" - * Note that name is only a character; NOT a string. If we're generating - * FTL (-f flag), "name" and "arrayelm" get ignored. 
+ * generates a data statement initializing the current array element to + * "value" */ -mkdata( name, arrayelm, value ) -char name; -int arrayelm, value; +mkdata( value ) +int value; { - int datalen; - static char dindent[] = DATAINDENTSTR; - - if ( genftl ) + if ( datapos >= NUMDATAITEMS ) { - if ( datapos >= NUMDATAITEMS ) - { - putchar( ',' ); - dataflush(); - } - - if ( datapos == 0 ) - /* indent */ - fputs( " ", stdout ); - - else - putchar( ',' ); - - ++datapos; - - printf( "%5d", value ); + putchar( ',' ); + dataflush(); } - else - { - /* figure out length of data statement to be written. 6 is the constant - * overhead of a one character name, '(' and ')' to delimit the array - * reference, a '/' and a '/' to delimit the value, and room for a - * blank or a comma between this data statement and the previous one - */ - - datalen = 6 + numdigs( arrayelm ) + numdigs( value ); - - if ( datalen + datapos >= DATALINEWIDTH | datapos == 0 ) - { - if ( datapos != 0 ) - dataflush(); - - /* precede data statement with '%' so rat4 preprocessor doesn't have - * to bother looking at it -- speed hack - */ - printf( "%%%sdata ", dindent ); + if ( datapos == 0 ) + /* indent */ + fputs( " ", stdout ); - /* 4 is the constant overhead of writing out the word "DATA" */ - datapos = DATAINDENTWIDTH + 4 + datalen; - } + else + putchar( ',' ); - else - { - putchar( ',' ); - datapos = datapos + datalen; - } + ++datapos; - printf( "%c(%d)/%d/", name, arrayelm, value ); - } + printf( "%5d", value ); } @@ -542,42 +482,6 @@ char array[]; } -/* numdigs - number of digits (includes leading sign) in number - * - * synopsis - * int numdigs, x; - * num = numdigs( x ); - */ -int numdigs( x ) -int x; - - { - if ( x < 0 ) - { - /* the only negative numbers we expect to encounter are very - * small ones - */ - if ( x < -9 ) - lexfatal( "assumption of small negative numbers botched in numdigs()" ); - - return ( 2 ); - } - - if ( x < 10 ) - return ( 1 ); - else if ( x < 100 ) - return ( 2 ); - 
else if ( x < 1000 ) - return ( 3 ); - else if ( x < 10000 ) - return ( 4 ); - else if ( x < 100000 ) - return ( 5 ); - else - return ( 6 ); - } - - /* otoi - convert an octal digit string to an integer value * * synopsis: @@ -618,13 +522,13 @@ int size, element_size; (unsigned) (size * element_size )); if ( new_array == NULL ) - lexfatal( "attempt to increase array size failed" ); + flexfatal( "attempt to increase array size failed" ); return ( new_array ); } -/* skelout - write out one section of the lexskel file +/* skelout - write out one section of the skeleton file * * synopsis * skelout(); @@ -644,3 +548,33 @@ skelout() else fputs( buf, stdout ); } + + +/* transition_struct_out - output a yy_trans_info structure + * + * synopsis + * int element_v, element_n; + * transition_struct_out( element_v, element_n ); + * + * outputs the yy_trans_info structure with the two elements, element_v and + * element_n. Formats the output with spaces and carriage returns. + */ + +transition_struct_out( element_v, element_n ) +int element_v, element_n; + + { + printf( "%7d, %5d,", element_v, element_n ); + + datapos += TRANS_STRUCT_PRINT_LENGTH; + + if ( datapos >= 75 ) + { + printf( "\n" ); + + if ( ++dataline % 10 == 0 ) + printf( "\n" ); + + datapos = 0; + } + } diff --git a/nfa.c b/nfa.c index d514ce1..9b93892 100644 --- a/nfa.c +++ b/nfa.c @@ -1,7 +1,15 @@ -/* lexnfa - NFA construction routines */ +/* flexnfa - NFA construction routines */ /* - * Copyright (c) University of California, 1987 + * Copyright (c) 1987, the University of California + * + * The United States Government has rights in this work pursuant to + * contract no. DE-AC03-76SF00098 between the United States Department of + * Energy and the University of California. + * + * This program may be redistributed. Enhancements and derivative works + * may be created provided the new works, if made available to the general + * public, are made available for use by anyone. 
*/ #include "flexdef.h" @@ -20,34 +28,46 @@ * characters in the pattern, or zero if the trailing context has variable * length. */ + add_accept( mach, headcnt, trailcnt ) int mach, headcnt, trailcnt; { int astate; - printf( "case %d:\n", ++accnum ); + fprintf( temp_action_file, "case %d:\n", ++accnum ); if ( headcnt > 0 || trailcnt > 0 ) { /* do trailing context magic to not match the trailing characters */ - printf( "YYDOBEFORESCAN; /* undo effects of setting up yytext */\n" ); + fprintf( temp_action_file, + "YY_DO_BEFORE_SCAN; /* undo effects of setting up yytext */\n" ); if ( headcnt > 0 ) { - if ( ! genftl || headcnt > 1 ) - printf( "yycbufp = yybbufp + %d;\n", - genftl ? headcnt - 1 : headcnt ); + int head_offset = headcnt - 1; + + if ( fullspd || fulltbl ) + /* with the fast skeleton, yy_c_buf_p points to the *next* + * character to scan, rather than the one that was last + * scanned + */ + ++head_offset; + + if ( head_offset > 0 ) + fprintf( temp_action_file, "yy_c_buf_p = yy_b_buf_p + %d;\n", + head_offset ); + else - printf( "yycbufp = yybbufp;\n" ); + fprintf( temp_action_file, "yy_c_buf_p = yy_b_buf_p;\n" ); } else - printf( "yycbufp -= %d;\n", trailcnt ); - - printf( "YYDOBEFOREACTION; /* set up yytext again */\n" ); + fprintf( temp_action_file, "yy_c_buf_p -= %d;\n", trailcnt ); + + fprintf( temp_action_file, "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" ); } - line_directive_out(); + line_directive_out( temp_action_file ); /* hang the accepting number off an epsilon state. 
if it is associated * with a state that has a non-epsilon out-transition, then the state @@ -76,6 +96,7 @@ int mach, headcnt, trailcnt; * singl - a singleton machine * num - the number of copies of singl to be present in newsng */ + int copysingl( singl, num ) int singl, num; @@ -97,6 +118,7 @@ int singl, num; * int state1; * dumpnfa( state1 ); */ + dumpnfa( state1 ) int state1; @@ -150,6 +172,7 @@ int state1; * also note that the original MUST be contiguous, with its low and high * states accessible by the arrays firstst and lastst */ + int dupmachine( mach ) int mach; @@ -197,6 +220,7 @@ int mach; * and then last, and will fail if either of the sub-patterns fails. * FIRST is set to new by the operation. last is unmolested. */ + int link_machines( first, last ) int first, last; @@ -232,6 +256,7 @@ int first, last; * the resulting machine CANNOT be used with any other "mk" operation except * more mkbranch's. Compare with mkor() */ + int mkbranch( first, second ) int first, second; @@ -260,6 +285,7 @@ int first, second; * * new - a new state which matches the closure of "state" */ + int mkclos( state ) int state; @@ -281,6 +307,7 @@ int state; * 1. mach must be the last machine created * 2. mach is destroyed by the call */ + int mkopt( mach ) int mach; @@ -319,6 +346,7 @@ int mach; * the code is rather convoluted because an attempt is made to minimize * the number of epsilon states needed */ + int mkor( first, second ) int first, second; @@ -379,6 +407,7 @@ int first, second; * * new - a machine matching the positive closure of "state" */ + int mkposcl( state ) int state; @@ -411,6 +440,7 @@ int state; * note * if "ub" is INFINITY then "new" matches "lb" or more occurances of "mach" */ + int mkrep( mach, lb, ub ) int mach, lb, ub; @@ -457,6 +487,7 @@ int mach, lb, ub; * CONTIGUOUS. 
Change it and you will have to rewrite DUPMACHINE (kludge * that it admittedly is) */ + int mkstate( sym ) int sym; @@ -523,6 +554,7 @@ int sym; * statefrom - the state from which the transition is to be made * stateto - the state to which the transition is to be made */ + mkxtion( statefrom, stateto ) int statefrom, stateto; @@ -532,7 +564,7 @@ int statefrom, stateto; else if ( (transchar[statefrom] != SYM_EPSILON) || (trans2[statefrom] != NO_TRANSITION) ) - lexfatal( "found too many transitions in mkxtion()" ); + flexfatal( "found too many transitions in mkxtion()" ); else { /* second out-transition for an epsilon state */ diff --git a/parse.y b/parse.y index b5cb379..f5094e5 100644 --- a/parse.y +++ b/parse.y @@ -1,7 +1,15 @@ -/* lexparse.y - parser for flex input */ +/* flexparse.y - parser for flex input */ /* - * Copyright (c) University of California, 1987 + * Copyright (c) 1987, the University of California + * + * The United States Government has rights in this work pursuant to + * contract no. DE-AC03-76SF00098 between the United States Department of + * Energy and the University of California. + * + * This program may be redistributed. Enhancements and derivative works + * may be created provided the new works, if made available to the general + * public, are made available for use by anyone. 
*/ %token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL @@ -63,7 +71,7 @@ namelist1 : namelist1 WHITESPACE NAME { synerr( "bad start condition list" ); } ; -sect2 : sect2 initforrule lexrule '\n' +sect2 : sect2 initforrule flexrule '\n' | ; @@ -75,7 +83,7 @@ initforrule : } ; -lexrule : scon '^' re eol +flexrule : scon '^' re eol { pat = link_machines( $3, $4 ); add_accept( pat, headcnt, trailcnt ); diff --git a/scan.l b/scan.l index 5f344dc..c1fcaa4 100644 --- a/scan.l +++ b/scan.l @@ -1,25 +1,27 @@ /* flexscan.l - scanner for flex input */ /* - * Copyright (c) University of California, 1987 + * Copyright (c) 1987, the University of California + * + * The United States Government has rights in this work pursuant to + * contract no. DE-AC03-76SF00098 between the United States Department of + * Energy and the University of California. + * + * This program may be redistributed. Enhancements and derivative works + * may be created provided the new works, if made available to the general + * public, are made available for use by anyone. */ %{ #include "flexdef.h" -#include "strings.h" #include "y.tab.h" -#undef YYDECL -#define YYDECL \ - int lexscan() +#define ACTION_ECHO fprintf( temp_action_file, "%s", yytext ) +#define MARK_END_OF_PROLOG fprintf( temp_action_file, "%%%% end of prolog\n" ); -#undef yywrap -#define yywrap(result) \ - { \ - if ( ! did_second_skelout ) \ - skelout(); \ - result = 1; \ - } +#undef YY_DECL +#define YY_DECL \ + int flexscan() #define RETURNCHAR \ yylval = yytext[0]; \ @@ -52,20 +54,19 @@ ESCSEQ \\([^^\n]|"^".|0[0-9]{1,3}) static int bracelevel; int i, cclval; char nmdef[MAXLINE], myesc(); - static int didadef, did_second_skelout = false; + static int didadef; ^{WS}.*\n ++linenum; ECHO; /* indented code */ -^#.*\n ++linenum; ECHO; /* either a Ratfor comment or a CPP directive */ +^#.*\n ++linenum; ECHO; /* treat as a comment */ ^"/*" ECHO; BEGIN(C_COMMENT); ^"%s"(tart)? 
return ( SCDECL ); ^"%x" return ( XSCDECL ); -^"%{".*\n ++linenum; line_directive_out(); BEGIN(CODEBLOCK); +^"%{".*\n ++linenum; line_directive_out( stdout ); BEGIN(CODEBLOCK); {WS} return ( WHITESPACE ); ^"%%".* { sectnum = 2; - skelout(); - line_directive_out(); + line_directive_out( stdout ); BEGIN(SECT2PROLOG); return ( SECTEND ); } @@ -127,13 +128,12 @@ ESCSEQ \\([^^\n]|"^".|0[0-9]{1,3}) .*\n/[^ \t\n] { ++linenum; - ECHO; - skelout(); - did_second_skelout = true; + ACTION_ECHO; + MARK_END_OF_PROLOG; BEGIN(SECT2); } -.*\n ++linenum; ECHO; +.*\n ++linenum; ACTION_ECHO; ^{OPTWS}\n ++linenum; /* allow blank lines in section 2 */ ^{WS}.*\n { @@ -154,7 +154,14 @@ ESCSEQ \\([^^\n]|"^".|0[0-9]{1,3}) } {WS}"|".*\n ++linenum; return ( '\n' ); -{WS} | +{WS} { /* needs to be separate from following rule due to + * bug with trailing context + */ + bracelevel = 0; + BEGIN(ACTION); + return ( '\n' ); + } + {OPTWS}/\n { bracelevel = 0; BEGIN(ACTION); @@ -281,47 +288,45 @@ ESCSEQ \\([^^\n]|"^".|0[0-9]{1,3}) {OPTWS}"%}".* bracelevel = 0; -.* ECHO; +.* ACTION_ECHO; \n { ++linenum; - ECHO; + ACTION_ECHO; if ( bracelevel == 0 ) { - if ( genftl ) - puts( "\tbreak;" ); + fputs( "\tbreak;\n", temp_action_file ); BEGIN(SECT2); } } -"{" ECHO; ++bracelevel; -"}" ECHO; --bracelevel; -[^{}"'/\n]+ ECHO; -"/*" ECHO; BEGIN(ACTION_COMMENT); -"'"([^'\\\n]|\\.)*"'" ECHO; /* character constant */ -\" ECHO; BEGIN(ACTION_STRING); +"{" ACTION_ECHO; ++bracelevel; +"}" ACTION_ECHO; --bracelevel; +[^{}"'/\n]+ ACTION_ECHO; +"/*" ACTION_ECHO; BEGIN(ACTION_COMMENT); +"'"([^'\\\n]|\\.)*"'" ACTION_ECHO; /* character constant */ +\" ACTION_ECHO; BEGIN(ACTION_STRING); \n { ++linenum; - ECHO; + ACTION_ECHO; if ( bracelevel == 0 ) { - if ( genftl ) - puts( "\tbreak;" ); + fputs( "\tbreak;\n", temp_action_file ); BEGIN(SECT2); } } -. ECHO; +. ACTION_ECHO; -"*/" ECHO; BEGIN(ACTION); -[^*\n]+ ECHO; -"*" ECHO; -\n ++linenum; ECHO; -. 
ECHO; +"*/" ACTION_ECHO; BEGIN(ACTION); +[^*\n]+ ACTION_ECHO; +"*" ACTION_ECHO; +\n ++linenum; ACTION_ECHO; +. ACTION_ECHO; -[^"\\\n]+ ECHO; -\\. ECHO; -\n ++linenum; ECHO; -\" ECHO; BEGIN(ACTION); -. ECHO; +[^"\\\n]+ ACTION_ECHO; +\\. ACTION_ECHO; +\n ++linenum; ACTION_ECHO; +\" ACTION_ECHO; BEGIN(ACTION); +. ACTION_ECHO; {ESCSEQ} { @@ -339,32 +344,35 @@ ESCSEQ \\([^^\n]|"^".|0[0-9]{1,3}) .|\n { register int numchars; - /* black magic - we know the names of a lex scanner's + /* black magic - we know the names of a flex scanner's * internal variables. We cap the input buffer with * an end-of-string and dump it to the output. */ - YYDOBEFORESCAN; /* recover from setting up yytext */ + YY_DO_BEFORE_SCAN; /* recover from setting up yytext */ - yychbuf[yyebufp + 1] = '\0'; +#ifdef FLEX_FAST_SKEL + fputs( yy_c_buf_p + 1, stdout ); +#else + yy_ch_buf[yy_e_buf_p + 1] = '\0'; /* ignore the first character; it's the second '%' * put back by the yyless(1) above */ - fputs( yychbuf + yycbufp + 1, stdout ); + fputs( yy_ch_buf + yy_c_buf_p + 1, stdout ); +#endif /* if we don't do this, the data written by write() * can get overwritten when stdout is finally flushed */ (void) fflush( stdout ); - while ( (numchars = read( fileno(yyin), yychbuf, - YYBUFMAX )) > 0 ) - (void) write( fileno(stdout), yychbuf, numchars ); + while ( (numchars = read( fileno(yyin), yy_ch_buf, + YY_BUF_MAX )) > 0 ) + (void) write( fileno(stdout), yy_ch_buf, numchars ); if ( numchars < 0 ) - lexerror( "fatal read error in section 3" ); + flexerror( "fatal read error in section 3" ); return ( EOF ); } - %% diff --git a/sym.c b/sym.c index af50831..776a163 100644 --- a/sym.c +++ b/sym.c @@ -1,7 +1,15 @@ -/* lexsym - symbol table routines */ +/* flexsym - symbol table routines */ /* - * Copyright (c) University of California, 1987 + * Copyright (c) 1987, the University of California + * + * The United States Government has rights in this work pursuant to + * contract no. 
DE-AC03-76SF00098 between the United States Department of + * Energy and the University of California. + * + * This program may be redistributed. Enhancements and derivative works + * may be created provided the new works, if made available to the general + * public, are made available for use by anyone. */ #include "flexdef.h" @@ -49,7 +57,7 @@ int table_size; new_entry = (struct hash_entry *) malloc( sizeof( struct hash_entry ) ); if ( new_entry == NULL ) - lexfatal( "symbol table memory allocation failed" ); + flexfatal( "symbol table memory allocation failed" ); if ( (successor = table[hash_val]) ) { @@ -76,6 +84,7 @@ int table_size; * int cclnum; * cclinstal( ccltxt, cclnum ); */ + cclinstal( ccltxt, cclnum ) char ccltxt[]; int cclnum; @@ -98,6 +107,7 @@ int cclnum; * int ccllookup, cclval; * cclval/0 = ccllookup( ccltxt ); */ + int ccllookup( ccltxt ) char ccltxt[]; @@ -194,6 +204,7 @@ int hash_size; * char nd[], def[]; * ndinstal( nd, def ); */ + ndinstal( nd, def ) char nd[], def[]; @@ -213,6 +224,7 @@ char nd[], def[]; * char *ndlookup(); * def/NULL = ndlookup( nd ); */ + char *ndlookup( nd ) char nd[]; @@ -233,6 +245,7 @@ char nd[]; * NOTE * the start condition is Exclusive if xcluflg is true */ + scinstal( str, xcluflg ) char str[]; int xcluflg; @@ -240,19 +253,13 @@ int xcluflg; { char *copy_string(); - if ( genftl ) - { - /* bit of a hack. We know how the default start-condition is - * declared, and don't put out a define for it, because it - * would come out as "#define 0 1" - */ + /* bit of a hack. 
We know how the default start-condition is + * declared, and don't put out a define for it, because it + * would come out as "#define 0 1" + */ - if ( strcmp( str, "0" ) ) - printf( "#define %s %d\n", str, lastsc * 2 ); - } - - else - printf( "define(YYLEX_SC_%s,%d)\n", str, lastsc * 2 ); + if ( strcmp( str, "0" ) ) + printf( "#define %s %d\n", str, lastsc * 2 ); if ( ++lastsc >= current_max_scs ) { @@ -283,6 +290,7 @@ int xcluflg; * int sclookup; * scnum/0 = sclookup( str ); */ + int sclookup( str ) char str[]; diff --git a/tblcmp.c b/tblcmp.c index ae9bfd7..8fb9a67 100644 --- a/tblcmp.c +++ b/tblcmp.c @@ -1,7 +1,15 @@ -/* lexcmp - table compression routines */ +/* flexcmp - table compression routines */ /* - * Copyright (c) University of California, 1987 + * Copyright (c) 1987, the University of California + * + * The United States Government has rights in this work pursuant to + * contract no. DE-AC03-76SF00098 between the United States Department of + * Energy and the University of California. + * + * This program may be redistributed. Enhancements and derivative works + * may be created provided the new works, if made available to the general + * public, are made available for use by anyone. */ #include "flexdef.h" @@ -40,6 +48,7 @@ * state on EVERY transition character, and therefore cost only one * difference. */ + bldtbl( state, statenum, totaltrans, comstate, comfreq ) int state[], statenum, totaltrans, comstate, comfreq; @@ -185,6 +194,7 @@ int state[], statenum, totaltrans, comstate, comfreq; * up at the top end of the nxt array; they will now be compressed and have * table entries made for them. 
*/ + cmptmps() { @@ -274,98 +284,231 @@ expand_nxt_chk() } -/* gentabs - generate data statements for the transition tables +/* find_table_space - finds a space in the table for a state to be placed * * synopsis - * gentabs(); + * int *state, numtrans, block_start; + * int find_table_space(); + * + * block_start = find_table_space( state, numtrans ); + * + * State is the state to be added to the full speed transition table. + * Numtrans is the number of out-transititions for the state. + * + * find_table_space() returns the position of the start of the first block (in + * chk) able to accomodate the state + * + * In determining if a state will or will not fit, find_table_space() must take + * into account the fact that an end-of-buffer state will be added at [0], + * and an action number will be added in [-1]. */ -gentabs() +int find_table_space( state, numtrans ) +int *state, numtrans; + { - int i, j, k, numrows, *accset, nacc, *acc_array; - char clower(); + /* firstfree is the position of the first possible occurence of two + * consecutive unused records in the chk and nxt arrays + */ + register int i; + register int *state_ptr, *chk_ptr; + register int *ptr_to_last_entry_in_state; - /* *everything* is done in terms of arrays starting at 1, so provide - * a null entry for the zero element of all FTL arrays + /* if there are too many out-transititions, put the state at the end of + * nxt and chk */ - static char ftl_long_decl[] = "static long int %c[%d] =\n { 0,\n"; - static char ftl_short_decl[] = "static short int %c[%d] =\n { 0,\n"; - static char ftl_char_decl[] = "static char %c[%d] =\n { 0,\n"; + if ( numtrans > MAX_XTIONS_FOR_FULL_INTERIOR_FIT ) + { + /* if table is empty, return the first available spot in chk/nxt, + * which should be 1 + */ + if ( tblend < 2 ) + return ( 1 ); - acc_array = allocate_integer_array( current_max_dfas ); - nummt = 0; + i = tblend - numecs; /* start searching for table space near the + * end of chk/nxt arrays + */ + } - if ( 
fulltbl ) - jambase = lastdfa + 1; /* home of "jam" pseudo-state */ + else + i = firstfree; /* start searching for table space from the + * beginning (skipping only the elements + * which will definitely not hold the new + * state) + */ - printf( "#define YYJAM %d\n", jamstate ); - printf( "#define YYJAMBASE %d\n", jambase ); + while ( 1 ) /* loops until a space is found */ + { + if ( i + numecs > current_max_xpairs ) + expand_nxt_chk(); - if ( usemecs ) - printf( "#define YYTEMPLATE %d\n", lastdfa + 2 ); + /* loops until space for end-of-buffer and action number are found */ + while ( 1 ) + { + if ( chk[i - 1] == 0 ) /* check for action number space */ + { + if ( chk[i] == 0 ) /* check for end-of-buffer space */ + break; -#ifdef NOTDEF -/* unsupported code */ - if ( ! genftl ) - { /* ratfor scanner */ - static char vardata[] = "%%%sdata %s/%d/\n"; - static char dindent[] = DATAINDENTSTR; - static char arydecl[] = "integer %c(%d)\n"; - static char ary2decl[] = "integer %c(%d,%d)\n"; + else + i += 2; /* since i != 0, there is no use checking to + * see if (++i) - 1 == 0, because that's the + * same as i == 0, so we skip a space + */ + } - skelout(); + else + ++i; - if ( reject ) - { - /* write out the pointers into the accepting lists for each state, - * and the accepting lists - */ + if ( i + numecs > current_max_xpairs ) + expand_nxt_chk(); + } - /* alist needs to be lastdfa + 2 because we tell where a state's - * accepting list ends by checking the beginning of the next state, - * and there's an entry in alist for the default, "jam" pseudo-state - * (this latter entry is needed because states jam by making - * a transition to the state; see the flex skeleton. By the way, - * I *think* we could get rid of the jam state entirely by - * slight modification of the skeleton ...) 
- */ + /* if we started search from the beginning, store the new firstfree for + * the next call of find_table_space() + */ + if ( numtrans <= MAX_XTIONS_FOR_FULL_INTERIOR_FIT ) + firstfree = i + 1; - printf( arydecl, ALIST, lastdfa + 2 ); + /* check to see if all elements in chk (and therefore nxt) that are + * needed for the new state have not yet been taken + */ - printf( arydecl, ACCEPT, max( numas, 1 ) ); - } + state_ptr = &state[1]; + ptr_to_last_entry_in_state = &chk[i + numecs + 1]; + + for ( chk_ptr = &chk[i + 1]; chk_ptr != ptr_to_last_entry_in_state; + ++chk_ptr ) + if ( *(state_ptr++) != 0 && *chk_ptr != 0 ) + break; + + if ( chk_ptr == ptr_to_last_entry_in_state ) + return ( i ); else - printf( arydecl, ALIST, lastdfa + 1 ); + ++i; + } + } - if ( useecs ) - printf( arydecl, ECARRAY, CSIZE ); - if ( usemecs ) - printf( arydecl, MATCHARRAY, numecs ); - if ( fulltbl ) - { - printf( ary2decl, NEXTARRAY, lastdfa, numecs ); - printf( vardata, dindent, "yyjam", 0 ); - } +/* genctbl - generates full speed compressed transition table + * + * synopsis + * genctbl(); + */ - else - { - printf( arydecl, BASEARRAY, lastdfa + numtemps ); - printf( arydecl, DEFARRAY, lastdfa + numtemps ); - printf( arydecl, NEXTARRAY, tblend ); - printf( arydecl, CHECKARRAY, tblend ); +genctbl() - printf( vardata, dindent, "yyjam", jambase ); + { + register int i; - /* the first template begins right after the default jam table, - * which itself begins right after the last dfa - */ + /* table of verify for transition and offset to next state */ + printf( "static struct yy_trans_info yy_transition[%d] =\n", + tblend + numecs + 1 ); + printf( " {\n" ); + + /* We want the transition to be represented as the offset to the + * next state, not the actual state number, which is what it currently is. + * The offset is base[nxt[i]] - base[chk[i]]. That's just the + * difference between the starting points of the two involved states + * (to - from). 
+ * + * first, though, we need to find some way to put in our end-of-buffer + * flags and states. We do this by making a state with absolutely no + * transitions. We put it at the end of the table. + */ + /* at this point, we're guarenteed that there's enough room in nxt[] + * and chk[] to hold tblend + numecs entries. We need just two slots. + * One for the action and one for the end-of-buffer transition. We + * now *assume* that we're guarenteed the only character we'll try to + * index this nxt/chk pair with is EOB, i.e., 0, so we don't have to + * make sure there's room for jam entries for other characters. + */ - printf( vardata, dindent, "yytmp", lastdfa + 2 ); - } + base[lastdfa + 1] = tblend + 2; + nxt[tblend + 1] = END_OF_BUFFER_ACTION; + chk[tblend + 1] = numecs + 1; + chk[tblend + 2] = 1; /* anything but EOB */ + + /* make sure every state has a end-of-buffer transition and an action # */ + for ( i = 0; i <= lastdfa; ++i ) + { + chk[base[i]] = EOB_POSITION; + chk[base[i] - 1] = ACTION_POSITION; + nxt[base[i] - 1] = dfaacc[i].dfaacc_state; /* action number */ } -#endif NOTDEF + + for ( i = 0; i <= lastsc * 2; ++i ) + nxt[base[i] - 1] = DEFAULT_ACTION; + + dataline = 0; + datapos = 0; + + for ( i = 0; i <= tblend; ++i ) + { + if ( chk[i] == EOB_POSITION ) + transition_struct_out( 0, base[lastdfa + 1] - i ); + + else if ( chk[i] == ACTION_POSITION ) + transition_struct_out( 0, nxt[i] ); + + else if ( chk[i] > numecs || chk[i] == 0 ) + transition_struct_out( 0, 0 ); /* unused slot */ + + else /* verify, transitition */ + transition_struct_out( chk[i], base[nxt[i]] - (i - chk[i]) ); + } + + + /* here's the final, end-of-buffer state */ + transition_struct_out( chk[tblend + 1], nxt[tblend + 1] ); + transition_struct_out( chk[tblend + 2], nxt[tblend + 2] ); + + printf( " };\n" ); + printf( "\n" ); + + /* table of pointers to start states */ + printf( "static struct yy_trans_info *yy_state_ptr[%d] =\n", + lastsc * 2 + 1 ); + printf( " {\n" ); + + for ( i = 0; i 
<= lastsc * 2; ++i ) + printf( " &yy_transition[%d],\n", base[i] ); + + printf( " };\n" ); + } + + +/* gentabs - generate data statements for the transition tables + * + * synopsis + * gentabs(); + */ + +gentabs() + + { + int i, j, k, numrows, *accset, nacc, *acc_array; + char clower(); + + /* *everything* is done in terms of arrays starting at 1, so provide + * a null entry for the zero element of all FTL arrays + */ + static char ftl_long_decl[] = "static long int %c[%d] =\n { 0,\n"; + static char ftl_short_decl[] = "static short int %c[%d] =\n { 0,\n"; + static char ftl_char_decl[] = "static char %c[%d] =\n { 0,\n"; + + acc_array = allocate_integer_array( current_max_dfas ); + nummt = 0; + + if ( fulltbl ) + jambase = lastdfa + 1; /* home of "jam" pseudo-state */ + + printf( "#define YY_JAM %d\n", jamstate ); + printf( "#define YY_JAM_BASE %d\n", jambase ); + + if ( usemecs ) + printf( "#define YY_TEMPLATE %d\n", lastdfa + 2 ); if ( reject ) { @@ -375,9 +518,8 @@ gentabs() * indices in the dfaacc array */ - if ( genftl ) - printf( accnum > 127 ? ftl_short_decl : ftl_char_decl, - ACCEPT, max( numas, 1 ) + 1 ); + printf( accnum > 127 ? ftl_short_decl : ftl_char_decl, + ACCEPT, max( numas, 1 ) + 1 ); j = 1; /* index into ACCEPT array */ @@ -387,7 +529,7 @@ gentabs() if ( accsiz[i] != 0 ) { - accset = dfaacc[i]; + accset = dfaacc[i].dfaacc_set; nacc = accsiz[i]; if ( trace ) @@ -395,7 +537,8 @@ gentabs() for ( k = 1; k <= nacc; ++k ) { - mkdata( ACCEPT, j++, accset[k] ); + ++j; + mkdata( accset[k] ); if ( trace ) { @@ -419,7 +562,7 @@ gentabs() else { for ( i = 1; i <= lastdfa; ++i ) - acc_array[i] = (int) dfaacc[i]; + acc_array[i] = dfaacc[i].dfaacc_state; acc_array[i] = 0; /* add (null) accepting number for jam state */ } @@ -429,39 +572,42 @@ gentabs() * In either case, we just dump the numbers. 
*/ - if ( genftl ) - { - /* "lastdfa + 2" is the size of ALIST; includes room for FTL arrays - * beginning at 0 and for "jam" state + /* "lastdfa + 2" is the size of ALIST; includes room for FTL arrays + * beginning at 0 and for "jam" state + */ + k = lastdfa + 2; + + if ( reject ) + /* we put a "cap" on the table associating lists of accepting + * numbers with state numbers. This is needed because we tell + * where the end of an accepting list is by looking at where + * the list for the next state starts. */ - k = lastdfa + 2; + ++k; - if ( reject ) - /* we put a "cap" on the table associating lists of accepting - * numbers with state numbers. This is needed because we tell - * where the end of an accepting list is by looking at where - * the list for the next state starts. - */ - ++k; + printf( ((reject && numas > 126) || accnum > 127) ? + ftl_short_decl : ftl_char_decl, ALIST, k ); - printf( ((reject && numas > 126) || accnum > 127) ? - ftl_short_decl : ftl_char_decl, ALIST, k ); - } + /* set up default actions */ + for ( i = 1; i <= lastsc * 2; ++i ) + acc_array[i] = DEFAULT_ACTION; + + acc_array[end_of_buffer_state] = END_OF_BUFFER_ACTION; for ( i = 1; i <= lastdfa; ++i ) { - mkdata( ALIST, i, acc_array[i] ); + mkdata( acc_array[i] ); if ( ! 
reject && trace && acc_array[i] ) fprintf( stderr, "state # %d accepts: [%d]\n", i, acc_array[i] ); } /* add entry for "jam" state */ - mkdata( ALIST, i, acc_array[i] ); + mkdata( acc_array[i] ); if ( reject ) /* add "cap" for the list */ - mkdata( ALIST, i + 1, acc_array[i] ); + mkdata( acc_array[i] ); dataend(); @@ -469,8 +615,7 @@ gentabs() { /* write out equivalence classes */ - if ( genftl ) - printf( ftl_char_decl, ECARRAY, CSIZE + 1 ); + printf( ftl_char_decl, ECARRAY, CSIZE + 1 ); for ( i = 1; i <= CSIZE; ++i ) { @@ -478,7 +623,7 @@ gentabs() ecgroup[i] = ecgroup[clower( i )]; ecgroup[i] = abs( ecgroup[i] ); - mkdata( ECARRAY, i, ecgroup[i] ); + mkdata( ecgroup[i] ); } dataend(); @@ -522,15 +667,14 @@ gentabs() if ( trace ) fputs( "\n\nMeta-Equivalence Classes:\n", stderr ); - if ( genftl ) - printf( ftl_char_decl, MATCHARRAY, numecs + 1 ); + printf( ftl_char_decl, MATCHARRAY, numecs + 1 ); for ( i = 1; i <= numecs; ++i ) { if ( trace ) fprintf( stderr, "%d = %d\n", i, abs( tecbck[i] ) ); - mkdata( MATCHARRAY, i, abs( tecbck[i] ) ); + mkdata( abs( tecbck[i] ) ); } dataend(); @@ -540,9 +684,8 @@ gentabs() { int total_states = lastdfa + numtemps; - if ( genftl ) - printf( tblend > 32766 ? ftl_long_decl : ftl_short_decl, - BASEARRAY, total_states + 1 ); + printf( tblend > MAX_SHORT ? ftl_long_decl : ftl_short_decl, + BASEARRAY, total_states + 1 ); for ( i = 1; i <= lastdfa; ++i ) { @@ -561,64 +704,54 @@ gentabs() def[i] = lastdfa - d + 1; } - mkdata( BASEARRAY, i, base[i] ); + mkdata( base[i] ); } /* generate jam state's base index */ - mkdata( BASEARRAY, i, base[i] ); + mkdata( base[i] ); for ( ++i /* skip jam state */; i <= total_states; ++i ) { - mkdata( BASEARRAY, i, base[i] ); + mkdata( base[i] ); def[i] = jamstate; } dataend(); - if ( genftl ) - printf( tblend > 32766 ? ftl_long_decl : ftl_short_decl, - DEFARRAY, total_states + 1 ); + printf( tblend > MAX_SHORT ? 
ftl_long_decl : ftl_short_decl, + DEFARRAY, total_states + 1 ); for ( i = 1; i <= total_states; ++i ) - mkdata( DEFARRAY, i, def[i] ); + mkdata( def[i] ); dataend(); - if ( genftl ) - printf( lastdfa > 32766 ? ftl_long_decl : ftl_short_decl, - NEXTARRAY, tblend + 1 ); + printf( lastdfa > MAX_SHORT ? ftl_long_decl : ftl_short_decl, + NEXTARRAY, tblend + 1 ); for ( i = 1; i <= tblend; ++i ) { if ( nxt[i] == 0 ) nxt[i] = jamstate; /* new state is the JAM state */ - mkdata( NEXTARRAY, i, nxt[i] ); + mkdata( nxt[i] ); } dataend(); - if ( genftl ) - printf( lastdfa > 32766 ? ftl_long_decl : ftl_short_decl, - CHECKARRAY, tblend + 1 ); + printf( lastdfa > MAX_SHORT ? ftl_long_decl : ftl_short_decl, + CHECKARRAY, tblend + 1 ); for ( i = 1; i <= tblend; ++i ) { if ( chk[i] == 0 ) ++nummt; - mkdata( CHECKARRAY, i, chk[i] ); + mkdata( chk[i] ); } dataend(); } - - skelout(); - - /* copy remainder of input to output */ - - line_directive_out(); - (void) lexscan(); /* copy remainder of input to output */ } @@ -663,11 +796,65 @@ inittbl() } +/* make_tables - generate transition tables + * + * synopsis + * make_tables(); + * + * Generates transition tables and finishes generating output file + */ + +make_tables() + + { + if ( fullspd ) + { /* need to define YY_TRANS_OFFSET_TYPE as a size large + * enough to hold the biggest offset + */ + int total_table_size = tblend + numecs + 1; + + printf( "#define YY_TRANS_OFFSET_TYPE %s\n", + total_table_size > MAX_SHORT ? 
"long" : "short" ); + } + + if ( fullspd || fulltbl ) + skelout(); + + /* compute the tables and copy them to output file */ + if ( fullspd ) + genctbl(); + + else + gentabs(); + + skelout(); + + (void) fclose( temp_action_file ); + temp_action_file = fopen( action_file_name, "r" ); + + /* copy prolog from action_file to output file */ + action_out(); + + skelout(); + + /* copy actions from action_file to output file */ + action_out(); + + skelout(); + + /* copy remainder of input to output */ + + line_directive_out( stdout ); + (void) flexscan(); /* copy remainder of input to output */ + } + + /* mkdeftbl - make the default, "jam" table entries * * synopsis * mkdeftbl(); */ + mkdeftbl() { @@ -716,6 +903,7 @@ mkdeftbl() * the tables are searched for an interior spot that will accomodate the * state array. */ + mkentry( state, numchars, statenum, deflink, totaltrans ) register int *state; int numchars, statenum, deflink, totaltrans; @@ -848,6 +1036,7 @@ int numchars, statenum, deflink, totaltrans; * int state, sym, onenxt, onedef; * mk1tbl( state, sym, onenxt, onedef ); */ + mk1tbl( state, sym, onenxt, onedef ) int state, sym, onenxt, onedef; @@ -880,6 +1069,7 @@ int state, sym, onenxt, onedef; * int state[], statenum, comstate; * mkprot( state, statenum, comstate ); */ + mkprot( state, statenum, comstate ) int state[], statenum, comstate; @@ -923,6 +1113,7 @@ int state[], statenum, comstate; * int state[], statenum, comstate, totaltrans; * mktemplate( state, statenum, comstate, totaltrans ); */ + mktemplate( state, statenum, comstate ) int state[], statenum, comstate; @@ -980,6 +1171,7 @@ int state[], statenum, comstate; * int qelm; * mv2front( qelm ); */ + mv2front( qelm ) int qelm; @@ -1020,6 +1212,12 @@ ntod() int targptr, totaltrans, i, comstate, comfreq, targ; int *epsclosure(), snstods(), symlist[CSIZE + 1]; + /* this is so find_table_space(...) 
will know where to start looking in + * chk/nxt for unused records for space to put in the state + */ + if ( fullspd ) + firstfree = 0; + accset = allocate_integer_array( accnum + 1 ); nset = allocate_integer_array( current_max_dfa_size ); @@ -1053,29 +1251,30 @@ ntod() inittbl(); - if ( genftl ) - skelout(); + if ( fullspd ) + { + for ( i = 0; i <= numecs; ++i ) + state[i] = 0; + place_state( state, 0, 0 ); + } if ( fulltbl ) { - if ( genftl ) - { - /* declare it "short" because it's a real long-shot that that - * won't be large enough - */ - printf( "static short int %c[][%d] =\n {\n", NEXTARRAY, - numecs + 1 ); + /* declare it "short" because it's a real long-shot that that + * won't be large enough + */ + printf( "static short int %c[][%d] =\n {\n", NEXTARRAY, + numecs + 1 ); /* '}' so vi doesn't get too confused */ - /* generate 0 entries for state #0 */ - for ( i = 0; i <= numecs; ++i ) - mk2data( NEXTARRAY, 0, 0, 0 ); + /* generate 0 entries for state #0 */ + for ( i = 0; i <= numecs; ++i ) + mk2data( 0 ); - /* force ',' and dataflush() next call to mk2data */ - datapos = NUMDATAITEMS; + /* force ',' and dataflush() next call to mk2data */ + datapos = NUMDATAITEMS; - /* force extra blank line next dataflush() */ - dataline = NUMDATALINES; - } + /* force extra blank line next dataflush() */ + dataline = NUMDATALINES; } /* create the first states */ @@ -1105,6 +1304,17 @@ ntod() } } + if ( fulltbl ) + { + if ( ! 
snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) ) + flexfatal( "could not create unique end-of-buffer state" ); + + numas += 1; + + todo[todo_next] = end_of_buffer_state; + ADD_QUEUE_ELEMENT(todo_next); + } + while ( todo_head != todo_next ) { targptr = 0; @@ -1193,28 +1403,25 @@ ntod() if ( fulltbl ) { - if ( genftl ) - { - /* supply array's 0-element */ - mk2data( NEXTARRAY, 0, 0, 0 ); - - for ( i = 1; i <= numecs; ++i ) - mk2data( NEXTARRAY, 0, 0, state[i] ); + /* supply array's 0-element */ + if ( ds == end_of_buffer_state ) + mk2data( 0 ); + else + mk2data( end_of_buffer_state ); - /* force ',' and dataflush() next call to mk2data */ - datapos = NUMDATAITEMS; + for ( i = 1; i <= numecs; ++i ) + mk2data( state[i] ); - /* force extra blank line next dataflush() */ - dataline = NUMDATALINES; - } + /* force ',' and dataflush() next call to mk2data */ + datapos = NUMDATAITEMS; - else - { - for ( i = 1; i <= numecs; ++i ) - mk2data( NEXTARRAY, ds, i, state[i] ); - } + /* force extra blank line next dataflush() */ + dataline = NUMDATALINES; } + else if ( fullspd ) + place_state( state, ds, totaltrans ); + else { /* determine which destination state is the most common, and @@ -1252,6 +1459,53 @@ ntod() mkdeftbl(); } + + } + + +/* place_state - place a state into full speed transition table + * + * synopsis + * int *state, statenum, transnum; + * place_state( state, statenum, transnum ); + * + * State is the statenum'th state. It is indexed by equivalence class and + * gives the number of the state to enter for a given equivalence class. + * Transnum is the number of out-transitions for the state. 
+ */ + +place_state( state, statenum, transnum ) +int *state, statenum, transnum; + + { + register int i; + register int *state_ptr; + int position = find_table_space( state, transnum ); + + /* base is the table of start positions */ + base[statenum] = position; + + /* put in action number marker; this non-zero number makes sure that + * find_table_space() knows that this position in chk/nxt is taken + * and should not be used for another accepting number in another state + */ + chk[position - 1] = 1; + + /* put in end-of-buffer marker; this is for the same purposes as above */ + chk[position] = 1; + + /* place the state into chk and nxt */ + state_ptr = &state[1]; + + for ( i = 1; i <= numecs; ++i, ++state_ptr ) + if ( *state_ptr != 0 ) + { + chk[position + i] = i; + nxt[position + i] = *state_ptr; + } + + if ( position + numecs > tblend ) + tblend = position + numecs; } @@ -1265,6 +1519,7 @@ ntod() * state is pushed onto it, to be processed later by mk1tbl. If there's * no room, we process the sucker right now. */ + stack1( statenum, sym, nextstate, deflink ) int statenum, sym, nextstate, deflink; @@ -1300,6 +1555,7 @@ int statenum, sym, nextstate, deflink; * between "state" and "pr" is returned as function value. Note that this * number is "numecs" minus the number of "SAME_TRANS" entries in "ext". */ + int tbldiff( state, pr, ext ) int state[], pr, ext[]; diff --git a/yylex.c b/yylex.c index f4300aa..0694cab 100644 --- a/yylex.c +++ b/yylex.c @@ -2,7 +2,15 @@ #include "y.tab.h" /* - * Copyright (c) University of California, 1987 + * Copyright (c) 1987, the University of California + * + * The United States Government has rights in this work pursuant to + * contract no. DE-AC03-76SF00098 between the United States Department of + * Energy and the University of California. + * + * This program may be redistributed. 
Enhancements and derivative works + * may be created provided the new works, if made available to the general + * public, are made available for use by anyone. */ /* yylex - scan for a regular expression token @@ -13,6 +21,7 @@ * * token - return token found */ + int yylex() { @@ -22,7 +31,7 @@ int yylex() if ( eofseen ) toktype = EOF; else - toktype = lexscan(); + toktype = flexscan(); if ( toktype == EOF ) { @@ -102,7 +111,7 @@ int yylex() fputs( "%%\n", stderr ); /* we set beglin to be true so we'll start - * writing out numbers as we echo rules. lexscan() has + * writing out numbers as we echo rules. flexscan() has * already assigned sectnum */ -- cgit v1.2.3 From 47a36b88d9d70ac6496f98cccb54577b8f1e2253 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Apr 1988 20:42:15 +0000 Subject: changed name from flexccl.c -> ccl.c --- ccl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ccl.c b/ccl.c index a99241b..163065a 100644 --- a/ccl.c +++ b/ccl.c @@ -1,4 +1,4 @@ -/* flexccl - routines for character classes */ +/* ccl - routines for character classes */ /* * Copyright (c) 1987, the University of California -- cgit v1.2.3 From 4dfee9040ec7831f7f1f2c36f829cfc71ae8af96 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Apr 1988 20:42:32 +0000 Subject: changed name from flexdfa.c to dfa.c --- dfa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dfa.c b/dfa.c index e5cc9a2..a2ca2b6 100644 --- a/dfa.c +++ b/dfa.c @@ -1,4 +1,4 @@ -/* flexdfa - DFA construction routines */ +/* dfa - DFA construction routines */ /* * Copyright (c) 1987, the University of California -- cgit v1.2.3 From f82a61a9ee52a4ea939e5e25c57798f9bda27188 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Apr 1988 20:42:49 +0000 Subject: changed name from flexecs.c to ecs.c --- ecs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ecs.c b/ecs.c index 8c01338..dc43c5e 100644 --- a/ecs.c +++ b/ecs.c @@ -1,4 +1,4 @@ -/* flexecs - 
equivalence class routines */ +/* ecs - equivalence class routines */ /* * Copyright (c) 1987, the University of California -- cgit v1.2.3 From 6f1eb21e140e325b98cfc7cdb1125af51d44a992 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Apr 1988 20:45:45 +0000 Subject: fixed typos, enhanced symbol table definition. --- flexdef.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/flexdef.h b/flexdef.h index d668a03..eebc31e 100644 --- a/flexdef.h +++ b/flexdef.h @@ -140,7 +140,7 @@ char *sprintf(); /* keep lint happy */ #define JAMSTATE -32766 /* marks a reference to the state that always jams */ /* enough so that if it's subtracted from an NFA state number, the result - * is guarenteed to be negative + * is guaranteed to be negative */ #define MARKER_DIFFERENCE 32000 #define MAXIMUM_MNS 31999 @@ -190,7 +190,7 @@ char *sprintf(); /* keep lint happy */ */ #define ACCEPTABLE_DIFF_PERCENTAGE 50 -/* the percentage the number of homogenous out-transitions of a state +/* the percentage the number of homogeneous out-transitions of a state * must be of the number of total out-transitions of the state in order * to consider making a template from the state */ @@ -244,7 +244,8 @@ struct hash_entry { struct hash_entry *prev, *next; char *name; - char *val; + char *str_val; + int int_val; } ; typedef struct hash_entry *hash_table[]; @@ -346,7 +347,7 @@ extern int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; /* variables for managing equivalence classes: * numecs - number of equivalence classes - * nextecm - forward link of Equivalenc Class members + * nextecm - forward link of Equivalence Class members * ecgroup - class number or backward link of EC members * nummecs - number of meta-equivalence classes (used to compress * templates) -- cgit v1.2.3 From 5099a94810cd7573d2a7367bdb81eb19f8ba46f3 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Apr 1988 20:46:58 +0000 Subject: fixed bug causing core dumps if skeleton files 
could not be opened. Added -cF. Added fullspd to be equivalent to fulltbl for which options is cannot be mixed with. --- main.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/main.c b/main.c index aff4dc4..abd1ba0 100644 --- a/main.c +++ b/main.c @@ -109,8 +109,11 @@ int status; if ( skelfile != NULL ) (void) fclose( skelfile ); - (void) fclose( temp_action_file ); - (void) unlink( action_file_name ); + if ( temp_action_file ) + { + (void) fclose( temp_action_file ); + (void) unlink( action_file_name ); + } if ( printstats ) { @@ -243,6 +246,10 @@ char **argv; useecs = true; break; + case 'F': + fullspd = true; + break; + case 'f': fulltbl = true; break; @@ -285,8 +292,8 @@ char **argv; break; case 'F': - fullspd = true; useecs = usemecs = false; + fullspd = true; break; case 'S': @@ -321,10 +328,10 @@ get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ ; } - if ( fulltbl && usemecs ) + if ( (fulltbl || fullspd) && usemecs ) flexerror( "full table and -cm don't make sense together" ); - if ( fulltbl && interactive ) + if ( (fulltbl || fullspd) && interactive ) flexerror( "full table and -I are (currently) incompatible" ); if ( (fulltbl || fullspd) && reject ) -- cgit v1.2.3 From 78e23d2ecee70c3cf01fe4de33cca335e931dd8a Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Apr 1988 20:48:28 +0000 Subject: changed name from flexmisc.c -> misc.c --- misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misc.c b/misc.c index 157c3d3..0ff49f5 100644 --- a/misc.c +++ b/misc.c @@ -1,4 +1,4 @@ -/* flexmisc - miscellaneous flex routines */ +/* misc - miscellaneous flex routines */ /* * Copyright (c) 1987, the University of California -- cgit v1.2.3 From 282bcb1a3e4ea4c03d19e56a49068b5ae0b73fbc Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Apr 1988 20:48:53 +0000 Subject: changed name from flexnfa.c -> nfa.c corrected some typos. 
--- nfa.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/nfa.c b/nfa.c index 9b93892..a2b2c8e 100644 --- a/nfa.c +++ b/nfa.c @@ -1,4 +1,4 @@ -/* flexnfa - NFA construction routines */ +/* nfa - NFA construction routines */ /* * Copyright (c) 1987, the University of California @@ -71,7 +71,8 @@ int mach, headcnt, trailcnt; /* hang the accepting number off an epsilon state. if it is associated * with a state that has a non-epsilon out-transition, then the state - * will accept BEFORE it makes that transition, i.e. one character too soon + * will accept BEFORE it makes that transition, i.e., one character + * too soon */ if ( transchar[finalst[mach]] == SYM_EPSILON ) @@ -438,7 +439,7 @@ int state; * number of times to "ub" number of times * * note - * if "ub" is INFINITY then "new" matches "lb" or more occurances of "mach" + * if "ub" is INFINITY then "new" matches "lb" or more occurrences of "mach" */ int mkrep( mach, lb, ub ) -- cgit v1.2.3 From 6ea7a979e1f07da057b4d104730ef91b38727829 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Apr 1988 20:50:42 +0000 Subject: changed name from flexparse.y -> parse.y added start condition "INITIAL" made a{3} have "variable length" --- parse.y | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/parse.y b/parse.y index f5094e5..e116407 100644 --- a/parse.y +++ b/parse.y @@ -1,4 +1,4 @@ -/* flexparse.y - parser for flex input */ +/* parse.y - parser for flex input */ /* * Copyright (c) 1987, the University of California @@ -34,7 +34,7 @@ initlex : /* initialize for processing rules */ /* create default DFA start condition */ - scinstal( "0", false ); + scinstal( "INITIAL", false ); } ; @@ -291,7 +291,11 @@ singleton : singleton '*' | singleton '{' NUMBER '}' { - rulelen = rulelen + $3; + /* the singleton could be something like "(foo)", + * in which case we have no idea what its length + * is, so we punt here. 
+ */ + varlength = true; if ( $3 <= 0 ) { -- cgit v1.2.3 From 24bfb1cc8eb4185a4f94c36fe027d7958172f3d5 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Apr 1988 20:51:30 +0000 Subject: Changed name from flexscan.l -> scan.l fixed bug in added block comments between rules. --- scan.l | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/scan.l b/scan.l index c1fcaa4..0482d50 100644 --- a/scan.l +++ b/scan.l @@ -1,4 +1,4 @@ -/* flexscan.l - scanner for flex input */ +/* scan.l - scanner for flex input */ /* * Copyright (c) 1987, the University of California @@ -14,7 +14,7 @@ %{ #include "flexdef.h" -#include "y.tab.h" +#include "parse.h" #define ACTION_ECHO fprintf( temp_action_file, "%s", yytext ) #define MARK_END_OF_PROLOG fprintf( temp_action_file, "%%%% end of prolog\n" ); @@ -37,7 +37,7 @@ %} %x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE -%x FIRSTCCL CCL ACTION RECOVER BRACEERROR C_COMMENT ACTION_COMMENT +%x FIRSTCCL CCL ACTION RECOVER BRACEERROR C_COMMENT C_COMMENT_2 ACTION_COMMENT %x ACTION_STRING PERCENT_BRACE_ACTION WS [ \t]+ @@ -51,10 +51,9 @@ SCNAME {NAME} ESCSEQ \\([^^\n]|"^".|0[0-9]{1,3}) %% - static int bracelevel; + static int bracelevel, didadef; int i, cclval; char nmdef[MAXLINE], myesc(); - static int didadef; ^{WS}.*\n ++linenum; ECHO; /* indented code */ ^#.*\n ++linenum; ECHO; /* treat as a comment */ @@ -123,7 +122,7 @@ ESCSEQ \\([^^\n]|"^".|0[0-9]{1,3}) ++linenum; } -.*\n ++linenum; RETURNNAME; +.*\n ++linenum; BEGIN(0); RETURNNAME; .*\n/[^ \t\n] { @@ -136,10 +135,17 @@ ESCSEQ \\([^^\n]|"^".|0[0-9]{1,3}) .*\n ++linenum; ACTION_ECHO; ^{OPTWS}\n ++linenum; /* allow blank lines in section 2 */ -^{WS}.*\n { + + /* this horrible mess of a rule matches indented lines which + * do not contain "/*". 
We need to make the distinction because + * otherwise this rule will be taken instead of the rule which + * matches the beginning of comments like this one + */ +^{WS}([^/\n]|"/"[^*\n])*("/"?)\n { synerr( "indented code found outside of action" ); ++linenum; } + "<" BEGIN(SC); return ( '<' ); ^"^" return ( '^' ); \" BEGIN(QUOTE); return ( '"' ); @@ -154,6 +160,8 @@ ESCSEQ \\([^^\n]|"^".|0[0-9]{1,3}) } {WS}"|".*\n ++linenum; return ( '\n' ); +^{OPTWS}"/*" ACTION_ECHO; BEGIN(C_COMMENT_2); + {WS} { /* needs to be separate from following rule due to * bug with trailing context */ @@ -171,7 +179,7 @@ ESCSEQ \\([^^\n]|"^".|0[0-9]{1,3}) ^{OPTWS}\n ++linenum; return ( '\n' ); ^"%%".* { - /* guarentee that the SECT3 rule will have something + /* guarantee that the SECT3 rule will have something * to match */ yyless(1); @@ -294,7 +302,7 @@ ESCSEQ \\([^^\n]|"^".|0[0-9]{1,3}) ACTION_ECHO; if ( bracelevel == 0 ) { - fputs( "\tbreak;\n", temp_action_file ); + fputs( "\tYY_BREAK\n", temp_action_file ); BEGIN(SECT2); } } @@ -310,7 +318,7 @@ ESCSEQ \\([^^\n]|"^".|0[0-9]{1,3}) ACTION_ECHO; if ( bracelevel == 0 ) { - fputs( "\tbreak;\n", temp_action_file ); + fputs( "\tYY_BREAK\n", temp_action_file ); BEGIN(SECT2); } } @@ -322,6 +330,12 @@ ESCSEQ \\([^^\n]|"^".|0[0-9]{1,3}) \n ++linenum; ACTION_ECHO; . ACTION_ECHO; +"*/" ACTION_ECHO; BEGIN(SECT2); +"*/".*\n ++linenum; ACTION_ECHO; BEGIN(SECT2); +[^*\n]+ ACTION_ECHO; +"*" ACTION_ECHO; +\n ++linenum; ACTION_ECHO; + [^"\\\n]+ ACTION_ECHO; \\. ACTION_ECHO; \n ++linenum; ACTION_ECHO; -- cgit v1.2.3 From f6799ea411f7ef2b2a5637b3c15724b420bd92d4 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Apr 1988 20:52:09 +0000 Subject: changed name from flexsym.c -> sym.c revamped calling sequences, etc., for extended table struct definition which now has both char * and int fields. 
--- sym.c | 72 ++++++++++++++++++++++++++----------------------------------------- 1 file changed, 28 insertions(+), 44 deletions(-) diff --git a/sym.c b/sym.c index 776a163..d4c15a2 100644 --- a/sym.c +++ b/sym.c @@ -1,4 +1,4 @@ -/* flexsym - symbol table routines */ +/* sym - symbol table routines */ /* * Copyright (c) 1987, the University of California @@ -18,21 +18,25 @@ struct hash_entry *ndtbl[NAME_TABLE_HASH_SIZE]; struct hash_entry *sctbl[START_COND_HASH_SIZE]; struct hash_entry *ccltab[CCL_HASH_SIZE]; +struct hash_entry *findsym(); -/* addsym - add symbol and definition to symbol table + +/* addsym - add symbol and definitions to symbol table * * synopsis - * char sym[], def[]; + * char sym[], *str_def; + * int int_def; * hash_table table; * int table_size; - * -1/0 = addsym( sym, def, table, table_size ); + * 0 / -1 = addsym( sym, def, int_def, table, table_size ); * * -1 is returned if the symbol already exists, and the change not made. */ -int addsym( sym, def, table, table_size ) +int addsym( sym, str_def, int_def, table, table_size ) register char sym[]; -char def[]; +char *str_def; +int int_def; hash_table table; int table_size; @@ -52,7 +56,7 @@ int table_size; entry = entry->next; } - + /* create new entry */ new_entry = (struct hash_entry *) malloc( sizeof( struct hash_entry ) ); @@ -69,7 +73,8 @@ int table_size; new_entry->prev = NULL; new_entry->name = sym; - new_entry->val = def; + new_entry->str_val = str_def; + new_entry->int_val = int_def; table[hash_val] = new_entry; @@ -95,7 +100,7 @@ int cclnum; */ char *copy_string(); - (void) addsym( copy_string( ccltxt ), (char *) cclnum, + (void) addsym( copy_string( ccltxt ), (char *) 0, cclnum, ccltab, CCL_HASH_SIZE ); } @@ -112,9 +117,7 @@ int ccllookup( ccltxt ) char ccltxt[]; { - char *getdef(); - - return ( (int) getdef( ccltxt, ccltab, CCL_HASH_SIZE ) ); + return ( findsym( ccltxt, ccltab, CCL_HASH_SIZE )->int_val ); } @@ -135,6 +138,10 @@ int table_size; { register struct hash_entry *entry = 
table[hashfunct( sym, table_size )]; + static struct hash_entry empty_entry = + { + (struct hash_entry *) 0, (struct hash_entry *) 0, NULL, NULL, 0, + } ; while ( entry ) { @@ -143,32 +150,7 @@ int table_size; entry = entry->next; } - return ( NULL ); - } - - -/* getdef - get symbol definition from symbol table - * - * synopsis - * char sym[]; - * hash_table table; - * int table_size; - * char *def, *getdef(); - * def = getdef( sym, table, table_size ); - */ - -char *getdef( sym, table, table_size ) -register char sym[]; -hash_table table; -int table_size; - - { - register struct hash_entry *entry = findsym( sym, table, table_size ); - - if ( entry ) - return ( entry->val ); - - return ( NULL ); + return ( &empty_entry ); } @@ -211,7 +193,7 @@ char nd[], def[]; { char *copy_string(); - if ( addsym( copy_string( nd ), copy_string( def ), + if ( addsym( copy_string( nd ), copy_string( def ), 0, ndtbl, NAME_TABLE_HASH_SIZE ) ) synerr( "name defined twice" ); } @@ -229,9 +211,7 @@ char *ndlookup( nd ) char nd[]; { - char *getdef(); - - return ( getdef( nd, ndtbl, NAME_TABLE_HASH_SIZE ) ); + return ( findsym( nd, ndtbl, NAME_TABLE_HASH_SIZE )->str_val ); } @@ -257,6 +237,10 @@ int xcluflg; * declared, and don't put out a define for it, because it * would come out as "#define 0 1" */ + /* actually, this is no longer the case. The default start-condition + * is now called "INITIAL". But we keep the following for the sake + * of future robustness. 
+ */ if ( strcmp( str, "0" ) ) printf( "#define %s %d\n", str, lastsc * 2 ); @@ -273,7 +257,7 @@ int xcluflg; actvsc = reallocate_integer_array( actvsc, current_max_scs ); } - if ( addsym( copy_string( str ), (char *) lastsc, + if ( addsym( copy_string( str ), (char *) 0, lastsc, sctbl, START_COND_HASH_SIZE ) ) lerrsf( "start condition %s declared twice", str ); @@ -295,5 +279,5 @@ int sclookup( str ) char str[]; { - return ( (int) getdef( str, sctbl, START_COND_HASH_SIZE ) ); + return ( findsym( str, sctbl, START_COND_HASH_SIZE )->int_val ); } -- cgit v1.2.3 From 693a38cb7d2abb45aa47d73d34505ea32b072a23 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Apr 1988 20:53:13 +0000 Subject: Changed name from flexcmp.c -> tblcmp.c fixed misc. typos made generating ec tables be a routine --- tblcmp.c | 156 ++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 84 insertions(+), 72 deletions(-) diff --git a/tblcmp.c b/tblcmp.c index 8fb9a67..eadec69 100644 --- a/tblcmp.c +++ b/tblcmp.c @@ -1,4 +1,4 @@ -/* flexcmp - table compression routines */ +/* tblcmp - table compression routines */ /* * Copyright (c) 1987, the University of California @@ -35,18 +35,18 @@ * which is similar enough to be usable, and therefore compacting the * output tables. * "templates" are a special type of proto. If a transition table is - * homogenous or nearly homogenous (all transitions go to the same destination) - * then the odds are good that future states will also go to the same destination - * state on basically the same character set. These homogenous states are - * so common when dealing with large rule sets that they merit special - * attention. If the transition table were simply made into a proto, then - * (typically) each subsequent, similar state will differ from the proto - * for two out-transitions. 
One of these out-transitions will be that - * character on which the proto does not go to the common destination, - * and one will be that character on which the state does not go to the - * common destination. Templates, on the other hand, go to the common - * state on EVERY transition character, and therefore cost only one - * difference. + * homogeneous or nearly homogeneous (all transitions go to the same + * destination) then the odds are good that future states will also go + * to the same destination state on basically the same character set. + * These homogeneous states are so common when dealing with large rule + * sets that they merit special attention. If the transition table were + * simply made into a proto, then (typically) each subsequent, similar + * state will differ from the proto for two out-transitions. One of these + * out-transitions will be that character on which the proto does not go + * to the common destination, and one will be that character on which the + * state does not go to the common destination. Templates, on the other + * hand, go to the common state on EVERY transition character, and therefore + * cost only one difference. */ bldtbl( state, statenum, totaltrans, comstate, comfreq ) @@ -254,8 +254,8 @@ cmptmps() /* it is assumed (in a rather subtle way) in the skeleton that * if we're using meta-equivalence classes, the def[] entry for - * all templates is the jam template, i.e. templates never default - * to other non-jam table entries (e.g. another template) + * all templates is the jam template, i.e., templates never default + * to other non-jam table entries (e.g., another template) */ /* leave room for the jam-state after the last real state */ @@ -293,10 +293,10 @@ expand_nxt_chk() * block_start = find_table_space( state, numtrans ); * * State is the state to be added to the full speed transition table. - * Numtrans is the number of out-transititions for the state. 
+ * Numtrans is the number of out-transitions for the state. * * find_table_space() returns the position of the start of the first block (in - * chk) able to accomodate the state + * chk) able to accommodate the state * * In determining if a state will or will not fit, find_table_space() must take * into account the fact that an end-of-buffer state will be added at [0], @@ -307,14 +307,14 @@ int find_table_space( state, numtrans ) int *state, numtrans; { - /* firstfree is the position of the first possible occurence of two + /* firstfree is the position of the first possible occurrence of two * consecutive unused records in the chk and nxt arrays */ register int i; register int *state_ptr, *chk_ptr; register int *ptr_to_last_entry_in_state; - /* if there are too many out-transititions, put the state at the end of + /* if there are too many out-transitions, put the state at the end of * nxt and chk */ if ( numtrans > MAX_XTIONS_FOR_FULL_INTERIOR_FIT ) @@ -417,10 +417,10 @@ genctbl() * flags and states. We do this by making a state with absolutely no * transitions. We put it at the end of the table. */ - /* at this point, we're guarenteed that there's enough room in nxt[] + /* at this point, we're guaranteed that there's enough room in nxt[] * and chk[] to hold tblend + numecs entries. We need just two slots. * One for the action and one for the end-of-buffer transition. We - * now *assume* that we're guarenteed the only character we'll try to + * now *assume* that we're guaranteed the only character we'll try to * index this nxt/chk pair with is EOB, i.e., 0, so we don't have to * make sure there's room for jam entries for other characters. 
*/ @@ -455,7 +455,7 @@ genctbl() else if ( chk[i] > numecs || chk[i] == 0 ) transition_struct_out( 0, 0 ); /* unused slot */ - else /* verify, transitition */ + else /* verify, transition */ transition_struct_out( chk[i], base[nxt[i]] - (i - chk[i]) ); } @@ -476,6 +476,9 @@ genctbl() printf( " &yy_transition[%d],\n", base[i] ); printf( " };\n" ); + + if ( useecs ) + genecs(); } @@ -612,53 +615,7 @@ gentabs() dataend(); if ( useecs ) - { - /* write out equivalence classes */ - - printf( ftl_char_decl, ECARRAY, CSIZE + 1 ); - - for ( i = 1; i <= CSIZE; ++i ) - { - if ( caseins && (i >= 'A') && (i <= 'Z') ) - ecgroup[i] = ecgroup[clower( i )]; - - ecgroup[i] = abs( ecgroup[i] ); - mkdata( ecgroup[i] ); - } - - dataend(); - - if ( trace ) - { - fputs( "\n\nEquivalence Classes:\n\n", stderr ); - - numrows = (CSIZE + 1) / 8; - - for ( j = 1; j <= numrows; ++j ) - { - for ( i = j; i <= CSIZE; i = i + numrows ) - { - if ( i >= 1 && i <= 31 ) - fprintf( stderr, "^%c = %-2d", - 'A' + i - 1, ecgroup[i] ); - - else if ( i >= 32 && i <= 126 ) - fprintf( stderr, " %c = %-2d", i, ecgroup[i] ); - - else if ( i == 127 ) - fprintf( stderr, "^@ = %-2d", ecgroup[i] ); - - else - fprintf( stderr, "\nSomething Weird: %d = %d\n", i, - ecgroup[i] ); - - putc( '\t', stderr ); - } - - putc( '\n', stderr ); - } - } - } + genecs(); if ( usemecs ) { @@ -755,6 +712,61 @@ gentabs() } +/* generate equivalence-class tables */ + +genecs() + + { + register int i, j; + static char ftl_char_decl[] = "static char %c[%d] =\n { 0,\n"; + int numrows; + + printf( ftl_char_decl, ECARRAY, CSIZE + 1 ); + + for ( i = 1; i <= CSIZE; ++i ) + { + if ( caseins && (i >= 'A') && (i <= 'Z') ) + ecgroup[i] = ecgroup[clower( i )]; + + ecgroup[i] = abs( ecgroup[i] ); + mkdata( ecgroup[i] ); + } + + dataend(); + + if ( trace ) + { + fputs( "\n\nEquivalence Classes:\n\n", stderr ); + + numrows = (CSIZE + 1) / 8; + + for ( j = 1; j <= numrows; ++j ) + { + for ( i = j; i <= CSIZE; i = i + numrows ) + { + if ( i >= 1 && i <= 
31 ) + fprintf( stderr, "^%c = %-2d", + 'A' + i - 1, ecgroup[i] ); + + else if ( i >= 32 && i <= 126 ) + fprintf( stderr, " %c = %-2d", i, ecgroup[i] ); + + else if ( i == 127 ) + fprintf( stderr, "^@ = %-2d", ecgroup[i] ); + + else + fprintf( stderr, "\nSomething Weird: %d = %d\n", i, + ecgroup[i] ); + + putc( '\t', stderr ); + } + + putc( '\n', stderr ); + } + } + } + + /* inittbl - initialize transition tables * * synopsis @@ -895,12 +907,12 @@ mkdeftbl() * is the offset to be used into the base/def tables, and "deflink" is the * entry to put in the "def" table entry. If "deflink" is equal to * "JAMSTATE", then no attempt will be made to fit zero entries of "state" - * (i.e. jam entries) into the table. It is assumed that by linking to + * (i.e., jam entries) into the table. It is assumed that by linking to * "JAMSTATE" they will be taken care of. In any case, entries in "state" * marking transitions to "SAME_TRANS" are treated as though they will be * taken care of by whereever "deflink" points. "totaltrans" is the total * number of transitions out of the state. If it is below a certain threshold, - * the tables are searched for an interior spot that will accomodate the + * the tables are searched for an interior spot that will accommodate the * state array. */ @@ -949,7 +961,7 @@ int numchars, statenum, deflink, totaltrans; /* Whether we try to fit the state table in the middle of the table * entries we have already generated, or if we just take the state * table at the end of the nxt/chk tables, we must make sure that we - * have a valid base address (i.e. non-negative). Note that not only are + * have a valid base address (i.e., non-negative). 
Note that not only are * negative base addresses dangerous at run-time (because indexing the * next array with one and a low-valued character might generate an * array-out-of-bounds error message), but at compile-time negative -- cgit v1.2.3 From 9d7b9720120c63ec34691a469b4526bd74626025 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Apr 1988 20:54:30 +0000 Subject: added identifying comment. changed to include "parse.h" instead of "y.tab.h" --- yylex.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/yylex.c b/yylex.c index 0694cab..8e26254 100644 --- a/yylex.c +++ b/yylex.c @@ -1,5 +1,7 @@ +/* yylex - scanner front-end for flex */ + #include "flexdef.h" -#include "y.tab.h" +#include "parse.h" /* * Copyright (c) 1987, the University of California -- cgit v1.2.3 From d7919e2ef2d42a140092c1d95b2de9394a75e90c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Apr 1988 20:55:02 +0000 Subject: Initial revision --- NEWS | 17 +++++++++ README | 126 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 NEWS create mode 100644 README diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..74288e2 --- /dev/null +++ b/NEWS @@ -0,0 +1,17 @@ +Changes between beta-test release of Feb. 
'88 and initial release: + + - many files renamed to remove "flex" suffix + - input() routine added to compressed and fast skeletons + - unput() routine added to compressed skeleton + - -d, -ce support for fast scanners + - symbol table extended to avoid ugly casts of ints <-> char *'s; + this may relieve MS-DOS woes + - actions are now separated with YY_BREAK instead of simple "break"'s + - fixed bug causing core-dumps if skeleton file could not be opened + - fixed bugs in logic deciding which options cannot be intermixed + - initial start condition can now be referred to as "INITIAL" + - fixed bug which would incorrectly computer trailing context + count for a pattern like "(foo){3}"; now this is considered + "variable length", even though it isn't. + - block comments allowed between rules + - misc. typos corrected diff --git a/README b/README new file mode 100644 index 0000000..9354207 --- /dev/null +++ b/README @@ -0,0 +1,126 @@ +This is the initial release of flex, a replacement for the lex(1) +tool. As the copyright indicates, this distribution can be freely +redistributed. + +Some notes on the distribution: + + Yes, there are some niggling lex features which are not available which + seem like they'd be easy to add. They're not, or if they are then the + straight-forward implementation of them would slow down the scanner. + Unfortunately I am unable to do any further work on flex other than bug + fixes, so if there's something you've just gotta have, you'd better + be willing to dive into the code. I'll be happy to give (fairly + high-level) advice on how to proceed. + + The compressed tables have been tested pretty thoroughly in the past, + though may be suffering from bit-rot. The fast/full tables have been + recently implemented and are more likely to have bugs. + + For a System V machine, add the #define "SV". Not guaranteed to do + the full job, but a step in the right direction. + + Flex has been successfully ported to Sun Unix and 4.3BSD Vax Unix. 
+ + +The flex distribution consists of the following files: + + README This message + + Makefile + flexdef.h + parse.y + scan.l + ccl.c + dfa.c flex sources + ecs.c + main.c + misc.c + nfa.c + scan.c + sym.c + tblcmp.c + yylex.c + + flex.skel + flex.fastskel + flexskelcom.h skeleton scanner sources + flexskeldef.h + fastskeldef.h + + flex.1 manual entry + + Timings a brief note comparing timings of flex vs. lex + +The files are packaged as three compressed shell archives, ~90Kb in size +total. Create a directory where you want flex to live, cd there, and use + + uncompress flexdist.1.Z + uncompress flexdist.2.Z + uncompress flexdist.3.Z + sh flexdist.1 + sh flexdist.2 + sh flexdist.3 + +to extract them. + +Either move {flexskelcom.h,flexskeldef.h,fastskeldef.h} into /usr/include +or edit {flex.skel,flex.fastskel,flexskeldef.h,fastskeldef.h,scan.c} +and wire in the full pathname of where you are going to keep the include files. + +Decide where you want to keep {flex.skel,flex.fastskel} (suggestion: +/usr/local/lib) and move it there. Edit "Makefile" and change the +definitions of SKELETON_FILE and F_SKELETON_FILE to reflect the full +pathnames of {flex.skel,flex.fastskel}. + +To make flex for the first time, use: + + make first_flex + +which uses a pre-generated copy of the scanner whose source is in flex. +For subsequent makes, just use: + + make + +Assuming it builds successfully, you can test it using + + make test + +The "diff" should not show any differences. + +If you're feeling adventurous, rebuild scan.c using various +combinations of FLEX_FLAGS, each time trying "make test" when +you're done. To rebuild it, do + + rm scan.c + make FLEX_FLAGS="..." + +where "..." is one of: + + -ist -c + -ist -ce + -ist -cm + -istf + -istF + +and testing using: + + make FLEX_FLAGS="..." 
test + + +Format the manual entry using + + nroff -man flex.1 + + +Please send problems and feedback to: + + vern@lbl-{csam,rtsg}.arpa or ucbvax!lbl-csam.arpa!vern + + Vern Paxson + Real Time Systems Group + Bldg. 46A + Lawrence Berkeley Laboratory + 1 Cyclotron Rd. + Berkeley, CA 94720 + + (415) 486-6411 -- cgit v1.2.3 From bddda11eda7b56672692c76c12a23b3d466865a3 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Apr 1988 21:27:06 +0000 Subject: *** empty log message *** --- NEWS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index 74288e2..c382644 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,6 @@ Changes between beta-test release of Feb. '88 and initial release: - - many files renamed to remove "flex" suffix + - many files renamed to remove "flex" prefix - input() routine added to compressed and fast skeletons - unput() routine added to compressed skeleton - -d, -ce support for fast scanners @@ -9,7 +9,7 @@ Changes between beta-test release of Feb. '88 and initial release: - actions are now separated with YY_BREAK instead of simple "break"'s - fixed bug causing core-dumps if skeleton file could not be opened - fixed bugs in logic deciding which options cannot be intermixed - - initial start condition can now be referred to as "INITIAL" + - initial start condition can now be referred to as <INITIAL> - fixed bug which would incorrectly computer trailing context count for a pattern like "(foo){3}"; now this is considered "variable length", even though it isn't.
-- cgit v1.2.3 From 3273d87d5110b0c0a31a2d40ffb41cd1fa86d2cb Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Apr 1988 21:27:34 +0000 Subject: removed minor lint fluff --- tblcmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tblcmp.c b/tblcmp.c index eadec69..bee8c48 100644 --- a/tblcmp.c +++ b/tblcmp.c @@ -491,7 +491,7 @@ genctbl() gentabs() { - int i, j, k, numrows, *accset, nacc, *acc_array; + int i, j, k, *accset, nacc, *acc_array; char clower(); /* *everything* is done in terms of arrays starting at 1, so provide -- cgit v1.2.3 From f8ef29d6ffa78f9d178307a5b4daa60cd8c950a6 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Apr 1988 21:27:58 +0000 Subject: final tweaking --- README | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/README b/README index 9354207..cbdc670 100644 --- a/README +++ b/README @@ -26,6 +26,8 @@ The flex distribution consists of the following files: README This message + Changes Differences between this release and the beta-test + Makefile flexdef.h parse.y @@ -36,11 +38,12 @@ The flex distribution consists of the following files: main.c misc.c nfa.c - scan.c sym.c tblcmp.c yylex.c + scan.c.dist pre-flex'd version of scan.l + flex.skel flex.fastskel flexskelcom.h skeleton scanner sources @@ -51,15 +54,18 @@ The flex distribution consists of the following files: Timings a brief note comparing timings of flex vs. lex -The files are packaged as three compressed shell archives, ~90Kb in size -total. Create a directory where you want flex to live, cd there, and use - - uncompress flexdist.1.Z - uncompress flexdist.2.Z - uncompress flexdist.3.Z - sh flexdist.1 - sh flexdist.2 - sh flexdist.3 +The files are packaged as a compressed shell archive, which in turn +contains seven shell archives. 
Create a directory where you want flex +to live, cd there, and use + + uncompress flex.shar.Z + sh flex.shar.1 + sh flex.shar.2 + sh flex.shar.3 + sh flex.shar.4 + sh flex.shar.5 + sh flex.shar.6 + sh flex.shar.7 to extract them. -- cgit v1.2.3 From af46744ec0822c4ebe1e8c373454a707552120c1 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Apr 1988 21:42:47 +0000 Subject: forgot sh flex.shar --- README | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README b/README index cbdc670..5e1a46d 100644 --- a/README +++ b/README @@ -59,6 +59,8 @@ contains seven shell archives. Create a directory where you want flex to live, cd there, and use uncompress flex.shar.Z + sh flex.shar + sh flex.shar.1 sh flex.shar.2 sh flex.shar.3 -- cgit v1.2.3 From 0e0a017608aac16a9d86cefe5f79a36ae60f8c07 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Apr 1988 21:57:35 +0000 Subject: minor tweaks --- README | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/README b/README index 5e1a46d..dabbaaf 100644 --- a/README +++ b/README @@ -72,7 +72,7 @@ to live, cd there, and use to extract them. Either move {flexskelcom.h,flexskeldef.h,fastskeldef.h} into /usr/include -or edit {flex.skel,flex.fastskel,flexskeldef.h,fastskeldef.h,scan.c} +or edit {flex.skel,flex.fastskel,flexskeldef.h,fastskeldef.h,scan.c.dist} and wire in the full pathname of where you are going to keep the include files. Decide where you want to keep {flex.skel,flex.fastskel} (suggestion: @@ -85,9 +85,6 @@ To make flex for the first time, use: make first_flex which uses a pre-generated copy of the scanner whose source is in flex. -For subsequent makes, just use: - - make Assuming it builds successfully, you can test it using @@ -107,8 +104,8 @@ where "..." 
is one of: -ist -c -ist -ce -ist -cm - -istf - -istF + -ist -cfe + -ist -cFe and testing using: -- cgit v1.2.3 From 06b04fbeac7a4615c816e1daadb80475dbc689f1 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 7 May 1988 00:06:10 +0000 Subject: added RCS id --- ccl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ccl.c b/ccl.c index 163065a..41e6d47 100644 --- a/ccl.c +++ b/ccl.c @@ -1,5 +1,9 @@ /* ccl - routines for character classes */ +#ifndef lint +static char rcsid[] = "@(#) $Header$ (LBL)"; +#endif + /* * Copyright (c) 1987, the University of California * -- cgit v1.2.3 From 38d7959960592775fd3c97f2db987f957a690a52 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 8 May 1988 19:51:06 +0000 Subject: Added list_character_set() --- ccl.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/ccl.c b/ccl.c index 41e6d47..48addb3 100644 --- a/ccl.c +++ b/ccl.c @@ -1,9 +1,5 @@ /* ccl - routines for character classes */ -#ifndef lint -static char rcsid[] = "@(#) $Header$ (LBL)"; -#endif - /* * Copyright (c) 1987, the University of California * @@ -18,6 +14,11 @@ static char rcsid[] = "@(#) $Header$ (LBL)"; #include "flexdef.h" +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + /* ccladd - add a single character to a ccl * * synopsis @@ -111,3 +112,48 @@ int cclp; { cclng[cclp] = 1; } + + +/* list_character_set - list the members of a set of characters in CCL form + * + * synopsis + * int cset[CSIZE + 1]; + * list_character_set( cset ); + * + * writes to stderr a character-class representation of those characters + * present in the given set. A character is present if it has a non-zero + * value in the set array. 
+ */ + +list_character_set( cset ) +int cset[]; + + { + register int i; + char *readable_form(); + + putc( '[', stderr ); + + for ( i = 1; i <= CSIZE; ++i ) + { + if ( cset[i] ) + { + register int start_char = i; + + putc( ' ', stderr ); + + fputs( readable_form( i ), stderr ); + + while ( ++i <= CSIZE && cset[i] ) + ; + + if ( i - 1 > start_char ) + /* this was a run */ + fprintf( stderr, "-%s", readable_form( i - 1 ) ); + + putc( ' ', stderr ); + } + } + + putc( ']', stderr ); + } -- cgit v1.2.3 From 8db18cb1c64ae60e7bd5a4de71e7dc9c282a7aa7 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 8 May 1988 19:51:36 +0000 Subject: added RCS id added check_for_backtracking() added dump_associated_rules() added dump_transitions() shortened reallocate_integer_pointer_array to reallocate_int_ptr_array removed some dfaacc_{state,set} abuses --- dfa.c | 157 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 150 insertions(+), 7 deletions(-) diff --git a/dfa.c b/dfa.c index a2ca2b6..29a8078 100644 --- a/dfa.c +++ b/dfa.c @@ -14,6 +14,144 @@ #include "flexdef.h" +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + + +/* check_for_backtracking - check a DFA state for backtracking + * + * synopsis + * int ds, state[numecs]; + * check_for_backtracking( ds, state ); + * + * ds is the number of the state to check and state[] is its out-transitions, + * indexed by equivalence class, and state_rules[] is the set of rules + * associated with this state + */ + +check_for_backtracking( ds, state ) +int ds; +int state[]; + + { + if ( (reject && ! dfaacc[ds].dfaacc_set) || ! 
dfaacc[ds].dfaacc_state ) + { /* state is non-accepting */ + ++num_backtracking; + + if ( performance_report ) + { + fprintf( stderr, "State #%d is non-accepting -\n", ds ); + + /* identify the state */ + dump_associated_rules( ds ); + + /* now identify it further using the out- and jam-transitions */ + dump_transitions( state ); + + putc( '\n', stderr ); + } + } + } + + +/* dump_associated_rules - list the rules associated with a DFA state + * + * synopisis + * int ds; + * dump_associated_rules( ds ); + * + * goes through the set of NFA states associated with the DFA and + * extracts the first MAX_ASSOC_RULES unique rules, sorts them, + * and writes a report to stderr + */ + +dump_associated_rules( ds ) +int ds; + + { + register int i, j; + register int rule_set[MAX_ASSOC_RULES + 1]; + register int num_rules = 0; + int *dset = dss[ds]; + int size = dfasiz[ds]; + + for ( i = 1; i <= size; ++i ) + { + register rule_num = assoc_rule[dset[i]]; + + for ( j = 1; j <= num_rules; ++j ) + if ( rule_num == rule_set[j] ) + break; + + if ( j > num_rules ) + { /* new rule */ + if ( num_rules < MAX_ASSOC_RULES ) + rule_set[++num_rules] = rule_num; + } + } + + bubble( rule_set, num_rules ); + + fprintf( stderr, " associated rules:" ); + + for ( i = 1; i <= num_rules; ++i ) + { + if ( i % 8 == 1 ) + putc( '\n', stderr ); + + fprintf( stderr, "\t%d", rule_set[i] ); + } + + putc( '\n', stderr ); + } + + +/* dump_transitions - list the transitions associated with a DFA state + * + * synopisis + * int state[numecs]; + * dump_transitions( state ); + * + * goes through the set of out-transitions and lists them in human-readable + * form (i.e., not as equivalence classes); also lists jam transitions + * (i.e., all those which are not out-transitions, plus EOF) + */ + +dump_transitions( state ) +int state[]; + + { + register int i, ec; + int out_char_set[CSIZE + 1]; + + for ( i = 1; i <= CSIZE; ++i ) + { + ec = ecgroup[i]; + + if ( ec < 0 ) + ec = -ec; + + out_char_set[i] = state[ec]; + 
} + + fprintf( stderr, " out-transitions: " ); + + list_character_set( out_char_set ); + + /* now invert the members of the set to get the jam transitions */ + for ( i = 1; i <= CSIZE; ++i ) + out_char_set[i] = ! out_char_set[i]; + + fprintf( stderr, "\n jam-transitions: EOF " ); + + list_character_set( out_char_set ); + + putc( '\n', stderr ); + } + + /* epsclosure - construct the epsilon closure of a set of ndfa states * * synopsis @@ -156,7 +294,6 @@ int *t, *ns_addr, accset[], *nacc_addr, *hv_addr; } - /* increase_max_dfas - increase the maximum number of DFAs */ increase_max_dfas() @@ -174,7 +311,7 @@ increase_max_dfas() accsiz = reallocate_integer_array( accsiz, current_max_dfas ); dhash = reallocate_integer_array( dhash, current_max_dfas ); todo = reallocate_integer_array( todo, current_max_dfas ); - dss = reallocate_integer_pointer_array( dss, current_max_dfas ); + dss = reallocate_int_ptr_array( dss, current_max_dfas ); dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas ); /* fix up todo queue */ @@ -251,7 +388,9 @@ int sns[], numstates, accset[], nacc, hashval, *newds_addr; newds = lastdfa; - if ( ! (dss[newds] = (int *) malloc( (unsigned) ((numstates + 1) * sizeof( int )) )) ) + dss[newds] = (int *) malloc( (unsigned) ((numstates + 1) * sizeof( int )) ); + + if ( ! dss[newds] ) flexfatal( "dynamic memory failure in snstods()" ); /* if we haven't already sorted the states in sns, we do so now, so that @@ -269,7 +408,11 @@ int sns[], numstates, accset[], nacc, hashval, *newds_addr; if ( nacc == 0 ) { - dfaacc[newds].dfaacc_state = 0; + if ( reject ) + dfaacc[newds].dfaacc_set = (int *) 0; + else + dfaacc[newds].dfaacc_state = 0; + accsiz[newds] = 0; } @@ -283,10 +426,10 @@ int sns[], numstates, accset[], nacc, hashval, *newds_addr; bubble( accset, nacc ); - dfaacc[newds].dfaacc_state = - (int) malloc( (unsigned) ((nacc + 1) * sizeof( int )) ); + dfaacc[newds].dfaacc_set = + (int *) malloc( (unsigned) ((nacc + 1) * sizeof( int )) ); - if ( ! 
dfaacc[newds].dfaacc_state ) + if ( ! dfaacc[newds].dfaacc_set ) flexfatal( "dynamic memory failure in snstods()" ); /* save the accepting set for later */ -- cgit v1.2.3 From 5d16dc529a2ef7e993cb24933d946ee850d73cef Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 8 May 1988 19:53:13 +0000 Subject: added RCS id added PROCFLG to avoid assumption of signed char's --- ecs.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/ecs.c b/ecs.c index dc43c5e..1a87d41 100644 --- a/ecs.c +++ b/ecs.c @@ -14,6 +14,11 @@ #include "flexdef.h" +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + /* ccl2ecl - convert character classes to set of equivalence classes * * synopsis @@ -110,6 +115,8 @@ int lenccl, fwd[], bck[], llsiz; int cclp, oldec, newec; int cclm, i, j; +#define PROCFLG 0x80 + /* note that it doesn't matter whether or not the character class is * negated. The same results will be obtained in either case. */ @@ -126,7 +133,7 @@ int lenccl, fwd[], bck[], llsiz; for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] ) { /* look for the symbol in the character class */ - for ( ; j < lenccl && ccls[j] <= i; ++j ) + for ( ; j < lenccl && (ccls[j] <= i || (ccls[j] & PROCFLG)); ++j ) if ( ccls[j] == i ) { /* we found an old companion of cclm in the ccl. @@ -137,10 +144,11 @@ int lenccl, fwd[], bck[], llsiz; bck[i] = newec; fwd[newec] = i; newec = i; - ccls[j] = -i; /* set flag so we don't reprocess */ + ccls[j] |= PROCFLG; /* set flag so we don't reprocess */ /* get next equivalence class member */ - /* next 2 */ goto next_pt; + /* continue 2 */ + goto next_pt; } /* symbol isn't in character class. 
Put it in the old equivalence @@ -167,10 +175,10 @@ next_pt: /* find next ccl member to process */ - for ( ++cclp; ccls[cclp] < 0 && cclp < lenccl; ++cclp ) + for ( ++cclp; (ccls[cclp] & PROCFLG) && cclp < lenccl; ++cclp ) { /* reset "doesn't need processing" flag */ - ccls[cclp] = -ccls[cclp]; + ccls[cclp] &= ~PROCFLG; } } } -- cgit v1.2.3 From cefd4798df19a26b69abad5f4444e675b0918c2f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 8 May 1988 19:59:29 +0000 Subject: removed revision history added RCS header added VMS, MS_DOS ifdef's removed DEFAULT_ACTION, changed END_OF_BUFFER_ACTION shortened MAX_XTIONS_FOR_FULL_INTERIOR_FIT to MAX_XTIONS_FULL_INTERIOR_FIT added MAX_ASSOC_RULES added performance_report, assoc_rule globals added num_backtracking global shortened allocate_integer_pointer_array, reallocate_integer_pointer_array --- flexdef.h | 77 +++++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 24 deletions(-) diff --git a/flexdef.h b/flexdef.h index eebc31e..7df8148 100644 --- a/flexdef.h +++ b/flexdef.h @@ -1,11 +1,4 @@ -/* - * Definitions for flex. - * - * modification history - * -------------------- - * 02b kg, vp 30sep87 .added definitions for fast scanner; misc. cleanup - * 02a vp 27jun86 .translated into C/FTL - */ +/* flexdef - definitions file for flex */ /* * Copyright (c) 1987, the University of California @@ -19,16 +12,38 @@ * public, are made available for use by anyone.
*/ +/* @(#) $Header$ (LBL) */ + +#ifndef FILE #include +#endif #ifdef SV #include -#define bzero(s, n) memset((char *)(s), '\000', (unsigned)(n)) +#define bzero(s, n) memset((char *)(s), '\0', (unsigned)(n)) +#ifndef VMS +char *memset(); #else +/* memset is needed for old versions of the VMS C runtime library */ +#define memset(s, c, n) \ + { \ + register char *t = s; \ + register unsigned int m = n; \ + while ( m-- > 0 ) \ + *t++ = c; \ + } +#define unlink delete +#define SHORT_FILE_NAMES +#endif +#endif + +#ifndef SV #include #endif +#ifdef lint char *sprintf(); /* keep lint happy */ +#endif /* maximum line length we'll have to deal with */ @@ -37,8 +52,13 @@ char *sprintf(); /* keep lint happy */ /* maximum size of file name */ #define FILENAMESIZE 1024 -#define min(x,y) (x < y ? x : y) -#define max(x,y) (x > y ? x : y) +#define min(x,y) ((x) < (y) ? (x) : (y)) +#define max(x,y) ((x) > (y) ? (x) : (y)) + +#ifdef MS_DOS +#define abs(x) ((x) < 0 ? -(x) : (x)) +#define SHORT_FILE_NAMES +#endif #define true 1 #define false 0 @@ -52,11 +72,10 @@ char *sprintf(); /* keep lint happy */ #define FAST_SKELETON_FILE "flex.fastskel" #endif -/* special nxt[] action number for the "at the end of the input buffer" state */ -/* note: -1 is already taken by YY_NEW_FILE */ -#define END_OF_BUFFER_ACTION -3 -/* action number for default action for fast scanners */ -#define DEFAULT_ACTION -2 +/* special internal nxt[] action number for the "at the end of the + * input buffer" state + */ +#define END_OF_BUFFER_ACTION 0 /* special chk[] values marking the slots taking by end-of-buffer and action * numbers @@ -219,7 +238,12 @@ char *sprintf(); /* keep lint happy */ * around through the interior of the internal fast table looking for a * spot for it */ -#define MAX_XTIONS_FOR_FULL_INTERIOR_FIT 4 +#define MAX_XTIONS_FULL_INTERIOR_FIT 4 + +/* maximum number of rules which will be reported as being associated + * with a DFA state + */ +#define MAX_ASSOC_RULES 100 /* number that, if used to 
subscript an array, has a good chance of producing * an error; should be small enough to fit into a short @@ -274,11 +298,13 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; * reject - if true (-r flag), generate tables for REJECT macro * fullspd - if true (-F flag), use Jacobson method of table representation * gen_line_dirs - if true (i.e., no -L flag), generate #line directives + * performance_report - if true (i.e., -p flag), generate a report relating + * to scanner performance */ extern int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt; extern int interactive, caseins, useecs, fulltbl, usemecs, reject; -extern int fullspd, gen_line_dirs; +extern int fullspd, gen_line_dirs, performance_report; /* variables used in the flex input routines: @@ -314,6 +340,7 @@ extern int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; /* variables for nfa machine data: * current_mns - current maximum on number of NFA states * accnum - number of the last accepting state + * lastnfa - last nfa state number created * firstst - physically the first state of a fragment * lastst - last physical state of fragment * finalst - last logical state of fragment @@ -321,12 +348,12 @@ extern int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; * trans1 - transition state * trans2 - 2nd transition state for epsilons * accptnum - accepting number - * lastnfa - last nfa state number created + * assoc_rule - rule associated with this NFA state (or 0 if none) */ -extern int current_mns; -extern int accnum, *firstst, *lastst, *finalst, *transchar; -extern int *trans1, *trans2, *accptnum, lastnfa; +extern int current_mns, accnum, lastnfa; +extern int *firstst, *lastst, *finalst, *transchar, *trans1, *trans2; +extern int *accptnum, *assoc_rule; /* variables for protos: @@ -450,11 +477,13 @@ extern char *ccltbl; * numuniq - number of unique transitions * numdup - number of duplicate transitions * hshsave - number of hash collisions saved by checking number of states + * 
num_backtracking - number of DFA states requiring back-tracking */ extern char *starttime, *endtime, nmstr[MAXLINE]; extern int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; extern int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; +extern int num_backtracking; char *allocate_array(), *reallocate_array(); @@ -464,14 +493,14 @@ char *allocate_array(), *reallocate_array(); #define reallocate_integer_array(array,size) \ (int *) reallocate_array( (char *) array, size, sizeof( int ) ) -#define allocate_integer_pointer_array(size) \ +#define allocate_int_ptr_array(size) \ (int **) allocate_array( size, sizeof( int * ) ) #define allocate_dfaacc_union(size) \ (union dfaacc_union *) \ allocate_array( size, sizeof( union dfaacc_union ) ) -#define reallocate_integer_pointer_array(array,size) \ +#define reallocate_int_ptr_array(array,size) \ (int **) reallocate_array( (char *) array, size, sizeof( int * ) ) #define reallocate_dfaacc_union(array, size) \ -- cgit v1.2.3 From cea87b702446c7584638e1d7c18b039514f89ae4 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 8 May 1988 20:03:38 +0000 Subject: Added RCS header removed revision history misc additions and fixes to globals VMS ifdef's backtracking statistics -p flag name shortenings --- main.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/main.c b/main.c index abd1ba0..1e4f473 100644 --- a/main.c +++ b/main.c @@ -11,29 +11,28 @@ * may be created provided the new works, if made available to the general * public, are made available for use by anyone. * - * - * ver date who remarks - * --- ---- ------ ------------------------------------------------------- - * 04b 30sep87 kg, vp .implemented (part of) Van Jacobson's fast scanner design - * 04a 27jun86 vp .translated from Ratfor into C - * 01a 22aug83 vp .written. Original version by Jef Poskanzer. 
*/ #include "flexdef.h" +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + /* these globals are all defined and commented in flexdef.h */ int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt; int interactive, caseins, useecs, fulltbl, usemecs, reject; -int fullspd, gen_line_dirs; +int fullspd, gen_line_dirs, performance_report; int datapos, dataline, linenum; FILE *skelfile = NULL; char *infilename = NULL; int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE]; int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; -int current_mns; -int accnum, *firstst, *lastst, *finalst, *transchar; -int *trans1, *trans2, *accptnum, lastnfa; +int current_mns, accnum, lastnfa; +int *firstst, *lastst, *finalst, *transchar, *trans1, *trans2; +int *accptnum, *assoc_rule; int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP]; int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1]; @@ -42,7 +41,7 @@ int lastsc, current_max_scs, *scset, *scbol, *scxclu, *actvsc; int current_max_dfa_size, current_max_xpairs; int current_max_template_xpairs, current_max_dfas; int lastdfa, *nxt, *chk, *tnxt; -int *base, *def, tblend, firstfree, numtemps, **dss, *dfasiz; +int *base, *def, tblend, firstfree, **dss, *dfasiz; union dfaacc_union *dfaacc; int *accsiz, *dhash, *todo, todo_head, todo_next, numas; int numsnpairs, jambase, jamstate; @@ -52,9 +51,14 @@ char *ccltbl; char *starttime, *endtime, nmstr[MAXLINE]; int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; +int num_backtracking; FILE *temp_action_file; int end_of_buffer_state; +#ifndef SHORT_FILE_NAMES char *action_file_name = "/tmp/flexXXXXXX"; +#else +char *action_file_name = "flexXXXXXX.tmp"; +#endif /* flex - main program @@ -126,7 +130,14 @@ int status; fprintf( stderr, " %d/%d NFA states\n", lastnfa, current_mns ); fprintf( stderr, " 
%d/%d DFA states (%d words)\n", lastdfa, current_max_dfas, totnst ); - fprintf( stderr, " %d rules\n", accnum ); + fprintf( stderr, " %d rules\n", accnum - 1 /* - 1 for def. rule */ ); + + if ( num_backtracking == 0 ) + fprintf( stderr, " No backtracking\n" ); + else + fprintf( stderr, " %d backtracking (non-accepting) states\n", + num_backtracking ); + fprintf( stderr, " %d/%d start conditions\n", lastsc, current_max_scs ); fprintf( stderr, " %d epsilon states, %d double epsilon states\n", @@ -138,7 +149,7 @@ int status; fprintf( stderr, " %d/%d character classes needed %d/%d words of storage, %d reused\n", lastccl, current_maxccls, - cclmap[lastccl] + ccllen[lastccl] - 1, + cclmap[lastccl] + ccllen[lastccl], current_max_ccl_tbl_size, cclreuse ); fprintf( stderr, " %d state/nextstate pairs created\n", numsnpairs ); @@ -189,7 +200,11 @@ int status; fprintf( stderr, " %d total table entries needed\n", tblsiz ); } +#ifndef VMS exit( status ); +#else + exit( status + 1 ); +#endif } @@ -210,7 +225,7 @@ char **argv; char *arg, *skelname = NULL, *gettime(), clower(), *mktemp(); printstats = syntaxerror = trace = spprdflt = interactive = caseins = false; - ddebug = fulltbl = reject = fullspd = false; + performance_report = ddebug = fulltbl = reject = fullspd = false; gen_line_dirs = usemecs = useecs = true; sawcmpflag = false; @@ -275,6 +290,11 @@ char **argv; fulltbl = true; break; + case 'F': + useecs = usemecs = false; + fullspd = true; + break; + case 'I': interactive = true; break; @@ -287,13 +307,12 @@ char **argv; gen_line_dirs = false; break; - case 'r': - reject = true; + case 'p': + performance_report = true; break; - case 'F': - useecs = usemecs = false; - fullspd = true; + case 'r': + reject = true; break; case 'S': @@ -340,6 +359,9 @@ get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ if ( fulltbl && fullspd ) flexerror( "full table and -F are mutually exclusive" ); + if ( performance_report && reject ) + fprintf( stderr, "Reject 
guarentees performance penalties\n" ); + if ( ! skelname ) { static char skeleton_name_storage[400]; @@ -354,7 +376,11 @@ get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ if ( ! use_stdout ) { +#ifndef SHORT_FILE_NAMES FILE *prev_stdout = freopen( "lex.yy.c", "w", stdout ); +#else + FILE *prev_stdout = freopen( "lexyy.c", "w", stdout ); +#endif if ( prev_stdout == NULL ) flexerror( "could not create lex.yy.c" ); @@ -391,7 +417,7 @@ get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ lastdfa = lastnfa = accnum = numas = numsnpairs = tmpuses = 0; numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0; numuniq = numdup = hshsave = eofseen = datapos = dataline = 0; - onesp = numprots = 0; + num_backtracking = onesp = numprots = 0; linenum = sectnum = 1; firstprot = NIL; @@ -492,6 +518,7 @@ set_up_initial_allocations() trans1 = allocate_integer_array( current_mns ); trans2 = allocate_integer_array( current_mns ); accptnum = allocate_integer_array( current_mns ); + assoc_rule = allocate_integer_array( current_mns ); current_max_scs = INITIAL_MAX_SCS; scset = allocate_integer_array( current_max_scs ); @@ -523,6 +550,6 @@ set_up_initial_allocations() accsiz = allocate_integer_array( current_max_dfas ); dhash = allocate_integer_array( current_max_dfas ); todo = allocate_integer_array( current_max_dfas ); - dss = allocate_integer_pointer_array( current_max_dfas ); + dss = allocate_int_ptr_array( current_max_dfas ); dfaacc = allocate_dfaacc_union( current_max_dfas ); } -- cgit v1.2.3 From 9ed30faa2ac78e809cbce2e585203aa3865c3b83 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 8 May 1988 20:04:47 +0000 Subject: RCS header check before malloc()'ing for 16 bit overflow MS_DOS, VMS ifdef's removed commented-out \^ code removed FTLSOURCE code added readable_form() --- misc.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 78 insertions(+), 32 deletions(-) diff --git 
a/misc.c b/misc.c index 0ff49f5..53220a4 100644 --- a/misc.c +++ b/misc.c @@ -15,6 +15,11 @@ #include #include "flexdef.h" +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + char *malloc(), *realloc(); @@ -45,7 +50,16 @@ char *allocate_array( size, element_size ) int size, element_size; { - register char *mem = malloc( (unsigned) (element_size * size) ); + register char *mem; + + /* on 16-bit int machines (e.g., 80286) we might be trying to + * allocate more than a signed int can hold, and that won't + * work. Cheap test: + */ + if ( element_size * size <= 0 ) + flexfatal( "request for < 1 byte in allocate_array()" ); + + mem = malloc( (unsigned) (element_size * size) ); if ( mem == NULL ) flexfatal( "memory allocation failed in allocate_array()" ); @@ -220,7 +234,18 @@ dataflush() /* include sys/types.h to use time_t and make lint happy */ +#ifndef MS_DOS +#ifndef VMS #include +#else +#include +#endif +#endif + +#ifdef MS_DOS +#include +typedef long time_t; +#endif char *gettime() @@ -456,26 +481,6 @@ char array[]; synerr( "escape sequence for null not allowed" ); return ( 1 ); } - -#ifdef NOTDEF - case '^': - { - register char next_char = array[2]; - - if ( next_char == '?' ) - return ( 0x7f ); - - else if ( next_char >= 'A' && next_char <= 'Z' ) - return ( next_char - 'A' + 1 ); - - else if ( next_char >= 'a' && next_char <= 'z' ) - return ( next_char - 'z' + 1 ); - - synerr( "illegal \\^ escape sequence" ); - - return ( 1 ); - } -#endif } return ( array[1] ); @@ -494,21 +499,57 @@ int otoi( str ) char str[]; { -#ifdef FTLSOURCE - fortran int gctoi() - int dummy = 1; - - return ( gctoi( str, dummy, 8 ) ); -#else int result; (void) sscanf( str, "%o", &result ); return ( result ); -#endif } +/* readable_form - return the human-readable form of a character + * + * synopsis: + * int c; + * char *readable_form(); + * = readable_form( c ); + * + * The returned string is in static storage.
+ */ + +char *readable_form( c ) +register int c; + + { + static char rform[10]; + + if ( (c >= 0 && c < 32) || c == 127 ) + { + switch ( c ) + { + case '\n': return ( "\\n" ); + case '\t': return ( "\\t" ); + case '\f': return ( "\\f" ); + case '\r': return ( "\\r" ); + case '\b': return ( "\\b" ); + + default: + sprintf( rform, "\\%.3o", c ); + return ( rform ); + } + } + + else if ( c == ' ' ) + return ( "' '" ); + + else + { + rform[0] = c; + rform[1] = '\0'; + + return ( rform ); + } + } /* reallocate_array - increase the size of a dynamic array */ @@ -518,8 +559,13 @@ char *array; int size, element_size; { - register char *new_array = realloc( array, - (unsigned) (size * element_size )); + register char *new_array; + + /* same worry as in allocate_array(): */ + if ( size * element_size <= 0 ) + flexfatal( "attempt to increase array size by less than 1 byte" ); + + new_array = realloc( array, (unsigned) (size * element_size )); if ( new_array == NULL ) flexfatal( "attempt to increase array size failed" ); @@ -570,10 +616,10 @@ int element_v, element_n; if ( datapos >= 75 ) { - printf( "\n" ); + putchar( '\n' ); if ( ++dataline % 10 == 0 ) - printf( "\n" ); + putchar( '\n' ); datapos = 0; } -- cgit v1.2.3 From f93b389944d3e91d454a7a79b522e79bc0b8c301 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 8 May 1988 20:05:57 +0000 Subject: RCS ident yy_cp, yy_bp support name shortenings assoc_rule support --- nfa.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/nfa.c b/nfa.c index a2b2c8e..641a182 100644 --- a/nfa.c +++ b/nfa.c @@ -14,6 +14,11 @@ #include "flexdef.h" +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + /* add_accept - add an accepting state to a machine * * synopsis @@ -39,6 +44,10 @@ int mach, headcnt, trailcnt; if ( headcnt > 0 || trailcnt > 0 ) { /* do trailing context magic to not match the trailing characters */ + char *scanner_cp = + (fulltbl || fullspd) ? 
"yy_c_buf_p = yy_cp" : "yy_c_buf_p"; + char *scanner_bp = (fulltbl || fullspd) ? "yy_bp" : "yy_b_buf_p"; + fprintf( temp_action_file, "YY_DO_BEFORE_SCAN; /* undo effects of setting up yytext */\n" ); @@ -47,22 +56,23 @@ int mach, headcnt, trailcnt; int head_offset = headcnt - 1; if ( fullspd || fulltbl ) - /* with the fast skeleton, yy_c_buf_p points to the *next* - * character to scan, rather than the one that was last - * scanned + /* with the fast skeleton, the character pointer points + * to the *next* character to scan, rather than the one + * that was last scanned */ ++head_offset; if ( head_offset > 0 ) - fprintf( temp_action_file, "yy_c_buf_p = yy_b_buf_p + %d;\n", - head_offset ); + fprintf( temp_action_file, "%s = %s + %d;\n", + scanner_cp, scanner_bp, head_offset ); else - fprintf( temp_action_file, "yy_c_buf_p = yy_b_buf_p;\n" ); + fprintf( temp_action_file, "%s = %s;\n", + scanner_cp, scanner_bp ); } else - fprintf( temp_action_file, "yy_c_buf_p -= %d;\n", trailcnt ); + fprintf( temp_action_file, "%s -= %d;\n", scanner_cp, trailcnt ); fprintf( temp_action_file, "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" ); } @@ -501,22 +511,24 @@ int sym; ++num_reallocs; + firstst = reallocate_integer_array( firstst, current_mns ); + lastst = reallocate_integer_array( lastst, current_mns ); + finalst = reallocate_integer_array( finalst, current_mns ); transchar = reallocate_integer_array( transchar, current_mns ); trans1 = reallocate_integer_array( trans1, current_mns ); trans2 = reallocate_integer_array( trans2, current_mns ); accptnum = reallocate_integer_array( accptnum, current_mns ); - firstst = reallocate_integer_array( firstst, current_mns ); - finalst = reallocate_integer_array( finalst, current_mns ); - lastst = reallocate_integer_array( lastst, current_mns ); + assoc_rule = reallocate_integer_array( assoc_rule, current_mns ); } + firstst[lastnfa] = lastnfa; + finalst[lastnfa] = lastnfa; + lastst[lastnfa] = lastnfa; transchar[lastnfa] = sym; 
trans1[lastnfa] = NO_TRANSITION; trans2[lastnfa] = NO_TRANSITION; accptnum[lastnfa] = NIL; - firstst[lastnfa] = lastnfa; - finalst[lastnfa] = lastnfa; - lastst[lastnfa] = lastnfa; + assoc_rule[lastnfa] = linenum; /* identify rules by line number in input */ /* fix up equivalence classes base on this transition. Note that any * character which has its own transition gets its own equivalence class. -- cgit v1.2.3 From 32fe716b3738f3305606140d5676d5376e22c277 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 8 May 1988 20:06:39 +0000 Subject: RCS header bug fix due to missing default rule, could have to backtrack when backtrack variables haven't been set up --- parse.y | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/parse.y b/parse.y index e116407..d9cf7be 100644 --- a/parse.y +++ b/parse.y @@ -15,8 +15,14 @@ %token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL %{ + #include "flexdef.h" +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen; int trlcontxt, xcluflg, cclsorted, varlength; char clower(); @@ -27,6 +33,22 @@ static int madeany = false; /* whether we've made the '.' 
character class */ %% goal : initlex sect1 sect1end sect2 + { /* add default rule */ + int def_rule; + + pat = cclinit(); + cclnegate( pat ); + + def_rule = mkstate( -pat ); + + add_accept( def_rule, 0, 0 ); + + for ( i = 1; i <= lastsc; ++i ) + scset[i] = mkbranch( scset[i], def_rule ); + + fputs( "YY_DEFAULT_ACTION;\n\tYY_BREAK\n", + temp_action_file ); + } ; initlex : @@ -89,7 +111,8 @@ flexrule : scon '^' re eol add_accept( pat, headcnt, trailcnt ); for ( i = 1; i <= actvp; ++i ) - scbol[actvsc[i]] = mkbranch( scbol[actvsc[i]], pat ); + scbol[actvsc[i]] = + mkbranch( scbol[actvsc[i]], pat ); } | scon re eol @@ -98,7 +121,8 @@ flexrule : scon '^' re eol add_accept( pat, headcnt, trailcnt ); for ( i = 1; i <= actvp; ++i ) - scset[actvsc[i]] = mkbranch( scset[actvsc[i]], pat ); + scset[actvsc[i]] = + mkbranch( scset[actvsc[i]], pat ); } | '^' re eol -- cgit v1.2.3 From 5fa802e597f944e411179df9f4c3f7e9f9bcb292 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 8 May 1988 20:07:29 +0000 Subject: RCS header removed \^ from ESCSEQ --- scan.l | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/scan.l b/scan.l index 0482d50..88c2d22 100644 --- a/scan.l +++ b/scan.l @@ -16,6 +16,11 @@ #include "flexdef.h" #include "parse.h" +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + #define ACTION_ECHO fprintf( temp_action_file, "%s", yytext ) #define MARK_END_OF_PROLOG fprintf( temp_action_file, "%%%% end of prolog\n" ); @@ -48,7 +53,7 @@ NAME [a-z_][a-z_0-9]* SCNAME {NAME} -ESCSEQ \\([^^\n]|"^".|0[0-9]{1,3}) +ESCSEQ \\([^\n]|0[0-9]{1,3}) %% static int bracelevel, didadef; @@ -266,7 +271,7 @@ ESCSEQ \\([^^\n]|"^".|0[0-9]{1,3}) -/[^\]\n] return ( '-' ); [^\]\n] RETURNCHAR; -"]" BEGIN(SECT2); return ( ']' ); +"]" BEGIN(SECT2); return ( ']' ); [0-9]+ { @@ -365,7 +370,7 @@ ESCSEQ \\([^^\n]|"^".|0[0-9]{1,3}) YY_DO_BEFORE_SCAN; /* recover from setting up yytext */ #ifdef FLEX_FAST_SKEL - fputs( yy_c_buf_p + 1, stdout ); + fputs( yy_cp 
+ 1, stdout ); #else yy_ch_buf[yy_e_buf_p + 1] = '\0'; -- cgit v1.2.3 From 50c8adcc9c16f2095f77eea479d436504fee1c70 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 8 May 1988 20:08:00 +0000 Subject: RCS header changed "entry" to "sym_entry" to avoid conflict with old keyword --- sym.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/sym.c b/sym.c index d4c15a2..025d15b 100644 --- a/sym.c +++ b/sym.c @@ -14,6 +14,11 @@ #include "flexdef.h" +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + struct hash_entry *ndtbl[NAME_TABLE_HASH_SIZE]; struct hash_entry *sctbl[START_COND_HASH_SIZE]; struct hash_entry *ccltab[CCL_HASH_SIZE]; @@ -42,19 +47,19 @@ int table_size; { int hash_val = hashfunct( sym, table_size ); - register struct hash_entry *entry = table[hash_val]; + register struct hash_entry *sym_entry = table[hash_val]; register struct hash_entry *new_entry; register struct hash_entry *successor; char *malloc(); - while ( entry ) + while ( sym_entry ) { - if ( ! strcmp( sym, entry->name ) ) + if ( ! strcmp( sym, sym_entry->name ) ) { /* entry already exists */ return ( -1 ); } - entry = entry->next; + sym_entry = sym_entry->next; } /* create new entry */ @@ -127,8 +132,8 @@ char ccltxt[]; * char sym[]; * hash_table table; * int table_size; - * struct hash_entry *entry, *findsym(); - * entry = findsym( sym, table, table_size ); + * struct hash_entry *sym_entry, *findsym(); + * sym_entry = findsym( sym, table, table_size ); */ struct hash_entry *findsym( sym, table, table_size ) @@ -137,17 +142,17 @@ hash_table table; int table_size; { - register struct hash_entry *entry = table[hashfunct( sym, table_size )]; + register struct hash_entry *sym_entry = table[hashfunct( sym, table_size )]; static struct hash_entry empty_entry = { (struct hash_entry *) 0, (struct hash_entry *) 0, NULL, NULL, 0, } ; - while ( entry ) + while ( sym_entry ) { - if ( ! 
strcmp( sym, entry->name ) ) - return ( entry ); - entry = entry->next; + if ( ! strcmp( sym, sym_entry->name ) ) + return ( sym_entry ); + sym_entry = sym_entry->next; } return ( &empty_entry ); -- cgit v1.2.3 From 04153018792074e5286f89e3d194580f184c8385 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 8 May 1988 20:08:46 +0000 Subject: RCS header MAX_XTIONS_FOR_FULL_INTERIOR_FIT -> MAX_XTIONS_FULL_INTERIOR_FIT made back-tracking accepting number be one greater than the last legit accepting number, instead of 0. This way, end-of-buffer can take 0 and no negative accepting numbers are needed. added genftbl() changed last ftl references to C added check for UNSIGNED_CHAR's added back-track logic to make_tables() added checking and report for backtracking fixed fence-post error with onesp stack pointer --- tblcmp.c | 239 ++++++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 151 insertions(+), 88 deletions(-) diff --git a/tblcmp.c b/tblcmp.c index bee8c48..18c2e2d 100644 --- a/tblcmp.c +++ b/tblcmp.c @@ -14,6 +14,11 @@ #include "flexdef.h" +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + /* bldtbl - build table entries for dfa state * * synopsis @@ -317,7 +322,7 @@ int *state, numtrans; /* if there are too many out-transitions, put the state at the end of * nxt and chk */ - if ( numtrans > MAX_XTIONS_FOR_FULL_INTERIOR_FIT ) + if ( numtrans > MAX_XTIONS_FULL_INTERIOR_FIT ) { /* if table is empty, return the first available spot in chk/nxt, * which should be 1 @@ -367,7 +372,7 @@ int *state, numtrans; /* if we started search from the beginning, store the new firstfree for * the next call of find_table_space() */ - if ( numtrans <= MAX_XTIONS_FOR_FULL_INTERIOR_FIT ) + if ( numtrans <= MAX_XTIONS_FULL_INTERIOR_FIT ) firstfree = i + 1; /* check to see if all elements in chk (and therefore nxt) that are @@ -429,18 +434,18 @@ genctbl() nxt[tblend + 1] = END_OF_BUFFER_ACTION; chk[tblend + 1] = numecs + 1; chk[tblend + 2] 
= 1; /* anything but EOB */ + nxt[tblend + 2] = 0; /* so that "make test" won't show arb. differences */ /* make sure every state has a end-of-buffer transition and an action # */ for ( i = 0; i <= lastdfa; ++i ) { + register int anum = dfaacc[i].dfaacc_state; + chk[base[i]] = EOB_POSITION; chk[base[i] - 1] = ACTION_POSITION; - nxt[base[i] - 1] = dfaacc[i].dfaacc_state; /* action number */ + nxt[base[i] - 1] = anum ? anum : accnum + 1; /* action number */ } - for ( i = 0; i <= lastsc * 2; ++i ) - nxt[base[i] - 1] = DEFAULT_ACTION; - dataline = 0; datapos = 0; @@ -482,6 +487,54 @@ genctbl() } +/* genftbl - generates full transition table + * + * synopsis + * genftbl(); + */ + +genftbl() + + { + register int i; + + /* *everything* is done in terms of arrays starting at 1, so provide + * a null entry for the zero element of all C arrays + */ + static char C_short_decl[] = "static short int %c[%d] =\n { 0,\n"; + static char C_char_decl[] = "static char %c[%d] =\n { 0,\n"; + +#ifdef UNSIGNED_CHAR + printf( C_short_decl, ALIST, lastdfa + 1 ); +#else + printf( accnum > 127 ? C_short_decl : C_char_decl, ALIST, lastdfa + 1 ); +#endif + + for ( i = 1; i <= lastdfa; ++i ) + { + register int anum = dfaacc[i].dfaacc_state; + + if ( i == end_of_buffer_state ) + mkdata( END_OF_BUFFER_ACTION ); + + else + mkdata( anum ? 
anum : accnum + 1 ); + + if ( trace && anum ) + fprintf( stderr, "state # %d accepts: [%d]\n", i, anum ); + } + + dataend(); + + if ( useecs ) + genecs(); + + /* don't have to dump the actual full table entries - they were created + * on-the-fly + */ + } + + /* gentabs - generate data statements for the transition tables * * synopsis @@ -491,22 +544,18 @@ genctbl() gentabs() { - int i, j, k, *accset, nacc, *acc_array; - char clower(); + int i, j, k, *accset, nacc, *acc_array, total_states; /* *everything* is done in terms of arrays starting at 1, so provide - * a null entry for the zero element of all FTL arrays + * a null entry for the zero element of all C arrays */ - static char ftl_long_decl[] = "static long int %c[%d] =\n { 0,\n"; - static char ftl_short_decl[] = "static short int %c[%d] =\n { 0,\n"; - static char ftl_char_decl[] = "static char %c[%d] =\n { 0,\n"; + static char C_long_decl[] = "static long int %c[%d] =\n { 0,\n"; + static char C_short_decl[] = "static short int %c[%d] =\n { 0,\n"; + static char C_char_decl[] = "static char %c[%d] =\n { 0,\n"; acc_array = allocate_integer_array( current_max_dfas ); nummt = 0; - if ( fulltbl ) - jambase = lastdfa + 1; /* home of "jam" pseudo-state */ - printf( "#define YY_JAM %d\n", jamstate ); printf( "#define YY_JAM_BASE %d\n", jambase ); @@ -521,7 +570,7 @@ gentabs() * indices in the dfaacc array */ - printf( accnum > 127 ? ftl_short_decl : ftl_char_decl, + printf( accnum > 127 ? C_short_decl : C_char_decl, ACCEPT, max( numas, 1 ) + 1 ); j = 1; /* index into ACCEPT array */ @@ -561,13 +610,14 @@ gentabs() dataend(); } - + else { for ( i = 1; i <= lastdfa; ++i ) acc_array[i] = dfaacc[i].dfaacc_state; - - acc_array[i] = 0; /* add (null) accepting number for jam state */ + + /* add accepting number for jam state */ + acc_array[i] = 0; } /* spit out ALIST array. If we're doing "reject", it'll be pointers @@ -575,7 +625,7 @@ gentabs() * In either case, we just dump the numbers. 
*/ - /* "lastdfa + 2" is the size of ALIST; includes room for FTL arrays + /* "lastdfa + 2" is the size of ALIST; includes room for C arrays * beginning at 0 and for "jam" state */ k = lastdfa + 2; @@ -588,14 +638,12 @@ gentabs() */ ++k; +#ifdef UNSIGNED_CHAR + printf( C_short_decl, ALIST, k ); +#else printf( ((reject && numas > 126) || accnum > 127) ? - ftl_short_decl : ftl_char_decl, ALIST, k ); - - /* set up default actions */ - for ( i = 1; i <= lastsc * 2; ++i ) - acc_array[i] = DEFAULT_ACTION; - - acc_array[end_of_buffer_state] = END_OF_BUFFER_ACTION; + C_short_decl : C_char_decl, ALIST, k ); +#endif for ( i = 1; i <= lastdfa; ++i ) { @@ -624,7 +672,7 @@ gentabs() if ( trace ) fputs( "\n\nMeta-Equivalence Classes:\n", stderr ); - printf( ftl_char_decl, MATCHARRAY, numecs + 1 ); + printf( C_char_decl, MATCHARRAY, numecs + 1 ); for ( i = 1; i <= numecs; ++i ) { @@ -637,78 +685,75 @@ gentabs() dataend(); } - if ( ! fulltbl ) - { - int total_states = lastdfa + numtemps; + total_states = lastdfa + numtemps; - printf( tblend > MAX_SHORT ? ftl_long_decl : ftl_short_decl, - BASEARRAY, total_states + 1 ); + printf( tblend > MAX_SHORT ? 
C_long_decl : C_short_decl, + BASEARRAY, total_states + 1 ); - for ( i = 1; i <= lastdfa; ++i ) - { - register int d = def[i]; - - if ( base[i] == JAMSTATE ) - base[i] = jambase; + for ( i = 1; i <= lastdfa; ++i ) + { + register int d = def[i]; - if ( d == JAMSTATE ) - def[i] = jamstate; + if ( base[i] == JAMSTATE ) + base[i] = jambase; - else if ( d < 0 ) - { - /* template reference */ - ++tmpuses; - def[i] = lastdfa - d + 1; - } + if ( d == JAMSTATE ) + def[i] = jamstate; - mkdata( base[i] ); + else if ( d < 0 ) + { + /* template reference */ + ++tmpuses; + def[i] = lastdfa - d + 1; } - /* generate jam state's base index */ mkdata( base[i] ); + } - for ( ++i /* skip jam state */; i <= total_states; ++i ) - { - mkdata( base[i] ); - def[i] = jamstate; - } + /* generate jam state's base index */ + mkdata( base[i] ); - dataend(); + for ( ++i /* skip jam state */; i <= total_states; ++i ) + { + mkdata( base[i] ); + def[i] = jamstate; + } - printf( tblend > MAX_SHORT ? ftl_long_decl : ftl_short_decl, - DEFARRAY, total_states + 1 ); + dataend(); - for ( i = 1; i <= total_states; ++i ) - mkdata( def[i] ); + printf( tblend > MAX_SHORT ? C_long_decl : C_short_decl, + DEFARRAY, total_states + 1 ); - dataend(); + for ( i = 1; i <= total_states; ++i ) + mkdata( def[i] ); - printf( lastdfa > MAX_SHORT ? ftl_long_decl : ftl_short_decl, - NEXTARRAY, tblend + 1 ); + dataend(); - for ( i = 1; i <= tblend; ++i ) - { - if ( nxt[i] == 0 ) - nxt[i] = jamstate; /* new state is the JAM state */ + printf( lastdfa > MAX_SHORT ? C_long_decl : C_short_decl, + NEXTARRAY, tblend + 1 ); - mkdata( nxt[i] ); - } + for ( i = 1; i <= tblend; ++i ) + { + if ( nxt[i] == 0 || chk[i] == 0 ) + nxt[i] = jamstate; /* new state is the JAM state */ - dataend(); + mkdata( nxt[i] ); + } - printf( lastdfa > MAX_SHORT ? ftl_long_decl : ftl_short_decl, - CHECKARRAY, tblend + 1 ); + dataend(); - for ( i = 1; i <= tblend; ++i ) - { - if ( chk[i] == 0 ) - ++nummt; + printf( lastdfa > MAX_SHORT ? 
C_long_decl : C_short_decl, + CHECKARRAY, tblend + 1 ); - mkdata( chk[i] ); - } + for ( i = 1; i <= tblend; ++i ) + { + if ( chk[i] == 0 ) + ++nummt; - dataend(); + mkdata( chk[i] ); } + + dataend(); } @@ -718,10 +763,11 @@ genecs() { register int i, j; - static char ftl_char_decl[] = "static char %c[%d] =\n { 0,\n"; + static char C_char_decl[] = "static char %c[%d] =\n { 0,\n"; int numrows; + char clower(); - printf( ftl_char_decl, ECARRAY, CSIZE + 1 ); + printf( C_char_decl, ECARRAY, CSIZE + 1 ); for ( i = 1; i <= CSIZE; ++i ) { @@ -830,11 +876,20 @@ make_tables() } if ( fullspd || fulltbl ) + { skelout(); - /* compute the tables and copy them to output file */ - if ( fullspd ) - genctbl(); + if ( num_backtracking > 0 ) + { + printf( "#define FLEX_USES_BACKTRACKING\n" ); + printf( "#define YY_BACK_TRACK %d\n", accnum + 1 ); + } + + if ( fullspd ) + genctbl(); + else + genftbl(); + } else gentabs(); @@ -1223,6 +1278,7 @@ ntod() int *nset, *dset; int targptr, totaltrans, i, comstate, comfreq, targ; int *epsclosure(), snstods(), symlist[CSIZE + 1]; + int num_start_states; /* this is so find_table_space(...) 
will know where to start looking in * chk/nxt for unused records for space to put in the state @@ -1291,7 +1347,9 @@ ntod() /* create the first states */ - for ( i = 1; i <= lastsc * 2; ++i ) + num_start_states = lastsc * 2; + + for ( i = 1; i <= num_start_states; ++i ) { numstates = 1; @@ -1322,6 +1380,7 @@ ntod() flexfatal( "could not create unique end-of-buffer state" ); numas += 1; + ++num_start_states; todo[todo_next] = end_of_buffer_state; ADD_QUEUE_ELEMENT(todo_next); @@ -1413,16 +1472,20 @@ ntod() state[i] = state[j]; } + if ( ds > num_start_states ) + check_for_backtracking( ds, state ); + if ( fulltbl ) { /* supply array's 0-element */ if ( ds == end_of_buffer_state ) - mk2data( 0 ); + mk2data( -end_of_buffer_state ); else mk2data( end_of_buffer_state ); for ( i = 1; i <= numecs; ++i ) - mk2data( state[i] ); + /* jams are marked by negative of state number */ + mk2data( state[i] ? state[i] : -ds ); /* force ',' and dataflush() next call to mk2data */ datapos = NUMDATAITEMS; @@ -1457,7 +1520,7 @@ ntod() if ( fulltbl ) dataend(); - else + else if ( ! fullspd ) { cmptmps(); /* create compressed template entries */ @@ -1536,7 +1599,7 @@ stack1( statenum, sym, nextstate, deflink ) int statenum, sym, nextstate, deflink; { - if ( onesp >= ONE_STACK_SIZE ) + if ( onesp >= ONE_STACK_SIZE - 1 ) mk1tbl( statenum, sym, nextstate, deflink ); else -- cgit v1.2.3 From 16ceb2c2884573b898d4c029ea4966bcfd6225e6 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 8 May 1988 20:13:35 +0000 Subject: RCS header changed display style of non-printings from ^x to \0xx --- yylex.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/yylex.c b/yylex.c index 8e26254..cebf756 100644 --- a/yylex.c +++ b/yylex.c @@ -1,8 +1,5 @@ /* yylex - scanner front-end for flex */ -#include "flexdef.h" -#include "parse.h" - /* * Copyright (c) 1987, the University of California * @@ -15,6 +12,14 @@ * public, are made available for use by anyone. 
*/ +#include "flexdef.h" +#include "parse.h" + +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + /* yylex - scan for a regular expression token * * synopsis @@ -183,12 +188,8 @@ int yylex() case 29: case 30: case 31: - fprintf( stderr, "^%c", 'A' + yylval - 1 ); - break; - case 127: - (void) putc( '^', stderr ); - (void) putc( '@', stderr ); + fprintf( stderr, "\\%.3o", yylval ); break; default: -- cgit v1.2.3 From 6eb5d4c2674d037ac44a109c8684444421cd1523 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 9 May 1988 17:01:59 +0000 Subject: Initial revision --- gen.c | 590 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 590 insertions(+) create mode 100644 gen.c diff --git a/gen.c b/gen.c new file mode 100644 index 0000000..a17f709 --- /dev/null +++ b/gen.c @@ -0,0 +1,590 @@ +/* gen - actual generation (writing) of flex scanners */ + +/* + * Copyright (c) 1987, the University of California + * + * The United States Government has rights in this work pursuant to + * contract no. DE-AC03-76SF00098 between the United States Department of + * Energy and the University of California. + * + * This program may be redistributed. Enhancements and derivative works + * may be created provided the new works, if made available to the general + * public, are made available for use by anyone. + */ + +#include "flexdef.h" + +#ifndef lint +static char rcsid[] = + "@(#) $Header$ (LBL)"; +#endif + + +/* genctbl - generates full speed compressed transition table + * + * synopsis + * genctbl(); + */ + +genctbl() + + { + register int i; + + /* table of verify for transition and offset to next state */ + printf( "static struct yy_trans_info yy_transition[%d] =\n", + tblend + numecs + 1 ); + printf( " {\n" ); + + /* We want the transition to be represented as the offset to the + * next state, not the actual state number, which is what it currently is. + * The offset is base[nxt[i]] - base[chk[i]]. 
That's just the + * difference between the starting points of the two involved states + * (to - from). + * + * first, though, we need to find some way to put in our end-of-buffer + * flags and states. We do this by making a state with absolutely no + * transitions. We put it at the end of the table. + */ + /* at this point, we're guaranteed that there's enough room in nxt[] + * and chk[] to hold tblend + numecs entries. We need just two slots. + * One for the action and one for the end-of-buffer transition. We + * now *assume* that we're guaranteed the only character we'll try to + * index this nxt/chk pair with is EOB, i.e., 0, so we don't have to + * make sure there's room for jam entries for other characters. + */ + + base[lastdfa + 1] = tblend + 2; + nxt[tblend + 1] = END_OF_BUFFER_ACTION; + chk[tblend + 1] = numecs + 1; + chk[tblend + 2] = 1; /* anything but EOB */ + nxt[tblend + 2] = 0; /* so that "make test" won't show arb. differences */ + + /* make sure every state has an end-of-buffer transition and an action # */ + for ( i = 0; i <= lastdfa; ++i ) + { + register int anum = dfaacc[i].dfaacc_state; + + chk[base[i]] = EOB_POSITION; + chk[base[i] - 1] = ACTION_POSITION; + nxt[base[i] - 1] = anum ?
anum : accnum + 1; /* action number */ + } + + dataline = 0; + datapos = 0; + + for ( i = 0; i <= tblend; ++i ) + { + if ( chk[i] == EOB_POSITION ) + transition_struct_out( 0, base[lastdfa + 1] - i ); + + else if ( chk[i] == ACTION_POSITION ) + transition_struct_out( 0, nxt[i] ); + + else if ( chk[i] > numecs || chk[i] == 0 ) + transition_struct_out( 0, 0 ); /* unused slot */ + + else /* verify, transition */ + transition_struct_out( chk[i], base[nxt[i]] - (i - chk[i]) ); + } + + + /* here's the final, end-of-buffer state */ + transition_struct_out( chk[tblend + 1], nxt[tblend + 1] ); + transition_struct_out( chk[tblend + 2], nxt[tblend + 2] ); + + printf( " };\n" ); + printf( "\n" ); + + /* table of pointers to start states */ + printf( "static struct yy_trans_info *yy_state_ptr[%d] =\n", + lastsc * 2 + 1 ); + printf( " {\n" ); + + for ( i = 0; i <= lastsc * 2; ++i ) + printf( " &yy_transition[%d],\n", base[i] ); + + printf( " };\n" ); + + if ( useecs ) + genecs(); + } + + +/* genftbl - generates full transition table + * + * synopsis + * genftbl(); + */ + +genftbl() + + { + register int i; + + /* *everything* is done in terms of arrays starting at 1, so provide + * a null entry for the zero element of all C arrays + */ + static char C_short_decl[] = "static short int %c[%d] =\n { 0,\n"; + static char C_char_decl[] = "static char %c[%d] =\n { 0,\n"; + +#ifdef UNSIGNED_CHAR + printf( C_short_decl, ALIST, lastdfa + 1 ); +#else + printf( accnum > 127 ? C_short_decl : C_char_decl, ALIST, lastdfa + 1 ); +#endif + + for ( i = 1; i <= lastdfa; ++i ) + { + register int anum = dfaacc[i].dfaacc_state; + + if ( i == end_of_buffer_state ) + mkdata( END_OF_BUFFER_ACTION ); + + else + mkdata( anum ? 
anum : accnum + 1 ); + + if ( trace && anum ) + fprintf( stderr, "state # %d accepts: [%d]\n", i, anum ); + } + + dataend(); + + if ( useecs ) + genecs(); + + /* don't have to dump the actual full table entries - they were created + * on-the-fly + */ + } + + +/* gentabs - generate data statements for the transition tables + * + * synopsis + * gentabs(); + */ + +gentabs() + + { + int i, j, k, *accset, nacc, *acc_array, total_states; + + /* *everything* is done in terms of arrays starting at 1, so provide + * a null entry for the zero element of all C arrays + */ + static char C_long_decl[] = "static long int %c[%d] =\n { 0,\n"; + static char C_short_decl[] = "static short int %c[%d] =\n { 0,\n"; + static char C_char_decl[] = "static char %c[%d] =\n { 0,\n"; + + acc_array = allocate_integer_array( current_max_dfas ); + nummt = 0; + + printf( "#define YY_JAM %d\n", jamstate ); + printf( "#define YY_JAM_BASE %d\n", jambase ); + + if ( usemecs ) + printf( "#define YY_TEMPLATE %d\n", lastdfa + 2 ); + + if ( reject ) + { + /* write out accepting list and pointer list + * first we generate the ACCEPT array. In the process, we compute + * the indices that will go into the ALIST array, and save the + * indices in the dfaacc array + */ + + printf( accnum > 127 ? 
C_short_decl : C_char_decl, + ACCEPT, max( numas, 1 ) + 1 ); + + j = 1; /* index into ACCEPT array */ + + for ( i = 1; i <= lastdfa; ++i ) + { + acc_array[i] = j; + + if ( accsiz[i] != 0 ) + { + accset = dfaacc[i].dfaacc_set; + nacc = accsiz[i]; + + if ( trace ) + fprintf( stderr, "state # %d accepts: ", i ); + + for ( k = 1; k <= nacc; ++k ) + { + ++j; + mkdata( accset[k] ); + + if ( trace ) + { + fprintf( stderr, "[%d]", accset[k] ); + + if ( k < nacc ) + fputs( ", ", stderr ); + else + putc( '\n', stderr ); + } + } + } + } + + /* add accepting number for the "jam" state */ + acc_array[i] = j; + + dataend(); + } + + else + { + for ( i = 1; i <= lastdfa; ++i ) + acc_array[i] = dfaacc[i].dfaacc_state; + + /* add accepting number for jam state */ + acc_array[i] = 0; + } + + /* spit out ALIST array. If we're doing "reject", it'll be pointers + * into the ACCEPT array. Otherwise it's actual accepting numbers. + * In either case, we just dump the numbers. + */ + + /* "lastdfa + 2" is the size of ALIST; includes room for C arrays + * beginning at 0 and for "jam" state + */ + k = lastdfa + 2; + + if ( reject ) + /* we put a "cap" on the table associating lists of accepting + * numbers with state numbers. This is needed because we tell + * where the end of an accepting list is by looking at where + * the list for the next state starts. + */ + ++k; + +#ifdef UNSIGNED_CHAR + printf( C_short_decl, ALIST, k ); +#else + printf( ((reject && numas > 126) || accnum > 127) ? + C_short_decl : C_char_decl, ALIST, k ); +#endif + + for ( i = 1; i <= lastdfa; ++i ) + { + mkdata( acc_array[i] ); + + if ( ! 
reject && trace && acc_array[i] ) + fprintf( stderr, "state # %d accepts: [%d]\n", i, acc_array[i] ); + } + + /* add entry for "jam" state */ + mkdata( acc_array[i] ); + + if ( reject ) + /* add "cap" for the list */ + mkdata( acc_array[i] ); + + dataend(); + + if ( useecs ) + genecs(); + + if ( usemecs ) + { + /* write out meta-equivalence classes (used to index templates with) */ + + if ( trace ) + fputs( "\n\nMeta-Equivalence Classes:\n", stderr ); + + printf( C_char_decl, MATCHARRAY, numecs + 1 ); + + for ( i = 1; i <= numecs; ++i ) + { + if ( trace ) + fprintf( stderr, "%d = %d\n", i, abs( tecbck[i] ) ); + + mkdata( abs( tecbck[i] ) ); + } + + dataend(); + } + + total_states = lastdfa + numtemps; + + printf( tblend > MAX_SHORT ? C_long_decl : C_short_decl, + BASEARRAY, total_states + 1 ); + + for ( i = 1; i <= lastdfa; ++i ) + { + register int d = def[i]; + + if ( base[i] == JAMSTATE ) + base[i] = jambase; + + if ( d == JAMSTATE ) + def[i] = jamstate; + + else if ( d < 0 ) + { + /* template reference */ + ++tmpuses; + def[i] = lastdfa - d + 1; + } + + mkdata( base[i] ); + } + + /* generate jam state's base index */ + mkdata( base[i] ); + + for ( ++i /* skip jam state */; i <= total_states; ++i ) + { + mkdata( base[i] ); + def[i] = jamstate; + } + + dataend(); + + printf( tblend > MAX_SHORT ? C_long_decl : C_short_decl, + DEFARRAY, total_states + 1 ); + + for ( i = 1; i <= total_states; ++i ) + mkdata( def[i] ); + + dataend(); + + printf( lastdfa > MAX_SHORT ? C_long_decl : C_short_decl, + NEXTARRAY, tblend + 1 ); + + for ( i = 1; i <= tblend; ++i ) + { + if ( nxt[i] == 0 || chk[i] == 0 ) + nxt[i] = jamstate; /* new state is the JAM state */ + + mkdata( nxt[i] ); + } + + dataend(); + + printf( lastdfa > MAX_SHORT ? 
C_long_decl : C_short_decl, + CHECKARRAY, tblend + 1 ); + + for ( i = 1; i <= tblend; ++i ) + { + if ( chk[i] == 0 ) + ++nummt; + + mkdata( chk[i] ); + } + + dataend(); + } + + +/* generate equivalence-class tables */ + +genecs() + + { + register int i, j; + static char C_char_decl[] = "static char %c[%d] =\n { 0,\n"; + int numrows; + char clower(); + + printf( C_char_decl, ECARRAY, CSIZE + 1 ); + + for ( i = 1; i <= CSIZE; ++i ) + { + if ( caseins && (i >= 'A') && (i <= 'Z') ) + ecgroup[i] = ecgroup[clower( i )]; + + ecgroup[i] = abs( ecgroup[i] ); + mkdata( ecgroup[i] ); + } + + dataend(); + + if ( trace ) + { + fputs( "\n\nEquivalence Classes:\n\n", stderr ); + + numrows = (CSIZE + 1) / 8; + + for ( j = 1; j <= numrows; ++j ) + { + for ( i = j; i <= CSIZE; i = i + numrows ) + { + char *readable_form(); + + fprintf( stderr, "%4s = %-2d", + readable_form( i ), ecgroup[i] ); + + putc( ' ', stderr ); + } + + putc( '\n', stderr ); + } + } + } + + +/* generate the code to find the next state */ + +gen_next_state() + + { + char *char_map = useecs ? 
"e[*yy_cp]" : "*yy_cp"; + + if ( fulltbl ) + { + printf( "while ( (yy_current_state = n[yy_current_state][%s]) > 0 )\n", + char_map ); + + puts( "\t{" ); + + if ( num_backtracking > 0 ) + { + puts( "\tif ( l[yy_current_state] )" ); + puts( "\t\t{" ); + puts( "\t\tyy_last_accepting_state = yy_current_state;" ); + puts( "\t\tyy_last_accepting_cpos = yy_cp;" ); + puts( "\t\t}" ); + } + + puts( "yy_cp++;" ); + puts( "\t}" ); + + puts( "yy_current_state = -yy_current_state;" ); + } + + else if ( fullspd ) + { + puts( "{" ); + puts( "register char yy_c;" ); + printf( "\nfor ( yy_c = %s;\n", char_map ); + puts( " (yy_trans_info = &yy_current_state[yy_c])->v == yy_c;" ); + printf( " yy_c = %s )\n", char_map ); + + puts( "\t{" ); + + puts( "\tyy_current_state += yy_trans_info->n;" ); + + if ( num_backtracking > 0 ) + { + puts( "\tif ( yy_current_state[-1].n )" ); + puts( "\t\t{" ); + puts( "yy_last_accepting_state = yy_current_state;" ); + puts( "yy_last_accepting_cpos = yy_c_buf_p;" ); + puts( "\t\t}" ); + } + + puts( "\t}" ); + puts( "}" ); + } + + else + { /* compressed */ + puts( "do" ); + + puts( "\t{" ); + printf( "\tregister char yy_c = %s;\n", char_map ); + puts( + "\twhile ( c[b[yy_current_state] + yy_sym] != yy_current_state )" ); + puts( "\t\t{" ); + puts( "yy_current_state = d[yy_current_state];" ); + + if ( usemecs ) + { + /* we've arrange it so that templates are never chained + * to one another. 
This means we can afford to make a + * very simple test to see if we need to convert to + * yy_c's meta-equivalence class without worrying + * about erroneously looking up the meta-equivalence + * class twice + */ + puts( "\t\tif ( yy_current_state >= YY_TEMPLATE )" ); + puts( "\t\t\tyy_c = m[yy_c];" ); + } + + puts( "\t\t}" ); + + puts( "\tyy_current_state = n[b[yy_current_state] + yy_c];" ); + + if ( reject ) + puts( "\t*yy_state_ptr++ = yy_current_state;" ); + + puts( "\t}" ); + + if ( interactive ) + puts( "while ( b[yy_current_state] != YY_JAM_BASE );" ); + else + puts( "while ( yy_current_state != YY_JAM );" ); + } + } + + +/* make_tables - generate transition tables + * + * synopsis + * make_tables(); + * + * Generates transition tables and finishes generating output file + */ + +make_tables() + + { + if ( fullspd ) + { /* need to define YY_TRANS_OFFSET_TYPE as a size large + * enough to hold the biggest offset + */ + int total_table_size = tblend + numecs + 1; + + printf( "#define YY_TRANS_OFFSET_TYPE %s\n", + total_table_size > MAX_SHORT ?
"long" : "short" ); + } + + skelout(); + + if ( fullspd || fulltbl ) + { + if ( num_backtracking > 0 ) + { + printf( "#define FLEX_USES_BACKTRACKING\n" ); + printf( "#define YY_BACK_TRACK %d\n", accnum + 1 ); + } + + if ( fullspd ) + genctbl(); + else + genftbl(); + } + + else + gentabs(); + + skelout(); + + (void) fclose( temp_action_file ); + temp_action_file = fopen( action_file_name, "r" ); + + /* copy prolog from action_file to output file */ + action_out(); + + skelout(); + + /* copy actions from action_file to output file */ + action_out(); + + skelout(); + + /* generate code to find next state */ + gen_next_state(); + + skelout(); + + /* copy remainder of input to output */ + + line_directive_out( stdout ); + (void) flexscan(); /* copy remainder of input to output */ + } -- cgit v1.2.3 From b8643800d49340b6a0662a7e6d83653138f98b3a Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 25 Nov 1988 21:27:32 +0000 Subject: added ntod() --- dfa.c | 283 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 283 insertions(+) diff --git a/dfa.c b/dfa.c index 29a8078..8e77e01 100644 --- a/dfa.c +++ b/dfa.c @@ -327,6 +327,289 @@ increase_max_dfas() } +/* ntod - convert an ndfa to a dfa + * + * synopsis + * ntod(); + * + * creates the dfa corresponding to the ndfa we've constructed. the + * dfa starts out in state #1. + */ +ntod() + + { + int *accset, ds, nacc, newds; + int duplist[CSIZE + 1], sym, hashval, numstates, dsize; + int targfreq[CSIZE + 1], targstate[CSIZE + 1], state[CSIZE + 1]; + int *nset, *dset; + int targptr, totaltrans, i, comstate, comfreq, targ; + int *epsclosure(), snstods(), symlist[CSIZE + 1]; + int num_start_states; + + /* this is so find_table_space(...) 
will know where to start looking in + * chk/nxt for unused records for space to put in the state + */ + if ( fullspd ) + firstfree = 0; + + accset = allocate_integer_array( accnum + 1 ); + nset = allocate_integer_array( current_max_dfa_size ); + + todo_head = todo_next = 0; + +#define ADD_QUEUE_ELEMENT(element) \ + if ( ++element >= current_max_dfas ) \ + { /* check for queue overflowing */ \ + if ( todo_head == 0 ) \ + increase_max_dfas(); \ + else \ + element = 0; \ + } + +#define NEXT_QUEUE_ELEMENT(element) ((element + 1) % (current_max_dfas + 1)) + + for ( i = 0; i <= CSIZE; ++i ) + { + duplist[i] = NIL; + symlist[i] = false; + } + + for ( i = 0; i <= accnum; ++i ) + accset[i] = NIL; + + if ( trace ) + { + dumpnfa( scset[1] ); + fputs( "\n\nDFA Dump:\n\n", stderr ); + } + + inittbl(); + + if ( fullspd ) + { + for ( i = 0; i <= numecs; ++i ) + state[i] = 0; + place_state( state, 0, 0 ); + } + + if ( fulltbl ) + { + /* declare it "short" because it's a real long-shot that that + * won't be large enough + */ + printf( "static short int %c[][%d] =\n {\n", NEXTARRAY, + numecs + 1 ); /* '}' so vi doesn't get too confused */ + + /* generate 0 entries for state #0 */ + for ( i = 0; i <= numecs; ++i ) + mk2data( 0 ); + + /* force ',' and dataflush() next call to mk2data */ + datapos = NUMDATAITEMS; + + /* force extra blank line next dataflush() */ + dataline = NUMDATALINES; + } + + /* create the first states */ + + num_start_states = lastsc * 2; + + for ( i = 1; i <= num_start_states; ++i ) + { + numstates = 1; + + /* for each start condition, make one state for the case when + * we're at the beginning of the line (the '%' operator) and + * one for the case when we're not + */ + if ( i % 2 == 1 ) + nset[numstates] = scset[(i / 2) + 1]; + else + nset[numstates] = mkbranch( scbol[i / 2], scset[i / 2] ); + + nset = epsclosure( nset, &numstates, accset, &nacc, &hashval ); + + if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) ) + { + numas = numas + nacc; + totnst 
= totnst + numstates; + + todo[todo_next] = ds; + ADD_QUEUE_ELEMENT(todo_next); + } + } + + if ( ! fullspd ) + { + if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) ) + flexfatal( "could not create unique end-of-buffer state" ); + + numas += 1; + ++num_start_states; + + todo[todo_next] = end_of_buffer_state; + ADD_QUEUE_ELEMENT(todo_next); + } + + while ( todo_head != todo_next ) + { + targptr = 0; + totaltrans = 0; + + for ( i = 1; i <= numecs; ++i ) + state[i] = 0; + + ds = todo[todo_head]; + todo_head = NEXT_QUEUE_ELEMENT(todo_head); + + dset = dss[ds]; + dsize = dfasiz[ds]; + + if ( trace ) + fprintf( stderr, "state # %d:\n", ds ); + + sympartition( dset, dsize, symlist, duplist ); + + for ( sym = 1; sym <= numecs; ++sym ) + { + if ( symlist[sym] ) + { + symlist[sym] = 0; + + if ( duplist[sym] == NIL ) + { /* symbol has unique out-transitions */ + numstates = symfollowset( dset, dsize, sym, nset ); + nset = epsclosure( nset, &numstates, accset, + &nacc, &hashval ); + + if ( snstods( nset, numstates, accset, + nacc, hashval, &newds ) ) + { + totnst = totnst + numstates; + todo[todo_next] = newds; + ADD_QUEUE_ELEMENT(todo_next); + numas = numas + nacc; + } + + state[sym] = newds; + + if ( trace ) + fprintf( stderr, "\t%d\t%d\n", sym, newds ); + + targfreq[++targptr] = 1; + targstate[targptr] = newds; + ++numuniq; + } + + else + { + /* sym's equivalence class has the same transitions + * as duplist(sym)'s equivalence class + */ + targ = state[duplist[sym]]; + state[sym] = targ; + + if ( trace ) + fprintf( stderr, "\t%d\t%d\n", sym, targ ); + + /* update frequency count for destination state */ + + i = 0; + while ( targstate[++i] != targ ) + ; + + ++targfreq[i]; + ++numdup; + } + + ++totaltrans; + duplist[sym] = NIL; + } + } + + numsnpairs = numsnpairs + totaltrans; + + if ( caseins && ! 
useecs ) + { + register int j; + + for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j ) + state[i] = state[j]; + } + + if ( ds > num_start_states ) + check_for_backtracking( ds, state ); + + if ( fulltbl ) + { + /* supply array's 0-element */ + if ( ds == end_of_buffer_state ) + mk2data( -end_of_buffer_state ); + else + mk2data( end_of_buffer_state ); + + for ( i = 1; i <= numecs; ++i ) + /* jams are marked by negative of state number */ + mk2data( state[i] ? state[i] : -ds ); + + /* force ',' and dataflush() next call to mk2data */ + datapos = NUMDATAITEMS; + + /* force extra blank line next dataflush() */ + dataline = NUMDATALINES; + } + + else if ( fullspd ) + place_state( state, ds, totaltrans ); + + else if ( ds == end_of_buffer_state ) + /* special case this state to make sure it does what it's + * supposed to, i.e., jam on end-of-buffer + */ + stack1( ds, 0, 0, JAMSTATE ); + + else /* normal, compressed state */ + { + /* determine which destination state is the most common, and + * how many transitions to it there are + */ + + comfreq = 0; + comstate = 0; + + for ( i = 1; i <= targptr; ++i ) + if ( targfreq[i] > comfreq ) + { + comfreq = targfreq[i]; + comstate = targstate[i]; + } + + bldtbl( state, ds, totaltrans, comstate, comfreq ); + } + } + + if ( fulltbl ) + dataend(); + + else if ( ! 
fullspd ) + { + cmptmps(); /* create compressed template entries */ + + /* create tables for all the states with only one out-transition */ + while ( onesp > 0 ) + { + mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp], + onedef[onesp] ); + --onesp; + } + + mkdeftbl(); + } + + } + + /* snstods - converts a set of ndfa states into a dfa state * * synopsis -- cgit v1.2.3 From 64f79934309a057589406ecbd3229b5287007e64 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 25 Nov 1988 21:28:36 +0000 Subject: Added END_OF_BUFFER_ACTION and bol_needed --- flexdef.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/flexdef.h b/flexdef.h index 7df8148..58c3012 100644 --- a/flexdef.h +++ b/flexdef.h @@ -39,11 +39,10 @@ char *memset(); #ifndef SV #include -#endif - #ifdef lint char *sprintf(); /* keep lint happy */ #endif +#endif /* maximum line length we'll have to deal with */ @@ -72,11 +71,6 @@ char *sprintf(); /* keep lint happy */ #define FAST_SKELETON_FILE "flex.fastskel" #endif -/* special internal nxt[] action number for the "at the end of the - * input buffer" state - */ -#define END_OF_BUFFER_ACTION 0 - /* special chk[] values marking the slots taking by end-of-buffer and action * numbers */ @@ -478,12 +472,13 @@ extern char *ccltbl; * numdup - number of duplicate transitions * hshsave - number of hash collisions saved by checking number of states * num_backtracking - number of DFA states requiring back-tracking + * bol_needed - whether scanner needs beginning-of-line recognition */ extern char *starttime, *endtime, nmstr[MAXLINE]; extern int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; extern int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; -extern int num_backtracking; +extern int num_backtracking, bol_needed; char *allocate_array(), *reallocate_array(); -- cgit v1.2.3 From cab3c88cf1d0762ebedf14d3bf972003c0da7ee9 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 25 Nov 1988 21:30:05 +0000 Subject: added 
-p flag generation of #define's for scanner --- main.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/main.c b/main.c index 1e4f473..feb112f 100644 --- a/main.c +++ b/main.c @@ -51,7 +51,7 @@ char *ccltbl; char *starttime, *endtime, nmstr[MAXLINE]; int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; -int num_backtracking; +int num_backtracking, bol_needed; FILE *temp_action_file; int end_of_buffer_state; #ifndef SHORT_FILE_NAMES @@ -88,6 +88,8 @@ char **argv; /* note, flexend does not return. It exits with its argument as status. */ flexend( 0 ); + + /*NOTREACHED*/ } @@ -134,10 +136,15 @@ int status; if ( num_backtracking == 0 ) fprintf( stderr, " No backtracking\n" ); - else + else if ( fullspd || fulltbl ) fprintf( stderr, " %d backtracking (non-accepting) states\n", num_backtracking ); - + else + fprintf( stderr, " compressed tables always backtrack\n" ); + + if ( bol_needed ) + fprintf( stderr, " Beginning-of-line patterns used\n" ); + fprintf( stderr, " %d/%d start conditions\n", lastsc, current_max_scs ); fprintf( stderr, " %d epsilon states, %d double epsilon states\n", @@ -418,6 +425,7 @@ get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0; numuniq = numdup = hshsave = eofseen = datapos = dataline = 0; num_backtracking = onesp = numprots = 0; + bol_needed = false; linenum = sectnum = 1; firstprot = NIL; @@ -463,27 +471,15 @@ get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ readin() { - fputs( "#define YY_DEFAULT_ACTION ", stdout ); - - if ( spprdflt ) - fputs( "YY_FATAL_ERROR( \"flex scanner jammed\" )", stdout ); - else - fputs( "ECHO", stdout ); - - fputs( ";\n", stdout ); - if ( ddebug ) puts( "#define FLEX_DEBUG" ); - if ( useecs ) - puts( "#define FLEX_USE_ECS" ); - if ( usemecs ) - puts( "#define 
FLEX_USE_MECS" ); - if ( interactive ) - puts( "#define FLEX_INTERACTIVE_SCANNER" ); - if ( reject ) - puts( "#define FLEX_REJECT_ENABLED" ); + if ( fulltbl ) puts( "#define FLEX_FULL_TABLE" ); + else if ( fullspd ) + puts( "#define FLEX_FAST_COMPRESSED" ); + else + puts( "#define FLEX_COMPRESSED" ); skelout(); -- cgit v1.2.3 From a3d014f763f47eec1265a918a5f1a4c7cc014691 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 19 May 1989 13:56:03 +0000 Subject: updated for beta release --- README | 41 +++++++++-------------------------------- 1 file changed, 9 insertions(+), 32 deletions(-) diff --git a/README b/README index dabbaaf..3c77332 100644 --- a/README +++ b/README @@ -1,27 +1,3 @@ -This is the initial release of flex, a replacement for the lex(1) -tool. As the copyright indicates, this distribution can be freely -redistributed. - -Some notes on the distribution: - - Yes, there are some niggling lex features which are not available which - seem like they'd be easy to add. They're not, or if they are then the - straight-forward implementation of them would slow down the scanner. - Unfortunately I am unable to do any further work on flex other than bug - fixes, so if there's something you've just gotta have, you'd better - be willing to dive into the code. I'll be happy to give (fairly - high-level) advice on how to proceed. - - The compressed tables have been tested pretty thoroughly in the past, - though may be suffering from bit-rot. The fast/full tables have been - recently implemented and are more likely to have bugs. - - For a System V machine, add the #define "SV". Not guaranteed to do - the full job, but a step in the right direction. - - Flex has been successfully ported to Sun Unix and 4.3BSD Vax Unix. - - The flex distribution consists of the following files: README This message @@ -78,7 +54,8 @@ and wire in the full pathname of where you are going to keep the include files. 
Decide where you want to keep {flex.skel,flex.fastskel} (suggestion: /usr/local/lib) and move it there. Edit "Makefile" and change the definitions of SKELETON_FILE and F_SKELETON_FILE to reflect the full -pathnames of {flex.skel,flex.fastskel}. +pathnames of {flex.skel,flex.fastskel}. For a System V machine, add +"-DSV" to CFLAGS. To make flex for the first time, use: @@ -101,11 +78,11 @@ you're done. To rebuild it, do where "..." is one of: - -ist -c - -ist -ce - -ist -cm - -ist -cfe - -ist -cFe + -c + -ce + -cm + -cfe + -cFe and testing using: @@ -119,10 +96,10 @@ Format the manual entry using Please send problems and feedback to: - vern@lbl-{csam,rtsg}.arpa or ucbvax!lbl-csam.arpa!vern + vern@{csam.lbl.gov,rtsg.ee.lbl.gov} or ucbvax!csam.lbl.gov!vern Vern Paxson - Real Time Systems Group + Real Time Systems Bldg. 46A Lawrence Berkeley Laboratory 1 Cyclotron Rd. -- cgit v1.2.3 From c5af13dd429d71393f914df3a9b1444247a880d6 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 19 May 1989 13:56:38 +0000 Subject: list_character_set() modified to take a FILE to write to ... --- ccl.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/ccl.c b/ccl.c index 48addb3..7d1b552 100644 --- a/ccl.c +++ b/ccl.c @@ -71,7 +71,7 @@ int cclinit() { if ( ++lastccl >= current_maxccls ) { - current_maxccls += MAXCCLS_INCREMENT; + current_maxccls += MAX_CCLS_INCREMENT; ++num_reallocs; @@ -118,21 +118,23 @@ int cclp; * * synopsis * int cset[CSIZE + 1]; + * FILE *file; * list_character_set( cset ); * - * writes to stderr a character-class representation of those characters - * present in the given set. A character is present if it has a non-zero - * value in the set array. + * writes to the given file a character-class representation of those + * characters present in the given set. A character is present if it + * has a non-zero value in the set array. 
*/ -list_character_set( cset ) +list_character_set( file, cset ) +FILE *file; int cset[]; { register int i; char *readable_form(); - putc( '[', stderr ); + putc( '[', file ); for ( i = 1; i <= CSIZE; ++i ) { @@ -140,20 +142,20 @@ int cset[]; { register int start_char = i; - putc( ' ', stderr ); + putc( ' ', file ); - fputs( readable_form( i ), stderr ); + fputs( readable_form( i ), file ); while ( ++i <= CSIZE && cset[i] ) ; if ( i - 1 > start_char ) /* this was a run */ - fprintf( stderr, "-%s", readable_form( i - 1 ) ); + fprintf( file, "-%s", readable_form( i - 1 ) ); - putc( ' ', stderr ); + putc( ' ', file ); } } - putc( ']', stderr ); + putc( ']', file ); } -- cgit v1.2.3 From 74139db5555c4794065728c8a4339de09437647b Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 19 May 1989 14:01:30 +0000 Subject: added backtrack report added checking for dangerous trailing context considerable minor cleanup --- dfa.c | 199 +++++++++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 124 insertions(+), 75 deletions(-) diff --git a/dfa.c b/dfa.c index 8e77e01..796a8d3 100644 --- a/dfa.c +++ b/dfa.c @@ -40,17 +40,79 @@ int state[]; { /* state is non-accepting */ ++num_backtracking; - if ( performance_report ) + if ( backtrack_report ) { - fprintf( stderr, "State #%d is non-accepting -\n", ds ); + fprintf( backtrack_file, "State #%d is non-accepting -\n", ds ); /* identify the state */ - dump_associated_rules( ds ); + dump_associated_rules( backtrack_file, ds ); /* now identify it further using the out- and jam-transitions */ - dump_transitions( state ); + dump_transitions( backtrack_file, state ); - putc( '\n', stderr ); + putc( '\n', backtrack_file ); + } + } + } + + +/* check_trailing_context - check to see if NFA state set constitutes + * "dangerous" trailing context + * + * synopsis + * int nfa_states[num_states+1], num_states; + * int accset[nacc+1], nacc; + * int check_trailing_context(); + * true/false = check_trailing_context( nfa_states, 
num_states, + * accset, nacc ); + * + * NOTES + * Trailing context is "dangerous" if both the head and the trailing + * part are of variable size \and/ there's a DFA state which contains + * both an accepting state for the head part of the rule and NFA states + * which occur after the beginning of the trailing context. + * When such a rule is matched, it's impossible to tell if having been + * in the DFA state indicates the beginning of the trailing context + * or further-along scanning of the pattern. In these cases, a warning + * message is issued. + * + * nfa_states[1 .. num_states] is the list of NFA states in the DFA. + * accset[1 .. nacc] is the list of accepting numbers for the DFA state. + */ + +int check_trailing_context( nfa_states, num_states, accset, nacc ) +int *nfa_states, num_states; +int *accset; +register int nacc; + + { + register int i, j; + + for ( i = 1; i <= num_states; ++i ) + { + int ns = nfa_states[i]; + register int type = state_type[ns]; + register int ar = assoc_rule[ns]; + + if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE ) + { /* do nothing */ + } + + else if ( type == STATE_TRAILING_CONTEXT ) + { + /* potential trouble. Scan set of accepting numbers for + * the one marking the end of the "head". We assume that + * this looping will be fairly cheap since it's rare that + * an accepting number set is large. 
+ */ + for ( j = 1; j <= nacc; ++j ) + if ( accset[j] & YY_TRAILING_HEAD_MASK ) + { + fprintf( stderr, + "flex: Dangerous trailing context in rule at line %d\n", + rule_linenum[ar] ); + return; + } } } } @@ -60,51 +122,53 @@ int state[]; * * synopisis * int ds; - * dump_associated_rules( ds ); + * FILE *file; + * dump_associated_rules( file, ds ); * * goes through the set of NFA states associated with the DFA and * extracts the first MAX_ASSOC_RULES unique rules, sorts them, - * and writes a report to stderr + * and writes a report to the given file */ -dump_associated_rules( ds ) +dump_associated_rules( file, ds ) +FILE *file; int ds; { register int i, j; - register int rule_set[MAX_ASSOC_RULES + 1]; - register int num_rules = 0; + register int num_associated_rules = 0; + int rule_set[MAX_ASSOC_RULES + 1]; int *dset = dss[ds]; int size = dfasiz[ds]; for ( i = 1; i <= size; ++i ) { - register rule_num = assoc_rule[dset[i]]; + register rule_num = rule_linenum[assoc_rule[dset[i]]]; - for ( j = 1; j <= num_rules; ++j ) + for ( j = 1; j <= num_associated_rules; ++j ) if ( rule_num == rule_set[j] ) break; - if ( j > num_rules ) + if ( j > num_associated_rules ) { /* new rule */ - if ( num_rules < MAX_ASSOC_RULES ) - rule_set[++num_rules] = rule_num; + if ( num_associated_rules < MAX_ASSOC_RULES ) + rule_set[++num_associated_rules] = rule_num; } } - bubble( rule_set, num_rules ); + bubble( rule_set, num_associated_rules ); - fprintf( stderr, " associated rules:" ); + fprintf( file, " associated rules:" ); - for ( i = 1; i <= num_rules; ++i ) + for ( i = 1; i <= num_associated_rules; ++i ) { if ( i % 8 == 1 ) - putc( '\n', stderr ); + putc( '\n', file ); - fprintf( stderr, "\t%d", rule_set[i] ); + fprintf( file, "\t%d", rule_set[i] ); } - putc( '\n', stderr ); + putc( '\n', file ); } @@ -112,14 +176,17 @@ int ds; * * synopisis * int state[numecs]; - * dump_transitions( state ); + * FILE *file; + * dump_transitions( file, state ); * * goes through the set of 
out-transitions and lists them in human-readable * form (i.e., not as equivalence classes); also lists jam transitions - * (i.e., all those which are not out-transitions, plus EOF) + * (i.e., all those which are not out-transitions, plus EOF). The dump + * is done to the given file. */ -dump_transitions( state ) +dump_transitions( file, state ) +FILE *file; int state[]; { @@ -136,26 +203,26 @@ int state[]; out_char_set[i] = state[ec]; } - fprintf( stderr, " out-transitions: " ); + fprintf( file, " out-transitions: " ); - list_character_set( out_char_set ); + list_character_set( file, out_char_set ); /* now invert the members of the set to get the jam transitions */ for ( i = 1; i <= CSIZE; ++i ) out_char_set[i] = ! out_char_set[i]; - fprintf( stderr, "\n jam-transitions: EOF " ); + fprintf( file, "\n jam-transitions: EOF " ); - list_character_set( out_char_set ); + list_character_set( file, out_char_set ); - putc( '\n', stderr ); + putc( '\n', file ); } /* epsclosure - construct the epsilon closure of a set of ndfa states * * synopsis - * int t[current_max_dfa_size], numstates, accset[accnum + 1], nacc; + * int t[current_max_dfa_size], numstates, accset[num_rules + 1], nacc; * int hashval; * int *epsclosure(); * t = epsclosure( t, &numstates, accset, &nacc, &hashval ); @@ -299,8 +366,6 @@ int *t, *ns_addr, accset[], *nacc_addr, *hv_addr; increase_max_dfas() { - int old_max = current_max_dfas; - current_max_dfas += MAX_DFAS_INCREMENT; ++num_reallocs; @@ -310,20 +375,8 @@ increase_max_dfas() dfasiz = reallocate_integer_array( dfasiz, current_max_dfas ); accsiz = reallocate_integer_array( accsiz, current_max_dfas ); dhash = reallocate_integer_array( dhash, current_max_dfas ); - todo = reallocate_integer_array( todo, current_max_dfas ); dss = reallocate_int_ptr_array( dss, current_max_dfas ); dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas ); - - /* fix up todo queue */ - if ( todo_next < todo_head ) - { /* queue was wrapped around the end */ - register int 
i; - - for ( i = 0; i < todo_next; ++i ) - todo[old_max + i] = todo[i]; - - todo_next += old_max; - } } @@ -345,6 +398,7 @@ ntod() int targptr, totaltrans, i, comstate, comfreq, targ; int *epsclosure(), snstods(), symlist[CSIZE + 1]; int num_start_states; + int todo_head, todo_next; /* this is so find_table_space(...) will know where to start looking in * chk/nxt for unused records for space to put in the state @@ -352,29 +406,24 @@ ntod() if ( fullspd ) firstfree = 0; - accset = allocate_integer_array( accnum + 1 ); + accset = allocate_integer_array( num_rules + 1 ); nset = allocate_integer_array( current_max_dfa_size ); + /* the "todo" queue is represented by the head, which is the DFA + * state currently being processed, and the "next", which is the + * next DFA state number available (not in use). We depend on the + * fact that snstods() returns DFA's \in increasing order/, and thus + * need only know the bounds of the dfas to be processed. + */ todo_head = todo_next = 0; -#define ADD_QUEUE_ELEMENT(element) \ - if ( ++element >= current_max_dfas ) \ - { /* check for queue overflowing */ \ - if ( todo_head == 0 ) \ - increase_max_dfas(); \ - else \ - element = 0; \ - } - -#define NEXT_QUEUE_ELEMENT(element) ((element + 1) % (current_max_dfas + 1)) - for ( i = 0; i <= CSIZE; ++i ) { duplist[i] = NIL; symlist[i] = false; } - for ( i = 0; i <= accnum; ++i ) + for ( i = 0; i <= num_rules; ++i ) accset[i] = NIL; if ( trace ) @@ -397,7 +446,7 @@ ntod() /* declare it "short" because it's a real long-shot that that * won't be large enough */ - printf( "static short int %c[][%d] =\n {\n", NEXTARRAY, + printf( "static short int %s[][%d] =\n {\n", NEXTARRAY, numecs + 1 ); /* '}' so vi doesn't get too confused */ /* generate 0 entries for state #0 */ @@ -432,11 +481,12 @@ ntod() if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) ) { - numas = numas + nacc; - totnst = totnst + numstates; + numas += nacc; + totnst += numstates; + ++todo_next; - todo[todo_next] = ds; 
- ADD_QUEUE_ELEMENT(todo_next); + if ( variable_trailing_context_rules && nacc > 0 ) + check_trailing_context( nset, numstates, accset, nacc ); } } @@ -445,14 +495,12 @@ ntod() if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) ) flexfatal( "could not create unique end-of-buffer state" ); - numas += 1; + ++numas; ++num_start_states; - - todo[todo_next] = end_of_buffer_state; - ADD_QUEUE_ELEMENT(todo_next); + ++todo_next; } - while ( todo_head != todo_next ) + while ( todo_head < todo_next ) { targptr = 0; totaltrans = 0; @@ -460,8 +508,7 @@ ntod() for ( i = 1; i <= numecs; ++i ) state[i] = 0; - ds = todo[todo_head]; - todo_head = NEXT_QUEUE_ELEMENT(todo_head); + ds = ++todo_head; dset = dss[ds]; dsize = dfasiz[ds]; @@ -487,9 +534,12 @@ ntod() nacc, hashval, &newds ) ) { totnst = totnst + numstates; - todo[todo_next] = newds; - ADD_QUEUE_ELEMENT(todo_next); - numas = numas + nacc; + ++todo_next; + numas += nacc; + + if ( variable_trailing_context_rules && nacc > 0 ) + check_trailing_context( nset, numstates, + accset, nacc ); } state[sym] = newds; @@ -606,14 +656,13 @@ ntod() mkdeftbl(); } - } /* snstods - converts a set of ndfa states into a dfa state * * synopsis - * int sns[numstates], numstates, newds, accset[accnum + 1], nacc, hashval; + * int sns[numstates], numstates, newds, accset[num_rules + 1], nacc, hashval; * int snstods(); * is_new_state = snstods( sns, numstates, accset, nacc, hashval, &newds ); * @@ -724,7 +773,7 @@ int sns[], numstates, accset[], nacc, hashval, *newds_addr; else { /* find lowest numbered rule so the disambiguating rule will work */ - j = accnum + 1; + j = num_rules + 1; for ( i = 1; i <= nacc; ++i ) if ( accset[i] < j ) -- cgit v1.2.3 From 27a1d11a83ff3b7f4808a3427e0a0a3125bb5ffe Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 19 May 1989 14:05:17 +0000 Subject: a zillion changes/additions/cleanups --- flexdef.h | 117 ++++++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 92 insertions(+), 25 
deletions(-) diff --git a/flexdef.h b/flexdef.h index 58c3012..0d9dab8 100644 --- a/flexdef.h +++ b/flexdef.h @@ -115,14 +115,35 @@ char *sprintf(); /* keep lint happy */ * in the array. */ -#define ALIST 'l' /* points to list of rules accepted for a state */ -#define ACCEPT 'a' /* list of rules accepted for a state */ -#define ECARRAY 'e' /* maps input characters to equivalence classes */ -#define MATCHARRAY 'm' /* maps equivalence classes to meta-equivalence classes */ -#define BASEARRAY 'b' /* "base" array */ -#define DEFARRAY 'd' /* "default" array */ -#define NEXTARRAY 'n' /* "next" array */ -#define CHECKARRAY 'c' /* "check" array */ +/* points to list of rules accepted for a state */ +#define ALIST "yy_accept" +#define ACCEPT "yy_acclist" /* list of rules accepted for a state */ +#define ECARRAY "yy_ec" /* maps input characters to equivalence classes */ +/* maps equivalence classes to meta-equivalence classes */ +#define MATCHARRAY "yy_meta" +#define BASEARRAY "yy_base" /* "base" array */ +#define DEFARRAY "yy_def" /* "default" array */ +#define NEXTARRAY "yy_nxt" /* "next" array */ +#define CHECKARRAY "yy_chk" /* "check" array */ + + +/* a note on the following masks. They are used to mark accepting numbers + * as being special. As such, they implicitly limit the number of accepting + * numbers (i.e., rules) because if there are too many rules the rule numbers + * will overload the mask bits. Fortunately, this limit is \large/ (0x2000 == + * 8192) so unlikely to actually cause any problems. A check is made in + * new_rule() to ensure that this limit is not reached. + */ + +/* mask to mark a trailing context accepting number */ +#define YY_TRAILING_MASK 0x2000 + +/* mask to mark the accepting number of the "head" of a trailing context rule */ +#define YY_TRAILING_HEAD_MASK 0x4000 + +/* maximum number of rules, as outlined in the above note */ +#define MAX_RULE (YY_TRAILING_MASK - 1) + /* NIL must be 0. 
If not, its special meaning when making equivalence classes * (it marks the representative of a given e.c.) will be unidentifiable @@ -137,13 +158,16 @@ char *sprintf(); /* keep lint happy */ /* size of input alphabet - should be size of ASCII set */ #define CSIZE 127 -#define INITIAL_MAXCCLS 100 /* max number of unique character classes */ -#define MAXCCLS_INCREMENT 100 +#define INITIAL_MAX_CCLS 100 /* max number of unique character classes */ +#define MAX_CCLS_INCREMENT 100 /* size of table holding members of character classes */ #define INITIAL_MAX_CCL_TBL_SIZE 500 #define MAX_CCL_TBL_SIZE_INCREMENT 250 +#define INITIAL_MAX_RULES 100 /* default maximum number of rules */ +#define MAX_RULES_INCREMENT 100 + #define INITIAL_MNS 2000 /* default maximum number of nfa states */ #define MNS_INCREMENT 1000 /* amount to bump above by if it's not enough */ @@ -289,34 +313,49 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; * useecs - if true (-ce flag), use equivalence classes * fulltbl - if true (-cf flag), don't compress the DFA state table * usemecs - if true (-cm flag), use meta-equivalence classes - * reject - if true (-r flag), generate tables for REJECT macro * fullspd - if true (-F flag), use Jacobson method of table representation * gen_line_dirs - if true (i.e., no -L flag), generate #line directives * performance_report - if true (i.e., -p flag), generate a report relating * to scanner performance + * backtrack_report - if true (i.e., -b flag), generate "lex.backtrack" file + * listing backtracking states + * yymore_used - if true, yymore() is used in input rules + * reject - if true, generate backtracking tables for REJECT macro + * real_reject - if true, scanner really uses REJECT (as opposed to just + * having "reject" set for variable trailing context) + * yymore_really_used - has a REALLY_xxx value indicating whether a + * %used or %notused was used with yymore() + * reject_really_used - same for REJECT */ extern int printstats, syntaxerror, eofseen, 
ddebug, trace, spprdflt; -extern int interactive, caseins, useecs, fulltbl, usemecs, reject; -extern int fullspd, gen_line_dirs, performance_report; +extern int interactive, caseins, useecs, fulltbl, usemecs; +extern int fullspd, gen_line_dirs, performance_report, backtrack_report; +extern int yymore_used, reject, real_reject; + +#define REALLY_NOT_DETERMINED 0 +#define REALLY_USED 1 +#define REALLY_NOT_USED 2 +extern int yymore_really_used, reject_really_used; /* variables used in the flex input routines: * datapos - characters on current output line * dataline - number of contiguous lines of data in current data * statement. Used to generate readable -f output - * skelfile - fd of the skeleton file + * skelfile - the skeleton file * yyin - input file * temp_action_file - temporary file to hold actions + * backtrack_file - file to summarize backtracking states to * action_file_name - name of the temporary file * infilename - name of input file * linenum - current input line number */ extern int datapos, dataline, linenum; -extern FILE *skelfile, *yyin, *temp_action_file; +extern FILE *skelfile, *yyin, *temp_action_file, *backtrack_file; extern char *infilename; -extern char *action_file_name; +extern char action_file_name[]; /* variables for stack of states having only one out-transition: @@ -333,7 +372,9 @@ extern int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; /* variables for nfa machine data: * current_mns - current maximum on number of NFA states - * accnum - number of the last accepting state + * num_rules - number of the last accepting state; also is number of + * rules created so far + * current_max_rules - current maximum number of rules * lastnfa - last nfa state number created * firstst - physically the first state of a fragment * lastst - last physical state of fragment @@ -343,11 +384,40 @@ extern int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; * trans2 - 2nd transition state for epsilons * accptnum - accepting number * 
assoc_rule - rule associated with this NFA state (or 0 if none) + * state_type - a STATE_xxx type identifying whether the state is part + * of a normal rule, the leading state in a trailing context + * rule (i.e., the state which marks the transition from + * recognizing the text-to-be-matched to the beginning of + * the trailing context), or a subsequent state in a trailing + * context rule + * rule_type - a RULE_xxx type identifying whether this a a ho-hum + * normal rule or one which has variable head & trailing + * context + * rule_linenum - line number associated with rule + */ + +extern int current_mns, num_rules, current_max_rules, lastnfa; +extern int *firstst, *lastst, *finalst, *transchar, *trans1, *trans2; +extern int *accptnum, *assoc_rule, *state_type, *rule_type, *rule_linenum; + +/* different types of states; values are useful as masks, as well, for + * routines like check_trailing_context() */ +#define STATE_NORMAL 0x1 +#define STATE_TRAILING_CONTEXT 0x2 -extern int current_mns, accnum, lastnfa; -extern int *firstst, *lastst, *finalst, *transchar, *trans1, *trans2; -extern int *accptnum, *assoc_rule; +/* global holding current type of state we're making */ + +extern int current_state_type; + +/* different types of rules */ +#define RULE_NORMAL 0 +#define RULE_VARIABLE 1 + +/* true if the input rules include a rule with both variable-length head + * and trailing context, false otherwise + */ +extern int variable_trailing_context_rules; /* variables for protos: @@ -411,11 +481,8 @@ extern int lastsc, current_max_scs, *scset, *scbol, *scxclu, *actvsc; * -r is not given) * accsiz - size of accepting set for each dfa state * dhash - dfa state hash value - * todo - queue of DFAs still to be processed - * todo_head - head of todo queue - * todo_next - next available entry on todo queue * numas - number of DFA accepting states created; note that this - * is not necessarily the same value as accnum, which is the analogous + * is not necessarily the same 
value as num_rules, which is the analogous * value for the NFA * numsnpairs - number of state/nextstate transition pairs * jambase - position in base/def where the default jam table starts @@ -432,7 +499,7 @@ extern union dfaacc_union int *dfaacc_set; int dfaacc_state; } *dfaacc; -extern int *accsiz, *dhash, *todo, todo_head, todo_next, numas; +extern int *accsiz, *dhash, numas; extern int numsnpairs, jambase, jamstate; extern int end_of_buffer_state; -- cgit v1.2.3 From 4ef08d86daf586c928100a6bf18c499e88b3bd90 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 19 May 1989 14:06:22 +0000 Subject: major overhaul for merged skeleton --- gen.c | 777 +++++++++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 600 insertions(+), 177 deletions(-) diff --git a/gen.c b/gen.c index a17f709..52a42a6 100644 --- a/gen.c +++ b/gen.c @@ -20,6 +20,87 @@ static char rcsid[] = #endif +static int indent_level = 0; /* each level is 4 spaces */ + +#define indent_up() (++indent_level) +#define indent_down() (--indent_level) +#define set_indent(indent_val) indent_level = indent_val + + + +/* indent to the current level */ + +do_indent() + + { + register int i = indent_level * 4; + + while ( i >= 8 ) + { + putchar( '\t' ); + i -= 8; + } + + while ( i > 0 ) + { + putchar( ' ' ); + --i; + } + } + + +/* generate the code to keep backtracking information */ + +gen_backtracking() + + { + if ( reject || num_backtracking == 0 ) + return; + + if ( fullspd ) + indent_puts( "if ( yy_current_state[-1].yy_nxt )" ); + else + indent_puts( "if ( yy_accept[yy_current_state] )" ); + + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_last_accepting_state = yy_current_state;" ); + indent_puts( "yy_last_accepting_cpos = yy_cp;" ); + indent_puts( "}" ); + indent_down(); + } + + +/* generate the code to perform the backtrack */ + +gen_bt_action() + + { + if ( reject || num_backtracking == 0 ) + return; + + set_indent( 4 ); + + indent_puts( "case 0: /* must backtrack */" ); + 
indent_puts( "/* undo the effects of YY_DO_BEFORE_ACTION */" ); + indent_puts( "*yy_cp = yy_hold_char;" ); + + if ( fullspd || fulltbl ) + indent_puts( "yy_cp = yy_last_accepting_cpos + 1;" ); + else + /* backtracking info for compressed tables is taken \after/ + * yy_cp has been incremented for the next state + */ + indent_puts( "yy_cp = yy_last_accepting_cpos;" ); + + indent_puts( "yy_current_state = yy_last_accepting_state;" ); + indent_puts( "continue; /* go to \"YY_DO_BEFORE_ACTION\" */" ); + putchar( '\n' ); + + set_indent( 0 ); + } + + /* genctbl - generates full speed compressed transition table * * synopsis @@ -30,6 +111,7 @@ genctbl() { register int i; + int end_of_buffer_action = num_rules + 1; /* table of verify for transition and offset to next state */ printf( "static struct yy_trans_info yy_transition[%d] =\n", @@ -55,7 +137,7 @@ genctbl() */ base[lastdfa + 1] = tblend + 2; - nxt[tblend + 1] = END_OF_BUFFER_ACTION; + nxt[tblend + 1] = end_of_buffer_action; chk[tblend + 1] = numecs + 1; chk[tblend + 2] = 1; /* anything but EOB */ nxt[tblend + 2] = 0; /* so that "make test" won't show arb. differences */ @@ -67,7 +149,7 @@ genctbl() chk[base[i]] = EOB_POSITION; chk[base[i] - 1] = ACTION_POSITION; - nxt[base[i] - 1] = anum ? 
anum : accnum + 1; /* action number */ + nxt[base[i] - 1] = anum; /* action number */ } dataline = 0; @@ -97,7 +179,7 @@ genctbl() printf( "\n" ); /* table of pointers to start states */ - printf( "static struct yy_trans_info *yy_state_ptr[%d] =\n", + printf( "static struct yy_trans_info *yy_start_state_list[%d] =\n", lastsc * 2 + 1 ); printf( " {\n" ); @@ -111,6 +193,170 @@ genctbl() } +/* generate equivalence-class tables */ + +genecs() + + { + register int i, j; + static char C_char_decl[] = "static char %s[%d] =\n { 0,\n"; + int numrows; + char clower(); + + printf( C_char_decl, ECARRAY, CSIZE + 1 ); + + for ( i = 1; i <= CSIZE; ++i ) + { + if ( caseins && (i >= 'A') && (i <= 'Z') ) + ecgroup[i] = ecgroup[clower( i )]; + + ecgroup[i] = abs( ecgroup[i] ); + mkdata( ecgroup[i] ); + } + + dataend(); + + if ( trace ) + { + fputs( "\n\nEquivalence Classes:\n\n", stderr ); + + numrows = (CSIZE + 1) / 8; + + for ( j = 1; j <= numrows; ++j ) + { + for ( i = j; i <= CSIZE; i = i + numrows ) + { + char *readable_form(); + + fprintf( stderr, "%4s = %-2d", + readable_form( i ), ecgroup[i] ); + + putc( ' ', stderr ); + } + + putc( '\n', stderr ); + } + } + } + + +/* generate the code to find the action number */ + +gen_find_action() + + { + if ( fullspd ) + indent_puts( "yy_act = yy_current_state[-1].yy_nxt;" ); + + else if ( fulltbl ) + indent_puts( "yy_act = yy_accept[yy_current_state];" ); + + else if ( reject ) + { + indent_puts( "yy_current_state = *--yy_state_ptr;" ); + indent_puts( "yy_lp = yy_accept[yy_current_state];" ); + + puts( "find_rule: /* we branch to this label when backtracking */" ); + + indent_puts( "for ( ; ; ) /* until we find what rule we matched */" ); + + indent_up(); + + indent_puts( "{" ); + + indent_puts( "if ( yy_lp && yy_lp < yy_accept[yy_current_state + 1] )" ); + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_act = yy_acclist[yy_lp];" ); + + if ( variable_trailing_context_rules ) + { + indent_puts( "if ( yy_act & YY_TRAILING_HEAD_MASK 
||" ); + indent_puts( " yy_looking_for_trail_begin )" ); + indent_up(); + indent_puts( "{" ); + + indent_puts( "if ( yy_act == yy_looking_for_trail_begin )" ); + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_looking_for_trail_begin = 0;" ); + indent_puts( "yy_act &= ~YY_TRAILING_HEAD_MASK;" ); + indent_puts( "break;" ); + indent_puts( "}" ); + indent_down(); + + indent_puts( "}" ); + indent_down(); + + indent_puts( "else if ( yy_act & YY_TRAILING_MASK )" ); + indent_up(); + indent_puts( "{" ); + indent_puts( + "yy_looking_for_trail_begin = yy_act & ~YY_TRAILING_MASK;" ); + indent_puts( + "yy_looking_for_trail_begin |= YY_TRAILING_HEAD_MASK;" ); + + if ( real_reject ) + { + /* remember matched text in case we back up due to REJECT */ + indent_puts( "yy_full_match = yy_cp;" ); + indent_puts( "yy_full_state = yy_state_ptr;" ); + indent_puts( "yy_full_lp = yy_lp;" ); + } + + indent_puts( "}" ); + indent_down(); + + indent_puts( "else" ); + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_full_match = yy_cp;" ); + indent_puts( "yy_full_state = yy_state_ptr;" ); + indent_puts( "yy_full_lp = yy_lp;" ); + indent_puts( "break;" ); + indent_puts( "}" ); + indent_down(); + + indent_puts( "++yy_lp;" ); + indent_puts( "goto find_rule;" ); + } + + else + { + /* remember matched text in case we back up due to trailing context + * plus REJECT + */ + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_full_match = yy_cp;" ); + indent_puts( "break;" ); + indent_puts( "}" ); + indent_down(); + } + + indent_puts( "}" ); + indent_down(); + + indent_puts( "--yy_cp;" ); + + /* we could consolidate the following two lines with those at + * the beginning, but at the cost of complaints that we're + * branching inside a loop + */ + indent_puts( "yy_current_state = *--yy_state_ptr;" ); + indent_puts( "yy_lp = yy_accept[yy_current_state];" ); + + indent_puts( "}" ); + + indent_down(); + } + + else + /* compressed */ + indent_puts( "yy_act = yy_accept[yy_current_state];" ); + } + 
+ /* genftbl - generates full transition table * * synopsis @@ -121,28 +367,23 @@ genftbl() { register int i; + int end_of_buffer_action = num_rules + 1; /* *everything* is done in terms of arrays starting at 1, so provide * a null entry for the zero element of all C arrays */ - static char C_short_decl[] = "static short int %c[%d] =\n { 0,\n"; - static char C_char_decl[] = "static char %c[%d] =\n { 0,\n"; + static char C_short_decl[] = "static short int %s[%d] =\n { 0,\n"; -#ifdef UNSIGNED_CHAR printf( C_short_decl, ALIST, lastdfa + 1 ); -#else - printf( accnum > 127 ? C_short_decl : C_char_decl, ALIST, lastdfa + 1 ); -#endif + + + dfaacc[end_of_buffer_state].dfaacc_state = end_of_buffer_action; for ( i = 1; i <= lastdfa; ++i ) { register int anum = dfaacc[i].dfaacc_state; - if ( i == end_of_buffer_state ) - mkdata( END_OF_BUFFER_ACTION ); - - else - mkdata( anum ? anum : accnum + 1 ); + mkdata( anum ); if ( trace && anum ) fprintf( stderr, "state # %d accepts: [%d]\n", i, anum ); @@ -159,6 +400,205 @@ genftbl() } +/* generate the code to find the next compressed-table state */ + +gen_next_compressed_state() + + { + char *char_map = useecs ? "yy_ec[*yy_cp]" : "*yy_cp"; + + indent_put2s( "register char yy_c = %s;", char_map ); + + /* save the backtracking info \before/ computing the next state + * because we always compute one more state than needed - we + * always proceed until we reach a jam state + */ + gen_backtracking(); + + indent_puts( + "while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )" ); + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_current_state = yy_def[yy_current_state];" ); + + if ( usemecs ) + { + /* we've arrange it so that templates are never chained + * to one another. 
This means we can afford make a + * very simple test to see if we need to convert to + * yy_c's meta-equivalence class without worrying + * about erroneously looking up the meta-equivalence + * class twice + */ + do_indent(); + /* lastdfa + 2 is the beginning of the templates */ + printf( "if ( yy_current_state >= %d )\n", lastdfa + 2 ); + + indent_up(); + indent_puts( "yy_c = yy_meta[yy_c];" ); + indent_down(); + } + + indent_puts( "}" ); + indent_down(); + + indent_puts( + "yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];" ); + } + + +/* generate the code to find the next match */ + +gen_next_match() + + { /* NOTE - changes in here should be reflected in get_next_state() */ + char *char_map = useecs ? "yy_ec[*yy_cp]" : "*yy_cp"; + char *char_map_2 = useecs ? "yy_ec[*++yy_cp]" : "*++yy_cp"; + + if ( fulltbl ) + { + indent_put2s( + "while ( (yy_current_state = yy_nxt[yy_current_state][%s]) > 0 )", + char_map ); + + indent_up(); + + if ( num_backtracking > 0 ) + { + indent_puts( "{" ); + gen_backtracking(); + putchar( '\n' ); + } + + indent_puts( "++yy_cp;" ); + + if ( num_backtracking > 0 ) + indent_puts( "}" ); + + indent_down(); + + putchar( '\n' ); + indent_puts( "yy_current_state = -yy_current_state;" ); + } + + else if ( fullspd ) + { + indent_puts( "{" ); + indent_puts( "register struct yy_trans_info *yy_trans_info;\n" ); + indent_puts( "register char yy_c;\n" ); + indent_put2s( "for ( yy_c = %s;", char_map ); + indent_puts( + " (yy_trans_info = &yy_current_state[yy_c])->yy_verify == yy_c;" ); + indent_put2s( " yy_c = %s )", char_map_2 ); + + indent_up(); + + if ( num_backtracking > 0 ) + indent_puts( "{" ); + + indent_puts( "yy_current_state += yy_trans_info->yy_nxt;" ); + + if ( num_backtracking > 0 ) + { + putchar( '\n' ); + gen_backtracking(); + indent_puts( "}" ); + } + + indent_down(); + indent_puts( "}" ); + } + + else + { /* compressed */ + indent_puts( "do" ); + + indent_up(); + indent_puts( "{" ); + + gen_next_state(); + + indent_puts( 
"++yy_cp;" ); + + indent_puts( "}" ); + indent_down(); + + do_indent(); + + if ( interactive ) + printf( "while ( yy_base[yy_current_state] != %d );\n", jambase ); + else + printf( "while ( yy_current_state != %d );\n", jamstate ); + + if ( ! reject ) + { + /* do the guaranteed-needed backtrack to figure out the match */ + indent_puts( "yy_cp = yy_last_accepting_cpos;" ); + indent_puts( "yy_current_state = yy_last_accepting_state;" ); + } + } + } + + +/* generate the code to find the next state */ + +gen_next_state() + + { /* NOTE - changes in here should be reflected in get_next_match() */ + char *char_map = useecs ? "yy_ec[*yy_cp]" : "*yy_cp"; + + if ( fulltbl ) + { + indent_put2s( "yy_current_state = yy_nxt[yy_current_state][%s];", + char_map ); + gen_backtracking(); + } + + else if ( fullspd ) + { + indent_put2s( "yy_current_state += yy_current_state[%s].yy_nxt;", + char_map ); + gen_backtracking(); + } + + else + { + gen_next_compressed_state(); + + if ( reject ) + indent_puts( "*yy_state_ptr++ = yy_current_state;" ); + } + } + + +/* generate the code to find the start state */ + +gen_start_state() + + { + if ( fullspd ) + indent_put2s( "yy_current_state = yy_start_state_list[yy_start%s];", + bol_needed ? " + (yy_bp[-1] == '\\n' ? 
1 : 0)" : "" ); + + else + { + indent_puts( "yy_current_state = yy_start;" ); + + if ( bol_needed ) + { + indent_puts( "if ( yy_bp[-1] == '\\n' )" ); + indent_up(); + indent_puts( "++yy_current_state;" ); + indent_down(); + } + + if ( reject ) + /* set up for storing up states */ + indent_puts( "yy_state_ptr = yy_state_buf;" ); + } + } + + /* gentabs - generate data statements for the transition tables * * synopsis @@ -169,33 +609,42 @@ gentabs() { int i, j, k, *accset, nacc, *acc_array, total_states; + int end_of_buffer_action = num_rules + 1; /* *everything* is done in terms of arrays starting at 1, so provide * a null entry for the zero element of all C arrays */ - static char C_long_decl[] = "static long int %c[%d] =\n { 0,\n"; - static char C_short_decl[] = "static short int %c[%d] =\n { 0,\n"; - static char C_char_decl[] = "static char %c[%d] =\n { 0,\n"; + static char C_long_decl[] = "static long int %s[%d] =\n { 0,\n"; + static char C_short_decl[] = "static short int %s[%d] =\n { 0,\n"; + static char C_char_decl[] = "static char %s[%d] =\n { 0,\n"; acc_array = allocate_integer_array( current_max_dfas ); nummt = 0; - printf( "#define YY_JAM %d\n", jamstate ); - printf( "#define YY_JAM_BASE %d\n", jambase ); - - if ( usemecs ) - printf( "#define YY_TEMPLATE %d\n", lastdfa + 2 ); + /* the compressed table format jams by entering the "jam state", + * losing information about the previous state in the process. + * In order to recover the previous state, we effectively need + * to keep backtracking information. + */ + ++num_backtracking; if ( reject ) { /* write out accepting list and pointer list + * * first we generate the ACCEPT array. In the process, we compute * the indices that will go into the ALIST array, and save the * indices in the dfaacc array */ + int EOB_accepting_list[2]; - printf( accnum > 127 ? 
C_short_decl : C_char_decl, - ACCEPT, max( numas, 1 ) + 1 ); + printf( C_short_decl, ACCEPT, max( numas, 1 ) + 1 ); + + /* set up accepting structures for the End Of Buffer state */ + EOB_accepting_list[0] = 0; + EOB_accepting_list[1] = end_of_buffer_action; + accsiz[end_of_buffer_state] = 1; + dfaacc[end_of_buffer_state].dfaacc_set = EOB_accepting_list; j = 1; /* index into ACCEPT array */ @@ -213,8 +662,20 @@ gentabs() for ( k = 1; k <= nacc; ++k ) { + int accnum = accset[k]; + ++j; - mkdata( accset[k] ); + + if ( variable_trailing_context_rules && accnum > 0 && + rule_type[accnum] == RULE_VARIABLE ) + { + /* special hack to flag accepting number as part + * of trailing context rule + */ + accnum |= YY_TRAILING_MASK; + } + + mkdata( accnum ); if ( trace ) { @@ -237,6 +698,8 @@ gentabs() else { + dfaacc[end_of_buffer_state].dfaacc_state = end_of_buffer_action; + for ( i = 1; i <= lastdfa; ++i ) acc_array[i] = dfaacc[i].dfaacc_state; @@ -262,12 +725,7 @@ gentabs() */ ++k; -#ifdef UNSIGNED_CHAR printf( C_short_decl, ALIST, k ); -#else - printf( ((reject && numas > 126) || accnum > 127) ? 
- C_short_decl : C_char_decl, ALIST, k ); -#endif for ( i = 1; i <= lastdfa; ++i ) { @@ -381,145 +839,30 @@ gentabs() } -/* generate equivalence-class tables */ +/* write out a formatted string (with a secondary string argument) at the + * current indentation level, adding a final newline + */ -genecs() +indent_put2s( fmt, arg ) +char fmt[], arg[]; { - register int i, j; - static char C_char_decl[] = "static char %c[%d] =\n { 0,\n"; - int numrows; - char clower(); - - printf( C_char_decl, ECARRAY, CSIZE + 1 ); - - for ( i = 1; i <= CSIZE; ++i ) - { - if ( caseins && (i >= 'A') && (i <= 'Z') ) - ecgroup[i] = ecgroup[clower( i )]; - - ecgroup[i] = abs( ecgroup[i] ); - mkdata( ecgroup[i] ); - } - - dataend(); - - if ( trace ) - { - fputs( "\n\nEquivalence Classes:\n\n", stderr ); - - numrows = (CSIZE + 1) / 8; - - for ( j = 1; j <= numrows; ++j ) - { - for ( i = j; i <= CSIZE; i = i + numrows ) - { - char *readable_form(); - - fprintf( stderr, "%4s = %-2d", - readable_form( i ), ecgroup[i] ); - - putc( ' ', stderr ); - } - - putc( '\n', stderr ); - } - } + do_indent(); + printf( fmt, arg ); + putchar( '\n' ); } -/* generate the code to find the next state */ +/* write out a string at the current indentation level, adding a final + * newline + */ -gen_next_state() +indent_puts( str ) +char str[]; { - char *char_map = useecs ? 
"e[*yy_cp]" : "*yy_cp"; - - if ( fulltbl ) - { - printf( "while ( (yy_current_state = n[yy_current_state][%s]) > 0 )\n", - char_map ); - - puts( "\t{" ); - - if ( num_backtracking > 0 ) - { - puts( "\tif ( l[yy_current_state] )" ); - puts( "\t\t{" ); - puts( "\t\tyy_last_accepting_state = yy_current_state;" ); - puts( "\t\tyy_last_accepting_cpos = yy_cp;" ); - puts( "\t\t}" ); - } - - puts( "yy_cp++;" ); - puts( "\t}" ); - - puts( "yy_current_state = -yy_current_state;" ); - } - - else if ( fullspd ) - { - puts( "{" ); - puts( "register char yy_c;" ); - printf( "\nfor ( yy_c = %s;\n", char_map ); - puts( " (yy_trans_info = &yy_current_state[yy_c])->v == yy_c;" ); - printf( " yy_c = %s )\n", char_map ); - - puts( "\t{" ); - - puts( "\tyy_current_state += yy_trans_info->n;" ); - - if ( num_backtracking > 0 ) - { - puts( "\tif ( yy_current_state[-1].n )" ); - puts( "\t\t{" ); - puts( "yy_last_accepting_state = yy_current_state;" ); - puts( "yy_last_accepting_cpos = yy_c_buf_p;" ); - puts( "\t\t}" ); - } - - puts( "\t}" ); - puts( "}" ); - } - - else - { /* compressed */ - puts( "do" ); - - puts( "\t{" ); - printf( "\tregister char yy_c = %s;\n", char_map ); - puts( - "\twhile ( c[b[yy_current_state] + yy_sym] != yy_current_state )" ); - puts( "\t\t{" ); - puts( "yy_current_state = d[yy_current_state];" ); - - if ( usemecs ) - { - /* we've arrange it so that templates are never chained - * to one another. 
This means we can afford make a - * very simple test to see if we need to convert to - * yy_c's meta-equivalence class without worrying - * about erroneously looking up the meta-equivalence - * class twice - */ - puts( "\t\tif ( yy_current_state >= YY_TEMPLATE )" ); - puts( "\t\t\tyy_c = m[yy_c];" ); - } - - puts( "\t\t}" ); - - puts( "\tyy_current_state = n[b[yy_current_state] + yy_c];" ); - - if ( reject ) - puts( "\t*yy_state_ptr++ = yy_current_state;" ); - - puts( "\t}" ); - - if ( interactive ) - puts( "while ( b[yy_current_state] != YY_JAM_BASE );" ); - else - puts( "while ( yy_current_state != YY_JAM );" ); - } + do_indent(); + puts( str ); } @@ -534,34 +877,94 @@ gen_next_state() make_tables() { + printf( "#define YY_END_OF_BUFFER %d\n", num_rules + 1 ); + if ( fullspd ) - { /* need to define YY_TRANS_OFFSET_TYPE as a size large + { /* need to define the transet type as a size large * enough to hold the biggest offset */ int total_table_size = tblend + numecs + 1; - - printf( "#define YY_TRANS_OFFSET_TYPE %s\n", - total_table_size > MAX_SHORT ? "long" : "short" ); + char *trans_offset_type = + total_table_size > MAX_SHORT ? "long" : "short"; + + set_indent( 0 ); + indent_puts( "struct yy_trans_info" ); + indent_up(); + indent_puts( "{" ); + indent_puts( "short yy_verify;" ); + + /* in cases where its sister yy_verify *is* a "yes, there is a + * transition", yy_nxt is the offset (in records) to the next state. + * In most cases where there is no transition, the value of yy_nxt + * is irrelevant. 
If yy_nxt is the -1th record of a state, though, + * then yy_nxt is the action number for that state + */ + + indent_put2s( "%s yy_nxt;", trans_offset_type ); + indent_puts( "};" ); + indent_down(); + + indent_puts( "typedef struct yy_trans_info *yy_state_type;" ); } - - skelout(); - if ( fullspd || fulltbl ) + else + indent_puts( "typedef int yy_state_type;" ); + + if ( fullspd ) + genctbl(); + + else if ( fulltbl ) + genftbl(); + + else + gentabs(); + + if ( reject ) { - if ( num_backtracking > 0 ) + /* declare state buffer variables */ + puts( "yy_trans_info yy_state_buf[YY_BUF_SIZE + 2], *yy_state_ptr;" ); + puts( "char *yy_full_match;" ); + puts( "int yy_lp;" ); + + if ( variable_trailing_context_rules ) { - printf( "#define FLEX_USES_BACKTRACKING\n" ); - printf( "#define YY_BACK_TRACK %d\n", accnum + 1 ); + puts( "int yy_looking_for_trail_begin = 0;" ); + puts( "int yy_full_lp;" ); + puts( "int *yy_full_state;" ); + printf( "#define YY_TRAILING_MASK 0x%x\n", YY_TRAILING_MASK ); + printf( "#define YY_TRAILING_HEAD_MASK 0x%x\n", + YY_TRAILING_HEAD_MASK ); } - if ( fullspd ) - genctbl(); - else - genftbl(); - } + puts( "#define REJECT \\" ); + puts( "{ \\" ); + puts( + "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */ \\" ); + puts( + "yy_cp = yy_full_match; /* restore poss. backed-over text */ \\" ); + if ( variable_trailing_context_rules ) + { + puts( "yy_lp = yy_full_lp; /* restore orig. accepting pos. */ \\" ); + puts( + "yy_state_ptr = yy_full_state; /* restore orig. state */ \\" ); + puts( + "yy_current_state = *yy_state_ptr; /* restore curr. 
state */ \\" ); + } + + puts( "++yy_lp; \\" ); + puts( "goto find_rule; \\" ); + puts( "}" ); + } + else - gentabs(); + { + puts( "/* the intent behind this definition is that it'll catch" ); + puts( " * any uses of REJECT which flex missed" ); + puts( " */" ); + puts( "#define REJECT reject_used_but_not_detected" ); + } + skelout(); @@ -573,12 +976,32 @@ make_tables() skelout(); + set_indent( 2 ); + + gen_start_state(); + gen_next_match(); + + skelout(); + set_indent( 3 ); + gen_find_action(); + /* copy actions from action_file to output file */ + skelout(); + indent_up(); + gen_bt_action(); action_out(); + /* generate code for yy_get_previous_state() */ + set_indent( 1 ); skelout(); - /* generate code to find next state */ + if ( bol_needed ) + indent_puts( "register char *yy_bp = yytext;\n" ); + + gen_start_state(); + + set_indent( 2 ); + skelout(); gen_next_state(); skelout(); -- cgit v1.2.3 From 46109a2632726ede318e67f4ec4b44e9537eb0db Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 19 May 1989 14:07:02 +0000 Subject: added checking for features being Really used backtracking, performance reports misc. 
cleanup --- main.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 98 insertions(+), 23 deletions(-) diff --git a/main.c b/main.c index feb112f..ca1acb7 100644 --- a/main.c +++ b/main.c @@ -23,16 +23,20 @@ static char rcsid[] = /* these globals are all defined and commented in flexdef.h */ int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt; -int interactive, caseins, useecs, fulltbl, usemecs, reject; -int fullspd, gen_line_dirs, performance_report; +int interactive, caseins, useecs, fulltbl, usemecs; +int fullspd, gen_line_dirs, performance_report, backtrack_report; +int yymore_used, reject, real_reject; +int yymore_really_used, reject_really_used; int datapos, dataline, linenum; FILE *skelfile = NULL; char *infilename = NULL; int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE]; int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; -int current_mns, accnum, lastnfa; +int current_mns, num_rules, current_max_rules, lastnfa; int *firstst, *lastst, *finalst, *transchar, *trans1, *trans2; -int *accptnum, *assoc_rule; +int *accptnum, *assoc_rule, *state_type, *rule_type, *rule_linenum; +int current_state_type; +int variable_trailing_context_rules; int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP]; int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1]; @@ -43,7 +47,7 @@ int current_max_template_xpairs, current_max_dfas; int lastdfa, *nxt, *chk, *tnxt; int *base, *def, tblend, firstfree, **dss, *dfasiz; union dfaacc_union *dfaacc; -int *accsiz, *dhash, *todo, todo_head, todo_next, numas; +int *accsiz, *dhash, numas; int numsnpairs, jambase, jamstate; int lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse; int current_max_ccl_tbl_size; @@ -53,11 +57,12 @@ int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; int num_backtracking, 
bol_needed; FILE *temp_action_file; +FILE *backtrack_file; int end_of_buffer_state; #ifndef SHORT_FILE_NAMES -char *action_file_name = "/tmp/flexXXXXXX"; +char action_file_name[] = "/tmp/flexXXXXXX"; #else -char *action_file_name = "flexXXXXXX.tmp"; +char action_file_name[] = "flexXXXXXX.tmp"; #endif @@ -78,6 +83,46 @@ char **argv; if ( ! syntaxerror ) { + if ( yymore_really_used == REALLY_USED ) + yymore_used = true; + else if ( yymore_really_used == REALLY_NOT_USED ) + yymore_used = false; + + if ( reject_really_used == REALLY_USED ) + reject = true; + else if ( reject_really_used == REALLY_NOT_USED ) + reject = false; + + if ( performance_report ) + { + if ( yymore_used ) + fprintf( stderr, + "yymore() entails a minor performance penalty\n" ); + + if ( reject ) + fprintf( stderr, + "REJECT entails a large performance penalty\n" ); + + if ( variable_trailing_context_rules ) + fprintf( stderr, + "Variable trailing context rules entail a large performance penalty\n" ); + } + + if ( reject ) + real_reject = true; + + if ( variable_trailing_context_rules ) + reject = true; + + if ( (fulltbl || fullspd) && reject ) + { + if ( real_reject ) + flexerror( "REJECT cannot be used with -f or -F" ); + else + flexerror( + "variable trailing context rules cannot be used with -f or -F" ); + } + /* convert the ndfa to a dfa */ ntod(); @@ -121,6 +166,20 @@ int status; (void) unlink( action_file_name ); } + if ( backtrack_report ) + { + if ( num_backtracking == 0 ) + fprintf( backtrack_file, "No backtracking.\n" ); + else if ( fullspd || fulltbl ) + fprintf( backtrack_file, + "%d backtracking (non-accepting) states.\n", + num_backtracking ); + else + fprintf( backtrack_file, "Compressed tables always backtrack.\n" ); + + (void) fclose( backtrack_file ); + } + if ( printstats ) { endtime = gettime(); @@ -132,7 +191,7 @@ int status; fprintf( stderr, " %d/%d NFA states\n", lastnfa, current_mns ); fprintf( stderr, " %d/%d DFA states (%d words)\n", lastdfa, current_max_dfas, totnst ); 
- fprintf( stderr, " %d rules\n", accnum - 1 /* - 1 for def. rule */ ); + fprintf( stderr, " %d rules\n", num_rules - 1 /* - 1 for def. rule */ ); if ( num_backtracking == 0 ) fprintf( stderr, " No backtracking\n" ); @@ -232,7 +291,9 @@ char **argv; char *arg, *skelname = NULL, *gettime(), clower(), *mktemp(); printstats = syntaxerror = trace = spprdflt = interactive = caseins = false; - performance_report = ddebug = fulltbl = reject = fullspd = false; + backtrack_report = performance_report = ddebug = fulltbl = fullspd = false; + yymore_used = reject = false; + yymore_really_used = reject_really_used = false; gen_line_dirs = usemecs = useecs = true; sawcmpflag = false; @@ -249,6 +310,10 @@ char **argv; for ( i = 1; arg[i] != '\0'; ++i ) switch ( arg[i] ) { + case 'b': + backtrack_report = true; + break; + case 'c': if ( i != 1 ) flexerror( "-c flag must be given separately" ); @@ -318,10 +383,6 @@ char **argv; performance_report = true; break; - case 'r': - reject = true; - break; - case 'S': if ( i != 1 ) flexerror( "-S flag must be given separately" ); @@ -360,15 +421,9 @@ get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ if ( (fulltbl || fullspd) && interactive ) flexerror( "full table and -I are (currently) incompatible" ); - if ( (fulltbl || fullspd) && reject ) - flexerror( "reject (-r) cannot be used with -f or -F" ); - if ( fulltbl && fullspd ) flexerror( "full table and -F are mutually exclusive" ); - if ( performance_report && reject ) - fprintf( stderr, "Reject guarentees performance penalties\n" ); - if ( ! 
skelname ) { static char skeleton_name_storage[400]; @@ -407,6 +462,22 @@ get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ else yyin = stdin; + if ( backtrack_report ) + { +#ifndef SHORT_FILE_NAMES + backtrack_file = fopen( "lex.backtrack", "w" ); +#else + backtrack_file = fopen( "lex.bck", "w" ); +#endif + + if ( backtrack_file == NULL ) + flexerror( "could not create lex.backtrack" ); + } + + else + backtrack_file = NULL; + + lastccl = 0; lastsc = 0; @@ -421,11 +492,11 @@ get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ if ( (temp_action_file = fopen( action_file_name, "w" )) == NULL ) lerrsf( "can't open temporary action file %s", action_file_name ); - lastdfa = lastnfa = accnum = numas = numsnpairs = tmpuses = 0; + lastdfa = lastnfa = num_rules = numas = numsnpairs = tmpuses = 0; numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0; numuniq = numdup = hshsave = eofseen = datapos = dataline = 0; num_backtracking = onesp = numprots = 0; - bol_needed = false; + variable_trailing_context_rules = bol_needed = false; linenum = sectnum = 1; firstprot = NIL; @@ -515,6 +586,11 @@ set_up_initial_allocations() trans2 = allocate_integer_array( current_mns ); accptnum = allocate_integer_array( current_mns ); assoc_rule = allocate_integer_array( current_mns ); + state_type = allocate_integer_array( current_mns ); + + current_max_rules = INITIAL_MAX_RULES; + rule_type = allocate_integer_array( current_max_rules ); + rule_linenum = allocate_integer_array( current_max_rules ); current_max_scs = INITIAL_MAX_SCS; scset = allocate_integer_array( current_max_scs ); @@ -522,7 +598,7 @@ set_up_initial_allocations() scxclu = allocate_integer_array( current_max_scs ); actvsc = allocate_integer_array( current_max_scs ); - current_maxccls = INITIAL_MAXCCLS; + current_maxccls = INITIAL_MAX_CCLS; cclmap = allocate_integer_array( current_maxccls ); ccllen = allocate_integer_array( current_maxccls ); cclng = 
allocate_integer_array( current_maxccls ); @@ -545,7 +621,6 @@ set_up_initial_allocations() dfasiz = allocate_integer_array( current_max_dfas ); accsiz = allocate_integer_array( current_max_dfas ); dhash = allocate_integer_array( current_max_dfas ); - todo = allocate_integer_array( current_max_dfas ); dss = allocate_int_ptr_array( current_max_dfas ); dfaacc = allocate_dfaacc_union( current_max_dfas ); } -- cgit v1.2.3 From 9bd627ab6964dffe3730a537e7c50afc45d6765b Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 19 May 1989 14:07:39 +0000 Subject: added all_lower() and all_upper() --- misc.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/misc.c b/misc.c index 53220a4..74934e4 100644 --- a/misc.c +++ b/misc.c @@ -68,6 +68,52 @@ int size, element_size; } +/* all_lower - true if a string is all lower-case + * + * synopsis: + * char *str; + * int all_lower(); + * true/false = all_lower( str ); + */ + +int all_lower( str ) +register char *str; + + { + while ( *str ) + { + if ( ! islower( *str ) ) + return ( 0 ); + ++str; + } + + return ( 1 ); + } + + +/* all_upper - true if a string is all upper-case + * + * synopsis: + * char *str; + * int all_upper(); + * true/false = all_upper( str ); + */ + +int all_upper( str ) +register char *str; + + { + while ( *str ) + { + if ( ! 
isupper( *str ) ) + return ( 0 ); + ++str; + } + + return ( 1 ); + } + + /* bubble - bubble sort an integer array in increasing order * * synopsis -- cgit v1.2.3 From 52f3adc1d688e32a6299dd7f52a70d4bac455560 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 19 May 1989 14:08:47 +0000 Subject: changes for variable trailing context --- nfa.c | 223 +++++++++++++++++++++++++++++++++++++++++++++++----------------- parse.y | 95 ++++++++++++++++++++++----- 2 files changed, 241 insertions(+), 77 deletions(-) diff --git a/nfa.c b/nfa.c index 641a182..90d7471 100644 --- a/nfa.c +++ b/nfa.c @@ -23,62 +23,15 @@ static char rcsid[] = * * synopsis * - * add_accept( mach, headcnt, trailcnt ); - * - * the global ACCNUM is incremented and the new value becomes mach's - * accepting number. if headcnt or trailcnt is non-zero then the machine - * recognizes a pattern with trailing context. headcnt is the number of - * characters in the matched part of the pattern, or zero if the matched - * part has variable length. trailcnt is the number of trailing context - * characters in the pattern, or zero if the trailing context has variable - * length. + * add_accept( mach, accepting_number ); + * + * accepting_number becomes mach's accepting number. */ -add_accept( mach, headcnt, trailcnt ) -int mach, headcnt, trailcnt; +add_accept( mach, accepting_number ) +int mach; { - int astate; - - fprintf( temp_action_file, "case %d:\n", ++accnum ); - - if ( headcnt > 0 || trailcnt > 0 ) - { /* do trailing context magic to not match the trailing characters */ - char *scanner_cp = - (fulltbl || fullspd) ? "yy_c_buf_p = yy_cp" : "yy_c_buf_p"; - char *scanner_bp = (fulltbl || fullspd) ? 
"yy_bp" : "yy_b_buf_p"; - - fprintf( temp_action_file, - "YY_DO_BEFORE_SCAN; /* undo effects of setting up yytext */\n" ); - - if ( headcnt > 0 ) - { - int head_offset = headcnt - 1; - - if ( fullspd || fulltbl ) - /* with the fast skeleton, the character pointer points - * to the *next* character to scan, rather than the one - * that was last scanned - */ - ++head_offset; - - if ( head_offset > 0 ) - fprintf( temp_action_file, "%s = %s + %d;\n", - scanner_cp, scanner_bp, head_offset ); - - else - fprintf( temp_action_file, "%s = %s;\n", - scanner_cp, scanner_bp ); - } - - else - fprintf( temp_action_file, "%s -= %d;\n", scanner_cp, trailcnt ); - - fprintf( temp_action_file, "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" ); - } - - line_directive_out( temp_action_file ); - /* hang the accepting number off an epsilon state. if it is associated * with a state that has a non-epsilon out-transition, then the state * will accept BEFORE it makes that transition, i.e., one character @@ -86,12 +39,12 @@ int mach, headcnt, trailcnt; */ if ( transchar[finalst[mach]] == SYM_EPSILON ) - accptnum[finalst[mach]] = accnum; + accptnum[finalst[mach]] = accepting_number; else { - astate = mkstate( SYM_EPSILON ); - accptnum[astate] = accnum; + int astate = mkstate( SYM_EPSILON ); + accptnum[astate] = accepting_number; mach = link_machines( mach, astate ); } } @@ -215,6 +168,82 @@ int mach; return ( init ); } +/* finish_rule - finish up the processing for a rule + * + * synopsis + * + * finish_rule( mach, variable_trail_rule, headcnt, trailcnt ); + * + * An accepting number is added to the given machine. If variable_trail_rule + * is true then the rule has trailing context and both the head and trail + * are variable size. Otherwise if headcnt or trailcnt is non-zero then + * the machine recognizes a pattern with trailing context and headcnt is + * the number of characters in the matched part of the pattern, or zero + * if the matched part has variable length. 
trailcnt is the number of + * trailing context characters in the pattern, or zero if the trailing + * context has variable length. + */ + +finish_rule( mach, variable_trail_rule, headcnt, trailcnt ) +int mach, variable_trail_rule, headcnt, trailcnt; + + { + add_accept( mach, num_rules ); + + /* we did this in new_rule(), but it often gets the wrong + * number because we do it before we start parsing the current rule + */ + rule_type[num_rules] = linenum; + + fprintf( temp_action_file, "case %d:\n", num_rules ); + + if ( variable_trail_rule ) + { + rule_type[num_rules] = RULE_VARIABLE; + + if ( performance_report ) + fprintf( stderr, "Variable trailing context rule at line %d\n", + rule_linenum[num_rules] ); + + variable_trailing_context_rules = true; + } + + else + { + rule_type[num_rules] = RULE_NORMAL; + + if ( headcnt > 0 || trailcnt > 0 ) + { + /* do trailing context magic to not match the trailing characters */ + char *scanner_cp = "yy_c_buf_p = yy_cp"; + char *scanner_bp = "yy_bp"; + + fprintf( temp_action_file, + "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */\n" ); + + if ( headcnt > 0 ) + { + if ( headcnt > 0 ) + fprintf( temp_action_file, "%s = %s + %d;\n", + scanner_cp, scanner_bp, headcnt ); + + else + fprintf( temp_action_file, "%s = %s;\n", + scanner_cp, scanner_bp ); + } + + else + fprintf( temp_action_file, + "%s -= %d;\n", scanner_cp, trailcnt ); + + fprintf( temp_action_file, + "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" ); + } + } + + line_directive_out( temp_action_file ); + } + /* link_machines - connect two machines together * @@ -254,6 +283,49 @@ int first, last; } +/* mark_beginning_as_normal - mark each "beginning" state in a machine + * as being a "normal" (i.e., not trailing context- + * associated) states + * + * synopsis + * + * mark_beginning_as_normal( mach ) + * + * mach - machine to mark + * + * The "beginning" states are the epsilon closure of the first state + */ + +mark_beginning_as_normal( mach ) +register 
int mach; + + { + switch ( state_type[mach] ) + { + case STATE_NORMAL: + /* oh, we've already visited here */ + return; + + case STATE_TRAILING_CONTEXT: + state_type[mach] = STATE_NORMAL; + + if ( transchar[mach] == SYM_EPSILON ) + { + if ( trans1[mach] != NO_TRANSITION ) + mark_beginning_as_normal( trans1[mach] ); + + if ( trans2[mach] != NO_TRANSITION ) + mark_beginning_as_normal( trans2[mach] ); + } + break; + + default: + flexerror( "bad state type in mark_beginning_as_normal()" ); + break; + } + } + + /* mkbranch - make a machine that branches to two machines * * synopsis @@ -456,14 +528,15 @@ int mkrep( mach, lb, ub ) int mach, lb, ub; { - int base, tail, copy, i; + int base_mach, tail, copy, i; - base = copysingl( mach, lb - 1 ); + base_mach = copysingl( mach, lb - 1 ); if ( ub == INFINITY ) { copy = dupmachine( mach ); - mach = link_machines( mach, link_machines( base, mkclos( copy ) ) ); + mach = link_machines( mach, + link_machines( base_mach, mkclos( copy ) ) ); } else @@ -476,7 +549,7 @@ int mach, lb, ub; tail = mkopt( link_machines( copy, tail ) ); } - mach = link_machines( mach, link_machines( base, tail ) ); + mach = link_machines( mach, link_machines( base_mach, tail ) ); } return ( mach ); @@ -519,6 +592,7 @@ int sym; trans2 = reallocate_integer_array( trans2, current_mns ); accptnum = reallocate_integer_array( accptnum, current_mns ); assoc_rule = reallocate_integer_array( assoc_rule, current_mns ); + state_type = reallocate_integer_array( state_type, current_mns ); } firstst[lastnfa] = lastnfa; @@ -528,7 +602,8 @@ int sym; trans1[lastnfa] = NO_TRANSITION; trans2[lastnfa] = NO_TRANSITION; accptnum[lastnfa] = NIL; - assoc_rule[lastnfa] = linenum; /* identify rules by line number in input */ + assoc_rule[lastnfa] = num_rules; + state_type[lastnfa] = current_state_type; /* fix up equivalence classes base on this transition. Note that any * character which has its own transition gets its own equivalence class. 
@@ -585,3 +660,31 @@ int statefrom, stateto; trans2[statefrom] = stateto; } } + +/* new_rule - initialize for a new rule + * + * synopsis + * + * new_rule(); + * + * the global num_rules is incremented and the any corresponding dynamic + * arrays (such as rule_type[]) are grown as needed. + */ + +new_rule() + + { + if ( ++num_rules >= current_max_rules ) + { + ++num_reallocs; + current_max_rules += MAX_RULES_INCREMENT; + rule_type = reallocate_integer_array( rule_type, current_max_rules ); + rule_linenum = + reallocate_integer_array( rule_linenum, current_max_rules ); + } + + if ( num_rules > MAX_RULE ) + lerrif( "too many rules (> %d)!", MAX_RULE ); + + rule_linenum[num_rules] = linenum; + } diff --git a/parse.y b/parse.y index d9cf7be..55cb3c5 100644 --- a/parse.y +++ b/parse.y @@ -24,7 +24,7 @@ static char rcsid[] = #endif int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen; -int trlcontxt, xcluflg, cclsorted, varlength; +int trlcontxt, xcluflg, cclsorted, varlength, variable_trail_rule; char clower(); static int madeany = false; /* whether we've made the '.' character class */ @@ -32,7 +32,7 @@ static int madeany = false; /* whether we've made the '.' 
character class */ %} %% -goal : initlex sect1 sect1end sect2 +goal : initlex sect1 sect1end sect2 initforrule { /* add default rule */ int def_rule; @@ -41,13 +41,18 @@ goal : initlex sect1 sect1end sect2 def_rule = mkstate( -pat ); - add_accept( def_rule, 0, 0 ); + finish_rule( def_rule, variable_trail_rule, 0, 0 ); for ( i = 1; i <= lastsc; ++i ) scset[i] = mkbranch( scset[i], def_rule ); - fputs( "YY_DEFAULT_ACTION;\n\tYY_BREAK\n", - temp_action_file ); + if ( spprdflt ) + fputs( "YY_FATAL_ERROR( \"flex scanner jammed\" )", + temp_action_file ); + else + fputs( "ECHO", temp_action_file ); + + fputs( ";\n\tYY_BREAK\n", temp_action_file ); } ; @@ -100,25 +105,38 @@ sect2 : sect2 initforrule flexrule '\n' initforrule : { /* initialize for a parse of one rule */ - trlcontxt = varlength = false; + trlcontxt = variable_trail_rule = varlength = false; trailcnt = headcnt = rulelen = 0; + current_state_type = STATE_NORMAL; + new_rule(); } ; flexrule : scon '^' re eol { pat = link_machines( $3, $4 ); - add_accept( pat, headcnt, trailcnt ); + finish_rule( pat, variable_trail_rule, + headcnt, trailcnt ); for ( i = 1; i <= actvp; ++i ) scbol[actvsc[i]] = mkbranch( scbol[actvsc[i]], pat ); + + if ( ! bol_needed ) + { + bol_needed = true; + + if ( performance_report ) + fprintf( stderr, + "'^' operator results in sub-optimal performance\n" ); + } } | scon re eol { pat = link_machines( $2, $3 ); - add_accept( pat, headcnt, trailcnt ); + finish_rule( pat, variable_trail_rule, + headcnt, trailcnt ); for ( i = 1; i <= actvp; ++i ) scset[actvsc[i]] = @@ -128,7 +146,8 @@ flexrule : scon '^' re eol | '^' re eol { pat = link_machines( $2, $3 ); - add_accept( pat, headcnt, trailcnt ); + finish_rule( pat, variable_trail_rule, + headcnt, trailcnt ); /* add to all non-exclusive start conditions, * including the default (0) start condition @@ -137,12 +156,22 @@ flexrule : scon '^' re eol for ( i = 1; i <= lastsc; ++i ) if ( ! scxclu[i] ) scbol[i] = mkbranch( scbol[i], pat ); + + if ( ! 
bol_needed ) + { + bol_needed = true; + + if ( performance_report ) + fprintf( stderr, + "'^' operator results in sub-optimal performance\n" ); + } } | re eol { pat = link_machines( $1, $2 ); - add_accept( pat, headcnt, trailcnt ); + finish_rule( pat, variable_trail_rule, + headcnt, trailcnt ); for ( i = 1; i <= lastsc; ++i ) if ( ! scxclu[i] ) @@ -207,8 +236,7 @@ eol : '$' { if ( varlength && headcnt == 0 ) /* both head and trail are variable-length */ - synerr( "illegal trailing context" ); - + variable_trail_rule = true; else trailcnt = rulelen; } @@ -223,7 +251,33 @@ re : re '|' series } | re2 series - { $$ = link_machines( $1, $2 ); } + { + if ( transchar[lastst[$2]] != SYM_EPSILON ) + /* provide final transition \now/ so it + * will be marked as a trailing context + * state + */ + $2 = link_machines( $2, mkstate( SYM_EPSILON ) ); + + mark_beginning_as_normal( $2 ); + current_state_type = STATE_NORMAL; + + if ( varlength && headcnt == 0 ) + { /* variable trailing context rule */ + /* mark the first part of the rule as the accepting + * "head" part of a trailing context rule + */ + /* by the way, we didn't do this at the beginning + * of this production because back then + * current_state_type was set up for a trail + * rule, and add_accept() can create a new + * state ... 
+ */ + add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK ); + } + + $$ = link_machines( $1, $2 ); + } | series { $$ = $1; } @@ -243,12 +297,14 @@ re2 : re '/' trlcontxt = true; if ( varlength ) - /* the trailing context had better be fixed-length */ + /* we hope the trailing context is fixed-length */ varlength = false; else headcnt = rulelen; rulelen = 0; + + current_state_type = STATE_TRAILING_CONTEXT; $$ = $1; } ; @@ -290,13 +346,18 @@ singleton : singleton '*' { varlength = true; - if ( $3 > $5 || $3 <= 0 ) + if ( $3 > $5 || $3 < 0 ) { synerr( "bad iteration values" ); $$ = $1; } else - $$ = mkrep( $1, $3, $5 ); + { + if ( $3 == 0 ) + $$ = mkopt( mkrep( $1, $3, $5 ) ); + else + $$ = mkrep( $1, $3, $5 ); + } } | singleton '{' NUMBER ',' '}' @@ -491,7 +552,7 @@ char str[]; { syntaxerror = true; - fprintf( stderr, "Syntax error at line %d: %s\n", linenum, str ); + fprintf( stderr, "Syntax error at line %d: %s\n", linenum, str ); } -- cgit v1.2.3 From 8b268ec3e79c291fcc9158e3275cd5173196cf5b Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 19 May 1989 14:10:31 +0000 Subject: changed to look for yymore, REJECT, %used and %unused removed gross magic for dealing with section 3 --- scan.l | 119 +++++++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 67 insertions(+), 52 deletions(-) diff --git a/scan.l b/scan.l index 88c2d22..4ac8ba1 100644 --- a/scan.l +++ b/scan.l @@ -39,17 +39,26 @@ static char rcsid[] = #define PUT_BACK_STRING(str, start) \ for ( i = strlen( str ) - 1; i >= start; --i ) \ unput(str[i]) + +#define CHECK_REJECT(str) \ + if ( all_upper( str ) ) \ + reject = true; + +#define CHECK_YYMORE(str) \ + if ( all_lower( str ) ) \ + yymore_used = true; %} %x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE %x FIRSTCCL CCL ACTION RECOVER BRACEERROR C_COMMENT C_COMMENT_2 ACTION_COMMENT -%x ACTION_STRING PERCENT_BRACE_ACTION +%x ACTION_STRING PERCENT_BRACE_ACTION USED_LIST -WS [ \t]+ +WS [ \t\f]+ +OPTWS [ \t\f]* 
+NOT_WS [^ \t\f\n] -OPTWS [ \t]* - -NAME [a-z_][a-z_0-9]* +NAME [a-z_][a-z_0-9-]* +NOT_NAME [^a-z_\n]+ SCNAME {NAME} @@ -57,15 +66,21 @@ ESCSEQ \\([^\n]|0[0-9]{1,3}) %% static int bracelevel, didadef; - int i, cclval; + int i, indented_code, checking_used; char nmdef[MAXLINE], myesc(); -^{WS}.*\n ++linenum; ECHO; /* indented code */ +^{WS} indented_code = true; BEGIN(CODEBLOCK); ^#.*\n ++linenum; ECHO; /* treat as a comment */ ^"/*" ECHO; BEGIN(C_COMMENT); ^"%s"(tart)? return ( SCDECL ); ^"%x" return ( XSCDECL ); -^"%{".*\n ++linenum; line_directive_out( stdout ); BEGIN(CODEBLOCK); +^"%{".*\n { + ++linenum; + line_directive_out( stdout ); + indented_code = false; + BEGIN(CODEBLOCK); + } + {WS} return ( WHITESPACE ); ^"%%".* { @@ -75,7 +90,11 @@ ESCSEQ \\([^\n]|0[0-9]{1,3}) return ( SECTEND ); } -^"%"[^sx{%].*\n { +^"%used" checking_used = REALLY_USED; BEGIN(USED_LIST); +^"%not"{OPTWS}"used" checking_used = REALLY_NOT_USED; BEGIN(USED_LIST); + + +^"%"[^sx]" ".*\n { fprintf( stderr, "old-style lex command at line %d ignored:\n\t%s", linenum, yytext ); @@ -100,12 +119,22 @@ ESCSEQ \\([^\n]|0[0-9]{1,3}) "*" ECHO; \n ++linenum; ECHO; + ^"%}".*\n ++linenum; BEGIN(0); -.*\n ++linenum; ECHO; +"reject" ECHO; CHECK_REJECT(yytext); +"yymore" ECHO; CHECK_YYMORE(yytext); +{NAME}|{NOT_NAME}|. 
ECHO; +\n { + ++linenum; + ECHO; + if ( indented_code ) + BEGIN(0); + } + {WS} /* separates name and definition */ -[^ \t\n].* { +{NOT_WS}.* { (void) strcpy( nmdef, yytext ); for ( i = strlen( nmdef ) - 1; @@ -130,7 +159,24 @@ ESCSEQ \\([^\n]|0[0-9]{1,3}) .*\n ++linenum; BEGIN(0); RETURNNAME; -.*\n/[^ \t\n] { +\n ++linenum; BEGIN(0); +{WS} +"reject" { + if ( all_upper( yytext ) ) + reject_really_used = checking_used; + else + synerr( "unrecognized %used/%notused construct" ); + } +"yymore" { + if ( all_lower( yytext ) ) + yymore_really_used = checking_used; + else + synerr( "unrecognized %used/%notused construct" ); + } +{NOT_WS}+ synerr( "unrecognized %used/%notused construct" ); + + +.*\n/{NOT_WS} { ++linenum; ACTION_ECHO; MARK_END_OF_PROLOG; @@ -184,16 +230,14 @@ ESCSEQ \\([^\n]|0[0-9]{1,3}) ^{OPTWS}\n ++linenum; return ( '\n' ); ^"%%".* { - /* guarantee that the SECT3 rule will have something - * to match - */ - yyless(1); sectnum = 3; BEGIN(SECT3); return ( EOF ); /* to stop the parser */ } "["([^\\\]\n]|{ESCSEQ})+"]" { + int cclval; + (void) strcpy( nmstr, yytext ); /* check to see if we've already encountered this ccl */ @@ -301,7 +345,9 @@ ESCSEQ \\([^\n]|0[0-9]{1,3}) {OPTWS}"%}".* bracelevel = 0; -.* ACTION_ECHO; +"reject" ACTION_ECHO; CHECK_REJECT(yytext); +"yymore" ACTION_ECHO; CHECK_YYMORE(yytext); +{NAME}|{NOT_NAME}|. ACTION_ECHO; \n { ++linenum; ACTION_ECHO; @@ -312,9 +358,11 @@ ESCSEQ \\([^\n]|0[0-9]{1,3}) } } + /* REJECT and yymore() are checked for above, in PERCENT_BRACE_ACTION */ "{" ACTION_ECHO; ++bracelevel; "}" ACTION_ECHO; --bracelevel; -[^{}"'/\n]+ ACTION_ECHO; +[^a-z_{}"'/\n]+ ACTION_ECHO; +{NAME} ACTION_ECHO; "/*" ACTION_ECHO; BEGIN(ACTION_COMMENT); "'"([^'\\\n]|\\.)*"'" ACTION_ECHO; /* character constant */ \" ACTION_ECHO; BEGIN(ACTION_STRING); @@ -360,38 +408,5 @@ ESCSEQ \\([^\n]|0[0-9]{1,3}) } -.|\n { - register int numchars; - - /* black magic - we know the names of a flex scanner's - * internal variables. 
We cap the input buffer with - * an end-of-string and dump it to the output. - */ - YY_DO_BEFORE_SCAN; /* recover from setting up yytext */ - -#ifdef FLEX_FAST_SKEL - fputs( yy_cp + 1, stdout ); -#else - yy_ch_buf[yy_e_buf_p + 1] = '\0'; - - /* ignore the first character; it's the second '%' - * put back by the yyless(1) above - */ - fputs( yy_ch_buf + yy_c_buf_p + 1, stdout ); -#endif - - /* if we don't do this, the data written by write() - * can get overwritten when stdout is finally flushed - */ - (void) fflush( stdout ); - - while ( (numchars = read( fileno(yyin), yy_ch_buf, - YY_BUF_MAX )) > 0 ) - (void) write( fileno(stdout), yy_ch_buf, numchars ); - - if ( numchars < 0 ) - flexerror( "fatal read error in section 3" ); - - return ( EOF ); - } +.*(\n?) ECHO; %% -- cgit v1.2.3 From 478a3d9a52416dc02471a306d75bf2e1b724f2de Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 19 May 1989 14:11:11 +0000 Subject: the most piddling format change imaginable --- sym.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sym.c b/sym.c index 025d15b..78111c7 100644 --- a/sym.c +++ b/sym.c @@ -263,7 +263,7 @@ int xcluflg; } if ( addsym( copy_string( str ), (char *) 0, lastsc, - sctbl, START_COND_HASH_SIZE ) ) + sctbl, START_COND_HASH_SIZE ) ) lerrsf( "start condition %s declared twice", str ); scset[lastsc] = mkstate( SYM_EPSILON ); -- cgit v1.2.3 From 4a69f03f712f9ad2d522e162fc1aed3389c66156 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 19 May 1989 14:12:45 +0000 Subject: moved table generation code to gen.c moved ntod() to dfa.c --- tblcmp.c | 768 +-------------------------------------------------------------- 1 file changed, 7 insertions(+), 761 deletions(-) diff --git a/tblcmp.c b/tblcmp.c index 18c2e2d..320eec9 100644 --- a/tblcmp.c +++ b/tblcmp.c @@ -396,423 +396,6 @@ int *state, numtrans; } -/* genctbl - generates full speed compressed transition table - * - * synopsis - * genctbl(); - */ - -genctbl() - - { - register int i; - - /* table of 
verify for transition and offset to next state */ - printf( "static struct yy_trans_info yy_transition[%d] =\n", - tblend + numecs + 1 ); - printf( " {\n" ); - - /* We want the transition to be represented as the offset to the - * next state, not the actual state number, which is what it currently is. - * The offset is base[nxt[i]] - base[chk[i]]. That's just the - * difference between the starting points of the two involved states - * (to - from). - * - * first, though, we need to find some way to put in our end-of-buffer - * flags and states. We do this by making a state with absolutely no - * transitions. We put it at the end of the table. - */ - /* at this point, we're guaranteed that there's enough room in nxt[] - * and chk[] to hold tblend + numecs entries. We need just two slots. - * One for the action and one for the end-of-buffer transition. We - * now *assume* that we're guaranteed the only character we'll try to - * index this nxt/chk pair with is EOB, i.e., 0, so we don't have to - * make sure there's room for jam entries for other characters. - */ - - base[lastdfa + 1] = tblend + 2; - nxt[tblend + 1] = END_OF_BUFFER_ACTION; - chk[tblend + 1] = numecs + 1; - chk[tblend + 2] = 1; /* anything but EOB */ - nxt[tblend + 2] = 0; /* so that "make test" won't show arb. differences */ - - /* make sure every state has a end-of-buffer transition and an action # */ - for ( i = 0; i <= lastdfa; ++i ) - { - register int anum = dfaacc[i].dfaacc_state; - - chk[base[i]] = EOB_POSITION; - chk[base[i] - 1] = ACTION_POSITION; - nxt[base[i] - 1] = anum ? 
anum : accnum + 1; /* action number */ - } - - dataline = 0; - datapos = 0; - - for ( i = 0; i <= tblend; ++i ) - { - if ( chk[i] == EOB_POSITION ) - transition_struct_out( 0, base[lastdfa + 1] - i ); - - else if ( chk[i] == ACTION_POSITION ) - transition_struct_out( 0, nxt[i] ); - - else if ( chk[i] > numecs || chk[i] == 0 ) - transition_struct_out( 0, 0 ); /* unused slot */ - - else /* verify, transition */ - transition_struct_out( chk[i], base[nxt[i]] - (i - chk[i]) ); - } - - - /* here's the final, end-of-buffer state */ - transition_struct_out( chk[tblend + 1], nxt[tblend + 1] ); - transition_struct_out( chk[tblend + 2], nxt[tblend + 2] ); - - printf( " };\n" ); - printf( "\n" ); - - /* table of pointers to start states */ - printf( "static struct yy_trans_info *yy_state_ptr[%d] =\n", - lastsc * 2 + 1 ); - printf( " {\n" ); - - for ( i = 0; i <= lastsc * 2; ++i ) - printf( " &yy_transition[%d],\n", base[i] ); - - printf( " };\n" ); - - if ( useecs ) - genecs(); - } - - -/* genftbl - generates full transition table - * - * synopsis - * genftbl(); - */ - -genftbl() - - { - register int i; - - /* *everything* is done in terms of arrays starting at 1, so provide - * a null entry for the zero element of all C arrays - */ - static char C_short_decl[] = "static short int %c[%d] =\n { 0,\n"; - static char C_char_decl[] = "static char %c[%d] =\n { 0,\n"; - -#ifdef UNSIGNED_CHAR - printf( C_short_decl, ALIST, lastdfa + 1 ); -#else - printf( accnum > 127 ? C_short_decl : C_char_decl, ALIST, lastdfa + 1 ); -#endif - - for ( i = 1; i <= lastdfa; ++i ) - { - register int anum = dfaacc[i].dfaacc_state; - - if ( i == end_of_buffer_state ) - mkdata( END_OF_BUFFER_ACTION ); - - else - mkdata( anum ? 
anum : accnum + 1 ); - - if ( trace && anum ) - fprintf( stderr, "state # %d accepts: [%d]\n", i, anum ); - } - - dataend(); - - if ( useecs ) - genecs(); - - /* don't have to dump the actual full table entries - they were created - * on-the-fly - */ - } - - -/* gentabs - generate data statements for the transition tables - * - * synopsis - * gentabs(); - */ - -gentabs() - - { - int i, j, k, *accset, nacc, *acc_array, total_states; - - /* *everything* is done in terms of arrays starting at 1, so provide - * a null entry for the zero element of all C arrays - */ - static char C_long_decl[] = "static long int %c[%d] =\n { 0,\n"; - static char C_short_decl[] = "static short int %c[%d] =\n { 0,\n"; - static char C_char_decl[] = "static char %c[%d] =\n { 0,\n"; - - acc_array = allocate_integer_array( current_max_dfas ); - nummt = 0; - - printf( "#define YY_JAM %d\n", jamstate ); - printf( "#define YY_JAM_BASE %d\n", jambase ); - - if ( usemecs ) - printf( "#define YY_TEMPLATE %d\n", lastdfa + 2 ); - - if ( reject ) - { - /* write out accepting list and pointer list - * first we generate the ACCEPT array. In the process, we compute - * the indices that will go into the ALIST array, and save the - * indices in the dfaacc array - */ - - printf( accnum > 127 ? 
C_short_decl : C_char_decl, - ACCEPT, max( numas, 1 ) + 1 ); - - j = 1; /* index into ACCEPT array */ - - for ( i = 1; i <= lastdfa; ++i ) - { - acc_array[i] = j; - - if ( accsiz[i] != 0 ) - { - accset = dfaacc[i].dfaacc_set; - nacc = accsiz[i]; - - if ( trace ) - fprintf( stderr, "state # %d accepts: ", i ); - - for ( k = 1; k <= nacc; ++k ) - { - ++j; - mkdata( accset[k] ); - - if ( trace ) - { - fprintf( stderr, "[%d]", accset[k] ); - - if ( k < nacc ) - fputs( ", ", stderr ); - else - putc( '\n', stderr ); - } - } - } - } - - /* add accepting number for the "jam" state */ - acc_array[i] = j; - - dataend(); - } - - else - { - for ( i = 1; i <= lastdfa; ++i ) - acc_array[i] = dfaacc[i].dfaacc_state; - - /* add accepting number for jam state */ - acc_array[i] = 0; - } - - /* spit out ALIST array. If we're doing "reject", it'll be pointers - * into the ACCEPT array. Otherwise it's actual accepting numbers. - * In either case, we just dump the numbers. - */ - - /* "lastdfa + 2" is the size of ALIST; includes room for C arrays - * beginning at 0 and for "jam" state - */ - k = lastdfa + 2; - - if ( reject ) - /* we put a "cap" on the table associating lists of accepting - * numbers with state numbers. This is needed because we tell - * where the end of an accepting list is by looking at where - * the list for the next state starts. - */ - ++k; - -#ifdef UNSIGNED_CHAR - printf( C_short_decl, ALIST, k ); -#else - printf( ((reject && numas > 126) || accnum > 127) ? - C_short_decl : C_char_decl, ALIST, k ); -#endif - - for ( i = 1; i <= lastdfa; ++i ) - { - mkdata( acc_array[i] ); - - if ( ! 
reject && trace && acc_array[i] ) - fprintf( stderr, "state # %d accepts: [%d]\n", i, acc_array[i] ); - } - - /* add entry for "jam" state */ - mkdata( acc_array[i] ); - - if ( reject ) - /* add "cap" for the list */ - mkdata( acc_array[i] ); - - dataend(); - - if ( useecs ) - genecs(); - - if ( usemecs ) - { - /* write out meta-equivalence classes (used to index templates with) */ - - if ( trace ) - fputs( "\n\nMeta-Equivalence Classes:\n", stderr ); - - printf( C_char_decl, MATCHARRAY, numecs + 1 ); - - for ( i = 1; i <= numecs; ++i ) - { - if ( trace ) - fprintf( stderr, "%d = %d\n", i, abs( tecbck[i] ) ); - - mkdata( abs( tecbck[i] ) ); - } - - dataend(); - } - - total_states = lastdfa + numtemps; - - printf( tblend > MAX_SHORT ? C_long_decl : C_short_decl, - BASEARRAY, total_states + 1 ); - - for ( i = 1; i <= lastdfa; ++i ) - { - register int d = def[i]; - - if ( base[i] == JAMSTATE ) - base[i] = jambase; - - if ( d == JAMSTATE ) - def[i] = jamstate; - - else if ( d < 0 ) - { - /* template reference */ - ++tmpuses; - def[i] = lastdfa - d + 1; - } - - mkdata( base[i] ); - } - - /* generate jam state's base index */ - mkdata( base[i] ); - - for ( ++i /* skip jam state */; i <= total_states; ++i ) - { - mkdata( base[i] ); - def[i] = jamstate; - } - - dataend(); - - printf( tblend > MAX_SHORT ? C_long_decl : C_short_decl, - DEFARRAY, total_states + 1 ); - - for ( i = 1; i <= total_states; ++i ) - mkdata( def[i] ); - - dataend(); - - printf( lastdfa > MAX_SHORT ? C_long_decl : C_short_decl, - NEXTARRAY, tblend + 1 ); - - for ( i = 1; i <= tblend; ++i ) - { - if ( nxt[i] == 0 || chk[i] == 0 ) - nxt[i] = jamstate; /* new state is the JAM state */ - - mkdata( nxt[i] ); - } - - dataend(); - - printf( lastdfa > MAX_SHORT ? 
C_long_decl : C_short_decl, - CHECKARRAY, tblend + 1 ); - - for ( i = 1; i <= tblend; ++i ) - { - if ( chk[i] == 0 ) - ++nummt; - - mkdata( chk[i] ); - } - - dataend(); - } - - -/* generate equivalence-class tables */ - -genecs() - - { - register int i, j; - static char C_char_decl[] = "static char %c[%d] =\n { 0,\n"; - int numrows; - char clower(); - - printf( C_char_decl, ECARRAY, CSIZE + 1 ); - - for ( i = 1; i <= CSIZE; ++i ) - { - if ( caseins && (i >= 'A') && (i <= 'Z') ) - ecgroup[i] = ecgroup[clower( i )]; - - ecgroup[i] = abs( ecgroup[i] ); - mkdata( ecgroup[i] ); - } - - dataend(); - - if ( trace ) - { - fputs( "\n\nEquivalence Classes:\n\n", stderr ); - - numrows = (CSIZE + 1) / 8; - - for ( j = 1; j <= numrows; ++j ) - { - for ( i = j; i <= CSIZE; i = i + numrows ) - { - if ( i >= 1 && i <= 31 ) - fprintf( stderr, "^%c = %-2d", - 'A' + i - 1, ecgroup[i] ); - - else if ( i >= 32 && i <= 126 ) - fprintf( stderr, " %c = %-2d", i, ecgroup[i] ); - - else if ( i == 127 ) - fprintf( stderr, "^@ = %-2d", ecgroup[i] ); - - else - fprintf( stderr, "\nSomething Weird: %d = %d\n", i, - ecgroup[i] ); - - putc( '\t', stderr ); - } - - putc( '\n', stderr ); - } - } - } - - /* inittbl - initialize transition tables * * synopsis @@ -854,68 +437,6 @@ inittbl() } -/* make_tables - generate transition tables - * - * synopsis - * make_tables(); - * - * Generates transition tables and finishes generating output file - */ - -make_tables() - - { - if ( fullspd ) - { /* need to define YY_TRANS_OFFSET_TYPE as a size large - * enough to hold the biggest offset - */ - int total_table_size = tblend + numecs + 1; - - printf( "#define YY_TRANS_OFFSET_TYPE %s\n", - total_table_size > MAX_SHORT ? 
"long" : "short" ); - } - - if ( fullspd || fulltbl ) - { - skelout(); - - if ( num_backtracking > 0 ) - { - printf( "#define FLEX_USES_BACKTRACKING\n" ); - printf( "#define YY_BACK_TRACK %d\n", accnum + 1 ); - } - - if ( fullspd ) - genctbl(); - else - genftbl(); - } - - else - gentabs(); - - skelout(); - - (void) fclose( temp_action_file ); - temp_action_file = fopen( action_file_name, "r" ); - - /* copy prolog from action_file to output file */ - action_out(); - - skelout(); - - /* copy actions from action_file to output file */ - action_out(); - - skelout(); - - /* copy remainder of input to output */ - - line_directive_out( stdout ); - (void) flexscan(); /* copy remainder of input to output */ - } - - /* mkdeftbl - make the default, "jam" table entries * * synopsis @@ -929,9 +450,15 @@ mkdeftbl() jamstate = lastdfa + 1; + ++tblend; /* room for transition on end-of-buffer character */ + if ( tblend + numecs > current_max_xpairs ) expand_nxt_chk(); + /* add in default end-of-buffer transition */ + nxt[tblend] = end_of_buffer_state; + chk[tblend] = jamstate; + for ( i = 1; i <= numecs; ++i ) { nxt[tblend + i] = 0; @@ -941,11 +468,7 @@ mkdeftbl() jambase = tblend; base[jamstate] = jambase; - - /* should generate a run-time array bounds check if - * ever used as a default - */ - def[jamstate] = BAD_SUBSCRIPT; + def[jamstate] = 0; tblend += numecs; ++numtemps; @@ -1261,283 +784,6 @@ int qelm; } -/* ntod - convert an ndfa to a dfa - * - * synopsis - * ntod(); - * - * creates the dfa corresponding to the ndfa we've constructed. the - * dfa starts out in state #1. - */ -ntod() - - { - int *accset, ds, nacc, newds; - int duplist[CSIZE + 1], sym, hashval, numstates, dsize; - int targfreq[CSIZE + 1], targstate[CSIZE + 1], state[CSIZE + 1]; - int *nset, *dset; - int targptr, totaltrans, i, comstate, comfreq, targ; - int *epsclosure(), snstods(), symlist[CSIZE + 1]; - int num_start_states; - - /* this is so find_table_space(...) 
will know where to start looking in - * chk/nxt for unused records for space to put in the state - */ - if ( fullspd ) - firstfree = 0; - - accset = allocate_integer_array( accnum + 1 ); - nset = allocate_integer_array( current_max_dfa_size ); - - todo_head = todo_next = 0; - -#define ADD_QUEUE_ELEMENT(element) \ - if ( ++element >= current_max_dfas ) \ - { /* check for queue overflowing */ \ - if ( todo_head == 0 ) \ - increase_max_dfas(); \ - else \ - element = 0; \ - } - -#define NEXT_QUEUE_ELEMENT(element) ((element + 1) % (current_max_dfas + 1)) - - for ( i = 0; i <= CSIZE; ++i ) - { - duplist[i] = NIL; - symlist[i] = false; - } - - for ( i = 0; i <= accnum; ++i ) - accset[i] = NIL; - - if ( trace ) - { - dumpnfa( scset[1] ); - fputs( "\n\nDFA Dump:\n\n", stderr ); - } - - inittbl(); - - if ( fullspd ) - { - for ( i = 0; i <= numecs; ++i ) - state[i] = 0; - place_state( state, 0, 0 ); - } - - if ( fulltbl ) - { - /* declare it "short" because it's a real long-shot that that - * won't be large enough - */ - printf( "static short int %c[][%d] =\n {\n", NEXTARRAY, - numecs + 1 ); /* '}' so vi doesn't get too confused */ - - /* generate 0 entries for state #0 */ - for ( i = 0; i <= numecs; ++i ) - mk2data( 0 ); - - /* force ',' and dataflush() next call to mk2data */ - datapos = NUMDATAITEMS; - - /* force extra blank line next dataflush() */ - dataline = NUMDATALINES; - } - - /* create the first states */ - - num_start_states = lastsc * 2; - - for ( i = 1; i <= num_start_states; ++i ) - { - numstates = 1; - - /* for each start condition, make one state for the case when - * we're at the beginning of the line (the '%' operator) and - * one for the case when we're not - */ - if ( i % 2 == 1 ) - nset[numstates] = scset[(i / 2) + 1]; - else - nset[numstates] = mkbranch( scbol[i / 2], scset[i / 2] ); - - nset = epsclosure( nset, &numstates, accset, &nacc, &hashval ); - - if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) ) - { - numas = numas + nacc; - totnst 
= totnst + numstates; - - todo[todo_next] = ds; - ADD_QUEUE_ELEMENT(todo_next); - } - } - - if ( fulltbl ) - { - if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) ) - flexfatal( "could not create unique end-of-buffer state" ); - - numas += 1; - ++num_start_states; - - todo[todo_next] = end_of_buffer_state; - ADD_QUEUE_ELEMENT(todo_next); - } - - while ( todo_head != todo_next ) - { - targptr = 0; - totaltrans = 0; - - for ( i = 1; i <= numecs; ++i ) - state[i] = 0; - - ds = todo[todo_head]; - todo_head = NEXT_QUEUE_ELEMENT(todo_head); - - dset = dss[ds]; - dsize = dfasiz[ds]; - - if ( trace ) - fprintf( stderr, "state # %d:\n", ds ); - - sympartition( dset, dsize, symlist, duplist ); - - for ( sym = 1; sym <= numecs; ++sym ) - { - if ( symlist[sym] ) - { - symlist[sym] = 0; - - if ( duplist[sym] == NIL ) - { /* symbol has unique out-transitions */ - numstates = symfollowset( dset, dsize, sym, nset ); - nset = epsclosure( nset, &numstates, accset, - &nacc, &hashval ); - - if ( snstods( nset, numstates, accset, - nacc, hashval, &newds ) ) - { - totnst = totnst + numstates; - todo[todo_next] = newds; - ADD_QUEUE_ELEMENT(todo_next); - numas = numas + nacc; - } - - state[sym] = newds; - - if ( trace ) - fprintf( stderr, "\t%d\t%d\n", sym, newds ); - - targfreq[++targptr] = 1; - targstate[targptr] = newds; - ++numuniq; - } - - else - { - /* sym's equivalence class has the same transitions - * as duplist(sym)'s equivalence class - */ - targ = state[duplist[sym]]; - state[sym] = targ; - - if ( trace ) - fprintf( stderr, "\t%d\t%d\n", sym, targ ); - - /* update frequency count for destination state */ - - i = 0; - while ( targstate[++i] != targ ) - ; - - ++targfreq[i]; - ++numdup; - } - - ++totaltrans; - duplist[sym] = NIL; - } - } - - numsnpairs = numsnpairs + totaltrans; - - if ( caseins && ! 
useecs ) - { - register int j; - - for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j ) - state[i] = state[j]; - } - - if ( ds > num_start_states ) - check_for_backtracking( ds, state ); - - if ( fulltbl ) - { - /* supply array's 0-element */ - if ( ds == end_of_buffer_state ) - mk2data( -end_of_buffer_state ); - else - mk2data( end_of_buffer_state ); - - for ( i = 1; i <= numecs; ++i ) - /* jams are marked by negative of state number */ - mk2data( state[i] ? state[i] : -ds ); - - /* force ',' and dataflush() next call to mk2data */ - datapos = NUMDATAITEMS; - - /* force extra blank line next dataflush() */ - dataline = NUMDATALINES; - } - - else if ( fullspd ) - place_state( state, ds, totaltrans ); - - else - { - /* determine which destination state is the most common, and - * how many transitions to it there are - */ - - comfreq = 0; - comstate = 0; - - for ( i = 1; i <= targptr; ++i ) - if ( targfreq[i] > comfreq ) - { - comfreq = targfreq[i]; - comstate = targstate[i]; - } - - bldtbl( state, ds, totaltrans, comstate, comfreq ); - } - } - - if ( fulltbl ) - dataend(); - - else if ( ! 
fullspd ) - { - cmptmps(); /* create compressed template entries */ - - /* create tables for all the states with only one out-transition */ - while ( onesp > 0 ) - { - mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp], - onedef[onesp] ); - --onesp; - } - - mkdeftbl(); - } - - } - - /* place_state - place a state into full speed transition table * * synopsis -- cgit v1.2.3 From 0ce161acfabcbc30268942fdf2b664c0667a9f7f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 19 May 1989 14:13:16 +0000 Subject: renamed accnum to num_rules --- yylex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yylex.c b/yylex.c index cebf756..9874c0e 100644 --- a/yylex.c +++ b/yylex.c @@ -65,7 +65,7 @@ int yylex() { if ( beglin ) { - fprintf( stderr, "%d\t", accnum + 1 ); + fprintf( stderr, "%d\t", num_rules + 1 ); beglin = 0; } -- cgit v1.2.3 From a8bd4c0dc7996a4bdf6ef13889d1ea6d9b8385ca Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 24 May 1989 00:32:10 +0000 Subject: Initial revision --- flex.skl | 443 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 443 insertions(+) create mode 100644 flex.skl diff --git a/flex.skl b/flex.skl new file mode 100644 index 0000000..5ac11d2 --- /dev/null +++ b/flex.skl @@ -0,0 +1,443 @@ +/* A lexical scanner generated by flex */ + +/* scanner skeleton version: + * $Header$ + */ + +#include + +#define FLEX_SCANNER + +/* returned upon end-of-file */ +#define YY_END_TOK 0 + +/* amount of stuff to slurp up with each read */ +#define YY_READ_BUF_SIZE 8192 + +#define YY_BUF_SIZE (YY_READ_BUF_SIZE * 2) /* size of input buffer */ + +/* number of characters one rule can match. One less than YY_BUF_SIZE to make + * sure we never access beyond the end of an array + */ +#define YY_BUF_MAX (YY_BUF_SIZE - 1) + +/* copy whatever the last rule matched to the standard output */ + +#define ECHO fputs( yytext, yyout ) + +/* gets input and stuffs it into "buf". 
number of characters read, or YY_NULL, + * is returned in "result". + */ +#define YY_INPUT(buf,result,max_size) \ + if ( (result = read( fileno(yyin), buf, max_size )) < 0 ) \ + YY_FATAL_ERROR( "read() in flex scanner failed" ); +#define YY_NULL 0 + +/* report a fatal error */ +#define YY_FATAL_ERROR(msg) \ + { \ + fputs( msg, stderr ); \ + putc( '\n', stderr ); \ + exit( 1 ); \ + } + +/* default yywrap function - always treat EOF as an EOF */ +#define yywrap() 1 + +/* enter a start condition. This macro really ought to take a parameter, + * but we do it the disgusting crufty way that old Unix-lex does it + */ +#define BEGIN yy_start = 1 + + +/* default declaration of generated scanner - a define so the user can + * easily add parameters + */ +#define YY_DECL int yylex() + +/* code executed at the end of each rule */ +#define YY_BREAK break; + +#define YY_END_OF_BUFFER_CHAR 0 + +/* done after the current pattern has been matched and before the + * corresponding action - sets up yytext + */ +#define YY_DO_BEFORE_ACTION \ + yytext = yy_bp; \ + yy_hold_char = *yy_cp; \ + *yy_cp = '\0'; \ + yy_c_buf_p = yy_cp; + +/* returns the length of the matched text */ +#define yyleng (yy_cp - yy_bp) + +#define EOB_ACT_RESTART_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + +/* return all but the first 'n' matched characters back to the input stream */ +#define yyless(n) \ + { \ + *yy_cp = yy_hold_char; /* undo effects of setting up yytext */ \ + yy_c_buf_p = yy_cp = yy_bp + n; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } + +#define unput(c) yyunput( c, yy_bp ) + +#define YY_USER_ACTION + +%% section 1 code and the data tables for the DFA go here + +FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; + +/* these variables are all declared out here so that section 3 code can + * manipulate them + */ +static char *yy_c_buf_p; /* points to current character in buffer */ +static int yy_init = 1; /* whether we need to initialize */ +static int yy_start = 0; 
/* start state number */ + +/* true when we've seen an EOF for the current input file */ +static int yy_eof_has_been_seen; + +static int yy_n_chars; /* number of characters read into yy_ch_buf */ + +/* yy_ch_buf has to be 2 characters longer than YY_BUF_SIZE because we need + * to put in 2 end-of-buffer characters (this is explained where it is + * done) at the end of yy_ch_buf + */ +static char yy_ch_buf[YY_BUF_SIZE + 2]; + +/* yy_hold_char holds the character lost when yytext is formed */ +static char yy_hold_char; +char *yytext; + +static yy_state_type yy_last_accepting_state; +static char *yy_last_accepting_cpos; + +static yy_state_type yy_get_previous_state(); +static int yy_get_next_buffer(); + +static yyunput(); +static input(); + +YY_DECL + { + register yy_state_type yy_current_state; + register char *yy_cp, *yy_bp; + register int yy_act; + +%% user's declarations go here + + if ( yy_init ) + { + if ( ! yy_start ) + yy_start = 1; /* first start state */ + + if ( ! yyin ) + yyin = stdin; + + if ( ! yyout ) + yyout = stdout; + +new_file: + /* this is where we enter upon encountering an end-of-file and + * yywrap() indicating that we should continue processing + */ + + /* we put in the '\n' and start reading from [1] so that an + * initial match-at-newline will be true. + */ + + yy_ch_buf[0] = '\n'; + yy_n_chars = 1; + + /* we always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. + */ + yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; + yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + + yy_eof_has_been_seen = 0; + + yytext = yy_c_buf_p = &yy_ch_buf[1]; + yy_hold_char = *yy_c_buf_p; + yy_init = 0; + } + + while ( 1 ) /* loops until end-of-file is reached */ + { + yy_cp = yy_c_buf_p; + + /* support of yytext */ + *yy_cp = yy_hold_char; + + /* yy_bp points to the position in yy_ch_buf of the start of the + * current run. 
+ */ + yy_bp = yy_cp; + +%% code to set up and find next match goes here + + /* bogus while loop to let YY_BACK_TRACK and EOB_ACT_LAST_MATCH + * actions branch here without introducing an optimizer-daunting + * goto + */ + while ( 1 ) + { +%% code to find the action number goes here + + YY_DO_BEFORE_ACTION; + YY_USER_ACTION; + +#ifdef FLEX_DEBUG + fprintf( stderr, "--accepting rule #%d (\"%s\")\n", + yy_act, yytext ); +#endif + switch ( yy_act ) + { +%% actions go here + + case YY_END_OF_BUFFER: + /* undo the effects of YY_DO_BEFORE_ACTION */ + *yy_cp = yy_hold_char; + + yytext = yy_bp; + + switch ( yy_get_next_buffer() ) + { + case EOB_ACT_END_OF_FILE: + { + if ( yywrap() ) + { + /* note: because we've taken care in + * yy_get_next_buffer() to have set up yytext, + * we can now set up yy_c_buf_p so that if some + * total hoser (like flex itself) wants + * to call the scanner after we return the + * YY_NULL, it'll still work - another YY_NULL + * will get returned. + */ + yy_c_buf_p = yytext; + return ( YY_NULL ); + } + + else + goto new_file; + } + break; + + case EOB_ACT_RESTART_SCAN: + yy_c_buf_p = yytext; + yy_hold_char = *yy_c_buf_p; + break; + + case EOB_ACT_LAST_MATCH: + yy_c_buf_p = &yy_ch_buf[yy_n_chars]; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext; + continue; /* go to "YY_DO_BEFORE_ACTION" */ + } + break; + + default: + printf( "action # %d\n", yy_act ); + YY_FATAL_ERROR( "fatal flex scanner internal error" ); + } + + break; /* exit bogus while loop */ + } + } + } + + +/* yy_get_next_buffer - try to read in new buffer + * + * synopsis + * int yy_get_next_buffer(); + * + * returns a code representing an action + * EOB_ACT_LAST_MATCH - + * EOB_ACT_RESTART_SCAN - restart the scanner + * EOB_ACT_END_OF_FILE - end of file + */ + +static int yy_get_next_buffer() + + { + if ( yy_c_buf_p != &yy_ch_buf[yy_n_chars + 1] ) + { + YY_FATAL_ERROR( "NULL in input" ); + /*NOTREACHED*/ + } + + else + { /* try to read more data 
*/ + register char *dest = yy_ch_buf; + register char *source = yytext - 1; /* copy prev. char, too */ + register int number_to_move, i; + int ret_val; + + /* first move last chars to start of buffer */ + number_to_move = yy_c_buf_p - yytext; + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( yy_eof_has_been_seen ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + yy_n_chars = 0; + + else + /* read in more data */ + YY_INPUT( (&yy_ch_buf[number_to_move]), yy_n_chars, + YY_BUF_SIZE - number_to_move - 1 ); + + if ( yy_n_chars == 0 ) + { + if ( number_to_move == 1 ) + ret_val = EOB_ACT_END_OF_FILE; + else + ret_val = EOB_ACT_LAST_MATCH; + + yy_eof_has_been_seen = 1; + } + + else + ret_val = EOB_ACT_RESTART_SCAN; + + yy_n_chars += number_to_move; + yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; + yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + + /* yytext begins at the second character in + * yy_ch_buf; the first character is the one which + * preceded it before reading in the latest buffer; + * it needs to be kept around in case it's a + * newline, so yy_get_previous_state() will have + * with '^' rules active + */ + + yytext = &yy_ch_buf[1]; + + return ( ret_val ); + } + } + + +/* yy_get_previous_state - get the state just before the EOB char was reached + * + * synopsis + * yy_state_type yy_get_previous_state(); + */ + +static yy_state_type yy_get_previous_state() + + { + register yy_state_type yy_current_state; + register char *yy_cp; + +%% code to get the start state into yy_current_state goes here + + for ( yy_cp = yytext; yy_cp < yy_c_buf_p; ++yy_cp ) + { +%% code to find the next state goes here + } + + return ( yy_current_state ); + } + + +static yyunput( c, yy_bp ) +int c; +register char *yy_bp; + + { + register char *yy_cp = yy_c_buf_p; + + *yy_cp = yy_hold_char; /* undo effects of setting up yytext */ + + if ( yy_cp < yy_ch_buf + 2 ) + { /* need to shift things up to make room */ + 
register int number_to_move = yy_n_chars + 2; /* +2 for EOB chars */ + register char *dest = &yy_ch_buf[YY_BUF_SIZE + 2]; + register char *source = &yy_ch_buf[number_to_move]; + + while ( source > yy_ch_buf ) + *--dest = *--source; + + yy_cp += dest - source; + yy_bp += dest - source; + + if ( yy_cp < yy_ch_buf + 2 ) + YY_FATAL_ERROR( "flex scanner push-back overflow" ); + } + + if ( yy_cp > yy_bp && yy_cp[-1] == '\n' ) + yy_cp[-2] = '\n'; + + *--yy_cp = c; + + YY_DO_BEFORE_ACTION; /* set up yytext again */ + } + + +static int input() + + { + int c; + char *yy_cp = yy_c_buf_p; + + *yy_cp = yy_hold_char; + + if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) + { /* need more input */ + yytext = yy_c_buf_p; + ++yy_c_buf_p; + + switch ( yy_get_next_buffer() ) + { + /* this code, unfortunately, is somewhat redundant with + * that above + */ + case EOB_ACT_END_OF_FILE: + { + if ( yywrap() ) + { + yy_c_buf_p = yytext; + return ( EOF ); + } + + yy_ch_buf[0] = '\n'; + yy_n_chars = 1; + yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; + yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + yy_eof_has_been_seen = 0; + yytext = yy_c_buf_p = &yy_ch_buf[1]; + yy_hold_char = *yy_c_buf_p; + + return ( input() ); + } + break; + + case EOB_ACT_RESTART_SCAN: + yy_c_buf_p = yytext; + break; + + case EOB_ACT_LAST_MATCH: + YY_FATAL_ERROR( "unexpected last match in input()" ); + } + } + + c = *yy_c_buf_p; + yy_hold_char = *++yy_c_buf_p; + + return ( c ); + } -- cgit v1.2.3 From 071c8d64b6def6889e14c790753c9c165fe8224c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 24 May 1989 11:42:56 +0000 Subject: added BSD copyright notice --- ccl.c | 32 +++++++++++++++++++++++++------- dfa.c | 32 +++++++++++++++++++++++++------- misc.c | 34 ++++++++++++++++++++++++++-------- parse.y | 28 +++++++++++++++++++++++----- scan.l | 26 +++++++++++++++++++++----- sym.c | 32 +++++++++++++++++++++++++------- tblcmp.c | 32 +++++++++++++++++++++++++------- yylex.c | 34 ++++++++++++++++++++++++++-------- 8 files 
changed, 196 insertions(+), 54 deletions(-) diff --git a/ccl.c b/ccl.c index 7d1b552..37f3351 100644 --- a/ccl.c +++ b/ccl.c @@ -1,24 +1,42 @@ /* ccl - routines for character classes */ /* - * Copyright (c) 1987, the University of California + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. * * The United States Government has rights in this work pursuant to * contract no. DE-AC03-76SF00098 between the United States Department of * Energy and the University of California. - * - * This program may be redistributed. Enhancements and derivative works - * may be created provided the new works, if made available to the general - * public, are made available for use by anyone. + * + * Redistribution and use in source and binary forms are permitted + * provided that the above copyright notice and this paragraph are + * duplicated in all such forms and that any documentation, + * advertising materials, and other materials related to such + * distribution and use acknowledge that the software was developed + * by the University of California, Berkeley. The name of the + * University may not be used to endorse or promote products derived + * from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#include "flexdef.h" - #ifndef lint + +static char copyright[] = + "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ + All rights reserved.\n"; + static char rcsid[] = "@(#) $Header$ (LBL)"; + #endif +#include "flexdef.h" + /* ccladd - add a single character to a ccl * * synopsis diff --git a/dfa.c b/dfa.c index 796a8d3..e177797 100644 --- a/dfa.c +++ b/dfa.c @@ -1,24 +1,42 @@ /* dfa - DFA construction routines */ /* - * Copyright (c) 1987, the University of California + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. * * The United States Government has rights in this work pursuant to * contract no. DE-AC03-76SF00098 between the United States Department of * Energy and the University of California. - * - * This program may be redistributed. Enhancements and derivative works - * may be created provided the new works, if made available to the general - * public, are made available for use by anyone. + * + * Redistribution and use in source and binary forms are permitted + * provided that the above copyright notice and this paragraph are + * duplicated in all such forms and that any documentation, + * advertising materials, and other materials related to such + * distribution and use acknowledge that the software was developed + * by the University of California, Berkeley. The name of the + * University may not be used to endorse or promote products derived + * from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#include "flexdef.h" - #ifndef lint + +static char copyright[] = + "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ + All rights reserved.\n"; + static char rcsid[] = "@(#) $Header$ (LBL)"; + #endif +#include "flexdef.h" + /* check_for_backtracking - check a DFA state for backtracking * diff --git a/misc.c b/misc.c index 74934e4..5b36e57 100644 --- a/misc.c +++ b/misc.c @@ -1,25 +1,43 @@ /* misc - miscellaneous flex routines */ /* - * Copyright (c) 1987, the University of California + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. * * The United States Government has rights in this work pursuant to * contract no. DE-AC03-76SF00098 between the United States Department of * Energy and the University of California. - * - * This program may be redistributed. Enhancements and derivative works - * may be created provided the new works, if made available to the general - * public, are made available for use by anyone. + * + * Redistribution and use in source and binary forms are permitted + * provided that the above copyright notice and this paragraph are + * duplicated in all such forms and that any documentation, + * advertising materials, and other materials related to such + * distribution and use acknowledge that the software was developed + * by the University of California, Berkeley. The name of the + * University may not be used to endorse or promote products derived + * from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#include -#include "flexdef.h" - #ifndef lint + +static char copyright[] = + "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ + All rights reserved.\n"; + static char rcsid[] = "@(#) $Header$ (LBL)"; + #endif +#include +#include "flexdef.h" + char *malloc(), *realloc(); diff --git a/parse.y b/parse.y index 55cb3c5..a26c5d4 100644 --- a/parse.y +++ b/parse.y @@ -1,15 +1,27 @@ /* parse.y - parser for flex input */ /* - * Copyright (c) 1987, the University of California + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. * * The United States Government has rights in this work pursuant to * contract no. DE-AC03-76SF00098 between the United States Department of * Energy and the University of California. - * - * This program may be redistributed. Enhancements and derivative works - * may be created provided the new works, if made available to the general - * public, are made available for use by anyone. + * + * Redistribution and use in source and binary forms are permitted + * provided that the above copyright notice and this paragraph are + * duplicated in all such forms and that any documentation, + * advertising materials, and other materials related to such + * distribution and use acknowledge that the software was developed + * by the University of California, Berkeley. The name of the + * University may not be used to endorse or promote products derived + * from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
*/ %token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL @@ -19,8 +31,14 @@ #include "flexdef.h" #ifndef lint + +static char copyright[] = + "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ + All rights reserved.\n"; + static char rcsid[] = "@(#) $Header$ (LBL)"; + #endif int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen; diff --git a/scan.l b/scan.l index 4ac8ba1..4717cfa 100644 --- a/scan.l +++ b/scan.l @@ -1,15 +1,27 @@ /* scan.l - scanner for flex input */ /* - * Copyright (c) 1987, the University of California + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. * * The United States Government has rights in this work pursuant to * contract no. DE-AC03-76SF00098 between the United States Department of * Energy and the University of California. - * - * This program may be redistributed. Enhancements and derivative works - * may be created provided the new works, if made available to the general - * public, are made available for use by anyone. + * + * Redistribution and use in source and binary forms are permitted + * provided that the above copyright notice and this paragraph are + * duplicated in all such forms and that any documentation, + * advertising materials, and other materials related to such + * distribution and use acknowledge that the software was developed + * by the University of California, Berkeley. The name of the + * University may not be used to endorse or promote products derived + * from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
*/ %{ @@ -17,6 +29,10 @@ #include "parse.h" #ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1989 The Regents of the University of California.\n\ + All rights reserved.\n"; + static char rcsid[] = "@(#) $Header$ (LBL)"; #endif diff --git a/sym.c b/sym.c index 78111c7..34314f5 100644 --- a/sym.c +++ b/sym.c @@ -1,24 +1,42 @@ /* sym - symbol table routines */ /* - * Copyright (c) 1987, the University of California + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. * * The United States Government has rights in this work pursuant to * contract no. DE-AC03-76SF00098 between the United States Department of * Energy and the University of California. - * - * This program may be redistributed. Enhancements and derivative works - * may be created provided the new works, if made available to the general - * public, are made available for use by anyone. + * + * Redistribution and use in source and binary forms are permitted + * provided that the above copyright notice and this paragraph are + * duplicated in all such forms and that any documentation, + * advertising materials, and other materials related to such + * distribution and use acknowledge that the software was developed + * by the University of California, Berkeley. The name of the + * University may not be used to endorse or promote products derived + * from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#include "flexdef.h" - #ifndef lint + +static char copyright[] = + "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ + All rights reserved.\n"; + static char rcsid[] = "@(#) $Header$ (LBL)"; + #endif +#include "flexdef.h" + struct hash_entry *ndtbl[NAME_TABLE_HASH_SIZE]; struct hash_entry *sctbl[START_COND_HASH_SIZE]; struct hash_entry *ccltab[CCL_HASH_SIZE]; diff --git a/tblcmp.c b/tblcmp.c index 320eec9..18d653f 100644 --- a/tblcmp.c +++ b/tblcmp.c @@ -1,24 +1,42 @@ /* tblcmp - table compression routines */ /* - * Copyright (c) 1987, the University of California + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. * * The United States Government has rights in this work pursuant to * contract no. DE-AC03-76SF00098 between the United States Department of * Energy and the University of California. - * - * This program may be redistributed. Enhancements and derivative works - * may be created provided the new works, if made available to the general - * public, are made available for use by anyone. + * + * Redistribution and use in source and binary forms are permitted + * provided that the above copyright notice and this paragraph are + * duplicated in all such forms and that any documentation, + * advertising materials, and other materials related to such + * distribution and use acknowledge that the software was developed + * by the University of California, Berkeley. The name of the + * University may not be used to endorse or promote products derived + * from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#include "flexdef.h" - #ifndef lint + +static char copyright[] = + "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ + All rights reserved.\n"; + static char rcsid[] = "@(#) $Header$ (LBL)"; + #endif +#include "flexdef.h" + /* bldtbl - build table entries for dfa state * * synopsis diff --git a/yylex.c b/yylex.c index 9874c0e..357aa07 100644 --- a/yylex.c +++ b/yylex.c @@ -1,25 +1,43 @@ /* yylex - scanner front-end for flex */ /* - * Copyright (c) 1987, the University of California + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. * * The United States Government has rights in this work pursuant to * contract no. DE-AC03-76SF00098 between the United States Department of * Energy and the University of California. - * - * This program may be redistributed. Enhancements and derivative works - * may be created provided the new works, if made available to the general - * public, are made available for use by anyone. + * + * Redistribution and use in source and binary forms are permitted + * provided that the above copyright notice and this paragraph are + * duplicated in all such forms and that any documentation, + * advertising materials, and other materials related to such + * distribution and use acknowledge that the software was developed + * by the University of California, Berkeley. The name of the + * University may not be used to endorse or promote products derived + * from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#include "flexdef.h" -#include "parse.h" - #ifndef lint + +static char copyright[] = + "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ + All rights reserved.\n"; + static char rcsid[] = "@(#) $Header$ (LBL)"; + #endif +#include "flexdef.h" +#include "parse.h" + /* yylex - scan for a regular expression token * * synopsis -- cgit v1.2.3 From 43b812204a705a06d77086ab8c02f5b9a9917b32 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 24 May 1989 11:44:18 +0000 Subject: Added BSD copyright notice --- ecs.c | 32 +++++++++++++++++++++++++------- gen.c | 32 +++++++++++++++++++++++++------- nfa.c | 32 +++++++++++++++++++++++++------- 3 files changed, 75 insertions(+), 21 deletions(-) diff --git a/ecs.c b/ecs.c index 1a87d41..b9b84c2 100644 --- a/ecs.c +++ b/ecs.c @@ -1,24 +1,42 @@ /* ecs - equivalence class routines */ /* - * Copyright (c) 1987, the University of California + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. * * The United States Government has rights in this work pursuant to * contract no. DE-AC03-76SF00098 between the United States Department of * Energy and the University of California. - * - * This program may be redistributed. Enhancements and derivative works - * may be created provided the new works, if made available to the general - * public, are made available for use by anyone. + * + * Redistribution and use in source and binary forms are permitted + * provided that the above copyright notice and this paragraph are + * duplicated in all such forms and that any documentation, + * advertising materials, and other materials related to such + * distribution and use acknowledge that the software was developed + * by the University of California, Berkeley. 
The name of the + * University may not be used to endorse or promote products derived + * from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ -#include "flexdef.h" - #ifndef lint + +static char copyright[] = + "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ + All rights reserved.\n"; + static char rcsid[] = "@(#) $Header$ (LBL)"; + #endif +#include "flexdef.h" + /* ccl2ecl - convert character classes to set of equivalence classes * * synopsis diff --git a/gen.c b/gen.c index 52a42a6..fc81c84 100644 --- a/gen.c +++ b/gen.c @@ -1,24 +1,42 @@ /* gen - actual generation (writing) of flex scanners */ /* - * Copyright (c) 1987, the University of California + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. * * The United States Government has rights in this work pursuant to * contract no. DE-AC03-76SF00098 between the United States Department of * Energy and the University of California. - * - * This program may be redistributed. Enhancements and derivative works - * may be created provided the new works, if made available to the general - * public, are made available for use by anyone. + * + * Redistribution and use in source and binary forms are permitted + * provided that the above copyright notice and this paragraph are + * duplicated in all such forms and that any documentation, + * advertising materials, and other materials related to such + * distribution and use acknowledge that the software was developed + * by the University of California, Berkeley. 
The name of the + * University may not be used to endorse or promote products derived + * from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ -#include "flexdef.h" - #ifndef lint + +static char copyright[] = + "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ + All rights reserved.\n"; + static char rcsid[] = "@(#) $Header$ (LBL)"; + #endif +#include "flexdef.h" + static int indent_level = 0; /* each level is 4 spaces */ diff --git a/nfa.c b/nfa.c index 90d7471..d1c6f4e 100644 --- a/nfa.c +++ b/nfa.c @@ -1,24 +1,42 @@ /* nfa - NFA construction routines */ /* - * Copyright (c) 1987, the University of California + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. * * The United States Government has rights in this work pursuant to * contract no. DE-AC03-76SF00098 between the United States Department of * Energy and the University of California. - * - * This program may be redistributed. Enhancements and derivative works - * may be created provided the new works, if made available to the general - * public, are made available for use by anyone. + * + * Redistribution and use in source and binary forms are permitted + * provided that the above copyright notice and this paragraph are + * duplicated in all such forms and that any documentation, + * advertising materials, and other materials related to such + * distribution and use acknowledge that the software was developed + * by the University of California, Berkeley. The name of the + * University may not be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ -#include "flexdef.h" - #ifndef lint + +static char copyright[] = + "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ + All rights reserved.\n"; + static char rcsid[] = "@(#) $Header$ (LBL)"; + #endif +#include "flexdef.h" + /* add_accept - add an accepting state to a machine * * synopsis -- cgit v1.2.3 From fac491fba31fcf3292d026e389e75777433eabb8 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 24 May 1989 11:44:55 +0000 Subject: added BSD copyright notice. Removed references to FAST_SKELETON_FILE. --- main.c | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/main.c b/main.c index ca1acb7..bd2a90a 100644 --- a/main.c +++ b/main.c @@ -1,26 +1,45 @@ /* flex - tool to generate fast lexical analyzers * * - * Copyright (c) 1987, the University of California + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. * * The United States Government has rights in this work pursuant to * contract no. DE-AC03-76SF00098 between the United States Department of * Energy and the University of California. - * - * This program may be redistributed. Enhancements and derivative works - * may be created provided the new works, if made available to the general - * public, are made available for use by anyone. 
+ * + * Redistribution and use in source and binary forms are permitted + * provided that the above copyright notice and this paragraph are + * duplicated in all such forms and that any documentation, + * advertising materials, and other materials related to such + * distribution and use acknowledge that the software was developed + * by the University of California, Berkeley. The name of the + * University may not be used to endorse or promote products derived + * from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * */ -#include "flexdef.h" - #ifndef lint + +static char copyright[] = + "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ + All rights reserved.\n"; + static char rcsid[] = "@(#) $Header$ (LBL)"; + #endif +#include "flexdef.h" + + /* these globals are all defined and commented in flexdef.h */ int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt; int interactive, caseins, useecs, fulltbl, usemecs; @@ -429,11 +448,7 @@ get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ static char skeleton_name_storage[400]; skelname = skeleton_name_storage; - - if ( fullspd || fulltbl ) - (void) strcpy( skelname, FAST_SKELETON_FILE ); - else - (void) strcpy( skelname, DEFAULT_SKELETON_FILE ); + (void) strcpy( skelname, DEFAULT_SKELETON_FILE ); } if ( ! use_stdout ) -- cgit v1.2.3 From f96c672fbd4cbf28d155c4a39fb9c24e0f545736 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 24 May 1989 11:45:36 +0000 Subject: Added BSD copyright notice. Removed FAST_SKELETON_FILE. 
--- flexdef.h | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/flexdef.h b/flexdef.h index 0d9dab8..33a9d2e 100644 --- a/flexdef.h +++ b/flexdef.h @@ -1,16 +1,36 @@ /* flexdef - definitions file for flex */ /* - * Copyright (c) 1987, the University of California + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. * * The United States Government has rights in this work pursuant to * contract no. DE-AC03-76SF00098 between the United States Department of * Energy and the University of California. - * - * This program may be redistributed. Enhancements and derivative works - * may be created provided the new works, if made available to the general - * public, are made available for use by anyone. - */ + * + * Redistribution and use in source and binary forms are permitted + * provided that the above copyright notice and this paragraph are + * duplicated in all such forms and that any documentation, + * advertising materials, and other materials related to such + * distribution and use acknowledge that the software was developed + * by the University of California, Berkeley. The name of the + * University may not be used to endorse or promote products derived + * from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#ifndef lint + +static char copyright[] = + "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ + All rights reserved.\n"; + +#endif /* @(#) $Header$ (LBL) */ @@ -67,10 +87,6 @@ char *sprintf(); /* keep lint happy */ #define DEFAULT_SKELETON_FILE "flex.skel" #endif -#ifndef FAST_SKELETON_FILE -#define FAST_SKELETON_FILE "flex.fastskel" -#endif - /* special chk[] values marking the slots taking by end-of-buffer and action * numbers */ -- cgit v1.2.3 From dd1269ffd68198c596aec85bdd39ca77b916c99e Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 24 May 1989 11:48:24 +0000 Subject: removed static char copyright --- flexdef.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/flexdef.h b/flexdef.h index 33a9d2e..379a2fa 100644 --- a/flexdef.h +++ b/flexdef.h @@ -24,14 +24,6 @@ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ -#ifndef lint - -static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ - All rights reserved.\n"; - -#endif - /* @(#) $Header$ (LBL) */ #ifndef FILE -- cgit v1.2.3 From 9ff111c5b135a37c5107fcf8797f7e739d50e981 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 24 May 1989 12:48:13 +0000 Subject: updated for 2nd release Beta test added RCS header --- README | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/README b/README index 3c77332..f9f9f46 100644 --- a/README +++ b/README @@ -1,8 +1,10 @@ +// $Header$ + The flex distribution consists of the following files: README This message - Changes Differences between this release and the beta-test + Changes Differences between this release and the previous one Makefile flexdef.h @@ -11,6 +13,7 @@ The flex distribution consists of the following files: ccl.c dfa.c flex sources ecs.c + gen.c main.c misc.c nfa.c @@ -20,16 +23,10 @@ The flex distribution consists of the following files: scan.c.dist pre-flex'd version of scan.l - flex.skel - flex.fastskel - 
flexskelcom.h skeleton scanner sources - flexskeldef.h - fastskeldef.h + flex.skel skeleton for generated scanners flex.1 manual entry - Timings a brief note comparing timings of flex vs. lex - The files are packaged as a compressed shell archive, which in turn contains seven shell archives. Create a directory where you want flex to live, cd there, and use @@ -47,15 +44,12 @@ to live, cd there, and use to extract them. -Either move {flexskelcom.h,flexskeldef.h,fastskeldef.h} into /usr/include -or edit {flex.skel,flex.fastskel,flexskeldef.h,fastskeldef.h,scan.c.dist} -and wire in the full pathname of where you are going to keep the include files. +Decide where you want to keep flex.skel (suggestion: /usr/local/lib) and +move it there. Edit "Makefile" and change the definition of SKELETON_FILE +to reflect the full pathname of flex.skel. + +For a System V machine, add "-DSV" to CFLAGS in the Makefile. -Decide where you want to keep {flex.skel,flex.fastskel} (suggestion: -/usr/local/lib) and move it there. Edit "Makefile" and change the -definitions of SKELETON_FILE and F_SKELETON_FILE to reflect the full -pathnames of {flex.skel,flex.fastskel}. For a System V machine, add -"-DSV" to CFLAGS. To make flex for the first time, use: -- cgit v1.2.3 From e831ac409985484419486fabe3ca605b4646126f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 25 May 1989 11:46:34 +0000 Subject: Split copyright string into two to avoid tempting fate with \ sequences ... 
--- ccl.c | 4 ++-- dfa.c | 4 ++-- ecs.c | 4 ++-- gen.c | 4 ++-- main.c | 4 ++-- misc.c | 4 ++-- scan.l | 4 ++-- sym.c | 4 ++-- tblcmp.c | 4 ++-- yylex.c | 4 ++-- 10 files changed, 20 insertions(+), 20 deletions(-) diff --git a/ccl.c b/ccl.c index 37f3351..4f36363 100644 --- a/ccl.c +++ b/ccl.c @@ -27,8 +27,8 @@ #ifndef lint static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ - All rights reserved.\n"; + "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; +static char CR_continuation[] = "@(#) All rights reserved.\n"; static char rcsid[] = "@(#) $Header$ (LBL)"; diff --git a/dfa.c b/dfa.c index e177797..5241e4d 100644 --- a/dfa.c +++ b/dfa.c @@ -27,8 +27,8 @@ #ifndef lint static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ - All rights reserved.\n"; + "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; +static char CR_continuation[] = "@(#) All rights reserved.\n"; static char rcsid[] = "@(#) $Header$ (LBL)"; diff --git a/ecs.c b/ecs.c index b9b84c2..0808e36 100644 --- a/ecs.c +++ b/ecs.c @@ -27,8 +27,8 @@ #ifndef lint static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ - All rights reserved.\n"; + "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; +static char CR_continuation[] = "@(#) All rights reserved.\n"; static char rcsid[] = "@(#) $Header$ (LBL)"; diff --git a/gen.c b/gen.c index fc81c84..dd88cb7 100644 --- a/gen.c +++ b/gen.c @@ -27,8 +27,8 @@ #ifndef lint static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ - All rights reserved.\n"; + "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; +static char CR_continuation[] = "@(#) All rights reserved.\n"; static char rcsid[] = "@(#) $Header$ (LBL)"; diff --git a/main.c b/main.c index bd2a90a..063417d 100644 --- a/main.c +++ b/main.c 
@@ -28,8 +28,8 @@ #ifndef lint static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ - All rights reserved.\n"; + "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; +static char CR_continuation[] = "@(#) All rights reserved.\n"; static char rcsid[] = "@(#) $Header$ (LBL)"; diff --git a/misc.c b/misc.c index 5b36e57..08ebf2d 100644 --- a/misc.c +++ b/misc.c @@ -27,8 +27,8 @@ #ifndef lint static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ - All rights reserved.\n"; + "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; +static char CR_continuation[] = "@(#) All rights reserved.\n"; static char rcsid[] = "@(#) $Header$ (LBL)"; diff --git a/scan.l b/scan.l index 4717cfa..6c9f0c0 100644 --- a/scan.l +++ b/scan.l @@ -30,8 +30,8 @@ #ifndef lint static char copyright[] = -"@(#) Copyright (c) 1989 The Regents of the University of California.\n\ - All rights reserved.\n"; + "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; +static char CR_continuation[] = "@(#) All rights reserved.\n"; static char rcsid[] = "@(#) $Header$ (LBL)"; diff --git a/sym.c b/sym.c index 34314f5..3cccd8d 100644 --- a/sym.c +++ b/sym.c @@ -27,8 +27,8 @@ #ifndef lint static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ - All rights reserved.\n"; + "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; +static char CR_continuation[] = "@(#) All rights reserved.\n"; static char rcsid[] = "@(#) $Header$ (LBL)"; diff --git a/tblcmp.c b/tblcmp.c index 18d653f..d13ef8e 100644 --- a/tblcmp.c +++ b/tblcmp.c @@ -27,8 +27,8 @@ #ifndef lint static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ - All rights reserved.\n"; + "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; +static char CR_continuation[] = "@(#) All 
rights reserved.\n"; static char rcsid[] = "@(#) $Header$ (LBL)"; diff --git a/yylex.c b/yylex.c index 357aa07..2e1cead 100644 --- a/yylex.c +++ b/yylex.c @@ -27,8 +27,8 @@ #ifndef lint static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ - All rights reserved.\n"; + "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; +static char CR_continuation[] = "@(#) All rights reserved.\n"; static char rcsid[] = "@(#) $Header$ (LBL)"; -- cgit v1.2.3 From d528679da50e1c9fd33459b7d2a5a64de16c783f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 25 May 1989 11:48:20 +0000 Subject: Split copyright string. Added check for empty machine in dupmachine(). --- nfa.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/nfa.c b/nfa.c index d1c6f4e..d386037 100644 --- a/nfa.c +++ b/nfa.c @@ -27,8 +27,8 @@ #ifndef lint static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ - All rights reserved.\n"; + "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; +static char CR_continuation[] = "@(#) All rights reserved.\n"; static char rcsid[] = "@(#) $Header$ (LBL)"; @@ -159,7 +159,9 @@ int dupmachine( mach ) int mach; { - int i, state, init, last = lastst[mach], state_offset; + int i, init, state_offset; + int state = 0; + int last = lastst[mach]; for ( i = firstst[mach]; i <= last; ++i ) { @@ -176,6 +178,9 @@ int mach; accptnum[state] = accptnum[i]; } + if ( state == 0 ) + flexfatal( "empty machine in dupmachine()" ); + state_offset = state - i + 1; init = mach + state_offset; -- cgit v1.2.3 From c29434b99e6c034103a0c240603037f9a73db0f8 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 25 May 1989 11:48:41 +0000 Subject: Split copyright string. 
--- parse.y | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parse.y b/parse.y index a26c5d4..6f742dc 100644 --- a/parse.y +++ b/parse.y @@ -33,8 +33,8 @@ #ifndef lint static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ - All rights reserved.\n"; + "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; +static char CR_continuation[] = "@(#) All rights reserved.\n"; static char rcsid[] = "@(#) $Header$ (LBL)"; -- cgit v1.2.3 From 6dae978eaa528ef7da75df54e91b4744805025ca Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 25 May 1989 11:49:28 +0000 Subject: Cleaned up forward declarations of yyunput() and input() --- flex.skl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flex.skl b/flex.skl index 5ac11d2..1ccbdff 100644 --- a/flex.skl +++ b/flex.skl @@ -119,8 +119,8 @@ static char *yy_last_accepting_cpos; static yy_state_type yy_get_previous_state(); static int yy_get_next_buffer(); -static yyunput(); -static input(); +static void yyunput(); +static int input(); YY_DECL { @@ -355,7 +355,7 @@ static yy_state_type yy_get_previous_state() } -static yyunput( c, yy_bp ) +static void yyunput( c, yy_bp ) int c; register char *yy_bp; -- cgit v1.2.3 From 9512aa976d4805901305531bc0c45122139ca8d5 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 25 May 1989 12:21:18 +0000 Subject: fixed bug with -I and backtracking --- gen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gen.c b/gen.c index dd88cb7..87e7058 100644 --- a/gen.c +++ b/gen.c @@ -548,7 +548,7 @@ gen_next_match() else printf( "while ( yy_current_state != %d );\n", jamstate ); - if ( ! reject ) + if ( ! reject && ! 
interactive ) { /* do the guaranteed-needed backtrack to figure out the match */ indent_puts( "yy_cp = yy_last_accepting_cpos;" ); -- cgit v1.2.3 From 2aca1d0aba150eb989b84f6d0beffd87c02b618f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Jun 1989 15:37:39 +0000 Subject: *** empty log message *** --- README | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/README b/README index f9f9f46..2e015d6 100644 --- a/README +++ b/README @@ -1,5 +1,7 @@ // $Header$ +This is release 2.0.1 of flex - a beta release. + The flex distribution consists of the following files: README This message @@ -11,8 +13,8 @@ The flex distribution consists of the following files: parse.y scan.l ccl.c - dfa.c flex sources - ecs.c + dfa.c + ecs.c flex sources gen.c main.c misc.c @@ -21,12 +23,16 @@ The flex distribution consists of the following files: tblcmp.c yylex.c - scan.c.dist pre-flex'd version of scan.l + initscan.c pre-flex'd version of scan.l flex.skel skeleton for generated scanners flex.1 manual entry + COPYING flex's copyright + MISC miscellaneous stuff (e.g., old VMS Makefile) which + almost no one will care about + The files are packaged as a compressed shell archive, which in turn contains seven shell archives. Create a directory where you want flex to live, cd there, and use @@ -48,7 +54,7 @@ Decide where you want to keep flex.skel (suggestion: /usr/local/lib) and move it there. Edit "Makefile" and change the definition of SKELETON_FILE to reflect the full pathname of flex.skel. -For a System V machine, add "-DSV" to CFLAGS in the Makefile. +For a System V machine, add "-DSYS_V" to CFLAGS in the Makefile. 
To make flex for the first time, use: -- cgit v1.2.3 From f7983859899c2c3e724ede01a406e99e0b1dee90 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Jun 1989 15:38:27 +0000 Subject: 2.0.1 beta --- flex.skl | 178 ++++++++++++++++++++++++++++++++++++++++---------------------- flexdef.h | 26 +++++++-- gen.c | 76 +++++++++++++++++++++++---- misc.c | 84 ++++++++++++++++------------- nfa.c | 2 +- parse.y | 70 ++++++++++++++++++++++-- scan.l | 26 +++++---- sym.c | 11 ++-- 8 files changed, 338 insertions(+), 135 deletions(-) diff --git a/flex.skl b/flex.skl index 1ccbdff..54e9ded 100644 --- a/flex.skl +++ b/flex.skl @@ -8,18 +8,17 @@ #define FLEX_SCANNER -/* returned upon end-of-file */ -#define YY_END_TOK 0 - /* amount of stuff to slurp up with each read */ +#ifndef YY_READ_BUF_SIZE #define YY_READ_BUF_SIZE 8192 +#endif +#ifndef YY_BUF_SIZE #define YY_BUF_SIZE (YY_READ_BUF_SIZE * 2) /* size of input buffer */ +#endif -/* number of characters one rule can match. One less than YY_BUF_SIZE to make - * sure we never access beyond the end of an array - */ -#define YY_BUF_MAX (YY_BUF_SIZE - 1) +/* returned upon end-of-file */ +#define YY_END_TOK 0 /* copy whatever the last rule matched to the standard output */ @@ -32,6 +31,7 @@ if ( (result = read( fileno(yyin), buf, max_size )) < 0 ) \ YY_FATAL_ERROR( "read() in flex scanner failed" ); #define YY_NULL 0 +#define yyterminate() return ( YY_NULL ) /* report a fatal error */ #define YY_FATAL_ERROR(msg) \ @@ -45,14 +45,25 @@ #define yywrap() 1 /* enter a start condition. 
This macro really ought to take a parameter, - * but we do it the disgusting crufty way that old Unix-lex does it + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN */ -#define BEGIN yy_start = 1 + +#define BEGIN yy_start = 1 + 2 * + +/* action number for EOF rule of a given start state */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) + +/* special action meaning "start processing a new file" */ +#define YY_NEW_FILE goto new_file /* default declaration of generated scanner - a define so the user can * easily add parameters */ +#ifdef __STDC__ +#define YY_DECL int yylex( void ) +#else #define YY_DECL int yylex() +#endif /* code executed at the end of each rule */ #define YY_BREAK break; @@ -87,9 +98,14 @@ #define YY_USER_ACTION -%% section 1 code and the data tables for the DFA go here - FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; +char *yytext; + +#ifndef __STDC__ +#define const +#endif + +%% section 1 code and the data tables for the DFA go here /* these variables are all declared out here so that section 3 code can * manipulate them @@ -111,16 +127,23 @@ static char yy_ch_buf[YY_BUF_SIZE + 2]; /* yy_hold_char holds the character lost when yytext is formed */ static char yy_hold_char; -char *yytext; static yy_state_type yy_last_accepting_state; static char *yy_last_accepting_cpos; +#ifdef __STDC__ +static yy_state_type yy_get_previous_state( void ); +static int yy_get_next_buffer( void ); +static void yyunput( int c, char *buf_ptr ); +static int input( void ); +static void yyrestart( FILE *input_file ); +#else static yy_state_type yy_get_previous_state(); static int yy_get_next_buffer(); - static void yyunput(); static int input(); +static void yyrestart(); +#endif YY_DECL { @@ -177,13 +200,13 @@ new_file: /* yy_bp points to the position in yy_ch_buf of the start of the * current run. 
*/ - yy_bp = yy_cp; +%% yymore()-related code goes here %% code to set up and find next match goes here - /* bogus while loop to let YY_BACK_TRACK and EOB_ACT_LAST_MATCH - * actions branch here without introducing an optimizer-daunting - * goto + /* bogus while loop to let YY_BACK_TRACK, EOB_ACT_LAST_MATCH, + * and EOF actions branch here without introducing an optimizer- + * daunting goto */ while ( 1 ) { @@ -196,6 +219,8 @@ new_file: fprintf( stderr, "--accepting rule #%d (\"%s\")\n", yy_act, yytext ); #endif + +do_action: /* this label is used only to access EOF actions */ switch ( yy_act ) { %% actions go here @@ -221,11 +246,13 @@ new_file: * will get returned. */ yy_c_buf_p = yytext; - return ( YY_NULL ); + + yy_act = YY_STATE_EOF((yy_start - 1) / 2); + goto do_action; } else - goto new_file; + YY_NEW_FILE; } break; @@ -270,65 +297,70 @@ new_file: static int yy_get_next_buffer() { + register char *dest = yy_ch_buf; + register char *source = yytext - 1; /* copy prev. char, too */ + register int number_to_move, i; + int ret_val; + if ( yy_c_buf_p != &yy_ch_buf[yy_n_chars + 1] ) { YY_FATAL_ERROR( "NULL in input" ); /*NOTREACHED*/ } - else - { /* try to read more data */ - register char *dest = yy_ch_buf; - register char *source = yytext - 1; /* copy prev. 
char, too */ - register int number_to_move, i; - int ret_val; - - /* first move last chars to start of buffer */ - number_to_move = yy_c_buf_p - yytext; - - for ( i = 0; i < number_to_move; ++i ) - *(dest++) = *(source++); - - if ( yy_eof_has_been_seen ) - /* don't do the read, it's not guaranteed to return an EOF, - * just force an EOF - */ - yy_n_chars = 0; + /* try to read more data */ - else - /* read in more data */ - YY_INPUT( (&yy_ch_buf[number_to_move]), yy_n_chars, - YY_BUF_SIZE - number_to_move - 1 ); + /* first move last chars to start of buffer */ + number_to_move = yy_c_buf_p - yytext; - if ( yy_n_chars == 0 ) - { - if ( number_to_move == 1 ) - ret_val = EOB_ACT_END_OF_FILE; - else - ret_val = EOB_ACT_LAST_MATCH; + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); - yy_eof_has_been_seen = 1; - } + if ( yy_eof_has_been_seen ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + yy_n_chars = 0; - else - ret_val = EOB_ACT_RESTART_SCAN; + else + { + int num_to_read = YY_BUF_SIZE - number_to_move - 1; - yy_n_chars += number_to_move; - yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; - yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; - /* yytext begins at the second character in - * yy_ch_buf; the first character is the one which - * preceded it before reading in the latest buffer; - * it needs to be kept around in case it's a - * newline, so yy_get_previous_state() will have - * with '^' rules active - */ + /* read in more data */ + YY_INPUT( (&yy_ch_buf[number_to_move]), yy_n_chars, num_to_read ); + } - yytext = &yy_ch_buf[1]; + if ( yy_n_chars == 0 ) + { + if ( number_to_move == 1 ) + ret_val = EOB_ACT_END_OF_FILE; + else + ret_val = EOB_ACT_LAST_MATCH; - return ( ret_val ); + yy_eof_has_been_seen = 1; } + + else + ret_val = EOB_ACT_RESTART_SCAN; + + yy_n_chars += number_to_move; + yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; + 
yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + + /* yytext begins at the second character in + * yy_ch_buf; the first character is the one which + * preceded it before reading in the latest buffer; + * it needs to be kept around in case it's a + * newline, so yy_get_previous_state() will have + * with '^' rules active + */ + + yytext = &yy_ch_buf[1]; + + return ( ret_val ); } @@ -355,9 +387,13 @@ static yy_state_type yy_get_previous_state() } +#ifdef __STDC__ +static void yyunput( int c, register char *yy_bp ) +#else static void yyunput( c, yy_bp ) int c; register char *yy_bp; +#endif { register char *yy_cp = yy_c_buf_p; @@ -441,3 +477,19 @@ static int input() return ( c ); } + + +#ifdef __STDC__ +static void yyrestart( FILE *input_file ) +#else +static void yyrestart( input_file ) +FILE *input_file; +#endif + + { + if ( yyin != stdin ) + fclose( yyin ); + + yyin = input_file; + yy_init = 1; + } diff --git a/flexdef.h b/flexdef.h index 379a2fa..85272fd 100644 --- a/flexdef.h +++ b/flexdef.h @@ -30,9 +30,16 @@ #include #endif -#ifdef SV +#ifdef SYS_V #include + +#ifdef AMIGA +#define bzero(s, n) setmem((char *)(s), (unsigned)(n), '\0') +#define abs(x) ((x) < 0 ? 
-(x) : (x)) +#else #define bzero(s, n) memset((char *)(s), '\0', (unsigned)(n)) +#endif + #ifndef VMS char *memset(); #else @@ -49,7 +56,7 @@ char *memset(); #endif #endif -#ifndef SV +#ifndef SYS_V #include #ifdef lint char *sprintf(); /* keep lint happy */ @@ -331,6 +338,8 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; * reject - if true, generate backtracking tables for REJECT macro * real_reject - if true, scanner really uses REJECT (as opposed to just * having "reject" set for variable trailing context) + * continued_action - true if this rule's action is to "fall through" to + * the next rule's action (i.e., the '|' action) * yymore_really_used - has a REALLY_xxx value indicating whether a * %used or %notused was used with yymore() * reject_really_used - same for REJECT @@ -339,7 +348,7 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; extern int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt; extern int interactive, caseins, useecs, fulltbl, usemecs; extern int fullspd, gen_line_dirs, performance_report, backtrack_report; -extern int yymore_used, reject, real_reject; +extern int yymore_used, reject, real_reject, continued_action; #define REALLY_NOT_DETERMINED 0 #define REALLY_USED 1 @@ -464,10 +473,13 @@ extern int tecfwd[CSIZE + 1], tecbck[CSIZE + 1]; * scset - set of rules active in start condition * scbol - set of rules active only at the beginning of line in a s.c. 
* scxclu - true if start condition is exclusive + * sceof - true if start condition has EOF rule + * scname - start condition name * actvsc - stack of active start conditions for the current rule */ -extern int lastsc, current_max_scs, *scset, *scbol, *scxclu, *actvsc; +extern int lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof, *actvsc; +extern char **scname; /* variables for dfa machine data: @@ -566,6 +578,9 @@ char *allocate_array(), *reallocate_array(); #define allocate_int_ptr_array(size) \ (int **) allocate_array( size, sizeof( int * ) ) +#define allocate_char_ptr_array(size) \ + (char **) allocate_array( size, sizeof( char * ) ) + #define allocate_dfaacc_union(size) \ (union dfaacc_union *) \ allocate_array( size, sizeof( union dfaacc_union ) ) @@ -573,6 +588,9 @@ char *allocate_array(), *reallocate_array(); #define reallocate_int_ptr_array(array,size) \ (int **) reallocate_array( (char *) array, size, sizeof( int * ) ) +#define reallocate_char_ptr_array(array,size) \ + (char **) reallocate_array( (char *) array, size, sizeof( char * ) ) + #define reallocate_dfaacc_union(array, size) \ (union dfaacc_union *) reallocate_array( (char *) array, size, sizeof( union dfaacc_union ) ) diff --git a/gen.c b/gen.c index 87e7058..f39600d 100644 --- a/gen.c +++ b/gen.c @@ -132,7 +132,7 @@ genctbl() int end_of_buffer_action = num_rules + 1; /* table of verify for transition and offset to next state */ - printf( "static struct yy_trans_info yy_transition[%d] =\n", + printf( "static const struct yy_trans_info yy_transition[%d] =\n", tblend + numecs + 1 ); printf( " {\n" ); @@ -197,7 +197,7 @@ genctbl() printf( "\n" ); /* table of pointers to start states */ - printf( "static struct yy_trans_info *yy_start_state_list[%d] =\n", + printf( "static const struct yy_trans_info *yy_start_state_list[%d] =\n", lastsc * 2 + 1 ); printf( " {\n" ); @@ -217,7 +217,7 @@ genecs() { register int i, j; - static char C_char_decl[] = "static char %s[%d] =\n { 0,\n"; + static char 
C_char_decl[] = "static const char %s[%d] =\n { 0,\n"; int numrows; char clower(); @@ -390,7 +390,8 @@ genftbl() /* *everything* is done in terms of arrays starting at 1, so provide * a null entry for the zero element of all C arrays */ - static char C_short_decl[] = "static short int %s[%d] =\n { 0,\n"; + static char C_short_decl[] = + "static const short int %s[%d] =\n { 0,\n"; printf( C_short_decl, ALIST, lastdfa + 1 ); @@ -611,8 +612,11 @@ gen_start_state() } if ( reject ) + { /* set up for storing up states */ indent_puts( "yy_state_ptr = yy_state_buf;" ); + indent_puts( "*yy_state_ptr++ = yy_current_state;" ); + } } } @@ -632,9 +636,12 @@ gentabs() /* *everything* is done in terms of arrays starting at 1, so provide * a null entry for the zero element of all C arrays */ - static char C_long_decl[] = "static long int %s[%d] =\n { 0,\n"; - static char C_short_decl[] = "static short int %s[%d] =\n { 0,\n"; - static char C_char_decl[] = "static char %s[%d] =\n { 0,\n"; + static char C_long_decl[] = + "static const long int %s[%d] =\n { 0,\n"; + static char C_short_decl[] = + "static const short int %s[%d] =\n { 0,\n"; + static char C_char_decl[] = + "static const char %s[%d] =\n { 0,\n"; acc_array = allocate_integer_array( current_max_dfas ); nummt = 0; @@ -684,7 +691,9 @@ gentabs() ++j; - if ( variable_trailing_context_rules && accnum > 0 && + if ( variable_trailing_context_rules && + ! 
(accnum & YY_TRAILING_HEAD_MASK) && + accnum > 0 && rule_type[accnum] == RULE_VARIABLE ) { /* special hack to flag accepting number as part @@ -895,6 +904,9 @@ char str[]; make_tables() { + register int i; + int did_eof_rule = false; + printf( "#define YY_END_OF_BUFFER %d\n", num_rules + 1 ); if ( fullspd ) @@ -940,7 +952,7 @@ make_tables() if ( reject ) { /* declare state buffer variables */ - puts( "yy_trans_info yy_state_buf[YY_BUF_SIZE + 2], *yy_state_ptr;" ); + puts( "yy_state_type yy_state_buf[YY_BUF_SIZE + 2], *yy_state_ptr;" ); puts( "char *yy_full_match;" ); puts( "int yy_lp;" ); @@ -982,6 +994,15 @@ make_tables() puts( " */" ); puts( "#define REJECT reject_used_but_not_detected" ); } + + if ( yymore_used ) + { + indent_puts( "static char *yy_more_pos = (char *) 0;" ); + indent_puts( "#define yymore() (yy_more_pos = yy_bp)" ); + } + + else + indent_puts( "#define yymore() yymore_used_but_not_detected" ); skelout(); @@ -996,6 +1017,26 @@ make_tables() set_indent( 2 ); + if ( yymore_used ) + { + indent_puts( "if ( yy_more_pos )" ); + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_bp = yy_more_pos;" ); + indent_puts( "yy_more_pos = (char *) 0;" ); + indent_puts( "}" ); + indent_down(); + indent_puts( "else" ); + indent_up(); + indent_puts( "yy_bp = yy_cp;" ); + indent_down(); + } + + else + indent_puts( "yy_bp = yy_cp;" ); + + skelout(); + gen_start_state(); gen_next_match(); @@ -1009,6 +1050,23 @@ make_tables() gen_bt_action(); action_out(); + /* generate cases for any missing EOF rules */ + for ( i = 1; i <= lastsc; ++i ) + if ( ! 
sceof[i] ) + { + do_indent(); + printf( "case YY_STATE_EOF(%s):\n", scname[i] ); + did_eof_rule = true; + } + + if ( did_eof_rule ) + { + indent_up(); + indent_puts( "yyterminate();" ); + indent_down(); + } + + /* generate code for yy_get_previous_state() */ set_indent( 1 ); skelout(); diff --git a/misc.c b/misc.c index 08ebf2d..004e267 100644 --- a/misc.c +++ b/misc.c @@ -289,11 +289,15 @@ dataflush() datapos = 0; } -/* gettime - return current time +/* flex_gettime - return current time * * synopsis - * char *gettime(), *time_str; - * time_str = gettime(); + * char *flex_gettime(), *time_str; + * time_str = flex_gettime(); + * + * note + * the routine name has the "flex_" prefix because of name clashes + * with Turbo-C */ /* include sys/types.h to use time_t and make lint happy */ @@ -311,7 +315,7 @@ dataflush() typedef long time_t; #endif -char *gettime() +char *flex_gettime() { time_t t, time(); @@ -377,6 +381,7 @@ char msg[]; { fprintf( stderr, "flex: %s\n", msg ); + flexend( 1 ); } @@ -506,48 +511,53 @@ char array[]; { switch ( array[1] ) { - case 'n': return ( '\n' ); - case 't': return ( '\t' ); + case 'a': return ( '\a' ); + case 'b': return ( '\b' ); case 'f': return ( '\f' ); + case 'n': return ( '\n' ); case 'r': return ( '\r' ); - case 'b': return ( '\b' ); + case 't': return ( '\t' ); + case 'v': return ( '\v' ); case '0': - if ( isdigit(array[2]) ) - { /* \0 */ - char c, esc_char; - register int sptr = 2; - - while ( isdigit(array[sptr]) ) - /* don't increment inside loop control because the - * macro will expand it to two increments! 
(Not a - * problem with the C version of the macro) - */ - ++sptr; - - c = array[sptr]; - array[sptr] = '\0'; - - esc_char = otoi( array + 2 ); - array[sptr] = c; - - if ( esc_char == '\0' ) - { - synerr( "escape sequence for null not allowed" ); - return ( 1 ); - } - - return ( esc_char ); - } - - else + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { /* \ */ + char c, esc_char; + register int sptr = 1; + + while ( isdigit(array[sptr]) ) + /* don't increment inside loop control because if + * isdigit() is a macro it will expand it to two + * increments ... + */ + ++sptr; + + c = array[sptr]; + array[sptr] = '\0'; + + esc_char = otoi( array + 1 ); + array[sptr] = c; + + if ( esc_char == '\0' ) { synerr( "escape sequence for null not allowed" ); return ( 1 ); } + + return ( esc_char ); + } + + default: + return ( array[1] ); } - - return ( array[1] ); } diff --git a/nfa.c b/nfa.c index d386037..53fdd01 100644 --- a/nfa.c +++ b/nfa.c @@ -216,7 +216,7 @@ int mach, variable_trail_rule, headcnt, trailcnt; /* we did this in new_rule(), but it often gets the wrong * number because we do it before we start parsing the current rule */ - rule_type[num_rules] = linenum; + rule_linenum[num_rules] = linenum; fprintf( temp_action_file, "case %d:\n", num_rules ); diff --git a/parse.y b/parse.y index 6f742dc..f11f738 100644 --- a/parse.y +++ b/parse.y @@ -24,7 +24,7 @@ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ -%token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL +%token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL EOF_OP %{ @@ -46,6 +46,7 @@ int trlcontxt, xcluflg, cclsorted, varlength, variable_trail_rule; char clower(); static int madeany = false; /* whether we've made the '.' 
character class */ +int previous_continued_action; /* whether the previous rule's action was '|' */ %} @@ -59,7 +60,7 @@ goal : initlex sect1 sect1end sect2 initforrule def_rule = mkstate( -pat ); - finish_rule( def_rule, variable_trail_rule, 0, 0 ); + finish_rule( def_rule, false, 0, 0 ); for ( i = 1; i <= lastsc; ++i ) scset[i] = mkbranch( scset[i], def_rule ); @@ -126,6 +127,7 @@ initforrule : trlcontxt = variable_trail_rule = varlength = false; trailcnt = headcnt = rulelen = 0; current_state_type = STATE_NORMAL; + previous_continued_action = continued_action; new_rule(); } ; @@ -196,6 +198,16 @@ flexrule : scon '^' re eol scset[i] = mkbranch( scset[i], pat ); } + | scon EOF_OP + { build_eof_action(); } + + | EOF_OP + { + /* this EOF applies only to the INITIAL start cond. */ + actvsc[actvp = 1] = 1; + build_eof_action(); + } + | error { synerr( "unrecognized rule" ); } ; @@ -206,7 +218,7 @@ scon : '<' namelist2 '>' namelist2 : namelist2 ',' NAME { if ( (scnum = sclookup( nmstr )) == 0 ) - synerr( "undeclared start condition" ); + lerrsf( "undeclared start condition %s", nmstr ); else actvsc[++actvp] = scnum; @@ -215,7 +227,7 @@ namelist2 : namelist2 ',' NAME | NAME { if ( (scnum = sclookup( nmstr )) == 0 ) - synerr( "undeclared start condition" ); + lerrsf( "undeclared start condition %s", nmstr ); else actvsc[actvp = 1] = scnum; } @@ -280,6 +292,28 @@ re : re '|' series mark_beginning_as_normal( $2 ); current_state_type = STATE_NORMAL; + if ( previous_continued_action ) + { + /* we need to treat this as variable trailing + * context so that the backup does not happen + * in the action but before the action switch + * statement. If the backup happens in the + * action, then the rules "falling into" this + * one's action will *also* do the backup, + * erroneously. + */ + if ( ! 
varlength || headcnt != 0 ) + { + if ( performance_report ) + fprintf( stderr, + "trailing context rule made variable because of preceding '|' action\n" ); + } + + /* mark as variable */ + varlength = true; + headcnt = 0; + } + if ( varlength && headcnt == 0 ) { /* variable trailing context rule */ /* mark the first part of the rule as the accepting @@ -558,6 +592,34 @@ string : string CHAR %% + +/* build_eof_action - build the "<>" action for the active start + * conditions + */ + +build_eof_action() + + { + register int i; + + for ( i = 1; i <= actvp; ++i ) + { + if ( sceof[actvsc[i]] ) + lerrsf( "multiple <> rules for start condition %s", + scname[actvsc[i]] ); + + else + { + sceof[actvsc[i]] = true; + fprintf( temp_action_file, "case YY_STATE_EOF(%s):\n", + scname[actvsc[i]] ); + } + } + + line_directive_out( temp_action_file ); + } + + /* synerr - report a syntax error * * synopsis diff --git a/scan.l b/scan.l index 6c9f0c0..092c028 100644 --- a/scan.l +++ b/scan.l @@ -78,7 +78,7 @@ NOT_NAME [^a-z_\n]+ SCNAME {NAME} -ESCSEQ \\([^\n]|0[0-9]{1,3}) +ESCSEQ \\([^\n]|[0-9]{1,3}) %% static int bracelevel, didadef; @@ -107,7 +107,7 @@ ESCSEQ \\([^\n]|0[0-9]{1,3}) } ^"%used" checking_used = REALLY_USED; BEGIN(USED_LIST); -^"%not"{OPTWS}"used" checking_used = REALLY_NOT_USED; BEGIN(USED_LIST); +^"%unused" checking_used = REALLY_NOT_USED; BEGIN(USED_LIST); ^"%"[^sx]" ".*\n { @@ -181,15 +181,15 @@ ESCSEQ \\([^\n]|0[0-9]{1,3}) if ( all_upper( yytext ) ) reject_really_used = checking_used; else - synerr( "unrecognized %used/%notused construct" ); + synerr( "unrecognized %used/%unused construct" ); } "yymore" { if ( all_lower( yytext ) ) yymore_really_used = checking_used; else - synerr( "unrecognized %used/%notused construct" ); + synerr( "unrecognized %used/%unused construct" ); } -{NOT_WS}+ synerr( "unrecognized %used/%notused construct" ); +{NOT_WS}+ synerr( "unrecognized %used/%unused construct" ); .*\n/{NOT_WS} { @@ -201,6 +201,8 @@ ESCSEQ \\([^\n]|0[0-9]{1,3}) .*\n 
++linenum; ACTION_ECHO; +<> MARK_END_OF_PROLOG; yyterminate(); + ^{OPTWS}\n ++linenum; /* allow blank lines in section 2 */ /* this horrible mess of a rule matches indented lines which @@ -225,26 +227,22 @@ ESCSEQ \\([^\n]|0[0-9]{1,3}) BEGIN(PERCENT_BRACE_ACTION); return ( '\n' ); } -{WS}"|".*\n ++linenum; return ( '\n' ); +{WS}"|".*\n continued_action = true; ++linenum; return ( '\n' ); ^{OPTWS}"/*" ACTION_ECHO; BEGIN(C_COMMENT_2); -{WS} { /* needs to be separate from following rule due to - * bug with trailing context - */ - bracelevel = 0; - BEGIN(ACTION); - return ( '\n' ); - } - +{WS} | {OPTWS}/\n { bracelevel = 0; + continued_action = false; BEGIN(ACTION); return ( '\n' ); } ^{OPTWS}\n ++linenum; return ( '\n' ); +"<>" return ( EOF_OP ); + ^"%%".* { sectnum = 3; BEGIN(SECT3); diff --git a/sym.c b/sym.c index 3cccd8d..48aaee0 100644 --- a/sym.c +++ b/sym.c @@ -266,7 +266,7 @@ int xcluflg; */ if ( strcmp( str, "0" ) ) - printf( "#define %s %d\n", str, lastsc * 2 ); + printf( "#define %s %d\n", str, lastsc ); if ( ++lastsc >= current_max_scs ) { @@ -277,16 +277,21 @@ int xcluflg; scset = reallocate_integer_array( scset, current_max_scs ); scbol = reallocate_integer_array( scbol, current_max_scs ); scxclu = reallocate_integer_array( scxclu, current_max_scs ); + sceof = reallocate_integer_array( sceof, current_max_scs ); + scname = reallocate_char_ptr_array( scname, current_max_scs ); actvsc = reallocate_integer_array( actvsc, current_max_scs ); } - if ( addsym( copy_string( str ), (char *) 0, lastsc, - sctbl, START_COND_HASH_SIZE ) ) + scname[lastsc] = copy_string( str ); + + if ( addsym( scname[lastsc], (char *) 0, lastsc, + sctbl, START_COND_HASH_SIZE ) ) lerrsf( "start condition %s declared twice", str ); scset[lastsc] = mkstate( SYM_EPSILON ); scbol[lastsc] = mkstate( SYM_EPSILON ); scxclu[lastsc] = xcluflg; + sceof[lastsc] = false; } -- cgit v1.2.3 From 0d450ace2c484b7a70932e1423d2121a1d00f04f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Jun 1989 
15:44:28 +0000 Subject: *** empty log message *** --- NEWS | 121 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--------- main.c | 124 ++++++++++++++++++++++++++++++++++++++--------------------------- 2 files changed, 176 insertions(+), 69 deletions(-) diff --git a/NEWS b/NEWS index c382644..3915e75 100644 --- a/NEWS +++ b/NEWS @@ -1,17 +1,104 @@ -Changes between beta-test release of Feb. '88 and initial release: - - - many files renamed to remove "flex" prefix - - input() routine added to compressed and fast skeletons - - unput() routine added to compressed skeleton - - -d, -ce support for fast scanners - - symbol table extended to avoid ugly casts of ints <-> char *'s; - this may relieve MS-DOS woes - - actions are now separated with YY_BREAK instead of simple "break"'s - - fixed bug causing core-dumps if skeleton file could not be opened - - fixed bugs in logic deciding which options cannot be intermixed - - initial start condition can now be referred to as <INITIAL> - - fixed bug which would incorrectly computer trailing context - count for a pattern like "(foo){3}"; now this is considered - "variable length", even though it isn't. - - block comments allowed between rules - - misc. typos corrected +Changes between beta-test release of June '88 and previous release: + + User-visible: + + - -p flag generates a performance report to stderr. The report + consists of comments regarding features of the scanner rules + which result in slower scanners. + + - -b flag generates backtracking information to lex.backtrack. + This is a list of scanner states which require backtracking + and the characters on which they do so. By adding rules + one can remove backtracking states. If all backtracking states + are eliminated, the generated scanner will run faster. + Backtracking is not yet documented in the manual entry. + + - Variable trailing context now works, i.e., one can have + rules like "(foo)*/[ \t]*bletch". 
Some trailing context + patterns still cannot be properly matched and generate + error messages. These are patterns where the ending of the + first part of the rule matches the beginning of the second + part, such as "zx*/xy*", where the 'x*' matches the 'x' at + the beginning of the trailing context. Lex won't get these + patterns right either. + + - Faster scanners. + + - End-of-file rules. The special rule "<<EOF>>" indicates + actions which are to be taken when an end-of-file is + encountered and yywrap() returns non-zero (i.e., indicates + no further files to process). See manual entry for example. + + - The -r (reject used) flag is gone. flex now scans the input + for occurrences of the string "REJECT" to determine if the + action is needed. It tries to be intelligent about this but + can be fooled. One can force the presence or absence of + REJECT by adding a line in the first section of the form + "%used REJECT" or "%unused REJECT". + + - yymore() has been implemented. Similarly to REJECT, flex + detects the use of yymore(), which can be overridden using + "%used" or "%unused". + + - Patterns like "x{0,3}" now work (i.e., with lower-limit == 0). + + - Removed '\^x' for ctrl-x misfeature. + + - Added '\a' and '\v' escape sequences. + + - \<digits> now works for octal escape sequences; previously + \0<digits> was required. + + - Better error reporting; line numbers are associated with rules. + + - yyleng is a macro; it cannot be accessed outside of the + scanner source file. + + - yytext and yyleng should not be modified within a flex action. + + - Generated scanners #define the name FLEX_SCANNER. + + - Rules are internally separated by YY_BREAK in lex.yy.c rather + than break, to allow redefinition. + + - The macro YY_USER_ACTION can be redefined to provide an action + which is always executed prior to the matched rule's action.
+ + - yyrestart() is a new action which can be used to restart + the scanner after it has seen an end-of-file (a "real" one, + that is, one for which yywrap() returned non-zero). It takes + a FILE* argument indicating a new file to scan and sets + things up so that a subsequent call to yylex() will start + scanning that file. + + - Internal scanner names all preceded by "yy_" + + - lex.yy.c is deleted if errors are encountered during processing. + + - Comments may be put in the first section of the input by preceding + them with '#'. + + + + Other changes: + + - Some portability-related bugs fixed, in particular for machines + with unsigned characters or sizeof( int* ) != sizeof( int ). + Also, tweaks for VMS and Microsoft C (MS-DOS), and identifiers all + trimmed to be 31 or fewer characters. Shortened file names + for dinosaur OS's. Checks for allocating > 64K memory + on 16 bit'ers. Amiga tweaks. Compiles using gcc on a Sun-3. + + - Compressed and fast scanner skeletons merged. + + - Skeleton header files done away with. + + - Generated scanner uses prototypes and "const" for __STDC__. + + - -DSV flag is now -DSYS_V for System V compilation. + + - Removed all references to FTL language. + + - Software now covered by BSD Copyright. + + - flex will replace lex in subsequent BSD releases. 
diff --git a/main.c b/main.c index 063417d..14ccccf 100644 --- a/main.c +++ b/main.c @@ -39,12 +39,14 @@ static char rcsid[] = #include "flexdef.h" +static char flex_version[] = "2.0.1 (beta)"; + /* these globals are all defined and commented in flexdef.h */ int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt; int interactive, caseins, useecs, fulltbl, usemecs; int fullspd, gen_line_dirs, performance_report, backtrack_report; -int yymore_used, reject, real_reject; +int yymore_used, reject, real_reject, continued_action; int yymore_really_used, reject_really_used; int datapos, dataline, linenum; FILE *skelfile = NULL; @@ -60,7 +62,8 @@ int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP]; int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1]; int tecbck[CSIZE + 1]; -int lastsc, current_max_scs, *scset, *scbol, *scxclu, *actvsc; +int lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof, *actvsc; +char **scname; int current_max_dfa_size, current_max_xpairs; int current_max_template_xpairs, current_max_dfas; int lastdfa, *nxt, *chk, *tnxt; @@ -84,6 +87,13 @@ char action_file_name[] = "/tmp/flexXXXXXX"; char action_file_name[] = "flexXXXXXX.tmp"; #endif +#ifndef SHORT_FILE_NAMES +static char outfile[] = "lex.yy.c"; +#else +static char outfile[] = "lexyy.c"; +#endif +static int outfile_created = 0; + /* flex - main program * @@ -100,55 +110,59 @@ char **argv; readin(); - if ( ! 
syntaxerror ) - { - if ( yymore_really_used == REALLY_USED ) - yymore_used = true; - else if ( yymore_really_used == REALLY_NOT_USED ) - yymore_used = false; + if ( syntaxerror ) + flexend( 1 ); - if ( reject_really_used == REALLY_USED ) - reject = true; - else if ( reject_really_used == REALLY_NOT_USED ) - reject = false; + if ( yymore_really_used == REALLY_USED ) + yymore_used = true; + else if ( yymore_really_used == REALLY_NOT_USED ) + yymore_used = false; - if ( performance_report ) - { - if ( yymore_used ) - fprintf( stderr, - "yymore() entails a minor performance penalty\n" ); + if ( reject_really_used == REALLY_USED ) + reject = true; + else if ( reject_really_used == REALLY_NOT_USED ) + reject = false; - if ( reject ) - fprintf( stderr, - "REJECT entails a large performance penalty\n" ); + if ( performance_report ) + { + if ( yymore_used ) + fprintf( stderr, + "yymore() entails a minor performance penalty\n" ); - if ( variable_trailing_context_rules ) - fprintf( stderr, - "Variable trailing context rules entail a large performance penalty\n" ); - } + if ( interactive ) + fprintf( stderr, + "-I (interactive) entails a minor performance penalty\n" ); if ( reject ) - real_reject = true; + fprintf( stderr, + "REJECT entails a large performance penalty\n" ); if ( variable_trailing_context_rules ) - reject = true; + fprintf( stderr, +"Variable trailing context rules entail a large performance penalty\n" ); + } - if ( (fulltbl || fullspd) && reject ) - { - if ( real_reject ) - flexerror( "REJECT cannot be used with -f or -F" ); - else - flexerror( - "variable trailing context rules cannot be used with -f or -F" ); - } + if ( reject ) + real_reject = true; - /* convert the ndfa to a dfa */ - ntod(); + if ( variable_trailing_context_rules ) + reject = true; - /* generate the C state transition tables from the DFA */ - make_tables(); + if ( (fulltbl || fullspd) && reject ) + { + if ( real_reject ) + flexerror( "REJECT cannot be used with -f or -F" ); + else + 
flexerror( + "variable trailing context rules cannot be used with -f or -F" ); } + /* convert the ndfa to a dfa */ + ntod(); + + /* generate the C state transition tables from the DFA */ + make_tables(); + /* note, flexend does not return. It exits with its argument as status. */ flexend( 0 ); @@ -174,7 +188,7 @@ int status; { int tblsiz; - char *gettime(); + char *flex_gettime(); if ( skelfile != NULL ) (void) fclose( skelfile ); @@ -185,6 +199,12 @@ int status; (void) unlink( action_file_name ); } + if ( status != 0 && outfile_created ) + { + (void) fclose( stdout ); + (void) unlink( outfile ); + } + if ( backtrack_report ) { if ( num_backtracking == 0 ) @@ -201,9 +221,9 @@ int status; if ( printstats ) { - endtime = gettime(); + endtime = flex_gettime(); - fprintf( stderr, "flex usage statistics:\n" ); + fprintf( stderr, "flex version %s usage statistics:\n", flex_version ); fprintf( stderr, " started at %s, finished at %s\n", starttime, endtime ); @@ -251,12 +271,12 @@ int status; { tblsiz = 2 * (lastdfa + numtemps) + 2 * tblend; - fprintf( stderr, " %d/%d base/def entries created\n", + fprintf( stderr, " %d/%d base-def entries created\n", lastdfa + numtemps, current_max_dfas ); - fprintf( stderr, " %d/%d (peak %d) nxt/chk entries created\n", + fprintf( stderr, " %d/%d (peak %d) nxt-chk entries created\n", tblend, current_max_xpairs, peakpairs ); fprintf( stderr, - " %d/%d (peak %d) template nxt/chk entries created\n", + " %d/%d (peak %d) template nxt-chk entries created\n", numtemps * nummecs, current_max_template_xpairs, numtemps * numecs ); fprintf( stderr, " %d empty table entries\n", nummt ); @@ -307,11 +327,11 @@ char **argv; { int i, sawcmpflag, use_stdout; - char *arg, *skelname = NULL, *gettime(), clower(), *mktemp(); + char *arg, *skelname = NULL, *flex_gettime(), clower(), *mktemp(); printstats = syntaxerror = trace = spprdflt = interactive = caseins = false; backtrack_report = performance_report = ddebug = fulltbl = fullspd = false; - yymore_used = 
reject = false; + yymore_used = continued_action = reject = false; yymore_really_used = reject_really_used = false; gen_line_dirs = usemecs = useecs = true; @@ -453,14 +473,12 @@ get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ if ( ! use_stdout ) { -#ifndef SHORT_FILE_NAMES - FILE *prev_stdout = freopen( "lex.yy.c", "w", stdout ); -#else - FILE *prev_stdout = freopen( "lexyy.c", "w", stdout ); -#endif + FILE *prev_stdout = freopen( outfile, "w", stdout ); if ( prev_stdout == NULL ) flexerror( "could not create lex.yy.c" ); + + outfile_created = 1; } if ( argc ) @@ -497,7 +515,7 @@ get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ lastsc = 0; /* initialize the statistics */ - starttime = gettime(); + starttime = flex_gettime(); if ( (skelfile = fopen( skelname, "r" )) == NULL ) lerrsf( "can't open skeleton file %s", skelname ); @@ -611,6 +629,8 @@ set_up_initial_allocations() scset = allocate_integer_array( current_max_scs ); scbol = allocate_integer_array( current_max_scs ); scxclu = allocate_integer_array( current_max_scs ); + sceof = allocate_integer_array( current_max_scs ); + scname = allocate_char_ptr_array( current_max_scs ); actvsc = allocate_integer_array( current_max_scs ); current_maxccls = INITIAL_MAX_CCLS; -- cgit v1.2.3 From 26c1ee87854b28a3048a6604c33f22e32ca9c10b Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Jun 1989 15:48:09 +0000 Subject: 2.1 beta release --- NEWS | 2 +- README | 2 +- main.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index 3915e75..e62f1c5 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -Changes between beta-test release of June '88 and previous release: +Changes between 2.1 beta-test release of June '89 and previous release: User-visible: diff --git a/README b/README index 2e015d6..c5ff3bc 100644 --- a/README +++ b/README @@ -1,6 +1,6 @@ // $Header$ -This is release 2.0.1 of flex - a beta release. 
+This is release 2.1 of flex - a beta release. The flex distribution consists of the following files: diff --git a/main.c b/main.c index 14ccccf..141f463 100644 --- a/main.c +++ b/main.c @@ -39,7 +39,7 @@ static char rcsid[] = #include "flexdef.h" -static char flex_version[] = "2.0.1 (beta)"; +static char flex_version[] = "2.1 (beta)"; /* these globals are all defined and commented in flexdef.h */ -- cgit v1.2.3 -- cgit v1.2.3 From 6fddaa10023b51c17de80a0818f3873b1527d383 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Jun 1989 16:02:09 +0000 Subject: *** empty log message *** --- NEWS | 2 +- README | 2 +- main.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index e62f1c5..2c332d2 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -Changes between 2.1 beta-test release of June '89 and previous release: +Changes between 2.0.1 beta-test release of June '89 and previous release: User-visible: diff --git a/README b/README index c5ff3bc..2e015d6 100644 --- a/README +++ b/README @@ -1,6 +1,6 @@ // $Header$ -This is release 2.1 of flex - a beta release. +This is release 2.0.1 of flex - a beta release. 
The flex distribution consists of the following files: diff --git a/main.c b/main.c index 141f463..14ccccf 100644 --- a/main.c +++ b/main.c @@ -39,7 +39,7 @@ static char rcsid[] = #include "flexdef.h" -static char flex_version[] = "2.1 (beta)"; +static char flex_version[] = "2.0.1 (beta)"; /* these globals are all defined and commented in flexdef.h */ -- cgit v1.2.3 From 15710e053d95d05c3363c9abb1b7018b8c15ae95 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Jun 1989 16:27:01 +0000 Subject: *** empty log message *** --- README | 47 +++++++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/README b/README index 2e015d6..b21b275 100644 --- a/README +++ b/README @@ -6,8 +6,6 @@ The flex distribution consists of the following files: README This message - Changes Differences between this release and the previous one - Makefile flexdef.h parse.y @@ -29,39 +27,33 @@ The flex distribution consists of the following files: flex.1 manual entry + Changes Differences between this release and the previous one COPYING flex's copyright MISC miscellaneous stuff (e.g., old VMS Makefile) which almost no one will care about -The files are packaged as a compressed shell archive, which in turn -contains seven shell archives. Create a directory where you want flex -to live, cd there, and use - uncompress flex.shar.Z - sh flex.shar +If you have installed a previous version of flex, delete it (after making +backups, of course). This will entail removing the source directory, +/usr/include/{flexskelcom,fastskeldef,flexskeldef}.h, and +/usr/local/lib/flex.{skel,fastskel}, if that's where you put the various +pieces. - sh flex.shar.1 - sh flex.shar.2 - sh flex.shar.3 - sh flex.shar.4 - sh flex.shar.5 - sh flex.shar.6 - sh flex.shar.7 - -to extract them. Decide where you want to keep flex.skel (suggestion: /usr/local/lib) and move it there. 
Edit "Makefile" and change the definition of SKELETON_FILE to reflect the full pathname of flex.skel. -For a System V machine, add "-DSYS_V" to CFLAGS in the Makefile. +Read the "Porting considerations" note in the Makefile and make +the necessary changes. To make flex for the first time, use: make first_flex -which uses a pre-generated copy of the scanner whose source is in flex. +which uses the pre-generated copy of the flex scanner (the scanner +itself is written using flex). Assuming it builds successfully, you can test it using @@ -106,3 +98,22 @@ Please send problems and feedback to: Berkeley, CA 94720 (415) 486-6411 + + +I will be gone from mid-July '89 through mid-August '89. From August on, +the addresses are: + + vern@cs.cornell.edu (email sent to the former addresses should + continue to be forwarded for quite a while) + (if I'm unlucky, you'll have to send mail + to "paxson@cs.cornell.edu", so try that if + the first doesn't work) + + Vern Paxson + CS Department + Grad Office + 4126 Upson + Cornell University + Ithaca, NY 14853-7501 + + -- cgit v1.2.3 From 266f28d940f70213a065ec69d48152f3d4b70cd6 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Jun 1989 16:36:26 +0000 Subject: *** empty log message *** --- NEWS | 2 +- README | 2 +- main.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index 2c332d2..e62f1c5 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -Changes between 2.0.1 beta-test release of June '89 and previous release: +Changes between 2.1 beta-test release of June '89 and previous release: User-visible: diff --git a/README b/README index b21b275..bba617a 100644 --- a/README +++ b/README @@ -1,6 +1,6 @@ // $Header$ -This is release 2.0.1 of flex - a beta release. +This is release 2.1 of flex - a beta release. 
The flex distribution consists of the following files: diff --git a/main.c b/main.c index 14ccccf..141f463 100644 --- a/main.c +++ b/main.c @@ -39,7 +39,7 @@ static char rcsid[] = #include "flexdef.h" -static char flex_version[] = "2.0.1 (beta)"; +static char flex_version[] = "2.1 (beta)"; /* these globals are all defined and commented in flexdef.h */ -- cgit v1.2.3 From 7551873b87b795799e3ff78a1ce18292e8b93c80 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Jun 1989 16:47:40 +0000 Subject: Initial revision --- COPYING | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 COPYING diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..aa50598 --- /dev/null +++ b/COPYING @@ -0,0 +1,32 @@ +Flex carries the copyright used for BSD software, slightly modified +because it originated at the Lawrence Berkeley (not Livermore!) Laboratory, +which operates under a contract with the Department of Energy: + + Copyright (c) 1989 The Regents of the University of California. + All rights reserved. + + This code is derived from software contributed to Berkeley by + Vern Paxson. + + The United States Government has rights in this work pursuant to + contract no. DE-AC03-76SF00098 between the United States Department of + Energy and the University of California. + + Redistribution and use in source and binary forms are permitted + provided that the above copyright notice and this paragraph are + duplicated in all such forms and that any documentation, + advertising materials, and other materials related to such + distribution and use acknowledge that the software was developed + by the University of California, Berkeley. The name of the + University may not be used to endorse or promote products derived + from this software without specific prior written permission. 
+ THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + +This basically says "do whatever you please with this software except +remove this notice". + +Note that the "flex.skel" scanner skeleton carries no copyright notice. +You are free to do whatever you please with scanners generated using flex; +for them, you are not even bound by the above copyright. -- cgit v1.2.3 From 4bcc9d241b6be0b94f7bad798958d2d116e48112 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Jun 1989 17:12:01 +0000 Subject: *** empty log message *** --- README | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README b/README index bba617a..da74cc0 100644 --- a/README +++ b/README @@ -83,7 +83,7 @@ and testing using: Format the manual entry using - nroff -man flex.1 + make flex.man Please send problems and feedback to: -- cgit v1.2.3 From c743c0226f01803af124a84ff4ff372ec359cb50 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Jun 1989 17:23:54 +0000 Subject: made trailing context combined with '|' warning always come out --- parse.y | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/parse.y b/parse.y index f11f738..1f24c3f 100644 --- a/parse.y +++ b/parse.y @@ -304,9 +304,11 @@ re : re '|' series */ if ( ! varlength || headcnt != 0 ) { - if ( performance_report ) - fprintf( stderr, - "trailing context rule made variable because of preceding '|' action\n" ); + fprintf( stderr, + "flex: warning - trailing context rule at line %d made variable because\n", + linenum ); + fprintf( stderr, + " of preceding '|' action\n" ); } /* mark as variable */ -- cgit v1.2.3 From fe13f320768a66f6fd732c10db88a519b94a7070 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Jun 1989 17:24:13 +0000 Subject: changed to not use '|' and trailing context combo so users can test using -F ... 
--- scan.l | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/scan.l b/scan.l index 092c028..8c40174 100644 --- a/scan.l +++ b/scan.l @@ -231,7 +231,17 @@ ESCSEQ \\([^\n]|[0-9]{1,3}) ^{OPTWS}"/*" ACTION_ECHO; BEGIN(C_COMMENT_2); -{WS} | +{WS} { + /* this rule is separate from the one below because + * otherwise we get variable trailing context, so + * we can't build the scanner using -{f,F} + */ + bracelevel = 0; + continued_action = false; + BEGIN(ACTION); + return ( '\n' ); + } + {OPTWS}/\n { bracelevel = 0; continued_action = false; -- cgit v1.2.3 From ea27b1aec2f4187859fedd03a299dffc40148b8c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 30 Dec 1989 15:32:56 +0000 Subject: *** empty log message *** --- README | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/README b/README index da74cc0..364345c 100644 --- a/README +++ b/README @@ -41,7 +41,7 @@ pieces. Decide where you want to keep flex.skel (suggestion: /usr/local/lib) and -move it there. Edit "Makefile" and change the definition of SKELETON_FILE +copy it there. Edit "Makefile" and change the definition of SKELETON_FILE to reflect the full pathname of flex.skel. 
Read the "Porting considerations" note in the Makefile and make @@ -105,9 +105,6 @@ the addresses are: vern@cs.cornell.edu (email sent to the former addresses should continue to be forwarded for quite a while) - (if I'm unlucky, you'll have to send mail - to "paxson@cs.cornell.edu", so try that if - the first doesn't work) Vern Paxson CS Department -- cgit v1.2.3 From 5cb4a367022f71df3e2cf589094f5475b644141d Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 30 Dec 1989 15:33:47 +0000 Subject: unput() bug fix --- flex.skl | 1 + 1 file changed, 1 insertion(+) diff --git a/flex.skl b/flex.skl index 54e9ded..32d118b 100644 --- a/flex.skl +++ b/flex.skl @@ -411,6 +411,7 @@ register char *yy_bp; yy_cp += dest - source; yy_bp += dest - source; + yy_n_chars = YY_BUF_SIZE; if ( yy_cp < yy_ch_buf + 2 ) YY_FATAL_ERROR( "flex scanner push-back overflow" ); -- cgit v1.2.3 From 6659ba1da7aa9dbb96f8a0c26851e4baceeeadf2 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 30 Dec 1989 15:34:28 +0000 Subject: made -c case-sensitive --- main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.c b/main.c index 141f463..45c4d04 100644 --- a/main.c +++ b/main.c @@ -327,7 +327,7 @@ char **argv; { int i, sawcmpflag, use_stdout; - char *arg, *skelname = NULL, *flex_gettime(), clower(), *mktemp(); + char *arg, *skelname = NULL, *flex_gettime(), *mktemp(); printstats = syntaxerror = trace = spprdflt = interactive = caseins = false; backtrack_report = performance_report = ddebug = fulltbl = fullspd = false; @@ -366,7 +366,7 @@ char **argv; } for ( ++i; arg[i] != '\0'; ++i ) - switch ( clower( arg[i] ) ) + switch ( arg[i] ) { case 'e': useecs = true; -- cgit v1.2.3 From 7fa99930c8bdb239aa849e29618d8b628e366dab Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 30 Dec 1989 15:34:53 +0000 Subject: removed gratuitous trailing context code --- nfa.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/nfa.c b/nfa.c index 53fdd01..99bb1e5 100644 --- a/nfa.c +++ 
b/nfa.c @@ -249,10 +249,6 @@ int mach, variable_trail_rule, headcnt, trailcnt; if ( headcnt > 0 ) fprintf( temp_action_file, "%s = %s + %d;\n", scanner_cp, scanner_bp, headcnt ); - - else - fprintf( temp_action_file, "%s = %s;\n", - scanner_cp, scanner_bp ); } else -- cgit v1.2.3 From b035468920efdb5cc8c5f0fbd42308d18f6ac99c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 10 Jan 1990 15:10:22 +0000 Subject: Initial revision --- libmain.c | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 libmain.c diff --git a/libmain.c b/libmain.c new file mode 100644 index 0000000..31adc8c --- /dev/null +++ b/libmain.c @@ -0,0 +1,9 @@ +/* libmain - flex run-time support library "main" function */ + +/* $Header$ */ + +main() + + { + yylex(); + } -- cgit v1.2.3 From 72aeeaa908d4da96aa5968133bde1d8e4dfdb574 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 15 Jan 1990 17:37:45 +0000 Subject: Changes for unsigned/8-bit chars. 2.2 Release. --- ccl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ccl.c b/ccl.c index 4f36363..a051b94 100644 --- a/ccl.c +++ b/ccl.c @@ -41,13 +41,13 @@ static char rcsid[] = * * synopsis * int cclp; - * char ch; + * int ch; * ccladd( cclp, ch ); */ ccladd( cclp, ch ) int cclp; -char ch; +int ch; { int ind, len, newpos, i; @@ -154,7 +154,7 @@ int cset[]; putc( '[', file ); - for ( i = 1; i <= CSIZE; ++i ) + for ( i = 1; i <= csize; ++i ) { if ( cset[i] ) { @@ -164,7 +164,7 @@ int cset[]; fputs( readable_form( i ), file ); - while ( ++i <= CSIZE && cset[i] ) + while ( ++i <= csize && cset[i] ) ; if ( i - 1 > start_char ) -- cgit v1.2.3 From e02a85b01cd31a241e9cb192ad982d9ace0d396a Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 15 Jan 1990 17:40:45 +0000 Subject: %t hacks. minor cosmetics. 2.2 Relase. 
--- dfa.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/dfa.c b/dfa.c index 5241e4d..f1be016 100644 --- a/dfa.c +++ b/dfa.c @@ -127,8 +127,8 @@ register int nacc; if ( accset[j] & YY_TRAILING_HEAD_MASK ) { fprintf( stderr, - "flex: Dangerous trailing context in rule at line %d\n", - rule_linenum[ar] ); + "%s: Dangerous trailing context in rule at line %d\n", + program_name, rule_linenum[ar] ); return; } } @@ -211,13 +211,9 @@ int state[]; register int i, ec; int out_char_set[CSIZE + 1]; - for ( i = 1; i <= CSIZE; ++i ) + for ( i = 1; i <= csize; ++i ) { - ec = ecgroup[i]; - - if ( ec < 0 ) - ec = -ec; - + ec = abs( ecgroup[i] ); out_char_set[i] = state[ec]; } @@ -226,7 +222,7 @@ int state[]; list_character_set( file, out_char_set ); /* now invert the members of the set to get the jam transitions */ - for ( i = 1; i <= CSIZE; ++i ) + for ( i = 1; i <= csize; ++i ) out_char_set[i] = ! out_char_set[i]; fprintf( file, "\n jam-transitions: EOF " ); @@ -435,7 +431,7 @@ ntod() */ todo_head = todo_next = 0; - for ( i = 0; i <= CSIZE; ++i ) + for ( i = 0; i <= csize; ++i ) { duplist[i] = NIL; symlist[i] = false; @@ -464,7 +460,7 @@ ntod() /* declare it "short" because it's a real long-shot that that * won't be large enough */ - printf( "static short int %s[][%d] =\n {\n", NEXTARRAY, + printf( "static short int yy_nxt[][%d] =\n {\n", numecs + 1 ); /* '}' so vi doesn't get too confused */ /* generate 0 entries for state #0 */ @@ -875,7 +871,7 @@ int ds[], dsize, transsym, nset[]; { /* do nothing */ } - else if ( ecgroup[sym] == transsym ) + else if ( abs( ecgroup[sym] ) == transsym ) nset[++numstates] = tsp; bottom: @@ -922,13 +918,16 @@ int symlist[]; if ( tch != SYM_EPSILON ) { - if ( tch < -lastccl || tch > CSIZE ) + if ( tch < -lastccl || tch > csize ) flexfatal( "bad transition character detected in sympartition()" ); if ( tch > 0 ) { /* character transition */ - mkechar( ecgroup[tch], dupfwd, duplist ); - 
symlist[ecgroup[tch]] = 1; + /* abs() needed for fake %t ec's */ + int ec = abs( ecgroup[tch] ); + + mkechar( ec, dupfwd, duplist ); + symlist[ec] = 1; } else -- cgit v1.2.3 From 89285343f4b5a802770ccca224609a293dc1891c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 15 Jan 1990 17:41:31 +0000 Subject: %t support. 8-bit/unsigned char support. 2.2 Release. --- ecs.c | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/ecs.c b/ecs.c index 0808e36..d5ac84b 100644 --- a/ecs.c +++ b/ecs.c @@ -113,10 +113,34 @@ int fwd[], bck[], num; } +/* ecs_from_xlation - associate equivalence class numbers using %t table + * + * synopsis + * ecs_from_xlation( ecmap ); + * + * Upon return, ecmap will map each character code to its equivalence + * class. The mapping will be positive if the character is the representative + * of its class, negative otherwise. + */ + +ecs_from_xlation( ecmap ) +int ecmap[]; + + { + int i; + + for ( i = 1; i <= csize; ++i ) + if ( xlation[i] == 0 ) + ecmap[i] = num_xlations + 1; + else + ecmap[i] = xlation[i]; + } + + /* mkeccl - update equivalence classes based on character class xtions * * synopsis - * char ccls[]; + * Char ccls[]; * int lenccl, fwd[llsiz], bck[llsiz], llsiz; * mkeccl( ccls, lenccl, fwd, bck, llsiz ); * @@ -126,14 +150,13 @@ int fwd[], bck[], num; */ mkeccl( ccls, lenccl, fwd, bck, llsiz ) -char ccls[]; +Char ccls[]; int lenccl, fwd[], bck[], llsiz; { int cclp, oldec, newec; int cclm, i, j; - -#define PROCFLG 0x80 + static unsigned char cclflags[CSIZE]; /* initialized to all '\0' */ /* note that it doesn't matter whether or not the character class is * negated. The same results will be obtained in either case. 
@@ -151,7 +174,7 @@ int lenccl, fwd[], bck[], llsiz; for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] ) { /* look for the symbol in the character class */ - for ( ; j < lenccl && (ccls[j] <= i || (ccls[j] & PROCFLG)); ++j ) + for ( ; j < lenccl && (ccls[j] <= i || cclflags[j]); ++j ) if ( ccls[j] == i ) { /* we found an old companion of cclm in the ccl. @@ -162,7 +185,7 @@ int lenccl, fwd[], bck[], llsiz; bck[i] = newec; fwd[newec] = i; newec = i; - ccls[j] |= PROCFLG; /* set flag so we don't reprocess */ + cclflags[j] = 1; /* set flag so we don't reprocess */ /* get next equivalence class member */ /* continue 2 */ @@ -193,10 +216,10 @@ next_pt: /* find next ccl member to process */ - for ( ++cclp; (ccls[cclp] & PROCFLG) && cclp < lenccl; ++cclp ) + for ( ++cclp; cclflags[cclp] && cclp < lenccl; ++cclp ) { /* reset "doesn't need processing" flag */ - ccls[cclp] &= ~PROCFLG; + cclflags[cclp] = 0; } } } -- cgit v1.2.3 From 3b0f33ba0614bdc0c4606fb331117424bd8cb9a6 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 15 Jan 1990 17:44:53 +0000 Subject: C++ support. Turbo-C support. 8-bit char support. yyleng is an int. unput() callable in section 3. yymore hacks. yyrestart() no longer closes stdin. 2.2 Release. 
--- flex.skl | 122 +++++++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 76 insertions(+), 46 deletions(-) diff --git a/flex.skl b/flex.skl index 32d118b..8b189a5 100644 --- a/flex.skl +++ b/flex.skl @@ -4,9 +4,36 @@ * $Header$ */ +#define FLEX_SCANNER + #include -#define FLEX_SCANNER + +#ifdef __cplusplus +#include +#include + +/* use prototypes in function declarations */ +#define YY_USE_PROTOS + +/* the "const" storage-class-modifier is valid */ +#define YY_USE_CONST + +#endif + +#ifdef __STDC__ +#define YY_USE_PROTOS +#define YY_USE_CONST +#endif + +#ifdef __TURBOC__ +#define YY_USE_CONST +#endif + + +#ifndef YY_USE_CONST +#define const +#endif /* amount of stuff to slurp up with each read */ #ifndef YY_READ_BUF_SIZE @@ -22,13 +49,14 @@ /* copy whatever the last rule matched to the standard output */ -#define ECHO fputs( yytext, yyout ) +/* cast to (char *) is because for 8-bit chars, yytext is (unsigned char *) */ +#define ECHO fputs( (char *) yytext, yyout ) /* gets input and stuffs it into "buf". number of characters read, or YY_NULL, * is returned in "result". 
*/ #define YY_INPUT(buf,result,max_size) \ - if ( (result = read( fileno(yyin), buf, max_size )) < 0 ) \ + if ( (result = read( fileno(yyin), (char *) buf, max_size )) < 0 ) \ YY_FATAL_ERROR( "read() in flex scanner failed" ); #define YY_NULL 0 #define yyterminate() return ( YY_NULL ) @@ -59,7 +87,7 @@ /* default declaration of generated scanner - a define so the user can * easily add parameters */ -#ifdef __STDC__ +#ifdef YY_USE_PROTOS #define YY_DECL int yylex( void ) #else #define YY_DECL int yylex() @@ -70,18 +98,18 @@ #define YY_END_OF_BUFFER_CHAR 0 +%% section 1 definitions go here + /* done after the current pattern has been matched and before the * corresponding action - sets up yytext */ #define YY_DO_BEFORE_ACTION \ yytext = yy_bp; \ +%% code to fiddle yytext and yyleng for yymore() goes here yy_hold_char = *yy_cp; \ *yy_cp = '\0'; \ yy_c_buf_p = yy_cp; -/* returns the length of the matched text */ -#define yyleng (yy_cp - yy_bp) - #define EOB_ACT_RESTART_SCAN 0 #define EOB_ACT_END_OF_FILE 1 #define EOB_ACT_LAST_MATCH 2 @@ -94,23 +122,24 @@ YY_DO_BEFORE_ACTION; /* set up yytext again */ \ } -#define unput(c) yyunput( c, yy_bp ) +#define unput(c) yyunput( c, yytext ) #define YY_USER_ACTION -FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; -char *yytext; +extern YY_CHAR *yytext; +extern int yyleng; -#ifndef __STDC__ -#define const -#endif +YY_CHAR *yytext; +int yyleng; -%% section 1 code and the data tables for the DFA go here +FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; + +%% data tables for the DFA go here /* these variables are all declared out here so that section 3 code can * manipulate them */ -static char *yy_c_buf_p; /* points to current character in buffer */ +static YY_CHAR *yy_c_buf_p; /* points to current character in buffer */ static int yy_init = 1; /* whether we need to initialize */ static int yy_start = 0; /* start state number */ @@ -123,32 +152,32 @@ static int yy_n_chars; /* number of characters read into yy_ch_buf */ * to put in 2 
end-of-buffer characters (this is explained where it is * done) at the end of yy_ch_buf */ -static char yy_ch_buf[YY_BUF_SIZE + 2]; +static YY_CHAR yy_ch_buf[YY_BUF_SIZE + 2]; /* yy_hold_char holds the character lost when yytext is formed */ -static char yy_hold_char; +static YY_CHAR yy_hold_char; static yy_state_type yy_last_accepting_state; -static char *yy_last_accepting_cpos; +static YY_CHAR *yy_last_accepting_cpos; -#ifdef __STDC__ +#ifdef YY_USE_PROTOS static yy_state_type yy_get_previous_state( void ); static int yy_get_next_buffer( void ); -static void yyunput( int c, char *buf_ptr ); +static void yyunput( int c, YY_CHAR *buf_ptr ); static int input( void ); -static void yyrestart( FILE *input_file ); +void yyrestart( FILE *input_file ); #else static yy_state_type yy_get_previous_state(); static int yy_get_next_buffer(); static void yyunput(); static int input(); -static void yyrestart(); +void yyrestart(); #endif YY_DECL { register yy_state_type yy_current_state; - register char *yy_cp, *yy_bp; + register YY_CHAR *yy_cp, *yy_bp; register int yy_act; %% user's declarations go here @@ -192,6 +221,7 @@ new_file: while ( 1 ) /* loops until end-of-file is reached */ { +%% yymore()-related code goes here yy_cp = yy_c_buf_p; /* support of yytext */ @@ -200,7 +230,7 @@ new_file: /* yy_bp points to the position in yy_ch_buf of the start of the * current run. 
*/ -%% yymore()-related code goes here + yy_bp = yy_cp; %% code to set up and find next match goes here @@ -229,8 +259,6 @@ do_action: /* this label is used only to access EOF actions */ /* undo the effects of YY_DO_BEFORE_ACTION */ *yy_cp = yy_hold_char; - yytext = yy_bp; - switch ( yy_get_next_buffer() ) { case EOB_ACT_END_OF_FILE: @@ -257,7 +285,7 @@ do_action: /* this label is used only to access EOF actions */ break; case EOB_ACT_RESTART_SCAN: - yy_c_buf_p = yytext; + yy_c_buf_p = yytext + YY_MORE_ADJ; yy_hold_char = *yy_c_buf_p; break; @@ -267,13 +295,15 @@ do_action: /* this label is used only to access EOF actions */ yy_current_state = yy_get_previous_state(); yy_cp = yy_c_buf_p; - yy_bp = yytext; + yy_bp = yytext + YY_MORE_ADJ; continue; /* go to "YY_DO_BEFORE_ACTION" */ } break; default: +#ifdef FLEX_DEBUG printf( "action # %d\n", yy_act ); +#endif YY_FATAL_ERROR( "fatal flex scanner internal error" ); } @@ -297,8 +327,8 @@ do_action: /* this label is used only to access EOF actions */ static int yy_get_next_buffer() { - register char *dest = yy_ch_buf; - register char *source = yytext - 1; /* copy prev. char, too */ + register YY_CHAR *dest = yy_ch_buf; + register YY_CHAR *source = yytext - 1; /* copy prev. 
char, too */ register int number_to_move, i; int ret_val; @@ -374,11 +404,11 @@ static yy_state_type yy_get_previous_state() { register yy_state_type yy_current_state; - register char *yy_cp; + register YY_CHAR *yy_cp; %% code to get the start state into yy_current_state goes here - for ( yy_cp = yytext; yy_cp < yy_c_buf_p; ++yy_cp ) + for ( yy_cp = yytext + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp ) { %% code to find the next state goes here } @@ -387,24 +417,24 @@ static yy_state_type yy_get_previous_state() } -#ifdef __STDC__ -static void yyunput( int c, register char *yy_bp ) +#ifdef YY_USE_PROTOS +static void yyunput( int c, register YY_CHAR *yy_bp ) #else static void yyunput( c, yy_bp ) int c; -register char *yy_bp; +register YY_CHAR *yy_bp; #endif { - register char *yy_cp = yy_c_buf_p; + register YY_CHAR *yy_cp = yy_c_buf_p; *yy_cp = yy_hold_char; /* undo effects of setting up yytext */ if ( yy_cp < yy_ch_buf + 2 ) { /* need to shift things up to make room */ register int number_to_move = yy_n_chars + 2; /* +2 for EOB chars */ - register char *dest = &yy_ch_buf[YY_BUF_SIZE + 2]; - register char *source = &yy_ch_buf[number_to_move]; + register YY_CHAR *dest = &yy_ch_buf[YY_BUF_SIZE + 2]; + register YY_CHAR *source = &yy_ch_buf[number_to_move]; while ( source > yy_ch_buf ) *--dest = *--source; @@ -422,6 +452,9 @@ register char *yy_bp; *--yy_cp = c; + /* note: the formal parameter *must* be called "yy_bp" for this + * macro to now work correctly + */ YY_DO_BEFORE_ACTION; /* set up yytext again */ } @@ -430,7 +463,7 @@ static int input() { int c; - char *yy_cp = yy_c_buf_p; + YY_CHAR *yy_cp = yy_c_buf_p; *yy_cp = yy_hold_char; @@ -465,7 +498,7 @@ static int input() break; case EOB_ACT_RESTART_SCAN: - yy_c_buf_p = yytext; + yy_c_buf_p = yytext + YY_MORE_ADJ; break; case EOB_ACT_LAST_MATCH: @@ -480,17 +513,14 @@ static int input() } -#ifdef __STDC__ -static void yyrestart( FILE *input_file ) +#ifdef YY_USE_PROTOS +void yyrestart( FILE *input_file ) #else -static 
void yyrestart( input_file ) +void yyrestart( input_file ) FILE *input_file; #endif { - if ( yyin != stdin ) - fclose( yyin ); - yyin = input_file; yy_init = 1; } -- cgit v1.2.3 From 8003f580309d7599d3fc9471bbb5fec120a5dae7 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 15 Jan 1990 17:47:34 +0000 Subject: 8-bit char support. 2.2 Release. --- tblcmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tblcmp.c b/tblcmp.c index d13ef8e..840736d 100644 --- a/tblcmp.c +++ b/tblcmp.c @@ -727,7 +727,7 @@ int state[], statenum, comstate; { int i, numdiff, tmpbase, tmp[CSIZE + 1]; - char transset[CSIZE + 1]; + Char transset[CSIZE + 1]; int tsptr; ++numtemps; -- cgit v1.2.3 From 0485773898462ac4ce65ce4487617c106bdae396 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 15 Jan 1990 17:48:26 +0000 Subject: Unsigned char support. \x support. 2.2 Release. --- misc.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 93 insertions(+), 29 deletions(-) diff --git a/misc.c b/misc.c index 004e267..e851344 100644 --- a/misc.c +++ b/misc.c @@ -89,18 +89,18 @@ int size, element_size; /* all_lower - true if a string is all lower-case * * synopsis: - * char *str; + * Char *str; * int all_lower(); * true/false = all_lower( str ); */ int all_lower( str ) -register char *str; +register Char *str; { while ( *str ) { - if ( ! islower( *str ) ) + if ( ! isascii( *str ) || ! islower( *str ) ) return ( 0 ); ++str; } @@ -112,18 +112,18 @@ register char *str; /* all_upper - true if a string is all upper-case * * synopsis: - * char *str; + * Char *str; * int all_upper(); * true/false = all_upper( str ); */ int all_upper( str ) -register char *str; +register Char *str; { while ( *str ) { - if ( ! isupper( *str ) ) + if ( ! isascii( *str ) || ! 
isupper( (char) *str ) ) return ( 0 ); ++str; } @@ -166,15 +166,16 @@ int v[], n; /* clower - replace upper-case letter to lower-case * * synopsis: - * char clower(), c; + * Char clower(); + * int c; * c = clower( c ); */ -char clower( c ) -register char c; +Char clower( c ) +register int c; { - return ( isupper(c) ? tolower(c) : c ); + return ( (isascii( c ) && isupper( c )) ? tolower( c ) : c ); } @@ -208,11 +209,42 @@ register char *str; } +/* copy_unsigned_string - + * returns a dynamically allocated copy of a (potentially) unsigned string + * + * synopsis + * Char *str, *copy, *copy_unsigned_string(); + * copy = copy_unsigned_string( str ); + */ + +Char *copy_unsigned_string( str ) +register Char *str; + + { + register Char *c; + Char *copy; + + /* find length */ + for ( c = str; *c; ++c ) + ; + + copy = (Char *) malloc( (unsigned) ((c - str + 1) * sizeof( Char )) ); + + if ( copy == NULL ) + flexfatal( "dynamic memory failure in copy_unsigned_string()" ); + + for ( c = copy; (*c++ = *str++); ) + ; + + return ( copy ); + } + + /* cshell - shell sort a character array in increasing order * * synopsis * - * char v[n]; + * Char v[n]; * int n; * cshell( v, n ); * @@ -224,12 +256,12 @@ register char *str; * n - number of elements of v to be sorted */ cshell( v, n ) -char v[]; +Char v[]; int n; { int gap, i, j, jg; - char k; + Char k; for ( gap = n / 2; gap > 0; gap = gap / 2 ) for ( i = gap; i < n; ++i ) @@ -262,6 +294,7 @@ dataend() puts( " } ;\n" ); dataline = 0; + datapos = 0; } @@ -380,7 +413,7 @@ flexerror( msg ) char msg[]; { - fprintf( stderr, "flex: %s\n", msg ); + fprintf( stderr, "%s: %s\n", program_name, msg ); flexend( 1 ); } @@ -397,11 +430,31 @@ flexfatal( msg ) char msg[]; { - fprintf( stderr, "flex: fatal internal error %s\n", msg ); + fprintf( stderr, "%s: fatal internal error %s\n", program_name, msg ); flexend( 1 ); } +/* htoi - convert a hexadecimal digit string to an integer value + * + * synopsis: + * int val, htoi(); + * Char str[]; + * val = 
htoi( str ); + */ + +int htoi( str ) +Char str[]; + + { + int result; + + (void) sscanf( (char *) str, "%x", &result ); + + return ( result ); + } + + /* line_directive_out - spit out a "# line" statement */ line_directive_out( output_file_name ) @@ -479,19 +532,19 @@ int value; /* myctoi - return the integer represented by a string of digits * * synopsis - * char array[]; + * Char array[]; * int val, myctoi(); * val = myctoi( array ); * */ int myctoi( array ) -char array[]; +Char array[]; { int val = 0; - (void) sscanf( array, "%d", &val ); + (void) sscanf( (char *) array, "%d", &val ); return ( val ); } @@ -500,13 +553,13 @@ char array[]; /* myesc - return character corresponding to escape sequence * * synopsis - * char array[], c, myesc(); + * Char array[], c, myesc(); * c = myesc( array ); * */ -char myesc( array ) -char array[]; +Char myesc( array ) +Char array[]; { switch ( array[1] ) @@ -519,6 +572,9 @@ char array[]; case 't': return ( '\t' ); case 'v': return ( '\v' ); + case 'x': + /* fall through */ + case '0': case '1': case '2': @@ -529,11 +585,15 @@ char array[]; case '7': case '8': case '9': - { /* \ */ - char c, esc_char; + + { /* \ or \x */ + Char c, esc_char; register int sptr = 1; + + if ( array[1] == 'x' ) + ++sptr; - while ( isdigit(array[sptr]) ) + while ( isascii( array[sptr] ) && isdigit( array[sptr] ) ) /* don't increment inside loop control because if * isdigit() is a macro it will expand it to two * increments ... 
@@ -543,7 +603,11 @@ char array[]; c = array[sptr]; array[sptr] = '\0'; - esc_char = otoi( array + 1 ); + if ( array[1] == 'x' ) + esc_char = htoi( array + 2 ); + else + esc_char = otoi( array + 1 ); + array[sptr] = c; if ( esc_char == '\0' ) @@ -565,17 +629,17 @@ char array[]; * * synopsis: * int val, otoi(); - * char str[]; + * Char str[]; * val = otoi( str ); */ int otoi( str ) -char str[]; +Char str[]; { int result; - (void) sscanf( str, "%o", &result ); + (void) sscanf( (char *) str, "%o", &result ); return ( result ); } @@ -597,7 +661,7 @@ register int c; { static char rform[10]; - if ( (c >= 0 && c < 32) || c == 127 ) + if ( (c >= 0 && c < 32) || c >= 127 ) { switch ( c ) { -- cgit v1.2.3 From c3f6e4127dcb9464a3435c42b9b5b0af2f6ab73c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 15 Jan 1990 17:48:47 +0000 Subject: Removed redundant test. 2.2 Release. --- nfa.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/nfa.c b/nfa.c index 99bb1e5..1d15a58 100644 --- a/nfa.c +++ b/nfa.c @@ -245,11 +245,8 @@ int mach, variable_trail_rule, headcnt, trailcnt; "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */\n" ); if ( headcnt > 0 ) - { - if ( headcnt > 0 ) - fprintf( temp_action_file, "%s = %s + %d;\n", - scanner_cp, scanner_bp, headcnt ); - } + fprintf( temp_action_file, "%s = %s + %d;\n", + scanner_cp, scanner_bp, headcnt ); else fprintf( temp_action_file, -- cgit v1.2.3 From 7b88995a281ba64349c3adf98377f631db4f7c06 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 15 Jan 1990 17:49:14 +0000 Subject: Unsigned char support. 2.2 Release. 
--- sym.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/sym.c b/sym.c index 48aaee0..8fc4e22 100644 --- a/sym.c +++ b/sym.c @@ -108,22 +108,22 @@ int table_size; /* cclinstal - save the text of a character class * * synopsis - * char ccltxt[]; + * Char ccltxt[]; * int cclnum; * cclinstal( ccltxt, cclnum ); */ cclinstal( ccltxt, cclnum ) -char ccltxt[]; +Char ccltxt[]; int cclnum; { /* we don't bother checking the return status because we are not called * unless the symbol is new */ - char *copy_string(); + Char *copy_unsigned_string(); - (void) addsym( copy_string( ccltxt ), (char *) 0, cclnum, + (void) addsym( (char *) copy_unsigned_string( ccltxt ), (char *) 0, cclnum, ccltab, CCL_HASH_SIZE ); } @@ -131,16 +131,16 @@ int cclnum; /* ccllookup - lookup the number associated with character class text * * synopsis - * char ccltxt[]; + * Char ccltxt[]; * int ccllookup, cclval; * cclval/0 = ccllookup( ccltxt ); */ int ccllookup( ccltxt ) -char ccltxt[]; +Char ccltxt[]; { - return ( findsym( ccltxt, ccltab, CCL_HASH_SIZE )->int_val ); + return ( findsym( (char *) ccltxt, ccltab, CCL_HASH_SIZE )->int_val ); } @@ -206,17 +206,20 @@ int hash_size; /* ndinstal - install a name definition * * synopsis - * char nd[], def[]; + * char nd[]; + * Char def[]; * ndinstal( nd, def ); */ ndinstal( nd, def ) -char nd[], def[]; +char nd[]; +Char def[]; { char *copy_string(); + Char *copy_unsigned_string(); - if ( addsym( copy_string( nd ), copy_string( def ), 0, + if ( addsym( copy_string( nd ), (char *) copy_unsigned_string( def ), 0, ndtbl, NAME_TABLE_HASH_SIZE ) ) synerr( "name defined twice" ); } @@ -230,11 +233,11 @@ char nd[], def[]; * def/NULL = ndlookup( nd ); */ -char *ndlookup( nd ) +Char *ndlookup( nd ) char nd[]; { - return ( findsym( nd, ndtbl, NAME_TABLE_HASH_SIZE )->str_val ); + return ( (Char *) findsym( nd, ndtbl, NAME_TABLE_HASH_SIZE )->str_val ); } -- cgit v1.2.3 From 725995c0e2357ee98adf7c1de4f21dcb243587e3 Mon Sep 
17 00:00:00 2001 From: Vern Paxson Date: Mon, 15 Jan 1990 17:49:56 +0000 Subject: Unsigned char support. %t support. Removed hard-wiring of program name "flex". -c changed to -C; -c now deprecated. -n added. :-( Multiple input files. SYSV tmpnam() use. Removed old #define's from output. Identified error messages w/ filename and line. 2.2 Release. --- main.c | 136 ++++++++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 84 insertions(+), 52 deletions(-) diff --git a/main.c b/main.c index 45c4d04..81842f8 100644 --- a/main.c +++ b/main.c @@ -39,13 +39,13 @@ static char rcsid[] = #include "flexdef.h" -static char flex_version[] = "2.1 (beta)"; +static char flex_version[] = "2.2"; /* these globals are all defined and commented in flexdef.h */ int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt; int interactive, caseins, useecs, fulltbl, usemecs; -int fullspd, gen_line_dirs, performance_report, backtrack_report; +int fullspd, gen_line_dirs, performance_report, backtrack_report, csize; int yymore_used, reject, real_reject, continued_action; int yymore_really_used, reject_really_used; int datapos, dataline, linenum; @@ -62,6 +62,8 @@ int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP]; int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1]; int tecbck[CSIZE + 1]; +int *xlation = (int *) 0; +int num_xlations; int lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof, *actvsc; char **scname; int current_max_dfa_size, current_max_xpairs; @@ -73,7 +75,7 @@ int *accsiz, *dhash, numas; int numsnpairs, jambase, jamstate; int lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse; int current_max_ccl_tbl_size; -char *ccltbl; +Char *ccltbl; char *starttime, *endtime, nmstr[MAXLINE]; int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; @@ -81,16 +83,15 @@ int 
num_backtracking, bol_needed; FILE *temp_action_file; FILE *backtrack_file; int end_of_buffer_state; -#ifndef SHORT_FILE_NAMES -char action_file_name[] = "/tmp/flexXXXXXX"; -#else -char action_file_name[] = "flexXXXXXX.tmp"; -#endif +char *action_file_name = NULL; +char **input_files; +int num_input_files; +char *program_name; #ifndef SHORT_FILE_NAMES -static char outfile[] = "lex.yy.c"; +static char *outfile = "lex.yy.c"; #else -static char outfile[] = "lexyy.c"; +static char *outfile = "lexyy.c"; #endif static int outfile_created = 0; @@ -125,14 +126,14 @@ char **argv; if ( performance_report ) { - if ( yymore_used ) - fprintf( stderr, - "yymore() entails a minor performance penalty\n" ); - if ( interactive ) fprintf( stderr, "-I (interactive) entails a minor performance penalty\n" ); + if ( yymore_used ) + fprintf( stderr, + "yymore() entails a minor performance penalty\n" ); + if ( reject ) fprintf( stderr, "REJECT entails a large performance penalty\n" ); @@ -223,7 +224,8 @@ int status; { endtime = flex_gettime(); - fprintf( stderr, "flex version %s usage statistics:\n", flex_version ); + fprintf( stderr, "%s version %s usage statistics:\n", program_name, + flex_version ); fprintf( stderr, " started at %s, finished at %s\n", starttime, endtime ); @@ -287,16 +289,16 @@ int status; if ( useecs ) { - tblsiz = tblsiz + CSIZE; + tblsiz = tblsiz + csize; fprintf( stderr, " %d/%d equivalence classes created\n", - numecs, CSIZE ); + numecs, csize ); } if ( usemecs ) { tblsiz = tblsiz + numecs; fprintf( stderr, " %d/%d meta-equivalence classes created\n", - nummecs, CSIZE ); + nummecs, csize ); } fprintf( stderr, " %d (%d saved) hash collisions, %d DFAs equal\n", @@ -338,6 +340,8 @@ char **argv; sawcmpflag = false; use_stdout = false; + program_name = argv[0]; + /* read flags */ for ( --argc, ++argv; argc ; --argc, ++argv ) { @@ -354,8 +358,15 @@ char **argv; break; case 'c': + fprintf( stderr, + "%s: Assuming use of deprecated -c flag is really intended to be -C\n", + 
program_name ); + + /* fall through */ + + case 'C': if ( i != 1 ) - flexerror( "-c flag must be given separately" ); + flexerror( "-C flag must be given separately" ); if ( ! sawcmpflag ) { @@ -385,7 +396,7 @@ char **argv; break; default: - lerrif( "unknown -c option %c", + lerrif( "unknown -C option '%c'", (int) arg[i] ); break; } @@ -418,6 +429,10 @@ char **argv; gen_line_dirs = false; break; + case 'n': + /* stupid do-nothing deprecated option */ + break; + case 'p': performance_report = true; break; @@ -446,16 +461,16 @@ char **argv; break; default: - lerrif( "unknown flag %c", (int) arg[i] ); + lerrif( "unknown flag '%c'", (int) arg[i] ); break; } -get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ +get_next_arg: /* used by -C and -S flags in lieu of a "continue 2" control */ ; } if ( (fulltbl || fullspd) && usemecs ) - flexerror( "full table and -cm don't make sense together" ); + flexerror( "full table and -Cm don't make sense together" ); if ( (fulltbl || fullspd) && interactive ) flexerror( "full table and -I are (currently) incompatible" ); @@ -476,24 +491,14 @@ get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ FILE *prev_stdout = freopen( outfile, "w", stdout ); if ( prev_stdout == NULL ) - flexerror( "could not create lex.yy.c" ); + lerrsf( "could not create %s", outfile ); outfile_created = 1; } - if ( argc ) - { - if ( argc > 1 ) - flexerror( "extraneous argument(s) given" ); - - yyin = fopen( infilename = argv[0], "r" ); - - if ( yyin == NULL ) - lerrsf( "can't open %s", argv[0] ); - } - - else - yyin = stdin; + num_input_files = argc; + input_files = argv; + set_input_file( num_input_files > 0 ? 
input_files[0] : NULL ); if ( backtrack_report ) { @@ -520,7 +525,23 @@ get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ if ( (skelfile = fopen( skelname, "r" )) == NULL ) lerrsf( "can't open skeleton file %s", skelname ); - (void) mktemp( action_file_name ); +#ifdef SYS_V + action_file_name = tmpnam( NULL ); +#endif + + if ( action_file_name == NULL ) + { + static char temp_action_file_name[32]; + +#ifndef SHORT_FILE_NAMES + strcpy( temp_action_file_name, "/tmp/flexXXXXXX" ); +#else + strcpy( temp_action_file_name, "flexXXXXXX.tmp" ); +#endif + (void) mktemp( temp_action_file_name ); + + action_file_name = temp_action_file_name; + } if ( (temp_action_file = fopen( action_file_name, "w" )) == NULL ) lerrsf( "can't open temporary action file %s", action_file_name ); @@ -534,6 +555,8 @@ get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ linenum = sectnum = 1; firstprot = NIL; + csize = CSIZE; + /* used in mkprot() so that the first proto goes in slot 1 * of the proto queue */ @@ -544,18 +567,18 @@ get_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ /* set up doubly-linked equivalence classes */ ecgroup[1] = NIL; - for ( i = 2; i <= CSIZE; ++i ) + for ( i = 2; i <= csize; ++i ) { ecgroup[i] = i - 1; nextecm[i - 1] = i; } - nextecm[CSIZE] = NIL; + nextecm[csize] = NIL; } else { /* put everything in its own equivalence class */ - for ( i = 1; i <= CSIZE; ++i ) + for ( i = 1; i <= csize; ++i ) { ecgroup[i] = i; nextecm[i] = BAD_SUBSCRIPT; /* to catch errors */ @@ -578,29 +601,38 @@ readin() if ( ddebug ) puts( "#define FLEX_DEBUG" ); - if ( fulltbl ) - puts( "#define FLEX_FULL_TABLE" ); - else if ( fullspd ) - puts( "#define FLEX_FAST_COMPRESSED" ); - else - puts( "#define FLEX_COMPRESSED" ); +#ifdef FLEX_8_BIT_CHARS + puts( "#define YY_CHAR unsigned char" ); +#else + puts( "#define YY_CHAR char" ); +#endif skelout(); line_directive_out( stdout ); if ( yyparse() ) - lerrif( "fatal parse 
error at line %d", linenum ); + { + pinpoint_message( "fatal parse error" ); + flexend( 1 ); + } - if ( useecs ) + if ( xlation ) { - numecs = cre8ecs( nextecm, ecgroup, CSIZE ); + ecs_from_xlation( ecgroup ); + useecs = true; + numecs = num_xlations + 1; /* + 1 for characters not in %t table */ ccl2ecl(); } - else - numecs = CSIZE; + else if ( useecs ) + { + numecs = cre8ecs( nextecm, ecgroup, csize ); + ccl2ecl(); + } + else + numecs = csize; } -- cgit v1.2.3 From b45d34372b282ef91a1e538e87654f8ea019ddee Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 15 Jan 1990 17:53:12 +0000 Subject: 8-bit char support. Error-message pinpointing. 2.2 Release. --- parse.y | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/parse.y b/parse.y index 1f24c3f..07ce77f 100644 --- a/parse.y +++ b/parse.y @@ -43,7 +43,7 @@ static char rcsid[] = int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen; int trlcontxt, xcluflg, cclsorted, varlength, variable_trail_rule; -char clower(); +Char clower(); static int madeany = false; /* whether we've made the '.' character class */ int previous_continued_action; /* whether the previous rule's action was '|' */ @@ -305,8 +305,8 @@ re : re '|' series if ( ! 
varlength || headcnt != 0 ) { fprintf( stderr, - "flex: warning - trailing context rule at line %d made variable because\n", - linenum ); + "%s: warning - trailing context rule at line %d made variable because\n", + program_name, linenum ); fprintf( stderr, " of preceding '|' action\n" ); } @@ -458,7 +458,7 @@ singleton : singleton '*' if ( useecs ) mkeccl( ccltbl + cclmap[anyccl], ccllen[anyccl], nextecm, - ecgroup, CSIZE ); + ecgroup, csize ); madeany = true; } @@ -478,7 +478,7 @@ singleton : singleton '*' if ( useecs ) mkeccl( ccltbl + cclmap[$1], ccllen[$1], - nextecm, ecgroup, CSIZE ); + nextecm, ecgroup, csize ); ++rulelen; @@ -622,27 +622,29 @@ build_eof_action() } -/* synerr - report a syntax error - * - * synopsis - * char str[]; - * synerr( str ); - */ +/* synerr - report a syntax error */ synerr( str ) char str[]; { syntaxerror = true; - fprintf( stderr, "Syntax error at line %d: %s\n", linenum, str ); + pinpoint_message( str ); } -/* yyerror - eat up an error message from the parser - * - * synopsis - * char msg[]; - * yyerror( msg ); +/* pinpoint_message - write out a message, pinpointing its location */ + +pinpoint_message( str ) +char str[]; + + { + fprintf( stderr, "\"%s\", line %d: %s\n", infilename, linenum, str ); + } + + +/* yyerror - eat up an error message from the parser; + * currently, messages are ignore */ yyerror( msg ) -- cgit v1.2.3 From 55d56a3510ec7d120b001127d9bcb08e969398fa Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 15 Jan 1990 17:53:40 +0000 Subject: Bug in -F table generation fixed. 8-bit char support. Hardwired generated array names. "const"'s added to generated code. Fixed yymore() / trailing context bug. 
--- gen.c | 113 ++++++++++++++++++++++++++++++++++++------------------------------ 1 file changed, 62 insertions(+), 51 deletions(-) diff --git a/gen.c b/gen.c index f39600d..36e350f 100644 --- a/gen.c +++ b/gen.c @@ -170,9 +170,6 @@ genctbl() nxt[base[i] - 1] = anum; /* action number */ } - dataline = 0; - datapos = 0; - for ( i = 0; i <= tblend; ++i ) { if ( chk[i] == EOB_POSITION ) @@ -204,7 +201,7 @@ genctbl() for ( i = 0; i <= lastsc * 2; ++i ) printf( " &yy_transition[%d],\n", base[i] ); - printf( " };\n" ); + dataend(); if ( useecs ) genecs(); @@ -217,13 +214,13 @@ genecs() { register int i, j; - static char C_char_decl[] = "static const char %s[%d] =\n { 0,\n"; + static char C_char_decl[] = "static const YY_CHAR %s[%d] =\n { 0,\n"; int numrows; - char clower(); + Char clower(); - printf( C_char_decl, ECARRAY, CSIZE + 1 ); + printf( C_char_decl, "yy_ec", csize + 1 ); - for ( i = 1; i <= CSIZE; ++i ) + for ( i = 1; i <= csize; ++i ) { if ( caseins && (i >= 'A') && (i <= 'Z') ) ecgroup[i] = ecgroup[clower( i )]; @@ -238,11 +235,11 @@ genecs() { fputs( "\n\nEquivalence Classes:\n\n", stderr ); - numrows = (CSIZE + 1) / 8; + numrows = (csize + 1) / 8; for ( j = 1; j <= numrows; ++j ) { - for ( i = j; i <= CSIZE; i = i + numrows ) + for ( i = j; i <= csize; i = i + numrows ) { char *readable_form(); @@ -393,7 +390,7 @@ genftbl() static char C_short_decl[] = "static const short int %s[%d] =\n { 0,\n"; - printf( C_short_decl, ALIST, lastdfa + 1 ); + printf( C_short_decl, "yy_accept", lastdfa + 1 ); dfaacc[end_of_buffer_state].dfaacc_state = end_of_buffer_action; @@ -426,7 +423,7 @@ gen_next_compressed_state() { char *char_map = useecs ? 
"yy_ec[*yy_cp]" : "*yy_cp"; - indent_put2s( "register char yy_c = %s;", char_map ); + indent_put2s( "register YY_CHAR yy_c = %s;", char_map ); /* save the backtracking info \before/ computing the next state * because we always compute one more state than needed - we @@ -503,8 +500,8 @@ gen_next_match() else if ( fullspd ) { indent_puts( "{" ); - indent_puts( "register struct yy_trans_info *yy_trans_info;\n" ); - indent_puts( "register char yy_c;\n" ); + indent_puts( "register const struct yy_trans_info *yy_trans_info;\n" ); + indent_puts( "register YY_CHAR yy_c;\n" ); indent_put2s( "for ( yy_c = %s;", char_map ); indent_puts( " (yy_trans_info = &yy_current_state[yy_c])->yy_verify == yy_c;" ); @@ -641,7 +638,7 @@ gentabs() static char C_short_decl[] = "static const short int %s[%d] =\n { 0,\n"; static char C_char_decl[] = - "static const char %s[%d] =\n { 0,\n"; + "static const YY_CHAR %s[%d] =\n { 0,\n"; acc_array = allocate_integer_array( current_max_dfas ); nummt = 0; @@ -657,21 +654,15 @@ gentabs() { /* write out accepting list and pointer list * - * first we generate the ACCEPT array. In the process, we compute - * the indices that will go into the ALIST array, and save the + * first we generate the "yy_acclist" array. In the process, we compute + * the indices that will go into the "yy_accept" array, and save the * indices in the dfaacc array */ int EOB_accepting_list[2]; - printf( C_short_decl, ACCEPT, max( numas, 1 ) + 1 ); - - /* set up accepting structures for the End Of Buffer state */ - EOB_accepting_list[0] = 0; - EOB_accepting_list[1] = end_of_buffer_action; - accsiz[end_of_buffer_state] = 1; - dfaacc[end_of_buffer_state].dfaacc_set = EOB_accepting_list; + printf( C_short_decl, "yy_acclist", max( numas, 1 ) + 1 ); - j = 1; /* index into ACCEPT array */ + j = 1; /* index into "yy_acclist" array */ for ( i = 1; i <= lastdfa; ++i ) { @@ -734,12 +725,12 @@ gentabs() acc_array[i] = 0; } - /* spit out ALIST array. 
If we're doing "reject", it'll be pointers - * into the ACCEPT array. Otherwise it's actual accepting numbers. + /* spit out "yy_accept" array. If we're doing "reject", it'll be pointers + * into the "yy_acclist" array. Otherwise it's actual accepting numbers. * In either case, we just dump the numbers. */ - /* "lastdfa + 2" is the size of ALIST; includes room for C arrays + /* "lastdfa + 2" is the size of "yy_accept"; includes room for C arrays * beginning at 0 and for "jam" state */ k = lastdfa + 2; @@ -752,7 +743,7 @@ gentabs() */ ++k; - printf( C_short_decl, ALIST, k ); + printf( C_short_decl, "yy_accept", k ); for ( i = 1; i <= lastdfa; ++i ) { @@ -781,7 +772,7 @@ gentabs() if ( trace ) fputs( "\n\nMeta-Equivalence Classes:\n", stderr ); - printf( C_char_decl, MATCHARRAY, numecs + 1 ); + printf( C_char_decl, "yy_meta", numecs + 1 ); for ( i = 1; i <= numecs; ++i ) { @@ -797,7 +788,7 @@ gentabs() total_states = lastdfa + numtemps; printf( tblend > MAX_SHORT ? C_long_decl : C_short_decl, - BASEARRAY, total_states + 1 ); + "yy_base", total_states + 1 ); for ( i = 1; i <= lastdfa; ++i ) { @@ -831,7 +822,7 @@ gentabs() dataend(); printf( tblend > MAX_SHORT ? C_long_decl : C_short_decl, - DEFARRAY, total_states + 1 ); + "yy_def", total_states + 1 ); for ( i = 1; i <= total_states; ++i ) mkdata( def[i] ); @@ -839,7 +830,7 @@ gentabs() dataend(); printf( lastdfa > MAX_SHORT ? C_long_decl : C_short_decl, - NEXTARRAY, tblend + 1 ); + "yy_nxt", tblend + 1 ); for ( i = 1; i <= tblend; ++i ) { @@ -852,7 +843,7 @@ gentabs() dataend(); printf( lastdfa > MAX_SHORT ? 
C_long_decl : C_short_decl, - CHECKARRAY, tblend + 1 ); + "yy_chk", tblend + 1 ); for ( i = 1; i <= tblend; ++i ) { @@ -907,6 +898,25 @@ make_tables() register int i; int did_eof_rule = false; + skelout(); + + /* first, take care of YY_DO_BEFORE_ACTION depending on yymore being used */ + set_indent( 2 ); + + if ( yymore_used ) + { + indent_puts( "yytext -= yy_more_len; \\" ); + indent_puts( "yyleng = yy_cp - yytext; \\" ); + } + + else + indent_puts( "yyleng = yy_cp - yy_bp; \\" ); + + set_indent( 0 ); + + skelout(); + + printf( "#define YY_END_OF_BUFFER %d\n", num_rules + 1 ); if ( fullspd ) @@ -934,7 +944,7 @@ make_tables() indent_puts( "};" ); indent_down(); - indent_puts( "typedef struct yy_trans_info *yy_state_type;" ); + indent_puts( "typedef const struct yy_trans_info *yy_state_type;" ); } else @@ -953,7 +963,7 @@ make_tables() { /* declare state buffer variables */ puts( "yy_state_type yy_state_buf[YY_BUF_SIZE + 2], *yy_state_ptr;" ); - puts( "char *yy_full_match;" ); + puts( "YY_CHAR *yy_full_match;" ); puts( "int yy_lp;" ); if ( variable_trailing_context_rules ) @@ -997,13 +1007,20 @@ make_tables() if ( yymore_used ) { - indent_puts( "static char *yy_more_pos = (char *) 0;" ); - indent_puts( "#define yymore() (yy_more_pos = yy_bp)" ); + indent_puts( "static int yy_more_flag = 0;" ); + indent_puts( "static int yy_doing_yy_more = 0;" ); + indent_puts( "static int yy_more_len = 0;" ); + indent_puts( + "#define yymore() { yy_more_flag = 1; }" ); + indent_puts( + "#define YY_MORE_ADJ (yy_doing_yy_more ? 
yy_more_len : 0)" ); } - + else + { indent_puts( "#define yymore() yymore_used_but_not_detected" ); - + indent_puts( "#define YY_MORE_ADJ 0" ); + } skelout(); @@ -1019,22 +1036,16 @@ make_tables() if ( yymore_used ) { - indent_puts( "if ( yy_more_pos )" ); + indent_puts( "yy_doing_yy_more = yy_more_flag;" ); + indent_puts( "if ( yy_doing_yy_more )" ); indent_up(); indent_puts( "{" ); - indent_puts( "yy_bp = yy_more_pos;" ); - indent_puts( "yy_more_pos = (char *) 0;" ); + indent_puts( "yy_more_len = yyleng;" ); + indent_puts( "yy_more_flag = 0;" ); indent_puts( "}" ); indent_down(); - indent_puts( "else" ); - indent_up(); - indent_puts( "yy_bp = yy_cp;" ); - indent_down(); } - else - indent_puts( "yy_bp = yy_cp;" ); - skelout(); gen_start_state(); @@ -1072,7 +1083,7 @@ make_tables() skelout(); if ( bol_needed ) - indent_puts( "register char *yy_bp = yytext;\n" ); + indent_puts( "register YY_CHAR *yy_bp = yytext;\n" ); gen_start_state(); -- cgit v1.2.3 From a5b6db34334c72f7f5bd5feb22e744f29119738d Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 15 Jan 1990 17:58:49 +0000 Subject: Removed unused EOB_accepting_list array. 2.2 Release. --- gen.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/gen.c b/gen.c index 36e350f..2fbe0a6 100644 --- a/gen.c +++ b/gen.c @@ -658,8 +658,6 @@ gentabs() * the indices that will go into the "yy_accept" array, and save the * indices in the dfaacc array */ - int EOB_accepting_list[2]; - printf( C_short_decl, "yy_acclist", max( numas, 1 ) + 1 ); j = 1; /* index into "yy_acclist" array */ -- cgit v1.2.3 From 0ae15402ee1402957c436089bf6d7c97213c897b Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 15 Jan 1990 18:00:27 +0000 Subject: 8-bit char support. SYS_V / Atari portability fixes. Removed generated array names. CSIZE now only defined if not already defined. Added "csize" global. Added "input_files", "num_input_files", and "program_name" globals. %t support globals. 2.2 Release. 
--- flexdef.h | 74 ++++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 42 insertions(+), 32 deletions(-) diff --git a/flexdef.h b/flexdef.h index 85272fd..1a154cb 100644 --- a/flexdef.h +++ b/flexdef.h @@ -30,24 +30,35 @@ #include #endif +#ifdef FLEX_8_BIT_CHARS +#define CSIZE 255 +#define Char unsigned char +#else +#define Char char +#endif + + #ifdef SYS_V #include #ifdef AMIGA -#define bzero(s, n) setmem((char *)(s), (unsigned)(n), '\0') +#define bzero(s, n) setmem((char *)(s), n, '\0') #define abs(x) ((x) < 0 ? -(x) : (x)) #else -#define bzero(s, n) memset((char *)(s), '\0', (unsigned)(n)) +#define bzero(s, n) memset((char *)(s), '\0', n) #endif #ifndef VMS +#ifndef SYS_V +/* System V systems should already declare memset as returning void* */ char *memset(); +#endif #else /* memset is needed for old versions of the VMS C runtime library */ #define memset(s, c, n) \ { \ register char *t = s; \ - register unsigned int m = n; \ + register int m = n; \ while ( m-- > 0 ) \ *t++ = c; \ } @@ -125,22 +136,6 @@ char *sprintf(); /* keep lint happy */ #define INITIAL_MAX_DFA_SIZE 750 #define MAX_DFA_SIZE_INCREMENT 750 -/* array names to be used in generated machine. They're short because - * we write out one data statement (which names the array) for each element - * in the array. - */ - -/* points to list of rules accepted for a state */ -#define ALIST "yy_accept" -#define ACCEPT "yy_acclist" /* list of rules accepted for a state */ -#define ECARRAY "yy_ec" /* maps input characters to equivalence classes */ -/* maps equivalence classes to meta-equivalence classes */ -#define MATCHARRAY "yy_meta" -#define BASEARRAY "yy_base" /* "base" array */ -#define DEFARRAY "yy_def" /* "default" array */ -#define NEXTARRAY "yy_nxt" /* "next" array */ -#define CHECKARRAY "yy_chk" /* "check" array */ - /* a note on the following masks. They are used to mark accepting numbers * as being special. 
As such, they implicitly limit the number of accepting @@ -171,7 +166,9 @@ char *sprintf(); /* keep lint happy */ #define INFINITY -1 /* for x{5,} constructions */ /* size of input alphabet - should be size of ASCII set */ +#ifndef CSIZE #define CSIZE 127 +#endif #define INITIAL_MAX_CCLS 100 /* max number of unique character classes */ #define MAX_CCLS_INCREMENT 100 @@ -325,29 +322,31 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; * spprdflt - if true (-s), suppress the default rule * interactive - if true (-I), generate an interactive scanner * caseins - if true (-i), generate a case-insensitive scanner - * useecs - if true (-ce flag), use equivalence classes - * fulltbl - if true (-cf flag), don't compress the DFA state table - * usemecs - if true (-cm flag), use meta-equivalence classes + * useecs - if true (-Ce flag), use equivalence classes + * fulltbl - if true (-Cf flag), don't compress the DFA state table + * usemecs - if true (-Cm flag), use meta-equivalence classes * fullspd - if true (-F flag), use Jacobson method of table representation * gen_line_dirs - if true (i.e., no -L flag), generate #line directives * performance_report - if true (i.e., -p flag), generate a report relating * to scanner performance * backtrack_report - if true (i.e., -b flag), generate "lex.backtrack" file * listing backtracking states + * csize - size of character set for the scanner we're generating; + * 127 for 7-bit chars and 255 for 8-bit * yymore_used - if true, yymore() is used in input rules * reject - if true, generate backtracking tables for REJECT macro * real_reject - if true, scanner really uses REJECT (as opposed to just - * having "reject" set for variable trailing context) + * having "reject" set for variable trailing context) * continued_action - true if this rule's action is to "fall through" to - * the next rule's action (i.e., the '|' action) + * the next rule's action (i.e., the '|' action) * yymore_really_used - has a REALLY_xxx value indicating whether 
a - * %used or %notused was used with yymore() + * %used or %notused was used with yymore() * reject_really_used - same for REJECT */ extern int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt; extern int interactive, caseins, useecs, fulltbl, usemecs; -extern int fullspd, gen_line_dirs, performance_report, backtrack_report; +extern int fullspd, gen_line_dirs, performance_report, backtrack_report, csize; extern int yymore_used, reject, real_reject, continued_action; #define REALLY_NOT_DETERMINED 0 @@ -360,19 +359,25 @@ extern int yymore_really_used, reject_really_used; * datapos - characters on current output line * dataline - number of contiguous lines of data in current data * statement. Used to generate readable -f output + * linenum - current input line number * skelfile - the skeleton file * yyin - input file * temp_action_file - temporary file to hold actions * backtrack_file - file to summarize backtracking states to - * action_file_name - name of the temporary file * infilename - name of input file - * linenum - current input line number + * action_file_name - name of the temporary file + * input_files - array holding names of input files + * num_input_files - size of input_files array + * program_name - name with which program was invoked */ extern int datapos, dataline, linenum; extern FILE *skelfile, *yyin, *temp_action_file, *backtrack_file; extern char *infilename; -extern char action_file_name[]; +extern char *action_file_name; +extern char **input_files; +extern int num_input_files; +extern char *program_name; /* variables for stack of states having only one out-transition: @@ -461,10 +466,14 @@ extern int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; * templates) * tecfwd - forward link of meta-equivalence classes members * tecbck - backward link of MEC's + * xlation - maps character codes to their translations, or nil if no %t table + * num_xlations - number of different xlation values */ extern int numecs, nextecm[CSIZE 
+ 1], ecgroup[CSIZE + 1], nummecs; extern int tecfwd[CSIZE + 1], tecbck[CSIZE + 1]; +extern int *xlation; +extern int num_xlations; /* variables for start conditions: @@ -537,7 +546,7 @@ extern int end_of_buffer_state; extern int lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse; extern int current_max_ccl_tbl_size; -extern char *ccltbl; +extern Char *ccltbl; /* variables for miscellaneous information: @@ -594,10 +603,11 @@ char *allocate_array(), *reallocate_array(); #define reallocate_dfaacc_union(array, size) \ (union dfaacc_union *) reallocate_array( (char *) array, size, sizeof( union dfaacc_union ) ) -#define allocate_character_array(size) allocate_array( size, sizeof( char ) ) +#define allocate_character_array(size) \ + (Char *) allocate_array( size, sizeof( Char ) ) #define reallocate_character_array(array,size) \ - reallocate_array( array, size, sizeof( char ) ) + (Char *) reallocate_array( (char *) array, size, sizeof( Char ) ) /* used to communicate between scanner and parser. The type should really -- cgit v1.2.3 From f61a9c1928cd6229d1560a5d6a16ef01efe9bcb1 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 15 Jan 1990 18:02:29 +0000 Subject: 8-bit char support. Arbitrary indented/%{} code allowed in section 2. \x escapes. %t support. Minor POSIX-compliance changes. BEGIN(0) -> BEGIN(INITIAL). yywrap() and set_input_file() for multiple input files. C_COMMENT_2 removed. 2.2 Release. 
--- scan.l | 204 +++++++++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 149 insertions(+), 55 deletions(-) diff --git a/scan.l b/scan.l index 8c40174..dfc1d47 100644 --- a/scan.l +++ b/scan.l @@ -49,12 +49,12 @@ static char rcsid[] = return ( CHAR ); #define RETURNNAME \ - (void) strcpy( nmstr, yytext ); \ + (void) strcpy( nmstr, (char *) yytext ); \ return ( NAME ); #define PUT_BACK_STRING(str, start) \ - for ( i = strlen( str ) - 1; i >= start; --i ) \ - unput(str[i]) + for ( i = strlen( (char *) str ) - 1; i >= start; --i ) \ + unput((str)[i]) #define CHECK_REJECT(str) \ if ( all_upper( str ) ) \ @@ -63,11 +63,13 @@ static char rcsid[] = #define CHECK_YYMORE(str) \ if ( all_lower( str ) ) \ yymore_used = true; + +#undef yywrap %} %x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE -%x FIRSTCCL CCL ACTION RECOVER BRACEERROR C_COMMENT C_COMMENT_2 ACTION_COMMENT -%x ACTION_STRING PERCENT_BRACE_ACTION USED_LIST +%x FIRSTCCL CCL ACTION RECOVER BRACEERROR C_COMMENT ACTION_COMMENT +%x ACTION_STRING PERCENT_BRACE_ACTION USED_LIST CODEBLOCK_2 XLATION WS [ \t\f]+ OPTWS [ \t\f]* @@ -78,18 +80,19 @@ NOT_NAME [^a-z_\n]+ SCNAME {NAME} -ESCSEQ \\([^\n]|[0-9]{1,3}) +ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) %% static int bracelevel, didadef; - int i, indented_code, checking_used; - char nmdef[MAXLINE], myesc(); + int i, indented_code, checking_used, new_xlation; + int doing_codeblock = false; + Char nmdef[MAXLINE], myesc(); ^{WS} indented_code = true; BEGIN(CODEBLOCK); ^#.*\n ++linenum; ECHO; /* treat as a comment */ ^"/*" ECHO; BEGIN(C_COMMENT); -^"%s"(tart)? return ( SCDECL ); -^"%x" return ( XSCDECL ); +^"%s"{NAME}? return ( SCDECL ); +^"%x"{NAME}? 
return ( XSCDECL ); ^"%{".*\n { ++linenum; line_directive_out( stdout ); @@ -106,37 +109,68 @@ ESCSEQ \\([^\n]|[0-9]{1,3}) return ( SECTEND ); } -^"%used" checking_used = REALLY_USED; BEGIN(USED_LIST); -^"%unused" checking_used = REALLY_NOT_USED; BEGIN(USED_LIST); +^"%used" { + pinpoint_message( "Warning, %%used/%%unused have been deprecated" ); + checking_used = REALLY_USED; BEGIN(USED_LIST); + } +^"%unused" { + checking_used = REALLY_NOT_USED; BEGIN(USED_LIST); + pinpoint_message( "Warning, %%used/%%unused have been deprecated" ); + checking_used = REALLY_NOT_USED; BEGIN(USED_LIST); + } -^"%"[^sx]" ".*\n { +^"%"[aeknopt]" ".*\n { +#ifdef NOTDEF fprintf( stderr, "old-style lex command at line %d ignored:\n\t%s", linenum, yytext ); +#endif ++linenum; } +^"%"[cr]{OPTWS} /* ignore old lex directive */ + +%t{OPTWS}\n { + char *malloc(); + + ++linenum; + xlation = (int *) malloc( sizeof( int ) * (csize + 1) ); + + for ( i = 1; i <= csize; ++i ) + xlation[i] = 0; + + if ( ! xlation ) + flexfatal( + "dynamic memory failure building %t table" ); + + num_xlations = 0; + + BEGIN(XLATION); + } + +^"%"[^sxanpekotcru{}]{OPTWS} synerr( "unrecognized '%' directive" ); + ^{NAME} { - (void) strcpy( nmstr, yytext ); + (void) strcpy( nmstr, (char *) yytext ); didadef = false; BEGIN(PICKUPDEF); } {SCNAME} RETURNNAME; ^{OPTWS}\n ++linenum; /* allows blank lines in section 1 */ -\n ++linenum; return ( '\n' ); +{OPTWS}\n ++linenum; return ( '\n' ); . synerr( "illegal character" ); BEGIN(RECOVER); -"*/" ECHO; BEGIN(0); -"*/".*\n ++linenum; ECHO; BEGIN(0); +"*/" ECHO; BEGIN(INITIAL); +"*/".*\n ++linenum; ECHO; BEGIN(INITIAL); [^*\n]+ ECHO; "*" ECHO; \n ++linenum; ECHO; -^"%}".*\n ++linenum; BEGIN(0); +^"%}".*\n ++linenum; BEGIN(INITIAL); "reject" ECHO; CHECK_REJECT(yytext); "yymore" ECHO; CHECK_YYMORE(yytext); {NAME}|{NOT_NAME}|. 
ECHO; @@ -144,16 +178,16 @@ ESCSEQ \\([^\n]|[0-9]{1,3}) ++linenum; ECHO; if ( indented_code ) - BEGIN(0); + BEGIN(INITIAL); } {WS} /* separates name and definition */ {NOT_WS}.* { - (void) strcpy( nmdef, yytext ); + (void) strcpy( (char *) nmdef, (char *) yytext ); - for ( i = strlen( nmdef ) - 1; + for ( i = strlen( (char *) nmdef ) - 1; i >= 0 && nmdef[i] == ' ' || nmdef[i] == '\t'; --i ) @@ -168,14 +202,14 @@ ESCSEQ \\([^\n]|[0-9]{1,3}) \n { if ( ! didadef ) synerr( "incomplete name definition" ); - BEGIN(0); + BEGIN(INITIAL); ++linenum; } -.*\n ++linenum; BEGIN(0); RETURNNAME; +.*\n ++linenum; BEGIN(INITIAL); RETURNNAME; -\n ++linenum; BEGIN(0); +\n ++linenum; BEGIN(INITIAL); {WS} "reject" { if ( all_upper( yytext ) ) @@ -192,6 +226,25 @@ ESCSEQ \\([^\n]|[0-9]{1,3}) {NOT_WS}+ synerr( "unrecognized %used/%unused construct" ); +"%t"{OPTWS}\n ++linenum; BEGIN(INITIAL); +^{OPTWS}[0-9]+ ++num_xlations; new_xlation = true; +^. synerr( "bad row in translation table" ); +{WS} /* ignore whitespace */ + +{ESCSEQ} { + xlation[myesc( yytext )] = + (new_xlation ? num_xlations : -num_xlations); + new_xlation = false; + } +. { + xlation[yytext[0]] = + (new_xlation ? num_xlations : -num_xlations); + new_xlation = false; + } + +\n ++linenum; + + .*\n/{NOT_WS} { ++linenum; ACTION_ECHO; @@ -205,14 +258,15 @@ ESCSEQ \\([^\n]|[0-9]{1,3}) ^{OPTWS}\n ++linenum; /* allow blank lines in section 2 */ - /* this horrible mess of a rule matches indented lines which - * do not contain "/*". 
We need to make the distinction because - * otherwise this rule will be taken instead of the rule which - * matches the beginning of comments like this one - */ -^{WS}([^/\n]|"/"[^*\n])*("/"?)\n { - synerr( "indented code found outside of action" ); - ++linenum; +^({WS}|"%{") { + indented_code = (yytext[0] != '%'); + doing_codeblock = true; + bracelevel = 1; + + if ( indented_code ) + ACTION_ECHO; + + BEGIN(CODEBLOCK_2); } "<" BEGIN(SC); return ( '<' ); @@ -229,8 +283,6 @@ ESCSEQ \\([^\n]|[0-9]{1,3}) } {WS}"|".*\n continued_action = true; ++linenum; return ( '\n' ); -^{OPTWS}"/*" ACTION_ECHO; BEGIN(C_COMMENT_2); - {WS} { /* this rule is separate from the one below because * otherwise we get variable trailing context, so @@ -262,10 +314,10 @@ ESCSEQ \\([^\n]|[0-9]{1,3}) "["([^\\\]\n]|{ESCSEQ})+"]" { int cclval; - (void) strcpy( nmstr, yytext ); + (void) strcpy( nmstr, (char *) yytext ); /* check to see if we've already encountered this ccl */ - if ( (cclval = ccllookup( nmstr )) ) + if ( (cclval = ccllookup( (Char *) nmstr )) ) { yylval = cclval; ++cclreuse; @@ -276,12 +328,12 @@ ESCSEQ \\([^\n]|[0-9]{1,3}) /* we fudge a bit. 
We know that this ccl will * soon be numbered as lastccl + 1 by cclinit */ - cclinstal( nmstr, lastccl + 1 ); + cclinstal( (Char *) nmstr, lastccl + 1 ); /* push back everything but the leading bracket * so the ccl can be rescanned */ - PUT_BACK_STRING(nmstr, 1); + PUT_BACK_STRING((char *) nmstr, 1); BEGIN(FIRSTCCL); return ( '[' ); @@ -289,10 +341,10 @@ ESCSEQ \\([^\n]|[0-9]{1,3}) } "{"{NAME}"}" { - register char *nmdefptr; - char *ndlookup(); + register Char *nmdefptr; + Char *ndlookup(); - (void) strcpy( nmstr, yytext ); + (void) strcpy( nmstr, (char *) yytext ); nmstr[yyleng - 1] = '\0'; /* chop trailing brace */ /* lookup from "nmstr + 1" to chop leading brace */ @@ -368,21 +420,32 @@ ESCSEQ \\([^\n]|[0-9]{1,3}) \n synerr( "missing }" ); ++linenum; BEGIN(SECT2); -{OPTWS}"%}".* bracelevel = 0; -"reject" ACTION_ECHO; CHECK_REJECT(yytext); -"yymore" ACTION_ECHO; CHECK_YYMORE(yytext); -{NAME}|{NOT_NAME}|. ACTION_ECHO; -\n { +{OPTWS}"%}".* bracelevel = 0; +"reject" { + ACTION_ECHO; + CHECK_REJECT(yytext); + } +"yymore" { + ACTION_ECHO; + CHECK_YYMORE(yytext); + } +{NAME}|{NOT_NAME}|. ACTION_ECHO; +\n { ++linenum; ACTION_ECHO; - if ( bracelevel == 0 ) + if ( bracelevel == 0 || + (doing_codeblock && indented_code) ) { - fputs( "\tYY_BREAK\n", temp_action_file ); + if ( ! doing_codeblock ) + fputs( "\tYY_BREAK\n", temp_action_file ); + + doing_codeblock = false; BEGIN(SECT2); } } - /* REJECT and yymore() are checked for above, in PERCENT_BRACE_ACTION */ + + /* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */ "{" ACTION_ECHO; ++bracelevel; "}" ACTION_ECHO; --bracelevel; [^a-z_{}"'/\n]+ ACTION_ECHO; @@ -407,12 +470,6 @@ ESCSEQ \\([^\n]|[0-9]{1,3}) \n ++linenum; ACTION_ECHO; . ACTION_ECHO; -"*/" ACTION_ECHO; BEGIN(SECT2); -"*/".*\n ++linenum; ACTION_ECHO; BEGIN(SECT2); -[^*\n]+ ACTION_ECHO; -"*" ACTION_ECHO; -\n ++linenum; ACTION_ECHO; - [^"\\\n]+ ACTION_ECHO; \\. 
ACTION_ECHO; \n ++linenum; ACTION_ECHO; @@ -434,3 +491,40 @@ ESCSEQ \\([^\n]|[0-9]{1,3}) .*(\n?) ECHO; %% + + +int yywrap() + + { + if ( --num_input_files > 0 ) + { + set_input_file( *++input_files ); + return ( 0 ); + } + + else + return ( 1 ); + } + + +/* set_input_file - open the given file (if NULL, stdin) for scanning */ + +set_input_file( file ) +char *file; + + { + if ( file ) + { + infilename = file; + yyin = fopen( infilename, "r" ); + + if ( yyin == NULL ) + lerrsf( "can't open %s", file ); + } + + else + { + yyin = stdin; + infilename = ""; + } + } -- cgit v1.2.3 From 7d51e0e6deb415cd50ec6a909586423f3fc0f7ce Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 16 Jan 1990 10:29:10 +0000 Subject: 8-bit char support. 2.2 Release. --- yylex.c | 41 +++++------------------------------------ 1 file changed, 5 insertions(+), 36 deletions(-) diff --git a/yylex.c b/yylex.c index 2e1cead..ea12936 100644 --- a/yylex.c +++ b/yylex.c @@ -35,6 +35,7 @@ static char rcsid[] = #endif +#include #include "flexdef.h" #include "parse.h" @@ -175,43 +176,11 @@ int yylex() fprintf( stderr, "\\%c", yylval ); break; - case 1: - case 2: - case 3: - case 4: - case 5: - case 6: - case 7: - case 8: - case 9: - case 10: - case 11: - case 12: - case 13: - case 14: - case 15: - case 16: - case 17: - case 18: - case 19: - case 20: - case 21: - case 22: - case 23: - case 24: - case 25: - case 26: - case 27: - case 28: - case 29: - case 30: - case 31: - case 127: - fprintf( stderr, "\\%.3o", yylval ); - break; - default: - (void) putc( yylval, stderr ); + if ( ! isascii( yylval ) || ! isprint( yylval ) ) + fprintf( stderr, "\\%.3o", yylval ); + else + (void) putc( yylval, stderr ); break; } -- cgit v1.2.3 From 1d59e0b135c3b3dcb119af1f4045c856a7d9dec7 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 16 Jan 1990 10:39:51 +0000 Subject: Added missing ',' in error message. 2.2 Release, second try. 
--- misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misc.c b/misc.c index e851344..119658a 100644 --- a/misc.c +++ b/misc.c @@ -430,7 +430,7 @@ flexfatal( msg ) char msg[]; { - fprintf( stderr, "%s: fatal internal error %s\n", program_name, msg ); + fprintf( stderr, "%s: fatal internal error, %s\n", program_name, msg ); flexend( 1 ); } -- cgit v1.2.3 From fd6ee792d251e9ff28372ec39934a6a14ce346ad Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 16 Jan 1990 11:17:36 +0000 Subject: Restored EOB accepting list for REJECT. Second try at 2.2 Release. --- gen.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/gen.c b/gen.c index 2fbe0a6..fdc4feb 100644 --- a/gen.c +++ b/gen.c @@ -658,6 +658,14 @@ gentabs() * the indices that will go into the "yy_accept" array, and save the * indices in the dfaacc array */ + int EOB_accepting_list[2]; + + /* set up accepting structures for the End Of Buffer state */ + EOB_accepting_list[0] = 0; + EOB_accepting_list[1] = end_of_buffer_action; + accsiz[end_of_buffer_state] = 1; + dfaacc[end_of_buffer_state].dfaacc_set = EOB_accepting_list; + printf( C_short_decl, "yy_acclist", max( numas, 1 ) + 1 ); j = 1; /* index into "yy_acclist" array */ -- cgit v1.2.3 From 65a8cee37cb94b7824c6f82246f262f314cde717 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 25 Feb 1990 01:28:22 +0000 Subject: Initial revision --- flex.1 | 1434 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1434 insertions(+) create mode 100644 flex.1 diff --git a/flex.1 b/flex.1 new file mode 100644 index 0000000..d52f126 --- /dev/null +++ b/flex.1 @@ -0,0 +1,1434 @@ +.TH FLEX 1 "24 February 1990" "Version 2.2" +.SH NAME +flex - fast lexical analyzer generator +.SH SYNOPSIS +.B flex +.B [-bcdfinpstvFILT -C[efmF] -Sskeleton] +.I [filename ...] +.SH DESCRIPTION +.I flex +is a tool for generating +.I scanners: +programs which recognize lexical patterns in text.
+.I flex +reads +the given input files, or its standard input if no file names are given, +for a description of a scanner to generate. The description is in +the form of pairs +of regular expressions and C code, called +.I rules. flex +generates as output a C source file, +.B lex.yy.c, +which defines a routine +.B yylex(). +This file is compiled and linked with the +.B -ll +library to produce an executable. When the executable is run, +it analyzes its input for occurrences +of the regular expressions. Whenever it finds one, it executes +the corresponding C code. +.SH SOME SIMPLE EXAMPLES +.LP +First some simple examples to get the flavor of how one uses +.I flex. +The following +.I flex +input specifies a scanner which whenever it encounters a tab +will print eight blanks to its standard output: +.nf + + %% + \\t printf( "        " ); + +.fi +By default, any text not matched by a +.I flex +scanner +is copied to the output, so the net effect of this scanner is +to copy its input file to its output with each tab expanded +into eight blanks. +In this input, there is just one rule. "\\t" is the +.I pattern +(it's a regular expression specifying a tab) and the "printf" is the +.I action. The "%%" marks the beginning of the rules. +.LP +Here's another simple example: +.nf + + int num_lines = 0, num_chars = 0; + + %% + \\n ++num_lines; ++num_chars; + . ++num_chars; + + %% + main() + { + yylex(); + printf( "# of lines = %d, # of chars = %d\\n", + num_lines, num_chars ); + } + +.fi +This scanner counts the number of characters and the number +of lines in its input (it produces no output other than the +final report on the counts). The first line +declares two globals, "num_lines" and "num_chars", which are accessible +both inside +.B yylex() +and in the +.B main() +routine declared after the second "%%".
There are two rules, one +which matches a newline ("\\n") and increments both the line count and +the character count, and one which matches any character other than +a newline (indicated by the "." regular expression). +.LP +A somewhat more complicated example: +.nf + + /* scanner for a toy Pascal-like language */ + + %{ + /* need this for the call to atof() below */ + #include <math.h> + %} + + DIGIT [0-9] + ID [a-z][a-z0-9]* + + %% + + {DIGIT}+ { + printf( "An integer: %s (%d)\\n", yytext, + atoi( yytext ) ); + } + + {DIGIT}+"."{DIGIT}* { + printf( "A float: %s (%g)\\n", yytext, + atof( yytext ) ); + } + + if|then|begin|end|procedure|function { + printf( "A keyword: %s\\n", yytext ); + } + + {ID} printf( "An identifier: %s\\n", yytext ); + + "+"|"-"|"*"|"/" printf( "An operator: %s\\n", yytext ); + + "{"[^}\\n]*"}" /* eat up one-line comments */ + + [ \\t\\n]+ /* eat up whitespace */ + + . printf( "Unrecognized character: %s\\n", yytext ); + + %% + + main( argc, argv ) + int argc; + char **argv; + { + ++argv, --argc; + if ( argc > 0 ) + yyin = fopen( argv[0], "r" ); + else + yyin = stdin; + + yylex(); + } + +.fi +This is the beginnings of a simple scanner for a language like +Pascal. It identifies different types of +.I tokens +and reports on what it has seen. +.LP +The details of the example will be explained in the following +sections. +.SH FORMAT OF THE INPUT FILE +The +.I flex +input file consists of three sections, separated by +.B %%: +.nf + + definitions + %% + rules + %% + user code + +.fi +The +.I definitions +section contains declarations of simple +.I name +definitions to simplify the scanner specification and of +.I start conditions, +which are explained in a later section. +.LP +Name definitions have the form: +.nf + + name definition + +.fi +The "name" is a word beginning with a letter or a '_' +followed by zero or more letters, digits, '_', or '-'.
+The definition is taken to begin at the first non-white-space +following the name and continue to the end of the line. +The definition can subsequently be referred to using "{name}", which +will expand to "(definition)". For example, +.nf + + DIGIT [0-9] + ID [a-z][a-z0-9]* + +.fi +defines "DIGIT" to be a regular expression which matches a +single digit, and +"ID" to be a regular expression which matches a letter +followed by zero-or-more letters or digits. +A subsequent reference to +.nf + + {DIGIT}+"."{DIGIT}* + +.fi +is identical to +.nf + + ([0-9])+"."([0-9])* + +.fi +and matches one-or-more digits followed by a '.' followed +by zero-or-more digits. +.LP +The +.I rules +section of the +.I flex +input contains a series of rules of the form: +.nf + + pattern action + +.fi +where the pattern must be unindented and the action must begin +on the same line. +.LP +See below for a further description of patterns and actions. +.LP +Finally, the user code section is simply copied to +.B lex.yy.c +verbatim. +It is used for companion routines which call or are called +by the scanner. The presence of this section is optional; +if it is missing, the second +.B %% +in the input file may be skipped, too. +.LP +In the definitions and rules sections, any +.I indented +text or text enclosed in +.B %{ +and +.B %} +is copied verbatim to the output (with the %{}'s removed). +The %{}'s must appear unindented on lines by themselves. +.LP +In the rules section, +any indented or %{} text appearing before the +first rule may be used to declare variables +which are local to the scanning routine, and, after the declarations, +code which is to be executed whenever the scanning routine is entered. +Other indented or %{} text in the rules section is still copied to the output, +but its meaning is not well-defined and it may well cause compile-time +errors (this feature is present for +.I POSIX +compliance; see below for other such features).
+.LP +In the definitions section, an unindented comment (i.e., a line +beginning with "/*") is also copied verbatim to the output up +to the next "*/". Also, any line beginning with '#' is ignored. +.SH PATTERNS +The patterns in the input are written using an extended set of regular +expressions. These are: +.nf + + x match the character 'x' + . any character except newline + [xyz] an 'x', a 'y', or a 'z' + [abj-oZ] an 'a', a 'b', any letter + from 'j' through 'o', or a 'Z' + [^A-Z] any character EXCEPT an uppercase letter, + including a newline (unlike how many other + regular expression tools treat the '^'!). + This means that a pattern like [^"]* will + match an entire file (overflowing the input + buffer) unless there's another quote in + the input. + [^A-Z\\n] any character EXCEPT an uppercase letter or + a newline + r* zero or more r's, where r is any regular expression + r+ one or more r's + r? zero or one r's (that is, "an optional r") + r{2,5} anywhere from two to five r's + r{2,} two or more r's + r{4} exactly 4 r's + {name} the expansion of the "name" definition + (see above) + "[xyz]\\"foo" + the literal string: [xyz]"foo + \\x if x is an 'a', 'b', 'f', 'n', 'r', + 't', or 'v', then the ANSI-C + interpretation of \\x. Otherwise, + a literal 'x' (used to escape + operators such as '*') + \\123 the character with octal value 123 + \\x2a the character with hexadecimal value 2a + (r) match an r; parentheses are used + to override precedence (see below) + + + rs the regular expression r followed + by the regular expression s; called + "concatenation" + + + r|s either an r or an s + + + r/s an r but only if it is followed by + an s. The s is not part of the + matched text. This type of + pattern is known as "trailing context". 
+ ^r an r, but only at the beginning of a line + r$ an r, but only at the end of a line + (r must not use trailing context) + + + <s>r an r, but only in start condition s (see + below for discussion of start conditions) + <s1,s2,s3>r + same, but in any of start conditions s1, + s2, or s3 + + + <<EOF>> an end-of-file + <s1,s2><<EOF>> + an end-of-file when in start condition s1 or s2 + +.fi +The regular expressions listed above are grouped according to +precedence, from highest precedence at the top to lowest at the bottom. +Those grouped together have equal precedence. For example, +.nf + + foo|bar* + +.fi +is the same as +.nf + + (foo)|(ba(r*)) + +.fi +since the '*' operator has higher precedence than concatenation, +and concatenation higher than alternation ('|'). This pattern +therefore matches +.I either +the string "foo" +.I or +the string "ba" followed by zero-or-more r's. +To match "foo" or zero-or-more "bar"'s, use: +.nf + + foo|(bar)* + +.fi +and to match zero-or-more "foo"'s or "bar"'s: +.nf + + (foo|bar)* + +.fi +.SH HOW THE INPUT IS MATCHED +When the generated scanner is run, it analyzes its input looking +for strings which match any of its patterns. If it finds more than +one match, it takes the one matching the most text (for trailing +context rules, this includes the length of the trailing part, even +though it will then be returned to the input). If it finds two +or more matches of the same length, the +rule listed first in the +.I flex +input file is chosen. +.LP +Once the match is determined, the text corresponding to the match +(called the +.I token) +is made available in the global character pointer +.B yytext, +and its length in the global integer +.B yyleng. +The +.I action +corresponding to the matched pattern is then executed (a more +detailed description of actions follows), and then the remaining +input is scanned for another match.
+.LP +If no match is found, then the +.I default rule +is executed: the next character in the input is matched and +copied to the standard output. Thus, the simplest legal +.I flex +input is: +.nf + + %% + +.fi +which generates a scanner that simply copies its input (one character +at a time) to its output. +.SH ACTIONS +Each pattern in a rule has a corresponding action, which can be any +arbitrary C statement. The pattern ends at the first non-escaped +whitespace character; the remainder of the line is its action. If the +action is empty, then when the pattern is matched the input token +is simply discarded. For example, here is the specification for a program +which deletes all occurrences of "zap me" from its input: +.nf + + %% + "zap me" + +.fi +Here is a program which compresses multiple blanks and tabs down to +a single blank, and throws away whitespace found at the end of a line: +.nf + + %% + [ \t]+ putchar( ' ' ); + [ \t]+$ /* ignore this token */ + +.fi +.LP +If the action contains a '{', then the action spans till the balancing +'}' is found, and the action may cross multiple lines. +.I flex +knows about C strings and comments and won't be fooled by braces found +within them, but also allows actions to begin with +.B %{ +and will consider the action to be all the text up to the next +.B %}. +.LP +An action consisting solely of a vertical bar ('|') means "same as +the action for the next rule." See below for an illustration. +.LP +Actions can include arbitrary C code, including +.B return +statements to return a value to whatever routine called +.B yylex(). +Each time +.B yylex() +is called it continues processing tokens from where it last left +off until it either reaches +the end of the file or executes a return. +.LP +Actions are not allowed to modify yytext or yyleng. +.LP +There are a number of special directives which can be included within +an action: +.IP - +.B ECHO +copies yytext to the scanner's output.
+.IP - +.B BEGIN +followed by the name of a start condition places the scanner in the +corresponding start condition (see below). +.IP - +.B REJECT +directs the scanner to proceed on to the "second best" rule which matched the +input (or a prefix of the input). The rule is chosen as described +above in "How the Input is Matched", and +.B yytext +and +.B yyleng +set up appropriately. +It may either be one which matched as much text +as the originally chosen rule but came later in the +.I flex +input file, or one which matched less text. +For example, the following will both count the +words in the input and call the routine special() whenever "frob" is seen: +.nf + + %{ + int word_count = 0; + %} + %% + + frob special(); REJECT; + [^ \t\n]+ ++word_count; + +.fi +Without the +.B REJECT, +any "frob"'s in the input would not be counted as words, since the +scanner normally executes only one action per token. +Multiple +.B REJECT's +are allowed, each one finding the next best choice to the currently +active rule. For example, when the following scanner scans the token +"abcd", it will write "abcdabcaba" to the output: +.nf + + %% + a | + ab | + abc | + abcd ECHO; REJECT; + .|\n /* eat up any unmatched character */ + +.fi +(The first three rules share the fourth's action since they use +the special '|' action.) +.B REJECT +is a particularly expensive feature in terms of scanner performance; +if it is used in +.I any +of the scanner's actions it will slow down +.I all +of the scanner's matching. +.IP - +.B yymore() +tells the scanner that the next time it matches a rule, the corresponding +token should be +.I appended +onto the current value of +.B yytext +rather than replacing it. For example, given the input "mega-kludge" +the following will write "mega-mega-kludge" to the output: +.nf + + %% + mega- ECHO; yymore(); + kludge ECHO; + +.fi +First "mega-" is matched and echoed to the output.
Then "kludge" +is matched, but the previous "mega-" is still hanging around at the +beginning of +.B yytext +so the ECHO for the "kludge" rule will actually write "mega-kludge". +The presence of +.B yymore() +in the scanner's action entails a minor performance penalty in the +scanner's matching speed. +.IP - +.B yyless(n) +returns all but the first +.I n +characters of the current token back to the input stream, where they +will be rescanned when the scanner looks for the next match. +.B yytext +and +.B yyleng +are adjusted appropriately (e.g., +.B yyleng +will now be equal to +.I n +). For example, on the input "foobar" the following will write out +"foobarbar": +.nf + + foobar ECHO; yyless(3); + [a-z]+ ECHO; + +.fi +An argument of 0 to +.B yyless +will cause the current input string to be scanned again. Unless you've +changed how the scanner will subsequently process its input (using +.B BEGIN, +for example), this will result in an endless loop. +.IP - +.B unput(c) +puts the character +.I c +back onto the input stream. It will be the next character scanned. +The following action will take the current token and cause it +to be rescanned enclosed in parentheses. +.nf + + { + int i; + unput( ')' ); + for ( i = yyleng - 1; i >= 0; --i ) + unput( yytext[i] ); + unput( '(' ); + } + +.fi +Note that since each +.B unput() +puts the given character back at the +.I beginning +of the input stream, pushing back strings must be done back-to-front. +.IP - +.B input() +reads the next character from the input stream. 
 For example, +the following is one way to eat up C comments: +.nf + + %% + "/*" { + register int c; + + for ( ; ; ) + { + while ( (c = input()) != '*' && + c != EOF ) + ; /* eat up text of comment */ + + if ( c == '*' ) + { + while ( (c = input()) == '*' ) + ; + if ( c == '/' ) + break; /* found the end */ + } + + if ( c == EOF ) + { + error( "EOF in comment" ); + break; + } + } + } + +.fi +.IP - +.I yyterminate() +can be used in lieu of a return statement in an action. It terminates +the scanner and returns a 0 to the scanner's caller, indicating "all done". +By default, +.I yyterminate() +is also called when an end-of-file is encountered. It is a macro and +may be redefined. +.SH THE GENERATED SCANNER +The output of +.I flex +is the file +.B lex.yy.c, +which contains the scanning routine +.B yylex(), +a number of tables used by it for matching tokens, and a number +of auxiliary routines and macros. By default, +.B yylex() +is declared as follows: +.nf + + int yylex() + { + ... various definitions and the actions in here ... + } + +.fi +(If your environment supports function prototypes, then it will +be "int yylex( void )".) This definition may be changed by redefining +the "YY_DECL" macro. For example, you could use: +.nf + + #undef YY_DECL + #define YY_DECL float lexscan( a, b ) float a, b; + +.fi +to give the scanning routine the name +.I lexscan, +returning a float, and taking two floats as arguments. Note that +if you give arguments to the scanning routine using a +K&R-style/non-prototyped function declaration, you must terminate +the definition with a semi-colon (;). +.LP +Whenever +.B yylex() +is called, it scans tokens from the global input file +.I yyin +(default, stdin). It continues until it either reaches +an end-of-file (at which point it returns the value 0) or +one of its actions executes a +.I return +statement.
+In the former case, the scanner may not be called again unless +.B void yyrestart( FILE *input_file ) +is called, to point +.I yyin +at the new input_file. In the latter case (i.e., when an action +executes a return), the scanner may then be called again and it +will resume scanning where it left off. +.LP +By default (and for purposes of efficiency), the scanner uses +block-reads rather than simple +.I getc() +calls to read characters from +.I yyin. +The nature of how it gets its input can be controlled by redefining the +.B YY_INPUT +macro. +YY_INPUT's calling sequence is "YY_INPUT(buf,result,max_size)". Its +action is to place up to +.I max_size +characters in the character array +.I buf +and return in the integer variable +.I result +either the +number of characters read or the constant YY_NULL (0 on Unix systems) +to indicate EOF. The default YY_INPUT reads from the +global file-pointer "yyin" (which is by default +.I stdin), +so if you +just want to change the input file, you needn't redefine +YY_INPUT - just point yyin at the input file. +.LP +A sample redefinition of YY_INPUT (in the definitions section of the input +file): +.nf + + %{ + #undef YY_INPUT + #define YY_INPUT(buf,result,max_size) \\ + result = ((buf[0] = getchar()) == EOF) ? YY_NULL : 1; + %} + +.fi +You also can add in things like keeping track of the +input line number this way; but don't expect your scanner to +go very fast. +.LP +When the scanner receives an end-of-file indication from YY_INPUT, +it then checks the +.B yywrap() +function. If it returns false (zero), then it is assumed that the +function has gone ahead and set up +.I yyin +to point to another input file, and scanning continues. If it returns +true (non-zero), then the scanner terminates, returning 0 to its +caller. +.LP +The default +.B yywrap() +always returns 1. Presently, to redefine it you must first +"#undef yywrap", as it is currently implemented as a macro. 
 As noted +by the hedging in the previous sentence, it may be changed to +a true function in the near future. +.LP +The scanner writes its +.B ECHO +output to the +.I yyout +global (default, stdout), which may be redefined by the user simply +by assigning it to some other FILE*. +.SH START CONDITIONS +.I flex +provides a mechanism for conditionally activating rules. Any rule +whose pattern is prefixed with "<sc>" will only be active when +the scanner is in the start condition named "sc". For example, +.nf + + <STRING>[^"]* { /* eat up the string body ... */ + ... + } + +.fi +will be active only when the scanner is in the "STRING" start +condition, and +.nf + + <INITIAL,STRING,QUOTE>\\. { /* handle an escape ... */ + ... + } + +.fi +will be active only when the current start condition is +either "INITIAL", "STRING", or "QUOTE". +.LP +Start conditions +are declared in the definitions (first) section of the input +using unindented lines beginning with either +.B %s +or +.B %x +followed by a list of names. +The former declares +.I inclusive +start conditions, the latter +.I exclusive +start conditions. A start condition is activated using the +.B BEGIN +action. Until the next +.B BEGIN +action is executed, rules with the given start +condition will be active and +rules with other start conditions will be inactive. +If the start condition is +.I inclusive, +then rules with no start conditions at all will also be active. +If it is +.I exclusive, +then +.I only +rules qualified with the start condition will be active. +So a set of rules conditioned on the same exclusive start condition +describe a scanner which is independent of any of the other rules in the +.I flex +input. Because of this, +exclusive start conditions make it easy to specify "mini-scanners" +which scan portions of the input that are syntactically different +from the rest (e.g., comments). +.LP +.B BEGIN(0) +returns to the original state where only the rules with +no start conditions are active.
 This state can also be +referred to as the start-condition "INITIAL", so +.B BEGIN(INITIAL) +is equivalent to +.B BEGIN(0). +.LP +Here is a scanner which will recognize numbers only if they +are preceded earlier in the line by the string "expect-number": +.nf + + %s expect + + %% + expect-number BEGIN(expect); + + <expect>[0-9]+ printf( "found a number\n" ); + <expect>\n { + /* that's the end of the line, so + * we need another "expect-number" + * before we'll recognize any more + * numbers + */ + BEGIN(INITIAL); + } + +.fi +Here is a scanner which recognizes (and discards) C comments while +maintaining a count of the current input line. +.nf + + %x comment + %% + int line_num = 1; + + <comment>[^*\n]* + <comment>"*"+[^*/\n]* + <comment>\n ++line_num; + <comment>"*"+"/" BEGIN(INITIAL); + + "/*" BEGIN(comment); + +.fi +Note that start-condition names are really integer values and +can be stored as such. Thus, the above could be extended in the +following fashion: +.nf + + %x comment + %% + int line_num = 1; + int comment_caller; + + <comment>[^*\n]* + <comment>"*"+[^*/\n]* + <comment>\n ++line_num; + <comment>"*"+"/" BEGIN(comment_caller); + + "/*" { + comment_caller = INITIAL; + BEGIN(comment); + } + + ... + + <foo>"/*" { + comment_caller = foo; + BEGIN(comment); + } +.fi +One can then implement a "stack" of start conditions using an +array of integers. (It is likely that such stacks will become +a full-fledged +.I flex +feature in the future.) Note, though, that +start conditions do not have their own name-space; %s's and %x's +declare names effectively the same as #define's. +.SH END-OF-FILE RULES +The special rule "<<EOF>>" indicates +actions which are to be taken when an end-of-file is +encountered and yywrap() returns non-zero (i.e., indicates +no further files to process).
 The action can either +point yyin at a new file to process, in which case the +action +.I must +finish with the special +.I YY_NEW_FILE +action +(this is a branch, so subsequent code in the action won't +be executed), or the action must finish with a +.I return +or +.I yyterminate() +statement. <<EOF>> rules may not be used with other +patterns; they may only be qualified with a list of start +conditions. If an unqualified <<EOF>> rule is given, it +applies only to the +.B INITIAL +start condition, and +.I not +to +.B %s +(or +.B %x) +start conditions. +.LP +These rules are useful for catching things like unclosed comments. +An example: +.nf + + %x quote + %% + ... + <quote><<EOF>> { + error( "unterminated quote" ); + yyterminate(); + } + <<EOF>> { + if ( *++filelist ) + { + yyin = fopen( *filelist, "r" ); + YY_NEW_FILE; + } + else + yyterminate(); + } + +.fi +.SH MISCELLANEOUS MACROS +The macro +.bd +YY_USER_ACTION +can be redefined to provide an action +which is always executed prior to the matched rule's action. For example, +it could be #define'd to call a routine to convert yytext to lower-case. +.LP +In the generated scanner, the actions are all gathered in one large +switch statement and separated using +.B YY_BREAK, +which may be redefined. +This allows, for example, C++ users to +#define YY_BREAK to do nothing (while being very careful that every +rule ends with a "break" or a "return"!) to avoid suffering from +unreachable statement warnings where a rule's action ends with "return". +.SH INTERFACING WITH YACC +One of the main uses of +.I flex +is as a companion to the +.I yacc +parser-generator. +.I yacc +parsers expect to call the +.B yylex() +routine to find the next input token. The routine is supposed to +return the type of the next token as well as putting any associated +value in the global +.B yylval.
+To use +.I flex +with +.I yacc, +one specifies the +.B -d +option to +.I yacc +to instruct it to generate the file +.B y.tab.h +containing definitions of all the +.B %tokens +appearing in the +.I yacc +input. This file is then included in the +.I flex +scanner. For example, if one of the tokens is "TOK_NUMBER", +part of the scanner might look like: +.nf + + %{ + #include "y.tab.h" + %} + + %% + + [0-9]+ yylval = atoi( yytext ); return TOK_NUMBER; + +.fi +.SH TRANSLATION TABLE +In the name of POSIX compliance, +.I flex +supports a +.I translation table +for mapping input characters together into specified sets. +The table is specified in the first section, and its format looks like: +.nf + + %t + 1 abcd + 2 ABCDEFGHIJKLMNOPQRSTUVWXYZ + 52 0123456789 + %t + +.fi +This example specifies that the characters 'a', 'b', 'c', and 'd' +are to all be lumped into group #1, the upper-case letters are +to be in group #2, and digits in group #52, and +.I no other characters will appear in the patterns +(note that characters can also be specified in a +.B %t +table using escape sequences). +The group numbers are actually disregarded by +.I flex; +.B %t +serves, though, to lump characters together. Given the above +table, for example, the pattern "aAA*5" is equivalent to "dZQ*0". +They both say, "match any character in group #1, followed by +a character from group #2, followed by zero-or-more characters +from group #2, followed by a character from group #52." Thus +.B %t +provides a crude way for introducing equivalence classes into +the scanner specification. It is the author's belief that the +.B -i +option coupled with the equivalence classes which +.I flex +automatically generates take care of virtually all the instances +when one might consider using +.B %t. +But what the hell, it's there if you want it. +.so options.man +.SH PERFORMANCE CONSIDERATIONS +The main design goal of +.I flex +is that it generate high-performance scanners. 
 It has been optimized +for dealing well with large sets of rules. Aside from the effects +outlined above of table compression on scanner speed, +there are a number of options/actions which degrade performance. These +are, in decreasing order of performance impact: +.nf + + REJECT + pattern sets that require backtracking + arbitrary trailing context + %T + '^' beginning-of-line operator + yymore() + start conditions + +.fi +.LP +Getting rid of backtracking is messy and often may be too much +work for a complicated scanner's rules. In principle, one begins +by using the +.B -b +flag to generate a +.I lex.backtrack +file. For example, on the input +.nf + + %% + foo return TOK_KEYWORD; + foobar return TOK_KEYWORD; + +.fi +the file looks like: +.nf + + State #6 is non-accepting - + associated rules: + 2 3 + out-transitions: [ o ] + jam-transitions: EOF [ \001-n p-\177 ] + + State #8 is non-accepting - + associated rules: + 3 + out-transitions: [ a ] + jam-transitions: EOF [ \001-` b-\177 ] + + State #9 is non-accepting - + associated rules: + 3 + out-transitions: [ r ] + jam-transitions: EOF [ \001-q s-\177 ] + + Compressed tables always backtrack. + +.fi +The first few lines tell us that there's a scanner state in +which it can make a transition on an 'o' but not on any other +character, and the currently scanned text does not match any rule. +If the scanner is in that state and then reads +something other than an 'o', it will have to backtrack to find +a rule which is matched. With +a bit of headscratching one can see that this must be the +state it's in when it has seen "fo". When this has happened, +if anything other than another 'o' is seen, the scanner will +have to back up to simply match the 'f' (by the default rule). +.LP +The comment regarding State #8 indicates there's a problem +when "foob" has been scanned. Indeed, on any character other +than a 'b', the scanner will have to back up to accept "foo".
+Similarly, the comment for State #9 concerns when "fooba" has +been scanned. +.LP +The final comment reminds us that there's no point going to +all the trouble of removing backtracking from the rules unless +we're using +.B -f +or +.B -F, +since there's no performance gain doing so with compressed scanners. +.LP +The way to remove the backtracking is to add "error" rules: +.nf + + %% + foo return TOK_KEYWORD; + foobar return TOK_KEYWORD; + + fooba | + foob | + fo { + /* false alarm, not really a keyword */ + return TOK_ID; + } + +.fi +.LP +Unfortunately backtracking messages tend to cascade and +with a complicated input set it's not uncommon to get hundreds +of messages. If one can decipher them, though, it often +only takes a dozen or so rules to eliminate the backtracking. +(A possible future +.I flex +feature will be to automatically add rules to eliminate backtracking. +The problem is that while it's easy for +.I flex +to figure out what rules are needed, it's very hard for it to +know what the proper action is. Currently I'm thinking that it +will simply invoke a user-redefinable macro and that's it ...) +.LP +Another area where the user can increase a scanner's performance +(and one that's easier to implement) arises from the fact that +the longer the tokens matched, the faster the scanner will run. +This is because with long tokens the processing of most input +characters takes place in the (short) inner scanning loop, and +does not often have to go through the additional work of setting up +the scanning environment (e.g., +.B yytext) +for the action. 
 Recall the scanner for C comments: +.nf + + %x comment + %% + int line_num = 1; + + [^*\n]* + "*"+[^*/\n]* + \n ++line_num; + "*"+"/" BEGIN(INITIAL); + + "/*" BEGIN(comment); + +.fi +This could be sped up by writing it as: +.nf + + %x comment + %% + int line_num = 1; + + [^*\n]* + [^*\n]*\n ++line_num; + "*"+[^*/\n]* + "*"+[^*/\n]*\n ++line_num; + "*"+"/" BEGIN(INITIAL); + + "/*" BEGIN(comment); + +.fi +Now instead of each newline requiring the processing of another +action, recognizing the newlines is "distributed" over the other rules +to keep the matched text as long as possible. Note that +.I adding +rules does +.I not +slow down the scanner! The speed of the scanner is independent +of the number of rules or (modulo the considerations given at the +beginning of this section) how complicated the rules are with +regard to operators such as '*' and '|'. +.SH INCOMPATIBILITIES WITH LEX AND POSIX +.I flex +is a rewrite of the Unix +.I lex +tool (the two implementations do not share any code, though), +which dates to the late 1970's. There are some incompatibilities +which are of concern to those who wish to write scanners acceptable +to either implementation. At present, the POSIX lex draft is +very close to the original lex implementation, so some of these +incompatibilities are also in conflict with the POSIX draft. But +the intent is that except as noted below, +.I flex +as it presently stands will +ultimately be POSIX conformant (i.e., that those areas of conflict with +the POSIX draft will be resolved in +.I flex's +favor). Please bear in +mind that all the comments are with regard to the POSIX +.I draft +standard and not the final document; they are included so +.I flex +users can be aware of the standardization issues and those areas where +.I flex +may in the near future be incompatibly changed with +its current definition.
+.LP +.I flex +is fully compatible with +.I lex +with the following exceptions: +.IP - +When definitions are expanded, +.I flex +encloses them in parentheses. +With lex, the following +.nf + + NAME [A-Z][A-Z0-9]* + %% + foo{NAME}? printf( "Found it\\n" ); + %% + +.fi +will not match the string "foo" because when the macro +is expanded the rule is equivalent to "foo[A-Z][A-Z0-9]*?" +and the precedence is such that the '?' is associated with +"[A-Z0-9]*". With +.I flex, +the rule will be expanded to +"foo([A-Z][A-Z0-9]*)?" and so the string "foo" will match. +Note that because of this, the +.B ^, $, <s>, +and +.B / +operators cannot be used in a definition. +.IP +Note that the POSIX draft interpretation here is the same as +.I flex's. +.IP - +The undocumented lex-scanner internal variable +.B yylineno +is not supported. (The variable is not part of the POSIX draft.) +.IP - +The +.B input() +routine is not redefinable, though may be called to read characters +following whatever has been matched by a rule. If +.B input() +encounters an end-of-file the normal +.B yywrap() +processing is done. A ``real'' end-of-file is returned as +.I EOF. +.IP +Input is instead controlled by redefining the +.B YY_INPUT +macro. +.IP +The +.I flex +restriction that +.B input() +cannot be redefined is in accordance with the POSIX draft, but +.B YY_INPUT +has not yet been accepted into the draft. +.IP - +.B output() +is not supported. +Output from the ECHO macro is done to the file-pointer +"yyout" (default +.I stdout). +.IP +The POSIX draft mentions that an +.B output() +routine exists but currently gives no details as to what it does. +.IP - +If you are providing your own yywrap() routine, you must include a +"#undef yywrap" in the definitions section (section 1). Note that +the "#undef" will have to be enclosed in %{}'s.
+.IP +The POSIX draft +specifies that yywrap() is a function and this is unlikely to change; so +.I flex users are warned +that +.I yywrap() +is likely to be changed to a function in the near future. +.IP - +The precedence of the +.B {} +operator is different. lex interprets "abc{1,3}" as "match one, two, or +three occurrences of 'abc'", whereas +.I flex +interprets it as "match 'ab' +followed by one, two, or three occurrences of 'c'". The latter is +in agreement with the current POSIX draft. +.IP - +To refer to yytext outside of your scanner source file, use +"extern char *yytext;" rather than "extern char yytext[];". +This is contrary to the POSIX draft but a point on which I refuse +to budge, as the array representation entails a serious performance penalty. +.IP - +The name +.bd +FLEX_SCANNER +is #define'd so scanners may be written for use with either +.I flex +or +.I lex. +.SH DEFICIENCIES / BUGS +.LP +Some trailing context +patterns cannot be properly matched and generate +warning messages ("Dangerous trailing context"). These are +patterns where the ending of the +first part of the rule matches the beginning of the second +part, such as "zx*/xy*", where the 'x*' matches the 'x' at +the beginning of the trailing context. (Note that the POSIX draft +states that the text matched by such patterns is undefined.) +If desperate, you can use +.B yyless() +to effect arbitrary trailing context. +.LP +.I variable +trailing context (where both the leading and trailing parts do not have +a fixed length) entails the same performance loss as +.I REJECT +(i.e., substantial). +.LP +For some trailing context rules, parts which are actually fixed-length are +not recognized as such, leading to the abovementioned performance loss. +In particular, parts using '|' or {n} are always considered variable-length. +.LP +Use of unput() or input() invalidates yytext and yyleng.
+.LP +Use of unput() to push back more text than was matched can +result in the pushed-back text matching a beginning-of-line ('^') +rule even though it didn't come at the beginning of the line +(though this is rare!). +.LP +Nulls are not allowed in +.I flex +inputs or in the inputs to +scanners generated by +.I flex. +Their presence generates fatal errors. +.LP +.I flex +does not generate correct #line directives for code internal +to the scanner; thus, bugs in +.I +flex.skel +yield bogus line numbers. +.LP +Due to both buffering of input and read-ahead, you cannot intermix +calls to stdio routines, such as, for example, +.B getchar() +with +.I flex +rules and expect it to work. Call +.B input() +instead. +.LP +The total table entries listed by the +.B -v +flag excludes the number of table entries needed to determine +what rule has been matched. The number of entries is equal +to the number of DFA states if the scanner does not use REJECT, +and somewhat greater than the number of states if it does. +.LP +It would be useful if +.I flex +wrote to lex.yy.c a summary of the flags used in +its generation (such as which table compression options). +.LP +Some of the macros, such as +.B yywrap(), +may in the future become functions which live in the +.B -ll +library. This will doubtless break a lot of code, but may be +required for POSIX-compliance. +.LP +The +.I flex +internal algorithms need documentation. +.SH "SEE ALSO" +.LP +lex(1), yacc(1), sed(1), awk(1). +.LP +M. E. Lesk and E. Schmidt, +.I LEX - Lexical Analyzer Generator +.SH AUTHOR +Vern Paxson, with the help of many ideas and much inspiration from +Van Jacobson. Original version by Jef Poskanzer. Fast table +representation is a partial implementation of a design done by Van +Jacobson. The implementation was done by Kevin Gong and Vern Paxson. 
+.LP +Thanks to the many +.I flex +beta-testers and feedbackers, especially Casey +Leedom, benson@odi.com, +Frederic Brehm, Nick Christopher, Jason Coughlin, +Chris Faylor, Eric Goldman, Eric +Hughes, Jeffrey R. Jones, Kevin B. Kenny, Ronald Lamprecht, +Greg Lee, Craig Leres, Mohamed el Lozy, Jim Meyering, Marc Nozell, Esmond Pitt, +Jef Poskanzer, Dave Tallman, Frank Whaley, Ken Yap, and others whose names +have slipped my marginal mail-archiving skills but whose contributions +are appreciated all the same. +.LP +Thanks to Keith Bostic, John Gilmore, Bob +Mulcahy, Rich Salz, and Richard Stallman for help with various distribution +headaches. +.LP +Thanks to Esmond Pitt for 8-bit character support, Benson Margulies and Fred +Burke for C++ support, and Ove Ewerlid for supporting NUL's (as well as for +impressive efforts regarding generating extremely high-performance +scanners, which with luck will be soon forthcoming). +.LP +This work was primarily done when I was a member of the Real Time System Group +at the Lawrence Berkeley Laboratory in Berkeley, CA. Many thanks to all there +for the support I received. +.LP +Send comments to: +.nf + + Vern Paxson + Computer Science Department + 4126 Upson Hall + Cornell University + Ithaca, NY 14853-7501 + + vern@cs.cornell.edu + decvax!cornell!vern + vern@LBL (bitnet) + +.fi -- cgit v1.2.3 From e234671be07e4dfa0ac24892392898da169007cb Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 25 Feb 1990 19:47:38 +0000 Subject: *** empty log message *** --- flex.1 | 69 +++++++++++++++++++++++++++++++++--------------------------------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/flex.1 b/flex.1 index d52f126..2eef948 100644 --- a/flex.1 +++ b/flex.1 @@ -33,24 +33,25 @@ First some simple examples to get the flavor of how one uses .I flex. 
The following .I flex -input specifies a scanner which whenever it encounters a tab -will print eight blanks to its standard output: +input specifies a scanner which whenever it encounters the string +"username" will replace it with the user's login name: .nf %% - \t printf( " " ); + username printf( "%s", getlogin() ); .fi By default, any text not matched by a .I flex scanner is copied to the output, so the net effect of this scanner is -to copy its input file to its output with each tab expanded -into eight blanks. -In this input, there is just one rule. "\t" is the +to copy its input file to its output with each occurrence +of "username" expanded. +In this input, there is just one rule. "username" is the .I pattern -(it's a regular expression specifying a tab) and the "printf" is the -.I action. The "%%" marks the beginning of the rules. +and the "printf" is the +.I action. +The "%%" marks the beginning of the rules. .LP Here's another simple example: .nf @@ -58,14 +59,14 @@ Here's another simple example: int num_lines = 0, num_chars = 0; %% - \n ++num_lines; ++num_chars; + \\n ++num_lines; ++num_chars; . ++num_chars; %% main() { yylex(); - printf( "# of lines = %d, # of chars = %d\n", + printf( "# of lines = %d, # of chars = %d\\n", num_lines, num_chars ); } @@ -128,7 +129,7 @@ A somewhat more complicated example: int argc; char **argv; { - ++argv, --argc; + ++argv, --argc; /* skip over program name */ if ( argc > 0 ) yyin = fopen( argv[0], "r" ); else @@ -143,7 +144,7 @@ Pascal. It identifies different types of .I tokens and reports on what it has seen. .LP -The details of the example will be explained in the following +The details of this example will be explained in the following sections. 
.SH FORMAT OF THE INPUT FILE The @@ -408,8 +409,8 @@ a single blank, and throws away whitespace found at the end of a line: .nf %% - [ \t]+ putchar( ' ' ); - [ \t]+$ /* ignore this token */ + [ \\t]+ putchar( ' ' ); + [ \\t]+$ /* ignore this token */ .fi .LP @@ -469,7 +470,7 @@ words in the input and call the routine special() whenever "frob" is seen: %% frob special(); REJECT; - [^ \t\n]+ ++word_count; + [^ \\t\\n]+ ++word_count; .fi Without the @@ -488,7 +489,7 @@ active rule. For example, when the following scanner scans the token ab | abc | abcd ECHO; REJECT; - .|\n /* eat up any unmatched character */ + .|\\n /* eat up any unmatched character */ .fi (The first three rules share the fourth's action since they use @@ -805,8 +806,8 @@ are preceded earlier in the line by the string "expect-number": %% expect-number BEGIN(expect); - [0-9]+ printf( "found a number\n" ); - \n { + [0-9]+ printf( "found a number\\n" ); + \\n { /* that's the end of the line, so * we need another "expect-number" * before we'll recognize any more @@ -824,9 +825,9 @@ maintaining a count of the current input line. %% int line_num = 1; - [^*\n]* - "*"+[^*/\n]* - \n ++line_num; + [^*\\n]* + "*"+[^*/\\n]* + \\n ++line_num; "*"+"/" BEGIN(INITIAL); "/*" BEGIN(comment); @@ -842,9 +843,9 @@ following fashion: int line_num = 1; int comment_caller; - [^*\n]* - "*"+[^*/\n]* - \n ++line_num; + [^*\\n]* + "*"+[^*/\\n]* + \\n ++line_num; "*"+"/" BEGIN(comment_caller); "/*" { @@ -1058,19 +1059,19 @@ the file looks like: associated rules: 2 3 out-transitions: [ o ] - jam-transitions: EOF [ \001-n p-\177 ] + jam-transitions: EOF [ \\001-n p-\\177 ] State #8 is non-accepting - associated rules: 3 out-transitions: [ a ] - jam-transitions: EOF [ \001-` b-\177 ] + jam-transitions: EOF [ \\001-` b-\\177 ] State #9 is non-accepting - associated rules: 3 out-transitions: [ r ] - jam-transitions: EOF [ \001-q s-\177 ] + jam-transitions: EOF [ \\001-q s-\\177 ] Compressed tables always backtrack. 
@@ -1144,9 +1145,9 @@ for the action. Recall the scanner for C comments: %% int line_num = 1; - [^*\n]* - "*"+[^*/\n]* - \n ++line_num; + [^*\\n]* + "*"+[^*/\\n]* + \\n ++line_num; "*"+"/" BEGIN(INITIAL); "/*" BEGIN(comment); @@ -1159,10 +1160,10 @@ This could be sped up by writing it as: %% int line_num = 1; - [^*\n]* - [^*\n]*\n ++line_num; - "*"+[^*/\n]* - "*"+[^*/\n]*\n ++line_num; + [^*\\n]* + [^*\\n]*\\n ++line_num; + "*"+[^*/\\n]* + "*"+[^*/\\n]*\\n ++line_num; "*"+"/" BEGIN(INITIAL); "/*" BEGIN(comment); -- cgit v1.2.3 From a4b5e58b7d2d495a77bc9f0ff4f1b0cda166626e Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 26 Feb 1990 17:59:14 +0000 Subject: *** empty log message *** --- flex.1 | 134 +++++++++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 84 insertions(+), 50 deletions(-) diff --git a/flex.1 b/flex.1 index 2eef948..a05ca27 100644 --- a/flex.1 +++ b/flex.1 @@ -117,9 +117,9 @@ A somewhat more complicated example: "+"|"-"|"*"|"/" printf( "An operator: %s\\n", yytext ); - "{"[^}\\n]*"}" /* eat up one-line comments */ + "{"[^}\\n]*"}" /* eat up one-line comments */ - [ \\t\\n]+ /* eat up whitespace */ + [ \\t\\n]+ /* eat up whitespace */ . printf( "Unrecognized character: %s\\n", yytext ); @@ -149,8 +149,9 @@ sections. .SH FORMAT OF THE INPUT FILE The .I flex -input file consists of three sections, separated by -.B %%: +input file consists of three sections, separated by a line with just +.B %% +in it: .nf definitions @@ -164,7 +165,7 @@ The .I definitions section contains declarations of simple .I name -definitions to simplify the scanner specification and of +definitions to simplify the scanner specification, and declarations of .I start conditions, which are explained in a later section. .LP @@ -174,11 +175,11 @@ Name definitions have the form: name definition .fi -The "name" is a word beginning with a letter or a '_' -followed by zero or more letters, digits, '_', or '-'. 
-The definition is taken to begin at the first non-white-space -following the name and continue to the end of the line. -Definition can subsequently be referred to using "{name}", which +The "name" is a word beginning with a letter or an underscore ('_') +followed by zero or more letters, digits, '_', or '-' (dash). +The definition is taken to begin at the first non-white-space character +following the name and continuing to the end of the line. +The definition can subsequently be referred to using "{name}", which will expand to "(definition)". For example, .nf @@ -189,7 +190,7 @@ will expand to "(definition)". For example, defines "DIGIT" to be a regular expression which matches a single digit, and "ID" to be a regular expression which matches a letter -followed by zero-or-more letters or digits. +followed by zero-or-more letters-or-digits. A subsequent reference to .nf @@ -241,7 +242,7 @@ The %{}'s must appear unindented on lines by themselves. In the rules section, any indented or %{} text appearing before the first rule may be used to declare variables -which are local to the scanning routine, and, after the declarations, +which are local to the scanning routine and (after the declarations) code which is to be executed whenever the scanning routine is entered. Other indented or %{} text in the rule section is still copied to the output, but its meaning is not well-defined and it may well cause compile-time @@ -251,7 +252,8 @@ compliance; see below for other such features). .LP In the definitions section, an unindented comment (i.e., a line beginning with "/*") is also copied verbatim to the output up -to the next "*/". Also, any line beginning with '#' is ignored. +to the next "*/". Also, any line in the definitions section +beginning with '#' is ignored. .SH PATTERNS The patterns in the input are written using an extended set of regular expressions. These are: @@ -259,18 +261,16 @@ expressions. These are: x match the character 'x' . 
any character except newline - [xyz] an 'x', a 'y', or a 'z' - [abj-oZ] an 'a', a 'b', any letter - from 'j' through 'o', or a 'Z' - [^A-Z] any character EXCEPT an uppercase letter, - including a newline (unlike how many other - regular expression tools treat the '^'!). - This means that a pattern like [^"]* will - match an entire file (overflowing the input - buffer) unless there's another quote in - the input. + [xyz] a "character class"; in this case, the pattern + matches either an 'x', a 'y', or a 'z' + [abj-oZ] a "character class" with a range in it; matches + an 'a', a 'b', any letter from 'j' through 'o', + or a 'Z' + [^A-Z] a "negated character class", i.e., any character + but those in the class. In this case, any + character EXCEPT an uppercase letter. [^A-Z\\n] any character EXCEPT an uppercase letter or - a newline + a newline r* zero or more r's, where r is any regular expression r+ one or more r's r? zero or one r's (that is, "an optional r") @@ -281,32 +281,29 @@ expressions. These are: (see above) "[xyz]\\"foo" the literal string: [xyz]"foo - \\x if x is an 'a', 'b', 'f', 'n', 'r', - 't', or 'v', then the ANSI-C - interpretation of \\x. Otherwise, - a literal 'x' (used to escape - operators such as '*') - \\123 the character with octal value 123 - \\x2a the character with hexadecimal value 2a - (r) match an r; parentheses are used - to override precedence (see below) + \\X if X is an 'a', 'b', 'f', 'n', 'r', 't', or 'v', + then the ANSI-C interpretation of \\x. + Otherwise, a literal 'X' (used to escape + operators such as '*') + \\123 the character with octal value 123 + \\x2a the character with hexadecimal value 2a + (r) match an r; parentheses are used to override + precedence (see below) - rs the regular expression r followed - by the regular expression s; called - "concatenation" + rs the regular expression r followed by the + regular expression s; called "concatenation" r|s either an r or an s - r/s an r but only if it is followed by - an s. 
The s is not part of the - matched text. This type of - pattern is known as "trailing context". + r/s an r but only if it is followed by an s. The + s is not part of the matched text. This type + of pattern is called as "trailing context". ^r an r, but only at the beginning of a line - r$ an r, but only at the end of a line - (r must not use trailing context) + r$ an r, but only at the end of a line. Equivalent + to "r/\\n". r an r, but only in start condition s (see @@ -348,12 +345,40 @@ To match "foo" or zero-or-more "bar"'s, use: foo|(bar)* .fi -and to match zero-or-more "foo"'s or "bar"'s: +and to match zero-or-more "foo"'s-or-"bar"'s: .nf (foo|bar)* .fi +.LP +Some notes on patterns: +.IP - +A negated character class such as the example "[^A-Z]" +above +.I will match a newline +unless "\\n" (or an equivalent escape sequence) is one of the +characters explicitly present in the negated character class +(e.g., "[^A-Z\\n]"). This is unlike how many other regular +expression tools treat negated character classes, but unfortunately +the inconsistency is historically entrenched. +Matching newlines means that a pattern like [^"]* can match an entire +input (overflowing the scanner's input buffer) unless there's another +quote in the input. +.I - +A rule can have at most one instance of trailing context (the '/' operator +or the '$' operator). The start condition, '^', and "<>" patterns +can only occur at the beginning of a pattern, and, as well as with '/' and '$', +cannot be grouped inside parentheses. The following are all illegal: +.nf + + foo/bar$ + foo|(bar$) + foo|^bar + foobar + +.fi +(Note that the first of these, though, can be written "foo/bar\\n".) .SH HOW THE INPUT IS MATCHED When the generated scanner is run, it analyzes its input looking for strings which match any of its patterns. If it finds more than @@ -380,7 +405,7 @@ input is scanned for another match. 
.LP If no match is found, then the .I default rule -is executed: the next character in the input is matched and +is executed: the next character in the input is considered matched and copied to the standard output. Thus, the simplest legal .I flex input is: @@ -404,6 +429,9 @@ which deletes all occurrences of "zap me" from its input: "zap me" .fi +(It will copy all other characters in the input to the output since +they will be matched by the default rule.) +.LP Here is a program which compresses multiple blanks and tabs down to a single blank, and throws away whitespace found at the end of a line: .nf @@ -414,27 +442,33 @@ a single blank, and throws away whitespace found at the end of a line: .fi .LP -If the action contains a '{', then the action spans till the balancing -'}' is found, and the action may cross multiple lines. +If the action contains a '{', then the action spans till the balancing '}' +is found, and the action may cross multiple lines. .I flex knows about C strings and comments and won't be fooled by braces found within them, but also allows actions to begin with .B %{ and will consider the action to be all the text up to the next -.B %}. +.B %} +(regardless of ordinary braces inside the action). .LP An action consisting solely of a vertical bar ('|') means "same as -the action for the next rule. See below for an illustration. +the action for the next rule." See below for an illustration. .LP Actions can include arbitrary C code, including .B return -statements to return a value whatever routine called +statements to return a value to whatever routine called .B yylex(). Each time .B yylex() is called it continues processing tokens from where it last left off until it either reaches -the end of the file or executes a return. +the end of the file or executes a return. Once it reaches an end-of-file, +however, then any subsequent call to +.B yylex() +will simply immediately return, unless +.B yyrestart() +is first called (see below). 
.LP Actions are not allowed to modify yytext or yyleng. .LP -- cgit v1.2.3 From 873124b6bef4c98f4733cf64916fbe658f551492 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 28 Feb 1990 11:10:27 +0000 Subject: *** empty log message *** --- flex.1 | 83 +++++++++++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 57 insertions(+), 26 deletions(-) diff --git a/flex.1 b/flex.1 index a05ca27..4012418 100644 --- a/flex.1 +++ b/flex.1 @@ -498,9 +498,7 @@ For example, the following will both count the words in the input and call the routine special() whenever "frob" is seen: .nf - %{ - int word_count = 0; - %} + int word_count = 0; %% frob special(); REJECT; @@ -535,6 +533,14 @@ if it is used in of the scanner's actions it will slow down .I all of the scanner's matching. +.IP +Note also that unlike the other special actions, +.B REJECT +is a +.I branch; +code immediately following it in the action will +.I not +be executed. .IP - .B yymore() tells the scanner that the next time it matches a rule, the corresponding @@ -555,7 +561,9 @@ First "mega-" is matched and echoed to the output. Then "kludge" is matched, but the previous "mega-" is still hanging around at the beginning of .B yytext -so the ECHO for the "kludge" rule will actually write "mega-kludge". +so the +.B ECHO +for the "kludge" rule will actually write "mega-kludge". The presence of .B yymore() in the scanner's action entails a minor performance penalty in the @@ -577,13 +585,14 @@ will now be equal to "foobarbar": .nf + %% foobar ECHO; yyless(3); [a-z]+ ECHO; .fi An argument of 0 to .B yyless -will cause the current input string to be scanned again. Unless you've +will cause the entire current input string to be scanned again. Unless you've changed how the scanner will subsequently process its input (using .B BEGIN, for example), this will result in an endless loop. 
@@ -644,11 +653,15 @@ the following is one way to eat up C comments: .fi .IP - -.I yyterminate() +.B yyterminate() can be used in lieu of a return statement in an action. It terminates the scanner and returns a 0 to the scanner's caller, indicating "all done". +Subsequent calls to the scanner will immediately return unless preceded +by a call to +.B yyrestart() +(see below). By default, -.I yyterminate() +.B yyterminate() is also called when an end-of-file is encountered. It is a macro and may be redefined. .SH THE GENERATED SCANNER @@ -679,7 +692,7 @@ the "YY_DECL" macro. For example, you could use: #define YY_DECL float lexscan( a, b ) float a, b; .fi -to give it the the scanning routine the name +to give the scanning routine the name .I lexscan, returning a float, and taking two floats as arguments. Note that if you give arguments to the scanning routine using a @@ -690,16 +703,22 @@ Whenever .B yylex() is called, it scans tokens from the global input file .I yyin -(default, stdin). It continues until it either reaches +(which defaults to stdin). It continues until it either reaches an end-of-file (at which point it returns the value 0) or one of its actions executes a .I return statement. -In the former case, the scanner may not be called again unless -.B void yyrestart( FILE *input_file ) -is called, to point +In the former case, when called again the scanner will immediately +return unless +.B yyrestart() +is called to point .I yyin -at the new input_file. In the latter case (i.e., when an action +at the new input file. ( +.B yyrestart() +takes one argument, a +.B FILE * +pointer.) +In the latter case (i.e., when an action executes a return), the scanner may then be called again and it will resume scanning where it left off. .LP @@ -721,14 +740,15 @@ and return in the integer variable either the number of characters read or the constant YY_NULL (0 on Unix systems) to indicate EOF. 
The default YY_INPUT reads from the -global file-pointer "yyin" (which is by default -.I stdin), -so if you +global file-pointer "yyin", so if you just want to change the input file, you needn't redefine -YY_INPUT - just point yyin at the input file. +YY_INPUT - just point yyin at the input file (by assigning it to the +file pointer returned by +.B fopen(), +for example). .LP -A sample redefinition of YY_INPUT (in the definitions section of the input -file): +A sample redefinition of YY_INPUT (in the definitions +section of the input file): .nf %{ @@ -745,7 +765,9 @@ go very fast. When the scanner receives an end-of-file indication from YY_INPUT, it then checks the .B yywrap() -function. If it returns false (zero), then it is assumed that the +function. If +.B yywrap() +returns false (zero), then it is assumed that the function has gone ahead and set up .I yyin to point to another input file, and scanning continues. If it returns @@ -755,7 +777,7 @@ caller. The default .B yywrap() always returns 1. Presently, to redefine it you must first -"#undef yywrap", as it is currently implemented as a macro. As noted +"#undef yywrap", as it is currently implemented as a macro. As indicated by the hedging in the previous sentence, it may be changed to a true function in the near future. .LP @@ -764,7 +786,9 @@ The scanner writes its output to the .I yyout global (default, stdout), which may be redefined by the user simply -by assigning it to some other FILE*. +by assigning it to some other +.B FILE +pointer. .SH START CONDITIONS .I flex provides a mechanism for conditionally activating rules. Any rule @@ -815,7 +839,7 @@ If it is then .I only rules qualified with the start condition will be active. -So a set of rules conditioned on the same exclusive start condition +A set of rules contingent on the same exclusive start condition describe a scanner which is independent of any of the other rules in the .I flex input. 
Because of this, @@ -823,6 +847,11 @@ exclusive start conditions make it easy to specify "mini-scanners" which scan portions of the input that are syntactically different from the rest (e.g., comments). .LP +The default rule (to +.B ECHO +any unmatched character) remains active in exclusive start conditions. +### you are here +.LP .B BEGIN(0) returns to the original state where only the rules with no start conditions are active. This state can also be @@ -916,7 +945,7 @@ action be executed), or the action must finish with a .I return or -.I yyterminate() +.B yyterminate() statement. <> rules may not be used with other patterns; they may only be qualified with a list of start conditions. If an unqualified <> rule is given, it @@ -1300,7 +1329,9 @@ has not yet been accepted into the draft. .IP - .B output() is not supported. -Output from the ECHO macro is done to the file-pointer +Output from the +.B ECHO +macro is done to the file-pointer "yyout" (default .I stdout). .IP @@ -1316,7 +1347,7 @@ The POSIX draft specifies that yywrap() is a function and this is unlikely to change; so .I flex users are warned that -.I yywrap() +.B yywrap() is likely to be changed to a function in the near future. .IP - The precedence of the -- cgit v1.2.3 From d2e355582d6a7ec99996b7dee756884cd80a274c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 28 Feb 1990 15:09:10 +0000 Subject: *** empty log message *** --- flex.1 | 521 ++++++++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 403 insertions(+), 118 deletions(-) diff --git a/flex.1 b/flex.1 index 4012418..6357f27 100644 --- a/flex.1 +++ b/flex.1 @@ -262,13 +262,13 @@ expressions. These are: x match the character 'x' . 
any character except newline [xyz] a "character class"; in this case, the pattern - matches either an 'x', a 'y', or a 'z' + matches either an 'x', a 'y', or a 'z' [abj-oZ] a "character class" with a range in it; matches - an 'a', a 'b', any letter from 'j' through 'o', + an 'a', a 'b', any letter from 'j' through 'o', or a 'Z' [^A-Z] a "negated character class", i.e., any character - but those in the class. In this case, any - character EXCEPT an uppercase letter. + but those in the class. In this case, any + character EXCEPT an uppercase letter. [^A-Z\\n] any character EXCEPT an uppercase letter or a newline r* zero or more r's, where r is any regular expression @@ -282,28 +282,28 @@ expressions. These are: "[xyz]\\"foo" the literal string: [xyz]"foo \\X if X is an 'a', 'b', 'f', 'n', 'r', 't', or 'v', - then the ANSI-C interpretation of \\x. - Otherwise, a literal 'X' (used to escape + then the ANSI-C interpretation of \\X. + Otherwise, a literal 'X' (used to escape operators such as '*') \\123 the character with octal value 123 \\x2a the character with hexadecimal value 2a (r) match an r; parentheses are used to override - precedence (see below) + precedence (see below) rs the regular expression r followed by the - regular expression s; called "concatenation" + regular expression s; called "concatenation" r|s either an r or an s r/s an r but only if it is followed by an s. The - s is not part of the matched text. This type - of pattern is called as "trailing context". + s is not part of the matched text. This type + of pattern is called "trailing context". ^r an r, but only at the beginning of a line r$ an r, but only at the end of a line. Equivalent - to "r/\\n". + to "r/\\n". <s>r an r, but only in start condition s (see @@ -365,7 +365,7 @@ the inconsistency is historically entrenched. Matching newlines means that a pattern like [^"]* can match an entire input (overflowing the scanner's input buffer) unless there's another quote in the input. 
-.I - +.IP - A rule can have at most one instance of trailing context (the '/' operator or the '$' operator). The start condition, '^', and "<>" patterns can only occur at the beginning of a pattern, and, as well as with '/' and '$', @@ -378,7 +378,15 @@ cannot be grouped inside parentheses. The following are all illegal: foobar .fi -(Note that the first of these, though, can be written "foo/bar\\n".) +Note that the first of these, though, can be written "foo/bar\\n", and +the second could be written as two rules using the special '|' action (see +below): +.nf + + foo | + ^bar /* action goes here */ + +.fi .SH HOW THE INPUT IS MATCHED When the generated scanner is run, it analyzes its input looking for strings which match any of its patterns. If it finds more than @@ -498,7 +506,7 @@ For example, the following will both count the words in the input and call the routine special() whenever "frob" is seen: .nf - int word_count = 0; + int word_count = 0; %% frob special(); REJECT; @@ -672,7 +680,7 @@ is the file which contains the scanning routine .B yylex(), a number of tables used by it for matching tokens, and a number -of auxilliary routines and macros. By default, +of auxiliary routines and macros. By default, .B yylex() is declared as follows: .nf @@ -847,10 +855,28 @@ exclusive start conditions make it easy to specify "mini-scanners" which scan portions of the input that are syntactically different from the rest (e.g., comments). .LP +If the distinction between inclusive and exclusive start conditions +is still a little vague, here's a simple example illustrating the +connection between the two. The set of rules: +.nf + + %s example + %% + foo /* do something */ + +.fi +is equivalent to +.nf + + %x example + %% + foo /* do something */ + +.fi +.LP The default rule (to .B ECHO -any unmatched character) remains active in exclusive start conditions. -### you are here +any unmatched character) remains active in start conditions. 
.LP .B BEGIN(0) returns to the original state where only the rules with @@ -859,17 +885,30 @@ referred to as the start-condition "INITIAL", so .B BEGIN(INITIAL) is equivalent to .B BEGIN(0). +(The parentheses around the start condition name are not required but +are considered good style.) .LP -Here is a scanner which will recognize numbers only if they -are preceded earlier in the line by the string "expect-number": +Here is a scanner which provides two different interpretations +of a string like "123.456". By default it will treat it +as three tokens, the integer "123", a dot ('.'), and the integer "456". +But if the string is preceded earlier in the line by the string +"expect-floats" +it will treat it as a single token, the floating-point number +123.456: .nf + %{ + #include <math.h> + %} %s expect %% - expect-number BEGIN(expect); + expect-floats BEGIN(expect); - <expect>[0-9]+ printf( "found a number\\n" ); + <expect>[0-9]+"."[0-9]+ { + printf( "found a float, = %f\\n", + atof( yytext ) ); + } <expect>\\n { /* that's the end of the line, so * we need another "expect-number" @@ -879,6 +918,13 @@ are preceded earlier in the line by the string "expect-number": BEGIN(INITIAL); } + [0-9]+ { + printf( "found an integer, = %d\\n", + atoi( yytext ) ); + } + + "." printf( "found a dot\\n" ); + .fi Here is a scanner which recognizes (and discards) C comments while maintaining a count of the current input line. @@ -888,31 +934,26 @@ maintaining a count of the current input line. %% int line_num = 1; - <comment>[^*\\n]* - <comment>"*"+[^*/\\n]* + "/*" BEGIN(comment); + + <comment>[^*\\n]* /* eat anything that's not a '*' */ + <comment>"*"+[^*/\\n]* /* eat up '*'s not followed by '/'s */ <comment>\\n ++line_num; <comment>"*"+"/" BEGIN(INITIAL); - "/*" BEGIN(comment); - .fi Note that start-conditions names are really integer values and can be stored as such. 
Thus, the above could be extended in the following fashion: .nf - %x comment + %x comment foo %% int line_num = 1; int comment_caller; - [^*\\n]* - "*"+[^*/\\n]* - \\n ++line_num; - "*"+"/" BEGIN(comment_caller); - "/*" { - comment_caller = INTIIAL; + comment_caller = INITIAL; BEGIN(comment); } @@ -922,6 +963,12 @@ following fashion: comment_caller = foo; BEGIN(comment); } + + [^*\\n]* /* eat anything that's not a '*' */ + "*"+[^*/\\n]* /* eat up '*'s not followed by '/'s */ + \\n ++line_num; + "*"+"/" BEGIN(comment_caller); + .fi One can then implement a "stack" of start conditions using an array of integers. (It is likely that such stacks will become @@ -929,7 +976,7 @@ a full-fledged .I flex feature in the future.) Note, though, that start conditions do not have their own name-space; %s's and %x's -declare names effectively the same as #define's. +declare names in the same fashion as #define's. .SH END-OF-FILE RULES The special rule "<>" indicates actions which are to be taken when an end-of-file is @@ -965,12 +1012,14 @@ An example: %x quote %% - ... + + ...other rules for dealing with quotes... + <> { error( "unterminated quote" ); yyterminate(); } - <> { + <> { if ( *++filelist ) { yyin = fopen( *filelist, "r" ); @@ -993,10 +1042,13 @@ In the generated scanner, the actions are all gathered in one large switch statement and separated using .B YY_BREAK, which may be redefined. -This allows, for example, C++ users to +This allows, for example, some C++ users to #define YY_BREAK to do nothing (while being very careful that every rule ends with a "break" or a "return"!) to avoid suffering from -unreachable statement warnings where a rule's action ends with "return". +unreachable statement warnings where because a rule's action ends with +"return", the +.B YY_BREAK +is inaccessible. .SH INTERFACING WITH YACC One of the main uses of .I flex @@ -1004,9 +1056,9 @@ is as a companion to the .I yacc parser-generator. 
.I yacc -parsers expect to call the +parsers expect to call a routine named .B yylex() -routine to find the next input token. The routine is supposed to +to find the next input token. The routine is supposed to return the type of the next token as well as putting any associated value in the global .B yylval. @@ -1044,7 +1096,7 @@ In the name of POSIX compliance, .I flex supports a .I translation table -for mapping input characters together into specified sets. +for mapping input characters into groups. The table is specified in the first section, and its format looks like: .nf @@ -1052,29 +1104,31 @@ The table is specified in the first section, and its format looks like: 1 abcd 2 ABCDEFGHIJKLMNOPQRSTUVWXYZ 52 0123456789 + 6 \\t\\ \\n %t .fi This example specifies that the characters 'a', 'b', 'c', and 'd' -are to all be lumped into group #1, the upper-case letters are -to be in group #2, and digits in group #52, and -.I no other characters will appear in the patterns -(note that characters can also be specified in a -.B %t -table using escape sequences). +are to all be lumped into group #1, upper-case letters +in group #2, digits in group #52, tabs, blanks, and newlines into +group #6, and +.I +no other characters will appear in the patterns. The group numbers are actually disregarded by .I flex; .B %t serves, though, to lump characters together. Given the above -table, for example, the pattern "aAA*5" is equivalent to "dZQ*0". +table, for example, the pattern "a(AA)*5" is equivalent to "d(ZQ)*0". They both say, "match any character in group #1, followed by -a character from group #2, followed by zero-or-more characters +zero-or-more pairs of characters from group #2, followed by a character from group #52." Thus .B %t provides a crude way for introducing equivalence classes into -the scanner specification. It is the author's belief that the +the scanner specification. 
+.LP +Note that the .B -i -option coupled with the equivalence classes which +option (see below) coupled with the equivalence classes which .I flex automatically generates take care of virtually all the instances when one might consider using @@ -1086,23 +1140,29 @@ The main design goal of .I flex is that it generate high-performance scanners. It has been optimized for dealing well with large sets of rules. Aside from the effects -outlined above of table compression on scanner speed, +of table compression on scanner speed outlined above, there are a number of options/actions which degrade performance. These -are, in decreasing order of performance impact: +are, from most expensive to least: .nf REJECT + pattern sets that require backtracking arbitrary trailing context - %T + '^' beginning-of-line operator yymore() - start conditions .fi +with the first three all being quite expensive and the last two +being quite cheap. +.LP +.B REJECT +should be avoided at all costs when performance is important. +It is a particularly expensive option. .LP -Getting rid of backtracking is messy and often may be too much -work for a complicated scanner's rules. In principal, one begins +Getting rid of backtracking is messy and often may be an enormous +amount of work for a complicated scanner. In principle, one begins by using the .B -b flag to generate a @@ -1141,7 +1201,8 @@ the file looks like: .fi The first few lines tell us that there's a scanner state in which it can make a transition on an 'o' but not on any other -character, and the currently scanned text does not match any rule. +character, and that in that state the currently scanned text does not match +any rule. If the scanner is in that state and then reads something other than an 'o', it will have to backtrack to find a rule which is matched. 
With @@ -1180,18 +1241,60 @@ The way to remove the backtracking is to add "error" rules: .fi .LP -Unfortunately backtracking messages tend to cascade and -with a complicated input set it's not uncommon to get hundreds +Eliminating backtracking among a list of keywords can also be +done using a "catch-all" rule: +.nf + + %% + foo return TOK_KEYWORD; + foobar return TOK_KEYWORD; + + [a-z]+ return TOK_ID; + +.fi +This is usually the best solution when appropriate. +.LP +Backtracking messages tend to cascade. +With a complicated set of rules it's not uncommon to get hundreds of messages. If one can decipher them, though, it often -only takes a dozen or so rules to eliminate the backtracking. -(A possible future -.I flex -feature will be to automatically add rules to eliminate backtracking. -The problem is that while it's easy for +only takes a dozen or so rules to eliminate the backtracking (though +it's easy to make a mistake and have an error rule accidentally match +a valid token. A possible future .I flex -to figure out what rules are needed, it's very hard for it to -know what the proper action is. Currently I'm thinking that it -will simply invoke a user-redefinable macro and that's it ...) +feature will be to automatically add rules to eliminate backtracking). +.LP +.I Variable +trailing context (where both the leading and trailing parts do not have +a fixed length) entails almost the same performance loss as +.I REJECT +(i.e., substantial). So when possible a rule like: +.nf + + %% + mouse|rat/(cat|dog) run(); + +.fi +is better written: +.nf + + %% + mouse/cat|dog run(); + rat/cat|dog run(); + +.fi +or as +.nf + + %% + mouse|rat/cat run(); + mouse|rat/dog run(); + +.fi +Note that here the special '|' action does +.I not +provide any savings, and can even make things worse (see +.B BUGS +below). 
.LP Another area where the user can increase a scanner's performance (and one that's easier to implement) arises from the fact that @@ -1208,13 +1311,13 @@ for the action. Recall the scanner for C comments: %% int line_num = 1; + "/*" BEGIN(comment); + [^*\\n]* "*"+[^*/\\n]* \\n ++line_num; "*"+"/" BEGIN(INITIAL); - "/*" BEGIN(comment); - .fi This could be sped up by writing it as: .nf @@ -1223,14 +1326,14 @@ This could be sped up by writing it as: %% int line_num = 1; + "/*" BEGIN(comment); + [^*\\n]* [^*\\n]*\\n ++line_num; "*"+[^*/\\n]* "*"+[^*/\\n]*\\n ++line_num; "*"+"/" BEGIN(INITIAL); - "/*" BEGIN(comment); - .fi Now instead of each newline requiring the processing of another action, recognizing the newlines is "distributed" over the other rules @@ -1242,30 +1345,121 @@ slow down the scanner! The speed of the scanner is independent of the number of rules or (modulo the considerations given at the beginning of this section) how complicated the rules are with regard to operators such as '*' and '|'. +.LP +A final example in speeding up a scanner: suppose you want to scan +through a file containing identifiers and keywords, one per line +and with no other extraneous characters, and recognize all the +keywords. A natural first approach is: +.nf + + %% + asm | + auto | + break | + ... etc ... + volatile | + while /* it's a keyword */ + + .|\\n /* it's not a keyword */ + +.fi +To eliminate the back-tracking, introduce a catch-all rule: +.nf + + %% + asm | + auto | + break | + ... etc ... + volatile | + while /* it's a keyword */ + + [a-z]+ | + .|\\n /* it's not a keyword */ + +.fi +Now, if it's guaranteed that there's exactly one word per line, +then we can reduce the total number of matches by a half by +merging in the recognition of newlines with that of the other +tokens: +.nf + + %% + asm\\n | + auto\\n | + break\\n | + ... etc ... 
+ volatile\\n | + while\\n /* it's a keyword */ + + [a-z]+\\n | + .|\\n /* it's not a keyword */ + +.fi +One has to be careful here, as we have now reintroduced backtracking +into the scanner. In particular, while +.I we +know that there will never be any characters in the input stream +other than letters or newlines, +.I flex +can't figure this out, and it will plan for possibly needing backtracking +when it has scanned a token like "auto" and then the next character +is something other than a newline or a letter. Previously it would +then just match the "auto" rule and be done, but now it has no "auto" +rule, only an "auto\\n" rule. To eliminate the possibility of backtracking, +we could either duplicate all rules but without final newlines, or, +since we never expect to encounter such an input and therefore don't care +how it's classified, we can introduce one more catch-all rule, this +one which doesn't include a newline: +.nf + + %% + asm\\n | + auto\\n | + break\\n | + ... etc ... + volatile\\n | + while\\n /* it's a keyword */ + + [a-z]+\\n | + [a-z]+ | + .|\\n /* it's not a keyword */ + +.fi +Compiled with +.B -Cf, +this is about as fast as one can get a +.I flex +scanner to go for this particular problem. .SH INCOMPATIBILITIES WITH LEX AND POSIX .I flex is a rewrite of the Unix .I lex tool (the two implementations do not share any code, though), -which dates to the late 1970's. There are some incompatibilities -which are of concern to those who wish to write scanners acceptable -to either implementation. At present, the POSIX lex draft is -very close to the original lex implementation, so some of these +with some extensions and incompatibilities, both of which +are of concern to those who wish to write scanners acceptable +to either implementation. At present, the POSIX +.I lex +draft is +very close to the original +.I lex +implementation, so some of these incompatibilities are also in conflict with the POSIX draft.
But the intent is that except as noted below, .I flex as it presently stands will -ultimately be POSIX comformant (i.e., that those areas of conflict with +ultimately be POSIX conformant (i.e., that those areas of conflict with the POSIX draft will be resolved in .I flex's -favor). Please bare in -mind that all the comments are with regard to the POSIX +favor). Please bear in +mind that all the comments which follow are with regard to the POSIX .I draft -standard and not the final document; they are included so +standard of Summer 1989, and not the final document (or subsequent +drafts); they are included so .I flex users can be aware of the standardization issues and those areas where .I flex -may in the near future be incompatibly changed with +may in the near future undergo changes incompatible with its current definition. .LP .I flex @@ -1273,10 +1467,14 @@ is fully compatible with .I lex with the following exceptions: .IP - +.I lex +does not support exclusive start conditions (%x), though they +are in the current POSIX draft. +.IP - When definitions are expanded, .I flex encloses them in parentheses. -With lex, the following +With lex, the following: .nf NAME [A-Z][A-Z0-9]* @@ -1291,28 +1489,34 @@ and the precedence is such that the '?' is associated with "[A-Z0-9]*". With .I flex, the rule will be expanded to -"foo([A-z][A-Z0-9]*)?" and so the string "foo" will match. +"foo([A-Z][A-Z0-9]*)?" and so the string "foo" will match. Note that because of this, the -.B ^, $, <s>, +.B ^, $, <s>, /, and -.B / -operators cannot be used in a definition. +.B <<EOF>> +operators cannot be used in a +.I flex +definition. .IP -Note that the POSIX draft interpretation here is the same as +The POSIX draft interpretation is the same as .I flex's. .IP - -The undocumented lex-scanner internal variable +The undocumented +.I lex +scanner internal variable .B yylineno is not supported. (The variable is not part of the POSIX draft.)
.IP - The .B input() -routine is not redefinable, though may be called to read characters +routine is not redefinable, though it may be called to read characters following whatever has been matched by a rule. If .B input() encounters an end-of-file the normal .B yywrap() -processing is done. A ``real'' end-of-file is returned as +processing is done. A ``real'' end-of-file is returned by +.B input() +as .I EOF. .IP Input is instead controlled by redefining the @@ -1332,7 +1536,8 @@ is not supported. Output from the .B ECHO macro is done to the file-pointer -"yyout" (default +.I yyout +(default .I stdout). .IP The POSIX draft mentions that an @@ -1350,19 +1555,50 @@ that .B yywrap() is likely to be changed to a function in the near future. .IP - +After a call to +.B unput(), +.I yytext +and +.I yyleng +are undefined until the next token is matched. This is not the case with +.I lex +or the present POSIX draft. +.IP - The precedence of the .B {} -operator is different. lex interprets "abc{1,3}" as "match one, two, or +(numeric range) operator is different. +.I lex +interprets "abc{1,3}" as "match one, two, or three occurrences of 'abc'", whereas .I flex interprets it as "match 'ab' followed by one, two, or three occurrences of 'c'". The latter is in agreement with the current POSIX draft. .IP - -To refer to yytext outside of your scanner source file, use -"extern char *yytext;" rather than "extern char yytext[];". -This is contrary to the POSIX draft but a point on which I refuse -to budge, as the array representation entails a serious performance penalty. +To refer to yytext outside of the scanner source file, +the correct definition with +.I flex +is "extern char *yytext" rather than "extern char yytext[]". +This is contrary to the current POSIX draft but a point on which +.I flex +will not be changing, as the array representation entails a +serious performance penalty. 
It is hoped that the POSIX draft will +be emended to support the +.I flex +variety of declaration (as this is a fairly painless change to +require of +.I lex +users). +.IP - +The special table-size declarations such as +.B %a +supported by +.I lex +are not required by +.I flex +scanners; +.I flex +ignores them. .IP - The name .bd @@ -1371,7 +1607,45 @@ is #define'd so scanners may be written for use with either .I flex or .I lex. -.SH DEFICIENCES / BUGS +.LP +The following +.I flex +features are not included in +.I lex +or the POSIX draft standard: +.nf + + yyterminate() + <<EOF>> + YY_DECL + #line directives + %{}'s around actions + yyrestart() + comments beginning with '#' + multiple actions on a line + +.fi +This last feature refers to the fact that with +.I flex +you can put multiple actions on the same line, separated with +semi-colons, while with +.I lex, +the following +.nf + + foo handle_foo(); ++num_foos_seen; + +.fi +is (rather surprisingly) truncated to +.nf + + foo handle_foo(); + +.fi +.I flex +does not truncate the action. Actions that are not enclosed in +braces are simply terminated at the end of the line. +.SH DEFICIENCIES / BUGS .LP Some trailing context patterns cannot be properly matched and generate @@ -1381,21 +1655,26 @@ first part of the rule matches the beginning of the second part, such as "zx*/xy*", where the 'x*' matches the 'x' at the beginning of the trailing context. (Note that the POSIX draft states that the text matched by such patterns is undefined.) -If desperate, you can use -.B yyless() -to effect arbitrary trailing context. -.LP -.I variable -trailing context (where both the leading and trailing parts do not have -a fixed length) entails the same performance loss as -.I REJECT -(i.e., substantial). .LP For some trailing context rules, parts which are actually fixed-length are not recognized as such, leading to the abovementioned performance loss. -In particular, parts using '|' or {n} are always considered variable-length.
+In particular, parts using '|' or {n} (such as "foo{3}") are always +considered variable-length. .LP -Use of unput() or input() invalidates yytext and yyleng. +Combining trailing context with the special '|' action can result in +.I fixed +trailing context being turned into the more expensive +.I variable +trailing context. For example, this happens in the following example: +.nf + + %% + abc | + xyz/def + +.fi +.LP +Use of unput() invalidates yytext and yyleng. .LP Use of unput() to push back more text than was matched can result in the pushed-back text matching a beginning-of-line ('^') @@ -1412,13 +1691,21 @@ Their presence generates fatal errors. .I flex does not generate correct #line directives for code internal to the scanner; thus, bugs in -.I -flex.skel +.I flex.skel yield bogus line numbers. .LP +The +.B -d +option should use the +.I line +number corresponding to the matched rule rather than the +.I rule +number, which is +close-to-useless. +.LP Due to both buffering of input and read-ahead, you cannot intermix -calls to stdio routines, such as, for example, -.B getchar() +calls to <stdio.h> routines, such as, for example, +.B getchar(), with .I flex rules and expect it to work. Call @@ -1449,13 +1736,13 @@ The internal algorithms need documentation. .SH "SEE ALSO" .LP -lex(1), yacc(1), sed(1), awk(1). +flex(1), lex(1), yacc(1), sed(1), awk(1). .LP M. E. Lesk and E. Schmidt, .I LEX - Lexical Analyzer Generator .SH AUTHOR Vern Paxson, with the help of many ideas and much inspiration from -Van Jacobson. Original version by Jef Poskanzer. Fast table +Van Jacobson. Original version by Jef Poskanzer. The fast table representation is a partial implementation of a design done by Van Jacobson. The implementation was done by Kevin Gong and Vern Paxson. .LP @@ -1467,7 +1754,7 @@ Frederic Brehm, Nick Christopher, Jason Coughlin, Chris Faylor, Eric Goldman, Eric Hughes, Jeffrey R. Jones, Kevin B.
Kenny, Ronald Lamprecht, Greg Lee, Craig Leres, Mohamed el Lozy, Jim Meyering, Marc Nozell, Esmond Pitt, -Jef Poskanzer, Dave Tallman, Frank Whaley, Ken Yap, and others whose names +Jef Poskanzer, Dave Tallman, Frank Whaley, Ken Yap, and those whose names have slipped my marginal mail-archiving skills but whose contributions are appreciated all the same. .LP @@ -1476,11 +1763,9 @@ Mulcahy, Rich Salz, and Richard Stallman for help with various distribution headaches. .LP Thanks to Esmond Pitt for 8-bit character support, Benson Margulies and Fred -Burke for C++ support, and Ove Ewerlid for supporting NUL's (as well as for -impressive efforts regarding generating extremely high-performance -scanners, which with luck will be soon forthcoming). +Burke for C++ support, and Ove Ewerlid for supporting NUL's. .LP -This work was primarily done when I was a member of the Real Time System Group +This work was primarily done when I was at the Real Time Systems Group at the Lawrence Berkeley Laboratory in Berkeley, CA. Many thanks to all there for the support I received. .LP -- cgit v1.2.3 From f9bd1cfebc0dc7ac9e879cd17feb04e1be51f3a7 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 28 Feb 1990 16:59:42 +0000 Subject: Changed .so options.man to inlined version since flex.1 will have a different (shorter) options description. --- flex.1 | 347 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 346 insertions(+), 1 deletion(-) diff --git a/flex.1 b/flex.1 index 6357f27..a4808b4 100644 --- a/flex.1 +++ b/flex.1 @@ -1134,7 +1134,352 @@ automatically generates take care of virtually all the instances when one might consider using .B %t. But what the hell, it's there if you want it. -.so options.man +.SH OPTIONS +.I flex +has the following options: +.TP +.B -b +Generate backtracking information to +.I lex.backtrack. +This is a list of scanner states which require backtracking +and the input characters on which they do so. 
By adding rules one +can remove backtracking states. If all backtracking states +are eliminated and +.B -f +or +.B -F +is used, the generated scanner will run faster (see the +.B -p +flag). Only users who wish to squeeze every last cycle out of their +scanners need worry about this option. (See the section on PERFORMANCE +CONSIDERATIONS below.) +.TP +.B -c +is a do-nothing, deprecated option included for POSIX compliance. +.IP +.B NOTE: +in previous releases of +.I flex +.B -c +specified table-compression options. This functionality is +now given by the +.B -C +flag. To ease the impact of this change, when +.I flex +encounters +.B -c, +it currently issues a warning message and assumes that +.B -C +was desired instead. In the future this "promotion" of +.B -c +to +.B -C +will go away in the name of full POSIX compliance (unless +the POSIX meaning is removed first). +.TP +.B -d +makes the generated scanner run in +.I debug +mode. Whenever a pattern is recognized the scanner will +write to +.I stderr +a line of the form: +.nf + + --accepting rule #n ("the matched text") + +.fi +Rules are numbered sequentially with the first one being 1. Rule #0 +is executed when the scanner backtracks; Rule #(n+1) (where +.I n +is the number of rules in the +.I flex +input) indicates the default action; Rule #(n+2) indicates +that the input buffer is empty and needs to be refilled and then the scan +restarted. Rules beyond (n+2) are end-of-file actions. +.TP +.B -f +specifies (take your pick) +.I full table +or +.I fast scanner. +No table compression is done. The result is large but fast. +This option is equivalent to +.B -Cf +(see below). +.TP +.B -i +instructs +.I flex +to generate a +.I case-insensitive +scanner. The case of letters given in the +.I flex +input patterns will +be ignored, and tokens in the input will be matched regardless of case. The +matched text given in +.I yytext +will have the preserved case (i.e., it will not be folded).
+.TP +.B -n +is another do-nothing, deprecated option included only for +POSIX compliance. +.TP +.B -p +generates a performance report to stderr. The report +consists of comments regarding features of the +.I flex +input file which will cause a loss of performance in the resulting scanner. +Note that the use of +.I REJECT +and variable trailing context (see the BUGS section below) +entails a substantial performance penalty; use of +.I yymore(), +the +.B ^ +operator, +and the +.B -I +flag entail minor performance penalties. +.TP +.B -s +causes the +.I default rule +(that unmatched scanner input is echoed to +.I stdout) +to be suppressed. If the scanner encounters input that does not +match any of its rules, it aborts with an error. This option is +useful for finding holes in a scanner's rule set. +.TP +.B -t +instructs +.I flex +to write the scanner it generates to standard output instead +of +.B lex.yy.c. +.TP +.B -v +specifies that +.I flex +should write to +.I stderr +a summary of statistics regarding the scanner it generates. +Most of the statistics are meaningless to the casual +.I flex +user, but the +first line identifies the version of +.I flex, +which is useful for figuring +out where you stand with respect to patches and new releases. +.TP +.B -F +specifies that the +.ul +fast +scanner table representation should be used. This representation is +about as fast as the full table representation +.ul +(-f), +and for some sets of patterns will be considerably smaller (and for +others, larger). In general, if the pattern set contains both "keywords" +and a catch-all, "identifier" rule, such as in the set: +.nf + + "case" return TOK_CASE; + "switch" return TOK_SWITCH; + ... + "default" return TOK_DEFAULT; + [a-z]+ return TOK_ID; + +.fi +then you're better off using the full table representation. If only +the "identifier" rule is present and you then use a hash table or some such +to detect the keywords, you're better off using +.ul +-F. 
+.IP +This option is equivalent to +.B -CF +(see below). +.TP +.B -I +instructs +.I flex +to generate an +.I interactive +scanner. Normally, scanners generated by +.I flex +always look ahead one +character before deciding that a rule has been matched. At the cost of +some scanning overhead, +.I flex +will generate a scanner which only looks ahead +when needed. Such scanners are called +.I interactive +because if you want to write a scanner for an interactive system such as a +command shell, you will probably want the user's input to be terminated +with a newline, and without +.B -I +the user will have to type a character in addition to the newline in order +to have the newline recognized. This leads to dreadful interactive +performance. +.IP +If all this seems too confusing, here's the general rule: if a human will +be typing in input to your scanner, use +.B -I, +otherwise don't; if you don't care about squeezing the utmost performance +from your scanner and you +don't want to make any assumptions about the input to your scanner, +use +.B -I. +.IP +Note, +.B -I +cannot be used in conjunction with +.I full +or +.I fast tables, +i.e., the +.B -f, -F, -Cf, +or +.B -CF +flags. +.TP +.B -L +instructs +.I flex +not to generate +.B #line +directives. Without this option, +.I flex +peppers the generated scanner +with #line directives so error messages in the actions will be correctly +located with respect to the original +.I flex +input file, and not to +the fairly meaningless line numbers of +.B lex.yy.c. +(Unfortunately +.I flex +does not presently generate the necessary directives +to "retarget" the line numbers for those parts of +.B lex.yy.c +which it generated. So if there is an error in the generated code, +a meaningless line number is reported.) +.TP +.B -T +makes +.I flex +run in +.I trace +mode. It will generate a lot of messages to +.I stdout +concerning +the form of the input and the resultant non-deterministic and deterministic +finite automata.
This option is mostly for use in maintaining +.I flex. +.TP +.B -C[efmF] +controls the degree of table compression. +.IP +.B -Ce +directs +.I flex +to construct +.I equivalence classes, +i.e., sets of characters +which have identical lexical properties (for example, if the only +appearance of digits in the +.I flex +input is in the character class +"[0-9]" then the digits '0', '1', ..., '9' will all be put +in the same equivalence class). Equivalence classes usually give +dramatic reductions in the final table/object file sizes (typically +a factor of 2-5) and are pretty cheap performance-wise (one array +look-up per character scanned). +.IP +.B -Cf +specifies that the +.I full +scanner tables should be generated - +.I flex +should not compress the +tables by taking advantage of similar transition functions for +different states. +.IP +.B -CF +specifies that the alternate fast scanner representation (described +above under the +.B -F +flag) +should be used. +.IP +.B -Cm +directs +.I flex +to construct +.I meta-equivalence classes, +which are sets of equivalence classes (or characters, if equivalence +classes are not being used) that are commonly used together. Meta-equivalence +classes are often a big win when using compressed tables, but they +have a moderate performance impact (one or two "if" tests and one +array look-up per character scanned). +.IP +A lone +.B -C +specifies that the scanner tables should be compressed but neither +equivalence classes nor meta-equivalence classes should be used. +.IP +The options +.B -Cf +or +.B -CF +and +.B -Cm +do not make sense together - there is no opportunity for meta-equivalence +classes if the table is not being compressed. Otherwise the options +may be freely mixed. +.IP +The default setting is +.B -Cem, +which specifies that +.I flex +should generate equivalence classes +and meta-equivalence classes. This setting provides the highest +degree of table compression.
You can trade off +faster-executing scanners at the cost of larger tables with +the following generally being true: +.nf + + slowest & smallest + -Cem + -Cm + -Ce + -C + -C{f,F}e + -C{f,F} + fastest & largest + +.fi +Note that scanners with the smallest tables are usually generated and +compiled the quickest, so +during development you will usually want to use the default, maximal +compression. +.IP +.B -Cfe +is often a good compromise between speed and size for production +scanners. +.IP +.B -C +options are not cumulative; whenever the flag is encountered, the +previous -C settings are forgotten. +.TP +.B -Sskeleton_file +overrides the default skeleton file from which +.I flex +constructs its scanners. You'll never need this option unless you are doing +.I flex +maintenance or development. .SH PERFORMANCE CONSIDERATIONS The main design goal of .I flex -- cgit v1.2.3 From a78a8ffd6147447d5a23b9d82ee2a6c6a51e2385 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 28 Feb 1990 18:10:34 +0000 Subject: *** empty log message *** --- flex.1 | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 80 insertions(+), 10 deletions(-) diff --git a/flex.1 b/flex.1 index a4808b4..5cfd6d2 100644 --- a/flex.1 +++ b/flex.1 @@ -748,12 +748,7 @@ and return in the integer variable either the number of characters read or the constant YY_NULL (0 on Unix systems) to indicate EOF. The default YY_INPUT reads from the -global file-pointer "yyin", so if you -just want to change the input file, you needn't redefine -YY_INPUT - just point yyin at the input file (by assigning it to the -file pointer returned by -.B fopen(), -for example). +global file-pointer "yyin". .LP A sample redefinition of YY_INPUT (in the definitions section of the input file): @@ -766,6 +761,9 @@ section of the input file): %} .fi +This definition will change the input processing to occur +one character at a time. 
+.LP You also can add in things like keeping track of the input line number this way; but don't expect your scanner to go very fast. @@ -888,7 +886,30 @@ is equivalent to (The parentheses around the start condition name are not required but are considered good style.) .LP -Here is a scanner which provides two different interpretations +.B BEGIN +actions can also be given as indented code at the beginning +of the rules section. For example, the following will cause +the scanner to enter the "SPECIAL" start condition whenever +.I yylex() +is called and the global variable +.I enter_special +is true: +.nf + + int enter_special; + + %x SPECIAL + %% + if ( enter_special ) + BEGIN(SPECIAL); + + blahblahblah + ...more rules follow... + +.fi +.LP +To illustrate the uses of start conditions, +here is a scanner which provides two different interpretations of a string like "123.456". By default it will treat it as as three tokens, the integer "123", a dot ('.'), and the integer "456". But if the string is preceded earlier in the line by the string @@ -986,7 +1007,7 @@ point yyin at a new file to process, in which case the action .I must finish with the special -.I YY_NEW_FILE +.B YY_NEW_FILE action (this is a branch, so subsequent code in the action won't be executed), or the action must finish with a @@ -1041,8 +1062,11 @@ it could be #define'd to call a routine to convert yytext to lower-case. In the generated scanner, the actions are all gathered in one large switch statement and separated using .B YY_BREAK, -which may be redefined. -This allows, for example, some C++ users to +which may be redefined. By default, it is simply a "break", to separate +each rule's action from the following rule's. +Redefining +.B YY_BREAK +allows, for example, C++ users to #define YY_BREAK to do nothing (while being very careful that every rule ends with a "break" or a "return"!) 
to avoid suffering from unreachable statement warnings where because a rule's action ends with @@ -1889,6 +1913,12 @@ The POSIX draft mentions that an .B output() routine exists but currently gives no details as to what it does. .IP - +The +.I lex +.B %r +(generate a Ratfor scanner) option is not supported. It is not part +of the POSIX draft. +.IP - If you are providing your own yywrap() routine, you must include a "#undef yywrap" in the definitions section (section 1). Note that the "#undef" will have to be enclosed in %{}'s. @@ -1990,6 +2020,46 @@ is (rather surprisingly) truncated to .I flex does not truncate the action. Actions that are not enclosed in braces are simply terminated at the end of the line. +.SH DIAGNOSTICS +.I reject_used_but_not_detected undefined +or +.I yymore_used_but_not_detected undefined - +These errors can occur at compile time. They indicate that the +scanner uses +.B REJECT +or +.B yymore() +but that +.I flex +failed to notice the fact, meaning that +.I flex +scanned the first two sections looking for occurrences of these actions +and failed to find any, but somehow you snuck some in (via a #include +file, for example). Make an explicit reference to the action in your +.I flex +input file. (Note that previously +.I flex +supported a +.B %used/%unused +mechanism for dealing with this problem; this feature is still supported +but now deprecated, and will go away soon unless the author hears from +people who can argue compellingly that they need it.) +.LP +.I flex scanner jammed - +a scanner compiled with +.B -s +has encountered an input string which wasn't matched by +any of its rules. +.LP +.I flex input buffer overflowed - +a scanner rule matched a string long enough to overflow the +scanner's internal input buffer (16K bytes - controlled by +.B YY_BUF_MAX +in "flex.skel").
+.LP +.I fatal internal error, bad transition character detected in sympartition() - +Your input may contain an eight-bit character (either directly or expressed +as an escape sequence) and your version of flex was built for 7-bit characters. .SH DEFICIENCIES / BUGS .LP Some trailing context -- cgit v1.2.3 From 7768de78e642b90c06fbe5be6809663829169dd8 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 14 Mar 1990 13:39:01 +0000 Subject: Tweaks for handling NUL's. --- ccl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ccl.c b/ccl.c index a051b94..3289e21 100644 --- a/ccl.c +++ b/ccl.c @@ -135,7 +135,7 @@ int cclp; /* list_character_set - list the members of a set of characters in CCL form * * synopsis - * int cset[CSIZE + 1]; + * int cset[CSIZE]; * FILE *file; * list_character_set( cset ); * @@ -154,7 +154,7 @@ int cset[]; putc( '[', file ); - for ( i = 1; i <= csize; ++i ) + for ( i = (uses_NUL ? 0 : 1); i < csize; ++i ) { if ( cset[i] ) { @@ -164,7 +164,7 @@ int cset[]; fputs( readable_form( i ), file ); - while ( ++i <= csize && cset[i] ) + while ( ++i < csize && cset[i] ) ; if ( i - 1 > start_char ) -- cgit v1.2.3 From f248f2d111793b35a158d9c52e5c9b892ce5a5bc Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 14 Mar 1990 13:39:21 +0000 Subject: Tweaks for NUL chars. --- dfa.c | 22 ++++++++++++++++------ gen.c | 19 +++++++++++++------ main.c | 26 ++++++++++++-------------- misc.c | 6 ------ parse.y | 11 ++++++++++- scan.l | 8 ++++---- tblcmp.c | 2 +- 7 files changed, 56 insertions(+), 38 deletions(-) diff --git a/dfa.c b/dfa.c index f1be016..b8250ba 100644 --- a/dfa.c +++ b/dfa.c @@ -209,9 +209,9 @@ int state[]; { register int i, ec; - int out_char_set[CSIZE + 1]; + int out_char_set[CSIZE]; - for ( i = 1; i <= csize; ++i ) + for ( i = (uses_NUL ? 
0 : 1); i < csize; ++i ) { ec = abs( ecgroup[i] ); out_char_set[i] = state[ec]; @@ -222,7 +222,7 @@ int state[]; list_character_set( file, out_char_set ); /* now invert the members of the set to get the jam transitions */ - for ( i = 1; i <= csize; ++i ) + for ( i = (uses_NUL ? 0 : 1); i < csize; ++i ) out_char_set[i] = ! out_char_set[i]; fprintf( file, "\n jam-transitions: EOF " ); @@ -406,14 +406,23 @@ ntod() { int *accset, ds, nacc, newds; - int duplist[CSIZE + 1], sym, hashval, numstates, dsize; - int targfreq[CSIZE + 1], targstate[CSIZE + 1], state[CSIZE + 1]; + int sym, hashval, numstates, dsize; int *nset, *dset; int targptr, totaltrans, i, comstate, comfreq, targ; int *epsclosure(), snstods(), symlist[CSIZE + 1]; int num_start_states; int todo_head, todo_next; + /* note that the following are indexed by *equivalence classes* + * and not by characters. Since equivalence classes are indexed + * beginning with 1, even if the scanner accepts NUL's, this + * means that (since every character is potentially in its own + * equivalence class) these arrays must have room for indices + * from 1 to CSIZE, so their size must be CSIZE + 1. + */ + int duplist[CSIZE + 1], state[CSIZE + 1]; + int targfreq[CSIZE + 1], targstate[CSIZE + 1]; + /* this is so find_table_space(...) 
will know where to start looking in * chk/nxt for unused records for space to put in the state */ @@ -919,7 +928,8 @@ int symlist[]; if ( tch != SYM_EPSILON ) { if ( tch < -lastccl || tch > csize ) - flexfatal( "bad transition character detected in sympartition()" ); + flexfatal( + "bad transition character detected in sympartition()" ); if ( tch > 0 ) { /* character transition */ diff --git a/gen.c b/gen.c index fdc4feb..8bba86c 100644 --- a/gen.c +++ b/gen.c @@ -214,13 +214,14 @@ genecs() { register int i, j; - static char C_char_decl[] = "static const YY_CHAR %s[%d] =\n { 0,\n"; + static char C_char_decl[] = + "static const YY_CHAR %s[%d] =\n { %d,\n"; int numrows; Char clower(); - printf( C_char_decl, "yy_ec", csize + 1 ); + printf( C_char_decl, "yy_ec", csize, uses_NUL ? abs( ecgroup[0] ) : 0 ); - for ( i = 1; i <= csize; ++i ) + for ( i = 1; i < csize; ++i ) { if ( caseins && (i >= 'A') && (i <= 'Z') ) ecgroup[i] = ecgroup[clower( i )]; @@ -233,16 +234,22 @@ genecs() if ( trace ) { + char *readable_form(); + fputs( "\n\nEquivalence Classes:\n\n", stderr ); + if ( uses_NUL ) + { + fprintf( stderr, "%4s = %-2d\n", + readable_form( 0 ), ecgroup[0] ); + } + numrows = (csize + 1) / 8; for ( j = 1; j <= numrows; ++j ) { - for ( i = j; i <= csize; i = i + numrows ) + for ( i = j; i < csize; i = i + numrows ) { - char *readable_form(); - fprintf( stderr, "%4s = %-2d", readable_form( i ), ecgroup[i] ); diff --git a/main.c b/main.c index 81842f8..95a1751 100644 --- a/main.c +++ b/main.c @@ -60,7 +60,7 @@ int current_state_type; int variable_trailing_context_rules; int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP]; int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; -int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1]; +int numecs, nextecm[CSIZE], ecgroup[CSIZE], nummecs, tecfwd[CSIZE + 1]; int tecbck[CSIZE + 1]; int *xlation = (int *) 0; int num_xlations; @@ -79,7 +79,7 @@ Char *ccltbl; char *starttime, 
*endtime, nmstr[MAXLINE]; int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; -int num_backtracking, bol_needed; +int num_backtracking, bol_needed, uses_NUL; FILE *temp_action_file; FILE *backtrack_file; int end_of_buffer_state; @@ -96,12 +96,6 @@ static char *outfile = "lexyy.c"; static int outfile_created = 0; -/* flex - main program - * - * synopsis (from the shell) - * flex [-v] [file ...] - */ - main( argc, argv ) int argc; char **argv; @@ -206,7 +200,7 @@ int status; (void) unlink( outfile ); } - if ( backtrack_report ) + if ( backtrack_report && backtrack_file ) { if ( num_backtracking == 0 ) fprintf( backtrack_file, "No backtracking.\n" ); @@ -550,7 +544,7 @@ get_next_arg: /* used by -C and -S flags in lieu of a "continue 2" control */ numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0; numuniq = numdup = hshsave = eofseen = datapos = dataline = 0; num_backtracking = onesp = numprots = 0; - variable_trailing_context_rules = bol_needed = false; + variable_trailing_context_rules = bol_needed = uses_NUL = false; linenum = sectnum = 1; firstprot = NIL; @@ -565,9 +559,9 @@ get_next_arg: /* used by -C and -S flags in lieu of a "continue 2" control */ if ( useecs ) { /* set up doubly-linked equivalence classes */ - ecgroup[1] = NIL; + ecgroup[0] = NIL; - for ( i = 2; i <= csize; ++i ) + for ( i = 1; i < csize; ++i ) { ecgroup[i] = i - 1; nextecm[i - 1] = i; @@ -578,7 +572,7 @@ get_next_arg: /* used by -C and -S flags in lieu of a "continue 2" control */ else { /* put everything in its own equivalence class */ - for ( i = 1; i <= csize; ++i ) + for ( i = 0; i < csize; ++i ) { ecgroup[i] = i; nextecm[i] = BAD_SUBSCRIPT; /* to catch errors */ @@ -627,7 +621,11 @@ readin() else if ( useecs ) { - numecs = cre8ecs( nextecm, ecgroup, csize ); + if ( uses_NUL ) + numecs = cre8ecs( nextecm, ecgroup, csize, 0 ); + else + numecs = cre8ecs( nextecm, ecgroup, csize - 1, 1 ); + ccl2ecl(); } 
diff --git a/misc.c b/misc.c index 119658a..e7d8398 100644 --- a/misc.c +++ b/misc.c @@ -610,12 +610,6 @@ Char array[]; array[sptr] = c; - if ( esc_char == '\0' ) - { - synerr( "escape sequence for null not allowed" ); - return ( 1 ); - } - return ( esc_char ); } diff --git a/parse.y b/parse.y index 07ce77f..12479a8 100644 --- a/parse.y +++ b/parse.y @@ -503,7 +503,7 @@ singleton : singleton '*' ++rulelen; if ( $1 == '\0' ) - synerr( "null in rule" ); + uses_NUL = true; if ( caseins && $1 >= 'A' && $1 <= 'Z' ) $1 = clower( $1 ); @@ -531,6 +531,9 @@ fullccl : '[' ccl ']' ccl : ccl CHAR '-' CHAR { + if ( $2 == '\0' || $4 == '\0' ) + uses_NUL = true; + if ( $2 > $4 ) synerr( "negative range in character class" ); @@ -559,6 +562,9 @@ ccl : ccl CHAR '-' CHAR | ccl CHAR { + if ( $2 == '\0' ) + uses_NUL = true; + if ( caseins ) if ( $2 >= 'A' && $2 <= 'Z' ) $2 = clower( $2 ); @@ -579,6 +585,9 @@ ccl : ccl CHAR '-' CHAR string : string CHAR { + if ( $2 == '\0' ) + uses_NUL = true; + if ( caseins ) if ( $2 >= 'A' && $2 <= 'Z' ) $2 = clower( $2 ); diff --git a/scan.l b/scan.l index dfc1d47..ee9a2be 100644 --- a/scan.l +++ b/scan.l @@ -135,15 +135,15 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) char *malloc(); ++linenum; - xlation = (int *) malloc( sizeof( int ) * (csize + 1) ); - - for ( i = 1; i <= csize; ++i ) - xlation[i] = 0; + xlation = (int *) malloc( sizeof( int ) * csize ); if ( ! xlation ) flexfatal( "dynamic memory failure building %t table" ); + for ( i = 0; i < csize; ++i ) + xlation[i] = 0; + num_xlations = 0; BEGIN(XLATION); diff --git a/tblcmp.c b/tblcmp.c index 840736d..ac822eb 100644 --- a/tblcmp.c +++ b/tblcmp.c @@ -233,7 +233,7 @@ cmptmps() * transitions */ - nummecs = cre8ecs( tecfwd, tecbck, numecs ); + nummecs = cre8ecs( tecfwd, tecbck, numecs, 1 ); } else -- cgit v1.2.3 From 99ed9836390f8fc31923e0d2e2208e462a2a4d06 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 14 Mar 1990 13:39:41 +0000 Subject: Tweaks for NUL chars. 
--- ecs.c | 13 ++++++++----- flexdef.h | 18 +++++++++++++----- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/ecs.c b/ecs.c index d5ac84b..c7ca3bb 100644 --- a/ecs.c +++ b/ecs.c @@ -80,15 +80,18 @@ ccl2ecl() * * synopsis * int cre8ecs(); - * number of classes = cre8ecs( fwd, bck, num ); + * number of classes = cre8ecs( fwd, bck, num, start_pos ); * * fwd is the forward linked-list of equivalence class members. bck * is the backward linked-list, and num is the number of class members. + * start_pos is 0 if the class members begin in fwd[] and bck[] at + * position 0, and 1 if they begin at position 1. + * * Returned is the number of classes. */ -int cre8ecs( fwd, bck, num ) -int fwd[], bck[], num; +int cre8ecs( fwd, bck, num, start_pos ) +int fwd[], bck[], num, start_pos; { int i, j, numcl; @@ -101,7 +104,7 @@ int fwd[], bck[], num; * class. */ - for ( i = 1; i <= num; ++i ) + for ( i = start_pos; i < num + start_pos; ++i ) if ( bck[i] == NIL ) { bck[i] = ++numcl; @@ -129,7 +132,7 @@ int ecmap[]; { int i; - for ( i = 1; i <= csize; ++i ) + for ( i = (uses_NUL ? 
0 : 1); i < csize; ++i ) if ( xlation[i] == 0 ) ecmap[i] = num_xlations + 1; else diff --git a/flexdef.h b/flexdef.h index 1a154cb..793f785 100644 --- a/flexdef.h +++ b/flexdef.h @@ -31,7 +31,7 @@ #endif #ifdef FLEX_8_BIT_CHARS -#define CSIZE 255 +#define CSIZE 256 #define Char unsigned char #else #define Char char @@ -167,7 +167,7 @@ char *sprintf(); /* keep lint happy */ /* size of input alphabet - should be size of ASCII set */ #ifndef CSIZE -#define CSIZE 127 +#define CSIZE 128 #endif #define INITIAL_MAX_CCLS 100 /* max number of unique character classes */ @@ -332,7 +332,7 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; * backtrack_report - if true (i.e., -b flag), generate "lex.backtrack" file * listing backtracking states * csize - size of character set for the scanner we're generating; - * 127 for 7-bit chars and 255 for 8-bit + * 128 for 7-bit chars and 256 for 8-bit * yymore_used - if true, yymore() is used in input rules * reject - if true, generate backtracking tables for REJECT macro * real_reject - if true, scanner really uses REJECT (as opposed to just @@ -470,8 +470,15 @@ extern int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; * num_xlations - number of different xlation values */ -extern int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs; +extern int numecs, nextecm[CSIZE], ecgroup[CSIZE], nummecs; + +/* meta-equivalence classes are indexed starting at 1, so it's possible + * that they will require positions from 1 .. CSIZE, i.e., CSIZE + 1 + * slots total (since the arrays are 0-based). nextecm[] and ecgroup[] + * don't require the extra position since they're indexed from 1 .. CSIZE - 1. 
+ */ extern int tecfwd[CSIZE + 1], tecbck[CSIZE + 1]; + extern int *xlation; extern int num_xlations; @@ -569,12 +576,13 @@ extern Char *ccltbl; * hshsave - number of hash collisions saved by checking number of states * num_backtracking - number of DFA states requiring back-tracking * bol_needed - whether scanner needs beginning-of-line recognition + * uses_NUL - true if the scanner needs to be able to recognize NUL's */ extern char *starttime, *endtime, nmstr[MAXLINE]; extern int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; extern int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; -extern int num_backtracking, bol_needed; +extern int num_backtracking, bol_needed, uses_NUL; char *allocate_array(), *reallocate_array(); -- cgit v1.2.3 From d7400bed5b86aab362ae042ce3b48dbd8d921b30 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 14 Mar 1990 13:41:56 +0000 Subject: Added <<EOF>> token --- yylex.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/yylex.c b/yylex.c index ea12936..0d059b6 100644 --- a/yylex.c +++ b/yylex.c @@ -194,6 +194,10 @@ int yylex() fprintf( stderr, "[%d]", yylval ); break; + case EOF_OP: + fprintf( stderr, "<<EOF>>" ); + break; + case 0: fprintf( stderr, "End Marker" ); break; -- cgit v1.2.3 From 9f5605411cdc0ab55245c3578dd234bbf01e90a4 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 16 Mar 1990 16:49:24 +0000 Subject: removed NUL hack --- ccl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ccl.c b/ccl.c index 3289e21..78f263c 100644 --- a/ccl.c +++ b/ccl.c @@ -154,7 +154,7 @@ int cset[]; putc( '[', file ); - for ( i = (uses_NUL ? 
0 : 1); i < csize; ++i ) + for ( i = 0; i < csize; ++i ) { if ( cset[i] ) { -- cgit v1.2.3 From 5670e8eb09ad34bb969e9b3b27925b5c7fe026b3 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 16 Mar 1990 16:50:00 +0000 Subject: more thrashing around with NUL's --- dfa.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 100 insertions(+), 9 deletions(-) diff --git a/dfa.c b/dfa.c index b8250ba..a35b307 100644 --- a/dfa.c +++ b/dfa.c @@ -211,7 +211,7 @@ int state[]; register int i, ec; int out_char_set[CSIZE]; - for ( i = (uses_NUL ? 0 : 1); i < csize; ++i ) + for ( i = 0; i < csize; ++i ) { ec = abs( ecgroup[i] ); out_char_set[i] = state[ec]; @@ -222,7 +222,7 @@ int state[]; list_character_set( file, out_char_set ); /* now invert the members of the set to get the jam transitions */ - for ( i = (uses_NUL ? 0 : 1); i < csize; ++i ) + for ( i = 0; i < csize; ++i ) out_char_set[i] = ! out_char_set[i]; fprintf( file, "\n jam-transitions: EOF " ); @@ -391,6 +391,9 @@ increase_max_dfas() dhash = reallocate_integer_array( dhash, current_max_dfas ); dss = reallocate_int_ptr_array( dss, current_max_dfas ); dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas ); + + if ( nultrans ) + nultrans = reallocate_integer_array( nultrans, current_max_dfas ); } @@ -407,6 +410,7 @@ ntod() { int *accset, ds, nacc, newds; int sym, hashval, numstates, dsize; + int num_full_table_rows; /* used only for -f */ int *nset, *dset; int targptr, totaltrans, i, comstate, comfreq, targ; int *epsclosure(), snstods(), symlist[CSIZE + 1]; @@ -457,6 +461,59 @@ ntod() inittbl(); + /* check to see whether we should build a separate table for transitions + * on NUL characters. We don't do this for full-speed (-F) scanners, + * since for them we don't have a simple state number lying around with + * which to index the table. 
We also don't bother doing it for scanners + * unless (1) NUL is in its own equivalence class (indicated by a + * positive value of ecgroup[NUL]), (2) NUL's equivalence class is + * the last equivalence class, and (3) the number of equivalence classes + * is the same as the number of characters. This latter case comes about + * when useecs is false or when it's true but every character still + * manages to land in its own class (unlikely, but it's cheap to check + * for). If all these things are true then the character code needed + * to represent NUL's equivalence class for indexing the tables is + * going to take one more bit than the number of characters, and therefore + * we won't be assured of being able to fit it into a YY_CHAR variable. + * This rules out storing the transitions in a compressed table, since + * the code for interpreting them uses a YY_CHAR variable (perhaps it + * should just use an integer, though; this is worth pondering ... ###). + * + * Finally, for full tables, we want the number of entries in the + * table to be a power of two so the array references go fast (it + * will just take a shift to compute the major index). If encoding + * NUL's transitions in the table will spoil this, we give it its + * own table (note that this will be the case if we're not using + * equivalence classes). + */ + + /* note that the test for ecgroup[0] == numecs below accomplishes + * both (1) and (2) above + */ + if ( ! fullspd && ecgroup[0] == numecs ) + { /* NUL is alone in its equivalence class, which is the last one */ + int use_NUL_table = (numecs == csize); + + if ( fulltbl && ! 
use_NUL_table ) + { /* we still may want to use the table if numecs is a power of 2 */ + int power_of_two; + + for ( power_of_two = 1; power_of_two <= csize; power_of_two *= 2 ) + if ( numecs == power_of_two ) + { + use_NUL_table = true; + break; + } + } + + if ( use_NUL_table ) + nultrans = allocate_integer_array( current_max_dfas ); + /* from now on, nultrans != nil indicates that we're + * saving null transitions for later, separate encoding + */ + } + + if ( fullspd ) { for ( i = 0; i <= numecs; ++i ) @@ -464,16 +521,30 @@ ntod() place_state( state, 0, 0 ); } - if ( fulltbl ) + else if ( fulltbl ) { + if ( nultrans ) + /* we won't be including NUL's transitions in the table, + * so build it for entries from 0 .. numecs - 1 + */ + num_full_table_rows = numecs; + + else + /* take into account the fact that we'll be including + * the NUL entries in the transition table. Build it + * from 0 .. numecs. + */ + num_full_table_rows = numecs + 1; + /* declare it "short" because it's a real long-shot that that - * won't be large enough + * won't be large enough. */ printf( "static short int yy_nxt[][%d] =\n {\n", - numecs + 1 ); /* '}' so vi doesn't get too confused */ + /* '}' so vi doesn't get too confused */ + num_full_table_rows ); /* generate 0 entries for state #0 */ - for ( i = 0; i <= numecs; ++i ) + for ( i = 0; i < num_full_table_rows; ++i ) mk2data( 0 ); /* force ',' and dataflush() next call to mk2data */ @@ -614,6 +685,12 @@ ntod() if ( ds > num_start_states ) check_for_backtracking( ds, state ); + if ( nultrans ) + { + nultrans[ds] = state[NUL_ec]; + state[NUL_ec] = 0; /* remove transition */ + } + if ( fulltbl ) { /* supply array's 0-element */ @@ -622,7 +699,7 @@ ntod() else mk2data( end_of_buffer_state ); - for ( i = 1; i <= numecs; ++i ) + for ( i = 1; i < num_full_table_rows; ++i ) /* jams are marked by negative of state number */ mk2data( state[i] ? 
state[i] : -ds ); @@ -846,6 +923,9 @@ int ds[], dsize, transsym, nset[]; { /* loop through negated character class */ ch = ccltbl[ccllist + j]; + if ( ch == 0 ) + ch = NUL_ec; + if ( ch > transsym ) break; /* transsym isn't in negated ccl */ @@ -862,6 +942,9 @@ int ds[], dsize, transsym, nset[]; { ch = ccltbl[ccllist + j]; + if ( ch == 0 ) + ch = NUL_ec; + if ( ch > transsym ) break; @@ -931,7 +1014,7 @@ int symlist[]; flexfatal( "bad transition character detected in sympartition()" ); - if ( tch > 0 ) + if ( tch >= 0 ) { /* character transition */ /* abs() needed for fake %t ec's */ int ec = abs( ecgroup[tch] ); @@ -946,7 +1029,8 @@ int symlist[]; lenccl = ccllen[tch]; cclp = cclmap[tch]; - mkeccl( ccltbl + cclp, lenccl, dupfwd, duplist, numecs ); + mkeccl( ccltbl + cclp, lenccl, dupfwd, duplist, numecs, + NUL_ec ); if ( cclng[tch] ) { @@ -956,6 +1040,9 @@ int symlist[]; { ich = ccltbl[cclp + k]; + if ( ich == 0 ) + ich = NUL_ec; + for ( ++j; j < ich; ++j ) symlist[j] = 1; } @@ -968,6 +1055,10 @@ int symlist[]; for ( k = 0; k < lenccl; ++k ) { ich = ccltbl[cclp + k]; + + if ( ich == 0 ) + ich = NUL_ec; + symlist[ich] = 1; } } -- cgit v1.2.3 From 32754ff727bf9bcd96da20b28cdd6acfa13cf92a Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 16 Mar 1990 16:50:17 +0000 Subject: NUL's; indenting --- flex.skl | 193 +++++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 126 insertions(+), 67 deletions(-) diff --git a/flex.skl b/flex.skl index 8b189a5..eba3558 100644 --- a/flex.skl +++ b/flex.skl @@ -110,9 +110,10 @@ *yy_cp = '\0'; \ yy_c_buf_p = yy_cp; -#define EOB_ACT_RESTART_SCAN 0 -#define EOB_ACT_END_OF_FILE 1 -#define EOB_ACT_LAST_MATCH 2 +#define EOB_ACT_NUL_TRANS 0 +#define EOB_ACT_CONTINUE_SCAN 1 +#define EOB_ACT_END_OF_FILE 2 +#define EOB_ACT_LAST_MATCH 3 /* return all but the first 'n' matched characters back to the input stream */ #define yyless(n) \ @@ -162,12 +163,14 @@ static YY_CHAR *yy_last_accepting_cpos; #ifdef YY_USE_PROTOS 
static yy_state_type yy_get_previous_state( void ); +static yy_state_type yy_try_NUL_trans( void ); static int yy_get_next_buffer( void ); static void yyunput( int c, YY_CHAR *buf_ptr ); static int input( void ); void yyrestart( FILE *input_file ); #else static yy_state_type yy_get_previous_state(); +static yy_state_type yy_try_NUL_trans(); static int yy_get_next_buffer(); static void yyunput(); static int input(); @@ -234,93 +237,130 @@ new_file: %% code to set up and find next match goes here - /* bogus while loop to let YY_BACK_TRACK, EOB_ACT_LAST_MATCH, - * and EOF actions branch here without introducing an optimizer- - * daunting goto - */ - while ( 1 ) - { +yy_find_action: %% code to find the action number goes here - YY_DO_BEFORE_ACTION; - YY_USER_ACTION; + YY_DO_BEFORE_ACTION; + YY_USER_ACTION; #ifdef FLEX_DEBUG - fprintf( stderr, "--accepting rule #%d (\"%s\")\n", - yy_act, yytext ); + fprintf( stderr, "--accepting rule #%d (\"%s\")\n", + yy_act, yytext ); #endif do_action: /* this label is used only to access EOF actions */ - switch ( yy_act ) - { + switch ( yy_act ) + { %% actions go here - case YY_END_OF_BUFFER: - /* undo the effects of YY_DO_BEFORE_ACTION */ - *yy_cp = yy_hold_char; + case YY_END_OF_BUFFER: + { + /* amount of text matched not including the EOB char */ + int yy_amount_of_matched_text = yy_cp - yytext - 1; + + /* undo the effects of YY_DO_BEFORE_ACTION */ + *yy_cp = yy_hold_char; - switch ( yy_get_next_buffer() ) + switch ( yy_get_next_buffer() ) + { + case EOB_ACT_END_OF_FILE: { - case EOB_ACT_END_OF_FILE: + if ( yywrap() ) { - if ( yywrap() ) - { - /* note: because we've taken care in - * yy_get_next_buffer() to have set up yytext, - * we can now set up yy_c_buf_p so that if some - * total hoser (like flex itself) wants - * to call the scanner after we return the - * YY_NULL, it'll still work - another YY_NULL - * will get returned. 
- */ - yy_c_buf_p = yytext; - - yy_act = YY_STATE_EOF((yy_start - 1) / 2); - goto do_action; - } - - else - YY_NEW_FILE; + /* note: because we've taken care in + * yy_get_next_buffer() to have set up yytext, + * we can now set up yy_c_buf_p so that if some + * total hoser (like flex itself) wants + * to call the scanner after we return the + * YY_NULL, it'll still work - another YY_NULL + * will get returned. + */ + yy_c_buf_p = yytext + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF((yy_start - 1) / 2); + goto do_action; } - break; - case EOB_ACT_RESTART_SCAN: - yy_c_buf_p = yytext + YY_MORE_ADJ; - yy_hold_char = *yy_c_buf_p; - break; + else + YY_NEW_FILE; + } + break; + + case EOB_ACT_NUL_TRANS: + { + yy_state_type yy_next_state; + + yy_c_buf_p = yytext + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); - case EOB_ACT_LAST_MATCH: - yy_c_buf_p = &yy_ch_buf[yy_n_chars]; + /* okay, we're now positioned to make the + * NUL transition. We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we + * don't want to build jamming into it because + * then it will run more slowly) + */ - yy_current_state = yy_get_previous_state(); + yy_next_state = yy_try_NUL_trans( yy_current_state ); + + yy_bp = yytext + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* consume the NUL */ + yy_cp = ++yy_c_buf_p; + yy_current_state = yy_next_state; + goto yy_match; + } - yy_cp = yy_c_buf_p; - yy_bp = yytext + YY_MORE_ADJ; - continue; /* go to "YY_DO_BEFORE_ACTION" */ + else + { +%% code to do backtracking for compressed tables and set up yy_cp goes here + goto yy_find_action; + } } - break; - default: -#ifdef FLEX_DEBUG - printf( "action # %d\n", yy_act ); -#endif - YY_FATAL_ERROR( "fatal flex scanner internal error" ); + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = yytext + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = 
yytext + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + yy_c_buf_p = &yy_ch_buf[yy_n_chars]; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext + YY_MORE_ADJ; + goto yy_find_action; + } + break; } - break; /* exit bogus while loop */ + default: +#ifdef FLEX_DEBUG + printf( "action # %d\n", yy_act ); +#endif + YY_FATAL_ERROR( "fatal flex scanner internal error" ); } } } -/* yy_get_next_buffer - try to read in new buffer +/* yy_get_next_buffer - try to read in a new buffer * * synopsis * int yy_get_next_buffer(); * * returns a code representing an action * EOB_ACT_LAST_MATCH - - * EOB_ACT_RESTART_SCAN - restart the scanner + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position * EOB_ACT_END_OF_FILE - end of file */ @@ -331,12 +371,10 @@ static int yy_get_next_buffer() register YY_CHAR *source = yytext - 1; /* copy prev. char, too */ register int number_to_move, i; int ret_val; - - if ( yy_c_buf_p != &yy_ch_buf[yy_n_chars + 1] ) - { - YY_FATAL_ERROR( "NULL in input" ); - /*NOTREACHED*/ - } + + if ( yy_c_buf_p < &yy_ch_buf[yy_n_chars + 1] ) + /* this happens when we see a NUL */ + return ( EOB_ACT_NUL_TRANS ); /* try to read more data */ @@ -374,7 +412,7 @@ static int yy_get_next_buffer() } else - ret_val = EOB_ACT_RESTART_SCAN; + ret_val = EOB_ACT_CONTINUE_SCAN; yy_n_chars += number_to_move; yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; @@ -417,6 +455,23 @@ static yy_state_type yy_get_previous_state() } +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + +static yy_state_type yy_try_NUL_trans( yy_current_state ) +register yy_state_type yy_current_state; + + { + register int yy_is_jam; +%% code to find the next state, and perhaps do backtracking, goes here + + return ( yy_is_jam ? 
0 : yy_current_state ); + } + + #ifdef YY_USE_PROTOS static void yyunput( int c, register YY_CHAR *yy_bp ) #else @@ -481,7 +536,7 @@ static int input() { if ( yywrap() ) { - yy_c_buf_p = yytext; + yy_c_buf_p = yytext + YY_MORE_ADJ; return ( EOF ); } @@ -497,7 +552,11 @@ static int input() } break; - case EOB_ACT_RESTART_SCAN: + case EOB_ACT_NUL_TRANS: + *yy_c_buf_p = '\0'; + break; + + case EOB_ACT_CONTINUE_SCAN: yy_c_buf_p = yytext + YY_MORE_ADJ; break; -- cgit v1.2.3 From bf413a54bf00f4807f1278ab3fc6f8c53bd5cc70 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 16 Mar 1990 16:50:37 +0000 Subject: NUL's. 8-bit chars. --- flexdef.h | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/flexdef.h b/flexdef.h index 793f785..9df7899 100644 --- a/flexdef.h +++ b/flexdef.h @@ -30,11 +30,20 @@ #include #endif +/* always be prepared to generate an 8-bit scanner */ +#define FLEX_8_BIT_CHARS + #ifdef FLEX_8_BIT_CHARS #define CSIZE 256 #define Char unsigned char #else #define Char char +#define CSIZE 128 +#endif + +/* size of input alphabet - should be size of ASCII set */ +#ifndef DEFAULT_CSIZE +#define DEFAULT_CSIZE 128 #endif @@ -165,11 +174,6 @@ char *sprintf(); /* keep lint happy */ #define UNIQUE -1 /* marks a symbol as an e.c. 
representative */ #define INFINITY -1 /* for x{5,} constructions */ -/* size of input alphabet - should be size of ASCII set */ -#ifndef CSIZE -#define CSIZE 128 -#endif - #define INITIAL_MAX_CCLS 100 /* max number of unique character classes */ #define MAX_CCLS_INCREMENT 100 @@ -202,7 +206,7 @@ char *sprintf(); /* keep lint happy */ #define INITIAL_MAX_TEMPLATE_XPAIRS 2500 #define MAX_TEMPLATE_XPAIRS_INCREMENT 2500 -#define SYM_EPSILON 0 /* to mark transitions on the symbol epsilon */ +#define SYM_EPSILON (CSIZE + 1) /* to mark transitions on the symbol epsilon */ #define INITIAL_MAX_SCS 40 /* maximum number of start conditions */ #define MAX_SCS_INCREMENT 40 /* amount to bump by if it's not enough */ @@ -470,7 +474,12 @@ extern int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; * num_xlations - number of different xlation values */ -extern int numecs, nextecm[CSIZE], ecgroup[CSIZE], nummecs; +/* reserve enough room in the equivalence class arrays so that we + * can use the CSIZE'th element to hold equivalence class information + * for the NUL character. Later we'll move this information into + * the 0th element. + */ +extern int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs; /* meta-equivalence classes are indexed starting at 1, so it's possible * that they will require positions from 1 .. 
CSIZE, i.e., CSIZE + 1 @@ -509,6 +518,8 @@ extern char **scname; * tnxt - internal nxt table for templates * base - offset into "nxt" for given state * def - where to go if "chk" disallows "nxt" entry + * nultrans - NUL transition for each state + * NUL_ec - equivalence class of the NUL character * tblend - last "nxt/chk" table entry being used * firstfree - first empty entry in "nxt/chk" table * dss - nfa state set for each dfa @@ -529,7 +540,7 @@ extern char **scname; extern int current_max_dfa_size, current_max_xpairs; extern int current_max_template_xpairs, current_max_dfas; extern int lastdfa, lasttemp, *nxt, *chk, *tnxt; -extern int *base, *def, tblend, firstfree, **dss, *dfasiz; +extern int *base, *def, *nultrans, NUL_ec, tblend, firstfree, **dss, *dfasiz; extern union dfaacc_union { int *dfaacc_set; @@ -576,13 +587,12 @@ extern Char *ccltbl; * hshsave - number of hash collisions saved by checking number of states * num_backtracking - number of DFA states requiring back-tracking * bol_needed - whether scanner needs beginning-of-line recognition - * uses_NUL - true if the scanner needs to be able to recognize NUL's */ extern char *starttime, *endtime, nmstr[MAXLINE]; extern int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; extern int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; -extern int num_backtracking, bol_needed, uses_NUL; +extern int num_backtracking, bol_needed; char *allocate_array(), *reallocate_array(); -- cgit v1.2.3 From 19c7555b5ab5b3d984bb05acd60eb9a2b31c31d8 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 16 Mar 1990 16:51:13 +0000 Subject: NUL's. 
--- gen.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 193 insertions(+), 43 deletions(-) diff --git a/gen.c b/gen.c index 8bba86c..a5d8873 100644 --- a/gen.c +++ b/gen.c @@ -44,6 +44,13 @@ static int indent_level = 0; /* each level is 4 spaces */ #define indent_down() (--indent_level) #define set_indent(indent_val) indent_level = indent_val +/* *everything* is done in terms of arrays starting at 1, so provide + * a null entry for the zero element of all C arrays + */ +static char C_short_decl[] = "static const short int %s[%d] =\n { 0,\n"; +static char C_long_decl[] = "static const long int %s[%d] =\n { 0,\n"; +static char C_state_decl[] = + "static const yy_state_type %s[%d] =\n { 0,\n"; /* indent to the current level */ @@ -97,7 +104,7 @@ gen_bt_action() if ( reject || num_backtracking == 0 ) return; - set_indent( 4 ); + set_indent( 3 ); indent_puts( "case 0: /* must backtrack */" ); indent_puts( "/* undo the effects of YY_DO_BEFORE_ACTION */" ); @@ -214,12 +221,14 @@ genecs() { register int i, j; - static char C_char_decl[] = - "static const YY_CHAR %s[%d] =\n { %d,\n"; + static char C_char_decl[] = "static const %s %s[%d] =\n { 0,\n"; int numrows; Char clower(); - printf( C_char_decl, "yy_ec", csize, uses_NUL ? 
abs( ecgroup[0] ) : 0 ); + if ( numecs < csize ) + printf( C_char_decl, "YY_CHAR", "yy_ec", csize ); + else + printf( C_char_decl, "short", "yy_ec", csize ); for ( i = 1; i < csize; ++i ) { @@ -238,15 +247,9 @@ genecs() fputs( "\n\nEquivalence Classes:\n\n", stderr ); - if ( uses_NUL ) - { - fprintf( stderr, "%4s = %-2d\n", - readable_form( 0 ), ecgroup[0] ); - } + numrows = csize / 8; - numrows = (csize + 1) / 8; - - for ( j = 1; j <= numrows; ++j ) + for ( j = 0; j < numrows; ++j ) { for ( i = j; i < csize; i = i + numrows ) { @@ -391,12 +394,6 @@ genftbl() register int i; int end_of_buffer_action = num_rules + 1; - /* *everything* is done in terms of arrays starting at 1, so provide - * a null entry for the zero element of all C arrays - */ - static char C_short_decl[] = - "static const short int %s[%d] =\n { 0,\n"; - printf( C_short_decl, "yy_accept", lastdfa + 1 ); @@ -425,11 +422,10 @@ genftbl() /* generate the code to find the next compressed-table state */ -gen_next_compressed_state() +gen_next_compressed_state( char_map ) +char *char_map; { - char *char_map = useecs ? "yy_ec[*yy_cp]" : "*yy_cp"; - indent_put2s( "register YY_CHAR yy_c = %s;", char_map ); /* save the backtracking info \before/ computing the next state @@ -474,7 +470,10 @@ gen_next_compressed_state() gen_next_match() - { /* NOTE - changes in here should be reflected in get_next_state() */ + { + /* NOTE - changes in here should be reflected in gen_next_state() and + * gen_NUL_trans() + */ char *char_map = useecs ? "yy_ec[*yy_cp]" : "*yy_cp"; char *char_map_2 = useecs ? 
"yy_ec[*++yy_cp]" : "*++yy_cp"; @@ -539,7 +538,7 @@ gen_next_match() indent_up(); indent_puts( "{" ); - gen_next_state(); + gen_next_state( false ); indent_puts( "++yy_cp;" ); @@ -565,31 +564,135 @@ gen_next_match() /* generate the code to find the next state */ -gen_next_state() +gen_next_state( worry_about_NULs ) +int worry_about_NULs; { /* NOTE - changes in here should be reflected in get_next_match() */ - char *char_map = useecs ? "yy_ec[*yy_cp]" : "*yy_cp"; - - if ( fulltbl ) + char char_map[256]; + + if ( worry_about_NULs && ! nultrans ) { + if ( useecs ) + sprintf( char_map, "(*yy_cp ? yy_ec[*yy_cp] : %d)", NUL_ec ); + else + sprintf( char_map, "(*yy_cp ? *yy_cp : %d)", NUL_ec ); + } + + else + strcpy( char_map, useecs ? "yy_ec[*yy_cp]" : "*yy_cp" ); + + if ( worry_about_NULs && nultrans ) + { + if ( ! fulltbl && ! fullspd ) + /* compressed tables backtrack *before* they match */ + gen_backtracking(); + + indent_puts( "if ( *yy_cp )" ); + indent_up(); + indent_puts( "{" ); + } + + if ( fulltbl ) indent_put2s( "yy_current_state = yy_nxt[yy_current_state][%s];", char_map ); + + else if ( fullspd ) + indent_put2s( "yy_current_state += yy_current_state[%s].yy_nxt;", + char_map ); + + else + gen_next_compressed_state( char_map ); + + if ( worry_about_NULs && nultrans ) + { + indent_puts( "}" ); + indent_down(); + indent_puts( "else" ); + indent_up(); + indent_puts( "yy_current_state = yy_NUL_trans[yy_current_state];" ); + indent_down(); + } + + if ( fullspd || fulltbl ) gen_backtracking(); + + if ( reject ) + indent_puts( "*yy_state_ptr++ = yy_current_state;" ); + } + + +/* generate the code to make a NUL transition */ + +gen_NUL_trans() + + { /* NOTE - changes in here should be reflected in get_next_match() */ + int need_backtracking = (num_backtracking > 0 && ! 
reject); + + if ( need_backtracking ) + /* we'll need yy_cp lying around for the gen_backtracking() */ + indent_puts( "register YY_CHAR *yy_cp = yy_c_buf_p;" ); + + putchar( '\n' ); + + if ( nultrans ) + { + indent_puts( "yy_current_state = yy_NUL_trans[yy_current_state];" ); + indent_puts( "yy_is_jam = (yy_current_state == 0);" ); + } + + else if ( fulltbl ) + { + do_indent(); + printf( "yy_current_state = yy_nxt[yy_current_state][%d];\n", + NUL_ec ); + indent_puts( "yy_is_jam = (yy_current_state <= 0);" ); } else if ( fullspd ) { - indent_put2s( "yy_current_state += yy_current_state[%s].yy_nxt;", - char_map ); - gen_backtracking(); + do_indent(); + printf( "register int yy_c = %d;\n", NUL_ec ); + + indent_puts( + "register const struct yy_trans_info *yy_trans_info;\n" ); + indent_puts( "yy_trans_info = &yy_current_state[yy_c];" ); + indent_puts( "yy_current_state += yy_trans_info->yy_nxt;" ); + + indent_puts( "yy_is_jam = (yy_trans_info->yy_verify != yy_c);" ); } else { - gen_next_compressed_state(); + char NUL_ec_str[20]; + + sprintf( NUL_ec_str, "%d", NUL_ec ); + gen_next_compressed_state( NUL_ec_str ); if ( reject ) indent_puts( "*yy_state_ptr++ = yy_current_state;" ); + + do_indent(); + + if ( interactive ) + printf( "yy_is_jam = (yy_base[yy_current_state] == %d);\n", + jambase ); + else + printf( "yy_is_jam = (yy_current_state == %d);\n", jamstate ); + } + + /* if we've entered an accepting state, backtrack; note that + * compressed tables have *already* done such backtracking, so + * we needn't bother with it again + */ + if ( need_backtracking && (fullspd || fulltbl) ) + { + putchar( '\n' ); + indent_puts( "if ( ! 
yy_is_jam )" ); + indent_up(); + indent_puts( "{" ); + gen_backtracking(); + indent_puts( "}" ); + indent_down(); } } @@ -640,10 +743,6 @@ gentabs() /* *everything* is done in terms of arrays starting at 1, so provide * a null entry for the zero element of all C arrays */ - static char C_long_decl[] = - "static const long int %s[%d] =\n { 0,\n"; - static char C_short_decl[] = - "static const short int %s[%d] =\n { 0,\n"; static char C_char_decl[] = "static const YY_CHAR %s[%d] =\n { 0,\n"; @@ -972,18 +1071,40 @@ make_tables() else gentabs(); + if ( nultrans ) + { + printf( C_state_decl, "yy_NUL_trans", lastdfa + 1 ); + + for ( i = 1; i <= lastdfa; ++i ) + { + if ( fullspd ) + { + if ( nultrans ) + printf( " &yy_transition[%d],\n", base[i] ); + else + printf( " 0,\n" ); + } + + else + mkdata( nultrans[i] ); + } + + dataend(); + } + if ( reject ) { /* declare state buffer variables */ - puts( "yy_state_type yy_state_buf[YY_BUF_SIZE + 2], *yy_state_ptr;" ); - puts( "YY_CHAR *yy_full_match;" ); - puts( "int yy_lp;" ); + puts( + "static yy_state_type yy_state_buf[YY_BUF_SIZE + 2], *yy_state_ptr;" ); + puts( "static YY_CHAR *yy_full_match;" ); + puts( "static int yy_lp;" ); if ( variable_trailing_context_rules ) { - puts( "int yy_looking_for_trail_begin = 0;" ); - puts( "int yy_full_lp;" ); - puts( "int *yy_full_state;" ); + puts( "static int yy_looking_for_trail_begin = 0;" ); + puts( "static int yy_full_lp;" ); + puts( "static int *yy_full_state;" ); printf( "#define YY_TRAILING_MASK 0x%x\n", YY_TRAILING_MASK ); printf( "#define YY_TRAILING_HEAD_MASK 0x%x\n", YY_TRAILING_HEAD_MASK ); @@ -1062,10 +1183,13 @@ make_tables() skelout(); gen_start_state(); + + /* note, don't use any indentation */ + puts( "yy_match:" ); gen_next_match(); skelout(); - set_indent( 3 ); + set_indent( 2 ); gen_find_action(); /* copy actions from action_file to output file */ @@ -1091,6 +1215,28 @@ make_tables() } + /* generate code for handling NUL's, if needed */ + + /* first, deal with 
backtracking and setting up yy_cp if the scanner + * finds that it should JAM on the NUL + */ + skelout(); + set_indent( 7 ); + + if ( fullspd || fulltbl ) + indent_puts( "yy_cp = yy_c_buf_p;" ); + + else + { /* compressed table */ + if ( ! reject && ! interactive ) + { + /* do the guaranteed-needed backtrack to figure out the match */ + indent_puts( "yy_cp = yy_last_accepting_cpos;" ); + indent_puts( "yy_current_state = yy_last_accepting_state;" ); + } + } + + /* generate code for yy_get_previous_state() */ set_indent( 1 ); skelout(); @@ -1102,7 +1248,11 @@ make_tables() set_indent( 2 ); skelout(); - gen_next_state(); + gen_next_state( true ); + + set_indent( 1 ); + skelout(); + gen_NUL_trans(); skelout(); -- cgit v1.2.3 From b8381bd5765900c02b753d019a1a366e9b064d9e Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 16 Mar 1990 16:51:27 +0000 Subject: NUL's. -8 --- main.c | 57 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/main.c b/main.c index 95a1751..b8ffe12 100644 --- a/main.c +++ b/main.c @@ -60,7 +60,7 @@ int current_state_type; int variable_trailing_context_rules; int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP]; int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; -int numecs, nextecm[CSIZE], ecgroup[CSIZE], nummecs, tecfwd[CSIZE + 1]; +int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1]; int tecbck[CSIZE + 1]; int *xlation = (int *) 0; int num_xlations; @@ -69,7 +69,7 @@ char **scname; int current_max_dfa_size, current_max_xpairs; int current_max_template_xpairs, current_max_dfas; int lastdfa, *nxt, *chk, *tnxt; -int *base, *def, tblend, firstfree, **dss, *dfasiz; +int *base, *def, *nultrans, NUL_ec, tblend, firstfree, **dss, *dfasiz; union dfaacc_union *dfaacc; int *accsiz, *dhash, numas; int numsnpairs, jambase, jamstate; @@ -79,7 +79,7 @@ Char *ccltbl; char *starttime, *endtime, nmstr[MAXLINE]; int sectnum, nummt, 
hshcol, dfaeql, numeps, eps2, num_reallocs; int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; -int num_backtracking, bol_needed, uses_NUL; +int num_backtracking, bol_needed; FILE *temp_action_file; FILE *backtrack_file; int end_of_buffer_state; @@ -152,7 +152,6 @@ char **argv; "variable trailing context rules cannot be used with -f or -F" ); } - /* convert the ndfa to a dfa */ ntod(); /* generate the C state transition tables from the DFA */ @@ -334,6 +333,8 @@ char **argv; sawcmpflag = false; use_stdout = false; + csize = DEFAULT_CSIZE; + program_name = argv[0]; /* read flags */ @@ -394,7 +395,7 @@ char **argv; (int) arg[i] ); break; } - + goto get_next_arg; case 'd': @@ -454,6 +455,10 @@ char **argv; printstats = true; break; + case '8': + csize = CSIZE; + break; + default: lerrif( "unknown flag '%c'", (int) arg[i] ); break; @@ -528,9 +533,9 @@ get_next_arg: /* used by -C and -S flags in lieu of a "continue 2" control */ static char temp_action_file_name[32]; #ifndef SHORT_FILE_NAMES - strcpy( temp_action_file_name, "/tmp/flexXXXXXX" ); + (void) strcpy( temp_action_file_name, "/tmp/flexXXXXXX" ); #else - strcpy( temp_action_file_name, "flexXXXXXX.tmp" ); + (void) strcpy( temp_action_file_name, "flexXXXXXX.tmp" ); #endif (void) mktemp( temp_action_file_name ); @@ -544,24 +549,24 @@ get_next_arg: /* used by -C and -S flags in lieu of a "continue 2" control */ numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0; numuniq = numdup = hshsave = eofseen = datapos = dataline = 0; num_backtracking = onesp = numprots = 0; - variable_trailing_context_rules = bol_needed = uses_NUL = false; + variable_trailing_context_rules = bol_needed = false; linenum = sectnum = 1; firstprot = NIL; - csize = CSIZE; - /* used in mkprot() so that the first proto goes in slot 1 * of the proto queue */ lastprot = 1; if ( useecs ) - { - /* set up doubly-linked equivalence classes */ - ecgroup[0] = NIL; + { /* set up doubly-linked equivalence classes */ + /* We loop all 
the way up to csize, since ecgroup[csize] is the + * position used for NUL characters + */ + ecgroup[1] = NIL; - for ( i = 1; i < csize; ++i ) + for ( i = 2; i <= csize; ++i ) { ecgroup[i] = i - 1; nextecm[i - 1] = i; @@ -572,7 +577,7 @@ get_next_arg: /* used by -C and -S flags in lieu of a "continue 2" control */ else { /* put everything in its own equivalence class */ - for ( i = 0; i < csize; ++i ) + for ( i = 1; i <= csize; ++i ) { ecgroup[i] = i; nextecm[i] = BAD_SUBSCRIPT; /* to catch errors */ @@ -613,24 +618,22 @@ readin() if ( xlation ) { - ecs_from_xlation( ecgroup ); + numecs = ecs_from_xlation( ecgroup ); useecs = true; - numecs = num_xlations + 1; /* + 1 for characters not in %t table */ - ccl2ecl(); } else if ( useecs ) - { - if ( uses_NUL ) - numecs = cre8ecs( nextecm, ecgroup, csize, 0 ); - else - numecs = cre8ecs( nextecm, ecgroup, csize - 1, 1 ); - - ccl2ecl(); - } + numecs = cre8ecs( nextecm, ecgroup, csize ); else numecs = csize; + + /* now map the equivalence class for NUL to its expected place */ + ecgroup[0] = ecgroup[csize]; + NUL_ec = abs( ecgroup[0] ); + + if ( useecs ) + ccl2ecl(); } @@ -688,4 +691,6 @@ set_up_initial_allocations() dhash = allocate_integer_array( current_max_dfas ); dss = allocate_int_ptr_array( current_max_dfas ); dfaacc = allocate_dfaacc_union( current_max_dfas ); + + nultrans = (int *) 0; } -- cgit v1.2.3 From 66682f70ef866dc2f73f93275bb233c65a31e5b8 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 16 Mar 1990 16:51:45 +0000 Subject: Hack to cshell for NUL's. --- misc.c | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/misc.c b/misc.c index e7d8398..d4bc083 100644 --- a/misc.c +++ b/misc.c @@ -6,7 +6,7 @@ * * This code is derived from software contributed to Berkeley by * Vern Paxson. - * + * * The United States Government has rights in this work pursuant to * contract no. 
DE-AC03-76SF00098 between the United States Department of * Energy and the University of California. @@ -76,7 +76,7 @@ int size, element_size; */ if ( element_size * size <= 0 ) flexfatal( "request for < 1 byte in allocate_array()" ); - + mem = malloc( (unsigned) (element_size * size) ); if ( mem == NULL ) @@ -204,7 +204,7 @@ register char *str; for ( c = copy; (*c++ = *str++); ) ; - + return ( copy ); } @@ -235,7 +235,7 @@ register Char *str; for ( c = copy; (*c++ = *str++); ) ; - + return ( copy ); } @@ -245,19 +245,21 @@ register Char *str; * synopsis * * Char v[n]; - * int n; - * cshell( v, n ); + * int n, special_case_0; + * cshell( v, n, special_case_0 ); * * description * does a shell sort of the first n elements of array v. + * If special_case_0 is true, then any element equal to 0 + * is instead assumed to have infinite weight. * * passed * v - array to be sorted * n - number of elements of v to be sorted */ -cshell( v, n ) +cshell( v, n, special_case_0 ) Char v[]; -int n; +int n, special_case_0; { int gap, i, j, jg; @@ -269,7 +271,16 @@ int n; { jg = j + gap; - if ( v[j] <= v[jg] ) + if ( special_case_0 ) + { + if ( v[jg] == 0 ) + break; + + else if ( v[j] != 0 && v[j] <= v[jg] ) + break; + } + + else if ( v[j] <= v[jg] ) break; k = v[j]; @@ -461,7 +472,7 @@ line_directive_out( output_file_name ) FILE *output_file_name; { - if ( infilename && gen_line_dirs ) + if ( infilename && gen_line_dirs ) fprintf( output_file_name, "# line %d \"%s\"\n", linenum, infilename ); } @@ -589,7 +600,7 @@ Char array[]; { /* \ or \x */ Char c, esc_char; register int sptr = 1; - + if ( array[1] == 'x' ) ++sptr; @@ -670,10 +681,10 @@ register int c; return ( rform ); } } - + else if ( c == ' ' ) return ( "' '" ); - + else { rform[0] = c; @@ -696,12 +707,12 @@ int size, element_size; /* same worry as in allocate_array(): */ if ( size * element_size <= 0 ) flexfatal( "attempt to increase array size by less than 1 byte" ); - + new_array = realloc( array, (unsigned) (size * 
element_size )); if ( new_array == NULL ) flexfatal( "attempt to increase array size failed" ); - + return ( new_array ); } -- cgit v1.2.3 From bd6b6a7fad0811263f313e2c0ffabe8e24425370 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 16 Mar 1990 16:52:00 +0000 Subject: hack for NUL's. --- nfa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nfa.c b/nfa.c index 1d15a58..44067a2 100644 --- a/nfa.c +++ b/nfa.c @@ -642,7 +642,8 @@ int sym; else { if ( useecs ) - mkechar( sym, nextecm, ecgroup ); + /* map NUL's to csize */ + mkechar( sym ? sym : csize, nextecm, ecgroup ); } return ( lastnfa ); -- cgit v1.2.3 From c61ccdf8f096de4acf34cc40d99d4159a88681e8 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 16 Mar 1990 16:52:30 +0000 Subject: Nuked some extraneous whitespace. NUL's hacks. --- parse.y | 44 ++++++++++++++++---------------------------- 1 file changed, 16 insertions(+), 28 deletions(-) diff --git a/parse.y b/parse.y index 12479a8..a65fae1 100644 --- a/parse.y +++ b/parse.y @@ -6,7 +6,7 @@ * * This code is derived from software contributed to Berkeley by * Vern Paxson. - * + * * The United States Government has rights in this work pursuant to * contract no. DE-AC03-76SF00098 between the United States Department of * Energy and the University of California. 
@@ -83,7 +83,7 @@ initlex : scinstal( "INITIAL", false ); } ; - + sect1 : sect1 startconddecl WHITESPACE namelist1 '\n' | | error '\n' @@ -102,7 +102,7 @@ startconddecl : SCDECL xcluflg = false; } - + | XSCDECL { xcluflg = true; } ; @@ -132,7 +132,7 @@ initforrule : } ; -flexrule : scon '^' re eol +flexrule : scon '^' re eol { pat = link_machines( $3, $4 ); finish_rule( pat, variable_trail_rule, @@ -152,18 +152,18 @@ flexrule : scon '^' re eol } } - | scon re eol + | scon re eol { pat = link_machines( $2, $3 ); finish_rule( pat, variable_trail_rule, headcnt, trailcnt ); for ( i = 1; i <= actvp; ++i ) - scset[actvsc[i]] = + scset[actvsc[i]] = mkbranch( scset[actvsc[i]], pat ); } - | '^' re eol + | '^' re eol { pat = link_machines( $2, $3 ); finish_rule( pat, variable_trail_rule, @@ -187,7 +187,7 @@ flexrule : scon '^' re eol } } - | re eol + | re eol { pat = link_machines( $1, $2 ); finish_rule( pat, variable_trail_rule, @@ -381,7 +381,7 @@ singleton : singleton '*' $$ = mkclos( $1 ); } - + | singleton '+' { varlength = true; @@ -413,7 +413,7 @@ singleton : singleton '*' $$ = mkrep( $1, $3, $5 ); } } - + | singleton '{' NUMBER ',' '}' { varlength = true; @@ -458,8 +458,8 @@ singleton : singleton '*' if ( useecs ) mkeccl( ccltbl + cclmap[anyccl], ccllen[anyccl], nextecm, - ecgroup, csize ); - + ecgroup, csize, csize ); + madeany = true; } @@ -474,12 +474,12 @@ singleton : singleton '*' /* sort characters for fast searching. We use a * shell sort since this list could be large. 
*/ - cshell( ccltbl + cclmap[$1], ccllen[$1] ); + cshell( ccltbl + cclmap[$1], ccllen[$1], true ); if ( useecs ) mkeccl( ccltbl + cclmap[$1], ccllen[$1], - nextecm, ecgroup, csize ); - + nextecm, ecgroup, csize, csize ); + ++rulelen; $$ = mkstate( -$1 ); @@ -502,9 +502,6 @@ singleton : singleton '*' { ++rulelen; - if ( $1 == '\0' ) - uses_NUL = true; - if ( caseins && $1 >= 'A' && $1 <= 'Z' ) $1 = clower( $1 ); @@ -531,9 +528,6 @@ fullccl : '[' ccl ']' ccl : ccl CHAR '-' CHAR { - if ( $2 == '\0' || $4 == '\0' ) - uses_NUL = true; - if ( $2 > $4 ) synerr( "negative range in character class" ); @@ -556,15 +550,12 @@ ccl : ccl CHAR '-' CHAR cclsorted = cclsorted && ($2 > lastchar); lastchar = $4; } - + $$ = $1; } | ccl CHAR { - if ( $2 == '\0' ) - uses_NUL = true; - if ( caseins ) if ( $2 >= 'A' && $2 <= 'Z' ) $2 = clower( $2 ); @@ -585,9 +576,6 @@ ccl : ccl CHAR '-' CHAR string : string CHAR { - if ( $2 == '\0' ) - uses_NUL = true; - if ( caseins ) if ( $2 >= 'A' && $2 <= 'Z' ) $2 = clower( $2 ); -- cgit v1.2.3 -- cgit v1.2.3 From addb570c1861d641bcee12988a26a66243064f05 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 16 Mar 1990 16:53:29 +0000 Subject: Minor tweaks for NUL's. --- tblcmp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tblcmp.c b/tblcmp.c index ac822eb..2ffb38d 100644 --- a/tblcmp.c +++ b/tblcmp.c @@ -233,7 +233,7 @@ cmptmps() * transitions */ - nummecs = cre8ecs( tecfwd, tecbck, numecs, 1 ); + nummecs = cre8ecs( tecfwd, tecbck, numecs ); } else @@ -760,7 +760,7 @@ int state[], statenum, comstate; } if ( usemecs ) - mkeccl( transset, tsptr, tecfwd, tecbck, numecs ); + mkeccl( transset, tsptr, tecfwd, tecbck, numecs, 0 ); mkprot( tnxt + tmpbase, -numtemps, comstate ); -- cgit v1.2.3 From 9db5c563777e753b40bb1e9a60cc19d3636c8411 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 16 Mar 1990 16:55:24 +0000 Subject: Many hacks for NUL's. Hope they're right. 
--- ecs.c | 135 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 117 insertions(+), 18 deletions(-) diff --git a/ecs.c b/ecs.c index c7ca3bb..b3d7f93 100644 --- a/ecs.c +++ b/ecs.c @@ -64,7 +64,33 @@ ccl2ecl() { ich = ccltbl[cclp + ccls]; cclmec = ecgroup[ich]; - if ( cclmec > 0 ) + + if ( xlation && cclmec < 0 ) + { + /* special hack--if we're doing %t tables then it's + * possible that no representative of this character's + * equivalence class is in the ccl. So waiting till + * we see the representative would be disastrous. Instead, + * we add this character's equivalence class anyway, if it's + * not already present. + */ + int j; + + /* this loop makes this whole process n^2; but we don't + * really care about %t performance anyway + */ + for ( j = 0; j < newlen; ++j ) + if ( ccltbl[cclp + j] == -cclmec ) + break; + + if ( j >= newlen ) + { /* no representative yet, add this one in */ + ccltbl[cclp + newlen] = -cclmec; + ++newlen; + } + } + + else if ( cclmec > 0 ) { ccltbl[cclp + newlen] = cclmec; ++newlen; @@ -80,18 +106,16 @@ ccl2ecl() * * synopsis * int cre8ecs(); - * number of classes = cre8ecs( fwd, bck, num, start_pos ); + * number of classes = cre8ecs( fwd, bck, num ); * * fwd is the forward linked-list of equivalence class members. bck * is the backward linked-list, and num is the number of class members. - * start_pos is 0 if the class members begin in fwd[] and bck[] at - * position 0, and 1 if they begin at position 1. * * Returned is the number of classes. */ -int cre8ecs( fwd, bck, num, start_pos ) -int fwd[], bck[], num, start_pos; +int cre8ecs( fwd, bck, num ) +int fwd[], bck[], num; { int i, j, numcl; @@ -103,8 +127,7 @@ int fwd[], bck[], num, start_pos; * is positive, then x is the representative of its equivalence * class. 
*/ - - for ( i = start_pos; i < num + start_pos; ++i ) + for ( i = 1; i <= num; ++i ) if ( bck[i] == NIL ) { bck[i] = ++numcl; @@ -119,24 +142,82 @@ int fwd[], bck[], num, start_pos; /* ecs_from_xlation - associate equivalence class numbers using %t table * * synopsis - * ecs_from_xlation( ecmap ); + * numecs = ecs_from_xlation( ecmap ); * * Upon return, ecmap will map each character code to its equivalence * class. The mapping will be positive if the character is the representative * of its class, negative otherwise. + * + * Returns the number of equivalence classes used. */ -ecs_from_xlation( ecmap ) +int ecs_from_xlation( ecmap ) int ecmap[]; { int i; + int nul_is_alone = false; + int did_default_xlation_class = false; + + if ( xlation[0] != 0 ) + { + /* if NUL shares its translation with other characters, choose one + * of the other characters as the representative for the equivalence + * class. This allows a cheap test later to see whether we can + * do away with NUL's equivalence class. + */ + for ( i = 1; i < csize; ++i ) + if ( xlation[i] == -xlation[0] ) + { + xlation[i] = xlation[0]; + ecmap[0] = -xlation[0]; + break; + } - for ( i = (uses_NUL ? 
0 : 1); i < csize; ++i ) + if ( i >= csize ) + /* didn't find a companion character--remember this fact */ + nul_is_alone = true; + } + + for ( i = 1; i < csize; ++i ) if ( xlation[i] == 0 ) - ecmap[i] = num_xlations + 1; + { + if ( did_default_xlation_class ) + ecmap[i] = -num_xlations; + + else + { + /* make an equivalence class for those characters not + * specified in the %t table + */ + ++num_xlations; + ecmap[i] = num_xlations; + did_default_xlation_class = true; + } + } + else ecmap[i] = xlation[i]; + + if ( nul_is_alone ) + /* force NUL's equivalence class to be the last one */ + { + ++num_xlations; + ecmap[0] = num_xlations; + + /* there's actually a bug here: if someone is fanatic enough to + * put every character in its own translation class, then right + * now we just promoted NUL's equivalence class to be csize + 1; + * we can handle NUL's class number being == csize (by instead + * putting it in its own table), but we can't handle some *other* + * character having to be put in its own table, too. So in + * this case we bail out. + */ + if ( num_xlations > csize ) + flexfatal( "too many %t classes!" ); + } + + return num_xlations; } @@ -144,17 +225,19 @@ int ecmap[]; * * synopsis * Char ccls[]; - * int lenccl, fwd[llsiz], bck[llsiz], llsiz; - * mkeccl( ccls, lenccl, fwd, bck, llsiz ); + * int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping; + * mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping ); * * where ccls contains the elements of the character class, lenccl is the * number of elements in the ccl, fwd is the forward link-list of equivalent * characters, bck is the backward link-list, and llsiz size of the link-list + * + * NUL_mapping is the value which NUL (0) should be mapped to. 
*/ -mkeccl( ccls, lenccl, fwd, bck, llsiz ) +mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping ) Char ccls[]; -int lenccl, fwd[], bck[], llsiz; +int lenccl, fwd[], bck[], llsiz, NUL_mapping; { int cclp, oldec, newec; @@ -170,6 +253,10 @@ int lenccl, fwd[], bck[], llsiz; while ( cclp < lenccl ) { cclm = ccls[cclp]; + + if ( NUL_mapping && cclm == 0 ) + cclm = NUL_mapping; + oldec = bck[cclm]; newec = cclm; @@ -177,8 +264,19 @@ int lenccl, fwd[], bck[], llsiz; for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] ) { /* look for the symbol in the character class */ - for ( ; j < lenccl && (ccls[j] <= i || cclflags[j]); ++j ) - if ( ccls[j] == i ) + for ( ; j < lenccl; ++j ) + { + register int ccl_char; + + if ( NUL_mapping && ccls[j] == 0 ) + ccl_char = NUL_mapping; + else + ccl_char = ccls[j]; + + if ( ccl_char > i ) + break; + + if ( ccl_char == i && ! cclflags[j] ) { /* we found an old companion of cclm in the ccl. * link it into the new equivalence class and flag it as @@ -194,6 +292,7 @@ int lenccl, fwd[], bck[], llsiz; /* continue 2 */ goto next_pt; } + } /* symbol isn't in character class. Put it in the old equivalence * class -- cgit v1.2.3 -- cgit v1.2.3 From 9099540b4491d86735b74852d9dcea21c37b2cc0 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 19 Mar 1990 16:34:39 +0000 Subject: Proto hacks. NUL hacks. Debugging hacks. C++ hacks. 
--- flex.skl | 221 ++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 119 insertions(+), 102 deletions(-) diff --git a/flex.skl b/flex.skl index eba3558..68cace6 100644 --- a/flex.skl +++ b/flex.skl @@ -35,6 +35,12 @@ #define const #endif +#ifdef YY_USE_PROTOS +#define YY_PROTO(proto) proto +#else +#define YY_PROTO(proto) () +#endif + /* amount of stuff to slurp up with each read */ #ifndef YY_READ_BUF_SIZE #define YY_READ_BUF_SIZE 8192 @@ -50,7 +56,10 @@ /* copy whatever the last rule matched to the standard output */ /* cast to (char *) is because for 8-bit chars, yytext is (unsigned char *) */ -#define ECHO fputs( (char *) yytext, yyout ) +/* this used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite() + */ +#define ECHO fwrite( (char *) yytext, yyleng, 1, yyout ) /* gets input and stuffs it into "buf". number of characters read, or YY_NULL, * is returned in "result". @@ -87,11 +96,7 @@ /* default declaration of generated scanner - a define so the user can * easily add parameters */ -#ifdef YY_USE_PROTOS -#define YY_DECL int yylex( void ) -#else -#define YY_DECL int yylex() -#endif +#define YY_DECL int yylex YY_PROTO(( void )) /* code executed at the end of each rule */ #define YY_BREAK break; @@ -110,10 +115,9 @@ *yy_cp = '\0'; \ yy_c_buf_p = yy_cp; -#define EOB_ACT_NUL_TRANS 0 -#define EOB_ACT_CONTINUE_SCAN 1 -#define EOB_ACT_END_OF_FILE 2 -#define EOB_ACT_LAST_MATCH 3 +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 /* return all but the first 'n' matched characters back to the input stream */ #define yyless(n) \ @@ -129,6 +133,7 @@ extern YY_CHAR *yytext; extern int yyleng; +extern FILE *yyin, *yyout; YY_CHAR *yytext; int yyleng; @@ -161,20 +166,16 @@ static YY_CHAR yy_hold_char; static yy_state_type yy_last_accepting_state; static YY_CHAR *yy_last_accepting_cpos; -#ifdef YY_USE_PROTOS -static yy_state_type yy_get_previous_state( void ); -static 
yy_state_type yy_try_NUL_trans( void ); -static int yy_get_next_buffer( void ); -static void yyunput( int c, YY_CHAR *buf_ptr ); -static int input( void ); -void yyrestart( FILE *input_file ); +static yy_state_type yy_get_previous_state YY_PROTO(( void )); +static yy_state_type yy_try_NUL_trans YY_PROTO(( void )); +static int yy_get_next_buffer YY_PROTO(( void )); +static void yyunput YY_PROTO(( int c, YY_CHAR *buf_ptr )); +void yyrestart YY_PROTO(( FILE *input_file )); + +#ifdef __cplusplus +static int yyinput YY_PROTO(( void )); #else -static yy_state_type yy_get_previous_state(); -static yy_state_type yy_try_NUL_trans(); -static int yy_get_next_buffer(); -static void yyunput(); -static int input(); -void yyrestart(); +static int input YY_PROTO(( void )); #endif YY_DECL @@ -243,12 +244,10 @@ yy_find_action: YY_DO_BEFORE_ACTION; YY_USER_ACTION; -#ifdef FLEX_DEBUG - fprintf( stderr, "--accepting rule #%d (\"%s\")\n", - yy_act, yytext ); -#endif - do_action: /* this label is used only to access EOF actions */ + +%% debug code goes here + switch ( yy_act ) { %% actions go here @@ -261,7 +260,44 @@ do_action: /* this label is used only to access EOF actions */ /* undo the effects of YY_DO_BEFORE_ACTION */ *yy_cp = yy_hold_char; - switch ( yy_get_next_buffer() ) + if ( yy_c_buf_p < &yy_ch_buf[yy_n_chars + 1] ) + /* this was really a NUL */ + { + yy_state_type yy_next_state; + + yy_c_buf_p = yytext + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + /* okay, we're now positioned to make the + * NUL transition. 
We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we + * don't want to build jamming into it because + * then it will run more slowly) + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state ); + + yy_bp = yytext + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* consume the NUL */ + yy_cp = ++yy_c_buf_p; + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { +%% code to do backtracking for compressed tables and set up yy_cp goes here + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer() ) { case EOB_ACT_END_OF_FILE: { @@ -286,42 +322,6 @@ do_action: /* this label is used only to access EOF actions */ } break; - case EOB_ACT_NUL_TRANS: - { - yy_state_type yy_next_state; - - yy_c_buf_p = yytext + yy_amount_of_matched_text; - - yy_current_state = yy_get_previous_state(); - - /* okay, we're now positioned to make the - * NUL transition. We couldn't have - * yy_get_previous_state() go ahead and do it - * for us because it doesn't know how to deal - * with the possibility of jamming (and we - * don't want to build jamming into it because - * then it will run more slowly) - */ - - yy_next_state = yy_try_NUL_trans( yy_current_state ); - - yy_bp = yytext + YY_MORE_ADJ; - - if ( yy_next_state ) - { - /* consume the NUL */ - yy_cp = ++yy_c_buf_p; - yy_current_state = yy_next_state; - goto yy_match; - } - - else - { -%% code to do backtracking for compressed tables and set up yy_cp goes here - goto yy_find_action; - } - } - case EOB_ACT_CONTINUE_SCAN: yy_c_buf_p = yytext + yy_amount_of_matched_text; @@ -347,7 +347,8 @@ do_action: /* this label is used only to access EOF actions */ #ifdef FLEX_DEBUG printf( "action # %d\n", yy_act ); #endif - YY_FATAL_ERROR( "fatal flex scanner internal error" ); + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); } } } @@ -372,9 +373,9 @@ static int yy_get_next_buffer() register int 
number_to_move, i; int ret_val; - if ( yy_c_buf_p < &yy_ch_buf[yy_n_chars + 1] ) - /* this happens when we see a NUL */ - return ( EOB_ACT_NUL_TRANS ); + if ( yy_c_buf_p > &yy_ch_buf[yy_n_chars + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); /* try to read more data */ @@ -397,6 +398,9 @@ static int yy_get_next_buffer() if ( num_to_read > YY_READ_BUF_SIZE ) num_to_read = YY_READ_BUF_SIZE; + else if ( num_to_read <= 0 ) + YY_FATAL_ERROR( "fatal error - scanner input buffer overflow" ); + /* read in more data */ YY_INPUT( (&yy_ch_buf[number_to_move]), yy_n_chars, num_to_read ); } @@ -514,7 +518,11 @@ register YY_CHAR *yy_bp; } +#ifdef __cplusplus +static int yyinput() +#else static int input() +#endif { int c; @@ -523,47 +531,56 @@ static int input() *yy_cp = yy_hold_char; if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) - { /* need more input */ - yytext = yy_c_buf_p; - ++yy_c_buf_p; + if ( yy_c_buf_p < &yy_ch_buf[yy_n_chars + 1] ) + /* this was really a NUL */ + *yy_c_buf_p = '\0'; - switch ( yy_get_next_buffer() ) - { - /* this code, unfortunately, is somewhat redundant with - * that above - */ - case EOB_ACT_END_OF_FILE: + else + { /* need more input */ + yytext = yy_c_buf_p; + ++yy_c_buf_p; + + switch ( yy_get_next_buffer() ) { - if ( yywrap() ) + /* this code, unfortunately, is somewhat redundant with + * that above + */ + case EOB_ACT_END_OF_FILE: { - yy_c_buf_p = yytext + YY_MORE_ADJ; - return ( EOF ); - } - - yy_ch_buf[0] = '\n'; - yy_n_chars = 1; - yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; - yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; - yy_eof_has_been_seen = 0; - yytext = yy_c_buf_p = &yy_ch_buf[1]; - yy_hold_char = *yy_c_buf_p; + if ( yywrap() ) + { + yy_c_buf_p = yytext + YY_MORE_ADJ; + return ( EOF ); + } - return ( input() ); - } - break; + yy_ch_buf[0] = '\n'; + yy_n_chars = 1; + yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; + yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + yy_eof_has_been_seen = 0; 
+ yytext = yy_c_buf_p = &yy_ch_buf[1]; + yy_hold_char = *yy_c_buf_p; - case EOB_ACT_NUL_TRANS: - *yy_c_buf_p = '\0'; - break; +#ifdef __cplusplus + return ( yyinput() ); +#else + return ( input() ); +#endif + } + break; - case EOB_ACT_CONTINUE_SCAN: - yy_c_buf_p = yytext + YY_MORE_ADJ; - break; + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = yytext + YY_MORE_ADJ; + break; - case EOB_ACT_LAST_MATCH: - YY_FATAL_ERROR( "unexpected last match in input()" ); + case EOB_ACT_LAST_MATCH: +#ifdef __cplusplus + YY_FATAL_ERROR( "unexpected last match in yyinput()" ); +#else + YY_FATAL_ERROR( "unexpected last match in input()" ); +#endif + } } - } c = *yy_c_buf_p; yy_hold_char = *++yy_c_buf_p; -- cgit v1.2.3 From 3cb1282de13e2f4ce3ebf517a9df26fbe0297f88 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 11:30:35 +0000 Subject: -8 tweaks. --- dfa.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/dfa.c b/dfa.c index a35b307..5502a55 100644 --- a/dfa.c +++ b/dfa.c @@ -776,7 +776,6 @@ int sns[], numstates, accset[], nacc, hashval, *newds_addr; int didsort = 0; register int i, j; int newds, *oldsns; - char *malloc(); for ( i = 1; i <= lastdfa; ++i ) if ( hashval == dhash[i] ) @@ -1011,8 +1010,14 @@ int symlist[]; if ( tch != SYM_EPSILON ) { if ( tch < -lastccl || tch > csize ) - flexfatal( - "bad transition character detected in sympartition()" ); + { + if ( tch > csize && tch <= CSIZE ) + flexerror( "scanner requires -8 flag" ); + + else + flexfatal( + "bad transition character detected in sympartition()" ); + } if ( tch >= 0 ) { /* character transition */ -- cgit v1.2.3 From 236fedcd5ef4b90a668e3fb4b9c9f9a9120349a9 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 11:31:08 +0000 Subject: Many multiple-buffer additions. 
--- flex.skl | 305 ++++++++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 223 insertions(+), 82 deletions(-) diff --git a/flex.skl b/flex.skl index 68cace6..a88d297 100644 --- a/flex.skl +++ b/flex.skl @@ -37,8 +37,10 @@ #ifdef YY_USE_PROTOS #define YY_PROTO(proto) proto +char *malloc( unsigned size ); #else #define YY_PROTO(proto) () +char *malloc(); #endif /* amount of stuff to slurp up with each read */ @@ -46,10 +48,6 @@ #define YY_READ_BUF_SIZE 8192 #endif -#ifndef YY_BUF_SIZE -#define YY_BUF_SIZE (YY_READ_BUF_SIZE * 2) /* size of input buffer */ -#endif - /* returned upon end-of-file */ #define YY_END_TOK 0 @@ -68,7 +66,7 @@ if ( (result = read( fileno(yyin), (char *) buf, max_size )) < 0 ) \ YY_FATAL_ERROR( "read() in flex scanner failed" ); #define YY_NULL 0 -#define yyterminate() return ( YY_NULL ) +#define yyterminate() return ( YY_NULL ); /* report a fatal error */ #define YY_FATAL_ERROR(msg) \ @@ -91,7 +89,10 @@ #define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) /* special action meaning "start processing a new file" */ -#define YY_NEW_FILE goto new_file +#define YY_NEW_FILE { \ + yy_init_buffer( yy_current_buffer, yyin ); \ + yy_load_buffer_state(); \ + } /* default declaration of generated scanner - a define so the user can * easily add parameters @@ -103,6 +104,12 @@ #define YY_END_OF_BUFFER_CHAR 0 +#ifndef YY_BUF_SIZE +#define YY_BUF_SIZE (YY_READ_BUF_SIZE * 2) /* size of default input buffer */ +#endif + +typedef struct yy_buffer_state *YY_BUFFER_STATE; + %% section 1 definitions go here /* done after the current pattern has been matched and before the @@ -122,14 +129,53 @@ /* return all but the first 'n' matched characters back to the input stream */ #define yyless(n) \ { \ - *yy_cp = yy_hold_char; /* undo effects of setting up yytext */ \ + /* undo effects of setting up yytext */ \ + *yy_cp = yy_hold_char; \ yy_c_buf_p = yy_cp = yy_bp + n; \ YY_DO_BEFORE_ACTION; /* set up yytext again */ \ } #define unput(c) 
yyunput( c, yytext ) + + +struct yy_buffer_state + { + FILE *yy_input_file; + + YY_CHAR *yy_ch_buf; /* input buffer */ + YY_CHAR *yy_buf_pos; /* current position in input buffer */ + int yy_buf_size; /* size of input buffer in bytes */ + int yy_n_chars; /* number of characters read into yy_ch_buf */ + + int yy_eof_status; /* whether we've seen an EOF on this buffer */ +#define EOF_NOT_SEEN 0 + /* "pending" happens when the EOF has been seen but there's still + * some text to process + */ +#define EOF_PENDING 1 +#define EOF_DONE 2 + }; + +static struct yy_buffer_state *yy_original_buffer; +static struct yy_buffer_state *yy_current_buffer; + +/* we provide macros for accessing the buffer states in case in the + * future we want to put the buffer states in a more general "scanner state" + */ +#define YY_ORIGINAL_BUFFER yy_original_buffer +#define YY_CURRENT_BUFFER yy_current_buffer + + +/* yy_hold_char holds the character lost when yytext is formed */ +static YY_CHAR yy_hold_char; + +static int yy_n_chars; /* number of characters read into yy_ch_buf */ + + + +#ifndef YY_USER_ACTION #define YY_USER_ACTION +#endif extern YY_CHAR *yytext; extern int yyleng; @@ -145,24 +191,11 @@ FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; /* these variables are all declared out here so that section 3 code can * manipulate them */ -static YY_CHAR *yy_c_buf_p; /* points to current character in buffer */ +/* points to current character in buffer */ +static YY_CHAR *yy_c_buf_p = (YY_CHAR *) 0; static int yy_init = 1; /* whether we need to initialize */ static int yy_start = 0; /* start state number */ -/* true when we've seen an EOF for the current input file */ -static int yy_eof_has_been_seen; - -static int yy_n_chars; /* number of characters read into yy_ch_buf */ - -/* yy_ch_buf has to be 2 characters longer than YY_BUF_SIZE because we need - * to put in 2 end-of-buffer characters (this is explained where it is - * done) at the end of yy_ch_buf - */ -static YY_CHAR yy_ch_buf[YY_BUF_SIZE + 
2]; - -/* yy_hold_char holds the character lost when yytext is formed */ -static YY_CHAR yy_hold_char; - static yy_state_type yy_last_accepting_state; static YY_CHAR *yy_last_accepting_cpos; @@ -171,6 +204,11 @@ static yy_state_type yy_try_NUL_trans YY_PROTO(( void )); static int yy_get_next_buffer YY_PROTO(( void )); static void yyunput YY_PROTO(( int c, YY_CHAR *buf_ptr )); void yyrestart YY_PROTO(( FILE *input_file )); +void yy_switch_to_buffer YY_PROTO(( struct yy_buffer_state *new_buffer )); +void yy_load_buffer_state YY_PROTO(( void )); +struct yy_buffer_state *yy_create_buffer YY_PROTO(( FILE *file, int size )); +void yy_delete_buffer YY_PROTO(( struct yy_buffer_state *b )); +void yy_init_buffer YY_PROTO(( struct yy_buffer_state *b, FILE *file )); #ifdef __cplusplus static int yyinput YY_PROTO(( void )); @@ -197,29 +235,14 @@ YY_DECL if ( ! yyout ) yyout = stdout; -new_file: - /* this is where we enter upon encountering an end-of-file and - * yywrap() indicating that we should continue processing - */ - - /* we put in the '\n' and start reading from [1] so that an - * initial match-at-newline will be true. - */ - - yy_ch_buf[0] = '\n'; - yy_n_chars = 1; - - /* we always need two end-of-buffer characters. The first causes - * a transition to the end-of-buffer state. The second causes - * a jam in that state. 
- */ - yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; - yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + if ( yy_current_buffer ) + yy_init_buffer( yy_current_buffer, yyin ); + else + yy_original_buffer = + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); - yy_eof_has_been_seen = 0; + yy_load_buffer_state(); - yytext = yy_c_buf_p = &yy_ch_buf[1]; - yy_hold_char = *yy_c_buf_p; yy_init = 0; } @@ -260,7 +283,8 @@ do_action: /* this label is used only to access EOF actions */ /* undo the effects of YY_DO_BEFORE_ACTION */ *yy_cp = yy_hold_char; - if ( yy_c_buf_p < &yy_ch_buf[yy_n_chars + 1] ) + if ( yy_c_buf_p < + &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] ) /* this was really a NUL */ { yy_state_type yy_next_state; @@ -332,7 +356,8 @@ do_action: /* this label is used only to access EOF actions */ goto yy_match; case EOB_ACT_LAST_MATCH: - yy_c_buf_p = &yy_ch_buf[yy_n_chars]; + yy_c_buf_p = + &yy_current_buffer->yy_ch_buf[yy_n_chars]; yy_current_state = yy_get_previous_state(); @@ -368,12 +393,12 @@ do_action: /* this label is used only to access EOF actions */ static int yy_get_next_buffer() { - register YY_CHAR *dest = yy_ch_buf; + register YY_CHAR *dest = yy_current_buffer->yy_ch_buf; register YY_CHAR *source = yytext - 1; /* copy prev. 
char, too */ register int number_to_move, i; int ret_val; - if ( yy_c_buf_p > &yy_ch_buf[yy_n_chars + 1] ) + if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] ) YY_FATAL_ERROR( "fatal flex scanner internal error--end of buffer missed" ); @@ -385,7 +410,7 @@ static int yy_get_next_buffer() for ( i = 0; i < number_to_move; ++i ) *(dest++) = *(source++); - if ( yy_eof_has_been_seen ) + if ( yy_current_buffer->yy_eof_status != EOF_NOT_SEEN ) /* don't do the read, it's not guaranteed to return an EOF, * just force an EOF */ @@ -393,7 +418,7 @@ static int yy_get_next_buffer() else { - int num_to_read = YY_BUF_SIZE - number_to_move - 1; + int num_to_read = yy_current_buffer->yy_buf_size - number_to_move - 1; if ( num_to_read > YY_READ_BUF_SIZE ) num_to_read = YY_READ_BUF_SIZE; @@ -402,35 +427,39 @@ static int yy_get_next_buffer() YY_FATAL_ERROR( "fatal error - scanner input buffer overflow" ); /* read in more data */ - YY_INPUT( (&yy_ch_buf[number_to_move]), yy_n_chars, num_to_read ); + YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]), + yy_n_chars, num_to_read ); } if ( yy_n_chars == 0 ) { if ( number_to_move == 1 ) + { ret_val = EOB_ACT_END_OF_FILE; + yy_current_buffer->yy_eof_status = EOF_DONE; + } + else + { ret_val = EOB_ACT_LAST_MATCH; - - yy_eof_has_been_seen = 1; + yy_current_buffer->yy_eof_status = EOF_PENDING; + } } else ret_val = EOB_ACT_CONTINUE_SCAN; yy_n_chars += number_to_move; - yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; - yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; - - /* yytext begins at the second character in - * yy_ch_buf; the first character is the one which - * preceded it before reading in the latest buffer; - * it needs to be kept around in case it's a - * newline, so yy_get_previous_state() will have - * with '^' rules active + yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; + yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + + /* yytext begins at the second character in 
yy_ch_buf; the first + * character is the one which preceded it before reading in the latest + * buffer; it needs to be kept around in case it's a newline, so + * yy_get_previous_state() will have with '^' rules active */ - yytext = &yy_ch_buf[1]; + yytext = &yy_current_buffer->yy_ch_buf[1]; return ( ret_val ); } @@ -487,22 +516,25 @@ register YY_CHAR *yy_bp; { register YY_CHAR *yy_cp = yy_c_buf_p; - *yy_cp = yy_hold_char; /* undo effects of setting up yytext */ + /* undo effects of setting up yytext */ + *yy_cp = yy_hold_char; - if ( yy_cp < yy_ch_buf + 2 ) + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) { /* need to shift things up to make room */ register int number_to_move = yy_n_chars + 2; /* +2 for EOB chars */ - register YY_CHAR *dest = &yy_ch_buf[YY_BUF_SIZE + 2]; - register YY_CHAR *source = &yy_ch_buf[number_to_move]; + register YY_CHAR *dest = + &yy_current_buffer->yy_ch_buf[yy_current_buffer->yy_buf_size + 2]; + register YY_CHAR *source = + &yy_current_buffer->yy_ch_buf[number_to_move]; - while ( source > yy_ch_buf ) + while ( source > yy_current_buffer->yy_ch_buf ) *--dest = *--source; yy_cp += dest - source; yy_bp += dest - source; - yy_n_chars = YY_BUF_SIZE; + yy_n_chars = yy_current_buffer->yy_buf_size; - if ( yy_cp < yy_ch_buf + 2 ) + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) YY_FATAL_ERROR( "flex scanner push-back overflow" ); } @@ -531,7 +563,8 @@ static int input() *yy_cp = yy_hold_char; if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) - if ( yy_c_buf_p < &yy_ch_buf[yy_n_chars + 1] ) + { + if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] ) /* this was really a NUL */ *yy_c_buf_p = '\0'; @@ -542,9 +575,6 @@ static int input() switch ( yy_get_next_buffer() ) { - /* this code, unfortunately, is somewhat redundant with - * that above - */ case EOB_ACT_END_OF_FILE: { if ( yywrap() ) @@ -553,13 +583,7 @@ static int input() return ( EOF ); } - yy_ch_buf[0] = '\n'; - yy_n_chars = 1; - yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; - 
yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; - yy_eof_has_been_seen = 0; - yytext = yy_c_buf_p = &yy_ch_buf[1]; - yy_hold_char = *yy_c_buf_p; + YY_NEW_FILE; #ifdef __cplusplus return ( yyinput() ); @@ -581,6 +605,7 @@ static int input() #endif } } + } c = *yy_c_buf_p; yy_hold_char = *++yy_c_buf_p; @@ -597,6 +622,122 @@ FILE *input_file; #endif { - yyin = input_file; - yy_init = 1; + yy_init_buffer( yy_current_buffer, input_file ); + yy_load_buffer_state(); + } + + +#ifdef YY_USE_PROTOS +void yy_switch_to_buffer( struct yy_buffer_state *new_buffer ) +#else +void yy_switch_to_buffer( new_buffer ) +struct yy_buffer_state *new_buffer; +#endif + + { + if ( yy_current_buffer == new_buffer ) + return; + + if ( yy_current_buffer ) + { + /* flush out information for old buffer */ + *yy_c_buf_p = yy_hold_char; + yy_current_buffer->yy_buf_pos = yy_c_buf_p; + yy_current_buffer->yy_n_chars = yy_n_chars; + } + + yy_current_buffer = new_buffer; + yy_load_buffer_state(); + } + + +#ifdef YY_USE_PROTOS +void yy_load_buffer_state( void ) +#else +void yy_load_buffer_state() +#endif + + { + yy_n_chars = yy_current_buffer->yy_n_chars; + yytext = yy_c_buf_p = yy_current_buffer->yy_buf_pos; + yyin = yy_current_buffer->yy_input_file; + yy_hold_char = *yy_c_buf_p; + } + + +#ifdef YY_USE_PROTOS +struct yy_buffer_state *yy_create_buffer( FILE *file, int size ) +#else +struct yy_buffer_state *yy_create_buffer( file, size ) +FILE *file; +int size; +#endif + + { + struct yy_buffer_state *b; + + b = (struct yy_buffer_state *) malloc( sizeof( struct yy_buffer_state ) ); + + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. + */ + b->yy_buf_size = size + 2; + b->yy_ch_buf = (YY_CHAR *) malloc( b->yy_buf_size ); + + if ( ! 
b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + yy_init_buffer( b, file ); + + return ( b ); + } + + +#ifdef YY_USE_PROTOS +void yy_delete_buffer( struct yy_buffer_state *b ) +#else +void yy_delete_buffer( b ) +struct yy_buffer_state *b; +#endif + + { + if ( b == yy_current_buffer ) + yy_current_buffer = (struct yy_buffer_state *) 0; + + free( (char *) b->yy_ch_buf ); + free( (char *) b ); + } + + +#ifdef YY_USE_PROTOS +void yy_init_buffer( struct yy_buffer_state *b, FILE *file ) +#else +void yy_init_buffer( b, file ) +struct yy_buffer_state *b; +FILE *file; +#endif + + { + b->yy_input_file = file; + + /* we put in the '\n' and start reading from [1] so that an + * initial match-at-newline will be true. + */ + + b->yy_ch_buf[0] = '\n'; + b->yy_n_chars = 1; + + /* we always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. + */ + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[2] = YY_END_OF_BUFFER_CHAR; + + b->yy_buf_pos = &b->yy_ch_buf[1]; + + b->yy_eof_status = EOF_NOT_SEEN; } -- cgit v1.2.3 From f975d39a452e1a4cfd6fc40689404a31458f6806 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 11:31:58 +0000 Subject: defines for malloc() and realloc() conditional defines for abs(), min(), and max() --- flexdef.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/flexdef.h b/flexdef.h index 9df7899..4146fe6 100644 --- a/flexdef.h +++ b/flexdef.h @@ -52,7 +52,9 @@ #ifdef AMIGA #define bzero(s, n) setmem((char *)(s), n, '\0') +#ifndef abs #define abs(x) ((x) < 0 ? 
-(x) : (x)) +#endif #else #define bzero(s, n) memset((char *)(s), '\0', n) #endif @@ -83,6 +85,8 @@ char *sprintf(); /* keep lint happy */ #endif #endif +char *malloc(), *realloc(); + /* maximum line length we'll have to deal with */ #define MAXLINE BUFSIZ @@ -90,11 +94,17 @@ char *sprintf(); /* keep lint happy */ /* maximum size of file name */ #define FILENAMESIZE 1024 +#ifndef min #define min(x,y) ((x) < (y) ? (x) : (y)) +#endif +#ifndef max #define max(x,y) ((x) > (y) ? (x) : (y)) +#endif #ifdef MS_DOS +#ifndef abs #define abs(x) ((x) < 0 ? -(x) : (x)) +#endif #define SHORT_FILE_NAMES #endif -- cgit v1.2.3 From 54364715f89bf5061738a09acc822c20aae12424 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 11:32:30 +0000 Subject: full support for -d --- gen.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/gen.c b/gen.c index a5d8873..7f8ec61 100644 --- a/gen.c +++ b/gen.c @@ -1092,6 +1092,16 @@ make_tables() dataend(); } + if ( ddebug ) + { /* spit out table mapping rules to line numbers */ + printf( C_short_decl, "yy_rule_linenum", num_rules ); + + for ( i = 1; i < num_rules; ++i ) + mkdata( rule_linenum[i] ); + + dataend(); + } + if ( reject ) { /* declare state buffer variables */ @@ -1192,6 +1202,43 @@ make_tables() set_indent( 2 ); gen_find_action(); + skelout(); + if ( ddebug ) + { + indent_puts( "if ( yy_act == 0 )" ); + indent_up(); + indent_puts( "fprintf( stderr, \"--scanner backtracking\\n\" );" ); + indent_down(); + + do_indent(); + printf( "else if ( yy_act < %d )\n", num_rules ); + indent_up(); + indent_puts( + "fprintf( stderr, \"--accepting rule at line %d (\\\"%s\\\")\\n\"," ); + indent_puts( " yy_rule_linenum[yy_act], yytext );" ); + indent_down(); + + do_indent(); + printf( "else if ( yy_act == %d )\n", num_rules ); + indent_up(); + indent_puts( + "fprintf( stderr, \"--accepting default rule (\\\"%s\\\")\\n\"," ); + indent_puts( " yytext );" ); + indent_down(); + + do_indent(); + 
printf( "else if ( yy_act == %d )\n", num_rules + 1 ); + indent_up(); + indent_puts( "fprintf( stderr, \"--(end of buffer or a NUL)\\n\" );" ); + indent_down(); + + do_indent(); + printf( "else\n" ); + indent_up(); + indent_puts( "fprintf( stderr, \"--EOF\\n\" );" ); + indent_down(); + } + /* copy actions from action_file to output file */ skelout(); indent_up(); -- cgit v1.2.3 From c14fda487dab878d69f063a3396fc5e278413db2 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 11:32:59 +0000 Subject: Summary of generation flags. Minor -8 tweaks. --- main.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/main.c b/main.c index b8ffe12..148febf 100644 --- a/main.c +++ b/main.c @@ -94,6 +94,8 @@ static char *outfile = "lex.yy.c"; static char *outfile = "lexyy.c"; #endif static int outfile_created = 0; +static int use_stdout; +static char *skelname = NULL; main( argc, argv ) @@ -222,6 +224,47 @@ int status; fprintf( stderr, " started at %s, finished at %s\n", starttime, endtime ); + fprintf( stderr, " scanner options: -" ); + + if ( backtrack_report ) + putc( 'b', stderr ); + if ( ddebug ) + putc( 'd', stderr ); + if ( interactive ) + putc( 'I', stderr ); + if ( caseins ) + putc( 'i', stderr ); + if ( ! gen_line_dirs ) + putc( 'L', stderr ); + if ( performance_report ) + putc( 'p', stderr ); + if ( spprdflt ) + putc( 's', stderr ); + if ( use_stdout ) + putc( 't', stderr ); + if ( trace ) + putc( 'T', stderr ); + if ( printstats ) + putc( 'v', stderr ); /* always true! 
*/ + if ( csize == 256 ) + putc( '8', stderr ); + + fprintf( stderr, " -C" ); + + if ( fulltbl ) + putc( 'f', stderr ); + if ( fullspd ) + putc( 'F', stderr ); + if ( useecs ) + putc( 'e', stderr ); + if ( usemecs ) + putc( 'm', stderr ); + + if ( strcmp( skelname, DEFAULT_SKELETON_FILE ) ) + fprintf( stderr, " -S%s", skelname ); + + putc( '\n', stderr ); + fprintf( stderr, " %d/%d NFA states\n", lastnfa, current_mns ); fprintf( stderr, " %d/%d DFA states (%d words)\n", lastdfa, current_max_dfas, totnst ); @@ -321,8 +364,8 @@ int argc; char **argv; { - int i, sawcmpflag, use_stdout; - char *arg, *skelname = NULL, *flex_gettime(), *mktemp(); + int i, sawcmpflag; + char *arg, *flex_gettime(), *mktemp(); printstats = syntaxerror = trace = spprdflt = interactive = caseins = false; backtrack_report = performance_report = ddebug = fulltbl = fullspd = false; @@ -597,16 +640,15 @@ get_next_arg: /* used by -C and -S flags in lieu of a "continue 2" control */ readin() { + skelout(); + if ( ddebug ) puts( "#define FLEX_DEBUG" ); -#ifdef FLEX_8_BIT_CHARS + if ( csize == 256 ) puts( "#define YY_CHAR unsigned char" ); -#else + else puts( "#define YY_CHAR char" ); -#endif - - skelout(); line_directive_out( stdout ); -- cgit v1.2.3 From 9543f25b7c97c17c8307f0297070c8d045699533 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 11:33:27 +0000 Subject: Removed declarations of malloc() and realloc(). --- misc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/misc.c b/misc.c index d4bc083..cdbdf65 100644 --- a/misc.c +++ b/misc.c @@ -38,8 +38,6 @@ static char rcsid[] = #include #include "flexdef.h" -char *malloc(), *realloc(); - /* action_out - write the actions from the temporary file to lex.yy.c * -- cgit v1.2.3 From e986754e02f6160900184c878ae660ba854b9ebf Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 11:34:32 +0000 Subject: Corrected line numbers for continued actions. 
--- nfa.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/nfa.c b/nfa.c index 44067a2..f17126c 100644 --- a/nfa.c +++ b/nfa.c @@ -218,6 +218,12 @@ int mach, variable_trail_rule, headcnt, trailcnt; */ rule_linenum[num_rules] = linenum; + /* if this is a continued action, then the line-number has + * already been updated, giving us the wrong number + */ + if ( continued_action ) + --rule_linenum[num_rules]; + fprintf( temp_action_file, "case %d:\n", num_rules ); if ( variable_trail_rule ) -- cgit v1.2.3 From fc25bea0a9caae2cf9bcbe36591fb51bf2062aaa Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 11:35:32 +0000 Subject: Rules rewritten so '/' and '$' parsed correctly. --- parse.y | 135 ++++++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 80 insertions(+), 55 deletions(-) diff --git a/parse.y b/parse.y index a65fae1..be37f22 100644 --- a/parse.y +++ b/parse.y @@ -132,9 +132,9 @@ initforrule : } ; -flexrule : scon '^' re eol +flexrule : scon '^' rule { - pat = link_machines( $3, $4 ); + pat = $3; finish_rule( pat, variable_trail_rule, headcnt, trailcnt ); @@ -152,9 +152,9 @@ flexrule : scon '^' re eol } } - | scon re eol + | scon rule { - pat = link_machines( $2, $3 ); + pat = $2; finish_rule( pat, variable_trail_rule, headcnt, trailcnt ); @@ -163,9 +163,9 @@ flexrule : scon '^' re eol mkbranch( scset[actvsc[i]], pat ); } - | '^' re eol + | '^' rule { - pat = link_machines( $2, $3 ); + pat = $2; finish_rule( pat, variable_trail_rule, headcnt, trailcnt ); @@ -187,9 +187,9 @@ flexrule : scon '^' re eol } } - | re eol + | rule { - pat = link_machines( $1, $2 ); + pat = $1; finish_rule( pat, variable_trail_rule, headcnt, trailcnt ); @@ -236,51 +236,7 @@ namelist2 : namelist2 ',' NAME { synerr( "bad start condition list" ); } ; -eol : '$' - { - if ( trlcontxt ) - { - synerr( "trailing context used twice" ); - $$ = mkstate( SYM_EPSILON ); - } - else - { - trlcontxt = true; - - if ( ! 
varlength ) - headcnt = rulelen; - - ++rulelen; - trailcnt = 1; - - eps = mkstate( SYM_EPSILON ); - $$ = link_machines( eps, mkstate( '\n' ) ); - } - } - - | - { - $$ = mkstate( SYM_EPSILON ); - - if ( trlcontxt ) - { - if ( varlength && headcnt == 0 ) - /* both head and trail are variable-length */ - variable_trail_rule = true; - else - trailcnt = rulelen; - } - } - ; - -re : re '|' series - { - varlength = true; - - $$ = mkor( $1, $3 ); - } - - | re2 series +rule : re2 re { if ( transchar[lastst[$2]] != SYM_EPSILON ) /* provide final transition \now/ so it @@ -328,11 +284,80 @@ re : re '|' series * state ... */ add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK ); + variable_trail_rule = true; } + + else + trailcnt = rulelen; $$ = link_machines( $1, $2 ); } + | re2 re '$' + { synerr( "trailing context used twice" ); } + + | re '$' + { + if ( trlcontxt ) + { + synerr( "trailing context used twice" ); + $$ = mkstate( SYM_EPSILON ); + } + + else if ( previous_continued_action ) + { + /* see the comment in the rule for "re2 re" + * above + */ + if ( ! varlength || headcnt != 0 ) + { + fprintf( stderr, + "%s: warning - trailing context rule at line %d made variable because\n", + program_name, linenum ); + fprintf( stderr, + " of preceding '|' action\n" ); + } + + /* mark as variable */ + varlength = true; + headcnt = 0; + } + + trlcontxt = true; + + if ( ! 
varlength ) + headcnt = rulelen; + + ++rulelen; + trailcnt = 1; + + eps = mkstate( SYM_EPSILON ); + $$ = link_machines( $1, + link_machines( eps, mkstate( '\n' ) ) ); + } + + | re + { + $$ = $1; + + if ( trlcontxt ) + { + if ( varlength && headcnt == 0 ) + /* both head and trail are variable-length */ + variable_trail_rule = true; + else + trailcnt = rulelen; + } + } + ; + + +re : re '|' series + { + varlength = true; + $$ = mkor( $1, $3 ); + } + | series { $$ = $1; } ; @@ -340,8 +365,8 @@ re : re '|' series re2 : re '/' { - /* this rule is separate from the others for "re" so - * that the reduction will occur before the trailing + /* this rule is written separately so + * the reduction will occur before the trailing * series is parsed */ -- cgit v1.2.3 From 58d7f1822a723d8b0980c0d31eeacbd8501ea1cf Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 11:36:04 +0000 Subject: Removed malloc() declaration. Added detection of EOF in actions. --- scan.l | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scan.l b/scan.l index ee9a2be..922f0d2 100644 --- a/scan.l +++ b/scan.l @@ -132,8 +132,6 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) ^"%"[cr]{OPTWS} /* ignore old lex directive */ %t{OPTWS}\n { - char *malloc(); - ++linenum; xlation = (int *) malloc( sizeof( int ) * csize ); @@ -476,6 +474,11 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) \" ACTION_ECHO; BEGIN(ACTION); . 
ACTION_ECHO; +<> { + synerr( "EOF encountered inside an action" ); + yyterminate(); + } + {ESCSEQ} { yylval = myesc( yytext ); -- cgit v1.2.3 From 49520bf0d80cecbaefc05822bb6984d6e5a251a3 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 11:36:33 +0000 Subject: Removed declaration of malloc() --- sym.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sym.c b/sym.c index 8fc4e22..12daf37 100644 --- a/sym.c +++ b/sym.c @@ -68,7 +68,6 @@ int table_size; register struct hash_entry *sym_entry = table[hash_val]; register struct hash_entry *new_entry; register struct hash_entry *successor; - char *malloc(); while ( sym_entry ) { -- cgit v1.2.3 From 0b1c3ab801625843a84f7c75e6165a098f3abdb3 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 11:36:54 +0000 Subject: Fixed handling of premature EOF's. --- yylex.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yylex.c b/yylex.c index 0d059b6..2a55f12 100644 --- a/yylex.c +++ b/yylex.c @@ -59,13 +59,13 @@ int yylex() else toktype = flexscan(); - if ( toktype == EOF ) + if ( toktype == EOF || toktype == 0 ) { eofseen = 1; if ( sectnum == 1 ) { - synerr( "unexpected EOF" ); + synerr( "premature EOF" ); sectnum = 2; toktype = SECTEND; } @@ -73,7 +73,7 @@ int yylex() else if ( sectnum == 2 ) { sectnum = 3; - toktype = SECTEND; + toktype = 0; } else -- cgit v1.2.3 From 1b2e26711b0ab821a8e700d81366070c7353130e Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 13:15:30 +0000 Subject: cast added to malloc() call to keep lint happy. 
--- scan.l | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scan.l b/scan.l index 922f0d2..31fa754 100644 --- a/scan.l +++ b/scan.l @@ -53,7 +53,7 @@ static char rcsid[] = return ( NAME ); #define PUT_BACK_STRING(str, start) \ - for ( i = strlen( (char *) str ) - 1; i >= start; --i ) \ + for ( i = strlen( (char *) (str) ) - 1; i >= start; --i ) \ unput((str)[i]) #define CHECK_REJECT(str) \ @@ -133,7 +133,8 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) %t{OPTWS}\n { ++linenum; - xlation = (int *) malloc( sizeof( int ) * csize ); + xlation = + (int *) malloc( sizeof( int ) * (unsigned) csize ); if ( ! xlation ) flexfatal( @@ -331,7 +332,7 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) /* push back everything but the leading bracket * so the ccl can be rescanned */ - PUT_BACK_STRING((char *) nmstr, 1); + PUT_BACK_STRING((Char *) nmstr, 1); BEGIN(FIRSTCCL); return ( '[' ); -- cgit v1.2.3 From 664709519b3ac046006880706e71b884037cb5c9 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 13:16:02 +0000 Subject: "associated rules" changed to "associated rule line numbers". 
--- dfa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dfa.c b/dfa.c index 5502a55..4f5f00b 100644 --- a/dfa.c +++ b/dfa.c @@ -176,7 +176,7 @@ int ds; bubble( rule_set, num_associated_rules ); - fprintf( file, " associated rules:" ); + fprintf( file, " associated rule line numbers:" ); for ( i = 1; i <= num_associated_rules; ++i ) { -- cgit v1.2.3 From f2bccdb1833c5cf9d26b431e1b2af431a99b59f5 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 13:16:43 +0000 Subject: *** empty log message *** --- flex.1 | 383 ++++++++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 247 insertions(+), 136 deletions(-) diff --git a/flex.1 b/flex.1 index 5cfd6d2..ddaed24 100644 --- a/flex.1 +++ b/flex.1 @@ -1,9 +1,9 @@ -.TH FLEX 1 "24 February 1990" "Version 2.2" +.TH FLEX 1 "20 March 1990" "Version 2.2" .SH NAME flex - fast lexical analyzer generator .SH SYNOPSIS .B flex -.B [-bcdfinpstvFILT -C[efmF] -Sskeleton] +.B [-bcdfinpstvFILT8 -C[efmF] -Sskeleton] .I [filename ...] .SH DESCRIPTION .I flex @@ -230,7 +230,7 @@ if it is missing, the second .B %% in the input file may be skipped, too. .LP -In the definitions and rule sections, any +In the definitions and rules sections, any .I indented text or text enclosed in .B %{ @@ -369,24 +369,36 @@ quote in the input. A rule can have at most one instance of trailing context (the '/' operator or the '$' operator). The start condition, '^', and "<<EOF>>" patterns can only occur at the beginning of a pattern, and, as well as with '/' and '$', -cannot be grouped inside parentheses. The following are all illegal: +cannot be grouped inside parentheses. A '^' which does not occur at +the beginning of a rule or a '$' which does not occur at the end of +a rule loses its special properties and is treated as a normal character. +.IP +The following are illegal: .nf foo/bar$ + foobar + +.fi +Note that the first of these can be written "foo/bar\\n".
+.IP +The following will result in '$' or '^' being treated as a normal character: +.nf + foo|(bar$) foo|^bar - foobar .fi -Note that the first of these, though, can be written "foo/bar\\n", and -the second could be written as two rules using the special '|' action (see -below): +If what's wanted is a "foo" or a bar-followed-by-a-newline, the following +could be used (the special '|' action is explained below): .nf foo | - ^bar /* action goes here */ + bar$ /* action goes here */ .fi +A similar trick will work for matching a foo or a +bar-at-the-beginning-of-a-line. .SH HOW THE INPUT IS MATCHED When the generated scanner is run, it analyzes its input looking for strings which match any of its patterns. If it finds more than @@ -540,7 +552,13 @@ if it is used in .I any of the scanner's actions it will slow down .I all -of the scanner's matching. +of the scanner's matching. Furthermore, +.B REJECT +cannot be used with the +.I -f +or +.I -F +options (see below). .IP Note also that unlike the other special actions, .B REJECT @@ -660,6 +678,16 @@ the following is one way to eat up C comments: } .fi +(Note that if the scanner is compiled using +.B C++, +then +.B input() +is instead referred to as +.B yyinput(), +in order to avoid a name clash with the +.B C++ +stream by the name of +.I input.) .IP - .B yyterminate() can be used in lieu of a return statement in an action. It terminates @@ -998,23 +1026,142 @@ a full-fledged feature in the future.) Note, though, that start conditions do not have their own name-space; %s's and %x's declare names in the same fashion as #define's. +.SH MULTIPLE INPUT BUFFERS +Some scanners (such as those which support "include" files) +require reading from several input streams. As +.I flex +scanners do a large amount of buffering, one cannot control +where the next input will be read from by simply writing a +.B YY_INPUT +which is sensitive to the scanning context. 
+.B YY_INPUT +is only called when the scanner reaches the end of its buffer, which +may be a long time after scanning a statement such as an "include" +which requires switching the input source. +.LP +To negotiate these sorts of problems, +.I flex +provides a mechanism for creating and switching between multiple +input buffers. An input buffer is created by using: +.nf + + YY_BUFFER_STATE yy_create_buffer( FILE *file, int size ) + +.fi +which takes a +.I FILE +pointer and a size and creates a buffer associated with the given +file and large enough to hold +.I size +characters (when in doubt, use +.B YY_BUF_SIZE +for the size). It returns a +.B YY_BUFFER_STATE +handle, which may then be passed to other routines: +.nf + + void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) + +.fi +switches the scanner's input buffer so subsequent tokens will +come from +.I new_buffer. +.nf + + void yy_delete_buffer( YY_BUFFER_STATE buffer ) + +.fi +is used to reclaim the storage associated with a buffer. +.LP +Finally, the +.B YY_CURRENT_BUFFER +macro returns a +.B YY_BUFFER_STATE +handle to the current buffer. +.LP +Here is an example of using these features for writing a scanner +which expands include files (the +.B <> +feature is discussed below): +.nf + + /* the "incl" state is used for picking up the name + * of an include file + */ + %x incl + + %{ + #define MAX_INCLUDE_DEPTH 10 + YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH]; + int include_stack_ptr = 0; + %} + + %% + include BEGIN(incl); + + [a-z]+ ECHO; + [^a-z\\n]*\\n? ECHO; + + [ \\t]* /* eat the whitespace */ + [^ \\t\\n]+ { /* got the include file name */ + if ( include_stack_ptr >= MAX_INCLUDE_DEPTH ) + { + fprintf( stderr, "Includes nested too deeply" ); + exit( 1 ); + } + + include_stack[include_stack_ptr++] = + YY_CURRENT_BUFFER; + + yyin = fopen( yytext, "r" ); + + if ( ! yyin ) + error( ... 
 ); + + yy_switch_to_buffer( + yy_create_buffer( yyin, YY_BUF_SIZE ) ); + + BEGIN(INITIAL); + } + + <<EOF>> { + if ( --include_stack_ptr < 0 ) + { + yyterminate(); + } + + else + yy_switch_to_buffer( + include_stack[include_stack_ptr] ); + } + +.fi .SH END-OF-FILE RULES The special rule "<<EOF>>" indicates actions which are to be taken when an end-of-file is encountered and yywrap() returns non-zero (i.e., indicates -no further files to process). The action can either -point yyin at a new file to process, in which case the -action -.I must -finish with the special +no further files to process). The action must finish +by doing one of four things: +.IP - +the special .B YY_NEW_FILE -action -(this is a branch, so subsequent code in the action won't -be executed), or the action must finish with a +action, if +.I yyin +has been pointed at a new file to process; +.IP - +a .I return -or +statement; +.IP - +the special .B yyterminate() -statement. <<EOF>> rules may not be used with other +action; +.IP - +or, switching to a new buffer using +.B yy_switch_to_buffer() +as shown in the example above. +.LP +<<EOF>> rules may not be used with other patterns; they may only be qualified with a list of start conditions. If an unqualified <<EOF>> rule is given, it applies only to the @@ -1042,10 +1189,10 @@ An example: } <<EOF>> { if ( *++filelist ) - { - yyin = fopen( *filelist, "r" ); - YY_NEW_FILE; - } + { + yyin = fopen( *filelist, "r" ); + YY_NEW_FILE; + } else yyterminate(); } @@ -1210,17 +1357,15 @@ write to a line of the form: .nf - --accepting rule #n ("the matched text") + --accepting rule at line 53 ("the matched text") .fi -Rules are numbered sequentially with the first one being 1. Rule #0 -is executed when the scanner backtracks; Rule #(n+1) (where -.I n -is the number of rules in the -.I flex -input) indicates the default action; Rule #(n+2) indicates -that the input buffer is empty and needs to be refilled and then the scan -restarted. Rules beyond (n+2) are end-of-file actions.
+The line number refers to the location of the rule in the file +defining the scanner (i.e., the file that was fed to flex). Messages +are also generated when the scanner backtracks, accepts the +default rule, reaches the end of its input buffer (or encounters +a NUL; at this point, the two look the same as far as the scanner's concerned), +or reaches an end-of-file. .TP .B -f specifies (take your pick) @@ -1256,7 +1401,7 @@ consists of comments regarding features of the input file which will cause a loss of performance in the resulting scanner. Note that the use of .I REJECT -and variable trailing context (see the BUGS section below) +and variable trailing context (see the BUGS section in flex(1)) entails a substantial performance penalty; use of .I yymore(), the @@ -1294,7 +1439,9 @@ user, but the first line identifies the version of .I flex, which is useful for figuring -out where you stand with respect to patches and new releases. +out where you stand with respect to patches and new releases, +and the next two lines give the date when the scanner was created +and a summary of the flags which were in effect. .TP .B -F specifies that the @@ -1402,6 +1549,30 @@ concerning the form of the input and the resultant non-deterministic and deterministic finite automata. This option is mostly for use in maintaining .I flex. +.TP +.B -8 +instructs +.I flex +to generate an 8-bit scanner, i.e., one which can recognize 8-bit +characters. On some sites, +.I flex +is installed with this option as the default. On others, the default +is 7-bit characters. To see which is the case, check the verbose +.B (-v) +output for "equivalence classes created". If the denominator of +the number shown is 128, then by default +.I flex +is generating 7-bit characters. If it is 256, then the default is +8-bit characters and the +.B -8 +flag is not required (but may be a good idea to keep the scanner +specification portable). 
Feeding a 7-bit scanner 8-bit characters +will result in infinite loops, bus errors, or other such fireworks, +so when in doubt, use the flag. Note that if equivalence classes +are used, 8-bit scanners take only slightly more table space than +7-bit scanners (128 bytes, to be exact); if equivalence classes are +not used, however, then the tables may grow up to twice their +7-bit size. .TP .B -C[efmF] controls the degree of table compression. @@ -1548,19 +1719,19 @@ the file looks like: .nf State #6 is non-accepting - - associated rules: + associated rule line numbers: 2 3 out-transitions: [ o ] jam-transitions: EOF [ \\001-n p-\\177 ] State #8 is non-accepting - - associated rules: + associated rule line numbers: 3 out-transitions: [ a ] jam-transitions: EOF [ \\001-` b-\\177 ] State #9 is non-accepting - - associated rules: + associated rule line numbers: 3 out-transitions: [ r ] jam-transitions: EOF [ \\001-q s-\\177 ] @@ -1571,7 +1742,8 @@ the file looks like: The first few lines tell us that there's a scanner state in which it can make a transition on an 'o' but not on any other character, and the in that state currently scanned text does not match -any rule. +any rule. The state occurs when trying to match the rules found +at lines 2 and 3 in the input file. If the scanner is in that state and then reads something other than an 'o', it will have to backtrack to find a rule which is matched. With @@ -1663,7 +1835,7 @@ Note that here the special '|' action does .I not provide any savings, and can even make things worse (see .B BUGS -below). +in flex(1)). .LP Another area where the user can increase a scanner's performance (and one that's easier to implement) arises from the fact that @@ -1800,6 +1972,14 @@ Compiled with this is about as fast as one can get a .I flex scanner to go for this particular problem. +.LP +A final note: +.I flex +is slow when matching NUL's, particularly when a token contains +multiple NUL's. 
+It's best to write rules which match +.I short +amounts of text if it's anticipated that the text will often include NUL's. .SH INCOMPATIBILITIES WITH LEX AND POSIX .I flex is a rewrite of the Unix @@ -1870,6 +2050,15 @@ definition. The POSIX draft interpretation is the same as .I flex's. .IP - +To specify a character class which matches anything but a right bracket (']'), +in +.I lex +one can use "[^]]" but with +.I flex +one must use "[^\]]". The latter works with +.I lex, +too. +.IP - The undocumented .I lex scanner internal variable @@ -2053,102 +2242,23 @@ any of its rules. .LP .I flex input buffer overflowed - a scanner rule matched a string long enough to overflow the -scanner's internal input buffer (16K bytes - controlled by -.B YY_BUF_MAX -in "flex.skel"). +scanner's internal input buffer (16K bytes by default - controlled by +.B YY_BUF_SIZE +in "flex.skel". Note that to redefine this macro, you must first +.B #undef +it). .LP -.I fatal internal error, bad transition character detected in sympartition() - -Your input may contain an eight-bit character (either directly or expressed -as an escape sequence) and your version of flex was built for 7-bit characters. -.SH DEFICIENCIES / BUGS -.LP -Some trailing context -patterns cannot be properly matched and generate -warning messages ("Dangerous trailing context"). These are -patterns where the ending of the -first part of the rule matches the beginning of the second -part, such as "zx*/xy*", where the 'x*' matches the 'x' at -the beginning of the trailing context. (Note that the POSIX draft -states that the text matched by such patterns is undefined.) -.LP -For some trailing context rules, parts which are actually fixed-length are -not recognized as such, leading to the abovementioned performance loss. -In particular, parts using '|' or {n} (such as "foo{3}") are always -considered variable-length.
+.I scanner requires -8 flag - +Your scanner specification includes recognizing 8-bit characters and +you did not specify the -8 flag (and your site has not installed flex +with -8 as the default). .LP -Combining trailing context with the special '|' action can result in -.I fixed -trailing context being turned into the more expensive -.I variable -trailing context. For example, this happens in the following example: -.nf - - %% - abc | - xyz/def - -.fi -.LP -Use of unput() invalidates yytext and yyleng. -.LP -Use of unput() to push back more text than was matched can -result in the pushed-back text matching a beginning-of-line ('^') -rule even though it didn't come at the beginning of the line -(though this is rare!). -.LP -Nulls are not allowed in +.I too many %t classes! - +You managed to put every single character into its own %t class. .I flex -inputs or in the inputs to -scanners generated by -.I flex. -Their presence generates fatal errors. -.LP -.I flex -does not generate correct #line directives for code internal -to the scanner; thus, bugs in -.I flex.skel -yield bogus line numbers. -.LP -The -.B -d -option should use the -.I line -number corresponding to the matched rule rather than the -.I rule -number, which is -close-to-useless. -.LP -Due to both buffering of input and read-ahead, you cannot intermix -calls to routines, such as, for example, -.B getchar(), -with -.I flex -rules and expect it to work. Call -.B input() -instead. -.LP -The total table entries listed by the -.B -v -flag excludes the number of table entries needed to determine -what rule has been matched. The number of entries is equal -to the number of DFA states if the scanner does not use REJECT, -and somewhat greater than the number of states if it does. -.LP -It would be useful if -.I flex -wrote to lex.yy.c a summary of the flags used in -its generation (such as which table compression options). 
-.LP -Some of the macros, such as -.B yywrap(), -may in the future become functions which live in the -.B -ll -library. This will doubtless break a lot of code, but may be -required for POSIX-compliance. -.LP -The -.I flex -internal algorithms need documentation. +requires that at least one of the classes share characters. +.SH DEFICIENCIES / BUGS +See flex(1). .SH "SEE ALSO" .LP flex(1), lex(1), yacc(1), sed(1), awk(1). @@ -2173,12 +2283,13 @@ Jef Poskanzer, Dave Tallman, Frank Whaley, Ken Yap, and those whose names have slipped my marginal mail-archiving skills but whose contributions are appreciated all the same. .LP -Thanks to Keith Bostic, John Gilmore, Bob +Thanks to Keith Bostic, John Gilmore, Craig Leres, Bob Mulcahy, Rich Salz, and Richard Stallman for help with various distribution headaches. .LP -Thanks to Esmond Pitt for 8-bit character support, Benson Margulies and Fred -Burke for C++ support, and Ove Ewerlid for supporting NUL's. +Thanks to Esmond Pitt for 8-bit character support; to Benson Margulies and Fred +Burke for C++ support; to Ove Ewerlid for the basics of support for +NUL's; and to Eric Hughes for the basics of support for multiple buffers. .LP This work was primarily done when I was at the Real Time Systems Group at the Lawrence Berkeley Laboratory in Berkeley, CA. Many thanks to all there -- cgit v1.2.3 From da67cd31b9e14bee1f455d745884dcf5af685b58 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 13:17:15 +0000 Subject: Changed to use YY_BUFFER_STATE everywhere. 
--- flex.skl | 54 ++++++++++++++++++++++++++---------------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/flex.skl b/flex.skl index a88d297..ec7660b 100644 --- a/flex.skl +++ b/flex.skl @@ -57,7 +57,7 @@ char *malloc(); /* this used to be an fputs(), but since the string might contain NUL's, * we now use fwrite() */ -#define ECHO fwrite( (char *) yytext, yyleng, 1, yyout ) +#define ECHO (void) fwrite( (char *) yytext, yyleng, 1, yyout ) /* gets input and stuffs it into "buf". number of characters read, or YY_NULL, * is returned in "result". @@ -156,13 +156,12 @@ struct yy_buffer_state #define EOF_DONE 2 }; -static struct yy_buffer_state *yy_original_buffer; -static struct yy_buffer_state *yy_current_buffer; +static YY_BUFFER_STATE yy_current_buffer; -/* we provide macros for accessing the buffer states in case in the - * future we want to put the buffer states in a more general "scanner state" +/* we provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state" */ -#define YY_ORIGINAL_BUFFER yy_original_buffer #define YY_CURRENT_BUFFER yy_current_buffer @@ -200,15 +199,15 @@ static yy_state_type yy_last_accepting_state; static YY_CHAR *yy_last_accepting_cpos; static yy_state_type yy_get_previous_state YY_PROTO(( void )); -static yy_state_type yy_try_NUL_trans YY_PROTO(( void )); +static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); static int yy_get_next_buffer YY_PROTO(( void )); -static void yyunput YY_PROTO(( int c, YY_CHAR *buf_ptr )); +static void yyunput YY_PROTO(( YY_CHAR c, YY_CHAR *buf_ptr )); void yyrestart YY_PROTO(( FILE *input_file )); -void yy_switch_to_buffer YY_PROTO(( struct yy_buffer_state *new_buffer )); +void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer )); void yy_load_buffer_state YY_PROTO(( void )); -struct yy_buffer_state *yy_create_buffer YY_PROTO(( FILE *file, int size )); -void 
yy_delete_buffer YY_PROTO(( struct yy_buffer_state *b )); -void yy_init_buffer YY_PROTO(( struct yy_buffer_state *b, FILE *file )); +YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size )); +void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b )); +void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file )); #ifdef __cplusplus static int yyinput YY_PROTO(( void )); @@ -238,8 +237,7 @@ YY_DECL if ( yy_current_buffer ) yy_init_buffer( yy_current_buffer, yyin ); else - yy_original_buffer = - yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); yy_load_buffer_state(); @@ -506,10 +504,10 @@ register yy_state_type yy_current_state; #ifdef YY_USE_PROTOS -static void yyunput( int c, register YY_CHAR *yy_bp ) +static void yyunput( YY_CHAR c, register YY_CHAR *yy_bp ) #else static void yyunput( c, yy_bp ) -int c; +YY_CHAR c; register YY_CHAR *yy_bp; #endif @@ -628,10 +626,10 @@ FILE *input_file; #ifdef YY_USE_PROTOS -void yy_switch_to_buffer( struct yy_buffer_state *new_buffer ) +void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) #else void yy_switch_to_buffer( new_buffer ) -struct yy_buffer_state *new_buffer; +YY_BUFFER_STATE new_buffer; #endif { @@ -666,17 +664,17 @@ void yy_load_buffer_state() #ifdef YY_USE_PROTOS -struct yy_buffer_state *yy_create_buffer( FILE *file, int size ) +YY_BUFFER_STATE yy_create_buffer( FILE *file, int size ) #else -struct yy_buffer_state *yy_create_buffer( file, size ) +YY_BUFFER_STATE yy_create_buffer( file, size ) FILE *file; int size; #endif { - struct yy_buffer_state *b; + YY_BUFFER_STATE b; - b = (struct yy_buffer_state *) malloc( sizeof( struct yy_buffer_state ) ); + b = (YY_BUFFER_STATE) malloc( sizeof( struct yy_buffer_state ) ); if ( ! b ) YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); @@ -685,7 +683,7 @@ int size; * we need to put in 2 end-of-buffer characters. 
*/ b->yy_buf_size = size + 2; - b->yy_ch_buf = (YY_CHAR *) malloc( b->yy_buf_size ); + b->yy_ch_buf = (YY_CHAR *) malloc( (unsigned) b->yy_buf_size ); if ( ! b->yy_ch_buf ) YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); @@ -697,15 +695,15 @@ int size; #ifdef YY_USE_PROTOS -void yy_delete_buffer( struct yy_buffer_state *b ) +void yy_delete_buffer( YY_BUFFER_STATE b ) #else void yy_delete_buffer( b ) -struct yy_buffer_state *b; +YY_BUFFER_STATE b; #endif { if ( b == yy_current_buffer ) - yy_current_buffer = (struct yy_buffer_state *) 0; + yy_current_buffer = (YY_BUFFER_STATE) 0; free( (char *) b->yy_ch_buf ); free( (char *) b ); @@ -713,10 +711,10 @@ struct yy_buffer_state *b; #ifdef YY_USE_PROTOS -void yy_init_buffer( struct yy_buffer_state *b, FILE *file ) +void yy_init_buffer( YY_BUFFER_STATE b, FILE *file ) #else void yy_init_buffer( b, file ) -struct yy_buffer_state *b; +YY_BUFFER_STATE b; FILE *file; #endif -- cgit v1.2.3 From 56231f0b74b4bc1e46291b5fc0ad60b3f652f273 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 13:52:04 +0000 Subject: 2.2 changes --- NEWS | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 84 insertions(+), 7 deletions(-) diff --git a/NEWS b/NEWS index e62f1c5..08e9108 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,87 @@ +Changes between 2.2 (alpha) release of March '90 and previous release: + + User-visible: + + - Full user documentation now available. + + - Support for 8-bit scanners. + + - Scanners now accept NUL's. + + - A facility has been added for dealing with multiple + input buffers. + + - Two manual entries now. One which fully describes flex + (rather than just its differences from lex), and the + other for quick(er) reference. 
+ + - A number of changes to bring flex closer into compliance + with the latest POSIX lex draft: + + %t support + flex now accepts multiple input files and concatenates + them together to form its input + previous -c (compress) flag renamed -C + do-nothing -c and -n flags added + Any indented code or code within %{}'s in section 2 is + now copied to the output + + - yyleng is now a bona fide global integer. + + - -d debug information now gives the line number of the + matched rule instead of which number rule it was from + the beginning of the file. + + - -v output now includes a summary of the flags used to generate + the scanner. + + - unput() and yyrestart() are now globally callable. + + - yyrestart() no longer closes the previous value of yyin. + + - C++ support; generated scanners can be compiled with C++ compiler. + + - Primitive -lfl library added, containing default main() + which calls yylex(). A number of routines currently living + in the scanner skeleton will probably migrate to here + in the future (in particular, yywrap() will probably cease + to be a macro and instead be a function in the -lfl library). + + - Hexadecimal (\x) escape sequences added. + + - Support for MS-DOS, VMS, and Turbo-C integrated. + + - The %used/%unused operators have been deprecated. They + may go away soon. + + + Other changes: + + - Makefile enhanced for easier testing and installation. + - The parser has been tweaked to detect some erroneous + constructions which previously were missed. + - Scanner input buffer overflow is now detected. + - Bugs with missing "const" declarations fixed. + - Out-of-date Minix/Atari patches provided. + - Scanners no longer require printf() unless FLEX_DEBUG is being used. + - A subtle input() bug has been fixed. + - Line numbers for "continued action" rules (those following + the special '|' action) are now correct. + - unput() bug fixed; had been causing problems porting flex to VMS. 
+ - yymore() handling rewritten to fix bug with interaction + between yymore() and trailing context. + - EOF in actions now generates an error message. + - Bug involving -CFe and generating equivalence classes fixed. + - Bug which made -CF be treated as -Cf fixed. + - Support for SysV tmpnam() added. + - Unused #define's for scanner no longer generated. + - Error messages which are associated with a particular input + line are now all identified with their input line in standard + format. + - % directives which are valid to lex but not to flex are + now ignored instead of generating warnings. + + Changes between 2.1 beta-test release of June '89 and previous release: User-visible: @@ -88,17 +172,10 @@ Changes between 2.1 beta-test release of June '89 and previous release: trimmed to be 31 or fewer characters. Shortened file names for dinosaur OS's. Checks for allocating > 64K memory on 16 bit'ers. Amiga tweaks. Compiles using gcc on a Sun-3. - - Compressed and fast scanner skeletons merged. - - Skeleton header files done away with. - - Generated scanner uses prototypes and "const" for __STDC__. - - -DSV flag is now -DSYS_V for System V compilation. - - Removed all references to FTL language. - - Software now covered by BSD Copyright. - - flex will replace lex in subsequent BSD releases. -- cgit v1.2.3 From c303772eef6bd7de95acadf1d5c234a279ce3a8c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 13:52:32 +0000 Subject: -ll => -lfl --- flex.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flex.1 b/flex.1 index ddaed24..c3c914e 100644 --- a/flex.1 +++ b/flex.1 @@ -22,7 +22,7 @@ generates as output a C source file, which defines a routine .B yylex(). This file is compiled and linked with the -.B -ll +.B -lfl library to produce an executable. When the executable is run, it analyzes its input for occurrences of the regular expressions. 
Whenever it finds one, it executes -- cgit v1.2.3 From e1be7a80eb3aa80f0fc07db52b4ba0107b804879 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 13:52:49 +0000 Subject: Tweaks for lint and C++ --- flex.skl | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/flex.skl b/flex.skl index ec7660b..84afa55 100644 --- a/flex.skl +++ b/flex.skl @@ -38,9 +38,11 @@ #ifdef YY_USE_PROTOS #define YY_PROTO(proto) proto char *malloc( unsigned size ); +int free( char * ); #else #define YY_PROTO(proto) () char *malloc(); +int free(); #endif /* amount of stuff to slurp up with each read */ @@ -66,13 +68,18 @@ char *malloc(); if ( (result = read( fileno(yyin), (char *) buf, max_size )) < 0 ) \ YY_FATAL_ERROR( "read() in flex scanner failed" ); #define YY_NULL 0 -#define yyterminate() return ( YY_NULL ); + +/* no semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#define yyterminate() return ( YY_NULL ) /* report a fatal error */ #define YY_FATAL_ERROR(msg) \ { \ - fputs( msg, stderr ); \ - putc( '\n', stderr ); \ + (void) fputs( msg, stderr ); \ + (void) putc( '\n', stderr ); \ exit( 1 ); \ } @@ -492,8 +499,12 @@ static yy_state_type yy_get_previous_state() * next_state = yy_try_NUL_trans( current_state ); */ +#ifdef YY_USE_PROTOS +static yy_state_type yy_try_NUL_trans( register yy_state_type yy_current_state ) +#else static yy_state_type yy_try_NUL_trans( yy_current_state ) register yy_state_type yy_current_state; +#endif { register int yy_is_jam; -- cgit v1.2.3 From e356b6d3abd7a9df1b058d9fd2b6559e4fc19ad3 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 14:17:29 +0000 Subject: Added Earle Horton for 8-bit chars. 
--- flex.1 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flex.1 b/flex.1 index c3c914e..9aea48d 100644 --- a/flex.1 +++ b/flex.1 @@ -2287,7 +2287,8 @@ Thanks to Keith Bostic, John Gilmore, Craig Leres, Bob Mulcahy, Rich Salz, and Richard Stallman for help with various distribution headaches. .LP -Thanks to Esmond Pitt for 8-bit character support; to Benson Margulies and Fred +Thanks to Esmond Pitt and Earle Horton for 8-bit character support; +to Benson Margulies and Fred Burke for C++ support; to Ove Ewerlid for the basics of support for NUL's; and to Eric Hughes for the basics of support for multiple buffers. .LP -- cgit v1.2.3 -- cgit v1.2.3 From 11d977e5954fb8e64f05168f6f0eb5759ec3f9de Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 14:32:52 +0000 Subject: Added USG alias for SYS_V --- flexdef.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/flexdef.h b/flexdef.h index 4146fe6..153522a 100644 --- a/flexdef.h +++ b/flexdef.h @@ -47,6 +47,10 @@ #endif +#ifdef USG +#define SYS_V +#endif + #ifdef SYS_V #include -- cgit v1.2.3 From 5ddbc20d7a289dd67a4bc5145ceada5b0946ec61 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 14:33:14 +0000 Subject: USG alias. --- NEWS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS b/NEWS index 08e9108..6badef0 100644 --- a/NEWS +++ b/NEWS @@ -80,6 +80,8 @@ Changes between 2.2 (alpha) release of March '90 and previous release: format. - % directives which are valid to lex but not to flex are now ignored instead of generating warnings. + - -DSYS_V flag can now also be specified -DUSG for System V + compilation. 
Changes between 2.1 beta-test release of June '89 and previous release: -- cgit v1.2.3 From cde5d035fffea16ee3c22f5ad0220b95bc9a28fb Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 20 Mar 1990 14:54:13 +0000 Subject: 2.2 README --- README | 79 +++++++++++++++++++----------------------------------------------- 1 file changed, 23 insertions(+), 56 deletions(-) diff --git a/README b/README index 364345c..7735fd4 100644 --- a/README +++ b/README @@ -1,6 +1,10 @@ // $Header$ -This is release 2.1 of flex - a beta release. +This is release 2.2 of flex - an alpha release. + +The intent behind this alpha release is to weed out most of the +bugs associated with some new features, and to follow up with either +a beta or full 2.3 release by the end of May. The flex distribution consists of the following files: @@ -21,33 +25,30 @@ The flex distribution consists of the following files: tblcmp.c yylex.c + libmain.c flex library (-lfl) source + initscan.c pre-flex'd version of scan.l flex.skel skeleton for generated scanners - flex.1 manual entry + flexdoc.1 full user documentation + flex.1 reference documentation Changes Differences between this release and the previous one - COPYING flex's copyright - MISC miscellaneous stuff (e.g., old VMS Makefile) which - almost no one will care about + COPYING flex's copyright -If you have installed a previous version of flex, delete it (after making -backups, of course). This will entail removing the source directory, -/usr/include/{flexskelcom,fastskeldef,flexskeldef}.h, and -/usr/local/lib/flex.{skel,fastskel}, if that's where you put the various -pieces. + MISC/ a directory containing miscellaneous porting-related + notes (for Atari, MS-DOS, Turbo-C, and VMS) -Decide where you want to keep flex.skel (suggestion: /usr/local/lib) and -copy it there. Edit "Makefile" and change the definition of SKELETON_FILE -to reflect the full pathname of flex.skel. 
+Decide where you want to keep flex.skel (suggestion: /usr/local/lib), +but don't move it there yet. Edit "Makefile" and change the definition +of SKELETON_FILE to reflect the full pathname of flex.skel. Read the "Porting considerations" note in the Makefile and make the necessary changes. - To make flex for the first time, use: make first_flex @@ -61,56 +62,22 @@ Assuming it builds successfully, you can test it using The "diff" should not show any differences. -If you're feeling adventurous, rebuild scan.c using various -combinations of FLEX_FLAGS, each time trying "make test" when -you're done. To rebuild it, do - - rm scan.c - make FLEX_FLAGS="..." - -where "..." is one of: - - -c - -ce - -cm - -cfe - -cFe - -and testing using: +If you're feeling adventurous, issue "make bigtest" and be prepared +to wait a while. - make FLEX_FLAGS="..." test +Install flex using: - -Format the manual entry using - - make flex.man + make install Please send problems and feedback to: - vern@{csam.lbl.gov,rtsg.ee.lbl.gov} or ucbvax!csam.lbl.gov!vern - - Vern Paxson - Real Time Systems - Bldg. 46A - Lawrence Berkeley Laboratory - 1 Cyclotron Rd. - Berkeley, CA 94720 - - (415) 486-6411 - - -I will be gone from mid-July '89 through mid-August '89. From August on, -the addresses are: - - vern@cs.cornell.edu (email sent to the former addresses should - continue to be forwarded for quite a while) + vern@cs.cornell.edu + decvax!cornell!vern + vern@LBL (Bitnet) Vern Paxson CS Department - Grad Office - 4126 Upson + 4126 Upson Hall Cornell University Ithaca, NY 14853-7501 - - -- cgit v1.2.3 From e0087b51fa39f5e7887d817f2440dd3b76a256a0 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 23 Mar 1990 14:16:08 +0000 Subject: nuked BITNET address. 
--- README | 1 - 1 file changed, 1 deletion(-) diff --git a/README b/README index 7735fd4..273fde5 100644 --- a/README +++ b/README @@ -74,7 +74,6 @@ Please send problems and feedback to: vern@cs.cornell.edu decvax!cornell!vern - vern@LBL (Bitnet) Vern Paxson CS Department -- cgit v1.2.3 From a5346338fcf1eb7cf43e67fd3785ba3d4f721fda Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 23 Mar 1990 14:18:40 +0000 Subject: minor typos and formatting changes. Removed BITNET address. --- flex.1 | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/flex.1 b/flex.1 index 9aea48d..33595bc 100644 --- a/flex.1 +++ b/flex.1 @@ -1741,7 +1741,7 @@ the file looks like: .fi The first few lines tell us that there's a scanner state in which it can make a transition on an 'o' but not on any other -character, and the in that state currently scanned text does not match +character, and that in that state the currently scanned text does not match any rule. The state occurs when trying to match the rules found at lines 2 and 3 in the input file. If the scanner is in that state and then reads @@ -2055,7 +2055,7 @@ in .I lex one can use "[^]]" but with .I flex -one must use "[^\]]". The latter works with +one must use "[^\\]]". The latter works with .I lex, too. 
.IP - @@ -2307,6 +2307,5 @@ Send comments to: vern@cs.cornell.edu decvax!cornell!vern - vern@LBL (bitnet) .fi -- cgit v1.2.3 From 1ff52b76ad8c2af1777b4098f2a79d42ec69bd14 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 23 Mar 1990 14:20:19 +0000 Subject: fix for g++ --- flex.skl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flex.skl b/flex.skl index 84afa55..3a7ca90 100644 --- a/flex.skl +++ b/flex.skl @@ -10,7 +10,7 @@ #ifdef __cplusplus -#include +#include #include /* use prototypes in function declarations */ @@ -37,8 +37,10 @@ #ifdef YY_USE_PROTOS #define YY_PROTO(proto) proto +#ifndef __cplusplus char *malloc( unsigned size ); int free( char * ); +#endif #else #define YY_PROTO(proto) () char *malloc(); -- cgit v1.2.3 From 8861e6f888504e8e215a09eaae2fcee992d5b822 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 23 Mar 1990 14:25:30 +0000 Subject: Changes for Patch #1. --- NEWS | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/NEWS b/NEWS index 6badef0..96fbef8 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,17 @@ +Changes between 2.2 Patch #1 (23Mar90) and 2.2 (alpha) release: + + - Makefile fixes: definition of MAKE variable for systems + which don't have it; installation of flexdoc.1 along with + flex.1; *fixed two bugs which could cause "bigtest" to fail*. + + - flex.skel fix for compiling with g++. + + - README and flexdoc.1 no longer list an out-of-date BITNET address + for contacting me. + + - minor typos and formatting changes to flex.1 and flexdoc.1. 
+ + Changes between 2.2 (alpha) release of March '90 and previous release: User-visible: -- cgit v1.2.3 From 123bdbe37378b7d332b3c4e4478776d1628b52f2 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 26 Mar 1990 16:38:49 +0000 Subject: g++ tweaks --- flex.skl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flex.skl b/flex.skl index 3a7ca90..fe0a567 100644 --- a/flex.skl +++ b/flex.skl @@ -11,7 +11,7 @@ #ifdef __cplusplus #include -#include +#include /* use prototypes in function declarations */ #define YY_USE_PROTOS @@ -37,14 +37,14 @@ #ifdef YY_USE_PROTOS #define YY_PROTO(proto) proto -#ifndef __cplusplus char *malloc( unsigned size ); int free( char * ); -#endif #else #define YY_PROTO(proto) () +/* there's no standard place to get these definitions */ char *malloc(); int free(); +int read(); #endif /* amount of stuff to slurp up with each read */ -- cgit v1.2.3 From 242eed64b58cb4575bdd7a3302e9c633c11d590d Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 26 Mar 1990 16:41:47 +0000 Subject: *** empty log message *** --- NEWS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index 96fbef8..3909181 100644 --- a/NEWS +++ b/NEWS @@ -2,13 +2,13 @@ Changes between 2.2 Patch #1 (23Mar90) and 2.2 (alpha) release: - Makefile fixes: definition of MAKE variable for systems which don't have it; installation of flexdoc.1 along with - flex.1; *fixed two bugs which could cause "bigtest" to fail*. + flex.1; fixed two bugs which could cause "bigtest" to fail. - flex.skel fix for compiling with g++. - README and flexdoc.1 no longer list an out-of-date BITNET address for contacting me. - + - minor typos and formatting changes to flex.1 and flexdoc.1. 
-- cgit v1.2.3 From 2e926e90c8b27f8ef495792296a07cb223e3bec3 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 27 Mar 1990 12:03:55 +0000 Subject: fixed fencepost errors with yy_buf_size and detecting NUL's --- flex.skl | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/flex.skl b/flex.skl index fe0a567..01f70ab 100644 --- a/flex.skl +++ b/flex.skl @@ -153,8 +153,12 @@ struct yy_buffer_state YY_CHAR *yy_ch_buf; /* input buffer */ YY_CHAR *yy_buf_pos; /* current position in input buffer */ - int yy_buf_size; /* size of input buffer in bytes */ - int yy_n_chars; /* number of characters read into yy_ch_buf */ + + /* size of input buffer in bytes, not including room for EOB characters*/ + int yy_buf_size; + + /* number of characters read into yy_ch_buf, not including EOB characters */ + int yy_n_chars; int yy_eof_status; /* whether we've seen an EOF on this buffer */ #define EOF_NOT_SEEN 0 @@ -290,8 +294,13 @@ do_action: /* this label is used only to access EOF actions */ /* undo the effects of YY_DO_BEFORE_ACTION */ *yy_cp = yy_hold_char; - if ( yy_c_buf_p < - &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] ) + /* note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the end- + * of-buffer state). Contrast this with the test in yyinput(). + */ + if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] ) /* this was really a NUL */ { yy_state_type yy_next_state; @@ -575,7 +584,11 @@ static int input() if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) { - if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] ) + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. 
+ */ + if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] ) /* this was really a NUL */ *yy_c_buf_p = '\0'; @@ -692,11 +705,12 @@ int size; if ( ! b ) YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + b->yy_buf_size = size; + /* yy_ch_buf has to be 2 characters longer than the size given because * we need to put in 2 end-of-buffer characters. */ - b->yy_buf_size = size + 2; - b->yy_ch_buf = (YY_CHAR *) malloc( (unsigned) b->yy_buf_size ); + b->yy_ch_buf = (YY_CHAR *) malloc( (unsigned) (b->yy_buf_size + 2) ); if ( ! b->yy_ch_buf ) YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); -- cgit v1.2.3 From 23df378842b78c25f72810afdf9edb157ac57c4a Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 27 Mar 1990 12:06:06 +0000 Subject: Patch #2 changes --- NEWS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/NEWS b/NEWS index 3909181..96f5371 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,9 @@ +Changes between 2.2 Patch #2 (27Mar90) and 2.2 Patch #1: + + - fixed bug writing past end of input buffer in yyunput() + - fixed bug detecting NUL's at the end of a buffer + + Changes between 2.2 Patch #1 (23Mar90) and 2.2 (alpha) release: - Makefile fixes: definition of MAKE variable for systems -- cgit v1.2.3 From 6154e990a066c3ee91ee724908ceef795ebd64b7 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 30 Mar 1990 02:43:05 +0000 Subject: Changed generation of archaic "continue" to "goto yy_find_action" --- gen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gen.c b/gen.c index 7f8ec61..d3dad92 100644 --- a/gen.c +++ b/gen.c @@ -119,7 +119,7 @@ gen_bt_action() indent_puts( "yy_cp = yy_last_accepting_cpos;" ); indent_puts( "yy_current_state = yy_last_accepting_state;" ); - indent_puts( "continue; /* go to \"YY_DO_BEFORE_ACTION\" */" ); + indent_puts( "goto yy_find_action;" ); putchar( '\n' ); set_indent( 0 ); -- cgit v1.2.3 From 35bc7095bc7b161cdcd5539ba88c481b12aa30b9 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: 
Tue, 3 Apr 1990 14:09:16 +0000 Subject: patch #3 - -I fix --- NEWS | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/NEWS b/NEWS index 96f5371..4181bec 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,8 @@ +Changes between 2.2 Patch #3 (30Mar90) and 2.2 Patch #2: + + - fixed bug which caused -I scanners to bomb + + Changes between 2.2 Patch #2 (27Mar90) and 2.2 Patch #1: - fixed bug writing past end of input buffer in yyunput() -- cgit v1.2.3 From 01a6ef498707895893844c2b9c015ec481b5664e Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 12 Apr 1990 11:03:24 +0000 Subject: added fix for allowing yy_switch_to_buffer() in yywrap() --- flex.skl | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/flex.skl b/flex.skl index 01f70ab..1a6169e 100644 --- a/flex.skl +++ b/flex.skl @@ -211,6 +211,11 @@ static int yy_start = 0; /* start state number */ static yy_state_type yy_last_accepting_state; static YY_CHAR *yy_last_accepting_cpos; +/* flag which is used to allow yywrap()'s to do buffer switches + * instead of setting up a fresh yyin. A bit of a hack ... + */ +static int yy_did_buffer_switch_on_eof; + static yy_state_type yy_get_previous_state YY_PROTO(( void )); static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); static int yy_get_next_buffer YY_PROTO(( void )); @@ -341,6 +346,8 @@ do_action: /* this label is used only to access EOF actions */ { case EOB_ACT_END_OF_FILE: { + yy_did_buffer_switch_on_eof = 0; + if ( yywrap() ) { /* note: because we've taken care in @@ -358,7 +365,10 @@ do_action: /* this label is used only to access EOF actions */ } else - YY_NEW_FILE; + { + if ( ! 
yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; + } } break; @@ -672,6 +682,13 @@ YY_BUFFER_STATE new_buffer; yy_current_buffer = new_buffer; yy_load_buffer_state(); + + /* we don't actually know whether we did this switch during + * EOF (yywrap()) processing, but the only time this flag + * is looked at is after yywrap() is called, so it's safe + * to go ahead and always set it. + */ + yy_did_buffer_switch_on_eof = 1; } -- cgit v1.2.3 From b7886e162067d0ccb0bec84fdfd538afb625f6ae Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 26 May 1990 16:38:41 +0000 Subject: Declared void functions as such --- ccl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ccl.c b/ccl.c index 78f263c..536ca8f 100644 --- a/ccl.c +++ b/ccl.c @@ -45,7 +45,7 @@ static char rcsid[] = * ccladd( cclp, ch ); */ -ccladd( cclp, ch ) +void ccladd( cclp, ch ) int cclp; int ch; @@ -124,7 +124,7 @@ int cclinit() * cclnegate( ccl ); */ -cclnegate( cclp ) +void cclnegate( cclp ) int cclp; { @@ -144,7 +144,7 @@ int cclp; * has a non-zero value in the set array. 
*/ -list_character_set( file, cset ) +void list_character_set( file, cset ) FILE *file; int cset[]; -- cgit v1.2.3 From 272255ac8956417e42c4cff205f11525cc08f353 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 26 May 1990 16:39:54 +0000 Subject: prototypes for forward references declared void functions as such --- dfa.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/dfa.c b/dfa.c index 4f5f00b..5bf2348 100644 --- a/dfa.c +++ b/dfa.c @@ -38,6 +38,14 @@ static char rcsid[] = #include "flexdef.h" +/* declare functions that have forward references */ + +void dump_associated_rules PROTO((FILE*, int)); +void dump_transitions PROTO((FILE*, int[])); +void sympartition PROTO((int[], int, int[], int[])); +int symfollowset PROTO((int[], int, int, int[])); + + /* check_for_backtracking - check a DFA state for backtracking * * synopsis @@ -49,7 +57,7 @@ static char rcsid[] = * associated with this state */ -check_for_backtracking( ds, state ) +void check_for_backtracking( ds, state ) int ds; int state[]; @@ -80,9 +88,7 @@ int state[]; * synopsis * int nfa_states[num_states+1], num_states; * int accset[nacc+1], nacc; - * int check_trailing_context(); - * true/false = check_trailing_context( nfa_states, num_states, - * accset, nacc ); + * check_trailing_context( nfa_states, num_states, accset, nacc ); * * NOTES * Trailing context is "dangerous" if both the head and the trailing @@ -98,7 +104,7 @@ int state[]; * accset[1 .. nacc] is the list of accepting numbers for the DFA state. */ -int check_trailing_context( nfa_states, num_states, accset, nacc ) +void check_trailing_context( nfa_states, num_states, accset, nacc ) int *nfa_states, num_states; int *accset; register int nacc; @@ -148,7 +154,7 @@ register int nacc; * and writes a report to the given file */ -dump_associated_rules( file, ds ) +void dump_associated_rules( file, ds ) FILE *file; int ds; @@ -203,7 +209,7 @@ int ds; * is done to the given file. 
*/ -dump_transitions( file, state ) +void dump_transitions( file, state ) FILE *file; int state[]; @@ -377,7 +383,7 @@ int *t, *ns_addr, accset[], *nacc_addr, *hv_addr; /* increase_max_dfas - increase the maximum number of DFAs */ -increase_max_dfas() +void increase_max_dfas() { current_max_dfas += MAX_DFAS_INCREMENT; @@ -405,7 +411,8 @@ increase_max_dfas() * creates the dfa corresponding to the ndfa we've constructed. the * dfa starts out in state #1. */ -ntod() + +void ntod() { int *accset, ds, nacc, newds; @@ -981,7 +988,7 @@ bottom: * sympartition( ds, numstates, symlist, duplist ); */ -sympartition( ds, numstates, symlist, duplist ) +void sympartition( ds, numstates, symlist, duplist ) int ds[], numstates, duplist[]; int symlist[]; -- cgit v1.2.3 From c657eaf0c805d275259dbccb7a797b06df68ec18 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 26 May 1990 16:40:17 +0000 Subject: declared void functions as such declared void functions as such --- ecs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ecs.c b/ecs.c index b3d7f93..d095fc1 100644 --- a/ecs.c +++ b/ecs.c @@ -43,7 +43,7 @@ static char rcsid[] = * ccl2ecl(); */ -ccl2ecl() +void ccl2ecl() { int i, ich, newlen, cclp, ccls, cclmec; @@ -235,7 +235,7 @@ int ecmap[]; * NUL_mapping is the value which NUL (0) should be mapped to. 
*/ -mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping ) +void mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping ) Char ccls[]; int lenccl, fwd[], bck[], llsiz, NUL_mapping; @@ -334,7 +334,7 @@ next_pt: * mkechar( tch, fwd, bck ); */ -mkechar( tch, fwd, bck ) +void mkechar( tch, fwd, bck ) int tch, fwd[], bck[]; { -- cgit v1.2.3 From f9c8594fd54b38f6555f3f22524cda5bfeb03edd Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 26 May 1990 16:40:54 +0000 Subject: Added YY_USER_INIT Added yy_new_buffer() alias for yy_create_buffer() fixed (hopefully) malloc declaration headaches --- flex.skl | 89 +++++++++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 65 insertions(+), 24 deletions(-) diff --git a/flex.skl b/flex.skl index 1a6169e..190bb9f 100644 --- a/flex.skl +++ b/flex.skl @@ -8,9 +8,27 @@ #include +#ifdef __STDC__ +#include +#define YY_USE_PROTOS +#define YY_USE_CONST +#endif + + +/* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */ +#ifdef c_plusplus +#ifndef __cplusplus +#define __cplusplus +#endif +#endif + #ifdef __cplusplus + +#ifndef __STDC__ #include +#endif + #include /* use prototypes in function declarations */ @@ -21,10 +39,6 @@ #endif -#ifdef __STDC__ -#define YY_USE_PROTOS -#define YY_USE_CONST -#endif #ifdef __TURBOC__ #define YY_USE_CONST @@ -35,10 +49,9 @@ #define const #endif + #ifdef YY_USE_PROTOS #define YY_PROTO(proto) proto -char *malloc( unsigned size ); -int free( char * ); #else #define YY_PROTO(proto) () /* there's no standard place to get these definitions */ @@ -47,6 +60,7 @@ int free(); int read(); #endif + /* amount of stuff to slurp up with each read */ #ifndef YY_READ_BUF_SIZE #define YY_READ_BUF_SIZE 8192 @@ -78,12 +92,29 @@ int read(); #define yyterminate() return ( YY_NULL ) /* report a fatal error */ + +/* The funky do-while is used to turn this macro definition into + * a single C statement (which needs a semi-colon terminator). 
+ * This avoids problems with code like: + * + * if ( something_happens ) + * YY_FATAL_ERROR( "oops, the something happened" ); + * else + * everything_okay(); + * + * Prior to using the do-while the compiler would get upset at the + * "else" because it interpreted the "if" statement as being all + * done when it reached the ';' after the YY_FATAL_ERROR() call. + */ + #define YY_FATAL_ERROR(msg) \ - { \ - (void) fputs( msg, stderr ); \ - (void) putc( '\n', stderr ); \ - exit( 1 ); \ - } + do \ + { \ + (void) fputs( msg, stderr ); \ + (void) putc( '\n', stderr ); \ + exit( 1 ); \ + } \ + while ( 0 ) /* default yywrap function - always treat EOF as an EOF */ #define yywrap() 1 @@ -98,10 +129,13 @@ int read(); #define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) /* special action meaning "start processing a new file" */ -#define YY_NEW_FILE { \ - yy_init_buffer( yy_current_buffer, yyin ); \ - yy_load_buffer_state(); \ - } +#define YY_NEW_FILE \ + do \ + { \ + yy_init_buffer( yy_current_buffer, yyin ); \ + yy_load_buffer_state(); \ + } \ + while ( 0 ) /* default declaration of generated scanner - a define so the user can * easily add parameters @@ -137,12 +171,14 @@ typedef struct yy_buffer_state *YY_BUFFER_STATE; /* return all but the first 'n' matched characters back to the input stream */ #define yyless(n) \ - { \ - /* undo effects of setting up yytext */ \ - *yy_cp = yy_hold_char; \ - yy_c_buf_p = yy_cp = yy_bp + n; \ - YY_DO_BEFORE_ACTION; /* set up yytext again */ \ - } + do \ + { \ + /* undo effects of setting up yytext */ \ + *yy_cp = yy_hold_char; \ + yy_c_buf_p = yy_cp = yy_bp + n; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) #define unput(c) yyunput( c, yytext ) @@ -189,6 +225,10 @@ static int yy_n_chars; /* number of characters read into yy_ch_buf */ #define YY_USER_ACTION #endif +#ifndef YY_USER_INIT +#define YY_USER_INIT +#endif + extern YY_CHAR *yytext; extern int yyleng; extern FILE *yyin, *yyout; @@ -208,9 +248,6 @@ 
static YY_CHAR *yy_c_buf_p = (YY_CHAR *) 0; static int yy_init = 1; /* whether we need to initialize */ static int yy_start = 0; /* start state number */ -static yy_state_type yy_last_accepting_state; -static YY_CHAR *yy_last_accepting_cpos; - /* flag which is used to allow yywrap()'s to do buffer switches * instead of setting up a fresh yyin. A bit of a hack ... */ @@ -227,6 +264,8 @@ YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size )); void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b )); void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file )); +#define yy_new_buffer yy_create_buffer + #ifdef __cplusplus static int yyinput YY_PROTO(( void )); #else @@ -243,6 +282,8 @@ YY_DECL if ( yy_init ) { + YY_USER_INIT; + if ( ! yy_start ) yy_start = 1; /* first start state */ -- cgit v1.2.3 From 0fd7f2520b08f196ba8f66ef66323cd9eebdd908 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 26 May 1990 16:42:56 +0000 Subject: Added prototypes changed memory allocation routines to deal with void*'s instead of char*'s some rearranging for VMS --- flexdef.h | 267 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 238 insertions(+), 29 deletions(-) diff --git a/flexdef.h b/flexdef.h index 153522a..d9ec2c2 100644 --- a/flexdef.h +++ b/flexdef.h @@ -46,6 +46,14 @@ #define DEFAULT_CSIZE 128 #endif +#ifndef PROTO +#ifdef __STDC__ +#define PROTO(proto) proto +#else +#define PROTO(proto) () +#endif +#endif + #ifdef USG #define SYS_V @@ -53,6 +61,18 @@ #ifdef SYS_V #include +#else + +#include +#ifdef lint +char *sprintf(); /* keep lint happy */ +#endif +#ifdef SCO_UNIX +void *memset(); +#else +char *memset(); +#endif +#endif #ifdef AMIGA #define bzero(s, n) setmem((char *)(s), n, '\0') @@ -60,34 +80,13 @@ #define abs(x) ((x) < 0 ? 
-(x) : (x)) #endif #else -#define bzero(s, n) memset((char *)(s), '\0', n) +#define bzero(s, n) (void) memset((char *)(s), '\0', n) #endif -#ifndef VMS -#ifndef SYS_V -/* System V systems should already declare memset as returning void* */ -char *memset(); -#endif -#else -/* memset is needed for old versions of the VMS C runtime library */ -#define memset(s, c, n) \ - { \ - register char *t = s; \ - register int m = n; \ - while ( m-- > 0 ) \ - *t++ = c; \ - } +#ifdef VMS #define unlink delete #define SHORT_FILE_NAMES #endif -#endif - -#ifndef SYS_V -#include -#ifdef lint -char *sprintf(); /* keep lint happy */ -#endif -#endif char *malloc(), *realloc(); @@ -608,13 +607,13 @@ extern int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; extern int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; extern int num_backtracking, bol_needed; -char *allocate_array(), *reallocate_array(); +void *allocate_array(), *reallocate_array(); #define allocate_integer_array(size) \ (int *) allocate_array( size, sizeof( int ) ) #define reallocate_integer_array(array,size) \ - (int *) reallocate_array( (char *) array, size, sizeof( int ) ) + (int *) reallocate_array( (void *) array, size, sizeof( int ) ) #define allocate_int_ptr_array(size) \ (int **) allocate_array( size, sizeof( int * ) ) @@ -627,22 +626,232 @@ char *allocate_array(), *reallocate_array(); allocate_array( size, sizeof( union dfaacc_union ) ) #define reallocate_int_ptr_array(array,size) \ - (int **) reallocate_array( (char *) array, size, sizeof( int * ) ) + (int **) reallocate_array( (void *) array, size, sizeof( int * ) ) #define reallocate_char_ptr_array(array,size) \ - (char **) reallocate_array( (char *) array, size, sizeof( char * ) ) + (char **) reallocate_array( (void *) array, size, sizeof( char * ) ) #define reallocate_dfaacc_union(array, size) \ - (union dfaacc_union *) reallocate_array( (char *) array, size, sizeof( union dfaacc_union ) ) + (union dfaacc_union *) \ + reallocate_array( (void 
*) array, size, sizeof( union dfaacc_union ) ) #define allocate_character_array(size) \ (Char *) allocate_array( size, sizeof( Char ) ) #define reallocate_character_array(array,size) \ - (Char *) reallocate_array( (char *) array, size, sizeof( Char ) ) + (Char *) reallocate_array( (void *) array, size, sizeof( Char ) ) /* used to communicate between scanner and parser. The type should really * be YYSTYPE, but we can't easily get our hands on it. */ extern int yylval; + + +/* external functions that are cross-referenced among the flex source files */ + + +/* from file ccl.c */ + +extern void ccladd PROTO((int, int)); /* Add a single character to a ccl */ +extern int cclinit PROTO(()); /* make an empty ccl */ +extern void cclnegate PROTO((int)); /* negate a ccl */ + +/* list the members of a set of characters in CCL form */ +extern void list_character_set PROTO((FILE*, int[])); + + +/* from file dfa.c */ + +/* increase the maximum number of dfas */ +extern void increase_max_dfas PROTO(()); + +extern void ntod PROTO(()); /* convert a ndfa to a dfa */ + + +/* from file ecs.c */ + +/* convert character classes to set of equivalence classes */ +extern void ccl2ecl PROTO(()); + +/* associate equivalence class numbers with class members */ +extern int cre8ecs PROTO((int[], int[], int)); + +/* associate equivalence class numbers using %t table */ +extern int ecs_from_xlation PROTO((int[])); + +/* update equivalence classes based on character class transitions */ +extern void mkeccl PROTO((Char[], int, int[], int[], int, int)); + +/* create equivalence class for single character */ +extern void mkechar PROTO((int, int[], int[])); + + +/* from file gen.c */ + +extern void make_tables PROTO(()); /* generate transition tables */ + + +/* from file main.c */ + +extern void flexend PROTO((int)); + + +/* from file misc.c */ + +/* write out the actions from the temporary file to lex.yy.c */ +extern void action_out PROTO(()); + +/* true if a string is all lower case */ +extern int 
all_lower PROTO((register Char *)); + +/* true if a string is all upper case */ +extern int all_upper PROTO((register Char *)); + +/* bubble sort an integer array */ +extern void bubble PROTO((int [], int)); + +/* shell sort a character array */ +extern void cshell PROTO((Char [], int, int)); + +extern void dataend PROTO(()); /* finish up a block of data declarations */ + +/* report an error message and terminate */ +extern void flexerror PROTO((char[])); + +/* report a fatal error message and terminate */ +extern void flexfatal PROTO((char[])); + +/* report an error message formatted with one integer argument */ +extern void lerrif PROTO((char[], int)); + +/* report an error message formatted with one string argument */ +extern void lerrsf PROTO((char[], char[])); + +/* spit out a "# line" statement */ +extern void line_directive_out PROTO((FILE*)); + +/* generate a data statment for a two-dimensional array */ +extern void mk2data PROTO((int)); + +extern void mkdata PROTO((int)); /* generate a data statement */ + +/* return the integer represented by a string of digits */ +extern int myctoi PROTO((Char [])); + +/* write out one section of the skeleton file */ +extern void skelout PROTO(()); + +/* output a yy_trans_info structure */ +extern void transition_struct_out PROTO((int, int)); + + +/* from file nfa.c */ + +/* add an accepting state to a machine */ +extern void add_accept PROTO((int, int)); + +/* make a given number of copies of a singleton machine */ +extern int copysingl PROTO((int, int)); + +/* debugging routine to write out an nfa */ +extern void dumpnfa PROTO((int)); + +/* finish up the processing for a rule */ +extern void finish_rule PROTO((int, int, int, int)); + +/* connect two machines together */ +extern int link_machines PROTO((int, int)); + +/* mark each "beginning" state in a machine as being a "normal" (i.e., + * not trailing context associated) state + */ +extern void mark_beginning_as_normal PROTO((register int)); + +/* make a machine that 
branches to two machines */ +extern int mkbranch PROTO((int, int)); + +extern int mkclos PROTO((int)); /* convert a machine into a closure */ +extern int mkopt PROTO((int)); /* make a machine optional */ + +/* make a machine that matches either one of two machines */ +extern int mkor PROTO((int, int)); + +/* convert a machine into a positive closure */ +extern int mkposcl PROTO((int)); + +extern int mkrep PROTO((int, int, int)); /* make a replicated machine */ + +/* create a state with a transition on a given symbol */ +extern int mkstate PROTO((int)); + +extern void new_rule PROTO(()); /* initialize for a new rule */ + + +/* from file parse.y */ + +/* write out a message formatted with one string, pinpointing its location */ +extern void format_pinpoint_message PROTO((char[], char[])); + +/* write out a message, pinpointing its location */ +extern void pinpoint_message PROTO((char[])); + +extern void synerr PROTO((char [])); /* report a syntax error */ +extern int yyparse PROTO(()); /* the YACC parser */ + + +/* from file scan.l */ + +extern int flexscan PROTO(()); /* the Flex-generated scanner for flex */ + +/* open the given file (if NULL, stdin) for scanning */ +extern void set_input_file PROTO((char*)); + +extern int yywrap PROTO(()); /* wrapup a file in the lexical analyzer */ + + +/* from file sym.c */ + +/* save the text of a character class */ +extern void cclinstal PROTO ((Char [], int)); + +/* lookup the number associated with character class */ +extern int ccllookup PROTO((Char [])); + +extern void ndinstal PROTO((char[], Char[])); /* install a name definition */ +extern void scinstal PROTO((char[], int)); /* make a start condition */ + +/* lookup the number associated with a start condition */ +extern int sclookup PROTO((char[])); + + +/* from file tblcmp.c */ + +/* build table entries for dfa state */ +extern void bldtbl PROTO((int[], int, int, int, int)); + +extern void cmptmps PROTO(()); /* compress template table entries */ +extern void inittbl 
PROTO(()); /* initialize transition tables */ +extern void mkdeftbl PROTO(()); /* make the default, "jam" table entries */ + +/* create table entries for a state (or state fragment) which has + * only one out-transition */ +extern void mk1tbl PROTO((int, int, int, int)); + +/* place a state into full speed transition table */ +extern void place_state PROTO((int*, int, int)); + +/* save states with only one out-transition to be processed later */ +extern void stack1 PROTO((int, int, int, int)); + + +/* from file yylex.c */ + +extern int yylex PROTO(()); + + +/* The Unix kernel calls used here */ + +extern int read PROTO((int, char*, int)); +extern int unlink PROTO((char*)); +extern int write PROTO((int, char*, int)); -- cgit v1.2.3 From 4436da6f0b6c6f773072d13f3e6ad9687d53f8c8 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 26 May 1990 16:48:51 +0000 Subject: prototypes for forward references declared void functions as such yy_flex_debug testing of error on file closes casts to void for sprintf() and strcpy() --- gen.c | 81 +++++++++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 55 insertions(+), 26 deletions(-) diff --git a/gen.c b/gen.c index d3dad92..52956aa 100644 --- a/gen.c +++ b/gen.c @@ -38,6 +38,14 @@ static char rcsid[] = #include "flexdef.h" +/* declare functions that have forward references */ + +void gen_next_state PROTO((int)); +void genecs PROTO(()); +void indent_put2s PROTO((char [], char [])); +void indent_puts PROTO((char [])); + + static int indent_level = 0; /* each level is 4 spaces */ #define indent_up() (++indent_level) @@ -55,7 +63,7 @@ static char C_state_decl[] = /* indent to the current level */ -do_indent() +void do_indent() { register int i = indent_level * 4; @@ -76,7 +84,7 @@ do_indent() /* generate the code to keep backtracking information */ -gen_backtracking() +void gen_backtracking() { if ( reject || num_backtracking == 0 ) @@ -98,7 +106,7 @@ gen_backtracking() /* generate the code to perform the 
backtrack */ -gen_bt_action() +void gen_bt_action() { if ( reject || num_backtracking == 0 ) @@ -132,7 +140,7 @@ gen_bt_action() * genctbl(); */ -genctbl() +void genctbl() { register int i; @@ -202,7 +210,7 @@ genctbl() /* table of pointers to start states */ printf( "static const struct yy_trans_info *yy_start_state_list[%d] =\n", - lastsc * 2 + 1 ); + lastsc * 2 + 1 ); printf( " {\n" ); for ( i = 0; i <= lastsc * 2; ++i ) @@ -217,7 +225,7 @@ genctbl() /* generate equivalence-class tables */ -genecs() +void genecs() { register int i, j; @@ -253,8 +261,7 @@ genecs() { for ( i = j; i < csize; i = i + numrows ) { - fprintf( stderr, "%4s = %-2d", - readable_form( i ), ecgroup[i] ); + fprintf( stderr, "%4s = %-2d", readable_form( i ), ecgroup[i] ); putc( ' ', stderr ); } @@ -267,7 +274,7 @@ genecs() /* generate the code to find the action number */ -gen_find_action() +void gen_find_action() { if ( fullspd ) @@ -388,7 +395,7 @@ gen_find_action() * genftbl(); */ -genftbl() +void genftbl() { register int i; @@ -422,7 +429,7 @@ genftbl() /* generate the code to find the next compressed-table state */ -gen_next_compressed_state( char_map ) +void gen_next_compressed_state( char_map ) char *char_map; { @@ -468,7 +475,7 @@ char *char_map; /* generate the code to find the next match */ -gen_next_match() +void gen_next_match() { /* NOTE - changes in here should be reflected in gen_next_state() and @@ -564,7 +571,7 @@ gen_next_match() /* generate the code to find the next state */ -gen_next_state( worry_about_NULs ) +void gen_next_state( worry_about_NULs ) int worry_about_NULs; { /* NOTE - changes in here should be reflected in get_next_match() */ @@ -573,13 +580,13 @@ int worry_about_NULs; if ( worry_about_NULs && ! nultrans ) { if ( useecs ) - sprintf( char_map, "(*yy_cp ? yy_ec[*yy_cp] : %d)", NUL_ec ); + (void) sprintf( char_map, "(*yy_cp ? yy_ec[*yy_cp] : %d)", NUL_ec ); else - sprintf( char_map, "(*yy_cp ? *yy_cp : %d)", NUL_ec ); + (void) sprintf( char_map, "(*yy_cp ? 
*yy_cp : %d)", NUL_ec ); } else - strcpy( char_map, useecs ? "yy_ec[*yy_cp]" : "*yy_cp" ); + (void) strcpy( char_map, useecs ? "yy_ec[*yy_cp]" : "*yy_cp" ); if ( worry_about_NULs && nultrans ) { @@ -623,7 +630,7 @@ int worry_about_NULs; /* generate the code to make a NUL transition */ -gen_NUL_trans() +void gen_NUL_trans() { /* NOTE - changes in here should be reflected in get_next_match() */ int need_backtracking = (num_backtracking > 0 && ! reject); @@ -665,7 +672,7 @@ gen_NUL_trans() { char NUL_ec_str[20]; - sprintf( NUL_ec_str, "%d", NUL_ec ); + (void) sprintf( NUL_ec_str, "%d", NUL_ec ); gen_next_compressed_state( NUL_ec_str ); if ( reject ) @@ -699,7 +706,7 @@ gen_NUL_trans() /* generate the code to find the start state */ -gen_start_state() +void gen_start_state() { if ( fullspd ) @@ -734,7 +741,7 @@ gen_start_state() * gentabs(); */ -gentabs() +void gentabs() { int i, j, k, *accset, nacc, *acc_array, total_states; @@ -973,7 +980,7 @@ gentabs() * current indentation level, adding a final newline */ -indent_put2s( fmt, arg ) +void indent_put2s( fmt, arg ) char fmt[], arg[]; { @@ -987,7 +994,7 @@ char fmt[], arg[]; * newline */ -indent_puts( str ) +void indent_puts( str ) char str[]; { @@ -1004,7 +1011,7 @@ char str[]; * Generates transition tables and finishes generating output file */ -make_tables() +void make_tables() { register int i; @@ -1071,6 +1078,12 @@ make_tables() else gentabs(); + if ( num_backtracking > 0 ) + { + indent_puts( "static yy_state_type yy_last_accepting_state;" ); + indent_puts( "static YY_CHAR *yy_last_accepting_cpos;\n" ); + } + if ( nultrans ) { printf( C_state_decl, "yy_NUL_trans", lastdfa + 1 ); @@ -1094,11 +1107,12 @@ make_tables() if ( ddebug ) { /* spit out table mapping rules to line numbers */ - printf( C_short_decl, "yy_rule_linenum", num_rules ); + indent_puts( "extern int yy_flex_debug;" ); + indent_puts( "int yy_flex_debug = 1;\n" ); + printf( C_short_decl, "yy_rule_linenum", num_rules ); for ( i = 1; i < num_rules; ++i ) 
mkdata( rule_linenum[i] ); - dataend(); } @@ -1168,9 +1182,17 @@ make_tables() skelout(); - (void) fclose( temp_action_file ); + if ( ferror( temp_action_file ) ) + flexfatal( "error occurred when writing temporary action file" ); + + else if ( fclose( temp_action_file ) ) + flexfatal( "error occurred when closing temporary action file" ); + temp_action_file = fopen( action_file_name, "r" ); + if ( temp_action_file == NULL ) + flexfatal( "could not re-open temporary action file" ); + /* copy prolog from action_file to output file */ action_out(); @@ -1205,6 +1227,10 @@ make_tables() skelout(); if ( ddebug ) { + indent_puts( "if ( yy_flex_debug )" ); + indent_up(); + + indent_puts( "{" ); indent_puts( "if ( yy_act == 0 )" ); indent_up(); indent_puts( "fprintf( stderr, \"--scanner backtracking\\n\" );" ); @@ -1237,6 +1263,9 @@ make_tables() indent_up(); indent_puts( "fprintf( stderr, \"--EOF\\n\" );" ); indent_down(); + + indent_puts( "}" ); + indent_down(); } /* copy actions from action_file to output file */ -- cgit v1.2.3 From b651edeb752b30eb02886d4fc23005250d27832a Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 26 May 1990 16:50:08 +0000 Subject: Added declaration of arguments made yylex() a function --- libmain.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/libmain.c b/libmain.c index 31adc8c..951bdaa 100644 --- a/libmain.c +++ b/libmain.c @@ -2,8 +2,12 @@ /* $Header$ */ -main() +extern int yylex(); + +int main( argc, argv ) +int argc; +char *argv[]; { - yylex(); + return yylex(); } -- cgit v1.2.3 From 5afa7fe502544056f031f0672dcaf592baf95341 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 26 May 1990 16:50:57 +0000 Subject: declared void functions as such prototypes for forward references shuffled around some routines to make the order perhaps a little more logical changed memory references to use void* instead of char* --- misc.c | 124 +++++++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 
71 insertions(+), 53 deletions(-) diff --git a/misc.c b/misc.c index cdbdf65..4d13ac7 100644 --- a/misc.c +++ b/misc.c @@ -39,6 +39,19 @@ static char rcsid[] = #include "flexdef.h" +/* ANSI C does not guarantee that isascii() is defined */ +#ifndef isascii +#define isascii(c) ((c) <= 0177) +#endif + + + +/* declare functions that have forward references */ + +void dataflush PROTO(()); +int otoi PROTO((Char [])); + + /* action_out - write the actions from the temporary file to lex.yy.c * * synopsis @@ -47,7 +60,7 @@ static char rcsid[] = * Copies the action file up to %% (or end-of-file) to lex.yy.c */ -action_out() +void action_out() { char buf[MAXLINE]; @@ -62,11 +75,11 @@ action_out() /* allocate_array - allocate memory for an integer array of the given size */ -char *allocate_array( size, element_size ) +void *allocate_array( size, element_size ) int size, element_size; { - register char *mem; + register void *mem; /* on 16-bit int machines (e.g., 80286) we might be trying to * allocate more than a signed int can hold, and that won't @@ -75,7 +88,7 @@ int size, element_size; if ( element_size * size <= 0 ) flexfatal( "request for < 1 byte in allocate_array()" ); - mem = malloc( (unsigned) (element_size * size) ); + mem = (void *) malloc( (unsigned) (element_size * size) ); if ( mem == NULL ) flexfatal( "memory allocation failed in allocate_array()" ); @@ -144,7 +157,7 @@ register Char *str; * v - the array to be sorted * n - the number of elements of 'v' to be sorted */ -bubble( v, n ) +void bubble( v, n ) int v[], n; { @@ -255,7 +268,8 @@ register Char *str; * v - array to be sorted * n - number of elements of v to be sorted */ -cshell( v, n, special_case_0 ) + +void cshell( v, n, special_case_0 ) Char v[]; int n, special_case_0; @@ -293,7 +307,8 @@ int n, special_case_0; * synopsis * dataend(); */ -dataend() + +void dataend() { if ( datapos > 0 ) @@ -313,7 +328,8 @@ dataend() * synopsis * dataflush(); */ -dataflush() + +void dataflush() { putchar( '\n' ); @@ 
-331,6 +347,40 @@ dataflush() datapos = 0; } + +/* flexerror - report an error message and terminate + * + * synopsis + * char msg[]; + * flexerror( msg ); + */ + +void flexerror( msg ) +char msg[]; + + { + fprintf( stderr, "%s: %s\n", program_name, msg ); + + flexend( 1 ); + } + + +/* flexfatal - report a fatal error message and terminate + * + * synopsis + * char msg[]; + * flexfatal( msg ); + */ + +void flexfatal( msg ) +char msg[]; + + { + fprintf( stderr, "%s: fatal internal error, %s\n", program_name, msg ); + flexend( 1 ); + } + + /* flex_gettime - return current time * * synopsis @@ -382,7 +432,7 @@ char *flex_gettime() * lerrif( msg, arg ); */ -lerrif( msg, arg ) +void lerrif( msg, arg ) char msg[]; int arg; @@ -400,7 +450,7 @@ int arg; * lerrsf( msg, arg ); */ -lerrsf( msg, arg ) +void lerrsf( msg, arg ) char msg[], arg[]; { @@ -411,39 +461,6 @@ char msg[], arg[]; } -/* flexerror - report an error message and terminate - * - * synopsis - * char msg[]; - * flexerror( msg ); - */ - -flexerror( msg ) -char msg[]; - - { - fprintf( stderr, "%s: %s\n", program_name, msg ); - - flexend( 1 ); - } - - -/* flexfatal - report a fatal error message and terminate - * - * synopsis - * char msg[]; - * flexfatal( msg ); - */ - -flexfatal( msg ) -char msg[]; - - { - fprintf( stderr, "%s: fatal internal error, %s\n", program_name, msg ); - flexend( 1 ); - } - - /* htoi - convert a hexadecimal digit string to an integer value * * synopsis: @@ -466,7 +483,7 @@ Char str[]; /* line_directive_out - spit out a "# line" statement */ -line_directive_out( output_file_name ) +void line_directive_out( output_file_name ) FILE *output_file_name; { @@ -483,7 +500,7 @@ FILE *output_file_name; * * generates a data statement initializing the current 2-D array to "value" */ -mk2data( value ) +void mk2data( value ) int value; { @@ -515,7 +532,7 @@ int value; * generates a data statement initializing the current array element to * "value" */ -mkdata( value ) +void mkdata( value ) int value; { 
@@ -675,7 +692,7 @@ register int c; case '\b': return ( "\\b" ); default: - sprintf( rform, "\\%.3o", c ); + (void) sprintf( rform, "\\%.3o", c ); return ( rform ); } } @@ -695,18 +712,19 @@ register int c; /* reallocate_array - increase the size of a dynamic array */ -char *reallocate_array( array, size, element_size ) -char *array; +void *reallocate_array( array, size, element_size ) +void *array; int size, element_size; { - register char *new_array; + register void *new_array; /* same worry as in allocate_array(): */ if ( size * element_size <= 0 ) flexfatal( "attempt to increase array size by less than 1 byte" ); - new_array = realloc( array, (unsigned) (size * element_size )); + new_array = + (void *) realloc( (char *)array, (unsigned) (size * element_size )); if ( new_array == NULL ) flexfatal( "attempt to increase array size failed" ); @@ -724,7 +742,7 @@ int size, element_size; * Copies from skelfile to stdout until a line beginning with "%%" or * EOF is found. */ -skelout() +void skelout() { char buf[MAXLINE]; @@ -747,7 +765,7 @@ skelout() * element_n. Formats the output with spaces and carriage returns. 
*/ -transition_struct_out( element_v, element_n ) +void transition_struct_out( element_v, element_n ) int element_v, element_n; { -- cgit v1.2.3 From f0f16ac55548b308d3f0e5cbfcaa528982ba5363 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 26 May 1990 16:51:47 +0000 Subject: declared void functions as such added prototypes for forward references --- nfa.c | 22 +++++++++++++++------- tblcmp.c | 34 ++++++++++++++++++++++------------ 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/nfa.c b/nfa.c index f17126c..a6def31 100644 --- a/nfa.c +++ b/nfa.c @@ -37,6 +37,13 @@ static char rcsid[] = #include "flexdef.h" + +/* declare functions that have forward references */ + +int dupmachine PROTO((int)); +void mkxtion PROTO((int, int)); + + /* add_accept - add an accepting state to a machine * * synopsis @@ -46,8 +53,8 @@ static char rcsid[] = * accepting_number becomes mach's accepting number. */ -add_accept( mach, accepting_number ) -int mach; +void add_accept( mach, accepting_number ) +int mach, accepting_number; { /* hang the accepting number off an epsilon state. if it is associated @@ -101,7 +108,7 @@ int singl, num; * dumpnfa( state1 ); */ -dumpnfa( state1 ) +void dumpnfa( state1 ) int state1; { @@ -191,6 +198,7 @@ int mach; return ( init ); } + /* finish_rule - finish up the processing for a rule * * synopsis @@ -207,7 +215,7 @@ int mach; * context has variable length. 
*/ -finish_rule( mach, variable_trail_rule, headcnt, trailcnt ) +void finish_rule( mach, variable_trail_rule, headcnt, trailcnt ) int mach, variable_trail_rule, headcnt, trailcnt; { @@ -318,7 +326,7 @@ int first, last; * The "beginning" states are the epsilon closure of the first state */ -mark_beginning_as_normal( mach ) +void mark_beginning_as_normal( mach ) register int mach; { @@ -666,7 +674,7 @@ int sym; * stateto - the state to which the transition is to be made */ -mkxtion( statefrom, stateto ) +void mkxtion( statefrom, stateto ) int statefrom, stateto; { @@ -694,7 +702,7 @@ int statefrom, stateto; * arrays (such as rule_type[]) are grown as needed. */ -new_rule() +void new_rule() { if ( ++num_rules >= current_max_rules ) diff --git a/tblcmp.c b/tblcmp.c index 2ffb38d..dbdf28f 100644 --- a/tblcmp.c +++ b/tblcmp.c @@ -37,6 +37,16 @@ static char rcsid[] = #include "flexdef.h" + +/* declarations for functions that have forward references */ + +void mkentry PROTO((register int*, int, int, int, int)); +void mkprot PROTO((int[], int, int)); +void mktemplate PROTO((int[], int, int)); +void mv2front PROTO((int)); +int tbldiff PROTO((int[], int, int[])); + + /* bldtbl - build table entries for dfa state * * synopsis @@ -72,7 +82,7 @@ static char rcsid[] = * cost only one difference. */ -bldtbl( state, statenum, totaltrans, comstate, comfreq ) +void bldtbl( state, statenum, totaltrans, comstate, comfreq ) int state[], statenum, totaltrans, comstate, comfreq; { @@ -218,7 +228,7 @@ int state[], statenum, totaltrans, comstate, comfreq; * table entries made for them. */ -cmptmps() +void cmptmps() { int tmpstorage[CSIZE + 1]; @@ -290,7 +300,7 @@ cmptmps() /* expand_nxt_chk - expand the next check arrays */ -expand_nxt_chk() +void expand_nxt_chk() { register int old_max = current_max_xpairs; @@ -424,7 +434,7 @@ int *state, numtrans; * own tbase/tdef tables. They are shifted down to be contiguous * with the non-template entries during table generation. 
*/ -inittbl() +void inittbl() { register int i; @@ -461,7 +471,7 @@ inittbl() * mkdeftbl(); */ -mkdeftbl() +void mkdeftbl() { int i; @@ -512,7 +522,7 @@ mkdeftbl() * state array. */ -mkentry( state, numchars, statenum, deflink, totaltrans ) +void mkentry( state, numchars, statenum, deflink, totaltrans ) register int *state; int numchars, statenum, deflink, totaltrans; @@ -645,7 +655,7 @@ int numchars, statenum, deflink, totaltrans; * mk1tbl( state, sym, onenxt, onedef ); */ -mk1tbl( state, sym, onenxt, onedef ) +void mk1tbl( state, sym, onenxt, onedef ) int state, sym, onenxt, onedef; { @@ -678,7 +688,7 @@ int state, sym, onenxt, onedef; * mkprot( state, statenum, comstate ); */ -mkprot( state, statenum, comstate ) +void mkprot( state, statenum, comstate ) int state[], statenum, comstate; { @@ -722,7 +732,7 @@ int state[], statenum, comstate; * mktemplate( state, statenum, comstate, totaltrans ); */ -mktemplate( state, statenum, comstate ) +void mktemplate( state, statenum, comstate ) int state[], statenum, comstate; { @@ -780,7 +790,7 @@ int state[], statenum, comstate; * mv2front( qelm ); */ -mv2front( qelm ) +void mv2front( qelm ) int qelm; { @@ -813,7 +823,7 @@ int qelm; * Transnum is the number of out-transitions for the state. */ -place_state( state, statenum, transnum ) +void place_state( state, statenum, transnum ) int *state, statenum, transnum; { @@ -859,7 +869,7 @@ int *state, statenum, transnum; * no room, we process the sucker right now. 
*/ -stack1( statenum, sym, nextstate, deflink ) +void stack1( statenum, sym, nextstate, deflink ) int statenum, sym, nextstate, deflink; { -- cgit v1.2.3 From 74949cce89750f52071806d312aae7a688c1b49b Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 26 May 1990 16:52:28 +0000 Subject: introduced format_pinpoint_message() declared void functions as such changed lone <> to apply to all outstanding start conditions --- parse.y | 58 ++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/parse.y b/parse.y index be37f22..73d3f8f 100644 --- a/parse.y +++ b/parse.y @@ -147,8 +147,8 @@ flexrule : scon '^' rule bol_needed = true; if ( performance_report ) - fprintf( stderr, - "'^' operator results in sub-optimal performance\n" ); + pinpoint_message( + "'^' operator results in sub-optimal performance" ); } } @@ -182,8 +182,8 @@ flexrule : scon '^' rule bol_needed = true; if ( performance_report ) - fprintf( stderr, - "'^' operator results in sub-optimal performance\n" ); + pinpoint_message( + "'^' operator results in sub-optimal performance" ); } } @@ -203,9 +203,21 @@ flexrule : scon '^' rule | EOF_OP { - /* this EOF applies only to the INITIAL start cond. */ - actvsc[actvp = 1] = 1; - build_eof_action(); + /* this EOF applies to all start conditions + * which don't already have EOF actions + */ + actvp = 0; + + for ( i = 1; i <= lastsc; ++i ) + if ( ! 
sceof[i] ) + actvsc[++actvp] = i; + + if ( actvp == 0 ) + pinpoint_message( + "warning - all start conditions already have <> rules" ); + + else + build_eof_action(); } | error @@ -218,7 +230,8 @@ scon : '<' namelist2 '>' namelist2 : namelist2 ',' NAME { if ( (scnum = sclookup( nmstr )) == 0 ) - lerrsf( "undeclared start condition %s", nmstr ); + format_pinpoint_message( + "undeclared start condition %s", nmstr ); else actvsc[++actvp] = scnum; @@ -227,7 +240,8 @@ namelist2 : namelist2 ',' NAME | NAME { if ( (scnum = sclookup( nmstr )) == 0 ) - lerrsf( "undeclared start condition %s", nmstr ); + format_pinpoint_message( + "undeclared start condition %s", nmstr ); else actvsc[actvp = 1] = scnum; } @@ -621,7 +635,7 @@ string : string CHAR * conditions */ -build_eof_action() +void build_eof_action() { register int i; @@ -629,7 +643,8 @@ build_eof_action() for ( i = 1; i <= actvp; ++i ) { if ( sceof[actvsc[i]] ) - lerrsf( "multiple <> rules for start condition %s", + format_pinpoint_message( + "multiple <> rules for start condition %s", scname[actvsc[i]] ); else @@ -646,7 +661,7 @@ build_eof_action() /* synerr - report a syntax error */ -synerr( str ) +void synerr( str ) char str[]; { @@ -655,9 +670,24 @@ char str[]; } +/* format_pinpoint_message - write out a message formatted with one string, + * pinpointing its location + */ + +void format_pinpoint_message( msg, arg ) +char msg[], arg[]; + + { + char errmsg[MAXLINE]; + + (void) sprintf( errmsg, msg, arg ); + pinpoint_message( errmsg ); + } + + /* pinpoint_message - write out a message, pinpointing its location */ -pinpoint_message( str ) +void pinpoint_message( str ) char str[]; { @@ -669,7 +699,7 @@ char str[]; * currently, messages are ignore */ -yyerror( msg ) +void yyerror( msg ) char msg[]; { -- cgit v1.2.3 From 97d130eea6af5992d855468572b50f0b6919bc24 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 26 May 1990 16:53:23 +0000 Subject: declared void functions as such changed to strip # comments, as 
documented moved #undef of yywrap() to before include of flexdef, so prototype doesn't get screwed up --- scan.l | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scan.l b/scan.l index 31fa754..59864a4 100644 --- a/scan.l +++ b/scan.l @@ -25,6 +25,8 @@ */ %{ +#undef yywrap + #include "flexdef.h" #include "parse.h" @@ -63,8 +65,6 @@ static char rcsid[] = #define CHECK_YYMORE(str) \ if ( all_lower( str ) ) \ yymore_used = true; - -#undef yywrap %} %x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE @@ -89,7 +89,7 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) Char nmdef[MAXLINE], myesc(); ^{WS} indented_code = true; BEGIN(CODEBLOCK); -^#.*\n ++linenum; ECHO; /* treat as a comment */ +^#.*\n ++linenum; /* treat as a comment */ ^"/*" ECHO; BEGIN(C_COMMENT); ^"%s"{NAME}? return ( SCDECL ); ^"%x"{NAME}? return ( XSCDECL ); @@ -110,12 +110,12 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) } ^"%used" { - pinpoint_message( "Warning, %%used/%%unused have been deprecated" ); + pinpoint_message( "warning - %%used/%%unused have been deprecated" ); checking_used = REALLY_USED; BEGIN(USED_LIST); } ^"%unused" { checking_used = REALLY_NOT_USED; BEGIN(USED_LIST); - pinpoint_message( "Warning, %%used/%%unused have been deprecated" ); + pinpoint_message( "warning - %%used/%%unused have been deprecated" ); checking_used = REALLY_NOT_USED; BEGIN(USED_LIST); } @@ -513,7 +513,7 @@ int yywrap() /* set_input_file - open the given file (if NULL, stdin) for scanning */ -set_input_file( file ) +void set_input_file( file ) char *file; { -- cgit v1.2.3 From d211329c1eb607b79940661c6ab3ab3339f52874 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 26 May 1990 16:53:57 +0000 Subject: declared void functions as such added prototypes for forward references changed to use format_pinpoint_message where appropriate --- sym.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sym.c b/sym.c index 12daf37..0047e70 100644 --- 
a/sym.c +++ b/sym.c @@ -37,6 +37,12 @@ static char rcsid[] = #include "flexdef.h" + +/* declare functions that have forward references */ + +int hashfunct PROTO((register char[], int)); + + struct hash_entry *ndtbl[NAME_TABLE_HASH_SIZE]; struct hash_entry *sctbl[START_COND_HASH_SIZE]; struct hash_entry *ccltab[CCL_HASH_SIZE]; @@ -112,7 +118,7 @@ int table_size; * cclinstal( ccltxt, cclnum ); */ -cclinstal( ccltxt, cclnum ) +void cclinstal( ccltxt, cclnum ) Char ccltxt[]; int cclnum; @@ -210,7 +216,7 @@ int hash_size; * ndinstal( nd, def ); */ -ndinstal( nd, def ) +void ndinstal( nd, def ) char nd[]; Char def[]; @@ -251,7 +257,7 @@ char nd[]; * the start condition is Exclusive if xcluflg is true */ -scinstal( str, xcluflg ) +void scinstal( str, xcluflg ) char str[]; int xcluflg; @@ -288,7 +294,7 @@ int xcluflg; if ( addsym( scname[lastsc], (char *) 0, lastsc, sctbl, START_COND_HASH_SIZE ) ) - lerrsf( "start condition %s declared twice", str ); + format_pinpoint_message( "start condition %s declared twice", str ); scset[lastsc] = mkstate( SYM_EPSILON ); scbol[lastsc] = mkstate( SYM_EPSILON ); -- cgit v1.2.3 From da9a7b41b184131889127a21336e0037ae591932 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 26 May 1990 16:54:45 +0000 Subject: Added macro definition for isascii() if not already present --- yylex.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/yylex.c b/yylex.c index 2a55f12..52e4a18 100644 --- a/yylex.c +++ b/yylex.c @@ -39,6 +39,13 @@ static char rcsid[] = #include "flexdef.h" #include "parse.h" + +/* ANSI C does not guarantee that isascii() is defined */ +#ifndef isascii +#define isascii(c) ((c) <= 0177) +#endif + + /* yylex - scan for a regular expression token * * synopsis -- cgit v1.2.3 From 22eb4a996511769231111fa80dc02d12620117cd Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 26 May 1990 16:56:04 +0000 Subject: declared void functions as such added prototypes for forward references changed to check for error status when closing 
files --- main.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/main.c b/main.c index 148febf..24f888b 100644 --- a/main.c +++ b/main.c @@ -39,7 +39,14 @@ static char rcsid[] = #include "flexdef.h" -static char flex_version[] = "2.2"; +static char flex_version[] = "2.3"; + + +/* declare functions that have forward references */ + +void flexinit PROTO((int, char**)); +void readin PROTO(()); +void set_up_initial_allocations PROTO(()); /* these globals are all defined and commented in flexdef.h */ @@ -98,7 +105,7 @@ static int use_stdout; static char *skelname = NULL; -main( argc, argv ) +int main( argc, argv ) int argc; char **argv; @@ -124,15 +131,13 @@ char **argv; { if ( interactive ) fprintf( stderr, - "-I (interactive) entails a minor performance penalty\n" ); + "-I (interactive) entails a minor performance penalty\n" ); if ( yymore_used ) - fprintf( stderr, - "yymore() entails a minor performance penalty\n" ); + fprintf( stderr, "yymore() entails a minor performance penalty\n" ); if ( reject ) - fprintf( stderr, - "REJECT entails a large performance penalty\n" ); + fprintf( stderr, "REJECT entails a large performance penalty\n" ); if ( variable_trailing_context_rules ) fprintf( stderr, @@ -179,7 +184,7 @@ char **argv; * This routine does not return. 
*/ -flexend( status ) +void flexend( status ) int status; { @@ -187,18 +192,36 @@ int status; char *flex_gettime(); if ( skelfile != NULL ) - (void) fclose( skelfile ); + { + if ( ferror( skelfile ) ) + flexfatal( "error occurred when writing skeleton file" ); + + else if ( fclose( skelfile ) ) + flexfatal( "error occurred when closing skeleton file" ); + } if ( temp_action_file ) { - (void) fclose( temp_action_file ); - (void) unlink( action_file_name ); + if ( ferror( temp_action_file ) ) + flexfatal( "error occurred when writing temporary action file" ); + + else if ( fclose( temp_action_file ) ) + flexfatal( "error occurred when closing temporary action file" ); + + else if ( unlink( action_file_name ) ) + flexfatal( "error occurred when deleting temporary action file" ); } if ( status != 0 && outfile_created ) { - (void) fclose( stdout ); - (void) unlink( outfile ); + if ( ferror( stdout ) ) + flexfatal( "error occurred when writing output file" ); + + else if ( fclose( stdout ) ) + flexfatal( "error occurred when closing output file" ); + + else if ( unlink( outfile ) ) + flexfatal( "error occurred when deleting output file" ); } if ( backtrack_report && backtrack_file ) @@ -212,7 +235,11 @@ int status; else fprintf( backtrack_file, "Compressed tables always backtrack.\n" ); - (void) fclose( backtrack_file ); + if ( ferror( backtrack_file ) ) + flexfatal( "error occurred when writing backtracking file" ); + + else if ( fclose( backtrack_file ) ) + flexfatal( "error occurred when closing backtracking file" ); } if ( printstats ) @@ -267,8 +294,9 @@ int status; fprintf( stderr, " %d/%d NFA states\n", lastnfa, current_mns ); fprintf( stderr, " %d/%d DFA states (%d words)\n", lastdfa, - current_max_dfas, totnst ); - fprintf( stderr, " %d rules\n", num_rules - 1 /* - 1 for def. rule */ ); + current_max_dfas, totnst ); + fprintf( stderr, + " %d rules\n", num_rules - 1 /* - 1 for def. 
rule */ ); if ( num_backtracking == 0 ) fprintf( stderr, " No backtracking\n" ); @@ -282,7 +310,7 @@ int status; fprintf( stderr, " Beginning-of-line patterns used\n" ); fprintf( stderr, " %d/%d start conditions\n", lastsc, - current_max_scs ); + current_max_scs ); fprintf( stderr, " %d epsilon states, %d double epsilon states\n", numeps, eps2 ); @@ -359,7 +387,7 @@ int status; * flexinit( argc, argv ); */ -flexinit( argc, argv ) +void flexinit( argc, argv ) int argc; char **argv; @@ -637,7 +665,7 @@ get_next_arg: /* used by -C and -S flags in lieu of a "continue 2" control */ * readin(); */ -readin() +void readin() { skelout(); @@ -682,7 +710,7 @@ readin() /* set_up_initial_allocations - allocate memory for internal tables */ -set_up_initial_allocations() +void set_up_initial_allocations() { current_mns = INITIAL_MNS; -- cgit v1.2.3 From 3230360cc9ca4535b668fd2704dcbeb21d9f672c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 26 May 1990 17:02:23 +0000 Subject: documentation on new features Comment regarding Ove's work ^foo|bar difference between flex / lex yyin initialization difference documented that yy_switch_to_buffer can be used in yywrap() documented that # comments are deprecated --- flex.1 | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 88 insertions(+), 17 deletions(-) diff --git a/flex.1 b/flex.1 index 33595bc..ffa589a 100644 --- a/flex.1 +++ b/flex.1 @@ -1,4 +1,4 @@ -.TH FLEX 1 "20 March 1990" "Version 2.2" +.TH FLEX 1 "26 May 1990" "Version 2.3" .SH NAME flex - fast lexical analyzer generator .SH SYNOPSIS @@ -253,7 +253,8 @@ compliance; see below for other such features). In the definitions section, an unindented comment (i.e., a line beginning with "/*") is also copied verbatim to the output up to the next "*/". Also, any line in the definitions section -beginning with '#' is ignored. +beginning with '#' is ignored, though this style of comment is +deprecated and may go away in the future. 
.SH PATTERNS The patterns in the input are written using an extended set of regular expressions. These are: @@ -1066,6 +1067,12 @@ handle, which may then be passed to other routines: switches the scanner's input buffer so subsequent tokens will come from .I new_buffer. +Note that +.B yy_switch_to_buffer() +may be used by yywrap() to set things up for continued scanning, instead +of opening a new file and pointing +.I yyin +at it. .nf void yy_delete_buffer( YY_BUFFER_STATE buffer ) @@ -1073,6 +1080,15 @@ come from .fi is used to reclaim the storage associated with a buffer. .LP +.B yy_new_buffer() +is an alias for +.B yy_create_buffer(), +provided for compatibility with the C++ use of +.I new +and +.I delete +for creating and destroying dynamic objects. +.LP Finally, the .B YY_CURRENT_BUFFER macro returns a @@ -1164,15 +1180,15 @@ as shown in the example above. <<EOF>> rules may not be used with other patterns; they may only be qualified with a list of start conditions. If an unqualified <<EOF>> rule is given, it -applies only to the -.B INITIAL -start condition, and -.I not -to -.B %s -(or -.B %x) -start conditions. +applies to +.I all +start conditions which do not already have <<EOF>> actions. To +specify an <<EOF>> rule for only the initial start condition, use +.nf + + <INITIAL><<EOF>> + +.fi .LP These rules are useful for catching things like unclosed comments. An example: @@ -1206,6 +1222,13 @@ can be redefined to provide an action which is always executed prior to the matched rule's action. For example, it could be #define'd to call a routine to convert yytext to lower-case. .LP +The macro +.B YY_USER_INIT +may be redefined to provide an action which is always executed before +the first scan (and before the scanner's internal initializations are done). +For example, it could be used to call a routine to read +in a data table or open a logging file.
+.LP In the generated scanner, the actions are all gathered in one large switch statement and separated using .B YY_BREAK, @@ -1351,8 +1374,10 @@ the POSIX meaning is removed first). .B -d makes the generated scanner run in .I debug -mode. Whenever a pattern is recognized the scanner will -write to +mode. Whenever a pattern is recognized and the global +.B yy_flex_debug +is non-zero (which is the default), +the scanner will write to .I stderr a line of the form: .nf @@ -2139,6 +2164,16 @@ interprets it as "match 'ab' followed by one, two, or three occurrences of 'c'". The latter is in agreement with the current POSIX draft. .IP - +The precedence of the +.B ^ +operator is different. +.I lex +interprets "^foo|bar" as "match either 'foo' at the beginning of a line, +or 'bar' anywhere", whereas +.I flex +interprets it as "match either 'foo' or 'bar' if they come at the beginning +of a line". The latter is in agreement with the current POSIX draft. +.IP - To refer to yytext outside of the scanner source file, the correct definition with .I flex @@ -2154,6 +2189,31 @@ require of .I lex users). .IP - +.I yyin +is +.I initialized +by +.I lex +to be +.I stdin; +.I flex, +on the other hand, +initializes +.I yyin +to NULL +and then +.I assigns +it to +.I stdin +the first time the scanner is called, providing +.I yyin +has not already been assigned to a non-NULL value. The difference is +subtle, but the net effect is that with +.I flex +scanners, +.I yyin +does not have a valid value until the scanner has been called. +.IP - The special table-size declarations such as .B %a supported by @@ -2185,7 +2245,7 @@ or the POSIX draft standard: #line directives %{}'s around actions yyrestart() - comments beginning with '#' + comments beginning with '#' (deprecated) multiple actions on a line .fi @@ -2231,7 +2291,7 @@ input file. 
(Note that previously supported a .B %used/%unused mechanism for dealing with this problem; this feature is still supported -but now deprecated, and will go away soon unless the author hear's from +but now deprecated, and will go away soon unless the author hears from people who can argue compellingly that they need it.) .LP .I flex scanner jammed - @@ -2273,13 +2333,15 @@ Jacobson. The implementation was done by Kevin Gong and Vern Paxson. .LP Thanks to the many .I flex -beta-testers and feedbackers, especially Casey +beta-testers, feedbackers, and contributors, especially Casey Leedom, benson@odi.com, Frederic Brehm, Nick Christopher, Jason Coughlin, +Scott David Daniels, Leo Eskin, Chris Faylor, Eric Goldman, Eric Hughes, Jeffrey R. Jones, Kevin B. Kenny, Ronald Lamprecht, Greg Lee, Craig Leres, Mohamed el Lozy, Jim Meyering, Marc Nozell, Esmond Pitt, -Jef Poskanzer, Dave Tallman, Frank Whaley, Ken Yap, and those whose names +Jef Poskanzer, Jim Roskind, +Dave Tallman, Frank Whaley, Ken Yap, and those whose names have slipped my marginal mail-archiving skills but whose contributions are appreciated all the same. .LP @@ -2292,6 +2354,15 @@ to Benson Margulies and Fred Burke for C++ support; to Ove Ewerlid for the basics of support for NUL's; and to Eric Hughes for the basics of support for multiple buffers. .LP +Work is being done on extending +.I flex +to generate scanners in which the +state machine is directly represented in C code rather than tables. +These scanners may well be substantially faster than those generated +using -f or -F. If you are working in this area and are interested +in comparing notes and seeing whether redundant work can be avoided, +contact Ove Ewerlid (ewerlid@mizar.DoCS.UU.SE). +.LP This work was primarily done when I was at the Real Time Systems Group at the Lawrence Berkeley Laboratory in Berkeley, CA. Many thanks to all there for the support I received. 
-- cgit v1.2.3 From 64749a9fd4e65a222f4f430938721c4aff214aaa Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 26 May 1990 17:16:56 +0000 Subject: 2.3 changes --- NEWS | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/NEWS b/NEWS index 4181bec..d3c9868 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,63 @@ +Changes between 2.3 (full) release of 26May90 and 2.2 (alpha) release: + + User-visible: + + - A lone <<EOF>> rule (that is, one which is not qualified with + a list of start conditions) now specifies the EOF action for + *all* start conditions which haven't already had <<EOF>> actions + given. To specify an end-of-file action for just the initial + state, use <INITIAL><<EOF>>. + + - -d debug output is now contingent on the global yy_flex_debug + being set to a non-zero value, which it is by default. + + - A new macro, YY_USER_INIT, is provided for the user to specify + initialization action to be taken on the first call to the + scanner. This action is done before the scanner does its + own initialization. + + - yy_new_buffer() has been added as an alias for yy_create_buffer() + + - Comments beginning with '#' and extending to the end of the line + now work, but have been deprecated (in anticipation of making + flex recognize #line directives). + + - It is now documented that flex interprets "^foo|bar" differently + from lex. flex interprets it as "match either a 'foo' or a 'bar', + providing it comes at the beginning of a line", whereas lex + interprets it as "match either a 'foo' at the beginning of a line, + or a 'bar' anywhere". + + - It is now documented that flex initializes the global "yyin" on + the first call to the scanner, while lex initializes it at + compile-time. + + - It is now documented that yy_switch_to_buffer() can be used + in the yywrap() macro/routine. + + - The funky restrictions on when semi-colons could follow the + YY_NEW_FILE and yyless macros have been removed.
They now + behave identically to functions. + + + Other changes: + + - Prototypes and proper declarations of void routines have + been added to the flex source code, courtesy of Kevin B. Kenny. + + - Routines dealing with memory allocation now use void* pointers + instead of char* - see README for porting implications. + + - Error-checking is now done when flex closes a file. + + - Various lint tweaks were added to reduce the number of gripes. + + - A note has been added to flexdoc.1 mentioning work in progress + on modifying flex to generate straight C code rather than a + table-driven automaton, with an email address of whom to contact + if you are working along similar lines. + + Changes between 2.2 Patch #3 (30Mar90) and 2.2 Patch #2: - fixed bug which caused -I scanners to bomb -- cgit v1.2.3 From 548be48e8a855b10f6c3f82c553e1aa20f46b21b Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 26 May 1990 17:24:13 +0000 Subject: Added DONT_HAVE_STDLIB_H and declarations of malloc() --- flex.skl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/flex.skl b/flex.skl index 190bb9f..4c24006 100644 --- a/flex.skl +++ b/flex.skl @@ -9,7 +9,14 @@ #include #ifdef __STDC__ + +#ifndef DONT_HAVE_STDLIB_H #include +#else +void *malloc( unsigned ); +void free( void* ); +#endif + #define YY_USE_PROTOS #define YY_USE_CONST #endif -- cgit v1.2.3 From 1fbadb95582564c7f3ff0a88e72e868008efcbff Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 26 May 1990 17:29:53 +0000 Subject: pointed reader at Makefile instead of README for porting considerations added Makefile comments: support for SCO Unix; parameterization --- NEWS | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index d3c9868..fcb3024 100644 --- a/NEWS +++ b/NEWS @@ -46,12 +46,16 @@ Changes between 2.3 (full) release of 26May90 and 2.2 (alpha) release: been added to the flex source code, courtesy of Kevin B. Kenny. 
- Routines dealing with memory allocation now use void* pointers - instead of char* - see README for porting implications. + instead of char* - see Makefile for porting implications. - Error-checking is now done when flex closes a file. - Various lint tweaks were added to reduce the number of gripes. + - Makefile has been further parameterized to aid in porting. + + - Support for SCO Unix added. + - A note has been added to flexdoc.1 mentioning work in progress on modifying flex to generate straight C code rather than a table-driven automaton, with an email address of whom to contact -- cgit v1.2.3 From 059a869fbda245349577fc5c0ec3310c648544d6 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 26 May 1990 17:31:27 +0000 Subject: Changed prolog to reflect 2.3 release. --- README | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/README b/README index 273fde5..a407467 100644 --- a/README +++ b/README @@ -1,10 +1,6 @@ // $Header$ -This is release 2.2 of flex - an alpha release. - -The intent behind this alpha release is to weed out most of the -bugs associated with some new features, and to follow up with either -a beta or full 2.3 release by the end of May. +This is release 2.3 of flex - a full release. 
The flex distribution consists of the following files: -- cgit v1.2.3 From 70c3465c347a70b37491174080913ec331e902ce Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 27 Jun 1990 23:47:53 +0000 Subject: 4.4 BSD copyright --- COPYING | 32 +++++++++++++++++++------------- ccl.c | 40 ++++++++++++++++++---------------------- dfa.c | 40 ++++++++++++++++++---------------------- ecs.c | 40 ++++++++++++++++++---------------------- flexdef.h | 34 ++++++++++++++++++---------------- gen.c | 40 ++++++++++++++++++---------------------- main.c | 49 +++++++++++++++++++++++++------------------------ misc.c | 44 ++++++++++++++++++++------------------------ nfa.c | 42 +++++++++++++++++++----------------------- parse.y | 54 +++++++++++++++++++++++++----------------------------- scan.l | 51 +++++++++++++++++++++++++-------------------------- sym.c | 40 ++++++++++++++++++---------------------- tblcmp.c | 40 ++++++++++++++++++---------------------- yylex.c | 40 ++++++++++++++++++---------------------- 14 files changed, 277 insertions(+), 309 deletions(-) diff --git a/COPYING b/COPYING index aa50598..9b01361 100644 --- a/COPYING +++ b/COPYING @@ -2,30 +2,36 @@ Flex carries the copyright used for BSD software, slightly modified because it originated at the Lawrence Berkeley (not Livermore!) Laboratory, which operates under a contract with the Department of Energy: - Copyright (c) 1989 The Regents of the University of California. + Copyright (c) 1990 The Regents of the University of California. All rights reserved. This code is derived from software contributed to Berkeley by Vern Paxson. - The United States Government has rights in this work pursuant to - contract no. DE-AC03-76SF00098 between the United States Department of - Energy and the University of California. + The United States Government has rights in this work pursuant + to contract no. DE-AC03-76SF00098 between the United States + Department of Energy and the University of California. 
Redistribution and use in source and binary forms are permitted - provided that the above copyright notice and this paragraph are - duplicated in all such forms and that any documentation, - advertising materials, and other materials related to such - distribution and use acknowledge that the software was developed - by the University of California, Berkeley. The name of the - University may not be used to endorse or promote products derived - from this software without specific prior written permission. + provided that: (1) source distributions retain this entire + copyright notice and comment, and (2) distributions including + binaries display the following acknowledgement: ``This product + includes software developed by the University of California, + Berkeley and its contributors'' in the documentation or other + materials provided with the distribution and in all advertising + materials mentioning features or use of this software. Neither the + name of the University nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE. This basically says "do whatever you please with this software except -remove this notice". +remove this notice or take advantage of the University's (or the flex +authors') name". Note that the "flex.skel" scanner skeleton carries no copyright notice. You are free to do whatever you please with scanners generated using flex; diff --git a/ccl.c b/ccl.c index 536ca8f..45714b5 100644 --- a/ccl.c +++ b/ccl.c @@ -1,38 +1,34 @@ /* ccl - routines for character classes */ -/* - * Copyright (c) 1989 The Regents of the University of California. 
+/*- + * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * Vern Paxson. * - * The United States Government has rights in this work pursuant to - * contract no. DE-AC03-76SF00098 between the United States Department of - * Energy and the University of California. + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. * - * Redistribution and use in source and binary forms are permitted - * provided that the above copyright notice and this paragraph are - * duplicated in all such forms and that any documentation, - * advertising materials, and other materials related to such - * distribution and use acknowledge that the software was developed - * by the University of California, Berkeley. The name of the - * University may not be used to endorse or promote products derived - * from this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. 
+ * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ #ifndef lint - -static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; -static char CR_continuation[] = "@(#) All rights reserved.\n"; - static char rcsid[] = "@(#) $Header$ (LBL)"; - #endif #include "flexdef.h" diff --git a/dfa.c b/dfa.c index 5bf2348..b312ce4 100644 --- a/dfa.c +++ b/dfa.c @@ -1,38 +1,34 @@ /* dfa - DFA construction routines */ -/* - * Copyright (c) 1989 The Regents of the University of California. +/*- + * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * Vern Paxson. * - * The United States Government has rights in this work pursuant to - * contract no. DE-AC03-76SF00098 between the United States Department of - * Energy and the University of California. + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. * - * Redistribution and use in source and binary forms are permitted - * provided that the above copyright notice and this paragraph are - * duplicated in all such forms and that any documentation, - * advertising materials, and other materials related to such - * distribution and use acknowledge that the software was developed - * by the University of California, Berkeley. The name of the - * University may not be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ #ifndef lint - -static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; -static char CR_continuation[] = "@(#) All rights reserved.\n"; - static char rcsid[] = "@(#) $Header$ (LBL)"; - #endif #include "flexdef.h" diff --git a/ecs.c b/ecs.c index d095fc1..73c07dd 100644 --- a/ecs.c +++ b/ecs.c @@ -1,38 +1,34 @@ /* ecs - equivalence class routines */ -/* - * Copyright (c) 1989 The Regents of the University of California. +/*- + * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * Vern Paxson. * - * The United States Government has rights in this work pursuant to - * contract no. DE-AC03-76SF00098 between the United States Department of - * Energy and the University of California. 
+ * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. * - * Redistribution and use in source and binary forms are permitted - * provided that the above copyright notice and this paragraph are - * duplicated in all such forms and that any documentation, - * advertising materials, and other materials related to such - * distribution and use acknowledge that the software was developed - * by the University of California, Berkeley. The name of the - * University may not be used to endorse or promote products derived - * from this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
*/ #ifndef lint - -static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; -static char CR_continuation[] = "@(#) All rights reserved.\n"; - static char rcsid[] = "@(#) $Header$ (LBL)"; - #endif #include "flexdef.h" diff --git a/flexdef.h b/flexdef.h index d9ec2c2..6d3fee4 100644 --- a/flexdef.h +++ b/flexdef.h @@ -1,27 +1,29 @@ /* flexdef - definitions file for flex */ -/* - * Copyright (c) 1989 The Regents of the University of California. +/*- + * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * Vern Paxson. * - * The United States Government has rights in this work pursuant to - * contract no. DE-AC03-76SF00098 between the United States Department of - * Energy and the University of California. + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. * - * Redistribution and use in source and binary forms are permitted - * provided that the above copyright notice and this paragraph are - * duplicated in all such forms and that any documentation, - * advertising materials, and other materials related to such - * distribution and use acknowledge that the software was developed - * by the University of California, Berkeley. The name of the - * University may not be used to endorse or promote products derived - * from this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ /* @(#) $Header$ (LBL) */ diff --git a/gen.c b/gen.c index 52956aa..4d83656 100644 --- a/gen.c +++ b/gen.c @@ -1,38 +1,34 @@ /* gen - actual generation (writing) of flex scanners */ -/* - * Copyright (c) 1989 The Regents of the University of California. +/*- + * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * Vern Paxson. * - * The United States Government has rights in this work pursuant to - * contract no. DE-AC03-76SF00098 between the United States Department of - * Energy and the University of California. + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. 
* - * Redistribution and use in source and binary forms are permitted - * provided that the above copyright notice and this paragraph are - * duplicated in all such forms and that any documentation, - * advertising materials, and other materials related to such - * distribution and use acknowledge that the software was developed - * by the University of California, Berkeley. The name of the - * University may not be used to endorse or promote products derived - * from this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
*/ #ifndef lint - -static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; -static char CR_continuation[] = "@(#) All rights reserved.\n"; - static char rcsid[] = "@(#) $Header$ (LBL)"; - #endif #include "flexdef.h" diff --git a/main.c b/main.c index 24f888b..7bdd599 100644 --- a/main.c +++ b/main.c @@ -1,39 +1,40 @@ -/* flex - tool to generate fast lexical analyzers - * - * - * Copyright (c) 1989 The Regents of the University of California. +/* flex - tool to generate fast lexical analyzers */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * Vern Paxson. * - * The United States Government has rights in this work pursuant to - * contract no. DE-AC03-76SF00098 between the United States Department of - * Energy and the University of California. - * - * Redistribution and use in source and binary forms are permitted - * provided that the above copyright notice and this paragraph are - * duplicated in all such forms and that any documentation, - * advertising materials, and other materials related to such - * distribution and use acknowledge that the software was developed - * by the University of California, Berkeley. The name of the - * University may not be used to endorse or promote products derived - * from this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. 
* + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ #ifndef lint +char copyright[] = +"@(#) Copyright (c) 1990 The Regents of the University of California.\n\ + All rights reserved.\n"; +#endif /* not lint */ -static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; -static char CR_continuation[] = "@(#) All rights reserved.\n"; - +#ifndef lint static char rcsid[] = "@(#) $Header$ (LBL)"; - #endif diff --git a/misc.c b/misc.c index 4d13ac7..2a05d6b 100644 --- a/misc.c +++ b/misc.c @@ -1,38 +1,34 @@ /* misc - miscellaneous flex routines */ -/* - * Copyright (c) 1989 The Regents of the University of California. +/*- + * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * Vern Paxson. - * - * The United States Government has rights in this work pursuant to - * contract no. DE-AC03-76SF00098 between the United States Department of - * Energy and the University of California. 
- * - * Redistribution and use in source and binary forms are permitted - * provided that the above copyright notice and this paragraph are - * duplicated in all such forms and that any documentation, - * advertising materials, and other materials related to such - * distribution and use acknowledge that the software was developed - * by the University of California, Berkeley. The name of the - * University may not be used to endorse or promote products derived - * from this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. + * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
*/ #ifndef lint - -static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; -static char CR_continuation[] = "@(#) All rights reserved.\n"; - static char rcsid[] = "@(#) $Header$ (LBL)"; - #endif #include diff --git a/nfa.c b/nfa.c index a6def31..a2d04c3 100644 --- a/nfa.c +++ b/nfa.c @@ -1,38 +1,34 @@ /* nfa - NFA construction routines */ -/* - * Copyright (c) 1989 The Regents of the University of California. +/*- + * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * Vern Paxson. * - * The United States Government has rights in this work pursuant to - * contract no. DE-AC03-76SF00098 between the United States Department of - * Energy and the University of California. - * - * Redistribution and use in source and binary forms are permitted - * provided that the above copyright notice and this paragraph are - * duplicated in all such forms and that any documentation, - * advertising materials, and other materials related to such - * distribution and use acknowledge that the software was developed - * by the University of California, Berkeley. The name of the - * University may not be used to endorse or promote products derived - * from this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. 
+ * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ #ifndef lint - -static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; -static char CR_continuation[] = "@(#) All rights reserved.\n"; - static char rcsid[] = "@(#) $Header$ (LBL)"; - #endif #include "flexdef.h" diff --git a/parse.y b/parse.y index 73d3f8f..f74d32c 100644 --- a/parse.y +++ b/parse.y @@ -1,46 +1,42 @@ + /* parse.y - parser for flex input */ -/* - * Copyright (c) 1989 The Regents of the University of California. +%token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL EOF_OP + +%{ +/*- + * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. * - * The United States Government has rights in this work pursuant to - * contract no. 
DE-AC03-76SF00098 between the United States Department of - * Energy and the University of California. - * - * Redistribution and use in source and binary forms are permitted - * provided that the above copyright notice and this paragraph are - * duplicated in all such forms and that any documentation, - * advertising materials, and other materials related to such - * distribution and use acknowledge that the software was developed - * by the University of California, Berkeley. The name of the - * University may not be used to endorse or promote products derived - * from this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
*/ -%token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL EOF_OP - -%{ - -#include "flexdef.h" - #ifndef lint - -static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; -static char CR_continuation[] = "@(#) All rights reserved.\n"; - static char rcsid[] = "@(#) $Header$ (LBL)"; - #endif +#include "flexdef.h" + int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen; int trlcontxt, xcluflg, cclsorted, varlength, variable_trail_rule; Char clower(); diff --git a/scan.l b/scan.l index 59864a4..d02acc2 100644 --- a/scan.l +++ b/scan.l @@ -1,44 +1,43 @@ + /* scan.l - scanner for flex input */ -/* - * Copyright (c) 1989 The Regents of the University of California. +%{ +/*- + * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * Vern Paxson. * - * The United States Government has rights in this work pursuant to - * contract no. DE-AC03-76SF00098 between the United States Department of - * Energy and the University of California. + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. * - * Redistribution and use in source and binary forms are permitted - * provided that the above copyright notice and this paragraph are - * duplicated in all such forms and that any documentation, - * advertising materials, and other materials related to such - * distribution and use acknowledge that the software was developed - * by the University of California, Berkeley. The name of the - * University may not be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ -%{ -#undef yywrap - -#include "flexdef.h" -#include "parse.h" - #ifndef lint -static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; -static char CR_continuation[] = "@(#) All rights reserved.\n"; - static char rcsid[] = "@(#) $Header$ (LBL)"; #endif +#undef yywrap + +#include "flexdef.h" +#include "parse.h" + #define ACTION_ECHO fprintf( temp_action_file, "%s", yytext ) #define MARK_END_OF_PROLOG fprintf( temp_action_file, "%%%% end of prolog\n" ); diff --git a/sym.c b/sym.c index 0047e70..bdca154 100644 --- a/sym.c +++ b/sym.c @@ -1,38 +1,34 @@ /* sym - symbol table routines */ -/* - * Copyright (c) 1989 The Regents of the University of California. +/*- + * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. 
* * This code is derived from software contributed to Berkeley by * Vern Paxson. * - * The United States Government has rights in this work pursuant to - * contract no. DE-AC03-76SF00098 between the United States Department of - * Energy and the University of California. + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. * - * Redistribution and use in source and binary forms are permitted - * provided that the above copyright notice and this paragraph are - * duplicated in all such forms and that any documentation, - * advertising materials, and other materials related to such - * distribution and use acknowledge that the software was developed - * by the University of California, Berkeley. The name of the - * University may not be used to endorse or promote products derived - * from this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. 
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ #ifndef lint - -static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; -static char CR_continuation[] = "@(#) All rights reserved.\n"; - static char rcsid[] = "@(#) $Header$ (LBL)"; - #endif #include "flexdef.h" diff --git a/tblcmp.c b/tblcmp.c index dbdf28f..a63a442 100644 --- a/tblcmp.c +++ b/tblcmp.c @@ -1,38 +1,34 @@ /* tblcmp - table compression routines */ -/* - * Copyright (c) 1989 The Regents of the University of California. +/*- + * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * Vern Paxson. * - * The United States Government has rights in this work pursuant to - * contract no. DE-AC03-76SF00098 between the United States Department of - * Energy and the University of California. + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. * - * Redistribution and use in source and binary forms are permitted - * provided that the above copyright notice and this paragraph are - * duplicated in all such forms and that any documentation, - * advertising materials, and other materials related to such - * distribution and use acknowledge that the software was developed - * by the University of California, Berkeley. The name of the - * University may not be used to endorse or promote products derived - * from this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ #ifndef lint - -static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; -static char CR_continuation[] = "@(#) All rights reserved.\n"; - static char rcsid[] = "@(#) $Header$ (LBL)"; - #endif #include "flexdef.h" diff --git a/yylex.c b/yylex.c index 52e4a18..22bf3d6 100644 --- a/yylex.c +++ b/yylex.c @@ -1,38 +1,34 @@ /* yylex - scanner front-end for flex */ -/* - * Copyright (c) 1989 The Regents of the University of California. +/*- + * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * Vern Paxson. * - * The United States Government has rights in this work pursuant to - * contract no. DE-AC03-76SF00098 between the United States Department of - * Energy and the University of California. + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. 
* - * Redistribution and use in source and binary forms are permitted - * provided that the above copyright notice and this paragraph are - * duplicated in all such forms and that any documentation, - * advertising materials, and other materials related to such - * distribution and use acknowledge that the software was developed - * by the University of California, Berkeley. The name of the - * University may not be used to endorse or promote products derived - * from this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
*/ #ifndef lint - -static char copyright[] = - "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; -static char CR_continuation[] = "@(#) All rights reserved.\n"; - static char rcsid[] = "@(#) $Header$ (LBL)"; - #endif #include -- cgit v1.2.3 From 8f85e8e46305393f63426ba1b3a4a42f7c5c9816 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 28 Jun 1990 00:40:34 +0000 Subject: Fixed bug in mini-scanner examle Fixed bug in YY_INPUT redefinition yylineno defense reentrancy documentation Something else which I forget. --- flex.1 | 158 +++++++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 111 insertions(+), 47 deletions(-) diff --git a/flex.1 b/flex.1 index ffa589a..cef3d7d 100644 --- a/flex.1 +++ b/flex.1 @@ -105,7 +105,7 @@ A somewhat more complicated example: } {DIGIT}+"."{DIGIT}* { - printf( "A float: %s (%d)\\n", yytext, + printf( "A float: %s (%g)\\n", yytext, atof( yytext ) ); } @@ -786,7 +786,10 @@ section of the input file): %{ #undef YY_INPUT #define YY_INPUT(buf,result,max_size) \\ - result = ((buf[0] = getchar()) == EOF) ? YY_NULL : 1; + { \\ + int c = getchar(); \\ + result = (c == EOF) ? YY_NULL : (buf[0] = c, 1); \\ + } %} .fi @@ -2041,6 +2044,99 @@ is fully compatible with .I lex with the following exceptions: .IP - +The undocumented +.I lex +scanner internal variable +.B yylineno +is not supported. It is difficult to support this option efficiently, +since it requires examining every character scanned and reexamining +the characters when the scanner backs up. +Things get more complicated when the end of buffer or file is reached or a +NUL is scanned (since the scan must then be restarted with the proper line +number count), or the user uses the yyless(), unput(), or REJECT actions, +or the multiple input buffer functions. +.IP +The fix is to add rules which, upon seeing a newline, increment +yylineno. 
This is usually an easy process, though it can be a drag if some +of the patterns can match multiple newlines along with other characters. +.IP +yylineno is not part of the POSIX draft. +.IP - +The +.B input() +routine is not redefinable, though it may be called to read characters +following whatever has been matched by a rule. If +.B input() +encounters an end-of-file the normal +.B yywrap() +processing is done. A ``real'' end-of-file is returned by +.B input() +as +.I EOF. +.IP +Input is instead controlled by redefining the +.B YY_INPUT +macro. +.IP +The +.I flex +restriction that +.B input() +cannot be redefined is in accordance with the POSIX draft, but +.B YY_INPUT +has not yet been accepted into the draft (and probably won't; it looks +like the draft will simply not specify any way of controlling the +scanner's input other than by making an initial assignment to +.I yyin). +.IP - +.I flex +scanners do not use stdio for input. Because of this, when writing an +interactive scanner one must explicitly call fflush() on the +stream associated with the terminal after writing out a prompt. +With +.I lex +such writes are automatically flushed since +.I lex +scanners use +.B getchar() +for their input. Also, when writing interactive scanners with +.I flex, +the +.B -I +flag must be used. +.IP - +.I flex +scanners are not as reentrant as +.I lex +scanners. In particular, if you have an interactive scanner and +an interrupt handler which long-jumps out of the scanner, and +the scanner is subsequently called again, you may get the following +message: +.nf + + fatal flex scanner internal error--end of buffer missed + +.fi +To reenter the scanner, first use +.nf + + yyrestart( yyin ); + +.fi +.IP - +.B output() +is not supported. +Output from the +.B ECHO +macro is done to the file-pointer +.I yyout +(default +.I stdout). +.IP +The POSIX draft mentions that an +.B output() +routine exists but currently gives no details as to what it does. 
+.IP - .I lex does not support exclusive start conditions (%x), though they are in the current POSIX draft. @@ -2084,49 +2180,6 @@ one must use "[^\\]]". The latter works with .I lex, too. .IP - -The undocumented -.I lex -scanner internal variable -.B yylineno -is not supported. (The variable is not part of the POSIX draft.) -.IP - -The -.B input() -routine is not redefinable, though it may be called to read characters -following whatever has been matched by a rule. If -.B input() -encounters an end-of-file the normal -.B yywrap() -processing is done. A ``real'' end-of-file is returned by -.B input() -as -.I EOF. -.IP -Input is instead controlled by redefining the -.B YY_INPUT -macro. -.IP -The -.I flex -restriction that -.B input() -cannot be redefined is in accordance with the POSIX draft, but -.B YY_INPUT -has not yet been accepted into the draft. -.IP - -.B output() -is not supported. -Output from the -.B ECHO -macro is done to the file-pointer -.I yyout -(default -.I stdout). -.IP -The POSIX draft mentions that an -.B output() -routine exists but currently gives no details as to what it does. -.IP - The .I lex .B %r @@ -2138,7 +2191,7 @@ If you are providing your own yywrap() routine, you must include a the "#undef" will have to be enclosed in %{}'s. .IP The POSIX draft -specifies that yywrap() is a function and this is unlikely to change; so +specifies that yywrap() is a function and this is very unlikely to change; so .I flex users are warned that .B yywrap() @@ -2313,6 +2366,17 @@ Your scanner specification includes recognizing 8-bit characters and you did not specify the -8 flag (and your site has not installed flex with -8 as the default). .LP +.I +fatal flex scanner internal error--end of buffer missed - +This can occur in an scanner which is reentered after a long-jump +has jumped out (or over) the scanner's activation frame. Before +reentering the scanner, use: +.nf + + yyrestart( yyin ); + +.fi +.LP .I too many %t classes! 
- You managed to put every single character into its own %t class. .I flex @@ -2334,7 +2398,7 @@ Jacobson. The implementation was done by Kevin Gong and Vern Paxson. Thanks to the many .I flex beta-testers, feedbackers, and contributors, especially Casey -Leedom, benson@odi.com, +Leedom, benson@odi.com, Keith Bostic, Frederic Brehm, Nick Christopher, Jason Coughlin, Scott David Daniels, Leo Eskin, Chris Faylor, Eric Goldman, Eric -- cgit v1.2.3 From e5108c09de581933bb85ed2b1f00334a2ae5e1e8 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 28 Jun 1990 00:43:00 +0000 Subject: *** empty log message *** --- NEWS | 53 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/NEWS b/NEWS index fcb3024..4c05084 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -Changes between 2.3 (full) release of 26May90 and 2.2 (alpha) release: +Changes between 2.3 (full) release of 28Jun90 and 2.2 (alpha) release: User-visible: @@ -22,23 +22,45 @@ Changes between 2.3 (full) release of 26May90 and 2.2 (alpha) release: now work, but have been deprecated (in anticipation of making flex recognize #line directives). - - It is now documented that flex interprets "^foo|bar" differently - from lex. flex interprets it as "match either a 'foo' or a 'bar', - providing it comes at the beginning of a line", whereas lex - interprets it as "match either a 'foo' at the beginning of a line, - or a 'bar' anywhere". - - - It is now documented that flex initializes the global "yyin" on - the first call to the scanner, while lex initializes it at - compile-time. - - - It is now documented that yy_switch_to_buffer() can be used - in the yywrap() macro/routine. - - The funky restrictions on when semi-colons could follow the YY_NEW_FILE and yyless macros have been removed. They now behave identically to functions. + - A bug in the sample redefinition of YY_INPUT in the documentation + has been corrected. 
+ + - A bug in the sample simple tokener in the documentation has + been corrected. + + - The documentation on the incompatibilities between flex and + lex has been reordered so that the discussion of yylineno + and input() come first, as it's anticipated that these will + be the most common source of headaches. + + + Things which didn't used to be documented but now are: + + - flex interprets "^foo|bar" differently from lex. flex interprets + it as "match either a 'foo' or a 'bar', providing it comes at the + beginning of a line", whereas lex interprets it as "match either + a 'foo' at the beginning of a line, or a 'bar' anywhere". + + - flex initializes the global "yyin" on the first call to the + scanner, while lex initializes it at compile-time. + + - yy_switch_to_buffer() can be used in the yywrap() macro/routine. + + - flex scanners do not use stdio for their input, and hence when + writing an interactive scanner one must explictly call fflush() + after writing out a prompt. + + - flex scanner can be made reentrant (after a fashion) by using + "yyrestart( yyin );". This is useful for interactive scanners + which have interrupt handlers that long-jump out of the scanner. + + - a defense of why yylineno is not supported is included, along + with a suggestion on how to convert scanners which rely on it. + Other changes: @@ -56,6 +78,9 @@ Changes between 2.3 (full) release of 26May90 and 2.2 (alpha) release: - Support for SCO Unix added. + - Flex now sports the latest & greatest UC copyright notice + (which is only slightly different from the previous one). 
+ - A note has been added to flexdoc.1 mentioning work in progress on modifying flex to generate straight C code rather than a table-driven automaton, with an email address of whom to contact -- cgit v1.2.3 From 746717cc9594402395206ead67328748fad7bb25 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 28 Jul 1990 16:53:13 +0000 Subject: Changed to get malloc definition in identical fashion to that used by flex.skel --- flexdef.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/flexdef.h b/flexdef.h index 6d3fee4..f30b538 100644 --- a/flexdef.h +++ b/flexdef.h @@ -90,7 +90,16 @@ char *memset(); #define SHORT_FILE_NAMES #endif +#ifdef __STDC__ +#ifndef DONT_HAVE_STDLIB_H +#include +#else +void *malloc( unsigned ); +void free( void* ); +#endif +#else char *malloc(), *realloc(); +#endif /* maximum line length we'll have to deal with */ -- cgit v1.2.3 From 6bbc2d6f55e4efb4782d062c39bd6de89c60e900 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 2 Aug 1990 00:30:43 +0000 Subject: fixed to declare malloc() and free() by hand if __GNUC__ --- flex.skl | 6 +++--- flexdef.h | 19 ++++++++++++++----- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/flex.skl b/flex.skl index 4c24006..dcffffe 100644 --- a/flex.skl +++ b/flex.skl @@ -10,11 +10,11 @@ #ifdef __STDC__ -#ifndef DONT_HAVE_STDLIB_H -#include -#else +#ifdef __GNUC__ void *malloc( unsigned ); void free( void* ); +#else +#include #endif #define YY_USE_PROTOS diff --git a/flexdef.h b/flexdef.h index f30b538..35fb16f 100644 --- a/flexdef.h +++ b/flexdef.h @@ -91,13 +91,15 @@ char *memset(); #endif #ifdef __STDC__ -#ifndef DONT_HAVE_STDLIB_H -#include -#else + +#ifdef __GNUC__ void *malloc( unsigned ); void free( void* ); -#endif #else +#include +#endif + +#else /* ! 
__STDC__ */ char *malloc(), *realloc(); #endif @@ -524,7 +526,13 @@ extern int num_xlations; * scxclu - true if start condition is exclusive * sceof - true if start condition has EOF rule * scname - start condition name - * actvsc - stack of active start conditions for the current rule + * actvsc - stack of active start conditions for the current rule; + * a negative entry means that the start condition is *not* + * active for the current rule. Start conditions may appear + * multiple times on the stack; the entry for it closest + * to the top of the stack (i.e., actvsc[actvp]) is the + * one to use. Others are present from "{" scoping + * constructs. */ extern int lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof, *actvsc; @@ -830,6 +838,7 @@ extern void cclinstal PROTO ((Char [], int)); extern int ccllookup PROTO((Char [])); extern void ndinstal PROTO((char[], Char[])); /* install a name definition */ +extern void scextend PROTO(()); /* increase maximum number of SC's */ extern void scinstal PROTO((char[], int)); /* make a start condition */ /* lookup the number associated with a start condition */ -- cgit v1.2.3 From 0e6a63a56150a2fb26aa2657aa9761e06acdc0c9 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 2 Aug 1990 01:01:07 +0000 Subject: Another try at getting the malloc() definitions correct; this time for g++, too --- flex.skl | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/flex.skl b/flex.skl index dcffffe..d387f5f 100644 --- a/flex.skl +++ b/flex.skl @@ -8,19 +8,6 @@ #include -#ifdef __STDC__ - -#ifdef __GNUC__ -void *malloc( unsigned ); -void free( void* ); -#else -#include -#endif - -#define YY_USE_PROTOS -#define YY_USE_CONST -#endif - /* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */ #ifdef c_plusplus @@ -32,10 +19,7 @@ void free( void* ); #ifdef __cplusplus -#ifndef __STDC__ #include -#endif - #include /* use prototypes in function declarations */ @@ -44,7 +28,22 @@ 
void free( void* ); /* the "const" storage-class-modifier is valid */ #define YY_USE_CONST -#endif +#else /* ! __cplusplus */ + +#ifdef __STDC__ + +#ifdef __GNUC__ +void *malloc( unsigned ); +void free( void* ); +#else +#include +#endif /* __GNUC__ */ + +#define YY_USE_PROTOS +#define YY_USE_CONST + +#endif /* __STDC__ */ +#endif /* ! __cplusplus */ #ifdef __TURBOC__ @@ -61,7 +60,10 @@ void free( void* ); #define YY_PROTO(proto) proto #else #define YY_PROTO(proto) () -/* there's no standard place to get these definitions */ +/* we can't get here if it's an ANSI C compiler, or a C++ compiler, + * so it's got to be a K&R compiler, and therefore there's no standard + * place from which to include these definitions + */ char *malloc(); int free(); int read(); -- cgit v1.2.3 From 8a62b0fa9391cb44960c403710cc3a5fbc40871b Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 2 Aug 1990 01:03:42 +0000 Subject: 2.3 patch #2 --- NEWS | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/NEWS b/NEWS index 4c05084..2ace147 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,16 @@ +Changes between 2.3 Patch #2 (02Aug90) and original 2.3 release: + + - Fixed (hopefully) headaches involving declaring malloc() + and free() for gcc, which defines __STDC__ but (often) doesn't + come with the standard include files such as . + Reordered #ifdef maze in the scanner skeleton in the hope of + getting the declarations right for cfront and g++, too. + + - Note that this patch supercedes patch #1 for release 2.3, + which was never announced but was available briefly for + anonymous ftp. 
+ + Changes between 2.3 (full) release of 28Jun90 and 2.2 (alpha) release: User-visible: -- cgit v1.2.3 From 89b5d52b8ab7357014ef87714638a4cb954f2661 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 3 Aug 1990 14:09:36 +0000 Subject: changed to include for __GNUC__ --- flex.skl | 3 ++- flexdef.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/flex.skl b/flex.skl index d387f5f..385f6a6 100644 --- a/flex.skl +++ b/flex.skl @@ -33,7 +33,8 @@ #ifdef __STDC__ #ifdef __GNUC__ -void *malloc( unsigned ); +#include +void *malloc( size_t ); void free( void* ); #else #include diff --git a/flexdef.h b/flexdef.h index 35fb16f..ca6cb2c 100644 --- a/flexdef.h +++ b/flexdef.h @@ -93,7 +93,8 @@ char *memset(); #ifdef __STDC__ #ifdef __GNUC__ -void *malloc( unsigned ); +#include +void *malloc( size_t ); void free( void* ); #else #include -- cgit v1.2.3 From a68338983196b83c29f4779b7493a98e00499575 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 3 Aug 1990 14:12:11 +0000 Subject: Patch #3 --- NEWS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/NEWS b/NEWS index 2ace147..c199d6c 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,9 @@ +Changes between 2.3 Patch #3 (03Aug90) and original 2.3 release: + + - Correction to patch #2 for gcc compilation; thanks goes to + Paul Eggert for catching this. 
+ + Changes between 2.3 Patch #2 (02Aug90) and original 2.3 release: - Fixed (hopefully) headaches involving declaring malloc() -- cgit v1.2.3 From 43513ce78095f8ef020712f524a673b86ed4eca4 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 14 Aug 1990 00:01:35 +0000 Subject: fixed hexadecimal escapes; added is_hex_digit() --- misc.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 13 deletions(-) diff --git a/misc.c b/misc.c index 2a05d6b..0071470 100644 --- a/misc.c +++ b/misc.c @@ -477,6 +477,38 @@ Char str[]; } +/* is_hex_digit - returns true if a character is a valid hex digit, false + * otherwise + * + * synopsis: + * int true_or_false, is_hex_digit(); + * int ch; + * val = is_hex_digit( ch ); + */ + +int is_hex_digit( ch ) +int ch; + + { + if ( isdigit( ch ) ) + return ( 1 ); + + switch ( clower( ch ) ) + { + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + return ( 1 ); + + default: + return ( 0 ); + } + } + + /* line_directive_out - spit out a "# line" statement */ void line_directive_out( output_file_name ) @@ -584,6 +616,9 @@ Char myesc( array ) Char array[]; { + Char c, esc_char; + register int sptr; + switch ( array[1] ) { case 'a': return ( '\a' ); @@ -594,9 +629,6 @@ Char array[]; case 't': return ( '\t' ); case 'v': return ( '\v' ); - case 'x': - /* fall through */ - case '0': case '1': case '2': @@ -607,15 +639,31 @@ Char array[]; case '7': case '8': case '9': + { /* \ */ + sptr = 1; - { /* \ or \x */ - Char c, esc_char; - register int sptr = 1; - - if ( array[1] == 'x' ) + while ( isascii( array[sptr] ) && isdigit( array[sptr] ) ) + /* don't increment inside loop control because if + * isdigit() is a macro it might expand into multiple + * increments ... 
+ */ ++sptr; - while ( isascii( array[sptr] ) && isdigit( array[sptr] ) ) + c = array[sptr]; + array[sptr] = '\0'; + + esc_char = otoi( array + 1 ); + + array[sptr] = c; + + return ( esc_char ); + } + + case 'x': + { /* \x */ + int sptr = 2; + + while ( isascii( array[sptr] ) && is_hex_digit( array[sptr] ) ) /* don't increment inside loop control because if * isdigit() is a macro it will expand it to two * increments ... @@ -625,10 +673,7 @@ Char array[]; c = array[sptr]; array[sptr] = '\0'; - if ( array[1] == 'x' ) - esc_char = htoi( array + 2 ); - else - esc_char = otoi( array + 1 ); + esc_char = htoi( array + 2 ); array[sptr] = c; -- cgit v1.2.3 From 88f9c056708d07f7b4f7f1bd77f27c84dd5d41da Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 14 Aug 1990 00:05:51 +0000 Subject: patch #4 --- NEWS | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index c199d6c..6382413 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,11 @@ -Changes between 2.3 Patch #3 (03Aug90) and original 2.3 release: +Changes between 2.3 Patch #4 (03Aug90) and 2.3 Patch #3: + + - Fixed bug in hexadecimal escapes which allowed only digits, + not letters, in escapes + - Fixed bug in previous "Changes" file! + + +Changes between 2.3 Patch #3 (03Aug90) and 2.3 Patch #2: - Correction to patch #2 for gcc compilation; thanks goes to Paul Eggert for catching this. 
-- cgit v1.2.3 From a3a4f5e75d63bcb2d700000348271f84b022bfff Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 14 Aug 1990 00:07:55 +0000 Subject: fixed date in patch #4 --- NEWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 6382413..411d5be 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -Changes between 2.3 Patch #4 (03Aug90) and 2.3 Patch #3: +Changes between 2.3 Patch #4 (14Aug90) and 2.3 Patch #3: - Fixed bug in hexadecimal escapes which allowed only digits, not letters, in escapes -- cgit v1.2.3 From b26d0d079f916c76be734453b58db6b2b8fed5cd Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 14 Aug 1990 00:10:24 +0000 Subject: fixed comment in myesc() --- misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misc.c b/misc.c index 0071470..18f5727 100644 --- a/misc.c +++ b/misc.c @@ -665,7 +665,7 @@ Char array[]; while ( isascii( array[sptr] ) && is_hex_digit( array[sptr] ) ) /* don't increment inside loop control because if - * isdigit() is a macro it will expand it to two + * isdigit() is a macro it might expand into multiple * increments ... 
*/ ++sptr; -- cgit v1.2.3 From 92cf0c1b6fbbccaac0c341a4813130a0c1a1abc0 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 16 Aug 1990 20:22:03 +0000 Subject: Patch #5 --- NEWS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/NEWS b/NEWS index 411d5be..04efaf7 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,9 @@ +Changes between 2.3 Patch #5 (16Aug90) and 2.3 Patch #4: + + - An up-to-date version of initscan.c so "make test" will + work after applying the previous patches + + Changes between 2.3 Patch #4 (14Aug90) and 2.3 Patch #3: - Fixed bug in hexadecimal escapes which allowed only digits, -- cgit v1.2.3 From d80b3a8b62340d4a0cf38663fe9ceb1a4d61cac2 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 29 Aug 1990 12:10:58 +0000 Subject: Patch #6 for 2.3 --- NEWS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/NEWS b/NEWS index 04efaf7..ca7b426 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,11 @@ +Changes between 2.3 Patch #6 (29Aug90) and 2.3 Patch #5: + + - Fixed a serious bug in yymore() which basically made it + completely broken. Thanks goes to Jean Christophe of + the Nethack development team for finding the problem + and passing along the fix. 
+ + Changes between 2.3 Patch #5 (16Aug90) and 2.3 Patch #4: - An up-to-date version of initscan.c so "make test" will -- cgit v1.2.3 From dbd6e88e3f4d288b77da08187fe62848ca1975d7 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 29 Aug 1990 12:11:13 +0000 Subject: Fixed yymore() but in not resetting yy_more_len --- gen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/gen.c b/gen.c index 4d83656..f51adf9 100644 --- a/gen.c +++ b/gen.c @@ -1198,6 +1198,7 @@ void make_tables() if ( yymore_used ) { + indent_puts( "yy_more_len = 0;" ); indent_puts( "yy_doing_yy_more = yy_more_flag;" ); indent_puts( "if ( yy_doing_yy_more )" ); indent_up(); -- cgit v1.2.3 From 1d483ab9617fb58c1105ad1209f22d87d4a6e58c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 23 Oct 1990 14:51:01 +0000 Subject: fixed missing "rule_type" entry for end-of-buffer action --- gen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/gen.c b/gen.c index f51adf9..fe5c562 100644 --- a/gen.c +++ b/gen.c @@ -772,6 +772,7 @@ void gentabs() /* set up accepting structures for the End Of Buffer state */ EOB_accepting_list[0] = 0; EOB_accepting_list[1] = end_of_buffer_action; + rule_type[end_of_buffer_action] = RULE_NORMAL; accsiz[end_of_buffer_state] = 1; dfaacc[end_of_buffer_state].dfaacc_set = EOB_accepting_list; -- cgit v1.2.3 From dc29c04874e15f0cfae150bb282532b6430bc5c0 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 28 Mar 1991 12:00:45 +0000 Subject: Patch #7 for 2.3 --- NEWS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/NEWS b/NEWS index ca7b426..0111a1f 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,11 @@ +Changes between 2.3 Patch #7 (28Mar91) and 2.3 Patch #6: + + - Fixed out-of-bounds array access that caused bad tables + to be produced on machines where the bad reference happened + to yield a 1. This caused problems installing or running + flex on some Suns, in particular. 
+ + Changes between 2.3 Patch #6 (29Aug90) and 2.3 Patch #5: - Fixed a serious bug in yymore() which basically made it -- cgit v1.2.3 From 7fc51ed2881c2ea8cc98dca38723a3e33927e183 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 28 Mar 1991 12:01:38 +0000 Subject: Fixed out-of-bounds access bug; patch #7 for release 2.3 --- gen.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gen.c b/gen.c index fe5c562..83674c4 100644 --- a/gen.c +++ b/gen.c @@ -772,7 +772,6 @@ void gentabs() /* set up accepting structures for the End Of Buffer state */ EOB_accepting_list[0] = 0; EOB_accepting_list[1] = end_of_buffer_action; - rule_type[end_of_buffer_action] = RULE_NORMAL; accsiz[end_of_buffer_state] = 1; dfaacc[end_of_buffer_state].dfaacc_set = EOB_accepting_list; @@ -800,7 +799,7 @@ void gentabs() if ( variable_trailing_context_rules && ! (accnum & YY_TRAILING_HEAD_MASK) && - accnum > 0 && + accnum > 0 && accnum <= num_rules && rule_type[accnum] == RULE_VARIABLE ) { /* special hack to flag accepting number as part -- cgit v1.2.3 From 35756b630192aa271f21713a41963f1d783e9618 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 6 Feb 1993 20:57:33 +0000 Subject: -Wall fix --- yylex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yylex.c b/yylex.c index 22bf3d6..a4a3d47 100644 --- a/yylex.c +++ b/yylex.c @@ -181,7 +181,7 @@ int yylex() default: if ( ! isascii( yylval ) || ! isprint( yylval ) ) - fprintf( stderr, "\\%.3o", yylval ); + fprintf( stderr, "\\%.3o", (unsigned int) yylval ); else (void) putc( yylval, stderr ); break; -- cgit v1.2.3 From 79b778d19da3629eae56189c95bd96a1a889cf69 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 6 Feb 1993 20:58:12 +0000 Subject: Fixed a bunch of fencepost errors in increasing tables. 
--- tblcmp.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tblcmp.c b/tblcmp.c index a63a442..7e4f74a 100644 --- a/tblcmp.c +++ b/tblcmp.c @@ -235,7 +235,7 @@ void cmptmps() if ( usemecs ) { - /* create equivalence classes base on data gathered on template + /* create equivalence classes based on data gathered on template * transitions */ @@ -245,7 +245,7 @@ void cmptmps() else nummecs = numecs; - if ( lastdfa + numtemps + 1 >= current_max_dfas ) + while ( lastdfa + numtemps + 1 >= current_max_dfas ) increase_max_dfas(); /* loop through each template */ @@ -308,8 +308,8 @@ void expand_nxt_chk() nxt = reallocate_integer_array( nxt, current_max_xpairs ); chk = reallocate_integer_array( chk, current_max_xpairs ); - bzero( (char *) (chk + old_max), - MAX_XPAIRS_INCREMENT * sizeof( int ) / sizeof( char ) ); + zero_out( (char *) (chk + old_max), + MAX_XPAIRS_INCREMENT * sizeof( int ) / sizeof( char ) ); } @@ -368,7 +368,7 @@ int *state, numtrans; while ( 1 ) /* loops until a space is found */ { - if ( i + numecs > current_max_xpairs ) + while ( i + numecs >= current_max_xpairs ) expand_nxt_chk(); /* loops until space for end-of-buffer and action number are found */ @@ -389,7 +389,7 @@ int *state, numtrans; else ++i; - if ( i + numecs > current_max_xpairs ) + while ( i + numecs >= current_max_xpairs ) expand_nxt_chk(); } @@ -435,7 +435,8 @@ void inittbl() { register int i; - bzero( (char *) chk, current_max_xpairs * sizeof( int ) / sizeof( char ) ); + zero_out( (char *) chk, + current_max_xpairs * sizeof( int ) / sizeof( char ) ); tblend = 0; firstfree = tblend + 1; @@ -476,7 +477,7 @@ void mkdeftbl() ++tblend; /* room for transition on end-of-buffer character */ - if ( tblend + numecs > current_max_xpairs ) + while ( tblend + numecs >= current_max_xpairs ) expand_nxt_chk(); /* add in default end-of-buffer transition */ @@ -584,7 +585,7 @@ int numchars, statenum, deflink, totaltrans; ; } - if ( baseaddr + maxec - minec >= 
current_max_xpairs ) + while ( baseaddr + maxec - minec + 1 >= current_max_xpairs ) expand_nxt_chk(); for ( i = minec; i <= maxec; ++i ) @@ -598,7 +599,8 @@ int numchars, statenum, deflink, totaltrans; ++baseaddr ) ; - if ( baseaddr + maxec - minec >= current_max_xpairs ) + while ( baseaddr + maxec - minec + 1 >= + current_max_xpairs ) expand_nxt_chk(); /* reset the loop counter so we'll start all @@ -620,7 +622,7 @@ int numchars, statenum, deflink, totaltrans; tblbase = baseaddr - minec; tbllast = tblbase + maxec; - if ( tbllast >= current_max_xpairs ) + while ( tbllast + 1 >= current_max_xpairs ) expand_nxt_chk(); base[statenum] = tblbase; @@ -860,7 +862,7 @@ int *state, statenum, transnum; * int statenum, sym, nextstate, deflink; * stack1( statenum, sym, nextstate, deflink ); * - * if there's room for another state one the "one-transition" stack, the + * if there's room for another state on the "one-transition" stack, the * state is pushed onto it, to be processed later by mk1tbl. If there's * no room, we process the sucker right now. 
*/ -- cgit v1.2.3 From ae2a06ed8a0148e7a717de4537f3fb3390ddde5e Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 6 Feb 1993 20:58:57 +0000 Subject: Added internal "action" array, internal skeleton, zero_out() in lieu of bzero --- misc.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 104 insertions(+), 38 deletions(-) diff --git a/misc.c b/misc.c index 18f5727..d0e8e89 100644 --- a/misc.c +++ b/misc.c @@ -48,24 +48,22 @@ void dataflush PROTO(()); int otoi PROTO((Char [])); -/* action_out - write the actions from the temporary file to lex.yy.c - * - * synopsis - * action_out(); - * - * Copies the action file up to %% (or end-of-file) to lex.yy.c - */ +void add_action( new_text ) +char *new_text; + { + int len = strlen( new_text ); -void action_out() + while ( len + action_index + action_offset >= action_size - 10 /* slop */ ) + { + action_size *= 2; + prolog = action_array = + reallocate_character_array( action_array, action_size ); + action = &action_array[action_offset]; + } - { - char buf[MAXLINE]; + strcpy( &action[action_index], new_text ); - while ( fgets( buf, MAXLINE, temp_action_file ) != NULL ) - if ( buf[0] == '%' && buf[1] == '%' ) - break; - else - fputs( buf, stdout ); + action_index += len; } @@ -235,10 +233,7 @@ register Char *str; for ( c = str; *c; ++c ) ; - copy = (Char *) malloc( (unsigned) ((c - str + 1) * sizeof( Char )) ); - - if ( copy == NULL ) - flexfatal( "dynamic memory failure in copy_unsigned_string()" ); + copy = allocate_Character_array( c - str + 1 ); for ( c = copy; (*c++ = *str++); ) ; @@ -373,7 +368,7 @@ char msg[]; { fprintf( stderr, "%s: fatal internal error, %s\n", program_name, msg ); - flexend( 1 ); + exit( 1 ); } @@ -469,7 +464,7 @@ int htoi( str ) Char str[]; { - int result; + unsigned int result; (void) sscanf( (char *) str, "%x", &result ); @@ -511,12 +506,37 @@ int ch; /* line_directive_out - spit out a "# line" statement */ -void line_directive_out( output_file_name ) -FILE 
*output_file_name; +void line_directive_out( output_file ) +FILE *output_file; { if ( infilename && gen_line_dirs ) - fprintf( output_file_name, "# line %d \"%s\"\n", linenum, infilename ); + { + char directive[MAXLINE]; + sprintf( directive, "# line %d \"%s\"\n", linenum, infilename ); + + /* if output_file is nil then we should put the directive in + * the accumulated actions. + */ + if ( output_file ) + fputs( directive, output_file ); + else + add_action( directive ); + } + } + + +/* mark_prolog - mark the current position in the action array as + * representing the action prolog + */ +void mark_prolog() + { + prolog = action_array; + action_array[action_index++] = '\0'; + action_offset = action_index; + action = &action_array[action_offset]; + action_index = 0; + action[action_index] = '\0'; } @@ -621,7 +641,9 @@ Char array[]; switch ( array[1] ) { +#ifdef __STDC__ case 'a': return ( '\a' ); +#endif case 'b': return ( '\b' ); case 'f': return ( '\f' ); case 'n': return ( '\n' ); @@ -663,7 +685,8 @@ Char array[]; { /* \x */ int sptr = 2; - while ( isascii( array[sptr] ) && is_hex_digit( array[sptr] ) ) + while ( isascii( array[sptr] ) && + is_hex_digit( (char) array[sptr] ) ) /* don't increment inside loop control because if * isdigit() is a macro it might expand into multiple * increments ... 
@@ -698,7 +721,7 @@ int otoi( str ) Char str[]; { - int result; + unsigned int result; (void) sscanf( (char *) str, "%o", &result ); @@ -726,14 +749,18 @@ register int c; { switch ( c ) { - case '\n': return ( "\\n" ); - case '\t': return ( "\\t" ); +#ifdef __STDC__ + case '\a': return ( "\\a" ); +#endif + case '\b': return ( "\\b" ); case '\f': return ( "\\f" ); + case '\n': return ( "\\n" ); case '\r': return ( "\\r" ); - case '\b': return ( "\\b" ); + case '\t': return ( "\\t" ); + case '\v': return ( "\\v" ); default: - (void) sprintf( rform, "\\%.3o", c ); + (void) sprintf( rform, "\\%.3o", (unsigned int) c ); return ( rform ); } } @@ -780,19 +807,33 @@ int size, element_size; * skelout(); * * DESCRIPTION - * Copies from skelfile to stdout until a line beginning with "%%" or - * EOF is found. + * Copies skelfile or skel array to stdout until a line beginning with + * "%%" or EOF is found. */ void skelout() { - char buf[MAXLINE]; + if ( skelfile ) + { + char buf[MAXLINE]; - while ( fgets( buf, MAXLINE, skelfile ) != NULL ) - if ( buf[0] == '%' && buf[1] == '%' ) - break; - else - fputs( buf, stdout ); + while ( fgets( buf, MAXLINE, skelfile ) != NULL ) + if ( buf[0] == '%' && buf[1] == '%' ) + break; + else + fputs( buf, stdout ); + } + + else + { /* copy from skel array */ + char *buf; + + while ( (buf = skel[skel_ind++]) ) + if ( buf[0] == '%' && buf[1] == '%' ) + break; + else + printf( "%s\n", buf ); + } } @@ -824,3 +865,28 @@ int element_v, element_n; datapos = 0; } } + + +/* zero_out - set a region of memory to 0 + * + * synopsis + * char *region_ptr; + * int size_in_bytes; + * zero_out( region_ptr, size_in_bytes ); + * + * sets region_ptr[0] through region_ptr[size_in_bytes - 1] to zero. 
+ */ + +void zero_out( region_ptr, size_in_bytes ) +char *region_ptr; +int size_in_bytes; + + { + register char *rp, *rp_end; + + rp = region_ptr; + rp_end = region_ptr + size_in_bytes; + + while ( rp < rp_end ) + *rp++ = 0; + } -- cgit v1.2.3 From 2fe9efe0b6ab4169bd89e70979a39918f90ecec0 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 6 Feb 1993 20:59:39 +0000 Subject: Added checking for whether rules are useful modified to work with internal "action" array --- nfa.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/nfa.c b/nfa.c index a2d04c3..6e50109 100644 --- a/nfa.c +++ b/nfa.c @@ -215,6 +215,8 @@ void finish_rule( mach, variable_trail_rule, headcnt, trailcnt ) int mach, variable_trail_rule, headcnt, trailcnt; { + char action_text[MAXLINE]; + add_accept( mach, num_rules ); /* we did this in new_rule(), but it often gets the wrong @@ -228,13 +230,14 @@ int mach, variable_trail_rule, headcnt, trailcnt; if ( continued_action ) --rule_linenum[num_rules]; - fprintf( temp_action_file, "case %d:\n", num_rules ); + sprintf( action_text, "case %d:\n", num_rules ); + add_action( action_text ); if ( variable_trail_rule ) { rule_type[num_rules] = RULE_VARIABLE; - if ( performance_report ) + if ( performance_report > 0 ) fprintf( stderr, "Variable trailing context rule at line %d\n", rule_linenum[num_rules] ); @@ -251,23 +254,27 @@ int mach, variable_trail_rule, headcnt, trailcnt; char *scanner_cp = "yy_c_buf_p = yy_cp"; char *scanner_bp = "yy_bp"; - fprintf( temp_action_file, + add_action( "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */\n" ); if ( headcnt > 0 ) - fprintf( temp_action_file, "%s = %s + %d;\n", + { + sprintf( action_text, "%s = %s + %d;\n", scanner_cp, scanner_bp, headcnt ); + add_action( action_text ); + } else - fprintf( temp_action_file, - "%s -= %d;\n", scanner_cp, trailcnt ); + { + sprintf( action_text, "%s -= %d;\n", scanner_cp, trailcnt ); + add_action( action_text ); + } - fprintf( 
temp_action_file, - "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" ); + add_action( "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" ); } } - line_directive_out( temp_action_file ); + line_directive_out( (FILE *) 0 ); } @@ -708,10 +715,13 @@ void new_rule() rule_type = reallocate_integer_array( rule_type, current_max_rules ); rule_linenum = reallocate_integer_array( rule_linenum, current_max_rules ); + rule_useful = + reallocate_integer_array( rule_useful, current_max_rules ); } if ( num_rules > MAX_RULE ) lerrif( "too many rules (> %d)!", MAX_RULE ); rule_linenum[num_rules] = linenum; + rule_useful[num_rules] = false; } -- cgit v1.2.3 From c39e52d592cc9e2221e4fc175c932afed62dce8f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 6 Feb 1993 21:00:25 +0000 Subject: Added keeping track of which rules are useful fixed a fencepost error in checking for scanners that require -8 --- dfa.c | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/dfa.c b/dfa.c index b312ce4..884f7e3 100644 --- a/dfa.c +++ b/dfa.c @@ -128,9 +128,8 @@ register int nacc; for ( j = 1; j <= nacc; ++j ) if ( accset[j] & YY_TRAILING_HEAD_MASK ) { - fprintf( stderr, - "%s: Dangerous trailing context in rule at line %d\n", - program_name, rule_linenum[ar] ); + line_warning( "dangerous trailing context", + rule_linenum[ar] ); return; } } @@ -140,7 +139,7 @@ register int nacc; /* dump_associated_rules - list the rules associated with a DFA state * - * synopisis + * synopsis * int ds; * FILE *file; * dump_associated_rules( file, ds ); @@ -163,7 +162,7 @@ int ds; for ( i = 1; i <= size; ++i ) { - register rule_num = rule_linenum[assoc_rule[dset[i]]]; + register int rule_num = rule_linenum[assoc_rule[dset[i]]]; for ( j = 1; j <= num_associated_rules; ++j ) if ( rule_num == rule_set[j] ) @@ -194,7 +193,7 @@ int ds; /* dump_transitions - list the transitions associated with a DFA state * - * synopisis + * synopsis * int state[numecs]; * 
FILE *file; * dump_transitions( file, state ); @@ -464,12 +463,12 @@ void ntod() inittbl(); - /* check to see whether we should build a separate table for transitions + /* Check to see whether we should build a separate table for transitions * on NUL characters. We don't do this for full-speed (-F) scanners, * since for them we don't have a simple state number lying around with * which to index the table. We also don't bother doing it for scanners * unless (1) NUL is in its own equivalence class (indicated by a - * positive value of ecgroup[NUL]), (2) NUL's equilvalence class is + * positive value of ecgroup[NUL]), (2) NUL's equivalence class is * the last equivalence class, and (3) the number of equivalence classes * is the same as the number of characters. This latter case comes about * when useecs is false or when its true but every character still @@ -566,7 +565,7 @@ void ntod() numstates = 1; /* for each start condition, make one state for the case when - * we're at the beginning of the line (the '%' operator) and + * we're at the beginning of the line (the '^' operator) and * one for the case when we're not */ if ( i % 2 == 1 ) @@ -822,10 +821,7 @@ int sns[], numstates, accset[], nacc, hashval, *newds_addr; newds = lastdfa; - dss[newds] = (int *) malloc( (unsigned) ((numstates + 1) * sizeof( int )) ); - - if ( ! dss[newds] ) - flexfatal( "dynamic memory failure in snstods()" ); + dss[newds] = allocate_integer_array( numstates + 1 ); /* if we haven't already sorted the states in sns, we do so now, so that * future comparisons with it can be made quickly @@ -860,16 +856,18 @@ int sns[], numstates, accset[], nacc, hashval, *newds_addr; bubble( accset, nacc ); - dfaacc[newds].dfaacc_set = - (int *) malloc( (unsigned) ((nacc + 1) * sizeof( int )) ); - - if ( ! 
dfaacc[newds].dfaacc_set ) - flexfatal( "dynamic memory failure in snstods()" ); + dfaacc[newds].dfaacc_set = allocate_integer_array( nacc + 1 ); /* save the accepting set for later */ for ( i = 1; i <= nacc; ++i ) + { dfaacc[newds].dfaacc_set[i] = accset[i]; + if ( accset[i] <= num_rules ) + /* Who knows, perhaps a REJECT can yield this rule */ + rule_useful[accset[i]] = true; + } + accsiz[newds] = nacc; } @@ -882,6 +880,9 @@ int sns[], numstates, accset[], nacc, hashval, *newds_addr; j = accset[i]; dfaacc[newds].dfaacc_state = j; + + if ( j <= num_rules ) + rule_useful[j] = true; } *newds_addr = newds; @@ -985,8 +986,8 @@ bottom: */ void sympartition( ds, numstates, symlist, duplist ) -int ds[], numstates, duplist[]; -int symlist[]; +int ds[], numstates; +int symlist[], duplist[]; { int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich; @@ -1012,9 +1013,9 @@ int symlist[]; if ( tch != SYM_EPSILON ) { - if ( tch < -lastccl || tch > csize ) + if ( tch < -lastccl || tch >= csize ) { - if ( tch > csize && tch <= CSIZE ) + if ( tch >= csize && tch <= CSIZE ) flexerror( "scanner requires -8 flag" ); else -- cgit v1.2.3 From 9ad3b379052793bb590f585e7b075283f38ed90d Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 6 Feb 1993 21:01:26 +0000 Subject: Beginning of %pointer/%array support --- flex.skl | 80 ++++++++++++++++++++++++++++++++-------------------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/flex.skl b/flex.skl index 385f6a6..217ed1a 100644 --- a/flex.skl +++ b/flex.skl @@ -99,7 +99,7 @@ int read(); * we don't want an extra ';' after the "return" because that will cause * some compilers to complain about unreachable statements. 
*/ -#define yyterminate() return ( YY_NULL ) +#define yyterminate() return YY_NULL /* report a fatal error */ @@ -165,11 +165,13 @@ typedef struct yy_buffer_state *YY_BUFFER_STATE; %% section 1 definitions go here +#define yytext_ptr yytext + /* done after the current pattern has been matched and before the * corresponding action - sets up yytext */ #define YY_DO_BEFORE_ACTION \ - yytext = yy_bp; \ + yytext_ptr = yy_bp; \ %% code to fiddle yytext and yyleng for yymore() goes here yy_hold_char = *yy_cp; \ *yy_cp = '\0'; \ @@ -190,7 +192,7 @@ typedef struct yy_buffer_state *YY_BUFFER_STATE; } \ while ( 0 ) -#define unput(c) yyunput( c, yytext ) +#define unput(c) yyunput( c, yytext_ptr ) struct yy_buffer_state @@ -200,7 +202,7 @@ struct yy_buffer_state YY_CHAR *yy_ch_buf; /* input buffer */ YY_CHAR *yy_buf_pos; /* current position in input buffer */ - /* size of input buffer in bytes, not including room for EOB characters*/ + /* size of input buffer in bytes, not including room for EOB characters */ int yy_buf_size; /* number of characters read into yy_ch_buf, not including EOB characters */ @@ -215,7 +217,7 @@ struct yy_buffer_state #define EOF_DONE 2 }; -static YY_BUFFER_STATE yy_current_buffer; +static YY_BUFFER_STATE yy_current_buffer = 0; /* we provide macros for accessing buffer states in case in the * future we want to put the buffer states in a more general @@ -230,15 +232,6 @@ static YY_CHAR yy_hold_char; static int yy_n_chars; /* number of characters read into yy_ch_buf */ - -#ifndef YY_USER_ACTION -#define YY_USER_ACTION -#endif - -#ifndef YY_USER_INIT -#define YY_USER_INIT -#endif - extern YY_CHAR *yytext; extern int yyleng; extern FILE *yyin, *yyout; @@ -292,7 +285,9 @@ YY_DECL if ( yy_init ) { +#ifdef YY_USER_INIT YY_USER_INIT; +#endif if ( ! 
yy_start ) yy_start = 1; /* first start state */ @@ -332,7 +327,13 @@ yy_find_action: %% code to find the action number goes here YY_DO_BEFORE_ACTION; - YY_USER_ACTION; + +#ifdef YY_USER_ACTION + if ( yy_act != YY_END_OF_BUFFER ) + { + YY_USER_ACTION; + } +#endif do_action: /* this label is used only to access EOF actions */ @@ -345,7 +346,7 @@ do_action: /* this label is used only to access EOF actions */ case YY_END_OF_BUFFER: { /* amount of text matched not including the EOB char */ - int yy_amount_of_matched_text = yy_cp - yytext - 1; + int yy_amount_of_matched_text = yy_cp - yytext_ptr - 1; /* undo the effects of YY_DO_BEFORE_ACTION */ *yy_cp = yy_hold_char; @@ -361,7 +362,7 @@ do_action: /* this label is used only to access EOF actions */ { yy_state_type yy_next_state; - yy_c_buf_p = yytext + yy_amount_of_matched_text; + yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text; yy_current_state = yy_get_previous_state(); @@ -376,7 +377,7 @@ do_action: /* this label is used only to access EOF actions */ yy_next_state = yy_try_NUL_trans( yy_current_state ); - yy_bp = yytext + YY_MORE_ADJ; + yy_bp = yytext_ptr + YY_MORE_ADJ; if ( yy_next_state ) { @@ -409,7 +410,7 @@ do_action: /* this label is used only to access EOF actions */ * YY_NULL, it'll still work - another YY_NULL * will get returned. 
*/ - yy_c_buf_p = yytext + YY_MORE_ADJ; + yy_c_buf_p = yytext_ptr + YY_MORE_ADJ; yy_act = YY_STATE_EOF((yy_start - 1) / 2); goto do_action; @@ -424,12 +425,12 @@ do_action: /* this label is used only to access EOF actions */ break; case EOB_ACT_CONTINUE_SCAN: - yy_c_buf_p = yytext + yy_amount_of_matched_text; + yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text; yy_current_state = yy_get_previous_state(); yy_cp = yy_c_buf_p; - yy_bp = yytext + YY_MORE_ADJ; + yy_bp = yytext_ptr + YY_MORE_ADJ; goto yy_match; case EOB_ACT_LAST_MATCH: @@ -439,7 +440,7 @@ do_action: /* this label is used only to access EOF actions */ yy_current_state = yy_get_previous_state(); yy_cp = yy_c_buf_p; - yy_bp = yytext + YY_MORE_ADJ; + yy_bp = yytext_ptr + YY_MORE_ADJ; goto yy_find_action; } break; @@ -471,7 +472,7 @@ static int yy_get_next_buffer() { register YY_CHAR *dest = yy_current_buffer->yy_ch_buf; - register YY_CHAR *source = yytext - 1; /* copy prev. char, too */ + register YY_CHAR *source = yytext_ptr - 1; /* copy prev. 
char, too */ register int number_to_move, i; int ret_val; @@ -482,7 +483,7 @@ static int yy_get_next_buffer() /* try to read more data */ /* first move last chars to start of buffer */ - number_to_move = yy_c_buf_p - yytext; + number_to_move = yy_c_buf_p - yytext_ptr; for ( i = 0; i < number_to_move; ++i ) *(dest++) = *(source++); @@ -510,7 +511,7 @@ static int yy_get_next_buffer() if ( yy_n_chars == 0 ) { - if ( number_to_move == 1 ) + if ( number_to_move - YY_MORE_ADJ == 1 ) { ret_val = EOB_ACT_END_OF_FILE; yy_current_buffer->yy_eof_status = EOF_DONE; @@ -536,9 +537,9 @@ static int yy_get_next_buffer() * yy_get_previous_state() will have with '^' rules active */ - yytext = &yy_current_buffer->yy_ch_buf[1]; + yytext_ptr = &yy_current_buffer->yy_ch_buf[1]; - return ( ret_val ); + return ret_val; } @@ -556,12 +557,12 @@ static yy_state_type yy_get_previous_state() %% code to get the start state into yy_current_state goes here - for ( yy_cp = yytext + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp ) + for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp ) { %% code to find the next state goes here } - return ( yy_current_state ); + return yy_current_state; } @@ -582,7 +583,7 @@ register yy_state_type yy_current_state; register int yy_is_jam; %% code to find the next state, and perhaps do backtracking, goes here - return ( yy_is_jam ? 0 : yy_current_state ); + return yy_is_jam ? 
0 : yy_current_state; } @@ -655,7 +656,7 @@ static int input() else { /* need more input */ - yytext = yy_c_buf_p; + yytext_ptr = yy_c_buf_p; ++yy_c_buf_p; switch ( yy_get_next_buffer() ) @@ -664,22 +665,21 @@ static int input() { if ( yywrap() ) { - yy_c_buf_p = yytext + YY_MORE_ADJ; - return ( EOF ); + yy_c_buf_p = yytext_ptr + YY_MORE_ADJ; + return EOF; } YY_NEW_FILE; #ifdef __cplusplus - return ( yyinput() ); + return yyinput(); #else - return ( input() ); + return input(); #endif } - break; case EOB_ACT_CONTINUE_SCAN: - yy_c_buf_p = yytext + YY_MORE_ADJ; + yy_c_buf_p = yytext_ptr + YY_MORE_ADJ; break; case EOB_ACT_LAST_MATCH: @@ -695,7 +695,7 @@ static int input() c = *yy_c_buf_p; yy_hold_char = *++yy_c_buf_p; - return ( c ); + return c; } @@ -751,7 +751,7 @@ void yy_load_buffer_state() { yy_n_chars = yy_current_buffer->yy_n_chars; - yytext = yy_c_buf_p = yy_current_buffer->yy_buf_pos; + yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos; yyin = yy_current_buffer->yy_input_file; yy_hold_char = *yy_c_buf_p; } @@ -785,7 +785,7 @@ int size; yy_init_buffer( b, file ); - return ( b ); + return b; } -- cgit v1.2.3 From 8785f95a2e84cca862ee108cc09c4aba3eda9e05 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 6 Feb 1993 21:01:45 +0000 Subject: Remove %t cruft --- ecs.c | 109 +----------------------------------------------------------------- 1 file changed, 1 insertion(+), 108 deletions(-) diff --git a/ecs.c b/ecs.c index 73c07dd..2617c46 100644 --- a/ecs.c +++ b/ecs.c @@ -61,32 +61,7 @@ void ccl2ecl() ich = ccltbl[cclp + ccls]; cclmec = ecgroup[ich]; - if ( xlation && cclmec < 0 ) - { - /* special hack--if we're doing %t tables then it's - * possible that no representative of this character's - * equivalence class is in the ccl. So waiting till - * we see the representative would be disastrous. Instead, - * we add this character's equivalence class anyway, if it's - * not already present. 
- */ - int j; - - /* this loop makes this whole process n^2; but we don't - * really care about %t performance anyway - */ - for ( j = 0; j < newlen; ++j ) - if ( ccltbl[cclp + j] == -cclmec ) - break; - - if ( j >= newlen ) - { /* no representative yet, add this one in */ - ccltbl[cclp + newlen] = -cclmec; - ++newlen; - } - } - - else if ( cclmec > 0 ) + if ( cclmec > 0 ) { ccltbl[cclp + newlen] = cclmec; ++newlen; @@ -135,88 +110,6 @@ int fwd[], bck[], num; } -/* ecs_from_xlation - associate equivalence class numbers using %t table - * - * synopsis - * numecs = ecs_from_xlation( ecmap ); - * - * Upon return, ecmap will map each character code to its equivalence - * class. The mapping will be positive if the character is the representative - * of its class, negative otherwise. - * - * Returns the number of equivalence classes used. - */ - -int ecs_from_xlation( ecmap ) -int ecmap[]; - - { - int i; - int nul_is_alone = false; - int did_default_xlation_class = false; - - if ( xlation[0] != 0 ) - { - /* if NUL shares its translation with other characters, choose one - * of the other characters as the representative for the equivalence - * class. This allows a cheap test later to see whether we can - * do away with NUL's equivalence class. 
- */ - for ( i = 1; i < csize; ++i ) - if ( xlation[i] == -xlation[0] ) - { - xlation[i] = xlation[0]; - ecmap[0] = -xlation[0]; - break; - } - - if ( i >= csize ) - /* didn't find a companion character--remember this fact */ - nul_is_alone = true; - } - - for ( i = 1; i < csize; ++i ) - if ( xlation[i] == 0 ) - { - if ( did_default_xlation_class ) - ecmap[i] = -num_xlations; - - else - { - /* make an equivalence class for those characters not - * specified in the %t table - */ - ++num_xlations; - ecmap[i] = num_xlations; - did_default_xlation_class = true; - } - } - - else - ecmap[i] = xlation[i]; - - if ( nul_is_alone ) - /* force NUL's equivalence class to be the last one */ - { - ++num_xlations; - ecmap[0] = num_xlations; - - /* there's actually a bug here: if someone is fanatic enough to - * put every character in its own translation class, then right - * now we just promoted NUL's equivalence class to be csize + 1; - * we can handle NUL's class number being == csize (by instead - * putting it in its own table), but we can't handle some *other* - * character having to be put in its own table, too. So in - * this case we bail out. - */ - if ( num_xlations > csize ) - flexfatal( "too many %t classes!" 
); - } - - return num_xlations; - } - - /* mkeccl - update equivalence classes based on character class xtions * * synopsis -- cgit v1.2.3 From bd5222a3fcef5f18023cead4361fea598d428f96 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 6 Feb 1993 21:03:13 +0000 Subject: numerous bug fixes extra formatting of error/warning messages added support of <*>, partial support for nested start conditions --- parse.y | 232 ++++++++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 167 insertions(+), 65 deletions(-) diff --git a/parse.y b/parse.y index f74d32c..4eca065 100644 --- a/parse.y +++ b/parse.y @@ -1,4 +1,3 @@ - /* parse.y - parser for flex input */ %token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL EOF_OP @@ -39,7 +38,10 @@ static char rcsid[] = int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen; int trlcontxt, xcluflg, cclsorted, varlength, variable_trail_rule; +int *active_ss; Char clower(); +void build_eof_action(); +void yyerror(); static int madeany = false; /* whether we've made the '.' character class */ int previous_continued_action; /* whether the previous rule's action was '|' */ @@ -47,7 +49,7 @@ int previous_continued_action; /* whether the previous rule's action was '|' */ %} %% -goal : initlex sect1 sect1end sect2 initforrule +goal : initlex sect1 sect1end sect2 initforrule { /* add default rule */ int def_rule; @@ -56,27 +58,36 @@ goal : initlex sect1 sect1end sect2 initforrule def_rule = mkstate( -pat ); + /* remember the number of the default rule so we + * don't generate "can't match" warnings for it. 
+ */ + default_rule = num_rules; + finish_rule( def_rule, false, 0, 0 ); for ( i = 1; i <= lastsc; ++i ) scset[i] = mkbranch( scset[i], def_rule ); if ( spprdflt ) - fputs( "YY_FATAL_ERROR( \"flex scanner jammed\" )", - temp_action_file ); + add_action( + "YY_FATAL_ERROR( \"flex scanner jammed\" )" ); else - fputs( "ECHO", temp_action_file ); + add_action( "ECHO" ); - fputs( ";\n\tYY_BREAK\n", temp_action_file ); + add_action( ";\n\tYY_BREAK\n" ); } ; -initlex : - { - /* initialize for processing rules */ +initlex : + { /* initialize for processing rules */ /* create default DFA start condition */ scinstal( "INITIAL", false ); + + /* initially, the start condition scoping is + * "no start conditions active" + */ + actvp = 0; } ; @@ -87,9 +98,19 @@ sect1 : sect1 startconddecl WHITESPACE namelist1 '\n' ; sect1end : SECTEND + { + /* we now know how many start conditions there + * are, so create the "activity" map indicating + * which conditions are active. + */ + active_ss = allocate_integer_array( lastsc + 1 ); + + for ( i = 1; i <= lastsc; ++i ) + active_ss[i] = 0; + } ; -startconddecl : SCDECL +startconddecl : SCDECL { /* these productions are separate from the s1object * rule because the semantics must be done before @@ -113,11 +134,11 @@ namelist1 : namelist1 WHITESPACE NAME { synerr( "bad start condition list" ); } ; -sect2 : sect2 initforrule flexrule '\n' +sect2 : sect2 initforrule flexrule '\n' | ; -initforrule : +initforrule : { /* initialize for a parse of one rule */ trlcontxt = variable_trail_rule = varlength = false; @@ -128,7 +149,7 @@ initforrule : } ; -flexrule : scon '^' rule +flexrule : scon '^' rule { pat = $3; finish_rule( pat, variable_trail_rule, @@ -142,7 +163,7 @@ flexrule : scon '^' rule { bol_needed = true; - if ( performance_report ) + if ( performance_report > 1 ) pinpoint_message( "'^' operator results in sub-optimal performance" ); } @@ -159,7 +180,7 @@ flexrule : scon '^' rule mkbranch( scset[actvsc[i]], pat ); } - | '^' rule + | '^' 
rule { pat = $2; finish_rule( pat, variable_trail_rule, @@ -177,13 +198,13 @@ flexrule : scon '^' rule { bol_needed = true; - if ( performance_report ) + if ( performance_report > 1 ) pinpoint_message( "'^' operator results in sub-optimal performance" ); } } - | rule + | rule { pat = $1; finish_rule( pat, variable_trail_rule, @@ -194,10 +215,10 @@ flexrule : scon '^' rule scset[i] = mkbranch( scset[i], pat ); } - | scon EOF_OP + | scon EOF_OP { build_eof_action(); } - | EOF_OP + | EOF_OP { /* this EOF applies to all start conditions * which don't already have EOF actions @@ -209,44 +230,57 @@ flexrule : scon '^' rule actvsc[++actvp] = i; if ( actvp == 0 ) - pinpoint_message( - "warning - all start conditions already have <> rules" ); + warn( + "all start conditions already have <> rules" ); else build_eof_action(); } - | error + | error { synerr( "unrecognized rule" ); } ; -scon : '<' namelist2 '>' - ; +scon : '<' namelist2 '>' -namelist2 : namelist2 ',' NAME - { - if ( (scnum = sclookup( nmstr )) == 0 ) - format_pinpoint_message( - "undeclared start condition %s", nmstr ); + | '<' '*' '>' + { + actvp = 0; - else - actvsc[++actvp] = scnum; + for ( i = 1; i <= lastsc; ++i ) + actvsc[++actvp] = i; } + ; - | NAME +namelist2 : namelist2 ',' sconname + + | { actvp = 0; } sconname + + | error + { synerr( "bad start condition list" ); } + ; + +sconname : NAME { if ( (scnum = sclookup( nmstr )) == 0 ) format_pinpoint_message( "undeclared start condition %s", nmstr ); else - actvsc[actvp = 1] = scnum; - } + { + if ( ++actvp >= current_max_scs ) + /* some bozo has included multiple instances + * of start condition names + */ + pinpoint_message( + "too many start conditions in <> construct!" ); - | error - { synerr( "bad start condition list" ); } + else + actvsc[actvp] = scnum; + } + } ; -rule : re2 re +rule : re2 re { if ( transchar[lastst[$2]] != SYM_EPSILON ) /* provide final transition \now/ so it @@ -269,13 +303,8 @@ rule : re2 re * erroneously. */ if ( ! 
varlength || headcnt != 0 ) - { - fprintf( stderr, - "%s: warning - trailing context rule at line %d made variable because\n", - program_name, linenum ); - fprintf( stderr, - " of preceding '|' action\n" ); - } + warn( + "trailing context made variable due to preceding '|' action" ); /* mark as variable */ varlength = true; @@ -320,26 +349,33 @@ rule : re2 re * above */ if ( ! varlength || headcnt != 0 ) - { - fprintf( stderr, - "%s: warning - trailing context rule at line %d made variable because\n", - program_name, linenum ); - fprintf( stderr, - " of preceding '|' action\n" ); - } + warn( + "trailing context made variable due to preceding '|' action" ); /* mark as variable */ varlength = true; headcnt = 0; } - trlcontxt = true; + if ( varlength && headcnt == 0 ) + { + /* again, see the comment in the rule for "re2 re" + * above + */ + add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK ); + variable_trail_rule = true; + } - if ( ! varlength ) - headcnt = rulelen; + else + { + if ( ! varlength ) + headcnt = rulelen; - ++rulelen; - trailcnt = 1; + ++rulelen; + trailcnt = 1; + } + + trlcontxt = true; eps = mkstate( SYM_EPSILON ); $$ = link_machines( $1, @@ -362,7 +398,7 @@ rule : re2 re ; -re : re '|' series +re : re '|' series { varlength = true; $$ = mkor( $1, $3 ); @@ -398,7 +434,7 @@ re2 : re '/' } ; -series : series singleton +series : series singleton { /* this is where concatenation of adjacent patterns * gets done @@ -410,7 +446,7 @@ series : series singleton { $$ = $1; } ; -singleton : singleton '*' +singleton : singleton '*' { varlength = true; @@ -443,7 +479,15 @@ singleton : singleton '*' else { if ( $3 == 0 ) - $$ = mkopt( mkrep( $1, $3, $5 ) ); + { + if ( $5 <= 0 ) + { + synerr( "bad iteration values" ); + $$ = $1; + } + else + $$ = mkopt( mkrep( $1, 1, $5 ) ); + } else $$ = mkrep( $1, $3, $5 ); } @@ -552,7 +596,7 @@ fullccl : '[' ccl ']' /* *Sigh* - to be compatible Unix lex, negated ccls * match newlines */ -#ifdef NOTDEF +#if 0 ccladd( $3, '\n' ); /* 
negated ccls don't match '\n' */ cclsorted = false; /* because we added the newline */ #endif @@ -561,7 +605,7 @@ fullccl : '[' ccl ']' } ; -ccl : ccl CHAR '-' CHAR +ccl : ccl CHAR '-' CHAR { if ( $2 > $4 ) synerr( "negative range in character class" ); @@ -635,6 +679,7 @@ void build_eof_action() { register int i; + char action_text[MAXLINE]; for ( i = 1; i <= actvp; ++i ) { @@ -646,12 +691,34 @@ void build_eof_action() else { sceof[actvsc[i]] = true; - fprintf( temp_action_file, "case YY_STATE_EOF(%s):\n", + sprintf( action_text, "case YY_STATE_EOF(%s):\n", scname[actvsc[i]] ); + add_action( action_text ); } } - line_directive_out( temp_action_file ); + line_directive_out( (FILE *) 0 ); + + /* this isn't a normal rule after all - don't count it as + * such, so we don't have any holes in the rule numbering + * (which make generating "rule can never match" warnings + * more difficult + */ + --num_rules; + ++num_eof_rules; + } + + +/* format_synerr - write out formatted syntax error */ + +void format_synerr( msg, arg ) +char msg[], arg[]; + + { + char errmsg[MAXLINE]; + + (void) sprintf( errmsg, msg, arg ); + synerr( errmsg ); } @@ -666,6 +733,14 @@ char str[]; } +/* warn - report a warning, unless -w was given */ + +void warn( str ) +char str[]; + { + line_warning( str, linenum ); + } + /* format_pinpoint_message - write out a message formatted with one string, * pinpointing its location */ @@ -687,7 +762,34 @@ void pinpoint_message( str ) char str[]; { - fprintf( stderr, "\"%s\", line %d: %s\n", infilename, linenum, str ); + line_pinpoint( str, linenum ); + } + + +/* line_warning - report a warning at a given line, unless -w was given */ + +void line_warning( str, line ) +char str[]; +int line; + { + char warning[MAXLINE]; + + if ( ! 
nowarn ) + { + sprintf( warning, "warning, %s", str ); + line_pinpoint( warning, line ); + } + } + + +/* line_pinpoint - write out a message, pinpointing it at the given line */ + +void line_pinpoint( str, line ) +char str[]; +int line; + + { + fprintf( stderr, "\"%s\", line %d: %s\n", infilename, line, str ); } -- cgit v1.2.3 From e849142f823d50947a6f85fb26fc4519b074c6c5 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 6 Feb 1993 21:03:57 +0000 Subject: Fixed bug in 8-bit hashing --- sym.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/sym.c b/sym.c index bdca154..3ab3890 100644 --- a/sym.c +++ b/sym.c @@ -198,7 +198,7 @@ int hash_size; locstr = 0; while ( str[locstr] ) - hashval = ((hashval << 1) + str[locstr++]) % hash_size; + hashval = ((hashval << 1) + (unsigned char) str[locstr++]) % hash_size; return ( hashval ); } @@ -242,6 +242,28 @@ char nd[]; } +/* scextend - increase the maximum number of start conditions + * + * synopsis + * scextend(); + */ + +void scextend() + + { + current_max_scs += MAX_SCS_INCREMENT; + + ++num_reallocs; + + scset = reallocate_integer_array( scset, current_max_scs ); + scbol = reallocate_integer_array( scbol, current_max_scs ); + scxclu = reallocate_integer_array( scxclu, current_max_scs ); + sceof = reallocate_integer_array( sceof, current_max_scs ); + scname = reallocate_char_ptr_array( scname, current_max_scs ); + actvsc = reallocate_integer_array( actvsc, current_max_scs ); + } + + /* scinstal - make a start condition * * synopsis @@ -273,18 +295,7 @@ int xcluflg; printf( "#define %s %d\n", str, lastsc ); if ( ++lastsc >= current_max_scs ) - { - current_max_scs += MAX_SCS_INCREMENT; - - ++num_reallocs; - - scset = reallocate_integer_array( scset, current_max_scs ); - scbol = reallocate_integer_array( scbol, current_max_scs ); - scxclu = reallocate_integer_array( scxclu, current_max_scs ); - sceof = reallocate_integer_array( sceof, current_max_scs ); - scname = 
reallocate_char_ptr_array( scname, current_max_scs ); - actvsc = reallocate_integer_array( actvsc, current_max_scs ); - } + scextend(); scname[lastsc] = copy_string( str ); -- cgit v1.2.3 From f92b812885c94d889664505b6091d486c0bfe872 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 6 Feb 1993 21:04:29 +0000 Subject: Bug/lint fixes Modified to work with "action" array instead of temp file --- gen.c | 42 ++++++++++++++---------------------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/gen.c b/gen.c index 83674c4..13a8e70 100644 --- a/gen.c +++ b/gen.c @@ -457,7 +457,7 @@ char *char_map; printf( "if ( yy_current_state >= %d )\n", lastdfa + 2 ); indent_up(); - indent_puts( "yy_c = yy_meta[yy_c];" ); + indent_puts( "yy_c = yy_meta[(unsigned int) yy_c];" ); indent_down(); } @@ -676,11 +676,7 @@ void gen_NUL_trans() do_indent(); - if ( interactive ) - printf( "yy_is_jam = (yy_base[yy_current_state] == %d);\n", - jambase ); - else - printf( "yy_is_jam = (yy_current_state == %d);\n", jamstate ); + printf( "yy_is_jam = (yy_current_state == %d);\n", jamstate ); } /* if we've entered an accepting state, backtrack; note that @@ -902,7 +898,7 @@ void gentabs() total_states = lastdfa + numtemps; - printf( tblend > MAX_SHORT ? C_long_decl : C_short_decl, + printf( total_states > MAX_SHORT ? 
C_long_decl : C_short_decl, "yy_base", total_states + 1 ); for ( i = 1; i <= lastdfa; ++i ) @@ -1020,8 +1016,8 @@ void make_tables() if ( yymore_used ) { - indent_puts( "yytext -= yy_more_len; \\" ); - indent_puts( "yyleng = yy_cp - yytext; \\" ); + indent_puts( "yytext_ptr -= yy_more_len; \\" ); + indent_puts( "yyleng = yy_cp - yytext_ptr; \\" ); } else @@ -1125,9 +1121,10 @@ void make_tables() puts( "static int yy_looking_for_trail_begin = 0;" ); puts( "static int yy_full_lp;" ); puts( "static int *yy_full_state;" ); - printf( "#define YY_TRAILING_MASK 0x%x\n", YY_TRAILING_MASK ); + printf( "#define YY_TRAILING_MASK 0x%x\n", + (unsigned int) YY_TRAILING_MASK ); printf( "#define YY_TRAILING_HEAD_MASK 0x%x\n", - YY_TRAILING_HEAD_MASK ); + (unsigned int) YY_TRAILING_HEAD_MASK ); } puts( "#define REJECT \\" ); @@ -1165,7 +1162,7 @@ void make_tables() indent_puts( "static int yy_doing_yy_more = 0;" ); indent_puts( "static int yy_more_len = 0;" ); indent_puts( - "#define yymore() { yy_more_flag = 1; }" ); + "#define yymore() do { yy_more_flag = 1; } while ( 0 )" ); indent_puts( "#define YY_MORE_ADJ (yy_doing_yy_more ? 
yy_more_len : 0)" ); } @@ -1178,19 +1175,8 @@ void make_tables() skelout(); - if ( ferror( temp_action_file ) ) - flexfatal( "error occurred when writing temporary action file" ); - - else if ( fclose( temp_action_file ) ) - flexfatal( "error occurred when closing temporary action file" ); - - temp_action_file = fopen( action_file_name, "r" ); - - if ( temp_action_file == NULL ) - flexfatal( "could not re-open temporary action file" ); - - /* copy prolog from action_file to output file */ - action_out(); + /* copy prolog to output file */ + fputs( prolog, stdout ); skelout(); @@ -1265,11 +1251,11 @@ void make_tables() indent_down(); } - /* copy actions from action_file to output file */ + /* copy actions to output file */ skelout(); indent_up(); gen_bt_action(); - action_out(); + fputs( action, stdout ); /* generate cases for any missing EOF rules */ for ( i = 1; i <= lastsc; ++i ) @@ -1315,7 +1301,7 @@ void make_tables() skelout(); if ( bol_needed ) - indent_puts( "register YY_CHAR *yy_bp = yytext;\n" ); + indent_puts( "register YY_CHAR *yy_bp = yytext_ptr;\n" ); gen_start_state(); -- cgit v1.2.3 From 4c0f0933248d0ea882b82d4514c5649359f22b77 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 6 Feb 1993 21:04:56 +0000 Subject: reallocate_character_array -> reallocate_Character_array --- ccl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ccl.c b/ccl.c index 45714b5..f17fa1b 100644 --- a/ccl.c +++ b/ccl.c @@ -65,7 +65,7 @@ int ch; ++num_reallocs; - ccltbl = reallocate_character_array( ccltbl, current_max_ccl_tbl_size ); + ccltbl = reallocate_Character_array( ccltbl, current_max_ccl_tbl_size ); } ccllen[cclp] = len + 1; -- cgit v1.2.3 From 446040fbf808b478bbd7b6a8122a5c67c82e771d Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 6 Feb 1993 21:05:41 +0000 Subject: A lot of tweaks ... 
--- main.c | 263 +++++++++++++++++++++++++++++++++++++++++------------------------ scan.l | 257 ++++++++++++++++++++++++++++++--------------------------------- 2 files changed, 288 insertions(+), 232 deletions(-) diff --git a/main.c b/main.c index 7bdd599..38f7244 100644 --- a/main.c +++ b/main.c @@ -39,8 +39,9 @@ static char rcsid[] = #include "flexdef.h" +#include "version.h" -static char flex_version[] = "2.3"; +static char flex_version[] = FLEX_VERSION; /* declare functions that have forward references */ @@ -51,27 +52,31 @@ void set_up_initial_allocations PROTO(()); /* these globals are all defined and commented in flexdef.h */ -int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt; +int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt; int interactive, caseins, useecs, fulltbl, usemecs; -int fullspd, gen_line_dirs, performance_report, backtrack_report, csize; +int fullspd, gen_line_dirs, performance_report, backtrack_report; +int yytext_is_array, csize; int yymore_used, reject, real_reject, continued_action; int yymore_really_used, reject_really_used; int datapos, dataline, linenum; FILE *skelfile = NULL; +int skel_ind = 0; +char *action_array, *prolog, *action; +int action_size, action_offset, action_index; char *infilename = NULL; int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE]; int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; -int current_mns, num_rules, current_max_rules, lastnfa; +int current_mns, num_rules, num_eof_rules, default_rule; +int current_max_rules, lastnfa; int *firstst, *lastst, *finalst, *transchar, *trans1, *trans2; -int *accptnum, *assoc_rule, *state_type, *rule_type, *rule_linenum; +int *accptnum, *assoc_rule, *state_type; +int *rule_type, *rule_linenum, *rule_useful; int current_state_type; int variable_trailing_context_rules; int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP]; int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; int numecs, nextecm[CSIZE + 1], 
ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1]; int tecbck[CSIZE + 1]; -int *xlation = (int *) 0; -int num_xlations; int lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof, *actvsc; char **scname; int current_max_dfa_size, current_max_xpairs; @@ -84,14 +89,12 @@ int numsnpairs, jambase, jamstate; int lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse; int current_max_ccl_tbl_size; Char *ccltbl; -char *starttime, *endtime, nmstr[MAXLINE]; +char *starttime = 0, *endtime, nmstr[MAXLINE]; int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; int num_backtracking, bol_needed; -FILE *temp_action_file; FILE *backtrack_file; int end_of_buffer_state; -char *action_file_name = NULL; char **input_files; int num_input_files; char *program_name; @@ -111,6 +114,8 @@ int argc; char **argv; { + int i; + flexinit( argc, argv ); readin(); @@ -128,14 +133,18 @@ char **argv; else if ( reject_really_used == REALLY_NOT_USED ) reject = false; - if ( performance_report ) + if ( performance_report > 0 ) { - if ( interactive ) - fprintf( stderr, + if ( performance_report > 1 ) + { + if ( interactive ) + fprintf( stderr, "-I (interactive) entails a minor performance penalty\n" ); - if ( yymore_used ) - fprintf( stderr, "yymore() entails a minor performance penalty\n" ); + if ( yymore_used ) + fprintf( stderr, + "yymore() entails a minor performance penalty\n" ); + } if ( reject ) fprintf( stderr, "REJECT entails a large performance penalty\n" ); @@ -162,14 +171,21 @@ char **argv; ntod(); + for ( i = 1; i <= num_rules; ++i ) + if ( ! rule_useful[i] && i != default_rule ) + line_warning( "rule cannot be matched", rule_linenum[i] ); + + if ( spprdflt && ! reject && rule_useful[default_rule] ) + line_warning( "-s option given but default rule can be matched", + rule_linenum[default_rule] ); + /* generate the C state transition tables from the DFA */ make_tables(); /* note, flexend does not return. 
It exits with its argument as status. */ - flexend( 0 ); - /*NOTREACHED*/ + return 0; /* keep compilers/lint happy */ } @@ -195,24 +211,12 @@ int status; if ( skelfile != NULL ) { if ( ferror( skelfile ) ) - flexfatal( "error occurred when writing skeleton file" ); + flexfatal( "error occurred when reading skeleton file" ); else if ( fclose( skelfile ) ) flexfatal( "error occurred when closing skeleton file" ); } - if ( temp_action_file ) - { - if ( ferror( temp_action_file ) ) - flexfatal( "error occurred when writing temporary action file" ); - - else if ( fclose( temp_action_file ) ) - flexfatal( "error occurred when closing temporary action file" ); - - else if ( unlink( action_file_name ) ) - flexfatal( "error occurred when deleting temporary action file" ); - } - if ( status != 0 && outfile_created ) { if ( ferror( stdout ) ) @@ -245,12 +249,15 @@ int status; if ( printstats ) { - endtime = flex_gettime(); - fprintf( stderr, "%s version %s usage statistics:\n", program_name, flex_version ); - fprintf( stderr, " started at %s, finished at %s\n", - starttime, endtime ); + + if ( starttime ) + { + endtime = flex_gettime(); + fprintf( stderr, " started at %s, finished at %s\n", + starttime, endtime ); + } fprintf( stderr, " scanner options: -" ); @@ -258,23 +265,31 @@ int status; putc( 'b', stderr ); if ( ddebug ) putc( 'd', stderr ); - if ( interactive ) - putc( 'I', stderr ); if ( caseins ) putc( 'i', stderr ); - if ( ! gen_line_dirs ) - putc( 'L', stderr ); - if ( performance_report ) + if ( performance_report > 0 ) + putc( 'p', stderr ); + if ( performance_report > 1 ) putc( 'p', stderr ); if ( spprdflt ) putc( 's', stderr ); if ( use_stdout ) putc( 't', stderr ); - if ( trace ) - putc( 'T', stderr ); if ( printstats ) putc( 'v', stderr ); /* always true! */ - if ( csize == 256 ) + if ( nowarn ) + putc( 'w', stderr ); + if ( ! interactive ) + putc( 'B', stderr ); + if ( interactive ) + putc( 'I', stderr ); + if ( ! 
gen_line_dirs ) + putc( 'L', stderr ); + if ( trace ) + putc( 'T', stderr ); + if ( csize == 128 ) + putc( '7', stderr ); + else putc( '8', stderr ); fprintf( stderr, " -C" ); @@ -288,7 +303,7 @@ int status; if ( usemecs ) putc( 'm', stderr ); - if ( strcmp( skelname, DEFAULT_SKELETON_FILE ) ) + if ( skelname ) fprintf( stderr, " -S%s", skelname ); putc( '\n', stderr ); @@ -296,8 +311,8 @@ int status; fprintf( stderr, " %d/%d NFA states\n", lastnfa, current_mns ); fprintf( stderr, " %d/%d DFA states (%d words)\n", lastdfa, current_max_dfas, totnst ); - fprintf( stderr, - " %d rules\n", num_rules - 1 /* - 1 for def. rule */ ); + fprintf( stderr, " %d rules\n", + num_rules + num_eof_rules - 1 /* - 1 for def. rule */ ); if ( num_backtracking == 0 ) fprintf( stderr, " No backtracking\n" ); @@ -394,18 +409,27 @@ char **argv; { int i, sawcmpflag; + int csize_given, interactive_given; char *arg, *flex_gettime(), *mktemp(); - printstats = syntaxerror = trace = spprdflt = interactive = caseins = false; - backtrack_report = performance_report = ddebug = fulltbl = fullspd = false; - yymore_used = continued_action = reject = false; + printstats = syntaxerror = trace = spprdflt = caseins = false; + backtrack_report = ddebug = fulltbl = fullspd = false; + nowarn = yymore_used = continued_action = reject = yytext_is_array = false; yymore_really_used = reject_really_used = false; gen_line_dirs = usemecs = useecs = true; + performance_report = 0; sawcmpflag = false; use_stdout = false; + csize_given = false; + interactive_given = false; - csize = DEFAULT_CSIZE; + /* Initialize dynamic array for holding the rule actions. 
*/ + action_size = 2048; /* default size of action array in bytes */ + prolog = action = action_array = allocate_character_array( action_size ); + action_offset = action_index = 0; + + starttime = flex_gettime(); program_name = argv[0]; @@ -420,6 +444,11 @@ char **argv; for ( i = 1; arg[i] != '\0'; ++i ) switch ( arg[i] ) { + case 'B': + interactive = false; + interactive_given = true; + break; + case 'b': backtrack_report = true; break; @@ -484,8 +513,13 @@ char **argv; fullspd = true; break; + case 'h': + usage(); + exit( 0 ); + case 'I': interactive = true; + interactive_given = true; break; case 'i': @@ -501,7 +535,7 @@ char **argv; break; case 'p': - performance_report = true; + ++performance_report; break; case 'S': @@ -527,36 +561,61 @@ char **argv; printstats = true; break; + case 'V': + fprintf( stderr, "%s version %s\n", + program_name, flex_version ); + exit( 0 ); + + case 'w': + nowarn = true; + break; + + case '7': + csize = 128; + csize_given = true; + break; + case '8': csize = CSIZE; + csize_given = true; break; default: - lerrif( "unknown flag '%c'", (int) arg[i] ); - break; + fprintf( stderr, "%s: unknown flag '%c'\n", + program_name, (int) arg[i] ); + usage(); + exit( 1 ); } get_next_arg: /* used by -C and -S flags in lieu of a "continue 2" control */ ; } + if ( ! csize_given ) + { + if ( fulltbl || fullspd ) + csize = DEFAULT_CSIZE; + else + csize = CSIZE; + } + + if ( ! interactive_given ) + { + if ( fulltbl || fullspd ) + interactive = false; + else + interactive = true; + } + if ( (fulltbl || fullspd) && usemecs ) flexerror( "full table and -Cm don't make sense together" ); if ( (fulltbl || fullspd) && interactive ) - flexerror( "full table and -I are (currently) incompatible" ); + flexerror( "full table and -I are incompatible" ); if ( fulltbl && fullspd ) flexerror( "full table and -F are mutually exclusive" ); - if ( ! 
skelname ) - { - static char skeleton_name_storage[400]; - - skelname = skeleton_name_storage; - (void) strcpy( skelname, DEFAULT_SKELETON_FILE ); - } - if ( ! use_stdout ) { FILE *prev_stdout = freopen( outfile, "w", stdout ); @@ -590,34 +649,11 @@ get_next_arg: /* used by -C and -S flags in lieu of a "continue 2" control */ lastccl = 0; lastsc = 0; - /* initialize the statistics */ - starttime = flex_gettime(); - - if ( (skelfile = fopen( skelname, "r" )) == NULL ) + if ( skelname && (skelfile = fopen( skelname, "r" )) == NULL ) lerrsf( "can't open skeleton file %s", skelname ); -#ifdef SYS_V - action_file_name = tmpnam( NULL ); -#endif - - if ( action_file_name == NULL ) - { - static char temp_action_file_name[32]; - -#ifndef SHORT_FILE_NAMES - (void) strcpy( temp_action_file_name, "/tmp/flexXXXXXX" ); -#else - (void) strcpy( temp_action_file_name, "flexXXXXXX.tmp" ); -#endif - (void) mktemp( temp_action_file_name ); - - action_file_name = temp_action_file_name; - } - - if ( (temp_action_file = fopen( action_file_name, "w" )) == NULL ) - lerrsf( "can't open temporary action file %s", action_file_name ); - - lastdfa = lastnfa = num_rules = numas = numsnpairs = tmpuses = 0; + lastdfa = lastnfa = 0; + num_rules = num_eof_rules = default_rule = numas = numsnpairs = tmpuses = 0; numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0; numuniq = numdup = hshsave = eofseen = datapos = dataline = 0; num_backtracking = onesp = numprots = 0; @@ -675,9 +711,9 @@ void readin() puts( "#define FLEX_DEBUG" ); if ( csize == 256 ) - puts( "#define YY_CHAR unsigned char" ); + puts( "typedef unsigned char YY_CHAR;" ); else - puts( "#define YY_CHAR char" ); + puts( "typedef char YY_CHAR;" ); line_directive_out( stdout ); @@ -687,12 +723,6 @@ void readin() flexend( 1 ); } - if ( xlation ) - { - numecs = ecs_from_xlation( ecgroup ); - useecs = true; - } - else if ( useecs ) numecs = cre8ecs( nextecm, ecgroup, csize ); @@ -728,6 +758,7 @@ void set_up_initial_allocations() 
current_max_rules = INITIAL_MAX_RULES; rule_type = allocate_integer_array( current_max_rules ); rule_linenum = allocate_integer_array( current_max_rules ); + rule_useful = allocate_integer_array( current_max_rules ); current_max_scs = INITIAL_MAX_SCS; scset = allocate_integer_array( current_max_scs ); @@ -743,7 +774,7 @@ void set_up_initial_allocations() cclng = allocate_integer_array( current_maxccls ); current_max_ccl_tbl_size = INITIAL_MAX_CCL_TBL_SIZE; - ccltbl = allocate_character_array( current_max_ccl_tbl_size ); + ccltbl = allocate_Character_array( current_max_ccl_tbl_size ); current_max_dfa_size = INITIAL_MAX_DFA_SIZE; @@ -765,3 +796,43 @@ void set_up_initial_allocations() nultrans = (int *) 0; } + + +void usage() + { + fprintf( stderr, + "%s [-bcdfhinpstvwBFILTV78 -C[efmF] -Sskeleton] [filename ...]\n", + program_name ); + + fprintf( stderr, + "\t-b generate backtracking information to lex.backtrack\n" ); + fprintf( stderr, "\t-c do-nothing POSIX option\n" ); + fprintf( stderr, "\t-d turn on debug mode in generated scanner\n" ); + fprintf( stderr, "\t-f generate fast, large scanner\n" ); + fprintf( stderr, "\t-h produce this help message\n" ); + fprintf( stderr, "\t-i generate case-insensitive scanner\n" ); + fprintf( stderr, "\t-n do-nothing POSIX option\n" ); + fprintf( stderr, "\t-p generate performance report to stderr\n" ); + fprintf( stderr, "\t-s suppress default rule to ECHO unmatched text\n" ); + fprintf( stderr, + "\t-t write generated scanner on stdout instead of lex.yy.c\n" ); + fprintf( stderr, "\t-v write summary of scanner statistics to stderr\n" ); + fprintf( stderr, "\t-w do not generate warnings\n" ); + fprintf( stderr, "\t-B generate batch scanner (opposite of -I)\n" ); + fprintf( stderr, "\t-F use alternative fast scanner representation\n" ); + fprintf( stderr, "\t-I generate interactive scanner (opposite of -B)\n" ); + fprintf( stderr, "\t-L suppress #line directives in scanner\n" ); + fprintf( stderr, "\t-T %s should run in trace 
mode\n", program_name ); + fprintf( stderr, "\t-V report %s version\n", program_name ); + fprintf( stderr, "\t-7 generate 7-bit scanner\n" ); + fprintf( stderr, "\t-8 generate 8-bit scanner\n" ); + fprintf( stderr, + "\t-C specify degree of table compression (default is -Cem):\n" ); + fprintf( stderr, "\t\t-Ce construct equivalence classes\n" ); + fprintf( stderr, + "\t\t-Cf do not compress scanner tables; use -f representation\n" ); + fprintf( stderr, "\t\t-Cm construct meta-equivalence classes\n" ); + fprintf( stderr, + "\t\t-CF do not compress scanner tables; use -F representation\n" ); + fprintf( stderr, "\t-S specify non-default skeleton file\n" ); + } diff --git a/scan.l b/scan.l index d02acc2..7083c1e 100644 --- a/scan.l +++ b/scan.l @@ -1,4 +1,3 @@ - /* scan.l - scanner for flex input */ %{ @@ -38,8 +37,8 @@ static char rcsid[] = #include "flexdef.h" #include "parse.h" -#define ACTION_ECHO fprintf( temp_action_file, "%s", yytext ) -#define MARK_END_OF_PROLOG fprintf( temp_action_file, "%%%% end of prolog\n" ); +#define ACTION_ECHO add_action( (char *) yytext ) +#define MARK_END_OF_PROLOG mark_prolog(); #undef YY_DECL #define YY_DECL \ @@ -47,11 +46,11 @@ static char rcsid[] = #define RETURNCHAR \ yylval = yytext[0]; \ - return ( CHAR ); + return CHAR; #define RETURNNAME \ (void) strcpy( nmstr, (char *) yytext ); \ - return ( NAME ); + return NAME; #define PUT_BACK_STRING(str, start) \ for ( i = strlen( (char *) (str) ) - 1; i >= start; --i ) \ @@ -68,12 +67,14 @@ static char rcsid[] = %x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE %x FIRSTCCL CCL ACTION RECOVER BRACEERROR C_COMMENT ACTION_COMMENT -%x ACTION_STRING PERCENT_BRACE_ACTION USED_LIST CODEBLOCK_2 XLATION +%x ACTION_STRING PERCENT_BRACE_ACTION USED_LIST CODEBLOCK_2 WS [ \t\f]+ OPTWS [ \t\f]* NOT_WS [^ \t\f\n] +NL \n|\r\n|\n\r + NAME [a-z_][a-z_0-9-]* NOT_NAME [^a-z_\n]+ @@ -81,71 +82,51 @@ SCNAME {NAME} ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) +FIRST_CCL_CHAR 
[^\\\n]|{ESCSEQ} +CCL_CHAR [^\\\n\]]|{ESCSEQ} + %% - static int bracelevel, didadef; - int i, indented_code, checking_used, new_xlation; + static int bracelevel, didadef, indented_code, checking_used; + int doing_codeblock = false; + int i; Char nmdef[MAXLINE], myesc(); + ^{WS} indented_code = true; BEGIN(CODEBLOCK); -^#.*\n ++linenum; /* treat as a comment */ ^"/*" ECHO; BEGIN(C_COMMENT); -^"%s"{NAME}? return ( SCDECL ); -^"%x"{NAME}? return ( XSCDECL ); -^"%{".*\n { +^"%s"{NAME}? return SCDECL; +^"%x"{NAME}? return XSCDECL; +^"%{".*{NL} { ++linenum; line_directive_out( stdout ); indented_code = false; BEGIN(CODEBLOCK); } -{WS} return ( WHITESPACE ); +{WS} return WHITESPACE; ^"%%".* { sectnum = 2; line_directive_out( stdout ); BEGIN(SECT2PROLOG); - return ( SECTEND ); + return SECTEND; } +^"%pointer" yytext_is_array = false; +^"%array" yytext_is_array = true; + ^"%used" { - pinpoint_message( "warning - %%used/%%unused have been deprecated" ); + warn( "%used/%unused have been deprecated" ); checking_used = REALLY_USED; BEGIN(USED_LIST); } ^"%unused" { + warn( "%used/%unused have been deprecated" ); checking_used = REALLY_NOT_USED; BEGIN(USED_LIST); - pinpoint_message( "warning - %%used/%%unused have been deprecated" ); - checking_used = REALLY_NOT_USED; BEGIN(USED_LIST); - } - - -^"%"[aeknopt]" ".*\n { -#ifdef NOTDEF - fprintf( stderr, - "old-style lex command at line %d ignored:\n\t%s", - linenum, yytext ); -#endif - ++linenum; } -^"%"[cr]{OPTWS} /* ignore old lex directive */ - -%t{OPTWS}\n { - ++linenum; - xlation = - (int *) malloc( sizeof( int ) * (unsigned) csize ); - - if ( ! 
xlation ) - flexfatal( - "dynamic memory failure building %t table" ); - - for ( i = 0; i < csize; ++i ) - xlation[i] = 0; - num_xlations = 0; - - BEGIN(XLATION); - } +^"%"[aceknopr]{WS}.*{NL} ++linenum; /* ignore */ ^"%"[^sxanpekotcru{}]{OPTWS} synerr( "unrecognized '%' directive" ); @@ -156,23 +137,22 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) } {SCNAME} RETURNNAME; -^{OPTWS}\n ++linenum; /* allows blank lines in section 1 */ -{OPTWS}\n ++linenum; return ( '\n' ); -. synerr( "illegal character" ); BEGIN(RECOVER); +^{OPTWS}{NL} ++linenum; /* allows blank lines in section 1 */ +{OPTWS}{NL} ++linenum; return '\n'; "*/" ECHO; BEGIN(INITIAL); -"*/".*\n ++linenum; ECHO; BEGIN(INITIAL); +"*/".*{NL} ++linenum; ECHO; BEGIN(INITIAL); [^*\n]+ ECHO; "*" ECHO; -\n ++linenum; ECHO; +{NL} ++linenum; ECHO; -^"%}".*\n ++linenum; BEGIN(INITIAL); +^"%}".*{NL} ++linenum; BEGIN(INITIAL); "reject" ECHO; CHECK_REJECT(yytext); "yymore" ECHO; CHECK_YYMORE(yytext); {NAME}|{NOT_NAME}|. ECHO; -\n { +{NL} { ++linenum; ECHO; if ( indented_code ) @@ -185,9 +165,10 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) {NOT_WS}.* { (void) strcpy( (char *) nmdef, (char *) yytext ); + /* skip trailing whitespace */ for ( i = strlen( (char *) nmdef ) - 1; i >= 0 && - nmdef[i] == ' ' || nmdef[i] == '\t'; + (nmdef[i] == ' ' || nmdef[i] == '\t'); --i ) ; @@ -197,17 +178,17 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) didadef = true; } -\n { +{NL} { if ( ! didadef ) synerr( "incomplete name definition" ); BEGIN(INITIAL); ++linenum; } -.*\n ++linenum; BEGIN(INITIAL); RETURNNAME; +.*{NL} ++linenum; BEGIN(INITIAL); RETURNNAME; -\n ++linenum; BEGIN(INITIAL); +{NL} ++linenum; BEGIN(INITIAL); {WS} "reject" { if ( all_upper( yytext ) ) @@ -224,37 +205,18 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) {NOT_WS}+ synerr( "unrecognized %used/%unused construct" ); -"%t"{OPTWS}\n ++linenum; BEGIN(INITIAL); -^{OPTWS}[0-9]+ ++num_xlations; new_xlation = true; -^. 
synerr( "bad row in translation table" ); -{WS} /* ignore whitespace */ - -{ESCSEQ} { - xlation[myesc( yytext )] = - (new_xlation ? num_xlations : -num_xlations); - new_xlation = false; - } -. { - xlation[yytext[0]] = - (new_xlation ? num_xlations : -num_xlations); - new_xlation = false; - } - -\n ++linenum; - - -.*\n/{NOT_WS} { +.*{NL}/{NOT_WS} { ++linenum; ACTION_ECHO; MARK_END_OF_PROLOG; BEGIN(SECT2); } -.*\n ++linenum; ACTION_ECHO; +.*{NL} ++linenum; ACTION_ECHO; <> MARK_END_OF_PROLOG; yyterminate(); -^{OPTWS}\n ++linenum; /* allow blank lines in section 2 */ +^{OPTWS}{NL} ++linenum; /* allow blank lines in section 2 */ ^({WS}|"%{") { indented_code = (yytext[0] != '%'); @@ -267,19 +229,19 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) BEGIN(CODEBLOCK_2); } -"<" BEGIN(SC); return ( '<' ); -^"^" return ( '^' ); -\" BEGIN(QUOTE); return ( '"' ); -"{"/[0-9] BEGIN(NUM); return ( '{' ); +^"<" BEGIN(SC); return '<'; +^"^" return '^'; +\" BEGIN(QUOTE); return '"'; +"{"/[0-9] BEGIN(NUM); return '{'; "{"[^0-9\n][^}\n]* BEGIN(BRACEERROR); -"$"/[ \t\n] return ( '$' ); +"$"/([ \t]|{NL}) return '$'; {WS}"%{" { bracelevel = 1; BEGIN(PERCENT_BRACE_ACTION); - return ( '\n' ); + return '\n'; } -{WS}"|".*\n continued_action = true; ++linenum; return ( '\n' ); +{WS}"|".*{NL} continued_action = true; ++linenum; return '\n'; {WS} { /* this rule is separate from the one below because @@ -289,27 +251,26 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) bracelevel = 0; continued_action = false; BEGIN(ACTION); - return ( '\n' ); + return '\n'; } -{OPTWS}/\n { +{OPTWS}{NL} { bracelevel = 0; continued_action = false; BEGIN(ACTION); - return ( '\n' ); + unput( '\n' ); /* so sees it */ + return '\n'; } -^{OPTWS}\n ++linenum; return ( '\n' ); - -"<>" return ( EOF_OP ); +"<>" return EOF_OP; ^"%%".* { sectnum = 3; BEGIN(SECT3); - return ( EOF ); /* to stop the parser */ + return EOF; /* to stop the parser */ } -"["([^\\\]\n]|{ESCSEQ})+"]" { +"["{FIRST_CCL_CHAR}{CCL_CHAR}* { int cclval; (void) 
strcpy( nmstr, (char *) yytext ); @@ -317,9 +278,12 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) /* check to see if we've already encountered this ccl */ if ( (cclval = ccllookup( (Char *) nmstr )) ) { + if ( input() != ']' ) + synerr( "bad character class" ); + yylval = cclval; ++cclreuse; - return ( PREVCCL ); + return PREVCCL; } else { @@ -331,10 +295,10 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) /* push back everything but the leading bracket * so the ccl can be rescanned */ - PUT_BACK_STRING((Char *) nmstr, 1); + yyless( 1 ); BEGIN(FIRSTCCL); - return ( '[' ); + return '['; } } @@ -342,80 +306,99 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) register Char *nmdefptr; Char *ndlookup(); - (void) strcpy( nmstr, (char *) yytext ); - nmstr[yyleng - 1] = '\0'; /* chop trailing brace */ + (void) strcpy( nmstr, (char *) yytext + 1 ); + nmstr[yyleng - 2] = '\0'; /* chop trailing brace */ - /* lookup from "nmstr + 1" to chop leading brace */ - if ( ! (nmdefptr = ndlookup( nmstr + 1 )) ) - synerr( "undefined {name}" ); + if ( ! (nmdefptr = ndlookup( nmstr )) ) + format_synerr( "undefined definition {%s}", nmstr ); else { /* push back name surrounded by ()'s */ - unput(')'); - PUT_BACK_STRING(nmdefptr, 0); - unput('('); + int len = strlen( nmdefptr ); + + if ( nmdefptr[0] == '^' || + (len > 0 && nmdefptr[len - 1] == '$') ) + { + PUT_BACK_STRING(nmdefptr, 0); + + if ( nmdefptr[0] == '^' ) + BEGIN(CARETISBOL); + } + + else + { + unput(')'); + PUT_BACK_STRING(nmdefptr, 0); + unput('('); + } } } -[/|*+?.()] return ( yytext[0] ); +[/|*+?.()] return yytext[0]; . RETURNCHAR; -\n ++linenum; return ( '\n' ); -"," return ( ',' ); -">" BEGIN(SECT2); return ( '>' ); -">"/"^" BEGIN(CARETISBOL); return ( '>' ); +[,*] return yytext[0]; +">" BEGIN(SECT2); return '>'; +">"/^ BEGIN(CARETISBOL); return '>'; {SCNAME} RETURNNAME; -. synerr( "bad start condition name" ); +. 
{ + format_synerr( "bad : %s", + (char *) yytext ); + } -"^" BEGIN(SECT2); return ( '^' ); +"^" BEGIN(SECT2); return '^'; [^"\n] RETURNCHAR; -\" BEGIN(SECT2); return ( '"' ); +\" BEGIN(SECT2); return '"'; -\n { +{NL} { synerr( "missing quote" ); BEGIN(SECT2); ++linenum; - return ( '"' ); + return '"'; } -"^"/[^-\n] BEGIN(CCL); return ( '^' ); -"^"/- return ( '^' ); -- BEGIN(CCL); yylval = '-'; return ( CHAR ); +"^"/[^-\n] BEGIN(CCL); return '^'; +"^"/- return '^'; . BEGIN(CCL); RETURNCHAR; --/[^\]\n] return ( '-' ); +-/[^\]\n] return '-'; [^\]\n] RETURNCHAR; -"]" BEGIN(SECT2); return ( ']' ); +"]" BEGIN(SECT2); return ']'; +.|{NL} { + synerr( "bad character class" ); + BEGIN(SECT2); + return ']'; + } [0-9]+ { yylval = myctoi( yytext ); - return ( NUMBER ); + return NUMBER; } -"," return ( ',' ); -"}" BEGIN(SECT2); return ( '}' ); +"," return ','; +"}" BEGIN(SECT2); return '}'; . { synerr( "bad character inside {}'s" ); BEGIN(SECT2); - return ( '}' ); + return '}'; } -\n { +{NL} { synerr( "missing }" ); BEGIN(SECT2); ++linenum; - return ( '}' ); + return '}'; } "}" synerr( "bad name in {}'s" ); BEGIN(SECT2); -\n synerr( "missing }" ); ++linenum; BEGIN(SECT2); +{NL} synerr( "missing }" ); ++linenum; BEGIN(SECT2); {OPTWS}"%}".* bracelevel = 0; @@ -428,14 +411,14 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) CHECK_YYMORE(yytext); } {NAME}|{NOT_NAME}|. ACTION_ECHO; -\n { +{NL} { ++linenum; ACTION_ECHO; if ( bracelevel == 0 || (doing_codeblock && indented_code) ) { if ( ! 
doing_codeblock ) - fputs( "\tYY_BREAK\n", temp_action_file ); + add_action( "\tYY_BREAK\n" ); doing_codeblock = false; BEGIN(SECT2); @@ -451,12 +434,12 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) "/*" ACTION_ECHO; BEGIN(ACTION_COMMENT); "'"([^'\\\n]|\\.)*"'" ACTION_ECHO; /* character constant */ \" ACTION_ECHO; BEGIN(ACTION_STRING); -\n { +{NL} { ++linenum; ACTION_ECHO; if ( bracelevel == 0 ) { - fputs( "\tYY_BREAK\n", temp_action_file ); + add_action( "\tYY_BREAK\n" ); BEGIN(SECT2); } } @@ -465,12 +448,11 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) "*/" ACTION_ECHO; BEGIN(ACTION); [^*\n]+ ACTION_ECHO; "*" ACTION_ECHO; -\n ++linenum; ACTION_ECHO; -. ACTION_ECHO; +{NL} ++linenum; ACTION_ECHO; [^"\\\n]+ ACTION_ECHO; \\. ACTION_ECHO; -\n ++linenum; ACTION_ECHO; +{NL} ++linenum; ACTION_ECHO; \" ACTION_ECHO; BEGIN(ACTION); . ACTION_ECHO; @@ -482,17 +464,20 @@ ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) {ESCSEQ} { yylval = myesc( yytext ); - return ( CHAR ); + return CHAR; } {ESCSEQ} { yylval = myesc( yytext ); BEGIN(CCL); - return ( CHAR ); + return CHAR; } .*(\n?) ECHO; + +<*>.|\n format_synerr( "bad character: %s", (char *) yytext ); + %% @@ -502,11 +487,11 @@ int yywrap() if ( --num_input_files > 0 ) { set_input_file( *++input_files ); - return ( 0 ); + return 0; } else - return ( 1 ); + return 1; } -- cgit v1.2.3 From aaedddb57c438cb0deb1af1e80862a8812abbdb3 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 6 Feb 1993 21:06:57 +0000 Subject: *** empty log message *** --- flexdef.h | 134 ++++++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 82 insertions(+), 52 deletions(-) diff --git a/flexdef.h b/flexdef.h index ca6cb2c..daa6961 100644 --- a/flexdef.h +++ b/flexdef.h @@ -69,20 +69,12 @@ #ifdef lint char *sprintf(); /* keep lint happy */ #endif -#ifdef SCO_UNIX -void *memset(); -#else -char *memset(); -#endif #endif #ifdef AMIGA -#define bzero(s, n) setmem((char *)(s), n, '\0') #ifndef abs #define abs(x) ((x) < 0 ? 
-(x) : (x)) #endif -#else -#define bzero(s, n) (void) memset((char *)(s), '\0', n) #endif #ifdef VMS @@ -129,10 +121,6 @@ char *malloc(), *realloc(); #define false 0 -#ifndef DEFAULT_SKELETON_FILE -#define DEFAULT_SKELETON_FILE "flex.skel" -#endif - /* special chk[] values marking the slots taking by end-of-buffer and action * numbers */ @@ -350,6 +338,7 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; * eofseen - true if we've seen an eof in the input file * ddebug - if true (-d), make a "debug" scanner * trace - if true (-T), trace processing + * nowarn - if true (-w), do not generate warnings * spprdflt - if true (-s), suppress the default rule * interactive - if true (-I), generate an interactive scanner * caseins - if true (-i), generate a case-insensitive scanner @@ -358,10 +347,13 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; * usemecs - if true (-Cm flag), use meta-equivalence classes * fullspd - if true (-F flag), use Jacobson method of table representation * gen_line_dirs - if true (i.e., no -L flag), generate #line directives - * performance_report - if true (i.e., -p flag), generate a report relating - * to scanner performance + * performance_report - if > 0 (i.e., -p flag), generate a report relating + * to scanner performance; if > 1 (-p -p), report on minor performance + * problems, too * backtrack_report - if true (i.e., -b flag), generate "lex.backtrack" file * listing backtracking states + * yytext_is_array - if true (i.e., %array directive), then declare + * yytext as a array instead of a character pointer. Nice and inefficient. 
* csize - size of character set for the scanner we're generating; * 128 for 7-bit chars and 256 for 8-bit * yymore_used - if true, yymore() is used in input rules @@ -375,9 +367,10 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; * reject_really_used - same for REJECT */ -extern int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt; +extern int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt; extern int interactive, caseins, useecs, fulltbl, usemecs; -extern int fullspd, gen_line_dirs, performance_report, backtrack_report, csize; +extern int fullspd, gen_line_dirs, performance_report, backtrack_report; +extern int yytext_is_array, csize; extern int yymore_used, reject, real_reject, continued_action; #define REALLY_NOT_DETERMINED 0 @@ -392,24 +385,37 @@ extern int yymore_really_used, reject_really_used; * statement. Used to generate readable -f output * linenum - current input line number * skelfile - the skeleton file + * skel - compiled-in skeleton array + * skel_ind - index into "skel" array, if skelfile is nil * yyin - input file - * temp_action_file - temporary file to hold actions * backtrack_file - file to summarize backtracking states to * infilename - name of input file - * action_file_name - name of the temporary file * input_files - array holding names of input files * num_input_files - size of input_files array * program_name - name with which program was invoked + * + * action_array - array to hold the rule actions + * action_size - size of action_array + * prolog - pointer to where the prolog starts in action_array + * action_offset - index where the non-prolog starts in action_array + * action_index - index where the next action should go, with respect + * to "action" + * action - pointer to where non-prolog starts; equal to + * &action_array[action_offset] */ extern int datapos, dataline, linenum; -extern FILE *skelfile, *yyin, *temp_action_file, *backtrack_file; +extern FILE *skelfile, *yyin, *backtrack_file; +extern 
char *skel[]; +extern int skel_ind; extern char *infilename; -extern char *action_file_name; extern char **input_files; extern int num_input_files; extern char *program_name; +extern char *action_array, *prolog, *action; +extern int action_size, action_offset, action_index; + /* variables for stack of states having only one out-transition: * onestate - state number @@ -427,6 +433,8 @@ extern int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; * current_mns - current maximum on number of NFA states * num_rules - number of the last accepting state; also is number of * rules created so far + * num_eof_rules - number of <<EOF>> rules + * default_rule - number of the default rule * current_max_rules - current maximum number of rules * lastnfa - last nfa state number created * firstst - physically the first state of a fragment @@ -443,15 +451,18 @@ extern int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; * recognizing the text-to-be-matched to the beginning of * the trailing context), or a subsequent state in a trailing * context rule - * rule_type - a RULE_xxx type identifying whether this a a ho-hum + * rule_type - a RULE_xxx type identifying whether this a ho-hum * normal rule or one which has variable head & trailing * context * rule_linenum - line number associated with rule + * rule_useful - true if we've determined that the rule can be matched */ -extern int current_mns, num_rules, current_max_rules, lastnfa; +extern int current_mns, num_rules, num_eof_rules, default_rule; +extern int current_max_rules, lastnfa; extern int *firstst, *lastst, *finalst, *transchar, *trans1, *trans2; -extern int *accptnum, *assoc_rule, *state_type, *rule_type, *rule_linenum; +extern int *accptnum, *assoc_rule, *state_type; +extern int *rule_type, *rule_linenum, *rule_useful; /* different types of states; values are useful as masks, as well, for * routines like check_trailing_context() @@ -497,8 +508,6 @@ extern int protcomst[MSP], firstprot, lastprot, 
protsave[PROT_SAVE_SIZE]; * templates) * tecfwd - forward link of meta-equivalence classes members * tecbck - backward link of MEC's - * xlation - maps character codes to their translations, or nil if no %t table - * num_xlations - number of different xlation values */ /* reserve enough room in the equivalence class arrays so that we @@ -515,9 +524,6 @@ extern int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs; */ extern int tecfwd[CSIZE + 1], tecbck[CSIZE + 1]; -extern int *xlation; -extern int num_xlations; - /* variables for start conditions: * lastsc - last start condition created @@ -557,8 +563,8 @@ extern char **scname; * firstfree - first empty entry in "nxt/chk" table * dss - nfa state set for each dfa * dfasiz - size of nfa state set for each dfa - * dfaacc - accepting set for each dfa state (or accepting number, if - * -r is not given) + * dfaacc - accepting set for each dfa state (if using REJECT), or accepting + * number, if not * accsiz - size of accepting set for each dfa state * dhash - dfa state hash value * numas - number of DFA accepting states created; note that this @@ -656,9 +662,15 @@ void *allocate_array(), *reallocate_array(); reallocate_array( (void *) array, size, sizeof( union dfaacc_union ) ) #define allocate_character_array(size) \ - (Char *) allocate_array( size, sizeof( Char ) ) + (char *) allocate_array( size, sizeof( char ) ) #define reallocate_character_array(array,size) \ + (char *) reallocate_array( (void *) array, size, sizeof( char ) ) + +#define allocate_Character_array(size) \ + (Char *) allocate_array( size, sizeof( Char ) ) + +#define reallocate_Character_array(array,size) \ (Char *) reallocate_array( (void *) array, size, sizeof( Char ) ) @@ -674,7 +686,7 @@ extern int yylval; /* from file ccl.c */ extern void ccladd PROTO((int, int)); /* Add a single character to a ccl */ -extern int cclinit PROTO(()); /* make an empty ccl */ +extern int cclinit PROTO((void)); /* make an empty ccl */ extern void cclnegate 
PROTO((int)); /* negate a ccl */ /* list the members of a set of characters in CCL form */ @@ -684,22 +696,19 @@ extern void list_character_set PROTO((FILE*, int[])); /* from file dfa.c */ /* increase the maximum number of dfas */ -extern void increase_max_dfas PROTO(()); +extern void increase_max_dfas PROTO((void)); -extern void ntod PROTO(()); /* convert a ndfa to a dfa */ +extern void ntod PROTO((void)); /* convert a ndfa to a dfa */ /* from file ecs.c */ /* convert character classes to set of equivalence classes */ -extern void ccl2ecl PROTO(()); +extern void ccl2ecl PROTO((void)); /* associate equivalence class numbers with class members */ extern int cre8ecs PROTO((int[], int[], int)); -/* associate equivalence class numbers using %t table */ -extern int ecs_from_xlation PROTO((int[])); - /* update equivalence classes based on character class transitions */ extern void mkeccl PROTO((Char[], int, int[], int[], int, int)); @@ -709,18 +718,19 @@ extern void mkechar PROTO((int, int[], int[])); /* from file gen.c */ -extern void make_tables PROTO(()); /* generate transition tables */ +extern void make_tables PROTO((void)); /* generate transition tables */ /* from file main.c */ extern void flexend PROTO((int)); +extern void usage PROTO((void)); /* from file misc.c */ -/* write out the actions from the temporary file to lex.yy.c */ -extern void action_out PROTO(()); +/* Add the given text to the stored actions. 
*/ +extern void add_action PROTO(( char *new_text )); /* true if a string is all lower case */ extern int all_lower PROTO((register Char *)); @@ -734,7 +744,8 @@ extern void bubble PROTO((int [], int)); /* shell sort a character array */ extern void cshell PROTO((Char [], int, int)); -extern void dataend PROTO(()); /* finish up a block of data declarations */ +/* finish up a block of data declarations */ +extern void dataend PROTO((void)); /* report an error message and terminate */ extern void flexerror PROTO((char[])); @@ -751,6 +762,9 @@ extern void lerrsf PROTO((char[], char[])); /* spit out a "# line" statement */ extern void line_directive_out PROTO((FILE*)); +/* mark the current position in the action array as the end of the prolog */ +extern void mark_prolog PROTO(()); + /* generate a data statement for a two-dimensional array */ extern void mk2data PROTO((int)); @@ -760,11 +774,14 @@ extern void mkdata PROTO((int)); /* generate a data statement */ extern int myctoi PROTO((Char [])); /* write out one section of the skeleton file */ -extern void skelout PROTO(()); +extern void skelout PROTO((void)); /* output a yy_trans_info structure */ extern void transition_struct_out PROTO((int, int)); +/* set a region of memory to 0 */ +extern void zero_out PROTO((char *, int)); + /* from file nfa.c */ @@ -805,7 +822,7 @@ extern int mkrep PROTO((int, int, int)); /* make a replicated machine */ /* create a state with a transition on a given symbol */ extern int mkstate PROTO((int)); -extern void new_rule PROTO(()); /* initialize for a new rule */ +extern void new_rule PROTO((void)); /* initialize for a new rule */ /* from file parse.y */ @@ -816,18 +833,29 @@ extern void format_pinpoint_message PROTO((char[], char[])); /* write out a message, pinpointing its location */ extern void pinpoint_message PROTO((char[])); +/* write out a warning, pinpointing it at the given line */ +void line_warning PROTO(( char[], int )); + +/* write out a message, pinpointing it at the given 
line */ +void line_pinpoint PROTO(( char[], int )); + +/* report a formatted syntax error */ +extern void format_synerr PROTO((char [], char[])); extern void synerr PROTO((char [])); /* report a syntax error */ -extern int yyparse PROTO(()); /* the YACC parser */ +extern void warn PROTO((char [])); /* report a warning */ +extern int yyparse PROTO((void)); /* the YACC parser */ /* from file scan.l */ -extern int flexscan PROTO(()); /* the Flex-generated scanner for flex */ +/* the Flex-generated scanner for flex */ +extern int flexscan PROTO((void)); /* open the given file (if NULL, stdin) for scanning */ extern void set_input_file PROTO((char*)); -extern int yywrap PROTO(()); /* wrapup a file in the lexical analyzer */ +/* wrapup a file in the lexical analyzer */ +extern int yywrap PROTO((void)); /* from file sym.c */ @@ -839,7 +867,8 @@ extern void cclinstal PROTO ((Char [], int)); extern int ccllookup PROTO((Char [])); extern void ndinstal PROTO((char[], Char[])); /* install a name definition */ -extern void scextend PROTO(()); /* increase maximum number of SC's */ +/* increase maximum number of SC's */ +extern void scextend PROTO((void)); extern void scinstal PROTO((char[], int)); /* make a start condition */ /* lookup the number associated with a start condition */ @@ -851,9 +880,10 @@ extern int sclookup PROTO((char[])); /* build table entries for dfa state */ extern void bldtbl PROTO((int[], int, int, int, int)); -extern void cmptmps PROTO(()); /* compress template table entries */ -extern void inittbl PROTO(()); /* initialize transition tables */ -extern void mkdeftbl PROTO(()); /* make the default, "jam" table entries */ +extern void cmptmps PROTO((void)); /* compress template table entries */ +extern void inittbl PROTO((void)); /* initialize transition tables */ +/* make the default, "jam" table entries */ +extern void mkdeftbl PROTO((void)); /* create table entries for a state (or state fragment) which has * only one out-transition */ @@ -868,7 +898,7 @@ 
extern void stack1 PROTO((int, int, int, int)); /* from file yylex.c */ -extern int yylex PROTO(()); +extern int yylex PROTO((void)); /* The Unix kernel calls used here */ -- cgit v1.2.3 From 69e8b486e485c04f4d04708f0b163c3473af63ed Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 6 Feb 1993 21:08:06 +0000 Subject: Mostly .LP -> .PP --- flex.1 | 221 ++++++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 128 insertions(+), 93 deletions(-) diff --git a/flex.1 b/flex.1 index cef3d7d..3ed69be 100644 --- a/flex.1 +++ b/flex.1 @@ -1,6 +1,6 @@ .TH FLEX 1 "26 May 1990" "Version 2.3" .SH NAME -flex - fast lexical analyzer generator +flexdoc - documentation for flex, fast lexical analyzer generator .SH SYNOPSIS .B flex .B [-bcdfinpstvFILT8 -C[efmF] -Sskeleton] @@ -28,7 +28,7 @@ it analyzes its input for occurrences of the regular expressions. Whenever it finds one, it executes the corresponding C code. .SH SOME SIMPLE EXAMPLES -.LP +.PP First some simple examples to get the flavor of how one uses .I flex. The following @@ -52,23 +52,23 @@ In this input, there is just one rule. "username" is the and the "printf" is the .I action. The "%%" marks the beginning of the rules. -.LP +.PP Here's another simple example: .nf - int num_lines = 0, num_chars = 0; + int num_lines = 0, num_chars = 0; %% - \\n ++num_lines; ++num_chars; - . ++num_chars; + \\n ++num_lines; ++num_chars; + . ++num_chars; %% main() - { - yylex(); - printf( "# of lines = %d, # of chars = %d\\n", - num_lines, num_chars ); - } + { + yylex(); + printf( "# of lines = %d, # of chars = %d\\n", + num_lines, num_chars ); + } .fi This scanner counts the number of characters and the number @@ -83,7 +83,7 @@ routine declared after the second "%%". There are two rules, one which matches a newline ("\\n") and increments both the line count and the character count, and one which matches any character other than a newline (indicated by the "." regular expression). 
-.LP +.PP A somewhat more complicated example: .nf @@ -143,7 +143,7 @@ This is the beginnings of a simple scanner for a language like Pascal. It identifies different types of .I tokens and reports on what it has seen. -.LP +.PP The details of this example will be explained in the following sections. .SH FORMAT OF THE INPUT FILE @@ -168,7 +168,7 @@ section contains declarations of simple definitions to simplify the scanner specification, and declarations of .I start conditions, which are explained in a later section. -.LP +.PP Name definitions have the form: .nf @@ -205,7 +205,7 @@ is identical to .fi and matches one-or-more digits followed by a '.' followed by zero-or-more digits. -.LP +.PP The .I rules section of the @@ -218,9 +218,9 @@ input contains a series of rules of the form: .fi where the pattern must be unindented and the action must begin on the same line. -.LP +.PP See below for a further description of patterns and actions. -.LP +.PP Finally, the user code section is simply copied to .B lex.yy.c verbatim. @@ -229,7 +229,7 @@ by the scanner. The presence of this section is optional; if it is missing, the second .B %% in the input file may be skipped, too. -.LP +.PP In the definitions and rules sections, any .I indented text or text enclosed in @@ -238,7 +238,7 @@ and .B %} is copied verbatim to the output (with the %{}'s removed). The %{}'s must appear unindented on lines by themselves. -.LP +.PP In the rules section, any indented or %{} text appearing before the first rule may be used to declare variables @@ -249,7 +249,7 @@ but its meaning is not well-defined and it may well cause compile-time errors (this feature is present for .I POSIX compliance; see below for other such features). -.LP +.PP In the definitions section, an unindented comment (i.e., a line beginning with "/*") is also copied verbatim to the output up to the next "*/". 
Also, any line in the definitions section @@ -352,7 +352,7 @@ and to match zero-or-more "foo"'s-or-"bar"'s: (foo|bar)* .fi -.LP +.PP Some notes on patterns: .IP - A negated character class such as the example "[^A-Z]" @@ -410,7 +410,7 @@ or more matches of the same length, the rule listed first in the .I flex input file is chosen. -.LP +.PP Once the match is determined, the text corresponding to the match (called the .I token) @@ -423,7 +423,7 @@ The corresponding to the matched pattern is then executed (a more detailed description of actions follows), and then the remaining input is scanned for another match. -.LP +.PP If no match is found, then the .I default rule is executed: the next character in the input is considered matched and @@ -452,7 +452,7 @@ which deletes all occurrences of "zap me" from its input: .fi (It will copy all other characters in the input to the output since they will be matched by the default rule.) -.LP +.PP Here is a program which compresses multiple blanks and tabs down to a single blank, and throws away whitespace found at the end of a line: .nf @@ -462,7 +462,7 @@ a single blank, and throws away whitespace found at the end of a line: [ \\t]+$ /* ignore this token */ .fi -.LP +.PP If the action contains a '{', then the action spans till the balancing '}' is found, and the action may cross multiple lines. .I flex @@ -472,10 +472,10 @@ within them, but also allows actions to begin with and will consider the action to be all the text up to the next .B %} (regardless of ordinary braces inside the action). -.LP +.PP An action consisting solely of a vertical bar ('|') means "same as the action for the next rule." See below for an illustration. -.LP +.PP Actions can include arbitrary C code, including .B return statements to return a value to whatever routine called @@ -490,9 +490,9 @@ however, then any subsequent call to will simply immediately return, unless .B yyrestart() is first called (see below). 
-.LP +.PP Actions are not allowed to modify yytext or yyleng. -.LP +.PP There are a number of special directives which can be included within an action: .IP - @@ -735,7 +735,7 @@ returning a float, and taking two floats as arguments. Note that if you give arguments to the scanning routine using a K&R-style/non-prototyped function declaration, you must terminate the definition with a semi-colon (;). -.LP +.PP Whenever .B yylex() is called, it scans tokens from the global input file @@ -758,7 +758,7 @@ pointer.) In the latter case (i.e., when an action executes a return), the scanner may then be called again and it will resume scanning where it left off. -.LP +.PP By default (and for purposes of efficiency), the scanner uses block-reads rather than simple .I getc() @@ -778,7 +778,7 @@ either the number of characters read or the constant YY_NULL (0 on Unix systems) to indicate EOF. The default YY_INPUT reads from the global file-pointer "yyin". -.LP +.PP A sample redefinition of YY_INPUT (in the definitions section of the input file): .nf @@ -795,11 +795,11 @@ section of the input file): .fi This definition will change the input processing to occur one character at a time. -.LP +.PP You also can add in things like keeping track of the input line number this way; but don't expect your scanner to go very fast. -.LP +.PP When the scanner receives an end-of-file indication from YY_INPUT, it then checks the .B yywrap() @@ -811,14 +811,14 @@ function has gone ahead and set up to point to another input file, and scanning continues. If it returns true (non-zero), then the scanner terminates, returning 0 to its caller. -.LP +.PP The default .B yywrap() always returns 1. Presently, to redefine it you must first "#undef yywrap", as it is currently implemented as a macro. As indicated by the hedging in the previous sentence, it may be changed to a true function in the near future. 
-.LP +.PP The scanner writes its .B ECHO output to the @@ -850,7 +850,7 @@ condition, and .fi will be active only when the current start condition is either "INITIAL", "STRING", or "QUOTE". -.LP +.PP Start conditions are declared in the definitions (first) section of the input using unindented lines beginning with either @@ -884,7 +884,7 @@ input. Because of this, exclusive start conditions make it easy to specify "mini-scanners" which scan portions of the input that are syntactically different from the rest (e.g., comments). -.LP +.PP If the distinction between inclusive and exclusive start conditions is still a little vague, here's a simple example illustrating the connection between the two. The set of rules: @@ -903,11 +903,11 @@ is equivalent to foo /* do something */ .fi -.LP +.PP The default rule (to .B ECHO any unmatched character) remains active in start conditions. -.LP +.PP .B BEGIN(0) returns to the original state where only the rules with no start conditions are active. This state can also be @@ -917,7 +917,7 @@ is equivalent to .B BEGIN(0). (The parentheses around the start condition name are not required but are considered good style.) -.LP +.PP .B BEGIN actions can also be given as indented code at the beginning of the rules section. For example, the following will cause @@ -939,7 +939,7 @@ is true: ...more rules follow... .fi -.LP +.PP To illustrate the uses of start conditions, here is a scanner which provides two different interpretations of a string like "123.456". By default it will treat it as @@ -1042,7 +1042,7 @@ which is sensitive to the scanning context. is only called when the scanner reaches the end of its buffer, which may be a long time after scanning a statement such as an "include" which requires switching the input source. -.LP +.PP To negotiate these sorts of problems, .I flex provides a mechanism for creating and switching between multiple @@ -1072,7 +1072,7 @@ come from .I new_buffer. 
Note that .B yy_switch_to_buffer() -may be used by yywrap() to sets things up for continued scanning, instead +may be used by yywrap() to set things up for continued scanning, instead of opening a new file and pointing .I yyin at it. @@ -1082,7 +1082,7 @@ at it. .fi is used to reclaim the storage associated with a buffer. -.LP +.PP .B yy_new_buffer() is an alias for .B yy_create_buffer(), @@ -1091,13 +1091,13 @@ provided for compatibility with the C++ use of and .I delete for creating and destroying dynamic objects. -.LP +.PP Finally, the .B YY_CURRENT_BUFFER macro returns a .B YY_BUFFER_STATE handle to the current buffer. -.LP +.PP Here is an example of using these features for writing a scanner which expands include files (the .B <<EOF>> @@ -1179,7 +1179,7 @@ action; or, switching to a new buffer using .B yy_switch_to_buffer() as shown in the example above. -.LP +.PP <<EOF>> rules may not be used with other patterns; they may only be qualified with a list of start conditions. If an unqualified <<EOF>> rule is given, it @@ -1192,7 +1192,7 @@ specify an <<EOF>> rule for only the initial start condition, use <INITIAL><<EOF>> .fi -.LP +.PP These rules are useful for catching things like unclosed comments. An example: .nf @@ -1224,14 +1224,14 @@ YY_USER_ACTION can be redefined to provide an action which is always executed prior to the matched rule's action. For example, it could be #define'd to call a routine to convert yytext to lower-case. -.LP +.PP The macro .B YY_USER_INIT may be redefined to provide an action which is always executed before the first scan (and before the scanner's internal initializations are done). For example, it could be used to call a routine to read in a data table or open a logging file. -.LP +.PP In the generated scanner, the actions are all gathered in one large switch statement and separated using .B YY_BREAK, @@ -1322,7 +1322,7 @@ from group #2, followed by a character from group #52." 
Thus .B %t provides a crude way for introducing equivalence classes into the scanner specification. -.LP +.PP Note that the .B -i option (see below) coupled with the equivalence classes which @@ -1572,7 +1572,7 @@ makes run in .I trace mode. It will generate a lot of messages to -.I stdout +.I stderr concerning the form of the input and the resultant non-deterministic and deterministic finite automata. This option is mostly for use in maintaining @@ -1718,17 +1718,17 @@ are, from most expensive to least: pattern sets that require backtracking arbitrary trailing context - '^' beginning-of-line operator yymore() + '^' beginning-of-line operator .fi with the first three all being quite expensive and the last two being quite cheap. -.LP +.PP .B REJECT should be avoided at all costs when performance is important. It is a particularly expensive option. -.LP +.PP Getting rid of backtracking is messy and often may be an enormous amount of work for a complicated scanner. In principle, one begins by using the @@ -1779,13 +1779,13 @@ a bit of headscratching one can see that this must be the state it's in when it has seen "fo". When this has happened, if anything other than another 'o' is seen, the scanner will have to back up to simply match the 'f' (by the default rule). -.LP +.PP The comment regarding State #8 indicates there's a problem when "foob" has been scanned. Indeed, on any character other than a 'b', the scanner will have to back up to accept "foo". Similarly, the comment for State #9 concerns when "fooba" has been scanned. -.LP +.PP The final comment reminds us that there's no point going to all the trouble of removing backtracking from the rules unless we're using @@ -1793,7 +1793,7 @@ we're using or .B -F, since there's no performance gain doing so with compressed scanners. 
-.LP +.PP The way to remove the backtracking is to add "error" rules: .nf @@ -1809,7 +1809,7 @@ The way to remove the backtracking is to add "error" rules: } .fi -.LP +.PP Eliminating backtracking among a list of keywords can also be done using a "catch-all" rule: .nf @@ -1822,7 +1822,7 @@ done using a "catch-all" rule: .fi This is usually the best solution when appropriate. -.LP +.PP Backtracking messages tend to cascade. With a complicated set of rules it's not uncommon to get hundreds of messages. If one can decipher them, though, it often @@ -1831,7 +1831,7 @@ it's easy to make a mistake and have an error rule accidentally match a valid token. A possible future .I flex feature will be to automatically add rules to eliminate backtracking). -.LP +.PP .I Variable trailing context (where both the leading and trailing parts do not have a fixed length) entails almost the same performance loss as @@ -1864,7 +1864,7 @@ Note that here the special '|' action does provide any savings, and can even make things worse (see .B BUGS in flex(1)). -.LP +.PP Another area where the user can increase a scanner's performance (and one that's easier to implement) arises from the fact that the longer the tokens matched, the faster the scanner will run. @@ -1914,7 +1914,7 @@ slow down the scanner! The speed of the scanner is independent of the number of rules or (modulo the considerations given at the beginning of this section) how complicated the rules are with regard to operators such as '*' and '|'. -.LP +.PP A final example in speeding up a scanner: suppose you want to scan through a file containing identifiers and keywords, one per line and with no other extraneous characters, and recognize all the @@ -2000,7 +2000,7 @@ Compiled with this is about as fast as one can get a .I flex scanner to go for this particular problem. 
-.LP +.PP A final note: .I flex is slow when matching NUL's, particularly when a token contains @@ -2038,7 +2038,7 @@ users can be aware of the standardization issues and those areas where .I flex may in the near future undergo changes incompatible with its current definition. -.LP +.PP .I flex is fully compatible with .I lex @@ -2160,8 +2160,16 @@ and the precedence is such that the '?' is associated with .I flex, the rule will be expanded to "foo([A-Z][A-Z0-9]*)?" and so the string "foo" will match. -Note that because of this, the -.B ^, $, , /, +.PP +Note that if the definition begins with +.B ^ +or ends with +.B $ +then it is +.I not +expanded with parentheses, to allow these operators to appear in +definitions without losing their special meanings. But the +.B , /, and .B <> operators cannot be used in a @@ -2284,7 +2292,7 @@ is #define'd so scanners may be written for use with either .I flex or .I lex. -.LP +.PP The following .I flex features are not included in @@ -2323,6 +2331,32 @@ is (rather surprisingly) truncated to does not truncate the action. Actions that are not enclosed in braces are simply terminated at the end of the line. .SH DIAGNOSTICS +.I warning, rule cannot be matched +indicates that the given rule +cannot be matched because it follows other rules that will +always match the same text as it. For +example, in the following "foo" cannot be matched because it comes after +an identifier "catch-all" rule: +.nf + + [a-z]+ got_identifier(); + foo got_foo(); + +.fi +Using +.B REJECT +in a scanner suppresses this warning. +.PP +.I warning, +.B -s +.I option given but default rule +.I can be matched +means that it is possible (perhaps only in a particular start condition) +that the default rule (match any single character) is the only one +that will match a particular input. Since +.B -s +was given, presumably this is not intended. 
+.PP .I reject_used_but_not_detected undefined or .I yymore_used_but_not_detected undefined - @@ -2346,26 +2380,26 @@ supported a mechanism for dealing with this problem; this feature is still supported but now deprecated, and will go away soon unless the author hears from people who can argue compellingly that they need it.) -.LP +.PP .I flex scanner jammed - a scanner compiled with .B -s has encountered an input string which wasn't matched by any of its rules. -.LP +.PP .I flex input buffer overflowed - a scanner rule matched a string long enough to overflow the scanner's internal input buffer (16K bytes by default - controlled by .B YY_BUF_SIZE in "flex.skel". Note that to redefine this macro, you must first -.B #undefine +.B #undef it). -.LP +.PP .I scanner requires -8 flag - Your scanner specification includes recognizing 8-bit characters and you did not specify the -8 flag (and your site has not installed flex with -8 as the default). -.LP +.PP .I fatal flex scanner internal error--end of buffer missed - This can occur in an scanner which is reentered after a long-jump @@ -2376,7 +2410,7 @@ reentering the scanner, use: yyrestart( yyin ); .fi -.LP +.PP .I too many %t classes! - You managed to put every single character into its own %t class. .I flex @@ -2384,9 +2418,9 @@ requires that at least one of the classes share characters. .SH DEFICIENCIES / BUGS See flex(1). .SH "SEE ALSO" -.LP +.PP flex(1), lex(1), yacc(1), sed(1), awk(1). -.LP +.PP M. E. Lesk and E. Schmidt, .I LEX - Lexical Analyzer Generator .SH AUTHOR @@ -2394,30 +2428,30 @@ Vern Paxson, with the help of many ideas and much inspiration from Van Jacobson. Original version by Jef Poskanzer. The fast table representation is a partial implementation of a design done by Van Jacobson. The implementation was done by Kevin Gong and Vern Paxson. 
-.LP +.PP Thanks to the many .I flex beta-testers, feedbackers, and contributors, especially Casey -Leedom, benson@odi.com, Keith Bostic, +Leedom, benson@odi.com, Peter A. Bigot, Keith Bostic, Frederic Brehm, Nick Christopher, Jason Coughlin, Scott David Daniels, Leo Eskin, Chris Faylor, Eric Goldman, Eric Hughes, Jeffrey R. Jones, Kevin B. Kenny, Ronald Lamprecht, -Greg Lee, Craig Leres, Mohamed el Lozy, Jim Meyering, Marc Nozell, Esmond Pitt, -Jef Poskanzer, Jim Roskind, +Greg Lee, Craig Leres, Mohamed el Lozy, Jim Meyering, Marc Nozell, +Walter Pelissero, Francois Pinard, Esmond Pitt, Jef Poskanzer, Jim Roskind, Dave Tallman, Frank Whaley, Ken Yap, and those whose names have slipped my marginal mail-archiving skills but whose contributions are appreciated all the same. -.LP +.PP Thanks to Keith Bostic, John Gilmore, Craig Leres, Bob Mulcahy, Rich Salz, and Richard Stallman for help with various distribution headaches. -.LP +.PP Thanks to Esmond Pitt and Earle Horton for 8-bit character support; to Benson Margulies and Fred Burke for C++ support; to Ove Ewerlid for the basics of support for NUL's; and to Eric Hughes for the basics of support for multiple buffers. -.LP +.PP Work is being done on extending .I flex to generate scanners in which the @@ -2426,21 +2460,22 @@ These scanners may well be substantially faster than those generated using -f or -F. If you are working in this area and are interested in comparing notes and seeing whether redundant work can be avoided, contact Ove Ewerlid (ewerlid@mizar.DoCS.UU.SE). -.LP +.PP This work was primarily done when I was at the Real Time Systems Group at the Lawrence Berkeley Laboratory in Berkeley, CA. Many thanks to all there for the support I received. -.LP +.PP Send comments to: .nf Vern Paxson - Computer Science Department - 4126 Upson Hall - Cornell University - Ithaca, NY 14853-7501 - - vern@cs.cornell.edu - decvax!cornell!vern + Computer Systems Engineering + Bldg. 
46A, Room 1123 + Lawrence Berkeley Laboratory + University of California + Berkeley, CA 94720 + + vern@ee.lbl.gov + ucbvax!ee.lbl.gov!vern .fi -- cgit v1.2.3 From 19504c9cd5bcb8b8d60c038db33bce2c8380ec8f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 6 Feb 1993 21:08:38 +0000 Subject: Finally updated email addr --- README | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/README b/README index a407467..482a951 100644 --- a/README +++ b/README @@ -68,11 +68,12 @@ Install flex using: Please send problems and feedback to: - vern@cs.cornell.edu - decvax!cornell!vern + vern@ee.lbl.gov + ucbvax!ee.lbl.gov!vern Vern Paxson - CS Department - 4126 Upson Hall - Cornell University - Ithaca, NY 14853-7501 + Computer Systems Engineering + 46A/1123 + Lawrence Berkeley Laboratory + 1 Cyclotron Rd. + Berkeley, CA 94720 -- cgit v1.2.3 From e196c113e4e85ea93946ab757f71027f59904eea Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Apr 1993 20:35:08 +0000 Subject: Added %array support --- gen.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/gen.c b/gen.c index 13a8e70..b1f6e98 100644 --- a/gen.c +++ b/gen.c @@ -1023,6 +1023,18 @@ void make_tables() else indent_puts( "yyleng = yy_cp - yy_bp; \\" ); + /* now also deal with copying yytext_ptr to yytext if needed */ + skelout(); + if ( yytext_is_array ) + { + indent_puts( "if ( yyleng >= YYLMAX ) \\" ); + indent_up(); + indent_puts( + "YY_FATAL_ERROR( \"token too large, exceeds YYLMAX\" ); \\" ); + indent_down(); + indent_puts( "strcpy( yytext, (char *) yytext_ptr ); \\" ); + } + set_indent( 0 ); skelout(); -- cgit v1.2.3 From 220517426c41698cc8f36a4d1018168e227cfcce Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Apr 1993 20:35:51 +0000 Subject: Fixed subtle problems regarding '*'s in comments %pointer/%array match entire lines --- scan.l | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/scan.l b/scan.l index 7083c1e..88add7b 100644 
--- a/scan.l +++ b/scan.l @@ -76,7 +76,7 @@ NOT_WS [^ \t\f\n] NL \n|\r\n|\n\r NAME [a-z_][a-z_0-9-]* -NOT_NAME [^a-z_\n]+ +NOT_NAME [^a-z_*\n]+ SCNAME {NAME} @@ -113,8 +113,8 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} return SECTEND; } -^"%pointer" yytext_is_array = false; -^"%array" yytext_is_array = true; +^"%pointer".*\n ++linenum; yytext_is_array = false; +^"%array".*\n ++linenum; yytext_is_array = true; ^"%used" { warn( "%used/%unused have been deprecated" ); @@ -401,6 +401,7 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} {NL} synerr( "missing }" ); ++linenum; BEGIN(SECT2); +"/*" ACTION_ECHO; BEGIN(ACTION_COMMENT); {OPTWS}"%}".* bracelevel = 0; "reject" { ACTION_ECHO; @@ -445,10 +446,17 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} } . ACTION_ECHO; -"*/" ACTION_ECHO; BEGIN(ACTION); -[^*\n]+ ACTION_ECHO; +"*/" { + ACTION_ECHO; + if ( doing_codeblock ) + BEGIN(CODEBLOCK_2); + else + BEGIN(ACTION); + } + "*" ACTION_ECHO; -{NL} ++linenum; ACTION_ECHO; +[^*\n]+ ACTION_ECHO; +[^*\n]*{NL} ++linenum; ACTION_ECHO; [^"\\\n]+ ACTION_ECHO; \\. 
ACTION_ECHO; -- cgit v1.2.3 From 93d6d7ce4a3ffca0a36b18053929caf6b1346353 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Apr 1993 20:36:29 +0000 Subject: Added non-STDC clause for '\a' --- misc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/misc.c b/misc.c index d0e8e89..b371a74 100644 --- a/misc.c +++ b/misc.c @@ -643,6 +643,8 @@ Char array[]; { #ifdef __STDC__ case 'a': return ( '\a' ); +#else + case 'a': return ( '\007' ); #endif case 'b': return ( '\b' ); case 'f': return ( '\f' ); -- cgit v1.2.3 From 149fe2f839f7e5185c5444ca6fa57fa845732668 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Apr 1993 20:37:04 +0000 Subject: %array support --- flex.skl | 9 +++------ main.c | 20 ++++++++++++++++++-- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/flex.skl b/flex.skl index 217ed1a..a1282bf 100644 --- a/flex.skl +++ b/flex.skl @@ -163,18 +163,17 @@ int read(); typedef struct yy_buffer_state *YY_BUFFER_STATE; -%% section 1 definitions go here - -#define yytext_ptr yytext +%% section 1 definitions and declarations of yytext/yytext_ptr go here /* done after the current pattern has been matched and before the * corresponding action - sets up yytext */ #define YY_DO_BEFORE_ACTION \ yytext_ptr = yy_bp; \ -%% code to fiddle yytext and yyleng for yymore() goes here +%% code to fiddle yytext and yyleng for yymore() goes here yy_hold_char = *yy_cp; \ *yy_cp = '\0'; \ +%% code to copy yytext_ptr to yytext[] goes here, if %array yy_c_buf_p = yy_cp; #define EOB_ACT_CONTINUE_SCAN 0 @@ -232,11 +231,9 @@ static YY_CHAR yy_hold_char; static int yy_n_chars; /* number of characters read into yy_ch_buf */ -extern YY_CHAR *yytext; extern int yyleng; extern FILE *yyin, *yyout; -YY_CHAR *yytext; int yyleng; FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; diff --git a/main.c b/main.c index 38f7244..2703f4f 100644 --- a/main.c +++ b/main.c @@ -723,9 +723,8 @@ void readin() flexend( 1 ); } - else if ( useecs ) + if ( useecs ) numecs = cre8ecs( nextecm, 
ecgroup, csize ); - else numecs = csize; @@ -735,6 +734,23 @@ void readin() if ( useecs ) ccl2ecl(); + + if ( yytext_is_array ) + { + puts( "extern char yytext[];\n" ); + puts( "#ifndef YYLMAX" ); + puts( "#define YYLMAX YY_READ_BUF_SIZE" ); + puts( "#endif YYLMAX\n" ); + puts( "char yytext[YYLMAX];" ); + puts( "YY_CHAR *yytext_ptr;" ); + } + + else + { + puts( "extern YY_CHAR *yytext;" ); + puts( "YY_CHAR *yytext;" ); + puts( "#define yytext_ptr yytext" ); + } } -- cgit v1.2.3 From b4f9c16a07d84323c73f123b06587224827bb8aa Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Apr 1993 20:37:50 +0000 Subject: 2.3.8 --- NEWS | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/NEWS b/NEWS index 0111a1f..d2d0d97 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,27 @@ +Changes between 2.3 Patch #8 (21Feb93) and 2.3 Patch #7: + + - Fixed bugs in dynamic memory allocation leading to grievous + fencepost problems when generating large scanners. + - Fixed bug causing infinite loops on character classes with 8-bit + characters in them. + - Fixed bug in matching repetitions with a lower bound of 0. + - Fixed bug in scanning NUL characters using an "interactive" scanner. + - Fixed bug in using yymore() at the end of a file. + - Fixed bug in misrecognizing rules with variable trailing context. + - Fixed bug compiling flex on Suns using gcc 2. + - Fixed bug in not recognizing that input files with the character + ASCII 128 in them require the -8 flag. + - Fixed bug that could cause an infinite loop writing out + error messages. + - Fixed bug in not recognizing old-style lex % declarations if + followed by a tab instead of a space. + - Fixed potential crash when flex terminated early (usually due + to a bad flag) and the -v flag had been given. + - Added some missing declarations of void functions. + - Changed to only use '\a' for __STDC__ compilers. + - Updated mailing addresses. 
+ + Changes between 2.3 Patch #7 (28Mar91) and 2.3 Patch #6: - Fixed out-of-bounds array access that caused bad tables -- cgit v1.2.3 From 38bb4c0fcd54539764cf2db73d3cc58d60cdc579 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Apr 1993 20:38:09 +0000 Subject: Fixed bug in description of backtracking --- flex.1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flex.1 b/flex.1 index 3ed69be..1fd184d 100644 --- a/flex.1 +++ b/flex.1 @@ -1782,9 +1782,9 @@ have to back up to simply match the 'f' (by the default rule). .PP The comment regarding State #8 indicates there's a problem when "foob" has been scanned. Indeed, on any character other -than a 'b', the scanner will have to back up to accept "foo". +than an 'a', the scanner will have to back up to accept "foo". Similarly, the comment for State #9 concerns when "fooba" has -been scanned. +been scanned and an 'r' does not follow. .PP The final comment reminds us that there's no point going to all the trouble of removing backtracking from the rules unless -- cgit v1.2.3 From da65ac15e3bf4e7391dc967aed49394798d21bf5 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 14 Apr 1993 22:41:35 +0000 Subject: Reformatting. 
--- ccl.c | 162 +++-- dfa.c | 1540 +++++++++++++++++++++++------------------------ ecs.c | 282 +++++---- flex.skl | 784 ++++++++++++------------ flexdef.h | 301 +++++----- gen.c | 1974 +++++++++++++++++++++++++++++++------------------------------ libmain.c | 7 +- main.c | 1261 ++++++++++++++++++++------------------- misc.c | 942 +++++++++++++---------------- nfa.c | 727 +++++++++++------------ parse.y | 574 +++++++++--------- scan.l | 202 +++---- sym.c | 310 ++++------ tblcmp.c | 1137 +++++++++++++++++------------------ yylex.c | 330 +++++------ 15 files changed, 5134 insertions(+), 5399 deletions(-) diff --git a/ccl.c b/ccl.c index f17fa1b..77adfae 100644 --- a/ccl.c +++ b/ccl.c @@ -33,143 +33,119 @@ static char rcsid[] = #include "flexdef.h" -/* ccladd - add a single character to a ccl - * - * synopsis - * int cclp; - * int ch; - * ccladd( cclp, ch ); - */ +/* ccladd - add a single character to a ccl */ void ccladd( cclp, ch ) int cclp; int ch; + { + int ind, len, newpos, i; - { - int ind, len, newpos, i; + len = ccllen[cclp]; + ind = cclmap[cclp]; - len = ccllen[cclp]; - ind = cclmap[cclp]; + /* check to see if the character is already in the ccl */ - /* check to see if the character is already in the ccl */ + for ( i = 0; i < len; ++i ) + if ( ccltbl[ind + i] == ch ) + return; - for ( i = 0; i < len; ++i ) - if ( ccltbl[ind + i] == ch ) - return; + newpos = ind + len; - newpos = ind + len; + if ( newpos >= current_max_ccl_tbl_size ) + { + current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT; - if ( newpos >= current_max_ccl_tbl_size ) - { - current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT; + ++num_reallocs; - ++num_reallocs; + ccltbl = reallocate_Character_array( ccltbl, + current_max_ccl_tbl_size ); + } - ccltbl = reallocate_Character_array( ccltbl, current_max_ccl_tbl_size ); + ccllen[cclp] = len + 1; + ccltbl[newpos] = ch; } - ccllen[cclp] = len + 1; - ccltbl[newpos] = ch; - } - -/* cclinit - make an empty ccl - * - * synopsis - * int cclinit(); - * 
new_ccl = cclinit(); - */ +/* cclinit - return an empty ccl */ int cclinit() - - { - if ( ++lastccl >= current_maxccls ) { - current_maxccls += MAX_CCLS_INCREMENT; + if ( ++lastccl >= current_maxccls ) + { + current_maxccls += MAX_CCLS_INCREMENT; - ++num_reallocs; + ++num_reallocs; - cclmap = reallocate_integer_array( cclmap, current_maxccls ); - ccllen = reallocate_integer_array( ccllen, current_maxccls ); - cclng = reallocate_integer_array( cclng, current_maxccls ); - } + cclmap = reallocate_integer_array( cclmap, current_maxccls ); + ccllen = reallocate_integer_array( ccllen, current_maxccls ); + cclng = reallocate_integer_array( cclng, current_maxccls ); + } - if ( lastccl == 1 ) - /* we're making the first ccl */ - cclmap[lastccl] = 0; + if ( lastccl == 1 ) + /* we're making the first ccl */ + cclmap[lastccl] = 0; - else - /* the new pointer is just past the end of the last ccl. Since - * the cclmap points to the \first/ character of a ccl, adding the - * length of the ccl to the cclmap pointer will produce a cursor - * to the first free space - */ - cclmap[lastccl] = cclmap[lastccl - 1] + ccllen[lastccl - 1]; + else + /* The new pointer is just past the end of the last ccl. + * Since the cclmap points to the \first/ character of a + * ccl, adding the length of the ccl to the cclmap pointer + * will produce a cursor to the first free space. 
+ */ + cclmap[lastccl] = cclmap[lastccl - 1] + ccllen[lastccl - 1]; - ccllen[lastccl] = 0; - cclng[lastccl] = 0; /* ccl's start out life un-negated */ + ccllen[lastccl] = 0; + cclng[lastccl] = 0; /* ccl's start out life un-negated */ - return ( lastccl ); - } + return lastccl; + } -/* cclnegate - negate a ccl - * - * synopsis - * int cclp; - * cclnegate( ccl ); - */ +/* cclnegate - negate the given ccl */ void cclnegate( cclp ) int cclp; - - { - cclng[cclp] = 1; - } + { + cclng[cclp] = 1; + } /* list_character_set - list the members of a set of characters in CCL form * - * synopsis - * int cset[CSIZE]; - * FILE *file; - * list_character_set( cset ); - * - * writes to the given file a character-class representation of those - * characters present in the given set. A character is present if it - * has a non-zero value in the set array. + * Writes to the given file a character-class representation of those + * characters present in the given CCL. A character is present if it + * has a non-zero value in the cset array. 
*/ void list_character_set( file, cset ) FILE *file; int cset[]; + { + char *readable_form(); + register int i; - { - register int i; - char *readable_form(); + putc( '[', file ); - putc( '[', file ); + for ( i = 0; i < csize; ++i ) + { + if ( cset[i] ) + { + register int start_char = i; - for ( i = 0; i < csize; ++i ) - { - if ( cset[i] ) - { - register int start_char = i; + putc( ' ', file ); - putc( ' ', file ); + fputs( readable_form( i ), file ); - fputs( readable_form( i ), file ); + while ( ++i < csize && cset[i] ) + ; - while ( ++i < csize && cset[i] ) - ; + if ( i - 1 > start_char ) + /* this was a run */ + fprintf( file, "-%s", readable_form( i - 1 ) ); - if ( i - 1 > start_char ) - /* this was a run */ - fprintf( file, "-%s", readable_form( i - 1 ) ); + putc( ' ', file ); + } + } - putc( ' ', file ); - } + putc( ']', file ); } - - putc( ']', file ); - } diff --git a/dfa.c b/dfa.c index 884f7e3..f91c193 100644 --- a/dfa.c +++ b/dfa.c @@ -45,55 +45,55 @@ int symfollowset PROTO((int[], int, int, int[])); /* check_for_backtracking - check a DFA state for backtracking * * synopsis - * int ds, state[numecs]; - * check_for_backtracking( ds, state ); + * void check_for_backtracking( int ds, int state[numecs] ); * * ds is the number of the state to check and state[] is its out-transitions, - * indexed by equivalence class, and state_rules[] is the set of rules - * associated with this state + * indexed by equivalence class. */ void check_for_backtracking( ds, state ) int ds; int state[]; + { + if ( (reject && ! dfaacc[ds].dfaacc_set) || ! dfaacc[ds].dfaacc_state ) + { /* state is non-accepting */ + ++num_backtracking; - { - if ( (reject && ! dfaacc[ds].dfaacc_set) || ! 
dfaacc[ds].dfaacc_state ) - { /* state is non-accepting */ - ++num_backtracking; - - if ( backtrack_report ) - { - fprintf( backtrack_file, "State #%d is non-accepting -\n", ds ); + if ( backtrack_report ) + { + fprintf( backtrack_file, + "State #%d is non-accepting -\n", ds ); - /* identify the state */ - dump_associated_rules( backtrack_file, ds ); + /* identify the state */ + dump_associated_rules( backtrack_file, ds ); - /* now identify it further using the out- and jam-transitions */ - dump_transitions( backtrack_file, state ); + /* Now identify it further using the out- and + * jam-transitions. + */ + dump_transitions( backtrack_file, state ); - putc( '\n', backtrack_file ); - } + putc( '\n', backtrack_file ); + } + } } - } /* check_trailing_context - check to see if NFA state set constitutes * "dangerous" trailing context * * synopsis - * int nfa_states[num_states+1], num_states; - * int accset[nacc+1], nacc; - * check_trailing_context( nfa_states, num_states, accset, nacc ); + * void check_trailing_context( int nfa_states[num_states+1], int num_states, + * int accset[nacc+1], int nacc ); * * NOTES - * Trailing context is "dangerous" if both the head and the trailing + * Trailing context is "dangerous" if both the head and the trailing * part are of variable size \and/ there's a DFA state which contains * both an accepting state for the head part of the rule and NFA states * which occur after the beginning of the trailing context. + * * When such a rule is matched, it's impossible to tell if having been - * in the DFA state indicates the beginning of the trailing context - * or further-along scanning of the pattern. In these cases, a warning + * in the DFA state indicates the beginning of the trailing context or + * further-along scanning of the pattern. In these cases, a warning * message is issued. * * nfa_states[1 .. num_states] is the list of NFA states in the DFA. 
@@ -104,101 +104,94 @@ void check_trailing_context( nfa_states, num_states, accset, nacc ) int *nfa_states, num_states; int *accset; register int nacc; + { + register int i, j; - { - register int i, j; + for ( i = 1; i <= num_states; ++i ) + { + int ns = nfa_states[i]; + register int type = state_type[ns]; + register int ar = assoc_rule[ns]; - for ( i = 1; i <= num_states; ++i ) - { - int ns = nfa_states[i]; - register int type = state_type[ns]; - register int ar = assoc_rule[ns]; - - if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE ) - { /* do nothing */ - } - - else if ( type == STATE_TRAILING_CONTEXT ) - { - /* potential trouble. Scan set of accepting numbers for - * the one marking the end of the "head". We assume that - * this looping will be fairly cheap since it's rare that - * an accepting number set is large. - */ - for ( j = 1; j <= nacc; ++j ) - if ( accset[j] & YY_TRAILING_HEAD_MASK ) - { - line_warning( "dangerous trailing context", - rule_linenum[ar] ); - return; - } - } + if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE ) + { /* do nothing */ + } + + else if ( type == STATE_TRAILING_CONTEXT ) + { + /* Potential trouble. Scan set of accepting numbers + * for the one marking the end of the "head". We + * assume that this looping will be fairly cheap + * since it's rare that an accepting number set + * is large. + */ + for ( j = 1; j <= nacc; ++j ) + if ( accset[j] & YY_TRAILING_HEAD_MASK ) + { + line_warning( + "dangerous trailing context", + rule_linenum[ar] ); + return; + } + } + } } - } /* dump_associated_rules - list the rules associated with a DFA state * - * synopsis - * int ds; - * FILE *file; - * dump_associated_rules( file, ds ); - * - * goes through the set of NFA states associated with the DFA and + * Goes through the set of NFA states associated with the DFA and * extracts the first MAX_ASSOC_RULES unique rules, sorts them, - * and writes a report to the given file + * and writes a report to the given file. 
*/ void dump_associated_rules( file, ds ) FILE *file; int ds; - - { - register int i, j; - register int num_associated_rules = 0; - int rule_set[MAX_ASSOC_RULES + 1]; - int *dset = dss[ds]; - int size = dfasiz[ds]; - - for ( i = 1; i <= size; ++i ) { - register int rule_num = rule_linenum[assoc_rule[dset[i]]]; + register int i, j; + register int num_associated_rules = 0; + int rule_set[MAX_ASSOC_RULES + 1]; + int *dset = dss[ds]; + int size = dfasiz[ds]; - for ( j = 1; j <= num_associated_rules; ++j ) - if ( rule_num == rule_set[j] ) - break; + for ( i = 1; i <= size; ++i ) + { + register int rule_num = rule_linenum[assoc_rule[dset[i]]]; - if ( j > num_associated_rules ) - { /* new rule */ - if ( num_associated_rules < MAX_ASSOC_RULES ) - rule_set[++num_associated_rules] = rule_num; - } - } + for ( j = 1; j <= num_associated_rules; ++j ) + if ( rule_num == rule_set[j] ) + break; - bubble( rule_set, num_associated_rules ); + if ( j > num_associated_rules ) + { /* new rule */ + if ( num_associated_rules < MAX_ASSOC_RULES ) + rule_set[++num_associated_rules] = rule_num; + } + } - fprintf( file, " associated rule line numbers:" ); + bubble( rule_set, num_associated_rules ); - for ( i = 1; i <= num_associated_rules; ++i ) - { - if ( i % 8 == 1 ) - putc( '\n', file ); - - fprintf( file, "\t%d", rule_set[i] ); + fprintf( file, " associated rule line numbers:" ); + + for ( i = 1; i <= num_associated_rules; ++i ) + { + if ( i % 8 == 1 ) + putc( '\n', file ); + + fprintf( file, "\t%d", rule_set[i] ); + } + + putc( '\n', file ); } - - putc( '\n', file ); - } /* dump_transitions - list the transitions associated with a DFA state * * synopsis - * int state[numecs]; - * FILE *file; - * dump_transitions( file, state ); + * dump_transitions( FILE *file, int state[numecs] ); * - * goes through the set of out-transitions and lists them in human-readable + * Goes through the set of out-transitions and lists them in human-readable * form (i.e., not as equivalence classes); also lists 
jam transitions * (i.e., all those which are not out-transitions, plus EOF). The dump * is done to the given file. @@ -207,870 +200,893 @@ int ds; void dump_transitions( file, state ) FILE *file; int state[]; + { + register int i, ec; + int out_char_set[CSIZE]; - { - register int i, ec; - int out_char_set[CSIZE]; + for ( i = 0; i < csize; ++i ) + { + ec = abs( ecgroup[i] ); + out_char_set[i] = state[ec]; + } - for ( i = 0; i < csize; ++i ) - { - ec = abs( ecgroup[i] ); - out_char_set[i] = state[ec]; - } - - fprintf( file, " out-transitions: " ); + fprintf( file, " out-transitions: " ); - list_character_set( file, out_char_set ); + list_character_set( file, out_char_set ); - /* now invert the members of the set to get the jam transitions */ - for ( i = 0; i < csize; ++i ) - out_char_set[i] = ! out_char_set[i]; + /* now invert the members of the set to get the jam transitions */ + for ( i = 0; i < csize; ++i ) + out_char_set[i] = ! out_char_set[i]; - fprintf( file, "\n jam-transitions: EOF " ); + fprintf( file, "\n jam-transitions: EOF " ); - list_character_set( file, out_char_set ); + list_character_set( file, out_char_set ); - putc( '\n', file ); - } + putc( '\n', file ); + } /* epsclosure - construct the epsilon closure of a set of ndfa states * * synopsis - * int t[current_max_dfa_size], numstates, accset[num_rules + 1], nacc; - * int hashval; - * int *epsclosure(); - * t = epsclosure( t, &numstates, accset, &nacc, &hashval ); + * int *epsclosure( int t[num_states], int *numstates_addr, + * int accset[num_rules+1], int *nacc_addr, + * int *hashval_addr ); * * NOTES - * the epsilon closure is the set of all states reachable by an arbitrary - * number of epsilon transitions which themselves do not have epsilon + * The epsilon closure is the set of all states reachable by an arbitrary + * number of epsilon transitions, which themselves do not have epsilon * transitions going out, unioned with the set of states which have non-null * accepting numbers. 
t is an array of size numstates of nfa state numbers. - * Upon return, t holds the epsilon closure and numstates is updated. accset - * holds a list of the accepting numbers, and the size of accset is given - * by nacc. t may be subjected to reallocation if it is not large enough - * to hold the epsilon closure. + * Upon return, t holds the epsilon closure and *numstates_addr is updated. + * accset holds a list of the accepting numbers, and the size of accset is + * given by *nacc_addr. t may be subjected to reallocation if it is not + * large enough to hold the epsilon closure. * - * hashval is the hash value for the dfa corresponding to the state set + * hashval is the hash value for the dfa corresponding to the state set. */ int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr ) int *t, *ns_addr, accset[], *nacc_addr, *hv_addr; - - { - register int stkpos, ns, tsp; - int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum; - int stkend, nstate; - static int did_stk_init = false, *stk; + { + register int stkpos, ns, tsp; + int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum; + int stkend, nstate; + static int did_stk_init = false, *stk; #define MARK_STATE(state) \ - trans1[state] = trans1[state] - MARKER_DIFFERENCE; +trans1[state] = trans1[state] - MARKER_DIFFERENCE; #define IS_MARKED(state) (trans1[state] < 0) #define UNMARK_STATE(state) \ - trans1[state] = trans1[state] + MARKER_DIFFERENCE; +trans1[state] = trans1[state] + MARKER_DIFFERENCE; #define CHECK_ACCEPT(state) \ - { \ - nfaccnum = accptnum[state]; \ - if ( nfaccnum != NIL ) \ - accset[++nacc] = nfaccnum; \ - } +{ \ +nfaccnum = accptnum[state]; \ +if ( nfaccnum != NIL ) \ +accset[++nacc] = nfaccnum; \ +} #define DO_REALLOCATION \ - { \ - current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \ - ++num_reallocs; \ - t = reallocate_integer_array( t, current_max_dfa_size ); \ - stk = reallocate_integer_array( stk, current_max_dfa_size ); \ - } \ +{ \ +current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; 
\ +++num_reallocs; \ +t = reallocate_integer_array( t, current_max_dfa_size ); \ +stk = reallocate_integer_array( stk, current_max_dfa_size ); \ +} \ #define PUT_ON_STACK(state) \ - { \ - if ( ++stkend >= current_max_dfa_size ) \ - DO_REALLOCATION \ - stk[stkend] = state; \ - MARK_STATE(state) \ - } +{ \ +if ( ++stkend >= current_max_dfa_size ) \ +DO_REALLOCATION \ +stk[stkend] = state; \ +MARK_STATE(state) \ +} #define ADD_STATE(state) \ - { \ - if ( ++numstates >= current_max_dfa_size ) \ - DO_REALLOCATION \ - t[numstates] = state; \ - hashval = hashval + state; \ - } +{ \ +if ( ++numstates >= current_max_dfa_size ) \ +DO_REALLOCATION \ +t[numstates] = state; \ +hashval = hashval + state; \ +} #define STACK_STATE(state) \ - { \ - PUT_ON_STACK(state) \ - CHECK_ACCEPT(state) \ - if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \ - ADD_STATE(state) \ - } +{ \ +PUT_ON_STACK(state) \ +CHECK_ACCEPT(state) \ +if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \ +ADD_STATE(state) \ +} - if ( ! did_stk_init ) - { - stk = allocate_integer_array( current_max_dfa_size ); - did_stk_init = true; - } - nacc = stkend = hashval = 0; - - for ( nstate = 1; nstate <= numstates; ++nstate ) - { - ns = t[nstate]; + if ( ! did_stk_init ) + { + stk = allocate_integer_array( current_max_dfa_size ); + did_stk_init = true; + } - /* the state could be marked if we've already pushed it onto - * the stack - */ - if ( ! IS_MARKED(ns) ) - PUT_ON_STACK(ns) + nacc = stkend = hashval = 0; - CHECK_ACCEPT(ns) - hashval = hashval + ns; - } + for ( nstate = 1; nstate <= numstates; ++nstate ) + { + ns = t[nstate]; - for ( stkpos = 1; stkpos <= stkend; ++stkpos ) - { - ns = stk[stkpos]; - transsym = transchar[ns]; + /* The state could be marked if we've already pushed it onto + * the stack. + */ + if ( ! 
IS_MARKED(ns) ) + PUT_ON_STACK(ns) - if ( transsym == SYM_EPSILON ) - { - tsp = trans1[ns] + MARKER_DIFFERENCE; + CHECK_ACCEPT(ns) + hashval = hashval + ns; + } - if ( tsp != NO_TRANSITION ) + for ( stkpos = 1; stkpos <= stkend; ++stkpos ) { - if ( ! IS_MARKED(tsp) ) - STACK_STATE(tsp) + ns = stk[stkpos]; + transsym = transchar[ns]; + + if ( transsym == SYM_EPSILON ) + { + tsp = trans1[ns] + MARKER_DIFFERENCE; - tsp = trans2[ns]; + if ( tsp != NO_TRANSITION ) + { + if ( ! IS_MARKED(tsp) ) + STACK_STATE(tsp) - if ( tsp != NO_TRANSITION ) - if ( ! IS_MARKED(tsp) ) - STACK_STATE(tsp) + tsp = trans2[ns]; + + if ( tsp != NO_TRANSITION && ! IS_MARKED(tsp) ) + STACK_STATE(tsp) + } + } } - } - } - /* clear out "visit" markers */ + /* Clear out "visit" markers. */ - for ( stkpos = 1; stkpos <= stkend; ++stkpos ) - { - if ( IS_MARKED(stk[stkpos]) ) - { - UNMARK_STATE(stk[stkpos]) - } - else - flexfatal( "consistency check failed in epsclosure()" ); - } + for ( stkpos = 1; stkpos <= stkend; ++stkpos ) + { + if ( IS_MARKED(stk[stkpos]) ) + UNMARK_STATE(stk[stkpos]) + else + flexfatal( "consistency check failed in epsclosure()" ); + } - *ns_addr = numstates; - *hv_addr = hashval; - *nacc_addr = nacc; + *ns_addr = numstates; + *hv_addr = hashval; + *nacc_addr = nacc; - return ( t ); - } + return t; + } /* increase_max_dfas - increase the maximum number of DFAs */ void increase_max_dfas() + { + current_max_dfas += MAX_DFAS_INCREMENT; - { - current_max_dfas += MAX_DFAS_INCREMENT; - - ++num_reallocs; + ++num_reallocs; - base = reallocate_integer_array( base, current_max_dfas ); - def = reallocate_integer_array( def, current_max_dfas ); - dfasiz = reallocate_integer_array( dfasiz, current_max_dfas ); - accsiz = reallocate_integer_array( accsiz, current_max_dfas ); - dhash = reallocate_integer_array( dhash, current_max_dfas ); - dss = reallocate_int_ptr_array( dss, current_max_dfas ); - dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas ); + base = reallocate_integer_array( 
base, current_max_dfas ); + def = reallocate_integer_array( def, current_max_dfas ); + dfasiz = reallocate_integer_array( dfasiz, current_max_dfas ); + accsiz = reallocate_integer_array( accsiz, current_max_dfas ); + dhash = reallocate_integer_array( dhash, current_max_dfas ); + dss = reallocate_int_ptr_array( dss, current_max_dfas ); + dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas ); - if ( nultrans ) - nultrans = reallocate_integer_array( nultrans, current_max_dfas ); - } + if ( nultrans ) + nultrans = + reallocate_integer_array( nultrans, current_max_dfas ); + } /* ntod - convert an ndfa to a dfa * - * synopsis - * ntod(); - * - * creates the dfa corresponding to the ndfa we've constructed. the - * dfa starts out in state #1. + * Creates the dfa corresponding to the ndfa we've constructed. The + * dfa starts out in state #1. */ void ntod() - - { - int *accset, ds, nacc, newds; - int sym, hashval, numstates, dsize; - int num_full_table_rows; /* used only for -f */ - int *nset, *dset; - int targptr, totaltrans, i, comstate, comfreq, targ; - int *epsclosure(), snstods(), symlist[CSIZE + 1]; - int num_start_states; - int todo_head, todo_next; - - /* note that the following are indexed by *equivalence classes* - * and not by characters. Since equivalence classes are indexed - * beginning with 1, even if the scanner accepts NUL's, this - * means that (since every character is potentially in its own - * equivalence class) these arrays must have room for indices - * from 1 to CSIZE, so their size must be CSIZE + 1. - */ - int duplist[CSIZE + 1], state[CSIZE + 1]; - int targfreq[CSIZE + 1], targstate[CSIZE + 1]; - - /* this is so find_table_space(...) 
will know where to start looking in - * chk/nxt for unused records for space to put in the state - */ - if ( fullspd ) - firstfree = 0; - - accset = allocate_integer_array( num_rules + 1 ); - nset = allocate_integer_array( current_max_dfa_size ); - - /* the "todo" queue is represented by the head, which is the DFA - * state currently being processed, and the "next", which is the - * next DFA state number available (not in use). We depend on the - * fact that snstods() returns DFA's \in increasing order/, and thus - * need only know the bounds of the dfas to be processed. - */ - todo_head = todo_next = 0; - - for ( i = 0; i <= csize; ++i ) { - duplist[i] = NIL; - symlist[i] = false; - } + int *accset, ds, nacc, newds; + int sym, hashval, numstates, dsize; + int num_full_table_rows; /* used only for -f */ + int *nset, *dset; + int targptr, totaltrans, i, comstate, comfreq, targ; + int *epsclosure(), snstods(), symlist[CSIZE + 1]; + int num_start_states; + int todo_head, todo_next; + + /* Note that the following are indexed by *equivalence classes* + * and not by characters. Since equivalence classes are indexed + * beginning with 1, even if the scanner accepts NUL's, this + * means that (since every character is potentially in its own + * equivalence class) these arrays must have room for indices + * from 1 to CSIZE, so their size must be CSIZE + 1. + */ + int duplist[CSIZE + 1], state[CSIZE + 1]; + int targfreq[CSIZE + 1], targstate[CSIZE + 1]; - for ( i = 0; i <= num_rules; ++i ) - accset[i] = NIL; + /* This is so find_table_space(...) 
will know where to start looking + * in chk/nxt for unused records for space to put in the state + */ + if ( fullspd ) + firstfree = 0; - if ( trace ) - { - dumpnfa( scset[1] ); - fputs( "\n\nDFA Dump:\n\n", stderr ); - } + accset = allocate_integer_array( num_rules + 1 ); + nset = allocate_integer_array( current_max_dfa_size ); - inittbl(); - - /* Check to see whether we should build a separate table for transitions - * on NUL characters. We don't do this for full-speed (-F) scanners, - * since for them we don't have a simple state number lying around with - * which to index the table. We also don't bother doing it for scanners - * unless (1) NUL is in its own equivalence class (indicated by a - * positive value of ecgroup[NUL]), (2) NUL's equivalence class is - * the last equivalence class, and (3) the number of equivalence classes - * is the same as the number of characters. This latter case comes about - * when useecs is false or when its true but every character still - * manages to land in its own class (unlikely, but it's cheap to check - * for). If all these things are true then the character code needed - * to represent NUL's equivalence class for indexing the tables is - * going to take one more bit than the number of characters, and therefore - * we won't be assured of being able to fit it into a YY_CHAR variable. - * This rules out storing the transitions in a compressed table, since - * the code for interpreting them uses a YY_CHAR variable (perhaps it - * should just use an integer, though; this is worth pondering ... ###). - * - * Finally, for full tables, we want the number of entries in the - * table to be a power of two so the array references go fast (it - * will just take a shift to compute the major index). If encoding - * NUL's transitions in the table will spoil this, we give it its - * own table (note that this will be the case if we're not using - * equivalence classes). 
- */ - - /* note that the test for ecgroup[0] == numecs below accomplishes - * both (1) and (2) above - */ - if ( ! fullspd && ecgroup[0] == numecs ) - { /* NUL is alone in its equivalence class, which is the last one */ - int use_NUL_table = (numecs == csize); - - if ( fulltbl && ! use_NUL_table ) - { /* we still may want to use the table if numecs is a power of 2 */ - int power_of_two; - - for ( power_of_two = 1; power_of_two <= csize; power_of_two *= 2 ) - if ( numecs == power_of_two ) - { - use_NUL_table = true; - break; - } - } - - if ( use_NUL_table ) - nultrans = allocate_integer_array( current_max_dfas ); - /* from now on, nultrans != nil indicates that we're - * saving null transitions for later, separate encoding - */ - } + /* The "todo" queue is represented by the head, which is the DFA + * state currently being processed, and the "next", which is the + * next DFA state number available (not in use). We depend on the + * fact that snstods() returns DFA's \in increasing order/, and thus + * need only know the bounds of the dfas to be processed. + */ + todo_head = todo_next = 0; + for ( i = 0; i <= csize; ++i ) + { + duplist[i] = NIL; + symlist[i] = false; + } - if ( fullspd ) - { - for ( i = 0; i <= numecs; ++i ) - state[i] = 0; - place_state( state, 0, 0 ); - } + for ( i = 0; i <= num_rules; ++i ) + accset[i] = NIL; - else if ( fulltbl ) - { - if ( nultrans ) - /* we won't be including NUL's transitions in the table, - * so build it for entries from 0 .. numecs - 1 - */ - num_full_table_rows = numecs; + if ( trace ) + { + dumpnfa( scset[1] ); + fputs( "\n\nDFA Dump:\n\n", stderr ); + } - else - /* take into account the fact that we'll be including - * the NUL entries in the transition table. Build it - * from 0 .. numecs. - */ - num_full_table_rows = numecs + 1; - - /* declare it "short" because it's a real long-shot that that - * won't be large enough. 
+ inittbl(); + + /* Check to see whether we should build a separate table for + * transitions on NUL characters. We don't do this for full-speed + * (-F) scanners, since for them we don't have a simple state + * number lying around with which to index the table. We also + * don't bother doing it for scanners unless (1) NUL is in its own + * equivalence class (indicated by a positive value of + * ecgroup[NUL]), (2) NUL's equivalence class is the last + * equivalence class, and (3) the number of equivalence classes is + * the same as the number of characters. This latter case comes + * about when useecs is false or when its true but every character + * still manages to land in its own class (unlikely, but it's + * cheap to check for). If all these things are true then the + * character code needed to represent NUL's equivalence class for + * indexing the tables is going to take one more bit than the + * number of characters, and therefore we won't be assured of + * being able to fit it into a YY_CHAR variable. This rules out + * storing the transitions in a compressed table, since the code + * for interpreting them uses a YY_CHAR variable (perhaps it + * should just use an integer, though; this is worth pondering ... + * ###). + * + * Finally, for full tables, we want the number of entries in the + * table to be a power of two so the array references go fast (it + * will just take a shift to compute the major index). If + * encoding NUL's transitions in the table will spoil this, we + * give it its own table (note that this will be the case if we're + * not using equivalence classes). 
*/ - printf( "static short int yy_nxt[][%d] =\n {\n", - /* '}' so vi doesn't get too confused */ - num_full_table_rows ); - - /* generate 0 entries for state #0 */ - for ( i = 0; i < num_full_table_rows; ++i ) - mk2data( 0 ); - /* force ',' and dataflush() next call to mk2data */ - datapos = NUMDATAITEMS; + /* Note that the test for ecgroup[0] == numecs below accomplishes + * both (1) and (2) above + */ + if ( ! fullspd && ecgroup[0] == numecs ) + { + /* NUL is alone in its equivalence class, which is the + * last one. + */ + int use_NUL_table = (numecs == csize); - /* force extra blank line next dataflush() */ - dataline = NUMDATALINES; - } + if ( fulltbl && ! use_NUL_table ) + { + /* We still may want to use the table if numecs + * is a power of 2. + */ + int power_of_two; + + for ( power_of_two = 1; power_of_two <= csize; + power_of_two *= 2 ) + if ( numecs == power_of_two ) + { + use_NUL_table = true; + break; + } + } - /* create the first states */ + if ( use_NUL_table ) + nultrans = allocate_integer_array( current_max_dfas ); - num_start_states = lastsc * 2; + /* From now on, nultrans != nil indicates that we're + * saving null transitions for later, separate encoding. + */ + } - for ( i = 1; i <= num_start_states; ++i ) - { - numstates = 1; - /* for each start condition, make one state for the case when - * we're at the beginning of the line (the '^' operator) and - * one for the case when we're not - */ - if ( i % 2 == 1 ) - nset[numstates] = scset[(i / 2) + 1]; - else - nset[numstates] = mkbranch( scbol[i / 2], scset[i / 2] ); + if ( fullspd ) + { + for ( i = 0; i <= numecs; ++i ) + state[i] = 0; + place_state( state, 0, 0 ); + } - nset = epsclosure( nset, &numstates, accset, &nacc, &hashval ); + else if ( fulltbl ) + { + if ( nultrans ) + /* We won't be including NUL's transitions in the + * table, so build it for entries from 0 .. numecs - 1. 
+ */ + num_full_table_rows = numecs; - if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) ) - { - numas += nacc; - totnst += numstates; - ++todo_next; + else + /* Take into account the fact that we'll be including + * the NUL entries in the transition table. Build it + * from 0 .. numecs. + */ + num_full_table_rows = numecs + 1; + + /* Declare it "short" because it's a real long-shot that that + * won't be large enough. + */ + printf( "static short int yy_nxt[][%d] =\n {\n", + /* '}' so vi doesn't get too confused */ + num_full_table_rows ); + + /* Generate 0 entries for state #0. */ + for ( i = 0; i < num_full_table_rows; ++i ) + mk2data( 0 ); + + /* Force ',' and dataflush() next call to mk2data().*/ + datapos = NUMDATAITEMS; + + /* Force extra blank line next dataflush(). */ + dataline = NUMDATALINES; + } - if ( variable_trailing_context_rules && nacc > 0 ) - check_trailing_context( nset, numstates, accset, nacc ); - } - } + /* Create the first states. */ - if ( ! fullspd ) - { - if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) ) - flexfatal( "could not create unique end-of-buffer state" ); + num_start_states = lastsc * 2; - ++numas; - ++num_start_states; - ++todo_next; - } + for ( i = 1; i <= num_start_states; ++i ) + { + numstates = 1; + + /* For each start condition, make one state for the case when + * we're at the beginning of the line (the '^' operator) and + * one for the case when we're not. 
+ */ + if ( i % 2 == 1 ) + nset[numstates] = scset[(i / 2) + 1]; + else + nset[numstates] = + mkbranch( scbol[i / 2], scset[i / 2] ); - while ( todo_head < todo_next ) - { - targptr = 0; - totaltrans = 0; + nset = epsclosure( nset, &numstates, accset, &nacc, &hashval ); - for ( i = 1; i <= numecs; ++i ) - state[i] = 0; - - ds = ++todo_head; + if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) ) + { + numas += nacc; + totnst += numstates; + ++todo_next; - dset = dss[ds]; - dsize = dfasiz[ds]; + if ( variable_trailing_context_rules && nacc > 0 ) + check_trailing_context( nset, numstates, + accset, nacc ); + } + } - if ( trace ) - fprintf( stderr, "state # %d:\n", ds ); + if ( ! fullspd ) + { + if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) ) + flexfatal( + "could not create unique end-of-buffer state" ); - sympartition( dset, dsize, symlist, duplist ); + ++numas; + ++num_start_states; + ++todo_next; + } - for ( sym = 1; sym <= numecs; ++sym ) - { - if ( symlist[sym] ) + while ( todo_head < todo_next ) { - symlist[sym] = 0; + targptr = 0; + totaltrans = 0; - if ( duplist[sym] == NIL ) - { /* symbol has unique out-transitions */ - numstates = symfollowset( dset, dsize, sym, nset ); - nset = epsclosure( nset, &numstates, accset, - &nacc, &hashval ); + for ( i = 1; i <= numecs; ++i ) + state[i] = 0; - if ( snstods( nset, numstates, accset, - nacc, hashval, &newds ) ) - { - totnst = totnst + numstates; - ++todo_next; - numas += nacc; + ds = ++todo_head; - if ( variable_trailing_context_rules && nacc > 0 ) - check_trailing_context( nset, numstates, - accset, nacc ); - } + dset = dss[ds]; + dsize = dfasiz[ds]; - state[sym] = newds; + if ( trace ) + fprintf( stderr, "state # %d:\n", ds ); - if ( trace ) - fprintf( stderr, "\t%d\t%d\n", sym, newds ); + sympartition( dset, dsize, symlist, duplist ); - targfreq[++targptr] = 1; - targstate[targptr] = newds; - ++numuniq; - } + for ( sym = 1; sym <= numecs; ++sym ) + { + if ( symlist[sym] ) + { + 
symlist[sym] = 0; + + if ( duplist[sym] == NIL ) + { + /* Symbol has unique out-transitions. */ + numstates = symfollowset( dset, dsize, + sym, nset ); + nset = epsclosure( nset, &numstates, + accset, &nacc, &hashval ); + + if ( snstods( nset, numstates, accset, + nacc, hashval, &newds ) ) + { + totnst = totnst + numstates; + ++todo_next; + numas += nacc; + + if ( + variable_trailing_context_rules && + nacc > 0 ) + check_trailing_context( + nset, numstates, + accset, nacc ); + } + + state[sym] = newds; + + if ( trace ) + fprintf( stderr, "\t%d\t%d\n", + sym, newds ); + + targfreq[++targptr] = 1; + targstate[targptr] = newds; + ++numuniq; + } + + else + { + /* sym's equivalence class has the same + * transitions as duplist(sym)'s + * equivalence class. + */ + targ = state[duplist[sym]]; + state[sym] = targ; + + if ( trace ) + fprintf( stderr, "\t%d\t%d\n", + sym, targ ); + + /* Update frequency count for + * destination state. + */ + + i = 0; + while ( targstate[++i] != targ ) + ; + + ++targfreq[i]; + ++numdup; + } + + ++totaltrans; + duplist[sym] = NIL; + } + } - else - { - /* sym's equivalence class has the same transitions - * as duplist(sym)'s equivalence class - */ - targ = state[duplist[sym]]; - state[sym] = targ; + numsnpairs = numsnpairs + totaltrans; + + if ( caseins && ! useecs ) + { + register int j; - if ( trace ) - fprintf( stderr, "\t%d\t%d\n", sym, targ ); + for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j ) + state[i] = state[j]; + } - /* update frequency count for destination state */ + if ( ds > num_start_states ) + check_for_backtracking( ds, state ); - i = 0; - while ( targstate[++i] != targ ) - ; + if ( nultrans ) + { + nultrans[ds] = state[NUL_ec]; + state[NUL_ec] = 0; /* remove transition */ + } - ++targfreq[i]; - ++numdup; - } + if ( fulltbl ) + { + /* Supply array's 0-element. 
*/ + if ( ds == end_of_buffer_state ) + mk2data( -end_of_buffer_state ); + else + mk2data( end_of_buffer_state ); + + for ( i = 1; i < num_full_table_rows; ++i ) + /* Jams are marked by negative of state + * number. + */ + mk2data( state[i] ? state[i] : -ds ); + + /* Force ',' and dataflush() next call to mk2data().*/ + datapos = NUMDATAITEMS; + + /* Force extra blank line next dataflush(). */ + dataline = NUMDATALINES; + } - ++totaltrans; - duplist[sym] = NIL; - } - } + else if ( fullspd ) + place_state( state, ds, totaltrans ); - numsnpairs = numsnpairs + totaltrans; + else if ( ds == end_of_buffer_state ) + /* Special case this state to make sure it does what + * it's supposed to, i.e., jam on end-of-buffer. + */ + stack1( ds, 0, 0, JAMSTATE ); - if ( caseins && ! useecs ) - { - register int j; + else /* normal, compressed state */ + { + /* Determine which destination state is the most + * common, and how many transitions to it there are. + */ - for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j ) - state[i] = state[j]; - } + comfreq = 0; + comstate = 0; - if ( ds > num_start_states ) - check_for_backtracking( ds, state ); + for ( i = 1; i <= targptr; ++i ) + if ( targfreq[i] > comfreq ) + { + comfreq = targfreq[i]; + comstate = targstate[i]; + } - if ( nultrans ) - { - nultrans[ds] = state[NUL_ec]; - state[NUL_ec] = 0; /* remove transition */ - } + bldtbl( state, ds, totaltrans, comstate, comfreq ); + } + } if ( fulltbl ) - { - /* supply array's 0-element */ - if ( ds == end_of_buffer_state ) - mk2data( -end_of_buffer_state ); - else - mk2data( end_of_buffer_state ); - - for ( i = 1; i < num_full_table_rows; ++i ) - /* jams are marked by negative of state number */ - mk2data( state[i] ? 
state[i] : -ds ); - - /* force ',' and dataflush() next call to mk2data */ - datapos = NUMDATAITEMS; - - /* force extra blank line next dataflush() */ - dataline = NUMDATALINES; - } - - else if ( fullspd ) - place_state( state, ds, totaltrans ); - - else if ( ds == end_of_buffer_state ) - /* special case this state to make sure it does what it's - * supposed to, i.e., jam on end-of-buffer - */ - stack1( ds, 0, 0, JAMSTATE ); - - else /* normal, compressed state */ - { - /* determine which destination state is the most common, and - * how many transitions to it there are - */ - - comfreq = 0; - comstate = 0; - - for ( i = 1; i <= targptr; ++i ) - if ( targfreq[i] > comfreq ) - { - comfreq = targfreq[i]; - comstate = targstate[i]; - } - - bldtbl( state, ds, totaltrans, comstate, comfreq ); - } - } - - if ( fulltbl ) - dataend(); + dataend(); - else if ( ! fullspd ) - { - cmptmps(); /* create compressed template entries */ + else if ( ! fullspd ) + { + cmptmps(); /* create compressed template entries */ - /* create tables for all the states with only one out-transition */ - while ( onesp > 0 ) - { - mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp], - onedef[onesp] ); - --onesp; - } + /* Create tables for all the states with only one + * out-transition. + */ + while ( onesp > 0 ) + { + mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp], + onedef[onesp] ); + --onesp; + } - mkdeftbl(); + mkdeftbl(); + } } - } /* snstods - converts a set of ndfa states into a dfa state * * synopsis - * int sns[numstates], numstates, newds, accset[num_rules + 1], nacc, hashval; - * int snstods(); - * is_new_state = snstods( sns, numstates, accset, nacc, hashval, &newds ); + * is_new_state = snstods( int sns[numstates], int numstates, + * int accset[num_rules+1], int nacc, + * int hashval, int *newds_addr ); * - * on return, the dfa state number is in newds. + * On return, the dfa state number is in newds. 
*/ int snstods( sns, numstates, accset, nacc, hashval, newds_addr ) int sns[], numstates, accset[], nacc, hashval, *newds_addr; + { + int didsort = 0; + register int i, j; + int newds, *oldsns; - { - int didsort = 0; - register int i, j; - int newds, *oldsns; + for ( i = 1; i <= lastdfa; ++i ) + if ( hashval == dhash[i] ) + { + if ( numstates == dfasiz[i] ) + { + oldsns = dss[i]; + + if ( ! didsort ) + { + /* We sort the states in sns so we + * can compare it to oldsns quickly. + * We use bubble because there probably + * aren't very many states. + */ + bubble( sns, numstates ); + didsort = 1; + } + + for ( j = 1; j <= numstates; ++j ) + if ( sns[j] != oldsns[j] ) + break; + + if ( j > numstates ) + { + ++dfaeql; + *newds_addr = i; + return 0; + } + + ++hshcol; + } + + else + ++hshsave; + } - for ( i = 1; i <= lastdfa; ++i ) - if ( hashval == dhash[i] ) - { - if ( numstates == dfasiz[i] ) - { - oldsns = dss[i]; - - if ( ! didsort ) - { - /* we sort the states in sns so we can compare it to - * oldsns quickly. we use bubble because there probably - * aren't very many states - */ - bubble( sns, numstates ); - didsort = 1; - } - - for ( j = 1; j <= numstates; ++j ) - if ( sns[j] != oldsns[j] ) - break; - - if ( j > numstates ) - { - ++dfaeql; - *newds_addr = i; - return ( 0 ); - } - - ++hshcol; - } + /* Make a new dfa. */ - else - ++hshsave; - } + if ( ++lastdfa >= current_max_dfas ) + increase_max_dfas(); - /* make a new dfa */ + newds = lastdfa; - if ( ++lastdfa >= current_max_dfas ) - increase_max_dfas(); + dss[newds] = allocate_integer_array( numstates + 1 ); - newds = lastdfa; + /* If we haven't already sorted the states in sns, we do so now, + * so that future comparisons with it can be made quickly. + */ - dss[newds] = allocate_integer_array( numstates + 1 ); + if ( ! 
didsort ) + bubble( sns, numstates ); - /* if we haven't already sorted the states in sns, we do so now, so that - * future comparisons with it can be made quickly - */ + for ( i = 1; i <= numstates; ++i ) + dss[newds][i] = sns[i]; - if ( ! didsort ) - bubble( sns, numstates ); + dfasiz[newds] = numstates; + dhash[newds] = hashval; - for ( i = 1; i <= numstates; ++i ) - dss[newds][i] = sns[i]; + if ( nacc == 0 ) + { + if ( reject ) + dfaacc[newds].dfaacc_set = (int *) 0; + else + dfaacc[newds].dfaacc_state = 0; - dfasiz[newds] = numstates; - dhash[newds] = hashval; + accsiz[newds] = 0; + } - if ( nacc == 0 ) - { - if ( reject ) - dfaacc[newds].dfaacc_set = (int *) 0; - else - dfaacc[newds].dfaacc_state = 0; + else if ( reject ) + { + /* We sort the accepting set in increasing order so the + * disambiguating rule that the first rule listed is considered + * match in the event of ties will work. We use a bubble + * sort since the list is probably quite small. + */ - accsiz[newds] = 0; - } + bubble( accset, nacc ); - else if ( reject ) - { - /* we sort the accepting set in increasing order so the disambiguating - * rule that the first rule listed is considered match in the event of - * ties will work. We use a bubble sort since the list is probably - * quite small. - */ + dfaacc[newds].dfaacc_set = allocate_integer_array( nacc + 1 ); - bubble( accset, nacc ); + /* Save the accepting set for later */ + for ( i = 1; i <= nacc; ++i ) + { + dfaacc[newds].dfaacc_set[i] = accset[i]; - dfaacc[newds].dfaacc_set = allocate_integer_array( nacc + 1 ); + if ( accset[i] <= num_rules ) + /* Who knows, perhaps a REJECT can yield + * this rule. 
+ */ + rule_useful[accset[i]] = true; + } - /* save the accepting set for later */ - for ( i = 1; i <= nacc; ++i ) - { - dfaacc[newds].dfaacc_set[i] = accset[i]; + accsiz[newds] = nacc; + } - if ( accset[i] <= num_rules ) - /* Who knows, perhaps a REJECT can yield this rule */ - rule_useful[accset[i]] = true; - } + else + { + /* Find lowest numbered rule so the disambiguating rule + * will work. + */ + j = num_rules + 1; - accsiz[newds] = nacc; - } + for ( i = 1; i <= nacc; ++i ) + if ( accset[i] < j ) + j = accset[i]; - else - { /* find lowest numbered rule so the disambiguating rule will work */ - j = num_rules + 1; + dfaacc[newds].dfaacc_state = j; - for ( i = 1; i <= nacc; ++i ) - if ( accset[i] < j ) - j = accset[i]; + if ( j <= num_rules ) + rule_useful[j] = true; + } - dfaacc[newds].dfaacc_state = j; + *newds_addr = newds; - if ( j <= num_rules ) - rule_useful[j] = true; + return 1; } - *newds_addr = newds; - - return ( 1 ); - } - /* symfollowset - follow the symbol transitions one step * * synopsis - * int ds[current_max_dfa_size], dsize, transsym; - * int nset[current_max_dfa_size], numstates; - * numstates = symfollowset( ds, dsize, transsym, nset ); + * numstates = symfollowset( int ds[current_max_dfa_size], int dsize, + * int transsym, int nset[current_max_dfa_size] ); */ int symfollowset( ds, dsize, transsym, nset ) int ds[], dsize, transsym, nset[]; + { + int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist; + + numstates = 0; + + for ( i = 1; i <= dsize; ++i ) + { /* for each nfa state ns in the state set of ds */ + ns = ds[i]; + sym = transchar[ns]; + tsp = trans1[ns]; + + if ( sym < 0 ) + { /* it's a character class */ + sym = -sym; + ccllist = cclmap[sym]; + lenccl = ccllen[sym]; + + if ( cclng[sym] ) + { + for ( j = 0; j < lenccl; ++j ) + { + /* Loop through negated character + * class. + */ + ch = ccltbl[ccllist + j]; + + if ( ch == 0 ) + ch = NUL_ec; + + if ( ch > transsym ) + /* Transsym isn't in negated + * ccl. 
+ */ + break; + + else if ( ch == transsym ) + /* next 2 */ goto bottom; + } + + /* Didn't find transsym in ccl. */ + nset[++numstates] = tsp; + } + + else + for ( j = 0; j < lenccl; ++j ) + { + ch = ccltbl[ccllist + j]; + + if ( ch == 0 ) + ch = NUL_ec; + + if ( ch > transsym ) + break; + else if ( ch == transsym ) + { + nset[++numstates] = tsp; + break; + } + } + } - { - int ns, tsp, sym, i, j, lenccl, ch, numstates; - int ccllist; - - numstates = 0; - - for ( i = 1; i <= dsize; ++i ) - { /* for each nfa state ns in the state set of ds */ - ns = ds[i]; - sym = transchar[ns]; - tsp = trans1[ns]; - - if ( sym < 0 ) - { /* it's a character class */ - sym = -sym; - ccllist = cclmap[sym]; - lenccl = ccllen[sym]; - - if ( cclng[sym] ) - { - for ( j = 0; j < lenccl; ++j ) - { /* loop through negated character class */ - ch = ccltbl[ccllist + j]; - - if ( ch == 0 ) - ch = NUL_ec; - - if ( ch > transsym ) - break; /* transsym isn't in negated ccl */ - - else if ( ch == transsym ) - /* next 2 */ goto bottom; - } - - /* didn't find transsym in ccl */ - nset[++numstates] = tsp; - } - - else - for ( j = 0; j < lenccl; ++j ) - { - ch = ccltbl[ccllist + j]; - - if ( ch == 0 ) - ch = NUL_ec; - - if ( ch > transsym ) - break; + else if ( sym >= 'A' && sym <= 'Z' && caseins ) + flexfatal( "consistency check failed in symfollowset" ); - else if ( ch == transsym ) - { - nset[++numstates] = tsp; - break; + else if ( sym == SYM_EPSILON ) + { /* do nothing */ } - } - } - else if ( sym >= 'A' && sym <= 'Z' && caseins ) - flexfatal( "consistency check failed in symfollowset" ); - - else if ( sym == SYM_EPSILON ) - { /* do nothing */ - } + else if ( abs( ecgroup[sym] ) == transsym ) + nset[++numstates] = tsp; - else if ( abs( ecgroup[sym] ) == transsym ) - nset[++numstates] = tsp; + bottom: ; + } -bottom: - ; + return numstates; } - return ( numstates ); - } - /* sympartition - partition characters with same out-transitions * * synopsis - * integer ds[current_max_dfa_size], numstates, 
duplist[numecs]; - * symlist[numecs]; - * sympartition( ds, numstates, symlist, duplist ); + * sympartition( int ds[current_max_dfa_size], int numstates, + * int symlist[numecs], int duplist[numecs] ); */ void sympartition( ds, numstates, symlist, duplist ) int ds[], numstates; int symlist[], duplist[]; + { + int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich; - { - int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich; - - /* partitioning is done by creating equivalence classes for those - * characters which have out-transitions from the given state. Thus - * we are really creating equivalence classes of equivalence classes. - */ - - for ( i = 1; i <= numecs; ++i ) - { /* initialize equivalence class list */ - duplist[i] = i - 1; - dupfwd[i] = i + 1; - } + /* Partitioning is done by creating equivalence classes for those + * characters which have out-transitions from the given state. Thus + * we are really creating equivalence classes of equivalence classes. + */ - duplist[1] = NIL; - dupfwd[numecs] = NIL; + for ( i = 1; i <= numecs; ++i ) + { /* initialize equivalence class list */ + duplist[i] = i - 1; + dupfwd[i] = i + 1; + } - for ( i = 1; i <= numstates; ++i ) - { - ns = ds[i]; - tch = transchar[ns]; + duplist[1] = NIL; + dupfwd[numecs] = NIL; - if ( tch != SYM_EPSILON ) - { - if ( tch < -lastccl || tch >= csize ) + for ( i = 1; i <= numstates; ++i ) { - if ( tch >= csize && tch <= CSIZE ) - flexerror( "scanner requires -8 flag" ); + ns = ds[i]; + tch = transchar[ns]; - else - flexfatal( + if ( tch != SYM_EPSILON ) + { + if ( tch < -lastccl || tch >= csize ) + { + if ( tch >= csize && tch <= CSIZE ) + flexerror( "scanner requires -8 flag" ); + + else + flexfatal( "bad transition character detected in sympartition()" ); - } + } - if ( tch >= 0 ) - { /* character transition */ - /* abs() needed for fake %t ec's */ - int ec = abs( ecgroup[tch] ); + if ( tch >= 0 ) + { /* character transition */ + /* abs() needed for fake %t ec's */ + int ec = abs( 
ecgroup[tch] ); - mkechar( ec, dupfwd, duplist ); - symlist[ec] = 1; - } + mkechar( ec, dupfwd, duplist ); + symlist[ec] = 1; + } - else - { /* character class */ - tch = -tch; + else + { /* character class */ + tch = -tch; - lenccl = ccllen[tch]; - cclp = cclmap[tch]; - mkeccl( ccltbl + cclp, lenccl, dupfwd, duplist, numecs, - NUL_ec ); + lenccl = ccllen[tch]; + cclp = cclmap[tch]; + mkeccl( ccltbl + cclp, lenccl, dupfwd, + duplist, numecs, NUL_ec ); - if ( cclng[tch] ) - { - j = 0; + if ( cclng[tch] ) + { + j = 0; - for ( k = 0; k < lenccl; ++k ) - { - ich = ccltbl[cclp + k]; + for ( k = 0; k < lenccl; ++k ) + { + ich = ccltbl[cclp + k]; - if ( ich == 0 ) - ich = NUL_ec; + if ( ich == 0 ) + ich = NUL_ec; - for ( ++j; j < ich; ++j ) - symlist[j] = 1; - } + for ( ++j; j < ich; ++j ) + symlist[j] = 1; + } - for ( ++j; j <= numecs; ++j ) - symlist[j] = 1; - } + for ( ++j; j <= numecs; ++j ) + symlist[j] = 1; + } - else - for ( k = 0; k < lenccl; ++k ) - { - ich = ccltbl[cclp + k]; + else + for ( k = 0; k < lenccl; ++k ) + { + ich = ccltbl[cclp + k]; - if ( ich == 0 ) - ich = NUL_ec; + if ( ich == 0 ) + ich = NUL_ec; - symlist[ich] = 1; + symlist[ich] = 1; + } + } } } - } } - } diff --git a/ecs.c b/ecs.c index 2617c46..ab6fadc 100644 --- a/ecs.c +++ b/ecs.c @@ -33,81 +33,71 @@ static char rcsid[] = #include "flexdef.h" -/* ccl2ecl - convert character classes to set of equivalence classes - * - * synopsis - * ccl2ecl(); - */ +/* ccl2ecl - convert character classes to set of equivalence classes */ void ccl2ecl() - - { - int i, ich, newlen, cclp, ccls, cclmec; - - for ( i = 1; i <= lastccl; ++i ) { - /* we loop through each character class, and for each character - * in the class, add the character's equivalence class to the - * new "character" class we are creating. 
Thus when we are all - * done, character classes will really consist of collections - * of equivalence classes - */ - - newlen = 0; - cclp = cclmap[i]; + int i, ich, newlen, cclp, ccls, cclmec; - for ( ccls = 0; ccls < ccllen[i]; ++ccls ) - { - ich = ccltbl[cclp + ccls]; - cclmec = ecgroup[ich]; - - if ( cclmec > 0 ) + for ( i = 1; i <= lastccl; ++i ) { - ccltbl[cclp + newlen] = cclmec; - ++newlen; + /* We loop through each character class, and for each character + * in the class, add the character's equivalence class to the + * new "character" class we are creating. Thus when we are all + * done, character classes will really consist of collections + * of equivalence classes + */ + + newlen = 0; + cclp = cclmap[i]; + + for ( ccls = 0; ccls < ccllen[i]; ++ccls ) + { + ich = ccltbl[cclp + ccls]; + cclmec = ecgroup[ich]; + + if ( cclmec > 0 ) + { + ccltbl[cclp + newlen] = cclmec; + ++newlen; + } + } + + ccllen[i] = newlen; } - } - - ccllen[i] = newlen; } - } /* cre8ecs - associate equivalence class numbers with class members * - * synopsis - * int cre8ecs(); - * number of classes = cre8ecs( fwd, bck, num ); - * - * fwd is the forward linked-list of equivalence class members. bck - * is the backward linked-list, and num is the number of class members. + * fwd is the forward linked-list of equivalence class members. bck + * is the backward linked-list, and num is the number of class members. * - * Returned is the number of classes. + * Returned is the number of classes. */ int cre8ecs( fwd, bck, num ) int fwd[], bck[], num; + { + int i, j, numcl; - { - int i, j, numcl; - - numcl = 0; - - /* create equivalence class numbers. From now on, abs( bck(x) ) - * is the equivalence class number for object x. If bck(x) - * is positive, then x is the representative of its equivalence - * class. 
- */ - for ( i = 1; i <= num; ++i ) - if ( bck[i] == NIL ) - { - bck[i] = ++numcl; - for ( j = fwd[i]; j != NIL; j = fwd[j] ) - bck[j] = -numcl; - } + numcl = 0; - return ( numcl ); - } + /* Create equivalence class numbers. From now on, abs( bck(x) ) + * is the equivalence class number for object x. If bck(x) + * is positive, then x is the representative of its equivalence + * class. + */ + for ( i = 1; i <= num; ++i ) + if ( bck[i] == NIL ) + { + bck[i] = ++numcl; + for ( j = fwd[i]; j != NIL; j = fwd[j] ) + bck[j] = -numcl; + } + + return numcl; + } /* mkeccl - update equivalence classes based on character class xtions @@ -115,11 +105,12 @@ int fwd[], bck[], num; * synopsis * Char ccls[]; * int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping; - * mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping ); + * void mkeccl( Char ccls[], int lenccl, int fwd[llsiz], int bck[llsiz], + * int llsiz, int NUL_mapping ); * - * where ccls contains the elements of the character class, lenccl is the + * ccls contains the elements of the character class, lenccl is the * number of elements in the ccl, fwd is the forward link-list of equivalent - * characters, bck is the backward link-list, and llsiz size of the link-list + * characters, bck is the backward link-list, and llsiz size of the link-list. * * NUL_mapping is the value which NUL (0) should be mapped to. */ @@ -127,116 +118,111 @@ int fwd[], bck[], num; void mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping ) Char ccls[]; int lenccl, fwd[], bck[], llsiz, NUL_mapping; + { + int cclp, oldec, newec; + int cclm, i, j; + static unsigned char cclflags[CSIZE]; /* initialized to all '\0' */ - { - int cclp, oldec, newec; - int cclm, i, j; - static unsigned char cclflags[CSIZE]; /* initialized to all '\0' */ + /* Note that it doesn't matter whether or not the character class is + * negated. The same results will be obtained in either case. + */ - /* note that it doesn't matter whether or not the character class is - * negated. 
The same results will be obtained in either case. - */ + cclp = 0; - cclp = 0; + while ( cclp < lenccl ) + { + cclm = ccls[cclp]; - while ( cclp < lenccl ) - { - cclm = ccls[cclp]; + if ( NUL_mapping && cclm == 0 ) + cclm = NUL_mapping; - if ( NUL_mapping && cclm == 0 ) - cclm = NUL_mapping; + oldec = bck[cclm]; + newec = cclm; - oldec = bck[cclm]; - newec = cclm; + j = cclp + 1; - j = cclp + 1; + for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] ) + { /* look for the symbol in the character class */ + for ( ; j < lenccl; ++j ) + { + register int ccl_char; - for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] ) - { /* look for the symbol in the character class */ - for ( ; j < lenccl; ++j ) - { - register int ccl_char; - - if ( NUL_mapping && ccls[j] == 0 ) - ccl_char = NUL_mapping; - else - ccl_char = ccls[j]; - - if ( ccl_char > i ) - break; - - if ( ccl_char == i && ! cclflags[j] ) - { - /* we found an old companion of cclm in the ccl. - * link it into the new equivalence class and flag it as - * having been processed - */ - - bck[i] = newec; - fwd[newec] = i; - newec = i; - cclflags[j] = 1; /* set flag so we don't reprocess */ - - /* get next equivalence class member */ - /* continue 2 */ - goto next_pt; - } - } + if ( NUL_mapping && ccls[j] == 0 ) + ccl_char = NUL_mapping; + else + ccl_char = ccls[j]; + + if ( ccl_char > i ) + break; + + if ( ccl_char == i && ! cclflags[j] ) + { + /* We found an old companion of cclm + * in the ccl. Link it into the new + * equivalence class and flag it as + * having been processed. + */ + + bck[i] = newec; + fwd[newec] = i; + newec = i; + /* Set flag so we don't reprocess. */ + cclflags[j] = 1; + + /* Get next equivalence class member. */ + /* continue 2 */ + goto next_pt; + } + } + + /* Symbol isn't in character class. Put it in the old + * equivalence class. + */ - /* symbol isn't in character class. 
Put it in the old equivalence - * class - */ + bck[i] = oldec; - bck[i] = oldec; + if ( oldec != NIL ) + fwd[oldec] = i; - if ( oldec != NIL ) - fwd[oldec] = i; + oldec = i; - oldec = i; -next_pt: - ; - } + next_pt: ; + } - if ( bck[cclm] != NIL || oldec != bck[cclm] ) - { - bck[cclm] = NIL; - fwd[oldec] = NIL; - } + if ( bck[cclm] != NIL || oldec != bck[cclm] ) + { + bck[cclm] = NIL; + fwd[oldec] = NIL; + } - fwd[newec] = NIL; + fwd[newec] = NIL; - /* find next ccl member to process */ + /* Find next ccl member to process. */ - for ( ++cclp; cclflags[cclp] && cclp < lenccl; ++cclp ) - { - /* reset "doesn't need processing" flag */ - cclflags[cclp] = 0; - } + for ( ++cclp; cclflags[cclp] && cclp < lenccl; ++cclp ) + { + /* Reset "doesn't need processing" flag. */ + cclflags[cclp] = 0; + } + } } - } -/* mkechar - create equivalence class for single character - * - * synopsis - * int tch, fwd[], bck[]; - * mkechar( tch, fwd, bck ); - */ +/* mkechar - create equivalence class for single character */ void mkechar( tch, fwd, bck ) int tch, fwd[], bck[]; + { + /* If until now the character has been a proper subset of + * an equivalence class, break it away to create a new ec + */ - { - /* if until now the character has been a proper subset of - * an equivalence class, break it away to create a new ec - */ - - if ( fwd[tch] != NIL ) - bck[fwd[tch]] = bck[tch]; + if ( fwd[tch] != NIL ) + bck[fwd[tch]] = bck[tch]; - if ( bck[tch] != NIL ) - fwd[bck[tch]] = fwd[tch]; + if ( bck[tch] != NIL ) + fwd[bck[tch]] = fwd[tch]; - fwd[tch] = NIL; - bck[tch] = NIL; - } + fwd[tch] = NIL; + bck[tch] = NIL; + } diff --git a/flex.skl b/flex.skl index a1282bf..0c5669e 100644 --- a/flex.skl +++ b/flex.skl @@ -1,6 +1,6 @@ /* A lexical scanner generated by flex */ -/* scanner skeleton version: +/* Scanner skeleton version: * $Header$ */ @@ -22,10 +22,10 @@ #include #include -/* use prototypes in function declarations */ +/* Use prototypes in function declarations. 
*/ #define YY_USE_PROTOS -/* the "const" storage-class-modifier is valid */ +/* The "const" storage-class-modifier is valid. */ #define YY_USE_CONST #else /* ! __cplusplus */ @@ -61,9 +61,9 @@ void free( void* ); #define YY_PROTO(proto) proto #else #define YY_PROTO(proto) () -/* we can't get here if it's an ANSI C compiler, or a C++ compiler, +/* We can't get here if it's an ANSI C compiler, or a C++ compiler, * so it's got to be a K&R compiler, and therefore there's no standard - * place from which to include these definitions + * place from which to include these definitions. */ char *malloc(); int free(); @@ -71,37 +71,37 @@ int read(); #endif -/* amount of stuff to slurp up with each read */ +/* Amount of stuff to slurp up with each read. */ #ifndef YY_READ_BUF_SIZE #define YY_READ_BUF_SIZE 8192 #endif -/* returned upon end-of-file */ +/* Returned upon end-of-file. */ #define YY_END_TOK 0 -/* copy whatever the last rule matched to the standard output */ +/* Copy whatever the last rule matched to the standard output. */ -/* cast to (char *) is because for 8-bit chars, yytext is (unsigned char *) */ +/* Cast to (char *) is because for 8-bit chars, yytext is (unsigned char *) */ /* this used to be an fputs(), but since the string might contain NUL's, - * we now use fwrite() + * we now use fwrite(). */ #define ECHO (void) fwrite( (char *) yytext, yyleng, 1, yyout ) -/* gets input and stuffs it into "buf". number of characters read, or YY_NULL, +/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, * is returned in "result". 
*/ #define YY_INPUT(buf,result,max_size) \ if ( (result = read( fileno(yyin), (char *) buf, max_size )) < 0 ) \ - YY_FATAL_ERROR( "read() in flex scanner failed" ); + YY_FATAL_ERROR( "read() in flex scanner failed" ); #define YY_NULL 0 -/* no semi-colon after return; correct usage is to write "yyterminate();" - +/* No semi-colon after return; correct usage is to write "yyterminate();" - * we don't want an extra ';' after the "return" because that will cause * some compilers to complain about unreachable statements. */ #define yyterminate() return YY_NULL -/* report a fatal error */ +/* Report a fatal error. */ /* The funky do-while is used to turn this macro definition into * a single C statement (which needs a semi-colon terminator). @@ -126,19 +126,19 @@ int read(); } \ while ( 0 ) -/* default yywrap function - always treat EOF as an EOF */ +/* Default yywrap function - always treat EOF as an EOF. */ #define yywrap() 1 -/* enter a start condition. This macro really ought to take a parameter, +/* Enter a start condition. This macro really ought to take a parameter, * but we do it the disgusting crufty way forced on us by the ()-less - * definition of BEGIN + * definition of BEGIN. */ #define BEGIN yy_start = 1 + 2 * -/* action number for EOF rule of a given start state */ +/* Action number for EOF rule of a given start state. */ #define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) -/* special action meaning "start processing a new file" */ +/* Special action meaning "start processing a new file". */ #define YY_NEW_FILE \ do \ { \ @@ -147,12 +147,12 @@ int read(); } \ while ( 0 ) -/* default declaration of generated scanner - a define so the user can - * easily add parameters +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. */ #define YY_DECL int yylex YY_PROTO(( void )) -/* code executed at the end of each rule */ +/* Code executed at the end of each rule. 
*/ #define YY_BREAK break; #define YY_END_OF_BUFFER_CHAR 0 @@ -165,8 +165,8 @@ typedef struct yy_buffer_state *YY_BUFFER_STATE; %% section 1 definitions and declarations of yytext/yytext_ptr go here -/* done after the current pattern has been matched and before the - * corresponding action - sets up yytext +/* Done after the current pattern has been matched and before the + * corresponding action - sets up yytext. */ #define YY_DO_BEFORE_ACTION \ yytext_ptr = yy_bp; \ @@ -180,11 +180,11 @@ typedef struct yy_buffer_state *YY_BUFFER_STATE; #define EOB_ACT_END_OF_FILE 1 #define EOB_ACT_LAST_MATCH 2 -/* return all but the first 'n' matched characters back to the input stream */ +/* Return all but the first 'n' matched characters back to the input stream. */ #define yyless(n) \ do \ { \ - /* undo effects of setting up yytext */ \ + /* Undo effects of setting up yytext. */ \ *yy_cp = yy_hold_char; \ yy_c_buf_p = yy_cp = yy_bp + n; \ YY_DO_BEFORE_ACTION; /* set up yytext again */ \ @@ -195,37 +195,42 @@ typedef struct yy_buffer_state *YY_BUFFER_STATE; struct yy_buffer_state - { - FILE *yy_input_file; + { + FILE *yy_input_file; - YY_CHAR *yy_ch_buf; /* input buffer */ - YY_CHAR *yy_buf_pos; /* current position in input buffer */ + YY_CHAR *yy_ch_buf; /* input buffer */ + YY_CHAR *yy_buf_pos; /* current position in input buffer */ - /* size of input buffer in bytes, not including room for EOB characters */ - int yy_buf_size; + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + int yy_buf_size; - /* number of characters read into yy_ch_buf, not including EOB characters */ - int yy_n_chars; + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; - int yy_eof_status; /* whether we've seen an EOF on this buffer */ + /* Whether we've seen an EOF on this buffer. 
*/ + int yy_eof_status; #define EOF_NOT_SEEN 0 - /* "pending" happens when the EOF has been seen but there's still - * some text process - */ + /* "Pending" happens when the EOF has been seen but there's still + * some text to process. + */ #define EOF_PENDING 1 #define EOF_DONE 2 - }; + }; static YY_BUFFER_STATE yy_current_buffer = 0; -/* we provide macros for accessing buffer states in case in the +/* We provide macros for accessing buffer states in case in the * future we want to put the buffer states in a more general - * "scanner state" + * "scanner state". */ #define YY_CURRENT_BUFFER yy_current_buffer -/* yy_hold_char holds the character lost when yytext is formed */ +/* yy_hold_char holds the character lost when yytext is formed. */ static YY_CHAR yy_hold_char; static int yy_n_chars; /* number of characters read into yy_ch_buf */ @@ -240,15 +245,15 @@ FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; %% data tables for the DFA go here -/* these variables are all declared out here so that section 3 code can - * manipulate them +/* These variables are all declared out here so that section 3 code can + * manipulate them. */ -/* points to current character in buffer */ +/* Points to current character in buffer. */ static YY_CHAR *yy_c_buf_p = (YY_CHAR *) 0; static int yy_init = 1; /* whether we need to initialize */ static int yy_start = 0; /* start state number */ -/* flag which is used to allow yywrap()'s to do buffer switches +/* Flag which is used to allow yywrap()'s to do buffer switches * instead of setting up a fresh yyin. A bit of a hack ... 
*/ static int yy_did_buffer_switch_on_eof; @@ -273,300 +278,297 @@ static int input YY_PROTO(( void )); #endif YY_DECL - { - register yy_state_type yy_current_state; - register YY_CHAR *yy_cp, *yy_bp; - register int yy_act; + { + register yy_state_type yy_current_state; + register YY_CHAR *yy_cp, *yy_bp; + register int yy_act; %% user's declarations go here - if ( yy_init ) - { + if ( yy_init ) + { #ifdef YY_USER_INIT - YY_USER_INIT; + YY_USER_INIT; #endif - if ( ! yy_start ) - yy_start = 1; /* first start state */ + if ( ! yy_start ) + yy_start = 1; /* first start state */ - if ( ! yyin ) - yyin = stdin; + if ( ! yyin ) + yyin = stdin; - if ( ! yyout ) - yyout = stdout; + if ( ! yyout ) + yyout = stdout; - if ( yy_current_buffer ) - yy_init_buffer( yy_current_buffer, yyin ); - else - yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); + if ( yy_current_buffer ) + yy_init_buffer( yy_current_buffer, yyin ); + else + yy_current_buffer = + yy_create_buffer( yyin, YY_BUF_SIZE ); - yy_load_buffer_state(); + yy_load_buffer_state(); - yy_init = 0; - } + yy_init = 0; + } - while ( 1 ) /* loops until end-of-file is reached */ - { + while ( 1 ) /* loops until end-of-file is reached */ + { %% yymore()-related code goes here - yy_cp = yy_c_buf_p; + yy_cp = yy_c_buf_p; - /* support of yytext */ - *yy_cp = yy_hold_char; + /* Support of yytext. */ + *yy_cp = yy_hold_char; - /* yy_bp points to the position in yy_ch_buf of the start of the - * current run. - */ - yy_bp = yy_cp; + /* yy_bp points to the position in yy_ch_buf of the start of + * the current run. 
+ */ + yy_bp = yy_cp; %% code to set up and find next match goes here yy_find_action: %% code to find the action number goes here - YY_DO_BEFORE_ACTION; + YY_DO_BEFORE_ACTION; #ifdef YY_USER_ACTION - if ( yy_act != YY_END_OF_BUFFER ) - { - YY_USER_ACTION; - } + if ( yy_act != YY_END_OF_BUFFER ) + { + YY_USER_ACTION; + } #endif -do_action: /* this label is used only to access EOF actions */ +do_action: /* This label is used only to access EOF actions. */ %% debug code goes here - switch ( yy_act ) - { + switch ( yy_act ) + { /* beginning of action switch */ %% actions go here - case YY_END_OF_BUFFER: + case YY_END_OF_BUFFER: { - /* amount of text matched not including the EOB char */ + /* Amount of text matched not including the EOB char. */ int yy_amount_of_matched_text = yy_cp - yytext_ptr - 1; - /* undo the effects of YY_DO_BEFORE_ACTION */ + /* Undo the effects of YY_DO_BEFORE_ACTION. */ *yy_cp = yy_hold_char; - /* note that here we test for yy_c_buf_p "<=" to the position + /* Note that here we test for yy_c_buf_p "<=" to the position * of the first EOB in the buffer, since yy_c_buf_p will * already have been incremented past the NUL character - * (since all states make transitions on EOB to the end- - * of-buffer state). Contrast this with the test in yyinput(). + * (since all states make transitions on EOB to the + * end-of-buffer state). Contrast this with the test + * in yyinput(). */ if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] ) - /* this was really a NUL */ - { - yy_state_type yy_next_state; + { /* This was really a NUL. */ + yy_state_type yy_next_state; - yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text; + yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text; - yy_current_state = yy_get_previous_state(); + yy_current_state = yy_get_previous_state(); - /* okay, we're now positioned to make the - * NUL transition. 
We couldn't have - * yy_get_previous_state() go ahead and do it - * for us because it doesn't know how to deal - * with the possibility of jamming (and we - * don't want to build jamming into it because - * then it will run more slowly) - */ + /* Okay, we're now positioned to make the NUL + * transition. We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we don't + * want to build jamming into it because then it + * will run more slowly). + */ - yy_next_state = yy_try_NUL_trans( yy_current_state ); + yy_next_state = yy_try_NUL_trans( yy_current_state ); - yy_bp = yytext_ptr + YY_MORE_ADJ; + yy_bp = yytext_ptr + YY_MORE_ADJ; - if ( yy_next_state ) - { - /* consume the NUL */ - yy_cp = ++yy_c_buf_p; - yy_current_state = yy_next_state; - goto yy_match; - } + if ( yy_next_state ) + { + /* Consume the NUL. */ + yy_cp = ++yy_c_buf_p; + yy_current_state = yy_next_state; + goto yy_match; + } - else - { + else + { %% code to do backtracking for compressed tables and set up yy_cp goes here - goto yy_find_action; + goto yy_find_action; + } } - } else switch ( yy_get_next_buffer() ) - { - case EOB_ACT_END_OF_FILE: { - yy_did_buffer_switch_on_eof = 0; - - if ( yywrap() ) - { - /* note: because we've taken care in - * yy_get_next_buffer() to have set up yytext, - * we can now set up yy_c_buf_p so that if some - * total hoser (like flex itself) wants - * to call the scanner after we return the - * YY_NULL, it'll still work - another YY_NULL - * will get returned. - */ - yy_c_buf_p = yytext_ptr + YY_MORE_ADJ; - - yy_act = YY_STATE_EOF((yy_start - 1) / 2); - goto do_action; - } - - else - { - if ( ! 
yy_did_buffer_switch_on_eof ) - YY_NEW_FILE; - } + case EOB_ACT_END_OF_FILE: + { + yy_did_buffer_switch_on_eof = 0; + + if ( yywrap() ) + { + /* Note: because we've taken care in + * yy_get_next_buffer() to have set up + * yytext, we can now set up + * yy_c_buf_p so that if some total + * hoser (like flex itself) wants to + * call the scanner after we return the + * YY_NULL, it'll still work - another + * YY_NULL will get returned. + */ + yy_c_buf_p = yytext_ptr + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF( + (yy_start - 1) / 2); + goto do_action; + } + + else + { + if ( ! yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; + } + break; + } + + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = + yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext_ptr + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + yy_c_buf_p = + &yy_current_buffer->yy_ch_buf[yy_n_chars]; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext_ptr + YY_MORE_ADJ; + goto yy_find_action; } - break; - - case EOB_ACT_CONTINUE_SCAN: - yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text; - - yy_current_state = yy_get_previous_state(); - - yy_cp = yy_c_buf_p; - yy_bp = yytext_ptr + YY_MORE_ADJ; - goto yy_match; - - case EOB_ACT_LAST_MATCH: - yy_c_buf_p = - &yy_current_buffer->yy_ch_buf[yy_n_chars]; - - yy_current_state = yy_get_previous_state(); - - yy_cp = yy_c_buf_p; - yy_bp = yytext_ptr + YY_MORE_ADJ; - goto yy_find_action; - } break; } - default: + default: #ifdef FLEX_DEBUG printf( "action # %d\n", yy_act ); #endif YY_FATAL_ERROR( "fatal flex scanner internal error--no action found" ); - } - } - } + } /* end of action switch */ + } /* end of scanning one token */ + } /* end of yylex */ /* yy_get_next_buffer - try to read in a new buffer * - * synopsis - * int yy_get_next_buffer(); - * - * returns a code representing an action - * EOB_ACT_LAST_MATCH - - * EOB_ACT_CONTINUE_SCAN - continue scanning 
from current position - * EOB_ACT_END_OF_FILE - end of file + * Returns a code representing an action: + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file */ static int yy_get_next_buffer() + { + register YY_CHAR *dest = yy_current_buffer->yy_ch_buf; + register YY_CHAR *source = yytext_ptr - 1; /* copy prev. char, too */ + register int number_to_move, i; + int ret_val; - { - register YY_CHAR *dest = yy_current_buffer->yy_ch_buf; - register YY_CHAR *source = yytext_ptr - 1; /* copy prev. char, too */ - register int number_to_move, i; - int ret_val; - - if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] ) - YY_FATAL_ERROR( + if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] ) + YY_FATAL_ERROR( "fatal flex scanner internal error--end of buffer missed" ); - /* try to read more data */ + /* Try to read more data. */ - /* first move last chars to start of buffer */ - number_to_move = yy_c_buf_p - yytext_ptr; + /* First move last chars to start of buffer. 
*/ + number_to_move = yy_c_buf_p - yytext_ptr; - for ( i = 0; i < number_to_move; ++i ) - *(dest++) = *(source++); + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); - if ( yy_current_buffer->yy_eof_status != EOF_NOT_SEEN ) - /* don't do the read, it's not guaranteed to return an EOF, - * just force an EOF - */ - yy_n_chars = 0; + if ( yy_current_buffer->yy_eof_status != EOF_NOT_SEEN ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + yy_n_chars = 0; - else - { - int num_to_read = yy_current_buffer->yy_buf_size - number_to_move - 1; + else + { + int num_to_read = + yy_current_buffer->yy_buf_size - number_to_move - 1; - if ( num_to_read > YY_READ_BUF_SIZE ) - num_to_read = YY_READ_BUF_SIZE; + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; - else if ( num_to_read <= 0 ) - YY_FATAL_ERROR( "fatal error - scanner input buffer overflow" ); + else if ( num_to_read <= 0 ) + YY_FATAL_ERROR( + "fatal error - scanner input buffer overflow" ); - /* read in more data */ - YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]), - yy_n_chars, num_to_read ); - } + /* Read in more data. 
*/ + YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]), + yy_n_chars, num_to_read ); + } - if ( yy_n_chars == 0 ) - { - if ( number_to_move - YY_MORE_ADJ == 1 ) - { - ret_val = EOB_ACT_END_OF_FILE; - yy_current_buffer->yy_eof_status = EOF_DONE; - } + if ( yy_n_chars == 0 ) + { + if ( number_to_move - YY_MORE_ADJ == 1 ) + { + ret_val = EOB_ACT_END_OF_FILE; + yy_current_buffer->yy_eof_status = EOF_DONE; + } - else - { - ret_val = EOB_ACT_LAST_MATCH; - yy_current_buffer->yy_eof_status = EOF_PENDING; - } - } + else + { + ret_val = EOB_ACT_LAST_MATCH; + yy_current_buffer->yy_eof_status = EOF_PENDING; + } + } - else - ret_val = EOB_ACT_CONTINUE_SCAN; + else + ret_val = EOB_ACT_CONTINUE_SCAN; - yy_n_chars += number_to_move; - yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; - yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + yy_n_chars += number_to_move; + yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; + yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; - /* yytext begins at the second character in yy_ch_buf; the first - * character is the one which preceded it before reading in the latest - * buffer; it needs to be kept around in case it's a newline, so - * yy_get_previous_state() will have with '^' rules active - */ + /* yytext begins at the second character in yy_ch_buf; the first + * character is the one which preceded it before reading in the latest + * buffer; it needs to be kept around in case it's a newline, so + * yy_get_previous_state() will have with '^' rules active. 
+ */ - yytext_ptr = &yy_current_buffer->yy_ch_buf[1]; + yytext_ptr = &yy_current_buffer->yy_ch_buf[1]; - return ret_val; - } + return ret_val; + } -/* yy_get_previous_state - get the state just before the EOB char was reached - * - * synopsis - * yy_state_type yy_get_previous_state(); - */ +/* yy_get_previous_state - get the state just before the EOB char was reached */ static yy_state_type yy_get_previous_state() - - { - register yy_state_type yy_current_state; - register YY_CHAR *yy_cp; + { + register yy_state_type yy_current_state; + register YY_CHAR *yy_cp; %% code to get the start state into yy_current_state goes here - for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp ) - { + for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp ) + { %% code to find the next state goes here - } + } - return yy_current_state; - } + return yy_current_state; + } /* yy_try_NUL_trans - try to make a transition on the NUL character * * synopsis - * next_state = yy_try_NUL_trans( current_state ); + * next_state = yy_try_NUL_trans( current_state ); */ #ifdef YY_USE_PROTOS @@ -575,13 +577,12 @@ static yy_state_type yy_try_NUL_trans( register yy_state_type yy_current_state ) static yy_state_type yy_try_NUL_trans( yy_current_state ) register yy_state_type yy_current_state; #endif - - { - register int yy_is_jam; + { + register int yy_is_jam; %% code to find the next state, and perhaps do backtracking, goes here - return yy_is_jam ? 0 : yy_current_state; - } + return yy_is_jam ? 
0 : yy_current_state; + } #ifdef YY_USE_PROTOS @@ -591,42 +592,42 @@ static void yyunput( c, yy_bp ) YY_CHAR c; register YY_CHAR *yy_bp; #endif + { + register YY_CHAR *yy_cp = yy_c_buf_p; - { - register YY_CHAR *yy_cp = yy_c_buf_p; - - /* undo effects of setting up yytext */ - *yy_cp = yy_hold_char; - - if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) - { /* need to shift things up to make room */ - register int number_to_move = yy_n_chars + 2; /* +2 for EOB chars */ - register YY_CHAR *dest = - &yy_current_buffer->yy_ch_buf[yy_current_buffer->yy_buf_size + 2]; - register YY_CHAR *source = - &yy_current_buffer->yy_ch_buf[number_to_move]; - - while ( source > yy_current_buffer->yy_ch_buf ) - *--dest = *--source; - - yy_cp += dest - source; - yy_bp += dest - source; - yy_n_chars = yy_current_buffer->yy_buf_size; + /* undo effects of setting up yytext */ + *yy_cp = yy_hold_char; if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) - YY_FATAL_ERROR( "flex scanner push-back overflow" ); - } + { /* need to shift things up to make room */ + /* +2 for EOB chars. */ + register int number_to_move = yy_n_chars + 2; + register YY_CHAR *dest = &yy_current_buffer->yy_ch_buf[ + yy_current_buffer->yy_buf_size + 2]; + register YY_CHAR *source = + &yy_current_buffer->yy_ch_buf[number_to_move]; + + while ( source > yy_current_buffer->yy_ch_buf ) + *--dest = *--source; + + yy_cp += dest - source; + yy_bp += dest - source; + yy_n_chars = yy_current_buffer->yy_buf_size; + + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) + YY_FATAL_ERROR( "flex scanner push-back overflow" ); + } - if ( yy_cp > yy_bp && yy_cp[-1] == '\n' ) - yy_cp[-2] = '\n'; + if ( yy_cp > yy_bp && yy_cp[-1] == '\n' ) + yy_cp[-2] = '\n'; - *--yy_cp = c; + *--yy_cp = c; - /* note: the formal parameter *must* be called "yy_bp" for this - * macro to now work correctly - */ - YY_DO_BEFORE_ACTION; /* set up yytext again */ - } + /* Note: the formal parameter *must* be called "yy_bp" for this + * macro to now work correctly. 
+ */ + YY_DO_BEFORE_ACTION; /* set up yytext again */ + } #ifdef __cplusplus @@ -634,66 +635,67 @@ static int yyinput() #else static int input() #endif - - { - int c; - YY_CHAR *yy_cp = yy_c_buf_p; - - *yy_cp = yy_hold_char; - - if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) { - /* yy_c_buf_p now points to the character we want to return. - * If this occurs *before* the EOB characters, then it's a - * valid NUL; if not, then we've hit the end of the buffer. - */ - if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] ) - /* this was really a NUL */ - *yy_c_buf_p = '\0'; + int c; + YY_CHAR *yy_cp = yy_c_buf_p; - else - { /* need more input */ - yytext_ptr = yy_c_buf_p; - ++yy_c_buf_p; + *yy_cp = yy_hold_char; - switch ( yy_get_next_buffer() ) + if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) { - case EOB_ACT_END_OF_FILE: - { - if ( yywrap() ) - { - yy_c_buf_p = yytext_ptr + YY_MORE_ADJ; - return EOF; - } - - YY_NEW_FILE; - + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. + */ + if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] ) + /* This was really a NUL. 
*/ + *yy_c_buf_p = '\0'; + + else + { /* need more input */ + yytext_ptr = yy_c_buf_p; + ++yy_c_buf_p; + + switch ( yy_get_next_buffer() ) + { + case EOB_ACT_END_OF_FILE: + { + if ( yywrap() ) + { + yy_c_buf_p = + yytext_ptr + YY_MORE_ADJ; + return EOF; + } + + YY_NEW_FILE; #ifdef __cplusplus - return yyinput(); + return yyinput(); #else - return input(); + return input(); #endif - } + } - case EOB_ACT_CONTINUE_SCAN: - yy_c_buf_p = yytext_ptr + YY_MORE_ADJ; - break; + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = yytext_ptr + YY_MORE_ADJ; + break; - case EOB_ACT_LAST_MATCH: + case EOB_ACT_LAST_MATCH: #ifdef __cplusplus - YY_FATAL_ERROR( "unexpected last match in yyinput()" ); + YY_FATAL_ERROR( + "unexpected last match in yyinput()" ); #else - YY_FATAL_ERROR( "unexpected last match in input()" ); + YY_FATAL_ERROR( + "unexpected last match in input()" ); #endif + } + } } - } - } - c = *yy_c_buf_p; - yy_hold_char = *++yy_c_buf_p; + c = *yy_c_buf_p; + yy_hold_char = *++yy_c_buf_p; - return c; - } + return c; + } #ifdef YY_USE_PROTOS @@ -702,11 +704,10 @@ void yyrestart( FILE *input_file ) void yyrestart( input_file ) FILE *input_file; #endif - - { - yy_init_buffer( yy_current_buffer, input_file ); - yy_load_buffer_state(); - } + { + yy_init_buffer( yy_current_buffer, input_file ); + yy_load_buffer_state(); + } #ifdef YY_USE_PROTOS @@ -715,29 +716,28 @@ void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) void yy_switch_to_buffer( new_buffer ) YY_BUFFER_STATE new_buffer; #endif - - { - if ( yy_current_buffer == new_buffer ) - return; - - if ( yy_current_buffer ) { - /* flush out information for old buffer */ - *yy_c_buf_p = yy_hold_char; - yy_current_buffer->yy_buf_pos = yy_c_buf_p; - yy_current_buffer->yy_n_chars = yy_n_chars; - } + if ( yy_current_buffer == new_buffer ) + return; + + if ( yy_current_buffer ) + { + /* Flush out information for old buffer. 
*/ + *yy_c_buf_p = yy_hold_char; + yy_current_buffer->yy_buf_pos = yy_c_buf_p; + yy_current_buffer->yy_n_chars = yy_n_chars; + } - yy_current_buffer = new_buffer; - yy_load_buffer_state(); + yy_current_buffer = new_buffer; + yy_load_buffer_state(); - /* we don't actually know whether we did this switch during - * EOF (yywrap()) processing, but the only time this flag - * is looked at is after yywrap() is called, so it's safe - * to go ahead and always set it. - */ - yy_did_buffer_switch_on_eof = 1; - } + /* We don't actually know whether we did this switch during + * EOF (yywrap()) processing, but the only time this flag + * is looked at is after yywrap() is called, so it's safe + * to go ahead and always set it. + */ + yy_did_buffer_switch_on_eof = 1; + } #ifdef YY_USE_PROTOS @@ -745,13 +745,12 @@ void yy_load_buffer_state( void ) #else void yy_load_buffer_state() #endif - - { - yy_n_chars = yy_current_buffer->yy_n_chars; - yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos; - yyin = yy_current_buffer->yy_input_file; - yy_hold_char = *yy_c_buf_p; - } + { + yy_n_chars = yy_current_buffer->yy_n_chars; + yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos; + yyin = yy_current_buffer->yy_input_file; + yy_hold_char = *yy_c_buf_p; + } #ifdef YY_USE_PROTOS @@ -761,29 +760,28 @@ YY_BUFFER_STATE yy_create_buffer( file, size ) FILE *file; int size; #endif + { + YY_BUFFER_STATE b; - { - YY_BUFFER_STATE b; - - b = (YY_BUFFER_STATE) malloc( sizeof( struct yy_buffer_state ) ); + b = (YY_BUFFER_STATE) malloc( sizeof( struct yy_buffer_state ) ); - if ( ! b ) - YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); - b->yy_buf_size = size; + b->yy_buf_size = size; - /* yy_ch_buf has to be 2 characters longer than the size given because - * we need to put in 2 end-of-buffer characters. 
- */ - b->yy_ch_buf = (YY_CHAR *) malloc( (unsigned) (b->yy_buf_size + 2) ); + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. + */ + b->yy_ch_buf = (YY_CHAR *) malloc( (unsigned) (b->yy_buf_size + 2) ); - if ( ! b->yy_ch_buf ) - YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); - yy_init_buffer( b, file ); + yy_init_buffer( b, file ); - return b; - } + return b; + } #ifdef YY_USE_PROTOS @@ -792,14 +790,13 @@ void yy_delete_buffer( YY_BUFFER_STATE b ) void yy_delete_buffer( b ) YY_BUFFER_STATE b; #endif + { + if ( b == yy_current_buffer ) + yy_current_buffer = (YY_BUFFER_STATE) 0; - { - if ( b == yy_current_buffer ) - yy_current_buffer = (YY_BUFFER_STATE) 0; - - free( (char *) b->yy_ch_buf ); - free( (char *) b ); - } + free( (char *) b->yy_ch_buf ); + free( (char *) b ); + } #ifdef YY_USE_PROTOS @@ -809,25 +806,24 @@ void yy_init_buffer( b, file ) YY_BUFFER_STATE b; FILE *file; #endif + { + b->yy_input_file = file; - { - b->yy_input_file = file; - - /* we put in the '\n' and start reading from [1] so that an - * initial match-at-newline will be true. - */ + /* We put in the '\n' and start reading from [1] so that an + * initial match-at-newline will be true. + */ - b->yy_ch_buf[0] = '\n'; - b->yy_n_chars = 1; + b->yy_ch_buf[0] = '\n'; + b->yy_n_chars = 1; - /* we always need two end-of-buffer characters. The first causes - * a transition to the end-of-buffer state. The second causes - * a jam in that state. - */ - b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; - b->yy_ch_buf[2] = YY_END_OF_BUFFER_CHAR; + /* We always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. 
+ */ + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[2] = YY_END_OF_BUFFER_CHAR; - b->yy_buf_pos = &b->yy_ch_buf[1]; + b->yy_buf_pos = &b->yy_ch_buf[1]; - b->yy_eof_status = EOF_NOT_SEEN; - } + b->yy_eof_status = EOF_NOT_SEEN; + } diff --git a/flexdef.h b/flexdef.h index daa6961..d227363 100644 --- a/flexdef.h +++ b/flexdef.h @@ -32,7 +32,7 @@ #include #endif -/* always be prepared to generate an 8-bit scanner */ +/* Always be prepared to generate an 8-bit scanner. */ #define FLEX_8_BIT_CHARS #ifdef FLEX_8_BIT_CHARS @@ -43,7 +43,7 @@ #define CSIZE 128 #endif -/* size of input alphabet - should be size of ASCII set */ +/* Size of input alphabet - should be size of ASCII set. */ #ifndef DEFAULT_CSIZE #define DEFAULT_CSIZE 128 #endif @@ -97,10 +97,10 @@ char *malloc(), *realloc(); #endif -/* maximum line length we'll have to deal with */ +/* Maximum line length we'll have to deal with. */ #define MAXLINE BUFSIZ -/* maximum size of file name */ +/* Maximum size of file name. */ #define FILENAMESIZE 1024 #ifndef min @@ -121,24 +121,24 @@ char *malloc(), *realloc(); #define false 0 -/* special chk[] values marking the slots taking by end-of-buffer and action - * numbers +/* Special chk[] values marking the slots taking by end-of-buffer and action + * numbers. */ #define EOB_POSITION -1 #define ACTION_POSITION -2 -/* number of data items per line for -f output */ +/* Number of data items per line for -f output. */ #define NUMDATAITEMS 10 -/* number of lines of data in -f output before inserting a blank line for +/* Number of lines of data in -f output before inserting a blank line for * readability. */ #define NUMDATALINES 10 -/* transition_struct_out() definitions */ +/* Transition_struct_out() definitions. */ #define TRANS_STRUCT_PRINT_LENGTH 15 -/* returns true if an nfa state has an epsilon out-transition slot +/* Returns true if an nfa state has an epsilon out-transition slot * that can be used. This definition is currently not used. 
*/ #define FREE_EPSILON(state) \ @@ -146,14 +146,14 @@ char *malloc(), *realloc(); trans2[state] == NO_TRANSITION && \ finalst[state] != state) -/* returns true if an nfa state has an epsilon out-transition character +/* Returns true if an nfa state has an epsilon out-transition character * and both slots are free */ #define SUPER_FREE_EPSILON(state) \ (transchar[state] == SYM_EPSILON && \ trans1[state] == NO_TRANSITION) \ -/* maximum number of NFA states that can comprise a DFA state. It's real +/* Maximum number of NFA states that can comprise a DFA state. It's real * big because if there's a lot of rules, the initial state will have a * huge epsilon closure. */ @@ -161,7 +161,7 @@ char *malloc(), *realloc(); #define MAX_DFA_SIZE_INCREMENT 750 -/* a note on the following masks. They are used to mark accepting numbers +/* A note on the following masks. They are used to mark accepting numbers * as being special. As such, they implicitly limit the number of accepting * numbers (i.e., rules) because if there are too many rules the rule numbers * will overload the mask bits. Fortunately, this limit is \large/ (0x2000 == @@ -169,18 +169,20 @@ char *malloc(), *realloc(); * new_rule() to ensure that this limit is not reached. */ -/* mask to mark a trailing context accepting number */ +/* Mask to mark a trailing context accepting number. */ #define YY_TRAILING_MASK 0x2000 -/* mask to mark the accepting number of the "head" of a trailing context rule */ +/* Mask to mark the accepting number of the "head" of a trailing context + * rule. + */ #define YY_TRAILING_HEAD_MASK 0x4000 -/* maximum number of rules, as outlined in the above note */ +/* Maximum number of rules, as outlined in the above note. */ #define MAX_RULE (YY_TRAILING_MASK - 1) /* NIL must be 0. If not, its special meaning when making equivalence classes - * (it marks the representative of a given e.c.) will be unidentifiable + * (it marks the representative of a given e.c.) will be unidentifiable. 
*/ #define NIL 0 @@ -192,7 +194,7 @@ char *malloc(), *realloc(); #define INITIAL_MAX_CCLS 100 /* max number of unique character classes */ #define MAX_CCLS_INCREMENT 100 -/* size of table holding members of character classes */ +/* Size of table holding members of character classes. */ #define INITIAL_MAX_CCL_TBL_SIZE 500 #define MAX_CCL_TBL_SIZE_INCREMENT 250 @@ -207,17 +209,17 @@ char *malloc(), *realloc(); #define JAMSTATE -32766 /* marks a reference to the state that always jams */ -/* enough so that if it's subtracted from an NFA state number, the result - * is guaranteed to be negative +/* Enough so that if it's subtracted from an NFA state number, the result + * is guaranteed to be negative. */ #define MARKER_DIFFERENCE 32000 #define MAXIMUM_MNS 31999 -/* maximum number of nxt/chk pairs for non-templates */ +/* Maximum number of nxt/chk pairs for non-templates. */ #define INITIAL_MAX_XPAIRS 2000 #define MAX_XPAIRS_INCREMENT 2000 -/* maximum number of nxt/chk pairs needed for templates */ +/* Maximum number of nxt/chk pairs needed for templates. */ #define INITIAL_MAX_TEMPLATE_XPAIRS 2500 #define MAX_TEMPLATE_XPAIRS_INCREMENT 2500 @@ -229,77 +231,77 @@ char *malloc(), *realloc(); #define ONE_STACK_SIZE 500 /* stack of states with only one out-transition */ #define SAME_TRANS -1 /* transition is the same as "default" entry for state */ -/* the following percentages are used to tune table compression: +/* The following percentages are used to tune table compression: - * the percentage the number of out-transitions a state must be of the + * The percentage the number of out-transitions a state must be of the * number of equivalence classes in order to be considered for table - * compaction by using protos + * compaction by using protos. 
*/ #define PROTO_SIZE_PERCENTAGE 15 -/* the percentage the number of homogeneous out-transitions of a state +/* The percentage the number of homogeneous out-transitions of a state * must be of the number of total out-transitions of the state in order * that the state's transition table is first compared with a potential * template of the most common out-transition instead of with the first - * proto in the proto queue + * proto in the proto queue. */ #define CHECK_COM_PERCENTAGE 50 -/* the percentage the number of differences between a state's transition +/* The percentage the number of differences between a state's transition * table and the proto it was first compared with must be of the total * number of out-transitions of the state in order to keep the first - * proto as a good match and not search any further + * proto as a good match and not search any further. */ #define FIRST_MATCH_DIFF_PERCENTAGE 10 -/* the percentage the number of differences between a state's transition +/* The percentage the number of differences between a state's transition * table and the most similar proto must be of the state's total number - * of out-transitions to use the proto as an acceptable close match + * of out-transitions to use the proto as an acceptable close match. */ #define ACCEPTABLE_DIFF_PERCENTAGE 50 -/* the percentage the number of homogeneous out-transitions of a state +/* The percentage the number of homogeneous out-transitions of a state * must be of the number of total out-transitions of the state in order - * to consider making a template from the state + * to consider making a template from the state. 
*/ #define TEMPLATE_SAME_PERCENTAGE 60 -/* the percentage the number of differences between a state's transition +/* The percentage the number of differences between a state's transition * table and the most similar proto must be of the state's total number - * of out-transitions to create a new proto from the state + * of out-transitions to create a new proto from the state. */ #define NEW_PROTO_DIFF_PERCENTAGE 20 -/* the percentage the total number of out-transitions of a state must be +/* The percentage the total number of out-transitions of a state must be * of the number of equivalence classes in order to consider trying to * fit the transition table into "holes" inside the nxt/chk table. */ #define INTERIOR_FIT_PERCENTAGE 15 -/* size of region set aside to cache the complete transition table of - * protos on the proto queue to enable quick comparisons +/* Size of region set aside to cache the complete transition table of + * protos on the proto queue to enable quick comparisons. */ #define PROT_SAVE_SIZE 2000 #define MSP 50 /* maximum number of saved protos (protos on the proto queue) */ -/* maximum number of out-transitions a state can have that we'll rummage +/* Maximum number of out-transitions a state can have that we'll rummage * around through the interior of the internal fast table looking for a - * spot for it + * spot for it. */ #define MAX_XTIONS_FULL_INTERIOR_FIT 4 -/* maximum number of rules which will be reported as being associated - * with a DFA state +/* Maximum number of rules which will be reported as being associated + * with a DFA state. */ #define MAX_ASSOC_RULES 100 -/* number that, if used to subscript an array, has a good chance of producing - * an error; should be small enough to fit into a short +/* Number that, if used to subscript an array, has a good chance of producing + * an error; should be small enough to fit into a short. 
*/ #define BAD_SUBSCRIPT -32767 -/* absolute value of largest number that can be stored in a short, with a +/* Absolute value of largest number that can be stored in a short, with a * bit of slop thrown in for general paranoia. */ #define MAX_SHORT 32766 @@ -307,19 +309,19 @@ char *malloc(), *realloc(); /* Declarations for global variables. */ -/* variables for symbol tables: +/* Variables for symbol tables: * sctbl - start-condition symbol table * ndtbl - name-definition symbol table * ccltab - character class text symbol table */ struct hash_entry - { - struct hash_entry *prev, *next; - char *name; - char *str_val; - int int_val; - } ; + { + struct hash_entry *prev, *next; + char *name; + char *str_val; + int int_val; + } ; typedef struct hash_entry *hash_table[]; @@ -332,7 +334,7 @@ extern struct hash_entry *sctbl[START_COND_HASH_SIZE]; extern struct hash_entry *ccltab[CCL_HASH_SIZE]; -/* variables for flags: +/* Variables for flags: * printstats - if true (-v), dump statistics * syntaxerror - true if a syntax error has been found * eofseen - true if we've seen an eof in the input file @@ -379,10 +381,10 @@ extern int yymore_used, reject, real_reject, continued_action; extern int yymore_really_used, reject_really_used; -/* variables used in the flex input routines: +/* Variables used in the flex input routines: * datapos - characters on current output line * dataline - number of contiguous lines of data in current data - * statement. Used to generate readable -f output + * statement. 
Used to generate readable -f output * linenum - current input line number * skelfile - the skeleton file * skel - compiled-in skeleton array @@ -399,9 +401,9 @@ extern int yymore_really_used, reject_really_used; * prolog - pointer to where the prolog starts in action_array * action_offset - index where the non-prolog starts in action_array * action_index - index where the next action should go, with respect - * to "action" + * to "action" * action - pointer to where non-prolog starts; equal to - * &action_array[action_offset] + * &action_array[action_offset] */ extern int datapos, dataline, linenum; @@ -417,7 +419,7 @@ extern char *action_array, *prolog, *action; extern int action_size, action_offset, action_index; -/* variables for stack of states having only one out-transition: +/* Variables for stack of states having only one out-transition: * onestate - state number * onesym - transition symbol * onenext - target state @@ -429,10 +431,10 @@ extern int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE]; extern int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; -/* variables for nfa machine data: +/* Variables for nfa machine data: * current_mns - current maximum on number of NFA states * num_rules - number of the last accepting state; also is number of - * rules created so far + * rules created so far * num_eof_rules - number of <> rules * default_rule - number of the default rule * current_max_rules - current maximum number of rules @@ -446,14 +448,14 @@ extern int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; * accptnum - accepting number * assoc_rule - rule associated with this NFA state (or 0 if none) * state_type - a STATE_xxx type identifying whether the state is part - * of a normal rule, the leading state in a trailing context - * rule (i.e., the state which marks the transition from - * recognizing the text-to-be-matched to the beginning of - * the trailing context), or a subsequent state in a trailing - * context rule + * of a normal 
rule, the leading state in a trailing context + * rule (i.e., the state which marks the transition from + * recognizing the text-to-be-matched to the beginning of + * the trailing context), or a subsequent state in a trailing + * context rule * rule_type - a RULE_xxx type identifying whether this a ho-hum - * normal rule or one which has variable head & trailing - * context + * normal rule or one which has variable head & trailing + * context * rule_linenum - line number associated with rule * rule_useful - true if we've determined that the rule can be matched */ @@ -464,27 +466,27 @@ extern int *firstst, *lastst, *finalst, *transchar, *trans1, *trans2; extern int *accptnum, *assoc_rule, *state_type; extern int *rule_type, *rule_linenum, *rule_useful; -/* different types of states; values are useful as masks, as well, for - * routines like check_trailing_context() +/* Different types of states; values are useful as masks, as well, for + * routines like check_trailing_context(). */ #define STATE_NORMAL 0x1 #define STATE_TRAILING_CONTEXT 0x2 -/* global holding current type of state we're making */ +/* Global holding current type of state we're making. */ extern int current_state_type; -/* different types of rules */ +/* Different types of rules. */ #define RULE_NORMAL 0 #define RULE_VARIABLE 1 -/* true if the input rules include a rule with both variable-length head - * and trailing context, false otherwise +/* True if the input rules include a rule with both variable-length head + * and trailing context, false otherwise. 
*/ extern int variable_trailing_context_rules; -/* variables for protos: +/* Variables for protos: * numtemps - number of templates created * numprots - number of protos created * protprev - backlink to a more-recently used proto @@ -500,7 +502,7 @@ extern int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP]; extern int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; -/* variables for managing equivalence classes: +/* Variables for managing equivalence classes: * numecs - number of equivalence classes * nextecm - forward link of Equivalence Class members * ecgroup - class number or backward link of EC members @@ -510,14 +512,14 @@ extern int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; * tecbck - backward link of MEC's */ -/* reserve enough room in the equivalence class arrays so that we +/* Reserve enough room in the equivalence class arrays so that we * can use the CSIZE'th element to hold equivalence class information * for the NUL character. Later we'll move this information into * the 0th element. */ extern int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs; -/* meta-equivalence classes are indexed starting at 1, so it's possible +/* Meta-equivalence classes are indexed starting at 1, so it's possible * that they will require positions from 1 .. CSIZE, i.e., CSIZE + 1 * slots total (since the arrays are 0-based). nextecm[] and ecgroup[] * don't require the extra position since they're indexed from 1 .. CSIZE - 1. 
@@ -525,7 +527,7 @@ extern int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs; extern int tecfwd[CSIZE + 1], tecbck[CSIZE + 1]; -/* variables for start conditions: +/* Variables for start conditions: * lastsc - last start condition created * current_max_scs - current limit on number of start conditions * scset - set of rules active in start condition @@ -534,19 +536,19 @@ extern int tecfwd[CSIZE + 1], tecbck[CSIZE + 1]; * sceof - true if start condition has EOF rule * scname - start condition name * actvsc - stack of active start conditions for the current rule; - * a negative entry means that the start condition is *not* - * active for the current rule. Start conditions may appear - * multiple times on the stack; the entry for it closest - * to the top of the stack (i.e., actvsc[actvp]) is the - * one to use. Others are present from "{" scoping - * constructs. + * a negative entry means that the start condition is *not* + * active for the current rule. Start conditions may appear + * multiple times on the stack; the entry for it closest + * to the top of the stack (i.e., actvsc[actvp]) is the + * one to use. Others are present from "{" scoping + * constructs. 
*/ extern int lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof, *actvsc; extern char **scname; -/* variables for dfa machine data: +/* Variables for dfa machine data: * current_max_dfa_size - current maximum number of NFA states in DFA * current_max_xpairs - current maximum number of non-template xtion pairs * current_max_template_xpairs - current maximum number of template pairs @@ -568,8 +570,8 @@ extern char **scname; * accsiz - size of accepting set for each dfa state * dhash - dfa state hash value * numas - number of DFA accepting states created; note that this - * is not necessarily the same value as num_rules, which is the analogous - * value for the NFA + * is not necessarily the same value as num_rules, which is the analogous + * value for the NFA * numsnpairs - number of state/nextstate transition pairs * jambase - position in base/def where the default jam table starts * jamstate - state number corresponding to "jam" state @@ -581,15 +583,15 @@ extern int current_max_template_xpairs, current_max_dfas; extern int lastdfa, lasttemp, *nxt, *chk, *tnxt; extern int *base, *def, *nultrans, NUL_ec, tblend, firstfree, **dss, *dfasiz; extern union dfaacc_union - { - int *dfaacc_set; - int dfaacc_state; - } *dfaacc; + { + int *dfaacc_set; + int dfaacc_state; + } *dfaacc; extern int *accsiz, *dhash, numas; extern int numsnpairs, jambase, jamstate; extern int end_of_buffer_state; -/* variables for ccl information: +/* Variables for ccl information: * lastccl - ccl index of the last created ccl * current_maxccls - current limit on the maximum number of unique ccl's * cclmap - maps a ccl index to its set pointer @@ -606,7 +608,7 @@ extern int current_max_ccl_tbl_size; extern Char *ccltbl; -/* variables for miscellaneous information: +/* Variables for miscellaneous information: * starttime - real-time when we started * endtime - real-time when we ended * nmstr - last NAME scanned by the scanner @@ -617,7 +619,7 @@ extern Char *ccltbl; * numeps - number of 
epsilon NFA states created * eps2 - number of epsilon states which have 2 out-transitions * num_reallocs - number of times it was necessary to realloc() a group - * of arrays + * of arrays * tmpuses - number of DFA states that chain to templates * totnst - total number of NFA states used to make DFA states * peakpairs - peak number of transition pairs we had to store internally @@ -674,28 +676,28 @@ void *allocate_array(), *reallocate_array(); (Char *) reallocate_array( (void *) array, size, sizeof( Char ) ) -/* used to communicate between scanner and parser. The type should really +/* Used to communicate between scanner and parser. The type should really * be YYSTYPE, but we can't easily get our hands on it. */ extern int yylval; -/* external functions that are cross-referenced among the flex source files */ +/* External functions that are cross-referenced among the flex source files. */ /* from file ccl.c */ -extern void ccladd PROTO((int, int)); /* Add a single character to a ccl */ +extern void ccladd PROTO((int, int)); /* add a single character to a ccl */ extern int cclinit PROTO((void)); /* make an empty ccl */ extern void cclnegate PROTO((int)); /* negate a ccl */ -/* list the members of a set of characters in CCL form */ +/* List the members of a set of characters in CCL form. */ extern void list_character_set PROTO((FILE*, int[])); /* from file dfa.c */ -/* increase the maximum number of dfas */ +/* Increase the maximum number of dfas. */ extern void increase_max_dfas PROTO((void)); extern void ntod PROTO((void)); /* convert a ndfa to a dfa */ @@ -703,16 +705,16 @@ extern void ntod PROTO((void)); /* convert a ndfa to a dfa */ /* from file ecs.c */ -/* convert character classes to set of equivalence classes */ +/* Convert character classes to set of equivalence classes. */ extern void ccl2ecl PROTO((void)); -/* associate equivalence class numbers with class members */ +/* Associate equivalence class numbers with class members. 
*/ extern int cre8ecs PROTO((int[], int[], int)); -/* update equivalence classes based on character class transitions */ +/* Update equivalence classes based on character class transitions. */ extern void mkeccl PROTO((Char[], int, int[], int[], int, int)); -/* create equivalence class for single character */ +/* Create equivalence class for single character. */ extern void mkechar PROTO((int, int[], int[])); @@ -732,94 +734,94 @@ extern void usage PROTO((void)); /* Add the given text to the stored actions. */ extern void add_action PROTO(( char *new_text )); -/* true if a string is all lower case */ +/* True if a string is all lower case. */ extern int all_lower PROTO((register Char *)); -/* true if a string is all upper case */ +/* True if a string is all upper case. */ extern int all_upper PROTO((register Char *)); -/* bubble sort an integer array */ +/* Bubble sort an integer array. */ extern void bubble PROTO((int [], int)); -/* shell sort a character array */ +/* Shell sort a character array. */ extern void cshell PROTO((Char [], int, int)); -/* finish up a block of data declarations */ +/* Finish up a block of data declarations. */ extern void dataend PROTO((void)); -/* report an error message and terminate */ +/* Report an error message and terminate. */ extern void flexerror PROTO((char[])); -/* report a fatal error message and terminate */ +/* Report a fatal error message and terminate. */ extern void flexfatal PROTO((char[])); -/* report an error message formatted with one integer argument */ +/* Report an error message formatted with one integer argument. */ extern void lerrif PROTO((char[], int)); -/* report an error message formatted with one string argument */ +/* Report an error message formatted with one string argument. */ extern void lerrsf PROTO((char[], char[])); -/* spit out a "# line" statement */ +/* Spit out a "# line" statement. 
*/ extern void line_directive_out PROTO((FILE*)); -/* mark the current position in the action array as the end of the prolog */ +/* Mark the current position in the action array as the end of the prolog. */ extern void mark_prolog PROTO(()); -/* generate a data statment for a two-dimensional array */ +/* Generate a data statment for a two-dimensional array. */ extern void mk2data PROTO((int)); extern void mkdata PROTO((int)); /* generate a data statement */ -/* return the integer represented by a string of digits */ +/* Return the integer represented by a string of digits. */ extern int myctoi PROTO((Char [])); -/* write out one section of the skeleton file */ +/* Write out one section of the skeleton file. */ extern void skelout PROTO((void)); -/* output a yy_trans_info structure */ +/* Output a yy_trans_info structure. */ extern void transition_struct_out PROTO((int, int)); -/* set a region of memory to 0 */ +/* Set a region of memory to 0. */ extern void zero_out PROTO((char *, int)); /* from file nfa.c */ -/* add an accepting state to a machine */ +/* Add an accepting state to a machine. */ extern void add_accept PROTO((int, int)); -/* make a given number of copies of a singleton machine */ +/* Make a given number of copies of a singleton machine. */ extern int copysingl PROTO((int, int)); -/* debugging routine to write out an nfa */ +/* Debugging routine to write out an nfa. */ extern void dumpnfa PROTO((int)); -/* finish up the processing for a rule */ +/* Finish up the processing for a rule. */ extern void finish_rule PROTO((int, int, int, int)); -/* connect two machines together */ +/* Connect two machines together. */ extern int link_machines PROTO((int, int)); -/* mark each "beginning" state in a machine as being a "normal" (i.e., - * not trailing context associated) state +/* Mark each "beginning" state in a machine as being a "normal" (i.e., + * not trailing context associated) state. 
*/ extern void mark_beginning_as_normal PROTO((register int)); -/* make a machine that branches to two machines */ +/* Make a machine that branches to two machines. */ extern int mkbranch PROTO((int, int)); extern int mkclos PROTO((int)); /* convert a machine into a closure */ extern int mkopt PROTO((int)); /* make a machine optional */ -/* make a machine that matches either one of two machines */ +/* Make a machine that matches either one of two machines. */ extern int mkor PROTO((int, int)); -/* convert a machine into a positive closure */ +/* Convert a machine into a positive closure. */ extern int mkposcl PROTO((int)); extern int mkrep PROTO((int, int, int)); /* make a replicated machine */ -/* create a state with a transition on a given symbol */ +/* Create a state with a transition on a given symbol. */ extern int mkstate PROTO((int)); extern void new_rule PROTO((void)); /* initialize for a new rule */ @@ -827,19 +829,19 @@ extern void new_rule PROTO((void)); /* initialize for a new rule */ /* from file parse.y */ -/* write out a message formatted with one string, pinpointing its location */ +/* Write out a message formatted with one string, pinpointing its location. */ extern void format_pinpoint_message PROTO((char[], char[])); -/* write out a message, pinpointing its location */ +/* Write out a message, pinpointing its location. */ extern void pinpoint_message PROTO((char[])); -/* write out a warning, pinpointing it at the given line */ +/* Write out a warning, pinpointing it at the given line. */ void line_warning PROTO(( char[], int )); -/* write out a message, pinpointing it at the given line */ +/* Write out a message, pinpointing it at the given line. */ void line_pinpoint PROTO(( char[], int )); -/* report a formatted syntax error */ +/* Report a formatted syntax error. 
*/ extern void format_synerr PROTO((char [], char[])); extern void synerr PROTO((char [])); /* report a syntax error */ extern void warn PROTO((char [])); /* report a warning */ @@ -848,51 +850,52 @@ extern int yyparse PROTO((void)); /* the YACC parser */ /* from file scan.l */ -/* the Flex-generated scanner for flex */ +/* The Flex-generated scanner for flex. */ extern int flexscan PROTO((void)); -/* open the given file (if NULL, stdin) for scanning */ +/* Open the given file (if NULL, stdin) for scanning. */ extern void set_input_file PROTO((char*)); -/* wrapup a file in the lexical analyzer */ +/* Wrapup a file in the lexical analyzer. */ extern int yywrap PROTO((void)); /* from file sym.c */ -/* save the text of a character class */ +/* Save the text of a character class. */ extern void cclinstal PROTO ((Char [], int)); -/* lookup the number associated with character class */ +/* Lookup the number associated with character class. */ extern int ccllookup PROTO((Char [])); extern void ndinstal PROTO((char[], Char[])); /* install a name definition */ -/* increase maximum number of SC's */ +/* Increase maximum number of SC's. */ extern void scextend PROTO((void)); extern void scinstal PROTO((char[], int)); /* make a start condition */ -/* lookup the number associated with a start condition */ +/* Lookup the number associated with a start condition. */ extern int sclookup PROTO((char[])); /* from file tblcmp.c */ -/* build table entries for dfa state */ +/* Build table entries for dfa state. */ extern void bldtbl PROTO((int[], int, int, int, int)); extern void cmptmps PROTO((void)); /* compress template table entries */ extern void inittbl PROTO((void)); /* initialize transition tables */ -/* make the default, "jam" table entries */ +/* Make the default, "jam" table entries. 
*/ extern void mkdeftbl PROTO((void)); -/* create table entries for a state (or state fragment) which has - * only one out-transition */ +/* Create table entries for a state (or state fragment) which has + * only one out-transition. + */ extern void mk1tbl PROTO((int, int, int, int)); -/* place a state into full speed transition table */ +/* Place a state into full speed transition table. */ extern void place_state PROTO((int*, int, int)); -/* save states with only one out-transition to be processed later */ +/* Save states with only one out-transition to be processed later. */ extern void stack1 PROTO((int, int, int, int)); @@ -901,7 +904,7 @@ extern void stack1 PROTO((int, int, int, int)); extern int yylex PROTO((void)); -/* The Unix kernel calls used here */ +/* The Unix system calls used here. */ extern int read PROTO((int, char*, int)); extern int unlink PROTO((char*)); diff --git a/gen.c b/gen.c index b1f6e98..db3f4a1 100644 --- a/gen.c +++ b/gen.c @@ -42,14 +42,14 @@ void indent_put2s PROTO((char [], char [])); void indent_puts PROTO((char [])); -static int indent_level = 0; /* each level is 4 spaces */ +static int indent_level = 0; /* each level is 8 spaces */ #define indent_up() (++indent_level) #define indent_down() (--indent_level) #define set_indent(indent_val) indent_level = indent_val -/* *everything* is done in terms of arrays starting at 1, so provide - * a null entry for the zero element of all C arrays +/* *Everything* is done in terms of arrays starting at 1, so provide + * a null entry for the zero element of all C arrays. */ static char C_short_decl[] = "static const short int %s[%d] =\n { 0,\n"; static char C_long_decl[] = "static const long int %s[%d] =\n { 0,\n"; @@ -57,1278 +57,1298 @@ static char C_state_decl[] = "static const yy_state_type %s[%d] =\n { 0,\n"; -/* indent to the current level */ +/* Indent to the current level. 
*/ void do_indent() + { + register int i = indent_level * 8; - { - register int i = indent_level * 4; + while ( i >= 8 ) + { + putchar( '\t' ); + i -= 8; + } - while ( i >= 8 ) - { - putchar( '\t' ); - i -= 8; - } - - while ( i > 0 ) - { - putchar( ' ' ); - --i; + while ( i > 0 ) + { + putchar( ' ' ); + --i; + } } - } -/* generate the code to keep backtracking information */ +/* Generate the code to keep backtracking information. */ void gen_backtracking() + { + if ( reject || num_backtracking == 0 ) + return; - { - if ( reject || num_backtracking == 0 ) - return; - - if ( fullspd ) - indent_puts( "if ( yy_current_state[-1].yy_nxt )" ); - else - indent_puts( "if ( yy_accept[yy_current_state] )" ); + if ( fullspd ) + indent_puts( "if ( yy_current_state[-1].yy_nxt )" ); + else + indent_puts( "if ( yy_accept[yy_current_state] )" ); - indent_up(); - indent_puts( "{" ); - indent_puts( "yy_last_accepting_state = yy_current_state;" ); - indent_puts( "yy_last_accepting_cpos = yy_cp;" ); - indent_puts( "}" ); - indent_down(); - } + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_last_accepting_state = yy_current_state;" ); + indent_puts( "yy_last_accepting_cpos = yy_cp;" ); + indent_puts( "}" ); + indent_down(); + } -/* generate the code to perform the backtrack */ +/* Generate the code to perform the backtrack. 
*/ void gen_bt_action() + { + if ( reject || num_backtracking == 0 ) + return; - { - if ( reject || num_backtracking == 0 ) - return; - - set_indent( 3 ); + set_indent( 3 ); - indent_puts( "case 0: /* must backtrack */" ); - indent_puts( "/* undo the effects of YY_DO_BEFORE_ACTION */" ); - indent_puts( "*yy_cp = yy_hold_char;" ); + indent_puts( "case 0: /* must backtrack */" ); + indent_puts( "/* undo the effects of YY_DO_BEFORE_ACTION */" ); + indent_puts( "*yy_cp = yy_hold_char;" ); - if ( fullspd || fulltbl ) - indent_puts( "yy_cp = yy_last_accepting_cpos + 1;" ); - else - /* backtracking info for compressed tables is taken \after/ - * yy_cp has been incremented for the next state - */ - indent_puts( "yy_cp = yy_last_accepting_cpos;" ); + if ( fullspd || fulltbl ) + indent_puts( "yy_cp = yy_last_accepting_cpos + 1;" ); + else + /* Backtracking info for compressed tables is taken \after/ + * yy_cp has been incremented for the next state. + */ + indent_puts( "yy_cp = yy_last_accepting_cpos;" ); - indent_puts( "yy_current_state = yy_last_accepting_state;" ); - indent_puts( "goto yy_find_action;" ); - putchar( '\n' ); + indent_puts( "yy_current_state = yy_last_accepting_state;" ); + indent_puts( "goto yy_find_action;" ); + putchar( '\n' ); - set_indent( 0 ); - } + set_indent( 0 ); + } -/* genctbl - generates full speed compressed transition table - * - * synopsis - * genctbl(); - */ +/* genctbl - generates full speed compressed transition table */ void genctbl() - - { - register int i; - int end_of_buffer_action = num_rules + 1; - - /* table of verify for transition and offset to next state */ - printf( "static const struct yy_trans_info yy_transition[%d] =\n", - tblend + numecs + 1 ); - printf( " {\n" ); - - /* We want the transition to be represented as the offset to the - * next state, not the actual state number, which is what it currently is. - * The offset is base[nxt[i]] - base[chk[i]]. 
That's just the - * difference between the starting points of the two involved states - * (to - from). - * - * first, though, we need to find some way to put in our end-of-buffer - * flags and states. We do this by making a state with absolutely no - * transitions. We put it at the end of the table. - */ - /* at this point, we're guaranteed that there's enough room in nxt[] - * and chk[] to hold tblend + numecs entries. We need just two slots. - * One for the action and one for the end-of-buffer transition. We - * now *assume* that we're guaranteed the only character we'll try to - * index this nxt/chk pair with is EOB, i.e., 0, so we don't have to - * make sure there's room for jam entries for other characters. - */ - - base[lastdfa + 1] = tblend + 2; - nxt[tblend + 1] = end_of_buffer_action; - chk[tblend + 1] = numecs + 1; - chk[tblend + 2] = 1; /* anything but EOB */ - nxt[tblend + 2] = 0; /* so that "make test" won't show arb. differences */ - - /* make sure every state has a end-of-buffer transition and an action # */ - for ( i = 0; i <= lastdfa; ++i ) { - register int anum = dfaacc[i].dfaacc_state; + register int i; + int end_of_buffer_action = num_rules + 1; + + /* Table of verify for transition and offset to next state. */ + printf( "static const struct yy_trans_info yy_transition[%d] =\n", + tblend + numecs + 1 ); + printf( " {\n" ); + + /* We want the transition to be represented as the offset to the + * next state, not the actual state number, which is what it currently + * is. The offset is base[nxt[i]] - base[chk[i]]. That's just the + * difference between the starting points of the two involved states + * (to - from). + * + * First, though, we need to find some way to put in our end-of-buffer + * flags and states. We do this by making a state with absolutely no + * transitions. We put it at the end of the table. 
+ */ - chk[base[i]] = EOB_POSITION; - chk[base[i] - 1] = ACTION_POSITION; - nxt[base[i] - 1] = anum; /* action number */ - } + /* At this point, we're guaranteed that there's enough room in nxt[] + * and chk[] to hold tblend + numecs entries. We need just two slots. + * One for the action and one for the end-of-buffer transition. We + * now *assume* that we're guaranteed the only character we'll try to + * index this nxt/chk pair with is EOB, i.e., 0, so we don't have to + * make sure there's room for jam entries for other characters. + */ - for ( i = 0; i <= tblend; ++i ) - { - if ( chk[i] == EOB_POSITION ) - transition_struct_out( 0, base[lastdfa + 1] - i ); + base[lastdfa + 1] = tblend + 2; + nxt[tblend + 1] = end_of_buffer_action; + chk[tblend + 1] = numecs + 1; + chk[tblend + 2] = 1; /* anything but EOB */ - else if ( chk[i] == ACTION_POSITION ) - transition_struct_out( 0, nxt[i] ); + /* So that "make test" won't show arb. differences. */ + nxt[tblend + 2] = 0; - else if ( chk[i] > numecs || chk[i] == 0 ) - transition_struct_out( 0, 0 ); /* unused slot */ + /* Make sure every state has a end-of-buffer transition and an + * action #. 
+ */ + for ( i = 0; i <= lastdfa; ++i ) + { + register int anum = dfaacc[i].dfaacc_state; - else /* verify, transition */ - transition_struct_out( chk[i], base[nxt[i]] - (i - chk[i]) ); - } + chk[base[i]] = EOB_POSITION; + chk[base[i] - 1] = ACTION_POSITION; + nxt[base[i] - 1] = anum; /* action number */ + } + for ( i = 0; i <= tblend; ++i ) + { + if ( chk[i] == EOB_POSITION ) + transition_struct_out( 0, base[lastdfa + 1] - i ); - /* here's the final, end-of-buffer state */ - transition_struct_out( chk[tblend + 1], nxt[tblend + 1] ); - transition_struct_out( chk[tblend + 2], nxt[tblend + 2] ); + else if ( chk[i] == ACTION_POSITION ) + transition_struct_out( 0, nxt[i] ); - printf( " };\n" ); - printf( "\n" ); + else if ( chk[i] > numecs || chk[i] == 0 ) + transition_struct_out( 0, 0 ); /* unused slot */ - /* table of pointers to start states */ - printf( "static const struct yy_trans_info *yy_start_state_list[%d] =\n", - lastsc * 2 + 1 ); - printf( " {\n" ); + else /* verify, transition */ + transition_struct_out( chk[i], + base[nxt[i]] - (i - chk[i]) ); + } - for ( i = 0; i <= lastsc * 2; ++i ) - printf( " &yy_transition[%d],\n", base[i] ); - dataend(); + /* Here's the final, end-of-buffer state. */ + transition_struct_out( chk[tblend + 1], nxt[tblend + 1] ); + transition_struct_out( chk[tblend + 2], nxt[tblend + 2] ); - if ( useecs ) - genecs(); - } + printf( " };\n" ); + printf( "\n" ); + /* Table of pointers to start states. 
*/ + printf( + "static const struct yy_trans_info *yy_start_state_list[%d] =\n", + lastsc * 2 + 1 ); + printf( " {\n" ); /* } so vi doesn't get confused */ -/* generate equivalence-class tables */ + for ( i = 0; i <= lastsc * 2; ++i ) + printf( " &yy_transition[%d],\n", base[i] ); -void genecs() + dataend(); + + if ( useecs ) + genecs(); + } - { - register int i, j; - static char C_char_decl[] = "static const %s %s[%d] =\n { 0,\n"; - int numrows; - Char clower(); - if ( numecs < csize ) - printf( C_char_decl, "YY_CHAR", "yy_ec", csize ); - else - printf( C_char_decl, "short", "yy_ec", csize ); +/* Generate equivalence-class tables. */ - for ( i = 1; i < csize; ++i ) +void genecs() { - if ( caseins && (i >= 'A') && (i <= 'Z') ) - ecgroup[i] = ecgroup[clower( i )]; + Char clower(); + static char C_char_decl[] = "static const %s %s[%d] =\n { 0,\n"; + /* } so vi doesn't get confused */ + register int i, j; + int numrows; + + if ( numecs < csize ) + printf( C_char_decl, "YY_CHAR", "yy_ec", csize ); + else + printf( C_char_decl, "short", "yy_ec", csize ); - ecgroup[i] = abs( ecgroup[i] ); - mkdata( ecgroup[i] ); - } + for ( i = 1; i < csize; ++i ) + { + if ( caseins && (i >= 'A') && (i <= 'Z') ) + ecgroup[i] = ecgroup[clower( i )]; - dataend(); + ecgroup[i] = abs( ecgroup[i] ); + mkdata( ecgroup[i] ); + } - if ( trace ) - { - char *readable_form(); + dataend(); - fputs( "\n\nEquivalence Classes:\n\n", stderr ); + if ( trace ) + { + char *readable_form(); - numrows = csize / 8; + fputs( "\n\nEquivalence Classes:\n\n", stderr ); - for ( j = 0; j < numrows; ++j ) - { - for ( i = j; i < csize; i = i + numrows ) - { - fprintf( stderr, "%4s = %-2d", readable_form( i ), ecgroup[i] ); + numrows = csize / 8; - putc( ' ', stderr ); - } + for ( j = 0; j < numrows; ++j ) + { + for ( i = j; i < csize; i = i + numrows ) + { + fprintf( stderr, "%4s = %-2d", + readable_form( i ), ecgroup[i] ); + + putc( ' ', stderr ); + } - putc( '\n', stderr ); - } + putc( '\n', stderr ); + } + } } - } 
-/* generate the code to find the action number */ +/* Generate the code to find the action number. */ void gen_find_action() + { + if ( fullspd ) + indent_puts( "yy_act = yy_current_state[-1].yy_nxt;" ); - { - if ( fullspd ) - indent_puts( "yy_act = yy_current_state[-1].yy_nxt;" ); + else if ( fulltbl ) + indent_puts( "yy_act = yy_accept[yy_current_state];" ); - else if ( fulltbl ) - indent_puts( "yy_act = yy_accept[yy_current_state];" ); + else if ( reject ) + { + indent_puts( "yy_current_state = *--yy_state_ptr;" ); + indent_puts( "yy_lp = yy_accept[yy_current_state];" ); - else if ( reject ) - { - indent_puts( "yy_current_state = *--yy_state_ptr;" ); - indent_puts( "yy_lp = yy_accept[yy_current_state];" ); + puts( + "find_rule: /* we branch to this label when backtracking */" ); - puts( "find_rule: /* we branch to this label when backtracking */" ); + indent_puts( + "for ( ; ; ) /* until we find what rule we matched */" ); - indent_puts( "for ( ; ; ) /* until we find what rule we matched */" ); + indent_up(); - indent_up(); + indent_puts( "{" ); - indent_puts( "{" ); + indent_puts( + "if ( yy_lp && yy_lp < yy_accept[yy_current_state + 1] )" ); + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_act = yy_acclist[yy_lp];" ); - indent_puts( "if ( yy_lp && yy_lp < yy_accept[yy_current_state + 1] )" ); - indent_up(); - indent_puts( "{" ); - indent_puts( "yy_act = yy_acclist[yy_lp];" ); - - if ( variable_trailing_context_rules ) - { - indent_puts( "if ( yy_act & YY_TRAILING_HEAD_MASK ||" ); - indent_puts( " yy_looking_for_trail_begin )" ); - indent_up(); - indent_puts( "{" ); - - indent_puts( "if ( yy_act == yy_looking_for_trail_begin )" ); - indent_up(); - indent_puts( "{" ); - indent_puts( "yy_looking_for_trail_begin = 0;" ); - indent_puts( "yy_act &= ~YY_TRAILING_HEAD_MASK;" ); - indent_puts( "break;" ); - indent_puts( "}" ); - indent_down(); - - indent_puts( "}" ); - indent_down(); - - indent_puts( "else if ( yy_act & YY_TRAILING_MASK )" ); - indent_up(); - 
indent_puts( "{" ); - indent_puts( + if ( variable_trailing_context_rules ) + { + indent_puts( "if ( yy_act & YY_TRAILING_HEAD_MASK ||" ); + indent_puts( " yy_looking_for_trail_begin )" ); + indent_up(); + indent_puts( "{" ); + + indent_puts( + "if ( yy_act == yy_looking_for_trail_begin )" ); + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_looking_for_trail_begin = 0;" ); + indent_puts( "yy_act &= ~YY_TRAILING_HEAD_MASK;" ); + indent_puts( "break;" ); + indent_puts( "}" ); + indent_down(); + + indent_puts( "}" ); + indent_down(); + + indent_puts( "else if ( yy_act & YY_TRAILING_MASK )" ); + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_looking_for_trail_begin = yy_act & ~YY_TRAILING_MASK;" ); - indent_puts( + indent_puts( "yy_looking_for_trail_begin |= YY_TRAILING_HEAD_MASK;" ); - if ( real_reject ) + if ( real_reject ) + { + /* Remember matched text in case we back up + * due to REJECT. + */ + indent_puts( "yy_full_match = yy_cp;" ); + indent_puts( "yy_full_state = yy_state_ptr;" ); + indent_puts( "yy_full_lp = yy_lp;" ); + } + + indent_puts( "}" ); + indent_down(); + + indent_puts( "else" ); + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_full_match = yy_cp;" ); + indent_puts( "yy_full_state = yy_state_ptr;" ); + indent_puts( "yy_full_lp = yy_lp;" ); + indent_puts( "break;" ); + indent_puts( "}" ); + indent_down(); + + indent_puts( "++yy_lp;" ); + indent_puts( "goto find_rule;" ); + } + + else { - /* remember matched text in case we back up due to REJECT */ + /* Remember matched text in case we back up due to trailing + * context plus REJECT. 
+ */ + indent_up(); + indent_puts( "{" ); indent_puts( "yy_full_match = yy_cp;" ); - indent_puts( "yy_full_state = yy_state_ptr;" ); - indent_puts( "yy_full_lp = yy_lp;" ); + indent_puts( "break;" ); + indent_puts( "}" ); + indent_down(); } - indent_puts( "}" ); - indent_down(); + indent_puts( "}" ); + indent_down(); - indent_puts( "else" ); - indent_up(); - indent_puts( "{" ); - indent_puts( "yy_full_match = yy_cp;" ); - indent_puts( "yy_full_state = yy_state_ptr;" ); - indent_puts( "yy_full_lp = yy_lp;" ); - indent_puts( "break;" ); - indent_puts( "}" ); - indent_down(); + indent_puts( "--yy_cp;" ); - indent_puts( "++yy_lp;" ); - indent_puts( "goto find_rule;" ); - } + /* We could consolidate the following two lines with those at + * the beginning, but at the cost of complaints that we're + * branching inside a loop. + */ + indent_puts( "yy_current_state = *--yy_state_ptr;" ); + indent_puts( "yy_lp = yy_accept[yy_current_state];" ); - else - { - /* remember matched text in case we back up due to trailing context - * plus REJECT - */ - indent_up(); - indent_puts( "{" ); - indent_puts( "yy_full_match = yy_cp;" ); - indent_puts( "break;" ); - indent_puts( "}" ); - indent_down(); - } + indent_puts( "}" ); - indent_puts( "}" ); - indent_down(); - - indent_puts( "--yy_cp;" ); - - /* we could consolidate the following two lines with those at - * the beginning, but at the cost of complaints that we're - * branching inside a loop - */ - indent_puts( "yy_current_state = *--yy_state_ptr;" ); - indent_puts( "yy_lp = yy_accept[yy_current_state];" ); - - indent_puts( "}" ); + indent_down(); + } - indent_down(); + else + /* compressed */ + indent_puts( "yy_act = yy_accept[yy_current_state];" ); } - else - /* compressed */ - indent_puts( "yy_act = yy_accept[yy_current_state];" ); - } - -/* genftbl - generates full transition table - * - * synopsis - * genftbl(); - */ +/* genftbl - generates full transition table */ void genftbl() + { + register int i; + int end_of_buffer_action 
= num_rules + 1; - { - register int i; - int end_of_buffer_action = num_rules + 1; - - printf( C_short_decl, "yy_accept", lastdfa + 1 ); - + printf( C_short_decl, "yy_accept", lastdfa + 1 ); - dfaacc[end_of_buffer_state].dfaacc_state = end_of_buffer_action; + dfaacc[end_of_buffer_state].dfaacc_state = end_of_buffer_action; - for ( i = 1; i <= lastdfa; ++i ) - { - register int anum = dfaacc[i].dfaacc_state; + for ( i = 1; i <= lastdfa; ++i ) + { + register int anum = dfaacc[i].dfaacc_state; - mkdata( anum ); + mkdata( anum ); - if ( trace && anum ) - fprintf( stderr, "state # %d accepts: [%d]\n", i, anum ); - } + if ( trace && anum ) + fprintf( stderr, "state # %d accepts: [%d]\n", + i, anum ); + } - dataend(); + dataend(); - if ( useecs ) - genecs(); + if ( useecs ) + genecs(); - /* don't have to dump the actual full table entries - they were created - * on-the-fly - */ - } + /* Don't have to dump the actual full table entries - they were + * created on-the-fly. + */ + } -/* generate the code to find the next compressed-table state */ +/* Generate the code to find the next compressed-table state. */ void gen_next_compressed_state( char_map ) char *char_map; - - { - indent_put2s( "register YY_CHAR yy_c = %s;", char_map ); - - /* save the backtracking info \before/ computing the next state - * because we always compute one more state than needed - we - * always proceed until we reach a jam state - */ - gen_backtracking(); - - indent_puts( - "while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )" ); - indent_up(); - indent_puts( "{" ); - indent_puts( "yy_current_state = yy_def[yy_current_state];" ); - - if ( usemecs ) { - /* we've arrange it so that templates are never chained - * to one another. 
This means we can afford make a - * very simple test to see if we need to convert to - * yy_c's meta-equivalence class without worrying - * about erroneously looking up the meta-equivalence - * class twice + indent_put2s( "register YY_CHAR yy_c = %s;", char_map ); + + /* Save the backtracking info \before/ computing the next state + * because we always compute one more state than needed - we + * always proceed until we reach a jam state */ - do_indent(); - /* lastdfa + 2 is the beginning of the templates */ - printf( "if ( yy_current_state >= %d )\n", lastdfa + 2 ); + gen_backtracking(); + indent_puts( +"while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )" ); indent_up(); - indent_puts( "yy_c = yy_meta[(unsigned int) yy_c];" ); - indent_down(); - } + indent_puts( "{" ); + indent_puts( "yy_current_state = yy_def[yy_current_state];" ); - indent_puts( "}" ); - indent_down(); + if ( usemecs ) + { + /* We've arrange it so that templates are never chained + * to one another. This means we can afford make a + * very simple test to see if we need to convert to + * yy_c's meta-equivalence class without worrying + * about erroneously looking up the meta-equivalence + * class twice + */ + do_indent(); + + /* lastdfa + 2 is the beginning of the templates */ + printf( "if ( yy_current_state >= %d )\n", lastdfa + 2 ); + + indent_up(); + indent_puts( "yy_c = yy_meta[(unsigned int) yy_c];" ); + indent_down(); + } - indent_puts( + indent_puts( "}" ); + indent_down(); + + indent_puts( "yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];" ); - } + } -/* generate the code to find the next match */ +/* Generate the code to find the next match. */ void gen_next_match() - - { - /* NOTE - changes in here should be reflected in gen_next_state() and - * gen_NUL_trans() - */ - char *char_map = useecs ? "yy_ec[*yy_cp]" : "*yy_cp"; - char *char_map_2 = useecs ? 
"yy_ec[*++yy_cp]" : "*++yy_cp"; - - if ( fulltbl ) { - indent_put2s( - "while ( (yy_current_state = yy_nxt[yy_current_state][%s]) > 0 )", - char_map ); + /* NOTE - changes in here should be reflected in gen_next_state() and + * gen_NUL_trans(). + */ + char *char_map = useecs ? "yy_ec[*yy_cp]" : "*yy_cp"; + char *char_map_2 = useecs ? "yy_ec[*++yy_cp]" : "*++yy_cp"; - indent_up(); + if ( fulltbl ) + { + indent_put2s( + "while ( (yy_current_state = yy_nxt[yy_current_state][%s]) > 0 )", + char_map ); - if ( num_backtracking > 0 ) - { - indent_puts( "{" ); - gen_backtracking(); - putchar( '\n' ); - } + indent_up(); - indent_puts( "++yy_cp;" ); + if ( num_backtracking > 0 ) + { + indent_puts( "{" ); /* } for vi */ + gen_backtracking(); + putchar( '\n' ); + } - if ( num_backtracking > 0 ) - indent_puts( "}" ); + indent_puts( "++yy_cp;" ); - indent_down(); + if ( num_backtracking > 0 ) + /* { for vi */ + indent_puts( "}" ); - putchar( '\n' ); - indent_puts( "yy_current_state = -yy_current_state;" ); - } + indent_down(); - else if ( fullspd ) - { - indent_puts( "{" ); - indent_puts( "register const struct yy_trans_info *yy_trans_info;\n" ); - indent_puts( "register YY_CHAR yy_c;\n" ); - indent_put2s( "for ( yy_c = %s;", char_map ); - indent_puts( + putchar( '\n' ); + indent_puts( "yy_current_state = -yy_current_state;" ); + } + + else if ( fullspd ) + { + indent_puts( "{" ); /* } for vi */ + indent_puts( + "register const struct yy_trans_info *yy_trans_info;\n" ); + indent_puts( "register YY_CHAR yy_c;\n" ); + indent_put2s( "for ( yy_c = %s;", char_map ); + indent_puts( " (yy_trans_info = &yy_current_state[yy_c])->yy_verify == yy_c;" ); - indent_put2s( " yy_c = %s )", char_map_2 ); + indent_put2s( " yy_c = %s )", char_map_2 ); - indent_up(); + indent_up(); - if ( num_backtracking > 0 ) - indent_puts( "{" ); + if ( num_backtracking > 0 ) + indent_puts( "{" ); /* } for vi */ - indent_puts( "yy_current_state += yy_trans_info->yy_nxt;" ); + indent_puts( "yy_current_state += 
yy_trans_info->yy_nxt;" ); - if ( num_backtracking > 0 ) - { - putchar( '\n' ); - gen_backtracking(); - indent_puts( "}" ); - } + if ( num_backtracking > 0 ) + { + putchar( '\n' ); + gen_backtracking(); /* { for vi */ + indent_puts( "}" ); + } - indent_down(); - indent_puts( "}" ); - } + indent_down(); /* { for vi */ + indent_puts( "}" ); + } - else - { /* compressed */ - indent_puts( "do" ); + else + { /* compressed */ + indent_puts( "do" ); - indent_up(); - indent_puts( "{" ); + indent_up(); + indent_puts( "{" ); /* } for vi */ - gen_next_state( false ); + gen_next_state( false ); - indent_puts( "++yy_cp;" ); + indent_puts( "++yy_cp;" ); - indent_puts( "}" ); - indent_down(); + /* { for vi */ + indent_puts( "}" ); + indent_down(); - do_indent(); + do_indent(); - if ( interactive ) - printf( "while ( yy_base[yy_current_state] != %d );\n", jambase ); - else - printf( "while ( yy_current_state != %d );\n", jamstate ); - - if ( ! reject && ! interactive ) - { - /* do the guaranteed-needed backtrack to figure out the match */ - indent_puts( "yy_cp = yy_last_accepting_cpos;" ); - indent_puts( "yy_current_state = yy_last_accepting_state;" ); - } + if ( interactive ) + printf( "while ( yy_base[yy_current_state] != %d );\n", + jambase ); + else + printf( "while ( yy_current_state != %d );\n", + jamstate ); + + if ( ! reject && ! interactive ) + { + /* Do the guaranteed-needed backtrack to figure out + * the match. + */ + indent_puts( "yy_cp = yy_last_accepting_cpos;" ); + indent_puts( + "yy_current_state = yy_last_accepting_state;" ); + } + } } - } -/* generate the code to find the next state */ +/* Generate the code to find the next state. */ void gen_next_state( worry_about_NULs ) int worry_about_NULs; + { /* NOTE - changes in here should be reflected in get_next_match() */ + char char_map[256]; - { /* NOTE - changes in here should be reflected in get_next_match() */ - char char_map[256]; + if ( worry_about_NULs && ! 
nultrans ) + { + if ( useecs ) + (void) sprintf( char_map, + "(*yy_cp ? yy_ec[*yy_cp] : %d)", NUL_ec ); + else + (void) sprintf( char_map, + "(*yy_cp ? *yy_cp : %d)", NUL_ec ); + } - if ( worry_about_NULs && ! nultrans ) - { - if ( useecs ) - (void) sprintf( char_map, "(*yy_cp ? yy_ec[*yy_cp] : %d)", NUL_ec ); else - (void) sprintf( char_map, "(*yy_cp ? *yy_cp : %d)", NUL_ec ); - } - - else - (void) strcpy( char_map, useecs ? "yy_ec[*yy_cp]" : "*yy_cp" ); - - if ( worry_about_NULs && nultrans ) - { - if ( ! fulltbl && ! fullspd ) - /* compressed tables backtrack *before* they match */ - gen_backtracking(); - - indent_puts( "if ( *yy_cp )" ); - indent_up(); - indent_puts( "{" ); - } - - if ( fulltbl ) - indent_put2s( "yy_current_state = yy_nxt[yy_current_state][%s];", - char_map ); - - else if ( fullspd ) - indent_put2s( "yy_current_state += yy_current_state[%s].yy_nxt;", - char_map ); - - else - gen_next_compressed_state( char_map ); - - if ( worry_about_NULs && nultrans ) - { - indent_puts( "}" ); - indent_down(); - indent_puts( "else" ); - indent_up(); - indent_puts( "yy_current_state = yy_NUL_trans[yy_current_state];" ); - indent_down(); - } - - if ( fullspd || fulltbl ) - gen_backtracking(); - - if ( reject ) - indent_puts( "*yy_state_ptr++ = yy_current_state;" ); - } + (void) strcpy( char_map, useecs ? "yy_ec[*yy_cp]" : "*yy_cp" ); + if ( worry_about_NULs && nultrans ) + { + if ( ! fulltbl && ! fullspd ) + /* Compressed tables backtrack *before* they match. */ + gen_backtracking(); -/* generate the code to make a NUL transition */ + indent_puts( "if ( *yy_cp )" ); + indent_up(); + indent_puts( "{" ); /* } for vi */ + } -void gen_NUL_trans() + if ( fulltbl ) + indent_put2s( + "yy_current_state = yy_nxt[yy_current_state][%s];", + char_map ); - { /* NOTE - changes in here should be reflected in get_next_match() */ - int need_backtracking = (num_backtracking > 0 && ! 
reject); + else if ( fullspd ) + indent_put2s( + "yy_current_state += yy_current_state[%s].yy_nxt;", + char_map ); - if ( need_backtracking ) - /* we'll need yy_cp lying around for the gen_backtracking() */ - indent_puts( "register YY_CHAR *yy_cp = yy_c_buf_p;" ); + else + gen_next_compressed_state( char_map ); - putchar( '\n' ); + if ( worry_about_NULs && nultrans ) + { + /* { for vi */ + indent_puts( "}" ); + indent_down(); + indent_puts( "else" ); + indent_up(); + indent_puts( + "yy_current_state = yy_NUL_trans[yy_current_state];" ); + indent_down(); + } - if ( nultrans ) - { - indent_puts( "yy_current_state = yy_NUL_trans[yy_current_state];" ); - indent_puts( "yy_is_jam = (yy_current_state == 0);" ); - } + if ( fullspd || fulltbl ) + gen_backtracking(); - else if ( fulltbl ) - { - do_indent(); - printf( "yy_current_state = yy_nxt[yy_current_state][%d];\n", - NUL_ec ); - indent_puts( "yy_is_jam = (yy_current_state <= 0);" ); + if ( reject ) + indent_puts( "*yy_state_ptr++ = yy_current_state;" ); } - else if ( fullspd ) - { - do_indent(); - printf( "register int yy_c = %d;\n", NUL_ec ); - indent_puts( - "register const struct yy_trans_info *yy_trans_info;\n" ); - indent_puts( "yy_trans_info = &yy_current_state[yy_c];" ); - indent_puts( "yy_current_state += yy_trans_info->yy_nxt;" ); +/* Generate the code to make a NUL transition. */ - indent_puts( "yy_is_jam = (yy_trans_info->yy_verify != yy_c);" ); - } +void gen_NUL_trans() + { /* NOTE - changes in here should be reflected in get_next_match() */ + int need_backtracking = (num_backtracking > 0 && ! reject); - else - { - char NUL_ec_str[20]; + if ( need_backtracking ) + /* We'll need yy_cp lying around for the gen_backtracking(). 
*/ + indent_puts( "register YY_CHAR *yy_cp = yy_c_buf_p;" ); - (void) sprintf( NUL_ec_str, "%d", NUL_ec ); - gen_next_compressed_state( NUL_ec_str ); + putchar( '\n' ); - if ( reject ) - indent_puts( "*yy_state_ptr++ = yy_current_state;" ); + if ( nultrans ) + { + indent_puts( + "yy_current_state = yy_NUL_trans[yy_current_state];" ); + indent_puts( "yy_is_jam = (yy_current_state == 0);" ); + } - do_indent(); + else if ( fulltbl ) + { + do_indent(); + printf( "yy_current_state = yy_nxt[yy_current_state][%d];\n", + NUL_ec ); + indent_puts( "yy_is_jam = (yy_current_state <= 0);" ); + } - printf( "yy_is_jam = (yy_current_state == %d);\n", jamstate ); - } + else if ( fullspd ) + { + do_indent(); + printf( "register int yy_c = %d;\n", NUL_ec ); - /* if we've entered an accepting state, backtrack; note that - * compressed tables have *already* done such backtracking, so - * we needn't bother with it again - */ - if ( need_backtracking && (fullspd || fulltbl) ) - { - putchar( '\n' ); - indent_puts( "if ( ! yy_is_jam )" ); - indent_up(); - indent_puts( "{" ); - gen_backtracking(); - indent_puts( "}" ); - indent_down(); - } - } + indent_puts( + "register const struct yy_trans_info *yy_trans_info;\n" ); + indent_puts( "yy_trans_info = &yy_current_state[yy_c];" ); + indent_puts( "yy_current_state += yy_trans_info->yy_nxt;" ); + indent_puts( + "yy_is_jam = (yy_trans_info->yy_verify != yy_c);" ); + } -/* generate the code to find the start state */ + else + { + char NUL_ec_str[20]; -void gen_start_state() + (void) sprintf( NUL_ec_str, "%d", NUL_ec ); + gen_next_compressed_state( NUL_ec_str ); - { - if ( fullspd ) - indent_put2s( "yy_current_state = yy_start_state_list[yy_start%s];", - bol_needed ? " + (yy_bp[-1] == '\\n' ? 
1 : 0)" : "" ); + if ( reject ) + indent_puts( "*yy_state_ptr++ = yy_current_state;" ); - else - { - indent_puts( "yy_current_state = yy_start;" ); + do_indent(); - if ( bol_needed ) - { - indent_puts( "if ( yy_bp[-1] == '\\n' )" ); - indent_up(); - indent_puts( "++yy_current_state;" ); - indent_down(); - } + printf( "yy_is_jam = (yy_current_state == %d);\n", jamstate ); + } - if ( reject ) - { - /* set up for storing up states */ - indent_puts( "yy_state_ptr = yy_state_buf;" ); - indent_puts( "*yy_state_ptr++ = yy_current_state;" ); - } + /* If we've entered an accepting state, backtrack; note that + * compressed tables have *already* done such backtracking, so + * we needn't bother with it again. + */ + if ( need_backtracking && (fullspd || fulltbl) ) + { + putchar( '\n' ); + indent_puts( "if ( ! yy_is_jam )" ); + indent_up(); + indent_puts( "{" ); + gen_backtracking(); + indent_puts( "}" ); + indent_down(); + } } - } -/* gentabs - generate data statements for the transition tables - * - * synopsis - * gentabs(); - */ +/* Generate the code to find the start state. */ -void gentabs() +void gen_start_state() + { + if ( fullspd ) + indent_put2s( + "yy_current_state = yy_start_state_list[yy_start%s];", + bol_needed ? " + (yy_bp[-1] == '\\n' ? 1 : 0)" : "" ); + + else + { + indent_puts( "yy_current_state = yy_start;" ); - { - int i, j, k, *accset, nacc, *acc_array, total_states; - int end_of_buffer_action = num_rules + 1; + if ( bol_needed ) + { + indent_puts( "if ( yy_bp[-1] == '\\n' )" ); + indent_up(); + indent_puts( "++yy_current_state;" ); + indent_down(); + } - /* *everything* is done in terms of arrays starting at 1, so provide - * a null entry for the zero element of all C arrays - */ - static char C_char_decl[] = - "static const YY_CHAR %s[%d] =\n { 0,\n"; + if ( reject ) + { + /* Set up for storing up states. 
*/ + indent_puts( "yy_state_ptr = yy_state_buf;" ); + indent_puts( "*yy_state_ptr++ = yy_current_state;" ); + } + } + } - acc_array = allocate_integer_array( current_max_dfas ); - nummt = 0; - /* the compressed table format jams by entering the "jam state", - * losing information about the previous state in the process. - * In order to recover the previous state, we effectively need - * to keep backtracking information. - */ - ++num_backtracking; +/* gentabs - generate data statements for the transition tables */ - if ( reject ) +void gentabs() { - /* write out accepting list and pointer list - * - * first we generate the "yy_acclist" array. In the process, we compute - * the indices that will go into the "yy_accept" array, and save the - * indices in the dfaacc array - */ - int EOB_accepting_list[2]; + int i, j, k, *accset, nacc, *acc_array, total_states; + int end_of_buffer_action = num_rules + 1; - /* set up accepting structures for the End Of Buffer state */ - EOB_accepting_list[0] = 0; - EOB_accepting_list[1] = end_of_buffer_action; - accsiz[end_of_buffer_state] = 1; - dfaacc[end_of_buffer_state].dfaacc_set = EOB_accepting_list; - - printf( C_short_decl, "yy_acclist", max( numas, 1 ) + 1 ); + /* *Everything* is done in terms of arrays starting at 1, so provide + * a null entry for the zero element of all C arrays. + */ + static char C_char_decl[] = + "static const YY_CHAR %s[%d] =\n { 0,\n"; /* } for vi */ - j = 1; /* index into "yy_acclist" array */ + acc_array = allocate_integer_array( current_max_dfas ); + nummt = 0; - for ( i = 1; i <= lastdfa; ++i ) - { - acc_array[i] = j; + /* The compressed table format jams by entering the "jam state", + * losing information about the previous state in the process. + * In order to recover the previous state, we effectively need + * to keep backtracking information. 
+ */ + ++num_backtracking; - if ( accsiz[i] != 0 ) + if ( reject ) { - accset = dfaacc[i].dfaacc_set; - nacc = accsiz[i]; + /* Write out accepting list and pointer list. + * + * First we generate the "yy_acclist" array. In the process, + * we compute the indices that will go into the "yy_accept" + * array, and save the indices in the dfaacc array. + */ + int EOB_accepting_list[2]; - if ( trace ) - fprintf( stderr, "state # %d accepts: ", i ); + /* Set up accepting structures for the End Of Buffer state. */ + EOB_accepting_list[0] = 0; + EOB_accepting_list[1] = end_of_buffer_action; + accsiz[end_of_buffer_state] = 1; + dfaacc[end_of_buffer_state].dfaacc_set = EOB_accepting_list; - for ( k = 1; k <= nacc; ++k ) - { - int accnum = accset[k]; + printf( C_short_decl, "yy_acclist", max( numas, 1 ) + 1 ); - ++j; + j = 1; /* index into "yy_acclist" array */ - if ( variable_trailing_context_rules && - ! (accnum & YY_TRAILING_HEAD_MASK) && - accnum > 0 && accnum <= num_rules && - rule_type[accnum] == RULE_VARIABLE ) + for ( i = 1; i <= lastdfa; ++i ) { - /* special hack to flag accepting number as part - * of trailing context rule - */ - accnum |= YY_TRAILING_MASK; + acc_array[i] = j; + + if ( accsiz[i] != 0 ) + { + accset = dfaacc[i].dfaacc_set; + nacc = accsiz[i]; + + if ( trace ) + fprintf( stderr, + "state # %d accepts: ", i ); + + for ( k = 1; k <= nacc; ++k ) + { + int accnum = accset[k]; + + ++j; + + if ( variable_trailing_context_rules && + ! (accnum & YY_TRAILING_HEAD_MASK) && + accnum > 0 && accnum <= num_rules && + rule_type[accnum] == RULE_VARIABLE ) + { + /* Special hack to flag + * accepting number as part + * of trailing context rule. 
+ */ + accnum |= YY_TRAILING_MASK; + } + + mkdata( accnum ); + + if ( trace ) + { + fprintf( stderr, "[%d]", + accset[k] ); + + if ( k < nacc ) + fputs( ", ", stderr ); + else + putc( '\n', stderr ); + } + } + } } - mkdata( accnum ); + /* add accepting number for the "jam" state */ + acc_array[i] = j; - if ( trace ) - { - fprintf( stderr, "[%d]", accset[k] ); - - if ( k < nacc ) - fputs( ", ", stderr ); - else - putc( '\n', stderr ); - } - } + dataend(); } - } - - /* add accepting number for the "jam" state */ - acc_array[i] = j; - dataend(); - } + else + { + dfaacc[end_of_buffer_state].dfaacc_state = end_of_buffer_action; - else - { - dfaacc[end_of_buffer_state].dfaacc_state = end_of_buffer_action; + for ( i = 1; i <= lastdfa; ++i ) + acc_array[i] = dfaacc[i].dfaacc_state; - for ( i = 1; i <= lastdfa; ++i ) - acc_array[i] = dfaacc[i].dfaacc_state; + /* add accepting number for jam state */ + acc_array[i] = 0; + } - /* add accepting number for jam state */ - acc_array[i] = 0; - } + /* Spit out "yy_accept" array. If we're doing "reject", it'll be + * pointers into the "yy_acclist" array. Otherwise it's actual + * accepting numbers. In either case, we just dump the numbers. + */ - /* spit out "yy_accept" array. If we're doing "reject", it'll be pointers - * into the "yy_acclist" array. Otherwise it's actual accepting numbers. - * In either case, we just dump the numbers. - */ - - /* "lastdfa + 2" is the size of "yy_accept"; includes room for C arrays - * beginning at 0 and for "jam" state - */ - k = lastdfa + 2; - - if ( reject ) - /* we put a "cap" on the table associating lists of accepting - * numbers with state numbers. This is needed because we tell - * where the end of an accepting list is by looking at where - * the list for the next state starts. + /* "lastdfa + 2" is the size of "yy_accept"; includes room for C arrays + * beginning at 0 and for "jam" state. 
*/ - ++k; + k = lastdfa + 2; - printf( C_short_decl, "yy_accept", k ); + if ( reject ) + /* We put a "cap" on the table associating lists of accepting + * numbers with state numbers. This is needed because we tell + * where the end of an accepting list is by looking at where + * the list for the next state starts. + */ + ++k; - for ( i = 1; i <= lastdfa; ++i ) - { - mkdata( acc_array[i] ); + printf( C_short_decl, "yy_accept", k ); - if ( ! reject && trace && acc_array[i] ) - fprintf( stderr, "state # %d accepts: [%d]\n", i, acc_array[i] ); - } + for ( i = 1; i <= lastdfa; ++i ) + { + mkdata( acc_array[i] ); - /* add entry for "jam" state */ - mkdata( acc_array[i] ); + if ( ! reject && trace && acc_array[i] ) + fprintf( stderr, "state # %d accepts: [%d]\n", + i, acc_array[i] ); + } - if ( reject ) - /* add "cap" for the list */ + /* Add entry for "jam" state. */ mkdata( acc_array[i] ); - dataend(); + if ( reject ) + /* Add "cap" for the list. */ + mkdata( acc_array[i] ); - if ( useecs ) - genecs(); + dataend(); - if ( usemecs ) - { - /* write out meta-equivalence classes (used to index templates with) */ + if ( useecs ) + genecs(); - if ( trace ) - fputs( "\n\nMeta-Equivalence Classes:\n", stderr ); + if ( usemecs ) + { + /* Write out meta-equivalence classes (used to index + * templates with). + */ - printf( C_char_decl, "yy_meta", numecs + 1 ); + if ( trace ) + fputs( "\n\nMeta-Equivalence Classes:\n", stderr ); - for ( i = 1; i <= numecs; ++i ) - { - if ( trace ) - fprintf( stderr, "%d = %d\n", i, abs( tecbck[i] ) ); + printf( C_char_decl, "yy_meta", numecs + 1 ); - mkdata( abs( tecbck[i] ) ); - } + for ( i = 1; i <= numecs; ++i ) + { + if ( trace ) + fprintf( stderr, "%d = %d\n", + i, abs( tecbck[i] ) ); - dataend(); - } + mkdata( abs( tecbck[i] ) ); + } - total_states = lastdfa + numtemps; + dataend(); + } - printf( total_states > MAX_SHORT ? 
C_long_decl : C_short_decl, - "yy_base", total_states + 1 ); + total_states = lastdfa + numtemps; - for ( i = 1; i <= lastdfa; ++i ) - { - register int d = def[i]; + printf( total_states > MAX_SHORT ? C_long_decl : C_short_decl, + "yy_base", total_states + 1 ); - if ( base[i] == JAMSTATE ) - base[i] = jambase; + for ( i = 1; i <= lastdfa; ++i ) + { + register int d = def[i]; - if ( d == JAMSTATE ) - def[i] = jamstate; + if ( base[i] == JAMSTATE ) + base[i] = jambase; - else if ( d < 0 ) - { - /* template reference */ - ++tmpuses; - def[i] = lastdfa - d + 1; - } + if ( d == JAMSTATE ) + def[i] = jamstate; - mkdata( base[i] ); - } + else if ( d < 0 ) + { + /* Template reference. */ + ++tmpuses; + def[i] = lastdfa - d + 1; + } - /* generate jam state's base index */ - mkdata( base[i] ); + mkdata( base[i] ); + } - for ( ++i /* skip jam state */; i <= total_states; ++i ) - { + /* Generate jam state's base index. */ mkdata( base[i] ); - def[i] = jamstate; - } - dataend(); + for ( ++i /* skip jam state */; i <= total_states; ++i ) + { + mkdata( base[i] ); + def[i] = jamstate; + } - printf( tblend > MAX_SHORT ? C_long_decl : C_short_decl, - "yy_def", total_states + 1 ); + dataend(); - for ( i = 1; i <= total_states; ++i ) - mkdata( def[i] ); + printf( tblend > MAX_SHORT ? C_long_decl : C_short_decl, + "yy_def", total_states + 1 ); - dataend(); + for ( i = 1; i <= total_states; ++i ) + mkdata( def[i] ); - printf( lastdfa > MAX_SHORT ? C_long_decl : C_short_decl, - "yy_nxt", tblend + 1 ); + dataend(); - for ( i = 1; i <= tblend; ++i ) - { - if ( nxt[i] == 0 || chk[i] == 0 ) - nxt[i] = jamstate; /* new state is the JAM state */ + printf( lastdfa > MAX_SHORT ? C_long_decl : C_short_decl, + "yy_nxt", tblend + 1 ); - mkdata( nxt[i] ); - } + for ( i = 1; i <= tblend; ++i ) + { + if ( nxt[i] == 0 || chk[i] == 0 ) + nxt[i] = jamstate; /* new state is the JAM state */ - dataend(); + mkdata( nxt[i] ); + } - printf( lastdfa > MAX_SHORT ? 
C_long_decl : C_short_decl, - "yy_chk", tblend + 1 ); + dataend(); - for ( i = 1; i <= tblend; ++i ) - { - if ( chk[i] == 0 ) - ++nummt; + printf( lastdfa > MAX_SHORT ? C_long_decl : C_short_decl, + "yy_chk", tblend + 1 ); - mkdata( chk[i] ); - } + for ( i = 1; i <= tblend; ++i ) + { + if ( chk[i] == 0 ) + ++nummt; + + mkdata( chk[i] ); + } - dataend(); - } + dataend(); + } -/* write out a formatted string (with a secondary string argument) at the - * current indentation level, adding a final newline +/* Write out a formatted string (with a secondary string argument) at the + * current indentation level, adding a final newline. */ void indent_put2s( fmt, arg ) char fmt[], arg[]; - - { - do_indent(); - printf( fmt, arg ); - putchar( '\n' ); - } + { + do_indent(); + printf( fmt, arg ); + putchar( '\n' ); + } -/* write out a string at the current indentation level, adding a final - * newline +/* Write out a string at the current indentation level, adding a final + * newline. */ void indent_puts( str ) char str[]; - - { - do_indent(); - puts( str ); - } + { + do_indent(); + puts( str ); + } -/* make_tables - generate transition tables - * - * synopsis - * make_tables(); - * - * Generates transition tables and finishes generating output file +/* make_tables - generate transition tables and finishes generating output file */ void make_tables() + { + register int i; + int did_eof_rule = false; - { - register int i; - int did_eof_rule = false; - - skelout(); + skelout(); - /* first, take care of YY_DO_BEFORE_ACTION depending on yymore being used */ - set_indent( 2 ); + /* First, take care of YY_DO_BEFORE_ACTION depending on yymore + * being used. 
+ */ + set_indent( 2 ); - if ( yymore_used ) - { - indent_puts( "yytext_ptr -= yy_more_len; \\" ); - indent_puts( "yyleng = yy_cp - yytext_ptr; \\" ); - } + if ( yymore_used ) + { + indent_puts( "yytext_ptr -= yy_more_len; \\" ); + indent_puts( "yyleng = yy_cp - yytext_ptr; \\" ); + } - else - indent_puts( "yyleng = yy_cp - yy_bp; \\" ); + else + indent_puts( "yyleng = yy_cp - yy_bp; \\" ); - /* now also deal with copying yytext_ptr to yytext if needed */ - skelout(); - if ( yytext_is_array ) - { - indent_puts( "if ( yyleng >= YYLMAX ) \\" ); - indent_up(); - indent_puts( + /* Now also deal with copying yytext_ptr to yytext if needed. */ + skelout(); + if ( yytext_is_array ) + { + indent_puts( "if ( yyleng >= YYLMAX ) \\" ); + indent_up(); + indent_puts( "YY_FATAL_ERROR( \"token too large, exceeds YYLMAX\" ); \\" ); - indent_down(); - indent_puts( "strcpy( yytext, (char *) yytext_ptr ); \\" ); - } + indent_down(); + indent_puts( "strcpy( yytext, (char *) yytext_ptr ); \\" ); + } - set_indent( 0 ); - - skelout(); + set_indent( 0 ); + skelout(); - printf( "#define YY_END_OF_BUFFER %d\n", num_rules + 1 ); - if ( fullspd ) - { /* need to define the transet type as a size large - * enough to hold the biggest offset - */ - int total_table_size = tblend + numecs + 1; - char *trans_offset_type = - total_table_size > MAX_SHORT ? "long" : "short"; + printf( "#define YY_END_OF_BUFFER %d\n", num_rules + 1 ); - set_indent( 0 ); - indent_puts( "struct yy_trans_info" ); - indent_up(); - indent_puts( "{" ); - indent_puts( "short yy_verify;" ); - - /* in cases where its sister yy_verify *is* a "yes, there is a - * transition", yy_nxt is the offset (in records) to the next state. - * In most cases where there is no transition, the value of yy_nxt - * is irrelevant. 
If yy_nxt is the -1th record of a state, though, - * then yy_nxt is the action number for that state - */ - - indent_put2s( "%s yy_nxt;", trans_offset_type ); - indent_puts( "};" ); - indent_down(); + if ( fullspd ) + { + /* Need to define the transet type as a size large + * enough to hold the biggest offset. + */ + int total_table_size = tblend + numecs + 1; + char *trans_offset_type = + total_table_size > MAX_SHORT ? "long" : "short"; + + set_indent( 0 ); + indent_puts( "struct yy_trans_info" ); + indent_up(); + indent_puts( "{" ); /* } for vi */ + indent_puts( "short yy_verify;" ); + + /* In cases where its sister yy_verify *is* a "yes, there is + * a transition", yy_nxt is the offset (in records) to the + * next state. In most cases where there is no transition, + * the value of yy_nxt is irrelevant. If yy_nxt is the -1th + * record of a state, though, then yy_nxt is the action number + * for that state. + */ + + indent_put2s( "%s yy_nxt;", trans_offset_type ); + indent_puts( "};" ); + indent_down(); + + indent_puts( + "typedef const struct yy_trans_info *yy_state_type;" ); + } - indent_puts( "typedef const struct yy_trans_info *yy_state_type;" ); - } - - else - indent_puts( "typedef int yy_state_type;" ); + else + indent_puts( "typedef int yy_state_type;" ); - if ( fullspd ) - genctbl(); + if ( fullspd ) + genctbl(); + else if ( fulltbl ) + genftbl(); + else + gentabs(); - else if ( fulltbl ) - genftbl(); + if ( num_backtracking > 0 ) + { + indent_puts( "static yy_state_type yy_last_accepting_state;" ); + indent_puts( "static YY_CHAR *yy_last_accepting_cpos;\n" ); + } - else - gentabs(); + if ( nultrans ) + { + printf( C_state_decl, "yy_NUL_trans", lastdfa + 1 ); - if ( num_backtracking > 0 ) - { - indent_puts( "static yy_state_type yy_last_accepting_state;" ); - indent_puts( "static YY_CHAR *yy_last_accepting_cpos;\n" ); - } + for ( i = 1; i <= lastdfa; ++i ) + { + if ( fullspd ) + { + if ( nultrans ) + printf( " &yy_transition[%d],\n", + base[i] ); + else + 
printf( " 0,\n" ); + } - if ( nultrans ) - { - printf( C_state_decl, "yy_NUL_trans", lastdfa + 1 ); + else + mkdata( nultrans[i] ); + } - for ( i = 1; i <= lastdfa; ++i ) - { - if ( fullspd ) - { - if ( nultrans ) - printf( " &yy_transition[%d],\n", base[i] ); - else - printf( " 0,\n" ); + dataend(); } - - else - mkdata( nultrans[i] ); - } - - dataend(); - } - if ( ddebug ) - { /* spit out table mapping rules to line numbers */ - indent_puts( "extern int yy_flex_debug;" ); - indent_puts( "int yy_flex_debug = 1;\n" ); + if ( ddebug ) + { /* Spit out table mapping rules to line numbers. */ + indent_puts( "extern int yy_flex_debug;" ); + indent_puts( "int yy_flex_debug = 1;\n" ); - printf( C_short_decl, "yy_rule_linenum", num_rules ); - for ( i = 1; i < num_rules; ++i ) - mkdata( rule_linenum[i] ); - dataend(); - } + printf( C_short_decl, "yy_rule_linenum", num_rules ); + for ( i = 1; i < num_rules; ++i ) + mkdata( rule_linenum[i] ); + dataend(); + } - if ( reject ) - { - /* declare state buffer variables */ - puts( + if ( reject ) + { + /* Declare state buffer variables. 
*/ + puts( "static yy_state_type yy_state_buf[YY_BUF_SIZE + 2], *yy_state_ptr;" ); - puts( "static YY_CHAR *yy_full_match;" ); - puts( "static int yy_lp;" ); - - if ( variable_trailing_context_rules ) - { - puts( "static int yy_looking_for_trail_begin = 0;" ); - puts( "static int yy_full_lp;" ); - puts( "static int *yy_full_state;" ); - printf( "#define YY_TRAILING_MASK 0x%x\n", - (unsigned int) YY_TRAILING_MASK ); - printf( "#define YY_TRAILING_HEAD_MASK 0x%x\n", - (unsigned int) YY_TRAILING_HEAD_MASK ); - } - - puts( "#define REJECT \\" ); - puts( "{ \\" ); - puts( + puts( "static YY_CHAR *yy_full_match;" ); + puts( "static int yy_lp;" ); + + if ( variable_trailing_context_rules ) + { + puts( "static int yy_looking_for_trail_begin = 0;" ); + puts( "static int yy_full_lp;" ); + puts( "static int *yy_full_state;" ); + printf( "#define YY_TRAILING_MASK 0x%x\n", + (unsigned int) YY_TRAILING_MASK ); + printf( "#define YY_TRAILING_HEAD_MASK 0x%x\n", + (unsigned int) YY_TRAILING_HEAD_MASK ); + } + + puts( "#define REJECT \\" ); + puts( "{ \\" ); /* } for vi */ + puts( "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */ \\" ); - puts( - "yy_cp = yy_full_match; /* restore poss. backed-over text */ \\" ); + puts( + "yy_cp = yy_full_match; /* restore poss. backed-over text */ \\" ); - if ( variable_trailing_context_rules ) - { - puts( "yy_lp = yy_full_lp; /* restore orig. accepting pos. */ \\" ); - puts( + if ( variable_trailing_context_rules ) + { + puts( + "yy_lp = yy_full_lp; /* restore orig. accepting pos. */ \\" ); + puts( "yy_state_ptr = yy_full_state; /* restore orig. state */ \\" ); - puts( - "yy_current_state = *yy_state_ptr; /* restore curr. 
state */ \\" ); - } - - puts( "++yy_lp; \\" ); - puts( "goto find_rule; \\" ); - puts( "}" ); - } - - else - { - puts( "/* the intent behind this definition is that it'll catch" ); - puts( " * any uses of REJECT which flex missed" ); - puts( " */" ); - puts( "#define REJECT reject_used_but_not_detected" ); - } - - if ( yymore_used ) - { - indent_puts( "static int yy_more_flag = 0;" ); - indent_puts( "static int yy_doing_yy_more = 0;" ); - indent_puts( "static int yy_more_len = 0;" ); - indent_puts( - "#define yymore() do { yy_more_flag = 1; } while ( 0 )" ); - indent_puts( - "#define YY_MORE_ADJ (yy_doing_yy_more ? yy_more_len : 0)" ); - } + puts( + "yy_current_state = *yy_state_ptr; /* restore curr. state */ \\" ); + } - else - { - indent_puts( "#define yymore() yymore_used_but_not_detected" ); - indent_puts( "#define YY_MORE_ADJ 0" ); - } + puts( "++yy_lp; \\" ); + puts( "goto find_rule; \\" ); + /* { for vi */ + puts( "}" ); + } - skelout(); + else + { + puts( + "/* The intent behind this definition is that it'll catch" ); + puts( " * any uses of REJECT which flex missed." ); + puts( " */" ); + puts( "#define REJECT reject_used_but_not_detected" ); + } - /* copy prolog to output file */ - fputs( prolog, stdout ); + if ( yymore_used ) + { + indent_puts( "static int yy_more_flag = 0;" ); + indent_puts( "static int yy_doing_yy_more = 0;" ); + indent_puts( "static int yy_more_len = 0;" ); + indent_puts( + "#define yymore() do { yy_more_flag = 1; } while ( 0 )" ); + indent_puts( + "#define YY_MORE_ADJ (yy_doing_yy_more ? 
yy_more_len : 0)" ); + } - skelout(); + else + { + indent_puts( "#define yymore() yymore_used_but_not_detected" ); + indent_puts( "#define YY_MORE_ADJ 0" ); + } - set_indent( 2 ); + skelout(); - if ( yymore_used ) - { - indent_puts( "yy_more_len = 0;" ); - indent_puts( "yy_doing_yy_more = yy_more_flag;" ); - indent_puts( "if ( yy_doing_yy_more )" ); - indent_up(); - indent_puts( "{" ); - indent_puts( "yy_more_len = yyleng;" ); - indent_puts( "yy_more_flag = 0;" ); - indent_puts( "}" ); - indent_down(); - } + /* Copy prolog to output file. */ + fputs( prolog, stdout ); - skelout(); + skelout(); - gen_start_state(); + set_indent( 2 ); - /* note, don't use any indentation */ - puts( "yy_match:" ); - gen_next_match(); + if ( yymore_used ) + { + indent_puts( "yy_more_len = 0;" ); + indent_puts( "yy_doing_yy_more = yy_more_flag;" ); + indent_puts( "if ( yy_doing_yy_more )" ); + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_more_len = yyleng;" ); + indent_puts( "yy_more_flag = 0;" ); + indent_puts( "}" ); + indent_down(); + } - skelout(); - set_indent( 2 ); - gen_find_action(); + skelout(); - skelout(); - if ( ddebug ) - { - indent_puts( "if ( yy_flex_debug )" ); - indent_up(); + gen_start_state(); - indent_puts( "{" ); - indent_puts( "if ( yy_act == 0 )" ); - indent_up(); - indent_puts( "fprintf( stderr, \"--scanner backtracking\\n\" );" ); - indent_down(); + /* Note, don't use any indentation. 
*/ + puts( "yy_match:" ); + gen_next_match(); - do_indent(); - printf( "else if ( yy_act < %d )\n", num_rules ); - indent_up(); - indent_puts( - "fprintf( stderr, \"--accepting rule at line %d (\\\"%s\\\")\\n\"," ); - indent_puts( " yy_rule_linenum[yy_act], yytext );" ); - indent_down(); + skelout(); + set_indent( 2 ); + gen_find_action(); - do_indent(); - printf( "else if ( yy_act == %d )\n", num_rules ); - indent_up(); - indent_puts( + skelout(); + if ( ddebug ) + { + indent_puts( "if ( yy_flex_debug )" ); + indent_up(); + + indent_puts( "{" ); + indent_puts( "if ( yy_act == 0 )" ); + indent_up(); + indent_puts( + "fprintf( stderr, \"--scanner backtracking\\n\" );" ); + indent_down(); + + do_indent(); + printf( "else if ( yy_act < %d )\n", num_rules ); + indent_up(); + indent_puts( + "fprintf( + stderr, \"--accepting rule at line %d (\\\"%s\\\")\\n\"," ); + indent_puts( " yy_rule_linenum[yy_act], yytext );" ); + indent_down(); + + do_indent(); + printf( "else if ( yy_act == %d )\n", num_rules ); + indent_up(); + indent_puts( "fprintf( stderr, \"--accepting default rule (\\\"%s\\\")\\n\"," ); - indent_puts( " yytext );" ); - indent_down(); + indent_puts( " yytext );" ); + indent_down(); + + do_indent(); + printf( "else if ( yy_act == %d )\n", num_rules + 1 ); + indent_up(); + indent_puts( + "fprintf( stderr, \"--(end of buffer or a NUL)\\n\" );" ); + indent_down(); + + do_indent(); + printf( "else\n" ); + indent_up(); + indent_puts( "fprintf( stderr, \"--EOF\\n\" );" ); + indent_down(); + + indent_puts( "}" ); + indent_down(); + } - do_indent(); - printf( "else if ( yy_act == %d )\n", num_rules + 1 ); + /* Copy actions to output file. 
*/ + skelout(); indent_up(); - indent_puts( "fprintf( stderr, \"--(end of buffer or a NUL)\\n\" );" ); - indent_down(); + gen_bt_action(); + fputs( action, stdout ); - do_indent(); - printf( "else\n" ); - indent_up(); - indent_puts( "fprintf( stderr, \"--EOF\\n\" );" ); - indent_down(); + /* generate cases for any missing EOF rules */ + for ( i = 1; i <= lastsc; ++i ) + if ( ! sceof[i] ) + { + do_indent(); + printf( "case YY_STATE_EOF(%s):\n", scname[i] ); + did_eof_rule = true; + } - indent_puts( "}" ); - indent_down(); - } + if ( did_eof_rule ) + { + indent_up(); + indent_puts( "yyterminate();" ); + indent_down(); + } - /* copy actions to output file */ - skelout(); - indent_up(); - gen_bt_action(); - fputs( action, stdout ); - - /* generate cases for any missing EOF rules */ - for ( i = 1; i <= lastsc; ++i ) - if ( ! sceof[i] ) - { - do_indent(); - printf( "case YY_STATE_EOF(%s):\n", scname[i] ); - did_eof_rule = true; - } - - if ( did_eof_rule ) - { - indent_up(); - indent_puts( "yyterminate();" ); - indent_down(); - } + /* Generate code for handling NUL's, if needed. */ - /* generate code for handling NUL's, if needed */ - - /* first, deal with backtracking and setting up yy_cp if the scanner - * finds that it should JAM on the NUL - */ - skelout(); - set_indent( 7 ); - - if ( fullspd || fulltbl ) - indent_puts( "yy_cp = yy_c_buf_p;" ); - - else - { /* compressed table */ - if ( ! reject && ! interactive ) - { - /* do the guaranteed-needed backtrack to figure out the match */ - indent_puts( "yy_cp = yy_last_accepting_cpos;" ); - indent_puts( "yy_current_state = yy_last_accepting_state;" ); - } - } + /* First, deal with backtracking and setting up yy_cp if the scanner + * finds that it should JAM on the NUL> + */ + skelout(); + set_indent( 7 ); + if ( fullspd || fulltbl ) + indent_puts( "yy_cp = yy_c_buf_p;" ); + + else + { /* compressed table */ + if ( ! reject && ! interactive ) + { + /* Do the guaranteed-needed backtrack to figure + * out the match. 
+ */ + indent_puts( "yy_cp = yy_last_accepting_cpos;" ); + indent_puts( + "yy_current_state = yy_last_accepting_state;" ); + } + } - /* generate code for yy_get_previous_state() */ - set_indent( 1 ); - skelout(); - if ( bol_needed ) - indent_puts( "register YY_CHAR *yy_bp = yytext_ptr;\n" ); + /* Generate code for yy_get_previous_state(). */ + set_indent( 1 ); + skelout(); - gen_start_state(); + if ( bol_needed ) + indent_puts( "register YY_CHAR *yy_bp = yytext_ptr;\n" ); + + gen_start_state(); - set_indent( 2 ); - skelout(); - gen_next_state( true ); + set_indent( 2 ); + skelout(); + gen_next_state( true ); - set_indent( 1 ); - skelout(); - gen_NUL_trans(); + set_indent( 1 ); + skelout(); + gen_NUL_trans(); - skelout(); + skelout(); - /* copy remainder of input to output */ + /* Copy remainder of input to output. */ - line_directive_out( stdout ); - (void) flexscan(); /* copy remainder of input to output */ - } + line_directive_out( stdout ); + (void) flexscan(); /* copy remainder of input to output */ + } diff --git a/libmain.c b/libmain.c index 951bdaa..7432ee6 100644 --- a/libmain.c +++ b/libmain.c @@ -7,7 +7,6 @@ extern int yylex(); int main( argc, argv ) int argc; char *argv[]; - - { - return yylex(); - } + { + return yylex(); + } diff --git a/main.c b/main.c index 2703f4f..1370829 100644 --- a/main.c +++ b/main.c @@ -104,6 +104,7 @@ static char *outfile = "lex.yy.c"; #else static char *outfile = "lexyy.c"; #endif + static int outfile_created = 0; static int use_stdout; static char *skelname = NULL; @@ -112,743 +113,757 @@ static char *skelname = NULL; int main( argc, argv ) int argc; char **argv; + { + int i; - { - int i; - - flexinit( argc, argv ); + flexinit( argc, argv ); - readin(); + readin(); - if ( syntaxerror ) - flexend( 1 ); + if ( syntaxerror ) + flexend( 1 ); - if ( yymore_really_used == REALLY_USED ) - yymore_used = true; - else if ( yymore_really_used == REALLY_NOT_USED ) - yymore_used = false; + if ( yymore_really_used == REALLY_USED ) + 
yymore_used = true; + else if ( yymore_really_used == REALLY_NOT_USED ) + yymore_used = false; - if ( reject_really_used == REALLY_USED ) - reject = true; - else if ( reject_really_used == REALLY_NOT_USED ) - reject = false; + if ( reject_really_used == REALLY_USED ) + reject = true; + else if ( reject_really_used == REALLY_NOT_USED ) + reject = false; - if ( performance_report > 0 ) - { - if ( performance_report > 1 ) - { - if ( interactive ) - fprintf( stderr, - "-I (interactive) entails a minor performance penalty\n" ); + if ( performance_report > 0 ) + { + if ( performance_report > 1 ) + { + if ( interactive ) + fprintf( stderr, + "-I (interactive) entails a minor performance penalty\n" ); - if ( yymore_used ) - fprintf( stderr, - "yymore() entails a minor performance penalty\n" ); - } + if ( yymore_used ) + fprintf( stderr, + "yymore() entails a minor performance penalty\n" ); + } - if ( reject ) - fprintf( stderr, "REJECT entails a large performance penalty\n" ); + if ( reject ) + fprintf( stderr, + "REJECT entails a large performance penalty\n" ); - if ( variable_trailing_context_rules ) - fprintf( stderr, + if ( variable_trailing_context_rules ) + fprintf( stderr, "Variable trailing context rules entail a large performance penalty\n" ); - } + } - if ( reject ) - real_reject = true; + if ( reject ) + real_reject = true; - if ( variable_trailing_context_rules ) - reject = true; + if ( variable_trailing_context_rules ) + reject = true; - if ( (fulltbl || fullspd) && reject ) - { - if ( real_reject ) - flexerror( "REJECT cannot be used with -f or -F" ); - else - flexerror( + if ( (fulltbl || fullspd) && reject ) + { + if ( real_reject ) + flexerror( "REJECT cannot be used with -f or -F" ); + else + flexerror( "variable trailing context rules cannot be used with -f or -F" ); - } + } - ntod(); + ntod(); - for ( i = 1; i <= num_rules; ++i ) - if ( ! 
rule_useful[i] && i != default_rule ) - line_warning( "rule cannot be matched", rule_linenum[i] ); + for ( i = 1; i <= num_rules; ++i ) + if ( ! rule_useful[i] && i != default_rule ) + line_warning( "rule cannot be matched", + rule_linenum[i] ); - if ( spprdflt && ! reject && rule_useful[default_rule] ) - line_warning( "-s option given but default rule can be matched", - rule_linenum[default_rule] ); + if ( spprdflt && ! reject && rule_useful[default_rule] ) + line_warning( "-s option given but default rule can be matched", + rule_linenum[default_rule] ); - /* generate the C state transition tables from the DFA */ - make_tables(); + /* Generate the C state transition tables from the DFA. */ + make_tables(); - /* note, flexend does not return. It exits with its argument as status. */ - flexend( 0 ); + /* Note, flexend does not return. It exits with its argument + * as status. + */ + flexend( 0 ); - return 0; /* keep compilers/lint happy */ - } + return 0; /* keep compilers/lint happy */ + } /* flexend - terminate flex - * - * synopsis - * int status; - * flexend( status ); - * - * status is exit status. * * note * This routine does not return. 
*/ -void flexend( status ) -int status; - - { - int tblsiz; - char *flex_gettime(); - - if ( skelfile != NULL ) - { - if ( ferror( skelfile ) ) - flexfatal( "error occurred when reading skeleton file" ); - - else if ( fclose( skelfile ) ) - flexfatal( "error occurred when closing skeleton file" ); - } +void flexend( exit_status ) +int exit_status; - if ( status != 0 && outfile_created ) { - if ( ferror( stdout ) ) - flexfatal( "error occurred when writing output file" ); + int tblsiz; + char *flex_gettime(); - else if ( fclose( stdout ) ) - flexfatal( "error occurred when closing output file" ); - - else if ( unlink( outfile ) ) - flexfatal( "error occurred when deleting output file" ); - } + if ( skelfile != NULL ) + { + if ( ferror( skelfile ) ) + flexfatal( + "error occurred when reading skeleton file" ); - if ( backtrack_report && backtrack_file ) - { - if ( num_backtracking == 0 ) - fprintf( backtrack_file, "No backtracking.\n" ); - else if ( fullspd || fulltbl ) - fprintf( backtrack_file, - "%d backtracking (non-accepting) states.\n", - num_backtracking ); - else - fprintf( backtrack_file, "Compressed tables always backtrack.\n" ); + else if ( fclose( skelfile ) ) + flexfatal( + "error occurred when closing skeleton file" ); + } - if ( ferror( backtrack_file ) ) - flexfatal( "error occurred when writing backtracking file" ); + if ( exit_status != 0 && outfile_created ) + { + if ( ferror( stdout ) ) + flexfatal( "error occurred when writing output file" ); - else if ( fclose( backtrack_file ) ) - flexfatal( "error occurred when closing backtracking file" ); - } + else if ( fclose( stdout ) ) + flexfatal( "error occurred when closing output file" ); - if ( printstats ) - { - fprintf( stderr, "%s version %s usage statistics:\n", program_name, - flex_version ); - - if ( starttime ) - { - endtime = flex_gettime(); - fprintf( stderr, " started at %s, finished at %s\n", - starttime, endtime ); - } + else if ( unlink( outfile ) ) + flexfatal( "error occurred when 
deleting output file" ); + } - fprintf( stderr, " scanner options: -" ); + if ( backtrack_report && backtrack_file ) + { + if ( num_backtracking == 0 ) + fprintf( backtrack_file, "No backtracking.\n" ); + else if ( fullspd || fulltbl ) + fprintf( backtrack_file, + "%d backtracking (non-accepting) states.\n", + num_backtracking ); + else + fprintf( backtrack_file, + "Compressed tables always backtrack.\n" ); + + if ( ferror( backtrack_file ) ) + flexfatal( + "error occurred when writing backtracking file" ); + + else if ( fclose( backtrack_file ) ) + flexfatal( + "error occurred when closing backtracking file" ); + } - if ( backtrack_report ) - putc( 'b', stderr ); - if ( ddebug ) - putc( 'd', stderr ); - if ( caseins ) - putc( 'i', stderr ); - if ( performance_report > 0 ) - putc( 'p', stderr ); - if ( performance_report > 1 ) - putc( 'p', stderr ); - if ( spprdflt ) - putc( 's', stderr ); - if ( use_stdout ) - putc( 't', stderr ); if ( printstats ) - putc( 'v', stderr ); /* always true! */ - if ( nowarn ) - putc( 'w', stderr ); - if ( ! interactive ) - putc( 'B', stderr ); - if ( interactive ) - putc( 'I', stderr ); - if ( ! 
gen_line_dirs ) - putc( 'L', stderr ); - if ( trace ) - putc( 'T', stderr ); - if ( csize == 128 ) - putc( '7', stderr ); - else - putc( '8', stderr ); + { + fprintf( stderr, "%s version %s usage statistics:\n", + program_name, flex_version ); - fprintf( stderr, " -C" ); + if ( starttime ) + { + endtime = flex_gettime(); + fprintf( stderr, " started at %s, finished at %s\n", + starttime, endtime ); + } - if ( fulltbl ) - putc( 'f', stderr ); - if ( fullspd ) - putc( 'F', stderr ); - if ( useecs ) - putc( 'e', stderr ); - if ( usemecs ) - putc( 'm', stderr ); - - if ( skelname ) - fprintf( stderr, " -S%s", skelname ); - - putc( '\n', stderr ); - - fprintf( stderr, " %d/%d NFA states\n", lastnfa, current_mns ); - fprintf( stderr, " %d/%d DFA states (%d words)\n", lastdfa, - current_max_dfas, totnst ); - fprintf( stderr, " %d rules\n", - num_rules + num_eof_rules - 1 /* - 1 for def. rule */ ); - - if ( num_backtracking == 0 ) - fprintf( stderr, " No backtracking\n" ); - else if ( fullspd || fulltbl ) - fprintf( stderr, " %d backtracking (non-accepting) states\n", - num_backtracking ); - else - fprintf( stderr, " compressed tables always backtrack\n" ); + fprintf( stderr, " scanner options: -" ); + + if ( backtrack_report ) + putc( 'b', stderr ); + if ( ddebug ) + putc( 'd', stderr ); + if ( caseins ) + putc( 'i', stderr ); + if ( performance_report > 0 ) + putc( 'p', stderr ); + if ( performance_report > 1 ) + putc( 'p', stderr ); + if ( spprdflt ) + putc( 's', stderr ); + if ( use_stdout ) + putc( 't', stderr ); + if ( printstats ) + putc( 'v', stderr ); /* always true! */ + if ( nowarn ) + putc( 'w', stderr ); + if ( ! interactive ) + putc( 'B', stderr ); + if ( interactive ) + putc( 'I', stderr ); + if ( ! 
gen_line_dirs ) + putc( 'L', stderr ); + if ( trace ) + putc( 'T', stderr ); + if ( csize == 128 ) + putc( '7', stderr ); + else + putc( '8', stderr ); + + fprintf( stderr, " -C" ); + + if ( fulltbl ) + putc( 'f', stderr ); + if ( fullspd ) + putc( 'F', stderr ); + if ( useecs ) + putc( 'e', stderr ); + if ( usemecs ) + putc( 'm', stderr ); + + if ( skelname ) + fprintf( stderr, " -S%s", skelname ); + + putc( '\n', stderr ); + + fprintf( stderr, " %d/%d NFA states\n", lastnfa, current_mns ); + fprintf( stderr, " %d/%d DFA states (%d words)\n", lastdfa, + current_max_dfas, totnst ); + fprintf( stderr, " %d rules\n", + num_rules + num_eof_rules - 1 /* - 1 for def. rule */ ); + + if ( num_backtracking == 0 ) + fprintf( stderr, " No backtracking\n" ); + else if ( fullspd || fulltbl ) + fprintf( stderr, + " %d backtracking (non-accepting) states\n", + num_backtracking ); + else + fprintf( stderr, + " compressed tables always backtrack\n" ); + + if ( bol_needed ) + fprintf( stderr, + " Beginning-of-line patterns used\n" ); + + fprintf( stderr, " %d/%d start conditions\n", lastsc, + current_max_scs ); + fprintf( stderr, + " %d epsilon states, %d double epsilon states\n", + numeps, eps2 ); - if ( bol_needed ) - fprintf( stderr, " Beginning-of-line patterns used\n" ); + if ( lastccl == 0 ) + fprintf( stderr, " no character classes\n" ); + else + fprintf( stderr, + " %d/%d character classes needed %d/%d words of storage, %d reused\n", + lastccl, current_maxccls, + cclmap[lastccl] + ccllen[lastccl], + current_max_ccl_tbl_size, cclreuse ); - fprintf( stderr, " %d/%d start conditions\n", lastsc, - current_max_scs ); - fprintf( stderr, " %d epsilon states, %d double epsilon states\n", - numeps, eps2 ); + fprintf( stderr, " %d state/nextstate pairs created\n", + numsnpairs ); + fprintf( stderr, " %d/%d unique/duplicate transitions\n", + numuniq, numdup ); - if ( lastccl == 0 ) - fprintf( stderr, " no character classes\n" ); - else - fprintf( stderr, - " %d/%d character classes 
needed %d/%d words of storage, %d reused\n", - lastccl, current_maxccls, - cclmap[lastccl] + ccllen[lastccl], - current_max_ccl_tbl_size, cclreuse ); + if ( fulltbl ) + { + tblsiz = lastdfa * numecs; + fprintf( stderr, " %d table entries\n", tblsiz ); + } - fprintf( stderr, " %d state/nextstate pairs created\n", numsnpairs ); - fprintf( stderr, " %d/%d unique/duplicate transitions\n", - numuniq, numdup ); + else + { + tblsiz = 2 * (lastdfa + numtemps) + 2 * tblend; + + fprintf( stderr, " %d/%d base-def entries created\n", + lastdfa + numtemps, current_max_dfas ); + fprintf( stderr, + " %d/%d (peak %d) nxt-chk entries created\n", + tblend, current_max_xpairs, peakpairs ); + fprintf( stderr, + " %d/%d (peak %d) template nxt-chk entries created\n", + numtemps * nummecs, current_max_template_xpairs, + numtemps * numecs ); + fprintf( stderr, " %d empty table entries\n", nummt ); + fprintf( stderr, " %d protos created\n", numprots ); + fprintf( stderr, " %d templates created, %d uses\n", + numtemps, tmpuses ); + } - if ( fulltbl ) - { - tblsiz = lastdfa * numecs; - fprintf( stderr, " %d table entries\n", tblsiz ); - } + if ( useecs ) + { + tblsiz = tblsiz + csize; + fprintf( stderr, + " %d/%d equivalence classes created\n", + numecs, csize ); + } - else - { - tblsiz = 2 * (lastdfa + numtemps) + 2 * tblend; - - fprintf( stderr, " %d/%d base-def entries created\n", - lastdfa + numtemps, current_max_dfas ); - fprintf( stderr, " %d/%d (peak %d) nxt-chk entries created\n", - tblend, current_max_xpairs, peakpairs ); - fprintf( stderr, - " %d/%d (peak %d) template nxt-chk entries created\n", - numtemps * nummecs, current_max_template_xpairs, - numtemps * numecs ); - fprintf( stderr, " %d empty table entries\n", nummt ); - fprintf( stderr, " %d protos created\n", numprots ); - fprintf( stderr, " %d templates created, %d uses\n", - numtemps, tmpuses ); - } + if ( usemecs ) + { + tblsiz = tblsiz + numecs; + fprintf( stderr, + " %d/%d meta-equivalence classes created\n", + nummecs, 
csize ); + } - if ( useecs ) - { - tblsiz = tblsiz + csize; - fprintf( stderr, " %d/%d equivalence classes created\n", - numecs, csize ); - } - - if ( usemecs ) - { - tblsiz = tblsiz + numecs; - fprintf( stderr, " %d/%d meta-equivalence classes created\n", - nummecs, csize ); - } - - fprintf( stderr, " %d (%d saved) hash collisions, %d DFAs equal\n", - hshcol, hshsave, dfaeql ); - fprintf( stderr, " %d sets of reallocations needed\n", num_reallocs ); - fprintf( stderr, " %d total table entries needed\n", tblsiz ); - } + fprintf( stderr, + " %d (%d saved) hash collisions, %d DFAs equal\n", + hshcol, hshsave, dfaeql ); + fprintf( stderr, " %d sets of reallocations needed\n", + num_reallocs ); + fprintf( stderr, " %d total table entries needed\n", tblsiz ); + } #ifndef VMS - exit( status ); + exit( exit_status ); #else - exit( status + 1 ); + exit( exit_status + 1 ); #endif - } + } -/* flexinit - initialize flex - * - * synopsis - * int argc; - * char **argv; - * flexinit( argc, argv ); - */ +/* flexinit - initialize flex */ void flexinit( argc, argv ) int argc; char **argv; - - { - int i, sawcmpflag; - int csize_given, interactive_given; - char *arg, *flex_gettime(), *mktemp(); - - printstats = syntaxerror = trace = spprdflt = caseins = false; - backtrack_report = ddebug = fulltbl = fullspd = false; - nowarn = yymore_used = continued_action = reject = yytext_is_array = false; - yymore_really_used = reject_really_used = false; - gen_line_dirs = usemecs = useecs = true; - performance_report = 0; - - sawcmpflag = false; - use_stdout = false; - csize_given = false; - interactive_given = false; - - /* Initialize dynamic array for holding the rule actions. 
*/ - action_size = 2048; /* default size of action array in bytes */ - prolog = action = action_array = allocate_character_array( action_size ); - action_offset = action_index = 0; - - starttime = flex_gettime(); - - program_name = argv[0]; - - /* read flags */ - for ( --argc, ++argv; argc ; --argc, ++argv ) { - if ( argv[0][0] != '-' || argv[0][1] == '\0' ) - break; - - arg = argv[0]; - - for ( i = 1; arg[i] != '\0'; ++i ) - switch ( arg[i] ) + int i, sawcmpflag; + int csize_given, interactive_given; + char *arg, *flex_gettime(), *mktemp(); + + printstats = syntaxerror = trace = spprdflt = caseins = false; + backtrack_report = ddebug = fulltbl = fullspd = false; + nowarn = yymore_used = continued_action = reject = false; + yytext_is_array = yymore_really_used = reject_really_used = false; + gen_line_dirs = usemecs = useecs = true; + performance_report = 0; + + sawcmpflag = false; + use_stdout = false; + csize_given = false; + interactive_given = false; + + /* Initialize dynamic array for holding the rule actions. */ + action_size = 2048; /* default size of action array in bytes */ + prolog = action = action_array = + allocate_character_array( action_size ); + action_offset = action_index = 0; + + starttime = flex_gettime(); + + program_name = argv[0]; + + /* read flags */ + for ( --argc, ++argv; argc ; --argc, ++argv ) { - case 'B': - interactive = false; - interactive_given = true; - break; - - case 'b': - backtrack_report = true; - break; + if ( argv[0][0] != '-' || argv[0][1] == '\0' ) + break; - case 'c': - fprintf( stderr, - "%s: Assuming use of deprecated -c flag is really intended to be -C\n", - program_name ); - - /* fall through */ - - case 'C': - if ( i != 1 ) - flexerror( "-C flag must be given separately" ); + arg = argv[0]; - if ( ! 
sawcmpflag ) - { - useecs = false; - usemecs = false; - fulltbl = false; - sawcmpflag = true; - } - - for ( ++i; arg[i] != '\0'; ++i ) + for ( i = 1; arg[i] != '\0'; ++i ) switch ( arg[i] ) - { - case 'e': - useecs = true; - break; - - case 'F': - fullspd = true; - break; - - case 'f': - fulltbl = true; - break; - - case 'm': - usemecs = true; - break; - - default: - lerrif( "unknown -C option '%c'", - (int) arg[i] ); - break; - } - - goto get_next_arg; - - case 'd': - ddebug = true; - break; - - case 'f': - useecs = usemecs = false; - fulltbl = true; - break; - - case 'F': - useecs = usemecs = false; - fullspd = true; - break; - - case 'h': - usage(); - exit( 0 ); - - case 'I': - interactive = true; - interactive_given = true; - break; - - case 'i': - caseins = true; - break; - - case 'L': - gen_line_dirs = false; - break; - - case 'n': - /* stupid do-nothing deprecated option */ - break; - - case 'p': - ++performance_report; - break; - - case 'S': - if ( i != 1 ) - flexerror( "-S flag must be given separately" ); - - skelname = arg + i + 1; - goto get_next_arg; - - case 's': - spprdflt = true; - break; - - case 't': - use_stdout = true; - break; - - case 'T': - trace = true; - break; - - case 'v': - printstats = true; - break; - - case 'V': - fprintf( stderr, "%s version %s\n", - program_name, flex_version ); - exit( 0 ); - - case 'w': - nowarn = true; - break; - - case '7': - csize = 128; - csize_given = true; - break; - - case '8': - csize = CSIZE; - csize_given = true; - break; - - default: - fprintf( stderr, "%s: unknown flag '%c'\n", - program_name, (int) arg[i] ); - usage(); - exit( 1 ); + { + case 'B': + interactive = false; + interactive_given = true; + break; + + case 'b': + backtrack_report = true; + break; + + case 'c': + fprintf( stderr, + "%s: Assuming use of deprecated -c flag is really intended to be -C\n", + program_name ); + + /* fall through */ + + case 'C': + if ( i != 1 ) + flexerror( + "-C flag must be given separately" ); + + if ( ! 
sawcmpflag ) + { + useecs = false; + usemecs = false; + fulltbl = false; + sawcmpflag = true; + } + + for ( ++i; arg[i] != '\0'; ++i ) + switch ( arg[i] ) + { + case 'e': + useecs = true; + break; + + case 'F': + fullspd = true; + break; + + case 'f': + fulltbl = true; + break; + + case 'm': + usemecs = true; + break; + + default: + lerrif( + "unknown -C option '%c'", + (int) arg[i] ); + break; + } + + goto get_next_arg; + + case 'd': + ddebug = true; + break; + + case 'f': + useecs = usemecs = false; + fulltbl = true; + break; + + case 'F': + useecs = usemecs = false; + fullspd = true; + break; + + case 'h': + usage(); + exit( 0 ); + + case 'I': + interactive = true; + interactive_given = true; + break; + + case 'i': + caseins = true; + break; + + case 'L': + gen_line_dirs = false; + break; + + case 'n': + /* Stupid do-nothing deprecated + * option. + */ + break; + + case 'p': + ++performance_report; + break; + + case 'S': + if ( i != 1 ) + flexerror( + "-S flag must be given separately" ); + + skelname = arg + i + 1; + goto get_next_arg; + + case 's': + spprdflt = true; + break; + + case 't': + use_stdout = true; + break; + + case 'T': + trace = true; + break; + + case 'v': + printstats = true; + break; + + case 'V': + fprintf( stderr, "%s version %s\n", + program_name, flex_version ); + exit( 0 ); + + case 'w': + nowarn = true; + break; + + case '7': + csize = 128; + csize_given = true; + break; + + case '8': + csize = CSIZE; + csize_given = true; + break; + + default: + fprintf( stderr, + "%s: unknown flag '%c'\n", + program_name, (int) arg[i] ); + usage(); + exit( 1 ); + } + + /* Used by -C and -S flags in lieu of a "continue 2" control. */ + get_next_arg: ; } -get_next_arg: /* used by -C and -S flags in lieu of a "continue 2" control */ - ; - } - - if ( ! csize_given ) - { - if ( fulltbl || fullspd ) - csize = DEFAULT_CSIZE; - else - csize = CSIZE; - } + if ( ! 
csize_given ) + { + if ( fulltbl || fullspd ) + csize = DEFAULT_CSIZE; + else + csize = CSIZE; + } - if ( ! interactive_given ) - { - if ( fulltbl || fullspd ) - interactive = false; - else - interactive = true; - } + if ( ! interactive_given ) + { + if ( fulltbl || fullspd ) + interactive = false; + else + interactive = true; + } - if ( (fulltbl || fullspd) && usemecs ) - flexerror( "full table and -Cm don't make sense together" ); + if ( (fulltbl || fullspd) && usemecs ) + flexerror( "full table and -Cm don't make sense together" ); - if ( (fulltbl || fullspd) && interactive ) - flexerror( "full table and -I are incompatible" ); + if ( (fulltbl || fullspd) && interactive ) + flexerror( "full table and -I are incompatible" ); - if ( fulltbl && fullspd ) - flexerror( "full table and -F are mutually exclusive" ); + if ( fulltbl && fullspd ) + flexerror( "full table and -F are mutually exclusive" ); - if ( ! use_stdout ) - { - FILE *prev_stdout = freopen( outfile, "w", stdout ); + if ( ! use_stdout ) + { + FILE *prev_stdout = freopen( outfile, "w", stdout ); - if ( prev_stdout == NULL ) - lerrsf( "could not create %s", outfile ); + if ( prev_stdout == NULL ) + lerrsf( "could not create %s", outfile ); - outfile_created = 1; - } + outfile_created = 1; + } - num_input_files = argc; - input_files = argv; - set_input_file( num_input_files > 0 ? input_files[0] : NULL ); + num_input_files = argc; + input_files = argv; + set_input_file( num_input_files > 0 ? 
input_files[0] : NULL ); - if ( backtrack_report ) - { + if ( backtrack_report ) + { #ifndef SHORT_FILE_NAMES - backtrack_file = fopen( "lex.backtrack", "w" ); + backtrack_file = fopen( "lex.backtrack", "w" ); #else - backtrack_file = fopen( "lex.bck", "w" ); + backtrack_file = fopen( "lex.bck", "w" ); #endif - if ( backtrack_file == NULL ) - flexerror( "could not create lex.backtrack" ); - } - - else - backtrack_file = NULL; + if ( backtrack_file == NULL ) + flexerror( "could not create lex.backtrack" ); + } + else + backtrack_file = NULL; - lastccl = 0; - lastsc = 0; - if ( skelname && (skelfile = fopen( skelname, "r" )) == NULL ) - lerrsf( "can't open skeleton file %s", skelname ); + lastccl = 0; + lastsc = 0; - lastdfa = lastnfa = 0; - num_rules = num_eof_rules = default_rule = numas = numsnpairs = tmpuses = 0; - numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0; - numuniq = numdup = hshsave = eofseen = datapos = dataline = 0; - num_backtracking = onesp = numprots = 0; - variable_trailing_context_rules = bol_needed = false; + if ( skelname && (skelfile = fopen( skelname, "r" )) == NULL ) + lerrsf( "can't open skeleton file %s", skelname ); - linenum = sectnum = 1; - firstprot = NIL; + lastdfa = lastnfa = 0; + num_rules = num_eof_rules = default_rule = 0; + numas = numsnpairs = tmpuses = 0; + numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0; + numuniq = numdup = hshsave = eofseen = datapos = dataline = 0; + num_backtracking = onesp = numprots = 0; + variable_trailing_context_rules = bol_needed = false; - /* used in mkprot() so that the first proto goes in slot 1 - * of the proto queue - */ - lastprot = 1; + linenum = sectnum = 1; + firstprot = NIL; - if ( useecs ) - { /* set up doubly-linked equivalence classes */ - /* We loop all the way up to csize, since ecgroup[csize] is the - * position used for NUL characters + /* Used in mkprot() so that the first proto goes in slot 1 + * of the proto queue. 
*/ - ecgroup[1] = NIL; + lastprot = 1; - for ( i = 2; i <= csize; ++i ) - { - ecgroup[i] = i - 1; - nextecm[i - 1] = i; - } + if ( useecs ) + { + /* Set up doubly-linked equivalence classes. */ - nextecm[csize] = NIL; - } + /* We loop all the way up to csize, since ecgroup[csize] is + * the position used for NUL characters. + */ + ecgroup[1] = NIL; - else - { /* put everything in its own equivalence class */ - for ( i = 1; i <= csize; ++i ) - { - ecgroup[i] = i; - nextecm[i] = BAD_SUBSCRIPT; /* to catch errors */ - } - } + for ( i = 2; i <= csize; ++i ) + { + ecgroup[i] = i - 1; + nextecm[i - 1] = i; + } - set_up_initial_allocations(); - } + nextecm[csize] = NIL; + } + else + { + /* Put everything in its own equivalence class. */ + for ( i = 1; i <= csize; ++i ) + { + ecgroup[i] = i; + nextecm[i] = BAD_SUBSCRIPT; /* to catch errors */ + } + } -/* readin - read in the rules section of the input file(s) - * - * synopsis - * readin(); - */ + set_up_initial_allocations(); + } -void readin() - { - skelout(); +/* readin - read in the rules section of the input file(s) */ - if ( ddebug ) - puts( "#define FLEX_DEBUG" ); +void readin() + { + skelout(); - if ( csize == 256 ) - puts( "typedef unsigned char YY_CHAR;" ); - else - puts( "typedef char YY_CHAR;" ); + if ( ddebug ) + puts( "#define FLEX_DEBUG" ); - line_directive_out( stdout ); + if ( csize == 256 ) + puts( "typedef unsigned char YY_CHAR;" ); + else + puts( "typedef char YY_CHAR;" ); - if ( yyparse() ) - { - pinpoint_message( "fatal parse error" ); - flexend( 1 ); - } + line_directive_out( stdout ); - if ( useecs ) - numecs = cre8ecs( nextecm, ecgroup, csize ); - else - numecs = csize; + if ( yyparse() ) + { + pinpoint_message( "fatal parse error" ); + flexend( 1 ); + } - /* now map the equivalence class for NUL to its expected place */ - ecgroup[0] = ecgroup[csize]; - NUL_ec = abs( ecgroup[0] ); + if ( useecs ) + numecs = cre8ecs( nextecm, ecgroup, csize ); + else + numecs = csize; - if ( useecs ) - ccl2ecl(); + 
/* Now map the equivalence class for NUL to its expected place. */ + ecgroup[0] = ecgroup[csize]; + NUL_ec = abs( ecgroup[0] ); - if ( yytext_is_array ) - { - puts( "extern char yytext[];\n" ); - puts( "#ifndef YYLMAX" ); - puts( "#define YYLMAX YY_READ_BUF_SIZE" ); - puts( "#endif YYLMAX\n" ); - puts( "char yytext[YYLMAX];" ); - puts( "YY_CHAR *yytext_ptr;" ); - } + if ( useecs ) + ccl2ecl(); - else - { - puts( "extern YY_CHAR *yytext;" ); - puts( "YY_CHAR *yytext;" ); - puts( "#define yytext_ptr yytext" ); + if ( yytext_is_array ) + { + puts( "extern char yytext[];\n" ); + puts( "#ifndef YYLMAX" ); + puts( "#define YYLMAX YY_READ_BUF_SIZE" ); + puts( "#endif YYLMAX\n" ); + puts( "char yytext[YYLMAX];" ); + puts( "YY_CHAR *yytext_ptr;" ); + } + + else + { + puts( "extern YY_CHAR *yytext;" ); + puts( "YY_CHAR *yytext;" ); + puts( "#define yytext_ptr yytext" ); + } } - } /* set_up_initial_allocations - allocate memory for internal tables */ void set_up_initial_allocations() - - { - current_mns = INITIAL_MNS; - firstst = allocate_integer_array( current_mns ); - lastst = allocate_integer_array( current_mns ); - finalst = allocate_integer_array( current_mns ); - transchar = allocate_integer_array( current_mns ); - trans1 = allocate_integer_array( current_mns ); - trans2 = allocate_integer_array( current_mns ); - accptnum = allocate_integer_array( current_mns ); - assoc_rule = allocate_integer_array( current_mns ); - state_type = allocate_integer_array( current_mns ); - - current_max_rules = INITIAL_MAX_RULES; - rule_type = allocate_integer_array( current_max_rules ); - rule_linenum = allocate_integer_array( current_max_rules ); - rule_useful = allocate_integer_array( current_max_rules ); - - current_max_scs = INITIAL_MAX_SCS; - scset = allocate_integer_array( current_max_scs ); - scbol = allocate_integer_array( current_max_scs ); - scxclu = allocate_integer_array( current_max_scs ); - sceof = allocate_integer_array( current_max_scs ); - scname = 
allocate_char_ptr_array( current_max_scs ); - actvsc = allocate_integer_array( current_max_scs ); - - current_maxccls = INITIAL_MAX_CCLS; - cclmap = allocate_integer_array( current_maxccls ); - ccllen = allocate_integer_array( current_maxccls ); - cclng = allocate_integer_array( current_maxccls ); - - current_max_ccl_tbl_size = INITIAL_MAX_CCL_TBL_SIZE; - ccltbl = allocate_Character_array( current_max_ccl_tbl_size ); - - current_max_dfa_size = INITIAL_MAX_DFA_SIZE; - - current_max_xpairs = INITIAL_MAX_XPAIRS; - nxt = allocate_integer_array( current_max_xpairs ); - chk = allocate_integer_array( current_max_xpairs ); - - current_max_template_xpairs = INITIAL_MAX_TEMPLATE_XPAIRS; - tnxt = allocate_integer_array( current_max_template_xpairs ); - - current_max_dfas = INITIAL_MAX_DFAS; - base = allocate_integer_array( current_max_dfas ); - def = allocate_integer_array( current_max_dfas ); - dfasiz = allocate_integer_array( current_max_dfas ); - accsiz = allocate_integer_array( current_max_dfas ); - dhash = allocate_integer_array( current_max_dfas ); - dss = allocate_int_ptr_array( current_max_dfas ); - dfaacc = allocate_dfaacc_union( current_max_dfas ); - - nultrans = (int *) 0; - } + { + current_mns = INITIAL_MNS; + firstst = allocate_integer_array( current_mns ); + lastst = allocate_integer_array( current_mns ); + finalst = allocate_integer_array( current_mns ); + transchar = allocate_integer_array( current_mns ); + trans1 = allocate_integer_array( current_mns ); + trans2 = allocate_integer_array( current_mns ); + accptnum = allocate_integer_array( current_mns ); + assoc_rule = allocate_integer_array( current_mns ); + state_type = allocate_integer_array( current_mns ); + + current_max_rules = INITIAL_MAX_RULES; + rule_type = allocate_integer_array( current_max_rules ); + rule_linenum = allocate_integer_array( current_max_rules ); + rule_useful = allocate_integer_array( current_max_rules ); + + current_max_scs = INITIAL_MAX_SCS; + scset = allocate_integer_array( 
current_max_scs ); + scbol = allocate_integer_array( current_max_scs ); + scxclu = allocate_integer_array( current_max_scs ); + sceof = allocate_integer_array( current_max_scs ); + scname = allocate_char_ptr_array( current_max_scs ); + actvsc = allocate_integer_array( current_max_scs ); + + current_maxccls = INITIAL_MAX_CCLS; + cclmap = allocate_integer_array( current_maxccls ); + ccllen = allocate_integer_array( current_maxccls ); + cclng = allocate_integer_array( current_maxccls ); + + current_max_ccl_tbl_size = INITIAL_MAX_CCL_TBL_SIZE; + ccltbl = allocate_Character_array( current_max_ccl_tbl_size ); + + current_max_dfa_size = INITIAL_MAX_DFA_SIZE; + + current_max_xpairs = INITIAL_MAX_XPAIRS; + nxt = allocate_integer_array( current_max_xpairs ); + chk = allocate_integer_array( current_max_xpairs ); + + current_max_template_xpairs = INITIAL_MAX_TEMPLATE_XPAIRS; + tnxt = allocate_integer_array( current_max_template_xpairs ); + + current_max_dfas = INITIAL_MAX_DFAS; + base = allocate_integer_array( current_max_dfas ); + def = allocate_integer_array( current_max_dfas ); + dfasiz = allocate_integer_array( current_max_dfas ); + accsiz = allocate_integer_array( current_max_dfas ); + dhash = allocate_integer_array( current_max_dfas ); + dss = allocate_int_ptr_array( current_max_dfas ); + dfaacc = allocate_dfaacc_union( current_max_dfas ); + + nultrans = (int *) 0; + } void usage() - { - fprintf( stderr, + { + fprintf( stderr, "%s [-bcdfhinpstvwBFILTV78 -C[efmF] -Sskeleton] [filename ...]\n", - program_name ); - - fprintf( stderr, - "\t-b generate backtracking information to lex.backtrack\n" ); - fprintf( stderr, "\t-c do-nothing POSIX option\n" ); - fprintf( stderr, "\t-d turn on debug mode in generated scanner\n" ); - fprintf( stderr, "\t-f generate fast, large scanner\n" ); - fprintf( stderr, "\t-h produce this help message\n" ); - fprintf( stderr, "\t-i generate case-insensitive scanner\n" ); - fprintf( stderr, "\t-n do-nothing POSIX option\n" ); - fprintf( stderr, 
"\t-p generate performance report to stderr\n" ); - fprintf( stderr, "\t-s suppress default rule to ECHO unmatched text\n" ); - fprintf( stderr, + program_name ); + + fprintf( stderr, + "\t-b generate backtracking information to lex.backtrack\n" ); + fprintf( stderr, "\t-c do-nothing POSIX option\n" ); + fprintf( stderr, "\t-d turn on debug mode in generated scanner\n" ); + fprintf( stderr, "\t-f generate fast, large scanner\n" ); + fprintf( stderr, "\t-h produce this help message\n" ); + fprintf( stderr, "\t-i generate case-insensitive scanner\n" ); + fprintf( stderr, "\t-n do-nothing POSIX option\n" ); + fprintf( stderr, "\t-p generate performance report to stderr\n" ); + fprintf( stderr, + "\t-s suppress default rule to ECHO unmatched text\n" ); + fprintf( stderr, "\t-t write generated scanner on stdout instead of lex.yy.c\n" ); - fprintf( stderr, "\t-v write summary of scanner statistics to stderr\n" ); - fprintf( stderr, "\t-w do not generate warnings\n" ); - fprintf( stderr, "\t-B generate batch scanner (opposite of -I)\n" ); - fprintf( stderr, "\t-F use alternative fast scanner representation\n" ); - fprintf( stderr, "\t-I generate interactive scanner (opposite of -B)\n" ); - fprintf( stderr, "\t-L suppress #line directives in scanner\n" ); - fprintf( stderr, "\t-T %s should run in trace mode\n", program_name ); - fprintf( stderr, "\t-V report %s version\n", program_name ); - fprintf( stderr, "\t-7 generate 7-bit scanner\n" ); - fprintf( stderr, "\t-8 generate 8-bit scanner\n" ); - fprintf( stderr, + fprintf( stderr, + "\t-v write summary of scanner statistics to stderr\n" ); + fprintf( stderr, "\t-w do not generate warnings\n" ); + fprintf( stderr, "\t-B generate batch scanner (opposite of -I)\n" ); + fprintf( stderr, + "\t-F use alternative fast scanner representation\n" ); + fprintf( stderr, + "\t-I generate interactive scanner (opposite of -B)\n" ); + fprintf( stderr, "\t-L suppress #line directives in scanner\n" ); + fprintf( stderr, "\t-T %s should run 
in trace mode\n", program_name ); + fprintf( stderr, "\t-V report %s version\n", program_name ); + fprintf( stderr, "\t-7 generate 7-bit scanner\n" ); + fprintf( stderr, "\t-8 generate 8-bit scanner\n" ); + fprintf( stderr, "\t-C specify degree of table compression (default is -Cem):\n" ); - fprintf( stderr, "\t\t-Ce construct equivalence classes\n" ); - fprintf( stderr, + fprintf( stderr, "\t\t-Ce construct equivalence classes\n" ); + fprintf( stderr, "\t\t-Cf do not compress scanner tables; use -f representation\n" ); - fprintf( stderr, "\t\t-Cm construct meta-equivalence classes\n" ); - fprintf( stderr, + fprintf( stderr, "\t\t-Cm construct meta-equivalence classes\n" ); + fprintf( stderr, "\t\t-CF do not compress scanner tables; use -F representation\n" ); - fprintf( stderr, "\t-S specify non-default skeleton file\n" ); - } + fprintf( stderr, "\t-S specify non-default skeleton file\n" ); + } diff --git a/misc.c b/misc.c index b371a74..b66b0ec 100644 --- a/misc.c +++ b/misc.c @@ -50,98 +50,84 @@ int otoi PROTO((Char [])); void add_action( new_text ) char *new_text; - { - int len = strlen( new_text ); - - while ( len + action_index + action_offset >= action_size - 10 /* slop */ ) { - action_size *= 2; - prolog = action_array = - reallocate_character_array( action_array, action_size ); - action = &action_array[action_offset]; - } + int len = strlen( new_text ); - strcpy( &action[action_index], new_text ); + while ( len + action_index + action_offset >= action_size - 10 + /* slop */ ) + { + action_size *= 2; + prolog = action_array = + reallocate_character_array( action_array, action_size ); + action = &action_array[action_offset]; + } + + strcpy( &action[action_index], new_text ); - action_index += len; - } + action_index += len; + } /* allocate_array - allocate memory for an integer array of the given size */ void *allocate_array( size, element_size ) int size, element_size; + { + register void *mem; - { - register void *mem; + /* On 16-bit int machines (e.g., 
80286) we might be trying to + * allocate more than a signed int can hold, and that won't + * work. Cheap test: + */ + if ( element_size * size <= 0 ) + flexfatal( "request for < 1 byte in allocate_array()" ); - /* on 16-bit int machines (e.g., 80286) we might be trying to - * allocate more than a signed int can hold, and that won't - * work. Cheap test: - */ - if ( element_size * size <= 0 ) - flexfatal( "request for < 1 byte in allocate_array()" ); + mem = (void *) malloc( (unsigned) (element_size * size) ); - mem = (void *) malloc( (unsigned) (element_size * size) ); + if ( mem == NULL ) + flexfatal( "memory allocation failed in allocate_array()" ); - if ( mem == NULL ) - flexfatal( "memory allocation failed in allocate_array()" ); + return mem; + } - return ( mem ); - } - -/* all_lower - true if a string is all lower-case - * - * synopsis: - * Char *str; - * int all_lower(); - * true/false = all_lower( str ); - */ +/* all_lower - true if a string is all lower-case */ int all_lower( str ) register Char *str; - - { - while ( *str ) { - if ( ! isascii( *str ) || ! islower( *str ) ) - return ( 0 ); - ++str; + while ( *str ) + { + if ( ! isascii( *str ) || ! islower( *str ) ) + return 0; + ++str; + } + + return 1; } - return ( 1 ); - } - -/* all_upper - true if a string is all upper-case - * - * synopsis: - * Char *str; - * int all_upper(); - * true/false = all_upper( str ); - */ +/* all_upper - true if a string is all upper-case */ int all_upper( str ) register Char *str; - - { - while ( *str ) { - if ( ! isascii( *str ) || ! isupper( (char) *str ) ) - return ( 0 ); - ++str; - } + while ( *str ) + { + if ( ! isascii( *str ) || ! 
isupper( (char) *str ) ) + return 0; + ++str; + } - return ( 1 ); - } + return 1; + } /* bubble - bubble sort an integer array in increasing order * * synopsis * int v[n], n; - * bubble( v, n ); + * void bubble( v, n ); * * description * sorts the first n elements of array v and replaces them in @@ -149,97 +135,79 @@ register Char *str; * * passed * v - the array to be sorted - * n - the number of elements of 'v' to be sorted */ + * n - the number of elements of 'v' to be sorted + */ void bubble( v, n ) int v[], n; - - { - register int i, j, k; - - for ( i = n; i > 1; --i ) - for ( j = 1; j < i; ++j ) - if ( v[j] > v[j + 1] ) /* compare */ - { - k = v[j]; /* exchange */ - v[j] = v[j + 1]; - v[j + 1] = k; - } - } + { + register int i, j, k; + + for ( i = n; i > 1; --i ) + for ( j = 1; j < i; ++j ) + if ( v[j] > v[j + 1] ) /* compare */ + { + k = v[j]; /* exchange */ + v[j] = v[j + 1]; + v[j + 1] = k; + } + } -/* clower - replace upper-case letter to lower-case - * - * synopsis: - * Char clower(); - * int c; - * c = clower( c ); - */ +/* clower - replace upper-case letter to lower-case */ Char clower( c ) register int c; - - { - return ( (isascii( c ) && isupper( c )) ? tolower( c ) : c ); - } + { + return (isascii( c ) && isupper( c )) ? 
tolower( c ) : c; + } -/* copy_string - returns a dynamically allocated copy of a string - * - * synopsis - * char *str, *copy, *copy_string(); - * copy = copy_string( str ); - */ +/* copy_string - returns a dynamically allocated copy of a string */ char *copy_string( str ) register char *str; + { + register char *c; + char *copy; - { - register char *c; - char *copy; - - /* find length */ - for ( c = str; *c; ++c ) - ; + /* find length */ + for ( c = str; *c; ++c ) + ; - copy = malloc( (unsigned) ((c - str + 1) * sizeof( char )) ); + copy = malloc( (unsigned) ((c - str + 1) * sizeof( char )) ); - if ( copy == NULL ) - flexfatal( "dynamic memory failure in copy_string()" ); + if ( copy == NULL ) + flexfatal( "dynamic memory failure in copy_string()" ); - for ( c = copy; (*c++ = *str++); ) - ; + for ( c = copy; (*c++ = *str++); ) + ; - return ( copy ); - } + return copy; + } /* copy_unsigned_string - * returns a dynamically allocated copy of a (potentially) unsigned string - * - * synopsis - * Char *str, *copy, *copy_unsigned_string(); - * copy = copy_unsigned_string( str ); */ Char *copy_unsigned_string( str ) register Char *str; + { + register Char *c; + Char *copy; - { - register Char *c; - Char *copy; - - /* find length */ - for ( c = str; *c; ++c ) - ; + /* find length */ + for ( c = str; *c; ++c ) + ; - copy = allocate_Character_array( c - str + 1 ); + copy = allocate_Character_array( c - str + 1 ); - for ( c = copy; (*c++ = *str++); ) - ; + for ( c = copy; (*c++ = *str++); ) + ; - return ( copy ); - } + return copy; + } /* cshell - shell sort a character array in increasing order @@ -251,7 +219,7 @@ register Char *str; * cshell( v, n, special_case_0 ); * * description - * does a shell sort of the first n elements of array v. + * Does a shell sort of the first n elements of array v. * If special_case_0 is true, then any element equal to 0 * is instead assumed to have infinite weight. 
* @@ -263,120 +231,91 @@ register Char *str; void cshell( v, n, special_case_0 ) Char v[]; int n, special_case_0; - - { - int gap, i, j, jg; - Char k; - - for ( gap = n / 2; gap > 0; gap = gap / 2 ) - for ( i = gap; i < n; ++i ) - for ( j = i - gap; j >= 0; j = j - gap ) - { - jg = j + gap; - - if ( special_case_0 ) - { - if ( v[jg] == 0 ) - break; - - else if ( v[j] != 0 && v[j] <= v[jg] ) - break; - } - - else if ( v[j] <= v[jg] ) - break; - - k = v[j]; - v[j] = v[jg]; - v[jg] = k; - } - } + { + int gap, i, j, jg; + Char k; + + for ( gap = n / 2; gap > 0; gap = gap / 2 ) + for ( i = gap; i < n; ++i ) + for ( j = i - gap; j >= 0; j = j - gap ) + { + jg = j + gap; + + if ( special_case_0 ) + { + if ( v[jg] == 0 ) + break; + + else if ( v[j] != 0 && v[j] <= v[jg] ) + break; + } + + else if ( v[j] <= v[jg] ) + break; + + k = v[j]; + v[j] = v[jg]; + v[jg] = k; + } + } -/* dataend - finish up a block of data declarations - * - * synopsis - * dataend(); - */ +/* dataend - finish up a block of data declarations */ void dataend() + { + if ( datapos > 0 ) + dataflush(); - { - if ( datapos > 0 ) - dataflush(); - - /* add terminator for initialization */ - puts( " } ;\n" ); - - dataline = 0; - datapos = 0; - } + /* add terminator for initialization; { for vi */ + puts( " } ;\n" ); + dataline = 0; + datapos = 0; + } -/* dataflush - flush generated data statements - * - * synopsis - * dataflush(); - */ +/* dataflush - flush generated data statements */ void dataflush() - - { - putchar( '\n' ); - - if ( ++dataline >= NUMDATALINES ) { - /* put out a blank line so that the table is grouped into - * large blocks that enable the user to find elements easily - */ putchar( '\n' ); - dataline = 0; - } - /* reset the number of characters written on the current line */ - datapos = 0; - } + if ( ++dataline >= NUMDATALINES ) + { + /* Put out a blank line so that the table is grouped into + * large blocks that enable the user to find elements easily. 
+ */ + putchar( '\n' ); + dataline = 0; + } + /* Reset the number of characters written on the current line. */ + datapos = 0; + } -/* flexerror - report an error message and terminate - * - * synopsis - * char msg[]; - * flexerror( msg ); - */ + +/* flexerror - report an error message and terminate */ void flexerror( msg ) char msg[]; - - { - fprintf( stderr, "%s: %s\n", program_name, msg ); - - flexend( 1 ); - } + { + fprintf( stderr, "%s: %s\n", program_name, msg ); + flexend( 1 ); + } -/* flexfatal - report a fatal error message and terminate - * - * synopsis - * char msg[]; - * flexfatal( msg ); - */ +/* flexfatal - report a fatal error message and terminate */ void flexfatal( msg ) char msg[]; - - { - fprintf( stderr, "%s: fatal internal error, %s\n", program_name, msg ); - exit( 1 ); - } + { + fprintf( stderr, "%s: fatal internal error, %s\n", program_name, msg ); + exit( 1 ); + } /* flex_gettime - return current time - * - * synopsis - * char *flex_gettime(), *time_str; - * time_str = flex_gettime(); * * note * the routine name has the "flex_" prefix because of name clashes @@ -399,385 +338,319 @@ typedef long time_t; #endif char *flex_gettime() + { + time_t t, time(); + char *result, *ctime(), *copy_string(); - { - time_t t, time(); - char *result, *ctime(), *copy_string(); - - t = time( (long *) 0 ); + t = time( (long *) 0 ); - result = copy_string( ctime( &t ) ); + result = copy_string( ctime( &t ) ); - /* get rid of trailing newline */ - result[24] = '\0'; + /* get rid of trailing newline */ + result[24] = '\0'; - return ( result ); - } + return result; + } -/* lerrif - report an error message formatted with one integer argument - * - * synopsis - * char msg[]; - * int arg; - * lerrif( msg, arg ); - */ +/* lerrif - report an error message formatted with one integer argument */ void lerrif( msg, arg ) char msg[]; int arg; - - { - char errmsg[MAXLINE]; - (void) sprintf( errmsg, msg, arg ); - flexerror( errmsg ); - } + { + char errmsg[MAXLINE]; + (void) 
sprintf( errmsg, msg, arg ); + flexerror( errmsg ); + } -/* lerrsf - report an error message formatted with one string argument - * - * synopsis - * char msg[], arg[]; - * lerrsf( msg, arg ); - */ +/* lerrsf - report an error message formatted with one string argument */ void lerrsf( msg, arg ) char msg[], arg[]; + { + char errmsg[MAXLINE]; - { - char errmsg[MAXLINE]; - - (void) sprintf( errmsg, msg, arg ); - flexerror( errmsg ); - } + (void) sprintf( errmsg, msg, arg ); + flexerror( errmsg ); + } -/* htoi - convert a hexadecimal digit string to an integer value - * - * synopsis: - * int val, htoi(); - * Char str[]; - * val = htoi( str ); - */ +/* htoi - convert a hexadecimal digit string to an integer value */ int htoi( str ) Char str[]; + { + unsigned int result; - { - unsigned int result; - - (void) sscanf( (char *) str, "%x", &result ); + (void) sscanf( (char *) str, "%x", &result ); - return ( result ); - } + return result; + } /* is_hex_digit - returns true if a character is a valid hex digit, false * otherwise - * - * synopsis: - * int true_or_false, is_hex_digit(); - * int ch; - * val = is_hex_digit( ch ); */ int is_hex_digit( ch ) int ch; - - { - if ( isdigit( ch ) ) - return ( 1 ); - - switch ( clower( ch ) ) { - case 'a': - case 'b': - case 'c': - case 'd': - case 'e': - case 'f': - return ( 1 ); - - default: - return ( 0 ); + if ( isdigit( ch ) ) + return 1; + + switch ( clower( ch ) ) + { + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + return 1; + + default: + return 0; + } } - } /* line_directive_out - spit out a "# line" statement */ void line_directive_out( output_file ) FILE *output_file; - - { - if ( infilename && gen_line_dirs ) { - char directive[MAXLINE]; - sprintf( directive, "# line %d \"%s\"\n", linenum, infilename ); + if ( infilename && gen_line_dirs ) + { + char directive[MAXLINE]; + sprintf( directive, "# line %d \"%s\"\n", linenum, infilename ); - /* if output_file is nil then we should put the directive in - * 
the accumulated actions. - */ - if ( output_file ) - fputs( directive, output_file ); - else - add_action( directive ); + /* If output_file is nil then we should put the directive in + * the accumulated actions. + */ + if ( output_file ) + fputs( directive, output_file ); + else + add_action( directive ); + } } - } /* mark_prolog - mark the current position in the action array as * representing the action prolog */ void mark_prolog() - { - prolog = action_array; - action_array[action_index++] = '\0'; - action_offset = action_index; - action = &action_array[action_offset]; - action_index = 0; - action[action_index] = '\0'; - } + { + prolog = action_array; + action_array[action_index++] = '\0'; + action_offset = action_index; + action = &action_array[action_offset]; + action_index = 0; + action[action_index] = '\0'; + } /* mk2data - generate a data statement for a two-dimensional array * - * synopsis - * int value; - * mk2data( value ); - * - * generates a data statement initializing the current 2-D array to "value" + * Generates a data statement initializing the current 2-D array to "value". */ void mk2data( value ) int value; - - { - if ( datapos >= NUMDATAITEMS ) { - putchar( ',' ); - dataflush(); - } + if ( datapos >= NUMDATAITEMS ) + { + putchar( ',' ); + dataflush(); + } - if ( datapos == 0 ) - /* indent */ - fputs( " ", stdout ); + if ( datapos == 0 ) + /* Indent. */ + fputs( " ", stdout ); - else - putchar( ',' ); + else + putchar( ',' ); - ++datapos; + ++datapos; - printf( "%5d", value ); - } + printf( "%5d", value ); + } /* mkdata - generate a data statement * - * synopsis - * int value; - * mkdata( value ); - * - * generates a data statement initializing the current array element to - * "value" + * Generates a data statement initializing the current array element to + * "value". 
*/ void mkdata( value ) int value; - - { - if ( datapos >= NUMDATAITEMS ) { - putchar( ',' ); - dataflush(); - } - - if ( datapos == 0 ) - /* indent */ - fputs( " ", stdout ); + if ( datapos >= NUMDATAITEMS ) + { + putchar( ',' ); + dataflush(); + } - else - putchar( ',' ); + if ( datapos == 0 ) + /* Indent. */ + fputs( " ", stdout ); + else + putchar( ',' ); - ++datapos; + ++datapos; - printf( "%5d", value ); - } + printf( "%5d", value ); + } -/* myctoi - return the integer represented by a string of digits - * - * synopsis - * Char array[]; - * int val, myctoi(); - * val = myctoi( array ); - * - */ +/* myctoi - return the integer represented by a string of digits */ int myctoi( array ) Char array[]; + { + int val = 0; - { - int val = 0; - - (void) sscanf( (char *) array, "%d", &val ); + (void) sscanf( (char *) array, "%d", &val ); - return ( val ); - } + return val; + } -/* myesc - return character corresponding to escape sequence - * - * synopsis - * Char array[], c, myesc(); - * c = myesc( array ); - * - */ +/* myesc - return character corresponding to escape sequence */ Char myesc( array ) Char array[]; - - { - Char c, esc_char; - register int sptr; - - switch ( array[1] ) { + Char c, esc_char; + register int sptr; + + switch ( array[1] ) + { #ifdef __STDC__ - case 'a': return ( '\a' ); + case 'a': return '\a'; #else - case 'a': return ( '\007' ); + case 'a': return '\007'; #endif - case 'b': return ( '\b' ); - case 'f': return ( '\f' ); - case 'n': return ( '\n' ); - case 'r': return ( '\r' ); - case 't': return ( '\t' ); - case 'v': return ( '\v' ); - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { /* \ */ - sptr = 1; - - while ( isascii( array[sptr] ) && isdigit( array[sptr] ) ) - /* don't increment inside loop control because if - * isdigit() is a macro it might expand into multiple - * increments ... 
- */ - ++sptr; - - c = array[sptr]; - array[sptr] = '\0'; - - esc_char = otoi( array + 1 ); - - array[sptr] = c; - - return ( esc_char ); - } - - case 'x': - { /* \x */ - int sptr = 2; - - while ( isascii( array[sptr] ) && - is_hex_digit( (char) array[sptr] ) ) - /* don't increment inside loop control because if - * isdigit() is a macro it might expand into multiple - * increments ... - */ - ++sptr; - - c = array[sptr]; - array[sptr] = '\0'; - - esc_char = htoi( array + 2 ); - - array[sptr] = c; - - return ( esc_char ); - } - - default: - return ( array[1] ); + case 'b': return '\b'; + case 'f': return '\f'; + case 'n': return '\n'; + case 'r': return '\r'; + case 't': return '\t'; + case 'v': return '\v'; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { /* \ */ + sptr = 1; + + while ( isascii( array[sptr] ) && + isdigit( array[sptr] ) ) + /* Don't increment inside loop control + * because if isdigit() is a macro it might + * expand into multiple increments ... + */ + ++sptr; + + c = array[sptr]; + array[sptr] = '\0'; + + esc_char = otoi( array + 1 ); + + array[sptr] = c; + + return esc_char; + } + + case 'x': + { /* \x */ + int sptr = 2; + + while ( isascii( array[sptr] ) && + is_hex_digit( (char) array[sptr] ) ) + /* Don't increment inside loop control + * because if isdigit() is a macro it might + * expand into multiple increments ... 
+ */ + ++sptr; + + c = array[sptr]; + array[sptr] = '\0'; + + esc_char = htoi( array + 2 ); + + array[sptr] = c; + + return esc_char; + } + + default: + return array[1]; + } } - } -/* otoi - convert an octal digit string to an integer value - * - * synopsis: - * int val, otoi(); - * Char str[]; - * val = otoi( str ); - */ +/* otoi - convert an octal digit string to an integer value */ int otoi( str ) Char str[]; + { + unsigned int result; - { - unsigned int result; - - (void) sscanf( (char *) str, "%o", &result ); - - return ( result ); - } + (void) sscanf( (char *) str, "%o", &result ); + return result; + } /* readable_form - return the the human-readable form of a character - * - * synopsis: - * int c; - * char *readable_form(); - * = readable_form( c ); * * The returned string is in static storage. */ char *readable_form( c ) register int c; - - { - static char rform[10]; - - if ( (c >= 0 && c < 32) || c >= 127 ) { - switch ( c ) - { + static char rform[10]; + + if ( (c >= 0 && c < 32) || c >= 127 ) + { + switch ( c ) + { #ifdef __STDC__ - case '\a': return ( "\\a" ); + case '\a': return "\\a"; #endif - case '\b': return ( "\\b" ); - case '\f': return ( "\\f" ); - case '\n': return ( "\\n" ); - case '\r': return ( "\\r" ); - case '\t': return ( "\\t" ); - case '\v': return ( "\\v" ); - - default: - (void) sprintf( rform, "\\%.3o", (unsigned int) c ); - return ( rform ); - } - } + case '\b': return "\\b"; + case '\f': return "\\f"; + case '\n': return "\\n"; + case '\r': return "\\r"; + case '\t': return "\\t"; + case '\v': return "\\v"; + + default: + (void) sprintf( rform, "\\%.3o", + (unsigned int) c ); + return rform; + } + } - else if ( c == ' ' ) - return ( "' '" ); + else if ( c == ' ' ) + return "' '"; - else - { - rform[0] = c; - rform[1] = '\0'; + else + { + rform[0] = c; + rform[1] = '\0'; - return ( rform ); + return rform; + } } - } /* reallocate_array - increase the size of a dynamic array */ @@ -785,65 +658,57 @@ register int c; void 
*reallocate_array( array, size, element_size ) void *array; int size, element_size; + { + register void *new_array; - { - register void *new_array; - - /* same worry as in allocate_array(): */ - if ( size * element_size <= 0 ) - flexfatal( "attempt to increase array size by less than 1 byte" ); + /* Same worry as in allocate_array(): */ + if ( size * element_size <= 0 ) + flexfatal( + "attempt to increase array size by less than 1 byte" ); - new_array = + new_array = (void *) realloc( (char *)array, (unsigned) (size * element_size )); - if ( new_array == NULL ) - flexfatal( "attempt to increase array size failed" ); + if ( new_array == NULL ) + flexfatal( "attempt to increase array size failed" ); - return ( new_array ); - } + return new_array; + } /* skelout - write out one section of the skeleton file * - * synopsis - * skelout(); - * - * DESCRIPTION + * Description * Copies skelfile or skel array to stdout until a line beginning with * "%%" or EOF is found. */ void skelout() - - { - if ( skelfile ) { - char buf[MAXLINE]; - - while ( fgets( buf, MAXLINE, skelfile ) != NULL ) - if ( buf[0] == '%' && buf[1] == '%' ) - break; - else - fputs( buf, stdout ); - } + if ( skelfile ) + { + char buf[MAXLINE]; - else - { /* copy from skel array */ - char *buf; + while ( fgets( buf, MAXLINE, skelfile ) != NULL ) + if ( buf[0] == '%' && buf[1] == '%' ) + break; + else + fputs( buf, stdout ); + } - while ( (buf = skel[skel_ind++]) ) - if ( buf[0] == '%' && buf[1] == '%' ) - break; - else - printf( "%s\n", buf ); + else + { /* copy from skel array */ + char *buf; + + while ( (buf = skel[skel_ind++]) ) + if ( buf[0] == '%' && buf[1] == '%' ) + break; + else + printf( "%s\n", buf ); + } } - } /* transition_struct_out - output a yy_trans_info structure - * - * synopsis - * int element_v, element_n; - * transition_struct_out( element_v, element_n ); * * outputs the yy_trans_info structure with the two elements, element_v and * element_n. 
Formats the output with spaces and carriage returns. @@ -851,44 +716,37 @@ void skelout() void transition_struct_out( element_v, element_n ) int element_v, element_n; + { + printf( "%7d, %5d,", element_v, element_n ); - { - printf( "%7d, %5d,", element_v, element_n ); - - datapos += TRANS_STRUCT_PRINT_LENGTH; + datapos += TRANS_STRUCT_PRINT_LENGTH; - if ( datapos >= 75 ) - { - putchar( '\n' ); + if ( datapos >= 75 ) + { + putchar( '\n' ); - if ( ++dataline % 10 == 0 ) - putchar( '\n' ); + if ( ++dataline % 10 == 0 ) + putchar( '\n' ); - datapos = 0; + datapos = 0; + } } - } /* zero_out - set a region of memory to 0 * - * synopsis - * char *region_ptr; - * int size_in_bytes; - * zero_out( region_ptr, size_in_bytes ); - * - * sets region_ptr[0] through region_ptr[size_in_bytes - 1] to zero. + * Sets region_ptr[0] through region_ptr[size_in_bytes - 1] to zero. */ void zero_out( region_ptr, size_in_bytes ) char *region_ptr; int size_in_bytes; + { + register char *rp, *rp_end; - { - register char *rp, *rp_end; - - rp = region_ptr; - rp_end = region_ptr + size_in_bytes; + rp = region_ptr; + rp_end = region_ptr + size_in_bytes; - while ( rp < rp_end ) - *rp++ = 0; - } + while ( rp < rp_end ) + *rp++ = 0; + } diff --git a/nfa.c b/nfa.c index 6e50109..cca39d6 100644 --- a/nfa.c +++ b/nfa.c @@ -41,34 +41,29 @@ void mkxtion PROTO((int, int)); /* add_accept - add an accepting state to a machine - * - * synopsis - * - * add_accept( mach, accepting_number ); * * accepting_number becomes mach's accepting number. */ void add_accept( mach, accepting_number ) int mach, accepting_number; + { + /* Hang the accepting number off an epsilon state. if it is associated + * with a state that has a non-epsilon out-transition, then the state + * will accept BEFORE it makes that transition, i.e., one character + * too soon. + */ - { - /* hang the accepting number off an epsilon state. 
if it is associated - * with a state that has a non-epsilon out-transition, then the state - * will accept BEFORE it makes that transition, i.e., one character - * too soon - */ - - if ( transchar[finalst[mach]] == SYM_EPSILON ) - accptnum[finalst[mach]] = accepting_number; + if ( transchar[finalst[mach]] == SYM_EPSILON ) + accptnum[finalst[mach]] = accepting_number; - else - { - int astate = mkstate( SYM_EPSILON ); - accptnum[astate] = accepting_number; - mach = link_machines( mach, astate ); + else + { + int astate = mkstate( SYM_EPSILON ); + accptnum[astate] = accepting_number; + mach = link_machines( mach, astate ); + } } - } /* copysingl - make a given number of copies of a singleton machine @@ -84,61 +79,56 @@ int mach, accepting_number; int copysingl( singl, num ) int singl, num; + { + int copy, i; - { - int copy, i; + copy = mkstate( SYM_EPSILON ); - copy = mkstate( SYM_EPSILON ); + for ( i = 1; i <= num; ++i ) + copy = link_machines( copy, dupmachine( singl ) ); - for ( i = 1; i <= num; ++i ) - copy = link_machines( copy, dupmachine( singl ) ); + return copy; + } - return ( copy ); - } - -/* dumpnfa - debugging routine to write out an nfa - * - * synopsis - * int state1; - * dumpnfa( state1 ); - */ +/* dumpnfa - debugging routine to write out an nfa */ void dumpnfa( state1 ) int state1; - { - int sym, tsp1, tsp2, anum, ns; + { + int sym, tsp1, tsp2, anum, ns; - fprintf( stderr, "\n\n********** beginning dump of nfa with start state %d\n", - state1 ); + fprintf( stderr, + "\n\n********** beginning dump of nfa with start state %d\n", + state1 ); - /* we probably should loop starting at firstst[state1] and going to - * lastst[state1], but they're not maintained properly when we "or" - * all of the rules together. So we use our knowledge that the machine - * starts at state 1 and ends at lastnfa. 
- */ + /* We probably should loop starting at firstst[state1] and going to + * lastst[state1], but they're not maintained properly when we "or" + * all of the rules together. So we use our knowledge that the machine + * starts at state 1 and ends at lastnfa. + */ - /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */ - for ( ns = 1; ns <= lastnfa; ++ns ) - { - fprintf( stderr, "state # %4d\t", ns ); + /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */ + for ( ns = 1; ns <= lastnfa; ++ns ) + { + fprintf( stderr, "state # %4d\t", ns ); - sym = transchar[ns]; - tsp1 = trans1[ns]; - tsp2 = trans2[ns]; - anum = accptnum[ns]; + sym = transchar[ns]; + tsp1 = trans1[ns]; + tsp2 = trans2[ns]; + anum = accptnum[ns]; - fprintf( stderr, "%3d: %4d, %4d", sym, tsp1, tsp2 ); + fprintf( stderr, "%3d: %4d, %4d", sym, tsp1, tsp2 ); - if ( anum != NIL ) - fprintf( stderr, " [%d]", anum ); + if ( anum != NIL ) + fprintf( stderr, " [%d]", anum ); - fprintf( stderr, "\n" ); - } + fprintf( stderr, "\n" ); + } - fprintf( stderr, "********** end of dump\n" ); - } + fprintf( stderr, "********** end of dump\n" ); + } /* dupmachine - make a duplicate of a given machine @@ -160,46 +150,43 @@ int state1; int dupmachine( mach ) int mach; - - { - int i, init, state_offset; - int state = 0; - int last = lastst[mach]; - - for ( i = firstst[mach]; i <= last; ++i ) { - state = mkstate( transchar[i] ); + int i, init, state_offset; + int state = 0; + int last = lastst[mach]; - if ( trans1[i] != NO_TRANSITION ) - { - mkxtion( finalst[state], trans1[i] + state - i ); + for ( i = firstst[mach]; i <= last; ++i ) + { + state = mkstate( transchar[i] ); - if ( transchar[i] == SYM_EPSILON && trans2[i] != NO_TRANSITION ) - mkxtion( finalst[state], trans2[i] + state - i ); - } + if ( trans1[i] != NO_TRANSITION ) + { + mkxtion( finalst[state], trans1[i] + state - i ); - accptnum[state] = accptnum[i]; - } + if ( transchar[i] == SYM_EPSILON && + trans2[i] != NO_TRANSITION ) + mkxtion( 
finalst[state], + trans2[i] + state - i ); + } - if ( state == 0 ) - flexfatal( "empty machine in dupmachine()" ); + accptnum[state] = accptnum[i]; + } + + if ( state == 0 ) + flexfatal( "empty machine in dupmachine()" ); - state_offset = state - i + 1; + state_offset = state - i + 1; - init = mach + state_offset; - firstst[init] = firstst[mach] + state_offset; - finalst[init] = finalst[mach] + state_offset; - lastst[init] = lastst[mach] + state_offset; + init = mach + state_offset; + firstst[init] = firstst[mach] + state_offset; + finalst[init] = finalst[mach] + state_offset; + lastst[init] = lastst[mach] + state_offset; - return ( init ); - } + return init; + } /* finish_rule - finish up the processing for a rule - * - * synopsis - * - * finish_rule( mach, variable_trail_rule, headcnt, trailcnt ); * * An accepting number is added to the given machine. If variable_trail_rule * is true then the rule has trailing context and both the head and trail @@ -213,70 +200,74 @@ int mach; void finish_rule( mach, variable_trail_rule, headcnt, trailcnt ) int mach, variable_trail_rule, headcnt, trailcnt; + { + char action_text[MAXLINE]; - { - char action_text[MAXLINE]; - - add_accept( mach, num_rules ); + add_accept( mach, num_rules ); - /* we did this in new_rule(), but it often gets the wrong - * number because we do it before we start parsing the current rule - */ - rule_linenum[num_rules] = linenum; + /* We did this in new_rule(), but it often gets the wrong + * number because we do it before we start parsing the current rule. + */ + rule_linenum[num_rules] = linenum; - /* if this is a continued action, then the line-number has - * already been updated, giving us the wrong number - */ - if ( continued_action ) - --rule_linenum[num_rules]; + /* If this is a continued action, then the line-number has already + * been updated, giving us the wrong number. 
+ */ + if ( continued_action ) + --rule_linenum[num_rules]; - sprintf( action_text, "case %d:\n", num_rules ); - add_action( action_text ); + sprintf( action_text, "case %d:\n", num_rules ); + add_action( action_text ); - if ( variable_trail_rule ) - { - rule_type[num_rules] = RULE_VARIABLE; + if ( variable_trail_rule ) + { + rule_type[num_rules] = RULE_VARIABLE; - if ( performance_report > 0 ) - fprintf( stderr, "Variable trailing context rule at line %d\n", - rule_linenum[num_rules] ); + if ( performance_report > 0 ) + fprintf( stderr, + "Variable trailing context rule at line %d\n", + rule_linenum[num_rules] ); - variable_trailing_context_rules = true; - } + variable_trailing_context_rules = true; + } - else - { - rule_type[num_rules] = RULE_NORMAL; + else + { + rule_type[num_rules] = RULE_NORMAL; - if ( headcnt > 0 || trailcnt > 0 ) - { - /* do trailing context magic to not match the trailing characters */ - char *scanner_cp = "yy_c_buf_p = yy_cp"; - char *scanner_bp = "yy_bp"; + if ( headcnt > 0 || trailcnt > 0 ) + { + /* Do trailing context magic to not match the trailing + * characters. 
+ */ + char *scanner_cp = "yy_c_buf_p = yy_cp"; + char *scanner_bp = "yy_bp"; - add_action( + add_action( "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */\n" ); - if ( headcnt > 0 ) - { - sprintf( action_text, "%s = %s + %d;\n", - scanner_cp, scanner_bp, headcnt ); - add_action( action_text ); + if ( headcnt > 0 ) + { + sprintf( action_text, "%s = %s + %d;\n", + scanner_cp, scanner_bp, headcnt ); + add_action( action_text ); + } + + else + { + sprintf( action_text, "%s -= %d;\n", + scanner_cp, trailcnt ); + add_action( action_text ); + } + + add_action( + "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" ); + } } - else - { - sprintf( action_text, "%s -= %d;\n", scanner_cp, trailcnt ); - add_action( action_text ); - } - - add_action( "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" ); - } + line_directive_out( (FILE *) 0 ); } - line_directive_out( (FILE *) 0 ); - } - /* link_machines - connect two machines together * @@ -296,67 +287,62 @@ int mach, variable_trail_rule, headcnt, trailcnt; int link_machines( first, last ) int first, last; + { + if ( first == NIL ) + return last; - { - if ( first == NIL ) - return ( last ); - - else if ( last == NIL ) - return ( first ); + else if ( last == NIL ) + return first; - else - { - mkxtion( finalst[first], last ); - finalst[first] = finalst[last]; - lastst[first] = max( lastst[first], lastst[last] ); - firstst[first] = min( firstst[first], firstst[last] ); + else + { + mkxtion( finalst[first], last ); + finalst[first] = finalst[last]; + lastst[first] = max( lastst[first], lastst[last] ); + firstst[first] = min( firstst[first], firstst[last] ); - return ( first ); + return first; + } } - } /* mark_beginning_as_normal - mark each "beginning" state in a machine * as being a "normal" (i.e., not trailing context- * associated) states * - * synopsis - * - * mark_beginning_as_normal( mach ) - * - * mach - machine to mark - * * The "beginning" states are the epsilon closure of the first state */ void 
mark_beginning_as_normal( mach ) register int mach; - - { - switch ( state_type[mach] ) { - case STATE_NORMAL: - /* oh, we've already visited here */ - return; - - case STATE_TRAILING_CONTEXT: - state_type[mach] = STATE_NORMAL; - - if ( transchar[mach] == SYM_EPSILON ) + switch ( state_type[mach] ) { - if ( trans1[mach] != NO_TRANSITION ) - mark_beginning_as_normal( trans1[mach] ); - - if ( trans2[mach] != NO_TRANSITION ) - mark_beginning_as_normal( trans2[mach] ); + case STATE_NORMAL: + /* Oh, we've already visited here. */ + return; + + case STATE_TRAILING_CONTEXT: + state_type[mach] = STATE_NORMAL; + + if ( transchar[mach] == SYM_EPSILON ) + { + if ( trans1[mach] != NO_TRANSITION ) + mark_beginning_as_normal( + trans1[mach] ); + + if ( trans2[mach] != NO_TRANSITION ) + mark_beginning_as_normal( + trans2[mach] ); + } + break; + + default: + flexerror( + "bad state type in mark_beginning_as_normal()" ); + break; } - break; - - default: - flexerror( "bad state type in mark_beginning_as_normal()" ); - break; } - } /* mkbranch - make a machine that branches to two machines @@ -368,30 +354,29 @@ register int mach; * branch - a machine which matches either first's pattern or second's * first, second - machines whose patterns are to be or'ed (the | operator) * - * note that first and second are NEITHER destroyed by the operation. Also, + * Note that first and second are NEITHER destroyed by the operation. Also, * the resulting machine CANNOT be used with any other "mk" operation except * more mkbranch's. 
Compare with mkor() */ int mkbranch( first, second ) int first, second; + { + int eps; - { - int eps; - - if ( first == NO_TRANSITION ) - return ( second ); + if ( first == NO_TRANSITION ) + return second; - else if ( second == NO_TRANSITION ) - return ( first ); + else if ( second == NO_TRANSITION ) + return first; - eps = mkstate( SYM_EPSILON ); + eps = mkstate( SYM_EPSILON ); - mkxtion( eps, first ); - mkxtion( eps, second ); + mkxtion( eps, first ); + mkxtion( eps, second ); - return ( eps ); - } + return eps; + } /* mkclos - convert a machine into a closure @@ -399,15 +384,14 @@ int first, second; * synopsis * new = mkclos( state ); * - * new - a new state which matches the closure of "state" + * new - a new state which matches the closure of "state" */ int mkclos( state ) int state; - - { - return ( mkopt( mkposcl( state ) ) ); - } + { + return mkopt( mkposcl( state ) ); + } /* mkopt - make a machine optional @@ -426,27 +410,26 @@ int state; int mkopt( mach ) int mach; + { + int eps; - { - int eps; + if ( ! SUPER_FREE_EPSILON(finalst[mach]) ) + { + eps = mkstate( SYM_EPSILON ); + mach = link_machines( mach, eps ); + } - if ( ! SUPER_FREE_EPSILON(finalst[mach]) ) - { + /* Can't skimp on the following if FREE_EPSILON(mach) is true because + * some state interior to "mach" might point back to the beginning + * for a closure. 
+ */ eps = mkstate( SYM_EPSILON ); - mach = link_machines( mach, eps ); - } - - /* can't skimp on the following if FREE_EPSILON(mach) is true because - * some state interior to "mach" might point back to the beginning - * for a closure - */ - eps = mkstate( SYM_EPSILON ); - mach = link_machines( eps, mach ); + mach = link_machines( eps, mach ); - mkxtion( mach, finalst[mach] ); + mkxtion( mach, finalst[mach] ); - return ( mach ); - } + return mach; + } /* mkor - make a machine that matches either one of two machines @@ -465,56 +448,55 @@ int mach; int mkor( first, second ) int first, second; - - { - int eps, orend; - - if ( first == NIL ) - return ( second ); - - else if ( second == NIL ) - return ( first ); - - else { - /* see comment in mkopt() about why we can't use the first state - * of "first" or "second" if they satisfy "FREE_EPSILON" - */ - eps = mkstate( SYM_EPSILON ); + int eps, orend; - first = link_machines( eps, first ); + if ( first == NIL ) + return second; - mkxtion( first, second ); - - if ( SUPER_FREE_EPSILON(finalst[first]) && - accptnum[finalst[first]] == NIL ) - { - orend = finalst[first]; - mkxtion( finalst[second], orend ); - } - - else if ( SUPER_FREE_EPSILON(finalst[second]) && - accptnum[finalst[second]] == NIL ) - { - orend = finalst[second]; - mkxtion( finalst[first], orend ); - } + else if ( second == NIL ) + return first; else - { - eps = mkstate( SYM_EPSILON ); - - first = link_machines( first, eps ); - orend = finalst[first]; + { + /* See comment in mkopt() about why we can't use the first + * state of "first" or "second" if they satisfy "FREE_EPSILON". 
+ */ + eps = mkstate( SYM_EPSILON ); + + first = link_machines( eps, first ); + + mkxtion( first, second ); + + if ( SUPER_FREE_EPSILON(finalst[first]) && + accptnum[finalst[first]] == NIL ) + { + orend = finalst[first]; + mkxtion( finalst[second], orend ); + } + + else if ( SUPER_FREE_EPSILON(finalst[second]) && + accptnum[finalst[second]] == NIL ) + { + orend = finalst[second]; + mkxtion( finalst[first], orend ); + } + + else + { + eps = mkstate( SYM_EPSILON ); + + first = link_machines( first, eps ); + orend = finalst[first]; + + mkxtion( finalst[second], orend ); + } + } - mkxtion( finalst[second], orend ); - } + finalst[first] = orend; + return first; } - finalst[first] = orend; - return ( first ); - } - /* mkposcl - convert a machine into a positive closure * @@ -526,23 +508,22 @@ int first, second; int mkposcl( state ) int state; - - { - int eps; - - if ( SUPER_FREE_EPSILON(finalst[state]) ) { - mkxtion( finalst[state], state ); - return ( state ); - } + int eps; - else - { - eps = mkstate( SYM_EPSILON ); - mkxtion( eps, state ); - return ( link_machines( state, eps ) ); + if ( SUPER_FREE_EPSILON(finalst[state]) ) + { + mkxtion( finalst[state], state ); + return state; + } + + else + { + eps = mkstate( SYM_EPSILON ); + mkxtion( eps, state ); + return link_machines( state, eps ); + } } - } /* mkrep - make a replicated machine @@ -559,35 +540,34 @@ int state; int mkrep( mach, lb, ub ) int mach, lb, ub; + { + int base_mach, tail, copy, i; - { - int base_mach, tail, copy, i; + base_mach = copysingl( mach, lb - 1 ); - base_mach = copysingl( mach, lb - 1 ); + if ( ub == INFINITY ) + { + copy = dupmachine( mach ); + mach = link_machines( mach, + link_machines( base_mach, mkclos( copy ) ) ); + } - if ( ub == INFINITY ) - { - copy = dupmachine( mach ); - mach = link_machines( mach, - link_machines( base_mach, mkclos( copy ) ) ); - } + else + { + tail = mkstate( SYM_EPSILON ); - else - { - tail = mkstate( SYM_EPSILON ); + for ( i = lb; i < ub; ++i ) + { + copy = 
dupmachine( mach ); + tail = mkopt( link_machines( copy, tail ) ); + } - for ( i = lb; i < ub; ++i ) - { - copy = dupmachine( mach ); - tail = mkopt( link_machines( copy, tail ) ); - } + mach = link_machines( mach, link_machines( base_mach, tail ) ); + } - mach = link_machines( mach, link_machines( base_mach, tail ) ); + return mach; } - return ( mach ); - } - /* mkstate - create a state with a transition on a given symbol * @@ -607,64 +587,68 @@ int mach, lb, ub; int mkstate( sym ) int sym; - - { - if ( ++lastnfa >= current_mns ) { - if ( (current_mns += MNS_INCREMENT) >= MAXIMUM_MNS ) - lerrif( "input rules are too complicated (>= %d NFA states)", - current_mns ); - - ++num_reallocs; - - firstst = reallocate_integer_array( firstst, current_mns ); - lastst = reallocate_integer_array( lastst, current_mns ); - finalst = reallocate_integer_array( finalst, current_mns ); - transchar = reallocate_integer_array( transchar, current_mns ); - trans1 = reallocate_integer_array( trans1, current_mns ); - trans2 = reallocate_integer_array( trans2, current_mns ); - accptnum = reallocate_integer_array( accptnum, current_mns ); - assoc_rule = reallocate_integer_array( assoc_rule, current_mns ); - state_type = reallocate_integer_array( state_type, current_mns ); - } + if ( ++lastnfa >= current_mns ) + { + if ( (current_mns += MNS_INCREMENT) >= MAXIMUM_MNS ) + lerrif( + "input rules are too complicated (>= %d NFA states)", + current_mns ); + + ++num_reallocs; + + firstst = reallocate_integer_array( firstst, current_mns ); + lastst = reallocate_integer_array( lastst, current_mns ); + finalst = reallocate_integer_array( finalst, current_mns ); + transchar = reallocate_integer_array( transchar, current_mns ); + trans1 = reallocate_integer_array( trans1, current_mns ); + trans2 = reallocate_integer_array( trans2, current_mns ); + accptnum = reallocate_integer_array( accptnum, current_mns ); + assoc_rule = + reallocate_integer_array( assoc_rule, current_mns ); + state_type = + 
reallocate_integer_array( state_type, current_mns ); + } - firstst[lastnfa] = lastnfa; - finalst[lastnfa] = lastnfa; - lastst[lastnfa] = lastnfa; - transchar[lastnfa] = sym; - trans1[lastnfa] = NO_TRANSITION; - trans2[lastnfa] = NO_TRANSITION; - accptnum[lastnfa] = NIL; - assoc_rule[lastnfa] = num_rules; - state_type[lastnfa] = current_state_type; - - /* fix up equivalence classes base on this transition. Note that any - * character which has its own transition gets its own equivalence class. - * Thus only characters which are only in character classes have a chance - * at being in the same equivalence class. E.g. "a|b" puts 'a' and 'b' - * into two different equivalence classes. "[ab]" puts them in the same - * equivalence class (barring other differences elsewhere in the input). - */ - - if ( sym < 0 ) - { - /* we don't have to update the equivalence classes since that was - * already done when the ccl was created for the first time + firstst[lastnfa] = lastnfa; + finalst[lastnfa] = lastnfa; + lastst[lastnfa] = lastnfa; + transchar[lastnfa] = sym; + trans1[lastnfa] = NO_TRANSITION; + trans2[lastnfa] = NO_TRANSITION; + accptnum[lastnfa] = NIL; + assoc_rule[lastnfa] = num_rules; + state_type[lastnfa] = current_state_type; + + /* Fix up equivalence classes base on this transition. Note that any + * character which has its own transition gets its own equivalence + * class. Thus only characters which are only in character classes + * have a chance at being in the same equivalence class. E.g. "a|b" + * puts 'a' and 'b' into two different equivalence classes. "[ab]" + * puts them in the same equivalence class (barring other differences + * elsewhere in the input). */ - } - else if ( sym == SYM_EPSILON ) - ++numeps; + if ( sym < 0 ) + { + /* We don't have to update the equivalence classes since + * that was already done when the ccl was created for the + * first time. + */ + } - else - { - if ( useecs ) - /* map NUL's to csize */ - mkechar( sym ? 
sym : csize, nextecm, ecgroup ); - } + else if ( sym == SYM_EPSILON ) + ++numeps; - return ( lastnfa ); - } + else + { + if ( useecs ) + /* Map NUL's to csize. */ + mkechar( sym ? sym : csize, nextecm, ecgroup ); + } + + return lastnfa; + } /* mkxtion - make a transition from one state to another @@ -679,49 +663,40 @@ int sym; void mkxtion( statefrom, stateto ) int statefrom, stateto; + { + if ( trans1[statefrom] == NO_TRANSITION ) + trans1[statefrom] = stateto; - { - if ( trans1[statefrom] == NO_TRANSITION ) - trans1[statefrom] = stateto; - - else if ( (transchar[statefrom] != SYM_EPSILON) || - (trans2[statefrom] != NO_TRANSITION) ) - flexfatal( "found too many transitions in mkxtion()" ); + else if ( (transchar[statefrom] != SYM_EPSILON) || + (trans2[statefrom] != NO_TRANSITION) ) + flexfatal( "found too many transitions in mkxtion()" ); - else - { /* second out-transition for an epsilon state */ - ++eps2; - trans2[statefrom] = stateto; + else + { /* second out-transition for an epsilon state */ + ++eps2; + trans2[statefrom] = stateto; + } } - } -/* new_rule - initialize for a new rule - * - * synopsis - * - * new_rule(); - * - * the global num_rules is incremented and the any corresponding dynamic - * arrays (such as rule_type[]) are grown as needed. 
- */ +/* new_rule - initialize for a new rule */ void new_rule() - - { - if ( ++num_rules >= current_max_rules ) { - ++num_reallocs; - current_max_rules += MAX_RULES_INCREMENT; - rule_type = reallocate_integer_array( rule_type, current_max_rules ); - rule_linenum = - reallocate_integer_array( rule_linenum, current_max_rules ); - rule_useful = - reallocate_integer_array( rule_useful, current_max_rules ); - } + if ( ++num_rules >= current_max_rules ) + { + ++num_reallocs; + current_max_rules += MAX_RULES_INCREMENT; + rule_type = reallocate_integer_array( rule_type, + current_max_rules ); + rule_linenum = reallocate_integer_array( rule_linenum, + current_max_rules ); + rule_useful = reallocate_integer_array( rule_useful, + current_max_rules ); + } - if ( num_rules > MAX_RULE ) - lerrif( "too many rules (> %d)!", MAX_RULE ); + if ( num_rules > MAX_RULE ) + lerrif( "too many rules (> %d)!", MAX_RULE ); - rule_linenum[num_rules] = linenum; - rule_useful[num_rules] = false; - } + rule_linenum[num_rules] = linenum; + rule_useful[num_rules] = false; + } diff --git a/parse.y b/parse.y index 4eca065..828e0e2 100644 --- a/parse.y +++ b/parse.y @@ -58,7 +58,7 @@ goal : initlex sect1 sect1end sect2 initforrule def_rule = mkstate( -pat ); - /* remember the number of the default rule so we + /* Remember the number of the default rule so we * don't generate "can't match" warnings for it. 
*/ default_rule = num_rules; @@ -66,13 +66,13 @@ goal : initlex sect1 sect1end sect2 initforrule finish_rule( def_rule, false, 0, 0 ); for ( i = 1; i <= lastsc; ++i ) - scset[i] = mkbranch( scset[i], def_rule ); + scset[i] = mkbranch( scset[i], def_rule ); if ( spprdflt ) - add_action( + add_action( "YY_FATAL_ERROR( \"flex scanner jammed\" )" ); else - add_action( "ECHO" ); + add_action( "ECHO" ); add_action( ";\n\tYY_BREAK\n" ); } @@ -81,11 +81,11 @@ goal : initlex sect1 sect1end sect2 initforrule initlex : { /* initialize for processing rules */ - /* create default DFA start condition */ + /* Create default DFA start condition. */ scinstal( "INITIAL", false ); - /* initially, the start condition scoping is - * "no start conditions active" + /* Initially, the start condition scoping is + * "no start conditions active". */ actvp = 0; } @@ -99,26 +99,19 @@ sect1 : sect1 startconddecl WHITESPACE namelist1 '\n' sect1end : SECTEND { - /* we now know how many start conditions there + /* We now know how many start conditions there * are, so create the "activity" map indicating * which conditions are active. */ active_ss = allocate_integer_array( lastsc + 1 ); for ( i = 1; i <= lastsc; ++i ) - active_ss[i] = 0; + active_ss[i] = 0; } ; startconddecl : SCDECL - { - /* these productions are separate from the s1object - * rule because the semantics must be done before - * we parse the remainder of an s1object - */ - - xcluflg = false; - } + { xcluflg = false; } | XSCDECL { xcluflg = true; } @@ -131,7 +124,7 @@ namelist1 : namelist1 WHITESPACE NAME { scinstal( nmstr, xcluflg ); } | error - { synerr( "bad start condition list" ); } + { synerr( "bad start condition list" ); } ; sect2 : sect2 initforrule flexrule '\n' @@ -140,7 +133,7 @@ sect2 : sect2 initforrule flexrule '\n' initforrule : { - /* initialize for a parse of one rule */ + /* Initialize for a parse of one rule. 
*/ trlcontxt = variable_trail_rule = varlength = false; trailcnt = headcnt = rulelen = 0; current_state_type = STATE_NORMAL; @@ -150,69 +143,69 @@ initforrule : ; flexrule : scon '^' rule - { + { pat = $3; finish_rule( pat, variable_trail_rule, - headcnt, trailcnt ); + headcnt, trailcnt ); for ( i = 1; i <= actvp; ++i ) - scbol[actvsc[i]] = - mkbranch( scbol[actvsc[i]], pat ); + scbol[actvsc[i]] = + mkbranch( scbol[actvsc[i]], pat ); if ( ! bol_needed ) - { - bol_needed = true; + { + bol_needed = true; - if ( performance_report > 1 ) - pinpoint_message( - "'^' operator results in sub-optimal performance" ); - } + if ( performance_report > 1 ) + pinpoint_message( + "'^' operator results in sub-optimal performance" ); + } } | scon rule - { + { pat = $2; finish_rule( pat, variable_trail_rule, - headcnt, trailcnt ); + headcnt, trailcnt ); for ( i = 1; i <= actvp; ++i ) - scset[actvsc[i]] = - mkbranch( scset[actvsc[i]], pat ); + scset[actvsc[i]] = + mkbranch( scset[actvsc[i]], pat ); } | '^' rule { pat = $2; finish_rule( pat, variable_trail_rule, - headcnt, trailcnt ); + headcnt, trailcnt ); - /* add to all non-exclusive start conditions, - * including the default (0) start condition + /* Add to all non-exclusive start conditions, + * including the default (0) start condition. */ for ( i = 1; i <= lastsc; ++i ) - if ( ! scxclu[i] ) - scbol[i] = mkbranch( scbol[i], pat ); + if ( ! scxclu[i] ) + scbol[i] = mkbranch( scbol[i], pat ); if ( ! bol_needed ) - { - bol_needed = true; + { + bol_needed = true; - if ( performance_report > 1 ) - pinpoint_message( - "'^' operator results in sub-optimal performance" ); - } + if ( performance_report > 1 ) + pinpoint_message( + "'^' operator results in sub-optimal performance" ); + } } | rule { pat = $1; finish_rule( pat, variable_trail_rule, - headcnt, trailcnt ); + headcnt, trailcnt ); for ( i = 1; i <= lastsc; ++i ) - if ( ! scxclu[i] ) - scset[i] = mkbranch( scset[i], pat ); + if ( ! 
scxclu[i] ) + scset[i] = mkbranch( scset[i], pat ); } | scon EOF_OP @@ -220,21 +213,21 @@ flexrule : scon '^' rule | EOF_OP { - /* this EOF applies to all start conditions - * which don't already have EOF actions + /* This EOF applies to all start conditions + * which don't already have EOF actions. */ actvp = 0; for ( i = 1; i <= lastsc; ++i ) - if ( ! sceof[i] ) - actvsc[++actvp] = i; + if ( ! sceof[i] ) + actvsc[++actvp] = i; if ( actvp == 0 ) - warn( - "all start conditions already have <> rules" ); + warn( + "all start conditions already have <> rules" ); else - build_eof_action(); + build_eof_action(); } | error @@ -263,71 +256,75 @@ namelist2 : namelist2 ',' sconname sconname : NAME { if ( (scnum = sclookup( nmstr )) == 0 ) - format_pinpoint_message( - "undeclared start condition %s", nmstr ); + format_pinpoint_message( + "undeclared start condition %s", + nmstr ); else - { - if ( ++actvp >= current_max_scs ) - /* some bozo has included multiple instances - * of start condition names - */ - pinpoint_message( + { + if ( ++actvp >= current_max_scs ) + /* Some bozo has included multiple + * instances of start condition names. + */ + pinpoint_message( "too many start conditions in <> construct!" ); - else - actvsc[actvp] = scnum; - } + else + actvsc[actvp] = scnum; + } } ; rule : re2 re { if ( transchar[lastst[$2]] != SYM_EPSILON ) - /* provide final transition \now/ so it - * will be marked as a trailing context - * state - */ - $2 = link_machines( $2, mkstate( SYM_EPSILON ) ); + /* Provide final transition \now/ so it + * will be marked as a trailing context + * state. + */ + $2 = link_machines( $2, + mkstate( SYM_EPSILON ) ); mark_beginning_as_normal( $2 ); current_state_type = STATE_NORMAL; if ( previous_continued_action ) - { - /* we need to treat this as variable trailing - * context so that the backup does not happen - * in the action but before the action switch - * statement. 
If the backup happens in the - * action, then the rules "falling into" this - * one's action will *also* do the backup, - * erroneously. - */ - if ( ! varlength || headcnt != 0 ) - warn( + { + /* We need to treat this as variable trailing + * context so that the backup does not happen + * in the action but before the action switch + * statement. If the backup happens in the + * action, then the rules "falling into" this + * one's action will *also* do the backup, + * erroneously. + */ + if ( ! varlength || headcnt != 0 ) + warn( "trailing context made variable due to preceding '|' action" ); - /* mark as variable */ - varlength = true; - headcnt = 0; - } + /* Mark as variable. */ + varlength = true; + headcnt = 0; + } if ( varlength && headcnt == 0 ) - { /* variable trailing context rule */ - /* mark the first part of the rule as the accepting - * "head" part of a trailing context rule - */ - /* by the way, we didn't do this at the beginning - * of this production because back then - * current_state_type was set up for a trail - * rule, and add_accept() can create a new - * state ... - */ - add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK ); - variable_trail_rule = true; - } + { /* variable trailing context rule */ + /* Mark the first part of the rule as the + * accepting "head" part of a trailing + * context rule. + * + * By the way, we didn't do this at the + * beginning of this production because back + * then current_state_type was set up for a + * trail rule, and add_accept() can create + * a new state ... 
+ */ + add_accept( $1, + num_rules | YY_TRAILING_HEAD_MASK ); + variable_trail_rule = true; + } else - trailcnt = rulelen; + trailcnt = rulelen; $$ = link_machines( $1, $2 ); } @@ -336,70 +333,73 @@ rule : re2 re { synerr( "trailing context used twice" ); } | re '$' - { + { if ( trlcontxt ) - { - synerr( "trailing context used twice" ); - $$ = mkstate( SYM_EPSILON ); - } + { + synerr( "trailing context used twice" ); + $$ = mkstate( SYM_EPSILON ); + } else if ( previous_continued_action ) - { - /* see the comment in the rule for "re2 re" - * above - */ - if ( ! varlength || headcnt != 0 ) - warn( + { + /* See the comment in the rule for "re2 re" + * above. + */ + if ( ! varlength || headcnt != 0 ) + warn( "trailing context made variable due to preceding '|' action" ); - /* mark as variable */ - varlength = true; - headcnt = 0; - } + /* Mark as variable. */ + varlength = true; + headcnt = 0; + } if ( varlength && headcnt == 0 ) - { - /* again, see the comment in the rule for "re2 re" - * above - */ - add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK ); - variable_trail_rule = true; - } + { + /* Again, see the comment in the rule for + * "re2 re" above. + */ + add_accept( $1, + num_rules | YY_TRAILING_HEAD_MASK ); + variable_trail_rule = true; + } else - { - if ( ! varlength ) - headcnt = rulelen; + { + if ( ! varlength ) + headcnt = rulelen; - ++rulelen; - trailcnt = 1; - } + ++rulelen; + trailcnt = 1; + } trlcontxt = true; eps = mkstate( SYM_EPSILON ); $$ = link_machines( $1, - link_machines( eps, mkstate( '\n' ) ) ); + link_machines( eps, mkstate( '\n' ) ) ); } | re { - $$ = $1; + $$ = $1; if ( trlcontxt ) - { - if ( varlength && headcnt == 0 ) - /* both head and trail are variable-length */ - variable_trail_rule = true; - else - trailcnt = rulelen; - } - } + { + if ( varlength && headcnt == 0 ) + /* Both head and trail are + * variable-length. 
+ */ + variable_trail_rule = true; + else + trailcnt = rulelen; + } + } ; re : re '|' series - { + { varlength = true; $$ = mkor( $1, $3 ); } @@ -411,21 +411,23 @@ re : re '|' series re2 : re '/' { - /* this rule is written separately so - * the reduction will occur before the trailing - * series is parsed + /* This rule is written separately so the + * reduction will occur before the trailing + * series is parsed. */ if ( trlcontxt ) - synerr( "trailing context used twice" ); + synerr( "trailing context used twice" ); else - trlcontxt = true; + trlcontxt = true; if ( varlength ) - /* we hope the trailing context is fixed-length */ - varlength = false; + /* We hope the trailing context is + * fixed-length. + */ + varlength = false; else - headcnt = rulelen; + headcnt = rulelen; rulelen = 0; @@ -435,9 +437,9 @@ re2 : re '/' ; series : series singleton - { - /* this is where concatenation of adjacent patterns - * gets done + { + /* This is where concatenation of adjacent patterns + * gets done. */ $$ = link_machines( $1, $2 ); } @@ -447,7 +449,7 @@ series : series singleton ; singleton : singleton '*' - { + { varlength = true; $$ = mkclos( $1 ); @@ -456,14 +458,12 @@ singleton : singleton '*' | singleton '+' { varlength = true; - $$ = mkposcl( $1 ); } | singleton '?' 
{ varlength = true; - $$ = mkopt( $1 ); } @@ -472,25 +472,27 @@ singleton : singleton '*' varlength = true; if ( $3 > $5 || $3 < 0 ) - { - synerr( "bad iteration values" ); - $$ = $1; - } + { + synerr( "bad iteration values" ); + $$ = $1; + } else - { - if ( $3 == 0 ) { - if ( $5 <= 0 ) - { - synerr( "bad iteration values" ); - $$ = $1; - } + if ( $3 == 0 ) + { + if ( $5 <= 0 ) + { + synerr( + "bad iteration values" ); + $$ = $1; + } + else + $$ = mkopt( + mkrep( $1, 1, $5 ) ); + } else - $$ = mkopt( mkrep( $1, 1, $5 ) ); + $$ = mkrep( $1, $3, $5 ); } - else - $$ = mkrep( $1, $3, $5 ); - } } | singleton '{' NUMBER ',' '}' @@ -498,49 +500,50 @@ singleton : singleton '*' varlength = true; if ( $3 <= 0 ) - { - synerr( "iteration value must be positive" ); - $$ = $1; - } + { + synerr( "iteration value must be positive" ); + $$ = $1; + } else - $$ = mkrep( $1, $3, INFINITY ); + $$ = mkrep( $1, $3, INFINITY ); } | singleton '{' NUMBER '}' { - /* the singleton could be something like "(foo)", + /* The singleton could be something like "(foo)", * in which case we have no idea what its length * is, so we punt here. */ varlength = true; if ( $3 <= 0 ) - { - synerr( "iteration value must be positive" ); - $$ = $1; - } + { + synerr( "iteration value must be positive" ); + $$ = $1; + } else - $$ = link_machines( $1, copysingl( $1, $3 - 1 ) ); + $$ = link_machines( $1, + copysingl( $1, $3 - 1 ) ); } | '.' { if ( ! madeany ) - { - /* create the '.' character class */ - anyccl = cclinit(); - ccladd( anyccl, '\n' ); - cclnegate( anyccl ); + { + /* Create the '.' character class. */ + anyccl = cclinit(); + ccladd( anyccl, '\n' ); + cclnegate( anyccl ); - if ( useecs ) - mkeccl( ccltbl + cclmap[anyccl], - ccllen[anyccl], nextecm, - ecgroup, csize, csize ); + if ( useecs ) + mkeccl( ccltbl + cclmap[anyccl], + ccllen[anyccl], nextecm, + ecgroup, csize, csize ); - madeany = true; - } + madeany = true; + } ++rulelen; @@ -550,14 +553,15 @@ singleton : singleton '*' | fullccl { if ( ! 
cclsorted ) - /* sort characters for fast searching. We use a - * shell sort since this list could be large. - */ - cshell( ccltbl + cclmap[$1], ccllen[$1], true ); + /* Sort characters for fast searching. We + * use a shell sort since this list could + * be large. + */ + cshell( ccltbl + cclmap[$1], ccllen[$1], true ); if ( useecs ) - mkeccl( ccltbl + cclmap[$1], ccllen[$1], - nextecm, ecgroup, csize, csize ); + mkeccl( ccltbl + cclmap[$1], ccllen[$1], + nextecm, ecgroup, csize, csize ); ++rulelen; @@ -582,7 +586,7 @@ singleton : singleton '*' ++rulelen; if ( caseins && $1 >= 'A' && $1 <= 'Z' ) - $1 = clower( $1 ); + $1 = clower( $1 ); $$ = mkstate( $1 ); } @@ -593,50 +597,42 @@ fullccl : '[' ccl ']' | '[' '^' ccl ']' { - /* *Sigh* - to be compatible Unix lex, negated ccls - * match newlines - */ -#if 0 - ccladd( $3, '\n' ); /* negated ccls don't match '\n' */ - cclsorted = false; /* because we added the newline */ -#endif cclnegate( $3 ); $$ = $3; } ; ccl : ccl CHAR '-' CHAR - { + { if ( $2 > $4 ) - synerr( "negative range in character class" ); + synerr( "negative range in character class" ); else - { - if ( caseins ) { - if ( $2 >= 'A' && $2 <= 'Z' ) - $2 = clower( $2 ); - if ( $4 >= 'A' && $4 <= 'Z' ) - $4 = clower( $4 ); + if ( caseins ) + { + if ( $2 >= 'A' && $2 <= 'Z' ) + $2 = clower( $2 ); + if ( $4 >= 'A' && $4 <= 'Z' ) + $4 = clower( $4 ); + } + + for ( i = $2; i <= $4; ++i ) + ccladd( $1, i ); + + /* Keep track if this ccl is staying in + * alphabetical order. 
+ */ + cclsorted = cclsorted && ($2 > lastchar); + lastchar = $4; } - for ( i = $2; i <= $4; ++i ) - ccladd( $1, i ); - - /* keep track if this ccl is staying in alphabetical - * order - */ - cclsorted = cclsorted && ($2 > lastchar); - lastchar = $4; - } - $$ = $1; } | ccl CHAR - { - if ( caseins ) - if ( $2 >= 'A' && $2 <= 'Z' ) + { + if ( caseins && $2 >= 'A' && $2 <= 'Z' ) $2 = clower( $2 ); ccladd( $1, $2 ); @@ -654,9 +650,8 @@ ccl : ccl CHAR '-' CHAR ; string : string CHAR - { - if ( caseins ) - if ( $2 >= 'A' && $2 <= 'Z' ) + { + if ( caseins && $2 >= 'A' && $2 <= 'Z' ) $2 = clower( $2 ); ++rulelen; @@ -676,70 +671,67 @@ string : string CHAR */ void build_eof_action() - - { - register int i; - char action_text[MAXLINE]; - - for ( i = 1; i <= actvp; ++i ) { - if ( sceof[actvsc[i]] ) - format_pinpoint_message( - "multiple <> rules for start condition %s", - scname[actvsc[i]] ); - - else - { - sceof[actvsc[i]] = true; - sprintf( action_text, "case YY_STATE_EOF(%s):\n", - scname[actvsc[i]] ); - add_action( action_text ); - } - } + register int i; + char action_text[MAXLINE]; - line_directive_out( (FILE *) 0 ); + for ( i = 1; i <= actvp; ++i ) + { + if ( sceof[actvsc[i]] ) + format_pinpoint_message( + "multiple <> rules for start condition %s", + scname[actvsc[i]] ); - /* this isn't a normal rule after all - don't count it as - * such, so we don't have any holes in the rule numbering - * (which make generating "rule can never match" warnings - * more difficult - */ - --num_rules; - ++num_eof_rules; - } + else + { + sceof[actvsc[i]] = true; + sprintf( action_text, "case YY_STATE_EOF(%s):\n", + scname[actvsc[i]] ); + add_action( action_text ); + } + } + + line_directive_out( (FILE *) 0 ); + + /* This isn't a normal rule after all - don't count it as + * such, so we don't have any holes in the rule numbering + * (which make generating "rule can never match" warnings + * more difficult. 
+ */ + --num_rules; + ++num_eof_rules; + } /* format_synerr - write out formatted syntax error */ void format_synerr( msg, arg ) char msg[], arg[]; + { + char errmsg[MAXLINE]; - { - char errmsg[MAXLINE]; - - (void) sprintf( errmsg, msg, arg ); - synerr( errmsg ); - } + (void) sprintf( errmsg, msg, arg ); + synerr( errmsg ); + } /* synerr - report a syntax error */ void synerr( str ) char str[]; - - { - syntaxerror = true; - pinpoint_message( str ); - } + { + syntaxerror = true; + pinpoint_message( str ); + } /* warn - report a warning, unless -w was given */ void warn( str ) char str[]; - { - line_warning( str, linenum ); - } + { + line_warning( str, linenum ); + } /* format_pinpoint_message - write out a message formatted with one string, * pinpointing its location @@ -747,23 +739,21 @@ char str[]; void format_pinpoint_message( msg, arg ) char msg[], arg[]; + { + char errmsg[MAXLINE]; - { - char errmsg[MAXLINE]; - - (void) sprintf( errmsg, msg, arg ); - pinpoint_message( errmsg ); - } + (void) sprintf( errmsg, msg, arg ); + pinpoint_message( errmsg ); + } /* pinpoint_message - write out a message, pinpointing its location */ void pinpoint_message( str ) char str[]; - - { - line_pinpoint( str, linenum ); - } + { + line_pinpoint( str, linenum ); + } /* line_warning - report a warning at a given line, unless -w was given */ @@ -771,15 +761,15 @@ char str[]; void line_warning( str, line ) char str[]; int line; - { - char warning[MAXLINE]; - - if ( ! nowarn ) { - sprintf( warning, "warning, %s", str ); - line_pinpoint( warning, line ); + char warning[MAXLINE]; + + if ( ! 
nowarn ) + { + sprintf( warning, "warning, %s", str ); + line_pinpoint( warning, line ); + } } - } /* line_pinpoint - write out a message, pinpointing it at the given line */ @@ -787,10 +777,9 @@ int line; void line_pinpoint( str, line ) char str[]; int line; - - { - fprintf( stderr, "\"%s\", line %d: %s\n", infilename, line, str ); - } + { + fprintf( stderr, "\"%s\", line %d: %s\n", infilename, line, str ); + } /* yyerror - eat up an error message from the parser; @@ -799,6 +788,5 @@ int line; void yyerror( msg ) char msg[]; - - { - } + { + } diff --git a/scan.l b/scan.l index 88add7b..0566a2b 100644 --- a/scan.l +++ b/scan.l @@ -54,15 +54,15 @@ static char rcsid[] = #define PUT_BACK_STRING(str, start) \ for ( i = strlen( (char *) (str) ) - 1; i >= start; --i ) \ - unput((str)[i]) + unput((str)[i]) #define CHECK_REJECT(str) \ if ( all_upper( str ) ) \ - reject = true; + reject = true; #define CHECK_YYMORE(str) \ if ( all_lower( str ) ) \ - yymore_used = true; + yymore_used = true; %} %x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE @@ -86,11 +86,11 @@ FIRST_CCL_CHAR [^\\\n]|{ESCSEQ} CCL_CHAR [^\\\n\]]|{ESCSEQ} %% - static int bracelevel, didadef, indented_code, checking_used; + static int bracelevel, didadef, indented_code, checking_used; - int doing_codeblock = false; - int i; - Char nmdef[MAXLINE], myesc(); + int doing_codeblock = false; + int i; + Char nmdef[MAXLINE], myesc(); ^{WS} indented_code = true; BEGIN(CODEBLOCK); @@ -156,7 +156,7 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} ++linenum; ECHO; if ( indented_code ) - BEGIN(INITIAL); + BEGIN(INITIAL); } @@ -165,22 +165,21 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} {NOT_WS}.* { (void) strcpy( (char *) nmdef, (char *) yytext ); - /* skip trailing whitespace */ + /* Skip trailing whitespace. 
*/ for ( i = strlen( (char *) nmdef ) - 1; - i >= 0 && - (nmdef[i] == ' ' || nmdef[i] == '\t'); + i >= 0 && (nmdef[i] == ' ' || nmdef[i] == '\t'); --i ) - ; + ; nmdef[i + 1] = '\0'; - ndinstal( nmstr, nmdef ); + ndinstal( nmstr, nmdef ); didadef = true; } {NL} { if ( ! didadef ) - synerr( "incomplete name definition" ); + synerr( "incomplete name definition" ); BEGIN(INITIAL); ++linenum; } @@ -192,15 +191,17 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} {WS} "reject" { if ( all_upper( yytext ) ) - reject_really_used = checking_used; + reject_really_used = checking_used; else - synerr( "unrecognized %used/%unused construct" ); + synerr( + "unrecognized %used/%unused construct" ); } "yymore" { if ( all_lower( yytext ) ) - yymore_really_used = checking_used; + yymore_really_used = checking_used; else - synerr( "unrecognized %used/%unused construct" ); + synerr( + "unrecognized %used/%unused construct" ); } {NOT_WS}+ synerr( "unrecognized %used/%unused construct" ); @@ -224,7 +225,7 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} bracelevel = 1; if ( indented_code ) - ACTION_ECHO; + ACTION_ECHO; BEGIN(CODEBLOCK_2); } @@ -244,9 +245,9 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} {WS}"|".*{NL} continued_action = true; ++linenum; return '\n'; {WS} { - /* this rule is separate from the one below because + /* This rule is separate from the one below because * otherwise we get variable trailing context, so - * we can't build the scanner using -{f,F} + * we can't build the scanner using -{f,F}. */ bracelevel = 0; continued_action = false; @@ -275,31 +276,33 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} (void) strcpy( nmstr, (char *) yytext ); - /* check to see if we've already encountered this ccl */ + /* Check to see if we've already encountered this + * ccl. + */ if ( (cclval = ccllookup( (Char *) nmstr )) ) - { - if ( input() != ']' ) - synerr( "bad character class" ); - - yylval = cclval; - ++cclreuse; - return PREVCCL; - } - else - { - /* we fudge a bit. 
We know that this ccl will - * soon be numbered as lastccl + 1 by cclinit - */ - cclinstal( (Char *) nmstr, lastccl + 1 ); - - /* push back everything but the leading bracket - * so the ccl can be rescanned - */ - yyless( 1 ); + { + if ( input() != ']' ) + synerr( "bad character class" ); - BEGIN(FIRSTCCL); - return '['; - } + yylval = cclval; + ++cclreuse; + return PREVCCL; + } + else + { + /* We fudge a bit. We know that this ccl will + * soon be numbered as lastccl + 1 by cclinit. + */ + cclinstal( (Char *) nmstr, lastccl + 1 ); + + /* Push back everything but the leading bracket + * so the ccl can be rescanned. + */ + yyless( 1 ); + + BEGIN(FIRSTCCL); + return '['; + } } "{"{NAME}"}" { @@ -310,28 +313,29 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} nmstr[yyleng - 2] = '\0'; /* chop trailing brace */ if ( ! (nmdefptr = ndlookup( nmstr )) ) - format_synerr( "undefined definition {%s}", nmstr ); + format_synerr( "undefined definition {%s}", + nmstr ); else - { /* push back name surrounded by ()'s */ - int len = strlen( nmdefptr ); - - if ( nmdefptr[0] == '^' || - (len > 0 && nmdefptr[len - 1] == '$') ) - { - PUT_BACK_STRING(nmdefptr, 0); - - if ( nmdefptr[0] == '^' ) - BEGIN(CARETISBOL); - } - - else - { - unput(')'); - PUT_BACK_STRING(nmdefptr, 0); - unput('('); + { /* push back name surrounded by ()'s */ + int len = strlen( nmdefptr ); + + if ( nmdefptr[0] == '^' || + (len > 0 && nmdefptr[len - 1] == '$') ) + { + PUT_BACK_STRING(nmdefptr, 0); + + if ( nmdefptr[0] == '^' ) + BEGIN(CARETISBOL); + } + + else + { + unput(')'); + PUT_BACK_STRING(nmdefptr, 0); + unput('('); + } } - } } [/|*+?.()] return yytext[0]; @@ -417,13 +421,13 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} ACTION_ECHO; if ( bracelevel == 0 || (doing_codeblock && indented_code) ) - { - if ( ! doing_codeblock ) - add_action( "\tYY_BREAK\n" ); - - doing_codeblock = false; - BEGIN(SECT2); - } + { + if ( ! 
doing_codeblock ) + add_action( "\tYY_BREAK\n" ); + + doing_codeblock = false; + BEGIN(SECT2); + } } @@ -439,19 +443,19 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} ++linenum; ACTION_ECHO; if ( bracelevel == 0 ) - { - add_action( "\tYY_BREAK\n" ); - BEGIN(SECT2); - } + { + add_action( "\tYY_BREAK\n" ); + BEGIN(SECT2); + } } . ACTION_ECHO; "*/" { ACTION_ECHO; if ( doing_codeblock ) - BEGIN(CODEBLOCK_2); + BEGIN(CODEBLOCK_2); else - BEGIN(ACTION); + BEGIN(ACTION); } "*" ACTION_ECHO; @@ -490,37 +494,35 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} int yywrap() - - { - if ( --num_input_files > 0 ) { - set_input_file( *++input_files ); - return 0; + if ( --num_input_files > 0 ) + { + set_input_file( *++input_files ); + return 0; + } + + else + return 1; } - else - return 1; - } - /* set_input_file - open the given file (if NULL, stdin) for scanning */ void set_input_file( file ) char *file; - - { - if ( file ) - { - infilename = file; - yyin = fopen( infilename, "r" ); - - if ( yyin == NULL ) - lerrsf( "can't open %s", file ); - } - - else { - yyin = stdin; - infilename = ""; + if ( file ) + { + infilename = file; + yyin = fopen( infilename, "r" ); + + if ( yyin == NULL ) + lerrsf( "can't open %s", file ); + } + + else + { + yyin = stdin; + infilename = ""; + } } - } diff --git a/sym.c b/sym.c index 3ab3890..1558cf4 100644 --- a/sym.c +++ b/sym.c @@ -47,13 +47,6 @@ struct hash_entry *findsym(); /* addsym - add symbol and definitions to symbol table - * - * synopsis - * char sym[], *str_def; - * int int_def; - * hash_table table; - * int table_size; - * 0 / -1 = addsym( sym, def, int_def, table, table_size ); * * -1 is returned if the symbol already exists, and the change not made. */ @@ -64,263 +57,204 @@ char *str_def; int int_def; hash_table table; int table_size; - - { - int hash_val = hashfunct( sym, table_size ); - register struct hash_entry *sym_entry = table[hash_val]; - register struct hash_entry *new_entry; - register struct hash_entry *successor; - - while ( sym_entry ) { - if ( ! 
strcmp( sym, sym_entry->name ) ) - { /* entry already exists */ - return ( -1 ); - } - - sym_entry = sym_entry->next; + int hash_val = hashfunct( sym, table_size ); + register struct hash_entry *sym_entry = table[hash_val]; + register struct hash_entry *new_entry; + register struct hash_entry *successor; + + while ( sym_entry ) + { + if ( ! strcmp( sym, sym_entry->name ) ) + { /* entry already exists */ + return -1; + } + + sym_entry = sym_entry->next; + } + + /* create new entry */ + new_entry = (struct hash_entry *) malloc( sizeof( struct hash_entry ) ); + + if ( new_entry == NULL ) + flexfatal( "symbol table memory allocation failed" ); + + if ( (successor = table[hash_val]) ) + { + new_entry->next = successor; + successor->prev = new_entry; + } + else + new_entry->next = NULL; + + new_entry->prev = NULL; + new_entry->name = sym; + new_entry->str_val = str_def; + new_entry->int_val = int_def; + + table[hash_val] = new_entry; + + return 0; } - /* create new entry */ - new_entry = (struct hash_entry *) malloc( sizeof( struct hash_entry ) ); - - if ( new_entry == NULL ) - flexfatal( "symbol table memory allocation failed" ); - - if ( (successor = table[hash_val]) ) - { - new_entry->next = successor; - successor->prev = new_entry; - } - else - new_entry->next = NULL; - new_entry->prev = NULL; - new_entry->name = sym; - new_entry->str_val = str_def; - new_entry->int_val = int_def; - - table[hash_val] = new_entry; - - return ( 0 ); - } - - -/* cclinstal - save the text of a character class - * - * synopsis - * Char ccltxt[]; - * int cclnum; - * cclinstal( ccltxt, cclnum ); - */ +/* cclinstal - save the text of a character class */ void cclinstal( ccltxt, cclnum ) Char ccltxt[]; int cclnum; - - { - /* we don't bother checking the return status because we are not called - * unless the symbol is new - */ - Char *copy_unsigned_string(); - - (void) addsym( (char *) copy_unsigned_string( ccltxt ), (char *) 0, cclnum, - ccltab, CCL_HASH_SIZE ); - } + { + /* We don't bother 
checking the return status because we are not + * called unless the symbol is new. + */ + Char *copy_unsigned_string(); + + (void) addsym( (char *) copy_unsigned_string( ccltxt ), + (char *) 0, cclnum, + ccltab, CCL_HASH_SIZE ); + } /* ccllookup - lookup the number associated with character class text * - * synopsis - * Char ccltxt[]; - * int ccllookup, cclval; - * cclval/0 = ccllookup( ccltxt ); + * Returns 0 if there's no CCL associated with the text. */ int ccllookup( ccltxt ) Char ccltxt[]; + { + return findsym( (char *) ccltxt, ccltab, CCL_HASH_SIZE )->int_val; + } - { - return ( findsym( (char *) ccltxt, ccltab, CCL_HASH_SIZE )->int_val ); - } - -/* findsym - find symbol in symbol table - * - * synopsis - * char sym[]; - * hash_table table; - * int table_size; - * struct hash_entry *sym_entry, *findsym(); - * sym_entry = findsym( sym, table, table_size ); - */ +/* findsym - find symbol in symbol table */ struct hash_entry *findsym( sym, table, table_size ) register char sym[]; hash_table table; int table_size; - - { - register struct hash_entry *sym_entry = table[hashfunct( sym, table_size )]; - static struct hash_entry empty_entry = { - (struct hash_entry *) 0, (struct hash_entry *) 0, NULL, NULL, 0, - } ; - - while ( sym_entry ) - { - if ( ! strcmp( sym, sym_entry->name ) ) - return ( sym_entry ); - sym_entry = sym_entry->next; + static struct hash_entry empty_entry = + { + (struct hash_entry *) 0, (struct hash_entry *) 0, NULL, NULL, 0, + } ; + register struct hash_entry *sym_entry = + table[hashfunct( sym, table_size )]; + + while ( sym_entry ) + { + if ( ! 
strcmp( sym, sym_entry->name ) ) + return sym_entry; + sym_entry = sym_entry->next; + } + + return &empty_entry; } - return ( &empty_entry ); - } - -/* hashfunct - compute the hash value for "str" and hash size "hash_size" - * - * synopsis - * char str[]; - * int hash_size, hash_val; - * hash_val = hashfunct( str, hash_size ); - */ +/* hashfunct - compute the hash value for "str" and hash size "hash_size" */ int hashfunct( str, hash_size ) register char str[]; int hash_size; + { + register int hashval; + register int locstr; - { - register int hashval; - register int locstr; - - hashval = 0; - locstr = 0; + hashval = 0; + locstr = 0; - while ( str[locstr] ) - hashval = ((hashval << 1) + (unsigned char) str[locstr++]) % hash_size; + while ( str[locstr] ) + hashval = ((hashval << 1) + (unsigned char) str[locstr++]) % + hash_size; - return ( hashval ); - } + return hashval; + } -/* ndinstal - install a name definition - * - * synopsis - * char nd[]; - * Char def[]; - * ndinstal( nd, def ); - */ +/* ndinstal - install a name definition */ void ndinstal( nd, def ) char nd[]; Char def[]; + { + char *copy_string(); + Char *copy_unsigned_string(); - { - char *copy_string(); - Char *copy_unsigned_string(); - - if ( addsym( copy_string( nd ), (char *) copy_unsigned_string( def ), 0, - ndtbl, NAME_TABLE_HASH_SIZE ) ) - synerr( "name defined twice" ); - } + if ( addsym( copy_string( nd ), (char *) copy_unsigned_string( def ), 0, + ndtbl, NAME_TABLE_HASH_SIZE ) ) + synerr( "name defined twice" ); + } /* ndlookup - lookup a name definition * - * synopsis - * char nd[], *def; - * char *ndlookup(); - * def/NULL = ndlookup( nd ); + * Returns a nil pointer if the name definition does not exist. 
*/ Char *ndlookup( nd ) char nd[]; - - { - return ( (Char *) findsym( nd, ndtbl, NAME_TABLE_HASH_SIZE )->str_val ); - } + { + return (Char *) findsym( nd, ndtbl, NAME_TABLE_HASH_SIZE )->str_val; + } -/* scextend - increase the maximum number of start conditions - * - * synopsis - * scextend(); - */ +/* scextend - increase the maximum number of start conditions */ void scextend() + { + current_max_scs += MAX_SCS_INCREMENT; - { - current_max_scs += MAX_SCS_INCREMENT; - - ++num_reallocs; + ++num_reallocs; - scset = reallocate_integer_array( scset, current_max_scs ); - scbol = reallocate_integer_array( scbol, current_max_scs ); - scxclu = reallocate_integer_array( scxclu, current_max_scs ); - sceof = reallocate_integer_array( sceof, current_max_scs ); - scname = reallocate_char_ptr_array( scname, current_max_scs ); - actvsc = reallocate_integer_array( actvsc, current_max_scs ); - } + scset = reallocate_integer_array( scset, current_max_scs ); + scbol = reallocate_integer_array( scbol, current_max_scs ); + scxclu = reallocate_integer_array( scxclu, current_max_scs ); + sceof = reallocate_integer_array( sceof, current_max_scs ); + scname = reallocate_char_ptr_array( scname, current_max_scs ); + actvsc = reallocate_integer_array( actvsc, current_max_scs ); + } /* scinstal - make a start condition - * - * synopsis - * char str[]; - * int xcluflg; - * scinstal( str, xcluflg ); * * NOTE - * the start condition is Exclusive if xcluflg is true + * The start condition is "exclusive" if xcluflg is true. */ void scinstal( str, xcluflg ) char str[]; int xcluflg; + { + char *copy_string(); - { - char *copy_string(); - - /* bit of a hack. We know how the default start-condition is - * declared, and don't put out a define for it, because it - * would come out as "#define 0 1" - */ - /* actually, this is no longer the case. The default start-condition - * is now called "INITIAL". But we keep the following for the sake - * of future robustness. 
- */ - - if ( strcmp( str, "0" ) ) + /* Generate start condition definition, for use in BEGIN et al. */ printf( "#define %s %d\n", str, lastsc ); - if ( ++lastsc >= current_max_scs ) - scextend(); + if ( ++lastsc >= current_max_scs ) + scextend(); - scname[lastsc] = copy_string( str ); + scname[lastsc] = copy_string( str ); - if ( addsym( scname[lastsc], (char *) 0, lastsc, - sctbl, START_COND_HASH_SIZE ) ) - format_pinpoint_message( "start condition %s declared twice", str ); + if ( addsym( scname[lastsc], (char *) 0, lastsc, + sctbl, START_COND_HASH_SIZE ) ) + format_pinpoint_message( "start condition %s declared twice", + str ); - scset[lastsc] = mkstate( SYM_EPSILON ); - scbol[lastsc] = mkstate( SYM_EPSILON ); - scxclu[lastsc] = xcluflg; - sceof[lastsc] = false; - } + scset[lastsc] = mkstate( SYM_EPSILON ); + scbol[lastsc] = mkstate( SYM_EPSILON ); + scxclu[lastsc] = xcluflg; + sceof[lastsc] = false; + } /* sclookup - lookup the number associated with a start condition * - * synopsis - * char str[], scnum; - * int sclookup; - * scnum/0 = sclookup( str ); + * Returns 0 if no such start condition. */ int sclookup( str ) char str[]; - - { - return ( findsym( str, sctbl, START_COND_HASH_SIZE )->int_val ); - } + { + return findsym( str, sctbl, START_COND_HASH_SIZE )->int_val; + } diff --git a/tblcmp.c b/tblcmp.c index 7e4f74a..26e7cb6 100644 --- a/tblcmp.c +++ b/tblcmp.c @@ -80,237 +80,243 @@ int tbldiff PROTO((int[], int, int[])); void bldtbl( state, statenum, totaltrans, comstate, comfreq ) int state[], statenum, totaltrans, comstate, comfreq; + { + int extptr, extrct[2][CSIZE + 1]; + int mindiff, minprot, i, d; + + /* If extptr is 0 then the first array of extrct holds the result + * of the "best difference" to date, which is those transitions + * which occur in "state" but not in the proto which, to date, + * has the fewest differences between itself and "state". If + * extptr is 1 then the second array of extrct hold the best + * difference. 
The two arrays are toggled between so that the + * best difference to date can be kept around and also a difference + * just created by checking against a candidate "best" proto. + */ - { - int extptr, extrct[2][CSIZE + 1]; - int mindiff, minprot, i, d; - int checkcom; + extptr = 0; - /* If extptr is 0 then the first array of extrct holds the result of the - * "best difference" to date, which is those transitions which occur in - * "state" but not in the proto which, to date, has the fewest differences - * between itself and "state". If extptr is 1 then the second array of - * extrct hold the best difference. The two arrays are toggled - * between so that the best difference to date can be kept around and - * also a difference just created by checking against a candidate "best" - * proto. - */ + /* If the state has too few out-transitions, don't bother trying to + * compact its tables. + */ - extptr = 0; + if ( (totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE) ) + mkentry( state, numecs, statenum, JAMSTATE, totaltrans ); - /* if the state has too few out-transitions, don't bother trying to - * compact its tables - */ + else + { + /* "checkcom" is true if we should only check "state" against + * protos which have the same "comstate" value. + */ + int checkcom = + comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE; - if ( (totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE) ) - mkentry( state, numecs, statenum, JAMSTATE, totaltrans ); + minprot = firstprot; + mindiff = totaltrans; + + if ( checkcom ) + { + /* Find first proto which has the same "comstate". 
*/ + for ( i = firstprot; i != NIL; i = protnext[i] ) + if ( protcomst[i] == comstate ) + { + minprot = i; + mindiff = tbldiff( state, minprot, + extrct[extptr] ); + break; + } + } - else - { - /* checkcom is true if we should only check "state" against - * protos which have the same "comstate" value - */ + else + { + /* Since we've decided that the most common destination + * out of "state" does not occur with a high enough + * frequency, we set the "comstate" to zero, assuring + * that if this state is entered into the proto list, + * it will not be considered a template. + */ + comstate = 0; + + if ( firstprot != NIL ) + { + minprot = firstprot; + mindiff = tbldiff( state, minprot, + extrct[extptr] ); + } + } - checkcom = comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE; + /* We now have the first interesting proto in "minprot". If + * it matches within the tolerances set for the first proto, + * we don't want to bother scanning the rest of the proto list + * to see if we have any other reasonable matches. + */ - minprot = firstprot; - mindiff = totaltrans; + if ( mindiff * 100 > totaltrans * FIRST_MATCH_DIFF_PERCENTAGE ) + { + /* Not a good enough match. Scan the rest of the + * protos. + */ + for ( i = minprot; i != NIL; i = protnext[i] ) + { + d = tbldiff( state, i, extrct[1 - extptr] ); + if ( d < mindiff ) + { + extptr = 1 - extptr; + mindiff = d; + minprot = i; + } + } + } - if ( checkcom ) - { - /* find first proto which has the same "comstate" */ - for ( i = firstprot; i != NIL; i = protnext[i] ) - if ( protcomst[i] == comstate ) - { - minprot = i; - mindiff = tbldiff( state, minprot, extrct[extptr] ); - break; - } - } + /* Check if the proto we've decided on as our best bet is close + * enough to the state we want to match to be usable. 
+ */ - else - { - /* since we've decided that the most common destination out - * of "state" does not occur with a high enough frequency, - * we set the "comstate" to zero, assuring that if this state - * is entered into the proto list, it will not be considered - * a template. - */ - comstate = 0; - - if ( firstprot != NIL ) - { - minprot = firstprot; - mindiff = tbldiff( state, minprot, extrct[extptr] ); - } - } + if ( mindiff * 100 > totaltrans * ACCEPTABLE_DIFF_PERCENTAGE ) + { + /* No good. If the state is homogeneous enough, + * we make a template out of it. Otherwise, we + * make a proto. + */ - /* we now have the first interesting proto in "minprot". If - * it matches within the tolerances set for the first proto, - * we don't want to bother scanning the rest of the proto list - * to see if we have any other reasonable matches. - */ + if ( comfreq * 100 >= + totaltrans * TEMPLATE_SAME_PERCENTAGE ) + mktemplate( state, statenum, comstate ); - if ( mindiff * 100 > totaltrans * FIRST_MATCH_DIFF_PERCENTAGE ) - { /* not a good enough match. Scan the rest of the protos */ - for ( i = minprot; i != NIL; i = protnext[i] ) - { - d = tbldiff( state, i, extrct[1 - extptr] ); - if ( d < mindiff ) - { - extptr = 1 - extptr; - mindiff = d; - minprot = i; - } - } - } + else + { + mkprot( state, statenum, comstate ); + mkentry( state, numecs, statenum, + JAMSTATE, totaltrans ); + } + } - /* check if the proto we've decided on as our best bet is close - * enough to the state we want to match to be usable - */ + else + { /* use the proto */ + mkentry( extrct[extptr], numecs, statenum, + prottbl[minprot], mindiff ); - if ( mindiff * 100 > totaltrans * ACCEPTABLE_DIFF_PERCENTAGE ) - { - /* no good. If the state is homogeneous enough, we make a - * template out of it. Otherwise, we make a proto. - */ + /* If this state was sufficiently different from the + * proto we built it from, make it, too, a proto. 
+ */ - if ( comfreq * 100 >= totaltrans * TEMPLATE_SAME_PERCENTAGE ) - mktemplate( state, statenum, comstate ); + if ( mindiff * 100 >= + totaltrans * NEW_PROTO_DIFF_PERCENTAGE ) + mkprot( state, statenum, comstate ); + + /* Since mkprot added a new proto to the proto queue, + * it's possible that "minprot" is no longer on the + * proto queue (if it happened to have been the last + * entry, it would have been bumped off). If it's + * not there, then the new proto took its physical + * place (though logically the new proto is at the + * beginning of the queue), so in that case the + * following call will do nothing. + */ - else - { - mkprot( state, statenum, comstate ); - mkentry( state, numecs, statenum, JAMSTATE, totaltrans ); + mv2front( minprot ); + } } - } - - else - { /* use the proto */ - mkentry( extrct[extptr], numecs, statenum, - prottbl[minprot], mindiff ); - - /* if this state was sufficiently different from the proto - * we built it from, make it, too, a proto - */ - - if ( mindiff * 100 >= totaltrans * NEW_PROTO_DIFF_PERCENTAGE ) - mkprot( state, statenum, comstate ); - - /* since mkprot added a new proto to the proto queue, it's possible - * that "minprot" is no longer on the proto queue (if it happened - * to have been the last entry, it would have been bumped off). - * If it's not there, then the new proto took its physical place - * (though logically the new proto is at the beginning of the - * queue), so in that case the following call will do nothing. - */ - - mv2front( minprot ); - } } - } /* cmptmps - compress template table entries * - * synopsis - * cmptmps(); - * - * template tables are compressed by using the 'template equivalence - * classes', which are collections of transition character equivalence - * classes which always appear together in templates - really meta-equivalence - * classes. 
until this point, the tables for templates have been stored - * up at the top end of the nxt array; they will now be compressed and have - * table entries made for them. + * Template tables are compressed by using the 'template equivalence + * classes', which are collections of transition character equivalence + * classes which always appear together in templates - really meta-equivalence + * classes. until this point, the tables for templates have been stored + * up at the top end of the nxt array; they will now be compressed and have + * table entries made for them. */ void cmptmps() - - { - int tmpstorage[CSIZE + 1]; - register int *tmp = tmpstorage, i, j; - int totaltrans, trans; - - peakpairs = numtemps * numecs + tblend; - - if ( usemecs ) { - /* create equivalence classes based on data gathered on template - * transitions - */ + int tmpstorage[CSIZE + 1]; + register int *tmp = tmpstorage, i, j; + int totaltrans, trans; - nummecs = cre8ecs( tecfwd, tecbck, numecs ); - } - - else - nummecs = numecs; + peakpairs = numtemps * numecs + tblend; - while ( lastdfa + numtemps + 1 >= current_max_dfas ) - increase_max_dfas(); + if ( usemecs ) + { + /* Create equivalence classes based on data gathered on + * template transitions. + */ + nummecs = cre8ecs( tecfwd, tecbck, numecs ); + } - /* loop through each template */ + else + nummecs = numecs; - for ( i = 1; i <= numtemps; ++i ) - { - totaltrans = 0; /* number of non-jam transitions out of this template */ + while ( lastdfa + numtemps + 1 >= current_max_dfas ) + increase_max_dfas(); - for ( j = 1; j <= numecs; ++j ) - { - trans = tnxt[numecs * i + j]; + /* Loop through each template. */ - if ( usemecs ) + for ( i = 1; i <= numtemps; ++i ) { - /* the absolute value of tecbck is the meta-equivalence class - * of a given equivalence class, as set up by cre8ecs - */ - if ( tecbck[j] > 0 ) - { - tmp[tecbck[j]] = trans; - - if ( trans > 0 ) - ++totaltrans; - } - } + /* Number of non-jam transitions out of this template. 
*/ + totaltrans = 0; + + for ( j = 1; j <= numecs; ++j ) + { + trans = tnxt[numecs * i + j]; + + if ( usemecs ) + { + /* The absolute value of tecbck is the + * meta-equivalence class of a given + * equivalence class, as set up by cre8ecs(). + */ + if ( tecbck[j] > 0 ) + { + tmp[tecbck[j]] = trans; + + if ( trans > 0 ) + ++totaltrans; + } + } + + else + { + tmp[j] = trans; + + if ( trans > 0 ) + ++totaltrans; + } + } - else - { - tmp[j] = trans; + /* It is assumed (in a rather subtle way) in the skeleton + * that if we're using meta-equivalence classes, the def[] + * entry for all templates is the jam template, i.e., + * templates never default to other non-jam table entries + * (e.g., another template) + */ - if ( trans > 0 ) - ++totaltrans; + /* Leave room for the jam-state after the last real state. */ + mkentry( tmp, nummecs, lastdfa + i + 1, JAMSTATE, totaltrans ); } - } - - /* it is assumed (in a rather subtle way) in the skeleton that - * if we're using meta-equivalence classes, the def[] entry for - * all templates is the jam template, i.e., templates never default - * to other non-jam table entries (e.g., another template) - */ - - /* leave room for the jam-state after the last real state */ - mkentry( tmp, nummecs, lastdfa + i + 1, JAMSTATE, totaltrans ); } - } /* expand_nxt_chk - expand the next check arrays */ void expand_nxt_chk() + { + register int old_max = current_max_xpairs; - { - register int old_max = current_max_xpairs; - - current_max_xpairs += MAX_XPAIRS_INCREMENT; + current_max_xpairs += MAX_XPAIRS_INCREMENT; - ++num_reallocs; + ++num_reallocs; - nxt = reallocate_integer_array( nxt, current_max_xpairs ); - chk = reallocate_integer_array( chk, current_max_xpairs ); + nxt = reallocate_integer_array( nxt, current_max_xpairs ); + chk = reallocate_integer_array( chk, current_max_xpairs ); - zero_out( (char *) (chk + old_max), - MAX_XPAIRS_INCREMENT * sizeof( int ) / sizeof( char ) ); - } + zero_out( (char *) (chk + old_max), + MAX_XPAIRS_INCREMENT 
* sizeof( int ) / sizeof( char ) ); + } /* find_table_space - finds a space in the table for a state to be placed @@ -334,96 +340,99 @@ void expand_nxt_chk() int find_table_space( state, numtrans ) int *state, numtrans; - - { - /* firstfree is the position of the first possible occurrence of two - * consecutive unused records in the chk and nxt arrays - */ - register int i; - register int *state_ptr, *chk_ptr; - register int *ptr_to_last_entry_in_state; - - /* if there are too many out-transitions, put the state at the end of - * nxt and chk - */ - if ( numtrans > MAX_XTIONS_FULL_INTERIOR_FIT ) { - /* if table is empty, return the first available spot in chk/nxt, - * which should be 1 + /* Firstfree is the position of the first possible occurrence of two + * consecutive unused records in the chk and nxt arrays. */ - if ( tblend < 2 ) - return ( 1 ); - - i = tblend - numecs; /* start searching for table space near the - * end of chk/nxt arrays - */ - } - - else - i = firstfree; /* start searching for table space from the - * beginning (skipping only the elements - * which will definitely not hold the new - * state) - */ + register int i; + register int *state_ptr, *chk_ptr; + register int *ptr_to_last_entry_in_state; - while ( 1 ) /* loops until a space is found */ - { - while ( i + numecs >= current_max_xpairs ) - expand_nxt_chk(); - - /* loops until space for end-of-buffer and action number are found */ - while ( 1 ) - { - if ( chk[i - 1] == 0 ) /* check for action number space */ + /* If there are too many out-transitions, put the state at the end of + * nxt and chk. + */ + if ( numtrans > MAX_XTIONS_FULL_INTERIOR_FIT ) { - if ( chk[i] == 0 ) /* check for end-of-buffer space */ - break; + /* If table is empty, return the first available spot in + * chk/nxt, which should be 1. 
+ */ + if ( tblend < 2 ) + return 1; - else - i += 2; /* since i != 0, there is no use checking to - * see if (++i) - 1 == 0, because that's the - * same as i == 0, so we skip a space - */ + /* Start searching for table space near the end of + * chk/nxt arrays. + */ + i = tblend - numecs; } - else - ++i; + else + /* Start searching for table space from the beginning + * (skipping only the elements which will definitely not + * hold the new state). + */ + i = firstfree; - while ( i + numecs >= current_max_xpairs ) - expand_nxt_chk(); - } + while ( 1 ) /* loops until a space is found */ + { + while ( i + numecs >= current_max_xpairs ) + expand_nxt_chk(); - /* if we started search from the beginning, store the new firstfree for - * the next call of find_table_space() - */ - if ( numtrans <= MAX_XTIONS_FULL_INTERIOR_FIT ) - firstfree = i + 1; + /* Loops until space for end-of-buffer and action number + * are found. + */ + while ( 1 ) + { + /* Check for action number space. */ + if ( chk[i - 1] == 0 ) + { + /* Check for end-of-buffer space. */ + if ( chk[i] == 0 ) + break; + + else + /* Since i != 0, there is no use + * checking to see if (++i) - 1 == 0, + * because that's the same as i == 0, + * so we skip a space. + */ + i += 2; + } + + else + ++i; + + while ( i + numecs >= current_max_xpairs ) + expand_nxt_chk(); + } - /* check to see if all elements in chk (and therefore nxt) that are - * needed for the new state have not yet been taken - */ + /* If we started search from the beginning, store the new + * firstfree for the next call of find_table_space(). + */ + if ( numtrans <= MAX_XTIONS_FULL_INTERIOR_FIT ) + firstfree = i + 1; - state_ptr = &state[1]; - ptr_to_last_entry_in_state = &chk[i + numecs + 1]; + /* Check to see if all elements in chk (and therefore nxt) + * that are needed for the new state have not yet been taken. 
+ */ - for ( chk_ptr = &chk[i + 1]; chk_ptr != ptr_to_last_entry_in_state; - ++chk_ptr ) - if ( *(state_ptr++) != 0 && *chk_ptr != 0 ) - break; + state_ptr = &state[1]; + ptr_to_last_entry_in_state = &chk[i + numecs + 1]; - if ( chk_ptr == ptr_to_last_entry_in_state ) - return ( i ); + for ( chk_ptr = &chk[i + 1]; + chk_ptr != ptr_to_last_entry_in_state; ++chk_ptr ) + if ( *(state_ptr++) != 0 && *chk_ptr != 0 ) + break; - else - ++i; + if ( chk_ptr == ptr_to_last_entry_in_state ) + return i; + + else + ++i; + } } - } /* inittbl - initialize transition tables - * - * synopsis - * inittbl(); * * Initializes "firstfree" to be one beyond the end of the table. Initializes * all "chk" entries to be zero. Note that templates are built in their @@ -431,73 +440,67 @@ int *state, numtrans; * with the non-template entries during table generation. */ void inittbl() + { + register int i; - { - register int i; - - zero_out( (char *) chk, - current_max_xpairs * sizeof( int ) / sizeof( char ) ); + zero_out( (char *) chk, + current_max_xpairs * sizeof( int ) / sizeof( char ) ); - tblend = 0; - firstfree = tblend + 1; - numtemps = 0; + tblend = 0; + firstfree = tblend + 1; + numtemps = 0; - if ( usemecs ) - { - /* set up doubly-linked meta-equivalence classes - * these are sets of equivalence classes which all have identical - * transitions out of TEMPLATES - */ + if ( usemecs ) + { + /* Set up doubly-linked meta-equivalence classes; these + * are sets of equivalence classes which all have identical + * transitions out of TEMPLATES. 
+ */ - tecbck[1] = NIL; + tecbck[1] = NIL; - for ( i = 2; i <= numecs; ++i ) - { - tecbck[i] = i - 1; - tecfwd[i - 1] = i; - } + for ( i = 2; i <= numecs; ++i ) + { + tecbck[i] = i - 1; + tecfwd[i - 1] = i; + } - tecfwd[numecs] = NIL; + tecfwd[numecs] = NIL; + } } - } -/* mkdeftbl - make the default, "jam" table entries - * - * synopsis - * mkdeftbl(); - */ +/* mkdeftbl - make the default, "jam" table entries */ void mkdeftbl() + { + int i; - { - int i; - - jamstate = lastdfa + 1; + jamstate = lastdfa + 1; - ++tblend; /* room for transition on end-of-buffer character */ + ++tblend; /* room for transition on end-of-buffer character */ - while ( tblend + numecs >= current_max_xpairs ) - expand_nxt_chk(); + while ( tblend + numecs >= current_max_xpairs ) + expand_nxt_chk(); - /* add in default end-of-buffer transition */ - nxt[tblend] = end_of_buffer_state; - chk[tblend] = jamstate; + /* Add in default end-of-buffer transition. */ + nxt[tblend] = end_of_buffer_state; + chk[tblend] = jamstate; - for ( i = 1; i <= numecs; ++i ) - { - nxt[tblend + i] = 0; - chk[tblend + i] = jamstate; - } + for ( i = 1; i <= numecs; ++i ) + { + nxt[tblend + i] = 0; + chk[tblend + i] = jamstate; + } - jambase = tblend; + jambase = tblend; - base[jamstate] = jambase; - def[jamstate] = 0; + base[jamstate] = jambase; + def[jamstate] = 0; - tblend += numecs; - ++numtemps; - } + tblend += numecs; + ++numtemps; + } /* mkentry - create base/def and nxt/chk entries for transition array @@ -522,299 +525,273 @@ void mkdeftbl() void mkentry( state, numchars, statenum, deflink, totaltrans ) register int *state; int numchars, statenum, deflink, totaltrans; + { + register int minec, maxec, i, baseaddr; + int tblbase, tbllast; - { - register int minec, maxec, i, baseaddr; - int tblbase, tbllast; + if ( totaltrans == 0 ) + { /* there are no out-transitions */ + if ( deflink == JAMSTATE ) + base[statenum] = JAMSTATE; + else + base[statenum] = 0; - if ( totaltrans == 0 ) - { /* there are no out-transitions 
*/ - if ( deflink == JAMSTATE ) - base[statenum] = JAMSTATE; - else - base[statenum] = 0; + def[statenum] = deflink; + return; + } - def[statenum] = deflink; - return; - } + for ( minec = 1; minec <= numchars; ++minec ) + { + if ( state[minec] != SAME_TRANS ) + if ( state[minec] != 0 || deflink != JAMSTATE ) + break; + } - for ( minec = 1; minec <= numchars; ++minec ) - { - if ( state[minec] != SAME_TRANS ) - if ( state[minec] != 0 || deflink != JAMSTATE ) - break; - } + if ( totaltrans == 1 ) + { + /* There's only one out-transition. Save it for later to fill + * in holes in the tables. + */ + stack1( statenum, minec, state[minec], deflink ); + return; + } - if ( totaltrans == 1 ) - { - /* there's only one out-transition. Save it for later to fill - * in holes in the tables. - */ - stack1( statenum, minec, state[minec], deflink ); - return; - } + for ( maxec = numchars; maxec > 0; --maxec ) + { + if ( state[maxec] != SAME_TRANS ) + if ( state[maxec] != 0 || deflink != JAMSTATE ) + break; + } - for ( maxec = numchars; maxec > 0; --maxec ) - { - if ( state[maxec] != SAME_TRANS ) - if ( state[maxec] != 0 || deflink != JAMSTATE ) - break; - } + /* Whether we try to fit the state table in the middle of the table + * entries we have already generated, or if we just take the state + * table at the end of the nxt/chk tables, we must make sure that we + * have a valid base address (i.e., non-negative). Note that not + * only are negative base addresses dangerous at run-time (because + * indexing the next array with one and a low-valued character might + * generate an array-out-of-bounds error message), but at compile-time + * negative base addresses denote TEMPLATES. + */ - /* Whether we try to fit the state table in the middle of the table - * entries we have already generated, or if we just take the state - * table at the end of the nxt/chk tables, we must make sure that we - * have a valid base address (i.e., non-negative). 
Note that not only are - * negative base addresses dangerous at run-time (because indexing the - * next array with one and a low-valued character might generate an - * array-out-of-bounds error message), but at compile-time negative - * base addresses denote TEMPLATES. - */ - - /* find the first transition of state that we need to worry about. */ - if ( totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE ) - { /* attempt to squeeze it into the middle of the tabls */ - baseaddr = firstfree; - - while ( baseaddr < minec ) - { - /* using baseaddr would result in a negative base address below - * find the next free slot - */ - for ( ++baseaddr; chk[baseaddr] != 0; ++baseaddr ) - ; - } - - while ( baseaddr + maxec - minec + 1 >= current_max_xpairs ) - expand_nxt_chk(); + /* Find the first transition of state that we need to worry about. */ + if ( totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE ) + { + /* Attempt to squeeze it into the middle of the tables. */ + baseaddr = firstfree; - for ( i = minec; i <= maxec; ++i ) - if ( state[i] != SAME_TRANS ) - if ( state[i] != 0 || deflink != JAMSTATE ) - if ( chk[baseaddr + i - minec] != 0 ) - { /* baseaddr unsuitable - find another */ - for ( ++baseaddr; - baseaddr < current_max_xpairs && - chk[baseaddr] != 0; - ++baseaddr ) - ; - - while ( baseaddr + maxec - minec + 1 >= - current_max_xpairs ) - expand_nxt_chk(); - - /* reset the loop counter so we'll start all - * over again next time it's incremented + while ( baseaddr < minec ) + { + /* Using baseaddr would result in a negative base + * address below; find the next free slot. 
*/ - - i = minec - 1; + for ( ++baseaddr; chk[baseaddr] != 0; ++baseaddr ) + ; } - } - else - { - /* ensure that the base address we eventually generate is - * non-negative - */ - baseaddr = max( tblend + 1, minec ); - } - - tblbase = baseaddr - minec; - tbllast = tblbase + maxec; - - while ( tbllast + 1 >= current_max_xpairs ) - expand_nxt_chk(); + while ( baseaddr + maxec - minec + 1 >= current_max_xpairs ) + expand_nxt_chk(); + + for ( i = minec; i <= maxec; ++i ) + if ( state[i] != SAME_TRANS && + (state[i] != 0 || deflink != JAMSTATE) && + chk[baseaddr + i - minec] != 0 ) + { /* baseaddr unsuitable - find another */ + for ( ++baseaddr; + baseaddr < current_max_xpairs && + chk[baseaddr] != 0; ++baseaddr ) + ; + + while ( baseaddr + maxec - minec + 1 >= + current_max_xpairs ) + expand_nxt_chk(); + + /* Reset the loop counter so we'll start all + * over again next time it's incremented. + */ - base[statenum] = tblbase; - def[statenum] = deflink; + i = minec - 1; + } + } - for ( i = minec; i <= maxec; ++i ) - if ( state[i] != SAME_TRANS ) - if ( state[i] != 0 || deflink != JAMSTATE ) + else { - nxt[tblbase + i] = state[i]; - chk[tblbase + i] = statenum; + /* Ensure that the base address we eventually generate is + * non-negative. + */ + baseaddr = max( tblend + 1, minec ); } - if ( baseaddr == firstfree ) - /* find next free slot in tables */ - for ( ++firstfree; chk[firstfree] != 0; ++firstfree ) - ; + tblbase = baseaddr - minec; + tbllast = tblbase + maxec; - tblend = max( tblend, tbllast ); - } + while ( tbllast + 1 >= current_max_xpairs ) + expand_nxt_chk(); + + base[statenum] = tblbase; + def[statenum] = deflink; + + for ( i = minec; i <= maxec; ++i ) + if ( state[i] != SAME_TRANS ) + if ( state[i] != 0 || deflink != JAMSTATE ) + { + nxt[tblbase + i] = state[i]; + chk[tblbase + i] = statenum; + } + + if ( baseaddr == firstfree ) + /* Find next free slot in tables. 
*/ + for ( ++firstfree; chk[firstfree] != 0; ++firstfree ) + ; + + tblend = max( tblend, tbllast ); + } /* mk1tbl - create table entries for a state (or state fragment) which * has only one out-transition - * - * synopsis - * int state, sym, onenxt, onedef; - * mk1tbl( state, sym, onenxt, onedef ); */ void mk1tbl( state, sym, onenxt, onedef ) int state, sym, onenxt, onedef; + { + if ( firstfree < sym ) + firstfree = sym; - { - if ( firstfree < sym ) - firstfree = sym; - - while ( chk[firstfree] != 0 ) - if ( ++firstfree >= current_max_xpairs ) - expand_nxt_chk(); + while ( chk[firstfree] != 0 ) + if ( ++firstfree >= current_max_xpairs ) + expand_nxt_chk(); - base[state] = firstfree - sym; - def[state] = onedef; - chk[firstfree] = state; - nxt[firstfree] = onenxt; + base[state] = firstfree - sym; + def[state] = onedef; + chk[firstfree] = state; + nxt[firstfree] = onenxt; - if ( firstfree > tblend ) - { - tblend = firstfree++; + if ( firstfree > tblend ) + { + tblend = firstfree++; - if ( firstfree >= current_max_xpairs ) - expand_nxt_chk(); + if ( firstfree >= current_max_xpairs ) + expand_nxt_chk(); + } } - } -/* mkprot - create new proto entry - * - * synopsis - * int state[], statenum, comstate; - * mkprot( state, statenum, comstate ); - */ +/* mkprot - create new proto entry */ void mkprot( state, statenum, comstate ) int state[], statenum, comstate; - - { - int i, slot, tblbase; - - if ( ++numprots >= MSP || numecs * numprots >= PROT_SAVE_SIZE ) { - /* gotta make room for the new proto by dropping last entry in - * the queue - */ - slot = lastprot; - lastprot = protprev[lastprot]; - protnext[lastprot] = NIL; - } + int i, slot, tblbase; - else - slot = numprots; + if ( ++numprots >= MSP || numecs * numprots >= PROT_SAVE_SIZE ) + { + /* Gotta make room for the new proto by dropping last entry in + * the queue. 
+ */ + slot = lastprot; + lastprot = protprev[lastprot]; + protnext[lastprot] = NIL; + } - protnext[slot] = firstprot; + else + slot = numprots; - if ( firstprot != NIL ) - protprev[firstprot] = slot; + protnext[slot] = firstprot; - firstprot = slot; - prottbl[slot] = statenum; - protcomst[slot] = comstate; + if ( firstprot != NIL ) + protprev[firstprot] = slot; - /* copy state into save area so it can be compared with rapidly */ - tblbase = numecs * (slot - 1); + firstprot = slot; + prottbl[slot] = statenum; + protcomst[slot] = comstate; - for ( i = 1; i <= numecs; ++i ) - protsave[tblbase + i] = state[i]; - } + /* Copy state into save area so it can be compared with rapidly. */ + tblbase = numecs * (slot - 1); + + for ( i = 1; i <= numecs; ++i ) + protsave[tblbase + i] = state[i]; + } /* mktemplate - create a template entry based on a state, and connect the state * to it - * - * synopsis - * int state[], statenum, comstate, totaltrans; - * mktemplate( state, statenum, comstate, totaltrans ); */ void mktemplate( state, statenum, comstate ) int state[], statenum, comstate; + { + int i, numdiff, tmpbase, tmp[CSIZE + 1]; + Char transset[CSIZE + 1]; + int tsptr; - { - int i, numdiff, tmpbase, tmp[CSIZE + 1]; - Char transset[CSIZE + 1]; - int tsptr; - - ++numtemps; + ++numtemps; - tsptr = 0; + tsptr = 0; - /* calculate where we will temporarily store the transition table - * of the template in the tnxt[] array. The final transition table - * gets created by cmptmps() - */ + /* Calculate where we will temporarily store the transition table + * of the template in the tnxt[] array. The final transition table + * gets created by cmptmps(). 
+ */ - tmpbase = numtemps * numecs; + tmpbase = numtemps * numecs; - if ( tmpbase + numecs >= current_max_template_xpairs ) - { - current_max_template_xpairs += MAX_TEMPLATE_XPAIRS_INCREMENT; + if ( tmpbase + numecs >= current_max_template_xpairs ) + { + current_max_template_xpairs += MAX_TEMPLATE_XPAIRS_INCREMENT; - ++num_reallocs; + ++num_reallocs; - tnxt = reallocate_integer_array( tnxt, current_max_template_xpairs ); - } + tnxt = reallocate_integer_array( tnxt, + current_max_template_xpairs ); + } - for ( i = 1; i <= numecs; ++i ) - if ( state[i] == 0 ) - tnxt[tmpbase + i] = 0; - else - { - transset[tsptr++] = i; - tnxt[tmpbase + i] = comstate; - } + for ( i = 1; i <= numecs; ++i ) + if ( state[i] == 0 ) + tnxt[tmpbase + i] = 0; + else + { + transset[tsptr++] = i; + tnxt[tmpbase + i] = comstate; + } - if ( usemecs ) - mkeccl( transset, tsptr, tecfwd, tecbck, numecs, 0 ); + if ( usemecs ) + mkeccl( transset, tsptr, tecfwd, tecbck, numecs, 0 ); - mkprot( tnxt + tmpbase, -numtemps, comstate ); + mkprot( tnxt + tmpbase, -numtemps, comstate ); - /* we rely on the fact that mkprot adds things to the beginning - * of the proto queue - */ + /* We rely on the fact that mkprot adds things to the beginning + * of the proto queue. 
+ */ - numdiff = tbldiff( state, firstprot, tmp ); - mkentry( tmp, numecs, statenum, -numtemps, numdiff ); - } + numdiff = tbldiff( state, firstprot, tmp ); + mkentry( tmp, numecs, statenum, -numtemps, numdiff ); + } -/* mv2front - move proto queue element to front of queue - * - * synopsis - * int qelm; - * mv2front( qelm ); - */ +/* mv2front - move proto queue element to front of queue */ void mv2front( qelm ) int qelm; - - { - if ( firstprot != qelm ) { - if ( qelm == lastprot ) - lastprot = protprev[lastprot]; + if ( firstprot != qelm ) + { + if ( qelm == lastprot ) + lastprot = protprev[lastprot]; - protnext[protprev[qelm]] = protnext[qelm]; + protnext[protprev[qelm]] = protnext[qelm]; - if ( protnext[qelm] != NIL ) - protprev[protnext[qelm]] = protprev[qelm]; + if ( protnext[qelm] != NIL ) + protprev[protnext[qelm]] = protprev[qelm]; - protprev[qelm] = NIL; - protnext[qelm] = firstprot; - protprev[firstprot] = qelm; - firstprot = qelm; + protprev[qelm] = NIL; + protnext[qelm] = firstprot; + protprev[firstprot] = qelm; + firstprot = qelm; + } } - } /* place_state - place a state into full speed transition table - * - * synopsis - * int *state, statenum, transnum; - * place_state( state, statenum, transnum ); * * State is the statenum'th state. It is indexed by equivalence class and * gives the number of the state to enter for a given equivalence class. @@ -823,44 +800,42 @@ int qelm; void place_state( state, statenum, transnum ) int *state, statenum, transnum; + { + register int i; + register int *state_ptr; + int position = find_table_space( state, transnum ); - { - register int i; - register int *state_ptr; - int position = find_table_space( state, transnum ); - - /* base is the table of start positions */ - base[statenum] = position; + /* "base" is the table of start positions. 
*/ + base[statenum] = position; - /* put in action number marker; this non-zero number makes sure that - * find_table_space() knows that this position in chk/nxt is taken - * and should not be used for another accepting number in another state - */ - chk[position - 1] = 1; + /* Put in action number marker; this non-zero number makes sure that + * find_table_space() knows that this position in chk/nxt is taken + * and should not be used for another accepting number in another + * state. + */ + chk[position - 1] = 1; - /* put in end-of-buffer marker; this is for the same purposes as above */ - chk[position] = 1; + /* Put in end-of-buffer marker; this is for the same purposes as + * above. + */ + chk[position] = 1; - /* place the state into chk and nxt */ - state_ptr = &state[1]; + /* Place the state into chk and nxt. */ + state_ptr = &state[1]; - for ( i = 1; i <= numecs; ++i, ++state_ptr ) - if ( *state_ptr != 0 ) - { - chk[position + i] = i; - nxt[position + i] = *state_ptr; - } + for ( i = 1; i <= numecs; ++i, ++state_ptr ) + if ( *state_ptr != 0 ) + { + chk[position + i] = i; + nxt[position + i] = *state_ptr; + } - if ( position + numecs > tblend ) - tblend = position + numecs; - } + if ( position + numecs > tblend ) + tblend = position + numecs; + } /* stack1 - save states with only one out-transition to be processed later - * - * synopsis - * int statenum, sym, nextstate, deflink; - * stack1( statenum, sym, nextstate, deflink ); * * if there's room for another state on the "one-transition" stack, the * state is pushed onto it, to be processed later by mk1tbl. 
If there's @@ -869,34 +844,29 @@ int *state, statenum, transnum; void stack1( statenum, sym, nextstate, deflink ) int statenum, sym, nextstate, deflink; - - { - if ( onesp >= ONE_STACK_SIZE - 1 ) - mk1tbl( statenum, sym, nextstate, deflink ); - - else { - ++onesp; - onestate[onesp] = statenum; - onesym[onesp] = sym; - onenext[onesp] = nextstate; - onedef[onesp] = deflink; + if ( onesp >= ONE_STACK_SIZE - 1 ) + mk1tbl( statenum, sym, nextstate, deflink ); + + else + { + ++onesp; + onestate[onesp] = statenum; + onesym[onesp] = sym; + onenext[onesp] = nextstate; + onedef[onesp] = deflink; + } } - } /* tbldiff - compute differences between two state tables - * - * synopsis - * int state[], pr, ext[]; - * int tbldiff, numdifferences; - * numdifferences = tbldiff( state, pr, ext ) * * "state" is the state array which is to be extracted from the pr'th * proto. "pr" is both the number of the proto we are extracting from * and an index into the save area where we can find the proto's complete * state table. Each entry in "state" which differs from the corresponding * entry of "pr" will appear in "ext". + * * Entries which are the same in both "state" and "pr" will be marked * as transitions to "SAME_TRANS" in "ext". The total number of differences * between "state" and "pr" is returned as function value. 
Note that this @@ -905,23 +875,22 @@ int statenum, sym, nextstate, deflink; int tbldiff( state, pr, ext ) int state[], pr, ext[]; + { + register int i, *sp = state, *ep = ext, *protp; + register int numdiff = 0; - { - register int i, *sp = state, *ep = ext, *protp; - register int numdiff = 0; + protp = &protsave[numecs * (pr - 1)]; - protp = &protsave[numecs * (pr - 1)]; + for ( i = numecs; i > 0; --i ) + { + if ( *++protp == *++sp ) + *++ep = SAME_TRANS; + else + { + *++ep = *sp; + ++numdiff; + } + } - for ( i = numecs; i > 0; --i ) - { - if ( *++protp == *++sp ) - *++ep = SAME_TRANS; - else - { - *++ep = *sp; - ++numdiff; - } + return numdiff; } - - return ( numdiff ); - } diff --git a/yylex.c b/yylex.c index a4a3d47..6b90652 100644 --- a/yylex.c +++ b/yylex.c @@ -42,175 +42,173 @@ static char rcsid[] = #endif -/* yylex - scan for a regular expression token - * - * synopsis - * - * token = yylex(); - * - * token - return token found - */ +/* yylex - scan for a regular expression token */ int yylex() - - { - int toktype; - static int beglin = false; - - if ( eofseen ) - toktype = EOF; - else - toktype = flexscan(); - - if ( toktype == EOF || toktype == 0 ) { - eofseen = 1; - - if ( sectnum == 1 ) - { - synerr( "premature EOF" ); - sectnum = 2; - toktype = SECTEND; - } - - else if ( sectnum == 2 ) - { - sectnum = 3; - toktype = 0; - } + int toktype; + static int beglin = false; + if ( eofseen ) + toktype = EOF; else - toktype = 0; - } - - if ( trace ) - { - if ( beglin ) - { - fprintf( stderr, "%d\t", num_rules + 1 ); - beglin = 0; - } - - switch ( toktype ) - { - case '<': - case '>': - case '^': - case '$': - case '"': - case '[': - case ']': - case '{': - case '}': - case '|': - case '(': - case ')': - case '-': - case '/': - case '\\': - case '?': - case '.': - case '*': - case '+': - case ',': - (void) putc( toktype, stderr ); - break; - - case '\n': - (void) putc( '\n', stderr ); - - if ( sectnum == 2 ) - beglin = 1; - - break; - - case SCDECL: - fputs( "%s", 
stderr ); - break; - - case XSCDECL: - fputs( "%x", stderr ); - break; - - case WHITESPACE: - (void) putc( ' ', stderr ); - break; - - case SECTEND: - fputs( "%%\n", stderr ); - - /* we set beglin to be true so we'll start - * writing out numbers as we echo rules. flexscan() has - * already assigned sectnum - */ - - if ( sectnum == 2 ) - beglin = 1; - - break; - - case NAME: - fprintf( stderr, "'%s'", nmstr ); - break; - - case CHAR: - switch ( yylval ) - { - case '<': - case '>': - case '^': - case '$': - case '"': - case '[': - case ']': - case '{': - case '}': - case '|': - case '(': - case ')': - case '-': - case '/': - case '\\': - case '?': - case '.': - case '*': - case '+': - case ',': - fprintf( stderr, "\\%c", yylval ); - break; - - default: - if ( ! isascii( yylval ) || ! isprint( yylval ) ) - fprintf( stderr, "\\%.3o", (unsigned int) yylval ); - else - (void) putc( yylval, stderr ); - break; - } - - break; - - case NUMBER: - fprintf( stderr, "%d", yylval ); - break; - - case PREVCCL: - fprintf( stderr, "[%d]", yylval ); - break; - - case EOF_OP: - fprintf( stderr, "<>" ); - break; - - case 0: - fprintf( stderr, "End Marker" ); - break; - - default: - fprintf( stderr, "*Something Weird* - tok: %d val: %d\n", - toktype, yylval ); - break; - } + toktype = flexscan(); + + if ( toktype == EOF || toktype == 0 ) + { + eofseen = 1; + + if ( sectnum == 1 ) + { + synerr( "premature EOF" ); + sectnum = 2; + toktype = SECTEND; + } + + else if ( sectnum == 2 ) + { + sectnum = 3; + toktype = 0; + } + + else + toktype = 0; + } + + if ( trace ) + { + if ( beglin ) + { + fprintf( stderr, "%d\t", num_rules + 1 ); + beglin = 0; + } + + switch ( toktype ) + { + case '<': + case '>': + case '^': + case '$': + case '"': + case '[': + case ']': + case '{': + case '}': + case '|': + case '(': + case ')': + case '-': + case '/': + case '\\': + case '?': + case '.': + case '*': + case '+': + case ',': + (void) putc( toktype, stderr ); + break; + + case '\n': + (void) putc( '\n', 
stderr ); + + if ( sectnum == 2 ) + beglin = 1; + + break; + + case SCDECL: + fputs( "%s", stderr ); + break; + + case XSCDECL: + fputs( "%x", stderr ); + break; + + case WHITESPACE: + (void) putc( ' ', stderr ); + break; + + case SECTEND: + fputs( "%%\n", stderr ); + + /* We set beglin to be true so we'll start + * writing out numbers as we echo rules. + * flexscan() has already assigned sectnum. + */ + + if ( sectnum == 2 ) + beglin = 1; + + break; + + case NAME: + fprintf( stderr, "'%s'", nmstr ); + break; + + case CHAR: + switch ( yylval ) + { + case '<': + case '>': + case '^': + case '$': + case '"': + case '[': + case ']': + case '{': + case '}': + case '|': + case '(': + case ')': + case '-': + case '/': + case '\\': + case '?': + case '.': + case '*': + case '+': + case ',': + fprintf( stderr, "\\%c", + yylval ); + break; + + default: + if ( ! isascii( yylval ) || + ! isprint( yylval ) ) + fprintf( stderr, + "\\%.3o", + (unsigned int) yylval ); + else + (void) putc( yylval, + stderr ); + break; + } + + break; + + case NUMBER: + fprintf( stderr, "%d", yylval ); + break; + + case PREVCCL: + fprintf( stderr, "[%d]", yylval ); + break; + + case EOF_OP: + fprintf( stderr, "<>" ); + break; + + case 0: + fprintf( stderr, "End Marker" ); + break; + + default: + fprintf( stderr, + "*Something Weird* - tok: %d val: %d\n", + toktype, yylval ); + break; + } + } + + return toktype; } - - return ( toktype ); - } -- cgit v1.2.3 From 1a3efb8ed7501d1c77e7baca747aa6182a7ada80 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 12 Jun 1993 13:40:15 +0000 Subject: Got rid of code needed for %t --- dfa.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dfa.c b/dfa.c index f91c193..c2c98cc 100644 --- a/dfa.c +++ b/dfa.c @@ -1041,8 +1041,7 @@ int symlist[], duplist[]; if ( tch >= 0 ) { /* character transition */ - /* abs() needed for fake %t ec's */ - int ec = abs( ecgroup[tch] ); + int ec = ecgroup[tch]; mkechar( ec, dupfwd, duplist ); symlist[ec] = 1; -- 
cgit v1.2.3 From 182894adc30643189cce0aaedb2a7a1ed6d2acf8 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 12 Jun 1993 13:41:52 +0000 Subject: Modified to use yy_flex_alloc() and friends Moved some globals earlier in the file to permit access in section 1 --- flex.skl | 39 ++++++++++++++------------------------- 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/flex.skl b/flex.skl index 0c5669e..4a3fa48 100644 --- a/flex.skl +++ b/flex.skl @@ -32,14 +32,6 @@ #ifdef __STDC__ -#ifdef __GNUC__ -#include -void *malloc( size_t ); -void free( void* ); -#else -#include -#endif /* __GNUC__ */ - #define YY_USE_PROTOS #define YY_USE_CONST @@ -61,13 +53,6 @@ void free( void* ); #define YY_PROTO(proto) proto #else #define YY_PROTO(proto) () -/* We can't get here if it's an ANSI C compiler, or a C++ compiler, - * so it's got to be a K&R compiler, and therefore there's no standard - * place from which to include these definitions. - */ -char *malloc(); -int free(); -int read(); #endif @@ -150,7 +135,7 @@ int read(); /* Default declaration of generated scanner - a define so the user can * easily add parameters. */ -#define YY_DECL int yylex YY_PROTO(( void )) +#define YY_DECL int yylex YY_PROTO(( void )) /* Code executed at the end of each rule. */ #define YY_BREAK break; @@ -163,6 +148,9 @@ int read(); typedef struct yy_buffer_state *YY_BUFFER_STATE; +extern int yyleng; +extern FILE *yyin, *yyout; + %% section 1 definitions and declarations of yytext/yytext_ptr go here /* Done after the current pattern has been matched and before the @@ -204,7 +192,7 @@ struct yy_buffer_state /* Size of input buffer in bytes, not including room for EOB * characters. */ - int yy_buf_size; + int yy_buf_size; /* Number of characters read into yy_ch_buf, not including EOB * characters. 
@@ -236,9 +224,6 @@ static YY_CHAR yy_hold_char; static int yy_n_chars; /* number of characters read into yy_ch_buf */ -extern int yyleng; -extern FILE *yyin, *yyout; - int yyleng; FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; @@ -271,6 +256,9 @@ void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file )); #define yy_new_buffer yy_create_buffer +extern void *yy_flex_alloc(); +extern void yy_flex_free(); + #ifdef __cplusplus static int yyinput YY_PROTO(( void )); #else @@ -466,7 +454,7 @@ do_action: /* This label is used only to access EOF actions. */ /* yy_get_next_buffer - try to read in a new buffer * * Returns a code representing an action: - * EOB_ACT_LAST_MATCH - + * EOB_ACT_LAST_MATCH - * EOB_ACT_CONTINUE_SCAN - continue scanning from current position * EOB_ACT_END_OF_FILE - end of file */ @@ -763,7 +751,7 @@ int size; { YY_BUFFER_STATE b; - b = (YY_BUFFER_STATE) malloc( sizeof( struct yy_buffer_state ) ); + b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); if ( ! b ) YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); @@ -773,7 +761,8 @@ int size; /* yy_ch_buf has to be 2 characters longer than the size given because * we need to put in 2 end-of-buffer characters. */ - b->yy_ch_buf = (YY_CHAR *) malloc( (unsigned) (b->yy_buf_size + 2) ); + b->yy_ch_buf = + (YY_CHAR *) yy_flex_alloc( (unsigned) (b->yy_buf_size + 2) ); if ( ! 
b->yy_ch_buf ) YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); @@ -794,8 +783,8 @@ YY_BUFFER_STATE b; if ( b == yy_current_buffer ) yy_current_buffer = (YY_BUFFER_STATE) 0; - free( (char *) b->yy_ch_buf ); - free( (char *) b ); + yy_flex_free( b->yy_ch_buf ); + yy_flex_free( b ); } -- cgit v1.2.3 From 3a90a34ff0c0512c1aea63bece9e2bdbabf2bef5 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 12 Jun 1993 13:42:18 +0000 Subject: Modified to use yy_flex_alloc() --- sym.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sym.c b/sym.c index 1558cf4..0c1ad80 100644 --- a/sym.c +++ b/sym.c @@ -74,7 +74,8 @@ int table_size; } /* create new entry */ - new_entry = (struct hash_entry *) malloc( sizeof( struct hash_entry ) ); + new_entry = (struct hash_entry *) + yy_flex_alloc( sizeof( struct hash_entry ) ); if ( new_entry == NULL ) flexfatal( "symbol table memory allocation failed" ); -- cgit v1.2.3 From 2d95f93b3da682c8c000383ea7e76eeb8f5990c0 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 12 Jun 1993 13:42:32 +0000 Subject: Modified to use yy_flex_alloc() and friends --- misc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/misc.c b/misc.c index b66b0ec..bb77474 100644 --- a/misc.c +++ b/misc.c @@ -82,7 +82,7 @@ int size, element_size; if ( element_size * size <= 0 ) flexfatal( "request for < 1 byte in allocate_array()" ); - mem = (void *) malloc( (unsigned) (element_size * size) ); + mem = yy_flex_alloc( element_size * size ); if ( mem == NULL ) flexfatal( "memory allocation failed in allocate_array()" ); @@ -175,7 +175,7 @@ register char *str; for ( c = str; *c; ++c ) ; - copy = malloc( (unsigned) ((c - str + 1) * sizeof( char )) ); + copy = yy_flex_alloc( (c - str + 1) * sizeof( char ) ); if ( copy == NULL ) flexfatal( "dynamic memory failure in copy_string()" ); @@ -666,8 +666,7 @@ int size, element_size; flexfatal( "attempt to increase array size by less than 1 byte" ); - new_array = - (void *) 
realloc( (char *)array, (unsigned) (size * element_size )); + new_array = yy_flex_realloc( array, size * element_size ); if ( new_array == NULL ) flexfatal( "attempt to increase array size failed" ); -- cgit v1.2.3 From 417e9e8e9a9c43b3f1bae61de9c83073351a6e2f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 12 Jun 1993 13:42:49 +0000 Subject: Fixed bug in lex % directives --- scan.l | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scan.l b/scan.l index 0566a2b..ee87937 100644 --- a/scan.l +++ b/scan.l @@ -113,8 +113,8 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} return SECTEND; } -^"%pointer".*\n ++linenum; yytext_is_array = false; -^"%array".*\n ++linenum; yytext_is_array = true; +^"%pointer".*{NL} ++linenum; yytext_is_array = false; +^"%array".*{NL} ++linenum; yytext_is_array = true; ^"%used" { warn( "%used/%unused have been deprecated" ); @@ -126,9 +126,9 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} } -^"%"[aceknopr]{WS}.*{NL} ++linenum; /* ignore */ +^"%"[aceknopr]{OPTWS}[0-9]*{OPTWS}{NL} ++linenum; /* ignore */ -^"%"[^sxanpekotcru{}]{OPTWS} synerr( "unrecognized '%' directive" ); +^"%"[^sxanpekotcru{}].* synerr( "unrecognized '%' directive" ); ^{NAME} { (void) strcpy( nmstr, (char *) yytext ); -- cgit v1.2.3 From 79b7a59a56376d4a90f13a759971380665e6a419 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 12 Jun 1993 13:43:31 +0000 Subject: Added -P flag --- main.c | 48 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/main.c b/main.c index 1370829..c32af32 100644 --- a/main.c +++ b/main.c @@ -100,10 +100,11 @@ int num_input_files; char *program_name; #ifndef SHORT_FILE_NAMES -static char *outfile = "lex.yy.c"; +static char *outfile_template = "lex.%s.c"; #else -static char *outfile = "lexyy.c"; +static char *outfile_template = "lex%s.c"; #endif +static char outfile_path[64]; static int outfile_created = 0; static int use_stdout; @@ -225,7 +226,7 @@ int exit_status; else if ( fclose( stdout ) ) 
flexfatal( "error occurred when closing output file" ); - else if ( unlink( outfile ) ) + else if ( unlink( outfile_path ) ) flexfatal( "error occurred when deleting output file" ); } @@ -416,7 +417,7 @@ char **argv; { int i, sawcmpflag; int csize_given, interactive_given; - char *arg, *flex_gettime(), *mktemp(); + char *arg, *prefix, *flex_gettime(), *mktemp(); printstats = syntaxerror = trace = spprdflt = caseins = false; backtrack_report = ddebug = fulltbl = fullspd = false; @@ -436,6 +437,8 @@ char **argv; allocate_character_array( action_size ); action_offset = action_index = 0; + prefix = "yy"; + starttime = flex_gettime(); program_name = argv[0]; @@ -545,6 +548,14 @@ char **argv; */ break; + case 'P': + if ( i != 1 ) + flexerror( + "-P flag must be given separately" ); + + prefix = arg + i + 1; + goto get_next_arg; + case 'p': ++performance_report; break; @@ -600,7 +611,9 @@ char **argv; exit( 1 ); } - /* Used by -C and -S flags in lieu of a "continue 2" control. */ + /* Used by -C, -S and -P flags in lieu of a "continue 2" + * control. + */ get_next_arg: ; } @@ -631,10 +644,14 @@ char **argv; if ( ! 
use_stdout ) { - FILE *prev_stdout = freopen( outfile, "w", stdout ); + FILE *prev_stdout; + + sprintf( outfile_path, outfile_template, prefix ); + + prev_stdout = freopen( outfile_path, "w", stdout ); if ( prev_stdout == NULL ) - lerrsf( "could not create %s", outfile ); + lerrsf( "could not create %s", outfile_path ); outfile_created = 1; } @@ -665,6 +682,23 @@ char **argv; if ( skelname && (skelfile = fopen( skelname, "r" )) == NULL ) lerrsf( "can't open skeleton file %s", skelname ); + if ( strcmp( prefix, "yy" ) ) + { +#define GEN_PREFIX(name) printf( "#define yy%s %s%s\n", name, prefix, name ); + GEN_PREFIX( "_create_buffer" ); + GEN_PREFIX( "_delete_buffer" ); + GEN_PREFIX( "_init_buffer" ); + GEN_PREFIX( "_load_buffer_state" ); + GEN_PREFIX( "_switch_to_buffer" ); + GEN_PREFIX( "in" ); + GEN_PREFIX( "leng" ); + GEN_PREFIX( "lex" ); + GEN_PREFIX( "out" ); + GEN_PREFIX( "restart" ); + GEN_PREFIX( "text" ); + printf( "\n" ); + } + lastdfa = lastnfa = 0; num_rules = num_eof_rules = default_rule = 0; numas = numsnpairs = tmpuses = 0; -- cgit v1.2.3 From 98cfc6eb3f2626017b36fa63178238764118bd38 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 12 Jun 1993 13:44:08 +0000 Subject: Changed to use yy_flex_alloc() and friends --- flexdef.h | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/flexdef.h b/flexdef.h index d227363..23fe538 100644 --- a/flexdef.h +++ b/flexdef.h @@ -82,23 +82,9 @@ char *sprintf(); /* keep lint happy */ #define SHORT_FILE_NAMES #endif -#ifdef __STDC__ - -#ifdef __GNUC__ -#include -void *malloc( size_t ); -void free( void* ); -#else -#include -#endif - -#else /* ! __STDC__ */ -char *malloc(), *realloc(); -#endif - /* Maximum line length we'll have to deal with. */ -#define MAXLINE BUFSIZ +#define MAXLINE 2048 /* Maximum size of file name. 
*/ #define FILENAMESIZE 1024 @@ -635,7 +621,12 @@ extern int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; extern int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; extern int num_backtracking, bol_needed; -void *allocate_array(), *reallocate_array(); +void *allocate_array PROTO((int, int)); +void *reallocate_array PROTO((void*, int, int)); + +void *yy_flex_alloc PROTO((int)); +void *yy_flex_realloc PROTO((void*, int)); +void yy_flex_free PROTO((void*)); #define allocate_integer_array(size) \ (int *) allocate_array( size, sizeof( int ) ) -- cgit v1.2.3 From 7f39933ca43a57200c910a10439254eac1a80ab5 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Jul 1993 21:31:46 +0000 Subject: Added check_char() --- ccl.c | 3 ++- misc.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/ccl.c b/ccl.c index 77adfae..2929fb9 100644 --- a/ccl.c +++ b/ccl.c @@ -41,6 +41,8 @@ int ch; { int ind, len, newpos, i; + check_char( ch ); + len = ccllen[cclp]; ind = cclmap[cclp]; @@ -121,7 +123,6 @@ void list_character_set( file, cset ) FILE *file; int cset[]; { - char *readable_form(); register int i; putc( '[', file ); diff --git a/misc.c b/misc.c index bb77474..abb6018 100644 --- a/misc.c +++ b/misc.c @@ -154,6 +154,24 @@ int v[], n; } +/* check_char - checks a character to make sure it's within the range + * we're expecting. If not, generates fatal error message + * and exits. + */ + +void check_char( int c ) + { + if ( c >= CSIZE ) + lerrsf( "bad character '%s' detected in check_char()", + readable_form( c ) ); + + if ( c >= csize ) + lerrsf( "scanner requires -8 flag to use the character '%s'", + readable_form( c ) ); + } + + + /* clower - replace upper-case letter to lower-case */ Char clower( c ) -- cgit v1.2.3 From 44da1464042d54cf46fb1c7313a9956b877685c9 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Jul 1993 21:32:22 +0000 Subject: Wilhems bug fixes. 
--- dfa.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/dfa.c b/dfa.c index c2c98cc..7ffe263 100644 --- a/dfa.c +++ b/dfa.c @@ -55,7 +55,8 @@ void check_for_backtracking( ds, state ) int ds; int state[]; { - if ( (reject && ! dfaacc[ds].dfaacc_set) || ! dfaacc[ds].dfaacc_state ) + if ( (reject && ! dfaacc[ds].dfaacc_set) || + (! reject && ! dfaacc[ds].dfaacc_state) ) { /* state is non-accepting */ ++num_backtracking; @@ -290,7 +291,7 @@ MARK_STATE(state) \ if ( ++numstates >= current_max_dfa_size ) \ DO_REALLOCATION \ t[numstates] = state; \ -hashval = hashval + state; \ +hashval += state; \ } #define STACK_STATE(state) \ @@ -318,10 +319,11 @@ ADD_STATE(state) \ * the stack. */ if ( ! IS_MARKED(ns) ) + { PUT_ON_STACK(ns) - - CHECK_ACCEPT(ns) - hashval = hashval + ns; + CHECK_ACCEPT(ns) + hashval += ns; + } } for ( stkpos = 1; stkpos <= stkend; ++stkpos ) @@ -413,12 +415,6 @@ void ntod() int duplist[CSIZE + 1], state[CSIZE + 1]; int targfreq[CSIZE + 1], targstate[CSIZE + 1]; - /* This is so find_table_space(...) 
will know where to start looking - * in chk/nxt for unused records for space to put in the state - */ - if ( fullspd ) - firstfree = 0; - accset = allocate_integer_array( num_rules + 1 ); nset = allocate_integer_array( current_max_dfa_size ); @@ -1031,11 +1027,7 @@ int symlist[], duplist[]; { if ( tch < -lastccl || tch >= csize ) { - if ( tch >= csize && tch <= CSIZE ) - flexerror( "scanner requires -8 flag" ); - - else - flexfatal( + flexfatal( "bad transition character detected in sympartition()" ); } -- cgit v1.2.3 From e4fdd926c773c1cc35b4a02bd596ba4cf4b04235 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Jul 1993 21:34:29 +0000 Subject: Added #ifndef's around #define's to let user override Moved a bunch of definitions prior to section 1 --- flex.skl | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/flex.skl b/flex.skl index 4a3fa48..e5d4814 100644 --- a/flex.skl +++ b/flex.skl @@ -70,21 +70,25 @@ /* this used to be an fputs(), but since the string might contain NUL's, * we now use fwrite(). */ +#ifndef ECHO #define ECHO (void) fwrite( (char *) yytext, yyleng, 1, yyout ) +#endif /* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, * is returned in "result". */ +#ifndef YY_INPUT #define YY_INPUT(buf,result,max_size) \ if ( (result = read( fileno(yyin), (char *) buf, max_size )) < 0 ) \ YY_FATAL_ERROR( "read() in flex scanner failed" ); -#define YY_NULL 0 +#endif /* No semi-colon after return; correct usage is to write "yyterminate();" - * we don't want an extra ';' after the "return" because that will cause * some compilers to complain about unreachable statements. */ #define yyterminate() return YY_NULL +#define YY_NULL 0 /* Report a fatal error. */ @@ -102,6 +106,7 @@ * done when it reached the ';' after the YY_FATAL_ERROR() call. 
*/ +#ifndef YY_FATAL_ERROR #define YY_FATAL_ERROR(msg) \ do \ { \ @@ -110,9 +115,7 @@ exit( 1 ); \ } \ while ( 0 ) - -/* Default yywrap function - always treat EOF as an EOF. */ -#define yywrap() 1 +#endif /* Enter a start condition. This macro really ought to take a parameter, * but we do it the disgusting crufty way forced on us by the ()-less @@ -135,10 +138,14 @@ /* Default declaration of generated scanner - a define so the user can * easily add parameters. */ +#ifndef YY_DECL #define YY_DECL int yylex YY_PROTO(( void )) +#endif /* Code executed at the end of each rule. */ +#ifndef YY_BREAK #define YY_BREAK break; +#endif #define YY_END_OF_BUFFER_CHAR 0 @@ -151,7 +158,9 @@ typedef struct yy_buffer_state *YY_BUFFER_STATE; extern int yyleng; extern FILE *yyin, *yyout; -%% section 1 definitions and declarations of yytext/yytext_ptr go here +extern void *yy_flex_alloc YY_PROTO(( int )); +extern void yy_flex_free YY_PROTO(( void * )); +extern int yywrap YY_PROTO(( void )); /* Done after the current pattern has been matched and before the * corresponding action - sets up yytext. @@ -228,11 +237,6 @@ int yyleng; FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; -%% data tables for the DFA go here - -/* These variables are all declared out here so that section 3 code can - * manipulate them. - */ /* Points to current character in buffer. 
*/ static YY_CHAR *yy_c_buf_p = (YY_CHAR *) 0; static int yy_init = 1; /* whether we need to initialize */ @@ -256,15 +260,16 @@ void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file )); #define yy_new_buffer yy_create_buffer -extern void *yy_flex_alloc(); -extern void yy_flex_free(); - #ifdef __cplusplus static int yyinput YY_PROTO(( void )); #else static int input YY_PROTO(( void )); #endif +%% section 1 definitions and declarations of yytext/yytext_ptr go here + +%% data tables for the DFA go here + YY_DECL { register yy_state_type yy_current_state; -- cgit v1.2.3 From 3e5f3507a6c76366f63ec10b651ab656b94cbe06 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Jul 1993 21:34:56 +0000 Subject: Added check_char(), readable_form() --- flexdef.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/flexdef.h b/flexdef.h index 23fe538..a605bf4 100644 --- a/flexdef.h +++ b/flexdef.h @@ -734,6 +734,9 @@ extern int all_upper PROTO((register Char *)); /* Bubble sort an integer array. */ extern void bubble PROTO((int [], int)); +/* Check a character to make sure it's in the expected range. */ +extern void check_char PROTO((int c)); + /* Shell sort a character array. */ extern void cshell PROTO((Char [], int, int)); @@ -766,6 +769,11 @@ extern void mkdata PROTO((int)); /* generate a data statement */ /* Return the integer represented by a string of digits. */ extern int myctoi PROTO((Char [])); +/* Return a printable version of the given character, which might be + * 8-bit + */ +extern char *readable_form PROTO((int)); + /* Write out one section of the skeleton file. */ extern void skelout PROTO((void)); -- cgit v1.2.3 From 3f0153d4db3a7414956e4655cb547ef21f83ab23 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Jul 1993 21:35:35 +0000 Subject: Fixed some reallocation bugs, etc. 
as per Wilhelms --- gen.c | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/gen.c b/gen.c index db3f4a1..9b582a8 100644 --- a/gen.c +++ b/gen.c @@ -141,23 +141,28 @@ void genctbl() /* We want the transition to be represented as the offset to the * next state, not the actual state number, which is what it currently - * is. The offset is base[nxt[i]] - base[chk[i]]. That's just the - * difference between the starting points of the two involved states - * (to - from). + * is. The offset is base[nxt[i]] - (base of current state)]. That's + * just the difference between the starting points of the two involved + * states (to - from). * * First, though, we need to find some way to put in our end-of-buffer * flags and states. We do this by making a state with absolutely no * transitions. We put it at the end of the table. */ - /* At this point, we're guaranteed that there's enough room in nxt[] - * and chk[] to hold tblend + numecs entries. We need just two slots. - * One for the action and one for the end-of-buffer transition. We - * now *assume* that we're guaranteed the only character we'll try to - * index this nxt/chk pair with is EOB, i.e., 0, so we don't have to - * make sure there's room for jam entries for other characters. + /* We need to have room in nxt/chk for two more slots: One for the + * action and one for the end-of-buffer transition. We now *assume* + * that we're guaranteed the only character we'll try to index this + * nxt/chk pair with is EOB, i.e., 0, so we don't have to make sure + * there's room for jam entries for other characters. 
*/ + while ( tblend + 2 >= current_max_xpairs ) + expand_nxt_chk(); + + while ( lastdfa + 1 >= current_max_dfas ) + increase_max_dfas(); + base[lastdfa + 1] = tblend + 2; nxt[tblend + 1] = end_of_buffer_action; chk[tblend + 1] = numecs + 1; @@ -246,8 +251,6 @@ void genecs() if ( trace ) { - char *readable_form(); - fputs( "\n\nEquivalence Classes:\n\n", stderr ); numrows = csize / 8; @@ -445,7 +448,7 @@ char *char_map; if ( usemecs ) { /* We've arrange it so that templates are never chained - * to one another. This means we can afford make a + * to one another. This means we can afford to make a * very simple test to see if we need to convert to * yy_c's meta-equivalence class without worrying * about erroneously looking up the meta-equivalence @@ -913,7 +916,7 @@ void gentabs() total_states = lastdfa + numtemps; - printf( total_states > MAX_SHORT ? C_long_decl : C_short_decl, + printf( total_states >= MAX_SHORT ? C_long_decl : C_short_decl, "yy_base", total_states + 1 ); for ( i = 1; i <= lastdfa; ++i ) @@ -947,7 +950,7 @@ void gentabs() dataend(); - printf( tblend > MAX_SHORT ? C_long_decl : C_short_decl, + printf( total_states >= MAX_SHORT ? C_long_decl : C_short_decl, "yy_def", total_states + 1 ); for ( i = 1; i <= total_states; ++i ) @@ -955,7 +958,7 @@ void gentabs() dataend(); - printf( lastdfa > MAX_SHORT ? C_long_decl : C_short_decl, + printf( tblend >= MAX_SHORT ? C_long_decl : C_short_decl, "yy_nxt", tblend + 1 ); for ( i = 1; i <= tblend; ++i ) @@ -968,7 +971,7 @@ void gentabs() dataend(); - printf( lastdfa > MAX_SHORT ? C_long_decl : C_short_decl, + printf( tblend >= MAX_SHORT ? C_long_decl : C_short_decl, "yy_chk", tblend + 1 ); for ( i = 1; i <= tblend; ++i ) @@ -1058,7 +1061,7 @@ void make_tables() */ int total_table_size = tblend + numecs + 1; char *trans_offset_type = - total_table_size > MAX_SHORT ? "long" : "short"; + total_table_size >= MAX_SHORT ? 
"long" : "short"; set_indent( 0 ); indent_puts( "struct yy_trans_info" ); @@ -1092,7 +1095,11 @@ void make_tables() else gentabs(); - if ( num_backtracking > 0 ) + /* Definitions for backtracking. We don't need them if REJECT + * is being used because then we use an alternative backtracking + * technique instead. + */ + if ( num_backtracking > 0 && ! reject ) { indent_puts( "static yy_state_type yy_last_accepting_state;" ); indent_puts( "static YY_CHAR *yy_last_accepting_cpos;\n" ); @@ -1105,14 +1112,7 @@ void make_tables() for ( i = 1; i <= lastdfa; ++i ) { if ( fullspd ) - { - if ( nultrans ) - printf( " &yy_transition[%d],\n", - base[i] ); - else - printf( " 0,\n" ); - } - + printf( " &yy_transition[%d],\n", base[i] ); else mkdata( nultrans[i] ); } -- cgit v1.2.3 From 9887dec3ed4a1792f20d42db939127e5f6207c66 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Jul 1993 21:36:26 +0000 Subject: added check_char call in mkstate() to prevent bad xtion chars --- nfa.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nfa.c b/nfa.c index cca39d6..f2e4849 100644 --- a/nfa.c +++ b/nfa.c @@ -642,6 +642,8 @@ int sym; else { + check_char( sym ); + if ( useecs ) /* Map NUL's to csize. */ mkechar( sym ? 
sym : csize, nextecm, ecgroup ); -- cgit v1.2.3 From 9e1c7e268616ce081bd9b8c14a35f345486ffeb2 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Jul 1993 21:36:47 +0000 Subject: Fixed bug with Z-a character classes as per Wilhelms --- parse.y | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/parse.y b/parse.y index 828e0e2..c70d7d6 100644 --- a/parse.y +++ b/parse.y @@ -604,19 +604,19 @@ fullccl : '[' ccl ']' ccl : ccl CHAR '-' CHAR { + if ( caseins ) + { + if ( $2 >= 'A' && $2 <= 'Z' ) + $2 = clower( $2 ); + if ( $4 >= 'A' && $4 <= 'Z' ) + $4 = clower( $4 ); + } + if ( $2 > $4 ) synerr( "negative range in character class" ); else { - if ( caseins ) - { - if ( $2 >= 'A' && $2 <= 'Z' ) - $2 = clower( $2 ); - if ( $4 >= 'A' && $4 <= 'Z' ) - $4 = clower( $4 ); - } - for ( i = $2; i <= $4; ++i ) ccladd( $1, i ); -- cgit v1.2.3 From 03006d42f1733d38a527f0bc913aae6ef0672994 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Jul 1993 21:37:06 +0000 Subject: Nuked #undef of yywrap, now that it's a function --- scan.l | 2 -- 1 file changed, 2 deletions(-) diff --git a/scan.l b/scan.l index ee87937..fb2f465 100644 --- a/scan.l +++ b/scan.l @@ -32,8 +32,6 @@ static char rcsid[] = "@(#) $Header$ (LBL)"; #endif -#undef yywrap - #include "flexdef.h" #include "parse.h" -- cgit v1.2.3 From ec87c4d60198bf67a1a7a166aa1487b6b0fb514f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Jul 1993 21:37:27 +0000 Subject: Some comment fixes as per Wilhelms --- tblcmp.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/tblcmp.c b/tblcmp.c index 26e7cb6..28d61a7 100644 --- a/tblcmp.c +++ b/tblcmp.c @@ -225,9 +225,7 @@ int state[], statenum, totaltrans, comstate, comfreq; * Template tables are compressed by using the 'template equivalence * classes', which are collections of transition character equivalence * classes which always appear together in templates - really meta-equivalence - * classes. 
until this point, the tables for templates have been stored - * up at the top end of the nxt array; they will now be compressed and have - * table entries made for them. + * classes. */ void cmptmps() @@ -435,9 +433,7 @@ int *state, numtrans; /* inittbl - initialize transition tables * * Initializes "firstfree" to be one beyond the end of the table. Initializes - * all "chk" entries to be zero. Note that templates are built in their - * own tbase/tdef tables. They are shifted down to be contiguous - * with the non-template entries during table generation. + * all "chk" entries to be zero. */ void inittbl() { @@ -566,11 +562,10 @@ int numchars, statenum, deflink, totaltrans; /* Whether we try to fit the state table in the middle of the table * entries we have already generated, or if we just take the state * table at the end of the nxt/chk tables, we must make sure that we - * have a valid base address (i.e., non-negative). Note that not - * only are negative base addresses dangerous at run-time (because - * indexing the next array with one and a low-valued character might - * generate an array-out-of-bounds error message), but at compile-time - * negative base addresses denote TEMPLATES. + * have a valid base address (i.e., non-negative). Note that + * negative base addresses dangerous at run-time (because indexing + * the nxt array with one and a low-valued character will access + * memory before the start of the array. */ /* Find the first transition of state that we need to worry about. */ @@ -837,7 +832,7 @@ int *state, statenum, transnum; /* stack1 - save states with only one out-transition to be processed later * - * if there's room for another state on the "one-transition" stack, the + * If there's room for another state on the "one-transition" stack, the * state is pushed onto it, to be processed later by mk1tbl. If there's * no room, we process the sucker right now. 
*/ -- cgit v1.2.3 From f9afb4c40c08f528536eb4f589f3b2059edd6095 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Jul 1993 21:52:12 +0000 Subject: Some rearranging to make sure things get declared in the right order --- flex.skl | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/flex.skl b/flex.skl index e5d4814..3ea9813 100644 --- a/flex.skl +++ b/flex.skl @@ -162,17 +162,6 @@ extern void *yy_flex_alloc YY_PROTO(( int )); extern void yy_flex_free YY_PROTO(( void * )); extern int yywrap YY_PROTO(( void )); -/* Done after the current pattern has been matched and before the - * corresponding action - sets up yytext. - */ -#define YY_DO_BEFORE_ACTION \ - yytext_ptr = yy_bp; \ -%% code to fiddle yytext and yyleng for yymore() goes here - yy_hold_char = *yy_cp; \ - *yy_cp = '\0'; \ -%% code to copy yytext_ptr to yytext[] goes here, if %array - yy_c_buf_p = yy_cp; - #define EOB_ACT_CONTINUE_SCAN 0 #define EOB_ACT_END_OF_FILE 1 #define EOB_ACT_LAST_MATCH 2 @@ -247,9 +236,6 @@ static int yy_start = 0; /* start state number */ */ static int yy_did_buffer_switch_on_eof; -static yy_state_type yy_get_previous_state YY_PROTO(( void )); -static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); -static int yy_get_next_buffer YY_PROTO(( void )); static void yyunput YY_PROTO(( YY_CHAR c, YY_CHAR *buf_ptr )); void yyrestart YY_PROTO(( FILE *input_file )); void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer )); @@ -268,8 +254,23 @@ static int input YY_PROTO(( void )); %% section 1 definitions and declarations of yytext/yytext_ptr go here +/* Done after the current pattern has been matched and before the + * corresponding action - sets up yytext. 
+ */ +#define YY_DO_BEFORE_ACTION \ + yytext_ptr = yy_bp; \ +%% code to fiddle yytext and yyleng for yymore() goes here + yy_hold_char = *yy_cp; \ + *yy_cp = '\0'; \ +%% code to copy yytext_ptr to yytext[] goes here, if %array + yy_c_buf_p = yy_cp; + %% data tables for the DFA go here +static yy_state_type yy_get_previous_state YY_PROTO(( void )); +static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); +static int yy_get_next_buffer YY_PROTO(( void )); + YY_DECL { register yy_state_type yy_current_state; -- cgit v1.2.3 From 32ae837b83545b262703e64daee192571be3a696 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 5 Jul 1993 21:52:29 +0000 Subject: Spit out definition of YY_CHAR early --- main.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/main.c b/main.c index c32af32..7ff70c8 100644 --- a/main.c +++ b/main.c @@ -751,16 +751,16 @@ char **argv; void readin() { - skelout(); - - if ( ddebug ) - puts( "#define FLEX_DEBUG" ); - if ( csize == 256 ) puts( "typedef unsigned char YY_CHAR;" ); else puts( "typedef char YY_CHAR;" ); + skelout(); + + if ( ddebug ) + puts( "#define FLEX_DEBUG" ); + line_directive_out( stdout ); if ( yyparse() ) -- cgit v1.2.3 From ee15993b3542238e82492f7d17d33de312ae6c6e Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 9 Jul 1993 19:47:11 +0000 Subject: Fixed to not generate extra EOF's after reading one. --- flex.skl | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/flex.skl b/flex.skl index 3ea9813..5deca11 100644 --- a/flex.skl +++ b/flex.skl @@ -126,14 +126,10 @@ /* Action number for EOF rule of a given start state. */ #define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) -/* Special action meaning "start processing a new file". */ -#define YY_NEW_FILE \ - do \ - { \ - yy_init_buffer( yy_current_buffer, yyin ); \ - yy_load_buffer_state(); \ - } \ - while ( 0 ) +/* Special action meaning "start processing a new file". 
Now included + * only for backward compatibility with previous versions of flex. + */ +#define YY_NEW_FILE yyrestart( yyin ) /* Default declaration of generated scanner - a define so the user can * easily add parameters. @@ -201,10 +197,12 @@ struct yy_buffer_state int yy_eof_status; #define EOF_NOT_SEEN 0 /* "Pending" happens when the EOF has been seen but there's still - * some text to process. + * some text to process. Note that when we actually see the EOF, + * we switch the status back to "not seen" (via yyrestart()), so + * that the user can continue scanning by just pointing yyin at + * a new input file. */ #define EOF_PENDING 1 -#define EOF_DONE 2 }; static YY_BUFFER_STATE yy_current_buffer = 0; @@ -512,7 +510,7 @@ static int yy_get_next_buffer() if ( number_to_move - YY_MORE_ADJ == 1 ) { ret_val = EOB_ACT_END_OF_FILE; - yy_current_buffer->yy_eof_status = EOF_DONE; + yyrestart( yyin ); } else @@ -699,6 +697,9 @@ void yyrestart( input_file ) FILE *input_file; #endif { + if ( ! yy_current_buffer ) + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); + yy_init_buffer( yy_current_buffer, input_file ); yy_load_buffer_state(); } -- cgit v1.2.3 From 4e27b497e883e60246e4daa4cc4208072dcebc30 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 24 Aug 1993 20:37:11 +0000 Subject: "backtracking" -> "backing up" --- dfa.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/dfa.c b/dfa.c index 7ffe263..b323336 100644 --- a/dfa.c +++ b/dfa.c @@ -42,38 +42,38 @@ void sympartition PROTO((int[], int, int[], int[])); int symfollowset PROTO((int[], int, int, int[])); -/* check_for_backtracking - check a DFA state for backtracking +/* check_for_backing_up - check a DFA state for backing up * * synopsis - * void check_for_backtracking( int ds, int state[numecs] ); + * void check_for_backing_up( int ds, int state[numecs] ); * * ds is the number of the state to check and state[] is its out-transitions, * indexed by equivalence class. 
*/ -void check_for_backtracking( ds, state ) +void check_for_backing_up( ds, state ) int ds; int state[]; { if ( (reject && ! dfaacc[ds].dfaacc_set) || (! reject && ! dfaacc[ds].dfaacc_state) ) { /* state is non-accepting */ - ++num_backtracking; + ++num_backing_up; - if ( backtrack_report ) + if ( backing_up_report ) { - fprintf( backtrack_file, + fprintf( backing_up_file, "State #%d is non-accepting -\n", ds ); /* identify the state */ - dump_associated_rules( backtrack_file, ds ); + dump_associated_rules( backing_up_file, ds ); /* Now identify it further using the out- and * jam-transitions. */ - dump_transitions( backtrack_file, state ); + dump_transitions( backing_up_file, state ); - putc( '\n', backtrack_file ); + putc( '\n', backing_up_file ); } } } @@ -689,7 +689,7 @@ void ntod() } if ( ds > num_start_states ) - check_for_backtracking( ds, state ); + check_for_backing_up( ds, state ); if ( nultrans ) { -- cgit v1.2.3 From 41032843ca338e15af9f36b3228a373c22bacec8 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 24 Aug 1993 20:40:56 +0000 Subject: preserve yytext on input() bug fix when combining yyless() with yymore() checkpoint prior to C++ option --- flex.skl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/flex.skl b/flex.skl index 5deca11..2973b31 100644 --- a/flex.skl +++ b/flex.skl @@ -45,8 +45,10 @@ #ifndef YY_USE_CONST +#ifndef const #define const #endif +#endif #ifdef YY_USE_PROTOS @@ -168,7 +170,7 @@ extern int yywrap YY_PROTO(( void )); { \ /* Undo effects of setting up yytext. */ \ *yy_cp = yy_hold_char; \ - yy_c_buf_p = yy_cp = yy_bp + n; \ + yy_c_buf_p = yy_cp = yy_bp + n - YY_MORE_ADJ; \ YY_DO_BEFORE_ACTION; /* set up yytext again */ \ } \ while ( 0 ) @@ -351,7 +353,7 @@ do_action: /* This label is used only to access EOF actions. */ * already have been incremented past the NUL character * (since all states make transitions on EOB to the * end-of-buffer state). Contrast this with the test - * in yyinput(). 
+ * in input(). */ if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] ) { /* This was really a NUL. */ @@ -384,7 +386,7 @@ do_action: /* This label is used only to access EOF actions. */ else { -%% code to do backtracking for compressed tables and set up yy_cp goes here +%% code to do back-up for compressed tables and set up yy_cp goes here goto yy_find_action; } } @@ -571,7 +573,7 @@ register yy_state_type yy_current_state; #endif { register int yy_is_jam; -%% code to find the next state, and perhaps do backtracking, goes here +%% code to find the next state, and perhaps do backing up, goes here return yy_is_jam ? 0 : yy_current_state; } @@ -629,9 +631,8 @@ static int input() #endif { int c; - YY_CHAR *yy_cp = yy_c_buf_p; - *yy_cp = yy_hold_char; + *yy_c_buf_p = yy_hold_char; if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) { @@ -684,6 +685,7 @@ static int input() } c = *yy_c_buf_p; + *yy_c_buf_p = '\0'; /* preserve yytext */ yy_hold_char = *++yy_c_buf_p; return c; -- cgit v1.2.3 From 2b84ab9e5489f4df70d67c2a3b9fe6dfa433ed75 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 24 Aug 1993 20:42:31 +0000 Subject: Chucked definition of isascii() --- yylex.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/yylex.c b/yylex.c index 6b90652..2f6ff67 100644 --- a/yylex.c +++ b/yylex.c @@ -36,12 +36,6 @@ static char rcsid[] = #include "parse.h" -/* ANSI C does not guarantee that isascii() is defined */ -#ifndef isascii -#define isascii(c) ((c) <= 0177) -#endif - - /* yylex - scan for a regular expression token */ int yylex() -- cgit v1.2.3 From eb4bc1e0cf3d9d7c8b8802bcac8909fb26758b2d Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 24 Aug 1993 20:43:03 +0000 Subject: Fixed bug with empty section 2 --- scan.l | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/scan.l b/scan.l index fb2f465..9f0e8ca 100644 --- a/scan.l +++ b/scan.l @@ -213,7 +213,12 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} .*{NL} ++linenum; ACTION_ECHO; -<> 
MARK_END_OF_PROLOG; yyterminate(); +<> { + MARK_END_OF_PROLOG; + sectnum = 3; + BEGIN(SECT3); + yyterminate(); /* to stop the parser */ + } ^{OPTWS}{NL} ++linenum; /* allow blank lines in section 2 */ @@ -266,7 +271,7 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} ^"%%".* { sectnum = 3; BEGIN(SECT3); - return EOF; /* to stop the parser */ + yyterminate(); /* to stop the parser */ } "["{FIRST_CCL_CHAR}{CCL_CHAR}* { @@ -485,6 +490,7 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} .*(\n?) ECHO; +<> sectnum = 0; yyterminate(); <*>.|\n format_synerr( "bad character: %s", (char *) yytext ); -- cgit v1.2.3 From d9c32a4e3f3ba92ef7e81e947dfa2521156f7fbd Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 24 Aug 1993 20:43:23 +0000 Subject: isascii() moved to flexdef.h nuked flex_gettime() --- misc.c | 47 +---------------------------------------------- 1 file changed, 1 insertion(+), 46 deletions(-) diff --git a/misc.c b/misc.c index abb6018..0fc5959 100644 --- a/misc.c +++ b/misc.c @@ -31,20 +31,13 @@ static char rcsid[] = "@(#) $Header$ (LBL)"; #endif -#include #include "flexdef.h" -/* ANSI C does not guarantee that isascii() is defined */ -#ifndef isascii -#define isascii(c) ((c) <= 0177) -#endif - - /* declare functions that have forward references */ -void dataflush PROTO(()); +void dataflush PROTO((void)); int otoi PROTO((Char [])); @@ -333,44 +326,6 @@ char msg[]; } -/* flex_gettime - return current time - * - * note - * the routine name has the "flex_" prefix because of name clashes - * with Turbo-C - */ - -/* include sys/types.h to use time_t and make lint happy */ - -#ifndef MS_DOS -#ifndef VMS -#include -#else -#include -#endif -#endif - -#ifdef MS_DOS -#include -typedef long time_t; -#endif - -char *flex_gettime() - { - time_t t, time(); - char *result, *ctime(), *copy_string(); - - t = time( (long *) 0 ); - - result = copy_string( ctime( &t ) ); - - /* get rid of trailing newline */ - result[24] = '\0'; - - return result; - } - - /* lerrif - report an error message formatted with one integer 
argument */ void lerrif( msg, arg ) -- cgit v1.2.3 From 2517e4a17174f0d291f978e15c0eb6e9ea0dfc2d Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 24 Aug 1993 20:44:11 +0000 Subject: "backtracking" -> "backing up" some portability tweaks fixed to only call flexscan() when done if known to be in section 3 --- gen.c | 102 ++++++++++++++++++++++++++++++++++-------------------------------- 1 file changed, 53 insertions(+), 49 deletions(-) diff --git a/gen.c b/gen.c index 9b582a8..c51de78 100644 --- a/gen.c +++ b/gen.c @@ -37,7 +37,7 @@ static char rcsid[] = /* declare functions that have forward references */ void gen_next_state PROTO((int)); -void genecs PROTO(()); +void genecs PROTO((void)); void indent_put2s PROTO((char [], char [])); void indent_puts PROTO((char [])); @@ -77,11 +77,11 @@ void do_indent() } -/* Generate the code to keep backtracking information. */ +/* Generate the code to keep backing-up information. */ -void gen_backtracking() +void gen_backing_up() { - if ( reject || num_backtracking == 0 ) + if ( reject || num_backing_up == 0 ) return; if ( fullspd ) @@ -98,23 +98,23 @@ void gen_backtracking() } -/* Generate the code to perform the backtrack. */ +/* Generate the code to perform the backing up. */ -void gen_bt_action() +void gen_bu_action() { - if ( reject || num_backtracking == 0 ) + if ( reject || num_backing_up == 0 ) return; set_indent( 3 ); - indent_puts( "case 0: /* must backtrack */" ); + indent_puts( "case 0: /* must back up */" ); indent_puts( "/* undo the effects of YY_DO_BEFORE_ACTION */" ); indent_puts( "*yy_cp = yy_hold_char;" ); if ( fullspd || fulltbl ) indent_puts( "yy_cp = yy_last_accepting_cpos + 1;" ); else - /* Backtracking info for compressed tables is taken \after/ + /* Backing-up info for compressed tables is taken \after/ * yy_cp has been incremented for the next state. 
*/ indent_puts( "yy_cp = yy_last_accepting_cpos;" ); @@ -287,7 +287,7 @@ void gen_find_action() indent_puts( "yy_lp = yy_accept[yy_current_state];" ); puts( - "find_rule: /* we branch to this label when backtracking */" ); + "find_rule: /* we branch to this label when backing up */" ); indent_puts( "for ( ; ; ) /* until we find what rule we matched */" ); @@ -433,11 +433,11 @@ char *char_map; { indent_put2s( "register YY_CHAR yy_c = %s;", char_map ); - /* Save the backtracking info \before/ computing the next state + /* Save the backing-up info \before/ computing the next state * because we always compute one more state than needed - we * always proceed until we reach a jam state */ - gen_backtracking(); + gen_backing_up(); indent_puts( "while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )" ); @@ -468,7 +468,7 @@ char *char_map; indent_down(); indent_puts( - "yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];" ); +"yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];" ); } @@ -479,8 +479,9 @@ void gen_next_match() /* NOTE - changes in here should be reflected in gen_next_state() and * gen_NUL_trans(). */ - char *char_map = useecs ? "yy_ec[*yy_cp]" : "*yy_cp"; - char *char_map_2 = useecs ? "yy_ec[*++yy_cp]" : "*++yy_cp"; + char *char_map = useecs ? "yy_ec[(unsigned int) *yy_cp]" : "*yy_cp"; + char *char_map_2 = + useecs ? 
"yy_ec[(unsigned int) *++yy_cp]" : "*++yy_cp"; if ( fulltbl ) { @@ -490,16 +491,16 @@ void gen_next_match() indent_up(); - if ( num_backtracking > 0 ) + if ( num_backing_up > 0 ) { indent_puts( "{" ); /* } for vi */ - gen_backtracking(); + gen_backing_up(); putchar( '\n' ); } indent_puts( "++yy_cp;" ); - if ( num_backtracking > 0 ) + if ( num_backing_up > 0 ) /* { for vi */ indent_puts( "}" ); @@ -517,20 +518,21 @@ void gen_next_match() indent_puts( "register YY_CHAR yy_c;\n" ); indent_put2s( "for ( yy_c = %s;", char_map ); indent_puts( - " (yy_trans_info = &yy_current_state[yy_c])->yy_verify == yy_c;" ); + " (yy_trans_info = &yy_current_state[(unsigned int) yy_c])->" ); + indent_puts( "yy_verify == yy_c;" ); indent_put2s( " yy_c = %s )", char_map_2 ); indent_up(); - if ( num_backtracking > 0 ) + if ( num_backing_up > 0 ) indent_puts( "{" ); /* } for vi */ indent_puts( "yy_current_state += yy_trans_info->yy_nxt;" ); - if ( num_backtracking > 0 ) + if ( num_backing_up > 0 ) { putchar( '\n' ); - gen_backtracking(); /* { for vi */ + gen_backing_up(); /* { for vi */ indent_puts( "}" ); } @@ -564,7 +566,7 @@ void gen_next_match() if ( ! reject && ! interactive ) { - /* Do the guaranteed-needed backtrack to figure out + /* Do the guaranteed-needed backing up to figure out * the match. */ indent_puts( "yy_cp = yy_last_accepting_cpos;" ); @@ -586,7 +588,8 @@ int worry_about_NULs; { if ( useecs ) (void) sprintf( char_map, - "(*yy_cp ? yy_ec[*yy_cp] : %d)", NUL_ec ); + "(*yy_cp ? yy_ec[(unsigned int) *yy_cp] : %d)", + NUL_ec ); else (void) sprintf( char_map, "(*yy_cp ? *yy_cp : %d)", NUL_ec ); @@ -598,8 +601,8 @@ int worry_about_NULs; if ( worry_about_NULs && nultrans ) { if ( ! fulltbl && ! fullspd ) - /* Compressed tables backtrack *before* they match. */ - gen_backtracking(); + /* Compressed tables back up *before* they match. 
*/ + gen_backing_up(); indent_puts( "if ( *yy_cp )" ); indent_up(); @@ -632,7 +635,7 @@ int worry_about_NULs; } if ( fullspd || fulltbl ) - gen_backtracking(); + gen_backing_up(); if ( reject ) indent_puts( "*yy_state_ptr++ = yy_current_state;" ); @@ -643,10 +646,10 @@ int worry_about_NULs; void gen_NUL_trans() { /* NOTE - changes in here should be reflected in get_next_match() */ - int need_backtracking = (num_backtracking > 0 && ! reject); + int need_backing_up = (num_backing_up > 0 && ! reject); - if ( need_backtracking ) - /* We'll need yy_cp lying around for the gen_backtracking(). */ + if ( need_backing_up ) + /* We'll need yy_cp lying around for the gen_backing_up(). */ indent_puts( "register YY_CHAR *yy_cp = yy_c_buf_p;" ); putchar( '\n' ); @@ -673,7 +676,8 @@ void gen_NUL_trans() indent_puts( "register const struct yy_trans_info *yy_trans_info;\n" ); - indent_puts( "yy_trans_info = &yy_current_state[yy_c];" ); + indent_puts( + "yy_trans_info = &yy_current_state[(unsigned int) yy_c];" ); indent_puts( "yy_current_state += yy_trans_info->yy_nxt;" ); indent_puts( @@ -695,17 +699,17 @@ void gen_NUL_trans() printf( "yy_is_jam = (yy_current_state == %d);\n", jamstate ); } - /* If we've entered an accepting state, backtrack; note that - * compressed tables have *already* done such backtracking, so + /* If we've entered an accepting state, back up; note that + * compressed tables have *already* done such backing up, so * we needn't bother with it again. */ - if ( need_backtracking && (fullspd || fulltbl) ) + if ( need_backing_up && (fullspd || fulltbl) ) { putchar( '\n' ); indent_puts( "if ( ! yy_is_jam )" ); indent_up(); indent_puts( "{" ); - gen_backtracking(); + gen_backing_up(); indent_puts( "}" ); indent_down(); } @@ -762,9 +766,9 @@ void gentabs() /* The compressed table format jams by entering the "jam state", * losing information about the previous state in the process. 
* In order to recover the previous state, we effectively need - * to keep backtracking information. + * to keep backing-up information. */ - ++num_backtracking; + ++num_backing_up; if ( reject ) { @@ -1095,11 +1099,11 @@ void make_tables() else gentabs(); - /* Definitions for backtracking. We don't need them if REJECT - * is being used because then we use an alternative backtracking + /* Definitions for backing up. We don't need them if REJECT + * is being used because then we use an alternative backing-up * technique instead. */ - if ( num_backtracking > 0 && ! reject ) + if ( num_backing_up > 0 && ! reject ) { indent_puts( "static yy_state_type yy_last_accepting_state;" ); indent_puts( "static YY_CHAR *yy_last_accepting_cpos;\n" ); @@ -1185,12 +1189,11 @@ void make_tables() if ( yymore_used ) { indent_puts( "static int yy_more_flag = 0;" ); - indent_puts( "static int yy_doing_yy_more = 0;" ); indent_puts( "static int yy_more_len = 0;" ); indent_puts( "#define yymore() do { yy_more_flag = 1; } while ( 0 )" ); indent_puts( - "#define YY_MORE_ADJ (yy_doing_yy_more ? yy_more_len : 0)" ); + "#define YY_MORE_ADJ yy_more_len" ); } else @@ -1211,8 +1214,7 @@ void make_tables() if ( yymore_used ) { indent_puts( "yy_more_len = 0;" ); - indent_puts( "yy_doing_yy_more = yy_more_flag;" ); - indent_puts( "if ( yy_doing_yy_more )" ); + indent_puts( "if ( yy_more_flag )" ); indent_up(); indent_puts( "{" ); indent_puts( "yy_more_len = yyleng;" ); @@ -1243,7 +1245,7 @@ void make_tables() indent_puts( "if ( yy_act == 0 )" ); indent_up(); indent_puts( - "fprintf( stderr, \"--scanner backtracking\\n\" );" ); + "fprintf( stderr, \"--scanner backing up\\n\" );" ); indent_down(); do_indent(); @@ -1283,7 +1285,7 @@ void make_tables() /* Copy actions to output file. */ skelout(); indent_up(); - gen_bt_action(); + gen_bu_action(); fputs( action, stdout ); /* generate cases for any missing EOF rules */ @@ -1305,7 +1307,7 @@ void make_tables() /* Generate code for handling NUL's, if needed.
*/ - /* First, deal with backtracking and setting up yy_cp if the scanner + /* First, deal with backing up and setting up yy_cp if the scanner * finds that it should JAM on the NUL> */ skelout(); @@ -1318,7 +1320,7 @@ void make_tables() { /* compressed table */ if ( ! reject && ! interactive ) { - /* Do the guaranteed-needed backtrack to figure + /* Do the guaranteed-needed backing up to figure * out the match. */ indent_puts( "yy_cp = yy_last_accepting_cpos;" ); @@ -1350,5 +1352,7 @@ void make_tables() /* Copy remainder of input to output. */ line_directive_out( stdout ); - (void) flexscan(); /* copy remainder of input to output */ + + if ( sectnum == 3 ) + (void) flexscan(); /* copy remainder of input to output */ } -- cgit v1.2.3 From 15a9f98dda36f5ee3178bd7030aa6e778389c400 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 24 Aug 1993 20:45:35 +0000 Subject: "backtracking" -> "backing up" got rid of time reports --- main.c | 84 ++++++++++++++++++++++++++++-------------------------------------- 1 file changed, 36 insertions(+), 48 deletions(-) diff --git a/main.c b/main.c index 7ff70c8..8b3904a 100644 --- a/main.c +++ b/main.c @@ -47,14 +47,14 @@ static char flex_version[] = FLEX_VERSION; /* declare functions that have forward references */ void flexinit PROTO((int, char**)); -void readin PROTO(()); -void set_up_initial_allocations PROTO(()); +void readin PROTO((void)); +void set_up_initial_allocations PROTO((void)); /* these globals are all defined and commented in flexdef.h */ int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt; int interactive, caseins, useecs, fulltbl, usemecs; -int fullspd, gen_line_dirs, performance_report, backtrack_report; +int fullspd, gen_line_dirs, performance_report, backing_up_report; int yytext_is_array, csize; int yymore_used, reject, real_reject, continued_action; int yymore_really_used, reject_really_used; @@ -89,11 +89,11 @@ int numsnpairs, jambase, jamstate; int lastccl, current_maxccls, *cclmap, *ccllen, 
*cclng, cclreuse; int current_max_ccl_tbl_size; Char *ccltbl; -char *starttime = 0, *endtime, nmstr[MAXLINE]; +char nmstr[MAXLINE]; int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; -int num_backtracking, bol_needed; -FILE *backtrack_file; +int num_backing_up, bol_needed; +FILE *backing_up_file; int end_of_buffer_state; char **input_files; int num_input_files; @@ -205,8 +205,6 @@ int exit_status; { int tblsiz; - char *flex_gettime(); - if ( skelfile != NULL ) { if ( ferror( skelfile ) ) @@ -230,25 +228,23 @@ int exit_status; flexfatal( "error occurred when deleting output file" ); } - if ( backtrack_report && backtrack_file ) + if ( backing_up_report && backing_up_file ) { - if ( num_backtracking == 0 ) - fprintf( backtrack_file, "No backtracking.\n" ); + if ( num_backing_up == 0 ) + fprintf( backing_up_file, "No backing up.\n" ); else if ( fullspd || fulltbl ) - fprintf( backtrack_file, - "%d backtracking (non-accepting) states.\n", - num_backtracking ); + fprintf( backing_up_file, + "%d backing up (non-accepting) states.\n", + num_backing_up ); else - fprintf( backtrack_file, - "Compressed tables always backtrack.\n" ); + fprintf( backing_up_file, + "Compressed tables always back up.\n" ); - if ( ferror( backtrack_file ) ) - flexfatal( - "error occurred when writing backtracking file" ); + if ( ferror( backing_up_file ) ) + flexfatal( "error occurred when writing backup file" ); - else if ( fclose( backtrack_file ) ) - flexfatal( - "error occurred when closing backtracking file" ); + else if ( fclose( backing_up_file ) ) + flexfatal( "error occurred when closing backup file" ); } if ( printstats ) @@ -256,16 +252,9 @@ int exit_status; fprintf( stderr, "%s version %s usage statistics:\n", program_name, flex_version ); - if ( starttime ) - { - endtime = flex_gettime(); - fprintf( stderr, " started at %s, finished at %s\n", - starttime, endtime ); - } - fprintf( stderr, " scanner options: -" ); 
- if ( backtrack_report ) + if ( backing_up_report ) putc( 'b', stderr ); if ( ddebug ) putc( 'd', stderr ); @@ -318,15 +307,15 @@ int exit_status; fprintf( stderr, " %d rules\n", num_rules + num_eof_rules - 1 /* - 1 for def. rule */ ); - if ( num_backtracking == 0 ) - fprintf( stderr, " No backtracking\n" ); + if ( num_backing_up == 0 ) + fprintf( stderr, " No backing up\n" ); else if ( fullspd || fulltbl ) fprintf( stderr, - " %d backtracking (non-accepting) states\n", - num_backtracking ); + " %d backing-up (non-accepting) states\n", + num_backing_up ); else fprintf( stderr, - " compressed tables always backtrack\n" ); + " compressed tables always back-up\n" ); if ( bol_needed ) fprintf( stderr, @@ -417,10 +406,10 @@ char **argv; { int i, sawcmpflag; int csize_given, interactive_given; - char *arg, *prefix, *flex_gettime(), *mktemp(); + char *arg, *prefix, *mktemp(); printstats = syntaxerror = trace = spprdflt = caseins = false; - backtrack_report = ddebug = fulltbl = fullspd = false; + backing_up_report = ddebug = fulltbl = fullspd = false; nowarn = yymore_used = continued_action = reject = false; yytext_is_array = yymore_really_used = reject_really_used = false; gen_line_dirs = usemecs = useecs = true; @@ -439,8 +428,6 @@ char **argv; prefix = "yy"; - starttime = flex_gettime(); - program_name = argv[0]; /* read flags */ @@ -460,7 +447,7 @@ char **argv; break; case 'b': - backtrack_report = true; + backing_up_report = true; break; case 'c': @@ -660,20 +647,20 @@ char **argv; input_files = argv; set_input_file( num_input_files > 0 ? 
input_files[0] : NULL ); - if ( backtrack_report ) + if ( backing_up_report ) { #ifndef SHORT_FILE_NAMES - backtrack_file = fopen( "lex.backtrack", "w" ); + backing_up_file = fopen( "lex.backup", "w" ); #else - backtrack_file = fopen( "lex.bck", "w" ); + backing_up_file = fopen( "lex.bck", "w" ); #endif - if ( backtrack_file == NULL ) - flexerror( "could not create lex.backtrack" ); + if ( backing_up_file == NULL ) + flexerror( "could not create lex.backup" ); } else - backtrack_file = NULL; + backing_up_file = NULL; lastccl = 0; @@ -696,6 +683,7 @@ char **argv; GEN_PREFIX( "out" ); GEN_PREFIX( "restart" ); GEN_PREFIX( "text" ); + GEN_PREFIX( "_flex_debug" ); printf( "\n" ); } @@ -704,7 +692,7 @@ char **argv; numas = numsnpairs = tmpuses = 0; numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0; numuniq = numdup = hshsave = eofseen = datapos = dataline = 0; - num_backtracking = onesp = numprots = 0; + num_backing_up = onesp = numprots = 0; variable_trailing_context_rules = bol_needed = false; linenum = sectnum = 1; @@ -866,7 +854,7 @@ void usage() program_name ); fprintf( stderr, - "\t-b generate backtracking information to lex.backtrack\n" ); + "\t-b generate backing-up information to lex.backup\n" ); fprintf( stderr, "\t-c do-nothing POSIX option\n" ); fprintf( stderr, "\t-d turn on debug mode in generated scanner\n" ); fprintf( stderr, "\t-f generate fast, large scanner\n" ); -- cgit v1.2.3 From 13beeb8acbe3713d6aedd09b86f0669fd25ae794 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 24 Aug 1993 20:46:03 +0000 Subject: Removed a lot of #ifdef chud "backtracking" -> "backing up" --- flexdef.h | 68 ++++++++++++++++++++++----------------------------------------- 1 file changed, 24 insertions(+), 44 deletions(-) diff --git a/flexdef.h b/flexdef.h index a605bf4..859d0ce 100644 --- a/flexdef.h +++ b/flexdef.h @@ -32,16 +32,12 @@ #include #endif -/* Always be prepared to generate an 8-bit scanner. 
*/ -#define FLEX_8_BIT_CHARS +#include +#include -#ifdef FLEX_8_BIT_CHARS +/* Always be prepared to generate an 8-bit scanner. */ #define CSIZE 256 #define Char unsigned char -#else -#define Char char -#define CSIZE 128 -#endif /* Size of input alphabet - should be size of ASCII set. */ #ifndef DEFAULT_CSIZE @@ -56,32 +52,15 @@ #endif #endif - -#ifdef USG -#define SYS_V -#endif - -#ifdef SYS_V -#include -#else - -#include -#ifdef lint -char *sprintf(); /* keep lint happy */ -#endif -#endif - -#ifdef AMIGA -#ifndef abs -#define abs(x) ((x) < 0 ? -(x) : (x)) -#endif -#endif - #ifdef VMS #define unlink delete #define SHORT_FILE_NAMES #endif +#ifdef MS_DOS +#define SHORT_FILE_NAMES +#endif + /* Maximum line length we'll have to deal with. */ #define MAXLINE 2048 @@ -95,14 +74,17 @@ char *sprintf(); /* keep lint happy */ #ifndef max #define max(x,y) ((x) > (y) ? (x) : (y)) #endif - -#ifdef MS_DOS #ifndef abs #define abs(x) ((x) < 0 ? -(x) : (x)) #endif -#define SHORT_FILE_NAMES + + +/* ANSI C does not guarantee that isascii() is defined */ +#ifndef isascii +#define isascii(c) ((c) <= 0177) #endif + #define true 1 #define false 0 @@ -309,7 +291,7 @@ struct hash_entry int int_val; } ; -typedef struct hash_entry *hash_table[]; +typedef struct hash_entry **hash_table; #define NAME_TABLE_HASH_SIZE 101 #define START_COND_HASH_SIZE 101 @@ -338,14 +320,14 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; * performance_report - if > 0 (i.e., -p flag), generate a report relating * to scanner performance; if > 1 (-p -p), report on minor performance * problems, too - * backtrack_report - if true (i.e., -b flag), generate "lex.backtrack" file - * listing backtracking states + * backing_up_report - if true (i.e., -b flag), generate "lex.backup" file + * listing backing-up states * yytext_is_array - if true (i.e., %array directive), then declare * yytext as a array instead of a character pointer. Nice and inefficient. 
* csize - size of character set for the scanner we're generating; * 128 for 7-bit chars and 256 for 8-bit * yymore_used - if true, yymore() is used in input rules - * reject - if true, generate backtracking tables for REJECT macro + * reject - if true, generate back-up tables for REJECT macro * real_reject - if true, scanner really uses REJECT (as opposed to just * having "reject" set for variable trailing context) * continued_action - true if this rule's action is to "fall through" to @@ -357,7 +339,7 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; extern int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt; extern int interactive, caseins, useecs, fulltbl, usemecs; -extern int fullspd, gen_line_dirs, performance_report, backtrack_report; +extern int fullspd, gen_line_dirs, performance_report, backing_up_report; extern int yytext_is_array, csize; extern int yymore_used, reject, real_reject, continued_action; @@ -376,7 +358,7 @@ extern int yymore_really_used, reject_really_used; * skel - compiled-in skeleton array * skel_ind - index into "skel" array, if skelfile is nil * yyin - input file - * backtrack_file - file to summarize backtracking states to + * backing_up_file - file to summarize backing-up states to * infilename - name of input file * input_files - array holding names of input files * num_input_files - size of input_files array @@ -393,7 +375,7 @@ extern int yymore_really_used, reject_really_used; */ extern int datapos, dataline, linenum; -extern FILE *skelfile, *yyin, *backtrack_file; +extern FILE *skelfile, *yyin, *backing_up_file; extern char *skel[]; extern int skel_ind; extern char *infilename; @@ -595,8 +577,6 @@ extern Char *ccltbl; /* Variables for miscellaneous information: - * starttime - real-time when we started - * endtime - real-time when we ended * nmstr - last NAME scanned by the scanner * sectnum - section number currently being parsed * nummt - number of empty nxt/chk table entries @@ -612,14 +592,14 @@ extern Char 
*ccltbl; * numuniq - number of unique transitions * numdup - number of duplicate transitions * hshsave - number of hash collisions saved by checking number of states - * num_backtracking - number of DFA states requiring back-tracking + * num_backing_up - number of DFA states requiring backing up * bol_needed - whether scanner needs beginning-of-line recognition */ -extern char *starttime, *endtime, nmstr[MAXLINE]; +extern char nmstr[MAXLINE]; extern int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; extern int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; -extern int num_backtracking, bol_needed; +extern int num_backing_up, bol_needed; void *allocate_array PROTO((int, int)); void *reallocate_array PROTO((void*, int, int)); @@ -759,7 +739,7 @@ extern void lerrsf PROTO((char[], char[])); extern void line_directive_out PROTO((FILE*)); /* Mark the current position in the action array as the end of the prolog. */ -extern void mark_prolog PROTO(()); +extern void mark_prolog PROTO((void)); /* Generate a data statment for a two-dimensional array. */ extern void mk2data PROTO((int)); -- cgit v1.2.3 From 77e495cb1f563bf3530e2cbd8d6e5697e61b11f7 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 25 Aug 1993 16:40:03 +0000 Subject: yy_nxt table should be "const" --- dfa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dfa.c b/dfa.c index b323336..c5d1295 100644 --- a/dfa.c +++ b/dfa.c @@ -532,7 +532,7 @@ void ntod() /* Declare it "short" because it's a real long-shot that that * won't be large enough. 
*/ - printf( "static short int yy_nxt[][%d] =\n {\n", + printf( "static const short yy_nxt[][%d] =\n {\n", /* '}' so vi doesn't get too confused */ num_full_table_rows ); -- cgit v1.2.3 From 5ca409af32ca48e75ccb8285cedae7c80a9ae0bf Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 25 Aug 1993 16:40:25 +0000 Subject: Dump promotion of EOF in section 2 to turn on section 3; instead just treat it like a final EOF --- yylex.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/yylex.c b/yylex.c index 2f6ff67..879ad9e 100644 --- a/yylex.c +++ b/yylex.c @@ -59,12 +59,6 @@ int yylex() toktype = SECTEND; } - else if ( sectnum == 2 ) - { - sectnum = 3; - toktype = 0; - } - else toktype = 0; } -- cgit v1.2.3 From b14be0fbcaf61496a357bfe0e157860729cf67ef Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 25 Aug 1993 16:51:45 +0000 Subject: EOF in section 2 prolog leads to section 0, not section 3 --- scan.l | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scan.l b/scan.l index 9f0e8ca..53b7792 100644 --- a/scan.l +++ b/scan.l @@ -215,8 +215,7 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} <> { MARK_END_OF_PROLOG; - sectnum = 3; - BEGIN(SECT3); + sectnum = 0; yyterminate(); /* to stop the parser */ } -- cgit v1.2.3 From e1ece112deac1b2e56aac1d7c0f44dc01db71a9a Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 25 Aug 1993 16:52:05 +0000 Subject: Added %+/%-/%* to skelout() --- misc.c | 56 +++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 15 deletions(-) diff --git a/misc.c b/misc.c index 0fc5959..9d3b62b 100644 --- a/misc.c +++ b/misc.c @@ -656,26 +656,52 @@ int size, element_size; */ void skelout() { - if ( skelfile ) - { - char buf[MAXLINE]; + char buf_storage[MAXLINE]; + char *buf = buf_storage; + int do_copy = 1; - while ( fgets( buf, MAXLINE, skelfile ) != NULL ) - if ( buf[0] == '%' && buf[1] == '%' ) - break; - else - fputs( buf, stdout ); - } - - else + /* Loop pulling lines either from the skelfile, if we're 
using + * one, or from the skel[] array. + */ + while ( skelfile ? + (fgets( buf, MAXLINE, skelfile ) != NULL) : + ((buf = skel[skel_ind++]) != 0) ) { /* copy from skel array */ - char *buf; + if ( buf[0] == '%' ) + { /* control line */ + switch ( buf[1] ) + { + case '%': + return; - while ( (buf = skel[skel_ind++]) ) - if ( buf[0] == '%' && buf[1] == '%' ) - break; + case '+': + do_copy = C_plus_plus; + break; + + case '-': + do_copy = ! C_plus_plus; + break; + + case '*': + do_copy = 1; + break; + + default: + flexfatal( + "bad line in skeleton file" ); + } + } + + else if ( do_copy ) + { + if ( skelfile ) + /* Skeleton file reads include final + * newline, skel[] array does not. + */ + fputs( buf, stdout ); else printf( "%s\n", buf ); + } } } -- cgit v1.2.3 From 32831551c272bb9cb47c0a003c4bc5872bae07ad Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 25 Aug 1993 16:59:54 +0000 Subject: Added -+ option, updated usage() output, rearranged some generated code to come at the right point in the output for yyflexlexer.h. 
--- main.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 52 insertions(+), 21 deletions(-) diff --git a/main.c b/main.c index 8b3904a..0812ac7 100644 --- a/main.c +++ b/main.c @@ -55,7 +55,7 @@ void set_up_initial_allocations PROTO((void)); int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt; int interactive, caseins, useecs, fulltbl, usemecs; int fullspd, gen_line_dirs, performance_report, backing_up_report; -int yytext_is_array, csize; +int C_plus_plus, yytext_is_array, csize; int yymore_used, reject, real_reject, continued_action; int yymore_really_used, reject_really_used; int datapos, dataline, linenum; @@ -254,6 +254,8 @@ int exit_status; fprintf( stderr, " scanner options: -" ); + if ( C_plus_plus ) + putc( '+', stderr ); if ( backing_up_report ) putc( 'b', stderr ); if ( ddebug ) @@ -409,7 +411,7 @@ char **argv; char *arg, *prefix, *mktemp(); printstats = syntaxerror = trace = spprdflt = caseins = false; - backing_up_report = ddebug = fulltbl = fullspd = false; + C_plus_plus = backing_up_report = ddebug = fulltbl = fullspd = false; nowarn = yymore_used = continued_action = reject = false; yytext_is_array = yymore_really_used = reject_really_used = false; gen_line_dirs = usemecs = useecs = true; @@ -441,6 +443,10 @@ char **argv; for ( i = 1; arg[i] != '\0'; ++i ) switch ( arg[i] ) { + case '+': + C_plus_plus = true; + break; + case 'B': interactive = false; interactive_given = true; @@ -684,9 +690,11 @@ char **argv; GEN_PREFIX( "restart" ); GEN_PREFIX( "text" ); GEN_PREFIX( "_flex_debug" ); + GEN_PREFIX( "FlexLexer" ); printf( "\n" ); } + lastdfa = lastnfa = 0; num_rules = num_eof_rules = default_rule = 0; numas = numsnpairs = tmpuses = 0; @@ -746,8 +754,46 @@ void readin() skelout(); + if ( yytext_is_array ) + { + if ( C_plus_plus ) + flexerror( "%array and C++ scanners are incompatible" ); + + puts( "extern char yytext[];\n" ); + puts( "#ifndef YYLMAX" ); + puts( "#define YYLMAX YY_READ_BUF_SIZE" 
); + puts( "#endif YYLMAX\n" ); + puts( "char yytext[YYLMAX];" ); + puts( "YY_CHAR *yytext_ptr;" ); + } + + else + { + if ( ! C_plus_plus ) + { + puts( "extern YY_CHAR *yytext;" ); + puts( "YY_CHAR *yytext;" ); + } + + puts( "#define yytext_ptr yytext" ); + } + + if ( fullspd ) + printf( + "typedef const struct yy_trans_info *yy_state_type;\n" ); + else + printf( "typedef int yy_state_type;\n" ); + + if ( reject ) + printf( "\n#define YY_USES_REJECT\n" ); + + if ( C_plus_plus ) + printf( "\n#include \"yyflexlexer.h\"\n" ); + if ( ddebug ) - puts( "#define FLEX_DEBUG" ); + puts( "\n#define FLEX_DEBUG" ); + + skelout(); line_directive_out( stdout ); @@ -768,23 +814,6 @@ void readin() if ( useecs ) ccl2ecl(); - - if ( yytext_is_array ) - { - puts( "extern char yytext[];\n" ); - puts( "#ifndef YYLMAX" ); - puts( "#define YYLMAX YY_READ_BUF_SIZE" ); - puts( "#endif YYLMAX\n" ); - puts( "char yytext[YYLMAX];" ); - puts( "YY_CHAR *yytext_ptr;" ); - } - - else - { - puts( "extern YY_CHAR *yytext;" ); - puts( "YY_CHAR *yytext;" ); - puts( "#define yytext_ptr yytext" ); - } } @@ -850,7 +879,7 @@ void set_up_initial_allocations() void usage() { fprintf( stderr, - "%s [-bcdfhinpstvwBFILTV78 -C[efmF] -Sskeleton] [filename ...]\n", + "%s [-bcdfhinpstvwBFILTV78+ -C[efmF] -Pprefix -Sskeleton] [file ...]\n", program_name ); fprintf( stderr, @@ -879,6 +908,7 @@ void usage() fprintf( stderr, "\t-V report %s version\n", program_name ); fprintf( stderr, "\t-7 generate 7-bit scanner\n" ); fprintf( stderr, "\t-8 generate 8-bit scanner\n" ); + fprintf( stderr, "\t-+ generate C++ scanner class\n" ); fprintf( stderr, "\t-C specify degree of table compression (default is -Cem):\n" ); fprintf( stderr, "\t\t-Ce construct equivalence classes\n" ); @@ -887,5 +917,6 @@ void usage() fprintf( stderr, "\t\t-Cm construct meta-equivalence classes\n" ); fprintf( stderr, "\t\t-CF do not compress scanner tables; use -F representation\n" ); + fprintf( stderr, "\t-P specify scanner prefix other than 
\"yy\"\n" ); fprintf( stderr, "\t-S specify non-default skeleton file\n" ); } -- cgit v1.2.3 From 2a7cdd6f5568d261ecc682e0a20b0f60501e85a9 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 25 Aug 1993 17:10:57 +0000 Subject: yy_state_type declared earlier. Made a bunch of statics only output if not -+ --- gen.c | 43 +++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/gen.c b/gen.c index c51de78..424d321 100644 --- a/gen.c +++ b/gen.c @@ -1084,14 +1084,8 @@ void make_tables() indent_put2s( "%s yy_nxt;", trans_offset_type ); indent_puts( "};" ); indent_down(); - - indent_puts( - "typedef const struct yy_trans_info *yy_state_type;" ); } - else - indent_puts( "typedef int yy_state_type;" ); - if ( fullspd ) genctbl(); else if ( fulltbl ) @@ -1105,8 +1099,13 @@ void make_tables() */ if ( num_backing_up > 0 && ! reject ) { - indent_puts( "static yy_state_type yy_last_accepting_state;" ); - indent_puts( "static YY_CHAR *yy_last_accepting_cpos;\n" ); + if ( ! C_plus_plus ) + { + indent_puts( + "static yy_state_type yy_last_accepting_state;" ); + indent_puts( + "static YY_CHAR *yy_last_accepting_cpos;\n" ); + } } if ( nultrans ) @@ -1138,16 +1137,24 @@ void make_tables() if ( reject ) { /* Declare state buffer variables. */ - puts( + if ( ! C_plus_plus ) + { + puts( "static yy_state_type yy_state_buf[YY_BUF_SIZE + 2], *yy_state_ptr;" ); - puts( "static YY_CHAR *yy_full_match;" ); - puts( "static int yy_lp;" ); + puts( "static YY_CHAR *yy_full_match;" ); + puts( "static int yy_lp;" ); + } if ( variable_trailing_context_rules ) { - puts( "static int yy_looking_for_trail_begin = 0;" ); - puts( "static int yy_full_lp;" ); - puts( "static int *yy_full_state;" ); + if ( ! 
C_plus_plus ) + { + puts( + "static int yy_looking_for_trail_begin = 0;" ); + puts( "static int yy_full_lp;" ); + puts( "static int *yy_full_state;" ); + } + printf( "#define YY_TRAILING_MASK 0x%x\n", (unsigned int) YY_TRAILING_MASK ); printf( "#define YY_TRAILING_HEAD_MASK 0x%x\n", @@ -1188,8 +1195,12 @@ void make_tables() if ( yymore_used ) { - indent_puts( "static int yy_more_flag = 0;" ); - indent_puts( "static int yy_more_len = 0;" ); + if ( ! C_plus_plus ) + { + indent_puts( "static int yy_more_flag = 0;" ); + indent_puts( "static int yy_more_len = 0;" ); + } + indent_puts( "#define yymore() do { yy_more_flag = 1; } while ( 0 )" ); indent_puts( -- cgit v1.2.3 From 814985ad3cc4fac18bd23aee555610c50692b816 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 25 Aug 1993 17:11:24 +0000 Subject: First version of C/C++ skeleton --- flex.skl | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 82 insertions(+), 10 deletions(-) diff --git a/flex.skl b/flex.skl index 2973b31..cee89c1 100644 --- a/flex.skl +++ b/flex.skl @@ -73,7 +73,11 @@ * we now use fwrite(). */ #ifndef ECHO +%- Standard (non-C++) definition #define ECHO (void) fwrite( (char *) yytext, yyleng, 1, yyout ) +%+ C++ definition +#define ECHO LexerOutput( (const char *) yytext, yyleng ) +%* #endif /* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, @@ -81,7 +85,11 @@ */ #ifndef YY_INPUT #define YY_INPUT(buf,result,max_size) \ +%- Standard (non-C++) definition if ( (result = read( fileno(yyin), (char *) buf, max_size )) < 0 ) \ +%+ C++ definition + if ( (result = LexerInput( (char *) buf, max_size )) < 0 ) \ +%* YY_FATAL_ERROR( "read() in flex scanner failed" ); #endif @@ -137,7 +145,11 @@ * easily add parameters. */ #ifndef YY_DECL +%- Standard (non-C++) definition #define YY_DECL int yylex YY_PROTO(( void )) +%+ C++ definition +#define YY_DECL int YY_MYCLASS::yylex() +%* #endif /* Code executed at the end of each rule. 
*/ @@ -156,9 +168,15 @@ typedef struct yy_buffer_state *YY_BUFFER_STATE; extern int yyleng; extern FILE *yyin, *yyout; -extern void *yy_flex_alloc YY_PROTO(( int )); -extern void yy_flex_free YY_PROTO(( void * )); -extern int yywrap YY_PROTO(( void )); +#ifdef __cplusplus +extern "C" { +#endif + extern void *yy_flex_alloc YY_PROTO(( int )); + extern void yy_flex_free YY_PROTO(( void * )); + extern int yywrap YY_PROTO(( void )); +#ifdef __cplusplus + } +#endif #define EOB_ACT_CONTINUE_SCAN 0 #define EOB_ACT_END_OF_FILE 1 @@ -207,7 +225,9 @@ struct yy_buffer_state #define EOF_PENDING 1 }; +%- Standard (non-C++) definition static YY_BUFFER_STATE yy_current_buffer = 0; +%* /* We provide macros for accessing buffer states in case in the * future we want to put the buffer states in a more general @@ -216,6 +236,7 @@ static YY_BUFFER_STATE yy_current_buffer = 0; #define YY_CURRENT_BUFFER yy_current_buffer +%- Standard (non-C++) definition /* yy_hold_char holds the character lost when yytext is formed. 
*/ static YY_CHAR yy_hold_char; @@ -243,16 +264,27 @@ void yy_load_buffer_state YY_PROTO(( void )); YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size )); void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b )); void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file )); +%* #define yy_new_buffer yy_create_buffer +%% declarations of yytext/yytext_ptr (and C++ include, if used) go here + +%- Standard (non-C++) definition #ifdef __cplusplus static int yyinput YY_PROTO(( void )); #else static int input YY_PROTO(( void )); #endif +%* + +%% section 1 definitions go here -%% section 1 definitions and declarations of yytext/yytext_ptr go here +%- Standard (non-C++) definition +static yy_state_type yy_get_previous_state YY_PROTO(( void )); +static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); +static int yy_get_next_buffer YY_PROTO(( void )); +%* /* Done after the current pattern has been matched and before the * corresponding action - sets up yytext. @@ -267,10 +299,6 @@ static int input YY_PROTO(( void )); %% data tables for the DFA go here -static yy_state_type yy_get_previous_state YY_PROTO(( void )); -static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); -static int yy_get_next_buffer YY_PROTO(( void )); - YY_DECL { register yy_state_type yy_current_state; @@ -465,7 +493,11 @@ do_action: /* This label is used only to access EOF actions. */ * EOB_ACT_END_OF_FILE - end of file */ +%- static int yy_get_next_buffer() +%+ +int yyFlexLexer::yy_get_next_buffer() +%* { register YY_CHAR *dest = yy_current_buffer->yy_ch_buf; register YY_CHAR *source = yytext_ptr - 1; /* copy prev. 
char, too */ @@ -543,7 +575,11 @@ static int yy_get_next_buffer() /* yy_get_previous_state - get the state just before the EOB char was reached */ +%- static yy_state_type yy_get_previous_state() +%+ +yy_state_type yyFlexLexer::yy_get_previous_state() +%* { register yy_state_type yy_current_state; register YY_CHAR *yy_cp; @@ -565,12 +601,16 @@ static yy_state_type yy_get_previous_state() * next_state = yy_try_NUL_trans( current_state ); */ +%- #ifdef YY_USE_PROTOS -static yy_state_type yy_try_NUL_trans( register yy_state_type yy_current_state ) +static yy_state_type yy_try_NUL_trans( yy_state_type yy_current_state ) #else static yy_state_type yy_try_NUL_trans( yy_current_state ) -register yy_state_type yy_current_state; +yy_state_type yy_current_state; #endif +%+ +yy_state_type yyFlexLexer::yy_try_NUL_trans( yy_state_type yy_current_state ) +%* { register int yy_is_jam; %% code to find the next state, and perhaps do backing up, goes here @@ -579,6 +619,7 @@ register yy_state_type yy_current_state; } +%- #ifdef YY_USE_PROTOS static void yyunput( YY_CHAR c, register YY_CHAR *yy_bp ) #else @@ -586,6 +627,9 @@ static void yyunput( c, yy_bp ) YY_CHAR c; register YY_CHAR *yy_bp; #endif +%+ +void yyFlexLexer::yyunput( YY_CHAR c, register YY_CHAR *yy_bp ) +%* { register YY_CHAR *yy_cp = yy_c_buf_p; @@ -624,11 +668,15 @@ register YY_CHAR *yy_bp; } +%- #ifdef __cplusplus static int yyinput() #else static int input() #endif +%+ +int yyFlexLexer::yyinput() +%* { int c; @@ -692,12 +740,16 @@ static int input() } +%- #ifdef YY_USE_PROTOS void yyrestart( FILE *input_file ) #else void yyrestart( input_file ) FILE *input_file; #endif +%+ +void yyFlexLexer::yyrestart( FILE *input_file ) +%* { if ( ! 
yy_current_buffer ) yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); @@ -707,12 +759,16 @@ FILE *input_file; } +%- #ifdef YY_USE_PROTOS void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) #else void yy_switch_to_buffer( new_buffer ) YY_BUFFER_STATE new_buffer; #endif +%+ +void yyFlexLexer::yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) +%* { if ( yy_current_buffer == new_buffer ) return; @@ -737,11 +793,15 @@ YY_BUFFER_STATE new_buffer; } +%- #ifdef YY_USE_PROTOS void yy_load_buffer_state( void ) #else void yy_load_buffer_state() #endif +%+ +void yyFlexLexer::yy_load_buffer_state() +%* { yy_n_chars = yy_current_buffer->yy_n_chars; yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos; @@ -750,6 +810,7 @@ void yy_load_buffer_state() } +%- #ifdef YY_USE_PROTOS YY_BUFFER_STATE yy_create_buffer( FILE *file, int size ) #else @@ -757,6 +818,9 @@ YY_BUFFER_STATE yy_create_buffer( file, size ) FILE *file; int size; #endif +%+ +YY_BUFFER_STATE yyFlexLexer::yy_create_buffer( FILE* file, int size ) +%* { YY_BUFFER_STATE b; @@ -782,12 +846,16 @@ int size; } +%- #ifdef YY_USE_PROTOS void yy_delete_buffer( YY_BUFFER_STATE b ) #else void yy_delete_buffer( b ) YY_BUFFER_STATE b; #endif +%+ +void yyFlexLexer::yy_delete_buffer( YY_BUFFER_STATE b ) +%* { if ( b == yy_current_buffer ) yy_current_buffer = (YY_BUFFER_STATE) 0; @@ -797,6 +865,7 @@ YY_BUFFER_STATE b; } +%- #ifdef YY_USE_PROTOS void yy_init_buffer( YY_BUFFER_STATE b, FILE *file ) #else @@ -804,6 +873,9 @@ void yy_init_buffer( b, file ) YY_BUFFER_STATE b; FILE *file; #endif +%+ +void yyFlexLexer::yy_init_buffer( YY_BUFFER_STATE b, FILE* file ) +%* { b->yy_input_file = file; -- cgit v1.2.3 From d2bf08fb3fbaf1b52696f05e494138419515c020 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 25 Aug 1993 17:11:42 +0000 Subject: Added C_plus_plus flag. 
--- flexdef.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flexdef.h b/flexdef.h index 859d0ce..17b742c 100644 --- a/flexdef.h +++ b/flexdef.h @@ -322,6 +322,8 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; * problems, too * backing_up_report - if true (i.e., -b flag), generate "lex.backup" file * listing backing-up states + * C_plus_plus - if true (i.e., -+ flag), generate a C++ scanner class; + * otherwise, a standard C scanner * yytext_is_array - if true (i.e., %array directive), then declare * yytext as a array instead of a character pointer. Nice and inefficient. * csize - size of character set for the scanner we're generating; @@ -340,7 +342,7 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; extern int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt; extern int interactive, caseins, useecs, fulltbl, usemecs; extern int fullspd, gen_line_dirs, performance_report, backing_up_report; -extern int yytext_is_array, csize; +extern int C_plus_plus, yytext_is_array, csize; extern int yymore_used, reject, real_reject, continued_action; #define REALLY_NOT_DETERMINED 0 -- cgit v1.2.3 From 77210a1d93980ae1fcaa2ce34d1de42780aea2fb Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 16 Sep 1993 20:27:27 +0000 Subject: Fixed bugs regarding %{%} code in section 2 prolog %array not allowed with C++ scanners --- scan.l | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/scan.l b/scan.l index 53b7792..0099043 100644 --- a/scan.l +++ b/scan.l @@ -107,12 +107,19 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} ^"%%".* { sectnum = 2; line_directive_out( stdout ); + bracelevel = 0; BEGIN(SECT2PROLOG); return SECTEND; } ^"%pointer".*{NL} ++linenum; yytext_is_array = false; -^"%array".*{NL} ++linenum; yytext_is_array = true; +^"%array".*{NL} { + if ( C_plus_plus ) + warn( "%array incompatible with -+ option" ); + else + yytext_is_array = true; + ++linenum; + } ^"%used" { warn( "%used/%unused have been 
deprecated" ); @@ -204,14 +211,24 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} {NOT_WS}+ synerr( "unrecognized %used/%unused construct" ); -.*{NL}/{NOT_WS} { - ++linenum; - ACTION_ECHO; - MARK_END_OF_PROLOG; - BEGIN(SECT2); +^"%{".* ++bracelevel; yyless( 2 ); /* eat only %{ */ +^"%}".* --bracelevel; yyless( 2 ); /* eat only %} */ + +^{WS}.* ACTION_ECHO; /* indented code in prolog */ + +^{NOT_WS}.* { /* non-indented code */ + if ( bracelevel <= 0 ) + { /* not in %{ ... %} */ + yyless( 0 ); /* put it all back */ + MARK_END_OF_PROLOG; + BEGIN(SECT2); + } + else + ACTION_ECHO; } -.*{NL} ++linenum; ACTION_ECHO; +.* ACTION_ECHO; +{NL} ++linenum; ACTION_ECHO; <> { MARK_END_OF_PROLOG; -- cgit v1.2.3 From fa6761a877403cecaf0166642ca103b12fae868c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 16 Sep 1993 20:28:08 +0000 Subject: %array not allowed with C++ scanners --- main.c | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/main.c b/main.c index 0812ac7..c5d1df3 100644 --- a/main.c +++ b/main.c @@ -754,29 +754,8 @@ void readin() skelout(); - if ( yytext_is_array ) - { - if ( C_plus_plus ) - flexerror( "%array and C++ scanners are incompatible" ); - - puts( "extern char yytext[];\n" ); - puts( "#ifndef YYLMAX" ); - puts( "#define YYLMAX YY_READ_BUF_SIZE" ); - puts( "#endif YYLMAX\n" ); - puts( "char yytext[YYLMAX];" ); - puts( "YY_CHAR *yytext_ptr;" ); - } - - else - { - if ( ! C_plus_plus ) - { - puts( "extern YY_CHAR *yytext;" ); - puts( "YY_CHAR *yytext;" ); - } - + if ( C_plus_plus ) puts( "#define yytext_ptr yytext" ); - } if ( fullspd ) printf( @@ -814,6 +793,26 @@ void readin() if ( useecs ) ccl2ecl(); + + if ( ! 
C_plus_plus ) + { + if ( yytext_is_array ) + { + puts( "extern char yytext[];\n" ); + puts( "#ifndef YYLMAX" ); + puts( "#define YYLMAX YY_READ_BUF_SIZE" ); + puts( "#endif YYLMAX\n" ); + puts( "char yytext[YYLMAX];" ); + puts( "YY_CHAR *yytext_ptr;" ); + } + + else + { + puts( "extern YY_CHAR *yytext;" ); + puts( "YY_CHAR *yytext;" ); + puts( "#define yytext_ptr yytext" ); + } + } } -- cgit v1.2.3 From bfb31788644adad45ff72670a07fd9e504431b69 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 16 Sep 1993 20:31:48 +0000 Subject: nuked static RCS string --- ccl.c | 5 +---- dfa.c | 5 +---- ecs.c | 5 +---- gen.c | 5 +---- main.c | 5 +---- misc.c | 5 +---- nfa.c | 5 +---- parse.y | 5 +---- scan.l | 5 +---- sym.c | 5 +---- tblcmp.c | 5 +---- yylex.c | 5 +---- 12 files changed, 12 insertions(+), 48 deletions(-) diff --git a/ccl.c b/ccl.c index 2929fb9..215ae6b 100644 --- a/ccl.c +++ b/ccl.c @@ -26,10 +26,7 @@ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ -#ifndef lint -static char rcsid[] = - "@(#) $Header$ (LBL)"; -#endif +/* $Header$ */ #include "flexdef.h" diff --git a/dfa.c b/dfa.c index c5d1295..7afd879 100644 --- a/dfa.c +++ b/dfa.c @@ -26,10 +26,7 @@ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ -#ifndef lint -static char rcsid[] = - "@(#) $Header$ (LBL)"; -#endif +/* $Header$ */ #include "flexdef.h" diff --git a/ecs.c b/ecs.c index ab6fadc..cc4e955 100644 --- a/ecs.c +++ b/ecs.c @@ -26,10 +26,7 @@ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ -#ifndef lint -static char rcsid[] = - "@(#) $Header$ (LBL)"; -#endif +/* $Header$ */ #include "flexdef.h" diff --git a/gen.c b/gen.c index 424d321..5a6b487 100644 --- a/gen.c +++ b/gen.c @@ -26,10 +26,7 @@ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#ifndef lint -static char rcsid[] = - "@(#) $Header$ (LBL)"; -#endif +/* $Header$ */ #include "flexdef.h" diff --git a/main.c b/main.c index c5d1df3..a1a08fe 100644 --- a/main.c +++ b/main.c @@ -32,10 +32,7 @@ char copyright[] = All rights reserved.\n"; #endif /* not lint */ -#ifndef lint -static char rcsid[] = - "@(#) $Header$ (LBL)"; -#endif +/* $Header$ */ #include "flexdef.h" diff --git a/misc.c b/misc.c index 9d3b62b..267e44a 100644 --- a/misc.c +++ b/misc.c @@ -26,10 +26,7 @@ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ -#ifndef lint -static char rcsid[] = - "@(#) $Header$ (LBL)"; -#endif +/* $Header$ */ #include "flexdef.h" diff --git a/nfa.c b/nfa.c index f2e4849..b64bb21 100644 --- a/nfa.c +++ b/nfa.c @@ -26,10 +26,7 @@ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ -#ifndef lint -static char rcsid[] = - "@(#) $Header$ (LBL)"; -#endif +/* $Header$ */ #include "flexdef.h" diff --git a/parse.y b/parse.y index c70d7d6..257d964 100644 --- a/parse.y +++ b/parse.y @@ -29,10 +29,7 @@ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ -#ifndef lint -static char rcsid[] = - "@(#) $Header$ (LBL)"; -#endif +/* $Header$ */ #include "flexdef.h" diff --git a/scan.l b/scan.l index 0099043..7d3c76e 100644 --- a/scan.l +++ b/scan.l @@ -27,10 +27,7 @@ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ -#ifndef lint -static char rcsid[] = - "@(#) $Header$ (LBL)"; -#endif +/* $Header$ */ #include "flexdef.h" #include "parse.h" diff --git a/sym.c b/sym.c index 0c1ad80..3583aab 100644 --- a/sym.c +++ b/sym.c @@ -26,10 +26,7 @@ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ -#ifndef lint -static char rcsid[] = - "@(#) $Header$ (LBL)"; -#endif +/* $Header$ */ #include "flexdef.h" diff --git a/tblcmp.c b/tblcmp.c index 28d61a7..654f95c 100644 --- a/tblcmp.c +++ b/tblcmp.c @@ -26,10 +26,7 @@ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#ifndef lint -static char rcsid[] = - "@(#) $Header$ (LBL)"; -#endif +/* $Header$ */ #include "flexdef.h" diff --git a/yylex.c b/yylex.c index 879ad9e..49e6b74 100644 --- a/yylex.c +++ b/yylex.c @@ -26,10 +26,7 @@ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ -#ifndef lint -static char rcsid[] = - "@(#) $Header$ (LBL)"; -#endif +/* $Header$ */ #include #include "flexdef.h" -- cgit v1.2.3 From 56f154bc0d0ea593a84554bb6497495f15aea3e7 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 16 Sep 1993 20:38:58 +0000 Subject: Delete prototypes for Unix system calls. --- flexdef.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/flexdef.h b/flexdef.h index 17b742c..b01b4a6 100644 --- a/flexdef.h +++ b/flexdef.h @@ -883,10 +883,3 @@ extern void stack1 PROTO((int, int, int, int)); /* from file yylex.c */ extern int yylex PROTO((void)); - - -/* The Unix system calls used here. */ - -extern int read PROTO((int, char*, int)); -extern int unlink PROTO((char*)); -extern int write PROTO((int, char*, int)); -- cgit v1.2.3 From 2496b2552f0ab644222ca54231c876ff56c81bcb Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 16 Sep 1993 20:53:46 +0000 Subject: Initial revision --- FlexLexer.h | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 FlexLexer.h diff --git a/FlexLexer.h b/FlexLexer.h new file mode 100644 index 0000000..7ff494f --- /dev/null +++ b/FlexLexer.h @@ -0,0 +1,132 @@ +// $Header$ + +// FlexLexer.h -- define a base class for lexical analyzers generated by flex + +// Copyright (c) 1993 The Regents of the University of California. +// All rights reserved. +// +// This code is derived from software contributed to Berkeley by +// Kent Williams. 
+// +// Redistribution and use in source and binary forms are permitted provided +// that: (1) source distributions retain this entire copyright notice and +// comment, and (2) distributions including binaries display the following +// acknowledgement: ``This product includes software developed by the +// University of California, Berkeley and its contributors'' in the +// documentation or other materials provided with the distribution and in +// all advertising materials mentioning features or use of this software. +// Neither the name of the University nor the names of its contributors may +// be used to endorse or promote products derived from this software without +// specific prior written permission. +// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + +#ifndef __FLEXCXX_H +#define __FLEXCXX_H + +#include + + +class yyFlexLexer { + public: + yyFlexLexer( FILE* arg_yyin = 0, FILE* arg_yyout = 0 ) + { + yyin = arg_yyin; + yyout = arg_yyout; + yy_c_buf_p = (YY_CHAR*) 0; + yy_init = 1; + yy_start = 0; + + yy_did_buffer_switch_on_eof = 0; + + yy_looking_for_trail_begin = 0; + yy_more_flag = 0; + yy_more_len = 0; + + yy_current_buffer = 0; + +#ifdef YY_USES_REJECT + yy_state_buf = new yy_state_type[YY_BUF_SIZE + 2]; +#else + yy_state_buf = 0; +#endif + } + + virtual ~yyFlexLexer() + { + delete yy_state_buf; + } + + void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ); + YY_BUFFER_STATE yy_create_buffer( FILE* file, int size ); + void yy_delete_buffer( YY_BUFFER_STATE b ); + void yyrestart( FILE *input_file ); + + virtual int yylex() = 0; + + protected: + virtual int LexerInput( char* buf, int max_size ) + { + return read( fileno(yyin), buf, max_size ); + } + + virtual void LexerOutput( const char* buf, int size ) + { + (void) fwrite( (char*) buf, size, 1, yyout ); + } + + void yyunput( YY_CHAR c, YY_CHAR* 
buf_ptr ); + int yyinput(); + + void yy_load_buffer_state(); + void yy_init_buffer( YY_BUFFER_STATE b, FILE* file ); + + yy_state_type yy_get_previous_state(); + yy_state_type yy_try_NUL_trans( yy_state_type current_state ); + int yy_get_next_buffer(); + + FILE* yyin; // input source for default LexerInput + FILE* yyout; // output sink for default LexerOutput + + YY_BUFFER_STATE yy_current_buffer; + + // yy_hold_char holds the character lost when yytext is formed. + YY_CHAR yy_hold_char; + + // Number of characters read into yy_ch_buf. + int yy_n_chars; + + YY_CHAR* yytext; + int yyleng; + + // Points to current character in buffer. + YY_CHAR* yy_c_buf_p; + + int yy_init; // whether we need to initialize + int yy_start; // start state number + + // Flag which is used to allow yywrap()'s to do buffer switches + // instead of setting up a fresh yyin. A bit of a hack ... + int yy_did_buffer_switch_on_eof; + + // The following are not always needed, but may be depending + // on use of certain flex features (like REJECT or yymore()). 
+ + yy_state_type yy_last_accepting_state; + YY_CHAR* yy_last_accepting_cpos; + + yy_state_type* yy_state_buf; + yy_state_type* yy_state_ptr; + + YY_CHAR* yy_full_match; + int* yy_full_state; + int yy_full_lp; + + int yy_lp; + int yy_looking_for_trail_begin; + + int yy_more_flag; + int yy_more_len; +}; +#endif -- cgit v1.2.3 From b70dc5d68d0a9bcef3b4539c6b07ce89533c8fda Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 21 Sep 1993 20:42:09 +0000 Subject: minor lint tweak --- nfa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nfa.c b/nfa.c index b64bb21..9be63e1 100644 --- a/nfa.c +++ b/nfa.c @@ -58,7 +58,7 @@ int mach, accepting_number; { int astate = mkstate( SYM_EPSILON ); accptnum[astate] = accepting_number; - mach = link_machines( mach, astate ); + (void) link_machines( mach, astate ); } } -- cgit v1.2.3 From 1748e3c90f3547f5b0ddfe2a798f31a4b625e7d6 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 21 Sep 1993 20:42:18 +0000 Subject: YYSTYPE #define'd to int --- parse.y | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/parse.y b/parse.y index 257d964..46fc7f2 100644 --- a/parse.y +++ b/parse.y @@ -43,6 +43,14 @@ void yyerror(); static int madeany = false; /* whether we've made the '.' character class */ int previous_continued_action; /* whether the previous rule's action was '|' */ +/* On some over-ambitious machines, such as DEC Alpha's, the default + * token type is "long" instead of "int"; this leads to problems with + * declaring yylval in flexdef.h. But so far, all the yacc's I've seen + * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the + * following should ensure that the default token type is "int". 
+ */ +#define YYSTYPE int + %} %% -- cgit v1.2.3 From 4c75cda990110a078be146036f803b34cc657e0d Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 21 Sep 1993 20:42:48 +0000 Subject: PC lint tweak --- scan.l | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scan.l b/scan.l index 7d3c76e..02d6dce 100644 --- a/scan.l +++ b/scan.l @@ -354,11 +354,11 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} } } -[/|*+?.()] return yytext[0]; +[/|*+?.()] return (int) yytext[0]; . RETURNCHAR; -[,*] return yytext[0]; +[,*] return (int) yytext[0]; ">" BEGIN(SECT2); return '>'; ">"/^ BEGIN(CARETISBOL); return '>'; {SCNAME} RETURNNAME; -- cgit v1.2.3 From 073a354e8dab725d7bee3048504fc10168970e61 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 21 Sep 1993 20:43:49 +0000 Subject: Split into two classes, one fully abstract. yylex() no longer abstract in yyFlexLexer --- FlexLexer.h | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/FlexLexer.h b/FlexLexer.h index 7ff494f..f8e0d35 100644 --- a/FlexLexer.h +++ b/FlexLexer.h @@ -28,7 +28,29 @@ #include -class yyFlexLexer { +// This file defines two classes. The first, FlexLexer, is an abstract +// class which specifies the external interface provided to flex C++ +// lexer objects. The second, yyFlexLexer, fills out most of the meat +// of the lexer class; its internals may vary from lexer to lexer +// depending on things like whether REJECT is used, and the type +// of YY_CHAR. If you want to create multiple lexer classes, you +// use the -P flag to rename each yyFlexLexer to some other xxFlexLexer. 
+ + +class FlexLexer { + public: + virtual ~FlexLexer() { } + + virtual void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) = 0; + virtual YY_BUFFER_STATE yy_create_buffer( FILE* file, int size ) = 0; + virtual void yy_delete_buffer( YY_BUFFER_STATE b ) = 0; + virtual void yyrestart( FILE *input_file ) = 0; + + virtual int yylex() = 0; +}; + + +class yyFlexLexer : public FlexLexer { public: yyFlexLexer( FILE* arg_yyin = 0, FILE* arg_yyout = 0 ) { @@ -63,7 +85,7 @@ class yyFlexLexer { void yy_delete_buffer( YY_BUFFER_STATE b ); void yyrestart( FILE *input_file ); - virtual int yylex() = 0; + virtual int yylex(); protected: virtual int LexerInput( char* buf, int max_size ) -- cgit v1.2.3 From 60497fb3c585da250e996fd26bec0f89cd8d1199 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 21 Sep 1993 20:44:17 +0000 Subject: Minor portability tweaks --- misc.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/misc.c b/misc.c index 267e44a..96cff93 100644 --- a/misc.c +++ b/misc.c @@ -167,7 +167,7 @@ void check_char( int c ) Char clower( c ) register int c; { - return (isascii( c ) && isupper( c )) ? tolower( c ) : c; + return (Char) ((isascii( c ) && isupper( c )) ? 
tolower( c ) : c); } @@ -496,17 +496,19 @@ Char array[]; switch ( array[1] ) { -#ifdef __STDC__ - case 'a': return '\a'; -#else - case 'a': return '\007'; -#endif case 'b': return '\b'; case 'f': return '\f'; case 'n': return '\n'; case 'r': return '\r'; case 't': return '\t'; + +#ifdef __STDC__ + case 'a': return '\a'; case 'v': return '\v'; +#else + case 'a': return '\007'; + case 'v': return '\013'; +#endif case '0': case '1': @@ -593,15 +595,16 @@ register int c; { switch ( c ) { -#ifdef __STDC__ - case '\a': return "\\a"; -#endif case '\b': return "\\b"; case '\f': return "\\f"; case '\n': return "\\n"; case '\r': return "\\r"; case '\t': return "\\t"; + +#ifdef __STDC__ + case '\a': return "\\a"; case '\v': return "\\v"; +#endif default: (void) sprintf( rform, "\\%.3o", -- cgit v1.2.3 From b6d65fcb6884f914a4fbf3c17a61206ea5b9c81a Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 21 Sep 1993 20:44:55 +0000 Subject: Added YY_START changed yyFlexLexer to define yylex() --- flex.skl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/flex.skl b/flex.skl index cee89c1..c5616eb 100644 --- a/flex.skl +++ b/flex.skl @@ -133,6 +133,11 @@ */ #define BEGIN yy_start = 1 + 2 * +/* Translate the current start state into a value that can be later handed + * to BEGIN to return to the state. + */ +#define YY_START ((yy_start - 1) / 2) + /* Action number for EOF rule of a given start state. */ #define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) @@ -148,7 +153,7 @@ %- Standard (non-C++) definition #define YY_DECL int yylex YY_PROTO(( void )) %+ C++ definition -#define YY_DECL int YY_MYCLASS::yylex() +#define YY_DECL int yyFlexLexer::yylex() %* #endif @@ -438,8 +443,7 @@ do_action: /* This label is used only to access EOF actions. 
*/ */ yy_c_buf_p = yytext_ptr + YY_MORE_ADJ; - yy_act = YY_STATE_EOF( - (yy_start - 1) / 2); + yy_act = YY_STATE_EOF(YY_START); goto do_action; } -- cgit v1.2.3 From 4247b750761a5dcae589046c82a86b3cb89a845c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 21 Sep 1993 20:45:18 +0000 Subject: Added start condition to EOF trace output --- gen.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gen.c b/gen.c index 5a6b487..164081b 100644 --- a/gen.c +++ b/gen.c @@ -1283,7 +1283,8 @@ void make_tables() do_indent(); printf( "else\n" ); indent_up(); - indent_puts( "fprintf( stderr, \"--EOF\\n\" );" ); + indent_puts( + "fprintf( stderr, \"--EOF (start condition %d)\\n\", YY_START );" ); indent_down(); indent_puts( "}" ); -- cgit v1.2.3 From 8630c095da1baa2e4ecebf1efa51f088d30d157e Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 21 Sep 1993 20:45:33 +0000 Subject: yyflexlexer.h -> FlexLexer.h minor portability tweak --- main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/main.c b/main.c index a1a08fe..de62516 100644 --- a/main.c +++ b/main.c @@ -202,6 +202,8 @@ int exit_status; { int tblsiz; + int unlink(); + if ( skelfile != NULL ) { if ( ferror( skelfile ) ) @@ -764,7 +766,7 @@ void readin() printf( "\n#define YY_USES_REJECT\n" ); if ( C_plus_plus ) - printf( "\n#include \"yyflexlexer.h\"\n" ); + printf( "\n#include \"FlexLexer.h\"\n" ); if ( ddebug ) puts( "\n#define FLEX_DEBUG" ); -- cgit v1.2.3 From deaf7728db5b42cd486538e94ac5ba48b74ad041 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 21 Sep 1993 20:45:51 +0000 Subject: Nuked FILENAMESIZE --- flexdef.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/flexdef.h b/flexdef.h index b01b4a6..81a1fe1 100644 --- a/flexdef.h +++ b/flexdef.h @@ -65,9 +65,6 @@ /* Maximum line length we'll have to deal with. */ #define MAXLINE 2048 -/* Maximum size of file name. */ -#define FILENAMESIZE 1024 - #ifndef min #define min(x,y) ((x) < (y) ? 
(x) : (y)) #endif -- cgit v1.2.3 From aad7b2b167fb0d36a01fa8d38a80ac2c8689f70c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 2 Oct 1993 13:36:08 +0000 Subject: formfeed no longer considered whitespace --- scan.l | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scan.l b/scan.l index 02d6dce..c91b792 100644 --- a/scan.l +++ b/scan.l @@ -64,9 +64,9 @@ %x FIRSTCCL CCL ACTION RECOVER BRACEERROR C_COMMENT ACTION_COMMENT %x ACTION_STRING PERCENT_BRACE_ACTION USED_LIST CODEBLOCK_2 -WS [ \t\f]+ -OPTWS [ \t\f]* -NOT_WS [^ \t\f\n] +WS [ \t]+ +OPTWS [ \t]* +NOT_WS [^ \t\n] NL \n|\r\n|\n\r -- cgit v1.2.3 From e1cffb75bcf9e136cf9947834382344a8d6d0bb8 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 2 Oct 1993 13:37:14 +0000 Subject: Added -a option for long-align. --- dfa.c | 8 ++++---- flexdef.h | 3 ++- gen.c | 36 ++++++++++++++++++++++++------------ main.c | 15 ++++++++++++--- 4 files changed, 42 insertions(+), 20 deletions(-) diff --git a/dfa.c b/dfa.c index 7afd879..4016e85 100644 --- a/dfa.c +++ b/dfa.c @@ -526,12 +526,12 @@ void ntod() */ num_full_table_rows = numecs + 1; - /* Declare it "short" because it's a real long-shot that that - * won't be large enough. + /* Unless -a, declare it "short" because it's a real + * long-shot that that won't be large enough. */ - printf( "static const short yy_nxt[][%d] =\n {\n", + printf( "static const %s yy_nxt[][%d] =\n {\n", /* '}' so vi doesn't get too confused */ - num_full_table_rows ); + long_align ? "long" : "short", num_full_table_rows ); /* Generate 0 entries for state #0. */ for ( i = 0; i < num_full_table_rows; ++i ) diff --git a/flexdef.h b/flexdef.h index 81a1fe1..028d516 100644 --- a/flexdef.h +++ b/flexdef.h @@ -321,6 +321,7 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; * listing backing-up states * C_plus_plus - if true (i.e., -+ flag), generate a C++ scanner class; * otherwise, a standard C scanner + * long_align - if true (-a flag), favor long-word alignment. 
* yytext_is_array - if true (i.e., %array directive), then declare * yytext as a array instead of a character pointer. Nice and inefficient. * csize - size of character set for the scanner we're generating; @@ -339,7 +340,7 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; extern int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt; extern int interactive, caseins, useecs, fulltbl, usemecs; extern int fullspd, gen_line_dirs, performance_report, backing_up_report; -extern int C_plus_plus, yytext_is_array, csize; +extern int C_plus_plus, long_align, yytext_is_array, csize; extern int yymore_used, reject, real_reject, continued_action; #define REALLY_NOT_DETERMINED 0 diff --git a/gen.c b/gen.c index 164081b..d31ff93 100644 --- a/gen.c +++ b/gen.c @@ -397,7 +397,8 @@ void genftbl() register int i; int end_of_buffer_action = num_rules + 1; - printf( C_short_decl, "yy_accept", lastdfa + 1 ); + printf( long_align ? C_long_decl : C_short_decl, + "yy_accept", lastdfa + 1 ); dfaacc[end_of_buffer_state].dfaacc_state = end_of_buffer_action; @@ -440,7 +441,7 @@ char *char_map; "while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )" ); indent_up(); indent_puts( "{" ); - indent_puts( "yy_current_state = yy_def[yy_current_state];" ); + indent_puts( "yy_current_state = (int) yy_def[yy_current_state];" ); if ( usemecs ) { @@ -783,7 +784,8 @@ void gentabs() accsiz[end_of_buffer_state] = 1; dfaacc[end_of_buffer_state].dfaacc_set = EOB_accepting_list; - printf( C_short_decl, "yy_acclist", max( numas, 1 ) + 1 ); + printf( long_align ? C_long_decl : C_short_decl, + "yy_acclist", max( numas, 1 ) + 1 ); j = 1; /* index into "yy_acclist" array */ @@ -869,7 +871,7 @@ void gentabs() */ ++k; - printf( C_short_decl, "yy_accept", k ); + printf( long_align ? C_long_decl : C_short_decl, "yy_accept", k ); for ( i = 1; i <= lastdfa; ++i ) { @@ -917,7 +919,8 @@ void gentabs() total_states = lastdfa + numtemps; - printf( total_states >= MAX_SHORT ? 
C_long_decl : C_short_decl, + printf( (total_states >= MAX_SHORT || long_align) ? + C_long_decl : C_short_decl, "yy_base", total_states + 1 ); for ( i = 1; i <= lastdfa; ++i ) @@ -951,7 +954,8 @@ void gentabs() dataend(); - printf( total_states >= MAX_SHORT ? C_long_decl : C_short_decl, + printf( (total_states >= MAX_SHORT || long_align) ? + C_long_decl : C_short_decl, "yy_def", total_states + 1 ); for ( i = 1; i <= total_states; ++i ) @@ -959,7 +963,8 @@ void gentabs() dataend(); - printf( tblend >= MAX_SHORT ? C_long_decl : C_short_decl, + printf( (tblend >= MAX_SHORT || long_align) ? + C_long_decl : C_short_decl, "yy_nxt", tblend + 1 ); for ( i = 1; i <= tblend; ++i ) @@ -972,7 +977,8 @@ void gentabs() dataend(); - printf( tblend >= MAX_SHORT ? C_long_decl : C_short_decl, + printf( (tblend >= MAX_SHORT || long_align) ? + C_long_decl : C_short_decl, "yy_chk", tblend + 1 ); for ( i = 1; i <= tblend; ++i ) @@ -1025,7 +1031,7 @@ void make_tables() /* First, take care of YY_DO_BEFORE_ACTION depending on yymore * being used. */ - set_indent( 2 ); + set_indent( 1 ); if ( yymore_used ) { @@ -1062,13 +1068,18 @@ void make_tables() */ int total_table_size = tblend + numecs + 1; char *trans_offset_type = - total_table_size >= MAX_SHORT ? "long" : "short"; + (total_table_size >= MAX_SHORT || long_align) ? + "long" : "short"; set_indent( 0 ); indent_puts( "struct yy_trans_info" ); indent_up(); indent_puts( "{" ); /* } for vi */ - indent_puts( "short yy_verify;" ); + + if ( long_align ) + indent_puts( "long yy_verify;" ); + else + indent_puts( "short yy_verify;" ); /* In cases where its sister yy_verify *is* a "yes, there is * a transition", yy_nxt is the offset (in records) to the @@ -1125,7 +1136,8 @@ void make_tables() indent_puts( "extern int yy_flex_debug;" ); indent_puts( "int yy_flex_debug = 1;\n" ); - printf( C_short_decl, "yy_rule_linenum", num_rules ); + printf( long_align ? 
C_long_decl : C_short_decl, + "yy_rule_linenum", num_rules ); for ( i = 1; i < num_rules; ++i ) mkdata( rule_linenum[i] ); dataend(); diff --git a/main.c b/main.c index de62516..c9104a6 100644 --- a/main.c +++ b/main.c @@ -52,7 +52,7 @@ void set_up_initial_allocations PROTO((void)); int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt; int interactive, caseins, useecs, fulltbl, usemecs; int fullspd, gen_line_dirs, performance_report, backing_up_report; -int C_plus_plus, yytext_is_array, csize; +int C_plus_plus, long_align, yytext_is_array, csize; int yymore_used, reject, real_reject, continued_action; int yymore_really_used, reject_really_used; int datapos, dataline, linenum; @@ -255,6 +255,8 @@ int exit_status; if ( C_plus_plus ) putc( '+', stderr ); + if ( long_align ) + putc( 'a', stderr ); if ( backing_up_report ) putc( 'b', stderr ); if ( ddebug ) @@ -411,7 +413,7 @@ char **argv; printstats = syntaxerror = trace = spprdflt = caseins = false; C_plus_plus = backing_up_report = ddebug = fulltbl = fullspd = false; - nowarn = yymore_used = continued_action = reject = false; + long_align = nowarn = yymore_used = continued_action = reject = false; yytext_is_array = yymore_really_used = reject_really_used = false; gen_line_dirs = usemecs = useecs = true; performance_report = 0; @@ -446,6 +448,10 @@ char **argv; C_plus_plus = true; break; + case 'a': + long_align = true; + break; + case 'B': interactive = false; interactive_given = true; @@ -797,6 +803,7 @@ void readin() { if ( yytext_is_array ) { + puts( "\n#include \n" ); puts( "extern char yytext[];\n" ); puts( "#ifndef YYLMAX" ); puts( "#define YYLMAX YY_READ_BUF_SIZE" ); @@ -877,9 +884,11 @@ void set_up_initial_allocations() void usage() { fprintf( stderr, - "%s [-bcdfhinpstvwBFILTV78+ -C[efmF] -Pprefix -Sskeleton] [file ...]\n", +"%s [-abcdfhinpstvwBFILTV78+ -C[efmF] -Pprefix -Sskeleton] [file ...]\n", program_name ); + fprintf( stderr, + "\t-a trade off larger tables for better memory 
alignment\n" ); fprintf( stderr, "\t-b generate backing-up information to lex.backup\n" ); fprintf( stderr, "\t-c do-nothing POSIX option\n" ); -- cgit v1.2.3 From 983f617068e6d0840618166c5176fb0ed105d9f9 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 2 Oct 1993 13:38:07 +0000 Subject: Added dynamic buffer growing. Added yyless() for section 3. --- flex.skl | 51 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/flex.skl b/flex.skl index c5616eb..26407f2 100644 --- a/flex.skl +++ b/flex.skl @@ -177,6 +177,7 @@ extern FILE *yyin, *yyout; extern "C" { #endif extern void *yy_flex_alloc YY_PROTO(( int )); + extern void *yy_flex_realloc YY_PROTO(( void *ptr, int size )); extern void yy_flex_free YY_PROTO(( void * )); extern int yywrap YY_PROTO(( void )); #ifdef __cplusplus @@ -531,13 +532,32 @@ int yyFlexLexer::yy_get_next_buffer() int num_to_read = yy_current_buffer->yy_buf_size - number_to_move - 1; - if ( num_to_read > YY_READ_BUF_SIZE ) - num_to_read = YY_READ_BUF_SIZE; + while ( num_to_read <= 0 ) + { /* Not enough room in the buffer - grow it. */ + + /* just a shorter name for the current buffer */ + YY_BUFFER_STATE b = yy_current_buffer; + + int yy_c_buf_p_offset = yy_c_buf_p - b->yy_ch_buf; + + b->yy_buf_size *= 2; + b->yy_ch_buf = (YY_CHAR *) + yy_flex_realloc( (void *) b->yy_ch_buf, + b->yy_buf_size ); - else if ( num_to_read <= 0 ) - YY_FATAL_ERROR( + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "fatal error - scanner input buffer overflow" ); + yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; + + num_to_read = yy_current_buffer->yy_buf_size - + number_to_move - 1; + } + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + /* Read in more data. 
*/ YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]), yy_n_chars, num_to_read ); @@ -838,8 +858,7 @@ YY_BUFFER_STATE yyFlexLexer::yy_create_buffer( FILE* file, int size ) /* yy_ch_buf has to be 2 characters longer than the size given because * we need to put in 2 end-of-buffer characters. */ - b->yy_ch_buf = - (YY_CHAR *) yy_flex_alloc( (unsigned) (b->yy_buf_size + 2) ); + b->yy_ch_buf = (YY_CHAR *) yy_flex_alloc( b->yy_buf_size + 2 ); if ( ! b->yy_ch_buf ) YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); @@ -864,8 +883,8 @@ void yyFlexLexer::yy_delete_buffer( YY_BUFFER_STATE b ) if ( b == yy_current_buffer ) yy_current_buffer = (YY_BUFFER_STATE) 0; - yy_flex_free( b->yy_ch_buf ); - yy_flex_free( b ); + yy_flex_free( (void *) b->yy_ch_buf ); + yy_flex_free( (void *) b ); } @@ -901,3 +920,19 @@ void yyFlexLexer::yy_init_buffer( YY_BUFFER_STATE b, FILE* file ) b->yy_eof_status = EOF_NOT_SEEN; } + + +/* Redefine yyless() so it works in section 3 code. */ + +#undef yyless +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + yytext[yyleng] = yy_hold_char; \ + yy_c_buf_p = yytext + n - YY_MORE_ADJ; \ + yy_hold_char = *yy_c_buf_p; \ + *yy_c_buf_p = '\0'; \ + yyleng = n; \ + } \ + while ( 0 ) -- cgit v1.2.3 From 65dee2525e3e700eeeed462ff3ea505e630be384 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 2 Oct 1993 13:44:09 +0000 Subject: Added expand_nxt_chk() extern. --- flexdef.h | 1 + 1 file changed, 1 insertion(+) diff --git a/flexdef.h b/flexdef.h index 028d516..110c025 100644 --- a/flexdef.h +++ b/flexdef.h @@ -862,6 +862,7 @@ extern int sclookup PROTO((char[])); extern void bldtbl PROTO((int[], int, int, int, int)); extern void cmptmps PROTO((void)); /* compress template table entries */ +extern void expand_nxt_chk PROTO((void)); /* increase nxt/chk arrays */ extern void inittbl PROTO((void)); /* initialize transition tables */ /* Make the default, "jam" table entries. 
*/ extern void mkdeftbl PROTO((void)); -- cgit v1.2.3 From c5d8db8d39ab03dd579900b948b0e206bc0e5a0a Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 2 Oct 1993 15:19:20 +0000 Subject: Switched from FILE*'s to stream's --- FlexLexer.h | 39 ++++++++++++++++----------------------- flex.skl | 51 +++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 63 insertions(+), 27 deletions(-) diff --git a/FlexLexer.h b/FlexLexer.h index f8e0d35..0e9d183 100644 --- a/FlexLexer.h +++ b/FlexLexer.h @@ -1,12 +1,12 @@ // $Header$ -// FlexLexer.h -- define a base class for lexical analyzers generated by flex +// FlexLexer.h -- define classes for lexical analyzers generated by flex // Copyright (c) 1993 The Regents of the University of California. // All rights reserved. // // This code is derived from software contributed to Berkeley by -// Kent Williams. +// Kent Williams and Tom Epperly. // // Redistribution and use in source and binary forms are permitted provided // that: (1) source distributions retain this entire copyright notice and @@ -22,10 +22,8 @@ // WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. -#ifndef __FLEXCXX_H -#define __FLEXCXX_H - -#include +#ifndef __FLEX_LEXER_H +#define __FLEX_LEXER_H // This file defines two classes. 
The first, FlexLexer, is an abstract @@ -42,9 +40,9 @@ class FlexLexer { virtual ~FlexLexer() { } virtual void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) = 0; - virtual YY_BUFFER_STATE yy_create_buffer( FILE* file, int size ) = 0; + virtual YY_BUFFER_STATE yy_create_buffer( istream* s, int size ) = 0; virtual void yy_delete_buffer( YY_BUFFER_STATE b ) = 0; - virtual void yyrestart( FILE *input_file ) = 0; + virtual void yyrestart( istream* s ) = 0; virtual int yylex() = 0; }; @@ -52,7 +50,9 @@ class FlexLexer { class yyFlexLexer : public FlexLexer { public: - yyFlexLexer( FILE* arg_yyin = 0, FILE* arg_yyout = 0 ) + // arg_yyin and arg_yyout default to the cin and cout, but we + // only make that assignment when initializing in yylex(). + yyFlexLexer( istream* arg_yyin = 0, ostream* arg_yyout = 0 ) { yyin = arg_yyin; yyout = arg_yyout; @@ -81,35 +81,28 @@ class yyFlexLexer : public FlexLexer { } void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ); - YY_BUFFER_STATE yy_create_buffer( FILE* file, int size ); + YY_BUFFER_STATE yy_create_buffer( istream* s, int size ); void yy_delete_buffer( YY_BUFFER_STATE b ); - void yyrestart( FILE *input_file ); + void yyrestart( istream* s ); virtual int yylex(); protected: - virtual int LexerInput( char* buf, int max_size ) - { - return read( fileno(yyin), buf, max_size ); - } - - virtual void LexerOutput( const char* buf, int size ) - { - (void) fwrite( (char*) buf, size, 1, yyout ); - } + virtual int LexerInput( char* buf, int max_size ); + virtual void LexerOutput( const char* buf, int size ); void yyunput( YY_CHAR c, YY_CHAR* buf_ptr ); int yyinput(); void yy_load_buffer_state(); - void yy_init_buffer( YY_BUFFER_STATE b, FILE* file ); + void yy_init_buffer( YY_BUFFER_STATE b, istream* s ); yy_state_type yy_get_previous_state(); yy_state_type yy_try_NUL_trans( yy_state_type current_state ); int yy_get_next_buffer(); - FILE* yyin; // input source for default LexerInput - FILE* yyout; // output sink for default 
LexerOutput + istream* yyin; // input source for default LexerInput + ostream* yyout; // output sink for default LexerOutput YY_BUFFER_STATE yy_current_buffer; diff --git a/flex.skl b/flex.skl index 26407f2..3615ab4 100644 --- a/flex.skl +++ b/flex.skl @@ -20,6 +20,9 @@ #ifdef __cplusplus #include +%+ +#include +%* #include /* Use prototypes in function declarations. */ @@ -120,8 +123,11 @@ #define YY_FATAL_ERROR(msg) \ do \ { \ - (void) fputs( msg, stderr ); \ +%- (void) putc( '\n', stderr ); \ +%+ + cerr << msg << '\n'; \ +%* exit( 1 ); \ } \ while ( 0 ) @@ -171,7 +177,9 @@ typedef struct yy_buffer_state *YY_BUFFER_STATE; extern int yyleng; +%- extern FILE *yyin, *yyout; +%* #ifdef __cplusplus extern "C" { @@ -204,7 +212,11 @@ extern "C" { struct yy_buffer_state { +%- FILE *yy_input_file; +%+ + istream* yy_input_file; +%* YY_CHAR *yy_ch_buf; /* input buffer */ YY_CHAR *yy_buf_pos; /* current position in input buffer */ @@ -323,10 +335,18 @@ YY_DECL yy_start = 1; /* first start state */ if ( ! yyin ) +%- yyin = stdin; +%+ + yyin = &cin; +%* if ( ! yyout ) +%- yyout = stdout; +%+ + yyout = &cout; +%* if ( yy_current_buffer ) yy_init_buffer( yy_current_buffer, yyin ); @@ -481,7 +501,11 @@ do_action: /* This label is used only to access EOF actions. */ default: #ifdef FLEX_DEBUG +%- printf( "action # %d\n", yy_act ); +%+ + cout << "action # " << yy_act << '\n'; +%* #endif YY_FATAL_ERROR( "fatal flex scanner internal error--no action found" ); @@ -489,6 +513,25 @@ do_action: /* This label is used only to access EOF actions. 
*/ } /* end of scanning one token */ } /* end of yylex */ +%+ +int yyFlexLexer::LexerInput( char* buf, int max_size ) + { + if ( yyin->eof() || yyin->fail() ) + return 0; + + (void) yyin->read( buf, max_size ); + + if ( yyin->bad() ) + return -1; + else + return yyin->gcount(); + } + +void yyFlexLexer::LexerOutput( const char* buf, int size ) + { + (void) yyout->write( buf, size ); + } +%* /* yy_get_next_buffer - try to read in a new buffer * @@ -772,7 +815,7 @@ void yyrestart( input_file ) FILE *input_file; #endif %+ -void yyFlexLexer::yyrestart( FILE *input_file ) +void yyFlexLexer::yyrestart( istream* input_file ) %* { if ( ! yy_current_buffer ) @@ -843,7 +886,7 @@ FILE *file; int size; #endif %+ -YY_BUFFER_STATE yyFlexLexer::yy_create_buffer( FILE* file, int size ) +YY_BUFFER_STATE yyFlexLexer::yy_create_buffer( istream* file, int size ) %* { YY_BUFFER_STATE b; @@ -897,7 +940,7 @@ YY_BUFFER_STATE b; FILE *file; #endif %+ -void yyFlexLexer::yy_init_buffer( YY_BUFFER_STATE b, FILE* file ) +void yyFlexLexer::yy_init_buffer( YY_BUFFER_STATE b, istream* file ) %* { b->yy_input_file = file; -- cgit v1.2.3 From 759ab635e1695b80236ee1b4d4ea3b7586126ff5 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 2 Oct 1993 15:20:15 +0000 Subject: If -+ used, output to lex.yy.cc --- main.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/main.c b/main.c index c9104a6..4678464 100644 --- a/main.c +++ b/main.c @@ -97,9 +97,9 @@ int num_input_files; char *program_name; #ifndef SHORT_FILE_NAMES -static char *outfile_template = "lex.%s.c"; +static char *outfile_template = "lex.%s.%s"; #else -static char *outfile_template = "lex%s.c"; +static char *outfile_template = "lex%s.%s"; #endif static char outfile_path[64]; @@ -643,8 +643,14 @@ char **argv; if ( ! 
use_stdout ) { FILE *prev_stdout; + char *suffix; - sprintf( outfile_path, outfile_template, prefix ); + if ( C_plus_plus ) + suffix = "cc"; + else + suffix = "c"; + + sprintf( outfile_path, outfile_template, prefix, suffix ); prev_stdout = freopen( outfile_path, "w", stdout ); -- cgit v1.2.3 From e41633078a7a9c3a484a0f097fd33b34ea9b02f6 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 2 Oct 1993 15:23:09 +0000 Subject: Initial revision --- libyywrap.c | 8 ++++++++ version.h | 1 + 2 files changed, 9 insertions(+) create mode 100644 libyywrap.c create mode 100644 version.h diff --git a/libyywrap.c b/libyywrap.c new file mode 100644 index 0000000..aa2cb13 --- /dev/null +++ b/libyywrap.c @@ -0,0 +1,8 @@ +/* libyywrap - flex run-time support library "yywrap" function */ + +/* $Header$ */ + +int yywrap() + { + return 1; + } diff --git a/version.h b/version.h new file mode 100644 index 0000000..59a10ca --- /dev/null +++ b/version.h @@ -0,0 +1 @@ +#define FLEX_VERSION "2.4.0 (October, 1993)" -- cgit v1.2.3 From b67a55c5eb55a662e1ae7b0eb8f4bd6680cc82dd Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 2 Oct 1993 15:25:48 +0000 Subject: Clarified help message for -S --- main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.c b/main.c index 4678464..fc9dded 100644 --- a/main.c +++ b/main.c @@ -931,5 +931,5 @@ void usage() fprintf( stderr, "\t\t-CF do not compress scanner tables; use -F representation\n" ); fprintf( stderr, "\t-P specify scanner prefix other than \"yy\"\n" ); - fprintf( stderr, "\t-S specify non-default skeleton file\n" ); + fprintf( stderr, "\t-S specify skeleton file\n" ); } -- cgit v1.2.3 From 640e372ac8831e9566c47e0fbf4230ecb86105ba Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 3 Oct 1993 16:01:41 +0000 Subject: Added "flex++" feature Minimized use of YY_CHAR --- main.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/main.c b/main.c index fc9dded..5776de4 100644 --- a/main.c +++ 
b/main.c @@ -433,6 +433,10 @@ char **argv; program_name = argv[0]; + if ( program_name[0] != '\0' && + program_name[strlen( program_name ) - 1] == '+' ) + C_plus_plus = true; + /* read flags */ for ( --argc, ++argv; argc ; --argc, ++argv ) { @@ -758,13 +762,13 @@ char **argv; void readin() { + skelout(); + if ( csize == 256 ) puts( "typedef unsigned char YY_CHAR;" ); else puts( "typedef char YY_CHAR;" ); - skelout(); - if ( C_plus_plus ) puts( "#define yytext_ptr yytext" ); @@ -815,13 +819,13 @@ void readin() puts( "#define YYLMAX YY_READ_BUF_SIZE" ); puts( "#endif YYLMAX\n" ); puts( "char yytext[YYLMAX];" ); - puts( "YY_CHAR *yytext_ptr;" ); + puts( "char *yytext_ptr;" ); } else { - puts( "extern YY_CHAR *yytext;" ); - puts( "YY_CHAR *yytext;" ); + puts( "extern char *yytext;" ); + puts( "char *yytext;" ); puts( "#define yytext_ptr yytext" ); } } -- cgit v1.2.3 From 1fd6facb52598dc4b6c307101fbd3de5cd8bf0a1 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 3 Oct 1993 16:01:56 +0000 Subject: Minimized use of YY_CHAR --- flex.skl | 38 +++++++++++++++++++------------------- gen.c | 17 +++++++++-------- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/flex.skl b/flex.skl index 3615ab4..09c765c 100644 --- a/flex.skl +++ b/flex.skl @@ -218,8 +218,8 @@ struct yy_buffer_state istream* yy_input_file; %* - YY_CHAR *yy_ch_buf; /* input buffer */ - YY_CHAR *yy_buf_pos; /* current position in input buffer */ + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ /* Size of input buffer in bytes, not including room for EOB * characters. @@ -256,7 +256,7 @@ static YY_BUFFER_STATE yy_current_buffer = 0; %- Standard (non-C++) definition /* yy_hold_char holds the character lost when yytext is formed. 
*/ -static YY_CHAR yy_hold_char; +static char yy_hold_char; static int yy_n_chars; /* number of characters read into yy_ch_buf */ @@ -266,7 +266,7 @@ int yyleng; FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; /* Points to current character in buffer. */ -static YY_CHAR *yy_c_buf_p = (YY_CHAR *) 0; +static char *yy_c_buf_p = (char *) 0; static int yy_init = 1; /* whether we need to initialize */ static int yy_start = 0; /* start state number */ @@ -275,7 +275,7 @@ static int yy_start = 0; /* start state number */ */ static int yy_did_buffer_switch_on_eof; -static void yyunput YY_PROTO(( YY_CHAR c, YY_CHAR *buf_ptr )); +static void yyunput YY_PROTO(( int c, char *buf_ptr )); void yyrestart YY_PROTO(( FILE *input_file )); void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer )); void yy_load_buffer_state YY_PROTO(( void )); @@ -320,7 +320,7 @@ static int yy_get_next_buffer YY_PROTO(( void )); YY_DECL { register yy_state_type yy_current_state; - register YY_CHAR *yy_cp, *yy_bp; + register char *yy_cp, *yy_bp; register int yy_act; %% user's declarations go here @@ -547,8 +547,8 @@ static int yy_get_next_buffer() int yyFlexLexer::yy_get_next_buffer() %* { - register YY_CHAR *dest = yy_current_buffer->yy_ch_buf; - register YY_CHAR *source = yytext_ptr - 1; /* copy prev. char, too */ + register char *dest = yy_current_buffer->yy_ch_buf; + register char *source = yytext_ptr - 1; /* copy prev. 
char, too */ register int number_to_move, i; int ret_val; @@ -584,7 +584,7 @@ int yyFlexLexer::yy_get_next_buffer() int yy_c_buf_p_offset = yy_c_buf_p - b->yy_ch_buf; b->yy_buf_size *= 2; - b->yy_ch_buf = (YY_CHAR *) + b->yy_ch_buf = (char *) yy_flex_realloc( (void *) b->yy_ch_buf, b->yy_buf_size ); @@ -649,7 +649,7 @@ yy_state_type yyFlexLexer::yy_get_previous_state() %* { register yy_state_type yy_current_state; - register YY_CHAR *yy_cp; + register char *yy_cp; %% code to get the start state into yy_current_state goes here @@ -688,17 +688,17 @@ yy_state_type yyFlexLexer::yy_try_NUL_trans( yy_state_type yy_current_state ) %- #ifdef YY_USE_PROTOS -static void yyunput( YY_CHAR c, register YY_CHAR *yy_bp ) +static void yyunput( int c, register char *yy_bp ) #else static void yyunput( c, yy_bp ) -YY_CHAR c; -register YY_CHAR *yy_bp; +int c; +register char *yy_bp; #endif %+ -void yyFlexLexer::yyunput( YY_CHAR c, register YY_CHAR *yy_bp ) +void yyFlexLexer::yyunput( int c, register char* yy_bp ) %* { - register YY_CHAR *yy_cp = yy_c_buf_p; + register char *yy_cp = yy_c_buf_p; /* undo effects of setting up yytext */ *yy_cp = yy_hold_char; @@ -707,9 +707,9 @@ void yyFlexLexer::yyunput( YY_CHAR c, register YY_CHAR *yy_bp ) { /* need to shift things up to make room */ /* +2 for EOB chars. */ register int number_to_move = yy_n_chars + 2; - register YY_CHAR *dest = &yy_current_buffer->yy_ch_buf[ + register char *dest = &yy_current_buffer->yy_ch_buf[ yy_current_buffer->yy_buf_size + 2]; - register YY_CHAR *source = + register char *source = &yy_current_buffer->yy_ch_buf[number_to_move]; while ( source > yy_current_buffer->yy_ch_buf ) @@ -726,7 +726,7 @@ void yyFlexLexer::yyunput( YY_CHAR c, register YY_CHAR *yy_bp ) if ( yy_cp > yy_bp && yy_cp[-1] == '\n' ) yy_cp[-2] = '\n'; - *--yy_cp = c; + *--yy_cp = (char) c; /* Note: the formal parameter *must* be called "yy_bp" for this * macro to now work correctly. 
@@ -901,7 +901,7 @@ YY_BUFFER_STATE yyFlexLexer::yy_create_buffer( istream* file, int size ) /* yy_ch_buf has to be 2 characters longer than the size given because * we need to put in 2 end-of-buffer characters. */ - b->yy_ch_buf = (YY_CHAR *) yy_flex_alloc( b->yy_buf_size + 2 ); + b->yy_ch_buf = (char *) yy_flex_alloc( b->yy_buf_size + 2 ); if ( ! b->yy_ch_buf ) YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); diff --git a/gen.c b/gen.c index d31ff93..9f2f558 100644 --- a/gen.c +++ b/gen.c @@ -484,7 +484,7 @@ void gen_next_match() if ( fulltbl ) { indent_put2s( - "while ( (yy_current_state = yy_nxt[yy_current_state][%s]) > 0 )", +"while ( (yy_current_state = yy_nxt[yy_current_state][(unsigned int)%s]) > 0 )", char_map ); indent_up(); @@ -594,7 +594,8 @@ int worry_about_NULs; } else - (void) strcpy( char_map, useecs ? "yy_ec[*yy_cp]" : "*yy_cp" ); + (void) strcpy( char_map, + useecs ? "yy_ec[(unsigned int) *yy_cp]" : "*yy_cp" ); if ( worry_about_NULs && nultrans ) { @@ -609,12 +610,12 @@ int worry_about_NULs; if ( fulltbl ) indent_put2s( - "yy_current_state = yy_nxt[yy_current_state][%s];", + "yy_current_state = yy_nxt[yy_current_state][(unsigned int) %s];", char_map ); else if ( fullspd ) indent_put2s( - "yy_current_state += yy_current_state[%s].yy_nxt;", + "yy_current_state += yy_current_state[(unsigned int) %s].yy_nxt;", char_map ); else @@ -648,7 +649,7 @@ void gen_NUL_trans() if ( need_backing_up ) /* We'll need yy_cp lying around for the gen_backing_up(). 
*/ - indent_puts( "register YY_CHAR *yy_cp = yy_c_buf_p;" ); + indent_puts( "register char *yy_cp = yy_c_buf_p;" ); putchar( '\n' ); @@ -1112,7 +1113,7 @@ void make_tables() indent_puts( "static yy_state_type yy_last_accepting_state;" ); indent_puts( - "static YY_CHAR *yy_last_accepting_cpos;\n" ); + "static char *yy_last_accepting_cpos;\n" ); } } @@ -1150,7 +1151,7 @@ void make_tables() { puts( "static yy_state_type yy_state_buf[YY_BUF_SIZE + 2], *yy_state_ptr;" ); - puts( "static YY_CHAR *yy_full_match;" ); + puts( "static char *yy_full_match;" ); puts( "static int yy_lp;" ); } @@ -1356,7 +1357,7 @@ void make_tables() skelout(); if ( bol_needed ) - indent_puts( "register YY_CHAR *yy_bp = yytext_ptr;\n" ); + indent_puts( "register char *yy_bp = yytext_ptr;\n" ); gen_start_state(); -- cgit v1.2.3 From f953b20a11776c9818ade8b9e5e0365584c33e63 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 3 Oct 1993 16:05:17 +0000 Subject: YY_CHAR -> char added YYText(), YYLeng() --- FlexLexer.h | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/FlexLexer.h b/FlexLexer.h index 0e9d183..719597f 100644 --- a/FlexLexer.h +++ b/FlexLexer.h @@ -30,21 +30,29 @@ // class which specifies the external interface provided to flex C++ // lexer objects. The second, yyFlexLexer, fills out most of the meat // of the lexer class; its internals may vary from lexer to lexer -// depending on things like whether REJECT is used, and the type -// of YY_CHAR. If you want to create multiple lexer classes, you -// use the -P flag to rename each yyFlexLexer to some other xxFlexLexer. +// depending on things like whether REJECT is used. +// +// If you want to create multiple lexer classes, you use the -P flag +// to rename each yyFlexLexer to some other xxFlexLexer. 
class FlexLexer { public: virtual ~FlexLexer() { } + const char* YYText() { return yytext; } + int YYLeng() { return yyleng; } + virtual void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) = 0; virtual YY_BUFFER_STATE yy_create_buffer( istream* s, int size ) = 0; virtual void yy_delete_buffer( YY_BUFFER_STATE b ) = 0; virtual void yyrestart( istream* s ) = 0; virtual int yylex() = 0; + +protected: + char* yytext; + int yyleng; }; @@ -56,7 +64,7 @@ class yyFlexLexer : public FlexLexer { { yyin = arg_yyin; yyout = arg_yyout; - yy_c_buf_p = (YY_CHAR*) 0; + yy_c_buf_p = 0; yy_init = 1; yy_start = 0; @@ -91,7 +99,7 @@ class yyFlexLexer : public FlexLexer { virtual int LexerInput( char* buf, int max_size ); virtual void LexerOutput( const char* buf, int size ); - void yyunput( YY_CHAR c, YY_CHAR* buf_ptr ); + void yyunput( int c, char* buf_ptr ); int yyinput(); void yy_load_buffer_state(); @@ -107,16 +115,13 @@ class yyFlexLexer : public FlexLexer { YY_BUFFER_STATE yy_current_buffer; // yy_hold_char holds the character lost when yytext is formed. - YY_CHAR yy_hold_char; + char yy_hold_char; // Number of characters read into yy_ch_buf. int yy_n_chars; - YY_CHAR* yytext; - int yyleng; - // Points to current character in buffer. - YY_CHAR* yy_c_buf_p; + char* yy_c_buf_p; int yy_init; // whether we need to initialize int yy_start; // start state number @@ -129,12 +134,12 @@ class yyFlexLexer : public FlexLexer { // on use of certain flex features (like REJECT or yymore()). yy_state_type yy_last_accepting_state; - YY_CHAR* yy_last_accepting_cpos; + char* yy_last_accepting_cpos; yy_state_type* yy_state_buf; yy_state_type* yy_state_ptr; - YY_CHAR* yy_full_match; + char* yy_full_match; int* yy_full_state; int yy_full_lp; -- cgit v1.2.3 From fef15fbf76e626005c5b73f95ae00b9be4692abd Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 3 Oct 1993 17:34:54 +0000 Subject: Got rid of (char *) casts of yytext, no longer needed. 
--- flex.skl | 11 +++++------ gen.c | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/flex.skl b/flex.skl index 09c765c..afb7857 100644 --- a/flex.skl +++ b/flex.skl @@ -71,15 +71,14 @@ /* Copy whatever the last rule matched to the standard output. */ -/* Cast to (char *) is because for 8-bit chars, yytext is (unsigned char *) */ -/* this used to be an fputs(), but since the string might contain NUL's, - * we now use fwrite(). - */ #ifndef ECHO %- Standard (non-C++) definition -#define ECHO (void) fwrite( (char *) yytext, yyleng, 1, yyout ) +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). + */ +#define ECHO (void) fwrite( yytext, yyleng, 1, yyout ) %+ C++ definition -#define ECHO LexerOutput( (const char *) yytext, yyleng ) +#define ECHO LexerOutput( yytext, yyleng ) %* #endif diff --git a/gen.c b/gen.c index 9f2f558..ee0f6ff 100644 --- a/gen.c +++ b/gen.c @@ -1052,7 +1052,7 @@ void make_tables() indent_puts( "YY_FATAL_ERROR( \"token too large, exceeds YYLMAX\" ); \\" ); indent_down(); - indent_puts( "strcpy( yytext, (char *) yytext_ptr ); \\" ); + indent_puts( "strcpy( yytext, yytext_ptr ); \\" ); } set_indent( 0 ); -- cgit v1.2.3 From 151339baea2fa878f41e7f12b4db9effc59197aa Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 4 Oct 1993 10:17:40 +0000 Subject: Added yy_fatal_error function. --- flex.skl | 59 +++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/flex.skl b/flex.skl index afb7857..1590b6d 100644 --- a/flex.skl +++ b/flex.skl @@ -103,33 +103,8 @@ #define YY_NULL 0 /* Report a fatal error. */ - -/* The funky do-while is used to turn this macro definition into - * a single C statement (which needs a semi-colon terminator). 
- * This avoids problems with code like: - * - * if ( something_happens ) - * YY_FATAL_ERROR( "oops, the something happened" ); - * else - * everything_okay(); - * - * Prior to using the do-while the compiler would get upset at the - * "else" because it interpreted the "if" statement as being all - * done when it reached the ';' after the YY_FATAL_ERROR() call. - */ - #ifndef YY_FATAL_ERROR -#define YY_FATAL_ERROR(msg) \ - do \ - { \ -%- - (void) putc( '\n', stderr ); \ -%+ - cerr << msg << '\n'; \ -%* - exit( 1 ); \ - } \ - while ( 0 ) +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) #endif /* Enter a start condition. This macro really ought to take a parameter, @@ -195,7 +170,22 @@ extern "C" { #define EOB_ACT_END_OF_FILE 1 #define EOB_ACT_LAST_MATCH 2 +/* The funky do-while in the following #define is used to turn the definition + * into a single C statement (which needs a semi-colon terminator). This + * avoids problems with code like: + * + * if ( condition_holds ) + * yyless( 5 ); + * else + * do_something_else(); + * + * Prior to using the do-while the compiler would get upset at the + * "else" because it interpreted the "if" statement as being all + * done when it reached the ';' after the yyless() call. + */ + /* Return all but the first 'n' matched characters back to the input stream. */ + #define yyless(n) \ do \ { \ @@ -302,6 +292,7 @@ static yy_state_type yy_get_previous_state YY_PROTO(( void )); static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); static int yy_get_next_buffer YY_PROTO(( void )); %* +static void yy_fatal_error YY_PROTO(( const char msg[] )); /* Done after the current pattern has been matched and before the * corresponding action - sets up yytext.
@@ -964,6 +955,22 @@ void yyFlexLexer::yy_init_buffer( YY_BUFFER_STATE b, istream* file ) } +#ifdef YY_USE_PROTOS +void yy_fatal_error( const char msg[] ) +#else +void yy_fatal_error( msg ) +char msg[]; +#endif + { +%- + (void) putc( '\n', stderr ); +%+ + cerr << msg << '\n'; +%* + exit( 1 ); + } + + /* Redefine yyless() so it works in section 3 code. */ #undef yyless -- cgit v1.2.3 From 3738e46dbb61f25906081f51cc7a2fad8cd83ee6 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 4 Oct 1993 10:56:08 +0000 Subject: Added "static" to definition of yy_fatal_error as well as fwd decl. --- flex.skl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flex.skl b/flex.skl index 1590b6d..35a26bf 100644 --- a/flex.skl +++ b/flex.skl @@ -956,9 +956,9 @@ void yyFlexLexer::yy_init_buffer( YY_BUFFER_STATE b, istream* file ) #ifdef YY_USE_PROTOS -void yy_fatal_error( const char msg[] ) +static void yy_fatal_error( const char msg[] ) #else -void yy_fatal_error( msg ) +static void yy_fatal_error( msg ) char msg[]; #endif { -- cgit v1.2.3 From af36cab3e7c315045fe3059d4d0d91705434fde8 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 4 Oct 1993 16:44:10 +0000 Subject: osfcn.h -> unistd.h --- flex.skl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flex.skl b/flex.skl index 35a26bf..fae24ce 100644 --- a/flex.skl +++ b/flex.skl @@ -23,7 +23,7 @@ %+ #include %* -#include <osfcn.h> +#include <unistd.h> /* Use prototypes in function declarations.
*/ #define YY_USE_PROTOS -- cgit v1.2.3 From 10af06378803f8f6a85d7b685ba93da23af907eb Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 4 Oct 1993 21:15:19 +0000 Subject: Raw 2.4 changes --- NEWS | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/NEWS b/NEWS index d2d0d97..0214847 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,78 @@ +Changes between release 2.4 (04Oct93) and release 2.3: + +skeleton file: John Gilmore + flex.skel -> flex.skl + -S to go away unless hear it's useful +yywrap() a function +must link with libfl.a +C++: Kent Williams + use for reentrant scanners + C++ streams: Tom Epperly + lex.yy.cc + FlexLexer.h + flex++ +no temp file +manual/ +detects if -8 flag needed +-h for help +buffer dynamically enlarges; but *slow* to match big tokens + and doesn't do so on unput() +fewer porting headaches; memset(), malloc() type in particular +MISC/ +-P prefix +-V +<*> +%array/%pointer + use %array to avoid input()/unput() trashing yytext + YYLMAX + not with -+ +flex -V +-8 by default (except -C{f,F}?); -7 supported +-A align option, promotes short -> long +number of fencepost table expansion bugs fixed +YY_START: integer value, can do stacks +yyrestart() no longer needed; just point yyin at a new file +fencepost bug fixes +'#' no longer a comment character +\n in flex input can optionally include leading or trailing \r +warnings for rules that can't be matched; REJECT obviates +more consistent identification of error locations +yyleng a global +definitions can now include leading '^' or trailing '$' +scanners are -I interactive if compressed, by default; -B +warnings for unmatchable rules; if -s given but default rule can be matched +YY_USER_ACTION only called for real actions +many misc bug fixes thanks to Gerhard Wilhelms +\n\r stuff +yyless() usable in section 3 (Ceriel Jacobs) +formfeed no longer a whitespace character +run-time detection of out-of-range characters (8 bit when built for 7 
bit) +%t nuked +yyleng may be modified +-w: suppress warnings +-p -p: report minor performance problems, too +no more time information in -v output +MISC/fastwc +MISC/debflex.awk (Francois) +MISC/testxxLexerl.l +YY_NEW_FILE no longer needed +definitions with ^, $ allowed; not expanded inside parens +version.h + +corrected doc: -C options are cumulative + may modify yytext but not lengthen it (append chars) + modifying last char may affect anchoring + backtracking -> backing up + unindented comments allowed in first section, but not in second + yyless() only usable in scanner source, not externally + yyrestart(yyin) throws away current buffer + high-speed scanners: match as much text as possible w/ each rule + beginning-of-line operator is fairly cheap + unput() expensive, yyless() cheap + corrected backing-up example + +code reformatted + Changes between 2.3 Patch #8 (21Feb93) and 2.3 Patch #7: - Fixed bugs in dynamic memory allocation leading to grievous -- cgit v1.2.3 From fd46b13cee77bc98232e6c050bb0e24ec6dccac4 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Oct 1993 14:47:10 +0000 Subject: Checkpoint prior to final 2.4 update --- flex.1 | 361 ++++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 200 insertions(+), 161 deletions(-) diff --git a/flex.1 b/flex.1 index 1fd184d..b5dad63 100644 --- a/flex.1 +++ b/flex.1 @@ -1,9 +1,9 @@ -.TH FLEX 1 "26 May 1990" "Version 2.3" +.TH FLEXDOC 1 "October 1993" "Version 2.4" .SH NAME -flexdoc - documentation for flex, fast lexical analyzer generator +flexdoc \- documentation for flex, fast lexical analyzer generator .SH SYNOPSIS .B flex -.B [-bcdfinpstvFILT8 -C[efmF] -Sskeleton] +.B [\-bcdfinpstvFILT8 \-C[efmF] \-Sskeleton] .I [filename ...] .SH DESCRIPTION .I flex @@ -22,7 +22,7 @@ generates as output a C source file, which defines a routine .B yylex(). This file is compiled and linked with the -.B -lfl +.B \-lfl library to produce an executable. 
When the executable is run, it analyzes its input for occurrences of the regular expressions. Whenever it finds one, it executes @@ -250,11 +250,10 @@ errors (this feature is present for .I POSIX compliance; see below for other such features). .PP -In the definitions section, an unindented comment (i.e., a line +In the definitions section (but not in the rules section), +an unindented comment (i.e., a line beginning with "/*") is also copied verbatim to the output up -to the next "*/". Also, any line in the definitions section -beginning with '#' is ignored, though this style of comment is -deprecated and may go away in the future. +to the next "*/". .SH PATTERNS The patterns in the input are written using an extended set of regular expressions. These are: @@ -484,14 +483,16 @@ Each time .B yylex() is called it continues processing tokens from where it last left off until it either reaches -the end of the file or executes a return. Once it reaches an end-of-file, -however, then any subsequent call to -.B yylex() -will simply immediately return, unless -.B yyrestart() -is first called (see below). +the end of the file or executes a return. .PP -Actions are not allowed to modify yytext or yyleng. +Actions are free to modify yytext except for lengthening it (adding +characters to its end--these will overwrite later characters in the +input stream). Modifying the final character of yytext may alter +whether when scanning resumes rules anchored with '^' are active. +Specifically, changing the final character of yytext to a newline will +activate such rules on the next scan, and changing it to anything else +will deactivate the rules. Users should not rely on this behavior being +present in future releases. .PP There are a number of special directives which can be included within an action: @@ -623,6 +624,11 @@ will cause the entire current input string to be scanned again. 
Unless you've changed how the scanner will subsequently process its input (using .B BEGIN, for example), this will result in an endless loop. +.PP +Note that +.B yyless +is a macro and can only be used in the flex input file, not from +other source files. .IP - .B unput(c) puts the character @@ -646,6 +652,9 @@ Note that since each puts the given character back at the .I beginning of the input stream, pushing back strings must be done back-to-front. +Also note that you cannot put back +.B EOF +to attempt to mark the input stream with an end-of-file. .IP - .B input() reads the next character from the input stream. For example, @@ -693,10 +702,6 @@ stream by the name of .B yyterminate() can be used in lieu of a return statement in an action. It terminates the scanner and returns a 0 to the scanner's caller, indicating "all done". -Subsequent calls to the scanner will immediately return unless preceded -by a call to -.B yyrestart() -(see below). By default, .B yyterminate() is also called when an end-of-file is encountered. It is a macro and @@ -745,18 +750,38 @@ an end-of-file (at which point it returns the value 0) or one of its actions executes a .I return statement. -In the former case, when called again the scanner will immediately -return unless +.PP +If the scanner reaches an end-of-file, subsequent calls are undefined +unless either +.I yyin +is pointed at a new input file (in which case scanning continues from +that file), or .B yyrestart() -is called to point +is called. .I yyin -at the new input file. ( .B yyrestart() takes one argument, a .B FILE * -pointer.) -In the latter case (i.e., when an action -executes a return), the scanner may then be called again and it +pointer, and initializes +.I yyin +for scanning from that file. 
Essentially there is no difference between +just assigning +.I yyin +to a new input file or using +.B yyrestart() +to do so; the latter is available for compatibility with previous versions +of +.I flex, +and because it can be used to switch input files in the middle of scanning. +It can also be used to throw away the current input buffer, by calling +it with an argument of +.I yyin. +.PP +If +.B yylex() +stops scanning due to executing a +.I return +statement in one of the actions, the scanner may then be called again and it will resume scanning where it left off. .PP By default (and for purposes of efficiency), the scanner uses @@ -995,6 +1020,11 @@ maintaining a count of the current input line. "*"+"/" BEGIN(INITIAL); .fi +This scanner goes to a bit of trouble to match as much +text as possible with each rule. In general, when attempting to write +a high-speed scanner try to match as much as possible in each rule, as +it's a big win. +.PP Note that start-conditions names are really integer values and can be stored as such. Thus, the above could be extended in the following fashion: @@ -1023,12 +1053,17 @@ following fashion: "*"+"/" BEGIN(comment_caller); .fi -One can then implement a "stack" of start conditions using an -array of integers. (It is likely that such stacks will become -a full-fledged -.I flex -feature in the future.) Note, though, that -start conditions do not have their own name-space; %s's and %x's +Furthermore, you can access the current start condition using +the integer-valued +.B YY_START +macro. For example, the above assignments to +.I comment_caller +could instead be written +.nf + comment_caller = YY_START; +.fi +.PP +Note that start conditions do not have their own name-space; %s's and %x's declare names in the same fashion as #define's.
.SH MULTIPLE INPUT BUFFERS Some scanners (such as those which support "include" files) @@ -1150,8 +1185,11 @@ feature is discussed below): } else + { + yy_delete_buffer( YY_CURRENT_BUFFER ); yy_switch_to_buffer( include_stack[include_stack_ptr] ); + } } .fi @@ -1162,17 +1200,18 @@ encountered and yywrap() returns non-zero (i.e., indicates no further files to process). The action must finish by doing one of four things: .IP - -the special -.B YY_NEW_FILE -action, if +assigning .I yyin -has been pointed at a new file to process; +to a new input file (in previous versions of flex, after doing the +assignment you had to call the special action +.B YY_NEW_FILE; +this is no longer necessary); .IP - -a +executing a .I return statement; .IP - -the special +executing the special .B yyterminate() action; .IP - @@ -1208,10 +1247,7 @@ An example: } <<EOF>> { if ( *++filelist ) - { yyin = fopen( *filelist, "r" ); - YY_NEW_FILE; - } else yyterminate(); } @@ -1264,7 +1300,7 @@ To use with .I yacc, one specifies the -.B -d +.B \-d option to .I yacc to instruct it to generate the file @@ -1324,7 +1360,7 @@ provides a crude way for introducing equivalence classes into the scanner specification. .PP Note that the -.B -i +.B \-i option (see below) coupled with the equivalence classes which .I flex automatically generates take care of virtually all the instances @@ -1335,46 +1371,46 @@ But what the hell, it's there if you want it. .I flex has the following options: .TP -.B -b -Generate backtracking information to -.I lex.backtrack. -This is a list of scanner states which require backtracking +.B \-b +Generate backing-up information to +.I lex.backup. +This is a list of scanner states which require backing up and the input characters on which they do so. By adding rules one -can remove backtracking states. +can remove backing-up states.
If all backing-up states are eliminated and -.B -f +.B \-f or -.B -F +.B \-F is used, the generated scanner will run faster (see the -.B -p +.B \-p flag). Only users who wish to squeeze every last cycle out of their scanners need worry about this option. (See the section on PERFORMANCE CONSIDERATIONS below.) .TP -.B -c +.B \-c is a do-nothing, deprecated option included for POSIX compliance. .IP .B NOTE: in previous releases of .I flex -.B -c +.B \-c specified table-compression options. This functionality is now given by the -.B -C +.B \-C flag. To ease the the impact of this change, when .I flex encounters -.B -c, +.B \-c, it currently issues a warning message and assumes that -.B -C +.B \-C was desired instead. In the future this "promotion" of -.B -c +.B \-c to -.B -C +.B \-C will go away in the name of full POSIX compliance (unless the POSIX meaning is removed first). .TP -.B -d +.B \-d makes the generated scanner run in .I debug mode. Whenever a pattern is recognized and the global @@ -1390,22 +1426,22 @@ a line of the form: .fi The line number refers to the location of the rule in the file defining the scanner (i.e., the file that was fed to flex). Messages -are also generated when the scanner backtracks, accepts the +are also generated when the scanner backs up, accepts the default rule, reaches the end of its input buffer (or encounters a NUL; at this point, the two look the same as far as the scanner's concerned), or reaches an end-of-file. .TP -.B -f +.B \-f specifies (take your pick) .I full table or .I fast scanner. No table compression is done. The result is large but fast. This option is equivalent to -.B -Cf +.B \-Cf (see below). .TP -.B -i +.B \-i instructs .I flex to generate a @@ -1418,11 +1454,11 @@ matched text given in .I yytext will have the preserved case (i.e., it will not be folded). .TP -.B -n +.B \-n is another do-nothing, deprecated option included only for POSIX compliance. .TP -.B -p +.B \-p generates a performance report to stderr. 
The report consists of comments regarding features of the .I flex @@ -1436,10 +1472,10 @@ the .B ^ operator, and the -.B -I +.B \-I flag entail minor performance penalties. .TP -.B -s +.B \-s causes the .I default rule (that unmatched scanner input is echoed to @@ -1448,14 +1484,14 @@ to be suppressed. If the scanner encounters input that does not match any of its rules, it aborts with an error. This option is useful for finding holes in a scanner's rule set. .TP -.B -t +.B \-t instructs .I flex to write the scanner it generates to standard output instead of .B lex.yy.c. .TP -.B -v +.B \-v specifies that .I flex should write to @@ -1471,7 +1507,7 @@ out where you stand with respect to patches and new releases, and the next two lines give the date when the scanner was created and a summary of the flags which were in effect. .TP -.B -F +.B \-F specifies that the .ul fast @@ -1498,10 +1534,10 @@ to detect the keywords, you're better off using -F. .IP This option is equivalent to -.B -CF +.B \-CF (see below). .TP -.B -I +.B \-I instructs .I flex to generate an @@ -1518,33 +1554,33 @@ when needed. Such scanners are called because if you want to write a scanner for an interactive system such as a command shell, you will probably want the user's input to be terminated with a newline, and without -.B -I +.B \-I the user will have to type a character in addition to the newline in order to have the newline recognized. This leads to dreadful interactive performance. .IP If all this seems to confusing, here's the general rule: if a human will be typing in input to your scanner, use -.B -I, +.B \-I, otherwise don't; if you don't care about squeezing the utmost performance from your scanner and you don't want to make any assumptions about the input to your scanner, use -.B -I. +.B \-I. .IP Note, -.B -I +.B \-I cannot be used in conjunction with .I full or .I fast tables, i.e., the -.B -f, -F, -Cf, +.B \-f, \-F, \-Cf, or -.B -CF +.B \-CF flags. 
.TP -.B -L +.B \-L instructs .I flex not to generate @@ -1566,7 +1602,7 @@ to "retarget" the line numbers for those parts of which it generated. So if there is an error in the generated code, a meaningless line number is reported.) .TP -.B -T +.B \-T makes .I flex run in @@ -1578,7 +1614,7 @@ the form of the input and the resultant non-deterministic and deterministic finite automata. This option is mostly for use in maintaining .I flex. .TP -.B -8 +.B \-8 instructs .I flex to generate an 8-bit scanner, i.e., one which can recognize 8-bit @@ -1586,13 +1622,13 @@ characters. On some sites, .I flex is installed with this option as the default. On others, the default is 7-bit characters. To see which is the case, check the verbose -.B (-v) +.B (\-v) output for "equivalence classes created". If the denominator of the number shown is 128, then by default .I flex is generating 7-bit characters. If it is 256, then the default is 8-bit characters and the -.B -8 +.B \-8 flag is not required (but may be a good idea to keep the scanner specification portable). Feeding a 7-bit scanner 8-bit characters will result in infinite loops, bus errors, or other such fireworks, @@ -1602,10 +1638,10 @@ are used, 8-bit scanners take only slightly more table space than not used, however, then the tables may grow up to twice their 7-bit size. .TP -.B -C[efmF] +.B \-C[efmF] controls the degree of table compression. .IP -.B -Ce +.B \-Ce directs .I flex to construct @@ -1621,7 +1657,7 @@ dramatic reductions in the final table/object file sizes (typically a factor of 2-5) and are pretty cheap performance-wise (one array look-up per character scanned). .IP -.B -Cf +.B \-Cf specifies that the .I full scanner tables should be generated - @@ -1630,14 +1666,14 @@ should not compress the tables by taking advantages of similar transition functions for different states. 
.IP -.B -CF +.B \-CF specifies that the alternate fast scanner representation (described above under the -.B -F +.B \-F flag) should be used. .IP -.B -Cm +.B \-Cm directs .I flex to construct @@ -1649,22 +1685,22 @@ have a moderate performance impact (one or two "if" tests and one array look-up per character scanned). .IP A lone -.B -C +.B \-C specifies that the scanner tables should be compressed but neither equivalence classes nor meta-equivalence classes should be used. .IP The options -.B -Cf +.B \-Cf or -.B -CF +.B \-CF and -.B -Cm +.B \-Cm do not make sense together - there is no opportunity for meta-equivalence classes if the table is not being compressed. Otherwise the options -may be freely mixed. +may be freely mixed, and are cumulative. .IP The default setting is -.B -Cem, +.B \-Cem, which specifies that .I flex should generate equivalence classes @@ -1689,15 +1725,11 @@ compiled the quickest, so during development you will usually want to use the default, maximal compression. .IP -.B -Cfe +.B \-Cfe is often a good compromise between speed and size for production scanners. -.IP -.B -C -options are not cumulative; whenever the flag is encountered, the -previous -C settings are forgotten. .TP -.B -Sskeleton_file +.B \-Sskeleton_file overrides the default skeleton file from which .I flex constructs its scanners. You'll never need this option unless you are doing @@ -1715,7 +1747,7 @@ are, from most expensive to least: REJECT - pattern sets that require backtracking + pattern sets that require backing up arbitrary trailing context yymore() @@ -1723,18 +1755,25 @@ are, from most expensive to least: .fi with the first three all being quite expensive and the last two -being quite cheap. +being quite cheap. Note also that +.B unput() +is implemented as a routine call that potentially does quite a bit of +work, while +.B yyless() +is a quite-cheap macro; so if just putting back some excess text you +scanned, use +.B yyless(). 
.PP .B REJECT should be avoided at all costs when performance is important. It is a particularly expensive option. .PP -Getting rid of backtracking is messy and often may be an enormous +Getting rid of backing up is messy and often may be an enormous amount of work for a complicated scanner. In principle, one begins by using the -.B -b +.B \-b flag to generate a -.I lex.backtrack +.I lex.backup file. For example, on the input .nf @@ -1764,7 +1803,7 @@ the file looks like: out-transitions: [ r ] jam-transitions: EOF [ \\001-q s-\\177 ] - Compressed tables always backtrack. + Compressed tables always back up. .fi The first few lines tell us that there's a scanner state in @@ -1773,7 +1812,7 @@ character, and that in that state the currently scanned text does not match any rule. The state occurs when trying to match the rules found at lines 2 and 3 in the input file. If the scanner is in that state and then reads -something other than an 'o', it will have to backtrack to find +something other than an 'o', it will have to back up to find a rule which is matched. With a bit of headscratching one can see that this must be the state it's in when it has seen "fo". When this has happened, @@ -1787,14 +1826,14 @@ Similarly, the comment for State #9 concerns when "fooba" has been scanned and an 'r' does not follow. .PP The final comment reminds us that there's no point going to -all the trouble of removing backtracking from the rules unless +all the trouble of removing backing up from the rules unless we're using -.B -f +.B \-f or -.B -F, +.B \-F, since there's no performance gain doing so with compressed scanners.
.PP -The way to remove the backtracking is to add "error" rules: +The way to remove the backing up is to add "error" rules: .nf %% @@ -1810,7 +1849,7 @@ The way to remove the backtracking is to add "error" rules: .fi .PP -Eliminating backtracking among a list of keywords can also be +Eliminating backing up among a list of keywords can also be done using a "catch-all" rule: .nf @@ -1823,14 +1862,14 @@ done using a "catch-all" rule: .fi This is usually the best solution when appropriate. .PP -Backtracking messages tend to cascade. +Backing up messages tend to cascade. With a complicated set of rules it's not uncommon to get hundreds of messages. If one can decipher them, though, it often -only takes a dozen or so rules to eliminate the backtracking (though +only takes a dozen or so rules to eliminate the backing up (though it's easy to make a mistake and have an error rule accidentally match a valid token. A possible future .I flex -feature will be to automatically add rules to eliminate backtracking). +feature will be to automatically add rules to eliminate backing up). .PP .I Variable trailing context (where both the leading and trailing parts do not have @@ -1965,17 +2004,17 @@ tokens: .|\\n /* it's not a keyword */ .fi -One has to be careful here, as we have now reintroduced backtracking +One has to be careful here, as we have now reintroduced backing up into the scanner. In particular, while .I we know that there will never be any characters in the input stream other than letters or newlines, .I flex -can't figure this out, and it will plan for possibly needing backtracking +can't figure this out, and it will plan for possibly needing to back up when it has scanned a token like "auto" and then the next character is something other than a newline or a letter. Previously it would then just match the "auto" rule and be done, but now it has no "auto" -rule, only a "auto\\n" rule. To eliminate the possibility of backtracking, +rule, only a "auto\\n" rule. 
To eliminate the possibility of backing up, we could either duplicate all rules but without final newlines, or, since we never expect to encounter such an input and therefore don't how it's classified, we can introduce one more catch-all rule, this @@ -1996,7 +2035,7 @@ one which doesn't include a newline: .fi Compiled with -.B -Cf, +.B \-Cf, this is about as fast as one can get a .I flex scanner to go for this particular problem. @@ -2102,7 +2141,7 @@ scanners use for their input. Also, when writing interactive scanners with .I flex, the -.B -I +.B \-I flag must be used. .IP - .I flex @@ -2123,6 +2162,8 @@ To reenter the scanner, first use yyrestart( yyin ); .fi +Note that this call will throw away any buffered input; usually this +isn't a problem with an interactive scanner. .IP - .B output() is not supported. @@ -2348,13 +2389,13 @@ Using in a scanner suppresses this warning. .PP .I warning, -.B -s +.B \-s .I option given but default rule .I can be matched means that it is possible (perhaps only in a particular start condition) that the default rule (match any single character) is the only one that will match a particular input. Since -.B -s +.B \-s was given, presumably this is not intended. .PP .I reject_used_but_not_detected undefined @@ -2383,7 +2424,7 @@ people who can argue compellingly that they need it.) .PP .I flex scanner jammed - a scanner compiled with -.B -s +.B \-s has encountered an input string which wasn't matched by any of its rules. .PP @@ -2395,10 +2436,10 @@ in "flex.skel". Note that to redefine this macro, you must first .B #undef it). .PP -.I scanner requires -8 flag - +.I scanner requires \-8 flag - Your scanner specification includes recognizing 8-bit characters and -you did not specify the -8 flag (and your site has not installed flex -with -8 as the default). +you did not specify the \-8 flag (and your site has not installed flex +with \-8 as the default). 
.PP .I fatal flex scanner internal error--end of buffer missed - @@ -2415,6 +2456,10 @@ reentering the scanner, use: You managed to put every single character into its own %t class. .I flex requires that at least one of the classes share characters. +.PP +.I too many start conditions in <> construct! - +you listed more start conditions in a <> construct than exist (so +you must have listed at least one of them twice). .SH DEFICIENCIES / BUGS See flex(1). .SH "SEE ALSO" @@ -2422,7 +2467,7 @@ See flex(1). flex(1), lex(1), yacc(1), sed(1), awk(1). .PP M. E. Lesk and E. Schmidt, -.I LEX - Lexical Analyzer Generator +.I LEX \- Lexical Analyzer Generator .SH AUTHOR Vern Paxson, with the help of many ideas and much inspiration from Van Jacobson. Original version by Jef Poskanzer. The fast table @@ -2431,37 +2476,32 @@ Jacobson. The implementation was done by Kevin Gong and Vern Paxson. .PP Thanks to the many .I flex -beta-testers, feedbackers, and contributors, especially Casey -Leedom, benson@odi.com, Peter A. Bigot, Keith Bostic, -Frederic Brehm, Nick Christopher, Jason Coughlin, -Scott David Daniels, Leo Eskin, -Chris Faylor, Eric Goldman, Eric -Hughes, Jeffrey R. Jones, Kevin B. Kenny, Ronald Lamprecht, -Greg Lee, Craig Leres, Mohamed el Lozy, Jim Meyering, Marc Nozell, -Walter Pelissero, Francois Pinard, Esmond Pitt, Jef Poskanzer, Jim Roskind, -Dave Tallman, Frank Whaley, Ken Yap, and those whose names -have slipped my marginal mail-archiving skills but whose contributions -are appreciated all the same. -.PP -Thanks to Keith Bostic, John Gilmore, Craig Leres, Bob -Mulcahy, Rich Salz, and Richard Stallman for help with various distribution -headaches. -.PP -Thanks to Esmond Pitt and Earle Horton for 8-bit character support; -to Benson Margulies and Fred -Burke for C++ support; to Ove Ewerlid for the basics of support for -NUL's; and to Eric Hughes for the basics of support for multiple buffers. 
-.PP -Work is being done on extending -.I flex -to generate scanners in which the -state machine is directly represented in C code rather than tables. -These scanners may well be substantially faster than those generated -using -f or -F. If you are working in this area and are interested -in comparing notes and seeing whether redundant work can be avoided, -contact Ove Ewerlid (ewerlid@mizar.DoCS.UU.SE). -.PP -This work was primarily done when I was at the Real Time Systems Group +beta-testers, feedbackers, and contributors, especially Casey Leedom, +Nelson H.F. Beebe, benson@odi.com, Peter A. Bigot, Keith Bostic, Frederic +Brehm, Nick Christopher, Jason Coughlin, Bill Cox, Dave Curtis, Scott David +Daniels, Mike Donahue, Chuck Doucette, Tom Epperly, Leo Eskin, Chris +Faylor, Jon Forrest, Eric Goldman, Ulrich Grepel, Jan Hajic, Jarkko +Hietaniemi, Eric Hughes, Ceriel Jacobs, Jeffrey R. Jones, Amir Katz, +ken@ken.hilco.com, Kevin B. Kenny, Marq Kole, Ronald Lamprecht, Greg Lee, +Craig Leres, John Levine, Mohamed el Lozy, Chris Metcalf, Luke Mewburn, Jim +Meyering, Marc Nozell, Richard Ohnemus, Sven Panne, Roland Pesch, Walter +Pelissero, Gaumond Pierre, Francois Pinard, Esmond Pitt, Jef Poskanzer, +Kevin Rodgers, Jim Roskind, Doug Schmidt, Alex Siegel, Paul Stuart, Dave +Tallman, Paul Tuinenga, Gary Weik, Frank Whaley, Gerhard Wilhelms, Kent +Williams, Ken Yap, David Zuhn, and those whose names have slipped my +marginal mail-archiving skills but whose contributions are appreciated all +the same. +.PP +Thanks to Keith Bostic, John Gilmore, Craig Leres, Bob Mulcahy, G.T. +Nicol, Rich Salz, and Richard Stallman for help with various +distribution headaches. +.PP +Thanks to Esmond Pitt and Earle Horton for 8-bit character support; to +Benson Margulies and Fred Burke for C++ support; to Kent Williams and Tom +Epperly for C++ class support; to Ove Ewerlid for support of NUL's; and to +Eric Hughes for support of multiple buffers. 
+.PP +This work was primarily done when I was with the Real Time Systems Group at the Lawrence Berkeley Laboratory in Berkeley, CA. Many thanks to all there for the support I received. .PP @@ -2469,13 +2509,12 @@ Send comments to: .nf Vern Paxson - Computer Systems Engineering + Systems Engineering Bldg. 46A, Room 1123 Lawrence Berkeley Laboratory University of California Berkeley, CA 94720 vern@ee.lbl.gov - ucbvax!ee.lbl.gov!vern .fi -- cgit v1.2.3 From 8606320b7922e07c749a9d9106639027c6fc3e02 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Oct 1993 14:52:55 +0000 Subject: Use DEFAULT_CSIZE only if not using equivalence classes. --- main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.c b/main.c index 5776de4..90c6c81 100644 --- a/main.c +++ b/main.c @@ -621,7 +621,7 @@ char **argv; if ( ! csize_given ) { - if ( fulltbl || fullspd ) + if ( (fulltbl || fullspd) && ! useecs ) csize = DEFAULT_CSIZE; else csize = CSIZE; -- cgit v1.2.3 From a4fa8632b7b60d35eb519367a880873f4c6fae1e Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 10 Oct 1993 15:28:08 +0000 Subject: Whitespace tweaking --- FlexLexer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/FlexLexer.h b/FlexLexer.h index 719597f..571a6b7 100644 --- a/FlexLexer.h +++ b/FlexLexer.h @@ -37,7 +37,7 @@ class FlexLexer { - public: +public: virtual ~FlexLexer() { } const char* YYText() { return yytext; } @@ -57,7 +57,7 @@ protected: class yyFlexLexer : public FlexLexer { - public: +public: // arg_yyin and arg_yyout default to the cin and cout, but we // only make that assignment when initializing in yylex(). 
yyFlexLexer( istream* arg_yyin = 0, ostream* arg_yyout = 0 ) @@ -95,7 +95,7 @@ class yyFlexLexer : public FlexLexer { virtual int yylex(); - protected: +protected: virtual int LexerInput( char* buf, int max_size ); virtual void LexerOutput( const char* buf, int size ); -- cgit v1.2.3 From fea40aaad1f6c54f4f304aa383611e4aefa61340 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 9 Nov 1993 21:11:42 +0000 Subject: updated date for 2.4.0 :-( --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 59a10ca..f6302d2 100644 --- a/version.h +++ b/version.h @@ -1 +1 @@ -#define FLEX_VERSION "2.4.0 (October, 1993)" +#define FLEX_VERSION "2.4.0 (November 9, 1993)" -- cgit v1.2.3 From 31395609cae40e75df0d326fdd03b5c830be7de3 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 10 Nov 1993 10:05:26 +0000 Subject: Added global to remember -P prefix so it can be written in -v summary. Alphabetized prefix generation, added yywrap --- main.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/main.c b/main.c index 90c6c81..bea8999 100644 --- a/main.c +++ b/main.c @@ -106,6 +106,7 @@ static char outfile_path[64]; static int outfile_created = 0; static int use_stdout; static char *skelname = NULL; +static char *prefix = "yy"; int main( argc, argv ) @@ -302,6 +303,9 @@ int exit_status; if ( skelname ) fprintf( stderr, " -S%s", skelname ); + if ( strcmp( prefix, "yy" ) ) + fprintf( stderr, " -P%s", prefix ); + putc( '\n', stderr ); fprintf( stderr, " %d/%d NFA states\n", lastnfa, current_mns ); @@ -409,7 +413,7 @@ char **argv; { int i, sawcmpflag; int csize_given, interactive_given; - char *arg, *prefix, *mktemp(); + char *arg, *mktemp(); printstats = syntaxerror = trace = spprdflt = caseins = false; C_plus_plus = backing_up_report = ddebug = fulltbl = fullspd = false; @@ -429,8 +433,6 @@ char **argv; allocate_character_array( action_size ); action_offset = action_index = 0; - prefix = "yy"; - 
program_name = argv[0]; if ( program_name[0] != '\0' && @@ -693,8 +695,10 @@ char **argv; if ( strcmp( prefix, "yy" ) ) { #define GEN_PREFIX(name) printf( "#define yy%s %s%s\n", name, prefix, name ); + GEN_PREFIX( "FlexLexer" ); GEN_PREFIX( "_create_buffer" ); GEN_PREFIX( "_delete_buffer" ); + GEN_PREFIX( "_flex_debug" ); GEN_PREFIX( "_init_buffer" ); GEN_PREFIX( "_load_buffer_state" ); GEN_PREFIX( "_switch_to_buffer" ); @@ -704,8 +708,7 @@ char **argv; GEN_PREFIX( "out" ); GEN_PREFIX( "restart" ); GEN_PREFIX( "text" ); - GEN_PREFIX( "_flex_debug" ); - GEN_PREFIX( "FlexLexer" ); + GEN_PREFIX( "wrap" ); printf( "\n" ); } -- cgit v1.2.3 From fd48fa42d0b3c457e90842fa572edab21087d55b Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 10 Nov 1993 10:06:51 +0000 Subject: 2.4 documentation --- flex.1 | 853 ++++++++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 665 insertions(+), 188 deletions(-) diff --git a/flex.1 b/flex.1 index b5dad63..3d12fb9 100644 --- a/flex.1 +++ b/flex.1 @@ -1,9 +1,9 @@ -.TH FLEXDOC 1 "October 1993" "Version 2.4" +.TH FLEXDOC 1 "November 1993" "Version 2.4" .SH NAME flexdoc \- documentation for flex, fast lexical analyzer generator .SH SYNOPSIS .B flex -.B [\-bcdfinpstvFILT8 \-C[efmF] \-Sskeleton] +.B [\-abcdfhinpstvwBFILTV78+ \-C[efmF] \-Pprefix \-Sskeleton] .I [filename ...] .SH DESCRIPTION .I flex @@ -311,6 +311,7 @@ expressions. These are: r same, but in any of start conditions s1, s2, or s3 + <*>r an r in any start condition, even an exclusive one. <> an end-of-file @@ -318,6 +319,10 @@ expressions. These are: an end-of-file when in start condition s1 or s2 .fi +Note that inside of a character class, all regular expression operators +lose their special meaning except escape ('\\') and the character class +operators, '-', ']', and, at the beginning of the class, '^'. +.PP The regular expressions listed above are grouped according to precedence, from highest precedence at the top to lowest at the bottom. 
Those grouped together have equal precedence. For example, @@ -362,9 +367,8 @@ characters explicitly present in the negated character class (e.g., "[^A-Z\\n]"). This is unlike how many other regular expression tools treat negated character classes, but unfortunately the inconsistency is historically entrenched. -Matching newlines means that a pattern like [^"]* can match an entire -input (overflowing the scanner's input buffer) unless there's another -quote in the input. +Matching newlines means that a pattern like [^"]* can match the entire +input unless there's another quote in the input. .IP - A rule can have at most one instance of trailing context (the '/' operator or the '$' operator). The start condition, '^', and "<>" patterns @@ -436,6 +440,92 @@ input is: .fi which generates a scanner that simply copies its input (one character at a time) to its output. +.PP +Note that +.B yytext +can be defined in two different ways: either as a character +.I pointer +or as a character +.I array. +You can control which definition +.I flex +uses by including one of the special directives +.B %pointer +or +.B %array +in the first (definitions) section of your flex input. The default is +.B %pointer. +The advantage of using +.B %pointer +is substantially faster scanning and no buffer overflow when matching +very large tokens (unless you run out of dynamic memory). The disadvantage +is that you are restricted in how your actions can modify +.B yytext +(see the next section), and calls to the +.B input() +and +.B unput() +functions destroy the present contents of +.B yytext, +which can be a considerable porting headache when moving between different +.I lex +versions. +.PP +The advantage of +.B %array +is that you can then modify +.B yytext +to your heart's content, and calls to +.B input() +and +.B unput() +do not destroy +.B yytext +(see below). 
Furthermore, existing +.I lex +programs sometimes access +.B yytext +externally using declarations of the form: +.nf + extern char yytext[]; +.fi +This definition is erroneous when used with +.B %pointer, +but correct for +.B %array. +.PP +.B %array +defines +.B yytext +to be an array of +.B YYLMAX +characters, which defaults to a fairly large value. You can change +the size by simply #define'ing +.B YYLMAX +to a different value in the first section of your +.I flex +input. As mentioned above, with +.B %pointer +yytext grows dynamically to accommodate large tokens. While this means your +.B %pointer +scanner can accommodate very large tokens (such as matching entire blocks +of comments), bear in mind that each time the scanner must resize +.B yytext +it also must rescan the entire token from the beginning, so matching such +tokens can prove slow. +.B yytext +presently does +.I not +dynamically grow if a call to +.B unput() +results in too much text being pushed back; instead, a run-time error results. +.PP +Also note that you cannot use +.B %array +with C++ scanner classes +(the +.B \-+ +option; see below). .SH ACTIONS Each pattern in a rule has a corresponding action, which can be any arbitrary C statement. The pattern ends at the first non-escaped @@ -485,14 +575,25 @@ is called it continues processing tokens from where it last left off until it either reaches the end of the file or executes a return. .PP -Actions are free to modify yytext except for lengthening it (adding +Actions are free to modify +.B yytext +except for lengthening it (adding characters to its end--these will overwrite later characters in the input stream). Modifying the final character of yytext may alter whether when scanning resumes rules anchored with '^' are active. Specifically, changing the final character of yytext to a newline will activate such rules on the next scan, and changing it to anything else will deactivate the rules.
Users should not rely on this behavior being -present in future releases. +present in future releases. Finally, note that none of this paragraph +applies when using +.B %array +(see above). +.PP +Actions are free to modify +.B yyleng +except they should not do so if the action also includes use of +.B yymore() +(see below). .PP There are a number of special directives which can be included within an action: @@ -758,7 +859,6 @@ is pointed at a new input file (in which case scanning continues from that file), or .B yyrestart() is called. -.I yyin .B yyrestart() takes one argument, a .B FILE * @@ -839,10 +939,7 @@ caller. .PP The default .B yywrap() -always returns 1. Presently, to redefine it you must first -"#undef yywrap", as it is currently implemented as a macro. As indicated -by the hedging in the previous sentence, it may be changed to -a true function in the near future. +always returns 1. .PP The scanner writes its .B ECHO @@ -927,6 +1024,18 @@ is equivalent to %% foo /* do something */ +.fi +.PP +Also note that the special start-condition specifier +.B <*> +matches every start condition. Thus, the above example could also +have been written; +.nf + + %x example + %% + <*>foo /* do something */ + .fi .PP The default rule (to @@ -1060,11 +1169,74 @@ macro. For example, the above assignments to .I comment_caller could instead be written .nf + comment_caller = YY_START; .fi .PP Note that start conditions do not have their own name-space; %s's and %x's declare names in the same fashion as #define's. 
+.PP +Finally, here's an example of how to match C-style quoted strings using +exclusive start conditions, including expanded escape sequences (but +not including checking for a string that's too long): +.nf + + %x str + + %% + char string_buf[MAX_STR_CONST]; + char *string_buf_ptr; + + + \\" string_buf_ptr = string_buf; BEGIN(str); + + \\" { /* saw closing quote - all done */ + BEGIN(INITIAL); + *string_buf_ptr = '\\0'; + /* return string constant token type and + * value to parser + */ + } + + \\n { + /* error - unterminated string constant */ + /* generate error message */ + } + + \\\\[0-7]{1,3} { + /* octal escape sequence */ + int result; + + (void) sscanf( yytext + 1, "%o", &result ); + + if ( result > 0xff ) + /* error, constant is out-of-bounds */ + + *string_buf_ptr++ = result; + } + + \\\\[0-9]+ { + /* generate error - bad escape sequence; something + * like '\\48' or '\\0777777' + */ + } + + \\\\n *string_buf_ptr++ = '\\n'; + \\\\t *string_buf_ptr++ = '\\t'; + \\\\r *string_buf_ptr++ = '\\r'; + \\\\b *string_buf_ptr++ = '\\b'; + \\\\f *string_buf_ptr++ = '\\f'; + + \\\\(.|\\n) *string_buf_ptr++ = yytext[1]; + + [^\\\\\\n\\"]+ { + char *yytext_ptr = yytext; + + while ( *yytext_ptr ) + *string_buf_ptr++ = *yytext_ptr++; + } + +.fi .SH MULTIPLE INPUT BUFFERS Some scanners (such as those which support "include" files) require reading from several input streams. As @@ -1324,53 +1496,18 @@ part of the scanner might look like: [0-9]+ yylval = atoi( yytext ); return TOK_NUMBER; .fi -.SH TRANSLATION TABLE -In the name of POSIX compliance, -.I flex -supports a -.I translation table -for mapping input characters into groups. 
-The table is specified in the first section, and its format looks like: -.nf - - %t - 1 abcd - 2 ABCDEFGHIJKLMNOPQRSTUVWXYZ - 52 0123456789 - 6 \\t\\ \\n - %t - -.fi -This example specifies that the characters 'a', 'b', 'c', and 'd' -are to all be lumped into group #1, upper-case letters -in group #2, digits in group #52, tabs, blanks, and newlines into -group #6, and -.I -no other characters will appear in the patterns. -The group numbers are actually disregarded by -.I flex; -.B %t -serves, though, to lump characters together. Given the above -table, for example, the pattern "a(AA)*5" is equivalent to "d(ZQ)*0". -They both say, "match any character in group #1, followed by -zero-or-more pairs of characters -from group #2, followed by a character from group #52." Thus -.B %t -provides a crude way for introducing equivalence classes into -the scanner specification. -.PP -Note that the -.B \-i -option (see below) coupled with the equivalence classes which -.I flex -automatically generates take care of virtually all the instances -when one might consider using -.B %t. -But what the hell, it's there if you want it. .SH OPTIONS .I flex has the following options: .TP +.B \-a +(``align'') instructs flex to trade off larger tables in the +generated scanner for faster performance because the elements of +the tables are better aligned for memory access and computation. On some RISC +architectures, fetching and manipulating longwords is more efficient than +with smaller-sized datums such as shortwords. This option can +double the size of the tables used by your scanner. +.TP .B \-b Generate backing-up information to .I lex.backup. @@ -1384,8 +1521,8 @@ or is used, the generated scanner will run faster (see the .B \-p flag). Only users who wish to squeeze every last cycle out of their -scanners need worry about this option. (See the section on PERFORMANCE -CONSIDERATIONS below.) +scanners need worry about this option. (See the section on Performance +Considerations below.) 
.TP .B \-c is a do-nothing, deprecated option included for POSIX compliance. @@ -1441,6 +1578,13 @@ This option is equivalent to .B \-Cf (see below). .TP +.B \-h +generates a "help" summary of +.I flex's +options to +.I stderr +and then exits. +.TP .B \-i instructs .I flex @@ -1462,10 +1606,13 @@ POSIX compliance. generates a performance report to stderr. The report consists of comments regarding features of the .I flex -input file which will cause a loss of performance in the resulting scanner. +input file which will cause a serious loss of performance in the resulting +scanner. If you give the flag twice, you will also get comments regarding +features that lead to minor performance losses. +.IP Note that the use of .I REJECT -and variable trailing context (see the BUGS section in flex(1)) +and variable trailing context (see the Bugs section in flex(1)) entails a substantial performance penalty; use of .I yymore(), the @@ -1499,13 +1646,41 @@ should write to a summary of statistics regarding the scanner it generates. Most of the statistics are meaningless to the casual .I flex -user, but the -first line identifies the version of -.I flex, -which is useful for figuring -out where you stand with respect to patches and new releases, -and the next two lines give the date when the scanner was created -and a summary of the flags which were in effect. +user, but the first line identifies the version of +.I flex +(same as reported by +.B \-V), +and the next line the flags used when generating the scanner, including +those that are on by default. +.TP +.B \-w +suppresses warning messages. +.TP +.B \-B +instructs +.I flex +to generate a +.I batch +scanner, the opposite of +.I interactive +scanners generated by +.B \-I +(see below). In general, you use +.B \-B +when you are +.I certain +that your scanner will never be used interactively, and you want to +squeeze a +.I little +more performance out of it. 
If your goal is instead to squeeze out a +.I lot +more performance, you should be using the +.B \-Cf +or +.B \-CF +options (discussed below), which turn on +.B \-B +automatically anyway. .TP .B \-F specifies that the @@ -1542,43 +1717,44 @@ instructs .I flex to generate an .I interactive -scanner. Normally, scanners generated by -.I flex -always look ahead one -character before deciding that a rule has been matched. At the cost of -some scanning overhead, -.I flex -will generate a scanner which only looks ahead -when needed. Such scanners are called -.I interactive -because if you want to write a scanner for an interactive system such as a -command shell, you will probably want the user's input to be terminated -with a newline, and without -.B \-I -the user will have to type a character in addition to the newline in order -to have the newline recognized. This leads to dreadful interactive -performance. +scanner. An interactive scanner is one that only looks ahead to decide +what token has been matched if it absolutely must. It turns out that +always looking one extra character ahead, even if the scanner has already +seen enough text to disambiguate the current token, is a bit faster than +only looking ahead when necessary. But scanners that always look ahead +give dreadful interactive performance; for example, when a user types +a newline, it is not recognized as a newline token until they enter +.I another +token, which often means typing in another whole line. .IP -If all this seems to confusing, here's the general rule: if a human will -be typing in input to your scanner, use -.B \-I, -otherwise don't; if you don't care about squeezing the utmost performance -from your scanner and you -don't want to make any assumptions about the input to your scanner, +.I Flex +scanners default to +.I interactive +unless you use the +.B \-Cf +or +.B \-CF +table-compression options (see below). 
That's because if you're looking +for high-performance you should be using one of these options, so if you +didn't, +.I flex +assumes you'd rather trade off a bit of run-time performance for intuitive +interactive behavior. Note also that you +.I cannot use -.B \-I. -.IP -Note, .B \-I -cannot be used in conjunction with -.I full -or -.I fast tables, -i.e., the -.B \-f, \-F, \-Cf, +in conjunction with +.B \-Cf or -.B \-CF -flags. +.B \-CF. +Thus, this option is not really needed; it is on by default for all those +cases in which it is allowed. +.IP +You can force a scanner to +.I not +be interactive by using +.B \-B +(see above). .TP .B \-L instructs @@ -1614,29 +1790,73 @@ the form of the input and the resultant non-deterministic and deterministic finite automata. This option is mostly for use in maintaining .I flex. .TP -.B \-8 +.B \-V +prints the version number to +.I stderr +and exits. +.TP +.B \-7 instructs .I flex -to generate an 8-bit scanner, i.e., one which can recognize 8-bit -characters. On some sites, -.I flex -is installed with this option as the default. On others, the default -is 7-bit characters. To see which is the case, check the verbose -.B (\-v) -output for "equivalence classes created". If the denominator of -the number shown is 128, then by default +to generate a 7-bit scanner, i.e., one which can only recognize 7-bit +characters in its input. The advantage of using +.B \-7 +is that the scanner's tables can be up to half the size of those generated +using the +.B \-8 +option (see below). The disadvantage is that such scanners often hang +or crash if their input contains an 8-bit character. +.IP +Note, however, that unless you generate your scanner using the +.B \-Cf +or +.B \-CF +table compression options, use of +.B \-7 +will save only a small amount of table space, and make your scanner +considerably less portable.
+.I Flex's +default behavior is to generate an 8-bit scanner unless you use the +.B \-Cf +or +.B \-CF, +in which case .I flex -is generating 7-bit characters. If it is 256, then the default is -8-bit characters and the +defaults to generating 7-bit scanners unless your site was always +configured to generate 8-bit scanners (as will often be the case +with non-USA sites). You can tell whether flex generated a 7-bit +or an 8-bit scanner by inspecting the flag summary in the +.B \-v +output as described above. +.IP +Note that if you use +.B \-Cfe +or +.B \-CFe +(those table compression options, but also using equivalence classes as +discussed below), flex still defaults to generating an 8-bit +scanner, since usually with these compression options full 8-bit tables +are not much more expensive than 7-bit tables. +.TP .B \-8 -flag is not required (but may be a good idea to keep the scanner -specification portable). Feeding a 7-bit scanner 8-bit characters -will result in infinite loops, bus errors, or other such fireworks, -so when in doubt, use the flag. Note that if equivalence classes -are used, 8-bit scanners take only slightly more table space than -7-bit scanners (128 bytes, to be exact); if equivalence classes are -not used, however, then the tables may grow up to twice their -7-bit size. +instructs +.I flex +to generate an 8-bit scanner, i.e., one which can recognize 8-bit +characters. This flag is only needed for scanners generated using +.B \-Cf +or +.B \-CF, +as otherwise flex defaults to generating an 8-bit scanner anyway. +.IP +See the discussion of +.B \-7 +above for flex's default behavior and the tradeoffs between 7-bit +and 8-bit scanners. +.TP +.B \-+ +specifies that you want flex to generate a C++ +scanner class. See the section on Generating C++ Scanners below for +details. .TP .B \-C[efmF] controls the degree of table compression. @@ -1729,6 +1949,58 @@ compression. is often a good compromise between speed and size for production scanners.
+.TP +.B \-Pprefix +changes the default +.I "yy" +prefix used by +.I flex +for all globally-visible variable and function names to instead be +.I prefix. +For example, +.B \-Pfoo +changes the name of +.B yytext +to +.B footext. +It also changes the name of the default output file from +.B lex.yy.c +to +.B lex.foo.c. +Here are all of the names affected: +.nf + + yyFlexLexer + yy_create_buffer + yy_delete_buffer + yy_flex_debug + yy_init_buffer + yy_load_buffer_state + yy_switch_to_buffer + yyin + yyleng + yylex + yyout + yyrestart + yytext + yywrap + +.fi +Within your scanner itself, you can still refer to the global variables +and functions using either version of their name; but externally, they +have the modified name. +.IP +This option lets you easily link together multiple +.I flex +programs into the same executable. Note, though, that using this +option also renames +.B yywrap(), +so you now +.I must +provide your own (appropriately-named) version of the routine for your +scanner, as linking with +.B \-lfl +no longer provides one for you by default. +.TP .B \-Sskeleton_file overrides the default skeleton file from which .I flex @@ -1739,8 +2011,12 @@ maintenance or development. The main design goal of .I flex is that it generate high-performance scanners. It has been optimized -for dealing well with large sets of rules. Aside from the effects -of table compression on scanner speed outlined above, +for dealing well with large sets of rules. Aside from the effects on +scanner speed of the table compression +.B \-C +and +.B \-a +options outlined above, there are a number of options/actions which degrade performance. These are, from most expensive to least: .nf @@ -1901,8 +2177,15 @@ or as Note that here the special '|' action does .I not provide any savings, and can even make things worse (see -.B BUGS -in flex(1)).
+.PP +A final note regarding performance: as mentioned above in the section +How the Input is Matched, dynamically resizing +.B yytext +to accommodate huge tokens is a slow process because it presently requires that +the (huge) token be rescanned from the beginning. Thus if performance is +vital, you should attempt to match "large" quantities of text but not +"huge" quantities, where the cutoff between the two is at about 8K +characters/token. .PP Another area where the user can increase a scanner's performance (and one that's easier to implement) arises from the fact that @@ -2047,6 +2330,192 @@ multiple NUL's. It's best to write rules which match .I short amounts of text if it's anticipated that the text will often include NUL's. +.SH GENERATING C++ SCANNERS +.I flex +provides two different ways to generate scanners for use with C++. The +first way is to simply compile a scanner generated by +.I flex +using a C++ compiler instead of a C compiler. You should not encounter +any compilation errors (please report any you find to the email address +given in the Author section below). You can then use C++ code in your +rule actions instead of C code. Note that the default input source for +your scanner remains +.I yyin, +and default echoing is still done to +.I yyout. +Both of these remain +.I FILE * +variables and not C++ +.I streams. +.PP +You can also use +.I flex +to generate a C++ scanner class, using the +.B \-+ +option, which is automatically specified if the name of the flex +executable ends in a '+', such as +.I flex++. +When using this option, flex defaults to generating the scanner to the file +.B lex.yy.cc +instead of +.B lex.yy.c. +The generated scanner includes the header file +.I FlexLexer.h, +which defines the interface to two C++ classes. +.PP +The first class, +.B FlexLexer, +provides an abstract base class defining the general scanner class +interface.
It provides the following member functions: +.TP +.B const char* YYText() +returns the text of the most recently matched token, the equivalent of +.B yytext. +.TP +.B int YYLeng() +returns the length of the most recently matched token, the equivalent of +.B yyleng. +.PP +Also provided are member functions equivalent to +.B yy_switch_to_buffer(), +.B yy_create_buffer() +(though the first argument is an +.B istream* +object pointer and not a +.B FILE*), +.B yy_delete_buffer(), +and +.B yyrestart() +(again, the first argument is a +.B istream* +object pointer). +.PP +The second class defined in +.I FlexLexer.h +is +.B yyFlexLexer, +which is derived from +.B FlexLexer. +It defines the following additional member functions: +.TP +.B +yyFlexLexer( istream* arg_yyin = 0, ostream* arg_yyout = 0 ) +constructs a +.B yyFlexLexer +object using the given streams for input and output. If not specified, +the streams default to +.B cin +and +.B cout, +respectively. +.TP +.B virtual int yylex() +performs the same role as +.B yylex() +does for ordinary flex scanners: it scans the input stream, consuming +tokens, until a rule's action returns a value. +.PP +In addition, +.B yyFlexLexer +defines the following protected virtual functions which you can redefine +in derived classes to tailor the scanner's input and output: +.TP +.B +virtual int LexerInput( char* buf, int max_size ) +reads up to +.B max_size +characters into +.B buf +and returns the number of characters read. To indicate end-of-input, +return 0 characters. +.TP +.B +virtual void LexerOutput( const char* buf, int size ) +writes out +.B size +characters from the buffer +.B buf, +which, while NUL-terminated, may also contain "internal" NUL's if +the scanner's rules can match text with NUL's in them. +.PP +Note that a +.B yyFlexLexer +object contains its +.I entire +scanning state. Thus you can use such objects to create reentrant +scanners.
You can instantiate multiple instances of the same +.B yyFlexLexer +class, and you can also combine multiple C++ scanner classes together +in the same program using the +.B \-P +option discussed above. +.PP +Finally, note that the +.B %array +feature is not available to C++ scanner classes; you must use +.B %pointer +(the default). +.PP +Here is an example of a simple C++ scanner: +.nf + + // An example of using the flex C++ scanner class. + + %{ + int mylineno = 0; + %} + + string \\"[^\\n"]+\\" + + ws [ \\t]+ + + alpha [A-Za-z] + dig [0-9] + name ({alpha}|{dig}|\\$)({alpha}|{dig}|[_.\\-/$])* + num1 [-+]?{dig}+\\.?([eE][-+]?{dig}+)? + num2 [-+]?{dig}*\\.{dig}+([eE][-+]?{dig}+)? + number {num1}|{num2} + + %% + + {ws} /* skip blanks and tabs */ + + "/*" { + int c; + + while((c = yyinput()) != 0) + { + if(c == '\\n') + ++mylineno; + + else if(c == '*') + { + if((c = yyinput()) == '/') + break; + else + unput(c); + } + } + } + + {number} cout << "number " << YYText() << '\\n'; + + \\n mylineno++; + + {name} cout << "name " << YYText() << '\\n'; + + {string} cout << "string " << YYText() << '\\n'; + + %% + + int main( int /* argc */, char** /* argv */ ) + { + FlexLexer* lexer = new yyFlexLexer; + while(lexer->yylex() != 0) + ; + return 0; + } +.fi .SH INCOMPATIBILITIES WITH LEX AND POSIX .I flex is a rewrite of the Unix @@ -2057,20 +2526,16 @@ are of concern to those who wish to write scanners acceptable to either implementation. At present, the POSIX .I lex draft is -very close to the original +close to the original .I lex implementation, so some of these incompatibilities are also in conflict with the POSIX draft. But -the intent is that except as noted below, +the intent is that ultimately .I flex -as it presently stands will -ultimately be POSIX conformant (i.e., that those areas of conflict with -the POSIX draft will be resolved in -.I flex's -favor). Please bear in +will be fully POSIX-conformant. 
Please bear in mind that all the comments which follow are with regard to the POSIX .I draft -standard of Summer 1989, and not the final document (or subsequent +of Spring 1990 (draft 10), and not the final document (or subsequent drafts); they are included so .I flex users can be aware of the standardization issues and those areas where @@ -2138,11 +2603,7 @@ such writes are automatically flushed since .I lex scanners use .B getchar() -for their input. Also, when writing interactive scanners with -.I flex, -the -.B \-I -flag must be used. +for their input. .IP - .I flex scanners are not as reentrant as @@ -2164,6 +2625,11 @@ To reenter the scanner, first use .fi Note that this call will throw away any buffered input; usually this isn't a problem with an interactive scanner. +.IP +Also note that flex C++ scanner classes +.I are +reentrant, so if using C++ is an option for you, you should use +them instead. See "Generating C++ Scanners" above for details. .IP - .B output() is not supported. @@ -2174,9 +2640,8 @@ macro is done to the file-pointer (default .I stdout). .IP -The POSIX draft mentions that an .B output() -routine exists but currently gives no details as to what it does. +is not part of the POSIX draft. .IP - .I lex does not support exclusive start conditions (%x), though they @@ -2201,7 +2666,7 @@ and the precedence is such that the '?' is associated with .I flex, the rule will be expanded to "foo([A-Z][A-Z0-9]*)?" and so the string "foo" will match. -.PP +.IP Note that if the definition begins with .B ^ or ends with @@ -2235,17 +2700,6 @@ The (generate a Ratfor scanner) option is not supported. It is not part of the POSIX draft. .IP - -If you are providing your own yywrap() routine, you must include a -"#undef yywrap" in the definitions section (section 1). Note that -the "#undef" will have to be enclosed in %{}'s. 
-.IP -The POSIX draft -specifies that yywrap() is a function and this is very unlikely to change; so -.I flex users are warned -that -.B yywrap() -is likely to be changed to a function in the near future. -.IP - After a call to .B unput(), .I yytext @@ -2276,21 +2730,6 @@ or 'bar' anywhere", whereas interprets it as "match either 'foo' or 'bar' if they come at the beginning of a line". The latter is in agreement with the current POSIX draft. .IP - -To refer to yytext outside of the scanner source file, -the correct definition with -.I flex -is "extern char *yytext" rather than "extern char yytext[]". -This is contrary to the current POSIX draft but a point on which -.I flex -will not be changing, as the array representation entails a -serious performance penalty. It is hoped that the POSIX draft will -be emended to support the -.I flex -variety of declaration (as this is a fairly painless change to -require of -.I lex -users). -.IP - .I yyin is .I initialized @@ -2343,15 +2782,17 @@ or the POSIX draft standard: yyterminate() <> + <*> YY_DECL + YY_START + YY_USER_ACTION #line directives %{}'s around actions - yyrestart() - comments beginning with '#' (deprecated) multiple actions on a line .fi -This last feature refers to the fact that with +plus almost all of the flex flags. +The last feature in the list refers to the fact that with .I flex you can put multiple actions on the same line, separated with semi-colons, while with @@ -2372,6 +2813,23 @@ is (rather surprisingly) truncated to does not truncate the action. Actions that are not enclosed in braces are simply terminated at the end of the line. .SH DIAGNOSTICS +If you receive errors when linking a +.I flex +scanner complaining about the following missing routines: +.ds + yywrap + yy_flex_alloc + yy_flex_realloc + yy_flex_free +.de +then you forgot to link your program with +.B \-lfl. +This run-time library is +.I required +for all +.I flex +scanners. 
+.PP .I warning, rule cannot be matched indicates that the given rule cannot be matched because it follows other rules that will @@ -2390,8 +2848,8 @@ in a scanner suppresses this warning. .PP .I warning, .B \-s -.I option given but default rule -.I can be matched +.I +option given but default rule can be matched means that it is possible (perhaps only in a particular start condition) that the default rule (match any single character) is the only one that will match a particular input. Since @@ -2426,20 +2884,41 @@ people who can argue compellingly that they need it.) a scanner compiled with .B \-s has encountered an input string which wasn't matched by -any of its rules. -.PP -.I flex input buffer overflowed - -a scanner rule matched a string long enough to overflow the -scanner's internal input buffer (16K bytes by default - controlled by -.B YY_BUF_SIZE -in "flex.skel". Note that to redefine this macro, you must first -.B #undef -it). +any of its rules. This error can also occur due to internal problems. +.PP +.I token too large, exceeds YYLMAX - +your scanner uses +.B %array +and one of its rules matched a string longer than the +.B YYLMAX +constant (8K bytes by default). You can increase the value by +#define'ing +.B YYLMAX +in the definitions section of your +.I flex +input. +.PP +.I scanner requires \-8 flag to +.I use the character 'x' - +Your scanner specification includes recognizing the 8-bit character +.I 'x' +and you did not specify the \-8 flag, and your scanner defaulted to 7-bit +because you used the +.B \-Cf +or +.B \-CF +table compression options. See the discussion of the +.B \-7 +flag for details. .PP -.I scanner requires \-8 flag - -Your scanner specification includes recognizing 8-bit characters and -you did not specify the \-8 flag (and your site has not installed flex -with \-8 as the default). 
+.I flex scanner push-back overflow - +you used +.B unput() +to push back so much text that the scanner's buffer could not hold +both the pushed-back text and the current token in +.B yytext. +Ideally the scanner should dynamically resize the buffer in this case, but at +present it does not. .PP .I fatal flex scanner internal error--end of buffer missed - @@ -2451,17 +2930,15 @@ reentering the scanner, use: yyrestart( yyin ); .fi -.PP -.I too many %t classes! - -You managed to put every single character into its own %t class. -.I flex -requires that at least one of the classes share characters. +or, as noted above, switch to using the C++ scanner class. .PP .I too many start conditions in <> construct! - you listed more start conditions in a <> construct than exist (so you must have listed at least one of them twice). -.SH DEFICIENCIES / BUGS +.SH FILES See flex(1). +.SH DEFICIENCIES / BUGS +Again, see flex(1). .SH "SEE ALSO" .PP flex(1), lex(1), yacc(1), sed(1), awk(1). -- cgit v1.2.3 From 664fa5c992b1e8011b012df782c84544f35d3945 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 10 Nov 1993 10:07:50 +0000 Subject: 2.4 release --- NEWS | 276 +++++++++++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 202 insertions(+), 74 deletions(-) diff --git a/NEWS b/NEWS index 0214847..da7fc1c 100644 --- a/NEWS +++ b/NEWS @@ -1,77 +1,205 @@ -Changes between release 2.4 (04Oct93) and release 2.3: - -skeleton file: John Gilmore - flex.skel -> flex.skl - -S to go away unless hear it's useful -yywrap() a function -must link with libfl.a -C++: Kent Williams - use for reentrant scanners - C++ streams: Tom Epperly - lex.yy.cc - FlexLexer.h - flex++ -no temp file -manual/ -detects if -8 flag needed --h for help -buffer dynamically enlarges; but *slow* to match big tokens - and doesn't do so on unput() -fewer porting headaches; memset(), malloc() type in particular -MISC/ --P prefix --V -<*> -%array/%pointer - use %array to avoid input()/unput() trashing 
yytext - YYLMAX - not with -+ -flex -V --8 by default (except -C{f,F}?); -7 supported --A align option, promotes short -> long -number of fencepost table expansion bugs fixed -YY_START: integer value, can do stacks -yyrestart() no longer needed; just point yyin at a new file -fencepost bug fixes -'#' no longer a comment character -\n in flex input can optionally include leading or trailing \r -warnings for rules that can't be matched; REJECT obviates -more consistent identification of error locations -yyleng a global -definitions can now include leading '^' or trailing '$' -scanners are -I interactive if compressed, by default; -B -warnings for unmatchable rules; if -s given but default rule can be matched -YY_USER_ACTION only called for real actions -many misc bug fixes thanks to Gerhard Wilhelms -\n\r stuff -yyless() usable in section 3 (Ceriel Jacobs) -formfeed no longer a whitespace character -run-time detection of out-of-range characters (8 bit when built for 7 bit) -%t nuked -yyleng may be modified --w: suppress warnings --p -p: report minor performance problems, too -no more time information in -v output -MISC/fastwc -MISC/debflex.awk (Francois) -MISC/testxxLexerl.l -YY_NEW_FILE no longer needed -definitions with ^, $ allowed; not expanded inside parens -version.h - -corrected doc: -C options are cumulative - may modify yytext but not lengthen it (append chars) - modifying last char may affect anchoring - backtracking -> backing up - unindented comments allowed in first section, but not in second - yyless() only usable in scanner source, not externally - yyrestart(yyin) throws away current buffer - high-speed scanners: match as much text as possible w/ each rule - beginning-of-line operator is fairly cheap - unput() expensive, yyless() cheap - corrected backing-up example - -code reformatted +Changes between release 2.4 (09Nov93) and release 2.3: + + - The new '-+' flag instructs flex to generate a C++ scanner class + (thanks to Kent Williams). 
flex writes an implementation of the + class defined in FlexLexer.h to lex.yy.cc. You may include + multiple scanner classes in your program using the -P flag. Note + that the scanner class also provides a mechanism for creating + reentrant scanners. The scanner class uses C++ streams for I/O + instead of FILE*'s (thanks to Tom Epperly). If the flex executable's + name ends in '+' then the '-+' flag is automatically on, so creating + a symlink or copy of "flex" to "flex++" results in a version of + flex that can be used exclusively for C++ scanners. + + Note that without the '-+' flag, flex-generated scanners can still + be compiled using C++ compilers, though they use FILE*'s for I/O + instead of streams. + + See the "GENERATING C++ SCANNERS" section of flexdoc for details. + + - The new '-P' option specifies a prefix to use other than "yy" + for the scanner's globally-visible variables, and for the + "lex.yy.c" filename. Using -P you can link together multiple + flex scanners in the same executable. + + - The distribution includes G.T. Nicol's flex manual (note + that the manual has not yet been brought up to date; it + presently reflects flex version 2.3). See the manual/ + subdirectory. Note that the PostScript for the manual + presently lacks an index; this will be fixed soon. + + The distribution also includes a "texinfo" version of flexdoc.1, + contributed by Roland Pesch (thanks also to Marq Kole, who + contributed another version). It also has not been brought + up to date, but reflects version 2.3. See MISC/flex.texinfo. + + - yywrap() is now a function, and you now *must* link flex scanners + with libfl.a. + + - In the blessed name of POSIX compliance, flex supports "%array" + and "%pointer" directives in the definitions (first) section of + the scanner specification. The former specifies that yytext + should be an array (of size YYLMAX), the latter, that it should + be a pointer. 
The array version of yytext is universally slower + than the pointer version, but has the advantage that its contents + remain unmodified across calls to input() and unput() (the pointer + version of yytext is, still, trashed by such calls). + + "%array" cannot be used with the '-+' C++ scanner class option. + + - The new '-a' option directs flex to trade off memory for + natural alignment when generating a scanner's tables. In + particular, table entries that would otherwise be "short" + become "long". + + - The new '-h' option produces a summary of the flex flags. + + - The new '-V' option reports the flex version number and exits. + + - The new scanner macro YY_START returns an integer value + corresponding to the current start condition. You can return + to that start condition by passing the value to a subsequent + "BEGIN" action. You also can implement "start condition stacks" + by storing the values in an integer stack. + + - flex now generates warnings for rules that can't be matched. + These warnings can be turned off using the new '-w' flag. If + your scanner uses REJECT then you will not get these warnings. + + - If you specify the '-s' flag but the default rule can be matched, + flex now generates a warning. + + - "yyleng" is now a global, and may be modified by the user (though + doing so and then using yymore() will yield weird results). + + - Name definitions in the first section of a scanner specification + can now include a leading '^' or trailing '$' operator. In this + case, the definition is *not* pushed back inside of parentheses. + + - Scanners with compressed tables are now "interactive" (-I option) + by default. You can suppress this attribute (which makes them + run slightly slower) using the new '-B' flag. + + - Flex now generates 8-bit scanners by default, unless you use the + -Cf or -CF compression options (-Cfe and -CFe result in 8-bit + scanners). You can force it to generate a 7-bit scanner using + the new '-7' flag.
You can build flex to generate 8-bit scanners + for -Cf and -CF, too, by adding -DDEFAULT_CSIZE=256 to CFLAGS + in the Makefile. + + - You no longer need to call the scanner routine yyrestart() to + inform the scanner that you have switched to a new file after + having seen an EOF on the current input file. Instead, just + point yyin at the new file and continue scanning. + + - You no longer need to invoke YY_NEW_FILE in an <> action + to indicate you wish to continue scanning. Simply point yyin + at a new file. + + - A leading '#' no longer introduces a comment in a flex input. + + - flex no longer considers formfeed ('\f') a whitespace character. + + - %t, I'm happy to report, has been nuked. + + - The '-p' option may be given twice ('-pp') to instruct flex to + report minor performance problems as well as major ones. + + - The '-v' verbose output no longer includes start/finish time + information. + + - Newlines in flex inputs can optionally include leading or + trailing carriage-returns ('\r'), in support of several PC/Mac + run-time libraries that automatically include these. + + - A start condition of the form "<*>" makes the following rule + active in every start condition, whether exclusive or inclusive. + + - The following items have been corrected in the flex documentation: + + - '-C' table compression options *are* cumulative. + + - You may modify yytext but not lengthen it by appending + characters to the end. Modifying its final character + will affect '^' anchoring for the next rule matched + if the character is changed to or from a newline. + + - The term "backtracking" has been renamed "backing up", + since it is a one-time repositioning and not a repeated + search. What used to be the "lex.backtrack" file is now + "lex.backup". + + - Unindented "/* ... */" comments are allowed in the first + flex input section, but not in the second. + + - yyless() can only be used in the flex input source, not + externally. 
+ + - You can use "yyrestart(yyin)" to throw away the + current contents of the input buffer. + + - To write high-speed scanners, attempt to match as much + text as possible with each rule. See MISC/fastwc/README + for more information. + + - Using the beginning-of-line operator ('^') is fairly + cheap. Using unput() is expensive. Using yyless() is + cheap. + + - An example of scanning strings with embedded escape + sequences has been added. + + - The example of backing-up in flexdoc was erroneous; it + has been corrected. + + - A flex scanner's internal buffer now dynamically grows if needed + to match large tokens. Note that growing the buffer presently + requires rescanning the (large) token, so consuming a lot of + text this way is a slow process. Also note that presently the + buffer does *not* grow if you unput() more text than can fit + into the buffer. + + - The MISC/ directory has been reorganized; see MISC/README for + details. + + - yyless() can now be used in the third (user action) section + of a scanner specification, thanks to Ceriel Jacobs. yyless() + remains a macro and cannot be used outside of the scanner source. + + - The skeleton file is no longer opened at run-time, but instead + compiled into a large string array (thanks to John Gilmore and + friends at Cygnus). You can still use the -S flag to point flex + at a different skeleton file, though if you use this option let + me know, as I plan to otherwise do away with -S in the near + future. + + - flex no longer uses a temporary file to store the scanner's + actions. + + - A number of changes have been made to decrease porting headaches. + In particular, flex no longer uses memset() or ctime(), and + provides a single simple mechanism for dealing with C compilers + that still define malloc() as returning char* instead of void*. + + - Flex now detects if the scanner specification requires the -8 flag + but the flag was not given or on by default. 
+ + - A number of table-expansion fencepost bugs have been fixed, + making flex more robust for generating large scanners. + + - flex more consistently identifies the location of errors in + its input. + + - YY_USER_ACTION is now invoked only for "real" actions, not for + internal actions used by the scanner for things like filling + the buffer or handling EOF. + + - A large number of miscellaneous bugs have been found and fixed + thanks to Gerhard Wilhelms. + + - The source code has been heavily reformatted, making patches + relative to previous flex releases no longer accurate. + Changes between 2.3 Patch #8 (21Feb93) and 2.3 Patch #7: -- cgit v1.2.3 From c5afeb31daf4ea845f93e9aff945438bf28cf548 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 10 Nov 1993 10:27:04 +0000 Subject: Added Landon Noll to thanks. --- flex.1 | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/flex.1 b/flex.1 index 3d12fb9..d0b7ed3 100644 --- a/flex.1 +++ b/flex.1 @@ -2961,13 +2961,13 @@ Faylor, Jon Forrest, Eric Goldman, Ulrich Grepel, Jan Hajic, Jarkko Hietaniemi, Eric Hughes, Ceriel Jacobs, Jeffrey R. Jones, Amir Katz, ken@ken.hilco.com, Kevin B. Kenny, Marq Kole, Ronald Lamprecht, Greg Lee, Craig Leres, John Levine, Mohamed el Lozy, Chris Metcalf, Luke Mewburn, Jim -Meyering, Marc Nozell, Richard Ohnemus, Sven Panne, Roland Pesch, Walter -Pelissero, Gaumond Pierre, Francois Pinard, Esmond Pitt, Jef Poskanzer, -Kevin Rodgers, Jim Roskind, Doug Schmidt, Alex Siegel, Paul Stuart, Dave -Tallman, Paul Tuinenga, Gary Weik, Frank Whaley, Gerhard Wilhelms, Kent -Williams, Ken Yap, David Zuhn, and those whose names have slipped my -marginal mail-archiving skills but whose contributions are appreciated all -the same. 
+Meyering, Landon Noll, Marc Nozell, Richard Ohnemus, Sven Panne, Roland +Pesch, Walter Pelissero, Gaumond Pierre, Francois Pinard, Esmond Pitt, Jef +Poskanzer, Kevin Rodgers, Jim Roskind, Doug Schmidt, Alex Siegel, Paul +Stuart, Dave Tallman, Paul Tuinenga, Gary Weik, Frank Whaley, Gerhard +Wilhelms, Kent Williams, Ken Yap, David Zuhn, and those whose names have +slipped my marginal mail-archiving skills but whose contributions are +appreciated all the same. .PP Thanks to Keith Bostic, John Gilmore, Craig Leres, Bob Mulcahy, G.T. Nicol, Rich Salz, and Richard Stallman for help with various -- cgit v1.2.3 From a784260ef265f38612b19703b42632bda8fe4c1c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 10 Nov 1993 10:27:39 +0000 Subject: Heavily massaged for 2.4 --- README | 103 ++++++++++++++++++++++++++++++++--------------------------------- 1 file changed, 51 insertions(+), 52 deletions(-) diff --git a/README b/README index 482a951..3fde5b1 100644 --- a/README +++ b/README @@ -1,65 +1,27 @@ // $Header$ -This is release 2.3 of flex - a full release. +This is release 2.4 of flex. See "version.h" for the exact patch-level. -The flex distribution consists of the following files: - - README This message - - Makefile - flexdef.h - parse.y - scan.l - ccl.c - dfa.c - ecs.c flex sources - gen.c - main.c - misc.c - nfa.c - sym.c - tblcmp.c - yylex.c - - libmain.c flex library (-lfl) source - - initscan.c pre-flex'd version of scan.l - - flex.skel skeleton for generated scanners - - flexdoc.1 full user documentation - flex.1 reference documentation - - Changes Differences between this release and the previous one - - COPYING flex's copyright - - MISC/ a directory containing miscellaneous porting-related - notes (for Atari, MS-DOS, Turbo-C, and VMS) +Read the "Installation considerations" note in the Makefile and make any +necessary changes. 
+To make this version of flex for the first time (even if you've already +installed an earlier version of flex), use: -Decide where you want to keep flex.skel (suggestion: /usr/local/lib), -but don't move it there yet. Edit "Makefile" and change the definition -of SKELETON_FILE to reflect the full pathname of flex.skel. + make bootstrap -Read the "Porting considerations" note in the Makefile and make -the necessary changes. +and then -To make flex for the first time, use: + make - make first_flex +Assuming flex builds successfully, you can test it using -which uses the pre-generated copy of the flex scanner (the scanner -itself is written using flex). - -Assuming it builds successfully, you can test it using - - make test + make check The "diff" should not show any differences. -If you're feeling adventurous, issue "make bigtest" and be prepared -to wait a while. +If you're feeling adventurous, issue "make bigcheck" and be prepared to wait +a while. Install flex using: @@ -69,11 +31,48 @@ Install flex using: Please send problems and feedback to: vern@ee.lbl.gov - ucbvax!ee.lbl.gov!vern Vern Paxson - Computer Systems Engineering + Systems Engineering 46A/1123 Lawrence Berkeley Laboratory 1 Cyclotron Rd. Berkeley, CA 94720 + + +The flex distribution consists of the following files: + + README This message + + Makefile, flexdef.h, parse.y, scan.l, ccl.c, dfa.c, ecs.c, gen.c, + main.c, misc.c, nfa.c, sym.c, tblcmp.c, yylex.c + source files + + version.h version of this flex release + + flex.skl flex scanner skeleton + mkskel.sh script for converting flex.skl to C source file skel.c + + liballoc.c + libmain.c flex library (-lfl) sources + libyywrap.c + + initscan.c pre-flex'd version of scan.l + + FlexLexer.h header file for C++ lexer class + + flexdoc.1 full user documentation + flex.1 reference documentation + + flexdoc.man preformatted versions of documentation + flex.man + + manual/ A flex user manual written by G.T. Nicol. See + manual/README for details.
+ + Changes Differences between this release and the previous one + + COPYING flex's copyright + + MISC/ a directory containing miscellaneous contributions. + See MISC/README for details. -- cgit v1.2.3 From ce25824b1ce7b04df01169533d796359bd8f3ada Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 20 Nov 1993 14:31:36 +0000 Subject: No need to #undef before redefining prior to -Cr documentation --- flex.1 | 58 +++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/flex.1 b/flex.1 index d0b7ed3..00df47b 100644 --- a/flex.1 +++ b/flex.1 @@ -827,11 +827,10 @@ is declared as follows: .fi (If your environment supports function prototypes, then it will -be "int yylex( void )".) This definition may be changed by redefining +be "int yylex( void )".) This definition may be changed by defining the "YY_DECL" macro. For example, you could use: .nf - #undef YY_DECL #define YY_DECL float lexscan( a, b ) float a, b; .fi @@ -889,7 +888,7 @@ block-reads rather than simple .I getc() calls to read characters from .I yyin. -The nature of how it gets its input can be controlled by redefining the +The nature of how it gets its input can be controlled by defining the .B YY_INPUT macro. YY_INPUT's calling sequence is "YY_INPUT(buf,result,max_size)". Its @@ -904,12 +903,11 @@ number of characters read or the constant YY_NULL (0 on Unix systems) to indicate EOF. The default YY_INPUT reads from the global file-pointer "yyin". .PP -A sample redefinition of YY_INPUT (in the definitions +A sample definition of YY_INPUT (in the definitions section of the input file): .nf %{ - #undef YY_INPUT #define YY_INPUT(buf,result,max_size) \\ { \\ int c = getchar(); \\ @@ -1429,13 +1427,13 @@ An example: The macro .bd YY_USER_ACTION -can be redefined to provide an action +can be defined to provide an action which is always executed prior to the matched rule's action. 
For example, it could be #define'd to call a routine to convert yytext to lower-case. .PP The macro .B YY_USER_INIT -may be redefined to provide an action which is always executed before +may be defined to provide an action which is always executed before the first scan (and before the scanner's internal initializations are done). For example, it could be used to call a routine to read in a data table or open a logging file. @@ -2578,7 +2576,7 @@ processing is done. A ``real'' end-of-file is returned by as .I EOF. .IP -Input is instead controlled by redefining the +Input is instead controlled by defining the .B YY_INPUT macro. .IP @@ -2604,6 +2602,20 @@ such writes are automatically flushed since scanners use .B getchar() for their input. +.IP +You can make +.I +flex +scanners use stdio by changing the definition of +.B YY_INPUT +to: +.nf + #define YY_INPUT(buf,result,max_size) \\ + if ( ((result = fread( (char *) buf, 1, max_size, \\ + yyin )) == 0) && \\ + ferror( yyin ) ) \\ + YY_FATAL_ERROR( "fread() in flex scanner failed" ); +.fi .IP - .I flex scanners are not as reentrant as @@ -2705,7 +2717,10 @@ After a call to .I yytext and .I yyleng -are undefined until the next token is matched. This is not the case with +are undefined until the next token is matched, unless the scanner +was built using +.B %array. +This is not the case with .I lex or the present POSIX draft. .IP - @@ -2956,18 +2971,19 @@ Thanks to the many beta-testers, feedbackers, and contributors, especially Casey Leedom, Nelson H.F. Beebe, benson@odi.com, Peter A. Bigot, Keith Bostic, Frederic Brehm, Nick Christopher, Jason Coughlin, Bill Cox, Dave Curtis, Scott David -Daniels, Mike Donahue, Chuck Doucette, Tom Epperly, Leo Eskin, Chris -Faylor, Jon Forrest, Eric Goldman, Ulrich Grepel, Jan Hajic, Jarkko -Hietaniemi, Eric Hughes, Ceriel Jacobs, Jeffrey R. Jones, Amir Katz, -ken@ken.hilco.com, Kevin B. 
Kenny, Marq Kole, Ronald Lamprecht, Greg Lee, -Craig Leres, John Levine, Mohamed el Lozy, Chris Metcalf, Luke Mewburn, Jim -Meyering, Landon Noll, Marc Nozell, Richard Ohnemus, Sven Panne, Roland -Pesch, Walter Pelissero, Gaumond Pierre, Francois Pinard, Esmond Pitt, Jef -Poskanzer, Kevin Rodgers, Jim Roskind, Doug Schmidt, Alex Siegel, Paul -Stuart, Dave Tallman, Paul Tuinenga, Gary Weik, Frank Whaley, Gerhard -Wilhelms, Kent Williams, Ken Yap, David Zuhn, and those whose names have -slipped my marginal mail-archiving skills but whose contributions are -appreciated all the same. +Daniels, Chris G. Demetriou, Mike Donahue, Chuck Doucette, Tom Epperly, Leo +Eskin, Chris Faylor, Jon Forrest, Eric Goldman, Ulrich Grepel, Jan Hajic, +Jarkko Hietaniemi, Eric Hughes, Ceriel Jacobs, Jeffrey R. Jones, Henry +Juengst, Amir Katz, ken@ken.hilco.com, Kevin B. Kenny, Marq Kole, Ronald +Lamprecht, Greg Lee, Craig Leres, John Levine, Mohamed el Lozy, Chris +Metcalf, Luke Mewburn, Jim Meyering, G.T. Nicol, Landon Noll, Marc Nozell, +Richard Ohnemus, Sven Panne, Roland Pesch, Walter Pelissero, Gaumond +Pierre, Francois Pinard, Esmond Pitt, Jef Poskanzer, Kevin Rodgers, Jim +Roskind, Doug Schmidt, Alex Siegel, Paul Stuart, Dave Tallman, Paul +Tuinenga, Gary Weik, Frank Whaley, Gerhard Wilhelms, Kent Williams, Ken +Yap, David Zuhn, and those whose names have slipped my marginal +mail-archiving skills but whose contributions are appreciated all the +same. .PP Thanks to Keith Bostic, John Gilmore, Craig Leres, Bob Mulcahy, G.T. 
Nicol, Rich Salz, and Richard Stallman for help with various -- cgit v1.2.3 From e88e7bf172e37b10e3c84ec5baa7d2d7b4adad9e Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 20 Nov 1993 14:56:18 +0000 Subject: Documented -Cr --- flex.1 | 85 +++++++++++++++++++++++++++++++----------------------------------- 1 file changed, 40 insertions(+), 45 deletions(-) diff --git a/flex.1 b/flex.1 index 00df47b..69a33d5 100644 --- a/flex.1 +++ b/flex.1 @@ -3,7 +3,7 @@ flexdoc \- documentation for flex, fast lexical analyzer generator .SH SYNOPSIS .B flex -.B [\-abcdfhinpstvwBFILTV78+ \-C[efmF] \-Pprefix \-Sskeleton] +.B [\-abcdfhinpstvwBFILTV78+ \-C[efFmr] \-Pprefix \-Sskeleton] .I [filename ...] .SH DESCRIPTION .I flex @@ -658,9 +658,9 @@ of the scanner's actions it will slow down of the scanner's matching. Furthermore, .B REJECT cannot be used with the -.I -f +.I -Cf or -.I -F +.I -CF options (see below). .IP Note also that unlike the other special actions, @@ -1513,9 +1513,9 @@ This is a list of scanner states which require backing up and the input characters on which they do so. By adding rules one can remove backing-up states. If all backing-up states are eliminated and -.B \-f +.B \-Cf or -.B \-F +.B \-CF is used, the generated scanner will run faster (see the .B \-p flag). Only users who wish to squeeze every last cycle out of their @@ -1567,13 +1567,11 @@ a NUL; at this point, the two look the same as far as the scanner's concerned), or reaches an end-of-file. .TP .B \-f -specifies (take your pick) -.I full table -or +specifies .I fast scanner. -No table compression is done. The result is large but fast. -This option is equivalent to -.B \-Cf +No table compression is done and stdio is bypassed. +The result is large but fast. This option is equivalent to +.B \-Cfr (see below). .TP .B \-h @@ -1684,10 +1682,10 @@ automatically anyway. specifies that the .ul fast -scanner table representation should be used. 
This representation is +scanner table representation should be used (and stdio +bypassed). This representation is about as fast as the full table representation -.ul -(-f), +.B (-f), and for some sets of patterns will be considerably smaller (and for others, larger). In general, if the pattern set contains both "keywords" and a catch-all, "identifier" rule, such as in the set: @@ -1703,11 +1701,10 @@ and a catch-all, "identifier" rule, such as in the set: then you're better off using the full table representation. If only the "identifier" rule is present and you then use a hash table or some such to detect the keywords, you're better off using -.ul --F. +.B -F. .IP This option is equivalent to -.B \-CF +.B \-CFr (see below). .TP .B \-I @@ -1902,6 +1899,30 @@ classes are often a big win when using compressed tables, but they have a moderate performance impact (one or two "if" tests and one array look-up per character scanned). .IP +.B \-Cr +causes the generated scanner to +.I bypass +use of the standard I/O library (stdio) for input. Instead of calling +.B fread(), +the scanner will use the +.B read() +system call, resulting in a performance gain which varies from system +to system, but in general is probably negligible unless you are also using +.B \-Cf +or +.B \-CF. +Using +.B \-Cr +can cause strange behavior if, for example, you read from +.I yyin +using stdio prior to calling the scanner (because the scanner will miss +whatever text your previous reads left in the stdio input buffer). +.IP +.B \-Cr +has no effect if you define +.B YY_INPUT +(see The Generated Scanner above). +.IP A lone .B \-C specifies that the scanner tables should be compressed but neither @@ -2102,9 +2123,9 @@ been scanned and an 'r' does not follow. The final comment reminds us that there's no point going to all the trouble of removing backing up from the rules unless we're using -.B \-f +.B \-Cf or -.B \-F, +.B \-CF, since there's no performance gain doing so with compressed scanners. 
.PP The way to remove the backing up is to add "error" rules: @@ -2592,32 +2613,6 @@ scanner's input other than by making an initial assignment to .I yyin). .IP - .I flex -scanners do not use stdio for input. Because of this, when writing an -interactive scanner one must explicitly call fflush() on the -stream associated with the terminal after writing out a prompt. -With -.I lex -such writes are automatically flushed since -.I lex -scanners use -.B getchar() -for their input. -.IP -You can make -.I -flex -scanners use stdio by changing the definition of -.B YY_INPUT -to: -.nf - #define YY_INPUT(buf,result,max_size) \\ - if ( ((result = fread( (char *) buf, 1, max_size, \\ - yyin )) == 0) && \\ - ferror( yyin ) ) \\ - YY_FATAL_ERROR( "fread() in flex scanner failed" ); -.fi -.IP - -.I flex scanners are not as reentrant as .I lex scanners. In particular, if you have an interactive scanner and -- cgit v1.2.3 From 1d4525a89ac1fc96304dfdde2dfc29bf3df90e51 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 20 Nov 1993 15:03:48 +0000 Subject: K&R declaration for check_char() --- misc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/misc.c b/misc.c index 96cff93..9336880 100644 --- a/misc.c +++ b/misc.c @@ -149,7 +149,8 @@ int v[], n; * and exits. 
*/ -void check_char( int c ) +void check_char( c ) +int c; { if ( c >= CSIZE ) lerrsf( "bad character '%s' detected in check_char()", -- cgit v1.2.3 From 6e42b547eeb5b34c2ac5e2089611c5858c5fcf50 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 20 Nov 1993 15:04:32 +0000 Subject: Added -Cr --- main.c | 46 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/main.c b/main.c index bea8999..35aea29 100644 --- a/main.c +++ b/main.c @@ -52,7 +52,7 @@ void set_up_initial_allocations PROTO((void)); int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt; int interactive, caseins, useecs, fulltbl, usemecs; int fullspd, gen_line_dirs, performance_report, backing_up_report; -int C_plus_plus, long_align, yytext_is_array, csize; +int C_plus_plus, long_align, use_read, yytext_is_array, csize; int yymore_used, reject, real_reject, continued_action; int yymore_really_used, reject_really_used; int datapos, dataline, linenum; @@ -299,6 +299,8 @@ int exit_status; putc( 'e', stderr ); if ( usemecs ) putc( 'm', stderr ); + if ( use_read ) + putc( 'r', stderr ); if ( skelname ) fprintf( stderr, " -S%s", skelname ); @@ -423,7 +425,7 @@ char **argv; performance_report = 0; sawcmpflag = false; - use_stdout = false; + use_read = use_stdout = false; csize_given = false; interactive_given = false; @@ -506,6 +508,10 @@ char **argv; usemecs = true; break; + case 'r': + use_read = true; + break; + default: lerrif( "unknown -C option '%c'", @@ -521,12 +527,12 @@ char **argv; case 'f': useecs = usemecs = false; - fulltbl = true; + use_read = fulltbl = true; break; case 'F': useecs = usemecs = false; - fullspd = true; + use_read = fullspd = true; break; case 'h': @@ -767,6 +773,12 @@ void readin() { skelout(); + if ( yyparse() ) + { + pinpoint_message( "fatal parse error" ); + flexend( 1 ); + } + if ( csize == 256 ) puts( "typedef unsigned char YY_CHAR;" ); else @@ -784,22 +796,32 @@ void readin() if ( reject ) printf( 
"\n#define YY_USES_REJECT\n" ); - if ( C_plus_plus ) - printf( "\n#include \"FlexLexer.h\"\n" ); - if ( ddebug ) puts( "\n#define FLEX_DEBUG" ); skelout(); - line_directive_out( stdout ); - - if ( yyparse() ) + if ( ! C_plus_plus ) { - pinpoint_message( "fatal parse error" ); - flexend( 1 ); + if ( use_read ) + printf( +"\tif ( (result = read( fileno(yyin), (char *) buf, max_size )) < 0 ) \\\n" ); + else + { + printf( +"\tif ( ((result = fread( (char *) buf, 1, max_size, yyin )) == 0) && \\\n" ); + printf( +"\t ferror( yyin ) ) \\\n" ); + } } + skelout(); + + if ( C_plus_plus ) + printf( "\n#include \"FlexLexer.h\"\n" ); + + line_directive_out( stdout ); + if ( useecs ) numecs = cre8ecs( nextecm, ecgroup, csize ); else -- cgit v1.2.3 From c21cca430c765a25d5acd8e7ec472ac17b58f647 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 20 Nov 1993 15:04:49 +0000 Subject: Cleaner definition for yymore() Fixed string broken across multiple lines --- gen.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/gen.c b/gen.c index ee0f6ff..d4d6c06 100644 --- a/gen.c +++ b/gen.c @@ -1211,10 +1211,8 @@ void make_tables() indent_puts( "static int yy_more_len = 0;" ); } - indent_puts( - "#define yymore() do { yy_more_flag = 1; } while ( 0 )" ); - indent_puts( - "#define YY_MORE_ADJ yy_more_len" ); + indent_puts( "#define yymore() (yy_more_flag = 1)" ); + indent_puts( "#define YY_MORE_ADJ yy_more_len" ); } else @@ -1273,8 +1271,7 @@ void make_tables() printf( "else if ( yy_act < %d )\n", num_rules ); indent_up(); indent_puts( - "fprintf( - stderr, \"--accepting rule at line %d (\\\"%s\\\")\\n\"," ); + "fprintf( stderr, \"--accepting rule at line %d (\\\"%s\\\")\\n\"," ); indent_puts( " yy_rule_linenum[yy_act], yytext );" ); indent_down(); -- cgit v1.2.3 From a85c9f707cf27bb3e1778c25bc27bbcf5d584baa Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 20 Nov 1993 15:05:18 +0000 Subject: Added use_read global --- flexdef.h | 4 +++- 1 file changed, 3 
insertions(+), 1 deletion(-) diff --git a/flexdef.h b/flexdef.h index 110c025..b8492d7 100644 --- a/flexdef.h +++ b/flexdef.h @@ -322,6 +322,8 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; * C_plus_plus - if true (i.e., -+ flag), generate a C++ scanner class; * otherwise, a standard C scanner * long_align - if true (-a flag), favor long-word alignment. + * use_read - if true (-f, -F, or -Cr) then use read() for scanner input; + * otherwise, use fread(). * yytext_is_array - if true (i.e., %array directive), then declare * yytext as a array instead of a character pointer. Nice and inefficient. * csize - size of character set for the scanner we're generating; @@ -340,7 +342,7 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; extern int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt; extern int interactive, caseins, useecs, fulltbl, usemecs; extern int fullspd, gen_line_dirs, performance_report, backing_up_report; -extern int C_plus_plus, long_align, yytext_is_array, csize; +extern int C_plus_plus, long_align, use_read, yytext_is_array, csize; extern int yymore_used, reject, real_reject, continued_action; #define REALLY_NOT_DETERMINED 0 -- cgit v1.2.3 From 7438e63861cb1317977fc1f6fea29589a3276539 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 20 Nov 1993 15:05:34 +0000 Subject: Support for read()/fread() section 1 definitions precede default macro definitions --- flex.skl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/flex.skl b/flex.skl index fae24ce..98d8480 100644 --- a/flex.skl +++ b/flex.skl @@ -60,6 +60,7 @@ #define YY_PROTO(proto) () #endif +%% section 1 definitions go here /* Amount of stuff to slurp up with each read. 
*/ #ifndef YY_READ_BUF_SIZE @@ -87,19 +88,21 @@ */ #ifndef YY_INPUT #define YY_INPUT(buf,result,max_size) \ -%- Standard (non-C++) definition - if ( (result = read( fileno(yyin), (char *) buf, max_size )) < 0 ) \ +%% fread()/read() definition of YY_INPUT goes here unless we're doing C++ %+ C++ definition if ( (result = LexerInput( (char *) buf, max_size )) < 0 ) \ %* - YY_FATAL_ERROR( "read() in flex scanner failed" ); + YY_FATAL_ERROR( "input in flex scanner failed" ); #endif /* No semi-colon after return; correct usage is to write "yyterminate();" - * we don't want an extra ';' after the "return" because that will cause * some compilers to complain about unreachable statements. */ +#ifndef yyterminate #define yyterminate() return YY_NULL +#endif + #define YY_NULL 0 /* Report a fatal error. */ @@ -285,8 +288,6 @@ static int input YY_PROTO(( void )); #endif %* -%% section 1 definitions go here - %- Standard (non-C++) definition static yy_state_type yy_get_previous_state YY_PROTO(( void )); static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); -- cgit v1.2.3 From e0636c8ad09f2152137cf77f2e64528218b566c1 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 26 Nov 1993 15:08:34 +0000 Subject: -l lex compatibility flag --- flex.1 | 121 +++++++++++++++++++++++++++++++++++--------------------------- flex.skl | 17 +++++++-- flexdef.h | 3 +- gen.c | 27 ++++++++++++++ main.c | 65 +++++++++++++++++++++++++++++---- parse.y | 6 ++-- scan.l | 16 ++++++--- 7 files changed, 185 insertions(+), 70 deletions(-) diff --git a/flex.1 b/flex.1 index 69a33d5..bbb4bfc 100644 --- a/flex.1 +++ b/flex.1 @@ -3,7 +3,7 @@ flexdoc \- documentation for flex, fast lexical analyzer generator .SH SYNOPSIS .B flex -.B [\-abcdfhinpstvwBFILTV78+ \-C[efFmr] \-Pprefix \-Sskeleton] +.B [\-abcdfhilnpstvwBFILTV78+ \-C[efFmr] \-Pprefix \-Sskeleton] .I [filename ...] 
.SH DESCRIPTION .I flex @@ -454,7 +454,12 @@ uses by including one of the special directives or .B %array in the first (definitions) section of your flex input. The default is -.B %pointer. +.B %pointer, +unless you use the +.B -l +lex compatibility option, in which case +.B yytext +will be an array. The advantage of using .B %pointer is substantially faster scanning and no buffer overflow when matching @@ -1594,6 +1599,19 @@ matched text given in .I yytext will have the preserved case (i.e., it will not be folded). .TP +.B \-l +turns on maximum compatibility with the original AT&T +.I lex +implementation. Note that this does not mean +.I full +compatibility. Use of this option costs a considerable amount of +performance, and it cannot be used with the +.B \-+, -f, -F, -Cf, +or +.B -CF +options. For details on the compatibilities it provides, see the section +"Incompatibilities With Lex And POSIX" below. +.TP .B \-n is another do-nothing, deprecated option included only for POSIX compliance. @@ -1903,7 +1921,9 @@ array look-up per character scanned). causes the generated scanner to .I bypass use of the standard I/O library (stdio) for input. Instead of calling -.B fread(), +.B fread() +or +.B getc(), the scanner will use the .B read() system call, resulting in a performance gain which varies from system @@ -2537,30 +2557,33 @@ Here is an example of a simple C++ scanner: .fi .SH INCOMPATIBILITIES WITH LEX AND POSIX .I flex -is a rewrite of the Unix +is a rewrite of the AT&T Unix .I lex tool (the two implementations do not share any code, though), with some extensions and incompatibilities, both of which are of concern to those who wish to write scanners acceptable -to either implementation. At present, the POSIX +to either implementation. The POSIX .I lex -draft is -close to the original +specification is closer to +.I flex's +behavior than that of the original .I lex -implementation, so some of these -incompatibilities are also in conflict with the POSIX draft. 
But -the intent is that ultimately +implementation, but there also remain some incompatibilities between .I flex -will be fully POSIX-conformant. Please bear in -mind that all the comments which follow are with regard to the POSIX -.I draft -of Spring 1990 (draft 10), and not the final document (or subsequent -drafts); they are included so +and POSIX. The intent is that ultimately .I flex -users can be aware of the standardization issues and those areas where -.I flex -may in the near future undergo changes incompatible with -its current definition. +will be fully POSIX-conformant. In this section we discuss all of +the known areas of incompatibility. +.PP +.I flex's +.B \-l +option turns on maximum compatibility with the original AT&T +.I lex +implementation, at the cost of a major loss in the generated scanner's +performance. We note below which incompatibilities can be overcome +using the +.B \-l +option. .PP .I flex is fully compatible with @@ -2571,19 +2594,11 @@ The undocumented .I lex scanner internal variable .B yylineno -is not supported. It is difficult to support this option efficiently, -since it requires examining every character scanned and reexamining -the characters when the scanner backs up. -Things get more complicated when the end of buffer or file is reached or a -NUL is scanned (since the scan must then be restarted with the proper line -number count), or the user uses the yyless(), unput(), or REJECT actions, -or the multiple input buffer functions. -.IP -The fix is to add rules which, upon seeing a newline, increment -yylineno. This is usually an easy process, though it can be a drag if some -of the patterns can match multiple newlines along with other characters. +is not supported unless +.B \-l +is used. .IP -yylineno is not part of the POSIX draft. +yylineno is not part of the POSIX specification. 
.IP - The .B input() @@ -2605,12 +2620,10 @@ The .I flex restriction that .B input() -cannot be redefined is in accordance with the POSIX draft, but -.B YY_INPUT -has not yet been accepted into the draft (and probably won't; it looks -like the draft will simply not specify any way of controlling the +cannot be redefined is in accordance with the POSIX specification, +which simply does not specify any way of controlling the scanner's input other than by making an initial assignment to -.I yyin). +.I yyin. .IP - .I flex scanners are not as reentrant as @@ -2648,11 +2661,11 @@ macro is done to the file-pointer .I stdout). .IP .B output() -is not part of the POSIX draft. +is not part of the POSIX specification. .IP - .I lex does not support exclusive start conditions (%x), though they -are in the current POSIX draft. +are in the POSIX specification. .IP - When definitions are expanded, .I flex @@ -2689,23 +2702,19 @@ operators cannot be used in a .I flex definition. .IP -The POSIX draft interpretation is the same as -.I flex's. -.IP - -To specify a character class which matches anything but a left bracket (']'), -in +Using +.B \-l +results in the .I lex -one can use "[^]]" but with -.I flex -one must use "[^\\]]". The latter works with -.I lex, -too. +behavior of no parentheses around the definition. +.IP +The POSIX specification is that the definition be enclosed in parentheses. .IP - The .I lex .B %r (generate a Ratfor scanner) option is not supported. It is not part -of the POSIX draft. +of the POSIX specification. .IP - After a call to .B unput(), @@ -2717,7 +2726,9 @@ was built using .B %array. This is not the case with .I lex -or the present POSIX draft. +or the POSIX specification. The +.B \-l +option does away with this incompatibility. .IP - The precedence of the .B {} @@ -2728,7 +2739,7 @@ three occurrences of 'abc'", whereas .I flex interprets it as "match 'ab' followed by one, two, or three occurrences of 'c'". 
The latter is -in agreement with the current POSIX draft. +in agreement with the POSIX specification. .IP - The precedence of the .B ^ @@ -2738,7 +2749,7 @@ interprets "^foo|bar" as "match either 'foo' at the beginning of a line, or 'bar' anywhere", whereas .I flex interprets it as "match either 'foo' or 'bar' if they come at the beginning -of a line". The latter is in agreement with the current POSIX draft. +of a line". The latter is in agreement with the POSIX specification. .IP - .I yyin is @@ -2764,6 +2775,10 @@ subtle, but the net effect is that with scanners, .I yyin does not have a valid value until the scanner has been called. +.IP +The +.B \-l +option does away with this incompatibility. .IP - The special table-size declarations such as .B %a @@ -2787,7 +2802,7 @@ The following .I flex features are not included in .I lex -or the POSIX draft standard: +or the POSIX specification: .nf yyterminate() diff --git a/flex.skl b/flex.skl index 98d8480..cf3cb88 100644 --- a/flex.skl +++ b/flex.skl @@ -91,8 +91,8 @@ %% fread()/read() definition of YY_INPUT goes here unless we're doing C++ %+ C++ definition if ( (result = LexerInput( (char *) buf, max_size )) < 0 ) \ -%* YY_FATAL_ERROR( "input in flex scanner failed" ); +%* #endif /* No semi-colon after return; correct usage is to write "yyterminate();" - @@ -223,6 +223,13 @@ struct yy_buffer_state */ int yy_n_chars; + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int is_interactive; + /* Whether we've seen an EOF on this buffer. */ int yy_eof_status; #define EOF_NOT_SEEN 0 @@ -255,7 +262,7 @@ static int yy_n_chars; /* number of characters read into yy_ch_buf */ int yyleng; -FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; +%% yyin/yyout and (if -l option) yylineno definition & initialization goes here /* Points to current character in buffer. 
*/ static char *yy_c_buf_p = (char *) 0; @@ -377,6 +384,8 @@ yy_find_action: } #endif +%% code for yylineno update goes here, if -l option + do_action: /* This label is used only to access EOF actions. */ %% debug code goes here @@ -719,6 +728,8 @@ void yyFlexLexer::yyunput( int c, register char* yy_bp ) *--yy_cp = (char) c; +%% update yylineno here, if doing -l + /* Note: the formal parameter *must* be called "yy_bp" for this * macro to now work correctly. */ @@ -952,6 +963,8 @@ void yyFlexLexer::yy_init_buffer( YY_BUFFER_STATE b, istream* file ) b->yy_buf_pos = &b->yy_ch_buf[1]; + b->is_interactive = file ? isatty( fileno(file) ) : 0; + b->yy_eof_status = EOF_NOT_SEEN; } diff --git a/flexdef.h b/flexdef.h index b8492d7..149f6e7 100644 --- a/flexdef.h +++ b/flexdef.h @@ -309,6 +309,7 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; * spprdflt - if true (-s), suppress the default rule * interactive - if true (-I), generate an interactive scanner * caseins - if true (-i), generate a case-insensitive scanner + * lex_compat - if true (-l), maximize compatibility with AT&T lex * useecs - if true (-Ce flag), use equivalence classes * fulltbl - if true (-Cf flag), don't compress the DFA state table * usemecs - if true (-Cm flag), use meta-equivalence classes @@ -340,7 +341,7 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; */ extern int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt; -extern int interactive, caseins, useecs, fulltbl, usemecs; +extern int interactive, caseins, lex_compat, useecs, fulltbl, usemecs; extern int fullspd, gen_line_dirs, performance_report, backing_up_report; extern int C_plus_plus, long_align, use_read, yytext_is_array, csize; extern int yymore_used, reject, real_reject, continued_action; diff --git a/gen.c b/gen.c index d4d6c06..1c47e0a 100644 --- a/gen.c +++ b/gen.c @@ -1254,6 +1254,24 @@ void make_tables() set_indent( 2 ); gen_find_action(); + skelout(); + if ( lex_compat ) + { + indent_puts( "if ( yy_act != 
YY_END_OF_BUFFER )" ); + indent_up(); + indent_puts( "{" ); + indent_puts( "int yyl;" ); + indent_puts( "for ( yyl = 0; yyl < yyleng; ++yyl )" ); + indent_up(); + indent_puts( "if ( yytext[yyl] == '\\n' )" ); + indent_up(); + indent_puts( "++yylineno;" ); + indent_down(); + indent_down(); + indent_puts( "}" ); + indent_down(); + } + skelout(); if ( ddebug ) { @@ -1366,6 +1384,15 @@ void make_tables() skelout(); gen_NUL_trans(); + skelout(); + if ( lex_compat ) + { /* update yylineno inside of unput() */ + indent_puts( "if ( c == '\\n' )" ); + indent_up(); + indent_puts( "--yylineno;" ); + indent_down(); + } + skelout(); /* Copy remainder of input to output. */ diff --git a/main.c b/main.c index 35aea29..b70bdfb 100644 --- a/main.c +++ b/main.c @@ -50,7 +50,7 @@ void set_up_initial_allocations PROTO((void)); /* these globals are all defined and commented in flexdef.h */ int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt; -int interactive, caseins, useecs, fulltbl, usemecs; +int interactive, caseins, lex_compat, useecs, fulltbl, usemecs; int fullspd, gen_line_dirs, performance_report, backing_up_report; int C_plus_plus, long_align, use_read, yytext_is_array, csize; int yymore_used, reject, real_reject, continued_action; @@ -134,6 +134,14 @@ char **argv; if ( performance_report > 0 ) { + if ( lex_compat ) + { + fprintf( stderr, +"-l AT&T lex compatibility option entails a large performance penalty\n" ); + fprintf( stderr, +" and may be the actual source of other reported performance penalties\n" ); + } + if ( performance_report > 1 ) { if ( interactive ) @@ -264,6 +272,8 @@ int exit_status; putc( 'd', stderr ); if ( caseins ) putc( 'i', stderr ); + if ( lex_compat ) + putc( 'l', stderr ); if ( performance_report > 0 ) putc( 'p', stderr ); if ( performance_report > 1 ) @@ -418,6 +428,7 @@ char **argv; char *arg, *mktemp(); printstats = syntaxerror = trace = spprdflt = caseins = false; + lex_compat = false; C_plus_plus = backing_up_report = ddebug = 
fulltbl = fullspd = false; long_align = nowarn = yymore_used = continued_action = reject = false; yytext_is_array = yymore_really_used = reject_really_used = false; @@ -548,6 +559,10 @@ char **argv; caseins = true; break; + case 'l': + lex_compat = true; + break; + case 'L': gen_line_dirs = false; break; @@ -643,14 +658,31 @@ char **argv; interactive = true; } + if ( lex_compat ) + { + if ( C_plus_plus ) + flexerror( "Can't use -+ with -l option" ); + + if ( fulltbl || fullspd ) + flexerror( "Can't use -f or -F with -l option" ); + + /* Don't rely on detecting use of yymore() and REJECT, + * just assume they'll be used. + */ + yymore_really_used = reject_really_used = true; + + yytext_is_array = true; + use_read = false; + } + if ( (fulltbl || fullspd) && usemecs ) - flexerror( "full table and -Cm don't make sense together" ); + flexerror( "-f/-F and -Cm don't make sense together" ); if ( (fulltbl || fullspd) && interactive ) - flexerror( "full table and -I are incompatible" ); + flexerror( "-f/-F and -I are incompatible" ); if ( fulltbl && fullspd ) - flexerror( "full table and -F are mutually exclusive" ); + flexerror( "-f and -F are mutually exclusive" ); if ( ! use_stdout ) { @@ -804,19 +836,40 @@ void readin() if ( ! C_plus_plus ) { if ( use_read ) + { printf( "\tif ( (result = read( fileno(yyin), (char *) buf, max_size )) < 0 ) \\\n" ); + printf( + "\t\tYY_FATAL_ERROR( \"input in flex scanner failed\" );\n" ); + } + else { printf( -"\tif ( ((result = fread( (char *) buf, 1, max_size, yyin )) == 0) && \\\n" ); + "\tif ( yy_current_buffer->is_interactive ) \\\n" ); + printf( + "\t\tresult = (buf[0] = getc( yyin )) == EOF ? 
0 : 1; \\\n" ); printf( -"\t ferror( yyin ) ) \\\n" ); +"\telse if ( ((result = fread( (char *) buf, 1, max_size, yyin )) == 0)\\\n" ); + printf( "\t\t && ferror( yyin ) ) \\\n" ); + printf( + "\t\tYY_FATAL_ERROR( \"input in flex scanner failed\" );\n" ); } } skelout(); + if ( lex_compat ) + { + printf( "FILE *yyin = stdin, *yyout = stdout;\n" ); + printf( "extern int yylineno;\n" ); + printf( "int yylineno = 1;\n" ); + } + else + printf( "FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;\n" ); + + skelout(); + if ( C_plus_plus ) printf( "\n#include \"FlexLexer.h\"\n" ); diff --git a/parse.y b/parse.y index 46fc7f2..709996c 100644 --- a/parse.y +++ b/parse.y @@ -311,7 +311,7 @@ rule : re2 re headcnt = 0; } - if ( varlength && headcnt == 0 ) + if ( lex_compat || (varlength && headcnt == 0) ) { /* variable trailing context rule */ /* Mark the first part of the rule as the * accepting "head" part of a trailing @@ -359,7 +359,7 @@ rule : re2 re headcnt = 0; } - if ( varlength && headcnt == 0 ) + if ( lex_compat || (varlength && headcnt == 0) ) { /* Again, see the comment in the rule for * "re2 re" above. @@ -391,7 +391,7 @@ rule : re2 re if ( trlcontxt ) { - if ( varlength && headcnt == 0 ) + if ( lex_compat || (varlength && headcnt == 0) ) /* Both head and trail are * variable-length. 
*/ diff --git a/scan.l b/scan.l index c91b792..8686db1 100644 --- a/scan.l +++ b/scan.l @@ -109,7 +109,13 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} return SECTEND; } -^"%pointer".*{NL} ++linenum; yytext_is_array = false; +^"%pointer".*{NL} { + if ( lex_compat ) + warn( "%pointer incompatible with -l option" ); + else + yytext_is_array = false; + ++linenum; + } ^"%array".*{NL} { if ( C_plus_plus ) warn( "%array incompatible with -+ option" ); @@ -336,9 +342,9 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} { /* push back name surrounded by ()'s */ int len = strlen( nmdefptr ); - if ( nmdefptr[0] == '^' || + if ( lex_compat || nmdefptr[0] == '^' || (len > 0 && nmdefptr[len - 1] == '$') ) - { + { /* don't use ()'s after all */ PUT_BACK_STRING(nmdefptr, 0); if ( nmdefptr[0] == '^' ) @@ -381,8 +387,8 @@ CCL_CHAR [^\\\n\]]|{ESCSEQ} } -"^"/[^-\n] BEGIN(CCL); return '^'; -"^"/- return '^'; +"^"/[^-\]\n] BEGIN(CCL); return '^'; +"^"/("-"|"]") return '^'; . BEGIN(CCL); RETURNCHAR; -/[^\]\n] return '-'; -- cgit v1.2.3 From 529f1d0909198bf2e4b7ba7367b1555393a21ca4 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 26 Nov 1993 16:09:06 +0000 Subject: Documented YY_INTERACTIVE. --- flex.1 | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/flex.1 b/flex.1 index bbb4bfc..31bf7a8 100644 --- a/flex.1 +++ b/flex.1 @@ -2466,7 +2466,18 @@ reads up to characters into .B buf and returns the number of characters read. To indicate end-of-input, -return 0 characters. +return 0 characters. Note that "interactive" scanners (see the +.B \-B +and +.B \-I +flags) define the macro +.B YY_INTERACTIVE. +If you redefine +.B LexerInput() +and need to take different actions depending on whether or not +the scanner might be scanning an interactive input source, you can +test for the presence of this name via +.B #ifdef. 
.TP .B virtual void LexerOutput( const char* buf, int size ) -- cgit v1.2.3 From 9c6e5ab142375402e53400ace3d3381fb929c9c5 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 26 Nov 1993 16:09:24 +0000 Subject: Put definitions inside ()'s so we can test -l option for "make bigcheck" --- scan.l | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scan.l b/scan.l index 8686db1..07967fb 100644 --- a/scan.l +++ b/scan.l @@ -68,17 +68,17 @@ WS [ \t]+ OPTWS [ \t]* NOT_WS [^ \t\n] -NL \n|\r\n|\n\r +NL (\n|\r\n|\n\r) -NAME [a-z_][a-z_0-9-]* +NAME ([a-z_][a-z_0-9-]*) NOT_NAME [^a-z_*\n]+ SCNAME {NAME} -ESCSEQ \\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2}) +ESCSEQ (\\([^\n]|[0-9]{1,3}|x[0-9a-f]{1,2})) -FIRST_CCL_CHAR [^\\\n]|{ESCSEQ} -CCL_CHAR [^\\\n\]]|{ESCSEQ} +FIRST_CCL_CHAR ([^\\\n]|{ESCSEQ}) +CCL_CHAR ([^\\\n\]]|{ESCSEQ}) %% static int bracelevel, didadef, indented_code, checking_used; -- cgit v1.2.3 From 9827cd1b55e78e9c2bfd1bb229be426b95a83abf Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 26 Nov 1993 16:09:59 +0000 Subject: Added YY_INTERACTIVE. --- main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/main.c b/main.c index b70bdfb..9a87072 100644 --- a/main.c +++ b/main.c @@ -817,8 +817,13 @@ void readin() puts( "typedef char YY_CHAR;" ); if ( C_plus_plus ) + { puts( "#define yytext_ptr yytext" ); + if ( interactive ) + puts( "#define YY_INTERACTIVE" ); + } + if ( fullspd ) printf( "typedef const struct yy_trans_info *yy_state_type;\n" ); -- cgit v1.2.3 From 4c8a17578a2c19ad1ef68f39916215fa75052ca8 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 26 Nov 1993 16:10:13 +0000 Subject: Modified C++ scanners to get input a character at a time for interactive scanners. --- flex.skl | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/flex.skl b/flex.skl index cf3cb88..efcbe3d 100644 --- a/flex.skl +++ b/flex.skl @@ -514,17 +514,34 @@ do_action: /* This label is used only to access EOF actions. 
*/ } /* end of yylex */ %+ +#ifdef YY_INTERACTIVE +int yyFlexLexer::LexerInput( char* buf, int /* max_size */ ) +#else int yyFlexLexer::LexerInput( char* buf, int max_size ) +#endif { if ( yyin->eof() || yyin->fail() ) return 0; +#ifdef YY_INTERACTIVE + yyin->get( buf[0] ); + + if ( yyin->eof() ) + return 0; + + if ( yyin->bad() ) + return -1; + + return 1; + +#else (void) yyin->read( buf, max_size ); if ( yyin->bad() ) return -1; else return yyin->gcount(); +#endif } void yyFlexLexer::LexerOutput( const char* buf, int size ) @@ -963,7 +980,11 @@ void yyFlexLexer::yy_init_buffer( YY_BUFFER_STATE b, istream* file ) b->yy_buf_pos = &b->yy_ch_buf[1]; +%- b->is_interactive = file ? isatty( fileno(file) ) : 0; +%+ + b->is_interactive = 0; +%* b->yy_eof_status = EOF_NOT_SEEN; } -- cgit v1.2.3 From 21647d7481d96fa849cf027c39ccf4b24e6fc69c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 26 Nov 1993 16:40:28 +0000 Subject: Added Francois Pinard to distribution headache helpers --- flex.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flex.1 b/flex.1 index 31bf7a8..1b4340a 100644 --- a/flex.1 +++ b/flex.1 @@ -3007,7 +3007,7 @@ mail-archiving skills but whose contributions are appreciated all the same. .PP Thanks to Keith Bostic, John Gilmore, Craig Leres, Bob Mulcahy, G.T. -Nicol, Rich Salz, and Richard Stallman for help with various +Nicol, Francois Pinard, Rich Salz, and Richard Stallman for help with various distribution headaches. 
.PP Thanks to Esmond Pitt and Earle Horton for 8-bit character support; to -- cgit v1.2.3 From 4bcb5af7af9a30e220fc057beca56a2c058f4b4f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 26 Nov 1993 16:59:58 +0000 Subject: Removed #ifndef FILE protection from include of stdio --- flexdef.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/flexdef.h b/flexdef.h index 149f6e7..c4b98c5 100644 --- a/flexdef.h +++ b/flexdef.h @@ -28,10 +28,7 @@ /* @(#) $Header$ (LBL) */ -#ifndef FILE #include -#endif - #include #include -- cgit v1.2.3 From fe79b4c763cb497df8e0217ad04428c4a6600244 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 26 Nov 1993 17:02:12 +0000 Subject: Initial revision --- Makefile.in | 233 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 233 insertions(+) create mode 100644 Makefile.in diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 0000000..d252fa4 --- /dev/null +++ b/Makefile.in @@ -0,0 +1,233 @@ +# @(#) $Header$ (LBL) + +SHELL = /bin/sh +srcdir = @srcdir@ +VPATH = @srcdir@ + +YACC = @YACC@ +CC = @CC@ +AR = ar +RANLIB = @RANLIB@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +MAKEINFO = makeinfo +TEXI2DVI = texi2dvi + +# If your version of "make" does not define $(MAKE), comment in the +# definition of "MAKE" below. (You only need to do this if you intend +# to do "make bigcheck".) +# MAKE = make + +# Possible values for DEFS: +# +# For flex to always generate 8-bit scanners, add "-DDEFAULT_CSIZE=256" +# to DEFS. +# +# If your C run-time library defines malloc() as returning char* +# instead of void*, add "-DMALLOC_TYPE=char" to DEFS. (Note that on +# many systems, even if your malloc() returns char* you can get away +# with skipping this step.) +# +# For Vax/VMS, add "-DVMS" to DEFS. +# +# For MS-DOS, add "-DMS_DOS" to DEFS. See the directory MISC/MSDOS for +# additional info. 
+ +CFLAGS = -O +DEFS = @DEFS@ +LDFLAGS = +LIBS = @LIBS@ + +# Installation targeting. Files will be installed under the tree +# rooted at prefix. flex will be installed in bindir, libfl.a in +# libdir, FlexLexer.h will be installed in includedir, and the manual +# pages will be installed in mandir with extension manext. +# +# Raw, unformatted troff source will be installed if INSTALLMAN=man, +# nroff preformatted versions will be installed if INSTALLMAN=cat. + +prefix = /usr/local +bindir = $(prefix)/bin +libdir = $(prefix)/lib +includedir = $(prefix)/include +manext = 1 +mandir = $(prefix)/man/man$(manext) + +INSTALLMAN = man + +# You normally do not need to modify anything below this point. +# ------------------------------------------------------------ + +CPPFLAGS = -I. -I$(srcdir) $(DEFS) + +.c.o: + $(CC) -c $(CPPFLAGS) $(CFLAGS) $< + +HEADERS = flexdef.h version.h + +SOURCES = ccl.c dfa.c ecs.c gen.c main.c misc.c nfa.c parse.y \ + scan.l skel.c sym.c tblcmp.c yylex.c +OBJECTS = ccl.o dfa.o ecs.o gen.o main.o misc.o nfa.o parse.o \ + scan.o skel.o sym.o tblcmp.o yylex.o + +LIBSRCS = liballoc.c libmain.c libyywrap.c +LIBOBJS = liballoc.o libmain.o libyywrap.o + +LINTSRCS = ccl.c dfa.c ecs.c gen.c main.c misc.c nfa.c parse.c \ + scan.c skel.c sym.c tblcmp.c yylex.c + +DISTFILES = README NEWS COPYING INSTALL FlexLexer.h \ + configure.in Makefile.in mkskel.sh flex.skl \ + $(HEADERS) $(SOURCES) $(LIBSRCS) MISC \ + flex.1 flexdoc.1 manual \ + configure initscan.c flex.man flexdoc.man + +# which "flex" to use to generate scan.c from scan.l +FLEX = ./flex +FLEX_FLAGS = -ipst +COMPRESSION = + +FLEXLIB = libfl.a + + +all: .bootstrap flex + +.bootstrap: initscan.c + @rm -f scan.c + cp $(srcdir)/initscan.c scan.c + touch .bootstrap + +flex: $(OBJECTS) $(FLEXLIB) + $(CC) $(CFLAGS) -o flex $(LDFLAGS) $(OBJECTS) $(FLEXLIB) $(LIBS) + +parse.c: parse.y + $(YACC) -d $(srcdir)/parse.y + @sed '/extern char.*malloc/d' <y.tab.c >parse.c + @rm -f y.tab.c + @mv y.tab.h parse.h + +parse.h:
parse.c + +scan.c: scan.l + $(FLEX) $(FLEX_FLAGS) $(COMPRESSION) $(srcdir)/scan.l \ + | sed s,\"$(srcdir)/scan.l\",\"scan.l\", >scan.c + +scan.o: scan.c parse.h flexdef.h + +skel.c: flex.skl mkskel.sh + $(SHELL) $(srcdir)/mkskel.sh $(srcdir)/flex.skl >skel.c + +main.o: main.c flexdef.h version.h +ccl.o: ccl.c flexdef.h +dfa.o: dfa.c flexdef.h +ecs.o: ecs.c flexdef.h +gen.o: gen.c flexdef.h +misc.o: misc.c flexdef.h +nfa.o: nfa.c flexdef.h +parse.o: parse.c flexdef.h +skel.o: skel.c flexdef.h +sym.o: sym.c flexdef.h +tblcmp.o: tblcmp.c flexdef.h +yylex.o: yylex.c flexdef.h + +test: check +check: .bootstrap flex + ./flex $(FLEX_FLAGS) $(COMPRESSION) $(srcdir)/scan.l \ + | sed s,\"$(srcdir)/scan.l\",\"scan.l\", \ + | diff scan.c - + @echo "Check successful, using COMPRESSION=\"$(COMPRESSION)\"" + +bigcheck: + rm -f scan.c ; $(MAKE) COMPRESSION="-C" check + rm -f scan.c ; $(MAKE) COMPRESSION="-Ce" check + rm -f scan.c ; $(MAKE) COMPRESSION="-Cm" check + rm -f scan.c ; $(MAKE) COMPRESSION="-Cfe" check + rm -f scan.c ; $(MAKE) COMPRESSION="-CFe" check + rm -f scan.c ; $(MAKE) + @echo "All checks successful" + +$(FLEXLIB): $(LIBOBJS) + $(AR) cru $(FLEXLIB) $(LIBOBJS) + -$(RANLIB) $(FLEXLIB) + +flex.man: flex.1 + cd $(srcdir); nroff -man flex.1 >flex.man + +flexdoc.man: flexdoc.1 + cd $(srcdir); nroff -man flexdoc.1 >flexdoc.man + +install: flex install.$(INSTALLMAN) install-lib install-inc + @rm -f $(bindir)/flex + $(INSTALL_PROGRAM) flex $(bindir)/flex + @echo "NOTE: If you want to create flex++, either copy or link" + @echo "==== $(bindir)/flex to $(bindir)/flex++" + +install-lib: $(libdir) $(FLEXLIB) + @rm -f $(libdir)/libfl.a + $(INSTALL_DATA) $(FLEXLIB) $(libdir)/libfl.a + +install-inc: + @rm -f $(includedir)/FlexLexer.h + $(INSTALL_DATA) $(srcdir)/FlexLexer.h $(includedir)/FlexLexer.h + +install.man: flex.1 flexdoc.1 + @rm -f $(mandir)/flex.$(manext) $(mandir)/flexdoc.$(manext) + $(INSTALL_DATA) $(srcdir)/flex.1 $(mandir)/flex.$(manext) + $(INSTALL_DATA) 
$(srcdir)/flexdoc.1 $(mandir)/flexdoc.$(manext) + +install.cat: flex.man flexdoc.man + @rm -f $(mandir)/flex.$(manext) $(mandir)/flexdoc.$(manext) + $(INSTALL_DATA) $(srcdir)/flex.man $(mandir)/flex.$(manext) + $(INSTALL_DATA) $(srcdir)/flexdoc.man $(mandir)/flexdoc.$(manext) + +uninstall: + rm -f $(bindir)/flex $(bindir)/flex++ + rm -f $(libdir)/libfl.a + rm -f $(includedir)/FlexLexer.h + rm -f $(mandir)/flex.$(manext) $(mandir)/flexdoc.$(manext) + +tags: $(SOURCES) + ctags $(SOURCES) + +lint: $(LINTSRCS) + lint $(LINTSRCS) > flex.lint + +gcc-lint: $(LINTSRCS) + gcc -Dlint -Wall $(LINTSRCS) >flex.gcc-lint 2>&1 + +mostlyclean: + rm -f *~ *.bak core errs + +clean: mostlyclean + rm -f flex *.o parse.c *.lint parse.h lex.yy.c $(FLEXLIB) + +distclean: clean + rm -f .bootstrap scan.c tags TAGS Makefile config.status + +realclean: distclean + rm -f flex.man flexdoc.man flex*.tar.gz + +dist: $(DISTFILES) + echo `pwd` | sed 's|.*/||' > .fname + rm -rf `cat .fname` + mkdir `cat .fname` + chmod 777 `cat .fname` + tar cf - $(DISTFILES) | (cd `cat .fname`; tar xf -) + @rm -f `cat .fname`/initscan.c + cp `cat .fname`/scan.c `cat .fname`/initscan.c + chmod -R a+r `cat .fname` + @rm -f flex.tar flex.tar.Z + tar chf `cat .fname`.tar `cat .fname` + gzip -8 `cat .fname`.tar + rm -rf `cat .fname` .fname + +Makefile: Makefile.in config.status + $(SHELL) config.status +config.status: configure + $(SHELL) config.status --recheck +configure: configure.in + cd $(srcdir); autoconf + +# Prevent GNU make v3 from overflowing arg limit on SysV.
+.NOEXPORT: -- cgit v1.2.3 From 7aa92383f9fc7feece5723debafad3c974867acb Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 27 Nov 1993 12:56:32 +0000 Subject: Merge w/ 2.4.1 changes added "dist2" target --- Makefile.in | 59 ++++++++++++++++++++++++++++++----------------------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/Makefile.in b/Makefile.in index d252fa4..49a5302 100644 --- a/Makefile.in +++ b/Makefile.in @@ -1,19 +1,5 @@ # @(#) $Header$ (LBL) -SHELL = /bin/sh -srcdir = @srcdir@ -VPATH = @srcdir@ - -YACC = @YACC@ -CC = @CC@ -AR = ar -RANLIB = @RANLIB@ -INSTALL = @INSTALL@ -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ -MAKEINFO = makeinfo -TEXI2DVI = texi2dvi - # If your version of "make" does not define $(MAKE), comment in the # definition of "MAKE" below. (You only need to do this if you intend # to do "make bigcheck".) @@ -56,6 +42,19 @@ mandir = $(prefix)/man/man$(manext) INSTALLMAN = man +SHELL = /bin/sh +srcdir = @srcdir@ +VPATH = @srcdir@ + +YACC = @YACC@ +CC = @CC@ +AR = ar +RANLIB = @RANLIB@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +COMPRESS = compress + # You normally do not need to modify anything below this point. 
# ------------------------------------------------------------ @@ -81,12 +80,15 @@ DISTFILES = README NEWS COPYING INSTALL FlexLexer.h \ configure.in Makefile.in mkskel.sh flex.skl \ $(HEADERS) $(SOURCES) $(LIBSRCS) MISC \ flex.1 flexdoc.1 manual \ - configure initscan.c flex.man flexdoc.man + configure flex.man flexdoc.man + +DIST_NAME = flex # which "flex" to use to generate scan.c from scan.l FLEX = ./flex -FLEX_FLAGS = -ipst +FLEX_FLAGS = -ist $(PERF_REPORT) COMPRESSION = +PERF_REPORT = -p FLEXLIB = libfl.a @@ -206,21 +208,20 @@ distclean: clean rm -f .bootstrap scan.c tags TAGS Makefile config.status realclean: distclean - rm -f flex.man flexdoc.man flex*.tar.gz + rm -f flex.man flexdoc.man flex*.tar.gz flex*.tar.Z dist: $(DISTFILES) - echo `pwd` | sed 's|.*/||' > .fname - rm -rf `cat .fname` - mkdir `cat .fname` - chmod 777 `cat .fname` - tar cf - $(DISTFILES) | (cd `cat .fname`; tar xf -) - @rm -f `cat .fname`/initscan.c - cp `cat .fname`/scan.c `cat .fname`/initscan.c - chmod -R a+r `cat .fname` - @rm -f flex.tar flex.tar.Z - tar chf `cat .fname`.tar `cat .fname` - gzip -8 `cat .fname`.tar - rm -rf `cat .fname` .fname + $(MAKE) DIST_NAME=`pwd | sed 's|.*/||'` dist2 + +dist2: + @rm -rf $(DIST_NAME) + @mkdir $(DIST_NAME) + tar cf - $(DISTFILES) | (cd $(DIST_NAME); tar xfB -) + @mv $(DIST_NAME)/scan.c $(DIST_NAME)/initscan.c + @rm -f flex.tar flex.tar.Z flex.tar.gz + tar chf $(DIST_NAME).tar $(DIST_NAME) + $(COMPRESS) $(DIST_NAME).tar + @rm -rf $(DIST_NAME) Makefile: Makefile.in config.status $(SHELL) config.status -- cgit v1.2.3 From 1b14cd1a9981246c7399496080b9b6ab0a810d44 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 27 Nov 1993 12:57:20 +0000 Subject: Initial revision --- configure.in | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 configure.in diff --git a/configure.in b/configure.in new file mode 100644 index 0000000..88b357b --- /dev/null +++ b/configure.in @@ -0,0 +1,11 @@ +dnl Process this file with autoconf to produce a 
configure script. +dnl +AC_INIT(initscan.c) + +AC_PROG_YACC +AC_PROG_CC +AC_PROG_RANLIB +AC_PROG_INSTALL +AC_CONST + +AC_OUTPUT(Makefile) -- cgit v1.2.3 From 63b19ea9476de438a46b3e1c6eb50920edf2ea76 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 27 Nov 1993 13:20:54 +0000 Subject: 2.4.1 release --- NEWS | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/NEWS b/NEWS index da7fc1c..1a7165e 100644 --- a/NEWS +++ b/NEWS @@ -17,25 +17,38 @@ Changes between release 2.4 (09Nov93) and release 2.3: See the "GENERATING C++ SCANNERS" section of flexdoc for details. + - The new '-l' flag turns on maximum AT&T lex compatibility. In + particular, -l includes support for "yylineno" and makes yytext + be an array instead of a pointer. It does not, however, do away + with all incompatibilities. See the "INCOMPATIBILITIES WITH LEX + AND POSIX" section of flexdoc for details. + - The new '-P' option specifies a prefix to use other than "yy" for the scanner's globally-visible variables, and for the "lex.yy.c" filename. Using -P you can link together multiple flex scanners in the same executable. - - The distribution includes G.T. Nicol's flex manual (note - that the manual has not yet been brought up to date; it - presently reflects flex version 2.3). See the manual/ - subdirectory. Note that the PostScript for the manual - presently lacks an index; this will be fixed soon. - - The distribution also includes a "texinfo" version of flexdoc.1, + - The distribution includes a "texinfo" version of flexdoc.1, contributed by Roland Pesch (thanks also to Marq Kole, who - contributed another version). It also has not been brought - up to date, but reflects version 2.3. See MISC/flex.texinfo. + contributed another version). It has not been brought up to + date, but reflects version 2.3. See MISC/flex.texinfo. + + The flex distribution will soon include G.T. 
Nicol's flex + manual; he is presently bringing it up-to-date for version 2.4. - yywrap() is now a function, and you now *must* link flex scanners with libfl.a. + - Site-configuration is now done via an autoconf-generated + "configure" script contributed by Francois Pinard. + + - Scanners now use fread() (or getc(), if interactive) and not + read() for input. A new "table compression" option, -Cr, + overrides this change and causes the scanner to use read() + (because read() is a bit faster than fread()). -f and -F + are now equivalent to -Cfr and -CFr; i.e., they imply the + -Cr option. + - In the blessed name of POSIX compliance, flex supports "%array" and "%pointer" directives in the definitions (first) section of the scanner specification. The former specifies that yytext @@ -62,6 +75,10 @@ Changes between release 2.4 (09Nov93) and release 2.3: "BEGIN" action. You also can implement "start condition stacks" by storing the values in an integer stack. + - You can now redefine macros such as YY_INPUT by just #define'ing + them to some other value in the first section of the flex input; + no need to first #undef them. + - flex now generates warnings for rules that can't be matched. These warnings can be turned off using the new '-w' flag. If your scanner uses REJECT then you will not get these warnings. @@ -194,6 +211,10 @@ Changes between release 2.4 (09Nov93) and release 2.3: internal actions used by the scanner for things like filling the buffer or handling EOF. + - The rule "[^]]" now matches any character other than a ']'; + formerly it matched any character at all followed by a ']'. + This change was made for compatibility with AT&T lex. + - A large number of miscellaneous bugs have been found and fixed thanks to Gerhard Wilhelms. 
-- cgit v1.2.3 From 1c5784d0b679bf06fd276c9d76ed385f41cb6ba0 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 27 Nov 1993 13:29:45 +0000 Subject: Removed manual & nroff output from distribution --- Makefile.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.in b/Makefile.in index 49a5302..175bceb 100644 --- a/Makefile.in +++ b/Makefile.in @@ -79,8 +79,8 @@ LINTSRCS = ccl.c dfa.c ecs.c gen.c main.c misc.c nfa.c parse.c \ DISTFILES = README NEWS COPYING INSTALL FlexLexer.h \ configure.in Makefile.in mkskel.sh flex.skl \ $(HEADERS) $(SOURCES) $(LIBSRCS) MISC \ - flex.1 flexdoc.1 manual \ - configure flex.man flexdoc.man + flex.1 flexdoc.1 \ + configure DIST_NAME = flex -- cgit v1.2.3 From 4e1204998cfcf62757387f94069890a08aa89e37 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 27 Nov 1993 13:37:34 +0000 Subject: Updated date of 2.4 release --- NEWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 1a7165e..eb9f40b 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -Changes between release 2.4 (09Nov93) and release 2.3: +Changes between release 2.4 (30Nov93) and release 2.3: - The new '-+' flag instructs flex to generate a C++ scanner class (thanks to Kent Williams). flex writes an implementation of the -- cgit v1.2.3 From 99608bcf00a3219762df334fb0fa4e5b3bfa0c92 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 27 Nov 1993 13:41:15 +0000 Subject: flex.skel -> flex.skl --- COPYING | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/COPYING b/COPYING index 9b01361..dcb775e 100644 --- a/COPYING +++ b/COPYING @@ -33,6 +33,6 @@ This basically says "do whatever you please with this software except remove this notice or take advantage of the University's (or the flex authors') name". -Note that the "flex.skel" scanner skeleton carries no copyright notice. +Note that the "flex.skl" scanner skeleton carries no copyright notice. 
You are free to do whatever you please with scanners generated using flex; for them, you are not even bound by the above copyright. -- cgit v1.2.3 From 8c1b482acf52e1411d7c089590f06ec12822c304 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 27 Nov 1993 13:44:15 +0000 Subject: Revised as per Francois Pinard --- README | 63 +++++++++++++++++++++++---------------------------------------- 1 file changed, 23 insertions(+), 40 deletions(-) diff --git a/README b/README index 3fde5b1..5e2f603 100644 --- a/README +++ b/README @@ -1,51 +1,44 @@ -// $Header$ - This is release 2.4 of flex. See "version.h" for the exact patch-level. -Read the "Installation considerations" note in the Makefile and make any -necessary changes. - -To make this version of flex for the first time (even if you've already -installed earlier version of flex), use: - - make bootstrap - -and then - - make - -Assuming flex builds successfully, you can test it using +See the file "NEWS" to find out what is new in this Flex release. - make check +Read the file "INSTALL" for general installation directives. Peek near +the beginning of the file "Makefile.in" for special DEFS values. On most +systems, you can just run the "configure" script and type "make" to build +flex; then "make check" to test whether it built correctly; and if it did, +then "make install" to install it. -The "diff" should not show any differences. - -If you're feeling adventurous, issue "make bigcheck" and be prepared to wait -a while. - -Install flex using: - - make install +If you're feeling adventurous, you can also issue "make bigcheck" (be +prepared to wait a while). +Note that flex is distributed under a copyright very similar to that of +BSD Unix, and not under the GNU General Public License (GPL), except for +the "configure" script, which is covered by the GPL. Please send problems and feedback to: - vern@ee.lbl.gov - Vern Paxson - Systems Engineering - 46A/1123 + ICSD, 46A/1123 Lawrence Berkeley Laboratory 1 Cyclotron Rd. 
Berkeley, CA 94720 + vern@ee.lbl.gov + The flex distribution consists of the following files: README This message - Makefile, flexdef.h, parse.y, scan.l, ccl.c, dfa.c, ecs.c, gen.c, - main.c, misc.c, nfa.c, sym.c, tblcmp.c, yylex.c + NEWS Differences between the various releases + + INSTALL General installation information + + COPYING flex's copyright + + configure.in, Makefile.in, flexdef.h, parse.y, scan.l, ccl.c, + dfa.c, ecs.c, gen.c, main.c, misc.c, nfa.c, sym.c, tblcmp.c, + yylex.c source files version.h version of this flex release @@ -64,15 +57,5 @@ The flex distribution consists of the following files: flexdoc.1 full user documentation flex.1 reference documentation - flexdoc.man preformatted versions of documentation - flex.man - - manual/ A flex user manual written by G.T. Nicol. See - manual/README for details. - - Changes Differences between this release and the previous one - - COPYING flex's copyright - MISC/ a directory containing miscellaneous contributions. See MISC/README for details. 
-- cgit v1.2.3 From 27e3097bf2ae39f0ede3d35ada355a784fb451a9 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 27 Nov 1993 13:44:31 +0000 Subject: 2.4.1 --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index f6302d2..3f55ad1 100644 --- a/version.h +++ b/version.h @@ -1 +1 @@ -#define FLEX_VERSION "2.4.0 (November 9, 1993)" +#define FLEX_VERSION "2.4.1" -- cgit v1.2.3 From c83a217c102b7e17da41a39e0dac50fa4e50d7aa Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 27 Nov 1993 13:59:07 +0000 Subject: Changed "make dist" to use version.h, include scan.c in initial dir copy --- Makefile.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.in b/Makefile.in index 175bceb..a9021ff 100644 --- a/Makefile.in +++ b/Makefile.in @@ -80,7 +80,7 @@ DISTFILES = README NEWS COPYING INSTALL FlexLexer.h \ configure.in Makefile.in mkskel.sh flex.skl \ $(HEADERS) $(SOURCES) $(LIBSRCS) MISC \ flex.1 flexdoc.1 \ - configure + scan.c configure DIST_NAME = flex @@ -211,7 +211,7 @@ realclean: distclean rm -f flex.man flexdoc.man flex*.tar.gz flex*.tar.Z dist: $(DISTFILES) - $(MAKE) DIST_NAME=`pwd | sed 's|.*/||'` dist2 + $(MAKE) DIST_NAME=flex-`sed Date: Sat, 27 Nov 1993 14:02:34 +0000 Subject: more "dist" tweaks --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index a9021ff..28c1c59 100644 --- a/Makefile.in +++ b/Makefile.in @@ -215,10 +215,10 @@ dist: $(DISTFILES) dist2: @rm -rf $(DIST_NAME) + @rm -f $(DIST_NAME).tar $(DIST_NAME).tar.Z $(DIST_NAME).tar.gz @mkdir $(DIST_NAME) tar cf - $(DISTFILES) | (cd $(DIST_NAME); tar xfB -) @mv $(DIST_NAME)/scan.c $(DIST_NAME)/initscan.c - @rm -f flex.tar flex.tar.Z flex.tar.gz tar chf $(DIST_NAME).tar $(DIST_NAME) $(COMPRESS) $(DIST_NAME).tar @rm -rf $(DIST_NAME) -- cgit v1.2.3 From 4a110344b099946c7d739917b3a523d9436f86c7 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 27 Nov 1993 14:07:08 +0000 
Subject: permission tweaking for "dist" --- Makefile.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile.in b/Makefile.in index 28c1c59..abfca8c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -219,6 +219,8 @@ dist2: @mkdir $(DIST_NAME) tar cf - $(DISTFILES) | (cd $(DIST_NAME); tar xfB -) @mv $(DIST_NAME)/scan.c $(DIST_NAME)/initscan.c + @chmod 444 $(DIST_NAME)/initscan.c + @chmod +w $(DIST_NAME)/Makefile.in tar chf $(DIST_NAME).tar $(DIST_NAME) $(COMPRESS) $(DIST_NAME).tar @rm -rf $(DIST_NAME) -- cgit v1.2.3 From ee065d56de397b3d81e148a14a34e5a3b872b17b Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 27 Nov 1993 14:14:06 +0000 Subject: Added -l compression to bigcheck --- Makefile.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index abfca8c..7b3f482 100644 --- a/Makefile.in +++ b/Makefile.in @@ -145,7 +145,8 @@ bigcheck: rm -f scan.c ; $(MAKE) COMPRESSION="-Ce" check rm -f scan.c ; $(MAKE) COMPRESSION="-Cm" check rm -f scan.c ; $(MAKE) COMPRESSION="-Cfe" check - rm -f scan.c ; $(MAKE) COMPRESSION="-CFe" check + rm -f scan.c ; $(MAKE) COMPRESSION="-CFer" check + rm -f scan.c ; $(MAKE) COMPRESSION="-l" PERF_REPORT="" check rm -f scan.c ; $(MAKE) @echo "All checks successful" -- cgit v1.2.3 From a07f768080a1227dac807227858fc0c1e8baac78 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 27 Nov 1993 15:01:39 +0000 Subject: lint tweak --- misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misc.c b/misc.c index 9336880..4541026 100644 --- a/misc.c +++ b/misc.c @@ -184,7 +184,7 @@ register char *str; for ( c = str; *c; ++c ) ; - copy = yy_flex_alloc( (c - str + 1) * sizeof( char ) ); + copy = (char *) yy_flex_alloc( (c - str + 1) * sizeof( char ) ); if ( copy == NULL ) flexfatal( "dynamic memory failure in copy_string()" ); -- cgit v1.2.3 From e895651d9a92d9056d6f01ff6dddbc5383ccb477 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 27 Nov 1993 15:01:50 +0000 Subject: Include 
liballoc.c in lint targets --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 7b3f482..6c598a9 100644 --- a/Makefile.in +++ b/Makefile.in @@ -74,7 +74,7 @@ LIBSRCS = liballoc.c libmain.c libyywrap.c LIBOBJS = liballoc.o libmain.o libyywrap.o LINTSRCS = ccl.c dfa.c ecs.c gen.c main.c misc.c nfa.c parse.c \ - scan.c skel.c sym.c tblcmp.c yylex.c + scan.c skel.c sym.c tblcmp.c yylex.c liballoc.c DISTFILES = README NEWS COPYING INSTALL FlexLexer.h \ configure.in Makefile.in mkskel.sh flex.skl \ -- cgit v1.2.3 From 34e310bfd853ab5dc459cfa6f6337598ba5fabc2 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 27 Nov 1993 15:03:24 +0000 Subject: Don't remove dist directory --- Makefile.in | 1 - 1 file changed, 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 6c598a9..d140cb2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -224,7 +224,6 @@ dist2: @chmod +w $(DIST_NAME)/Makefile.in tar chf $(DIST_NAME).tar $(DIST_NAME) $(COMPRESS) $(DIST_NAME).tar - @rm -rf $(DIST_NAME) Makefile: Makefile.in config.status $(SHELL) config.status -- cgit v1.2.3 From c5cfd15bf106389e29f1b5f573f49feaa5d98bf1 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 27 Nov 1993 15:10:25 +0000 Subject: fixed typo --- NEWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS b/NEWS index eb9f40b..374e8a1 100644 --- a/NEWS +++ b/NEWS @@ -95,7 +95,7 @@ Changes between release 2.4 (30Nov93) and release 2.3: - Scanners with compressed tables are now "interactive" (-I option) by default. You can suppress this attribute (which makes them - run slighly slower) using the new '-B' flag. + run slightly slower) using the new '-B' flag. 
- Flex now generates 8-bit scanners by default, unless you use the -Cf or -CF compression options (-Cfe and -CFe result in 8-bit -- cgit v1.2.3 From 8039319891115b5fde897d1fa9a5d757c6663dd8 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 28 Nov 1993 16:39:13 +0000 Subject: AC_LN_S, AC_STDC_HEADERS (but not AC_ALLOCA) --- configure.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configure.in b/configure.in index 88b357b..b4a7fd1 100644 --- a/configure.in +++ b/configure.in @@ -2,10 +2,12 @@ dnl Process this file with autoconf to produce a configure script. dnl AC_INIT(initscan.c) +AC_LN_S AC_PROG_YACC AC_PROG_CC AC_PROG_RANLIB AC_PROG_INSTALL AC_CONST +AC_STDC_HEADERS AC_OUTPUT(Makefile) -- cgit v1.2.3 From 6d974612d3b5129abcc10dfd59b61deded738926 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 28 Nov 1993 16:42:34 +0000 Subject: Francois' tweaks --- Makefile.in | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/Makefile.in b/Makefile.in index d140cb2..7955832 100644 --- a/Makefile.in +++ b/Makefile.in @@ -2,18 +2,13 @@ # If your version of "make" does not define $(MAKE), comment in the # definition of "MAKE" below. (You only need to do this if you intend -# to do "make bigcheck".) +# to do "make bigcheck" or "make dist".) # MAKE = make # Possible values for DEFS: # # For flex to always generate 8-bit scanners, add "-DDEFAULT_CSIZE=256" # to DEFS. -# -# If your C run-time library defines malloc() as returning char* -# instead of void*, add "-DMALLOC_TYPE=char" to DEFS. (Note that on -# many systems, even if your malloc() returns char* you can get away -# with skipping this step.) # # For Vax/VMS, add "-DVMS" to DEFS. 
# @@ -46,6 +41,7 @@ SHELL = /bin/sh srcdir = @srcdir@ VPATH = @srcdir@ +LN_S = @LN_S@ YACC = @YACC@ CC = @CC@ AR = ar @@ -93,16 +89,16 @@ PERF_REPORT = -p FLEXLIB = libfl.a -all: .bootstrap flex +all: flex + +flex: .bootstrap $(OBJECTS) $(FLEXLIB) + $(CC) $(CFLAGS) -o flex $(LDFLAGS) $(OBJECTS) $(FLEXLIB) $(LIBS) .bootstrap: initscan.c @rm -f scan.c cp $(srcdir)/initscan.c scan.c touch .bootstrap -flex: $(OBJECTS) $(FLEXLIB) - $(CC) $(CFLAGS) -o flex $(LDFLAGS) $(OBJECTS) $(FLEXLIB) $(LIBS) - parse.c: parse.y $(YACC) -d $(srcdir)/parse.y @sed '/extern char.*malloc/d' parse.c @@ -134,7 +130,7 @@ tblcmp.o: tblcmp.c flexdef.h yylex.o: yylex.c flexdef.h test: check -check: .bootstrap flex +check: flex ./flex $(FLEX_FLAGS) $(COMPRESSION) $(srcdir)/scan.l \ | sed s,\"$(srcdir)/scan.l\",\"scan.l\", \ | diff scan.c - @@ -160,30 +156,25 @@ flex.man: flex.1 flexdoc.man: flexdoc.1 cd $(srcdir); nroff -man flexdoc.1 >flexdoc.man -install: flex install.$(INSTALLMAN) install-lib install-inc - @rm -f $(bindir)/flex +install: flex $(FLEXLIB) installdirs install.$(INSTALLMAN) $(INSTALL_PROGRAM) flex $(bindir)/flex - @echo "NOTE: If you want to create flex++, either copy or link" - @echo "==== $(bindir)/flex to $(bindir)/flex++" - -install-lib: $(libdir) $(FLEXLIB) - @rm -f $(libdir)/libfl.a + @rm -f $(bindir)/flex++ + $(LN_S) $(bindir)/flex $(bindir)/flex++ $(INSTALL_DATA) $(FLEXLIB) $(libdir)/libfl.a - -install-inc: - @rm -f $(includedir)/FlexLexer.h $(INSTALL_DATA) $(srcdir)/FlexLexer.h $(includedir)/FlexLexer.h install.man: flex.1 flexdoc.1 - @rm -f $(mandir)/flex.$(manext) $(mandir)/flexdoc.$(manext) $(INSTALL_DATA) $(srcdir)/flex.1 $(mandir)/flex.$(manext) $(INSTALL_DATA) $(srcdir)/flexdoc.1 $(mandir)/flexdoc.$(manext) install.cat: flex.man flexdoc.man - @rm -f $(mandir)/flex.$(manext) $(mandir)/flexdoc.$(manext) $(INSTALL_DATA) $(srcdir)/flex.man $(mandir)/flex.$(manext) $(INSTALL_DATA) $(srcdir)/flexdoc.man $(mandir)/flexdoc.$(manext) +installdirs: + $(SHELL) 
$(srcdir)/mkinstalldirs \ + $(bindir) $(libdir) $(includedir) $(mandir) + uninstall: rm -f $(bindir)/flex $(bindir)/flex++ rm -f $libdir)/libfl.a @@ -193,6 +184,9 @@ uninstall: tags: $(SOURCES) ctags $(SOURCES) +TAGS: $(SOURCES) + etags $(SOURCES) + lint: $(LINTSRCS) lint $(LINTSRCS) > flex.lint -- cgit v1.2.3 From bb95bbb88e538906f579eb5696f5cd61cff6f445 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 28 Nov 1993 16:44:25 +0000 Subject: Fixed some casts now that yytext is always char* and never unsigned char* --- scan.l | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/scan.l b/scan.l index 07967fb..4ce971f 100644 --- a/scan.l +++ b/scan.l @@ -32,7 +32,7 @@ #include "flexdef.h" #include "parse.h" -#define ACTION_ECHO add_action( (char *) yytext ) +#define ACTION_ECHO add_action( yytext ) #define MARK_END_OF_PROLOG mark_prolog(); #undef YY_DECL @@ -44,11 +44,11 @@ return CHAR; #define RETURNNAME \ - (void) strcpy( nmstr, (char *) yytext ); \ + (void) strcpy( nmstr, yytext ); \ return NAME; #define PUT_BACK_STRING(str, start) \ - for ( i = strlen( (char *) (str) ) - 1; i >= start; --i ) \ + for ( i = strlen( str ) - 1; i >= start; --i ) \ unput((str)[i]) #define CHECK_REJECT(str) \ @@ -139,7 +139,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) ^"%"[^sxanpekotcru{}].* synerr( "unrecognized '%' directive" ); ^{NAME} { - (void) strcpy( nmstr, (char *) yytext ); + (void) strcpy( nmstr, yytext ); didadef = false; BEGIN(PICKUPDEF); } @@ -171,7 +171,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) {WS} /* separates name and definition */ {NOT_WS}.* { - (void) strcpy( (char *) nmdef, (char *) yytext ); + (void) strcpy( (char *) nmdef, yytext ); /* Skip trailing whitespace. */ for ( i = strlen( (char *) nmdef ) - 1; @@ -296,7 +296,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) "["{FIRST_CCL_CHAR}{CCL_CHAR}* { int cclval; - (void) strcpy( nmstr, (char *) yytext ); + (void) strcpy( nmstr, yytext ); /* Check to see if we've already encountered this * ccl. 
@@ -329,9 +329,9 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) "{"{NAME}"}" { register Char *nmdefptr; - Char *ndlookup(); + char *ndlookup(); - (void) strcpy( nmstr, (char *) yytext + 1 ); + (void) strcpy( nmstr, yytext + 1 ); nmstr[yyleng - 2] = '\0'; /* chop trailing brace */ if ( ! (nmdefptr = ndlookup( nmstr )) ) @@ -340,12 +340,12 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) else { /* push back name surrounded by ()'s */ - int len = strlen( nmdefptr ); + int len = strlen( (char *) nmdefptr ); if ( lex_compat || nmdefptr[0] == '^' || (len > 0 && nmdefptr[len - 1] == '$') ) { /* don't use ()'s after all */ - PUT_BACK_STRING(nmdefptr, 0); + PUT_BACK_STRING((char *) nmdefptr, 0); if ( nmdefptr[0] == '^' ) BEGIN(CARETISBOL); @@ -354,7 +354,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) else { unput(')'); - PUT_BACK_STRING(nmdefptr, 0); + PUT_BACK_STRING((char *) nmdefptr, 0); unput('('); } } @@ -369,8 +369,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) ">"/^ BEGIN(CARETISBOL); return '>'; {SCNAME} RETURNNAME; . { - format_synerr( "bad : %s", - (char *) yytext ); + format_synerr( "bad : %s", yytext ); } "^" BEGIN(SECT2); return '^'; @@ -497,12 +496,12 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) {ESCSEQ} { - yylval = myesc( yytext ); + yylval = myesc( (Char *) yytext ); return CHAR; } {ESCSEQ} { - yylval = myesc( yytext ); + yylval = myesc( (Char *) yytext ); BEGIN(CCL); return CHAR; } @@ -511,7 +510,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) .*(\n?) 
ECHO; <> sectnum = 0; yyterminate(); -<*>.|\n format_synerr( "bad character: %s", (char *) yytext ); +<*>.|\n format_synerr( "bad character: %s", yytext ); %% -- cgit v1.2.3 From ff8e6141110257ed1b0e14c4d0726a71727d6017 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 28 Nov 1993 16:44:47 +0000 Subject: all_lower, all_upper work on char* --- misc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/misc.c b/misc.c index 4541026..3269a01 100644 --- a/misc.c +++ b/misc.c @@ -84,7 +84,7 @@ int size, element_size; /* all_lower - true if a string is all lower-case */ int all_lower( str ) -register Char *str; +register char *str; { while ( *str ) { @@ -100,7 +100,7 @@ register Char *str; /* all_upper - true if a string is all upper-case */ int all_upper( str ) -register Char *str; +register char *str; { while ( *str ) { -- cgit v1.2.3 From 4cf36734abf6ebd3158839e595a115d19e350d7b Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 28 Nov 1993 16:45:01 +0000 Subject: -a -> -Ca --- dfa.c | 2 +- flex.1 | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/dfa.c b/dfa.c index 4016e85..0293e3c 100644 --- a/dfa.c +++ b/dfa.c @@ -526,7 +526,7 @@ void ntod() */ num_full_table_rows = numecs + 1; - /* Unless -a, declare it "short" because it's a real + /* Unless -Ca, declare it "short" because it's a real * long-shot that that won't be large enough. */ printf( "static const %s yy_nxt[][%d] =\n {\n", diff --git a/flex.1 b/flex.1 index 1b4340a..723abb8 100644 --- a/flex.1 +++ b/flex.1 @@ -3,7 +3,7 @@ flexdoc \- documentation for flex, fast lexical analyzer generator .SH SYNOPSIS .B flex -.B [\-abcdfhilnpstvwBFILTV78+ \-C[efFmr] \-Pprefix \-Sskeleton] +.B [\-bcdfhilnpstvwBFILTV78+ \-C[aefFmr] \-Pprefix \-Sskeleton] .I [filename ...] 
.SH DESCRIPTION .I flex @@ -1503,14 +1503,6 @@ part of the scanner might look like: .I flex has the following options: .TP -.B \-a -(``align'') instructs flex to trade off larger tables in the -generated scanner for faster performance because the elements of -the tables are better aligned for memory access and computation. On some RISC -architectures, fetching and manipulating longwords is more efficient than -with smaller-sized datums such as shortwords. This option can -double the size of the tables used by your scanner. -.TP .B \-b Generate backing-up information to .I lex.backup. @@ -1871,8 +1863,17 @@ specifies that you want flex to generate a C++ scanner class. See the section on Generating C++ Scanners below for details. .TP -.B \-C[efmF] -controls the degree of table compression. +.B \-C[aefFmr] +controls the degree of table compression and, more generally, trade-offs +between small scanners and fast scanners. +.IP +.B \-Ca +("align") instructs flex to trade off larger tables in the +generated scanner for faster performance because the elements of +the tables are better aligned for memory access and computation. On some +RISC architectures, fetching and manipulating longwords is more efficient +than with smaller-sized datums such as shortwords. This option can +double the size of the tables used by your scanner. .IP .B \-Ce directs @@ -1976,6 +1977,7 @@ the following generally being true: -C -C{f,F}e -C{f,F} + -C{f,F}a fastest & largest .fi @@ -2053,8 +2055,6 @@ is that it generate high-performance scanners. It has been optimized for dealing well with large sets of rules. Aside from the effects on scanner speed of the table compression .B \-C -and -.B \-a options outlined above, there are a number of options/actions which degrade performance. 
These are, from most expensive to least: -- cgit v1.2.3 From b33692211f1538f95bcb917c49128b1caf41206f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 28 Nov 1993 16:45:47 +0000 Subject: -a -> -Ca; fixed help output --- main.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/main.c b/main.c index 9a87072..aa03217 100644 --- a/main.c +++ b/main.c @@ -264,8 +264,6 @@ int exit_status; if ( C_plus_plus ) putc( '+', stderr ); - if ( long_align ) - putc( 'a', stderr ); if ( backing_up_report ) putc( 'b', stderr ); if ( ddebug ) @@ -301,6 +299,8 @@ int exit_status; fprintf( stderr, " -C" ); + if ( long_align ) + putc( 'a', stderr ); if ( fulltbl ) putc( 'f', stderr ); if ( fullspd ) @@ -467,10 +467,6 @@ char **argv; C_plus_plus = true; break; - case 'a': - long_align = true; - break; - case 'B': interactive = false; interactive_given = true; @@ -503,6 +499,11 @@ char **argv; for ( ++i; arg[i] != '\0'; ++i ) switch ( arg[i] ) { + case 'a': + long_align = + true; + break; + case 'e': useecs = true; break; @@ -977,11 +978,9 @@ void set_up_initial_allocations() void usage() { fprintf( stderr, -"%s [-abcdfhinpstvwBFILTV78+ -C[efmF] -Pprefix -Sskeleton] [file ...]\n", +"%s [-bcdfhilnpstvwBFILTV78+ -C[aefFmr] -Pprefix -Sskeleton] [file ...]\n", program_name ); - fprintf( stderr, - "\t-a trade off larger tables for better memory alignment\n" ); fprintf( stderr, "\t-b generate backing-up information to lex.backup\n" ); fprintf( stderr, "\t-c do-nothing POSIX option\n" ); @@ -989,6 +988,7 @@ void usage() fprintf( stderr, "\t-f generate fast, large scanner\n" ); fprintf( stderr, "\t-h produce this help message\n" ); fprintf( stderr, "\t-i generate case-insensitive scanner\n" ); + fprintf( stderr, "\t-l maximal compatibility with original lex\n" ); fprintf( stderr, "\t-n do-nothing POSIX option\n" ); fprintf( stderr, "\t-p generate performance report to stderr\n" ); fprintf( stderr, @@ -1011,12 +1011,16 @@ void usage() fprintf( stderr, 
"\t-+ generate C++ scanner class\n" ); fprintf( stderr, "\t-C specify degree of table compression (default is -Cem):\n" ); + fprintf( stderr, + "\t\t-Ca trade off larger tables for better memory alignment\n" ); fprintf( stderr, "\t\t-Ce construct equivalence classes\n" ); fprintf( stderr, "\t\t-Cf do not compress scanner tables; use -f representation\n" ); - fprintf( stderr, "\t\t-Cm construct meta-equivalence classes\n" ); fprintf( stderr, "\t\t-CF do not compress scanner tables; use -F representation\n" ); + fprintf( stderr, "\t\t-Cm construct meta-equivalence classes\n" ); + fprintf( stderr, + "\t\t-Cr use read() instead of stdio for scanner input\n" ); fprintf( stderr, "\t-P specify scanner prefix other than \"yy\"\n" ); fprintf( stderr, "\t-S specify skeleton file\n" ); } -- cgit v1.2.3 From 120cf703e6606329f19aa78077df09de79d787c4 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 28 Nov 1993 16:46:31 +0000 Subject: Added -Ca to bigcheck --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 7955832..a99e7d0 100644 --- a/Makefile.in +++ b/Makefile.in @@ -140,7 +140,7 @@ bigcheck: rm -f scan.c ; $(MAKE) COMPRESSION="-C" check rm -f scan.c ; $(MAKE) COMPRESSION="-Ce" check rm -f scan.c ; $(MAKE) COMPRESSION="-Cm" check - rm -f scan.c ; $(MAKE) COMPRESSION="-Cfe" check + rm -f scan.c ; $(MAKE) COMPRESSION="-Cfea" check rm -f scan.c ; $(MAKE) COMPRESSION="-CFer" check rm -f scan.c ; $(MAKE) COMPRESSION="-l" PERF_REPORT="" check rm -f scan.c ; $(MAKE) -- cgit v1.2.3 From 981e729e773198a6d3fc13e508274739acc3156e Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 28 Nov 1993 16:46:47 +0000 Subject: -a -> -Ca all_lower, all_upper -> work on char* --- flexdef.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flexdef.h b/flexdef.h index c4b98c5..b51d62a 100644 --- a/flexdef.h +++ b/flexdef.h @@ -319,7 +319,7 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; * listing backing-up 
states * C_plus_plus - if true (i.e., -+ flag), generate a C++ scanner class; * otherwise, a standard C scanner - * long_align - if true (-a flag), favor long-word alignment. + * long_align - if true (-Ca flag), favor long-word alignment. * use_read - if true (-f, -F, or -Cr) then use read() for scanner input; * otherwise, use fread(). * yytext_is_array - if true (i.e., %array directive), then declare @@ -706,10 +706,10 @@ extern void usage PROTO((void)); extern void add_action PROTO(( char *new_text )); /* True if a string is all lower case. */ -extern int all_lower PROTO((register Char *)); +extern int all_lower PROTO((register char *)); /* True if a string is all upper case. */ -extern int all_upper PROTO((register Char *)); +extern int all_upper PROTO((register char *)); /* Bubble sort an integer array. */ extern void bubble PROTO((int [], int)); -- cgit v1.2.3 From 2843d551d5fb650dd17c1b7f3a3bb6bf684d68bc Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 29 Nov 1993 10:37:07 +0000 Subject: myctoi takes char[] instead of Char[] --- flexdef.h | 2 +- misc.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/flexdef.h b/flexdef.h index b51d62a..d03c782 100644 --- a/flexdef.h +++ b/flexdef.h @@ -747,7 +747,7 @@ extern void mk2data PROTO((int)); extern void mkdata PROTO((int)); /* generate a data statement */ /* Return the integer represented by a string of digits. 
*/ -extern int myctoi PROTO((Char [])); +extern int myctoi PROTO((char [])); /* Return a printable version of the given character, which might be * 8-bit diff --git a/misc.c b/misc.c index 3269a01..cb82151 100644 --- a/misc.c +++ b/misc.c @@ -477,11 +477,11 @@ int value; /* myctoi - return the integer represented by a string of digits */ int myctoi( array ) -Char array[]; +char array[]; { int val = 0; - (void) sscanf( (char *) array, "%d", &val ); + (void) sscanf( array, "%d", &val ); return val; } -- cgit v1.2.3 From 3fa0867350e1a441b4f2b24b4ccaeb56121d19dc Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 29 Nov 1993 10:37:30 +0000 Subject: only "realclean" removes flex dist depends on flex --- Makefile.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile.in b/Makefile.in index a99e7d0..dae2431 100644 --- a/Makefile.in +++ b/Makefile.in @@ -197,15 +197,15 @@ mostlyclean: rm -f *~ *.bak core errs clean: mostlyclean - rm -f flex *.o parse.c *.lint parse.h lex.yy.c $(FLEXLIB) + rm -f *.o parse.c *.lint parse.h lex.yy.c $(FLEXLIB) distclean: clean rm -f .bootstrap scan.c tags TAGS Makefile config.status realclean: distclean - rm -f flex.man flexdoc.man flex*.tar.gz flex*.tar.Z + rm -f flex flex.man flexdoc.man flex*.tar.gz flex*.tar.Z -dist: $(DISTFILES) +dist: flex $(DISTFILES) $(MAKE) DIST_NAME=flex-`sed Date: Mon, 29 Nov 1993 10:51:07 +0000 Subject: Added Nathan Zelle, "promoted" Francois --- flex.1 | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/flex.1 b/flex.1 index 723abb8..a99f866 100644 --- a/flex.1 +++ b/flex.1 @@ -2989,7 +2989,8 @@ Jacobson. The implementation was done by Kevin Gong and Vern Paxson. .PP Thanks to the many .I flex -beta-testers, feedbackers, and contributors, especially Casey Leedom, +beta-testers, feedbackers, and contributors, especially Francois Pinard, +Casey Leedom, Nelson H.F. Beebe, benson@odi.com, Peter A. 
Bigot, Keith Bostic, Frederic Brehm, Nick Christopher, Jason Coughlin, Bill Cox, Dave Curtis, Scott David Daniels, Chris G. Demetriou, Mike Donahue, Chuck Doucette, Tom Epperly, Leo @@ -2999,10 +3000,10 @@ Juengst, Amir Katz, ken@ken.hilco.com, Kevin B. Kenny, Marq Kole, Ronald Lamprecht, Greg Lee, Craig Leres, John Levine, Mohamed el Lozy, Chris Metcalf, Luke Mewburn, Jim Meyering, G.T. Nicol, Landon Noll, Marc Nozell, Richard Ohnemus, Sven Panne, Roland Pesch, Walter Pelissero, Gaumond -Pierre, Francois Pinard, Esmond Pitt, Jef Poskanzer, Kevin Rodgers, Jim +Pierre, Esmond Pitt, Jef Poskanzer, Kevin Rodgers, Jim Roskind, Doug Schmidt, Alex Siegel, Paul Stuart, Dave Tallman, Paul Tuinenga, Gary Weik, Frank Whaley, Gerhard Wilhelms, Kent Williams, Ken -Yap, David Zuhn, and those whose names have slipped my marginal +Yap, Nathan Zelle, David Zuhn, and those whose names have slipped my marginal mail-archiving skills but whose contributions are appreciated all the same. .PP -- cgit v1.2.3 From 77599e50c56431dcfb96bd9dbb31843e748c0068 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 29 Nov 1993 11:02:52 +0000 Subject: Added install.sh, mkinstalldirs to distribution files --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index dae2431..ecb5be7 100644 --- a/Makefile.in +++ b/Makefile.in @@ -76,7 +76,7 @@ DISTFILES = README NEWS COPYING INSTALL FlexLexer.h \ configure.in Makefile.in mkskel.sh flex.skl \ $(HEADERS) $(SOURCES) $(LIBSRCS) MISC \ flex.1 flexdoc.1 \ - scan.c configure + scan.c install.sh mkinstalldirs configure DIST_NAME = flex -- cgit v1.2.3 From fd034bf3a1027f0d1e9e01989402ea3d3da93f4c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 29 Nov 1993 11:04:29 +0000 Subject: 2.4 -> 2.4.1 --- NEWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 374e8a1..008c104 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -Changes between release 2.4 (30Nov93) and release 2.3: 
+Changes between release 2.4.1 (30Nov93) and release 2.3.8: - The new '-+' flag instructs flex to generate a C++ scanner class (thanks to Kent Williams). flex writes an implementation of the -- cgit v1.2.3 From c365d8ae13ebebebb7cba29575864810a1cc4704 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 29 Nov 1993 14:57:53 +0000 Subject: Fixed mis-definition of ndlookup() --- scan.l | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scan.l b/scan.l index 4ce971f..f19f87e 100644 --- a/scan.l +++ b/scan.l @@ -329,7 +329,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) "{"{NAME}"}" { register Char *nmdefptr; - char *ndlookup(); + Char *ndlookup(); (void) strcpy( nmstr, yytext + 1 ); nmstr[yyleng - 2] = '\0'; /* chop trailing brace */ -- cgit v1.2.3 From d0d675f51ac958bb757c4c22d8a5dd3df833fa96 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 29 Nov 1993 16:38:31 +0000 Subject: Fixed ANSI-C glitch with '%' operator --- sym.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sym.c b/sym.c index 3583aab..8e4788c 100644 --- a/sym.c +++ b/sym.c @@ -163,8 +163,10 @@ int hash_size; locstr = 0; while ( str[locstr] ) - hashval = ((hashval << 1) + (unsigned char) str[locstr++]) % - hash_size; + { + hashval = (hashval << 1) + (unsigned char) str[locstr++]; + hashval %= hash_size; + } return hashval; } -- cgit v1.2.3 From cbe0e6b465562d465a7fbc15e62402a96d694db1 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 29 Nov 1993 16:40:41 +0000 Subject: Fixed to buffer section 1 definitions --- flex.skl | 142 +++++++++++++++++++++++++++++++------------------------------- flexdef.h | 20 +++++---- gen.c | 32 +++++++++++++- main.c | 69 +++++++++--------------------- misc.c | 28 ++++++++----- scan.l | 30 ++++++------- 6 files changed, 165 insertions(+), 156 deletions(-) diff --git a/flex.skl b/flex.skl index efcbe3d..adf5d88 100644 --- a/flex.skl +++ b/flex.skl @@ -60,56 +60,9 @@ #define YY_PROTO(proto) () #endif -%% section 1 definitions go here - -/* 
Amount of stuff to slurp up with each read. */ -#ifndef YY_READ_BUF_SIZE -#define YY_READ_BUF_SIZE 8192 -#endif - /* Returned upon end-of-file. */ -#define YY_END_TOK 0 - -/* Copy whatever the last rule matched to the standard output. */ - -#ifndef ECHO -%- Standard (non-C++) definition -/* This used to be an fputs(), but since the string might contain NUL's, - * we now use fwrite(). - */ -#define ECHO (void) fwrite( yytext, yyleng, 1, yyout ) -%+ C++ definition -#define ECHO LexerOutput( yytext, yyleng ) -%* -#endif - -/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, - * is returned in "result". - */ -#ifndef YY_INPUT -#define YY_INPUT(buf,result,max_size) \ -%% fread()/read() definition of YY_INPUT goes here unless we're doing C++ -%+ C++ definition - if ( (result = LexerInput( (char *) buf, max_size )) < 0 ) \ - YY_FATAL_ERROR( "input in flex scanner failed" ); -%* -#endif - -/* No semi-colon after return; correct usage is to write "yyterminate();" - - * we don't want an extra ';' after the "return" because that will cause - * some compilers to complain about unreachable statements. - */ -#ifndef yyterminate -#define yyterminate() return YY_NULL -#endif - #define YY_NULL 0 -/* Report a fatal error. */ -#ifndef YY_FATAL_ERROR -#define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) -#endif - /* Enter a start condition. This macro really ought to take a parameter, * but we do it the disgusting crufty way forced on us by the ()-less * definition of BEGIN. @@ -129,28 +82,8 @@ */ #define YY_NEW_FILE yyrestart( yyin ) -/* Default declaration of generated scanner - a define so the user can - * easily add parameters. - */ -#ifndef YY_DECL -%- Standard (non-C++) definition -#define YY_DECL int yylex YY_PROTO(( void )) -%+ C++ definition -#define YY_DECL int yyFlexLexer::yylex() -%* -#endif - -/* Code executed at the end of each rule. 
*/ -#ifndef YY_BREAK -#define YY_BREAK break; -#endif - #define YY_END_OF_BUFFER_CHAR 0 -#ifndef YY_BUF_SIZE -#define YY_BUF_SIZE (YY_READ_BUF_SIZE * 2) /* size of default input buffer */ -#endif - typedef struct yy_buffer_state *YY_BUFFER_STATE; extern int yyleng; @@ -262,8 +195,6 @@ static int yy_n_chars; /* number of characters read into yy_ch_buf */ int yyleng; -%% yyin/yyout and (if -l option) yylineno definition & initialization goes here - /* Points to current character in buffer. */ static char *yy_c_buf_p = (char *) 0; static int yy_init = 1; /* whether we need to initialize */ @@ -285,7 +216,7 @@ void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file )); #define yy_new_buffer yy_create_buffer -%% declarations of yytext/yytext_ptr (and C++ include, if used) go here +%% yytext/yyin/yyout/yy_state_type/yylineno etc. def's & init go here %- Standard (non-C++) definition #ifdef __cplusplus @@ -307,13 +238,80 @@ static void yy_fatal_error YY_PROTO(( const char msg[] )); */ #define YY_DO_BEFORE_ACTION \ yytext_ptr = yy_bp; \ -%% code to fiddle yytext and yyleng for yymore() goes here +%% code to fiddle yytext and yyleng for yymore() goes here yy_hold_char = *yy_cp; \ *yy_cp = '\0'; \ %% code to copy yytext_ptr to yytext[] goes here, if %array yy_c_buf_p = yy_cp; -%% data tables for the DFA go here +%% data tables for the DFA and the user's section 1 definitions go here + +/* Macros after this point can all be overridden by user definitions in + * section 1. + */ + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#define YY_READ_BUF_SIZE 8192 +#endif + +/* Copy whatever the last rule matched to the standard output. */ + +#ifndef ECHO +%- Standard (non-C++) definition +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). 
+ */ +#define ECHO (void) fwrite( yytext, yyleng, 1, yyout ) +%+ C++ definition +#define ECHO LexerOutput( yytext, yyleng ) +%* +#endif + +/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, + * is returned in "result". + */ +#ifndef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ +%% fread()/read() definition of YY_INPUT goes here unless we're doing C++ +%+ C++ definition + if ( (result = LexerInput( (char *) buf, max_size )) < 0 ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); +%* +#endif + +/* No semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#ifndef yyterminate +#define yyterminate() return YY_NULL +#endif + +/* Report a fatal error. */ +#ifndef YY_FATAL_ERROR +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) +#endif + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. + */ +#ifndef YY_DECL +%- Standard (non-C++) definition +#define YY_DECL int yylex YY_PROTO(( void )) +%+ C++ definition +#define YY_DECL int yyFlexLexer::yylex() +%* +#endif + +/* Code executed at the end of each rule. 
*/ +#ifndef YY_BREAK +#define YY_BREAK break; +#endif + +#ifndef YY_BUF_SIZE +#define YY_BUF_SIZE (YY_READ_BUF_SIZE * 2) /* size of default input buffer */ +#endif YY_DECL { diff --git a/flexdef.h b/flexdef.h index d03c782..3b5cfd8 100644 --- a/flexdef.h +++ b/flexdef.h @@ -366,12 +366,12 @@ extern int yymore_really_used, reject_really_used; * * action_array - array to hold the rule actions * action_size - size of action_array - * prolog - pointer to where the prolog starts in action_array + * defs1_offset - index where the user's section 1 definitions start + * in action_array + * prolog_offset - index where the prolog starts in action_array * action_offset - index where the non-prolog starts in action_array * action_index - index where the next action should go, with respect - * to "action" - * action - pointer to where non-prolog starts; equal to - * &action_array[action_offset] + * to "action_array" */ extern int datapos, dataline, linenum; @@ -383,8 +383,9 @@ extern char **input_files; extern int num_input_files; extern char *program_name; -extern char *action_array, *prolog, *action; -extern int action_size, action_offset, action_index; +extern char *action_array; +extern int action_size; +extern int defs1_offset, prolog_offset, action_offset, action_index; /* Variables for stack of states having only one out-transition: @@ -738,6 +739,11 @@ extern void lerrsf PROTO((char[], char[])); /* Spit out a "# line" statement. */ extern void line_directive_out PROTO((FILE*)); +/* Mark the current position in the action array as the end of the section 1 + * user defs. + */ +extern void mark_defs1 PROTO((void)); + /* Mark the current position in the action array as the end of the prolog. */ extern void mark_prolog PROTO((void)); @@ -750,7 +756,7 @@ extern void mkdata PROTO((int)); /* generate a data statement */ extern int myctoi PROTO((char [])); /* Return a printable version of the given character, which might be - * 8-bit + * 8-bit. 
*/ extern char *readable_form PROTO((int)); diff --git a/gen.c b/gen.c index 1c47e0a..5d527e0 100644 --- a/gen.c +++ b/gen.c @@ -1221,10 +1221,38 @@ void make_tables() indent_puts( "#define YY_MORE_ADJ 0" ); } + fputs( &action_array[defs1_offset], stdout ); + + skelout(); + + if ( ! C_plus_plus ) + { + if ( use_read ) + { + printf( +"\tif ( (result = read( fileno(yyin), (char *) buf, max_size )) < 0 ) \\\n" ); + printf( + "\t\tYY_FATAL_ERROR( \"input in flex scanner failed\" );\n" ); + } + + else + { + printf( + "\tif ( yy_current_buffer->is_interactive ) \\\n" ); + printf( + "\t\tresult = (buf[0] = getc( yyin )) == EOF ? 0 : 1; \\\n" ); + printf( +"\telse if ( ((result = fread( (char *) buf, 1, max_size, yyin )) == 0)\\\n" ); + printf( "\t\t && ferror( yyin ) ) \\\n" ); + printf( + "\t\tYY_FATAL_ERROR( \"input in flex scanner failed\" );\n" ); + } + } + skelout(); /* Copy prolog to output file. */ - fputs( prolog, stdout ); + fputs( &action_array[prolog_offset], stdout ); skelout(); @@ -1323,7 +1351,7 @@ void make_tables() skelout(); indent_up(); gen_bu_action(); - fputs( action, stdout ); + fputs( &action_array[action_offset], stdout ); /* generate cases for any missing EOF rules */ for ( i = 1; i <= lastsc; ++i ) diff --git a/main.c b/main.c index aa03217..cee89ad 100644 --- a/main.c +++ b/main.c @@ -58,8 +58,8 @@ int yymore_really_used, reject_really_used; int datapos, dataline, linenum; FILE *skelfile = NULL; int skel_ind = 0; -char *action_array, *prolog, *action; -int action_size, action_offset, action_index; +char *action_array; +int action_size, defs1_offset, prolog_offset, action_offset, action_index; char *infilename = NULL; int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE]; int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; @@ -442,9 +442,9 @@ char **argv; /* Initialize dynamic array for holding the rule actions. 
*/ action_size = 2048; /* default size of action array in bytes */ - prolog = action = action_array = - allocate_character_array( action_size ); - action_offset = action_index = 0; + action_array = allocate_character_array( action_size ); + defs1_offset = prolog_offset = action_offset = action_index = 0; + action_array[0] = '\0'; program_name = argv[0]; @@ -806,6 +806,8 @@ void readin() { skelout(); + line_directive_out( (FILE *) 0 ); + if ( yyparse() ) { pinpoint_message( "fatal parse error" ); @@ -837,34 +839,6 @@ void readin() if ( ddebug ) puts( "\n#define FLEX_DEBUG" ); - skelout(); - - if ( ! C_plus_plus ) - { - if ( use_read ) - { - printf( -"\tif ( (result = read( fileno(yyin), (char *) buf, max_size )) < 0 ) \\\n" ); - printf( - "\t\tYY_FATAL_ERROR( \"input in flex scanner failed\" );\n" ); - } - - else - { - printf( - "\tif ( yy_current_buffer->is_interactive ) \\\n" ); - printf( - "\t\tresult = (buf[0] = getc( yyin )) == EOF ? 0 : 1; \\\n" ); - printf( -"\telse if ( ((result = fread( (char *) buf, 1, max_size, yyin )) == 0)\\\n" ); - printf( "\t\t && ferror( yyin ) ) \\\n" ); - printf( - "\t\tYY_FATAL_ERROR( \"input in flex scanner failed\" );\n" ); - } - } - - skelout(); - if ( lex_compat ) { printf( "FILE *yyin = stdin, *yyout = stdout;\n" ); @@ -874,26 +848,10 @@ void readin() else printf( "FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;\n" ); - skelout(); - if ( C_plus_plus ) printf( "\n#include \"FlexLexer.h\"\n" ); - line_directive_out( stdout ); - - if ( useecs ) - numecs = cre8ecs( nextecm, ecgroup, csize ); else - numecs = csize; - - /* Now map the equivalence class for NUL to its expected place. */ - ecgroup[0] = ecgroup[csize]; - NUL_ec = abs( ecgroup[0] ); - - if ( useecs ) - ccl2ecl(); - - if ( ! 
C_plus_plus ) { if ( yytext_is_array ) { @@ -913,8 +871,19 @@ void readin() puts( "#define yytext_ptr yytext" ); } } - } + if ( useecs ) + numecs = cre8ecs( nextecm, ecgroup, csize ); + else + numecs = csize; + + /* Now map the equivalence class for NUL to its expected place. */ + ecgroup[0] = ecgroup[csize]; + NUL_ec = abs( ecgroup[0] ); + + if ( useecs ) + ccl2ecl(); + } /* set_up_initial_allocations - allocate memory for internal tables */ diff --git a/misc.c b/misc.c index cb82151..94b41f2 100644 --- a/misc.c +++ b/misc.c @@ -43,16 +43,14 @@ char *new_text; { int len = strlen( new_text ); - while ( len + action_index + action_offset >= action_size - 10 - /* slop */ ) + while ( len + action_index >= action_size - 10 /* slop */ ) { action_size *= 2; - prolog = action_array = + action_array = reallocate_character_array( action_array, action_size ); - action = &action_array[action_offset]; } - strcpy( &action[action_index], new_text ); + strcpy( &action_array[action_index], new_text ); action_index += len; } @@ -408,17 +406,27 @@ FILE *output_file; } +/* mark_defs1 - mark the current position in the action array as + * representing where the user's section 1 definitions end + * and the prolog begins + */ +void mark_defs1() + { + defs1_offset = 0; + action_array[action_index++] = '\0'; + action_offset = prolog_offset = action_index; + action_array[action_index] = '\0'; + } + + /* mark_prolog - mark the current position in the action array as - * representing the action prolog + * representing the end of the action prolog */ void mark_prolog() { - prolog = action_array; action_array[action_index++] = '\0'; action_offset = action_index; - action = &action_array[action_offset]; - action_index = 0; - action[action_index] = '\0'; + action_array[action_index] = '\0'; } diff --git a/scan.l b/scan.l index f19f87e..a555624 100644 --- a/scan.l +++ b/scan.l @@ -35,7 +35,6 @@ #define ACTION_ECHO add_action( yytext ) #define MARK_END_OF_PROLOG mark_prolog(); -#undef YY_DECL 
#define YY_DECL \ int flexscan() @@ -89,12 +88,12 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) ^{WS} indented_code = true; BEGIN(CODEBLOCK); -^"/*" ECHO; BEGIN(C_COMMENT); +^"/*" ACTION_ECHO; BEGIN(C_COMMENT); ^"%s"{NAME}? return SCDECL; ^"%x"{NAME}? return XSCDECL; ^"%{".*{NL} { ++linenum; - line_directive_out( stdout ); + line_directive_out( (FILE *) 0 ); indented_code = false; BEGIN(CODEBLOCK); } @@ -103,8 +102,9 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) ^"%%".* { sectnum = 2; - line_directive_out( stdout ); bracelevel = 0; + mark_defs1(); + line_directive_out( (FILE *) 0 ); BEGIN(SECT2PROLOG); return SECTEND; } @@ -149,20 +149,20 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) {OPTWS}{NL} ++linenum; return '\n'; -"*/" ECHO; BEGIN(INITIAL); -"*/".*{NL} ++linenum; ECHO; BEGIN(INITIAL); -[^*\n]+ ECHO; -"*" ECHO; -{NL} ++linenum; ECHO; +"*/" ACTION_ECHO; BEGIN(INITIAL); +"*/".*{NL} ++linenum; ACTION_ECHO; BEGIN(INITIAL); +[^*\n]+ ACTION_ECHO; +"*" ACTION_ECHO; +{NL} ++linenum; ACTION_ECHO; ^"%}".*{NL} ++linenum; BEGIN(INITIAL); -"reject" ECHO; CHECK_REJECT(yytext); -"yymore" ECHO; CHECK_YYMORE(yytext); -{NAME}|{NOT_NAME}|. ECHO; +"reject" ACTION_ECHO; CHECK_REJECT(yytext); +"yymore" ACTION_ECHO; CHECK_YYMORE(yytext); +{NAME}|{NOT_NAME}|. ACTION_ECHO; {NL} { ++linenum; - ECHO; + ACTION_ECHO; if ( indented_code ) BEGIN(INITIAL); } @@ -223,7 +223,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) if ( bracelevel <= 0 ) { /* not in %{ ... 
%} */ yyless( 0 ); /* put it all back */ - MARK_END_OF_PROLOG; + mark_prolog(); BEGIN(SECT2); } else @@ -234,7 +234,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) {NL} ++linenum; ACTION_ECHO; <> { - MARK_END_OF_PROLOG; + mark_prolog(); sectnum = 0; yyterminate(); /* to stop the parser */ } -- cgit v1.2.3 From 5b100719dcf86274d0543e80f73144ea619a29da Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 29 Nov 1993 17:12:49 +0000 Subject: Added parse.{c,h} to dist files --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index ecb5be7..24ae7e7 100644 --- a/Makefile.in +++ b/Makefile.in @@ -76,7 +76,7 @@ DISTFILES = README NEWS COPYING INSTALL FlexLexer.h \ configure.in Makefile.in mkskel.sh flex.skl \ $(HEADERS) $(SOURCES) $(LIBSRCS) MISC \ flex.1 flexdoc.1 \ - scan.c install.sh mkinstalldirs configure + parse.c parse.h scan.c install.sh mkinstalldirs configure DIST_NAME = flex -- cgit v1.2.3 From 18a7d069a1aef814f454c6343e231ac847854d99 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 29 Nov 1993 17:13:22 +0000 Subject: Documented that buffer can't grow if REJECT used --- flex.1 | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/flex.1 b/flex.1 index a99f866..54b6a60 100644 --- a/flex.1 +++ b/flex.1 @@ -1617,7 +1617,7 @@ scanner. If you give the flag twice, you will also get comments regarding features that lead to minor performance losses. .IP Note that the use of -.I REJECT +.B REJECT and variable trailing context (see the Bugs section in flex(1)) entails a substantial performance penalty; use of .I yymore(), @@ -2189,7 +2189,7 @@ feature will be to automatically add rules to eliminate backing up). .I Variable trailing context (where both the leading and trailing parts do not have a fixed length) entails almost the same performance loss as -.I REJECT +.B REJECT (i.e., substantial). 
So when possible a rule like: .nf @@ -2957,6 +2957,13 @@ Ideally the scanner should dynamically resize the buffer in this case, but at present it does not. .PP .I +input buffer overflow, can't enlarge buffer because scanner uses REJECT - +the scanner was working on matching an extremely large token and needed +to expand the input buffer. This doesn't work with scanners that use +.B +REJECT. +.PP +.I fatal flex scanner internal error--end of buffer missed - This can occur in an scanner which is reentered after a long-jump has jumped out (or over) the scanner's activation frame. Before -- cgit v1.2.3 From cde528826f68fd9019a23b0fd0d5a9a3737e2707 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 29 Nov 1993 17:13:35 +0000 Subject: Fixed YYLMAX headaches --- gen.c | 15 +++++++++++++++ main.c | 6 ------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/gen.c b/gen.c index 5d527e0..b9e36ca 100644 --- a/gen.c +++ b/gen.c @@ -1221,6 +1221,21 @@ void make_tables() indent_puts( "#define YY_MORE_ADJ 0" ); } + if ( ! 
C_plus_plus ) + { + if ( yytext_is_array ) + { + puts( "#ifndef YYLMAX" ); + puts( "#define YYLMAX 8192" ); + puts( "#endif\n" ); + puts( "char yytext[YYLMAX];" ); + puts( "char *yytext_ptr;" ); + } + + else + puts( "char *yytext;" ); + } + fputs( &action_array[defs1_offset], stdout ); skelout(); diff --git a/main.c b/main.c index cee89ad..e00db15 100644 --- a/main.c +++ b/main.c @@ -857,17 +857,11 @@ void readin() { puts( "\n#include \n" ); puts( "extern char yytext[];\n" ); - puts( "#ifndef YYLMAX" ); - puts( "#define YYLMAX YY_READ_BUF_SIZE" ); - puts( "#endif YYLMAX\n" ); - puts( "char yytext[YYLMAX];" ); - puts( "char *yytext_ptr;" ); } else { puts( "extern char *yytext;" ); - puts( "char *yytext;" ); puts( "#define yytext_ptr yytext" ); } } -- cgit v1.2.3 From f44b05db4cc143a593c277beeb5a8d36324bf414 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 29 Nov 1993 17:13:53 +0000 Subject: Fixed %array YYLMAX headaches, added error message if buffer needs growing but REJECT used --- flex.skl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/flex.skl b/flex.skl index adf5d88..9cf21d6 100644 --- a/flex.skl +++ b/flex.skl @@ -84,6 +84,9 @@ #define YY_END_OF_BUFFER_CHAR 0 +/* Size of default input buffer. */ +#define YY_BUF_SIZE 16384 + typedef struct yy_buffer_state *YY_BUFFER_STATE; extern int yyleng; @@ -309,10 +312,6 @@ static void yy_fatal_error YY_PROTO(( const char msg[] )); #define YY_BREAK break; #endif -#ifndef YY_BUF_SIZE -#define YY_BUF_SIZE (YY_READ_BUF_SIZE * 2) /* size of default input buffer */ -#endif - YY_DECL { register yy_state_type yy_current_state; @@ -592,6 +591,10 @@ int yyFlexLexer::yy_get_next_buffer() while ( num_to_read <= 0 ) { /* Not enough room in the buffer - grow it. 
*/ +#ifdef YY_USES_REJECT + YY_FATAL_ERROR( +"input buffer overflow, can't enlarge buffer because scanner uses REJECT" ); +#else /* just a shorter name for the current buffer */ YY_BUFFER_STATE b = yy_current_buffer; @@ -611,6 +614,7 @@ int yyFlexLexer::yy_get_next_buffer() num_to_read = yy_current_buffer->yy_buf_size - number_to_move - 1; +#endif } if ( num_to_read > YY_READ_BUF_SIZE ) -- cgit v1.2.3 From 3e4b64f31709f644d8f5b4814ae03b4b523c585b Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 29 Nov 1993 17:18:19 +0000 Subject: Removed parse.{c,h} from distribution files, since they may not be all that portable. --- Makefile.in | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile.in b/Makefile.in index 24ae7e7..e1a86d4 100644 --- a/Makefile.in +++ b/Makefile.in @@ -76,7 +76,7 @@ DISTFILES = README NEWS COPYING INSTALL FlexLexer.h \ configure.in Makefile.in mkskel.sh flex.skl \ $(HEADERS) $(SOURCES) $(LIBSRCS) MISC \ flex.1 flexdoc.1 \ - parse.c parse.h scan.c install.sh mkinstalldirs configure + scan.c install.sh mkinstalldirs configure DIST_NAME = flex @@ -197,13 +197,13 @@ mostlyclean: rm -f *~ *.bak core errs clean: mostlyclean - rm -f *.o parse.c *.lint parse.h lex.yy.c $(FLEXLIB) + rm -f parse.c parse.h *.o *.lint lex.yy.c $(FLEXLIB) distclean: clean - rm -f .bootstrap scan.c tags TAGS Makefile config.status + rm -f .bootstrap flex scan.c tags TAGS Makefile config.status realclean: distclean - rm -f flex flex.man flexdoc.man flex*.tar.gz flex*.tar.Z + rm -f flex.man flexdoc.man flex*.tar.gz flex*.tar.Z dist: flex $(DISTFILES) $(MAKE) DIST_NAME=flex-`sed Date: Mon, 29 Nov 1993 21:35:04 +0000 Subject: Added intermediate file going scan.l -> scan.c --- Makefile.in | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile.in b/Makefile.in index e1a86d4..c154cb2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -109,7 +109,8 @@ parse.h: parse.c scan.c: scan.l $(FLEX) $(FLEX_FLAGS) $(COMPRESSION) 
$(srcdir)/scan.l \ - | sed s,\"$(srcdir)/scan.l\",\"scan.l\", >scan.c + | sed s,\"$(srcdir)/scan.l\",\"scan.l\", >scan.tmp + mv scan.tmp scan.c scan.o: scan.c parse.h flexdef.h @@ -194,7 +195,7 @@ gcc-lint: $(LINTSRCS) gcc -Dlint -Wall $(LINTSRCS) >flex.gcc-lint 2>&1 mostlyclean: - rm -f *~ *.bak core errs + rm -f *~ *.bak core errs scan.tmp clean: mostlyclean rm -f parse.c parse.h *.o *.lint lex.yy.c $(FLEXLIB) -- cgit v1.2.3 From 95932f8e65ad52078833cb20f88af774d9d4cf98 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 30 Nov 1993 12:35:27 +0000 Subject: Add AC_ALLOCA if using bison --- configure.in | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/configure.in b/configure.in index b4a7fd1..de6d00b 100644 --- a/configure.in +++ b/configure.in @@ -10,4 +10,10 @@ AC_PROG_INSTALL AC_CONST AC_STDC_HEADERS +case "$YACC" in +*bison*) + AC_ALLOCA + ;; +esac + AC_OUTPUT(Makefile) -- cgit v1.2.3 From b484ced544fec48ab851fc517898f6dbae35a491 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 30 Nov 1993 12:59:19 +0000 Subject: Added alloca --- Makefile.in | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index c154cb2..e9ace7b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -64,7 +64,7 @@ HEADERS = flexdef.h version.h SOURCES = ccl.c dfa.c ecs.c gen.c main.c misc.c nfa.c parse.y \ scan.l skel.c sym.c tblcmp.c yylex.c OBJECTS = ccl.o dfa.o ecs.o gen.o main.o misc.o nfa.o parse.o \ - scan.o skel.o sym.o tblcmp.o yylex.o + scan.o skel.o sym.o tblcmp.o yylex.o @ALLOCA@ LIBSRCS = liballoc.c libmain.c libyywrap.c LIBOBJS = liballoc.o libmain.o libyywrap.o @@ -130,6 +130,10 @@ sym.o: sym.c flexdef.h tblcmp.o: tblcmp.c flexdef.h yylex.o: yylex.c flexdef.h +alloca.o: $(srcdir)/MISC/alloca.c + $(CC) $(CFLAGS) -o alloca.o -c -Dxmalloc=yy_flex_xmalloc \ + $(srcdir)/MISC/alloca.c + test: check check: flex ./flex $(FLEX_FLAGS) $(COMPRESSION) $(srcdir)/scan.l \ -- cgit v1.2.3 From 35357bf482d193306cf1c0606c67ea73ad149e3d Mon Sep 
17 00:00:00 2001 From: Vern Paxson Date: Tue, 30 Nov 1993 12:59:42 +0000 Subject: Added #ifdef chud for alloca() --- parse.y | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/parse.y b/parse.y index 709996c..beb6e11 100644 --- a/parse.y +++ b/parse.y @@ -31,6 +31,31 @@ /* $Header$ */ + +/* Some versions of bison are broken in that they use alloca() but don't + * declare it properly. The following is the patented (just kidding!) + * #ifdef chud to fix the problem, courtesy of Francois Pinard. + */ +#ifdef YYBISON +/* AIX requires this to be the first thing in the file. */ +#ifdef __GNUC__ +#define alloca __builtin_alloca +#else /* not __GNUC__ */ +#if HAVE_ALLOCA_H +#include +#else /* not HAVE_ALLOCA_H */ +#ifdef _AIX + #pragma alloca +#else /* not _AIX */ +char *alloca (); +#endif /* not _AIX */ +#endif /* not HAVE_ALLOCA_H */ +#endif /* not __GNUC__ */ +#endif /* YYBISON */ + +/* Bletch, ^^^^ that was ugly! */ + + #include "flexdef.h" int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen; -- cgit v1.2.3 From b57d555e5c27dd45041af8099717cf21c8d439f9 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 30 Nov 1993 12:59:54 +0000 Subject: Added casts to unsigned Char for isascii() calls --- misc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/misc.c b/misc.c index 94b41f2..67bf6c8 100644 --- a/misc.c +++ b/misc.c @@ -86,7 +86,7 @@ register char *str; { while ( *str ) { - if ( ! isascii( *str ) || ! islower( *str ) ) + if ( ! isascii( (Char) *str ) || ! islower( *str ) ) return 0; ++str; } @@ -102,7 +102,7 @@ register char *str; { while ( *str ) { - if ( ! isascii( *str ) || ! isupper( (char) *str ) ) + if ( ! isascii( (Char) *str ) || ! 
isupper( (char) *str ) ) return 0; ++str; } -- cgit v1.2.3 From de30d9784f5dc4c561d0f1ac915562f0ffe94bd3 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 30 Nov 1993 13:00:42 +0000 Subject: Removed vestigal cast to (char) in isupper() call --- misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misc.c b/misc.c index 67bf6c8..9db1afd 100644 --- a/misc.c +++ b/misc.c @@ -102,7 +102,7 @@ register char *str; { while ( *str ) { - if ( ! isascii( (Char) *str ) || ! isupper( (char) *str ) ) + if ( ! isascii( (Char) *str ) || ! isupper( *str ) ) return 0; ++str; } -- cgit v1.2.3 From 8f442975feb730c8b69e5ac26eb5e3c046f453d1 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 30 Nov 1993 13:00:58 +0000 Subject: Fixed very minor typo in -v output --- main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.c b/main.c index e00db15..ee0c9ae 100644 --- a/main.c +++ b/main.c @@ -334,7 +334,7 @@ int exit_status; num_backing_up ); else fprintf( stderr, - " compressed tables always back-up\n" ); + " Compressed tables always back-up\n" ); if ( bol_needed ) fprintf( stderr, -- cgit v1.2.3 From 1d99f7ba80a3bd6480f4121ab73deb2792d8bedd Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 30 Nov 1993 13:01:09 +0000 Subject: Lowered MAX_SHORT out of increased general paranoia. Added yy_flex_xmalloc() proto --- flexdef.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flexdef.h b/flexdef.h index 3b5cfd8..eb7af1a 100644 --- a/flexdef.h +++ b/flexdef.h @@ -266,7 +266,7 @@ /* Absolute value of largest number that can be stored in a short, with a * bit of slop thrown in for general paranoia. */ -#define MAX_SHORT 32766 +#define MAX_SHORT 32700 /* Declarations for global variables. 
*/ @@ -608,6 +608,7 @@ void *reallocate_array PROTO((void*, int, int)); void *yy_flex_alloc PROTO((int)); void *yy_flex_realloc PROTO((void*, int)); void yy_flex_free PROTO((void*)); +void *yy_flex_xmalloc PROTO((int)); #define allocate_integer_array(size) \ (int *) allocate_array( size, sizeof( int ) ) -- cgit v1.2.3 From df6df2a091ecd4d8d3bdbfd558b99d4b9d31dee2 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 30 Nov 1993 13:01:40 +0000 Subject: Fixed nasty bug in short/long decl decision --- gen.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gen.c b/gen.c index b9e36ca..6b766d2 100644 --- a/gen.c +++ b/gen.c @@ -920,7 +920,7 @@ void gentabs() total_states = lastdfa + numtemps; - printf( (total_states >= MAX_SHORT || long_align) ? + printf( (tblend >= MAX_SHORT || long_align) ? C_long_decl : C_short_decl, "yy_base", total_states + 1 ); @@ -964,7 +964,7 @@ void gentabs() dataend(); - printf( (tblend >= MAX_SHORT || long_align) ? + printf( (total_states >= MAX_SHORT || long_align) ? C_long_decl : C_short_decl, "yy_nxt", tblend + 1 ); @@ -978,7 +978,7 @@ void gentabs() dataend(); - printf( (tblend >= MAX_SHORT || long_align) ? + printf( (total_states >= MAX_SHORT || long_align) ? C_long_decl : C_short_decl, "yy_chk", tblend + 1 ); -- cgit v1.2.3 From b2aeb90d8f8db33780e9dd6799784d292c92b826 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 30 Nov 1993 15:51:18 +0000 Subject: Credit to 2.4 pre-testers. --- README | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README b/README index 5e2f603..a8c98af 100644 --- a/README +++ b/README @@ -15,7 +15,11 @@ Note that flex is distributed under a copyright very similar to that of BSD Unix, and not under the GNU General Public License (GPL), except for the "configure" script, which is covered by the GPL. 
-Please send problems and feedback to: +Many thanks to the 2.4 pre-testers for finding a bunch of bugs and helping +increase/test portability: Francois Pinard, Nathan Zelle, Gavin Nicol, and +Matthew Jacob. + +Please send bug reports and feedback to: Vern Paxson ICSD, 46A/1123 -- cgit v1.2.3 From 8faa935f8f4da0e53990d787654c0412250e402f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 30 Nov 1993 20:58:21 +0000 Subject: Added intermediate step of copying MISC/alloca.c -> alloca.c Included CPPFLAGS when compiling alloca.c --- Makefile.in | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Makefile.in b/Makefile.in index e9ace7b..709b246 100644 --- a/Makefile.in +++ b/Makefile.in @@ -130,9 +130,12 @@ sym.o: sym.c flexdef.h tblcmp.o: tblcmp.c flexdef.h yylex.o: yylex.c flexdef.h -alloca.o: $(srcdir)/MISC/alloca.c - $(CC) $(CFLAGS) -o alloca.o -c -Dxmalloc=yy_flex_xmalloc \ - $(srcdir)/MISC/alloca.c +alloca.o: alloca.c + $(CC) $(CPPFLAGS) $(CFLAGS) -c -Dxmalloc=yy_flex_xmalloc alloca.c + +alloca.c: $(srcdir)/MISC/alloca.c + @rm -f alloca.c + cp $(srcdir)/MISC/alloca.c . 
test: check check: flex @@ -202,7 +205,7 @@ mostlyclean: rm -f *~ *.bak core errs scan.tmp clean: mostlyclean - rm -f parse.c parse.h *.o *.lint lex.yy.c $(FLEXLIB) + rm -f parse.c parse.h *.o alloca.c *.lint lex.yy.c $(FLEXLIB) distclean: clean rm -f .bootstrap flex scan.c tags TAGS Makefile config.status -- cgit v1.2.3 From 94af445f5cc83fa3a58592cee8b09729f65bd80c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 30 Nov 1993 20:59:34 +0000 Subject: described configuration files in manifest --- README | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README b/README index a8c98af..35bb72d 100644 --- a/README +++ b/README @@ -40,9 +40,11 @@ The flex distribution consists of the following files: COPYING flex's copyright - configure.in, Makefile.in, flexdef.h, parse.y, scan.l, ccl.c, - dfa.c, ecs.c, gen.c, main.c, misc.c, nfa.c, sym.c, tblcmp.c, - yylex.c + configure.in, configure, Makefile.in, install.sh, mkinstalldirs + elements of the "autoconf" auto-configuration process + + flexdef.h, parse.y, scan.l, ccl.c, dfa.c, ecs.c, gen.c, main.c, + misc.c, nfa.c, sym.c, tblcmp.c, yylex.c source files version.h version of this flex release -- cgit v1.2.3 From 6617dafb141fb18075921cf07c5f7797d01ce95f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 30 Nov 1993 23:37:00 +0000 Subject: -a -> -Ca --- NEWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 008c104..f09cfeb 100644 --- a/NEWS +++ b/NEWS @@ -60,7 +60,7 @@ Changes between release 2.4.1 (30Nov93) and release 2.3.8: "%array" cannot be used with the '-+' C++ scanner class option. - - The new '-a' option directs flex to trade off memory for + - The new '-Ca' option directs flex to trade off memory for natural alignment when generating a scanner's tables. In particular, table entries that would otherwise be "short" become "long". 
-- cgit v1.2.3 From 5d4573c08371fb9b07c34d82aa8e1938b270539f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 1 Dec 1993 07:52:28 +0000 Subject: Release 2.4.2 --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 3f55ad1..1b912fc 100644 --- a/version.h +++ b/version.h @@ -1 +1 @@ -#define FLEX_VERSION "2.4.1" +#define FLEX_VERSION "2.4.2" -- cgit v1.2.3 From 3d51bec23cf54ee6b19647c94d541fa3cacbeff9 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 1 Dec 1993 07:56:36 +0000 Subject: Produce both compress'd and gzip'd distribution tar files --- Makefile.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.in b/Makefile.in index 709b246..50589b6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -49,7 +49,6 @@ RANLIB = @RANLIB@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ -COMPRESS = compress # You normally do not need to modify anything below this point. # ------------------------------------------------------------ @@ -225,7 +224,8 @@ dist2: @chmod 444 $(DIST_NAME)/initscan.c @chmod +w $(DIST_NAME)/Makefile.in tar chf $(DIST_NAME).tar $(DIST_NAME) - $(COMPRESS) $(DIST_NAME).tar + compress <$(DIST_NAME).tar >$(DIST_NAME).tar.Z + gzip <$(DIST_NAME).tar >$(DIST_NAME).tar.gz Makefile: Makefile.in config.status $(SHELL) config.status -- cgit v1.2.3 From e7b840ae95df6bb6d7a42d18f4515ca2171cba84 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 1 Dec 1993 07:57:48 +0000 Subject: 2.4.2 --- NEWS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/NEWS b/NEWS index f09cfeb..963ac66 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,11 @@ +Changes between release 2.4.2 (01Dec93) and release 2.4.1: + + - Fixed bug in libfl.a referring to non-existent "flexfatal" function. + + - Modified to produce both compress'd and gzip'd tar files for + distributions (you probably don't care about this change!). 
+ + Changes between release 2.4.1 (30Nov93) and release 2.3.8: - The new '-+' flag instructs flex to generate a C++ scanner class -- cgit v1.2.3 From 4bd8a388fdc7b71939aab373db14dd53d7342df3 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 1 Dec 1993 07:59:12 +0000 Subject: ... and remove plain tar file after compression --- Makefile.in | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile.in b/Makefile.in index 50589b6..b07114d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -226,6 +226,7 @@ dist2: tar chf $(DIST_NAME).tar $(DIST_NAME) compress <$(DIST_NAME).tar >$(DIST_NAME).tar.Z gzip <$(DIST_NAME).tar >$(DIST_NAME).tar.gz + @rm $(DIST_NAME).tar Makefile: Makefile.in config.status $(SHELL) config.status -- cgit v1.2.3 From e11471bb404a63a99c275ea7252a8a5a7f8adc44 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 1 Dec 1993 11:58:05 +0000 Subject: Fixed bug in yy_fatal_error() --- flex.skl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flex.skl b/flex.skl index 9cf21d6..3f862b0 100644 --- a/flex.skl +++ b/flex.skl @@ -1000,7 +1000,7 @@ char msg[]; #endif { %- - (void) putc( '\n', stderr ); + fprintf( stderr, "%s\n", msg ); %+ cerr << msg << '\n'; %* -- cgit v1.2.3 From 555e2640d7eefc00f02d372ccf60d66534620527 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 1 Dec 1993 11:58:18 +0000 Subject: yy_flex_xmalloc() moved to misc.c --- flexdef.h | 3 +++ misc.c | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/flexdef.h b/flexdef.h index eb7af1a..8164b5f 100644 --- a/flexdef.h +++ b/flexdef.h @@ -767,6 +767,9 @@ extern void skelout PROTO((void)); /* Output a yy_trans_info structure. */ extern void transition_struct_out PROTO((int, int)); +/* Only needed when using certain broken versions of bison to build parse.c. */ +extern void *yy_flex_xmalloc PROTO(( int )); + /* Set a region of memory to 0. 
*/ extern void zero_out PROTO((char *, int)); diff --git a/misc.c b/misc.c index 9db1afd..8e0efa7 100644 --- a/misc.c +++ b/misc.c @@ -740,6 +740,21 @@ int element_v, element_n; } +/* The following is only needed when building flex's parser using certain + * broken versions of bison. + */ +void *yy_flex_xmalloc( size ) +int size; + { + void *result = yy_flex_alloc( size ); + + if ( ! result ) + flexfatal( "memory allocation failed in yy_flex_xmalloc()" ); + + return result; + } + + /* zero_out - set a region of memory to 0 * * Sets region_ptr[0] through region_ptr[size_in_bytes - 1] to zero. -- cgit v1.2.3 From 05262755f074b2ae76aba024bcbdf0690aa5b837 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 1 Dec 1993 11:58:37 +0000 Subject: 2.4.3 --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 1b912fc..6b8488f 100644 --- a/version.h +++ b/version.h @@ -1 +1 @@ -#define FLEX_VERSION "2.4.2" +#define FLEX_VERSION "2.4.3" -- cgit v1.2.3 From 894a71a5ff92683be4fd9a66996ad9d8e0615e6a Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 2 Dec 1993 21:52:00 +0000 Subject: Use yy_strXXX() routines instead of --- flex.skl | 1 + flexdef.h | 5 +++-- gen.c | 4 ++-- main.c | 9 +++------ misc.c | 4 ++-- scan.l | 16 ++++++++-------- sym.c | 4 ++-- 7 files changed, 21 insertions(+), 22 deletions(-) diff --git a/flex.skl b/flex.skl index 3f862b0..9894dc5 100644 --- a/flex.skl +++ b/flex.skl @@ -101,6 +101,7 @@ extern "C" { extern void *yy_flex_realloc YY_PROTO(( void *ptr, int size )); extern void yy_flex_free YY_PROTO(( void * )); extern int yywrap YY_PROTO(( void )); + extern void yy_strcpy YY_PROTO(( char *s1, const char *s2 )); #ifdef __cplusplus } #endif diff --git a/flexdef.h b/flexdef.h index 8164b5f..b0c818f 100644 --- a/flexdef.h +++ b/flexdef.h @@ -29,7 +29,6 @@ /* @(#) $Header$ (LBL) */ #include -#include #include /* Always be prepared to generate an 8-bit scanner. 
*/ @@ -608,7 +607,9 @@ void *reallocate_array PROTO((void*, int, int)); void *yy_flex_alloc PROTO((int)); void *yy_flex_realloc PROTO((void*, int)); void yy_flex_free PROTO((void*)); -void *yy_flex_xmalloc PROTO((int)); +int yy_strcmp PROTO(( const char *s1, const char *s2 )); +void yy_strcpy PROTO(( char *s1, const char *s2 )); +int yy_strlen PROTO(( const char *s )); #define allocate_integer_array(size) \ (int *) allocate_array( size, sizeof( int ) ) diff --git a/gen.c b/gen.c index 6b766d2..58ff297 100644 --- a/gen.c +++ b/gen.c @@ -594,7 +594,7 @@ int worry_about_NULs; } else - (void) strcpy( char_map, + yy_strcpy( char_map, useecs ? "yy_ec[(unsigned int) *yy_cp]" : "*yy_cp" ); if ( worry_about_NULs && nultrans ) @@ -1052,7 +1052,7 @@ void make_tables() indent_puts( "YY_FATAL_ERROR( \"token too large, exceeds YYLMAX\" ); \\" ); indent_down(); - indent_puts( "strcpy( yytext, yytext_ptr ); \\" ); + indent_puts( "yy_strcpy( yytext, yytext_ptr ); \\" ); } set_indent( 0 ); diff --git a/main.c b/main.c index ee0c9ae..73981d6 100644 --- a/main.c +++ b/main.c @@ -315,7 +315,7 @@ int exit_status; if ( skelname ) fprintf( stderr, " -S%s", skelname ); - if ( strcmp( prefix, "yy" ) ) + if ( yy_strcmp( prefix, "yy" ) ) fprintf( stderr, " -P%s", prefix ); putc( '\n', stderr ); @@ -449,7 +449,7 @@ char **argv; program_name = argv[0]; if ( program_name[0] != '\0' && - program_name[strlen( program_name ) - 1] == '+' ) + program_name[yy_strlen( program_name ) - 1] == '+' ) C_plus_plus = true; /* read flags */ @@ -731,7 +731,7 @@ char **argv; if ( skelname && (skelfile = fopen( skelname, "r" )) == NULL ) lerrsf( "can't open skeleton file %s", skelname ); - if ( strcmp( prefix, "yy" ) ) + if ( yy_strcmp( prefix, "yy" ) ) { #define GEN_PREFIX(name) printf( "#define yy%s %s%s\n", name, prefix, name ); GEN_PREFIX( "FlexLexer" ); @@ -854,10 +854,7 @@ void readin() else { if ( yytext_is_array ) - { - puts( "\n#include \n" ); puts( "extern char yytext[];\n" ); - } else { diff --git 
a/misc.c b/misc.c index 8e0efa7..48cf756 100644 --- a/misc.c +++ b/misc.c @@ -41,7 +41,7 @@ int otoi PROTO((Char [])); void add_action( new_text ) char *new_text; { - int len = strlen( new_text ); + int len = yy_strlen( new_text ); while ( len + action_index >= action_size - 10 /* slop */ ) { @@ -50,7 +50,7 @@ char *new_text; reallocate_character_array( action_array, action_size ); } - strcpy( &action_array[action_index], new_text ); + yy_strcpy( &action_array[action_index], new_text ); action_index += len; } diff --git a/scan.l b/scan.l index a555624..cff9d64 100644 --- a/scan.l +++ b/scan.l @@ -43,11 +43,11 @@ return CHAR; #define RETURNNAME \ - (void) strcpy( nmstr, yytext ); \ + yy_strcpy( nmstr, yytext ); \ return NAME; #define PUT_BACK_STRING(str, start) \ - for ( i = strlen( str ) - 1; i >= start; --i ) \ + for ( i = yy_strlen( str ) - 1; i >= start; --i ) \ unput((str)[i]) #define CHECK_REJECT(str) \ @@ -139,7 +139,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) ^"%"[^sxanpekotcru{}].* synerr( "unrecognized '%' directive" ); ^{NAME} { - (void) strcpy( nmstr, yytext ); + yy_strcpy( nmstr, yytext ); didadef = false; BEGIN(PICKUPDEF); } @@ -171,10 +171,10 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) {WS} /* separates name and definition */ {NOT_WS}.* { - (void) strcpy( (char *) nmdef, yytext ); + yy_strcpy( (char *) nmdef, yytext ); /* Skip trailing whitespace. */ - for ( i = strlen( (char *) nmdef ) - 1; + for ( i = yy_strlen( (char *) nmdef ) - 1; i >= 0 && (nmdef[i] == ' ' || nmdef[i] == '\t'); --i ) ; @@ -296,7 +296,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) "["{FIRST_CCL_CHAR}{CCL_CHAR}* { int cclval; - (void) strcpy( nmstr, yytext ); + yy_strcpy( nmstr, yytext ); /* Check to see if we've already encountered this * ccl. @@ -331,7 +331,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) register Char *nmdefptr; Char *ndlookup(); - (void) strcpy( nmstr, yytext + 1 ); + yy_strcpy( nmstr, yytext + 1 ); nmstr[yyleng - 2] = '\0'; /* chop trailing brace */ if ( ! 
(nmdefptr = ndlookup( nmstr )) ) @@ -340,7 +340,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) else { /* push back name surrounded by ()'s */ - int len = strlen( (char *) nmdefptr ); + int len = yy_strlen( (char *) nmdefptr ); if ( lex_compat || nmdefptr[0] == '^' || (len > 0 && nmdefptr[len - 1] == '$') ) diff --git a/sym.c b/sym.c index 8e4788c..28d8f65 100644 --- a/sym.c +++ b/sym.c @@ -62,7 +62,7 @@ int table_size; while ( sym_entry ) { - if ( ! strcmp( sym, sym_entry->name ) ) + if ( ! yy_strcmp( sym, sym_entry->name ) ) { /* entry already exists */ return -1; } @@ -141,7 +141,7 @@ int table_size; while ( sym_entry ) { - if ( ! strcmp( sym, sym_entry->name ) ) + if ( ! yy_strcmp( sym, sym_entry->name ) ) return sym_entry; sym_entry = sym_entry->next; } -- cgit v1.2.3 From f8b94222c30c31d1c22a9a2548dc830c57f00d27 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 2 Dec 1993 21:52:36 +0000 Subject: Added libstring.c Modified "lint" target to use -Dconst= Added a.out, lex.yy.cc to sundry clean targets --- Makefile.in | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile.in b/Makefile.in index b07114d..8ce90b3 100644 --- a/Makefile.in +++ b/Makefile.in @@ -65,11 +65,11 @@ SOURCES = ccl.c dfa.c ecs.c gen.c main.c misc.c nfa.c parse.y \ OBJECTS = ccl.o dfa.o ecs.o gen.o main.o misc.o nfa.o parse.o \ scan.o skel.o sym.o tblcmp.o yylex.o @ALLOCA@ -LIBSRCS = liballoc.c libmain.c libyywrap.c -LIBOBJS = liballoc.o libmain.o libyywrap.o +LIBSRCS = liballoc.c libmain.c libstring.c libyywrap.c +LIBOBJS = liballoc.o libmain.o libstring.o libyywrap.o LINTSRCS = ccl.c dfa.c ecs.c gen.c main.c misc.c nfa.c parse.c \ - scan.c skel.c sym.c tblcmp.c yylex.c liballoc.c + scan.c skel.c sym.c tblcmp.c yylex.c liballoc.c libstring.c DISTFILES = README NEWS COPYING INSTALL FlexLexer.h \ configure.in Makefile.in mkskel.sh flex.skl \ @@ -195,16 +195,16 @@ TAGS: $(SOURCES) etags $(SOURCES) lint: $(LINTSRCS) - lint $(LINTSRCS) > flex.lint + lint -Dconst= 
$(LINTSRCS) > flex.lint gcc-lint: $(LINTSRCS) gcc -Dlint -Wall $(LINTSRCS) >flex.gcc-lint 2>&1 mostlyclean: - rm -f *~ *.bak core errs scan.tmp + rm -f *~ a.out *.bak core errs scan.tmp clean: mostlyclean - rm -f parse.c parse.h *.o alloca.c *.lint lex.yy.c $(FLEXLIB) + rm -f parse.c parse.h *.o alloca.c *.lint lex.yy.c lex.yy.cc $(FLEXLIB) distclean: clean rm -f .bootstrap flex scan.c tags TAGS Makefile config.status -- cgit v1.2.3 From 31ecd9e00b66f892ba10e8871ce2f3f10ca3cd52 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 2 Dec 1993 21:58:30 +0000 Subject: Updated message regarding missing libfl.a routines Added thanks to Noah Friedman --- flex.1 | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/flex.1 b/flex.1 index 54b6a60..8698ff8 100644 --- a/flex.1 +++ b/flex.1 @@ -2855,10 +2855,9 @@ scanner complaining about the following missing routines: .ds yywrap yy_flex_alloc - yy_flex_realloc - yy_flex_free + ... .de -then you forgot to link your program with +(and various others) then you forgot to link your program with .B \-lfl. This run-time library is .I required @@ -3001,7 +3000,8 @@ Casey Leedom, Nelson H.F. Beebe, benson@odi.com, Peter A. Bigot, Keith Bostic, Frederic Brehm, Nick Christopher, Jason Coughlin, Bill Cox, Dave Curtis, Scott David Daniels, Chris G. Demetriou, Mike Donahue, Chuck Doucette, Tom Epperly, Leo -Eskin, Chris Faylor, Jon Forrest, Eric Goldman, Ulrich Grepel, Jan Hajic, +Eskin, Chris Faylor, Jon Forrest, Kaveh R. Ghazi, +Eric Goldman, Ulrich Grepel, Jan Hajic, Jarkko Hietaniemi, Eric Hughes, Ceriel Jacobs, Jeffrey R. Jones, Henry Juengst, Amir Katz, ken@ken.hilco.com, Kevin B. Kenny, Marq Kole, Ronald Lamprecht, Greg Lee, Craig Leres, John Levine, Mohamed el Lozy, Chris @@ -3014,7 +3014,8 @@ Yap, Nathan Zelle, David Zuhn, and those whose names have slipped my marginal mail-archiving skills but whose contributions are appreciated all the same. 
.PP -Thanks to Keith Bostic, John Gilmore, Craig Leres, Bob Mulcahy, G.T. +Thanks to Keith Bostic, Noah Friedman, +John Gilmore, Craig Leres, Bob Mulcahy, G.T. Nicol, Francois Pinard, Rich Salz, and Richard Stallman for help with various distribution headaches. .PP -- cgit v1.2.3 From 664dced14ef54fc627f33450e99ca31bebe16d5a Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 2 Dec 1993 21:59:03 +0000 Subject: 2.4.3 --- NEWS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/NEWS b/NEWS index 963ac66..391e10f 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,14 @@ +Changes between release 2.4.3 (03Dec93) and release 2.4.2: + + - Fixed bug causing fatal scanner messages to fail to print. + + - libfl.a now supplies versions of the the <string.h>/<strings.h> + string routines needed by flex and the scanners it generates, + to enhance portability. + + - More robust solution to 2.4.2's flexfatal() bug fix. + + Changes between release 2.4.2 (01Dec93) and release 2.4.1: - Fixed bug in libfl.a referring to non-existent "flexfatal" function. -- cgit v1.2.3 From 24c6b401caa5192debd89b06a8fbe4416a37e504 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 2 Dec 1993 22:18:28 +0000 Subject: Elaborated comments for 2.4.3 --- NEWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 391e10f..11e1b8a 100644 --- a/NEWS +++ b/NEWS @@ -4,7 +4,7 @@ Changes between release 2.4.3 (03Dec93) and release 2.4.2: - libfl.a now supplies versions of the the <string.h>/<strings.h> string routines needed by flex and the scanners it generates, - to enhance portability. + to enhance portability to some BSD systems. - More robust solution to 2.4.2's flexfatal() bug fix. 
-- cgit v1.2.3 From 4c0a9ba02f78d50c0fb3d6c8b47bb7a42d38414f Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 3 Dec 1993 12:47:05 +0000 Subject: -F incompatible with -+ --- flex.1 | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/flex.1 b/flex.1 index 8698ff8..1871316 100644 --- a/flex.1 +++ b/flex.1 @@ -1715,7 +1715,8 @@ to detect the keywords, you're better off using .IP This option is equivalent to .B \-CFr -(see below). +(see below). It cannot be used with +.B \-+. .TP .B \-I instructs @@ -1905,7 +1906,8 @@ specifies that the alternate fast scanner representation (described above under the .B \-F flag) -should be used. +should be used. This option cannot be used with +.B \-+. .IP .B \-Cm directs -- cgit v1.2.3 From c316bc196572022de225ba1cbcffec8f0f992fb5 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 3 Dec 1993 12:49:45 +0000 Subject: mods so FlexLexer.h can be included separately --- FlexLexer.h | 22 ++++++++++++++-------- flex.skl | 4 +++- main.c | 13 ++++++++----- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/FlexLexer.h b/FlexLexer.h index 571a6b7..4427def 100644 --- a/FlexLexer.h +++ b/FlexLexer.h @@ -35,6 +35,10 @@ // If you want to create multiple lexer classes, you use the -P flag // to rename each yyFlexLexer to some other xxFlexLexer. 
+#include + +struct yy_buffer_state; +typedef int yy_state_type; class FlexLexer { public: @@ -43,9 +47,11 @@ public: const char* YYText() { return yytext; } int YYLeng() { return yyleng; } - virtual void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) = 0; - virtual YY_BUFFER_STATE yy_create_buffer( istream* s, int size ) = 0; - virtual void yy_delete_buffer( YY_BUFFER_STATE b ) = 0; + virtual void + yy_switch_to_buffer( struct yy_buffer_state* new_buffer ) = 0; + virtual struct yy_buffer_state* + yy_create_buffer( istream* s, int size ) = 0; + virtual void yy_delete_buffer( struct yy_buffer_state* b ) = 0; virtual void yyrestart( istream* s ) = 0; virtual int yylex() = 0; @@ -88,9 +94,9 @@ public: delete yy_state_buf; } - void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ); - YY_BUFFER_STATE yy_create_buffer( istream* s, int size ); - void yy_delete_buffer( YY_BUFFER_STATE b ); + void yy_switch_to_buffer( struct yy_buffer_state* new_buffer ); + struct yy_buffer_state* yy_create_buffer( istream* s, int size ); + void yy_delete_buffer( struct yy_buffer_state* b ); void yyrestart( istream* s ); virtual int yylex(); @@ -103,7 +109,7 @@ protected: int yyinput(); void yy_load_buffer_state(); - void yy_init_buffer( YY_BUFFER_STATE b, istream* s ); + void yy_init_buffer( struct yy_buffer_state* b, istream* s ); yy_state_type yy_get_previous_state(); yy_state_type yy_try_NUL_trans( yy_state_type current_state ); @@ -112,7 +118,7 @@ protected: istream* yyin; // input source for default LexerInput ostream* yyout; // output sink for default LexerOutput - YY_BUFFER_STATE yy_current_buffer; + struct yy_buffer_state* yy_current_buffer; // yy_hold_char holds the character lost when yytext is formed. 
char yy_hold_char; diff --git a/flex.skl b/flex.skl index 9894dc5..2dd5df8 100644 --- a/flex.skl +++ b/flex.skl @@ -6,7 +6,9 @@ #define FLEX_SCANNER +%- #include +%* /* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */ @@ -21,7 +23,7 @@ #include %+ -#include +class istream; %* #include diff --git a/main.c b/main.c index 73981d6..8e9d108 100644 --- a/main.c +++ b/main.c @@ -677,13 +677,16 @@ char **argv; } if ( (fulltbl || fullspd) && usemecs ) - flexerror( "-f/-F and -Cm don't make sense together" ); + flexerror( "-Cf/-CF and -Cm don't make sense together" ); if ( (fulltbl || fullspd) && interactive ) - flexerror( "-f/-F and -I are incompatible" ); + flexerror( "-Cf/-CF and -I are incompatible" ); if ( fulltbl && fullspd ) - flexerror( "-f and -F are mutually exclusive" ); + flexerror( "-Cf and -CF are mutually exclusive" ); + + if ( C_plus_plus && fullspd ) + flexerror( "Can't use -+ with -CF option" ); if ( ! use_stdout ) { @@ -830,7 +833,7 @@ void readin() if ( fullspd ) printf( "typedef const struct yy_trans_info *yy_state_type;\n" ); - else + else if ( ! C_plus_plus ) printf( "typedef int yy_state_type;\n" ); if ( reject ) @@ -845,7 +848,7 @@ void readin() printf( "extern int yylineno;\n" ); printf( "int yylineno = 1;\n" ); } - else + else if ( ! C_plus_plus ) printf( "FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;\n" ); if ( C_plus_plus ) -- cgit v1.2.3 From a65a41564c17ad24aded1c25dbc44607e10392eb Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 3 Dec 1993 12:50:09 +0000 Subject: FlexLexer.h fixed for separate inclusion --- NEWS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS b/NEWS index 11e1b8a..294aaa6 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,10 @@ Changes between release 2.4.3 (03Dec93) and release 2.4.2: - Fixed bug causing fatal scanner messages to fail to print. + - Fixed things so FlexLexer.h can be included in other C++ + sources. One side-effect of this change is that -+ and -CF + are now incompatible. 
+ - libfl.a now supplies versions of the the <string.h>/<strings.h> string routines needed by flex and the scanners it generates, to enhance portability to some BSD systems. -- cgit v1.2.3 From 8a10749eadcf14862b94a44d5622f36983f505e6 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 3 Dec 1993 21:35:07 +0000 Subject: lint tweak --- flex.skl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flex.skl b/flex.skl index 2dd5df8..85c5c0d 100644 --- a/flex.skl +++ b/flex.skl @@ -1003,7 +1003,7 @@ char msg[]; #endif { %- - fprintf( stderr, "%s\n", msg ); + (void) fprintf( stderr, "%s\n", msg ); %+ cerr << msg << '\n'; %* -- cgit v1.2.3 From 6733bdf3e49166ca144157a88300473a9f5cb129 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 3 Dec 1993 21:35:21 +0000 Subject: credit update --- flex.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flex.1 b/flex.1 index 1871316..6b77de3 100644 --- a/flex.1 +++ b/flex.1 @@ -3009,7 +3009,7 @@ Juengst, Amir Katz, ken@ken.hilco.com, Kevin B. Kenny, Marq Kole, Ronald Lamprecht, Greg Lee, Craig Leres, John Levine, Mohamed el Lozy, Chris Metcalf, Luke Mewburn, Jim Meyering, G.T. 
Nicol, Landon Noll, Marc Nozell, Richard Ohnemus, Sven Panne, Roland Pesch, Walter Pelissero, Gaumond -Pierre, Esmond Pitt, Jef Poskanzer, Kevin Rodgers, Jim +Pierre, Esmond Pitt, Jef Poskanzer, Joe Rahmeh, Kevin Rodgers, Jim Roskind, Doug Schmidt, Alex Siegel, Paul Stuart, Dave Tallman, Paul Tuinenga, Gary Weik, Frank Whaley, Gerhard Wilhelms, Kent Williams, Ken Yap, Nathan Zelle, David Zuhn, and those whose names have slipped my marginal -- cgit v1.2.3 From 642caa6a88d3d74dea92468ae3bf6fc2e7eab242 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 3 Dec 1993 21:35:38 +0000 Subject: Added exec_prefix --- Makefile.in | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile.in b/Makefile.in index 8ce90b3..d5c8e53 100644 --- a/Makefile.in +++ b/Makefile.in @@ -29,8 +29,9 @@ LIBS = @LIBS@ # nroff preformatted versions will be installed if INSTALLMAN=cat. prefix = /usr/local -bindir = $(prefix)/bin -libdir = $(prefix)/lib +exec_prefix = $(prefix) +bindir = $(exec_prefix)/bin +libdir = $(exec_prefix)/lib includedir = $(prefix)/include manext = 1 mandir = $(prefix)/man/man$(manext) -- cgit v1.2.3 From c3a8ff54f61e8ead3e076a8773faad3e52130bc9 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 3 Dec 1993 21:59:50 +0000 Subject: lint tweaks --- flexdef.h | 2 +- gen.c | 2 +- misc.c | 3 +-- sym.c | 12 +++++++----- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/flexdef.h b/flexdef.h index b0c818f..bea7aa5 100644 --- a/flexdef.h +++ b/flexdef.h @@ -548,7 +548,7 @@ extern char **scname; extern int current_max_dfa_size, current_max_xpairs; extern int current_max_template_xpairs, current_max_dfas; -extern int lastdfa, lasttemp, *nxt, *chk, *tnxt; +extern int lastdfa, *nxt, *chk, *tnxt; extern int *base, *def, *nultrans, NUL_ec, tblend, firstfree, **dss, *dfasiz; extern union dfaacc_union { diff --git a/gen.c b/gen.c index 58ff297..34c605e 100644 --- a/gen.c +++ b/gen.c @@ -1255,7 +1255,7 @@ void make_tables() printf( "\tif ( 
yy_current_buffer->is_interactive ) \\\n" ); printf( - "\t\tresult = (buf[0] = getc( yyin )) == EOF ? 0 : 1; \\\n" ); +"\t\tresult = ((int) (buf[0] = getc( yyin ))) == EOF ? 0 : 1; \\\n" ); printf( "\telse if ( ((result = fread( (char *) buf, 1, max_size, yyin )) == 0)\\\n" ); printf( "\t\t && ferror( yyin ) ) \\\n" ); diff --git a/misc.c b/misc.c index 48cf756..bcc75f3 100644 --- a/misc.c +++ b/misc.c @@ -501,7 +501,6 @@ Char myesc( array ) Char array[]; { Char c, esc_char; - register int sptr; switch ( array[1] ) { @@ -530,7 +529,7 @@ Char array[]; case '8': case '9': { /* \ */ - sptr = 1; + int sptr = 1; while ( isascii( array[sptr] ) && isdigit( array[sptr] ) ) diff --git a/sym.c b/sym.c index 28d8f65..29c0f30 100644 --- a/sym.c +++ b/sym.c @@ -134,7 +134,8 @@ int table_size; { static struct hash_entry empty_entry = { - (struct hash_entry *) 0, (struct hash_entry *) 0, NULL, NULL, 0, + (struct hash_entry *) 0, (struct hash_entry *) 0, + (char *) 0, (char *) 0, 0, } ; register struct hash_entry *sym_entry = table[hashfunct( sym, table_size )]; @@ -174,14 +175,15 @@ int hash_size; /* ndinstal - install a name definition */ -void ndinstal( nd, def ) -char nd[]; -Char def[]; +void ndinstal( name, definition ) +char name[]; +Char definition[]; { char *copy_string(); Char *copy_unsigned_string(); - if ( addsym( copy_string( nd ), (char *) copy_unsigned_string( def ), 0, + if ( addsym( copy_string( name ), + (char *) copy_unsigned_string( definition ), 0, ndtbl, NAME_TABLE_HASH_SIZE ) ) synerr( "name defined twice" ); } -- cgit v1.2.3 From 019956a55b14e31685e18b08b8149c94f9e731d1 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 3 Dec 1993 22:02:03 +0000 Subject: Hopefully last update prior to 2.4.3 --- NEWS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS b/NEWS index 294aaa6..00ac0bb 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,8 @@ Changes between release 2.4.3 (03Dec93) and release 2.4.2: - More robust solution to 2.4.2's flexfatal() bug fix. 
+ - Some lint tweaks. + Changes between release 2.4.2 (01Dec93) and release 2.4.1: -- cgit v1.2.3 From a50e336829b65f471a01422f95c10e75fdfe4082 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 3 Dec 1993 22:18:32 +0000 Subject: run ranlib on libfl.a --- Makefile.in | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile.in b/Makefile.in index d5c8e53..1aaed6f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -169,6 +169,7 @@ install: flex $(FLEXLIB) installdirs install.$(INSTALLMAN) @rm -f $(bindir)/flex++ $(LN_S) $(bindir)/flex $(bindir)/flex++ $(INSTALL_DATA) $(FLEXLIB) $(libdir)/libfl.a + -(cd $(libdir); $(RANLIB) $(libdir)/libfl.a) $(INSTALL_DATA) $(srcdir)/FlexLexer.h $(includedir)/FlexLexer.h install.man: flex.1 flexdoc.1 -- cgit v1.2.3 From 893fc54c6a845f50ece114d2260f4427469b72f0 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 3 Dec 1993 22:19:23 +0000 Subject: Minor tweak to last change --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 1aaed6f..2eb78ba 100644 --- a/Makefile.in +++ b/Makefile.in @@ -169,7 +169,7 @@ install: flex $(FLEXLIB) installdirs install.$(INSTALLMAN) @rm -f $(bindir)/flex++ $(LN_S) $(bindir)/flex $(bindir)/flex++ $(INSTALL_DATA) $(FLEXLIB) $(libdir)/libfl.a - -(cd $(libdir); $(RANLIB) $(libdir)/libfl.a) + -(cd $(libdir); $(RANLIB) libfl.a) $(INSTALL_DATA) $(srcdir)/FlexLexer.h $(includedir)/FlexLexer.h install.man: flex.1 flexdoc.1 -- cgit v1.2.3 From 6d624efd9544b890a55228ad489accd498f20710 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 3 Dec 1993 22:20:15 +0000 Subject: ranlib addition for 2.4.3 --- NEWS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS b/NEWS index 00ac0bb..dee8689 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,8 @@ Changes between release 2.4.3 (03Dec93) and release 2.4.2: - More robust solution to 2.4.2's flexfatal() bug fix. + - Added ranlib of installed libfl.a. + - Some lint tweaks. 
-- cgit v1.2.3 From f4de35b76ac7e067ea227c093903cd2e1bb31d45 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 3 Dec 1993 22:20:45 +0000 Subject: updated credits --- flex.1 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flex.1 b/flex.1 index 6b77de3..4756550 100644 --- a/flex.1 +++ b/flex.1 @@ -3004,7 +3004,8 @@ Brehm, Nick Christopher, Jason Coughlin, Bill Cox, Dave Curtis, Scott David Daniels, Chris G. Demetriou, Mike Donahue, Chuck Doucette, Tom Epperly, Leo Eskin, Chris Faylor, Jon Forrest, Kaveh R. Ghazi, Eric Goldman, Ulrich Grepel, Jan Hajic, -Jarkko Hietaniemi, Eric Hughes, Ceriel Jacobs, Jeffrey R. Jones, Henry +Jarkko Hietaniemi, Eric Hughes, John Interrante, +Ceriel Jacobs, Jeffrey R. Jones, Henry Juengst, Amir Katz, ken@ken.hilco.com, Kevin B. Kenny, Marq Kole, Ronald Lamprecht, Greg Lee, Craig Leres, John Levine, Mohamed el Lozy, Chris Metcalf, Luke Mewburn, Jim Meyering, G.T. Nicol, Landon Noll, Marc Nozell, -- cgit v1.2.3 From f04862cc1f038e69b6a374eee3d5f8094d60926c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 3 Dec 1993 22:45:17 +0000 Subject: Added note regarding g++ 2.5.X --- NEWS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/NEWS b/NEWS index dee8689..946bd11 100644 --- a/NEWS +++ b/NEWS @@ -16,6 +16,14 @@ Changes between release 2.4.3 (03Dec93) and release 2.4.2: - Some lint tweaks. + - NOTE: problems have been encountered attempting to build flex + C++ scanners using g++ version 2.5.X. The problem is due to an + unfortunate heuristic in g++ 2.5.X that attempts to discern between + C and C++ headers. Because FlexLexer.h is installed (by default) + in /usr/local/include and not /usr/local/lib/g++-include, g++ 2.5.X + decides that it's a C header :-(. So if you have problems, install + the header in /usr/local/lib/g++-include instead. 
+ Changes between release 2.4.2 (01Dec93) and release 2.4.1: -- cgit v1.2.3 From b0c3575e2b004975e7bff9689774d692006ef598 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 4 Dec 1993 22:02:54 +0000 Subject: Fixed bug in pointing yyin at a new file and resuming scanning --- flex.skl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/flex.skl b/flex.skl index 85c5c0d..b30016d 100644 --- a/flex.skl +++ b/flex.skl @@ -402,6 +402,20 @@ do_action: /* This label is used only to access EOF actions. */ /* Undo the effects of YY_DO_BEFORE_ACTION. */ *yy_cp = yy_hold_char; + if ( yy_current_buffer->yy_input_file != yyin ) + { + /* This can happen if we scan a file, yywrap() returns + * 1, and then later the user points yyin at a new + * file to resume scanning. We have to assure + * consistency between yy_current_buffer and our + * globals. Here is the right place to do so, because + * this is the first action (other than possibly a + * back-up) that will match for the new input file. + */ + yy_current_buffer->yy_input_file = yyin; + yy_n_chars = yy_current_buffer->yy_n_chars; + } + /* Note that here we test for yy_c_buf_p "<=" to the position * of the first EOB in the buffer, since yy_c_buf_p will * already have been incremented past the NUL character -- cgit v1.2.3 From 2a2c13ba5f7faaebf10da777e01d11af1ec1f9c9 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 5 Dec 1993 17:07:59 +0000 Subject: YY_USER_ACTION generated now for each case in action switch --- flex.skl | 14 +++++++------- nfa.c | 6 ++++++ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/flex.skl b/flex.skl index b30016d..62597cc 100644 --- a/flex.skl +++ b/flex.skl @@ -310,6 +310,13 @@ static void yy_fatal_error YY_PROTO(( const char msg[] )); %* #endif +/* Code executed at the beginning of each rule, after yytext and yyleng + * have been set up. + */ +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + /* Code executed at the end of each rule. 
*/ #ifndef YY_BREAK #define YY_BREAK break; @@ -377,13 +384,6 @@ yy_find_action: YY_DO_BEFORE_ACTION; -#ifdef YY_USER_ACTION - if ( yy_act != YY_END_OF_BUFFER ) - { - YY_USER_ACTION; - } -#endif - %% code for yylineno update goes here, if -l option do_action: /* This label is used only to access EOF actions. */ diff --git a/nfa.c b/nfa.c index 9be63e1..2424176 100644 --- a/nfa.c +++ b/nfa.c @@ -262,6 +262,12 @@ int mach, variable_trail_rule, headcnt, trailcnt; } } + /* Okay, in the action code at this point yytext and yyleng have + * their proper final values for this rule, so here's the point + * to do any user action. + */ + add_action( "YY_USER_ACTION\n" ); + line_directive_out( (FILE *) 0 ); } -- cgit v1.2.3 From c766a8c9777a484f0aa6b2a2b8f9e2a332d97cca Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 5 Dec 1993 17:08:35 +0000 Subject: very minor "install" tweaks --- Makefile.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.in b/Makefile.in index 2eb78ba..e78576d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -167,9 +167,9 @@ flexdoc.man: flexdoc.1 install: flex $(FLEXLIB) installdirs install.$(INSTALLMAN) $(INSTALL_PROGRAM) flex $(bindir)/flex @rm -f $(bindir)/flex++ - $(LN_S) $(bindir)/flex $(bindir)/flex++ + cd $(bindir); $(LN_S) flex flex++ $(INSTALL_DATA) $(FLEXLIB) $(libdir)/libfl.a - -(cd $(libdir); $(RANLIB) libfl.a) + -cd $(libdir); $(RANLIB) libfl.a $(INSTALL_DATA) $(srcdir)/FlexLexer.h $(includedir)/FlexLexer.h install.man: flex.1 flexdoc.1 -- cgit v1.2.3 From 2478059e12bbfca53d1f86cc9677992d554b67f3 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 5 Dec 1993 17:11:39 +0000 Subject: credits update --- flex.1 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flex.1 b/flex.1 index 4756550..ea7896f 100644 --- a/flex.1 +++ b/flex.1 @@ -3011,14 +3011,14 @@ Lamprecht, Greg Lee, Craig Leres, John Levine, Mohamed el Lozy, Chris Metcalf, Luke Mewburn, Jim Meyering, G.T. 
Nicol, Landon Noll, Marc Nozell, Richard Ohnemus, Sven Panne, Roland Pesch, Walter Pelissero, Gaumond Pierre, Esmond Pitt, Jef Poskanzer, Joe Rahmeh, Kevin Rodgers, Jim -Roskind, Doug Schmidt, Alex Siegel, Paul Stuart, Dave Tallman, Paul -Tuinenga, Gary Weik, Frank Whaley, Gerhard Wilhelms, Kent Williams, Ken +Roskind, Doug Schmidt, Alex Siegel, Paul Stuart, Dave Tallman, Chris Thewalt, +Paul Tuinenga, Gary Weik, Frank Whaley, Gerhard Wilhelms, Kent Williams, Ken Yap, Nathan Zelle, David Zuhn, and those whose names have slipped my marginal mail-archiving skills but whose contributions are appreciated all the same. .PP Thanks to Keith Bostic, Noah Friedman, -John Gilmore, Craig Leres, Bob Mulcahy, G.T. +John Gilmore, Craig Leres, John Levine, Bob Mulcahy, G.T. Nicol, Francois Pinard, Rich Salz, and Richard Stallman for help with various distribution headaches. .PP -- cgit v1.2.3 From e478ea833f649bdb0e1b7a835b7259289b87b963 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 6 Dec 1993 09:43:52 +0000 Subject: 2.4.4 --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 6b8488f..6bf4ca0 100644 --- a/version.h +++ b/version.h @@ -1 +1 @@ -#define FLEX_VERSION "2.4.3" +#define FLEX_VERSION "2.4.4" -- cgit v1.2.3 From cd0e94e833e69b7b52210125b74f2728bdbb2afb Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 7 Dec 1993 10:11:41 +0000 Subject: Support for yyFlexLexer::LexerError --- FlexLexer.h | 1 + flex.skl | 21 ++++++++++++--------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/FlexLexer.h b/FlexLexer.h index 4427def..f1616b4 100644 --- a/FlexLexer.h +++ b/FlexLexer.h @@ -104,6 +104,7 @@ public: protected: virtual int LexerInput( char* buf, int max_size ); virtual void LexerOutput( const char* buf, int size ); + virtual void LexerError( const char* msg ); void yyunput( int c, char* buf_ptr ); int yyinput(); diff --git a/flex.skl b/flex.skl index 62597cc..336fdec 100644 --- a/flex.skl +++ 
b/flex.skl @@ -296,7 +296,11 @@ static void yy_fatal_error YY_PROTO(( const char msg[] )); /* Report a fatal error. */ #ifndef YY_FATAL_ERROR +%- #define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) +%+ +#define YY_FATAL_ERROR(msg) LexerError( msg ) +%* #endif /* Default declaration of generated scanner - a define so the user can @@ -514,13 +518,6 @@ do_action: /* This label is used only to access EOF actions. */ } default: -#ifdef FLEX_DEBUG -%- - printf( "action # %d\n", yy_act ); -%+ - cout << "action # " << yy_act << '\n'; -%* -#endif YY_FATAL_ERROR( "fatal flex scanner internal error--no action found" ); } /* end of action switch */ @@ -1009,6 +1006,7 @@ void yyFlexLexer::yy_init_buffer( YY_BUFFER_STATE b, istream* file ) } +%- #ifdef YY_USE_PROTOS static void yy_fatal_error( const char msg[] ) #else @@ -1016,13 +1014,18 @@ static void yy_fatal_error( msg ) char msg[]; #endif { -%- (void) fprintf( stderr, "%s\n", msg ); + exit( 1 ); + } + %+ + +void yyFlexLexer::LexerError( const char msg[] ) + { cerr << msg << '\n'; -%* exit( 1 ); } +%* /* Redefine yyless() so it works in section 3 code. 
*/ -- cgit v1.2.3 From 08f37c757a8c9afbbe514527b6ba0676db5edee7 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 7 Dec 1993 10:18:09 +0000 Subject: {min,max,abs} -> {MIN,MAX,ABS} --- dfa.c | 4 ++-- ecs.c | 2 +- flexdef.h | 12 ++++++------ gen.c | 8 ++++---- main.c | 2 +- nfa.c | 4 ++-- tblcmp.c | 4 ++-- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/dfa.c b/dfa.c index 0293e3c..5d4c45f 100644 --- a/dfa.c +++ b/dfa.c @@ -204,7 +204,7 @@ int state[]; for ( i = 0; i < csize; ++i ) { - ec = abs( ecgroup[i] ); + ec = ABS( ecgroup[i] ); out_char_set[i] = state[ec]; } @@ -978,7 +978,7 @@ int ds[], dsize, transsym, nset[]; { /* do nothing */ } - else if ( abs( ecgroup[sym] ) == transsym ) + else if ( ABS( ecgroup[sym] ) == transsym ) nset[++numstates] = tsp; bottom: ; diff --git a/ecs.c b/ecs.c index cc4e955..4c3b950 100644 --- a/ecs.c +++ b/ecs.c @@ -80,7 +80,7 @@ int fwd[], bck[], num; numcl = 0; - /* Create equivalence class numbers. From now on, abs( bck(x) ) + /* Create equivalence class numbers. From now on, ABS( bck(x) ) * is the equivalence class number for object x. If bck(x) * is positive, then x is the representative of its equivalence * class. diff --git a/flexdef.h b/flexdef.h index bea7aa5..74f075b 100644 --- a/flexdef.h +++ b/flexdef.h @@ -61,14 +61,14 @@ /* Maximum line length we'll have to deal with. */ #define MAXLINE 2048 -#ifndef min -#define min(x,y) ((x) < (y) ? (x) : (y)) +#ifndef MIN +#define MIN(x,y) ((x) < (y) ? (x) : (y)) #endif -#ifndef max -#define max(x,y) ((x) > (y) ? (x) : (y)) +#ifndef MAX +#define MAX(x,y) ((x) > (y) ? (x) : (y)) #endif -#ifndef abs -#define abs(x) ((x) < 0 ? -(x) : (x)) +#ifndef ABS +#define ABS(x) ((x) < 0 ? 
-(x) : (x)) #endif diff --git a/gen.c b/gen.c index 34c605e..c12849c 100644 --- a/gen.c +++ b/gen.c @@ -240,7 +240,7 @@ void genecs() if ( caseins && (i >= 'A') && (i <= 'Z') ) ecgroup[i] = ecgroup[clower( i )]; - ecgroup[i] = abs( ecgroup[i] ); + ecgroup[i] = ABS( ecgroup[i] ); mkdata( ecgroup[i] ); } @@ -786,7 +786,7 @@ void gentabs() dfaacc[end_of_buffer_state].dfaacc_set = EOB_accepting_list; printf( long_align ? C_long_decl : C_short_decl, - "yy_acclist", max( numas, 1 ) + 1 ); + "yy_acclist", MAX( numas, 1 ) + 1 ); j = 1; /* index into "yy_acclist" array */ @@ -910,9 +910,9 @@ void gentabs() { if ( trace ) fprintf( stderr, "%d = %d\n", - i, abs( tecbck[i] ) ); + i, ABS( tecbck[i] ) ); - mkdata( abs( tecbck[i] ) ); + mkdata( ABS( tecbck[i] ) ); } dataend(); diff --git a/main.c b/main.c index 8e9d108..dd6d8cc 100644 --- a/main.c +++ b/main.c @@ -873,7 +873,7 @@ void readin() /* Now map the equivalence class for NUL to its expected place. */ ecgroup[0] = ecgroup[csize]; - NUL_ec = abs( ecgroup[0] ); + NUL_ec = ABS( ecgroup[0] ); if ( useecs ) ccl2ecl(); diff --git a/nfa.c b/nfa.c index 2424176..44e893d 100644 --- a/nfa.c +++ b/nfa.c @@ -301,8 +301,8 @@ int first, last; { mkxtion( finalst[first], last ); finalst[first] = finalst[last]; - lastst[first] = max( lastst[first], lastst[last] ); - firstst[first] = min( firstst[first], firstst[last] ); + lastst[first] = MAX( lastst[first], lastst[last] ); + firstst[first] = MIN( firstst[first], firstst[last] ); return first; } diff --git a/tblcmp.c b/tblcmp.c index 654f95c..e8dd615 100644 --- a/tblcmp.c +++ b/tblcmp.c @@ -610,7 +610,7 @@ int numchars, statenum, deflink, totaltrans; /* Ensure that the base address we eventually generate is * non-negative. 
*/ - baseaddr = max( tblend + 1, minec ); + baseaddr = MAX( tblend + 1, minec ); } tblbase = baseaddr - minec; @@ -635,7 +635,7 @@ int numchars, statenum, deflink, totaltrans; for ( ++firstfree; chk[firstfree] != 0; ++firstfree ) ; - tblend = max( tblend, tbllast ); + tblend = MAX( tblend, tbllast ); } -- cgit v1.2.3 From 0892f3f0b30ea489a905e0d0513c0317cf1be528 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 7 Dec 1993 11:10:49 +0000 Subject: Fixed nasty 8-bit bugs --- flex.skl | 7 +++++++ gen.c | 35 ++++++++++++++++++----------------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/flex.skl b/flex.skl index 336fdec..244a930 100644 --- a/flex.skl +++ b/flex.skl @@ -65,6 +65,13 @@ class istream; /* Returned upon end-of-file. */ #define YY_NULL 0 +/* Promotes a possibly negative, possibly signed char to an unsigned + * integer for use as an array index. If the signed char is negative, + * we want to instead treat it as an 8-bit unsigned char, hence the + * double cast. + */ +#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) + /* Enter a start condition. This macro really ought to take a parameter, * but we do it the disgusting crufty way forced on us by the ()-less * definition of BEGIN. diff --git a/gen.c b/gen.c index c12849c..e25160d 100644 --- a/gen.c +++ b/gen.c @@ -48,6 +48,7 @@ static int indent_level = 0; /* each level is 8 spaces */ /* *Everything* is done in terms of arrays starting at 1, so provide * a null entry for the zero element of all C arrays. 
*/ +static char C_int_decl[] = "static const int %s[%d] =\n { 0,\n"; static char C_short_decl[] = "static const short int %s[%d] =\n { 0,\n"; static char C_long_decl[] = "static const long int %s[%d] =\n { 0,\n"; static char C_state_decl[] = @@ -225,15 +226,10 @@ void genctbl() void genecs() { Char clower(); - static char C_char_decl[] = "static const %s %s[%d] =\n { 0,\n"; - /* } so vi doesn't get confused */ register int i, j; int numrows; - if ( numecs < csize ) - printf( C_char_decl, "YY_CHAR", "yy_ec", csize ); - else - printf( C_char_decl, "short", "yy_ec", csize ); + printf( C_int_decl, "yy_ec", csize ); for ( i = 1; i < csize; ++i ) { @@ -477,14 +473,18 @@ void gen_next_match() /* NOTE - changes in here should be reflected in gen_next_state() and * gen_NUL_trans(). */ - char *char_map = useecs ? "yy_ec[(unsigned int) *yy_cp]" : "*yy_cp"; - char *char_map_2 = - useecs ? "yy_ec[(unsigned int) *++yy_cp]" : "*++yy_cp"; + char *char_map = useecs ? + "yy_ec[YY_SC_TO_UI(*yy_cp)]" : + "YY_SC_TO_UI(*yy_cp)"; + + char *char_map_2 = useecs ? + "yy_ec[YY_SC_TO_UI(*++yy_cp)]" : + "YY_SC_TO_UI(*++yy_cp)"; if ( fulltbl ) { indent_put2s( -"while ( (yy_current_state = yy_nxt[yy_current_state][(unsigned int)%s]) > 0 )", + "while ( (yy_current_state = yy_nxt[yy_current_state][%s]) > 0 )", char_map ); indent_up(); @@ -586,16 +586,17 @@ int worry_about_NULs; { if ( useecs ) (void) sprintf( char_map, - "(*yy_cp ? yy_ec[(unsigned int) *yy_cp] : %d)", + "(*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : %d)", NUL_ec ); else (void) sprintf( char_map, - "(*yy_cp ? *yy_cp : %d)", NUL_ec ); + "(*yy_cp ? YY_SC_TO_UI(*yy_cp) : %d)", NUL_ec ); } else - yy_strcpy( char_map, - useecs ? "yy_ec[(unsigned int) *yy_cp]" : "*yy_cp" ); + yy_strcpy( char_map, useecs ? 
+ "yy_ec[YY_SC_TO_UI(*yy_cp)]" : + "YY_SC_TO_UI(*yy_cp)" ); if ( worry_about_NULs && nultrans ) { @@ -610,12 +611,12 @@ int worry_about_NULs; if ( fulltbl ) indent_put2s( - "yy_current_state = yy_nxt[yy_current_state][(unsigned int) %s];", + "yy_current_state = yy_nxt[yy_current_state][%s];", char_map ); else if ( fullspd ) indent_put2s( - "yy_current_state += yy_current_state[(unsigned int) %s].yy_nxt;", + "yy_current_state += yy_current_state[%s].yy_nxt;", char_map ); else @@ -904,7 +905,7 @@ void gentabs() if ( trace ) fputs( "\n\nMeta-Equivalence Classes:\n", stderr ); - printf( C_char_decl, "yy_meta", numecs + 1 ); + printf( C_int_decl, "yy_meta", numecs + 1 ); for ( i = 1; i <= numecs; ++i ) { -- cgit v1.2.3 From f11b7e2d605379289e54554c5752d2970d9f95c2 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 7 Dec 1993 11:11:04 +0000 Subject: Fixed 8-bit bug --- scan.l | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scan.l b/scan.l index cff9d64..7392714 100644 --- a/scan.l +++ b/scan.l @@ -39,7 +39,7 @@ int flexscan() #define RETURNCHAR \ - yylval = yytext[0]; \ + yylval = (unsigned char) yytext[0]; \ return CHAR; #define RETURNNAME \ @@ -360,11 +360,11 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) } } -[/|*+?.()] return (int) yytext[0]; +[/|*+?.()] return (unsigned char) yytext[0]; . RETURNCHAR; -[,*] return (int) yytext[0]; +[,*] return (unsigned char) yytext[0]; ">" BEGIN(SECT2); return '>'; ">"/^ BEGIN(CARETISBOL); return '>'; {SCNAME} RETURNNAME; -- cgit v1.2.3 From e8991ae5877b55e561fffe332ec95814a43abb91 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 7 Dec 1993 11:15:17 +0000 Subject: LexError(), C++ experiment warning, credits --- flex.1 | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/flex.1 b/flex.1 index ea7896f..6bd57d2 100644 --- a/flex.1 +++ b/flex.1 @@ -2459,7 +2459,7 @@ tokens, until a rule's action returns a value. 
In addition, .B yyFlexLexer defines the following protected virtual functions which you can redefine -in derived classes to tailor the scanner's input and output: +in derived classes to tailor the scanner: .TP .B virtual int LexerInput( char* buf, int max_size ) @@ -2489,6 +2489,13 @@ characters from the buffer .B buf, which, while NUL-terminated, may also contain "internal" NUL's if the scanner's rules can match text with NUL's in them. +.TP +.B +virtual void LexerError( const char* msg ) +reports a fatal error message. The default version of this function +writes the message to the stream +.B cerr +and exits. .PP Note that a .B yyFlexLexer @@ -2568,6 +2575,9 @@ Here is an example of a simple C++ scanner: return 0; } .fi +IMPORTANT: the present form of the scanning class is +.I experimental +and may change considerably between major releases. .SH INCOMPATIBILITIES WITH LEX AND POSIX .I flex is a rewrite of the AT&T Unix @@ -3010,14 +3020,17 @@ Juengst, Amir Katz, ken@ken.hilco.com, Kevin B. Kenny, Marq Kole, Ronald Lamprecht, Greg Lee, Craig Leres, John Levine, Mohamed el Lozy, Chris Metcalf, Luke Mewburn, Jim Meyering, G.T. Nicol, Landon Noll, Marc Nozell, Richard Ohnemus, Sven Panne, Roland Pesch, Walter Pelissero, Gaumond -Pierre, Esmond Pitt, Jef Poskanzer, Joe Rahmeh, Kevin Rodgers, Jim -Roskind, Doug Schmidt, Alex Siegel, Paul Stuart, Dave Tallman, Chris Thewalt, +Pierre, Esmond Pitt, Jef Poskanzer, Joe Rahmeh, Frederic Raimbault, +Rick Richardson, +Kevin Rodgers, Jim Roskind, +Doug Schmidt, Philippe Schnoebelen, +Alex Siegel, Paul Stuart, Dave Tallman, Chris Thewalt, Paul Tuinenga, Gary Weik, Frank Whaley, Gerhard Wilhelms, Kent Williams, Ken Yap, Nathan Zelle, David Zuhn, and those whose names have slipped my marginal mail-archiving skills but whose contributions are appreciated all the same. .PP -Thanks to Keith Bostic, Noah Friedman, +Thanks to Keith Bostic, Jon Forrest, Noah Friedman, John Gilmore, Craig Leres, John Levine, Bob Mulcahy, G.T. 
Nicol, Francois Pinard, Rich Salz, and Richard Stallman for help with various distribution headaches. -- cgit v1.2.3 From f703524d81b556e8cd7a85b06c02db6b1aba99e4 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 7 Dec 1993 11:16:08 +0000 Subject: 2.4.4 --- NEWS | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index 946bd11..843c1e8 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,24 @@ +Changes between release 2.4.4 (07Dec93) and release 2.4.3: + + - Fixed two serious bugs in scanning 8-bit characters. + + - Fixed bug in YY_USER_ACTION that caused it to be executed + inappropriately (on the scanner's own internal actions, and + with incorrect yytext/yyleng values). + + - Fixed bug in pointing yyin at a new file and resuming scanning. + + - Portability fix regarding min/max/abs macros conflicting with + function definitions in standard header files. + + - Added a virtual LexerError() method to the C++ yyFlexLexer class + for reporting error messages instead of always using cerr. + + - Added warning in flexdoc that the C++ scanning class is presently + experimental and subject to considerable change between major + releases. + + Changes between release 2.4.3 (03Dec93) and release 2.4.2: - Fixed bug causing fatal scanner messages to fail to print. @@ -6,9 +27,9 @@ Changes between release 2.4.3 (03Dec93) and release 2.4.2: sources. One side-effect of this change is that -+ and -CF are now incompatible. - - libfl.a now supplies versions of the the / - string routines needed by flex and the scanners it generates, - to enhance portability to some BSD systems. + - libfl.a now supplies private versions of the / + string routines needed by flex and the scanners + it generates, to enhance portability to some BSD systems. - More robust solution to 2.4.2's flexfatal() bug fix.
-- cgit v1.2.3 From 3bed5da886728f20b5cff1ec2043d54895ea3150 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Tue, 7 Dec 1993 13:08:35 +0000 Subject: pretester update --- README | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README b/README index 35bb72d..339e9bb 100644 --- a/README +++ b/README @@ -16,8 +16,8 @@ BSD Unix, and not under the GNU General Public License (GPL), except for the "configure" script, which is covered by the GPL. Many thanks to the 2.4 pre-testers for finding a bunch of bugs and helping -increase/test portability: Francois Pinard, Nathan Zelle, Gavin Nicol, and -Matthew Jacob. +increase/test portability: Francois Pinard, Nathan Zelle, Gavin Nicol, +Chris Thewalt, and Matthew Jacob. Please send bug reports and feedback to: -- cgit v1.2.3 From 5e755f3efe7ca80c5ffb9336933b97ea004f7ab9 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 9 Dec 1993 13:57:12 +0000 Subject: 2.4.5 --- version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.h b/version.h index 6bf4ca0..1ff0ddd 100644 --- a/version.h +++ b/version.h @@ -1 +1 @@ -#define FLEX_VERSION "2.4.4" +#define FLEX_VERSION "2.4.5" -- cgit v1.2.3 From d865c176c6e5bf6768dbc4577595cdfe20d0baab Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 9 Dec 1993 13:57:23 +0000 Subject: Fixed bug in treating '$' as variable trailing context --- parse.y | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/parse.y b/parse.y index beb6e11..b3f517f 100644 --- a/parse.y +++ b/parse.y @@ -364,6 +364,13 @@ rule : re2 re | re '$' { + headcnt = 0; + trailcnt = 1; + rulelen = 1; + varlength = false; + + current_state_type = STATE_TRAILING_CONTEXT; + if ( trlcontxt ) { synerr( "trailing context used twice" ); @@ -375,16 +382,13 @@ rule : re2 re /* See the comment in the rule for "re2 re" * above. */ - if ( ! 
varlength || headcnt != 0 ) - warn( + warn( "trailing context made variable due to preceding '|' action" ); - /* Mark as variable. */ varlength = true; - headcnt = 0; } - if ( lex_compat || (varlength && headcnt == 0) ) + if ( lex_compat || varlength ) { /* Again, see the comment in the rule for * "re2 re" above. @@ -394,15 +398,6 @@ rule : re2 re variable_trail_rule = true; } - else - { - if ( ! varlength ) - headcnt = rulelen; - - ++rulelen; - trailcnt = 1; - } - trlcontxt = true; eps = mkstate( SYM_EPSILON ); -- cgit v1.2.3 From e89ab843296311d34122769645394f04feb7f36c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 9 Dec 1993 13:58:09 +0000 Subject: Fixed bug in interactive reads where char is unsigned --- gen.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/gen.c b/gen.c index e25160d..11dfb50 100644 --- a/gen.c +++ b/gen.c @@ -1255,10 +1255,13 @@ void make_tables() { printf( "\tif ( yy_current_buffer->is_interactive ) \\\n" ); + printf( "\t\t{ \\\n" ); + printf( "\t\tint c = getc( yyin ); \\\n" ); + printf( "\t\tresult = c == EOF ? 0 : 1; \\\n" ); + printf( "\t\tbuf[0] = (char) c; \\\n" ); + printf( "\t\t} \\\n" ); printf( -"\t\tresult = ((int) (buf[0] = getc( yyin ))) == EOF ? 0 : 1; \\\n" ); - printf( -"\telse if ( ((result = fread( (char *) buf, 1, max_size, yyin )) == 0)\\\n" ); + "\telse if ( ((result = fread( buf, 1, max_size, yyin )) == 0) \\\n" ); printf( "\t\t && ferror( yyin ) ) \\\n" ); printf( "\t\tYY_FATAL_ERROR( \"input in flex scanner failed\" );\n" ); -- cgit v1.2.3 From efb1cf6aa4c2cfa507fd90df763092598a761f24 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 9 Dec 1993 13:58:40 +0000 Subject: Detect REJECT etc. before generating YY_USES_REJECT! 
--- main.c | 116 ++++++++++++++++++++++++++++++++--------------------------------- 1 file changed, 58 insertions(+), 58 deletions(-) diff --git a/main.c b/main.c index dd6d8cc..ea19c19 100644 --- a/main.c +++ b/main.c @@ -119,64 +119,6 @@ char **argv; readin(); - if ( syntaxerror ) - flexend( 1 ); - - if ( yymore_really_used == REALLY_USED ) - yymore_used = true; - else if ( yymore_really_used == REALLY_NOT_USED ) - yymore_used = false; - - if ( reject_really_used == REALLY_USED ) - reject = true; - else if ( reject_really_used == REALLY_NOT_USED ) - reject = false; - - if ( performance_report > 0 ) - { - if ( lex_compat ) - { - fprintf( stderr, -"-l AT&T lex compatibility option entails a large performance penalty\n" ); - fprintf( stderr, -" and may be the actual source of other reported performance penalties\n" ); - } - - if ( performance_report > 1 ) - { - if ( interactive ) - fprintf( stderr, - "-I (interactive) entails a minor performance penalty\n" ); - - if ( yymore_used ) - fprintf( stderr, - "yymore() entails a minor performance penalty\n" ); - } - - if ( reject ) - fprintf( stderr, - "REJECT entails a large performance penalty\n" ); - - if ( variable_trailing_context_rules ) - fprintf( stderr, -"Variable trailing context rules entail a large performance penalty\n" ); - } - - if ( reject ) - real_reject = true; - - if ( variable_trailing_context_rules ) - reject = true; - - if ( (fulltbl || fullspd) && reject ) - { - if ( real_reject ) - flexerror( "REJECT cannot be used with -f or -F" ); - else - flexerror( - "variable trailing context rules cannot be used with -f or -F" ); - } - ntod(); for ( i = 1; i <= num_rules; ++i ) @@ -817,6 +759,64 @@ void readin() flexend( 1 ); } + if ( syntaxerror ) + flexend( 1 ); + + if ( yymore_really_used == REALLY_USED ) + yymore_used = true; + else if ( yymore_really_used == REALLY_NOT_USED ) + yymore_used = false; + + if ( reject_really_used == REALLY_USED ) + reject = true; + else if ( reject_really_used == 
REALLY_NOT_USED ) + reject = false; + + if ( performance_report > 0 ) + { + if ( lex_compat ) + { + fprintf( stderr, +"-l AT&T lex compatibility option entails a large performance penalty\n" ); + fprintf( stderr, +" and may be the actual source of other reported performance penalties\n" ); + } + + if ( performance_report > 1 ) + { + if ( interactive ) + fprintf( stderr, + "-I (interactive) entails a minor performance penalty\n" ); + + if ( yymore_used ) + fprintf( stderr, + "yymore() entails a minor performance penalty\n" ); + } + + if ( reject ) + fprintf( stderr, + "REJECT entails a large performance penalty\n" ); + + if ( variable_trailing_context_rules ) + fprintf( stderr, +"Variable trailing context rules entail a large performance penalty\n" ); + } + + if ( reject ) + real_reject = true; + + if ( variable_trailing_context_rules ) + reject = true; + + if ( (fulltbl || fullspd) && reject ) + { + if ( real_reject ) + flexerror( "REJECT cannot be used with -f or -F" ); + else + flexerror( + "variable trailing context rules cannot be used with -f or -F" ); + } + if ( csize == 256 ) puts( "typedef unsigned char YY_CHAR;" ); else -- cgit v1.2.3 From 9731f329b92c3287a248c8495a5eb40597bdad1c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 11 Dec 1993 10:27:02 +0000 Subject: Added extern "C++" wrapper --- FlexLexer.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/FlexLexer.h b/FlexLexer.h index f1616b4..1d5a66d 100644 --- a/FlexLexer.h +++ b/FlexLexer.h @@ -35,6 +35,8 @@ // If you want to create multiple lexer classes, you use the -P flag // to rename each yyFlexLexer to some other xxFlexLexer. 
+extern "C++" { + #include struct yy_buffer_state; @@ -156,4 +158,7 @@ protected: int yy_more_flag; int yy_more_len; }; + +} + #endif -- cgit v1.2.3 From c1b386a44bd02a55003b29978906a1a02a559e21 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 11 Dec 1993 10:29:39 +0000 Subject: #include "FlexLexer.h" -> --- main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.c b/main.c index ea19c19..d82c1d6 100644 --- a/main.c +++ b/main.c @@ -852,7 +852,7 @@ void readin() printf( "FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;\n" ); if ( C_plus_plus ) - printf( "\n#include \"FlexLexer.h\"\n" ); + printf( "\n#include \n" ); else { -- cgit v1.2.3 From 03a0b119fc36c17af4808fa738221d87eea896d8 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 11 Dec 1993 10:30:20 +0000 Subject: Initialize dfaacc[0] for -CF representation Fixed minor memory leak --- dfa.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dfa.c b/dfa.c index 5d4c45f..11bc44f 100644 --- a/dfa.c +++ b/dfa.c @@ -449,7 +449,7 @@ void ntod() * ecgroup[NUL]), (2) NUL's equivalence class is the last * equivalence class, and (3) the number of equivalence classes is * the same as the number of characters. This latter case comes - * about when useecs is false or when its true but every character + * about when useecs is false or when it's true but every character * still manages to land in its own class (unlikely, but it's * cheap to check for). 
If all these things are true then the * character code needed to represent NUL's equivalence class for @@ -508,7 +508,9 @@ void ntod() { for ( i = 0; i <= numecs; ++i ) state[i] = 0; + place_state( state, 0, 0 ); + dfaacc[i].dfaacc_state = 0; } else if ( fulltbl ) @@ -763,6 +765,9 @@ void ntod() mkdeftbl(); } + + yy_flex_free( (void *) accset ); + yy_flex_free( (void *) nset ); } -- cgit v1.2.3 From 485e9ca7937976ff80f4c2fe7303f9f844497c65 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 11 Dec 1993 10:33:35 +0000 Subject: Updated comment regarding 0-based vs. 1-based arrays for -CF. --- gen.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/gen.c b/gen.c index 11dfb50..71f6601 100644 --- a/gen.c +++ b/gen.c @@ -45,8 +45,10 @@ static int indent_level = 0; /* each level is 8 spaces */ #define indent_down() (--indent_level) #define set_indent(indent_val) indent_level = indent_val -/* *Everything* is done in terms of arrays starting at 1, so provide - * a null entry for the zero element of all C arrays. +/* Almost everything is done in terms of arrays starting at 1, so provide + * a null entry for the zero element of all C arrays. (The exception + * to this is that the fast table representation generally uses the + * 0 elements of its arrays, too.) */ static char C_int_decl[] = "static const int %s[%d] =\n { 0,\n"; static char C_short_decl[] = "static const short int %s[%d] =\n { 0,\n"; @@ -169,16 +171,17 @@ void genctbl() /* So that "make test" won't show arb. differences. */ nxt[tblend + 2] = 0; - /* Make sure every state has a end-of-buffer transition and an + /* Make sure every state has an end-of-buffer transition and an * action #. 
*/ for ( i = 0; i <= lastdfa; ++i ) { - register int anum = dfaacc[i].dfaacc_state; + int anum = dfaacc[i].dfaacc_state; + int offset = base[i]; - chk[base[i]] = EOB_POSITION; - chk[base[i] - 1] = ACTION_POSITION; - nxt[base[i] - 1] = anum; /* action number */ + chk[offset] = EOB_POSITION; + chk[offset - 1] = ACTION_POSITION; + nxt[offset - 1] = anum; /* action number */ } for ( i = 0; i <= tblend; ++i ) -- cgit v1.2.3 From b97c14b2dc19ad7629626e00ad940f7117e8512e Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 11 Dec 1993 10:33:54 +0000 Subject: Fixed typo in "uninstall" target --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index e78576d..16ee547 100644 --- a/Makefile.in +++ b/Makefile.in @@ -186,7 +186,7 @@ installdirs: uninstall: rm -f $(bindir)/flex $(bindir)/flex++ - rm -f $libdir)/libfl.a + rm -f $(libdir)/libfl.a rm -f $(includedir)/FlexLexer.h rm -f $(mandir)/flex.$(manext) $(mandir)/flexdoc.$(manext) -- cgit v1.2.3 From d8757336a081e8a86acf6ba59e56c7623f28f508 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 11 Dec 1993 10:34:17 +0000 Subject: Updated credits --- flex.1 | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/flex.1 b/flex.1 index 6bd57d2..d27da80 100644 --- a/flex.1 +++ b/flex.1 @@ -3017,14 +3017,15 @@ Eric Goldman, Ulrich Grepel, Jan Hajic, Jarkko Hietaniemi, Eric Hughes, John Interrante, Ceriel Jacobs, Jeffrey R. Jones, Henry Juengst, Amir Katz, ken@ken.hilco.com, Kevin B. Kenny, Marq Kole, Ronald -Lamprecht, Greg Lee, Craig Leres, John Levine, Mohamed el Lozy, Chris +Lamprecht, Greg Lee, Craig Leres, John Levine, Steve Liddle, +Mohamed el Lozy, Brian Madsen, Chris Metcalf, Luke Mewburn, Jim Meyering, G.T. 
Nicol, Landon Noll, Marc Nozell, Richard Ohnemus, Sven Panne, Roland Pesch, Walter Pelissero, Gaumond Pierre, Esmond Pitt, Jef Poskanzer, Joe Rahmeh, Frederic Raimbault, Rick Richardson, Kevin Rodgers, Jim Roskind, -Doug Schmidt, Philippe Schnoebelen, -Alex Siegel, Paul Stuart, Dave Tallman, Chris Thewalt, +Doug Schmidt, Philippe Schnoebelen, Andreas Schwab, +Alex Siegel, Mike Stump, Paul Stuart, Dave Tallman, Chris Thewalt, Paul Tuinenga, Gary Weik, Frank Whaley, Gerhard Wilhelms, Kent Williams, Ken Yap, Nathan Zelle, David Zuhn, and those whose names have slipped my marginal mail-archiving skills but whose contributions are appreciated all the -- cgit v1.2.3 From dd1c92afa2e6fc13fbb6d9b4f9172f607d44c0d7 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 11 Dec 1993 10:56:16 +0000 Subject: is_interactive -> yy_is_interactive --- gen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gen.c b/gen.c index 71f6601..af696ee 100644 --- a/gen.c +++ b/gen.c @@ -1257,7 +1257,7 @@ void make_tables() else { printf( - "\tif ( yy_current_buffer->is_interactive ) \\\n" ); + "\tif ( yy_current_buffer->yy_is_interactive ) \\\n" ); printf( "\t\t{ \\\n" ); printf( "\t\tint c = getc( yyin ); \\\n" ); printf( "\t\tresult = c == EOF ? 0 : 1; \\\n" ); -- cgit v1.2.3 From ffeaa226cbad2ee5c88f635ddcb73459041400f5 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 11 Dec 1993 10:56:41 +0000 Subject: Added yy_fill_buffer --- flex.skl | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/flex.skl b/flex.skl index 244a930..f97e506 100644 --- a/flex.skl +++ b/flex.skl @@ -174,7 +174,12 @@ struct yy_buffer_state * instead of fread(), to make sure we stop fetching input after * each newline. */ - int is_interactive; + int yy_is_interactive; + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; /* Whether we've seen an EOF on this buffer. 
*/ int yy_eof_status; @@ -591,6 +596,25 @@ int yyFlexLexer::yy_get_next_buffer() YY_FATAL_ERROR( "fatal flex scanner internal error--end of buffer missed" ); + if ( yy_current_buffer->yy_fill_buffer == 0 ) + { /* Don't try to fill the buffer, so this is an EOF. */ + if ( yy_c_buf_p - yytext_ptr - YY_MORE_ADJ == 1 ) + { + /* We matched a singled characater, the EOB, so + * treat this as a final EOF. + */ + return EOB_ACT_END_OF_FILE; + } + + else + { + /* We matched some text prior to the EOB, first + * process it. + */ + return EOB_ACT_LAST_MATCH; + } + } + /* Try to read more data. */ /* First move last chars to start of buffer. */ @@ -1004,11 +1028,13 @@ void yyFlexLexer::yy_init_buffer( YY_BUFFER_STATE b, istream* file ) b->yy_buf_pos = &b->yy_ch_buf[1]; %- - b->is_interactive = file ? isatty( fileno(file) ) : 0; + b->yy_is_interactive = file ? isatty( fileno(file) ) : 0; %+ - b->is_interactive = 0; + b->yy_is_interactive = 0; %* + b->yy_fill_buffer = 1; + b->yy_eof_status = EOF_NOT_SEEN; } -- cgit v1.2.3 From 189a314c1164562e4308988b84ef9db5e1c9e9ab Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 11 Dec 1993 10:58:52 +0000 Subject: 2.4.5 --- NEWS | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 843c1e8..64a09dd 100644 --- a/NEWS +++ b/NEWS @@ -1,9 +1,34 @@ +Changes between release 2.4.5 (11Dec93) and release 2.4.4: + + - Fixed bug breaking C++ scanners that use REJECT or variable + trailing context. + + - Fixed serious input problem for interactive scanners on + systems for which char is unsigned. + + - Fixed bug in incorrectly treating '$' operator as variable + trailing context. + + - Fixed bug in -CF table representation that could lead to + corrupt tables. + + - Fixed fairly benign memory leak. + + - Added `extern "C++"' wrapper to FlexLexer.h header. + + - Changed #include of FlexLexer.h to use <> instead of "". 
+ + - Added feature to control whether the scanner attempts to + refill the input buffer once it's exhausted. This feature + will be documented in the 2.5 release. + + Changes between release 2.4.4 (07Dec93) and release 2.4.3: - Fixed two serious bugs in scanning 8-bit characters. - Fixed bug in YY_USER_ACTION that caused it to be executed - inappropriately (on the scanner's on internal actions, and + inappropriately (on the scanner's own internal actions, and with incorrect yytext/yyleng values). - Fixed bug in pointing yyin at a new file and resuming scanning. -- cgit v1.2.3 From f107a169c2d21c58787b0291d19a2c660576f911 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 11 Dec 1993 11:00:08 +0000 Subject: Expanded on extern "C++" news item --- NEWS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 64a09dd..bb7a818 100644 --- a/NEWS +++ b/NEWS @@ -14,7 +14,9 @@ Changes between release 2.4.5 (11Dec93) and release 2.4.4: - Fixed fairly benign memory leak. - - Added `extern "C++"' wrapper to FlexLexer.h header. + - Added `extern "C++"' wrapper to FlexLexer.h header. This + should overcome the g++ 2.5.X problems mentioned in the + NEWS for release 2.4.3. - Changed #include of FlexLexer.h to use <> instead of "". -- cgit v1.2.3 From b914ba5ae920b9da40e88eddd46aa863be059eff Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 11 Dec 1993 11:09:51 +0000 Subject: lint tweak --- gen.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/gen.c b/gen.c index af696ee..fe847eb 100644 --- a/gen.c +++ b/gen.c @@ -757,12 +757,6 @@ void gentabs() int i, j, k, *accset, nacc, *acc_array, total_states; int end_of_buffer_action = num_rules + 1; - /* *Everything* is done in terms of arrays starting at 1, so provide - * a null entry for the zero element of all C arrays. 
- */ - static char C_char_decl[] = - "static const YY_CHAR %s[%d] =\n { 0,\n"; /* } for vi */ - acc_array = allocate_integer_array( current_max_dfas ); nummt = 0; -- cgit v1.2.3 From 9e6aac8df85a6e0e0bba6d269756847f1d720eb8 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 11 Dec 1993 14:40:35 +0000 Subject: -o option --- Makefile.in | 2 +- dfa.c | 2 +- flexdef.h | 23 +++++++- gen.c | 189 +++++++++++++++++++++++++++++++----------------------------- main.c | 91 ++++++++++++++++++----------- misc.c | 162 ++++++++++++++++++++++++++++++++++++++++++--------- nfa.c | 2 +- parse.y | 2 +- scan.l | 4 +- sym.c | 2 +- 10 files changed, 316 insertions(+), 163 deletions(-) diff --git a/Makefile.in b/Makefile.in index 16ee547..c1a133d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -82,7 +82,7 @@ DIST_NAME = flex # which "flex" to use to generate scan.c from scan.l FLEX = ./flex -FLEX_FLAGS = -ist $(PERF_REPORT) +FLEX_FLAGS = -ist -oscan.c $(PERF_REPORT) COMPRESSION = PERF_REPORT = -p diff --git a/dfa.c b/dfa.c index 11bc44f..d427ca0 100644 --- a/dfa.c +++ b/dfa.c @@ -531,7 +531,7 @@ void ntod() /* Unless -Ca, declare it "short" because it's a real * long-shot that that won't be large enough. */ - printf( "static const %s yy_nxt[][%d] =\n {\n", + out_str_dec( "static const %s yy_nxt[][%d] =\n {\n", /* '}' so vi doesn't get too confused */ long_align ? "long" : "short", num_full_table_rows ); diff --git a/flexdef.h b/flexdef.h index 74f075b..bfed07e 100644 --- a/flexdef.h +++ b/flexdef.h @@ -353,12 +353,14 @@ extern int yymore_really_used, reject_really_used; * dataline - number of contiguous lines of data in current data * statement. 
Used to generate readable -f output * linenum - current input line number + * out_linenum - current output line number * skelfile - the skeleton file * skel - compiled-in skeleton array * skel_ind - index into "skel" array, if skelfile is nil * yyin - input file * backing_up_file - file to summarize backing-up states to * infilename - name of input file + * outfilename - name of output file * input_files - array holding names of input files * num_input_files - size of input_files array * program_name - name with which program was invoked @@ -373,11 +375,11 @@ extern int yymore_really_used, reject_really_used; * to "action_array" */ -extern int datapos, dataline, linenum; +extern int datapos, dataline, linenum, out_linenum; extern FILE *skelfile, *yyin, *backing_up_file; extern char *skel[]; extern int skel_ind; -extern char *infilename; +extern char *infilename, *outfilename; extern char **input_files; extern int num_input_files; extern char *program_name; @@ -739,7 +741,7 @@ extern void lerrif PROTO((char[], int)); extern void lerrsf PROTO((char[], char[])); /* Spit out a "# line" statement. */ -extern void line_directive_out PROTO((FILE*)); +extern void line_directive_out PROTO((FILE*, int)); /* Mark the current position in the action array as the end of the section 1 * user defs. @@ -757,6 +759,21 @@ extern void mkdata PROTO((int)); /* generate a data statement */ /* Return the integer represented by a string of digits. */ extern int myctoi PROTO((char [])); +/* Convert an octal digit string to an integer value. */ +extern int otoi PROTO((Char [] )); + +/* Output a (possibly-formatted) string to the generated scanner. 
*/ +extern void out PROTO((char [])); +extern void out_dec PROTO((char [], int)); +extern void out_dec2 PROTO((char [], int, int)); +extern void out_hex PROTO((char [], unsigned int)); +extern void out_line_count PROTO((char []));; +extern void out_str PROTO((char [], char [])); +extern void out_str3 PROTO((char [], char [], char [], char [])); +extern void out_str_dec PROTO((char [], char [], int)); +extern void outc PROTO((int)); +extern void outn PROTO((char [])); + /* Return a printable version of the given character, which might be * 8-bit. */ diff --git a/gen.c b/gen.c index fe847eb..846e76f 100644 --- a/gen.c +++ b/gen.c @@ -65,13 +65,13 @@ void do_indent() while ( i >= 8 ) { - putchar( '\t' ); + outc( '\t' ); i -= 8; } while ( i > 0 ) { - putchar( ' ' ); + outc( ' ' ); --i; } } @@ -121,7 +121,7 @@ void gen_bu_action() indent_puts( "yy_current_state = yy_last_accepting_state;" ); indent_puts( "goto yy_find_action;" ); - putchar( '\n' ); + outc( '\n' ); set_indent( 0 ); } @@ -135,9 +135,9 @@ void genctbl() int end_of_buffer_action = num_rules + 1; /* Table of verify for transition and offset to next state. */ - printf( "static const struct yy_trans_info yy_transition[%d] =\n", + out_dec( "static const struct yy_trans_info yy_transition[%d] =\n", tblend + numecs + 1 ); - printf( " {\n" ); + outn( " {" ); /* We want the transition to be represented as the offset to the * next state, not the actual state number, which is what it currently @@ -205,17 +205,16 @@ void genctbl() transition_struct_out( chk[tblend + 1], nxt[tblend + 1] ); transition_struct_out( chk[tblend + 2], nxt[tblend + 2] ); - printf( " };\n" ); - printf( "\n" ); + outn( " };\n" ); /* Table of pointers to start states. 
*/ - printf( + out_dec( "static const struct yy_trans_info *yy_start_state_list[%d] =\n", lastsc * 2 + 1 ); - printf( " {\n" ); /* } so vi doesn't get confused */ + outn( " {" ); /* } so vi doesn't get confused */ for ( i = 0; i <= lastsc * 2; ++i ) - printf( " &yy_transition[%d],\n", base[i] ); + out_dec( " &yy_transition[%d],\n", base[i] ); dataend(); @@ -232,7 +231,7 @@ void genecs() register int i, j; int numrows; - printf( C_int_decl, "yy_ec", csize ); + out_str_dec( C_int_decl, "yy_ec", csize ); for ( i = 1; i < csize; ++i ) { @@ -282,7 +281,7 @@ void gen_find_action() indent_puts( "yy_current_state = *--yy_state_ptr;" ); indent_puts( "yy_lp = yy_accept[yy_current_state];" ); - puts( + outn( "find_rule: /* we branch to this label when backing up */" ); indent_puts( @@ -396,7 +395,7 @@ void genftbl() register int i; int end_of_buffer_action = num_rules + 1; - printf( long_align ? C_long_decl : C_short_decl, + out_str_dec( long_align ? C_long_decl : C_short_decl, "yy_accept", lastdfa + 1 ); dfaacc[end_of_buffer_state].dfaacc_state = end_of_buffer_action; @@ -454,7 +453,7 @@ char *char_map; do_indent(); /* lastdfa + 2 is the beginning of the templates */ - printf( "if ( yy_current_state >= %d )\n", lastdfa + 2 ); + out_dec( "if ( yy_current_state >= %d )\n", lastdfa + 2 ); indent_up(); indent_puts( "yy_c = yy_meta[(unsigned int) yy_c];" ); @@ -496,7 +495,7 @@ void gen_next_match() { indent_puts( "{" ); /* } for vi */ gen_backing_up(); - putchar( '\n' ); + outc( '\n' ); } indent_puts( "++yy_cp;" ); @@ -507,7 +506,7 @@ void gen_next_match() indent_down(); - putchar( '\n' ); + outc( '\n' ); indent_puts( "yy_current_state = -yy_current_state;" ); } @@ -532,7 +531,7 @@ void gen_next_match() if ( num_backing_up > 0 ) { - putchar( '\n' ); + outc( '\n' ); gen_backing_up(); /* { for vi */ indent_puts( "}" ); } @@ -559,10 +558,10 @@ void gen_next_match() do_indent(); if ( interactive ) - printf( "while ( yy_base[yy_current_state] != %d );\n", + out_dec( "while ( 
yy_base[yy_current_state] != %d );\n", jambase ); else - printf( "while ( yy_current_state != %d );\n", + out_dec( "while ( yy_current_state != %d );\n", jamstate ); if ( ! reject && ! interactive ) @@ -655,7 +654,7 @@ void gen_NUL_trans() /* We'll need yy_cp lying around for the gen_backing_up(). */ indent_puts( "register char *yy_cp = yy_c_buf_p;" ); - putchar( '\n' ); + outc( '\n' ); if ( nultrans ) { @@ -667,7 +666,7 @@ void gen_NUL_trans() else if ( fulltbl ) { do_indent(); - printf( "yy_current_state = yy_nxt[yy_current_state][%d];\n", + out_dec( "yy_current_state = yy_nxt[yy_current_state][%d];\n", NUL_ec ); indent_puts( "yy_is_jam = (yy_current_state <= 0);" ); } @@ -675,7 +674,7 @@ void gen_NUL_trans() else if ( fullspd ) { do_indent(); - printf( "register int yy_c = %d;\n", NUL_ec ); + out_dec( "register int yy_c = %d;\n", NUL_ec ); indent_puts( "register const struct yy_trans_info *yy_trans_info;\n" ); @@ -699,7 +698,7 @@ void gen_NUL_trans() do_indent(); - printf( "yy_is_jam = (yy_current_state == %d);\n", jamstate ); + out_dec( "yy_is_jam = (yy_current_state == %d);\n", jamstate ); } /* If we've entered an accepting state, back up; note that @@ -708,7 +707,7 @@ void gen_NUL_trans() */ if ( need_backing_up && (fullspd || fulltbl) ) { - putchar( '\n' ); + outc( '\n' ); indent_puts( "if ( ! yy_is_jam )" ); indent_up(); indent_puts( "{" ); @@ -783,7 +782,7 @@ void gentabs() accsiz[end_of_buffer_state] = 1; dfaacc[end_of_buffer_state].dfaacc_set = EOB_accepting_list; - printf( long_align ? C_long_decl : C_short_decl, + out_str_dec( long_align ? C_long_decl : C_short_decl, "yy_acclist", MAX( numas, 1 ) + 1 ); j = 1; /* index into "yy_acclist" array */ @@ -870,7 +869,7 @@ void gentabs() */ ++k; - printf( long_align ? C_long_decl : C_short_decl, "yy_accept", k ); + out_str_dec( long_align ? 
C_long_decl : C_short_decl, "yy_accept", k ); for ( i = 1; i <= lastdfa; ++i ) { @@ -902,7 +901,7 @@ void gentabs() if ( trace ) fputs( "\n\nMeta-Equivalence Classes:\n", stderr ); - printf( C_int_decl, "yy_meta", numecs + 1 ); + out_str_dec( C_int_decl, "yy_meta", numecs + 1 ); for ( i = 1; i <= numecs; ++i ) { @@ -918,7 +917,7 @@ void gentabs() total_states = lastdfa + numtemps; - printf( (tblend >= MAX_SHORT || long_align) ? + out_str_dec( (tblend >= MAX_SHORT || long_align) ? C_long_decl : C_short_decl, "yy_base", total_states + 1 ); @@ -953,7 +952,7 @@ void gentabs() dataend(); - printf( (total_states >= MAX_SHORT || long_align) ? + out_str_dec( (total_states >= MAX_SHORT || long_align) ? C_long_decl : C_short_decl, "yy_def", total_states + 1 ); @@ -962,7 +961,7 @@ void gentabs() dataend(); - printf( (total_states >= MAX_SHORT || long_align) ? + out_str_dec( (total_states >= MAX_SHORT || long_align) ? C_long_decl : C_short_decl, "yy_nxt", tblend + 1 ); @@ -976,7 +975,7 @@ void gentabs() dataend(); - printf( (total_states >= MAX_SHORT || long_align) ? + out_str_dec( (total_states >= MAX_SHORT || long_align) ? 
C_long_decl : C_short_decl, "yy_chk", tblend + 1 ); @@ -1000,8 +999,8 @@ void indent_put2s( fmt, arg ) char fmt[], arg[]; { do_indent(); - printf( fmt, arg ); - putchar( '\n' ); + out_str( fmt, arg ); + outn( "" ); } @@ -1013,7 +1012,7 @@ void indent_puts( str ) char str[]; { do_indent(); - puts( str ); + outn( str ); } @@ -1058,7 +1057,7 @@ void make_tables() skelout(); - printf( "#define YY_END_OF_BUFFER %d\n", num_rules + 1 ); + out_dec( "#define YY_END_OF_BUFFER %d\n", num_rules + 1 ); if ( fullspd ) { @@ -1117,12 +1116,12 @@ void make_tables() if ( nultrans ) { - printf( C_state_decl, "yy_NUL_trans", lastdfa + 1 ); + out_str_dec( C_state_decl, "yy_NUL_trans", lastdfa + 1 ); for ( i = 1; i <= lastdfa; ++i ) { if ( fullspd ) - printf( " &yy_transition[%d],\n", base[i] ); + out_dec( " &yy_transition[%d],\n", base[i] ); else mkdata( nultrans[i] ); } @@ -1135,7 +1134,7 @@ void make_tables() indent_puts( "extern int yy_flex_debug;" ); indent_puts( "int yy_flex_debug = 1;\n" ); - printf( long_align ? C_long_decl : C_short_decl, + out_str_dec( long_align ? C_long_decl : C_short_decl, "yy_rule_linenum", num_rules ); for ( i = 1; i < num_rules; ++i ) mkdata( rule_linenum[i] ); @@ -1147,58 +1146,58 @@ void make_tables() /* Declare state buffer variables. */ if ( ! C_plus_plus ) { - puts( + outn( "static yy_state_type yy_state_buf[YY_BUF_SIZE + 2], *yy_state_ptr;" ); - puts( "static char *yy_full_match;" ); - puts( "static int yy_lp;" ); + outn( "static char *yy_full_match;" ); + outn( "static int yy_lp;" ); } if ( variable_trailing_context_rules ) { if ( ! 
C_plus_plus ) { - puts( + outn( "static int yy_looking_for_trail_begin = 0;" ); - puts( "static int yy_full_lp;" ); - puts( "static int *yy_full_state;" ); + outn( "static int yy_full_lp;" ); + outn( "static int *yy_full_state;" ); } - printf( "#define YY_TRAILING_MASK 0x%x\n", + out_hex( "#define YY_TRAILING_MASK 0x%x\n", (unsigned int) YY_TRAILING_MASK ); - printf( "#define YY_TRAILING_HEAD_MASK 0x%x\n", + out_hex( "#define YY_TRAILING_HEAD_MASK 0x%x\n", (unsigned int) YY_TRAILING_HEAD_MASK ); } - puts( "#define REJECT \\" ); - puts( "{ \\" ); /* } for vi */ - puts( + outn( "#define REJECT \\" ); + outn( "{ \\" ); /* } for vi */ + outn( "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */ \\" ); - puts( + outn( "yy_cp = yy_full_match; /* restore poss. backed-over text */ \\" ); if ( variable_trailing_context_rules ) { - puts( + outn( "yy_lp = yy_full_lp; /* restore orig. accepting pos. */ \\" ); - puts( + outn( "yy_state_ptr = yy_full_state; /* restore orig. state */ \\" ); - puts( + outn( "yy_current_state = *yy_state_ptr; /* restore curr. state */ \\" ); } - puts( "++yy_lp; \\" ); - puts( "goto find_rule; \\" ); + outn( "++yy_lp; \\" ); + outn( "goto find_rule; \\" ); /* { for vi */ - puts( "}" ); + outn( "}" ); } else { - puts( + outn( "/* The intent behind this definition is that it'll catch" ); - puts( " * any uses of REJECT which flex missed." ); - puts( " */" ); - puts( "#define REJECT reject_used_but_not_detected" ); + outn( " * any uses of REJECT which flex missed." 
); + outn( " */" ); + outn( "#define REJECT reject_used_but_not_detected" ); } if ( yymore_used ) @@ -1223,18 +1222,20 @@ void make_tables() { if ( yytext_is_array ) { - puts( "#ifndef YYLMAX" ); - puts( "#define YYLMAX 8192" ); - puts( "#endif\n" ); - puts( "char yytext[YYLMAX];" ); - puts( "char *yytext_ptr;" ); + outn( "#ifndef YYLMAX" ); + outn( "#define YYLMAX 8192" ); + outn( "#endif\n" ); + outn( "char yytext[YYLMAX];" ); + outn( "char *yytext_ptr;" ); } else - puts( "char *yytext;" ); + outn( "char *yytext;" ); } - fputs( &action_array[defs1_offset], stdout ); + out( &action_array[defs1_offset] ); + + line_directive_out( stdout, 0 ); skelout(); @@ -1242,33 +1243,35 @@ void make_tables() { if ( use_read ) { - printf( -"\tif ( (result = read( fileno(yyin), (char *) buf, max_size )) < 0 ) \\\n" ); - printf( - "\t\tYY_FATAL_ERROR( \"input in flex scanner failed\" );\n" ); + outn( +"\tif ( (result = read( fileno(yyin), (char *) buf, max_size )) < 0 ) \\" ); + outn( + "\t\tYY_FATAL_ERROR( \"input in flex scanner failed\" );" ); } else { - printf( - "\tif ( yy_current_buffer->yy_is_interactive ) \\\n" ); - printf( "\t\t{ \\\n" ); - printf( "\t\tint c = getc( yyin ); \\\n" ); - printf( "\t\tresult = c == EOF ? 0 : 1; \\\n" ); - printf( "\t\tbuf[0] = (char) c; \\\n" ); - printf( "\t\t} \\\n" ); - printf( - "\telse if ( ((result = fread( buf, 1, max_size, yyin )) == 0) \\\n" ); - printf( "\t\t && ferror( yyin ) ) \\\n" ); - printf( - "\t\tYY_FATAL_ERROR( \"input in flex scanner failed\" );\n" ); + outn( + "\tif ( yy_current_buffer->yy_is_interactive ) \\" ); + outn( "\t\t{ \\" ); + outn( "\t\tint c = getc( yyin ); \\" ); + outn( "\t\tresult = c == EOF ? 0 : 1; \\" ); + outn( "\t\tbuf[0] = (char) c; \\" ); + outn( "\t\t} \\" ); + outn( + "\telse if ( ((result = fread( buf, 1, max_size, yyin )) == 0) \\" ); + outn( "\t\t && ferror( yyin ) ) \\" ); + outn( + "\t\tYY_FATAL_ERROR( \"input in flex scanner failed\" );" ); } } skelout(); /* Copy prolog to output file. 
*/ - fputs( &action_array[prolog_offset], stdout ); + out( &action_array[prolog_offset] ); + + line_directive_out( stdout, 0 ); skelout(); @@ -1291,7 +1294,7 @@ void make_tables() gen_start_state(); /* Note, don't use any indentation. */ - puts( "yy_match:" ); + outn( "yy_match:" ); gen_next_match(); skelout(); @@ -1330,7 +1333,7 @@ void make_tables() indent_down(); do_indent(); - printf( "else if ( yy_act < %d )\n", num_rules ); + out_dec( "else if ( yy_act < %d )\n", num_rules ); indent_up(); indent_puts( "fprintf( stderr, \"--accepting rule at line %d (\\\"%s\\\")\\n\"," ); @@ -1338,7 +1341,7 @@ void make_tables() indent_down(); do_indent(); - printf( "else if ( yy_act == %d )\n", num_rules ); + out_dec( "else if ( yy_act == %d )\n", num_rules ); indent_up(); indent_puts( "fprintf( stderr, \"--accepting default rule (\\\"%s\\\")\\n\"," ); @@ -1346,14 +1349,14 @@ void make_tables() indent_down(); do_indent(); - printf( "else if ( yy_act == %d )\n", num_rules + 1 ); + out_dec( "else if ( yy_act == %d )\n", num_rules + 1 ); indent_up(); indent_puts( "fprintf( stderr, \"--(end of buffer or a NUL)\\n\" );" ); indent_down(); do_indent(); - printf( "else\n" ); + outn( "else" ); indent_up(); indent_puts( "fprintf( stderr, \"--EOF (start condition %d)\\n\", YY_START );" ); @@ -1367,14 +1370,16 @@ void make_tables() skelout(); indent_up(); gen_bu_action(); - fputs( &action_array[action_offset], stdout ); + out( &action_array[action_offset] ); + + line_directive_out( stdout, 0 ); /* generate cases for any missing EOF rules */ for ( i = 1; i <= lastsc; ++i ) if ( ! sceof[i] ) { do_indent(); - printf( "case YY_STATE_EOF(%s):\n", scname[i] ); + out_str( "case YY_STATE_EOF(%s):\n", scname[i] ); did_eof_rule = true; } @@ -1441,7 +1446,7 @@ void make_tables() /* Copy remainder of input to output. 
*/ - line_directive_out( stdout ); + line_directive_out( stdout, 1 ); if ( sectnum == 3 ) (void) flexscan(); /* copy remainder of input to output */ diff --git a/main.c b/main.c index d82c1d6..a448d54 100644 --- a/main.c +++ b/main.c @@ -55,12 +55,12 @@ int fullspd, gen_line_dirs, performance_report, backing_up_report; int C_plus_plus, long_align, use_read, yytext_is_array, csize; int yymore_used, reject, real_reject, continued_action; int yymore_really_used, reject_really_used; -int datapos, dataline, linenum; +int datapos, dataline, linenum, out_linenum; FILE *skelfile = NULL; int skel_ind = 0; char *action_array; int action_size, defs1_offset, prolog_offset, action_offset, action_index; -char *infilename = NULL; +char *infilename = NULL, *outfilename = NULL; int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE]; int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; int current_mns, num_rules, num_eof_rules, default_rule; @@ -101,9 +101,9 @@ static char *outfile_template = "lex.%s.%s"; #else static char *outfile_template = "lex%s.%s"; #endif -static char outfile_path[64]; static int outfile_created = 0; +static int did_outfilename = 0; static int use_stdout; static char *skelname = NULL; static char *prefix = "yy"; @@ -174,7 +174,7 @@ int exit_status; else if ( fclose( stdout ) ) flexfatal( "error occurred when closing output file" ); - else if ( unlink( outfile_path ) ) + else if ( unlink( outfilename ) ) flexfatal( "error occurred when deleting output file" ); } @@ -254,6 +254,9 @@ int exit_status; if ( use_read ) putc( 'r', stderr ); + if ( did_outfilename ) + fprintf( stderr, " -o%s", outfilename ); + if ( skelname ) fprintf( stderr, " -S%s", skelname ); @@ -516,6 +519,15 @@ char **argv; */ break; + case 'o': + if ( i != 1 ) + flexerror( + "-o flag must be given separately" ); + + outfilename = arg + i + 1; + did_outfilename = 1; + goto get_next_arg; + case 'P': if ( i != 1 ) flexerror( @@ -579,7 +591,7 @@ char **argv; exit( 1 ); } - /* Used by -C, -S 
and -P flags in lieu of a "continue 2" + /* Used by -C, -S, -o, and -P flags in lieu of a "continue 2" * control. */ get_next_arg: ; @@ -633,19 +645,27 @@ char **argv; if ( ! use_stdout ) { FILE *prev_stdout; - char *suffix; - if ( C_plus_plus ) - suffix = "cc"; - else - suffix = "c"; + if ( ! did_outfilename ) + { + static char outfile_path[64]; + char *suffix; + + if ( C_plus_plus ) + suffix = "cc"; + else + suffix = "c"; - sprintf( outfile_path, outfile_template, prefix, suffix ); + sprintf( outfile_path, outfile_template, + prefix, suffix ); - prev_stdout = freopen( outfile_path, "w", stdout ); + outfilename = outfile_path; + } + + prev_stdout = freopen( outfilename, "w", stdout ); if ( prev_stdout == NULL ) - lerrsf( "could not create %s", outfile_path ); + lerrsf( "could not create %s", outfilename ); outfile_created = 1; } @@ -678,7 +698,7 @@ char **argv; if ( yy_strcmp( prefix, "yy" ) ) { -#define GEN_PREFIX(name) printf( "#define yy%s %s%s\n", name, prefix, name ); +#define GEN_PREFIX(name) out_str3( "#define yy%s %s%s\n", name, prefix, name ); GEN_PREFIX( "FlexLexer" ); GEN_PREFIX( "_create_buffer" ); GEN_PREFIX( "_delete_buffer" ); @@ -693,7 +713,7 @@ char **argv; GEN_PREFIX( "restart" ); GEN_PREFIX( "text" ); GEN_PREFIX( "wrap" ); - printf( "\n" ); + outn( "" ); } @@ -705,7 +725,7 @@ char **argv; num_backing_up = onesp = numprots = 0; variable_trailing_context_rules = bol_needed = false; - linenum = sectnum = 1; + out_linenum = linenum = sectnum = 1; firstprot = NIL; /* Used in mkprot() so that the first proto goes in slot 1 @@ -749,9 +769,12 @@ char **argv; void readin() { + if ( did_outfilename ) + line_directive_out( stdout, 0 ); + skelout(); - line_directive_out( (FILE *) 0 ); + line_directive_out( (FILE *) 0, 1 ); if ( yyparse() ) { @@ -818,51 +841,50 @@ void readin() } if ( csize == 256 ) - puts( "typedef unsigned char YY_CHAR;" ); + outn( "typedef unsigned char YY_CHAR;" ); else - puts( "typedef char YY_CHAR;" ); + outn( "typedef char YY_CHAR;" 
); if ( C_plus_plus ) { - puts( "#define yytext_ptr yytext" ); + outn( "#define yytext_ptr yytext" ); if ( interactive ) - puts( "#define YY_INTERACTIVE" ); + outn( "#define YY_INTERACTIVE" ); } if ( fullspd ) - printf( - "typedef const struct yy_trans_info *yy_state_type;\n" ); + outn( "typedef const struct yy_trans_info *yy_state_type;" ); else if ( ! C_plus_plus ) - printf( "typedef int yy_state_type;\n" ); + outn( "typedef int yy_state_type;" ); if ( reject ) - printf( "\n#define YY_USES_REJECT\n" ); + outn( "\n#define YY_USES_REJECT" ); if ( ddebug ) - puts( "\n#define FLEX_DEBUG" ); + outn( "\n#define FLEX_DEBUG" ); if ( lex_compat ) { - printf( "FILE *yyin = stdin, *yyout = stdout;\n" ); - printf( "extern int yylineno;\n" ); - printf( "int yylineno = 1;\n" ); + outn( "FILE *yyin = stdin, *yyout = stdout;" ); + outn( "extern int yylineno;" ); + outn( "int yylineno = 1;" ); } else if ( ! C_plus_plus ) - printf( "FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;\n" ); + outn( "FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;" ); if ( C_plus_plus ) - printf( "\n#include <FlexLexer.h>\n" ); + outn( "\n#include <FlexLexer.h>" ); else { if ( yytext_is_array ) - puts( "extern char yytext[];\n" ); + outn( "extern char yytext[];\n" ); else { - puts( "extern char *yytext;" ); - puts( "#define yytext_ptr yytext" ); + outn( "extern char *yytext;" ); + outn( "#define yytext_ptr yytext" ); } } @@ -984,6 +1006,7 @@ void usage() fprintf( stderr, "\t\t-Cm construct meta-equivalence classes\n" ); fprintf( stderr, "\t\t-Cr use read() instead of stdio for scanner input\n" ); + fprintf( stderr, "\t-o specify output filename\n" ); fprintf( stderr, "\t-P specify scanner prefix other than \"yy\"\n" ); fprintf( stderr, "\t-S specify skeleton file\n" ); } diff --git a/misc.c b/misc.c index bcc75f3..2b513cb 100644 --- a/misc.c +++ b/misc.c @@ -275,7 +275,7 @@ void dataend() dataflush(); /* add terminator for initialization; { for vi */ - puts( " } ;\n" ); + outn( " } ;\n" ); dataline = 0; datapos = 0; @@ -286,14
+286,14 @@ void dataend() void dataflush() { - putchar( '\n' ); + outc( '\n' ); if ( ++dataline >= NUMDATALINES ) { /* Put out a blank line so that the table is grouped into * large blocks that enable the user to find elements easily. */ - putchar( '\n' ); + outc( '\n' ); dataline = 0; } @@ -387,22 +387,40 @@ int ch; /* line_directive_out - spit out a "# line" statement */ -void line_directive_out( output_file ) +void line_directive_out( output_file, do_infile ) FILE *output_file; +int do_infile; { - if ( infilename && gen_line_dirs ) + char directive[MAXLINE]; + static char line_fmt[] = "# line %d \"%s\"\n"; + + if ( ! gen_line_dirs ) + return; + + if ( (do_infile && ! infilename) || (! do_infile && ! outfilename) ) + /* don't know the filename to use, skip */ + return; + + if ( do_infile ) + sprintf( directive, line_fmt, linenum, infilename ); + else { - char directive[MAXLINE]; - sprintf( directive, "# line %d \"%s\"\n", linenum, infilename ); + if ( output_file == stdout ) + /* Account for the line directive itself. */ + ++out_linenum; - /* If output_file is nil then we should put the directive in - * the accumulated actions. - */ - if ( output_file ) - fputs( directive, output_file ); - else - add_action( directive ); + sprintf( directive, line_fmt, out_linenum, outfilename ); + } + + /* If output_file is nil then we should put the directive in + * the accumulated actions. + */ + if ( output_file ) + { + fputs( directive, output_file ); } + else + add_action( directive ); } @@ -439,20 +457,20 @@ int value; { if ( datapos >= NUMDATAITEMS ) { - putchar( ',' ); + outc( ',' ); dataflush(); } if ( datapos == 0 ) /* Indent. */ - fputs( " ", stdout ); + out( " " ); else - putchar( ',' ); + outc( ',' ); ++datapos; - printf( "%5d", value ); + out_dec( "%5d", value ); } @@ -466,19 +484,19 @@ int value; { if ( datapos >= NUMDATAITEMS ) { - putchar( ',' ); + outc( ',' ); dataflush(); } if ( datapos == 0 ) /* Indent. 
*/ - fputs( " ", stdout ); + out( " " ); else - putchar( ',' ); + outc( ',' ); ++datapos; - printf( "%5d", value ); + out_dec( "%5d", value ); } @@ -589,6 +607,96 @@ Char str[]; } +/* out - various flavors of outputing a (possibly formatted) string for the + * generated scanner, keeping track of the line count. + */ + +void out( str ) +char str[]; + { + fputs( str, stdout ); + out_line_count( str ); + } + +void out_dec( fmt, n ) +char fmt[]; +int n; + { + printf( fmt, n ); + out_line_count( fmt ); + } + +void out_dec2( fmt, n1, n2 ) +char fmt[]; +int n1, n2; + { + printf( fmt, n1, n2 ); + out_line_count( fmt ); + } + +void out_hex( fmt, x ) +char fmt[]; +unsigned int x; + { + printf( fmt, x ); + out_line_count( fmt ); + } + +void out_line_count( str ) +char str[]; + { + register int i; + + for ( i = 0; str[i]; ++i ) + if ( str[i] == '\n' ) + ++out_linenum; + } + +void out_str( fmt, str ) +char fmt[], str[]; + { + printf( fmt, str ); + out_line_count( fmt ); + out_line_count( str ); + } + +void out_str3( fmt, s1, s2, s3 ) +char fmt[], s1[], s2[], s3[]; + { + printf( fmt, s1, s2, s3 ); + out_line_count( fmt ); + out_line_count( s1 ); + out_line_count( s2 ); + out_line_count( s3 ); + } + +void out_str_dec( fmt, str, n ) +char fmt[], str[]; +int n; + { + printf( fmt, str, n ); + out_line_count( fmt ); + out_line_count( str ); + } + +void outc( c ) +int c; + { + putc( c, stdout ); + + if ( c == '\n' ) + ++out_linenum; + } + +void outn( str ) +char str[]; + { + puts( str ); + out_line_count( str ); + ++out_linenum; + } + + /* readable_form - return the the human-readable form of a character * * The returned string is in static storage. @@ -706,9 +814,9 @@ void skelout() /* Skeleton file reads include final * newline, skel[] array does not. 
*/ - fputs( buf, stdout ); + out( buf ); else - printf( "%s\n", buf ); + outn( buf ); } } } @@ -723,16 +831,16 @@ void skelout() void transition_struct_out( element_v, element_n ) int element_v, element_n; { - printf( "%7d, %5d,", element_v, element_n ); + out_dec2( "%7d, %5d,", element_v, element_n ); datapos += TRANS_STRUCT_PRINT_LENGTH; if ( datapos >= 75 ) { - putchar( '\n' ); + outc( '\n' ); if ( ++dataline % 10 == 0 ) - putchar( '\n' ); + outc( '\n' ); datapos = 0; } diff --git a/nfa.c b/nfa.c index 44e893d..2f104f1 100644 --- a/nfa.c +++ b/nfa.c @@ -268,7 +268,7 @@ int mach, variable_trail_rule, headcnt, trailcnt; */ add_action( "YY_USER_ACTION\n" ); - line_directive_out( (FILE *) 0 ); + line_directive_out( (FILE *) 0, 1 ); } diff --git a/parse.y b/parse.y index b3f517f..841a379 100644 --- a/parse.y +++ b/parse.y @@ -716,7 +716,7 @@ void build_eof_action() } } - line_directive_out( (FILE *) 0 ); + line_directive_out( (FILE *) 0, 1 ); /* This isn't a normal rule after all - don't count it as * such, so we don't have any holes in the rule numbering diff --git a/scan.l b/scan.l index 7392714..7aa864d 100644 --- a/scan.l +++ b/scan.l @@ -93,7 +93,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) ^"%x"{NAME}? return XSCDECL; ^"%{".*{NL} { ++linenum; - line_directive_out( (FILE *) 0 ); + line_directive_out( (FILE *) 0, 1 ); indented_code = false; BEGIN(CODEBLOCK); } @@ -104,7 +104,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) sectnum = 2; bracelevel = 0; mark_defs1(); - line_directive_out( (FILE *) 0 ); + line_directive_out( (FILE *) 0, 1 ); BEGIN(SECT2PROLOG); return SECTEND; } diff --git a/sym.c b/sym.c index 29c0f30..4058332 100644 --- a/sym.c +++ b/sym.c @@ -231,7 +231,7 @@ int xcluflg; char *copy_string(); /* Generate start condition definition, for use in BEGIN et al. 
*/ - printf( "#define %s %d\n", str, lastsc ); + out_str_dec( "#define %s %d\n", str, lastsc ); if ( ++lastsc >= current_max_scs ) scextend(); -- cgit v1.2.3 From 3b1159f9651f6cce1d6383ef8f3bdb12274476f3 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sat, 11 Dec 1993 17:06:24 +0000 Subject: yy_str*() -> str*() --- flex.skl | 2 +- flexdef.h | 10 +++++++--- gen.c | 7 +++---- main.c | 6 +++--- misc.c | 4 ++-- scan.l | 16 ++++++++-------- sym.c | 4 ++-- 7 files changed, 26 insertions(+), 23 deletions(-) diff --git a/flex.skl b/flex.skl index f97e506..f4d8356 100644 --- a/flex.skl +++ b/flex.skl @@ -110,7 +110,7 @@ extern "C" { extern void *yy_flex_realloc YY_PROTO(( void *ptr, int size )); extern void yy_flex_free YY_PROTO(( void * )); extern int yywrap YY_PROTO(( void )); - extern void yy_strcpy YY_PROTO(( char *s1, const char *s2 )); + extern void yy_flex_strcpy YY_PROTO(( char *s1, const char *s2 )); #ifdef __cplusplus } #endif diff --git a/flexdef.h b/flexdef.h index bfed07e..c8a26f1 100644 --- a/flexdef.h +++ b/flexdef.h @@ -609,9 +609,13 @@ void *reallocate_array PROTO((void*, int, int)); void *yy_flex_alloc PROTO((int)); void *yy_flex_realloc PROTO((void*, int)); void yy_flex_free PROTO((void*)); -int yy_strcmp PROTO(( const char *s1, const char *s2 )); -void yy_strcpy PROTO(( char *s1, const char *s2 )); -int yy_strlen PROTO(( const char *s )); +int yy_flex_strcmp PROTO(( const char *s1, const char *s2 )); +void yy_flex_strcpy PROTO(( char *s1, const char *s2 )); +int yy_flex_strlen PROTO(( const char *s )); + +#define strcmp yy_flex_strcmp +#define strcpy yy_flex_strcpy +#define strlen yy_flex_strlen #define allocate_integer_array(size) \ (int *) allocate_array( size, sizeof( int ) ) diff --git a/gen.c b/gen.c index 846e76f..09c579d 100644 --- a/gen.c +++ b/gen.c @@ -596,9 +596,8 @@ int worry_about_NULs; } else - yy_strcpy( char_map, useecs ? - "yy_ec[YY_SC_TO_UI(*yy_cp)]" : - "YY_SC_TO_UI(*yy_cp)" ); + strcpy( char_map, useecs ? 
+ "yy_ec[YY_SC_TO_UI(*yy_cp)]" : "YY_SC_TO_UI(*yy_cp)" ); if ( worry_about_NULs && nultrans ) { @@ -1049,7 +1048,7 @@ void make_tables() indent_puts( "YY_FATAL_ERROR( \"token too large, exceeds YYLMAX\" ); \\" ); indent_down(); - indent_puts( "yy_strcpy( yytext, yytext_ptr ); \\" ); + indent_puts( "yy_flex_strcpy( yytext, yytext_ptr ); \\" ); } set_indent( 0 ); diff --git a/main.c b/main.c index a448d54..7fa5e6a 100644 --- a/main.c +++ b/main.c @@ -260,7 +260,7 @@ int exit_status; if ( skelname ) fprintf( stderr, " -S%s", skelname ); - if ( yy_strcmp( prefix, "yy" ) ) + if ( strcmp( prefix, "yy" ) ) fprintf( stderr, " -P%s", prefix ); putc( '\n', stderr ); @@ -394,7 +394,7 @@ char **argv; program_name = argv[0]; if ( program_name[0] != '\0' && - program_name[yy_strlen( program_name ) - 1] == '+' ) + program_name[strlen( program_name ) - 1] == '+' ) C_plus_plus = true; /* read flags */ @@ -696,7 +696,7 @@ char **argv; if ( skelname && (skelfile = fopen( skelname, "r" )) == NULL ) lerrsf( "can't open skeleton file %s", skelname ); - if ( yy_strcmp( prefix, "yy" ) ) + if ( strcmp( prefix, "yy" ) ) { #define GEN_PREFIX(name) out_str3( "#define yy%s %s%s\n", name, prefix, name ); GEN_PREFIX( "FlexLexer" ); diff --git a/misc.c b/misc.c index 2b513cb..4bb7b61 100644 --- a/misc.c +++ b/misc.c @@ -41,7 +41,7 @@ int otoi PROTO((Char [])); void add_action( new_text ) char *new_text; { - int len = yy_strlen( new_text ); + int len = strlen( new_text ); while ( len + action_index >= action_size - 10 /* slop */ ) { @@ -50,7 +50,7 @@ char *new_text; reallocate_character_array( action_array, action_size ); } - yy_strcpy( &action_array[action_index], new_text ); + strcpy( &action_array[action_index], new_text ); action_index += len; } diff --git a/scan.l b/scan.l index 7aa864d..3517dcf 100644 --- a/scan.l +++ b/scan.l @@ -43,11 +43,11 @@ return CHAR; #define RETURNNAME \ - yy_strcpy( nmstr, yytext ); \ + strcpy( nmstr, yytext ); \ return NAME; #define PUT_BACK_STRING(str, start) \ - 
for ( i = yy_strlen( str ) - 1; i >= start; --i ) \ + for ( i = strlen( str ) - 1; i >= start; --i ) \ unput((str)[i]) #define CHECK_REJECT(str) \ @@ -139,7 +139,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) ^"%"[^sxanpekotcru{}].* synerr( "unrecognized '%' directive" ); ^{NAME} { - yy_strcpy( nmstr, yytext ); + strcpy( nmstr, yytext ); didadef = false; BEGIN(PICKUPDEF); } @@ -171,10 +171,10 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) {WS} /* separates name and definition */ {NOT_WS}.* { - yy_strcpy( (char *) nmdef, yytext ); + strcpy( (char *) nmdef, yytext ); /* Skip trailing whitespace. */ - for ( i = yy_strlen( (char *) nmdef ) - 1; + for ( i = strlen( (char *) nmdef ) - 1; i >= 0 && (nmdef[i] == ' ' || nmdef[i] == '\t'); --i ) ; @@ -296,7 +296,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) "["{FIRST_CCL_CHAR}{CCL_CHAR}* { int cclval; - yy_strcpy( nmstr, yytext ); + strcpy( nmstr, yytext ); /* Check to see if we've already encountered this * ccl. @@ -331,7 +331,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) register Char *nmdefptr; Char *ndlookup(); - yy_strcpy( nmstr, yytext + 1 ); + strcpy( nmstr, yytext + 1 ); nmstr[yyleng - 2] = '\0'; /* chop trailing brace */ if ( ! (nmdefptr = ndlookup( nmstr )) ) @@ -340,7 +340,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) else { /* push back name surrounded by ()'s */ - int len = yy_strlen( (char *) nmdefptr ); + int len = strlen( (char *) nmdefptr ); if ( lex_compat || nmdefptr[0] == '^' || (len > 0 && nmdefptr[len - 1] == '$') ) diff --git a/sym.c b/sym.c index 4058332..745a2cc 100644 --- a/sym.c +++ b/sym.c @@ -62,7 +62,7 @@ int table_size; while ( sym_entry ) { - if ( ! yy_strcmp( sym, sym_entry->name ) ) + if ( ! strcmp( sym, sym_entry->name ) ) { /* entry already exists */ return -1; } @@ -142,7 +142,7 @@ int table_size; while ( sym_entry ) { - if ( ! yy_strcmp( sym, sym_entry->name ) ) + if ( ! 
strcmp( sym, sym_entry->name ) ) return sym_entry; sym_entry = sym_entry->next; } -- cgit v1.2.3 From d421ddabec3f405ed52eda4ef4a1f0de2f5e4fbb Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 13 Dec 1993 12:33:07 +0000 Subject: Do #bytes computation in {re,}allocate_array() only once --- misc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/misc.c b/misc.c index 4bb7b61..bb7b29b 100644 --- a/misc.c +++ b/misc.c @@ -62,15 +62,16 @@ void *allocate_array( size, element_size ) int size, element_size; { register void *mem; + int num_bytes = element_size * size; /* On 16-bit int machines (e.g., 80286) we might be trying to * allocate more than a signed int can hold, and that won't * work. Cheap test: */ - if ( element_size * size <= 0 ) + if ( num_bytes <= 0 ) flexfatal( "request for < 1 byte in allocate_array()" ); - mem = yy_flex_alloc( element_size * size ); + mem = yy_flex_alloc( num_bytes ); if ( mem == NULL ) flexfatal( "memory allocation failed in allocate_array()" ); @@ -749,13 +750,14 @@ void *array; int size, element_size; { register void *new_array; + int num_bytes = element_size * size; /* Same worry as in allocate_array(): */ - if ( size * element_size <= 0 ) + if ( num_bytes <= 0 ) flexfatal( "attempt to increase array size by less than 1 byte" ); - new_array = yy_flex_realloc( array, size * element_size ); + new_array = yy_flex_realloc( array, num_bytes ); if ( new_array == NULL ) flexfatal( "attempt to increase array size failed" ); -- cgit v1.2.3 From d331303dde961f499fdf53090cc99e38906c8e8e Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 15 Dec 1993 10:08:29 +0000 Subject: bracket -CF table elements --- flexdef.h | 4 ++-- misc.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/flexdef.h b/flexdef.h index c8a26f1..b9046c2 100644 --- a/flexdef.h +++ b/flexdef.h @@ -96,8 +96,8 @@ */ #define NUMDATALINES 10 -/* Transition_struct_out() definitions. 
*/ -#define TRANS_STRUCT_PRINT_LENGTH 15 +/* transition_struct_out() definitions. */ +#define TRANS_STRUCT_PRINT_LENGTH 14 /* Returns true if an nfa state has an epsilon out-transition slot * that can be used. This definition is currently not used. diff --git a/misc.c b/misc.c index bb7b29b..af7e5fd 100644 --- a/misc.c +++ b/misc.c @@ -833,11 +833,11 @@ void skelout() void transition_struct_out( element_v, element_n ) int element_v, element_n; { - out_dec2( "%7d, %5d,", element_v, element_n ); + out_dec2( " {%4d,%4d },", element_v, element_n ); datapos += TRANS_STRUCT_PRINT_LENGTH; - if ( datapos >= 75 ) + if ( datapos >= 79 - TRANS_STRUCT_PRINT_LENGTH ) { outc( '\n' ); -- cgit v1.2.3 From bc0de0da240ebf516fcef5fde216308da1cb62b6 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 15 Dec 1993 10:23:08 +0000 Subject: start-state stacks, alloc routines take unsigned --- flex.skl | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/flex.skl b/flex.skl index f4d8356..4f0429e 100644 --- a/flex.skl +++ b/flex.skl @@ -106,8 +106,8 @@ extern FILE *yyin, *yyout; #ifdef __cplusplus extern "C" { #endif - extern void *yy_flex_alloc YY_PROTO(( int )); - extern void *yy_flex_realloc YY_PROTO(( void *ptr, int size )); + extern void *yy_flex_alloc YY_PROTO(( unsigned int )); + extern void *yy_flex_realloc YY_PROTO(( void *ptr, unsigned int )); extern void yy_flex_free YY_PROTO(( void * )); extern int yywrap YY_PROTO(( void )); extern void yy_flex_strcpy YY_PROTO(( char *s1, const char *s2 )); @@ -230,6 +230,12 @@ void yy_load_buffer_state YY_PROTO(( void )); YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size )); void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b )); void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file )); + +static int yy_start_stack_ptr = 0; +static int yy_start_stack_depth = 0; +static int *yy_start_stack = 0; +static void yy_push_state YY_PROTO(( int new_state 
)); +static void yy_pop_state YY_PROTO(( void )); %* #define yy_new_buffer yy_create_buffer @@ -248,8 +254,8 @@ static int input YY_PROTO(( void )); static yy_state_type yy_get_previous_state YY_PROTO(( void )); static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); static int yy_get_next_buffer YY_PROTO(( void )); -%* static void yy_fatal_error YY_PROTO(( const char msg[] )); +%* /* Done after the current pattern has been matched and before the * corresponding action - sets up yytext. @@ -306,6 +312,11 @@ static void yy_fatal_error YY_PROTO(( const char msg[] )); #define yyterminate() return YY_NULL #endif +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + /* Report a fatal error. */ #ifndef YY_FATAL_ERROR %- @@ -1039,6 +1050,55 @@ void yyFlexLexer::yy_init_buffer( YY_BUFFER_STATE b, istream* file ) } +%- +#ifdef YY_USE_PROTOS +static void yy_push_state( int new_state ) +#else +static void yy_push_state( new_state ) +int new_state; +#endif +%+ +void yyFlexLexer::yy_push_state( int new_state ) +%* + { + if ( yy_start_stack_ptr >= yy_start_stack_depth ) + { + int new_size; + + yy_start_stack_depth += YY_START_STACK_INCR; + new_size = yy_start_stack_depth * sizeof( int ); + + if ( ! yy_start_stack ) + yy_start_stack = (int *) yy_flex_alloc( new_size ); + + else + yy_start_stack = (int *) yy_flex_realloc( + (void *) yy_start_stack, new_size ); + + if ( ! 
yy_start_stack ) + YY_FATAL_ERROR( + "out of memory expanding start-condition stack" ); + } + + yy_start_stack[yy_start_stack_ptr++] = YY_START; + + BEGIN(new_state); + } + + +%- +static void yy_pop_state() +%+ +void yyFlexLexer::yy_pop_state() +%* + { + if ( --yy_start_stack_ptr < 0 ) + YY_FATAL_ERROR( "start-condition stack underflow" ); + + BEGIN(yy_start_stack[yy_start_stack_ptr]); + } + + %- #ifdef YY_USE_PROTOS static void yy_fatal_error( const char msg[] ) -- cgit v1.2.3 From f22335869e33218a0111c5c27735a106393bc04c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 15 Dec 1993 10:23:16 +0000 Subject: alloc routines take unsigned --- flexdef.h | 4 ++-- misc.c | 22 ++++++---------------- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/flexdef.h b/flexdef.h index b9046c2..b0f5fe6 100644 --- a/flexdef.h +++ b/flexdef.h @@ -606,8 +606,8 @@ extern int num_backing_up, bol_needed; void *allocate_array PROTO((int, int)); void *reallocate_array PROTO((void*, int, int)); -void *yy_flex_alloc PROTO((int)); -void *yy_flex_realloc PROTO((void*, int)); +void *yy_flex_alloc PROTO((unsigned int)); +void *yy_flex_realloc PROTO((void*, unsigned int)); void yy_flex_free PROTO((void*)); int yy_flex_strcmp PROTO(( const char *s1, const char *s2 )); void yy_flex_strcpy PROTO(( char *s1, const char *s2 )); diff --git a/misc.c b/misc.c index af7e5fd..271a3a4 100644 --- a/misc.c +++ b/misc.c @@ -62,14 +62,7 @@ void *allocate_array( size, element_size ) int size, element_size; { register void *mem; - int num_bytes = element_size * size; - - /* On 16-bit int machines (e.g., 80286) we might be trying to - * allocate more than a signed int can hold, and that won't - * work. 
Cheap test: - */ - if ( num_bytes <= 0 ) - flexfatal( "request for < 1 byte in allocate_array()" ); + unsigned int num_bytes = element_size * size; mem = yy_flex_alloc( num_bytes ); @@ -178,12 +171,14 @@ register char *str; { register char *c; char *copy; + unsigned int size; /* find length */ for ( c = str; *c; ++c ) ; - copy = (char *) yy_flex_alloc( (c - str + 1) * sizeof( char ) ); + size = (c - str + 1) * sizeof( char ); + copy = (char *) yy_flex_alloc( size ); if ( copy == NULL ) flexfatal( "dynamic memory failure in copy_string()" ); @@ -750,12 +745,7 @@ void *array; int size, element_size; { register void *new_array; - int num_bytes = element_size * size; - - /* Same worry as in allocate_array(): */ - if ( num_bytes <= 0 ) - flexfatal( - "attempt to increase array size by less than 1 byte" ); + unsigned int num_bytes = element_size * size; new_array = yy_flex_realloc( array, num_bytes ); @@ -855,7 +845,7 @@ int element_v, element_n; void *yy_flex_xmalloc( size ) int size; { - void *result = yy_flex_alloc( size ); + void *result = yy_flex_alloc( (unsigned) size ); if ( ! 
result ) flexfatal( "memory allocation failed in yy_flex_xmalloc()" ); -- cgit v1.2.3 From 00627242d65e221c10a59e70630da3c804df9210 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 15 Dec 1993 10:23:40 +0000 Subject: Bug fix for -CF --- dfa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dfa.c b/dfa.c index d427ca0..33702f0 100644 --- a/dfa.c +++ b/dfa.c @@ -510,7 +510,7 @@ void ntod() state[i] = 0; place_state( state, 0, 0 ); - dfaacc[i].dfaacc_state = 0; + dfaacc[0].dfaacc_state = 0; } else if ( fulltbl ) -- cgit v1.2.3 From 28f45f1f486fda4323d6f6e6aea3194e949fc227 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Wed, 15 Dec 1993 10:23:53 +0000 Subject: start stack, extern "C++" moved --- FlexLexer.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/FlexLexer.h b/FlexLexer.h index 1d5a66d..c90f30d 100644 --- a/FlexLexer.h +++ b/FlexLexer.h @@ -35,10 +35,10 @@ // If you want to create multiple lexer classes, you use the -P flag // to rename each yyFlexLexer to some other xxFlexLexer. 
-extern "C++" { - #include +extern "C++" { + struct yy_buffer_state; typedef int yy_state_type; @@ -82,6 +82,9 @@ public: yy_more_flag = 0; yy_more_len = 0; + yy_start_stack_ptr = yy_start_stack_depth = 0; + yy_start_stack = 0; + yy_current_buffer = 0; #ifdef YY_USES_REJECT @@ -114,6 +117,13 @@ protected: void yy_load_buffer_state(); void yy_init_buffer( struct yy_buffer_state* b, istream* s ); + int yy_start_stack_ptr; + int yy_start_stack_depth; + int* yy_start_stack; + + void yy_push_state( int new_state ); + int yy_pop_state(); + yy_state_type yy_get_previous_state(); yy_state_type yy_try_NUL_trans( yy_state_type current_state ); int yy_get_next_buffer(); -- cgit v1.2.3 From dadbb3884ab2f629c0271e9e0358afb73bc4ff02 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 16 Dec 1993 20:44:10 +0000 Subject: removed crufty WHITESPACE token, some uses of '\n' token --- parse.y | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/parse.y b/parse.y index 841a379..ba90acd 100644 --- a/parse.y +++ b/parse.y @@ -1,6 +1,6 @@ /* parse.y - parser for flex input */ -%token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL EOF_OP +%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP %{ /*- @@ -121,9 +121,9 @@ initlex : } ; -sect1 : sect1 startconddecl WHITESPACE namelist1 '\n' +sect1 : sect1 startconddecl namelist1 | - | error '\n' + | error { synerr( "unknown error processing section 1" ); } ; @@ -147,7 +147,7 @@ startconddecl : SCDECL { xcluflg = true; } ; -namelist1 : namelist1 WHITESPACE NAME +namelist1 : namelist1 NAME { scinstal( nmstr, xcluflg ); } | NAME -- cgit v1.2.3 From 008d27567e2d804ad7de3ae5bf79fd5ce57c9e93 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 16 Dec 1993 20:44:22 +0000 Subject: simplified comment-scanning using push/pop states --- scan.l | 40 ++++++++++++---------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/scan.l b/scan.l index 3517dcf..050a52b 100644 --- a/scan.l +++ 
b/scan.l @@ -60,7 +60,7 @@ %} %x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE -%x FIRSTCCL CCL ACTION RECOVER BRACEERROR C_COMMENT ACTION_COMMENT +%x FIRSTCCL CCL ACTION RECOVER BRACEERROR COMMENT %x ACTION_STRING PERCENT_BRACE_ACTION USED_LIST CODEBLOCK_2 WS [ \t]+ @@ -88,7 +88,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) ^{WS} indented_code = true; BEGIN(CODEBLOCK); -^"/*" ACTION_ECHO; BEGIN(C_COMMENT); +^"/*" ACTION_ECHO; yy_push_state( COMMENT ); ^"%s"{NAME}? return SCDECL; ^"%x"{NAME}? return XSCDECL; ^"%{".*{NL} { @@ -98,7 +98,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) BEGIN(CODEBLOCK); } -{WS} return WHITESPACE; +{WS} /* discard */ ^"%%".* { sectnum = 2; @@ -146,19 +146,16 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) {SCNAME} RETURNNAME; ^{OPTWS}{NL} ++linenum; /* allows blank lines in section 1 */ -{OPTWS}{NL} ++linenum; return '\n'; +{OPTWS}{NL} ACTION_ECHO; ++linenum; /* maybe end of comment line */ -"*/" ACTION_ECHO; BEGIN(INITIAL); -"*/".*{NL} ++linenum; ACTION_ECHO; BEGIN(INITIAL); -[^*\n]+ ACTION_ECHO; -"*" ACTION_ECHO; -{NL} ++linenum; ACTION_ECHO; +"*/" ACTION_ECHO; yy_pop_state(); +"*" ACTION_ECHO; +[^*\n]+ ACTION_ECHO; +[^*\n]*{NL} ++linenum; ACTION_ECHO; ^"%}".*{NL} ++linenum; BEGIN(INITIAL); -"reject" ACTION_ECHO; CHECK_REJECT(yytext); -"yymore" ACTION_ECHO; CHECK_YYMORE(yytext); {NAME}|{NOT_NAME}|. 
ACTION_ECHO; {NL} { ++linenum; @@ -426,13 +423,13 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) {NL} synerr( "missing }" ); ++linenum; BEGIN(SECT2); -"/*" ACTION_ECHO; BEGIN(ACTION_COMMENT); +"/*" ACTION_ECHO; yy_push_state( COMMENT ); {OPTWS}"%}".* bracelevel = 0; -"reject" { +"reject" { ACTION_ECHO; CHECK_REJECT(yytext); } -"yymore" { +"yymore" { ACTION_ECHO; CHECK_YYMORE(yytext); } @@ -457,7 +454,6 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) "}" ACTION_ECHO; --bracelevel; [^a-z_{}"'/\n]+ ACTION_ECHO; {NAME} ACTION_ECHO; -"/*" ACTION_ECHO; BEGIN(ACTION_COMMENT); "'"([^'\\\n]|\\.)*"'" ACTION_ECHO; /* character constant */ \" ACTION_ECHO; BEGIN(ACTION_STRING); {NL} { @@ -471,25 +467,13 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) } . ACTION_ECHO; -"*/" { - ACTION_ECHO; - if ( doing_codeblock ) - BEGIN(CODEBLOCK_2); - else - BEGIN(ACTION); - } - -"*" ACTION_ECHO; -[^*\n]+ ACTION_ECHO; -[^*\n]*{NL} ++linenum; ACTION_ECHO; - [^"\\\n]+ ACTION_ECHO; \\. ACTION_ECHO; {NL} ++linenum; ACTION_ECHO; \" ACTION_ECHO; BEGIN(ACTION); . 
ACTION_ECHO; -<> { +<> { synerr( "EOF encountered inside an action" ); yyterminate(); } -- cgit v1.2.3 From 9ae90fea66ea209fc69e78f601a6e3b1c8471fcb Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 16 Dec 1993 20:46:12 +0000 Subject: Added yy_top_state() --- FlexLexer.h | 3 ++- flex.skl | 11 +++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/FlexLexer.h b/FlexLexer.h index c90f30d..87636e3 100644 --- a/FlexLexer.h +++ b/FlexLexer.h @@ -122,7 +122,8 @@ protected: int* yy_start_stack; void yy_push_state( int new_state ); - int yy_pop_state(); + void yy_pop_state(); + int yy_top_state(); yy_state_type yy_get_previous_state(); yy_state_type yy_try_NUL_trans( yy_state_type current_state ); diff --git a/flex.skl b/flex.skl index 4f0429e..a6be4d9 100644 --- a/flex.skl +++ b/flex.skl @@ -236,6 +236,7 @@ static int yy_start_stack_depth = 0; static int *yy_start_stack = 0; static void yy_push_state YY_PROTO(( int new_state )); static void yy_pop_state YY_PROTO(( void )); +static int yy_top_state YY_PROTO(( void )); %* #define yy_new_buffer yy_create_buffer @@ -1099,6 +1100,16 @@ void yyFlexLexer::yy_pop_state() } +%- +static int yy_top_state() +%+ +int yyFlexLexer::yy_top_state() +%* + { + return yy_start_stack[yy_start_stack_ptr - 1]; + } + + %- #ifdef YY_USE_PROTOS static void yy_fatal_error( const char msg[] ) -- cgit v1.2.3 From eb3ab75b6f3283813d3b235a747d5cfa35c929a7 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 17 Dec 1993 10:03:44 +0000 Subject: No more WHITESPACE token --- yylex.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/yylex.c b/yylex.c index 49e6b74..c1db44f 100644 --- a/yylex.c +++ b/yylex.c @@ -109,10 +109,6 @@ int yylex() fputs( "%x", stderr ); break; - case WHITESPACE: - (void) putc( ' ', stderr ); - break; - case SECTEND: fputs( "%%\n", stderr ); -- cgit v1.2.3 From f64cbf3694e6322c0ead661c143ecf0d7a807fe7 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 17 Dec 1993 10:04:01 +0000 Subject: yy_flex_XXX -> 
flex_XXX --- dfa.c | 4 ++-- misc.c | 8 ++++---- sym.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dfa.c b/dfa.c index 33702f0..4273cfc 100644 --- a/dfa.c +++ b/dfa.c @@ -766,8 +766,8 @@ void ntod() mkdeftbl(); } - yy_flex_free( (void *) accset ); - yy_flex_free( (void *) nset ); + flex_free( (void *) accset ); + flex_free( (void *) nset ); } diff --git a/misc.c b/misc.c index 271a3a4..ef030c2 100644 --- a/misc.c +++ b/misc.c @@ -64,7 +64,7 @@ int size, element_size; register void *mem; unsigned int num_bytes = element_size * size; - mem = yy_flex_alloc( num_bytes ); + mem = flex_alloc( num_bytes ); if ( mem == NULL ) flexfatal( "memory allocation failed in allocate_array()" ); @@ -178,7 +178,7 @@ register char *str; ; size = (c - str + 1) * sizeof( char ); - copy = (char *) yy_flex_alloc( size ); + copy = (char *) flex_alloc( size ); if ( copy == NULL ) flexfatal( "dynamic memory failure in copy_string()" ); @@ -747,7 +747,7 @@ int size, element_size; register void *new_array; unsigned int num_bytes = element_size * size; - new_array = yy_flex_realloc( array, num_bytes ); + new_array = flex_realloc( array, num_bytes ); if ( new_array == NULL ) flexfatal( "attempt to increase array size failed" ); @@ -845,7 +845,7 @@ int element_v, element_n; void *yy_flex_xmalloc( size ) int size; { - void *result = yy_flex_alloc( (unsigned) size ); + void *result = flex_alloc( (unsigned) size ); if ( ! 
result ) flexfatal( "memory allocation failed in yy_flex_xmalloc()" ); diff --git a/sym.c b/sym.c index 745a2cc..f00fdd2 100644 --- a/sym.c +++ b/sym.c @@ -72,7 +72,7 @@ int table_size; /* create new entry */ new_entry = (struct hash_entry *) - yy_flex_alloc( sizeof( struct hash_entry ) ); + flex_alloc( sizeof( struct hash_entry ) ); if ( new_entry == NULL ) flexfatal( "symbol table memory allocation failed" ); -- cgit v1.2.3 From c7288ae69923d797df796d0568a11732977101b7 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 17 Dec 1993 10:04:12 +0000 Subject: Added flex_XXX -> yy_flex_XXX wrappers --- scan.l | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/scan.l b/scan.l index 050a52b..76c6dec 100644 --- a/scan.l +++ b/scan.l @@ -532,3 +532,25 @@ char *file; infilename = ""; } } + + +/* Wrapper routines for accessing the scanner's malloc routines. */ + +void *flex_alloc( size ) +unsigned int size; + { + return yy_flex_alloc( size ); + } + +void *flex_realloc( ptr, size ) +void *ptr; +unsigned int size; + { + return yy_flex_realloc( ptr, size ); + } + +void flex_free( ptr ) +void *ptr; + { + yy_flex_free( ptr ); + } -- cgit v1.2.3 From 931a0c05d8f8067dc3d9bf5148a93a7663bf38ac Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 17 Dec 1993 10:04:28 +0000 Subject: Use autoconf for string/strings.h yy_flex_XXX -> flex_XXX --- flexdef.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/flexdef.h b/flexdef.h index b0f5fe6..be09463 100644 --- a/flexdef.h +++ b/flexdef.h @@ -31,6 +31,12 @@ #include #include +#if HAVE_STRING_H +#include <string.h> +#else +#include <strings.h> +#endif + /* Always be prepared to generate an 8-bit scanner.
*/ #define CSIZE 256 #define Char unsigned char @@ -606,16 +612,9 @@ extern int num_backing_up, bol_needed; void *allocate_array PROTO((int, int)); void *reallocate_array PROTO((void*, int, int)); -void *yy_flex_alloc PROTO((unsigned int)); -void *yy_flex_realloc PROTO((void*, unsigned int)); -void yy_flex_free PROTO((void*)); -int yy_flex_strcmp PROTO(( const char *s1, const char *s2 )); -void yy_flex_strcpy PROTO(( char *s1, const char *s2 )); -int yy_flex_strlen PROTO(( const char *s )); - -#define strcmp yy_flex_strcmp -#define strcpy yy_flex_strcpy -#define strlen yy_flex_strlen +void *flex_alloc PROTO((unsigned int)); +void *flex_realloc PROTO((void*, unsigned int)); +void flex_free PROTO((void*)); #define allocate_integer_array(size) \ (int *) allocate_array( size, sizeof( int ) ) -- cgit v1.2.3 From 1c5b4f69c5cd3b7b865262aabb87b40cf260d8f0 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 17 Dec 1993 10:05:04 +0000 Subject: Check for string.h --- configure.in | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.in b/configure.in index de6d00b..8654711 100644 --- a/configure.in +++ b/configure.in @@ -9,6 +9,7 @@ AC_PROG_RANLIB AC_PROG_INSTALL AC_CONST AC_STDC_HEADERS +AC_HAVE_HEADERS(string.h) case "$YACC" in *bison*) -- cgit v1.2.3 From 0598edc27f7be1b2d4fe10e1bb6b505aacfc33c5 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 17 Dec 1993 10:05:49 +0000 Subject: Nuked lib{string,alloc}.c, added dependency of yylex.o on parse.h --- Makefile.in | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile.in b/Makefile.in index c1a133d..c097c38 100644 --- a/Makefile.in +++ b/Makefile.in @@ -66,11 +66,11 @@ SOURCES = ccl.c dfa.c ecs.c gen.c main.c misc.c nfa.c parse.y \ OBJECTS = ccl.o dfa.o ecs.o gen.o main.o misc.o nfa.o parse.o \ scan.o skel.o sym.o tblcmp.o yylex.o @ALLOCA@ -LIBSRCS = liballoc.c libmain.c libstring.c libyywrap.c -LIBOBJS = liballoc.o libmain.o libstring.o libyywrap.o +LIBSRCS = libmain.c libyywrap.c 
+LIBOBJS = libmain.o libyywrap.o LINTSRCS = ccl.c dfa.c ecs.c gen.c main.c misc.c nfa.c parse.c \ - scan.c skel.c sym.c tblcmp.c yylex.c liballoc.c libstring.c + scan.c skel.c sym.c tblcmp.c yylex.c DISTFILES = README NEWS COPYING INSTALL FlexLexer.h \ configure.in Makefile.in mkskel.sh flex.skl \ @@ -113,6 +113,7 @@ scan.c: scan.l mv scan.tmp scan.c scan.o: scan.c parse.h flexdef.h +yylex.o: yylex.c parse.h flexdef.h skel.c: flex.skl mkskel.sh $(SHELL) $(srcdir)/mkskel.sh $(srcdir)/flex.skl >skel.c @@ -128,7 +129,6 @@ parse.o: parse.c flexdef.h skel.o: skel.c flexdef.h sym.o: sym.c flexdef.h tblcmp.o: tblcmp.c flexdef.h -yylex.o: yylex.c flexdef.h alloca.o: alloca.c $(CC) $(CPPFLAGS) $(CFLAGS) -c -Dxmalloc=yy_flex_xmalloc alloca.c -- cgit v1.2.3 From 6d3aeaa10b32f039052083cc9af69129c268728b Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 17 Dec 1993 10:06:38 +0000 Subject: alloc, string routines internal --- flex.skl | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/flex.skl b/flex.skl index a6be4d9..cd2bf8d 100644 --- a/flex.skl +++ b/flex.skl @@ -106,11 +106,7 @@ extern FILE *yyin, *yyout; #ifdef __cplusplus extern "C" { #endif - extern void *yy_flex_alloc YY_PROTO(( unsigned int )); - extern void *yy_flex_realloc YY_PROTO(( void *ptr, unsigned int )); - extern void yy_flex_free YY_PROTO(( void * )); extern int yywrap YY_PROTO(( void )); - extern void yy_flex_strcpy YY_PROTO(( char *s1, const char *s2 )); #ifdef __cplusplus } #endif @@ -239,6 +235,14 @@ static void yy_pop_state YY_PROTO(( void )); static int yy_top_state YY_PROTO(( void )); %* +#ifdef yytext_ptr +static void yy_flex_strcpy YY_PROTO(( char *, const char * )); +#endif + +static void *yy_flex_alloc YY_PROTO(( unsigned int )); +static void *yy_flex_realloc YY_PROTO(( void *ptr, unsigned int )); +static void yy_flex_free YY_PROTO(( void * )); + #define yy_new_buffer yy_create_buffer %% 
yytext/yyin/yyout/yy_state_type/yylineno etc. def's & init go here @@ -275,6 +279,19 @@ static void yy_fatal_error YY_PROTO(( const char msg[] )); * section 1. */ +#ifdef YY_MALLOC_DECL +YY_MALLOC_DECL +#else +#if __STDC__ +#include <stdlib.h> +#else +/* Just try to get by without declaring the routines. This will fail + * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int) + * or sizeof(void*) != sizeof(int). + */ +#endif +#endif + /* Amount of stuff to slurp up with each read. */ #ifndef YY_READ_BUF_SIZE #define YY_READ_BUF_SIZE 8192 @@ -1146,3 +1163,36 @@ void yyFlexLexer::LexerError( const char msg[] ) yyleng = n; \ } \ while ( 0 ) + + +/* Internal utility routines. */ + +#ifdef yytext_ptr +static void yy_flex_strcpy( s1, s2 ) +char *s1; +const char *s2; + { + while ( (*(s1++) = *(s2++)) ) + ; + } +#endif + + +static void *yy_flex_alloc( size ) +unsigned int size; + { + return (void *) malloc( size ); + } + +static void *yy_flex_realloc( ptr, size ) +void *ptr; +unsigned int size; + { + return (void *) realloc( ptr, size ); + } + +static void yy_flex_free( ptr ) +void *ptr; + { + free( ptr ); + } -- cgit v1.2.3 From a4a405374e3154d43b2f3e4db916ce439a7b87fb Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Fri, 17 Dec 1993 10:31:10 +0000 Subject: prototypes for alloc/string routines --- flex.skl | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/flex.skl b/flex.skl index cd2bf8d..2126c08 100644 --- a/flex.skl +++ b/flex.skl @@ -283,7 +283,9 @@ static void yy_fatal_error YY_PROTO(( const char msg[] )); YY_MALLOC_DECL #else #if __STDC__ +#ifndef __cplusplus #include <stdlib.h> +#endif #else /* Just try to get by without declaring the routines. This will fail * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int) @@ -1168,9 +1170,13 @@ void yyFlexLexer::LexerError( const char msg[] ) /* Internal utility routines.
*/ #ifdef yytext_ptr +#ifdef YY_USE_PROTOS +static void yy_flex_strcpy( char *s1, const char *s2 ) +#else static void yy_flex_strcpy( s1, s2 ) char *s1; const char *s2; +#endif { while ( (*(s1++) = *(s2++)) ) ; @@ -1178,21 +1184,33 @@ const char *s2; #endif +#ifdef YY_USE_PROTOS +static void *yy_flex_alloc( unsigned int size ) +#else static void *yy_flex_alloc( size ) unsigned int size; +#endif { return (void *) malloc( size ); } +#ifdef YY_USE_PROTOS +static void *yy_flex_realloc( void *ptr, unsigned int size ) +#else static void *yy_flex_realloc( ptr, size ) void *ptr; unsigned int size; +#endif { return (void *) realloc( ptr, size ); } +#ifdef YY_USE_PROTOS +static void yy_flex_free( void *ptr ) +#else static void yy_flex_free( ptr ) void *ptr; +#endif { free( ptr ); } -- cgit v1.2.3 From 2e3e9d6be023d053e564debd794cf50ce3e02e9b Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 20 Dec 1993 09:41:06 +0000 Subject: Added --- flexdef.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/flexdef.h b/flexdef.h index be09463..30003ec 100644 --- a/flexdef.h +++ b/flexdef.h @@ -37,6 +37,10 @@ #include #endif +#if __STDC__ +#include +#endif + /* Always be prepared to generate an 8-bit scanner. */ #define CSIZE 256 #define Char unsigned char -- cgit v1.2.3 From 18f65cd6257beea039939193b415cde0dab49f13 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 23 Dec 1993 20:41:11 +0000 Subject: Simplified scanning {}'s --- scan.l | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scan.l b/scan.l index 76c6dec..5b7dbb5 100644 --- a/scan.l +++ b/scan.l @@ -253,7 +253,6 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) ^"^" return '^'; \" BEGIN(QUOTE); return '"'; "{"/[0-9] BEGIN(NUM); return '{'; -"{"[^0-9\n][^}\n]* BEGIN(BRACEERROR); "$"/([ \t]|{NL}) return '$'; {WS}"%{" { @@ -357,7 +356,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) } } -[/|*+?.()] return (unsigned char) yytext[0]; +[/|*+?.(){}] return (unsigned char) yytext[0]; . 
RETURNCHAR; -- cgit v1.2.3 From 451cf441bcc0e656c359a0495e5c2736294e5642 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 23 Dec 1993 20:41:47 +0000 Subject: Added beginnings of { ... } --- parse.y | 1 + 1 file changed, 1 insertion(+) diff --git a/parse.y b/parse.y index ba90acd..9b5e991 100644 --- a/parse.y +++ b/parse.y @@ -158,6 +158,7 @@ namelist1 : namelist1 NAME ; sect2 : sect2 initforrule flexrule '\n' + | sect2 '{' '\n' sect2 '}' '\n' | ; -- cgit v1.2.3 From f1fe21f39446c1f3bc10be40f1ee2fab265eb3c3 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 23 Dec 1993 20:42:36 +0000 Subject: 2.5.0 snapshot for Craig --- NEWS | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/NEWS b/NEWS index bb7a818..68e466c 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,22 @@ +Changes between release 2.5.0 (xxDec93) and release 2.4.5: + + - Snapshot for installation on ell. + + - Self-contained scanners (modulo yywrap()). + + - Three new routines are available for manipulating stacks + of start conditions: yy_push_state( int new_state ), + void yy_pop_state(), and int yy_top_state(). + + - Flex now generates #line directives relating the code it + produces to the output file; this means that error messages + in the flex-generated code should be correctly pinpointed. + + - A new "-ooutput" option writes the generated scanner to "output". + If used with -t, the scanner is still written to stdout, but + its internal #line directives use "output". 
+ + Changes between release 2.4.5 (11Dec93) and release 2.4.4: - Fixed bug breaking C++ scanners that use REJECT or variable -- cgit v1.2.3 From 5445b66b6aafddb5dc89b05effab163b76bf837a Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 23 Dec 1993 20:43:15 +0000 Subject: Fixsed sense of test for %array --- flex.skl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flex.skl b/flex.skl index 2126c08..4444e08 100644 --- a/flex.skl +++ b/flex.skl @@ -235,7 +235,7 @@ static void yy_pop_state YY_PROTO(( void )); static int yy_top_state YY_PROTO(( void )); %* -#ifdef yytext_ptr +#ifndef yytext_ptr static void yy_flex_strcpy YY_PROTO(( char *, const char * )); #endif @@ -1169,7 +1169,7 @@ void yyFlexLexer::LexerError( const char msg[] ) /* Internal utility routines. */ -#ifdef yytext_ptr +#ifndef yytext_ptr #ifdef YY_USE_PROTOS static void yy_flex_strcpy( char *s1, const char *s2 ) #else -- cgit v1.2.3 From 2b2cf0de76738b95705b6f120cf3ae29b8f1b14d Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Thu, 23 Dec 1993 20:44:02 +0000 Subject: -ooutput #line directives credits --- flex.1 | 53 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/flex.1 b/flex.1 index d27da80..cd44019 100644 --- a/flex.1 +++ b/flex.1 @@ -1,9 +1,9 @@ -.TH FLEXDOC 1 "November 1993" "Version 2.4" +.TH FLEXDOC 1 "December 1993" "Version 2.5" .SH NAME flexdoc \- documentation for flex, fast lexical analyzer generator .SH SYNOPSIS .B flex -.B [\-bcdfhilnpstvwBFILTV78+ \-C[aefFmr] \-Pprefix \-Sskeleton] +.B [\-bcdfhilnpstvwBFILTV78+ \-C[aefFmr] \-ooutput \-Pprefix \-Sskeleton] .I [filename ...] .SH DESCRIPTION .I flex @@ -924,10 +924,6 @@ section of the input file): This definition will change the input processing to occur one character at a time. .PP -You also can add in things like keeping track of the -input line number this way; but don't expect your scanner to -go very fast. 
-.PP When the scanner receives an end-of-file indication from YY_INPUT, it then checks the .B yywrap() @@ -1771,18 +1767,14 @@ directives. Without this option, .I flex peppers the generated scanner with #line directives so error messages in the actions will be correctly -located with respect to the original -.I flex -input file, and not to -the fairly meaningless line numbers of -.B lex.yy.c. -(Unfortunately +located with respect to either the original .I flex -does not presently generate the necessary directives -to "retarget" the line numbers for those parts of +input file (if the errors are due to code in the input file), or .B lex.yy.c -which it generated. So if there is an error in the generated code, -a meaningless line number is reported.) +(if the errors are +.I flex's +fault -- you should report these sorts of errors to the email address +given below). .TP .B \-T makes @@ -1992,6 +1984,24 @@ compression. is often a good compromise between speed and size for production scanners. .TP +.B \-ooutput +directs flex to write the scanner to the file +.B output +instead of +.B lex.yy.c. +If you combine +.B \-o +with the +.B \-t +option, then the scanner is written to +.I stdout +but its +.B #line +directives (see the +.B \\-L +option above) refer to the file +.B output. +.TP .B \-Pprefix changes the default .I "yy" @@ -2828,6 +2838,7 @@ features are not included in or the POSIX specification: .nf + C++ scanners yyterminate() <> <*> @@ -3011,19 +3022,21 @@ beta-testers, feedbackers, and contributors, especially Francois Pinard, Casey Leedom, Nelson H.F. Beebe, benson@odi.com, Peter A. Bigot, Keith Bostic, Frederic Brehm, Nick Christopher, Jason Coughlin, Bill Cox, Dave Curtis, Scott David -Daniels, Chris G. Demetriou, Mike Donahue, Chuck Doucette, Tom Epperly, Leo +Daniels, Chris G. Demetriou, Theo Deraadt, +Mike Donahue, Chuck Doucette, Tom Epperly, Leo Eskin, Chris Faylor, Jon Forrest, Kaveh R. 
Ghazi, Eric Goldman, Ulrich Grepel, Jan Hajic, Jarkko Hietaniemi, Eric Hughes, John Interrante, -Ceriel Jacobs, Jeffrey R. Jones, Henry +Ceriel Jacobs, Michal Jaegermann, Jeffrey R. Jones, Henry Juengst, Amir Katz, ken@ken.hilco.com, Kevin B. Kenny, Marq Kole, Ronald Lamprecht, Greg Lee, Craig Leres, John Levine, Steve Liddle, Mohamed el Lozy, Brian Madsen, Chris -Metcalf, Luke Mewburn, Jim Meyering, G.T. Nicol, Landon Noll, Marc Nozell, +Metcalf, Luke Mewburn, Jim Meyering, Erik Naggum, +G.T. Nicol, Landon Noll, Marc Nozell, Richard Ohnemus, Sven Panne, Roland Pesch, Walter Pelissero, Gaumond Pierre, Esmond Pitt, Jef Poskanzer, Joe Rahmeh, Frederic Raimbault, Rick Richardson, -Kevin Rodgers, Jim Roskind, +Kevin Rodgers, Kai Uwe Rommel, Jim Roskind, Doug Schmidt, Philippe Schnoebelen, Andreas Schwab, Alex Siegel, Mike Stump, Paul Stuart, Dave Tallman, Chris Thewalt, Paul Tuinenga, Gary Weik, Frank Whaley, Gerhard Wilhelms, Kent Williams, Ken -- cgit v1.2.3 From e7f1aa317204a43b279a45b43844b110a81819fb Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 26 Dec 1993 15:02:46 +0000 Subject: Removed actvsc --- sym.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sym.c b/sym.c index f00fdd2..a404f2a 100644 --- a/sym.c +++ b/sym.c @@ -214,7 +214,6 @@ void scextend() scxclu = reallocate_integer_array( scxclu, current_max_scs ); sceof = reallocate_integer_array( sceof, current_max_scs ); scname = reallocate_char_ptr_array( scname, current_max_scs ); - actvsc = reallocate_integer_array( actvsc, current_max_scs ); } -- cgit v1.2.3 From ff2a5a7404a04ae9da300132a16671a36897a6b5 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 26 Dec 1993 15:02:58 +0000 Subject: Added in_rule, doing_rule_action --- scan.l | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/scan.l b/scan.l index 5b7dbb5..9625ec3 100644 --- a/scan.l +++ b/scan.l @@ -81,6 +81,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) %% static int bracelevel, didadef, 
indented_code, checking_used; + static int doing_rule_action = false; int doing_codeblock = false; int i; @@ -258,7 +259,13 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) {WS}"%{" { bracelevel = 1; BEGIN(PERCENT_BRACE_ACTION); - return '\n'; + + if ( in_rule ) + { + doing_rule_action = true; + in_rule = false; + return '\n'; + } } {WS}"|".*{NL} continued_action = true; ++linenum; return '\n'; @@ -270,7 +277,13 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) bracelevel = 0; continued_action = false; BEGIN(ACTION); - return '\n'; + + if ( in_rule ) + { + doing_rule_action = true; + in_rule = false; + return '\n'; + } } {OPTWS}{NL} { @@ -278,7 +291,13 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) continued_action = false; BEGIN(ACTION); unput( '\n' ); /* so sees it */ - return '\n'; + + if ( in_rule ) + { + doing_rule_action = true; + in_rule = false; + return '\n'; + } } "<>" return EOF_OP; @@ -439,10 +458,10 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) if ( bracelevel == 0 || (doing_codeblock && indented_code) ) { - if ( ! doing_codeblock ) + if ( doing_rule_action ) add_action( "\tYY_BREAK\n" ); - - doing_codeblock = false; + + doing_rule_action = doing_codeblock = false; BEGIN(SECT2); } } @@ -460,7 +479,10 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) ACTION_ECHO; if ( bracelevel == 0 ) { - add_action( "\tYY_BREAK\n" ); + if ( doing_rule_action ) + add_action( "\tYY_BREAK\n" ); + + doing_rule_action = false; BEGIN(SECT2); } } -- cgit v1.2.3 From 58e1e767cda4bffeb0d0650df7231a924b2df9ac Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 26 Dec 1993 15:03:14 +0000 Subject: Added doubling of '\'s in filenames --- misc.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/misc.c b/misc.c index ef030c2..12e4cf1 100644 --- a/misc.c +++ b/misc.c @@ -387,7 +387,8 @@ void line_directive_out( output_file, do_infile ) FILE *output_file; int do_infile; { - char directive[MAXLINE]; + char directive[MAXLINE], filename[MAXLINE]; + char *s1, *s2, *s3; static char line_fmt[] = "# line %d \"%s\"\n"; if ( ! 
gen_line_dirs ) @@ -397,15 +398,30 @@ int do_infile; /* don't know the filename to use, skip */ return; + s1 = do_infile ? infilename : outfilename; + s2 = filename; + s3 = &filename[sizeof( filename ) - 2]; + + while ( s2 < s3 && *s1 ) + { + if ( *s1 == '\\' ) + /* Escape the '\' */ + *s2++ = '\\'; + + *s2++ = *s1++; + } + + *s2 = '\0'; + if ( do_infile ) - sprintf( directive, line_fmt, linenum, infilename ); + sprintf( directive, line_fmt, linenum, filename ); else { if ( output_file == stdout ) /* Account for the line directive itself. */ ++out_linenum; - sprintf( directive, line_fmt, out_linenum, outfilename ); + sprintf( directive, line_fmt, out_linenum, filename ); } /* If output_file is nil then we should put the directive in -- cgit v1.2.3 From a09b607cfb81e034eeda6aa5c6cf73cccd908840 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 26 Dec 1993 15:04:06 +0000 Subject: Added in_rule, deleted actvsc --- flexdef.h | 12 +++--------- main.c | 11 +++++------ 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/flexdef.h b/flexdef.h index 30003ec..562af3a 100644 --- a/flexdef.h +++ b/flexdef.h @@ -341,6 +341,7 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; * having "reject" set for variable trailing context) * continued_action - true if this rule's action is to "fall through" to * the next rule's action (i.e., the '|' action) + * in_rule - true if we're inside an individual rule, false if not. 
* yymore_really_used - has a REALLY_xxx value indicating whether a * %used or %notused was used with yymore() * reject_really_used - same for REJECT @@ -350,7 +351,7 @@ extern int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt; extern int interactive, caseins, lex_compat, useecs, fulltbl, usemecs; extern int fullspd, gen_line_dirs, performance_report, backing_up_report; extern int C_plus_plus, long_align, use_read, yytext_is_array, csize; -extern int yymore_used, reject, real_reject, continued_action; +extern int yymore_used, reject, real_reject, continued_action, in_rule; #define REALLY_NOT_DETERMINED 0 #define REALLY_USED 1 @@ -515,16 +516,9 @@ extern int tecfwd[CSIZE + 1], tecbck[CSIZE + 1]; * scxclu - true if start condition is exclusive * sceof - true if start condition has EOF rule * scname - start condition name - * actvsc - stack of active start conditions for the current rule; - * a negative entry means that the start condition is *not* - * active for the current rule. Start conditions may appear - * multiple times on the stack; the entry for it closest - * to the top of the stack (i.e., actvsc[actvp]) is the - * one to use. Others are present from "{" scoping - * constructs. 
*/ -extern int lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof, *actvsc; +extern int lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof; extern char **scname; diff --git a/main.c b/main.c index 7fa5e6a..3ca92b9 100644 --- a/main.c +++ b/main.c @@ -53,7 +53,7 @@ int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt; int interactive, caseins, lex_compat, useecs, fulltbl, usemecs; int fullspd, gen_line_dirs, performance_report, backing_up_report; int C_plus_plus, long_align, use_read, yytext_is_array, csize; -int yymore_used, reject, real_reject, continued_action; +int yymore_used, reject, real_reject, continued_action, in_rule; int yymore_really_used, reject_really_used; int datapos, dataline, linenum, out_linenum; FILE *skelfile = NULL; @@ -74,7 +74,7 @@ int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP]; int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1]; int tecbck[CSIZE + 1]; -int lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof, *actvsc; +int lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof; char **scname; int current_max_dfa_size, current_max_xpairs; int current_max_template_xpairs, current_max_dfas; @@ -373,9 +373,9 @@ char **argv; char *arg, *mktemp(); printstats = syntaxerror = trace = spprdflt = caseins = false; - lex_compat = false; - C_plus_plus = backing_up_report = ddebug = fulltbl = fullspd = false; - long_align = nowarn = yymore_used = continued_action = reject = false; + lex_compat = C_plus_plus = backing_up_report = ddebug = fulltbl = false; + fullspd = long_align = nowarn = yymore_used = continued_action = false; + in_rule = reject = false; yytext_is_array = yymore_really_used = reject_really_used = false; gen_line_dirs = usemecs = useecs = true; performance_report = 0; @@ -928,7 +928,6 @@ void set_up_initial_allocations() scxclu = allocate_integer_array( current_max_scs ); sceof = 
allocate_integer_array( current_max_scs ); scname = allocate_char_ptr_array( current_max_scs ); - actvsc = allocate_integer_array( current_max_scs ); current_maxccls = INITIAL_MAX_CCLS; cclmap = allocate_integer_array( current_maxccls ); -- cgit v1.2.3 From 56e26f74d4bebb7c386e3e2ff60f25cb64931e4d Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 26 Dec 1993 15:04:47 +0000 Subject: Working checkpoint prior to adding { stuff --- parse.y | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/parse.y b/parse.y index 9b5e991..596e387 100644 --- a/parse.y +++ b/parse.y @@ -60,7 +60,10 @@ char *alloca (); int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen; int trlcontxt, xcluflg, cclsorted, varlength, variable_trail_rule; -int *active_ss; + +int *actvsc, *active_ss, *scon_stk; +int scon_stk_ptr, max_scon_stk; + Char clower(); void build_eof_action(); void yyerror(); @@ -133,10 +136,15 @@ sect1end : SECTEND * are, so create the "activity" map indicating * which conditions are active. 
*/ + actvsc = allocate_integer_array( lastsc + 1 ); active_ss = allocate_integer_array( lastsc + 1 ); for ( i = 1; i <= lastsc; ++i ) active_ss[i] = 0; + + max_scon_stk = lastsc + 1; + scon_stk = allocate_integer_array( max_scon_stk ); + scon_stk_ptr = 0; } ; @@ -158,7 +166,8 @@ namelist1 : namelist1 NAME ; sect2 : sect2 initforrule flexrule '\n' - | sect2 '{' '\n' sect2 '}' '\n' + | sect2 '\n' scons '{' sect2 '}' + { scon_stk_ptr = $2; } | ; @@ -169,6 +178,7 @@ initforrule : trailcnt = headcnt = rulelen = 0; current_state_type = STATE_NORMAL; previous_continued_action = continued_action; + in_rule = true; new_rule(); } ; @@ -265,6 +275,23 @@ flexrule : scon '^' rule { synerr( "unrecognized rule" ); } ; +scons : scon + { + $$ = scon_stk_ptr; + + scon_stk_ptr += actvp; + + while ( scon_stk_ptr >= max_scon_stk ) + { + max_scon_stk *= 2; + scon_stk = reallocate_integer_array( scon_stk, + max_scon_stk ); + } + + for ( i = 1; i <= actvp; ++i ) + scon_stk[$$ + i] = actvsc[i]; + } + scon : '<' namelist2 '>' | '<' '*' '>' -- cgit v1.2.3 From 7ae311ad1df1f4438b115f7f5199609258e6efb9 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 26 Dec 1993 22:22:12 +0000 Subject: Added format_warn --- flexdef.h | 1 + 1 file changed, 1 insertion(+) diff --git a/flexdef.h b/flexdef.h index 562af3a..7b96c6f 100644 --- a/flexdef.h +++ b/flexdef.h @@ -852,6 +852,7 @@ void line_pinpoint PROTO(( char[], int )); /* Report a formatted syntax error. 
*/ extern void format_synerr PROTO((char [], char[])); extern void synerr PROTO((char [])); /* report a syntax error */ +extern void format_warn PROTO((char [], char[])); extern void warn PROTO((char [])); /* report a warning */ extern int yyparse PROTO((void)); /* the YACC parser */ -- cgit v1.2.3 From a7cd1c98a1c6f0246091f192bb634d09b739ac87 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Sun, 26 Dec 1993 22:22:39 +0000 Subject: Added scon_stk stuff, format_warn --- parse.y | 211 +++++++++++++++++++++++++++++++--------------------------------- 1 file changed, 103 insertions(+), 108 deletions(-) diff --git a/parse.y b/parse.y index 596e387..307db76 100644 --- a/parse.y +++ b/parse.y @@ -58,10 +58,10 @@ char *alloca (); #include "flexdef.h" -int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen; +int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, rulelen; int trlcontxt, xcluflg, cclsorted, varlength, variable_trail_rule; -int *actvsc, *active_ss, *scon_stk; +int *scon_stk; int scon_stk_ptr, max_scon_stk; Char clower(); @@ -116,11 +116,6 @@ initlex : /* Create default DFA start condition. */ scinstal( "INITIAL", false ); - - /* Initially, the start condition scoping is - * "no start conditions active". - */ - actvp = 0; } ; @@ -132,18 +127,7 @@ sect1 : sect1 startconddecl namelist1 sect1end : SECTEND { - /* We now know how many start conditions there - * are, so create the "activity" map indicating - * which conditions are active. 
- */ - actvsc = allocate_integer_array( lastsc + 1 ); - active_ss = allocate_integer_array( lastsc + 1 ); - - for ( i = 1; i <= lastsc; ++i ) - active_ss[i] = 0; - - max_scon_stk = lastsc + 1; - scon_stk = allocate_integer_array( max_scon_stk ); + scon_stk = allocate_integer_array( lastsc + 1 ); scon_stk_ptr = 0; } ; @@ -165,8 +149,9 @@ namelist1 : namelist1 NAME { synerr( "bad start condition list" ); } ; -sect2 : sect2 initforrule flexrule '\n' - | sect2 '\n' scons '{' sect2 '}' +sect2 : sect2 scon initforrule flexrule '\n' + { scon_stk_ptr = $2; } + | sect2 scon '{' sect2 '}' { scon_stk_ptr = $2; } | ; @@ -179,54 +164,36 @@ initforrule : current_state_type = STATE_NORMAL; previous_continued_action = continued_action; in_rule = true; + new_rule(); } ; -flexrule : scon '^' rule +flexrule : '^' rule { - pat = $3; + pat = $2; finish_rule( pat, variable_trail_rule, headcnt, trailcnt ); - for ( i = 1; i <= actvp; ++i ) - scbol[actvsc[i]] = - mkbranch( scbol[actvsc[i]], pat ); - - if ( ! bol_needed ) + if ( scon_stk_ptr > 0 ) { - bol_needed = true; - - if ( performance_report > 1 ) - pinpoint_message( - "'^' operator results in sub-optimal performance" ); + for ( i = 1; i <= scon_stk_ptr; ++i ) + scbol[scon_stk[i]] = + mkbranch( scbol[scon_stk[i]], + pat ); } - } - - | scon rule - { - pat = $2; - finish_rule( pat, variable_trail_rule, - headcnt, trailcnt ); - for ( i = 1; i <= actvp; ++i ) - scset[actvsc[i]] = - mkbranch( scset[actvsc[i]], pat ); - } - - | '^' rule - { - pat = $2; - finish_rule( pat, variable_trail_rule, - headcnt, trailcnt ); - - /* Add to all non-exclusive start conditions, - * including the default (0) start condition. - */ + else + { + /* Add to all non-exclusive start conditions, + * including the default (0) start condition. + */ - for ( i = 1; i <= lastsc; ++i ) - if ( ! scxclu[i] ) - scbol[i] = mkbranch( scbol[i], pat ); + for ( i = 1; i <= lastsc; ++i ) + if ( ! scxclu[i] ) + scbol[i] = mkbranch( scbol[i], + pat ); + } if ( ! 
bol_needed ) { @@ -244,68 +211,82 @@ flexrule : scon '^' rule finish_rule( pat, variable_trail_rule, headcnt, trailcnt ); - for ( i = 1; i <= lastsc; ++i ) - if ( ! scxclu[i] ) - scset[i] = mkbranch( scset[i], pat ); - } + if ( scon_stk_ptr > 0 ) + { + for ( i = 1; i <= scon_stk_ptr; ++i ) + scset[scon_stk[i]] = + mkbranch( scset[scon_stk[i]], + pat ); + } - | scon EOF_OP - { build_eof_action(); } + else + { + for ( i = 1; i <= lastsc; ++i ) + if ( ! scxclu[i] ) + scset[i] = + mkbranch( scset[i], + pat ); + } + } | EOF_OP { - /* This EOF applies to all start conditions - * which don't already have EOF actions. - */ - actvp = 0; - - for ( i = 1; i <= lastsc; ++i ) - if ( ! sceof[i] ) - actvsc[++actvp] = i; + if ( scon_stk_ptr > 0 ) + build_eof_action(); + + else + { + /* This EOF applies to all start conditions + * which don't already have EOF actions. + */ + for ( i = 1; i <= lastsc; ++i ) + if ( ! sceof[i] ) + scon_stk[++scon_stk_ptr] = i; - if ( actvp == 0 ) - warn( + if ( scon_stk_ptr == 0 ) + warn( "all start conditions already have <> rules" ); - else - build_eof_action(); + else + build_eof_action(); + } } | error { synerr( "unrecognized rule" ); } ; -scons : scon - { - $$ = scon_stk_ptr; - - scon_stk_ptr += actvp; - - while ( scon_stk_ptr >= max_scon_stk ) - { - max_scon_stk *= 2; - scon_stk = reallocate_integer_array( scon_stk, - max_scon_stk ); - } - - for ( i = 1; i <= actvp; ++i ) - scon_stk[$$ + i] = actvsc[i]; - } +scon_stk_ptr : + { $$ = scon_stk_ptr; } + ; -scon : '<' namelist2 '>' +scon : '<' scon_stk_ptr namelist2 '>' + { $$ = $2; } | '<' '*' '>' { - actvp = 0; + $$ = scon_stk_ptr; for ( i = 1; i <= lastsc; ++i ) - actvsc[++actvp] = i; + { + int j; + + for ( j = 1; j <= scon_stk_ptr; ++j ) + if ( scon_stk[j] == i ) + break; + + if ( j > scon_stk_ptr ) + scon_stk[++scon_stk_ptr] = i; + } } + + | + { $$ = scon_stk_ptr; } ; namelist2 : namelist2 ',' sconname - | { actvp = 0; } sconname + | sconname | error { synerr( "bad start condition list" ); } @@ 
-319,15 +300,17 @@ sconname : NAME nmstr ); else { - if ( ++actvp >= current_max_scs ) - /* Some bozo has included multiple - * instances of start condition names. - */ - pinpoint_message( - "too many start conditions in <> construct!" ); + for ( i = 1; i <= scon_stk_ptr; ++i ) + if ( scon_stk[i] == scnum ) + { + format_warn( + "<%s> specified twice", + scname[scnum] ); + break; + } - else - actvsc[actvp] = scnum; + if ( i > scon_stk_ptr ) + scon_stk[++scon_stk_ptr] = scnum; } } ; @@ -728,18 +711,18 @@ void build_eof_action() register int i; char action_text[MAXLINE]; - for ( i = 1; i <= actvp; ++i ) + for ( i = 1; i <= scon_stk_ptr; ++i ) { - if ( sceof[actvsc[i]] ) + if ( sceof[scon_stk[i]] ) format_pinpoint_message( "multiple <> rules for start condition %s", - scname[actvsc[i]] ); + scname[scon_stk[i]] ); else { - sceof[actvsc[i]] = true; + sceof[scon_stk[i]] = true; sprintf( action_text, "case YY_STATE_EOF(%s):\n", - scname[actvsc[i]] ); + scname[scon_stk[i]] ); add_action( action_text ); } } @@ -778,6 +761,18 @@ char str[]; } +/* format_warn - write out formatted warning */ + +void format_warn( msg, arg ) +char msg[], arg[]; + { + char warn_msg[MAXLINE]; + + (void) sprintf( warn_msg, msg, arg ); + warn( warn_msg ); + } + + /* warn - report a warning, unless -w was given */ void warn( str ) -- cgit v1.2.3 From c3bd15490782c0b9bc5ece938f7657e22bd38f37 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 27 Dec 1993 10:20:29 +0000 Subject: indented rules --- scan.l | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/scan.l b/scan.l index 9625ec3..ee8d866 100644 --- a/scan.l +++ b/scan.l @@ -239,21 +239,17 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) ^{OPTWS}{NL} ++linenum; /* allow blank lines in section 2 */ -^({WS}|"%{") { - indented_code = (yytext[0] != '%'); +^{OPTWS}"%{" { + indented_code = false; doing_codeblock = true; bracelevel = 1; - - if ( indented_code ) - ACTION_ECHO; - BEGIN(CODEBLOCK_2); } -^"<" BEGIN(SC); return '<'; 
-^"^" return '^'; +^{OPTWS}"<" BEGIN(SC); return '<'; +^{OPTWS}"^" return '^'; \" BEGIN(QUOTE); return '"'; -"{"/[0-9] BEGIN(NUM); return '{'; +"{"/[0-9] BEGIN(NUM); return '{'; "$"/([ \t]|{NL}) return '$'; {WS}"%{" { @@ -269,6 +265,15 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) } {WS}"|".*{NL} continued_action = true; ++linenum; return '\n'; +^{WS}"/*" { + yyless( yyleng - 2 ); /* put back '/', '*' */ + bracelevel = 0; + continued_action = false; + BEGIN(ACTION); + } + +^{WS} /* allow indented rules */ + {WS} { /* This rule is separate from the one below because * otherwise we get variable trailing context, so @@ -300,6 +305,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) } } +^{OPTWS}"<>" | "<>" return EOF_OP; ^"%%".* { -- cgit v1.2.3 From 0f34dc3933484a44c49f00b1ba2b64377f293323 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 27 Dec 1993 10:36:59 +0000 Subject: Modified to use scon scopes --- scan.l | 232 +++++++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 134 insertions(+), 98 deletions(-) diff --git a/scan.l b/scan.l index ee8d866..786b938 100644 --- a/scan.l +++ b/scan.l @@ -88,20 +88,21 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) Char nmdef[MAXLINE], myesc(); -^{WS} indented_code = true; BEGIN(CODEBLOCK); -^"/*" ACTION_ECHO; yy_push_state( COMMENT ); -^"%s"{NAME}? return SCDECL; -^"%x"{NAME}? return XSCDECL; -^"%{".*{NL} { +{ + ^{WS} indented_code = true; BEGIN(CODEBLOCK); + ^"/*" ACTION_ECHO; yy_push_state( COMMENT ); + ^"%s"{NAME}? return SCDECL; + ^"%x"{NAME}? 
return XSCDECL; + ^"%{".*{NL} { ++linenum; line_directive_out( (FILE *) 0, 1 ); indented_code = false; BEGIN(CODEBLOCK); } -{WS} /* discard */ + {WS} /* discard */ -^"%%".* { + ^"%%".* { sectnum = 2; bracelevel = 0; mark_defs1(); @@ -110,14 +111,14 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) return SECTEND; } -^"%pointer".*{NL} { + ^"%pointer".*{NL} { if ( lex_compat ) warn( "%pointer incompatible with -l option" ); else yytext_is_array = false; ++linenum; } -^"%array".*{NL} { + ^"%array".*{NL} { if ( C_plus_plus ) warn( "%array incompatible with -+ option" ); else @@ -125,50 +126,57 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) ++linenum; } -^"%used" { + ^"%used" { warn( "%used/%unused have been deprecated" ); checking_used = REALLY_USED; BEGIN(USED_LIST); } -^"%unused" { + ^"%unused" { warn( "%used/%unused have been deprecated" ); checking_used = REALLY_NOT_USED; BEGIN(USED_LIST); } -^"%"[aceknopr]{OPTWS}[0-9]*{OPTWS}{NL} ++linenum; /* ignore */ + ^"%"[aceknopr]{OPTWS}[0-9]*{OPTWS}{NL} ++linenum; /* ignore */ -^"%"[^sxanpekotcru{}].* synerr( "unrecognized '%' directive" ); + ^"%"[^sxanpekotcru{}].* synerr( "unrecognized '%' directive" ); -^{NAME} { + ^{NAME} { strcpy( nmstr, yytext ); didadef = false; BEGIN(PICKUPDEF); } -{SCNAME} RETURNNAME; -^{OPTWS}{NL} ++linenum; /* allows blank lines in section 1 */ -{OPTWS}{NL} ACTION_ECHO; ++linenum; /* maybe end of comment line */ + {SCNAME} RETURNNAME; + ^{OPTWS}{NL} ++linenum; /* allows blank lines in section 1 */ + {OPTWS}{NL} ACTION_ECHO; ++linenum; /* maybe end of comment line */ +} -"*/" ACTION_ECHO; yy_pop_state(); -"*" ACTION_ECHO; -[^*\n]+ ACTION_ECHO; -[^*\n]*{NL} ++linenum; ACTION_ECHO; +{ + "*/" ACTION_ECHO; yy_pop_state(); + "*" ACTION_ECHO; + [^*\n]+ ACTION_ECHO; + [^*\n]*{NL} ++linenum; ACTION_ECHO; +} +{ + ^"%}".*{NL} ++linenum; BEGIN(INITIAL); -^"%}".*{NL} ++linenum; BEGIN(INITIAL); -{NAME}|{NOT_NAME}|. ACTION_ECHO; -{NL} { + {NAME}|{NOT_NAME}|. 
ACTION_ECHO; + + {NL} { ++linenum; ACTION_ECHO; if ( indented_code ) BEGIN(INITIAL); } +} -{WS} /* separates name and definition */ +{ + {WS} /* separates name and definition */ -{NOT_WS}.* { + {NOT_WS}.* { strcpy( (char *) nmdef, yytext ); /* Skip trailing whitespace. */ @@ -183,12 +191,13 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) didadef = true; } -{NL} { + {NL} { if ( ! didadef ) synerr( "incomplete name definition" ); BEGIN(INITIAL); ++linenum; } +} .*{NL} ++linenum; BEGIN(INITIAL); RETURNNAME; @@ -212,12 +221,13 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) {NOT_WS}+ synerr( "unrecognized %used/%unused construct" ); -^"%{".* ++bracelevel; yyless( 2 ); /* eat only %{ */ -^"%}".* --bracelevel; yyless( 2 ); /* eat only %} */ +{ + ^"%{".* ++bracelevel; yyless( 2 ); /* eat only %{ */ + ^"%}".* --bracelevel; yyless( 2 ); /* eat only %} */ -^{WS}.* ACTION_ECHO; /* indented code in prolog */ + ^{WS}.* ACTION_ECHO; /* indented code in prolog */ -^{NOT_WS}.* { /* non-indented code */ + ^{NOT_WS}.* { /* non-indented code */ if ( bracelevel <= 0 ) { /* not in %{ ... 
%} */ yyless( 0 ); /* put it all back */ @@ -228,31 +238,33 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) ACTION_ECHO; } -.* ACTION_ECHO; -{NL} ++linenum; ACTION_ECHO; + .* ACTION_ECHO; + {NL} ++linenum; ACTION_ECHO; -<> { + <> { mark_prolog(); sectnum = 0; yyterminate(); /* to stop the parser */ } +} -^{OPTWS}{NL} ++linenum; /* allow blank lines in section 2 */ +{ + ^{OPTWS}{NL} ++linenum; /* allow blank lines in section 2 */ -^{OPTWS}"%{" { + ^{OPTWS}"%{" { indented_code = false; doing_codeblock = true; bracelevel = 1; BEGIN(CODEBLOCK_2); } -^{OPTWS}"<" BEGIN(SC); return '<'; -^{OPTWS}"^" return '^'; -\" BEGIN(QUOTE); return '"'; -"{"/[0-9] BEGIN(NUM); return '{'; -"$"/([ \t]|{NL}) return '$'; + ^{OPTWS}"<" BEGIN(SC); return '<'; + ^{OPTWS}"^" return '^'; + \" BEGIN(QUOTE); return '"'; + "{"/[0-9] BEGIN(NUM); return '{'; + "$"/([ \t]|{NL}) return '$'; -{WS}"%{" { + {WS}"%{" { bracelevel = 1; BEGIN(PERCENT_BRACE_ACTION); @@ -263,18 +275,18 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) return '\n'; } } -{WS}"|".*{NL} continued_action = true; ++linenum; return '\n'; + {WS}"|".*{NL} continued_action = true; ++linenum; return '\n'; -^{WS}"/*" { + ^{WS}"/*" { yyless( yyleng - 2 ); /* put back '/', '*' */ bracelevel = 0; continued_action = false; BEGIN(ACTION); } -^{WS} /* allow indented rules */ + ^{WS} /* allow indented rules */ -{WS} { + {WS} { /* This rule is separate from the one below because * otherwise we get variable trailing context, so * we can't build the scanner using -{f,F}. 
@@ -291,7 +303,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) } } -{OPTWS}{NL} { + {OPTWS}{NL} { bracelevel = 0; continued_action = false; BEGIN(ACTION); @@ -305,16 +317,16 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) } } -^{OPTWS}"<>" | -"<>" return EOF_OP; + ^{OPTWS}"<>" | + "<>" return EOF_OP; -^"%%".* { + ^"%%".* { sectnum = 3; BEGIN(SECT3); yyterminate(); /* to stop the parser */ } -"["{FIRST_CCL_CHAR}{CCL_CHAR}* { + "["{FIRST_CCL_CHAR}{CCL_CHAR}* { int cclval; strcpy( nmstr, yytext ); @@ -348,7 +360,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) } } -"{"{NAME}"}" { + "{"{NAME}"}" { register Char *nmdefptr; Char *ndlookup(); @@ -381,84 +393,101 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) } } -[/|*+?.(){}] return (unsigned char) yytext[0]; -. RETURNCHAR; + [/|*+?.(){}] return (unsigned char) yytext[0]; + . RETURNCHAR; +} -[,*] return (unsigned char) yytext[0]; -">" BEGIN(SECT2); return '>'; -">"/^ BEGIN(CARETISBOL); return '>'; -{SCNAME} RETURNNAME; -. { - format_synerr( "bad : %s", yytext ); - } +{ + [,*] return (unsigned char) yytext[0]; + ">" BEGIN(SECT2); return '>'; + ">"/^ BEGIN(CARETISBOL); return '>'; + {SCNAME} RETURNNAME; + . format_synerr( "bad : %s", yytext ); +} "^" BEGIN(SECT2); return '^'; -[^"\n] RETURNCHAR; -\" BEGIN(SECT2); return '"'; +{ + [^"\n] RETURNCHAR; + \" BEGIN(SECT2); return '"'; -{NL} { + {NL} { synerr( "missing quote" ); BEGIN(SECT2); ++linenum; return '"'; } +} -"^"/[^-\]\n] BEGIN(CCL); return '^'; -"^"/("-"|"]") return '^'; -. BEGIN(CCL); RETURNCHAR; +{ + "^"/[^-\]\n] BEGIN(CCL); return '^'; + "^"/("-"|"]") return '^'; + . BEGIN(CCL); RETURNCHAR; +} --/[^\]\n] return '-'; -[^\]\n] RETURNCHAR; -"]" BEGIN(SECT2); return ']'; -.|{NL} { +{ + -/[^\]\n] return '-'; + [^\]\n] RETURNCHAR; + "]" BEGIN(SECT2); return ']'; + .|{NL} { synerr( "bad character class" ); BEGIN(SECT2); return ']'; } +} -[0-9]+ { +{ + [0-9]+ { yylval = myctoi( yytext ); return NUMBER; } -"," return ','; -"}" BEGIN(SECT2); return '}'; + "," return ','; + "}" BEGIN(SECT2); return '}'; -. { + . 
{ synerr( "bad character inside {}'s" ); BEGIN(SECT2); return '}'; } -{NL} { + {NL} { synerr( "missing }" ); BEGIN(SECT2); ++linenum; return '}'; } +} -"}" synerr( "bad name in {}'s" ); BEGIN(SECT2); -{NL} synerr( "missing }" ); ++linenum; BEGIN(SECT2); +{ + "}" synerr( "bad name in {}'s" ); BEGIN(SECT2); + {NL} synerr( "missing }" ); ++linenum; BEGIN(SECT2); +} "/*" ACTION_ECHO; yy_push_state( COMMENT ); -{OPTWS}"%}".* bracelevel = 0; -"reject" { + +{ + {OPTWS}"%}".* bracelevel = 0; + + { + "reject" { ACTION_ECHO; CHECK_REJECT(yytext); } -"yymore" { + "yymore" { ACTION_ECHO; CHECK_YYMORE(yytext); } -{NAME}|{NOT_NAME}|. ACTION_ECHO; -{NL} { + } + + {NAME}|{NOT_NAME}|. ACTION_ECHO; + {NL} { ++linenum; ACTION_ECHO; if ( bracelevel == 0 || @@ -471,16 +500,18 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) BEGIN(SECT2); } } +} /* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */ -"{" ACTION_ECHO; ++bracelevel; -"}" ACTION_ECHO; --bracelevel; -[^a-z_{}"'/\n]+ ACTION_ECHO; -{NAME} ACTION_ECHO; -"'"([^'\\\n]|\\.)*"'" ACTION_ECHO; /* character constant */ -\" ACTION_ECHO; BEGIN(ACTION_STRING); -{NL} { +{ + "{" ACTION_ECHO; ++bracelevel; + "}" ACTION_ECHO; --bracelevel; + [^a-z_{}"'/\n]+ ACTION_ECHO; + {NAME} ACTION_ECHO; + "'"([^'\\\n]|\\.)*"'" ACTION_ECHO; /* character constant */ + \" ACTION_ECHO; BEGIN(ACTION_STRING); + {NL} { ++linenum; ACTION_ECHO; if ( bracelevel == 0 ) @@ -492,13 +523,16 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) BEGIN(SECT2); } } -. ACTION_ECHO; + . ACTION_ECHO; +} -[^"\\\n]+ ACTION_ECHO; -\\. ACTION_ECHO; -{NL} ++linenum; ACTION_ECHO; -\" ACTION_ECHO; BEGIN(ACTION); -. ACTION_ECHO; +{ + [^"\\\n]+ ACTION_ECHO; + \\. ACTION_ECHO; + {NL} ++linenum; ACTION_ECHO; + \" ACTION_ECHO; BEGIN(ACTION); + . ACTION_ECHO; +} <> { synerr( "EOF encountered inside an action" ); @@ -518,8 +552,10 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) } -.*(\n?) ECHO; -<> sectnum = 0; yyterminate(); +{ + .*(\n?) 
ECHO; + <> sectnum = 0; yyterminate(); +} <*>.|\n format_synerr( "bad character: %s", yytext ); -- cgit v1.2.3 From 51b1e2e723ff419a20bc0e5e43659124fbfd8e44 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 27 Dec 1993 10:59:54 +0000 Subject: Minor consolidation using scon scopes etc --- scan.l | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/scan.l b/scan.l index 786b938..0dbc29d 100644 --- a/scan.l +++ b/scan.l @@ -60,8 +60,8 @@ %} %x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE -%x FIRSTCCL CCL ACTION RECOVER BRACEERROR COMMENT -%x ACTION_STRING PERCENT_BRACE_ACTION USED_LIST CODEBLOCK_2 +%x FIRSTCCL CCL ACTION RECOVER BRACEERROR COMMENT ACTION_STRING +%x PERCENT_BRACE_ACTION USED_LIST WS [ \t]+ OPTWS [ \t]* @@ -155,7 +155,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) { "*/" ACTION_ECHO; yy_pop_state(); "*" ACTION_ECHO; - [^*\n]+ ACTION_ECHO; + [^*\n]+ ACTION_ECHO; [^*\n]*{NL} ++linenum; ACTION_ECHO; } @@ -255,7 +255,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) indented_code = false; doing_codeblock = true; bracelevel = 1; - BEGIN(CODEBLOCK_2); + BEGIN(PERCENT_BRACE_ACTION); } ^{OPTWS}"<" BEGIN(SC); return '<'; @@ -470,11 +470,11 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) } -"/*" ACTION_ECHO; yy_push_state( COMMENT ); - -{ +{ {OPTWS}"%}".* bracelevel = 0; + "/*" ACTION_ECHO; yy_push_state( COMMENT ); + { "reject" { ACTION_ECHO; @@ -540,14 +540,12 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) } -{ESCSEQ} { +{ESCSEQ} { yylval = myesc( (Char *) yytext ); - return CHAR; - } -{ESCSEQ} { - yylval = myesc( (Char *) yytext ); - BEGIN(CCL); + if ( YY_START == FIRSTCCL ) + BEGIN(CCL); + return CHAR; } -- cgit v1.2.3 From 29919d726a5b95aed38166da993d5acc71ab9f4c Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 27 Dec 1993 14:40:05 +0000 Subject: Added action_define() --- misc.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/misc.c b/misc.c index 12e4cf1..744cdfb 100644 --- a/misc.c +++ b/misc.c @@ 
-38,6 +38,24 @@ void dataflush PROTO((void)); int otoi PROTO((Char [])); +void action_define( defname, value ) +char *defname; +int value; + { + char buf[MAXLINE]; + + if ( strlen( defname ) > MAXLINE / 2 ) + { + format_pinpoint_message( "name \"%s\" ridiculously long", + defname ); + return; + } + + sprintf( buf, "#define %s %d\n", defname, value ); + add_action( buf ); + } + + void add_action( new_text ) char *new_text; { @@ -149,7 +167,7 @@ int c; readable_form( c ) ); if ( c >= csize ) - lerrsf( "scanner requires -8 flag to use the character '%s'", + lerrsf( "scanner requires -8 flag to use the character %s", readable_form( c ) ); } -- cgit v1.2.3 From 27ba613472b16677f6424439634094767c11c7a4 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 27 Dec 1993 14:40:19 +0000 Subject: start condition #define's go to action file --- sym.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sym.c b/sym.c index a404f2a..bea9263 100644 --- a/sym.c +++ b/sym.c @@ -230,7 +230,7 @@ int xcluflg; char *copy_string(); /* Generate start condition definition, for use in BEGIN et al. 
*/ - out_str_dec( "#define %s %d\n", str, lastsc ); + action_define( str, lastsc ); if ( ++lastsc >= current_max_scs ) scextend(); -- cgit v1.2.3 From b9b510025ffbf3609c76d9c3650963033859fa1a Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 27 Dec 1993 14:40:42 +0000 Subject: Added "unspecified", globals for %option --- flexdef.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/flexdef.h b/flexdef.h index 7b96c6f..15525cd 100644 --- a/flexdef.h +++ b/flexdef.h @@ -90,6 +90,7 @@ #define true 1 #define false 0 +#define unspecified -1 /* Special chk[] values marking the slots taking by end-of-buffer and action @@ -342,8 +343,8 @@ extern struct hash_entry *ccltab[CCL_HASH_SIZE]; * continued_action - true if this rule's action is to "fall through" to * the next rule's action (i.e., the '|' action) * in_rule - true if we're inside an individual rule, false if not. - * yymore_really_used - has a REALLY_xxx value indicating whether a - * %used or %notused was used with yymore() + * yymore_really_used - whether to treat yymore() as really used, regardless + * of what we think based on references to it in the user's actions. 
* reject_really_used - same for REJECT */ @@ -353,9 +354,6 @@ extern int fullspd, gen_line_dirs, performance_report, backing_up_report; extern int C_plus_plus, long_align, use_read, yytext_is_array, csize; extern int yymore_used, reject, real_reject, continued_action, in_rule; -#define REALLY_NOT_DETERMINED 0 -#define REALLY_USED 1 -#define REALLY_NOT_USED 2 extern int yymore_really_used, reject_really_used; @@ -372,6 +370,9 @@ extern int yymore_really_used, reject_really_used; * backing_up_file - file to summarize backing-up states to * infilename - name of input file * outfilename - name of output file + * did_outfilename - whether outfilename was explicitly set + * prefix - the prefix used for externally visible names ("yy" by default) + * use_stdout - the -t flag * input_files - array holding names of input files * num_input_files - size of input_files array * program_name - name with which program was invoked @@ -391,6 +392,9 @@ extern FILE *skelfile, *yyin, *backing_up_file; extern char *skel[]; extern int skel_ind; extern char *infilename, *outfilename; +extern int did_outfilename; +extern char *prefix; +extern int use_stdout; extern char **input_files; extern int num_input_files; extern char *program_name; @@ -702,12 +706,16 @@ extern void make_tables PROTO((void)); /* generate transition tables */ /* from file main.c */ +extern void check_options PROTO((void)); extern void flexend PROTO((int)); extern void usage PROTO((void)); /* from file misc.c */ +/* Add a #define to the action file. */ +extern void action_define PROTO(( char *defname, int value )); + /* Add the given text to the stored actions. 
*/ extern void add_action PROTO(( char *new_text )); -- cgit v1.2.3 From 9322563cf65f46d0a08b90fe2e518d9d433212d6 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 27 Dec 1993 14:41:38 +0000 Subject: Reworked for %option --- main.c | 365 +++++++++++++++++++++++++++++++++-------------------------------- 1 file changed, 188 insertions(+), 177 deletions(-) diff --git a/main.c b/main.c index 3ca92b9..da9626c 100644 --- a/main.c +++ b/main.c @@ -61,6 +61,9 @@ int skel_ind = 0; char *action_array; int action_size, defs1_offset, prolog_offset, action_offset, action_index; char *infilename = NULL, *outfilename = NULL; +int did_outfilename; +char *prefix; +int use_stdout; int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE]; int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; int current_mns, num_rules, num_eof_rules, default_rule; @@ -103,10 +106,7 @@ static char *outfile_template = "lex%s.%s"; #endif static int outfile_created = 0; -static int did_outfilename = 0; -static int use_stdout; static char *skelname = NULL; -static char *prefix = "yy"; int main( argc, argv ) @@ -142,6 +142,148 @@ char **argv; } +/* check_options - check user-specified options */ + +void check_options() + { + int i; + + if ( lex_compat ) + { + if ( C_plus_plus ) + flexerror( "Can't use -+ with -l option" ); + + if ( fulltbl || fullspd ) + flexerror( "Can't use -f or -F with -l option" ); + + /* Don't rely on detecting use of yymore() and REJECT, + * just assume they'll be used. + */ + yymore_really_used = reject_really_used = true; + + yytext_is_array = true; + use_read = false; + } + + if ( csize == unspecified ) + { + if ( (fulltbl || fullspd) && ! 
useecs ) + csize = DEFAULT_CSIZE; + else + csize = CSIZE; + } + + if ( interactive == unspecified ) + { + if ( fulltbl || fullspd ) + interactive = false; + else + interactive = true; + } + + if ( (fulltbl || fullspd) && usemecs ) + flexerror( "-Cf/-CF and -Cm don't make sense together" ); + + if ( (fulltbl || fullspd) && interactive ) + flexerror( "-Cf/-CF and -I are incompatible" ); + + if ( fulltbl && fullspd ) + flexerror( "-Cf and -CF are mutually exclusive" ); + + if ( C_plus_plus && fullspd ) + flexerror( "Can't use -+ with -CF option" ); + + if ( C_plus_plus && yytext_is_array ) + { + warn( "%array incompatible with -+ option" ); + yytext_is_array = false; + } + + if ( useecs ) + { /* Set up doubly-linked equivalence classes. */ + + /* We loop all the way up to csize, since ecgroup[csize] is + * the position used for NUL characters. + */ + ecgroup[1] = NIL; + + for ( i = 2; i <= csize; ++i ) + { + ecgroup[i] = i - 1; + nextecm[i - 1] = i; + } + + nextecm[csize] = NIL; + } + + else + { + /* Put everything in its own equivalence class. */ + for ( i = 1; i <= csize; ++i ) + { + ecgroup[i] = i; + nextecm[i] = BAD_SUBSCRIPT; /* to catch errors */ + } + } + + if ( ! use_stdout ) + { + FILE *prev_stdout; + + if ( ! 
did_outfilename ) + { + static char outfile_path[64]; + char *suffix; + + if ( C_plus_plus ) + suffix = "cc"; + else + suffix = "c"; + + sprintf( outfile_path, outfile_template, + prefix, suffix ); + + outfilename = outfile_path; + } + + prev_stdout = freopen( outfilename, "w", stdout ); + + if ( prev_stdout == NULL ) + lerrsf( "could not create %s", outfilename ); + + outfile_created = 1; + } + + if ( skelname && (skelfile = fopen( skelname, "r" )) == NULL ) + lerrsf( "can't open skeleton file %s", skelname ); + + if ( strcmp( prefix, "yy" ) ) + { +#define GEN_PREFIX(name) out_str3( "#define yy%s %s%s\n", name, prefix, name ); + GEN_PREFIX( "FlexLexer" ); + GEN_PREFIX( "_create_buffer" ); + GEN_PREFIX( "_delete_buffer" ); + GEN_PREFIX( "_flex_debug" ); + GEN_PREFIX( "_init_buffer" ); + GEN_PREFIX( "_load_buffer_state" ); + GEN_PREFIX( "_switch_to_buffer" ); + GEN_PREFIX( "in" ); + GEN_PREFIX( "leng" ); + GEN_PREFIX( "lex" ); + GEN_PREFIX( "out" ); + GEN_PREFIX( "restart" ); + GEN_PREFIX( "text" ); + GEN_PREFIX( "wrap" ); + outn( "" ); + } + + if ( did_outfilename ) + line_directive_out( stdout, 0 ); + + skelout(); + } + + /* flexend - terminate flex * * note @@ -226,14 +368,22 @@ int exit_status; putc( 'v', stderr ); /* always true! */ if ( nowarn ) putc( 'w', stderr ); - if ( ! interactive ) + if ( interactive == false ) putc( 'B', stderr ); - if ( interactive ) + if ( interactive == true ) putc( 'I', stderr ); if ( ! gen_line_dirs ) putc( 'L', stderr ); if ( trace ) putc( 'T', stderr ); + + if ( csize == unspecified ) + /* We encountered an error fairly early on, so csize + * never got specified. Define it now, to prevent + * bogus table sizes being written out below. 
+ */ + csize = 256; + if ( csize == 128 ) putc( '7', stderr ); else @@ -369,21 +519,21 @@ int argc; char **argv; { int i, sawcmpflag; - int csize_given, interactive_given; char *arg, *mktemp(); printstats = syntaxerror = trace = spprdflt = caseins = false; lex_compat = C_plus_plus = backing_up_report = ddebug = fulltbl = false; fullspd = long_align = nowarn = yymore_used = continued_action = false; - in_rule = reject = false; - yytext_is_array = yymore_really_used = reject_really_used = false; + yytext_is_array = in_rule = reject = false; + yymore_really_used = reject_really_used = unspecified; + interactive = csize = unspecified; gen_line_dirs = usemecs = useecs = true; performance_report = 0; + did_outfilename = 0; + prefix = "yy"; + use_read = use_stdout = false; sawcmpflag = false; - use_read = use_stdout = false; - csize_given = false; - interactive_given = false; /* Initialize dynamic array for holding the rule actions. */ action_size = 2048; /* default size of action array in bytes */ @@ -414,7 +564,6 @@ char **argv; case 'B': interactive = false; - interactive_given = true; break; case 'b': @@ -422,11 +571,7 @@ char **argv; break; case 'c': - fprintf( stderr, - "%s: Assuming use of deprecated -c flag is really intended to be -C\n", - program_name ); - - /* fall through */ + break; case 'C': if ( i != 1 ) @@ -498,7 +643,6 @@ char **argv; case 'I': interactive = true; - interactive_given = true; break; case 'i': @@ -575,12 +719,10 @@ char **argv; case '7': csize = 128; - csize_given = true; break; case '8': csize = CSIZE; - csize_given = true; break; default: @@ -597,127 +739,11 @@ char **argv; get_next_arg: ; } - if ( ! csize_given ) - { - if ( (fulltbl || fullspd) && ! useecs ) - csize = DEFAULT_CSIZE; - else - csize = CSIZE; - } - - if ( ! 
interactive_given ) - { - if ( fulltbl || fullspd ) - interactive = false; - else - interactive = true; - } - - if ( lex_compat ) - { - if ( C_plus_plus ) - flexerror( "Can't use -+ with -l option" ); - - if ( fulltbl || fullspd ) - flexerror( "Can't use -f or -F with -l option" ); - - /* Don't rely on detecting use of yymore() and REJECT, - * just assume they'll be used. - */ - yymore_really_used = reject_really_used = true; - - yytext_is_array = true; - use_read = false; - } - - if ( (fulltbl || fullspd) && usemecs ) - flexerror( "-Cf/-CF and -Cm don't make sense together" ); - - if ( (fulltbl || fullspd) && interactive ) - flexerror( "-Cf/-CF and -I are incompatible" ); - - if ( fulltbl && fullspd ) - flexerror( "-Cf and -CF are mutually exclusive" ); - - if ( C_plus_plus && fullspd ) - flexerror( "Can't use -+ with -CF option" ); - - if ( ! use_stdout ) - { - FILE *prev_stdout; - - if ( ! did_outfilename ) - { - static char outfile_path[64]; - char *suffix; - - if ( C_plus_plus ) - suffix = "cc"; - else - suffix = "c"; - - sprintf( outfile_path, outfile_template, - prefix, suffix ); - - outfilename = outfile_path; - } - - prev_stdout = freopen( outfilename, "w", stdout ); - - if ( prev_stdout == NULL ) - lerrsf( "could not create %s", outfilename ); - - outfile_created = 1; - } - num_input_files = argc; input_files = argv; set_input_file( num_input_files > 0 ? 
input_files[0] : NULL ); - if ( backing_up_report ) - { -#ifndef SHORT_FILE_NAMES - backing_up_file = fopen( "lex.backup", "w" ); -#else - backing_up_file = fopen( "lex.bck", "w" ); -#endif - - if ( backing_up_file == NULL ) - flexerror( "could not create lex.backup" ); - } - - else - backing_up_file = NULL; - - - lastccl = 0; - lastsc = 0; - - if ( skelname && (skelfile = fopen( skelname, "r" )) == NULL ) - lerrsf( "can't open skeleton file %s", skelname ); - - if ( strcmp( prefix, "yy" ) ) - { -#define GEN_PREFIX(name) out_str3( "#define yy%s %s%s\n", name, prefix, name ); - GEN_PREFIX( "FlexLexer" ); - GEN_PREFIX( "_create_buffer" ); - GEN_PREFIX( "_delete_buffer" ); - GEN_PREFIX( "_flex_debug" ); - GEN_PREFIX( "_init_buffer" ); - GEN_PREFIX( "_load_buffer_state" ); - GEN_PREFIX( "_switch_to_buffer" ); - GEN_PREFIX( "in" ); - GEN_PREFIX( "leng" ); - GEN_PREFIX( "lex" ); - GEN_PREFIX( "out" ); - GEN_PREFIX( "restart" ); - GEN_PREFIX( "text" ); - GEN_PREFIX( "wrap" ); - outn( "" ); - } - - - lastdfa = lastnfa = 0; + lastccl = lastsc = lastdfa = lastnfa = 0; num_rules = num_eof_rules = default_rule = 0; numas = numsnpairs = tmpuses = 0; numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0; @@ -733,34 +759,6 @@ char **argv; */ lastprot = 1; - if ( useecs ) - { - /* Set up doubly-linked equivalence classes. */ - - /* We loop all the way up to csize, since ecgroup[csize] is - * the position used for NUL characters. - */ - ecgroup[1] = NIL; - - for ( i = 2; i <= csize; ++i ) - { - ecgroup[i] = i - 1; - nextecm[i - 1] = i; - } - - nextecm[csize] = NIL; - } - - else - { - /* Put everything in its own equivalence class. 
*/ - for ( i = 1; i <= csize; ++i ) - { - ecgroup[i] = i; - nextecm[i] = BAD_SUBSCRIPT; /* to catch errors */ - } - } - set_up_initial_allocations(); } @@ -769,11 +767,6 @@ char **argv; void readin() { - if ( did_outfilename ) - line_directive_out( stdout, 0 ); - - skelout(); - line_directive_out( (FILE *) 0, 1 ); if ( yyparse() ) @@ -785,14 +778,29 @@ void readin() if ( syntaxerror ) flexend( 1 ); - if ( yymore_really_used == REALLY_USED ) + if ( backing_up_report ) + { +#ifndef SHORT_FILE_NAMES + backing_up_file = fopen( "lex.backup", "w" ); +#else + backing_up_file = fopen( "lex.bck", "w" ); +#endif + + if ( backing_up_file == NULL ) + flexerror( "could not create lex.backup" ); + } + + else + backing_up_file = NULL; + + if ( yymore_really_used == true ) yymore_used = true; - else if ( yymore_really_used == REALLY_NOT_USED ) + else if ( yymore_really_used == false ) yymore_used = false; - if ( reject_really_used == REALLY_USED ) + if ( reject_really_used == true ) reject = true; - else if ( reject_really_used == REALLY_NOT_USED ) + else if ( reject_really_used == false ) reject = false; if ( performance_report > 0 ) @@ -840,6 +848,12 @@ void readin() "variable trailing context rules cannot be used with -f or -F" ); } + if ( reject ) + outn( "\n#define YY_USES_REJECT" ); + + if ( ddebug ) + outn( "\n#define FLEX_DEBUG" ); + if ( csize == 256 ) outn( "typedef unsigned char YY_CHAR;" ); else @@ -858,9 +872,6 @@ void readin() else if ( ! 
C_plus_plus ) outn( "typedef int yy_state_type;" ); - if ( reject ) - outn( "\n#define YY_USES_REJECT" ); - if ( ddebug ) outn( "\n#define FLEX_DEBUG" ); -- cgit v1.2.3 From 2f595d6d62d8b94f37c06f478578161204497491 Mon Sep 17 00:00:00 2001 From: Vern Paxson Date: Mon, 27 Dec 1993 14:41:51 +0000 Subject: %option --- parse.y | 20 ++++++++++++ scan.l | 113 ++++++++++++++++++++++++++++++++++++---------------------------- 2 files changed, 84 insertions(+), 49 deletions(-) diff --git a/parse.y b/parse.y index 307db76..9d447b0 100644 --- a/parse.y +++ b/parse.y @@ -1,6 +1,7 @@ /* parse.y - parser for flex input */ %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP +%token OPTION_OP OPT_OUTFILE OPT_PREFIX %{ /*- @@ -65,6 +66,7 @@ int *scon_stk; int scon_stk_ptr, max_scon_stk; Char clower(); +char *copy_string(); void build_eof_action(); void yyerror(); @@ -120,6 +122,7 @@ initlex : ; sect1 : sect1 startconddecl namelist1 + | sect1 options | | error { synerr( "unknown error processing section 1" ); } @@ -127,6 +130,7 @@ sect1 : sect1 startconddecl namelist1 sect1end : SECTEND { + check_options(); scon_stk = allocate_integer_array( lastsc + 1 ); scon_stk_ptr = 0; } @@ -149,6 +153,22 @@ namelist1 : namelist1 NAME { synerr( "bad start condition list" ); } ; +options : OPTION_OP optionlist + ; + +optionlist : optionlist option + | + ; + +option : OPT_OUTFILE '=' NAME + { + outfilename = copy_string( nmstr ); + did_outfilename = 1; + } + | OPT_PREFIX '=' NAME + { prefix = copy_string( nmstr ); } + ; + sect2 : sect2 scon initforrule flexrule '\n' { scon_stk_ptr = $2; } | sect2 scon '{' sect2 '}' diff --git a/scan.l b/scan.l index 0dbc29d..58a6b9d 100644 --- a/scan.l +++ b/scan.l @@ -60,8 +60,8 @@ %} %x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE -%x FIRSTCCL CCL ACTION RECOVER BRACEERROR COMMENT ACTION_STRING -%x PERCENT_BRACE_ACTION USED_LIST +%x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION +%x OPTION WS [ \t]+ OPTWS [ 
\t]* @@ -82,6 +82,7 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) %% static int bracelevel, didadef, indented_code, checking_used; static int doing_rule_action = false; + static int option_sense; int doing_codeblock = false; int i; @@ -111,34 +112,14 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) return SECTEND; } - ^"%pointer".*{NL} { - if ( lex_compat ) - warn( "%pointer incompatible with -l option" ); - else - yytext_is_array = false; - ++linenum; - } - ^"%array".*{NL} { - if ( C_plus_plus ) - warn( "%array incompatible with -+ option" ); - else - yytext_is_array = true; - ++linenum; - } - - ^"%used" { - warn( "%used/%unused have been deprecated" ); - checking_used = REALLY_USED; BEGIN(USED_LIST); - } - ^"%unused" { - warn( "%used/%unused have been deprecated" ); - checking_used = REALLY_NOT_USED; BEGIN(USED_LIST); - } + ^"%pointer".*{NL} yytext_is_array = false; ++linenum; + ^"%array".*{NL} yytext_is_array = true; ++linenum; + ^"%option" BEGIN(OPTION); return OPTION_OP; ^"%"[aceknopr]{OPTWS}[0-9]*{OPTWS}{NL} ++linenum; /* ignore */ - ^"%"[^sxanpekotcru{}].* synerr( "unrecognized '%' directive" ); + ^"%"[^sxaceknopr{}].* synerr( "unrecognized '%' directive" ); ^{NAME} { strcpy( nmstr, yytext ); @@ -199,26 +180,66 @@ CCL_CHAR ([^\\\n\]]|{ESCSEQ}) } } -.*{NL} ++linenum; BEGIN(INITIAL); RETURNNAME; - -{NL} ++linenum; BEGIN(INITIAL); -{WS} -"reject" { - if ( all_upper( yytext ) ) - reject_really_used = checking_used; - else - synerr( - "unrecognized %used/%unused construct" ); +