diff options
author | Will Estes <wlestes@users.sourceforge.net> | 2001-05-01 20:47:11 +0000 |
---|---|---|
committer | Will Estes <wlestes@users.sourceforge.net> | 2001-05-01 20:47:11 +0000 |
commit | 2eae8800306d74d3507eee2464630a085b211779 (patch) | |
tree | a892110dd6ff826cf3691ce0b38615ed6d1fc061 /to.do | |
parent | 26e78464e71e0d03bf16cda54bcf06a6785e727c (diff) |
adding the rest of vern's files
Diffstat (limited to 'to.do')
-rw-r--r-- | to.do/README | 46 | ||||
-rw-r--r-- | to.do/Wilhelms.todo | 711 | ||||
-rw-r--r-- | to.do/Wish-List | 123 | ||||
-rw-r--r-- | to.do/flex.rmail | 3182 | ||||
-rw-r--r-- | to.do/unicode/FlexLexer.h | 195 | ||||
-rw-r--r-- | to.do/unicode/ccl.c | 149 | ||||
-rw-r--r-- | to.do/unicode/changes.txt | 102 | ||||
-rw-r--r-- | to.do/unicode/ecs.c | 225 | ||||
-rw-r--r-- | to.do/unicode/flex.1 | 4099 | ||||
-rw-r--r-- | to.do/unicode/flex.skl | 1542 | ||||
-rw-r--r-- | to.do/unicode/flexdef.h | 1062 | ||||
-rw-r--r-- | to.do/unicode/gen.c | 1650 | ||||
-rw-r--r-- | to.do/unicode/main.c | 1228 | ||||
-rw-r--r-- | to.do/unicode/misc.c | 894 | ||||
-rw-r--r-- | to.do/unicode/scan.l | 710 | ||||
-rw-r--r-- | to.do/unicode/tblcmp.c | 887 |
16 files changed, 16805 insertions, 0 deletions
diff --git a/to.do/README b/to.do/README new file mode 100644 index 0000000..7123c30 --- /dev/null +++ b/to.do/README @@ -0,0 +1,46 @@ +The contents of this directory are: + + Wilhelms.todo + This guy Gerhard Wilhelms did an exhaustive line-by-line + study of flex, finding a large number of glitches; it's + not clear how significant they are (some have subsequently + been fixed). + + Wish-List + A long list of cryptic one-line descriptions of various + features people have asked for, or I've thought would be + handy. If you have questions about particular ones, let + me know. + + reentrant + A set of mods contributed by John Bossom for adding + an option to flex to make reentrant scanners. + + rflex-2.5.4-diffs + A set of mods contributed by Chris Appleton for + the same purpose. + + streams + email from David Madden about coping with non-blocking + I/O. + + unicode + patches to support Unicode scanners, contributed + by James A. Lauth. + + +Of these, the ones that people frequently ask about are support for reentrant +scanners and support for Unicode. I haven't played with the reentrant stuff +so don't know how solid / well designed it is. I've sent out the Unicode +stuff to numerous people and haven't received any complaints, so I imagine +it works well. + +Another thing people ask for fairly often is removing the limit on size +of the scanners. There's some mail in the faqs/ directory about ways to +do this by cranking some #define's in flexdef.h, though of course the best +solution would be dynamic memory/resizing. + +By the way, I have translated the core flex algorithms into a C++ regular +expression class (but one that doesn't support the uglier stuff like +yymore()/yyless(), trailing context, etc.). If you ever wind up wanting +it, just let me know. 
diff --git a/to.do/Wilhelms.todo b/to.do/Wilhelms.todo new file mode 100644 index 0000000..f64a335 --- /dev/null +++ b/to.do/Wilhelms.todo @@ -0,0 +1,711 @@ +PARSE.Y 344: ('rule'-rule " | re '$' ": There are some errors concerning + trailing context. First of all the rule " re '$' " implies that this is + no variable_trail_rule because the tail of it ( '$' ) has a fixed length + of 1. The only possible reason for making this rule variable is when + 'previous_continued_action' is true. In this case 'variable_trail_rule' + must be set and the beginning of the trailing part must be marked. + However the variables 'varlength' and 'headcnt' have not the same meaning + as in the rule " re2 re ". Here ( in the rule " re '$' " ) 'varlength' + is true if the head ( 're' ) of the rule has variable length, and + 'headcnt' is still 0 because it isn't set during reduction of 're'. + Therefore the test for a variable trailing rule + " if ( ! varlength || headcnt != 0 ) " + is wrong and should be removed. + Also it is not necessary to set 'varlength' or 'headcnt' if you set + " trailcnt = 1; ". If this rule is made variable then 'variable_trail_rule' + is set and neither 'headcnt' nor 'trailcnt' are used in 'finish_rule()'. + And if this rule is normal then the head may be variable or not, but in + 'finish_rule()' code is generated to reduce 'yy_cp' by 1. + Finally I found no reason to create an epsilon-state and insert it in + front of mkstate( '\n' ) instead of adding it behind. This epsilon-state + should be marked as STATE_TRAILING_CONTEXT. Otherwise you get no warning + of dangerous trailing context if you have a rule " x\n*$ " which was made + variable with '|'.) + + | re '$' + { + /* if ( trlcontxt ) + { + synerr( "trailing context used twice" ); + $$ = mkstate( SYM_EPSILON ); + } + + else */ if ( previous_continued_action ) + { + /* see the comment in the rule for "re2 re" + * above + */ + /* if ( ! 
varlength || headcnt != 0 ) */ + { + fprintf( stderr, + "%s: warning - trailing context rule at line %d made variable because\n", + program_name, linenum ); + fprintf( stderr, + " of preceding '|' action\n" ); + } + + /* mark as variable */ + /* varlength = true; + headcnt = 0; */ + + add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK ) +; + variable_trail_rule = true; + } + + /* trlcontxt = true; + + if ( ! varlength ) + headcnt = rulelen; + + ++rulelen; */ + trailcnt = 1; + + current_state_type = STATE_TRAILING_CONTEXT; + eps = mkstate( SYM_EPSILON ); + current_state_type = STATE_NORMAL; + + $$ = link_machines( $1, + link_machines( mkstate( '\n' ), eps ) ); + } + +DFA.C 618: (ntod(): The arrays 'targstate[]' and 'targfreq[]' can be + maintained in a better way. Up to now it is possible that states are added + to 'targstate[]' more than once, because the state 'newds' from the call + to snstods() creates a new entry in 'targstate[]'. But 'newds' may already + exist in 'targstate[]' ! + Another point is that 'targfreq[]' is not updated if "caseins && ! useecs" + is true. + My algorithm should solve these problems. However it could be simplified + by replacing 'newds' by 'targ' and removing the statement "targ = newds;". + Remark to the second point: I decremented the targfreq-counter if 'sym' + was an uppercase letter and incremented it if 'sym' was a lowercase + letter. The index 'i' of 'targfreq[i]' points to the correct position in + 'targstate[]' even if a new state was added.) 
+ + for ( sym = 1; sym <= numecs; ++sym ) + { + if ( symlist[sym] ) + { + symlist[sym] = 0; + + if ( duplist[sym] == NIL ) + { /* symbol has unique out-transitions */ + numstates = symfollowset( dset, dsize, sym, nset ); + nset = epsclosure( nset, &numstates, accset, + &nacc, &hashval ); + + if ( snstods( nset, numstates, accset, + nacc, hashval, &newds ) ) + { + totnst = totnst + numstates; + ++todo_next; + numas += nacc; + + if ( variable_trailing_context_rules && nacc > 0 ) + check_trailing_context( nset, numstates, + accset, nacc ); + } + + targ = newds; + } + + else + { + /* sym's equivalence class has the same transitions + * as duplist(sym)'s equivalence class + */ + targ = state[duplist[sym]]; + } + + state[sym] = targ; + + if ( trace ) + fprintf( stderr, "\t%d\t%d\n", sym, targ ); + + /* update frequency count for destination state */ + + for ( i = 1; i <= targptr; ++i ) + if ( targstate[i] == targ ) + break; + + if ( i <= targptr ) + { + ++targfreq[i]; + ++numdup; + } + else + { + targfreq[++targptr] = 1; + targstate[targptr] = targ; + ++numuniq; + } + + if ( caseins && ! useecs ) + { + if ( sym >= 'A' && sym <= 'Z' ) + { + --targfreq[i]; + --totaltrans; + } + else if ( sym >= 'a' && sym <= 'z' ) + { + ++targfreq[i]; + ++totaltrans; + } + } + + ++totaltrans; + duplist[sym] = NIL; + } + } + + +GEN.C 438: (gen_next_compressed_state(): I have rewritten the function + 'yy_try_NUL_trans()' so it really just tries to find out whether a + transition on the NUL character goes to the jamstate or not. ( That means + I removed each creation of backtracking information and the saving of the + new state on the 'yy_state_buf[]'. ) + Therefore I removed the call for 'gen_backtracking()' here, because the + function 'gen_next_compressed_state()' is also used in 'gen_NUL_trans()'.) 
+ +/* gen_backtracking(); */ + +GEN.C 587ff: (gen_next_state(): Since the backtracking information is not + created in 'gen_next_compressed_state()' any more, it is done here + before the next state is computed ( for "compressed" tables ). This + removes the bug that the backtracking information is created twice if + 'nultrans' is not NULL and 'gen_next_compressed_state()' is called. + Finally I had to insert the creation of a "{" and a "}", because there + is a local variable created in 'gen_next_compressed_state()'. ( These are + needed only when backtracking information is really created.) ) + + if ( ! fulltbl && ! fullspd ) + gen_backtracking(); + + if ( worry_about_NULs && nultrans ) + { + indent_puts( "if ( *yy_cp )" ); + indent_up(); + indent_puts( "{" ); + } + else if ( ! fulltbl && ! fullspd && ! reject && num_backtracking > 0 ) + indent_puts( "{" ); + + if ( fulltbl ) + indent_put2s( "yy_current_state = yy_nxt[yy_current_state][%s];", + char_map ); + + else if ( fullspd ) + indent_put2s( "yy_current_state += yy_current_state[%s].yy_nxt;", + char_map ); + + else + gen_next_compressed_state( char_map ); + + if ( worry_about_NULs && nultrans ) + { + indent_puts( "}" ); + indent_down(); + indent_puts( "else" ); + indent_up(); + + indent_puts( "yy_current_state = yy_NUL_trans[yy_current_state];" ); + indent_down(); + } + else if ( ! fulltbl && ! fullspd && ! reject && num_backtracking > 0 ) + indent_puts( "}" ); + + if ( fullspd || fulltbl ) + gen_backtracking(); + + if ( reject ) + indent_puts( "*yy_state_ptr++ = yy_current_state;" ); + } + +GEN.C 553: (gen_next_match(): There is a problem if 'interactive' is true. In + this case the scanner jams if the next state is the jamstate ( i.e. + yy_base[yy_current_state] == jambase ). However the scanner reaches also + the jamstate if the transition character is the NUL-character or if the + end of the buffer is reached. 
Then in the EOB-action is decided whether + this was really a NUL character or the end-of-buffer. ( If it was a NUL, + scanning will be resumed. If it was the end-of-buffer, the buffer will be + filled first, before scanning will be resumed. ) + These actions are not done if you use an 'interactive' scanner, because + the EOB-action is not executed. Therefore you have to continue scanning, + if you have just matched a NUL character ( i.e. *yy_cp == '\0' and + yy_cp < &yy_current_buffer->yy_ch_buf[yy_n_chars] ) and if you are not + already in the jamstate ( i.e. yy_current_state != jamstate ). + Note that the '<' in " yy_cp < &yy_current_buffer->yy_ch_buf[yy_n_chars] " + implies that the EOB action is *not* executed if the last match before the + end-of-buffer was maximal. + The following change in the algorithm results in a minor performance + penalty because the additional conditions are tested only if you have + reached the end of the match or if you are using NUL characters in your + patterns.) + + if ( interactive ) + { + printf( "while ( yy_base[yy_current_state] != %d\n", jambase ); + set_indent( 4 ); + indent_puts( "|| ( *yy_cp == '\\0'" ); + indent_up(); + indent_puts( + " && yy_cp < &yy_current_buffer->yy_ch_buf[yy_n_chars]" ); + do_indent(); + printf( " && yy_current_state != %d ) );\n", jamstate ); + set_indent( 2 ); + } + else + printf( "while ( yy_current_state != %d );\n", jamstate ); + +GEN.C 341: (gen_find_action(): Question: The variables 'yy_full_match', + 'yy_full_state' and 'yy_full_lp' are used only in the REJECT macro. Why + do you not also test here on 'real_reject' before you create code to set + these variables ( like you did in line 327ff for the action of the case + " ( yy_act & YY_TRAILING_MASK ) " ) ?) 
+ + New code beginning at line 338 to show the context: + indent_puts( "else" ); + indent_up(); + indent_puts( "{" ); + + if ( real_reject ) + { + /* remember matched text in case we back up due to REJECT */ + indent_puts( "yy_full_match = yy_cp;" ); + indent_puts( "yy_full_state = yy_state_ptr;" ); + indent_puts( "yy_full_lp = yy_lp;" ); + } + + indent_puts( "break;" ); + indent_puts( "}" ); + indent_down(); + + indent_puts( "++yy_lp;" ); + indent_puts( "goto find_rule;" ); + } + + +FLEX.SKEL 364,379: (YY_END_OF_BUFFER action: If it was really a NUL character + which started this action, then 'yy_bp' points still at the beginning of + the current run and 'yy_c_buf_p' points behind the NUL character. + Contrast this with the situation after the call of 'yy_get_next_buffer()'! + Therefore I removed the statement " yy_bp = yytext + YY_MORE_ADJ; " + ( line 379 ) and replaced the statement + " yy_c_buf_p = yytext + yy_amount_of_matched_text; " ( line 364 ) by the + easier one " yy_c_buf_p = --yy_cp; ". Here 'yy_cp' is also adjusted. + This guarantees that both 'yy_c_buf_p' and 'yy_cp' point at the NUL + character. Therefore 'yy_cp' will have the correct value when it is needed + after the call to 'yy_try_NUL_trans()' ( when we know whether we make a + transition or not ). + + line 364: + yy_c_buf_p = --yy_cp; + + line 379: + /* yy_bp = yytext + YY_MORE_ADJ; */ + +GEN.C 632: (gen_NUL_trans(): I have rewritten 'yy_try_NUL_trans()'. The new + version just finds out whether a transition on the NUL character goes to + the jamstate or not. See also my remarks to 'gen_next_compressed_state()'. + Note that the test " yy_is_jam = (yy_current_state == jamstate); " is + also used, if 'interactive' is true. Otherwise 'yy_try_NUL_trans()' would + return 0, if the NUL character was the last character of a pattern + ( e.g. "x\0" ), and we therefore would not reach the last state. + Remark: Change also the comment in FLEX.SKEL for this function.) 
+ + FLEX.SKEL, line 583: +%% code to find the next state goes here + + GEN.C, line 632ff: +/* int need_backtracking = (num_backtracking > 0 && ! reject); + + if ( need_backtracking ) + / * we'll need yy_cp lying around for the gen_backtracking() * / + indent_puts( "register YY_CHAR *yy_cp = yy_c_buf_p;" ); */ + + GEN.C, line 674ff: +/* if ( reject ) + indent_puts( "*yy_state_ptr++ = yy_current_state;" ); */ + + do_indent(); + +/* if ( interactive ) + printf( "yy_is_jam = (yy_base[yy_current_state] == %d);\n", + jambase ); + else */ + printf( "yy_is_jam = (yy_current_state == %d);\n", jamstate ); + } + + /* if we've entered an accepting state, backtrack; note that + * compressed tables have *already* done such backtracking, so + * we needn't bother with it again + */ +/* if ( need_backtracking && (fullspd || fulltbl) ) + { + putchar( '\n' ); + indent_puts( "if ( ! yy_is_jam )" ); + indent_up(); + indent_puts( "{" ); + gen_backtracking(); + indent_puts( "}" ); + indent_down(); + } */ + } + +GEN.C 1293: (make_tables(): The changed functionality of 'yy_try_NUL_trans()' + implies changes in the EOB action. If the next state 'yy_next_state' is 0 + ( i.e. the jamstate ), you can immediately jump to 'yy_find_action'. + Remember that 'yy_cp' was already adjusted to point at the NUL ! + Also you must not use the backtracking information because the actual + state 'yy_current_state' may be an accepting state. + If 'yy_next_state' is not the jamstate, we make a transition on the NUL. + This requires the following actions: + - Create backtracking information for compressed tables *before* we make + the transition on NUL. + - Now increment 'yy_cp' and set 'yy_current_state' to 'yy_next_state'. + ( Note that 'yy_cp' points at the NUL up to now. ) + - Save the new state on the stack 'yy_state_buf[]' if 'reject' is true. + - Create backtracking information *after* the transition, if 'fulltbl' + or 'fullspd' is true. 
+ - Finally decide, if 'interactive' is true, whether scanning should be + resumed at 'yy_match' or whether we have reached a final state and + should jump to 'yy_find_action'. (Condition like in 'gen_next_match()'.) + If 'interactive' is false, just resume scanning.) + + Corresponding code in FLEX.SKEL beginning at line 381: + if ( yy_next_state ) + { + /* consume the NUL */ +%% code to do backtracking for compressed tables and set up yy_cp goes here + } + else + goto yy_find_action; + + Code in GEN.C beginning at line 1293: + /* first, deal with backtracking and setting up yy_cp if the scanner + * finds that it should JAM on the NUL + */ + skelout(); + set_indent( 6 ); + + if ( ! fulltbl && ! fullspd ) + gen_backtracking(); + + indent_puts( "++yy_cp;" ); + indent_puts( "yy_current_state = yy_next_state;" ); + + if ( reject ) + indent_puts( "*yy_state_ptr++ = yy_current_state;" ); + + if ( fulltbl || fullspd ) + gen_backtracking(); + + if ( interactive ) + { + do_indent(); + printf( "if ( yy_base[yy_current_state] != %d\n", jambase ); + indent_up(); + indent_puts( "|| ( *yy_cp == '\\0'" ); + indent_puts( "&& yy_cp < &yy_current_buffer->yy_ch_buf[yy_n_chars]" ); + do_indent(); + printf( "&& yy_current_state != %d ) )\n", jamstate ); + indent_puts( "goto yy_match;" ); + indent_down(); + indent_puts( "else" ); + indent_up(); + indent_puts( "goto yy_find_action;" ); + indent_down(); + } + else + indent_puts( "goto yy_match;" ); + +/* if ( fullspd || fulltbl ) + indent_puts( "yy_cp = yy_c_buf_p;" ); + + else + { / * compressed table * / + if ( ! reject && ! interactive ) + { + / * do the guaranteed-needed backtrack to figure out the match * / + indent_puts( "yy_cp = yy_last_accepting_cpos;" ); + indent_puts( "yy_current_state = yy_last_accepting_state;" ); + } + } */ + +FLEX.SKEL 513: (yy_get_next_buffer(): Here is an error if 'yymore()' is active + in the last match (i.e. yy_doing_yy_more == 1 and yy_more_len > 0). 
Then + 'number_to_move' will be (1 + yy_more_len), i.e. the previous character + plus the additional characters for using 'yymore()'.) + + if ( number_to_move == 1 + YY_MORE_ADJ ) + { + ret_val = EOB_ACT_END_OF_FILE; + yy_current_buffer->yy_eof_status = EOF_DONE; + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + yy_current_buffer->yy_eof_status = EOF_PENDING; + } + } + +GEN.C 1317: (make_tables(): In the generation of 'yy_get_previous_state()' the + variable 'yy_bp' must be set to 'yytext + YY_MORE_ADJ' if 'bol_needed' is + true. Otherwise 'yy_bp' points eventually at the beginning of the + yymore-prefix instead of the current run.) + + if ( bol_needed ) + indent_puts( "register YY_CHAR *yy_bp = yytext + YY_MORE_ADJ;\n" ); + +FLEX.SKEL 589ff: (yyunput(): The function 'yyunput()' should be rewritten. + First of all the example for 'unput()' in file flexdoc doesn't work: + { + int i; + unput( ')' ); + for ( i = yyleng - 1; i >= 0; --i ) + unput( yytext[i] ); + unput( '(' ); + } + The actual version of 'yyunput()' modifies 'yyleng'. Therefore 'yyleng' is + decremented by " unput( ')' ) " and the pattern to be pushed back has lost + its last character. To avoid this just copy the 'yytext'-string and + 'yyleng' before you call 'unput()'. + Another point is that 'yytext' and 'yyleng' could be maintained in a + better way. ( Up to now 'yyleng' can become negative ! ) + I think it's better to say that the pushed back pattern should fulfill + the beginning-of-line-condition if and only if the old pattern does + ( excluding a possibly existing 'yymore'-prefix ! ). + Up to now you have problems if there is a 'yymore'-prefix, because + 'yytext' will be corrupted by YY_DO_BEFORE_ACTION. ( This macro sets + 'yytext' to 'yy_bp - yy_more_len', but our 'yy_bp' points already at the + beginning of the 'yymore'-prefix. ) + + My version of 'yyunput()' reduces the 'yytext'-string by 1 for every + pushed back character and decrements 'yyleng' until 'yytext' is the empty + string. 
The beginning-of-line-condition is preserved when 'bol_needed' is + true. ( Then the character before the current run is copied in front of + the pushed back character. ) If there is a 'yymore'-prefix, 'yy_more_len' + will be decremented if 'yy_cp' reaches the beginning of the current run. + + Remark: The parameter 'yytext' in " yyunput( c, yytext ) " is not really + necessary since 'yytext' is a global variable. You could also set + " register YY_CHAR *yy_bp = yytext; " at the beginning of 'yyunput()'.) + + Replace lines 622 - 623 in FLEX.SKEL: + + if ( yy_cp > yy_bp && yy_cp[-1] == '\n' ) + yy_cp[-2] = '\n'; + + by + +%% code to adjust yy_bp and yy_more_len goes here + + Add in GEN.C a function 'gen_yyunput()': +/* generate code to adjust yy_bp and yy_more_len in yyunput + */ + +void gen_yyunput() + + { + if ( yymore_used ) + indent_puts( "yy_bp += YY_MORE_ADJ;\n" ); + + if ( bol_needed ) + indent_puts( "yy_cp[-2] = yy_bp[-1];\n" ); + + if ( yymore_used ) + { + indent_puts( "if ( (yy_cp == yy_bp) && YY_MORE_ADJ )" ); + indent_up(); + indent_puts( "--yy_more_len;" ); + indent_down(); + indent_puts( "else" ); + indent_up(); + indent_puts( "--yy_bp;" ); + indent_down(); + } + else + indent_puts( "--yy_bp;" ); + } + + Finally add in the function 'make_tables()' behind the call of + 'gen_NUL_trans()' in line 1328: + + skelout(); + gen_yyunput(); + +FLEX.SKEL 642,658: (input(): There is an error in 'input()' if the end of + 'yy_current_buffer' is reached and 'yymore' is active. Then + 'yy_get_next_buffer()' is called which function assumes that 'yytext' + points at the beginning of the 'yymore'-prefix. This function can't + recognize the end of the input stream correctly and therefore returns + EOB_ACT_LAST_MATCH instead of EOB_ACT_END_OF_FILE. Also if the end of + the input file isn't reached yet (EOB_ACT_CONTINUE_SCAN) at least one + character will be lost. + To avoid this error just turn off 'yy_doing_yy_more'. 
Then you need + not to adjust with YY_MORE_ADJ in lines 667 and 682. However you have to + use a function 'gen_input()', because 'yy_doing_yy_more' does not exist + if 'yymore_used' is false. + + ( Another solution is to adjust 'yytext': + " yytext = yy_c_buf_p - YY_MORE_ADJ; ", line 658. ) + + I think the trick with "yy_did_buffer_switch_on_eof" should be done here + the same way as in the YY_END_OF_BUFFER action. + Finally I removed the variable 'yy_cp' and used 'yy_c_buf_p' instead.) + +#ifdef __cplusplus +static int yyinput() +#else +static int input() +#endif + + { + int c; + + *yy_c_buf_p = yy_hold_char; /* yy_cp not needed */ + + if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. + */ + if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] ) + /* this was really a NUL */ + *yy_c_buf_p = '\0'; + + else + { /* need more input */ +%% code to turn off yy_doing_yy_more and yy_more_len goes here + yytext = yy_c_buf_p; + ++yy_c_buf_p; + + switch ( yy_get_next_buffer() ) + { + case EOB_ACT_END_OF_FILE: + { + yy_did_buffer_switch_on_eof = 0; + + if ( yywrap() ) + { + yy_c_buf_p = yytext; /* + YY_MORE_ADJ not needed */ + return ( EOF ); + } + else + { + if ( ! 
yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; + } + +#ifdef __cplusplus + return ( yyinput() ); +#else + return ( input() ); +#endif + } + break; + + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = yytext; /* + YY_MORE_ADJ not needed */ + break; + + case EOB_ACT_LAST_MATCH: +#ifdef __cplusplus + YY_FATAL_ERROR( "unexpected last match in yyinput()" ); +#else + YY_FATAL_ERROR( "unexpected last match in input()" ); +#endif + } + } + } + + c = *yy_c_buf_p; + yy_hold_char = *++yy_c_buf_p; + + return ( c ); + } + + Add in GEN.C a function 'gen_input()': +/* generate code to turn off yy_doing_yy_more and yy_more_len in input + */ + +void gen_input() + + { + if ( yymore_used ) + indent_puts( "yy_doing_yy_more = yy_more_len = 0;" ); + } + + Finally add in the function 'make_tables()' behind the call of + 'gen_yyunput()': + + set_indent( 3 ); + skelout(); + gen_input(); + +PARSE.Y 54: ( 'goal'-rule: If there is no rule in the input file, the end of + the prolog is not marked yet, because 'flexscan()' is still in the start + condition <SECT2PROLOG> and the rule <SECT2PROLOG><<EOF>> is not done up + to now. Therefore mark the end of prolog here, before you add the default + rule. I test here on " num_rules == 1 ", because the 'initforrule'-rule + increments 'num_rules' before this action is executed.) + + if ( num_rules == 1 ) + fprintf( temp_action_file, "%%%% end of prolog\n" ) +; + +SCAN.L 255: ( '<SECT2PROLOG><<EOF>>'-rule: If there are no rules at all in + the input file, then this rule will be executed at the end of + 'make_tables()'. At this point 'temp_action_file' was closed for writing + and has been reopened for reading. The macro MARK_END_OF_PROLOG will + therefore lead to a write-error. + To avoid this error add the condition " if ( num_rules == 0 ) ". If this + rule is executed at the end of 'make_tables()' there will be at least the + default rule, i.e. 'num_rules' will be greater than 0. 
+ Remark: This correction together with the one before will allow an input + file which just consists of "%%". ( Copy 'stdin' to 'stdout'. )) + +<SECT2PROLOG><<EOF>> { + if ( num_rules == 0 ) + MARK_END_OF_PROLOG; + yyterminate(); + } + +MISC.C 376: ( flexfatal(): The call of 'flexend( 1 )' will lead to an + infinite loop if 'flexfatal()' is called from 'flexend()'. I therefore + introduced the flag 'doing_flexend' to prevent 'flexend()' to be called + more than once.) + + Replace the function call 'flexend( 1 );' in GEN.C, line 376, by + if ( ! doing_flexend ) + flexend( 1 ); + + Set 'doing_flexend' at the beginning of 'flexend()' in MAIN.C, line 195: + doing_flexend = true; + + Add in FLEXDEF.H, line 381, the declaration of 'doing_flexend': +extern int yymore_used, reject, real_reject, continued_action, doing_flexend; + + Add in FLEXDEF.H, line 376, a comment for this variable: + * doing_flexend - true if flexend() has been started + + Initialize 'doing_flexend' in 'flexinit()' in MAIN.C, line 401: + yymore_used = continued_action = reject = doing_flexend = false; + +FLEX.SKEL 94: ( 'YY_INPUT()'-macro: I have problems with 'fileno()' and + 'read()'. + I used the C Compiler of the BORLAND C++ Compiler and compiled the created + scanner with the option 'ANSI keywords'. + In this compiler the prototype of the function 'read(...)' is declared in + the header file 'io.h' and not in 'stdio.h'. Therefore I get a warning. + Real trouble caused 'fileno' which is defined as macro in 'stdio.h': + #define fileno(f) ((f)->fd) + However this macro does not belong to the 'ANSI keywords' because it is + define'd under the condition " #if !__STDC__ ". Therefore I get a warning + and a linker error that the function 'fileno()' does not exist. 
+ (I can avoid this problem by adding the above define-macro in the *.l file + or by replacing the option 'ANSI keywords' by 'Borland C++ keywords'.)) diff --git a/to.do/Wish-List b/to.do/Wish-List new file mode 100644 index 0000000..59f2d74 --- /dev/null +++ b/to.do/Wish-List @@ -0,0 +1,123 @@ +start conditions given own name space by making them structure fields + #define BEGIN(x) yy_start_state = yy_states->x +reentrant/ +streams/ +yylineno maintained per input buffer +use yyconst instead of const, to fix __STDC__ == 0 problem +scan input for unput() +-CF/-Cf support interactive scanners +reject_really_used -> maintain_backup_tables +full library encapsulation: flex'ing on the fly +fix MAX_MNS/MARKER_DIFFERENCE to not be a hard limit +Two flags to warn when something is seen that lex or posix might interpret + differently; this should be quite doable as -l already exists. Proposed + names: -Wl, -Wp. +reentrant C scanners +yy_fseek() for positioning in input file +set-able "at beginning of line" , no more unput() trashes yytext? +yy_unput_string(); unput() shifts yytext to preserve it, grows buffer as needed +yy_malloc_type as void* so can be easily switched to char* for poor + hopeless bastards running SunSoft stuff? +public "TODO" file, requesting help? +test -P to make sure it's not broken now due to e.g. yy_scan_string +%option +hook for treating input interactively even if not isatty() +scan.l:22:error message :-( (see flex.todo) +document yy_fill_buffer +lint, gcc-lint +-lfl removed from flex.1 +merge 2.4.6, e.g., NEWS +'|' action copies action instead of omitting break +if yy_current_buffer defined on entry to yylex(), don't promote nil yyin + to stdin, etc. +multibyte character flex + + +ANSI only +multiple inclusion of <stdlib.h>? 
+[=...=] POSIX stuff ++flex.todo +yylineno, yycol by checking for whether rules can match embedded newlines, + only trailing newlines, always trailing newlines, or no newlines +compute transition path to each DFA state, to aid in backtracking + for each state, store pointer to predecessor, character for xtion +merge flex.1, flexdoc.1? +bison++ interface +YYLEXER_NAME +out-line FlexLexer destructors +GNU readline contrib? +isatty() decl? +#ifdef chud for unput() etc. not being used? + "../scan.l", line 207: warning: ::yy_did_buffer_switch_on_eof defined but not used + cc -c -g scan.c + "scan.cc", line 1752: warning: statement not reached +alloca.c removed from Makefile +// comments + +output partitioning for e.g., scanning tables, actions, etc. + + 497 09/11 14:17-PDT 3450 To:t_bonner@oscar Re: Modifying yytext in an actio +MISC stuff non-writeable +texinfo version of manual + + ALSO: document how to do so (including no need to redefine unput()), + whether feature added or not + +example of "error" backtracking rules as opposed to "catch-all" +get rid of get_previous_state via accepting #'s tied to previous state #'s +-p tells something about backtracking +easy way to scan strings instead of files +input() across buffer boundaries, buffer overflow; unput() fix +start state stack +NLSTATE - sets "in newline" state; also mechanism to clear "in newline" state +checks for bogus backtrack rules ... - rule shadowing +document incompatibility with lex when unput()'ing a newline + after a newline has been read +document that comments are not allowed on definition lines + foo bar /* the "foo" definition ... */ +perhaps indented code in section 2 leads to warnings? +#line directives for code at beginning of scanner routine +nuke %used etc. +hooks for direct access to the buffer, e.g. 
for flushing it +options in .l file as well as on command line; particularly the rename-prefix + option +clarify "eat up * not followed by /" in <comment> example; move it to + performance, offer simpler version for start states +hook for finding out how much text can be safely pushed back +the .backtrack code knows how to identify characters that cause transitions + (you wanted this for some clearer error messages for the + "default rule can be matched") +yy_switch_to_buffer sets yy_init to 0? +handy library routines, such as yy_C_comment(), yy_C_string(), +obey #line directives in input; first, get rid of # comments ... +flex.h header for declarations of e.g., yymore(), yytext? + but what about %array making the yytext definition out of date? +merge w/ okeeffe code +rearrange the Performance Considerations section so that the easy + fixes come first +copyright notice in manuals? +input() updates yytext and yyleng; perhaps unput too???; + right now it trashes them (doesn't restore '\0') +document that yyleng can now be modified + except if yymore() used? +anchoring allowed inside ()'s - (^abc|def$) +unput() propagates non-newline state too? +complain about invalid anchoring - foo(^abc), (^abc)+ +library in its own directory +yylineno +example in flexdoc on YY_INPUT reading from input() +redesign for retargetability (i.e., use w/ other languages ...) +clean up escape expansion +bison @N +example for doc. on scanning strings w/ escapes in them: +POSIX/ +get rid of duplicated code between "re2 re" rule and "re '$'" rule +preformatted man pages for VMS sites, possibly using col -b to get rid + of backspaces ... +slurp entire input file into mega-buffer; allows pointers to in-place + identifiers +lex compatibility flag +update flags in docs +-n removed from POSIX? +"MAKE = ..." 
shouldn't be commented out, or else bigtest can fail +BSD man macros diff --git a/to.do/flex.rmail b/to.do/flex.rmail new file mode 100644 index 0000000..6783fb0 --- /dev/null +++ b/to.do/flex.rmail @@ -0,0 +1,3182 @@ +BABYL OPTIONS: -*- rmail -*- +Version: 5 +Labels: +Note: This is the header of an rmail file. +Note: If you are seeing it in rmail, +Note: it means the file has no messages in it. + +1,, +X-Coding-System: nil +Mail-from: From vern@daffy.ee.lbl.gov Wed Mar 14 04:01:40 2001 +Return-Path: <vern@daffy.ee.lbl.gov> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id EAA09959 + for <wlestes@localhost>; Wed, 14 Mar 2001 04:01:40 -0500 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Wed, 14 Mar 2001 04:01:40 -0500 (EST) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f2E8pme19617 + for <wlestes@hermes.email.uncg.edu>; Wed, 14 Mar 2001 03:51:48 -0500 (EST) +Received: from daffy.ee.lbl.gov (daffy.ee.lbl.gov [131.243.1.31]) + by external-gw.uncg.edu (8.9.3/8.9.3) with ESMTP id DAA20394 + for <wlestes@uncg.edu>; Wed, 14 Mar 2001 03:51:47 -0500 (EST) +Received: (from vern@localhost) + by daffy.ee.lbl.gov (8.10.0/8.10.0) id f2E8pkM07870; + Wed, 14 Mar 2001 00:51:46 -0800 (PST) +Message-Id: <200103140851.f2E8pkM07870@daffy.ee.lbl.gov> +To: "W. L. Estes" <wlestes@uncg.edu> +Subject: Re: possibly taking over maintenance of flex +In-reply-to: Your message of Tue, 13 Mar 2001 09:00:58 EST. +Date: Wed, 14 Mar 2001 00:51:46 PST +From: Vern Paxson <vern@ee.lbl.gov> + +*** EOOH *** +To: "W. L. Estes" <wlestes@uncg.edu> +Subject: Re: possibly taking over maintenance of flex +In-reply-to: Your message of Tue, 13 Mar 2001 09:00:58 EST. +Date: Wed, 14 Mar 2001 00:51:46 PST +From: Vern Paxson <vern@ee.lbl.gov> + +> probably best to have a trial period up front as you suggest. 
+ +Okay, you can get a copy of the master sources from + + ftp://ftp.ee.lbl.gov/.vp-flex-sources.tar.gz + +The TODO/ subdirectory has the general wish list plus a number of contributed +patches. The faqs/ subdirectory has a bunch of email messages (in MH +folder format, i.e., one message per file, files numbered sequentially) +for commonly asked questions - worth taking a look through to see what +sorts of things people frequently ask about. + +Next messages are various stuff I sent to Dick King. + +Thanks for giving it a try - naturally, by all means let me know when you +have questions ... + + Vern + + +1,, +X-Coding-System: nil +Mail-from: From vern@daffy.ee.lbl.gov Wed Mar 14 04:01:40 2001 +Return-Path: <vern@daffy.ee.lbl.gov> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id EAA09961 + for <wlestes@localhost>; Wed, 14 Mar 2001 04:01:40 -0500 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Wed, 14 Mar 2001 04:01:40 -0500 (EST) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f2E8ppe19633 + for <wlestes@hermes.email.uncg.edu>; Wed, 14 Mar 2001 03:51:51 -0500 (EST) +Received: from daffy.ee.lbl.gov (daffy.ee.lbl.gov [131.243.1.31]) + by external-gw.uncg.edu (8.9.3/8.9.3) with ESMTP id DAA20402 + for <wlestes@uncg.edu>; Wed, 14 Mar 2001 03:51:50 -0500 (EST) +Received: (from vern@localhost) + by daffy.ee.lbl.gov (8.10.0/8.10.0) id f2E8poV07877; + Wed, 14 Mar 2001 00:51:50 -0800 (PST) +Message-Id: <200103140851.f2E8poV07877@daffy.ee.lbl.gov> +To: wlestes@uncg.edu +Subject: messages sent to Dick King (1 of 2) +Date: Wed, 14 Mar 2001 00:51:49 PST +From: Vern Paxson <vern@ee.lbl.gov> + +*** EOOH *** +To: wlestes@uncg.edu +Subject: messages sent to Dick King (1 of 2) +Date: Wed, 14 Mar 2001 00:51:49 PST +From: Vern Paxson <vern@ee.lbl.gov> + + +------- Forwarded Messages + 
+Date: Mon, 21 Aug 2000 23:03:54 PDT +From: Vern Paxson <vern> +Subject: Re: Some new Flex maintainer volunteers +To: Dick King <king@reasoning.com> +Cc: bkuhn@ebb.org +In-reply-to: Your message of Tue, 15 Aug 2000 14:03:58 PDT. + +> Vern, i'm ready when you are. + +Okay, I've put the master sources in + + ftp://ftp.ee.lbl.gov/.vp-flex-sources.tar.gz + +The TODO/ subdirectory has a the general wish list plus a number of +contributed patches (per the mail I'll be cc'ing you on regarding reentrant +scanners). The faqs/ subdirectory has a bunch of email messages (in MH +folder format, i.e., one message per file, files numbered sequentially) for +commonly asked questions - worth taking a look through to see what sorts of +things people frequently ask about. + +I very much appreciate your taking over maintaining/developing flex, and +apologize for how long it's taken me to bundle stuff up to give to you (all +too symptomatic of why I'm no longer able to work on flex :-( ). By all +means, let me know when you have questions ... + + Vern + +------- Message 2 + +Date: Mon, 21 Aug 2000 23:38:00 PDT +From: Vern Paxson <vern> +Subject: Re: I volunteer to modify flex to generate reentrant C code. +To: "John W. Millaway" <jmillawa@nimbus.ocis.temple.edu> +Cc: help-flex@gnu.org, king@reasoning.com +In-reply-to: Your message of Mon, 21 Aug 2000 16:23:18 PDT. + +> I'm requesting this feature and at the same time volunteering to implement +> it: + +John Bossom contributed patches a while ago for adding reentrant scanners. +I never managed to try them out or integrate them (note that I'm no longer +the flex maintainer/developer, having just transferred the honors to Dick +King, whom I've cc'd), but I've made them available from: + + ftp://ftp.ee.lbl.gov/.vp-flex-reentrant.tar.gz + +I imagine Dick will be interested in hearing of problems/improvements/etc. + + Vern + +------- Message 3 + +Date: Wed, 30 Aug 2000 07:23:07 PDT +From: Vern Paxson <vern> +Subject: Re: bug in flex 2.5.4? 
+To: Keith McGuigan <keith.mcguigan@ecora.com> +Cc: king@reasoning.com +In-reply-to: Your message of Tue, 29 Aug 2000 12:21:02 EDT. + +This does indeed sound like a bug. Dick King (cc'd) recently took over +development & maintenance for flex; I've sent him a copy of your message. + + Vern + +------- Message 4 + +Date: Thu, 31 Aug 2000 15:41:00 PDT +From: Vern Paxson <vern> +Subject: Re: Flex +To: "Skifstrom, Eric" <SKIFSTRE@Mattel.com> +Cc: king@reasoning.com +In-reply-to: Your message of Wed, 30 Aug 2000 09:35:58 PDT. + +> Thanks for all the work on Flex. It is appreciated. How do I get a copy of +> flex for Windows 98 platform. + +I've only directly supported flex for Unix, and have always distributed it +in source-code form only. So I don't know where you get a copy, though +I bet with some web surfing you can find one. + +Also, please note, Dick King (cc'd) has taken over flex development +and maintenance. + + Vern + +------- Message 5 + +Date: Thu, 31 Aug 2000 15:43:40 -0700 (PDT) +From: Dick King <king@reasoning.com> +Subject: Re: Flex +To: vern@ee.lbl.gov +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii +Content-Transfer-Encoding: 7bit +In-Reply-To: <200008312241.e7VMf0G14335@daffy.ee.lbl.gov> +References: <200008312241.e7VMf0G14335@daffy.ee.lbl.gov> +X-Mailer: VM 6.61 under 19.14 XEmacs Lucid + +Vern Paxson writes: + >> Thanks for all the work on Flex. It is appreciated. How do I get a copy of + >> flex for Windows 98 platform. + > + >I've only directly supported flex for Unix, and have always distributed it + >in source-code form only. So I don't know where you get a copy, though + >I bet with some web surfing you can find one. + > + >Also, please note, Dick King (cc'd) has taken over flex development + >and maintenance. + > + > Vern + > + +I have a couple of how-to-be-a-gnu-product-maintainer questions. + +First, how do i get to the wish list? I assume people mail the stuff and it +accumulates somewhere. 
+ +Second, if i decide i have a fix for a group of problems what do i do to get it +distributed? + +- -dk + +------- Message 6 + +Date: Sun, 03 Sep 2000 12:22:48 PDT +From: Vern Paxson <vern> +Subject: Re: Flex +To: Dick King <king@reasoning.com> +In-reply-to: Your message of Thu, 31 Aug 2000 15:43:40 PDT. + +> First, how do i get to the wish list? I assume people mail the stuff and it +> accumulates somewhere. + +There's a honed-down copy in the distribution I put together. I also have +several hundred email messages, which will be rather tedious to wade through, +but I can put together for you if you wish (this is non-trivial, though, +since there's some personal mail intermixed in it which I'll have to locate +and remove). + +> Second, if i decide i have a fix for a group of problems what do i do to get it +> distributed? + +Put together a new release distribution (either a minor version change or +a major one, depending on the scope of the changes). Make sure it works +with "make bigcheck", and make sure you can copy the distribution to a +virgin machine, unpack it & successfully build it (including "make bigcheck"). + +Next, make it available somewhere for anonymous ftp and/or Web access. +(If this is a problem, I can still host the distribution at ftp.ee.lbl.gov.) + +Finally, send mail like the appended example. For your first message, +you should explain that you've taken over development of flex from me. +It would probably be good to cc me, in case a thread develops from the +note that it would be helpful to have me contribute to. No need to cc +me on later notes (though I don't mind, either). + +I used to send the note to compilers@iecc.com (the USENET comp.compilers +group) and info-gnu@prep.ai.mit.edu. I don't know if the former is still +the correct address - you could find out from the USENET group's archives, +presumably. The latter has likely changed to info-gnu@gnu.org. 
+ + Vern + + +Date: Tue, 28 Mar 95 11:58:04 PST +From: Vern Paxson <vern> +Subject: flex release 2.5.1 now available +To: compilers@iecc.com, info-gnu@prep.ai.mit.edu + +Release 2.5.1 of flex, a POSIX-compliant "lex" scanner generator, is now +available for anonymous ftp to ftp.ee.lbl.gov. Retrieve flex-2.5.1.tar.Z +or flex-2.5.1.tar.gz. flex-2.5.1.tar.gz should be showing up shortly at +the usual GNU mirror sites. + +The main changes between 2.5 and 2.4 are: + + - A new concept of "start condition" scope lets you group together + rules sharing the same start conditions into a syntactic unit. + - Flex now includes a general mechanism for specifying scanner options. + - Routines have been added for scanning strings instead of files. + - Routines have been added for manipulating stacks of start conditions. + - Fledgling support for POSIX character class expressions. + - Enhanced portability to MS-DOS, VMS, NT, Macintosh, Amiga. + - A number of bugs have been fixed. + - input() no longer trashes yytext. + - Interactive scanners now run significantly faster. + - C++ scanner objects now work with the -P option. + - The promised rewrite of the C++ FlexLexer class has not been done, + but is still planned. + +See the "NEWS" file for more details. + +Please report bugs to: vern@ee.lbl.gov or bug-gnu-utils@prep.ai.mit.edu + + Vern + +Vern Paxson +Lawrence Berkeley Laboratory +vern@ee.lbl.gov + +------- Message 7 + +Date: Wed, 13 Sep 2000 00:59:33 PDT +From: Vern Paxson <vern> +Subject: Re: Does the FSF maintain Flex? +To: "Leanard Lin" <leanard@syntest.com.tw> +Cc: king@reasoning.com +In-reply-to: Your message of Wed, 13 Sep 2000 00:55:38 PDT. + +> Dear Vern, +> +> I have a problem about flex of linux version. +> If comment is very long, flex will not re-malloc +> for it. +> It will display a error message and exit program. +> +> Thanks, +> +> Leanard + +Well, clearly if a comment is larger than the available memory, then flex +can't realloc to accommodate it. 
But presumably that's not what's going on +or you wouldn't have bothered writing; so I don't know what the problem +might be. + +Flex is now developed & maintained by Dick King, king@reasoning.com, whom +I've cc'd. + + Vern + +------- Message 8 + +Date: Wed, 04 Oct 2000 23:36:59 PDT +From: Vern Paxson <vern> +Subject: Re: Flex 2.5.4.1 Bugs +To: Hans Aberg <haberg@matematik.su.se> +Cc: king@reasoning.com +In-reply-to: Your message of Sun, 01 Oct 2000 20:32:48 PDT. + +> I made a Metrowerks MacOS CW Pro 5 plugin out of the Flex 2.5.4.1. Here are +> some bugs: +> +> -- flexerror() calls flexend() which becomes an infinite loop in the case +> of an error in flexend(). So, flexerror(), replace flexend() by exit(). +> +> -- If there is no output file error, flexend() fails to close the output +> file. This causes a problem as a plugin, because the file remains locked +> the next time the compiler runs, and the file cannot be reopened. +> +> This bug also occurs if, one first has created the .c file, then runs it +> with a -S option for a skeleton filet Flex cannot find, in which case +> flexend() fails to close the output file. + +Please note, Dick King (king@reasoning.com) has taken over flex maintenance +and development. I've cc'd him. + + Vern + +------- Message 9 + +Date: Wed, 04 Oct 2000 23:38:07 PDT +From: Vern Paxson <vern> +Subject: Re: flex -+ option and ISO C++ IOStreams +To: Harri Pasanen <harri.pasanen@trema.com> +Cc: king@reasoning.com +In-reply-to: Your message of Mon, 02 Oct 2000 10:15:59 PDT. + +> You probably already know this: +> +> flex version 2.5.4 generates with -+ flag a forward declaration of class +> istream. +> Problem is that it is incompatible with the new ANSI IOStreams. This +> becomes apparent if I try to use KAI C++ (http://www.kai.com/) to +> compile the generated scanner. +> +> Lexing Scanner.l... +> Compiling Scanner.o... 
+> "/usr/local/KAI/KCC.pu-4.0b-1/KCC_BASE/include/istream.h", line 12: +> error #101: +> "istream" has already been declared in the current scope +> using std::istream; +> +> I wonder if an updated version of flex is in the works? In the time +> being I can compile my own... + +Dick King (king@reasoning.com) has recently taken over flex maintenance and +development (I've cc'd him). Since there will be a fair amount of getting +up to speed for him, I think building your own version for now is the +best course. + + Vern + +------- Message 10 + +Date: Thu, 05 Oct 2000 00:27:15 PDT +From: Vern Paxson <vern> +Subject: Re: Flex Feature Idea +To: Davy Durham <david.durham@wcom.com> +Cc: king@reasoning.com +In-reply-to: Your message of Thu, 28 Sep 2000 11:28:52 EDT. + +> First I just want to say how much flex has been helpful to me in +> writing my parser, and that I really appreciate the free use of the +> program... + +You're welcome! + +Please note, though, that Dick King (cc'd) has taken over flex maintenance +and development. + +> I can't re-write YY_INPUT which could detect and remove the '\' and +> \n and go ahead and return the _next_ char, because I'm keeping up +> with the line-number and column-number in YY_USER_ACTION for each +> token... Since flex buffer's the data retrieved from YY_INPUT, _my_ +> YY_INPUT would need to increment the line-number, but it would happen +> possibly too soon... Plus I'm using yy_switch_buffer to do macro +> replacements quite a bit which I think would complicate things even +> more.... + +Well, that's how you have to do it, I'm afraid. I can't see any practical +way to add this as a feature to flex at a later stage in the input analysis, +since the semantics really are "pretend this slash and this newline don't +exist (mod line numbers)". What you can probably do for line numbers is +the following: create a data structure that maps byte offsets in the current +input stream to line numbers. 
YY_INPUT would fill this in as it returns +new (possibly munged) lines, and your actions would consult it in order to +generate correct location information for error messages. (To deal with +multiple input buffers, you maintain one data structure per buffer.) + + Vern + +------- Message 11 + +Date: Thu, 05 Oct 2000 01:03:01 PDT +From: Vern Paxson <vern> +Subject: Re: [minor] obscure error messages if YY_DECL ends in semicolon +To: "Eric R. Buddington" <ebuddington@mail.wesleyan.edu> +Cc: king@reasoning.com +In-reply-to: Your message of Thu, 14 Sep 2000 13:39:52 EDT. + +> Your address was listed in the flex info pages, so I send this to you; if +> you aren't the currnet maintainer, please let me know. + +I'm not longer the maintainer - Dick King (cc'd) has taken over. + +> First, I would like to add an '%option reentrant", such that the flex code +> would have the lexer function as its only global symbol. Would you +> consider such a patch for inclusion in the official flex? + +Someone has already contributed patches for this - you can get them +from ftp://ftp.ee.lbl.gov/.vp-flex-reentrant.tar.gz . + +> Secondly, an erroneous input that might give better error messages: +> +> If YY_DECL ends with a semicolon, the generated error messages refer to +> wrong (nonexistant in this case) line numbers in the source file. The +> user error is only clear after turning off #line directives and looking at +> the generated .c file. +> +> It would be good to have this error (a fairly easy one to make) reported +> clearly, if that can be done elegantly. This I what I get now: +> +> flex -t -Pcap_string_ cap_string.flex > cap_string.c +> cc -Wall -c -o cap_string.o cap_string.c +> cap_string.flex:163: parse error before `{' +> cap_string.flex:165: register name not specified for `yy_cp' +> cap_string.flex:165: register name not specified for `yy_bp' +> cap_string.flex:166: register name not specified for `yy_act' +> cap_string.flex:39: parse error before `if' +> ... 
+ +I don't offhand see an easy way to fix this, other than scanning the source +for a YY_DECL definition and looking for a semi-colon (urgh, what a hack). +Dick will have to toss it onto the to-do list. + + Vern + +------- Message 12 + +Date: Thu, 5 Oct 2000 10:51:53 +0200 +From: Hans Aberg <haberg@matematik.su.se> +Subject: Re: Flex 2.5.4.1 Bugs +To: Vern Paxson <vern@ee.lbl.gov> +Cc: king@reasoning.com +X-Sender: haberg@pop.matematik.su.se (Unverified) +In-Reply-To: <200010050636.e956axT12828@daffy.ee.lbl.gov> +References: Your message of Sun, 01 Oct 2000 20:32:48 PDT. +Mime-Version: 1.0 +Content-Type: text/plain; charset="us-ascii" +Status: U + +At 23.36 -0700 0-10-04, Vern Paxson wrote: +>Please note, Dick King (king@reasoning.com) has taken over flex maintenance +>and development. I've cc'd him. + +OK. + +To: Dick King +Bison seems to under revision right now (I'm working hard with the Bison +development team just these days on pinpointing some bugs). -- Perhaps Flex +& Bison development should somehow be synchronized. + +I have labelled Flex as BSD. Is that correct? + + Hans Aberg + + + +------- Message 13 + +Date: Fri, 06 Oct 2000 02:04:11 PDT +From: Vern Paxson <vern> +Subject: Re: C++, flex under cygwin and Visual C++ +To: "Simon J. Julier" <sjulier@erols.com> +Cc: king@reasoning.com +In-reply-to: Your message of Thu, 05 Oct 2000 07:15:17 PDT. + +> Many apologies if either (a) it's a stupid question or (b) if I'm emailing +> the wrong person. + +It's (b) :-). Dick King (king@reasoning.com) has taken over flex +maintenance and development. I've cc'd him. + +> I have been using flex (v. impressive) to write a parser +> for a C++ program. The program is being compiled using a Windows +> 2000/Windows NT machine using the cygwin tool set and the Microsoft Visual +> C++ compiler. Because the cygwin distribution does not include FlexLexer.h, +> I took the advice from the man page and simply renamed lex.yy.c as +> lex.yy.cxx and compiled this as a C++ program. 
This almost works except for +> the fact that, in C++ compile mode, there is a dependency on unistd.h due +> to the following code snippet: +> +> #ifdef __cplusplus +> +> #include <stdlib.h> +> #include <unistd.h> +> +> ... (other stuff) ... +> +> Since the Visual C++ compiler does not come with the header file unistd.h, +> I manually modified the code to: +> +> #ifdef __cplusplus +> +> #include <stdlib.h> +> #ifndef _WIN32 +> #include <unistd.h> +> #endif /* ! _WIN32 */ +> +> Is this the "right way" to solve the problem? + +I imagine it is. This problem has come up a bunch of times, and, +much as I hate adding #ifdef's to the skeleton, I haven't thought of +any other way to do it. + +> If so, would it be possible +> to consider making the appropriate changes to flex.skl? + +Sounds okay to me, but this is now Dick's call. + + Vern + +------- Message 14 + +Date: Fri, 6 Oct 2000 09:31:08 -0700 (PDT) +From: Dick King <king@reasoning.com> +Subject: Re: C++, flex under cygwin and Visual C++ +To: vern@ee.lbl.gov +Cc: sjulier@erols.com +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii +Content-Transfer-Encoding: 7bit +In-Reply-To: <200010060904.e9694BU17584@daffy.ee.lbl.gov> +References: <200010060904.e9694BU17584@daffy.ee.lbl.gov> +X-Mailer: VM 6.61 under 19.14 XEmacs Lucid + + + +I'm getting my flex support operations organized, finally. + +I've established a new eddress, flex@2xtreme.net . + +I'll start looking at the small bug fixes flex has been asked for in a week or +so. Then i will get to meaty changes. 
+ +One meaty change i've been asked for which i would like to do [i did it for my +current company's lexer product so i think i understand the issues] is a +character level prefilter infrastructure, that would allow you to integrate a +cpp-style preprocessor [rather than making it a separate pass as is the norm] +or to write a flex lexer for a language like FORTRAN or COBOL or that language +i've heard about whose name escapes me where indentation is syntactically +significant. + +Vern Paxson writes: + >> Many apologies if either (a) it's a stupid question or (b) if I'm emailing + >> the wrong person. + > + >It's (b) :-). Dick King (king@reasoning.com) has taken over flex + >maintenance and development. I've cc'd him. + > + >> I have been using flex (v. impressive) to write a parser + >> for a C++ program. The program is being compiled using a Windows + >> 2000/Windows NT machine using the cygwin tool set and the Microsoft Visual + >> C++ compiler. Because the cygwin distribution does not include FlexLexer.h, + >> I took the advice from the man page and simply renamed lex.yy.c as + >> lex.yy.cxx and compiled this as a C++ program. This almost works except for + >> the fact that, in C++ compile mode, there is a dependency on unistd.h due + >> to the following code snippet: + >> + >> #ifdef __cplusplus + >> + >> #include <stdlib.h> + >> #include <unistd.h> + >> + >> ... (other stuff) ... + >> + >> Since the Visual C++ compiler does not come with the header file unistd.h, + >> I manually modified the code to: + >> + >> #ifdef __cplusplus + >> + >> #include <stdlib.h> + >> #ifndef _WIN32 + >> #include <unistd.h> + >> #endif /* ! _WIN32 */ + >> + >> Is this the "right way" to solve the problem? + > + >I imagine it is. This problem has come up a bunch of times, and, + >much as I hate adding #ifdef's to the skeleton, I haven't thought of + >any other way to do it. + > + >> If so, would it be possible + >> to consider making the appropriate changes to flex.skl? 
+ > + >Sounds okay to me, but this is now Dick's call. + > + > Vern + > + +------- Message 15 + +Date: Sun, 08 Oct 2000 01:19:08 PDT +From: Vern Paxson <vern> +Subject: Re: C++, flex under cygwin and Visual C++ +To: Dick King <king@reasoning.com> +In-reply-to: Your message of Fri, 06 Oct 2000 09:31:08 PDT. + +> One meaty change i've been asked for which i would like to do [i did it for my +> current company's lexer product so i think i understand the issues] is a +> character level prefilter infrastructure, that would allow you to integrate a +> cpp-style preprocessor [rather than making it a separate pass as is the norm] +> or to write a flex lexer for a language like FORTRAN or COBOL or that language +> i've heard about whose name escapes me where indentation is syntactically +> significant. + +That would be really nice to have! + + Vern + +------- Message 16 + +Date: Mon, 9 Oct 2000 12:27:13 -0400 (EDT) +From: Andrew Droffner <adroffne@versus.dmz.advance.net> +Subject: Re: Flex & Multiple Threads +To: Vern Paxson <vern@ee.lbl.gov> +Cc: help-flex@gnu.org, king@reasoning.com +In-Reply-To: <200010050752.e957qGq13639@daffy.ee.lbl.gov> +MIME-Version: 1.0 +Content-Type: TEXT/PLAIN; charset=US-ASCII + +I'm unable to compile the scan.l LEX specification using the bootstrap +scanner, initscan.c. I have tried it on the latest flex source from GNU, +and the tarball from your page at ee.lbl.gov. + +If I drop the "%option reentrant" from scan.l, then it all compiles. I +doubt it works right then. What else do I need here, a new initscan.c? + +Thanks for the help. + +On Thu, 5 Oct 2000, Vern Paxson wrote: + +> > I'm interested in building a multi-threaded server using YACC & Lex. +> > The server & client parse the data they pass to each other. +> > +> > The Bison dialect of YACC uses a "%pure_parser" option to force re-entrant +> > code. I would like a Flex equivalent; is there one already? 
+> +> There are patches to flex to support reentrant scanners, which I've +> put in ftp://ftp.ee.lbl.gov/.vp-flex-reentrant.tar.gz . +> +> Also, please note, flex maintenance has been taken over by Dick King +> <king@reasoning.com>. +> +> Vern +> + +- -- +[ Andrew Droffner +[ Advance Publications Internet +[ +[ adroffne@advance.net + + +------- Message 17 + +Date: Tue, 10 Oct 2000 22:40:12 PDT +From: Vern Paxson <vern> +Subject: Re: Flex & Multiple Threads +To: Andrew Droffner <adroffne@versus.dmz.advance.net> +Cc: help-flex@gnu.org, king@reasoning.com +In-reply-to: Your message of Mon, 09 Oct 2000 12:27:13 PDT. + +> I'm unable to compile the scan.l LEX specification using the bootstrap +> scanner, initscan.c. I have tried it on the latest flex source from GNU, +> and the tarball from your page at ee.lbl.gov. +> +> If I drop the "%option reentrant" from scan.l, then it all compiles. I +> doubt it works right then. What else do I need here, a new initscan.c? + +I don't have any firsthand experience with those patches, unfortunately, +so all I can offer is to try building without the %option reentrant; +then run flex on scan.l to build a new scan.c; recompile using that; +run "make check" to make sure it's working okay; then try putting the +%option reentrant back in. Because I believe the modified scan.l doesn't +actually need %option reentrant, it's just there to exercise the scanning +during "make check". + + Vern + +------- Message 18 + +Date: Wed, 11 Oct 2000 11:42:35 PDT +From: Vern Paxson <vern> +Subject: Re: Flex & Multiple Threads +To: Dick King <king@reasoning.com> +In-reply-to: Your message of Wed, 11 Oct 2000 11:29:49 PDT. + +> How do i read help-flex@gnu.org? + +Try sending mail to help-flex-request@gnu.org with a message body of +"subscribe". Let me know if that doesn't work. 
+ + Vern + +------- End of Forwarded Messages + + +1,, +X-Coding-System: nil +Mail-from: From vern@daffy.ee.lbl.gov Wed Mar 14 04:01:40 2001 +Return-Path: <vern@daffy.ee.lbl.gov> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id EAA09964 + for <wlestes@localhost>; Wed, 14 Mar 2001 04:01:40 -0500 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Wed, 14 Mar 2001 04:01:40 -0500 (EST) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f2E8pse19643 + for <wlestes@hermes.email.uncg.edu>; Wed, 14 Mar 2001 03:51:54 -0500 (EST) +Received: from daffy.ee.lbl.gov (daffy.ee.lbl.gov [131.243.1.31]) + by external-gw.uncg.edu (8.9.3/8.9.3) with ESMTP id DAA20408 + for <wlestes@uncg.edu>; Wed, 14 Mar 2001 03:51:53 -0500 (EST) +Received: (from vern@localhost) + by daffy.ee.lbl.gov (8.10.0/8.10.0) id f2E8pqR07884; + Wed, 14 Mar 2001 00:51:52 -0800 (PST) +Message-Id: <200103140851.f2E8pqR07884@daffy.ee.lbl.gov> +To: wlestes@uncg.edu +Subject: messages sent to Dick King (2 of 2) +Date: Wed, 14 Mar 2001 00:51:52 PST +From: Vern Paxson <vern@ee.lbl.gov> + +*** EOOH *** +To: wlestes@uncg.edu +Subject: messages sent to Dick King (2 of 2) +Date: Wed, 14 Mar 2001 00:51:52 PST +From: Vern Paxson <vern@ee.lbl.gov> + + +------- Forwarded Messages + +Date: Sat, 07 Oct 2000 10:31:22 -0700 +From: "Dick King, flex maintainer" <flex@2xtreme.net> +Subject: Re: the source tree +To: vern@ee.lbl.gov +Cc: flex@2xtreme.net +In-reply-to: <14814.15467.441052.399457@fiddle.mtview.reasoning.com> +Reply-to: flex@2xtreme.net +MIME-version: 1.0 +X-Mailer: Forte Agent 1.8/32.548 +Content-type: text/plain; charset=us-ascii +Content-transfer-encoding: 7bit +References: <14814.15467.441052.399457@fiddle.mtview.reasoning.com> + +On Fri, 6 Oct 2000 13:56:11 -0700 (PDT), you wrote: + +> > I think i'm prepared to give this a try. 
+> +> Cool!, thanks. Here's a snapshot of the current source tree. I'm about +> to leave for two week's vacation, not reading email, but upon my return +> I'll put together the stuff-to-do email archive, etc. +> +> Vern +> + +What is the current release? + +You sent me 2.5.4a to my office computer on June 20, but i found 2.5.5a on +June 20 and downloaded it to my home computer. + +- -dk + +PS: when anyone writes to you, tell them about my new eddress +[flex@2xtreme.net]? Thanks. + +- -dk + + +------- Message 2 + +Date: Sun, 08 Oct 2000 01:26:26 PDT +From: Vern Paxson <vern> +Subject: Re: a sort of flex-related question :-) +To: Alan Donovan <alan.donovan@arm.com> +Cc: flex@2xtreme.net +In-reply-to: Your message of Thu, 05 Oct 2000 18:49:51 PDT. + +> Vern, sorry to write out of the blue like this but I figure that as the +> maintainer of flex, you could probably answer my question in a second. + +I'm actually no longer the maintainer. Dick King has taken over. He's set +up the address flex@2xtreme.net for flex correspondence. I've cc'd him. + +> I'm writing a tool somewhere between flex and yacc, in which you specify +> a set of rules and actions (as in yacc) but the language is that of +> regular expressions not context-free grammars. +> +> Each rule may include literals, operators, and simpler (i.e. previously +> declared) rules as components, and have an action. e.g. +> +> foo :[0-9]+ { func1(); } +> +> bar :"x"foo { func2(); } +> |"y"foo { func3(); } +> +> So the NDFA for "bar" actually contains two copies of the NDFA for +> "foo". My problem arises when trying to determine where in the DFA +> (generated by subset construction) to invoke the actions associated with +> each rule. All my DFA states are annotated as to which rules (if any) +> they start and which rules (if any) they accept. However this is not +> enough to correctly invoke the actions. +> +> Consider: +> +> foo: ab +> bar: a +> +> in the context of some larger expression zip:(foo|bar)"c". 
+> +> The DFA for "zip" looks like: +> +> a b c +> P ---> Q ---> R ---> T +> | +> +----> S +> c +> +> P starts rules "foo", "bar" and "zip". Q accepts "bar" and R accepts +> rule "foo". S and T accept zip. +> +> On input "abc", how do you know _not_ to invoke the action for rule +> "bar" when you reach state Q (or more correctly, when you receive the +> "b" and move to state R)? +> +> So the question is: do you think what I am trying to do is possible? + +What you are doing looks like a parse, and you can only parse LR(0) +languages with DFAs (if I recall correctly), i.e., those without +lookahead. The above example requires lookahead to differentiate +matching foo from bar. So I think you're out of luck here, if +I understand the example correctly. + + Vern + +------- Message 3 + +Date: Sun, 08 Oct 2000 07:47:01 -0700 +From: "Dick King, flex maintainer" <flex@2xtreme.net> +Subject: Re: a sort of flex-related question :-) +To: Vern Paxson <vern@ee.lbl.gov> +Cc: Alan Donovan <alan.donovan@arm.com> +In-reply-to: <200010080826.e988QQC23650@daffy.ee.lbl.gov> +Reply-to: flex@2xtreme.net +MIME-version: 1.0 +X-Mailer: Forte Agent 1.8/32.548 +Content-type: text/plain; charset=us-ascii +Content-transfer-encoding: 7bit +References: <200010080826.e988QQC23650@daffy.ee.lbl.gov> +Status: U + +On Sun, 08 Oct 2000 01:26:26 PDT, you wrote: + +> > Vern, sorry to write out of the blue like this but I figure that as the +> > maintainer of flex, you could probably answer my question in a second. +> +> I'm actually no longer the maintainer. Dick King has taken over. He's set +> up the address flex@2xtreme.net for flex correspondence. I've cc'd him. +> +> > I'm writing a tool somewhere between flex and yacc, in which you specify +> > a set of rules and actions (as in yacc) but the language is that of +> > regular expressions not context-free grammars. +> > +> > Each rule may include literals, operators, and simpler (i.e. previously +> > declared) rules as components, and have an action. 
e.g. +> > +> > foo :[0-9]+ { func1(); } +> > +> > bar :"x"foo { func2(); } +> > |"y"foo { func3(); } +> > +> > So the NDFA for "bar" actually contains two copies of the NDFA for +> > "foo". My problem arises when trying to determine where in the DFA +> > (generated by subset construction) to invoke the actions associated with +> > each rule. All my DFA states are annotated as to which rules (if any) +> > they start and which rules (if any) they accept. However this is not +> > enough to correctly invoke the actions. +> > +> > Consider: +> > +> > foo: ab +> > bar: a +> > +> > in the context of some larger expression zip:(foo|bar)"c". +> > +> > The DFA for "zip" looks like: +> > +> > a b c +> > P ---> Q ---> R ---> T +> > | +> > +----> S +> > c +> > +> > P starts rules "foo", "bar" and "zip". Q accepts "bar" and R accepts +> > rule "foo". S and T accept zip. +> > +> > On input "abc", how do you know _not_ to invoke the action for rule +> > "bar" when you reach state Q (or more correctly, when you receive the +> > "b" and move to state R)? +> > +> > So the question is: do you think what I am trying to do is possible? +> +> What you are doing looks like a parse, and you can only parse LR(0) +> languages with DFAs (if I recall correctly), i.e., those without +> lookahead. The above example requires lookahead to differentiate +> matching foo from bar. So I think you're out of luck here, if +> I understand the example correctly. +> +> Vern + +Vern is mostly correct. Small extensions can give you a single token +[character, here] lookahead but that is not enough to deliver the +functionality you want. 
Consider + +foo: a { foo_fn(); } +bar: ab { bar_fn(); } + +mumble: foo bcde.....yZ { any } + | bar cde.....yW { any } + +- -dk + + +------- Message 4 + +Date: Tue, 10 Oct 2000 02:36:57 PDT +From: Vern Paxson <vern> +Subject: Re: Bison's semantic parsers +To: Akim Demaille <akim@epita.fr> +Cc: Hans Aberg <haberg@matematik.su.se>, + Davy Durham <david.durham@wcom.com>, Bison Help <help-bison@gnu.org>, + flex@2xtreme.net +In-reply-to: Your message of 10 Oct 2000 11:43:07 PDT. + +> Nope, indeed Vern seems also to say he won't work on Flex again. + +Unfortunately, yes. + +> Vern, could we imagine that Flex be put on the CVS server of the FSF, +> subversions.gnu.org? + +Thank you very much for the offer, I certainly appreciate it. However, +Dick King has already volunteered to take over maintenance and development. +I've cc'd the address he's using for flex-related mail, flex@2xtreme.net. + + Vern + +------- Message 5 + +Date: Tue, 10 Oct 2000 13:30:02 +0200 +From: Hans Aberg <haberg@matematik.su.se> +Subject: Re: Bison's semantic parsers +To: Vern Paxson <vern@ee.lbl.gov> +Cc: Akim Demaille <akim@epita.fr>, Davy Durham <david.durham@wcom.com>, + Bison Help <help-bison@gnu.org>, flex@2xtreme.net +X-Sender: haberg@pop.matematik.su.se +In-Reply-To: <200010100936.e9A9avg01250@daffy.ee.lbl.gov> +References: Your message of 10 Oct 2000 11:43:07 PDT. +Mime-Version: 1.0 +Content-Type: text/plain; charset="us-ascii" +Status: U + +At 02.36 -0700 0-10-10, Vern Paxson wrote: +>> Vern, could we imagine that Flex be put on the CVS server of the FSF, +>> subversions.gnu.org? +> +>Thank you very much for the offer, I certainly appreciate it. However, +>Dick King has already volunteered to take over maintenance and development. +>I've cc'd the address he's using for flex-related mail, flex@2xtreme.net. + +At 11.43 +0200 0-10-10, Akim Demaille wrote: +>Hans> Perhaps Bison and Flex should be bundled, or tuned up together. +> +>It's a good but bad idea.
For instance you could also argue that +>Autoconf, Automake and Libtool be merged, but if you just observe that +>they have totally different evolutions, it becomes infeasible. While +>definitely a seducing idea, it wouldn't work IMHO. + +My experimenting with C++ though suggests that Bison and Flex needs some +synchronizing if one should take full advantage of that language: + +- -- One thing that come to my mind is that instead of letting the Bison +output .cc file include the Flex .cc file, they communicate the information +via header files. + +- -- When experimenting C++ manipulator style I mentioned before + Parser parser; + parser_value val; + ifstream ifs("...", ios_base::in | ios_base::binary); + ofstream ofs("..."); + if (ifs) try { for (;;) { + ifs >> parser >> val; + cout << val << endl; + } +I experience problems with how Flex resets its state, and how streams and +such are initialized. + +For example, if I try to gobble up UNIX, MacOS & DOS line separators in Flex by + \n|\r|\r\n { return '\n'; } +then, when Flex sees a \r it must first look ahead to verify the next +character isn't a \n. However, if a successful value "val" is produced, +then the next time one ends up at the line ifs >> parser >> val, Flex is +reset, and as a consequence, the extra character needed to the \n check is +gobbled up. + +One can get around this by not allowing this ifs >> parser >> val +construct, then the advantage of freer, and therefore easier user syntaxes +are lost. + +- -- I think that there might be some other issues, such as how to properly +localize all data that Bison and Flex uses, and how they should communicate +that. These are more questions of hand-shaking than bundling them together +as a single program. + +As for the comparisons of Bison and Flex versus other types of +compiler-compilers, I think the main disadvantage with them (the former) is +that they are extremely difficult to debug. 
It's rather hopeless trying to +debug the C/C++ files they produce as output. If one could hook up a +debugger though, so that one could step through the .l/.y source files, just +as in the case of visual debugging of a C/C++ file, then this would change, +and it would not make any difference of what method is used, as long as it +is efficient and does the job. + + Hans Aberg + + + +------- Message 6 + +Date: 10 Oct 2000 13:51:33 +0200 +From: Akim Demaille <akim@epita.fr> +Subject: Re: Bison's semantic parsers +To: Vern Paxson <vern@ee.lbl.gov> +Cc: Hans Aberg <haberg@matematik.su.se>, Davy Durham <david.durham@wcom.com>, + Bison Help <help-bison@gnu.org>, flex@2xtreme.net, + Didier Verna <didier@epita.fr> +References: <200010100936.e9A9avg01250@daffy.ee.lbl.gov> +In-Reply-To: Vern Paxson's message of "Tue, 10 Oct 2000 02:36:57 PDT" +Lines: 27 +User-Agent: Gnus/5.0807 (Gnus v5.8.7) XEmacs/21.1 (Channel Islands) +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii +Status: U + +>>>>> "Vern" == Vern Paxson <vern@ee.lbl.gov> writes: + +>> Nope, indeed Vern seems also to say he won't work on Flex again. + +Vern> Unfortunately, yes. + +Just by curiosity: you're no longer interested, or you don't have time +enough? + +>> Vern, could we imagine that Flex be put on the CVS server of the +>> FSF, subversions.gnu.org? + +Vern> Thank you very much for the offer, I certainly appreciate it. + +I'm also quite pleased to see my offer pleased you so much :) + +Vern> However, Dick King has already volunteered to take over +Vern> maintenance and development. I've cc'd the address he's using +Vern> for flex-related mail, flex@2xtreme.net. + +Thanks! + +Then, I shall restate my question for flex@2xtreme: is there a CVS +server or something which would make it possible to participate in the +development of Flex?
+ + Akim + +------- Message 7 + +Date: Tue, 10 Oct 2000 14:15:53 PDT +From: Vern Paxson <vern> +Subject: Re: Bison's semantic parsers +To: Akim Demaille <akim@epita.fr> +Cc: Hans Aberg <haberg@matematik.su.se>, + Davy Durham <david.durham@wcom.com>, Bison Help <help-bison@gnu.org>, + flex@2xtreme.net, Didier Verna <didier@epita.fr> +In-reply-to: Your message of 10 Oct 2000 13:51:33 PDT. + +> Just by curiosity: you're no longer interested, or you don't have time +> enough? + +No time ... + + Vern + +------- Message 8 + +Date: Thu, 12 Oct 2000 20:07:57 PDT +From: Vern Paxson <vern> +Subject: Re: Flex 2.5.4 Bug +To: Cliff Sarginson <csarginson@descartes.com> +Cc: flex@2xtreme.net +In-reply-to: Your message of Thu, 12 Oct 2000 13:52:06 +0200. + +> I dont know if you still receive bug reports about flex .. + +I'm not, Dick King (cc'd) has taken over maintenance & development. + +> but here is one.. ! +> +> In a scanner we have .. +> +> %start this that msg another +> +> The field "msg" gets turned into a +> +> #define msg 3 +> +> We do not define a xxx_YY_FATAL_ERROR +> +> So flex supplies one, as a macro, with a definition: +> +> xxx_yy_fatal_error (const msg []) +> +> The "C" pre-processor merrily turns this into +> +> xx_yy_fatal_error ( const 3 [] ) +> +> Are we really the first to come across this ? Flex should maybe +> be a bit more creative with it's paramater naming for macro's +> or do some other name space checking maybe ? + +Yep, this is a known problem. The difficulty is that when lex was +originally designed, it allowed you to switch start states using +"BEGIN msg" rather than "BEGIN(msg)". This means that either you +do a major analysis of the source to only patch up true BEGIN's +(rather than, say, one that occurs inside a string literal); or you +use a gross hack in defining BEGIN, like flex does: + + #define BEGIN yy_start = 1 + 2 * + +With this sort of definition, there's no way to modify the namespace +consumed by start states. 
If instead you could require the ()'s, then +you could use + + #define BEGIN(state) yy_start = 1 + 2 * (YY_STATE_ ## state) + +and all would be happy ... + + Vern + +------- Message 9 + +Date: Thu, 12 Oct 2000 20:08:03 PDT +From: Vern Paxson <vern> +Subject: Re: a sort of flex-related question :-) +To: Alan.Donovan@arm.com +Cc: flex@2xtreme.net +In-reply-to: Your message of Thu, 12 Oct 2000 15:36:38 BST. + +> Thank you both for your help. I can probably rearrange my grammars to be +> LR(0). Can you give me a reference for the algorithm for LR(0) parsing with +> a DFA? + +Dunno off hand, but I imagine it must be in the Dragon Book. + + Vern + +------- Message 10 + +Date: Thu, 02 Nov 2000 11:56:35 PST +From: Vern Paxson <vern> +Subject: Re: flex documentation enhancement request +To: Tracy Camp <campt@thalvors.miralink.com> +Cc: flex@2xtreme.net +In-reply-to: Your message of Thu, 02 Nov 2000 10:17:17 PST. + +Yes, a lot of people get bitten by this, unfortunately. + +Note, Dick King has taken over flex maintenance and development, and has +set up the address flex@2xtreme.net for flex correspondence. I've cc'd him. + + Vern + + +> As a new user to flex I found the following situation quite confusing and +> think a brief caveat note in the manual would have helped out (or at +> least an emphasis if there was one) :) +> +> when using external variables when patching a pattern such as: +> +> %x buffer_dev +> %{ +> extern struct mystruct astruct; /* struct containing member buffer_dev */ +> %} +> %% +> buffer_dev= BEGIN(buffer_dev); +> +> <buffer_dev>.*$ { +> strncpy(astruct.buffer_dev,yytext,255); +> } +> %% +> +> this causes all sorts of problems because 'buffer_dev' is defined by flex +> at some point and I used buffer_dev as a member of an external structure +> that I'm trying to fill in with flex. This causes all sorts of compile +> problems.
Renaming my 'buffer_dev' condition to something else works +> around this, but it was not immediately obvious what the problem was from +> the compile error. For all else I found your manual to be of GREAT help - +> thanks! +> +> t. +> +> Tracy Camp +> Product Development +> Miralink Corp.PDX +> Portland OR +> 503-223-3140 +> + +------- Message 11 + +Date: Thu, 09 Nov 2000 23:03:38 PST +From: Vern Paxson <vern> +Subject: Re: Standard C++ compatibility for flex++ +To: Volker Simonis <simonis@informatik.uni-tuebingen.de> +Cc: help-flex@gnu.org, flex@2xtreme.net +In-reply-to: Your message of Wed, 08 Nov 2000 15:50:36 +0100. + +> Is there any chance for a new release of flex? Is there a maintainer right now? +> Is it possible to submit some bug-fixes or to contribute in some way? + +Dick King has taken over flex development and maintenance. You can +reach him at flex@2xtreme.net (cc'd). + + Vern + +------- Message 12 + +Date: Wed, 14 Feb 2001 12:57:37 PST +From: Vern Paxson <vern> +Subject: Re: Bug#84780: Fix for info documentation (fwd) +To: Santiago Vila <sanvila@unex.es> +Cc: Gordon Sadler <gbsadler1@lcisp.com>, flex@2xtreme.net +In-reply-to: Your message of Wed, 14 Feb 2001 15:40:30 PST. + +Please note, Dick King has taken over flex maintenance and development, +and has set up the address flex@2xtreme.net for flex correspondence. + + Vern + + +> Hello. +> +> I received this from the Debian Bug System. +> +> ---------- Forwarded message ---------- +> Date: Sun, 4 Feb 2001 02:41:30 -0600 +> From: Gordon Sadler <gbsadler1@lcisp.com> +> To: Debian Bug Tracking System <submit@bugs.debian.org> +> Subject: #84780: Fix for info documentation +> +> Package: flex +> Version: 2.5.4a-9 +> Severity: wishlist +> +> Attached find a diff to MISC/texinfo/flex.texi +> I noticed during the texi2html run it complains of first section not +> being toplevel. I opened the texi file and changed the toplevel +> sections to chapters. 
While I was there noticed the comment about +> fixing all of the See... to make them @xref, so I did. +> +> Looks like upstream is pretty much done ... but you seem to be in +> touch, last changelog mentions manpage patch from author? +> +> If you use this texi to regen the info the x-refs make it a little more +> useable. Maybe I'll add the index in next -) +> +> Gordon Sadler +> ------------------------------------------------------------------------ +> This is the patch in unified format: +> +> diff -r -u flex-2.5.4.orig/MISC/texinfo/flex.texi flex-2.5.4/MISC/texinfo/flex.texi +> --- flex-2.5.4.orig/MISC/texinfo/flex.texi Sun Jul 27 04:47:21 1997 +> +++ flex-2.5.4/MISC/texinfo/flex.texi Wed Feb 14 15:34:49 2001 +> @@ -151,12 +151,12 @@ +> @end ifinfo +> +> @node Name, Synopsis, Top, Top +> -@section Name +> +@chapter Name +> +> flex - fast lexical analyzer generator +> +> @node Synopsis, Overview, Name, Top +> -@section Synopsis +> +@chapter Synopsis +> +> @example +> flex [-bcdfhilnpstvwBFILTV78+? -C[aefFmr] -ooutput -Pprefix -Sskeleton] +> @@ -164,7 +164,7 @@ +> @end example +> +> @node Overview, Description, Synopsis, Top +> -@section Overview +> +@chapter Overview +> +> This manual describes @code{flex}, a tool for generating programs +> that perform pattern-matching on text. The manual +> @@ -244,7 +244,7 @@ +> @end table +> +> @node Description, Examples, Overview, Top +> -@section Description +> +@chapter Description +> +> @code{flex} is a tool for generating @dfn{scanners}: programs which +> recognized lexical patterns in text. @code{flex} reads the given +> @@ -260,7 +260,7 @@ +> corresponding C code. +> +> @node Examples, Format, Description, Top +> -@section Some simple examples +> +@chapter Some simple examples +> +> First some simple examples to get the flavor of how one +> uses @code{flex}. The following @code{flex} input specifies a scanner +> @@ -371,7 +371,7 @@ +> following sections. 
+> +> @node Format, Patterns, Examples, Top +> -@section Format of the input file +> +@chapter Format of the input file +> +> The @code{flex} input file consists of three sections, separated +> by a line with just @samp{%%} in it: +> @@ -439,7 +439,7 @@ +> where the pattern must be unindented and the action must +> begin on the same line. +> +> -See below for a further description of patterns and +> +@xref{Patterns}, for a further description of patterns and +> actions. +> +> Finally, the user code section is simply copied to +> @@ -468,7 +468,7 @@ +> is also copied verbatim to the output up to the next "*/". +> +> @node Patterns, Matching, Format, Top +> -@section Patterns +> +@chapter Patterns +> +> The patterns in the input are written using an extended +> set of regular expressions. These are: +> @@ -697,7 +697,7 @@ +> @end itemize +> +> @node Matching, Actions, Patterns, Top +> -@section How the input is matched +> +@chapter How the input is matched +> +> When the generated scanner is run, it analyzes its input +> looking for strings which match any of its patterns. If +> @@ -773,7 +773,7 @@ +> classes (the @code{c++} option; see below). +> +> @node Actions, Generated scanner, Matching, Top +> -@section Actions +> +@chapter Actions +> +> Each pattern in a rule has a corresponding action, which +> can be any arbitrary C statement. The pattern ends at the +> @@ -1035,7 +1035,7 @@ +> @end itemize +> +> @node Generated scanner, Start conditions, Actions, Top +> -@section The generated scanner +> +@chapter The generated scanner +> +> The output of @code{flex} is the file @file{lex.yy.c}, which contains +> the scanning routine @samp{yylex()}, a number of tables used by +> @@ -1136,15 +1136,15 @@ +> +> Three routines are available for scanning from in-memory +> buffers rather than files: @samp{yy_scan_string()}, +> -@samp{yy_scan_bytes()}, and @samp{yy_scan_buffer()}. See the discussion +> -of them below in the section Multiple Input Buffers. 
+> +@samp{yy_scan_bytes()}, and @samp{yy_scan_buffer()}. +> +@xref{Multiple buffers, ,Multiple Input Buffers}. +> +> The scanner writes its @samp{ECHO} output to the @code{yyout} global +> (default, stdout), which may be redefined by the user +> simply by assigning it to some other @code{FILE} pointer. +> +> @node Start conditions, Multiple buffers, Generated scanner, Top +> -@section Start conditions +> +@chapter Start conditions +> +> @code{flex} provides a mechanism for conditionally activating +> rules. Any rule whose pattern is prefixed with "<sc>" +> @@ -1500,7 +1500,7 @@ +> @samp{%option stack} directive (see Options below). +> +> @node Multiple buffers, End-of-file rules, Start conditions, Top +> -@section Multiple input buffers +> +@chapter Multiple input buffers +> +> Some scanners (such as those which support "include" +> files) require reading from several input streams. As +> @@ -1675,7 +1675,7 @@ +> @end table +> +> @node End-of-file rules, Miscellaneous, Multiple buffers, Top +> -@section End-of-file rules +> +@chapter End-of-file rules +> +> The special rule "<<EOF>>" indicates actions which are to +> be taken when an end-of-file is encountered and yywrap() +> @@ -1735,7 +1735,7 @@ +> @end example +> +> @node Miscellaneous, User variables, End-of-file rules, Top +> -@section Miscellaneous macros +> +@chapter Miscellaneous macros +> +> The macro @code{YY_USER_ACTION} can be defined to provide an +> action which is always executed prior to the matched +> @@ -1800,7 +1800,7 @@ +> the @code{YY_BREAK} is inaccessible. +> +> @node User variables, YACC interface, Miscellaneous, Top +> -@section Values available to the user +> +@chapter Values available to the user +> +> This section summarizes the various values available to +> the user in the rule actions. 
+> @@ -1865,7 +1865,7 @@ +> @end itemize +> +> @node YACC interface, Options, User variables, Top +> -@section Interfacing with @code{yacc} +> +@chapter Interfacing with @code{yacc} +> +> One of the main uses of @code{flex} is as a companion to the @code{yacc} +> parser-generator. @code{yacc} parsers expect to call a routine +> @@ -1890,7 +1890,7 @@ +> @end example +> +> @node Options, Performance, YACC interface, Top +> -@section Options +> +@chapter Options +> @code{flex} has the following options: +> +> @table @samp +> @@ -1903,8 +1903,8 @@ +> and @samp{-Cf} or @samp{-CF} is used, the generated scanner will +> run faster (see the @samp{-p} flag). Only users who wish +> to squeeze every last cycle out of their scanners +> -need worry about this option. (See the section on +> -Performance Considerations below.) +> +need worry about this option. (@pxref{Performance, , +> +Performance Considerations}) +> +> @item -c +> is a do-nothing, deprecated option included for +> @@ -2138,8 +2138,7 @@ +> +> @item -+ +> specifies that you want flex to generate a C++ +> -scanner class. See the section on Generating C++ +> -Scanners below for details. +> +scanner class. @xref{C++, ,Generating C++ Scanners}. +> +> @item -C[aefFmr] +> controls the degree of table compression and, more +> @@ -2419,8 +2418,7 @@ +> function @samp{foo::yylex()} instead of @samp{yyFlexLexer::yylex()}. +> It also generates a @samp{yyFlexLexer::yylex()} member function that +> emits a run-time error (by invoking @samp{yyFlexLexer::LexerError()}) +> -if called. See Generating C++ Scanners, below, for additional +> -information. +> +if called. @xref{C++, ,Generating C++ Scanners}. +> +> A number of options are available for lint purists who +> want to suppress the appearance of unneeded routines in +> @@ -2439,7 +2437,7 @@ +> unless you use @samp{%option stack}). 
+> +> @node Performance, C++, Options, Top +> -@section Performance considerations +> +@chapter Performance considerations +> +> The main design goal of @code{flex} is that it generate +> high-performance scanners. It has been optimized for dealing +> @@ -2756,7 +2754,7 @@ +> the two is at about 8K characters/token. +> +> @node C++, Incompatibilities, Performance, Top +> -@section Generating C++ scanners +> +@chapter Generating C++ scanners +> +> @code{flex} provides two different ways to generate scanners for +> use with C++. The first way is to simply compile a +> @@ -2975,7 +2973,7 @@ +> releases. +> +> @node Incompatibilities, Diagnostics, C++, Top +> -@section Incompatibilities with @code{lex} and POSIX +> +@chapter Incompatibilities with @code{lex} and POSIX +> +> @code{flex} is a rewrite of the AT&T Unix @code{lex} tool (the two +> implementations do not share any code, though), with some +> @@ -3051,8 +3049,8 @@ +> +> Also note that flex C++ scanner classes @emph{are} +> reentrant, so if using C++ is an option for you, you +> -should use them instead. See "Generating C++ +> -Scanners" above for details. +> +should use them instead. @xref{C++, ,Generating C++ +> +Scanners}. +> +> @item +> @samp{output()} is not supported. Output from the @samp{ECHO} +> @@ -3198,7 +3196,7 @@ +> line. +> +> @node Diagnostics, Files, Incompatibilities, Top +> -@section Diagnostics +> +@chapter Diagnostics +> +> @table @samp +> @item warning, rule cannot be matched +> @@ -3251,8 +3249,7 @@ +> scanner specification includes recognizing the 8-bit +> character @var{x} and you did not specify the -8 flag, and your +> scanner defaulted to 7-bit because you used the @samp{-Cf} or @samp{-CF} +> -table compression options. See the discussion of the @samp{-7} +> -flag for details. +> +table compression options. @xref{Options, ,@samp{-7}flag}. 
+> +> @item flex scanner push-back overflow +> you used @samp{unput()} to push +> @@ -3285,7 +3282,7 @@ +> @end table +> +> @node Files, Deficiencies, Diagnostics, Top +> -@section Files +> +@chapter Files +> +> @table @file +> @item -lfl +> @@ -3311,7 +3308,7 @@ +> @end table +> +> @node Deficiencies, See also, Files, Top +> -@section Deficiencies / Bugs +> +@chapter Deficiencies / Bugs +> +> Some trailing context patterns cannot be properly matched +> and generate warning messages ("dangerous trailing +> @@ -3365,7 +3362,7 @@ +> The @code{flex} internal algorithms need documentation. +> +> @node See also, Author, Deficiencies, Top +> -@section See also +> +@chapter See also +> +> @code{lex}(1), @code{yacc}(1), @code{sed}(1), @code{awk}(1). +> +> @@ -3380,7 +3377,7 @@ +> (deterministic finite automata). +> +> @node Author, , See also, Top +> -@section Author +> +@chapter Author +> +> Vern Paxson, with the help of many ideas and much inspiration from +> Van Jacobson. Original version by Jef Poskanzer. 
The fast table +> + +------- End of Forwarded Messages + + +1, answered,, +X-Coding-System: nil +Mail-from: From vern@daffy.ee.lbl.gov Mon Mar 26 18:48:05 2001 +Return-Path: <vern@daffy.ee.lbl.gov> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id SAA00754 + for <wlestes@localhost>; Mon, 26 Mar 2001 18:48:05 -0500 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Mon, 26 Mar 2001 18:48:05 -0500 (EST) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f2QNVUe03862 + for <wlestes@hermes.email.uncg.edu>; Mon, 26 Mar 2001 18:31:30 -0500 (EST) +Received: from daffy.ee.lbl.gov (daffy.ee.lbl.gov [131.243.1.31]) + by external-gw.uncg.edu (8.9.3/8.9.3) with ESMTP id SAA18856 + for <wlestes@uncg.edu>; Mon, 26 Mar 2001 18:31:28 -0500 (EST) +Received: (from vern@localhost) + by daffy.ee.lbl.gov (8.10.0/8.10.0) id f2QNVPc23647; + Mon, 26 Mar 2001 15:31:25 -0800 (PST) +Message-Id: <200103262331.f2QNVPc23647@daffy.ee.lbl.gov> +To: wlestes@uncg.edu +Subject: Fwd: release 2.5 of flex... +Date: Mon, 26 Mar 2001 15:31:25 PST +From: Vern Paxson <vern@ee.lbl.gov> + +*** EOOH *** +To: wlestes@uncg.edu +Subject: Fwd: release 2.5 of flex... +Date: Mon, 26 Mar 2001 15:31:25 PST +From: Vern Paxson <vern@ee.lbl.gov> + +(Any luck finding some flex cycles?) + +------- Forwarded Message + +Date: Mon, 26 Mar 2001 00:06:31 +0200 +From: Kai Hennig <khennig@pobox.com> +Subject: release 2.5 of flex... +To: vern@ee.lbl.gov +X-Spam-Filter: check_local@studserv.stud.uni-hannover.de by digitalanswers.org +Reply-To: khennig@pobox.com +Organization: TRI Systems +X-Mailer: Mozilla 4.76 (Macintosh; U; PPC) +X-Accept-Language: en +MIME-Version: 1.0 +Content-Type: text/plain; charset=iso-8859-1 +Content-Transfer-Encoding: 8bit + +> Extract from file 'INSTALL' from flex-2.5.4 +> +> ... 
+> If you need to do unusual things to compile the package, we encourage +> you to figure out how `configure' could check whether to do them, and +> mail diffs or instructions to the address given in the README so we +> can include them in the next release. +> ... + +Hi Vern, + +I'm using your flex package 2.5.4 on a Macintosh with Apple's Macintosh +Programmers Workshop 3.5 (MPW). There is a description coming along +with your package for the THINK C version 6.0 from Scott Hofmann +(23-JUL-94) which is not helpful for using the package with MPW. I +enclosed a brief instruction for using the flex package with MPW which +might be more helpful since Think C is no longer officially available +(as far as I know). + +greetings, +Kai + + + + +Notes on the MPW 3.5 version of Flex 2.5.4 +Kai Hennig 25-Mar-2001 +Internet: khennig@pobox.com + +To compile and run Flex 2.5.4 as an MPW Tool no changes to any file have +to be done. Instead add a file called 'config.h' to the files listed in +the command lines given below with the content listed below the +commands.
To compile and link all necessary files execute the following +commands in the MPW shell: + +MrC -noMapCR -alloca -ansi strict main.c +MrC -noMapCR -alloca -ansi strict ccl.c +MrC -noMapCR -alloca -ansi strict dfa.c +MrC -noMapCR -alloca -ansi strict ecs.c +MrC -noMapCR -alloca -ansi strict gen.c +MrC -noMapCR -alloca -ansi strict misc.c +MrC -noMapCR -alloca -ansi strict nfa.c +MrC -noMapCR -alloca -ansi strict -w off parse.c +MrC -noMapCR -alloca -ansi strict initscan.c +MrC -noMapCR -alloca -ansi strict tblcmp.c +MrC -noMapCR -alloca -ansi strict sym.c +MrC -noMapCR -alloca -ansi strict skel.c +MrC -noMapCR -alloca -ansi strict yylex.c +PPCLink -t 'MPST' -c 'MPS ' -o flex * +main.c.o * +ccl.c.o * +dfa.c.o * +ecs.c.o * +gen.c.o * +misc.c.o * +nfa.c.o * +parse.c.o * +initscan.c.o * +tblcmp.c.o * +sym.c.o * +skel.c.o * +yylex.c.o * +"{SharedLibraries}InterfaceLib" * +"{SharedLibraries}StdCLib" * +"{SharedLibraries}MathLib" * +"{PPCLibraries}StdCRuntime.o" * +"{PPCLibraries}PPCCRuntime.o" * +"{PPCLibraries}MrCIOStreams.o" * +"{PPCLibraries}PPCToolLibs.o" + + +/* File 'config.h' */ +/* Define to `unsigned' if <sys/types.h> doesn't define. */ +#undef size_t + +/* Define if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Define if you have the <string.h> header file. */ +#define HAVE_STRING_H 1 + +/* Define if you have <alloca.h> and it should be used (not on +Ultrix). 
*/ +#define HAVE_ALLOCA_H 1 + +/* Define if you use FAT file system, leave undefined for NTFS */ +#undef SHORT_FILE_NAMES + +/* Never do interactive input */ +#define YY_NEVER_INTERACTIVE 1 +/* EOF File 'config.h' */ + +------- End of Forwarded Message + + +1,, +X-Coding-System: nil +Mail-from: From vern@daffy.ee.lbl.gov Wed Mar 28 04:07:49 2001 +Return-Path: <vern@daffy.ee.lbl.gov> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id EAA02765 + for <wlestes@localhost>; Wed, 28 Mar 2001 04:07:49 -0500 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Wed, 28 Mar 2001 04:07:49 -0500 (EST) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f2S8o2e15934 + for <wlestes@hermes.email.uncg.edu>; Wed, 28 Mar 2001 03:50:02 -0500 (EST) +Received: from daffy.ee.lbl.gov (daffy.ee.lbl.gov [131.243.1.31]) + by external-gw.uncg.edu (8.9.3/8.9.3) with ESMTP id DAA23213 + for <wlestes@uncg.edu>; Wed, 28 Mar 2001 03:50:00 -0500 (EST) +Received: (from vern@localhost) + by daffy.ee.lbl.gov (8.10.0/8.10.0) id f2S8nr529885; + Wed, 28 Mar 2001 00:49:53 -0800 (PST) +Message-Id: <200103280849.f2S8nr529885@daffy.ee.lbl.gov> +To: "John W. Millaway" <johnmillaway@yahoo.com> +Cc: help-flex@gnu.org, wlestes@uncg.edu +Subject: Re: reentrant C scanner +In-reply-to: Your message of Tue, 27 Mar 2001 12:49:46 PST. +Date: Wed, 28 Mar 2001 00:49:53 PST +From: Vern Paxson <vern@ee.lbl.gov> + +*** EOOH *** +To: "John W. Millaway" <johnmillaway@yahoo.com> +Cc: help-flex@gnu.org, wlestes@uncg.edu +Subject: Re: reentrant C scanner +In-reply-to: Your message of Tue, 27 Mar 2001 12:49:46 PST. +Date: Wed, 28 Mar 2001 00:49:53 PST +From: Vern Paxson <vern@ee.lbl.gov> + +> I contacted you (or someone there at flex H.Q.) about modifying flex to generate a +> reentrant scanner. 
I found some time recently, and the modifications are nearly +> complete. It wasn't as bad as I had originally thought it would be. +> +> I have two questions for you: +> +> 1. What is the procedure for regression testing? +> 2. What is the procedure for merging my modifications with the current +> distribution? + +Both of these are not well defined at the moment. Will Estes (cc'd) has +volunteered to give taking over flex maintenance/development a try, and +we're right now in a preliminary period during which he's gauging whether +he'll indeed have the cycles for doing so. + + Vern + + +1,, +X-Coding-System: nil +Mail-from: From vern@daffy.ee.lbl.gov Thu Mar 29 18:05:46 2001 +Return-Path: <vern@daffy.ee.lbl.gov> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id SAA02631 + for <wlestes@localhost>; Thu, 29 Mar 2001 18:05:46 -0500 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Thu, 29 Mar 2001 18:05:46 -0500 (EST) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f2TMmHe28250 + for <wlestes@hermes.email.uncg.edu>; Thu, 29 Mar 2001 17:48:17 -0500 (EST) +Received: from daffy.ee.lbl.gov (daffy.ee.lbl.gov [131.243.1.31]) + by external-gw.uncg.edu (8.9.3/8.9.3) with ESMTP id RAA02561 + for <wlestes@uncg.edu>; Thu, 29 Mar 2001 17:48:16 -0500 (EST) +Received: (from vern@localhost) + by daffy.ee.lbl.gov (8.10.0/8.10.0) id f2TMmCD10050; + Thu, 29 Mar 2001 14:48:12 -0800 (PST) +Message-Id: <200103292248.f2TMmCD10050@daffy.ee.lbl.gov> +To: John Tupper <john_tupper@tenornetworks.com> +Subject: Re: flex bug +Cc: wlestes@uncg.edu +In-reply-to: Your message of Thu, 29 Mar 2001 13:52:38 PST. 
+Date: Thu, 29 Mar 2001 14:48:12 PST +From: Vern Paxson <vern@ee.lbl.gov> + +*** EOOH *** +To: John Tupper <john_tupper@tenornetworks.com> +Subject: Re: flex bug +Cc: wlestes@uncg.edu +In-reply-to: Your message of Thu, 29 Mar 2001 13:52:38 PST. +Date: Thu, 29 Mar 2001 14:48:12 PST +From: Vern Paxson <vern@ee.lbl.gov> + +Argh, yes, you're right. This is (mis)feature interaction between +%option yylineno and trailing context. + +I've cc'd Will Estes, who is trying out taking over flex maintenance / +development. Will, I think the place to fix this is in finish_rule() in +nfa.c, making it generate extra code to loop through the characters being +put back and decrement yylineno accordingly. + + Vern + + +> +> --------------DC4CBAA5BA86BA0906373DD6 +> Content-Type: text/plain; charset=us-ascii +> Content-Transfer-Encoding: 7bit +> +> Vern, +> Are you the right person to send this to? It's a groaner. . . +> +> If you define a rule with a trailing context and the trailing context +> matches a newline, the newline gets counted twice (in yylineno), once in +> the trailing context and once when it's matched for real. +> +> Sample grammar: +> +> \/\* { begin(Comment); } +> <Comment>\*/[^/] { ; /* this rule is problematic */ } +> <Comment>[^\*]+ { ; } +> <Comment>\*\/ { begin(0); } +> +> The above grammar strips out C style comments. If a comment contains a +> star at the end of a line, yylineno gets incremented twice. +> +> We used the obvious work around by adding a separate rule to explicitly +> match *\n without any trailing context, so we're not waiting for a fix. +> +> Oh yeah, we're using version 2.5.4. +> +> Thanks, +> John Tupper +> (john_tupper@tenornetworks.com) +> +> +> --------------DC4CBAA5BA86BA0906373DD6 +> Content-Type: text/html; charset=us-ascii +> Content-Transfer-Encoding: 7bit +> +> <!doctype html public "-//w3c//dtd html 4.0 transitional//en"> +> <html> +> Vern, +> <br> Are you the right person to send this to? It's a groaner. +> . .
+> <p>If you define a rule with a trailing context and the trailing context +> matches a newline, the newline gets counted twice (in yylineno), once in +> the trailing context and once when its matched for real. +> <p>Sample grammer: +> <p><tt>\/\* +> { begin(Comment); }</tt> +> <br><tt><Comment>\*/[^/] { ; /* this rule is problematic +> */ }</tt> +> <br><tt><Comment>[^\*]+ { ; }</tt> +> <br><tt><Comment>\*\/ { begin(0); }</tt> +> <p>The above grammer strips out C style comments. If a comment contains +> a star at the end of a line, yylineno gets incremented twice. +> <p>We used the obvious work around by adding a seperate rule to explicitly +> match *\n without any trailing context, so we're not waiting for a fix. +> <p>Oh yeah, we're using version 2.5.4. +> <p>Thanks, +> <br>John Tupper +> <br>(john_tupper@tenornetworks.com) +> <br> </html> +> +> --------------DC4CBAA5BA86BA0906373DD6-- +> + + +1, answered,, +X-Coding-System: nil +Mail-from: From johnmillaway@yahoo.com Sun Apr 8 18:10:46 2001 +Return-Path: <johnmillaway@yahoo.com> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id SAA06099 + for <wlestes@localhost>; Sun, 8 Apr 2001 18:10:46 -0400 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Sun, 08 Apr 2001 18:10:46 -0400 (EDT) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f38Lr7e10030 + for <wlestes@hermes.email.uncg.edu>; Sun, 8 Apr 2001 17:53:07 -0400 (EDT) +Received: from web9407.mail.yahoo.com (web9407.mail.yahoo.com [216.136.129.23]) + by external-gw.uncg.edu (8.9.3/8.9.3) with SMTP id RAA28292 + for <wlestes@uncg.edu>; Sun, 8 Apr 2001 17:53:05 -0400 (EDT) +Message-ID: <20010408215303.56462.qmail@web9407.mail.yahoo.com> +Received: from [216.254.82.102] by web9407.mail.yahoo.com; Sun, 08 Apr 2001 14:53:03 PDT +Date: Sun, 8 Apr 2001 14:53:03 -0700 (PDT) +From: 
"John W. Millaway" <johnmillaway@yahoo.com> +Subject: Re: reentrant C scanner +To: "W. L. Estes" <wlestes@uncg.edu> +In-Reply-To: <200104082000.QAA05847@michael.uncg.edu> +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii + +*** EOOH *** +Date: Sun, 8 Apr 2001 14:53:03 -0700 (PDT) +From: "John W. Millaway" <johnmillaway@yahoo.com> +Subject: Re: reentrant C scanner +To: "W. L. Estes" <wlestes@uncg.edu> +In-Reply-To: <200104082000.QAA05847@michael.uncg.edu> + +Hi, + +(Sorry it took a while, my DSL went down again.) + +Here is a link to the diff, because my browser (Opera) doesn't do file-uploads +properly. + +http://astro.temple.edu/~john43/flex_r.diff.gz + +I did not include a diff of skel.c, because it is built from flex.skl. I did a GNU +diff, instead of CVS diff, because I had already modified flex before importing it +into CVS... which means `cvs diff' won't work without some massaging. I'm no CVS +expert, let me know if you have a solution. + +This all works, but is not finished. I want to test it with as many scanners as I +can dig up. At some point, I'll submit a test directory and a "README.reenrant" as +well as a modification of the texinfo docs. + +Briefly, the changes are these: + +1. Added option "reentrant" (-R). +2. Added preprocessor directive: YY_REENTRANT. (and a few more related to this). +3. Added internal "struct yy_globals_t"; user-visible as "void*". +4. All global variables are conditionally eliminated and placed in the struct. +5. Nearly all functions conditionally take an additional argument. +6. yylex() is now yylex_r(void*) +7. All access to globals must be wrapped in YY_G() macro. +8. User may attach arbitrary data (void*) to the scanner, to maintain state. +9. New functions for using scanner: yy_init_r, yy_free_r; +10. Accessor functions to extract values from struct or globals. +11. Frequently accessed variables (yytext, yyleng, etc..) have macro shortcuts, +(yytext_r, yyleng_r, etc.) 
which hide the internals from users building reentrant +scanners. + +-John Millaway + + +__________________________________________________ +Do You Yahoo!? +Get email at your own domain with Yahoo! Mail. +http://personal.mail.yahoo.com/ + + +1,, +X-Coding-System: nil +Mail-from: From johnmillaway@yahoo.com Sun Apr 8 19:16:58 2001 +Return-Path: <johnmillaway@yahoo.com> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id TAA06140 + for <wlestes@localhost>; Sun, 8 Apr 2001 19:16:58 -0400 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Sun, 08 Apr 2001 19:16:58 -0400 (EDT) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f38MwXe18411 + for <wlestes@hermes.email.uncg.edu>; Sun, 8 Apr 2001 18:58:33 -0400 (EDT) +Received: from web9402.mail.yahoo.com (web9402.mail.yahoo.com [216.136.129.108]) + by external-gw.uncg.edu (8.9.3/8.9.3) with SMTP id SAA01092 + for <wlestes@uncg.edu>; Sun, 8 Apr 2001 18:58:32 -0400 (EDT) +Message-ID: <20010408225832.81903.qmail@web9402.mail.yahoo.com> +Received: from [216.254.82.102] by web9402.mail.yahoo.com; Sun, 08 Apr 2001 15:58:32 PDT +Date: Sun, 8 Apr 2001 15:58:32 -0700 (PDT) +From: "John W. Millaway" <johnmillaway@yahoo.com> +Subject: Re: reentrant C scanner +To: "W. L. Estes" <wlestes@uncg.edu> +In-Reply-To: <200104082224.SAA06112@michael.uncg.edu> +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii + +*** EOOH *** +Date: Sun, 8 Apr 2001 15:58:32 -0700 (PDT) +From: "John W. Millaway" <johnmillaway@yahoo.com> +Subject: Re: reentrant C scanner +To: "W. L. Estes" <wlestes@uncg.edu> +In-Reply-To: <200104082224.SAA06112@michael.uncg.edu> + +> have downloaded this and will set aside time to read your patch this +> week. +Please download again, I fixed another bug. + +> > I did not include a diff of skel.c, because it is built from flex.skl. 
I did a +> GNU> +> ok. will remember this... + +Just do `./mkskel.sh <flex.skl >skel.c` to rebuild it. + +> tag your current sources. +> import the standard sources and give them another tag/branch name. +> do a cvs diff explicitly between the two above. + +Will do. + + +> that's the gut reaction. let me know if that works or if you have +> troubles. But I need to prepare a publicly readable cvs archive anyway. + +Have you thought about sourceforge? + +> the scanner stuff is finished, yes? (sure, there will be bugs and +> things to work out, but i am asking if the reentrant stuff is in place.) +Yes, finished. But the changes were so widespread and there are so many options and +directives that I'm sure something will break very soon. That's why a +regression-test directory would be nice. + +> Go ahead and write the README. This is a good exercise--and usually +> fairly easy to knock off. It can serve as a guide later on down the +> road. +OK. + +> To clarify, reentrant should probably not be a default behavior for +> now. +It is not. In fact, yacc/bison parsers won't call yylex_r()... something I'll work +on later. + + +> If it's ok with you, I'm going to forward your post on to +> help-flex@gnu.org so the "community" can dig in too. + +Great. Please include my email address. + +Also, the community should really decide on the new API. I am wide open to +suggestions. Currently, the new API allows a lexer to be backwards compatible, so +that a working reentrant scanner should be able to be recompiled as a non-reentrant +scanner, and everything will work just dandy on the inside. However, the syntax is +not so pretty for the user. On the other hand, a reentrant scanner that is not +meant to be backwards compatible has a cleaner API (fewer oddball macros). + +An example to illustrate: + +%option reentrant +%s FOOBAR + +%% + + /* The line in each pair below do the same thing. + But second of each pair is backwards-compatible. 
*/ + +"call-func" { yy_push_state ( FOOBAR , yy_globals ); } +"call-func" { yy_push_state ( FOOBAR YYCALL_LAST_ARG ); } + +"user-data" { printf( (char*) yyusr_r );} +"user-data" { printf( (char*) yy_get_usr(YYCALL_ONLY_ARG));} + +"yyleng" { printf( "%s %d" , yytext_r, yyleng_r ); } +"yyleng" { printf( "%s %d" , YY_G(yytext), YY_G(yyleng)); } + +%% +main() +{ + char* stuff = "user specific data."; + void * mylexer; + + yy_init_r ( & mylexer ); /* Create the scanner. */ + yy_set_usr( stuff, mylexer ); /* set user-defined stuff. */ + + while( 1 ) + yylex_r ( mylexer ); + + yy_free_r ( mylexer ); +} + +-John Millaway + + +__________________________________________________ +Do You Yahoo!? +Get email at your own domain with Yahoo! Mail. +http://personal.mail.yahoo.com/ + + +1,, +X-Coding-System: nil +Mail-from: From johnmillaway@yahoo.com Sun Apr 8 20:12:39 2001 +Return-Path: <johnmillaway@yahoo.com> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id UAA06162 + for <wlestes@localhost>; Sun, 8 Apr 2001 20:12:39 -0400 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Sun, 08 Apr 2001 20:12:39 -0400 (EDT) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f38NsQe25613 + for <wlestes@hermes.email.uncg.edu>; Sun, 8 Apr 2001 19:54:26 -0400 (EDT) +Received: from web9406.mail.yahoo.com (web9406.mail.yahoo.com [216.136.129.22]) + by external-gw.uncg.edu (8.9.3/8.9.3) with SMTP id TAA03391 + for <wlestes@uncg.edu>; Sun, 8 Apr 2001 19:54:24 -0400 (EDT) +Message-ID: <20010408235423.12509.qmail@web9406.mail.yahoo.com> +Received: from [216.254.82.102] by web9406.mail.yahoo.com; Sun, 08 Apr 2001 16:54:23 PDT +Date: Sun, 8 Apr 2001 16:54:23 -0700 (PDT) +From: "John W. Millaway" <johnmillaway@yahoo.com> +Subject: Re: reentrant C scanner +To: "W. L. 
Estes" <wlestes@uncg.edu> +In-Reply-To: <200104082224.SAA06112@michael.uncg.edu> +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii + +*** EOOH *** +Date: Sun, 8 Apr 2001 16:54:23 -0700 (PDT) +From: "John W. Millaway" <johnmillaway@yahoo.com> +Subject: Re: reentrant C scanner +To: "W. L. Estes" <wlestes@uncg.edu> +In-Reply-To: <200104082224.SAA06112@michael.uncg.edu> + +I'm sure you haven't had time to look at it yet, but there are two other points I +wanted to mention: + +1. Once the reentrant scanner is working, there is no need for the non-reentrant +scanner. The non-reentrant API can be built as a wrapper around the reentrant +scanner. Much, much less code maintenance. I will happily do the conversion. + +2. The reentrancy can be verified, in part, by running `nm -A -l' on a compiled +scanner. `nm` is part of binutils and shows, among other things, the global +variables. In the reentrant scanner, there should be none. + +-John Millaway + +PS - What is the mailing list? I guess I should subscribe! The gnu site says that +there is currently no list for flex: +http://www.gnu.org/software/flex/flex.html#lists + + +__________________________________________________ +Do You Yahoo!? +Get email at your own domain with Yahoo! Mail. 
+http://personal.mail.yahoo.com/ + + +1, answered,, +X-Coding-System: nil +Mail-from: From vern@daffy.ee.lbl.gov Tue Apr 17 04:00:22 2001 +Return-Path: <vern@daffy.ee.lbl.gov> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id EAA31012 + for <wlestes@localhost>; Tue, 17 Apr 2001 04:00:21 -0400 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Tue, 17 Apr 2001 04:00:21 -0400 (EDT) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f3H7eqe08321 + for <wlestes@hermes.email.uncg.edu>; Tue, 17 Apr 2001 03:40:52 -0400 (EDT) +Received: from daffy.ee.lbl.gov (daffy.ee.lbl.gov [131.243.1.31]) + by external-gw.uncg.edu (8.9.3/8.9.3) with ESMTP id DAA08895 + for <wlestes@uncg.edu>; Tue, 17 Apr 2001 03:40:50 -0400 (EDT) +Received: (from vern@localhost) + by daffy.ee.lbl.gov (8.10.0/8.10.0) id f3H7eoW18946; + Tue, 17 Apr 2001 00:40:50 -0700 (PDT) +Message-Id: <200104170740.f3H7eoW18946@daffy.ee.lbl.gov> +To: "Yuri Victorovich" <yurivict@hotmail.com> +Subject: Re: Flex comments +Cc: wlestes@uncg.edu +In-reply-to: Your message of Sun, 15 Apr 2001 22:47:37 PDT. +Date: Tue, 17 Apr 2001 00:40:50 PDT +From: Vern Paxson <vern@ee.lbl.gov> + +*** EOOH *** +To: "Yuri Victorovich" <yurivict@hotmail.com> +Subject: Re: Flex comments +Cc: wlestes@uncg.edu +In-reply-to: Your message of Sun, 15 Apr 2001 22:47:37 PDT. +Date: Tue, 17 Apr 2001 00:40:50 PDT +From: Vern Paxson <vern@ee.lbl.gov> + +> One problem: how to eliminate the static variables without making it C++ +> scanner with option -+? I do not want to introduce any of C++ into my +> project and at the same time I use pthreads on FreeBSD. +> +> That would be a nice feature to have such an option. + +I agree it would be nice. I've cc'd Will Estes, who is trying out taking +over flex maintenance / development. 
+ + Vern + + +1, answered,, +X-Coding-System: nil +Mail-from: From yurivict@hotmail.com Tue Apr 17 10:42:56 2001 +Return-Path: <yurivict@hotmail.com> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id KAA01266 + for <wlestes@localhost>; Tue, 17 Apr 2001 10:42:56 -0400 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Tue, 17 Apr 2001 10:42:56 -0400 (EDT) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f3HENMe01358 + for <wlestes@hermes.email.uncg.edu>; Tue, 17 Apr 2001 10:23:22 -0400 (EDT) +Received: from hotmail.com (f210.law12.hotmail.com [64.4.19.210]) + by external-gw.uncg.edu (8.9.3/8.9.3) with ESMTP id KAA06325 + for <wlestes@uncg.edu>; Tue, 17 Apr 2001 10:23:21 -0400 (EDT) +Received: from mail pickup service by hotmail.com with Microsoft SMTPSVC; + Tue, 17 Apr 2001 07:22:51 -0700 +Received: from 194.158.212.99 by lw12fd.law12.hotmail.msn.com with HTTP; Tue, 17 Apr 2001 14:22:51 GMT +X-Originating-IP: [194.158.212.99] +From: "Yuri Victorovich" <yurivict@hotmail.com> +To: wlestes@uncg.edu +Subject: Re: Flex comments +Date: Tue, 17 Apr 2001 07:22:51 -0700 +Mime-Version: 1.0 +Content-Type: text/plain; format=flowed +Message-ID: <F210b98KtdhQYZkxOjm00009556@hotmail.com> +X-OriginalArrivalTime: 17 Apr 2001 14:22:51.0694 (UTC) FILETIME=[E2BCD4E0:01C0C749] + +*** EOOH *** +From: "Yuri Victorovich" <yurivict@hotmail.com> +To: wlestes@uncg.edu +Subject: Re: Flex comments +Date: Tue, 17 Apr 2001 07:22:51 -0700 + +Will, Thank you for reply! + +I spent the whole day yesterday writing awk script to +convert normal flex output to thread-safe form. + +If you do not have time or will need help with this +work on flex -- I can do anything from research, programming +to testing. Just let me know -- I digged into it to some +extent already and will have some time in the coming weeks. 
+ +Kind Regards, +Yuri. + + + + + +>From: "W. L. Estes" <wlestes@uncg.edu> +>To: vern@ee.lbl.gov +>CC: yurivict@hotmail.com +>Subject: Re: Flex comments +>Date: Tue, 17 Apr 2001 08:33:35 -0400 +> +> > > One problem: how to eliminate the static variables without making it +>C++ +> > > scanner with option -+? I do not want to introduce any of C++ into my +> > > project and at the same time I use pthreads on FreeBSD. +> > > +> > > That would be a nice feature to have such an option. +> > +> > I agree it would be nice. I've cc'd Will Estes, who is trying out +>taking +> > over flex maintenance / development. +> +>Thanks for the suggestion. I'll look into what it will take to do +>this. +> +>--Will + +_________________________________________________________________ +Get your FREE download of MSN Explorer at http://explorer.msn.com + + +1, answered,, +X-Coding-System: nil +Mail-from: From vern@daffy.ee.lbl.gov Tue Apr 17 12:34:26 2001 +Return-Path: <vern@daffy.ee.lbl.gov> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id MAA01409 + for <wlestes@localhost>; Tue, 17 Apr 2001 12:34:26 -0400 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Tue, 17 Apr 2001 12:34:26 -0400 (EDT) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f3HGFpe27457 + for <wlestes@hermes.email.uncg.edu>; Tue, 17 Apr 2001 12:15:51 -0400 (EDT) +Received: from daffy.ee.lbl.gov (daffy.ee.lbl.gov [131.243.1.31]) + by external-gw.uncg.edu (8.9.3/8.9.3) with ESMTP id MAA20041 + for <wlestes@uncg.edu>; Tue, 17 Apr 2001 12:15:49 -0400 (EDT) +Received: (from vern@localhost) + by daffy.ee.lbl.gov (8.10.0/8.10.0) id f3HGFoP19785; + Tue, 17 Apr 2001 09:15:50 -0700 (PDT) +Message-Id: <200104171615.f3HGFoP19785@daffy.ee.lbl.gov> +To: "W. L. 
Estes" <wlestes@uncg.edu> +Subject: Re: Flex comments +In-reply-to: Your message of Tue, 17 Apr 2001 08:57:41 PDT. +Date: Tue, 17 Apr 2001 09:15:50 PDT +From: Vern Paxson <vern@ee.lbl.gov> + +*** EOOH *** +To: "W. L. Estes" <wlestes@uncg.edu> +Subject: Re: Flex comments +In-reply-to: Your message of Tue, 17 Apr 2001 08:57:41 PDT. +Date: Tue, 17 Apr 2001 09:15:50 PDT +From: Vern Paxson <vern@ee.lbl.gov> + +> If there is more detail in the original message, could you forward it +> to me? + +That was the whole message. The basic problem that people run into +is they want reentrant scanners, so no statics or global variables. +The general strategy for this is to have a new yylex() interface in +which the caller passes in a struct that has all of the otherwise-global +variables. This is a bit tricky to do cleanly because you can't switch +to that as *the* interface, of course, because it would break the +existing API; which means you have to be able to generate two forms of +the skeleton file, one for the existing API with the statics/globals, +and one without them and the struct definition instead. + +There's probably some more discussion on this in the mail tarball I sent +you - search for "reentrant". 
+ + Vern + + +1, filed, answered,, +X-Coding-System: nil +Mail-from: From vern@daffy.ee.lbl.gov Tue Apr 17 12:34:26 2001 +Return-Path: <vern@daffy.ee.lbl.gov> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id MAA01409 + for <wlestes@localhost>; Tue, 17 Apr 2001 12:34:26 -0400 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Tue, 17 Apr 2001 12:34:26 -0400 (EDT) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f3HGFpe27457 + for <wlestes@hermes.email.uncg.edu>; Tue, 17 Apr 2001 12:15:51 -0400 (EDT) +Received: from daffy.ee.lbl.gov (daffy.ee.lbl.gov [131.243.1.31]) + by external-gw.uncg.edu (8.9.3/8.9.3) with ESMTP id MAA20041 + for <wlestes@uncg.edu>; Tue, 17 Apr 2001 12:15:49 -0400 (EDT) +Received: (from vern@localhost) + by daffy.ee.lbl.gov (8.10.0/8.10.0) id f3HGFoP19785; + Tue, 17 Apr 2001 09:15:50 -0700 (PDT) +Message-Id: <200104171615.f3HGFoP19785@daffy.ee.lbl.gov> +To: "W. L. Estes" <wlestes@uncg.edu> +Subject: Re: Flex comments +In-reply-to: Your message of Tue, 17 Apr 2001 08:57:41 PDT. +Date: Tue, 17 Apr 2001 09:15:50 PDT +From: Vern Paxson <vern@ee.lbl.gov> + +*** EOOH *** +To: "W. L. Estes" <wlestes@uncg.edu> +Subject: Re: Flex comments +In-reply-to: Your message of Tue, 17 Apr 2001 08:57:41 PDT. +Date: Tue, 17 Apr 2001 09:15:50 PDT +From: Vern Paxson <vern@ee.lbl.gov> + +> If there is more detail in the original message, could you forward it +> to me? + +That was the whole message. The basic problem that people run into +is they want reentrant scanners, so no statics or global variables. +The general strategy for this is to have a new yylex() interface in +which the caller passes in a struct that has all of the otherwise-global +variables. 
This is a bit tricky to do cleanly because you can't switch +to that as *the* interface, of course, because it would break the +existing API; which means you have to be able to generate two forms of +the skeleton file, one for the existing API with the statics/globals, +and one without them and the struct definition instead. + +There's probably some more discussion on this in the mail tarball I sent +you - search for "reentrant". + + Vern + + +1,, +X-Coding-System: nil +Mail-from: From help-flex-admin@gnu.org Wed Apr 25 00:20:40 2001 +Return-Path: <help-flex-admin@gnu.org> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id AAA22703 + for <wlestes@localhost>; Wed, 25 Apr 2001 00:20:40 -0400 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Wed, 25 Apr 2001 00:20:40 -0400 (EDT) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f3P403e01034 + for <wlestes@hermes.email.uncg.edu>; Wed, 25 Apr 2001 00:00:03 -0400 (EDT) +Received: from fencepost.gnu.org (we-refuse-to-spy-on-our-users@fencepost.gnu.org [199.232.76.164]) + by external-gw.uncg.edu (8.9.3/8.9.3) with ESMTP id AAA05202 + for <wlestes@uncg.edu>; Wed, 25 Apr 2001 00:00:02 -0400 (EDT) +Received: from localhost ([127.0.0.1] helo=fencepost.gnu.org) + by fencepost.gnu.org with esmtp (Exim 3.16 #1 (Debian)) + id 14sGTC-0006Tm-00 + for <wlestes@uncg.edu>; Wed, 25 Apr 2001 00:00:02 -0400 +Received: from [203.199.199.50] (helo=banyannetworks.com) + by fencepost.gnu.org with esmtp (Exim 3.16 #1 (Debian)) + id 14sGRt-0006Sr-00 + for <help-flex@gnu.org>; Tue, 24 Apr 2001 23:58:44 -0400 +Received: from localhost (arasi@localhost) by banyannetworks.com (8.9.3/8.8.3) with ESMTP id JAA03208 for <help-flex@gnu.org>; Wed, 25 Apr 2001 09:25:36 +0530 +From: Avai Arasi <arasi@banyanNetworks.com> +To: help-flex@gnu.org +Subject: Clarification 
Regarding Porting Flex and Bison to RTOS +Message-ID: <Pine.LNX.4.10.10104250920140.3092-100000@banyannetworks.com> +MIME-Version: 1.0 +Content-Type: TEXT/PLAIN; charset=US-ASCII +Sender: help-flex-admin@gnu.org +Errors-To: help-flex-admin@gnu.org +X-BeenThere: help-flex@gnu.org +X-Mailman-Version: 2.0.3 +Precedence: bulk +List-Help: <mailto:help-flex-request@gnu.org?subject=help> +List-Post: <mailto:help-flex@gnu.org> +List-Subscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=subscribe> +List-Id: Users list for Flex, + the GNU lexical analyser generator <help-flex.gnu.org> +List-Unsubscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=unsubscribe> +List-Archive: <http://mail.gnu.org/pipermail/help-flex/> +Date: Wed, 25 Apr 2001 09:25:36 +0530 (IST) + +*** EOOH *** +From: Avai Arasi <arasi@banyanNetworks.com> +To: help-flex@gnu.org +Subject: Clarification Regarding Porting Flex and Bison to RTOS +Sender: help-flex-admin@gnu.org +Precedence: bulk +List-Help: <mailto:help-flex-request@gnu.org?subject=help> +List-Post: <mailto:help-flex@gnu.org> +List-Subscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=subscribe> +List-Id: Users list for Flex, + the GNU lexical analyser generator <help-flex.gnu.org> +List-Unsubscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=unsubscribe> +List-Archive: <http://mail.gnu.org/pipermail/help-flex/> +Date: Wed, 25 Apr 2001 09:25:36 +0530 (IST) + +Hello, + + I am trying to port both Flex and Bison to a Real Time OS. I don't have +the device options like the file which the Flex expects as input. In the +place of the file i have a character pointer. How should I proceed with +the parsing ? + + Is there any standard procedure for this porting ? If somebody has +already done such a work can i have some hints please. + +Thank You. 
+ +Regards, +Avaiarasi +Senior Engineer R & D +Banyan Networks Pvt. Ltd. +Chennai. + + +_______________________________________________ +Help-flex mailing list +Help-flex@gnu.org +http://mail.gnu.org/mailman/listinfo/help-flex + + +1,, +X-Coding-System: nil +Mail-from: From help-flex-admin@gnu.org Wed Apr 25 13:17:31 2001 +Return-Path: <help-flex-admin@gnu.org> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id NAA23790 + for <wlestes@localhost>; Wed, 25 Apr 2001 13:17:31 -0400 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Wed, 25 Apr 2001 13:17:31 -0400 (EDT) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f3PGv2e04258 + for <wlestes@hermes.email.uncg.edu>; Wed, 25 Apr 2001 12:57:02 -0400 (EDT) +Received: from fencepost.gnu.org (fencepost.gnu.org [199.232.76.164]) + by external-gw.uncg.edu (8.9.3/8.9.3) with ESMTP id MAA11157 + for <wlestes@uncg.edu>; Wed, 25 Apr 2001 12:57:01 -0400 (EDT) +Received: from localhost ([127.0.0.1] helo=fencepost.gnu.org) + by fencepost.gnu.org with esmtp (Exim 3.16 #1 (Debian)) + id 14sSb8-00056g-00 + for <wlestes@uncg.edu>; Wed, 25 Apr 2001 12:57:02 -0400 +Received: from web9403.mail.yahoo.com ([216.136.129.109]) + by fencepost.gnu.org with smtp (Exim 3.16 #1 (Debian)) + id 14sSYq-00053W-00 + for <help-flex@gnu.org>; Wed, 25 Apr 2001 12:54:41 -0400 +Message-ID: <20010425165439.64856.qmail@web9403.mail.yahoo.com> +Received: from [216.254.82.102] by web9403.mail.yahoo.com; Wed, 25 Apr 2001 09:54:39 PDT +From: "John W. 
Millaway" <johnmillaway@yahoo.com> +Subject: Bug: %option main overrides %option yywrap +To: help-flex@gnu.org +In-Reply-To: <200104251228.IAA23213@michael.uncg.edu> +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii +Sender: help-flex-admin@gnu.org +Errors-To: help-flex-admin@gnu.org +X-BeenThere: help-flex@gnu.org +X-Mailman-Version: 2.0.3 +Precedence: bulk +List-Help: <mailto:help-flex-request@gnu.org?subject=help> +List-Post: <mailto:help-flex@gnu.org> +List-Subscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=subscribe> +List-Id: Users list for Flex, + the GNU lexical analyser generator <help-flex.gnu.org> +List-Unsubscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=unsubscribe> +List-Archive: <http://mail.gnu.org/pipermail/help-flex/> +Date: Wed, 25 Apr 2001 09:54:39 -0700 (PDT) + +*** EOOH *** +From: "John W. Millaway" <johnmillaway@yahoo.com> +Subject: Bug: %option main overrides %option yywrap +To: help-flex@gnu.org +In-Reply-To: <200104251228.IAA23213@michael.uncg.edu> +Sender: help-flex-admin@gnu.org +Precedence: bulk +List-Help: <mailto:help-flex-request@gnu.org?subject=help> +List-Post: <mailto:help-flex@gnu.org> +List-Subscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=subscribe> +List-Id: Users list for Flex, + the GNU lexical analyser generator <help-flex.gnu.org> +List-Unsubscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=unsubscribe> +List-Archive: <http://mail.gnu.org/pipermail/help-flex/> +Date: Wed, 25 Apr 2001 09:54:39 -0700 (PDT) + +Hello, + +There is a "bug" in the current version of flex (2.5.4) in which the %option +[no]main unconditionally sets the %option [no]yywrap. I use the term "bug" +hesitantly, because this behavior appears to be intentional. 
However, I can't think +of any use for it other than to cause unexpected link errors when yywrap() is not +found. + +Example: + + %option noyywrap /* Disable option yywrap. */ + %option nomain /* Option yywrap is now enabled! */ + +One workaround is to always specify %option yywrap AFTER %option main. In general, +if the user explicitly sets an option, then flex should either honor that setting, +or report a conflict. + +The bug is in "scan.l" at line 247, and is undocumented. + +244: lex-compat lex_compat = option_sense; +245: main { +246: action_define( "YY_MAIN", option_sense ); +247: do_yywrap = ! option_sense; +248: } +249: meta-ecs usemecs = option_sense; + +-John Millaway + + +__________________________________________________ +Do You Yahoo!? +Yahoo! Auctions - buy the things you want at great prices +http://auctions.yahoo.com/ + +_______________________________________________ +Help-flex mailing list +Help-flex@gnu.org +http://mail.gnu.org/mailman/listinfo/help-flex + + +1,, +X-Coding-System: nil +Mail-from: From help-flex-admin@gnu.org Wed Apr 25 14:11:45 2001 +Return-Path: <help-flex-admin@gnu.org> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id OAA23826 + for <wlestes@localhost>; Wed, 25 Apr 2001 14:11:44 -0400 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Wed, 25 Apr 2001 14:11:44 -0400 (EDT) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f3PHp7e27019 + for <wlestes@hermes.email.uncg.edu>; Wed, 25 Apr 2001 13:51:07 -0400 (EDT) +Received: from fencepost.gnu.org (fencepost.gnu.org [199.232.76.164]) + by external-gw.uncg.edu (8.9.3/8.9.3) with ESMTP id NAA17880 + for <wlestes@uncg.edu>; Wed, 25 Apr 2001 13:51:08 -0400 (EDT) +Received: from localhost ([127.0.0.1] helo=fencepost.gnu.org) + by fencepost.gnu.org with esmtp (Exim 3.16 #1 (Debian)) + id 
14sTRT-0003QA-00 + for <wlestes@uncg.edu>; Wed, 25 Apr 2001 13:51:07 -0400 +Received: from web9404.mail.yahoo.com ([216.136.129.110]) + by fencepost.gnu.org with smtp (Exim 3.16 #1 (Debian)) + id 14sTQy-0003Pm-00 + for <help-flex@gnu.org>; Wed, 25 Apr 2001 13:50:36 -0400 +Message-ID: <20010425175036.43329.qmail@web9404.mail.yahoo.com> +Received: from [216.254.82.102] by web9404.mail.yahoo.com; Wed, 25 Apr 2001 10:50:36 PDT +From: "John W. Millaway" <johnmillaway@yahoo.com> +Subject: Re: scaning from memory +To: "alexandre.gouraud" <alexandre.gouraud@laposte.net>, help-flex@gnu.org +In-Reply-To: <GCCZ8R$IiFF_2Id0FNAO2kAXNPACqX5slPk_GQyEsHVyKKjYXVpqFi3j4Ntc@laposte.net> +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii +Sender: help-flex-admin@gnu.org +Errors-To: help-flex-admin@gnu.org +X-BeenThere: help-flex@gnu.org +X-Mailman-Version: 2.0.3 +Precedence: bulk +List-Help: <mailto:help-flex-request@gnu.org?subject=help> +List-Post: <mailto:help-flex@gnu.org> +List-Subscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=subscribe> +List-Id: Users list for Flex, + the GNU lexical analyser generator <help-flex.gnu.org> +List-Unsubscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=unsubscribe> +List-Archive: <http://mail.gnu.org/pipermail/help-flex/> +Date: Wed, 25 Apr 2001 10:50:36 -0700 (PDT) + +*** EOOH *** +From: "John W. 
Millaway" <johnmillaway@yahoo.com> +Subject: Re: scaning from memory +To: "alexandre.gouraud" <alexandre.gouraud@laposte.net>, help-flex@gnu.org +In-Reply-To: <GCCZ8R$IiFF_2Id0FNAO2kAXNPACqX5slPk_GQyEsHVyKKjYXVpqFi3j4Ntc@laposte.net> +Sender: help-flex-admin@gnu.org +Precedence: bulk +List-Help: <mailto:help-flex-request@gnu.org?subject=help> +List-Post: <mailto:help-flex@gnu.org> +List-Subscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=subscribe> +List-Id: Users list for Flex, + the GNU lexical analyser generator <help-flex.gnu.org> +List-Unsubscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=unsubscribe> +List-Archive: <http://mail.gnu.org/pipermail/help-flex/> +Date: Wed, 25 Apr 2001 10:50:36 -0700 (PDT) + +> I am trying to write a parser with flex/bison scanning strings +> in memory, but I can't make it work. I am pretty sure it's a +> common way of using flex, but I didn't find any example on the +> web. (I have read the manual, of course). I have found several +> times this question, but no answer. +> I don't know where to put the yy_scan_string() statement. I +> use the <<EOF>> rule as last rule. And I have also modified +> the prototype of the yylex function with +> #define YY_DECL int yylex(YYSTYPE *lvalp,char *name) +> where name is the string I want to parse. + +You should call yy_scan_string before calling yylex. Here are two examples: + +1. In general: + + YY_BUFFER_STATE buf_state = yy_scan_string( str ); + while( yylex() != 0 ) + ; + yy_delete_buffer(buf_state); + + +2. Your particular scanner: + +%{ +#define YY_USER_INIT buf_state = yy_scan_string( name ); +%} +%% + static YY_BUFFER_STATE buf_state; + +<<EOF>> { yy_delete_buffer( buf_state ); yyterminate(); } +%% + +-John + + +__________________________________________________ +Do You Yahoo!? +Yahoo! 
Auctions - buy the things you want at great prices +http://auctions.yahoo.com/ + +_______________________________________________ +Help-flex mailing list +Help-flex@gnu.org +http://mail.gnu.org/mailman/listinfo/help-flex + + +1,, +X-Coding-System: nil +Mail-from: From help-flex-admin@gnu.org Wed Apr 25 14:41:58 2001 +Return-Path: <help-flex-admin@gnu.org> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id OAA23852 + for <wlestes@localhost>; Wed, 25 Apr 2001 14:41:58 -0400 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Wed, 25 Apr 2001 14:41:58 -0400 (EDT) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f3PILLe10211 + for <wlestes@hermes.email.uncg.edu>; Wed, 25 Apr 2001 14:21:21 -0400 (EDT) +Received: from fencepost.gnu.org (fencepost.gnu.org [199.232.76.164]) + by external-gw.uncg.edu (8.9.3/8.9.3) with ESMTP id OAA21692 + for <wlestes@uncg.edu>; Wed, 25 Apr 2001 14:21:21 -0400 (EDT) +Received: from localhost ([127.0.0.1] helo=fencepost.gnu.org) + by fencepost.gnu.org with esmtp (Exim 3.16 #1 (Debian)) + id 14sTuj-0007Yw-00 + for <wlestes@uncg.edu>; Wed, 25 Apr 2001 14:21:21 -0400 +Received: from daffy.ee.lbl.gov ([131.243.1.31]) + by fencepost.gnu.org with esmtp (Exim 3.16 #1 (Debian)) + id 14sTtm-0007Xg-00 + for <help-flex@gnu.org>; Wed, 25 Apr 2001 14:20:22 -0400 +Received: (from vern@localhost) + by daffy.ee.lbl.gov (8.10.0/8.10.0) id f3PIKJa19998; + Wed, 25 Apr 2001 11:20:19 -0700 (PDT) +Message-Id: <200104251820.f3PIKJa19998@daffy.ee.lbl.gov> +To: "John W. Millaway" <johnmillaway@yahoo.com> +Cc: help-flex@gnu.org +Subject: Re: Bug: %option main overrides %option yywrap +In-reply-to: Your message of Wed, 25 Apr 2001 09:54:39 PDT. 
+From: Vern Paxson <vern@ee.lbl.gov> +Sender: help-flex-admin@gnu.org +Errors-To: help-flex-admin@gnu.org +X-BeenThere: help-flex@gnu.org +X-Mailman-Version: 2.0.3 +Precedence: bulk +List-Help: <mailto:help-flex-request@gnu.org?subject=help> +List-Post: <mailto:help-flex@gnu.org> +List-Subscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=subscribe> +List-Id: Users list for Flex, + the GNU lexical analyser generator <help-flex.gnu.org> +List-Unsubscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=unsubscribe> +List-Archive: <http://mail.gnu.org/pipermail/help-flex/> +Date: Wed, 25 Apr 2001 11:20:19 PDT + +*** EOOH *** +To: "John W. Millaway" <johnmillaway@yahoo.com> +Cc: help-flex@gnu.org +Subject: Re: Bug: %option main overrides %option yywrap +In-reply-to: Your message of Wed, 25 Apr 2001 09:54:39 PDT. +From: Vern Paxson <vern@ee.lbl.gov> +Sender: help-flex-admin@gnu.org +Precedence: bulk +List-Help: <mailto:help-flex-request@gnu.org?subject=help> +List-Post: <mailto:help-flex@gnu.org> +List-Subscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=subscribe> +List-Id: Users list for Flex, + the GNU lexical analyser generator <help-flex.gnu.org> +List-Unsubscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=unsubscribe> +List-Archive: <http://mail.gnu.org/pipermail/help-flex/> +Date: Wed, 25 Apr 2001 11:20:19 PDT + +> 245: main { +> 246: action_define( "YY_MAIN", option_sense ); +> 247: do_yywrap = ! option_sense; +> 248: } + +I think I did that (which is documented in the manual, by the way) so that +"%option main" would be a quick way to specify you want a bare-bones +program for which you don't have to define any additional routines. 
+I'd be disinclined to change it (though it's Will's call) because it's +so easy to work around - just don't use "%option main", which is only +saving you a few lines of code anyway. + + Vern + +_______________________________________________ +Help-flex mailing list +Help-flex@gnu.org +http://mail.gnu.org/mailman/listinfo/help-flex + + +1,, +X-Coding-System: nil +Mail-from: From help-flex-admin@gnu.org Wed Apr 25 16:28:59 2001 +Return-Path: <help-flex-admin@gnu.org> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id QAA23955 + for <wlestes@localhost>; Wed, 25 Apr 2001 16:28:59 -0400 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Wed, 25 Apr 2001 16:28:59 -0400 (EDT) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f3PK99e25951 + for <wlestes@hermes.email.uncg.edu>; Wed, 25 Apr 2001 16:09:09 -0400 (EDT) +Received: from fencepost.gnu.org (fencepost.gnu.org [199.232.76.164]) + by external-gw.uncg.edu (8.9.3/8.9.3) with ESMTP id QAA05277 + for <wlestes@uncg.edu>; Wed, 25 Apr 2001 16:09:08 -0400 (EDT) +Received: from localhost ([127.0.0.1] helo=fencepost.gnu.org) + by fencepost.gnu.org with esmtp (Exim 3.16 #1 (Debian)) + id 14sVb1-0005jV-00 + for <wlestes@uncg.edu>; Wed, 25 Apr 2001 16:09:07 -0400 +Received: from michael.uncg.edu ([152.13.5.20]) + by fencepost.gnu.org with esmtp (Exim 3.16 #1 (Debian)) + id 14sVaJ-0005iu-00 + for <help-flex@gnu.org>; Wed, 25 Apr 2001 16:08:23 -0400 +Received: (from wlestes@localhost) + by michael.uncg.edu (8.9.3/8.9.3) id QAA23947; + Wed, 25 Apr 2001 16:27:46 -0400 +Message-Id: <200104252027.QAA23947@michael.uncg.edu> +X-Authentication-Warning: michael.uncg.edu: wlestes set sender to wlestes@michael.uncg.edu using -f +From: "W. L. 
Estes" <wlestes@uncg.edu> +To: vern@ee.lbl.gov +CC: johnmillaway@yahoo.com, help-flex@gnu.org +In-reply-to: <200104251820.f3PIKJa19998@daffy.ee.lbl.gov> (message from Vern + Paxson on Wed, 25 Apr 2001 11:20:19 PDT) +Subject: Re: Bug: %option main overrides %option yywrap +References: <200104251820.f3PIKJa19998@daffy.ee.lbl.gov> +Sender: help-flex-admin@gnu.org +Errors-To: help-flex-admin@gnu.org +X-BeenThere: help-flex@gnu.org +X-Mailman-Version: 2.0.3 +Precedence: bulk +List-Help: <mailto:help-flex-request@gnu.org?subject=help> +List-Post: <mailto:help-flex@gnu.org> +List-Subscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=subscribe> +List-Id: Users list for Flex, + the GNU lexical analyser generator <help-flex.gnu.org> +List-Unsubscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=unsubscribe> +List-Archive: <http://mail.gnu.org/pipermail/help-flex/> +Date: Wed, 25 Apr 2001 16:27:46 -0400 + +*** EOOH *** +From: "W. L. 
Estes" <wlestes@uncg.edu> +To: vern@ee.lbl.gov +CC: johnmillaway@yahoo.com, help-flex@gnu.org +In-reply-to: <200104251820.f3PIKJa19998@daffy.ee.lbl.gov> (message from Vern + Paxson on Wed, 25 Apr 2001 11:20:19 PDT) +Subject: Re: Bug: %option main overrides %option yywrap +Sender: help-flex-admin@gnu.org +Precedence: bulk +List-Help: <mailto:help-flex-request@gnu.org?subject=help> +List-Post: <mailto:help-flex@gnu.org> +List-Subscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=subscribe> +List-Id: Users list for Flex, + the GNU lexical analyser generator <help-flex.gnu.org> +List-Unsubscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=unsubscribe> +List-Archive: <http://mail.gnu.org/pipermail/help-flex/> +Date: Wed, 25 Apr 2001 16:27:46 -0400 + +> I think I did that (which is documented in the manual, by the way) so that +> "%option main" would be a quick way to specify you want a bare-bones +> program for which you don't have to define any additional routines. +> I'd be disinclined to change it (though it's Will's call) because it's +> so easy to work around - just don't use "%option main", which is only +> saving you a few lines of code anyway. + +Additionally, flex.1 says the following: + + main directs flex to provide a default main() program + for the scanner, which simply calls yylex(). This + option implies noyywrap (see below). + +My sense is that Vern is correct: The right thing to do is to not use +%option main if you want yywrap() to be under your control. 
+ +Thanks, + +--Will + +_______________________________________________ +Help-flex mailing list +Help-flex@gnu.org +http://mail.gnu.org/mailman/listinfo/help-flex + + +1,, +X-Coding-System: nil +Mail-from: From help-flex-admin@gnu.org Thu Apr 26 12:11:23 2001 +Return-Path: <help-flex-admin@gnu.org> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id MAA25162 + for <wlestes@localhost>; Thu, 26 Apr 2001 12:11:22 -0400 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Thu, 26 Apr 2001 12:11:22 -0400 (EDT) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f3QFp6e21203 + for <wlestes@hermes.email.uncg.edu>; Thu, 26 Apr 2001 11:51:06 -0400 (EDT) +Received: from fencepost.gnu.org (fencepost.gnu.org [199.232.76.164]) + by external-gw.uncg.edu (8.9.3/8.9.3) with ESMTP id LAA21348 + for <wlestes@uncg.edu>; Thu, 26 Apr 2001 11:51:05 -0400 (EDT) +Received: from localhost ([127.0.0.1] helo=fencepost.gnu.org) + by fencepost.gnu.org with esmtp (Exim 3.16 #1 (Debian)) + id 14so2r-00043k-00 + for <wlestes@uncg.edu>; Thu, 26 Apr 2001 11:51:05 -0400 +Received: from [208.155.173.144] (helo=symbology.com) + by fencepost.gnu.org with esmtp (Exim 3.16 #1 (Debian)) + id 14so2e-0003Dl-00 + for <help-flex@gnu.org>; Thu, 26 Apr 2001 11:50:53 -0400 +Received: from Spooler by symbology.com (Mercury/32 v3.21c) ID MO003737; + 26 Apr 01 10:52:43 -0500 +Received: from spooler by symbology.com (Mercury/32 v3.21c); 26 Apr 01 10:52:29 -0500 +Received: from grommit (10.0.0.74) by Symbology (Mercury/32 v3.21c) ID MG003736; + 26 Apr 01 10:52:22 -0500 +From: "Dave Dutcher" <ddutcher@symbology.com> +To: <help-flex@gnu.org> +Subject: Suppress copying characters to yytext? 
+Message-ID: <002501c0ce68$99b3ec40$4a00000a@symbology.com> +MIME-Version: 1.0 +Content-Type: text/plain; + charset="iso-8859-1" +Content-Transfer-Encoding: 7bit +X-Priority: 3 (Normal) +X-MSMail-Priority: Normal +X-Mailer: Microsoft Outlook CWS, Build 9.0.2416 (9.0.2910.0) +X-MimeOLE: Produced By Microsoft MimeOLE V5.50.4133.2400 +Importance: Normal +Sender: help-flex-admin@gnu.org +Errors-To: help-flex-admin@gnu.org +X-BeenThere: help-flex@gnu.org +X-Mailman-Version: 2.0.3 +Precedence: bulk +List-Help: <mailto:help-flex-request@gnu.org?subject=help> +List-Post: <mailto:help-flex@gnu.org> +List-Subscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=subscribe> +List-Id: Users list for Flex, + the GNU lexical analyser generator <help-flex.gnu.org> +List-Unsubscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=unsubscribe> +List-Archive: <http://mail.gnu.org/pipermail/help-flex/> +Date: Thu, 26 Apr 2001 10:50:20 -0500 + +*** EOOH *** +From: "Dave Dutcher" <ddutcher@symbology.com> +To: <help-flex@gnu.org> +Subject: Suppress copying characters to yytext? +Importance: Normal +Sender: help-flex-admin@gnu.org +Precedence: bulk +List-Help: <mailto:help-flex-request@gnu.org?subject=help> +List-Post: <mailto:help-flex@gnu.org> +List-Subscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=subscribe> +List-Id: Users list for Flex, + the GNU lexical analyser generator <help-flex.gnu.org> +List-Unsubscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=unsubscribe> +List-Archive: <http://mail.gnu.org/pipermail/help-flex/> +Date: Thu, 26 Apr 2001 10:50:20 -0500 + +Hi, + +First off I was wondering if there is an official web site for Flex? 
+ +Second, I would like to be able to have Flex use certain characters in a +regular expression to match the expression but suppress copying the +characters to yytext. A simple example would be if I was trying to match +text surrounded by quotes, but I don't want to use the quotes. i.e. + +\"[^\n]\" { yylval = strdup(yytext); //This gives me the text and quotes. } + +or + +\"[^\n]\" { yylval = strdup(yytext+1); yylval[strlen(yylval)-1] = '\0'; +//Now I've removed the quotes. } + +But since I'm lazy ;) I would like to be able to tell Flex not to copy the +quotes. What I would like is an operator that would tell flex to match an +expression but to not copy it to yytext. Off the top of my head maybe the # +symbol could work for such an operator, so I could then do: + +\"#[^\n]\"# { yylval = strdup(yytext); //This would give me the text without +the quotes. } + +Does anyone know if such a feature or something similar exists? If this +doesn't exist, would it be practical? + +Thanks, + +Dave Dutcher +Software Engineer +Symbology, Inc. 
+Minneapolis, MN + + +_______________________________________________ +Help-flex mailing list +Help-flex@gnu.org +http://mail.gnu.org/mailman/listinfo/help-flex + + +1,, +X-Coding-System: nil +Mail-from: From help-flex-admin@gnu.org Thu Apr 26 12:26:26 2001 +Return-Path: <help-flex-admin@gnu.org> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id MAA25203 + for <wlestes@localhost>; Thu, 26 Apr 2001 12:26:26 -0400 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Thu, 26 Apr 2001 12:26:26 -0400 (EDT) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f3QG63e27511 + for <wlestes@hermes.email.uncg.edu>; Thu, 26 Apr 2001 12:06:03 -0400 (EDT) +Received: from fencepost.gnu.org (fencepost.gnu.org [199.232.76.164]) + by external-gw.uncg.edu (8.9.3/8.9.3) with ESMTP id MAA23192 + for <wlestes@uncg.edu>; Thu, 26 Apr 2001 12:06:02 -0400 (EDT) +Received: from localhost ([127.0.0.1] helo=fencepost.gnu.org) + by fencepost.gnu.org with esmtp (Exim 3.16 #1 (Debian)) + id 14soHK-0004gM-00 + for <wlestes@uncg.edu>; Thu, 26 Apr 2001 12:06:02 -0400 +Received: from acp3bf.physik.rwth-aachen.de ([137.226.32.75]) + by fencepost.gnu.org with smtp (Exim 3.16 #1 (Debian)) + id 14soFe-0004cw-00 + for <help-flex@gnu.org>; Thu, 26 Apr 2001 12:04:18 -0400 +Received: from localhost (broeker@localhost) by acp3bf.physik.rwth-aachen.de (8.7.5/v3.2) with ESMTP id SAA00797; Thu, 26 Apr 2001 18:04:09 +0200 +X-Authentication-Warning: acp3bf.physik.rwth-aachen.de: broeker owned process doing -bs +From: Hans-Bernhard Broeker <broeker@physik.rwth-aachen.de> +X-Sender: broeker@acp3bf +To: Dave Dutcher <ddutcher@symbology.com> +cc: help-flex@gnu.org +Subject: Re: Suppress copying characters to yytext? 
+In-Reply-To: <002501c0ce68$99b3ec40$4a00000a@symbology.com> +Message-ID: <Pine.LNX.4.10.10104261759540.592-100000@acp3bf> +MIME-Version: 1.0 +Content-Type: TEXT/PLAIN; charset=US-ASCII +Sender: help-flex-admin@gnu.org +Errors-To: help-flex-admin@gnu.org +X-BeenThere: help-flex@gnu.org +X-Mailman-Version: 2.0.3 +Precedence: bulk +List-Help: <mailto:help-flex-request@gnu.org?subject=help> +List-Post: <mailto:help-flex@gnu.org> +List-Subscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=subscribe> +List-Id: Users list for Flex, + the GNU lexical analyser generator <help-flex.gnu.org> +List-Unsubscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=unsubscribe> +List-Archive: <http://mail.gnu.org/pipermail/help-flex/> +Date: Thu, 26 Apr 2001 18:04:06 +0200 (MET DST) + +*** EOOH *** +From: Hans-Bernhard Broeker <broeker@physik.rwth-aachen.de> +To: Dave Dutcher <ddutcher@symbology.com> +cc: help-flex@gnu.org +Subject: Re: Suppress copying characters to yytext? +In-Reply-To: <002501c0ce68$99b3ec40$4a00000a@symbology.com> +Sender: help-flex-admin@gnu.org +Precedence: bulk +List-Help: <mailto:help-flex-request@gnu.org?subject=help> +List-Post: <mailto:help-flex@gnu.org> +List-Subscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=subscribe> +List-Id: Users list for Flex, + the GNU lexical analyser generator <help-flex.gnu.org> +List-Unsubscribe: <http://mail.gnu.org/mailman/listinfo/help-flex>, + <mailto:help-flex-request@gnu.org?subject=unsubscribe> +List-Archive: <http://mail.gnu.org/pipermail/help-flex/> +Date: Thu, 26 Apr 2001 18:04:06 +0200 (MET DST) + +On Thu, 26 Apr 2001, Dave Dutcher wrote: + +> Second, I would like to be able to have Flex use certain characters in a +> regular expression to match the expression but suppress copying the +> characters to yytext. 
+ +You can do that with a start condition and trailing context rules. I.e. +roughly like this: + +<INITIAL>\" BEGIN(IN_STRING) + +<IN_STRING>[^\n]*/\" { + /* yytext is the interior of the string, now */ + BEGIN(INITIAL) + } + +-- +Hans-Bernhard Broeker (broeker@physik.rwth-aachen.de) +Even if all the snow were burnt, ashes would remain. + + +_______________________________________________ +Help-flex mailing list +Help-flex@gnu.org +http://mail.gnu.org/mailman/listinfo/help-flex + + +1, answered,, +X-Coding-System: nil +Mail-from: From johnmillaway@yahoo.com Wed Apr 25 18:52:08 2001 +Return-Path: <johnmillaway@yahoo.com> +Received: from localhost (localhost [127.0.0.1]) + by michael.uncg.edu (8.9.3/8.9.3) with ESMTP id SAA24103 + for <wlestes@localhost>; Wed, 25 Apr 2001 18:52:08 -0400 +Received: from imap.uncg.edu + by localhost with IMAP (fetchmail-5.1.0) + for wlestes@localhost (single-drop); Wed, 25 Apr 2001 18:52:08 -0400 (EDT) +Received: from external-gw.uncg.edu (external-gw.uncg.edu [152.13.2.70]) + by hermes.email.uncg.edu (8.11.0/8.11.0) with ESMTP id f3PMWQe07299 + for <wlestes@hermes.email.uncg.edu>; Wed, 25 Apr 2001 18:32:26 -0400 (EDT) +Received: from web9404.mail.yahoo.com (web9404.mail.yahoo.com [216.136.129.110]) + by external-gw.uncg.edu (8.9.3/8.9.3) with SMTP id SAA23019 + for <wlestes@uncg.edu>; Wed, 25 Apr 2001 18:32:24 -0400 (EDT) +Message-ID: <20010425223059.65780.qmail@web9404.mail.yahoo.com> +Received: from [216.254.82.102] by web9404.mail.yahoo.com; Wed, 25 Apr 2001 15:30:59 PDT +Date: Wed, 25 Apr 2001 15:30:59 -0700 (PDT) +From: "John W. Millaway" <johnmillaway@yahoo.com> +Subject: Re: Bug: %option main overrides %option yywrap +To: "W. L. Estes" <wlestes@uncg.edu>, vern@ee.lbl.gov +Cc: help-flex@gnu.org +In-Reply-To: <200104252027.QAA23947@michael.uncg.edu> +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii + +*** EOOH *** +Date: Wed, 25 Apr 2001 15:30:59 -0700 (PDT) +From: "John W. 
Millaway" <johnmillaway@yahoo.com> +Subject: Re: Bug: %option main overrides %option yywrap +To: "W. L. Estes" <wlestes@uncg.edu>, vern@ee.lbl.gov +Cc: help-flex@gnu.org +In-Reply-To: <200104252027.QAA23947@michael.uncg.edu> + +> Additionally, flex.1 says the following: +> +> main directs flex to provide a default main() program +> for the scanner, which simply calls yylex(). This +> option implies noyywrap (see below). + +I understand, and it makes sense. Just be aware that the inverse of this rule is +also true -- %option nomain implies yywrap -- which makes much less sense. + +Of course, I only complain because it had me on one heck of a bug hunt. I had +specified %option noyywrap and %option nomain, yet the linker complained that it +couldn't find yywrap. I thought I had broken something! :) + +-John + + +__________________________________________________ +Do You Yahoo!? +Yahoo! Auctions - buy the things you want at great prices +http://auctions.yahoo.com/ + +
\ No newline at end of file diff --git a/to.do/unicode/FlexLexer.h b/to.do/unicode/FlexLexer.h new file mode 100644 index 0000000..1764112 --- /dev/null +++ b/to.do/unicode/FlexLexer.h @@ -0,0 +1,195 @@ +// $Header$ + +// FlexLexer.h -- define interfaces for lexical analyzer classes generated +// by flex + +// Copyright (c) 1993 The Regents of the University of California. +// All rights reserved. +// +// This code is derived from software contributed to Berkeley by +// Kent Williams and Tom Epperly. +// +// Redistribution and use in source and binary forms are permitted provided +// that: (1) source distributions retain this entire copyright notice and +// comment, and (2) distributions including binaries display the following +// acknowledgement: ``This product includes software developed by the +// University of California, Berkeley and its contributors'' in the +// documentation or other materials provided with the distribution and in +// all advertising materials mentioning features or use of this software. +// Neither the name of the University nor the names of its contributors may +// be used to endorse or promote products derived from this software without +// specific prior written permission. +// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + +// This file defines FlexLexer, an abstract class which specifies the +// external interface provided to flex C++ lexer objects, and yyFlexLexer, +// which defines a particular lexer class. +// +// If you want to create multiple lexer classes, you use the -P flag +// to rename each yyFlexLexer to some other xxFlexLexer. 
You then +// include <FlexLexer.h> in your other sources once per lexer class: +// +// #undef yyFlexLexer +// #define yyFlexLexer xxFlexLexer +// #include <FlexLexer.h> +// +// #undef yyFlexLexer +// #define yyFlexLexer zzFlexLexer +// #include <FlexLexer.h> +// ... +// +// Since this header is generic for all sizes of flex scanners, you must +// define the type YY_CHAR before including it: +// +// typedef xxx YY_CHAR; +// #include <FlexLexer.h> +// ... +// +// where xxx = char for 7-bit scanners, unsigned char for 8-bit, and +// wchar_t for 16-bit. + +#ifndef __FLEX_LEXER_H +// Never included before - need to define base class. +#define __FLEX_LEXER_H +#include <iostream.h> + +extern "C++" { + +struct yy_buffer_state; +typedef int yy_state_type; + +class FlexLexer { +public: + virtual ~FlexLexer() { } + + const YY_CHAR* YYText() { return yytext; } + int YYLeng() { return yyleng; } + + virtual void + yy_switch_to_buffer( struct yy_buffer_state* new_buffer ) = 0; + virtual struct yy_buffer_state* + yy_create_buffer( istream* s, int size ) = 0; + virtual void yy_delete_buffer( struct yy_buffer_state* b ) = 0; + virtual void yyrestart( istream* s ) = 0; + + virtual int yylex() = 0; + + // Call yylex with new input/output sources. + int yylex( istream* new_in, ostream* new_out = 0 ) + { + switch_streams( new_in, new_out ); + return yylex(); + } + + // Switch to new input/output streams. A nil stream pointer + // indicates "keep the current one". + virtual void switch_streams( istream* new_in = 0, + ostream* new_out = 0 ) = 0; + + int lineno() const { return yylineno; } + + int debug() const { return yy_flex_debug; } + void set_debug( int flag ) { yy_flex_debug = flag; } + +protected: + YY_CHAR* yytext; + int yyleng; + int yylineno; // only maintained if you use %option yylineno + int yy_flex_debug; // only has effect with -d or "%option debug" +}; + +} +#endif + +#if defined(yyFlexLexer) || ! 
defined(yyFlexLexerOnce) +// Either this is the first time through (yyFlexLexerOnce not defined), +// or this is a repeated include to define a different flavor of +// yyFlexLexer, as discussed in the flex man page. +#define yyFlexLexerOnce + +class yyFlexLexer : public FlexLexer { +public: + // arg_yyin and arg_yyout default to the cin and cout, but we + // only make that assignment when initializing in yylex(). + yyFlexLexer( istream* arg_yyin = 0, ostream* arg_yyout = 0 ); + + virtual ~yyFlexLexer(); + + void yy_switch_to_buffer( struct yy_buffer_state* new_buffer ); + struct yy_buffer_state* yy_create_buffer( istream* s, int size ); + void yy_delete_buffer( struct yy_buffer_state* b ); + void yyrestart( istream* s ); + + virtual int yylex(); + virtual void switch_streams( istream* new_in, ostream* new_out ); + +protected: + virtual int LexerInput( YY_CHAR* buf, int max_size ); + virtual void LexerOutput( const YY_CHAR* buf, int size ); + virtual void LexerError( const char* msg ); + + void yyunput( int c, YY_CHAR* buf_ptr ); + int yyinput(); + + void yy_load_buffer_state(); + void yy_init_buffer( struct yy_buffer_state* b, istream* s ); + void yy_flush_buffer( struct yy_buffer_state* b ); + + int yy_start_stack_ptr; + int yy_start_stack_depth; + int* yy_start_stack; + + void yy_push_state( int new_state ); + void yy_pop_state(); + int yy_top_state(); + + yy_state_type yy_get_previous_state(); + yy_state_type yy_try_NUL_trans( yy_state_type current_state ); + int yy_get_next_buffer(); + + istream* yyin; // input source for default LexerInput + ostream* yyout; // output sink for default LexerOutput + + struct yy_buffer_state* yy_current_buffer; + + // yy_hold_char holds the character lost when yytext is formed. + YY_CHAR yy_hold_char; + + // Number of characters read into yy_ch_buf. + int yy_n_chars; + + // Points to current character in buffer. 
+ YY_CHAR* yy_c_buf_p; + + int yy_init; // whether we need to initialize + int yy_start; // start state number + + // Flag which is used to allow yywrap()'s to do buffer switches + // instead of setting up a fresh yyin. A bit of a hack ... + int yy_did_buffer_switch_on_eof; + + // The following are not always needed, but may be depending + // on use of certain flex features (like REJECT or yymore()). + + yy_state_type yy_last_accepting_state; + YY_CHAR* yy_last_accepting_cpos; + + yy_state_type* yy_state_buf; + yy_state_type* yy_state_ptr; + + YY_CHAR* yy_full_match; + int* yy_full_state; + int yy_full_lp; + + int yy_lp; + int yy_looking_for_trail_begin; + + int yy_more_flag; + int yy_more_len; + int yy_more_offset; + int yy_prev_more_offset; +}; + +#endif diff --git a/to.do/unicode/ccl.c b/to.do/unicode/ccl.c new file mode 100644 index 0000000..1bfc1d5 --- /dev/null +++ b/to.do/unicode/ccl.c @@ -0,0 +1,149 @@ +/* ccl - routines for character classes */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. + * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. 
+ * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +/* $Header$ */ + +#include "flexdef.h" + +/* ccladd - add a single character to a ccl */ + +void ccladd( cclp, ch ) +int cclp; +int ch; + { + int ind, len, newpos, i; + + check_char( ch ); + + len = ccllen[cclp]; + ind = cclmap[cclp]; + + /* check to see if the character is already in the ccl */ + + for ( i = 0; i < len; ++i ) + if ( ccltbl[ind + i] == ch ) + return; + + newpos = ind + len; + + if ( newpos >= current_max_ccl_tbl_size ) + { + current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT; + + ++num_reallocs; + + ccltbl = reallocate_wchar_array( ccltbl, + current_max_ccl_tbl_size ); + } + + ccllen[cclp] = len + 1; + ccltbl[newpos] = ch; + } + + +/* cclinit - return an empty ccl */ + +int cclinit() + { + if ( ++lastccl >= current_maxccls ) + { + current_maxccls += MAX_CCLS_INCREMENT; + + ++num_reallocs; + + cclmap = reallocate_integer_array( cclmap, current_maxccls ); + ccllen = reallocate_integer_array( ccllen, current_maxccls ); + cclng = reallocate_integer_array( cclng, current_maxccls ); + } + + if ( lastccl == 1 ) + /* we're making the first ccl */ + cclmap[lastccl] = 0; + + else + /* The new pointer is just past the end of the last ccl. + * Since the cclmap points to the \first/ character of a + * ccl, adding the length of the ccl to the cclmap pointer + * will produce a cursor to the first free space. 
+ */ + cclmap[lastccl] = cclmap[lastccl - 1] + ccllen[lastccl - 1]; + + ccllen[lastccl] = 0; + cclng[lastccl] = 0; /* ccl's start out life un-negated */ + + return lastccl; + } + + +/* cclnegate - negate the given ccl */ + +void cclnegate( cclp ) +int cclp; + { + cclng[cclp] = 1; + } + + +/* list_character_set - list the members of a set of characters in CCL form + * + * Writes to the given file a character-class representation of those + * characters present in the given CCL. A character is present if it + * has a non-zero value in the cset array. + */ + +void list_character_set( file, cset ) +FILE *file; +int cset[]; + { + register int i; + + putc( '[', file ); + + for ( i = 0; i < csize; ++i ) + { + if ( cset[i] ) + { + register int start_char = i; + + putc( ' ', file ); + + fputs( readable_form( i ), file ); + + while ( ++i < csize && cset[i] ) + ; + + if ( i - 1 > start_char ) + /* this was a run */ + fprintf( file, "-%s", readable_form( i - 1 ) ); + + putc( ' ', file ); + } + } + + putc( ']', file ); + } diff --git a/to.do/unicode/changes.txt b/to.do/unicode/changes.txt new file mode 100644 index 0000000..4d8ab37 --- /dev/null +++ b/to.do/unicode/changes.txt @@ -0,0 +1,102 @@ +Summary of changes for flex Unicode support + +- ccl.c + - ccladd() + - changed call to reallocate_Character_array to reallocate_wchar_array + +- ecs.c + - mkeccl() + - changed type of ccls from Char to wchar_t + +- flex.1 + - added description of -U option + - added extra qualifier to -Ca option regarding usage with -U + - modified -C, -Cf, and -CF options regarding usage with -U + +- flex.skl + - changed all references of char (except error messages) to YY_CHAR + - added new insertion point for defining YY_CHAR and YY_SC_TO_UI() + - yy_scan_bytes() + - renamed to yy_scan_chars to avoid confusion with 2-byte chars + - renamed param bytes to chars + - ECHO + - redefined C version as fwrite(yytext, sizeof(YY_CHAR), yyleng, yyout) + - YY_INPUT + - removed char* cast on param buf of C++ 
version + - yyFlexLexer::LexerInput() + - changed get() call to read((unsigned char *) buf, sizeof(YY_CHAR)) + - changed read() call to read((unsigned char *) buf, max_size * + sizeof(YY_CHAR)) + - changed gcount() call to gcount() / sizeof(YY_CHAR) + - yyFlexLexer::LexerOutput() + - changed write() call to write((unsigned char *) buf, size * + sizeof(YY_CHAR)) + - yy_get_next_buffer() + - yy_flex_realloc() call + - changed param b->yy_buf_size + 2 to + (b->yy_buf_size + 2) * sizeof(YY_CHAR) + - input() and yyFlexLexer::yyinput() + - changed line c = *(unsigned char *) yy_c_buf_p; to + c = YY_SC_TO_UI(*yy_c_buf_p); + +- flexdef.h + - defined CSIZE as 65536 + - changed myesc() proto to return int + - changed type of ccltbl from Char * to wchar_t * + - added allocate_wchar_array() and reallocate_wchar_array() macros + - changed mkeccl() proto's first param to wchar_t[] + - changed cshell() proto's first param to wchar_t[] + + +- FlexLexer.h + - changed all references of char (except error messages) to YY_CHAR + - added description about typedef'ing YY_CHAR before inclusion + +- gen.c + - changed appropriate references of char in output strings to YY_CHAR + - added C_uchar_decl and C_ushort_decl for 16-bit yy_ec type bump-down + - genecs() + - added code for 16-bit yy_ec type bump; -Ca bumps type to long + - make_tables() + - YY_INPUT + - read redefined as read(..., max_size * sizeof(YY_CHAR)) + - added code to use getwc() and WEOF for 16-bit interactive + - fread redefined as fread(buf, sizeof(YY_CHAR), max_size, yyin) + +- main.c + - changed type of ccltbl from Char * to wchar_t * + - check_options() + - changed default csize from CSIZE to 256 due to redef of CSIZE + - added code to check for options incompatible with -U + - added code to output typedef of YY_CHAR to skeleton, plus extra call to + skelout() to get down to original insertion point + - flexend() + - added code to print "U" when printing stats + - flexinit() + - added code to set csize for option -U + - 
changed assignment of csize in option -8 from CSIZE to 256 due to redef + of CSIZE + - readin() + - changed appropriate references of char in output strings to YY_CHAR + - removed output of YY_CHAR typedef; now located in check_options() + - usage() + - added fprintf for -U usage + +- misc.c + - check_char() + - added code to distinguish chars needing -8 and -U flags + - cshell() + - changed type of v from Char to wchar_t + - changed type of k from Char to wchar_t + - myesc() + - now returns an int to handle 16-bit escape sequences + - changed esc_char from Char to unsigned int as per htoi() and otoi() + +- scan.l + - changed ESCSEQ to accept 6 digit octal escapes and 4 digit hex escapes + - removed myesc() and ndlookup() protos + - added option "16bit" + +- tblcmp.c + - mktemplate + - changed type of transset from Char to wchar_t diff --git a/to.do/unicode/ecs.c b/to.do/unicode/ecs.c new file mode 100644 index 0000000..5c70191 --- /dev/null +++ b/to.do/unicode/ecs.c @@ -0,0 +1,225 @@ +/* ecs - equivalence class routines */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. + * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. 
+ * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +/* $Header$ */ + +#include "flexdef.h" + +/* ccl2ecl - convert character classes to set of equivalence classes + * + * For every ccl 1 .. lastccl, the members stored in ccltbl starting at + * cclmap[i] are overwritten in place with ecgroup[member], keeping only + * the positive values. The resulting (possibly smaller) member count is + * stored back into ccllen[i]. + */ + +void ccl2ecl() + { + int i, ich, newlen, cclp, ccls, cclmec; + + for ( i = 1; i <= lastccl; ++i ) + { + /* We loop through each character class, and for each character + * in the class, add the character's equivalence class to the + * new "character" class we are creating. Thus when we are all + * done, character classes will really consist of collections + * of equivalence classes. + */ + + newlen = 0; + cclp = cclmap[i]; + + for ( ccls = 0; ccls < ccllen[i]; ++ccls ) + { + ich = ccltbl[cclp + ccls]; + cclmec = ecgroup[ich]; + + /* Non-positive ecgroup entries are dropped; newlen never + * exceeds ccls, so the in-place write cannot overtake the read. + */ + if ( cclmec > 0 ) + { + ccltbl[cclp + newlen] = cclmec; + ++newlen; + } + } + + ccllen[i] = newlen; + } + } + + +/* cre8ecs - associate equivalence class numbers with class members + * + * fwd is the forward linked-list of equivalence class members. bck + * is the backward linked-list, and num is the number of class members. + * + * Returned is the number of classes. + */ + +int cre8ecs( fwd, bck, num ) +int fwd[], bck[], num; + { + int i, j, numcl; + + numcl = 0; + + /* Create equivalence class numbers. From now on, ABS( bck(x) ) + * is the equivalence class number for object x. If bck(x) + * is positive, then x is the representative of its equivalence + * class. 
+ */ + for ( i = 1; i <= num; ++i ) + if ( bck[i] == NIL ) + { + bck[i] = ++numcl; + for ( j = fwd[i]; j != NIL; j = fwd[j] ) + bck[j] = -numcl; + } + + return numcl; + } + + +/* mkeccl - update equivalence classes based on character class xtions + * + * synopsis + * wchar_t ccls[]; + * int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping; + * void mkeccl( wchar_t ccls[], int lenccl, int fwd[llsiz], int bck[llsiz], + * int llsiz, int NUL_mapping ); + * + * ccls contains the elements of the character class, lenccl is the + * number of elements in the ccl, fwd is the forward link-list of equivalent + * characters, bck is the backward link-list, and llsiz is the size of the + * link-list. + * + * NUL_mapping is the value which NUL (0) should be mapped to. + */ + +void mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping ) +wchar_t ccls[]; +int lenccl, fwd[], bck[], llsiz, NUL_mapping; + { + int cclp, oldec, newec; + int cclm, i, j; + static unsigned char cclflags[CSIZE]; /* initialized to all '\0' */ + + /* Note that it doesn't matter whether or not the character class is + * negated. The same results will be obtained in either case. + */ + + cclp = 0; + + while ( cclp < lenccl ) + { + cclm = ccls[cclp]; + + if ( NUL_mapping && cclm == 0 ) + cclm = NUL_mapping; + + oldec = bck[cclm]; + newec = cclm; + + j = cclp + 1; + + for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] ) + { /* look for the symbol in the character class */ + for ( ; j < lenccl; ++j ) + { + register int ccl_char; + + if ( NUL_mapping && ccls[j] == 0 ) + ccl_char = NUL_mapping; + else + ccl_char = ccls[j]; + + if ( ccl_char > i ) + break; + + if ( ccl_char == i && ! cclflags[j] ) + { + /* We found an old companion of cclm + * in the ccl. Link it into the new + * equivalence class and flag it as + * having been processed. + */ + + bck[i] = newec; + fwd[newec] = i; + newec = i; + /* Set flag so we don't reprocess. */ + cclflags[j] = 1; + + /* Get next equivalence class member. 
*/ + /* continue 2 */ + goto next_pt; + } + } + + /* Symbol isn't in character class. Put it in the old + * equivalence class. + */ + + bck[i] = oldec; + + if ( oldec != NIL ) + fwd[oldec] = i; + + oldec = i; + + next_pt: ; + } + + if ( bck[cclm] != NIL || oldec != bck[cclm] ) + { + bck[cclm] = NIL; + fwd[oldec] = NIL; + } + + fwd[newec] = NIL; + + /* Find next ccl member to process. The bound is tested before + * cclflags[cclp] is read so that the flag array is never indexed + * at lenccl (which would be out of bounds when lenccl == CSIZE). + */ + + for ( ++cclp; cclp < lenccl && cclflags[cclp]; ++cclp ) + { + /* Reset "doesn't need processing" flag. */ + cclflags[cclp] = 0; + } + } + } + + +/* mkechar - create equivalence class for single character */ + +void mkechar( tch, fwd, bck ) +int tch, fwd[], bck[]; + { + /* If until now the character has been a proper subset of + * an equivalence class, break it away to create a new ec + */ + + if ( fwd[tch] != NIL ) + bck[fwd[tch]] = bck[tch]; + + if ( bck[tch] != NIL ) + fwd[bck[tch]] = fwd[tch]; + + fwd[tch] = NIL; + bck[tch] = NIL; + } diff --git a/to.do/unicode/flex.1 b/to.do/unicode/flex.1 new file mode 100644 index 0000000..545c58f --- /dev/null +++ b/to.do/unicode/flex.1 @@ -0,0 +1,4099 @@ +.TH FLEX 1 "April 1995" "Version 2.5" +.SH NAME +flex \- fast lexical analyzer generator +.SH SYNOPSIS +.B flex +.B [\-bcdfhilnpstvwBFILTV78+? \-C[aefFmr] \-ooutput \-Pprefix \-Sskeleton] +.B [\-\-help \-\-version] +.I [filename ...] +.SH OVERVIEW +This manual describes +.I flex, +a tool for generating programs that perform pattern-matching on text. 
The +manual includes both tutorial and reference sections: +.nf + + Description + a brief overview of the tool + + Some Simple Examples + + Format Of The Input File + + Patterns + the extended regular expressions used by flex + + How The Input Is Matched + the rules for determining what has been matched + + Actions + how to specify what to do when a pattern is matched + + The Generated Scanner + details regarding the scanner that flex produces; + how to control the input source + + Start Conditions + introducing context into your scanners, and + managing "mini-scanners" + + Multiple Input Buffers + how to manipulate multiple input sources; how to + scan from strings instead of files + + End-of-file Rules + special rules for matching the end of the input + + Miscellaneous Macros + a summary of macros available to the actions + + Values Available To The User + a summary of values available to the actions + + Interfacing With Yacc + connecting flex scanners together with yacc parsers + + Options + flex command-line options, and the "%option" + directive + + Performance Considerations + how to make your scanner go as fast as possible + + Generating C++ Scanners + the (experimental) facility for generating C++ + scanner classes + + Incompatibilities With Lex And POSIX + how flex differs from AT&T lex and the POSIX lex + standard + + Diagnostics + those error messages produced by flex (or scanners + it generates) whose meanings might not be apparent + + Files + files used by flex + + Deficiencies / Bugs + known problems with flex + + See Also + other documentation, related tools + + Author + includes contact information + +.fi +.SH DESCRIPTION +.I flex +is a tool for generating +.I scanners: +programs which recognize lexical patterns in text. +.I flex +reads +the given input files, or its standard input if no file names are given, +for a description of a scanner to generate. The description is in +the form of pairs +of regular expressions and C code, called +.I rules. 
flex +generates as output a C source file, +.B lex.yy.c, +which defines a routine +.B yylex(). +This file is compiled and linked with the +.B \-lfl +library to produce an executable. When the executable is run, +it analyzes its input for occurrences +of the regular expressions. Whenever it finds one, it executes +the corresponding C code. +.SH SOME SIMPLE EXAMPLES +.PP +First some simple examples to get the flavor of how one uses +.I flex. +The following +.I flex +input specifies a scanner which whenever it encounters the string +"username" will replace it with the user's login name: +.nf + + %% + username printf( "%s", getlogin() ); + +.fi +By default, any text not matched by a +.I flex +scanner +is copied to the output, so the net effect of this scanner is +to copy its input file to its output with each occurrence +of "username" expanded. +In this input, there is just one rule. "username" is the +.I pattern +and the "printf" is the +.I action. +The "%%" marks the beginning of the rules. +.PP +Here's another simple example: +.nf + + int num_lines = 0, num_chars = 0; + + %% + \\n ++num_lines; ++num_chars; + . ++num_chars; + + %% + main() + { + yylex(); + printf( "# of lines = %d, # of chars = %d\\n", + num_lines, num_chars ); + } + +.fi +This scanner counts the number of characters and the number +of lines in its input (it produces no output other than the +final report on the counts). The first line +declares two globals, "num_lines" and "num_chars", which are accessible +both inside +.B yylex() +and in the +.B main() +routine declared after the second "%%". There are two rules, one +which matches a newline ("\\n") and increments both the line count and +the character count, and one which matches any character other than +a newline (indicated by the "." regular expression). 
+.PP +A somewhat more complicated example: +.nf + + /* scanner for a toy Pascal-like language */ + + %{ + /* need this for the call to atof() below */ + #include <math.h> + %} + + DIGIT [0-9] + ID [a-z][a-z0-9]* + + %% + + {DIGIT}+ { + printf( "An integer: %s (%d)\\n", yytext, + atoi( yytext ) ); + } + + {DIGIT}+"."{DIGIT}* { + printf( "A float: %s (%g)\\n", yytext, + atof( yytext ) ); + } + + if|then|begin|end|procedure|function { + printf( "A keyword: %s\\n", yytext ); + } + + {ID} printf( "An identifier: %s\\n", yytext ); + + "+"|"-"|"*"|"/" printf( "An operator: %s\\n", yytext ); + + "{"[^}\\n]*"}" /* eat up one-line comments */ + + [ \\t\\n]+ /* eat up whitespace */ + + . printf( "Unrecognized character: %s\\n", yytext ); + + %% + + main( argc, argv ) + int argc; + char **argv; + { + ++argv, --argc; /* skip over program name */ + if ( argc > 0 ) + yyin = fopen( argv[0], "r" ); + else + yyin = stdin; + + yylex(); + } + +.fi +This is the beginnings of a simple scanner for a language like +Pascal. It identifies different types of +.I tokens +and reports on what it has seen. +.PP +The details of this example will be explained in the following +sections. +.SH FORMAT OF THE INPUT FILE +The +.I flex +input file consists of three sections, separated by a line with just +.B %% +in it: +.nf + + definitions + %% + rules + %% + user code + +.fi +The +.I definitions +section contains declarations of simple +.I name +definitions to simplify the scanner specification, and declarations of +.I start conditions, +which are explained in a later section. +.PP +Name definitions have the form: +.nf + + name definition + +.fi +The "name" is a word beginning with a letter or an underscore ('_') +followed by zero or more letters, digits, '_', or '-' (dash). +The definition is taken to begin at the first non-white-space character +following the name and continuing to the end of the line. 
+The definition can subsequently be referred to using "{name}", which +will expand to "(definition)". For example, +.nf + + DIGIT [0-9] + ID [a-z][a-z0-9]* + +.fi +defines "DIGIT" to be a regular expression which matches a +single digit, and +"ID" to be a regular expression which matches a letter +followed by zero-or-more letters-or-digits. +A subsequent reference to +.nf + + {DIGIT}+"."{DIGIT}* + +.fi +is identical to +.nf + + ([0-9])+"."([0-9])* + +.fi +and matches one-or-more digits followed by a '.' followed +by zero-or-more digits. +.PP +The +.I rules +section of the +.I flex +input contains a series of rules of the form: +.nf + + pattern action + +.fi +where the pattern must be unindented and the action must begin +on the same line. +.PP +See below for a further description of patterns and actions. +.PP +Finally, the user code section is simply copied to +.B lex.yy.c +verbatim. +It is used for companion routines which call or are called +by the scanner. The presence of this section is optional; +if it is missing, the second +.B %% +in the input file may be skipped, too. +.PP +In the definitions and rules sections, any +.I indented +text or text enclosed in +.B %{ +and +.B %} +is copied verbatim to the output (with the %{}'s removed). +The %{}'s must appear unindented on lines by themselves. +.PP +In the rules section, +any indented or %{} text appearing before the +first rule may be used to declare variables +which are local to the scanning routine and (after the declarations) +code which is to be executed whenever the scanning routine is entered. +Other indented or %{} text in the rule section is still copied to the output, +but its meaning is not well-defined and it may well cause compile-time +errors (this feature is present for +.I POSIX +compliance; see below for other such features). 
+.PP +In the definitions section (but not in the rules section), +an unindented comment (i.e., a line +beginning with "/*") is also copied verbatim to the output up +to the next "*/". +.SH PATTERNS +The patterns in the input are written using an extended set of regular +expressions. These are: +.nf + + x match the character 'x' + . any character (byte) except newline + [xyz] a "character class"; in this case, the pattern + matches either an 'x', a 'y', or a 'z' + [abj-oZ] a "character class" with a range in it; matches + an 'a', a 'b', any letter from 'j' through 'o', + or a 'Z' + [^A-Z] a "negated character class", i.e., any character + but those in the class. In this case, any + character EXCEPT an uppercase letter. + [^A-Z\\n] any character EXCEPT an uppercase letter or + a newline + r* zero or more r's, where r is any regular expression + r+ one or more r's + r? zero or one r's (that is, "an optional r") + r{2,5} anywhere from two to five r's + r{2,} two or more r's + r{4} exactly 4 r's + {name} the expansion of the "name" definition + (see above) + "[xyz]\\"foo" + the literal string: [xyz]"foo + \\X if X is an 'a', 'b', 'f', 'n', 'r', 't', or 'v', + then the ANSI-C interpretation of \\x. + Otherwise, a literal 'X' (used to escape + operators such as '*') + \\0 a NUL character (ASCII code 0) + \\123 the character with octal value 123 + \\x2a the character with hexadecimal value 2a + (r) match an r; parentheses are used to override + precedence (see below) + + + rs the regular expression r followed by the + regular expression s; called "concatenation" + + + r|s either an r or an s + + + r/s an r but only if it is followed by an s. The + text matched by s is included when determining + whether this rule is the "longest match", + but is then returned to the input before + the action is executed. So the action only + sees the text matched by r. This type + of pattern is called "trailing context". 
+ (There are some combinations of r/s that flex + cannot match correctly; see notes in the + Deficiencies / Bugs section below regarding + "dangerous trailing context".) + ^r an r, but only at the beginning of a line (i.e., + when just starting to scan, or right after a + newline has been scanned). + r$ an r, but only at the end of a line (i.e., just + before a newline). Equivalent to "r/\\n". + + Note that flex's notion of "newline" is exactly + whatever the C compiler used to compile flex + interprets '\\n' as; in particular, on some DOS + systems you must either filter out \\r's in the + input yourself, or explicitly use r/\\r\\n for "r$". + + + <s>r an r, but only in start condition s (see + below for discussion of start conditions) + <s1,s2,s3>r + same, but in any of start conditions s1, + s2, or s3 + <*>r an r in any start condition, even an exclusive one. + + + <<EOF>> an end-of-file + <s1,s2><<EOF>> + an end-of-file when in start condition s1 or s2 + +.fi +Note that inside of a character class, all regular expression operators +lose their special meaning except escape ('\\') and the character class +operators, '-', ']', and, at the beginning of the class, '^'. +.PP +The regular expressions listed above are grouped according to +precedence, from highest precedence at the top to lowest at the bottom. +Those grouped together have equal precedence. For example, +.nf + + foo|bar* + +.fi +is the same as +.nf + + (foo)|(ba(r*)) + +.fi +since the '*' operator has higher precedence than concatenation, +and concatenation higher than alternation ('|'). This pattern +therefore matches +.I either +the string "foo" +.I or +the string "ba" followed by zero-or-more r's. +To match "foo" or zero-or-more "bar"'s, use: +.nf + + foo|(bar)* + +.fi +and to match zero-or-more "foo"'s-or-"bar"'s: +.nf + + (foo|bar)* + +.fi +.PP +In addition to characters and ranges of characters, character classes +can also contain character class +.I expressions. 
+These are expressions enclosed inside +.B [: +and +.B :] +delimiters (which themselves must appear between the '[' and ']' of the +character class; other elements may occur inside the character class, too). +The valid expressions are: +.nf + + [:alnum:] [:alpha:] [:blank:] + [:cntrl:] [:digit:] [:graph:] + [:lower:] [:print:] [:punct:] + [:space:] [:upper:] [:xdigit:] + +.fi +These expressions all designate a set of characters equivalent to +the corresponding standard C +.B isXXX +function. For example, +.B [:alnum:] +designates those characters for which +.B isalnum() +returns true - i.e., any alphabetic or numeric. +Some systems don't provide +.B isblank(), +so flex defines +.B [:blank:] +as a blank or a tab. +.PP +For example, the following character classes are all equivalent: +.nf + + [[:alnum:]] + [[:alpha:][:digit:]] + [[:alpha:]0-9] + [a-zA-Z0-9] + +.fi +If your scanner is case-insensitive (the +.B \-i +flag), then +.B [:upper:] +and +.B [:lower:] +are equivalent to +.B [:alpha:]. +.PP +Some notes on patterns: +.IP - +A negated character class such as the example "[^A-Z]" +above +.I will match a newline +unless "\\n" (or an equivalent escape sequence) is one of the +characters explicitly present in the negated character class +(e.g., "[^A-Z\\n]"). This is unlike how many other regular +expression tools treat negated character classes, but unfortunately +the inconsistency is historically entrenched. +Matching newlines means that a pattern like [^"]* can match the entire +input unless there's another quote in the input. +.IP - +A rule can have at most one instance of trailing context (the '/' operator +or the '$' operator). The start condition, '^', and "<<EOF>>" patterns +can only occur at the beginning of a pattern, and, as well as with '/' and '$', +cannot be grouped inside parentheses. 
A '^' which does not occur at +the beginning of a rule or a '$' which does not occur at the end of +a rule loses its special properties and is treated as a normal character. +.IP +The following are illegal: +.nf + + foo/bar$ + <sc1>foo<sc2>bar + +.fi +Note that the first of these, can be written "foo/bar\\n". +.IP +The following will result in '$' or '^' being treated as a normal character: +.nf + + foo|(bar$) + foo|^bar + +.fi +If what's wanted is a "foo" or a bar-followed-by-a-newline, the following +could be used (the special '|' action is explained below): +.nf + + foo | + bar$ /* action goes here */ + +.fi +A similar trick will work for matching a foo or a +bar-at-the-beginning-of-a-line. +.SH HOW THE INPUT IS MATCHED +When the generated scanner is run, it analyzes its input looking +for strings which match any of its patterns. If it finds more than +one match, it takes the one matching the most text (for trailing +context rules, this includes the length of the trailing part, even +though it will then be returned to the input). If it finds two +or more matches of the same length, the +rule listed first in the +.I flex +input file is chosen. +.PP +Once the match is determined, the text corresponding to the match +(called the +.I token) +is made available in the global character pointer +.B yytext, +and its length in the global integer +.B yyleng. +The +.I action +corresponding to the matched pattern is then executed (a more +detailed description of actions follows), and then the remaining +input is scanned for another match. +.PP +If no match is found, then the +.I default rule +is executed: the next character in the input is considered matched and +copied to the standard output. Thus, the simplest legal +.I flex +input is: +.nf + + %% + +.fi +which generates a scanner that simply copies its input (one character +at a time) to its output. 
+.PP +Note that +.B yytext +can be defined in two different ways: either as a character +.I pointer +or as a character +.I array. +You can control which definition +.I flex +uses by including one of the special directives +.B %pointer +or +.B %array +in the first (definitions) section of your flex input. The default is +.B %pointer, +unless you use the +.B -l +lex compatibility option, in which case +.B yytext +will be an array. +The advantage of using +.B %pointer +is substantially faster scanning and no buffer overflow when matching +very large tokens (unless you run out of dynamic memory). The disadvantage +is that you are restricted in how your actions can modify +.B yytext +(see the next section), and calls to the +.B unput() +function destroy the present contents of +.B yytext, +which can be a considerable porting headache when moving between different +.I lex +versions. +.PP +The advantage of +.B %array +is that you can then modify +.B yytext +to your heart's content, and calls to +.B unput() +do not destroy +.B yytext +(see below). Furthermore, existing +.I lex +programs sometimes access +.B yytext +externally using declarations of the form: +.nf + extern char yytext[]; +.fi +This definition is erroneous when used with +.B %pointer, +but correct for +.B %array. +.PP +.B %array +defines +.B yytext +to be an array of +.B YYLMAX +characters, which defaults to a fairly large value. You can change +the size by simply #define'ing +.B YYLMAX +to a different value in the first section of your +.I flex +input. As mentioned above, with +.B %pointer +yytext grows dynamically to accommodate large tokens. While this means your +.B %pointer +scanner can accommodate very large tokens (such as matching entire blocks +of comments), bear in mind that each time the scanner must resize +.B yytext +it also must rescan the entire token from the beginning, so matching such +tokens can prove slow. 
+.B yytext +presently does +.I not +dynamically grow if a call to +.B unput() +results in too much text being pushed back; instead, a run-time error results. +.PP +Also note that you cannot use +.B %array +with C++ scanner classes +(the +.B c++ +option; see below). +.SH ACTIONS +Each pattern in a rule has a corresponding action, which can be any +arbitrary C statement. The pattern ends at the first non-escaped +whitespace character; the remainder of the line is its action. If the +action is empty, then when the pattern is matched the input token +is simply discarded. For example, here is the specification for a program +which deletes all occurrences of "zap me" from its input: +.nf + + %% + "zap me" + +.fi +(It will copy all other characters in the input to the output since +they will be matched by the default rule.) +.PP +Here is a program which compresses multiple blanks and tabs down to +a single blank, and throws away whitespace found at the end of a line: +.nf + + %% + [ \\t]+ putchar( ' ' ); + [ \\t]+$ /* ignore this token */ + +.fi +.PP +If the action contains a '{', then the action spans till the balancing '}' +is found, and the action may cross multiple lines. +.I flex +knows about C strings and comments and won't be fooled by braces found +within them, but also allows actions to begin with +.B %{ +and will consider the action to be all the text up to the next +.B %} +(regardless of ordinary braces inside the action). +.PP +An action consisting solely of a vertical bar ('|') means "same as +the action for the next rule." See below for an illustration. +.PP +Actions can include arbitrary C code, including +.B return +statements to return a value to whatever routine called +.B yylex(). +Each time +.B yylex() +is called it continues processing tokens from where it last left +off until it either reaches +the end of the file or executes a return. 
+.PP +Actions are free to modify +.B yytext +except for lengthening it (adding +characters to its end--these will overwrite later characters in the +input stream). This however does not apply when using +.B %array +(see above); in that case, +.B yytext +may be freely modified in any way. +.PP +Actions are free to modify +.B yyleng +except they should not do so if the action also includes use of +.B yymore() +(see below). +.PP +There are a number of special directives which can be included within +an action: +.IP - +.B ECHO +copies yytext to the scanner's output. +.IP - +.B BEGIN +followed by the name of a start condition places the scanner in the +corresponding start condition (see below). +.IP - +.B REJECT +directs the scanner to proceed on to the "second best" rule which matched the +input (or a prefix of the input). The rule is chosen as described +above in "How the Input is Matched", and +.B yytext +and +.B yyleng +set up appropriately. +It may either be one which matched as much text +as the originally chosen rule but came later in the +.I flex +input file, or one which matched less text. +For example, the following will both count the +words in the input and call the routine special() whenever "frob" is seen: +.nf + + int word_count = 0; + %% + + frob special(); REJECT; + [^ \\t\\n]+ ++word_count; + +.fi +Without the +.B REJECT, +any "frob"'s in the input would not be counted as words, since the +scanner normally executes only one action per token. +Multiple +.B REJECT's +are allowed, each one finding the next best choice to the currently +active rule. For example, when the following scanner scans the token +"abcd", it will write "abcdabcaba" to the output: +.nf + + %% + a | + ab | + abc | + abcd ECHO; REJECT; + .|\\n /* eat up any unmatched character */ + +.fi +(The first three rules share the fourth's action since they use +the special '|' action.) 
+.B REJECT +is a particularly expensive feature in terms of scanner performance; +if it is used in +.I any +of the scanner's actions it will slow down +.I all +of the scanner's matching. Furthermore, +.B REJECT +cannot be used with the +.I -Cf +or +.I -CF +options (see below). +.IP +Note also that unlike the other special actions, +.B REJECT +is a +.I branch; +code immediately following it in the action will +.I not +be executed. +.IP - +.B yymore() +tells the scanner that the next time it matches a rule, the corresponding +token should be +.I appended +onto the current value of +.B yytext +rather than replacing it. For example, given the input "mega-kludge" +the following will write "mega-mega-kludge" to the output: +.nf + + %% + mega- ECHO; yymore(); + kludge ECHO; + +.fi +First "mega-" is matched and echoed to the output. Then "kludge" +is matched, but the previous "mega-" is still hanging around at the +beginning of +.B yytext +so the +.B ECHO +for the "kludge" rule will actually write "mega-kludge". +.PP +Two notes regarding use of +.B yymore(). +First, +.B yymore() +depends on the value of +.I yyleng +correctly reflecting the size of the current token, so you must not +modify +.I yyleng +if you are using +.B yymore(). +Second, the presence of +.B yymore() +in the scanner's action entails a minor performance penalty in the +scanner's matching speed. +.IP - +.B yyless(n) +returns all but the first +.I n +characters of the current token back to the input stream, where they +will be rescanned when the scanner looks for the next match. +.B yytext +and +.B yyleng +are adjusted appropriately (e.g., +.B yyleng +will now be equal to +.I n +). For example, on the input "foobar" the following will write out +"foobarbar": +.nf + + %% + foobar ECHO; yyless(3); + [a-z]+ ECHO; + +.fi +An argument of 0 to +.B yyless +will cause the entire current input string to be scanned again. 
Unless you've +changed how the scanner will subsequently process its input (using +.B BEGIN, +for example), this will result in an endless loop. +.PP +Note that +.B yyless +is a macro and can only be used in the flex input file, not from +other source files. +.IP - +.B unput(c) +puts the character +.I c +back onto the input stream. It will be the next character scanned. +The following action will take the current token and cause it +to be rescanned enclosed in parentheses. +.nf + + { + int i; + /* Copy yytext because unput() trashes yytext */ + char *yycopy = strdup( yytext ); + unput( ')' ); + for ( i = yyleng - 1; i >= 0; --i ) + unput( yycopy[i] ); + unput( '(' ); + free( yycopy ); + } + +.fi +Note that since each +.B unput() +puts the given character back at the +.I beginning +of the input stream, pushing back strings must be done back-to-front. +.PP +An important potential problem when using +.B unput() +is that if you are using +.B %pointer +(the default), a call to +.B unput() +.I destroys +the contents of +.I yytext, +starting with its rightmost character and devouring one character to +the left with each call. If you need the value of yytext preserved +after a call to +.B unput() +(as in the above example), +you must either first copy it elsewhere, or build your scanner using +.B %array +instead (see How The Input Is Matched). +.PP +Finally, note that you cannot put back +.B EOF +to attempt to mark the input stream with an end-of-file. +.IP - +.B input() +reads the next character from the input stream. 
For example, +the following is one way to eat up C comments: +.nf + + %% + "/*" { + register int c; + + for ( ; ; ) + { + while ( (c = input()) != '*' && + c != EOF ) + ; /* eat up text of comment */ + + if ( c == '*' ) + { + while ( (c = input()) == '*' ) + ; + if ( c == '/' ) + break; /* found the end */ + } + + if ( c == EOF ) + { + error( "EOF in comment" ); + break; + } + } + } + +.fi +(Note that if the scanner is compiled using +.B C++, +then +.B input() +is instead referred to as +.B yyinput(), +in order to avoid a name clash with the +.B C++ +stream by the name of +.I input.) +.IP - +.B YY_FLUSH_BUFFER +flushes the scanner's internal buffer +so that the next time the scanner attempts to match a token, it will +first refill the buffer using +.B YY_INPUT +(see The Generated Scanner, below). This action is a special case +of the more general +.B yy_flush_buffer() +function, described below in the section Multiple Input Buffers. +.IP - +.B yyterminate() +can be used in lieu of a return statement in an action. It terminates +the scanner and returns a 0 to the scanner's caller, indicating "all done". +By default, +.B yyterminate() +is also called when an end-of-file is encountered. It is a macro and +may be redefined. +.SH THE GENERATED SCANNER +The output of +.I flex +is the file +.B lex.yy.c, +which contains the scanning routine +.B yylex(), +a number of tables used by it for matching tokens, and a number +of auxiliary routines and macros. By default, +.B yylex() +is declared as follows: +.nf + + int yylex() + { + ... various definitions and the actions in here ... + } + +.fi +(If your environment supports function prototypes, then it will +be "int yylex( void )".) This definition may be changed by defining +the "YY_DECL" macro. For example, you could use: +.nf + + #define YY_DECL float lexscan( a, b ) float a, b; + +.fi +to give the scanning routine the name +.I lexscan, +returning a float, and taking two floats as arguments. 
Note that +if you give arguments to the scanning routine using a +K&R-style/non-prototyped function declaration, you must terminate +the definition with a semi-colon (;). +.PP +Whenever +.B yylex() +is called, it scans tokens from the global input file +.I yyin +(which defaults to stdin). It continues until it either reaches +an end-of-file (at which point it returns the value 0) or +one of its actions executes a +.I return +statement. +.PP +If the scanner reaches an end-of-file, subsequent calls are undefined +unless either +.I yyin +is pointed at a new input file (in which case scanning continues from +that file), or +.B yyrestart() +is called. +.B yyrestart() +takes one argument, a +.B FILE * +pointer (which can be nil, if you've set up +.B YY_INPUT +to scan from a source other than +.I yyin), +and initializes +.I yyin +for scanning from that file. Essentially there is no difference between +just assigning +.I yyin +to a new input file or using +.B yyrestart() +to do so; the latter is available for compatibility with previous versions +of +.I flex, +and because it can be used to switch input files in the middle of scanning. +It can also be used to throw away the current input buffer, by calling +it with an argument of +.I yyin; +but better is to use +.B YY_FLUSH_BUFFER +(see above). +Note that +.B yyrestart() +does +.I not +reset the start condition to +.B INITIAL +(see Start Conditions, below). +.PP +If +.B yylex() +stops scanning due to executing a +.I return +statement in one of the actions, the scanner may then be called again and it +will resume scanning where it left off. +.PP +By default (and for purposes of efficiency), the scanner uses +block-reads rather than simple +.I getc() +calls to read characters from +.I yyin. +The nature of how it gets its input can be controlled by defining the +.B YY_INPUT +macro. +YY_INPUT's calling sequence is "YY_INPUT(buf,result,max_size)". 
Its +action is to place up to +.I max_size +characters in the character array +.I buf +and return in the integer variable +.I result +either the +number of characters read or the constant YY_NULL (0 on Unix systems) +to indicate EOF. The default YY_INPUT reads from the +global file-pointer "yyin". +.PP +A sample definition of YY_INPUT (in the definitions +section of the input file): +.nf + + %{ + #define YY_INPUT(buf,result,max_size) \\ + { \\ + int c = getchar(); \\ + result = (c == EOF) ? YY_NULL : (buf[0] = c, 1); \\ + } + %} + +.fi +This definition will change the input processing to occur +one character at a time. +.PP +When the scanner receives an end-of-file indication from YY_INPUT, +it then checks the +.B yywrap() +function. If +.B yywrap() +returns false (zero), then it is assumed that the +function has gone ahead and set up +.I yyin +to point to another input file, and scanning continues. If it returns +true (non-zero), then the scanner terminates, returning 0 to its +caller. Note that in either case, the start condition remains unchanged; +it does +.I not +revert to +.B INITIAL. +.PP +If you do not supply your own version of +.B yywrap(), +then you must either use +.B %option noyywrap +(in which case the scanner behaves as though +.B yywrap() +returned 1), or you must link with +.B \-lfl +to obtain the default version of the routine, which always returns 1. +.PP +Three routines are available for scanning from in-memory buffers rather +than files: +.B yy_scan_string(), yy_scan_bytes(), +and +.B yy_scan_buffer(). +See the discussion of them below in the section Multiple Input Buffers. +.PP +The scanner writes its +.B ECHO +output to the +.I yyout +global (default, stdout), which may be redefined by the user simply +by assigning it to some other +.B FILE +pointer. +.SH START CONDITIONS +.I flex +provides a mechanism for conditionally activating rules. 
Any rule +whose pattern is prefixed with "<sc>" will only be active when +the scanner is in the start condition named "sc". For example, +.nf + + <STRING>[^"]* { /* eat up the string body ... */ + ... + } + +.fi +will be active only when the scanner is in the "STRING" start +condition, and +.nf + + <INITIAL,STRING,QUOTE>\\. { /* handle an escape ... */ + ... + } + +.fi +will be active only when the current start condition is +either "INITIAL", "STRING", or "QUOTE". +.PP +Start conditions +are declared in the definitions (first) section of the input +using unindented lines beginning with either +.B %s +or +.B %x +followed by a list of names. +The former declares +.I inclusive +start conditions, the latter +.I exclusive +start conditions. A start condition is activated using the +.B BEGIN +action. Until the next +.B BEGIN +action is executed, rules with the given start +condition will be active and +rules with other start conditions will be inactive. +If the start condition is +.I inclusive, +then rules with no start conditions at all will also be active. +If it is +.I exclusive, +then +.I only +rules qualified with the start condition will be active. +A set of rules contingent on the same exclusive start condition +describe a scanner which is independent of any of the other rules in the +.I flex +input. Because of this, +exclusive start conditions make it easy to specify "mini-scanners" +which scan portions of the input that are syntactically different +from the rest (e.g., comments). +.PP +If the distinction between inclusive and exclusive start conditions +is still a little vague, here's a simple example illustrating the +connection between the two. 
The set of rules: +.nf + + %s example + %% + + <example>foo do_something(); + + bar something_else(); + +.fi +is equivalent to +.nf + + %x example + %% + + <example>foo do_something(); + + <INITIAL,example>bar something_else(); + +.fi +Without the +.B <INITIAL,example> +qualifier, the +.I bar +pattern in the second example wouldn't be active (i.e., couldn't match) +when in start condition +.B example. +If we just used +.B <example> +to qualify +.I bar, +though, then it would only be active in +.B example +and not in +.B INITIAL, +while in the first example it's active in both, because in the first +example the +.B example +start condition is an +.I inclusive +.B (%s) +start condition. +.PP +Also note that the special start-condition specifier +.B <*> +matches every start condition. Thus, the above example could also +have been written: +.nf + + %x example + %% + + <example>foo do_something(); + + <*>bar something_else(); + +.fi +.PP +The default rule (to +.B ECHO +any unmatched character) remains active in start conditions. It +is equivalent to: +.nf + + <*>.|\\n ECHO; + +.fi +.PP +.B BEGIN(0) +returns to the original state where only the rules with +no start conditions are active. This state can also be +referred to as the start-condition "INITIAL", so +.B BEGIN(INITIAL) +is equivalent to +.B BEGIN(0). +(The parentheses around the start condition name are not required but +are considered good style.) +.PP +.B BEGIN +actions can also be given as indented code at the beginning +of the rules section. For example, the following will cause +the scanner to enter the "SPECIAL" start condition whenever +.B yylex() +is called and the global variable +.I enter_special +is true: +.nf + + int enter_special; + + %x SPECIAL + %% + if ( enter_special ) + BEGIN(SPECIAL); + + <SPECIAL>blahblahblah + ...more rules follow... + +.fi +.PP +To illustrate the uses of start conditions, +here is a scanner which provides two different interpretations +of a string like "123.456". 
By default it will treat it as +three tokens, the integer "123", a dot ('.'), and the integer "456". +But if the string is preceded earlier in the line by the string +"expect-floats" +it will treat it as a single token, the floating-point number +123.456: +.nf + + %{ + #include <math.h> + %} + %s expect + + %% + expect-floats BEGIN(expect); + + <expect>[0-9]+"."[0-9]+ { + printf( "found a float, = %f\\n", + atof( yytext ) ); + } + <expect>\\n { + /* that's the end of the line, so + * we need another "expect-floats" + * before we'll recognize any more + * numbers + */ + BEGIN(INITIAL); + } + + [0-9]+ { + printf( "found an integer, = %d\\n", + atoi( yytext ) ); + } + + "." printf( "found a dot\\n" ); + +.fi +Here is a scanner which recognizes (and discards) C comments while +maintaining a count of the current input line. +.nf + + %x comment + %% + int line_num = 1; + + "/*" BEGIN(comment); + + <comment>[^*\\n]* /* eat anything that's not a '*' */ + <comment>"*"+[^*/\\n]* /* eat up '*'s not followed by '/'s */ + <comment>\\n ++line_num; + <comment>"*"+"/" BEGIN(INITIAL); + +.fi +This scanner goes to a bit of trouble to match as much +text as possible with each rule. In general, when attempting to write +a high-speed scanner try to match as much as possible in each rule, as +it's a big win. +.PP +Note that start condition names are really integer values and +can be stored as such. Thus, the above could be extended in the +following fashion: +.nf + + %x comment foo + %% + int line_num = 1; + int comment_caller; + + "/*" { + comment_caller = INITIAL; + BEGIN(comment); + } + + ... + + <foo>"/*" { + comment_caller = foo; + BEGIN(comment); + } + + <comment>[^*\\n]* /* eat anything that's not a '*' */ + <comment>"*"+[^*/\\n]* /* eat up '*'s not followed by '/'s */ + <comment>\\n ++line_num; + <comment>"*"+"/" BEGIN(comment_caller); + +.fi +Furthermore, you can access the current start condition using +the integer-valued +.B YY_START +macro. 
For example, the above assignments to +.I comment_caller +could instead be written +.nf + + comment_caller = YY_START; + +.fi +Flex provides +.B YYSTATE +as an alias for +.B YY_START +(since that is what's used by AT&T +.I lex). +.PP +Note that start conditions do not have their own name-space; %s's and %x's +declare names in the same fashion as #define's. +.PP +Finally, here's an example of how to match C-style quoted strings using +exclusive start conditions, including expanded escape sequences (but +not including checking for a string that's too long): +.nf + + %x str + + %% + char string_buf[MAX_STR_CONST]; + char *string_buf_ptr; + + + \\" string_buf_ptr = string_buf; BEGIN(str); + + <str>\\" { /* saw closing quote - all done */ + BEGIN(INITIAL); + *string_buf_ptr = '\\0'; + /* return string constant token type and + * value to parser + */ + } + + <str>\\n { + /* error - unterminated string constant */ + /* generate error message */ + } + + <str>\\\\[0-7]{1,3} { + /* octal escape sequence */ + int result; + + (void) sscanf( yytext + 1, "%o", &result ); + + if ( result > 0xff ) + /* error, constant is out-of-bounds */ + + *string_buf_ptr++ = result; + } + + <str>\\\\[0-9]+ { + /* generate error - bad escape sequence; something + * like '\\48' or '\\0777777' + */ + } + + <str>\\\\n *string_buf_ptr++ = '\\n'; + <str>\\\\t *string_buf_ptr++ = '\\t'; + <str>\\\\r *string_buf_ptr++ = '\\r'; + <str>\\\\b *string_buf_ptr++ = '\\b'; + <str>\\\\f *string_buf_ptr++ = '\\f'; + + <str>\\\\(.|\\n) *string_buf_ptr++ = yytext[1]; + + <str>[^\\\\\\n\\"]+ { + char *yptr = yytext; + + while ( *yptr ) + *string_buf_ptr++ = *yptr++; + } + +.fi +.PP +Often, such as in some of the examples above, you wind up writing a +whole bunch of rules all preceded by the same start condition(s). Flex +makes this a little easier and cleaner by introducing a notion of +start condition +.I scope. 
+A start condition scope is begun with: +.nf + + <SCs>{ + +.fi +where +.I SCs +is a list of one or more start conditions. Inside the start condition +scope, every rule automatically has the prefix +.I <SCs> +applied to it, until a +.I '}' +which matches the initial +.I '{'. +So, for example, +.nf + + <ESC>{ + "\\\\n" return '\\n'; + "\\\\r" return '\\r'; + "\\\\f" return '\\f'; + "\\\\0" return '\\0'; + } + +.fi +is equivalent to: +.nf + + <ESC>"\\\\n" return '\\n'; + <ESC>"\\\\r" return '\\r'; + <ESC>"\\\\f" return '\\f'; + <ESC>"\\\\0" return '\\0'; + +.fi +Start condition scopes may be nested. +.PP +Three routines are available for manipulating stacks of start conditions: +.TP +.B void yy_push_state(int new_state) +pushes the current start condition onto the top of the start condition +stack and switches to +.I new_state +as though you had used +.B BEGIN new_state +(recall that start condition names are also integers). +.TP +.B void yy_pop_state() +pops the top of the stack and switches to it via +.B BEGIN. +.TP +.B int yy_top_state() +returns the top of the stack without altering the stack's contents. +.PP +The start condition stack grows dynamically and so has no built-in +size limitation. If memory is exhausted, program execution aborts. +.PP +To use start condition stacks, your scanner must include a +.B %option stack +directive (see Options below). +.SH MULTIPLE INPUT BUFFERS +Some scanners (such as those which support "include" files) +require reading from several input streams. As +.I flex +scanners do a large amount of buffering, one cannot control +where the next input will be read from by simply writing a +.B YY_INPUT +which is sensitive to the scanning context. +.B YY_INPUT +is only called when the scanner reaches the end of its buffer, which +may be a long time after scanning a statement such as an "include" +which requires switching the input source. 
+.PP +To negotiate these sorts of problems, +.I flex +provides a mechanism for creating and switching between multiple +input buffers. An input buffer is created by using: +.nf + + YY_BUFFER_STATE yy_create_buffer( FILE *file, int size ) + +.fi +which takes a +.I FILE +pointer and a size and creates a buffer associated with the given +file and large enough to hold +.I size +characters (when in doubt, use +.B YY_BUF_SIZE +for the size). It returns a +.B YY_BUFFER_STATE +handle, which may then be passed to other routines (see below). The +.B YY_BUFFER_STATE +type is a pointer to an opaque +.B struct yy_buffer_state +structure, so you may safely initialize YY_BUFFER_STATE variables to +.B ((YY_BUFFER_STATE) 0) +if you wish, and also refer to the opaque structure in order to +correctly declare input buffers in source files other than that +of your scanner. Note that the +.I FILE +pointer in the call to +.B yy_create_buffer +is only used as the value of +.I yyin +seen by +.B YY_INPUT; +if you redefine +.B YY_INPUT +so it no longer uses +.I yyin, +then you can safely pass a nil +.I FILE +pointer to +.B yy_create_buffer. +You select a particular buffer to scan from using: +.nf + + void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) + +.fi +switches the scanner's input buffer so subsequent tokens will +come from +.I new_buffer. +Note that +.B yy_switch_to_buffer() +may be used by yywrap() to set things up for continued scanning, instead +of opening a new file and pointing +.I yyin +at it. Note also that switching input sources via either +.B yy_switch_to_buffer() +or +.B yywrap() +does +.I not +change the start condition. +.nf + + void yy_delete_buffer( YY_BUFFER_STATE buffer ) + +.fi +is used to reclaim the storage associated with a buffer. ( +.B buffer +can be nil, in which case the routine does nothing.) 
+You can also clear the current contents of a buffer using: +.nf + + void yy_flush_buffer( YY_BUFFER_STATE buffer ) + +.fi +This function discards the buffer's contents, +so the next time the scanner attempts to match a token from the +buffer, it will first fill the buffer anew using +.B YY_INPUT. +.PP +.B yy_new_buffer() +is an alias for +.B yy_create_buffer(), +provided for compatibility with the C++ use of +.I new +and +.I delete +for creating and destroying dynamic objects. +.PP +Finally, the +.B YY_CURRENT_BUFFER +macro returns a +.B YY_BUFFER_STATE +handle to the current buffer. +.PP +Here is an example of using these features for writing a scanner +which expands include files (the +.B <<EOF>> +feature is discussed below): +.nf + + /* the "incl" state is used for picking up the name + * of an include file + */ + %x incl + + %{ + #define MAX_INCLUDE_DEPTH 10 + YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH]; + int include_stack_ptr = 0; + %} + + %% + include BEGIN(incl); + + [a-z]+ ECHO; + [^a-z\\n]*\\n? ECHO; + + <incl>[ \\t]* /* eat the whitespace */ + <incl>[^ \\t\\n]+ { /* got the include file name */ + if ( include_stack_ptr >= MAX_INCLUDE_DEPTH ) + { + fprintf( stderr, "Includes nested too deeply" ); + exit( 1 ); + } + + include_stack[include_stack_ptr++] = + YY_CURRENT_BUFFER; + + yyin = fopen( yytext, "r" ); + + if ( ! yyin ) + error( ... ); + + yy_switch_to_buffer( + yy_create_buffer( yyin, YY_BUF_SIZE ) ); + + BEGIN(INITIAL); + } + + <<EOF>> { + if ( --include_stack_ptr < 0 ) + { + yyterminate(); + } + + else + { + yy_delete_buffer( YY_CURRENT_BUFFER ); + yy_switch_to_buffer( + include_stack[include_stack_ptr] ); + } + } + +.fi +Three routines are available for setting up input buffers for +scanning in-memory strings instead of files. All of them create +a new input buffer for scanning the string, and return a corresponding +.B YY_BUFFER_STATE +handle (which you should delete with +.B yy_delete_buffer() +when done with it). 
They also switch to the new buffer using +.B yy_switch_to_buffer(), +so the next call to +.B yylex() +will start scanning the string. +.TP +.B yy_scan_string(const char *str) +scans a NUL-terminated string. +.TP +.B yy_scan_bytes(const char *bytes, int len) +scans +.I len +bytes (including possibly NUL's) +starting at location +.I bytes. +.PP +Note that both of these functions create and scan a +.I copy +of the string or bytes. (This may be desirable, since +.B yylex() +modifies the contents of the buffer it is scanning.) You can avoid the +copy by using: +.TP +.B yy_scan_buffer(char *base, yy_size_t size) +which scans in place the buffer starting at +.I base, +consisting of +.I size +bytes, the last two bytes of which +.I must +be +.B YY_END_OF_BUFFER_CHAR +(ASCII NUL). +These last two bytes are not scanned; thus, scanning +consists of +.B base[0] +through +.B base[size-2], +inclusive. +.IP +If you fail to set up +.I base +in this manner (i.e., forget the final two +.B YY_END_OF_BUFFER_CHAR +bytes), then +.B yy_scan_buffer() +returns a nil pointer instead of creating a new input buffer. +.IP +The type +.B yy_size_t +is an integral type to which you can cast an integer expression +reflecting the size of the buffer. +.SH END-OF-FILE RULES +The special rule "<<EOF>>" indicates +actions which are to be taken when an end-of-file is +encountered and yywrap() returns non-zero (i.e., indicates +no further files to process). The action must finish +by doing one of four things: +.IP - +assigning +.I yyin +to a new input file (in previous versions of flex, after doing the +assignment you had to call the special action +.B YY_NEW_FILE; +this is no longer necessary); +.IP - +executing a +.I return +statement; +.IP - +executing the special +.B yyterminate() +action; +.IP - +or, switching to a new buffer using +.B yy_switch_to_buffer() +as shown in the example above. 
+.PP +<<EOF>> rules may not be used with other +patterns; they may only be qualified with a list of start +conditions. If an unqualified <<EOF>> rule is given, it +applies to +.I all +start conditions which do not already have <<EOF>> actions. To +specify an <<EOF>> rule for only the initial start condition, use +.nf + + <INITIAL><<EOF>> + +.fi +.PP +These rules are useful for catching things like unclosed comments. +An example: +.nf + + %x quote + %% + + ...other rules for dealing with quotes... + + <quote><<EOF>> { + error( "unterminated quote" ); + yyterminate(); + } + <<EOF>> { + if ( *++filelist ) + yyin = fopen( *filelist, "r" ); + else + yyterminate(); + } + +.fi +.SH MISCELLANEOUS MACROS +The macro +.B YY_USER_ACTION +can be defined to provide an action +which is always executed prior to the matched rule's action. For example, +it could be #define'd to call a routine to convert yytext to lower-case. +When +.B YY_USER_ACTION +is invoked, the variable +.I yy_act +gives the number of the matched rule (rules are numbered starting with 1). +Suppose you want to profile how often each of your rules is matched. The +following would do the trick: +.nf + + #define YY_USER_ACTION ++ctr[yy_act] + +.fi +where +.I ctr +is an array to hold the counts for the different rules. Note that +the macro +.B YY_NUM_RULES +gives the total number of rules (including the default rule, even if +you use +.B \-s), +so a correct declaration for +.I ctr +is: +.nf + + int ctr[YY_NUM_RULES]; + +.fi +.PP +The macro +.B YY_USER_INIT +may be defined to provide an action which is always executed before +the first scan (and before the scanner's internal initializations are done). +For example, it could be used to call a routine to read +in a data table or open a logging file. +.PP +The macro +.B yy_set_interactive(is_interactive) +can be used to control whether the current buffer is considered +.I interactive. 
+An interactive buffer is processed more slowly, +but must be used when the scanner's input source is indeed +interactive to avoid problems due to waiting to fill buffers +(see the discussion of the +.B \-I +flag below). A non-zero value +in the macro invocation marks the buffer as interactive, a zero +value as non-interactive. Note that use of this macro overrides +.B %option always-interactive +or +.B %option never-interactive +(see Options below). +.B yy_set_interactive() +must be invoked prior to beginning to scan the buffer that is +(or is not) to be considered interactive. +.PP +The macro +.B yy_set_bol(at_bol) +can be used to control whether the current buffer's scanning +context for the next token match is done as though at the +beginning of a line. A non-zero macro argument makes rules anchored with +'^' active, while a zero argument makes '^' rules inactive. +.PP +The macro +.B YY_AT_BOL() +returns true if the next token scanned from the current buffer +will have '^' rules active, false otherwise. +.PP +In the generated scanner, the actions are all gathered in one large +switch statement and separated using +.B YY_BREAK, +which may be redefined. By default, it is simply a "break", to separate +each rule's action from the following rule's. +Redefining +.B YY_BREAK +allows, for example, C++ users to +#define YY_BREAK to do nothing (while being very careful that every +rule ends with a "break" or a "return"!) to avoid suffering from +unreachable statement warnings where because a rule's action ends with +"return", the +.B YY_BREAK +is inaccessible. +.SH VALUES AVAILABLE TO THE USER +This section summarizes the various values available to the user +in the rule actions. +.IP - +.B char *yytext +holds the text of the current token. It may be modified but not lengthened +(you cannot append characters to the end). 
+.IP +If the special directive +.B %array +appears in the first section of the scanner description, then +.B yytext +is instead declared +.B char yytext[YYLMAX], +where +.B YYLMAX +is a macro definition that you can redefine in the first section +if you don't like the default value (generally 8KB). Using +.B %array +results in somewhat slower scanners, but the value of +.B yytext +becomes immune to calls to +.I input() +and +.I unput(), +which potentially destroy its value when +.B yytext +is a character pointer. The opposite of +.B %array +is +.B %pointer, +which is the default. +.IP +You cannot use +.B %array +when generating C++ scanner classes +(the +.B \-+ +flag). +.IP - +.B int yyleng +holds the length of the current token. +.IP - +.B FILE *yyin +is the file which by default +.I flex +reads from. It may be redefined but doing so only makes sense before +scanning begins or after an EOF has been encountered. Changing it in +the midst of scanning will have unexpected results since +.I flex +buffers its input; use +.B yyrestart() +instead. +Once scanning terminates because an end-of-file +has been seen, you can assign +.I yyin +at the new input file and then call the scanner again to continue scanning. +.IP - +.B void yyrestart( FILE *new_file ) +may be called to point +.I yyin +at the new input file. The switch-over to the new file is immediate +(any previously buffered-up input is lost). Note that calling +.B yyrestart() +with +.I yyin +as an argument thus throws away the current input buffer and continues +scanning the same input file. +.IP - +.B FILE *yyout +is the file to which +.B ECHO +actions are done. It can be reassigned by the user. +.IP - +.B YY_CURRENT_BUFFER +returns a +.B YY_BUFFER_STATE +handle to the current buffer. +.IP - +.B YY_START +returns an integer value corresponding to the current start +condition. You can subsequently use this value with +.B BEGIN +to return to that start condition. 
+.SH INTERFACING WITH YACC +One of the main uses of +.I flex +is as a companion to the +.I yacc +parser-generator. +.I yacc +parsers expect to call a routine named +.B yylex() +to find the next input token. The routine is supposed to +return the type of the next token as well as putting any associated +value in the global +.B yylval. +To use +.I flex +with +.I yacc, +one specifies the +.B \-d +option to +.I yacc +to instruct it to generate the file +.B y.tab.h +containing definitions of all the +.B %tokens +appearing in the +.I yacc +input. This file is then included in the +.I flex +scanner. For example, if one of the tokens is "TOK_NUMBER", +part of the scanner might look like: +.nf + + %{ + #include "y.tab.h" + %} + + %% + + [0-9]+ yylval = atoi( yytext ); return TOK_NUMBER; + +.fi +.SH OPTIONS +.I flex +has the following options: +.TP +.B \-b +Generate backing-up information to +.I lex.backup. +This is a list of scanner states which require backing up +and the input characters on which they do so. By adding rules one +can remove backing-up states. If +.I all +backing-up states are eliminated and +.B \-Cf +or +.B \-CF +is used, the generated scanner will run faster (see the +.B \-p +flag). Only users who wish to squeeze every last cycle out of their +scanners need worry about this option. (See the section on Performance +Considerations below.) +.TP +.B \-c +is a do-nothing, deprecated option included for POSIX compliance. +.TP +.B \-d +makes the generated scanner run in +.I debug +mode. Whenever a pattern is recognized and the global +.B yy_flex_debug +is non-zero (which is the default), +the scanner will write to +.I stderr +a line of the form: +.nf + + --accepting rule at line 53 ("the matched text") + +.fi +The line number refers to the location of the rule in the file +defining the scanner (i.e., the file that was fed to flex). 
Messages +are also generated when the scanner backs up, accepts the +default rule, reaches the end of its input buffer (or encounters +a NUL; at this point, the two look the same as far as the scanner's concerned), +or reaches an end-of-file. +.TP +.B \-f +specifies +.I fast scanner. +No table compression is done and stdio is bypassed. +The result is large but fast. This option is equivalent to +.B \-Cfr +(see below). +.TP +.B \-h +generates a "help" summary of +.I flex's +options to +.I stdout +and then exits. +.B \-? +and +.B \-\-help +are synonyms for +.B \-h. +.TP +.B \-i +instructs +.I flex +to generate a +.I case-insensitive +scanner. The case of letters given in the +.I flex +input patterns will +be ignored, and tokens in the input will be matched regardless of case. The +matched text given in +.I yytext +will have the preserved case (i.e., it will not be folded). +.TP +.B \-l +turns on maximum compatibility with the original AT&T +.I lex +implementation. Note that this does not mean +.I full +compatibility. Use of this option costs a considerable amount of +performance, and it cannot be used with the +.B \-+, -f, -F, -Cf, +or +.B -CF +options. For details on the compatibilities it provides, see the section +"Incompatibilities With Lex And POSIX" below. This option also results +in the name +.B YY_FLEX_LEX_COMPAT +being #define'd in the generated scanner. +.TP +.B \-n +is another do-nothing, deprecated option included only for +POSIX compliance. +.TP +.B \-p +generates a performance report to stderr. The report +consists of comments regarding features of the +.I flex +input file which will cause a serious loss of performance in the resulting +scanner. If you give the flag twice, you will also get comments regarding +features that lead to minor performance losses. 
+.IP +Note that the use of +.B REJECT, +.B %option yylineno, +and variable trailing context (see the Deficiencies / Bugs section below) +entails a substantial performance penalty; use of +.I yymore(), +the +.B ^ +operator, +and the +.B \-I +flag entail minor performance penalties. +.TP +.B \-s +causes the +.I default rule +(that unmatched scanner input is echoed to +.I stdout) +to be suppressed. If the scanner encounters input that does not +match any of its rules, it aborts with an error. This option is +useful for finding holes in a scanner's rule set. +.TP +.B \-t +instructs +.I flex +to write the scanner it generates to standard output instead +of +.B lex.yy.c. +.TP +.B \-v +specifies that +.I flex +should write to +.I stderr +a summary of statistics regarding the scanner it generates. +Most of the statistics are meaningless to the casual +.I flex +user, but the first line identifies the version of +.I flex +(same as reported by +.B \-V), +and the next line the flags used when generating the scanner, including +those that are on by default. +.TP +.B \-w +suppresses warning messages. +.TP +.B \-B +instructs +.I flex +to generate a +.I batch +scanner, the opposite of +.I interactive +scanners generated by +.B \-I +(see below). In general, you use +.B \-B +when you are +.I certain +that your scanner will never be used interactively, and you want to +squeeze a +.I little +more performance out of it. If your goal is instead to squeeze out a +.I lot +more performance, you should be using the +.B \-Cf +or +.B \-CF +options (discussed below), which turn on +.B \-B +automatically anyway. +.TP +.B \-F +specifies that the +.ul +fast +scanner table representation should be used (and stdio +bypassed). This representation is +about as fast as the full table representation +.B (-f), +and for some sets of patterns will be considerably smaller (and for +others, larger). 
In general, if the pattern set contains both "keywords" +and a catch-all, "identifier" rule, such as in the set: +.nf + + "case" return TOK_CASE; + "switch" return TOK_SWITCH; + ... + "default" return TOK_DEFAULT; + [a-z]+ return TOK_ID; + +.fi +then you're better off using the full table representation. If only +the "identifier" rule is present and you then use a hash table or some such +to detect the keywords, you're better off using +.B -F. +.IP +This option is equivalent to +.B \-CFr +(see below). It cannot be used with +.B \-+. +.TP +.B \-I +instructs +.I flex +to generate an +.I interactive +scanner. An interactive scanner is one that only looks ahead to decide +what token has been matched if it absolutely must. It turns out that +always looking one extra character ahead, even if the scanner has already +seen enough text to disambiguate the current token, is a bit faster than +only looking ahead when necessary. But scanners that always look ahead +give dreadful interactive performance; for example, when a user types +a newline, it is not recognized as a newline token until they enter +.I another +token, which often means typing in another whole line. +.IP +.I Flex +scanners default to +.I interactive +unless you use the +.B \-Cf +or +.B \-CF +table-compression options (see below). That's because if you're looking +for high-performance you should be using one of these options, so if you +didn't, +.I flex +assumes you'd rather trade off a bit of run-time performance for intuitive +interactive behavior. Note also that you +.I cannot +use +.B \-I +in conjunction with +.B \-Cf +or +.B \-CF. +Thus, this option is not really needed; it is on by default for all those +cases in which it is allowed. +.IP +You can force a scanner to +.I not +be interactive by using +.B \-B +(see above). +.TP +.B \-L +instructs +.I flex +not to generate +.B #line +directives. 
Without this option, +.I flex +peppers the generated scanner +with #line directives so error messages in the actions will be correctly +located with respect to either the original +.I flex +input file (if the errors are due to code in the input file), or +.B lex.yy.c +(if the errors are +.I flex's +fault -- you should report these sorts of errors to the email address +given below). +.TP +.B \-T +makes +.I flex +run in +.I trace +mode. It will generate a lot of messages to +.I stderr +concerning +the form of the input and the resultant non-deterministic and deterministic +finite automata. This option is mostly for use in maintaining +.I flex. +.TP +.B \-V +prints the version number to +.I stdout +and exits. +.B \-\-version +is a synonym for +.B \-V. +.TP +.B \-7 +instructs +.I flex +to generate a 7-bit scanner, i.e., one which can only recognize 7-bit +characters in its input. The advantage of using +.B \-7 +is that the scanner's tables can be up to half the size of those generated +using the +.B \-8 +option (see below). The disadvantage is that such scanners often hang +or crash if their input contains an 8-bit character. +.IP +Note, however, that unless you generate your scanner using the +.B \-Cf +or +.B \-CF +table compression options, use of +.B \-7 +will save only a small amount of table space, and make your scanner +considerably less portable. +.I Flex's +default behavior is to generate an 8-bit scanner unless you use the +.B \-Cf +or +.B \-CF, +in which case +.I flex +defaults to generating 7-bit scanners unless your site was always +configured to generate 8-bit scanners (as will often be the case +with non-USA sites). You can tell whether flex generated a 7-bit +or an 8-bit scanner by inspecting the flag summary in the +.B \-v +output as described above.
+.IP +Note that if you use +.B \-Cfe +or +.B \-CFe +(those table compression options, but also using equivalence classes as +discussed below), flex still defaults to generating an 8-bit +scanner, since usually with these compression options full 8-bit tables +are not much more expensive than 7-bit tables. +.TP +.B \-8 +instructs +.I flex +to generate an 8-bit scanner, i.e., one which can recognize 8-bit +characters. This flag is only needed for scanners generated using +.B \-Cf +or +.B \-CF, +as otherwise flex defaults to generating an 8-bit scanner anyway. +.IP +See the discussion of +.B \-7 +above for flex's default behavior and the tradeoffs between 7-bit +and 8-bit scanners. +.TP +.B \-U +instructs +.I flex +to generate a 16-bit scanner, i.e., one which can recognize Unicode +characters. The tables of a scanner generated with +.B \-U +are always substantially larger than those of a 7- or 8-bit scanner, but there +are three significant benefits if you need to scan Unicode. First, a 16-bit +Unicode scanner is much faster than an equivalent 8-bit because it does not +have to match NULs. Second, the generated scanner is portable - 8-bit Unicode +scanners are non-portable because their patterns reflect the endianness of the +platform on which they were written. Third, 16-bit patterns use the standard +syntax of regular expressions with one small addition: escape sequences can +specify 16-bit characters. For example, the patterns \177377 and \xFEFF both +match the Unicode byte-order mark. Note the following related issues: +.IP +If your operating system distinguishes between text and binary file I/O, +.I yyin +and +.I yyout +should be opened in binary mode. +.IP +C++ support of Unicode varies. This currently limits 16-bit C++ +scanners to file I/O. +.IP +The +.B \-C, -Cf, +and +.B \-CF +table compression options are not available with +.B \-U +in order to keep table sizes within reason. +.TP +.B \-+ +specifies that you want flex to generate a C++ +scanner class.
See the section on Generating C++ Scanners below for +details. +.TP +.B \-C[aefFmr] +controls the degree of table compression and, more generally, trade-offs +between small scanners and fast scanners. +.IP +.B \-Ca +("align") instructs flex to trade off larger tables in the +generated scanner for faster performance because the elements of +the tables are better aligned for memory access and computation. On some +RISC architectures, fetching and manipulating longwords is more efficient +than with smaller-sized units such as shortwords. This option can +double the size of the tables used by a 7- or 8-bit scanner, and can +quadruple those of a 16-bit scanner. +.IP +.B \-Ce +directs +.I flex +to construct +.I equivalence classes, +i.e., sets of characters +which have identical lexical properties (for example, if the only +appearance of digits in the +.I flex +input is in the character class +"[0-9]" then the digits '0', '1', ..., '9' will all be put +in the same equivalence class). Equivalence classes usually give +dramatic reductions in the final table/object file sizes (typically +a factor of 2-5) and are pretty cheap performance-wise (one array +look-up per character scanned). +.IP +.B \-Cf +specifies that the +.I full +scanner tables should be generated - +.I flex +should not compress the +tables by taking advantage of similar transition functions for +different states. This option cannot be used with +.B \-U. +.IP +.B \-CF +specifies that the alternate fast scanner representation (described +above under the +.B \-F +flag) +should be used. This option cannot be used with +.B \-+ +or +.B \-U. +.IP +.B \-Cm +directs +.I flex +to construct +.I meta-equivalence classes, +which are sets of equivalence classes (or characters, if equivalence +classes are not being used) that are commonly used together.
Meta-equivalence +classes are often a big win when using compressed tables, but they +have a moderate performance impact (one or two "if" tests and one +array look-up per character scanned). +.IP +.B \-Cr +causes the generated scanner to +.I bypass +use of the standard I/O library (stdio) for input. Instead of calling +.B fread() +or +.B getc(), +the scanner will use the +.B read() +system call, resulting in a performance gain which varies from system +to system, but in general is probably negligible unless you are also using +.B \-Cf +or +.B \-CF. +Using +.B \-Cr +can cause strange behavior if, for example, you read from +.I yyin +using stdio prior to calling the scanner (because the scanner will miss +whatever text your previous reads left in the stdio input buffer). +.IP +.B \-Cr +has no effect if you define +.B YY_INPUT +(see The Generated Scanner above). +.IP +A lone +.B \-C +specifies that the scanner tables should be compressed but neither +equivalence classes nor meta-equivalence classes should be used. +This option cannot be used with +.B \-U. +.IP +The options +.B \-Cf +or +.B \-CF +and +.B \-Cm +do not make sense together - there is no opportunity for meta-equivalence +classes if the table is not being compressed. Otherwise the options +may be freely mixed, and are cumulative. +.IP +The default setting is +.B \-Cem, +which specifies that +.I flex +should generate equivalence classes +and meta-equivalence classes. This setting provides the highest +degree of table compression. You can trade off +faster-executing scanners at the cost of larger tables with +the following generally being true: +.nf + + slowest & smallest + -Cem + -Cm + -Ce + -C + -C{f,F}e + -C{f,F} + -C{f,F}a + fastest & largest + +.fi +Note that scanners with the smallest tables are usually generated and +compiled the quickest, so +during development you will usually want to use the default, maximal +compression. 
+.IP +.B \-Cfe +is often a good compromise between speed and size for production +scanners. +.TP +.B \-ooutput +directs flex to write the scanner to the file +.B output +instead of +.B lex.yy.c. +If you combine +.B \-o +with the +.B \-t +option, then the scanner is written to +.I stdout +but its +.B #line +directives (see the +.B \\-L +option above) refer to the file +.B output. +.TP +.B \-Pprefix +changes the default +.I "yy" +prefix used by +.I flex +for all globally-visible variable and function names to instead be +.I prefix. +For example, +.B \-Pfoo +changes the name of +.B yytext +to +.B footext. +It also changes the name of the default output file from +.B lex.yy.c +to +.B lex.foo.c. +Here are all of the names affected: +.nf + + yy_create_buffer + yy_delete_buffer + yy_flex_debug + yy_init_buffer + yy_flush_buffer + yy_load_buffer_state + yy_switch_to_buffer + yyin + yyleng + yylex + yylineno + yyout + yyrestart + yytext + yywrap + +.fi +(If you are using a C++ scanner, then only +.B yywrap +and +.B yyFlexLexer +are affected.) +Within your scanner itself, you can still refer to the global variables +and functions using either version of their name; but externally, they +have the modified name. +.IP +This option lets you easily link together multiple +.I flex +programs into the same executable. Note, though, that using this +option also renames +.B yywrap(), +so you now +.I must +either +provide your own (appropriately-named) version of the routine for your +scanner, or use +.B %option noyywrap, +as linking with +.B \-lfl +no longer provides one for you by default. +.TP +.B \-Sskeleton_file +overrides the default skeleton file from which +.I flex +constructs its scanners. You'll never need this option unless you are doing +.I flex +maintenance or development. +.PP +.I flex +also provides a mechanism for controlling options within the +scanner specification itself, rather than from the flex command-line. 
+This is done by including +.B %option +directives in the first section of the scanner specification. +You can specify multiple options with a single +.B %option +directive, and multiple directives in the first section of your flex input +file. +.PP +Most options are given simply as names, optionally preceded by the +word "no" (with no intervening whitespace) to negate their meaning. +A number are equivalent to flex flags or their negation: +.nf + + 7bit -7 option + 8bit -8 option + align -Ca option + backup -b option + batch -B option + c++ -+ option + + caseful or + case-sensitive opposite of -i (default) + + case-insensitive or + caseless -i option + + debug -d option + default opposite of -s option + ecs -Ce option + fast -F option + full -f option + interactive -I option + lex-compat -l option + meta-ecs -Cm option + perf-report -p option + read -Cr option + stdout -t option + verbose -v option + warn opposite of -w option + (use "%option nowarn" for -w) + + array equivalent to "%array" + pointer equivalent to "%pointer" (default) + +.fi +Some +.B %option's +provide features otherwise not available: +.TP +.B always-interactive +instructs flex to generate a scanner which always considers its input +"interactive". Normally, on each new input file the scanner calls +.B isatty() +in an attempt to determine whether +the scanner's input source is interactive and thus should be read a +character at a time. When this option is used, however, then no +such call is made. +.TP +.B main +directs flex to provide a default +.B main() +program for the scanner, which simply calls +.B yylex(). +This option implies +.B noyywrap +(see below). +.TP +.B never-interactive +instructs flex to generate a scanner which never considers its input +"interactive" (again, no call made to +.B isatty()). +This is the opposite of +.B always-interactive. +.TP +.B stack +enables the use of start condition stacks (see Start Conditions above). 
+.TP +.B stdinit +if set (i.e., +.B %option stdinit) +initializes +.I yyin +and +.I yyout +to +.I stdin +and +.I stdout, +instead of the default of +.I nil. +Some existing +.I lex +programs depend on this behavior, even though it is not compliant with +ANSI C, which does not require +.I stdin +and +.I stdout +to be compile-time constant. +.TP +.B yylineno +directs +.I flex +to generate a scanner that maintains the number of the current line +read from its input in the global variable +.B yylineno. +This option is implied by +.B %option lex-compat. +.TP +.B yywrap +if unset (i.e., +.B %option noyywrap), +makes the scanner not call +.B yywrap() +upon an end-of-file, but simply assume that there are no more +files to scan (until the user points +.I yyin +at a new file and calls +.B yylex() +again). +.PP +.I flex +scans your rule actions to determine whether you use the +.B REJECT +or +.B yymore() +features. The +.B reject +and +.B yymore +options are available to override its decision as to whether you use the +options, either by setting them (e.g., +.B %option reject) +to indicate the feature is indeed used, or +unsetting them to indicate it actually is not used +(e.g., +.B %option noyymore). +.PP +Three options take string-delimited values, offset with '=': +.nf + + %option outfile="ABC" + +.fi +is equivalent to +.B -oABC, +and +.nf + + %option prefix="XYZ" + +.fi +is equivalent to +.B -PXYZ. +Finally, +.nf + + %option yyclass="foo" + +.fi +only applies when generating a C++ scanner ( +.B \-+ +option). It informs +.I flex +that you have derived +.B foo +as a subclass of +.B yyFlexLexer, +so +.I flex +will place your actions in the member function +.B foo::yylex() +instead of +.B yyFlexLexer::yylex(). +It also generates a +.B yyFlexLexer::yylex() +member function that emits a run-time error (by invoking +.B yyFlexLexer::LexerError()) +if called. +See Generating C++ Scanners, below, for additional information. 
+.PP +A number of options are available for lint purists who want to suppress +the appearance of unneeded routines in the generated scanner. Each of the +following, if unset +(e.g., +.B %option nounput +), results in the corresponding routine not appearing in +the generated scanner: +.nf + + input, unput + yy_push_state, yy_pop_state, yy_top_state + yy_scan_buffer, yy_scan_bytes, yy_scan_string + +.fi +(though +.B yy_push_state() +and friends won't appear anyway unless you use +.B %option stack). +.SH PERFORMANCE CONSIDERATIONS +The main design goal of +.I flex +is that it generate high-performance scanners. It has been optimized +for dealing well with large sets of rules. Aside from the effects on +scanner speed of the table compression +.B \-C +options outlined above, +there are a number of options/actions which degrade performance. These +are, from most expensive to least: +.nf + + REJECT + %option yylineno + arbitrary trailing context + + pattern sets that require backing up + %array + %option interactive + %option always-interactive + + '^' beginning-of-line operator + yymore() + +.fi +with the first three all being quite expensive and the last two +being quite cheap. Note also that +.B unput() +is implemented as a routine call that potentially does quite a bit of +work, while +.B yyless() +is a quite-cheap macro; so if just putting back some excess text you +scanned, use +.B yyless(). +.PP +.B REJECT +should be avoided at all costs when performance is important. +It is a particularly expensive option. +.PP +Getting rid of backing up is messy and often may be an enormous +amount of work for a complicated scanner. In principle, one begins +by using the +.B \-b +flag to generate a +.I lex.backup +file.
For example, on the input +.nf + + %% + foo return TOK_KEYWORD; + foobar return TOK_KEYWORD; + +.fi +the file looks like: +.nf + + State #6 is non-accepting - + associated rule line numbers: + 2 3 + out-transitions: [ o ] + jam-transitions: EOF [ \\001-n p-\\177 ] + + State #8 is non-accepting - + associated rule line numbers: + 3 + out-transitions: [ a ] + jam-transitions: EOF [ \\001-` b-\\177 ] + + State #9 is non-accepting - + associated rule line numbers: + 3 + out-transitions: [ r ] + jam-transitions: EOF [ \\001-q s-\\177 ] + + Compressed tables always back up. + +.fi +The first few lines tell us that there's a scanner state in +which it can make a transition on an 'o' but not on any other +character, and that in that state the currently scanned text does not match +any rule. The state occurs when trying to match the rules found +at lines 2 and 3 in the input file. +If the scanner is in that state and then reads +something other than an 'o', it will have to back up to find +a rule which is matched. With +a bit of headscratching one can see that this must be the +state it's in when it has seen "fo". When this has happened, +if anything other than another 'o' is seen, the scanner will +have to back up to simply match the 'f' (by the default rule). +.PP +The comment regarding State #8 indicates there's a problem +when "foob" has been scanned. Indeed, on any character other +than an 'a', the scanner will have to back up to accept "foo". +Similarly, the comment for State #9 concerns when "fooba" has +been scanned and an 'r' does not follow. +.PP +The final comment reminds us that there's no point going to +all the trouble of removing backing up from the rules unless +we're using +.B \-Cf +or +.B \-CF, +since there's no performance gain doing so with compressed scanners. 
+.PP +The way to remove the backing up is to add "error" rules: +.nf + + %% + foo return TOK_KEYWORD; + foobar return TOK_KEYWORD; + + fooba | + foob | + fo { + /* false alarm, not really a keyword */ + return TOK_ID; + } + +.fi +.PP +Eliminating backing up among a list of keywords can also be +done using a "catch-all" rule: +.nf + + %% + foo return TOK_KEYWORD; + foobar return TOK_KEYWORD; + + [a-z]+ return TOK_ID; + +.fi +This is usually the best solution when appropriate. +.PP +Backing up messages tend to cascade. +With a complicated set of rules it's not uncommon to get hundreds +of messages. If one can decipher them, though, it often +only takes a dozen or so rules to eliminate the backing up (though +it's easy to make a mistake and have an error rule accidentally match +a valid token. A possible future +.I flex +feature will be to automatically add rules to eliminate backing up). +.PP +It's important to keep in mind that you gain the benefits of eliminating +backing up only if you eliminate +.I every +instance of backing up. Leaving just one means you gain nothing. +.PP +.I Variable +trailing context (where both the leading and trailing parts do not have +a fixed length) entails almost the same performance loss as +.B REJECT +(i.e., substantial). So when possible a rule like: +.nf + + %% + mouse|rat/(cat|dog) run(); + +.fi +is better written: +.nf + + %% + mouse/cat|dog run(); + rat/cat|dog run(); + +.fi +or as +.nf + + %% + mouse|rat/cat run(); + mouse|rat/dog run(); + +.fi +Note that here the special '|' action does +.I not +provide any savings, and can even make things worse (see +Deficiencies / Bugs below). +.LP +Another area where the user can increase a scanner's performance +(and one that's easier to implement) arises from the fact that +the longer the tokens matched, the faster the scanner will run. 
+This is because with long tokens the processing of most input +characters takes place in the (short) inner scanning loop, and +does not often have to go through the additional work of setting up +the scanning environment (e.g., +.B yytext) +for the action. Recall the scanner for C comments: +.nf + + %x comment + %% + int line_num = 1; + + "/*" BEGIN(comment); + + <comment>[^*\\n]* + <comment>"*"+[^*/\\n]* + <comment>\\n ++line_num; + <comment>"*"+"/" BEGIN(INITIAL); + +.fi +This could be sped up by writing it as: +.nf + + %x comment + %% + int line_num = 1; + + "/*" BEGIN(comment); + + <comment>[^*\\n]* + <comment>[^*\\n]*\\n ++line_num; + <comment>"*"+[^*/\\n]* + <comment>"*"+[^*/\\n]*\\n ++line_num; + <comment>"*"+"/" BEGIN(INITIAL); + +.fi +Now instead of each newline requiring the processing of another +action, recognizing the newlines is "distributed" over the other rules +to keep the matched text as long as possible. Note that +.I adding +rules does +.I not +slow down the scanner! The speed of the scanner is independent +of the number of rules or (modulo the considerations given at the +beginning of this section) how complicated the rules are with +regard to operators such as '*' and '|'. +.PP +A final example in speeding up a scanner: suppose you want to scan +through a file containing identifiers and keywords, one per line +and with no other extraneous characters, and recognize all the +keywords. A natural first approach is: +.nf + + %% + asm | + auto | + break | + ... etc ... + volatile | + while /* it's a keyword */ + + .|\\n /* it's not a keyword */ + +.fi +To eliminate the back-tracking, introduce a catch-all rule: +.nf + + %% + asm | + auto | + break | + ... etc ... 
+ volatile | + while /* it's a keyword */ + + [a-z]+ | + .|\\n /* it's not a keyword */ + +.fi +Now, if it's guaranteed that there's exactly one word per line, +then we can reduce the total number of matches by a half by +merging in the recognition of newlines with that of the other +tokens: +.nf + + %% + asm\\n | + auto\\n | + break\\n | + ... etc ... + volatile\\n | + while\\n /* it's a keyword */ + + [a-z]+\\n | + .|\\n /* it's not a keyword */ + +.fi +One has to be careful here, as we have now reintroduced backing up +into the scanner. In particular, while +.I we +know that there will never be any characters in the input stream +other than letters or newlines, +.I flex +can't figure this out, and it will plan for possibly needing to back up +when it has scanned a token like "auto" and then the next character +is something other than a newline or a letter. Previously it would +then just match the "auto" rule and be done, but now it has no "auto" +rule, only a "auto\\n" rule. To eliminate the possibility of backing up, +we could either duplicate all rules but without final newlines, or, +since we never expect to encounter such an input and therefore don't +care how it's classified, we can introduce one more catch-all rule, this +one which doesn't include a newline: +.nf + + %% + asm\\n | + auto\\n | + break\\n | + ... etc ... + volatile\\n | + while\\n /* it's a keyword */ + + [a-z]+\\n | + [a-z]+ | + .|\\n /* it's not a keyword */ + +.fi +Compiled with +.B \-Cf, +this is about as fast as one can get a +.I flex +scanner to go for this particular problem. +.PP +A final note: +.I flex +is slow when matching NUL's, particularly when a token contains +multiple NUL's. +It's best to write rules which match +.I short +amounts of text if it's anticipated that the text will often include NUL's.
+.PP +Another final note regarding performance: as mentioned above in the section +How the Input is Matched, dynamically resizing +.B yytext +to accommodate huge tokens is a slow process because it presently requires that +the (huge) token be rescanned from the beginning. Thus if performance is +vital, you should attempt to match "large" quantities of text but not +"huge" quantities, where the cutoff between the two is at about 8K +characters/token. +.SH GENERATING C++ SCANNERS +.I flex +provides two different ways to generate scanners for use with C++. The +first way is to simply compile a scanner generated by +.I flex +using a C++ compiler instead of a C compiler. You should not encounter +any compilation errors (please report any you find to the email address +given in the Author section below). You can then use C++ code in your +rule actions instead of C code. Note that the default input source for +your scanner remains +.I yyin, +and default echoing is still done to +.I yyout. +Both of these remain +.I FILE * +variables and not C++ +.I streams. +.PP +You can also use +.I flex +to generate a C++ scanner class, using the +.B \-+ +option (or, equivalently, +.B %option c++), +which is automatically specified if the name of the flex +executable ends in a '+', such as +.I flex++. +When using this option, flex defaults to generating the scanner to the file +.B lex.yy.cc +instead of +.B lex.yy.c. +The generated scanner includes the header file +.I FlexLexer.h, +which defines the interface to two C++ classes. +.PP +The first class, +.B FlexLexer, +provides an abstract base class defining the general scanner class +interface. It provides the following member functions: +.TP +.B const char* YYText() +returns the text of the most recently matched token, the equivalent of +.B yytext. +.TP +.B int YYLeng() +returns the length of the most recently matched token, the equivalent of +.B yyleng.
+.TP +.B int lineno() const +returns the current input line number +(see +.B %option yylineno), +or +.B 1 +if +.B %option yylineno +was not used. +.TP +.B void set_debug( int flag ) +sets the debugging flag for the scanner, equivalent to assigning to +.B yy_flex_debug +(see the Options section above). Note that you must build the scanner +using +.B %option debug +to include debugging information in it. +.TP +.B int debug() const +returns the current setting of the debugging flag. +.PP +Also provided are member functions equivalent to +.B yy_switch_to_buffer(), +.B yy_create_buffer() +(though the first argument is an +.B istream* +object pointer and not a +.B FILE*), +.B yy_flush_buffer(), +.B yy_delete_buffer(), +and +.B yyrestart() +(again, the first argument is a +.B istream* +object pointer). +.PP +The second class defined in +.I FlexLexer.h +is +.B yyFlexLexer, +which is derived from +.B FlexLexer. +It defines the following additional member functions: +.TP +.B +yyFlexLexer( istream* arg_yyin = 0, ostream* arg_yyout = 0 ) +constructs a +.B yyFlexLexer +object using the given streams for input and output. If not specified, +the streams default to +.B cin +and +.B cout, +respectively. +.TP +.B virtual int yylex() +performs the same role as +.B yylex() +does for ordinary flex scanners: it scans the input stream, consuming +tokens, until a rule's action returns a value. If you derive a subclass +.B S +from +.B yyFlexLexer +and want to access the member functions and variables of +.B S +inside +.B yylex(), +then you need to use +.B %option yyclass="S" +to inform +.I flex +that you will be using that subclass instead of +.B yyFlexLexer. +In this case, rather than generating +.B yyFlexLexer::yylex(), +.I flex +generates +.B S::yylex() +(and also generates a dummy +.B yyFlexLexer::yylex() +that calls +.B yyFlexLexer::LexerError() +if called).
+.TP +.B +virtual void switch_streams(istream* new_in = 0, +.B +ostream* new_out = 0) +reassigns +.B yyin +to +.B new_in +(if non-nil) +and +.B yyout +to +.B new_out +(ditto), deleting the previous input buffer if +.B yyin +is reassigned. +.TP +.B +int yylex( istream* new_in, ostream* new_out = 0 ) +first switches the input streams via +.B switch_streams( new_in, new_out ) +and then returns the value of +.B yylex(). +.PP +In addition, +.B yyFlexLexer +defines the following protected virtual functions which you can redefine +in derived classes to tailor the scanner: +.TP +.B +virtual int LexerInput( char* buf, int max_size ) +reads up to +.B max_size +characters into +.B buf +and returns the number of characters read. To indicate end-of-input, +return 0 characters. Note that "interactive" scanners (see the +.B \-B +and +.B \-I +flags) define the macro +.B YY_INTERACTIVE. +If you redefine +.B LexerInput() +and need to take different actions depending on whether or not +the scanner might be scanning an interactive input source, you can +test for the presence of this name via +.B #ifdef. +.TP +.B +virtual void LexerOutput( const char* buf, int size ) +writes out +.B size +characters from the buffer +.B buf, +which, while NUL-terminated, may also contain "internal" NUL's if +the scanner's rules can match text with NUL's in them. +.TP +.B +virtual void LexerError( const char* msg ) +reports a fatal error message. The default version of this function +writes the message to the stream +.B cerr +and exits. +.PP +Note that a +.B yyFlexLexer +object contains its +.I entire +scanning state. Thus you can use such objects to create reentrant +scanners. You can instantiate multiple instances of the same +.B yyFlexLexer +class, and you can also combine multiple C++ scanner classes together +in the same program using the +.B \-P +option discussed above. 
+.PP +Finally, note that the +.B %array +feature is not available to C++ scanner classes; you must use +.B %pointer +(the default). +.PP +Here is an example of a simple C++ scanner: +.nf + + // An example of using the flex C++ scanner class. + + %{ + int mylineno = 0; + %} + + string \\"[^\\n"]+\\" + + ws [ \\t]+ + + alpha [A-Za-z] + dig [0-9] + name ({alpha}|{dig}|\\$)({alpha}|{dig}|[_.\\-/$])* + num1 [-+]?{dig}+\\.?([eE][-+]?{dig}+)? + num2 [-+]?{dig}*\\.{dig}+([eE][-+]?{dig}+)? + number {num1}|{num2} + + %% + + {ws} /* skip blanks and tabs */ + + "/*" { + int c; + + while((c = yyinput()) != 0) + { + if(c == '\\n') + ++mylineno; + + else if(c == '*') + { + if((c = yyinput()) == '/') + break; + else + unput(c); + } + } + } + + {number} cout << "number " << YYText() << '\\n'; + + \\n mylineno++; + + {name} cout << "name " << YYText() << '\\n'; + + {string} cout << "string " << YYText() << '\\n'; + + %% + + int main( int /* argc */, char** /* argv */ ) + { + FlexLexer* lexer = new yyFlexLexer; + while(lexer->yylex() != 0) + ; + return 0; + } +.fi +If you want to create multiple (different) lexer classes, you use the +.B \-P +flag (or the +.B prefix= +option) to rename each +.B yyFlexLexer +to some other +.B xxFlexLexer. +You then can include +.B <FlexLexer.h> +in your other sources once per lexer class, first renaming +.B yyFlexLexer +as follows: +.nf + + #undef yyFlexLexer + #define yyFlexLexer xxFlexLexer + #include <FlexLexer.h> + + #undef yyFlexLexer + #define yyFlexLexer zzFlexLexer + #include <FlexLexer.h> + +.fi +if, for example, you used +.B %option prefix="xx" +for one of your scanners and +.B %option prefix="zz" +for the other. +.PP +IMPORTANT: the present form of the scanning class is +.I experimental +and may change considerably between major releases. 
+.SH INCOMPATIBILITIES WITH LEX AND POSIX +.I flex +is a rewrite of the AT&T Unix +.I lex +tool (the two implementations do not share any code, though), +with some extensions and incompatibilities, both of which +are of concern to those who wish to write scanners acceptable +to either implementation. Flex is fully compliant with the POSIX +.I lex +specification, except that when using +.B %pointer +(the default), a call to +.B unput() +destroys the contents of +.B yytext, +which is counter to the POSIX specification. +.PP +In this section we discuss all of the known areas of incompatibility +between flex, AT&T lex, and the POSIX specification. +.PP +.I flex's +.B \-l +option turns on maximum compatibility with the original AT&T +.I lex +implementation, at the cost of a major loss in the generated scanner's +performance. We note below which incompatibilities can be overcome +using the +.B \-l +option. +.PP +.I flex +is fully compatible with +.I lex +with the following exceptions: +.IP - +The undocumented +.I lex +scanner internal variable +.B yylineno +is not supported unless +.B \-l +or +.B %option yylineno +is used. +.IP +.B yylineno +should be maintained on a per-buffer basis, rather than a per-scanner +(single global variable) basis. +.IP +.B yylineno +is not part of the POSIX specification. +.IP - +The +.B input() +routine is not redefinable, though it may be called to read characters +following whatever has been matched by a rule. If +.B input() +encounters an end-of-file the normal +.B yywrap() +processing is done. A ``real'' end-of-file is returned by +.B input() +as +.I EOF. +.IP +Input is instead controlled by defining the +.B YY_INPUT +macro. +.IP +The +.I flex +restriction that +.B input() +cannot be redefined is in accordance with the POSIX specification, +which simply does not specify any way of controlling the +scanner's input other than by making an initial assignment to +.I yyin. +.IP - +The +.B unput() +routine is not redefinable. 
This restriction is in accordance with POSIX. +.IP - +.I flex +scanners are not as reentrant as +.I lex +scanners. In particular, if you have an interactive scanner and +an interrupt handler which long-jumps out of the scanner, and +the scanner is subsequently called again, you may get the following +message: +.nf + + fatal flex scanner internal error--end of buffer missed + +.fi +To reenter the scanner, first use +.nf + + yyrestart( yyin ); + +.fi +Note that this call will throw away any buffered input; usually this +isn't a problem with an interactive scanner. +.IP +Also note that flex C++ scanner classes +.I are +reentrant, so if using C++ is an option for you, you should use +them instead. See "Generating C++ Scanners" above for details. +.IP - +.B output() +is not supported. +Output from the +.B ECHO +macro is done to the file-pointer +.I yyout +(default +.I stdout). +.IP +.B output() +is not part of the POSIX specification. +.IP - +.I lex +does not support exclusive start conditions (%x), though they +are in the POSIX specification. +.IP - +When definitions are expanded, +.I flex +encloses them in parentheses. +With lex, the following: +.nf + + NAME [A-Z][A-Z0-9]* + %% + foo{NAME}? printf( "Found it\\n" ); + %% + +.fi +will not match the string "foo" because when the macro +is expanded the rule is equivalent to "foo[A-Z][A-Z0-9]*?" +and the precedence is such that the '?' is associated with +"[A-Z0-9]*". With +.I flex, +the rule will be expanded to +"foo([A-Z][A-Z0-9]*)?" and so the string "foo" will match. +.IP +Note that if the definition begins with +.B ^ +or ends with +.B $ +then it is +.I not +expanded with parentheses, to allow these operators to appear in +definitions without losing their special meanings. But the +.B <s>, /, +and +.B <<EOF>> +operators cannot be used in a +.I flex +definition. +.IP +Using +.B \-l +results in the +.I lex +behavior of no parentheses around the definition. 
+.IP +The POSIX specification is that the definition be enclosed in parentheses. +.IP - +Some implementations of +.I lex +allow a rule's action to begin on a separate line, if the rule's pattern +has trailing whitespace: +.nf + + %% + foo|bar<space here> + { foobar_action(); } + +.fi +.I flex +does not support this feature. +.IP - +The +.I lex +.B %r +(generate a Ratfor scanner) option is not supported. It is not part +of the POSIX specification. +.IP - +After a call to +.B unput(), +.I yytext +is undefined until the next token is matched, unless the scanner +was built using +.B %array. +This is not the case with +.I lex +or the POSIX specification. The +.B \-l +option does away with this incompatibility. +.IP - +The precedence of the +.B {} +(numeric range) operator is different. +.I lex +interprets "abc{1,3}" as "match one, two, or +three occurrences of 'abc'", whereas +.I flex +interprets it as "match 'ab' +followed by one, two, or three occurrences of 'c'". The latter is +in agreement with the POSIX specification. +.IP - +The precedence of the +.B ^ +operator is different. +.I lex +interprets "^foo|bar" as "match either 'foo' at the beginning of a line, +or 'bar' anywhere", whereas +.I flex +interprets it as "match either 'foo' or 'bar' if they come at the beginning +of a line". The latter is in agreement with the POSIX specification. +.IP - +The special table-size declarations such as +.B %a +supported by +.I lex +are not required by +.I flex +scanners; +.I flex +ignores them. +.IP - +The name +.B +FLEX_SCANNER +is #define'd so scanners may be written for use with either +.I flex +or +.I lex. +Scanners also include +.B YY_FLEX_MAJOR_VERSION +and +.B YY_FLEX_MINOR_VERSION +indicating which version of +.I flex +generated the scanner +(for example, for the 2.5 release, these defines would be 2 and 5 +respectively). 
+.PP +The following +.I flex +features are not included in +.I lex +or the POSIX specification: +.nf + + C++ scanners + %option + start condition scopes + start condition stacks + interactive/non-interactive scanners + yy_scan_string() and friends + yyterminate() + yy_set_interactive() + yy_set_bol() + YY_AT_BOL() + <<EOF>> + <*> + YY_DECL + YY_START + YY_USER_ACTION + YY_USER_INIT + #line directives + %{}'s around actions + multiple actions on a line + +.fi +plus almost all of the flex flags. +The last feature in the list refers to the fact that with +.I flex +you can put multiple actions on the same line, separated with +semi-colons, while with +.I lex, +the following +.nf + + foo handle_foo(); ++num_foos_seen; + +.fi +is (rather surprisingly) truncated to +.nf + + foo handle_foo(); + +.fi +.I flex +does not truncate the action. Actions that are not enclosed in +braces are simply terminated at the end of the line. +.SH DIAGNOSTICS +.PP +.I warning, rule cannot be matched +indicates that the given rule +cannot be matched because it follows other rules that will +always match the same text as it. For +example, in the following "foo" cannot be matched because it comes after +an identifier "catch-all" rule: +.nf + + [a-z]+ got_identifier(); + foo got_foo(); + +.fi +Using +.B REJECT +in a scanner suppresses this warning. +.PP +.I warning, +.B \-s +.I +option given but default rule can be matched +means that it is possible (perhaps only in a particular start condition) +that the default rule (match any single character) is the only one +that will match a particular input. Since +.B \-s +was given, presumably this is not intended. +.PP +.I reject_used_but_not_detected undefined +or +.I yymore_used_but_not_detected undefined - +These errors can occur at compile time. 
They indicate that the +scanner uses +.B REJECT +or +.B yymore() +but that +.I flex +failed to notice the fact, meaning that +.I flex +scanned the first two sections looking for occurrences of these actions +and failed to find any, but somehow you snuck some in (via a #include +file, for example). Use +.B %option reject +or +.B %option yymore +to indicate to flex that you really do use these features. +.PP +.I flex scanner jammed - +a scanner compiled with +.B \-s +has encountered an input string which wasn't matched by +any of its rules. This error can also occur due to internal problems. +.PP +.I token too large, exceeds YYLMAX - +your scanner uses +.B %array +and one of its rules matched a string longer than the +.B YYLMAX +constant (8K bytes by default). You can increase the value by +#define'ing +.B YYLMAX +in the definitions section of your +.I flex +input. +.PP +.I scanner requires \-8 flag to +.I use the character 'x' - +Your scanner specification includes recognizing the 8-bit character +.I 'x' +and you did not specify the \-8 flag, and your scanner defaulted to 7-bit +because you used the +.B \-Cf +or +.B \-CF +table compression options. See the discussion of the +.B \-7 +flag for details. +.PP +.I flex scanner push-back overflow - +you used +.B unput() +to push back so much text that the scanner's buffer could not hold +both the pushed-back text and the current token in +.B yytext. +Ideally the scanner should dynamically resize the buffer in this case, but at +present it does not. +.PP +.I +input buffer overflow, can't enlarge buffer because scanner uses REJECT - +the scanner was working on matching an extremely large token and needed +to expand the input buffer. This doesn't work with scanners that use +.B +REJECT. +.PP +.I +fatal flex scanner internal error--end of buffer missed - +This can occur in a scanner which is reentered after a long-jump +has jumped out (or over) the scanner's activation frame.
Before +reentering the scanner, use: +.nf + + yyrestart( yyin ); + +.fi +or, as noted above, switch to using the C++ scanner class. +.PP +.I too many start conditions in <> construct! - +you listed more start conditions in a <> construct than exist (so +you must have listed at least one of them twice). +.SH FILES +.TP +.B \-lfl +library with which scanners must be linked. +.TP +.I lex.yy.c +generated scanner (called +.I lexyy.c +on some systems). +.TP +.I lex.yy.cc +generated C++ scanner class, when using +.B -+. +.TP +.I <FlexLexer.h> +header file defining the C++ scanner base class, +.B FlexLexer, +and its derived class, +.B yyFlexLexer. +.TP +.I flex.skl +skeleton scanner. This file is only used when building flex, not when +flex executes. +.TP +.I lex.backup +backing-up information for +.B \-b +flag (called +.I lex.bck +on some systems). +.SH DEFICIENCIES / BUGS +.PP +Some trailing context +patterns cannot be properly matched and generate +warning messages ("dangerous trailing context"). These are +patterns where the ending of the +first part of the rule matches the beginning of the second +part, such as "zx*/xy*", where the 'x*' matches the 'x' at +the beginning of the trailing context. (Note that the POSIX draft +states that the text matched by such patterns is undefined.) +.PP +For some trailing context rules, parts which are actually fixed-length are +not recognized as such, leading to the abovementioned performance loss. +In particular, parts using '|' or {n} (such as "foo{3}") are always +considered variable-length. +.PP +Combining trailing context with the special '|' action can result in +.I fixed +trailing context being turned into the more expensive +.I variable +trailing context. For example, in the following: +.nf + + %% + abc | + xyz/def + +.fi +.PP +Use of +.B unput() +invalidates yytext and yyleng, unless the +.B %array +directive +or the +.B \-l +option has been used. 
+.PP +Pattern-matching of NUL's is substantially slower than matching other +characters. +.PP +Dynamic resizing of the input buffer is slow, as it entails rescanning +all the text matched so far by the current (generally huge) token. +.PP +Due to both buffering of input and read-ahead, you cannot intermix +calls to <stdio.h> routines, such as, for example, +.B getchar(), +with +.I flex +rules and expect it to work. Call +.B input() +instead. +.PP +The total table entries listed by the +.B \-v +flag excludes the number of table entries needed to determine +what rule has been matched. The number of entries is equal +to the number of DFA states if the scanner does not use +.B REJECT, +and somewhat greater than the number of states if it does. +.PP +.B REJECT +cannot be used with the +.B \-f +or +.B \-F +options. +.PP +The +.I flex +internal algorithms need documentation. +.SH SEE ALSO +.PP +lex(1), yacc(1), sed(1), awk(1). +.PP +John Levine, Tony Mason, and Doug Brown, +.I Lex & Yacc, +O'Reilly and Associates. Be sure to get the 2nd edition. +.PP +M. E. Lesk and E. Schmidt, +.I LEX \- Lexical Analyzer Generator +.PP +Alfred Aho, Ravi Sethi and Jeffrey Ullman, +.I Compilers: Principles, Techniques and Tools, +Addison-Wesley (1986). Describes the pattern-matching techniques used by +.I flex +(deterministic finite automata). +.SH AUTHOR +Vern Paxson, with the help of many ideas and much inspiration from +Van Jacobson. Original version by Jef Poskanzer. The fast table +representation is a partial implementation of a design done by Van +Jacobson. The implementation was done by Kevin Gong and Vern Paxson. +.PP +Thanks to the many +.I flex +beta-testers, feedbackers, and contributors, especially Francois Pinard, +Casey Leedom, +Robert Abramovitz, +Stan Adermann, Terry Allen, David Barker-Plummer, John Basrai, +Neal Becker, Nelson H.F. Beebe, benson@odi.com, +Karl Berry, Peter A. 
Bigot, Simon Blanchard, +Keith Bostic, Frederic Brehm, Ian Brockbank, Kin Cho, Nick Christopher, +Brian Clapper, J.T. Conklin, +Jason Coughlin, Bill Cox, Nick Cropper, Dave Curtis, Scott David +Daniels, Chris G. Demetriou, Theo Deraadt, +Mike Donahue, Chuck Doucette, Tom Epperly, Leo Eskin, +Chris Faylor, Chris Flatters, Jon Forrest, Jeffrey Friedl, +Joe Gayda, Kaveh R. Ghazi, Wolfgang Glunz, +Eric Goldman, Christopher M. Gould, Ulrich Grepel, Peer Griebel, +Jan Hajic, Charles Hemphill, NORO Hideo, +Jarkko Hietaniemi, Scott Hofmann, +Jeff Honig, Dana Hudes, Eric Hughes, John Interrante, +Ceriel Jacobs, Michal Jaegermann, Sakari Jalovaara, Jeffrey R. Jones, +Henry Juengst, Klaus Kaempf, Jonathan I. Kamens, Terrence O Kane, +Amir Katz, ken@ken.hilco.com, Kevin B. Kenny, +Steve Kirsch, Winfried Koenig, Marq Kole, Ronald Lamprecht, +Greg Lee, Rohan Lenard, Craig Leres, John Levine, Steve Liddle, +David Loffredo, Mike Long, +Mohamed el Lozy, Brian Madsen, Malte, Joe Marshall, +Bengt Martensson, Chris Metcalf, +Luke Mewburn, Jim Meyering, R. Alexander Milowski, Erik Naggum, +G.T. Nicol, Landon Noll, James Nordby, Marc Nozell, +Richard Ohnemus, Karsten Pahnke, +Sven Panne, Roland Pesch, Walter Pelissero, Gaumond +Pierre, Esmond Pitt, Jef Poskanzer, Joe Rahmeh, Jarmo Raiha, +Frederic Raimbault, Pat Rankin, Rick Richardson, +Kevin Rodgers, Kai Uwe Rommel, Jim Roskind, Alberto Santini, +Andreas Scherer, Darrell Schiebel, Raf Schietekat, +Doug Schmidt, Philippe Schnoebelen, Andreas Schwab, +Larry Schwimmer, Alex Siegel, Eckehard Stolz, Jan-Erik Strvmquist, +Mike Stump, Paul Stuart, Dave Tallman, Ian Lance Taylor, +Chris Thewalt, Richard M. Timoney, Jodi Tsai, +Paul Tuinenga, Gary Weik, Frank Whaley, Gerhard Wilhelms, Kent Williams, Ken +Yap, Ron Zellar, Nathan Zelle, David Zuhn, +and those whose names have slipped my marginal +mail-archiving skills but whose contributions are appreciated all the +same. 
+.PP +Thanks to Keith Bostic, Jon Forrest, Noah Friedman, +John Gilmore, Craig Leres, John Levine, Bob Mulcahy, G.T. +Nicol, Francois Pinard, Rich Salz, and Richard Stallman for help with various +distribution headaches. +.PP +Thanks to Esmond Pitt and Earle Horton for 8-bit character support; to +Benson Margulies and Fred Burke for C++ support; to Kent Williams and Tom +Epperly for C++ class support; to Ove Ewerlid for support of NUL's; and to +Eric Hughes for support of multiple buffers. +.PP +This work was primarily done when I was with the Real Time Systems Group +at the Lawrence Berkeley Laboratory in Berkeley, CA. Many thanks to all there +for the support I received. +.PP +Send comments to vern@ee.lbl.gov. diff --git a/to.do/unicode/flex.skl b/to.do/unicode/flex.skl new file mode 100644 index 0000000..9b527ec --- /dev/null +++ b/to.do/unicode/flex.skl @@ -0,0 +1,1542 @@ +/* A lexical scanner generated by flex */ + +/* Scanner skeleton version: + * $Header$ + */ + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 5 + +%- +#include <stdio.h> +%* + + +/* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */ +#ifdef c_plusplus +#ifndef __cplusplus +#define __cplusplus +#endif +#endif + + +#ifdef __cplusplus + +#include <stdlib.h> +%+ +class istream; +%* +#include <unistd.h> + +/* Use prototypes in function declarations. */ +#define YY_USE_PROTOS + +/* The "const" storage-class-modifier is valid. */ +#define YY_USE_CONST + +#else /* ! __cplusplus */ + +#if __STDC__ + +#define YY_USE_PROTOS +#define YY_USE_CONST + +#endif /* __STDC__ */ +#endif /* ! 
__cplusplus */ + +#ifdef __TURBOC__ + #pragma warn -rch + #pragma warn -use +#include <io.h> +#include <stdlib.h> +#define YY_USE_CONST +#define YY_USE_PROTOS +#endif + +#ifdef YY_USE_CONST +#define yyconst const +#else +#define yyconst +#endif + + +#ifdef YY_USE_PROTOS +#define YY_PROTO(proto) proto +#else +#define YY_PROTO(proto) () +#endif + +%% YY_CHAR and YY_SC_TO_UI() go here + +/* Returned upon end-of-file. */ +#define YY_NULL 0 + +/* Enter a start condition. This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN. + */ +#define BEGIN yy_start = 1 + 2 * + +/* Translate the current start state into a value that can be later handed + * to BEGIN to return to the state. The YYSTATE alias is for lex + * compatibility. + */ +#define YY_START ((yy_start - 1) / 2) +#define YYSTATE YY_START + +/* Action number for EOF rule of a given start state. */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) + +/* Special action meaning "start processing a new file". */ +#define YY_NEW_FILE yyrestart( yyin ) + +#define YY_END_OF_BUFFER_CHAR 0 + +/* Size of default input buffer. */ +#define YY_BUF_SIZE 16384 + +typedef struct yy_buffer_state *YY_BUFFER_STATE; + +extern int yyleng; +%- +extern FILE *yyin, *yyout; +%* + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + +/* The funky do-while in the following #define is used to turn the definition + * into a single C statement (which needs a semi-colon terminator). This + * avoids problems with code like: + * + * if ( condition_holds ) + * yyless( 5 ); + * else + * do_something_else(); + * + * Prior to using the do-while the compiler would get upset at the + * "else" because it interpreted the "if" statement as being all + * done when it reached the ';' after the yyless() call. + */ + +/* Return all but the first 'n' matched characters back to the input stream.
*/ + +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + *yy_cp = yy_hold_char; \ + YY_RESTORE_YY_MORE_OFFSET \ + yy_c_buf_p = yy_cp = yy_bp + n - YY_MORE_ADJ; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) + +#define unput(c) yyunput( c, yytext_ptr ) + +/* The following is because we cannot portably get our hands on size_t + * (without autoconf's help, which isn't available because we want + * flex-generated scanners to compile on their own). + */ +typedef unsigned int yy_size_t; + + +struct yy_buffer_state + { +%- + FILE *yy_input_file; +%+ + istream* yy_input_file; +%* + + YY_CHAR *yy_ch_buf; /* input buffer */ + YY_CHAR *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + yy_size_t yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; + + int yy_buffer_status; +#define YY_BUFFER_NEW 0 +#define YY_BUFFER_NORMAL 1 + /* When an EOF's been seen but there's still some text to process + * then we mark the buffer as YY_EOF_PENDING, to indicate that we + * shouldn't try reading from the input source any more. 
We might + * still have a bunch of tokens to match, though, because of + * possible backing-up. + * + * When we actually see the EOF, we change the status to "new" + * (via yyrestart()), so that the user can continue scanning by + * just pointing yyin at a new input file. + */ +#define YY_BUFFER_EOF_PENDING 2 + }; + +%- Standard (non-C++) definition +static YY_BUFFER_STATE yy_current_buffer = 0; +%* + +/* We provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state". + */ +#define YY_CURRENT_BUFFER yy_current_buffer + + +%- Standard (non-C++) definition +/* yy_hold_char holds the character lost when yytext is formed. */ +static YY_CHAR yy_hold_char; + +static int yy_n_chars; /* number of characters read into yy_ch_buf */ + + +int yyleng; + +/* Points to current character in buffer. */ +static YY_CHAR *yy_c_buf_p = (YY_CHAR *) 0; +static int yy_init = 1; /* whether we need to initialize */ +static int yy_start = 0; /* start state number */ + +/* Flag which is used to allow yywrap()'s to do buffer switches + * instead of setting up a fresh yyin. A bit of a hack ... + */ +static int yy_did_buffer_switch_on_eof; + +void yyrestart YY_PROTO(( FILE *input_file )); + +void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer )); +void yy_load_buffer_state YY_PROTO(( void )); +YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size )); +void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b )); +void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file )); +void yy_flush_buffer YY_PROTO(( YY_BUFFER_STATE b )); +#define YY_FLUSH_BUFFER yy_flush_buffer( yy_current_buffer ) + +YY_BUFFER_STATE yy_scan_buffer YY_PROTO(( YY_CHAR *base, yy_size_t size )); +YY_BUFFER_STATE yy_scan_string YY_PROTO(( yyconst YY_CHAR *yy_str )); + +/* This is the old yy_scan_bytes function - renamed to avoid + * confusion since a character may now be 1 or 2 bytes. 
+ */ +YY_BUFFER_STATE yy_scan_chars YY_PROTO(( yyconst YY_CHAR *chars, int len )); +%* + +static void *yy_flex_alloc YY_PROTO(( yy_size_t )); +static void *yy_flex_realloc YY_PROTO(( void *, yy_size_t )); +static void yy_flex_free YY_PROTO(( void * )); + +#define yy_new_buffer yy_create_buffer + +#define yy_set_interactive(is_interactive) \ + { \ + if ( ! yy_current_buffer ) \ + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ + yy_current_buffer->yy_is_interactive = is_interactive; \ + } + +#define yy_set_bol(at_bol) \ + { \ + if ( ! yy_current_buffer ) \ + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ + yy_current_buffer->yy_at_bol = at_bol; \ + } + +#define YY_AT_BOL() (yy_current_buffer->yy_at_bol) + +%% yytext/yyin/yyout/yy_state_type/yylineno etc. def's & init go here + +%- Standard (non-C++) definition +static yy_state_type yy_get_previous_state YY_PROTO(( void )); +static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); +static int yy_get_next_buffer YY_PROTO(( void )); +static void yy_fatal_error YY_PROTO(( yyconst char msg[] )); +%* + +/* Done after the current pattern has been matched and before the + * corresponding action - sets up yytext. + */ +#define YY_DO_BEFORE_ACTION \ + yytext_ptr = yy_bp; \ +%% code to fiddle yytext and yyleng for yymore() goes here + yy_hold_char = *yy_cp; \ + *yy_cp = (YY_CHAR) '\0'; \ +%% code to copy yytext_ptr to yytext[] goes here, if %array + yy_c_buf_p = yy_cp; + +%% data tables for the DFA and the user's section 1 definitions go here + +/* Macros after this point can all be overridden by user definitions in + * section 1. 
+ */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int yywrap YY_PROTO(( void )); +#else +extern int yywrap YY_PROTO(( void )); +#endif +#endif + +%- +#ifndef YY_NO_UNPUT +static void yyunput YY_PROTO(( int c, YY_CHAR *buf_ptr )); +#endif +%* + +#ifndef yytext_ptr +static void yy_flex_strncpy YY_PROTO(( YY_CHAR *, yyconst YY_CHAR *, int )); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen YY_PROTO(( yyconst YY_CHAR * )); +#endif + +#ifndef YY_NO_INPUT +%- Standard (non-C++) definition +#ifdef __cplusplus +static int yyinput YY_PROTO(( void )); +#else +static int input YY_PROTO(( void )); +#endif +%* +#endif + +#if YY_STACK_USED +static int yy_start_stack_ptr = 0; +static int yy_start_stack_depth = 0; +static int *yy_start_stack = 0; +#ifndef YY_NO_PUSH_STATE +static void yy_push_state YY_PROTO(( int new_state )); +#endif +#ifndef YY_NO_POP_STATE +static void yy_pop_state YY_PROTO(( void )); +#endif +#ifndef YY_NO_TOP_STATE +static int yy_top_state YY_PROTO(( void )); +#endif + +#else +#define YY_NO_PUSH_STATE 1 +#define YY_NO_POP_STATE 1 +#define YY_NO_TOP_STATE 1 +#endif + +#ifdef YY_MALLOC_DECL +YY_MALLOC_DECL +#else +#if __STDC__ +#ifndef __cplusplus +#include <stdlib.h> +#endif +#else +/* Just try to get by without declaring the routines. This will fail + * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int) + * or sizeof(void*) != sizeof(int). + */ +#endif +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#define YY_READ_BUF_SIZE 8192 +#endif + +/* Copy whatever the last rule matched to the standard output. */ + +#ifndef ECHO +%- Standard (non-C++) definition +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). + */ +#define ECHO (void) fwrite( yytext, sizeof( YY_CHAR ), yyleng, yyout ) +%+ C++ definition +#define ECHO LexerOutput( yytext, yyleng ) +%* +#endif + +/* Gets input and stuffs it into "buf". 
number of characters read, or YY_NULL, + * is returned in "result". + */ +#ifndef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ +%% fread()/read() definition of YY_INPUT goes here unless we're doing C++ +%+ C++ definition + if ( (result = LexerInput( buf, max_size )) < 0 ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); +%* +#endif + +/* No semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#ifndef yyterminate +#define yyterminate() return YY_NULL +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Report a fatal error. */ +#ifndef YY_FATAL_ERROR +%- +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) +%+ +#define YY_FATAL_ERROR(msg) LexerError( msg ) +%* +#endif + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. + */ +#ifndef YY_DECL +%- Standard (non-C++) definition +#define YY_DECL int yylex YY_PROTO(( void )) +%+ C++ definition +#define YY_DECL int yyFlexLexer::yylex() +%* +#endif + +/* Code executed at the beginning of each rule, after yytext and yyleng + * have been set up. + */ +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +/* Code executed at the end of each rule. */ +#ifndef YY_BREAK +#define YY_BREAK break; +#endif + +%% YY_RULE_SETUP definition goes here + +YY_DECL + { + register yy_state_type yy_current_state; + register YY_CHAR *yy_cp, *yy_bp; + register int yy_act; + +%% user's declarations go here + + if ( yy_init ) + { + yy_init = 0; + +#ifdef YY_USER_INIT + YY_USER_INIT; +#endif + + if ( ! yy_start ) + yy_start = 1; /* first start state */ + + if ( ! yyin ) +%- + yyin = stdin; +%+ + yyin = &cin; +%* + + if ( ! yyout ) +%- + yyout = stdout; +%+ + yyout = &cout; +%* + + if ( ! 
yy_current_buffer ) + yy_current_buffer = + yy_create_buffer( yyin, YY_BUF_SIZE ); + + yy_load_buffer_state(); + } + + while ( 1 ) /* loops until end-of-file is reached */ + { +%% yymore()-related code goes here + yy_cp = yy_c_buf_p; + + /* Support of yytext. */ + *yy_cp = yy_hold_char; + + /* yy_bp points to the position in yy_ch_buf of the start of + * the current run. + */ + yy_bp = yy_cp; + +%% code to set up and find next match goes here + +yy_find_action: +%% code to find the action number goes here + + YY_DO_BEFORE_ACTION; + +%% code for yylineno update goes here + +do_action: /* This label is used only to access EOF actions. */ + +%% debug code goes here + + switch ( yy_act ) + { /* beginning of action switch */ +%% actions go here + + case YY_END_OF_BUFFER: + { + /* Amount of text matched not including the EOB char. */ + int yy_amount_of_matched_text = (int) (yy_cp - yytext_ptr) - 1; + + /* Undo the effects of YY_DO_BEFORE_ACTION. */ + *yy_cp = yy_hold_char; + YY_RESTORE_YY_MORE_OFFSET + + if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_NEW ) + { + /* We're scanning a new file or input source. It's + * possible that this happened because the user + * just pointed yyin at a new source and called + * yylex(). If so, then we have to assure + * consistency between yy_current_buffer and our + * globals. Here is the right place to do so, because + * this is the first action (other than possibly a + * back-up) that will match for the new input source. + */ + yy_n_chars = yy_current_buffer->yy_n_chars; + yy_current_buffer->yy_input_file = yyin; + yy_current_buffer->yy_buffer_status = YY_BUFFER_NORMAL; + } + + /* Note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the + * end-of-buffer state). Contrast this with the test + * in input(). 
+ */ + if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] ) + { /* This was really a NUL. */ + yy_state_type yy_next_state; + + yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + /* Okay, we're now positioned to make the NUL + * transition. We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we don't + * want to build jamming into it because then it + * will run more slowly). + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state ); + + yy_bp = yytext_ptr + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* Consume the NUL. */ + yy_cp = ++yy_c_buf_p; + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { +%% code to do back-up for compressed tables and set up yy_cp goes here + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer() ) + { + case EOB_ACT_END_OF_FILE: + { + yy_did_buffer_switch_on_eof = 0; + + if ( yywrap() ) + { + /* Note: because we've taken care in + * yy_get_next_buffer() to have set up + * yytext, we can now set up + * yy_c_buf_p so that if some total + * hoser (like flex itself) wants to + * call the scanner after we return the + * YY_NULL, it'll still work - another + * YY_NULL will get returned. + */ + yy_c_buf_p = yytext_ptr + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF(YY_START); + goto do_action; + } + + else + { + if ( ! 
yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; + } + break; + } + + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = + yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext_ptr + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + yy_c_buf_p = + &yy_current_buffer->yy_ch_buf[yy_n_chars]; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext_ptr + YY_MORE_ADJ; + goto yy_find_action; + } + break; + } + + default: + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); + } /* end of action switch */ + } /* end of scanning one token */ + } /* end of yylex */ + +%+ +yyFlexLexer::yyFlexLexer( istream* arg_yyin, ostream* arg_yyout ) + { + yyin = arg_yyin; + yyout = arg_yyout; + yy_c_buf_p = 0; + yy_init = 1; + yy_start = 0; + yy_flex_debug = 0; + yylineno = 1; // this will only get updated if %option yylineno + + yy_did_buffer_switch_on_eof = 0; + + yy_looking_for_trail_begin = 0; + yy_more_flag = 0; + yy_more_len = 0; + yy_more_offset = yy_prev_more_offset = 0; + + yy_start_stack_ptr = yy_start_stack_depth = 0; + yy_start_stack = 0; + + yy_current_buffer = 0; + +#ifdef YY_USES_REJECT + yy_state_buf = new yy_state_type[YY_BUF_SIZE + 2]; +#else + yy_state_buf = 0; +#endif + } + +yyFlexLexer::~yyFlexLexer() + { + delete yy_state_buf; + yy_delete_buffer( yy_current_buffer ); + } + +void yyFlexLexer::switch_streams( istream* new_in, ostream* new_out ) + { + if ( new_in ) + { + yy_delete_buffer( yy_current_buffer ); + yy_switch_to_buffer( yy_create_buffer( new_in, YY_BUF_SIZE ) ); + } + + if ( new_out ) + yyout = new_out; + } + +#ifdef YY_INTERACTIVE +int yyFlexLexer::LexerInput( YY_CHAR* buf, int /* max_size */ ) +#else +int yyFlexLexer::LexerInput( YY_CHAR* buf, int max_size ) +#endif + { + if ( yyin->eof() || yyin->fail() ) + return 0; + +#ifdef YY_INTERACTIVE + (void) yyin->read((unsigned char *) buf, sizeof( YY_CHAR ) ); + + if ( yyin->eof() ) 
+ return 0; + + if ( yyin->bad() ) + return -1; + + return 1; + +#else + (void) yyin->read((unsigned char *) buf, max_size * sizeof( YY_CHAR ) ); + + if ( yyin->bad() ) + return -1; + else + return ( yyin->gcount() / sizeof( YY_CHAR ) ); +#endif + } + +void yyFlexLexer::LexerOutput( const YY_CHAR* buf, int size ) + { + (void) yyout->write((unsigned char *) buf, size * sizeof( YY_CHAR ) ); + } +%* + +/* yy_get_next_buffer - try to read in a new buffer + * + * Returns a code representing an action: + * EOB_ACT_LAST_MATCH - text was matched before the EOB; process it first + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ + +%- +static int yy_get_next_buffer() +%+ +int yyFlexLexer::yy_get_next_buffer() +%* + { + register YY_CHAR *dest = yy_current_buffer->yy_ch_buf; + register YY_CHAR *source = yytext_ptr; + register int number_to_move, i; + int ret_val; + + if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); + + if ( yy_current_buffer->yy_fill_buffer == 0 ) + { /* Don't try to fill the buffer, so this is an EOF. */ + if ( yy_c_buf_p - yytext_ptr - YY_MORE_ADJ == 1 ) + { + /* We matched a single character, the EOB, so + * treat this as a final EOF. + */ + return EOB_ACT_END_OF_FILE; + } + + else + { + /* We matched some text prior to the EOB, first + * process it. + */ + return EOB_ACT_LAST_MATCH; + } + } + + /* Try to read more data. */ + + /* First move last chars to start of buffer.
*/ + number_to_move = (int) (yy_c_buf_p - yytext_ptr) - 1; + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_EOF_PENDING ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + yy_current_buffer->yy_n_chars = yy_n_chars = 0; + + else + { + int num_to_read = + yy_current_buffer->yy_buf_size - number_to_move - 1; + + while ( num_to_read <= 0 ) + { /* Not enough room in the buffer - grow it. */ +#ifdef YY_USES_REJECT + YY_FATAL_ERROR( +"input buffer overflow, can't enlarge buffer because scanner uses REJECT" ); +#else + + /* just a shorter name for the current buffer */ + YY_BUFFER_STATE b = yy_current_buffer; + + int yy_c_buf_p_offset = + (int) (yy_c_buf_p - b->yy_ch_buf); + + if ( b->yy_is_our_buffer ) + { + int new_size = b->yy_buf_size * 2; + + if ( new_size <= 0 ) + b->yy_buf_size += b->yy_buf_size / 8; + else + b->yy_buf_size *= 2; + + b->yy_ch_buf = (YY_CHAR *) + /* Include room in for 2 EOB chars. */ + yy_flex_realloc( (void *) b->yy_ch_buf, + ( b->yy_buf_size + 2 ) * + sizeof( YY_CHAR ) ); + } + else + /* Can't grow it, we don't own it. */ + b->yy_ch_buf = 0; + + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( + "fatal error - scanner input buffer overflow" ); + + yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; + + num_to_read = yy_current_buffer->yy_buf_size - + number_to_move - 1; +#endif + } + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + + /* Read in more data. 
*/ + YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]), + yy_n_chars, num_to_read ); + + yy_current_buffer->yy_n_chars = yy_n_chars; + } + + if ( yy_n_chars == 0 ) + { + if ( number_to_move == YY_MORE_ADJ ) + { + ret_val = EOB_ACT_END_OF_FILE; + yyrestart( yyin ); + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + yy_current_buffer->yy_buffer_status = + YY_BUFFER_EOF_PENDING; + } + } + + else + ret_val = EOB_ACT_CONTINUE_SCAN; + + yy_n_chars += number_to_move; + yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; + yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + + yytext_ptr = &yy_current_buffer->yy_ch_buf[0]; + + return ret_val; + } + + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + +%- +static yy_state_type yy_get_previous_state() +%+ +yy_state_type yyFlexLexer::yy_get_previous_state() +%* + { + register yy_state_type yy_current_state; + register YY_CHAR *yy_cp; + +%% code to get the start state into yy_current_state goes here + + for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp ) + { +%% code to find the next state goes here + } + + return yy_current_state; + } + + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + +%- +#ifdef YY_USE_PROTOS +static yy_state_type yy_try_NUL_trans( yy_state_type yy_current_state ) +#else +static yy_state_type yy_try_NUL_trans( yy_current_state ) +yy_state_type yy_current_state; +#endif +%+ +yy_state_type yyFlexLexer::yy_try_NUL_trans( yy_state_type yy_current_state ) +%* + { + register int yy_is_jam; +%% code to find the next state, and perhaps do backing up, goes here + + return yy_is_jam ? 
0 : yy_current_state; + } + + +%- +#ifndef YY_NO_UNPUT +#ifdef YY_USE_PROTOS +static void yyunput( int c, register YY_CHAR *yy_bp ) +#else +static void yyunput( c, yy_bp ) +int c; +register YY_CHAR *yy_bp; +#endif +%+ +void yyFlexLexer::yyunput( int c, register YY_CHAR* yy_bp ) +%* + { + register YY_CHAR *yy_cp = yy_c_buf_p; + + /* undo effects of setting up yytext */ + *yy_cp = yy_hold_char; + + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) + { /* need to shift things up to make room */ + /* +2 for EOB chars. */ + register int number_to_move = yy_n_chars + 2; + register YY_CHAR *dest = &yy_current_buffer->yy_ch_buf[ + yy_current_buffer->yy_buf_size + 2]; + register YY_CHAR *source = + &yy_current_buffer->yy_ch_buf[number_to_move]; + + while ( source > yy_current_buffer->yy_ch_buf ) + *--dest = *--source; + + yy_cp += (int) (dest - source); + yy_bp += (int) (dest - source); + yy_current_buffer->yy_n_chars = + yy_n_chars = yy_current_buffer->yy_buf_size; + + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) + YY_FATAL_ERROR( "flex scanner push-back overflow" ); + } + + *--yy_cp = (YY_CHAR) c; + +%% update yylineno here + + yytext_ptr = yy_bp; + yy_hold_char = *yy_cp; + yy_c_buf_p = yy_cp; + } +%- +#endif /* ifndef YY_NO_UNPUT */ +%* + + +%- +#ifdef __cplusplus +static int yyinput() +#else +static int input() +#endif +%+ +int yyFlexLexer::yyinput() +%* + { + int c; + + *yy_c_buf_p = yy_hold_char; + + if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. + */ + if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] ) + /* This was really a NUL. 
*/ + *yy_c_buf_p = (YY_CHAR) '\0'; + + else + { /* need more input */ + int offset = yy_c_buf_p - yytext_ptr; + ++yy_c_buf_p; + + switch ( yy_get_next_buffer() ) + { + case EOB_ACT_LAST_MATCH: + /* This happens because yy_g_n_b() + * sees that we've accumulated a + * token and flags that we need to + * try matching the token before + * proceeding. But for input(), + * there's no matching to consider. + * So convert the EOB_ACT_LAST_MATCH + * to EOB_ACT_END_OF_FILE. + */ + + /* Reset buffer status. */ + yyrestart( yyin ); + + /* fall through */ + + case EOB_ACT_END_OF_FILE: + { + if ( yywrap() ) + return EOF; + + if ( ! yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; +#ifdef __cplusplus + return yyinput(); +#else + return input(); +#endif + } + + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = yytext_ptr + offset; + break; + } + } + } + + c = YY_SC_TO_UI(*yy_c_buf_p); + *yy_c_buf_p = (YY_CHAR) '\0'; /* preserve yytext */ + yy_hold_char = *++yy_c_buf_p; + +%% update BOL and yylineno + + return c; + } + + +%- +#ifdef YY_USE_PROTOS +void yyrestart( FILE *input_file ) +#else +void yyrestart( input_file ) +FILE *input_file; +#endif +%+ +void yyFlexLexer::yyrestart( istream* input_file ) +%* + { + if ( ! yy_current_buffer ) + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); + + yy_init_buffer( yy_current_buffer, input_file ); + yy_load_buffer_state(); + } + + +%- +#ifdef YY_USE_PROTOS +void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) +#else +void yy_switch_to_buffer( new_buffer ) +YY_BUFFER_STATE new_buffer; +#endif +%+ +void yyFlexLexer::yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) +%* + { + if ( yy_current_buffer == new_buffer ) + return; + + if ( yy_current_buffer ) + { + /* Flush out information for old buffer. 
*/ + *yy_c_buf_p = yy_hold_char; + yy_current_buffer->yy_buf_pos = yy_c_buf_p; + yy_current_buffer->yy_n_chars = yy_n_chars; + } + + yy_current_buffer = new_buffer; + yy_load_buffer_state(); + + /* We don't actually know whether we did this switch during + * EOF (yywrap()) processing, but the only time this flag + * is looked at is after yywrap() is called, so it's safe + * to go ahead and always set it. + */ + yy_did_buffer_switch_on_eof = 1; + } + + +%- +#ifdef YY_USE_PROTOS +void yy_load_buffer_state( void ) +#else +void yy_load_buffer_state() +#endif +%+ +void yyFlexLexer::yy_load_buffer_state() +%* + { + yy_n_chars = yy_current_buffer->yy_n_chars; + yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos; + yyin = yy_current_buffer->yy_input_file; + yy_hold_char = *yy_c_buf_p; + } + + +%- +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_create_buffer( FILE *file, int size ) +#else +YY_BUFFER_STATE yy_create_buffer( file, size ) +FILE *file; +int size; +#endif +%+ +YY_BUFFER_STATE yyFlexLexer::yy_create_buffer( istream* file, int size ) +%* + { + YY_BUFFER_STATE b; + + b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_buf_size = size; + + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. + */ + b->yy_ch_buf = (YY_CHAR *) yy_flex_alloc( + ( b->yy_buf_size + 2 ) * sizeof( YY_CHAR ) ); + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_is_our_buffer = 1; + + yy_init_buffer( b, file ); + + return b; + } + + +%- +#ifdef YY_USE_PROTOS +void yy_delete_buffer( YY_BUFFER_STATE b ) +#else +void yy_delete_buffer( b ) +YY_BUFFER_STATE b; +#endif +%+ +void yyFlexLexer::yy_delete_buffer( YY_BUFFER_STATE b ) +%* + { + if ( ! 
b ) + return; + + if ( b == yy_current_buffer ) + yy_current_buffer = (YY_BUFFER_STATE) 0; + + if ( b->yy_is_our_buffer ) + yy_flex_free( (void *) b->yy_ch_buf ); + + yy_flex_free( (void *) b ); + } + + +%- +#ifndef YY_ALWAYS_INTERACTIVE +#ifndef YY_NEVER_INTERACTIVE +extern int isatty YY_PROTO(( int )); +#endif +#endif + +#ifdef YY_USE_PROTOS +void yy_init_buffer( YY_BUFFER_STATE b, FILE *file ) +#else +void yy_init_buffer( b, file ) +YY_BUFFER_STATE b; +FILE *file; +#endif + +%+ +extern "C" int isatty YY_PROTO(( int )); +void yyFlexLexer::yy_init_buffer( YY_BUFFER_STATE b, istream* file ) +%* + + { + yy_flush_buffer( b ); + + b->yy_input_file = file; + b->yy_fill_buffer = 1; + +%- +#if YY_ALWAYS_INTERACTIVE + b->yy_is_interactive = 1; +#else +#if YY_NEVER_INTERACTIVE + b->yy_is_interactive = 0; +#else + b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0; +#endif +#endif +%+ + b->yy_is_interactive = 0; +%* + } + + +%- +#ifdef YY_USE_PROTOS +void yy_flush_buffer( YY_BUFFER_STATE b ) +#else +void yy_flush_buffer( b ) +YY_BUFFER_STATE b; +#endif + +%+ +void yyFlexLexer::yy_flush_buffer( YY_BUFFER_STATE b ) +%* + { + if ( ! b ) + return; + + b->yy_n_chars = 0; + + /* We always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. + */ + b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + + b->yy_buf_pos = &b->yy_ch_buf[0]; + + b->yy_at_bol = 1; + b->yy_buffer_status = YY_BUFFER_NEW; + + if ( b == yy_current_buffer ) + yy_load_buffer_state(); + } +%* + + +#ifndef YY_NO_SCAN_BUFFER +%- +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_buffer( YY_CHAR *base, yy_size_t size ) +#else +YY_BUFFER_STATE yy_scan_buffer( base, size ) +YY_CHAR *base; +yy_size_t size; +#endif + { + YY_BUFFER_STATE b; + + if ( size < 2 || + base[size-2] != YY_END_OF_BUFFER_CHAR || + base[size-1] != YY_END_OF_BUFFER_CHAR ) + /* They forgot to leave room for the EOB's. 
*/ + return 0; + + b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); + + b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */ + b->yy_buf_pos = b->yy_ch_buf = base; + b->yy_is_our_buffer = 0; + b->yy_input_file = 0; + b->yy_n_chars = b->yy_buf_size; + b->yy_is_interactive = 0; + b->yy_at_bol = 1; + b->yy_fill_buffer = 0; + b->yy_buffer_status = YY_BUFFER_NEW; + + yy_switch_to_buffer( b ); + + return b; + } +%* +#endif + + +#ifndef YY_NO_SCAN_STRING +%- +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_string( yyconst YY_CHAR *yy_str ) +#else +YY_BUFFER_STATE yy_scan_string( yy_str ) +yyconst YY_CHAR *yy_str; +#endif + { + int len; + for ( len = 0; yy_str[len]; ++len ) + ; + + return yy_scan_chars( yy_str, len ); + } +%* +#endif + + +#ifndef YY_NO_SCAN_BYTES +%- +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_chars( yyconst YY_CHAR *chars, int len ) +#else +YY_BUFFER_STATE yy_scan_chars( chars, len ) +yyconst YY_CHAR *chars; +int len; +#endif + { + YY_BUFFER_STATE b; + YY_CHAR *buf; + yy_size_t n; + int i; + + /* Get memory for full buffer, including space for trailing EOB's. */ + n = len + 2; + buf = (YY_CHAR *) yy_flex_alloc( n * sizeof( YY_CHAR ) ); + if ( ! buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_chars()" ); + + for ( i = 0; i < len; ++i ) + buf[i] = chars[i]; + + buf[len] = buf[len+1] = YY_END_OF_BUFFER_CHAR; + + b = yy_scan_buffer( buf, n ); + if ( ! b ) + YY_FATAL_ERROR( "bad buffer in yy_scan_chars()" ); + + /* It's okay to grow etc. this buffer, and we should throw it + * away when we're done. 
+ */ + b->yy_is_our_buffer = 1; + + return b; + } +%* +#endif + + +#ifndef YY_NO_PUSH_STATE +%- +#ifdef YY_USE_PROTOS +static void yy_push_state( int new_state ) +#else +static void yy_push_state( new_state ) +int new_state; +#endif +%+ +void yyFlexLexer::yy_push_state( int new_state ) +%* + { + if ( yy_start_stack_ptr >= yy_start_stack_depth ) + { + yy_size_t new_size; + + yy_start_stack_depth += YY_START_STACK_INCR; + new_size = yy_start_stack_depth * sizeof( int ); + + if ( ! yy_start_stack ) + yy_start_stack = (int *) yy_flex_alloc( new_size ); + + else + yy_start_stack = (int *) yy_flex_realloc( + (void *) yy_start_stack, new_size ); + + if ( ! yy_start_stack ) + YY_FATAL_ERROR( + "out of memory expanding start-condition stack" ); + } + + yy_start_stack[yy_start_stack_ptr++] = YY_START; + + BEGIN(new_state); + } +#endif + + +#ifndef YY_NO_POP_STATE +%- +static void yy_pop_state() +%+ +void yyFlexLexer::yy_pop_state() +%* + { + if ( --yy_start_stack_ptr < 0 ) + YY_FATAL_ERROR( "start-condition stack underflow" ); + + BEGIN(yy_start_stack[yy_start_stack_ptr]); + } +#endif + + +#ifndef YY_NO_TOP_STATE +%- +static int yy_top_state() +%+ +int yyFlexLexer::yy_top_state() +%* + { + return yy_start_stack[yy_start_stack_ptr - 1]; + } +#endif + +#ifndef YY_EXIT_FAILURE +#define YY_EXIT_FAILURE 2 +#endif + +%- +#ifdef YY_USE_PROTOS +static void yy_fatal_error( yyconst char msg[] ) +#else +static void yy_fatal_error( msg ) +YY_CHAR msg[]; +#endif + { + (void) fprintf( stderr, "%s\n", msg ); + exit( YY_EXIT_FAILURE ); + } + +%+ + +void yyFlexLexer::LexerError( yyconst char msg[] ) + { + cerr << msg << '\n'; + exit( YY_EXIT_FAILURE ); + } +%* + + +/* Redefine yyless() so it works in section 3 code. */ + +#undef yyless +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. 
*/ \ + yytext[yyleng] = yy_hold_char; \ + yy_c_buf_p = yytext + n; \ + yy_hold_char = *yy_c_buf_p; \ + *yy_c_buf_p = (YY_CHAR) '\0'; \ + yyleng = n; \ + } \ + while ( 0 ) + + +/* Internal utility routines. */ + +#ifndef yytext_ptr +#ifdef YY_USE_PROTOS +static void yy_flex_strncpy( YY_CHAR *s1, yyconst YY_CHAR *s2, int n ) +#else +static void yy_flex_strncpy( s1, s2, n ) +YY_CHAR *s1; +yyconst YY_CHAR *s2; +int n; +#endif + { + register int i; + for ( i = 0; i < n; ++i ) + s1[i] = s2[i]; + } +#endif + +#ifdef YY_NEED_STRLEN +#ifdef YY_USE_PROTOS +static int yy_flex_strlen( yyconst YY_CHAR *s ) +#else +static int yy_flex_strlen( s ) +yyconst YY_CHAR *s; +#endif + { + register int n; + for ( n = 0; s[n]; ++n ) + ; + + return n; + } +#endif + + +#ifdef YY_USE_PROTOS +static void *yy_flex_alloc( yy_size_t size ) +#else +static void *yy_flex_alloc( size ) +yy_size_t size; +#endif + { + return (void *) malloc( size ); + } + +#ifdef YY_USE_PROTOS +static void *yy_flex_realloc( void *ptr, yy_size_t size ) +#else +static void *yy_flex_realloc( ptr, size ) +void *ptr; +yy_size_t size; +#endif + { + /* The cast to (char *) in the following accommodates both + * implementations that use char* generic pointers, and those + * that use void* generic pointers. It works with the latter + * because both ANSI C and C++ allow castless assignment from + * any pointer type to void*, and deal with argument conversions + * as though doing an assignment. 
+ */ + return (void *) realloc( (char *) ptr, size ); + } + +#ifdef YY_USE_PROTOS +static void yy_flex_free( void *ptr ) +#else +static void yy_flex_free( ptr ) +void *ptr; +#endif + { + free( ptr ); + } + +#if YY_MAIN +int main() + { + yylex(); + return 0; + } +#endif diff --git a/to.do/unicode/flexdef.h b/to.do/unicode/flexdef.h new file mode 100644 index 0000000..4fd3f7e --- /dev/null +++ b/to.do/unicode/flexdef.h @@ -0,0 +1,1062 @@ +/* flexdef - definitions file for flex */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. + * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +/* @(#) $Header$ (LBL) */ + +#include <stdio.h> +#include <ctype.h> + +#include "config.h" + +#ifdef __TURBOC__ +#define HAVE_STRING_H 1 +#define MS_DOS 1 +#ifndef __STDC__ +#define __STDC__ 1 +#endif + #pragma warn -pro + #pragma warn -rch + #pragma warn -use + #pragma warn -aus + #pragma warn -par + #pragma warn -pia +#endif + +#ifdef HAVE_STRING_H +#include <string.h> +#else +#include <strings.h> +#endif + +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif + +#ifdef HAVE_MALLOC_H +#include <malloc.h> +#endif + +#ifdef STDC_HEADERS +#include <stdlib.h> +#endif + +/* As an aid for the internationalization patch to flex, which + * is maintained outside this distribution for copyright reasons. + */ +#define _(String) (String) + +/* Always be prepared to generate an 16-bit scanner. */ +#define CSIZE 65536 +#define Char unsigned char + +/* Size of input alphabet - should be size of ASCII set. */ +#ifndef DEFAULT_CSIZE +#define DEFAULT_CSIZE 128 +#endif + +#ifndef PROTO +#if __STDC__ +#define PROTO(proto) proto +#else +#define PROTO(proto) () +#endif +#endif + +#ifdef VMS +#ifndef __VMS_POSIX +#define unlink remove +#define SHORT_FILE_NAMES +#endif +#endif + +#ifdef MS_DOS +#define SHORT_FILE_NAMES +#endif + +#ifdef _WIN32 +#if __STDC__ +#define unlink _unlink +#define isatty _isatty +#define fileno _fileno +#endif +#endif + + +/* Maximum line length we'll have to deal with. */ +#define MAXLINE 2048 + +#ifndef MIN +#define MIN(x,y) ((x) < (y) ? (x) : (y)) +#endif +#ifndef MAX +#define MAX(x,y) ((x) > (y) ? (x) : (y)) +#endif +#ifndef ABS +#define ABS(x) ((x) < 0 ? -(x) : (x)) +#endif + + +/* ANSI C does not guarantee that isascii() is defined */ +#ifndef isascii +#define isascii(c) ((c) <= 0177) +#endif + + +#define true 1 +#define false 0 +#define unspecified -1 + + +/* Special chk[] values marking the slots taking by end-of-buffer and action + * numbers. 
+ */ +#define EOB_POSITION -1 +#define ACTION_POSITION -2 + +/* Number of data items per line for -f output. */ +#define NUMDATAITEMS 10 + +/* Number of lines of data in -f output before inserting a blank line for + * readability. + */ +#define NUMDATALINES 10 + +/* transition_struct_out() definitions. */ +#define TRANS_STRUCT_PRINT_LENGTH 14 + +/* Returns true if an nfa state has an epsilon out-transition slot + * that can be used. This definition is currently not used. + */ +#define FREE_EPSILON(state) \ + (transchar[state] == SYM_EPSILON && \ + trans2[state] == NO_TRANSITION && \ + finalst[state] != state) + +/* Returns true if an nfa state has an epsilon out-transition character + * and both slots are free + */ +#define SUPER_FREE_EPSILON(state) \ + (transchar[state] == SYM_EPSILON && \ + trans1[state] == NO_TRANSITION) \ + +/* Maximum number of NFA states that can comprise a DFA state. It's real + * big because if there's a lot of rules, the initial state will have a + * huge epsilon closure. + */ +#define INITIAL_MAX_DFA_SIZE 750 +#define MAX_DFA_SIZE_INCREMENT 750 + + +/* A note on the following masks. They are used to mark accepting numbers + * as being special. As such, they implicitly limit the number of accepting + * numbers (i.e., rules) because if there are too many rules the rule numbers + * will overload the mask bits. Fortunately, this limit is \large/ (0x2000 == + * 8192) so unlikely to actually cause any problems. A check is made in + * new_rule() to ensure that this limit is not reached. + */ + +/* Mask to mark a trailing context accepting number. */ +#define YY_TRAILING_MASK 0x2000 + +/* Mask to mark the accepting number of the "head" of a trailing context + * rule. + */ +#define YY_TRAILING_HEAD_MASK 0x4000 + +/* Maximum number of rules, as outlined in the above note. */ +#define MAX_RULE (YY_TRAILING_MASK - 1) + + +/* NIL must be 0. If not, its special meaning when making equivalence classes + * (it marks the representative of a given e.c.) 
will be unidentifiable. + */ +#define NIL 0 + +#define JAM -1 /* to mark a missing DFA transition */ +#define NO_TRANSITION NIL +#define UNIQUE -1 /* marks a symbol as an e.c. representative */ +#define INFINITY -1 /* for x{5,} constructions */ + +#define INITIAL_MAX_CCLS 100 /* max number of unique character classes */ +#define MAX_CCLS_INCREMENT 100 + +/* Size of table holding members of character classes. */ +#define INITIAL_MAX_CCL_TBL_SIZE 500 +#define MAX_CCL_TBL_SIZE_INCREMENT 250 + +#define INITIAL_MAX_RULES 100 /* default maximum number of rules */ +#define MAX_RULES_INCREMENT 100 + +#define INITIAL_MNS 2000 /* default maximum number of nfa states */ +#define MNS_INCREMENT 1000 /* amount to bump above by if it's not enough */ + +#define INITIAL_MAX_DFAS 1000 /* default maximum number of dfa states */ +#define MAX_DFAS_INCREMENT 1000 + +#define JAMSTATE -32766 /* marks a reference to the state that always jams */ + +/* Maximum number of NFA states. */ +#define MAXIMUM_MNS 31999 + +/* Enough so that if it's subtracted from an NFA state number, the result + * is guaranteed to be negative. + */ +#define MARKER_DIFFERENCE (MAXIMUM_MNS+2) + +/* Maximum number of nxt/chk pairs for non-templates. */ +#define INITIAL_MAX_XPAIRS 2000 +#define MAX_XPAIRS_INCREMENT 2000 + +/* Maximum number of nxt/chk pairs needed for templates. 
*/ +#define INITIAL_MAX_TEMPLATE_XPAIRS 2500 +#define MAX_TEMPLATE_XPAIRS_INCREMENT 2500 + +#define SYM_EPSILON (CSIZE + 1) /* to mark transitions on the symbol epsilon */ + +#define INITIAL_MAX_SCS 40 /* maximum number of start conditions */ +#define MAX_SCS_INCREMENT 40 /* amount to bump by if it's not enough */ + +#define ONE_STACK_SIZE 500 /* stack of states with only one out-transition */ +#define SAME_TRANS -1 /* transition is the same as "default" entry for state */ + +/* The following percentages are used to tune table compression: + + * The percentage the number of out-transitions a state must be of the + * number of equivalence classes in order to be considered for table + * compaction by using protos. + */ +#define PROTO_SIZE_PERCENTAGE 15 + +/* The percentage the number of homogeneous out-transitions of a state + * must be of the number of total out-transitions of the state in order + * that the state's transition table is first compared with a potential + * template of the most common out-transition instead of with the first + * proto in the proto queue. + */ +#define CHECK_COM_PERCENTAGE 50 + +/* The percentage the number of differences between a state's transition + * table and the proto it was first compared with must be of the total + * number of out-transitions of the state in order to keep the first + * proto as a good match and not search any further. + */ +#define FIRST_MATCH_DIFF_PERCENTAGE 10 + +/* The percentage the number of differences between a state's transition + * table and the most similar proto must be of the state's total number + * of out-transitions to use the proto as an acceptable close match. + */ +#define ACCEPTABLE_DIFF_PERCENTAGE 50 + +/* The percentage the number of homogeneous out-transitions of a state + * must be of the number of total out-transitions of the state in order + * to consider making a template from the state. 
+ */ +#define TEMPLATE_SAME_PERCENTAGE 60 + +/* The percentage the number of differences between a state's transition + * table and the most similar proto must be of the state's total number + * of out-transitions to create a new proto from the state. + */ +#define NEW_PROTO_DIFF_PERCENTAGE 20 + +/* The percentage the total number of out-transitions of a state must be + * of the number of equivalence classes in order to consider trying to + * fit the transition table into "holes" inside the nxt/chk table. + */ +#define INTERIOR_FIT_PERCENTAGE 15 + +/* Size of region set aside to cache the complete transition table of + * protos on the proto queue to enable quick comparisons. + */ +#define PROT_SAVE_SIZE 2000 + +#define MSP 50 /* maximum number of saved protos (protos on the proto queue) */ + +/* Maximum number of out-transitions a state can have that we'll rummage + * around through the interior of the internal fast table looking for a + * spot for it. + */ +#define MAX_XTIONS_FULL_INTERIOR_FIT 4 + +/* Maximum number of rules which will be reported as being associated + * with a DFA state. + */ +#define MAX_ASSOC_RULES 100 + +/* Number that, if used to subscript an array, has a good chance of producing + * an error; should be small enough to fit into a short. + */ +#define BAD_SUBSCRIPT -32767 + +/* Absolute value of largest number that can be stored in a short, with a + * bit of slop thrown in for general paranoia. + */ +#define MAX_SHORT 32700 + + +/* Declarations for global variables. 
*/ + +/* Variables for symbol tables: + * sctbl - start-condition symbol table + * ndtbl - name-definition symbol table + * ccltab - character class text symbol table + */ + +struct hash_entry + { + struct hash_entry *prev, *next; + char *name; + char *str_val; + int int_val; + } ; + +typedef struct hash_entry **hash_table; + +#define NAME_TABLE_HASH_SIZE 101 +#define START_COND_HASH_SIZE 101 +#define CCL_HASH_SIZE 101 + +extern struct hash_entry *ndtbl[NAME_TABLE_HASH_SIZE]; +extern struct hash_entry *sctbl[START_COND_HASH_SIZE]; +extern struct hash_entry *ccltab[CCL_HASH_SIZE]; + + +/* Variables for flags: + * printstats - if true (-v), dump statistics + * syntaxerror - true if a syntax error has been found + * eofseen - true if we've seen an eof in the input file + * ddebug - if true (-d), make a "debug" scanner + * trace - if true (-T), trace processing + * nowarn - if true (-w), do not generate warnings + * spprdflt - if true (-s), suppress the default rule + * interactive - if true (-I), generate an interactive scanner + * caseins - if true (-i), generate a case-insensitive scanner + * lex_compat - if true (-l), maximize compatibility with AT&T lex + * do_yylineno - if true, generate code to maintain yylineno + * useecs - if true (-Ce flag), use equivalence classes + * fulltbl - if true (-Cf flag), don't compress the DFA state table + * usemecs - if true (-Cm flag), use meta-equivalence classes + * fullspd - if true (-F flag), use Jacobson method of table representation + * gen_line_dirs - if true (i.e., no -L flag), generate #line directives + * performance_report - if > 0 (i.e., -p flag), generate a report relating + * to scanner performance; if > 1 (-p -p), report on minor performance + * problems, too + * backing_up_report - if true (i.e., -b flag), generate "lex.backup" file + * listing backing-up states + * C_plus_plus - if true (i.e., -+ flag), generate a C++ scanner class; + * otherwise, a standard C scanner + * long_align - if true (-Ca flag), favor 
long-word alignment. + * use_read - if true (-f, -F, or -Cr) then use read() for scanner input; + * otherwise, use fread(). + * yytext_is_array - if true (i.e., %array directive), then declare + * yytext as a array instead of a character pointer. Nice and inefficient. + * do_yywrap - do yywrap() processing on EOF. If false, EOF treated as + * "no more files". + * csize - size of character set for the scanner we're generating; + * 128 for 7-bit chars and 256 for 8-bit + * yymore_used - if true, yymore() is used in input rules + * reject - if true, generate back-up tables for REJECT macro + * real_reject - if true, scanner really uses REJECT (as opposed to just + * having "reject" set for variable trailing context) + * continued_action - true if this rule's action is to "fall through" to + * the next rule's action (i.e., the '|' action) + * in_rule - true if we're inside an individual rule, false if not. + * yymore_really_used - whether to treat yymore() as really used, regardless + * of what we think based on references to it in the user's actions. + * reject_really_used - same for REJECT + */ + +extern int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt; +extern int interactive, caseins, lex_compat, do_yylineno; +extern int useecs, fulltbl, usemecs, fullspd; +extern int gen_line_dirs, performance_report, backing_up_report; +extern int C_plus_plus, long_align, use_read, yytext_is_array, do_yywrap; +extern int csize; +extern int yymore_used, reject, real_reject, continued_action, in_rule; + +extern int yymore_really_used, reject_really_used; + + +/* Variables used in the flex input routines: + * datapos - characters on current output line + * dataline - number of contiguous lines of data in current data + * statement. 
Used to generate readable -f output + * linenum - current input line number + * out_linenum - current output line number + * skelfile - the skeleton file + * skel - compiled-in skeleton array + * skel_ind - index into "skel" array, if skelfile is nil + * yyin - input file + * backing_up_file - file to summarize backing-up states to + * infilename - name of input file + * outfilename - name of output file + * did_outfilename - whether outfilename was explicitly set + * prefix - the prefix used for externally visible names ("yy" by default) + * yyclass - yyFlexLexer subclass to use for YY_DECL + * do_stdinit - whether to initialize yyin/yyout to stdin/stdout + * use_stdout - the -t flag + * input_files - array holding names of input files + * num_input_files - size of input_files array + * program_name - name with which program was invoked + * + * action_array - array to hold the rule actions + * action_size - size of action_array + * defs1_offset - index where the user's section 1 definitions start + * in action_array + * prolog_offset - index where the prolog starts in action_array + * action_offset - index where the non-prolog starts in action_array + * action_index - index where the next action should go, with respect + * to "action_array" + */ + +extern int datapos, dataline, linenum, out_linenum; +extern FILE *skelfile, *yyin, *backing_up_file; +extern const char *skel[]; +extern int skel_ind; +extern char *infilename, *outfilename; +extern int did_outfilename; +extern char *prefix, *yyclass; +extern int do_stdinit, use_stdout; +extern char **input_files; +extern int num_input_files; +extern char *program_name; + +extern char *action_array; +extern int action_size; +extern int defs1_offset, prolog_offset, action_offset, action_index; + + +/* Variables for stack of states having only one out-transition: + * onestate - state number + * onesym - transition symbol + * onenext - target state + * onedef - default base entry + * onesp - stack pointer + */ + +extern 
int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE]; +extern int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; + + +/* Variables for nfa machine data: + * current_mns - current maximum number of NFA states + * num_rules - number of the last accepting state; also is number of + * rules created so far + * num_eof_rules - number of <<EOF>> rules + * default_rule - number of the default rule + * current_max_rules - current maximum number of rules + * lastnfa - last nfa state number created + * firstst - physically the first state of a fragment + * lastst - last physical state of fragment + * finalst - last logical state of fragment + * transchar - transition character + * trans1 - transition state + * trans2 - 2nd transition state for epsilons + * accptnum - accepting number + * assoc_rule - rule associated with this NFA state (or 0 if none) + * state_type - a STATE_xxx type identifying whether the state is part + * of a normal rule, the leading state in a trailing context + * rule (i.e., the state which marks the transition from + * recognizing the text-to-be-matched to the beginning of + * the trailing context), or a subsequent state in a trailing + * context rule + * rule_type - a RULE_xxx type identifying whether this is a ho-hum + * normal rule or one which has variable head & trailing + * context + * rule_linenum - line number associated with rule + * rule_useful - true if we've determined that the rule can be matched + */ + +extern int current_mns, current_max_rules; +extern int num_rules, num_eof_rules, default_rule, lastnfa; +extern int *firstst, *lastst, *finalst, *transchar, *trans1, *trans2; +extern int *accptnum, *assoc_rule, *state_type; +extern int *rule_type, *rule_linenum, *rule_useful; + +/* Different types of states; values are useful as masks, as well, for + * routines like check_trailing_context(). + */ +#define STATE_NORMAL 0x1 +#define STATE_TRAILING_CONTEXT 0x2 + +/* Global holding current type of state we're making.
*/ + +extern int current_state_type; + +/* Different types of rules. */ +#define RULE_NORMAL 0 +#define RULE_VARIABLE 1 + +/* True if the input rules include a rule with both variable-length head + * and trailing context, false otherwise. + */ +extern int variable_trailing_context_rules; + + +/* Variables for protos: + * numtemps - number of templates created + * numprots - number of protos created + * protprev - backlink to a more-recently used proto + * protnext - forward link to a less-recently used proto + * prottbl - base/def table entry for proto + * protcomst - common state of proto + * firstprot - number of the most recently used proto + * lastprot - number of the least recently used proto + * protsave contains the entire state array for protos + */ + +extern int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP]; +extern int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; + + +/* Variables for managing equivalence classes: + * numecs - number of equivalence classes + * nextecm - forward link of Equivalence Class members + * ecgroup - class number or backward link of EC members + * nummecs - number of meta-equivalence classes (used to compress + * templates) + * tecfwd - forward link of meta-equivalence classes members + * tecbck - backward link of MEC's + */ + +/* Reserve enough room in the equivalence class arrays so that we + * can use the CSIZE'th element to hold equivalence class information + * for the NUL character. Later we'll move this information into + * the 0th element. + */ +extern int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs; + +/* Meta-equivalence classes are indexed starting at 1, so it's possible + * that they will require positions from 1 .. CSIZE, i.e., CSIZE + 1 + * slots total (since the arrays are 0-based). nextecm[] and ecgroup[] + * don't require the extra position since they're indexed from 1 .. CSIZE - 1. 
+ */ +extern int tecfwd[CSIZE + 1], tecbck[CSIZE + 1]; + + +/* Variables for start conditions: + * lastsc - last start condition created + * current_max_scs - current limit on number of start conditions + * scset - set of rules active in start condition + * scbol - set of rules active only at the beginning of line in a s.c. + * scxclu - true if start condition is exclusive + * sceof - true if start condition has EOF rule + * scname - start condition name + */ + +extern int lastsc, *scset, *scbol, *scxclu, *sceof; +extern int current_max_scs; +extern char **scname; + + +/* Variables for dfa machine data: + * current_max_dfa_size - current maximum number of NFA states in DFA + * current_max_xpairs - current maximum number of non-template xtion pairs + * current_max_template_xpairs - current maximum number of template pairs + * current_max_dfas - current maximum number of DFA states + * lastdfa - last dfa state number created + * nxt - state to enter upon reading character + * chk - check value to see if "nxt" applies + * tnxt - internal nxt table for templates + * base - offset into "nxt" for given state + * def - where to go if "chk" disallows "nxt" entry + * nultrans - NUL transition for each state + * NUL_ec - equivalence class of the NUL character + * tblend - last "nxt/chk" table entry being used + * firstfree - first empty entry in "nxt/chk" table + * dss - nfa state set for each dfa + * dfasiz - size of nfa state set for each dfa + * dfaacc - accepting set for each dfa state (if using REJECT), or accepting + * number, if not + * accsiz - size of accepting set for each dfa state + * dhash - dfa state hash value + * numas - number of DFA accepting states created; note that this + * is not necessarily the same value as num_rules, which is the analogous + * value for the NFA + * numsnpairs - number of state/nextstate transition pairs + * jambase - position in base/def where the default jam table starts + * jamstate - state number corresponding to "jam" state + *
end_of_buffer_state - end-of-buffer dfa state number + */ + +extern int current_max_dfa_size, current_max_xpairs; +extern int current_max_template_xpairs, current_max_dfas; +extern int lastdfa, *nxt, *chk, *tnxt; +extern int *base, *def, *nultrans, NUL_ec, tblend, firstfree, **dss, *dfasiz; +extern union dfaacc_union + { + int *dfaacc_set; + int dfaacc_state; + } *dfaacc; +extern int *accsiz, *dhash, numas; +extern int numsnpairs, jambase, jamstate; +extern int end_of_buffer_state; + +/* Variables for ccl information: + * lastccl - ccl index of the last created ccl + * current_maxccls - current limit on the maximum number of unique ccl's + * cclmap - maps a ccl index to its set pointer + * ccllen - gives the length of a ccl + * cclng - true for a given ccl if the ccl is negated + * cclreuse - counts how many times a ccl is re-used + * current_max_ccl_tbl_size - current limit on number of characters needed + * to represent the unique ccl's + * ccltbl - holds the characters in each ccl - indexed by cclmap + */ + +extern int lastccl, *cclmap, *ccllen, *cclng, cclreuse; +extern int current_maxccls, current_max_ccl_tbl_size; +extern wchar_t *ccltbl; + + +/* Variables for miscellaneous information: + * nmstr - last NAME scanned by the scanner + * sectnum - section number currently being parsed + * nummt - number of empty nxt/chk table entries + * hshcol - number of hash collisions detected by snstods + * dfaeql - number of times a newly created dfa was equal to an old one + * numeps - number of epsilon NFA states created + * eps2 - number of epsilon states which have 2 out-transitions + * num_reallocs - number of times it was necessary to realloc() a group + * of arrays + * tmpuses - number of DFA states that chain to templates + * totnst - total number of NFA states used to make DFA states + * peakpairs - peak number of transition pairs we had to store internally + * numuniq - number of unique transitions + * numdup - number of duplicate transitions + * hshsave - number 
of hash collisions saved by checking number of states + * num_backing_up - number of DFA states requiring backing up + * bol_needed - whether scanner needs beginning-of-line recognition + */ + +extern char nmstr[MAXLINE]; +extern int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; +extern int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; +extern int num_backing_up, bol_needed; + +void *allocate_array PROTO((int, size_t)); +void *reallocate_array PROTO((void*, int, size_t)); + +void *flex_alloc PROTO((size_t)); +void *flex_realloc PROTO((void*, size_t)); +void flex_free PROTO((void*)); + +#define allocate_integer_array(size) \ + (int *) allocate_array( size, sizeof( int ) ) + +#define reallocate_integer_array(array,size) \ + (int *) reallocate_array( (void *) array, size, sizeof( int ) ) + +#define allocate_int_ptr_array(size) \ + (int **) allocate_array( size, sizeof( int * ) ) + +#define allocate_char_ptr_array(size) \ + (char **) allocate_array( size, sizeof( char * ) ) + +#define allocate_dfaacc_union(size) \ + (union dfaacc_union *) \ + allocate_array( size, sizeof( union dfaacc_union ) ) + +#define reallocate_int_ptr_array(array,size) \ + (int **) reallocate_array( (void *) array, size, sizeof( int * ) ) + +#define reallocate_char_ptr_array(array,size) \ + (char **) reallocate_array( (void *) array, size, sizeof( char * ) ) + +#define reallocate_dfaacc_union(array, size) \ + (union dfaacc_union *) \ + reallocate_array( (void *) array, size, sizeof( union dfaacc_union ) ) + +#define allocate_character_array(size) \ + (char *) allocate_array( size, sizeof( char ) ) + +#define reallocate_character_array(array,size) \ + (char *) reallocate_array( (void *) array, size, sizeof( char ) ) + +#define allocate_Character_array(size) \ + (Char *) allocate_array( size, sizeof( Char ) ) + +#define reallocate_Character_array(array,size) \ + (Char *) reallocate_array( (void *) array, size, sizeof( Char ) ) + +#define allocate_wchar_array(size) \ + (wchar_t 
*) allocate_array( size, sizeof( wchar_t ) ) + +#define reallocate_wchar_array(array,size) \ + (wchar_t *) reallocate_array( (void *) array, size, sizeof( wchar_t ) ) + + +/* Used to communicate between scanner and parser. The type should really + * be YYSTYPE, but we can't easily get our hands on it. + */ +extern int yylval; + + +/* External functions that are cross-referenced among the flex source files. */ + + +/* from file ccl.c */ + +extern void ccladd PROTO((int, int)); /* add a single character to a ccl */ +extern int cclinit PROTO((void)); /* make an empty ccl */ +extern void cclnegate PROTO((int)); /* negate a ccl */ + +/* List the members of a set of characters in CCL form. */ +extern void list_character_set PROTO((FILE*, int[])); + + +/* from file dfa.c */ + +/* Check a DFA state for backing up. */ +extern void check_for_backing_up PROTO((int, int[])); + +/* Check to see if NFA state set constitutes "dangerous" trailing context. */ +extern void check_trailing_context PROTO((int*, int, int*, int)); + +/* Construct the epsilon closure of a set of ndfa states. */ +extern int *epsclosure PROTO((int*, int*, int[], int*, int*)); + +/* Increase the maximum number of dfas. */ +extern void increase_max_dfas PROTO((void)); + +extern void ntod PROTO((void)); /* convert a ndfa to a dfa */ + +/* Converts a set of ndfa states into a dfa state. */ +extern int snstods PROTO((int[], int, int[], int, int, int*)); + + +/* from file ecs.c */ + +/* Convert character classes to set of equivalence classes. */ +extern void ccl2ecl PROTO((void)); + +/* Associate equivalence class numbers with class members. */ +extern int cre8ecs PROTO((int[], int[], int)); + +/* Update equivalence classes based on character class transitions. */ +extern void mkeccl PROTO((wchar_t[], int, int[], int[], int, int)); + +/* Create equivalence class for single character. 
*/ +extern void mkechar PROTO((int, int[], int[])); + + +/* from file gen.c */ + +extern void do_indent PROTO((void)); /* indent to the current level */ + +/* Generate the code to keep backing-up information. */ +extern void gen_backing_up PROTO((void)); + +/* Generate the code to perform the backing up. */ +extern void gen_bu_action PROTO((void)); + +/* Generate full speed compressed transition table. */ +extern void genctbl PROTO((void)); + +/* Generate the code to find the action number. */ +extern void gen_find_action PROTO((void)); + +extern void genftbl PROTO((void)); /* generate full transition table */ + +/* Generate the code to find the next compressed-table state. */ +extern void gen_next_compressed_state PROTO((char*)); + +/* Generate the code to find the next match. */ +extern void gen_next_match PROTO((void)); + +/* Generate the code to find the next state. */ +extern void gen_next_state PROTO((int)); + +/* Generate the code to make a NUL transition. */ +extern void gen_NUL_trans PROTO((void)); + +/* Generate the code to find the start state. */ +extern void gen_start_state PROTO((void)); + +/* Generate data statements for the transition tables. */ +extern void gentabs PROTO((void)); + +/* Write out a formatted string at the current indentation level. */ +extern void indent_put2s PROTO((char[], char[])); + +/* Write out a string + newline at the current indentation level. */ +extern void indent_puts PROTO((char[])); + +extern void make_tables PROTO((void)); /* generate transition tables */ + + +/* from file main.c */ + +extern void check_options PROTO((void)); +extern void flexend PROTO((int)); +extern void usage PROTO((void)); + + +/* from file misc.c */ + +/* Add a #define to the action file. */ +extern void action_define PROTO(( char *defname, int value )); + +/* Add the given text to the stored actions. */ +extern void add_action PROTO(( char *new_text )); + +/* True if a string is all lower case. 
*/ +extern int all_lower PROTO((register char *)); + +/* True if a string is all upper case. */ +extern int all_upper PROTO((register char *)); + +/* Bubble sort an integer array. */ +extern void bubble PROTO((int [], int)); + +/* Check a character to make sure it's in the expected range. */ +extern void check_char PROTO((int c)); + +/* Convert an upper-case letter to lower-case. */ +extern Char clower PROTO((int)); + +/* Returns a dynamically allocated copy of a string. */ +extern char *copy_string PROTO((register const char *)); + +/* Returns a dynamically allocated copy of a (potentially) unsigned string. */ +extern Char *copy_unsigned_string PROTO((register Char *)); + +/* Shell sort a character array. */ +extern void cshell PROTO((wchar_t[], int, int)); + +/* Finish up a block of data declarations. */ +extern void dataend PROTO((void)); + +/* Flush generated data statements. */ +extern void dataflush PROTO((void)); + +/* Report an error message and terminate. */ +extern void flexerror PROTO((const char[])); + +/* Report a fatal error message and terminate. */ +extern void flexfatal PROTO((const char[])); + +/* Convert a hexadecimal digit string to an integer value. */ +extern int htoi PROTO((Char[])); + +/* Report an error message formatted with one integer argument. */ +extern void lerrif PROTO((const char[], int)); + +/* Report an error message formatted with one string argument. */ +extern void lerrsf PROTO((const char[], const char[])); + +/* Spit out a "#line" statement. */ +extern void line_directive_out PROTO((FILE*, int)); + +/* Mark the current position in the action array as the end of the section 1 + * user defs. + */ +extern void mark_defs1 PROTO((void)); + +/* Mark the current position in the action array as the end of the prolog. */ +extern void mark_prolog PROTO((void)); + +/* Generate a data statement for a two-dimensional array.
*/ +extern void mk2data PROTO((int)); + +extern void mkdata PROTO((int)); /* generate a data statement */ + +/* Return the integer represented by a string of digits. */ +extern int myctoi PROTO((char [])); + +/* Return character corresponding to escape sequence. */ +extern int myesc PROTO((Char[])); + +/* Convert an octal digit string to an integer value. */ +extern int otoi PROTO((Char [] )); + +/* Output a (possibly-formatted) string to the generated scanner. */ +extern void out PROTO((const char [])); +extern void out_dec PROTO((const char [], int)); +extern void out_dec2 PROTO((const char [], int, int)); +extern void out_hex PROTO((const char [], unsigned int)); +extern void out_line_count PROTO((const char [])); +extern void out_str PROTO((const char [], const char [])); +extern void out_str3 + PROTO((const char [], const char [], const char [], const char [])); +extern void out_str_dec PROTO((const char [], const char [], int)); +extern void outc PROTO((int)); +extern void outn PROTO((const char [])); + +/* Return a printable version of the given character, which might be + * 8-bit. + */ +extern char *readable_form PROTO((int)); + +/* Write out one section of the skeleton file. */ +extern void skelout PROTO((void)); + +/* Output a yy_trans_info structure. */ +extern void transition_struct_out PROTO((int, int)); + +/* Only needed when using certain broken versions of bison to build parse.c. */ +extern void *yy_flex_xmalloc PROTO(( int )); + +/* Set a region of memory to 0. */ +extern void zero_out PROTO((char *, size_t)); + + +/* from file nfa.c */ + +/* Add an accepting state to a machine. */ +extern void add_accept PROTO((int, int)); + +/* Make a given number of copies of a singleton machine. */ +extern int copysingl PROTO((int, int)); + +/* Debugging routine to write out an nfa. */ +extern void dumpnfa PROTO((int)); + +/* Finish up the processing for a rule. */ +extern void finish_rule PROTO((int, int, int, int)); + +/* Connect two machines together. 
*/ +extern int link_machines PROTO((int, int)); + +/* Mark each "beginning" state in a machine as being a "normal" (i.e., + * not trailing context associated) state. + */ +extern void mark_beginning_as_normal PROTO((register int)); + +/* Make a machine that branches to two machines. */ +extern int mkbranch PROTO((int, int)); + +extern int mkclos PROTO((int)); /* convert a machine into a closure */ +extern int mkopt PROTO((int)); /* make a machine optional */ + +/* Make a machine that matches either one of two machines. */ +extern int mkor PROTO((int, int)); + +/* Convert a machine into a positive closure. */ +extern int mkposcl PROTO((int)); + +extern int mkrep PROTO((int, int, int)); /* make a replicated machine */ + +/* Create a state with a transition on a given symbol. */ +extern int mkstate PROTO((int)); + +extern void new_rule PROTO((void)); /* initialize for a new rule */ + + +/* from file parse.y */ + +/* Build the "<<EOF>>" action for the active start conditions. */ +extern void build_eof_action PROTO((void)); + +/* Write out a message formatted with one string, pinpointing its location. */ +extern void format_pinpoint_message PROTO((char[], char[])); + +/* Write out a message, pinpointing its location. */ +extern void pinpoint_message PROTO((char[])); + +/* Write out a warning, pinpointing it at the given line. */ +extern void line_warning PROTO(( char[], int )); + +/* Write out a message, pinpointing it at the given line. */ +extern void line_pinpoint PROTO(( char[], int )); + +/* Report a formatted syntax error. */ +extern void format_synerr PROTO((char [], char[])); +extern void synerr PROTO((char [])); /* report a syntax error */ +extern void format_warn PROTO((char [], char[])); +extern void warn PROTO((char [])); /* report a warning */ +extern void yyerror PROTO((char [])); /* report a parse error */ +extern int yyparse PROTO((void)); /* the YACC parser */ + + +/* from file scan.l */ + +/* The Flex-generated scanner for flex. 
*/ +extern int flexscan PROTO((void)); + +/* Open the given file (if NULL, stdin) for scanning. */ +extern void set_input_file PROTO((char*)); + +/* Wrapup a file in the lexical analyzer. */ +extern int yywrap PROTO((void)); + + +/* from file sym.c */ + +/* Add symbol and definitions to symbol table. */ +extern int addsym PROTO((register char[], char*, int, hash_table, int)); + +/* Save the text of a character class. */ +extern void cclinstal PROTO ((Char [], int)); + +/* Lookup the number associated with character class. */ +extern int ccllookup PROTO((Char [])); + +/* Find symbol in symbol table. */ +extern struct hash_entry *findsym PROTO((register char[], hash_table, int )); + +extern void ndinstal PROTO((char[], Char[])); /* install a name definition */ +extern Char *ndlookup PROTO((char[])); /* lookup a name definition */ + +/* Increase maximum number of SC's. */ +extern void scextend PROTO((void)); +extern void scinstal PROTO((char[], int)); /* make a start condition */ + +/* Lookup the number associated with a start condition. */ +extern int sclookup PROTO((char[])); + + +/* from file tblcmp.c */ + +/* Build table entries for dfa state. */ +extern void bldtbl PROTO((int[], int, int, int, int)); + +extern void cmptmps PROTO((void)); /* compress template table entries */ +extern void expand_nxt_chk PROTO((void)); /* increase nxt/chk arrays */ +/* Finds a space in the table for a state to be placed. */ +extern int find_table_space PROTO((int*, int)); +extern void inittbl PROTO((void)); /* initialize transition tables */ +/* Make the default, "jam" table entries. */ +extern void mkdeftbl PROTO((void)); + +/* Create table entries for a state (or state fragment) which has + * only one out-transition. + */ +extern void mk1tbl PROTO((int, int, int, int)); + +/* Place a state into full speed transition table. */ +extern void place_state PROTO((int*, int, int)); + +/* Save states with only one out-transition to be processed later. 
*/ +extern void stack1 PROTO((int, int, int, int)); + + +/* from file yylex.c */ + +extern int yylex PROTO((void)); diff --git a/to.do/unicode/gen.c b/to.do/unicode/gen.c new file mode 100644 index 0000000..d9db93a --- /dev/null +++ b/to.do/unicode/gen.c @@ -0,0 +1,1650 @@ +/* gen - actual generation (writing) of flex scanners */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. + * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +/* $Header$ */ + +#include "flexdef.h" + + +/* declare functions that have forward references */ + +void gen_next_state PROTO((int)); +void genecs PROTO((void)); +void indent_put2s PROTO((char [], char [])); +void indent_puts PROTO((char [])); + + +static int indent_level = 0; /* each level is 8 spaces */ + +#define indent_up() (++indent_level) +#define indent_down() (--indent_level) +#define set_indent(indent_val) indent_level = indent_val + +/* Almost everything is done in terms of arrays starting at 1, so provide + * a null entry for the zero element of all C arrays. (The exception + * to this is that the fast table representation generally uses the + * 0 elements of its arrays, too.) + */ +static char C_int_decl[] = "static yyconst int %s[%d] =\n { 0,\n"; +static char C_short_decl[] = "static yyconst short int %s[%d] =\n { 0,\n"; +static char C_long_decl[] = "static yyconst long int %s[%d] =\n { 0,\n"; +static char C_state_decl[] = + "static yyconst yy_state_type %s[%d] =\n { 0,\n"; +static char C_uchar_decl[] = + "static yyconst unsigned char %s[%d] =\n { 0,\n"; +static char C_ushort_decl[] = + "static yyconst unsigned short %s[%d] =\n { 0,\n"; + + +/* Indent to the current level. */ + +void do_indent() + { + register int i = indent_level * 8; + + while ( i >= 8 ) + { + outc( '\t' ); + i -= 8; + } + + while ( i > 0 ) + { + outc( ' ' ); + --i; + } + } + + +/* Generate the code to keep backing-up information. */ + +void gen_backing_up() + { + if ( reject || num_backing_up == 0 ) + return; + + if ( fullspd ) + indent_puts( "if ( yy_current_state[-1].yy_nxt )" ); + else + indent_puts( "if ( yy_accept[yy_current_state] )" ); + + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_last_accepting_state = yy_current_state;" ); + indent_puts( "yy_last_accepting_cpos = yy_cp;" ); + indent_puts( "}" ); + indent_down(); + } + + +/* Generate the code to perform the backing up. 
*/ + +void gen_bu_action() + { + if ( reject || num_backing_up == 0 ) + return; + + set_indent( 3 ); + + indent_puts( "case 0: /* must back up */" ); + indent_puts( "/* undo the effects of YY_DO_BEFORE_ACTION */" ); + indent_puts( "*yy_cp = yy_hold_char;" ); + + if ( fullspd || fulltbl ) + indent_puts( "yy_cp = yy_last_accepting_cpos + 1;" ); + else + /* Backing-up info for compressed tables is taken \after/ + * yy_cp has been incremented for the next state. + */ + indent_puts( "yy_cp = yy_last_accepting_cpos;" ); + + indent_puts( "yy_current_state = yy_last_accepting_state;" ); + indent_puts( "goto yy_find_action;" ); + outc( '\n' ); + + set_indent( 0 ); + } + + +/* genctbl - generates full speed compressed transition table */ + +void genctbl() + { + register int i; + int end_of_buffer_action = num_rules + 1; + + /* Table of verify for transition and offset to next state. */ + out_dec( "static yyconst struct yy_trans_info yy_transition[%d] =\n", + tblend + numecs + 1 ); + outn( " {" ); + + /* We want the transition to be represented as the offset to the + * next state, not the actual state number, which is what it currently + * is. The offset is base[nxt[i]] - (base of current state)]. That's + * just the difference between the starting points of the two involved + * states (to - from). + * + * First, though, we need to find some way to put in our end-of-buffer + * flags and states. We do this by making a state with absolutely no + * transitions. We put it at the end of the table. + */ + + /* We need to have room in nxt/chk for two more slots: One for the + * action and one for the end-of-buffer transition. We now *assume* + * that we're guaranteed the only character we'll try to index this + * nxt/chk pair with is EOB, i.e., 0, so we don't have to make sure + * there's room for jam entries for other characters. 
+ */ + + while ( tblend + 2 >= current_max_xpairs ) + expand_nxt_chk(); + + while ( lastdfa + 1 >= current_max_dfas ) + increase_max_dfas(); + + base[lastdfa + 1] = tblend + 2; + nxt[tblend + 1] = end_of_buffer_action; + chk[tblend + 1] = numecs + 1; + chk[tblend + 2] = 1; /* anything but EOB */ + + /* So that "make test" won't show arb. differences. */ + nxt[tblend + 2] = 0; + + /* Make sure every state has an end-of-buffer transition and an + * action #. + */ + for ( i = 0; i <= lastdfa; ++i ) + { + int anum = dfaacc[i].dfaacc_state; + int offset = base[i]; + + chk[offset] = EOB_POSITION; + chk[offset - 1] = ACTION_POSITION; + nxt[offset - 1] = anum; /* action number */ + } + + for ( i = 0; i <= tblend; ++i ) + { + if ( chk[i] == EOB_POSITION ) + transition_struct_out( 0, base[lastdfa + 1] - i ); + + else if ( chk[i] == ACTION_POSITION ) + transition_struct_out( 0, nxt[i] ); + + else if ( chk[i] > numecs || chk[i] == 0 ) + transition_struct_out( 0, 0 ); /* unused slot */ + + else /* verify, transition */ + transition_struct_out( chk[i], + base[nxt[i]] - (i - chk[i]) ); + } + + + /* Here's the final, end-of-buffer state. */ + transition_struct_out( chk[tblend + 1], nxt[tblend + 1] ); + transition_struct_out( chk[tblend + 2], nxt[tblend + 2] ); + + outn( " };\n" ); + + /* Table of pointers to start states. */ + out_dec( + "static yyconst struct yy_trans_info *yy_start_state_list[%d] =\n", + lastsc * 2 + 1 ); + outn( " {" ); /* } so vi doesn't get confused */ + + for ( i = 0; i <= lastsc * 2; ++i ) + out_dec( " &yy_transition[%d],\n", base[i] ); + + dataend(); + + if ( useecs ) + genecs(); + } + + +/* Generate equivalence-class tables. 
*/
+
+void genecs()
+	{
+	register int i, j;
+	int numrows;
+	/* Choose the declaration used for yy_ec: when csize == 65536 it is
+	 * C_long_decl if long_align is set, else C_uchar_decl if numecs < 256,
+	 * else C_ushort_decl if numecs < 65536; in every other case
+	 * C_int_decl is used.
+	 */
+	if ( csize == 65536 && long_align )
+		out_str_dec( C_long_decl, "yy_ec", csize );
+	else if ( csize == 65536 && numecs < 256 )
+		out_str_dec( C_uchar_decl, "yy_ec", csize );
+	else if ( csize == 65536 && numecs < 65536 )
+		out_str_dec( C_ushort_decl, "yy_ec", csize );
+	else
+		out_str_dec( C_int_decl, "yy_ec", csize );
+
+	for ( i = 1; i < csize; ++i )
+		{
+		if ( caseins && (i >= 'A') && (i <= 'Z') )
+			ecgroup[i] = ecgroup[clower( i )];
+
+		ecgroup[i] = ABS( ecgroup[i] );
+		mkdata( ecgroup[i] );
+		}
+
+	dataend();
+
+	if ( trace )
+		{
+		fputs( _( "\n\nEquivalence Classes:\n\n" ), stderr );
+
+		numrows = csize / 8;
+
+		for ( j = 0; j < numrows; ++j )
+			{
+			for ( i = j; i < csize; i = i + numrows )
+				{
+				fprintf( stderr, "%4s = %-2d",
+					readable_form( i ), ecgroup[i] );
+
+				putc( ' ', stderr );
+				}
+
+			putc( '\n', stderr );
+			}
+		}
+	}
+
+
+/* Generate the code to find the action number.
+ *
+ * One of four variants is emitted, selected by the table options:
+ * for fullspd and for fulltbl a single assignment to yy_act; for
+ * reject, a "find_rule" loop that walks yy_acclist (with extra
+ * handling when variable_trailing_context_rules is set); otherwise
+ * (compressed tables) a yy_accept lookup, followed by explicit
+ * backing-up code when the scanner is interactive.
+ */
+
+void gen_find_action()
+	{
+	if ( fullspd )
+		indent_puts( "yy_act = yy_current_state[-1].yy_nxt;" );
+
+	else if ( fulltbl )
+		indent_puts( "yy_act = yy_accept[yy_current_state];" );
+
+	else if ( reject )
+		{
+		indent_puts( "yy_current_state = *--yy_state_ptr;" );
+		indent_puts( "yy_lp = yy_accept[yy_current_state];" );
+
+		outn(
+		"find_rule: /* we branch to this label when backing up */" );
+
+		indent_puts(
+		"for ( ; ; ) /* until we find what rule we matched */" );
+
+		indent_up();
+
+		indent_puts( "{" );
+
+		indent_puts(
+		"if ( yy_lp && yy_lp < yy_accept[yy_current_state + 1] )" );
+		indent_up();
+		indent_puts( "{" );
+		indent_puts( "yy_act = yy_acclist[yy_lp];" );
+
+		if ( variable_trailing_context_rules )
+			{
+			indent_puts( "if ( yy_act & YY_TRAILING_HEAD_MASK ||" );
+			indent_puts( " yy_looking_for_trail_begin )" );
+			indent_up();
+			indent_puts( "{" );
+
+			indent_puts(
+				"if ( yy_act == yy_looking_for_trail_begin )" );
+			indent_up();
+			indent_puts( "{" );
+			indent_puts( "yy_looking_for_trail_begin = 0;" );
+			indent_puts( "yy_act &= ~YY_TRAILING_HEAD_MASK;" );
+			indent_puts( "break;" );
+			indent_puts( "}" );
+			indent_down();
+
+			indent_puts( "}" );
+			indent_down();
+
+			indent_puts( "else if ( yy_act & YY_TRAILING_MASK )" );
+			indent_up();
+			indent_puts( "{" );
+			indent_puts(
+		"yy_looking_for_trail_begin = yy_act & ~YY_TRAILING_MASK;" );
+			indent_puts(
+		"yy_looking_for_trail_begin |= YY_TRAILING_HEAD_MASK;" );
+
+			if ( real_reject )
+				{
+				/* Remember matched text in case we back up
+				 * due to REJECT.
+				 */
+				indent_puts( "yy_full_match = yy_cp;" );
+				indent_puts( "yy_full_state = yy_state_ptr;" );
+				indent_puts( "yy_full_lp = yy_lp;" );
+				}
+
+			indent_puts( "}" );
+			indent_down();
+
+			indent_puts( "else" );
+			indent_up();
+			indent_puts( "{" );
+			indent_puts( "yy_full_match = yy_cp;" );
+			indent_puts( "yy_full_state = yy_state_ptr;" );
+			indent_puts( "yy_full_lp = yy_lp;" );
+			indent_puts( "break;" );
+			indent_puts( "}" );
+			indent_down();
+
+			indent_puts( "++yy_lp;" );
+			indent_puts( "goto find_rule;" );
+			}
+
+		else
+			{
+			/* Remember matched text in case we back up due to
+			 * trailing context plus REJECT.
+			 */
+			indent_up();
+			indent_puts( "{" );
+			indent_puts( "yy_full_match = yy_cp;" );
+			indent_puts( "break;" );
+			indent_puts( "}" );
+			indent_down();
+			}
+
+		indent_puts( "}" );
+		indent_down();
+
+		indent_puts( "--yy_cp;" );
+
+		/* We could consolidate the following two lines with those at
+		 * the beginning, but at the cost of complaints that we're
+		 * branching inside a loop.
+		 */
+		indent_puts( "yy_current_state = *--yy_state_ptr;" );
+		indent_puts( "yy_lp = yy_accept[yy_current_state];" );
+
+		indent_puts( "}" );
+
+		indent_down();
+		}
+
+	else
+		{ /* compressed */
+		indent_puts( "yy_act = yy_accept[yy_current_state];" );
+
+		if ( interactive && ! reject )
+			{
+			/* Do the guaranteed-needed backing up to figure out
+			 * the match.
+			 */
+			indent_puts( "if ( yy_act == 0 )" );
+			indent_up();
+			indent_puts( "{ /* have to back up */" );
+			indent_puts( "yy_cp = yy_last_accepting_cpos;" );
+			indent_puts(
+				"yy_current_state = yy_last_accepting_state;" );
+			indent_puts( "yy_act = yy_accept[yy_current_state];" );
+			indent_puts( "}" );
+			indent_down();
+			}
+		}
+	}
+
+
+/* genftbl - generate full transition table
+ *
+ * Records end_of_buffer_action (num_rules + 1) as the accepting number
+ * of end_of_buffer_state, writes the "yy_accept" array with one
+ * mkdata() entry for each DFA state 1..lastdfa, and then calls
+ * genecs() if useecs is set.  With trace set, each state that has a
+ * non-zero accepting number is also reported on stderr.
+ */
+
+void genftbl()
+	{
+	register int i;
+	int end_of_buffer_action = num_rules + 1;
+
+	out_str_dec( long_align ? C_long_decl : C_short_decl,
+		"yy_accept", lastdfa + 1 );
+
+	dfaacc[end_of_buffer_state].dfaacc_state = end_of_buffer_action;
+
+	for ( i = 1; i <= lastdfa; ++i )
+		{
+		register int anum = dfaacc[i].dfaacc_state;
+
+		mkdata( anum );
+
+		if ( trace && anum )
+			fprintf( stderr, _( "state # %d accepts: [%d]\n" ),
+				i, anum );
+		}
+
+	dataend();
+
+	if ( useecs )
+		genecs();
+
+	/* Don't have to dump the actual full table entries - they were
+	 * created on-the-fly.
+	 */
+	}
+
+
+/* Generate the code to find the next compressed-table state.
+ *
+ * char_map is the text of a C expression; it is substituted into the
+ * emitted declaration "register YY_CHAR yy_c = <char_map>;".  The
+ * rest of the emitted code follows yy_def links until yy_chk confirms
+ * the transition (mapping yy_c through yy_meta when usemecs is set)
+ * and then takes the yy_nxt transition.
+ */
+
+void gen_next_compressed_state( char_map )
+char *char_map;
+	{
+	indent_put2s( "register YY_CHAR yy_c = %s;", char_map );
+
+	/* Save the backing-up info \before/ computing the next state
+	 * because we always compute one more state than needed - we
+	 * always proceed until we reach a jam state
+	 */
+	gen_backing_up();
+
+	indent_puts(
+"while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )" );
+	indent_up();
+	indent_puts( "{" );
+	indent_puts( "yy_current_state = (int) yy_def[yy_current_state];" );
+
+	if ( usemecs )
+		{
+		/* We've arranged it so that templates are never chained
+		 * to one another. This means we can afford to make a
+		 * very simple test to see if we need to convert to
+		 * yy_c's meta-equivalence class without worrying
+		 * about erroneously looking up the meta-equivalence
+		 * class twice
+		 */
+		do_indent();
+
+		/* lastdfa + 2 is the beginning of the templates */
+		out_dec( "if ( yy_current_state >= %d )\n", lastdfa + 2 );
+
+		indent_up();
+		indent_puts( "yy_c = yy_meta[(unsigned int) yy_c];" );
+		indent_down();
+		}
+
+	indent_puts( "}" );
+	indent_down();
+
+	indent_puts(
+"yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];" );
+	}
+
+
+/* Generate the code to find the next match.
+ *
+ * Emits the scanner's matching loop.  Its shape depends on the table
+ * format: a "while" loop over yy_nxt for fulltbl, a "for" loop over
+ * yy_trans_info records for fullspd, and otherwise (compressed) a
+ * "do ... while" loop built around gen_next_state( false ).
+ */
+
+void gen_next_match()
+	{
+	/* NOTE - changes in here should be reflected in gen_next_state() and
+	 * gen_NUL_trans().
+	 */
+	char *char_map = useecs ?
+				"yy_ec[YY_SC_TO_UI(*yy_cp)]" :
+				"YY_SC_TO_UI(*yy_cp)";
+
+	char *char_map_2 = useecs ?
+				"yy_ec[YY_SC_TO_UI(*++yy_cp)]" :
+				"YY_SC_TO_UI(*++yy_cp)";
+
+	if ( fulltbl )
+		{
+		indent_put2s(
+	"while ( (yy_current_state = yy_nxt[yy_current_state][%s]) > 0 )",
+				char_map );
+
+		indent_up();
+
+		if ( num_backing_up > 0 )
+			{
+			indent_puts( "{" );	/* } for vi */
+			gen_backing_up();
+			outc( '\n' );
+			}
+
+		indent_puts( "++yy_cp;" );
+
+		if ( num_backing_up > 0 )
+			/* { for vi */
+			indent_puts( "}" );
+
+		indent_down();
+
+		outc( '\n' );
+		indent_puts( "yy_current_state = -yy_current_state;" );
+		}
+
+	else if ( fullspd )
+		{
+		indent_puts( "{" );	/* } for vi */
+		indent_puts(
+		"register yyconst struct yy_trans_info *yy_trans_info;\n" );
+		indent_puts( "register YY_CHAR yy_c;\n" );
+		indent_put2s( "for ( yy_c = %s;", char_map );
+		indent_puts(
+	" (yy_trans_info = &yy_current_state[(unsigned int) yy_c])->" );
+		indent_puts( "yy_verify == yy_c;" );
+		indent_put2s( " yy_c = %s )", char_map_2 );
+
+		indent_up();
+
+		if ( num_backing_up > 0 )
+			indent_puts( "{" );	/* } for vi */
+
+		indent_puts( "yy_current_state += yy_trans_info->yy_nxt;" );
+
+		if ( num_backing_up > 0 )
+			{
+			outc( '\n' );
+			gen_backing_up();	/* { for vi */
+			indent_puts( "}" );
+			}
+
+		indent_down();	/* { for vi */
+		indent_puts( "}" );
+		}
+
+	else
+		{ /* compressed */
+		indent_puts( "do" );
+
+		indent_up();
+		indent_puts( "{" );	/* } for vi */
+
+		gen_next_state( false );
+
+		indent_puts( "++yy_cp;" );
+
+		/* { for vi */
+		indent_puts( "}" );
+		indent_down();
+
+		do_indent();
+
+		if ( interactive )
+			out_dec( "while ( yy_base[yy_current_state] != %d );\n",
+				jambase );
+		else
+			out_dec( "while ( yy_current_state != %d );\n",
+				jamstate );
+
+		if ( ! reject && ! interactive )
+			{
+			/* Do the guaranteed-needed backing up to figure out
+			 * the match.
+			 */
+			indent_puts( "yy_cp = yy_last_accepting_cpos;" );
+			indent_puts(
+				"yy_current_state = yy_last_accepting_state;" );
+			}
+		}
+	}
+
+
+/* Generate the code to find the next state.
+ *
+ * worry_about_NULs: when non-zero, the emitted code treats a NUL at
+ * *yy_cp specially.  Without a nultrans table the NUL is mapped to
+ * NUL_ec inside the lookup expression itself; with nultrans the
+ * normal lookup is wrapped in "if ( *yy_cp )" and the "else" branch
+ * consults yy_NUL_trans.  The lookup expression is built in the local
+ * char_map buffer.
+ */
+
+void gen_next_state( worry_about_NULs )
+int worry_about_NULs;
+	{ /* NOTE - changes in here should be reflected in gen_next_match() */
+	char char_map[256];
+
+	if ( worry_about_NULs && ! nultrans )
+		{
+		if ( useecs )
+			(void) sprintf( char_map,
+				"(*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : %d)",
+					NUL_ec );
+		else
+			(void) sprintf( char_map,
+				"(*yy_cp ? YY_SC_TO_UI(*yy_cp) : %d)", NUL_ec );
+		}
+
+	else
+		strcpy( char_map, useecs ?
+			"yy_ec[YY_SC_TO_UI(*yy_cp)]" : "YY_SC_TO_UI(*yy_cp)" );
+
+	if ( worry_about_NULs && nultrans )
+		{
+		if ( ! fulltbl && ! fullspd )
+			/* Compressed tables back up *before* they match. */
+			gen_backing_up();
+
+		indent_puts( "if ( *yy_cp )" );
+		indent_up();
+		indent_puts( "{" );	/* } for vi */
+		}
+
+	if ( fulltbl )
+		indent_put2s(
+			"yy_current_state = yy_nxt[yy_current_state][%s];",
+				char_map );
+
+	else if ( fullspd )
+		indent_put2s(
+			"yy_current_state += yy_current_state[%s].yy_nxt;",
+				char_map );
+
+	else
+		gen_next_compressed_state( char_map );
+
+	if ( worry_about_NULs && nultrans )
+		{
+		/* { for vi */
+		indent_puts( "}" );
+		indent_down();
+		indent_puts( "else" );
+		indent_up();
+		indent_puts(
+			"yy_current_state = yy_NUL_trans[yy_current_state];" );
+		indent_down();
+		}
+
+	if ( fullspd || fulltbl )
+		gen_backing_up();
+
+	if ( reject )
+		indent_puts( "*yy_state_ptr++ = yy_current_state;" );
+	}
+
+
+/* Generate the code to make a NUL transition.
+ *
+ * Every variant emits an assignment to yy_current_state for the
+ * transition on NUL (via yy_NUL_trans, the full table, the fullspd
+ * records, or the compressed tables with NUL_ec as the character)
+ * followed by an assignment to yy_is_jam.
+ */
+
+void gen_NUL_trans()
+	{ /* NOTE - changes in here should be reflected in gen_next_match() */
+	/* Only generate a definition for "yy_cp" if we'll generate code
+	 * that uses it. Otherwise lint and the like complain.
+	 */
+	int need_backing_up = (num_backing_up > 0 && ! reject);
+
+	if ( need_backing_up && (! nultrans || fullspd || fulltbl) )
+		/* We're going to need yy_cp lying around for the call
+		 * below to gen_backing_up().
+		 */
+		indent_puts( "register YY_CHAR *yy_cp = yy_c_buf_p;" );
+
+	outc( '\n' );
+
+	if ( nultrans )
+		{
+		indent_puts(
+			"yy_current_state = yy_NUL_trans[yy_current_state];" );
+		indent_puts( "yy_is_jam = (yy_current_state == 0);" );
+		}
+
+	else if ( fulltbl )
+		{
+		do_indent();
+		out_dec( "yy_current_state = yy_nxt[yy_current_state][%d];\n",
+			NUL_ec );
+		indent_puts( "yy_is_jam = (yy_current_state <= 0);" );
+		}
+
+	else if ( fullspd )
+		{
+		do_indent();
+		out_dec( "register int yy_c = %d;\n", NUL_ec );
+
+		indent_puts(
+		"register yyconst struct yy_trans_info *yy_trans_info;\n" );
+		indent_puts(
+		"yy_trans_info = &yy_current_state[(unsigned int) yy_c];" );
+		indent_puts( "yy_current_state += yy_trans_info->yy_nxt;" );
+
+		indent_puts(
+			"yy_is_jam = (yy_trans_info->yy_verify != yy_c);" );
+		}
+
+	else
+		{
+		char NUL_ec_str[20];
+
+		(void) sprintf( NUL_ec_str, "%d", NUL_ec );
+		gen_next_compressed_state( NUL_ec_str );
+
+		do_indent();
+		out_dec( "yy_is_jam = (yy_current_state == %d);\n", jamstate );
+
+		if ( reject )
+			{
+			/* Only stack this state if it's a transition we
+			 * actually make. If we stack it on a jam, then
+			 * the state stack and yy_c_buf_p get out of sync.
+			 */
+			indent_puts( "if ( ! yy_is_jam )" );
+			indent_up();
+			indent_puts( "*yy_state_ptr++ = yy_current_state;" );
+			indent_down();
+			}
+		}
+
+	/* If we've entered an accepting state, back up; note that
+	 * compressed tables have *already* done such backing up, so
+	 * we needn't bother with it again.
+	 */
+	if ( need_backing_up && (fullspd || fulltbl) )
+		{
+		outc( '\n' );
+		indent_puts( "if ( ! yy_is_jam )" );
+		indent_up();
+		indent_puts( "{" );
+		gen_backing_up();
+		indent_puts( "}" );
+		indent_down();
+		}
+	}
+
+
+/* Generate the code to find the start state.
*/ + +void gen_start_state() + { + if ( fullspd ) + { + if ( bol_needed ) + { + indent_puts( + "yy_current_state = yy_start_state_list[yy_start + YY_AT_BOL()];" ); + } + else + indent_puts( + "yy_current_state = yy_start_state_list[yy_start];" ); + } + + else + { + indent_puts( "yy_current_state = yy_start;" ); + + if ( bol_needed ) + indent_puts( "yy_current_state += YY_AT_BOL();" ); + + if ( reject ) + { + /* Set up for storing up states. */ + indent_puts( "yy_state_ptr = yy_state_buf;" ); + indent_puts( "*yy_state_ptr++ = yy_current_state;" ); + } + } + } + + +/* gentabs - generate data statements for the transition tables */ + +void gentabs() + { + int i, j, k, *accset, nacc, *acc_array, total_states; + int end_of_buffer_action = num_rules + 1; + + acc_array = allocate_integer_array( current_max_dfas ); + nummt = 0; + + /* The compressed table format jams by entering the "jam state", + * losing information about the previous state in the process. + * In order to recover the previous state, we effectively need + * to keep backing-up information. + */ + ++num_backing_up; + + if ( reject ) + { + /* Write out accepting list and pointer list. + * + * First we generate the "yy_acclist" array. In the process, + * we compute the indices that will go into the "yy_accept" + * array, and save the indices in the dfaacc array. + */ + int EOB_accepting_list[2]; + + /* Set up accepting structures for the End Of Buffer state. */ + EOB_accepting_list[0] = 0; + EOB_accepting_list[1] = end_of_buffer_action; + accsiz[end_of_buffer_state] = 1; + dfaacc[end_of_buffer_state].dfaacc_set = EOB_accepting_list; + + out_str_dec( long_align ? 
C_long_decl : C_short_decl, + "yy_acclist", MAX( numas, 1 ) + 1 ); + + j = 1; /* index into "yy_acclist" array */ + + for ( i = 1; i <= lastdfa; ++i ) + { + acc_array[i] = j; + + if ( accsiz[i] != 0 ) + { + accset = dfaacc[i].dfaacc_set; + nacc = accsiz[i]; + + if ( trace ) + fprintf( stderr, + _( "state # %d accepts: " ), + i ); + + for ( k = 1; k <= nacc; ++k ) + { + int accnum = accset[k]; + + ++j; + + if ( variable_trailing_context_rules && + ! (accnum & YY_TRAILING_HEAD_MASK) && + accnum > 0 && accnum <= num_rules && + rule_type[accnum] == RULE_VARIABLE ) + { + /* Special hack to flag + * accepting number as part + * of trailing context rule. + */ + accnum |= YY_TRAILING_MASK; + } + + mkdata( accnum ); + + if ( trace ) + { + fprintf( stderr, "[%d]", + accset[k] ); + + if ( k < nacc ) + fputs( ", ", stderr ); + else + putc( '\n', stderr ); + } + } + } + } + + /* add accepting number for the "jam" state */ + acc_array[i] = j; + + dataend(); + } + + else + { + dfaacc[end_of_buffer_state].dfaacc_state = end_of_buffer_action; + + for ( i = 1; i <= lastdfa; ++i ) + acc_array[i] = dfaacc[i].dfaacc_state; + + /* add accepting number for jam state */ + acc_array[i] = 0; + } + + /* Spit out "yy_accept" array. If we're doing "reject", it'll be + * pointers into the "yy_acclist" array. Otherwise it's actual + * accepting numbers. In either case, we just dump the numbers. + */ + + /* "lastdfa + 2" is the size of "yy_accept"; includes room for C arrays + * beginning at 0 and for "jam" state. + */ + k = lastdfa + 2; + + if ( reject ) + /* We put a "cap" on the table associating lists of accepting + * numbers with state numbers. This is needed because we tell + * where the end of an accepting list is by looking at where + * the list for the next state starts. + */ + ++k; + + out_str_dec( long_align ? C_long_decl : C_short_decl, "yy_accept", k ); + + for ( i = 1; i <= lastdfa; ++i ) + { + mkdata( acc_array[i] ); + + if ( ! 
reject && trace && acc_array[i] ) + fprintf( stderr, _( "state # %d accepts: [%d]\n" ), + i, acc_array[i] ); + } + + /* Add entry for "jam" state. */ + mkdata( acc_array[i] ); + + if ( reject ) + /* Add "cap" for the list. */ + mkdata( acc_array[i] ); + + dataend(); + + if ( useecs ) + genecs(); + + if ( usemecs ) + { + /* Write out meta-equivalence classes (used to index + * templates with). + */ + + if ( trace ) + fputs( _( "\n\nMeta-Equivalence Classes:\n" ), + stderr ); + + out_str_dec( C_int_decl, "yy_meta", numecs + 1 ); + + for ( i = 1; i <= numecs; ++i ) + { + if ( trace ) + fprintf( stderr, "%d = %d\n", + i, ABS( tecbck[i] ) ); + + mkdata( ABS( tecbck[i] ) ); + } + + dataend(); + } + + total_states = lastdfa + numtemps; + + out_str_dec( (tblend >= MAX_SHORT || long_align) ? + C_long_decl : C_short_decl, + "yy_base", total_states + 1 ); + + for ( i = 1; i <= lastdfa; ++i ) + { + register int d = def[i]; + + if ( base[i] == JAMSTATE ) + base[i] = jambase; + + if ( d == JAMSTATE ) + def[i] = jamstate; + + else if ( d < 0 ) + { + /* Template reference. */ + ++tmpuses; + def[i] = lastdfa - d + 1; + } + + mkdata( base[i] ); + } + + /* Generate jam state's base index. */ + mkdata( base[i] ); + + for ( ++i /* skip jam state */; i <= total_states; ++i ) + { + mkdata( base[i] ); + def[i] = jamstate; + } + + dataend(); + + out_str_dec( (total_states >= MAX_SHORT || long_align) ? + C_long_decl : C_short_decl, + "yy_def", total_states + 1 ); + + for ( i = 1; i <= total_states; ++i ) + mkdata( def[i] ); + + dataend(); + + out_str_dec( (total_states >= MAX_SHORT || long_align) ? + C_long_decl : C_short_decl, + "yy_nxt", tblend + 1 ); + + for ( i = 1; i <= tblend; ++i ) + { + /* Note, the order of the following test is important. + * If chk[i] is 0, then nxt[i] is undefined. + */ + if ( chk[i] == 0 || nxt[i] == 0 ) + nxt[i] = jamstate; /* new state is the JAM state */ + + mkdata( nxt[i] ); + } + + dataend(); + + out_str_dec( (total_states >= MAX_SHORT || long_align) ? 
+ C_long_decl : C_short_decl, + "yy_chk", tblend + 1 ); + + for ( i = 1; i <= tblend; ++i ) + { + if ( chk[i] == 0 ) + ++nummt; + + mkdata( chk[i] ); + } + + dataend(); + } + + +/* Write out a formatted string (with a secondary string argument) at the + * current indentation level, adding a final newline. + */ + +void indent_put2s( fmt, arg ) +char fmt[], arg[]; + { + do_indent(); + out_str( fmt, arg ); + outn( "" ); + } + + +/* Write out a string at the current indentation level, adding a final + * newline. + */ + +void indent_puts( str ) +char str[]; + { + do_indent(); + outn( str ); + } + + +/* make_tables - generate transition tables and finishes generating output file + */ + +void make_tables() + { + register int i; + int did_eof_rule = false; + + skelout(); + + /* First, take care of YY_DO_BEFORE_ACTION depending on yymore + * being used. + */ + set_indent( 1 ); + + if ( yymore_used && ! yytext_is_array ) + { + indent_puts( "yytext_ptr -= yy_more_len; \\" ); + indent_puts( "yyleng = (int) (yy_cp - yytext_ptr); \\" ); + } + + else + indent_puts( "yyleng = (int) (yy_cp - yy_bp); \\" ); + + /* Now also deal with copying yytext_ptr to yytext if needed. 
*/ + skelout(); + if ( yytext_is_array ) + { + if ( yymore_used ) + indent_puts( + "if ( yyleng + yy_more_offset >= YYLMAX ) \\" ); + else + indent_puts( "if ( yyleng >= YYLMAX ) \\" ); + + indent_up(); + indent_puts( + "YY_FATAL_ERROR( \"token too large, exceeds YYLMAX\" ); \\" ); + indent_down(); + + if ( yymore_used ) + { + indent_puts( +"yy_flex_strncpy( &yytext[yy_more_offset], yytext_ptr, yyleng + 1 ); \\" ); + indent_puts( "yyleng += yy_more_offset; \\" ); + indent_puts( + "yy_prev_more_offset = yy_more_offset; \\" ); + indent_puts( "yy_more_offset = 0; \\" ); + } + else + { + indent_puts( + "yy_flex_strncpy( yytext, yytext_ptr, yyleng + 1 ); \\" ); + } + } + + set_indent( 0 ); + + skelout(); + + + out_dec( "#define YY_NUM_RULES %d\n", num_rules ); + out_dec( "#define YY_END_OF_BUFFER %d\n", num_rules + 1 ); + + if ( fullspd ) + { + /* Need to define the transet type as a size large + * enough to hold the biggest offset. + */ + int total_table_size = tblend + numecs + 1; + char *trans_offset_type = + (total_table_size >= MAX_SHORT || long_align) ? + "long" : "short"; + + set_indent( 0 ); + indent_puts( "struct yy_trans_info" ); + indent_up(); + indent_puts( "{" ); /* } for vi */ + + if ( long_align ) + indent_puts( "long yy_verify;" ); + else + indent_puts( "short yy_verify;" ); + + /* In cases where its sister yy_verify *is* a "yes, there is + * a transition", yy_nxt is the offset (in records) to the + * next state. In most cases where there is no transition, + * the value of yy_nxt is irrelevant. If yy_nxt is the -1th + * record of a state, though, then yy_nxt is the action number + * for that state. + */ + + indent_put2s( "%s yy_nxt;", trans_offset_type ); + indent_puts( "};" ); + indent_down(); + } + + if ( fullspd ) + genctbl(); + else if ( fulltbl ) + genftbl(); + else + gentabs(); + + /* Definitions for backing up. We don't need them if REJECT + * is being used because then we use an alternative backin-up + * technique instead. 
+ */ + if ( num_backing_up > 0 && ! reject ) + { + if ( ! C_plus_plus ) + { + indent_puts( + "static yy_state_type yy_last_accepting_state;" ); + indent_puts( + "static YY_CHAR *yy_last_accepting_cpos;\n" ); + } + } + + if ( nultrans ) + { + out_str_dec( C_state_decl, "yy_NUL_trans", lastdfa + 1 ); + + for ( i = 1; i <= lastdfa; ++i ) + { + if ( fullspd ) + out_dec( " &yy_transition[%d],\n", base[i] ); + else + mkdata( nultrans[i] ); + } + + dataend(); + } + + if ( ddebug ) + { /* Spit out table mapping rules to line numbers. */ + if ( ! C_plus_plus ) + { + indent_puts( "extern int yy_flex_debug;" ); + indent_puts( "int yy_flex_debug = 1;\n" ); + } + + out_str_dec( long_align ? C_long_decl : C_short_decl, + "yy_rule_linenum", num_rules ); + for ( i = 1; i < num_rules; ++i ) + mkdata( rule_linenum[i] ); + dataend(); + } + + if ( reject ) + { + /* Declare state buffer variables. */ + if ( ! C_plus_plus ) + { + outn( + "static yy_state_type yy_state_buf[YY_BUF_SIZE + 2], *yy_state_ptr;" ); + outn( "static YY_CHAR *yy_full_match;" ); + outn( "static int yy_lp;" ); + } + + if ( variable_trailing_context_rules ) + { + if ( ! C_plus_plus ) + { + outn( + "static int yy_looking_for_trail_begin = 0;" ); + outn( "static int yy_full_lp;" ); + outn( "static int *yy_full_state;" ); + } + + out_hex( "#define YY_TRAILING_MASK 0x%x\n", + (unsigned int) YY_TRAILING_MASK ); + out_hex( "#define YY_TRAILING_HEAD_MASK 0x%x\n", + (unsigned int) YY_TRAILING_HEAD_MASK ); + } + + outn( "#define REJECT \\" ); + outn( "{ \\" ); /* } for vi */ + outn( + "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */ \\" ); + outn( + "yy_cp = yy_full_match; /* restore poss. backed-over text */ \\" ); + + if ( variable_trailing_context_rules ) + { + outn( + "yy_lp = yy_full_lp; /* restore orig. accepting pos. */ \\" ); + outn( + "yy_state_ptr = yy_full_state; /* restore orig. state */ \\" ); + outn( + "yy_current_state = *yy_state_ptr; /* restore curr. 
state */ \\" ); + } + + outn( "++yy_lp; \\" ); + outn( "goto find_rule; \\" ); + /* { for vi */ + outn( "}" ); + } + + else + { + outn( + "/* The intent behind this definition is that it'll catch" ); + outn( " * any uses of REJECT which flex missed." ); + outn( " */" ); + outn( "#define REJECT reject_used_but_not_detected" ); + } + + if ( yymore_used ) + { + if ( ! C_plus_plus ) + { + if ( yytext_is_array ) + { + indent_puts( "static int yy_more_offset = 0;" ); + indent_puts( + "static int yy_prev_more_offset = 0;" ); + } + else + { + indent_puts( "static int yy_more_flag = 0;" ); + indent_puts( "static int yy_more_len = 0;" ); + } + } + + if ( yytext_is_array ) + { + indent_puts( + "#define yymore() (yy_more_offset = yy_flex_strlen( yytext ))" ); + indent_puts( "#define YY_NEED_STRLEN" ); + indent_puts( "#define YY_MORE_ADJ 0" ); + indent_puts( "#define YY_RESTORE_YY_MORE_OFFSET \\" ); + indent_up(); + indent_puts( "{ \\" ); + indent_puts( "yy_more_offset = yy_prev_more_offset; \\" ); + indent_puts( "yyleng -= yy_more_offset; \\" ); + indent_puts( "}" ); + indent_down(); + } + else + { + indent_puts( "#define yymore() (yy_more_flag = 1)" ); + indent_puts( "#define YY_MORE_ADJ yy_more_len" ); + indent_puts( "#define YY_RESTORE_YY_MORE_OFFSET" ); + } + } + + else + { + indent_puts( "#define yymore() yymore_used_but_not_detected" ); + indent_puts( "#define YY_MORE_ADJ 0" ); + indent_puts( "#define YY_RESTORE_YY_MORE_OFFSET" ); + } + + if ( ! C_plus_plus ) + { + if ( yytext_is_array ) + { + outn( "#ifndef YYLMAX" ); + outn( "#define YYLMAX 8192" ); + outn( "#endif\n" ); + outn( "YY_CHAR yytext[YYLMAX];" ); + outn( "YY_CHAR *yytext_ptr;" ); + } + + else + outn( "YY_CHAR *yytext;" ); + } + + out( &action_array[defs1_offset] ); + + line_directive_out( stdout, 0 ); + + skelout(); + + if ( ! 
C_plus_plus ) + { + if ( use_read ) + { + outn( +"\tif ( (result = read( fileno(yyin), (char *) buf, max_size * sizeof( YY_CHAR ) ) ) < 0 ) \\" ); + outn( + "\t\tYY_FATAL_ERROR( \"input in flex scanner failed\" );" ); + } + + else + { + outn( + "\tif ( yy_current_buffer->yy_is_interactive ) \\" ); + outn( "\t\t{ \\" ); + outn( "\t\tint c = '*', n; \\" ); + outn( "\t\tfor ( n = 0; n < max_size && \\" ); + + if ( csize == 65536 ) + outn( + "\t\t\t (c = getwc( yyin )) != WEOF && c != '\\n'; ++n ) \\" ); + else + outn( + "\t\t\t (c = getc( yyin )) != EOF && c != '\\n'; ++n ) \\" ); + + outn( "\t\t\tbuf[n] = (YY_CHAR) c; \\" ); + outn( "\t\tif ( c == '\\n' ) \\" ); + outn( "\t\t\tbuf[n++] = (YY_CHAR) c; \\" ); + + if ( csize == 65536 ) + outn( + "\t\tif ( c == WEOF && ferror( yyin ) ) \\" ); + else + outn( + "\t\tif ( c == EOF && ferror( yyin ) ) \\" ); + + outn( + "\t\t\tYY_FATAL_ERROR( \"input in flex scanner failed\" ); \\" ); + outn( "\t\tresult = n; \\" ); + outn( "\t\t} \\" ); + outn( +"\telse if ( ((result = fread( buf, sizeof( YY_CHAR ), max_size, yyin )) \\" ); + outn( "\t\t == 0) && ferror( yyin ) ) \\" ); + outn( + "\t\tYY_FATAL_ERROR( \"input in flex scanner failed\" );" ); + } + } + + skelout(); + + indent_puts( "#define YY_RULE_SETUP \\" ); + indent_up(); + if ( bol_needed ) + { + indent_puts( "if ( yyleng > 0 ) \\" ); + indent_up(); + indent_puts( "yy_current_buffer->yy_at_bol = \\" ); + indent_puts( "\t\t(yytext[yyleng - 1] == '\\n'); \\" ); + indent_down(); + } + indent_puts( "YY_USER_ACTION" ); + indent_down(); + + skelout(); + + /* Copy prolog to output file. */ + out( &action_array[prolog_offset] ); + + line_directive_out( stdout, 0 ); + + skelout(); + + set_indent( 2 ); + + if ( yymore_used && ! 
yytext_is_array ) + { + indent_puts( "yy_more_len = 0;" ); + indent_puts( "if ( yy_more_flag )" ); + indent_up(); + indent_puts( "{" ); + indent_puts( "yy_more_len = yy_c_buf_p - yytext_ptr;" ); + indent_puts( "yy_more_flag = 0;" ); + indent_puts( "}" ); + indent_down(); + } + + skelout(); + + gen_start_state(); + + /* Note, don't use any indentation. */ + outn( "yy_match:" ); + gen_next_match(); + + skelout(); + set_indent( 2 ); + gen_find_action(); + + skelout(); + if ( do_yylineno ) + { + indent_puts( "if ( yy_act != YY_END_OF_BUFFER )" ); + indent_up(); + indent_puts( "{" ); + indent_puts( "int yyl;" ); + indent_puts( "for ( yyl = 0; yyl < yyleng; ++yyl )" ); + indent_up(); + indent_puts( "if ( yytext[yyl] == '\\n' )" ); + indent_up(); + indent_puts( "++yylineno;" ); + indent_down(); + indent_down(); + indent_puts( "}" ); + indent_down(); + } + + skelout(); + if ( ddebug ) + { + indent_puts( "if ( yy_flex_debug )" ); + indent_up(); + + indent_puts( "{" ); + indent_puts( "if ( yy_act == 0 )" ); + indent_up(); + indent_puts( C_plus_plus ? 
+ "cerr << \"--scanner backing up\\n\";" : + "fprintf( stderr, \"--scanner backing up\\n\" );" ); + indent_down(); + + do_indent(); + out_dec( "else if ( yy_act < %d )\n", num_rules ); + indent_up(); + + if ( C_plus_plus ) + { + indent_puts( + "cerr << \"--accepting rule at line \" << yy_rule_linenum[yy_act] <<" ); + indent_puts( + " \"(\\\"\" << yytext << \"\\\")\\n\";" ); + } + else + { + indent_puts( + "fprintf( stderr, \"--accepting rule at line %d (\\\"%s\\\")\\n\"," ); + + indent_puts( + " yy_rule_linenum[yy_act], yytext );" ); + } + + indent_down(); + + do_indent(); + out_dec( "else if ( yy_act == %d )\n", num_rules ); + indent_up(); + + if ( C_plus_plus ) + { + indent_puts( +"cerr << \"--accepting default rule (\\\"\" << yytext << \"\\\")\\n\";" ); + } + else + { + indent_puts( + "fprintf( stderr, \"--accepting default rule (\\\"%s\\\")\\n\"," ); + indent_puts( " yytext );" ); + } + + indent_down(); + + do_indent(); + out_dec( "else if ( yy_act == %d )\n", num_rules + 1 ); + indent_up(); + + indent_puts( C_plus_plus ? + "cerr << \"--(end of buffer or a NUL)\\n\";" : + "fprintf( stderr, \"--(end of buffer or a NUL)\\n\" );" ); + + indent_down(); + + do_indent(); + outn( "else" ); + indent_up(); + + if ( C_plus_plus ) + { + indent_puts( + "cerr << \"--EOF (start condition \" << YY_START << \")\\n\";" ); + } + else + { + indent_puts( + "fprintf( stderr, \"--EOF (start condition %d)\\n\", YY_START );" ); + } + + indent_down(); + + indent_puts( "}" ); + indent_down(); + } + + /* Copy actions to output file. */ + skelout(); + indent_up(); + gen_bu_action(); + out( &action_array[action_offset] ); + + line_directive_out( stdout, 0 ); + + /* generate cases for any missing EOF rules */ + for ( i = 1; i <= lastsc; ++i ) + if ( ! 
sceof[i] ) + { + do_indent(); + out_str( "case YY_STATE_EOF(%s):\n", scname[i] ); + did_eof_rule = true; + } + + if ( did_eof_rule ) + { + indent_up(); + indent_puts( "yyterminate();" ); + indent_down(); + } + + + /* Generate code for handling NUL's, if needed. */ + + /* First, deal with backing up and setting up yy_cp if the scanner + * finds that it should JAM on the NUL. + */ + skelout(); + set_indent( 4 ); + + if ( fullspd || fulltbl ) + indent_puts( "yy_cp = yy_c_buf_p;" ); + + else + { /* compressed table */ + if ( ! reject && ! interactive ) + { + /* Do the guaranteed-needed backing up to figure + * out the match. + */ + indent_puts( "yy_cp = yy_last_accepting_cpos;" ); + indent_puts( + "yy_current_state = yy_last_accepting_state;" ); + } + + else + /* Still need to initialize yy_cp, though + * yy_current_state was set up by + * yy_get_previous_state(). + */ + indent_puts( "yy_cp = yy_c_buf_p;" ); + } + + + /* Generate code for yy_get_previous_state(). */ + set_indent( 1 ); + skelout(); + + gen_start_state(); + + set_indent( 2 ); + skelout(); + gen_next_state( true ); + + set_indent( 1 ); + skelout(); + gen_NUL_trans(); + + skelout(); + if ( do_yylineno ) + { /* update yylineno inside of unput() */ + indent_puts( "if ( c == '\\n' )" ); + indent_up(); + indent_puts( "--yylineno;" ); + indent_down(); + } + + skelout(); + /* Update BOL and yylineno inside of input(). */ + if ( bol_needed ) + { + indent_puts( "yy_current_buffer->yy_at_bol = (c == '\\n');" ); + if ( do_yylineno ) + { + indent_puts( "if ( yy_current_buffer->yy_at_bol )" ); + indent_up(); + indent_puts( "++yylineno;" ); + indent_down(); + } + } + + else if ( do_yylineno ) + { + indent_puts( "if ( c == '\\n' )" ); + indent_up(); + indent_puts( "++yylineno;" ); + indent_down(); + } + + skelout(); + + /* Copy remainder of input to output. 
*/ + + line_directive_out( stdout, 1 ); + + if ( sectnum == 3 ) + (void) flexscan(); /* copy remainder of input to output */ + } diff --git a/to.do/unicode/main.c b/to.do/unicode/main.c new file mode 100644 index 0000000..eee3c3e --- /dev/null +++ b/to.do/unicode/main.c @@ -0,0 +1,1228 @@ +/* flex - tool to generate fast lexical analyzers */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. + * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#ifndef lint +char copyright[] = +"@(#) Copyright (c) 1990 The Regents of the University of California.\n\ + All rights reserved.\n"; +#endif /* not lint */ + +/* $Header$ */ + + +#include "flexdef.h" +#include "version.h" + +static char flex_version[] = FLEX_VERSION; + + +/* declare functions that have forward references */ + +void flexinit PROTO((int, char**)); +void readin PROTO((void)); +void set_up_initial_allocations PROTO((void)); + +#ifdef NEED_ARGV_FIXUP +extern void argv_fixup PROTO((int *, char ***)); +#endif + + +/* these globals are all defined and commented in flexdef.h */ +int printstats, syntaxerror, eofseen, ddebug, trace, nowarn, spprdflt; +int interactive, caseins, lex_compat, do_yylineno, useecs, fulltbl, usemecs; +int fullspd, gen_line_dirs, performance_report, backing_up_report; +int C_plus_plus, long_align, use_read, yytext_is_array, do_yywrap, csize; +int yymore_used, reject, real_reject, continued_action, in_rule; +int yymore_really_used, reject_really_used; +int datapos, dataline, linenum, out_linenum; +FILE *skelfile = NULL; +int skel_ind = 0; +char *action_array; +int action_size, defs1_offset, prolog_offset, action_offset, action_index; +char *infilename = NULL, *outfilename = NULL; +int did_outfilename; +char *prefix, *yyclass; +int do_stdinit, use_stdout; +int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE]; +int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; +int current_mns, current_max_rules; +int num_rules, num_eof_rules, default_rule, lastnfa; +int *firstst, *lastst, *finalst, *transchar, *trans1, *trans2; +int *accptnum, *assoc_rule, *state_type; +int *rule_type, *rule_linenum, *rule_useful; +int current_state_type; +int variable_trailing_context_rules; +int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP]; +int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; +int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1]; +int tecbck[CSIZE + 1]; +int lastsc, *scset, 
*scbol, *scxclu, *sceof; +int current_max_scs; +char **scname; +int current_max_dfa_size, current_max_xpairs; +int current_max_template_xpairs, current_max_dfas; +int lastdfa, *nxt, *chk, *tnxt; +int *base, *def, *nultrans, NUL_ec, tblend, firstfree, **dss, *dfasiz; +union dfaacc_union *dfaacc; +int *accsiz, *dhash, numas; +int numsnpairs, jambase, jamstate; +int lastccl, *cclmap, *ccllen, *cclng, cclreuse; +int current_maxccls, current_max_ccl_tbl_size; +wchar_t *ccltbl; +char nmstr[MAXLINE]; +int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; +int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; +int num_backing_up, bol_needed; +FILE *backing_up_file; +int end_of_buffer_state; +char **input_files; +int num_input_files; + +/* Make sure program_name is initialized so we don't crash if writing + * out an error message before getting the program name from argv[0]. + */ +char *program_name = "flex"; + +#ifndef SHORT_FILE_NAMES +static char *outfile_template = "lex.%s.%s"; +static char *backing_name = "lex.backup"; +#else +static char *outfile_template = "lex%s.%s"; +static char *backing_name = "lex.bck"; +#endif + +#ifdef THINK_C +#include <console.h> +#endif + +#ifdef MS_DOS +extern unsigned _stklen = 16384; +#endif + +static char outfile_path[MAXLINE]; +static int outfile_created = 0; +static char *skelname = NULL; + + +int main( argc, argv ) +int argc; +char **argv; + { + int i; + +#ifdef THINK_C + argc = ccommand( &argv ); +#endif +#ifdef NEED_ARGV_FIXUP + argv_fixup( &argc, &argv ); +#endif + + flexinit( argc, argv ); + + readin(); + + ntod(); + + for ( i = 1; i <= num_rules; ++i ) + if ( ! rule_useful[i] && i != default_rule ) + line_warning( _( "rule cannot be matched" ), + rule_linenum[i] ); + + if ( spprdflt && ! reject && rule_useful[default_rule] ) + line_warning( + _( "-s option given but default rule can be matched" ), + rule_linenum[default_rule] ); + + /* Generate the C state transition tables from the DFA. 
*/ + make_tables(); + + /* Note, flexend does not return. It exits with its argument + * as status. + */ + flexend( 0 ); + + return 0; /* keep compilers/lint happy */ + } + + +/* check_options - check user-specified options + * + * Rejects incompatible option combinations via flexerror(), fills in + * defaults for csize and interactive when they were left unspecified, + * initializes ecgroup[]/nextecm[], redirects stdout to the output file + * unless use_stdout is set, opens the skeleton file if one was named, + * and writes the prefix #defines and the YY_CHAR / YY_SC_TO_UI + * definitions. + */ + +void check_options() + { + int i; + + if ( lex_compat ) + { + if ( C_plus_plus ) + flexerror( _( "Can't use -+ with -l option" ) ); + + if ( fulltbl || fullspd ) + flexerror( _( "Can't use -f or -F with -l option" ) ); + + /* Don't rely on detecting use of yymore() and REJECT, + * just assume they'll be used. + */ + yymore_really_used = reject_really_used = true; + + yytext_is_array = true; + do_yylineno = true; + use_read = false; + } + + if ( do_yylineno ) + /* This should really be "maintain_backup_tables = true" */ + reject_really_used = true; + + if ( csize == unspecified ) + { + if ( (fulltbl || fullspd) && ! useecs ) + csize = DEFAULT_CSIZE; + else + csize = 256; + } + + if ( interactive == unspecified ) + { + if ( fulltbl || fullspd ) + interactive = false; + else + interactive = true; + } + + if ( fulltbl || fullspd ) + { + if ( usemecs ) + flexerror( + _( "-Cf/-CF and -Cm don't make sense together" ) ); + + if ( interactive ) + flexerror( _( "-Cf/-CF and -I are incompatible" ) ); + + if ( lex_compat ) + flexerror( + _( "-Cf/-CF are incompatible with lex-compatibility mode" ) ); + + if ( do_yylineno ) + flexerror( + _( "-Cf/-CF and %option yylineno are incompatible" ) ); + + if ( fulltbl && fullspd ) + flexerror( _( "-Cf and -CF are mutually exclusive" ) ); + } + + if ( C_plus_plus && fullspd ) + flexerror( _( "Can't use -+ with -CF option" ) ); + + if ( C_plus_plus && yytext_is_array ) + { + warn( _( "%array incompatible with -+ option" ) ); + yytext_is_array = false; + } + + if ( csize == 65536 ) + { + if ( fulltbl ) + { + if ( use_read ) + flexerror( _( "Can't use -f with -U" ) ); + else + flexerror( _( "Can't use -Cf with -U" ) ); + } + else if ( fullspd ) + { + if ( use_read ) + flexerror( _( "Can't use -F with -U" ) ); +
else + flexerror( _( "Can't use -CF with -U" ) ); + } + else if ( ! useecs && ! usemecs ) + flexerror( _( "Can't use -C with -U" ) ); + } + + if ( useecs ) + { /* Set up doubly-linked equivalence classes. */ + + /* We loop all the way up to csize, since ecgroup[csize] is + * the position used for NUL characters. + */ + ecgroup[1] = NIL; + + for ( i = 2; i <= csize; ++i ) + { + ecgroup[i] = i - 1; + nextecm[i - 1] = i; + } + + nextecm[csize] = NIL; + } + + else + { + /* Put everything in its own equivalence class. */ + for ( i = 1; i <= csize; ++i ) + { + ecgroup[i] = i; + nextecm[i] = BAD_SUBSCRIPT; /* to catch errors */ + } + } + + if ( ! use_stdout ) + { + FILE *prev_stdout; + + if ( ! did_outfilename ) + { + char *suffix; + + if ( C_plus_plus ) + suffix = "cc"; + else + suffix = "c"; + + sprintf( outfile_path, outfile_template, + prefix, suffix ); + + outfilename = outfile_path; + } + + prev_stdout = freopen( outfilename, "w", stdout ); + + if ( prev_stdout == NULL ) + lerrsf( _( "could not create %s" ), outfilename ); + + outfile_created = 1; + } + + if ( skelname && (skelfile = fopen( skelname, "r" )) == NULL ) + lerrsf( _( "can't open skeleton file %s" ), skelname ); + + if ( strcmp( prefix, "yy" ) ) + { +#define GEN_PREFIX(name) out_str3( "#define yy%s %s%s\n", name, prefix, name ) + if ( C_plus_plus ) + GEN_PREFIX( "FlexLexer" ); + else + { + GEN_PREFIX( "_create_buffer" ); + GEN_PREFIX( "_delete_buffer" ); + GEN_PREFIX( "_scan_buffer" ); + GEN_PREFIX( "_scan_string" ); + GEN_PREFIX( "_scan_bytes" ); + GEN_PREFIX( "_flex_debug" ); + GEN_PREFIX( "_init_buffer" ); + GEN_PREFIX( "_flush_buffer" ); + GEN_PREFIX( "_load_buffer_state" ); + GEN_PREFIX( "_switch_to_buffer" ); + GEN_PREFIX( "in" ); + GEN_PREFIX( "leng" ); + GEN_PREFIX( "lex" ); + GEN_PREFIX( "out" ); + GEN_PREFIX( "restart" ); + GEN_PREFIX( "text" ); + + if ( do_yylineno ) + GEN_PREFIX( "lineno" ); + } + + if ( do_yywrap ) + GEN_PREFIX( "wrap" ); + + outn( "" ); + } + + if ( did_outfilename ) + 
line_directive_out( stdout, 0 ); + + skelout(); + + outn( "/* Define the YY_CHAR type. */" ); + + switch (csize) { + case 65536: + outn( "typedef wchar_t YY_CHAR;" ); + break; + case 256: + outn( "typedef unsigned char YY_CHAR;" ); + break; + default: + outn( "typedef char YY_CHAR;" ); + break; + } + + outn( "\n/* Promotes a YY_CHAR to an unsigned integer for use as an array index. */"); + + switch (csize) { + case 65536: + case 256: + outn( "#define YY_SC_TO_UI(c) ((unsigned int) c)" ); + break; + default: + outn( + "#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c)" ); + break; + } + + skelout(); + } + + +/* flexend - terminate flex + * + * note + * This routine does not return. + */ + +void flexend( exit_status ) +int exit_status; + + { + int tblsiz; + int unlink(); + + if ( skelfile != NULL ) + { + if ( ferror( skelfile ) ) + lerrsf( _( "input error reading skeleton file %s" ), + skelname ); + + else if ( fclose( skelfile ) ) + lerrsf( _( "error closing skeleton file %s" ), + skelname ); + } + + if ( exit_status != 0 && outfile_created ) + { + if ( ferror( stdout ) ) + lerrsf( _( "error writing output file %s" ), + outfilename ); + + else if ( fclose( stdout ) ) + lerrsf( _( "error closing output file %s" ), + outfilename ); + + else if ( unlink( outfilename ) ) + lerrsf( _( "error deleting output file %s" ), + outfilename ); + } + + if ( backing_up_report && backing_up_file ) + { + if ( num_backing_up == 0 ) + fprintf( backing_up_file, _( "No backing up.\n" ) ); + else if ( fullspd || fulltbl ) + fprintf( backing_up_file, + _( "%d backing up (non-accepting) states.\n" ), + num_backing_up ); + else + fprintf( backing_up_file, + _( "Compressed tables always back up.\n" ) ); + + if ( ferror( backing_up_file ) ) + lerrsf( _( "error writing backup file %s" ), + backing_name ); + + else if ( fclose( backing_up_file ) ) + lerrsf( _( "error closing backup file %s" ), + backing_name ); + } + + if ( printstats ) + { + fprintf( stderr, _( "%s version %s usage 
statistics:\n" ), + program_name, flex_version ); + + fprintf( stderr, _( " scanner options: -" ) ); + + if ( C_plus_plus ) + putc( '+', stderr ); + if ( backing_up_report ) + putc( 'b', stderr ); + if ( ddebug ) + putc( 'd', stderr ); + if ( caseins ) + putc( 'i', stderr ); + if ( lex_compat ) + putc( 'l', stderr ); + if ( performance_report > 0 ) + putc( 'p', stderr ); + if ( performance_report > 1 ) + putc( 'p', stderr ); + if ( spprdflt ) + putc( 's', stderr ); + if ( use_stdout ) + putc( 't', stderr ); + if ( printstats ) + putc( 'v', stderr ); /* always true! */ + if ( nowarn ) + putc( 'w', stderr ); + if ( interactive == false ) + putc( 'B', stderr ); + if ( interactive == true ) + putc( 'I', stderr ); + if ( ! gen_line_dirs ) + putc( 'L', stderr ); + if ( trace ) + putc( 'T', stderr ); + + if ( csize == unspecified ) + /* We encountered an error fairly early on, so csize + * never got specified. Define it now, to prevent + * bogus table sizes being written out below. + */ + csize = 256; + + if ( csize == 128 ) + putc( '7', stderr ); + else if ( csize == 256 ) + putc( '8', stderr ); + else + putc( 'U', stderr ); + + fprintf( stderr, " -C" ); + + if ( long_align ) + putc( 'a', stderr ); + if ( fulltbl ) + putc( 'f', stderr ); + if ( fullspd ) + putc( 'F', stderr ); + if ( useecs ) + putc( 'e', stderr ); + if ( usemecs ) + putc( 'm', stderr ); + if ( use_read ) + putc( 'r', stderr ); + + if ( did_outfilename ) + fprintf( stderr, " -o%s", outfilename ); + + if ( skelname ) + fprintf( stderr, " -S%s", skelname ); + + if ( strcmp( prefix, "yy" ) ) + fprintf( stderr, " -P%s", prefix ); + + putc( '\n', stderr ); + + fprintf( stderr, _( " %d/%d NFA states\n" ), + lastnfa, current_mns ); + fprintf( stderr, _( " %d/%d DFA states (%d words)\n" ), + lastdfa, current_max_dfas, totnst ); + fprintf( stderr, _( " %d rules\n" ), + num_rules + num_eof_rules - 1 /* - 1 for def. 
rule */ ); + + if ( num_backing_up == 0 ) + fprintf( stderr, _( " No backing up\n" ) ); + else if ( fullspd || fulltbl ) + fprintf( stderr, + _( " %d backing-up (non-accepting) states\n" ), + num_backing_up ); + else + fprintf( stderr, + _( " Compressed tables always back-up\n" ) ); + + if ( bol_needed ) + fprintf( stderr, + _( " Beginning-of-line patterns used\n" ) ); + + fprintf( stderr, _( " %d/%d start conditions\n" ), lastsc, + current_max_scs ); + fprintf( stderr, + _( " %d epsilon states, %d double epsilon states\n" ), + numeps, eps2 ); + + if ( lastccl == 0 ) + fprintf( stderr, _( " no character classes\n" ) ); + else + fprintf( stderr, +_( " %d/%d character classes needed %d/%d words of storage, %d reused\n" ), + lastccl, current_maxccls, + cclmap[lastccl] + ccllen[lastccl], + current_max_ccl_tbl_size, cclreuse ); + + fprintf( stderr, _( " %d state/nextstate pairs created\n" ), + numsnpairs ); + fprintf( stderr, _( " %d/%d unique/duplicate transitions\n" ), + numuniq, numdup ); + + if ( fulltbl ) + { + tblsiz = lastdfa * numecs; + fprintf( stderr, _( " %d table entries\n" ), tblsiz ); + } + + else + { + tblsiz = 2 * (lastdfa + numtemps) + 2 * tblend; + + fprintf( stderr, + _( " %d/%d base-def entries created\n" ), + lastdfa + numtemps, current_max_dfas ); + fprintf( stderr, + _( " %d/%d (peak %d) nxt-chk entries created\n" ), + tblend, current_max_xpairs, peakpairs ); + fprintf( stderr, + _( " %d/%d (peak %d) template nxt-chk entries created\n" ), + numtemps * nummecs, + current_max_template_xpairs, + numtemps * numecs ); + fprintf( stderr, _( " %d empty table entries\n" ), + nummt ); + fprintf( stderr, _( " %d protos created\n" ), + numprots ); + fprintf( stderr, + _( " %d templates created, %d uses\n" ), + numtemps, tmpuses ); + } + + if ( useecs ) + { + tblsiz = tblsiz + csize; + fprintf( stderr, + _( " %d/%d equivalence classes created\n" ), + numecs, csize ); + } + + if ( usemecs ) + { + tblsiz = tblsiz + numecs; + fprintf( stderr, + _( " %d/%d 
meta-equivalence classes created\n" ), + nummecs, csize ); + } + + fprintf( stderr, + _( " %d (%d saved) hash collisions, %d DFAs equal\n" ), + hshcol, hshsave, dfaeql ); + fprintf( stderr, _( " %d sets of reallocations needed\n" ), + num_reallocs ); + fprintf( stderr, _( " %d total table entries needed\n" ), + tblsiz ); + } + + exit( exit_status ); + } + + +/* flexinit - initialize flex */ + +void flexinit( argc, argv ) +int argc; +char **argv; + { + int i, sawcmpflag; + char *arg; + + printstats = syntaxerror = trace = spprdflt = caseins = false; + lex_compat = C_plus_plus = backing_up_report = ddebug = fulltbl = false; + fullspd = long_align = nowarn = yymore_used = continued_action = false; + do_yylineno = yytext_is_array = in_rule = reject = do_stdinit = false; + yymore_really_used = reject_really_used = unspecified; + interactive = csize = unspecified; + do_yywrap = gen_line_dirs = usemecs = useecs = true; + performance_report = 0; + did_outfilename = 0; + prefix = "yy"; + yyclass = 0; + use_read = use_stdout = false; + + sawcmpflag = false; + + /* Initialize dynamic array for holding the rule actions. */ + action_size = 2048; /* default size of action array in bytes */ + action_array = allocate_character_array( action_size ); + defs1_offset = prolog_offset = action_offset = action_index = 0; + action_array[0] = '\0'; + + program_name = argv[0]; + + if ( program_name[0] != '\0' && + program_name[strlen( program_name ) - 1] == '+' ) + C_plus_plus = true; + + /* read flags */ + for ( --argc, ++argv; argc ; --argc, ++argv ) + { + arg = argv[0]; + + if ( arg[0] != '-' || arg[1] == '\0' ) + break; + + if ( arg[1] == '-' ) + { /* --option */ + if ( ! strcmp( arg, "--help" ) ) + arg = "-h"; + + else if ( ! strcmp( arg, "--version" ) ) + arg = "-V"; + + else if ( ! 
strcmp( arg, "--" ) ) + { /* end of options */ + --argc; + ++argv; + break; + } + } + + for ( i = 1; arg[i] != '\0'; ++i ) + switch ( arg[i] ) + { + case '+': + C_plus_plus = true; + break; + + case 'B': + interactive = false; + break; + + case 'b': + backing_up_report = true; + break; + + case 'c': + break; + + case 'C': + if ( i != 1 ) + flexerror( + _( "-C flag must be given separately" ) ); + + if ( ! sawcmpflag ) + { + useecs = false; + usemecs = false; + fulltbl = false; + sawcmpflag = true; + } + + for ( ++i; arg[i] != '\0'; ++i ) + switch ( arg[i] ) + { + case 'a': + long_align = + true; + break; + + case 'e': + useecs = true; + break; + + case 'F': + fullspd = true; + break; + + case 'f': + fulltbl = true; + break; + + case 'm': + usemecs = true; + break; + + case 'r': + use_read = true; + break; + + default: + lerrif( + _( "unknown -C option '%c'" ), + (int) arg[i] ); + break; + } + + goto get_next_arg; + + case 'd': + ddebug = true; + break; + + case 'f': + useecs = usemecs = false; + use_read = fulltbl = true; + break; + + case 'F': + useecs = usemecs = false; + use_read = fullspd = true; + break; + + case '?': + case 'h': + usage(); + exit( 0 ); + + case 'I': + interactive = true; + break; + + case 'i': + caseins = true; + break; + + case 'l': + lex_compat = true; + break; + + case 'L': + gen_line_dirs = false; + break; + + case 'n': + /* Stupid do-nothing deprecated + * option. 
+ */ + break; + + case 'o': + if ( i != 1 ) + flexerror( + _( "-o flag must be given separately" ) ); + + outfilename = arg + i + 1; + did_outfilename = 1; + goto get_next_arg; + + case 'P': + if ( i != 1 ) + flexerror( + _( "-P flag must be given separately" ) ); + + prefix = arg + i + 1; + goto get_next_arg; + + case 'p': + ++performance_report; + break; + + case 'S': + if ( i != 1 ) + flexerror( + _( "-S flag must be given separately" ) ); + + skelname = arg + i + 1; + goto get_next_arg; + + case 's': + spprdflt = true; + break; + + case 't': + use_stdout = true; + break; + + case 'T': + trace = true; + break; + + case 'U': + csize = 65536; + break; + + case 'v': + printstats = true; + break; + + case 'V': + printf( _( "%s version %s\n" ), + program_name, flex_version ); + exit( 0 ); + + case 'w': + nowarn = true; + break; + + case '7': + csize = 128; + break; + + case '8': + csize = 256; + break; + + default: + fprintf( stderr, + _( "%s: unknown flag '%c'. For usage, try\n\t%s --help\n" ), + program_name, (int) arg[i], + program_name ); + exit( 1 ); + } + + /* Used by -C, -S, -o, and -P flags in lieu of a "continue 2" + * control. + */ + get_next_arg: ; + } + + num_input_files = argc; + input_files = argv; + set_input_file( num_input_files > 0 ? input_files[0] : NULL ); + + lastccl = lastsc = lastdfa = lastnfa = 0; + num_rules = num_eof_rules = default_rule = 0; + numas = numsnpairs = tmpuses = 0; + numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0; + numuniq = numdup = hshsave = eofseen = datapos = dataline = 0; + num_backing_up = onesp = numprots = 0; + variable_trailing_context_rules = bol_needed = false; + + out_linenum = linenum = sectnum = 1; + firstprot = NIL; + + /* Used in mkprot() so that the first proto goes in slot 1 + * of the proto queue. 
+ */ + lastprot = 1; + + set_up_initial_allocations(); + } + + +/* readin - read in the rules section of the input file(s) + * + * Runs yyparse() and terminates via flexend( 1 ) on a parse or syntax + * error, opens the backing-up report file when backing_up_report is set, + * settles the final yymore_used / reject values, prints the performance + * report to stderr, emits the option-dependent #defines and declarations, + * and finally computes numecs and NUL_ec. + */ + +void readin() + { + static char yy_stdinit[] = "FILE *yyin = stdin, *yyout = stdout;"; + static char yy_nostdinit[] = + "FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;"; + + line_directive_out( (FILE *) 0, 1 ); + + if ( yyparse() ) + { + pinpoint_message( _( "fatal parse error" ) ); + flexend( 1 ); + } + + if ( syntaxerror ) + flexend( 1 ); + + if ( backing_up_report ) + { + backing_up_file = fopen( backing_name, "w" ); + if ( backing_up_file == NULL ) + lerrsf( + _( "could not create backing-up info file %s" ), + backing_name ); + } + + else + backing_up_file = NULL; + + if ( yymore_really_used == true ) + yymore_used = true; + else if ( yymore_really_used == false ) + yymore_used = false; + + if ( reject_really_used == true ) + reject = true; + else if ( reject_really_used == false ) + reject = false; + + if ( performance_report > 0 ) + { + if ( lex_compat ) + { + fprintf( stderr, +_( "-l AT&T lex compatibility option entails a large performance penalty\n" ) ); + fprintf( stderr, +_( " and may be the actual source of other reported performance penalties\n" ) ); + } + + else if ( do_yylineno ) + { + fprintf( stderr, + _( "%%option yylineno entails a large performance penalty\n" ) ); + } + + if ( performance_report > 1 ) + { + if ( interactive ) + fprintf( stderr, + _( "-I (interactive) entails a minor performance penalty\n" ) ); + + if ( yymore_used ) + fprintf( stderr, + _( "yymore() entails a minor performance penalty\n" ) ); + } + + if ( reject ) + fprintf( stderr, + _( "REJECT entails a large performance penalty\n" ) ); + + if ( variable_trailing_context_rules ) + fprintf( stderr, +_( "Variable trailing context rules entail a large performance penalty\n" ) ); + } + + if ( reject ) + real_reject = true; + + if ( variable_trailing_context_rules ) + reject = true; + + if ( (fulltbl || fullspd) && reject ) + { + if ( real_reject ) +
flexerror( + _( "REJECT cannot be used with -f or -F" ) ); + else if ( do_yylineno ) + flexerror( + _( "%option yylineno cannot be used with -f or -F" ) ); + else + flexerror( + _( "variable trailing context rules cannot be used with -f or -F" ) ); + } + + if ( reject ) + outn( "\n#define YY_USES_REJECT" ); + + if ( ! do_yywrap ) + { + outn( "\n#define yywrap() 1" ); + outn( "#define YY_SKIP_YYWRAP" ); + } + + if ( ddebug ) + outn( "\n#define FLEX_DEBUG" ); + + if ( C_plus_plus ) + { + outn( "#define yytext_ptr yytext" ); + + if ( interactive ) + outn( "#define YY_INTERACTIVE" ); + } + + else + { + if ( do_stdinit ) + { + outn( "#ifdef VMS" ); + outn( "#ifndef __VMS_POSIX" ); + outn( yy_nostdinit ); + outn( "#else" ); + outn( yy_stdinit ); + outn( "#endif" ); + outn( "#else" ); + outn( yy_stdinit ); + outn( "#endif" ); + } + + else + outn( yy_nostdinit ); + } + + if ( fullspd ) + outn( "typedef yyconst struct yy_trans_info *yy_state_type;" ); + else if ( ! C_plus_plus ) + outn( "typedef int yy_state_type;" ); + + if ( ddebug ) + outn( "\n#define FLEX_DEBUG" ); + + if ( lex_compat ) + outn( "#define YY_FLEX_LEX_COMPAT" ); + + if ( do_yylineno && ! 
C_plus_plus ) + { + outn( "extern int yylineno;" ); + outn( "int yylineno = 1;" ); + } + + if ( C_plus_plus ) + { + outn( "\n#include <FlexLexer.h>" ); + + if ( yyclass ) + { + outn( "int yyFlexLexer::yylex()" ); + outn( "\t{" ); + outn( +"\tLexerError( \"yyFlexLexer::yylex invoked but %option yyclass used\" );" ); + outn( "\treturn 0;" ); + outn( "\t}" ); + + out_str( "\n#define YY_DECL int %s::yylex()\n", + yyclass ); + } + } + + else + { + if ( yytext_is_array ) + outn( "extern YY_CHAR yytext[];\n" ); + + else + { + outn( "extern YY_CHAR *yytext;" ); + outn( "#define yytext_ptr yytext" ); + } + + if ( yyclass ) + flexerror( + _( "%option yyclass only meaningful for C++ scanners" ) ); + } + + if ( useecs ) + numecs = cre8ecs( nextecm, ecgroup, csize ); + else + numecs = csize; + + /* Now map the equivalence class for NUL to its expected place. */ + ecgroup[0] = ecgroup[csize]; + NUL_ec = ABS( ecgroup[0] ); + + if ( useecs ) + ccl2ecl(); + } + + +/* set_up_initial_allocations - allocate memory for internal tables */ + +void set_up_initial_allocations() + { + current_mns = INITIAL_MNS; + firstst = allocate_integer_array( current_mns ); + lastst = allocate_integer_array( current_mns ); + finalst = allocate_integer_array( current_mns ); + transchar = allocate_integer_array( current_mns ); + trans1 = allocate_integer_array( current_mns ); + trans2 = allocate_integer_array( current_mns ); + accptnum = allocate_integer_array( current_mns ); + assoc_rule = allocate_integer_array( current_mns ); + state_type = allocate_integer_array( current_mns ); + + current_max_rules = INITIAL_MAX_RULES; + rule_type = allocate_integer_array( current_max_rules ); + rule_linenum = allocate_integer_array( current_max_rules ); + rule_useful = allocate_integer_array( current_max_rules ); + + current_max_scs = INITIAL_MAX_SCS; + scset = allocate_integer_array( current_max_scs ); + scbol = allocate_integer_array( current_max_scs ); + scxclu = allocate_integer_array( current_max_scs ); + sceof 
= allocate_integer_array( current_max_scs ); + scname = allocate_char_ptr_array( current_max_scs ); + + current_maxccls = INITIAL_MAX_CCLS; + cclmap = allocate_integer_array( current_maxccls ); + ccllen = allocate_integer_array( current_maxccls ); + cclng = allocate_integer_array( current_maxccls ); + + current_max_ccl_tbl_size = INITIAL_MAX_CCL_TBL_SIZE; + ccltbl = allocate_wchar_array( current_max_ccl_tbl_size ); + + current_max_dfa_size = INITIAL_MAX_DFA_SIZE; + + current_max_xpairs = INITIAL_MAX_XPAIRS; + nxt = allocate_integer_array( current_max_xpairs ); + chk = allocate_integer_array( current_max_xpairs ); + + current_max_template_xpairs = INITIAL_MAX_TEMPLATE_XPAIRS; + tnxt = allocate_integer_array( current_max_template_xpairs ); + + current_max_dfas = INITIAL_MAX_DFAS; + base = allocate_integer_array( current_max_dfas ); + def = allocate_integer_array( current_max_dfas ); + dfasiz = allocate_integer_array( current_max_dfas ); + accsiz = allocate_integer_array( current_max_dfas ); + dhash = allocate_integer_array( current_max_dfas ); + dss = allocate_int_ptr_array( current_max_dfas ); + dfaacc = allocate_dfaacc_union( current_max_dfas ); + + nultrans = (int *) 0; + } + + +void usage() + { + FILE *f = stdout; + + fprintf( f, +_( "%s [-bcdfhilnpstvwBFILTV78+? 
-C[aefFmr] -ooutput -Pprefix -Sskeleton]\n" ), + program_name ); + fprintf( f, _( "\t[--help --version] [file ...]\n" ) ); + + fprintf( f, _( "\t-b generate backing-up information to %s\n" ), + backing_name ); + fprintf( f, _( "\t-c do-nothing POSIX option\n" ) ); + fprintf( f, _( "\t-d turn on debug mode in generated scanner\n" ) ); + fprintf( f, _( "\t-f generate fast, large scanner\n" ) ); + fprintf( f, _( "\t-h produce this help message\n" ) ); + fprintf( f, _( "\t-i generate case-insensitive scanner\n" ) ); + fprintf( f, _( "\t-l maximal compatibility with original lex\n" ) ); + fprintf( f, _( "\t-n do-nothing POSIX option\n" ) ); + fprintf( f, _( "\t-p generate performance report to stderr\n" ) ); + fprintf( f, + _( "\t-s suppress default rule to ECHO unmatched text\n" ) ); + + if ( ! did_outfilename ) + { + sprintf( outfile_path, outfile_template, + prefix, C_plus_plus ? "cc" : "c" ); + outfilename = outfile_path; + } + + fprintf( f, + _( "\t-t write generated scanner on stdout instead of %s\n" ), + outfilename ); + + fprintf( f, + _( "\t-v write summary of scanner statistics to f\n" ) ); + fprintf( f, _( "\t-w do not generate warnings\n" ) ); + fprintf( f, _( "\t-B generate batch scanner (opposite of -I)\n" ) ); + fprintf( f, + _( "\t-F use alternative fast scanner representation\n" ) ); + fprintf( f, + _( "\t-I generate interactive scanner (opposite of -B)\n" ) ); + fprintf( f, _( "\t-L suppress #line directives in scanner\n" ) ); + fprintf( f, _( "\t-T %s should run in trace mode\n" ), program_name ); + fprintf( f, _( "\t-V report %s version\n" ), program_name ); + fprintf( f, _( "\t-7 generate 7-bit scanner\n" ) ); + fprintf( f, _( "\t-8 generate 8-bit scanner\n" ) ); + fprintf( f, _( "\t-U generate 16-bit (Unicode) scanner\n" ) ); + fprintf( f, _( "\t-+ generate C++ scanner class\n" ) ); + fprintf( f, _( "\t-? 
produce this help message\n" ) ); + fprintf( f, +_( "\t-C specify degree of table compression (default is -Cem):\n" ) ); + fprintf( f, +_( "\t\t-Ca trade off larger tables for better memory alignment\n" ) ); + fprintf( f, _( "\t\t-Ce construct equivalence classes\n" ) ); + fprintf( f, +_( "\t\t-Cf do not compress scanner tables; use -f representation\n" ) ); + fprintf( f, +_( "\t\t-CF do not compress scanner tables; use -F representation\n" ) ); + fprintf( f, _( "\t\t-Cm construct meta-equivalence classes\n" ) ); + fprintf( f, + _( "\t\t-Cr use read() instead of stdio for scanner input\n" ) ); + fprintf( f, _( "\t-o specify output filename\n" ) ); + fprintf( f, _( "\t-P specify scanner prefix other than \"yy\"\n" ) ); + fprintf( f, _( "\t-S specify skeleton file\n" ) ); + fprintf( f, _( "\t--help produce this help message\n" ) ); + fprintf( f, _( "\t--version report %s version\n" ), program_name ); + } diff --git a/to.do/unicode/misc.c b/to.do/unicode/misc.c new file mode 100644 index 0000000..60d4e44 --- /dev/null +++ b/to.do/unicode/misc.c @@ -0,0 +1,894 @@ +/* misc - miscellaneous flex routines */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. 
+ * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +/* $Header$ */ + +#include "flexdef.h" + + +void action_define( defname, value ) +char *defname; +int value; + { + char buf[MAXLINE]; + + if ( (int) strlen( defname ) > MAXLINE / 2 ) + { + format_pinpoint_message( _( "name \"%s\" ridiculously long" ), + defname ); + return; + } + + sprintf( buf, "#define %s %d\n", defname, value ); + add_action( buf ); + } + + +void add_action( new_text ) +char *new_text; + { + int len = strlen( new_text ); + + while ( len + action_index >= action_size - 10 /* slop */ ) + { + int new_size = action_size * 2; + + if ( new_size <= 0 ) + /* Increase just a little, to try to avoid overflow + * on 16-bit machines. 
+ */ + action_size += action_size / 8; + else + action_size = new_size; + + action_array = + reallocate_character_array( action_array, action_size ); + } + + strcpy( &action_array[action_index], new_text ); + + action_index += len; + } + + +/* allocate_array - allocate memory for an array of "size" elements, each + * "element_size" bytes long; calls flexfatal() if flex_alloc() returns + * a null pointer + */ + +void *allocate_array( size, element_size ) +int size; +size_t element_size; + { + register void *mem; + size_t num_bytes = element_size * size; + + mem = flex_alloc( num_bytes ); + if ( ! mem ) + flexfatal( + _( "memory allocation failed in allocate_array()" ) ); + + return mem; + } + + +/* all_lower - true if a string is all lower-case + * + * Returns 1 only if every character is an ASCII lower-case letter (so an + * empty string yields 1), otherwise 0. + */ + +int all_lower( str ) +register char *str; + { + while ( *str ) + { + if ( ! isascii( (Char) *str ) || ! islower( *str ) ) + return 0; + ++str; + } + + return 1; + } + + +/* all_upper - true if a string is all upper-case + * + * Returns 1 only if every character is an ASCII upper-case letter (so an + * empty string yields 1), otherwise 0. + */ + +int all_upper( str ) +register char *str; + { + while ( *str ) + { + if ( ! isascii( (Char) *str ) || ! isupper( *str ) ) + return 0; + ++str; + } + + return 1; + } + + +/* bubble - bubble sort an integer array in increasing order + * + * synopsis + * int v[n + 1], n; + * void bubble( v, n ); + * + * description + * sorts elements v[1] through v[n] of array v (the indexing is 1-based; + * v[0] is never examined) and replaces them in increasing order. + * + * passed + * v - the array to be sorted + * n - the number of elements of 'v' to be sorted, starting at v[1] + */ + +void bubble( v, n ) +int v[], n; + { + register int i, j, k; + + for ( i = n; i > 1; --i ) + for ( j = 1; j < i; ++j ) + if ( v[j] > v[j + 1] ) /* compare */ + { + k = v[j]; /* exchange */ + v[j] = v[j + 1]; + v[j + 1] = k; + } + } + + +/* check_char - checks a character to make sure it's within the range + * we're expecting. If not, generates fatal error message + * and exits.
+ */ + +void check_char( c ) +int c; + { + if ( c >= CSIZE ) + lerrsf( _( "bad character '%s' detected in check_char()" ), + readable_form( c ) ); + + if ( c >= csize ) + { + if ( c < 256 ) + lerrsf( + _( "scanner requires -8 flag to use the character %s" ), + readable_form( c ) ); + else + lerrsf( + _( "scanner requires -U flag to use the character %s" ), + readable_form( c ) ); + } + } + + + +/* clower - convert an upper-case ASCII letter to lower-case; any other + * value is returned unchanged + */ + +Char clower( c ) +register int c; + { + return (Char) ((isascii( c ) && isupper( c )) ? tolower( c ) : c); + } + + +/* copy_string - returns a dynamically allocated copy of a string */ + +char *copy_string( str ) +register const char *str; + { + register const char *c1; + register char *c2; + char *copy; + unsigned int size; + + /* find length */ + for ( c1 = str; *c1; ++c1 ) + ; + + size = (c1 - str + 1) * sizeof( char ); + copy = (char *) flex_alloc( size ); + + if ( copy == NULL ) + flexfatal( _( "dynamic memory failure in copy_string()" ) ); + + for ( c2 = copy; (*c2++ = *str++) != 0; ) + ; + + return copy; + } + + +/* copy_unsigned_string - + * returns a dynamically allocated copy of a (potentially) unsigned string + */ + +Char *copy_unsigned_string( str ) +register Char *str; + { + register Char *c; + Char *copy; + + /* find length */ + for ( c = str; *c; ++c ) + ; + + copy = allocate_Character_array( c - str + 1 ); + + for ( c = copy; (*c++ = *str++) != 0; ) + ; + + return copy; + } + + +/* cshell - shell sort a wide-character array in increasing order + * + * synopsis + * + * wchar_t v[n]; + * int n, special_case_0; + * cshell( v, n, special_case_0 ); + * + * description + * Does a shell sort of the first n elements of array v. + * If special_case_0 is true, then any element equal to 0 + * is instead assumed to have infinite weight.
+ * + * passed + * v - array to be sorted + * n - number of elements of v to be sorted + * special_case_0 - nonzero to treat elements equal to 0 as described above + */ + +void cshell( v, n, special_case_0 ) +wchar_t v[]; +int n, special_case_0; + { + int gap, i, j, jg; + wchar_t k; + + for ( gap = n / 2; gap > 0; gap = gap / 2 ) + for ( i = gap; i < n; ++i ) + for ( j = i - gap; j >= 0; j = j - gap ) + { + jg = j + gap; + + if ( special_case_0 ) + { + if ( v[jg] == 0 ) + break; + + else if ( v[j] != 0 && v[j] <= v[jg] ) + break; + } + + else if ( v[j] <= v[jg] ) + break; + + k = v[j]; + v[j] = v[jg]; + v[jg] = k; + } + } + + +/* dataend - finish up a block of data declarations */ + +void dataend() + { + if ( datapos > 0 ) + dataflush(); + + /* add terminator for initialization; { for vi */ + outn( " } ;\n" ); + + dataline = 0; + datapos = 0; + } + + +/* dataflush - flush generated data statements */ + +void dataflush() + { + outc( '\n' ); + + if ( ++dataline >= NUMDATALINES ) + { + /* Put out a blank line so that the table is grouped into + * large blocks that enable the user to find elements easily. + */ + outc( '\n' ); + dataline = 0; + } + + /* Reset the number of characters written on the current line.
*/ + datapos = 0; + } + + +/* flexerror - report an error message and terminate */ + +void flexerror( msg ) +const char msg[]; + { + fprintf( stderr, "%s: %s\n", program_name, msg ); + flexend( 1 ); + } + + +/* flexfatal - report a fatal error message and terminate */ + +void flexfatal( msg ) +const char msg[]; + { + fprintf( stderr, _( "%s: fatal internal error, %s\n" ), + program_name, msg ); + exit( 1 ); + } + + +/* htoi - convert a hexadecimal digit string to an integer value */ + +int htoi( str ) +Char str[]; + { + unsigned int result; + + (void) sscanf( (char *) str, "%x", &result ); + + return result; + } + + +/* lerrif - report an error message formatted with one integer argument */ + +void lerrif( msg, arg ) +const char msg[]; +int arg; + { + char errmsg[MAXLINE]; + (void) sprintf( errmsg, msg, arg ); + flexerror( errmsg ); + } + + +/* lerrsf - report an error message formatted with one string argument */ + +void lerrsf( msg, arg ) +const char msg[], arg[]; + { + char errmsg[MAXLINE]; + + (void) sprintf( errmsg, msg, arg ); + flexerror( errmsg ); + } + + +/* line_directive_out - spit out a "#line" statement */ + +void line_directive_out( output_file, do_infile ) +FILE *output_file; +int do_infile; + { + char directive[MAXLINE], filename[MAXLINE]; + char *s1, *s2, *s3; + static char line_fmt[] = "#line %d \"%s\"\n"; + + if ( ! gen_line_dirs ) + return; + + if ( (do_infile && ! infilename) || (! do_infile && ! outfilename) ) + /* don't know the filename to use, skip */ + return; + + s1 = do_infile ? infilename : outfilename; + s2 = filename; + s3 = &filename[sizeof( filename ) - 2]; + + while ( s2 < s3 && *s1 ) + { + if ( *s1 == '\\' ) + /* Escape the '\' */ + *s2++ = '\\'; + + *s2++ = *s1++; + } + + *s2 = '\0'; + + if ( do_infile ) + sprintf( directive, line_fmt, linenum, filename ); + else + { + if ( output_file == stdout ) + /* Account for the line directive itself. 
*/ + ++out_linenum; + + sprintf( directive, line_fmt, out_linenum, filename ); + } + + /* If output_file is nil then we should put the directive in + * the accumulated actions. + */ + if ( output_file ) + { + fputs( directive, output_file ); + } + else + add_action( directive ); + } + + +/* mark_defs1 - mark the current position in the action array as + * representing where the user's section 1 definitions end + * and the prolog begins + */ +void mark_defs1() + { + defs1_offset = 0; + action_array[action_index++] = '\0'; + action_offset = prolog_offset = action_index; + action_array[action_index] = '\0'; + } + + +/* mark_prolog - mark the current position in the action array as + * representing the end of the action prolog + */ +void mark_prolog() + { + action_array[action_index++] = '\0'; + action_offset = action_index; + action_array[action_index] = '\0'; + } + + +/* mk2data - generate a data statement for a two-dimensional array + * + * Generates a data statement initializing the current 2-D array to "value". + */ +void mk2data( value ) +int value; + { + if ( datapos >= NUMDATAITEMS ) + { + outc( ',' ); + dataflush(); + } + + if ( datapos == 0 ) + /* Indent. */ + out( " " ); + + else + outc( ',' ); + + ++datapos; + + out_dec( "%5d", value ); + } + + +/* mkdata - generate a data statement + * + * Generates a data statement initializing the current array element to + * "value". + */ +void mkdata( value ) +int value; + { + if ( datapos >= NUMDATAITEMS ) + { + outc( ',' ); + dataflush(); + } + + if ( datapos == 0 ) + /* Indent. 
*/ + out( " " ); + else + outc( ',' ); + + ++datapos; + + out_dec( "%5d", value ); + } + + +/* myctoi - return the integer represented by a string of digits */ + +int myctoi( array ) +char array[]; + { + int val = 0; + + (void) sscanf( array, "%d", &val ); + + return val; + } + + +/* myesc - return character corresponding to escape sequence */ + +int myesc( array ) +Char array[]; + { + Char c; + unsigned int esc_char; + + switch ( array[1] ) + { + case 'b': return '\b'; + case 'f': return '\f'; + case 'n': return '\n'; + case 'r': return '\r'; + case 't': return '\t'; + +#if __STDC__ + case 'a': return '\a'; + case 'v': return '\v'; +#else + case 'a': return '\007'; + case 'v': return '\013'; +#endif + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + { /* \<octal> */ + int sptr = 1; + + while ( isascii( array[sptr] ) && + isdigit( array[sptr] ) ) + /* Don't increment inside loop control + * because if isdigit() is a macro it might + * expand into multiple increments ... + */ + ++sptr; + + c = array[sptr]; + array[sptr] = '\0'; + + esc_char = otoi( array + 1 ); + + array[sptr] = c; + + return esc_char; + } + + case 'x': + { /* \x<hex> */ + int sptr = 2; + + while ( isascii( array[sptr] ) && + isxdigit( (char) array[sptr] ) ) + /* Don't increment inside loop control + * because if isdigit() is a macro it might + * expand into multiple increments ... + */ + ++sptr; + + c = array[sptr]; + array[sptr] = '\0'; + + esc_char = htoi( array + 2 ); + + array[sptr] = c; + + return esc_char; + } + + default: + return array[1]; + } + } + + +/* otoi - convert an octal digit string to an integer value */ + +int otoi( str ) +Char str[]; + { + unsigned int result; + + (void) sscanf( (char *) str, "%o", &result ); + return result; + } + + +/* out - various flavors of outputing a (possibly formatted) string for the + * generated scanner, keeping track of the line count. 
+ */ + +void out( str ) /* write str to stdout as-is and count its newlines */ +const char str[]; + { + fputs( str, stdout ); + out_line_count( str ); + } + +void out_dec( fmt, n ) /* printf one int; only newlines in fmt are counted */ +const char fmt[]; +int n; + { + printf( fmt, n ); + out_line_count( fmt ); + } + +void out_dec2( fmt, n1, n2 ) /* printf two ints; only newlines in fmt are counted */ +const char fmt[]; +int n1, n2; + { + printf( fmt, n1, n2 ); + out_line_count( fmt ); + } + +void out_hex( fmt, x ) /* printf one unsigned int; only newlines in fmt are counted */ +const char fmt[]; +unsigned int x; + { + printf( fmt, x ); + out_line_count( fmt ); + } + +void out_line_count( str ) /* add the number of '\n' characters in str to out_linenum */ +const char str[]; + { + register int i; + + for ( i = 0; str[i]; ++i ) + if ( str[i] == '\n' ) + ++out_linenum; + } + +void out_str( fmt, str ) /* printf one string; newlines in both fmt and str are counted */ +const char fmt[], str[]; + { + printf( fmt, str ); + out_line_count( fmt ); + out_line_count( str ); + } + +void out_str3( fmt, s1, s2, s3 ) /* printf three strings; newlines in fmt and all three are counted */ +const char fmt[], s1[], s2[], s3[]; + { + printf( fmt, s1, s2, s3 ); + out_line_count( fmt ); + out_line_count( s1 ); + out_line_count( s2 ); + out_line_count( s3 ); + } + +void out_str_dec( fmt, str, n ) /* printf a string and an int; newlines in fmt and str are counted */ +const char fmt[], str[]; +int n; + { + printf( fmt, str, n ); + out_line_count( fmt ); + out_line_count( str ); + } + +void outc( c ) /* write one character to stdout, counting it if it is a newline */ +int c; + { + putc( c, stdout ); + + if ( c == '\n' ) + ++out_linenum; + } + +void outn( str ) /* write str followed by a newline */ +const char str[]; + { + puts( str ); + out_line_count( str ); + ++out_linenum; /* for the newline that puts() appends */ + } + + +/* readable_form - return the human-readable form of a character + * + * Characters 0-31 and those >= 127 are returned as a named escape such as + * "\n" or, failing that, as a backslash followed by the octal value; a + * space is returned as "' '"; anything else is returned as the character + * itself. + * + * The returned string is either a string literal or the static buffer + * rform, which the next call that uses it overwrites.
+ */ + +char *readable_form( c ) +register int c; + { + static char rform[10]; + + if ( (c >= 0 && c < 32) || c >= 127 ) + { + switch ( c ) + { + case '\b': return "\\b"; + case '\f': return "\\f"; + case '\n': return "\\n"; + case '\r': return "\\r"; + case '\t': return "\\t"; + +#if __STDC__ + case '\a': return "\\a"; + case '\v': return "\\v"; +#endif + + default: + /* NOTE(review): rform holds 10 bytes; "%.3o" is a + * minimum width, so this relies on c being small + * enough to print in at most 8 octal digits. + */ + (void) sprintf( rform, "\\%.3o", + (unsigned int) c ); + return rform; + } + } + + else if ( c == ' ' ) + return "' '"; + + else + { + rform[0] = c; + rform[1] = '\0'; + + return rform; + } + } + + +/* reallocate_array - increase the size of a dynamic array + * + * Resizes array to size elements of element_size bytes each by calling + * flex_realloc(), and returns the new pointer. Calls flexfatal() if + * flex_realloc() returns a null pointer. + * + * NOTE(review): the product element_size * size is not checked for + * overflow. + */ + +void *reallocate_array( array, size, element_size ) +void *array; +int size; +size_t element_size; + { + register void *new_array; + size_t num_bytes = element_size * size; + + new_array = flex_realloc( array, num_bytes ); + if ( ! new_array ) + flexfatal( _( "attempt to increase array size failed" ) ); + + return new_array; + } + + +/* skelout - write out one section of the skeleton file + * + * Description + * Copies skelfile or skel array to stdout until a line beginning with + * "%%" or EOF is found. + * + * Other lines beginning with '%' are control lines and are not copied: + * "%+" copies what follows only if C_plus_plus is set, "%-" only if it + * is not set, and "%*" turns copying back on unconditionally. Any other + * character after the '%' is reported through flexfatal(). + */ +void skelout() + { + char buf_storage[MAXLINE]; + char *buf = buf_storage; + int do_copy = 1; + + /* Loop pulling lines either from the skelfile, if we're using + * one, or from the skel[] array. + */ + while ( skelfile ? + (fgets( buf, MAXLINE, skelfile ) != NULL) : + ((buf = (char *) skel[skel_ind++]) != 0) ) + { /* copy from skel array */ + if ( buf[0] == '%' ) + { /* control line */ + switch ( buf[1] ) + { + case '%': + return; + + case '+': + do_copy = C_plus_plus; + break; + + case '-': + do_copy = ! C_plus_plus; + break; + + case '*': + do_copy = 1; + break; + + default: + flexfatal( + _( "bad line in skeleton file" ) ); + } + } + + else if ( do_copy ) + { + if ( skelfile ) + /* Skeleton file reads include final + * newline, skel[] array does not.
+ */ + out( buf ); + else + outn( buf ); + } + } + } + + +/* transition_struct_out - output a yy_trans_info structure + * + * outputs the yy_trans_info structure with the two elements, element_v and + * element_n. Formats the output with spaces and newlines: a newline is + * emitted once datapos reaches 79 - TRANS_STRUCT_PRINT_LENGTH, and an + * extra blank line after every tenth such line. + */ + +void transition_struct_out( element_v, element_n ) +int element_v, element_n; + { + out_dec2( " {%4d,%4d },", element_v, element_n ); + + datapos += TRANS_STRUCT_PRINT_LENGTH; + + if ( datapos >= 79 - TRANS_STRUCT_PRINT_LENGTH ) + { + outc( '\n' ); + + if ( ++dataline % 10 == 0 ) + outc( '\n' ); + + datapos = 0; + } + } + + +/* The following is only needed when building flex's parser using certain + * broken versions of bison. + * + * yy_flex_xmalloc - allocate size bytes with flex_alloc(), calling + * flexfatal() if the allocation fails. + */ +void *yy_flex_xmalloc( size ) +int size; + { + void *result = flex_alloc( (size_t) size ); + + if ( ! result ) + flexfatal( + _( "memory allocation failed in yy_flex_xmalloc()" ) ); + + return result; + } + + +/* zero_out - set a region of memory to 0 + * + * Sets region_ptr[0] through region_ptr[size_in_bytes - 1] to zero. + * Does nothing when size_in_bytes is 0. + */ + +void zero_out( region_ptr, size_in_bytes ) +char *region_ptr; +size_t size_in_bytes; + { + register char *rp, *rp_end; + + rp = region_ptr; + rp_end = region_ptr + size_in_bytes; + + while ( rp < rp_end ) + *rp++ = 0; + } diff --git a/to.do/unicode/scan.l b/to.do/unicode/scan.l new file mode 100644 index 0000000..0864068 --- /dev/null +++ b/to.do/unicode/scan.l @@ -0,0 +1,710 @@ +/* scan.l - scanner for flex input */ + +%{ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California.
+ * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +/* $Header$ */ + +#include "flexdef.h" +#include "parse.h" + +#define ACTION_ECHO add_action( yytext ) +#define ACTION_IFDEF(def, should_define) \ + { \ + if ( should_define ) \ + action_define( def, 1 ); \ + } + +#define MARK_END_OF_PROLOG mark_prolog(); + +#define YY_DECL \ + int flexscan() + +#define RETURNCHAR \ + yylval = (unsigned char) yytext[0]; \ + return CHAR; + +#define RETURNNAME \ + strcpy( nmstr, yytext ); \ + return NAME; + +#define PUT_BACK_STRING(str, start) \ + for ( i = strlen( str ) - 1; i >= start; --i ) \ + unput((str)[i]) + +#define CHECK_REJECT(str) \ + if ( all_upper( str ) ) \ + reject = true; + +#define CHECK_YYMORE(str) \ + if ( all_lower( str ) ) \ + yymore_used = true; +%} + +%option caseless nodefault outfile="scan.c" stack noyy_top_state +%option nostdinit + +%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE +%x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION +%x OPTION LINEDIR + +WS [[:blank:]]+ +OPTWS [[:blank:]]* +NOT_WS [^[:blank:]\n] + +NL \r?\n + +NAME 
([[:alpha:]_][[:alnum:]_-]*) +NOT_NAME [^[:alpha:]_*\n]+ + +SCNAME {NAME} + +ESCSEQ (\\([^\n]|[0-7]{1,6}|x[[:xdigit:]]{1,4})) + +FIRST_CCL_CHAR ([^\\\n]|{ESCSEQ}) +CCL_CHAR ([^\\\n\]]|{ESCSEQ}) +CCL_EXPR ("[:"[[:alpha:]]+":]") + +LEXOPT [aceknopr] + +%% + static int bracelevel, didadef, indented_code; + static int doing_rule_action = false; + static int option_sense; + + int doing_codeblock = false; + int i; + Char nmdef[MAXLINE]; + + +<INITIAL>{ + ^{WS} indented_code = true; BEGIN(CODEBLOCK); + ^"/*" ACTION_ECHO; yy_push_state( COMMENT ); + ^#{OPTWS}line{WS} yy_push_state( LINEDIR ); + ^"%s"{NAME}? return SCDECL; + ^"%x"{NAME}? return XSCDECL; + ^"%{".*{NL} { + ++linenum; + line_directive_out( (FILE *) 0, 1 ); + indented_code = false; + BEGIN(CODEBLOCK); + } + + {WS} /* discard */ + + ^"%%".* { + sectnum = 2; + bracelevel = 0; + mark_defs1(); + line_directive_out( (FILE *) 0, 1 ); + BEGIN(SECT2PROLOG); + return SECTEND; + } + + ^"%pointer".*{NL} yytext_is_array = false; ++linenum; + ^"%array".*{NL} yytext_is_array = true; ++linenum; + + ^"%option" BEGIN(OPTION); return OPTION_OP; + + ^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL} ++linenum; /* ignore */ + ^"%"{LEXOPT}{WS}.*{NL} ++linenum; /* ignore */ + + ^"%"[^sxaceknopr{}].* synerr( _( "unrecognized '%' directive" ) ); + + ^{NAME} { + strcpy( nmstr, yytext ); + didadef = false; + BEGIN(PICKUPDEF); + } + + {SCNAME} RETURNNAME; + ^{OPTWS}{NL} ++linenum; /* allows blank lines in section 1 */ + {OPTWS}{NL} ACTION_ECHO; ++linenum; /* maybe end of comment line */ +} + + +<COMMENT>{ + "*/" ACTION_ECHO; yy_pop_state(); + "*" ACTION_ECHO; + [^*\n]+ ACTION_ECHO; + [^*\n]*{NL} ++linenum; ACTION_ECHO; +} + +<LINEDIR>{ + \n yy_pop_state(); + [[:digit:]]+ linenum = myctoi( yytext ); + + \"[^"\n]*\" { + flex_free( (void *) infilename ); + infilename = copy_string( yytext + 1 ); + infilename[strlen( infilename ) - 1] = '\0'; + } + . 
/* ignore spurious characters */ +} + +<CODEBLOCK>{ + ^"%}".*{NL} ++linenum; BEGIN(INITIAL); + + {NAME}|{NOT_NAME}|. ACTION_ECHO; + + {NL} { + ++linenum; + ACTION_ECHO; + if ( indented_code ) + BEGIN(INITIAL); + } +} + + +<PICKUPDEF>{ + {WS} /* separates name and definition */ + + {NOT_WS}.* { + strcpy( (char *) nmdef, yytext ); + + /* Skip trailing whitespace. */ + for ( i = strlen( (char *) nmdef ) - 1; + i >= 0 && (nmdef[i] == ' ' || nmdef[i] == '\t'); + --i ) + ; + + nmdef[i + 1] = '\0'; + + ndinstal( nmstr, nmdef ); + didadef = true; + } + + {NL} { + if ( ! didadef ) + synerr( _( "incomplete name definition" ) ); + BEGIN(INITIAL); + ++linenum; + } +} + + +<OPTION>{ + {NL} ++linenum; BEGIN(INITIAL); + {WS} option_sense = true; + + "=" return '='; + + no option_sense = ! option_sense; + + 7bit csize = option_sense ? 128 : 256; + 8bit csize = option_sense ? 256 : 128; + 16bit csize = option_sense ? 65536 : 256; + + align long_align = option_sense; + always-interactive { + action_define( "YY_ALWAYS_INTERACTIVE", option_sense ); + } + array yytext_is_array = option_sense; + backup backing_up_report = option_sense; + batch interactive = ! option_sense; + "c++" C_plus_plus = option_sense; + caseful|case-sensitive caseins = ! option_sense; + caseless|case-insensitive caseins = option_sense; + debug ddebug = option_sense; + default spprdflt = ! option_sense; + ecs useecs = option_sense; + fast { + useecs = usemecs = false; + use_read = fullspd = true; + } + full { + useecs = usemecs = false; + use_read = fulltbl = true; + } + input ACTION_IFDEF("YY_NO_INPUT", ! option_sense); + interactive interactive = option_sense; + lex-compat lex_compat = option_sense; + main { + action_define( "YY_MAIN", option_sense ); + do_yywrap = ! option_sense; + } + meta-ecs usemecs = option_sense; + never-interactive { + action_define( "YY_NEVER_INTERACTIVE", option_sense ); + } + perf-report performance_report += option_sense ? 1 : -1; + pointer yytext_is_array = ! 
option_sense; + read use_read = option_sense; + reject reject_really_used = option_sense; + stack action_define( "YY_STACK_USED", option_sense ); + stdinit do_stdinit = option_sense; + stdout use_stdout = option_sense; + unput ACTION_IFDEF("YY_NO_UNPUT", ! option_sense); + verbose printstats = option_sense; + warn nowarn = ! option_sense; + yylineno do_yylineno = option_sense; + yymore yymore_really_used = option_sense; + yywrap do_yywrap = option_sense; + + yy_push_state ACTION_IFDEF("YY_NO_PUSH_STATE", ! option_sense); + yy_pop_state ACTION_IFDEF("YY_NO_POP_STATE", ! option_sense); + yy_top_state ACTION_IFDEF("YY_NO_TOP_STATE", ! option_sense); + + yy_scan_buffer ACTION_IFDEF("YY_NO_SCAN_BUFFER", ! option_sense); + yy_scan_bytes ACTION_IFDEF("YY_NO_SCAN_BYTES", ! option_sense); + yy_scan_string ACTION_IFDEF("YY_NO_SCAN_STRING", ! option_sense); + + outfile return OPT_OUTFILE; + prefix return OPT_PREFIX; + yyclass return OPT_YYCLASS; + + \"[^"\n]*\" { + strcpy( nmstr, yytext + 1 ); + nmstr[strlen( nmstr ) - 1] = '\0'; + return NAME; + } + + (([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|. { + format_synerr( _( "unrecognized %%option: %s" ), + yytext ); + BEGIN(RECOVER); + } +} + +<RECOVER>.*{NL} ++linenum; BEGIN(INITIAL); + + +<SECT2PROLOG>{ + ^"%{".* ++bracelevel; yyless( 2 ); /* eat only %{ */ + ^"%}".* --bracelevel; yyless( 2 ); /* eat only %} */ + + ^{WS}.* ACTION_ECHO; /* indented code in prolog */ + + ^{NOT_WS}.* { /* non-indented code */ + if ( bracelevel <= 0 ) + { /* not in %{ ... 
%} */ + yyless( 0 ); /* put it all back */ + yy_set_bol( 1 ); + mark_prolog(); + BEGIN(SECT2); + } + else + ACTION_ECHO; + } + + .* ACTION_ECHO; + {NL} ++linenum; ACTION_ECHO; + + <<EOF>> { + mark_prolog(); + sectnum = 0; + yyterminate(); /* to stop the parser */ + } +} + +<SECT2>{ + ^{OPTWS}{NL} ++linenum; /* allow blank lines in section 2 */ + + ^{OPTWS}"%{" { + indented_code = false; + doing_codeblock = true; + bracelevel = 1; + BEGIN(PERCENT_BRACE_ACTION); + } + + ^{OPTWS}"<" BEGIN(SC); return '<'; + ^{OPTWS}"^" return '^'; + \" BEGIN(QUOTE); return '"'; + "{"/[[:digit:]] BEGIN(NUM); return '{'; + "$"/([[:blank:]]|{NL}) return '$'; + + {WS}"%{" { + bracelevel = 1; + BEGIN(PERCENT_BRACE_ACTION); + + if ( in_rule ) + { + doing_rule_action = true; + in_rule = false; + return '\n'; + } + } + {WS}"|".*{NL} continued_action = true; ++linenum; return '\n'; + + ^{WS}"/*" { + yyless( yyleng - 2 ); /* put back '/', '*' */ + bracelevel = 0; + continued_action = false; + BEGIN(ACTION); + } + + ^{WS} /* allow indented rules */ + + {WS} { + /* This rule is separate from the one below because + * otherwise we get variable trailing context, so + * we can't build the scanner using -{f,F}. + */ + bracelevel = 0; + continued_action = false; + BEGIN(ACTION); + + if ( in_rule ) + { + doing_rule_action = true; + in_rule = false; + return '\n'; + } + } + + {OPTWS}{NL} { + bracelevel = 0; + continued_action = false; + BEGIN(ACTION); + unput( '\n' ); /* so <ACTION> sees it */ + + if ( in_rule ) + { + doing_rule_action = true; + in_rule = false; + return '\n'; + } + } + + ^{OPTWS}"<<EOF>>" | + "<<EOF>>" return EOF_OP; + + ^"%%".* { + sectnum = 3; + BEGIN(SECT3); + yyterminate(); /* to stop the parser */ + } + + "["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})* { + int cclval; + + strcpy( nmstr, yytext ); + + /* Check to see if we've already encountered this + * ccl. 
+ */ + if ( (cclval = ccllookup( (Char *) nmstr )) != 0 ) + { + if ( input() != ']' ) + synerr( _( "bad character class" ) ); + + yylval = cclval; + ++cclreuse; + return PREVCCL; + } + else + { + /* We fudge a bit. We know that this ccl will + * soon be numbered as lastccl + 1 by cclinit. + */ + cclinstal( (Char *) nmstr, lastccl + 1 ); + + /* Push back everything but the leading bracket + * so the ccl can be rescanned. + */ + yyless( 1 ); + + BEGIN(FIRSTCCL); + return '['; + } + } + + "{"{NAME}"}" { + register Char *nmdefptr; + + strcpy( nmstr, yytext + 1 ); + nmstr[yyleng - 2] = '\0'; /* chop trailing brace */ + + if ( (nmdefptr = ndlookup( nmstr )) == 0 ) + format_synerr( + _( "undefined definition {%s}" ), + nmstr ); + + else + { /* push back name surrounded by ()'s */ + int len = strlen( (char *) nmdefptr ); + + if ( lex_compat || nmdefptr[0] == '^' || + (len > 0 && nmdefptr[len - 1] == '$') ) + { /* don't use ()'s after all */ + PUT_BACK_STRING((char *) nmdefptr, 0); + + if ( nmdefptr[0] == '^' ) + BEGIN(CARETISBOL); + } + + else + { + unput(')'); + PUT_BACK_STRING((char *) nmdefptr, 0); + unput('('); + } + } + } + + [/|*+?.(){}] return (unsigned char) yytext[0]; + . RETURNCHAR; +} + + +<SC>{ + [,*] return (unsigned char) yytext[0]; + ">" BEGIN(SECT2); return '>'; + ">"/^ BEGIN(CARETISBOL); return '>'; + {SCNAME} RETURNNAME; + . { + format_synerr( _( "bad <start condition>: %s" ), + yytext ); + } +} + +<CARETISBOL>"^" BEGIN(SECT2); return '^'; + + +<QUOTE>{ + [^"\n] RETURNCHAR; + \" BEGIN(SECT2); return '"'; + + {NL} { + synerr( _( "missing quote" ) ); + BEGIN(SECT2); + ++linenum; + return '"'; + } +} + + +<FIRSTCCL>{ + "^"/[^-\]\n] BEGIN(CCL); return '^'; + "^"/("-"|"]") return '^'; + . 
BEGIN(CCL); RETURNCHAR; +} + +<CCL>{ + -/[^\]\n] return '-'; + [^\]\n] RETURNCHAR; + "]" BEGIN(SECT2); return ']'; + .|{NL} { + synerr( _( "bad character class" ) ); + BEGIN(SECT2); + return ']'; + } +} + +<FIRSTCCL,CCL>{ + "[:alnum:]" BEGIN(CCL); return CCE_ALNUM; + "[:alpha:]" BEGIN(CCL); return CCE_ALPHA; + "[:blank:]" BEGIN(CCL); return CCE_BLANK; + "[:cntrl:]" BEGIN(CCL); return CCE_CNTRL; + "[:digit:]" BEGIN(CCL); return CCE_DIGIT; + "[:graph:]" BEGIN(CCL); return CCE_GRAPH; + "[:lower:]" BEGIN(CCL); return CCE_LOWER; + "[:print:]" BEGIN(CCL); return CCE_PRINT; + "[:punct:]" BEGIN(CCL); return CCE_PUNCT; + "[:space:]" BEGIN(CCL); return CCE_SPACE; + "[:upper:]" BEGIN(CCL); return CCE_UPPER; + "[:xdigit:]" BEGIN(CCL); return CCE_XDIGIT; + {CCL_EXPR} { + format_synerr( + _( "bad character class expression: %s" ), + yytext ); + BEGIN(CCL); return CCE_ALNUM; + } +} + +<NUM>{ + [[:digit:]]+ { + yylval = myctoi( yytext ); + return NUMBER; + } + + "," return ','; + "}" BEGIN(SECT2); return '}'; + + . { + synerr( _( "bad character inside {}'s" ) ); + BEGIN(SECT2); + return '}'; + } + + {NL} { + synerr( _( "missing }" ) ); + BEGIN(SECT2); + ++linenum; + return '}'; + } +} + + +<PERCENT_BRACE_ACTION>{ + {OPTWS}"%}".* bracelevel = 0; + + <ACTION>"/*" ACTION_ECHO; yy_push_state( COMMENT ); + + <CODEBLOCK,ACTION>{ + "reject" { + ACTION_ECHO; + CHECK_REJECT(yytext); + } + "yymore" { + ACTION_ECHO; + CHECK_YYMORE(yytext); + } + } + + {NAME}|{NOT_NAME}|. 
ACTION_ECHO; + {NL} { + ++linenum; + ACTION_ECHO; + if ( bracelevel == 0 || + (doing_codeblock && indented_code) ) + { + if ( doing_rule_action ) + add_action( "\tYY_BREAK\n" ); + + doing_rule_action = doing_codeblock = false; + BEGIN(SECT2); + } + } +} + + + /* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */ +<ACTION>{ + "{" ACTION_ECHO; ++bracelevel; + "}" ACTION_ECHO; --bracelevel; + [^[:alpha:]_{}"'/\n]+ ACTION_ECHO; + {NAME} ACTION_ECHO; + "'"([^'\\\n]|\\.)*"'" ACTION_ECHO; /* character constant */ + \" ACTION_ECHO; BEGIN(ACTION_STRING); + {NL} { + ++linenum; + ACTION_ECHO; + if ( bracelevel == 0 ) + { + if ( doing_rule_action ) + add_action( "\tYY_BREAK\n" ); + + doing_rule_action = false; + BEGIN(SECT2); + } + } + . ACTION_ECHO; +} + +<ACTION_STRING>{ + [^"\\\n]+ ACTION_ECHO; + \\. ACTION_ECHO; + {NL} ++linenum; ACTION_ECHO; + \" ACTION_ECHO; BEGIN(ACTION); + . ACTION_ECHO; +} + +<COMMENT,ACTION,ACTION_STRING><<EOF>> { + synerr( _( "EOF encountered inside an action" ) ); + yyterminate(); + } + + +<SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ} { + yylval = myesc( (Char *) yytext ); + + if ( YY_START == FIRSTCCL ) + BEGIN(CCL); + + return CHAR; + } + + +<SECT3>{ + .*(\n?) ECHO; + <<EOF>> sectnum = 0; yyterminate(); +} + +<*>.|\n format_synerr( _( "bad character: %s" ), yytext ); + +%% + + +int yywrap() + { + if ( --num_input_files > 0 ) + { + set_input_file( *++input_files ); + return 0; + } + + else + return 1; + } + + +/* set_input_file - open the given file (if NULL, stdin) for scanning */ + +void set_input_file( file ) +char *file; + { + if ( file && strcmp( file, "-" ) ) + { + infilename = copy_string( file ); + yyin = fopen( infilename, "r" ); + + if ( yyin == NULL ) + lerrsf( _( "can't open %s" ), file ); + } + + else + { + yyin = stdin; + infilename = copy_string( "<stdin>" ); + } + + linenum = 1; + } + + +/* Wrapper routines for accessing the scanner's malloc routines. 
*/ + +void *flex_alloc( size ) /* returns malloc( size ) unchanged; a null result is left to the caller */ +size_t size; + { + return (void *) malloc( size ); + } + +void *flex_realloc( ptr, size ) /* returns realloc( ptr, size ) unchanged; a null result is left to the caller */ +void *ptr; +size_t size; + { + return (void *) realloc( ptr, size ); + } + +void flex_free( ptr ) /* frees ptr; does nothing when ptr is null */ +void *ptr; + { + if ( ptr ) + free( ptr ); + } diff --git a/to.do/unicode/tblcmp.c b/to.do/unicode/tblcmp.c new file mode 100644 index 0000000..c21d9be --- /dev/null +++ b/to.do/unicode/tblcmp.c @@ -0,0 +1,887 @@ +/* tblcmp - table compression routines */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Vern Paxson. + * + * The United States Government has rights in this work pursuant + * to contract no. DE-AC03-76SF00098 between the United States + * Department of Energy and the University of California. + * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */ + +/* $Header$ */ + +#include "flexdef.h" + + +/* declarations for functions that have forward references */ + +void mkentry PROTO((register int*, int, int, int, int)); +void mkprot PROTO((int[], int, int)); +void mktemplate PROTO((int[], int, int)); +void mv2front PROTO((int)); +int tbldiff PROTO((int[], int, int[])); + + +/* bldtbl - build table entries for dfa state + * + * synopsis + * int state[numecs], statenum, totaltrans, comstate, comfreq; + * bldtbl( state, statenum, totaltrans, comstate, comfreq ); + * + * State is the statenum'th dfa state. It is indexed by equivalence class and + * gives the number of the state to enter for a given equivalence class. + * totaltrans is the total number of transitions out of the state. Comstate + * is that state which is the destination of the most transitions out of State. + * Comfreq is how many transitions there are out of State to Comstate. + * + * A note on terminology: + * "protos" are transition tables which have a high probability of + * either being redundant (a state processed later will have an identical + * transition table) or nearly redundant (a state processed later will have + * many of the same out-transitions). A "most recently used" queue of + * protos is kept around with the hope that most states will find a proto + * which is similar enough to be usable, and therefore compacting the + * output tables. + * "templates" are a special type of proto. If a transition table is + * homogeneous or nearly homogeneous (all transitions go to the same + * destination) then the odds are good that future states will also go + * to the same destination state on basically the same character set. + * These homogeneous states are so common when dealing with large rule + * sets that they merit special attention. If the transition table were + * simply made into a proto, then (typically) each subsequent, similar + * state will differ from the proto for two out-transitions. 
One of these + * out-transitions will be that character on which the proto does not go + * to the common destination, and one will be that character on which the + * state does not go to the common destination. Templates, on the other + * hand, go to the common state on EVERY transition character, and therefore + * cost only one difference. + */ + +void bldtbl( state, statenum, totaltrans, comstate, comfreq ) +int state[], statenum, totaltrans, comstate, comfreq; + { + int extptr, extrct[2][CSIZE + 1]; + int mindiff, minprot, i, d; + + /* If extptr is 0 then the first array of extrct holds the result + * of the "best difference" to date, which is those transitions + * which occur in "state" but not in the proto which, to date, + * has the fewest differences between itself and "state". If + * extptr is 1 then the second array of extrct hold the best + * difference. The two arrays are toggled between so that the + * best difference to date can be kept around and also a difference + * just created by checking against a candidate "best" proto. + */ + + extptr = 0; + + /* If the state has too few out-transitions, don't bother trying to + * compact its tables. + */ + + if ( (totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE) ) + mkentry( state, numecs, statenum, JAMSTATE, totaltrans ); + + else + { + /* "checkcom" is true if we should only check "state" against + * protos which have the same "comstate" value. + */ + int checkcom = + comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE; + + minprot = firstprot; + mindiff = totaltrans; + + if ( checkcom ) + { + /* Find first proto which has the same "comstate". 
*/ + for ( i = firstprot; i != NIL; i = protnext[i] ) + if ( protcomst[i] == comstate ) + { + minprot = i; + mindiff = tbldiff( state, minprot, + extrct[extptr] ); + break; + } + } + + else + { + /* Since we've decided that the most common destination + * out of "state" does not occur with a high enough + * frequency, we set the "comstate" to zero, assuring + * that if this state is entered into the proto list, + * it will not be considered a template. + */ + comstate = 0; + + if ( firstprot != NIL ) + { + minprot = firstprot; + mindiff = tbldiff( state, minprot, + extrct[extptr] ); + } + } + + /* We now have the first interesting proto in "minprot". If + * it matches within the tolerances set for the first proto, + * we don't want to bother scanning the rest of the proto list + * to see if we have any other reasonable matches. + */ + + if ( mindiff * 100 > totaltrans * FIRST_MATCH_DIFF_PERCENTAGE ) + { + /* Not a good enough match. Scan the rest of the + * protos. + */ + for ( i = minprot; i != NIL; i = protnext[i] ) + { + d = tbldiff( state, i, extrct[1 - extptr] ); + if ( d < mindiff ) + { + extptr = 1 - extptr; + mindiff = d; + minprot = i; + } + } + } + + /* Check if the proto we've decided on as our best bet is close + * enough to the state we want to match to be usable. + */ + + if ( mindiff * 100 > totaltrans * ACCEPTABLE_DIFF_PERCENTAGE ) + { + /* No good. If the state is homogeneous enough, + * we make a template out of it. Otherwise, we + * make a proto. + */ + + if ( comfreq * 100 >= + totaltrans * TEMPLATE_SAME_PERCENTAGE ) + mktemplate( state, statenum, comstate ); + + else + { + mkprot( state, statenum, comstate ); + mkentry( state, numecs, statenum, + JAMSTATE, totaltrans ); + } + } + + else + { /* use the proto */ + mkentry( extrct[extptr], numecs, statenum, + prottbl[minprot], mindiff ); + + /* If this state was sufficiently different from the + * proto we built it from, make it, too, a proto. 
+ */ + + if ( mindiff * 100 >= + totaltrans * NEW_PROTO_DIFF_PERCENTAGE ) + mkprot( state, statenum, comstate ); + + /* Since mkprot added a new proto to the proto queue, + * it's possible that "minprot" is no longer on the + * proto queue (if it happened to have been the last + * entry, it would have been bumped off). If it's + * not there, then the new proto took its physical + * place (though logically the new proto is at the + * beginning of the queue), so in that case the + * following call will do nothing. + */ + + mv2front( minprot ); + } + } + } + + +/* cmptmps - compress template table entries + * + * Template tables are compressed by using the 'template equivalence + * classes', which are collections of transition character equivalence + * classes which always appear together in templates - really meta-equivalence + * classes. + */ + +void cmptmps() + { + int tmpstorage[CSIZE + 1]; + register int *tmp = tmpstorage, i, j; + int totaltrans, trans; + + peakpairs = numtemps * numecs + tblend; + + if ( usemecs ) + { + /* Create equivalence classes based on data gathered on + * template transitions. + */ + nummecs = cre8ecs( tecfwd, tecbck, numecs ); + } + + else + nummecs = numecs; + + while ( lastdfa + numtemps + 1 >= current_max_dfas ) + increase_max_dfas(); + + /* Loop through each template. */ + + for ( i = 1; i <= numtemps; ++i ) + { + /* Number of non-jam transitions out of this template. */ + totaltrans = 0; + + for ( j = 1; j <= numecs; ++j ) + { + trans = tnxt[numecs * i + j]; + + if ( usemecs ) + { + /* The absolute value of tecbck is the + * meta-equivalence class of a given + * equivalence class, as set up by cre8ecs(). 
+ */ + if ( tecbck[j] > 0 ) + { + tmp[tecbck[j]] = trans; + + if ( trans > 0 ) + ++totaltrans; + } + } + + else + { + tmp[j] = trans; + + if ( trans > 0 ) + ++totaltrans; + } + } + + /* It is assumed (in a rather subtle way) in the skeleton + * that if we're using meta-equivalence classes, the def[] + * entry for all templates is the jam template, i.e., + * templates never default to other non-jam table entries + * (e.g., another template) + */ + + /* Leave room for the jam-state after the last real state. */ + mkentry( tmp, nummecs, lastdfa + i + 1, JAMSTATE, totaltrans ); + } + } + + + +/* expand_nxt_chk - expand the next check arrays */ + +void expand_nxt_chk() + { + register int old_max = current_max_xpairs; + + current_max_xpairs += MAX_XPAIRS_INCREMENT; + + ++num_reallocs; + + nxt = reallocate_integer_array( nxt, current_max_xpairs ); + chk = reallocate_integer_array( chk, current_max_xpairs ); + + zero_out( (char *) (chk + old_max), + (size_t) (MAX_XPAIRS_INCREMENT * sizeof( int )) ); + } + + +/* find_table_space - finds a space in the table for a state to be placed + * + * synopsis + * int *state, numtrans, block_start; + * int find_table_space(); + * + * block_start = find_table_space( state, numtrans ); + * + * State is the state to be added to the full speed transition table. + * Numtrans is the number of out-transitions for the state. + * + * find_table_space() returns the position of the start of the first block (in + * chk) able to accommodate the state + * + * In determining if a state will or will not fit, find_table_space() must take + * into account the fact that an end-of-buffer state will be added at [0], + * and an action number will be added in [-1]. + */ + +int find_table_space( state, numtrans ) +int *state, numtrans; + { + /* Firstfree is the position of the first possible occurrence of two + * consecutive unused records in the chk and nxt arrays. 
+ */ + register int i; + register int *state_ptr, *chk_ptr; + register int *ptr_to_last_entry_in_state; + + /* If there are too many out-transitions, put the state at the end of + * nxt and chk. + */ + if ( numtrans > MAX_XTIONS_FULL_INTERIOR_FIT ) + { + /* If table is empty, return the first available spot in + * chk/nxt, which should be 1. + */ + if ( tblend < 2 ) + return 1; + + /* Start searching for table space near the end of + * chk/nxt arrays. + */ + i = tblend - numecs; + } + + else + /* Start searching for table space from the beginning + * (skipping only the elements which will definitely not + * hold the new state). + */ + i = firstfree; + + while ( 1 ) /* loops until a space is found */ + { + while ( i + numecs >= current_max_xpairs ) + expand_nxt_chk(); + + /* Loops until space for end-of-buffer and action number + * are found. + */ + while ( 1 ) + { + /* Check for action number space. */ + if ( chk[i - 1] == 0 ) + { + /* Check for end-of-buffer space. */ + if ( chk[i] == 0 ) + break; + + else + /* Since i != 0, there is no use + * checking to see if (++i) - 1 == 0, + * because that's the same as i == 0, + * so we skip a space. + */ + i += 2; + } + + else + ++i; + + while ( i + numecs >= current_max_xpairs ) + expand_nxt_chk(); + } + + /* If we started search from the beginning, store the new + * firstfree for the next call of find_table_space(). + */ + if ( numtrans <= MAX_XTIONS_FULL_INTERIOR_FIT ) + firstfree = i + 1; + + /* Check to see if all elements in chk (and therefore nxt) + * that are needed for the new state have not yet been taken. 
+ */ + + state_ptr = &state[1]; + ptr_to_last_entry_in_state = &chk[i + numecs + 1]; + + for ( chk_ptr = &chk[i + 1]; + chk_ptr != ptr_to_last_entry_in_state; ++chk_ptr ) + if ( *(state_ptr++) != 0 && *chk_ptr != 0 ) + break; + + if ( chk_ptr == ptr_to_last_entry_in_state ) + return i; + + else + ++i; + } + } + + +/* inittbl - initialize transition tables + * + * Initializes "firstfree" to be one beyond the end of the table. Initializes + * all "chk" entries to be zero. + */ +void inittbl() + { + register int i; + + zero_out( (char *) chk, (size_t) (current_max_xpairs * sizeof( int )) ); + + tblend = 0; + firstfree = tblend + 1; + numtemps = 0; + + if ( usemecs ) + { + /* Set up doubly-linked meta-equivalence classes; these + * are sets of equivalence classes which all have identical + * transitions out of TEMPLATES. + */ + + tecbck[1] = NIL; + + for ( i = 2; i <= numecs; ++i ) + { + tecbck[i] = i - 1; + tecfwd[i - 1] = i; + } + + tecfwd[numecs] = NIL; + } + } + + +/* mkdeftbl - make the default, "jam" table entries */ + +void mkdeftbl() + { + int i; + + jamstate = lastdfa + 1; + + ++tblend; /* room for transition on end-of-buffer character */ + + while ( tblend + numecs >= current_max_xpairs ) + expand_nxt_chk(); + + /* Add in default end-of-buffer transition. */ + nxt[tblend] = end_of_buffer_state; + chk[tblend] = jamstate; + + for ( i = 1; i <= numecs; ++i ) + { + nxt[tblend + i] = 0; + chk[tblend + i] = jamstate; + } + + jambase = tblend; + + base[jamstate] = jambase; + def[jamstate] = 0; + + tblend += numecs; + ++numtemps; + } + + +/* mkentry - create base/def and nxt/chk entries for transition array + * + * synopsis + * int state[numchars + 1], numchars, statenum, deflink, totaltrans; + * mkentry( state, numchars, statenum, deflink, totaltrans ); + * + * "state" is a transition array "numchars" characters in size, "statenum" + * is the offset to be used into the base/def tables, and "deflink" is the + * entry to put in the "def" table entry. 
If "deflink" is equal to + * "JAMSTATE", then no attempt will be made to fit zero entries of "state" + * (i.e., jam entries) into the table. It is assumed that by linking to + * "JAMSTATE" they will be taken care of. In any case, entries in "state" + * marking transitions to "SAME_TRANS" are treated as though they will be + * taken care of by whereever "deflink" points. "totaltrans" is the total + * number of transitions out of the state. If it is below a certain threshold, + * the tables are searched for an interior spot that will accommodate the + * state array. + */ + +void mkentry( state, numchars, statenum, deflink, totaltrans ) +register int *state; +int numchars, statenum, deflink, totaltrans; + { + register int minec, maxec, i, baseaddr; + int tblbase, tbllast; + + if ( totaltrans == 0 ) + { /* there are no out-transitions */ + if ( deflink == JAMSTATE ) + base[statenum] = JAMSTATE; + else + base[statenum] = 0; + + def[statenum] = deflink; + return; + } + + for ( minec = 1; minec <= numchars; ++minec ) + { + if ( state[minec] != SAME_TRANS ) + if ( state[minec] != 0 || deflink != JAMSTATE ) + break; + } + + if ( totaltrans == 1 ) + { + /* There's only one out-transition. Save it for later to fill + * in holes in the tables. + */ + stack1( statenum, minec, state[minec], deflink ); + return; + } + + for ( maxec = numchars; maxec > 0; --maxec ) + { + if ( state[maxec] != SAME_TRANS ) + if ( state[maxec] != 0 || deflink != JAMSTATE ) + break; + } + + /* Whether we try to fit the state table in the middle of the table + * entries we have already generated, or if we just take the state + * table at the end of the nxt/chk tables, we must make sure that we + * have a valid base address (i.e., non-negative). Note that + * negative base addresses dangerous at run-time (because indexing + * the nxt array with one and a low-valued character will access + * memory before the start of the array. + */ + + /* Find the first transition of state that we need to worry about. 
*/ + if ( totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE ) + { + /* Attempt to squeeze it into the middle of the tables. */ + baseaddr = firstfree; + + while ( baseaddr < minec ) + { + /* Using baseaddr would result in a negative base + * address below; find the next free slot. + */ + for ( ++baseaddr; chk[baseaddr] != 0; ++baseaddr ) + ; + } + + while ( baseaddr + maxec - minec + 1 >= current_max_xpairs ) + expand_nxt_chk(); + + for ( i = minec; i <= maxec; ++i ) + if ( state[i] != SAME_TRANS && + (state[i] != 0 || deflink != JAMSTATE) && + chk[baseaddr + i - minec] != 0 ) + { /* baseaddr unsuitable - find another */ + for ( ++baseaddr; + baseaddr < current_max_xpairs && + chk[baseaddr] != 0; ++baseaddr ) + ; + + while ( baseaddr + maxec - minec + 1 >= + current_max_xpairs ) + expand_nxt_chk(); + + /* Reset the loop counter so we'll start all + * over again next time it's incremented. + */ + + i = minec - 1; + } + } + + else + { + /* Ensure that the base address we eventually generate is + * non-negative. + */ + baseaddr = MAX( tblend + 1, minec ); + } + + tblbase = baseaddr - minec; + tbllast = tblbase + maxec; + + while ( tbllast + 1 >= current_max_xpairs ) + expand_nxt_chk(); + + base[statenum] = tblbase; + def[statenum] = deflink; + + for ( i = minec; i <= maxec; ++i ) + if ( state[i] != SAME_TRANS ) + if ( state[i] != 0 || deflink != JAMSTATE ) + { + nxt[tblbase + i] = state[i]; + chk[tblbase + i] = statenum; + } + + if ( baseaddr == firstfree ) + /* Find next free slot in tables. 
*/ + for ( ++firstfree; chk[firstfree] != 0; ++firstfree ) + ; + + tblend = MAX( tblend, tbllast ); + } + + +/* mk1tbl - create table entries for a state (or state fragment) which + * has only one out-transition + */ + +void mk1tbl( state, sym, onenxt, onedef ) +int state, sym, onenxt, onedef; + { + if ( firstfree < sym ) + firstfree = sym; + + while ( chk[firstfree] != 0 ) + if ( ++firstfree >= current_max_xpairs ) + expand_nxt_chk(); + + base[state] = firstfree - sym; + def[state] = onedef; + chk[firstfree] = state; + nxt[firstfree] = onenxt; + + if ( firstfree > tblend ) + { + tblend = firstfree++; + + if ( firstfree >= current_max_xpairs ) + expand_nxt_chk(); + } + } + + +/* mkprot - create new proto entry */ + +void mkprot( state, statenum, comstate ) +int state[], statenum, comstate; + { + int i, slot, tblbase; + + if ( ++numprots >= MSP || numecs * numprots >= PROT_SAVE_SIZE ) + { + /* Gotta make room for the new proto by dropping last entry in + * the queue. + */ + slot = lastprot; + lastprot = protprev[lastprot]; + protnext[lastprot] = NIL; + } + + else + slot = numprots; + + protnext[slot] = firstprot; + + if ( firstprot != NIL ) + protprev[firstprot] = slot; + + firstprot = slot; + prottbl[slot] = statenum; + protcomst[slot] = comstate; + + /* Copy state into save area so it can be compared with rapidly. */ + tblbase = numecs * (slot - 1); + + for ( i = 1; i <= numecs; ++i ) + protsave[tblbase + i] = state[i]; + } + + +/* mktemplate - create a template entry based on a state, and connect the state + * to it + */ + +void mktemplate( state, statenum, comstate ) +int state[], statenum, comstate; + { + int i, numdiff, tmpbase, tmp[CSIZE + 1]; + wchar_t transset[CSIZE + 1]; + int tsptr; + + ++numtemps; + + tsptr = 0; + + /* Calculate where we will temporarily store the transition table + * of the template in the tnxt[] array. The final transition table + * gets created by cmptmps(). 
+ */ + + tmpbase = numtemps * numecs; + + if ( tmpbase + numecs >= current_max_template_xpairs ) + { + current_max_template_xpairs += MAX_TEMPLATE_XPAIRS_INCREMENT; + + ++num_reallocs; + + tnxt = reallocate_integer_array( tnxt, + current_max_template_xpairs ); + } + + for ( i = 1; i <= numecs; ++i ) + if ( state[i] == 0 ) + tnxt[tmpbase + i] = 0; + else + { + transset[tsptr++] = i; + tnxt[tmpbase + i] = comstate; + } + + if ( usemecs ) + mkeccl( transset, tsptr, tecfwd, tecbck, numecs, 0 ); + + mkprot( tnxt + tmpbase, -numtemps, comstate ); + + /* We rely on the fact that mkprot adds things to the beginning + * of the proto queue. + */ + + numdiff = tbldiff( state, firstprot, tmp ); + mkentry( tmp, numecs, statenum, -numtemps, numdiff ); + } + + +/* mv2front - move proto queue element to front of queue */ + +void mv2front( qelm ) +int qelm; + { + if ( firstprot != qelm ) + { + if ( qelm == lastprot ) + lastprot = protprev[lastprot]; + + protnext[protprev[qelm]] = protnext[qelm]; + + if ( protnext[qelm] != NIL ) + protprev[protnext[qelm]] = protprev[qelm]; + + protprev[qelm] = NIL; + protnext[qelm] = firstprot; + protprev[firstprot] = qelm; + firstprot = qelm; + } + } + + +/* place_state - place a state into full speed transition table + * + * State is the statenum'th state. It is indexed by equivalence class and + * gives the number of the state to enter for a given equivalence class. + * Transnum is the number of out-transitions for the state. + */ + +void place_state( state, statenum, transnum ) +int *state, statenum, transnum; + { + register int i; + register int *state_ptr; + int position = find_table_space( state, transnum ); + + /* "base" is the table of start positions. */ + base[statenum] = position; + + /* Put in action number marker; this non-zero number makes sure that + * find_table_space() knows that this position in chk/nxt is taken + * and should not be used for another accepting number in another + * state. 
+ */ + chk[position - 1] = 1; + + /* Put in end-of-buffer marker; this is for the same purposes as + * above. + */ + chk[position] = 1; + + /* Place the state into chk and nxt. */ + state_ptr = &state[1]; + + for ( i = 1; i <= numecs; ++i, ++state_ptr ) + if ( *state_ptr != 0 ) + { + chk[position + i] = i; + nxt[position + i] = *state_ptr; + } + + if ( position + numecs > tblend ) + tblend = position + numecs; + } + + +/* stack1 - save states with only one out-transition to be processed later + * + * If there's room for another state on the "one-transition" stack, the + * state is pushed onto it, to be processed later by mk1tbl. If there's + * no room, we process the sucker right now. + */ + +void stack1( statenum, sym, nextstate, deflink ) +int statenum, sym, nextstate, deflink; + { + if ( onesp >= ONE_STACK_SIZE - 1 ) + mk1tbl( statenum, sym, nextstate, deflink ); + + else + { + ++onesp; + onestate[onesp] = statenum; + onesym[onesp] = sym; + onenext[onesp] = nextstate; + onedef[onesp] = deflink; + } + } + + +/* tbldiff - compute differences between two state tables + * + * "state" is the state array which is to be extracted from the pr'th + * proto. "pr" is both the number of the proto we are extracting from + * and an index into the save area where we can find the proto's complete + * state table. Each entry in "state" which differs from the corresponding + * entry of "pr" will appear in "ext". + * + * Entries which are the same in both "state" and "pr" will be marked + * as transitions to "SAME_TRANS" in "ext". The total number of differences + * between "state" and "pr" is returned as function value. Note that this + * number is "numecs" minus the number of "SAME_TRANS" entries in "ext". 
+ */ + +int tbldiff( state, pr, ext ) +int state[], pr, ext[]; + { + register int i, *sp = state, *ep = ext, *protp; + register int numdiff = 0; + + protp = &protsave[numecs * (pr - 1)]; + + for ( i = numecs; i > 0; --i ) + { + if ( *++protp == *++sp ) + *++ep = SAME_TRANS; + else + { + *++ep = *sp; + ++numdiff; + } + } + + return numdiff; + } |