diff options
Diffstat (limited to 'nfa.c')
-rw-r--r-- | nfa.c | 727 |
1 files changed, 351 insertions, 376 deletions
@@ -42,33 +42,28 @@ void mkxtion PROTO((int, int)); /* add_accept - add an accepting state to a machine * - * synopsis - * - * add_accept( mach, accepting_number ); - * * accepting_number becomes mach's accepting number. */ void add_accept( mach, accepting_number ) int mach, accepting_number; + { + /* Hang the accepting number off an epsilon state. if it is associated + * with a state that has a non-epsilon out-transition, then the state + * will accept BEFORE it makes that transition, i.e., one character + * too soon. + */ - { - /* hang the accepting number off an epsilon state. if it is associated - * with a state that has a non-epsilon out-transition, then the state - * will accept BEFORE it makes that transition, i.e., one character - * too soon - */ - - if ( transchar[finalst[mach]] == SYM_EPSILON ) - accptnum[finalst[mach]] = accepting_number; + if ( transchar[finalst[mach]] == SYM_EPSILON ) + accptnum[finalst[mach]] = accepting_number; - else - { - int astate = mkstate( SYM_EPSILON ); - accptnum[astate] = accepting_number; - mach = link_machines( mach, astate ); + else + { + int astate = mkstate( SYM_EPSILON ); + accptnum[astate] = accepting_number; + mach = link_machines( mach, astate ); + } } - } /* copysingl - make a given number of copies of a singleton machine @@ -84,61 +79,56 @@ int mach, accepting_number; int copysingl( singl, num ) int singl, num; + { + int copy, i; - { - int copy, i; + copy = mkstate( SYM_EPSILON ); - copy = mkstate( SYM_EPSILON ); + for ( i = 1; i <= num; ++i ) + copy = link_machines( copy, dupmachine( singl ) ); - for ( i = 1; i <= num; ++i ) - copy = link_machines( copy, dupmachine( singl ) ); + return copy; + } - return ( copy ); - } - -/* dumpnfa - debugging routine to write out an nfa - * - * synopsis - * int state1; - * dumpnfa( state1 ); - */ +/* dumpnfa - debugging routine to write out an nfa */ void dumpnfa( state1 ) int state1; - { - int sym, tsp1, tsp2, anum, ns; + { + int sym, tsp1, tsp2, anum, ns; - fprintf( stderr, "\n\n********** beginning dump of nfa with start state %d\n", - state1 ); + fprintf( stderr, + "\n\n********** beginning dump of nfa with start state %d\n", + state1 ); - /* we probably should loop starting at firstst[state1] and going to - * lastst[state1], but they're not maintained properly when we "or" - * all of the rules together. So we use our knowledge that the machine - * starts at state 1 and ends at lastnfa. - */ + /* We probably should loop starting at firstst[state1] and going to + * lastst[state1], but they're not maintained properly when we "or" + * all of the rules together. So we use our knowledge that the machine + * starts at state 1 and ends at lastnfa. + */ - /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */ - for ( ns = 1; ns <= lastnfa; ++ns ) - { - fprintf( stderr, "state # %4d\t", ns ); + /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */ + for ( ns = 1; ns <= lastnfa; ++ns ) + { + fprintf( stderr, "state # %4d\t", ns ); - sym = transchar[ns]; - tsp1 = trans1[ns]; - tsp2 = trans2[ns]; - anum = accptnum[ns]; + sym = transchar[ns]; + tsp1 = trans1[ns]; + tsp2 = trans2[ns]; + anum = accptnum[ns]; - fprintf( stderr, "%3d: %4d, %4d", sym, tsp1, tsp2 ); + fprintf( stderr, "%3d: %4d, %4d", sym, tsp1, tsp2 ); - if ( anum != NIL ) - fprintf( stderr, " [%d]", anum ); + if ( anum != NIL ) + fprintf( stderr, " [%d]", anum ); - fprintf( stderr, "\n" ); - } + fprintf( stderr, "\n" ); + } - fprintf( stderr, "********** end of dump\n" ); - } + fprintf( stderr, "********** end of dump\n" ); + } /* dupmachine - make a duplicate of a given machine @@ -160,47 +150,44 @@ int state1; int dupmachine( mach ) int mach; - - { - int i, init, state_offset; - int state = 0; - int last = lastst[mach]; - - for ( i = firstst[mach]; i <= last; ++i ) { - state = mkstate( transchar[i] ); + int i, init, state_offset; + int state = 0; + int last = lastst[mach]; - if ( trans1[i] != NO_TRANSITION ) - { - mkxtion( finalst[state], trans1[i] + state - i ); + for ( i = firstst[mach]; i <= last; ++i ) + { + state = mkstate( transchar[i] ); - if ( transchar[i] == SYM_EPSILON && trans2[i] != NO_TRANSITION ) - mkxtion( finalst[state], trans2[i] + state - i ); - } + if ( trans1[i] != NO_TRANSITION ) + { + mkxtion( finalst[state], trans1[i] + state - i ); - accptnum[state] = accptnum[i]; - } + if ( transchar[i] == SYM_EPSILON && + trans2[i] != NO_TRANSITION ) + mkxtion( finalst[state], + trans2[i] + state - i ); + } - if ( state == 0 ) - flexfatal( "empty machine in dupmachine()" ); + accptnum[state] = accptnum[i]; + } + + if ( state == 0 ) + flexfatal( "empty machine in dupmachine()" ); - state_offset = state - i + 1; + state_offset = state - i + 1; - init = mach + state_offset; - firstst[init] = firstst[mach] + state_offset; - finalst[init] = finalst[mach] + state_offset; - lastst[init] = lastst[mach] + state_offset; + init = mach + state_offset; + firstst[init] = firstst[mach] + state_offset; + finalst[init] = finalst[mach] + state_offset; + lastst[init] = lastst[mach] + state_offset; - return ( init ); - } + return init; + } /* finish_rule - finish up the processing for a rule * - * synopsis - * - * finish_rule( mach, variable_trail_rule, headcnt, trailcnt ); - * * An accepting number is added to the given machine. If variable_trail_rule * is true then the rule has trailing context and both the head and trail * are variable size. Otherwise if headcnt or trailcnt is non-zero then @@ -213,70 +200,74 @@ int mach; void finish_rule( mach, variable_trail_rule, headcnt, trailcnt ) int mach, variable_trail_rule, headcnt, trailcnt; + { + char action_text[MAXLINE]; - { - char action_text[MAXLINE]; - - add_accept( mach, num_rules ); + add_accept( mach, num_rules ); - /* we did this in new_rule(), but it often gets the wrong - * number because we do it before we start parsing the current rule - */ - rule_linenum[num_rules] = linenum; + /* We did this in new_rule(), but it often gets the wrong + * number because we do it before we start parsing the current rule. + */ + rule_linenum[num_rules] = linenum; - /* if this is a continued action, then the line-number has - * already been updated, giving us the wrong number - */ - if ( continued_action ) - --rule_linenum[num_rules]; + /* If this is a continued action, then the line-number has already + * been updated, giving us the wrong number. + */ + if ( continued_action ) + --rule_linenum[num_rules]; - sprintf( action_text, "case %d:\n", num_rules ); - add_action( action_text ); + sprintf( action_text, "case %d:\n", num_rules ); + add_action( action_text ); - if ( variable_trail_rule ) - { - rule_type[num_rules] = RULE_VARIABLE; + if ( variable_trail_rule ) + { + rule_type[num_rules] = RULE_VARIABLE; - if ( performance_report > 0 ) - fprintf( stderr, "Variable trailing context rule at line %d\n", - rule_linenum[num_rules] ); + if ( performance_report > 0 ) + fprintf( stderr, + "Variable trailing context rule at line %d\n", + rule_linenum[num_rules] ); - variable_trailing_context_rules = true; - } + variable_trailing_context_rules = true; + } - else - { - rule_type[num_rules] = RULE_NORMAL; + else + { + rule_type[num_rules] = RULE_NORMAL; - if ( headcnt > 0 || trailcnt > 0 ) - { - /* do trailing context magic to not match the trailing characters */ - char *scanner_cp = "yy_c_buf_p = yy_cp"; - char *scanner_bp = "yy_bp"; + if ( headcnt > 0 || trailcnt > 0 ) + { + /* Do trailing context magic to not match the trailing + * characters. + */ + char *scanner_cp = "yy_c_buf_p = yy_cp"; + char *scanner_bp = "yy_bp"; - add_action( + add_action( "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */\n" ); - if ( headcnt > 0 ) - { - sprintf( action_text, "%s = %s + %d;\n", - scanner_cp, scanner_bp, headcnt ); - add_action( action_text ); + if ( headcnt > 0 ) + { + sprintf( action_text, "%s = %s + %d;\n", + scanner_cp, scanner_bp, headcnt ); + add_action( action_text ); + } + + else + { + sprintf( action_text, "%s -= %d;\n", + scanner_cp, trailcnt ); + add_action( action_text ); + } + + add_action( + "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" ); + } } - else - { - sprintf( action_text, "%s -= %d;\n", scanner_cp, trailcnt ); - add_action( action_text ); - } - - add_action( "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" ); - } + line_directive_out( (FILE *) 0 ); } - line_directive_out( (FILE *) 0 ); - } - /* link_machines - connect two machines together * @@ -296,67 +287,62 @@ int mach, variable_trail_rule, headcnt, trailcnt; int link_machines( first, last ) int first, last; + { + if ( first == NIL ) + return last; - { - if ( first == NIL ) - return ( last ); - - else if ( last == NIL ) - return ( first ); + else if ( last == NIL ) + return first; - else - { - mkxtion( finalst[first], last ); - finalst[first] = finalst[last]; - lastst[first] = max( lastst[first], lastst[last] ); - firstst[first] = min( firstst[first], firstst[last] ); + else + { + mkxtion( finalst[first], last ); + finalst[first] = finalst[last]; + lastst[first] = max( lastst[first], lastst[last] ); + firstst[first] = min( firstst[first], firstst[last] ); - return ( first ); + return first; + } } - } /* mark_beginning_as_normal - mark each "beginning" state in a machine * as being a "normal" (i.e., not trailing context- * associated) states * - * synopsis - * - * mark_beginning_as_normal( mach ) - * - * mach - machine to mark - * * The "beginning" states are the epsilon closure of the first state */ void mark_beginning_as_normal( mach ) register int mach; - - { - switch ( state_type[mach] ) { - case STATE_NORMAL: - /* oh, we've already visited here */ - return; - - case STATE_TRAILING_CONTEXT: - state_type[mach] = STATE_NORMAL; - - if ( transchar[mach] == SYM_EPSILON ) + switch ( state_type[mach] ) { - if ( trans1[mach] != NO_TRANSITION ) - mark_beginning_as_normal( trans1[mach] ); - - if ( trans2[mach] != NO_TRANSITION ) - mark_beginning_as_normal( trans2[mach] ); + case STATE_NORMAL: + /* Oh, we've already visited here. */ + return; + + case STATE_TRAILING_CONTEXT: + state_type[mach] = STATE_NORMAL; + + if ( transchar[mach] == SYM_EPSILON ) + { + if ( trans1[mach] != NO_TRANSITION ) + mark_beginning_as_normal( + trans1[mach] ); + + if ( trans2[mach] != NO_TRANSITION ) + mark_beginning_as_normal( + trans2[mach] ); + } + break; + + default: + flexerror( + "bad state type in mark_beginning_as_normal()" ); + break; } - break; - - default: - flexerror( "bad state type in mark_beginning_as_normal()" ); - break; } - } /* mkbranch - make a machine that branches to two machines @@ -368,30 +354,29 @@ register int mach; * branch - a machine which matches either first's pattern or second's * first, second - machines whose patterns are to be or'ed (the | operator) * - * note that first and second are NEITHER destroyed by the operation. Also, + * Note that first and second are NEITHER destroyed by the operation. Also, * the resulting machine CANNOT be used with any other "mk" operation except * more mkbranch's. Compare with mkor() */ int mkbranch( first, second ) int first, second; + { + int eps; - { - int eps; - - if ( first == NO_TRANSITION ) - return ( second ); + if ( first == NO_TRANSITION ) + return second; - else if ( second == NO_TRANSITION ) - return ( first ); + else if ( second == NO_TRANSITION ) + return first; - eps = mkstate( SYM_EPSILON ); + eps = mkstate( SYM_EPSILON ); - mkxtion( eps, first ); - mkxtion( eps, second ); + mkxtion( eps, first ); + mkxtion( eps, second ); - return ( eps ); - } + return eps; + } /* mkclos - convert a machine into a closure @@ -399,15 +384,14 @@ int first, second; * synopsis * new = mkclos( state ); * - * new - a new state which matches the closure of "state" + * new - a new state which matches the closure of "state" */ int mkclos( state ) int state; - - { - return ( mkopt( mkposcl( state ) ) ); - } + { + return mkopt( mkposcl( state ) ); + } /* mkopt - make a machine optional @@ -426,27 +410,26 @@ int state; int mkopt( mach ) int mach; + { + int eps; - { - int eps; + if ( ! SUPER_FREE_EPSILON(finalst[mach]) ) + { + eps = mkstate( SYM_EPSILON ); + mach = link_machines( mach, eps ); + } - if ( ! SUPER_FREE_EPSILON(finalst[mach]) ) - { + /* Can't skimp on the following if FREE_EPSILON(mach) is true because + * some state interior to "mach" might point back to the beginning + * for a closure. + */ eps = mkstate( SYM_EPSILON ); - mach = link_machines( mach, eps ); - } - - /* can't skimp on the following if FREE_EPSILON(mach) is true because - * some state interior to "mach" might point back to the beginning - * for a closure - */ - eps = mkstate( SYM_EPSILON ); - mach = link_machines( eps, mach ); + mach = link_machines( eps, mach ); - mkxtion( mach, finalst[mach] ); + mkxtion( mach, finalst[mach] ); - return ( mach ); - } + return mach; + } /* mkor - make a machine that matches either one of two machines @@ -465,56 +448,55 @@ int mach; int mkor( first, second ) int first, second; - - { - int eps, orend; - - if ( first == NIL ) - return ( second ); - - else if ( second == NIL ) - return ( first ); - - else { - /* see comment in mkopt() about why we can't use the first state - * of "first" or "second" if they satisfy "FREE_EPSILON" - */ - eps = mkstate( SYM_EPSILON ); + int eps, orend; - first = link_machines( eps, first ); + if ( first == NIL ) + return second; - mkxtion( first, second ); - - if ( SUPER_FREE_EPSILON(finalst[first]) && - accptnum[finalst[first]] == NIL ) - { - orend = finalst[first]; - mkxtion( finalst[second], orend ); - } - - else if ( SUPER_FREE_EPSILON(finalst[second]) && - accptnum[finalst[second]] == NIL ) - { - orend = finalst[second]; - mkxtion( finalst[first], orend ); - } + else if ( second == NIL ) + return first; else - { - eps = mkstate( SYM_EPSILON ); - - first = link_machines( first, eps ); - orend = finalst[first]; + { + /* See comment in mkopt() about why we can't use the first + * state of "first" or "second" if they satisfy "FREE_EPSILON". + */ + eps = mkstate( SYM_EPSILON ); + + first = link_machines( eps, first ); + + mkxtion( first, second ); + + if ( SUPER_FREE_EPSILON(finalst[first]) && + accptnum[finalst[first]] == NIL ) + { + orend = finalst[first]; + mkxtion( finalst[second], orend ); + } + + else if ( SUPER_FREE_EPSILON(finalst[second]) && + accptnum[finalst[second]] == NIL ) + { + orend = finalst[second]; + mkxtion( finalst[first], orend ); + } + + else + { + eps = mkstate( SYM_EPSILON ); + + first = link_machines( first, eps ); + orend = finalst[first]; + + mkxtion( finalst[second], orend ); + } + } - mkxtion( finalst[second], orend ); - } + finalst[first] = orend; + return first; } - finalst[first] = orend; - return ( first ); - } - /* mkposcl - convert a machine into a positive closure * @@ -526,23 +508,22 @@ int first, second; int mkposcl( state ) int state; - - { - int eps; - - if ( SUPER_FREE_EPSILON(finalst[state]) ) { - mkxtion( finalst[state], state ); - return ( state ); - } + int eps; - else - { - eps = mkstate( SYM_EPSILON ); - mkxtion( eps, state ); - return ( link_machines( state, eps ) ); + if ( SUPER_FREE_EPSILON(finalst[state]) ) + { + mkxtion( finalst[state], state ); + return state; + } + + else + { + eps = mkstate( SYM_EPSILON ); + mkxtion( eps, state ); + return link_machines( state, eps ); + } } - } /* mkrep - make a replicated machine @@ -559,35 +540,34 @@ int state; int mkrep( mach, lb, ub ) int mach, lb, ub; + { + int base_mach, tail, copy, i; - { - int base_mach, tail, copy, i; + base_mach = copysingl( mach, lb - 1 ); - base_mach = copysingl( mach, lb - 1 ); + if ( ub == INFINITY ) + { + copy = dupmachine( mach ); + mach = link_machines( mach, + link_machines( base_mach, mkclos( copy ) ) ); + } - if ( ub == INFINITY ) - { - copy = dupmachine( mach ); - mach = link_machines( mach, - link_machines( base_mach, mkclos( copy ) ) ); - } + else + { + tail = mkstate( SYM_EPSILON ); - else - { - tail = mkstate( SYM_EPSILON ); + for ( i = lb; i < ub; ++i ) + { + copy = dupmachine( mach ); + tail = mkopt( link_machines( copy, tail ) ); + } - for ( i = lb; i < ub; ++i ) - { - copy = dupmachine( mach ); - tail = mkopt( link_machines( copy, tail ) ); - } + mach = link_machines( mach, link_machines( base_mach, tail ) ); + } - mach = link_machines( mach, link_machines( base_mach, tail ) ); + return mach; } - return ( mach ); - } - /* mkstate - create a state with a transition on a given symbol * @@ -607,64 +587,68 @@ int mach, lb, ub; int mkstate( sym ) int sym; - - { - if ( ++lastnfa >= current_mns ) { - if ( (current_mns += MNS_INCREMENT) >= MAXIMUM_MNS ) - lerrif( "input rules are too complicated (>= %d NFA states)", - current_mns ); - - ++num_reallocs; - - firstst = reallocate_integer_array( firstst, current_mns ); - lastst = reallocate_integer_array( lastst, current_mns ); - finalst = reallocate_integer_array( finalst, current_mns ); - transchar = reallocate_integer_array( transchar, current_mns ); - trans1 = reallocate_integer_array( trans1, current_mns ); - trans2 = reallocate_integer_array( trans2, current_mns ); - accptnum = reallocate_integer_array( accptnum, current_mns ); - assoc_rule = reallocate_integer_array( assoc_rule, current_mns ); - state_type = reallocate_integer_array( state_type, current_mns ); - } + if ( ++lastnfa >= current_mns ) + { + if ( (current_mns += MNS_INCREMENT) >= MAXIMUM_MNS ) + lerrif( + "input rules are too complicated (>= %d NFA states)", + current_mns ); + + ++num_reallocs; + + firstst = reallocate_integer_array( firstst, current_mns ); + lastst = reallocate_integer_array( lastst, current_mns ); + finalst = reallocate_integer_array( finalst, current_mns ); + transchar = reallocate_integer_array( transchar, current_mns ); + trans1 = reallocate_integer_array( trans1, current_mns ); + trans2 = reallocate_integer_array( trans2, current_mns ); + accptnum = reallocate_integer_array( accptnum, current_mns ); + assoc_rule = + reallocate_integer_array( assoc_rule, current_mns ); + state_type = + reallocate_integer_array( state_type, current_mns ); + } - firstst[lastnfa] = lastnfa; - finalst[lastnfa] = lastnfa; - lastst[lastnfa] = lastnfa; - transchar[lastnfa] = sym; - trans1[lastnfa] = NO_TRANSITION; - trans2[lastnfa] = NO_TRANSITION; - accptnum[lastnfa] = NIL; - assoc_rule[lastnfa] = num_rules; - state_type[lastnfa] = current_state_type; - - /* fix up equivalence classes base on this transition. Note that any - * character which has its own transition gets its own equivalence class. - * Thus only characters which are only in character classes have a chance - * at being in the same equivalence class. E.g. "a|b" puts 'a' and 'b' - * into two different equivalence classes. "[ab]" puts them in the same - * equivalence class (barring other differences elsewhere in the input). - */ - - if ( sym < 0 ) - { - /* we don't have to update the equivalence classes since that was - * already done when the ccl was created for the first time + firstst[lastnfa] = lastnfa; + finalst[lastnfa] = lastnfa; + lastst[lastnfa] = lastnfa; + transchar[lastnfa] = sym; + trans1[lastnfa] = NO_TRANSITION; + trans2[lastnfa] = NO_TRANSITION; + accptnum[lastnfa] = NIL; + assoc_rule[lastnfa] = num_rules; + state_type[lastnfa] = current_state_type; + + /* Fix up equivalence classes base on this transition. Note that any + * character which has its own transition gets its own equivalence + * class. Thus only characters which are only in character classes + * have a chance at being in the same equivalence class. E.g. "a|b" + * puts 'a' and 'b' into two different equivalence classes. "[ab]" + * puts them in the same equivalence class (barring other differences + * elsewhere in the input). */ - } - else if ( sym == SYM_EPSILON ) - ++numeps; + if ( sym < 0 ) + { + /* We don't have to update the equivalence classes since + * that was already done when the ccl was created for the + * first time. + */ + } - else - { - if ( useecs ) - /* map NUL's to csize */ - mkechar( sym ? sym : csize, nextecm, ecgroup ); - } + else if ( sym == SYM_EPSILON ) + ++numeps; - return ( lastnfa ); - } + else + { + if ( useecs ) + /* Map NUL's to csize. */ + mkechar( sym ? sym : csize, nextecm, ecgroup ); + } + + return lastnfa; + } /* mkxtion - make a transition from one state to another @@ -679,49 +663,40 @@ int sym; void mkxtion( statefrom, stateto ) int statefrom, stateto; + { + if ( trans1[statefrom] == NO_TRANSITION ) + trans1[statefrom] = stateto; - { - if ( trans1[statefrom] == NO_TRANSITION ) - trans1[statefrom] = stateto; - - else if ( (transchar[statefrom] != SYM_EPSILON) || - (trans2[statefrom] != NO_TRANSITION) ) - flexfatal( "found too many transitions in mkxtion()" ); + else if ( (transchar[statefrom] != SYM_EPSILON) || + (trans2[statefrom] != NO_TRANSITION) ) + flexfatal( "found too many transitions in mkxtion()" ); - else - { /* second out-transition for an epsilon state */ - ++eps2; - trans2[statefrom] = stateto; + else + { /* second out-transition for an epsilon state */ + ++eps2; + trans2[statefrom] = stateto; + } } - } -/* new_rule - initialize for a new rule - * - * synopsis - * - * new_rule(); - * - * the global num_rules is incremented and the any corresponding dynamic - * arrays (such as rule_type[]) are grown as needed. - */ +/* new_rule - initialize for a new rule */ void new_rule() - - { - if ( ++num_rules >= current_max_rules ) { - ++num_reallocs; - current_max_rules += MAX_RULES_INCREMENT; - rule_type = reallocate_integer_array( rule_type, current_max_rules ); - rule_linenum = - reallocate_integer_array( rule_linenum, current_max_rules ); - rule_useful = - reallocate_integer_array( rule_useful, current_max_rules ); - } + if ( ++num_rules >= current_max_rules ) + { + ++num_reallocs; + current_max_rules += MAX_RULES_INCREMENT; + rule_type = reallocate_integer_array( rule_type, + current_max_rules ); + rule_linenum = reallocate_integer_array( rule_linenum, + current_max_rules ); + rule_useful = reallocate_integer_array( rule_useful, + current_max_rules ); + } - if ( num_rules > MAX_RULE ) - lerrif( "too many rules (> %d)!", MAX_RULE ); + if ( num_rules > MAX_RULE ) + lerrif( "too many rules (> %d)!", MAX_RULE ); - rule_linenum[num_rules] = linenum; - rule_useful[num_rules] = false; - } + rule_linenum[num_rules] = linenum; + rule_useful[num_rules] = false; + } |