summaryrefslogtreecommitdiff
path: root/pcre_exec.c
diff options
context:
space:
mode:
Diffstat (limited to 'pcre_exec.c')
-rw-r--r--pcre_exec.c247
1 files changed, 114 insertions, 133 deletions
diff --git a/pcre_exec.c b/pcre_exec.c
index 0270d60..24b23ca 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -1136,88 +1136,81 @@ for (;;)
printf("\n");
#endif
- if (offset < md->offset_max)
- {
- matched_once = FALSE;
- code_offset = (int)(ecode - md->start_code);
-
- save_offset1 = md->offset_vector[offset];
- save_offset2 = md->offset_vector[offset+1];
- save_offset3 = md->offset_vector[md->offset_end - number];
- save_capture_last = md->capture_last;
+ if (offset >= md->offset_max) goto POSSESSIVE_NON_CAPTURE;
- DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
+ matched_once = FALSE;
+ code_offset = (int)(ecode - md->start_code);
- /* Each time round the loop, save the current subject position for use
- when the group matches. For MATCH_MATCH, the group has matched, so we
- restart it with a new subject starting position, remembering that we had
- at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
- usual. If we haven't matched any alternatives in any iteration, check to
- see if a previous iteration matched. If so, the group has matched;
- continue from afterwards. Otherwise it has failed; restore the previous
- capture values before returning NOMATCH. */
+ save_offset1 = md->offset_vector[offset];
+ save_offset2 = md->offset_vector[offset+1];
+ save_offset3 = md->offset_vector[md->offset_end - number];
+ save_capture_last = md->capture_last;
- for (;;)
- {
- md->offset_vector[md->offset_end - number] =
- (int)(eptr - md->start_subject);
- if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
- eptrb, RM63);
- if (rrc == MATCH_KETRPOS)
- {
- offset_top = md->end_offset_top;
- eptr = md->end_match_ptr;
- ecode = md->start_code + code_offset;
- save_capture_last = md->capture_last;
- matched_once = TRUE;
- mstart = md->start_match_ptr; /* In case \K changed it */
- continue;
- }
+ DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
- /* See comment in the code for capturing groups above about handling
- THEN. */
+ /* Each time round the loop, save the current subject position for use
+ when the group matches. For MATCH_MATCH, the group has matched, so we
+ restart it with a new subject starting position, remembering that we had
+ at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
+ usual. If we haven't matched any alternatives in any iteration, check to
+ see if a previous iteration matched. If so, the group has matched;
+ continue from afterwards. Otherwise it has failed; restore the previous
+ capture values before returning NOMATCH. */
- if (rrc == MATCH_THEN)
+ for (;;)
+ {
+ md->offset_vector[md->offset_end - number] =
+ (int)(eptr - md->start_subject);
+ if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
+ RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
+ eptrb, RM63);
+ if (rrc == MATCH_KETRPOS)
+ {
+ offset_top = md->end_offset_top;
+ ecode = md->start_code + code_offset;
+ save_capture_last = md->capture_last;
+ matched_once = TRUE;
+ mstart = md->start_match_ptr; /* In case \K changed it */
+ if (eptr == md->end_match_ptr) /* Matched an empty string */
{
- next = ecode + GET(ecode,1);
- if (md->start_match_ptr < next &&
- (*ecode == OP_ALT || *next == OP_ALT))
- rrc = MATCH_NOMATCH;
+ do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
+ break;
}
-
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- md->capture_last = save_capture_last;
- ecode += GET(ecode, 1);
- if (*ecode != OP_ALT) break;
+ eptr = md->end_match_ptr;
+ continue;
}
- if (!matched_once)
- {
- md->offset_vector[offset] = save_offset1;
- md->offset_vector[offset+1] = save_offset2;
- md->offset_vector[md->offset_end - number] = save_offset3;
- }
+ /* See comment in the code for capturing groups above about handling
+ THEN. */
- if (allow_zero || matched_once)
+ if (rrc == MATCH_THEN)
{
- ecode += 1 + LINK_SIZE;
- break;
+ next = ecode + GET(ecode,1);
+ if (md->start_match_ptr < next &&
+ (*ecode == OP_ALT || *next == OP_ALT))
+ rrc = MATCH_NOMATCH;
}
- RRETURN(MATCH_NOMATCH);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ md->capture_last = save_capture_last;
+ ecode += GET(ecode, 1);
+ if (*ecode != OP_ALT) break;
}
- /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
- as a non-capturing bracket. */
-
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
+ if (!matched_once)
+ {
+ md->offset_vector[offset] = save_offset1;
+ md->offset_vector[offset+1] = save_offset2;
+ md->offset_vector[md->offset_end - number] = save_offset3;
+ }
- DPRINTF(("insufficient capture room: treat as non-capturing\n"));
+ if (allow_zero || matched_once)
+ {
+ ecode += 1 + LINK_SIZE;
+ break;
+ }
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
+ RRETURN(MATCH_NOMATCH);
/* Non-capturing possessive bracket with unlimited repeat. We come here
from BRAZERO with allow_zero = TRUE. The code is similar to the above,
@@ -1241,10 +1234,15 @@ for (;;)
if (rrc == MATCH_KETRPOS)
{
offset_top = md->end_offset_top;
- eptr = md->end_match_ptr;
ecode = md->start_code + code_offset;
matched_once = TRUE;
mstart = md->start_match_ptr; /* In case \K reset it */
+ if (eptr == md->end_match_ptr) /* Matched an empty string */
+ {
+ do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
+ break;
+ }
+ eptr = md->end_match_ptr;
continue;
}
@@ -1378,6 +1376,7 @@ for (;;)
break;
case OP_DEF: /* DEFINE - always false */
+ case OP_FAIL: /* From optimized (?!) condition */
break;
/* The condition is an assertion. Call match() to evaluate it - setting
@@ -1395,10 +1394,10 @@ for (;;)
/* Advance ecode past the assertion to the start of the first branch,
but adjust it so that the general choosing code below works. If the
- assertion has a quantifier that allows zero repeats we must skip over
- the BRAZERO. This is a lunatic thing to do, but somebody did! */
+ assertion has a quantifier that allows zero repeats we must skip over
+ the BRAZERO. This is a lunatic thing to do, but somebody did! */
- if (*ecode == OP_BRAZERO) ecode++;
+ if (*ecode == OP_BRAZERO) ecode++;
ecode += GET(ecode, 1);
while (*ecode == OP_ALT) ecode += GET(ecode, 1);
ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
@@ -1830,7 +1829,11 @@ for (;;)
are defined in a range that can be tested for. */
if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
+ {
+ if (new_recursive.offset_save != stacksave)
+ (PUBL(free))(new_recursive.offset_save);
RRETURN(MATCH_NOMATCH);
+ }
/* Any return code other than NOMATCH is an error. */
@@ -1993,6 +1996,19 @@ for (;;)
}
}
+ /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
+ and return the MATCH_KETRPOS. This makes it possible to do the repeats one
+ at a time from the outer level, thus saving stack. This must precede the
+ empty string test - in this case that test is done at the outer level. */
+
+ if (*ecode == OP_KETRPOS)
+ {
+ md->start_match_ptr = mstart; /* In case \K reset it */
+ md->end_match_ptr = eptr;
+ md->end_offset_top = offset_top;
+ RRETURN(MATCH_KETRPOS);
+ }
+
/* For an ordinary non-repeating ket, just continue at this level. This
also happens for a repeating ket if no characters were matched in the
group. This is the forcible breaking of infinite loops as implemented in
@@ -2015,18 +2031,6 @@ for (;;)
break;
}
- /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
- and return the MATCH_KETRPOS. This makes it possible to do the repeats one
- at a time from the outer level, thus saving stack. */
-
- if (*ecode == OP_KETRPOS)
- {
- md->start_match_ptr = mstart; /* In case \K reset it */
- md->end_match_ptr = eptr;
- md->end_offset_top = offset_top;
- RRETURN(MATCH_KETRPOS);
- }
-
/* The normal repeating kets try the rest of the pattern or restart from
the preceding bracket, in the appropriate order. In the second case, we can
use tail recursion to avoid using another stack frame, unless we have an
@@ -3479,7 +3483,7 @@ for (;;)
if (possessive) continue; /* No backtracking */
for(;;)
{
- if (eptr == pp) goto TAIL_RECURSE;
+ if (eptr <= pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
#ifdef SUPPORT_UCP
@@ -3900,7 +3904,7 @@ for (;;)
if (possessive) continue; /* No backtracking */
for(;;)
{
- if (eptr == pp) goto TAIL_RECURSE;
+ if (eptr <= pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
@@ -4035,7 +4039,7 @@ for (;;)
if (possessive) continue; /* No backtracking */
for(;;)
{
- if (eptr == pp) goto TAIL_RECURSE;
+ if (eptr <= pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
@@ -5606,7 +5610,7 @@ for (;;)
if (possessive) continue; /* No backtracking */
for(;;)
{
- if (eptr == pp) goto TAIL_RECURSE;
+ if (eptr <= pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
@@ -5648,12 +5652,17 @@ for (;;)
if (possessive) continue; /* No backtracking */
+ /* We use <= pp rather than == pp to detect the start of the run while
+ backtracking because the use of \C in UTF mode can cause BACKCHAR to
+ move back past pp. This is just palliative; the use of \C in UTF mode
+ is fraught with danger. */
+
for(;;)
{
int lgb, rgb;
PCRE_PUCHAR fptr;
- if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
+ if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
@@ -5671,7 +5680,7 @@ for (;;)
for (;;)
{
- if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
+ if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
fptr = eptr - 1;
if (!utf) c = *fptr; else
{
@@ -5695,54 +5704,25 @@ for (;;)
switch(ctype)
{
case OP_ANY:
- if (max < INT_MAX)
+ for (i = min; i < max; i++)
{
- for (i = min; i < max; i++)
+ if (eptr >= md->end_subject)
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
- if (IS_NEWLINE(eptr)) break;
- if (md->partial != 0 && /* Take care with CRLF partial */
- eptr + 1 >= md->end_subject &&
- NLBLOCK->nltype == NLTYPE_FIXED &&
- NLBLOCK->nllen == 2 &&
- UCHAR21(eptr) == NLBLOCK->nl[0])
- {
- md->hitend = TRUE;
- if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
- }
- eptr++;
- ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
+ SCHECK_PARTIAL();
+ break;
}
- }
-
- /* Handle unlimited UTF-8 repeat */
-
- else
- {
- for (i = min; i < max; i++)
+ if (IS_NEWLINE(eptr)) break;
+ if (md->partial != 0 && /* Take care with CRLF partial */
+ eptr + 1 >= md->end_subject &&
+ NLBLOCK->nltype == NLTYPE_FIXED &&
+ NLBLOCK->nllen == 2 &&
+ UCHAR21(eptr) == NLBLOCK->nl[0])
{
- if (eptr >= md->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
- if (IS_NEWLINE(eptr)) break;
- if (md->partial != 0 && /* Take care with CRLF partial */
- eptr + 1 >= md->end_subject &&
- NLBLOCK->nltype == NLTYPE_FIXED &&
- NLBLOCK->nllen == 2 &&
- UCHAR21(eptr) == NLBLOCK->nl[0])
- {
- md->hitend = TRUE;
- if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
- }
- eptr++;
- ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
+ md->hitend = TRUE;
+ if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
}
+ eptr++;
+ ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
}
break;
@@ -5950,7 +5930,7 @@ for (;;)
if (possessive) continue; /* No backtracking */
for(;;)
{
- if (eptr == pp) goto TAIL_RECURSE;
+ if (eptr <= pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
@@ -6533,7 +6513,7 @@ tables = re->tables;
if (extra_data != NULL)
{
- register unsigned int flags = extra_data->flags;
+ unsigned long int flags = extra_data->flags;
if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
study = (const pcre_study_data *)extra_data->study_data;
if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
@@ -6705,7 +6685,8 @@ if (md->offset_vector != NULL)
register int *iend = iptr - re->top_bracket;
if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
while (--iptr >= iend) *iptr = -1;
- md->offset_vector[0] = md->offset_vector[1] = -1;
+ if (offsetcount > 0) md->offset_vector[0] = -1;
+ if (offsetcount > 1) md->offset_vector[1] = -1;
}
/* Set up the first character to match, if available. The first_char value is