summaryrefslogtreecommitdiff
path: root/pcre_compile.c
diff options
context:
space:
mode:
Diffstat (limited to 'pcre_compile.c')
-rw-r--r--pcre_compile.c99
1 files changed, 57 insertions, 42 deletions
diff --git a/pcre_compile.c b/pcre_compile.c
index c76ca14..08caf8e 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -2367,6 +2367,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
if (c == OP_RECURSE)
{
const pcre_uchar *scode = cd->start_code + GET(code, 1);
+ const pcre_uchar *endgroup = scode;
BOOL empty_branch;
/* Test for forward reference or uncompleted reference. This is disabled
@@ -2381,24 +2382,20 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
if (GET(scode, 1) == 0) return TRUE; /* Unclosed */
}
- /* If we are scanning a completed pattern, there are no forward references
- and all groups are complete. We need to detect whether this is a recursive
- call, as otherwise there will be an infinite loop. If it is a recursion,
- just skip over it. Simple recursions are easily detected. For mutual
- recursions we keep a chain on the stack. */
+ /* If the reference is to a completed group, we need to detect whether this
+ is a recursive call, as otherwise there will be an infinite loop. If it is
+ a recursion, just skip over it. Simple recursions are easily detected. For
+ mutual recursions we keep a chain on the stack. */
+ do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
+ if (code >= scode && code <= endgroup) continue; /* Simple recursion */
else
- {
+ {
recurse_check *r = recurses;
- const pcre_uchar *endgroup = scode;
-
- do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
- if (code >= scode && code <= endgroup) continue; /* Simple recursion */
-
for (r = recurses; r != NULL; r = r->prev)
if (r->group == scode) break;
if (r != NULL) continue; /* Mutual recursion */
- }
+ }
/* Completed reference; scan the referenced group, remembering it on the
stack chain to detect mutual recursions. */
@@ -3936,14 +3933,14 @@ Arguments:
adjust the amount by which the group is to be moved
utf TRUE in UTF-8 / UTF-16 / UTF-32 mode
cd contains pointers to tables etc.
- save_hwm the hwm forward reference pointer at the start of the group
+ save_hwm_offset the hwm forward reference offset at the start of the group
Returns: nothing
*/
static void
adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd,
- pcre_uchar *save_hwm)
+ size_t save_hwm_offset)
{
pcre_uchar *ptr = group;
@@ -3955,7 +3952,8 @@ while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL)
/* See if this recursion is on the forward reference list. If so, adjust the
reference. */
- for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE)
+ for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm;
+ hc += LINK_SIZE)
{
offset = (int)GET(hc, 0);
if (cd->start_code + offset == ptr + 1)
@@ -4400,7 +4398,7 @@ const pcre_uchar *tempptr;
const pcre_uchar *nestptr = NULL;
pcre_uchar *previous = NULL;
pcre_uchar *previous_callout = NULL;
-pcre_uchar *save_hwm = NULL;
+size_t save_hwm_offset = 0;
pcre_uint8 classbits[32];
/* We can fish out the UTF-8 setting once and for all into a BOOL, but we
@@ -5912,7 +5910,7 @@ for (;; ptr++)
if (repeat_max <= 1) /* Covers 0, 1, and unlimited */
{
*code = OP_END;
- adjust_recurse(previous, 1, utf, cd, save_hwm);
+ adjust_recurse(previous, 1, utf, cd, save_hwm_offset);
memmove(previous + 1, previous, IN_UCHARS(len));
code++;
if (repeat_max == 0)
@@ -5936,7 +5934,7 @@ for (;; ptr++)
{
int offset;
*code = OP_END;
- adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm);
+ adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm_offset);
memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));
code += 2 + LINK_SIZE;
*previous++ = OP_BRAZERO + repeat_type;
@@ -5999,26 +5997,25 @@ for (;; ptr++)
for (i = 1; i < repeat_min; i++)
{
pcre_uchar *hc;
- pcre_uchar *this_hwm = cd->hwm;
+ size_t this_hwm_offset = cd->hwm - cd->start_workspace;
memcpy(code, previous, IN_UCHARS(len));
while (cd->hwm > cd->start_workspace + cd->workspace_size -
- WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm))
+ WORK_SIZE_SAFETY_MARGIN -
+ (this_hwm_offset - save_hwm_offset))
{
- int save_offset = save_hwm - cd->start_workspace;
- int this_offset = this_hwm - cd->start_workspace;
*errorcodeptr = expand_workspace(cd);
if (*errorcodeptr != 0) goto FAILED;
- save_hwm = (pcre_uchar *)cd->start_workspace + save_offset;
- this_hwm = (pcre_uchar *)cd->start_workspace + this_offset;
}
- for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
+ for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset;
+ hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;
+ hc += LINK_SIZE)
{
PUT(cd->hwm, 0, GET(hc, 0) + len);
cd->hwm += LINK_SIZE;
}
- save_hwm = this_hwm;
+ save_hwm_offset = this_hwm_offset;
code += len;
}
}
@@ -6063,7 +6060,7 @@ for (;; ptr++)
else for (i = repeat_max - 1; i >= 0; i--)
{
pcre_uchar *hc;
- pcre_uchar *this_hwm = cd->hwm;
+ size_t this_hwm_offset = cd->hwm - cd->start_workspace;
*code++ = OP_BRAZERO + repeat_type;
@@ -6085,22 +6082,21 @@ for (;; ptr++)
copying them. */
while (cd->hwm > cd->start_workspace + cd->workspace_size -
- WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm))
+ WORK_SIZE_SAFETY_MARGIN -
+ (this_hwm_offset - save_hwm_offset))
{
- int save_offset = save_hwm - cd->start_workspace;
- int this_offset = this_hwm - cd->start_workspace;
*errorcodeptr = expand_workspace(cd);
if (*errorcodeptr != 0) goto FAILED;
- save_hwm = (pcre_uchar *)cd->start_workspace + save_offset;
- this_hwm = (pcre_uchar *)cd->start_workspace + this_offset;
}
- for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
+ for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset;
+ hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;
+ hc += LINK_SIZE)
{
PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
cd->hwm += LINK_SIZE;
}
- save_hwm = this_hwm;
+ save_hwm_offset = this_hwm_offset;
code += len;
}
@@ -6196,7 +6192,7 @@ for (;; ptr++)
{
int nlen = (int)(code - bracode);
*code = OP_END;
- adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm);
+ adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);
memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen));
code += 1 + LINK_SIZE;
nlen += 1 + LINK_SIZE;
@@ -6330,7 +6326,7 @@ for (;; ptr++)
else
{
*code = OP_END;
- adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm);
+ adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);
memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
code += 1 + LINK_SIZE;
len += 1 + LINK_SIZE;
@@ -6379,7 +6375,7 @@ for (;; ptr++)
default:
*code = OP_END;
- adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm);
+ adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);
memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
code += 1 + LINK_SIZE;
len += 1 + LINK_SIZE;
@@ -6411,7 +6407,7 @@ for (;; ptr++)
newoptions = options;
skipbytes = 0;
bravalue = OP_CBRA;
- save_hwm = cd->hwm;
+ save_hwm_offset = cd->hwm - cd->start_workspace;
reset_bracount = FALSE;
/* First deal with various "verbs" that can be introduced by '*'. */
@@ -7086,14 +7082,26 @@ for (;; ptr++)
number. If the name is not found, set the value to 0 for a forward
reference. */
+ recno = 0;
ng = cd->named_groups;
for (i = 0; i < cd->names_found; i++, ng++)
{
if (namelen == ng->length &&
STRNCMP_UC_UC(name, ng->name, namelen) == 0)
- break;
+ {
+ open_capitem *oc;
+ recno = ng->number;
+ if (is_recurse) break;
+ for (oc = cd->open_caps; oc != NULL; oc = oc->next)
+ {
+ if (oc->number == recno)
+ {
+ oc->flag = TRUE;
+ break;
+ }
+ }
+ }
}
- recno = (i < cd->names_found)? ng->number : 0;
/* Count named back references. */
@@ -7704,7 +7712,7 @@ for (;; ptr++)
const pcre_uchar *p;
pcre_uint32 cf;
- save_hwm = cd->hwm; /* Normally this is set when '(' is read */
+ save_hwm_offset = cd->hwm - cd->start_workspace; /* Normally this is set when '(' is read */
terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
@@ -8031,6 +8039,7 @@ int length;
unsigned int orig_bracount;
unsigned int max_bracount;
branch_chain bc;
+size_t save_hwm_offset;
/* If set, call the external function that checks for stack availability. */
@@ -8048,6 +8057,8 @@ bc.current_branch = code;
firstchar = reqchar = 0;
firstcharflags = reqcharflags = REQ_UNSET;
+save_hwm_offset = cd->hwm - cd->start_workspace;
+
/* Accumulate the length for use in the pre-compile phase. Start with the
length of the BRA and KET and any extra bytes that are required at the
beginning. We accumulate in a local variable to save frequent testing of
@@ -8241,12 +8252,16 @@ for (;;)
/* If it was a capturing subpattern, check to see if it contained any
recursive back references. If so, we must wrap it in atomic brackets.
- In any event, remove the block from the chain. */
+ Because we are moving code along, we must ensure that any pending recursive
+ references are updated. In any event, remove the block from the chain. */
if (capnumber > 0)
{
if (cd->open_caps->flag)
{
+ *code = OP_END;
+ adjust_recurse(start_bracket, 1 + LINK_SIZE,
+ (options & PCRE_UTF8) != 0, cd, save_hwm_offset);
memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
IN_UCHARS(code - start_bracket));
*start_bracket = OP_ONCE;