diff options
Diffstat (limited to 'pcre_compile.c')
-rw-r--r-- | pcre_compile.c | 99 |
1 files changed, 57 insertions, 42 deletions
diff --git a/pcre_compile.c b/pcre_compile.c index c76ca14..08caf8e 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -2367,6 +2367,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); if (c == OP_RECURSE) { const pcre_uchar *scode = cd->start_code + GET(code, 1); + const pcre_uchar *endgroup = scode; BOOL empty_branch; /* Test for forward reference or uncompleted reference. This is disabled @@ -2381,24 +2382,20 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); if (GET(scode, 1) == 0) return TRUE; /* Unclosed */ } - /* If we are scanning a completed pattern, there are no forward references - and all groups are complete. We need to detect whether this is a recursive - call, as otherwise there will be an infinite loop. If it is a recursion, - just skip over it. Simple recursions are easily detected. For mutual - recursions we keep a chain on the stack. */ + /* If the reference is to a completed group, we need to detect whether this + is a recursive call, as otherwise there will be an infinite loop. If it is + a recursion, just skip over it. Simple recursions are easily detected. For + mutual recursions we keep a chain on the stack. */ + do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT); + if (code >= scode && code <= endgroup) continue; /* Simple recursion */ else - { + { recurse_check *r = recurses; - const pcre_uchar *endgroup = scode; - - do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT); - if (code >= scode && code <= endgroup) continue; /* Simple recursion */ - for (r = recurses; r != NULL; r = r->prev) if (r->group == scode) break; if (r != NULL) continue; /* Mutual recursion */ - } + } /* Completed reference; scan the referenced group, remembering it on the stack chain to detect mutual recursions. */ @@ -3936,14 +3933,14 @@ Arguments: adjust the amount by which the group is to be moved utf TRUE in UTF-8 / UTF-16 / UTF-32 mode cd contains pointers to tables etc. - save_hwm the hwm forward reference pointer at the start of the group + save_hwm_offset the hwm forward reference offset at the start of the group Returns: nothing */ static void adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd, - pcre_uchar *save_hwm) + size_t save_hwm_offset) { pcre_uchar *ptr = group; @@ -3955,7 +3952,8 @@ while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL) /* See if this recursion is on the forward reference list. If so, adjust the reference. */ - for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE) + for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm; + hc += LINK_SIZE) { offset = (int)GET(hc, 0); if (cd->start_code + offset == ptr + 1) @@ -4400,7 +4398,7 @@ const pcre_uchar *tempptr; const pcre_uchar *nestptr = NULL; pcre_uchar *previous = NULL; pcre_uchar *previous_callout = NULL; -pcre_uchar *save_hwm = NULL; +size_t save_hwm_offset = 0; pcre_uint8 classbits[32]; /* We can fish out the UTF-8 setting once and for all into a BOOL, but we @@ -5912,7 +5910,7 @@ for (;; ptr++) if (repeat_max <= 1) /* Covers 0, 1, and unlimited */ { *code = OP_END; - adjust_recurse(previous, 1, utf, cd, save_hwm); + adjust_recurse(previous, 1, utf, cd, save_hwm_offset); memmove(previous + 1, previous, IN_UCHARS(len)); code++; if (repeat_max == 0) @@ -5936,7 +5934,7 @@ for (;; ptr++) { int offset; *code = OP_END; - adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm); + adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm_offset); memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len)); code += 2 + LINK_SIZE; *previous++ = OP_BRAZERO + repeat_type; @@ -5999,26 +5997,25 @@ for (;; ptr++) for (i = 1; i < repeat_min; i++) { pcre_uchar *hc; - pcre_uchar *this_hwm = cd->hwm; + size_t this_hwm_offset = cd->hwm - cd->start_workspace; memcpy(code, previous, IN_UCHARS(len)); while (cd->hwm > cd->start_workspace + cd->workspace_size - - WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm)) + WORK_SIZE_SAFETY_MARGIN - + (this_hwm_offset - save_hwm_offset)) { - int save_offset = save_hwm - cd->start_workspace; - int this_offset = this_hwm - cd->start_workspace; *errorcodeptr = expand_workspace(cd); if (*errorcodeptr != 0) goto FAILED; - save_hwm = (pcre_uchar *)cd->start_workspace + save_offset; - this_hwm = (pcre_uchar *)cd->start_workspace + this_offset; } - for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE) + for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; + hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset; + hc += LINK_SIZE) { PUT(cd->hwm, 0, GET(hc, 0) + len); cd->hwm += LINK_SIZE; } - save_hwm = this_hwm; + save_hwm_offset = this_hwm_offset; code += len; } } @@ -6063,7 +6060,7 @@ for (;; ptr++) else for (i = repeat_max - 1; i >= 0; i--) { pcre_uchar *hc; - pcre_uchar *this_hwm = cd->hwm; + size_t this_hwm_offset = cd->hwm - cd->start_workspace; *code++ = OP_BRAZERO + repeat_type; @@ -6085,22 +6082,21 @@ for (;; ptr++) copying them. */ while (cd->hwm > cd->start_workspace + cd->workspace_size - - WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm)) + WORK_SIZE_SAFETY_MARGIN - + (this_hwm_offset - save_hwm_offset)) { - int save_offset = save_hwm - cd->start_workspace; - int this_offset = this_hwm - cd->start_workspace; *errorcodeptr = expand_workspace(cd); if (*errorcodeptr != 0) goto FAILED; - save_hwm = (pcre_uchar *)cd->start_workspace + save_offset; - this_hwm = (pcre_uchar *)cd->start_workspace + this_offset; } - for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE) + for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; + hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset; + hc += LINK_SIZE) { PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1)); cd->hwm += LINK_SIZE; } - save_hwm = this_hwm; + save_hwm_offset = this_hwm_offset; code += len; } @@ -6196,7 +6192,7 @@ for (;; ptr++) { int nlen = (int)(code - bracode); *code = OP_END; - adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm); + adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm_offset); memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen)); code += 1 + LINK_SIZE; nlen += 1 + LINK_SIZE; @@ -6330,7 +6326,7 @@ for (;; ptr++) else { *code = OP_END; - adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm); + adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset); memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len)); code += 1 + LINK_SIZE; len += 1 + LINK_SIZE; @@ -6379,7 +6375,7 @@ for (;; ptr++) default: *code = OP_END; - adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm); + adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset); memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len)); code += 1 + LINK_SIZE; len += 1 + LINK_SIZE; @@ -6411,7 +6407,7 @@ for (;; ptr++) newoptions = options; skipbytes = 0; bravalue = OP_CBRA; - save_hwm = cd->hwm; + save_hwm_offset = cd->hwm - cd->start_workspace; reset_bracount = FALSE; /* First deal with various "verbs" that can be introduced by '*'. */ @@ -7086,14 +7082,26 @@ for (;; ptr++) number. If the name is not found, set the value to 0 for a forward reference. */ + recno = 0; ng = cd->named_groups; for (i = 0; i < cd->names_found; i++, ng++) { if (namelen == ng->length && STRNCMP_UC_UC(name, ng->name, namelen) == 0) - break; + { + open_capitem *oc; + recno = ng->number; + if (is_recurse) break; + for (oc = cd->open_caps; oc != NULL; oc = oc->next) + { + if (oc->number == recno) + { + oc->flag = TRUE; + break; + } + } + } } - recno = (i < cd->names_found)? ng->number : 0; /* Count named back references. */ @@ -7704,7 +7712,7 @@ for (;; ptr++) const pcre_uchar *p; pcre_uint32 cf; - save_hwm = cd->hwm; /* Normally this is set when '(' is read */ + save_hwm_offset = cd->hwm - cd->start_workspace; /* Normally this is set when '(' is read */ terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)? CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE; @@ -8031,6 +8039,7 @@ int length; unsigned int orig_bracount; unsigned int max_bracount; branch_chain bc; +size_t save_hwm_offset; /* If set, call the external function that checks for stack availability. */ @@ -8048,6 +8057,8 @@ bc.current_branch = code; firstchar = reqchar = 0; firstcharflags = reqcharflags = REQ_UNSET; +save_hwm_offset = cd->hwm - cd->start_workspace; + /* Accumulate the length for use in the pre-compile phase. Start with the length of the BRA and KET and any extra bytes that are required at the beginning. We accumulate in a local variable to save frequent testing of @@ -8241,12 +8252,16 @@ for (;;) /* If it was a capturing subpattern, check to see if it contained any recursive back references. If so, we must wrap it in atomic brackets. - In any event, remove the block from the chain. */ + Because we are moving code along, we must ensure that any pending recursive + references are updated. In any event, remove the block from the chain. */ if (capnumber > 0) { if (cd->open_caps->flag) { + *code = OP_END; + adjust_recurse(start_bracket, 1 + LINK_SIZE, + (options & PCRE_UTF8) != 0, cd, save_hwm_offset); memmove(start_bracket + 1 + LINK_SIZE, start_bracket, IN_UCHARS(code - start_bracket)); *start_bracket = OP_ONCE; |