summary | refs | log | tree | commit | diff
path: root/src/pcre2test.c
diff options
context:
space:
mode:
author Matthew Vernon <matthew@debian.org> 2018-02-24 12:07:04 +0000
committer Matthew Vernon <matthew@debian.org> 2018-02-24 12:07:04 +0000
commit e98c3314cf9e05aa99f5e192862ec37f29b7dbb5 (patch)
tree b69bb3feb63a4fd79ad8a6e55865228f6fde04eb /src/pcre2test.c
parent 92b17f0eb8fddd7117c5344a1e1177daec21995a (diff)
New upstream version 10.31
Diffstat (limited to 'src/pcre2test.c')
-rw-r--r-- src/pcre2test.c 1929
1 files changed, 1456 insertions, 473 deletions
diff --git a/src/pcre2test.c b/src/pcre2test.c
index a8dffa3..15bf404 100644
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@@ -11,7 +11,7 @@ hacked-up (non-) design had also run out of steam.
Written by Philip Hazel
Original code Copyright (c) 1997-2012 University of Cambridge
- Rewritten code Copyright (c) 2016 University of Cambridge
+ Rewritten code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -78,6 +78,10 @@ from www.cbttape.org. */
#include <unistd.h>
#endif
+/* Debugging code enabler */
+
+// #define DEBUG_SHOW_MALLOC_ADDRESSES
+
/* Both libreadline and libedit are optionally supported. The user-supplied
original patch uses readline/readline.h for libedit, but in at least one system
it is installed as editline/readline.h, so the configuration code now looks for
@@ -158,6 +162,13 @@ patterns. */
void vms_setsymbol( char *, char *, int );
#endif
+/* VC doesn't support "%td". */
+#ifdef _MSC_VER
+#define PTR_SPEC "%lu"
+#else
+#define PTR_SPEC "%td"
+#endif
+
/* ------------------End of system-specific definitions -------------------- */
/* Glueing macros that are used in several places below. */
@@ -175,15 +186,18 @@ void vms_setsymbol( char *, char *, int );
#endif
#endif
-#define CFAIL_UNSET UINT32_MAX /* Unset value for cfail fields */
-#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
-#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
-#define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */
-#define LOCALESIZE 32 /* Size of locale name */
-#define LOOPREPEAT 500000 /* Default loop count for timing */
-#define PATSTACKSIZE 20 /* Pattern stack for save/restore testing */
-#define REPLACE_MODSIZE 100 /* Field for reading 8-bit replacement */
-#define VERSION_SIZE 64 /* Size of buffer for the version strings */
+#define CFORE_UNSET UINT32_MAX /* Unset value for startend/cfail/cerror fields */
+#define CONVERT_UNSET UINT32_MAX /* Unset value for convert_type field */
+#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
+#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
+#define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */
+#define LOCALESIZE 32 /* Size of locale name */
+#define LOOPREPEAT 500000 /* Default loop count for timing */
+#define MALLOCLISTSIZE 20 /* For remembering mallocs */
+#define PARENS_NEST_DEFAULT 220 /* Default parentheses nest limit */
+#define PATSTACKSIZE 20 /* Pattern stack for save/restore testing */
+#define REPLACE_MODSIZE 100 /* Field for reading 8-bit replacement */
+#define VERSION_SIZE 64 /* Size of buffer for the version strings */
/* Make sure the buffer into which replacement strings are copied is big enough
to hold them as 32-bit code units. */
@@ -211,7 +225,7 @@ systems that differ in their output from isprint() even in the "C" locale. */
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif
-#define PRINTOK(c) ((locale_tables != NULL)? isprint(c) : PRINTABLE(c))
+/* PRINTOK(c) tests whether code point c should be output as itself. isprint()
+is consulted only when use_tables is non-NULL and c is below 256; in every
+other case the PRINTABLE() test is applied. Each use of the argument is
+parenthesized so that an expression argument is compared as a whole. */
+#define PRINTOK(c) ((use_tables != NULL && (c) < 256)? isprint(c) : PRINTABLE(c))
/* We have to include some of the library source files because we need
to use some of the macros, internal structure definitions, and other internal
@@ -312,7 +326,7 @@ extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *);
/* If we have 8-bit support, default to it; if there is also 16-bit or 32-bit
support, it can be selected by a command-line option. If there is no 8-bit
-support, there must be 16- or 32-bit support, so default to one of them. The
+support, there must be 16-bit or 32-bit support, so default to one of them. The
config function, JIT stack, contexts, and version string are the same in all
modes, so use the form of the first that is available. */
@@ -323,8 +337,8 @@ modes, so use the form of the first that is available. */
#define PCRE2_JIT_STACK pcre2_jit_stack_8
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_8
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_8
+#define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_8
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_8
-#define VERSION_TYPE PCRE2_UCHAR8
#elif defined SUPPORT_PCRE2_16
#define DEFAULT_TEST_MODE PCRE16_MODE
@@ -333,6 +347,7 @@ modes, so use the form of the first that is available. */
#define PCRE2_JIT_STACK pcre2_jit_stack_16
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_16
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_16
+#define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_16
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_16
#elif defined SUPPORT_PCRE2_32
@@ -342,6 +357,7 @@ modes, so use the form of the first that is available. */
#define PCRE2_JIT_STACK pcre2_jit_stack_32
#define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_32
#define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_32
+#define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_32
#define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_32
#endif
@@ -366,7 +382,7 @@ static cmdstruct cmdlist[] = {
{ "save", CMD_SAVE },
{ "subject", CMD_SUBJECT }};
-#define cmdlistcount sizeof(cmdlist)/sizeof(cmdstruct)
+#define cmdlistcount (sizeof(cmdlist)/sizeof(cmdstruct))
/* ------------- Structures and tables for handling modifiers -------------- */
@@ -374,7 +390,24 @@ static cmdstruct cmdlist[] = {
of PCRE2_NEWLINE_xx in pcre2.h. */
static const char *newlines[] = {
- "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF" };
+ "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
+
+/* Structure and table for handling pattern conversion types. */
+
+typedef struct convertstruct {
+ const char *name;
+ uint32_t option;
+} convertstruct;
+
+static convertstruct convertlist[] = {
+ { "glob", PCRE2_CONVERT_GLOB },
+ { "glob_no_starstar", PCRE2_CONVERT_GLOB_NO_STARSTAR },
+ { "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR },
+ { "posix_basic", PCRE2_CONVERT_POSIX_BASIC },
+ { "posix_extended", PCRE2_CONVERT_POSIX_EXTENDED },
+ { "unset", CONVERT_UNSET }};
+
+#define convertlistcount (sizeof(convertlist)/sizeof(convertstruct))
/* Modifier types and applicability */
@@ -387,6 +420,8 @@ enum { MOD_CTC, /* Applies to a compile context */
MOD_PDP, /* As MOD_PD, OK for Perl test */
MOD_PND, /* As MOD_PD, but not for a default pattern */
MOD_PNDP, /* As MOD_PND, OK for Perl test */
+ MOD_CHR, /* Is a single character */
+ MOD_CON, /* Is a "convert" type/options list */
MOD_CTL, /* Is a control bit */
MOD_BSR, /* Is a BSR value */
MOD_IN2, /* Is one or two unsigned integers */
@@ -415,26 +450,32 @@ so many of them that they are split into two fields. */
#define CTL_DFA 0x00000200u
#define CTL_EXPAND 0x00000400u
#define CTL_FINDLIMITS 0x00000800u
-#define CTL_FULLBINCODE 0x00001000u
-#define CTL_GETALL 0x00002000u
-#define CTL_GLOBAL 0x00004000u
-#define CTL_HEXPAT 0x00008000u
-#define CTL_INFO 0x00010000u
-#define CTL_JITFAST 0x00020000u
-#define CTL_JITVERIFY 0x00040000u
-#define CTL_MARK 0x00080000u
-#define CTL_MEMORY 0x00100000u
-#define CTL_NULLCONTEXT 0x00200000u
-#define CTL_POSIX 0x00400000u
-#define CTL_POSIX_NOSUB 0x00800000u
-#define CTL_PUSH 0x01000000u
-#define CTL_PUSHCOPY 0x02000000u
-#define CTL_STARTCHAR 0x04000000u
-#define CTL_ZERO_TERMINATE 0x08000000u
-/* Spare 0x10000000u */
-/* Spare 0x20000000u */
-#define CTL_NL_SET 0x40000000u /* Informational */
-#define CTL_BSR_SET 0x80000000u /* Informational */
+#define CTL_FRAMESIZE 0x00001000u
+#define CTL_FULLBINCODE 0x00002000u
+#define CTL_GETALL 0x00004000u
+#define CTL_GLOBAL 0x00008000u
+#define CTL_HEXPAT 0x00010000u /* Same word as USE_LENGTH */
+#define CTL_INFO 0x00020000u
+#define CTL_JITFAST 0x00040000u
+#define CTL_JITVERIFY 0x00080000u
+#define CTL_MARK 0x00100000u
+#define CTL_MEMORY 0x00200000u
+#define CTL_NULLCONTEXT 0x00400000u
+#define CTL_POSIX 0x00800000u
+#define CTL_POSIX_NOSUB 0x01000000u
+#define CTL_PUSH 0x02000000u /* These three must be */
+#define CTL_PUSHCOPY 0x04000000u /* all in the same */
+#define CTL_PUSHTABLESCOPY 0x08000000u /* word. */
+#define CTL_STARTCHAR 0x10000000u
+#define CTL_USE_LENGTH 0x20000000u /* Same word as HEXPAT */
+#define CTL_UTF8_INPUT 0x40000000u
+#define CTL_ZERO_TERMINATE 0x80000000u
+
+/* Combinations */
+
+#define CTL_DEBUG (CTL_FULLBINCODE|CTL_INFO) /* For setting */
+#define CTL_ANYINFO (CTL_DEBUG|CTL_BINCODE|CTL_CALLOUT_INFO)
+#define CTL_ANYGLOB (CTL_ALTGLOBAL|CTL_GLOBAL)
/* Second control word */
@@ -442,15 +483,17 @@ so many of them that they are split into two fields. */
#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH 0x00000002u
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET 0x00000004u
#define CTL2_SUBSTITUTE_UNSET_EMPTY 0x00000008u
+#define CTL2_SUBJECT_LITERAL 0x00000010u
+#define CTL2_CALLOUT_NO_WHERE 0x00000020u
+#define CTL2_CALLOUT_EXTRA 0x00000040u
-/* Combinations */
-
-#define CTL_DEBUG (CTL_FULLBINCODE|CTL_INFO) /* For setting */
-#define CTL_ANYINFO (CTL_DEBUG|CTL_BINCODE|CTL_CALLOUT_INFO)
-#define CTL_ANYGLOB (CTL_ALTGLOBAL|CTL_GLOBAL)
+#define CTL2_NL_SET 0x40000000u /* Informational */
+#define CTL2_BSR_SET 0x80000000u /* Informational */
-/* These are all the controls that may be set either on a pattern or on a
-data line. */
+/* These are the matching controls that may be set either on a pattern or on a
+data line. They are copied from the pattern controls as initial settings for
+data line controls. Note that CTL_MEMORY is not included here, because it does
+different things in the two cases. */
#define CTL_ALLPD (CTL_AFTERTEXT|\
CTL_ALLAFTERTEXT|\
@@ -459,8 +502,8 @@ data line. */
CTL_ALTGLOBAL|\
CTL_GLOBAL|\
CTL_MARK|\
- CTL_MEMORY|\
- CTL_STARTCHAR)
+ CTL_STARTCHAR|\
+ CTL_UTF8_INPUT)
#define CTL2_ALLPD (CTL2_SUBSTITUTE_EXTENDED|\
CTL2_SUBSTITUTE_OVERFLOW_LENGTH|\
@@ -476,10 +519,15 @@ typedef struct patctl { /* Structure for pattern modifiers. */
uint32_t options; /* Must be in same position as datctl */
uint32_t control; /* Must be in same position as datctl */
uint32_t control2; /* Must be in same position as datctl */
+ uint32_t jitstack; /* Must be in same position as datctl */
uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
uint32_t jit;
uint32_t stackguard_test;
uint32_t tables_id;
+ uint32_t convert_type;
+ uint32_t convert_length;
+ uint32_t convert_glob_escape;
+ uint32_t convert_glob_separator;
uint32_t regerror_buffsize;
uint8_t locale[LOCALESIZE];
} patctl;
@@ -491,12 +539,14 @@ typedef struct datctl { /* Structure for data line modifiers. */
uint32_t options; /* Must be in same position as patctl */
uint32_t control; /* Must be in same position as patctl */
uint32_t control2; /* Must be in same position as patctl */
+ uint32_t jitstack; /* Must be in same position as patctl */
uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
+ uint32_t startend[2];
+ uint32_t cerror[2];
uint32_t cfail[2];
int32_t callout_data;
int32_t copy_numbers[MAXCPYGET];
int32_t get_numbers[MAXCPYGET];
- uint32_t jitstack;
uint32_t oveccount;
uint32_t offset;
uint8_t copy_names[LENCPYGET];
@@ -535,6 +585,7 @@ static modstruct modlist[] = {
{ "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) },
{ "allcaptures", MOD_PND, MOD_CTL, CTL_ALLCAPTURES, PO(control) },
{ "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) },
+ { "allow_surrogate_escapes", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) },
{ "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) },
{ "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) },
{ "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) },
@@ -542,40 +593,56 @@ static modstruct modlist[] = {
{ "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) },
{ "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
{ "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) },
+ { "bad_escape_is_literal", MOD_CTC, MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) },
{ "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) },
{ "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) },
{ "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) },
{ "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) },
+ { "callout_error", MOD_DAT, MOD_IN2, 0, DO(cerror) },
+ { "callout_extra", MOD_DAT, MOD_CTL, CTL2_CALLOUT_EXTRA, DO(control2) },
{ "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) },
{ "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) },
+ { "callout_no_where", MOD_DAT, MOD_CTL, CTL2_CALLOUT_NO_WHERE, DO(control2) },
{ "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
{ "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) },
+ { "convert", MOD_PAT, MOD_CON, 0, PO(convert_type) },
+ { "convert_glob_escape", MOD_PAT, MOD_CHR, 0, PO(convert_glob_escape) },
+ { "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) },
+ { "convert_length", MOD_PAT, MOD_INT, 0, PO(convert_length) },
{ "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) },
{ "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) },
+ { "depth_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) },
{ "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) },
{ "dfa_restart", MOD_DAT, MOD_OPT, PCRE2_DFA_RESTART, DO(options) },
{ "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) },
{ "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) },
{ "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) },
{ "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) },
+ { "endanchored", MOD_PD, MOD_OPT, PCRE2_ENDANCHORED, PD(options) },
{ "expand", MOD_PAT, MOD_CTL, CTL_EXPAND, PO(control) },
{ "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) },
+ { "extended_more", MOD_PATP, MOD_OPT, PCRE2_EXTENDED_MORE, PO(options) },
{ "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) },
{ "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) },
+ { "framesize", MOD_PAT, MOD_CTL, CTL_FRAMESIZE, PO(control) },
{ "fullbincode", MOD_PAT, MOD_CTL, CTL_FULLBINCODE, PO(control) },
{ "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) },
{ "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) },
{ "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) },
+ { "heap_limit", MOD_CTM, MOD_INT, 0, MO(heap_limit) },
{ "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
{ "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
{ "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
{ "jitfast", MOD_PAT, MOD_CTL, CTL_JITFAST, PO(control) },
- { "jitstack", MOD_DAT, MOD_INT, 0, DO(jitstack) },
+ { "jitstack", MOD_PNDP, MOD_INT, 0, PO(jitstack) },
{ "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) },
+ { "literal", MOD_PAT, MOD_OPT, PCRE2_LITERAL, PO(options) },
{ "locale", MOD_PAT, MOD_STR, LOCALESIZE, PO(locale) },
{ "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) },
{ "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) },
+ { "match_line", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_LINE, CO(extra_options) },
{ "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) },
+ { "match_word", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_WORD, CO(extra_options) },
{ "max_pattern_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_length) },
{ "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) },
{ "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) },
@@ -603,15 +670,18 @@ static modstruct modlist[] = {
{ "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
{ "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) },
{ "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) },
+ { "posix_startend", MOD_DAT, MOD_IN2, 0, DO(startend) },
{ "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
{ "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) },
- { "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) },
- { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(recursion_limit) },
+ { "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) },
+ { "pushtablescopy", MOD_PAT, MOD_CTL, CTL_PUSHTABLESCOPY, PO(control) },
+ { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) }, /* Obsolete synonym */
{ "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) },
{ "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) },
{ "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) },
{ "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
{ "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) },
+ { "subject_literal", MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL, PO(control2) },
{ "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) },
{ "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
{ "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
@@ -619,8 +689,10 @@ static modstruct modlist[] = {
{ "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
{ "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
{ "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
+ { "use_length", MOD_PAT, MOD_CTL, CTL_USE_LENGTH, PO(control) },
{ "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) },
{ "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
+ { "utf8_input", MOD_PAT, MOD_CTL, CTL_UTF8_INPUT, PO(control) },
{ "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) }
};
@@ -629,11 +701,14 @@ static modstruct modlist[] = {
/* Controls and options that are supported for use with the POSIX interface. */
#define POSIX_SUPPORTED_COMPILE_OPTIONS ( \
- PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_MULTILINE|PCRE2_UCP|PCRE2_UTF| \
- PCRE2_UNGREEDY)
+ PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_LITERAL|PCRE2_MULTILINE|PCRE2_UCP| \
+ PCRE2_UTF|PCRE2_UNGREEDY)
+
+#define POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS (0)
#define POSIX_SUPPORTED_COMPILE_CONTROLS ( \
- CTL_AFTERTEXT|CTL_ALLAFTERTEXT|CTL_EXPAND|CTL_POSIX|CTL_POSIX_NOSUB)
+ CTL_AFTERTEXT|CTL_ALLAFTERTEXT|CTL_EXPAND|CTL_HEXPAT|CTL_POSIX| \
+ CTL_POSIX_NOSUB|CTL_USE_LENGTH)
#define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0)
@@ -647,9 +722,10 @@ static modstruct modlist[] = {
#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
CTL_BINCODE|CTL_CALLOUT_INFO|CTL_FULLBINCODE|CTL_HEXPAT|CTL_INFO| \
- CTL_JITVERIFY|CTL_MEMORY|CTL_PUSH|CTL_PUSHCOPY|CTL_BSR_SET|CTL_NL_SET)
+ CTL_JITVERIFY|CTL_MEMORY|CTL_FRAMESIZE|CTL_PUSH|CTL_PUSHCOPY| \
+ CTL_PUSHTABLESCOPY|CTL_USE_LENGTH)
-#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (0)
+#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (CTL2_BSR_SET|CTL2_NL_SET)
/* Controls that apply only at compile time with 'push'. */
@@ -659,20 +735,24 @@ static modstruct modlist[] = {
/* Controls that are forbidden with #pop or #popcopy. */
#define NOTPOP_CONTROLS (CTL_HEXPAT|CTL_POSIX|CTL_POSIX_NOSUB|CTL_PUSH| \
- CTL_PUSHCOPY)
+ CTL_PUSHCOPY|CTL_PUSHTABLESCOPY|CTL_USE_LENGTH)
/* Pattern controls that are mutually exclusive. At present these are all in
the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
CTL_POSIX, so it doesn't need its own entries. */
static uint32_t exclusive_pat_controls[] = {
- CTL_POSIX | CTL_HEXPAT,
- CTL_POSIX | CTL_PUSH,
- CTL_POSIX | CTL_PUSHCOPY,
- CTL_EXPAND | CTL_HEXPAT };
+ CTL_POSIX | CTL_PUSH,
+ CTL_POSIX | CTL_PUSHCOPY,
+ CTL_POSIX | CTL_PUSHTABLESCOPY,
+ CTL_PUSH | CTL_PUSHCOPY,
+ CTL_PUSH | CTL_PUSHTABLESCOPY,
+ CTL_PUSHCOPY | CTL_PUSHTABLESCOPY,
+ CTL_EXPAND | CTL_HEXPAT };
/* Data controls that are mutually exclusive. At present these are all in the
first control word. */
+
static uint32_t exclusive_dat_controls[] = {
CTL_ALLUSEDTEXT | CTL_STARTCHAR,
CTL_FINDLIMITS | CTL_NULLCONTEXT };
@@ -690,13 +770,14 @@ typedef struct c1modstruct {
} c1modstruct;
static c1modstruct c1modlist[] = {
- { "bincode", 'B', -1 },
- { "info", 'I', -1 },
- { "global", 'g', -1 },
- { "caseless", 'i', -1 },
- { "multiline", 'm', -1 },
- { "dotall", 's', -1 },
- { "extended", 'x', -1 }
+ { "bincode", 'B', -1 },
+ { "info", 'I', -1 },
+ { "global", 'g', -1 },
+ { "caseless", 'i', -1 },
+ { "multiline", 'm', -1 },
+ { "no_auto_capture", 'n', -1 },
+ { "dotall", 's', -1 },
+ { "extended", 'x', -1 }
};
/* Number of entries in c1modlist. The expansion is fully parenthesized, in the
same way as cmdlistcount, so that it acts as a single operand wherever it is
used (for example as a divisor or next to another multiplicative operator). */
#define C1MODLISTCOUNT (sizeof(c1modlist)/sizeof(c1modstruct))
@@ -817,12 +898,17 @@ static datctl dat_datctl;
static void *patstack[PATSTACKSIZE];
static int patstacknext = 0;
+static void *malloclist[MALLOCLISTSIZE];
+static PCRE2_SIZE malloclistlength[MALLOCLISTSIZE];
+static uint32_t malloclistptr = 0;
+
#ifdef SUPPORT_PCRE2_8
-static regex_t preg = { NULL, NULL, 0, 0, 0 };
+static regex_t preg = { NULL, NULL, 0, 0, 0, 0 };
#endif
static int *dfa_workspace = NULL;
static const uint8_t *locale_tables = NULL;
+static const uint8_t *use_tables = NULL;
static uint8_t locale_name[32];
/* We need buffers for building 16/32-bit strings; 8-bit strings don't need
@@ -848,6 +934,7 @@ static uint8_t *dbuffer = NULL;
static pcre2_code_8 *compiled_code8;
static pcre2_general_context_8 *general_context8, *general_context_copy8;
static pcre2_compile_context_8 *pat_context8, *default_pat_context8;
+static pcre2_convert_context_8 *con_context8, *default_con_context8;
static pcre2_match_context_8 *dat_context8, *default_dat_context8;
static pcre2_match_data_8 *match_data8;
#endif
@@ -856,6 +943,7 @@ static pcre2_match_data_8 *match_data8;
static pcre2_code_16 *compiled_code16;
static pcre2_general_context_16 *general_context16, *general_context_copy16;
static pcre2_compile_context_16 *pat_context16, *default_pat_context16;
+static pcre2_convert_context_16 *con_context16, *default_con_context16;
static pcre2_match_context_16 *dat_context16, *default_dat_context16;
static pcre2_match_data_16 *match_data16;
static PCRE2_SIZE pbuffer16_size = 0; /* Set only when needed */
@@ -866,6 +954,7 @@ static uint16_t *pbuffer16 = NULL;
static pcre2_code_32 *compiled_code32;
static pcre2_general_context_32 *general_context32, *general_context_copy32;
static pcre2_compile_context_32 *pat_context32, *default_pat_context32;
+static pcre2_convert_context_32 *con_context32, *default_con_context32;
static pcre2_match_context_32 *dat_context32, *default_dat_context32;
static pcre2_match_data_32 *match_data32;
static PCRE2_SIZE pbuffer32_size = 0; /* Set only when needed */
@@ -906,6 +995,21 @@ are supported. */
(test_mode == PCRE16_MODE)? (uint32_t)(((PCRE2_SPTR16)(a))[b]) : \
(uint32_t)(((PCRE2_SPTR32)(a))[b]))
+#define CONCTXCPY(a,b) \
+ if (test_mode == PCRE8_MODE) \
+ memcpy(G(a,8),G(b,8),sizeof(pcre2_convert_context_8)); \
+ else if (test_mode == PCRE16_MODE) \
+ memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16)); \
+ else memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32))
+
+#define CONVERT_COPY(a,b,c) \
+ if (test_mode == PCRE8_MODE) \
+ memcpy(G(a,8),(char *)b,c); \
+ else if (test_mode == PCRE16_MODE) \
+ memcpy(G(a,16),(char *)b,(c)*2); \
+ else if (test_mode == PCRE32_MODE) \
+ memcpy(G(a,32),(char *)b,(c)*4)
+
#define DATCTXCPY(a,b) \
if (test_mode == PCRE8_MODE) \
memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)); \
@@ -966,6 +1070,14 @@ are supported. */
else \
a = (void *)pcre2_code_copy_32(G(b,32))
+#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
+ if (test_mode == PCRE8_MODE) \
+ a = (void *)pcre2_code_copy_with_tables_8(G(b,8)); \
+ else if (test_mode == PCRE16_MODE) \
+ a = (void *)pcre2_code_copy_with_tables_16(G(b,16)); \
+ else \
+ a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
+
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
if (test_mode == PCRE8_MODE) \
G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g); \
@@ -974,6 +1086,11 @@ are supported. */
else \
G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
+#define PCRE2_CONVERTED_PATTERN_FREE(a) \
+ if (test_mode == PCRE8_MODE) pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a); \
+ else if (test_mode == PCRE16_MODE) pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a); \
+ else pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)
+
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
if (test_mode == PCRE8_MODE) \
a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j); \
@@ -986,9 +1103,9 @@ are supported. */
if (test_mode == PCRE8_MODE) \
r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size)); \
else if (test_mode == PCRE16_MODE) \
- r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size)); \
+ r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2)); \
else \
- r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size))
+ r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4))
#define PCRE2_GET_OVECTOR_COUNT(a,b) \
if (test_mode == PCRE8_MODE) \
@@ -1085,6 +1202,14 @@ are supported. */
else \
pcre2_match_data_free_32(G(a,32))
+#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
+ if (test_mode == PCRE8_MODE) \
+ a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8)); \
+ else if (test_mode == PCRE16_MODE) \
+ a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16)); \
+ else \
+ a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))
+
#define PCRE2_PATTERN_INFO(a,b,c,d) \
if (test_mode == PCRE8_MODE) \
a = pcre2_pattern_info_8(G(b,8),c,d); \
@@ -1157,6 +1282,38 @@ are supported. */
else \
pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
+#define PCRE2_SET_DEPTH_LIMIT(a,b) \
+ if (test_mode == PCRE8_MODE) \
+ pcre2_set_depth_limit_8(G(a,8),b); \
+ else if (test_mode == PCRE16_MODE) \
+ pcre2_set_depth_limit_16(G(a,16),b); \
+ else \
+ pcre2_set_depth_limit_32(G(a,32),b)
+
+#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
+ if (test_mode == PCRE8_MODE) \
+ r = pcre2_set_glob_separator_8(G(a,8),b); \
+ else if (test_mode == PCRE16_MODE) \
+ r = pcre2_set_glob_separator_16(G(a,16),b); \
+ else \
+ r = pcre2_set_glob_separator_32(G(a,32),b)
+
+#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
+ if (test_mode == PCRE8_MODE) \
+ r = pcre2_set_glob_escape_8(G(a,8),b); \
+ else if (test_mode == PCRE16_MODE) \
+ r = pcre2_set_glob_escape_16(G(a,16),b); \
+ else \
+ r = pcre2_set_glob_escape_32(G(a,32),b)
+
+#define PCRE2_SET_HEAP_LIMIT(a,b) \
+ if (test_mode == PCRE8_MODE) \
+ pcre2_set_heap_limit_8(G(a,8),b); \
+ else if (test_mode == PCRE16_MODE) \
+ pcre2_set_heap_limit_16(G(a,16),b); \
+ else \
+ pcre2_set_heap_limit_32(G(a,32),b)
+
#define PCRE2_SET_MATCH_LIMIT(a,b) \
if (test_mode == PCRE8_MODE) \
pcre2_set_match_limit_8(G(a,8),b); \
@@ -1189,14 +1346,6 @@ are supported. */
else \
pcre2_set_parens_nest_limit_32(G(a,32),b)
-#define PCRE2_SET_RECURSION_LIMIT(a,b) \
- if (test_mode == PCRE8_MODE) \
- pcre2_set_recursion_limit_8(G(a,8),b); \
- else if (test_mode == PCRE16_MODE) \
- pcre2_set_recursion_limit_16(G(a,16),b); \
- else \
- pcre2_set_recursion_limit_32(G(a,32),b)
-
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
if (test_mode == PCRE8_MODE) \
a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
@@ -1339,7 +1488,6 @@ are supported. */
(test_mode == PCRE32_MODE && G(x,32)->f r (y)))
-
/* ----- Two out of three modes are supported ----- */
#else
@@ -1369,6 +1517,9 @@ the three different cases. */
/* ----- Common macros for two-mode cases ----- */
+#define BYTEONE (BITONE/8)
+#define BYTETWO (BITTWO/8)
+
#define CASTFLD(t,a,b) \
((test_mode == G(G(PCRE,BITONE),_MODE))? (t)(G(a,BITONE)->b) : \
(t)(G(a,BITTWO)->b))
@@ -1382,6 +1533,17 @@ the three different cases. */
(uint32_t)(((G(PCRE2_SPTR,BITONE))(a))[b]) : \
(uint32_t)(((G(PCRE2_SPTR,BITTWO))(a))[b]))
+#define CONCTXCPY(a,b) \
+ if (test_mode == G(G(PCRE,BITONE),_MODE)) \
+ memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_convert_context_,BITONE))); \
+ else \
+ memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_convert_context_,BITTWO)))
+
+#define CONVERT_COPY(a,b,c) \
+ (test_mode == G(G(PCRE,BITONE),_MODE))? \
+ memcpy(G(a,BITONE),(char *)b,(c)*BYTEONE) : \
+ memcpy(G(a,BITTWO),(char *)b,(c)*BYTETWO)
+
#define DATCTXCPY(a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_match_context_,BITONE))); \
@@ -1429,12 +1591,24 @@ the three different cases. */
else \
a = (void *)G(pcre2_code_copy_,BITTWO)(G(b,BITTWO))
+#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
+ if (test_mode == G(G(PCRE,BITONE),_MODE)) \
+ a = (void *)G(pcre2_code_copy_with_tables_,BITONE)(G(b,BITONE)); \
+ else \
+ a = (void *)G(pcre2_code_copy_with_tables_,BITTWO)(G(b,BITTWO))
+
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
G(a,BITONE) = G(pcre2_compile_,BITONE)(G(b,BITONE),c,d,e,f,g); \
else \
G(a,BITTWO) = G(pcre2_compile_,BITTWO)(G(b,BITTWO),c,d,e,f,g)
+#define PCRE2_CONVERTED_PATTERN_FREE(a) \
+ if (test_mode == G(G(PCRE,BITONE),_MODE)) \
+ G(pcre2_converted_pattern_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)a); \
+ else \
+ G(pcre2_converted_pattern_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)a)
+
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
a = G(pcre2_dfa_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
@@ -1445,9 +1619,9 @@ the three different cases. */
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
- r = G(pcre2_get_error_message_,BITONE)(a,G(b,BITONE),G(G(b,BITONE),_size)); \
+ r = G(pcre2_get_error_message_,BITONE)(a,G(b,BITONE),G(G(b,BITONE),_size/BYTEONE)); \
else \
- r = G(pcre2_get_error_message_,BITTWO)(a,G(b,BITTWO),G(G(b,BITTWO),_size))
+ r = G(pcre2_get_error_message_,BITTWO)(a,G(b,BITTWO),G(G(b,BITTWO),_size/BYTETWO))
#define PCRE2_GET_OVECTOR_COUNT(a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
@@ -1531,6 +1705,12 @@ the three different cases. */
else \
G(pcre2_match_data_free_,BITTWO)(G(a,BITTWO))
+#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
+ if (test_mode == G(G(PCRE,BITONE),_MODE)) \
+ a = G(pcre2_pattern_convert_,BITONE)(G(b,BITONE),c,d,(G(PCRE2_UCHAR,BITONE) **)e,f,G(g,BITONE)); \
+ else \
+ a = G(pcre2_pattern_convert_,BITTWO)(G(b,BITTWO),c,d,(G(PCRE2_UCHAR,BITTWO) **)e,f,G(g,BITTWO))
+
#define PCRE2_PATTERN_INFO(a,b,c,d) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
a = G(pcre2_pattern_info_,BITONE)(G(b,BITONE),c,d); \
@@ -1587,6 +1767,30 @@ the three different cases. */
else \
G(pcre2_set_compile_recursion_guard_,BITTWO)(G(a,BITTWO),b,c)
+#define PCRE2_SET_DEPTH_LIMIT(a,b) \
+ if (test_mode == G(G(PCRE,BITONE),_MODE)) \
+ G(pcre2_set_depth_limit_,BITONE)(G(a,BITONE),b); \
+ else \
+ G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b)
+
+#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
+ if (test_mode == G(G(PCRE,BITONE),_MODE)) \
+ r = G(pcre2_set_glob_escape_,BITONE)(G(a,BITONE),b); \
+ else \
+ r = G(pcre2_set_glob_escape_,BITTWO)(G(a,BITTWO),b)
+
+#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
+ if (test_mode == G(G(PCRE,BITONE),_MODE)) \
+ r = G(pcre2_set_glob_separator_,BITONE)(G(a,BITONE),b); \
+ else \
+ r = G(pcre2_set_glob_separator_,BITTWO)(G(a,BITTWO),b)
+
+#define PCRE2_SET_HEAP_LIMIT(a,b) \
+ if (test_mode == G(G(PCRE,BITONE),_MODE)) \
+ G(pcre2_set_heap_limit_,BITONE)(G(a,BITONE),b); \
+ else \
+ G(pcre2_set_heap_limit_,BITTWO)(G(a,BITTWO),b)
+
#define PCRE2_SET_MATCH_LIMIT(a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \
@@ -1611,12 +1815,6 @@ the three different cases. */
else \
G(pcre2_set_parens_nest_limit_,BITTWO)(G(a,BITTWO),b)
-#define PCRE2_SET_RECURSION_LIMIT(a,b) \
- if (test_mode == G(G(PCRE,BITONE),_MODE)) \
- G(pcre2_set_recursion_limit_,BITONE)(G(a,BITONE),b); \
- else \
- G(pcre2_set_recursion_limit_,BITTWO)(G(a,BITTWO),b)
-
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
a = G(pcre2_substitute_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
@@ -1754,6 +1952,8 @@ the three different cases. */
#define CASTFLD(t,a,b) (t)(G(a,8)->b)
#define CASTVAR(t,x) (t)G(x,8)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR8)(a))[b])
+#define CONCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_convert_context_8))
+#define CONVERT_COPY(a,b,c) memcpy(G(a,8),(char *)b, c)
#define DATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8))
#define FLD(a,b) G(a,8)->b
#define PATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8))
@@ -1766,8 +1966,11 @@ the three different cases. */
(int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,8) = pcre2_code_copy_8(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_8(G(b,8))
+#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_8(G(b,8))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g)
+#define PCRE2_CONVERTED_PATTERN_FREE(a) \
+ pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j)
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
@@ -1790,6 +1993,7 @@ the three different cases. */
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(G(a,8))
+#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d)
#define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
@@ -1804,11 +2008,14 @@ the three different cases. */
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
+#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b)
+#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_8(G(a,8),b)
+#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_8(G(a,8),b)
+#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_8(G(a,8),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
-#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
(PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l)
@@ -1849,6 +2056,8 @@ the three different cases. */
#define CASTFLD(t,a,b) (t)(G(a,16)->b)
#define CASTVAR(t,x) (t)G(x,16)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR16)(a))[b])
+#define CONCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16))
+#define CONVERT_COPY(a,b,c) memcpy(G(a,16),(char *)b, (c)*2)
#define DATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16))
#define FLD(a,b) G(a,16)->b
#define PATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16))
@@ -1861,12 +2070,15 @@ the three different cases. */
(int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,16) = pcre2_code_copy_16(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_16(G(b,16))
+#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_16(G(b,16))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g)
+#define PCRE2_CONVERTED_PATTERN_FREE(a) \
+ pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j)
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
- r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size))
+ r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_16(G(b,16))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_16(G(b,16))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_16(G(a,16),b)
@@ -1885,6 +2097,7 @@ the three different cases. */
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16))
+#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_16(G(b,16),c,d)
#define PCRE2_PRINTINT(a) pcre2_printint_16(compiled_code16,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
@@ -1899,11 +2112,14 @@ the three different cases. */
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
+#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
+#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_16(G(a,16),b)
+#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_16(G(a,16),b)
+#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
-#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
(PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l)
@@ -1944,6 +2160,8 @@ the three different cases. */
#define CASTFLD(t,a,b) (t)(G(a,32)->b)
#define CASTVAR(t,x) (t)G(x,32)
#define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR32)(a))[b])
+#define CONCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32))
+#define CONVERT_COPY(a,b,c) memcpy(G(a,32),(char *)b, (c)*4)
#define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
#define FLD(a,b) G(a,32)->b
#define PATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))
@@ -1956,12 +2174,15 @@ the three different cases. */
(int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32))
+#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
+#define PCRE2_CONVERTED_PATTERN_FREE(a) \
+ pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
- r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size))
+ r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_32(G(b,32))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_32(G(a,32),b)
@@ -1980,6 +2201,7 @@ the three different cases. */
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
+#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d)
#define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
@@ -1994,11 +2216,14 @@ the three different cases. */
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
+#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
+#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_32(G(a,32),b)
+#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b)
+#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
-#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b)
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
(PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
@@ -2365,50 +2590,96 @@ static const uint8_t tables2[] = {
};
-
+#ifndef HAVE_STRERROR
/*************************************************
-* Local memory functions *
+* Provide strerror() for non-ANSI libraries *
*************************************************/
-/* Alternative memory functions, to test functionality. */
+/* Some old-fashioned systems (e.g. SunOS4) didn't have strerror() in their
+libraries. They may no longer be around, but just in case, we can try to
+provide the same facility by this simple alternative function. */
-static void *my_malloc(size_t size, void *data)
-{
-void *block = malloc(size);
-(void)data;
-if (show_memory)
- fprintf(outfile, "malloc %3d %p\n", (int)size, block);
-return block;
-}
+extern int sys_nerr;
+extern char *sys_errlist[];
-static void my_free(void *block, void *data)
+char *
+strerror(int n) /* Fallback, compiled only when HAVE_STRERROR is not defined */
{
-(void)data;
-if (show_memory)
- fprintf(outfile, "free %p\n", block);
-free(block);
+if (n < 0 || n >= sys_nerr) return "unknown error number"; /* n is not a valid index into sys_errlist */
+return sys_errlist[n]; /* text taken from the externally defined table */
}
+#endif /* HAVE_STRERROR */
+
+
+
+/*************************************************
+* Local memory functions *
+*************************************************/
-/* For recursion malloc/free, to test stacking calls */
+/* Alternative memory functions, to test functionality. */
-#ifdef HEAP_MATCH_RECURSE
-static void *my_stack_malloc(size_t size, void *data)
+/* Allocate a block with malloc(). When show_memory is set, the request is
+logged to outfile and, while there is room in malloclist, the block and its
+length are remembered there so that the length can be shown again when the
+block is passed to my_free().
+
+Arguments:
+  size        number of bytes to allocate
+  data        not used
+
+Returns:      pointer to the new block, or NULL if malloc() failed
+*/
+
+static void *my_malloc(PCRE2_SIZE size, void *data)
{
void *block = malloc(size);
(void)data;
if (show_memory)
- fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
+  {
+  if (block == NULL)
+    {
+    /* PCRE2_SIZE is defined as size_t in pcre2.h, which is unsigned, so the
+    matching conversion is %zu rather than %zd. */
+    fprintf(outfile, "** malloc() failed for %zu\n", size);
+    }
+  else
+    {
+    fprintf(outfile, "malloc %5zu", size);
+#ifdef DEBUG_SHOW_MALLOC_ADDRESSES
+    fprintf(outfile, " %p", block);   /* Not portable */
+#endif
+    if (malloclistptr < MALLOCLISTSIZE)
+      {
+      malloclist[malloclistptr] = block;
+      malloclistlength[malloclistptr++] = size;
+      }
+    else
+      fprintf(outfile, " (not remembered)");   /* The list is full */
+    fprintf(outfile, "\n");
+    }
+  }
return block;
}
-static void my_stack_free(void *block, void *data)
+/* Free a block. When show_memory is set, the release is logged to outfile: if
+the block is found in malloclist, its remembered length is shown and its entry
+is removed by closing up the list; otherwise it is reported as an unremembered
+block.
+
+Arguments:
+  block       the block to free
+  data        not used
+
+Returns:      nothing
+*/
+
+static void my_free(void *block, void *data)
{
(void)data;
if (show_memory)
- fprintf(outfile, "stack_free %p\n", block);
+  {
+  uint32_t i, j;
+  BOOL found = FALSE;
+
+  fprintf(outfile, "free");
+  for (i = 0; i < malloclistptr; i++)
+    {
+    if (block == malloclist[i])
+      {
+      /* The lengths are stored from my_malloc()'s PCRE2_SIZE argument
+      (size_t in pcre2.h), so %zu is used here; this assumes malloclistlength[]
+      has that element type. */
+      fprintf(outfile, " %5zu", malloclistlength[i]);
+      malloclistptr--;
+      for (j = i; j < malloclistptr; j++)   /* Close up the gap */
+        {
+        malloclist[j] = malloclist[j+1];
+        malloclistlength[j] = malloclistlength[j+1];
+        }
+      found = TRUE;
+      break;
+      }
+    }
+  if (!found) fprintf(outfile, " unremembered block");
+#ifdef DEBUG_SHOW_MALLOC_ADDRESSES
+  fprintf(outfile, " %p", block);   /* Not portable */
+#endif
+  fprintf(outfile, "\n");
+  }
free(block);
}
-#endif /* HEAP_MATCH_RECURSE */
+
/*************************************************
@@ -2525,6 +2796,8 @@ static int
pchar(uint32_t c, BOOL utf, FILE *f)
{
int n = 0;
+char tempbuffer[16];
+
if (PRINTOK(c))
{
if (f != NULL) fprintf(f, "%c", c);
@@ -2546,6 +2819,8 @@ if (c < 0x100)
}
if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
+ else n = sprintf(tempbuffer, "\\x{%02x}", c);
+
return n >= 0 ? n : 0;
}
@@ -2657,13 +2932,14 @@ return yield;
*************************************************/
/* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
-For printing *MARK strings, a negative length is given.If handed a NULL file,
+For printing *MARK strings, a negative length is given. If handed a NULL file,
just counts chars without printing. */
static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
{
int yield = 0;
(void)(utf); /* Avoid compiler warning */
+
if (length < 0) length = p[-1];
while (length-- > 0)
{
@@ -2695,7 +2971,7 @@ Returns: number of characters placed in the buffer
static int
ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
{
-register int i, j;
+int i, j;
if (cvalue > 0x7fffffffu)
return -1;
for (i = 0; i < utf8_table1_size; i++)
@@ -2715,16 +2991,22 @@ return i + 1;
#ifdef SUPPORT_PCRE2_16
/*************************************************
-* Convert pattern to 16-bit *
+* Convert string to 16-bit *
*************************************************/
-/* In UTF mode the input is always interpreted as a string of UTF-8 bytes. If
-all the input bytes are ASCII, the space needed for a 16-bit string is exactly
-double the 8-bit size. Otherwise, the size needed for a 16-bit string is no
-more than double, because up to 0xffff uses no more than 3 bytes in UTF-8 but
-possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in
-UTF-16. The result is always left in pbuffer16. Impose a minimum size to save
-repeated re-sizing.
+/* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
+the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
+code values from 0 to 0x7fffffff. However, values greater than the later UTF
+limit of 0x10ffff cause an error. In non-UTF mode the input is interpreted as
+UTF-8 if the utf8_input modifier is set, but an error is generated for values
+greater than 0xffff.
+
+If all the input bytes are ASCII, the space needed for a 16-bit string is
+exactly double the 8-bit size. Otherwise, the size needed for a 16-bit string
+is no more than double, because up to 0xffff uses no more than 3 bytes in UTF-8
+but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes
+in UTF-16. The result is always left in pbuffer16. Impose a minimum size to
+save repeated re-sizing.
Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-16 strings that are invalid,
@@ -2732,7 +3014,7 @@ for the purpose of testing that they are correctly faulted.
Arguments:
p points to a byte string
- utf non-zero if converting to UTF-16
+ utf true in UTF mode
lenptr points to number of bytes in the string (excluding trailing zero)
Returns: 0 on success, with the length updated to the number of 16-bit
@@ -2752,7 +3034,7 @@ if (pbuffer16_size < 2*len + 2)
{
if (pbuffer16 != NULL) free(pbuffer16);
pbuffer16_size = 2*len + 2;
- if (pbuffer16_size < 256) pbuffer16_size = 256;
+ if (pbuffer16_size < 4096) pbuffer16_size = 4096;
pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
if (pbuffer16 == NULL)
{
@@ -2763,7 +3045,7 @@ if (pbuffer16_size < 2*len + 2)
}
pp = pbuffer16;
-if (!utf)
+if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
{
for (; len > 0; len--) *pp++ = *p++;
}
@@ -2772,12 +3054,12 @@ else while (len > 0)
uint32_t c;
int chlen = utf82ord(p, &c);
if (chlen <= 0) return -1;
+ if (!utf && c > 0xffff) return -3;
if (c > 0x10ffff) return -2;
p += chlen;
len -= chlen;
if (c < 0x10000) *pp++ = c; else
{
- if (!utf) return -3;
c -= 0x10000;
*pp++ = 0xD800 | (c >> 10);
*pp++ = 0xDC00 | (c & 0x3ff);
@@ -2794,15 +3076,25 @@ return 0;
#ifdef SUPPORT_PCRE2_32
/*************************************************
-* Convert pattern to 32-bit *
+* Convert string to 32-bit *
*************************************************/
-/* In UTF mode the input is always interpreted as a string of UTF-8 bytes. If
-all the input bytes are ASCII, the space needed for a 32-bit string is exactly
-four times the 8-bit size. Otherwise, the size needed for a 32-bit string is no
-more than four times, because the number of characters must be less than the
-number of bytes. The result is always left in pbuffer32. Impose a minimum size
-to save repeated re-sizing.
+/* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
+the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
+code values from 0 to 0x7fffffff. However, values greater than the later UTF
+limit of 0x10ffff cause an error.
+
+In non-UTF mode the input is interpreted as UTF-8 if the utf8_input modifier
+is set, and no limit is imposed. There is special interpretation of the 0xff
+byte (which is illegal in UTF-8) in this case: it causes the top bit of the
+next character to be set. This provides a way of generating 32-bit characters
+greater than 0x7fffffff.
+
+If all the input bytes are ASCII, the space needed for a 32-bit string is
+exactly four times the 8-bit size. Otherwise, the size needed for a 32-bit
+string is no more than four times, because the number of characters must be
+less than the number of bytes. The result is always left in pbuffer32. Impose a
+minimum size to save repeated re-sizing.
Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-32 strings that are invalid,
@@ -2810,7 +3102,7 @@ for the purpose of testing that they are correctly faulted.
Arguments:
p points to a byte string
- utf true if UTF-8 (to be converted to UTF-32)
+ utf true in UTF mode
lenptr points to number of bytes in the string (excluding trailing zero)
Returns: 0 on success, with the length updated to the number of 32-bit
@@ -2829,7 +3121,7 @@ if (pbuffer32_size < 4*len + 4)
{
if (pbuffer32 != NULL) free(pbuffer32);
pbuffer32_size = 4*len + 4;
- if (pbuffer32_size < 256) pbuffer32_size = 256;
+ if (pbuffer32_size < 8192) pbuffer32_size = 8192;
pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
if (pbuffer32 == NULL)
{
@@ -2840,19 +3132,29 @@ if (pbuffer32_size < 4*len + 4)
}
pp = pbuffer32;
-if (!utf)
+
+if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
{
for (; len > 0; len--) *pp++ = *p++;
}
+
else while (len > 0)
{
+ int chlen;
uint32_t c;
- int chlen = utf82ord(p, &c);
+ uint32_t topbit = 0;
+ if (!utf && *p == 0xff && len > 1)
+ {
+ topbit = 0x80000000u;
+ p++;
+ len--;
+ }
+ chlen = utf82ord(p, &c);
if (chlen <= 0) return -1;
if (utf && c > 0x10ffff) return -2;
p += chlen;
len -= chlen;
- *pp++ = c;
+ *pp++ = c | topbit;
}
*pp = 0;
@@ -3072,7 +3374,7 @@ strncmpic(const uint8_t *s, const uint8_t *t, int n)
while (n--)
{
int c = tolower(*s++) - tolower(*t++);
- if (c) return c;
+ if (c != 0) return c;
}
return 0;
}
@@ -3183,7 +3485,8 @@ switch (m->which)
case MOD_PND: /* Ditto, but not default pattern */
case MOD_PNDP: /* Ditto, allowed for Perl test */
if (dctl != NULL) field = dctl;
- else if (pctl != NULL && (m->which == MOD_PD || ctx != CTX_DEFPAT))
+ else if (pctl != NULL && (m->which == MOD_PD || m->which == MOD_PDP ||
+ ctx != CTX_DEFPAT))
field = pctl;
break;
}
@@ -3318,7 +3621,17 @@ for (;;)
field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
if (field == NULL) return FALSE;
- *((uint32_t *)field) |= modlist[index].value;
+
+ /* /x is a special case; a second appearance changes PCRE2_EXTENDED to
+ PCRE2_EXTENDED_MORE. */
+
+ if (cc == 'x' && (*((uint32_t *)field) & PCRE2_EXTENDED) != 0)
+ {
+ *((uint32_t *)field) &= ~PCRE2_EXTENDED;
+ *((uint32_t *)field) |= PCRE2_EXTENDED_MORE;
+ }
+ else
+ *((uint32_t *)field) |= modlist[index].value;
}
 continue; /* With the next (fullname) modifier */
@@ -3377,8 +3690,8 @@ for (;;)
#else
*((uint16_t *)field) = PCRE2_BSR_UNICODE;
#endif
- if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control &= ~CTL_BSR_SET;
- else dctl->control &= ~CTL_BSR_SET;
+ if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_BSR_SET;
+ else dctl->control2 &= ~CTL2_BSR_SET;
}
else
{
@@ -3387,12 +3700,38 @@ for (;;)
else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
*((uint16_t *)field) = PCRE2_BSR_UNICODE;
else goto INVALID_VALUE;
- if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control |= CTL_BSR_SET;
- else dctl->control |= CTL_BSR_SET;
+ if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_BSR_SET;
+ else dctl->control2 |= CTL2_BSR_SET;
}
pp = ep;
break;
+ case MOD_CHR: /* A single character */
+ *((uint32_t *)field) = *pp++;
+ break;
+
+ case MOD_CON: /* A convert type/options list */
+ for (;; pp++)
+ {
+ uint8_t *colon = (uint8_t *)strchr((const char *)pp, ':');
+ len = ((colon != NULL && colon < ep)? colon:ep) - pp;
+ for (i = 0; i < convertlistcount; i++)
+ {
+ if (strncmpic(pp, (const uint8_t *)convertlist[i].name, len) == 0)
+ {
+ if (*((uint32_t *)field) == CONVERT_UNSET)
+ *((uint32_t *)field) = convertlist[i].option;
+ else
+ *((uint32_t *)field) |= convertlist[i].option;
+ break;
+ }
+ }
+ if (i >= convertlistcount) goto INVALID_VALUE;
+ pp += len;
+ if (*pp != ':') break;
+ }
+ break;
+
case MOD_IN2: /* One or two unsigned integers */
if (!isdigit(*pp)) goto INVALID_VALUE;
uli = strtoul((const char *)pp, &endptr, 10);
@@ -3455,14 +3794,14 @@ for (;;)
if (i == 0)
{
*((uint16_t *)field) = NEWLINE_DEFAULT;
- if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control &= ~CTL_NL_SET;
- else dctl->control &= ~CTL_NL_SET;
+ if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_NL_SET;
+ else dctl->control2 &= ~CTL2_NL_SET;
}
else
{
*((uint16_t *)field) = i;
- if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control |= CTL_NL_SET;
- else dctl->control |= CTL_NL_SET;
+ if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_NL_SET;
+ else dctl->control2 |= CTL2_NL_SET;
}
pp = ep;
break;
@@ -3498,10 +3837,16 @@ for (;;)
char *nn = (char *)field;
if (len > 0) /* Add new name */
{
+ if (len > MAX_NAME_SIZE)
+ {
+ fprintf(outfile, "** Group name in '%s' is too long\n", m->name);
+ return FALSE;
+ }
while (*nn != 0) nn += strlen(nn) + 1;
- if (nn + len + 1 - (char *)field > LENCPYGET)
+ if (nn + len + 2 - (char *)field > LENCPYGET)
{
- fprintf(outfile, "** Too many named '%s' modifiers\n", m->name);
+ fprintf(outfile, "** Too many characters in named '%s' modifiers\n",
+ m->name);
return FALSE;
}
memcpy(nn, pp, len);
@@ -3572,6 +3917,7 @@ static int
pattern_info(int what, void *where, BOOL unsetok)
{
int rc;
+PCRE2_PATTERN_INFO(rc, compiled_code, what, NULL); /* Exercise the code */
PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
if (rc >= 0) return 0;
if (rc != PCRE2_ERROR_UNSET || !unsetok)
@@ -3627,7 +3973,7 @@ Returns: nothing
static void
show_controls(uint32_t controls, uint32_t controls2, const char *before)
{
-fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
before,
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
@@ -3635,13 +3981,16 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s
((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
((controls & CTL_BINCODE) != 0)? " bincode" : "",
- ((controls & CTL_BSR_SET) != 0)? " bsr" : "",
+ ((controls2 & CTL2_BSR_SET) != 0)? " bsr" : "",
((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
+ ((controls2 & CTL2_CALLOUT_EXTRA) != 0)? " callout_extra" : "",
((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
+ ((controls2 & CTL2_CALLOUT_NO_WHERE) != 0)? " callout_no_where" : "",
((controls & CTL_DFA) != 0)? " dfa" : "",
((controls & CTL_EXPAND) != 0)? " expand" : "",
((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
+ ((controls & CTL_FRAMESIZE) != 0)? " framesize" : "",
((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
((controls & CTL_GETALL) != 0)? " getall" : "",
((controls & CTL_GLOBAL) != 0)? " global" : "",
@@ -3651,17 +4000,20 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s
((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
((controls & CTL_MARK) != 0)? " mark" : "",
((controls & CTL_MEMORY) != 0)? " memory" : "",
- ((controls & CTL_NL_SET) != 0)? " newline" : "",
+ ((controls2 & CTL2_NL_SET) != 0)? " newline" : "",
((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
((controls & CTL_POSIX) != 0)? " posix" : "",
((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
((controls & CTL_PUSH) != 0)? " push" : "",
((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
+ ((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
+ ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
+ ((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
}
@@ -3685,7 +4037,7 @@ static void
show_compile_options(uint32_t options, const char *before, const char *after)
{
if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
-else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
before,
((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
@@ -3697,8 +4049,11 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s"
((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
((options & PCRE2_DOTALL) != 0)? " dotall" : "",
((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
+ ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
((options & PCRE2_EXTENDED) != 0)? " extended" : "",
+ ((options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
+ ((options & PCRE2_LITERAL) != 0)? " literal" : "",
((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
@@ -3717,6 +4072,35 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s"
}
+/*************************************************
+* Show compile extra options *
+*************************************************/
+
+/* Called from show_pattern_info() and for unsupported POSIX options.
+
+Arguments:
+ options an options word
+ before text to print before
+ after text to print after
+
+Returns: nothing
+*/
+
+static void
+show_compile_extra_options(uint32_t options, const char *before,
+  const char *after)
+{
+/* Nothing set: show a placeholder between the two surrounding texts. */
+
+if (options == 0)
+  {
+  fprintf(outfile, "%s <none>%s", before, after);
+  return;
+  }
+
+/* There is one %s for each of: before, the four option names, after. An
+option that is not set contributes an empty string. */
+
+fprintf(outfile, "%s%s%s%s%s%s",
+  before,
+  ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
+  ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "",
+  ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "",
+  ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
+  after);
+}
+
+
#ifdef SUPPORT_PCRE2_8
/*************************************************
@@ -3728,10 +4112,11 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s"
static void
show_match_options(uint32_t options)
{
-fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s",
+fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s",
((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "",
((options & PCRE2_DFA_SHORTEST) != 0)? " dfa_shortest" : "",
+ ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
((options & PCRE2_NOTBOL) != 0)? " notbol" : "",
((options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
@@ -3759,13 +4144,13 @@ warning we must initialize cblock_size. */
cblock_size = 0;
#ifdef SUPPORT_PCRE2_8
-if (test_mode == 8) cblock_size = sizeof(pcre2_real_code_8);
+if (test_mode == PCRE8_MODE) cblock_size = sizeof(pcre2_real_code_8);
#endif
#ifdef SUPPORT_PCRE2_16
-if (test_mode == 16) cblock_size = sizeof(pcre2_real_code_16);
+if (test_mode == PCRE16_MODE) cblock_size = sizeof(pcre2_real_code_16);
#endif
#ifdef SUPPORT_PCRE2_32
-if (test_mode == 32) cblock_size = sizeof(pcre2_real_code_32);
+if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32);
#endif
(void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
@@ -3783,6 +4168,44 @@ if (pat_patctl.jit != 0)
/*************************************************
+* Show frame size info for a pattern *
+*************************************************/
+
+/* Output the value that PCRE2_INFO_FRAMESIZE yields for the current compiled
+pattern.
+
+Arguments:    none
+Returns:      nothing
+*/
+
+static void
+show_framesize(void)
+{
+/* The result of pattern_info() is deliberately ignored, so give frame_size a
+defined value in case the call fails without storing anything. */
+
+size_t frame_size = 0;
+(void)pattern_info(PCRE2_INFO_FRAMESIZE, &frame_size, FALSE);
+fprintf(outfile, "Frame size for pcre2_match(): %d\n", (int)frame_size);
+}
+
+
+
+/*************************************************
+* Get and output an error message *
+*************************************************/
+
+/* Fetch the text for an error code into pbuffer and, if that succeeds, write
+it to outfile between two caller-supplied strings. If the text cannot be
+fetched, an internal error is reported instead and neither string is written.
+
+Arguments:
+  errorcode   the error number to interpret
+  before      text to output before the message
+  after       text to output after the message
+
+Returns:      TRUE if the message was output, FALSE otherwise
+*/
+
+static BOOL
+print_error_message(int errorcode, const char *before, const char *after)
+{
+int msglen;
+PCRE2_GET_ERROR_MESSAGE(msglen, errorcode, pbuffer);
+
+if (msglen >= 0)
+  {
+  fprintf(outfile, "%s", before);
+  PCHARSV(CASTVAR(void *, pbuffer), 0, msglen, FALSE, outfile);
+  fprintf(outfile, "%s", after);
+  return TRUE;
+  }
+
+/* A negative value means the error number could not be interpreted. */
+
+fprintf(outfile, "\n** pcre2test internal error: cannot interpret error "
+  "number\n** Unexpected return (%d) from pcre2_get_error_message()\n", msglen);
+return FALSE;
+}
+
+
+/*************************************************
* Callback function for callout enumeration *
*************************************************/
@@ -3849,7 +4272,7 @@ Returns: PR_OK continue processing next line
static int
show_pattern_info(void)
{
-uint32_t compile_options, overall_options;
+uint32_t compile_options, overall_options, extra_options;
if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
{
@@ -3859,16 +4282,36 @@ if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
if ((pat_patctl.control & CTL_INFO) != 0)
{
+ int rc;
void *nametable;
uint8_t *start_bits;
- BOOL match_limit_set, recursion_limit_set;
+ BOOL heap_limit_set, match_limit_set, depth_limit_set;
uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
- match_limit, minlength, nameentrysize, namecount, newline_convention,
- recursion_limit;
+ depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount,
+ newline_convention;
+
+ /* Exercise the error route. */
+
+ PCRE2_PATTERN_INFO(rc, compiled_code, 999, NULL);
+ (void)rc;
/* These info requests may return PCRE2_ERROR_UNSET. */
+ switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE))
+ {
+ case 0:
+ heap_limit_set = TRUE;
+ break;
+
+ case PCRE2_ERROR_UNSET:
+ heap_limit_set = FALSE;
+ break;
+
+ default:
+ return PR_ABEND;
+ }
+
switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
{
case 0:
@@ -3883,14 +4326,14 @@ if ((pat_patctl.control & CTL_INFO) != 0)
return PR_ABEND;
}
- switch(pattern_info(PCRE2_INFO_RECURSIONLIMIT, &recursion_limit, TRUE))
+ switch(pattern_info(PCRE2_INFO_DEPTHLIMIT, &depth_limit, TRUE))
{
case 0:
- recursion_limit_set = TRUE;
+ depth_limit_set = TRUE;
break;
case PCRE2_ERROR_UNSET:
- recursion_limit_set = FALSE;
+ depth_limit_set = FALSE;
break;
default:
@@ -3927,11 +4370,14 @@ if ((pat_patctl.control & CTL_INFO) != 0)
if (maxlookbehind > 0)
fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
+ if (heap_limit_set)
+ fprintf(outfile, "Heap limit = %u\n", heap_limit);
+
if (match_limit_set)
fprintf(outfile, "Match limit = %u\n", match_limit);
- if (recursion_limit_set)
- fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
+ if (depth_limit_set)
+ fprintf(outfile, "Depth limit = %u\n", depth_limit);
if (namecount > 0)
{
@@ -3966,6 +4412,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
+ pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE);
/* Remove UTF/UCP if they were there only because of forbid_utf. This saves
cluttering up the verification output of non-UTF test files. */
@@ -3993,9 +4440,12 @@ if ((pat_patctl.control & CTL_INFO) != 0)
}
}
+ if (extra_options != 0)
+ show_compile_extra_options(extra_options, "Extra options:", "\n");
+
if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
- if ((pat_patctl.control & CTL_BSR_SET) != 0 ||
+ if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 ||
(FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
"any Unicode newline" : "CR, LF, or CRLF");
@@ -4024,6 +4474,10 @@ if ((pat_patctl.control & CTL_INFO) != 0)
fprintf(outfile, "Forced newline is any Unicode newline\n");
break;
+ case PCRE2_NEWLINE_NUL:
+ fprintf(outfile, "Forced newline is NUL\n");
+ break;
+
default:
break;
}
@@ -4100,15 +4554,9 @@ if ((pat_patctl.control & CTL_INFO) != 0)
else
{
#ifdef SUPPORT_JIT
- int len;
fprintf(outfile, "JIT compilation was not successful");
- if (jitrc != 0)
- {
- fprintf(outfile, " (");
- PCRE2_GET_ERROR_MESSAGE(len, jitrc, pbuffer);
- PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
- fprintf(outfile, ")");
- }
+ if (jitrc != 0 && !print_error_message(jitrc, " (", ")"))
+ return PR_ABEND;
fprintf(outfile, "\n");
#else
fprintf(outfile, "JIT support is not available in this version of PCRE2\n");
@@ -4123,14 +4571,9 @@ if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0);
if (errorcode != 0)
{
- int len;
fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
- if (errorcode < 0)
- {
- PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
- PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
- }
- fprintf(outfile, "\n");
+ if (errorcode < 0 && !print_error_message(errorcode, "", "\n"))
+ return PR_ABEND;
return PR_SKIP;
}
}
@@ -4150,16 +4593,14 @@ Arguments:
rc the error code
msg an initial message for what failed
-Returns: nothing
+Returns: FALSE if print_error_message() fails
*/
-static void
+static BOOL
serial_error(int rc, const char *msg)
{
fprintf(outfile, "%s failed: error %d: ", msg, rc);
-PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
-PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
-fprintf(outfile, "\n");
+/* The prefix above ends without a newline; on success print_error_message()
+adds the message text and the terminating newline. Its result is handed
+straight back so that the caller can detect an uninterpretable error code. */
+return print_error_message(rc, "", "\n");
}
@@ -4197,7 +4638,7 @@ if (endf == filename)
*fptr = fopen((const char *)filename, mode);
if (*fptr == NULL)
{
- fprintf(outfile, "** Failed to open '%s'\n", filename);
+ fprintf(outfile, "** Failed to open '%s': %s\n", filename, strerror(errno));
return PR_ABEND;
}
@@ -4227,7 +4668,7 @@ process_command(void)
FILE *f;
PCRE2_SIZE serial_size;
size_t i;
-int rc, cmd, cmdlen;
+int rc, cmd, cmdlen, yield;
uint16_t first_listed_newline;
const char *cmdname;
uint8_t *argptr, *serial;
@@ -4238,6 +4679,7 @@ if (restrict_for_perl_test)
return PR_ABEND;
}
+yield = PR_OK;
cmd = CMD_UNKNOWN;
cmdlen = 0;
@@ -4337,6 +4779,7 @@ switch(cmd)
PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
}
if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
+ if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
if ((pat_patctl.control & CTL_ANYINFO) != 0)
{
rc = show_pattern_info();
@@ -4360,7 +4803,8 @@ switch(cmd)
general_context);
if (rc < 0)
{
- serial_error(rc, "Serialization");
+ fclose(f);
+ if (!serial_error(rc, "Serialization")) return PR_ABEND;
break;
}
@@ -4374,6 +4818,7 @@ switch(cmd)
if (fwrite(serial, 1, serial_size, f) != serial_size)
{
fprintf(outfile, "** Wrong return from fwrite()\n");
+ fclose(f);
return PR_ABEND;
}
@@ -4401,38 +4846,50 @@ switch(cmd)
{
fprintf(outfile, "** Failed to get memory (size %lu) for #load\n",
(unsigned long int)serial_size);
+ fclose(f);
return PR_ABEND;
}
- if (fread(serial, 1, serial_size, f) != serial_size)
+ i = fread(serial, 1, serial_size, f);
+ fclose(f);
+
+ if (i != serial_size)
{
fprintf(outfile, "** Wrong return from fread()\n");
- return PR_ABEND;
+ yield = PR_ABEND;
}
- fclose(f);
-
- PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial);
- if (rc < 0) serial_error(rc, "Get number of codes"); else
+ else
{
- if (rc + patstacknext > PATSTACKSIZE)
+ PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial);
+ if (rc < 0)
{
- fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n",
- rc, (rc == 1)? "" : "s");
- rc = PATSTACKSIZE - patstacknext;
- fprintf(outfile, "** Decoding %d pattern%s\n", rc,
- (rc == 1)? "" : "s");
+ if (!serial_error(rc, "Get number of codes")) yield = PR_ABEND;
}
- PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial,
- general_context);
- if (rc < 0) serial_error(rc, "Deserialization");
+ else
+ {
+ if (rc + patstacknext > PATSTACKSIZE)
+ {
+ fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n",
+ rc, (rc == 1)? "" : "s");
+ rc = PATSTACKSIZE - patstacknext;
+ fprintf(outfile, "** Decoding %d pattern%s\n", rc,
+ (rc == 1)? "" : "s");
+ }
+ PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial,
+ general_context);
+ if (rc < 0)
+ {
+ if (!serial_error(rc, "Deserialization")) yield = PR_ABEND;
+ }
else patstacknext += rc;
+ }
}
free(serial);
break;
}
-return PR_OK;
+return yield;
}
@@ -4459,11 +4916,12 @@ process_pattern(void)
BOOL utf;
uint32_t k;
uint8_t *p = buffer;
-const uint8_t *use_tables;
unsigned int delimiter = *p++;
int errorcode;
void *use_pat_context;
+uint32_t use_forbid_utf = forbid_utf;
PCRE2_SIZE patlen;
+PCRE2_SIZE valgrind_access_length;
PCRE2_SIZE erroroffset;
/* Initialize the context and pattern/data controls for this test from the
@@ -4507,8 +4965,34 @@ patlen = p - buffer - 2;
if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
utf = (pat_patctl.options & PCRE2_UTF) != 0;
-/* Check for mutually exclusive modifiers. At present, these are all in the
-first control word. */
+/* The utf8_input modifier is not allowed in 8-bit mode, and is mutually
+exclusive with the utf modifier. */
+
+if ((pat_patctl.control & CTL_UTF8_INPUT) != 0)
+ {
+ if (test_mode == PCRE8_MODE)
+ {
+ fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n");
+ return PR_SKIP;
+ }
+ if (utf)
+ {
+ fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n");
+ return PR_SKIP;
+ }
+ }
+
+/* The convert and posix modifiers are mutually exclusive. */
+
+if (pat_patctl.convert_type != CONVERT_UNSET &&
+ (pat_patctl.control & CTL_POSIX) != 0)
+ {
+ fprintf(outfile, "** The convert and posix modifiers are mutually exclusive\n");
+ return PR_SKIP;
+ }
+
+/* Check for mutually exclusive control modifiers. At present, these are all in
+the first control word. */
for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
{
@@ -4548,12 +5032,14 @@ if ((pat_patctl.control & CTL_HEXPAT) != 0)
if (c == '\'' || c == '"')
{
+ uint8_t *pq = pp;
for (;; pp++)
{
d = *pp;
if (d == 0)
{
- fprintf(outfile, "** Missing closing quote in hex pattern\n");
+ fprintf(outfile, "** Missing closing quote in hex pattern: "
+ "opening quote is at offset " PTR_SPEC ".\n", pq - buffer - 2);
return PR_SKIP;
}
if (d == c) break;
@@ -4567,8 +5053,8 @@ if ((pat_patctl.control & CTL_HEXPAT) != 0)
{
if (!isxdigit(c))
{
- fprintf(outfile, "** Unexpected non-hex-digit '%c' in hex pattern: "
- "quote missing?\n", c);
+ fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset "
+ PTR_SPEC " in hex pattern: quote missing?\n", c, pp - buffer - 2);
return PR_SKIP;
}
if (*pp == 0)
@@ -4579,8 +5065,8 @@ if ((pat_patctl.control & CTL_HEXPAT) != 0)
d = *pp;
if (!isxdigit(d))
{
- fprintf(outfile, "** Unexpected non-hex-digit '%c' in hex pattern: "
- "quote missing?\n", d);
+ fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset "
+ PTR_SPEC " in hex pattern: quote missing?\n", d, pp - buffer - 1);
return PR_SKIP;
}
c = toupper(c);
@@ -4738,7 +5224,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
const char *msg = "** Ignored with POSIX interface:";
#endif
- if (test_mode != 8)
+ if (test_mode != PCRE8_MODE)
{
fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n");
return PR_SKIP;
@@ -4760,6 +5246,16 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, "");
msg = "";
}
+
+ if ((FLD(pat_context, extra_options) &
+ ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS) != 0)
+ {
+ show_compile_extra_options(
+ FLD(pat_context, extra_options) & ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS,
+ msg, "");
+ msg = "";
+ }
+
if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 ||
(pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0)
{
@@ -4769,6 +5265,10 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
}
if (local_newline_default != 0) prmsg(&msg, "#newline_default");
+ if (FLD(pat_context, max_pattern_length) != PCRE2_UNSET)
+ prmsg(&msg, "max_pattern_length");
+ if (FLD(pat_context, parens_nest_limit) != PARENS_NEST_DEFAULT)
+ prmsg(&msg, "parens_nest_limit");
if (msg[0] == 0) fprintf(outfile, "\n");
@@ -4778,10 +5278,17 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB;
if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
+ if ((pat_patctl.options & PCRE2_LITERAL) != 0) cflags |= REG_NOSPEC;
if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
+ if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) != 0)
+ {
+ preg.re_endp = (char *)pbuffer8 + patlen;
+ cflags |= REG_PEND;
+ }
+
rc = regcomp(&preg, (char *)pbuffer8, cflags);
/* Compiling failed */
@@ -4844,7 +5351,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
/* Handle compiling via the native interface. Controls that act later are
ignored with "push". Replacements are locked out. */
-if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY)) != 0)
+if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
{
if (pat_patctl.replacement[0] != 0)
{
@@ -4902,38 +5409,165 @@ switch(errorcode)
break;
}
-/* The pattern is now in pbuffer[8|16|32], with the length in patlen. By
-default, however, we pass a zero-terminated pattern. The length is passed only
-if we had a hex pattern. */
+/* The pattern is now in pbuffer[8|16|32], with the length in code units in
+patlen. If it is to be converted, copy the result back afterwards so that it
+ends up back in the usual place. */
+
+if (pat_patctl.convert_type != CONVERT_UNSET)
+ {
+ int rc;
+ int convert_return = PR_OK;
+ uint32_t convert_options = pat_patctl.convert_type;
+ void *converted_pattern;
+ PCRE2_SIZE converted_length;
+
+ if (pat_patctl.convert_length != 0)
+ {
+ converted_length = pat_patctl.convert_length;
+ converted_pattern = malloc(converted_length * code_unit_size);
+ if (converted_pattern == NULL)
+ {
+ fprintf(outfile, "** Failed: malloc failed for converted pattern\n");
+ return PR_SKIP;
+ }
+ }
+ else converted_pattern = NULL; /* Let the library allocate */
+
+ if (utf) convert_options |= PCRE2_CONVERT_UTF;
+ if ((pat_patctl.options & PCRE2_NO_UTF_CHECK) != 0)
+ convert_options |= PCRE2_CONVERT_NO_UTF_CHECK;
+
+ CONCTXCPY(con_context, default_con_context);
-if ((pat_patctl.control & CTL_HEXPAT) == 0) patlen = PCRE2_ZERO_TERMINATED;
+ if (pat_patctl.convert_glob_escape != 0)
+ {
+ uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 0 :
+ pat_patctl.convert_glob_escape;
+ PCRE2_SET_GLOB_ESCAPE(rc, con_context, escape);
+ if (rc != 0)
+ {
+ fprintf(outfile, "** Invalid glob escape '%c'\n",
+ pat_patctl.convert_glob_escape);
+ convert_return = PR_SKIP;
+ goto CONVERT_FINISH;
+ }
+ }
+
+ if (pat_patctl.convert_glob_separator != 0)
+ {
+ PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator);
+ if (rc != 0)
+ {
+ fprintf(outfile, "** Invalid glob separator '%c'\n",
+ pat_patctl.convert_glob_separator);
+ convert_return = PR_SKIP;
+ goto CONVERT_FINISH;
+ }
+ }
+
+ PCRE2_PATTERN_CONVERT(rc, pbuffer, patlen, convert_options,
+ &converted_pattern, &converted_length, con_context);
+
+ if (rc != 0)
+ {
+ fprintf(outfile, "** Pattern conversion error at offset %zu: ",
+ converted_length);
+ convert_return = print_error_message(rc, "", "\n")? PR_SKIP:PR_ABEND;
+ }
+
+ /* Output the converted pattern, then copy it. */
+
+ else
+ {
+ PCHARSV(converted_pattern, 0, converted_length, utf, outfile);
+ fprintf(outfile, "\n");
+ patlen = converted_length;
+ CONVERT_COPY(pbuffer, converted_pattern, converted_length + 1);
+ }
+
+ /* Free the converted pattern. */
+
+ CONVERT_FINISH:
+ if (pat_patctl.convert_length != 0)
+ free(converted_pattern);
+ else
+ PCRE2_CONVERTED_PATTERN_FREE(converted_pattern);
+
+ /* Return if conversion was unsuccessful. */
+
+ if (convert_return != PR_OK) return convert_return;
+ }
+
+/* By default we pass a zero-terminated pattern, but a length is passed if
+"use_length" was specified or this is a hex pattern (which might contain binary
+zeros). When valgrind is supported, arrange for the unused part of the buffer
+to be marked as no access. */
+
+valgrind_access_length = patlen;
+if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0)
+ {
+ patlen = PCRE2_ZERO_TERMINATED;
+ valgrind_access_length += 1; /* For the terminating zero */
+ }
+
+#ifdef SUPPORT_VALGRIND
+#ifdef SUPPORT_PCRE2_8
+if (test_mode == PCRE8_MODE && pbuffer8 != NULL)
+ {
+ VALGRIND_MAKE_MEM_NOACCESS(pbuffer8 + valgrind_access_length,
+ pbuffer8_size - valgrind_access_length);
+ }
+#endif
+#ifdef SUPPORT_PCRE2_16
+if (test_mode == PCRE16_MODE && pbuffer16 != NULL)
+ {
+ VALGRIND_MAKE_MEM_NOACCESS(pbuffer16 + valgrind_access_length,
+ pbuffer16_size - valgrind_access_length*sizeof(uint16_t));
+ }
+#endif
+#ifdef SUPPORT_PCRE2_32
+if (test_mode == PCRE32_MODE && pbuffer32 != NULL)
+ {
+ VALGRIND_MAKE_MEM_NOACCESS(pbuffer32 + valgrind_access_length,
+ pbuffer32_size - valgrind_access_length*sizeof(uint32_t));
+ }
+#endif
+#else /* Valgrind not supported */
+(void)valgrind_access_length; /* Avoid compiler warning */
+#endif
/* If #newline_default has been used and the library was not compiled with an
appropriate default newline setting, local_newline_default will be non-zero. We
use this if there is no explicit newline modifier. */
-if ((pat_patctl.control & CTL_NL_SET) == 0 && local_newline_default != 0)
+if ((pat_patctl.control2 & CTL2_NL_SET) == 0 && local_newline_default != 0)
{
SETFLD(pat_context, newline_convention, local_newline_default);
}
-/* The nullcontext modifier is used to test calling pcre2_compile() with a NULL
-context. */
+/* The null_context modifier is used to test calling pcre2_compile() with a
+NULL context. */
use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)?
NULL : PTR(pat_context);
+/* If PCRE2_LITERAL is set, set use_forbid_utf zero because PCRE2_NEVER_UTF
+and PCRE2_NEVER_UCP are invalid with it. */
+
+if ((pat_patctl.options & PCRE2_LITERAL) != 0) use_forbid_utf = 0;
+
/* Compile many times when timing. */
if (timeit > 0)
{
- register int i;
+ int i;
clock_t time_taken = 0;
for (i = 0; i < timeit; i++)
{
clock_t start_time = clock();
PCRE2_COMPILE(compiled_code, pbuffer, patlen,
- pat_patctl.options|forbid_utf, &errorcode, &erroroffset, use_pat_context);
+ pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
+ use_pat_context);
time_taken += clock() - start_time;
if (TEST(compiled_code, !=, NULL))
{ SUB1(pcre2_code_free, compiled_code); }
@@ -4946,58 +5580,25 @@ if (timeit > 0)
/* A final compile that is used "for real". */
-PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|forbid_utf,
+PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|use_forbid_utf,
&errorcode, &erroroffset, use_pat_context);
-/* Compilation failed; go back for another re, skipping to blank line
-if non-interactive. */
-
-if (TEST(compiled_code, ==, NULL))
- {
- int len;
- fprintf(outfile, "Failed: error %d at offset %d: ", errorcode,
- (int)erroroffset);
- PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
- PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
- fprintf(outfile, "\n");
- return PR_SKIP;
- }
-
-/* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
-locked out at compile time, but we must also check for occurrences of \P, \p,
-and \X, which are only supported when Unicode is supported. */
-
-if (forbid_utf != 0)
- {
- if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
- {
- fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
- "#forbid_utf command\n");
- return PR_SKIP;
- }
- }
-
-/* Remember the maximum lookbehind, for partial matching. */
-
-if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
- return PR_ABEND;
-
/* Call the JIT compiler if requested. When timing, we must free and recompile
the pattern each time because that is the only way to free the JIT compiled
code. We know that compilation will always succeed. */
-if (pat_patctl.jit != 0)
+if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0)
{
if (timeit > 0)
{
- register int i;
+ int i;
clock_t time_taken = 0;
for (i = 0; i < timeit; i++)
{
clock_t start_time;
SUB1(pcre2_code_free, compiled_code);
PCRE2_COMPILE(compiled_code, pbuffer, patlen,
- pat_patctl.options|forbid_utf, &errorcode, &erroroffset,
+ pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
use_pat_context);
start_time = clock();
PCRE2_JIT_COMPILE(jitrc,compiled_code, pat_patctl.jit);
@@ -5014,10 +5615,67 @@ if (pat_patctl.jit != 0)
}
}
+/* If valgrind is supported, mark the pbuffer as accessible again. The 16-bit
+and 32-bit buffers can be marked completely undefined, but we must leave the
+pattern in the 8-bit buffer defined because it may be read from a callout
+during matching. */
+
+#ifdef SUPPORT_VALGRIND
+#ifdef SUPPORT_PCRE2_8
+if (test_mode == PCRE8_MODE)
+ {
+ VALGRIND_MAKE_MEM_UNDEFINED(pbuffer8 + valgrind_access_length,
+ pbuffer8_size - valgrind_access_length);
+ }
+#endif
+#ifdef SUPPORT_PCRE2_16
+if (test_mode == PCRE16_MODE)
+ {
+ VALGRIND_MAKE_MEM_UNDEFINED(pbuffer16, pbuffer16_size);
+ }
+#endif
+#ifdef SUPPORT_PCRE2_32
+if (test_mode == PCRE32_MODE)
+ {
+ VALGRIND_MAKE_MEM_UNDEFINED(pbuffer32, pbuffer32_size);
+ }
+#endif
+#endif
+
+/* Compilation failed; go back for another re, skipping to blank line
+if non-interactive. */
+
+if (TEST(compiled_code, ==, NULL))
+ {
+ fprintf(outfile, "Failed: error %d at offset %d: ", errorcode,
+ (int)erroroffset);
+ if (!print_error_message(errorcode, "", "\n")) return PR_ABEND;
+ return PR_SKIP;
+ }
+
+/* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
+locked out at compile time, but we must also check for occurrences of \P, \p,
+and \X, which are only supported when Unicode is supported. */
+
+if (forbid_utf != 0)
+ {
+ if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
+ {
+ fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
+ "#forbid_utf command\n");
+ return PR_SKIP;
+ }
+ }
+
+/* Remember the maximum lookbehind, for partial matching. */
+
+if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
+ return PR_ABEND;
+
/* If an explicit newline modifier was given, set the information flag in the
pattern so that it is preserved over push/pop. */
-if ((pat_patctl.control & CTL_NL_SET) != 0)
+if ((pat_patctl.control2 & CTL2_NL_SET) != 0)
{
SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
}
@@ -5025,6 +5683,7 @@ if ((pat_patctl.control & CTL_NL_SET) != 0)
/* Output code size and other information if requested. */
if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
+if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
if ((pat_patctl.control & CTL_ANYINFO) != 0)
{
int rc = show_pattern_info();
@@ -5045,17 +5704,25 @@ if ((pat_patctl.control & CTL_PUSH) != 0)
SET(compiled_code, NULL);
}
-/* The "pushcopy" control is similar, but pushes a copy of the pattern. This
-tests the pcre2_code_copy() function. */
+/* The "pushcopy" and "pushtablescopy" controls are similar, but push a
+copy of the pattern, the latter with a copy of its character tables. This tests
+the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */
-if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
+if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
{
if (patstacknext >= PATSTACKSIZE)
{
fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
return PR_ABEND;
}
- PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
+ if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
+ {
+ PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
+ }
+ else
+ {
+ PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
+ compiled_code); }
}
return PR_OK;
@@ -5064,11 +5731,23 @@ return PR_OK;
/*************************************************
-* Check match or recursion limit *
+* Check heap, match or depth limit *
*************************************************/
+/* This is used for DFA, normal, and JIT fast matching. For DFA matching it
+should only be called with the third argument set to PCRE2_ERROR_DEPTHLIMIT.
+
+Arguments:
+ pp the subject string
+ ulen length of subject or PCRE2_ZERO_TERMINATED
+ errnumber defines which limit to test
+ msg string to include in final message
+
+Returns: the return from the final match function call
+*/
+
static int
-check_match_limit(uint8_t *pp, size_t ulen, int errnumber, const char *msg)
+check_match_limit(uint8_t *pp, PCRE2_SIZE ulen, int errnumber, const char *msg)
{
int capcount;
uint32_t min = 0;
@@ -5076,22 +5755,39 @@ uint32_t mid = 64;
uint32_t max = UINT32_MAX;
PCRE2_SET_MATCH_LIMIT(dat_context, max);
-PCRE2_SET_RECURSION_LIMIT(dat_context, max);
+PCRE2_SET_DEPTH_LIMIT(dat_context, max);
+PCRE2_SET_HEAP_LIMIT(dat_context, max);
for (;;)
{
- if (errnumber == PCRE2_ERROR_MATCHLIMIT)
+ if (errnumber == PCRE2_ERROR_HEAPLIMIT)
+ {
+ PCRE2_SET_HEAP_LIMIT(dat_context, mid);
+ }
+ else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
{
PCRE2_SET_MATCH_LIMIT(dat_context, mid);
}
else
{
- PCRE2_SET_RECURSION_LIMIT(dat_context, mid);
+ PCRE2_SET_DEPTH_LIMIT(dat_context, mid);
+ }
+
+ if ((dat_datctl.control & CTL_DFA) != 0)
+ {
+ if (dfa_workspace == NULL)
+ dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
+ if (dfa_matched++ == 0)
+ dfa_workspace[0] = -1; /* To catch bad restart */
+ PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
+ dat_datctl.options, match_data,
+ PTR(dat_context), dfa_workspace, DFA_WS_DIMENSION);
}
- if ((pat_patctl.control & CTL_JITFAST) != 0)
+ else if ((pat_patctl.control & CTL_JITFAST) != 0)
PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
dat_datctl.options, match_data, PTR(dat_context));
+
else
PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
dat_datctl.options, match_data, PTR(dat_context));
@@ -5105,13 +5801,23 @@ for (;;)
capcount == PCRE2_ERROR_NOMATCH ||
capcount == PCRE2_ERROR_PARTIAL)
{
+ /* If we've not hit the error with a heap limit less than the size of the
+ initial stack frame vector, the heap is not being used, so the minimum
+ limit is zero; there's no need to go on. The other limits are always
+ greater than zero. */
+
+ if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < START_FRAMES_SIZE/1024)
+ {
+ fprintf(outfile, "Minimum %s limit = 0\n", msg);
+ break;
+ }
if (mid == min + 1)
{
fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
break;
}
max = mid;
- mid = (min + mid)/2;
+ mid = (min + max)/2;
}
else break; /* Some other error */
}
@@ -5126,11 +5832,11 @@ return capcount;
*************************************************/
/* Called from a PCRE2 library as a result of the (?C) item. We print out where
-we are in the match. Yield zero unless more callouts than the fail count, or
-the callout data is not zero. The only differences in the callout block for
-different code unit widths are that the pointers to the subject, the most
-recent MARK, and a callout argument string point to strings of the appropriate
-width. Casts can be used to deal with this.
+we are in the match (unless suppressed). Yield zero unless more callouts than
+the fail count, or the callout data is not zero. The only differences in the
+callout block for different code unit widths are that the pointers to the
+subject, the most recent MARK, and a callout argument string point to strings
+of the appropriate width. Casts can be used to deal with this.
Argument: a pointer to a callout block
Return:
@@ -5139,16 +5845,43 @@ Return:
static int
callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
{
+FILE *f, *fdefault;
uint32_t i, pre_start, post_start, subject_length;
PCRE2_SIZE current_position;
BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
+BOOL callout_where = (dat_datctl.control2 & CTL2_CALLOUT_NO_WHERE) == 0;
-/* This FILE is used for echoing the subject. This is done only once in simple
-cases. */
+/* The FILE f is used for echoing the subject string if it is non-NULL. This
+happens only once in simple cases, but we want to repeat after any additional
+output caused by CALLOUT_EXTRA. */
-FILE *f = (first_callout || callout_capture || cb->callout_string != NULL)?
- outfile : NULL;
+fdefault = (!first_callout && !callout_capture && cb->callout_string == NULL)?
+ NULL : outfile;
+
+if ((dat_datctl.control2 & CTL2_CALLOUT_EXTRA) != 0)
+ {
+ f = outfile;
+ switch (cb->callout_flags)
+ {
+ case PCRE2_CALLOUT_BACKTRACK:
+ fprintf(f, "Backtrack\n");
+ break;
+
+ case PCRE2_CALLOUT_STARTMATCH|PCRE2_CALLOUT_BACKTRACK:
+ fprintf(f, "Backtrack\nNo other matching paths\n");
+ /* Fall through */
+
+ case PCRE2_CALLOUT_STARTMATCH:
+ fprintf(f, "New match attempt\n");
+ break;
+
+ default:
+ f = fdefault;
+ break;
+ }
+ }
+else f = fdefault;
/* For a callout with a string argument, show the string first because there
isn't a tidy way to fit it in the rest of the data. */
@@ -5177,7 +5910,7 @@ if (callout_capture)
if (cb->callout_string == NULL)
fprintf(outfile, "Callout %d:", cb->callout_number);
fprintf(outfile, " last capture = %d\n", cb->capture_last);
- for (i = 0; i < cb->capture_top * 2; i += 2)
+ for (i = 2; i < cb->capture_top * 2; i += 2)
{
fprintf(outfile, "%2d: ", i/2);
if (cb->offset_vector[i] == PCRE2_UNSET)
@@ -5191,75 +5924,84 @@ if (callout_capture)
}
}
-/* Re-print the subject in canonical form (with escapes for non-printing
-characters), the first time, or if giving full details. On subsequent calls in
-the same match, we use PCHARS() just to find the printed lengths of the
-substrings. */
+/* Unless suppressed, re-print the subject in canonical form (with escapes for
+non-printing characters), the first time, or if giving full details. On
+subsequent calls in the same match, we use PCHARS() just to find the printed
+lengths of the substrings. */
-if (f != NULL) fprintf(f, "--->");
-
-/* The subject before the match start. */
+if (callout_where)
+ {
+ if (f != NULL) fprintf(f, "--->");
-PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
+ /* The subject before the match start. */
-/* If a lookbehind is involved, the current position may be earlier than the
-match start. If so, use the match start instead. */
+ PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
-current_position = (cb->current_position >= cb->start_match)?
- cb->current_position : cb->start_match;
+ /* If a lookbehind is involved, the current position may be earlier than the
+ match start. If so, use the match start instead. */
-/* The subject between the match start and the current position. */
+ current_position = (cb->current_position >= cb->start_match)?
+ cb->current_position : cb->start_match;
-PCHARS(post_start, cb->subject, cb->start_match,
- current_position - cb->start_match, utf, f);
+ /* The subject between the match start and the current position. */
-/* Print from the current position to the end. */
+ PCHARS(post_start, cb->subject, cb->start_match,
+ current_position - cb->start_match, utf, f);
-PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
- utf, f);
+ /* Print from the current position to the end. */
-/* Calculate the total subject printed length (no print). */
+ PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
+ utf, f);
-PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
+ /* Calculate the total subject printed length (no print). */
-if (f != NULL) fprintf(f, "\n");
+ PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
-/* For automatic callouts, show the pattern offset. Otherwise, for a numerical
-callout whose number has not already been shown with captured strings, show the
-number here. A callout with a string argument has been displayed above. */
+ if (f != NULL) fprintf(f, "\n");
-if (cb->callout_number == 255)
- {
- fprintf(outfile, "%+3d ", (int)cb->pattern_position);
- if (cb->pattern_position > 99) fprintf(outfile, "\n ");
- }
-else
- {
- if (callout_capture || cb->callout_string != NULL) fprintf(outfile, " ");
- else fprintf(outfile, "%3d ", cb->callout_number);
- }
+ /* For automatic callouts, show the pattern offset. Otherwise, for a
+ numerical callout whose number has not already been shown with captured
+ strings, show the number here. A callout with a string argument has been
+ displayed above. */
-/* Now show position indicators */
+ if (cb->callout_number == 255)
+ {
+ fprintf(outfile, "%+3d ", (int)cb->pattern_position);
+ if (cb->pattern_position > 99) fprintf(outfile, "\n ");
+ }
+ else
+ {
+ if (callout_capture || cb->callout_string != NULL) fprintf(outfile, " ");
+ else fprintf(outfile, "%3d ", cb->callout_number);
+ }
-for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
-fprintf(outfile, "^");
+ /* Now show position indicators */
-if (post_start > 0)
- {
- for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
+ for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
fprintf(outfile, "^");
- }
-for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
- fprintf(outfile, " ");
+ if (post_start > 0)
+ {
+ for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
+ fprintf(outfile, "^");
+ }
+
+ for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
+ fprintf(outfile, " ");
-fprintf(outfile, "%.*s",
- (int)((cb->next_item_length == 0)? 1 : cb->next_item_length),
- pbuffer8 + cb->pattern_position);
+ if (cb->next_item_length != 0)
+ fprintf(outfile, "%.*s", (int)(cb->next_item_length),
+ pbuffer8 + cb->pattern_position);
+ else
+ fprintf(outfile, "End of pattern");
+
+ fprintf(outfile, "\n");
+ }
-fprintf(outfile, "\n");
first_callout = FALSE;
+/* Show any mark info */
+
if (cb->mark != last_callout_mark)
{
if (cb->mark == NULL)
@@ -5273,6 +6015,8 @@ if (cb->mark != last_callout_mark)
last_callout_mark = cb->mark;
}
+/* Show callout data */
+
if (callout_data_ptr != NULL)
{
int callout_data = *((int32_t *)callout_data_ptr);
@@ -5283,8 +6027,19 @@ if (callout_data_ptr != NULL)
}
}
-return (cb->callout_number != dat_datctl.cfail[0])? 0 :
- (++callout_count >= dat_datctl.cfail[1])? 1 : 0;
+/* Keep count and give the appropriate return code */
+
+callout_count++;
+
+if (cb->callout_number == dat_datctl.cerror[0] &&
+ callout_count >= dat_datctl.cerror[1])
+ return PCRE2_ERROR_CALLOUT;
+
+if (cb->callout_number == dat_datctl.cfail[0] &&
+ callout_count >= dat_datctl.cfail[1])
+ return 1;
+
+return 0;
}
@@ -5300,10 +6055,10 @@ Arguments:
utf TRUE for utf
capcount return from pcre2_match()
-Returns: nothing
+Returns: FALSE if print_error_message() fails
*/
-static void
+static BOOL
copy_and_get(BOOL utf, int capcount)
{
int i;
@@ -5322,9 +6077,7 @@ for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
if (rc < 0)
{
fprintf(outfile, "Copy substring %d failed (%d): ", n, rc);
- PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
- PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
- fprintf(outfile, "\n");
+ if (!print_error_message(rc, "", "\n")) return FALSE;
}
else
{
@@ -5332,9 +6085,7 @@ for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
if (rc < 0)
{
fprintf(outfile, "Get substring %d length failed (%d): ", n, rc);
- PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
- PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
- fprintf(outfile, "\n");
+ if (!print_error_message(rc, "", "\n")) return FALSE;
}
else if (length2 != length)
{
@@ -5381,9 +6132,7 @@ for (;;)
if (rc < 0)
{
fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc);
- PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
- PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
- fprintf(outfile, "\n");
+ if (!print_error_message(rc, "", "\n")) return FALSE;
}
else
{
@@ -5391,9 +6140,7 @@ for (;;)
if (rc < 0)
{
fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc);
- PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
- PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
- fprintf(outfile, "\n");
+ if (!print_error_message(rc, "", "\n")) return FALSE;
}
else if (length2 != length)
{
@@ -5421,9 +6168,7 @@ for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++)
if (rc < 0)
{
fprintf(outfile, "Get substring %d failed (%d): ", n, rc);
- PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
- PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
- fprintf(outfile, "\n");
+ if (!print_error_message(rc, "", "\n")) return FALSE;
}
else
{
@@ -5467,9 +6212,7 @@ for (;;)
if (rc < 0)
{
fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc);
- PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
- PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
- fprintf(outfile, "\n");
+ if (!print_error_message(rc, "", "\n")) return FALSE;
}
else
{
@@ -5494,9 +6237,7 @@ if ((dat_datctl.control & CTL_GETALL) != 0)
if (rc < 0)
{
fprintf(outfile, "get substring list failed (%d): ", rc);
- PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
- PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
- fprintf(outfile, "\n");
+ if (!print_error_message(rc, "", "\n")) return FALSE;
}
else
{
@@ -5511,6 +6252,8 @@ if ((dat_datctl.control & CTL_GETALL) != 0)
PCRE2_SUBSTRING_LIST_FREE(stringlist);
}
}
+
+return TRUE;
}
@@ -5531,7 +6274,7 @@ Returns: PR_OK continue processing next line
static int
process_data(void)
{
-PCRE2_SIZE len, ulen;
+PCRE2_SIZE len, ulen, arg_ulen;
uint32_t gmatched;
uint32_t c, k;
uint32_t g_notempty = 0;
@@ -5539,6 +6282,7 @@ uint8_t *p, *pp, *start_rep;
size_t needlen;
void *use_dat_context;
BOOL utf;
+BOOL subject_literal;
#ifdef SUPPORT_PCRE2_8
uint8_t *q8 = NULL;
@@ -5550,6 +6294,8 @@ uint16_t *q16 = NULL;
uint32_t *q32 = NULL;
#endif
+subject_literal = (pat_patctl.control2 & CTL2_SUBJECT_LITERAL) != 0;
+
/* Copy the default context and data control blocks to the active ones. Then
copy from the pattern the controls that can be set in either the pattern or the
data. This allows them to be overridden in the data line. We do not do this for
@@ -5561,6 +6307,7 @@ memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD);
strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement);
+if (dat_datctl.jitstack == 0) dat_datctl.jitstack = pat_patctl.jitstack;
/* Initialize for scanning the data line. */
@@ -5622,7 +6369,9 @@ if (dbuffer == NULL || needlen >= dbuffer_size)
SETCASTPTR(q, dbuffer); /* Sets q8, q16, or q32, as appropriate. */
/* Scan the data line, interpreting data escapes, and put the result into a
-buffer of the appropriate width. In UTF mode, input can be UTF-8. */
+buffer of the appropriate width. In UTF mode, input is always UTF-8; otherwise,
+in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier.
+*/
while ((c = *p++) != 0)
{
@@ -5691,11 +6440,20 @@ while ((c = *p++) != 0)
continue;
}
- /* Handle a non-escaped character */
+ /* Handle a non-escaped character. In non-UTF 32-bit mode with utf8_input
+ set, do the fudge for setting the top bit. */
- if (c != '\\')
+ if (c != '\\' || subject_literal)
{
- if (utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
+ uint32_t topbit = 0;
+ if (test_mode == PCRE32_MODE && c == 0xff && *p != 0)
+ {
+ topbit = 0x80000000;
+ c = *p++;
+ }
+ if ((utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0) &&
+ HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
+ c |= topbit;
}
/* Handle backslash escapes */
@@ -5883,6 +6641,7 @@ ENDSTRING:
SET(*q, 0);
len = CASTVAR(uint8_t *, q) - dbuffer; /* Length in bytes */
ulen = len/code_unit_size; /* Length in code units */
+arg_ulen = ulen; /* Value to use in match arg */
/* If the string was terminated by \= we must now interpret modifiers. */
@@ -5911,11 +6670,15 @@ if (pat_patctl.replacement[0] != 0 &&
}
/* We now have the subject in dbuffer, with len containing the byte length, and
-ulen containing the code unit length. Move the data to the end of the buffer so
-that a read over the end can be caught by valgrind or other means. If we have
-explicit valgrind support, mark the unused start of the buffer unaddressable.
-If we are using the POSIX interface, or testing zero-termination, we must
-include the terminating zero in the usable data. */
+ulen containing the code unit length, with a copy in arg_ulen for use in match
+function arguments (this gets changed to PCRE2_ZERO_TERMINATED when the
+zero_terminate modifier is present).
+
+Move the data to the end of the buffer so that a read over the end can be
+caught by valgrind or other means. If we have explicit valgrind support, mark
+the unused start of the buffer unaddressable. If we are using the POSIX
+interface, or testing zero-termination, we must include the terminating zero in
+the usable data. */
c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
(dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
@@ -5936,13 +6699,16 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
regmatch_t *pmatch = NULL;
const char *msg = "** Ignored with POSIX interface:";
- if (dat_datctl.cfail[0] != CFAIL_UNSET || dat_datctl.cfail[1] != CFAIL_UNSET)
+ if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET)
+ prmsg(&msg, "callout_error");
+ if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET)
prmsg(&msg, "callout_fail");
if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
prmsg(&msg, "copy");
if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
prmsg(&msg, "get");
if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
+ if (dat_datctl.offset != 0) prmsg(&msg, "offset");
if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0)
{
@@ -5961,13 +6727,29 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
if (msg[0] == 0) fprintf(outfile, "\n");
if (dat_datctl.oveccount > 0)
+ {
+ /* Get one regmatch_t per requested ovector entry. If the memory cannot be
+ obtained, report it and return PR_OK without attempting the match. */
pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount);
+ if (pmatch == NULL)
+ {
+ /* The count is printed with %u; the former "%du" printed the number
+ followed by a stray literal 'u'. */
+ fprintf(outfile, "** Failed to get memory for recording matching "
+ "information (size set = %u)\n", (unsigned int)dat_datctl.oveccount);
+ return PR_OK;
+ }
+ }
+
+ if (dat_datctl.startend[0] != CFORE_UNSET)
+ {
+ pmatch[0].rm_so = dat_datctl.startend[0];
+ pmatch[0].rm_eo = (dat_datctl.startend[1] != 0)?
+ dat_datctl.startend[1] : len;
+ eflags |= REG_STARTEND;
+ }
+
if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
- rc = regexec(&preg, (const char *)pp + dat_datctl.offset,
- dat_datctl.oveccount, pmatch, eflags);
+ rc = regexec(&preg, (const char *)pp, dat_datctl.oveccount, pmatch, eflags);
if (rc != 0)
{
(void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size);
@@ -5984,18 +6766,27 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
{
if (pmatch[i].rm_so >= 0)
{
+ PCRE2_SIZE start = pmatch[i].rm_so;
+ PCRE2_SIZE end = pmatch[i].rm_eo;
+ if (start > end)
+ {
+ start = pmatch[i].rm_eo;
+ end = pmatch[i].rm_so;
+ fprintf(outfile, "Start of matched string is beyond its end - "
+ "displaying from end to start.\n");
+ }
fprintf(outfile, "%2d: ", (int)i);
- PCHARSV(pp, pmatch[i].rm_so,
- pmatch[i].rm_eo - pmatch[i].rm_so, utf, outfile);
+ PCHARSV(pp, start, end - start, utf, outfile);
fprintf(outfile, "\n");
+
if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
(dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
{
fprintf(outfile, "%2d+ ", (int)i);
- PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
- utf, outfile);
- fprintf(outfile, "\n");
- }
+ /* Note: don't use the start/end variables here because we want to
+ show the text from what is reported as the end. */
+ PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile);
+ fprintf(outfile, "\n"); }
}
}
}
@@ -6007,11 +6798,8 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
/* Handle matching via the native interface. Check for consistency of
modifiers. */
-if ((dat_datctl.control & (CTL_DFA|CTL_FINDLIMITS)) == (CTL_DFA|CTL_FINDLIMITS))
- {
- fprintf(outfile, "** Finding match limits is not relevant for DFA matching: ignored\n");
- dat_datctl.control &= ~CTL_FINDLIMITS;
- }
+if (dat_datctl.startend[0] != CFORE_UNSET)
+ fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n");
/* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
matching, even if the JIT compiler was used. */
@@ -6026,7 +6814,7 @@ if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
/* Handle passing the subject as zero-terminated. */
if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
- ulen = PCRE2_ZERO_TERMINATED;
+ arg_ulen = PCRE2_ZERO_TERMINATED;
/* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a
NULL context. */
@@ -6034,10 +6822,16 @@ NULL context. */
use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)?
NULL : PTR(dat_context);
-/* Enable display of malloc/free if wanted. */
+/* Enable display of malloc/free if wanted. We can do this only if either the
+pattern or the subject is processed with a context. */
show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
+if (show_memory &&
+ (pat_patctl.control & dat_datctl.control & CTL_NULLCONTEXT) != 0)
+ fprintf(outfile, "** \\=memory requires either a pattern or a subject "
+ "context: ignored\n");
+
/* Create and assign a JIT stack if requested. */
if (dat_datctl.jitstack != 0)
@@ -6089,6 +6883,14 @@ else
PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
}
+if (CASTVAR(void *, match_data) == NULL)
+ {
+ fprintf(outfile, "** Failed to get memory for recording matching "
+ "information (size requested: %d)\n", dat_datctl.oveccount);
+ max_oveccount = 0;
+ return PR_OK;
+ }
+
/* Replacement processing is ignored for DFA matching. */
if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
@@ -6220,19 +7022,17 @@ if (dat_datctl.replacement[0] != 0)
rlen = PCRE2_ZERO_TERMINATED;
else
rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
- PCRE2_SUBSTITUTE(rc, compiled_code, pp, ulen, dat_datctl.offset,
+ PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
dat_datctl.options|xoptions, match_data, dat_context,
rbuffer, rlen, nbuffer, &nsize);
if (rc < 0)
{
- PCRE2_SIZE msize;
fprintf(outfile, "Failed: error %d", rc);
if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET)
fprintf(outfile, " at offset %ld in replacement", (long int)nsize);
fprintf(outfile, ": ");
- PCRE2_GET_ERROR_MESSAGE(msize, rc, pbuffer);
- PCHARSV(CASTVAR(void *, pbuffer), 0, msize, FALSE, outfile);
+ if (!print_error_message(rc, "", "")) return PR_ABEND;
if (rc == PCRE2_ERROR_NOMEMORY &&
(xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)
fprintf(outfile, ": %ld code units are needed", (long int)nsize);
@@ -6287,7 +7087,7 @@ else for (gmatched = 0;; gmatched++)
if (timeitm > 0)
{
- register int i;
+ int i;
clock_t start_time, time_taken;
if ((dat_datctl.control & CTL_DFA) != 0)
@@ -6302,7 +7102,7 @@ else for (gmatched = 0;; gmatched++)
start_time = clock();
for (i = 0; i < timeitm; i++)
{
- PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen,
+ PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
}
@@ -6313,7 +7113,7 @@ else for (gmatched = 0;; gmatched++)
start_time = clock();
for (i = 0; i < timeitm; i++)
{
- PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen,
+ PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen,
dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
use_dat_context);
}
@@ -6324,7 +7124,7 @@ else for (gmatched = 0;; gmatched++)
start_time = clock();
for (i = 0; i < timeitm; i++)
{
- PCRE2_MATCH(capcount, compiled_code, pp, ulen,
+ PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen,
dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
use_dat_context);
}
@@ -6335,19 +7135,36 @@ else for (gmatched = 0;; gmatched++)
(double)CLOCKS_PER_SEC);
}
- /* Find the match and recursion limits if requested. The recursion limit
- is not relevant for JIT. */
+ /* Find the heap, match and depth limits if requested. The match and heap
+ limits are not relevant for DFA matching and the depth and heap limits are
+ not relevant for JIT. The return from check_match_limit() is the return from
+ the final call to pcre2_match() or pcre2_dfa_match(). */
if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
{
- capcount = check_match_limit(pp, ulen, PCRE2_ERROR_MATCHLIMIT, "match");
- if (FLD(compiled_code, executable_jit) == NULL)
- (void)check_match_limit(pp, ulen, PCRE2_ERROR_RECURSIONLIMIT,
- "recursion");
+ capcount = 0; /* This stops compiler warnings */
+
+ if ((dat_datctl.control & CTL_DFA) == 0 &&
+ (FLD(compiled_code, executable_jit) == NULL ||
+ (dat_datctl.options & PCRE2_NO_JIT) != 0))
+ {
+ (void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT, "heap");
+ }
+
+ capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
+ "match");
+
+ if (FLD(compiled_code, executable_jit) == NULL ||
+ (dat_datctl.options & PCRE2_NO_JIT) != 0 ||
+ (dat_datctl.control & CTL_DFA) != 0)
+ {
+ capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
+ "depth");
+ }
}
/* Otherwise just run a single match, setting up a callout if required (the
- default). */
+ default). There is a copy of the pattern in pbuffer8 for use by callouts. */
else
{
@@ -6372,7 +7189,7 @@ else for (gmatched = 0;; gmatched++)
dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
if (dfa_matched++ == 0)
dfa_workspace[0] = -1; /* To catch bad restart */
- PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen,
+ PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
if (capcount == 0)
@@ -6384,10 +7201,10 @@ else for (gmatched = 0;; gmatched++)
else
{
if ((pat_patctl.control & CTL_JITFAST) != 0)
- PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
+ PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
dat_datctl.options | g_notempty, match_data, use_dat_context);
else
- PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
+ PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
dat_datctl.options | g_notempty, match_data, use_dat_context);
if (capcount == 0)
{
@@ -6588,7 +7405,7 @@ else for (gmatched = 0;; gmatched++)
/* Process copy/get strings */
- copy_and_get(utf, capcount);
+ if (!copy_and_get(utf, capcount)) return PR_ABEND;
} /* End of handling a successful match */
@@ -6631,7 +7448,7 @@ else for (gmatched = 0;; gmatched++)
/* Process copy/get strings */
- copy_and_get(utf, 1);
+ if (!copy_and_get(utf, 1)) return PR_ABEND;
break; /* Out of the /g loop */
} /* End of handling partial match */
@@ -6687,8 +7504,6 @@ else for (gmatched = 0;; gmatched++)
else
{
- int mlen;
-
switch(capcount)
{
case PCRE2_ERROR_NOMATCH:
@@ -6713,8 +7528,7 @@ else for (gmatched = 0;; gmatched++)
default:
fprintf(outfile, "Failed: error %d: ", capcount);
- PCRE2_GET_ERROR_MESSAGE(mlen, capcount, pbuffer);
- PCHARSV(CASTVAR(void *, pbuffer), 0, mlen, FALSE, outfile);
+ if (!print_error_message(capcount, "", "")) return PR_ABEND;
if (capcount <= PCRE2_ERROR_UTF8_ERR1 &&
capcount >= PCRE2_ERROR_UTF32_ERR2)
{
@@ -6800,6 +7614,7 @@ else for (gmatched = 0;; gmatched++)
pp += end_offset * code_unit_size;
len -= end_offset * code_unit_size;
ulen -= end_offset;
+ if (arg_ulen != PCRE2_ZERO_TERMINATED) arg_ulen -= end_offset;
}
}
} /* End of global loop */
@@ -6901,7 +7716,9 @@ printf(" -16 use the 16-bit library\n");
#ifdef SUPPORT_PCRE2_32
printf(" -32 use the 32-bit library\n");
#endif
-printf(" -b set default pattern control 'fullbincode'\n");
+printf(" -ac set default pattern modifier PCRE2_AUTO_CALLOUT\n");
+printf(" -AC as -ac, but also set subject 'callout_extra' modifier\n");
+printf(" -b set default pattern modifier 'fullbincode'\n");
printf(" -C show PCRE2 compile-time options and exit\n");
printf(" -C arg show a specific compile-time option and exit with its\n");
printf(" value if numeric (else 0). The arg can be:\n");
@@ -6911,20 +7728,22 @@ printf(" ebcdic compiled for EBCDIC character code [0,1]\n");
printf(" ebcdic-nl NL code if compiled for EBCDIC\n");
printf(" jit just-in-time compiler supported [0, 1]\n");
printf(" linksize internal link size [2, 3, 4]\n");
-printf(" newline newline type [CR, LF, CRLF, ANYCRLF, ANY]\n");
+printf(" newline newline type [CR, LF, CRLF, ANYCRLF, ANY, NUL]\n");
printf(" pcre2-8 8 bit library support enabled [0, 1]\n");
printf(" pcre2-16 16 bit library support enabled [0, 1]\n");
printf(" pcre2-32 32 bit library support enabled [0, 1]\n");
printf(" unicode Unicode and UTF support enabled [0, 1]\n");
-printf(" -d set default pattern control 'debug'\n");
-printf(" -dfa set default subject control 'dfa'\n");
+printf(" -d set default pattern modifier 'debug'\n");
+printf(" -dfa set default subject modifier 'dfa'\n");
printf(" -error <n,m,..> show messages for error numbers, then exit\n");
printf(" -help show usage information\n");
-printf(" -i set default pattern control 'info'\n");
-printf(" -jit set default pattern control 'jit'\n");
+printf(" -i set default pattern modifier 'info'\n");
+printf(" -jit set default pattern modifier 'jit'\n");
+printf(" -jitverify set default pattern modifier 'jitverify'\n");
+printf(" -LM list pattern and subject modifiers, then exit\n");
printf(" -q quiet: do not output PCRE2 version number at start\n");
-printf(" -pattern <s> set default pattern control fields\n");
-printf(" -subject <s> set default subject control fields\n");
+printf(" -pattern <s> set default pattern modifier fields\n");
+printf(" -subject <s> set default subject modifier fields\n");
printf(" -S <n> set stack size to <n> megabytes\n");
printf(" -t [<n>] time compilation and execution, repeating <n> times\n");
printf(" -tm [<n>] time execution (matching) only, repeating <n> times\n");
@@ -6952,19 +7771,18 @@ static int
c_option(const char *arg)
{
uint32_t optval;
+unsigned int i = COPTLISTCOUNT;
int yield = 0;
-if (arg != NULL)
+if (arg != NULL && arg[0] != CHAR_MINUS)
{
- unsigned int i;
-
for (i = 0; i < COPTLISTCOUNT; i++)
if (strcmp(arg, coptlist[i].name) == 0) break;
if (i >= COPTLISTCOUNT)
{
fprintf(stderr, "** Unknown -C option '%s'\n", arg);
- return -1;
+ return 0;
}
switch (coptlist[i].type)
@@ -7002,7 +7820,7 @@ if (arg != NULL)
{
char ucname[16];
strcpy(ucname, coptlist[i].name);
-for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i];
+/* Upper-case the copied option name before setting the VMS symbol. toupper()
+is a function, so it must be called rather than subscripted; the cast keeps
+its argument within the unsigned char range that <ctype.h> requires. */
+for (i = 0; ucname[i] != 0; i++)
+  ucname[i] = (char)toupper((unsigned char)ucname[i]);
vms_setsymbol(ucname, 0, optval);
}
#endif
@@ -7022,14 +7840,13 @@ printf(" EBCDIC code page %s or similar\n", pcrz_cpversion());
#endif
#endif
-#ifdef SUPPORT_PCRE2_8
-printf(" 8-bit support\n");
-#endif
-#ifdef SUPPORT_PCRE2_16
-printf(" 16-bit support\n");
-#endif
-#ifdef SUPPORT_PCRE2_32
-printf(" 32-bit support\n");
+(void)PCRE2_CONFIG(PCRE2_CONFIG_COMPILED_WIDTHS, &optval);
+if (optval & 1) printf(" 8-bit support\n");
+if (optval & 2) printf(" 16-bit support\n");
+if (optval & 4) printf(" 32-bit support\n");
+
+#ifdef SUPPORT_VALGRIND
+printf(" Valgrind support\n");
#endif
(void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &optval);
@@ -7058,24 +7875,127 @@ print_newline_config(optval, FALSE);
(void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
printf(" \\R matches %s\n", optval? "CR, LF, or CRLF only" :
"all Unicode newlines");
-#ifdef NEVER_BACKSLASH_C
-printf(" \\C is not supported\n");
-#else
-printf(" \\C is supported\n");
-#endif
+(void)PCRE2_CONFIG(PCRE2_CONFIG_NEVER_BACKSLASH_C, &optval);
+printf(" \\C is %ssupported\n", optval? "not ":"");
(void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval);
printf(" Internal link size = %d\n", optval);
(void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
printf(" Parentheses nest limit = %d\n", optval);
+(void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval);
+printf(" Default heap limit = %d\n", optval);
(void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
printf(" Default match limit = %d\n", optval);
-(void)PCRE2_CONFIG(PCRE2_CONFIG_RECURSIONLIMIT, &optval);
-printf(" Default recursion depth limit = %d\n", optval);
-(void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &optval);
-printf(" Match recursion uses %s", optval? "stack" : "heap");
+(void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);
+printf(" Default depth limit = %d\n", optval);
+return 0;
+}
+
+
+
+/*************************************************
+* Display one modifier *
+*************************************************/
+/* Print one modifier name preceded by a single flag character. The flag is an
+asterisk when subject modifiers are being listed and the modifier's type is
+MOD_PND or MOD_PNDP; in every other case it is a space.
+
+Arguments:
+  m             points to the modifier's table entry
+  for_pattern   TRUE when listing pattern modifiers, FALSE for subject ones
+
+Returns:        nothing
+*/
+
+static void
+display_one_modifier(modstruct *m, BOOL for_pattern)
+{
+uint32_t flag = ' ';
+
+if (!for_pattern && (m->which == MOD_PND || m->which == MOD_PNDP))
+  flag = '*';
+
+printf("%c%s", flag, m->name);
+}
+
+
+
+/*************************************************
+* Display pattern or subject modifiers *
+*************************************************/
+
+/* Print either the pattern modifiers or the subject modifiers in two columns.
+In order to do this, first scan modlist[] without printing to get a list of the
+indices of the modifiers that are required. The first half of that list is then
+printed in the left-hand column and the remainder in the right-hand column.
+
+Arguments:
+  for_pattern   TRUE for pattern modifiers, FALSE for subject modifiers
+  title         string to be used in title
+
+Returns:        nothing
+*/
+
+static void
+display_selected_modifiers(BOOL for_pattern, const char *title)
+{
+const size_t colwidth = 27;   /* Width of the name field in the left column */
+uint32_t i, j;
+uint32_t n = 0;
+uint32_t half;
+uint32_t list[MODLISTCOUNT];
+
+for (i = 0; i < MODLISTCOUNT; i++)
+  {
+  BOOL is_pattern = TRUE;
+  modstruct *m = modlist + i;
+
+  switch (m->which)
+    {
+    case MOD_CTC:  /* Compile context */
+    case MOD_PAT:  /* Pattern */
+    case MOD_PATP: /* Pattern, OK for Perl-compatible test */
+    break;
+
+    /* The MOD_PND and MOD_PNDP modifiers are precisely those that affect
+    subjects, but can be given with a pattern. We list them as subject
+    modifiers, but marked with an asterisk. */
+
+    case MOD_CTM:  /* Match context */
+    case MOD_DAT:  /* Subject line */
+    case MOD_PND:  /* As PD, but not default pattern */
+    case MOD_PNDP: /* As PND, OK for Perl-compatible test */
+    is_pattern = FALSE;
+    break;
+
+    default: printf("** Unknown type for modifier '%s'\n", m->name);
+    /* Fall through */
+    case MOD_PD:   /* Pattern or subject */
+    case MOD_PDP:  /* As PD, OK for Perl-compatible test */
+    is_pattern = for_pattern;
+    break;
+    }
+
+  if (for_pattern == is_pattern) list[n++] = i;
+  }
+
+/* Now print from the list in two columns. */
+
+printf("-------------- %s MODIFIERS --------------\n", title);
+
+half = (n + 1)/2;
+for (i = 0, j = half; i < half; i++, j++)
+  {
+  modstruct *m = modlist + list[i];
+  display_one_modifier(m, for_pattern);
+  if (j < n)
+    {
+    /* Pad the left-hand name out to the column width. The subtraction is
+    done only when the name is shorter than the column, because an unsigned
+    result would otherwise wrap round to a huge count; a name that fills or
+    overflows the column is followed by a single separating space. */
+
+    size_t namelen = strlen(m->name);
+    uint32_t k = (namelen < colwidth)? (uint32_t)(colwidth - namelen) : 1;
+    while (k-- > 0) printf(" ");
+    display_one_modifier(modlist + list[j], for_pattern);
+    }
+  printf("\n");
+  }
+}
+
+
+
+/*************************************************
+* Display the list of modifiers *
+*************************************************/
+
+static void
+display_modifiers(void) /* Print an explanatory note, then both modifier lists */
+{
+printf( /* Explain the asterisk marker and modifiers that appear in both lists */
+ "An asterisk on a subject modifier means that it may be given on a pattern\n"
+ "line, in order to apply to all subjects matched by that pattern. Modifiers\n"
+ "that are listed for both patterns and subjects have different effects in\n"
+ "each case.\n\n");
+display_selected_modifiers(TRUE, "PATTERN"); /* Pattern modifiers come first */
printf("\n");
-return 0;
+display_selected_modifiers(FALSE, "SUBJECT"); /* Then the subject modifiers */
}
@@ -7087,9 +8007,9 @@ return 0;
int
main(int argc, char **argv)
{
+uint32_t temp;
uint32_t yield = 0;
uint32_t op = 1;
-uint32_t stack_size;
BOOL notdone = TRUE;
BOOL quiet = FALSE;
BOOL showtotaltimes = FALSE;
@@ -7132,6 +8052,20 @@ if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
return 1;
}
+/* Check that bad options are diagnosed. */
+
+if (PCRE2_CONFIG(999, NULL) != PCRE2_ERROR_BADOPTION ||
+ PCRE2_CONFIG(999, &temp) != PCRE2_ERROR_BADOPTION)
+ {
+ fprintf(stderr, "** Error in pcre2_config(): bad option not diagnosed\n");
+ return 1;
+ }
+
+/* This configuration option is now obsolete, but running a quick check ensures
+that its code is covered. */
+
+(void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &temp);
+
/* Get buffers from malloc() so that valgrind will check their misuse when
debugging. They grow automatically when very long lines are read. The 16-
and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
@@ -7151,12 +8085,17 @@ _setmode( _fileno( stdout ), _O_BINARY );
/* Initialization that does not depend on the running mode. */
locale_name[0] = 0;
+
memset(&def_patctl, 0, sizeof(patctl));
+def_patctl.convert_type = CONVERT_UNSET;
+
memset(&def_datctl, 0, sizeof(datctl));
def_datctl.oveccount = DEFAULT_OVECCOUNT;
def_datctl.copy_numbers[0] = -1;
def_datctl.get_numbers[0] = -1;
-def_datctl.cfail[0] = def_datctl.cfail[1] = CFAIL_UNSET;
+def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET;
+def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
+def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
/* Scan command line options. */
@@ -7166,6 +8105,14 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
char *arg = argv[op];
unsigned long uli;
+ /* List modifiers and exit. */
+
+ if (strcmp(arg, "-LM") == 0)
+ {
+ display_modifiers();
+ goto EXIT;
+ }
+
/* Display and/or set return code for configuration options. */
if (strcmp(arg, "-C") == 0)
@@ -7174,32 +8121,45 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
goto EXIT;
}
- /* Select operating mode */
+ /* Select operating mode. Ensure that pcre2_config() is called in 16-bit
+ and 32-bit modes because that won't happen naturally when 8-bit is also
+ configured. Also call some other functions that are not otherwise used. This
+ means that a coverage report won't claim there are uncalled functions. */
if (strcmp(arg, "-8") == 0)
{
#ifdef SUPPORT_PCRE2_8
test_mode = PCRE8_MODE;
+ (void)pcre2_set_bsr_8(pat_context8, 999);
+ (void)pcre2_set_newline_8(pat_context8, 999);
#else
fprintf(stderr,
"** This version of PCRE2 was built without 8-bit support\n");
exit(1);
#endif
}
+
else if (strcmp(arg, "-16") == 0)
{
#ifdef SUPPORT_PCRE2_16
test_mode = PCRE16_MODE;
+ (void)pcre2_config_16(PCRE2_CONFIG_VERSION, NULL);
+ (void)pcre2_set_bsr_16(pat_context16, 999);
+ (void)pcre2_set_newline_16(pat_context16, 999);
#else
fprintf(stderr,
"** This version of PCRE2 was built without 16-bit support\n");
exit(1);
#endif
}
+
else if (strcmp(arg, "-32") == 0)
{
#ifdef SUPPORT_PCRE2_32
test_mode = PCRE32_MODE;
+ (void)pcre2_config_32(PCRE2_CONFIG_VERSION, NULL);
+ (void)pcre2_set_bsr_32(pat_context32, 999);
+ (void)pcre2_set_newline_32(pat_context32, 999);
#else
fprintf(stderr,
"** This version of PCRE2 was built without 32-bit support\n");
@@ -7221,6 +8181,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
exit(1);
#else
int rc;
+ uint32_t stack_size;
struct rlimit rlim;
if (U32OVERFLOW(uli))
{
@@ -7252,16 +8213,23 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
/* Set some common pattern and subject controls */
+ else if (strcmp(arg, "-AC") == 0)
+ {
+ def_patctl.options |= PCRE2_AUTO_CALLOUT;
+ def_datctl.control2 |= CTL2_CALLOUT_EXTRA;
+ }
+ else if (strcmp(arg, "-ac") == 0) def_patctl.options |= PCRE2_AUTO_CALLOUT;
+ else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE;
+ else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG;
else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
- else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE;
- else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG;
- else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO;
- else if (strcmp(arg, "-jit") == 0)
+ else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO;
+ else if (strcmp(arg, "-jit") == 0 || strcmp(arg, "-jitverify") == 0)
{
+ if (arg[4] != 0) def_patctl.control |= CTL_JITVERIFY;
def_patctl.jit = 7; /* full & partial */
#ifndef SUPPORT_JIT
fprintf(stderr, "** Warning: JIT support is not available: "
- "-jit calls functions that do nothing.\n");
+ "-jit[verify] calls functions that do nothing.\n");
#endif
}
@@ -7356,7 +8324,8 @@ if (arg_error != NULL)
int errcode;
char *endptr;
-/* Ensure the relevant non-8-bit buffer is available. */
+/* Ensure the relevant non-8-bit buffer is available. Ensure that it is at
+least 128 code units, because it is used for retrieving error messages. */
#ifdef SUPPORT_PCRE2_16
if (test_mode == PCRE16_MODE)
@@ -7376,7 +8345,7 @@ if (arg_error != NULL)
#ifdef SUPPORT_PCRE2_32
if (test_mode == PCRE32_MODE)
{
- pbuffer32_size = 256;
+ pbuffer32_size = 512;
pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
if (pbuffer32 == NULL)
{
@@ -7430,9 +8399,8 @@ if (arg_error != NULL)
} /* End of -error handling */
/* Initialize things that cannot be done until we know which test mode we are
-running in. When HEAP_MATCH_RECURSE is undefined, calling pcre2_set_recursion_
-memory_management() is a no-op, but we call it in order to exercise it. Also
-exercise the general context copying function, which is not otherwise used. */
+running in. Exercise the general context copying function, which is not
+otherwise used. */
code_unit_size = test_mode/8;
max_oveccount = DEFAULT_OVECCOUNT;
@@ -7446,19 +8414,18 @@ max_oveccount = DEFAULT_OVECCOUNT;
G(pat_context,BITS) = G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \
G(default_dat_context,BITS) = G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \
G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \
+ G(default_con_context,BITS) = G(pcre2_convert_context_create_,BITS)(G(general_context,BITS)); \
+ G(con_context,BITS) = G(pcre2_convert_context_copy_,BITS)(G(default_con_context,BITS)); \
G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))
-#ifdef HEAP_MATCH_RECURSE
-#define SETRECURSEMEMMAN \
- (void)G(pcre2_set_recursion_memory_management_,BITS) \
- (G(default_dat_context,BITS), \
- &my_stack_malloc, &my_stack_free, NULL)
-#else
-#define SETRECURSEMEMMAN \
- (void)G(pcre2_set_recursion_memory_management_,BITS)(NULL, NULL, NULL, NULL)
-#endif
+#define CONTEXTTESTS \
+ (void)G(pcre2_set_compile_extra_options_,BITS)(G(pat_context,BITS), 0); \
+ (void)G(pcre2_set_max_pattern_length_,BITS)(G(pat_context,BITS), 0); \
+ (void)G(pcre2_set_offset_limit_,BITS)(G(dat_context,BITS), 0); \
+ (void)G(pcre2_set_recursion_memory_management_,BITS)(G(dat_context,BITS), my_malloc, my_free, NULL)
-/* Call the appropriate functions for the current mode. */
+/* Call the appropriate functions for the current mode, and exercise some
+functions that are not otherwise called. */
#ifdef SUPPORT_PCRE2_8
#undef BITS
@@ -7466,7 +8433,7 @@ max_oveccount = DEFAULT_OVECCOUNT;
if (test_mode == PCRE8_MODE)
{
CREATECONTEXTS;
- SETRECURSEMEMMAN;
+ CONTEXTTESTS;
}
#endif
@@ -7476,7 +8443,7 @@ if (test_mode == PCRE8_MODE)
if (test_mode == PCRE16_MODE)
{
CREATECONTEXTS;
- SETRECURSEMEMMAN;
+ CONTEXTTESTS;
}
#endif
@@ -7486,14 +8453,14 @@ if (test_mode == PCRE16_MODE)
if (test_mode == PCRE32_MODE)
{
CREATECONTEXTS;
- SETRECURSEMEMMAN;
+ CONTEXTTESTS;
}
#endif
/* Set a default parentheses nest limit that is large enough to run the
standard tests (this also exercises the function). */
-PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, 220);
+PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, PARENS_NEST_DEFAULT);
/* Handle command line modifier settings, sending any error messages to
stderr. We need to know the mode before modifying the context, and it is tidier
@@ -7519,18 +8486,22 @@ if (argc > 1 && strcmp(argv[op], "-") != 0)
infile = fopen(argv[op], INPUT_MODE);
if (infile == NULL)
{
- printf("** Failed to open '%s'\n", argv[op]);
+ printf("** Failed to open '%s': %s\n", argv[op], strerror(errno));
yield = 1;
goto EXIT;
}
}
+#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
+if (INTERACTIVE(infile)) using_history();
+#endif
+
if (argc > 2)
{
outfile = fopen(argv[op+1], OUTPUT_MODE);
if (outfile == NULL)
{
- printf("** Failed to open '%s'\n", argv[op+1]);
+ printf("** Failed to open '%s': %s\n", argv[op+1], strerror(errno));
yield = 1;
goto EXIT;
}
@@ -7563,8 +8534,7 @@ while (notdone)
p = buffer;
/* If we have a pattern set up for testing, or we are skipping after a
- compile failure, a blank line terminates this test; otherwise process the
- line as a data line. */
+ compile failure, a blank line terminates this test. */
if (expectdata || skipping)
{
@@ -7587,14 +8557,21 @@ while (notdone)
skipping = FALSE;
setlocale(LC_CTYPE, "C");
}
+
+ /* Otherwise, if we are not skipping, and the line is not a data comment
+ line starting with "\=", process a data line. */
+
else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2])))
+ {
rc = process_data();
+ }
}
/* We do not have a pattern set up for testing. Lines starting with # are
either comments or special commands. Blank lines are ignored. Otherwise, the
line must start with a valid delimiter. It is then processed as a pattern
- line. */
+ line. A copy of the pattern is left in pbuffer8 for use by callouts. Under
+ valgrind, make the unused part of the buffer undefined, to catch overruns. */
else if (*p == '#')
{
@@ -7655,6 +8632,10 @@ if (showtotaltimes)
EXIT:
+#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
+if (infile != NULL && INTERACTIVE(infile)) clear_history();
+#endif
+
if (infile != NULL && infile != stdin) fclose(infile);
if (outfile != NULL && outfile != stdout) fclose(outfile);
@@ -7684,7 +8665,9 @@ if (jit_stack != NULL)
G(pcre2_compile_context_free_,BITS)(G(pat_context,BITS)); \
G(pcre2_compile_context_free_,BITS)(G(default_pat_context,BITS)); \
G(pcre2_match_context_free_,BITS)(G(dat_context,BITS)); \
- G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS))
+ G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS)); \
+ G(pcre2_convert_context_free_,BITS)(G(default_con_context,BITS)); \
+ G(pcre2_convert_context_free_,BITS)(G(con_context,BITS));
#ifdef SUPPORT_PCRE2_8
#undef BITS