summary | refs | log | tree | commit | diff
path: root/src/pcre2test.c
diff options
context:
space:
mode:
author: Matthew Vernon <matthew@debian.org> 2022-04-25 17:03:54 +0100
committer: Matthew Vernon <matthew@debian.org> 2022-04-25 17:03:54 +0100
commit: 1ffedb43e1dc27250034147144e60b7984c66af0 (patch)
tree: 26a9c9e041ee79beef3538f168012c16be5e2c87 /src/pcre2test.c
parent: ae779ff9f07bbc2fa39c75c7a331aaeb3a43a159 (diff)
New upstream version 10.40
Diffstat (limited to 'src/pcre2test.c')
-rw-r--r-- src/pcre2test.c | 286
1 files changed, 257 insertions, 29 deletions
diff --git a/src/pcre2test.c b/src/pcre2test.c
index 84987d7..ea52a20 100644
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@@ -11,7 +11,7 @@ hacked-up (non-) design had also run out of steam.
Written by Philip Hazel
Original code Copyright (c) 1997-2012 University of Cambridge
- Rewritten code Copyright (c) 2016-2021 University of Cambridge
+ Rewritten code Copyright (c) 2016-2022 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -82,11 +82,7 @@ from www.cbttape.org. */
/* #define DEBUG_SHOW_MALLOC_ADDRESSES */
-/* Both libreadline and libedit are optionally supported. The user-supplied
-original patch uses readline/readline.h for libedit, but in at least one system
-it is installed as editline/readline.h, so the configuration code now looks for
-that first, falling back to readline/readline.h. */
-
+/* Both libreadline and libedit are optionally supported */
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
#if defined(SUPPORT_LIBREADLINE)
#include <readline/readline.h>
@@ -94,8 +90,15 @@ that first, falling back to readline/readline.h. */
#else
#if defined(HAVE_EDITLINE_READLINE_H)
#include <editline/readline.h>
+#elif defined(HAVE_EDIT_READLINE_READLINE_H)
+#include <edit/readline/readline.h>
#else
-#include <readline/readline.h>
+#include <readline.h>
+/* GNU readline defines this macro but libedit doesn't. If that ever changes,
+this needs to be updated or the build could break. */
+#ifdef RL_VERSION_MAJOR
+#include <history.h>
+#endif
#endif
#endif
#endif
@@ -441,6 +444,7 @@ enum { MOD_CTC, /* Applies to a compile context */
MOD_PAT, /* Applies to a pattern */
MOD_PATP, /* Ditto, OK for Perl test */
MOD_DAT, /* Applies to a data line */
+ MOD_DATP, /* Ditto, OK for Perl test */
MOD_PD, /* Applies to a pattern or a data line */
MOD_PDP, /* As MOD_PD, OK for Perl test */
MOD_PND, /* As MOD_PD, but not for a default pattern */
@@ -516,6 +520,8 @@ so many of them that they are split into two fields. */
#define CTL2_CALLOUT_NO_WHERE 0x00000200u
#define CTL2_CALLOUT_EXTRA 0x00000400u
#define CTL2_ALLVECTOR 0x00000800u
+#define CTL2_NULL_SUBJECT 0x00001000u
+#define CTL2_NULL_REPLACEMENT 0x00002000u
#define CTL2_NL_SET 0x40000000u /* Informational */
#define CTL2_BSR_SET 0x80000000u /* Informational */
@@ -698,7 +704,7 @@ static modstruct modlist[] = {
{ "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) },
{ "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) },
{ "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) },
- { "no_jit", MOD_DAT, MOD_OPT, PCRE2_NO_JIT, DO(options) },
+ { "no_jit", MOD_DATP, MOD_OPT, PCRE2_NO_JIT, DO(options) },
{ "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) },
{ "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) },
{ "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) },
@@ -706,6 +712,8 @@ static modstruct modlist[] = {
{ "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) },
{ "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) },
{ "null_context", MOD_PD, MOD_CTL, CTL_NULLCONTEXT, PO(control) },
+ { "null_replacement", MOD_DAT, MOD_CTL, CTL2_NULL_REPLACEMENT, DO(control2) },
+ { "null_subject", MOD_DAT, MOD_CTL, CTL2_NULL_SUBJECT, DO(control2) },
{ "offset", MOD_DAT, MOD_INT, 0, DO(offset) },
{ "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)},
{ "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) },
@@ -767,7 +775,7 @@ static modstruct modlist[] = {
PCRE2_NOTBOL|PCRE2_NOTEMPTY|PCRE2_NOTEOL)
#define POSIX_SUPPORTED_MATCH_CONTROLS (CTL_AFTERTEXT|CTL_ALLAFTERTEXT)
-#define POSIX_SUPPORTED_MATCH_CONTROLS2 (0)
+#define POSIX_SUPPORTED_MATCH_CONTROLS2 (CTL2_NULL_SUBJECT)
/* Control bits that are not ignored with 'push'. */
@@ -3147,7 +3155,7 @@ Returns: 0 on success, with the length updated to the number of 16-bit
OR -3 if a value > 0xffff is encountered when not in UTF mode
*/
-static PCRE2_SIZE
+static int
to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
{
uint16_t *pp;
@@ -3234,7 +3242,7 @@ Returns: 0 on success, with the length updated to the number of 32-bit
OR -2 if a value > 0x10ffff is encountered in UTF mode
*/
-static PCRE2_SIZE
+static int
to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
{
uint32_t *pp;
@@ -3579,6 +3587,7 @@ if (restrict_for_perl_test) switch(m->which)
{
case MOD_PNDP:
case MOD_PATP:
+ case MOD_DATP:
case MOD_PDP:
break;
@@ -3600,7 +3609,8 @@ switch (m->which)
else if (ctx == CTX_DAT) field = PTR(dat_context);
break;
- case MOD_DAT: /* Data line modifier */
+ case MOD_DAT: /* Data line modifier */
+ case MOD_DATP: /* Allowed for Perl test */
if (dctl != NULL) field = dctl;
break;
@@ -4102,7 +4112,7 @@ Returns: nothing
static void
show_controls(uint32_t controls, uint32_t controls2, const char *before)
{
-fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
before,
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
@@ -4132,6 +4142,8 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s
((controls & CTL_MEMORY) != 0)? " memory" : "",
((controls2 & CTL2_NL_SET) != 0)? " newline" : "",
((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
+ ((controls2 & CTL2_NULL_REPLACEMENT) != 0)? " null_replacement" : "",
+ ((controls2 & CTL2_NULL_SUBJECT) != 0)? " null_subject" : "",
((controls & CTL_POSIX) != 0)? " posix" : "",
((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
((controls & CTL_PUSH) != 0)? " push" : "",
@@ -5481,24 +5493,27 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
{
show_compile_options(
- pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, "");
+ pat_patctl.options & (uint32_t)(~POSIX_SUPPORTED_COMPILE_OPTIONS),
+ msg, "");
msg = "";
}
if ((FLD(pat_context, extra_options) &
- ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS) != 0)
+ (uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS)) != 0)
{
show_compile_extra_options(
- FLD(pat_context, extra_options) & ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS,
- msg, "");
+ FLD(pat_context, extra_options) &
+ (uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS), msg, "");
msg = "";
}
- if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 ||
- (pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0)
+ if ((pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS)) != 0 ||
+ (pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2)) != 0)
{
- show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS,
- pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2, msg);
+ show_controls(
+ pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS),
+ pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2),
+ msg);
msg = "";
}
@@ -7064,9 +7079,14 @@ pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c);
VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
#endif
-/* Now pp points to the subject string. POSIX matching is only possible in
-8-bit mode, and it does not support timing or other fancy features. Some were
-checked at compile time, but we need to check the match-time settings here. */
+/* Now pp points to the subject string, but if null_subject was specified, set
+it to NULL to test PCRE2's behaviour. */
+
+if ((dat_datctl.control2 & CTL2_NULL_SUBJECT) != 0) pp = NULL;
+
+/* POSIX matching is only possible in 8-bit mode, and it does not support
+timing or other fancy features. Some were checked at compile time, but we need
+to check the match-time settings here. */
#ifdef SUPPORT_PCRE2_8
if ((pat_patctl.control & CTL_POSIX) != 0)
@@ -7293,6 +7313,7 @@ if (dat_datctl.replacement[0] != 0)
uint8_t *pr;
uint8_t rbuffer[REPLACE_BUFFSIZE];
uint8_t nbuffer[REPLACE_BUFFSIZE];
+ uint8_t *rbptr;
uint32_t xoptions;
uint32_t emoption; /* External match option */
PCRE2_SIZE j, rlen, nsize, erroroffset;
@@ -7443,9 +7464,14 @@ if (dat_datctl.replacement[0] != 0)
PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, NULL, NULL); /* No callout */
}
+ /* There is a special option to set the replacement to NULL in order to test
+ that case. */
+
+ rbptr = ((dat_datctl.control2 & CTL2_NULL_REPLACEMENT) == 0)? rbuffer : NULL;
+
PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
dat_datctl.options|xoptions, match_data, use_dat_context,
- rbuffer, rlen, nbuffer, &nsize);
+ rbptr, rlen, nbuffer, &nsize);
if (rc < 0)
{
@@ -7632,12 +7658,16 @@ for (gmatched = 0;; gmatched++)
}
/* The result of the match is now in capcount. First handle a successful
- match. */
+ match. If pp was forced to be NULL (to test NULL handling) it will have been
+ treated as an empty string if the length was zero. So re-create that for
+ outputting. */
if (capcount >= 0)
{
int i;
+ if (pp == NULL) pp = (uint8_t *)"";
+
if (capcount > (int)oveccount) /* Check for lunatic return value */
{
fprintf(outfile,
@@ -8224,6 +8254,8 @@ printf(" -jit set default pattern modifier 'jit'\n");
printf(" -jitfast set default pattern modifier 'jitfast'\n");
printf(" -jitverify set default pattern modifier 'jitverify'\n");
printf(" -LM list pattern and subject modifiers, then exit\n");
+printf(" -LP list non-script properties, then exit\n");
+printf(" -LS list supported scripts, then exit\n");
printf(" -q quiet: do not output PCRE2 version number at start\n");
printf(" -pattern <s> set default pattern modifier fields\n");
printf(" -subject <s> set default subject modifier fields\n");
@@ -8404,6 +8436,167 @@ return 0;
}
+/*************************************************
+* Format one property/script list item *
+*************************************************/
+/* Write into buff the text for one list entry: a primary name followed, when
+there is more than one name, by the other names in parentheses, separated by
+a comma and a space.
+
+Arguments:
+  ff        list of offsets into PRIV(utt_names), one per name; the list
+              ends at the first negative value
+  buff      receives the zero-terminated text; its size is not checked
+              here, so the caller must provide enough room
+  isscript  if TRUE, a 3-character name is preferred as the primary name
+
+Returns:    nothing
+*/
+#ifdef SUPPORT_UNICODE
+static void
+format_list_item(int16_t *ff, char *buff, BOOL isscript)
+{
+int count; /* number of names in ff[] */
+int maxi = 0; /* index in ff[] of the name chosen to go first */
+const char *maxs = ""; /* the name chosen to go first */
+size_t max = 0; /* length of that name */
+/* Count the names in the list. */
+for (count = 0; ff[count] >= 0; count++) {}
+
+/* Find the name to put first. For scripts, the first 3-character name in the
+list is chosen. For non-scripts, or if there is no 3-character name, take the
+longest name, or the earliest of the longest if several have that length. */
+
+for (int i = 0; ff[i] >= 0; i++)
+ {
+ const char *s = PRIV(utt_names) + ff[i];
+ size_t len = strlen(s);
+ if (isscript && len == 3)
+ {
+ maxi = i;
+ max = len;
+ maxs = s;
+ break;
+ }
+ else if (len > max)
+ {
+ max = len;
+ maxi = i;
+ maxs = s;
+ }
+ }
+/* Copy the chosen name, then move buff on to its terminating zero. */
+strcpy(buff, maxs);
+buff += max;
+/* Any other names follow, in list order, inside parentheses. */
+if (count > 1)
+ {
+ const char *sep = " (";
+ for (int i = 0; i < count; i++)
+ {
+ if (i == maxi) continue; /* already written as the first name */
+ buff += sprintf(buff, "%s%s", sep, PRIV(utt_names) + ff[i]);
+ sep = ", "; /* every name after the first listed one gets this */
+ }
+ (void)sprintf(buff, ")");
+ }
+}
+#endif /* SUPPORT_UNICODE */
+
+
+
+/*************************************************
+* Display scripts or properties *
+*************************************************/
+/* List the supported scripts or binary properties on stdout. Without Unicode
+support this just prints a message. Otherwise it scans the PRIV(utt) table,
+gathers the names that share a type and value into one item, and prints the
+items in two columns under a heading.
+
+Argument:   wantscripts  TRUE to list scripts (PT_SC and PT_SCX entries),
+                         FALSE to list binary properties (PT_BOOL entries)
+Returns:    nothing
+
+NOTE(review): nothing here checks seencount against the 1024-entry seen
+arrays, or fc against the 256 rows of found[]; presumably the table is known
+to be small enough. TODO confirm.
+*/
+#define MAX_SYNONYMS 5 /* most names kept for one item, the first included */
+
+static void
+display_properties(BOOL wantscripts)
+{
+#ifndef SUPPORT_UNICODE
+(void)wantscripts;
+printf("** This version of PCRE2 was compiled without Unicode support.\n");
+#else
+
+const char *typename; /* word shown in the heading line */
+uint16_t seentypes[1024]; /* types of the pairs collected so far ... */
+uint16_t seenvalues[1024]; /* ... and the values that go with them */
+int seencount = 0;
+int16_t found[256][MAX_SYNONYMS + 1]; /* per item: name offsets, then -1 */
+int fc = 0; /* number of rows of found[] filled in */
+int colwidth = 40; /* the first column is padded with spaces to this width */
+int n; /* number of items printed: a ucp_ count, not fc */
+
+if (wantscripts)
+ {
+ n = ucp_Script_Count;
+ typename = "SCRIPTS";
+ }
+else
+ {
+ n = ucp_Bprop_Count;
+ typename = "PROPERTIES";
+ }
+/* Collect each distinct (type, value) pair together with all of its names. */
+for (size_t i = 0; i < PRIV(utt_size); i++)
+ {
+ int k;
+ int m = 0;
+ int16_t *fv;
+ const ucp_type_table *t = PRIV(utt) + i;
+ unsigned int value = t->value;
+ /* Skip table entries that are not of the wanted kind. */
+ if (wantscripts)
+ {
+ if (t->type != PT_SC && t->type != PT_SCX) continue;
+ }
+ else
+ {
+ if (t->type != PT_BOOL) continue;
+ }
+ /* Skip a pair that was already collected from an earlier entry. */
+ for (k = 0; k < seencount; k++)
+ {
+ if (t->type == seentypes[k] && t->value == seenvalues[k]) break;
+ }
+ if (k < seencount) continue;
+ /* Remember the pair so that its later synonyms are skipped above. */
+ seentypes[seencount] = t->type;
+ seenvalues[seencount++] = t->value;
+ /* Start a new row of found[] with this entry's name. */
+ fv = found[fc++];
+ fv[m++] = t->name_offset;
+ /* Add the names of later entries that have the same type and value. */
+ for (size_t j = i + 1; j < PRIV(utt_size); j++)
+ {
+ const ucp_type_table *tt = PRIV(utt) + j;
+ if (tt->type != t->type || tt->value != value) continue;
+ if (m >= MAX_SYNONYMS)
+ printf("** Too many synonyms: %s ignored\n",
+ PRIV(utt_names) + tt->name_offset);
+ else fv[m++] = tt->name_offset;
+ }
+ /* End the row with the negative value that format_list_item() stops at. */
+ fv[m] = -1;
+ }
+
+printf("-------------------------- SUPPORTED %s --------------------------\n\n",
+ typename);
+
+if (!wantscripts) printf(
+"This release of PCRE2 supports Unicode's general category properties such\n"
+"as Lu (upper case letter), bi-directional properties such as Bidi_Class,\n"
+"and the following binary (yes/no) properties:\n\n");
+
+/* Print in two columns: row k on the left, row k + (n+1)/2 on the right.
+NOTE(review): the loop bounds use n, not fc, so this assumes that exactly n
+rows of found[] were filled in above. TODO confirm. */
+for (int k = 0; k < (n+1)/2; k++)
+ {
+ int x;
+ char buff1[128];
+ char buff2[128];
+
+ format_list_item(found[k], buff1, wantscripts);
+ x = k + (n+1)/2;
+ if (x < n) format_list_item(found[x], buff2, wantscripts);
+ else buff2[0] = 0;
+ /* printf() returns the number of characters written; pad up to colwidth. */
+ x = printf("%s", buff1);
+ while (x++ < colwidth) printf(" ");
+ printf("%s\n", buff2);
+ }
+
+#endif /* SUPPORT_UNICODE */
+}
+
+
/*************************************************
* Display one modifier *
@@ -8415,6 +8608,11 @@ display_one_modifier(modstruct *m, BOOL for_pattern)
uint32_t c = (!for_pattern && (m->which == MOD_PND || m->which == MOD_PNDP))?
'*' : ' ';
printf("%c%s", c, m->name);
+for (size_t i = 0; i < C1MODLISTCOUNT; i++)
+ {
+ if (strcmp(m->name, c1modlist[i].fullname) == 0)
+ printf(" (%c)", c1modlist[i].onechar);
+ }
}
@@ -8439,6 +8637,7 @@ display_selected_modifiers(BOOL for_pattern, const char *title)
uint32_t i, j;
uint32_t n = 0;
uint32_t list[MODLISTCOUNT];
+uint32_t extra[MODLISTCOUNT];
for (i = 0; i < MODLISTCOUNT; i++)
{
@@ -8458,6 +8657,7 @@ for (i = 0; i < MODLISTCOUNT; i++)
case MOD_CTM: /* Match context */
case MOD_DAT: /* Subject line */
+ case MOD_DATP: /* Subject line, OK for Perl-compatible test */
case MOD_PND: /* As PD, but not default pattern */
case MOD_PNDP: /* As PND, OK for Perl-compatible test */
is_pattern = FALSE;
@@ -8471,7 +8671,19 @@ for (i = 0; i < MODLISTCOUNT; i++)
break;
}
- if (for_pattern == is_pattern) list[n++] = i;
+ if (for_pattern == is_pattern)
+ {
+ extra[n] = 0;
+ for (size_t k = 0; k < C1MODLISTCOUNT; k++)
+ {
+ if (strcmp(m->name, c1modlist[k].fullname) == 0)
+ {
+ extra[n] += 4;
+ break;
+ }
+ }
+ list[n++] = i;
+ }
}
/* Now print from the list in two columns. */
@@ -8484,7 +8696,7 @@ for (i = 0, j = (n+1)/2; i < (n+1)/2; i++, j++)
display_one_modifier(m, for_pattern);
if (j < n)
{
- uint32_t k = 27 - strlen(m->name);
+ uint32_t k = 27 - strlen(m->name) - extra[i];
while (k-- > 0) printf(" ");
display_one_modifier(modlist + list[j], for_pattern);
}
@@ -8626,6 +8838,22 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
goto EXIT;
}
+ /* List properties and exit */
+
+ if (strcmp(arg, "-LP") == 0)
+ {
+ display_properties(FALSE);
+ goto EXIT;
+ }
+
+ /* List scripts and exit */
+
+ if (strcmp(arg, "-LS") == 0)
+ {
+ display_properties(TRUE);
+ goto EXIT;
+ }
+
/* Display and/or set return code for configuration options. */
if (strcmp(arg, "-C") == 0)
@@ -8689,7 +8917,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
else if (strcmp(arg, "-S") == 0 && argc > 2 &&
((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
{
-#if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
+#if defined(_WIN32) || defined(WIN32) || defined(__HAIKU__) || defined(NATIVE_ZOS) || defined(__VMS)
fprintf(stderr, "pcre2test: -S is not supported on this OS\n");
exit(1);
#else