diff options
author | Matthew Vernon <matthew@debian.org> | 2022-04-25 17:03:54 +0100 |
---|---|---|
committer | Matthew Vernon <matthew@debian.org> | 2022-04-25 17:03:54 +0100 |
commit | 1ffedb43e1dc27250034147144e60b7984c66af0 (patch) | |
tree | 26a9c9e041ee79beef3538f168012c16be5e2c87 /src/pcre2test.c | |
parent | ae779ff9f07bbc2fa39c75c7a331aaeb3a43a159 (diff) |
New upstream version 10.40
Diffstat (limited to 'src/pcre2test.c')
-rw-r--r-- | src/pcre2test.c | 286 |
1 files changed, 257 insertions, 29 deletions
diff --git a/src/pcre2test.c b/src/pcre2test.c index 84987d7..ea52a20 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -11,7 +11,7 @@ hacked-up (non-) design had also run out of steam. Written by Philip Hazel Original code Copyright (c) 1997-2012 University of Cambridge - Rewritten code Copyright (c) 2016-2021 University of Cambridge + Rewritten code Copyright (c) 2016-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -82,11 +82,7 @@ from www.cbttape.org. */ /* #define DEBUG_SHOW_MALLOC_ADDRESSES */ -/* Both libreadline and libedit are optionally supported. The user-supplied -original patch uses readline/readline.h for libedit, but in at least one system -it is installed as editline/readline.h, so the configuration code now looks for -that first, falling back to readline/readline.h. */ - +/* Both libreadline and libedit are optionally supported */ #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT) #if defined(SUPPORT_LIBREADLINE) #include <readline/readline.h> @@ -94,8 +90,15 @@ that first, falling back to readline/readline.h. */ #else #if defined(HAVE_EDITLINE_READLINE_H) #include <editline/readline.h> +#elif defined(HAVE_EDIT_READLINE_READLINE_H) +#include <edit/readline/readline.h> #else -#include <readline/readline.h> +#include <readline.h> +/* GNU readline defines this macro but libedit doesn't, if that ever changes +this needs to be updated or the build could break */ +#ifdef RL_VERSION_MAJOR +#include <history.h> +#endif #endif #endif #endif @@ -441,6 +444,7 @@ enum { MOD_CTC, /* Applies to a compile context */ MOD_PAT, /* Applies to a pattern */ MOD_PATP, /* Ditto, OK for Perl test */ MOD_DAT, /* Applies to a data line */ + MOD_DATP, /* Ditto, OK for Perl test */ MOD_PD, /* Applies to a pattern or a data line */ MOD_PDP, /* As MOD_PD, OK for Perl test */ MOD_PND, /* As MOD_PD, but not for a default pattern */ @@ -516,6 +520,8 @@ so many of them that they are split into two fields. */ #define CTL2_CALLOUT_NO_WHERE 0x00000200u #define CTL2_CALLOUT_EXTRA 0x00000400u #define CTL2_ALLVECTOR 0x00000800u +#define CTL2_NULL_SUBJECT 0x00001000u +#define CTL2_NULL_REPLACEMENT 0x00002000u #define CTL2_NL_SET 0x40000000u /* Informational */ #define CTL2_BSR_SET 0x80000000u /* Informational */ @@ -698,7 +704,7 @@ static modstruct modlist[] = { { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) }, { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) }, { "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) }, - { "no_jit", MOD_DAT, MOD_OPT, PCRE2_NO_JIT, DO(options) }, + { "no_jit", MOD_DATP, MOD_OPT, PCRE2_NO_JIT, DO(options) }, { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) }, { "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) }, { "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) }, @@ -706,6 +712,8 @@ static modstruct modlist[] = { { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) }, { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) }, { "null_context", MOD_PD, MOD_CTL, CTL_NULLCONTEXT, PO(control) }, + { "null_replacement", MOD_DAT, MOD_CTL, CTL2_NULL_REPLACEMENT, DO(control2) }, + { "null_subject", MOD_DAT, MOD_CTL, CTL2_NULL_SUBJECT, DO(control2) }, { "offset", MOD_DAT, MOD_INT, 0, DO(offset) }, { "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)}, { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) }, @@ -767,7 +775,7 @@ static modstruct modlist[] = { PCRE2_NOTBOL|PCRE2_NOTEMPTY|PCRE2_NOTEOL) #define POSIX_SUPPORTED_MATCH_CONTROLS (CTL_AFTERTEXT|CTL_ALLAFTERTEXT) -#define POSIX_SUPPORTED_MATCH_CONTROLS2 (0) +#define POSIX_SUPPORTED_MATCH_CONTROLS2 (CTL2_NULL_SUBJECT) /* Control bits that are not ignored with 'push'. */ @@ -3147,7 +3155,7 @@ Returns: 0 on success, with the length updated to the number of 16-bit OR -3 if a value > 0xffff is encountered when not in UTF mode */ -static PCRE2_SIZE +static int to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr) { uint16_t *pp; @@ -3234,7 +3242,7 @@ Returns: 0 on success, with the length updated to the number of 32-bit OR -2 if a value > 0x10ffff is encountered in UTF mode */ -static PCRE2_SIZE +static int to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr) { uint32_t *pp; @@ -3579,6 +3587,7 @@ if (restrict_for_perl_test) switch(m->which) { case MOD_PNDP: case MOD_PATP: + case MOD_DATP: case MOD_PDP: break; @@ -3600,7 +3609,8 @@ switch (m->which) else if (ctx == CTX_DAT) field = PTR(dat_context); break; - case MOD_DAT: /* Data line modifier */ + case MOD_DAT: /* Data line modifier */ + case MOD_DATP: /* Allowed for Perl test */ if (dctl != NULL) field = dctl; break; @@ -4102,7 +4112,7 @@ Returns: nothing static void show_controls(uint32_t controls, uint32_t controls2, const char *before) { -fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", +fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", before, ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "", ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "", @@ -4132,6 +4142,8 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s ((controls & CTL_MEMORY) != 0)? " memory" : "", ((controls2 & CTL2_NL_SET) != 0)? " newline" : "", ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "", + ((controls2 & CTL2_NULL_REPLACEMENT) != 0)? " null_replacement" : "", + ((controls2 & CTL2_NULL_SUBJECT) != 0)? " null_subject" : "", ((controls & CTL_POSIX) != 0)? " posix" : "", ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "", ((controls & CTL_PUSH) != 0)? " push" : "", @@ -5481,24 +5493,27 @@ if ((pat_patctl.control & CTL_POSIX) != 0) if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0) { show_compile_options( - pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, ""); + pat_patctl.options & (uint32_t)(~POSIX_SUPPORTED_COMPILE_OPTIONS), + msg, ""); msg = ""; } if ((FLD(pat_context, extra_options) & - ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS) != 0) + (uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS)) != 0) { show_compile_extra_options( - FLD(pat_context, extra_options) & ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS, - msg, ""); + FLD(pat_context, extra_options) & + (uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS), msg, ""); msg = ""; } - if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 || - (pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0) + if ((pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS)) != 0 || + (pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2)) != 0) { - show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS, - pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2, msg); + show_controls( + pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS), + pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2), + msg); msg = ""; } @@ -7064,9 +7079,14 @@ pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c); VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c)); #endif -/* Now pp points to the subject string. POSIX matching is only possible in -8-bit mode, and it does not support timing or other fancy features. Some were -checked at compile time, but we need to check the match-time settings here. */ +/* Now pp points to the subject string, but if null_subject was specified, set +it to NULL to test PCRE2's behaviour. */ + +if ((dat_datctl.control2 & CTL2_NULL_SUBJECT) != 0) pp = NULL; + +/* POSIX matching is only possible in 8-bit mode, and it does not support +timing or other fancy features. Some were checked at compile time, but we need +to check the match-time settings here. */ #ifdef SUPPORT_PCRE2_8 if ((pat_patctl.control & CTL_POSIX) != 0) @@ -7293,6 +7313,7 @@ if (dat_datctl.replacement[0] != 0) uint8_t *pr; uint8_t rbuffer[REPLACE_BUFFSIZE]; uint8_t nbuffer[REPLACE_BUFFSIZE]; + uint8_t *rbptr; uint32_t xoptions; uint32_t emoption; /* External match option */ PCRE2_SIZE j, rlen, nsize, erroroffset; @@ -7443,9 +7464,14 @@ if (dat_datctl.replacement[0] != 0) PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, NULL, NULL); /* No callout */ } + /* There is a special option to set the replacement to NULL in order to test + that case. */ + + rbptr = ((dat_datctl.control2 & CTL2_NULL_REPLACEMENT) == 0)? rbuffer : NULL; + PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset, dat_datctl.options|xoptions, match_data, use_dat_context, - rbuffer, rlen, nbuffer, &nsize); + rbptr, rlen, nbuffer, &nsize); if (rc < 0) { @@ -7632,12 +7658,16 @@ for (gmatched = 0;; gmatched++) } /* The result of the match is now in capcount. First handle a successful - match. */ + match. If pp was forced to be NULL (to test NULL handling) it will have been + treated as an empty string if the length was zero. So re-create that for + outputting. */ if (capcount >= 0) { int i; + if (pp == NULL) pp = (uint8_t *)""; + if (capcount > (int)oveccount) /* Check for lunatic return value */ { fprintf(outfile, @@ -8224,6 +8254,8 @@ printf(" -jit set default pattern modifier 'jit'\n"); printf(" -jitfast set default pattern modifier 'jitfast'\n"); printf(" -jitverify set default pattern modifier 'jitverify'\n"); printf(" -LM list pattern and subject modifiers, then exit\n"); +printf(" -LP list non-script properties, then exit\n"); +printf(" -LS list supported scripts, then exit\n"); printf(" -q quiet: do not output PCRE2 version number at start\n"); printf(" -pattern <s> set default pattern modifier fields\n"); printf(" -subject <s> set default subject modifier fields\n"); @@ -8404,6 +8436,167 @@ return 0; } +/************************************************* +* Format one property/script list item * +*************************************************/ + +#ifdef SUPPORT_UNICODE +static void +format_list_item(int16_t *ff, char *buff, BOOL isscript) +{ +int count; +int maxi = 0; +const char *maxs = ""; +size_t max = 0; + +for (count = 0; ff[count] >= 0; count++) {} + +/* Find the name to put first. For scripts, any 3-character name is chosen. +For non-scripts, or if there is no 3-character name, take the longest. */ + +for (int i = 0; ff[i] >= 0; i++) + { + const char *s = PRIV(utt_names) + ff[i]; + size_t len = strlen(s); + if (isscript && len == 3) + { + maxi = i; + max = len; + maxs = s; + break; + } + else if (len > max) + { + max = len; + maxi = i; + maxs = s; + } + } + +strcpy(buff, maxs); +buff += max; + +if (count > 1) + { + const char *sep = " ("; + for (int i = 0; i < count; i++) + { + if (i == maxi) continue; + buff += sprintf(buff, "%s%s", sep, PRIV(utt_names) + ff[i]); + sep = ", "; + } + (void)sprintf(buff, ")"); + } +} +#endif /* SUPPORT_UNICODE */ + + + +/************************************************* +* Display scripts or properties * +*************************************************/ + +#define MAX_SYNONYMS 5 + +static void +display_properties(BOOL wantscripts) +{ +#ifndef SUPPORT_UNICODE +(void)wantscripts; +printf("** This version of PCRE2 was compiled without Unicode support.\n"); +#else + +const char *typename; +uint16_t seentypes[1024]; +uint16_t seenvalues[1024]; +int seencount = 0; +int16_t found[256][MAX_SYNONYMS + 1]; +int fc = 0; +int colwidth = 40; +int n; + +if (wantscripts) + { + n = ucp_Script_Count; + typename = "SCRIPTS"; + } +else + { + n = ucp_Bprop_Count; + typename = "PROPERTIES"; + } + +for (size_t i = 0; i < PRIV(utt_size); i++) + { + int k; + int m = 0; + int16_t *fv; + const ucp_type_table *t = PRIV(utt) + i; + unsigned int value = t->value; + + if (wantscripts) + { + if (t->type != PT_SC && t->type != PT_SCX) continue; + } + else + { + if (t->type != PT_BOOL) continue; + } + + for (k = 0; k < seencount; k++) + { + if (t->type == seentypes[k] && t->value == seenvalues[k]) break; + } + if (k < seencount) continue; + + seentypes[seencount] = t->type; + seenvalues[seencount++] = t->value; + + fv = found[fc++]; + fv[m++] = t->name_offset; + + for (size_t j = i + 1; j < PRIV(utt_size); j++) + { + const ucp_type_table *tt = PRIV(utt) + j; + if (tt->type != t->type || tt->value != value) continue; + if (m >= MAX_SYNONYMS) + printf("** Too many synonyms: %s ignored\n", + PRIV(utt_names) + tt->name_offset); + else fv[m++] = tt->name_offset; + } + + fv[m] = -1; + } + +printf("-------------------------- SUPPORTED %s --------------------------\n\n", + typename); + +if (!wantscripts) printf( +"This release of PCRE2 supports Unicode's general category properties such\n" +"as Lu (upper case letter), bi-directional properties such as Bidi_Class,\n" +"and the following binary (yes/no) properties:\n\n"); + + +for (int k = 0; k < (n+1)/2; k++) + { + int x; + char buff1[128]; + char buff2[128]; + + format_list_item(found[k], buff1, wantscripts); + x = k + (n+1)/2; + if (x < n) format_list_item(found[x], buff2, wantscripts); + else buff2[0] = 0; + + x = printf("%s", buff1); + while (x++ < colwidth) printf(" "); + printf("%s\n", buff2); + } + +#endif /* SUPPORT_UNICODE */ +} + + /************************************************* * Display one modifier * @@ -8415,6 +8608,11 @@ display_one_modifier(modstruct *m, BOOL for_pattern) uint32_t c = (!for_pattern && (m->which == MOD_PND || m->which == MOD_PNDP))? '*' : ' '; printf("%c%s", c, m->name); +for (size_t i = 0; i < C1MODLISTCOUNT; i++) + { + if (strcmp(m->name, c1modlist[i].fullname) == 0) + printf(" (%c)", c1modlist[i].onechar); + } } @@ -8439,6 +8637,7 @@ display_selected_modifiers(BOOL for_pattern, const char *title) uint32_t i, j; uint32_t n = 0; uint32_t list[MODLISTCOUNT]; +uint32_t extra[MODLISTCOUNT]; for (i = 0; i < MODLISTCOUNT; i++) { @@ -8458,6 +8657,7 @@ for (i = 0; i < MODLISTCOUNT; i++) case MOD_CTM: /* Match context */ case MOD_DAT: /* Subject line */ + case MOD_DATP: /* Subject line, OK for Perl-compatible test */ case MOD_PND: /* As PD, but not default pattern */ case MOD_PNDP: /* As PND, OK for Perl-compatible test */ is_pattern = FALSE; @@ -8471,7 +8671,19 @@ for (i = 0; i < MODLISTCOUNT; i++) break; } - if (for_pattern == is_pattern) list[n++] = i; + if (for_pattern == is_pattern) + { + extra[n] = 0; + for (size_t k = 0; k < C1MODLISTCOUNT; k++) + { + if (strcmp(m->name, c1modlist[k].fullname) == 0) + { + extra[n] += 4; + break; + } + } + list[n++] = i; + } } /* Now print from the list in two columns. */ @@ -8484,7 +8696,7 @@ for (i = 0, j = (n+1)/2; i < (n+1)/2; i++, j++) display_one_modifier(m, for_pattern); if (j < n) { - uint32_t k = 27 - strlen(m->name); + uint32_t k = 27 - strlen(m->name) - extra[i]; while (k-- > 0) printf(" "); display_one_modifier(modlist + list[j], for_pattern); } @@ -8626,6 +8838,22 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) goto EXIT; } + /* List properties and exit */ + + if (strcmp(arg, "-LP") == 0) + { + display_properties(FALSE); + goto EXIT; + } + + /* List scripts and exit */ + + if (strcmp(arg, "-LS") == 0) + { + display_properties(TRUE); + goto EXIT; + } + /* Display and/or set return code for configuration options. */ if (strcmp(arg, "-C") == 0) @@ -8689,7 +8917,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) else if (strcmp(arg, "-S") == 0 && argc > 2 && ((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0)) { -#if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS) +#if defined(_WIN32) || defined(WIN32) || defined(__HAIKU__) || defined(NATIVE_ZOS) || defined(__VMS) fprintf(stderr, "pcre2test: -S is not supported on this OS\n"); exit(1); #else |