summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Vernon <matthew@debian.org>2018-10-26 19:26:32 +0100
committerMatthew Vernon <matthew@debian.org>2018-10-26 19:26:32 +0100
commit1cab70503159c32de523a1762614b6829687a116 (patch)
tree26068f08ea492a5d12216d014a1a58372c12d360
parent39c4b070d68976779cdb3f2a9f886de962870a37 (diff)
parentb03dbaae48971b62fe6ce174a8dfbbcaf1314d7e (diff)
Merge tag '10.32'
Upstream version 10.32
-rw-r--r--CMakeLists.txt25
-rw-r--r--ChangeLog206
-rw-r--r--HACKING33
-rw-r--r--LICENCE14
-rw-r--r--Makefile.am4
-rw-r--r--Makefile.in7
-rw-r--r--NEWS31
-rw-r--r--NON-AUTOTOOLS-BUILD23
-rw-r--r--README39
-rwxr-xr-xRunGrepTest36
-rwxr-xr-xRunTest4
-rw-r--r--RunTest.bat2
-rw-r--r--aclocal.m480
-rwxr-xr-xconfigure105
-rw-r--r--configure.ac66
-rw-r--r--doc/html/NON-AUTOTOOLS-BUILD.txt23
-rw-r--r--doc/html/README.txt39
-rw-r--r--doc/html/index.html2
-rw-r--r--doc/html/pcre2.html25
-rw-r--r--doc/html/pcre2_code_free.html3
-rw-r--r--doc/html/pcre2_compile.html2
-rw-r--r--doc/html/pcre2_compile_context_free.html3
-rw-r--r--doc/html/pcre2_convert_context_free.html3
-rw-r--r--doc/html/pcre2_converted_pattern_free.html3
-rw-r--r--doc/html/pcre2_dfa_match.html6
-rw-r--r--doc/html/pcre2_general_context_free.html3
-rw-r--r--doc/html/pcre2_jit_stack_assign.html11
-rw-r--r--doc/html/pcre2_jit_stack_create.html4
-rw-r--r--doc/html/pcre2_jit_stack_free.html5
-rw-r--r--doc/html/pcre2_match_context_free.html3
-rw-r--r--doc/html/pcre2_match_data_free.html7
-rw-r--r--doc/html/pcre2_pattern_info.html2
-rw-r--r--doc/html/pcre2_serialize_decode.html9
-rw-r--r--doc/html/pcre2_serialize_encode.html11
-rw-r--r--doc/html/pcre2_serialize_free.html7
-rw-r--r--doc/html/pcre2_serialize_get_number_of_codes.html4
-rw-r--r--doc/html/pcre2_set_glob_separator.html2
-rw-r--r--doc/html/pcre2_substring_free.html2
-rw-r--r--doc/html/pcre2_substring_list_free.html3
-rw-r--r--doc/html/pcre2api.html291
-rw-r--r--doc/html/pcre2build.html59
-rw-r--r--doc/html/pcre2callout.html16
-rw-r--r--doc/html/pcre2compat.html40
-rw-r--r--doc/html/pcre2convert.html7
-rw-r--r--doc/html/pcre2grep.html139
-rw-r--r--doc/html/pcre2jit.html28
-rw-r--r--doc/html/pcre2limits.html18
-rw-r--r--doc/html/pcre2matching.html6
-rw-r--r--doc/html/pcre2pattern.html737
-rw-r--r--doc/html/pcre2perform.html40
-rw-r--r--doc/html/pcre2posix.html8
-rw-r--r--doc/html/pcre2serialize.html27
-rw-r--r--doc/html/pcre2syntax.html60
-rw-r--r--doc/html/pcre2test.html112
-rw-r--r--doc/html/pcre2unicode.html16
-rw-r--r--doc/index.html.src2
-rw-r--r--doc/pcre2.326
-rw-r--r--doc/pcre2.txt4245
-rw-r--r--doc/pcre2_code_free.35
-rw-r--r--doc/pcre2_compile.32
-rw-r--r--doc/pcre2_compile_context_free.35
-rw-r--r--doc/pcre2_convert_context_free.35
-rw-r--r--doc/pcre2_converted_pattern_free.35
-rw-r--r--doc/pcre2_dfa_match.38
-rw-r--r--doc/pcre2_general_context_free.35
-rw-r--r--doc/pcre2_jit_stack_assign.312
-rw-r--r--doc/pcre2_jit_stack_create.34
-rw-r--r--doc/pcre2_jit_stack_free.37
-rw-r--r--doc/pcre2_match.32
-rw-r--r--doc/pcre2_match_context_free.35
-rw-r--r--doc/pcre2_match_data_free.39
-rw-r--r--doc/pcre2_pattern_info.32
-rw-r--r--doc/pcre2_serialize_decode.311
-rw-r--r--doc/pcre2_serialize_encode.313
-rw-r--r--doc/pcre2_serialize_free.39
-rw-r--r--doc/pcre2_serialize_get_number_of_codes.36
-rw-r--r--doc/pcre2_set_compile_extra_options.32
-rw-r--r--doc/pcre2_set_glob_separator.32
-rw-r--r--doc/pcre2_set_heap_limit.32
-rw-r--r--doc/pcre2_substring_free.34
-rw-r--r--doc/pcre2_substring_list_free.35
-rw-r--r--doc/pcre2api.3285
-rw-r--r--doc/pcre2build.361
-rw-r--r--doc/pcre2callout.318
-rw-r--r--doc/pcre2compat.342
-rw-r--r--doc/pcre2convert.39
-rw-r--r--doc/pcre2grep.198
-rw-r--r--doc/pcre2grep.txt127
-rw-r--r--doc/pcre2jit.330
-rw-r--r--doc/pcre2limits.318
-rw-r--r--doc/pcre2matching.36
-rw-r--r--doc/pcre2pattern.3736
-rw-r--r--doc/pcre2perform.341
-rw-r--r--doc/pcre2posix.38
-rw-r--r--doc/pcre2serialize.328
-rw-r--r--doc/pcre2syntax.360
-rw-r--r--doc/pcre2test.1110
-rw-r--r--doc/pcre2test.txt579
-rw-r--r--doc/pcre2unicode.317
-rwxr-xr-xperltest.sh75
-rw-r--r--src/config.h.generic32
-rw-r--r--src/config.h.in26
-rw-r--r--src/dftables.c27
-rw-r--r--src/pcre2.h.generic30
-rw-r--r--src/pcre2.h.in30
-rw-r--r--src/pcre2_auto_possess.c7
-rw-r--r--src/pcre2_chartables.c.dist50
-rw-r--r--src/pcre2_compile.c336
-rw-r--r--src/pcre2_convert.c12
-rw-r--r--src/pcre2_dfa_match.c271
-rw-r--r--src/pcre2_error.c15
-rw-r--r--src/pcre2_extuni.c8
-rw-r--r--src/pcre2_find_bracket.c3
-rw-r--r--src/pcre2_internal.h131
-rw-r--r--src/pcre2_intmodedep.h17
-rw-r--r--src/pcre2_jit_compile.c37
-rw-r--r--src/pcre2_jit_test.c9
-rw-r--r--src/pcre2_maketables.c9
-rw-r--r--src/pcre2_match.c54
-rw-r--r--src/pcre2_pattern_info.c3
-rw-r--r--src/pcre2_printint.c3
-rw-r--r--src/pcre2_serialize.c22
-rw-r--r--src/pcre2_string_utils.c38
-rw-r--r--src/pcre2_study.c6
-rw-r--r--src/pcre2_substitute.c47
-rw-r--r--src/pcre2_tables.c378
-rw-r--r--src/pcre2_ucd.c6727
-rw-r--r--src/pcre2_ucp.h48
-rw-r--r--src/pcre2grep.c149
-rw-r--r--src/pcre2posix.c10
-rw-r--r--src/pcre2test.c275
-rw-r--r--src/sljit/sljitConfigInternal.h29
-rw-r--r--src/sljit/sljitExecAllocator.c9
-rw-r--r--src/sljit/sljitLir.c10
-rw-r--r--src/sljit/sljitLir.h20
-rw-r--r--src/sljit/sljitNativeARM_64.c250
-rw-r--r--src/sljit/sljitNativeARM_T2_32.c112
-rw-r--r--src/sljit/sljitNativeMIPS_32.c4
-rw-r--r--src/sljit/sljitNativeMIPS_64.c4
-rw-r--r--src/sljit/sljitNativeMIPS_common.c48
-rw-r--r--src/sljit/sljitNativeX86_32.c79
-rw-r--r--src/sljit/sljitNativeX86_64.c120
-rw-r--r--src/sljit/sljitNativeX86_common.c17
-rw-r--r--testdata/grepinput6
-rw-r--r--testdata/grepoutput34
-rw-r--r--testdata/testinput190
-rw-r--r--testdata/testinput1513
-rw-r--r--testdata/testinput179
-rw-r--r--testdata/testinput184
-rw-r--r--testdata/testinput278
-rw-r--r--testdata/testinput226
-rw-r--r--testdata/testinput449
-rw-r--r--testdata/testinput565
-rw-r--r--testdata/testinput613
-rw-r--r--testdata/testoutput1127
-rw-r--r--testdata/testoutput1513
-rw-r--r--testdata/testoutput179
-rw-r--r--testdata/testoutput1815
-rw-r--r--testdata/testoutput2214
-rw-r--r--testdata/testoutput22-168
-rw-r--r--testdata/testoutput22-328
-rw-r--r--testdata/testoutput22-88
-rw-r--r--testdata/testoutput473
-rw-r--r--testdata/testoutput5101
-rw-r--r--testdata/testoutput616
-rw-r--r--testdata/testoutput8-16-41022
-rw-r--r--testdata/testoutputEBC3
167 files changed, 12493 insertions, 8215 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index bde940a..1a2c95b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -80,6 +80,8 @@
# 2017-03-11 PH turned HEAP_MATCH_RECURSE into a NO-OP for 10.30
# 2017-04-08 PH added HEAP_LIMIT
# 2017-06-15 ZH added SUPPORT_JIT_SEALLOC support
+# 2018-06-19 PH added checks for stdint.h and inttypes.h
+# 2018-06-27 PH added Daniel's patch to increase the stack for MSVC
PROJECT(PCRE2 C)
@@ -113,6 +115,18 @@ CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H)
CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H)
CHECK_INCLUDE_FILE(windows.h HAVE_WINDOWS_H)
+IF(HAVE_INTTYPES_H)
+ SET(PCRE2_HAVE_INTTYPES_H 1)
+ELSE(HAVE_INTTYPES_H)
+ SET(PCRE2_HAVE_INTTYPES_H 0)
+ENDIF(HAVE_INTTYPES_H)
+
+IF(HAVE_STDINT_H)
+ SET(PCRE2_HAVE_STDINT_H 1)
+ELSE(HAVE_STDINT_H)
+ SET(PCRE2_HAVE_STDINT_H 0)
+ENDIF(HAVE_STDINT_H)
+
CHECK_FUNCTION_EXISTS(bcopy HAVE_BCOPY)
CHECK_FUNCTION_EXISTS(memmove HAVE_MEMMOVE)
CHECK_FUNCTION_EXISTS(strerror HAVE_STRERROR)
@@ -146,7 +160,7 @@ SET(PCRE2_PARENS_NEST_LIMIT "250" CACHE STRING
"Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")
SET(PCRE2_HEAP_LIMIT "20000000" CACHE STRING
- "Default limit on heap memory (kilobytes). See HEAP_LIMIT in config.h.in for details.")
+ "Default limit on heap memory (kibibytes). See HEAP_LIMIT in config.h.in for details.")
SET(PCRE2_MATCH_LIMIT "10000000" CACHE STRING
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
@@ -594,6 +608,13 @@ IF(PCRE2_BUILD_TESTS)
SET(PCRE2TEST_SOURCES src/pcre2test.c)
+ IF(MSVC)
+ # This is needed to avoid a stack overflow error in the standard tests. The
+ # flag should be indicated with a forward-slash instead of a hyphen, but
+ # then CMake treats it as a file path.
+ SET(PCRE2TEST_LINKER_FLAGS -STACK:2500000)
+ ENDIF(MSVC)
+
ADD_EXECUTABLE(pcre2test ${PCRE2TEST_SOURCES})
SET(targets ${targets} pcre2test)
IF(PCRE2_BUILD_PCRE2_8)
@@ -605,7 +626,7 @@ IF(PCRE2_BUILD_TESTS)
IF(PCRE2_BUILD_PCRE2_32)
LIST(APPEND PCRE2TEST_LIBS pcre2-32)
ENDIF(PCRE2_BUILD_PCRE2_32)
- TARGET_LINK_LIBRARIES(pcre2test ${PCRE2TEST_LIBS})
+ TARGET_LINK_LIBRARIES(pcre2test ${PCRE2TEST_LIBS} ${PCRE2TEST_LINKER_FLAGS})
IF(PCRE2_SUPPORT_JIT)
ADD_EXECUTABLE(pcre2_jit_test src/pcre2_jit_test.c)
diff --git a/ChangeLog b/ChangeLog
index 7f520bf..06b69f8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,6 +2,202 @@ Change Log for PCRE2
--------------------
+Version 10.32-RC1 10-September-2018
+-----------------------------------
+
+1. When matching using the REG_STARTEND feature of the POSIX API with a
+non-zero starting offset, unset capturing groups with lower numbers than a
+group that did capture something were not being correctly returned as "unset"
+(that is, with offset values of -1).
+
+2. When matching using the POSIX API, pcre2test used to omit listing unset
+groups altogether. Now it shows those that come before any actual captures as
+"<unset>", as happens for non-POSIX matching.
+
+3. Running "pcre2test -C" always stated "\R matches CR, LF, or CRLF only",
+whatever the build configuration was. It now correctly says "\R matches all
+Unicode newlines" in the default case when --enable-bsr-anycrlf has not been
+specified. Similarly, running "pcre2test -C bsr" never produced the result
+ANY.
+
+4. Matching the pattern /(*UTF)\C[^\v]+\x80/ against an 8-bit string containing
+multi-code-unit characters caused bad behaviour and possibly a crash. This
+issue was fixed for other kinds of repeat in release 10.20 by change 19, but
+repeating character classes were overlooked.
+
+5. pcre2grep now supports the inclusion of binary zeros in patterns that are
+read from files via the -f option.
+
+6. A small fix to pcre2grep to avoid compiler warnings for -Wformat-overflow=2.
+
+7. Added --enable-jit=auto support to configure.ac.
+
+8. Added some dummy variables to the heapframe structure in 16-bit and 32-bit
+modes for the benefit of m68k, where pointers can be 16-bit aligned. The
+dummies force 32-bit alignment and this ensures that the structure is a
+multiple of PCRE2_SIZE, a requirement that is tested at compile time. In other
+architectures, alignment requirements take care of this automatically.
+
+9. When returning an error from pcre2_pattern_convert(), ensure the error
+offset is set to zero for early errors.
+
+10. A number of patches for Windows support from Daniel Richard G:
+
+ (a) List of error numbers in RunTest.bat corrected (it was not the same as in
+ RunTest).
+
+ (b) pcre2grep snprintf() workaround as used elsewhere in the tree.
+
+ (c) Support for non-C99 snprintf() that returns -1 in the overflow case.
+
+11. Minor tidy of pcre2_dfa_match() code.
+
+12. Refactored pcre2_dfa_match() so that the internal recursive calls no longer
+use the stack for local workspace and local ovectors. Instead, an initial block
+of stack is reserved, but if this is insufficient, heap memory is used. The
+heap limit parameter now applies to pcre2_dfa_match().
+
+13. If a "find limits" test of DFA matching in pcre2test resulted in too many
+matches for the ovector, no matches were displayed.
+
+14. Removed an occurrence of ctrl/Z from test 6 because Windows treats it as
+EOF. The test looks to have come from a fuzzer.
+
+15. If PCRE2 was built with a default match limit a lot greater than the
+default default of 10 000 000, some JIT tests of the match limit no longer
+failed. All such tests now set 10 000 000 as the upper limit.
+
+16. Another Windows related patch for pcre2grep to ensure that WIN32 is
+undefined under Cygwin.
+
+17. Test for the presence of stdint.h and inttypes.h in configure and CMake and
+include whichever exists (stdint preferred) instead of unconditionally
+including stdint. This makes life easier for old and non-standard systems.
+
+18. Further changes to improve portability, especially to old and/or
+non-standard systems:
+
+ (a) Put all printf arguments in RunGrepTest into single, not double, quotes,
+ and use \0 not \x00 for binary zero.
+
+ (b) Avoid the use of C++ (i.e. BCPL) // comments.
+
+ (c) Parameterize the use of %zu in pcre2test to make it like %td. For both of
+ these now, if using MSVC or a standard C before C99, %lu is used with a
+ cast if necessary.
+
+19. Applied a contributed patch to CMakeLists.txt to increase the stack size
+when linking pcre2test with MSVC. This gets rid of a stack overflow error in
+the standard set of tests.
+
+20. Output a warning in pcre2test when ignoring the "altglobal" modifier when
+it is given with the "replace" modifier.
+
+21. In both pcre2test and pcre2_substitute(), with global matching, a pattern
+that matched an empty string, but never at the starting match offset, was not
+handled in a Perl-compatible way. The pattern /(?<=\G.)/ is an example of such
+a pattern. Because \G is in a lookbehind assertion, there has to be a
+"bumpalong" before there can be a match. The automatic "advance by one
+character after an empty string match" rule is therefore inappropriate. A more
+complicated algorithm has now been implemented.
+
+22. When checking to see if a lookbehind is of fixed length, lookaheads were
+correctly ignored, but qualifiers on lookaheads were not being ignored, leading
+to an incorrect "lookbehind assertion is not fixed length" error.
+
+23. The VERSION condition test was reading fractional PCRE2 version numbers
+such as the 04 in 10.04 incorrectly and hence giving wrong results.
+
+24. Updated to Unicode version 11.0.0. As well as the usual addition of new
+scripts and characters, this involved re-jigging the grapheme break property
+algorithm because Unicode has changed the way emojis are handled.
+
+25. Fixed an obscure bug that struck when there were two atomic groups not
+separated by something with a backtracking point. There could be an incorrect
+backtrack into the first of the atomic groups. A complicated example is
+/(?>a(*:1))(?>b)(*SKIP:1)x|.*/ matched against "abc", where the *SKIP
+shouldn't find a MARK (because it is in an atomic group), but it did.
+
+26. Upgraded the perltest.sh script: (1) #pattern lines can now be used to set
+a list of modifiers for all subsequent patterns - only those that the script
+recognizes are meaningful; (2) #subject lines can be used to set or unset a
+default "mark" modifier; (3) Unsupported #command lines give a warning when
+they are ignored; (4) Mark data is output only if the "mark" modifier is
+present.
+
+27. (*ACCEPT:ARG), (*FAIL:ARG), and (*COMMIT:ARG) are now supported.
+
+28. A (*MARK) name was not being passed back for positive assertions that were
+terminated by (*ACCEPT).
+
+29. Add support for \N{U+dddd}, but only in Unicode mode.
+
+30. Add support for (?^) for unsetting all imnsx options.
+
+31. The PCRE2_EXTENDED (/x) option only ever discarded space characters whose
+code point was less than 256 and that were recognized by the lookup table
+generated by pcre2_maketables(), which uses isspace() to identify white space.
+Now, when Unicode support is compiled, PCRE2_EXTENDED also discards U+0085,
+U+200E, U+200F, U+2028, and U+2029, which are additional characters defined by
+Unicode as "Pattern White Space". This makes PCRE2 compatible with Perl.
+
+32. In certain circumstances, option settings within patterns were not being
+correctly processed. For example, the pattern /((?i)A)(?m)B/ incorrectly
+matched "ab". (The (?m) setting lost the fact that (?i) should be reset at the
+end of its group during the parse process, but without another setting such as
+(?m) the compile phase got it right.) This bug was introduced by the
+refactoring in release 10.23.
+
+33. PCRE2 uses bcopy() if available when memmove() is not, and it used just to
+define memmove() as a function call to bcopy(). This hasn't been tested for a
+long time because in pcre2test the result of memmove() was being used, whereas
+bcopy() doesn't return a result. This feature is now refactored always to call
+an emulation function when there is no memmove(). The emulation makes use of
+bcopy() when available.
+
+34. When serializing a pattern, set the memctl, executable_jit, and tables
+fields (that is, all the fields that contain pointers) to zeros so that the
+result of serializing is always the same. These fields are re-set when the
+pattern is deserialized.
+
+35. In a pattern such as /[^\x{100}-\x{ffff}]*[\x80-\xff]/ which has a repeated
+negative class with no characters less than 0x100 followed by a positive class
+with only characters less than 0x100, the first class was incorrectly being
+auto-possessified, causing incorrect match failures.
+
+36. Removed the character type bit ctype_meta, which dates from PCRE1 and is
+not used in PCRE2.
+
+37. Tidied up unnecessarily complicated macros used in the escapes table.
+
+38. Since 10.21, the new testoutput8-16-4 file has accidentally been omitted
+from distribution tarballs, owing to a typo in Makefile.am which had
+testoutput8-16-3 twice. Now fixed.
+
+39. If the only branch in a conditional subpattern was anchored, the whole
+subpattern was treated as anchored, when it should not have been, since the
+assumed empty second branch cannot be anchored. Demonstrated by test patterns
+such as /(?(1)^())b/ or /(?(?=^))b/.
+
+40. A repeated conditional subpattern that could match an empty string was
+always assumed to be unanchored. Now it is checked just like any other
+repeated conditional subpattern, and can be found to be anchored if the minimum
+quantifier is one or more. I can't see much use for a repeated anchored
+pattern, but the behaviour is now consistent.
+
+41. Minor addition to pcre2_jit_compile.c to avoid static analyzer complaint
+(for an event that could never occur but you had to have external information
+to know that).
+
+42. If before the first match in a file that was being searched by pcre2grep
+there was a line that was sufficiently long to cause the input buffer to be
+expanded, the variable holding the location of the end of the previous match
+was being adjusted incorrectly, and could cause an overflow warning from a code
+sanitizer. However, as the value is used only to print pending "after" lines
+when the next match is reached (and there are no such lines in this case) this
+bug could do no damage.
+
+
Version 10.31 12-February-2018
------------------------------
@@ -304,8 +500,8 @@ tests to improve coverage.
31. If more than one of "push", "pushcopy", or "pushtablescopy" were set in
pcre2test, a crash could occur.
-32. Make -bigstack in RunTest allocate a 64Mb stack (instead of 16 MB) so that
-all the tests can run with clang's sanitizing options.
+32. Make -bigstack in RunTest allocate a 64MiB stack (instead of 16MiB) so
+that all the tests can run with clang's sanitizing options.
33. Implement extra compile options in the compile context and add the first
one: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES.
@@ -898,9 +1094,9 @@ to the same code as '.' when PCRE2_DOTALL is set).
40. Fix two clang compiler warnings in pcre2test when only one code unit width
is supported.
-41. Upgrade RunTest to automatically re-run test 2 with a large (64M) stack if
-it fails when running the interpreter with a 16M stack (and if changing the
-stack size via pcre2test is possible). This avoids having to manually set a
+41. Upgrade RunTest to automatically re-run test 2 with a large (64MiB) stack
+if it fails when running the interpreter with a 16MiB stack (and if changing
+the stack size via pcre2test is possible). This avoids having to manually set a
large stack size when testing with clang.
42. Fix register overwrite in JIT when SSE2 acceleration is enabled.
diff --git a/HACKING b/HACKING
index d727add..f99616a 100644
--- a/HACKING
+++ b/HACKING
@@ -256,6 +256,7 @@ The following are followed by a length element, then a number of character code
values (which should match with the length):
META_MARK (*MARK:xxxx)
+META_COMMIT_ARG (*COMMIT:xxxx)
META_PRUNE_ARG (*PRUNE:xxx)
META_SKIP_ARG (*SKIP:xxxx)
META_THEN_ARG (*THEN:xxxx)
@@ -348,7 +349,7 @@ The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL, and
others) may be changed in the middle of patterns by items such as (?i). Their
processing is handled entirely at compile time by generating different opcodes
for the different settings. The runtime functions do not need to keep track of
-an options state.
+an option's state.
PCRE2_DUPNAMES, PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE
are tracked and processed during the parsing pre-pass. The others are handled
@@ -370,7 +371,7 @@ default value for LINK_SIZE is 2, except for the 32-bit library, where it can
only be 4. The 8-bit library can be compiled to use 3-byte or 4-byte values,
and the 16-bit library can be compiled to use 4-byte values, though this
impairs performance. Specifying a LINK_SIZE larger than 2 for these libraries is
-necessary only when patterns whose compiled length is greater than 64K code
+necessary only when patterns whose compiled length is greater than 65535 code
units are going to be processed. When a LINK_SIZE value uses more than one code
unit, the most significant unit is first.
@@ -382,7 +383,7 @@ that are counts (e.g. quantifiers) are always two bytes long in 8-bit mode
Opcodes with no following data
------------------------------
-These items are all just one unit long
+These items are all just one unit long:
OP_END end of pattern
OP_ANY match any one character other than newline
@@ -430,14 +431,22 @@ character). Another use is for [^] when empty classes are permitted
(PCRE2_ALLOW_EMPTY_CLASS is set).
-Backtracking control verbs with optional data
----------------------------------------------
+Backtracking control verbs
+--------------------------
-(*THEN) without an argument generates the opcode OP_THEN and no following data.
-OP_MARK is followed by the mark name, preceded by a length in one code unit,
-and followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with
-arguments, the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used,
-with the name following in the same format as OP_MARK.
+Verbs with no arguments generate opcodes with no following data (as listed
+in the section above).
+
+(*MARK:NAME) generates OP_MARK followed by the mark name, preceded by a
+length in one code unit, and followed by a binary zero. The name length is
+limited by the size of the code unit.
+
+(*ACCEPT:NAME) and (*FAIL:NAME) are compiled as (*MARK:NAME)(*ACCEPT) and
+(*MARK:NAME)(*FAIL) respectively.
+
+For (*COMMIT:NAME), (*PRUNE:NAME), (*SKIP:NAME), and (*THEN:NAME), the opcodes
+OP_COMMIT_ARG, OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the
+name following in the same format as for OP_MARK.
Matching literal characters
@@ -764,7 +773,7 @@ OP_RECURSE is followed by a LINK_SIZE value that is the offset to the starting
bracket from the start of the whole pattern. OP_RECURSE is also used for
"subroutine" calls, even though they are not strictly a recursion. Up till
release 10.30 recursions were treated as atomic groups, making them
-incompatible with Perl (but PCRE had then well before Perl did). From 10.30,
+incompatible with Perl (but PCRE had them well before Perl did). From 10.30,
backtracking into recursions is supported.
Repeated recursions used to be wrapped inside OP_ONCE brackets, which not only
@@ -814,4 +823,4 @@ not a real opcode, but is used to check at compile time that tables indexed by
opcode are the correct length, in order to catch updating errors.
Philip Hazel
-21 April 2017
+20 July 2018
diff --git a/LICENCE b/LICENCE
index bfe3c8d..b0f8804 100644
--- a/LICENCE
+++ b/LICENCE
@@ -4,11 +4,11 @@ PCRE2 LICENCE
PCRE2 is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
-Release 10 of PCRE2 is distributed under the terms of the "BSD" licence, as
-specified below, with one exemption for certain binary redistributions. The
-documentation for PCRE2, supplied in the "doc" directory, is distributed under
-the same terms as the software itself. The data in the testdata directory is
-not copyrighted and is in the public domain.
+Releases 10.00 and above of PCRE2 are distributed under the terms of the "BSD"
+licence, as specified below, with one exemption for certain binary
+redistributions. The documentation for PCRE2, supplied in the "doc" directory,
+is distributed under the same terms as the software itself. The data in the
+testdata directory is not copyrighted and is in the public domain.
The basic library functions are written in C and are freestanding. Also
included in the distribution is a just-in-time compiler that can be used to
@@ -35,7 +35,7 @@ PCRE2 JUST-IN-TIME COMPILATION SUPPORT
Written by: Zoltan Herczeg
Email local part: hzmester
-Emain domain: freemail.hu
+Email domain: freemail.hu
Copyright(c) 2010-2018 Zoltan Herczeg
All rights reserved.
@@ -46,7 +46,7 @@ STACK-LESS JUST-IN-TIME COMPILER
Written by: Zoltan Herczeg
Email local part: hzmester
-Emain domain: freemail.hu
+Email domain: freemail.hu
Copyright(c) 2009-2018 Zoltan Herczeg
All rights reserved.
diff --git a/Makefile.am b/Makefile.am
index d0efa12..a4bcdf6 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -231,7 +231,7 @@ noinst_PROGRAMS =
# and 'make maintainer-clean'.
CLEANFILES =
-DISTCLEANFILES = src/config.h.in~ config.h
+DISTCLEANFILES = src/config.h.in~ config.h pcre2.h.generic
MAINTAINERCLEANFILES =
# Additional files to bundle with the distribution, over and above what
@@ -657,7 +657,7 @@ EXTRA_DIST += \
testdata/testoutput7 \
testdata/testoutput8-16-2 \
testdata/testoutput8-16-3 \
- testdata/testoutput8-16-3 \
+ testdata/testoutput8-16-4 \
testdata/testoutput8-32-2 \
testdata/testoutput8-32-3 \
testdata/testoutput8-32-4 \
diff --git a/Makefile.in b/Makefile.in
index 47ec217..597b171 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -767,6 +767,8 @@ PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PCRE2_DATE = @PCRE2_DATE@
+PCRE2_HAVE_INTTYPES_H = @PCRE2_HAVE_INTTYPES_H@
+PCRE2_HAVE_STDINT_H = @PCRE2_HAVE_STDINT_H@
PCRE2_MAJOR = @PCRE2_MAJOR@
PCRE2_MINOR = @PCRE2_MINOR@
PCRE2_PRERELEASE = @PCRE2_PRERELEASE@
@@ -1064,7 +1066,8 @@ CLEANFILES = src/pcre2_chartables.c testSinput test3input test3output \
test3outputA test3outputB testtry teststdout teststderr \
teststderrgrep testtemp1grep testtemp2grep testtrygrep \
testNinputgrep
-DISTCLEANFILES = src/config.h.in~ config.h $(am__append_36)
+DISTCLEANFILES = src/config.h.in~ config.h pcre2.h.generic \
+ $(am__append_36)
MAINTAINERCLEANFILES = src/pcre2.h.generic src/config.h.generic
# Additional files to bundle with the distribution, over and above what
@@ -1129,7 +1132,7 @@ EXTRA_DIST = m4/ax_pthread.m4 m4/pcre2_visibility.m4 \
testdata/testoutput3A testdata/testoutput3B \
testdata/testoutput4 testdata/testoutput5 testdata/testoutput6 \
testdata/testoutput7 testdata/testoutput8-16-2 \
- testdata/testoutput8-16-3 testdata/testoutput8-16-3 \
+ testdata/testoutput8-16-3 testdata/testoutput8-16-4 \
testdata/testoutput8-32-2 testdata/testoutput8-32-3 \
testdata/testoutput8-32-4 testdata/testoutput8-8-2 \
testdata/testoutput8-8-3 testdata/testoutput8-8-4 \
diff --git a/NEWS b/NEWS
index 0093eb7..94345b3 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,33 @@
News about PCRE2 releases
-------------------------
+
+Version 10.32 10-September-2018
+-------------------------------
+
+This is another mainly bugfix and tidying release with a few minor
+enhancements. These are the main ones:
+
+1. pcre2grep now supports the inclusion of binary zeros in patterns that are
+read from files via the -f option.
+
+2. ./configure now supports --enable-jit=auto, which automatically enables JIT
+if the hardware supports it.
+
+3. In pcre2_dfa_match(), internal recursive calls no longer use the stack for
+local workspace and local ovectors. Instead, an initial block of stack is
+reserved, but if this is insufficient, heap memory is used. The heap limit
+parameter now applies to pcre2_dfa_match().
+
+4. Updated to Unicode version 11.0.0.
+
+5. (*ACCEPT:ARG), (*FAIL:ARG), and (*COMMIT:ARG) are now supported.
+
+6. Added support for \N{U+dddd}, but only in Unicode mode.
+
+7. Added support for (?^) to unset all imnsx options.
+
+
Version 10.31 12-February-2018
------------------------------
@@ -31,7 +58,7 @@ remembering backtracking positions. This makes --disable-stack-for-recursion a
NOOP. The new implementation allows backtracking into recursive group calls in
patterns, making it more compatible with Perl, and also fixes some other
previously hard-to-do issues. For patterns that have a lot of backtracking, the
-heap is now used, and there is explicit limit on the amount, settable by
+heap is now used, and there is an explicit limit on the amount, settable by
pcre2_set_heap_limit() or (*LIMIT_HEAP=xxx). The "recursion limit" is retained,
but is renamed as "depth limit" (though the old names remain for
compatibility).
@@ -53,7 +80,7 @@ also supported.
5. Additional compile options in the compile context are now available, and the
first two are: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES and
-PCRE2_EXTRA_BAD_ESCAPE_IS LITERAL.
+PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL.
6. The newline type PCRE2_NEWLINE_NUL is now available.
diff --git a/NON-AUTOTOOLS-BUILD b/NON-AUTOTOOLS-BUILD
index 0775794..b742ed3 100644
--- a/NON-AUTOTOOLS-BUILD
+++ b/NON-AUTOTOOLS-BUILD
@@ -10,6 +10,7 @@ This document contains the following sections:
Calling conventions in Windows environments
Comments about Win32 builds
Building PCRE2 on Windows with CMake
+ Building PCRE2 on Windows with Visual Studio
Testing with RunTest.bat
Building PCRE2 on native z/OS and z/VM
@@ -126,7 +127,7 @@ can skip ahead to the CMake section.
src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile
these yourself.
- Not also that the pcre2_fuzzsupport.c file contains special code that is
+ Note also that the pcre2_fuzzsupport.c file contains special code that is
useful to those who want to run fuzzing tests on the PCRE2 library. Unless
you are doing that, you can ignore it.
@@ -185,7 +186,7 @@ can skip ahead to the CMake section.
STACK SIZE IN WINDOWS ENVIRONMENTS
-Prior to release 10.30 the default system stack size of 1Mb in some Windows
+Prior to release 10.30 the default system stack size of 1MiB in some Windows
environments caused issues with some tests. This should no longer be the case
for 10.30 and later releases.
@@ -330,6 +331,18 @@ cache can be deleted by selecting "File > Delete Cache".
available for review in Testing\Temporary under your build dir.
+BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO
+
+The code currently cannot be compiled without a stdint.h header, which is
+available only in relatively recent versions of Visual Studio. However, this
+portable and permissively-licensed implementation of the header worked without
+issue:
+
+ http://www.azillionmonkeys.com/qed/pstdint.h
+
+Just rename it and drop it into the top level of the build tree.
+
+
TESTING WITH RUNTEST.BAT
If configured with CMake, building the test project ("make test" or building
@@ -382,6 +395,6 @@ Everything in that location, source and executable, is in EBCDIC and native
z/OS file formats. The port provides an API for LE languages such as COBOL and
for the z/OS and z/VM versions of the Rexx languages.
-===============================
-Last Updated: 13 September 2017
-===============================
+===========================
+Last Updated: 19 April 2018
+===========================
diff --git a/README b/README
index 52859a9..2eb621b 100644
--- a/README
+++ b/README
@@ -171,10 +171,12 @@ library. They are also documented in the pcre2build man page.
give large performance improvements on certain platforms, add --enable-jit to
the "configure" command. This support is available only for certain hardware
architectures. If you try to enable it on an unsupported architecture, there
- will be a compile time error. If you are running under SELinux you may also
- want to add --enable-jit-sealloc, which enables the use of an execmem
- allocator in JIT that is compatible with SELinux. This has no effect if JIT
- is not enabled.
+ will be a compile time error. If in doubt, use --enable-jit=auto, which
+ enables JIT only if the current hardware is supported.
+
+. If you are enabling JIT under SELinux you may also want to add
+ --enable-jit-sealloc, which enables the use of an execmem allocator in JIT
+ that is compatible with SELinux. This has no effect if JIT is not enabled.
. If you do not want to make use of the default support for UTF-8 Unicode
character strings in the 8-bit library, UTF-16 Unicode character strings in
@@ -239,9 +241,11 @@ library. They are also documented in the pcre2build man page.
discussion in the pcre2api man page (search for pcre2_set_match_limit).
. There is a separate counter that limits the depth of nested backtracking
- during a matching process, which indirectly limits the amount of heap memory
- that is used. This also has a default of ten million, which is essentially
- "unlimited". You can change the default by setting, for example,
+ (pcre2_match()) or nested function calls (pcre2_dfa_match()) during a
+ matching process, which indirectly limits the amount of heap memory that is
+ used, and in the case of pcre2_dfa_match() the amount of stack as well. This
+ counter also has a default of ten million, which is essentially "unlimited".
+ You can change the default by setting, for example,
--with-match-limit-depth=5000
@@ -249,16 +253,17 @@ library. They are also documented in the pcre2build man page.
pcre2_set_depth_limit).
. You can also set an explicit limit on the amount of heap memory used by
- the pcre2_match() interpreter:
+ the pcre2_match() and pcre2_dfa_match() interpreters:
--with-heap-limit=500
- The units are kilobytes. This limit does not apply when the JIT optimization
- (which has its own memory control features) is used. There is more discussion
- on the pcre2api man page (search for pcre2_set_heap_limit).
+ The units are kibibytes (units of 1024 bytes). This limit does not apply when
+ the JIT optimization (which has its own memory control features) is used.
+ There is more discussion on the pcre2api man page (search for
+ pcre2_set_heap_limit).
. In the 8-bit library, the default maximum compiled pattern size is around
- 64K bytes. You can increase this by adding --with-link-size=3 to the
+ 64 kibibytes. You can increase this by adding --with-link-size=3 to the
"configure" command. PCRE2 then uses three bytes instead of two for offsets
to different parts of the compiled pattern. In the 16-bit library,
--with-link-size=3 is the same as --with-link-size=4, which (in both
@@ -315,10 +320,10 @@ library. They are also documented in the pcre2build man page.
. When JIT support is enabled, pcre2grep automatically makes use of it, unless
you add --disable-pcre2grep-jit to the "configure" command.
-. On non-Windows sytems there is support for calling external scripts during
- matching in the pcre2grep command via PCRE2's callout facility with string
- arguments. This support can be disabled by adding --disable-pcre2grep-callout
- to the "configure" command.
+. There is support for calling external programs during matching in the
+ pcre2grep command, using PCRE2's callout facility with string arguments. This
+ support can be disabled by adding --disable-pcre2grep-callout to the
+ "configure" command.
. The pcre2grep program currently supports only 8-bit data files, and so
requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
@@ -883,4 +888,4 @@ The distribution should contain the files listed below.
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
-Last updated: 12 September 2017
+Last updated: 17 June 2018
diff --git a/RunGrepTest b/RunGrepTest
index a26f677..74ff4c1 100755
--- a/RunGrepTest
+++ b/RunGrepTest
@@ -4,6 +4,12 @@
# itself. What we are checking here is the file handling and options that are
# supported by pcre2grep. This script must be run in the build directory.
+# CODING CONVENTIONS:
+# * Put printf arguments in single, not double quotes to avoid unwanted
+# escaping.
+# * Use \0 for binary zero in printf, not \x0, for the benefit of older
+# versions.
+
# Set the C locale, so that sort(1) behaves predictably.
LC_ALL=C
@@ -600,7 +606,7 @@ echo "---------------------------- Test 118 -----------------------------" >>tes
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 119 -----------------------------" >>testtrygrep
-printf "123\n456\n789\n---abc\ndef\nxyz\n---\n" >testNinputgrep
+printf '123\n456\n789\n---abc\ndef\nxyz\n---\n' >testNinputgrep
$valgrind $vjs $pcre2grep -Mo '(\n|[^-])*---' testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep
@@ -631,7 +637,7 @@ echo "RC=$?" >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 125 -----------------------------" >>testtrygrep
-printf "abcd\n" >testNinputgrep
+printf 'abcd\n' >testNinputgrep
$valgrind $vjs $pcre2grep --colour=always '(?<=\K.)' testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep
$valgrind $vjs $pcre2grep --colour=always '(?=.\K)' testNinputgrep >>testtrygrep
@@ -641,6 +647,12 @@ echo "RC=$?" >>testtrygrep
$valgrind $vjs $pcre2grep --colour=always '(?=[ac]\K)' testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep
+echo "---------------------------- Test 126 -----------------------------" >>testtrygrep
+printf 'Next line pattern has binary zero\nABC\0XYZ\n' >testtemp1grep
+printf 'ABC\0XYZ\nABCDEF\nDEFABC\n' >testtemp2grep
+$valgrind $vjs $pcre2grep -a -f testtemp1grep testtemp2grep >>testtrygrep
+echo "RC=$?" >>testtrygrep
+
# Now compare the results.
@@ -681,36 +693,36 @@ fi
# starts with a hyphen. These tests are run in the build directory.
echo "Testing pcre2grep newline settings"
-printf "abc\rdef\r\nghi\njkl" >testNinputgrep
+printf 'abc\rdef\r\nghi\njkl' >testNinputgrep
-printf "%c--------------------------- Test N1 ------------------------------\r\n" - >testtrygrep
+printf '%c--------------------------- Test N1 ------------------------------\r\n' - >testtrygrep
$valgrind $vjs $pcre2grep -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
-printf "%c--------------------------- Test N2 ------------------------------\r\n" - >>testtrygrep
+printf '%c--------------------------- Test N2 ------------------------------\r\n' - >>testtrygrep
$valgrind $vjs $pcre2grep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
-printf "%c--------------------------- Test N3 ------------------------------\r\n" - >>testtrygrep
+printf '%c--------------------------- Test N3 ------------------------------\r\n' - >>testtrygrep
pattern=`printf 'def\rjkl'`
$valgrind $vjs $pcre2grep -n --newline=cr -F "$pattern" testNinputgrep >>testtrygrep
-printf "%c--------------------------- Test N4 ------------------------------\r\n" - >>testtrygrep
+printf '%c--------------------------- Test N4 ------------------------------\r\n' - >>testtrygrep
$valgrind $vjs $pcre2grep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinputgrep >>testtrygrep
-printf "%c--------------------------- Test N5 ------------------------------\r\n" - >>testtrygrep
+printf '%c--------------------------- Test N5 ------------------------------\r\n' - >>testtrygrep
$valgrind $vjs $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
-printf "%c--------------------------- Test N6 ------------------------------\r\n" - >>testtrygrep
+printf '%c--------------------------- Test N6 ------------------------------\r\n' - >>testtrygrep
$valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
-# It seems inpossible to handle NUL characters easily in Solaris (aka SunOS).
+# It seems impossible to handle NUL characters easily in Solaris (aka SunOS).
# The version of sed explicitly doesn't like them. For the moment, we just
# don't run this test under SunOS. Fudge the output so that the comparison
# works. A similar problem has also been reported for MacOS (Darwin).
-printf "%c--------------------------- Test N7 ------------------------------\r\n" - >>testtrygrep
+printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep
uname=`uname`
if [ "$uname" != "SunOS" -a "$uname" != "Darwin" ] ; then
- printf "abc\0def" >testNinputgrep
+ printf 'abc\0def' >testNinputgrep
$valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | sed 's/\x00/ZERO/' >>testtrygrep
echo "" >>testtrygrep
else
diff --git a/RunTest b/RunTest
index bc912da..39f04d4 100755
--- a/RunTest
+++ b/RunTest
@@ -500,7 +500,7 @@ for bmode in "$test8" "$test16" "$test32"; do
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
if [ $? = 0 ] ; then
- $sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -65,-62,-2,-1,0,100,101,191,200 >>testtry
+ $sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -70,-62,-2,-1,0,100,101,191,200 >>testtry
checkresult $? 2 "$opt"
fi
done
@@ -843,7 +843,7 @@ for bmode in "$test8" "$test16" "$test32"; do
checkresult $? 24 ""
fi
- # UTF pattern converson tests
+ # UTF pattern conversion tests
if [ "$do25" = yes ] ; then
echo $title25
diff --git a/RunTest.bat b/RunTest.bat
index 0cd8bcc..9474434 100644
--- a/RunTest.bat
+++ b/RunTest.bat
@@ -263,7 +263,7 @@ if errorlevel 1 (
set failed="yes"
goto :eof
) else if [%1]==[2] (
- %pcre2test% %mode% %4 %5 %6 %7 %8 %9 -error -63,-62,-2,-1,0,100,188,189,190,191 >>%2%bits%\%testoutput%
+ %pcre2test% %mode% %4 %5 %6 %7 %8 %9 -error -70,-62,-2,-1,0,100,101,191,200 >>%2%bits%\%testoutput%
)
set type=
diff --git a/aclocal.m4 b/aclocal.m4
index d88a48b..cc10b26 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -21,7 +21,7 @@ If you have problems, you may need to regenerate the build system entirely.
To do so, use the procedure documented by the package, typically 'autoreconf'.])])
# pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*-
-# serial 12 (pkg-config-0.29.2)
+# serial 11 (pkg-config-0.29.1)
dnl Copyright © 2004 Scott James Remnant <scott@netsplit.com>.
dnl Copyright © 2012-2015 Dan Nicholson <dbn.lists@gmail.com>
@@ -63,7 +63,7 @@ dnl
dnl See the "Since" comment for each macro you use to see what version
dnl of the macros you require.
m4_defun([PKG_PREREQ],
-[m4_define([PKG_MACROS_VERSION], [0.29.2])
+[m4_define([PKG_MACROS_VERSION], [0.29.1])
m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1,
[m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])])
])dnl PKG_PREREQ
@@ -164,7 +164,7 @@ AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl
pkg_failed=no
-AC_MSG_CHECKING([for $2])
+AC_MSG_CHECKING([for $1])
_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2])
_PKG_CONFIG([$1][_LIBS], [libs], [$2])
@@ -174,11 +174,11 @@ and $1[]_LIBS to avoid the need to call pkg-config.
See the pkg-config man page for more details.])
if test $pkg_failed = yes; then
- AC_MSG_RESULT([no])
+ AC_MSG_RESULT([no])
_PKG_SHORT_ERRORS_SUPPORTED
if test $_pkg_short_errors_supported = yes; then
$1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1`
- else
+ else
$1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1`
fi
# Put the nasty error message in config.log where it belongs
@@ -195,7 +195,7 @@ installed software in a non-standard prefix.
_PKG_TEXT])[]dnl
])
elif test $pkg_failed = untried; then
- AC_MSG_RESULT([no])
+ AC_MSG_RESULT([no])
m4_default([$4], [AC_MSG_FAILURE(
[The pkg-config script could not be found or is too old. Make sure it
is in your PATH or set the PKG_CONFIG environment variable to the full
@@ -296,6 +296,74 @@ AS_VAR_COPY([$1], [pkg_cv_][$1])
AS_VAR_IF([$1], [""], [$5], [$4])dnl
])dnl PKG_CHECK_VAR
+dnl PKG_WITH_MODULES(VARIABLE-PREFIX, MODULES,
+dnl [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND],
+dnl [DESCRIPTION], [DEFAULT])
+dnl ------------------------------------------
+dnl
+dnl Prepare a "--with-" configure option using the lowercase
+dnl [VARIABLE-PREFIX] name, merging the behaviour of AC_ARG_WITH and
+dnl PKG_CHECK_MODULES in a single macro.
+AC_DEFUN([PKG_WITH_MODULES],
+[
+m4_pushdef([with_arg], m4_tolower([$1]))
+
+m4_pushdef([description],
+ [m4_default([$5], [build with ]with_arg[ support])])
+
+m4_pushdef([def_arg], [m4_default([$6], [auto])])
+m4_pushdef([def_action_if_found], [AS_TR_SH([with_]with_arg)=yes])
+m4_pushdef([def_action_if_not_found], [AS_TR_SH([with_]with_arg)=no])
+
+m4_case(def_arg,
+ [yes],[m4_pushdef([with_without], [--without-]with_arg)],
+ [m4_pushdef([with_without],[--with-]with_arg)])
+
+AC_ARG_WITH(with_arg,
+ AS_HELP_STRING(with_without, description[ @<:@default=]def_arg[@:>@]),,
+ [AS_TR_SH([with_]with_arg)=def_arg])
+
+AS_CASE([$AS_TR_SH([with_]with_arg)],
+ [yes],[PKG_CHECK_MODULES([$1],[$2],$3,$4)],
+ [auto],[PKG_CHECK_MODULES([$1],[$2],
+ [m4_n([def_action_if_found]) $3],
+ [m4_n([def_action_if_not_found]) $4])])
+
+m4_popdef([with_arg])
+m4_popdef([description])
+m4_popdef([def_arg])
+
+])dnl PKG_WITH_MODULES
+
+dnl PKG_HAVE_WITH_MODULES(VARIABLE-PREFIX, MODULES,
+dnl [DESCRIPTION], [DEFAULT])
+dnl -----------------------------------------------
+dnl
+dnl Convenience macro to trigger AM_CONDITIONAL after PKG_WITH_MODULES
+dnl check. HAVE_[VARIABLE-PREFIX] is exported as make variable.
+AC_DEFUN([PKG_HAVE_WITH_MODULES],
+[
+PKG_WITH_MODULES([$1],[$2],,,[$3],[$4])
+
+AM_CONDITIONAL([HAVE_][$1],
+ [test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"])
+])dnl PKG_HAVE_WITH_MODULES
+
+dnl PKG_HAVE_DEFINE_WITH_MODULES(VARIABLE-PREFIX, MODULES,
+dnl [DESCRIPTION], [DEFAULT])
+dnl ------------------------------------------------------
+dnl
+dnl Convenience macro to run AM_CONDITIONAL and AC_DEFINE after
+dnl PKG_WITH_MODULES check. HAVE_[VARIABLE-PREFIX] is exported as make
+dnl and preprocessor variable.
+AC_DEFUN([PKG_HAVE_DEFINE_WITH_MODULES],
+[
+PKG_HAVE_WITH_MODULES([$1],[$2],[$3],[$4])
+
+AS_IF([test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"],
+ [AC_DEFINE([HAVE_][$1], 1, [Enable ]m4_tolower([$1])[ support])])
+])dnl PKG_HAVE_DEFINE_WITH_MODULES
+
# Copyright (C) 2002-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
diff --git a/configure b/configure
index 7064b11..6091d75 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for PCRE2 10.31.
+# Generated by GNU Autoconf 2.69 for PCRE2 10.32.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -587,8 +587,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='PCRE2'
PACKAGE_TARNAME='pcre2'
-PACKAGE_VERSION='10.31'
-PACKAGE_STRING='PCRE2 10.31'
+PACKAGE_VERSION='10.32'
+PACKAGE_STRING='PCRE2 10.32'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -677,6 +677,8 @@ WITH_PCRE2_16_FALSE
WITH_PCRE2_16_TRUE
WITH_PCRE2_8_FALSE
WITH_PCRE2_8_TRUE
+PCRE2_HAVE_INTTYPES_H
+PCRE2_HAVE_STDINT_H
enable_pcre2_32
enable_pcre2_16
enable_pcre2_8
@@ -1411,7 +1413,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures PCRE2 10.31 to adapt to many kinds of systems.
+\`configure' configures PCRE2 10.32 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1481,7 +1483,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of PCRE2 10.31:";;
+ short | recursive ) echo "Configuration of PCRE2 10.32:";;
esac
cat <<\_ACEOF
@@ -1560,7 +1562,7 @@ Optional Packages:
--with-link-size=N internal link size (2, 3, or 4 allowed; default=2)
--with-parens-nest-limit=N
nested parentheses limit (default=250)
- --with-heap-limit=N default limit on heap memory (kilobytes,
+ --with-heap-limit=N default limit on heap memory (kibibytes,
default=20000000)
--with-match-limit=N default limit on internal looping (default=10000000)
--with-match-limit-depth=N
@@ -1657,7 +1659,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-PCRE2 configure 10.31
+PCRE2 configure 10.32
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2152,7 +2154,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by PCRE2 $as_me 10.31, which was
+It was created by PCRE2 $as_me 10.32, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -3016,7 +3018,7 @@ fi
# Define the identity of the package.
PACKAGE='pcre2'
- VERSION='10.31'
+ VERSION='10.32'
cat >>confdefs.h <<_ACEOF
@@ -12976,9 +12978,9 @@ _ACEOF
# Versioning
PCRE2_MAJOR="10"
-PCRE2_MINOR="31"
+PCRE2_MINOR="32"
PCRE2_PRERELEASE=""
-PCRE2_DATE="2018-02-12"
+PCRE2_DATE="2018-09-10"
if test "$PCRE2_MINOR" = "08" -o "$PCRE2_MINOR" = "09"
then
@@ -13077,6 +13079,32 @@ else
fi
+# This code enables JIT if the hardware supports it.
+if test "$enable_jit" = "auto"; then
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+ #define SLJIT_CONFIG_AUTO 1
+ #include "src/sljit/sljitConfigInternal.h"
+ #if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
+ #error unsupported
+ #endif
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ enable_jit=yes
+else
+ enable_jit=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ echo checking for JIT support on this hardware... $enable_jit
+fi
+
# Handle --enable-jit-sealloc (disabled by default)
# Check whether --enable-jit-sealloc was given.
if test "${enable_jit_sealloc+set}" = set; then :
@@ -13585,6 +13613,37 @@ fi
done
+for ac_header in stdint.h
+do :
+ ac_fn_c_check_header_mongrel "$LINENO" "stdint.h" "ac_cv_header_stdint_h" "$ac_includes_default"
+if test "x$ac_cv_header_stdint_h" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_STDINT_H 1
+_ACEOF
+ PCRE2_HAVE_STDINT_H=1
+else
+ PCRE2_HAVE_STDINT_H=0
+fi
+
+done
+
+for ac_header in inttypes.h
+do :
+ ac_fn_c_check_header_mongrel "$LINENO" "inttypes.h" "ac_cv_header_inttypes_h" "$ac_includes_default"
+if test "x$ac_cv_header_inttypes_h" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_INTTYPES_H 1
+_ACEOF
+ PCRE2_HAVE_INTTYPES_H=1
+else
+ PCRE2_HAVE_INTTYPES_H=0
+fi
+
+done
+
+
+
+
# Conditional compilation
if test "x$enable_pcre2_8" = "xyes"; then
WITH_PCRE2_8_TRUE=
@@ -14905,16 +14964,16 @@ esac
# are m4 variables, assigned above.
EXTRA_LIBPCRE2_8_LDFLAGS="$EXTRA_LIBPCRE2_8_LDFLAGS \
- $NO_UNDEFINED -version-info 7:0:7"
+ $NO_UNDEFINED -version-info 7:1:7"
EXTRA_LIBPCRE2_16_LDFLAGS="$EXTRA_LIBPCRE2_16_LDFLAGS \
- $NO_UNDEFINED -version-info 7:0:7"
+ $NO_UNDEFINED -version-info 7:1:7"
EXTRA_LIBPCRE2_32_LDFLAGS="$EXTRA_LIBPCRE2_32_LDFLAGS \
- $NO_UNDEFINED -version-info 7:0:7"
+ $NO_UNDEFINED -version-info 7:1:7"
EXTRA_LIBPCRE2_POSIX_LDFLAGS="$EXTRA_LIBPCRE2_POSIX_LDFLAGS \
- $NO_UNDEFINED -version-info 2:0:0"
+ $NO_UNDEFINED -version-info 2:1:0"
@@ -14923,7 +14982,7 @@ EXTRA_LIBPCRE2_POSIX_LDFLAGS="$EXTRA_LIBPCRE2_POSIX_LDFLAGS \
# When we run 'make distcheck', use these arguments. Turning off compiler
# optimization makes it run faster.
-DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre2-16 --enable-pcre2-32 --enable-jit --enable-utf"
+DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre2-16 --enable-pcre2-32 --enable-jit"
# Check that, if --enable-pcre2grep-libz or --enable-pcre2grep-libbz2 is
@@ -15114,8 +15173,8 @@ $as_echo "no" >&6; }
fi
pkg_failed=no
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for valgrind" >&5
-$as_echo_n "checking for valgrind... " >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for VALGRIND" >&5
+$as_echo_n "checking for VALGRIND... " >&6; }
if test -n "$VALGRIND_CFLAGS"; then
pkg_cv_VALGRIND_CFLAGS="$VALGRIND_CFLAGS"
@@ -15155,7 +15214,7 @@ fi
if test $pkg_failed = yes; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
@@ -15182,7 +15241,7 @@ Alternatively, you may set the environment variables VALGRIND_CFLAGS
and VALGRIND_LIBS to avoid the need to call pkg-config.
See the pkg-config man page for more details." "$LINENO" 5
elif test $pkg_failed = untried; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
@@ -15957,7 +16016,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by PCRE2 $as_me 10.31, which was
+This file was extended by PCRE2 $as_me 10.32, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -16023,7 +16082,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-PCRE2 config.status 10.31
+PCRE2 config.status 10.32
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
@@ -17774,7 +17833,7 @@ $PACKAGE-$VERSION configuration summary:
Rebuild char tables ................ : ${enable_rebuild_chartables}
Internal link size ................. : ${with_link_size}
Nested parentheses limit ........... : ${with_parens_nest_limit}
- Heap limit ......................... : ${with_heap_limit} kilobytes
+ Heap limit ......................... : ${with_heap_limit} kibibytes
Match limit ........................ : ${with_match_limit}
Match depth limit .................. : ${with_match_limit_depth}
Build shared libs .................. : ${enable_shared}
diff --git a/configure.ac b/configure.ac
index 2164e4c..c43ae38 100644
--- a/configure.ac
+++ b/configure.ac
@@ -9,18 +9,18 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
dnl be defined as -RC2, for example. For real releases, it should be empty.
m4_define(pcre2_major, [10])
-m4_define(pcre2_minor, [31])
+m4_define(pcre2_minor, [32])
m4_define(pcre2_prerelease, [])
-m4_define(pcre2_date, [2018-02-12])
+m4_define(pcre2_date, [2018-09-10])
# NOTE: The CMakeLists.txt file searches for the above variables in the first
# 50 lines of this file. Please update that if the variables above are moved.
# Libtool shared library interface versions (current:revision:age)
-m4_define(libpcre2_8_version, [7:0:7])
-m4_define(libpcre2_16_version, [7:0:7])
-m4_define(libpcre2_32_version, [7:0:7])
-m4_define(libpcre2_posix_version, [2:0:0])
+m4_define(libpcre2_8_version, [7:1:7])
+m4_define(libpcre2_16_version, [7:1:7])
+m4_define(libpcre2_32_version, [7:1:7])
+m4_define(libpcre2_posix_version, [2:1:0])
AC_PREREQ(2.57)
AC_INIT(PCRE2, pcre2_major.pcre2_minor[]pcre2_prerelease, , pcre2)
@@ -143,6 +143,18 @@ AC_ARG_ENABLE(jit,
[enable Just-In-Time compiling support]),
, enable_jit=no)
+# This code enables JIT if the hardware supports it.
+if test "$enable_jit" = "auto"; then
+ AC_LANG(C)
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+ #define SLJIT_CONFIG_AUTO 1
+ #include "src/sljit/sljitConfigInternal.h"
+ #if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
+ #error unsupported
+ #endif]])], enable_jit=yes, enable_jit=no)
+ echo checking for JIT support on this hardware... $enable_jit
+fi
+
# Handle --enable-jit-sealloc (disabled by default)
AC_ARG_ENABLE(jit-sealloc,
AS_HELP_STRING([--enable-jit-sealloc],
@@ -276,7 +288,7 @@ AC_ARG_WITH(parens-nest-limit,
# Handle --with-heap-limit
AC_ARG_WITH(heap-limit,
AS_HELP_STRING([--with-heap-limit=N],
- [default limit on heap memory (kilobytes, default=20000000)]),
+ [default limit on heap memory (kibibytes, default=20000000)]),
, with_heap_limit=20000000)
# Handle --with-match-limit=N
@@ -423,10 +435,10 @@ to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
but if you do, default values will be taken from config.h for non-boolean
macros that are not defined on the command line.
-Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be defined
-(conventionally to 1) for TRUE, and not defined at all for FALSE. All such
-macros are listed as a commented #undef in config.h.generic. Macros such as
-MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
+Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
+defined (conventionally to 1) for TRUE, or not defined at all for FALSE. All
+such macros are listed as a commented #undef in config.h.generic. Macros such
+as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
@@ -439,6 +451,11 @@ AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h)
AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1])
AC_CHECK_HEADERS([sys/wait.h], [HAVE_SYS_WAIT_H=1])
+AC_CHECK_HEADERS([stdint.h], [PCRE2_HAVE_STDINT_H=1], [PCRE2_HAVE_STDINT_H=0])
+AC_CHECK_HEADERS([inttypes.h], [PCRE2_HAVE_INTTYPES_H=1], [PCRE2_HAVE_INTTYPES_H=0])
+AC_SUBST([PCRE2_HAVE_STDINT_H])
+AC_SUBST([PCRE2_HAVE_INTTYPES_H])
+
# Conditional compilation
AM_CONDITIONAL(WITH_PCRE2_8, test "x$enable_pcre2_8" = "xyes")
AM_CONDITIONAL(WITH_PCRE2_16, test "x$enable_pcre2_16" = "xyes")
@@ -694,8 +711,8 @@ fi
AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
The value of LINK_SIZE determines the number of bytes used to store
links as offsets within the compiled regex. The default is 2, which
- allows for compiled patterns up to 64K long. This covers the vast
- majority of cases. However, PCRE2 can also be compiled to use 3 or 4
+ allows for compiled patterns up to 65535 code units long. This covers the
+ vast majority of cases. However, PCRE2 can also be compiled to use 3 or 4
bytes instead. This allows for longer patterns in extreme cases.])
AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
@@ -706,10 +723,11 @@ AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
The value of MATCH_LIMIT determines the default number of times the
pcre2_match() function can record a backtrack position during a single
- matching attempt. There is a runtime interface for setting a different limit.
- The limit exists in order to catch runaway regular expressions that take for
- ever to determine that they do not match. The default is set very large so
- that it does not accidentally catch legitimate cases.])
+ matching attempt. The value is also used to limit a loop counter in
+ pcre2_dfa_match(). There is a runtime interface for setting a different
+ limit. The limit exists in order to catch runaway regular expressions that
+ take for ever to determine that they do not match. The default is set very
+ large so that it does not accidentally catch legitimate cases.])
# --with-match-limit-recursion is an obsolete synonym for --with-match-limit-depth
@@ -733,11 +751,15 @@ AC_DEFINE_UNQUOTED([MATCH_LIMIT_DEPTH], [$with_match_limit_depth], [
the maximum amount of heap memory that is used. The value of
MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it must
be less than the value of MATCH_LIMIT. The default is to use the same value
- as MATCH_LIMIT. There is a runtime method for setting a different limit.])
+ as MATCH_LIMIT. There is a runtime method for setting a different limit. In
+ the case of pcre2_dfa_match(), this limit controls the depth of the internal
+ nested function calls that are used for pattern recursions, lookarounds, and
+ atomic groups.])
AC_DEFINE_UNQUOTED([HEAP_LIMIT], [$with_heap_limit], [
- This limits the amount of memory that pcre2_match() may use while matching
- a pattern. The value is in kilobytes.])
+ This limits the amount of memory that may be used while matching
+ a pattern. It applies to both pcre2_match() and pcre2_dfa_match(). It does
+ not apply to JIT matching. The value is in kibibytes (units of 1024 bytes).])
AC_DEFINE([MAX_NAME_SIZE], [32], [
This limit is parameterized just in case anybody ever wants to
@@ -817,7 +839,7 @@ AC_SUBST(EXTRA_LIBPCRE2_POSIX_LDFLAGS)
# When we run 'make distcheck', use these arguments. Turning off compiler
# optimization makes it run faster.
-DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre2-16 --enable-pcre2-32 --enable-jit --enable-utf"
+DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre2-16 --enable-pcre2-32 --enable-jit"
AC_SUBST(DISTCHECK_CONFIGURE_FLAGS)
# Check that, if --enable-pcre2grep-libz or --enable-pcre2grep-libbz2 is
@@ -1000,7 +1022,7 @@ $PACKAGE-$VERSION configuration summary:
Rebuild char tables ................ : ${enable_rebuild_chartables}
Internal link size ................. : ${with_link_size}
Nested parentheses limit ........... : ${with_parens_nest_limit}
- Heap limit ......................... : ${with_heap_limit} kilobytes
+ Heap limit ......................... : ${with_heap_limit} kibibytes
Match limit ........................ : ${with_match_limit}
Match depth limit .................. : ${with_match_limit_depth}
Build shared libs .................. : ${enable_shared}
diff --git a/doc/html/NON-AUTOTOOLS-BUILD.txt b/doc/html/NON-AUTOTOOLS-BUILD.txt
index 0775794..b742ed3 100644
--- a/doc/html/NON-AUTOTOOLS-BUILD.txt
+++ b/doc/html/NON-AUTOTOOLS-BUILD.txt
@@ -10,6 +10,7 @@ This document contains the following sections:
Calling conventions in Windows environments
Comments about Win32 builds
Building PCRE2 on Windows with CMake
+ Building PCRE2 on Windows with Visual Studio
Testing with RunTest.bat
Building PCRE2 on native z/OS and z/VM
@@ -126,7 +127,7 @@ can skip ahead to the CMake section.
src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile
these yourself.
- Not also that the pcre2_fuzzsupport.c file contains special code that is
+ Note also that the pcre2_fuzzsupport.c file contains special code that is
useful to those who want to run fuzzing tests on the PCRE2 library. Unless
you are doing that, you can ignore it.
@@ -185,7 +186,7 @@ can skip ahead to the CMake section.
STACK SIZE IN WINDOWS ENVIRONMENTS
-Prior to release 10.30 the default system stack size of 1Mb in some Windows
+Prior to release 10.30 the default system stack size of 1MiB in some Windows
environments caused issues with some tests. This should no longer be the case
for 10.30 and later releases.
@@ -330,6 +331,18 @@ cache can be deleted by selecting "File > Delete Cache".
available for review in Testing\Temporary under your build dir.
+BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO
+
+The code currently cannot be compiled without a stdint.h header, which is
+available only in relatively recent versions of Visual Studio. However, this
+portable and permissively-licensed implementation of the header worked without
+issue:
+
+ http://www.azillionmonkeys.com/qed/pstdint.h
+
+Just rename it to stdint.h and drop it into the top level of the build tree.
+
+
TESTING WITH RUNTEST.BAT
If configured with CMake, building the test project ("make test" or building
@@ -382,6 +395,6 @@ Everything in that location, source and executable, is in EBCDIC and native
z/OS file formats. The port provides an API for LE languages such as COBOL and
for the z/OS and z/VM versions of the Rexx languages.
-===============================
-Last Updated: 13 September 2017
-===============================
+===========================
+Last Updated: 19 April 2018
+===========================
diff --git a/doc/html/README.txt b/doc/html/README.txt
index 52859a9..2eb621b 100644
--- a/doc/html/README.txt
+++ b/doc/html/README.txt
@@ -171,10 +171,12 @@ library. They are also documented in the pcre2build man page.
give large performance improvements on certain platforms, add --enable-jit to
the "configure" command. This support is available only for certain hardware
architectures. If you try to enable it on an unsupported architecture, there
- will be a compile time error. If you are running under SELinux you may also
- want to add --enable-jit-sealloc, which enables the use of an execmem
- allocator in JIT that is compatible with SELinux. This has no effect if JIT
- is not enabled.
+ will be a compile time error. If in doubt, use --enable-jit=auto, which
+ enables JIT only if the current hardware is supported.
+
+. If you are enabling JIT under SELinux you may also want to add
+ --enable-jit-sealloc, which enables the use of an execmem allocator in JIT
+ that is compatible with SELinux. This has no effect if JIT is not enabled.
. If you do not want to make use of the default support for UTF-8 Unicode
character strings in the 8-bit library, UTF-16 Unicode character strings in
@@ -239,9 +241,11 @@ library. They are also documented in the pcre2build man page.
discussion in the pcre2api man page (search for pcre2_set_match_limit).
. There is a separate counter that limits the depth of nested backtracking
- during a matching process, which indirectly limits the amount of heap memory
- that is used. This also has a default of ten million, which is essentially
- "unlimited". You can change the default by setting, for example,
+ (pcre2_match()) or nested function calls (pcre2_dfa_match()) during a
+ matching process, which indirectly limits the amount of heap memory that is
+ used, and in the case of pcre2_dfa_match() the amount of stack as well. This
+ counter also has a default of ten million, which is essentially "unlimited".
+ You can change the default by setting, for example,
--with-match-limit-depth=5000
@@ -249,16 +253,17 @@ library. They are also documented in the pcre2build man page.
pcre2_set_depth_limit).
. You can also set an explicit limit on the amount of heap memory used by
- the pcre2_match() interpreter:
+ the pcre2_match() and pcre2_dfa_match() interpreters:
--with-heap-limit=500
- The units are kilobytes. This limit does not apply when the JIT optimization
- (which has its own memory control features) is used. There is more discussion
- on the pcre2api man page (search for pcre2_set_heap_limit).
+ The units are kibibytes (units of 1024 bytes). This limit does not apply when
+ the JIT optimization (which has its own memory control features) is used.
+ There is more discussion on the pcre2api man page (search for
+ pcre2_set_heap_limit).
. In the 8-bit library, the default maximum compiled pattern size is around
- 64K bytes. You can increase this by adding --with-link-size=3 to the
+ 64 kibibytes. You can increase this by adding --with-link-size=3 to the
"configure" command. PCRE2 then uses three bytes instead of two for offsets
to different parts of the compiled pattern. In the 16-bit library,
--with-link-size=3 is the same as --with-link-size=4, which (in both
@@ -315,10 +320,10 @@ library. They are also documented in the pcre2build man page.
. When JIT support is enabled, pcre2grep automatically makes use of it, unless
you add --disable-pcre2grep-jit to the "configure" command.
-. On non-Windows sytems there is support for calling external scripts during
- matching in the pcre2grep command via PCRE2's callout facility with string
- arguments. This support can be disabled by adding --disable-pcre2grep-callout
- to the "configure" command.
+. There is support for calling external programs during matching in the
+ pcre2grep command, using PCRE2's callout facility with string arguments. This
+ support can be disabled by adding --disable-pcre2grep-callout to the
+ "configure" command.
. The pcre2grep program currently supports only 8-bit data files, and so
requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
@@ -883,4 +888,4 @@ The distribution should contain the files listed below.
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
-Last updated: 12 September 2017
+Last updated: 17 June 2018
diff --git a/doc/html/index.html b/doc/html/index.html
index b9393d9..82b43c3 100644
--- a/doc/html/index.html
+++ b/doc/html/index.html
@@ -141,7 +141,7 @@ in the library.
<td>&nbsp;&nbsp;Free a general context</td></tr>
<tr><td><a href="pcre2_get_error_message.html">pcre2_get_error_message</a></td>
- <td>&nbsp;&nbsp;Free study data</td></tr>
+ <td>&nbsp;&nbsp;Get textual error message for error number</td></tr>
<tr><td><a href="pcre2_get_mark.html">pcre2_get_mark</a></td>
<td>&nbsp;&nbsp;Get a (*MARK) name</td></tr>
diff --git a/doc/html/pcre2.html b/doc/html/pcre2.html
index b61c579..bc588c4 100644
--- a/doc/html/pcre2.html
+++ b/doc/html/pcre2.html
@@ -23,12 +23,19 @@ please consult the man page, in case the conversion went wrong.
<P>
PCRE2 is the name used for a revised API for the PCRE library, which is a set
of functions, written in C, that implement regular expression pattern matching
-using the same syntax and semantics as Perl, with just a few differences. Some
-features that appeared in Python and the original PCRE before they appeared in
-Perl are also available using the Python syntax. There is also some support for
-one or two .NET and Oniguruma syntax items, and there are options for
-requesting some minor changes that give better ECMAScript (aka JavaScript)
-compatibility.
+using the same syntax and semantics as Perl, with just a few differences. After
+nearly two decades, the limitations of the original API were making development
+increasingly difficult. The new API is more extensible, and it was simplified
+by abolishing the separate "study" optimizing function; in PCRE2, patterns are
+automatically optimized where possible. Since forking from PCRE1, the code has
+been extensively refactored and new features introduced.
+</P>
+<P>
+As well as Perl-style regular expression patterns, some features that appeared
+in Python and the original PCRE before they appeared in Perl are available
+using the Python syntax. There is also some support for one or two .NET and
+Oniguruma syntax items, and there are options for requesting some minor changes
+that give better ECMAScript (aka JavaScript) compatibility.
</P>
<P>
The source code for PCRE2 can be compiled to support 8-bit, 16-bit, or 32-bit
@@ -157,6 +164,7 @@ listing), and the short pages for individual functions, are concatenated in
pcre2build building PCRE2
pcre2callout details of the callout feature
pcre2compat discussion of Perl compatibility
+ pcre2convert details of pattern conversion functions
pcre2demo a demonstration C program that uses PCRE2
pcre2grep description of the <b>pcre2grep</b> command (8-bit only)
pcre2jit discussion of just-in-time optimization support
@@ -167,6 +175,7 @@ listing), and the short pages for individual functions, are concatenated in
pcre2perform discussion of performance issues
pcre2posix the POSIX-compatible C API for the 8-bit library
pcre2sample discussion of the pcre2demo program
+ pcre2serialize details of pattern serialization
pcre2syntax quick syntax reference
pcre2test description of the <b>pcre2test</b> command
pcre2unicode discussion of Unicode and UTF support
@@ -189,9 +198,9 @@ use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
</P>
<br><a name="SEC5" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 01 April 2017
+Last updated: 11 July 2018
<br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2_code_free.html b/doc/html/pcre2_code_free.html
index 5fce3c5..ff302fc 100644
--- a/doc/html/pcre2_code_free.html
+++ b/doc/html/pcre2_code_free.html
@@ -25,7 +25,8 @@ SYNOPSIS
DESCRIPTION
</b><br>
<P>
-This function frees the memory used for a compiled pattern, including any
+If <i>code</i> is NULL, this function does nothing. Otherwise, <i>code</i> must
+point to a compiled pattern. This function frees its memory, including any
memory used by the JIT compiler. If the compiled pattern was created by a call
to <b>pcre2_code_copy_with_tables()</b>, the memory for the character tables is
also freed.
diff --git a/doc/html/pcre2_compile.html b/doc/html/pcre2_compile.html
index 0a9eafa..d109eeb 100644
--- a/doc/html/pcre2_compile.html
+++ b/doc/html/pcre2_compile.html
@@ -65,7 +65,7 @@ The option bits are:
PCRE2_EXTENDED Ignore white space and # comments
PCRE2_FIRSTLINE Force matching to be before newline
PCRE2_LITERAL Pattern characters are all literal
- PCRE2_MATCH_UNSET_BACKREF Match unset back references
+ PCRE2_MATCH_UNSET_BACKREF Match unset backreferences
PCRE2_MULTILINE ^ and $ match newlines within data
PCRE2_NEVER_BACKSLASH_C Lock out the use of \C in patterns
PCRE2_NEVER_UCP Lock out PCRE2_UCP, e.g. via (*UCP)
diff --git a/doc/html/pcre2_compile_context_free.html b/doc/html/pcre2_compile_context_free.html
index ea67a18..b4159b1 100644
--- a/doc/html/pcre2_compile_context_free.html
+++ b/doc/html/pcre2_compile_context_free.html
@@ -27,7 +27,8 @@ DESCRIPTION
<P>
This function frees the memory occupied by a compile context, using the memory
freeing function from the general context with which it was created, or
-<b>free()</b> if that was not set.
+<b>free()</b> if that was not set. If the argument is NULL, the function returns
+immediately without doing anything.
</P>
<P>
There is a complete description of the PCRE2 native API in the
diff --git a/doc/html/pcre2_convert_context_free.html b/doc/html/pcre2_convert_context_free.html
index ab6db6c..e9b142b 100644
--- a/doc/html/pcre2_convert_context_free.html
+++ b/doc/html/pcre2_convert_context_free.html
@@ -28,7 +28,8 @@ DESCRIPTION
This function is part of an experimental set of pattern conversion functions.
It frees the memory occupied by a convert context, using the memory
freeing function from the general context with which it was created, or
-<b>free()</b> if that was not set.
+<b>free()</b> if that was not set. If the argument is NULL, the function returns
+immediately without doing anything.
</P>
<P>
The pattern conversion functions are described in the
diff --git a/doc/html/pcre2_converted_pattern_free.html b/doc/html/pcre2_converted_pattern_free.html
index 11adefd..01d28d7 100644
--- a/doc/html/pcre2_converted_pattern_free.html
+++ b/doc/html/pcre2_converted_pattern_free.html
@@ -28,7 +28,8 @@ DESCRIPTION
This function is part of an experimental set of pattern conversion functions.
It frees the memory occupied by a converted pattern that was obtained by
calling <b>pcre2_pattern_convert()</b> with arguments that caused it to place
-the converted pattern into newly obtained heap memory.
+the converted pattern into newly obtained heap memory. If the argument is NULL,
+the function returns immediately without doing anything.
</P>
<P>
The pattern conversion functions are described in the
diff --git a/doc/html/pcre2_dfa_match.html b/doc/html/pcre2_dfa_match.html
index 36d7976..8702cca 100644
--- a/doc/html/pcre2_dfa_match.html
+++ b/doc/html/pcre2_dfa_match.html
@@ -46,9 +46,9 @@ just once (except when processing lookaround assertions). This function is
<i>wscount</i> Number of elements in the vector
</pre>
For <b>pcre2_dfa_match()</b>, a match context is needed only if you want to set
-up a callout function or specify the match and/or the recursion depth limits.
-The <i>length</i> and <i>startoffset</i> values are code units, not characters.
-The options are:
+up a callout function or specify the heap limit, the match limit, or the
+recursion depth limit. The <i>length</i> and <i>startoffset</i> values are code units, not
+characters. The options are:
<pre>
PCRE2_ANCHORED Match only at the first position
PCRE2_ENDANCHORED Pattern can match only at end of subject
diff --git a/doc/html/pcre2_general_context_free.html b/doc/html/pcre2_general_context_free.html
index ec818ef..9f335f5 100644
--- a/doc/html/pcre2_general_context_free.html
+++ b/doc/html/pcre2_general_context_free.html
@@ -26,7 +26,8 @@ DESCRIPTION
</b><br>
<P>
This function frees the memory occupied by a general context, using the memory
-freeing function within the context, if set.
+freeing function within the context, if set. If the argument is NULL, the
+function returns immediately without doing anything.
</P>
<P>
There is a complete description of the PCRE2 native API in the
diff --git a/doc/html/pcre2_jit_stack_assign.html b/doc/html/pcre2_jit_stack_assign.html
index e7e3a6a..4b3abb9 100644
--- a/doc/html/pcre2_jit_stack_assign.html
+++ b/doc/html/pcre2_jit_stack_assign.html
@@ -38,7 +38,11 @@ passed to a matching function. The arguments of this function are:
</PRE>
</P>
<P>
-If <i>callback</i> is NULL and <i>callback_data</i> is NULL, an internal 32K
+If <i>mcontext</i> is NULL, the function returns immediately, without doing
+anything.
+</P>
+<P>
+If <i>callback</i> is NULL and <i>callback_data</i> is NULL, an internal 32KiB
block on the machine stack is used.
</P>
<P>
@@ -49,8 +53,9 @@ If <i>callback</i> is NULL and <i>callback_data</i> is not NULL,
<P>
If <i>callback</i> is not NULL, it is called with <i>callback_data</i> as an
argument at the start of matching, in order to set up a JIT stack. If the
-result is NULL, the internal 32K stack is used; otherwise the return value must
-be a valid JIT stack, the result of calling <b>pcre2_jit_stack_create()</b>.
+result is NULL, the internal 32KiB stack is used; otherwise the return value
+must be a valid JIT stack, the result of calling
+<b>pcre2_jit_stack_create()</b>.
</P>
<P>
You may safely use the same JIT stack for multiple patterns, as long as they
diff --git a/doc/html/pcre2_jit_stack_create.html b/doc/html/pcre2_jit_stack_create.html
index 7c89c31..6200d17 100644
--- a/doc/html/pcre2_jit_stack_create.html
+++ b/doc/html/pcre2_jit_stack_create.html
@@ -33,8 +33,8 @@ context, for memory allocation functions, or NULL for standard memory
allocation. The result can be passed to the JIT run-time code by calling
<b>pcre2_jit_stack_assign()</b> to associate the stack with a compiled pattern,
which can then be processed by <b>pcre2_match()</b> or <b>pcre2_jit_match()</b>.
-A maximum stack size of 512K to 1M should be more than enough for any pattern.
-For more details, see the
+A maximum stack size of 512KiB to 1MiB should be more than enough for any
+pattern. For more details, see the
<a href="pcre2jit.html"><b>pcre2jit</b></a>
page.
</P>
diff --git a/doc/html/pcre2_jit_stack_free.html b/doc/html/pcre2_jit_stack_free.html
index 21ac276..1d078d7 100644
--- a/doc/html/pcre2_jit_stack_free.html
+++ b/doc/html/pcre2_jit_stack_free.html
@@ -26,8 +26,9 @@ DESCRIPTION
</b><br>
<P>
This function is used to free a JIT stack that was created by
-<b>pcre2_jit_stack_create()</b> when it is no longer needed. For more details,
-see the
+<b>pcre2_jit_stack_create()</b> when it is no longer needed. If the argument is
+NULL, the function returns immediately without doing anything. For more
+details, see the
<a href="pcre2jit.html"><b>pcre2jit</b></a>
page.
</P>
diff --git a/doc/html/pcre2_match_context_free.html b/doc/html/pcre2_match_context_free.html
index 8248974..7f00ea9 100644
--- a/doc/html/pcre2_match_context_free.html
+++ b/doc/html/pcre2_match_context_free.html
@@ -27,7 +27,8 @@ DESCRIPTION
<P>
This function frees the memory occupied by a match context, using the memory
freeing function from the general context with which it was created, or
-<b>free()</b> if that was not set.
+<b>free()</b> if that was not set. If the argument is NULL, the function returns
+immediately without doing anything.
</P>
<P>
There is a complete description of the PCRE2 native API in the
diff --git a/doc/html/pcre2_match_data_free.html b/doc/html/pcre2_match_data_free.html
index 840067f..68a4461 100644
--- a/doc/html/pcre2_match_data_free.html
+++ b/doc/html/pcre2_match_data_free.html
@@ -25,9 +25,10 @@ SYNOPSIS
DESCRIPTION
</b><br>
<P>
-This function frees the memory occupied by a match data block, using the memory
-freeing function from the general context or compiled pattern with which it was
-created, or <b>free()</b> if that was not set.
+If <i>match_data</i> is NULL, this function does nothing. Otherwise,
+<i>match_data</i> must point to a match data block, which this function frees,
+using the memory freeing function from the general context or compiled pattern
+with which it was created, or <b>free()</b> if that was not set.
</P>
<P>
There is a complete description of the PCRE2 native API in the
diff --git a/doc/html/pcre2_pattern_info.html b/doc/html/pcre2_pattern_info.html
index 1ebf90b..2e35709 100644
--- a/doc/html/pcre2_pattern_info.html
+++ b/doc/html/pcre2_pattern_info.html
@@ -36,7 +36,7 @@ request are as follows:
<pre>
PCRE2_INFO_ALLOPTIONS Final options after compiling
PCRE2_INFO_ARGOPTIONS Options passed to <b>pcre2_compile()</b>
- PCRE2_INFO_BACKREFMAX Number of highest back reference
+ PCRE2_INFO_BACKREFMAX Number of highest backreference
PCRE2_INFO_BSR What \R matches:
PCRE2_BSR_UNICODE: Unicode line endings
PCRE2_BSR_ANYCRLF: CR, LF, or CRLF only
diff --git a/doc/html/pcre2_serialize_decode.html b/doc/html/pcre2_serialize_decode.html
index 688398f..cff6e6c 100644
--- a/doc/html/pcre2_serialize_decode.html
+++ b/doc/html/pcre2_serialize_decode.html
@@ -28,7 +28,10 @@ DESCRIPTION
</b><br>
<P>
This function decodes a serialized set of compiled patterns back into a list of
-individual patterns. Its arguments are:
+individual patterns. This is possible only on a host that is running the same
+version of PCRE2, with the same code unit width, and the host must also have
+the same endianness, pointer width and PCRE2_SIZE type. The arguments for
+<b>pcre2_serialize_decode()</b> are:
<pre>
<i>codes</i> pointer to a vector in which to build the list
<i>number_of_codes</i> number of slots in the vector
@@ -54,8 +57,8 @@ on a system with different endianness.
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
-page and a description of the POSIX API in the
-<a href="pcre2posix.html"><b>pcre2posix</b></a>
+page and a description of the serialization functions in the
+<a href="pcre2serialize.html"><b>pcre2serialize</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2_serialize_encode.html b/doc/html/pcre2_serialize_encode.html
index 08cc46d..f153270 100644
--- a/doc/html/pcre2_serialize_encode.html
+++ b/doc/html/pcre2_serialize_encode.html
@@ -28,7 +28,12 @@ DESCRIPTION
</b><br>
<P>
This function encodes a list of compiled patterns into a byte stream that can
-be saved on disc or elsewhere. Its arguments are:
+be saved on disc or elsewhere. Note that this is not an abstract format like
+Java or .NET serialization. Conversion of the byte stream back into usable compiled patterns
+can only happen on a host that is running the same version of PCRE2, with the
+same code unit width, and the host must also have the same endianness, pointer
+width and PCRE2_SIZE type. The arguments for <b>pcre2_serialize_encode()</b>
+are:
<pre>
<i>codes</i> pointer to a vector containing the list
<i>number_of_codes</i> number of slots in the vector
@@ -53,8 +58,8 @@ that a slot in the vector does not point to a compiled pattern.
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
-page and a description of the POSIX API in the
-<a href="pcre2posix.html"><b>pcre2posix</b></a>
+page and a description of the serialization functions in the
+<a href="pcre2serialize.html"><b>pcre2serialize</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2_serialize_free.html b/doc/html/pcre2_serialize_free.html
index 10695dc..26b435b 100644
--- a/doc/html/pcre2_serialize_free.html
+++ b/doc/html/pcre2_serialize_free.html
@@ -27,13 +27,14 @@ DESCRIPTION
<P>
This function frees the memory that was obtained by
<b>pcre2_serialize_encode()</b> to hold a serialized byte stream. The argument
-must point to such a byte stream.
+must point to such a byte stream or be NULL, in which case the function returns
+without doing anything.
</P>
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
-page and a description of the POSIX API in the
-<a href="pcre2posix.html"><b>pcre2posix</b></a>
+page and a description of the serialization functions in the
+<a href="pcre2serialize.html"><b>pcre2serialize</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2_serialize_get_number_of_codes.html b/doc/html/pcre2_serialize_get_number_of_codes.html
index 3bab45a..fdd2429 100644
--- a/doc/html/pcre2_serialize_get_number_of_codes.html
+++ b/doc/html/pcre2_serialize_get_number_of_codes.html
@@ -41,8 +41,8 @@ on a system with different endianness.
<P>
There is a complete description of the PCRE2 native API in the
<a href="pcre2api.html"><b>pcre2api</b></a>
-page and a description of the POSIX API in the
-<a href="pcre2posix.html"><b>pcre2posix</b></a>
+page and a description of the serialization functions in the
+<a href="pcre2serialize.html"><b>pcre2serialize</b></a>
page.
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2_set_glob_separator.html b/doc/html/pcre2_set_glob_separator.html
index 538748d..283648e 100644
--- a/doc/html/pcre2_set_glob_separator.html
+++ b/doc/html/pcre2_set_glob_separator.html
@@ -28,7 +28,7 @@ DESCRIPTION
<P>
This function is part of an experimental set of pattern conversion functions.
It sets the component separator character that is used when converting globs.
-The second argument must one of the characters forward slash, backslash, or
+The second argument must be one of the characters forward slash, backslash, or
dot. The default is backslash when running under Windows, otherwise forward
slash. The result of the function is zero for success or PCRE2_ERROR_BADDATA if
the second argument is invalid.
diff --git a/doc/html/pcre2_substring_free.html b/doc/html/pcre2_substring_free.html
index 35a5b55..e0d0fbd 100644
--- a/doc/html/pcre2_substring_free.html
+++ b/doc/html/pcre2_substring_free.html
@@ -28,7 +28,7 @@ DESCRIPTION
This is a convenience function for freeing the memory obtained by a previous
call to <b>pcre2_substring_get_byname()</b> or
<b>pcre2_substring_get_bynumber()</b>. Its only argument is a pointer to the
-string.
+string. If the argument is NULL, the function does nothing.
</P>
<P>
There is a complete description of the PCRE2 native API in the
diff --git a/doc/html/pcre2_substring_list_free.html b/doc/html/pcre2_substring_list_free.html
index d61241d..0919d1e 100644
--- a/doc/html/pcre2_substring_list_free.html
+++ b/doc/html/pcre2_substring_list_free.html
@@ -27,7 +27,8 @@ DESCRIPTION
<P>
This is a convenience function for freeing the store obtained by a previous
call to <b>pcre2_substring_list_get()</b>. Its only argument is a pointer to
-the list of string pointers.
+the list of string pointers. If the argument is NULL, the function returns
+immediately, without doing anything.
</P>
<P>
There is a complete description of the PCRE2 native API in the
diff --git a/doc/html/pcre2api.html b/doc/html/pcre2api.html
index ba3b2ca..17f9794 100644
--- a/doc/html/pcre2api.html
+++ b/doc/html/pcre2api.html
@@ -518,7 +518,9 @@ been matched by <b>pcre2_match()</b>. They are:
<b>pcre2_substring_number_from_name()</b>
</pre>
<b>pcre2_substring_free()</b> and <b>pcre2_substring_list_free()</b> are also
-provided, to free memory used for extracted strings.
+provided, to free memory used for extracted strings. If either of these
+functions is called with a NULL argument, the function returns immediately
+without doing anything.
</P>
<P>
The function <b>pcre2_substitute()</b> can be called to match a pattern and
@@ -562,10 +564,10 @@ U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS
<P>
Each of the first three conventions is used by at least one operating system as
its standard newline sequence. When PCRE2 is built, a default can be specified.
-The default default is LF, which is the Unix standard. However, the newline
-convention can be changed by an application when calling <b>pcre2_compile()</b>,
-or it can be specified by special text at the start of the pattern itself; this
-overrides any other settings. See the
+If it is not, the default is set to LF, which is the Unix standard. However,
+the newline convention can be changed by an application when calling
+<b>pcre2_compile()</b>, or it can be specified by special text at the start of
+the pattern itself; this overrides any other settings. See the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
page for details of the special character sequences.
</P>
@@ -727,6 +729,10 @@ The memory used for a general context should be freed by calling:
<br>
<br>
<b>void pcre2_general_context_free(pcre2_general_context *<i>gcontext</i>);</b>
+<br>
+<br>
+If this function is passed a NULL argument, it returns immediately without
+doing anything.
<a name="compilecontext"></a></P>
<br><b>
The compile context
@@ -831,10 +837,10 @@ page for details.
</P>
<P>
When a pattern is compiled with the PCRE2_EXTENDED or PCRE2_EXTENDED_MORE
-option, the newline convention affects the recognition of white space and the
-end of internal comments starting with #. The value is saved with the compiled
-pattern for subsequent use by the JIT compiler and by the two interpreted
-matching functions, <i>pcre2_match()</i> and <i>pcre2_dfa_match()</i>.
+option, the newline convention affects the recognition of the end of internal
+comments starting with #. The value is saved with the compiled pattern for
+subsequent use by the JIT compiler and by the two interpreted matching
+functions, <i>pcre2_match()</i> and <i>pcre2_dfa_match()</i>.
<br>
<br>
<b>int pcre2_set_parens_nest_limit(pcre2_compile_context *<i>ccontext</i>,</b>
@@ -949,16 +955,18 @@ offset limit. In other words, whichever limit comes first is used.
<b> uint32_t <i>value</i>);</b>
<br>
<br>
-The <i>heap_limit</i> parameter specifies, in units of kilobytes, the maximum
-amount of heap memory that <b>pcre2_match()</b> may use to hold backtracking
-information when running an interpretive match. This limit does not apply to
-matching with the JIT optimization, which has its own memory control
-arrangements (see the
+The <i>heap_limit</i> parameter specifies, in units of kibibytes (1024 bytes),
+the maximum amount of heap memory that <b>pcre2_match()</b> may use to hold
+backtracking information when running an interpretive match. This limit also
+applies to <b>pcre2_dfa_match()</b>, which may use the heap when processing
+patterns with a lot of nested pattern recursion or lookarounds or atomic
+groups. This limit does not apply to matching with the JIT optimization, which
+has its own memory control arrangements (see the
<a href="pcre2jit.html"><b>pcre2jit</b></a>
-documentation for more details), nor does it apply to <b>pcre2_dfa_match()</b>.
-If the limit is reached, the negative error code PCRE2_ERROR_HEAPLIMIT is
-returned. The default limit is set when PCRE2 is built; the default default is
-very large and is essentially "unlimited".
+documentation for more details). If the limit is reached, the negative error
+code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2
+is built; if it is not, the default is set very large and is essentially
+"unlimited".
</P>
<P>
A value for the heap limit may also be supplied by an item at the start of a
@@ -971,13 +979,19 @@ less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
limit is set, less than the default.
</P>
<P>
-The <b>pcre2_match()</b> function starts out using a 20K vector on the system
+The <b>pcre2_match()</b> function starts out using a 20KiB vector on the system
stack for recording backtracking points. The more nested backtracking points
there are (that is, the deeper the search tree), the more memory is needed.
Heap memory is used only if the initial vector is too small. If the heap limit
is set to a value less than 21 (in particular, zero) no heap memory will be
used. In this case, only patterns that do not have a lot of nested backtracking
can be successfully processed.
+</P>
+<P>
+Similarly, for <b>pcre2_dfa_match()</b>, a vector on the system stack is used
+when processing pattern recursions, lookarounds, or atomic groups, and only if
+this is not big enough is heap memory used. In this case, too, setting a value
+of zero disables the use of the heap.
<br>
<br>
<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
@@ -1035,18 +1049,29 @@ backtracking.
<P>
The depth limit is not relevant, and is ignored, when matching is done using
JIT compiled code. However, it is supported by <b>pcre2_dfa_match()</b>, which
-uses it to limit the depth of internal recursive function calls that implement
-atomic groups, lookaround assertions, and pattern recursions. This is,
-therefore, an indirect limit on the amount of system stack that is used. A
-recursive pattern such as /(.)(?1)/, when matched to a very long string using
-<b>pcre2_dfa_match()</b>, can use a great deal of stack.
-</P>
-<P>
-The default value for the depth limit can be set when PCRE2 is built; the
-default default is the same value as the default for the match limit. If the
-limit is exceeded, <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b> returns
-PCRE2_ERROR_DEPTHLIMIT. A value for the depth limit may also be supplied by an
-item at the start of a pattern of the form
+uses it to limit the depth of nested internal recursive function calls that
+implement atomic groups, lookaround assertions, and pattern recursions. This
+limits, indirectly, the amount of system stack that is used. It was more useful
+in versions before 10.32, when stack memory was used for local workspace
+vectors for recursive function calls. From version 10.32, only local variables
+are allocated on the stack and, as each call uses only a few hundred bytes, even
+a small stack can support quite a lot of recursion.
+</P>
+<P>
+If the depth of internal recursive function calls is great enough, local
+workspace vectors are allocated on the heap from version 10.32 onwards, so the
+depth limit also indirectly limits the amount of heap memory that is used. A
+recursive pattern such as /(.(?2))((?1)|)/, when matched to a very long string
+using <b>pcre2_dfa_match()</b>, can use a great deal of memory. However, it is
+probably better to limit heap usage directly by calling
+<b>pcre2_set_heap_limit()</b>.
+</P>
+<P>
+The default value for the depth limit can be set when PCRE2 is built; if it is
+not, the default is set to the same value as the default for the match limit.
+If the limit is exceeded, <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>
+returns PCRE2_ERROR_DEPTHLIMIT. A value for the depth limit may also be
+supplied by an item at the start of a pattern of the form
<pre>
(*LIMIT_DEPTH=ddd)
</pre>
@@ -1096,15 +1121,16 @@ and the 2-bit and 4-bit indicate 16-bit and 32-bit support, respectively.
PCRE2_CONFIG_DEPTHLIMIT
</pre>
The output is a uint32_t integer that gives the default limit for the depth of
-nested backtracking in <b>pcre2_match()</b> or the depth of nested recursions
-and lookarounds in <b>pcre2_dfa_match()</b>. Further details are given with
-<b>pcre2_set_depth_limit()</b> above.
+nested backtracking in <b>pcre2_match()</b> or the depth of nested recursions,
+lookarounds, and atomic groups in <b>pcre2_dfa_match()</b>. Further details are
+given with <b>pcre2_set_depth_limit()</b> above.
<pre>
PCRE2_CONFIG_HEAPLIMIT
</pre>
-The output is a uint32_t integer that gives, in kilobytes, the default limit
-for the amount of heap memory used by <b>pcre2_match()</b>. Further details are
-given with <b>pcre2_set_heap_limit()</b> above.
+The output is a uint32_t integer that gives, in kibibytes, the default limit
+for the amount of heap memory used by <b>pcre2_match()</b> or
+<b>pcre2_dfa_match()</b>. Further details are given with
+<b>pcre2_set_heap_limit()</b> above.
<pre>
PCRE2_CONFIG_JIT
</pre>
@@ -1135,7 +1161,7 @@ relevant.
<P>
The default value of 2 for the 8-bit and 16-bit libraries is sufficient for all
but the most massive patterns, since it allows the size of the compiled pattern
-to be up to 64K code units. Larger values allow larger regular expressions to
+to be up to 65535 code units. Larger values allow larger regular expressions to
be compiled by those two libraries, but at the expense of slower matching.
<pre>
PCRE2_CONFIG_MATCHLIMIT
@@ -1229,6 +1255,8 @@ If the compile context argument <i>ccontext</i> is NULL, memory for the compiled
pattern is obtained by calling <b>malloc()</b>. Otherwise, it is obtained from
the same memory function that was used for the compile context. The caller must
free the memory by calling <b>pcre2_code_free()</b> when it is no longer needed.
+If <b>pcre2_code_free()</b> is called with a NULL argument, it returns
+immediately, without doing anything.
</P>
<P>
The function <b>pcre2_code_copy()</b> makes a copy of the compiled code in new
@@ -1237,7 +1265,8 @@ if the code has been processed by the JIT compiler (see
<a href="#jitcompiling">below),</a>
the JIT information cannot be copied (because it is position-dependent).
The new copy can initially be used only for non-JIT matching, though it can be
-passed to <b>pcre2_jit_compile()</b> if required.
+passed to <b>pcre2_jit_compile()</b> if required. If <b>pcre2_code_copy()</b> is
+called with a NULL argument, it returns NULL.
</P>
<P>
The <b>pcre2_code_copy()</b> function provides a way for individual threads in a
@@ -1252,7 +1281,9 @@ there are occasions when a copy of a compiled pattern and the relevant tables
are needed. The <b>pcre2_code_copy_with_tables()</b> provides this facility.
Copies of both the code and the tables are made, with the new code pointing to
the new tables. The memory for the new tables is automatically freed when
-<b>pcre2_code_free()</b> is called for the new copy of the compiled code.
+<b>pcre2_code_free()</b> is called for the new copy of the compiled code. If
+<b>pcre2_code_copy_with_tables()</b> is called with a NULL argument, it returns
+NULL.
</P>
<P>
NOTE: When one of the matching functions is called, pointers to the compiled
@@ -1393,9 +1424,9 @@ include a closing parenthesis in the name. However, if the PCRE2_ALT_VERBNAMES
option is set, normal backslash processing is applied to verb names and only an
unescaped closing parenthesis terminates the name. A closing parenthesis can be
included in a name either as \) or between \Q and \E. If the PCRE2_EXTENDED
-or PCRE2_EXTENDED_MORE option is set, unescaped whitespace in verb names is
-skipped and #-comments are recognized in this mode, exactly as in the rest of
-the pattern.
+or PCRE2_EXTENDED_MORE option is set with PCRE2_ALT_VERBNAMES, unescaped
+whitespace in verb names is skipped and #-comments are recognized, exactly as
+in the rest of the pattern.
<pre>
PCRE2_AUTO_CALLOUT
</pre>
@@ -1412,7 +1443,7 @@ If this bit is set, letters in the pattern match both upper and lower case
letters in the subject. It is equivalent to Perl's /i option, and it can be
changed within a pattern by a (?i) option setting. If PCRE2_UTF is set, Unicode
properties are used for all characters with more than one other case, and for
-all characters whose code points are greater than U+007f. For lower valued
+all characters whose code points are greater than U+007F. For lower valued
characters with only one other case, a lookup table is used for speed. When
PCRE2_UTF is not set, a lookup table is used for all code points less than 256,
and higher code points (available only in 16-bit or 32-bit mode) are treated as
@@ -1435,7 +1466,8 @@ character, even if newlines are coded as CRLF. Without this option, a dot does
not match when the current position in the subject is at a newline. This option
is equivalent to Perl's /s option, and it can be changed within a pattern by a
(?s) option setting. A negative class such as [^a] always matches newline
-characters, independent of the setting of this option.
+characters, and the \N escape sequence always matches a non-newline character,
+independent of the setting of PCRE2_DOTALL.
<pre>
PCRE2_DUPNAMES
</pre>
@@ -1478,15 +1510,36 @@ is not allowed within sequences such as (?&#62; that introduce various
parenthesized subpatterns, nor within numerical quantifiers such as {1,3}.
Ignorable white space is permitted between an item and a following quantifier
and between a quantifier and a following + that indicates possessiveness.
+PCRE2_EXTENDED is equivalent to Perl's /x option, and it can be changed within
+a pattern by a (?x) option setting.
</P>
<P>
-PCRE2_EXTENDED also causes characters between an unescaped # outside a
-character class and the next newline, inclusive, to be ignored, which makes it
-possible to include comments inside complicated patterns. Note that the end of
-this type of comment is a literal newline sequence in the pattern; escape
-sequences that happen to represent a newline do not count. PCRE2_EXTENDED is
-equivalent to Perl's /x option, and it can be changed within a pattern by a
-(?x) option setting.
+When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recognizes as
+white space only those characters with code points less than 256 that are
+flagged as white space in its low-character table. The table is normally
+created by
+<a href="pcre2_maketables.html"><b>pcre2_maketables()</b>,</a>
+which uses the <b>isspace()</b> function to identify space characters. In most
+ASCII environments, the relevant characters are those with code points 0x0009
+(tab), 0x000A (linefeed), 0x000B (vertical tab), 0x000C (formfeed), 0x000D
+(carriage return), and 0x0020 (space).
+</P>
+<P>
+When PCRE2 is compiled with Unicode support, in addition to these characters,
+five more Unicode "Pattern White Space" characters are recognized by
+PCRE2_EXTENDED. These are U+0085 (next line), U+200E (left-to-right mark),
+U+200F (right-to-left mark), U+2028 (line separator), and U+2029 (paragraph
+separator). This set of characters is the same as recognized by Perl's /x
+option. Note that the horizontal and vertical space characters that are matched
+by the \h and \v escapes in patterns are a much bigger set.
+</P>
+<P>
+As well as ignoring most white space, PCRE2_EXTENDED also causes characters
+between an unescaped # outside a character class and the next newline,
+inclusive, to be ignored, which makes it possible to include comments inside
+complicated patterns. Note that the end of this type of comment is a literal
+newline sequence in the pattern; escape sequences that happen to represent a
+newline do not count.
</P>
<P>
Which characters are interpreted as newlines can be specified by a setting in
@@ -1499,9 +1552,11 @@ built.
PCRE2_EXTENDED_MORE
</pre>
This option has the effect of PCRE2_EXTENDED, but, in addition, unescaped space
-and horizontal tab characters are ignored inside a character class.
-PCRE2_EXTENDED_MORE is equivalent to Perl's 5.26 /xx option, and it can be
-changed within a pattern by a (?xx) option setting.
+and horizontal tab characters are ignored inside a character class. Note: only
+these two characters are ignored, not the full set of pattern white space
+characters that are ignored outside a character class. PCRE2_EXTENDED_MORE is
+equivalent to Perl's /xx option, and it can be changed within a pattern by a
+(?xx) option setting.
<pre>
PCRE2_FIRSTLINE
</pre>
@@ -1532,7 +1587,7 @@ error.
<pre>
PCRE2_MATCH_UNSET_BACKREF
</pre>
-If this option is set, a back reference to an unset subpattern group matches an
+If this option is set, a backreference to an unset subpattern group matches an
empty string (by default this causes the current matching alternative to fail).
A pattern such as (\1)(a) succeeds when this option is set (assuming it can
find an "a" in the subject), whereas it fails by default, for Perl
@@ -1594,8 +1649,8 @@ If this option is set, it disables the use of numbered capturing parentheses in
the pattern. Any opening parenthesis that is not followed by ? behaves as if it
were followed by ?: but named parentheses can still be used for capturing (and
they acquire numbers in the usual way). This is the same as Perl's /n option.
-Note that, when this option is set, references to capturing groups (back
-references or recursion/subroutine calls) may only refer to named groups,
+Note that, when this option is set, references to capturing groups
+(backreferences or recursion/subroutine calls) may only refer to named groups,
though the reference can be by name or by number.
<pre>
PCRE2_NO_AUTO_POSSESS
@@ -1614,7 +1669,7 @@ If this option is set, it disables an optimization that is applied when .* is
the first significant item in a top-level branch of a pattern, and all the
other branches also start with .* or with \A or \G or ^. The optimization is
automatically disabled for .* if it is inside an atomic group or a capturing
-group that is the subject of a back reference, or if the pattern contains
+group that is the subject of a backreference, or if the pattern contains
(*PRUNE) or (*SKIP). When the optimization is not disabled, such a pattern is
automatically anchored if PCRE2_DOTALL is set for all the .* items and
PCRE2_MULTILINE is not set for any ^ items. Otherwise, the fact that any match
@@ -1749,7 +1804,8 @@ Unicode support (which is the default). If Unicode support is not available,
the use of this option provokes an error. Details of how PCRE2_UTF changes the
behaviour of PCRE2 are given in the
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
-page.
+page. In particular, note that it changes the way PCRE2_CASELESS handles
+characters with code points greater than 127.
<a name="extracompileoptions"></a></P>
<br><b>
Extra compile options
@@ -1980,7 +2036,7 @@ When .* is the first significant item, anchoring is possible only when all the
following are true:
<pre>
.* is not in an atomic group
- .* is not in a capturing group that is the subject of a back reference
+ .* is not in a capturing group that is the subject of a backreference
PCRE2_DOTALL is in force for .*
Neither (*PRUNE) nor (*SKIP) appears in the pattern
PCRE2_NO_DOTSTAR_ANCHOR is not set
@@ -1990,20 +2046,20 @@ options returned for PCRE2_INFO_ALLOPTIONS.
<pre>
PCRE2_INFO_BACKREFMAX
</pre>
-Return the number of the highest back reference in the pattern. The third
+Return the number of the highest backreference in the pattern. The third
argument should point to an <b>uint32_t</b> variable. Named subpatterns acquire
-numbers as well as names, and these count towards the highest back reference.
-Back references such as \4 or \g{12} match the captured characters of the
+numbers as well as names, and these count towards the highest backreference.
+Backreferences such as \4 or \g{12} match the captured characters of the
given group, but in addition, the check that a capturing group is set in a
-conditional subpattern such as (?(3)a|b) is also a back reference. Zero is
-returned if there are no back references.
+conditional subpattern such as (?(3)a|b) is also a backreference. Zero is
+returned if there are no backreferences.
<pre>
PCRE2_INFO_BSR
</pre>
-The output is a uint32_t whose value indicates what character sequences the \R
-escape sequence matches. A value of PCRE2_BSR_UNICODE means that \R matches
-any Unicode line ending sequence; a value of PCRE2_BSR_ANYCRLF means that \R
-matches only CR, LF, or CRLF.
+The output is a uint32_t integer whose value indicates what character sequences
+the \R escape sequence matches. A value of PCRE2_BSR_UNICODE means that \R
+matches any Unicode line ending sequence; a value of PCRE2_BSR_ANYCRLF means
+that \R matches only CR, LF, or CRLF.
<pre>
PCRE2_INFO_CAPTURECOUNT
</pre>
@@ -2015,10 +2071,10 @@ The third argument should point to an <b>uint32_t</b> variable.
</pre>
If the pattern set a backtracking depth limit by including an item of the form
(*LIMIT_DEPTH=nnnn) at the start, the value is returned. The third argument
-should point to an unsigned 32-bit integer. If no such value has been set, the
-call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET. Note
-that this limit will only be used during matching if it is less than the limit
-set or defaulted by the caller of the match function.
+should point to a uint32_t integer. If no such value has been set, the call to
+<b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET. Note that this
+limit will only be used during matching if it is less than the limit set or
+defaulted by the caller of the match function.
<pre>
PCRE2_INFO_FIRSTBITMAP
</pre>
@@ -2028,7 +2084,7 @@ values for the first code unit in any match. For example, a pattern that starts
with [abc] results in a table with three bits set. When code unit values
greater than 255 are supported, the flag bit for 255 means "any code unit of
value 255 or above". If such a table was constructed, a pointer to it is
-returned. Otherwise NULL is returned. The third argument should point to an
+returned. Otherwise NULL is returned. The third argument should point to a
<b>const uint8_t *</b> variable.
<pre>
PCRE2_INFO_FIRSTCODETYPE
@@ -2055,7 +2111,7 @@ and up to 0xffffffff when not using UTF-32 mode.
</pre>
Return the size (in bytes) of the data frames that are used to remember
backtracking positions when the pattern is processed by <b>pcre2_match()</b>
-without the use of JIT. The third argument should point to an <b>size_t</b>
+without the use of JIT. The third argument should point to a <b>size_t</b>
variable. The frame size depends on the number of capturing parentheses in the
pattern. Each additional capturing group adds two PCRE2_SIZE variables.
<pre>
@@ -2075,10 +2131,10 @@ the equivalent hexadecimal or octal escape sequences.
</pre>
If the pattern set a heap memory limit by including an item of the form
(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argument
-should point to an unsigned 32-bit integer. If no such value has been set, the
-call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET. Note
-that this limit will only be used during matching if it is less than the limit
-set or defaulted by the caller of the match function.
+should point to a uint32_t integer. If no such value has been set, the call to
+<b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET. Note that this
+limit will only be used during matching if it is less than the limit set or
+defaulted by the caller of the match function.
<pre>
PCRE2_INFO_JCHANGED
</pre>
@@ -2122,15 +2178,15 @@ in such cases.
</pre>
If the pattern set a match limit by including an item of the form
(*LIMIT_MATCH=nnnn) at the start, the value is returned. The third argument
-should point to an unsigned 32-bit integer. If no such value has been set, the
-call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET. Note
-that this limit will only be used during matching if it is less than the limit
-set or defaulted by the caller of the match function.
+should point to a uint32_t integer. If no such value has been set, the call to
+<b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET. Note that this
+limit will only be used during matching if it is less than the limit set or
+defaulted by the caller of the match function.
<pre>
PCRE2_INFO_MAXLOOKBEHIND
</pre>
Return the number of characters (not code units) in the longest lookbehind
-assertion in the pattern. The third argument should point to an unsigned 32-bit
+assertion in the pattern. The third argument should point to a uint32_t
integer. This information is useful when doing multi-segment matching using the
partial matching facilities. Note that the simple assertions \b and \B
require a one-character lookbehind. \A also registers a one-character
@@ -2263,11 +2319,16 @@ documentation, which also gives further details about callouts.
<br><a name="SEC25" href="#TOC1">SERIALIZATION AND PRECOMPILING</a><br>
<P>
It is possible to save compiled patterns on disc or elsewhere, and reload them
-later, subject to a number of restrictions. The functions whose names begin
-with <b>pcre2_serialize_</b> are used for this purpose. They are described in
-the
+later, subject to a number of restrictions. The host on which the patterns are
+reloaded must be running the same version of PCRE2, with the same code unit
+width, and must also have the same endianness, pointer width, and PCRE2_SIZE
+type. Before compiled patterns can be saved, they must be converted to a
+"serialized" form, which in the case of PCRE2 is really just a bytecode dump.
+The functions whose names begin with <b>pcre2_serialize_</b> are used for
+converting to and from the serialized form. They are described in the
<a href="pcre2serialize.html"><b>pcre2serialize</b></a>
-documentation.
+documentation. Note that PCRE2 serialization does not convert compiled patterns
+to an abstract format like Java or .NET serialization.
<a name="matchdatablock"></a></P>
<br><a name="SEC26" href="#TOC1">THE MATCH DATA BLOCK</a><br>
<P>
@@ -2338,7 +2399,8 @@ match data block (for that match) have taken place.
</P>
<P>
When a match data block itself is no longer needed, it should be freed by
-calling <b>pcre2_match_data_free()</b>.
+calling <b>pcre2_match_data_free()</b>. If this function is called with a NULL
+argument, it returns immediately, without doing anything.
</P>
<br><a name="SEC27" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
<P>
@@ -2398,7 +2460,7 @@ zero, the search for a match starts at the beginning of the subject, and this
is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset
must point to the start of a character, or to the end of the subject (in UTF-32
mode, one code unit equals one character, so all offsets are valid). Like the
-pattern string, the subject may contain binary zeroes.
+pattern string, the subject may contain binary zeros.
</P>
<P>
A non-zero starting offset is useful when searching for another match in the
@@ -2549,7 +2611,7 @@ calls to <b>pcre2_match()</b> if you are making repeated calls to find other
matches in the same subject string.
</P>
<P>
-WARNING: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid
+<b>Warning:</b> When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid
string as a subject, or an invalid value of <i>startoffset</i>, is undefined.
Your program may crash or loop indefinitely.
<pre>
@@ -2715,7 +2777,7 @@ Elements in the ovector that do not correspond to capturing parentheses in the
pattern are never changed. That is, if a pattern contains <i>n</i> capturing
parentheses, no more than <i>ovector[0]</i> to <i>ovector[2n+1]</i> are set by
<b>pcre2_match()</b>. The other elements retain whatever values they previously
-had.
+had. After a failed match attempt, the contents of the ovector are unchanged.
<a name="matchotherdata"></a></P>
<br><a name="SEC30" href="#TOC1">OTHER INFORMATION ABOUT A MATCH</a><br>
<P>
@@ -2756,6 +2818,15 @@ branch of the group, but it is not on the matching path. On the other hand,
when this pattern fails to match "bx", the returned name is B.
</P>
<P>
+<b>Warning:</b> By default, certain start-of-match optimizations are used to
+give a fast "no match" result in some situations. For example, if the anchoring
+is removed from the pattern above, there is an initial check for the presence
+of "c" in the subject before running the matching engine. This check fails for
+"bx", causing a match failure without seeing any marks. You can disable the
+start-of-match optimizations by setting the PCRE2_NO_START_OPTIMIZE option for
+<b>pcre2_compile()</b> or starting the pattern with (*NO_START_OPT).
+</P>
+<P>
After a successful match, a partial match, or one of the invalid UTF errors
(for example, PCRE2_ERROR_UTF8_ERR5), <b>pcre2_get_startchar()</b> can be
called. After a successful or partial match it returns the code unit offset of
@@ -3108,7 +3179,10 @@ string in <i>outputbuffer</i>, replacing the part that was matched with the
<i>replacement</i> string, whose length is supplied in <b>rlength</b>. This can
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
which a \K item in a lookahead in the pattern causes the match to end before
-it starts are not supported, and give rise to an error return.
+it starts are not supported, and give rise to an error return. For global
+replacements, matches in which \K in a lookbehind causes the match to start
+earlier than the point that was reached in the previous iteration are also not
+supported.
</P>
<P>
The first seven arguments of <b>pcre2_substitute()</b> are the same as for
@@ -3119,6 +3193,12 @@ functions from the match context, if provided, or else those that were used to
allocate memory for the compiled code.
</P>
<P>
+If an external <i>match_data</i> block is provided, its contents afterwards
+are those set by the final call to <b>pcre2_match()</b>, which will have
+ended in a matching error. The contents of the ovector within the match data
+block may or may not have been changed.
+</P>
+<P>
The <i>outlengthptr</i> argument must point to a variable that contains the
length, in code units, of the output buffer. If the function is successful, the
value is updated to contain the length of the new string, excluding the
@@ -3310,7 +3390,8 @@ replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE
(invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket
not found), PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group
substitution), and PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before
-it started, which can happen if \K is used in an assertion).
+it started or the match started earlier than the current position in the
+subject, which can happen if \K is used in an assertion).
</P>
<P>
As for all PCRE2 errors, a text message that describes the error can be
@@ -3510,17 +3591,7 @@ capture.
Calls to the convenience functions that extract substrings by name
return the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used after a
DFA match. The convenience functions that extract substrings by number never
-return PCRE2_ERROR_NOSUBSTRING, and the meanings of some other errors are
-slightly different:
-<pre>
- PCRE2_ERROR_UNAVAILABLE
-</pre>
-The ovector is not big enough to include a slot for the given substring number.
-<pre>
- PCRE2_ERROR_UNSET
-</pre>
-There is a slot in the ovector for this substring, but there were insufficient
-matches to fill it.
+return PCRE2_ERROR_NOSUBSTRING.
</P>
<P>
The matched strings are stored in the ovector in reverse order of length; that
@@ -3550,12 +3621,12 @@ There are in addition the following errors that are specific to
</pre>
This return is given if <b>pcre2_dfa_match()</b> encounters an item in the
pattern that it does not support, for instance, the use of \C in a UTF mode or
-a back reference.
+a backreference.
<pre>
PCRE2_ERROR_DFA_UCOND
</pre>
This return is given if <b>pcre2_dfa_match()</b> encounters a condition item
-that uses a back reference for the condition, or a test for recursion in a
+that uses a backreference for the condition, or a test for recursion in a
specific group. These are not supported.
<pre>
PCRE2_ERROR_DFA_WSSIZE
@@ -3594,9 +3665,9 @@ Cambridge, England.
</P>
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 31 December 2017
+Last updated: 07 September 2018
<br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2build.html b/doc/html/pcre2build.html
index 823e605..e31b604 100644
--- a/doc/html/pcre2build.html
+++ b/doc/html/pcre2build.html
@@ -82,7 +82,8 @@ The following sections include descriptions of "on/off" options whose names
begin with --enable or --disable. Because of the way that <b>configure</b>
works, --enable and --disable always come in pairs, so the complementary option
always exists as well, but as it specifies the default, it is not described.
-Options that specify values have names that start with --with.
+Options that specify values have names that start with --with. At the end of a
+<b>configure</b> run, a summary of the configuration is output.
</P>
<br><a name="SEC3" href="#TOC1">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
<P>
@@ -170,8 +171,15 @@ Just-in-time (JIT) compiler support is included in the build by specifying
--enable-jit
</pre>
This support is available only for certain hardware architectures. If this
-option is set for an unsupported architecture, a building error occurs. If you
-are running under SELinux you may also want to add
+option is set for an unsupported architecture, a building error occurs.
+If in doubt, use
+<pre>
+ --enable-jit=auto
+</pre>
+which enables JIT only if the current hardware is supported. You can check
+if JIT is enabled in the configuration summary that is output at the end of a
+<b>configure</b> run. If you are enabling JIT under SELinux you may also want to
+add
<pre>
--enable-jit-sealloc
</pre>
@@ -219,7 +227,7 @@ separator, U+2028), and PS (paragraph separator, U+2029). The final option is
<pre>
--enable-newline-is-nul
</pre>
-which causes NUL (binary zero) is set as the default line-ending character.
+which causes NUL (binary zero) to be set as the default line-ending character.
</P>
<P>
Whatever default line ending convention is selected when PCRE2 is built can be
@@ -244,10 +252,10 @@ Within a compiled pattern, offset values are used to point from one part to
another (for example, from an opening parenthesis to an alternation
metacharacter). By default, in the 8-bit and 16-bit libraries, two-byte values
are used for these offsets, leading to a maximum size for a compiled pattern of
-around 64K code units. This is sufficient to handle all but the most gigantic
-patterns. Nevertheless, some people do want to process truly enormous patterns,
-so it is possible to compile PCRE2 to use three-byte or four-byte offsets by
-adding a setting such as
+around 64 thousand code units. This is sufficient to handle all but the most
+gigantic patterns. Nevertheless, some people do want to process truly enormous
+patterns, so it is possible to compile PCRE2 to use three-byte or four-byte
+offsets by adding a setting such as
<pre>
--with-link-size=3
</pre>
@@ -274,22 +282,23 @@ to the <b>configure</b> command. This setting also applies to the
counting is done differently).
</P>
<P>
-The <b>pcre2_match()</b> function starts out using a 20K vector on the system
+The <b>pcre2_match()</b> function starts out using a 20KiB vector on the system
stack to record backtracking points. The more nested backtracking points there
are (that is, the deeper the search tree), the more memory is needed. If the
initial vector is not large enough, heap memory is used, up to a certain limit,
-which is specified in kilobytes. The limit can be changed at run time, as
-described in the
+which is specified in kibibytes (units of 1024 bytes). The limit can be changed
+at run time, as described in the
<a href="pcre2api.html"><b>pcre2api</b></a>
documentation. The default limit (in effect unlimited) is 20 million. You can
change this by a setting such as
<pre>
--with-heap-limit=500
</pre>
-which limits the amount of heap to 500 kilobytes. This limit applies only to
-interpretive matching in pcre2_match(). It does not apply when JIT (which has
-its own memory arrangements) is used, nor does it apply to
-<b>pcre2_dfa_match()</b>.
+which limits the amount of heap to 500 KiB. This limit applies only to
+interpretive matching in <b>pcre2_match()</b> and <b>pcre2_dfa_match()</b>, which
+may also use the heap for internal workspace when processing complicated
+patterns. This limit does not apply when JIT (which has its own memory
+arrangements) is used.
</P>
<P>
You can also explicitly limit the depth of nested backtracking in the
@@ -390,13 +399,13 @@ they are not.
<P>
<b>pcre2grep</b> uses an internal buffer to hold a "window" on the file it is
scanning, in order to be able to output "before" and "after" lines when it
-finds a match. The starting size of the buffer is controlled by a parameter
-whose default value is 20K. The buffer itself is three times this size, but
-because of the way it is used for holding "before" lines, the longest line that
-is guaranteed to be processable is the parameter size. If a longer line is
-encountered, <b>pcre2grep</b> automatically expands the buffer, up to a
-specified maximum size, whose default is 1M or the starting size, whichever is
-the larger. You can change the default parameter values by adding, for example,
+finds a match. The default starting size of the buffer is 20KiB. The buffer
+itself is three times this size, but because of the way it is used for holding
+"before" lines, the longest line that is guaranteed to be processable is the
+notional buffer size. If a longer line is encountered, <b>pcre2grep</b>
+automatically expands the buffer, up to a specified maximum size, whose default
+is 1MiB or the starting size, whichever is the larger. You can change the
+default parameter values by adding, for example,
<pre>
--with-pcre2grep-bufsize=51200
--with-pcre2grep-max-bufsize=2097152
@@ -533,7 +542,7 @@ generated from the string.
Setting --enable-fuzz-support also causes a binary called <b>pcre2fuzzcheck</b>
to be created. This is normally run under valgrind or used when PCRE2 is
compiled with address sanitizing enabled. It calls the fuzzing function and
-outputs information about it is doing. The input strings are specified by
+outputs information about what it is doing. The input strings are specified by
arguments: if an argument starts with "=" the rest of it is a literal input
string. Otherwise, it is assumed to be a file name, and the contents of the
file are the test string.
@@ -565,9 +574,9 @@ Cambridge, England.
</P>
<br><a name="SEC25" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 18 July 2017
+Last updated: 26 April 2018
<br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2callout.html b/doc/html/pcre2callout.html
index 2adf21a..9b6ae6f 100644
--- a/doc/html/pcre2callout.html
+++ b/doc/html/pcre2callout.html
@@ -143,7 +143,7 @@ branch, automatic anchoring occurs if all branches are anchorable.
</P>
<P>
This optimization is disabled, however, if .* is in an atomic group or if there
-is a back reference to the capturing group in which it appears. It is also
+is a backreference to the capturing group in which it appears. It is also
disabled if the pattern contains (*PRUNE) or (*SKIP). However, the presence of
callouts does not affect it.
</P>
@@ -310,10 +310,12 @@ PCRE2_UNSET.
</P>
<P>
For DFA matching, the <i>offset_vector</i> field points to the ovector that was
-passed to the matching function in the match data block, but it holds no useful
-information at callout time because <b>pcre2_dfa_match()</b> does not support
-substring capturing. The value of <i>capture_top</i> is always 1 and the value
-of <i>capture_last</i> is always 0 for DFA matching.
+passed to the matching function in the match data block for callouts at the top
+level, but to an internal ovector during the processing of pattern recursions,
+lookarounds, and atomic groups. However, these ovectors hold no useful
+information because <b>pcre2_dfa_match()</b> does not support substring
+capturing. The value of <i>capture_top</i> is always 1 and the value of
+<i>capture_last</i> is always 0 for DFA matching.
</P>
<P>
The <i>subject</i> and <i>subject_length</i> fields contain copies of the values
@@ -461,9 +463,9 @@ Cambridge, England.
</P>
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 22 December 2017
+Last updated: 26 April 2018
<br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2compat.html b/doc/html/pcre2compat.html
index e6d2e7e..f593204 100644
--- a/doc/html/pcre2compat.html
+++ b/doc/html/pcre2compat.html
@@ -31,7 +31,7 @@ page.
2. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but
they do not mean what you might think. For example, (?!a){3} does not assert
that the next three characters are not "a". It just asserts that the next
-character is not "a" three times (in principle: PCRE2 optimizes this to run the
+character is not "a" three times (in principle; PCRE2 optimizes this to run the
assertion just once). Perl allows some repeat quantifiers on other assertions,
for example, \b* (but not \b{3}), but these do not seem to have any use.
</P>
@@ -42,13 +42,14 @@ assertion is a condition that has a matching branch (that is, the condition is
false).
</P>
<P>
-4. The following Perl escape sequences are not supported: \l, \u, \L,
-\U, and \N when followed by a character name or Unicode value. (\N on its
-own, matching a non-newline character, is supported.) In fact these are
+4. The following Perl escape sequences are not supported: \F, \l, \L, \u,
+\U, and \N when followed by a character name. \N on its own, matching a
+non-newline character, and \N{U+dd..}, matching a Unicode code point, are
+supported. The escapes that modify the case of following letters are
implemented by Perl's general string-handling and are not part of its pattern
matching engine. If any of these are encountered by PCRE2, an error is
-generated by default. However, if the PCRE2_ALT_BSUX option is set,
-\U and \u are interpreted as ECMAScript interprets them.
+generated by default. However, if the PCRE2_ALT_BSUX option is set, \U and \u
+are interpreted as ECMAScript interprets them.
</P>
<P>
5. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is
@@ -61,24 +62,29 @@ internal representation of Unicode characters, there is no need to implement
the somewhat messy concept of surrogates."
</P>
<P>
-6. PCRE2 does support the \Q...\E escape for quoting substrings. Characters
-in between are treated as literals. This is slightly different from Perl in
-that $ and @ are also handled as literals inside the quotes. In Perl, they
-cause variable interpolation (but of course PCRE2 does not have variables).
-Note the following examples:
+6. PCRE2 supports the \Q...\E escape for quoting substrings. Characters
+in between are treated as literals. However, this is slightly different from
+Perl in that $ and @ are also handled as literals inside the quotes. In Perl,
+they cause variable interpolation (but of course PCRE2 does not have
+variables). Also, Perl does "double-quotish backslash interpolation" on any
+backslashes between \Q and \E which, its documentation says, "may lead to
+confusing results". PCRE2 treats a backslash between \Q and \E just like any
+other character. Note the following examples:
<pre>
- Pattern PCRE2 matches Perl matches
+ Pattern PCRE2 matches Perl matches
\Qabc$xyz\E abc$xyz abc followed by the contents of $xyz
\Qabc\$xyz\E abc\$xyz abc\$xyz
\Qabc\E\$\Qxyz\E abc$xyz abc$xyz
+ \QA\B\E A\B A\B
+ \Q\\E \ \\E
</pre>
The \Q...\E sequence is recognized both inside and outside character classes.
</P>
<P>
7. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
-constructions. However, there is support PCRE2's "callout" feature, which
-allows an external function to be called during pattern matching. See the
+constructions. However, PCRE2 does have a "callout" feature, which allows an
+external function to be called during pattern matching. See the
<a href="pcre2callout.html"><b>pcre2callout</b></a>
documentation for details.
</P>
@@ -156,7 +162,7 @@ each alternative branch of a lookbehind assertion can match a different length
of string. Perl requires them all to have the same length.
<br>
<br>
-(b) From PCRE2 10.23, back references to groups of fixed length are supported
+(b) From PCRE2 10.23, backreferences to groups of fixed length are supported
in lookbehinds, provided that there is no possibility of referencing a
non-unique number or name. Perl does not support backreferences in lookbehinds.
<br>
@@ -229,9 +235,9 @@ Cambridge, England.
REVISION
</b><br>
<P>
-Last updated: 18 April 2017
+Last updated: 28 July 2018
<br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2convert.html b/doc/html/pcre2convert.html
index 8b4d87f..871e563 100644
--- a/doc/html/pcre2convert.html
+++ b/doc/html/pcre2convert.html
@@ -105,7 +105,8 @@ If <b>buffer</b> points to a NULL pointer, an output buffer is obtained using
the allocator in the context or <b>malloc()</b> if no context is supplied. A
pointer to this buffer is placed in the variable to which <b>buffer</b> points.
When no longer needed the output buffer must be freed by calling
-<b>pcre2_converted_pattern_free()</b>.
+<b>pcre2_converted_pattern_free()</b>. If this function is called with a NULL
+argument, it returns immediately without doing anything.
</P>
<P>
If <b>buffer</b> points to a non-NULL pointer, <b>blength</b> must be set to the
@@ -181,9 +182,9 @@ Cambridge, England.
</P>
<br><a name="SEC7" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 12 July 2017
+Last updated: 28 June 2018
<br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2grep.html b/doc/html/pcre2grep.html
index 625a467..272b87d 100644
--- a/doc/html/pcre2grep.html
+++ b/doc/html/pcre2grep.html
@@ -17,17 +17,18 @@ please consult the man page, in case the conversion went wrong.
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
<li><a name="TOC3" href="#SEC3">SUPPORT FOR COMPRESSED FILES</a>
<li><a name="TOC4" href="#SEC4">BINARY FILES</a>
-<li><a name="TOC5" href="#SEC5">OPTIONS</a>
-<li><a name="TOC6" href="#SEC6">ENVIRONMENT VARIABLES</a>
-<li><a name="TOC7" href="#SEC7">NEWLINES</a>
-<li><a name="TOC8" href="#SEC8">OPTIONS COMPATIBILITY</a>
-<li><a name="TOC9" href="#SEC9">OPTIONS WITH DATA</a>
-<li><a name="TOC10" href="#SEC10">USING PCRE2'S CALLOUT FACILITY</a>
-<li><a name="TOC11" href="#SEC11">MATCHING ERRORS</a>
-<li><a name="TOC12" href="#SEC12">DIAGNOSTICS</a>
-<li><a name="TOC13" href="#SEC13">SEE ALSO</a>
-<li><a name="TOC14" href="#SEC14">AUTHOR</a>
-<li><a name="TOC15" href="#SEC15">REVISION</a>
+<li><a name="TOC5" href="#SEC5">BINARY ZEROS IN PATTERNS</a>
+<li><a name="TOC6" href="#SEC6">OPTIONS</a>
+<li><a name="TOC7" href="#SEC7">ENVIRONMENT VARIABLES</a>
+<li><a name="TOC8" href="#SEC8">NEWLINES</a>
+<li><a name="TOC9" href="#SEC9">OPTIONS COMPATIBILITY</a>
+<li><a name="TOC10" href="#SEC10">OPTIONS WITH DATA</a>
+<li><a name="TOC11" href="#SEC11">USING PCRE2'S CALLOUT FACILITY</a>
+<li><a name="TOC12" href="#SEC12">MATCHING ERRORS</a>
+<li><a name="TOC13" href="#SEC13">DIAGNOSTICS</a>
+<li><a name="TOC14" href="#SEC14">SEE ALSO</a>
+<li><a name="TOC15" href="#SEC15">AUTHOR</a>
+<li><a name="TOC16" href="#SEC16">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
<P>
@@ -85,9 +86,10 @@ controlled by parameters that can be set by the <b>--buffer-size</b> and
that is obtained at the start of processing. If an input file contains very
long lines, a larger buffer may be needed; this is handled by automatically
extending the buffer, up to the limit specified by <b>--max-buffer-size</b>. The
-default values for these parameters are specified when <b>pcre2grep</b> is
-built, with the default defaults being 20K and 1M respectively. An error occurs
-if a line is too long and the buffer can no longer be expanded.
+default values for these parameters can be set when <b>pcre2grep</b> is
+built; if nothing is specified, the defaults are set to 20KiB and 1MiB
+respectively. An error occurs if a line is too long and the buffer can no
+longer be expanded.
</P>
<P>
The block of memory that is actually used is three times the "buffer size", to
@@ -95,7 +97,7 @@ allow for buffering "before" and "after" lines. If the buffer size is too
small, fewer than requested "before" and "after" lines may be output.
</P>
<P>
-Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater.
+Patterns can be no longer than 8KiB or BUFSIZ bytes, whichever is the greater.
BUFSIZ is defined in <b>&#60;stdio.h&#62;</b>. When there is more than one pattern
(specified by the use of <b>-e</b> and/or <b>-f</b>), each pattern is applied to
each line in the order in which they are defined, except that all the <b>-e</b>
@@ -150,7 +152,13 @@ specified as "nul", that is, the line terminator is a binary zero, the test for
a binary file is not applied. See the <b>--binary-files</b> option for a means
of changing the way binary files are handled.
</P>
-<br><a name="SEC5" href="#TOC1">OPTIONS</a><br>
+<br><a name="SEC5" href="#TOC1">BINARY ZEROS IN PATTERNS</a><br>
+<P>
+Patterns passed from the command line are strings that are terminated by a
+binary zero, so cannot contain internal zeros. However, patterns that are read
+from a file via the <b>-f</b> option may contain binary zeros.
+</P>
+<br><a name="SEC6" href="#TOC1">OPTIONS</a><br>
<P>
The order in which some of the options appear can affect the output. For
example, both the <b>-H</b> and <b>-l</b> options affect the printing of file
@@ -355,12 +363,15 @@ files; it does not apply to patterns specified by any of the <b>--include</b> or
<P>
<b>-f</b> <i>filename</i>, <b>--file=</b><i>filename</i>
Read patterns from the file, one per line, and match them against each line of
-input. What constitutes a newline when reading the file is the operating
-system's default. The <b>--newline</b> option has no effect on this option.
-Trailing white space is removed from each line, and blank lines are ignored. An
-empty file contains no patterns and therefore matches nothing. See also the
-comments about multiple patterns versus a single pattern with alternatives in
-the description of <b>-e</b> above.
+input. As is the case with patterns on the command line, no delimiters should
+be used. What constitutes a newline when reading the file is the operating
+system's default interpretation of \n. The <b>--newline</b> option has no
+effect on this option. Trailing white space is removed from each line, and
+blank lines are ignored. An empty file contains no patterns and therefore
+matches nothing. Patterns read from a file in this way may contain binary
+zeros, which are treated as ordinary data characters. See also the comments
+about multiple patterns versus a single pattern with alternatives in the
+description of <b>-e</b> above.
<br>
<br>
If this option is given more than once, all the specified files are read. A
@@ -373,14 +384,15 @@ command line; all arguments are treated as the names of paths to be searched.
<P>
<b>--file-list</b>=<i>filename</i>
Read a list of files and/or directories that are to be scanned from the given
-file, one per line. Trailing white space is removed from each line, and blank
-lines are ignored. These paths are processed before any that are listed on the
-command line. The file name can be given as "-" to refer to the standard input.
-If <b>--file</b> and <b>--file-list</b> are both specified as "-", patterns are
-read first. This is useful only when the standard input is a terminal, from
-which further lines (the list of files) can be read after an end-of-file
-indication. If this option is given more than once, all the specified files are
-read.
+file, one per line. What constitutes a newline when reading the file is the
+operating system's default. Trailing white space is removed from each line, and
+blank lines are ignored. These paths are processed before any that are listed
+on the command line. The file name can be given as "-" to refer to the standard
+input. If <b>--file</b> and <b>--file-list</b> are both specified as "-",
+patterns are read first. This is useful only when the standard input is a
+terminal, from which further lines (the list of files) can be read after an
+end-of-file indication. If this option is given more than once, all the
+specified files are read.
</P>
<P>
<b>--file-offsets</b>
@@ -489,13 +501,13 @@ short form for this option.
When this option is given, non-compressed input is read and processed line by
line, and the output is flushed after each write. By default, input is read in
large chunks, unless <b>pcre2grep</b> can determine that it is reading from a
-terminal (which is currently possible only in Unix-like environments). Output
-to terminal is normally automatically flushed by the operating system. This
-option can be useful when the input or output is attached to a pipe and you do
-not want <b>pcre2grep</b> to buffer up large amounts of data. However, its use
-will affect performance, and the <b>-M</b> (multiline) option ceases to work.
-When input is from a compressed .gz or .bz2 file, <b>--line-buffered</b> is
-ignored.
+terminal (which is currently possible only in Unix-like environments or
+Windows). Output to a terminal is normally automatically flushed by the operating
+system. This option can be useful when the input or output is attached to a
+pipe and you do not want <b>pcre2grep</b> to buffer up large amounts of data.
+However, its use will affect performance, and the <b>-M</b> (multiline) option
+ceases to work. When input is from a compressed .gz or .bz2 file,
+<b>--line-buffered</b> is ignored.
</P>
<P>
<b>--line-offsets</b>
@@ -530,11 +542,11 @@ counter that is incremented each time around its main processing loop. If the
value set by <b>--match-limit</b> is reached, an error occurs.
<br>
<br>
-The <b>--heap-limit</b> option specifies, as a number of kilobytes, the amount
-of heap memory that may be used for matching. Heap memory is needed only if
-matching the pattern requires a significant number of nested backtracking
-points to be remembered. This parameter can be set to zero to forbid the use of
-heap memory altogether.
+The <b>--heap-limit</b> option specifies, as a number of kibibytes (units of
+1024 bytes), the amount of heap memory that may be used for matching. Heap
+memory is needed only if matching the pattern requires a significant number of
+nested backtracking points to be remembered. This parameter can be set to zero
+to forbid the use of heap memory altogether.
<br>
<br>
The <b>--depth-limit</b> option limits the depth of nested backtracking points,
@@ -545,9 +557,9 @@ limit acts varies from pattern to pattern. This limit is of use only if it is
set smaller than <b>--match-limit</b>.
<br>
<br>
-There are no short forms for these options. The default settings are specified
-when the PCRE2 library is compiled, with the default defaults being very large
-and so effectively unlimited.
+There are no short forms for these options. The default limits can be set
+when the PCRE2 library is compiled; if they are not specified, the defaults
+are very large and so effectively unlimited.
</P>
<P>
<b>--max-buffer-size</b>=<i>number</i>
@@ -764,27 +776,28 @@ pattern and ")$" at the end. This option applies only to the patterns that are
matched against the contents of files; it does not apply to patterns specified
by any of the <b>--include</b> or <b>--exclude</b> options.
</P>
-<br><a name="SEC6" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
+<br><a name="SEC7" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
<P>
The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
order, for a locale. The first one that is set is used. This can be overridden
by the <b>--locale</b> option. If no locale is set, the PCRE2 library's default
(usually the "C" locale) is used.
</P>
-<br><a name="SEC7" href="#TOC1">NEWLINES</a><br>
+<br><a name="SEC8" href="#TOC1">NEWLINES</a><br>
<P>
The <b>-N</b> (<b>--newline</b>) option allows <b>pcre2grep</b> to scan files with
different newline conventions from the default. Any parts of the input files
that are written to the standard output are copied identically, with whatever
newline sequences they have in the input. However, the setting of this option
-does not affect the interpretation of files specified by the <b>-f</b>,
-<b>--exclude-from</b>, or <b>--include-from</b> options, which are assumed to use
-the operating system's standard newline sequence, nor does it affect the way in
-which <b>pcre2grep</b> writes informational messages to the standard error and
-output streams. For these it uses the string "\n" to indicate newlines,
-relying on the C I/O library to convert this to an appropriate sequence.
+affects only the way scanned files are processed. It does not affect the
+interpretation of files specified by the <b>-f</b>, <b>--file-list</b>,
+<b>--exclude-from</b>, or <b>--include-from</b> options, nor does it affect the
+way in which <b>pcre2grep</b> writes informational messages to the standard
+error and output streams. For these it uses the string "\n" to indicate
+newlines, relying on the C I/O library to convert this to an appropriate
+sequence.
</P>
-<br><a name="SEC8" href="#TOC1">OPTIONS COMPATIBILITY</a><br>
+<br><a name="SEC9" href="#TOC1">OPTIONS COMPATIBILITY</a><br>
<P>
Many of the short and long forms of <b>pcre2grep</b>'s options are the same
as in the GNU <b>grep</b> program. Any long option of the form
@@ -804,7 +817,7 @@ for GNU <b>grep</b>, but a regular expression for <b>pcre2grep</b>. If both the
<b>-c</b> and <b>-l</b> options are given, GNU grep lists only file names,
without counts, but <b>pcre2grep</b> gives the counts as well.
</P>
-<br><a name="SEC9" href="#TOC1">OPTIONS WITH DATA</a><br>
+<br><a name="SEC10" href="#TOC1">OPTIONS WITH DATA</a><br>
<P>
There are four different ways in which an option with data can be specified.
If a short form option is used, the data may follow immediately, or (with one
@@ -836,7 +849,7 @@ The exceptions to the above are the <b>--colour</b> (or <b>--color</b>) and
options does have data, it must be given in the first form, using an equals
character. Otherwise <b>pcre2grep</b> will assume that it has no data.
</P>
-<br><a name="SEC10" href="#TOC1">USING PCRE2'S CALLOUT FACILITY</a><br>
+<br><a name="SEC11" href="#TOC1">USING PCRE2'S CALLOUT FACILITY</a><br>
<P>
<b>pcre2grep</b> has, by default, support for calling external programs or
scripts or echoing specific strings during matching by making use of PCRE2's
@@ -906,7 +919,7 @@ Matching continues normally after the string is output. If you want to see only
the callout output but not any output from an actual match, you should end the
relevant pattern with (*FAIL).
</P>
-<br><a name="SEC11" href="#TOC1">MATCHING ERRORS</a><br>
+<br><a name="SEC12" href="#TOC1">MATCHING ERRORS</a><br>
<P>
It is possible to supply a regular expression that takes a very long time to
fail to match certain lines. Such patterns normally involve nested indefinite
@@ -922,7 +935,7 @@ overall resource limit. There are also other limits that affect the amount of
memory used during matching; see the discussion of <b>--heap-limit</b> and
<b>--depth-limit</b> above.
</P>
-<br><a name="SEC12" href="#TOC1">DIAGNOSTICS</a><br>
+<br><a name="SEC13" href="#TOC1">DIAGNOSTICS</a><br>
<P>
Exit status is 0 if any matches were found, 1 if no matches were found, and 2
for syntax errors, overlong lines, non-existent or inaccessible files (even if
@@ -934,11 +947,11 @@ affect the return code.
When run under VMS, the return code is placed in the symbol PCRE2GREP_RC
because VMS does not distinguish between exit(0) and exit(1).
</P>
-<br><a name="SEC13" href="#TOC1">SEE ALSO</a><br>
+<br><a name="SEC14" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcre2pattern</b>(3), <b>pcre2syntax</b>(3), <b>pcre2callout</b>(3).
</P>
-<br><a name="SEC14" href="#TOC1">AUTHOR</a><br>
+<br><a name="SEC15" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
@@ -947,11 +960,11 @@ University Computing Service
Cambridge, England.
<br>
</P>
-<br><a name="SEC15" href="#TOC1">REVISION</a><br>
+<br><a name="SEC16" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 13 November 2017
+Last updated: 24 February 2018
<br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2jit.html b/doc/html/pcre2jit.html
index c53d3d9..fa007e0 100644
--- a/doc/html/pcre2jit.html
+++ b/doc/html/pcre2jit.html
@@ -179,7 +179,7 @@ when JIT matching is used.
<br><a name="SEC6" href="#TOC1">CONTROLLING THE JIT STACK</a><br>
<P>
When the compiled JIT code runs, it needs a block of memory to use as a stack.
-By default, it uses 32K on the machine stack. However, some large or
+By default, it uses 32KiB on the machine stack. However, some large or
complicated patterns need more than this. The error PCRE2_ERROR_JIT_STACKLIMIT
is given when there is not enough stack. Three functions are provided for
managing blocks of memory for use as JIT stacks. There is further discussion
@@ -193,9 +193,10 @@ are a starting size, a maximum size, and a general context (for memory
allocation functions, or NULL for standard memory allocation). It returns a
pointer to an opaque structure of type <b>pcre2_jit_stack</b>, or NULL if there
is an error. The <b>pcre2_jit_stack_free()</b> function is used to free a stack
-that is no longer needed. (For the technically minded: the address space is
-allocated by mmap or VirtualAlloc.) A maximum stack size of 512K to 1M should
-be more than enough for any pattern.
+that is no longer needed. If its argument is NULL, this function returns
+immediately, without doing anything. (For the technically minded: the address
+space is allocated by mmap or VirtualAlloc.) A maximum stack size of 512KiB to
+1MiB should be more than enough for any pattern.
</P>
<P>
The <b>pcre2_jit_stack_assign()</b> function specifies which stack JIT code
@@ -207,9 +208,10 @@ should use. Its arguments are as follows:
</pre>
The first argument is a pointer to a match context. When this is subsequently
passed to a matching function, its information determines which JIT stack is
-used. There are three cases for the values of the other two options:
+used. If this argument is NULL, the function returns immediately, without doing
+anything. There are three cases for the values of the other two arguments:
<pre>
- (1) If <i>callback</i> is NULL and <i>data</i> is NULL, an internal 32K block
+ (1) If <i>callback</i> is NULL and <i>data</i> is NULL, an internal 32KiB block
on the machine stack is used. This is the default when a match
context is created.
@@ -220,7 +222,7 @@ used. There are three cases for the values of the other two options:
(3) If <i>callback</i> is not NULL, it must point to a function that is
called with <i>data</i> as an argument at the start of matching, in
order to set up a JIT stack. If the return from the callback
- function is NULL, the internal 32K stack is used; otherwise the
+ function is NULL, the internal 32KiB stack is used; otherwise the
return value must be a valid JIT stack, the result of calling
<b>pcre2_jit_stack_create()</b>.
</pre>
@@ -286,9 +288,9 @@ we do the recursion in memory.
Modern operating systems have a nice feature: they can reserve an address space
instead of allocating memory. We can safely allocate memory pages inside this
address space, so the stack could grow without moving memory data (this is
-important because of pointers). Thus we can allocate 1M address space, and use
-only a single memory page (usually 4K) if that is enough. However, we can still
-grow up to 1M anytime if needed.
+important because of pointers). Thus we can allocate 1MiB address space, and
+use only a single memory page (usually 4KiB) if that is enough. However, we can
+still grow up to 1MiB anytime if needed.
</P>
<P>
(3) Who "owns" a JIT stack?
@@ -328,7 +330,7 @@ list of patterns.
</P>
<P>
(6) OK, the stack is for long term memory allocation. But what happens if a
-pattern causes stack overflow with a stack of 1M? Is that 1M kept until the
+pattern causes stack overflow with a stack of 1MiB? Is that 1MiB kept until the
stack is freed?
<br>
<br>
@@ -432,9 +434,9 @@ Cambridge, England.
</P>
<br><a name="SEC13" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 31 March 2017
+Last updated: 28 June 2018
<br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2limits.html b/doc/html/pcre2limits.html
index 640fe3d..d90cdc3 100644
--- a/doc/html/pcre2limits.html
+++ b/doc/html/pcre2limits.html
@@ -20,12 +20,12 @@ There are some size limitations in PCRE2 but it is hoped that they will never
in practice be relevant.
</P>
<P>
-The maximum size of a compiled pattern is approximately 64K code units for the
-8-bit and 16-bit libraries if PCRE2 is compiled with the default internal
-linkage size, which is 2 bytes for these libraries. If you want to process
-regular expressions that are truly enormous, you can compile PCRE2 with an
-internal linkage size of 3 or 4 (when building the 16-bit library, 3 is rounded
-up to 4). See the <b>README</b> file in the source distribution and the
+The maximum size of a compiled pattern is approximately 64 thousand code units
+for the 8-bit and 16-bit libraries if PCRE2 is compiled with the default
+internal linkage size, which is 2 bytes for these libraries. If you want to
+process regular expressions that are truly enormous, you can compile PCRE2 with
+an internal linkage size of 3 or 4 (when building the 16-bit library, 3 is
+rounded up to 4). See the <b>README</b> file in the source distribution and the
<a href="pcre2build.html"><b>pcre2build</b></a>
documentation for details. In these cases the limit is substantially larger.
However, the speed of execution is slower. In the 32-bit library, the internal
@@ -54,9 +54,9 @@ There is no limit to the number of parenthesized subpatterns, but there can be
no more than 65535 capturing subpatterns. There is, however, a limit to the
depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
order to limit the amount of system stack used at compile time. The default
-limit can be specified when PCRE2 is built; the default default is 250. An
-application can change this limit by calling pcre2_set_parens_nest_limit() to
-set the limit in a compile context.
+limit can be specified when PCRE2 is built; if not, the default is set to 250.
+An application can change this limit by calling pcre2_set_parens_nest_limit()
+to set the limit in a compile context.
</P>
<P>
The maximum length of name for a named subpattern is 32 code units, and the
diff --git a/doc/html/pcre2matching.html b/doc/html/pcre2matching.html
index 859bbb3..602f3b2 100644
--- a/doc/html/pcre2matching.html
+++ b/doc/html/pcre2matching.html
@@ -85,7 +85,7 @@ ungreedy repetition quantifiers are specified in the pattern.
Because it ends up with a single path through the tree, it is relatively
straightforward for this algorithm to keep track of the substrings that are
matched by portions of the pattern in parentheses. This provides support for
-capturing parentheses and back references.
+capturing parentheses and backreferences.
</P>
<br><a name="SEC4" href="#TOC1">THE ALTERNATIVE MATCHING ALGORITHM</a><br>
<P>
@@ -158,7 +158,7 @@ possibilities, and PCRE2's implementation of this algorithm does not attempt to
do this. This means that no captured substrings are available.
</P>
<P>
-3. Because no substrings are captured, back references within the pattern are
+3. Because no substrings are captured, backreferences within the pattern are
not supported, and cause errors if encountered.
</P>
<P>
@@ -215,7 +215,7 @@ because it has to search for all possible matches, but is also because it is
less susceptible to optimization.
</P>
<P>
-2. Capturing parentheses and back references are not supported.
+2. Capturing parentheses and backreferences are not supported.
</P>
<P>
3. Although atomic groups are supported, their use does not provide the
diff --git a/doc/html/pcre2pattern.html b/doc/html/pcre2pattern.html
index c495cba..e43e98e 100644
--- a/doc/html/pcre2pattern.html
+++ b/doc/html/pcre2pattern.html
@@ -31,7 +31,7 @@ please consult the man page, in case the conversion went wrong.
<li><a name="TOC16" href="#SEC16">NAMED SUBPATTERNS</a>
<li><a name="TOC17" href="#SEC17">REPETITION</a>
<li><a name="TOC18" href="#SEC18">ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS</a>
-<li><a name="TOC19" href="#SEC19">BACK REFERENCES</a>
+<li><a name="TOC19" href="#SEC19">BACKREFERENCES</a>
<li><a name="TOC20" href="#SEC20">ASSERTIONS</a>
<li><a name="TOC21" href="#SEC21">CONDITIONAL SUBPATTERNS</a>
<li><a name="TOC22" href="#SEC22">COMMENTS</a>
@@ -173,12 +173,12 @@ the application to apply the JIT optimization by calling
Setting match resource limits
</b><br>
<P>
-The pcre2_match() function contains a counter that is incremented every time it
-goes round its main loop. The caller of <b>pcre2_match()</b> can set a limit on
-this counter, which therefore limits the amount of computing resource used for
-a match. The maximum depth of nested backtracking can also be limited; this
-indirectly restricts the amount of heap memory that is used, but there is also
-an explicit memory limit that can be set.
+The <b>pcre2_match()</b> function contains a counter that is incremented every
+time it goes round its main loop. The caller of <b>pcre2_match()</b> can set a
+limit on this counter, which therefore limits the amount of computing resource
+used for a match. The maximum depth of nested backtracking can also be limited;
+this indirectly restricts the amount of heap memory that is used, but there is
+also an explicit memory limit that can be set.
</P>
<P>
These facilities are provided to catch runaway matches that are provoked by
@@ -195,20 +195,22 @@ where d is any number of decimal digits. However, the value of the setting must
be less than the value set (or defaulted) by the caller of <b>pcre2_match()</b>
for it to have any effect. In other words, the pattern writer can lower the
limits set by the programmer, but not raise them. If there is more than one
-setting of one of these limits, the lower value is used.
+setting of one of these limits, the lower value is used. The heap limit is
+specified in kibibytes (units of 1024 bytes).
</P>
<P>
Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This name is
still recognized for backwards compatibility.
</P>
<P>
-The heap limit applies only when the <b>pcre2_match()</b> interpreter is used
-for matching. It does not apply to JIT or DFA matching. The match limit is used
-(but in a different way) when JIT is being used, or when
-<b>pcre2_dfa_match()</b> is called, to limit computing resource usage by those
-matching functions. The depth limit is ignored by JIT but is relevant for DFA
-matching, which uses function recursion for recursions within the pattern. In
-this case, the depth limit controls the amount of system stack that is used.
+The heap limit applies only when the <b>pcre2_match()</b> or
+<b>pcre2_dfa_match()</b> interpreters are used for matching. It does not apply
+to JIT. The match limit is used (but in a different way) when JIT is being
+used, or when <b>pcre2_dfa_match()</b> is called, to limit computing resource
+usage by those matching functions. The depth limit is ignored by JIT but is
+relevant for DFA matching, which uses function recursion for recursions within
+the pattern and for lookaround assertions and atomic groups. In this case, the
+depth limit controls the depth of such recursion.
<a name="newlines"></a></P>
<br><b>
Newline conventions
@@ -247,10 +249,11 @@ is used.
<P>
The newline convention affects where the circumflex and dollar assertions are
true. It also affects the interpretation of the dot metacharacter when
-PCRE2_DOTALL is not set, and the behaviour of \N. However, it does not affect
-what the \R escape sequence matches. By default, this is any Unicode newline
-sequence, for Perl compatibility. However, this can be changed; see the next
-section and the description of \R in the section entitled
+PCRE2_DOTALL is not set, and the behaviour of \N when not followed by an
+opening brace. However, it does not affect what the \R escape sequence
+matches. By default, this is any Unicode newline sequence, for Perl
+compatibility. However, this can be changed; see the next section and the
+description of \R in the section entitled
<a href="#newlineseq">"Newline sequences"</a>
below. A change of \R setting can be combined with a change of newline
convention.
@@ -340,7 +343,7 @@ In particular, if you want to match a backslash, you write \\.
</P>
<P>
In a UTF mode, only ASCII numbers and letters have any special meaning after a
-backslash. All other characters (in particular, those whose codepoints are
+backslash. All other characters (in particular, those whose code points are
greater than 127) are treated as literals.
</P>
<P>
@@ -354,13 +357,18 @@ of the pattern.
If you want to remove the special meaning from a sequence of characters, you
can do so by putting them between \Q and \E. This is different from Perl in
that $ and @ are handled as literals in \Q...\E sequences in PCRE2, whereas
-in Perl, $ and @ cause variable interpolation. Note the following examples:
+in Perl, $ and @ cause variable interpolation. Also, Perl does "double-quotish
+backslash interpolation" on any backslashes between \Q and \E which, its
+documentation says, "may lead to confusing results". PCRE2 treats a backslash
+between \Q and \E just like any other character. Note the following examples:
<pre>
Pattern PCRE2 matches Perl matches
\Qabc$xyz\E abc$xyz abc followed by the contents of $xyz
\Qabc\$xyz\E abc\$xyz abc\$xyz
\Qabc\E\$\Qxyz\E abc$xyz abc$xyz
+ \QA\B\E A\B A\B
+ \Q\\E \ \\E
</pre>
The \Q...\E sequence is recognized both inside and outside character classes.
An isolated \E that is not preceded by \Q is ignored. If \Q is not followed
@@ -380,20 +388,28 @@ text editing, it is often easier to use one of the following escape sequences
than the binary character it represents. In an ASCII or Unicode environment,
these escapes are as follows:
<pre>
- \a alarm, that is, the BEL character (hex 07)
- \cx "control-x", where x is any printable ASCII character
- \e escape (hex 1B)
- \f form feed (hex 0C)
- \n linefeed (hex 0A)
- \r carriage return (hex 0D)
- \t tab (hex 09)
- \0dd character with octal code 0dd
- \ddd character with octal code ddd, or back reference
- \o{ddd..} character with octal code ddd..
- \xhh character with hex code hh
- \x{hhh..} character with hex code hhh.. (default mode)
- \uhhhh character with hex code hhhh (when PCRE2_ALT_BSUX is set)
-</pre>
+ \a alarm, that is, the BEL character (hex 07)
+ \cx "control-x", where x is any printable ASCII character
+ \e escape (hex 1B)
+ \f form feed (hex 0C)
+ \n linefeed (hex 0A)
+ \r carriage return (hex 0D)
+ \t tab (hex 09)
+ \0dd character with octal code 0dd
+ \ddd character with octal code ddd, or backreference
+ \o{ddd..} character with octal code ddd..
+ \xhh character with hex code hh
+ \x{hhh..} character with hex code hhh..
+ \N{U+hhh..} character with Unicode hex code point hhh..
+ \uhhhh character with hex code hhhh (when PCRE2_ALT_BSUX is set)
+</pre>
+The \N{U+hhh..} escape sequence is recognized only when the PCRE2_UTF option
+is set, that is, when PCRE2 is operating in a Unicode mode. Perl also uses
+\N{name} to specify characters by Unicode name; PCRE2 does not support this.
+Note that when \N is not followed by an opening brace (curly bracket) it has
+an entirely different meaning, matching any character that is not a newline.
+</P>
+<P>
The precise effect of \cx on ASCII characters is as follows: if x is a lower
case letter, it is converted to upper case. Then bit 6 of the character (hex
40) is inverted. Thus \cA to \cZ become hex 01 to hex 1A (A is 41, Z is 5A),
@@ -402,14 +418,14 @@ code unit following \c has a value less than 32 or greater than 126, a
compile-time error occurs.
</P>
<P>
-When PCRE2 is compiled in EBCDIC mode, \a, \e, \f, \n, \r, and \t
-generate the appropriate EBCDIC code values. The \c escape is processed
-as specified for Perl in the <b>perlebcdic</b> document. The only characters
-that are allowed after \c are A-Z, a-z, or one of @, [, \, ], ^, _, or ?. Any
-other character provokes a compile-time error. The sequence \c@ encodes
-character code 0; after \c the letters (in either case) encode characters 1-26
-(hex 01 to hex 1A); [, \, ], ^, and _ encode characters 27-31 (hex 1B to hex
-1F), and \c? becomes either 255 (hex FF) or 95 (hex 5F).
+When PCRE2 is compiled in EBCDIC mode, \N{U+hhh..} is not supported. \a, \e,
+\f, \n, \r, and \t generate the appropriate EBCDIC code values. The \c
+escape is processed as specified for Perl in the <b>perlebcdic</b> document. The
+only characters that are allowed after \c are A-Z, a-z, or one of @, [, \, ],
+^, _, or ?. Any other character provokes a compile-time error. The sequence
+\c@ encodes character code 0; after \c the letters (in either case) encode
+characters 1-26 (hex 01 to hex 1A); [, \, ], ^, and _ encode characters 27-31
+(hex 1B to hex 1F), and \c? becomes either 255 (hex FF) or 95 (hex 5F).
</P>
<P>
Thus, apart from \c?, these escapes generate the same character code values as
@@ -436,14 +452,14 @@ follows is itself an octal digit.
The escape \o must be followed by a sequence of octal digits, enclosed in
braces. An error occurs if this is not the case. This escape is a recent
addition to Perl; it provides a way of specifying character code points as octal
-numbers greater than 0777, and it also allows octal numbers and back references
+numbers greater than 0777, and it also allows octal numbers and backreferences
to be unambiguously specified.
</P>
<P>
For greater clarity and unambiguity, it is best to avoid following \ by a
-digit greater than zero. Instead, use \o{} or \x{} to specify character
-numbers, and \g{} to specify back references. The following paragraphs
-describe the old, ambiguous syntax.
+digit greater than zero. Instead, use \o{} or \x{} to specify numerical
+character code points, and \g{} to specify backreferences. The following
+paragraphs describe the old, ambiguous syntax.
</P>
<P>
The handling of a backslash followed by a digit other than 0 is complicated,
@@ -453,7 +469,7 @@ and Perl has changed over time, causing PCRE2 also to change.
Outside a character class, PCRE2 reads the digit and any following digits as a
decimal number. If the number is less than 10, begins with the digit 8 or 9, or
if there are at least that many previous capturing left parentheses in the
-expression, the entire sequence is taken as a <i>back reference</i>. A
+expression, the entire sequence is taken as a <i>backreference</i>. A
description of how this works is given
<a href="#backreferences">later,</a>
following the discussion of
@@ -468,13 +484,13 @@ for themselves. For example, outside a character class:
<pre>
\040 is another way of writing an ASCII space
\40 is the same, provided there are fewer than 40 previous capturing subpatterns
- \7 is always a back reference
- \11 might be a back reference, or another way of writing a tab
+ \7 is always a backreference
+ \11 might be a backreference, or another way of writing a tab
\011 is always a tab
\0113 is a tab followed by the character "3"
- \113 might be a back reference, otherwise the character with octal code 113
- \377 might be a back reference, otherwise the value 255 (decimal)
- \81 is always a back reference .sp
+ \113 might be a backreference, otherwise the character with octal code 113
+ \377 might be a backreference, otherwise the value 255 (decimal)
+ \81 is always a backreference
</pre>
Note that octal values of 100 or greater that are specified using this syntax
must not be introduced by a leading zero, because no more than three octal
@@ -510,12 +526,13 @@ limited to certain values, as follows:
8-bit non-UTF mode no greater than 0xff
16-bit non-UTF mode no greater than 0xffff
32-bit non-UTF mode no greater than 0xffffffff
- All UTF modes no greater than 0x10ffff and a valid codepoint
+ All UTF modes no greater than 0x10ffff and a valid code point
</pre>
-Invalid Unicode codepoints are all those in the range 0xd800 to 0xdfff (the
-so-called "surrogate" codepoints). The check for these can be disabled by the
+Invalid Unicode code points are all those in the range 0xd800 to 0xdfff (the
+so-called "surrogate" code points). The check for these can be disabled by the
caller of <b>pcre2_compile()</b> by setting the option
-PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES.
+PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES. However, this is possible only in UTF-8
+and UTF-32 modes, because these values are not representable in UTF-16.
</P>
<br><b>
Escape sequences in character classes
@@ -526,28 +543,28 @@ and outside character classes. In addition, inside a character class, \b is
interpreted as the backspace character (hex 08).
</P>
<P>
-\N is not allowed in a character class. \B, \R, and \X are not special
-inside a character class. Like other unrecognized alphabetic escape sequences,
-they cause an error. Outside a character class, these sequences have different
-meanings.
+When not followed by an opening brace, \N is not allowed in a character class.
+\B, \R, and \X are not special inside a character class. Like other
+unrecognized alphabetic escape sequences, they cause an error. Outside a
+character class, these sequences have different meanings.
</P>
<br><b>
Unsupported escape sequences
</b><br>
<P>
-In Perl, the sequences \l, \L, \u, and \U are recognized by its string
+In Perl, the sequences \F, \l, \L, \u, and \U are recognized by its string
handler and used to modify the case of following characters. By default, PCRE2
does not support these escape sequences. However, if the PCRE2_ALT_BSUX option
is set, \U matches a "U" character, and \u can be used to define a character
by code point, as described above.
</P>
<br><b>
-Absolute and relative back references
+Absolute and relative backreferences
</b><br>
<P>
The sequence \g followed by a signed or unsigned number, optionally enclosed
-in braces, is an absolute or relative back reference. A named back reference
-can be coded as \g{name}. Back references are discussed
+in braces, is an absolute or relative backreference. A named backreference
+can be coded as \g{name}. Backreferences are discussed
<a href="#backreferences">later,</a>
following the discussion of
<a href="#subpattern">parenthesized subpatterns.</a>
@@ -561,7 +578,7 @@ a number enclosed either in angle brackets or single quotes, is an alternative
syntax for referencing a subpattern as a "subroutine". Details are discussed
<a href="#onigurumasubroutines">later.</a>
Note that \g{...} (Perl syntax) and \g&#60;...&#62; (Oniguruma syntax) are <i>not</i>
-synonymous. The former is a back reference; the latter is a
+synonymous. The former is a backreference; the latter is a
<a href="#subpatternsassubroutines">subroutine</a>
call.
<a name="genericchartypes"></a></P>
@@ -575,6 +592,7 @@ Another use of backslash is for specifying generic character types:
\D any character that is not a decimal digit
\h any horizontal white space character
\H any character that is not a horizontal white space character
+ \N any character that is not a newline
\s any white space character
\S any character that is not a white space character
\v any vertical white space character
@@ -582,11 +600,14 @@ Another use of backslash is for specifying generic character types:
\w any "word" character
\W any "non-word" character
</pre>
-There is also the single sequence \N, which matches a non-newline character.
-This is the same as
+The \N escape sequence has the same meaning as
<a href="#fullstopdot">the "." metacharacter</a>
-when PCRE2_DOTALL is not set. Perl also uses \N to match characters by name;
-PCRE2 does not support this.
+when PCRE2_DOTALL is not set, but setting PCRE2_DOTALL does not change the
+meaning of \N. Note that when \N is followed by an opening brace it has a
+different meaning. See the section entitled
+<a href="#digitsafterbackslash">"Non-printing characters"</a>
+above for details. Perl also uses \N{name} to specify characters by Unicode
+name; PCRE2 does not support this.
</P>
<P>
Each pair of lower and upper case escape sequences partitions the complete set
@@ -692,7 +713,7 @@ line, U+0085). Because this is an atomic group, the two-character sequence is
treated as a single unit that cannot be split.
</P>
<P>
-In other modes, two additional characters whose codepoints are greater than 255
+In other modes, two additional characters whose code points are greater than 255
are added: LS (line separator, U+2028) and PS (paragraph separator, U+2029).
Unicode support is not needed for these characters to be recognized.
</P>
@@ -727,8 +748,8 @@ Unicode character properties
When PCRE2 is built with Unicode support (the default), three additional escape
sequences that match characters with specific properties are available. In
8-bit non-UTF-8 mode, these sequences are of course limited to testing
-characters whose codepoints are less than 256, but they do work in this mode.
-In 32-bit non-UTF mode, codepoints greater than 0x10ffff (the Unicode limit)
+characters whose code points are less than 256, but they do work in this mode.
+In 32-bit non-UTF mode, code points greater than 0x10ffff (the Unicode limit)
may be encountered. These are all treated as being in the Common script and
with an unassigned type. The extra escape sequences are:
<pre>
@@ -787,6 +808,7 @@ Cypriot,
Cyrillic,
Deseret,
Devanagari,
+Dogra,
Duployan,
Egyptian_Hieroglyphs,
Elbasan,
@@ -797,9 +819,11 @@ Gothic,
Grantha,
Greek,
Gujarati,
+Gunjala_Gondi,
Gurmukhi,
Han,
Hangul,
+Hanifi_Rohingya,
Hanunoo,
Hatran,
Hebrew,
@@ -827,11 +851,13 @@ Lisu,
Lycian,
Lydian,
Mahajani,
+Makasar,
Malayalam,
Mandaic,
Manichaean,
Marchen,
Masaram_Gondi,
+Medefaidrin,
Meetei_Mayek,
Mende_Kikakui,
Meroitic_Cursive,
@@ -854,6 +880,7 @@ Old_Italic,
Old_North_Arabian,
Old_Permic,
Old_Persian,
+Old_Sogdian,
Old_South_Arabian,
Old_Turkic,
Oriya,
@@ -874,6 +901,7 @@ Shavian,
Siddham,
SignWriting,
Sinhala,
+Sogdian,
Sora_Sompeng,
Soyombo,
Sundanese,
@@ -1004,7 +1032,10 @@ grapheme cluster", and treats the sequence as an atomic group
Unicode supports various kinds of composite character by giving each character
a grapheme breaking property, and having rules that use these properties to
define the boundaries of extended grapheme clusters. The rules are defined in
-Unicode Standard Annex 29, "Unicode Text Segmentation".
+Unicode Standard Annex 29, "Unicode Text Segmentation". Unicode 11.0.0
+abandoned the use of some previous properties that had been used for emojis.
+Instead it introduced various emoji-specific properties. PCRE2 uses only the
+Extended Pictographic property.
</P>
<P>
\X always matches at least one character. Then it decides whether to add
@@ -1024,27 +1055,24 @@ character; an LVT or T character may be followed only by a T character.
</P>
<P>
4. Do not end before extending characters or spacing marks or the "zero-width
-joiner" characters. Characters with the "mark" property always have the
+joiner" character. Characters with the "mark" property always have the
"extend" grapheme breaking property.
</P>
<P>
5. Do not end after prepend characters.
</P>
<P>
-6. Do not break within emoji modifier sequences (a base character followed by a
-modifier). Extending characters are allowed before the modifier.
+6. Do not break within emoji modifier sequences or emoji zwj sequences. That
+is, do not break between characters with the Extended_Pictographic property.
+Extend and ZWJ characters are allowed between the characters.
</P>
<P>
-7. Do not break within emoji zwj sequences (zero-width jointer followed by
-"glue after ZWJ" or "base glue after ZWJ").
-</P>
-<P>
-8. Do not break within emoji flag sequences. That is, do not break between
+7. Do not break within emoji flag sequences. That is, do not break between
regional indicator (RI) characters if there are an odd number of RI characters
before the break point.
</P>
<P>
-6. Otherwise, end the cluster.
+8. Otherwise, end the cluster.
<a name="extraprops"></a></P>
<br><b>
PCRE2's additional properties
@@ -1082,13 +1110,20 @@ sequences but the characters that they represent.)
Resetting the match start
</b><br>
<P>
-The escape sequence \K causes any previously matched characters not to be
-included in the final matched sequence. For example, the pattern:
+In normal use, the escape sequence \K causes any previously matched characters
+not to be included in the final matched sequence that is returned. For example,
+the pattern:
<pre>
foo\Kbar
</pre>
-matches "foobar", but reports that it has matched "bar". This feature is
-similar to a lookbehind assertion
+matches "foobar", but reports that it has matched "bar". \K does not interact
+with anchoring in any way. The pattern:
+<pre>
+ ^foo\Kbar
+</pre>
+matches only when the subject begins with "foobar" (in single line mode),
+though it again reports the matched string as "bar". This feature is similar to
+a lookbehind assertion
<a href="#lookbehind">(described below).</a>
However, in this case, the part of the subject before the real match does not
have to be of fixed length, as lookbehind assertions do. The use of \K does
@@ -1105,7 +1140,14 @@ Perl documents that the use of \K within assertions is "not well defined". In
PCRE2, \K is acted upon when it occurs inside positive assertions, but is
ignored in negative assertions. Note that when a pattern such as (?=ab\K)
matches, the reported start of the match can be greater than the end of the
-match.
+match. Using \K in a lookbehind assertion at the start of a pattern can also
+lead to odd effects. For example, consider this pattern:
+<pre>
+ (?&#60;=\Kfoo)bar
+</pre>
+If the subject is "foobar", a call to <b>pcre2_match()</b> with a starting
+offset of 3 succeeds and reports the matching string as "foobar", that is, the
+start of the reported match is earlier than where the match started.
<a name="smallassertions"></a></P>
<br><b>
Simple assertions
@@ -1156,18 +1198,18 @@ end.
</P>
<P>
The \G assertion is true only when the current matching position is at the
-start point of the match, as specified by the <i>startoffset</i> argument of
-<b>pcre2_match()</b>. It differs from \A when the value of <i>startoffset</i> is
-non-zero. By calling <b>pcre2_match()</b> multiple times with appropriate
-arguments, you can mimic Perl's /g option, and it is in this kind of
-implementation where \G can be useful.
+start point of the matching process, as specified by the <i>startoffset</i>
+argument of <b>pcre2_match()</b>. It differs from \A when the value of
+<i>startoffset</i> is non-zero. By calling <b>pcre2_match()</b> multiple times
+with appropriate arguments, you can mimic Perl's /g option, and it is in this
+kind of implementation where \G can be useful.
</P>
<P>
-Note, however, that PCRE2's interpretation of \G, as the start of the current
-match, is subtly different from Perl's, which defines it as the end of the
-previous match. In Perl, these can be different when the previously matched
-string was empty. Because PCRE2 does just one match at a time, it cannot
-reproduce this behaviour.
+Note, however, that PCRE2's implementation of \G, being true at the starting
+character of the matching process, is subtly different from Perl's, which
+defines it as true at the end of the previous match. In Perl, these can be
+different when the previously matched string was empty. Because PCRE2 does just
+one match at a time, it cannot reproduce this behaviour.
</P>
<P>
If all the alternatives of a pattern begin with \G, the expression is anchored
@@ -1274,9 +1316,15 @@ dollar, the only relationship being that they both involve newlines. Dot has no
special meaning in a character class.
</P>
<P>
-The escape sequence \N behaves like a dot, except that it is not affected by
-the PCRE2_DOTALL option. In other words, it matches any character except one
-that signifies the end of a line. Perl also uses \N to match characters by
+The escape sequence \N when not followed by an opening brace behaves like a
+dot, except that it is not affected by the PCRE2_DOTALL option. In other words,
+it matches any character except one that signifies the end of a line.
+</P>
+<P>
+When \N is followed by an opening brace it has a different meaning. See the
+section entitled
+<a href="#digitsafterbackslash">"Non-printing characters"</a>
+above for details. Perl also uses \N{name} to specify characters by Unicode
name; PCRE2 does not support this.
</P>
<br><a name="SEC8" href="#TOC1">MATCHING A SINGLE CODE UNIT</a><br>
@@ -1362,10 +1410,11 @@ string, and therefore it fails if the current pointer is at the end of the
string.
</P>
<P>
-When caseless matching is set, any letters in a class represent both their
-upper case and lower case versions, so for example, a caseless [aeiou] matches
-"A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a
-caseful version would.
+Characters in a class may be specified by their code points using \o, \x, or
+\N{U+hhh..} in the usual way. When caseless matching is set, any letters in a
+class represent both their upper case and lower case versions, so for example,
+a caseless [aeiou] matches "A" as well as "a", and a caseless [^aeiou] does not
+match "A", whereas a caseful version would.
</P>
<P>
Characters that might indicate line breaks are never treated in any special way
@@ -1374,17 +1423,18 @@ whatever setting of the PCRE2_DOTALL and PCRE2_MULTILINE options is used. A
class such as [^a] always matches one of these characters.
</P>
<P>
-The character escape sequences \d, \D, \h, \H, \p, \P, \s, \S, \v,
-\V, \w, and \W may appear in a character class, and add the characters that
-they match to the class. For example, [\dABCDEF] matches any hexadecimal
-digit. In UTF modes, the PCRE2_UCP option affects the meanings of \d, \s, \w
-and their upper case partners, just as it does when they appear outside a
-character class, as described in the section entitled
+The generic character type escape sequences \d, \D, \h, \H, \p, \P, \s,
+\S, \v, \V, \w, and \W may appear in a character class, and add the
+characters that they match to the class. For example, [\dABCDEF] matches any
+hexadecimal digit. In UTF modes, the PCRE2_UCP option affects the meanings of
+\d, \s, \w and their upper case partners, just as it does when they appear
+outside a character class, as described in the section entitled
<a href="#genericchartypes">"Generic character types"</a>
above. The escape sequence \b has a different meaning inside a character
-class; it matches the backspace character. The sequences \B, \N, \R, and \X
-are not special inside a character class. Like any other unrecognized escape
-sequences, they cause an error.
+class; it matches the backspace character. The sequences \B, \R, and \X are
+not special inside a character class. Like any other unrecognized escape
+sequences, they cause an error. The same is true for \N when not followed by
+an opening brace.
</P>
<P>
The minus (hyphen) character can be used to specify a range of characters in a
@@ -1580,9 +1630,11 @@ alternative in the subpattern.
<br><a name="SEC13" href="#TOC1">INTERNAL OPTION SETTING</a><br>
<P>
The settings of the PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL,
-PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE options (which
-are Perl-compatible) can be changed from within the pattern by a sequence of
-Perl option letters enclosed between "(?" and ")". The option letters are
+PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE options can be
+changed from within the pattern by a sequence of letters enclosed between "(?"
+and ")". These options are Perl-compatible, and are described in detail in the
+<a href="pcre2api.html"><b>pcre2api</b></a>
+documentation. The option letters are:
<pre>
i for PCRE2_CASELESS
m for PCRE2_MULTILINE
@@ -1592,21 +1644,27 @@ Perl option letters enclosed between "(?" and ")". The option letters are
xx for PCRE2_EXTENDED_MORE
</pre>
For example, (?im) sets caseless, multiline matching. It is also possible to
-unset these options by preceding the letter with a hyphen. The two "extended"
-options are not independent; unsetting either one cancels the effects of both
-of them.
+unset these options by preceding the relevant letters with a hyphen, for
+example (?-im). The two "extended" options are not independent; unsetting either
+one cancels the effects of both of them.
</P>
<P>
A combined setting and unsetting such as (?im-sx), which sets PCRE2_CASELESS
and PCRE2_MULTILINE while unsetting PCRE2_DOTALL and PCRE2_EXTENDED, is also
-permitted. If a letter appears both before and after the hyphen, the option is
-unset. An empty options setting "(?)" is allowed. Needless to say, it has no
-effect.
+permitted. Only one hyphen may appear in the options string. If a letter
+appears both before and after the hyphen, the option is unset. An empty options
+setting "(?)" is allowed. Needless to say, it has no effect.
+</P>
+<P>
+If the first character following (? is a circumflex, it causes all of the above
+options to be unset. Thus, (?^) is equivalent to (?-imnsx). Letters may follow
+the circumflex to cause some options to be re-instated, but a hyphen may not
+appear.
</P>
<P>
The PCRE2-specific options PCRE2_DUPNAMES and PCRE2_UNGREEDY can be changed in
the same way as the Perl-compatible options by using the characters J and U
-respectively.
+respectively. However, these are not unset by (?^).
</P>
<P>
When one of these option changes occurs at top level (that is, not inside
@@ -1729,7 +1787,7 @@ numbers underneath show in which buffer the captured content will be stored.
/ ( a ) (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
# 1 2 2 3 2 3 4
</pre>
-A back reference to a numbered subpattern uses the most recent value that is
+A backreference to a numbered subpattern uses the most recent value that is
set for that number by any subpattern. The following pattern matches "abcabc"
or "defdef":
<pre>
@@ -1757,41 +1815,68 @@ duplicate named subpatterns, as described in the next section.
<br><a name="SEC16" href="#TOC1">NAMED SUBPATTERNS</a><br>
<P>
Identifying capturing parentheses by number is simple, but it can be very hard
-to keep track of the numbers in complicated regular expressions. Furthermore,
-if an expression is modified, the numbers may change. To help with this
-difficulty, PCRE2 supports the naming of subpatterns. This feature was not
-added to Perl until release 5.10. Python had the feature earlier, and PCRE1
+to keep track of the numbers in complicated patterns. Furthermore, if an
+expression is modified, the numbers may change. To help with this difficulty,
+PCRE2 supports the naming of capturing subpatterns. This feature was not added
+to Perl until release 5.10. Python had the feature earlier, and PCRE1
introduced it at release 4.0, using the Python syntax. PCRE2 supports both the
-Perl and the Python syntax. Perl allows identically numbered subpatterns to
-have different names, but PCRE2 does not.
+Perl and the Python syntax.
</P>
<P>
-In PCRE2, a subpattern can be named in one of three ways: (?&#60;name&#62;...) or
-(?'name'...) as in Perl, or (?P&#60;name&#62;...) as in Python. References to capturing
-parentheses from other parts of the pattern, such as
-<a href="#backreferences">back references,</a>
+In PCRE2, a capturing subpattern can be named in one of three ways:
+(?&#60;name&#62;...) or (?'name'...) as in Perl, or (?P&#60;name&#62;...) as in Python. Names
+consist of up to 32 alphanumeric characters and underscores, but must start
+with a non-digit. References to capturing parentheses from other parts of the
+pattern, such as
+<a href="#backreferences">backreferences,</a>
<a href="#recursion">recursion,</a>
and
<a href="#conditions">conditions,</a>
-can be made by name as well as by number.
+can all be made by name as well as by number.
</P>
<P>
-Names consist of up to 32 alphanumeric characters and underscores, but must
-start with a non-digit. Named capturing parentheses are still allocated numbers
-as well as names, exactly as if the names were not present. The PCRE2 API
-provides function calls for extracting the name-to-number translation table
-from a compiled pattern. There are also convenience functions for extracting a
-captured substring by name.
+Named capturing parentheses are allocated numbers as well as names, exactly as
+if the names were not present. In both PCRE2 and Perl, capturing subpatterns
+are primarily identified by numbers; any names are just aliases for these
+numbers. The PCRE2 API provides function calls for extracting the complete
+name-to-number translation table from a compiled pattern, as well as
+convenience functions for extracting captured substrings by name.
</P>
<P>
-By default, a name must be unique within a pattern, but it is possible to relax
-this constraint by setting the PCRE2_DUPNAMES option at compile time.
-(Duplicate names are also always permitted for subpatterns with the same
-number, set up as described in the previous section.) Duplicate names can be
-useful for patterns where only one instance of the named parentheses can match.
-Suppose you want to match the name of a weekday, either as a 3-letter
-abbreviation or as the full name, and in both cases you want to extract the
-abbreviation. This pattern (ignoring the line breaks) does the job:
+<b>Warning:</b> When more than one subpattern has the same number, as described
+in the previous section, a name given to one of them applies to all of them.
+Perl allows identically numbered subpatterns to have different names. Consider
+this pattern, where there are two capturing subpatterns, both numbered 1:
+<pre>
+ (?|(?&#60;AA&#62;aa)|(?&#60;BB&#62;bb))
+</pre>
+Perl allows this, with both names AA and BB as aliases of group 1. Thus, after
+a successful match, both names yield the same value (either "aa" or "bb").
+</P>
+<P>
+In an attempt to reduce confusion, PCRE2 does not allow the same group number
+to be associated with more than one name. The example above provokes a
+compile-time error. However, there is still scope for confusion. Consider this
+pattern:
+<pre>
+ (?|(?&#60;AA&#62;aa)|(bb))
+</pre>
+Although the second subpattern number 1 is not explicitly named, the name AA is
+still an alias for subpattern 1. Whether the pattern matches "aa" or "bb", a
+reference by name to group AA yields the matched string.
+</P>
+<P>
+By default, a name must be unique within a pattern, except that duplicate names
+are permitted for subpatterns with the same number, for example:
+<pre>
+ (?|(?&#60;AA&#62;aa)|(?&#60;AA&#62;bb))
+</pre>
+The duplicate name constraint can be disabled by setting the PCRE2_DUPNAMES
+option at compile time, or by the use of (?J) within the pattern. Duplicate
+names can be useful for patterns where only one instance of the named
+parentheses can match. Suppose you want to match the name of a weekday, either
+as a 3-letter abbreviation or as the full name, and in both cases you want to
+extract the abbreviation. This pattern (ignoring the line breaks) does the job:
<pre>
(?&#60;DN&#62;Mon|Fri|Sun)(?:day)?|
(?&#60;DN&#62;Tue)(?:sday)?|
@@ -1800,16 +1885,14 @@ abbreviation. This pattern (ignoring the line breaks) does the job:
(?&#60;DN&#62;Sat)(?:urday)?
</pre>
There are five capturing substrings, but only one is ever set after a match.
-(An alternative way of solving this problem is to use a "branch reset"
-subpattern, as described in the previous section.)
-</P>
-<P>
The convenience functions for extracting the data by name return the substring
for the first (and in this example, the only) subpattern of that name that
-matched. This saves searching to find which numbered subpattern it was.
+matched. This saves searching to find which numbered subpattern it was. (An
+alternative way of solving this problem is to use a "branch reset" subpattern,
+as described in the previous section.)
</P>
<P>
-If you make a back reference to a non-unique named subpattern from elsewhere in
+If you make a backreference to a non-unique named subpattern from elsewhere in
the pattern, the subpatterns to which the name refers are checked in the order
in which they appear in the overall pattern. The first one that is set is used
for the reference. For example, this pattern matches both "foofoo" and
@@ -1822,8 +1905,7 @@ for the reference. For example, this pattern matches both "foofoo" and
<P>
If you make a subroutine call to a non-unique named subpattern, the one that
corresponds to the first occurrence of the name is used. In the absence of
-duplicate numbers (see the previous section) this is the one with the lowest
-number.
+duplicate numbers this is the one with the lowest number.
</P>
<P>
If you use a named reference in a condition
@@ -1837,14 +1919,6 @@ handling named subpatterns, see the
<a href="pcre2api.html"><b>pcre2api</b></a>
documentation.
</P>
-<P>
-<b>Warning:</b> You cannot use different names to distinguish between two
-subpatterns with the same number because PCRE2 uses only the numbers when
-matching. For this reason, an error is given at compile time if different names
-are given to subpatterns with the same number. However, you can always give the
-same name to subpatterns with the same number, even when PCRE2_DUPNAMES is not
-set.
-</P>
<br><a name="SEC17" href="#TOC1">REPETITION</a><br>
<P>
Repetition is specified by quantifiers, which can follow any of the following
@@ -1857,7 +1931,7 @@ items:
the \R escape sequence
an escape such as \d or \pL that matches a single character
a character class
- a back reference
+ a backreference
a parenthesized subpattern (including most assertions)
a subroutine call to a subpattern (recursive or otherwise)
</pre>
@@ -1978,7 +2052,7 @@ alternatively, using ^ to indicate anchoring explicitly.
</P>
<P>
However, there are some cases where the optimization cannot be used. When .*
-is inside capturing parentheses that are the subject of a back reference
+is inside capturing parentheses that are the subject of a backreference
elsewhere in the pattern, a match at the start may fail where a later one
succeeds. Consider, for example:
<pre>
@@ -2119,30 +2193,30 @@ an atomic group, like this:
</pre>
sequences of non-digits cannot be broken, and failure happens quickly.
<a name="backreferences"></a></P>
-<br><a name="SEC19" href="#TOC1">BACK REFERENCES</a><br>
+<br><a name="SEC19" href="#TOC1">BACKREFERENCES</a><br>
<P>
Outside a character class, a backslash followed by a digit greater than 0 (and
-possibly further digits) is a back reference to a capturing subpattern earlier
+possibly further digits) is a backreference to a capturing subpattern earlier
(that is, to its left) in the pattern, provided there have been that many
previous capturing left parentheses.
</P>
<P>
However, if the decimal number following the backslash is less than 8, it is
-always taken as a back reference, and causes an error only if there are not
+always taken as a backreference, and causes an error only if there are not
that many capturing left parentheses in the entire pattern. In other words, the
parentheses that are referenced need not be to the left of the reference for
-numbers less than 8. A "forward back reference" of this type can make sense
+numbers less than 8. A "forward backreference" of this type can make sense
when a repetition is involved and the subpattern to the right has participated
in an earlier iteration.
</P>
<P>
-It is not possible to have a numerical "forward back reference" to a subpattern
+It is not possible to have a numerical "forward backreference" to a subpattern
whose number is 8 or more using this syntax because a sequence such as \50 is
interpreted as a character defined in octal. See the subsection entitled
"Non-printing characters"
<a href="#digitsafterbackslash">above</a>
for further details of the handling of digits following a backslash. There is
-no such problem when named parentheses are used. A back reference to any
+no such problem when named parentheses are used. A backreference to any
subpattern is possible using named parentheses (see below).
</P>
<P>
@@ -2173,7 +2247,7 @@ of forward reference can be useful it patterns that repeat. Perl does not
support the use of + in this way.
</P>
<P>
-A back reference matches whatever actually matched the capturing subpattern in
+A backreference matches whatever actually matched the capturing subpattern in
the current subject string, rather than anything matching the subpattern
itself (see
<a href="#subpatternsassubroutines">"Subpatterns as subroutines"</a>
@@ -2183,7 +2257,7 @@ below for a way of doing that). So the pattern
</pre>
matches "sense and sensibility" and "response and responsibility", but not
"sense and responsibility". If caseful matching is in force at the time of the
-back reference, the case of letters is relevant. For example,
+backreference, the case of letters is relevant. For example,
<pre>
((?i)rah)\s+\1
</pre>
@@ -2191,10 +2265,10 @@ matches "rah rah" and "RAH RAH", but not "RAH rah", even though the original
capturing subpattern is matched caselessly.
</P>
<P>
-There are several different ways of writing back references to named
+There are several different ways of writing backreferences to named
subpatterns. The .NET syntax \k{name} and the Perl syntax \k&#60;name&#62; or
\k'name' are supported, as is the Python syntax (?P=name). Perl 5.10's unified
-back reference syntax, in which \g can be used for both numeric and named
+backreference syntax, in which \g can be used for both numeric and named
references, is also supported. We could rewrite the above example in any of
the following ways:
<pre>
@@ -2207,30 +2281,31 @@ A subpattern that is referenced by name may appear in the pattern before or
after the reference.
</P>
<P>
-There may be more than one back reference to the same subpattern. If a
-subpattern has not actually been used in a particular match, any back
-references to it always fail by default. For example, the pattern
+There may be more than one backreference to the same subpattern. If a
+subpattern has not actually been used in a particular match, any backreferences
+to it always fail by default. For example, the pattern
<pre>
(a|(bc))\2
</pre>
always fails if it starts to match "a" rather than "bc". However, if the
-PCRE2_MATCH_UNSET_BACKREF option is set at compile time, a back reference to an
+PCRE2_MATCH_UNSET_BACKREF option is set at compile time, a backreference to an
unset value matches an empty string.
</P>
<P>
Because there may be many capturing parentheses in a pattern, all digits
-following a backslash are taken as part of a potential back reference number.
+following a backslash are taken as part of a potential backreference number.
If the pattern continues with a digit character, some delimiter must be used to
-terminate the back reference. If the PCRE2_EXTENDED option is set, this can be
-white space. Otherwise, the \g{ syntax or an empty comment (see
+terminate the backreference. If the PCRE2_EXTENDED or PCRE2_EXTENDED_MORE
+option is set, this can be white space. Otherwise, the \g{ syntax or an empty
+comment (see
<a href="#comments">"Comments"</a>
below) can be used.
</P>
<br><b>
-Recursive back references
+Recursive backreferences
</b><br>
<P>
-A back reference that occurs inside the parentheses to which it refers fails
+A backreference that occurs inside the parentheses to which it refers fails
when the subpattern is first used, so, for example, (a\1) never matches.
However, such references can be useful inside repeated subpatterns. For
example, the pattern
@@ -2238,14 +2313,14 @@ example, the pattern
(a|b\1)+
</pre>
matches any number of "a"s and also "aba", "ababbaa" etc. At each iteration of
-the subpattern, the back reference matches the character string corresponding
+the subpattern, the backreference matches the character string corresponding
to the previous iteration. In order for this to work, the pattern must be such
-that the first iteration does not need to match the back reference. This can be
+that the first iteration does not need to match the backreference. This can be
done using alternation, as in the example above, or by a quantifier with a
minimum of zero.
</P>
<P>
-Back references of this type cause the group that they reference to be treated
+Backreferences of this type cause the group that they reference to be treated
as an
<a href="#atomicgroup">atomic group.</a>
Once the whole group has been matched, a subsequent matching failure cannot
@@ -2264,24 +2339,35 @@ those that look ahead of the current position in the subject string, and those
that look behind it, and in each case an assertion may be positive (must
succeed for matching to continue) or negative (must not succeed for matching to
continue). An assertion subpattern is matched in the normal way, except that,
-when matching continues afterwards, the matching position in the subject string
-is as it was at the start of the assertion.
+when matching continues after a successful assertion, the matching position in
+the subject string is as it was before the assertion was processed.
</P>
<P>
Assertion subpatterns are not capturing subpatterns. If an assertion contains
capturing subpatterns within it, these are counted for the purposes of
-numbering the capturing subpatterns in the whole pattern. However, substring
-capturing is carried out only for positive assertions that succeed, that is,
-one of their branches matches, so matching continues after the assertion. If
-all branches of a positive assertion fail to match, nothing is captured, and
-control is passed to the previous backtracking point.
+numbering the capturing subpatterns in the whole pattern. Within each branch of
+an assertion, locally captured substrings may be referenced in the usual way.
+For example, a sequence such as (.)\g{-1} can be used to check that two
+adjacent characters are the same.
+</P>
+<P>
+When a branch within an assertion fails to match, any substrings that were
+captured are discarded (as happens with any pattern branch that fails to
+match). A negative assertion succeeds only when all its branches fail to match;
+this means that no captured substrings are ever retained after a successful
+negative assertion. When an assertion contains a matching branch, what happens
+depends on the type of assertion.
</P>
<P>
-No capturing is done for a negative assertion unless it is being used as a
-condition in a
-<a href="#subpatternsassubroutines">conditional subpattern</a>
-(see the discussion below). Matching continues after a non-conditional negative
-assertion only if all its branches fail to match.
+For a positive assertion, internally captured substrings in the successful
+branch are retained, and matching continues with the next pattern item after
+the assertion. For a negative assertion, a matching branch means that the
+assertion has failed. If the assertion is being used as a condition in a
+<a href="#conditions">conditional subpattern</a>
+(see below), captured substrings are retained, because matching continues with
+the "no" branch of the condition. For other failing negative assertions,
+control passes to the previous backtracking point, thus discarding any captured
+strings within the assertion.
</P>
<P>
For compatibility with Perl, most assertion subpatterns may be repeated; though
@@ -2395,10 +2481,10 @@ that is, a "subroutine" call into a group that is already active,
is not supported.
</P>
<P>
-Perl does not support back references in lookbehinds. PCRE2 does support them,
+Perl does not support backreferences in lookbehinds. PCRE2 does support them,
but only if certain conditions are met. The PCRE2_MATCH_UNSET_BACKREF option
must not be set, there must be no use of (?| in the pattern (it creates
-duplicate subpattern numbers), and if the back reference is by name, the name
+duplicate subpattern numbers), and if the backreference is by name, the name
must be unique. Of course, the referenced subpattern must itself be of fixed
length. The following pattern matches words containing at least two characters
that begin and end with the same character:
@@ -2479,7 +2565,8 @@ already been matched. The two possible forms of conditional subpattern are:
(?(condition)yes-pattern|no-pattern)
</pre>
If the condition is satisfied, the yes-pattern is used; otherwise the
-no-pattern (if present) is used. If there are more than two alternatives in the
+no-pattern (if present) is used. An absent no-pattern is equivalent to an empty
+string (it always matches). If there are more than two alternatives in the
subpattern, a compile-time error occurs. Each of the two alternatives may
itself contain nested subpatterns of any form, including conditional
subpatterns; the restriction to two alternatives applies only at the level of
@@ -2678,12 +2765,12 @@ no part in the pattern matching.
<P>
The sequence (?# marks the start of a comment that continues up to the next
closing parenthesis. Nested parentheses are not permitted. If the
-PCRE2_EXTENDED option is set, an unescaped # character also introduces a
-comment, which in this case continues to immediately after the next newline
-character or character sequence in the pattern. Which characters are
-interpreted as newlines is controlled by an option passed to the compiling
-function or by a special sequence at the start of the pattern, as described in
-the section entitled
+PCRE2_EXTENDED or PCRE2_EXTENDED_MORE option is set, an unescaped # character
+also introduces a comment, which in this case continues to immediately after
+the next newline character or character sequence in the pattern. Which
+characters are interpreted as newlines is controlled by an option passed to the
+compiling function or by a special sequence at the start of the pattern, as
+described in the section entitled
<a href="#newlines">"Newline conventions"</a>
above. Note that the end of this type of comment is a literal newline sequence
in the pattern; escape sequences that happen to represent a newline do not
@@ -2818,11 +2905,6 @@ matched at the top level, its final captured value is unset, even if it was
(temporarily) set at a deeper level during the matching process.
</P>
<P>
-If there are more than 15 capturing parentheses in a pattern, PCRE2 has to
-obtain extra memory from the heap to store data during a recursion. If no
-memory can be obtained, the match fails with the PCRE2_ERROR_NOMEMORY error.
-</P>
-<P>
Do not confuse the (?R) item with the condition (R), which tests for recursion.
Consider this pattern, which matches text in angle brackets, allowing for
arbitrary nesting. Only digits are allowed in nested brackets (that is, when
@@ -2885,7 +2967,7 @@ in PCRE2 these values can be referenced. Consider this pattern:
^(.)(\1|a(?2))
</pre>
This pattern matches "bab". The first capturing parentheses match "b", then in
-the second group, when the back reference \1 fails to match "b", the second
+the second group, when the backreference \1 fails to match "b", the second
alternative matches "a" and then recurses. In the recursion, \1 does now match
"b" and so the whole match succeeds. This match used to fail in Perl, but in
later versions (I tried 5.024) it now works.
@@ -2893,10 +2975,12 @@ later versions (I tried 5.024) it now works.
<br><a name="SEC24" href="#TOC1">SUBPATTERNS AS SUBROUTINES</a><br>
<P>
If the syntax for a recursive subpattern call (either by number or by
-name) is used outside the parentheses to which it refers, it operates like a
-subroutine in a programming language. The called subpattern may be defined
-before or after the reference. A numbered reference can be absolute or
-relative, as in these examples:
+name) is used outside the parentheses to which it refers, it operates a bit
+like a subroutine in a programming language. More accurately, PCRE2 treats the
+referenced subpattern as an independent subpattern which it tries to match at
+the current matching position. The called subpattern may be defined before or
+after the reference. A numbered reference can be absolute or relative, as in
+these examples:
<pre>
(...(absolute)...)...(?2)...
(...(relative)...)...(?-1)...
@@ -2929,6 +3013,13 @@ different calls. For example, consider this pattern:
</pre>
It matches "abcabc". It does not match "abcABC" because the change of
processing option does not affect the called subpattern.
+</P>
+<P>
+The behaviour of
+<a href="#backtrackcontrol">backtracking control verbs</a>
+in subpatterns when called as subroutines is described in the section entitled
+<a href="#btsub">"Backtracking verbs in subroutines"</a>
+below.
<a name="onigurumasubroutines"></a></P>
<br><a name="SEC25" href="#TOC1">ONIGURUMA SUBROUTINE SYNTAX</a><br>
<P>
@@ -2946,7 +3037,7 @@ plus or a minus sign it is taken as a relative reference. For example:
(abc)(?i:\g&#60;-1&#62;)
</pre>
Note that \g{...} (Perl syntax) and \g&#60;...&#62; (Oniguruma syntax) are <i>not</i>
-synonymous. The former is a back reference; the latter is a subroutine call.
+synonymous. The former is a backreference; the latter is a subroutine call.
</P>
<br><a name="SEC26" href="#TOC1">CALLOUTS</a><br>
<P>
@@ -3047,10 +3138,11 @@ are faulted.
</P>
<P>
A closing parenthesis can be included in a name either as \) or between \Q
-and \E. In addition to backslash processing, if the PCRE2_EXTENDED option is
-also set, unescaped whitespace in verb names is skipped, and #-comments are
-recognized, exactly as in the rest of the pattern. PCRE2_EXTENDED does not
-affect verb names unless PCRE2_ALT_VERBNAMES is also set.
+and \E. In addition to backslash processing, if the PCRE2_EXTENDED or
+PCRE2_EXTENDED_MORE option is also set, unescaped whitespace in verb names is
+skipped, and #-comments are recognized, exactly as in the rest of the pattern.
+PCRE2_EXTENDED and PCRE2_EXTENDED_MORE do not affect verb names unless
+PCRE2_ALT_VERBNAMES is also set.
</P>
<P>
The maximum length of a name is 255 in the 8-bit library and 65535 in the
@@ -3092,17 +3184,16 @@ in the
documentation.
</P>
<P>
-Experiments with Perl suggest that it too has similar optimizations, sometimes
-leading to anomalous results.
+Experiments with Perl suggest that it too has similar optimizations, and, as
+with PCRE2, turning them off can change the result of a match.
</P>
<br><b>
Verbs that act immediately
</b><br>
<P>
-The following verbs act as soon as they are encountered. They may not be
-followed by a name.
+The following verbs act as soon as they are encountered.
<pre>
- (*ACCEPT)
+ (*ACCEPT) or (*ACCEPT:NAME)
</pre>
This verb causes the match to end successfully, skipping the remainder of the
pattern. However, when it is inside a subpattern that is called as a
@@ -3119,19 +3210,23 @@ example:
This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is captured by
the outer parentheses.
<pre>
- (*FAIL) or (*F)
+ (*FAIL) or (*FAIL:NAME)
</pre>
-This verb causes a matching failure, forcing backtracking to occur. It is
-equivalent to (?!) but easier to read. The Perl documentation notes that it is
-probably useful only when combined with (?{}) or (??{}). Those are, of course,
-Perl features that are not present in PCRE2. The nearest equivalent is the
-callout feature, as for example in this pattern:
+This verb causes a matching failure, forcing backtracking to occur. It may be
+abbreviated to (*F). It is equivalent to (?!) but easier to read. The Perl
+documentation notes that it is probably useful only when combined with (?{}) or
+(??{}). Those are, of course, Perl features that are not present in PCRE2. The
+nearest equivalent is the callout feature, as for example in this pattern:
<pre>
a+(?C)(*FAIL)
</pre>
A match with the string "aaaa" always fails, but the callout is taken before
each backtrack happens (in this example, 10 times).
</P>
+<P>
+(*ACCEPT:NAME) and (*FAIL:NAME) behave exactly the same as
+(*MARK:NAME)(*ACCEPT) and (*MARK:NAME)(*FAIL), respectively.
+</P>
<br><b>
Recording which path was taken
</b><br>
@@ -3146,14 +3241,23 @@ A name is always required with this verb. There may be as many instances of
(*MARK) as you like in a pattern, and their names do not have to be unique.
</P>
<P>
-When a match succeeds, the name of the last-encountered (*MARK:NAME),
-(*PRUNE:NAME), or (*THEN:NAME) on the matching path is passed back to the
-caller as described in the section entitled
+When a match succeeds, the name of the last-encountered (*MARK:NAME) on the
+matching path is passed back to the caller as described in the section entitled
<a href="pcre2api.html#matchotherdata">"Other information about the match"</a>
in the
<a href="pcre2api.html"><b>pcre2api</b></a>
-documentation. Here is an example of <b>pcre2test</b> output, where the "mark"
-modifier requests the retrieval and outputting of (*MARK) data:
+documentation. This applies to all instances of (*MARK), including those inside
+assertions and atomic groups. (There are differences in those cases when
+(*MARK) is used in conjunction with (*SKIP) as described below.)
+</P>
+<P>
+As well as (*MARK), the (*COMMIT), (*PRUNE) and (*THEN) verbs may have
+associated NAME arguments. Whichever is last on the matching path is passed
+back. See below for more details of these other verbs.
+</P>
+<P>
+Here is an example of <b>pcre2test</b> output, where the "mark" modifier
+requests the retrieval and outputting of (*MARK) data:
<pre>
re&#62; /X(*MARK:A)Y|X(*MARK:B)Z/mark
data&#62; XY
@@ -3197,13 +3301,13 @@ Verbs that act after backtracking
</b><br>
<P>
The following verbs do nothing when they are encountered. Matching continues
-with what follows, but if there is no subsequent match, causing a backtrack to
-the verb, a failure is forced. That is, backtracking cannot pass to the left of
-the verb. However, when one of these verbs appears inside an atomic group or in
-an assertion that is true, its effect is confined to that group, because once
-the group has been matched, there is never any backtracking into it. In this
-situation, backtracking has to jump to the left of the entire atomic group or
-assertion.
+with what follows, but if there is a subsequent match failure, causing a
+backtrack to the verb, a failure is forced. That is, backtracking cannot pass
+to the left of the verb. However, when one of these verbs appears inside an
+atomic group or in a lookaround assertion that is true, its effect is confined
+to that group, because once the group has been matched, there is never any
+backtracking into it. Backtracking from beyond an assertion or an atomic group
+ignores the entire group, and seeks a preceding backtracking point.
</P>
<P>
These verbs differ in exactly what kind of failure occurs when backtracking
@@ -3211,22 +3315,25 @@ reaches them. The behaviour described below is what happens when the verb is
not in a subroutine or an assertion. Subsequent sections cover these special
cases.
<pre>
- (*COMMIT)
+ (*COMMIT) or (*COMMIT:NAME)
</pre>
-This verb, which may not be followed by a name, causes the whole match to fail
-outright if there is a later matching failure that causes backtracking to reach
-it. Even if the pattern is unanchored, no further attempts to find a match by
-advancing the starting point take place. If (*COMMIT) is the only backtracking
-verb that is encountered, once it has been passed <b>pcre2_match()</b> is
-committed to finding a match at the current starting point, or not at all. For
-example:
+This verb causes the whole match to fail outright if there is a later matching
+failure that causes backtracking to reach it. Even if the pattern is
+unanchored, no further attempts to find a match by advancing the starting point
+take place. If (*COMMIT) is the only backtracking verb that is encountered,
+once it has been passed <b>pcre2_match()</b> is committed to finding a match at
+the current starting point, or not at all. For example:
<pre>
a+(*COMMIT)b
</pre>
This matches "xxaab" but not "aacaab". It can be thought of as a kind of
-dynamic anchor, or "I've started, so I must finish." The name of the most
-recently passed (*MARK) in the path is passed back when (*COMMIT) forces a
-match failure.
+dynamic anchor, or "I've started, so I must finish."
+</P>
+<P>
+The behaviour of (*COMMIT:NAME) is not the same as (*MARK:NAME)(*COMMIT). It is
+like (*MARK:NAME) in that the name is remembered for passing back to the
+caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
+ignoring those set by (*COMMIT), (*PRUNE) and (*THEN).
</P>
<P>
If there is more than one backtracking verb in a pattern, a different one that
@@ -3270,7 +3377,7 @@ as (*COMMIT).
The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE). It is
like (*MARK:NAME) in that the name is remembered for passing back to the
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
-ignoring those set by (*PRUNE) or (*THEN).
+ignoring those set by (*COMMIT), (*PRUNE) or (*THEN).
<pre>
(*SKIP)
</pre>
@@ -3278,7 +3385,7 @@ This verb, when given without a name, is like (*PRUNE), except that if the
pattern is unanchored, the "bumpalong" advance is not to the next character,
but to the position in the subject where (*SKIP) was encountered. (*SKIP)
signifies that whatever text was matched leading up to it cannot be part of a
-successful match. Consider:
+successful match if there is a later mismatch. Consider:
<pre>
a+(*SKIP)b
</pre>
@@ -3291,16 +3398,41 @@ instead of skipping on to "c".
<pre>
(*SKIP:NAME)
</pre>
-When (*SKIP) has an associated name, its behaviour is modified. When it is
-triggered, the previous path through the pattern is searched for the most
-recent (*MARK) that has the same name. If one is found, the "bumpalong" advance
-is to the subject position that corresponds to that (*MARK) instead of to where
-(*SKIP) was encountered. If no (*MARK) with a matching name is found, the
-(*SKIP) is ignored.
+When (*SKIP) has an associated name, its behaviour is modified. When such a
+(*SKIP) is triggered, the previous path through the pattern is searched for the
+most recent (*MARK) that has the same name. If one is found, the "bumpalong"
+advance is to the subject position that corresponds to that (*MARK) instead of
+to where (*SKIP) was encountered. If no (*MARK) with a matching name is found,
+the (*SKIP) is ignored.
+</P>
+<P>
+The search for a (*MARK) name uses the normal backtracking mechanism, which
+means that it does not see (*MARK) settings that are inside atomic groups or
+assertions, because they are never re-entered by backtracking. Compare the
+following <b>pcre2test</b> examples:
+<pre>
+ re&#62; /a(?&#62;(*MARK:X))(*SKIP:X)(*F)|(.)/
+ data: abc
+ 0: a
+ 1: a
+ data:
+ re&#62; /a(?:(*MARK:X))(*SKIP:X)(*F)|(.)/
+ data: abc
+ 0: b
+ 1: b
+</pre>
+In the first example, the (*MARK) setting is in an atomic group, so it is not
+seen when (*SKIP:X) triggers, causing the (*SKIP) to be ignored. This allows
+the second branch of the pattern to be tried at the first character position.
+In the second example, the (*MARK) setting is not in an atomic group. This
+allows (*SKIP:X) to find the (*MARK) when it backtracks, and this causes a new
+matching attempt to start at the second character. This time, the (*MARK) is
+never seen because "a" does not match "b", so the matcher immediately jumps to
+the second branch of the pattern.
</P>
<P>
Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It ignores
-names that are set by (*PRUNE:NAME) or (*THEN:NAME).
+names that are set by (*COMMIT:NAME), (*PRUNE:NAME) or (*THEN:NAME).
<pre>
(*THEN) or (*THEN:NAME)
</pre>
@@ -3319,10 +3451,10 @@ more alternatives, so there is a backtrack to whatever came before the entire
group. If (*THEN) is not inside an alternation, it acts like (*PRUNE).
</P>
<P>
-The behaviour of (*THEN:NAME) is the not the same as (*MARK:NAME)(*THEN).
-It is like (*MARK:NAME) in that the name is remembered for passing back to the
+The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN). It is
+like (*MARK:NAME) in that the name is remembered for passing back to the
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
-ignoring those set by (*PRUNE) and (*THEN).
+ignoring those set by (*COMMIT), (*PRUNE) and (*THEN).
</P>
<P>
A subpattern that does not contain a | character is just a part of the
@@ -3397,13 +3529,14 @@ onto (*COMMIT).
Backtracking verbs in repeated groups
</b><br>
<P>
-PCRE2 differs from Perl in its handling of backtracking verbs in repeated
-groups. For example, consider:
+PCRE2 sometimes differs from Perl in its handling of backtracking verbs in
+repeated groups. For example, consider:
<pre>
/(a(*COMMIT)b)+ac/
</pre>
-If the subject is "abac", Perl matches, but PCRE2 fails because the (*COMMIT)
-in the second repeat of the group acts.
+If the subject is "abac", Perl matches unless its optimizations are disabled,
+but PCRE2 always fails because the (*COMMIT) in the second repeat of the group
+acts.
<a name="btassert"></a></P>
<br><b>
Backtracking verbs in assertions
@@ -3416,9 +3549,10 @@ subpattern.
</P>
<P>
(*ACCEPT) in a standalone positive assertion causes the assertion to succeed
-without any further processing; captured strings are retained. In a standalone
-negative assertion, (*ACCEPT) causes the assertion to fail without any further
-processing; captured substrings are discarded.
+without any further processing; captured strings and a (*MARK) name (if set)
+are retained. In a standalone negative assertion, (*ACCEPT) causes the
+assertion to fail without any further processing; captured substrings and any
+(*MARK) name are discarded.
</P>
<P>
If the assertion is a condition, (*ACCEPT) causes the condition to be true for
@@ -3426,6 +3560,14 @@ a positive assertion and false for a negative one; captured substrings are
retained in both cases.
</P>
<P>
+The remaining verbs act only when a later failure causes a backtrack to
+reach them. This means that their effect is confined to the assertion,
+because lookaround assertions are atomic. A backtrack that occurs after an
+assertion is complete does not jump back into the assertion. Note in particular
+that a (*MARK) name that is set in an assertion is not "seen" by an instance of
+(*SKIP:NAME) later in the pattern.
+</P>
+<P>
The effect of (*THEN) is not allowed to escape beyond an assertion. If there
are no more branches to try, (*THEN) causes a positive assertion to be false,
and a negative assertion to be true.
@@ -3433,35 +3575,38 @@ and a negative assertion to be true.
<P>
The other backtracking verbs are not treated specially if they appear in a
standalone positive assertion. In a conditional positive assertion,
-backtracking into (*COMMIT), (*SKIP), or (*PRUNE) causes the condition to be
-false. However, for both standalone and conditional negative assertions,
-backtracking into (*COMMIT), (*SKIP), or (*PRUNE) causes the assertion to be
-true, without considering any further alternative branches.
+backtracking (from within the assertion) into (*COMMIT), (*SKIP), or (*PRUNE)
+causes the condition to be false. However, for both standalone and conditional
+negative assertions, backtracking into (*COMMIT), (*SKIP), or (*PRUNE) causes
+the assertion to be true, without considering any further alternative branches.
<a name="btsub"></a></P>
<br><b>
Backtracking verbs in subroutines
</b><br>
<P>
These behaviours occur whether or not the subpattern is called recursively.
-Perl's treatment of subroutines is different in some cases.
-</P>
-<P>
-(*FAIL) in a subpattern called as a subroutine has its normal effect: it forces
-an immediate backtrack.
</P>
<P>
(*ACCEPT) in a subpattern called as a subroutine causes the subroutine match to
succeed without any further processing. Matching then continues after the
-subroutine call.
+subroutine call. Perl documents this behaviour. Perl's treatment of the other
+verbs in subroutines is different in some cases.
+</P>
+<P>
+(*FAIL) in a subpattern called as a subroutine has its normal effect: it forces
+an immediate backtrack.
</P>
<P>
-(*COMMIT), (*SKIP), and (*PRUNE) in a subpattern called as a subroutine cause
-the subroutine match to fail.
+(*COMMIT), (*SKIP), and (*PRUNE) cause the subroutine match to fail when
+triggered by being backtracked to in a subpattern called as a subroutine. There
+is then a backtrack at the outer level.
</P>
<P>
-(*THEN) skips to the next alternative in the innermost enclosing group within
-the subpattern that has alternatives. If there is no such group within the
-subpattern, (*THEN) causes the subroutine match to fail.
+(*THEN), when triggered, skips to the next alternative in the innermost
+enclosing group within the subpattern that has alternatives (its normal
+behaviour). However, if there is no such group within the subroutine
+subpattern, the subroutine match fails and there is a backtrack at the outer
+level.
</P>
<br><a name="SEC28" href="#TOC1">SEE ALSO</a><br>
<P>
@@ -3479,9 +3624,9 @@ Cambridge, England.
</P>
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 12 September 2017
+Last updated: 04 September 2018
<br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2perform.html b/doc/html/pcre2perform.html
index 28f4f73..f823c12 100644
--- a/doc/html/pcre2perform.html
+++ b/doc/html/pcre2perform.html
@@ -52,9 +52,9 @@ example, the very simple pattern
<pre>
((ab){1,1000}c){1,3}
</pre>
-uses over 50K bytes when compiled using the 8-bit library. When PCRE2 is
+uses over 50KiB when compiled using the 8-bit library. When PCRE2 is
compiled with its default internal pointer size of two bytes, the size limit on
-a compiled pattern is 64K code units in the 8-bit and 16-bit libraries, and
+a compiled pattern is 65535 code units in the 8-bit and 16-bit libraries, and
this is reached with the above pattern if the outer repetition is increased
from 3 to 4. PCRE2 can be compiled to use larger internal pointers and thus
handle larger compiled patterns, but it is better to try to rewrite your
@@ -68,14 +68,14 @@ facility. Re-writing the above pattern as
<pre>
((ab)(?2){0,999}c)(?1){0,2}
</pre>
-reduces the memory requirements to around 16K, and indeed it remains under 20K
-even with the outer repetition increased to 100. However, this kind of pattern
-is not always exactly equivalent, because any captures within subroutine calls
-are lost when the subroutine completes. If this is not a problem, this kind of
-rewriting will allow you to process patterns that PCRE2 cannot otherwise
-handle. The matching performance of the two different versions of the pattern
-are roughly the same. (This applies from release 10.30 - things were different
-in earlier releases.)
+reduces the memory requirements to around 16KiB, and indeed it remains under
+20KiB even with the outer repetition increased to 100. However, this kind of
+pattern is not always exactly equivalent, because any captures within
+subroutine calls are lost when the subroutine completes. If this is not a
+problem, this kind of rewriting will allow you to process patterns that PCRE2
+cannot otherwise handle. The matching performance of the two different versions
+of the pattern is roughly the same. (This applies from release 10.30 - things
+were different in earlier releases.)
</P>
<br><a name="SEC3" href="#TOC1">STACK AND HEAP USAGE AT RUN TIME</a><br>
<P>
@@ -83,7 +83,7 @@ From release 10.30, the interpretive (non-JIT) version of <b>pcre2_match()</b>
uses very little system stack at run time. In earlier releases recursive
function calls could use a great deal of stack, and this could cause problems,
but this usage has been eliminated. Backtracking positions are now explicitly
-remembered in memory frames controlled by the code. An initial 20K vector of
+remembered in memory frames controlled by the code. An initial 20KiB vector of
frames is allocated on the system stack (enough for about 100 frames for small
patterns), but if this is insufficient, heap memory is used. The amount of heap
memory can be limited; if the limit is set to zero, only the initial stack
@@ -93,9 +93,17 @@ may also reduce the memory requirements.
<P>
In contrast to <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b> does use recursive
function calls, but only for processing atomic groups, lookaround assertions,
-and recursion within the pattern. Too much nested recursion may cause stack
-issues. The "match depth" parameter can be used to limit the depth of function
-recursion in <b>pcre2_dfa_match()</b>.
+and recursion within the pattern. The original version of the code used to
+allocate quite large internal workspace vectors on the stack, which caused some
+problems for some patterns in environments with small stacks. From release
+10.32 the code for <b>pcre2_dfa_match()</b> has been re-factored to use heap
+memory when necessary for internal workspace when recursing, though recursive
+function calls are still used.
+</P>
+<P>
+The "match depth" parameter can be used to limit the depth of function
+recursion, and the "match heap" parameter to limit heap memory in
+<b>pcre2_dfa_match()</b>.
</P>
<br><a name="SEC4" href="#TOC1">PROCESSING TIME</a><br>
<P>
@@ -244,9 +252,9 @@ Cambridge, England.
</P>
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 08 April 2017
+Last updated: 25 April 2018
<br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2posix.html b/doc/html/pcre2posix.html
index 8a4431c..1da2460 100644
--- a/doc/html/pcre2posix.html
+++ b/doc/html/pcre2posix.html
@@ -132,14 +132,14 @@ When a pattern that is compiled with this flag is passed to <b>regexec()</b> for
matching, the <i>nmatch</i> and <i>pmatch</i> arguments are ignored, and no
captured strings are returned. Versions of the PCRE library prior to 10.22 used
to set the PCRE2_NO_AUTO_CAPTURE compile option, but this no longer happens
-because it disables the use of back references.
+because it disables the use of backreferences.
<pre>
REG_PEND
</pre>
If this option is set, the <b>re_endp</b> field in the <i>preg</i> structure
(which has the type const char *) must be set to point to the character beyond
the end of the pattern before calling <b>regcomp()</b>. The pattern itself may
-now contain binary zeroes, which are treated as data characters. Without
+now contain binary zeros, which are treated as data characters. Without
REG_PEND, a binary zero terminates the pattern and the <b>re_endp</b> field is
ignored. This is a GNU extension to the POSIX standard and should be used with
caution in software intended to be portable to other systems.
@@ -248,10 +248,10 @@ function.
<pre>
REG_STARTEND
</pre>
-When this option is set, the subject string is starts at <i>string</i> +
+When this option is set, the subject string starts at <i>string</i> +
<i>pmatch[0].rm_so</i> and ends at <i>string</i> + <i>pmatch[0].rm_eo</i>, which
should point to the first character beyond the string. There may be binary
-zeroes within the subject string, and indeed, using REG_STARTEND is the only
+zeros within the subject string, and indeed, using REG_STARTEND is the only
way to pass a subject string that contains a binary zero.
</P>
<P>
diff --git a/doc/html/pcre2serialize.html b/doc/html/pcre2serialize.html
index 813b25a..18a8d7f 100644
--- a/doc/html/pcre2serialize.html
+++ b/doc/html/pcre2serialize.html
@@ -49,6 +49,15 @@ and PCRE2_SIZE type. For example, patterns compiled on a 32-bit system using
PCRE2's 16-bit library cannot be reloaded on a 64-bit system, nor can they be
reloaded using the 8-bit library.
</P>
+<P>
+Note that "serialization" in PCRE2 does not convert compiled patterns to an
+abstract format like Java or .NET serialization. The serialized output is
+really just a bytecode dump, which is why it can only be reloaded in the same
+environment as the one that created it. Hence the restrictions mentioned above.
+Applications that are not statically linked with a fixed version of PCRE2 must
+be prepared to recompile patterns from their sources, in order to be immune to
+PCRE2 upgrades.
+</P>
<br><a name="SEC2" href="#TOC1">SECURITY CONCERNS</a><br>
<P>
The facility for saving and restoring compiled patterns is intended for use
@@ -62,11 +71,11 @@ the byte stream that is passed to it.
</P>
<br><a name="SEC3" href="#TOC1">SAVING COMPILED PATTERNS</a><br>
<P>
-Before compiled patterns can be saved they must be serialized, that is,
-converted to a stream of bytes. A single byte stream may contain any number of
-compiled patterns, but they must all use the same character tables. A single
-copy of the tables is included in the byte stream (its size is 1088 bytes). For
-more details of character tables, see the
+Before compiled patterns can be saved they must be serialized, which in PCRE2
+means converting the pattern to a stream of bytes. A single byte stream may
+contain any number of compiled patterns, but they must all use the same
+character tables. A single copy of the tables is included in the byte stream
+(its size is 1088 bytes). For more details of character tables, see the
<a href="pcre2api.html#localesupport">section on locale support</a>
in the
<a href="pcre2api.html"><b>pcre2api</b></a>
@@ -120,7 +129,9 @@ non-binary data, be sure that the file is opened for binary output.
Serializing a set of patterns leaves the original data untouched, so they can
still be used for matching. Their memory must eventually be freed in the usual
way by calling <b>pcre2_code_free()</b>. When you have finished with the byte
-stream, it too must be freed by calling <b>pcre2_serialize_free()</b>.
+stream, it too must be freed by calling <b>pcre2_serialize_free()</b>. If this
+function is called with a NULL argument, it returns immediately without doing
+anything.
</P>
<br><a name="SEC4" href="#TOC1">RE-USING PRECOMPILED PATTERNS</a><br>
<P>
@@ -193,9 +204,9 @@ Cambridge, England.
</P>
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 21 March 2017
+Last updated: 27 June 2018
<br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2syntax.html b/doc/html/pcre2syntax.html
index 9098f47..7d332a1 100644
--- a/doc/html/pcre2syntax.html
+++ b/doc/html/pcre2syntax.html
@@ -23,7 +23,7 @@ please consult the man page, in case the conversion went wrong.
<li><a name="TOC8" href="#SEC8">CHARACTER CLASSES</a>
<li><a name="TOC9" href="#SEC9">QUANTIFIERS</a>
<li><a name="TOC10" href="#SEC10">ANCHORS AND SIMPLE ASSERTIONS</a>
-<li><a name="TOC11" href="#SEC11">MATCH POINT RESET</a>
+<li><a name="TOC11" href="#SEC11">REPORTED MATCH POINT SETTING</a>
<li><a name="TOC12" href="#SEC12">ALTERNATION</a>
<li><a name="TOC13" href="#SEC13">CAPTURING</a>
<li><a name="TOC14" href="#SEC14">ATOMIC GROUPS</a>
@@ -70,9 +70,10 @@ This table applies to ASCII and Unicode environments.
\ddd character with octal code ddd, or backreference
\o{ddd..} character with octal code ddd..
\U "U" if PCRE2_ALT_BSUX is set (otherwise is an error)
+ \N{U+hh..} character with Unicode code point hh.. (Unicode mode only)
\uhhhh character with hex code hhhh (if PCRE2_ALT_BSUX is set)
\xhh character with hex code hh
- \x{hhh..} character with hex code hhh..
+ \x{hh..} character with hex code hh..
</pre>
Note that \0dd is always an octal code. The treatment of backslash followed by
a non-zero digit is complicated; for details see the section
@@ -80,7 +81,9 @@ a non-zero digit is complicated; for details see the section
in the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
documentation, where details of escape processing in EBCDIC environments are
-also given.
+also given. \N{U+hh..} is synonymous with \x{hh..} in PCRE2 but is not
+supported in EBCDIC environments. Note that \N not followed by an opening
+curly bracket has a different meaning (see below).
</P>
<P>
When \x is not followed by {, from zero to two hexadecimal digits are read,
@@ -188,6 +191,7 @@ at release 5.18.
</P>
<br><a name="SEC7" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
<P>
+Adlam,
Ahom,
Anatolian_Hieroglyphs,
Arabic,
@@ -198,6 +202,7 @@ Bamum,
Bassa_Vah,
Batak,
Bengali,
+Bhaiksuki,
Bopomofo,
Brahmi,
Braille,
@@ -216,6 +221,7 @@ Cypriot,
Cyrillic,
Deseret,
Devanagari,
+Dogra,
Duployan,
Egyptian_Hieroglyphs,
Elbasan,
@@ -226,9 +232,11 @@ Gothic,
Grantha,
Greek,
Gujarati,
+Gunjala_Gondi,
Gurmukhi,
Han,
Hangul,
+Hanifi_Rohingya,
Hanunoo,
Hatran,
Hebrew,
@@ -256,9 +264,13 @@ Lisu,
Lycian,
Lydian,
Mahajani,
+Makasar,
Malayalam,
Mandaic,
Manichaean,
+Marchen,
+Masaram_Gondi,
+Medefaidrin,
Meetei_Mayek,
Mende_Kikakui,
Meroitic_Cursive,
@@ -271,7 +283,9 @@ Multani,
Myanmar,
Nabataean,
New_Tai_Lue,
+Newa,
Nko,
+Nushu,
Ogham,
Ol_Chiki,
Old_Hungarian,
@@ -279,9 +293,11 @@ Old_Italic,
Old_North_Arabian,
Old_Permic,
Old_Persian,
+Old_Sogdian,
Old_South_Arabian,
Old_Turkic,
Oriya,
+Osage,
Osmanya,
Pahawh_Hmong,
Palmyrene,
@@ -298,7 +314,9 @@ Shavian,
Siddham,
SignWriting,
Sinhala,
+Sogdian,
Sora_Sompeng,
+Soyombo,
Sundanese,
Syloti_Nagri,
Syriac,
@@ -309,6 +327,7 @@ Tai_Tham,
Tai_Viet,
Takri,
Tamil,
+Tangut,
Telugu,
Thaana,
Thai,
@@ -318,7 +337,8 @@ Tirhuta,
Ugaritic,
Vai,
Warang_Citi,
-Yi.
+Yi,
+Zanabazar_Square.
</P>
<br><a name="SEC8" href="#TOC1">CHARACTER CLASSES</a><br>
<P>
@@ -387,10 +407,10 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
\G first matching position in subject
</PRE>
</P>
-<br><a name="SEC11" href="#TOC1">MATCH POINT RESET</a><br>
+<br><a name="SEC11" href="#TOC1">REPORTED MATCH POINT SETTING</a><br>
<P>
<pre>
- \K reset start of match
+ \K set reported start of match
</pre>
\K is honoured in positive assertions, but ignored in negative ones.
</P>
@@ -426,6 +446,8 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
</P>
<br><a name="SEC16" href="#TOC1">OPTION SETTING</a><br>
<P>
+Changes of these options within a group are automatically cancelled at the end
+of the group.
<pre>
(?i) caseless
(?J) allow duplicate names
@@ -436,13 +458,21 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
(?x) extended: ignore white space except in classes
(?xx) as (?x) but also ignore space and tab in classes
(?-...) unset option(s)
+ (?^) unset imnsx options
</pre>
+Unsetting x or xx unsets both. Several options may be set at once, and a
+mixture of setting and unsetting such as (?i-x) is allowed, but there may be
+only one hyphen. Setting (but not unsetting) is allowed after (?^, for example
+(?^in). An option setting may appear at the start of a non-capturing group, for
+example (?i:...).
+</P>
+<P>
The following are recognized only at the very start of a pattern or after one
of the newline or \R options with similar syntax. More than one of them may
appear. For the first three, d is a decimal number.
<pre>
(*LIMIT_DEPTH=d) set the backtracking limit to d
- (*LIMIT_HEAP=d) set the heap size limit to d kilobytes
+ (*LIMIT_HEAP=d) set the heap size limit to d * 1024 bytes
(*LIMIT_MATCH=d) set the match limit to d
(*NOTEMPTY) set PCRE2_NOTEMPTY when matching
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
@@ -552,7 +582,11 @@ condition if the relevant named group exists.
</P>
<br><a name="SEC23" href="#TOC1">BACKTRACKING CONTROL</a><br>
<P>
-The following act immediately they are reached:
+All backtracking control verbs may be in the form (*VERB:NAME). For (*MARK) the
+name is mandatory, for the others it is optional. (*SKIP) changes its behaviour
+if :NAME is present. The others just set a name for passing back to the caller,
+but this is not a name that (*SKIP) can see. The following act immediately they
+are reached:
<pre>
(*ACCEPT) force successful match
(*FAIL) force backtrack; synonym (*F)
@@ -565,13 +599,13 @@ pattern is not anchored.
<pre>
(*COMMIT) overall failure, no advance of starting point
(*PRUNE) advance to next starting character
- (*PRUNE:NAME) equivalent to (*MARK:NAME)(*PRUNE)
(*SKIP) advance to current matching position
(*SKIP:NAME) advance to position corresponding to an earlier
(*MARK:NAME); if not found, the (*SKIP) is ignored
(*THEN) local failure, backtrack to next alternation
- (*THEN:NAME) equivalent to (*MARK:NAME)(*THEN)
-</PRE>
+</pre>
+The effect of one of these verbs in a group called as a subroutine is confined
+to the subroutine call.
</P>
<br><a name="SEC24" href="#TOC1">CALLOUTS</a><br>
<P>
@@ -600,9 +634,9 @@ Cambridge, England.
</P>
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 17 June 2017
+Last updated: 02 September 2018
<br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2test.html b/doc/html/pcre2test.html
index 7d98d90..af2b18c 100644
--- a/doc/html/pcre2test.html
+++ b/doc/html/pcre2test.html
@@ -129,7 +129,7 @@ to occur).
UTF-8 (in its original definition) is not capable of encoding values greater
than 0x7fffffff, but such values can be handled by the 32-bit library. When
testing this library in non-UTF mode with <b>utf8_input</b> set, if any
-character is preceded by the byte 0xff (which is an illegal byte in UTF-8)
+character is preceded by the byte 0xff (which is an invalid byte in UTF-8)
0x80000000 is added to the character's value. This is the only way of passing
such code points in a pattern string. For subject strings, using an escape
sequence is preferable.
@@ -264,7 +264,7 @@ Do not output the version number of <b>pcre2test</b> at the start of execution.
<P>
<b>-S</b> <i>size</i>
On Unix-like systems, set the size of the run-time stack to <i>size</i>
-megabytes.
+mebibytes (units of 1024*1024 bytes).
</P>
<P>
<b>-subject</b> <i>modifier-list</i>
@@ -315,7 +315,8 @@ number of subject lines to be matched against that pattern. In between sets of
test data, command lines that begin with # may appear. This file format, with
some restrictions, can also be processed by the <b>perltest.sh</b> script that
is distributed with PCRE2 as a means of checking that the behaviour of PCRE2
-and Perl is the same.
+and Perl is the same. For a specification of <b>perltest.sh</b>, see the
+comments near its beginning.
</P>
<P>
When the input is a terminal, <b>pcre2test</b> prompts for each line of input,
@@ -409,10 +410,11 @@ patterns. Modifiers on a pattern can change these settings.
The appearance of this line causes all subsequent modifier settings to be
checked for compatibility with the <b>perltest.sh</b> script, which is used to
confirm that Perl gives the same results as PCRE2. Also, apart from comment
-lines, none of the other command lines are permitted, because they and many
-of the modifiers are specific to <b>pcre2test</b>, and should not be used in
-test files that are also processed by <b>perltest.sh</b>. The <b>#perltest</b>
-command helps detect tests that are accidentally put in the wrong file.
+lines, #pattern commands, and #subject commands that set or unset "mark", no
+command lines are permitted, because they and many of the modifiers are
+specific to <b>pcre2test</b>, and should not be used in test files that are also
+processed by <b>perltest.sh</b>. The <b>#perltest</b> command helps detect tests
+that are accidentally put in the wrong file.
<pre>
#pop [&#60;modifiers&#62;]
#popcopy [&#60;modifiers&#62;]
@@ -679,8 +681,8 @@ Newline and \R handling
<P>
The <b>bsr</b> modifier specifies what \R in a pattern should match. If it is
set to "anycrlf", \R matches CR, LF, or CRLF only. If it is set to "unicode",
-\R matches any Unicode newline sequence. The default is specified when PCRE2
-is built, with the default default being Unicode.
+\R matches any Unicode newline sequence. The default can be specified when
+PCRE2 is built; if it is not, the default is set to Unicode.
</P>
<P>
The <b>newline</b> modifier specifies which characters are to be interpreted as
@@ -1199,7 +1201,7 @@ pattern.
get=&#60;number or name&#62; extract captured substring
getall extract all captured substrings
/g global global matching
- heap_limit=&#60;n&#62; set a limit on heap memory
+ heap_limit=&#60;n&#62; set a limit on heap memory (Kbytes)
jitstack=&#60;n&#62; set size of JIT stack
mark show mark values
match_limit=&#60;n&#62; set a match limit
@@ -1418,11 +1420,11 @@ Setting the JIT stack size
<P>
The <b>jitstack</b> modifier provides a way of setting the maximum stack size
that is used by the just-in-time optimization code. It is ignored if JIT
-optimization is not being used. The value is a number of kilobytes. Setting
-zero reverts to the default of 32K. Providing a stack that is larger than the
-default is necessary only for very complicated patterns. If <b>jitstack</b> is
-set non-zero on a subject line it overrides any value that was set on the
-pattern.
+optimization is not being used. The value is a number of kibibytes (units of
+1024 bytes). Setting zero reverts to the default of 32KiB. Providing a stack
+that is larger than the default is necessary only for very complicated
+patterns. If <b>jitstack</b> is set non-zero on a subject line it overrides any
+value that was set on the pattern.
</P>
<br><b>
Setting heap, match, and depth limits
@@ -1438,20 +1440,17 @@ Finding minimum limits
<P>
If the <b>find_limits</b> modifier is present on a subject line, <b>pcre2test</b>
calls the relevant matching function several times, setting different values in
-the match context via <b>pcre2_set_heap_limit(), \fBpcre2_set_match_limit()</b>,
-or <b>pcre2_set_depth_limit()</b> until it finds the minimum values for each
-parameter that allows the match to complete without error.
-</P>
-<P>
-If JIT is being used, only the match limit is relevant. If DFA matching is
-being used, only the depth limit is relevant.
+the match context via <b>pcre2_set_heap_limit()</b>,
+<b>pcre2_set_match_limit()</b>, or <b>pcre2_set_depth_limit()</b> until it finds
+the minimum value for each parameter that allows the match to complete without
+error. If JIT is being used, only the match limit is relevant.
</P>
<P>
-The <i>match_limit</i> number is a measure of the amount of backtracking
-that takes place, and learning the minimum value can be instructive. For most
-simple matches, the number is quite small, but for patterns with very large
-numbers of matching possibilities, it can become large very quickly with
-increasing length of subject string.
+When using this modifier, the pattern should not contain any limit settings
+such as (*LIMIT_MATCH=...) within it. If such a setting is present and is
+lower than the minimum matching value, the minimum value cannot be found
+because <b>pcre2_set_match_limit()</b> etc. are only able to reduce the value of
+an in-pattern limit; they cannot increase it.
</P>
<P>
For non-DFA matching, the minimum <i>depth_limit</i> number is a measure of how
@@ -1460,6 +1459,22 @@ searched). In the case of DFA matching, <i>depth_limit</i> controls the depth of
recursive calls of the internal function that is used for handling pattern
recursion, lookaround assertions, and atomic groups.
</P>
+<P>
+For non-DFA matching, the <i>match_limit</i> number is a measure of the amount
+of backtracking that takes place, and learning the minimum value can be
+instructive. For most simple matches, the number is quite small, but for
+patterns with very large numbers of matching possibilities, it can become large
+very quickly with increasing length of subject string. In the case of DFA
+matching, <i>match_limit</i> controls the total number of calls, both recursive
+and non-recursive, to the internal matching function, thus controlling the
+overall amount of computing resource that is used.
+</P>
+<P>
+For both kinds of matching, the <i>heap_limit</i> number, which is in kibibytes
+(units of 1024 bytes), limits the amount of heap memory used for matching. A
+value of zero disables the use of any heap memory; many simple pattern matches
+can be done without using the heap, so zero is not an unreasonable setting.
+</P>
<br><b>
Showing MARK names
</b><br>
@@ -1476,13 +1491,14 @@ Showing memory usage
<P>
The <b>memory</b> modifier causes <b>pcre2test</b> to log the sizes of all heap
memory allocation and freeing calls that occur during a call to
-<b>pcre2_match()</b>. These occur only when a match requires a bigger vector
-than the default for remembering backtracking points. In many cases there will
-be no heap memory used and therefore no additional output. No heap memory is
-allocated during matching with <b>pcre2_dfa_match</b> or with JIT, so in those
-cases the <b>memory</b> modifier never has any effect. For this modifier to
-work, the <b>null_context</b> modifier must not be set on both the pattern and
-the subject, though it can be set on one or the other.
+<b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>. These occur only when a match
+requires a bigger vector than the default for remembering backtracking points
+(<b>pcre2_match()</b>) or for internal workspace (<b>pcre2_dfa_match()</b>). In
+many cases there will be no heap memory used and therefore no additional
+output. No heap memory is allocated during matching with JIT, so in that case
+the <b>memory</b> modifier never has any effect. For this modifier to work, the
+<b>null_context</b> modifier must not be set on both the pattern and the
+subject, though it can be set on one or the other.
</P>
<br><b>
Setting a starting offset
@@ -1913,15 +1929,21 @@ documentation. In this section we describe the features of <b>pcre2test</b> that
can be used to test these functions.
</P>
<P>
-When a pattern with <b>push</b> modifier is successfully compiled, it is pushed
-onto a stack of compiled patterns, and <b>pcre2test</b> expects the next line to
-contain a new pattern (or command) instead of a subject line. By contrast,
-the <b>pushcopy</b> modifier causes a copy of the compiled pattern to be
-stacked, leaving the original available for immediate matching. By using
-<b>push</b> and/or <b>pushcopy</b>, a number of patterns can be compiled and
-retained. These modifiers are incompatible with <b>posix</b>, and control
-modifiers that act at match time are ignored (with a message) for the stacked
-patterns. The <b>jitverify</b> modifier applies only at compile time.
+Note that "serialization" in PCRE2 does not convert compiled patterns to an
+abstract format like Java or .NET. It just makes a reloadable byte code stream.
+Hence the restrictions on reloading mentioned above.
+</P>
+<P>
+In <b>pcre2test</b>, when a pattern with the <b>push</b> modifier is successfully
+compiled, it is pushed onto a stack of compiled patterns, and <b>pcre2test</b>
+expects the next line to contain a new pattern (or command) instead of a
+subject line. By contrast, the <b>pushcopy</b> modifier causes a copy of the
+compiled pattern to be stacked, leaving the original available for immediate
+matching. By using <b>push</b> and/or <b>pushcopy</b>, a number of patterns can
+be compiled and retained. These modifiers are incompatible with <b>posix</b>,
+and control modifiers that act at match time are ignored (with a message) for
+the stacked patterns. The <b>jitverify</b> modifier applies only at compile
+time.
</P>
<P>
The command
@@ -1982,9 +2004,9 @@ Cambridge, England.
</P>
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 21 December 2017
+Last updated: 21 July 2018
<br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2unicode.html b/doc/html/pcre2unicode.html
index 448a221..24f6d93 100644
--- a/doc/html/pcre2unicode.html
+++ b/doc/html/pcre2unicode.html
@@ -26,7 +26,8 @@ you must call
with the PCRE2_UTF option flag, or the pattern must start with the sequence
(*UTF). When either of these is the case, both the pattern and any subject
strings that are matched against it are treated as UTF strings instead of
-strings of individual one-code-unit characters.
+strings of individual one-code-unit characters. There are also some other
+changes to the way characters are handled, as documented below.
</P>
<P>
If you do not need Unicode support you can build PCRE2 without it, in which
@@ -53,12 +54,17 @@ compatibility with Perl 5.6. PCRE2 does not support this.
WIDE CHARACTERS AND UTF MODES
</b><br>
<P>
-Codepoints less than 256 can be specified in patterns by either braced or
+Code points less than 256 can be specified in patterns by either braced or
unbraced hexadecimal escape sequences (for example, \x{b3} or \xb3). Larger
values have to use braced sequences. Unbraced octal code points up to \777 are
also recognized; larger ones can be coded using \o{...}.
</P>
<P>
+The escape sequence \N{U+&#60;hex digits&#62;} is recognized as another way of
+specifying a Unicode character by code point in a UTF mode. It is not allowed
+in non-UTF modes.
+</P>
+<P>
In UTF modes, repeat quantifiers apply to complete UTF characters, not to
individual code units.
</P>
@@ -116,7 +122,7 @@ CASE-EQUIVALENCE IN UTF MODES
Case-insensitive matching in a UTF mode makes use of Unicode properties except
for characters whose code points are less than 128 and that have at most two
case-equivalent values. For these, a direct table lookup is used for speed. A
-few Unicode characters such as Greek sigma have more than two codepoints that
+few Unicode characters such as Greek sigma have more than two code points that
are case-equivalent, and these are treated as such.
</P>
<br><b>
@@ -294,9 +300,9 @@ Cambridge, England.
REVISION
</b><br>
<P>
-Last updated: 17 May 2017
+Last updated: 02 September 2018
<br>
-Copyright &copy; 1997-2017 University of Cambridge.
+Copyright &copy; 1997-2018 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/index.html.src b/doc/index.html.src
index b9393d9..82b43c3 100644
--- a/doc/index.html.src
+++ b/doc/index.html.src
@@ -141,7 +141,7 @@ in the library.
<td>&nbsp;&nbsp;Free a general context</td></tr>
<tr><td><a href="pcre2_get_error_message.html">pcre2_get_error_message</a></td>
- <td>&nbsp;&nbsp;Free study data</td></tr>
+ <td>&nbsp;&nbsp;Get textual error message for error number</td></tr>
<tr><td><a href="pcre2_get_mark.html">pcre2_get_mark</a></td>
<td>&nbsp;&nbsp;Get a (*MARK) name</td></tr>
diff --git a/doc/pcre2.3 b/doc/pcre2.3
index 83a7655..2f83658 100644
--- a/doc/pcre2.3
+++ b/doc/pcre2.3
@@ -1,4 +1,4 @@
-.TH PCRE2 3 "01 April 2017" "PCRE2 10.30"
+.TH PCRE2 3 "11 July 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH INTRODUCTION
@@ -6,12 +6,18 @@ PCRE2 - Perl-compatible regular expressions (revised API)
.sp
PCRE2 is the name used for a revised API for the PCRE library, which is a set
of functions, written in C, that implement regular expression pattern matching
-using the same syntax and semantics as Perl, with just a few differences. Some
-features that appeared in Python and the original PCRE before they appeared in
-Perl are also available using the Python syntax. There is also some support for
-one or two .NET and Oniguruma syntax items, and there are options for
-requesting some minor changes that give better ECMAScript (aka JavaScript)
-compatibility.
+using the same syntax and semantics as Perl, with just a few differences. After
+nearly two decades, the limitations of the original API were making development
+increasingly difficult. The new API is more extensible, and it was simplified
+by abolishing the separate "study" optimizing function; in PCRE2, patterns are
+automatically optimized where possible. Since forking from PCRE1, the code has
+been extensively refactored and new features introduced.
+.P
+As well as Perl-style regular expression patterns, some features that appeared
+in Python and the original PCRE before they appeared in Perl are available
+using the Python syntax. There is also some support for one or two .NET and
+Oniguruma syntax items, and there are options for requesting some minor changes
+that give better ECMAScript (aka JavaScript) compatibility.
.P
The source code for PCRE2 can be compiled to support 8-bit, 16-bit, or 32-bit
code units, which means that up to three separate libraries may be installed.
@@ -152,6 +158,7 @@ listing), and the short pages for individual functions, are concatenated in
pcre2build building PCRE2
pcre2callout details of the callout feature
pcre2compat discussion of Perl compatibility
+ pcre2convert details of pattern conversion functions
pcre2demo a demonstration C program that uses PCRE2
pcre2grep description of the \fBpcre2grep\fP command (8-bit only)
pcre2jit discussion of just-in-time optimization support
@@ -164,6 +171,7 @@ listing), and the short pages for individual functions, are concatenated in
pcre2perform discussion of performance issues
pcre2posix the POSIX-compatible C API for the 8-bit library
pcre2sample discussion of the pcre2demo program
+ pcre2serialize details of pattern serialization
pcre2syntax quick syntax reference
pcre2test description of the \fBpcre2test\fP command
pcre2unicode discussion of Unicode and UTF support
@@ -189,6 +197,6 @@ use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
.rs
.sp
.nf
-Last updated: 01 April 2017
-Copyright (c) 1997-2017 University of Cambridge.
+Last updated: 11 July 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/doc/pcre2.txt b/doc/pcre2.txt
index 79d94e3..30ba2f9 100644
--- a/doc/pcre2.txt
+++ b/doc/pcre2.txt
@@ -20,11 +20,19 @@ INTRODUCTION
PCRE2 is the name used for a revised API for the PCRE library, which is
a set of functions, written in C, that implement regular expression
pattern matching using the same syntax and semantics as Perl, with just
- a few differences. Some features that appeared in Python and the origi-
- nal PCRE before they appeared in Perl are also available using the
- Python syntax. There is also some support for one or two .NET and Onig-
- uruma syntax items, and there are options for requesting some minor
- changes that give better ECMAScript (aka JavaScript) compatibility.
+ a few differences. After nearly two decades, the limitations of the
+ original API were making development increasingly difficult. The new
+ API is more extensible, and it was simplified by abolishing the sepa-
+ rate "study" optimizing function; in PCRE2, patterns are automatically
+ optimized where possible. Since forking from PCRE1, the code has been
+ extensively refactored and new features introduced.
+
+ As well as Perl-style regular expression patterns, some features that
+ appeared in Python and the original PCRE before they appeared in Perl
+ are available using the Python syntax. There is also some support for
+ one or two .NET and Oniguruma syntax items, and there are options for
+ requesting some minor changes that give better ECMAScript (aka
+ JavaScript) compatibility.
The source code for PCRE2 can be compiled to support 8-bit, 16-bit, or
32-bit code units, which means that up to three separate libraries may
@@ -135,6 +143,7 @@ USER DOCUMENTATION
pcre2build building PCRE2
pcre2callout details of the callout feature
pcre2compat discussion of Perl compatibility
+ pcre2convert details of pattern conversion functions
pcre2demo a demonstration C program that uses PCRE2
pcre2grep description of the pcre2grep command (8-bit only)
pcre2jit discussion of just-in-time optimization support
@@ -146,6 +155,7 @@ USER DOCUMENTATION
pcre2perform discussion of performance issues
pcre2posix the POSIX-compatible C API for the 8-bit library
pcre2sample discussion of the pcre2demo program
+ pcre2serialize details of pattern serialization
pcre2syntax quick syntax reference
pcre2test description of the pcre2test command
pcre2unicode discussion of Unicode and UTF support
@@ -167,8 +177,8 @@ AUTHOR
REVISION
- Last updated: 01 April 2017
- Copyright (c) 1997-2017 University of Cambridge.
+ Last updated: 11 July 2018
+ Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
@@ -576,7 +586,9 @@ PCRE2 API OVERVIEW
pcre2_substring_number_from_name()
pcre2_substring_free() and pcre2_substring_list_free() are also pro-
- vided, to free memory used for extracted strings.
+ vided, to free memory used for extracted strings. If either of these
+ functions is called with a NULL argument, the function returns immedi-
+ ately without doing anything.
The function pcre2_substitute() can be called to match a pattern and
return a copy of the subject string with substitutions for parts that
@@ -617,32 +629,33 @@ NEWLINES
Each of the first three conventions is used by at least one operating
system as its standard newline sequence. When PCRE2 is built, a default
- can be specified. The default default is LF, which is the Unix stan-
- dard. However, the newline convention can be changed by an application
- when calling pcre2_compile(), or it can be specified by special text at
- the start of the pattern itself; this overrides any other settings. See
- the pcre2pattern page for details of the special character sequences.
-
- In the PCRE2 documentation the word "newline" is used to mean "the
+ can be specified. If it is not, the default is set to LF, which is the
+ Unix standard. However, the newline convention can be changed by an
+ application when calling pcre2_compile(), or it can be specified by
+ special text at the start of the pattern itself; this overrides any
+ other settings. See the pcre2pattern page for details of the special
+ character sequences.
+
+ In the PCRE2 documentation the word "newline" is used to mean "the
character or pair of characters that indicate a line break". The choice
- of newline convention affects the handling of the dot, circumflex, and
+ of newline convention affects the handling of the dot, circumflex, and
dollar metacharacters, the handling of #-comments in /x mode, and, when
- CRLF is a recognized line ending sequence, the match position advance-
+ CRLF is a recognized line ending sequence, the match position advance-
ment for a non-anchored pattern. There is more detail about this in the
section on pcre2_match() options below.
- The choice of newline convention does not affect the interpretation of
+ The choice of newline convention does not affect the interpretation of
the \n or \r escape sequences, nor does it affect what \R matches; this
has its own separate convention.
MULTITHREADING
- In a multithreaded application it is important to keep thread-specific
- data separate from data that can be shared between threads. The PCRE2
- library code itself is thread-safe: it contains no static or global
- variables. The API is designed to be fairly simple for non-threaded
- applications while at the same time ensuring that multithreaded appli-
+ In a multithreaded application it is important to keep thread-specific
+ data separate from data that can be shared between threads. The PCRE2
+ library code itself is thread-safe: it contains no static or global
+ variables. The API is designed to be fairly simple for non-threaded
+ applications while at the same time ensuring that multithreaded appli-
cations can use it.
There are several different blocks of data that are used to pass infor-
@@ -650,19 +663,19 @@ MULTITHREADING
The compiled pattern
- A pointer to the compiled form of a pattern is returned to the user
+ A pointer to the compiled form of a pattern is returned to the user
when pcre2_compile() is successful. The data in the compiled pattern is
- fixed, and does not change when the pattern is matched. Therefore, it
- is thread-safe, that is, the same compiled pattern can be used by more
+ fixed, and does not change when the pattern is matched. Therefore, it
+ is thread-safe, that is, the same compiled pattern can be used by more
than one thread simultaneously. For example, an application can compile
all its patterns at the start, before forking off multiple threads that
- use them. However, if the just-in-time (JIT) optimization feature is
- being used, it needs separate memory stack areas for each thread. See
+ use them. However, if the just-in-time (JIT) optimization feature is
+ being used, it needs separate memory stack areas for each thread. See
the pcre2jit documentation for more details.
- In a more complicated situation, where patterns are compiled only when
- they are first needed, but are still shared between threads, pointers
- to compiled patterns must be protected from simultaneous writing by
+ In a more complicated situation, where patterns are compiled only when
+ they are first needed, but are still shared between threads, pointers
+ to compiled patterns must be protected from simultaneous writing by
multiple threads, at least until a pattern has been compiled. The logic
can be something like this:
@@ -675,72 +688,72 @@ MULTITHREADING
Release the lock
Use pointer in pcre2_match()
- Of course, testing for compilation errors should also be included in
+ Of course, testing for compilation errors should also be included in
the code.
If JIT is being used, but the JIT compilation is not being done immedi-
- ately, (perhaps waiting to see if the pattern is used often enough)
+ ately, (perhaps waiting to see if the pattern is used often enough)
similar logic is required. JIT compilation updates a pointer within the
- compiled code block, so a thread must gain unique write access to the
- pointer before calling pcre2_jit_compile(). Alternatively,
+ compiled code block, so a thread must gain unique write access to the
+ pointer before calling pcre2_jit_compile(). Alternatively,
pcre2_code_copy() or pcre2_code_copy_with_tables() can be used to
- obtain a private copy of the compiled code before calling the JIT com-
+ obtain a private copy of the compiled code before calling the JIT com-
piler.
Context blocks
- The next main section below introduces the idea of "contexts" in which
+ The next main section below introduces the idea of "contexts" in which
PCRE2 functions are called. A context is nothing more than a collection
of parameters that control the way PCRE2 operates. Grouping a number of
parameters together in a context is a convenient way of passing them to
- a PCRE2 function without using lots of arguments. The parameters that
- are stored in contexts are in some sense "advanced features" of the
+ a PCRE2 function without using lots of arguments. The parameters that
+ are stored in contexts are in some sense "advanced features" of the
API. Many straightforward applications will not need to use contexts.
In a multithreaded application, if the parameters in a context are val-
- ues that are never changed, the same context can be used by all the
+ ues that are never changed, the same context can be used by all the
threads. However, if any thread needs to change any value in a context,
it must make its own thread-specific copy.
Match blocks
- The matching functions need a block of memory for storing the results
+ The matching functions need a block of memory for storing the results
of a match. This includes details of what was matched, as well as addi-
- tional information such as the name of a (*MARK) setting. Each thread
+ tional information such as the name of a (*MARK) setting. Each thread
must provide its own copy of this memory.
PCRE2 CONTEXTS
- Some PCRE2 functions have a lot of parameters, many of which are used
- only by specialist applications, for example, those that use custom
- memory management or non-standard character tables. To keep function
- argument lists at a reasonable size, and at the same time to keep the
- API extensible, "uncommon" parameters are passed to certain functions
- in a context instead of directly. A context is just a block of memory
- that holds the parameter values. Applications that do not need to
- adjust any of the context parameters can pass NULL when a context
+ Some PCRE2 functions have a lot of parameters, many of which are used
+ only by specialist applications, for example, those that use custom
+ memory management or non-standard character tables. To keep function
+ argument lists at a reasonable size, and at the same time to keep the
+ API extensible, "uncommon" parameters are passed to certain functions
+ in a context instead of directly. A context is just a block of memory
+ that holds the parameter values. Applications that do not need to
+ adjust any of the context parameters can pass NULL when a context
pointer is required.
- There are three different types of context: a general context that is
- relevant for several PCRE2 operations, a compile-time context, and a
+ There are three different types of context: a general context that is
+ relevant for several PCRE2 operations, a compile-time context, and a
match-time context.
The general context
- At present, this context just contains pointers to (and data for)
- external memory management functions that are called from several
+ At present, this context just contains pointers to (and data for)
+ external memory management functions that are called from several
places in the PCRE2 library. The context is named `general' rather than
- specifically `memory' because in future other fields may be added. If
- you do not want to supply your own custom memory management functions,
- you do not need to bother with a general context. A general context is
+ specifically `memory' because in future other fields may be added. If
+ you do not want to supply your own custom memory management functions,
+ you do not need to bother with a general context. A general context is
created by:
pcre2_general_context *pcre2_general_context_create(
void *(*private_malloc)(PCRE2_SIZE, void *),
void (*private_free)(void *, void *), void *memory_data);
- The two function pointers specify custom memory management functions,
+ The two function pointers specify custom memory management functions,
whose prototypes are:
void *private_malloc(PCRE2_SIZE, void *);
@@ -748,16 +761,16 @@ PCRE2 CONTEXTS
Whenever code in PCRE2 calls these functions, the final argument is the
value of memory_data. Either of the first two arguments of the creation
- function may be NULL, in which case the system memory management func-
- tions malloc() and free() are used. (This is not currently useful, as
- there are no other fields in a general context, but in future there
- might be.) The private_malloc() function is used (if supplied) to
- obtain memory for storing the context, and all three values are saved
+ function may be NULL, in which case the system memory management func-
+ tions malloc() and free() are used. (This is not currently useful, as
+ there are no other fields in a general context, but in future there
+ might be.) The private_malloc() function is used (if supplied) to
+ obtain memory for storing the context, and all three values are saved
as part of the context.
- Whenever PCRE2 creates a data block of any kind, the block contains a
- pointer to the free() function that matches the malloc() function that
- was used. When the time comes to free the block, this function is
+ Whenever PCRE2 creates a data block of any kind, the block contains a
+ pointer to the free() function that matches the malloc() function that
+ was used. When the time comes to free the block, this function is
called.
A general context can be copied by calling:
@@ -769,6 +782,8 @@ PCRE2 CONTEXTS
void pcre2_general_context_free(pcre2_general_context *gcontext);
+ If this function is passed a NULL argument, it returns immediately
+ without doing anything.
The compile context
@@ -854,10 +869,10 @@ PCRE2 CONTEXTS
When a pattern is compiled with the PCRE2_EXTENDED or
PCRE2_EXTENDED_MORE option, the newline convention affects the recogni-
- tion of white space and the end of internal comments starting with #.
- The value is saved with the compiled pattern for subsequent use by the
- JIT compiler and by the two interpreted matching functions,
- pcre2_match() and pcre2_dfa_match().
+ tion of the end of internal comments starting with #. The value is
+ saved with the compiled pattern for subsequent use by the JIT compiler
+ and by the two interpreted matching functions, pcre2_match() and
+ pcre2_dfa_match().
int pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext,
uint32_t value);
@@ -955,15 +970,17 @@ PCRE2 CONTEXTS
int pcre2_set_heap_limit(pcre2_match_context *mcontext,
uint32_t value);
- The heap_limit parameter specifies, in units of kilobytes, the maximum
- amount of heap memory that pcre2_match() may use to hold backtracking
- information when running an interpretive match. This limit does not
- apply to matching with the JIT optimization, which has its own memory
- control arrangements (see the pcre2jit documentation for more details),
- nor does it apply to pcre2_dfa_match(). If the limit is reached, the
+ The heap_limit parameter specifies, in units of kibibytes (1024 bytes),
+ the maximum amount of heap memory that pcre2_match() may use to hold
+ backtracking information when running an interpretive match. This limit
+ also applies to pcre2_dfa_match(), which may use the heap when process-
+ ing patterns with a lot of nested pattern recursion or lookarounds or
+ atomic groups. This limit does not apply to matching with the JIT opti-
+ mization, which has its own memory control arrangements (see the
+ pcre2jit documentation for more details). If the limit is reached, the
negative error code PCRE2_ERROR_HEAPLIMIT is returned. The default
- limit is set when PCRE2 is built; the default default is very large and
- is essentially "unlimited".
+ limit can be set when PCRE2 is built; if it is not, the default is set
+ very large and is essentially "unlimited".
A value for the heap limit may also be supplied by an item at the start
of a pattern of the form
@@ -974,13 +991,19 @@ PCRE2 CONTEXTS
unless ddd is less than the limit set by the caller of pcre2_match()
or, if no such limit is set, less than the default.
- The pcre2_match() function starts out using a 20K vector on the system
- stack for recording backtracking points. The more nested backtracking
- points there are (that is, the deeper the search tree), the more memory
- is needed. Heap memory is used only if the initial vector is too
- small. If the heap limit is set to a value less than 21 (in particular,
- zero) no heap memory will be used. In this case, only patterns that do
- not have a lot of nested backtracking can be successfully processed.
+ The pcre2_match() function starts out using a 20KiB vector on the sys-
+ tem stack for recording backtracking points. The more nested backtrack-
+ ing points there are (that is, the deeper the search tree), the more
+ memory is needed. Heap memory is used only if the initial vector is
+ too small. If the heap limit is set to a value less than 21 (in partic-
+ ular, zero) no heap memory will be used. In this case, only patterns
+ that do not have a lot of nested backtracking can be successfully pro-
+ cessed.
+
+ Similarly, for pcre2_dfa_match(), a vector on the system stack is used
+ when processing pattern recursions, lookarounds, or atomic groups, and
+ only if this is not big enough is heap memory used. In this case, too,
+ setting a value of zero disables the use of the heap.
int pcre2_set_match_limit(pcre2_match_context *mcontext,
uint32_t value);
@@ -1031,22 +1054,33 @@ PCRE2 CONTEXTS
The depth limit is not relevant, and is ignored, when matching is done
using JIT compiled code. However, it is supported by pcre2_dfa_match(),
- which uses it to limit the depth of internal recursive function calls
- that implement atomic groups, lookaround assertions, and pattern recur-
- sions. This is, therefore, an indirect limit on the amount of system
- stack that is used. A recursive pattern such as /(.)(?1)/, when matched
- to a very long string using pcre2_dfa_match(), can use a great deal of
- stack.
+ which uses it to limit the depth of nested internal recursive function
+ calls that implement atomic groups, lookaround assertions, and pattern
+ recursions. This limits, indirectly, the amount of system stack that is
+ used. It was more useful in versions before 10.32, when stack memory
+ was used for local workspace vectors for recursive function calls. From
+ version 10.32, only local variables are allocated on the stack and as
+ each call uses only a few hundred bytes, even a small stack can support
+ quite a lot of recursion.
+
+ If the depth of internal recursive function calls is great enough,
+ local workspace vectors are allocated on the heap from version 10.32
+ onwards, so the depth limit also indirectly limits the amount of heap
+ memory that is used. A recursive pattern such as /(.(?2))((?1)|)/, when
+ matched to a very long string using pcre2_dfa_match(), can use a great
+ deal of memory. However, it is probably better to limit heap usage
+ directly by calling pcre2_set_heap_limit().
The default value for the depth limit can be set when PCRE2 is built;
- the default default is the same value as the default for the match
- limit. If the limit is exceeded, pcre2_match() or pcre2_dfa_match()
- returns PCRE2_ERROR_DEPTHLIMIT. A value for the depth limit may also be
- supplied by an item at the start of a pattern of the form
+ if it is not, the default is set to the same value as the default for
+ the match limit. If the limit is exceeded, pcre2_match() or
+ pcre2_dfa_match() returns PCRE2_ERROR_DEPTHLIMIT. A value for the depth
+ limit may also be supplied by an item at the start of a pattern of the
+ form
(*LIMIT_DEPTH=ddd)
- where ddd is a decimal number. However, such a setting is ignored
+ where ddd is a decimal number. However, such a setting is ignored
unless ddd is less than the limit set by the caller of pcre2_match() or
pcre2_dfa_match() or, if no such limit is set, less than the default.
@@ -1055,52 +1089,53 @@ CHECKING BUILD-TIME OPTIONS
int pcre2_config(uint32_t what, void *where);
- The function pcre2_config() makes it possible for a PCRE2 client to
- discover which optional features have been compiled into the PCRE2
- library. The pcre2build documentation has more details about these
+ The function pcre2_config() makes it possible for a PCRE2 client to
+ discover which optional features have been compiled into the PCRE2
+ library. The pcre2build documentation has more details about these
optional features.
- The first argument for pcre2_config() specifies which information is
- required. The second argument is a pointer to memory into which the
- information is placed. If NULL is passed, the function returns the
- amount of memory that is needed for the requested information. For
- calls that return numerical values, the value is in bytes; when
- requesting these values, where should point to appropriately aligned
- memory. For calls that return strings, the required length is given in
+ The first argument for pcre2_config() specifies which information is
+ required. The second argument is a pointer to memory into which the
+ information is placed. If NULL is passed, the function returns the
+ amount of memory that is needed for the requested information. For
+ calls that return numerical values, the value is in bytes; when
+ requesting these values, where should point to appropriately aligned
+ memory. For calls that return strings, the required length is given in
code units, not counting the terminating zero.
- When requesting information, the returned value from pcre2_config() is
- non-negative on success, or the negative error code PCRE2_ERROR_BADOP-
- TION if the value in the first argument is not recognized. The follow-
+ When requesting information, the returned value from pcre2_config() is
+ non-negative on success, or the negative error code PCRE2_ERROR_BADOP-
+ TION if the value in the first argument is not recognized. The follow-
ing information is available:
PCRE2_CONFIG_BSR
- The output is a uint32_t integer whose value indicates what character
- sequences the \R escape sequence matches by default. A value of
+ The output is a uint32_t integer whose value indicates what character
+ sequences the \R escape sequence matches by default. A value of
PCRE2_BSR_UNICODE means that \R matches any Unicode line ending
- sequence; a value of PCRE2_BSR_ANYCRLF means that \R matches only CR,
+ sequence; a value of PCRE2_BSR_ANYCRLF means that \R matches only CR,
LF, or CRLF. The default can be overridden when a pattern is compiled.
PCRE2_CONFIG_COMPILED_WIDTHS
- The output is a uint32_t integer whose lower bits indicate which code
- unit widths were selected when PCRE2 was built. The 1-bit indicates
- 8-bit support, and the 2-bit and 4-bit indicate 16-bit and 32-bit sup-
+ The output is a uint32_t integer whose lower bits indicate which code
+ unit widths were selected when PCRE2 was built. The 1-bit indicates
+ 8-bit support, and the 2-bit and 4-bit indicate 16-bit and 32-bit sup-
port, respectively.
PCRE2_CONFIG_DEPTHLIMIT
- The output is a uint32_t integer that gives the default limit for the
- depth of nested backtracking in pcre2_match() or the depth of nested
- recursions and lookarounds in pcre2_dfa_match(). Further details are
- given with pcre2_set_depth_limit() above.
+ The output is a uint32_t integer that gives the default limit for the
+ depth of nested backtracking in pcre2_match() or the depth of nested
+ recursions, lookarounds, and atomic groups in pcre2_dfa_match(). Fur-
+ ther details are given with pcre2_set_depth_limit() above.
PCRE2_CONFIG_HEAPLIMIT
- The output is a uint32_t integer that gives, in kilobytes, the default
- limit for the amount of heap memory used by pcre2_match(). Further
- details are given with pcre2_set_heap_limit() above.
+ The output is a uint32_t integer that gives, in kibibytes, the default
+ limit for the amount of heap memory used by pcre2_match() or
+ pcre2_dfa_match(). Further details are given with
+ pcre2_set_heap_limit() above.
PCRE2_CONFIG_JIT
@@ -1130,9 +1165,9 @@ CHECKING BUILD-TIME OPTIONS
The default value of 2 for the 8-bit and 16-bit libraries is sufficient
for all but the most massive patterns, since it allows the size of the
- compiled pattern to be up to 64K code units. Larger values allow larger
- regular expressions to be compiled by those two libraries, but at the
- expense of slower matching.
+ compiled pattern to be up to 65535 code units. Larger values allow
+ larger regular expressions to be compiled by those two libraries, but
+ at the expense of slower matching.
PCRE2_CONFIG_MATCHLIMIT
@@ -1226,14 +1261,16 @@ COMPILING A PATTERN
piled pattern is obtained by calling malloc(). Otherwise, it is
obtained from the same memory function that was used for the compile
context. The caller must free the memory by calling pcre2_code_free()
- when it is no longer needed.
+ when it is no longer needed. If pcre2_code_free() is called with a
+ NULL argument, it returns immediately, without doing anything.
The function pcre2_code_copy() makes a copy of the compiled code in new
- memory, using the same memory allocator as was used for the original.
- However, if the code has been processed by the JIT compiler (see
- below), the JIT information cannot be copied (because it is position-
+ memory, using the same memory allocator as was used for the original.
+ However, if the code has been processed by the JIT compiler (see
+ below), the JIT information cannot be copied (because it is position-
dependent). The new copy can initially be used only for non-JIT match-
- ing, though it can be passed to pcre2_jit_compile() if required.
+ ing, though it can be passed to pcre2_jit_compile() if required. If
+ pcre2_code_copy() is called with a NULL argument, it returns NULL.
The pcre2_code_copy() function provides a way for individual threads in
a multithreaded application to acquire a private copy of shared com-
@@ -1247,64 +1284,65 @@ COMPILING A PATTERN
Copies of both the code and the tables are made, with the new code
pointing to the new tables. The memory for the new tables is automati-
cally freed when pcre2_code_free() is called for the new copy of the
- compiled code.
+ compiled code. If pcre2_code_copy_with_tables() is called with a NULL
+ argument, it returns NULL.
- NOTE: When one of the matching functions is called, pointers to the
+ NOTE: When one of the matching functions is called, pointers to the
compiled pattern and the subject string are set in the match data block
- so that they can be referenced by the substring extraction functions.
- After running a match, you must not free a compiled pattern (or a sub-
- ject string) until after all operations on the match data block have
+ so that they can be referenced by the substring extraction functions.
+ After running a match, you must not free a compiled pattern (or a sub-
+ ject string) until after all operations on the match data block have
taken place.
- The options argument for pcre2_compile() contains various bit settings
- that affect the compilation. It should be zero if no options are
- required. The available options are described below. Some of them (in
- particular, those that are compatible with Perl, but some others as
- well) can also be set and unset from within the pattern (see the
+ The options argument for pcre2_compile() contains various bit settings
+ that affect the compilation. It should be zero if no options are
+ required. The available options are described below. Some of them (in
+ particular, those that are compatible with Perl, but some others as
+ well) can also be set and unset from within the pattern (see the
detailed description in the pcre2pattern documentation).
- For those options that can be different in different parts of the pat-
- tern, the contents of the options argument specifies their settings at
- the start of compilation. The PCRE2_ANCHORED, PCRE2_ENDANCHORED, and
- PCRE2_NO_UTF_CHECK options can be set at the time of matching as well
+ For those options that can be different in different parts of the pat-
+ tern, the contents of the options argument specify their settings at
+ the start of compilation. The PCRE2_ANCHORED, PCRE2_ENDANCHORED, and
+ PCRE2_NO_UTF_CHECK options can be set at the time of matching as well
as at compile time.
- Other, less frequently required compile-time parameters (for example,
+ Other, less frequently required compile-time parameters (for example,
the newline setting) can be provided in a compile context (as described
above).
If errorcode or erroroffset is NULL, pcre2_compile() returns NULL imme-
- diately. Otherwise, the variables to which these point are set to an
- error code and an offset (number of code units) within the pattern,
- respectively, when pcre2_compile() returns NULL because a compilation
+ diately. Otherwise, the variables to which these point are set to an
+ error code and an offset (number of code units) within the pattern,
+ respectively, when pcre2_compile() returns NULL because a compilation
error has occurred. The values are not defined when compilation is suc-
cessful and pcre2_compile() returns a non-NULL value.
- There are nearly 100 positive error codes that pcre2_compile() may
- return if it finds an error in the pattern. There are also some nega-
- tive error codes that are used for invalid UTF strings. These are the
+ There are nearly 100 positive error codes that pcre2_compile() may
+ return if it finds an error in the pattern. There are also some nega-
+ tive error codes that are used for invalid UTF strings. These are the
same as given by pcre2_match() and pcre2_dfa_match(), and are described
- in the pcre2unicode page. There is no separate documentation for the
- positive error codes, because the textual error messages that are
- obtained by calling the pcre2_get_error_message() function (see
- "Obtaining a textual error message" below) should be self-explanatory.
- Macro names starting with PCRE2_ERROR_ are defined for both positive
+ in the pcre2unicode page. There is no separate documentation for the
+ positive error codes, because the textual error messages that are
+ obtained by calling the pcre2_get_error_message() function (see
+ "Obtaining a textual error message" below) should be self-explanatory.
+ Macro names starting with PCRE2_ERROR_ are defined for both positive
and negative error codes in pcre2.h.
The value returned in erroroffset is an indication of where in the pat-
- tern the error occurred. It is not necessarily the furthest point in
- the pattern that was read. For example, after the error "lookbehind
+ tern the error occurred. It is not necessarily the furthest point in
+ the pattern that was read. For example, after the error "lookbehind
assertion is not fixed length", the error offset points to the start of
- the failing assertion. For an invalid UTF-8 or UTF-16 string, the off-
+ the failing assertion. For an invalid UTF-8 or UTF-16 string, the off-
set is that of the first code unit of the failing character.
- Some errors are not detected until the whole pattern has been scanned;
- in these cases, the offset passed back is the length of the pattern.
- Note that the offset is in code units, not characters, even in a UTF
+ Some errors are not detected until the whole pattern has been scanned;
+ in these cases, the offset passed back is the length of the pattern.
+ Note that the offset is in code units, not characters, even in a UTF
mode. It may sometimes point into the middle of a UTF-8 or UTF-16 char-
acter.
- This code fragment shows a typical straightforward call to pcre2_com-
+ This code fragment shows a typical straightforward call to pcre2_com-
pile():
pcre2_code *re;
@@ -1318,106 +1356,107 @@ COMPILING A PATTERN
&erroffset, /* for error offset */
NULL); /* no compile context */
- The following names for option bits are defined in the pcre2.h header
+ The following names for option bits are defined in the pcre2.h header
file:
PCRE2_ANCHORED
If this bit is set, the pattern is forced to be "anchored", that is, it
- is constrained to match only at the first matching point in the string
- that is being searched (the "subject string"). This effect can also be
- achieved by appropriate constructs in the pattern itself, which is the
+ is constrained to match only at the first matching point in the string
+ that is being searched (the "subject string"). This effect can also be
+ achieved by appropriate constructs in the pattern itself, which is the
only way to do it in Perl.
PCRE2_ALLOW_EMPTY_CLASS
- By default, for compatibility with Perl, a closing square bracket that
- immediately follows an opening one is treated as a data character for
- the class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the
+ By default, for compatibility with Perl, a closing square bracket that
+ immediately follows an opening one is treated as a data character for
+ the class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the
class, which therefore contains no characters and so can never match.
PCRE2_ALT_BSUX
- This option request alternative handling of three escape sequences,
- which makes PCRE2's behaviour more like ECMAscript (aka JavaScript).
+ This option requests alternative handling of three escape sequences,
+ which makes PCRE2's behaviour more like ECMAscript (aka JavaScript).
When it is set:
(1) \U matches an upper case "U" character; by default \U causes a com-
pile time error (Perl uses \U to upper case subsequent characters).
(2) \u matches a lower case "u" character unless it is followed by four
- hexadecimal digits, in which case the hexadecimal number defines the
- code point to match. By default, \u causes a compile time error (Perl
+ hexadecimal digits, in which case the hexadecimal number defines the
+ code point to match. By default, \u causes a compile time error (Perl
uses it to upper case the following character).
- (3) \x matches a lower case "x" character unless it is followed by two
- hexadecimal digits, in which case the hexadecimal number defines the
- code point to match. By default, as in Perl, a hexadecimal number is
+ (3) \x matches a lower case "x" character unless it is followed by two
+ hexadecimal digits, in which case the hexadecimal number defines the
+ code point to match. By default, as in Perl, a hexadecimal number is
always expected after \x, but it may have zero, one, or two digits (so,
for example, \xz matches a binary zero character followed by z).
PCRE2_ALT_CIRCUMFLEX
In multiline mode (when PCRE2_MULTILINE is set), the circumflex
- metacharacter matches at the start of the subject (unless PCRE2_NOTBOL
- is set), and also after any internal newline. However, it does not
+ metacharacter matches at the start of the subject (unless PCRE2_NOTBOL
+ is set), and also after any internal newline. However, it does not
match after a newline at the end of the subject, for compatibility with
- Perl. If you want a multiline circumflex also to match after a termi-
+ Perl. If you want a multiline circumflex also to match after a termi-
nating newline, you must set PCRE2_ALT_CIRCUMFLEX.
PCRE2_ALT_VERBNAMES
- By default, for compatibility with Perl, the name in any verb sequence
- such as (*MARK:NAME) is any sequence of characters that does not
- include a closing parenthesis. The name is not processed in any way,
- and it is not possible to include a closing parenthesis in the name.
- However, if the PCRE2_ALT_VERBNAMES option is set, normal backslash
- processing is applied to verb names and only an unescaped closing
- parenthesis terminates the name. A closing parenthesis can be included
- in a name either as \) or between \Q and \E. If the PCRE2_EXTENDED or
- PCRE2_EXTENDED_MORE option is set, unescaped whitespace in verb names
- is skipped and #-comments are recognized in this mode, exactly as in
- the rest of the pattern.
+ By default, for compatibility with Perl, the name in any verb sequence
+ such as (*MARK:NAME) is any sequence of characters that does not
+ include a closing parenthesis. The name is not processed in any way,
+ and it is not possible to include a closing parenthesis in the name.
+ However, if the PCRE2_ALT_VERBNAMES option is set, normal backslash
+ processing is applied to verb names and only an unescaped closing
+ parenthesis terminates the name. A closing parenthesis can be included
+ in a name either as \) or between \Q and \E. If the PCRE2_EXTENDED or
+ PCRE2_EXTENDED_MORE option is set with PCRE2_ALT_VERBNAMES, unescaped
+ whitespace in verb names is skipped and #-comments are recognized,
+ exactly as in the rest of the pattern.
PCRE2_AUTO_CALLOUT
- If this bit is set, pcre2_compile() automatically inserts callout
- items, all with number 255, before each pattern item, except immedi-
- ately before or after an explicit callout in the pattern. For discus-
+ If this bit is set, pcre2_compile() automatically inserts callout
+ items, all with number 255, before each pattern item, except immedi-
+ ately before or after an explicit callout in the pattern. For discus-
sion of the callout facility, see the pcre2callout documentation.
PCRE2_CASELESS
- If this bit is set, letters in the pattern match both upper and lower
- case letters in the subject. It is equivalent to Perl's /i option, and
- it can be changed within a pattern by a (?i) option setting. If
- PCRE2_UTF is set, Unicode properties are used for all characters with
- more than one other case, and for all characters whose code points are
- greater than U+007f. For lower valued characters with only one other
- case, a lookup table is used for speed. When PCRE2_UTF is not set, a
+ If this bit is set, letters in the pattern match both upper and lower
+ case letters in the subject. It is equivalent to Perl's /i option, and
+ it can be changed within a pattern by a (?i) option setting. If
+ PCRE2_UTF is set, Unicode properties are used for all characters with
+ more than one other case, and for all characters whose code points are
+ greater than U+007F. For lower valued characters with only one other
+ case, a lookup table is used for speed. When PCRE2_UTF is not set, a
lookup table is used for all code points less than 256, and higher code
- points (available only in 16-bit or 32-bit mode) are treated as not
+ points (available only in 16-bit or 32-bit mode) are treated as not
having another case.
PCRE2_DOLLAR_ENDONLY
- If this bit is set, a dollar metacharacter in the pattern matches only
- at the end of the subject string. Without this option, a dollar also
- matches immediately before a newline at the end of the string (but not
- before any other newlines). The PCRE2_DOLLAR_ENDONLY option is ignored
- if PCRE2_MULTILINE is set. There is no equivalent to this option in
+ If this bit is set, a dollar metacharacter in the pattern matches only
+ at the end of the subject string. Without this option, a dollar also
+ matches immediately before a newline at the end of the string (but not
+ before any other newlines). The PCRE2_DOLLAR_ENDONLY option is ignored
+ if PCRE2_MULTILINE is set. There is no equivalent to this option in
Perl, and no way to set it within a pattern.
PCRE2_DOTALL
- If this bit is set, a dot metacharacter in the pattern matches any
- character, including one that indicates a newline. However, it only
+ If this bit is set, a dot metacharacter in the pattern matches any
+ character, including one that indicates a newline. However, it only
ever matches one character, even if newlines are coded as CRLF. Without
this option, a dot does not match when the current position in the sub-
- ject is at a newline. This option is equivalent to Perl's /s option,
+ ject is at a newline. This option is equivalent to Perl's /s option,
and it can be changed within a pattern by a (?s) option setting. A neg-
- ative class such as [^a] always matches newline characters, independent
- of the setting of this option.
+ ative class such as [^a] always matches newline characters, and the \N
+ escape sequence always matches a non-newline character, independent of
+ the setting of PCRE2_DOTALL.
PCRE2_DUPNAMES
@@ -1459,272 +1498,294 @@ COMPILING A PATTERN
introduce various parenthesized subpatterns, nor within numerical quan-
tifiers such as {1,3}. Ignorable white space is permitted between an
item and a following quantifier and between a quantifier and a follow-
- ing + that indicates possessiveness.
-
- PCRE2_EXTENDED also causes characters between an unescaped # outside a
- character class and the next newline, inclusive, to be ignored, which
- makes it possible to include comments inside complicated patterns. Note
- that the end of this type of comment is a literal newline sequence in
- the pattern; escape sequences that happen to represent a newline do not
- count. PCRE2_EXTENDED is equivalent to Perl's /x option, and it can be
- changed within a pattern by a (?x) option setting.
+ ing + that indicates possessiveness. PCRE2_EXTENDED is equivalent to
+ Perl's /x option, and it can be changed within a pattern by a (?x)
+ option setting.
+
+ When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recog-
+ nizes as white space only those characters with code points less than
+ 256 that are flagged as white space in its low-character table. The ta-
+ ble is normally created by pcre2_maketables(), which uses the isspace()
+ function to identify space characters. In most ASCII environments, the
+ relevant characters are those with code points 0x0009 (tab), 0x000A
+ (linefeed), 0x000B (vertical tab), 0x000C (formfeed), 0x000D (carriage
+ return), and 0x0020 (space).
+
+ When PCRE2 is compiled with Unicode support, in addition to these char-
+ acters, five more Unicode "Pattern White Space" characters are recog-
+ nized by PCRE2_EXTENDED. These are U+0085 (next line), U+200E (left-to-
+ right mark), U+200F (right-to-left mark), U+2028 (line separator), and
+ U+2029 (paragraph separator). This set of characters is the same as
+ recognized by Perl's /x option. Note that the horizontal and vertical
+ space characters that are matched by the \h and \v escapes in patterns
+ are a much bigger set.
+
+ As well as ignoring most white space, PCRE2_EXTENDED also causes char-
+ acters between an unescaped # outside a character class and the next
+ newline, inclusive, to be ignored, which makes it possible to include
+ comments inside complicated patterns. Note that the end of this type of
+ comment is a literal newline sequence in the pattern; escape sequences
+ that happen to represent a newline do not count.
Which characters are interpreted as newlines can be specified by a set-
- ting in the compile context that is passed to pcre2_compile() or by a
- special sequence at the start of the pattern, as described in the sec-
- tion entitled "Newline conventions" in the pcre2pattern documentation.
+ ting in the compile context that is passed to pcre2_compile() or by a
+ special sequence at the start of the pattern, as described in the sec-
+ tion entitled "Newline conventions" in the pcre2pattern documentation.
A default is defined when PCRE2 is built.
PCRE2_EXTENDED_MORE
- This option has the effect of PCRE2_EXTENDED, but, in addition,
- unescaped space and horizontal tab characters are ignored inside a
- character class. PCRE2_EXTENDED_MORE is equivalent to Perl's 5.26 /xx
- option, and it can be changed within a pattern by a (?xx) option set-
+ This option has the effect of PCRE2_EXTENDED, but, in addition,
+ unescaped space and horizontal tab characters are ignored inside a
+ character class. Note: only these two characters are ignored, not the
+ full set of pattern white space characters that are ignored outside a
+ character class. PCRE2_EXTENDED_MORE is equivalent to Perl's /xx
+ option, and it can be changed within a pattern by a (?xx) option set-
ting.
PCRE2_FIRSTLINE
If this option is set, the start of an unanchored pattern match must be
- before or at the first newline in the subject string following the
- start of matching, though the matched text may continue over the new-
+ before or at the first newline in the subject string following the
+ start of matching, though the matched text may continue over the new-
line. If startoffset is non-zero, the limiting newline is not necessar-
- ily the first newline in the subject. For example, if the subject
+ ily the first newline in the subject. For example, if the subject
string is "abc\nxyz" (where \n represents a single-character newline) a
- pattern match for "yz" succeeds with PCRE2_FIRSTLINE if startoffset is
- greater than 3. See also PCRE2_USE_OFFSET_LIMIT, which provides a more
- general limiting facility. If PCRE2_FIRSTLINE is set with an offset
- limit, a match must occur in the first line and also within the offset
+ pattern match for "yz" succeeds with PCRE2_FIRSTLINE if startoffset is
+ greater than 3. See also PCRE2_USE_OFFSET_LIMIT, which provides a more
+ general limiting facility. If PCRE2_FIRSTLINE is set with an offset
+ limit, a match must occur in the first line and also within the offset
limit. In other words, whichever limit comes first is used.
PCRE2_LITERAL
If this option is set, all meta-characters in the pattern are disabled,
- and it is treated as a literal string. Matching literal strings with a
+ and it is treated as a literal string. Matching literal strings with a
regular expression engine is not the most efficient way of doing it. If
- you are doing a lot of literal matching and are worried about effi-
+ you are doing a lot of literal matching and are worried about effi-
ciency, you should consider using other approaches. The only other main
options that are allowed with PCRE2_LITERAL are: PCRE2_ANCHORED,
PCRE2_ENDANCHORED, PCRE2_AUTO_CALLOUT, PCRE2_CASELESS, PCRE2_FIRSTLINE,
PCRE2_NO_START_OPTIMIZE, PCRE2_NO_UTF_CHECK, PCRE2_UTF, and
- PCRE2_USE_OFFSET_LIMIT. The extra options PCRE2_EXTRA_MATCH_LINE and
- PCRE2_EXTRA_MATCH_WORD are also supported. Any other options cause an
+ PCRE2_USE_OFFSET_LIMIT. The extra options PCRE2_EXTRA_MATCH_LINE and
+ PCRE2_EXTRA_MATCH_WORD are also supported. Any other options cause an
error.
PCRE2_MATCH_UNSET_BACKREF
- If this option is set, a back reference to an unset subpattern group
- matches an empty string (by default this causes the current matching
- alternative to fail). A pattern such as (\1)(a) succeeds when this
- option is set (assuming it can find an "a" in the subject), whereas it
- fails by default, for Perl compatibility. Setting this option makes
+ If this option is set, a backreference to an unset subpattern group
+ matches an empty string (by default this causes the current matching
+ alternative to fail). A pattern such as (\1)(a) succeeds when this
+ option is set (assuming it can find an "a" in the subject), whereas it
+ fails by default, for Perl compatibility. Setting this option makes
PCRE2 behave more like ECMAscript (aka JavaScript).
PCRE2_MULTILINE
- By default, for the purposes of matching "start of line" and "end of
- line", PCRE2 treats the subject string as consisting of a single line
- of characters, even if it actually contains newlines. The "start of
- line" metacharacter (^) matches only at the start of the string, and
- the "end of line" metacharacter ($) matches only at the end of the
+ By default, for the purposes of matching "start of line" and "end of
+ line", PCRE2 treats the subject string as consisting of a single line
+ of characters, even if it actually contains newlines. The "start of
+ line" metacharacter (^) matches only at the start of the string, and
+ the "end of line" metacharacter ($) matches only at the end of the
string, or before a terminating newline (except when PCRE2_DOL-
- LAR_ENDONLY is set). Note, however, that unless PCRE2_DOTALL is set,
+ LAR_ENDONLY is set). Note, however, that unless PCRE2_DOTALL is set,
the "any character" metacharacter (.) does not match at a newline. This
behaviour (for ^, $, and dot) is the same as Perl.
- When PCRE2_MULTILINE it is set, the "start of line" and "end of line"
- constructs match immediately following or immediately before internal
- newlines in the subject string, respectively, as well as at the very
- start and end. This is equivalent to Perl's /m option, and it can be
+ When PCRE2_MULTILINE is set, the "start of line" and "end of line"
+ constructs match immediately following or immediately before internal
+ newlines in the subject string, respectively, as well as at the very
+ start and end. This is equivalent to Perl's /m option, and it can be
changed within a pattern by a (?m) option setting. Note that the "start
of line" metacharacter does not match after a newline at the end of the
- subject, for compatibility with Perl. However, you can change this by
- setting the PCRE2_ALT_CIRCUMFLEX option. If there are no newlines in a
- subject string, or no occurrences of ^ or $ in a pattern, setting
+ subject, for compatibility with Perl. However, you can change this by
+ setting the PCRE2_ALT_CIRCUMFLEX option. If there are no newlines in a
+ subject string, or no occurrences of ^ or $ in a pattern, setting
PCRE2_MULTILINE has no effect.
PCRE2_NEVER_BACKSLASH_C
- This option locks out the use of \C in the pattern that is being com-
- piled. This escape can cause unpredictable behaviour in UTF-8 or
- UTF-16 modes, because it may leave the current matching point in the
- middle of a multi-code-unit character. This option may be useful in
- applications that process patterns from external sources. Note that
+ This option locks out the use of \C in the pattern that is being com-
+ piled. This escape can cause unpredictable behaviour in UTF-8 or
+ UTF-16 modes, because it may leave the current matching point in the
+ middle of a multi-code-unit character. This option may be useful in
+ applications that process patterns from external sources. Note that
there is also a build-time option that permanently locks out the use of
\C.
PCRE2_NEVER_UCP
- This option locks out the use of Unicode properties for handling \B,
+ This option locks out the use of Unicode properties for handling \B,
\b, \D, \d, \S, \s, \W, \w, and some of the POSIX character classes, as
- described for the PCRE2_UCP option below. In particular, it prevents
- the creator of the pattern from enabling this facility by starting the
- pattern with (*UCP). This option may be useful in applications that
+ described for the PCRE2_UCP option below. In particular, it prevents
+ the creator of the pattern from enabling this facility by starting the
+ pattern with (*UCP). This option may be useful in applications that
process patterns from external sources. The option combination PCRE2_UCP
and PCRE2_NEVER_UCP causes an error.
PCRE2_NEVER_UTF
- This option locks out interpretation of the pattern as UTF-8, UTF-16,
+ This option locks out interpretation of the pattern as UTF-8, UTF-16,
or UTF-32, depending on which library is in use. In particular, it pre-
- vents the creator of the pattern from switching to UTF interpretation
- by starting the pattern with (*UTF). This option may be useful in
- applications that process patterns from external sources. The combina-
+ vents the creator of the pattern from switching to UTF interpretation
+ by starting the pattern with (*UTF). This option may be useful in
+ applications that process patterns from external sources. The combina-
tion of PCRE2_UTF and PCRE2_NEVER_UTF causes an error.
PCRE2_NO_AUTO_CAPTURE
If this option is set, it disables the use of numbered capturing paren-
- theses in the pattern. Any opening parenthesis that is not followed by
- ? behaves as if it were followed by ?: but named parentheses can still
+ theses in the pattern. Any opening parenthesis that is not followed by
+ ? behaves as if it were followed by ?: but named parentheses can still
be used for capturing (and they acquire numbers in the usual way). This
- is the same as Perl's /n option. Note that, when this option is set,
- references to capturing groups (back references or recursion/subroutine
- calls) may only refer to named groups, though the reference can be by
+ is the same as Perl's /n option. Note that, when this option is set,
+ references to capturing groups (backreferences or recursion/subroutine
+ calls) may only refer to named groups, though the reference can be by
name or by number.
PCRE2_NO_AUTO_POSSESS
If this option is set, it disables "auto-possessification", which is an
- optimization that, for example, turns a+b into a++b in order to avoid
- backtracks into a+ that can never be successful. However, if callouts
- are in use, auto-possessification means that some callouts are never
+ optimization that, for example, turns a+b into a++b in order to avoid
+ backtracks into a+ that can never be successful. However, if callouts
+ are in use, auto-possessification means that some callouts are never
taken. You can set this option if you want the matching functions to do
- a full unoptimized search and run all the callouts, but it is mainly
+ a full unoptimized search and run all the callouts, but it is mainly
provided for testing purposes.
PCRE2_NO_DOTSTAR_ANCHOR
If this option is set, it disables an optimization that is applied when
- .* is the first significant item in a top-level branch of a pattern,
- and all the other branches also start with .* or with \A or \G or ^.
- The optimization is automatically disabled for .* if it is inside an
- atomic group or a capturing group that is the subject of a back refer-
- ence, or if the pattern contains (*PRUNE) or (*SKIP). When the opti-
- mization is not disabled, such a pattern is automatically anchored if
+ .* is the first significant item in a top-level branch of a pattern,
+ and all the other branches also start with .* or with \A or \G or ^.
+ The optimization is automatically disabled for .* if it is inside an
+ atomic group or a capturing group that is the subject of a backrefer-
+ ence, or if the pattern contains (*PRUNE) or (*SKIP). When the opti-
+ mization is not disabled, such a pattern is automatically anchored if
PCRE2_DOTALL is set for all the .* items and PCRE2_MULTILINE is not set
- for any ^ items. Otherwise, the fact that any match must start either
- at the start of the subject or following a newline is remembered. Like
+ for any ^ items. Otherwise, the fact that any match must start either
+ at the start of the subject or following a newline is remembered. Like
other optimizations, this can cause callouts to be skipped.
PCRE2_NO_START_OPTIMIZE
- This is an option whose main effect is at matching time. It does not
+ This is an option whose main effect is at matching time. It does not
change what pcre2_compile() generates, but it does affect the output of
the JIT compiler.
- There are a number of optimizations that may occur at the start of a
- match, in order to speed up the process. For example, if it is known
- that an unanchored match must start with a specific code unit value,
- the matching code searches the subject for that value, and fails imme-
- diately if it cannot find it, without actually running the main match-
- ing function. This means that a special item such as (*COMMIT) at the
- start of a pattern is not considered until after a suitable starting
- point for the match has been found. Also, when callouts or (*MARK)
- items are in use, these "start-up" optimizations can cause them to be
- skipped if the pattern is never actually used. The start-up optimiza-
- tions are in effect a pre-scan of the subject that takes place before
+ There are a number of optimizations that may occur at the start of a
+ match, in order to speed up the process. For example, if it is known
+ that an unanchored match must start with a specific code unit value,
+ the matching code searches the subject for that value, and fails imme-
+ diately if it cannot find it, without actually running the main match-
+ ing function. This means that a special item such as (*COMMIT) at the
+ start of a pattern is not considered until after a suitable starting
+ point for the match has been found. Also, when callouts or (*MARK)
+ items are in use, these "start-up" optimizations can cause them to be
+ skipped if the pattern is never actually used. The start-up optimiza-
+ tions are in effect a pre-scan of the subject that takes place before
the pattern is run.
The PCRE2_NO_START_OPTIMIZE option disables the start-up optimizations,
- possibly causing performance to suffer, but ensuring that in cases
- where the result is "no match", the callouts do occur, and that items
+ possibly causing performance to suffer, but ensuring that in cases
+ where the result is "no match", the callouts do occur, and that items
such as (*COMMIT) and (*MARK) are considered at every possible starting
position in the subject string.
- Setting PCRE2_NO_START_OPTIMIZE may change the outcome of a matching
+ Setting PCRE2_NO_START_OPTIMIZE may change the outcome of a matching
operation. Consider the pattern
(*COMMIT)ABC
- When this is compiled, PCRE2 records the fact that a match must start
- with the character "A". Suppose the subject string is "DEFABC". The
- start-up optimization scans along the subject, finds "A" and runs the
- first match attempt from there. The (*COMMIT) item means that the pat-
- tern must match the current starting position, which in this case, it
- does. However, if the same match is run with PCRE2_NO_START_OPTIMIZE
- set, the initial scan along the subject string does not happen. The
- first match attempt is run starting from "D" and when this fails,
- (*COMMIT) prevents any further matches being tried, so the overall
+ When this is compiled, PCRE2 records the fact that a match must start
+ with the character "A". Suppose the subject string is "DEFABC". The
+ start-up optimization scans along the subject, finds "A" and runs the
+ first match attempt from there. The (*COMMIT) item means that the pat-
+ tern must match the current starting position, which in this case, it
+ does. However, if the same match is run with PCRE2_NO_START_OPTIMIZE
+ set, the initial scan along the subject string does not happen. The
+ first match attempt is run starting from "D" and when this fails,
+ (*COMMIT) prevents any further matches being tried, so the overall
result is "no match".
- There are also other start-up optimizations. For example, a minimum
+ There are also other start-up optimizations. For example, a minimum
length for the subject may be recorded. Consider the pattern
(*MARK:A)(X|Y)
- The minimum length for a match is one character. If the subject is
+ The minimum length for a match is one character. If the subject is
"ABC", there will be attempts to match "ABC", "BC", and "C". An attempt
to match an empty string at the end of the subject does not take place,
- because PCRE2 knows that the subject is now too short, and so the
- (*MARK) is never encountered. In this case, the optimization does not
+ because PCRE2 knows that the subject is now too short, and so the
+ (*MARK) is never encountered. In this case, the optimization does not
affect the overall match result, which is still "no match", but it does
affect the auxiliary information that is returned.
PCRE2_NO_UTF_CHECK
- When PCRE2_UTF is set, the validity of the pattern as a UTF string is
- automatically checked. There are discussions about the validity of
- UTF-8 strings, UTF-16 strings, and UTF-32 strings in the pcre2unicode
- document. If an invalid UTF sequence is found, pcre2_compile() returns
+ When PCRE2_UTF is set, the validity of the pattern as a UTF string is
+ automatically checked. There are discussions about the validity of
+ UTF-8 strings, UTF-16 strings, and UTF-32 strings in the pcre2unicode
+ document. If an invalid UTF sequence is found, pcre2_compile() returns
a negative error code.
- If you know that your pattern is a valid UTF string, and you want to
- skip this check for performance reasons, you can set the
- PCRE2_NO_UTF_CHECK option. When it is set, the effect of passing an
+ If you know that your pattern is a valid UTF string, and you want to
+ skip this check for performance reasons, you can set the
+ PCRE2_NO_UTF_CHECK option. When it is set, the effect of passing an
invalid UTF string as a pattern is undefined. It may cause your program
to crash or loop.
Note that this option can also be passed to pcre2_match() and
- pcre_dfa_match(), to suppress UTF validity checking of the subject
+ pcre2_dfa_match(), to suppress UTF validity checking of the subject
string.
Note also that setting PCRE2_NO_UTF_CHECK at compile time does not dis-
- able the error that is given if an escape sequence for an invalid Uni-
- code code point is encountered in the pattern. In particular, the so-
- called "surrogate" code points (0xd800 to 0xdfff) are invalid. If you
- want to allow escape sequences such as \x{d800} you can set the
- PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra option, as described in the
- section entitled "Extra compile options" below. However, this is pos-
+ able the error that is given if an escape sequence for an invalid Uni-
+ code code point is encountered in the pattern. In particular, the so-
+ called "surrogate" code points (0xd800 to 0xdfff) are invalid. If you
+ want to allow escape sequences such as \x{d800} you can set the
+ PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra option, as described in the
+ section entitled "Extra compile options" below. However, this is pos-
sible only in UTF-8 and UTF-32 modes, because these values are not rep-
resentable in UTF-16.
PCRE2_UCP
This option changes the way PCRE2 processes \B, \b, \D, \d, \S, \s, \W,
- \w, and some of the POSIX character classes. By default, only ASCII
- characters are recognized, but if PCRE2_UCP is set, Unicode properties
- are used instead to classify characters. More details are given in the
+ \w, and some of the POSIX character classes. By default, only ASCII
+ characters are recognized, but if PCRE2_UCP is set, Unicode properties
+ are used instead to classify characters. More details are given in the
section on generic character types in the pcre2pattern page. If you set
- PCRE2_UCP, matching one of the items it affects takes much longer. The
- option is available only if PCRE2 has been compiled with Unicode sup-
+ PCRE2_UCP, matching one of the items it affects takes much longer. The
+ option is available only if PCRE2 has been compiled with Unicode sup-
port (which is the default).
PCRE2_UNGREEDY
- This option inverts the "greediness" of the quantifiers so that they
- are not greedy by default, but become greedy if followed by "?". It is
- not compatible with Perl. It can also be set by a (?U) option setting
+ This option inverts the "greediness" of the quantifiers so that they
+ are not greedy by default, but become greedy if followed by "?". It is
+ not compatible with Perl. It can also be set by a (?U) option setting
within the pattern.
PCRE2_USE_OFFSET_LIMIT
This option must be set for pcre2_compile() if pcre2_set_offset_limit()
- is going to be used to set a non-default offset limit in a match con-
- text for matches that use this pattern. An error is generated if an
- offset limit is set without this option. For more details, see the
- description of pcre2_set_offset_limit() in the section that describes
+ is going to be used to set a non-default offset limit in a match con-
+ text for matches that use this pattern. An error is generated if an
+ offset limit is set without this option. For more details, see the
+ description of pcre2_set_offset_limit() in the section that describes
match contexts. See also the PCRE2_FIRSTLINE option above.
PCRE2_UTF
- This option causes PCRE2 to regard both the pattern and the subject
- strings that are subsequently processed as strings of UTF characters
- instead of single-code-unit strings. It is available when PCRE2 is
- built to include Unicode support (which is the default). If Unicode
- support is not available, the use of this option provokes an error.
- Details of how PCRE2_UTF changes the behaviour of PCRE2 are given in
- the pcre2unicode page.
+ This option causes PCRE2 to regard both the pattern and the subject
+ strings that are subsequently processed as strings of UTF characters
+ instead of single-code-unit strings. It is available when PCRE2 is
+ built to include Unicode support (which is the default). If Unicode
+ support is not available, the use of this option provokes an error.
+ Details of how PCRE2_UTF changes the behaviour of PCRE2 are given in
+ the pcre2unicode page. In particular, note that it changes the way
+ PCRE2_CASELESS handles characters with code points greater than 127.
Extra compile options
@@ -1950,7 +2011,7 @@ INFORMATION ABOUT A COMPILED PATTERN
.* is not in an atomic group
.* is not in a capturing group that is the subject
- of a back reference
+ of a backreference
PCRE2_DOTALL is in force for .*
Neither (*PRUNE) nor (*SKIP) appears in the pattern
PCRE2_NO_DOTSTAR_ANCHOR is not set
@@ -1960,101 +2021,99 @@ INFORMATION ABOUT A COMPILED PATTERN
PCRE2_INFO_BACKREFMAX
- Return the number of the highest back reference in the pattern. The
+ Return the number of the highest backreference in the pattern. The
third argument should point to a uint32_t variable. Named subpatterns
acquire numbers as well as names, and these count towards the highest
- back reference. Back references such as \4 or \g{12} match the cap-
- tured characters of the given group, but in addition, the check that a
- capturing group is set in a conditional subpattern such as (?(3)a|b) is
- also a back reference. Zero is returned if there are no back refer-
- ences.
+ backreference. Backreferences such as \4 or \g{12} match the captured
+ characters of the given group, but in addition, the check that a cap-
+ turing group is set in a conditional subpattern such as (?(3)a|b) is
+ also a backreference. Zero is returned if there are no backreferences.
PCRE2_INFO_BSR
- The output is a uint32_t whose value indicates what character sequences
- the \R escape sequence matches. A value of PCRE2_BSR_UNICODE means that
- \R matches any Unicode line ending sequence; a value of PCRE2_BSR_ANY-
- CRLF means that \R matches only CR, LF, or CRLF.
+ The output is a uint32_t integer whose value indicates what character
+ sequences the \R escape sequence matches. A value of PCRE2_BSR_UNICODE
+ means that \R matches any Unicode line ending sequence; a value of
+ PCRE2_BSR_ANYCRLF means that \R matches only CR, LF, or CRLF.
PCRE2_INFO_CAPTURECOUNT
- Return the highest capturing subpattern number in the pattern. In pat-
+ Return the highest capturing subpattern number in the pattern. In pat-
terns where (?| is not used, this is also the total number of capturing
subpatterns. The third argument should point to a uint32_t variable.
PCRE2_INFO_DEPTHLIMIT
- If the pattern set a backtracking depth limit by including an item of
- the form (*LIMIT_DEPTH=nnnn) at the start, the value is returned. The
- third argument should point to an unsigned 32-bit integer. If no such
- value has been set, the call to pcre2_pattern_info() returns the error
+ If the pattern set a backtracking depth limit by including an item of
+ the form (*LIMIT_DEPTH=nnnn) at the start, the value is returned. The
+ third argument should point to a uint32_t integer. If no such value has
+ been set, the call to pcre2_pattern_info() returns the error
PCRE2_ERROR_UNSET. Note that this limit will only be used during match-
- ing if it is less than the limit set or defaulted by the caller of the
+ ing if it is less than the limit set or defaulted by the caller of the
match function.
PCRE2_INFO_FIRSTBITMAP
- In the absence of a single first code unit for a non-anchored pattern,
- pcre2_compile() may construct a 256-bit table that defines a fixed set
- of values for the first code unit in any match. For example, a pattern
- that starts with [abc] results in a table with three bits set. When
- code unit values greater than 255 are supported, the flag bit for 255
- means "any code unit of value 255 or above". If such a table was con-
- structed, a pointer to it is returned. Otherwise NULL is returned. The
- third argument should point to an const uint8_t * variable.
+ In the absence of a single first code unit for a non-anchored pattern,
+ pcre2_compile() may construct a 256-bit table that defines a fixed set
+ of values for the first code unit in any match. For example, a pattern
+ that starts with [abc] results in a table with three bits set. When
+ code unit values greater than 255 are supported, the flag bit for 255
+ means "any code unit of value 255 or above". If such a table was con-
+ structed, a pointer to it is returned. Otherwise NULL is returned. The
+ third argument should point to a const uint8_t * variable.
PCRE2_INFO_FIRSTCODETYPE
Return information about the first code unit of any matched string, for
- a non-anchored pattern. The third argument should point to an uint32_t
- variable. If there is a fixed first value, for example, the letter "c"
- from a pattern such as (cat|cow|coyote), 1 is returned, and the value
- can be retrieved using PCRE2_INFO_FIRSTCODEUNIT. If there is no fixed
- first value, but it is known that a match can occur only at the start
- of the subject or following a newline in the subject, 2 is returned.
+ a non-anchored pattern. The third argument should point to a uint32_t
+ variable. If there is a fixed first value, for example, the letter "c"
+ from a pattern such as (cat|cow|coyote), 1 is returned, and the value
+ can be retrieved using PCRE2_INFO_FIRSTCODEUNIT. If there is no fixed
+ first value, but it is known that a match can occur only at the start
+ of the subject or following a newline in the subject, 2 is returned.
Otherwise, and for anchored patterns, 0 is returned.
PCRE2_INFO_FIRSTCODEUNIT
- Return the value of the first code unit of any matched string for a
- pattern where PCRE2_INFO_FIRSTCODETYPE returns 1; otherwise return 0.
- The third argument should point to an uint32_t variable. In the 8-bit
- library, the value is always less than 256. In the 16-bit library the
- value can be up to 0xffff. In the 32-bit library in UTF-32 mode the
+ Return the value of the first code unit of any matched string for a
+ pattern where PCRE2_INFO_FIRSTCODETYPE returns 1; otherwise return 0.
+ The third argument should point to a uint32_t variable. In the 8-bit
+ library, the value is always less than 256. In the 16-bit library the
+ value can be up to 0xffff. In the 32-bit library in UTF-32 mode the
value can be up to 0x10ffff, and up to 0xffffffff when not using UTF-32
mode.
PCRE2_INFO_FRAMESIZE
Return the size (in bytes) of the data frames that are used to remember
- backtracking positions when the pattern is processed by pcre2_match()
- without the use of JIT. The third argument should point to an size_t
+ backtracking positions when the pattern is processed by pcre2_match()
+ without the use of JIT. The third argument should point to a size_t
variable. The frame size depends on the number of capturing parentheses
- in the pattern. Each additional capturing group adds two PCRE2_SIZE
+ in the pattern. Each additional capturing group adds two PCRE2_SIZE
variables.
PCRE2_INFO_HASBACKSLASHC
- Return 1 if the pattern contains any instances of \C, otherwise 0. The
+ Return 1 if the pattern contains any instances of \C, otherwise 0. The
third argument should point to a uint32_t variable.
PCRE2_INFO_HASCRORLF
- Return 1 if the pattern contains any explicit matches for CR or LF
+ Return 1 if the pattern contains any explicit matches for CR or LF
characters, otherwise 0. The third argument should point to a uint32_t
- variable. An explicit match is either a literal CR or LF character, or
- \r or \n or one of the equivalent hexadecimal or octal escape
+ variable. An explicit match is either a literal CR or LF character, or
+ \r or \n or one of the equivalent hexadecimal or octal escape
sequences.
PCRE2_INFO_HEAPLIMIT
If the pattern set a heap memory limit by including an item of the form
(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argu-
- ment should point to an unsigned 32-bit integer. If no such value has
- been set, the call to pcre2_pattern_info() returns the error
- PCRE2_ERROR_UNSET. Note that this limit will only be used during match-
- ing if it is less than the limit set or defaulted by the caller of the
- match function.
+ ment should point to a uint32_t integer. If no such value has been set,
+ the call to pcre2_pattern_info() returns the error PCRE2_ERROR_UNSET.
+ Note that this limit will only be used during matching if it is less
+ than the limit set or defaulted by the caller of the match function.
PCRE2_INFO_JCHANGED
@@ -2100,8 +2159,8 @@ INFORMATION ABOUT A COMPILED PATTERN
If the pattern set a match limit by including an item of the form
(*LIMIT_MATCH=nnnn) at the start, the value is returned. The third
- argument should point to an unsigned 32-bit integer. If no such value
- has been set, the call to pcre2_pattern_info() returns the error
+ argument should point to a uint32_t integer. If no such value has been
+ set, the call to pcre2_pattern_info() returns the error
PCRE2_ERROR_UNSET. Note that this limit will only be used during match-
ing if it is less than the limit set or defaulted by the caller of the
match function.
@@ -2109,15 +2168,15 @@ INFORMATION ABOUT A COMPILED PATTERN
PCRE2_INFO_MAXLOOKBEHIND
Return the number of characters (not code units) in the longest lookbe-
- hind assertion in the pattern. The third argument should point to an
- unsigned 32-bit integer. This information is useful when doing multi-
- segment matching using the partial matching facilities. Note that the
- simple assertions \b and \B require a one-character lookbehind. \A also
- registers a one-character lookbehind, though it does not actually
- inspect the previous character. This is to ensure that at least one
- character from the old segment is retained when a new segment is pro-
- cessed. Otherwise, if there are no lookbehinds in the pattern, \A might
- match incorrectly at the start of a second or subsequent segment.
+ hind assertion in the pattern. The third argument should point to a
+ uint32_t integer. This information is useful when doing multi-segment
+ matching using the partial matching facilities. Note that the simple
+ assertions \b and \B require a one-character lookbehind. \A also regis-
+ ters a one-character lookbehind, though it does not actually inspect
+ the previous character. This is to ensure that at least one character
+ from the old segment is retained when a new segment is processed. Oth-
+ erwise, if there are no lookbehinds in the pattern, \A might match
+ incorrectly at the start of a second or subsequent segment.
PCRE2_INFO_MINLENGTH
@@ -2239,9 +2298,16 @@ INFORMATION ABOUT A PATTERN'S CALLOUTS
SERIALIZATION AND PRECOMPILING
It is possible to save compiled patterns on disc or elsewhere, and
- reload them later, subject to a number of restrictions. The functions
- whose names begin with pcre2_serialize_ are used for this purpose. They
- are described in the pcre2serialize documentation.
+ reload them later, subject to a number of restrictions. The host on
+ which the patterns are reloaded must be running the same version of
+ PCRE2, with the same code unit width, and must also have the same endi-
+ anness, pointer width, and PCRE2_SIZE type. Before compiled patterns
+ can be saved, they must be converted to a "serialized" form, which in
+ the case of PCRE2 is really just a bytecode dump. The functions whose
+ names begin with pcre2_serialize_ are used for converting to and from
+ the serialized form. They are described in the pcre2serialize documen-
+ tation. Note that PCRE2 serialization does not convert compiled pat-
+ terns to an abstract format like Java or .NET serialization.
THE MATCH DATA BLOCK
@@ -2254,57 +2320,58 @@ THE MATCH DATA BLOCK
void pcre2_match_data_free(pcre2_match_data *match_data);
- Information about a successful or unsuccessful match is placed in a
- match data block, which is an opaque structure that is accessed by
- function calls. In particular, the match data block contains a vector
- of offsets into the subject string that define the matched part of the
- subject and any substrings that were captured. This is known as the
+ Information about a successful or unsuccessful match is placed in a
+ match data block, which is an opaque structure that is accessed by
+ function calls. In particular, the match data block contains a vector
+ of offsets into the subject string that define the matched part of the
+ subject and any substrings that were captured. This is known as the
ovector.
- Before calling pcre2_match(), pcre2_dfa_match(), or pcre2_jit_match()
+ Before calling pcre2_match(), pcre2_dfa_match(), or pcre2_jit_match()
you must create a match data block by calling one of the creation func-
- tions above. For pcre2_match_data_create(), the first argument is the
- number of pairs of offsets in the ovector. One pair of offsets is
+ tions above. For pcre2_match_data_create(), the first argument is the
+ number of pairs of offsets in the ovector. One pair of offsets is
required to identify the string that matched the whole pattern, with an
- additional pair for each captured substring. For example, a value of 4
- creates enough space to record the matched portion of the subject plus
- three captured substrings. A minimum of at least 1 pair is imposed by
+ additional pair for each captured substring. For example, a value of 4
+ creates enough space to record the matched portion of the subject plus
+ three captured substrings. A minimum of 1 pair is imposed by
pcre2_match_data_create(), so it is always possible to return the over-
all matched string.
The second argument of pcre2_match_data_create() is a pointer to a gen-
- eral context, which can specify custom memory management for obtaining
+ eral context, which can specify custom memory management for obtaining
the memory for the match data block. If you are not using custom memory
management, pass NULL, which causes malloc() to be used.
- For pcre2_match_data_create_from_pattern(), the first argument is a
+ For pcre2_match_data_create_from_pattern(), the first argument is a
pointer to a compiled pattern. The ovector is created to be exactly the
right size to hold all the substrings a pattern might capture. The sec-
- ond argument is again a pointer to a general context, but in this case
+ ond argument is again a pointer to a general context, but in this case
if NULL is passed, the memory is obtained using the same allocator that
was used for the compiled pattern (custom or default).
- A match data block can be used many times, with the same or different
- compiled patterns. You can extract information from a match data block
+ A match data block can be used many times, with the same or different
+ compiled patterns. You can extract information from a match data block
after a match operation has finished, using functions that are
- described in the sections on matched strings and other match data
+ described in the sections on matched strings and other match data
below.
- When a call of pcre2_match() fails, valid data is available in the
- match block only when the error is PCRE2_ERROR_NOMATCH,
- PCRE2_ERROR_PARTIAL, or one of the error codes for an invalid UTF
+ When a call of pcre2_match() fails, valid data is available in the
+ match block only when the error is PCRE2_ERROR_NOMATCH,
+ PCRE2_ERROR_PARTIAL, or one of the error codes for an invalid UTF
string. Exactly what is available depends on the error, and is detailed
below.
- When one of the matching functions is called, pointers to the compiled
- pattern and the subject string are set in the match data block so that
- they can be referenced by the extraction functions. After running a
- match, you must not free a compiled pattern or a subject string until
- after all operations on the match data block (for that match) have
+ When one of the matching functions is called, pointers to the compiled
+ pattern and the subject string are set in the match data block so that
+ they can be referenced by the extraction functions. After running a
+ match, you must not free a compiled pattern or a subject string until
+ after all operations on the match data block (for that match) have
taken place.
- When a match data block itself is no longer needed, it should be freed
- by calling pcre2_match_data_free().
+ When a match data block itself is no longer needed, it should be freed
+ by calling pcre2_match_data_free(). If this function is called with a
+ NULL argument, it returns immediately, without doing anything.
MATCHING A PATTERN: THE TRADITIONAL FUNCTION
@@ -2358,7 +2425,7 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
set must point to the start of a character, or to the end of the sub-
ject (in UTF-32 mode, one code unit equals one character, so all off-
sets are valid). Like the pattern string, the subject may contain
- binary zeroes.
+ binary zeros.
A non-zero starting offset is useful when searching for another match
in the same subject by calling pcre2_match() again after a previous
@@ -2498,7 +2565,7 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
second and subsequent calls to pcre2_match() if you are making repeated
calls to find other matches in the same subject string.
- WARNING: When PCRE2_NO_UTF_CHECK is set, the effect of passing an
+ Warning: When PCRE2_NO_UTF_CHECK is set, the effect of passing an
invalid string as a subject, or an invalid value of startoffset, is
undefined. Your program may crash or loop indefinitely.
@@ -2644,7 +2711,8 @@ HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS
in the pattern are never changed. That is, if a pattern contains n cap-
turing parentheses, no more than ovector[0] to ovector[2n+1] are set by
pcre2_match(). The other elements retain whatever values they previ-
- ously had.
+ ously had. After a failed match attempt, the contents of the ovector
+ are unchanged.
OTHER INFORMATION ABOUT A MATCH
@@ -2653,36 +2721,45 @@ OTHER INFORMATION ABOUT A MATCH
PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *match_data);
- As well as the offsets in the ovector, other information about a match
- is retained in the match data block and can be retrieved by the above
- functions in appropriate circumstances. If they are called at other
+ As well as the offsets in the ovector, other information about a match
+ is retained in the match data block and can be retrieved by the above
+ functions in appropriate circumstances. If they are called at other
times, the result is undefined.
- After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a
+ After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a
failure to match (PCRE2_ERROR_NOMATCH), a (*MARK), (*PRUNE), or (*THEN)
- name may be available. The function pcre2_get_mark() can be called to
- access this name. The same function applies to all three verbs. It
+ name may be available. The function pcre2_get_mark() can be called to
+ access this name. The same function applies to all three verbs. It
returns a pointer to the zero-terminated name, which is within the com-
piled pattern. If no name is available, NULL is returned. The length of
- the name (excluding the terminating zero) is stored in the code unit
- that precedes the name. You should use this length instead of relying
+ the name (excluding the terminating zero) is stored in the code unit
+ that precedes the name. You should use this length instead of relying
on the terminating zero if the name might contain a binary zero.
- After a successful match, the name that is returned is the last
- (*MARK), (*PRUNE), or (*THEN) name encountered on the matching path
- through the pattern. Instances of (*PRUNE) and (*THEN) without names
- are ignored. Thus, for example, if the matching path contains
- (*MARK:A)(*PRUNE), the name "A" is returned. After a "no match" or a
- partial match, the last encountered name is returned. For example,
+ After a successful match, the name that is returned is the last
+ (*MARK), (*PRUNE), or (*THEN) name encountered on the matching path
+ through the pattern. Instances of (*PRUNE) and (*THEN) without names
+ are ignored. Thus, for example, if the matching path contains
+ (*MARK:A)(*PRUNE), the name "A" is returned. After a "no match" or a
+ partial match, the last encountered name is returned. For example,
consider this pattern:
^(*MARK:A)((*MARK:B)a|b)c
- When it matches "bc", the returned name is A. The B mark is "seen" in
- the first branch of the group, but it is not on the matching path. On
- the other hand, when this pattern fails to match "bx", the returned
+ When it matches "bc", the returned name is A. The B mark is "seen" in
+ the first branch of the group, but it is not on the matching path. On
+ the other hand, when this pattern fails to match "bx", the returned
name is B.
+ Warning: By default, certain start-of-match optimizations are used to
+ give a fast "no match" result in some situations. For example, if the
+ anchoring is removed from the pattern above, there is an initial check
+ for the presence of "c" in the subject before running the matching
+ engine. This check fails for "bx", causing a match failure without see-
+ ing any marks. You can disable the start-of-match optimizations by set-
+ ting the PCRE2_NO_START_OPTIMIZE option for pcre2_compile() or starting
+ the pattern with (*NO_START_OPT).
+
After a successful match, a partial match, or one of the invalid UTF
errors (for example, PCRE2_ERROR_UTF8_ERR5), pcre2_get_startchar() can
be called. After a successful or partial match it returns the code unit
@@ -3014,15 +3091,23 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
replacement string, whose length is supplied in rlength. This can be
given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
which a \K item in a lookahead in the pattern causes the match to end
- before it starts are not supported, and give rise to an error return.
+ before it starts are not supported, and give rise to an error return.
+ For global replacements, matches in which \K in a lookbehind causes the
+ match to start earlier than the point that was reached in the previous
+ iteration are also not supported.
- The first seven arguments of pcre2_substitute() are the same as for
+ The first seven arguments of pcre2_substitute() are the same as for
pcre2_match(), except that the partial matching options are not permit-
- ted, and match_data may be passed as NULL, in which case a match data
- block is obtained and freed within this function, using memory manage-
- ment functions from the match context, if provided, or else those that
+ ted, and match_data may be passed as NULL, in which case a match data
+ block is obtained and freed within this function, using memory manage-
+ ment functions from the match context, if provided, or else those that
were used to allocate memory for the compiled code.
+ If an external match_data block is provided, its contents afterwards
+ are those set by the final call to pcre2_match(), which will have ended
+ in a matching error. The contents of the ovector within the match data
+ block may or may not have been changed.
+
The outlengthptr argument must point to a variable that contains the
length, in code units, of the output buffer. If the function is suc-
cessful, the value is updated to contain the length of the new string,
@@ -3209,11 +3294,12 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
PCRE2_ERROR_BADREPESCAPE (invalid escape sequence), PCRE2_ERROR_REP-
MISSINGBRACE (closing curly bracket not found), PCRE2_ERROR_BADSUBSTI-
TUTION (syntax error in extended group substitution), and
- PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before it started,
+ PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before it started
+ or the match started earlier than the current position in the subject,
which can happen if \K is used in an assertion).
As for all PCRE2 errors, a text message that describes the error can be
- obtained by calling the pcre2_get_error_message() function (see
+ obtained by calling the pcre2_get_error_message() function (see
"Obtaining a textual error message" above).
@@ -3222,56 +3308,56 @@ DUPLICATE SUBPATTERN NAMES
int pcre2_substring_nametable_scan(const pcre2_code *code,
PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last);
- When a pattern is compiled with the PCRE2_DUPNAMES option, names for
- subpatterns are not required to be unique. Duplicate names are always
- allowed for subpatterns with the same number, created by using the (?|
- feature. Indeed, if such subpatterns are named, they are required to
+ When a pattern is compiled with the PCRE2_DUPNAMES option, names for
+ subpatterns are not required to be unique. Duplicate names are always
+ allowed for subpatterns with the same number, created by using the (?|
+ feature. Indeed, if such subpatterns are named, they are required to
use the same names.
Normally, patterns with duplicate names are such that in any one match,
- only one of the named subpatterns participates. An example is shown in
+ only one of the named subpatterns participates. An example is shown in
the pcre2pattern documentation.
- When duplicates are present, pcre2_substring_copy_byname() and
- pcre2_substring_get_byname() return the first substring corresponding
- to the given name that is set. Only if none are set is
- PCRE2_ERROR_UNSET is returned. The pcre2_substring_number_from_name()
+ When duplicates are present, pcre2_substring_copy_byname() and
+ pcre2_substring_get_byname() return the first substring corresponding
+ to the given name that is set. Only if none are set is
+ PCRE2_ERROR_UNSET returned. The pcre2_substring_number_from_name()
function returns the error PCRE2_ERROR_NOUNIQUESUBSTRING when there are
duplicate names.
- If you want to get full details of all captured substrings for a given
- name, you must use the pcre2_substring_nametable_scan() function. The
- first argument is the compiled pattern, and the second is the name. If
- the third and fourth arguments are NULL, the function returns a group
+ If you want to get full details of all captured substrings for a given
+ name, you must use the pcre2_substring_nametable_scan() function. The
+ first argument is the compiled pattern, and the second is the name. If
+ the third and fourth arguments are NULL, the function returns a group
number for a unique name, or PCRE2_ERROR_NOUNIQUESUBSTRING otherwise.
When the third and fourth arguments are not NULL, they must be pointers
- to variables that are updated by the function. After it has run, they
+ to variables that are updated by the function. After it has run, they
point to the first and last entries in the name-to-number table for the
- given name, and the function returns the length of each entry in code
- units. In both cases, PCRE2_ERROR_NOSUBSTRING is returned if there are
+ given name, and the function returns the length of each entry in code
+ units. In both cases, PCRE2_ERROR_NOSUBSTRING is returned if there are
no entries for the given name.
The format of the name table is described above in the section entitled
- Information about a pattern. Given all the relevant entries for the
- name, you can extract each of their numbers, and hence the captured
+ Information about a pattern. Given all the relevant entries for the
+ name, you can extract each of their numbers, and hence the captured
data.
FINDING ALL POSSIBLE MATCHES AT ONE POSITION
- The traditional matching function uses a similar algorithm to Perl,
- which stops when it finds the first match at a given point in the sub-
+ The traditional matching function uses a similar algorithm to Perl,
+ which stops when it finds the first match at a given point in the sub-
ject. If you want to find all possible matches, or the longest possible
- match at a given position, consider using the alternative matching
- function (see below) instead. If you cannot use the alternative func-
+ match at a given position, consider using the alternative matching
+ function (see below) instead. If you cannot use the alternative func-
tion, you can kludge it up by making use of the callout facility, which
is described in the pcre2callout documentation.
What you have to do is to insert a callout right at the end of the pat-
- tern. When your callout function is called, extract and save the cur-
- rent matched substring. Then return 1, which forces pcre2_match() to
- backtrack and try other alternatives. Ultimately, when it runs out of
+ tern. When your callout function is called, extract and save the cur-
+ rent matched substring. Then return 1, which forces pcre2_match() to
+ backtrack and try other alternatives. Ultimately, when it runs out of
matches, pcre2_match() will yield PCRE2_ERROR_NOMATCH.
@@ -3283,26 +3369,26 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
pcre2_match_context *mcontext,
int *workspace, PCRE2_SIZE wscount);
- The function pcre2_dfa_match() is called to match a subject string
- against a compiled pattern, using a matching algorithm that scans the
+ The function pcre2_dfa_match() is called to match a subject string
+ against a compiled pattern, using a matching algorithm that scans the
subject string just once (not counting lookaround assertions), and does
- not backtrack. This has different characteristics to the normal algo-
- rithm, and is not compatible with Perl. Some of the features of PCRE2
- patterns are not supported. Nevertheless, there are times when this
- kind of matching can be useful. For a discussion of the two matching
+ not backtrack. This has different characteristics to the normal algo-
+ rithm, and is not compatible with Perl. Some of the features of PCRE2
+ patterns are not supported. Nevertheless, there are times when this
+ kind of matching can be useful. For a discussion of the two matching
algorithms, and a list of features that pcre2_dfa_match() does not sup-
port, see the pcre2matching documentation.
- The arguments for the pcre2_dfa_match() function are the same as for
+ The arguments for the pcre2_dfa_match() function are the same as for
pcre2_match(), plus two extras. The ovector within the match data block
is used in a different way, and this is described below. The other com-
- mon arguments are used in the same way as for pcre2_match(), so their
+ mon arguments are used in the same way as for pcre2_match(), so their
description is not repeated here.
- The two additional arguments provide workspace for the function. The
- workspace vector should contain at least 20 elements. It is used for
+ The two additional arguments provide workspace for the function. The
+ workspace vector should contain at least 20 elements. It is used for
keeping track of multiple paths through the pattern tree. More
- workspace is needed for patterns and subjects where there are a lot of
+ workspace is needed for patterns and subjects where there are a lot of
potential matches.
Here is an example of a simple call to pcre2_dfa_match():
@@ -3322,45 +3408,45 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
Option bits for pcre2_dfa_match()
- The unused bits of the options argument for pcre2_dfa_match() must be
- zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDAN-
- CHORED, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
+ The unused bits of the options argument for pcre2_dfa_match() must be
+ zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_ENDAN-
+ CHORED, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD,
- PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but
- the last four of these are exactly the same as for pcre2_match(), so
+ PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but
+ the last four of these are exactly the same as for pcre2_match(), so
their description is not repeated here.
PCRE2_PARTIAL_HARD
PCRE2_PARTIAL_SOFT
- These have the same general effect as they do for pcre2_match(), but
- the details are slightly different. When PCRE2_PARTIAL_HARD is set for
- pcre2_dfa_match(), it returns PCRE2_ERROR_PARTIAL if the end of the
+ These have the same general effect as they do for pcre2_match(), but
+ the details are slightly different. When PCRE2_PARTIAL_HARD is set for
+ pcre2_dfa_match(), it returns PCRE2_ERROR_PARTIAL if the end of the
subject is reached and there is still at least one matching possibility
that requires additional characters. This happens even if some complete
- matches have already been found. When PCRE2_PARTIAL_SOFT is set, the
- return code PCRE2_ERROR_NOMATCH is converted into PCRE2_ERROR_PARTIAL
- if the end of the subject is reached, there have been no complete
+ matches have already been found. When PCRE2_PARTIAL_SOFT is set, the
+ return code PCRE2_ERROR_NOMATCH is converted into PCRE2_ERROR_PARTIAL
+ if the end of the subject is reached, there have been no complete
matches, but there is still at least one matching possibility. The por-
- tion of the string that was inspected when the longest partial match
+ tion of the string that was inspected when the longest partial match
was found is set as the first matching string in both cases. There is a
- more detailed discussion of partial and multi-segment matching, with
+ more detailed discussion of partial and multi-segment matching, with
examples, in the pcre2partial documentation.
PCRE2_DFA_SHORTEST
- Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm to
+ Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm to
stop as soon as it has found one match. Because of the way the alterna-
- tive algorithm works, this is necessarily the shortest possible match
+ tive algorithm works, this is necessarily the shortest possible match
at the first possible matching point in the subject string.
PCRE2_DFA_RESTART
- When pcre2_dfa_match() returns a partial match, it is possible to call
+ When pcre2_dfa_match() returns a partial match, it is possible to call
it again, with additional subject characters, and have it continue with
the same match. The PCRE2_DFA_RESTART option requests this action; when
- it is set, the workspace and wscount options must reference the same
- vector as before because data about the match so far is left in them
+ it is set, the workspace and wscount arguments must reference the same
+ vector as before because data about the match so far is left in them
after a partial match. There is more discussion of this facility in the
pcre2partial documentation.
@@ -3368,8 +3454,8 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
When pcre2_dfa_match() succeeds, it may have matched more than one sub-
string in the subject. Note, however, that all the matches from one run
- of the function start at the same point in the subject. The shorter
- matches are all initial substrings of the longer matches. For example,
+ of the function start at the same point in the subject. The shorter
+ matches are all initial substrings of the longer matches. For example,
if the pattern
<.*>
@@ -3384,28 +3470,17 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
<something> <something else>
<something>
- On success, the yield of the function is a number greater than zero,
- which is the number of matched substrings. The offsets of the sub-
- strings are returned in the ovector, and can be extracted by number in
- the same way as for pcre2_match(), but the numbers bear no relation to
- any capturing groups that may exist in the pattern, because DFA match-
+ On success, the yield of the function is a number greater than zero,
+ which is the number of matched substrings. The offsets of the sub-
+ strings are returned in the ovector, and can be extracted by number in
+ the same way as for pcre2_match(), but the numbers bear no relation to
+ any capturing groups that may exist in the pattern, because DFA match-
ing does not support group capture.
- Calls to the convenience functions that extract substrings by name
- return the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used
+ Calls to the convenience functions that extract substrings by name
+ return the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used
after a DFA match. The convenience functions that extract substrings by
- number never return PCRE2_ERROR_NOSUBSTRING, and the meanings of some
- other errors are slightly different:
-
- PCRE2_ERROR_UNAVAILABLE
-
- The ovector is not big enough to include a slot for the given substring
- number.
-
- PCRE2_ERROR_UNSET
-
- There is a slot in the ovector for this substring, but there were
- insufficient matches to fill it.
+ number never return PCRE2_ERROR_NOSUBSTRING.
The matched strings are stored in the ovector in reverse order of
length; that is, the longest matching string is first. If there were
@@ -3431,13 +3506,13 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
This return is given if pcre2_dfa_match() encounters an item in the
pattern that it does not support, for instance, the use of \C in a UTF
- mode or a back reference.
+ mode or a backreference.
PCRE2_ERROR_DFA_UCOND
This return is given if pcre2_dfa_match() encounters a condition item
- that uses a back reference for the condition, or a test for recursion
- in a specific group. These are not supported.
+ that uses a backreference for the condition, or a test for recursion in
+ a specific group. These are not supported.
PCRE2_ERROR_DFA_WSSIZE
@@ -3474,8 +3549,8 @@ AUTHOR
REVISION
- Last updated: 31 December 2017
- Copyright (c) 1997-2017 University of Cambridge.
+ Last updated: 07 September 2018
+ Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
@@ -3526,18 +3601,19 @@ PCRE2 BUILD-TIME OPTIONS
ure works, --enable and --disable always come in pairs, so the comple-
mentary option always exists as well, but as it specifies the default,
it is not described. Options that specify values have names that start
- with --with.
+ with --with. At the end of a configure run, a summary of the configura-
+ tion is output.
BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES
- By default, a library called libpcre2-8 is built, containing functions
- that take string arguments contained in arrays of bytes, interpreted
- either as single-byte characters, or UTF-8 strings. You can also build
- two other libraries, called libpcre2-16 and libpcre2-32, which process
- strings that are contained in arrays of 16-bit and 32-bit code units,
+ By default, a library called libpcre2-8 is built, containing functions
+ that take string arguments contained in arrays of bytes, interpreted
+ either as single-byte characters, or UTF-8 strings. You can also build
+ two other libraries, called libpcre2-16 and libpcre2-32, which process
+ strings that are contained in arrays of 16-bit and 32-bit code units,
respectively. These can be interpreted either as single-unit characters
- or UTF-16/UTF-32 strings. To build these additional libraries, add one
+ or UTF-16/UTF-32 strings. To build these additional libraries, add one
or both of the following to the configure command:
--enable-pcre2-16
@@ -3547,16 +3623,16 @@ BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES
--disable-pcre2-8
- as well. At least one of the three libraries must be built. Note that
- the POSIX wrapper is for the 8-bit library only, and that pcre2grep is
- an 8-bit program. Neither of these are built if you select only the
+ as well. At least one of the three libraries must be built. Note that
+ the POSIX wrapper is for the 8-bit library only, and that pcre2grep is
+ an 8-bit program. Neither of these is built if you select only the
16-bit or 32-bit libraries.
BUILDING SHARED AND STATIC LIBRARIES
- The Autotools PCRE2 building process uses libtool to build both shared
- and static libraries by default. You can suppress an unwanted library
+ The Autotools PCRE2 building process uses libtool to build both shared
+ and static libraries by default. You can suppress an unwanted library
by adding one of
--disable-shared
@@ -3567,40 +3643,40 @@ BUILDING SHARED AND STATIC LIBRARIES
UNICODE AND UTF SUPPORT
- By default, PCRE2 is built with support for Unicode and UTF character
+ By default, PCRE2 is built with support for Unicode and UTF character
strings. To build it without Unicode support, add
--disable-unicode
- to the configure command. This setting applies to all three libraries.
- It is not possible to build one library with Unicode support, and
+ to the configure command. This setting applies to all three libraries.
+ It is not possible to build one library with Unicode support, and
another without, in the same configuration.
- Of itself, Unicode support does not make PCRE2 treat strings as UTF-8,
+ Of itself, Unicode support does not make PCRE2 treat strings as UTF-8,
UTF-16 or UTF-32. To do that, applications that use the library can set
- the PCRE2_UTF option when they call pcre2_compile() to compile a pat-
- tern. Alternatively, patterns may be started with (*UTF) unless the
+ the PCRE2_UTF option when they call pcre2_compile() to compile a pat-
+ tern. Alternatively, patterns may be started with (*UTF) unless the
application has locked this out by setting PCRE2_NEVER_UTF.
UTF support allows the libraries to process character code points up to
- 0x10ffff in the strings that they handle. Unicode support also gives
- access to the Unicode properties of characters, using pattern escapes
+ 0x10ffff in the strings that they handle. Unicode support also gives
+ access to the Unicode properties of characters, using pattern escapes
such as \P, \p, and \X. Only the general category properties such as Lu
- and Nd are supported. Details are given in the pcre2pattern documenta-
+ and Nd are supported. Details are given in the pcre2pattern documenta-
tion.
Pattern escapes such as \d and \w do not by default make use of Unicode
- properties. The application can request that they do by setting the
- PCRE2_UCP option. Unless the application has set PCRE2_NEVER_UCP, a
+ properties. The application can request that they do by setting the
+ PCRE2_UCP option. Unless the application has set PCRE2_NEVER_UCP, a
pattern may also request this by starting with (*UCP).
DISABLING THE USE OF \C
The \C escape sequence, which matches a single code unit, even in a UTF
- mode, can cause unpredictable behaviour because it may leave the cur-
- rent matching point in the middle of a multi-code-unit character. The
- application can lock it out by setting the PCRE2_NEVER_BACKSLASH_C
+ mode, can cause unpredictable behaviour because it may leave the cur-
+ rent matching point in the middle of a multi-code-unit character. The
+ application can lock it out by setting the PCRE2_NEVER_BACKSLASH_C
option when calling pcre2_compile(). There is also a build-time option
--enable-never-backslash-C
@@ -3610,14 +3686,21 @@ DISABLING THE USE OF \C
JUST-IN-TIME COMPILER SUPPORT
- Just-in-time (JIT) compiler support is included in the build by speci-
+ Just-in-time (JIT) compiler support is included in the build by speci-
fying
--enable-jit
- This support is available only for certain hardware architectures. If
- this option is set for an unsupported architecture, a building error
- occurs. If you are running under SELinux you may also want to add
+ This support is available only for certain hardware architectures. If
+ this option is set for an unsupported architecture, a building error
+ occurs. If in doubt, use
+
+ --enable-jit=auto
+
+ which enables JIT only if the current hardware is supported. You can
+ check if JIT is enabled in the configuration summary that is output at
+ the end of a configure run. If you are enabling JIT under SELinux you
+ may also want to add
--enable-jit-sealloc
@@ -3666,8 +3749,8 @@ NEWLINE RECOGNITION
--enable-newline-is-nul
- which causes NUL (binary zero) is set as the default line-ending char-
- acter.
+ which causes NUL (binary zero) to be set as the default line-ending
+ character.
Whatever default line ending convention is selected when PCRE2 is built
can be overridden by applications that use the library. At build time
@@ -3693,11 +3776,11 @@ HANDLING VERY LARGE PATTERNS
part to another (for example, from an opening parenthesis to an alter-
nation metacharacter). By default, in the 8-bit and 16-bit libraries,
two-byte values are used for these offsets, leading to a maximum size
- for a compiled pattern of around 64K code units. This is sufficient to
- handle all but the most gigantic patterns. Nevertheless, some people do
- want to process truly enormous patterns, so it is possible to compile
- PCRE2 to use three-byte or four-byte offsets by adding a setting such
- as
+ for a compiled pattern of around 64 thousand code units. This is suffi-
+ cient to handle all but the most gigantic patterns. Nevertheless, some
+ people do want to process truly enormous patterns, so it is possible to
+ compile PCRE2 to use three-byte or four-byte offsets by adding a set-
+ ting such as
--with-link-size=3
@@ -3724,40 +3807,41 @@ LIMITING PCRE2 RESOURCE USAGE
pcre2_dfa_match() matching function, and to JIT matching (though the
counting is done differently).
- The pcre2_match() function starts out using a 20K vector on the system
- stack to record backtracking points. The more nested backtracking
+ The pcre2_match() function starts out using a 20KiB vector on the sys-
+ tem stack to record backtracking points. The more nested backtracking
points there are (that is, the deeper the search tree), the more memory
is needed. If the initial vector is not large enough, heap memory is
- used, up to a certain limit, which is specified in kilobytes. The limit
- can be changed at run time, as described in the pcre2api documentation.
- The default limit (in effect unlimited) is 20 million. You can change
- this by a setting such as
+ used, up to a certain limit, which is specified in kibibytes (units of
+ 1024 bytes). The limit can be changed at run time, as described in the
+ pcre2api documentation. The default limit (in effect unlimited) is 20
+ million. You can change this by a setting such as
--with-heap-limit=500
- which limits the amount of heap to 500 kilobytes. This limit applies
- only to interpretive matching in pcre2_match(). It does not apply when
- JIT (which has its own memory arrangements) is used, nor does it apply
- to pcre2_dfa_match().
+ which limits the amount of heap to 500 KiB. This limit applies only to
+ interpretive matching in pcre2_match() and pcre2_dfa_match(), which may
+ also use the heap for internal workspace when processing complicated
+ patterns. This limit does not apply when JIT (which has its own memory
+ arrangements) is used.
- You can also explicitly limit the depth of nested backtracking in the
+ You can also explicitly limit the depth of nested backtracking in the
pcre2_match() interpreter. This limit defaults to the value that is set
- for --with-match-limit. You can set a lower default limit by adding,
+ for --with-match-limit. You can set a lower default limit by adding,
for example,
--with-match-limit-depth=10000
- to the configure command. This value can be overridden at run time.
- This depth limit indirectly limits the amount of heap memory that is
- used, but because the size of each backtracking "frame" depends on the
- number of capturing parentheses in a pattern, the amount of heap that
- is used before the limit is reached varies from pattern to pattern.
- This limit was more useful in versions before 10.30, where function
+ to the configure command. This value can be overridden at run time.
+ This depth limit indirectly limits the amount of heap memory that is
+ used, but because the size of each backtracking "frame" depends on the
+ number of capturing parentheses in a pattern, the amount of heap that
+ is used before the limit is reached varies from pattern to pattern.
+ This limit was more useful in versions before 10.30, where function
recursion was used for backtracking.
As well as applying to pcre2_match(), the depth limit also controls the
- depth of recursive function calls in pcre2_dfa_match(). These are used
- for lookaround assertions, atomic groups, and recursion within pat-
+ depth of recursive function calls in pcre2_dfa_match(). These are used
+ for lookaround assertions, atomic groups, and recursion within pat-
terns. The limit does not apply to JIT matching.
@@ -3765,45 +3849,45 @@ CREATING CHARACTER TABLES AT BUILD TIME
PCRE2 uses fixed tables for processing characters whose code points are
less than 256. By default, PCRE2 is built with a set of tables that are
- distributed in the file src/pcre2_chartables.c.dist. These tables are
+ distributed in the file src/pcre2_chartables.c.dist. These tables are
for ASCII codes only. If you add
--enable-rebuild-chartables
- to the configure command, the distributed tables are no longer used.
- Instead, a program called dftables is compiled and run. This outputs
+ to the configure command, the distributed tables are no longer used.
+ Instead, a program called dftables is compiled and run. This outputs
the source for a new set of tables, created in the default locale of your
C run-time system. This method of replacing the tables does not work if
- you are cross compiling, because dftables is run on the local host. If
- you need to create alternative tables when cross compiling, you will
+ you are cross compiling, because dftables is run on the local host. If
+ you need to create alternative tables when cross compiling, you will
have to do so "by hand".
USING EBCDIC CODE
- PCRE2 assumes by default that it will run in an environment where the
- character code is ASCII or Unicode, which is a superset of ASCII. This
+ PCRE2 assumes by default that it will run in an environment where the
+ character code is ASCII or Unicode, which is a superset of ASCII. This
is the case for most computer operating systems. PCRE2 can, however, be
compiled to run in an 8-bit EBCDIC environment by adding
--enable-ebcdic --disable-unicode
to the configure command. This setting implies --enable-rebuild-charta-
- bles. You should only use it if you know that you are in an EBCDIC
+ bles. You should only use it if you know that you are in an EBCDIC
environment (for example, an IBM mainframe operating system).
- It is not possible to support both EBCDIC and UTF-8 codes in the same
- version of the library. Consequently, --enable-unicode and --enable-
+ It is not possible to support both EBCDIC and UTF-8 codes in the same
+ version of the library. Consequently, --enable-unicode and --enable-
ebcdic are mutually exclusive.
The EBCDIC character that corresponds to an ASCII LF is assumed to have
- the value 0x15 by default. However, in some EBCDIC environments, 0x25
+ the value 0x15 by default. However, in some EBCDIC environments, 0x25
is used. In such an environment you should use
--enable-ebcdic-nl25
as well as, or instead of, --enable-ebcdic. The EBCDIC character for CR
- has the same value as in ASCII, namely, 0x0d. Whichever of 0x15 and
+ has the same value as in ASCII, namely, 0x0d. Whichever of 0x15 and
0x25 is not chosen as LF is made to correspond to the Unicode NEL char-
acter (which, in Unicode, is 0x85).
@@ -3816,43 +3900,43 @@ PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS
By default, on non-Windows systems, pcre2grep supports the use of call-
outs with string arguments within the patterns it is matching, in order
- to run external scripts. For details, see the pcre2grep documentation.
- This support can be disabled by adding --disable-pcre2grep-callout to
+ to run external scripts. For details, see the pcre2grep documentation.
+ This support can be disabled by adding --disable-pcre2grep-callout to
the configure command.
PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT
- By default, pcre2grep reads all files as plain text. You can build it
- so that it recognizes files whose names end in .gz or .bz2, and reads
+ By default, pcre2grep reads all files as plain text. You can build it
+ so that it recognizes files whose names end in .gz or .bz2, and reads
them with libz or libbz2, respectively, by adding one or both of
--enable-pcre2grep-libz
--enable-pcre2grep-libbz2
to the configure command. These options naturally require that the rel-
- evant libraries are installed on your system. Configuration will fail
+ evant libraries are installed on your system. Configuration will fail
if they are not.
PCRE2GREP BUFFER SIZE
- pcre2grep uses an internal buffer to hold a "window" on the file it is
+ pcre2grep uses an internal buffer to hold a "window" on the file it is
scanning, in order to be able to output "before" and "after" lines when
- it finds a match. The starting size of the buffer is controlled by a
- parameter whose default value is 20K. The buffer itself is three times
- this size, but because of the way it is used for holding "before"
- lines, the longest line that is guaranteed to be processable is the
- parameter size. If a longer line is encountered, pcre2grep automati-
- cally expands the buffer, up to a specified maximum size, whose default
- is 1M or the starting size, whichever is the larger. You can change the
- default parameter values by adding, for example,
+ it finds a match. The default starting size of the buffer is 20KiB. The
+ buffer itself is three times this size, but because of the way it is
+ used for holding "before" lines, the longest line that is guaranteed to
+ be processable is the notional buffer size. If a longer line is encoun-
+ tered, pcre2grep automatically expands the buffer, up to a specified
+ maximum size, whose default is 1MiB or the starting size, whichever is
+ the larger. You can change the default parameter values by adding, for
+ example,
--with-pcre2grep-bufsize=51200
--with-pcre2grep-max-bufsize=2097152
- to the configure command. The caller of pcre2grep can override these
- values by using --buffer-size and --max-buffer-size on the command
+ to the configure command. The caller of pcre2grep can override these
+ values by using --buffer-size and --max-buffer-size on the command
line.
@@ -3863,26 +3947,26 @@ PCRE2TEST OPTION FOR LIBREADLINE SUPPORT
--enable-pcre2test-libreadline
--enable-pcre2test-libedit
- to the configure command, pcre2test is linked with the libreadline
+ to the configure command, pcre2test is linked with the libreadline
or libedit library, respectively, and when its input is from a terminal,
- it reads it using the readline() function. This provides line-editing
- and history facilities. Note that libreadline is GPL-licensed, so if
- you distribute a binary of pcre2test linked in this way, there may be
+ it reads it using the readline() function. This provides line-editing
+ and history facilities. Note that libreadline is GPL-licensed, so if
+ you distribute a binary of pcre2test linked in this way, there may be
licensing issues. These can be avoided by linking instead with libedit,
which has a BSD licence.
- Setting --enable-pcre2test-libreadline causes the -lreadline option to
- be added to the pcre2test build. In many operating environments with a
- sytem-installed readline library this is sufficient. However, in some
+ Setting --enable-pcre2test-libreadline causes the -lreadline option to
+ be added to the pcre2test build. In many operating environments with a
+ system-installed readline library this is sufficient. However, in some
environments (e.g. if an unmodified distribution version of readline is
- in use), some extra configuration may be necessary. The INSTALL file
+ in use), some extra configuration may be necessary. The INSTALL file
for libreadline says this:
"Readline uses the termcap functions, but does not link with
the termcap or curses library itself, allowing applications
which link with readline the to choose an appropriate library."
- If your environment has not been set up so that an appropriate library
+ If your environment has not been set up so that an appropriate library
is automatically included, you may need to add something like
LIBS="-lncurses"
@@ -3896,7 +3980,7 @@ INCLUDING DEBUGGING CODE
--enable-debug
- to the configure command, additional debugging code is included in the
+ to the configure command, additional debugging code is included in the
build. This feature is intended for use by the PCRE2 maintainers.
@@ -3906,15 +3990,15 @@ DEBUGGING WITH VALGRIND SUPPORT
--enable-valgrind
- to the configure command, PCRE2 will use valgrind annotations to mark
- certain memory regions as unaddressable. This allows it to detect
- invalid memory accesses, and is mostly useful for debugging PCRE2
+ to the configure command, PCRE2 will use valgrind annotations to mark
+ certain memory regions as unaddressable. This allows it to detect
+ invalid memory accesses, and is mostly useful for debugging PCRE2
itself.
CODE COVERAGE REPORTING
- If your C compiler is gcc, you can build a version of PCRE2 that can
+ If your C compiler is gcc, you can build a version of PCRE2 that can
generate a code coverage report for its test suite. To enable this, you
must install lcov version 1.6 or above. Then specify
@@ -3923,20 +4007,20 @@ CODE COVERAGE REPORTING
to the configure command and build PCRE2 in the usual way.
Note that using ccache (a caching C compiler) is incompatible with code
- coverage reporting. If you have configured ccache to run automatically
+ coverage reporting. If you have configured ccache to run automatically
on your system, you must set the environment variable
CCACHE_DISABLE=1
before running make to build PCRE2, so that ccache is not used.
- When --enable-coverage is used, the following addition targets are
+ When --enable-coverage is used, the following additional targets are
added to the Makefile:
make coverage
- This creates a fresh coverage report for the PCRE2 test suite. It is
- equivalent to running "make coverage-reset", "make coverage-baseline",
+ This creates a fresh coverage report for the PCRE2 test suite. It is
+ equivalent to running "make coverage-reset", "make coverage-baseline",
"make check", and then "make coverage-report".
make coverage-reset
@@ -3953,56 +4037,56 @@ CODE COVERAGE REPORTING
make coverage-clean-report
- This removes the generated coverage report without cleaning the cover-
+ This removes the generated coverage report without cleaning the cover-
age data itself.
make coverage-clean-data
- This removes the captured coverage data without removing the coverage
+ This removes the captured coverage data without removing the coverage
files created at compile time (*.gcno).
make coverage-clean
- This cleans all coverage data including the generated coverage report.
- For more information about code coverage, see the gcov and lcov docu-
+ This cleans all coverage data including the generated coverage report.
+ For more information about code coverage, see the gcov and lcov docu-
mentation.
SUPPORT FOR FUZZERS
- There is a special option for use by people who want to run fuzzing
+ There is a special option for use by people who want to run fuzzing
tests on PCRE2:
--enable-fuzz-support
At present this applies only to the 8-bit library. If set, it causes an
- extra library called libpcre2-fuzzsupport.a to be built, but not
- installed. This contains a single function called LLVMFuzzerTestOneIn-
- put() whose arguments are a pointer to a string and the length of the
- string. When called, this function tries to compile the string as a
- pattern, and if that succeeds, to match it. This is done both with no
- options and with some random options bits that are generated from the
+ extra library called libpcre2-fuzzsupport.a to be built, but not
+ installed. This contains a single function called LLVMFuzzerTestOneIn-
+ put() whose arguments are a pointer to a string and the length of the
+ string. When called, this function tries to compile the string as a
+ pattern, and if that succeeds, to match it. This is done both with no
+ options and with some random options bits that are generated from the
string.
- Setting --enable-fuzz-support also causes a binary called pcre2fuz-
- zcheck to be created. This is normally run under valgrind or used when
+ Setting --enable-fuzz-support also causes a binary called pcre2fuz-
+ zcheck to be created. This is normally run under valgrind or used when
PCRE2 is compiled with address sanitizing enabled. It calls the fuzzing
- function and outputs information about it is doing. The input strings
- are specified by arguments: if an argument starts with "=" the rest of
- it is a literal input string. Otherwise, it is assumed to be a file
- name, and the contents of the file are the test string.
+ function and outputs information about what it is doing. The input
+ strings are specified by arguments: if an argument starts with "=" the
+ rest of it is a literal input string. Otherwise, it is assumed to be a
+ file name, and the contents of the file are the test string.
OBSOLETE OPTION
- In versions of PCRE2 prior to 10.30, there were two ways of handling
- backtracking in the pcre2_match() function. The default was to use the
+ In versions of PCRE2 prior to 10.30, there were two ways of handling
+ backtracking in the pcre2_match() function. The default was to use the
system stack, but if
--disable-stack-for-recursion
- was set, memory on the heap was used. From release 10.30 onwards this
- has changed (the stack is no longer used) and this option now does
+ was set, memory on the heap was used. From release 10.30 onwards this
+ has changed (the stack is no longer used) and this option now does
nothing except give a warning.
@@ -4020,8 +4104,8 @@ AUTHOR
REVISION
- Last updated: 18 July 2017
- Copyright (c) 1997-2017 University of Cambridge.
+ Last updated: 26 April 2018
+ Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
@@ -4149,9 +4233,9 @@ MISSING CALLOUTS
all branches are anchorable.
This optimization is disabled, however, if .* is in an atomic group or
- if there is a back reference to the capturing group in which it
- appears. It is also disabled if the pattern contains (*PRUNE) or
- (*SKIP). However, the presence of callouts does not affect it.
+ if there is a backreference to the capturing group in which it appears.
+ It is also disabled if the pattern contains (*PRUNE) or (*SKIP). How-
+ ever, the presence of callouts does not affect it.
For example, if the pattern .*\d is compiled with PCRE2_AUTO_CALLOUT
and applied to the string "aa", the pcre2test output is:
@@ -4301,10 +4385,12 @@ THE CALLOUT INTERFACE
their ovector slots set to PCRE2_UNSET.
For DFA matching, the offset_vector field points to the ovector that
- was passed to the matching function in the match data block, but it
- holds no useful information at callout time because pcre2_dfa_match()
- does not support substring capturing. The value of capture_top is
- always 1 and the value of capture_last is always 0 for DFA matching.
+ was passed to the matching function in the match data block for call-
+ outs at the top level, but to an internal ovector during the processing
+ of pattern recursions, lookarounds, and atomic groups. However, these
+ ovectors hold no useful information because pcre2_dfa_match() does not
+ support substring capturing. The value of capture_top is always 1 and
+ the value of capture_last is always 0 for DFA matching.
The subject and subject_length fields contain copies of the values that
were passed to the matching function.
@@ -4444,8 +4530,8 @@ AUTHOR
REVISION
- Last updated: 22 December 2017
- Copyright (c) 1997-2017 University of Cambridge.
+ Last updated: 26 April 2018
+ Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
@@ -4469,7 +4555,7 @@ DIFFERENCES BETWEEN PCRE2 AND PERL
2. Like Perl, PCRE2 allows repeat quantifiers on parenthesized asser-
tions, but they do not mean what you might think. For example, (?!a){3}
does not assert that the next three characters are not "a". It just
- asserts that the next character is not "a" three times (in principle:
+ asserts that the next character is not "a" three times (in principle;
PCRE2 optimizes this to run the assertion just once). Perl allows some
repeat quantifiers on other assertions, for example, \b* (but not
\b{3}), but these do not seem to have any use.
@@ -4479,157 +4565,164 @@ DIFFERENCES BETWEEN PCRE2 AND PERL
when a negative assertion is a condition that has a matching branch
(that is, the condition is false).
- 4. The following Perl escape sequences are not supported: \l, \u, \L,
- \U, and \N when followed by a character name or Unicode value. (\N on
- its own, matching a non-newline character, is supported.) In fact these
- are implemented by Perl's general string-handling and are not part of
- its pattern matching engine. If any of these are encountered by PCRE2,
- an error is generated by default. However, if the PCRE2_ALT_BSUX option
- is set, \U and \u are interpreted as ECMAScript interprets them.
+ 4. The following Perl escape sequences are not supported: \F, \l, \L,
+ \u, \U, and \N when followed by a character name. \N on its own, match-
+ ing a non-newline character, and \N{U+dd..}, matching a Unicode code
+ point, are supported. The escapes that modify the case of following
+ letters are implemented by Perl's general string-handling and are not
+ part of its pattern matching engine. If any of these are encountered by
+ PCRE2, an error is generated by default. However, if the PCRE2_ALT_BSUX
+ option is set, \U and \u are interpreted as ECMAScript interprets them.
5. The Perl escape sequences \p, \P, and \X are supported only if PCRE2
is built with Unicode support (the default). The properties that can be
- tested with \p and \P are limited to the general category properties
- such as Lu and Nd, script names such as Greek or Han, and the derived
+ tested with \p and \P are limited to the general category properties
+ such as Lu and Nd, script names such as Greek or Han, and the derived
properties Any and L&. PCRE2 does support the Cs (surrogate) property,
- which Perl does not; the Perl documentation says "Because Perl hides
+ which Perl does not; the Perl documentation says "Because Perl hides
the need for the user to understand the internal representation of Uni-
- code characters, there is no need to implement the somewhat messy con-
+ code characters, there is no need to implement the somewhat messy con-
cept of surrogates."
- 6. PCRE2 does support the \Q...\E escape for quoting substrings. Char-
- acters in between are treated as literals. This is slightly different
- from Perl in that $ and @ are also handled as literals inside the
+ 6. PCRE2 supports the \Q...\E escape for quoting substrings. Characters
+ in between are treated as literals. However, this is slightly different
+ from Perl in that $ and @ are also handled as literals inside the
quotes. In Perl, they cause variable interpolation (but of course PCRE2
- does not have variables). Note the following examples:
+ does not have variables). Also, Perl does "double-quotish backslash
+ interpolation" on any backslashes between \Q and \E which, its documen-
+ tation says, "may lead to confusing results". PCRE2 treats a backslash
+ between \Q and \E just like any other character. Note the following
+ examples:
- Pattern PCRE2 matches Perl matches
+ Pattern PCRE2 matches Perl matches
\Qabc$xyz\E abc$xyz abc followed by the
contents of $xyz
\Qabc\$xyz\E abc\$xyz abc\$xyz
\Qabc\E\$\Qxyz\E abc$xyz abc$xyz
+ \QA\B\E A\B A\B
+ \Q\\E \ \\E
- The \Q...\E sequence is recognized both inside and outside character
+ The \Q...\E sequence is recognized both inside and outside character
classes.
- 7. Fairly obviously, PCRE2 does not support the (?{code}) and
- (??{code}) constructions. However, there is support PCRE2's "callout"
- feature, which allows an external function to be called during pattern
- matching. See the pcre2callout documentation for details.
+ 7. Fairly obviously, PCRE2 does not support the (?{code}) and
+ (??{code}) constructions. However, PCRE2 does have a "callout" feature,
+ which allows an external function to be called during pattern matching.
+ See the pcre2callout documentation for details.
- 8. Subroutine calls (whether recursive or not) were treated as atomic
- groups up to PCRE2 release 10.23, but from release 10.30 this changed,
+ 8. Subroutine calls (whether recursive or not) were treated as atomic
+ groups up to PCRE2 release 10.23, but from release 10.30 this changed,
and backtracking into subroutine calls is now supported, as in Perl.
- 9. If any of the backtracking control verbs are used in a subpattern
- that is called as a subroutine (whether or not recursively), their
- effect is confined to that subpattern; it does not extend to the sur-
- rounding pattern. This is not always the case in Perl. In particular,
- if (*THEN) is present in a group that is called as a subroutine, its
+ 9. If any of the backtracking control verbs are used in a subpattern
+ that is called as a subroutine (whether or not recursively), their
+ effect is confined to that subpattern; it does not extend to the sur-
+ rounding pattern. This is not always the case in Perl. In particular,
+ if (*THEN) is present in a group that is called as a subroutine, its
action is limited to that group, even if the group does not contain any
- | characters. Note that such subpatterns are processed as anchored at
+ | characters. Note that such subpatterns are processed as anchored at
the point where they are tested.
- 10. If a pattern contains more than one backtracking control verb, the
- first one that is backtracked onto acts. For example, in the pattern
- A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure
+ 10. If a pattern contains more than one backtracking control verb, the
+ first one that is backtracked onto acts. For example, in the pattern
+ A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure
in C triggers (*PRUNE). Perl's behaviour is more complex; in many cases
it is the same as PCRE2, but there are cases where it differs.
- 11. Most backtracking verbs in assertions have their normal actions.
+ 11. Most backtracking verbs in assertions have their normal actions.
They are not confined to the assertion.
- 12. There are some differences that are concerned with the settings of
- captured strings when part of a pattern is repeated. For example,
- matching "aba" against the pattern /^(a(b)?)+$/ in Perl leaves $2
+ 12. There are some differences that are concerned with the settings of
+ captured strings when part of a pattern is repeated. For example,
+ matching "aba" against the pattern /^(a(b)?)+$/ in Perl leaves $2
unset, but in PCRE2 it is set to "b".
13. PCRE2's handling of duplicate subpattern numbers and duplicate sub-
pattern names is not as general as Perl's. This is a consequence of the
- fact the PCRE2 works internally just with numbers, using an external
- table to translate between numbers and names. In particular, a pattern
- such as (?|(?<a>A)|(?<b>B), where the two capturing parentheses have
- the same number but different names, is not supported, and causes an
- error at compile time. If it were allowed, it would not be possible to
- distinguish which parentheses matched, because both names map to cap-
+ fact that PCRE2 works internally just with numbers, using an external
+ table to translate between numbers and names. In particular, a pattern
+ such as (?|(?<a>A)|(?<b>B)), where the two capturing parentheses have
+ the same number but different names, is not supported, and causes an
+ error at compile time. If it were allowed, it would not be possible to
+ distinguish which parentheses matched, because both names map to cap-
turing subpattern number 1. To avoid this confusing situation, an error
is given at compile time.
14. Perl used to recognize comments in some places that PCRE2 does not,
- for example, between the ( and ? at the start of a subpattern. If the
+ for example, between the ( and ? at the start of a subpattern. If the
/x modifier is set, Perl allowed white space between ( and ? though the
- latest Perls give an error (for a while it was just deprecated). There
+ latest Perls give an error (for a while it was just deprecated). There
may still be some cases where Perl behaves differently.
- 15. Perl, when in warning mode, gives warnings for character classes
- such as [A-\d] or [a-[:digit:]]. It then treats the hyphens as liter-
+ 15. Perl, when in warning mode, gives warnings for character classes
+ such as [A-\d] or [a-[:digit:]]. It then treats the hyphens as liter-
als. PCRE2 has no warning features, so it gives an error in these cases
because they are almost certainly user mistakes.
- 16. In PCRE2, the upper/lower case character properties Lu and Ll are
- not affected when case-independent matching is specified. For example,
+ 16. In PCRE2, the upper/lower case character properties Lu and Ll are
+ not affected when case-independent matching is specified. For example,
\p{Lu} always matches an upper case letter. I think Perl has changed in
- this respect; in the release at the time of writing (5.24), \p{Lu} and
+ this respect; in the release at the time of writing (5.24), \p{Lu} and
\p{Ll} match all letters, regardless of case, when case independence is
specified.
- 17. PCRE2 provides some extensions to the Perl regular expression
- facilities. Perl 5.10 includes new features that are not in earlier
- versions of Perl, some of which (such as named parentheses) were in
+ 17. PCRE2 provides some extensions to the Perl regular expression
+ facilities. Perl 5.10 includes new features that are not in earlier
+ versions of Perl, some of which (such as named parentheses) were in
PCRE2 for some time before. This list is with respect to Perl 5.26:
- (a) Although lookbehind assertions in PCRE2 must match fixed length
- strings, each alternative branch of a lookbehind assertion can match a
- different length of string. Perl requires them all to have the same
+ (a) Although lookbehind assertions in PCRE2 must match fixed length
+ strings, each alternative branch of a lookbehind assertion can match a
+ different length of string. Perl requires them all to have the same
length.
- (b) From PCRE2 10.23, back references to groups of fixed length are
- supported in lookbehinds, provided that there is no possibility of ref-
- erencing a non-unique number or name. Perl does not support backrefer-
+ (b) From PCRE2 10.23, backreferences to groups of fixed length are sup-
+ ported in lookbehinds, provided that there is no possibility of refer-
+ encing a non-unique number or name. Perl does not support backrefer-
ences in lookbehinds.
- (c) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the
+ (c) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the
$ meta-character matches only at the very end of the string.
- (d) A backslash followed by a letter with no special meaning is
+ (d) A backslash followed by a letter with no special meaning is
faulted. (Perl can be made to issue a warning.)
- (e) If PCRE2_UNGREEDY is set, the greediness of the repetition quanti-
+ (e) If PCRE2_UNGREEDY is set, the greediness of the repetition quanti-
fiers is inverted, that is, by default they are not greedy, but if fol-
lowed by a question mark they are.
- (f) PCRE2_ANCHORED can be used at matching time to force a pattern to
+ (f) PCRE2_ANCHORED can be used at matching time to force a pattern to
be tried only at the first matching position in the subject string.
- (g) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY and
+ (g) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY and
PCRE2_NOTEMPTY_ATSTART options have no Perl equivalents.
- (h) The \R escape sequence can be restricted to match only CR, LF, or
+ (h) The \R escape sequence can be restricted to match only CR, LF, or
CRLF by the PCRE2_BSR_ANYCRLF option.
- (i) The callout facility is PCRE2-specific. Perl supports codeblocks
+ (i) The callout facility is PCRE2-specific. Perl supports codeblocks
and variable interpolation, but not general hooks on every match.
(j) The partial matching facility is PCRE2-specific.
- (k) The alternative matching function (pcre2_dfa_match() matches in a
+ (k) The alternative matching function (pcre2_dfa_match()) matches in a
different way and is not Perl-compatible.
- (l) PCRE2 recognizes some special sequences such as (*CR) or (*NO_JIT)
- at the start of a pattern that set overall options that cannot be
+ (l) PCRE2 recognizes some special sequences such as (*CR) or (*NO_JIT)
+ at the start of a pattern that set overall options that cannot be
changed within the pattern.
- 18. The Perl /a modifier restricts /d numbers to pure ascii, and the
- /aa modifier restricts /i case-insensitive matching to pure ascii,
- ignoring Unicode rules. This separation cannot be represented with
+ 18. The Perl /a modifier restricts \d numbers to pure ascii, and the
+ /aa modifier restricts /i case-insensitive matching to pure ascii,
+ ignoring Unicode rules. This separation cannot be represented with
PCRE2_UCP.
19. Perl has different limits than PCRE2. See the pcre2limit documenta-
tion for details. Perl went with 5.10 from recursion to iteration keep-
ing the intermediate matches on the heap, which is ~10% slower but does
- not fall into any stack-overflow limit. PCRE2 made a similar change at
- release 10.30, and also has many build-time and run-time customizable
+ not fall into any stack-overflow limit. PCRE2 made a similar change at
+ release 10.30, and also has many build-time and run-time customizable
limits.
@@ -4642,8 +4735,8 @@ AUTHOR
REVISION
- Last updated: 18 April 2017
- Copyright (c) 1997-2017 University of Cambridge.
+ Last updated: 28 July 2018
+ Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
@@ -4798,7 +4891,7 @@ RETURN VALUES FROM JIT MATCHING
CONTROLLING THE JIT STACK
When the compiled JIT code runs, it needs a block of memory to use as a
- stack. By default, it uses 32K on the machine stack. However, some
+ stack. By default, it uses 32KiB on the machine stack. However, some
large or complicated patterns need more than this. The error
PCRE2_ERROR_JIT_STACKLIMIT is given when there is not enough stack.
Three functions are provided for managing blocks of memory for use as
@@ -4810,9 +4903,11 @@ CONTROLLING THE JIT STACK
memory allocation functions, or NULL for standard memory allocation).
It returns a pointer to an opaque structure of type pcre2_jit_stack, or
NULL if there is an error. The pcre2_jit_stack_free() function is used
- to free a stack that is no longer needed. (For the technically minded:
- the address space is allocated by mmap or VirtualAlloc.) A maximum
- stack size of 512K to 1M should be more than enough for any pattern.
+ to free a stack that is no longer needed. If its argument is NULL, this
+ function returns immediately, without doing anything. (For the techni-
+ cally minded: the address space is allocated by mmap or VirtualAlloc.)
+ A maximum stack size of 512KiB to 1MiB should be more than enough for
+ any pattern.
The pcre2_jit_stack_assign() function specifies which stack JIT code
should use. Its arguments are as follows:
@@ -4823,10 +4918,11 @@ CONTROLLING THE JIT STACK
The first argument is a pointer to a match context. When this is subse-
quently passed to a matching function, its information determines which
- JIT stack is used. There are three cases for the values of the other
- two options:
+ JIT stack is used. If this argument is NULL, the function returns imme-
+ diately, without doing anything. There are three cases for the values
+ of the other two options:
- (1) If callback is NULL and data is NULL, an internal 32K block
+ (1) If callback is NULL and data is NULL, an internal 32KiB block
on the machine stack is used. This is the default when a match
context is created.
@@ -4837,38 +4933,38 @@ CONTROLLING THE JIT STACK
(3) If callback is not NULL, it must point to a function that is
called with data as an argument at the start of matching, in
order to set up a JIT stack. If the return from the callback
- function is NULL, the internal 32K stack is used; otherwise the
+ function is NULL, the internal 32KiB stack is used; otherwise the
return value must be a valid JIT stack, the result of calling
pcre2_jit_stack_create().
- A callback function is obeyed whenever JIT code is about to be run; it
+ A callback function is obeyed whenever JIT code is about to be run; it
is not obeyed when pcre2_match() is called with options that are incom-
- patible for JIT matching. A callback function can therefore be used to
- determine whether a match operation was executed by JIT or by the
+ patible for JIT matching. A callback function can therefore be used to
+ determine whether a match operation was executed by JIT or by the
interpreter.
You may safely use the same JIT stack for more than one pattern (either
- by assigning directly or by callback), as long as the patterns are
+ by assigning directly or by callback), as long as the patterns are
matched sequentially in the same thread. Currently, the only way to set
- up non-sequential matches in one thread is to use callouts: if a call-
- out function starts another match, that match must use a different JIT
+ up non-sequential matches in one thread is to use callouts: if a call-
+ out function starts another match, that match must use a different JIT
stack to the one used for currently suspended match(es).
- In a multithread application, if you do not specify a JIT stack, or if
- you assign or pass back NULL from a callback, that is thread-safe,
- because each thread has its own machine stack. However, if you assign
- or pass back a non-NULL JIT stack, this must be a different stack for
+ In a multithread application, if you do not specify a JIT stack, or if
+ you assign or pass back NULL from a callback, that is thread-safe,
+ because each thread has its own machine stack. However, if you assign
+ or pass back a non-NULL JIT stack, this must be a different stack for
each thread so that the application is thread-safe.
- Strictly speaking, even more is allowed. You can assign the same non-
- NULL stack to a match context that is used by any number of patterns,
- as long as they are not used for matching by multiple threads at the
- same time. For example, you could use the same stack in all compiled
- patterns, with a global mutex in the callback to wait until the stack
+ Strictly speaking, even more is allowed. You can assign the same non-
+ NULL stack to a match context that is used by any number of patterns,
+ as long as they are not used for matching by multiple threads at the
+ same time. For example, you could use the same stack in all compiled
+ patterns, with a global mutex in the callback to wait until the stack
is available for use. However, this is an inefficient solution, and not
recommended.
- This is a suggestion for how a multithreaded program that needs to set
+ This is a suggestion for how a multithreaded program that needs to set
up non-default JIT stacks might operate:
During thread initialization
@@ -4880,7 +4976,7 @@ CONTROLLING THE JIT STACK
Use a one-line callback function
return thread_local_var
- All the functions described in this section do nothing if JIT is not
+ All the functions described in this section do nothing if JIT is not
available.
@@ -4889,29 +4985,29 @@ JIT STACK FAQ
(1) Why do we need JIT stacks?
PCRE2 (and JIT) is a recursive, depth-first engine, so it needs a stack
- where the local data of the current node is pushed before checking its
+ where the local data of the current node is pushed before checking its
child nodes. Allocating real machine stack on some platforms is diffi-
cult. For example, the stack chain needs to be updated every time if we
- extend the stack on PowerPC. Although it is possible, its updating
+ extend the stack on PowerPC. Although it is possible, its updating
time overhead decreases performance. So we do the recursion in memory.
(2) Why don't we simply allocate blocks of memory with malloc()?
- Modern operating systems have a nice feature: they can reserve an
+ Modern operating systems have a nice feature: they can reserve an
address space instead of allocating memory. We can safely allocate mem-
- ory pages inside this address space, so the stack could grow without
+ ory pages inside this address space, so the stack could grow without
moving memory data (this is important because of pointers). Thus we can
- allocate 1M address space, and use only a single memory page (usually
- 4K) if that is enough. However, we can still grow up to 1M anytime if
- needed.
+ allocate 1MiB address space, and use only a single memory page (usually
+ 4KiB) if that is enough. However, we can still grow up to 1MiB anytime
+ if needed.
(3) Who "owns" a JIT stack?
The owner of the stack is the user program, not the JIT studied pattern
or anything else. The user program must ensure that if a stack is being
used by pcre2_match(), (that is, it is assigned to a match context that
- is passed to the pattern currently running), that stack must not be
- used by any other threads (to avoid overwriting the same memory area).
+ is passed to the pattern currently running), that stack must not be
+ used by any other threads (to avoid overwriting the same memory area).
The best practice for multithreaded programs is to allocate a stack for
each thread, and return this stack through the JIT callback function.
@@ -4919,36 +5015,36 @@ JIT STACK FAQ
You can free a JIT stack at any time, as long as it will not be used by
pcre2_match() again. When you assign the stack to a match context, only
- a pointer is set. There is no reference counting or any other magic.
+ a pointer is set. There is no reference counting or any other magic.
You can free compiled patterns, contexts, and stacks in any order, any-
- time. Just do not call pcre2_match() with a match context pointing to
+ time. Just do not call pcre2_match() with a match context pointing to
an already freed stack, as that will cause SEGFAULT. (Also, do not free
- a stack currently used by pcre2_match() in another thread). You can
- also replace the stack in a context at any time when it is not in use.
+ a stack currently used by pcre2_match() in another thread). You can
+ also replace the stack in a context at any time when it is not in use.
You should free the previous stack before assigning a replacement.
- (5) Should I allocate/free a stack every time before/after calling
+ (5) Should I allocate/free a stack every time before/after calling
pcre2_match()?
- No, because this is too costly in terms of resources. However, you
- could implement some clever idea which release the stack if it is not
- used in let's say two minutes. The JIT callback can help to achieve
+ No, because this is too costly in terms of resources. However, you
+ could implement some clever idea which releases the stack if it is not
+ used in let's say two minutes. The JIT callback can help to achieve
this without keeping a list of patterns.
- (6) OK, the stack is for long term memory allocation. But what happens
- if a pattern causes stack overflow with a stack of 1M? Is that 1M kept
- until the stack is freed?
+ (6) OK, the stack is for long term memory allocation. But what happens
+ if a pattern causes stack overflow with a stack of 1MiB? Is that 1MiB
+ kept until the stack is freed?
- Especially on embedded sytems, it might be a good idea to release mem-
- ory sometimes without freeing the stack. There is no API for this at
- the moment. Probably a function call which returns with the currently
- allocated memory for any stack and another which allows releasing mem-
+ Especially on embedded systems, it might be a good idea to release mem-
+ ory sometimes without freeing the stack. There is no API for this at
+ the moment. Probably a function call which returns with the currently
+ allocated memory for any stack and another which allows releasing mem-
ory (shrinking the stack) would be a good idea if someone needs this.
(7) This is too much of a headache. Isn't there any better solution for
JIT stack handling?
- No, thanks to Windows. If POSIX threads were used everywhere, we could
+ No, thanks to Windows. If POSIX threads were used everywhere, we could
throw out this complicated API.
@@ -4957,18 +5053,18 @@ FREEING JIT SPECULATIVE MEMORY
void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
The JIT executable allocator does not free all memory when it is possi-
- ble. It expects new allocations, and keeps some free memory around to
- improve allocation speed. However, in low memory conditions, it might
- be better to free all possible memory. You can cause this to happen by
- calling pcre2_jit_free_unused_memory(). Its argument is a general con-
+ ble. It expects new allocations, and keeps some free memory around to
+ improve allocation speed. However, in low memory conditions, it might
+ be better to free all possible memory. You can cause this to happen by
+ calling pcre2_jit_free_unused_memory(). Its argument is a general con-
text, for custom memory management, or NULL for standard memory manage-
ment.
EXAMPLE CODE
- This is a single-threaded example that specifies a JIT stack without
- using a callback. A real program should include error checking after
+ This is a single-threaded example that specifies a JIT stack without
+ using a callback. A real program should include error checking after
all the function calls.
int rc;
@@ -4996,29 +5092,29 @@ EXAMPLE CODE
JIT FAST PATH API
Because the API described above falls back to interpreted matching when
- JIT is not available, it is convenient for programs that are written
+ JIT is not available, it is convenient for programs that are written
for general use in many environments. However, calling JIT via
pcre2_match() does have a performance impact. Programs that are written
- for use where JIT is known to be available, and which need the best
- possible performance, can instead use a "fast path" API to call JIT
- matching directly instead of calling pcre2_match() (obviously only for
+ for use where JIT is known to be available, and which need the best
+ possible performance, can instead use a "fast path" API to call JIT
+ matching directly instead of calling pcre2_match() (obviously only for
patterns that have been successfully processed by pcre2_jit_compile()).
- The fast path function is called pcre2_jit_match(), and it takes
+ The fast path function is called pcre2_jit_match(), and it takes
exactly the same arguments as pcre2_match(). The return values are also
the same, plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or
- complete) is requested that was not compiled. Unsupported option bits
- (for example, PCRE2_ANCHORED) are ignored, as is the PCRE2_NO_JIT
+ complete) is requested that was not compiled. Unsupported option bits
+ (for example, PCRE2_ANCHORED) are ignored, as is the PCRE2_NO_JIT
option.
- When you call pcre2_match(), as well as testing for invalid options, a
+ When you call pcre2_match(), as well as testing for invalid options, a
number of other sanity checks are performed on the arguments. For exam-
ple, if the subject pointer is NULL, an immediate error is given. Also,
- unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for
- validity. In the interests of speed, these checks do not happen on the
+ unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for
+ validity. In the interests of speed, these checks do not happen on the
JIT fast path, and if invalid data is passed, the result is undefined.
- Bypassing the sanity checks and the pcre2_match() wrapping can give
+ Bypassing the sanity checks and the pcre2_match() wrapping can give
speedups of more than 10%.
@@ -5036,8 +5132,8 @@ AUTHOR
REVISION
- Last updated: 31 March 2017
- Copyright (c) 1997-2017 University of Cambridge.
+ Last updated: 28 June 2018
+ Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
@@ -5053,16 +5149,16 @@ SIZE AND OTHER LIMITATIONS
There are some size limitations in PCRE2 but it is hoped that they will
never in practice be relevant.
- The maximum size of a compiled pattern is approximately 64K code units
- for the 8-bit and 16-bit libraries if PCRE2 is compiled with the
- default internal linkage size, which is 2 bytes for these libraries. If
- you want to process regular expressions that are truly enormous, you
- can compile PCRE2 with an internal linkage size of 3 or 4 (when build-
- ing the 16-bit library, 3 is rounded up to 4). See the README file in
- the source distribution and the pcre2build documentation for details.
- In these cases the limit is substantially larger. However, the speed
- of execution is slower. In the 32-bit library, the internal linkage
- size is always 4.
+ The maximum size of a compiled pattern is approximately 64 thousand
+ code units for the 8-bit and 16-bit libraries if PCRE2 is compiled with
+ the default internal linkage size, which is 2 bytes for these
+ libraries. If you want to process regular expressions that are truly
+ enormous, you can compile PCRE2 with an internal linkage size of 3 or 4
+ (when building the 16-bit library, 3 is rounded up to 4). See the
+ README file in the source distribution and the pcre2build documentation
+ for details. In these cases the limit is substantially larger. How-
+ ever, the speed of execution is slower. In the 32-bit library, the
+ internal linkage size is always 4.
The maximum length of a source pattern string is essentially unlimited;
it is the largest number a PCRE2_SIZE variable can hold. However, the
@@ -5083,9 +5179,9 @@ SIZE AND OTHER LIMITATIONS
limit to the depth of nesting of parenthesized subpatterns of all
kinds. This is imposed in order to limit the amount of system stack
used at compile time. The default limit can be specified when PCRE2 is
- built; the default default is 250. An application can change this limit
- by calling pcre2_set_parens_nest_limit() to set the limit in a compile
- context.
+ built; if not, the default is set to 250. An application can change
+ this limit by calling pcre2_set_parens_nest_limit() to set the limit in
+ a compile context.
The maximum length of name for a named subpattern is 32 code units, and
the maximum number of named subpatterns is 10000.
@@ -5184,7 +5280,7 @@ THE STANDARD MATCHING ALGORITHM
Because it ends up with a single path through the tree, it is rela-
tively straightforward for this algorithm to keep track of the sub-
strings that are matched by portions of the pattern in parentheses.
- This provides support for capturing parentheses and back references.
+ This provides support for capturing parentheses and backreferences.
THE ALTERNATIVE MATCHING ALGORITHM
@@ -5257,7 +5353,7 @@ THE ALTERNATIVE MATCHING ALGORITHM
algorithm does not attempt to do this. This means that no captured sub-
strings are available.
- 3. Because no substrings are captured, back references within the pat-
+ 3. Because no substrings are captured, backreferences within the pat-
tern are not supported, and cause errors if encountered.
4. For the same reason, conditional expressions that use a backrefer-
@@ -5311,7 +5407,7 @@ DISADVANTAGES OF THE ALTERNATIVE ALGORITHM
partly because it has to search for all possible matches, but is also
because it is less susceptible to optimization.
- 2. Capturing parentheses and back references are not supported.
+ 2. Capturing parentheses and backreferences are not supported.
3. Although atomic groups are supported, their use does not provide the
performance advantage that it does for the standard algorithm.
@@ -5909,30 +6005,31 @@ SPECIAL START-OF-PATTERN ITEMS
pcre2_match() for it to have any effect. In other words, the pattern
writer can lower the limits set by the programmer, but not raise them.
If there is more than one setting of one of these limits, the lower
- value is used.
+ value is used. The heap limit is specified in kibibytes (units of 1024
+ bytes).
- Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This
+ Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This
name is still recognized for backwards compatibility.
- The heap limit applies only when the pcre2_match() interpreter is used
- for matching. It does not apply to JIT or DFA matching. The match limit
- is used (but in a different way) when JIT is being used, or when
+ The heap limit applies only when the pcre2_match() or pcre2_dfa_match()
+ interpreters are used for matching. It does not apply to JIT. The match
+ limit is used (but in a different way) when JIT is being used, or when
pcre2_dfa_match() is called, to limit computing resource usage by those
- matching functions. The depth limit is ignored by JIT but is relevant
- for DFA matching, which uses function recursion for recursions within
- the pattern. In this case, the depth limit controls the amount of sys-
- tem stack that is used.
+ matching functions. The depth limit is ignored by JIT but is relevant
+ for DFA matching, which uses function recursion for recursions within
+ the pattern and for lookaround assertions and atomic groups. In this
+ case, the depth limit controls the depth of such recursion.
Newline conventions
- PCRE2 supports six different conventions for indicating line breaks in
- strings: a single CR (carriage return) character, a single LF (line-
+ PCRE2 supports six different conventions for indicating line breaks in
+ strings: a single CR (carriage return) character, a single LF (line-
feed) character, the two-character sequence CRLF, any of the three pre-
- ceding, any Unicode newline sequence, or the NUL character (binary
- zero). The pcre2api page has further discussion about newlines, and
+ ceding, any Unicode newline sequence, or the NUL character (binary
+ zero). The pcre2api page has further discussion about newlines, and
shows how to set the newline convention when calling pcre2_compile().
- It is also possible to specify a newline convention by starting a pat-
+ It is also possible to specify a newline convention by starting a pat-
tern string with one of the following sequences:
(*CR) carriage return
@@ -5943,7 +6040,7 @@ SPECIAL START-OF-PATTERN ITEMS
(*NUL) the NUL character (binary zero)
These override the default and the options given to the compiling func-
- tion. For example, on a Unix system where LF is the default newline
+ tion. For example, on a Unix system where LF is the default newline
sequence, the pattern
(*CR)a.b
@@ -5952,14 +6049,15 @@ SPECIAL START-OF-PATTERN ITEMS
no longer a newline. If more than one of these settings is present, the
last one is used.
- The newline convention affects where the circumflex and dollar asser-
+ The newline convention affects where the circumflex and dollar asser-
tions are true. It also affects the interpretation of the dot metachar-
- acter when PCRE2_DOTALL is not set, and the behaviour of \N. However,
- it does not affect what the \R escape sequence matches. By default,
- this is any Unicode newline sequence, for Perl compatibility. However,
- this can be changed; see the next section and the description of \R in
- the section entitled "Newline sequences" below. A change of \R setting
- can be combined with a change of newline convention.
+ acter when PCRE2_DOTALL is not set, and the behaviour of \N when not
+ followed by an opening brace. However, it does not affect what the \R
+ escape sequence matches. By default, this is any Unicode newline
+ sequence, for Perl compatibility. However, this can be changed; see the
+ next section and the description of \R in the section entitled "Newline
+ sequences" below. A change of \R setting can be combined with a change
+ of newline convention.
Specifying what \R matches
@@ -6047,7 +6145,7 @@ BACKSLASH
In a UTF mode, only ASCII numbers and letters have any special meaning
after a backslash. All other characters (in particular, those whose
- codepoints are greater than 127) are treated as literals.
+ code points are greater than 127) are treated as literals.
If a pattern is compiled with the PCRE2_EXTENDED option, most white
space in the pattern (other than in a character class), and characters
@@ -6059,7 +6157,10 @@ BACKSLASH
ters, you can do so by putting them between \Q and \E. This is differ-
ent from Perl in that $ and @ are handled as literals in \Q...\E
sequences in PCRE2, whereas in Perl, $ and @ cause variable interpola-
- tion. Note the following examples:
+ tion. Also, Perl does "double-quotish backslash interpolation" on any
+ backslashes between \Q and \E which, its documentation says, "may lead
+ to confusing results". PCRE2 treats a backslash between \Q and \E just
+ like any other character. Note the following examples:
Pattern PCRE2 matches Perl matches
@@ -6067,37 +6168,47 @@ BACKSLASH
contents of $xyz
\Qabc\$xyz\E abc\$xyz abc\$xyz
\Qabc\E\$\Qxyz\E abc$xyz abc$xyz
-
- The \Q...\E sequence is recognized both inside and outside character
- classes. An isolated \E that is not preceded by \Q is ignored. If \Q
- is not followed by \E later in the pattern, the literal interpretation
- continues to the end of the pattern (that is, \E is assumed at the
- end). If the isolated \Q is inside a character class, this causes an
- error, because the character class is not terminated by a closing
+ \QA\B\E A\B A\B
+ \Q\\E \ \\E
+
+ The \Q...\E sequence is recognized both inside and outside character
+ classes. An isolated \E that is not preceded by \Q is ignored. If \Q
+ is not followed by \E later in the pattern, the literal interpretation
+ continues to the end of the pattern (that is, \E is assumed at the
+ end). If the isolated \Q is inside a character class, this causes an
+ error, because the character class is not terminated by a closing
square bracket.
Non-printing characters
A second use of backslash provides a way of encoding non-printing char-
- acters in patterns in a visible manner. There is no restriction on the
- appearance of non-printing characters in a pattern, but when a pattern
+ acters in patterns in a visible manner. There is no restriction on the
+ appearance of non-printing characters in a pattern, but when a pattern
is being prepared by text editing, it is often easier to use one of the
- following escape sequences than the binary character it represents. In
+ following escape sequences than the binary character it represents. In
an ASCII or Unicode environment, these escapes are as follows:
- \a alarm, that is, the BEL character (hex 07)
- \cx "control-x", where x is any printable ASCII character
- \e escape (hex 1B)
- \f form feed (hex 0C)
- \n linefeed (hex 0A)
- \r carriage return (hex 0D)
- \t tab (hex 09)
- \0dd character with octal code 0dd
- \ddd character with octal code ddd, or back reference
- \o{ddd..} character with octal code ddd..
- \xhh character with hex code hh
- \x{hhh..} character with hex code hhh.. (default mode)
- \uhhhh character with hex code hhhh (when PCRE2_ALT_BSUX is set)
+ \a alarm, that is, the BEL character (hex 07)
+ \cx "control-x", where x is any printable ASCII character
+ \e escape (hex 1B)
+ \f form feed (hex 0C)
+ \n linefeed (hex 0A)
+ \r carriage return (hex 0D)
+ \t tab (hex 09)
+ \0dd character with octal code 0dd
+ \ddd character with octal code ddd, or backreference
+ \o{ddd..} character with octal code ddd..
+ \xhh character with hex code hh
+ \x{hhh..} character with hex code hhh..
+ \N{U+hhh..} character with Unicode hex code point hhh..
+ \uhhhh character with hex code hhhh (when PCRE2_ALT_BSUX is set)
+
+ The \N{U+hhh..} escape sequence is recognized only when the PCRE2_UTF
+ option is set, that is, when PCRE2 is operating in a Unicode mode. Perl
+ also uses \N{name} to specify characters by Unicode name; PCRE2 does
+ not support this. Note that when \N is not followed by an opening
+ brace (curly bracket) it has an entirely different meaning, matching
+ any character that is not a newline.
The precise effect of \cx on ASCII characters is as follows: if x is a
lower case letter, it is converted to upper case. Then bit 6 of the
@@ -6106,15 +6217,15 @@ BACKSLASH
hex 7B (; is 3B). If the code unit following \c has a value less than
32 or greater than 126, a compile-time error occurs.
- When PCRE2 is compiled in EBCDIC mode, \a, \e, \f, \n, \r, and \t gen-
- erate the appropriate EBCDIC code values. The \c escape is processed as
- specified for Perl in the perlebcdic document. The only characters that
- are allowed after \c are A-Z, a-z, or one of @, [, \, ], ^, _, or ?.
- Any other character provokes a compile-time error. The sequence \c@
- encodes character code 0; after \c the letters (in either case) encode
- characters 1-26 (hex 01 to hex 1A); [, \, ], ^, and _ encode characters
- 27-31 (hex 1B to hex 1F), and \c? becomes either 255 (hex FF) or 95
- (hex 5F).
+ When PCRE2 is compiled in EBCDIC mode, \N{U+hhh..} is not supported.
+ \a, \e, \f, \n, \r, and \t generate the appropriate EBCDIC code values.
+ The \c escape is processed as specified for Perl in the perlebcdic doc-
+ ument. The only characters that are allowed after \c are A-Z, a-z, or
+ one of @, [, \, ], ^, _, or ?. Any other character provokes a compile-
+ time error. The sequence \c@ encodes character code 0; after \c the
+ letters (in either case) encode characters 1-26 (hex 01 to hex 1A); [,
+ \, ], ^, and _ encode characters 27-31 (hex 1B to hex 1F), and \c?
+ becomes either 255 (hex FF) or 95 (hex 5F).
Thus, apart from \c?, these escapes generate the same character code
values as they do in an ASCII environment, though the meanings of the
@@ -6139,12 +6250,12 @@ BACKSLASH
in braces. An error occurs if this is not the case. This escape is a
recent addition to Perl; it provides a way of specifying character code
points as octal numbers greater than 0777, and it also allows octal
- numbers and back references to be unambiguously specified.
+ numbers and backreferences to be unambiguously specified.
For greater clarity and unambiguity, it is best to avoid following \ by
- a digit greater than zero. Instead, use \o{} or \x{} to specify charac-
- ter numbers, and \g{} to specify back references. The following para-
- graphs describe the old, ambiguous syntax.
+ a digit greater than zero. Instead, use \o{} or \x{} to specify numeri-
+ cal character code points, and \g{} to specify backreferences. The fol-
+ lowing paragraphs describe the old, ambiguous syntax.
The handling of a backslash followed by a digit other than 0 is compli-
cated, and Perl has changed over time, causing PCRE2 also to change.
@@ -6153,7 +6264,7 @@ BACKSLASH
its as a decimal number. If the number is less than 10, begins with the
digit 8 or 9, or if there are at least that many previous capturing
left parentheses in the expression, the entire sequence is taken as a
- back reference. A description of how this works is given later, follow-
+ backreference. A description of how this works is given later, follow-
ing the discussion of parenthesized subpatterns. Otherwise, up to
three octal digits are read to form a character code.
@@ -6166,16 +6277,16 @@ BACKSLASH
\040 is another way of writing an ASCII space
\40 is the same, provided there are fewer than 40
previous capturing subpatterns
- \7 is always a back reference
- \11 might be a back reference, or another way of
+ \7 is always a backreference
+ \11 might be a backreference, or another way of
writing a tab
\011 is always a tab
\0113 is a tab followed by the character "3"
- \113 might be a back reference, otherwise the
+ \113 might be a backreference, otherwise the
character with octal code 113
- \377 might be a back reference, otherwise
+ \377 might be a backreference, otherwise
the value 255 (decimal)
- \81 is always a back reference
+ \81 is always a backreference
Note that octal values of 100 or greater that are specified using this
syntax must not be introduced by a leading zero, because no more than
@@ -6207,12 +6318,14 @@ BACKSLASH
8-bit non-UTF mode no greater than 0xff
16-bit non-UTF mode no greater than 0xffff
32-bit non-UTF mode no greater than 0xffffffff
- All UTF modes no greater than 0x10ffff and a valid codepoint
+ All UTF modes no greater than 0x10ffff and a valid code point
- Invalid Unicode codepoints are all those in the range 0xd800 to 0xdfff
- (the so-called "surrogate" codepoints). The check for these can be dis-
- abled by the caller of pcre2_compile() by setting the option
- PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES.
+ Invalid Unicode code points are all those in the range 0xd800 to 0xdfff
+ (the so-called "surrogate" code points). The check for these can be
+ disabled by the caller of pcre2_compile() by setting the option
+ PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES. However, this is possible only in
+ UTF-8 and UTF-32 modes, because these values are not representable in
+ UTF-16.
Escape sequences in character classes
@@ -6220,24 +6333,24 @@ BACKSLASH
inside and outside character classes. In addition, inside a character
class, \b is interpreted as the backspace character (hex 08).
- \N is not allowed in a character class. \B, \R, and \X are not special
- inside a character class. Like other unrecognized alphabetic escape
- sequences, they cause an error. Outside a character class, these
- sequences have different meanings.
+ When not followed by an opening brace, \N is not allowed in a character
+ class. \B, \R, and \X are not special inside a character class. Like
+ other unrecognized alphabetic escape sequences, they cause an error.
+ Outside a character class, these sequences have different meanings.
Unsupported escape sequences
- In Perl, the sequences \l, \L, \u, and \U are recognized by its string
- handler and used to modify the case of following characters. By
+ In Perl, the sequences \F, \l, \L, \u, and \U are recognized by its
+ string handler and used to modify the case of following characters. By
default, PCRE2 does not support these escape sequences. However, if the
PCRE2_ALT_BSUX option is set, \U matches a "U" character, and \u can be
used to define a character by code point, as described above.
- Absolute and relative back references
+ Absolute and relative backreferences
The sequence \g followed by a signed or unsigned number, optionally
- enclosed in braces, is an absolute or relative back reference. A named
- back reference can be coded as \g{name}. Back references are discussed
+ enclosed in braces, is an absolute or relative backreference. A named
+ backreference can be coded as \g{name}. Backreferences are discussed
later, following the discussion of parenthesized subpatterns.
Absolute and relative subroutine calls
@@ -6246,8 +6359,8 @@ BACKSLASH
name or a number enclosed either in angle brackets or single quotes, is
an alternative syntax for referencing a subpattern as a "subroutine".
Details are discussed later. Note that \g{...} (Perl syntax) and
- \g<...> (Oniguruma syntax) are not synonymous. The former is a back
- reference; the latter is a subroutine call.
+ \g<...> (Oniguruma syntax) are not synonymous. The former is a backref-
+ erence; the latter is a subroutine call.
Generic character types
@@ -6257,6 +6370,7 @@ BACKSLASH
\D any character that is not a decimal digit
\h any horizontal white space character
\H any character that is not a horizontal white space character
+ \N any character that is not a newline
\s any white space character
\S any character that is not a white space character
\v any vertical white space character
@@ -6264,10 +6378,12 @@ BACKSLASH
\w any "word" character
\W any "non-word" character
- There is also the single sequence \N, which matches a non-newline char-
- acter. This is the same as the "." metacharacter when PCRE2_DOTALL is
- not set. Perl also uses \N to match characters by name; PCRE2 does not
- support this.
+ The \N escape sequence has the same meaning as the "." metacharacter
+ when PCRE2_DOTALL is not set, but setting PCRE2_DOTALL does not change
+ the meaning of \N. Note that when \N is followed by an opening brace it
+ has a different meaning. See the section entitled "Non-printing charac-
+ ters" above for details. Perl also uses \N{name} to specify characters
+ by Unicode name; PCRE2 does not support this.
Each pair of lower and upper case escape sequences partitions the com-
plete set of characters into two disjoint sets. Any given character
@@ -6365,7 +6481,7 @@ BACKSLASH
atomic group, the two-character sequence is treated as a single unit
that cannot be split.
- In other modes, two additional characters whose codepoints are greater
+ In other modes, two additional characters whose code points are greater
than 255 are added: LS (line separator, U+2028) and PS (paragraph sepa-
rator, U+2029). Unicode support is not needed for these characters to
be recognized.
@@ -6399,8 +6515,8 @@ BACKSLASH
When PCRE2 is built with Unicode support (the default), three addi-
tional escape sequences that match characters with specific properties
are available. In 8-bit non-UTF-8 mode, these sequences are of course
- limited to testing characters whose codepoints are less than 256, but
- they do work in this mode. In 32-bit non-UTF mode, codepoints greater
+ limited to testing characters whose code points are less than 256, but
+ they do work in this mode. In 32-bit non-UTF mode, code points greater
than 0x10ffff (the Unicode limit) may be encountered. These are all
treated as being in the Common script and with an unassigned type. The
extra escape sequences are:
@@ -6430,34 +6546,35 @@ BACKSLASH
nese, Bamum, Bassa_Vah, Batak, Bengali, Bhaiksuki, Bopomofo, Brahmi,
Braille, Buginese, Buhid, Canadian_Aboriginal, Carian, Caucasian_Alba-
nian, Chakma, Cham, Cherokee, Common, Coptic, Cuneiform, Cypriot,
- Cyrillic, Deseret, Devanagari, Duployan, Egyptian_Hieroglyphs, Elbasan,
- Ethiopic, Georgian, Glagolitic, Gothic, Grantha, Greek, Gujarati, Gur-
- mukhi, Han, Hangul, Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Ara-
- maic, Inherited, Inscriptional_Pahlavi, Inscriptional_Parthian,
- Javanese, Kaithi, Kannada, Katakana, Kayah_Li, Kharoshthi, Khmer, Kho-
- jki, Khudawadi, Lao, Latin, Lepcha, Limbu, Linear_A, Linear_B, Lisu,
- Lycian, Lydian, Mahajani, Malayalam, Mandaic, Manichaean, Marchen,
- Masaram_Gondi, Meetei_Mayek, Mende_Kikakui, Meroitic_Cursive,
- Meroitic_Hieroglyphs, Miao, Modi, Mongolian, Mro, Multani, Myanmar,
- Nabataean, New_Tai_Lue, Newa, Nko, Nushu, Ogham, Ol_Chiki, Old_Hungar-
- ian, Old_Italic, Old_North_Arabian, Old_Permic, Old_Persian,
- Old_South_Arabian, Old_Turkic, Oriya, Osage, Osmanya, Pahawh_Hmong,
- Palmyrene, Pau_Cin_Hau, Phags_Pa, Phoenician, Psalter_Pahlavi, Rejang,
- Runic, Samaritan, Saurashtra, Sharada, Shavian, Siddham, SignWriting,
- Sinhala, Sora_Sompeng, Soyombo, Sundanese, Syloti_Nagri, Syriac, Taga-
- log, Tagbanwa, Tai_Le, Tai_Tham, Tai_Viet, Takri, Tamil, Tangut, Tel-
- ugu, Thaana, Thai, Tibetan, Tifinagh, Tirhuta, Ugaritic, Vai,
- Warang_Citi, Yi, Zanabazar_Square.
+ Cyrillic, Deseret, Devanagari, Dogra, Duployan, Egyptian_Hieroglyphs,
+ Elbasan, Ethiopic, Georgian, Glagolitic, Gothic, Grantha, Greek,
+ Gujarati, Gunjala_Gondi, Gurmukhi, Han, Hangul, Hanifi_Rohingya,
+ Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Aramaic, Inherited,
+ Inscriptional_Pahlavi, Inscriptional_Parthian, Javanese, Kaithi, Kan-
+ nada, Katakana, Kayah_Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao,
+ Latin, Lepcha, Limbu, Linear_A, Linear_B, Lisu, Lycian, Lydian, Maha-
+ jani, Makasar, Malayalam, Mandaic, Manichaean, Marchen, Masaram_Gondi,
+ Medefaidrin, Meetei_Mayek, Mende_Kikakui, Meroitic_Cursive,
+ Meroitic_Hieroglyphs, Miao, Modi, Mongolian, Mro, Multani, Myanmar,
+ Nabataean, New_Tai_Lue, Newa, Nko, Nushu, Ogham, Ol_Chiki, Old_Hungar-
+ ian, Old_Italic, Old_North_Arabian, Old_Permic, Old_Persian, Old_Sog-
+ dian, Old_South_Arabian, Old_Turkic, Oriya, Osage, Osmanya,
+ Pahawh_Hmong, Palmyrene, Pau_Cin_Hau, Phags_Pa, Phoenician,
+ Psalter_Pahlavi, Rejang, Runic, Samaritan, Saurashtra, Sharada, Sha-
+ vian, Siddham, SignWriting, Sinhala, Sogdian, Sora_Sompeng, Soyombo,
+ Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, Tai_Le, Tai_Tham,
+ Tai_Viet, Takri, Tamil, Tangut, Telugu, Thaana, Thai, Tibetan, Tifi-
+ nagh, Tirhuta, Ugaritic, Vai, Warang_Citi, Yi, Zanabazar_Square.
Each character has exactly one Unicode general category property, spec-
- ified by a two-letter abbreviation. For compatibility with Perl, nega-
- tion can be specified by including a circumflex between the opening
- brace and the property name. For example, \p{^Lu} is the same as
+ ified by a two-letter abbreviation. For compatibility with Perl, nega-
+ tion can be specified by including a circumflex between the opening
+ brace and the property name. For example, \p{^Lu} is the same as
\P{Lu}.
If only one letter is specified with \p or \P, it includes all the gen-
- eral category properties that start with that letter. In this case, in
- the absence of negation, the curly brackets in the escape sequence are
+ eral category properties that start with that letter. In this case, in
+ the absence of negation, the curly brackets in the escape sequence are
optional; these two examples have the same effect:
\p{L}
@@ -6509,44 +6626,47 @@ BACKSLASH
Zp Paragraph separator
Zs Space separator
- The special property L& is also supported: it matches a character that
- has the Lu, Ll, or Lt property, in other words, a letter that is not
+ The special property L& is also supported: it matches a character that
+ has the Lu, Ll, or Lt property, in other words, a letter that is not
classified as a modifier or "other".
- The Cs (Surrogate) property applies only to characters in the range
- U+D800 to U+DFFF. Such characters are not valid in Unicode strings and
- so cannot be tested by PCRE2, unless UTF validity checking has been
- turned off (see the discussion of PCRE2_NO_UTF_CHECK in the pcre2api
+ The Cs (Surrogate) property applies only to characters in the range
+ U+D800 to U+DFFF. Such characters are not valid in Unicode strings and
+ so cannot be tested by PCRE2, unless UTF validity checking has been
+ turned off (see the discussion of PCRE2_NO_UTF_CHECK in the pcre2api
page). Perl does not support the Cs property.
- The long synonyms for property names that Perl supports (such as
- \p{Letter}) are not supported by PCRE2, nor is it permitted to prefix
+ The long synonyms for property names that Perl supports (such as
+ \p{Letter}) are not supported by PCRE2, nor is it permitted to prefix
any of these properties with "Is".
No character that is in the Unicode table has the Cn (unassigned) prop-
erty. Instead, this property is assumed for any code point that is not
in the Unicode table.
- Specifying caseless matching does not affect these escape sequences.
- For example, \p{Lu} always matches only upper case letters. This is
+ Specifying caseless matching does not affect these escape sequences.
+ For example, \p{Lu} always matches only upper case letters. This is
different from the behaviour of current versions of Perl.
- Matching characters by Unicode property is not fast, because PCRE2 has
- to do a multistage table lookup in order to find a character's prop-
+ Matching characters by Unicode property is not fast, because PCRE2 has
+ to do a multistage table lookup in order to find a character's prop-
erty. That is why the traditional escape sequences such as \d and \w do
- not use Unicode properties in PCRE2 by default, though you can make
- them do so by setting the PCRE2_UCP option or by starting the pattern
+ not use Unicode properties in PCRE2 by default, though you can make
+ them do so by setting the PCRE2_UCP option or by starting the pattern
with (*UCP).
Extended grapheme clusters
- The \X escape matches any number of Unicode characters that form an
+ The \X escape matches any number of Unicode characters that form an
"extended grapheme cluster", and treats the sequence as an atomic group
- (see below). Unicode supports various kinds of composite character by
- giving each character a grapheme breaking property, and having rules
+ (see below). Unicode supports various kinds of composite character by
+ giving each character a grapheme breaking property, and having rules
that use these properties to define the boundaries of extended grapheme
- clusters. The rules are defined in Unicode Standard Annex 29, "Unicode
- Text Segmentation".
+ clusters. The rules are defined in Unicode Standard Annex 29, "Unicode
+ Text Segmentation". Unicode 11.0.0 abandoned the use of some previous
+ properties that had been used for emojis. Instead it introduced vari-
+ ous emoji-specific properties. PCRE2 uses only the Extended Picto-
+ graphic property.
\X always matches at least one character. Then it decides whether to
add additional characters according to the following rules for ending a
@@ -6564,23 +6684,21 @@ BACKSLASH
only by a T character.
4. Do not end before extending characters or spacing marks or the
- "zero-width joiner" characters. Characters with the "mark" property
+ "zero-width joiner" character. Characters with the "mark" property
always have the "extend" grapheme breaking property.
5. Do not end after prepend characters.
- 6. Do not break within emoji modifier sequences (a base character fol-
- lowed by a modifier). Extending characters are allowed before the modi-
- fier.
-
- 7. Do not break within emoji zwj sequences (zero-width jointer followed
- by "glue after ZWJ" or "base glue after ZWJ").
+ 6. Do not break within emoji modifier sequences or emoji zwj sequences.
+ That is, do not break between characters with the Extended_Pictographic
+ property. Extend and ZWJ characters are allowed between the charac-
+ ters.
- 8. Do not break within emoji flag sequences. That is, do not break
+ 7. Do not break within emoji flag sequences. That is, do not break
between regional indicator (RI) characters if there are an odd number
of RI characters before the break point.
- 6. Otherwise, end the cluster.
+ 8. Otherwise, end the cluster.
PCRE2's additional properties
@@ -6614,27 +6732,43 @@ BACKSLASH
Resetting the match start
- The escape sequence \K causes any previously matched characters not to
- be included in the final matched sequence. For example, the pattern:
+ In normal use, the escape sequence \K causes any previously matched
+ characters not to be included in the final matched sequence that is
+ returned. For example, the pattern:
foo\Kbar
- matches "foobar", but reports that it has matched "bar". This feature
- is similar to a lookbehind assertion (described below). However, in
- this case, the part of the subject before the real match does not have
- to be of fixed length, as lookbehind assertions do. The use of \K does
- not interfere with the setting of captured substrings. For example,
- when the pattern
+ matches "foobar", but reports that it has matched "bar". \K does not
+ interact with anchoring in any way. The pattern:
+
+ ^foo\Kbar
+
+ matches only when the subject begins with "foobar" (in single line
+ mode), though it again reports the matched string as "bar". This fea-
+ ture is similar to a lookbehind assertion (described below). However,
+ in this case, the part of the subject before the real match does not
+ have to be of fixed length, as lookbehind assertions do. The use of \K
+ does not interfere with the setting of captured substrings. For exam-
+ ple, when the pattern
(foo)\Kbar
matches "foobar", the first substring is still set to "foo".
- Perl documents that the use of \K within assertions is "not well
- defined". In PCRE2, \K is acted upon when it occurs inside positive
- assertions, but is ignored in negative assertions. Note that when a
- pattern such as (?=ab\K) matches, the reported start of the match can
- be greater than the end of the match.
+ Perl documents that the use of \K within assertions is "not well
+ defined". In PCRE2, \K is acted upon when it occurs inside positive
+ assertions, but is ignored in negative assertions. Note that when a
+ pattern such as (?=ab\K) matches, the reported start of the match can
+ be greater than the end of the match. Using \K in a lookbehind asser-
+ tion at the start of a pattern can also lead to odd effects. For exam-
+ ple, consider this pattern:
+
+ (?<=\Kfoo)bar
+
+ If the subject is "foobar", a call to pcre2_match() with a starting
+ offset of 3 succeeds and reports the matching string as "foobar", that
+ is, the start of the reported match is earlier than where the match
+ started.
Simple assertions
@@ -6679,118 +6813,124 @@ BACKSLASH
as well as at the very end, whereas \z matches only at the end.
The \G assertion is true only when the current matching position is at
- the start point of the match, as specified by the startoffset argument
- of pcre2_match(). It differs from \A when the value of startoffset is
- non-zero. By calling pcre2_match() multiple times with appropriate
- arguments, you can mimic Perl's /g option, and it is in this kind of
- implementation where \G can be useful.
-
- Note, however, that PCRE2's interpretation of \G, as the start of the
- current match, is subtly different from Perl's, which defines it as the
- end of the previous match. In Perl, these can be different when the
- previously matched string was empty. Because PCRE2 does just one match
- at a time, it cannot reproduce this behaviour.
-
- If all the alternatives of a pattern begin with \G, the expression is
+ the start point of the matching process, as specified by the startoff-
+ set argument of pcre2_match(). It differs from \A when the value of
+ startoffset is non-zero. By calling pcre2_match() multiple times with
+ appropriate arguments, you can mimic Perl's /g option, and it is in
+ this kind of implementation where \G can be useful.
+
+ Note, however, that PCRE2's implementation of \G, being true at the
+ starting character of the matching process, is subtly different from
+ Perl's, which defines it as true at the end of the previous match. In
+ Perl, these can be different when the previously matched string was
+ empty. Because PCRE2 does just one match at a time, it cannot reproduce
+ this behaviour.
+
+ If all the alternatives of a pattern begin with \G, the expression is
anchored to the starting match position, and the "anchored" flag is set
in the compiled regular expression.
CIRCUMFLEX AND DOLLAR
- The circumflex and dollar metacharacters are zero-width assertions.
- That is, they test for a particular condition being true without con-
+ The circumflex and dollar metacharacters are zero-width assertions.
+ That is, they test for a particular condition being true without con-
suming any characters from the subject string. These two metacharacters
- are concerned with matching the starts and ends of lines. If the new-
- line convention is set so that only the two-character sequence CRLF is
- recognized as a newline, isolated CR and LF characters are treated as
+ are concerned with matching the starts and ends of lines. If the new-
+ line convention is set so that only the two-character sequence CRLF is
+ recognized as a newline, isolated CR and LF characters are treated as
ordinary data characters, and are not recognized as newlines.
Outside a character class, in the default matching mode, the circumflex
- character is an assertion that is true only if the current matching
- point is at the start of the subject string. If the startoffset argu-
- ment of pcre2_match() is non-zero, or if PCRE2_NOTBOL is set, circum-
- flex can never match if the PCRE2_MULTILINE option is unset. Inside a
- character class, circumflex has an entirely different meaning (see
+ character is an assertion that is true only if the current matching
+ point is at the start of the subject string. If the startoffset argu-
+ ment of pcre2_match() is non-zero, or if PCRE2_NOTBOL is set, circum-
+ flex can never match if the PCRE2_MULTILINE option is unset. Inside a
+ character class, circumflex has an entirely different meaning (see
below).
- Circumflex need not be the first character of the pattern if a number
- of alternatives are involved, but it should be the first thing in each
- alternative in which it appears if the pattern is ever to match that
- branch. If all possible alternatives start with a circumflex, that is,
- if the pattern is constrained to match only at the start of the sub-
- ject, it is said to be an "anchored" pattern. (There are also other
+ Circumflex need not be the first character of the pattern if a number
+ of alternatives are involved, but it should be the first thing in each
+ alternative in which it appears if the pattern is ever to match that
+ branch. If all possible alternatives start with a circumflex, that is,
+ if the pattern is constrained to match only at the start of the sub-
+ ject, it is said to be an "anchored" pattern. (There are also other
constructs that can cause a pattern to be anchored.)
- The dollar character is an assertion that is true only if the current
- matching point is at the end of the subject string, or immediately
- before a newline at the end of the string (by default), unless
+ The dollar character is an assertion that is true only if the current
+ matching point is at the end of the subject string, or immediately
+ before a newline at the end of the string (by default), unless
PCRE2_NOTEOL is set. Note, however, that it does not actually match the
newline. Dollar need not be the last character of the pattern if a num-
ber of alternatives are involved, but it should be the last item in any
- branch in which it appears. Dollar has no special meaning in a charac-
+ branch in which it appears. Dollar has no special meaning in a charac-
ter class.
- The meaning of dollar can be changed so that it matches only at the
- very end of the string, by setting the PCRE2_DOLLAR_ENDONLY option at
+ The meaning of dollar can be changed so that it matches only at the
+ very end of the string, by setting the PCRE2_DOLLAR_ENDONLY option at
compile time. This does not affect the \Z assertion.
The meanings of the circumflex and dollar metacharacters are changed if
- the PCRE2_MULTILINE option is set. When this is the case, a dollar
- character matches before any newlines in the string, as well as at the
- very end, and a circumflex matches immediately after internal newlines
- as well as at the start of the subject string. It does not match after
- a newline that ends the string, for compatibility with Perl. However,
+ the PCRE2_MULTILINE option is set. When this is the case, a dollar
+ character matches before any newlines in the string, as well as at the
+ very end, and a circumflex matches immediately after internal newlines
+ as well as at the start of the subject string. It does not match after
+ a newline that ends the string, for compatibility with Perl. However,
this can be changed by setting the PCRE2_ALT_CIRCUMFLEX option.
- For example, the pattern /^abc$/ matches the subject string "def\nabc"
- (where \n represents a newline) in multiline mode, but not otherwise.
- Consequently, patterns that are anchored in single line mode because
- all branches start with ^ are not anchored in multiline mode, and a
- match for circumflex is possible when the startoffset argument of
- pcre2_match() is non-zero. The PCRE2_DOLLAR_ENDONLY option is ignored
+ For example, the pattern /^abc$/ matches the subject string "def\nabc"
+ (where \n represents a newline) in multiline mode, but not otherwise.
+ Consequently, patterns that are anchored in single line mode because
+ all branches start with ^ are not anchored in multiline mode, and a
+ match for circumflex is possible when the startoffset argument of
+ pcre2_match() is non-zero. The PCRE2_DOLLAR_ENDONLY option is ignored
if PCRE2_MULTILINE is set.
- When the newline convention (see "Newline conventions" below) recog-
- nizes the two-character sequence CRLF as a newline, this is preferred,
- even if the single characters CR and LF are also recognized as new-
- lines. For example, if the newline convention is "any", a multiline
- mode circumflex matches before "xyz" in the string "abc\r\nxyz" rather
- than after CR, even though CR on its own is a valid newline. (It also
+ When the newline convention (see "Newline conventions" below) recog-
+ nizes the two-character sequence CRLF as a newline, this is preferred,
+ even if the single characters CR and LF are also recognized as new-
+ lines. For example, if the newline convention is "any", a multiline
+ mode circumflex matches before "xyz" in the string "abc\r\nxyz" rather
+ than after CR, even though CR on its own is a valid newline. (It also
matches at the very start of the string, of course.)
- Note that the sequences \A, \Z, and \z can be used to match the start
- and end of the subject in both modes, and if all branches of a pattern
- start with \A it is always anchored, whether or not PCRE2_MULTILINE is
+ Note that the sequences \A, \Z, and \z can be used to match the start
+ and end of the subject in both modes, and if all branches of a pattern
+ start with \A it is always anchored, whether or not PCRE2_MULTILINE is
set.
FULL STOP (PERIOD, DOT) AND \N
Outside a character class, a dot in the pattern matches any one charac-
- ter in the subject string except (by default) a character that signi-
+ ter in the subject string except (by default) a character that signi-
fies the end of a line.
- When a line ending is defined as a single character, dot never matches
- that character; when the two-character sequence CRLF is used, dot does
- not match CR if it is immediately followed by LF, but otherwise it
- matches all characters (including isolated CRs and LFs). When any Uni-
- code line endings are being recognized, dot does not match CR or LF or
+ When a line ending is defined as a single character, dot never matches
+ that character; when the two-character sequence CRLF is used, dot does
+ not match CR if it is immediately followed by LF, but otherwise it
+ matches all characters (including isolated CRs and LFs). When any Uni-
+ code line endings are being recognized, dot does not match CR or LF or
any of the other line ending characters.
- The behaviour of dot with regard to newlines can be changed. If the
- PCRE2_DOTALL option is set, a dot matches any one character, without
- exception. If the two-character sequence CRLF is present in the sub-
+ The behaviour of dot with regard to newlines can be changed. If the
+ PCRE2_DOTALL option is set, a dot matches any one character, without
+ exception. If the two-character sequence CRLF is present in the sub-
ject string, it takes two dots to match it.
- The handling of dot is entirely independent of the handling of circum-
- flex and dollar, the only relationship being that they both involve
+ The handling of dot is entirely independent of the handling of circum-
+ flex and dollar, the only relationship being that they both involve
newlines. Dot has no special meaning in a character class.
- The escape sequence \N behaves like a dot, except that it is not
- affected by the PCRE2_DOTALL option. In other words, it matches any
- character except one that signifies the end of a line. Perl also uses
- \N to match characters by name; PCRE2 does not support this.
+ The escape sequence \N when not followed by an opening brace behaves
+ like a dot, except that it is not affected by the PCRE2_DOTALL option.
+ In other words, it matches any character except one that signifies the
+ end of a line.
+
+ When \N is followed by an opening brace it has a different meaning. See
+ the section entitled "Non-printing characters" above for details. Perl
+ also uses \N{name} to specify characters by Unicode name; PCRE2 does
+ not support this.
MATCHING A SINGLE CODE UNIT
@@ -6871,10 +7011,12 @@ SQUARE BRACKETS AND CHARACTER CLASSES
sumes a character from the subject string, and therefore it fails if
the current pointer is at the end of the string.
- When caseless matching is set, any letters in a class represent both
- their upper case and lower case versions, so for example, a caseless
- [aeiou] matches "A" as well as "a", and a caseless [^aeiou] does not
- match "A", whereas a caseful version would.
+ Characters in a class may be specified by their code points using \o,
+ \x, or \N{U+hh..} in the usual way. When caseless matching is set, any
+ letters in a class represent both their upper case and lower case ver-
+ sions, so for example, a caseless [aeiou] matches "A" as well as "a",
+ and a caseless [^aeiou] does not match "A", whereas a caseful version
+ would.
Characters that might indicate line breaks are never treated in any
special way when matching character classes, whatever line-ending
@@ -6882,79 +7024,80 @@ SQUARE BRACKETS AND CHARACTER CLASSES
PCRE2_MULTILINE options is used. A class such as [^a] always matches
one of these characters.
- The character escape sequences \d, \D, \h, \H, \p, \P, \s, \S, \v, \V,
- \w, and \W may appear in a character class, and add the characters that
- they match to the class. For example, [\dABCDEF] matches any hexadeci-
- mal digit. In UTF modes, the PCRE2_UCP option affects the meanings of
- \d, \s, \w and their upper case partners, just as it does when they
- appear outside a character class, as described in the section entitled
- "Generic character types" above. The escape sequence \b has a different
- meaning inside a character class; it matches the backspace character.
- The sequences \B, \N, \R, and \X are not special inside a character
- class. Like any other unrecognized escape sequences, they cause an
- error.
-
- The minus (hyphen) character can be used to specify a range of charac-
- ters in a character class. For example, [d-m] matches any letter
- between d and m, inclusive. If a minus character is required in a
- class, it must be escaped with a backslash or appear in a position
- where it cannot be interpreted as indicating a range, typically as the
+ The generic character type escape sequences \d, \D, \h, \H, \p, \P, \s,
+ \S, \v, \V, \w, and \W may appear in a character class, and add the
+ characters that they match to the class. For example, [\dABCDEF]
+ matches any hexadecimal digit. In UTF modes, the PCRE2_UCP option
+ affects the meanings of \d, \s, \w and their upper case partners, just
+ as it does when they appear outside a character class, as described in
+ the section entitled "Generic character types" above. The escape
+ sequence \b has a different meaning inside a character class; it
+ matches the backspace character. The sequences \B, \R, and \X are not
+ special inside a character class. Like any other unrecognized escape
+ sequences, they cause an error. The same is true for \N when not fol-
+ lowed by an opening brace.
+
+ The minus (hyphen) character can be used to specify a range of charac-
+ ters in a character class. For example, [d-m] matches any letter
+ between d and m, inclusive. If a minus character is required in a
+ class, it must be escaped with a backslash or appear in a position
+ where it cannot be interpreted as indicating a range, typically as the
first or last character in the class, or immediately after a range. For
- example, [b-d-z] matches letters in the range b to d, a hyphen charac-
+ example, [b-d-z] matches letters in the range b to d, a hyphen charac-
ter, or z.
Perl treats a hyphen as a literal if it appears before or after a POSIX
class (see below) or before or after a character type escape such as
- \d or \H. However, unless the hyphen is the last character in the
- class, Perl outputs a warning in its warning mode, as this is most
- likely a user error. As PCRE2 has no facility for warning, an error is
+ \d or \H. However, unless the hyphen is the last character in the
+ class, Perl outputs a warning in its warning mode, as this is most
+ likely a user error. As PCRE2 has no facility for warning, an error is
given in these cases.
It is not possible to have the literal character "]" as the end charac-
- ter of a range. A pattern such as [W-]46] is interpreted as a class of
- two characters ("W" and "-") followed by a literal string "46]", so it
- would match "W46]" or "-46]". However, if the "]" is escaped with a
- backslash it is interpreted as the end of range, so [W-\]46] is inter-
- preted as a class containing a range followed by two other characters.
- The octal or hexadecimal representation of "]" can also be used to end
+ ter of a range. A pattern such as [W-]46] is interpreted as a class of
+ two characters ("W" and "-") followed by a literal string "46]", so it
+ would match "W46]" or "-46]". However, if the "]" is escaped with a
+ backslash it is interpreted as the end of range, so [W-\]46] is inter-
+ preted as a class containing a range followed by two other characters.
+ The octal or hexadecimal representation of "]" can also be used to end
a range.
Ranges normally include all code points between the start and end char-
- acters, inclusive. They can also be used for code points specified
+ acters, inclusive. They can also be used for code points specified
numerically, for example [\000-\037]. Ranges can include any characters
- that are valid for the current mode. In any UTF mode, the so-called
- "surrogate" characters (those whose code points lie between 0xd800 and
- 0xdfff inclusive) may not be specified explicitly by default (the
- PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES option disables this check). How-
+ that are valid for the current mode. In any UTF mode, the so-called
+ "surrogate" characters (those whose code points lie between 0xd800 and
+ 0xdfff inclusive) may not be specified explicitly by default (the
+ PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES option disables this check). How-
ever, ranges such as [\x{d7ff}-\x{e000}], which include the surrogates,
are always permitted.
- There is a special case in EBCDIC environments for ranges whose end
+ There is a special case in EBCDIC environments for ranges whose end
points are both specified as literal letters in the same case. For com-
- patibility with Perl, EBCDIC code points within the range that are not
- letters are omitted. For example, [h-k] matches only four characters,
+ patibility with Perl, EBCDIC code points within the range that are not
+ letters are omitted. For example, [h-k] matches only four characters,
even though the codes for h and k are 0x88 and 0x92, a range of 11 code
- points. However, if the range is specified numerically, for example,
+ points. However, if the range is specified numerically, for example,
[\x88-\x92] or [h-\x92], all code points are included.
If a range that includes letters is used when caseless matching is set,
it matches the letters in either case. For example, [W-c] is equivalent
- to [][\\^_`wxyzabc], matched caselessly, and in a non-UTF mode, if
- character tables for a French locale are in use, [\xc8-\xcb] matches
+ to [][\\^_`wxyzabc], matched caselessly, and in a non-UTF mode, if
+ character tables for a French locale are in use, [\xc8-\xcb] matches
accented E characters in both cases.
- A circumflex can conveniently be used with the upper case character
- types to specify a more restricted set of characters than the matching
- lower case type. For example, the class [^\W_] matches any letter or
+ A circumflex can conveniently be used with the upper case character
+ types to specify a more restricted set of characters than the matching
+ lower case type. For example, the class [^\W_] matches any letter or
digit, but not underscore, whereas [\w] includes underscore. A positive
character class should be read as "something OR something OR ..." and a
negative class as "NOT something AND NOT something AND NOT ...".
- The only metacharacters that are recognized in character classes are
- backslash, hyphen (only where it can be interpreted as specifying a
- range), circumflex (only at the start), opening square bracket (only
- when it can be interpreted as introducing a POSIX class name, or for a
- special compatibility feature - see the next two sections), and the
+ The only metacharacters that are recognized in character classes are
+ backslash, hyphen (only where it can be interpreted as specifying a
+ range), circumflex (only at the start), opening square bracket (only
+ when it can be interpreted as introducing a POSIX class name, or for a
+ special compatibility feature - see the next two sections), and the
terminating closing square bracket. However, escaping other non-
alphanumeric characters does no harm.
@@ -6962,7 +7105,7 @@ SQUARE BRACKETS AND CHARACTER CLASSES
POSIX CHARACTER CLASSES
Perl supports the POSIX notation for character classes. This uses names
- enclosed by [: and :] within the enclosing square brackets. PCRE2 also
+ enclosed by [: and :] within the enclosing square brackets. PCRE2 also
supports this notation. For example,
[01[:alpha:]%]
@@ -6985,13 +7128,13 @@ POSIX CHARACTER CLASSES
word "word" characters (same as \w)
xdigit hexadecimal digits
- The default "space" characters are HT (9), LF (10), VT (11), FF (12),
- CR (13), and space (32). If locale-specific matching is taking place,
- the list of space characters may be different; there may be fewer or
+ The default "space" characters are HT (9), LF (10), VT (11), FF (12),
+ CR (13), and space (32). If locale-specific matching is taking place,
+ the list of space characters may be different; there may be fewer or
more of them. "Space" and \s match the same set of characters.
- The name "word" is a Perl extension, and "blank" is a GNU extension
- from Perl 5.8. Another Perl extension is negation, which is indicated
+ The name "word" is a Perl extension, and "blank" is a GNU extension
+ from Perl 5.8. Another Perl extension is negation, which is indicated
by a ^ character after the colon. For example,
[12[:^digit:]]
@@ -7002,9 +7145,9 @@ POSIX CHARACTER CLASSES
By default, characters with values greater than 127 do not match any of
the POSIX character classes, although this may be different for charac-
- ters in the range 128-255 when locale-specific matching is happening.
- However, if the PCRE2_UCP option is passed to pcre2_compile(), some of
- the classes are changed so that Unicode character properties are used.
+ ters in the range 128-255 when locale-specific matching is happening.
+ However, if the PCRE2_UCP option is passed to pcre2_compile(), some of
+ the classes are changed so that Unicode character properties are used.
This is achieved by replacing certain POSIX classes with other
sequences, as follows:
@@ -7018,10 +7161,10 @@ POSIX CHARACTER CLASSES
[:upper:] becomes \p{Lu}
[:word:] becomes \p{Xwd}
- Negated versions, such as [:^alpha:] use \P instead of \p. Three other
+ Negated versions, such as [:^alpha:] use \P instead of \p. Three other
POSIX classes are handled specially in UCP mode:
- [:graph:] This matches characters that have glyphs that mark the page
+ [:graph:] This matches characters that have glyphs that mark the page
when printed. In Unicode property terms, it matches all char-
acters with the L, M, N, P, S, or Cf properties, except for:
@@ -7030,60 +7173,61 @@ POSIX CHARACTER CLASSES
U+2066 - U+2069 Various "isolate"s
- [:print:] This matches the same characters as [:graph:] plus space
- characters that are not controls, that is, characters with
+ [:print:] This matches the same characters as [:graph:] plus space
+ characters that are not controls, that is, characters with
the Zs property.
[:punct:] This matches all characters that have the Unicode P (punctua-
- tion) property, plus those characters with code points less
+ tion) property, plus those characters with code points less
than 256 that have the S (Symbol) property.
- The other POSIX classes are unchanged, and match only characters with
+ The other POSIX classes are unchanged, and match only characters with
code points less than 256.
COMPATIBILITY FEATURE FOR WORD BOUNDARIES
- In the POSIX.2 compliant library that was included in 4.4BSD Unix, the
- ugly syntax [[:<:]] and [[:>:]] is used for matching "start of word"
+ In the POSIX.2 compliant library that was included in 4.4BSD Unix, the
+ ugly syntax [[:<:]] and [[:>:]] is used for matching "start of word"
and "end of word". PCRE2 treats these items as follows:
[[:<:]] is converted to \b(?=\w)
[[:>:]] is converted to \b(?<=\w)
Only these exact character sequences are recognized. A sequence such as
- [a[:<:]b] provokes error for an unrecognized POSIX class name. This
- support is not compatible with Perl. It is provided to help migrations
+ [a[:<:]b] provokes an error for an unrecognized POSIX class name. This
+ support is not compatible with Perl. It is provided to help migrations
from other environments, and is best not used in any new patterns. Note
- that \b matches at the start and the end of a word (see "Simple asser-
- tions" above), and in a Perl-style pattern the preceding or following
- character normally shows which is wanted, without the need for the
- assertions that are used above in order to give exactly the POSIX be-
+ that \b matches at the start and the end of a word (see "Simple asser-
+ tions" above), and in a Perl-style pattern the preceding or following
+ character normally shows which is wanted, without the need for the
+ assertions that are used above in order to give exactly the POSIX be-
haviour.
VERTICAL BAR
- Vertical bar characters are used to separate alternative patterns. For
+ Vertical bar characters are used to separate alternative patterns. For
example, the pattern
gilbert|sullivan
- matches either "gilbert" or "sullivan". Any number of alternatives may
- appear, and an empty alternative is permitted (matching the empty
+ matches either "gilbert" or "sullivan". Any number of alternatives may
+ appear, and an empty alternative is permitted (matching the empty
string). The matching process tries each alternative in turn, from left
- to right, and the first one that succeeds is used. If the alternatives
- are within a subpattern (defined below), "succeeds" means matching the
+ to right, and the first one that succeeds is used. If the alternatives
+ are within a subpattern (defined below), "succeeds" means matching the
rest of the main pattern as well as the alternative in the subpattern.
INTERNAL OPTION SETTING
- The settings of the PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL,
- PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE options
- (which are Perl-compatible) can be changed from within the pattern by a
- sequence of Perl option letters enclosed between "(?" and ")". The
- option letters are
+ The settings of the PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL,
+ PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE options
+ can be changed from within the pattern by a sequence of letters
+ enclosed between "(?" and ")". These options are Perl-compatible, and
+ are described in detail in the pcre2api documentation. The option let-
+ ters are:
i for PCRE2_CASELESS
m for PCRE2_MULTILINE
@@ -7093,19 +7237,25 @@ INTERNAL OPTION SETTING
xx for PCRE2_EXTENDED_MORE
For example, (?im) sets caseless, multiline matching. It is also possi-
- ble to unset these options by preceding the letter with a hyphen. The
- two "extended" options are not independent; unsetting either one can-
- cels the effects of both of them.
+ ble to unset these options by preceding the relevant letters with a
+ hyphen, for example (?-im). The two "extended" options are not indepen-
+ dent; unsetting either one cancels the effects of both of them.
A combined setting and unsetting such as (?im-sx), which sets
PCRE2_CASELESS and PCRE2_MULTILINE while unsetting PCRE2_DOTALL and
- PCRE2_EXTENDED, is also permitted. If a letter appears both before and
- after the hyphen, the option is unset. An empty options setting "(?)"
- is allowed. Needless to say, it has no effect.
+ PCRE2_EXTENDED, is also permitted. Only one hyphen may appear in the
+ options string. If a letter appears both before and after the hyphen,
+ the option is unset. An empty options setting "(?)" is allowed. Need-
+ less to say, it has no effect.
+
+ If the first character following (? is a circumflex, it causes all of
+ the above options to be unset. Thus, (?^) is equivalent to (?-imnsx).
+ Letters may follow the circumflex to cause some options to be re-
+ instated, but a hyphen may not appear.
The PCRE2-specific options PCRE2_DUPNAMES and PCRE2_UNGREEDY can be
changed in the same way as the Perl-compatible options by using the
- characters J and U respectively.
+ characters J and U respectively. However, these are not unset by (?^).
When one of these option changes occurs at top level (that is, not
inside subpattern parentheses), the change applies to the remainder of
@@ -7228,7 +7378,7 @@ DUPLICATE SUBPATTERN NUMBERS
/ ( a ) (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
# 1 2 2 3 2 3 4
- A back reference to a numbered subpattern uses the most recent value
+ A backreference to a numbered subpattern uses the most recent value
that is set for that number by any subpattern. The following pattern
matches "abcabc" or "defdef":
@@ -7254,34 +7404,62 @@ DUPLICATE SUBPATTERN NUMBERS
NAMED SUBPATTERNS
Identifying capturing parentheses by number is simple, but it can be
- very hard to keep track of the numbers in complicated regular expres-
- sions. Furthermore, if an expression is modified, the numbers may
- change. To help with this difficulty, PCRE2 supports the naming of sub-
- patterns. This feature was not added to Perl until release 5.10. Python
+ very hard to keep track of the numbers in complicated patterns. Fur-
+ thermore, if an expression is modified, the numbers may change. To help
+ with this difficulty, PCRE2 supports the naming of capturing subpat-
+ terns. This feature was not added to Perl until release 5.10. Python
had the feature earlier, and PCRE1 introduced it at release 4.0, using
- the Python syntax. PCRE2 supports both the Perl and the Python syntax.
- Perl allows identically numbered subpatterns to have different names,
- but PCRE2 does not.
-
- In PCRE2, a subpattern can be named in one of three ways: (?<name>...)
- or (?'name'...) as in Perl, or (?P<name>...) as in Python. References
- to capturing parentheses from other parts of the pattern, such as back
- references, recursion, and conditions, can be made by name as well as
- by number.
+ the Python syntax. PCRE2 supports both the Perl and the Python syntax.
+ In PCRE2, a capturing subpattern can be named in one of three ways:
+ (?<name>...) or (?'name'...) as in Perl, or (?P<name>...) as in Python.
Names consist of up to 32 alphanumeric characters and underscores, but
- must start with a non-digit. Named capturing parentheses are still
- allocated numbers as well as names, exactly as if the names were not
- present. The PCRE2 API provides function calls for extracting the name-
- to-number translation table from a compiled pattern. There are also
- convenience functions for extracting a captured substring by name.
-
- By default, a name must be unique within a pattern, but it is possible
- to relax this constraint by setting the PCRE2_DUPNAMES option at com-
- pile time. (Duplicate names are also always permitted for subpatterns
- with the same number, set up as described in the previous section.)
+ must start with a non-digit. References to capturing parentheses from
+ other parts of the pattern, such as backreferences, recursion, and con-
+ ditions, can all be made by name as well as by number.
+
+ Named capturing parentheses are allocated numbers as well as names,
+ exactly as if the names were not present. In both PCRE2 and Perl, cap-
+ turing subpatterns are primarily identified by numbers; any names are
+ just aliases for these numbers. The PCRE2 API provides function calls
+ for extracting the complete name-to-number translation table from a
+ compiled pattern, as well as convenience functions for extracting cap-
+ tured substrings by name.
+
+ Warning: When more than one subpattern has the same number, as
+ described in the previous section, a name given to one of them applies
+ to all of them. Perl allows identically numbered subpatterns to have
+ different names. Consider this pattern, where there are two capturing
+ subpatterns, both numbered 1:
+
+ (?|(?<AA>aa)|(?<BB>bb))
+
+ Perl allows this, with both names AA and BB as aliases of group 1.
+ Thus, after a successful match, both names yield the same value (either
+ "aa" or "bb").
+
+ In an attempt to reduce confusion, PCRE2 does not allow the same group
+ number to be associated with more than one name. The example above pro-
+ vokes a compile-time error. However, there is still scope for confu-
+ sion. Consider this pattern:
+
+ (?|(?<AA>aa)|(bb))
+
+ Although the second subpattern number 1 is not explicitly named, the
+ name AA is still an alias for subpattern 1. Whether the pattern matches
+ "aa" or "bb", a reference by name to group AA yields the matched
+ string.
+
+ By default, a name must be unique within a pattern, except that dupli-
+ cate names are permitted for subpatterns with the same number, for
+ example:
+
+ (?|(?<AA>aa)|(?<AA>bb))
+
+ The duplicate name constraint can be disabled by setting the PCRE2_DUP-
+ NAMES option at compile time, or by the use of (?J) within the pattern.
Duplicate names can be useful for patterns where only one instance of
- the named parentheses can match. Suppose you want to match the name of
+ the named parentheses can match. Suppose you want to match the name of
a weekday, either as a 3-letter abbreviation or as the full name, and
in both cases you want to extract the abbreviation. This pattern
(ignoring the line breaks) does the job:
@@ -7293,17 +7471,16 @@ NAMED SUBPATTERNS
(?<DN>Sat)(?:urday)?
There are five capturing substrings, but only one is ever set after a
- match. (An alternative way of solving this problem is to use a "branch
- reset" subpattern, as described in the previous section.)
-
- The convenience functions for extracting the data by name returns the
- substring for the first (and in this example, the only) subpattern of
- that name that matched. This saves searching to find which numbered
- subpattern it was.
-
- If you make a back reference to a non-unique named subpattern from
- elsewhere in the pattern, the subpatterns to which the name refers are
- checked in the order in which they appear in the overall pattern. The
+ match. The convenience functions for extracting the data by name
+ return the substring for the first (and in this example, the only)
+ subpattern of that name that matched. This saves searching to find
+ which numbered subpattern it was. (An alternative way of solving this
+ problem is to use a "branch reset" subpattern, as described in the pre-
+ vious section.)
+
+ If you make a backreference to a non-unique named subpattern from else-
+ where in the pattern, the subpatterns to which the name refers are
+ checked in the order in which they appear in the overall pattern. The
first one that is set is used for the reference. For example, this pat-
tern matches both "foofoo" and "barbar" but not "foobar" or "barfoo":
@@ -7311,9 +7488,8 @@ NAMED SUBPATTERNS
If you make a subroutine call to a non-unique named subpattern, the one
- that corresponds to the first occurrence of the name is used. In the
- absence of duplicate numbers (see the previous section) this is the one
- with the lowest number.
+ that corresponds to the first occurrence of the name is used. In the
+ absence of duplicate numbers this is the one with the lowest number.
If you use a named reference in a condition test (see the section about
conditions below), either to check whether a subpattern has matched, or
@@ -7323,17 +7499,10 @@ NAMED SUBPATTERNS
details of the interfaces for handling named subpatterns, see the
pcre2api documentation.
- Warning: You cannot use different names to distinguish between two sub-
- patterns with the same number because PCRE2 uses only the numbers when
- matching. For this reason, an error is given at compile time if differ-
- ent names are given to subpatterns with the same number. However, you
- can always give the same name to subpatterns with the same number, even
- when PCRE2_DUPNAMES is not set.
-
REPETITION
- Repetition is specified by quantifiers, which can follow any of the
+ Repetition is specified by quantifiers, which can follow any of the
following items:
a literal data character
@@ -7343,21 +7512,21 @@ REPETITION
the \R escape sequence
an escape such as \d or \pL that matches a single character
a character class
- a back reference
+ a backreference
a parenthesized subpattern (including most assertions)
a subroutine call to a subpattern (recursive or otherwise)
- The general repetition quantifier specifies a minimum and maximum num-
- ber of permitted matches, by giving the two numbers in curly brackets
- (braces), separated by a comma. The numbers must be less than 65536,
+ The general repetition quantifier specifies a minimum and maximum num-
+ ber of permitted matches, by giving the two numbers in curly brackets
+ (braces), separated by a comma. The numbers must be less than 65536,
and the first must be less than or equal to the second. For example:
z{2,4}
- matches "zz", "zzz", or "zzzz". A closing brace on its own is not a
- special character. If the second number is omitted, but the comma is
- present, there is no upper limit; if the second number and the comma
- are both omitted, the quantifier specifies an exact number of required
+ matches "zz", "zzz", or "zzzz". A closing brace on its own is not a
+ special character. If the second number is omitted, but the comma is
+ present, there is no upper limit; if the second number and the comma
+ are both omitted, the quantifier specifies an exact number of required
matches. Thus
[aeiou]{3,}
@@ -7366,50 +7535,50 @@ REPETITION
\d{8}
- matches exactly 8 digits. An opening curly bracket that appears in a
- position where a quantifier is not allowed, or one that does not match
- the syntax of a quantifier, is taken as a literal character. For exam-
+ matches exactly 8 digits. An opening curly bracket that appears in a
+ position where a quantifier is not allowed, or one that does not match
+ the syntax of a quantifier, is taken as a literal character. For exam-
ple, {,6} is not a quantifier, but a literal string of four characters.
In UTF modes, quantifiers apply to characters rather than to individual
- code units. Thus, for example, \x{100}{2} matches two characters, each
+ code units. Thus, for example, \x{100}{2} matches two characters, each
of which is represented by a two-byte sequence in a UTF-8 string. Simi-
- larly, \X{3} matches three Unicode extended grapheme clusters, each of
- which may be several code units long (and they may be of different
+ larly, \X{3} matches three Unicode extended grapheme clusters, each of
+ which may be several code units long (and they may be of different
lengths).
The quantifier {0} is permitted, causing the expression to behave as if
the previous item and the quantifier were not present. This may be use-
- ful for subpatterns that are referenced as subroutines from elsewhere
+ ful for subpatterns that are referenced as subroutines from elsewhere
in the pattern (but see also the section entitled "Defining subpatterns
- for use by reference only" below). Items other than subpatterns that
+ for use by reference only" below). Items other than subpatterns that
have a {0} quantifier are omitted from the compiled pattern.
- For convenience, the three most common quantifiers have single-charac-
+ For convenience, the three most common quantifiers have single-charac-
ter abbreviations:
* is equivalent to {0,}
+ is equivalent to {1,}
? is equivalent to {0,1}
- It is possible to construct infinite loops by following a subpattern
+ It is possible to construct infinite loops by following a subpattern
that can match no characters with a quantifier that has no upper limit,
for example:
(a?)*
- Earlier versions of Perl and PCRE1 used to give an error at compile
+ Earlier versions of Perl and PCRE1 used to give an error at compile
time for such patterns. However, because there are cases where this can
be useful, such patterns are now accepted, but if any repetition of the
- subpattern does in fact match no characters, the loop is forcibly bro-
+ subpattern does in fact match no characters, the loop is forcibly bro-
ken.
- By default, the quantifiers are "greedy", that is, they match as much
- as possible (up to the maximum number of permitted times), without
- causing the rest of the pattern to fail. The classic example of where
+ By default, the quantifiers are "greedy", that is, they match as much
+ as possible (up to the maximum number of permitted times), without
+ causing the rest of the pattern to fail. The classic example of where
this gives problems is in trying to match comments in C programs. These
- appear between /* and */ and within the comment, individual * and /
- characters may appear. An attempt to match C comments by applying the
+ appear between /* and */ and within the comment, individual * and /
+ characters may appear. An attempt to match C comments by applying the
pattern
/\*.*\*/
@@ -7418,19 +7587,19 @@ REPETITION
/* first comment */ not comment /* second comment */
- fails, because it matches the entire string owing to the greediness of
+ fails, because it matches the entire string owing to the greediness of
the .* item.
If a quantifier is followed by a question mark, it ceases to be greedy,
- and instead matches the minimum number of times possible, so the pat-
+ and instead matches the minimum number of times possible, so the pat-
tern
/\*.*?\*/
- does the right thing with the C comments. The meaning of the various
- quantifiers is not otherwise changed, just the preferred number of
- matches. Do not confuse this use of question mark with its use as a
- quantifier in its own right. Because it has two uses, it can sometimes
+ does the right thing with the C comments. The meaning of the various
+ quantifiers is not otherwise changed, just the preferred number of
+ matches. Do not confuse this use of question mark with its use as a
+ quantifier in its own right. Because it has two uses, it can sometimes
appear doubled, as in
\d??\d
@@ -7439,45 +7608,45 @@ REPETITION
only way the rest of the pattern matches.
If the PCRE2_UNGREEDY option is set (an option that is not available in
- Perl), the quantifiers are not greedy by default, but individual ones
- can be made greedy by following them with a question mark. In other
+ Perl), the quantifiers are not greedy by default, but individual ones
+ can be made greedy by following them with a question mark. In other
words, it inverts the default behaviour.
- When a parenthesized subpattern is quantified with a minimum repeat
- count that is greater than 1 or with a limited maximum, more memory is
- required for the compiled pattern, in proportion to the size of the
+ When a parenthesized subpattern is quantified with a minimum repeat
+ count that is greater than 1 or with a limited maximum, more memory is
+ required for the compiled pattern, in proportion to the size of the
minimum or maximum.
- If a pattern starts with .* or .{0,} and the PCRE2_DOTALL option
- (equivalent to Perl's /s) is set, thus allowing the dot to match new-
- lines, the pattern is implicitly anchored, because whatever follows
- will be tried against every character position in the subject string,
- so there is no point in retrying the overall match at any position
+ If a pattern starts with .* or .{0,} and the PCRE2_DOTALL option
+ (equivalent to Perl's /s) is set, thus allowing the dot to match new-
+ lines, the pattern is implicitly anchored, because whatever follows
+ will be tried against every character position in the subject string,
+ so there is no point in retrying the overall match at any position
after the first. PCRE2 normally treats such a pattern as though it were
preceded by \A.
- In cases where it is known that the subject string contains no new-
- lines, it is worth setting PCRE2_DOTALL in order to obtain this opti-
+ In cases where it is known that the subject string contains no new-
+ lines, it is worth setting PCRE2_DOTALL in order to obtain this opti-
mization, or alternatively, using ^ to indicate anchoring explicitly.
- However, there are some cases where the optimization cannot be used.
- When .* is inside capturing parentheses that are the subject of a back
- reference elsewhere in the pattern, a match at the start may fail where
- a later one succeeds. Consider, for example:
+ However, there are some cases where the optimization cannot be used.
+ When .* is inside capturing parentheses that are the subject of a
+ backreference elsewhere in the pattern, a match at the start may fail
+ where a later one succeeds. Consider, for example:
(.*)abc\1
- If the subject is "xyz123abc123" the match point is the fourth charac-
+ If the subject is "xyz123abc123" the match point is the fourth charac-
ter. For this reason, such a pattern is not implicitly anchored.
- Another case where implicit anchoring is not applied is when the lead-
- ing .* is inside an atomic group. Once again, a match at the start may
+ Another case where implicit anchoring is not applied is when the lead-
+ ing .* is inside an atomic group. Once again, a match at the start may
fail where a later one succeeds. Consider this pattern:
(?>.*?a)b
- It matches "ab" in the subject "aab". The use of the backtracking con-
- trol verbs (*PRUNE) and (*SKIP) also disable this optimization, and
+ It matches "ab" in the subject "aab". The use of the backtracking con-
+ trol verbs (*PRUNE) and (*SKIP) also disables this optimization, and
there is an option, PCRE2_NO_DOTSTAR_ANCHOR, to do so explicitly.
When a capturing subpattern is repeated, the value captured is the sub-
@@ -7486,8 +7655,8 @@ REPETITION
(tweedle[dume]{3}\s*)+
has matched "tweedledum tweedledee" the value of the captured substring
- is "tweedledee". However, if there are nested capturing subpatterns,
- the corresponding captured values may have been set in previous itera-
+ is "tweedledee". However, if there are nested capturing subpatterns,
+ the corresponding captured values may have been set in previous itera-
tions. For example, after
(a|(b))+
@@ -7497,53 +7666,53 @@ REPETITION
ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS
- With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy")
- repetition, failure of what follows normally causes the repeated item
- to be re-evaluated to see if a different number of repeats allows the
- rest of the pattern to match. Sometimes it is useful to prevent this,
- either to change the nature of the match, or to cause it fail earlier
- than it otherwise might, when the author of the pattern knows there is
+ With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy")
+ repetition, failure of what follows normally causes the repeated item
+ to be re-evaluated to see if a different number of repeats allows the
+ rest of the pattern to match. Sometimes it is useful to prevent this,
+ either to change the nature of the match, or to cause it to fail earlier
+ than it otherwise might, when the author of the pattern knows there is
no point in carrying on.
- Consider, for example, the pattern \d+foo when applied to the subject
+ Consider, for example, the pattern \d+foo when applied to the subject
line
123456bar
After matching all 6 digits and then failing to match "foo", the normal
- action of the matcher is to try again with only 5 digits matching the
- \d+ item, and then with 4, and so on, before ultimately failing.
- "Atomic grouping" (a term taken from Jeffrey Friedl's book) provides
- the means for specifying that once a subpattern has matched, it is not
+ action of the matcher is to try again with only 5 digits matching the
+ \d+ item, and then with 4, and so on, before ultimately failing.
+ "Atomic grouping" (a term taken from Jeffrey Friedl's book) provides
+ the means for specifying that once a subpattern has matched, it is not
to be re-evaluated in this way.
- If we use atomic grouping for the previous example, the matcher gives
- up immediately on failing to match "foo" the first time. The notation
+ If we use atomic grouping for the previous example, the matcher gives
+ up immediately on failing to match "foo" the first time. The notation
is a kind of special parenthesis, starting with (?> as in this example:
(?>\d+)foo
- This kind of parenthesis "locks up" the part of the pattern it con-
- tains once it has matched, and a failure further into the pattern is
- prevented from backtracking into it. Backtracking past it to previous
+ This kind of parenthesis "locks up" the part of the pattern it con-
+ tains once it has matched, and a failure further into the pattern is
+ prevented from backtracking into it. Backtracking past it to previous
items, however, works as normal.
- An alternative description is that a subpattern of this type matches
- exactly the string of characters that an identical standalone pattern
+ An alternative description is that a subpattern of this type matches
+ exactly the string of characters that an identical standalone pattern
would match, if anchored at the current point in the subject string.
Atomic grouping subpatterns are not capturing subpatterns. Simple cases
such as the above example can be thought of as a maximizing repeat that
- must swallow everything it can. So, while both \d+ and \d+? are pre-
- pared to adjust the number of digits they match in order to make the
+ must swallow everything it can. So, while both \d+ and \d+? are pre-
+ pared to adjust the number of digits they match in order to make the
rest of the pattern match, (?>\d+) can only match an entire sequence of
digits.
- Atomic groups in general can of course contain arbitrarily complicated
- subpatterns, and can be nested. However, when the subpattern for an
+ Atomic groups in general can of course contain arbitrarily complicated
+ subpatterns, and can be nested. However, when the subpattern for an
atomic group is just a single repeated item, as in the example above, a
- simpler notation, called a "possessive quantifier" can be used. This
- consists of an additional + character following a quantifier. Using
+ simpler notation, called a "possessive quantifier" can be used. This
+ consists of an additional + character following a quantifier. Using
this notation, the previous example can be rewritten as
\d++foo
@@ -7553,46 +7722,46 @@ ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS
(abc|xyz){2,3}+
- Possessive quantifiers are always greedy; the setting of the
- PCRE2_UNGREEDY option is ignored. They are a convenient notation for
- the simpler forms of atomic group. However, there is no difference in
+ Possessive quantifiers are always greedy; the setting of the
+ PCRE2_UNGREEDY option is ignored. They are a convenient notation for
+ the simpler forms of atomic group. However, there is no difference in
the meaning of a possessive quantifier and the equivalent atomic group,
- though there may be a performance difference; possessive quantifiers
+ though there may be a performance difference; possessive quantifiers
should be slightly faster.
- The possessive quantifier syntax is an extension to the Perl 5.8 syn-
- tax. Jeffrey Friedl originated the idea (and the name) in the first
+ The possessive quantifier syntax is an extension to the Perl 5.8 syn-
+ tax. Jeffrey Friedl originated the idea (and the name) in the first
edition of his book. Mike McCloskey liked it, so implemented it when he
built Sun's Java package, and PCRE1 copied it from there. It ultimately
found its way into Perl at release 5.10.
- PCRE2 has an optimization that automatically "possessifies" certain
- simple pattern constructs. For example, the sequence A+B is treated as
- A++B because there is no point in backtracking into a sequence of A's
+ PCRE2 has an optimization that automatically "possessifies" certain
+ simple pattern constructs. For example, the sequence A+B is treated as
+ A++B because there is no point in backtracking into a sequence of A's
when B must follow. This feature can be disabled by the PCRE2_NO_AUTO-
POSSESS option, or by starting the pattern with (*NO_AUTO_POSSESS).
- When a pattern contains an unlimited repeat inside a subpattern that
- can itself be repeated an unlimited number of times, the use of an
- atomic group is the only way to avoid some failing matches taking a
+ When a pattern contains an unlimited repeat inside a subpattern that
+ can itself be repeated an unlimited number of times, the use of an
+ atomic group is the only way to avoid some failing matches taking a
very long time indeed. The pattern
(\D+|<\d+>)*[!?]
- matches an unlimited number of substrings that either consist of non-
- digits, or digits enclosed in <>, followed by either ! or ?. When it
+ matches an unlimited number of substrings that either consist of non-
+ digits, or digits enclosed in <>, followed by either ! or ?. When it
matches, it runs quickly. However, if it is applied to
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
- it takes a long time before reporting failure. This is because the
- string can be divided between the internal \D+ repeat and the external
- * repeat in a large number of ways, and all have to be tried. (The
- example uses [!?] rather than a single character at the end, because
- both PCRE2 and Perl have an optimization that allows for fast failure
- when a single character is used. They remember the last single charac-
- ter that is required for a match, and fail early if it is not present
- in the string.) If the pattern is changed so that it uses an atomic
+ it takes a long time before reporting failure. This is because the
+ string can be divided between the internal \D+ repeat and the external
+ * repeat in a large number of ways, and all have to be tried. (The
+ example uses [!?] rather than a single character at the end, because
+ both PCRE2 and Perl have an optimization that allows for fast failure
+ when a single character is used. They remember the last single charac-
+ ter that is required for a match, and fail early if it is not present
+ in the string.) If the pattern is changed so that it uses an atomic
group, like this:
((?>\D+)|<\d+>)*[!?]
@@ -7600,32 +7769,32 @@ ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS
sequences of non-digits cannot be broken, and failure happens quickly.
-BACK REFERENCES
+BACKREFERENCES
Outside a character class, a backslash followed by a digit greater than
- 0 (and possibly further digits) is a back reference to a capturing sub-
- pattern earlier (that is, to its left) in the pattern, provided there
+ 0 (and possibly further digits) is a backreference to a capturing sub-
+ pattern earlier (that is, to its left) in the pattern, provided there
have been that many previous capturing left parentheses.
- However, if the decimal number following the backslash is less than 8,
- it is always taken as a back reference, and causes an error only if
- there are not that many capturing left parentheses in the entire pat-
- tern. In other words, the parentheses that are referenced need not be
- to the left of the reference for numbers less than 8. A "forward back
- reference" of this type can make sense when a repetition is involved
- and the subpattern to the right has participated in an earlier itera-
+ However, if the decimal number following the backslash is less than 8,
+ it is always taken as a backreference, and causes an error only if
+ there are not that many capturing left parentheses in the entire pat-
+ tern. In other words, the parentheses that are referenced need not be
+ to the left of the reference for numbers less than 8. A "forward back-
+ reference" of this type can make sense when a repetition is involved
+ and the subpattern to the right has participated in an earlier itera-
tion.
- It is not possible to have a numerical "forward back reference" to a
- subpattern whose number is 8 or more using this syntax because a
- sequence such as \50 is interpreted as a character defined in octal.
+ It is not possible to have a numerical "forward backreference" to a
+ subpattern whose number is 8 or more using this syntax because a
+ sequence such as \50 is interpreted as a character defined in octal.
See the subsection entitled "Non-printing characters" above for further
- details of the handling of digits following a backslash. There is no
- such problem when named parentheses are used. A back reference to any
+ details of the handling of digits following a backslash. There is no
+ such problem when named parentheses are used. A backreference to any
subpattern is possible using named parentheses (see below).
- Another way of avoiding the ambiguity inherent in the use of digits
- following a backslash is to use the \g escape sequence. This escape
+ Another way of avoiding the ambiguity inherent in the use of digits
+ following a backslash is to use the \g escape sequence. This escape
must be followed by a signed or unsigned number, optionally enclosed in
braces. These examples are all identical:
@@ -7633,46 +7802,46 @@ BACK REFERENCES
(ring), \g1
(ring), \g{1}
- An unsigned number specifies an absolute reference without the ambigu-
+ An unsigned number specifies an absolute reference without the ambigu-
ity that is present in the older syntax. It is also useful when literal
- digits follow the reference. A signed number is a relative reference.
+ digits follow the reference. A signed number is a relative reference.
Consider this example:
(abc(def)ghi)\g{-1}
The sequence \g{-1} is a reference to the most recently started captur-
ing subpattern before \g, that is, it is equivalent to \2 in this exam-
- ple. Similarly, \g{-2} would be equivalent to \1. The use of relative
- references can be helpful in long patterns, and also in patterns that
- are created by joining together fragments that contain references
+ ple. Similarly, \g{-2} would be equivalent to \1. The use of relative
+ references can be helpful in long patterns, and also in patterns that
+ are created by joining together fragments that contain references
within themselves.
- The sequence \g{+1} is a reference to the next capturing subpattern.
- This kind of forward reference can be useful it patterns that repeat.
+ The sequence \g{+1} is a reference to the next capturing subpattern.
+ This kind of forward reference can be useful in patterns that repeat.
Perl does not support the use of + in this way.
- A back reference matches whatever actually matched the capturing sub-
- pattern in the current subject string, rather than anything matching
- the subpattern itself (see "Subpatterns as subroutines" below for a way
- of doing that). So the pattern
+ A backreference matches whatever actually matched the capturing subpat-
+ tern in the current subject string, rather than anything matching the
+ subpattern itself (see "Subpatterns as subroutines" below for a way of
+ doing that). So the pattern
(sens|respons)e and \1ibility
- matches "sense and sensibility" and "response and responsibility", but
- not "sense and responsibility". If caseful matching is in force at the
- time of the back reference, the case of letters is relevant. For exam-
+ matches "sense and sensibility" and "response and responsibility", but
+ not "sense and responsibility". If caseful matching is in force at the
+ time of the backreference, the case of letters is relevant. For exam-
ple,
((?i)rah)\s+\1
- matches "rah rah" and "RAH RAH", but not "RAH rah", even though the
+ matches "rah rah" and "RAH RAH", but not "RAH rah", even though the
original capturing subpattern is matched caselessly.
- There are several different ways of writing back references to named
- subpatterns. The .NET syntax \k{name} and the Perl syntax \k<name> or
- \k'name' are supported, as is the Python syntax (?P=name). Perl 5.10's
- unified back reference syntax, in which \g can be used for both numeric
- and named references, is also supported. We could rewrite the above
+ There are several different ways of writing backreferences to named
+ subpatterns. The .NET syntax \k{name} and the Perl syntax \k<name> or
+ \k'name' are supported, as is the Python syntax (?P=name). Perl 5.10's
+ unified backreference syntax, in which \g can be used for both numeric
+ and named references, is also supported. We could rewrite the above
example in any of the following ways:
(?<p1>(?i)rah)\s+\k<p1>
@@ -7680,29 +7849,30 @@ BACK REFERENCES
(?P<p1>(?i)rah)\s+(?P=p1)
(?<p1>(?i)rah)\s+\g{p1}
- A subpattern that is referenced by name may appear in the pattern
+ A subpattern that is referenced by name may appear in the pattern
before or after the reference.
- There may be more than one back reference to the same subpattern. If a
- subpattern has not actually been used in a particular match, any back
+ There may be more than one backreference to the same subpattern. If a
+ subpattern has not actually been used in a particular match, any back-
references to it always fail by default. For example, the pattern
(a|(bc))\2
- always fails if it starts to match "a" rather than "bc". However, if
- the PCRE2_MATCH_UNSET_BACKREF option is set at compile time, a back
- reference to an unset value matches an empty string.
+ always fails if it starts to match "a" rather than "bc". However, if
+ the PCRE2_MATCH_UNSET_BACKREF option is set at compile time, a backref-
+ erence to an unset value matches an empty string.
- Because there may be many capturing parentheses in a pattern, all dig-
- its following a backslash are taken as part of a potential back refer-
- ence number. If the pattern continues with a digit character, some
- delimiter must be used to terminate the back reference. If the
- PCRE2_EXTENDED option is set, this can be white space. Otherwise, the
- \g{ syntax or an empty comment (see "Comments" below) can be used.
+ Because there may be many capturing parentheses in a pattern, all dig-
+ its following a backslash are taken as part of a potential backrefer-
+ ence number. If the pattern continues with a digit character, some
+ delimiter must be used to terminate the backreference. If the
+ PCRE2_EXTENDED or PCRE2_EXTENDED_MORE option is set, this can be white
+ space. Otherwise, the \g{ syntax or an empty comment (see "Comments"
+ below) can be used.
- Recursive back references
+ Recursive backreferences
- A back reference that occurs inside the parentheses to which it refers
+ A backreference that occurs inside the parentheses to which it refers
fails when the subpattern is first used, so, for example, (a\1) never
matches. However, such references can be useful inside repeated sub-
patterns. For example, the pattern
@@ -7710,13 +7880,13 @@ BACK REFERENCES
(a|b\1)+
matches any number of "a"s and also "aba", "ababbaa" etc. At each iter-
- ation of the subpattern, the back reference matches the character
- string corresponding to the previous iteration. In order for this to
- work, the pattern must be such that the first iteration does not need
- to match the back reference. This can be done using alternation, as in
- the example above, or by a quantifier with a minimum of zero.
+ ation of the subpattern, the backreference matches the character string
+ corresponding to the previous iteration. In order for this to work, the
+ pattern must be such that the first iteration does not need to match
+ the backreference. This can be done using alternation, as in the exam-
+ ple above, or by a quantifier with a minimum of zero.
- Back references of this type cause the group that they reference to be
+ Backreferences of this type cause the group that they reference to be
treated as an atomic group. Once the whole group has been matched, a
subsequent matching failure cannot cause backtracking into the middle
of the group.
@@ -7734,23 +7904,33 @@ ASSERTIONS
string, and those that look behind it, and in each case an assertion
may be positive (must succeed for matching to continue) or negative
(must not succeed for matching to continue). An assertion subpattern is
- matched in the normal way, except that, when matching continues after-
- wards, the matching position in the subject string is as it was at the
- start of the assertion.
+ matched in the normal way, except that, when matching continues after a
+ successful assertion, the matching position in the subject string is as
+ it was before the assertion was processed.
Assertion subpatterns are not capturing subpatterns. If an assertion
contains capturing subpatterns within it, these are counted for the
purposes of numbering the capturing subpatterns in the whole pattern.
- However, substring capturing is carried out only for positive asser-
- tions that succeed, that is, one of their branches matches, so matching
- continues after the assertion. If all branches of a positive assertion
- fail to match, nothing is captured, and control is passed to the previ-
- ous backtracking point.
-
- No capturing is done for a negative assertion unless it is being used
- as a condition in a conditional subpattern (see the discussion below).
- Matching continues after a non-conditional negative assertion only if
- all its branches fail to match.
+ Within each branch of an assertion, locally captured substrings may be
+ referenced in the usual way. For example, a sequence such as (.)\g{-1}
+ can be used to check that two adjacent characters are the same.
+
+ When a branch within an assertion fails to match, any substrings that
+ were captured are discarded (as happens with any pattern branch that
+ fails to match). A negative assertion succeeds only when all its
+ branches fail to match; this means that no captured substrings are ever
+ retained after a successful negative assertion. When an assertion con-
+ tains a matching branch, what happens depends on the type of assertion.
+
+ For a positive assertion, internally captured substrings in the suc-
+ cessful branch are retained, and matching continues with the next pat-
+ tern item after the assertion. For a negative assertion, a matching
+ branch means that the assertion has failed. If the assertion is being
+ used as a condition in a conditional subpattern (see below), captured
+ substrings are retained, because matching continues with the "no"
+ branch of the condition. For other failing negative assertions, control
+ passes to the previous backtracking point, thus discarding any captured
+ strings within the assertion.
For compatibility with Perl, most assertion subpatterns may be
repeated; though it makes no sense to assert the same thing several
@@ -7851,11 +8031,11 @@ ASSERTIONS
However, recursion, that is, a "subroutine" call into a group that is
already active, is not supported.
- Perl does not support back references in lookbehinds. PCRE2 does sup-
- port them, but only if certain conditions are met. The
+ Perl does not support backreferences in lookbehinds. PCRE2 does support
+ them, but only if certain conditions are met. The
PCRE2_MATCH_UNSET_BACKREF option must not be set, there must be no use
of (?| in the pattern (it creates duplicate subpattern numbers), and if
- the back reference is by name, the name must be unique. Of course, the
+ the backreference is by name, the name must be unique. Of course, the
referenced subpattern must itself be of fixed length. The following
pattern matches words containing at least two characters that begin and
end with the same character:
@@ -7935,8 +8115,9 @@ CONDITIONAL SUBPATTERNS
(?(condition)yes-pattern|no-pattern)
If the condition is satisfied, the yes-pattern is used; otherwise the
- no-pattern (if present) is used. If there are more than two alterna-
- tives in the subpattern, a compile-time error occurs. Each of the two
+ no-pattern (if present) is used. An absent no-pattern is equivalent to
+ an empty string (it always matches). If there are more than two alter-
+ natives in the subpattern, a compile-time error occurs. Each of the two
alternatives may itself contain nested subpatterns of any form, includ-
ing conditional subpatterns; the restriction to two alternatives
applies only at the level of the condition. This pattern fragment is an
@@ -7945,88 +8126,88 @@ CONDITIONAL SUBPATTERNS
(?(1) (A|B|C) | (D | (?(2)E|F) | E) )
- There are five kinds of condition: references to subpatterns, refer-
- ences to recursion, two pseudo-conditions called DEFINE and VERSION,
+ There are five kinds of condition: references to subpatterns, refer-
+ ences to recursion, two pseudo-conditions called DEFINE and VERSION,
and assertions.
Checking for a used subpattern by number
- If the text between the parentheses consists of a sequence of digits,
+ If the text between the parentheses consists of a sequence of digits,
the condition is true if a capturing subpattern of that number has pre-
- viously matched. If there is more than one capturing subpattern with
- the same number (see the earlier section about duplicate subpattern
- numbers), the condition is true if any of them have matched. An alter-
- native notation is to precede the digits with a plus or minus sign. In
- this case, the subpattern number is relative rather than absolute. The
- most recently opened parentheses can be referenced by (?(-1), the next
- most recent by (?(-2), and so on. Inside loops it can also make sense
+ viously matched. If there is more than one capturing subpattern with
+ the same number (see the earlier section about duplicate subpattern
+ numbers), the condition is true if any of them have matched. An alter-
+ native notation is to precede the digits with a plus or minus sign. In
+ this case, the subpattern number is relative rather than absolute. The
+ most recently opened parentheses can be referenced by (?(-1), the next
+ most recent by (?(-2), and so on. Inside loops it can also make sense
to refer to subsequent groups. The next parentheses to be opened can be
- referenced as (?(+1), and so on. (The value zero in any of these forms
+ referenced as (?(+1), and so on. (The value zero in any of these forms
is not used; it provokes a compile-time error.)
- Consider the following pattern, which contains non-significant white
- space to make it more readable (assume the PCRE2_EXTENDED option) and
+ Consider the following pattern, which contains non-significant white
+ space to make it more readable (assume the PCRE2_EXTENDED option) and
to divide it into three parts for ease of discussion:
( \( )? [^()]+ (?(1) \) )
- The first part matches an optional opening parenthesis, and if that
+ The first part matches an optional opening parenthesis, and if that
character is present, sets it as the first captured substring. The sec-
- ond part matches one or more characters that are not parentheses. The
- third part is a conditional subpattern that tests whether or not the
- first set of parentheses matched. If they did, that is, if subject
- started with an opening parenthesis, the condition is true, and so the
- yes-pattern is executed and a closing parenthesis is required. Other-
- wise, since no-pattern is not present, the subpattern matches nothing.
- In other words, this pattern matches a sequence of non-parentheses,
+ ond part matches one or more characters that are not parentheses. The
+ third part is a conditional subpattern that tests whether or not the
+ first set of parentheses matched. If they did, that is, if the subject
+ started with an opening parenthesis, the condition is true, and so the
+ yes-pattern is executed and a closing parenthesis is required. Other-
+ wise, since no-pattern is not present, the subpattern matches nothing.
+ In other words, this pattern matches a sequence of non-parentheses,
optionally enclosed in parentheses.
- If you were embedding this pattern in a larger one, you could use a
+ If you were embedding this pattern in a larger one, you could use a
relative reference:
...other stuff... ( \( )? [^()]+ (?(-1) \) ) ...
- This makes the fragment independent of the parentheses in the larger
+ This makes the fragment independent of the parentheses in the larger
pattern.
Checking for a used subpattern by name
- Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a
- used subpattern by name. For compatibility with earlier versions of
- PCRE1, which had this facility before Perl, the syntax (?(name)...) is
- also recognized. Note, however, that undelimited names consisting of
- the letter R followed by digits are ambiguous (see the following sec-
+ Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a
+ used subpattern by name. For compatibility with earlier versions of
+ PCRE1, which had this facility before Perl, the syntax (?(name)...) is
+ also recognized. Note, however, that undelimited names consisting of
+ the letter R followed by digits are ambiguous (see the following sec-
tion).
Rewriting the above example to use a named subpattern gives this:
(?<OPEN> \( )? [^()]+ (?(<OPEN>) \) )
- If the name used in a condition of this kind is a duplicate, the test
- is applied to all subpatterns of the same name, and is true if any one
+ If the name used in a condition of this kind is a duplicate, the test
+ is applied to all subpatterns of the same name, and is true if any one
of them has matched.
Checking for pattern recursion
- "Recursion" in this sense refers to any subroutine-like call from one
- part of the pattern to another, whether or not it is actually recur-
- sive. See the sections entitled "Recursive patterns" and "Subpatterns
+ "Recursion" in this sense refers to any subroutine-like call from one
+ part of the pattern to another, whether or not it is actually recur-
+ sive. See the sections entitled "Recursive patterns" and "Subpatterns
as subroutines" below for details of recursion and subpattern calls.
- If a condition is the string (R), and there is no subpattern with the
- name R, the condition is true if matching is currently in a recursion
- or subroutine call to the whole pattern or any subpattern. If digits
- follow the letter R, and there is no subpattern with that name, the
+ If a condition is the string (R), and there is no subpattern with the
+ name R, the condition is true if matching is currently in a recursion
+ or subroutine call to the whole pattern or any subpattern. If digits
+ follow the letter R, and there is no subpattern with that name, the
condition is true if the most recent call is into a subpattern with the
- given number, which must exist somewhere in the overall pattern. This
+ given number, which must exist somewhere in the overall pattern. This
is a contrived example that is equivalent to a+b:
((?(R1)a+|(?1)b))
- However, in both cases, if there is a subpattern with a matching name,
- the condition tests for its being set, as described in the section
- above, instead of testing for recursion. For example, creating a group
- with the name R1 by adding (?<R1>) to the above pattern completely
+ However, in both cases, if there is a subpattern with a matching name,
+ the condition tests for its being set, as described in the section
+ above, instead of testing for recursion. For example, creating a group
+ with the name R1 by adding (?<R1>) to the above pattern completely
changes its meaning.
If a name preceded by ampersand follows the letter R, for example:
@@ -8037,7 +8218,7 @@ CONDITIONAL SUBPATTERNS
of that name (which must exist within the pattern).
This condition does not check the entire recursion stack. It tests only
- the current level. If the name used in a condition of this kind is a
+ the current level. If the name used in a condition of this kind is a
duplicate, the test is applied to all subpatterns of the same name, and
is true if any one of them is the most recent recursion.
@@ -8046,10 +8227,10 @@ CONDITIONAL SUBPATTERNS
Defining subpatterns for use by reference only
If the condition is the string (DEFINE), the condition is always false,
- even if there is a group with the name DEFINE. In this case, there may
+ even if there is a group with the name DEFINE. In this case, there may
be only one alternative in the subpattern. It is always skipped if con-
- trol reaches this point in the pattern; the idea of DEFINE is that it
- can be used to define subroutines that can be referenced from else-
+ trol reaches this point in the pattern; the idea of DEFINE is that it
+ can be used to define subroutines that can be referenced from else-
where. (The use of subroutines is described below.) For example, a pat-
tern to match an IPv4 address such as "192.168.23.245" could be written
like this (ignore white space and line breaks):
@@ -8057,97 +8238,97 @@ CONDITIONAL SUBPATTERNS
(?(DEFINE) (?<byte> 2[0-4]\d | 25[0-5] | 1\d\d | [1-9]?\d) )
\b (?&byte) (\.(?&byte)){3} \b
- The first part of the pattern is a DEFINE group inside which a another
- group named "byte" is defined. This matches an individual component of
- an IPv4 address (a number less than 256). When matching takes place,
- this part of the pattern is skipped because DEFINE acts like a false
- condition. The rest of the pattern uses references to the named group
- to match the four dot-separated components of an IPv4 address, insist-
+ The first part of the pattern is a DEFINE group inside which another
+ group named "byte" is defined. This matches an individual component of
+ an IPv4 address (a number less than 256). When matching takes place,
+ this part of the pattern is skipped because DEFINE acts like a false
+ condition. The rest of the pattern uses references to the named group
+ to match the four dot-separated components of an IPv4 address, insist-
ing on a word boundary at each end.
Checking the PCRE2 version
- Programs that link with a PCRE2 library can check the version by call-
- ing pcre2_config() with appropriate arguments. Users of applications
- that do not have access to the underlying code cannot do this. A spe-
- cial "condition" called VERSION exists to allow such users to discover
+ Programs that link with a PCRE2 library can check the version by call-
+ ing pcre2_config() with appropriate arguments. Users of applications
+ that do not have access to the underlying code cannot do this. A spe-
+ cial "condition" called VERSION exists to allow such users to discover
which version of PCRE2 they are dealing with by using this condition to
- match a string such as "yesno". VERSION must be followed either by "="
+ match a string such as "yesno". VERSION must be followed either by "="
or ">=" and a version number. For example:
(?(VERSION>=10.4)yes|no)
- This pattern matches "yes" if the PCRE2 version is greater or equal to
- 10.4, or "no" otherwise. The fractional part of the version number may
+ This pattern matches "yes" if the PCRE2 version is greater than or equal to
+ 10.4, or "no" otherwise. The fractional part of the version number may
not contain more than two digits.
Assertion conditions
- If the condition is not in any of the above formats, it must be an
- assertion. This may be a positive or negative lookahead or lookbehind
- assertion. Consider this pattern, again containing non-significant
+ If the condition is not in any of the above formats, it must be an
+ assertion. This may be a positive or negative lookahead or lookbehind
+ assertion. Consider this pattern, again containing non-significant
white space, and with the two alternatives on the second line:
(?(?=[^a-z]*[a-z])
\d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} )
- The condition is a positive lookahead assertion that matches an
- optional sequence of non-letters followed by a letter. In other words,
- it tests for the presence of at least one letter in the subject. If a
- letter is found, the subject is matched against the first alternative;
- otherwise it is matched against the second. This pattern matches
- strings in one of the two forms dd-aaa-dd or dd-dd-dd, where aaa are
+ The condition is a positive lookahead assertion that matches an
+ optional sequence of non-letters followed by a letter. In other words,
+ it tests for the presence of at least one letter in the subject. If a
+ letter is found, the subject is matched against the first alternative;
+ otherwise it is matched against the second. This pattern matches
+ strings in one of the two forms dd-aaa-dd or dd-dd-dd, where aaa are
letters and dd are digits.
- When an assertion that is a condition contains capturing subpatterns,
- any capturing that occurs in a matching branch is retained afterwards,
+ When an assertion that is a condition contains capturing subpatterns,
+ any capturing that occurs in a matching branch is retained afterwards,
for both positive and negative assertions, because matching always con-
tinues after the assertion, whether it succeeds or fails. (Compare non-
- conditional assertions, when captures are retained only for positive
+ conditional assertions, when captures are retained only for positive
assertions that succeed.)
COMMENTS
There are two ways of including comments in patterns that are processed
- by PCRE2. In both cases, the start of the comment must not be in a
- character class, nor in the middle of any other sequence of related
- characters such as (?: or a subpattern name or number. The characters
+ by PCRE2. In both cases, the start of the comment must not be in a
+ character class, nor in the middle of any other sequence of related
+ characters such as (?: or a subpattern name or number. The characters
that make up a comment play no part in the pattern matching.
- The sequence (?# marks the start of a comment that continues up to the
- next closing parenthesis. Nested parentheses are not permitted. If the
- PCRE2_EXTENDED option is set, an unescaped # character also introduces
- a comment, which in this case continues to immediately after the next
- newline character or character sequence in the pattern. Which charac-
- ters are interpreted as newlines is controlled by an option passed to
- the compiling function or by a special sequence at the start of the
- pattern, as described in the section entitled "Newline conventions"
- above. Note that the end of this type of comment is a literal newline
- sequence in the pattern; escape sequences that happen to represent a
- newline do not count. For example, consider this pattern when
- PCRE2_EXTENDED is set, and the default newline convention (a single
- linefeed character) is in force:
+ The sequence (?# marks the start of a comment that continues up to the
+ next closing parenthesis. Nested parentheses are not permitted. If the
+ PCRE2_EXTENDED or PCRE2_EXTENDED_MORE option is set, an unescaped #
+ character also introduces a comment, which in this case continues to
+ immediately after the next newline character or character sequence in
+ the pattern. Which characters are interpreted as newlines is controlled
+ by an option passed to the compiling function or by a special sequence
+ at the start of the pattern, as described in the section entitled "New-
+ line conventions" above. Note that the end of this type of comment is a
+ literal newline sequence in the pattern; escape sequences that happen
+ to represent a newline do not count. For example, consider this pattern
+ when PCRE2_EXTENDED is set, and the default newline convention (a sin-
+ gle linefeed character) is in force:
abc #comment \n still comment
- On encountering the # character, pcre2_compile() skips along, looking
- for a newline in the pattern. The sequence \n is still literal at this
- stage, so it does not terminate the comment. Only an actual character
+ On encountering the # character, pcre2_compile() skips along, looking
+ for a newline in the pattern. The sequence \n is still literal at this
+ stage, so it does not terminate the comment. Only an actual character
with the code value 0x0a (the default newline) does so.
RECURSIVE PATTERNS
- Consider the problem of matching a string in parentheses, allowing for
- unlimited nested parentheses. Without the use of recursion, the best
- that can be done is to use a pattern that matches up to some fixed
- depth of nesting. It is not possible to handle an arbitrary nesting
+ Consider the problem of matching a string in parentheses, allowing for
+ unlimited nested parentheses. Without the use of recursion, the best
+ that can be done is to use a pattern that matches up to some fixed
+ depth of nesting. It is not possible to handle an arbitrary nesting
depth.
For some time, Perl has provided a facility that allows regular expres-
- sions to recurse (amongst other things). It does this by interpolating
- Perl code in the expression at run time, and the code can refer to the
+ sions to recurse (amongst other things). It does this by interpolating
+ Perl code in the expression at run time, and the code can refer to the
expression itself. A Perl pattern using code interpolation to solve the
parentheses problem can be created like this:
@@ -8157,104 +8338,99 @@ RECURSIVE PATTERNS
refers recursively to the pattern in which it appears.
Obviously, PCRE2 cannot support the interpolation of Perl code.
- Instead, it supports special syntax for recursion of the entire pat-
+ Instead, it supports special syntax for recursion of the entire pat-
tern, and also for individual subpattern recursion. After its introduc-
- tion in PCRE1 and Python, this kind of recursion was subsequently
+ tion in PCRE1 and Python, this kind of recursion was subsequently
introduced into Perl at release 5.10.
- A special item that consists of (? followed by a number greater than
- zero and a closing parenthesis is a recursive subroutine call of the
- subpattern of the given number, provided that it occurs inside that
- subpattern. (If not, it is a non-recursive subroutine call, which is
- described in the next section.) The special item (?R) or (?0) is a
+ A special item that consists of (? followed by a number greater than
+ zero and a closing parenthesis is a recursive subroutine call of the
+ subpattern of the given number, provided that it occurs inside that
+ subpattern. (If not, it is a non-recursive subroutine call, which is
+ described in the next section.) The special item (?R) or (?0) is a
recursive call of the entire regular expression.
- This PCRE2 pattern solves the nested parentheses problem (assume the
+ This PCRE2 pattern solves the nested parentheses problem (assume the
PCRE2_EXTENDED option is set so that white space is ignored):
\( ( [^()]++ | (?R) )* \)
- First it matches an opening parenthesis. Then it matches any number of
- substrings which can either be a sequence of non-parentheses, or a
- recursive match of the pattern itself (that is, a correctly parenthe-
+ First it matches an opening parenthesis. Then it matches any number of
+ substrings which can either be a sequence of non-parentheses, or a
+ recursive match of the pattern itself (that is, a correctly parenthe-
sized substring). Finally there is a closing parenthesis. Note the use
of a possessive quantifier to avoid backtracking into sequences of non-
parentheses.
- If this were part of a larger pattern, you would not want to recurse
+ If this were part of a larger pattern, you would not want to recurse
the entire pattern, so instead you could use this:
( \( ( [^()]++ | (?1) )* \) )
- We have put the pattern into parentheses, and caused the recursion to
+ We have put the pattern into parentheses, and caused the recursion to
refer to them instead of the whole pattern.
- In a larger pattern, keeping track of parenthesis numbers can be
- tricky. This is made easier by the use of relative references. Instead
+ In a larger pattern, keeping track of parenthesis numbers can be
+ tricky. This is made easier by the use of relative references. Instead
of (?1) in the pattern above you can write (?-2) to refer to the second
- most recently opened parentheses preceding the recursion. In other
- words, a negative number counts capturing parentheses leftwards from
+ most recently opened parentheses preceding the recursion. In other
+ words, a negative number counts capturing parentheses leftwards from
the point at which it is encountered.
Be aware however, that if duplicate subpattern numbers are in use, rel-
- ative references refer to the earliest subpattern with the appropriate
+ ative references refer to the earliest subpattern with the appropriate
number. Consider, for example:
(?|(a)|(b)) (c) (?-2)
- The first two capturing groups (a) and (b) are both numbered 1, and
- group (c) is number 2. When the reference (?-2) is encountered, the
+ The first two capturing groups (a) and (b) are both numbered 1, and
+ group (c) is number 2. When the reference (?-2) is encountered, the
second most recently opened parentheses has the number 1, but it is the
- first such group (the (a) group) to which the recursion refers. This
- would be the same if an absolute reference (?1) was used. In other
- words, relative references are just a shorthand for computing a group
+ first such group (the (a) group) to which the recursion refers. This
+ would be the same if an absolute reference (?1) was used. In other
+ words, relative references are just a shorthand for computing a group
number.
- It is also possible to refer to subsequently opened parentheses, by
- writing references such as (?+2). However, these cannot be recursive
- because the reference is not inside the parentheses that are refer-
- enced. They are always non-recursive subroutine calls, as described in
+ It is also possible to refer to subsequently opened parentheses, by
+ writing references such as (?+2). However, these cannot be recursive
+ because the reference is not inside the parentheses that are refer-
+ enced. They are always non-recursive subroutine calls, as described in
the next section.
- An alternative approach is to use named parentheses. The Perl syntax
- for this is (?&name); PCRE1's earlier syntax (?P>name) is also sup-
+ An alternative approach is to use named parentheses. The Perl syntax
+ for this is (?&name); PCRE1's earlier syntax (?P>name) is also sup-
ported. We could rewrite the above example as follows:
(?<pn> \( ( [^()]++ | (?&pn) )* \) )
- If there is more than one subpattern with the same name, the earliest
+ If there is more than one subpattern with the same name, the earliest
one is used.
The example pattern that we have been looking at contains nested unlim-
- ited repeats, and so the use of a possessive quantifier for matching
- strings of non-parentheses is important when applying the pattern to
+ ited repeats, and so the use of a possessive quantifier for matching
+ strings of non-parentheses is important when applying the pattern to
strings that do not match. For example, when this pattern is applied to
(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()
- it yields "no match" quickly. However, if a possessive quantifier is
- not used, the match runs for a very long time indeed because there are
- so many different ways the + and * repeats can carve up the subject,
+ it yields "no match" quickly. However, if a possessive quantifier is
+ not used, the match runs for a very long time indeed because there are
+ so many different ways the + and * repeats can carve up the subject,
and all have to be tested before failure can be reported.
- At the end of a match, the values of capturing parentheses are those
- from the outermost level. If you want to obtain intermediate values, a
+ At the end of a match, the values of capturing parentheses are those
+ from the outermost level. If you want to obtain intermediate values, a
callout function can be used (see below and the pcre2callout documenta-
tion). If the pattern above is matched against
(ab(cd)ef)
- the value for the inner capturing parentheses (numbered 2) is "ef",
- which is the last value taken on at the top level. If a capturing sub-
- pattern is not matched at the top level, its final captured value is
- unset, even if it was (temporarily) set at a deeper level during the
+ the value for the inner capturing parentheses (numbered 2) is "ef",
+ which is the last value taken on at the top level. If a capturing sub-
+ pattern is not matched at the top level, its final captured value is
+ unset, even if it was (temporarily) set at a deeper level during the
matching process.
- If there are more than 15 capturing parentheses in a pattern, PCRE2 has
- to obtain extra memory from the heap to store data during a recursion.
- If no memory can be obtained, the match fails with the
- PCRE2_ERROR_NOMEMORY error.
-
Do not confuse the (?R) item with the condition (R), which tests for
recursion. Consider this pattern, which matches text in angle brack-
ets, allowing for arbitrary nesting. Only digits are allowed in nested
@@ -8317,20 +8493,21 @@ RECURSIVE PATTERNS
^(.)(\1|a(?2))
This pattern matches "bab". The first capturing parentheses match "b",
- then in the second group, when the back reference \1 fails to match
- "b", the second alternative matches "a" and then recurses. In the
- recursion, \1 does now match "b" and so the whole match succeeds. This
- match used to fail in Perl, but in later versions (I tried 5.024) it
- now works.
+ then in the second group, when the backreference \1 fails to match "b",
+ the second alternative matches "a" and then recurses. In the recursion,
+ \1 does now match "b" and so the whole match succeeds. This match used
+ to fail in Perl, but in later versions (I tried 5.024) it now works.
SUBPATTERNS AS SUBROUTINES
- If the syntax for a recursive subpattern call (either by number or by
- name) is used outside the parentheses to which it refers, it operates
- like a subroutine in a programming language. The called subpattern may
- be defined before or after the reference. A numbered reference can be
- absolute or relative, as in these examples:
+ If the syntax for a recursive subpattern call (either by number or by
+ name) is used outside the parentheses to which it refers, it operates a
+ bit like a subroutine in a programming language. More accurately, PCRE2
+ treats the referenced subpattern as an independent subpattern which it
+ tries to match at the current matching position. The called subpattern
+ may be defined before or after the reference. A numbered reference can
+ be absolute or relative, as in these examples:
(...(absolute)...)...(?2)...
(...(relative)...)...(?-1)...
@@ -8340,102 +8517,106 @@ SUBPATTERNS AS SUBROUTINES
(sens|respons)e and \1ibility
- matches "sense and sensibility" and "response and responsibility", but
+ matches "sense and sensibility" and "response and responsibility", but
not "sense and responsibility". If instead the pattern
(sens|respons)e and (?1)ibility
- is used, it does match "sense and responsibility" as well as the other
- two strings. Another example is given in the discussion of DEFINE
+ is used, it does match "sense and responsibility" as well as the other
+ two strings. Another example is given in the discussion of DEFINE
above.
- Like recursions, subroutine calls used to be treated as atomic, but
- this changed at PCRE2 release 10.30, so backtracking into subroutine
- calls can now occur. However, any capturing parentheses that are set
+ Like recursions, subroutine calls used to be treated as atomic, but
+ this changed at PCRE2 release 10.30, so backtracking into subroutine
+ calls can now occur. However, any capturing parentheses that are set
during the subroutine call revert to their previous values afterwards.
- Processing options such as case-independence are fixed when a subpat-
- tern is defined, so if it is used as a subroutine, such options cannot
+ Processing options such as case-independence are fixed when a subpat-
+ tern is defined, so if it is used as a subroutine, such options cannot
be changed for different calls. For example, consider this pattern:
(abc)(?i:(?-1))
- It matches "abcabc". It does not match "abcABC" because the change of
+ It matches "abcabc". It does not match "abcABC" because the change of
processing option does not affect the called subpattern.
+ The behaviour of backtracking control verbs in subpatterns when called
+ as subroutines is described in the section entitled "Backtracking verbs
+ in subroutines" below.
+
ONIGURUMA SUBROUTINE SYNTAX
- For compatibility with Oniguruma, the non-Perl syntax \g followed by a
+ For compatibility with Oniguruma, the non-Perl syntax \g followed by a
name or a number enclosed either in angle brackets or single quotes, is
- an alternative syntax for referencing a subpattern as a subroutine,
- possibly recursively. Here are two of the examples used above, rewrit-
+ an alternative syntax for referencing a subpattern as a subroutine,
+ possibly recursively. Here are two of the examples used above, rewrit-
ten using this syntax:
(?<pn> \( ( (?>[^()]+) | \g<pn> )* \) )
(sens|respons)e and \g'1'ibility
- PCRE2 supports an extension to Oniguruma: if a number is preceded by a
+ PCRE2 supports an extension to Oniguruma: if a number is preceded by a
plus or a minus sign it is taken as a relative reference. For example:
(abc)(?i:\g<-1>)
- Note that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are not
- synonymous. The former is a back reference; the latter is a subroutine
+ Note that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are not
+ synonymous. The former is a backreference; the latter is a subroutine
call.
CALLOUTS
Perl has a feature whereby using the sequence (?{...}) causes arbitrary
- Perl code to be obeyed in the middle of matching a regular expression.
+ Perl code to be obeyed in the middle of matching a regular expression.
This makes it possible, amongst other things, to extract different sub-
strings that match the same pair of parentheses when there is a repeti-
tion.
- PCRE2 provides a similar feature, but of course it cannot obey arbi-
- trary Perl code. The feature is called "callout". The caller of PCRE2
- provides an external function by putting its entry point in a match
- context using the function pcre2_set_callout(), and then passing that
- context to pcre2_match() or pcre2_dfa_match(). If no match context is
+ PCRE2 provides a similar feature, but of course it cannot obey arbi-
+ trary Perl code. The feature is called "callout". The caller of PCRE2
+ provides an external function by putting its entry point in a match
+ context using the function pcre2_set_callout(), and then passing that
+ context to pcre2_match() or pcre2_dfa_match(). If no match context is
passed, or if the callout entry point is set to NULL, callouts are dis-
abled.
- Within a regular expression, (?C<arg>) indicates a point at which the
- external function is to be called. There are two kinds of callout:
- those with a numerical argument and those with a string argument. (?C)
- on its own with no argument is treated as (?C0). A numerical argument
- allows the application to distinguish between different callouts.
- String arguments were added for release 10.20 to make it possible for
- script languages that use PCRE2 to embed short scripts within patterns
+ Within a regular expression, (?C<arg>) indicates a point at which the
+ external function is to be called. There are two kinds of callout:
+ those with a numerical argument and those with a string argument. (?C)
+ on its own with no argument is treated as (?C0). A numerical argument
+ allows the application to distinguish between different callouts.
+ String arguments were added for release 10.20 to make it possible for
+ script languages that use PCRE2 to embed short scripts within patterns
in a similar way to Perl.
During matching, when PCRE2 reaches a callout point, the external func-
- tion is called. It is provided with the number or string argument of
- the callout, the position in the pattern, and one item of data that is
+ tion is called. It is provided with the number or string argument of
+ the callout, the position in the pattern, and one item of data that is
also set in the match block. The callout function may cause matching to
proceed, to backtrack, or to fail.
- By default, PCRE2 implements a number of optimizations at matching
- time, and one side-effect is that sometimes callouts are skipped. If
- you need all possible callouts to happen, you need to set options that
- disable the relevant optimizations. More details, including a complete
- description of the programming interface to the callout function, are
+ By default, PCRE2 implements a number of optimizations at matching
+ time, and one side-effect is that sometimes callouts are skipped. If
+ you need all possible callouts to happen, you need to set options that
+ disable the relevant optimizations. More details, including a complete
+ description of the programming interface to the callout function, are
given in the pcre2callout documentation.
Callouts with numerical arguments
- If you just want to have a means of identifying different callout
- points, put a number less than 256 after the letter C. For example,
+ If you just want to have a means of identifying different callout
+ points, put a number less than 256 after the letter C. For example,
this pattern has two callout points:
(?C1)abc(?C2)def
- If the PCRE2_AUTO_CALLOUT flag is passed to pcre2_compile(), numerical
- callouts are automatically installed before each item in the pattern.
- They are all numbered 255. If there is a conditional group in the pat-
+ If the PCRE2_AUTO_CALLOUT flag is passed to pcre2_compile(), numerical
+ callouts are automatically installed before each item in the pattern.
+ They are all numbered 255. If there is a conditional group in the pat-
tern whose condition is an assertion, an additional callout is inserted
- just before the condition. An explicit callout may also be set at this
+ just before the condition. An explicit callout may also be set at this
position, as in this example:
(?(?C9)(?=a)abc|def)
@@ -8445,60 +8626,60 @@ CALLOUTS
Callouts with string arguments
- A delimited string may be used instead of a number as a callout argu-
- ment. The starting delimiter must be one of ` ' " ^ % # $ { and the
+ A delimited string may be used instead of a number as a callout argu-
+ ment. The starting delimiter must be one of ` ' " ^ % # $ { and the
ending delimiter is the same as the start, except for {, where the end-
- ing delimiter is }. If the ending delimiter is needed within the
+ ing delimiter is }. If the ending delimiter is needed within the
string, it must be doubled. For example:
(?C'ab ''c'' d')xyz(?C{any text})pqr
- The doubling is removed before the string is passed to the callout
+ The doubling is removed before the string is passed to the callout
function.
BACKTRACKING CONTROL
- There are a number of special "Backtracking Control Verbs" (to use
- Perl's terminology) that modify the behaviour of backtracking during
- matching. They are generally of the form (*VERB) or (*VERB:NAME). Some
- verbs take either form, possibly behaving differently depending on
+ There are a number of special "Backtracking Control Verbs" (to use
+ Perl's terminology) that modify the behaviour of backtracking during
+ matching. They are generally of the form (*VERB) or (*VERB:NAME). Some
+ verbs take either form, possibly behaving differently depending on
whether or not a name is present.
- By default, for compatibility with Perl, a name is any sequence of
+ By default, for compatibility with Perl, a name is any sequence of
characters that does not include a closing parenthesis. The name is not
- processed in any way, and it is not possible to include a closing
- parenthesis in the name. This can be changed by setting the
- PCRE2_ALT_VERBNAMES option, but the result is no longer Perl-compati-
+ processed in any way, and it is not possible to include a closing
+ parenthesis in the name. This can be changed by setting the
+ PCRE2_ALT_VERBNAMES option, but the result is no longer Perl-compati-
ble.
- When PCRE2_ALT_VERBNAMES is set, backslash processing is applied to
- verb names and only an unescaped closing parenthesis terminates the
- name. However, the only backslash items that are permitted are \Q, \E,
- and sequences such as \x{100} that define character code points. Char-
+ When PCRE2_ALT_VERBNAMES is set, backslash processing is applied to
+ verb names and only an unescaped closing parenthesis terminates the
+ name. However, the only backslash items that are permitted are \Q, \E,
+ and sequences such as \x{100} that define character code points. Char-
acter type escapes such as \d are faulted.
A closing parenthesis can be included in a name either as \) or between
- \Q and \E. In addition to backslash processing, if the PCRE2_EXTENDED
- option is also set, unescaped whitespace in verb names is skipped, and
- #-comments are recognized, exactly as in the rest of the pattern.
- PCRE2_EXTENDED does not affect verb names unless PCRE2_ALT_VERBNAMES is
- also set.
-
- The maximum length of a name is 255 in the 8-bit library and 65535 in
- the 16-bit and 32-bit libraries. If the name is empty, that is, if the
- closing parenthesis immediately follows the colon, the effect is as if
+ \Q and \E. In addition to backslash processing, if the PCRE2_EXTENDED
+ or PCRE2_EXTENDED_MORE option is also set, unescaped whitespace in verb
+ names is skipped, and #-comments are recognized, exactly as in the rest
+ of the pattern. PCRE2_EXTENDED and PCRE2_EXTENDED_MORE do not affect
+ verb names unless PCRE2_ALT_VERBNAMES is also set.
+
+ The maximum length of a name is 255 in the 8-bit library and 65535 in
+ the 16-bit and 32-bit libraries. If the name is empty, that is, if the
+ closing parenthesis immediately follows the colon, the effect is as if
the colon were not there. Any number of these verbs may occur in a pat-
tern.
- Since these verbs are specifically related to backtracking, most of
- them can be used only when the pattern is to be matched using the tra-
+ Since these verbs are specifically related to backtracking, most of
+ them can be used only when the pattern is to be matched using the tra-
ditional matching function, because that uses a backtracking algorithm.
- With the exception of (*FAIL), which behaves like a failing negative
+ With the exception of (*FAIL), which behaves like a failing negative
assertion, the backtracking control verbs cause an error if encountered
by the DFA matching function.
- The behaviour of these verbs in repeated groups, assertions, and in
+ The behaviour of these verbs in repeated groups, assertions, and in
subpatterns called as subroutines (whether or not recursively) is docu-
mented below.
@@ -8506,24 +8687,23 @@ BACKTRACKING CONTROL
PCRE2 contains some optimizations that are used to speed up matching by
running some checks at the start of each match attempt. For example, it
- may know the minimum length of matching subject, or that a particular
+ may know the minimum length of matching subject, or that a particular
character must be present. When one of these optimizations bypasses the
- running of a match, any included backtracking verbs will not, of
+ running of a match, any included backtracking verbs will not, of
course, be processed. You can suppress the start-of-match optimizations
- by setting the PCRE2_NO_START_OPTIMIZE option when calling pcre2_com-
- pile(), or by starting the pattern with (*NO_START_OPT). There is more
+ by setting the PCRE2_NO_START_OPTIMIZE option when calling pcre2_com-
+ pile(), or by starting the pattern with (*NO_START_OPT). There is more
discussion of this option in the section entitled "Compiling a pattern"
in the pcre2api documentation.
- Experiments with Perl suggest that it too has similar optimizations,
- sometimes leading to anomalous results.
+ Experiments with Perl suggest that it too has similar optimizations,
+ and, as with PCRE2, turning them off can change the result of a match.
Verbs that act immediately
- The following verbs act as soon as they are encountered. They may not
- be followed by a name.
+ The following verbs act as soon as they are encountered.
- (*ACCEPT)
+ (*ACCEPT) or (*ACCEPT:NAME)
This verb causes the match to end successfully, skipping the remainder
of the pattern. However, when it is inside a subpattern that is called
@@ -8540,38 +8720,49 @@ BACKTRACKING CONTROL
This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is cap-
tured by the outer parentheses.
- (*FAIL) or (*F)
+ (*FAIL) or (*FAIL:NAME)
This verb causes a matching failure, forcing backtracking to occur. It
- is equivalent to (?!) but easier to read. The Perl documentation notes
- that it is probably useful only when combined with (?{}) or (??{}).
- Those are, of course, Perl features that are not present in PCRE2. The
- nearest equivalent is the callout feature, as for example in this pat-
- tern:
+ may be abbreviated to (*F). It is equivalent to (?!) but easier to
+ read. The Perl documentation notes that it is probably useful only when
+ combined with (?{}) or (??{}). Those are, of course, Perl features that
+ are not present in PCRE2. The nearest equivalent is the callout fea-
+ ture, as for example in this pattern:
a+(?C)(*FAIL)
A match with the string "aaaa" always fails, but the callout is taken
before each backtrack happens (in this example, 10 times).
+ (*ACCEPT:NAME) and (*FAIL:NAME) behave exactly the same as
+ (*MARK:NAME)(*ACCEPT) and (*MARK:NAME)(*FAIL), respectively.
+
Recording which path was taken
- There is one verb whose main purpose is to track how a match was
- arrived at, though it also has a secondary use in conjunction with
+ There is one verb whose main purpose is to track how a match was
+ arrived at, though it also has a secondary use in conjunction with
advancing the match starting point (see (*SKIP) below).
(*MARK:NAME) or (*:NAME)
- A name is always required with this verb. There may be as many
- instances of (*MARK) as you like in a pattern, and their names do not
+ A name is always required with this verb. There may be as many
+ instances of (*MARK) as you like in a pattern, and their names do not
have to be unique.
- When a match succeeds, the name of the last-encountered (*MARK:NAME),
- (*PRUNE:NAME), or (*THEN:NAME) on the matching path is passed back to
- the caller as described in the section entitled "Other information
- about the match" in the pcre2api documentation. Here is an example of
- pcre2test output, where the "mark" modifier requests the retrieval and
- outputting of (*MARK) data:
+ When a match succeeds, the name of the last-encountered (*MARK:NAME) on
+ the matching path is passed back to the caller as described in the sec-
+ tion entitled "Other information about the match" in the pcre2api docu-
+ mentation. This applies to all instances of (*MARK), including those
+ inside assertions and atomic groups. (There are differences in those
+ cases when (*MARK) is used in conjunction with (*SKIP) as described
+ below.)
+
+ As well as (*MARK), the (*COMMIT), (*PRUNE) and (*THEN) verbs may have
+ associated NAME arguments. Whichever is last on the matching path is
+ passed back. See below for more details of these other verbs.
+
+ Here is an example of pcre2test output, where the "mark" modifier
+ requests the retrieval and outputting of (*MARK) data:
re> /X(*MARK:A)Y|X(*MARK:B)Z/mark
data> XY
@@ -8582,72 +8773,76 @@ BACKTRACKING CONTROL
MK: B
The (*MARK) name is tagged with "MK:" in this output, and in this exam-
- ple it indicates which of the two alternatives matched. This is a more
- efficient way of obtaining this information than putting each alterna-
+ ple it indicates which of the two alternatives matched. This is a more
+ efficient way of obtaining this information than putting each alterna-
tive in its own capturing parentheses.
- If a verb with a name is encountered in a positive assertion that is
- true, the name is recorded and passed back if it is the last-encoun-
+ If a verb with a name is encountered in a positive assertion that is
+ true, the name is recorded and passed back if it is the last-encoun-
tered. This does not happen for negative assertions or failing positive
assertions.
- After a partial match or a failed match, the last encountered name in
+ After a partial match or a failed match, the last encountered name in
the entire match process is returned. For example:
re> /X(*MARK:A)Y|X(*MARK:B)Z/mark
data> XP
No match, mark = B
- Note that in this unanchored example the mark is retained from the
+ Note that in this unanchored example the mark is retained from the
match attempt that started at the letter "X" in the subject. Subsequent
match attempts starting at "P" and then with an empty string do not get
as far as the (*MARK) item, but nevertheless do not reset it.
- If you are interested in (*MARK) values after failed matches, you
- should probably set the PCRE2_NO_START_OPTIMIZE option (see above) to
+ If you are interested in (*MARK) values after failed matches, you
+ should probably set the PCRE2_NO_START_OPTIMIZE option (see above) to
ensure that the match is always attempted.
Verbs that act after backtracking
The following verbs do nothing when they are encountered. Matching con-
- tinues with what follows, but if there is no subsequent match, causing
- a backtrack to the verb, a failure is forced. That is, backtracking
- cannot pass to the left of the verb. However, when one of these verbs
- appears inside an atomic group or in an assertion that is true, its
- effect is confined to that group, because once the group has been
- matched, there is never any backtracking into it. In this situation,
- backtracking has to jump to the left of the entire atomic group or
- assertion.
-
- These verbs differ in exactly what kind of failure occurs when back-
- tracking reaches them. The behaviour described below is what happens
- when the verb is not in a subroutine or an assertion. Subsequent sec-
+ tinues with what follows, but if there is a subsequent match failure,
+ causing a backtrack to the verb, a failure is forced. That is, back-
+ tracking cannot pass to the left of the verb. However, when one of
+ these verbs appears inside an atomic group or in a lookaround assertion
+ that is true, its effect is confined to that group, because once the
+ group has been matched, there is never any backtracking into it. Back-
+ tracking from beyond an assertion or an atomic group ignores the entire
+ group, and seeks a preceding backtracking point.
+
+ These verbs differ in exactly what kind of failure occurs when back-
+ tracking reaches them. The behaviour described below is what happens
+ when the verb is not in a subroutine or an assertion. Subsequent sec-
tions cover these special cases.
- (*COMMIT)
+ (*COMMIT) or (*COMMIT:NAME)
- This verb, which may not be followed by a name, causes the whole match
- to fail outright if there is a later matching failure that causes back-
- tracking to reach it. Even if the pattern is unanchored, no further
- attempts to find a match by advancing the starting point take place. If
- (*COMMIT) is the only backtracking verb that is encountered, once it
- has been passed pcre2_match() is committed to finding a match at the
- current starting point, or not at all. For example:
+ This verb causes the whole match to fail outright if there is a later
+ matching failure that causes backtracking to reach it. Even if the pat-
+ tern is unanchored, no further attempts to find a match by advancing
+ the starting point take place. If (*COMMIT) is the only backtracking
+ verb that is encountered, once it has been passed pcre2_match() is com-
+ mitted to finding a match at the current starting point, or not at all.
+ For example:
a+(*COMMIT)b
- This matches "xxaab" but not "aacaab". It can be thought of as a kind
- of dynamic anchor, or "I've started, so I must finish." The name of the
- most recently passed (*MARK) in the path is passed back when (*COMMIT)
- forces a match failure.
+ This matches "xxaab" but not "aacaab". It can be thought of as a kind
+ of dynamic anchor, or "I've started, so I must finish."
- If there is more than one backtracking verb in a pattern, a different
- one that follows (*COMMIT) may be triggered first, so merely passing
+ The behaviour of (*COMMIT:NAME) is not the same as (*MARK:NAME)(*COM-
+ MIT). It is like (*MARK:NAME) in that the name is remembered for pass-
+ ing back to the caller. However, (*SKIP:NAME) searches only for names
+ set with (*MARK), ignoring those set by (*COMMIT), (*PRUNE) and
+ (*THEN).
+
+ If there is more than one backtracking verb in a pattern, a different
+ one that follows (*COMMIT) may be triggered first, so merely passing
(*COMMIT) during a match does not always guarantee that a match must be
at this starting point.
- Note that (*COMMIT) at the start of a pattern is not the same as an
- anchor, unless PCRE2's start-of-match optimizations are turned off, as
+ Note that (*COMMIT) at the start of a pattern is not the same as an
+ anchor, unless PCRE2's start-of-match optimizations are turned off, as
shown in this output from pcre2test:
re> /(*COMMIT)abc/
@@ -8658,40 +8853,41 @@ BACKTRACKING CONTROL
data> xyzabc
No match
- For the first pattern, PCRE2 knows that any match must start with "a",
- so the optimization skips along the subject to "a" before applying the
- pattern to the first set of data. The match attempt then succeeds. The
- second pattern disables the optimization that skips along to the first
- character. The pattern is now applied starting at "x", and so the
- (*COMMIT) causes the match to fail without trying any other starting
+ For the first pattern, PCRE2 knows that any match must start with "a",
+ so the optimization skips along the subject to "a" before applying the
+ pattern to the first set of data. The match attempt then succeeds. The
+ second pattern disables the optimization that skips along to the first
+ character. The pattern is now applied starting at "x", and so the
+ (*COMMIT) causes the match to fail without trying any other starting
points.
(*PRUNE) or (*PRUNE:NAME)
- This verb causes the match to fail at the current starting position in
+ This verb causes the match to fail at the current starting position in
the subject if there is a later matching failure that causes backtrack-
- ing to reach it. If the pattern is unanchored, the normal "bumpalong"
- advance to the next starting character then happens. Backtracking can
- occur as usual to the left of (*PRUNE), before it is reached, or when
- matching to the right of (*PRUNE), but if there is no match to the
- right, backtracking cannot cross (*PRUNE). In simple cases, the use of
- (*PRUNE) is just an alternative to an atomic group or possessive quan-
+ ing to reach it. If the pattern is unanchored, the normal "bumpalong"
+ advance to the next starting character then happens. Backtracking can
+ occur as usual to the left of (*PRUNE), before it is reached, or when
+ matching to the right of (*PRUNE), but if there is no match to the
+ right, backtracking cannot cross (*PRUNE). In simple cases, the use of
+ (*PRUNE) is just an alternative to an atomic group or possessive quan-
tifier, but there are some uses of (*PRUNE) that cannot be expressed in
- any other way. In an anchored pattern (*PRUNE) has the same effect as
+ any other way. In an anchored pattern (*PRUNE) has the same effect as
(*COMMIT).
The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE).
It is like (*MARK:NAME) in that the name is remembered for passing back
- to the caller. However, (*SKIP:NAME) searches only for names set with
- (*MARK), ignoring those set by (*PRUNE) or (*THEN).
+ to the caller. However, (*SKIP:NAME) searches only for names set with
+ (*MARK), ignoring those set by (*COMMIT), (*PRUNE) or (*THEN).
(*SKIP)
- This verb, when given without a name, is like (*PRUNE), except that if
- the pattern is unanchored, the "bumpalong" advance is not to the next
+ This verb, when given without a name, is like (*PRUNE), except that if
+ the pattern is unanchored, the "bumpalong" advance is not to the next
character, but to the position in the subject where (*SKIP) was encoun-
- tered. (*SKIP) signifies that whatever text was matched leading up to
- it cannot be part of a successful match. Consider:
+ tered. (*SKIP) signifies that whatever text was matched leading up to
+ it cannot be part of a successful match if there is a later mismatch.
+ Consider:
a+(*SKIP)b
@@ -8705,15 +8901,41 @@ BACKTRACKING CONTROL
(*SKIP:NAME)
- When (*SKIP) has an associated name, its behaviour is modified. When it
- is triggered, the previous path through the pattern is searched for the
- most recent (*MARK) that has the same name. If one is found, the
- "bumpalong" advance is to the subject position that corresponds to that
- (*MARK) instead of to where (*SKIP) was encountered. If no (*MARK) with
- a matching name is found, the (*SKIP) is ignored.
-
- Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It
- ignores names that are set by (*PRUNE:NAME) or (*THEN:NAME).
+ When (*SKIP) has an associated name, its behaviour is modified. When
+ such a (*SKIP) is triggered, the previous path through the pattern is
+ searched for the most recent (*MARK) that has the same name. If one is
+ found, the "bumpalong" advance is to the subject position that corre-
+ sponds to that (*MARK) instead of to where (*SKIP) was encountered. If
+ no (*MARK) with a matching name is found, the (*SKIP) is ignored.
+
+ The search for a (*MARK) name uses the normal backtracking mechanism,
+ which means that it does not see (*MARK) settings that are inside
+ atomic groups or assertions, because they are never re-entered by back-
+ tracking. Compare the following pcre2test examples:
+
+ re> /a(?>(*MARK:X))(*SKIP:X)(*F)|(.)/
+ data> abc
+ 0: a
+ 1: a
+ data>
+ re> /a(?:(*MARK:X))(*SKIP:X)(*F)|(.)/
+ data> abc
+ 0: b
+ 1: b
+
+ In the first example, the (*MARK) setting is in an atomic group, so it
+ is not seen when (*SKIP:X) triggers, causing the (*SKIP) to be ignored.
+ This allows the second branch of the pattern to be tried at the first
+ character position. In the second example, the (*MARK) setting is not
+ in an atomic group. This allows (*SKIP:X) to find the (*MARK) when it
+ backtracks, and this causes a new matching attempt to start at the sec-
+ ond character. This time, the (*MARK) is never seen because "a" does
+ not match "b", so the matcher immediately jumps to the second branch of
+ the pattern.
+
+ Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It
+ ignores names that are set by (*COMMIT:NAME), (*PRUNE:NAME) or
+ (*THEN:NAME).
(*THEN) or (*THEN:NAME)
@@ -8732,87 +8954,87 @@ BACKTRACKING CONTROL
track to whatever came before the entire group. If (*THEN) is not
inside an alternation, it acts like (*PRUNE).
- The behaviour of (*THEN:NAME) is the not the same as
- (*MARK:NAME)(*THEN). It is like (*MARK:NAME) in that the name is
- remembered for passing back to the caller. However, (*SKIP:NAME)
- searches only for names set with (*MARK), ignoring those set by
- (*PRUNE) and (*THEN).
-
- A subpattern that does not contain a | character is just a part of the
- enclosing alternative; it is not a nested alternation with only one
- alternative. The effect of (*THEN) extends beyond such a subpattern to
- the enclosing alternative. Consider this pattern, where A, B, etc. are
- complex pattern fragments that do not contain any | characters at this
+ The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN).
+ It is like (*MARK:NAME) in that the name is remembered for passing back
+ to the caller. However, (*SKIP:NAME) searches only for names set with
+ (*MARK), ignoring those set by (*COMMIT), (*PRUNE) and (*THEN).
+
+ A subpattern that does not contain a | character is just a part of the
+ enclosing alternative; it is not a nested alternation with only one
+ alternative. The effect of (*THEN) extends beyond such a subpattern to
+ the enclosing alternative. Consider this pattern, where A, B, etc. are
+ complex pattern fragments that do not contain any | characters at this
level:
A (B(*THEN)C) | D
- If A and B are matched, but there is a failure in C, matching does not
+ If A and B are matched, but there is a failure in C, matching does not
backtrack into A; instead it moves to the next alternative, that is, D.
- However, if the subpattern containing (*THEN) is given an alternative,
+ However, if the subpattern containing (*THEN) is given an alternative,
it behaves differently:
A (B(*THEN)C | (*FAIL)) | D
- The effect of (*THEN) is now confined to the inner subpattern. After a
+ The effect of (*THEN) is now confined to the inner subpattern. After a
failure in C, matching moves to (*FAIL), which causes the whole subpat-
- tern to fail because there are no more alternatives to try. In this
+ tern to fail because there are no more alternatives to try. In this
case, matching does now backtrack into A.
- Note that a conditional subpattern is not considered as having two
- alternatives, because only one is ever used. In other words, the |
+ Note that a conditional subpattern is not considered as having two
+ alternatives, because only one is ever used. In other words, the |
character in a conditional subpattern has a different meaning. Ignoring
white space, consider:
^.*? (?(?=a) a | b(*THEN)c )
- If the subject is "ba", this pattern does not match. Because .*? is
- ungreedy, it initially matches zero characters. The condition (?=a)
- then fails, the character "b" is matched, but "c" is not. At this
- point, matching does not backtrack to .*? as might perhaps be expected
- from the presence of the | character. The conditional subpattern is
+ If the subject is "ba", this pattern does not match. Because .*? is
+ ungreedy, it initially matches zero characters. The condition (?=a)
+ then fails, the character "b" is matched, but "c" is not. At this
+ point, matching does not backtrack to .*? as might perhaps be expected
+ from the presence of the | character. The conditional subpattern is
part of the single alternative that comprises the whole pattern, and so
- the match fails. (If there was a backtrack into .*?, allowing it to
+ the match fails. (If there was a backtrack into .*?, allowing it to
match "b", the match would succeed.)
- The verbs just described provide four different "strengths" of control
+ The verbs just described provide four different "strengths" of control
when subsequent matching fails. (*THEN) is the weakest, carrying on the
- match at the next alternative. (*PRUNE) comes next, failing the match
- at the current starting position, but allowing an advance to the next
- character (for an unanchored pattern). (*SKIP) is similar, except that
+ match at the next alternative. (*PRUNE) comes next, failing the match
+ at the current starting position, but allowing an advance to the next
+ character (for an unanchored pattern). (*SKIP) is similar, except that
the advance may be more than one character. (*COMMIT) is the strongest,
causing the entire match to fail.
More than one backtracking verb
- If more than one backtracking verb is present in a pattern, the one
- that is backtracked onto first acts. For example, consider this pat-
+ If more than one backtracking verb is present in a pattern, the one
+ that is backtracked onto first acts. For example, consider this pat-
tern, where A, B, etc. are complex pattern fragments:
(A(*COMMIT)B(*THEN)C|ABD)
- If A matches but B fails, the backtrack to (*COMMIT) causes the entire
+ If A matches but B fails, the backtrack to (*COMMIT) causes the entire
match to fail. However, if A and B match, but C fails, the backtrack to
- (*THEN) causes the next alternative (ABD) to be tried. This behaviour
- is consistent, but is not always the same as Perl's. It means that if
- two or more backtracking verbs appear in succession, all the the last
+ (*THEN) causes the next alternative (ABD) to be tried. This behaviour
+ is consistent, but is not always the same as Perl's. It means that if
+ two or more backtracking verbs appear in succession, all but the last
of them has no effect. Consider this example:
...(*COMMIT)(*PRUNE)...
If there is a matching failure to the right, backtracking onto (*PRUNE)
- causes it to be triggered, and its action is taken. There can never be
+ causes it to be triggered, and its action is taken. There can never be
a backtrack onto (*COMMIT).
Backtracking verbs in repeated groups
- PCRE2 differs from Perl in its handling of backtracking verbs in
- repeated groups. For example, consider:
+ PCRE2 sometimes differs from Perl in its handling of backtracking verbs
+ in repeated groups. For example, consider:
/(a(*COMMIT)b)+ac/
- If the subject is "abac", Perl matches, but PCRE2 fails because the
- (*COMMIT) in the second repeat of the group acts.
+ If the subject is "abac", Perl matches unless its optimizations are
+ disabled, but PCRE2 always fails because the (*COMMIT) in the second
+ repeat of the group acts.
Backtracking verbs in assertions
@@ -8822,44 +9044,56 @@ BACKTRACKING CONTROL
in a conditional subpattern.
(*ACCEPT) in a standalone positive assertion causes the assertion to
- succeed without any further processing; captured strings are retained.
- In a standalone negative assertion, (*ACCEPT) causes the assertion to
- fail without any further processing; captured substrings are discarded.
+ succeed without any further processing; captured strings and a (*MARK)
+ name (if set) are retained. In a standalone negative assertion,
+ (*ACCEPT) causes the assertion to fail without any further processing;
+ captured substrings and any (*MARK) name are discarded.
- If the assertion is a condition, (*ACCEPT) causes the condition to be
- true for a positive assertion and false for a negative one; captured
+ If the assertion is a condition, (*ACCEPT) causes the condition to be
+ true for a positive assertion and false for a negative one; captured
substrings are retained in both cases.
+ The remaining verbs act only when a later failure causes a backtrack to
+ reach them. This means that their effect is confined to the assertion,
+ because lookaround assertions are atomic. A backtrack that occurs after
+ an assertion is complete does not jump back into the assertion. Note in
+ particular that a (*MARK) name that is set in an assertion is not
+ "seen" by an instance of (*SKIP:NAME) later in the pattern.
+
The effect of (*THEN) is not allowed to escape beyond an assertion. If
there are no more branches to try, (*THEN) causes a positive assertion
to be false, and a negative assertion to be true.
The other backtracking verbs are not treated specially if they appear
in a standalone positive assertion. In a conditional positive asser-
- tion, backtracking into (*COMMIT), (*SKIP), or (*PRUNE) causes the con-
- dition to be false. However, for both standalone and conditional nega-
- tive assertions, backtracking into (*COMMIT), (*SKIP), or (*PRUNE)
- causes the assertion to be true, without considering any further alter-
- native branches.
+ tion, backtracking (from within the assertion) into (*COMMIT), (*SKIP),
+ or (*PRUNE) causes the condition to be false. However, for both stand-
+ alone and conditional negative assertions, backtracking into (*COMMIT),
+ (*SKIP), or (*PRUNE) causes the assertion to be true, without consider-
+ ing any further alternative branches.
Backtracking verbs in subroutines
These behaviours occur whether or not the subpattern is called recur-
- sively. Perl's treatment of subroutines is different in some cases.
+ sively.
- (*FAIL) in a subpattern called as a subroutine has its normal effect:
- it forces an immediate backtrack.
+ (*ACCEPT) in a subpattern called as a subroutine causes the subroutine
+ match to succeed without any further processing. Matching then contin-
+ ues after the subroutine call. Perl documents this behaviour. Perl's
+ treatment of the other verbs in subroutines is different in some cases.
- (*ACCEPT) in a subpattern called as a subroutine causes the subroutine
- match to succeed without any further processing. Matching then contin-
- ues after the subroutine call.
+ (*FAIL) in a subpattern called as a subroutine has its normal effect:
+ it forces an immediate backtrack.
- (*COMMIT), (*SKIP), and (*PRUNE) in a subpattern called as a subroutine
- cause the subroutine match to fail.
+ (*COMMIT), (*SKIP), and (*PRUNE) cause the subroutine match to fail
+ when triggered by being backtracked to in a subpattern called as a sub-
+ routine. There is then a backtrack at the outer level.
- (*THEN) skips to the next alternative in the innermost enclosing group
- within the subpattern that has alternatives. If there is no such group
- within the subpattern, (*THEN) causes the subroutine match to fail.
+ (*THEN), when triggered, skips to the next alternative in the innermost
+ enclosing group within the subpattern that has alternatives (its normal
+ behaviour). However, if there is no such group within the subroutine
+ subpattern, the subroutine match fails and there is a backtrack at the
+ outer level.
SEE ALSO
@@ -8877,8 +9111,8 @@ AUTHOR
REVISION
- Last updated: 12 September 2017
- Copyright (c) 1997-2017 University of Cambridge.
+ Last updated: 04 September 2018
+ Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
@@ -8922,176 +9156,183 @@ COMPILED PATTERN MEMORY USAGE
((ab){1,1000}c){1,3}
- uses over 50K bytes when compiled using the 8-bit library. When PCRE2
- is compiled with its default internal pointer size of two bytes, the
- size limit on a compiled pattern is 64K code units in the 8-bit and
- 16-bit libraries, and this is reached with the above pattern if the
- outer repetition is increased from 3 to 4. PCRE2 can be compiled to use
- larger internal pointers and thus handle larger compiled patterns, but
- it is better to try to rewrite your pattern to use less memory if you
- can.
+ uses over 50KiB when compiled using the 8-bit library. When PCRE2 is
+ compiled with its default internal pointer size of two bytes, the size
+ limit on a compiled pattern is 65535 code units in the 8-bit and 16-bit
+ libraries, and this is reached with the above pattern if the outer rep-
+ etition is increased from 3 to 4. PCRE2 can be compiled to use larger
+ internal pointers and thus handle larger compiled patterns, but it is
+ better to try to rewrite your pattern to use less memory if you can.
- One way of reducing the memory usage for such patterns is to make use
+ One way of reducing the memory usage for such patterns is to make use
of PCRE2's "subroutine" facility. Re-writing the above pattern as
((ab)(?2){0,999}c)(?1){0,2}
- reduces the memory requirements to around 16K, and indeed it remains
- under 20K even with the outer repetition increased to 100. However,
+ reduces the memory requirements to around 16KiB, and indeed it remains
+ under 20KiB even with the outer repetition increased to 100. However,
this kind of pattern is not always exactly equivalent, because any cap-
- tures within subroutine calls are lost when the subroutine completes.
- If this is not a problem, this kind of rewriting will allow you to
- process patterns that PCRE2 cannot otherwise handle. The matching per-
- formance of the two different versions of the pattern are roughly the
- same. (This applies from release 10.30 - things were different in ear-
+ tures within subroutine calls are lost when the subroutine completes.
+ If this is not a problem, this kind of rewriting will allow you to
+ process patterns that PCRE2 cannot otherwise handle. The matching per-
+ formance of the two different versions of the pattern is roughly the
+ same. (This applies from release 10.30 - things were different in ear-
lier releases.)
STACK AND HEAP USAGE AT RUN TIME
From release 10.30, the interpretive (non-JIT) version of pcre2_match()
- uses very little system stack at run time. In earlier releases recur-
- sive function calls could use a great deal of stack, and this could
- cause problems, but this usage has been eliminated. Backtracking posi-
- tions are now explicitly remembered in memory frames controlled by the
- code. An initial 20K vector of frames is allocated on the system stack
- (enough for about 100 frames for small patterns), but if this is insuf-
- ficient, heap memory is used. The amount of heap memory can be limited;
- if the limit is set to zero, only the initial stack vector is used.
- Rewriting patterns to be time-efficient, as described below, may also
- reduce the memory requirements.
-
- In contrast to pcre2_match(), pcre2_dfa_match() does use recursive
- function calls, but only for processing atomic groups, lookaround
- assertions, and recursion within the pattern. Too much nested recursion
- may cause stack issues. The "match depth" parameter can be used to
- limit the depth of function recursion in pcre2_dfa_match().
+ uses very little system stack at run time. In earlier releases recur-
+ sive function calls could use a great deal of stack, and this could
+ cause problems, but this usage has been eliminated. Backtracking posi-
+ tions are now explicitly remembered in memory frames controlled by the
+ code. An initial 20KiB vector of frames is allocated on the system
+ stack (enough for about 100 frames for small patterns), but if this is
+ insufficient, heap memory is used. The amount of heap memory can be
+ limited; if the limit is set to zero, only the initial stack vector is
+ used. Rewriting patterns to be time-efficient, as described below, may
+ also reduce the memory requirements.
+
+ In contrast to pcre2_match(), pcre2_dfa_match() does use recursive
+ function calls, but only for processing atomic groups, lookaround
+ assertions, and recursion within the pattern. The original version of
+ the code used to allocate quite large internal workspace vectors on the
+ stack, which caused some problems for some patterns in environments
+ with small stacks. From release 10.32 the code for pcre2_dfa_match()
+ has been re-factored to use heap memory when necessary for internal
+ workspace when recursing, though recursive function calls are still
+ used.
+
+ The "match depth" parameter can be used to limit the depth of function
+ recursion, and the "match heap" parameter to limit heap memory in
+ pcre2_dfa_match().
PROCESSING TIME
- Certain items in regular expression patterns are processed more effi-
+ Certain items in regular expression patterns are processed more effi-
ciently than others. It is more efficient to use a character class like
- [aeiou] than a set of single-character alternatives such as
- (a|e|i|o|u). In general, the simplest construction that provides the
+ [aeiou] than a set of single-character alternatives such as
+ (a|e|i|o|u). In general, the simplest construction that provides the
required behaviour is usually the most efficient. Jeffrey Friedl's book
- contains a lot of useful general discussion about optimizing regular
- expressions for efficient performance. This document contains a few
+ contains a lot of useful general discussion about optimizing regular
+ expressions for efficient performance. This document contains a few
observations about PCRE2.
- Using Unicode character properties (the \p, \P, and \X escapes) is
- slow, because PCRE2 has to use a multi-stage table lookup whenever it
- needs a character's property. If you can find an alternative pattern
+ Using Unicode character properties (the \p, \P, and \X escapes) is
+ slow, because PCRE2 has to use a multi-stage table lookup whenever it
+ needs a character's property. If you can find an alternative pattern
that does not use character properties, it will probably be faster.
- By default, the escape sequences \b, \d, \s, and \w, and the POSIX
- character classes such as [:alpha:] do not use Unicode properties,
+ By default, the escape sequences \b, \d, \s, and \w, and the POSIX
+ character classes such as [:alpha:] do not use Unicode properties,
partly for backwards compatibility, and partly for performance reasons.
- However, you can set the PCRE2_UCP option or start the pattern with
- (*UCP) if you want Unicode character properties to be used. This can
- double the matching time for items such as \d, when matched with
- pcre2_match(); the performance loss is less with a DFA matching func-
+ However, you can set the PCRE2_UCP option or start the pattern with
+ (*UCP) if you want Unicode character properties to be used. This can
+ double the matching time for items such as \d, when matched with
+ pcre2_match(); the performance loss is less with a DFA matching func-
tion, and in both cases there is not much difference for \b.
- When a pattern begins with .* not in atomic parentheses, nor in paren-
- theses that are the subject of a backreference, and the PCRE2_DOTALL
- option is set, the pattern is implicitly anchored by PCRE2, since it
- can match only at the start of a subject string. If the pattern has
+ When a pattern begins with .* not in atomic parentheses, nor in paren-
+ theses that are the subject of a backreference, and the PCRE2_DOTALL
+ option is set, the pattern is implicitly anchored by PCRE2, since it
+ can match only at the start of a subject string. If the pattern has
multiple top-level branches, they must all be anchorable. The optimiza-
- tion can be disabled by the PCRE2_NO_DOTSTAR_ANCHOR option, and is
+ tion can be disabled by the PCRE2_NO_DOTSTAR_ANCHOR option, and is
automatically disabled if the pattern contains (*PRUNE) or (*SKIP).
- If PCRE2_DOTALL is not set, PCRE2 cannot make this optimization,
+ If PCRE2_DOTALL is not set, PCRE2 cannot make this optimization,
because the dot metacharacter does not then match a newline, and if the
- subject string contains newlines, the pattern may match from the char-
+ subject string contains newlines, the pattern may match from the char-
acter immediately following one of them instead of from the very start.
For example, the pattern
.*second
- matches the subject "first\nand second" (where \n stands for a newline
- character), with the match starting at the seventh character. In order
- to do this, PCRE2 has to retry the match starting after every newline
+ matches the subject "first\nand second" (where \n stands for a newline
+ character), with the match starting at the seventh character. In order
+ to do this, PCRE2 has to retry the match starting after every newline
in the subject.
- If you are using such a pattern with subject strings that do not con-
- tain newlines, the best performance is obtained by setting
- PCRE2_DOTALL, or starting the pattern with ^.* or ^.*? to indicate
+ If you are using such a pattern with subject strings that do not con-
+ tain newlines, the best performance is obtained by setting
+ PCRE2_DOTALL, or starting the pattern with ^.* or ^.*? to indicate
explicit anchoring. That saves PCRE2 from having to scan along the sub-
ject looking for a newline to restart at.
- Beware of patterns that contain nested indefinite repeats. These can
- take a long time to run when applied to a string that does not match.
+ Beware of patterns that contain nested indefinite repeats. These can
+ take a long time to run when applied to a string that does not match.
Consider the pattern fragment
^(a+)*
- This can match "aaaa" in 16 different ways, and this number increases
- very rapidly as the string gets longer. (The * repeat can match 0, 1,
- 2, 3, or 4 times, and for each of those cases other than 0 or 4, the +
- repeats can match different numbers of times.) When the remainder of
- the pattern is such that the entire match is going to fail, PCRE2 has
- in principle to try every possible variation, and this can take an
+ This can match "aaaa" in 16 different ways, and this number increases
+ very rapidly as the string gets longer. (The * repeat can match 0, 1,
+ 2, 3, or 4 times, and for each of those cases other than 0 or 4, the +
+ repeats can match different numbers of times.) When the remainder of
+ the pattern is such that the entire match is going to fail, PCRE2 has
+ in principle to try every possible variation, and this can take an
extremely long time, even for relatively short strings.
An optimization catches some of the more simple cases such as
(a+)*b
- where a literal character follows. Before embarking on the standard
- matching procedure, PCRE2 checks that there is a "b" later in the sub-
- ject string, and if there is not, it fails the match immediately. How-
- ever, when there is no following literal this optimization cannot be
+ where a literal character follows. Before embarking on the standard
+ matching procedure, PCRE2 checks that there is a "b" later in the sub-
+ ject string, and if there is not, it fails the match immediately. How-
+ ever, when there is no following literal this optimization cannot be
used. You can see the difference by comparing the behaviour of
(a+)*\d
- with the pattern above. The former gives a failure almost instantly
- when applied to a whole line of "a" characters, whereas the latter
+ with the pattern above. The former gives a failure almost instantly
+ when applied to a whole line of "a" characters, whereas the latter
takes an appreciable time with strings longer than about 20 characters.
In many cases, the solution to this kind of performance issue is to use
- an atomic group or a possessive quantifier. This can often reduce mem-
+ an atomic group or a possessive quantifier. This can often reduce mem-
ory requirements as well. As another example, consider this pattern:
([^<]|<(?!inet))+
- It matches from wherever it starts until it encounters "<inet" or the
- end of the data, and is the kind of pattern that might be used when
+ It matches from wherever it starts until it encounters "<inet" or the
+ end of the data, and is the kind of pattern that might be used when
processing an XML file. Each iteration of the outer parentheses matches
- either one character that is not "<" or a "<" that is not followed by
- "inet". However, each time a parenthesis is processed, a backtracking
- position is passed, so this formulation uses a memory frame for each
+ either one character that is not "<" or a "<" that is not followed by
+ "inet". However, each time a parenthesis is processed, a backtracking
+ position is passed, so this formulation uses a memory frame for each
matched character. For a long string, a lot of memory is required. Con-
- sider now this rewritten pattern, which matches exactly the same
+ sider now this rewritten pattern, which matches exactly the same
strings:
([^<]++|<(?!inet))+
This runs much faster, because sequences of characters that do not con-
tain "<" are "swallowed" in one item inside the parentheses, and a pos-
- sessive quantifier is used to stop any backtracking into the runs of
- non-"<" characters. This version also uses a lot less memory because
- entry to a new set of parentheses happens only when a "<" character
- that is not followed by "inet" is encountered (and we assume this is
+ sessive quantifier is used to stop any backtracking into the runs of
+ non-"<" characters. This version also uses a lot less memory because
+ entry to a new set of parentheses happens only when a "<" character
+ that is not followed by "inet" is encountered (and we assume this is
relatively rare).
This example shows that one way of optimizing performance when matching
- long subject strings is to write repeated parenthesized subpatterns to
+ long subject strings is to write repeated parenthesized subpatterns to
match more than one character whenever possible.
SETTING RESOURCE LIMITS
- You can set limits on the amount of processing that takes place when
- matching, and on the amount of heap memory that is used. The default
+ You can set limits on the amount of processing that takes place when
+ matching, and on the amount of heap memory that is used. The default
values of the limits are very large, and unlikely ever to operate. They
- can be changed when PCRE2 is built, and they can also be set when
- pcre2_match() or pcre2_dfa_match() is called. For details of these
- interfaces, see the pcre2build documentation and the section entitled
+ can be changed when PCRE2 is built, and they can also be set when
+ pcre2_match() or pcre2_dfa_match() is called. For details of these
+ interfaces, see the pcre2build documentation and the section entitled
"The match context" in the pcre2api documentation.
- The pcre2test test program has a modifier called "find_limits" which,
- if applied to a subject line, causes it to find the smallest limits
+ The pcre2test test program has a modifier called "find_limits" which,
+ if applied to a subject line, causes it to find the smallest limits
that allow a pattern to match. This is done by repeatedly matching with
different limits.
@@ -9105,8 +9346,8 @@ AUTHOR
REVISION
- Last updated: 08 April 2017
- Copyright (c) 1997-2017 University of Cambridge.
+ Last updated: 25 April 2018
+ Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
@@ -9223,54 +9464,53 @@ COMPILING A PATTERN
for matching, the nmatch and pmatch arguments are ignored, and no cap-
tured strings are returned. Versions of the PCRE library prior to 10.22
used to set the PCRE2_NO_AUTO_CAPTURE compile option, but this no
- longer happens because it disables the use of back references.
+ longer happens because it disables the use of backreferences.
REG_PEND
If this option is set, the re_endp field in the preg structure (which
has the type const char *) must be set to point to the character beyond
the end of the pattern before calling regcomp(). The pattern itself may
- now contain binary zeroes, which are treated as data characters. With-
- out REG_PEND, a binary zero terminates the pattern and the re_endp
- field is ignored. This is a GNU extension to the POSIX standard and
- should be used with caution in software intended to be portable to
- other systems.
+ now contain binary zeros, which are treated as data characters. Without
+ REG_PEND, a binary zero terminates the pattern and the re_endp field is
+ ignored. This is a GNU extension to the POSIX standard and should be
+ used with caution in software intended to be portable to other systems.
REG_UCP
- The PCRE2_UCP option is set when the regular expression is passed for
- compilation to the native function. This causes PCRE2 to use Unicode
- properties when matchine \d, \w, etc., instead of just recognizing
+ The PCRE2_UCP option is set when the regular expression is passed for
+ compilation to the native function. This causes PCRE2 to use Unicode
+ properties when matching \d, \w, etc., instead of just recognizing
ASCII values. Note that REG_UCP is not part of the POSIX standard.
REG_UNGREEDY
- The PCRE2_UNGREEDY option is set when the regular expression is passed
- for compilation to the native function. Note that REG_UNGREEDY is not
+ The PCRE2_UNGREEDY option is set when the regular expression is passed
+ for compilation to the native function. Note that REG_UNGREEDY is not
part of the POSIX standard.
REG_UTF
- The PCRE2_UTF option is set when the regular expression is passed for
- compilation to the native function. This causes the pattern itself and
- all data strings used for matching it to be treated as UTF-8 strings.
+ The PCRE2_UTF option is set when the regular expression is passed for
+ compilation to the native function. This causes the pattern itself and
+ all data strings used for matching it to be treated as UTF-8 strings.
Note that REG_UTF is not part of the POSIX standard.
- In the absence of these flags, no options are passed to the native
- function. This means the the regex is compiled with PCRE2 default
- semantics. In particular, the way it handles newline characters in the
- subject string is the Perl way, not the POSIX way. Note that setting
+ In the absence of these flags, no options are passed to the native
+ function. This means that the regex is compiled with PCRE2 default
+ semantics. In particular, the way it handles newline characters in the
+ subject string is the Perl way, not the POSIX way. Note that setting
PCRE2_MULTILINE has only some of the effects specified for REG_NEWLINE.
- It does not affect the way newlines are matched by the dot metacharac-
+ It does not affect the way newlines are matched by the dot metacharac-
ter (they are not) or by a negative class such as [^a] (they are).
- The yield of regcomp() is zero on success, and non-zero otherwise. The
- preg structure is filled in on success, and one other member of the
- structure (as well as re_endp) is public: re_nsub contains the number
+ The yield of regcomp() is zero on success, and non-zero otherwise. The
+ preg structure is filled in on success, and one other member of the
+ structure (as well as re_endp) is public: re_nsub contains the number
of capturing subpatterns in the regular expression. Various error codes
are defined in the header file.
- NOTE: If the yield of regcomp() is non-zero, you must not attempt to
+ NOTE: If the yield of regcomp() is non-zero, you must not attempt to
use the contents of the preg structure. If, for example, you pass it to
regexec(), the result is undefined and your program is likely to crash.
@@ -9278,9 +9518,9 @@ COMPILING A PATTERN
MATCHING NEWLINE CHARACTERS
This area is not simple, because POSIX and Perl take different views of
- things. It is not possible to get PCRE2 to obey POSIX semantics, but
+ things. It is not possible to get PCRE2 to obey POSIX semantics, but
then PCRE2 was never intended to be a POSIX engine. The following table
- lists the different possibilities for matching newline characters in
+ lists the different possibilities for matching newline characters in
Perl and PCRE2:
Default Change with
@@ -9301,25 +9541,25 @@ MATCHING NEWLINE CHARACTERS
$ matches \n in middle no REG_NEWLINE
^ matches \n in middle no REG_NEWLINE
- This behaviour is not what happens when PCRE2 is called via its POSIX
- API. By default, PCRE2's behaviour is the same as Perl's, except that
- there is no equivalent for PCRE2_DOLLAR_ENDONLY in Perl. In both PCRE2
+ This behaviour is not what happens when PCRE2 is called via its POSIX
+ API. By default, PCRE2's behaviour is the same as Perl's, except that
+ there is no equivalent for PCRE2_DOLLAR_ENDONLY in Perl. In both PCRE2
and Perl, there is no way to stop newline from matching [^a].
- Default POSIX newline handling can be obtained by setting PCRE2_DOTALL
- and PCRE2_DOLLAR_ENDONLY when calling pcre2_compile() directly, but
- there is no way to make PCRE2 behave exactly as for the REG_NEWLINE
- action. When using the POSIX API, passing REG_NEWLINE to PCRE2's reg-
+ Default POSIX newline handling can be obtained by setting PCRE2_DOTALL
+ and PCRE2_DOLLAR_ENDONLY when calling pcre2_compile() directly, but
+ there is no way to make PCRE2 behave exactly as for the REG_NEWLINE
+ action. When using the POSIX API, passing REG_NEWLINE to PCRE2's reg-
comp() function causes PCRE2_MULTILINE to be passed to pcre2_compile(),
- and REG_DOTALL passes PCRE2_DOTALL. There is no way to pass PCRE2_DOL-
+ and REG_DOTALL passes PCRE2_DOTALL. There is no way to pass PCRE2_DOL-
LAR_ENDONLY.
MATCHING A PATTERN
- The function regexec() is called to match a compiled pattern preg
- against a given string, which is by default terminated by a zero byte
- (but see REG_STARTEND below), subject to the options in eflags. These
+ The function regexec() is called to match a compiled pattern preg
+ against a given string, which is by default terminated by a zero byte
+ (but see REG_STARTEND below), subject to the options in eflags. These
can be:
REG_NOTBOL
@@ -9329,9 +9569,9 @@ MATCHING A PATTERN
REG_NOTEMPTY
- The PCRE2_NOTEMPTY option is set when calling the underlying PCRE2
- matching function. Note that REG_NOTEMPTY is not part of the POSIX
- standard. However, setting this option can give more POSIX-like behav-
+ The PCRE2_NOTEMPTY option is set when calling the underlying PCRE2
+ matching function. Note that REG_NOTEMPTY is not part of the POSIX
+ standard. However, setting this option can give more POSIX-like behav-
iour in some situations.
REG_NOTEOL
@@ -9341,66 +9581,66 @@ MATCHING A PATTERN
REG_STARTEND
- When this option is set, the subject string is starts at string +
- pmatch[0].rm_so and ends at string + pmatch[0].rm_eo, which should
- point to the first character beyond the string. There may be binary
- zeroes within the subject string, and indeed, using REG_STARTEND is the
+ When this option is set, the subject string starts at string +
+ pmatch[0].rm_so and ends at string + pmatch[0].rm_eo, which should
+ point to the first character beyond the string. There may be binary
+ zeros within the subject string, and indeed, using REG_STARTEND is the
only way to pass a subject string that contains a binary zero.
- Whatever the value of pmatch[0].rm_so, the offsets of the matched
- string and any captured substrings are still given relative to the
- start of string itself. (Before PCRE2 release 10.30 these were given
- relative to string + pmatch[0].rm_so, but this differs from other
+ Whatever the value of pmatch[0].rm_so, the offsets of the matched
+ string and any captured substrings are still given relative to the
+ start of string itself. (Before PCRE2 release 10.30 these were given
+ relative to string + pmatch[0].rm_so, but this differs from other
implementations.)
- This is a BSD extension, compatible with but not specified by IEEE
- Standard 1003.2 (POSIX.2), and should be used with caution in software
- intended to be portable to other systems. Note that a non-zero rm_so
- does not imply REG_NOTBOL; REG_STARTEND affects only the location and
- length of the string, not how it is matched. Setting REG_STARTEND and
- passing pmatch as NULL are mutually exclusive; the error REG_INVARG is
+ This is a BSD extension, compatible with but not specified by IEEE
+ Standard 1003.2 (POSIX.2), and should be used with caution in software
+ intended to be portable to other systems. Note that a non-zero rm_so
+ does not imply REG_NOTBOL; REG_STARTEND affects only the location and
+ length of the string, not how it is matched. Setting REG_STARTEND and
+ passing pmatch as NULL are mutually exclusive; the error REG_INVARG is
returned.
- If the pattern was compiled with the REG_NOSUB flag, no data about any
- matched strings is returned. The nmatch and pmatch arguments of
+ If the pattern was compiled with the REG_NOSUB flag, no data about any
+ matched strings is returned. The nmatch and pmatch arguments of
regexec() are ignored (except possibly as input for REG_STARTEND).
- The value of nmatch may be zero, and the value pmatch may be NULL
- (unless REG_STARTEND is set); in both these cases no data about any
+ The value of nmatch may be zero, and the value of pmatch may be NULL
+ (unless REG_STARTEND is set); in both these cases no data about any
matched strings is returned.
- Otherwise, the portion of the string that was matched, and also any
+ Otherwise, the portion of the string that was matched, and also any
captured substrings, are returned via the pmatch argument, which points
- to an array of nmatch structures of type regmatch_t, containing the
- members rm_so and rm_eo. These contain the byte offset to the first
+ to an array of nmatch structures of type regmatch_t, containing the
+ members rm_so and rm_eo. These contain the byte offset to the first
character of each substring and the offset to the first character after
- the end of each substring, respectively. The 0th element of the vector
- relates to the entire portion of string that was matched; subsequent
+ the end of each substring, respectively. The 0th element of the vector
+ relates to the entire portion of string that was matched; subsequent
elements relate to the capturing subpatterns of the regular expression.
Unused entries in the array have both structure members set to -1.
- A successful match yields a zero return; various error codes are
- defined in the header file, of which REG_NOMATCH is the "expected"
+ A successful match yields a zero return; various error codes are
+ defined in the header file, of which REG_NOMATCH is the "expected"
failure code.
ERROR MESSAGES
The regerror() function maps a non-zero errorcode from either regcomp()
- or regexec() to a printable message. If preg is not NULL, the error
+ or regexec() to a printable message. If preg is not NULL, the error
should have arisen from the use of that structure. A message terminated
- by a binary zero is placed in errbuf. If the buffer is too short, only
+ by a binary zero is placed in errbuf. If the buffer is too short, only
the first errbuf_size - 1 characters of the error message are used. The
- yield of the function is the size of buffer needed to hold the whole
- message, including the terminating zero. This value is greater than
+ yield of the function is the size of buffer needed to hold the whole
+ message, including the terminating zero. This value is greater than
errbuf_size if the message was truncated.
MEMORY USAGE
- Compiling a regular expression causes memory to be allocated and asso-
- ciated with the preg structure. The function regfree() frees all such
- memory, after which preg may no longer be used as a compiled expres-
+ Compiling a regular expression causes memory to be allocated and asso-
+ ciated with the preg structure. The function regfree() frees all such
+ memory, after which preg may no longer be used as a compiled expres-
sion.
@@ -9539,6 +9779,14 @@ SAVING AND RE-USING PRECOMPILED PCRE2 PATTERNS
library cannot be reloaded on a 64-bit system, nor can they be reloaded
using the 8-bit library.
+ Note that "serialization" in PCRE2 does not convert compiled patterns
+ to an abstract format like Java or .NET serialization. The serialized
+ output is really just a bytecode dump, which is why it can only be
+ reloaded in the same environment as the one that created it. Hence the
+ restrictions mentioned above. Applications that are not statically
+ linked with a fixed version of PCRE2 must be prepared to recompile pat-
+ terns from their sources, in order to be immune to PCRE2 upgrades.
+
SECURITY CONCERNS
@@ -9554,21 +9802,22 @@ SECURITY CONCERNS
SAVING COMPILED PATTERNS
- Before compiled patterns can be saved they must be serialized, that is,
- converted to a stream of bytes. A single byte stream may contain any
- number of compiled patterns, but they must all use the same character
- tables. A single copy of the tables is included in the byte stream (its
- size is 1088 bytes). For more details of character tables, see the sec-
- tion on locale support in the pcre2api documentation.
+ Before compiled patterns can be saved they must be serialized, which in
+ PCRE2 means converting the pattern to a stream of bytes. A single byte
+ stream may contain any number of compiled patterns, but they must all
+ use the same character tables. A single copy of the tables is included
+ in the byte stream (its size is 1088 bytes). For more details of char-
+ acter tables, see the section on locale support in the pcre2api docu-
+ mentation.
- The function pcre2_serialize_encode() creates a serialized byte stream
- from a list of compiled patterns. Its first two arguments specify the
+ The function pcre2_serialize_encode() creates a serialized byte stream
+ from a list of compiled patterns. Its first two arguments specify the
list, being a pointer to a vector of pointers to compiled patterns, and
the length of the vector. The third and fourth arguments point to vari-
ables which are set to point to the created byte stream and its length,
- respectively. The final argument is a pointer to a general context,
- which can be used to specify custom memory mangagement functions. If
- this argument is NULL, malloc() is used to obtain memory for the byte
+ respectively. The final argument is a pointer to a general context,
+ which can be used to specify custom memory management functions. If
+ this argument is NULL, malloc() is used to obtain memory for the byte
stream. The yield of the function is the number of serialized patterns,
or one of the following negative error codes:
@@ -9578,12 +9827,12 @@ SAVING COMPILED PATTERNS
PCRE2_ERROR_MIXEDTABLES the patterns do not all use the same tables
PCRE2_ERROR_NULL the 1st, 3rd, or 4th argument is NULL
- PCRE2_ERROR_BADMAGIC means either that a pattern's code has been cor-
- rupted, or that a slot in the vector does not point to a compiled pat-
+ PCRE2_ERROR_BADMAGIC means either that a pattern's code has been cor-
+ rupted, or that a slot in the vector does not point to a compiled pat-
tern.
Once a set of patterns has been serialized you can save the data in any
- appropriate manner. Here is sample code that compiles two patterns and
+ appropriate manner. Here is sample code that compiles two patterns and
writes them to a file. It assumes that the variable fd refers to a file
that is open for output. The error checking that should be present in a
real application has been omitted for simplicity.
@@ -9601,16 +9850,17 @@ SAVING COMPILED PATTERNS
&bytescount, NULL);
errorcode = fwrite(bytes, 1, bytescount, fd);
- Note that the serialized data is binary data that may contain any of
- the 256 possible byte values. On systems that make a distinction
+ Note that the serialized data is binary data that may contain any of
+ the 256 possible byte values. On systems that make a distinction
between binary and non-binary data, be sure that the file is opened for
binary output.
- Serializing a set of patterns leaves the original data untouched, so
- they can still be used for matching. Their memory must eventually be
+ Serializing a set of patterns leaves the original data untouched, so
+ they can still be used for matching. Their memory must eventually be
freed in the usual way by calling pcre2_code_free(). When you have fin-
ished with the byte stream, it too must be freed by calling pcre2_seri-
- alize_free().
+ alize_free(). If this function is called with a NULL argument, it
+ returns immediately without doing anything.
RE-USING PRECOMPILED PATTERNS
@@ -9682,8 +9932,8 @@ AUTHOR
REVISION
- Last updated: 21 March 2017
- Copyright (c) 1997-2017 University of Cambridge.
+ Last updated: 27 June 2018
+ Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
@@ -9722,19 +9972,23 @@ ESCAPED CHARACTERS
\ddd character with octal code ddd, or backreference
\o{ddd..} character with octal code ddd..
\U "U" if PCRE2_ALT_BSUX is set (otherwise is an error)
+ \N{U+hh..} character with Unicode code point hh.. (Unicode mode only)
\uhhhh character with hex code hhhh (if PCRE2_ALT_BSUX is set)
\xhh character with hex code hh
- \x{hhh..} character with hex code hhh..
+ \x{hh..} character with hex code hh..
Note that \0dd is always an octal code. The treatment of backslash fol-
lowed by a non-zero digit is complicated; for details see the section
"Non-printing characters" in the pcre2pattern documentation, where
- details of escape processing in EBCDIC environments are also given.
+ details of escape processing in EBCDIC environments are also given.
+ \N{U+hh..} is synonymous with \x{hh..} in PCRE2 but is not supported in
+ EBCDIC environments. Note that \N not followed by an opening curly
+ bracket has a different meaning (see below).
- When \x is not followed by {, from zero to two hexadecimal digits are
+ When \x is not followed by {, from zero to two hexadecimal digits are
read, but if PCRE2_ALT_BSUX is set, \x must be followed by two hexadec-
- imal digits to be recognized as a hexadecimal escape; otherwise it
- matches a literal "x". Likewise, if \u (in ALT_BSUX mode) is not fol-
+ imal digits to be recognized as a hexadecimal escape; otherwise it
+ matches a literal "x". Likewise, if \u (in ALT_BSUX mode) is not fol-
lowed by four hexadecimal digits, it matches a literal "u".
@@ -9759,14 +10013,14 @@ CHARACTER TYPES
\W a "non-word" character
\X a Unicode extended grapheme cluster
- \C is dangerous because it may leave the current matching point in the
+ \C is dangerous because it may leave the current matching point in the
middle of a UTF-8 or UTF-16 character. The application can lock out the
- use of \C by setting the PCRE2_NEVER_BACKSLASH_C option. It is also
+ use of \C by setting the PCRE2_NEVER_BACKSLASH_C option. It is also
possible to build PCRE2 with the use of \C permanently disabled.
- By default, \d, \s, and \w match only ASCII characters, even in UTF-8
+ By default, \d, \s, and \w match only ASCII characters, even in UTF-8
mode or in the 16-bit and 32-bit libraries. However, if locale-specific
- matching is happening, \s and \w may also match characters with code
+ matching is happening, \s and \w may also match characters with code
points in the range 128-255. If the PCRE2_UCP option is set, the behav-
iour of these escape sequences is changed to use Unicode properties and
they match many more characters.
@@ -9835,26 +10089,29 @@ PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P
SCRIPT NAMES FOR \p AND \P
- Ahom, Anatolian_Hieroglyphs, Arabic, Armenian, Avestan, Balinese,
- Bamum, Bassa_Vah, Batak, Bengali, Bopomofo, Brahmi, Braille, Buginese,
- Buhid, Canadian_Aboriginal, Carian, Caucasian_Albanian, Chakma, Cham,
- Cherokee, Common, Coptic, Cuneiform, Cypriot, Cyrillic, Deseret,
- Devanagari, Duployan, Egyptian_Hieroglyphs, Elbasan, Ethiopic, Geor-
- gian, Glagolitic, Gothic, Grantha, Greek, Gujarati, Gurmukhi, Han,
- Hangul, Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Aramaic, Inherited,
+ Adlam, Ahom, Anatolian_Hieroglyphs, Arabic, Armenian, Avestan, Bali-
+ nese, Bamum, Bassa_Vah, Batak, Bengali, Bhaiksuki, Bopomofo, Brahmi,
+ Braille, Buginese, Buhid, Canadian_Aboriginal, Carian, Caucasian_Alba-
+ nian, Chakma, Cham, Cherokee, Common, Coptic, Cuneiform, Cypriot,
+ Cyrillic, Deseret, Devanagari, Dogra, Duployan, Egyptian_Hieroglyphs,
+ Elbasan, Ethiopic, Georgian, Glagolitic, Gothic, Grantha, Greek,
+ Gujarati, Gunjala_Gondi, Gurmukhi, Han, Hangul, Hanifi_Rohingya,
+ Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Aramaic, Inherited,
Inscriptional_Pahlavi, Inscriptional_Parthian, Javanese, Kaithi, Kan-
nada, Katakana, Kayah_Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao,
Latin, Lepcha, Limbu, Linear_A, Linear_B, Lisu, Lycian, Lydian, Maha-
- jani, Malayalam, Mandaic, Manichaean, Meetei_Mayek, Mende_Kikakui,
- Meroitic_Cursive, Meroitic_Hieroglyphs, Miao, Modi, Mongolian, Mro,
- Multani, Myanmar, Nabataean, New_Tai_Lue, Nko, Ogham, Ol_Chiki,
- Old_Hungarian, Old_Italic, Old_North_Arabian, Old_Permic, Old_Persian,
- Old_South_Arabian, Old_Turkic, Oriya, Osmanya, Pahawh_Hmong, Palmyrene,
- Pau_Cin_Hau, Phags_Pa, Phoenician, Psalter_Pahlavi, Rejang, Runic,
- Samaritan, Saurashtra, Sharada, Shavian, Siddham, SignWriting, Sinhala,
- Sora_Sompeng, Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa,
- Tai_Le, Tai_Tham, Tai_Viet, Takri, Tamil, Telugu, Thaana, Thai,
- Tibetan, Tifinagh, Tirhuta, Ugaritic, Vai, Warang_Citi, Yi.
+ jani, Makasar, Malayalam, Mandaic, Manichaean, Marchen, Masaram_Gondi,
+ Medefaidrin, Meetei_Mayek, Mende_Kikakui, Meroitic_Cursive,
+ Meroitic_Hieroglyphs, Miao, Modi, Mongolian, Mro, Multani, Myanmar,
+ Nabataean, New_Tai_Lue, Newa, Nko, Nushu, Ogham, Ol_Chiki, Old_Hungar-
+ ian, Old_Italic, Old_North_Arabian, Old_Permic, Old_Persian, Old_Sog-
+ dian, Old_South_Arabian, Old_Turkic, Oriya, Osage, Osmanya,
+ Pahawh_Hmong, Palmyrene, Pau_Cin_Hau, Phags_Pa, Phoenician,
+ Psalter_Pahlavi, Rejang, Runic, Samaritan, Saurashtra, Sharada, Sha-
+ vian, Siddham, SignWriting, Sinhala, Sogdian, Sora_Sompeng, Soyombo,
+ Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, Tai_Le, Tai_Tham,
+ Tai_Viet, Takri, Tamil, Tangut, Telugu, Thaana, Thai, Tibetan, Tifi-
+ nagh, Tirhuta, Ugaritic, Vai, Warang_Citi, Yi, Zanabazar_Square.
CHARACTER CLASSES
@@ -9922,9 +10179,9 @@ ANCHORS AND SIMPLE ASSERTIONS
\G first matching position in subject
-MATCH POINT RESET
+REPORTED MATCH POINT SETTING
- \K reset start of match
+ \K set reported start of match
\K is honoured in positive assertions, but ignored in negative ones.
@@ -9956,6 +10213,8 @@ COMMENT
OPTION SETTING
+ Changes of these options within a group are automatically cancelled at
+ the end of the group.
(?i) caseless
(?J) allow duplicate names
@@ -9966,13 +10225,20 @@ OPTION SETTING
(?x) extended: ignore white space except in classes
(?xx) as (?x) but also ignore space and tab in classes
(?-...) unset option(s)
+ (?^) unset imnsx options
+
+ Unsetting x or xx unsets both. Several options may be set at once, and
+ a mixture of setting and unsetting such as (?i-x) is allowed, but there
+ may be only one hyphen. Setting (but not unsetting) is allowed after (?^,
+ for example (?^in). An option setting may appear at the start of a non-
+ capturing group, for example (?i:...).
- The following are recognized only at the very start of a pattern or
- after one of the newline or \R options with similar syntax. More than
+ The following are recognized only at the very start of a pattern or
+ after one of the newline or \R options with similar syntax. More than
one of them may appear. For the first three, d is a decimal number.
(*LIMIT_DEPTH=d) set the backtracking limit to d
- (*LIMIT_HEAP=d) set the heap size limit to d kilobytes
+ (*LIMIT_HEAP=d) set the heap size limit to d * 1024 bytes
(*LIMIT_MATCH=d) set the match limit to d
(*NOTEMPTY) set PCRE2_NOTEMPTY when matching
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
@@ -9983,17 +10249,17 @@ OPTION SETTING
(*UTF) set appropriate UTF mode for the library in use
(*UCP) set PCRE2_UCP (use Unicode properties for \d etc)
- Note that LIMIT_DEPTH, LIMIT_HEAP, and LIMIT_MATCH can only reduce the
- value of the limits set by the caller of pcre2_match() or
- pcre2_dfa_match(), not increase them. LIMIT_RECURSION is an obsolete
+ Note that LIMIT_DEPTH, LIMIT_HEAP, and LIMIT_MATCH can only reduce the
+ value of the limits set by the caller of pcre2_match() or
+ pcre2_dfa_match(), not increase them. LIMIT_RECURSION is an obsolete
synonym for LIMIT_DEPTH. The application can lock out the use of (*UTF)
- and (*UCP) by setting the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options,
+ and (*UCP) by setting the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options,
respectively, at compile time.
NEWLINE CONVENTION
- These are recognized only at the very start of the pattern or after
+ These are recognized only at the very start of the pattern or after
option settings with a similar syntax.
(*CR) carriage return only
@@ -10006,7 +10272,7 @@ NEWLINE CONVENTION
WHAT \R MATCHES
- These are recognized only at the very start of the pattern or after
+ These are recognized only at the very start of the pattern or after
option setting with a similar syntax.
(*BSR_ANYCRLF) CR, LF, or CRLF
@@ -10075,32 +10341,37 @@ CONDITIONAL PATTERNS
(?(VERSION[>]=n.m) test PCRE2 version
(?(assert) assertion condition
- Note the ambiguity of (?(R) and (?(Rn) which might be named reference
- conditions or recursion tests. Such a condition is interpreted as a
+ Note the ambiguity of (?(R) and (?(Rn) which might be named reference
+ conditions or recursion tests. Such a condition is interpreted as a
reference condition if the relevant named group exists.
BACKTRACKING CONTROL
- The following act immediately they are reached:
+ All backtracking control verbs may be in the form (*VERB:NAME). For
+ (*MARK) the name is mandatory, for the others it is optional. (*SKIP)
+ changes its behaviour if :NAME is present. The others just set a name
+ for passing back to the caller, but this is not a name that (*SKIP) can
+ see. The following act immediately they are reached:
(*ACCEPT) force successful match
(*FAIL) force backtrack; synonym (*F)
(*MARK:NAME) set name to be passed back; synonym (*:NAME)
- The following act only when a subsequent match failure causes a back-
+ The following act only when a subsequent match failure causes a back-
track to reach them. They all force a match failure, but they differ in
what happens afterwards. Those that advance the start-of-match point do
so only if the pattern is not anchored.
(*COMMIT) overall failure, no advance of starting point
(*PRUNE) advance to next starting character
- (*PRUNE:NAME) equivalent to (*MARK:NAME)(*PRUNE)
(*SKIP) advance to current matching position
(*SKIP:NAME) advance to position corresponding to an earlier
(*MARK:NAME); if not found, the (*SKIP) is ignored
(*THEN) local failure, backtrack to next alternation
- (*THEN:NAME) equivalent to (*MARK:NAME)(*THEN)
+
+ The effect of one of these verbs in a group called as a subroutine is
+ confined to the subroutine call.
CALLOUTS
@@ -10130,8 +10401,8 @@ AUTHOR
REVISION
- Last updated: 17 June 2017
- Copyright (c) 1997-2017 University of Cambridge.
+ Last updated: 02 September 2018
+ Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
@@ -10153,7 +10424,9 @@ UNICODE AND UTF SUPPORT
PCRE2_UTF option flag, or the pattern must start with the sequence
(*UTF). When either of these is the case, both the pattern and any sub-
ject strings that are matched against it are treated as UTF strings
- instead of strings of individual one-code-unit characters.
+ instead of strings of individual one-code-unit characters. There are
+ also some other changes to the way characters are handled, as docu-
+ mented below.
If you do not need Unicode support you can build PCRE2 without it, in
which case the library will be smaller.
@@ -10175,11 +10448,15 @@ UNICODE PROPERTY SUPPORT
WIDE CHARACTERS AND UTF MODES
- Codepoints less than 256 can be specified in patterns by either braced
+ Code points less than 256 can be specified in patterns by either braced
or unbraced hexadecimal escape sequences (for example, \x{b3} or \xb3).
Larger values have to use braced sequences. Unbraced octal code points
up to \777 are also recognized; larger ones can be coded using \o{...}.
+ The escape sequence \N{U+<hex digits>} is recognized as another way of
+ specifying a Unicode character by code point in a UTF mode. It is not
+ allowed in non-UTF modes.
+
In UTF modes, repeat quantifiers apply to complete UTF characters, not
to individual code units.
@@ -10228,7 +10505,7 @@ CASE-EQUIVALENCE IN UTF MODES
except for characters whose code points are less than 128 and that have
at most two case-equivalent values. For these, a direct table lookup is
used for speed. A few Unicode characters such as Greek sigma have more
- than two codepoints that are case-equivalent, and these are treated as
+ than two code points that are case-equivalent, and these are treated as
such.
@@ -10387,8 +10664,8 @@ AUTHOR
REVISION
- Last updated: 17 May 2017
- Copyright (c) 1997-2017 University of Cambridge.
+ Last updated: 02 September 2018
+ Copyright (c) 1997-2018 University of Cambridge.
------------------------------------------------------------------------------
diff --git a/doc/pcre2_code_free.3 b/doc/pcre2_code_free.3
index 7376869..9e0ad3c 100644
--- a/doc/pcre2_code_free.3
+++ b/doc/pcre2_code_free.3
@@ -1,4 +1,4 @@
-.TH PCRE2_CODE_FREE 3 "23 March 2017" "PCRE2 10.30"
+.TH PCRE2_CODE_FREE 3 "28 June 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -13,7 +13,8 @@ PCRE2 - Perl-compatible regular expressions (revised API)
.SH DESCRIPTION
.rs
.sp
-This function frees the memory used for a compiled pattern, including any
+If \fIcode\fP is NULL, this function does nothing. Otherwise, \fIcode\fP must
+point to a compiled pattern. This function frees its memory, including any
memory used by the JIT compiler. If the compiled pattern was created by a call
to \fBpcre2_code_copy_with_tables()\fP, the memory for the character tables is
also freed.
diff --git a/doc/pcre2_compile.3 b/doc/pcre2_compile.3
index 19f35c3..a5e8269 100644
--- a/doc/pcre2_compile.3
+++ b/doc/pcre2_compile.3
@@ -53,7 +53,7 @@ The option bits are:
PCRE2_EXTENDED Ignore white space and # comments
PCRE2_FIRSTLINE Force matching to be before newline
PCRE2_LITERAL Pattern characters are all literal
- PCRE2_MATCH_UNSET_BACKREF Match unset back references
+ PCRE2_MATCH_UNSET_BACKREF Match unset backreferences
PCRE2_MULTILINE ^ and $ match newlines within data
PCRE2_NEVER_BACKSLASH_C Lock out the use of \eC in patterns
PCRE2_NEVER_UCP Lock out PCRE2_UCP, e.g. via (*UCP)
diff --git a/doc/pcre2_compile_context_free.3 b/doc/pcre2_compile_context_free.3
index 0c6d787..e90d744 100644
--- a/doc/pcre2_compile_context_free.3
+++ b/doc/pcre2_compile_context_free.3
@@ -1,4 +1,4 @@
-.TH PCRE2_COMPILE_CONTEXT_FREE 3 "22 October 2014" "PCRE2 10.00"
+.TH PCRE2_COMPILE_CONTEXT_FREE 3 "29 June 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -15,7 +15,8 @@ PCRE2 - Perl-compatible regular expressions (revised API)
.sp
This function frees the memory occupied by a compile context, using the memory
freeing function from the general context with which it was created, or
-\fBfree()\fP if that was not set.
+\fBfree()\fP if that was not set. If the argument is NULL, the function returns
+immediately without doing anything.
.P
There is a complete description of the PCRE2 native API in the
.\" HREF
diff --git a/doc/pcre2_convert_context_free.3 b/doc/pcre2_convert_context_free.3
index fd5b13c..3fd5783 100644
--- a/doc/pcre2_convert_context_free.3
+++ b/doc/pcre2_convert_context_free.3
@@ -1,4 +1,4 @@
-.TH PCRE2_CONVERT_CONTEXT_FREE 3 "10 July 2017" "PCRE2 10.30"
+.TH PCRE2_CONVERT_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -16,7 +16,8 @@ PCRE2 - Perl-compatible regular expressions (revised API)
This function is part of an experimental set of pattern conversion functions.
It frees the memory occupied by a convert context, using the memory
freeing function from the general context with which it was created, or
-\fBfree()\fP if that was not set.
+\fBfree()\fP if that was not set. If the argument is NULL, the function returns
+immediately without doing anything.
.P
The pattern conversion functions are described in the
.\" HREF
diff --git a/doc/pcre2_converted_pattern_free.3 b/doc/pcre2_converted_pattern_free.3
index 687e078..b0645b5 100644
--- a/doc/pcre2_converted_pattern_free.3
+++ b/doc/pcre2_converted_pattern_free.3
@@ -1,4 +1,4 @@
-.TH PCRE2_CONVERTED_PATTERN_FREE 3 "11 July 2017" "PCRE2 10.30"
+.TH PCRE2_CONVERTED_PATTERN_FREE 3 "28 June 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -16,7 +16,8 @@ PCRE2 - Perl-compatible regular expressions (revised API)
This function is part of an experimental set of pattern conversion functions.
It frees the memory occupied by a converted pattern that was obtained by
calling \fBpcre2_pattern_convert()\fP with arguments that caused it to place
-the converted pattern into newly obtained heap memory.
+the converted pattern into newly obtained heap memory. If the argument is NULL,
+the function returns immediately without doing anything.
.P
The pattern conversion functions are described in the
.\" HREF
diff --git a/doc/pcre2_dfa_match.3 b/doc/pcre2_dfa_match.3
index 7839145..dfc3ae6 100644
--- a/doc/pcre2_dfa_match.3
+++ b/doc/pcre2_dfa_match.3
@@ -1,4 +1,4 @@
-.TH PCRE2_DFA_MATCH 3 "30 May 2017" "PCRE2 10.30"
+.TH PCRE2_DFA_MATCH 3 "26 April 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -34,9 +34,9 @@ just once (except when processing lookaround assertions). This function is
\fIwscount\fP Number of elements in the vector
.sp
For \fBpcre2_dfa_match()\fP, a match context is needed only if you want to set
-up a callout function or specify the match and/or the recursion depth limits.
-The \fIlength\fP and \fIstartoffset\fP values are code units, not characters.
-The options are:
+up a callout function or specify the heap limit or the match or the recursion
+depth limits. The \fIlength\fP and \fIstartoffset\fP values are code units, not
+characters. The options are:
.sp
PCRE2_ANCHORED Match only at the first position
PCRE2_ENDANCHORED Pattern can match only at end of subject
diff --git a/doc/pcre2_general_context_free.3 b/doc/pcre2_general_context_free.3
index 6285332..df1aa1f 100644
--- a/doc/pcre2_general_context_free.3
+++ b/doc/pcre2_general_context_free.3
@@ -1,4 +1,4 @@
-.TH PCRE2_GENERAL_CONTEXT_FREE 3 "22 October 2014" "PCRE2 10.00"
+.TH PCRE2_GENERAL_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -14,7 +14,8 @@ PCRE2 - Perl-compatible regular expressions (revised API)
.rs
.sp
This function frees the memory occupied by a general context, using the memory
-freeing function within the context, if set.
+freeing function within the context, if set. If the argument is NULL, the
+function returns immediately without doing anything.
.P
There is a complete description of the PCRE2 native API in the
.\" HREF
diff --git a/doc/pcre2_jit_stack_assign.3 b/doc/pcre2_jit_stack_assign.3
index 66b8095..33d2e1c 100644
--- a/doc/pcre2_jit_stack_assign.3
+++ b/doc/pcre2_jit_stack_assign.3
@@ -1,4 +1,4 @@
-.TH PCRE2_JIT_STACK_ASSIGN 3 "08 November 2014" "PCRE2 10.0"
+.TH PCRE2_JIT_STACK_ASSIGN 3 "28 June 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -24,7 +24,10 @@ passed to a matching function. The arguments of this function are:
callback a callback function
callback_data a JIT stack or a value to be passed to the callback
.P
-If \fIcallback\fP is NULL and \fIcallback_data\fP is NULL, an internal 32K
+If \fImcontext\fP is NULL, the function returns immediately, without doing
+anything.
+.P
+If \fIcallback\fP is NULL and \fIcallback_data\fP is NULL, an internal 32KiB
block on the machine stack is used.
.P
If \fIcallback\fP is NULL and \fIcallback_data\fP is not NULL,
@@ -33,8 +36,9 @@ If \fIcallback\fP is NULL and \fIcallback_data\fP is not NULL,
.P
If \fIcallback\fP not NULL, it is called with \fIcallback_data\fP as an
argument at the start of matching, in order to set up a JIT stack. If the
-result is NULL, the internal 32K stack is used; otherwise the return value must
-be a valid JIT stack, the result of calling \fBpcre2_jit_stack_create()\fP.
+result is NULL, the internal 32KiB stack is used; otherwise the return value
+must be a valid JIT stack, the result of calling
+\fBpcre2_jit_stack_create()\fP.
.P
You may safely use the same JIT stack for multiple patterns, as long as they
are all matched in the same thread. In a multithread application, each thread
diff --git a/doc/pcre2_jit_stack_create.3 b/doc/pcre2_jit_stack_create.3
index 61ccf79..f0b29f0 100644
--- a/doc/pcre2_jit_stack_create.3
+++ b/doc/pcre2_jit_stack_create.3
@@ -21,8 +21,8 @@ context, for memory allocation functions, or NULL for standard memory
allocation. The result can be passed to the JIT run-time code by calling
\fBpcre2_jit_stack_assign()\fP to associate the stack with a compiled pattern,
which can then be processed by \fBpcre2_match()\fP or \fBpcre2_jit_match()\fP.
-A maximum stack size of 512K to 1M should be more than enough for any pattern.
-For more details, see the
+A maximum stack size of 512KiB to 1MiB should be more than enough for any
+pattern. For more details, see the
.\" HREF
\fBpcre2jit\fP
.\"
diff --git a/doc/pcre2_jit_stack_free.3 b/doc/pcre2_jit_stack_free.3
index bfa4f79..2131a79 100644
--- a/doc/pcre2_jit_stack_free.3
+++ b/doc/pcre2_jit_stack_free.3
@@ -1,4 +1,4 @@
-.TH PCRE2_JIT_STACK_FREE 3 "21 October 2014" "PCRE2 10.00"
+.TH PCRE2_JIT_STACK_FREE 3 "28 June 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -13,8 +13,9 @@ PCRE2 - Perl-compatible regular expressions (revised API)
.rs
.sp
This function is used to free a JIT stack that was created by
-\fBpcre2_jit_stack_create()\fP when it is no longer needed. For more details,
-see the
+\fBpcre2_jit_stack_create()\fP when it is no longer needed. If the argument is
+NULL, the function returns immediately without doing anything. For more
+details, see the
.\" HREF
\fBpcre2jit\fP
.\"
diff --git a/doc/pcre2_match.3 b/doc/pcre2_match.3
index 6f7aefb..9d15ec9 100644
--- a/doc/pcre2_match.3
+++ b/doc/pcre2_match.3
@@ -65,7 +65,7 @@ subject that is terminated by a binary zero code unit. The options are:
match even if there is a full match
.\" JOIN
PCRE2_PARTIAL_SOFT Return PCRE2_ERROR_PARTIAL for a partial
- match if no full matches are found
+ match if no full matches are found
.sp
For details of partial matching, see the
.\" HREF
diff --git a/doc/pcre2_match_context_free.3 b/doc/pcre2_match_context_free.3
index 71b9783..7d19f98 100644
--- a/doc/pcre2_match_context_free.3
+++ b/doc/pcre2_match_context_free.3
@@ -1,4 +1,4 @@
-.TH PCRE2_MATCH_CONTEXT_FREE 3 "22 October 2014" "PCRE2 10.00"
+.TH PCRE2_MATCH_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -15,7 +15,8 @@ PCRE2 - Perl-compatible regular expressions (revised API)
.sp
This function frees the memory occupied by a match context, using the memory
freeing function from the general context with which it was created, or
-\fBfree()\fP if that was not set.
+\fBfree()\fP if that was not set. If the argument is NULL, the function returns
+immediately without doing anything.
.P
There is a complete description of the PCRE2 native API in the
.\" HREF
diff --git a/doc/pcre2_match_data_free.3 b/doc/pcre2_match_data_free.3
index e22074b..56ed08b 100644
--- a/doc/pcre2_match_data_free.3
+++ b/doc/pcre2_match_data_free.3
@@ -1,4 +1,4 @@
-.TH PCRE2_MATCH_DATA_FREE 3 "25 March 2017" "PCRE2 10.30"
+.TH PCRE2_MATCH_DATA_FREE 3 "28 June 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -13,9 +13,10 @@ PCRE2 - Perl-compatible regular expressions (revised API)
.SH DESCRIPTION
.rs
.sp
-This function frees the memory occupied by a match data block, using the memory
-freeing function from the general context or compiled pattern with which it was
-created, or \fBfree()\fP if that was not set.
+If \fImatch_data\fP is NULL, this function does nothing. Otherwise,
+\fImatch_data\fP must point to a match data block, which this function frees,
+using the memory freeing function from the general context or compiled pattern
+with which it was created, or \fBfree()\fP if that was not set.
.P
There is a complete description of the PCRE2 native API in the
.\" HREF
diff --git a/doc/pcre2_pattern_info.3 b/doc/pcre2_pattern_info.3
index 64bfc45..01b74a2 100644
--- a/doc/pcre2_pattern_info.3
+++ b/doc/pcre2_pattern_info.3
@@ -24,7 +24,7 @@ request are as follows:
.sp
PCRE2_INFO_ALLOPTIONS Final options after compiling
PCRE2_INFO_ARGOPTIONS Options passed to \fBpcre2_compile()\fP
- PCRE2_INFO_BACKREFMAX Number of highest back reference
+ PCRE2_INFO_BACKREFMAX Number of highest backreference
PCRE2_INFO_BSR What \eR matches:
PCRE2_BSR_UNICODE: Unicode line endings
PCRE2_BSR_ANYCRLF: CR, LF, or CRLF only
diff --git a/doc/pcre2_serialize_decode.3 b/doc/pcre2_serialize_decode.3
index 57304a5..b67a112 100644
--- a/doc/pcre2_serialize_decode.3
+++ b/doc/pcre2_serialize_decode.3
@@ -1,4 +1,4 @@
-.TH PCRE2_SERIALIZE_DECODE 3 "02 September 2015" "PCRE2 10.21"
+.TH PCRE2_SERIALIZE_DECODE 3 "27 June 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -16,7 +16,10 @@ PCRE2 - Perl-compatible regular expressions (revised API)
.rs
.sp
This function decodes a serialized set of compiled patterns back into a list of
-individual patterns. Its arguments are:
+individual patterns. This is possible only on a host that is running the same
+version of PCRE2, with the same code unit width, and the host must also have
+the same endianness, pointer width and PCRE2_SIZE type. The arguments for
+\fBpcre2_serialize_decode()\fP are:
.sp
\fIcodes\fP pointer to a vector in which to build the list
\fInumber_of_codes\fP number of slots in the vector
@@ -43,8 +46,8 @@ There is a complete description of the PCRE2 native API in the
.\" HREF
\fBpcre2api\fP
.\"
-page and a description of the POSIX API in the
+page and a description of the serialization functions in the
.\" HREF
-\fBpcre2posix\fP
+\fBpcre2serialize\fP
.\"
page.
diff --git a/doc/pcre2_serialize_encode.3 b/doc/pcre2_serialize_encode.3
index 9c29633..d529360 100644
--- a/doc/pcre2_serialize_encode.3
+++ b/doc/pcre2_serialize_encode.3
@@ -1,4 +1,4 @@
-.TH PCRE2_SERIALIZE_ENCODE 3 "02 September 2015" "PCRE2 10.21"
+.TH PCRE2_SERIALIZE_ENCODE 3 "27 June 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -16,7 +16,12 @@ PCRE2 - Perl-compatible regular expressions (revised API)
.rs
.sp
This function encodes a list of compiled patterns into a byte stream that can
-be saved on disc or elsewhere. Its arguments are:
+be saved on disc or elsewhere. Note that this is not an abstract format like
+Java or .NET serialization. Conversion of the byte stream back into usable
+compiled patterns can only happen on a host that is running the same version of
+PCRE2, with the same code unit width, and the host must also have the same
+endianness, pointer width and PCRE2_SIZE type. The arguments for
+\fBpcre2_serialize_encode()\fP are:
.sp
\fIcodes\fP pointer to a vector containing the list
\fInumber_of_codes\fP number of slots in the vector
@@ -42,8 +47,8 @@ There is a complete description of the PCRE2 native API in the
.\" HREF
\fBpcre2api\fP
.\"
-page and a description of the POSIX API in the
+page and a description of the serialization functions in the
.\" HREF
-\fBpcre2posix\fP
+\fBpcre2serialize\fP
.\"
page.
diff --git a/doc/pcre2_serialize_free.3 b/doc/pcre2_serialize_free.3
index 9daa94b..2c43824 100644
--- a/doc/pcre2_serialize_free.3
+++ b/doc/pcre2_serialize_free.3
@@ -1,4 +1,4 @@
-.TH PCRE2_SERIALIZE_FREE 3 "19 January 2015" "PCRE2 10.10"
+.TH PCRE2_SERIALIZE_FREE 3 "27 June 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -15,14 +15,15 @@ PCRE2 - Perl-compatible regular expressions (revised API)
.sp
This function frees the memory that was obtained by
\fBpcre2_serialize_encode()\fP to hold a serialized byte stream. The argument
-must point to such a byte stream.
+must point to such a byte stream or be NULL, in which case the function returns
+without doing anything.
.P
There is a complete description of the PCRE2 native API in the
.\" HREF
\fBpcre2api\fP
.\"
-page and a description of the POSIX API in the
+page and a description of the serialization functions in the
.\" HREF
-\fBpcre2posix\fP
+\fBpcre2serialize\fP
.\"
page.
diff --git a/doc/pcre2_serialize_get_number_of_codes.3 b/doc/pcre2_serialize_get_number_of_codes.3
index d8ce6a1..f5eea54 100644
--- a/doc/pcre2_serialize_get_number_of_codes.3
+++ b/doc/pcre2_serialize_get_number_of_codes.3
@@ -1,4 +1,4 @@
-.TH PCRE2_SERIALIZE_GET_NUMBER_OF_CODES 3 "19 January 2015" "PCRE2 10.10"
+.TH PCRE2_SERIALIZE_GET_NUMBER_OF_CODES 3 "27 June 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -30,8 +30,8 @@ There is a complete description of the PCRE2 native API in the
.\" HREF
\fBpcre2api\fP
.\"
-page and a description of the POSIX API in the
+page and a description of the serialization functions in the
.\" HREF
-\fBpcre2posix\fP
+\fBpcre2serialize\fP
.\"
page.
diff --git a/doc/pcre2_set_compile_extra_options.3 b/doc/pcre2_set_compile_extra_options.3
index 1d73a8f..79f71ce 100644
--- a/doc/pcre2_set_compile_extra_options.3
+++ b/doc/pcre2_set_compile_extra_options.3
@@ -1,4 +1,4 @@
-.TH PCRE2_SET_MAX_PATTERN_LENGTH 3 "16 June 2017" "PCRE2 10.30"
+.TH PCRE2_SET_COMPILE_EXTRA_OPTIONS 3 "16 June 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
diff --git a/doc/pcre2_set_glob_separator.3 b/doc/pcre2_set_glob_separator.3
index 273b515..5d78c09 100644
--- a/doc/pcre2_set_glob_separator.3
+++ b/doc/pcre2_set_glob_separator.3
@@ -16,7 +16,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
.sp
This function is part of an experimental set of pattern conversion functions.
It sets the component separator character that is used when converting globs.
-The second argument must one of the characters forward slash, backslash, or
+The second argument must be one of the characters forward slash, backslash, or
dot. The default is backslash when running under Windows, otherwise forward
slash. The result of the function is zero for success or PCRE2_ERROR_BADDATA if
the second argument is invalid.
diff --git a/doc/pcre2_set_heap_limit.3 b/doc/pcre2_set_heap_limit.3
index a99b4ab..7c155a2 100644
--- a/doc/pcre2_set_heap_limit.3
+++ b/doc/pcre2_set_heap_limit.3
@@ -1,4 +1,4 @@
-.TH PCRE2_SET_DEPTH_LIMIT 3 "11 April 2017" "PCRE2 10.30"
+.TH PCRE2_SET_HEAP_LIMIT 3 "11 April 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
diff --git a/doc/pcre2_substring_free.3 b/doc/pcre2_substring_free.3
index ca94e78..6d0fd58 100644
--- a/doc/pcre2_substring_free.3
+++ b/doc/pcre2_substring_free.3
@@ -1,4 +1,4 @@
-.TH PCRE2_SUBSTRING_FREE 3 "21 October 2014" "PCRE2 10.00"
+.TH PCRE2_SUBSTRING_FREE 3 "28 June 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -15,7 +15,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
This is a convenience function for freeing the memory obtained by a previous
call to \fBpcre2_substring_get_byname()\fP or
\fBpcre2_substring_get_bynumber()\fP. Its only argument is a pointer to the
-string.
+string. If the argument is NULL, the function does nothing.
.P
There is a complete description of the PCRE2 native API in the
.\" HREF
diff --git a/doc/pcre2_substring_list_free.3 b/doc/pcre2_substring_list_free.3
index 4725f9c..d977ed5 100644
--- a/doc/pcre2_substring_list_free.3
+++ b/doc/pcre2_substring_list_free.3
@@ -1,4 +1,4 @@
-.TH PCRE2_SUBSTRING_LIST_FREE 3 "21 October 2014" "PCRE2 10.00"
+.TH PCRE2_SUBSTRING_LIST_FREE 3 "28 June 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -14,7 +14,8 @@ PCRE2 - Perl-compatible regular expressions (revised API)
.sp
This is a convenience function for freeing the store obtained by a previous
call to \fBpcre2substring_list_get()\fP. Its only argument is a pointer to
-the list of string pointers.
+the list of string pointers. If the argument is NULL, the function returns
+immediately, without doing anything.
.P
There is a complete description of the PCRE2 native API in the
.\" HREF
diff --git a/doc/pcre2api.3 b/doc/pcre2api.3
index 786b314..ba90c86 100644
--- a/doc/pcre2api.3
+++ b/doc/pcre2api.3
@@ -1,4 +1,4 @@
-.TH PCRE2API 3 "31 December 2017" "PCRE2 10.31"
+.TH PCRE2API 3 "07 September 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.sp
@@ -453,7 +453,9 @@ been matched by \fBpcre2_match()\fP. They are:
\fBpcre2_substring_number_from_name()\fP
.sp
\fBpcre2_substring_free()\fP and \fBpcre2_substring_list_free()\fP are also
-provided, to free memory used for extracted strings.
+provided, to free memory used for extracted strings. If either of these
+functions is called with a NULL argument, the function returns immediately
+without doing anything.
.P
The function \fBpcre2_substitute()\fP can be called to match a pattern and
return a copy of the subject string with substitutions for parts that were
@@ -497,10 +499,10 @@ U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS
.P
Each of the first three conventions is used by at least one operating system as
its standard newline sequence. When PCRE2 is built, a default can be specified.
-The default default is LF, which is the Unix standard. However, the newline
-convention can be changed by an application when calling \fBpcre2_compile()\fP,
-or it can be specified by special text at the start of the pattern itself; this
-overrides any other settings. See the
+If it is not, the default is set to LF, which is the Unix standard. However,
+the newline convention can be changed by an application when calling
+\fBpcre2_compile()\fP, or it can be specified by special text at the start of
+the pattern itself; this overrides any other settings. See the
.\" HREF
\fBpcre2pattern\fP
.\"
@@ -666,6 +668,8 @@ The memory used for a general context should be freed by calling:
.B void pcre2_general_context_free(pcre2_general_context *\fIgcontext\fP);
.fi
.sp
+If this function is passed a NULL argument, it returns immediately without
+doing anything.
.
.
.\" HTML <a name="compilecontext"></a>
@@ -771,10 +775,10 @@ sequence such as (*CRLF). See the
page for details.
.P
When a pattern is compiled with the PCRE2_EXTENDED or PCRE2_EXTENDED_MORE
-option, the newline convention affects the recognition of white space and the
-end of internal comments starting with #. The value is saved with the compiled
-pattern for subsequent use by the JIT compiler and by the two interpreted
-matching functions, \fIpcre2_match()\fP and \fIpcre2_dfa_match()\fP.
+option, the newline convention affects the recognition of the end of internal
+comments starting with #. The value is saved with the compiled pattern for
+subsequent use by the JIT compiler and by the two interpreted matching
+functions, \fIpcre2_match()\fP and \fIpcre2_dfa_match()\fP.
.sp
.nf
.B int pcre2_set_parens_nest_limit(pcre2_compile_context *\fIccontext\fP,
@@ -885,18 +889,20 @@ offset limit. In other words, whichever limit comes first is used.
.B " uint32_t \fIvalue\fP);"
.fi
.sp
-The \fIheap_limit\fP parameter specifies, in units of kilobytes, the maximum
-amount of heap memory that \fBpcre2_match()\fP may use to hold backtracking
-information when running an interpretive match. This limit does not apply to
-matching with the JIT optimization, which has its own memory control
-arrangements (see the
+The \fIheap_limit\fP parameter specifies, in units of kibibytes (1024 bytes),
+the maximum amount of heap memory that \fBpcre2_match()\fP may use to hold
+backtracking information when running an interpretive match. This limit also
+applies to \fBpcre2_dfa_match()\fP, which may use the heap when processing
+patterns with a lot of nested pattern recursion or lookarounds or atomic
+groups. This limit does not apply to matching with the JIT optimization, which
+has its own memory control arrangements (see the
.\" HREF
\fBpcre2jit\fP
.\"
-documentation for more details), nor does it apply to \fBpcre2_dfa_match()\fP.
-If the limit is reached, the negative error code PCRE2_ERROR_HEAPLIMIT is
-returned. The default limit is set when PCRE2 is built; the default default is
-very large and is essentially "unlimited".
+documentation for more details). If the limit is reached, the negative error
+code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2
+is built; if it is not, the default is set very large and is essentially
+"unlimited".
.P
A value for the heap limit may also be supplied by an item at the start of a
pattern of the form
@@ -907,13 +913,18 @@ where ddd is a decimal number. However, such a setting is ignored unless ddd is
less than the limit set by the caller of \fBpcre2_match()\fP or, if no such
limit is set, less than the default.
.P
-The \fBpcre2_match()\fP function starts out using a 20K vector on the system
+The \fBpcre2_match()\fP function starts out using a 20KiB vector on the system
stack for recording backtracking points. The more nested backtracking points
there are (that is, the deeper the search tree), the more memory is needed.
Heap memory is used only if the initial vector is too small. If the heap limit
is set to a value less than 21 (in particular, zero) no heap memory will be
used. In this case, only patterns that do not have a lot of nested backtracking
can be successfully processed.
+.P
+Similarly, for \fBpcre2_dfa_match()\fP, a vector on the system stack is used
+when processing pattern recursions, lookarounds, or atomic groups, and only if
+this is not big enough is heap memory used. In this case, too, setting a value
+of zero disables the use of the heap.
.sp
.nf
.B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP,
@@ -967,17 +978,27 @@ backtracking.
.P
The depth limit is not relevant, and is ignored, when matching is done using
JIT compiled code. However, it is supported by \fBpcre2_dfa_match()\fP, which
-uses it to limit the depth of internal recursive function calls that implement
-atomic groups, lookaround assertions, and pattern recursions. This is,
-therefore, an indirect limit on the amount of system stack that is used. A
-recursive pattern such as /(.)(?1)/, when matched to a very long string using
-\fBpcre2_dfa_match()\fP, can use a great deal of stack.
-.P
-The default value for the depth limit can be set when PCRE2 is built; the
-default default is the same value as the default for the match limit. If the
-limit is exceeded, \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP returns
-PCRE2_ERROR_DEPTHLIMIT. A value for the depth limit may also be supplied by an
-item at the start of a pattern of the form
+uses it to limit the depth of nested internal recursive function calls that
+implement atomic groups, lookaround assertions, and pattern recursions. This
+limits, indirectly, the amount of system stack that is used. It was more useful
+in versions before 10.32, when stack memory was used for local workspace
+vectors for recursive function calls. From version 10.32, only local variables
+are allocated on the stack and as each call uses only a few hundred bytes, even
+a small stack can support quite a lot of recursion.
+.P
+If the depth of internal recursive function calls is great enough, local
+workspace vectors are allocated on the heap from version 10.32 onwards, so the
+depth limit also indirectly limits the amount of heap memory that is used. A
+recursive pattern such as /(.(?2))((?1)|)/, when matched to a very long string
+using \fBpcre2_dfa_match()\fP, can use a great deal of memory. However, it is
+probably better to limit heap usage directly by calling
+\fBpcre2_set_heap_limit()\fP.
+.P
+The default value for the depth limit can be set when PCRE2 is built; if it is
+not, the default is set to the same value as the default for the match limit.
+If the limit is exceeded, \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP
+returns PCRE2_ERROR_DEPTHLIMIT. A value for the depth limit may also be
+supplied by an item at the start of a pattern of the form
.sp
(*LIMIT_DEPTH=ddd)
.sp
@@ -1028,15 +1049,16 @@ and the 2-bit and 4-bit indicate 16-bit and 32-bit support, respectively.
PCRE2_CONFIG_DEPTHLIMIT
.sp
The output is a uint32_t integer that gives the default limit for the depth of
-nested backtracking in \fBpcre2_match()\fP or the depth of nested recursions
-and lookarounds in \fBpcre2_dfa_match()\fP. Further details are given with
-\fBpcre2_set_depth_limit()\fP above.
+nested backtracking in \fBpcre2_match()\fP or the depth of nested recursions,
+lookarounds, and atomic groups in \fBpcre2_dfa_match()\fP. Further details are
+given with \fBpcre2_set_depth_limit()\fP above.
.sp
PCRE2_CONFIG_HEAPLIMIT
.sp
-The output is a uint32_t integer that gives, in kilobytes, the default limit
-for the amount of heap memory used by \fBpcre2_match()\fP. Further details are
-given with \fBpcre2_set_heap_limit()\fP above.
+The output is a uint32_t integer that gives, in kibibytes, the default limit
+for the amount of heap memory used by \fBpcre2_match()\fP or
+\fBpcre2_dfa_match()\fP. Further details are given with
+\fBpcre2_set_heap_limit()\fP above.
.sp
PCRE2_CONFIG_JIT
.sp
@@ -1066,7 +1088,7 @@ relevant.
.P
The default value of 2 for the 8-bit and 16-bit libraries is sufficient for all
but the most massive patterns, since it allows the size of the compiled pattern
-to be up to 64K code units. Larger values allow larger regular expressions to
+to be up to 65535 code units. Larger values allow larger regular expressions to
be compiled by those two libraries, but at the expense of slower matching.
.sp
PCRE2_CONFIG_MATCHLIMIT
@@ -1160,6 +1182,8 @@ If the compile context argument \fIccontext\fP is NULL, memory for the compiled
pattern is obtained by calling \fBmalloc()\fP. Otherwise, it is obtained from
the same memory function that was used for the compile context. The caller must
free the memory by calling \fBpcre2_code_free()\fP when it is no longer needed.
+If \fBpcre2_code_free()\fP is called with a NULL argument, it returns
+immediately, without doing anything.
.P
The function \fBpcre2_code_copy()\fP makes a copy of the compiled code in new
memory, using the same memory allocator as was used for the original. However,
@@ -1170,7 +1194,8 @@ below),
.\"
the JIT information cannot be copied (because it is position-dependent).
The new copy can initially be used only for non-JIT matching, though it can be
-passed to \fBpcre2_jit_compile()\fP if required.
+passed to \fBpcre2_jit_compile()\fP if required. If \fBpcre2_code_copy()\fP is
+called with a NULL argument, it returns NULL.
.P
The \fBpcre2_code_copy()\fP function provides a way for individual threads in a
multithreaded application to acquire a private copy of shared compiled code.
@@ -1187,7 +1212,9 @@ there are occasions when a copy of a compiled pattern and the relevant tables
are needed. The \fBpcre2_code_copy_with_tables()\fP provides this facility.
Copies of both the code and the tables are made, with the new code pointing to
the new tables. The memory for the new tables is automatically freed when
-\fBpcre2_code_free()\fP is called for the new copy of the compiled code.
+\fBpcre2_code_free()\fP is called for the new copy of the compiled code. If
+\fBpcre2_code_copy_with_tables()\fP is called with a NULL argument, it returns
+NULL.
.P
NOTE: When one of the matching functions is called, pointers to the compiled
pattern and the subject string are set in the match data block so that they can
@@ -1329,9 +1356,9 @@ include a closing parenthesis in the name. However, if the PCRE2_ALT_VERBNAMES
option is set, normal backslash processing is applied to verb names and only an
unescaped closing parenthesis terminates the name. A closing parenthesis can be
included in a name either as \e) or between \eQ and \eE. If the PCRE2_EXTENDED
-or PCRE2_EXTENDED_MORE option is set, unescaped whitespace in verb names is
-skipped and #-comments are recognized in this mode, exactly as in the rest of
-the pattern.
+or PCRE2_EXTENDED_MORE option is set with PCRE2_ALT_VERBNAMES, unescaped
+whitespace in verb names is skipped and #-comments are recognized, exactly as
+in the rest of the pattern.
.sp
PCRE2_AUTO_CALLOUT
.sp
@@ -1350,7 +1377,7 @@ If this bit is set, letters in the pattern match both upper and lower case
letters in the subject. It is equivalent to Perl's /i option, and it can be
changed within a pattern by a (?i) option setting. If PCRE2_UTF is set, Unicode
properties are used for all characters with more than one other case, and for
-all characters whose code points are greater than U+007f. For lower valued
+all characters whose code points are greater than U+007F. For lower valued
characters with only one other case, a lookup table is used for speed. When
PCRE2_UTF is not set, a lookup table is used for all code points less than 256,
and higher code points (available only in 16-bit or 32-bit mode) are treated as
@@ -1373,7 +1400,8 @@ character, even if newlines are coded as CRLF. Without this option, a dot does
not match when the current position in the subject is at a newline. This option
is equivalent to Perl's /s option, and it can be changed within a pattern by a
(?s) option setting. A negative class such as [^a] always matches newline
-characters, independent of the setting of this option.
+characters, and the \eN escape sequence always matches a non-newline character,
+independent of the setting of PCRE2_DOTALL.
.sp
PCRE2_DUPNAMES
.sp
@@ -1417,14 +1445,35 @@ is not allowed within sequences such as (?> that introduce various
parenthesized subpatterns, nor within numerical quantifiers such as {1,3}.
Ignorable white space is permitted between an item and a following quantifier
and between a quantifier and a following + that indicates possessiveness.
+PCRE2_EXTENDED is equivalent to Perl's /x option, and it can be changed within
+a pattern by a (?x) option setting.
.P
-PCRE2_EXTENDED also causes characters between an unescaped # outside a
-character class and the next newline, inclusive, to be ignored, which makes it
-possible to include comments inside complicated patterns. Note that the end of
-this type of comment is a literal newline sequence in the pattern; escape
-sequences that happen to represent a newline do not count. PCRE2_EXTENDED is
-equivalent to Perl's /x option, and it can be changed within a pattern by a
-(?x) option setting.
+When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recognizes as
+white space only those characters with code points less than 256 that are
+flagged as white space in its low-character table. The table is normally
+created by
+.\" HREF
+\fBpcre2_maketables()\fP,
+.\"
+which uses the \fBisspace()\fP function to identify space characters. In most
+ASCII environments, the relevant characters are those with code points 0x0009
+(tab), 0x000A (linefeed), 0x000B (vertical tab), 0x000C (formfeed), 0x000D
+(carriage return), and 0x0020 (space).
+.P
+When PCRE2 is compiled with Unicode support, in addition to these characters,
+five more Unicode "Pattern White Space" characters are recognized by
+PCRE2_EXTENDED. These are U+0085 (next line), U+200E (left-to-right mark),
+U+200F (right-to-left mark), U+2028 (line separator), and U+2029 (paragraph
+separator). This set of characters is the same as recognized by Perl's /x
+option. Note that the horizontal and vertical space characters that are matched
+by the \eh and \ev escapes in patterns are a much bigger set.
+.P
+As well as ignoring most white space, PCRE2_EXTENDED also causes characters
+between an unescaped # outside a character class and the next newline,
+inclusive, to be ignored, which makes it possible to include comments inside
+complicated patterns. Note that the end of this type of comment is a literal
+newline sequence in the pattern; escape sequences that happen to represent a
+newline do not count.
.P
Which characters are interpreted as newlines can be specified by a setting in
the compile context that is passed to \fBpcre2_compile()\fP or by a special
@@ -1439,9 +1488,11 @@ built.
PCRE2_EXTENDED_MORE
.sp
This option has the effect of PCRE2_EXTENDED, but, in addition, unescaped space
-and horizontal tab characters are ignored inside a character class.
-PCRE2_EXTENDED_MORE is equivalent to Perl's 5.26 /xx option, and it can be
-changed within a pattern by a (?xx) option setting.
+and horizontal tab characters are ignored inside a character class. Note: only
+these two characters are ignored, not the full set of pattern white space
+characters that are ignored outside a character class. PCRE2_EXTENDED_MORE is
+equivalent to Perl's /xx option, and it can be changed within a pattern by a
+(?xx) option setting.
.sp
PCRE2_FIRSTLINE
.sp
@@ -1472,7 +1523,7 @@ error.
.sp
PCRE2_MATCH_UNSET_BACKREF
.sp
-If this option is set, a back reference to an unset subpattern group matches an
+If this option is set, a backreference to an unset subpattern group matches an
empty string (by default this causes the current matching alternative to fail).
A pattern such as (\e1)(a) succeeds when this option is set (assuming it can
find an "a" in the subject), whereas it fails by default, for Perl
@@ -1533,8 +1584,8 @@ If this option is set, it disables the use of numbered capturing parentheses in
the pattern. Any opening parenthesis that is not followed by ? behaves as if it
were followed by ?: but named parentheses can still be used for capturing (and
they acquire numbers in the usual way). This is the same as Perl's /n option.
-Note that, when this option is set, references to capturing groups (back
-references or recursion/subroutine calls) may only refer to named groups,
+Note that, when this option is set, references to capturing groups
+(backreferences or recursion/subroutine calls) may only refer to named groups,
though the reference can be by name or by number.
.sp
PCRE2_NO_AUTO_POSSESS
@@ -1553,7 +1604,7 @@ If this option is set, it disables an optimization that is applied when .* is
the first significant item in a top-level branch of a pattern, and all the
other branches also start with .* or with \eA or \eG or ^. The optimization is
automatically disabled for .* if it is inside an atomic group or a capturing
-group that is the subject of a back reference, or if the pattern contains
+group that is the subject of a backreference, or if the pattern contains
(*PRUNE) or (*SKIP). When the optimization is not disabled, such a pattern is
automatically anchored if PCRE2_DOTALL is set for all the .* items and
PCRE2_MULTILINE is not set for any ^ items. Otherwise, the fact that any match
@@ -1705,7 +1756,8 @@ behaviour of PCRE2 are given in the
.\" HREF
\fBpcre2unicode\fP
.\"
-page.
+page. In particular, note that it changes the way PCRE2_CASELESS handles
+characters with code points greater than 127.
.
.
.\" HTML <a name="extracompileoptions"></a>
@@ -1939,7 +1991,7 @@ following are true:
.* is not in an atomic group
.\" JOIN
.* is not in a capturing group that is the subject
- of a back reference
+ of a backreference
PCRE2_DOTALL is in force for .*
Neither (*PRUNE) nor (*SKIP) appears in the pattern
PCRE2_NO_DOTSTAR_ANCHOR is not set
@@ -1949,20 +2001,20 @@ options returned for PCRE2_INFO_ALLOPTIONS.
.sp
PCRE2_INFO_BACKREFMAX
.sp
-Return the number of the highest back reference in the pattern. The third
+Return the number of the highest backreference in the pattern. The third
argument should point to an \fBuint32_t\fP variable. Named subpatterns acquire
-numbers as well as names, and these count towards the highest back reference.
-Back references such as \e4 or \eg{12} match the captured characters of the
+numbers as well as names, and these count towards the highest backreference.
+Backreferences such as \e4 or \eg{12} match the captured characters of the
given group, but in addition, the check that a capturing group is set in a
-conditional subpattern such as (?(3)a|b) is also a back reference. Zero is
-returned if there are no back references.
+conditional subpattern such as (?(3)a|b) is also a backreference. Zero is
+returned if there are no backreferences.
.sp
PCRE2_INFO_BSR
.sp
-The output is a uint32_t whose value indicates what character sequences the \eR
-escape sequence matches. A value of PCRE2_BSR_UNICODE means that \eR matches
-any Unicode line ending sequence; a value of PCRE2_BSR_ANYCRLF means that \eR
-matches only CR, LF, or CRLF.
+The output is a uint32_t integer whose value indicates what character sequences
+the \eR escape sequence matches. A value of PCRE2_BSR_UNICODE means that \eR
+matches any Unicode line ending sequence; a value of PCRE2_BSR_ANYCRLF means
+that \eR matches only CR, LF, or CRLF.
.sp
PCRE2_INFO_CAPTURECOUNT
.sp
@@ -1974,10 +2026,10 @@ The third argument should point to an \fBuint32_t\fP variable.
.sp
If the pattern set a backtracking depth limit by including an item of the form
(*LIMIT_DEPTH=nnnn) at the start, the value is returned. The third argument
-should point to an unsigned 32-bit integer. If no such value has been set, the
-call to \fBpcre2_pattern_info()\fP returns the error PCRE2_ERROR_UNSET. Note
-that this limit will only be used during matching if it is less than the limit
-set or defaulted by the caller of the match function.
+should point to a uint32_t integer. If no such value has been set, the call to
+\fBpcre2_pattern_info()\fP returns the error PCRE2_ERROR_UNSET. Note that this
+limit will only be used during matching if it is less than the limit set or
+defaulted by the caller of the match function.
.sp
PCRE2_INFO_FIRSTBITMAP
.sp
@@ -1987,7 +2039,7 @@ values for the first code unit in any match. For example, a pattern that starts
with [abc] results in a table with three bits set. When code unit values
greater than 255 are supported, the flag bit for 255 means "any code unit of
value 255 or above". If such a table was constructed, a pointer to it is
-returned. Otherwise NULL is returned. The third argument should point to an
+returned. Otherwise NULL is returned. The third argument should point to a
\fBconst uint8_t *\fP variable.
.sp
PCRE2_INFO_FIRSTCODETYPE
@@ -2014,7 +2066,7 @@ and up to 0xffffffff when not using UTF-32 mode.
.sp
Return the size (in bytes) of the data frames that are used to remember
backtracking positions when the pattern is processed by \fBpcre2_match()\fP
-without the use of JIT. The third argument should point to an \fBsize_t\fP
+without the use of JIT. The third argument should point to a \fBsize_t\fP
variable. The frame size depends on the number of capturing parentheses in the
pattern. Each additional capturing group adds two PCRE2_SIZE variables.
.sp
@@ -2034,10 +2086,10 @@ the equivalent hexadecimal or octal escape sequences.
.sp
If the pattern set a heap memory limit by including an item of the form
(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argument
-should point to an unsigned 32-bit integer. If no such value has been set, the
-call to \fBpcre2_pattern_info()\fP returns the error PCRE2_ERROR_UNSET. Note
-that this limit will only be used during matching if it is less than the limit
-set or defaulted by the caller of the match function.
+should point to a uint32_t integer. If no such value has been set, the call to
+\fBpcre2_pattern_info()\fP returns the error PCRE2_ERROR_UNSET. Note that this
+limit will only be used during matching if it is less than the limit set or
+defaulted by the caller of the match function.
.sp
PCRE2_INFO_JCHANGED
.sp
@@ -2081,15 +2133,15 @@ in such cases.
.sp
If the pattern set a match limit by including an item of the form
(*LIMIT_MATCH=nnnn) at the start, the value is returned. The third argument
-should point to an unsigned 32-bit integer. If no such value has been set, the
-call to \fBpcre2_pattern_info()\fP returns the error PCRE2_ERROR_UNSET. Note
-that this limit will only be used during matching if it is less than the limit
-set or defaulted by the caller of the match function.
+should point to a uint32_t integer. If no such value has been set, the call to
+\fBpcre2_pattern_info()\fP returns the error PCRE2_ERROR_UNSET. Note that this
+limit will only be used during matching if it is less than the limit set or
+defaulted by the caller of the match function.
.sp
PCRE2_INFO_MAXLOOKBEHIND
.sp
Return the number of characters (not code units) in the longest lookbehind
-assertion in the pattern. The third argument should point to an unsigned 32-bit
+assertion in the pattern. The third argument should point to a uint32_t
integer. This information is useful when doing multi-segment matching using the
partial matching facilities. Note that the simple assertions \eb and \eB
require a one-character lookbehind. \eA also registers a one-character
@@ -2232,13 +2284,18 @@ documentation, which also gives further details about callouts.
.rs
.sp
It is possible to save compiled patterns on disc or elsewhere, and reload them
-later, subject to a number of restrictions. The functions whose names begin
-with \fBpcre2_serialize_\fP are used for this purpose. They are described in
-the
+later, subject to a number of restrictions. The host on which the patterns are
+reloaded must be running the same version of PCRE2, with the same code unit
+width, and must also have the same endianness, pointer width, and PCRE2_SIZE
+type. Before compiled patterns can be saved, they must be converted to a
+"serialized" form, which in the case of PCRE2 is really just a bytecode dump.
+The functions whose names begin with \fBpcre2_serialize_\fP are used for
+converting to and from the serialized form. They are described in the
.\" HREF
\fBpcre2serialize\fP
.\"
-documentation.
+documentation. Note that PCRE2 serialization does not convert compiled patterns
+to an abstract format like Java or .NET serialization.
.
.
.\" HTML <a name="matchdatablock"></a>
@@ -2310,7 +2367,8 @@ free a compiled pattern or a subject string until after all operations on the
match data block (for that match) have taken place.
.P
When a match data block itself is no longer needed, it should be freed by
-calling \fBpcre2_match_data_free()\fP.
+calling \fBpcre2_match_data_free()\fP. If this function is called with a NULL
+argument, it returns immediately, without doing anything.
.
.
.SH "MATCHING A PATTERN: THE TRADITIONAL FUNCTION"
@@ -2376,7 +2434,7 @@ zero, the search for a match starts at the beginning of the subject, and this
is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset
must point to the start of a character, or to the end of the subject (in UTF-32
mode, one code unit equals one character, so all offsets are valid). Like the
-pattern string, the subject may contain binary zeroes.
+pattern string, the subject may contain binary zeros.
.P
A non-zero starting offset is useful when searching for another match in the
same subject by calling \fBpcre2_match()\fP again after a previous success.
@@ -2534,7 +2592,7 @@ performance reasons, you can set the PCRE2_NO_UTF_CHECK option when calling
calls to \fBpcre2_match()\fP if you are making repeated calls to find other
matches in the same subject string.
.P
-WARNING: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid
+\fBWarning:\fP When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid
string as a subject, or an invalid value of \fIstartoffset\fP, is undefined.
Your program may crash or loop indefinitely.
.sp
@@ -2710,7 +2768,7 @@ Elements in the ovector that do not correspond to capturing parentheses in the
pattern are never changed. That is, if a pattern contains \fIn\fP capturing
parentheses, no more than \fIovector[0]\fP to \fIovector[2n+1]\fP are set by
\fBpcre2_match()\fP. The other elements retain whatever values they previously
-had.
+had. After a failed match attempt, the contents of the ovector are unchanged.
.
.
.\" HTML <a name="matchotherdata"></a>
@@ -2751,6 +2809,14 @@ When it matches "bc", the returned name is A. The B mark is "seen" in the first
branch of the group, but it is not on the matching path. On the other hand,
when this pattern fails to match "bx", the returned name is B.
.P
+\fBWarning:\fP By default, certain start-of-match optimizations are used to
+give a fast "no match" result in some situations. For example, if the anchoring
+is removed from the pattern above, there is an initial check for the presence
+of "c" in the subject before running the matching engine. This check fails for
+"bx", causing a match failure without seeing any marks. You can disable the
+start-of-match optimizations by setting the PCRE2_NO_START_OPTIMIZE option for
+\fBpcre2_compile()\fP or starting the pattern with (*NO_START_OPT).
+.P
After a successful match, a partial match, or one of the invalid UTF errors
(for example, PCRE2_ERROR_UTF8_ERR5), \fBpcre2_get_startchar()\fP can be
called. After a successful or partial match it returns the code unit offset of
@@ -3122,7 +3188,10 @@ string in \fIoutputbuffer\fP, replacing the part that was matched with the
\fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This can
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
which a \eK item in a lookahead in the pattern causes the match to end before
-it starts are not supported, and give rise to an error return.
+it starts are not supported, and give rise to an error return. For global
+replacements, matches in which \eK in a lookbehind causes the match to start
+earlier than the point that was reached in the previous iteration are also not
+supported.
.P
The first seven arguments of \fBpcre2_substitute()\fP are the same as for
\fBpcre2_match()\fP, except that the partial matching options are not
@@ -3131,6 +3200,11 @@ data block is obtained and freed within this function, using memory management
functions from the match context, if provided, or else those that were used to
allocate memory for the compiled code.
.P
+If an external \fImatch_data\fP block is provided, its contents afterwards
+are those set by the final call to \fBpcre2_match()\fP, which will have
+ended in a matching error. The contents of the ovector within the match data
+block may or may not have been changed.
+.P
The \fIoutlengthptr\fP argument must point to a variable that contains the
length, in code units, of the output buffer. If the function is successful, the
value is updated to contain the length of the new string, excluding the
@@ -3302,7 +3376,8 @@ replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE
(invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket
not found), PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group
substitution), and PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before
-it started, which can happen if \eK is used in an assertion).
+it started or the match started earlier than the current position in the
+subject, which can happen if \eK is used in an assertion).
.P
As for all PCRE2 errors, a text message that describes the error can be
obtained by calling the \fBpcre2_get_error_message()\fP function (see
@@ -3514,17 +3589,7 @@ capture.
Calls to the convenience functions that extract substrings by name
return the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used after a
DFA match. The convenience functions that extract substrings by number never
-return PCRE2_ERROR_NOSUBSTRING, and the meanings of some other errors are
-slightly different:
-.sp
- PCRE2_ERROR_UNAVAILABLE
-.sp
-The ovector is not big enough to include a slot for the given substring number.
-.sp
- PCRE2_ERROR_UNSET
-.sp
-There is a slot in the ovector for this substring, but there were insufficient
-matches to fill it.
+return PCRE2_ERROR_NOSUBSTRING.
.P
The matched strings are stored in the ovector in reverse order of length; that
is, the longest matching string is first. If there were too many matches to fit
@@ -3555,12 +3620,12 @@ There are in addition the following errors that are specific to
.sp
This return is given if \fBpcre2_dfa_match()\fP encounters an item in the
pattern that it does not support, for instance, the use of \eC in a UTF mode or
-a back reference.
+a backreference.
.sp
PCRE2_ERROR_DFA_UCOND
.sp
This return is given if \fBpcre2_dfa_match()\fP encounters a condition item
-that uses a back reference for the condition, or a test for recursion in a
+that uses a backreference for the condition, or a test for recursion in a
specific group. These are not supported.
.sp
PCRE2_ERROR_DFA_WSSIZE
@@ -3605,6 +3670,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 31 December 2017
-Copyright (c) 1997-2017 University of Cambridge.
+Last updated: 07 September 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/doc/pcre2build.3 b/doc/pcre2build.3
index 7586d22..540df78 100644
--- a/doc/pcre2build.3
+++ b/doc/pcre2build.3
@@ -1,4 +1,4 @@
-.TH PCRE2BUILD 3 "18 July 2017" "PCRE2 10.30"
+.TH PCRE2BUILD 3 "26 April 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.
@@ -59,7 +59,8 @@ The following sections include descriptions of "on/off" options whose names
begin with --enable or --disable. Because of the way that \fBconfigure\fP
works, --enable and --disable always come in pairs, so the complementary option
always exists as well, but as it specifies the default, it is not described.
-Options that specify values have names that start with --with.
+Options that specify values have names that start with --with. At the end of a
+\fBconfigure\fP run, a summary of the configuration is output.
.
.
.SH "BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES"
@@ -156,8 +157,15 @@ Just-in-time (JIT) compiler support is included in the build by specifying
--enable-jit
.sp
This support is available only for certain hardware architectures. If this
-option is set for an unsupported architecture, a building error occurs. If you
-are running under SELinux you may also want to add
+option is set for an unsupported architecture, a build error occurs.
+If in doubt, use
+.sp
+ --enable-jit=auto
+.sp
+which enables JIT only if the current hardware is supported. You can check
+whether JIT is enabled in the configuration summary that is output at the end
+of a \fBconfigure\fP run. If you are enabling JIT under SELinux you may also
+want to add
.sp
--enable-jit-sealloc
.sp
@@ -208,7 +216,7 @@ separator, U+2028), and PS (paragraph separator, U+2029). The final option is
.sp
--enable-newline-is-nul
.sp
-which causes NUL (binary zero) is set as the default line-ending character.
+which causes NUL (binary zero) to be set as the default line-ending character.
.P
Whatever default line ending convention is selected when PCRE2 is built can be
overridden by applications that use the library. At build time it is
@@ -236,10 +244,10 @@ Within a compiled pattern, offset values are used to point from one part to
another (for example, from an opening parenthesis to an alternation
metacharacter). By default, in the 8-bit and 16-bit libraries, two-byte values
are used for these offsets, leading to a maximum size for a compiled pattern of
-around 64K code units. This is sufficient to handle all but the most gigantic
-patterns. Nevertheless, some people do want to process truly enormous patterns,
-so it is possible to compile PCRE2 to use three-byte or four-byte offsets by
-adding a setting such as
+around 64 thousand code units. This is sufficient to handle all but the most
+gigantic patterns. Nevertheless, some people do want to process truly enormous
+patterns, so it is possible to compile PCRE2 to use three-byte or four-byte
+offsets by adding a setting such as
.sp
--with-link-size=3
.sp
@@ -269,12 +277,12 @@ to the \fBconfigure\fP command. This setting also applies to the
\fBpcre2_dfa_match()\fP matching function, and to JIT matching (though the
counting is done differently).
.P
-The \fBpcre2_match()\fP function starts out using a 20K vector on the system
+The \fBpcre2_match()\fP function starts out using a 20KiB vector on the system
stack to record backtracking points. The more nested backtracking points there
are (that is, the deeper the search tree), the more memory is needed. If the
initial vector is not large enough, heap memory is used, up to a certain limit,
-which is specified in kilobytes. The limit can be changed at run time, as
-described in the
+which is specified in kibibytes (units of 1024 bytes). The limit can be changed
+at run time, as described in the
.\" HREF
\fBpcre2api\fP
.\"
@@ -283,10 +291,11 @@ change this by a setting such as
.sp
--with-heap-limit=500
.sp
-which limits the amount of heap to 500 kilobytes. This limit applies only to
-interpretive matching in pcre2_match(). It does not apply when JIT (which has
-its own memory arrangements) is used, nor does it apply to
-\fBpcre2_dfa_match()\fP.
+which limits the amount of heap to 500 KiB. This limit applies only to
+interpretive matching in \fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP, which
+may also use the heap for internal workspace when processing complicated
+patterns. This limit does not apply when JIT (which has its own memory
+arrangements) is used.
.P
You can also explicitly limit the depth of nested backtracking in the
\fBpcre2_match()\fP interpreter. This limit defaults to the value that is set
@@ -394,13 +403,13 @@ they are not.
.sp
\fBpcre2grep\fP uses an internal buffer to hold a "window" on the file it is
scanning, in order to be able to output "before" and "after" lines when it
-finds a match. The starting size of the buffer is controlled by a parameter
-whose default value is 20K. The buffer itself is three times this size, but
-because of the way it is used for holding "before" lines, the longest line that
-is guaranteed to be processable is the parameter size. If a longer line is
-encountered, \fBpcre2grep\fP automatically expands the buffer, up to a
-specified maximum size, whose default is 1M or the starting size, whichever is
-the larger. You can change the default parameter values by adding, for example,
+finds a match. The default starting size of the buffer is 20KiB. The buffer
+itself is three times this size, but because of the way it is used for holding
+"before" lines, the longest line that is guaranteed to be processable is the
+notional buffer size. If a longer line is encountered, \fBpcre2grep\fP
+automatically expands the buffer, up to a specified maximum size, whose default
+is 1MiB or the starting size, whichever is the larger. You can change the
+default parameter values by adding, for example,
.sp
--with-pcre2grep-bufsize=51200
--with-pcre2grep-max-bufsize=2097152
@@ -543,7 +552,7 @@ generated from the string.
Setting --enable-fuzz-support also causes a binary called \fBpcre2fuzzcheck\fP
to be created. This is normally run under valgrind or used when PCRE2 is
compiled with address sanitizing enabled. It calls the fuzzing function and
-outputs information about it is doing. The input strings are specified by
+outputs information about what it is doing. The input strings are specified by
arguments: if an argument starts with "=" the rest of it is a literal input
string. Otherwise, it is assumed to be a file name, and the contents of the
file are the test string.
@@ -582,6 +591,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 18 July 2017
-Copyright (c) 1997-2017 University of Cambridge.
+Last updated: 26 April 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/doc/pcre2callout.3 b/doc/pcre2callout.3
index e3fd600..c815c72 100644
--- a/doc/pcre2callout.3
+++ b/doc/pcre2callout.3
@@ -1,4 +1,4 @@
-.TH PCRE2CALLOUT 3 "22 December 2017" "PCRE2 10.31"
+.TH PCRE2CALLOUT 3 "26 April 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -128,7 +128,7 @@ start only after an internal newline or at the beginning of the subject, and
branch, automatic anchoring occurs if all branches are anchorable.
.P
This optimization is disabled, however, if .* is in an atomic group or if there
-is a back reference to the capturing group in which it appears. It is also
+is a backreference to the capturing group in which it appears. It is also
disabled if the pattern contains (*PRUNE) or (*SKIP). However, the presence of
callouts does not affect it.
.P
@@ -291,10 +291,12 @@ than \fIcapture_top\fP also have both of their ovector slots set to
PCRE2_UNSET.
.P
For DFA matching, the \fIoffset_vector\fP field points to the ovector that was
-passed to the matching function in the match data block, but it holds no useful
-information at callout time because \fBpcre2_dfa_match()\fP does not support
-substring capturing. The value of \fIcapture_top\fP is always 1 and the value
-of \fIcapture_last\fP is always 0 for DFA matching.
+passed to the matching function in the match data block for callouts at the top
+level, but to an internal ovector during the processing of pattern recursions,
+lookarounds, and atomic groups. However, these ovectors hold no useful
+information because \fBpcre2_dfa_match()\fP does not support substring
+capturing. The value of \fIcapture_top\fP is always 1 and the value of
+\fIcapture_last\fP is always 0 for DFA matching.
.P
The \fIsubject\fP and \fIsubject_length\fP fields contain copies of the values
that were passed to the matching function.
@@ -441,6 +443,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 22 December 2017
-Copyright (c) 1997-2017 University of Cambridge.
+Last updated: 26 April 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/doc/pcre2compat.3 b/doc/pcre2compat.3
index 8094ebd..6e448f6 100644
--- a/doc/pcre2compat.3
+++ b/doc/pcre2compat.3
@@ -1,4 +1,4 @@
-.TH PCRE2COMPAT 3 "18 April 2017" "PCRE2 10.30"
+.TH PCRE2COMPAT 3 "28 July 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "DIFFERENCES BETWEEN PCRE2 AND PERL"
@@ -19,7 +19,7 @@ page.
2. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but
they do not mean what you might think. For example, (?!a){3} does not assert
that the next three characters are not "a". It just asserts that the next
-character is not "a" three times (in principle: PCRE2 optimizes this to run the
+character is not "a" three times (in principle; PCRE2 optimizes this to run the
assertion just once). Perl allows some repeat quantifiers on other assertions,
for example, \eb* (but not \eb{3}), but these do not seem to have any use.
.P
@@ -28,13 +28,14 @@ counted, but their entries in the offsets vector are set only when a negative
assertion is a condition that has a matching branch (that is, the condition is
false).
.P
-4. The following Perl escape sequences are not supported: \el, \eu, \eL,
-\eU, and \eN when followed by a character name or Unicode value. (\eN on its
-own, matching a non-newline character, is supported.) In fact these are
+4. The following Perl escape sequences are not supported: \eF, \el, \eL, \eu,
+\eU, and \eN when followed by a character name. \eN on its own, matching a
+non-newline character, and \eN{U+dd..}, matching a Unicode code point, are
+supported. The escapes that modify the case of following letters are
implemented by Perl's general string-handling and are not part of its pattern
matching engine. If any of these are encountered by PCRE2, an error is
-generated by default. However, if the PCRE2_ALT_BSUX option is set,
-\eU and \eu are interpreted as ECMAScript interprets them.
+generated by default. However, if the PCRE2_ALT_BSUX option is set, \eU and \eu
+are interpreted as ECMAScript interprets them.
.P
5. The Perl escape sequences \ep, \eP, and \eX are supported only if PCRE2 is
built with Unicode support (the default). The properties that can be tested
@@ -45,25 +46,30 @@ documentation says "Because Perl hides the need for the user to understand the
internal representation of Unicode characters, there is no need to implement
the somewhat messy concept of surrogates."
.P
-6. PCRE2 does support the \eQ...\eE escape for quoting substrings. Characters
-in between are treated as literals. This is slightly different from Perl in
-that $ and @ are also handled as literals inside the quotes. In Perl, they
-cause variable interpolation (but of course PCRE2 does not have variables).
-Note the following examples:
+6. PCRE2 supports the \eQ...\eE escape for quoting substrings. Characters
+in between are treated as literals. However, this is slightly different from
+Perl in that $ and @ are also handled as literals inside the quotes. In Perl,
+they cause variable interpolation (but of course PCRE2 does not have
+variables). Also, Perl does "double-quotish backslash interpolation" on any
+backslashes between \eQ and \eE which, its documentation says, "may lead to
+confusing results". PCRE2 treats a backslash between \eQ and \eE just like any
+other character. Note the following examples:
.sp
- Pattern PCRE2 matches Perl matches
+ Pattern PCRE2 matches Perl matches
.sp
.\" JOIN
\eQabc$xyz\eE abc$xyz abc followed by the
contents of $xyz
\eQabc\e$xyz\eE abc\e$xyz abc\e$xyz
\eQabc\eE\e$\eQxyz\eE abc$xyz abc$xyz
+ \eQA\eB\eE A\eB A\eB
+ \eQ\e\eE \e \e\eE
.sp
The \eQ...\eE sequence is recognized both inside and outside character classes.
.P
7. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
-constructions. However, there is support PCRE2's "callout" feature, which
-allows an external function to be called during pattern matching. See the
+constructions. However, PCRE2 does have a "callout" feature, which allows an
+external function to be called during pattern matching. See the
.\" HREF
\fBpcre2callout\fP
.\"
@@ -131,7 +137,7 @@ list is with respect to Perl 5.26:
each alternative branch of a lookbehind assertion can match a different length
of string. Perl requires them all to have the same length.
.sp
-(b) From PCRE2 10.23, back references to groups of fixed length are supported
+(b) From PCRE2 10.23, backreferences to groups of fixed length are supported
in lookbehinds, provided that there is no possibility of referencing a
non-unique number or name. Perl does not support backreferences in lookbehinds.
.sp
@@ -194,6 +200,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 18 April 2017
-Copyright (c) 1997-2017 University of Cambridge.
+Last updated: 28 July 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/doc/pcre2convert.3 b/doc/pcre2convert.3
index 3dadf6e..34beaf0 100644
--- a/doc/pcre2convert.3
+++ b/doc/pcre2convert.3
@@ -1,4 +1,4 @@
-.TH PCRE2CONVERT 3 "12 July 2017" "PCRE2 10.30"
+.TH PCRE2CONVERT 3 "28 June 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "EXPERIMENTAL PATTERN CONVERSION FUNCTIONS"
@@ -83,7 +83,8 @@ If \fBbuffer\fP points to a NULL pointer, an output buffer is obtained using
the allocator in the context or \fBmalloc()\fP if no context is supplied. A
pointer to this buffer is placed in the variable to which \fBbuffer\fP points.
When no longer needed the output buffer must be freed by calling
-\fBpcre2_converted_pattern_free()\fP.
+\fBpcre2_converted_pattern_free()\fP. If this function is called with a NULL
+argument, it returns immediately without doing anything.
.P
If \fBbuffer\fP points to a non-NULL pointer, \fBblength\fP must be set to the
actual length of the buffer provided (in code units).
@@ -158,6 +159,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 12 July 2017
-Copyright (c) 1997-2017 University of Cambridge.
+Last updated: 28 June 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1
index 5e5cbea..ce112af 100644
--- a/doc/pcre2grep.1
+++ b/doc/pcre2grep.1
@@ -1,4 +1,4 @@
-.TH PCRE2GREP 1 "13 November 2017" "PCRE2 10.31"
+.TH PCRE2GREP 1 "24 February 2018" "PCRE2 10.32"
.SH NAME
pcre2grep - a grep with Perl-compatible regular expressions.
.SH SYNOPSIS
@@ -57,15 +57,16 @@ controlled by parameters that can be set by the \fB--buffer-size\fP and
that is obtained at the start of processing. If an input file contains very
long lines, a larger buffer may be needed; this is handled by automatically
extending the buffer, up to the limit specified by \fB--max-buffer-size\fP. The
-default values for these parameters are specified when \fBpcre2grep\fP is
-built, with the default defaults being 20K and 1M respectively. An error occurs
-if a line is too long and the buffer can no longer be expanded.
+default values for these parameters can be set when \fBpcre2grep\fP is
+built; if nothing is specified, the defaults are set to 20KiB and 1MiB
+respectively. An error occurs if a line is too long and the buffer can no
+longer be expanded.
.P
The block of memory that is actually used is three times the "buffer size", to
allow for buffering "before" and "after" lines. If the buffer size is too
small, fewer than requested "before" and "after" lines may be output.
.P
-Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater.
+Patterns can be no longer than 8KiB or BUFSIZ bytes, whichever is the greater.
BUFSIZ is defined in \fB<stdio.h>\fP. When there is more than one pattern
(specified by the use of \fB-e\fP and/or \fB-f\fP), each pattern is applied to
each line in the order in which they are defined, except that all the \fB-e\fP
@@ -121,6 +122,14 @@ a binary file is not applied. See the \fB--binary-files\fP option for a means
of changing the way binary files are handled.
.
.
+.SH "BINARY ZEROS IN PATTERNS"
+.rs
+.sp
+Patterns passed from the command line are strings that are terminated by a
+binary zero, so they cannot contain internal zeros. However, patterns that are
+read from a file via the \fB-f\fP option may contain binary zeros.
+.
+.
.SH OPTIONS
.rs
.sp
@@ -304,12 +313,15 @@ files; it does not apply to patterns specified by any of the \fB--include\fP or
.TP
\fB-f\fP \fIfilename\fP, \fB--file=\fP\fIfilename\fP
Read patterns from the file, one per line, and match them against each line of
-input. What constitutes a newline when reading the file is the operating
-system's default. The \fB--newline\fP option has no effect on this option.
-Trailing white space is removed from each line, and blank lines are ignored. An
-empty file contains no patterns and therefore matches nothing. See also the
-comments about multiple patterns versus a single pattern with alternatives in
-the description of \fB-e\fP above.
+input. As is the case with patterns on the command line, no delimiters should
+be used. What constitutes a newline when reading the file is the operating
+system's default interpretation of \en. The \fB--newline\fP option has no
+effect on this option. Trailing white space is removed from each line, and
+blank lines are ignored. An empty file contains no patterns and therefore
+matches nothing. Patterns read from a file in this way may contain binary
+zeros, which are treated as ordinary data characters. See also the comments
+about multiple patterns versus a single pattern with alternatives in the
+description of \fB-e\fP above.
.sp
If this option is given more than once, all the specified files are read. A
data line is output if any of the patterns match it. A file name can be given
@@ -320,14 +332,15 @@ command line; all arguments are treated as the names of paths to be searched.
.TP
\fB--file-list\fP=\fIfilename\fP
Read a list of files and/or directories that are to be scanned from the given
-file, one per line. Trailing white space is removed from each line, and blank
-lines are ignored. These paths are processed before any that are listed on the
-command line. The file name can be given as "-" to refer to the standard input.
-If \fB--file\fP and \fB--file-list\fP are both specified as "-", patterns are
-read first. This is useful only when the standard input is a terminal, from
-which further lines (the list of files) can be read after an end-of-file
-indication. If this option is given more than once, all the specified files are
-read.
+file, one per line. What constitutes a newline when reading the file is the
+operating system's default. Trailing white space is removed from each line, and
+blank lines are ignored. These paths are processed before any that are listed
+on the command line. The file name can be given as "-" to refer to the standard
+input. If \fB--file\fP and \fB--file-list\fP are both specified as "-",
+patterns are read first. This is useful only when the standard input is a
+terminal, from which further lines (the list of files) can be read after an
+end-of-file indication. If this option is given more than once, all the
+specified files are read.
.TP
\fB--file-offsets\fP
Instead of showing lines or parts of lines that match, show each match as an
@@ -422,13 +435,13 @@ short form for this option.
When this option is given, non-compressed input is read and processed line by
line, and the output is flushed after each write. By default, input is read in
large chunks, unless \fBpcre2grep\fP can determine that it is reading from a
-terminal (which is currently possible only in Unix-like environments). Output
-to terminal is normally automatically flushed by the operating system. This
-option can be useful when the input or output is attached to a pipe and you do
-not want \fBpcre2grep\fP to buffer up large amounts of data. However, its use
-will affect performance, and the \fB-M\fP (multiline) option ceases to work.
-When input is from a compressed .gz or .bz2 file, \fB--line-buffered\fP is
-ignored.
+terminal (which is currently possible only in Unix-like environments or
+Windows). Output to terminal is normally automatically flushed by the operating
+system. This option can be useful when the input or output is attached to a
+pipe and you do not want \fBpcre2grep\fP to buffer up large amounts of data.
+However, its use will affect performance, and the \fB-M\fP (multiline) option
+ceases to work. When input is from a compressed .gz or .bz2 file,
+\fB--line-buffered\fP is ignored.
.TP
\fB--line-offsets\fP
Instead of showing lines or parts of lines that match, show each match as a
@@ -458,11 +471,11 @@ is a pattern that uses nested unlimited repeats. Internally, PCRE2 has a
counter that is incremented each time around its main processing loop. If the
value set by \fB--match-limit\fP is reached, an error occurs.
.sp
-The \fB--heap-limit\fP option specifies, as a number of kilobytes, the amount
-of heap memory that may be used for matching. Heap memory is needed only if
-matching the pattern requires a significant number of nested backtracking
-points to be remembered. This parameter can be set to zero to forbid the use of
-heap memory altogether.
+The \fB--heap-limit\fP option specifies, as a number of kibibytes (units of
+1024 bytes), the amount of heap memory that may be used for matching. Heap
+memory is needed only if matching the pattern requires a significant number of
+nested backtracking points to be remembered. This parameter can be set to zero
+to forbid the use of heap memory altogether.
.sp
The \fB--depth-limit\fP option limits the depth of nested backtracking points,
which indirectly limits the amount of memory that is used. The amount of memory
@@ -471,9 +484,9 @@ parentheses in the pattern, so the amount of memory that is used before this
limit acts varies from pattern to pattern. This limit is of use only if it is
set smaller than \fB--match-limit\fP.
.sp
-There are no short forms for these options. The default settings are specified
-when the PCRE2 library is compiled, with the default defaults being very large
-and so effectively unlimited.
+There are no short forms for these options. The default limits can be set
+when the PCRE2 library is compiled; if they are not specified, the defaults
+are very large and so effectively unlimited.
.TP
\fB--max-buffer-size=\fInumber\fP
This limits the expansion of the processing buffer, whose initial size can be
@@ -679,12 +692,13 @@ The \fB-N\fP (\fB--newline\fP) option allows \fBpcre2grep\fP to scan files with
different newline conventions from the default. Any parts of the input files
that are written to the standard output are copied identically, with whatever
newline sequences they have in the input. However, the setting of this option
-does not affect the interpretation of files specified by the \fB-f\fP,
-\fB--exclude-from\fP, or \fB--include-from\fP options, which are assumed to use
-the operating system's standard newline sequence, nor does it affect the way in
-which \fBpcre2grep\fP writes informational messages to the standard error and
-output streams. For these it uses the string "\en" to indicate newlines,
-relying on the C I/O library to convert this to an appropriate sequence.
+affects only the way scanned files are processed. It does not affect the
+interpretation of files specified by the \fB-f\fP, \fB--file-list\fP,
+\fB--exclude-from\fP, or \fB--include-from\fP options, nor does it affect the
+way in which \fBpcre2grep\fP writes informational messages to the standard
+error and output streams. For these it uses the string "\en" to indicate
+newlines, relying on the C I/O library to convert this to an appropriate
+sequence.
.
.
.SH "OPTIONS COMPATIBILITY"
@@ -862,6 +876,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 13 November 2017
-Copyright (c) 1997-2017 University of Cambridge.
+Last updated: 24 February 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/doc/pcre2grep.txt b/doc/pcre2grep.txt
index 30517b4..000239c 100644
--- a/doc/pcre2grep.txt
+++ b/doc/pcre2grep.txt
@@ -56,17 +56,17 @@ DESCRIPTION
that is obtained at the start of processing. If an input file contains
very long lines, a larger buffer may be needed; this is handled by
automatically extending the buffer, up to the limit specified by --max-
- buffer-size. The default values for these parameters are specified when
- pcre2grep is built, with the default defaults being 20K and 1M respec-
- tively. An error occurs if a line is too long and the buffer can no
- longer be expanded.
+ buffer-size. The default values for these parameters can be set when
+ pcre2grep is built; if nothing is specified, the defaults are set to
+ 20KiB and 1MiB respectively. An error occurs if a line is too long and
+ the buffer can no longer be expanded.
The block of memory that is actually used is three times the "buffer
size", to allow for buffering "before" and "after" lines. If the buffer
size is too small, fewer than requested "before" and "after" lines may
be output.
- Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the
+ Patterns can be no longer than 8KiB or BUFSIZ bytes, whichever is the
greater. BUFSIZ is defined in <stdio.h>. When there is more than one
pattern (specified by the use of -e and/or -f), each pattern is applied
to each line in the order in which they are defined, except that all
@@ -122,6 +122,13 @@ BINARY FILES
handled.
+BINARY ZEROS IN PATTERNS
+
+ Patterns passed from the command line are strings that are terminated
+ by a binary zero, so cannot contain internal zeros. However, patterns
+ that are read from a file via the -f option may contain binary zeros.
+
+
OPTIONS
The order in which some of the options appear can affect the output.
@@ -329,36 +336,40 @@ OPTIONS
-f filename, --file=filename
Read patterns from the file, one per line, and match them
- against each line of input. What constitutes a newline when
- reading the file is the operating system's default. The
- --newline option has no effect on this option. Trailing
- white space is removed from each line, and blank lines are
- ignored. An empty file contains no patterns and therefore
- matches nothing. See also the comments about multiple pat-
- terns versus a single pattern with alternatives in the
- description of -e above.
-
- If this option is given more than once, all the specified
- files are read. A data line is output if any of the patterns
- match it. A file name can be given as "-" to refer to the
- standard input. When -f is used, patterns specified on the
- command line using -e may also be present; they are tested
- before the file's patterns. However, no other pattern is
+ against each line of input. As is the case with patterns on
+ the command line, no delimiters should be used. What consti-
+ tutes a newline when reading the file is the operating sys-
+ tem's default interpretation of \n. The --newline option has
+ no effect on this option. Trailing white space is removed
+ from each line, and blank lines are ignored. An empty file
+ contains no patterns and therefore matches nothing. Patterns
+ read from a file in this way may contain binary zeros, which
+ are treated as ordinary data characters. See also the com-
+ ments about multiple patterns versus a single pattern with
+ alternatives in the description of -e above.
+
+ If this option is given more than once, all the specified
+ files are read. A data line is output if any of the patterns
+ match it. A file name can be given as "-" to refer to the
+ standard input. When -f is used, patterns specified on the
+ command line using -e may also be present; they are tested
+ before the file's patterns. However, no other pattern is
taken from the command line; all arguments are treated as the
names of paths to be searched.
--file-list=filename
- Read a list of files and/or directories that are to be
- scanned from the given file, one per line. Trailing white
- space is removed from each line, and blank lines are ignored.
- These paths are processed before any that are listed on the
- command line. The file name can be given as "-" to refer to
- the standard input. If --file and --file-list are both spec-
- ified as "-", patterns are read first. This is useful only
- when the standard input is a terminal, from which further
- lines (the list of files) can be read after an end-of-file
- indication. If this option is given more than once, all the
- specified files are read.
+ Read a list of files and/or directories that are to be
+ scanned from the given file, one per line. What constitutes a
+ newline when reading the file is the operating system's
+ default. Trailing white space is removed from each line, and
+ blank lines are ignored. These paths are processed before any
+ that are listed on the command line. The file name can be
+ given as "-" to refer to the standard input. If --file and
+ --file-list are both specified as "-", patterns are read
+ first. This is useful only when the standard input is a ter-
+ minal, from which further lines (the list of files) can be
+ read after an end-of-file indication. If this option is given
+ more than once, all the specified files are read.
--file-offsets
Instead of showing lines or parts of lines that match, show
@@ -464,14 +475,14 @@ OPTIONS
processed line by line, and the output is flushed after each
write. By default, input is read in large chunks, unless
pcre2grep can determine that it is reading from a terminal
- (which is currently possible only in Unix-like environments).
- Output to terminal is normally automatically flushed by the
- operating system. This option can be useful when the input or
- output is attached to a pipe and you do not want pcre2grep to
- buffer up large amounts of data. However, its use will affect
- performance, and the -M (multiline) option ceases to work.
- When input is from a compressed .gz or .bz2 file, --line-
- buffered is ignored.
+ (which is currently possible only in Unix-like environments
+ or Windows). Output to terminal is normally automatically
+ flushed by the operating system. This option can be useful
+ when the input or output is attached to a pipe and you do not
+ want pcre2grep to buffer up large amounts of data. However,
+ its use will affect performance, and the -M (multiline)
+ option ceases to work. When input is from a compressed .gz or
+ .bz2 file, --line-buffered is ignored.
--line-offsets
Instead of showing lines or parts of lines that match, show
@@ -506,12 +517,12 @@ OPTIONS
processing loop. If the value set by --match-limit is
reached, an error occurs.
- The --heap-limit option specifies, as a number of kilobytes,
- the amount of heap memory that may be used for matching. Heap
- memory is needed only if matching the pattern requires a sig-
- nificant number of nested backtracking points to be remem-
- bered. This parameter can be set to zero to forbid the use of
- heap memory altogether.
+ The --heap-limit option specifies, as a number of kibibytes
+ (units of 1024 bytes), the amount of heap memory that may be
+ used for matching. Heap memory is needed only if matching the
+ pattern requires a significant number of nested backtracking
+ points to be remembered. This parameter can be set to zero to
+ forbid the use of heap memory altogether.
The --depth-limit option limits the depth of nested back-
tracking points, which indirectly limits the amount of memory
@@ -521,10 +532,10 @@ OPTIONS
limit acts varies from pattern to pattern. This limit is of
use only if it is set smaller than --match-limit.
- There are no short forms for these options. The default set-
- tings are specified when the PCRE2 library is compiled, with
- the default defaults being very large and so effectively
- unlimited.
+ There are no short forms for these options. The default lim-
+ its can be set when the PCRE2 library is compiled; if they
+ are not specified, the defaults are very large and so effec-
+ tively unlimited.
--max-buffer-size=number
This limits the expansion of the processing buffer, whose
@@ -758,13 +769,13 @@ NEWLINES
newline conventions from the default. Any parts of the input files that
are written to the standard output are copied identically, with what-
ever newline sequences they have in the input. However, the setting of
- this option does not affect the interpretation of files specified by
- the -f, --exclude-from, or --include-from options, which are assumed to
- use the operating system's standard newline sequence, nor does it
- affect the way in which pcre2grep writes informational messages to the
- standard error and output streams. For these it uses the string "\n" to
- indicate newlines, relying on the C I/O library to convert this to an
- appropriate sequence.
+ this option affects only the way scanned files are processed. It does
+ not affect the interpretation of files specified by the -f, --file-
+ list, --exclude-from, or --include-from options, nor does it affect the
+ way in which pcre2grep writes informational messages to the standard
+ error and output streams. For these it uses the string "\n" to indicate
+ newlines, relying on the C I/O library to convert this to an appropri-
+ ate sequence.
OPTIONS COMPATIBILITY
@@ -929,5 +940,5 @@ AUTHOR
REVISION
- Last updated: 13 November 2017
- Copyright (c) 1997-2017 University of Cambridge.
+ Last updated: 24 February 2018
+ Copyright (c) 1997-2018 University of Cambridge.
diff --git a/doc/pcre2jit.3 b/doc/pcre2jit.3
index f6d17ca..c3b916b 100644
--- a/doc/pcre2jit.3
+++ b/doc/pcre2jit.3
@@ -1,4 +1,4 @@
-.TH PCRE2JIT 3 "31 March 2017" "PCRE2 10.30"
+.TH PCRE2JIT 3 "28 June 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "PCRE2 JUST-IN-TIME COMPILER SUPPORT"
@@ -161,7 +161,7 @@ when JIT matching is used.
.rs
.sp
When the compiled JIT code runs, it needs a block of memory to use as a stack.
-By default, it uses 32K on the machine stack. However, some large or
+By default, it uses 32KiB on the machine stack. However, some large or
complicated patterns need more than this. The error PCRE2_ERROR_JIT_STACKLIMIT
is given when there is not enough stack. Three functions are provided for
managing blocks of memory for use as JIT stacks. There is further discussion
@@ -177,9 +177,10 @@ are a starting size, a maximum size, and a general context (for memory
allocation functions, or NULL for standard memory allocation). It returns a
pointer to an opaque structure of type \fBpcre2_jit_stack\fP, or NULL if there
is an error. The \fBpcre2_jit_stack_free()\fP function is used to free a stack
-that is no longer needed. (For the technically minded: the address space is
-allocated by mmap or VirtualAlloc.) A maximum stack size of 512K to 1M should
-be more than enough for any pattern.
+that is no longer needed. If its argument is NULL, this function returns
+immediately, without doing anything. (For the technically minded: the address
+space is allocated by mmap or VirtualAlloc.) A maximum stack size of 512KiB to
+1MiB should be more than enough for any pattern.
.P
The \fBpcre2_jit_stack_assign()\fP function specifies which stack JIT code
should use. Its arguments are as follows:
@@ -190,9 +191,10 @@ should use. Its arguments are as follows:
.sp
The first argument is a pointer to a match context. When this is subsequently
passed to a matching function, its information determines which JIT stack is
-used. There are three cases for the values of the other two options:
+used. If this argument is NULL, the function returns immediately, without doing
+anything. There are three cases for the values of the other two options:
.sp
- (1) If \fIcallback\fP is NULL and \fIdata\fP is NULL, an internal 32K block
+ (1) If \fIcallback\fP is NULL and \fIdata\fP is NULL, an internal 32KiB block
on the machine stack is used. This is the default when a match
context is created.
.sp
@@ -203,7 +205,7 @@ used. There are three cases for the values of the other two options:
(3) If \fIcallback\fP is not NULL, it must point to a function that is
called with \fIdata\fP as an argument at the start of matching, in
order to set up a JIT stack. If the return from the callback
- function is NULL, the internal 32K stack is used; otherwise the
+ function is NULL, the internal 32KiB stack is used; otherwise the
return value must be a valid JIT stack, the result of calling
\fBpcre2_jit_stack_create()\fP.
.sp
@@ -265,9 +267,9 @@ we do the recursion in memory.
Modern operating systems have a nice feature: they can reserve an address space
instead of allocating memory. We can safely allocate memory pages inside this
address space, so the stack could grow without moving memory data (this is
-important because of pointers). Thus we can allocate 1M address space, and use
-only a single memory page (usually 4K) if that is enough. However, we can still
-grow up to 1M anytime if needed.
+important because of pointers). Thus we can allocate 1MiB address space, and
+use only a single memory page (usually 4KiB) if that is enough. However, we can
+still grow up to 1MiB anytime if needed.
.P
(3) Who "owns" a JIT stack?
.sp
@@ -300,7 +302,7 @@ say two minutes. The JIT callback can help to achieve this without keeping a
list of patterns.
.P
(6) OK, the stack is for long term memory allocation. But what happens if a
-pattern causes stack overflow with a stack of 1M? Is that 1M kept until the
+pattern causes stack overflow with a stack of 1MiB? Is that 1MiB kept until the
stack is freed?
.sp
Especially on embedded systems, it might be a good idea to release memory
@@ -410,6 +412,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 31 March 2017
-Copyright (c) 1997-2017 University of Cambridge.
+Last updated: 28 June 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/doc/pcre2limits.3 b/doc/pcre2limits.3
index 88944db..803e97b 100644
--- a/doc/pcre2limits.3
+++ b/doc/pcre2limits.3
@@ -7,12 +7,12 @@ PCRE2 - Perl-compatible regular expressions (revised API)
There are some size limitations in PCRE2 but it is hoped that they will never
in practice be relevant.
.P
-The maximum size of a compiled pattern is approximately 64K code units for the
-8-bit and 16-bit libraries if PCRE2 is compiled with the default internal
-linkage size, which is 2 bytes for these libraries. If you want to process
-regular expressions that are truly enormous, you can compile PCRE2 with an
-internal linkage size of 3 or 4 (when building the 16-bit library, 3 is rounded
-up to 4). See the \fBREADME\fP file in the source distribution and the
+The maximum size of a compiled pattern is approximately 64 thousand code units
+for the 8-bit and 16-bit libraries if PCRE2 is compiled with the default
+internal linkage size, which is 2 bytes for these libraries. If you want to
+process regular expressions that are truly enormous, you can compile PCRE2 with
+an internal linkage size of 3 or 4 (when building the 16-bit library, 3 is
+rounded up to 4). See the \fBREADME\fP file in the source distribution and the
.\" HREF
\fBpcre2build\fP
.\"
@@ -38,9 +38,9 @@ There is no limit to the number of parenthesized subpatterns, but there can be
no more than 65535 capturing subpatterns. There is, however, a limit to the
depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
order to limit the amount of system stack used at compile time. The default
-limit can be specified when PCRE2 is built; the default default is 250. An
-application can change this limit by calling pcre2_set_parens_nest_limit() to
-set the limit in a compile context.
+limit can be specified when PCRE2 is built; if not, the default is set to 250.
+An application can change this limit by calling pcre2_set_parens_nest_limit()
+to set the limit in a compile context.
.P
The maximum length of name for a named subpattern is 32 code units, and the
maximum number of named subpatterns is 10000.
diff --git a/doc/pcre2matching.3 b/doc/pcre2matching.3
index 3a885e5..81ce968 100644
--- a/doc/pcre2matching.3
+++ b/doc/pcre2matching.3
@@ -67,7 +67,7 @@ ungreedy repetition quantifiers are specified in the pattern.
Because it ends up with a single path through the tree, it is relatively
straightforward for this algorithm to keep track of the substrings that are
matched by portions of the pattern in parentheses. This provides support for
-capturing parentheses and back references.
+capturing parentheses and backreferences.
.
.
.SH "THE ALTERNATIVE MATCHING ALGORITHM"
@@ -134,7 +134,7 @@ straightforward to keep track of captured substrings for the different matching
possibilities, and PCRE2's implementation of this algorithm does not attempt to
do this. This means that no captured substrings are available.
.P
-3. Because no substrings are captured, back references within the pattern are
+3. Because no substrings are captured, backreferences within the pattern are
not supported, and cause errors if encountered.
.P
4. For the same reason, conditional expressions that use a backreference as the
@@ -188,7 +188,7 @@ The alternative algorithm suffers from a number of disadvantages:
because it has to search for all possible matches, but is also because it is
less susceptible to optimization.
.P
-2. Capturing parentheses and back references are not supported.
+2. Capturing parentheses and backreferences are not supported.
.P
3. Although atomic groups are supported, their use does not provide the
performance advantage that it does for the standard algorithm.
diff --git a/doc/pcre2pattern.3 b/doc/pcre2pattern.3
index 5c0daa8..0247c52 100644
--- a/doc/pcre2pattern.3
+++ b/doc/pcre2pattern.3
@@ -1,4 +1,4 @@
-.TH PCRE2PATTERN 3 "12 September 2017" "PCRE2 10.31"
+.TH PCRE2PATTERN 3 "04 September 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "PCRE2 REGULAR EXPRESSION DETAILS"
@@ -141,12 +141,12 @@ the application to apply the JIT optimization by calling
.SS "Setting match resource limits"
.rs
.sp
-The pcre2_match() function contains a counter that is incremented every time it
-goes round its main loop. The caller of \fBpcre2_match()\fP can set a limit on
-this counter, which therefore limits the amount of computing resource used for
-a match. The maximum depth of nested backtracking can also be limited; this
-indirectly restricts the amount of heap memory that is used, but there is also
-an explicit memory limit that can be set.
+The \fBpcre2_match()\fP function contains a counter that is incremented every
+time it goes round its main loop. The caller of \fBpcre2_match()\fP can set a
+limit on this counter, which therefore limits the amount of computing resource
+used for a match. The maximum depth of nested backtracking can also be limited;
+this indirectly restricts the amount of heap memory that is used, but there is
+also an explicit memory limit that can be set.
.P
These facilities are provided to catch runaway matches that are provoked by
patterns with huge matching trees (a typical example is a pattern with nested
@@ -162,18 +162,20 @@ where d is any number of decimal digits. However, the value of the setting must
be less than the value set (or defaulted) by the caller of \fBpcre2_match()\fP
for it to have any effect. In other words, the pattern writer can lower the
limits set by the programmer, but not raise them. If there is more than one
-setting of one of these limits, the lower value is used.
+setting of one of these limits, the lower value is used. The heap limit is
+specified in kibibytes (units of 1024 bytes).
.P
Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This name is
still recognized for backwards compatibility.
.P
-The heap limit applies only when the \fBpcre2_match()\fP interpreter is used
-for matching. It does not apply to JIT or DFA matching. The match limit is used
-(but in a different way) when JIT is being used, or when
-\fBpcre2_dfa_match()\fP is called, to limit computing resource usage by those
-matching functions. The depth limit is ignored by JIT but is relevant for DFA
-matching, which uses function recursion for recursions within the pattern. In
-this case, the depth limit controls the amount of system stack that is used.
+The heap limit applies only when the \fBpcre2_match()\fP or
+\fBpcre2_dfa_match()\fP interpreters are used for matching. It does not apply
+to JIT. The match limit is used (but in a different way) when JIT is being
+used, or when \fBpcre2_dfa_match()\fP is called, to limit computing resource
+usage by those matching functions. The depth limit is ignored by JIT but is
+relevant for DFA matching, which uses function recursion for recursions within
+the pattern and for lookaround assertions and atomic groups. In this case, the
+depth limit controls the depth of such recursion.
.
.
.\" HTML <a name="newlines"></a>
@@ -216,10 +218,11 @@ is used.
.P
The newline convention affects where the circumflex and dollar assertions are
true. It also affects the interpretation of the dot metacharacter when
-PCRE2_DOTALL is not set, and the behaviour of \eN. However, it does not affect
-what the \eR escape sequence matches. By default, this is any Unicode newline
-sequence, for Perl compatibility. However, this can be changed; see the next
-section and the description of \eR in the section entitled
+PCRE2_DOTALL is not set, and the behaviour of \eN when not followed by an
+opening brace. However, it does not affect what the \eR escape sequence
+matches. By default, this is any Unicode newline sequence, for Perl
+compatibility. However, this can be changed; see the next section and the
+description of \eR in the section entitled
.\" HTML <a href="#newlineseq">
.\" </a>
"Newline sequences"
@@ -316,7 +319,7 @@ precede a non-alphanumeric with backslash to specify that it stands for itself.
In particular, if you want to match a backslash, you write \e\e.
.P
In a UTF mode, only ASCII numbers and letters have any special meaning after a
-backslash. All other characters (in particular, those whose codepoints are
+backslash. All other characters (in particular, those whose code points are
greater than 127) are treated as literals.
.P
If a pattern is compiled with the PCRE2_EXTENDED option, most white space in
@@ -328,7 +331,10 @@ of the pattern.
If you want to remove the special meaning from a sequence of characters, you
can do so by putting them between \eQ and \eE. This is different from Perl in
that $ and @ are handled as literals in \eQ...\eE sequences in PCRE2, whereas
-in Perl, $ and @ cause variable interpolation. Note the following examples:
+in Perl, $ and @ cause variable interpolation. Also, Perl does "double-quotish
+backslash interpolation" on any backslashes between \eQ and \eE which, its
+documentation says, "may lead to confusing results". PCRE2 treats a backslash
+between \eQ and \eE just like any other character. Note the following examples:
.sp
Pattern PCRE2 matches Perl matches
.sp
@@ -337,6 +343,8 @@ in Perl, $ and @ cause variable interpolation. Note the following examples:
contents of $xyz
\eQabc\e$xyz\eE abc\e$xyz abc\e$xyz
\eQabc\eE\e$\eQxyz\eE abc$xyz abc$xyz
+ \eQA\eB\eE A\eB A\eB
+ \eQ\e\eE \e \e\eE
.sp
The \eQ...\eE sequence is recognized both inside and outside character classes.
An isolated \eE that is not preceded by \eQ is ignored. If \eQ is not followed
@@ -357,20 +365,27 @@ text editing, it is often easier to use one of the following escape sequences
than the binary character it represents. In an ASCII or Unicode environment,
these escapes are as follows:
.sp
- \ea alarm, that is, the BEL character (hex 07)
- \ecx "control-x", where x is any printable ASCII character
- \ee escape (hex 1B)
- \ef form feed (hex 0C)
- \en linefeed (hex 0A)
- \er carriage return (hex 0D)
- \et tab (hex 09)
- \e0dd character with octal code 0dd
- \eddd character with octal code ddd, or back reference
- \eo{ddd..} character with octal code ddd..
- \exhh character with hex code hh
- \ex{hhh..} character with hex code hhh.. (default mode)
- \euhhhh character with hex code hhhh (when PCRE2_ALT_BSUX is set)
-.sp
+ \ea alarm, that is, the BEL character (hex 07)
+ \ecx "control-x", where x is any printable ASCII character
+ \ee escape (hex 1B)
+ \ef form feed (hex 0C)
+ \en linefeed (hex 0A)
+ \er carriage return (hex 0D)
+ \et tab (hex 09)
+ \e0dd character with octal code 0dd
+ \eddd character with octal code ddd, or backreference
+ \eo{ddd..} character with octal code ddd..
+ \exhh character with hex code hh
+ \ex{hhh..} character with hex code hhh..
+ \eN{U+hhh..} character with Unicode hex code point hhh..
+ \euhhhh character with hex code hhhh (when PCRE2_ALT_BSUX is set)
+.sp
+The \eN{U+hhh..} escape sequence is recognized only when the PCRE2_UTF option
+is set, that is, when PCRE2 is operating in a Unicode mode. Perl also uses
+\eN{name} to specify characters by Unicode name; PCRE2 does not support this.
+Note that when \eN is not followed by an opening brace (curly bracket) it has
+an entirely different meaning, matching any character that is not a newline.
+.P
The precise effect of \ecx on ASCII characters is as follows: if x is a lower
case letter, it is converted to upper case. Then bit 6 of the character (hex
40) is inverted. Thus \ecA to \ecZ become hex 01 to hex 1A (A is 41, Z is 5A),
@@ -378,14 +393,14 @@ but \ec{ becomes hex 3B ({ is 7B), and \ec; becomes hex 7B (; is 3B). If the
code unit following \ec has a value less than 32 or greater than 126, a
compile-time error occurs.
.P
-When PCRE2 is compiled in EBCDIC mode, \ea, \ee, \ef, \en, \er, and \et
-generate the appropriate EBCDIC code values. The \ec escape is processed
-as specified for Perl in the \fBperlebcdic\fP document. The only characters
-that are allowed after \ec are A-Z, a-z, or one of @, [, \e, ], ^, _, or ?. Any
-other character provokes a compile-time error. The sequence \ec@ encodes
-character code 0; after \ec the letters (in either case) encode characters 1-26
-(hex 01 to hex 1A); [, \e, ], ^, and _ encode characters 27-31 (hex 1B to hex
-1F), and \ec? becomes either 255 (hex FF) or 95 (hex 5F).
+When PCRE2 is compiled in EBCDIC mode, \eN{U+hhh..} is not supported. \ea, \ee,
+\ef, \en, \er, and \et generate the appropriate EBCDIC code values. The \ec
+escape is processed as specified for Perl in the \fBperlebcdic\fP document. The
+only characters that are allowed after \ec are A-Z, a-z, or one of @, [, \e, ],
+^, _, or ?. Any other character provokes a compile-time error. The sequence
+\ec@ encodes character code 0; after \ec the letters (in either case) encode
+characters 1-26 (hex 01 to hex 1A); [, \e, ], ^, and _ encode characters 27-31
+(hex 1B to hex 1F), and \ec? becomes either 255 (hex FF) or 95 (hex 5F).
.P
Thus, apart from \ec?, these escapes generate the same character code values as
they do in an ASCII environment, though the meanings of the values mostly
@@ -408,13 +423,13 @@ follows is itself an octal digit.
The escape \eo must be followed by a sequence of octal digits, enclosed in
braces. An error occurs if this is not the case. This escape is a recent
addition to Perl; it provides a way of specifying character code points as octal
-numbers greater than 0777, and it also allows octal numbers and back references
+numbers greater than 0777, and it also allows octal numbers and backreferences
to be unambiguously specified.
.P
For greater clarity and unambiguity, it is best to avoid following \e by a
-digit greater than zero. Instead, use \eo{} or \ex{} to specify character
-numbers, and \eg{} to specify back references. The following paragraphs
-describe the old, ambiguous syntax.
+digit greater than zero. Instead, use \eo{} or \ex{} to specify numerical
+character code points, and \eg{} to specify backreferences. The following
+paragraphs describe the old, ambiguous syntax.
.P
The handling of a backslash followed by a digit other than 0 is complicated,
and Perl has changed over time, causing PCRE2 also to change.
@@ -422,7 +437,7 @@ and Perl has changed over time, causing PCRE2 also to change.
Outside a character class, PCRE2 reads the digit and any following digits as a
decimal number. If the number is less than 10, begins with the digit 8 or 9, or
if there are at least that many previous capturing left parentheses in the
-expression, the entire sequence is taken as a \fIback reference\fP. A
+expression, the entire sequence is taken as a \fIbackreference\fP. A
description of how this works is given
.\" HTML <a href="#backreferences">
.\" </a>
@@ -444,20 +459,20 @@ for themselves. For example, outside a character class:
.\" JOIN
\e40 is the same, provided there are fewer than 40
previous capturing subpatterns
- \e7 is always a back reference
+ \e7 is always a backreference
.\" JOIN
- \e11 might be a back reference, or another way of
+ \e11 might be a backreference, or another way of
writing a tab
\e011 is always a tab
\e0113 is a tab followed by the character "3"
.\" JOIN
- \e113 might be a back reference, otherwise the
+ \e113 might be a backreference, otherwise the
character with octal code 113
.\" JOIN
- \e377 might be a back reference, otherwise
+ \e377 might be a backreference, otherwise
the value 255 (decimal)
.\" JOIN
- \e81 is always a back reference
+ \e81 is always a backreference
.sp
Note that octal values of 100 or greater that are specified using this syntax
must not be introduced by a leading zero, because no more than three octal
@@ -490,12 +505,13 @@ limited to certain values, as follows:
8-bit non-UTF mode no greater than 0xff
16-bit non-UTF mode no greater than 0xffff
32-bit non-UTF mode no greater than 0xffffffff
- All UTF modes no greater than 0x10ffff and a valid codepoint
+ All UTF modes no greater than 0x10ffff and a valid code point
.sp
-Invalid Unicode codepoints are all those in the range 0xd800 to 0xdfff (the
-so-called "surrogate" codepoints). The check for these can be disabled by the
+Invalid Unicode code points are all those in the range 0xd800 to 0xdfff (the
+so-called "surrogate" code points). The check for these can be disabled by the
caller of \fBpcre2_compile()\fP by setting the option
-PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES.
+PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES. However, this is possible only in UTF-8
+and UTF-32 modes, because these values are not representable in UTF-16.
.
.
.SS "Escape sequences in character classes"
@@ -505,28 +521,28 @@ All the sequences that define a single character value can be used both inside
and outside character classes. In addition, inside a character class, \eb is
interpreted as the backspace character (hex 08).
.P
-\eN is not allowed in a character class. \eB, \eR, and \eX are not special
-inside a character class. Like other unrecognized alphabetic escape sequences,
-they cause an error. Outside a character class, these sequences have different
-meanings.
+When not followed by an opening brace, \eN is not allowed in a character class.
+\eB, \eR, and \eX are not special inside a character class. Like other
+unrecognized alphabetic escape sequences, they cause an error. Outside a
+character class, these sequences have different meanings.
.
.
.SS "Unsupported escape sequences"
.rs
.sp
-In Perl, the sequences \el, \eL, \eu, and \eU are recognized by its string
+In Perl, the sequences \eF, \el, \eL, \eu, and \eU are recognized by its string
handler and used to modify the case of following characters. By default, PCRE2
does not support these escape sequences. However, if the PCRE2_ALT_BSUX option
is set, \eU matches a "U" character, and \eu can be used to define a character
by code point, as described above.
.
.
-.SS "Absolute and relative back references"
+.SS "Absolute and relative backreferences"
.rs
.sp
The sequence \eg followed by a signed or unsigned number, optionally enclosed
-in braces, is an absolute or relative back reference. A named back reference
-can be coded as \eg{name}. Back references are discussed
+in braces, is an absolute or relative backreference. A named backreference
+can be coded as \eg{name}. Backreferences are discussed
.\" HTML <a href="#backreferences">
.\" </a>
later,
@@ -549,7 +565,7 @@ syntax for referencing a subpattern as a "subroutine". Details are discussed
later.
.\"
Note that \eg{...} (Perl syntax) and \eg<...> (Oniguruma syntax) are \fInot\fP
-synonymous. The former is a back reference; the latter is a
+synonymous. The former is a backreference; the latter is a
.\" HTML <a href="#subpatternsassubroutines">
.\" </a>
subroutine
@@ -567,6 +583,7 @@ Another use of backslash is for specifying generic character types:
\eD any character that is not a decimal digit
\eh any horizontal white space character
\eH any character that is not a horizontal white space character
+ \eN any character that is not a newline
\es any white space character
\eS any character that is not a white space character
\ev any vertical white space character
@@ -574,14 +591,20 @@ Another use of backslash is for specifying generic character types:
\ew any "word" character
\eW any "non-word" character
.sp
-There is also the single sequence \eN, which matches a non-newline character.
-This is the same as
+The \eN escape sequence has the same meaning as
.\" HTML <a href="#fullstopdot">
.\" </a>
the "." metacharacter
.\"
-when PCRE2_DOTALL is not set. Perl also uses \eN to match characters by name;
-PCRE2 does not support this.
+when PCRE2_DOTALL is not set, but setting PCRE2_DOTALL does not change the
+meaning of \eN. Note that when \eN is followed by an opening brace it has a
+different meaning. See the section entitled
+.\" HTML <a href="#digitsafterbackslash">
+.\" </a>
+"Non-printing characters"
+.\"
+above for details. Perl also uses \eN{name} to specify characters by Unicode
+name; PCRE2 does not support this.
.P
Each pair of lower and upper case escape sequences partitions the complete set
of characters into two disjoint sets. Any given character matches one, and only
@@ -690,7 +713,7 @@ U+000B), FF (form feed, U+000C), CR (carriage return, U+000D), or NEL (next
line, U+0085). Because this is an atomic group, the two-character sequence is
treated as a single unit that cannot be split.
.P
-In other modes, two additional characters whose codepoints are greater than 255
+In other modes, two additional characters whose code points are greater than 255
are added: LS (line separator, U+2028) and PS (paragraph separator, U+2029).
Unicode support is not needed for these characters to be recognized.
.P
@@ -725,8 +748,8 @@ an error.
When PCRE2 is built with Unicode support (the default), three additional escape
sequences that match characters with specific properties are available. In
8-bit non-UTF-8 mode, these sequences are of course limited to testing
-characters whose codepoints are less than 256, but they do work in this mode.
-In 32-bit non-UTF mode, codepoints greater than 0x10ffff (the Unicode limit)
+characters whose code points are less than 256, but they do work in this mode.
+In 32-bit non-UTF mode, code points greater than 0x10ffff (the Unicode limit)
may be encountered. These are all treated as being in the Common script and
with an unassigned type. The extra escape sequences are:
.sp
@@ -786,6 +809,7 @@ Cypriot,
Cyrillic,
Deseret,
Devanagari,
+Dogra,
Duployan,
Egyptian_Hieroglyphs,
Elbasan,
@@ -796,9 +820,11 @@ Gothic,
Grantha,
Greek,
Gujarati,
+Gunjala_Gondi,
Gurmukhi,
Han,
Hangul,
+Hanifi_Rohingya,
Hanunoo,
Hatran,
Hebrew,
@@ -826,11 +852,13 @@ Lisu,
Lycian,
Lydian,
Mahajani,
+Makasar,
Malayalam,
Mandaic,
Manichaean,
Marchen,
Masaram_Gondi,
+Medefaidrin,
Meetei_Mayek,
Mende_Kikakui,
Meroitic_Cursive,
@@ -853,6 +881,7 @@ Old_Italic,
Old_North_Arabian,
Old_Permic,
Old_Persian,
+Old_Sogdian,
Old_South_Arabian,
Old_Turkic,
Oriya,
@@ -873,6 +902,7 @@ Shavian,
Siddham,
SignWriting,
Sinhala,
+Sogdian,
Sora_Sompeng,
Soyombo,
Sundanese,
@@ -1001,7 +1031,10 @@ grapheme cluster", and treats the sequence as an atomic group
Unicode supports various kinds of composite character by giving each character
a grapheme breaking property, and having rules that use these properties to
define the boundaries of extended grapheme clusters. The rules are defined in
-Unicode Standard Annex 29, "Unicode Text Segmentation".
+Unicode Standard Annex 29, "Unicode Text Segmentation". Unicode 11.0.0
+abandoned the use of some previous properties that had been used for emojis.
+Instead it introduced various emoji-specific properties. PCRE2 uses only the
+Extended Pictographic property.
.P
\eX always matches at least one character. Then it decides whether to add
additional characters according to the following rules for ending a cluster:
@@ -1016,22 +1049,20 @@ L, V, LV, or LVT character; an LV or V character may be followed by a V or T
character; an LVT or T character may be followed only by a T character.
.P
4. Do not end before extending characters or spacing marks or the "zero-width
-joiner" characters. Characters with the "mark" property always have the
+joiner" character. Characters with the "mark" property always have the
"extend" grapheme breaking property.
.P
5. Do not end after prepend characters.
.P
-6. Do not break within emoji modifier sequences (a base character followed by a
-modifier). Extending characters are allowed before the modifier.
+6. Do not break within emoji modifier sequences or emoji zwj sequences. That
+is, do not break between characters with the Extended_Pictographic property.
+Extend and ZWJ characters are allowed between the characters.
.P
-7. Do not break within emoji zwj sequences (zero-width jointer followed by
-"glue after ZWJ" or "base glue after ZWJ").
-.P
-8. Do not break within emoji flag sequences. That is, do not break between
+7. Do not break within emoji flag sequences. That is, do not break between
regional indicator (RI) characters if there are an odd number of RI characters
before the break point.
.P
-6. Otherwise, end the cluster.
+8. Otherwise, end the cluster.
.
.
.\" HTML <a name="extraprops"></a>
@@ -1070,13 +1101,20 @@ sequences but the characters that they represent.)
.SS "Resetting the match start"
.rs
.sp
-The escape sequence \eK causes any previously matched characters not to be
-included in the final matched sequence. For example, the pattern:
+In normal use, the escape sequence \eK causes any previously matched characters
+not to be included in the final matched sequence that is returned. For example,
+the pattern:
.sp
foo\eKbar
.sp
-matches "foobar", but reports that it has matched "bar". This feature is
-similar to a lookbehind assertion
+matches "foobar", but reports that it has matched "bar". \eK does not interact
+with anchoring in any way. The pattern:
+.sp
+ ^foo\eKbar
+.sp
+matches only when the subject begins with "foobar" (in single line mode),
+though it again reports the matched string as "bar". This feature is similar to
+a lookbehind assertion
.\" HTML <a href="#lookbehind">
.\" </a>
(described below).
@@ -1098,7 +1136,14 @@ Perl documents that the use of \eK within assertions is "not well defined". In
PCRE2, \eK is acted upon when it occurs inside positive assertions, but is
ignored in negative assertions. Note that when a pattern such as (?=ab\eK)
matches, the reported start of the match can be greater than the end of the
-match.
+match. Using \eK in a lookbehind assertion at the start of a pattern can also
+lead to odd effects. For example, consider this pattern:
+.sp
+ (?<=\eKfoo)bar
+.sp
+If the subject is "foobar", a call to \fBpcre2_match()\fP with a starting
+offset of 3 succeeds and reports the matching string as "foobar", that is, the
+start of the reported match is earlier than where the match started.
.
.
.\" HTML <a name="smallassertions"></a>
@@ -1150,17 +1195,17 @@ end of the string as well as at the very end, whereas \ez matches only at the
end.
.P
The \eG assertion is true only when the current matching position is at the
-start point of the match, as specified by the \fIstartoffset\fP argument of
-\fBpcre2_match()\fP. It differs from \eA when the value of \fIstartoffset\fP is
-non-zero. By calling \fBpcre2_match()\fP multiple times with appropriate
-arguments, you can mimic Perl's /g option, and it is in this kind of
-implementation where \eG can be useful.
-.P
-Note, however, that PCRE2's interpretation of \eG, as the start of the current
-match, is subtly different from Perl's, which defines it as the end of the
-previous match. In Perl, these can be different when the previously matched
-string was empty. Because PCRE2 does just one match at a time, it cannot
-reproduce this behaviour.
+start point of the matching process, as specified by the \fIstartoffset\fP
+argument of \fBpcre2_match()\fP. It differs from \eA when the value of
+\fIstartoffset\fP is non-zero. By calling \fBpcre2_match()\fP multiple times
+with appropriate arguments, you can mimic Perl's /g option, and it is in this
+kind of implementation where \eG can be useful.
+.P
+Note, however, that PCRE2's implementation of \eG, being true at the starting
+character of the matching process, is subtly different from Perl's, which
+defines it as true at the end of the previous match. In Perl, these can be
+different when the previously matched string was empty. Because PCRE2 does just
+one match at a time, it cannot reproduce this behaviour.
.P
If all the alternatives of a pattern begin with \eG, the expression is anchored
to the starting match position, and the "anchored" flag is set in the compiled
@@ -1265,9 +1310,17 @@ The handling of dot is entirely independent of the handling of circumflex and
dollar, the only relationship being that they both involve newlines. Dot has no
special meaning in a character class.
.P
-The escape sequence \eN behaves like a dot, except that it is not affected by
-the PCRE2_DOTALL option. In other words, it matches any character except one
-that signifies the end of a line. Perl also uses \eN to match characters by
+The escape sequence \eN when not followed by an opening brace behaves like a
+dot, except that it is not affected by the PCRE2_DOTALL option. In other words,
+it matches any character except one that signifies the end of a line.
+.P
+When \eN is followed by an opening brace it has a different meaning. See the
+section entitled
+.\" HTML <a href="#digitsafterbackslash">
+.\" </a>
+"Non-printing characters"
+.\"
+above for details. Perl also uses \eN{name} to specify characters by Unicode
name; PCRE2 does not support this.
.
.
@@ -1356,30 +1409,32 @@ circumflex is not an assertion; it still consumes a character from the subject
string, and therefore it fails if the current pointer is at the end of the
string.
.P
-When caseless matching is set, any letters in a class represent both their
-upper case and lower case versions, so for example, a caseless [aeiou] matches
-"A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a
-caseful version would.
+Characters in a class may be specified by their code points using \eo, \ex, or
+\eN{U+hhh..} in the usual way. When caseless matching is set, any letters in a
+class represent both their upper case and lower case versions, so for example,
+a caseless [aeiou] matches "A" as well as "a", and a caseless [^aeiou] does not
+match "A", whereas a caseful version would.
.P
Characters that might indicate line breaks are never treated in any special way
when matching character classes, whatever line-ending sequence is in use, and
whatever setting of the PCRE2_DOTALL and PCRE2_MULTILINE options is used. A
class such as [^a] always matches one of these characters.
.P
-The character escape sequences \ed, \eD, \eh, \eH, \ep, \eP, \es, \eS, \ev,
-\eV, \ew, and \eW may appear in a character class, and add the characters that
-they match to the class. For example, [\edABCDEF] matches any hexadecimal
-digit. In UTF modes, the PCRE2_UCP option affects the meanings of \ed, \es, \ew
-and their upper case partners, just as it does when they appear outside a
-character class, as described in the section entitled
+The generic character type escape sequences \ed, \eD, \eh, \eH, \ep, \eP, \es,
+\eS, \ev, \eV, \ew, and \eW may appear in a character class, and add the
+characters that they match to the class. For example, [\edABCDEF] matches any
+hexadecimal digit. In UTF modes, the PCRE2_UCP option affects the meanings of
+\ed, \es, \ew and their upper case partners, just as it does when they appear
+outside a character class, as described in the section entitled
.\" HTML <a href="#genericchartypes">
.\" </a>
"Generic character types"
.\"
above. The escape sequence \eb has a different meaning inside a character
-class; it matches the backspace character. The sequences \eB, \eN, \eR, and \eX
-are not special inside a character class. Like any other unrecognized escape
-sequences, they cause an error.
+class; it matches the backspace character. The sequences \eB, \eR, and \eX are
+not special inside a character class. Like any other unrecognized escape
+sequences, they cause an error. The same is true for \eN when not followed by
+an opening brace.
.P
The minus (hyphen) character can be used to specify a range of characters in a
character class. For example, [d-m] matches any letter between d and m,
@@ -1574,9 +1629,13 @@ alternative in the subpattern.
.rs
.sp
The settings of the PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL,
-PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE options (which
-are Perl-compatible) can be changed from within the pattern by a sequence of
-Perl option letters enclosed between "(?" and ")". The option letters are
+PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE options can be
+changed from within the pattern by a sequence of letters enclosed between "(?"
+and ")". These options are Perl-compatible, and are described in detail in the
+.\" HREF
+\fBpcre2api\fP
+.\"
+documentation. The option letters are:
.sp
i for PCRE2_CASELESS
m for PCRE2_MULTILINE
@@ -1586,19 +1645,24 @@ Perl option letters enclosed between "(?" and ")". The option letters are
xx for PCRE2_EXTENDED_MORE
.sp
For example, (?im) sets caseless, multiline matching. It is also possible to
-unset these options by preceding the letter with a hyphen. The two "extended"
-options are not independent; unsetting either one cancels the effects of both
-of them.
+unset these options by preceding the relevant letters with a hyphen, for
+example (?-im). The two "extended" options are not independent; unsetting either
+one cancels the effects of both of them.
.P
A combined setting and unsetting such as (?im-sx), which sets PCRE2_CASELESS
and PCRE2_MULTILINE while unsetting PCRE2_DOTALL and PCRE2_EXTENDED, is also
-permitted. If a letter appears both before and after the hyphen, the option is
-unset. An empty options setting "(?)" is allowed. Needless to say, it has no
-effect.
+permitted. Only one hyphen may appear in the options string. If a letter
+appears both before and after the hyphen, the option is unset. An empty options
+setting "(?)" is allowed. Needless to say, it has no effect.
+.P
+If the first character following (? is a circumflex, it causes all of the above
+options to be unset. Thus, (?^) is equivalent to (?-imnsx). Letters may follow
+the circumflex to cause some options to be re-instated, but a hyphen may not
+appear.
.P
The PCRE2-specific options PCRE2_DUPNAMES and PCRE2_UNGREEDY can be changed in
the same way as the Perl-compatible options by using the characters J and U
-respectively.
+respectively. However, these are not unset by (?^).
.P
When one of these option changes occurs at top level (that is, not inside
subpattern parentheses), the change applies to the remainder of the pattern
@@ -1722,7 +1786,7 @@ numbers underneath show in which buffer the captured content will be stored.
/ ( a ) (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
# 1 2 2 3 2 3 4
.sp
-A back reference to a numbered subpattern uses the most recent value that is
+A backreference to a numbered subpattern uses the most recent value that is
set for that number by any subpattern. The following pattern matches "abcabc"
or "defdef":
.sp
@@ -1753,20 +1817,21 @@ duplicate named subpatterns, as described in the next section.
.rs
.sp
Identifying capturing parentheses by number is simple, but it can be very hard
-to keep track of the numbers in complicated regular expressions. Furthermore,
-if an expression is modified, the numbers may change. To help with this
-difficulty, PCRE2 supports the naming of subpatterns. This feature was not
-added to Perl until release 5.10. Python had the feature earlier, and PCRE1
+to keep track of the numbers in complicated patterns. Furthermore, if an
+expression is modified, the numbers may change. To help with this difficulty,
+PCRE2 supports the naming of capturing subpatterns. This feature was not added
+to Perl until release 5.10. Python had the feature earlier, and PCRE1
introduced it at release 4.0, using the Python syntax. PCRE2 supports both the
-Perl and the Python syntax. Perl allows identically numbered subpatterns to
-have different names, but PCRE2 does not.
+Perl and the Python syntax.
.P
-In PCRE2, a subpattern can be named in one of three ways: (?<name>...) or
-(?'name'...) as in Perl, or (?P<name>...) as in Python. References to capturing
-parentheses from other parts of the pattern, such as
+In PCRE2, a capturing subpattern can be named in one of three ways:
+(?<name>...) or (?'name'...) as in Perl, or (?P<name>...) as in Python. Names
+consist of up to 32 alphanumeric characters and underscores, but must start
+with a non-digit. References to capturing parentheses from other parts of the
+pattern, such as
.\" HTML <a href="#backreferences">
.\" </a>
-back references,
+backreferences,
.\"
.\" HTML <a href="#recursion">
.\" </a>
@@ -1777,23 +1842,47 @@ and
.\" </a>
conditions,
.\"
-can be made by name as well as by number.
+can all be made by name as well as by number.
+.P
+Named capturing parentheses are allocated numbers as well as names, exactly as
+if the names were not present. In both PCRE2 and Perl, capturing subpatterns
+are primarily identified by numbers; any names are just aliases for these
+numbers. The PCRE2 API provides function calls for extracting the complete
+name-to-number translation table from a compiled pattern, as well as
+convenience functions for extracting captured substrings by name.
.P
-Names consist of up to 32 alphanumeric characters and underscores, but must
-start with a non-digit. Named capturing parentheses are still allocated numbers
-as well as names, exactly as if the names were not present. The PCRE2 API
-provides function calls for extracting the name-to-number translation table
-from a compiled pattern. There are also convenience functions for extracting a
-captured substring by name.
+\fBWarning:\fP When more than one subpattern has the same number, as described
+in the previous section, a name given to one of them applies to all of them.
+Perl allows identically numbered subpatterns to have different names. Consider
+this pattern, where there are two capturing subpatterns, both numbered 1:
+.sp
+ (?|(?<AA>aa)|(?<BB>bb))
+.sp
+Perl allows this, with both names AA and BB as aliases of group 1. Thus, after
+a successful match, both names yield the same value (either "aa" or "bb").
.P
-By default, a name must be unique within a pattern, but it is possible to relax
-this constraint by setting the PCRE2_DUPNAMES option at compile time.
-(Duplicate names are also always permitted for subpatterns with the same
-number, set up as described in the previous section.) Duplicate names can be
-useful for patterns where only one instance of the named parentheses can match.
-Suppose you want to match the name of a weekday, either as a 3-letter
-abbreviation or as the full name, and in both cases you want to extract the
-abbreviation. This pattern (ignoring the line breaks) does the job:
+In an attempt to reduce confusion, PCRE2 does not allow the same group number
+to be associated with more than one name. The example above provokes a
+compile-time error. However, there is still scope for confusion. Consider this
+pattern:
+.sp
+ (?|(?<AA>aa)|(bb))
+.sp
+Although the second subpattern number 1 is not explicitly named, the name AA is
+still an alias for subpattern 1. Whether the pattern matches "aa" or "bb", a
+reference by name to group AA yields the matched string.
+.P
+By default, a name must be unique within a pattern, except that duplicate names
+are permitted for subpatterns with the same number, for example:
+.sp
+ (?|(?<AA>aa)|(?<AA>bb))
+.sp
+The duplicate name constraint can be disabled by setting the PCRE2_DUPNAMES
+option at compile time, or by the use of (?J) within the pattern. Duplicate
+names can be useful for patterns where only one instance of the named
+parentheses can match. Suppose you want to match the name of a weekday, either
+as a 3-letter abbreviation or as the full name, and in both cases you want to
+extract the abbreviation. This pattern (ignoring the line breaks) does the job:
.sp
(?<DN>Mon|Fri|Sun)(?:day)?|
(?<DN>Tue)(?:sday)?|
@@ -1802,14 +1891,13 @@ abbreviation. This pattern (ignoring the line breaks) does the job:
(?<DN>Sat)(?:urday)?
.sp
There are five capturing substrings, but only one is ever set after a match.
-(An alternative way of solving this problem is to use a "branch reset"
-subpattern, as described in the previous section.)
-.P
The convenience functions for extracting the data by name return the substring
for the first (and in this example, the only) subpattern of that name that
-matched. This saves searching to find which numbered subpattern it was.
+matched. This saves searching to find which numbered subpattern it was. (An
+alternative way of solving this problem is to use a "branch reset" subpattern,
+as described in the previous section.)
.P
-If you make a back reference to a non-unique named subpattern from elsewhere in
+If you make a backreference to a non-unique named subpattern from elsewhere in
the pattern, the subpatterns to which the name refers are checked in the order
in which they appear in the overall pattern. The first one that is set is used
for the reference. For example, this pattern matches both "foofoo" and
@@ -1820,8 +1908,7 @@ for the reference. For example, this pattern matches both "foofoo" and
.P
If you make a subroutine call to a non-unique named subpattern, the one that
corresponds to the first occurrence of the name is used. In the absence of
-duplicate numbers (see the previous section) this is the one with the lowest
-number.
+duplicate numbers this is the one with the lowest number.
.P
If you use a named reference in a condition
test (see the
@@ -1839,13 +1926,6 @@ handling named subpatterns, see the
\fBpcre2api\fP
.\"
documentation.
-.P
-\fBWarning:\fP You cannot use different names to distinguish between two
-subpatterns with the same number because PCRE2 uses only the numbers when
-matching. For this reason, an error is given at compile time if different names
-are given to subpatterns with the same number. However, you can always give the
-same name to subpatterns with the same number, even when PCRE2_DUPNAMES is not
-set.
.
.
.SH REPETITION
@@ -1861,7 +1941,7 @@ items:
the \eR escape sequence
an escape such as \ed or \epL that matches a single character
a character class
- a back reference
+ a backreference
a parenthesized subpattern (including most assertions)
a subroutine call to a subpattern (recursive or otherwise)
.sp
@@ -1978,7 +2058,7 @@ worth setting PCRE2_DOTALL in order to obtain this optimization, or
alternatively, using ^ to indicate anchoring explicitly.
.P
However, there are some cases where the optimization cannot be used. When .*
-is inside capturing parentheses that are the subject of a back reference
+is inside capturing parentheses that are the subject of a backreference
elsewhere in the pattern, a match at the start may fail where a later one
succeeds. Consider, for example:
.sp
@@ -2114,23 +2194,23 @@ sequences of non-digits cannot be broken, and failure happens quickly.
.
.
.\" HTML <a name="backreferences"></a>
-.SH "BACK REFERENCES"
+.SH "BACKREFERENCES"
.rs
.sp
Outside a character class, a backslash followed by a digit greater than 0 (and
-possibly further digits) is a back reference to a capturing subpattern earlier
+possibly further digits) is a backreference to a capturing subpattern earlier
(that is, to its left) in the pattern, provided there have been that many
previous capturing left parentheses.
.P
However, if the decimal number following the backslash is less than 8, it is
-always taken as a back reference, and causes an error only if there are not
+always taken as a backreference, and causes an error only if there are not
that many capturing left parentheses in the entire pattern. In other words, the
parentheses that are referenced need not be to the left of the reference for
-numbers less than 8. A "forward back reference" of this type can make sense
+numbers less than 8. A "forward backreference" of this type can make sense
when a repetition is involved and the subpattern to the right has participated
in an earlier iteration.
.P
-It is not possible to have a numerical "forward back reference" to a subpattern
+It is not possible to have a numerical "forward backreference" to a subpattern
whose number is 8 or more using this syntax because a sequence such as \e50 is
interpreted as a character defined in octal. See the subsection entitled
"Non-printing characters"
@@ -2139,7 +2219,7 @@ interpreted as a character defined in octal. See the subsection entitled
above
.\"
for further details of the handling of digits following a backslash. There is
-no such problem when named parentheses are used. A back reference to any
+no such problem when named parentheses are used. A backreference to any
subpattern is possible using named parentheses (see below).
.P
Another way of avoiding the ambiguity inherent in the use of digits following a
@@ -2167,7 +2247,7 @@ The sequence \eg{+1} is a reference to the next capturing subpattern. This kind
of forward reference can be useful it patterns that repeat. Perl does not
support the use of + in this way.
.P
-A back reference matches whatever actually matched the capturing subpattern in
+A backreference matches whatever actually matched the capturing subpattern in
the current subject string, rather than anything matching the subpattern
itself (see
.\" HTML <a href="#subpatternsassubroutines">
@@ -2180,17 +2260,17 @@ below for a way of doing that). So the pattern
.sp
matches "sense and sensibility" and "response and responsibility", but not
"sense and responsibility". If caseful matching is in force at the time of the
-back reference, the case of letters is relevant. For example,
+backreference, the case of letters is relevant. For example,
.sp
((?i)rah)\es+\e1
.sp
matches "rah rah" and "RAH RAH", but not "RAH rah", even though the original
capturing subpattern is matched caselessly.
.P
-There are several different ways of writing back references to named
+There are several different ways of writing backreferences to named
subpatterns. The .NET syntax \ek{name} and the Perl syntax \ek<name> or
\ek'name' are supported, as is the Python syntax (?P=name). Perl 5.10's unified
-back reference syntax, in which \eg can be used for both numeric and named
+backreference syntax, in which \eg can be used for both numeric and named
references, is also supported. We could rewrite the above example in any of
the following ways:
.sp
@@ -2202,21 +2282,22 @@ the following ways:
A subpattern that is referenced by name may appear in the pattern before or
after the reference.
.P
-There may be more than one back reference to the same subpattern. If a
-subpattern has not actually been used in a particular match, any back
-references to it always fail by default. For example, the pattern
+There may be more than one backreference to the same subpattern. If a
+subpattern has not actually been used in a particular match, any backreferences
+to it always fail by default. For example, the pattern
.sp
(a|(bc))\e2
.sp
always fails if it starts to match "a" rather than "bc". However, if the
-PCRE2_MATCH_UNSET_BACKREF option is set at compile time, a back reference to an
+PCRE2_MATCH_UNSET_BACKREF option is set at compile time, a backreference to an
unset value matches an empty string.
.P
Because there may be many capturing parentheses in a pattern, all digits
-following a backslash are taken as part of a potential back reference number.
+following a backslash are taken as part of a potential backreference number.
If the pattern continues with a digit character, some delimiter must be used to
-terminate the back reference. If the PCRE2_EXTENDED option is set, this can be
-white space. Otherwise, the \eg{ syntax or an empty comment (see
+terminate the backreference. If the PCRE2_EXTENDED or PCRE2_EXTENDED_MORE
+option is set, this can be white space. Otherwise, the \eg{ syntax or an empty
+comment (see
.\" HTML <a href="#comments">
.\" </a>
"Comments"
@@ -2224,10 +2305,10 @@ white space. Otherwise, the \eg{ syntax or an empty comment (see
below) can be used.
.
.
-.SS "Recursive back references"
+.SS "Recursive backreferences"
.rs
.sp
-A back reference that occurs inside the parentheses to which it refers fails
+A backreference that occurs inside the parentheses to which it refers fails
when the subpattern is first used, so, for example, (a\e1) never matches.
However, such references can be useful inside repeated subpatterns. For
example, the pattern
@@ -2235,13 +2316,13 @@ example, the pattern
(a|b\e1)+
.sp
matches any number of "a"s and also "aba", "ababbaa" etc. At each iteration of
-the subpattern, the back reference matches the character string corresponding
+the subpattern, the backreference matches the character string corresponding
to the previous iteration. In order for this to work, the pattern must be such
-that the first iteration does not need to match the back reference. This can be
+that the first iteration does not need to match the backreference. This can be
done using alternation, as in the example above, or by a quantifier with a
minimum of zero.
.P
-Back references of this type cause the group that they reference to be treated
+Backreferences of this type cause the group that they reference to be treated
as an
.\" HTML <a href="#atomicgroup">
.\" </a>
@@ -2268,25 +2349,35 @@ those that look ahead of the current position in the subject string, and those
that look behind it, and in each case an assertion may be positive (must
succeed for matching to continue) or negative (must not succeed for matching to
continue). An assertion subpattern is matched in the normal way, except that,
-when matching continues afterwards, the matching position in the subject string
-is as it was at the start of the assertion.
+when matching continues after a successful assertion, the matching position in
+the subject string is as it was before the assertion was processed.
.P
Assertion subpatterns are not capturing subpatterns. If an assertion contains
capturing subpatterns within it, these are counted for the purposes of
-numbering the capturing subpatterns in the whole pattern. However, substring
-capturing is carried out only for positive assertions that succeed, that is,
-one of their branches matches, so matching continues after the assertion. If
-all branches of a positive assertion fail to match, nothing is captured, and
-control is passed to the previous backtracking point.
-.P
-No capturing is done for a negative assertion unless it is being used as a
-condition in a
-.\" HTML <a href="#subpatternsassubroutines">
+numbering the capturing subpatterns in the whole pattern. Within each branch of
+an assertion, locally captured substrings may be referenced in the usual way.
+For example, a sequence such as (.)\eg{-1} can be used to check that two
+adjacent characters are the same.
+.P
+When a branch within an assertion fails to match, any substrings that were
+captured are discarded (as happens with any pattern branch that fails to
+match). A negative assertion succeeds only when all its branches fail to match;
+this means that no captured substrings are ever retained after a successful
+negative assertion. When an assertion contains a matching branch, what happens
+depends on the type of assertion.
+.P
+For a positive assertion, internally captured substrings in the successful
+branch are retained, and matching continues with the next pattern item after
+the assertion. For a negative assertion, a matching branch means that the
+assertion has failed. If the assertion is being used as a condition in a
+.\" HTML <a href="#conditions">
.\" </a>
conditional subpattern
.\"
-(see the discussion below). Matching continues after a non-conditional negative
-assertion only if all its branches fail to match.
+(see below), captured substrings are retained, because matching continues with
+the "no" branch of the condition. For other failing negative assertions,
+control passes to the previous backtracking point, thus discarding any captured
+strings within the assertion.
.P
For compatibility with Perl, most assertion subpatterns may be repeated; though
it makes no sense to assert the same thing several times, the side effect of
@@ -2404,10 +2495,10 @@ recursion,
that is, a "subroutine" call into a group that is already active,
is not supported.
.P
-Perl does not support back references in lookbehinds. PCRE2 does support them,
+Perl does not support backreferences in lookbehinds. PCRE2 does support them,
but only if certain conditions are met. The PCRE2_MATCH_UNSET_BACKREF option
must not be set, there must be no use of (?| in the pattern (it creates
-duplicate subpattern numbers), and if the back reference is by name, the name
+duplicate subpattern numbers), and if the backreference is by name, the name
must be unique. Of course, the referenced subpattern must itself be of fixed
length. The following pattern matches words containing at least two characters
that begin and end with the same character:
@@ -2488,7 +2579,8 @@ already been matched. The two possible forms of conditional subpattern are:
(?(condition)yes-pattern|no-pattern)
.sp
If the condition is satisfied, the yes-pattern is used; otherwise the
-no-pattern (if present) is used. If there are more than two alternatives in the
+no-pattern (if present) is used. An absent no-pattern is equivalent to an empty
+string (it always matches). If there are more than two alternatives in the
subpattern, a compile-time error occurs. Each of the two alternatives may
itself contain nested subpatterns of any form, including conditional
subpatterns; the restriction to two alternatives applies only at the level of
@@ -2693,12 +2785,12 @@ no part in the pattern matching.
.P
The sequence (?# marks the start of a comment that continues up to the next
closing parenthesis. Nested parentheses are not permitted. If the
-PCRE2_EXTENDED option is set, an unescaped # character also introduces a
-comment, which in this case continues to immediately after the next newline
-character or character sequence in the pattern. Which characters are
-interpreted as newlines is controlled by an option passed to the compiling
-function or by a special sequence at the start of the pattern, as described in
-the section entitled
+PCRE2_EXTENDED or PCRE2_EXTENDED_MORE option is set, an unescaped # character
+also introduces a comment, which in this case continues to immediately after
+the next newline character or character sequence in the pattern. Which
+characters are interpreted as newlines is controlled by an option passed to the
+compiling function or by a special sequence at the start of the pattern, as
+described in the section entitled
.\" HTML <a href="#newlines">
.\" </a>
"Newline conventions"
@@ -2838,10 +2930,6 @@ the last value taken on at the top level. If a capturing subpattern is not
matched at the top level, its final captured value is unset, even if it was
(temporarily) set at a deeper level during the matching process.
.P
-If there are more than 15 capturing parentheses in a pattern, PCRE2 has to
-obtain extra memory from the heap to store data during a recursion. If no
-memory can be obtained, the match fails with the PCRE2_ERROR_NOMEMORY error.
-.P
Do not confuse the (?R) item with the condition (R), which tests for recursion.
Consider this pattern, which matches text in angle brackets, allowing for
arbitrary nesting. Only digits are allowed in nested brackets (that is, when
@@ -2901,7 +2989,7 @@ in PCRE2 these values can be referenced. Consider this pattern:
^(.)(\e1|a(?2))
.sp
This pattern matches "bab". The first capturing parentheses match "b", then in
-the second group, when the back reference \e1 fails to match "b", the second
+the second group, when the backreference \e1 fails to match "b", the second
alternative matches "a" and then recurses. In the recursion, \e1 does now match
"b" and so the whole match succeeds. This match used to fail in Perl, but in
later versions (I tried 5.024) it now works.
@@ -2912,10 +3000,12 @@ later versions (I tried 5.024) it now works.
.rs
.sp
If the syntax for a recursive subpattern call (either by number or by
-name) is used outside the parentheses to which it refers, it operates like a
-subroutine in a programming language. The called subpattern may be defined
-before or after the reference. A numbered reference can be absolute or
-relative, as in these examples:
+name) is used outside the parentheses to which it refers, it operates a bit
+like a subroutine in a programming language. More accurately, PCRE2 treats the
+referenced subpattern as an independent subpattern which it tries to match at
+the current matching position. The called subpattern may be defined before or
+after the reference. A numbered reference can be absolute or relative, as in
+these examples:
.sp
(...(absolute)...)...(?2)...
(...(relative)...)...(?-1)...
@@ -2946,6 +3036,18 @@ different calls. For example, consider this pattern:
.sp
It matches "abcabc". It does not match "abcABC" because the change of
processing option does not affect the called subpattern.
+.P
+The behaviour of
+.\" HTML <a href="#backtrackcontrol">
+.\" </a>
+backtracking control verbs
+.\"
+in subpatterns when called as subroutines is described in the section entitled
+.\" HTML <a href="#btsub">
+.\" </a>
+"Backtracking verbs in subroutines"
+.\"
+below.
.
.
.\" HTML <a name="onigurumasubroutines"></a>
@@ -2966,7 +3068,7 @@ plus or a minus sign it is taken as a relative reference. For example:
(abc)(?i:\eg<-1>)
.sp
Note that \eg{...} (Perl syntax) and \eg<...> (Oniguruma syntax) are \fInot\fP
-synonymous. The former is a back reference; the latter is a subroutine call.
+synonymous. The former is a backreference; the latter is a subroutine call.
.
.
.SH CALLOUTS
@@ -3067,10 +3169,11 @@ only backslash items that are permitted are \eQ, \eE, and sequences such as
are faulted.
.P
A closing parenthesis can be included in a name either as \e) or between \eQ
-and \eE. In addition to backslash processing, if the PCRE2_EXTENDED option is
-also set, unescaped whitespace in verb names is skipped, and #-comments are
-recognized, exactly as in the rest of the pattern. PCRE2_EXTENDED does not
-affect verb names unless PCRE2_ALT_VERBNAMES is also set.
+and \eE. In addition to backslash processing, if the PCRE2_EXTENDED or
+PCRE2_EXTENDED_MORE option is also set, unescaped whitespace in verb names is
+skipped, and #-comments are recognized, exactly as in the rest of the pattern.
+PCRE2_EXTENDED and PCRE2_EXTENDED_MORE do not affect verb names unless
+PCRE2_ALT_VERBNAMES is also set.
.P
The maximum length of a name is 255 in the 8-bit library and 65535 in the
16-bit and 32-bit libraries. If the name is empty, that is, if the closing
@@ -3123,17 +3226,16 @@ in the
.\"
documentation.
.P
-Experiments with Perl suggest that it too has similar optimizations, sometimes
-leading to anomalous results.
+Experiments with Perl suggest that it too has similar optimizations, and like
+PCRE2, turning them off can change the result of a match.
.
.
.SS "Verbs that act immediately"
.rs
.sp
-The following verbs act as soon as they are encountered. They may not be
-followed by a name.
+The following verbs act as soon as they are encountered.
.sp
- (*ACCEPT)
+ (*ACCEPT) or (*ACCEPT:NAME)
.sp
This verb causes the match to end successfully, skipping the remainder of the
pattern. However, when it is inside a subpattern that is called as a
@@ -3149,18 +3251,21 @@ example:
This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is captured by
the outer parentheses.
.sp
- (*FAIL) or (*F)
+ (*FAIL) or (*FAIL:NAME)
.sp
-This verb causes a matching failure, forcing backtracking to occur. It is
-equivalent to (?!) but easier to read. The Perl documentation notes that it is
-probably useful only when combined with (?{}) or (??{}). Those are, of course,
-Perl features that are not present in PCRE2. The nearest equivalent is the
-callout feature, as for example in this pattern:
+This verb causes a matching failure, forcing backtracking to occur. It may be
+abbreviated to (*F). It is equivalent to (?!) but easier to read. The Perl
+documentation notes that it is probably useful only when combined with (?{}) or
+(??{}). Those are, of course, Perl features that are not present in PCRE2. The
+nearest equivalent is the callout feature, as for example in this pattern:
.sp
a+(?C)(*FAIL)
.sp
A match with the string "aaaa" always fails, but the callout is taken before
each backtrack happens (in this example, 10 times).
+.P
+(*ACCEPT:NAME) and (*FAIL:NAME) behave exactly the same as
+(*MARK:NAME)(*ACCEPT) and (*MARK:NAME)(*FAIL), respectively.
.
.
.SS "Recording which path was taken"
@@ -3175,9 +3280,8 @@ starting point (see (*SKIP) below).
A name is always required with this verb. There may be as many instances of
(*MARK) as you like in a pattern, and their names do not have to be unique.
.P
-When a match succeeds, the name of the last-encountered (*MARK:NAME),
-(*PRUNE:NAME), or (*THEN:NAME) on the matching path is passed back to the
-caller as described in the section entitled
+When a match succeeds, the name of the last-encountered (*MARK:NAME) on the
+matching path is passed back to the caller as described in the section entitled
.\" HTML <a href="pcre2api.html#matchotherdata">
.\" </a>
"Other information about the match"
@@ -3186,8 +3290,16 @@ in the
.\" HREF
\fBpcre2api\fP
.\"
-documentation. Here is an example of \fBpcre2test\fP output, where the "mark"
-modifier requests the retrieval and outputting of (*MARK) data:
+documentation. This applies to all instances of (*MARK), including those inside
+assertions and atomic groups. (There are differences in those cases when
+(*MARK) is used in conjunction with (*SKIP) as described below.)
+.P
+As well as (*MARK), the (*COMMIT), (*PRUNE) and (*THEN) verbs may have
+associated NAME arguments. Whichever is last on the matching path is passed
+back. See below for more details of these other verbs.
+.P
+Here is an example of \fBpcre2test\fP output, where the "mark" modifier
+requests the retrieval and outputting of (*MARK) data:
.sp
re> /X(*MARK:A)Y|X(*MARK:B)Z/mark
data> XY
@@ -3231,35 +3343,37 @@ to ensure that the match is always attempted.
.rs
.sp
The following verbs do nothing when they are encountered. Matching continues
-with what follows, but if there is no subsequent match, causing a backtrack to
-the verb, a failure is forced. That is, backtracking cannot pass to the left of
-the verb. However, when one of these verbs appears inside an atomic group or in
-an assertion that is true, its effect is confined to that group, because once
-the group has been matched, there is never any backtracking into it. In this
-situation, backtracking has to jump to the left of the entire atomic group or
-assertion.
+with what follows, but if there is a subsequent match failure, causing a
+backtrack to the verb, a failure is forced. That is, backtracking cannot pass
+to the left of the verb. However, when one of these verbs appears inside an
+atomic group or in a lookaround assertion that is true, its effect is confined
+to that group, because once the group has been matched, there is never any
+backtracking into it. Backtracking from beyond an assertion or an atomic group
+ignores the entire group, and seeks a preceding backtracking point.
.P
These verbs differ in exactly what kind of failure occurs when backtracking
reaches them. The behaviour described below is what happens when the verb is
not in a subroutine or an assertion. Subsequent sections cover these special
cases.
.sp
- (*COMMIT)
+ (*COMMIT) or (*COMMIT:NAME)
.sp
-This verb, which may not be followed by a name, causes the whole match to fail
-outright if there is a later matching failure that causes backtracking to reach
-it. Even if the pattern is unanchored, no further attempts to find a match by
-advancing the starting point take place. If (*COMMIT) is the only backtracking
-verb that is encountered, once it has been passed \fBpcre2_match()\fP is
-committed to finding a match at the current starting point, or not at all. For
-example:
+This verb causes the whole match to fail outright if there is a later matching
+failure that causes backtracking to reach it. Even if the pattern is
+unanchored, no further attempts to find a match by advancing the starting point
+take place. If (*COMMIT) is the only backtracking verb that is encountered,
+once it has been passed \fBpcre2_match()\fP is committed to finding a match at
+the current starting point, or not at all. For example:
.sp
a+(*COMMIT)b
.sp
This matches "xxaab" but not "aacaab". It can be thought of as a kind of
-dynamic anchor, or "I've started, so I must finish." The name of the most
-recently passed (*MARK) in the path is passed back when (*COMMIT) forces a
-match failure.
+dynamic anchor, or "I've started, so I must finish."
+.P
+The behaviour of (*COMMIT:NAME) is not the same as (*MARK:NAME)(*COMMIT). It is
+like (*MARK:NAME) in that the name is remembered for passing back to the
+caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
+ignoring those set by (*COMMIT), (*PRUNE) and (*THEN).
.P
If there is more than one backtracking verb in a pattern, a different one that
follows (*COMMIT) may be triggered first, so merely passing (*COMMIT) during a
@@ -3300,7 +3414,7 @@ as (*COMMIT).
The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE). It is
like (*MARK:NAME) in that the name is remembered for passing back to the
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
-ignoring those set by (*PRUNE) or (*THEN).
+ignoring those set by (*COMMIT), (*PRUNE) or (*THEN).
.sp
(*SKIP)
.sp
@@ -3308,7 +3422,7 @@ This verb, when given without a name, is like (*PRUNE), except that if the
pattern is unanchored, the "bumpalong" advance is not to the next character,
but to the position in the subject where (*SKIP) was encountered. (*SKIP)
signifies that whatever text was matched leading up to it cannot be part of a
-successful match. Consider:
+successful match if there is a later mismatch. Consider:
.sp
a+(*SKIP)b
.sp
@@ -3321,15 +3435,39 @@ instead of skipping on to "c".
.sp
(*SKIP:NAME)
.sp
-When (*SKIP) has an associated name, its behaviour is modified. When it is
-triggered, the previous path through the pattern is searched for the most
-recent (*MARK) that has the same name. If one is found, the "bumpalong" advance
-is to the subject position that corresponds to that (*MARK) instead of to where
-(*SKIP) was encountered. If no (*MARK) with a matching name is found, the
-(*SKIP) is ignored.
+When (*SKIP) has an associated name, its behaviour is modified. When such a
+(*SKIP) is triggered, the previous path through the pattern is searched for the
+most recent (*MARK) that has the same name. If one is found, the "bumpalong"
+advance is to the subject position that corresponds to that (*MARK) instead of
+to where (*SKIP) was encountered. If no (*MARK) with a matching name is found,
+the (*SKIP) is ignored.
+.P
+The search for a (*MARK) name uses the normal backtracking mechanism, which
+means that it does not see (*MARK) settings that are inside atomic groups or
+assertions, because they are never re-entered by backtracking. Compare the
+following \fBpcre2test\fP examples:
+.sp
+ re> /a(?>(*MARK:X))(*SKIP:X)(*F)|(.)/
+ data: abc
+ 0: a
+ 1: a
+ data:
+ re> /a(?:(*MARK:X))(*SKIP:X)(*F)|(.)/
+ data: abc
+ 0: b
+ 1: b
+.sp
+In the first example, the (*MARK) setting is in an atomic group, so it is not
+seen when (*SKIP:X) triggers, causing the (*SKIP) to be ignored. This allows
+the second branch of the pattern to be tried at the first character position.
+In the second example, the (*MARK) setting is not in an atomic group. This
+allows (*SKIP:X) to find the (*MARK) when it backtracks, and this causes a new
+matching attempt to start at the second character. This time, the (*MARK) is
+never seen because "a" does not match "b", so the matcher immediately jumps to
+the second branch of the pattern.
.P
Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It ignores
-names that are set by (*PRUNE:NAME) or (*THEN:NAME).
+names that are set by (*COMMIT:NAME), (*PRUNE:NAME) or (*THEN:NAME).
.sp
(*THEN) or (*THEN:NAME)
.sp
@@ -3347,10 +3485,10 @@ succeeds and BAR fails, COND3 is tried. If subsequently BAZ fails, there are no
more alternatives, so there is a backtrack to whatever came before the entire
group. If (*THEN) is not inside an alternation, it acts like (*PRUNE).
.P
-The behaviour of (*THEN:NAME) is the not the same as (*MARK:NAME)(*THEN).
-It is like (*MARK:NAME) in that the name is remembered for passing back to the
+The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN). It is
+like (*MARK:NAME) in that the name is remembered for passing back to the
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
-ignoring those set by (*PRUNE) and (*THEN).
+ignoring those set by (*COMMIT), (*PRUNE) and (*THEN).
.P
A subpattern that does not contain a | character is just a part of the
enclosing alternative; it is not a nested alternation with only one
@@ -3423,13 +3561,14 @@ onto (*COMMIT).
.SS "Backtracking verbs in repeated groups"
.rs
.sp
-PCRE2 differs from Perl in its handling of backtracking verbs in repeated
-groups. For example, consider:
+PCRE2 sometimes differs from Perl in its handling of backtracking verbs in
+repeated groups. For example, consider:
.sp
/(a(*COMMIT)b)+ac/
.sp
-If the subject is "abac", Perl matches, but PCRE2 fails because the (*COMMIT)
-in the second repeat of the group acts.
+If the subject is "abac", Perl matches unless its optimizations are disabled,
+but PCRE2 always fails because the (*COMMIT) in the second repeat of the group
+acts.
.
.
.\" HTML <a name="btassert"></a>
@@ -3442,24 +3581,32 @@ not the assertion is standalone or acting as the condition in a conditional
subpattern.
.P
(*ACCEPT) in a standalone positive assertion causes the assertion to succeed
-without any further processing; captured strings are retained. In a standalone
-negative assertion, (*ACCEPT) causes the assertion to fail without any further
-processing; captured substrings are discarded.
+without any further processing; captured strings and a (*MARK) name (if set)
+are retained. In a standalone negative assertion, (*ACCEPT) causes the
+assertion to fail without any further processing; captured substrings and any
+(*MARK) name are discarded.
.P
If the assertion is a condition, (*ACCEPT) causes the condition to be true for
a positive assertion and false for a negative one; captured substrings are
retained in both cases.
.P
+The remaining verbs act only when a later failure causes a backtrack to
+reach them. This means that their effect is confined to the assertion,
+because lookaround assertions are atomic. A backtrack that occurs after an
+assertion is complete does not jump back into the assertion. Note in particular
+that a (*MARK) name that is set in an assertion is not "seen" by an instance of
+(*SKIP:NAME) later in the pattern.
+.P
The effect of (*THEN) is not allowed to escape beyond an assertion. If there
are no more branches to try, (*THEN) causes a positive assertion to be false,
and a negative assertion to be true.
.P
The other backtracking verbs are not treated specially if they appear in a
standalone positive assertion. In a conditional positive assertion,
-backtracking into (*COMMIT), (*SKIP), or (*PRUNE) causes the condition to be
-false. However, for both standalone and conditional negative assertions,
-backtracking into (*COMMIT), (*SKIP), or (*PRUNE) causes the assertion to be
-true, without considering any further alternative branches.
+backtracking (from within the assertion) into (*COMMIT), (*SKIP), or (*PRUNE)
+causes the condition to be false. However, for both standalone and conditional
+negative assertions, backtracking into (*COMMIT), (*SKIP), or (*PRUNE) causes
+the assertion to be true, without considering any further alternative branches.
.
.
.\" HTML <a name="btsub"></a>
@@ -3467,21 +3614,24 @@ true, without considering any further alternative branches.
.rs
.sp
These behaviours occur whether or not the subpattern is called recursively.
-Perl's treatment of subroutines is different in some cases.
-.P
-(*FAIL) in a subpattern called as a subroutine has its normal effect: it forces
-an immediate backtrack.
.P
(*ACCEPT) in a subpattern called as a subroutine causes the subroutine match to
succeed without any further processing. Matching then continues after the
-subroutine call.
+subroutine call. Perl documents this behaviour. Perl's treatment of the other
+verbs in subroutines is different in some cases.
+.P
+(*FAIL) in a subpattern called as a subroutine has its normal effect: it forces
+an immediate backtrack.
.P
-(*COMMIT), (*SKIP), and (*PRUNE) in a subpattern called as a subroutine cause
-the subroutine match to fail.
+(*COMMIT), (*SKIP), and (*PRUNE) cause the subroutine match to fail when
+triggered by being backtracked to in a subpattern called as a subroutine. There
+is then a backtrack at the outer level.
.P
-(*THEN) skips to the next alternative in the innermost enclosing group within
-the subpattern that has alternatives. If there is no such group within the
-subpattern, (*THEN) causes the subroutine match to fail.
+(*THEN), when triggered, skips to the next alternative in the innermost
+enclosing group within the subpattern that has alternatives (its normal
+behaviour). However, if there is no such group within the subroutine
+subpattern, the subroutine match fails and there is a backtrack at the outer
+level.
.
.
.SH "SEE ALSO"
@@ -3505,6 +3655,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 12 September 2017
-Copyright (c) 1997-2017 University of Cambridge.
+Last updated: 04 September 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/doc/pcre2perform.3 b/doc/pcre2perform.3
index 8b49a2a..91ca22a 100644
--- a/doc/pcre2perform.3
+++ b/doc/pcre2perform.3
@@ -1,4 +1,4 @@
-.TH PCRE2PERFORM 3 "08 April 2017" "PCRE2 10.30"
+.TH PCRE2PERFORM 3 "25 April 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "PCRE2 PERFORMANCE"
@@ -34,9 +34,9 @@ example, the very simple pattern
.sp
((ab){1,1000}c){1,3}
.sp
-uses over 50K bytes when compiled using the 8-bit library. When PCRE2 is
+uses over 50KiB when compiled using the 8-bit library. When PCRE2 is
compiled with its default internal pointer size of two bytes, the size limit on
-a compiled pattern is 64K code units in the 8-bit and 16-bit libraries, and
+a compiled pattern is 65535 code units in the 8-bit and 16-bit libraries, and
this is reached with the above pattern if the outer repetition is increased
from 3 to 4. PCRE2 can be compiled to use larger internal pointers and thus
handle larger compiled patterns, but it is better to try to rewrite your
@@ -52,14 +52,14 @@ facility. Re-writing the above pattern as
.sp
((ab)(?2){0,999}c)(?1){0,2}
.sp
-reduces the memory requirements to around 16K, and indeed it remains under 20K
-even with the outer repetition increased to 100. However, this kind of pattern
-is not always exactly equivalent, because any captures within subroutine calls
-are lost when the subroutine completes. If this is not a problem, this kind of
-rewriting will allow you to process patterns that PCRE2 cannot otherwise
-handle. The matching performance of the two different versions of the pattern
-are roughly the same. (This applies from release 10.30 - things were different
-in earlier releases.)
+reduces the memory requirements to around 16KiB, and indeed it remains under
+20KiB even with the outer repetition increased to 100. However, this kind of
+pattern is not always exactly equivalent, because any captures within
+subroutine calls are lost when the subroutine completes. If this is not a
+problem, this kind of rewriting will allow you to process patterns that PCRE2
+cannot otherwise handle. The matching performance of the two different versions
+of the pattern is roughly the same. (This applies from release 10.30 - things
+were different in earlier releases.)
.
.
.SH "STACK AND HEAP USAGE AT RUN TIME"
@@ -69,7 +69,7 @@ From release 10.30, the interpretive (non-JIT) version of \fBpcre2_match()\fP
uses very little system stack at run time. In earlier releases recursive
function calls could use a great deal of stack, and this could cause problems,
but this usage has been eliminated. Backtracking positions are now explicitly
-remembered in memory frames controlled by the code. An initial 20K vector of
+remembered in memory frames controlled by the code. An initial 20KiB vector of
frames is allocated on the system stack (enough for about 100 frames for small
patterns), but if this is insufficient, heap memory is used. The amount of heap
memory can be limited; if the limit is set to zero, only the initial stack
@@ -78,9 +78,16 @@ may also reduce the memory requirements.
.P
In contrast to \fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP does use recursive
function calls, but only for processing atomic groups, lookaround assertions,
-and recursion within the pattern. Too much nested recursion may cause stack
-issues. The "match depth" parameter can be used to limit the depth of function
-recursion in \fBpcre2_dfa_match()\fP.
+and recursion within the pattern. The original version of the code used to
+allocate quite large internal workspace vectors on the stack, which caused some
+problems for some patterns in environments with small stacks. From release
+10.32 the code for \fBpcre2_dfa_match()\fP has been re-factored to use heap
+memory when necessary for internal workspace when recursing, though recursive
+function calls are still used.
+.P
+The "match depth" parameter can be used to limit the depth of function
+recursion, and the "match heap" parameter to limit heap memory in
+\fBpcre2_dfa_match()\fP.
.
.
.SH "PROCESSING TIME"
@@ -232,6 +239,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 08 April 2017
-Copyright (c) 1997-2017 University of Cambridge.
+Last updated: 25 April 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/doc/pcre2posix.3 b/doc/pcre2posix.3
index 399e2a8..0d8b2c2 100644
--- a/doc/pcre2posix.3
+++ b/doc/pcre2posix.3
@@ -108,14 +108,14 @@ When a pattern that is compiled with this flag is passed to \fBregexec()\fP for
matching, the \fInmatch\fP and \fIpmatch\fP arguments are ignored, and no
captured strings are returned. Versions of the PCRE library prior to 10.22 used
to set the PCRE2_NO_AUTO_CAPTURE compile option, but this no longer happens
-because it disables the use of back references.
+because it disables the use of backreferences.
.sp
REG_PEND
.sp
If this option is set, the \fBre_endp\fP field in the \fIpreg\fP structure
(which has the type const char *) must be set to point to the character beyond
the end of the pattern before calling \fBregcomp()\fP. The pattern itself may
-now contain binary zeroes, which are treated as data characters. Without
+now contain binary zeros, which are treated as data characters. Without
REG_PEND, a binary zero terminates the pattern and the \fBre_endp\fP field is
ignored. This is a GNU extension to the POSIX standard and should be used with
caution in software intended to be portable to other systems.
@@ -224,10 +224,10 @@ function.
.sp
REG_STARTEND
.sp
-When this option is set, the subject string is starts at \fIstring\fP +
+When this option is set, the subject string starts at \fIstring\fP +
\fIpmatch[0].rm_so\fP and ends at \fIstring\fP + \fIpmatch[0].rm_eo\fP, which
should point to the first character beyond the string. There may be binary
-zeroes within the subject string, and indeed, using REG_STARTEND is the only
+zeros within the subject string, and indeed, using REG_STARTEND is the only
way to pass a subject string that contains a binary zero.
.P
Whatever the value of \fIpmatch[0].rm_so\fP, the offsets of the matched string
diff --git a/doc/pcre2serialize.3 b/doc/pcre2serialize.3
index 5a87cec..85aee9b 100644
--- a/doc/pcre2serialize.3
+++ b/doc/pcre2serialize.3
@@ -1,4 +1,4 @@
-.TH PCRE2SERIALIZE 3 "21 March 2017" "PCRE2 10.30"
+.TH PCRE2SERIALIZE 3 "27 June 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "SAVING AND RE-USING PRECOMPILED PCRE2 PATTERNS"
@@ -28,6 +28,14 @@ the same code unit width, and must also have the same endianness, pointer width
and PCRE2_SIZE type. For example, patterns compiled on a 32-bit system using
PCRE2's 16-bit library cannot be reloaded on a 64-bit system, nor can they be
reloaded using the 8-bit library.
+.P
+Note that "serialization" in PCRE2 does not convert compiled patterns to an
+abstract format like Java or .NET serialization. The serialized output is
+really just a bytecode dump, which is why it can only be reloaded in the same
+environment as the one that created it. Hence the restrictions mentioned above.
+Applications that are not statically linked with a fixed version of PCRE2 must
+be prepared to recompile patterns from their sources, in order to be immune to
+PCRE2 upgrades.
.
.
.SH "SECURITY CONCERNS"
@@ -46,11 +54,11 @@ the byte stream that is passed to it.
.SH "SAVING COMPILED PATTERNS"
.rs
.sp
-Before compiled patterns can be saved they must be serialized, that is,
-converted to a stream of bytes. A single byte stream may contain any number of
-compiled patterns, but they must all use the same character tables. A single
-copy of the tables is included in the byte stream (its size is 1088 bytes). For
-more details of character tables, see the
+Before compiled patterns can be saved they must be serialized, which in PCRE2
+means converting the pattern to a stream of bytes. A single byte stream may
+contain any number of compiled patterns, but they must all use the same
+character tables. A single copy of the tables is included in the byte stream
+(its size is 1088 bytes). For more details of character tables, see the
.\" HTML <a href="pcre2api.html#localesupport">
.\" </a>
section on locale support
@@ -106,7 +114,9 @@ non-binary data, be sure that the file is opened for binary output.
Serializing a set of patterns leaves the original data untouched, so they can
still be used for matching. Their memory must eventually be freed in the usual
way by calling \fBpcre2_code_free()\fP. When you have finished with the byte
-stream, it too must be freed by calling \fBpcre2_serialize_free()\fP.
+stream, it too must be freed by calling \fBpcre2_serialize_free()\fP. If this
+function is called with a NULL argument, it returns immediately without doing
+anything.
.
.
.SH "RE-USING PRECOMPILED PATTERNS"
@@ -184,6 +194,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 21 March 2017
-Copyright (c) 1997-2017 University of Cambridge.
+Last updated: 27 June 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/doc/pcre2syntax.3 b/doc/pcre2syntax.3
index 6eb0235..c392bfb 100644
--- a/doc/pcre2syntax.3
+++ b/doc/pcre2syntax.3
@@ -1,4 +1,4 @@
-.TH PCRE2SYNTAX 3 "17 June 2017" "PCRE2 10.30"
+.TH PCRE2SYNTAX 3 "02 September 2018" "PCRE2 10.32"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY"
@@ -35,9 +35,10 @@ This table applies to ASCII and Unicode environments.
\eddd character with octal code ddd, or backreference
\eo{ddd..} character with octal code ddd..
\eU "U" if PCRE2_ALT_BSUX is set (otherwise is an error)
+ \eN{U+hh..} character with Unicode code point hh.. (Unicode mode only)
\euhhhh character with hex code hhhh (if PCRE2_ALT_BSUX is set)
\exhh character with hex code hh
- \ex{hhh..} character with hex code hhh..
+ \ex{hh..} character with hex code hh..
.sp
Note that \e0dd is always an octal code. The treatment of backslash followed by
a non-zero digit is complicated; for details see the section
@@ -50,7 +51,9 @@ in the
\fBpcre2pattern\fP
.\"
documentation, where details of escape processing in EBCDIC environments are
-also given.
+also given. \eN{U+hh..} is synonymous with \ex{hh..} in PCRE2 but is not
+supported in EBCDIC environments. Note that \eN not followed by an opening
+curly bracket has a different meaning (see below).
.P
When \ex is not followed by {, from zero to two hexadecimal digits are read,
but if PCRE2_ALT_BSUX is set, \ex must be followed by two hexadecimal digits to
@@ -160,6 +163,7 @@ at release 5.18.
.SH "SCRIPT NAMES FOR \ep AND \eP"
.rs
.sp
+Adlam,
Ahom,
Anatolian_Hieroglyphs,
Arabic,
@@ -170,6 +174,7 @@ Bamum,
Bassa_Vah,
Batak,
Bengali,
+Bhaiksuki,
Bopomofo,
Brahmi,
Braille,
@@ -188,6 +193,7 @@ Cypriot,
Cyrillic,
Deseret,
Devanagari,
+Dogra,
Duployan,
Egyptian_Hieroglyphs,
Elbasan,
@@ -198,9 +204,11 @@ Gothic,
Grantha,
Greek,
Gujarati,
+Gunjala_Gondi,
Gurmukhi,
Han,
Hangul,
+Hanifi_Rohingya,
Hanunoo,
Hatran,
Hebrew,
@@ -228,9 +236,13 @@ Lisu,
Lycian,
Lydian,
Mahajani,
+Makasar,
Malayalam,
Mandaic,
Manichaean,
+Marchen,
+Masaram_Gondi,
+Medefaidrin,
Meetei_Mayek,
Mende_Kikakui,
Meroitic_Cursive,
@@ -243,7 +255,9 @@ Multani,
Myanmar,
Nabataean,
New_Tai_Lue,
+Newa,
Nko,
+Nushu,
Ogham,
Ol_Chiki,
Old_Hungarian,
@@ -251,9 +265,11 @@ Old_Italic,
Old_North_Arabian,
Old_Permic,
Old_Persian,
+Old_Sogdian,
Old_South_Arabian,
Old_Turkic,
Oriya,
+Osage,
Osmanya,
Pahawh_Hmong,
Palmyrene,
@@ -270,7 +286,9 @@ Shavian,
Siddham,
SignWriting,
Sinhala,
+Sogdian,
Sora_Sompeng,
+Soyombo,
Sundanese,
Syloti_Nagri,
Syriac,
@@ -281,6 +299,7 @@ Tai_Tham,
Tai_Viet,
Takri,
Tamil,
+Tangut,
Telugu,
Thaana,
Thai,
@@ -290,7 +309,8 @@ Tirhuta,
Ugaritic,
Vai,
Warang_Citi,
-Yi.
+Yi,
+Zanabazar_Square.
.
.
.SH "CHARACTER CLASSES"
@@ -361,10 +381,10 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
\eG first matching position in subject
.
.
-.SH "MATCH POINT RESET"
+.SH "REPORTED MATCH POINT SETTING"
.rs
.sp
- \eK reset start of match
+ \eK set reported start of match
.sp
\eK is honoured in positive assertions, but ignored in negative ones.
.
@@ -393,8 +413,6 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
(?>...) atomic, non-capturing group
.
.
-.
-.
.SH "COMMENT"
.rs
.sp
@@ -403,6 +421,8 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
.
.SH "OPTION SETTING"
.rs
+Changes of these options within a group are automatically cancelled at the end
+of the group.
.sp
(?i) caseless
(?J) allow duplicate names
@@ -413,13 +433,20 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
(?x) extended: ignore white space except in classes
(?xx) as (?x) but also ignore space and tab in classes
(?-...) unset option(s)
+ (?^) unset imnsx options
.sp
+Unsetting x or xx unsets both. Several options may be set at once, and a
+mixture of setting and unsetting such as (?i-x) is allowed, but there may be
+only one hyphen. Setting (but not unsetting) is allowed after (?^ for example
+(?^in). An option setting may appear at the start of a non-capturing group, for
+example (?i:...).
+.P
The following are recognized only at the very start of a pattern or after one
of the newline or \eR options with similar syntax. More than one of them may
appear. For the first three, d is a decimal number.
.sp
(*LIMIT_DEPTH=d) set the backtracking limit to d
- (*LIMIT_HEAP=d) set the heap size limit to d kilobytes
+ (*LIMIT_HEAP=d) set the heap size limit to d * 1024 bytes
(*LIMIT_MATCH=d) set the match limit to d
(*NOTEMPTY) set PCRE2_NOTEMPTY when matching
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
@@ -535,7 +562,11 @@ condition if the relevant named group exists.
.SH "BACKTRACKING CONTROL"
.rs
.sp
-The following act immediately they are reached:
+All backtracking control verbs may be in the form (*VERB:NAME). For (*MARK) the
+name is mandatory, for the others it is optional. (*SKIP) changes its behaviour
+if :NAME is present. The others just set a name for passing back to the caller,
+but this is not a name that (*SKIP) can see. The following act immediately they
+are reached:
.sp
(*ACCEPT) force successful match
(*FAIL) force backtrack; synonym (*F)
@@ -548,12 +579,13 @@ pattern is not anchored.
.sp
(*COMMIT) overall failure, no advance of starting point
(*PRUNE) advance to next starting character
- (*PRUNE:NAME) equivalent to (*MARK:NAME)(*PRUNE)
(*SKIP) advance to current matching position
(*SKIP:NAME) advance to position corresponding to an earlier
(*MARK:NAME); if not found, the (*SKIP) is ignored
(*THEN) local failure, backtrack to next alternation
- (*THEN:NAME) equivalent to (*MARK:NAME)(*THEN)
+.sp
+The effect of one of these verbs in a group called as a subroutine is confined
+to the subroutine call.
.
.
.SH "CALLOUTS"
@@ -589,6 +621,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 17 June 2017
-Copyright (c) 1997-2017 University of Cambridge.
+Last updated: 02 September 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/doc/pcre2test.1 b/doc/pcre2test.1
index ee78792..f590fe1 100644
--- a/doc/pcre2test.1
+++ b/doc/pcre2test.1
@@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "21 Decbmber 2017" "PCRE 10.31"
+.TH PCRE2TEST 1 "21 July 2018" "PCRE2 10.32"
.SH NAME
pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
@@ -101,7 +101,7 @@ to occur).
UTF-8 (in its original definition) is not capable of encoding values greater
than 0x7fffffff, but such values can be handled by the 32-bit library. When
testing this library in non-UTF mode with \fButf8_input\fP set, if any
-character is preceded by the byte 0xff (which is an illegal byte in UTF-8)
+character is preceded by the byte 0xff (which is an invalid byte in UTF-8)
0x80000000 is added to the character's value. This is the only way of passing
such code points in a pattern string. For subject strings, using an escape
sequence is preferable.
@@ -220,7 +220,7 @@ Do not output the version number of \fBpcre2test\fP at the start of execution.
.TP 10
\fB-S\fP \fIsize\fP
On Unix-like systems, set the size of the run-time stack to \fIsize\fP
-megabytes.
+mebibytes (units of 1024*1024 bytes).
.TP 10
\fB-subject\fP \fImodifier-list\fP
Behave as if each subject line contains the given modifiers.
@@ -266,7 +266,8 @@ number of subject lines to be matched against that pattern. In between sets of
test data, command lines that begin with # may appear. This file format, with
some restrictions, can also be processed by the \fBperltest.sh\fP script that
is distributed with PCRE2 as a means of checking that the behaviour of PCRE2
-and Perl is the same.
+and Perl is the same. For a specification of \fBperltest.sh\fP, see the
+comments near its beginning.
.P
When the input is a terminal, \fBpcre2test\fP prompts for each line of input,
using "re>" to prompt for regular expression patterns, and "data>" to prompt
@@ -359,10 +360,11 @@ patterns. Modifiers on a pattern can change these settings.
The appearance of this line causes all subsequent modifier settings to be
checked for compatibility with the \fBperltest.sh\fP script, which is used to
confirm that Perl gives the same results as PCRE2. Also, apart from comment
-lines, none of the other command lines are permitted, because they and many
-of the modifiers are specific to \fBpcre2test\fP, and should not be used in
-test files that are also processed by \fBperltest.sh\fP. The \fB#perltest\fP
-command helps detect tests that are accidentally put in the wrong file.
+lines, #pattern commands, and #subject commands that set or unset "mark", no
+command lines are permitted, because they and many of the modifiers are
+specific to \fBpcre2test\fP, and should not be used in test files that are also
+processed by \fBperltest.sh\fP. The \fB#perltest\fP command helps detect tests
+that are accidentally put in the wrong file.
.sp
#pop [<modifiers>]
#popcopy [<modifiers>]
@@ -639,8 +641,8 @@ The effects of these modifiers are described in the following sections.
.sp
The \fBbsr\fP modifier specifies what \eR in a pattern should match. If it is
set to "anycrlf", \eR matches CR, LF, or CRLF only. If it is set to "unicode",
-\eR matches any Unicode newline sequence. The default is specified when PCRE2
-is built, with the default default being Unicode.
+\eR matches any Unicode newline sequence. The default can be specified when
+PCRE2 is built; if it is not, the default is set to Unicode.
.P
The \fBnewline\fP modifier specifies which characters are to be interpreted as
newlines, both in the pattern and in subject lines. The type must be one of CR,
@@ -1168,7 +1170,7 @@ pattern.
get=<number or name> extract captured substring
getall extract all captured substrings
/g global global matching
- heap_limit=<n> set a limit on heap memory
+ heap_limit=<n> set a limit on heap memory (Kbytes)
jitstack=<n> set size of JIT stack
mark show mark values
match_limit=<n> set a match limit
@@ -1381,11 +1383,11 @@ matching provokes an error return ("bad option value") from
.sp
The \fBjitstack\fP modifier provides a way of setting the maximum stack size
that is used by the just-in-time optimization code. It is ignored if JIT
-optimization is not being used. The value is a number of kilobytes. Setting
-zero reverts to the default of 32K. Providing a stack that is larger than the
-default is necessary only for very complicated patterns. If \fBjitstack\fP is
-set non-zero on a subject line it overrides any value that was set on the
-pattern.
+optimization is not being used. The value is a number of kibibytes (units of
+1024 bytes). Setting zero reverts to the default of 32KiB. Providing a stack
+that is larger than the default is necessary only for very complicated
+patterns. If \fBjitstack\fP is set non-zero on a subject line it overrides any
+value that was set on the pattern.
.
.
.SS "Setting heap, match, and depth limits"
@@ -1401,24 +1403,36 @@ the appropriate limits in the match context. These values are ignored when the
.sp
If the \fBfind_limits\fP modifier is present on a subject line, \fBpcre2test\fP
calls the relevant matching function several times, setting different values in
-the match context via \fBpcre2_set_heap_limit(), \fBpcre2_set_match_limit()\fP,
-or \fBpcre2_set_depth_limit()\fP until it finds the minimum values for each
-parameter that allows the match to complete without error.
-.P
-If JIT is being used, only the match limit is relevant. If DFA matching is
-being used, only the depth limit is relevant.
+the match context via \fBpcre2_set_heap_limit()\fP,
+\fBpcre2_set_match_limit()\fP, or \fBpcre2_set_depth_limit()\fP until it finds
+the minimum values for each parameter that allow the match to complete without
+error. If JIT is being used, only the match limit is relevant.
.P
-The \fImatch_limit\fP number is a measure of the amount of backtracking
-that takes place, and learning the minimum value can be instructive. For most
-simple matches, the number is quite small, but for patterns with very large
-numbers of matching possibilities, it can become large very quickly with
-increasing length of subject string.
+When using this modifier, the pattern should not contain any limit settings
+such as (*LIMIT_MATCH=...) within it. If such a setting is present and is
+lower than the minimum matching value, the minimum value cannot be found
+because \fBpcre2_set_match_limit()\fP etc. are only able to reduce the value of
+an in-pattern limit; they cannot increase it.
.P
For non-DFA matching, the minimum \fIdepth_limit\fP number is a measure of how
much nested backtracking happens (that is, how deeply the pattern's tree is
searched). In the case of DFA matching, \fIdepth_limit\fP controls the depth of
recursive calls of the internal function that is used for handling pattern
recursion, lookaround assertions, and atomic groups.
+.P
+For non-DFA matching, the \fImatch_limit\fP number is a measure of the amount
+of backtracking that takes place, and learning the minimum value can be
+instructive. For most simple matches, the number is quite small, but for
+patterns with very large numbers of matching possibilities, it can become large
+very quickly with increasing length of subject string. In the case of DFA
+matching, \fImatch_limit\fP controls the total number of calls, both recursive
+and non-recursive, to the internal matching function, thus controlling the
+overall amount of computing resource that is used.
+.P
+For both kinds of matching, the \fIheap_limit\fP number, which is in kibibytes
+(units of 1024 bytes), limits the amount of heap memory used for matching. A
+value of zero disables the use of any heap memory; many simple pattern matches
+can be done without using the heap, so zero is not an unreasonable setting.
.
.
.SS "Showing MARK names"
@@ -1437,13 +1451,14 @@ is added to the non-match message.
.sp
The \fBmemory\fP modifier causes \fBpcre2test\fP to log the sizes of all heap
memory allocation and freeing calls that occur during a call to
-\fBpcre2_match()\fP. These occur only when a match requires a bigger vector
-than the default for remembering backtracking points. In many cases there will
-be no heap memory used and therefore no additional output. No heap memory is
-allocated during matching with \fBpcre2_dfa_match\fP or with JIT, so in those
-cases the \fBmemory\fP modifier never has any effect. For this modifier to
-work, the \fBnull_context\fP modifier must not be set on both the pattern and
-the subject, though it can be set on one or the other.
+\fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP. These occur only when a match
+requires a bigger vector than the default for remembering backtracking points
+(\fBpcre2_match()\fP) or for internal workspace (\fBpcre2_dfa_match()\fP). In
+many cases there will be no heap memory used and therefore no additional
+output. No heap memory is allocated during matching with JIT, so in that case
+the \fBmemory\fP modifier never has any effect. For this modifier to work, the
+\fBnull_context\fP modifier must not be set on both the pattern and the
+subject, though it can be set on one or the other.
.
.
.SS "Setting a starting offset"
@@ -1882,15 +1897,20 @@ for serializing and de-serializing. They are described in the
documentation. In this section we describe the features of \fBpcre2test\fP that
can be used to test these functions.
.P
-When a pattern with \fBpush\fP modifier is successfully compiled, it is pushed
-onto a stack of compiled patterns, and \fBpcre2test\fP expects the next line to
-contain a new pattern (or command) instead of a subject line. By contrast,
-the \fBpushcopy\fP modifier causes a copy of the compiled pattern to be
-stacked, leaving the original available for immediate matching. By using
-\fBpush\fP and/or \fBpushcopy\fP, a number of patterns can be compiled and
-retained. These modifiers are incompatible with \fBposix\fP, and control
-modifiers that act at match time are ignored (with a message) for the stacked
-patterns. The \fBjitverify\fP modifier applies only at compile time.
+Note that "serialization" in PCRE2 does not convert compiled patterns to an
+abstract format like Java or .NET. It just makes a reloadable byte code stream.
+Hence the restrictions on reloading mentioned above.
+.P
+In \fBpcre2test\fP, when a pattern with \fBpush\fP modifier is successfully
+compiled, it is pushed onto a stack of compiled patterns, and \fBpcre2test\fP
+expects the next line to contain a new pattern (or command) instead of a
+subject line. By contrast, the \fBpushcopy\fP modifier causes a copy of the
+compiled pattern to be stacked, leaving the original available for immediate
+matching. By using \fBpush\fP and/or \fBpushcopy\fP, a number of patterns can
+be compiled and retained. These modifiers are incompatible with \fBposix\fP,
+and control modifiers that act at match time are ignored (with a message) for
+the stacked patterns. The \fBjitverify\fP modifier applies only at compile
+time.
.P
The command
.sp
@@ -1962,6 +1982,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 21 December 2017
-Copyright (c) 1997-2017 University of Cambridge.
+Last updated: 21 July 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/doc/pcre2test.txt b/doc/pcre2test.txt
index 93efd24..44727a7 100644
--- a/doc/pcre2test.txt
+++ b/doc/pcre2test.txt
@@ -94,7 +94,7 @@ INPUT ENCODING
UTF-8 (in its original definition) is not capable of encoding values
greater than 0x7fffffff, but such values can be handled by the 32-bit
library. When testing this library in non-UTF mode with utf8_input set,
- if any character is preceded by the byte 0xff (which is an illegal byte
+ if any character is preceded by the byte 0xff (which is an invalid byte
in UTF-8) 0x80000000 is added to the character's value. This is the
only way of passing such code points in a pattern string. For subject
strings, using an escape sequence is preferable.
@@ -208,7 +208,7 @@ COMMAND LINE OPTIONS
execution.
-S size On Unix-like systems, set the size of the run-time stack to
- size megabytes.
+ size mebibytes (units of 1024*1024 bytes).
-subject modifier-list
Behave as if each subject line contains the given modifiers.
@@ -251,121 +251,122 @@ DESCRIPTION
tern. In between sets of test data, command lines that begin with # may
appear. This file format, with some restrictions, can also be processed
by the perltest.sh script that is distributed with PCRE2 as a means of
- checking that the behaviour of PCRE2 and Perl is the same.
+ checking that the behaviour of PCRE2 and Perl is the same. For a speci-
+ fication of perltest.sh, see the comments near its beginning.
When the input is a terminal, pcre2test prompts for each line of input,
- using "re>" to prompt for regular expression patterns, and "data>" to
- prompt for subject lines. Command lines starting with # can be entered
+ using "re>" to prompt for regular expression patterns, and "data>" to
+ prompt for subject lines. Command lines starting with # can be entered
only in response to the "re>" prompt.
- Each subject line is matched separately and independently. If you want
+ Each subject line is matched separately and independently. If you want
to do multi-line matches, you have to use the \n escape sequence (or \r
- or \r\n, etc., depending on the newline setting) in a single line of
- input to encode the newline sequences. There is no limit on the length
- of subject lines; the input buffer is automatically extended if it is
- too small. There are replication features that makes it possible to
- generate long repetitive pattern or subject lines without having to
+ or \r\n, etc., depending on the newline setting) in a single line of
+ input to encode the newline sequences. There is no limit on the length
+ of subject lines; the input buffer is automatically extended if it is
+ too small. There are replication features that make it possible to
+ generate long repetitive pattern or subject lines without having to
supply them explicitly.
- An empty line or the end of the file signals the end of the subject
- lines for a test, at which point a new pattern or command line is
+ An empty line or the end of the file signals the end of the subject
+ lines for a test, at which point a new pattern or command line is
expected if there is still input to be read.
COMMAND LINES
- In between sets of test data, a line that begins with # is interpreted
+ In between sets of test data, a line that begins with # is interpreted
as a command line. If the first character is followed by white space or
- an exclamation mark, the line is treated as a comment, and ignored.
+ an exclamation mark, the line is treated as a comment, and ignored.
Otherwise, the following commands are recognized:
#forbid_utf
- Subsequent patterns automatically have the PCRE2_NEVER_UTF and
- PCRE2_NEVER_UCP options set, which locks out the use of the PCRE2_UTF
- and PCRE2_UCP options and the use of (*UTF) and (*UCP) at the start of
- patterns. This command also forces an error if a subsequent pattern
- contains any occurrences of \P, \p, or \X, which are still supported
- when PCRE2_UTF is not set, but which require Unicode property support
+ Subsequent patterns automatically have the PCRE2_NEVER_UTF and
+ PCRE2_NEVER_UCP options set, which locks out the use of the PCRE2_UTF
+ and PCRE2_UCP options and the use of (*UTF) and (*UCP) at the start of
+ patterns. This command also forces an error if a subsequent pattern
+ contains any occurrences of \P, \p, or \X, which are still supported
+ when PCRE2_UTF is not set, but which require Unicode property support
to be included in the library.
- This is a trigger guard that is used in test files to ensure that UTF
- or Unicode property tests are not accidentally added to files that are
- used when Unicode support is not included in the library. Setting
- PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as a default can also be obtained
- by the use of #pattern; the difference is that #forbid_utf cannot be
- unset, and the automatic options are not displayed in pattern informa-
+ This is a trigger guard that is used in test files to ensure that UTF
+ or Unicode property tests are not accidentally added to files that are
+ used when Unicode support is not included in the library. Setting
+ PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as a default can also be obtained
+ by the use of #pattern; the difference is that #forbid_utf cannot be
+ unset, and the automatic options are not displayed in pattern informa-
tion, to avoid cluttering up test output.
#load <filename>
This command is used to load a set of precompiled patterns from a file,
- as described in the section entitled "Saving and restoring compiled
+ as described in the section entitled "Saving and restoring compiled
patterns" below.
#newline_default [<newline-list>]
- When PCRE2 is built, a default newline convention can be specified.
- This determines which characters and/or character pairs are recognized
+ When PCRE2 is built, a default newline convention can be specified.
+ This determines which characters and/or character pairs are recognized
as indicating a newline in a pattern or subject string. The default can
- be overridden when a pattern is compiled. The standard test files con-
- tain tests of various newline conventions, but the majority of the
- tests expect a single linefeed to be recognized as a newline by
+ be overridden when a pattern is compiled. The standard test files con-
+ tain tests of various newline conventions, but the majority of the
+ tests expect a single linefeed to be recognized as a newline by
default. Without special action the tests would fail when PCRE2 is com-
piled with either CR or CRLF as the default newline.
The #newline_default command specifies a list of newline types that are
- acceptable as the default. The types must be one of CR, LF, CRLF, ANY-
+ acceptable as the default. The types must be one of CR, LF, CRLF, ANY-
CRLF, ANY, or NUL (in upper or lower case), for example:
#newline_default LF Any anyCRLF
If the default newline is in the list, this command has no effect. Oth-
- erwise, except when testing the POSIX API, a newline modifier that
- specifies the first newline convention in the list (LF in the above
- example) is added to any pattern that does not already have a newline
+ erwise, except when testing the POSIX API, a newline modifier that
+ specifies the first newline convention in the list (LF in the above
+ example) is added to any pattern that does not already have a newline
modifier. If the newline list is empty, the feature is turned off. This
command is present in a number of the standard test input files.
- When the POSIX API is being tested there is no way to override the
- default newline convention, though it is possible to set the newline
- convention from within the pattern. A warning is given if the posix or
- posix_nosub modifier is used when #newline_default would set a default
+ When the POSIX API is being tested there is no way to override the
+ default newline convention, though it is possible to set the newline
+ convention from within the pattern. A warning is given if the posix or
+ posix_nosub modifier is used when #newline_default would set a default
for the non-POSIX API.
#pattern <modifier-list>
- This command sets a default modifier list that applies to all subse-
+ This command sets a default modifier list that applies to all subse-
quent patterns. Modifiers on a pattern can change these settings.
#perltest
- The appearance of this line causes all subsequent modifier settings to
+ The appearance of this line causes all subsequent modifier settings to
be checked for compatibility with the perltest.sh script, which is used
- to confirm that Perl gives the same results as PCRE2. Also, apart from
- comment lines, none of the other command lines are permitted, because
- they and many of the modifiers are specific to pcre2test, and should
- not be used in test files that are also processed by perltest.sh. The
- #perltest command helps detect tests that are accidentally put in the
- wrong file.
+ to confirm that Perl gives the same results as PCRE2. Also, apart from
+ comment lines, #pattern commands, and #subject commands that set or
+ unset "mark", no command lines are permitted, because they and many of
+ the modifiers are specific to pcre2test, and should not be used in test
+ files that are also processed by perltest.sh. The #perltest command
+ helps detect tests that are accidentally put in the wrong file.
#pop [<modifiers>]
#popcopy [<modifiers>]
- These commands are used to manipulate the stack of compiled patterns,
- as described in the section entitled "Saving and restoring compiled
+ These commands are used to manipulate the stack of compiled patterns,
+ as described in the section entitled "Saving and restoring compiled
patterns" below.
#save <filename>
- This command is used to save a set of compiled patterns to a file, as
- described in the section entitled "Saving and restoring compiled pat-
+ This command is used to save a set of compiled patterns to a file, as
+ described in the section entitled "Saving and restoring compiled pat-
terns" below.
#subject <modifier-list>
- This command sets a default modifier list that applies to all subse-
- quent subject lines. Modifiers on a subject line can change these set-
+ This command sets a default modifier list that applies to all subse-
+ quent subject lines. Modifiers on a subject line can change these set-
tings.
@@ -373,58 +374,58 @@ MODIFIER SYNTAX
Modifier lists are used with both pattern and subject lines. Items in a
list are separated by commas followed by optional white space. Trailing
- whitespace in a modifier list is ignored. Some modifiers may be given
- for both patterns and subject lines, whereas others are valid only for
+ whitespace in a modifier list is ignored. Some modifiers may be given
+ for both patterns and subject lines, whereas others are valid only for
one or the other. Each modifier has a long name, for example
- "anchored", and some of them must be followed by an equals sign and a
- value, for example, "offset=12". Values cannot contain comma charac-
- ters, but may contain spaces. Modifiers that do not take values may be
+ "anchored", and some of them must be followed by an equals sign and a
+ value, for example, "offset=12". Values cannot contain comma charac-
+ ters, but may contain spaces. Modifiers that do not take values may be
preceded by a minus sign to turn off a previous setting.
A few of the more common modifiers can also be specified as single let-
- ters, for example "i" for "caseless". In documentation, following the
+ ters, for example "i" for "caseless". In documentation, following the
Perl convention, these are written with a slash ("the /i modifier") for
- clarity. Abbreviated modifiers must all be concatenated in the first
- item of a modifier list. If the first item is not recognized as a long
- modifier name, it is interpreted as a sequence of these abbreviations.
+ clarity. Abbreviated modifiers must all be concatenated in the first
+ item of a modifier list. If the first item is not recognized as a long
+ modifier name, it is interpreted as a sequence of these abbreviations.
For example:
/abc/ig,newline=cr,jit=3
- This is a pattern line whose modifier list starts with two one-letter
- modifiers (/i and /g). The lower-case abbreviated modifiers are the
+ This is a pattern line whose modifier list starts with two one-letter
+ modifiers (/i and /g). The lower-case abbreviated modifiers are the
same as used in Perl.
PATTERN SYNTAX
- A pattern line must start with one of the following characters (common
+ A pattern line must start with one of the following characters (common
symbols, excluding pattern meta-characters):
/ ! " ' ` - = _ : ; , % & @ ~
- This is interpreted as the pattern's delimiter. A regular expression
- may be continued over several input lines, in which case the newline
+ This is interpreted as the pattern's delimiter. A regular expression
+ may be continued over several input lines, in which case the newline
characters are included within it. It is possible to include the delim-
iter within the pattern by escaping it with a backslash, for example
/abc\/def/
- If you do this, the escape and the delimiter form part of the pattern,
+ If you do this, the escape and the delimiter form part of the pattern,
but since the delimiters are all non-alphanumeric, this does not affect
- its interpretation. If the terminating delimiter is immediately fol-
+ its interpretation. If the terminating delimiter is immediately fol-
lowed by a backslash, for example,
/abc/\
- then a backslash is added to the end of the pattern. This is done to
- provide a way of testing the error condition that arises if a pattern
+ then a backslash is added to the end of the pattern. This is done to
+ provide a way of testing the error condition that arises if a pattern
finishes with a backslash, because
/abc\/
- is interpreted as the first line of a pattern that starts with "abc/",
- causing pcre2test to read the next line as a continuation of the regu-
+ is interpreted as the first line of a pattern that starts with "abc/",
+ causing pcre2test to read the next line as a continuation of the regu-
lar expression.
A pattern can be followed by a modifier list (details below).
@@ -432,7 +433,7 @@ PATTERN SYNTAX
SUBJECT LINE SYNTAX
- Before each subject line is passed to pcre2_match() or
+ Before each subject line is passed to pcre2_match() or
pcre2_dfa_match(), leading and trailing white space is removed, and the
line is scanned for backslash escapes, unless the subject_literal modi-
fier was set for the pattern. The following provide a means of encoding
@@ -453,23 +454,23 @@ SUBJECT LINE SYNTAX
\x{hh...} hexadecimal character (any number of hex digits)
The use of \x{hh...} is not dependent on the use of the utf modifier on
- the pattern. It is recognized always. There may be any number of hexa-
- decimal digits inside the braces; invalid values provoke error mes-
+ the pattern. It is recognized always. There may be any number of hexa-
+ decimal digits inside the braces; invalid values provoke error mes-
sages.
- Note that \xhh specifies one byte rather than one character in UTF-8
- mode; this makes it possible to construct invalid UTF-8 sequences for
- testing purposes. On the other hand, \x{hh} is interpreted as a UTF-8
- character in UTF-8 mode, generating more than one byte if the value is
- greater than 127. When testing the 8-bit library not in UTF-8 mode,
+ Note that \xhh specifies one byte rather than one character in UTF-8
+ mode; this makes it possible to construct invalid UTF-8 sequences for
+ testing purposes. On the other hand, \x{hh} is interpreted as a UTF-8
+ character in UTF-8 mode, generating more than one byte if the value is
+ greater than 127. When testing the 8-bit library not in UTF-8 mode,
\x{hh} generates one byte for values less than 256, and causes an error
for greater values.
In UTF-16 mode, all 4-digit \x{hhhh} values are accepted. This makes it
possible to construct invalid UTF-16 sequences for testing purposes.
- In UTF-32 mode, all 4- to 8-digit \x{...} values are accepted. This
- makes it possible to construct invalid UTF-32 sequences for testing
+ In UTF-32 mode, all 4- to 8-digit \x{...} values are accepted. This
+ makes it possible to construct invalid UTF-32 sequences for testing
purposes.
There is a special backslash sequence that specifies replication of one
@@ -477,31 +478,31 @@ SUBJECT LINE SYNTAX
\[<characters>]{<count>}
- This makes it possible to test long strings without having to provide
+ This makes it possible to test long strings without having to provide
them as part of the file. For example:
\[abc]{4}
- is converted to "abcabcabcabc". This feature does not support nesting.
+ is converted to "abcabcabcabc". This feature does not support nesting.
To include a closing square bracket in the characters, code it as \x5D.
- A backslash followed by an equals sign marks the end of the subject
+ A backslash followed by an equals sign marks the end of the subject
string and the start of a modifier list. For example:
abc\=notbol,notempty
- If the subject string is empty and \= is followed by whitespace, the
- line is treated as a comment line, and is not used for matching. For
+ If the subject string is empty and \= is followed by whitespace, the
+ line is treated as a comment line, and is not used for matching. For
example:
\= This is a comment.
abc\= This is an invalid modifier list.
- A backslash followed by any other non-alphanumeric character just
+ A backslash followed by any other non-alphanumeric character just
escapes that character. A backslash followed by anything else causes an
- error. However, if the very last character in the line is a backslash
- (and there is no modifier list), it is ignored. This gives a way of
- passing an empty line as data, since a real empty line terminates the
+ error. However, if the very last character in the line is a backslash
+ (and there is no modifier list), it is ignored. This gives a way of
+ passing an empty line as data, since a real empty line terminates the
data input.
If the subject_literal modifier is set for a pattern, all subject lines
@@ -512,22 +513,22 @@ SUBJECT LINE SYNTAX
PATTERN MODIFIERS
- There are several types of modifier that can appear in pattern lines.
+ There are several types of modifier that can appear in pattern lines.
Except where noted below, they may also be used in #pattern commands. A
- pattern's modifier list can add to or override default modifiers that
+ pattern's modifier list can add to or override default modifiers that
were set by a previous #pattern command.
Setting compilation options
- The following modifiers set options for pcre2_compile(). Most of them
- set bits in the options argument of that function, but those whose
+ The following modifiers set options for pcre2_compile(). Most of them
+ set bits in the options argument of that function, but those whose
names start with PCRE2_EXTRA are additional options that are set in the
- compile context. For the main options, there are some single-letter
- abbreviations that are the same as Perl options. There is special han-
- dling for /x: if a second x is present, PCRE2_EXTENDED is converted
- into PCRE2_EXTENDED_MORE as in Perl. A third appearance adds
- PCRE2_EXTENDED as well, though this makes no difference to the way
- pcre2_compile() behaves. See pcre2api for a description of the effects
+ compile context. For the main options, there are some single-letter
+ abbreviations that are the same as Perl options. There is special han-
+ dling for /x: if a second x is present, PCRE2_EXTENDED is converted
+ into PCRE2_EXTENDED_MORE as in Perl. A third appearance adds
+ PCRE2_EXTENDED as well, though this makes no difference to the way
+ pcre2_compile() behaves. See pcre2api for a description of the effects
of these options.
allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS
@@ -565,16 +566,16 @@ PATTERN MODIFIERS
utf set PCRE2_UTF
As well as turning on the PCRE2_UTF option, the utf modifier causes all
- non-printing characters in output strings to be printed using the
- \x{hh...} notation. Otherwise, those less than 0x100 are output in hex
- without the curly brackets. Setting utf in 16-bit or 32-bit mode also
- causes pattern and subject strings to be translated to UTF-16 or
+ non-printing characters in output strings to be printed using the
+ \x{hh...} notation. Otherwise, those less than 0x100 are output in hex
+ without the curly brackets. Setting utf in 16-bit or 32-bit mode also
+ causes pattern and subject strings to be translated to UTF-16 or
UTF-32, respectively, before being passed to library functions.
Setting compilation controls
- The following modifiers affect the compilation process or request
- information about the pattern. There are single-letter abbreviations
+ The following modifiers affect the compilation process or request
+ information about the pattern. There are single-letter abbreviations
for some that are heavily used in the test files.
bsr=[anycrlf|unicode] specify \R handling
@@ -612,10 +613,11 @@ PATTERN MODIFIERS
Newline and \R handling
- The bsr modifier specifies what \R in a pattern should match. If it is
- set to "anycrlf", \R matches CR, LF, or CRLF only. If it is set to
- "unicode", \R matches any Unicode newline sequence. The default is
- specified when PCRE2 is built, with the default default being Unicode.
+ The bsr modifier specifies what \R in a pattern should match. If it is
+ set to "anycrlf", \R matches CR, LF, or CRLF only. If it is set to
+ "unicode", \R matches any Unicode newline sequence. The default can be
+ specified when PCRE2 is built; if it is not, the default is set to Uni-
+ code.
The newline modifier specifies which characters are to be interpreted
as newlines, both in the pattern and in subject lines. The type must be
@@ -1071,7 +1073,7 @@ SUBJECT MODIFIERS
get=<number or name> extract captured substring
getall extract all captured substrings
/g global global matching
- heap_limit=<n> set a limit on heap memory
+ heap_limit=<n> set a limit on heap memory (Kbytes)
jitstack=<n> set size of JIT stack
mark show mark values
match_limit=<n> set a match limit
@@ -1272,11 +1274,11 @@ SUBJECT MODIFIERS
The jitstack modifier provides a way of setting the maximum stack size
that is used by the just-in-time optimization code. It is ignored if
- JIT optimization is not being used. The value is a number of kilobytes.
- Setting zero reverts to the default of 32K. Providing a stack that is
- larger than the default is necessary only for very complicated pat-
- terns. If jitstack is set non-zero on a subject line it overrides any
- value that was set on the pattern.
+ JIT optimization is not being used. The value is a number of kibibytes
+ (units of 1024 bytes). Setting zero reverts to the default of 32KiB.
+ Providing a stack that is larger than the default is necessary only for
+ very complicated patterns. If jitstack is set non-zero on a subject
+ line it overrides any value that was set on the pattern.
Setting heap, match, and depth limits
@@ -1291,126 +1293,139 @@ SUBJECT MODIFIERS
values in the match context via pcre2_set_heap_limit(),
pcre2_set_match_limit(), or pcre2_set_depth_limit() until it finds the
minimum values for each parameter that allows the match to complete
- without error.
-
- If JIT is being used, only the match limit is relevant. If DFA matching
- is being used, only the depth limit is relevant.
+ without error. If JIT is being used, only the match limit is relevant.
- The match_limit number is a measure of the amount of backtracking that
- takes place, and learning the minimum value can be instructive. For
- most simple matches, the number is quite small, but for patterns with
- very large numbers of matching possibilities, it can become large very
- quickly with increasing length of subject string.
+ When using this modifier, the pattern should not contain any limit set-
+ tings such as (*LIMIT_MATCH=...) within it. If such a setting is
+ present and is lower than the minimum matching value, the minimum value
+ cannot be found because pcre2_set_match_limit() etc. are only able to
+ reduce the value of an in-pattern limit; they cannot increase it.
- For non-DFA matching, the minimum depth_limit number is a measure of
+ For non-DFA matching, the minimum depth_limit number is a measure of
how much nested backtracking happens (that is, how deeply the pattern's
- tree is searched). In the case of DFA matching, depth_limit controls
- the depth of recursive calls of the internal function that is used for
+ tree is searched). In the case of DFA matching, depth_limit controls
+ the depth of recursive calls of the internal function that is used for
handling pattern recursion, lookaround assertions, and atomic groups.
+ For non-DFA matching, the match_limit number is a measure of the amount
+ of backtracking that takes place, and learning the minimum value can be
+ instructive. For most simple matches, the number is quite small, but
+ for patterns with very large numbers of matching possibilities, it can
+ become large very quickly with increasing length of subject string. In
+ the case of DFA matching, match_limit controls the total number of
+ calls, both recursive and non-recursive, to the internal matching func-
+ tion, thus controlling the overall amount of computing resource that is
+ used.
+
+ For both kinds of matching, the heap_limit number, which is in
+ kibibytes (units of 1024 bytes), limits the amount of heap memory used
+ for matching. A value of zero disables the use of any heap memory; many
+ simple pattern matches can be done without using the heap, so zero is
+ not an unreasonable setting.
+
Showing MARK names
The mark modifier causes the names from backtracking control verbs that
- are returned from calls to pcre2_match() to be displayed. If a mark is
- returned for a match, non-match, or partial match, pcre2test shows it.
- For a match, it is on a line by itself, tagged with "MK:". Otherwise,
+ are returned from calls to pcre2_match() to be displayed. If a mark is
+ returned for a match, non-match, or partial match, pcre2test shows it.
+ For a match, it is on a line by itself, tagged with "MK:". Otherwise,
it is added to the non-match message.
Showing memory usage
- The memory modifier causes pcre2test to log the sizes of all heap mem-
- ory allocation and freeing calls that occur during a call to
- pcre2_match(). These occur only when a match requires a bigger vector
- than the default for remembering backtracking points. In many cases
- there will be no heap memory used and therefore no additional output.
- No heap memory is allocated during matching with pcre2_dfa_match or
- with JIT, so in those cases the memory modifier never has any effect.
- For this modifier to work, the null_context modifier must not be set on
- both the pattern and the subject, though it can be set on one or the
- other.
+ The memory modifier causes pcre2test to log the sizes of all heap mem-
+ ory allocation and freeing calls that occur during a call to
+ pcre2_match() or pcre2_dfa_match(). These occur only when a match
+ requires a bigger vector than the default for remembering backtracking
+ points (pcre2_match()) or for internal workspace (pcre2_dfa_match()).
+ In many cases there will be no heap memory used and therefore no addi-
+ tional output. No heap memory is allocated during matching with JIT, so
+ in that case the memory modifier never has any effect. For this modi-
+ fier to work, the null_context modifier must not be set on both the
+ pattern and the subject, though it can be set on one or the other.
Setting a starting offset
- The offset modifier sets an offset in the subject string at which
+ The offset modifier sets an offset in the subject string at which
matching starts. Its value is a number of code units, not characters.
Setting an offset limit
- The offset_limit modifier sets a limit for unanchored matches. If a
+ The offset_limit modifier sets a limit for unanchored matches. If a
match cannot be found starting at or before this offset in the subject,
a "no match" return is given. The data value is a number of code units,
- not characters. When this modifier is used, the use_offset_limit modi-
+ not characters. When this modifier is used, the use_offset_limit modi-
fier must have been set for the pattern; if not, an error is generated.
Setting the size of the output vector
- The ovector modifier applies only to the subject line in which it
- appears, though of course it can also be used to set a default in a
- #subject command. It specifies the number of pairs of offsets that are
+ The ovector modifier applies only to the subject line in which it
+ appears, though of course it can also be used to set a default in a
+ #subject command. It specifies the number of pairs of offsets that are
available for storing matching information. The default is 15.
- A value of zero is useful when testing the POSIX API because it causes
+ A value of zero is useful when testing the POSIX API because it causes
regexec() to be called with a NULL capture vector. When not testing the
- POSIX API, a value of zero is used to cause pcre2_match_data_cre-
- ate_from_pattern() to be called, in order to create a match block of
+ POSIX API, a value of zero is used to cause pcre2_match_data_cre-
+ ate_from_pattern() to be called, in order to create a match block of
exactly the right size for the pattern. (It is not possible to create a
- match block with a zero-length ovector; there is always at least one
+ match block with a zero-length ovector; there is always at least one
pair of offsets.)
Passing the subject as zero-terminated
By default, the subject string is passed to a native API matching func-
tion with its correct length. In order to test the facility for passing
- a zero-terminated string, the zero_terminate modifier is provided. It
- causes the length to be passed as PCRE2_ZERO_TERMINATED. When matching
+ a zero-terminated string, the zero_terminate modifier is provided. It
+ causes the length to be passed as PCRE2_ZERO_TERMINATED. When matching
via the POSIX interface, this modifier is ignored, with a warning.
- When testing pcre2_substitute(), this modifier also has the effect of
+ When testing pcre2_substitute(), this modifier also has the effect of
passing the replacement string as zero-terminated.
Passing a NULL context
- Normally, pcre2test passes a context block to pcre2_match(),
+ Normally, pcre2test passes a context block to pcre2_match(),
pcre2_dfa_match() or pcre2_jit_match(). If the null_context modifier is
- set, however, NULL is passed. This is for testing that the matching
+ set, however, NULL is passed. This is for testing that the matching
functions behave correctly in this case (they use default values). This
- modifier cannot be used with the find_limits modifier or when testing
+ modifier cannot be used with the find_limits modifier or when testing
the substitution function.
THE ALTERNATIVE MATCHING FUNCTION
- By default, pcre2test uses the standard PCRE2 matching function,
+ By default, pcre2test uses the standard PCRE2 matching function,
pcre2_match(), to match each subject line. PCRE2 also supports an alter-
- native matching function, pcre2_dfa_match(), which operates in a dif-
- ferent way, and has some restrictions. The differences between the two
+ native matching function, pcre2_dfa_match(), which operates in a dif-
+ ferent way, and has some restrictions. The differences between the two
functions are described in the pcre2matching documentation.
- If the dfa modifier is set, the alternative matching function is used.
- This function finds all possible matches at a given point in the sub-
- ject. If, however, the dfa_shortest modifier is set, processing stops
- after the first match is found. This is always the shortest possible
+ If the dfa modifier is set, the alternative matching function is used.
+ This function finds all possible matches at a given point in the sub-
+ ject. If, however, the dfa_shortest modifier is set, processing stops
+ after the first match is found. This is always the shortest possible
match.
DEFAULT OUTPUT FROM pcre2test
- This section describes the output when the normal matching function,
+ This section describes the output when the normal matching function,
pcre2_match(), is being used.
- When a match succeeds, pcre2test outputs the list of captured sub-
- strings, starting with number 0 for the string that matched the whole
- pattern. Otherwise, it outputs "No match" when the return is
- PCRE2_ERROR_NOMATCH, or "Partial match:" followed by the partially
- matching substring when the return is PCRE2_ERROR_PARTIAL. (Note that
- this is the entire substring that was inspected during the partial
- match; it may include characters before the actual match start if a
+ When a match succeeds, pcre2test outputs the list of captured sub-
+ strings, starting with number 0 for the string that matched the whole
+ pattern. Otherwise, it outputs "No match" when the return is
+ PCRE2_ERROR_NOMATCH, or "Partial match:" followed by the partially
+ matching substring when the return is PCRE2_ERROR_PARTIAL. (Note that
+ this is the entire substring that was inspected during the partial
+ match; it may include characters before the actual match start if a
lookbehind assertion, \K, \b, or \B was involved.)
For any other return, pcre2test outputs the PCRE2 negative error number
- and a short descriptive phrase. If the error is a failed UTF string
- check, the code unit offset of the start of the failing character is
+ and a short descriptive phrase. If the error is a failed UTF string
+ check, the code unit offset of the start of the failing character is
also output. Here is an example of an interactive pcre2test run.
$ pcre2test
@@ -1426,8 +1441,8 @@ DEFAULT OUTPUT FROM pcre2test
Unset capturing substrings that are not followed by one that is set are
not shown by pcre2test unless the allcaptures modifier is specified. In
the following example, there are two capturing substrings, but when the
- first data line is matched, the second, unset substring is not shown.
- An "internal" unset substring is shown as "<unset>", as for the second
+ first data line is matched, the second, unset substring is not shown.
+ An "internal" unset substring is shown as "<unset>", as for the second
data line.
re> /(a)|(b)/
@@ -1439,11 +1454,11 @@ DEFAULT OUTPUT FROM pcre2test
1: <unset>
2: b
- If the strings contain any non-printing characters, they are output as
- \xhh escapes if the value is less than 256 and UTF mode is not set.
+ If the strings contain any non-printing characters, they are output as
+ \xhh escapes if the value is less than 256 and UTF mode is not set.
Otherwise they are output as \x{hh...} escapes. See below for the defi-
- nition of non-printing characters. If the aftertext modifier is set,
- the output for substring 0 is followed by the the rest of the subject
+ nition of non-printing characters. If the aftertext modifier is set,
+ the output for substring 0 is followed by the rest of the subject
string, identified by "0+" like this:
re> /cat/aftertext
@@ -1451,7 +1466,7 @@ DEFAULT OUTPUT FROM pcre2test
0: cat
0+ aract
- If global matching is requested, the results of successive matching
+ If global matching is requested, the results of successive matching
attempts are output in sequence, like this:
re> /\Bi(\w\w)/g
@@ -1463,8 +1478,8 @@ DEFAULT OUTPUT FROM pcre2test
0: ipp
1: pp
- "No match" is output only if the first match attempt fails. Here is an
- example of a failure message (the offset 4 that is specified by the
+ "No match" is output only if the first match attempt fails. Here is an
+ example of a failure message (the offset 4 that is specified by the
offset modifier is past the end of the subject string):
re> /xyz/
@@ -1472,7 +1487,7 @@ DEFAULT OUTPUT FROM pcre2test
Error -24 (bad offset value)
Note that whereas patterns can be continued over several lines (a plain
- ">" prompt is used for continuations), subject lines may not. However
+ ">" prompt is used for continuations), subject lines may not. However,
newlines can be included in a subject by means of the \n escape (or \r,
\r\n, etc., depending on the newline sequence setting).
@@ -1480,7 +1495,7 @@ DEFAULT OUTPUT FROM pcre2test
OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
When the alternative matching function, pcre2_dfa_match(), is used, the
- output consists of a list of all the matches that start at the first
+ output consists of a list of all the matches that start at the first
point in the subject where there is at least one match. For example:
re> /(tang|tangerine|tan)/
@@ -1489,11 +1504,11 @@ OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
1: tang
2: tan
- Using the normal matching function on this data finds only "tang". The
- longest matching string is always given first (and numbered zero).
- After a PCRE2_ERROR_PARTIAL return, the output is "Partial match:",
- followed by the partially matching substring. Note that this is the
- entire substring that was inspected during the partial match; it may
+ Using the normal matching function on this data finds only "tang". The
+ longest matching string is always given first (and numbered zero).
+ After a PCRE2_ERROR_PARTIAL return, the output is "Partial match:",
+ followed by the partially matching substring. Note that this is the
+ entire substring that was inspected during the partial match; it may
include characters before the actual match start if a lookbehind asser-
tion, \b, or \B was involved. (\K is not supported for DFA matching.)
@@ -1509,16 +1524,16 @@ OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
1: tan
0: tan
- The alternative matching function does not support substring capture,
- so the modifiers that are concerned with captured substrings are not
+ The alternative matching function does not support substring capture,
+ so the modifiers that are concerned with captured substrings are not
relevant.
RESTARTING AFTER A PARTIAL MATCH
- When the alternative matching function has given the PCRE2_ERROR_PAR-
+ When the alternative matching function has given the PCRE2_ERROR_PAR-
TIAL return, indicating that the subject partially matched the pattern,
- you can restart the match with additional subject data by means of the
+ you can restart the match with additional subject data by means of the
dfa_restart modifier. For example:
re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
@@ -1527,37 +1542,37 @@ RESTARTING AFTER A PARTIAL MATCH
data> n05\=dfa,dfa_restart
0: n05
- For further information about partial matching, see the pcre2partial
+ For further information about partial matching, see the pcre2partial
documentation.
CALLOUTS
If the pattern contains any callout requests, pcre2test's callout func-
- tion is called during matching unless callout_none is specified. This
+ tion is called during matching unless callout_none is specified. This
works with both matching functions, and with JIT, though there are some
- differences in behaviour. The output for callouts with numerical argu-
+ differences in behaviour. The output for callouts with numerical argu-
ments and those with string arguments is slightly different.
Callouts with numerical arguments
By default, the callout function displays the callout number, the start
- and current positions in the subject text at the callout time, and the
+ and current positions in the subject text at the callout time, and the
next pattern item to be tested. For example:
--->pqrabcdef
0 ^ ^ \d
- This output indicates that callout number 0 occurred for a match
- attempt starting at the fourth character of the subject string, when
- the pointer was at the seventh character, and when the next pattern
- item was \d. Just one circumflex is output if the start and current
- positions are the same, or if the current position precedes the start
+ This output indicates that callout number 0 occurred for a match
+ attempt starting at the fourth character of the subject string, when
+ the pointer was at the seventh character, and when the next pattern
+ item was \d. Just one circumflex is output if the start and current
+ positions are the same, or if the current position precedes the start
position, which can happen if the callout is in a lookbehind assertion.
Callouts numbered 255 are assumed to be automatic callouts, inserted as
a result of the auto_callout pattern modifier. In this case, instead of
- showing the callout number, the offset in the pattern, preceded by a
+ showing the callout number, the offset in the pattern, preceded by a
plus, is output. For example:
re> /\d?[A-E]\*/auto_callout
@@ -1570,7 +1585,7 @@ CALLOUTS
0: E*
If a pattern contains (*MARK) items, an additional line is output when-
- ever a change of latest mark is passed to the callout function. For
+ ever a change of latest mark is passed to the callout function. For
example:
re> /a(*MARK:X)bc/auto_callout
@@ -1584,17 +1599,17 @@ CALLOUTS
+12 ^ ^
0: abc
- The mark changes between matching "a" and "b", but stays the same for
- the rest of the match, so nothing more is output. If, as a result of
- backtracking, the mark reverts to being unset, the text "<unset>" is
+ The mark changes between matching "a" and "b", but stays the same for
+ the rest of the match, so nothing more is output. If, as a result of
+ backtracking, the mark reverts to being unset, the text "<unset>" is
output.
Callouts with string arguments
The output for a callout with a string argument is similar, except that
- instead of outputting a callout number before the position indicators,
- the callout string and its offset in the pattern string are output
- before the reflection of the subject string, and the subject string is
+ instead of outputting a callout number before the position indicators,
+ the callout string and its offset in the pattern string are output
+ before the reflection of the subject string, and the subject string is
reflected for each callout. For example:
re> /^ab(?C'first')cd(?C"second")ef/
@@ -1610,26 +1625,26 @@ CALLOUTS
Callout modifiers
- The callout function in pcre2test returns zero (carry on matching) by
- default, but you can use a callout_fail modifier in a subject line to
+ The callout function in pcre2test returns zero (carry on matching) by
+ default, but you can use a callout_fail modifier in a subject line to
change this and other parameters of the callout (see below).
If the callout_capture modifier is set, the current captured groups are
output when a callout occurs. This is useful only for non-DFA matching,
- as pcre2_dfa_match() does not support capturing, so no captures are
+ as pcre2_dfa_match() does not support capturing, so no captures are
ever shown.
The normal callout output, showing the callout number or pattern offset
- (as described above) is suppressed if the callout_no_where modifier is
+ (as described above) is suppressed if the callout_no_where modifier is
set.
- When using the interpretive matching function pcre2_match() without
- JIT, setting the callout_extra modifier causes additional output from
- pcre2test's callout function to be generated. For the first callout in
- a match attempt at a new starting position in the subject, "New match
- attempt" is output. If there has been a backtrack since the last call-
+ When using the interpretive matching function pcre2_match() without
+ JIT, setting the callout_extra modifier causes additional output from
+ pcre2test's callout function to be generated. For the first callout in
+ a match attempt at a new starting position in the subject, "New match
+ attempt" is output. If there has been a backtrack since the last call-
out (or start of matching if this is the first callout), "Backtrack" is
- output, followed by "No other matching paths" if the backtrack ended
+ output, followed by "No other matching paths" if the backtrack ended
the previous match attempt. For example:
re> /(a+)b/auto_callout,no_start_optimize,no_auto_possess
@@ -1666,104 +1681,108 @@ CALLOUTS
+1 ^ a+
No match
- Notice that various optimizations must be turned off if you want all
- possible matching paths to be scanned. If no_start_optimize is not
- used, there is an immediate "no match", without any callouts, because
- the starting optimization fails to find "b" in the subject, which it
- knows must be present for any match. If no_auto_possess is not used,
- the "a+" item is turned into "a++", which reduces the number of back-
+ Notice that various optimizations must be turned off if you want all
+ possible matching paths to be scanned. If no_start_optimize is not
+ used, there is an immediate "no match", without any callouts, because
+ the starting optimization fails to find "b" in the subject, which it
+ knows must be present for any match. If no_auto_possess is not used,
+ the "a+" item is turned into "a++", which reduces the number of back-
tracks.
- The callout_extra modifier has no effect if used with the DFA matching
+ The callout_extra modifier has no effect if used with the DFA matching
function, or with JIT.
Return values from callouts
- The default return from the callout function is zero, which allows
+ The default return from the callout function is zero, which allows
matching to continue. The callout_fail modifier can be given one or two
numbers. If there is only one number, 1 is returned instead of 0 (caus-
ing matching to backtrack) when a callout of that number is reached. If
- two numbers (<n>:<m>) are given, 1 is returned when callout <n> is
- reached and there have been at least <m> callouts. The callout_error
+ two numbers (<n>:<m>) are given, 1 is returned when callout <n> is
+ reached and there have been at least <m> callouts. The callout_error
modifier is similar, except that PCRE2_ERROR_CALLOUT is returned, caus-
- ing the entire matching process to be aborted. If both these modifiers
- are set for the same callout number, callout_error takes precedence.
- Note that callouts with string arguments are always given the number
+ ing the entire matching process to be aborted. If both these modifiers
+ are set for the same callout number, callout_error takes precedence.
+ Note that callouts with string arguments are always given the number
zero.
- The callout_data modifier can be given an unsigned or a negative num-
- ber. This is set as the "user data" that is passed to the matching
- function, and passed back when the callout function is invoked. Any
- value other than zero is used as a return from pcre2test's callout
+ The callout_data modifier can be given an unsigned or a negative num-
+ ber. This is set as the "user data" that is passed to the matching
+ function, and passed back when the callout function is invoked. Any
+ value other than zero is used as a return from pcre2test's callout
function.
Inserting callouts can be helpful when using pcre2test to check compli-
- cated regular expressions. For further information about callouts, see
+ cated regular expressions. For further information about callouts, see
the pcre2callout documentation.
NON-PRINTING CHARACTERS
When pcre2test is outputting text in the compiled version of a pattern,
- bytes other than 32-126 are always treated as non-printing characters
+ bytes other than 32-126 are always treated as non-printing characters
and are therefore shown as hex escapes.
- When pcre2test is outputting text that is a matched part of a subject
- string, it behaves in the same way, unless a different locale has been
- set for the pattern (using the locale modifier). In this case, the
- isprint() function is used to distinguish printing and non-printing
+ When pcre2test is outputting text that is a matched part of a subject
+ string, it behaves in the same way, unless a different locale has been
+ set for the pattern (using the locale modifier). In this case, the
+ isprint() function is used to distinguish printing and non-printing
characters.
SAVING AND RESTORING COMPILED PATTERNS
- It is possible to save compiled patterns on disc or elsewhere, and
+ It is possible to save compiled patterns on disc or elsewhere, and
reload them later, subject to a number of restrictions. JIT data cannot
- be saved. The host on which the patterns are reloaded must be running
+ be saved. The host on which the patterns are reloaded must be running
the same version of PCRE2, with the same code unit width, and must also
- have the same endianness, pointer width and PCRE2_SIZE type. Before
- compiled patterns can be saved they must be serialized, that is, con-
- verted to a stream of bytes. A single byte stream may contain any num-
- ber of compiled patterns, but they must all use the same character
+ have the same endianness, pointer width and PCRE2_SIZE type. Before
+ compiled patterns can be saved they must be serialized, that is, con-
+ verted to a stream of bytes. A single byte stream may contain any num-
+ ber of compiled patterns, but they must all use the same character
tables. A single copy of the tables is included in the byte stream (its
size is 1088 bytes).
- The functions whose names begin with pcre2_serialize_ are used for
- serializing and de-serializing. They are described in the pcre2serial-
+ The functions whose names begin with pcre2_serialize_ are used for
+ serializing and de-serializing. They are described in the pcre2serial-
ize documentation. In this section we describe the features of
pcre2test that can be used to test these functions.
- When a pattern with push modifier is successfully compiled, it is
- pushed onto a stack of compiled patterns, and pcre2test expects the
- next line to contain a new pattern (or command) instead of a subject
- line. By contrast, the pushcopy modifier causes a copy of the compiled
- pattern to be stacked, leaving the original available for immediate
- matching. By using push and/or pushcopy, a number of patterns can be
- compiled and retained. These modifiers are incompatible with posix, and
- control modifiers that act at match time are ignored (with a message)
- for the stacked patterns. The jitverify modifier applies only at com-
- pile time.
+ Note that "serialization" in PCRE2 does not convert compiled patterns
+ to an abstract format, as serialization in Java or .NET does. It just makes
+ a reloadable byte code stream. Hence the restrictions on reloading mentioned above.
+
+ In pcre2test, when a pattern with the push modifier is successfully com-
+ piled, it is pushed onto a stack of compiled patterns, and pcre2test
+ expects the next line to contain a new pattern (or command) instead of
+ a subject line. By contrast, the pushcopy modifier causes a copy of the
+ compiled pattern to be stacked, leaving the original available for
+ immediate matching. By using push and/or pushcopy, a number of patterns
+ can be compiled and retained. These modifiers are incompatible with
+ posix, and control modifiers that act at match time are ignored (with a
+ message) for the stacked patterns. The jitverify modifier applies only
+ at compile time.
The command
#save <filename>
causes all the stacked patterns to be serialized and the result written
- to the named file. Afterwards, all the stacked patterns are freed. The
+ to the named file. Afterwards, all the stacked patterns are freed. The
command
#load <filename>
- reads the data in the file, and then arranges for it to be de-serial-
- ized, with the resulting compiled patterns added to the pattern stack.
- The pattern on the top of the stack can be retrieved by the #pop com-
- mand, which must be followed by lines of subjects that are to be
- matched with the pattern, terminated as usual by an empty line or end
- of file. This command may be followed by a modifier list containing
- only control modifiers that act after a pattern has been compiled. In
+ reads the data in the file, and then arranges for it to be de-serial-
+ ized, with the resulting compiled patterns added to the pattern stack.
+ The pattern on the top of the stack can be retrieved by the #pop com-
+ mand, which must be followed by lines of subjects that are to be
+ matched with the pattern, terminated as usual by an empty line or end
+ of file. This command may be followed by a modifier list containing
+ only control modifiers that act after a pattern has been compiled. In
particular, hex, posix, posix_nosub, push, and pushcopy are not
- allowed, nor are any option-setting modifiers. The JIT modifiers are,
- however permitted. Here is an example that saves and reloads two pat-
+ allowed, nor are any option-setting modifiers. The JIT modifiers are,
+ however, permitted. Here is an example that saves and reloads two pat-
terns.
/abc/push
@@ -1776,10 +1795,10 @@ SAVING AND RESTORING COMPILED PATTERNS
#pop jit,bincode
abc
- If jitverify is used with #pop, it does not automatically imply jit,
+ If jitverify is used with #pop, it does not automatically imply jit,
which is different behaviour from when it is used on a pattern.
- The #popcopy command is analagous to the pushcopy modifier in that it
+ The #popcopy command is analogous to the pushcopy modifier in that it
makes current a copy of the topmost stack pattern, leaving the original
still on the stack.
@@ -1799,5 +1818,5 @@ AUTHOR
REVISION
- Last updated: 21 December 2017
- Copyright (c) 1997-2017 University of Cambridge.
+ Last updated: 21 July 2018
+ Copyright (c) 1997-2018 University of Cambridge.
diff --git a/doc/pcre2unicode.3 b/doc/pcre2unicode.3
index 813fadf..877d887 100644
--- a/doc/pcre2unicode.3
+++ b/doc/pcre2unicode.3
@@ -1,4 +1,4 @@
-.TH PCRE2UNICODE 3 "17 May 2017" "PCRE2 10.30"
+.TH PCRE2UNICODE 3 "02 September 2018" "PCRE2 10.32"
.SH NAME
PCRE - Perl-compatible regular expressions (revised API)
.SH "UNICODE AND UTF SUPPORT"
@@ -16,7 +16,8 @@ you must call
with the PCRE2_UTF option flag, or the pattern must start with the sequence
(*UTF). When either of these is the case, both the pattern and any subject
strings that are matched against it are treated as UTF strings instead of
-strings of individual one-code-unit characters.
+strings of individual one-code-unit characters. There are also some other
+changes to the way characters are handled, as documented below.
.P
If you do not need Unicode support you can build PCRE2 without it, in which
case the library will be smaller.
@@ -46,11 +47,15 @@ compatibility with Perl 5.6. PCRE2 does not support this.
.SH "WIDE CHARACTERS AND UTF MODES"
.rs
.sp
-Codepoints less than 256 can be specified in patterns by either braced or
+Code points less than 256 can be specified in patterns by either braced or
unbraced hexadecimal escape sequences (for example, \ex{b3} or \exb3). Larger
values have to use braced sequences. Unbraced octal code points up to \e777 are
also recognized; larger ones can be coded using \eo{...}.
.P
+The escape sequence \eN{U+<hex digits>} is recognized as another way of
+specifying a Unicode character by code point in a UTF mode. It is not allowed
+in non-UTF modes.
+.P
In UTF modes, repeat quantifiers apply to complete UTF characters, not to
individual code units.
.P
@@ -109,7 +114,7 @@ not PCRE2_UCP is set.
Case-insensitive matching in a UTF mode makes use of Unicode properties except
for characters whose code points are less than 128 and that have at most two
case-equivalent values. For these, a direct table lookup is used for speed. A
-few Unicode characters such as Greek sigma have more than two codepoints that
+few Unicode characters such as Greek sigma have more than two code points that
are case-equivalent, and these are treated as such.
.
.
@@ -280,6 +285,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 17 May 2017
-Copyright (c) 1997-2017 University of Cambridge.
+Last updated: 02 September 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/perltest.sh b/perltest.sh
index 1a7679a..5e6c466 100755
--- a/perltest.sh
+++ b/perltest.sh
@@ -43,15 +43,25 @@ fi
# afteralltext ignored
# dupnames ignored (Perl always allows)
# jitstack ignored
-# mark ignored
+# mark show mark information
# no_auto_possess ignored
-# no_start_optimize ignored
+# no_start_optimize insert (??{""}) at pattern start (disables optimizing)
+# -no_start_optimize ignored
# subject_literal does not process subjects for escapes
# ucp sets Perl's /u modifier
# utf invoke UTF-8 functionality
#
+# Comment lines are ignored. The #pattern command can be used to set modifiers
+# that will be added to each subsequent pattern, after any modifiers it may
+# already have. NOTE: this is different to pcre2test where #pattern sets
+# defaults which can be overridden on individual patterns. The #subject command
+# may be used to set or unset a default "mark" modifier for data lines. This is
+# the only use of #subject that is supported. The #perltest, #forbid_utf, and
+# #newline_default commands, which are needed in the relevant pcre2test files,
+# are ignored. Any other #-command is ignored, with a warning message.
+#
# The data lines must not have any pcre2test modifiers. Unless
-# "subject_litersl" is on the pattern, data lines are processed as
+# "subject_literal" is on the pattern, data lines are processed as
# Perl double-quoted strings, so if they contain " $ or @ characters, these
# have to be escaped. For this reason, all such characters in the
# Perl-compatible testinput1 and testinput4 files are escaped so that they can
@@ -127,7 +137,42 @@ for (;;)
printf " re> " if $interact;
last if ! ($_ = <$infile>);
printf $outfile "$_" if ! $interact;
- next if ($_ =~ /^\s*$/ || $_ =~ /^#/);
+ next if ($_ =~ /^\s*$/ || $_ =~ /^#[\s!]/);
+
+ # A few of pcre2test's #-commands are supported, or just ignored. Any others
+ # are ignored, with a warning message.
+
+ if ($_ =~ /^#pattern(.*)/)
+ {
+ $extra_modifiers = $1;
+ chomp($extra_modifiers);
+ $extra_modifiers =~ s/\s+$//;
+ next;
+ }
+ elsif ($_ =~ /^#subject(.*)/)
+ {
+ $mod = $1;
+ chomp($mod);
+ $mod =~ s/\s+$//;
+ if ($mod =~ s/(-?)mark,?//)
+ {
+ $minus = $1;
+ $default_show_mark = ($minus =~ /^$/);
+ }
+ if ($mod !~ /^\s*$/)
+ {
+ printf $outfile "** Warning: \"$mod\" in #subject ignored\n";
+ }
+ next;
+ }
+ elsif ($_ =~ /^#/)
+ {
+ if ($_ !~ /^#newline_default|^#perltest|^#forbid_utf/)
+ {
+ printf $outfile "** Warning: #-command ignored: %s", $_;
+ }
+ next;
+ }
$pattern = $_;
@@ -146,7 +191,9 @@ for (;;)
$pattern =~ /^\s*((.).*\2)(.*)$/s;
$pat = $1;
- $mod = $3;
+ $del = $2;
+ $mod = "$3,$extra_modifiers";
+ $mod =~ s/^,\s*//;
# The private "aftertext" modifier means "print $' afterwards".
@@ -172,18 +219,24 @@ for (;;)
$mod =~ s/jitstack=\d+,?//;
- # Remove "mark" (asks pcre2test to check MARK data) */
+ # The "mark" modifier requests checking of MARK data
- $mod =~ s/mark,?//;
+ $show_mark = $default_show_mark | ($mod =~ s/mark,?//);
# "ucp" asks pcre2test to set PCRE2_UCP; change this to /u for Perl
$mod =~ s/ucp,?/u/;
- # Remove "no_auto_possess" and "no_start_optimize" (disable PCRE2 optimizations)
+ # Remove "no_auto_possess".
$mod =~ s/no_auto_possess,?//;
- $mod =~ s/no_start_optimize,?//;
+
+ # Use no_start_optimize (disable PCRE2 start-up optimization) to disable Perl
+ # optimization by inserting (??{""}) at the start of the pattern. We may
+ # also encounter -no_start_optimize from a #pattern setting.
+
+ $mod =~ s/-no_start_optimize,?//;
+ if ($mod =~ s/no_start_optimize,?//) { $pat =~ s/$del/$del(??{""})/; }
# Add back retained modifiers and check that the pattern is valid.
@@ -279,7 +332,7 @@ for (;;)
elsif (scalar(@subs) == 0)
{
printf $outfile "No match";
- if (defined $REGERROR && $REGERROR != 1)
+ if ($show_mark && defined $REGERROR && $REGERROR != 1)
{ printf $outfile (", mark = %s", &pchars($REGERROR)); }
printf $outfile "\n";
}
@@ -307,7 +360,7 @@ for (;;)
# set and the input pattern was a UTF-8 string. We can, however, force
# it to be so marked.
- if (defined $REGMARK && $REGMARK != 1)
+ if ($show_mark && defined $REGMARK && $REGMARK != 1)
{
$xx = $REGMARK;
$xx = Encode::decode_utf8($xx) if $utf8;
diff --git a/src/config.h.generic b/src/config.h.generic
index f738616..89a52ef 100644
--- a/src/config.h.generic
+++ b/src/config.h.generic
@@ -18,10 +18,10 @@ to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
but if you do, default values will be taken from config.h for non-boolean
macros that are not defined on the command line.
-Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be defined
-(conventionally to 1) for TRUE, and not defined at all for FALSE. All such
-macros are listed as a commented #undef in config.h.generic. Macros such as
-MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
+Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
+defined (conventionally to 1) for TRUE, or not defined at all for FALSE. All
+such macros are listed as a commented #undef in config.h.generic. Macros such
+as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
@@ -132,17 +132,18 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to 1 if you have the <zlib.h> header file. */
/* #undef HAVE_ZLIB_H */
-/* This limits the amount of memory that pcre2_match() may use while matching
- a pattern. The value is in kilobytes. */
+/* This limits the amount of memory that may be used while matching a pattern.
+ It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply
+ to JIT matching. The value is in kibibytes (units of 1024 bytes). */
#ifndef HEAP_LIMIT
#define HEAP_LIMIT 20000000
#endif
/* The value of LINK_SIZE determines the number of bytes used to store links
as offsets within the compiled regex. The default is 2, which allows for
- compiled patterns up to 64K long. This covers the vast majority of cases.
- However, PCRE2 can also be compiled to use 3 or 4 bytes instead. This
- allows for longer patterns in extreme cases. */
+ compiled patterns up to 65535 code units long. This covers the vast
+ majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes
+ instead. This allows for longer patterns in extreme cases. */
#ifndef LINK_SIZE
#define LINK_SIZE 2
#endif
@@ -155,7 +156,8 @@ sure both macros are undefined; an emulation function will then be used. */
/* The value of MATCH_LIMIT determines the default number of times the
pcre2_match() function can record a backtrack position during a single
- matching attempt. There is a runtime interface for setting a different
+ matching attempt. The value is also used to limit a loop counter in
+ pcre2_dfa_match(). There is a runtime interface for setting a different
limit. The limit exists in order to catch runaway regular expressions that
take for ever to determine that they do not match. The default is set very
large so that it does not accidentally catch legitimate cases. */
@@ -170,7 +172,9 @@ sure both macros are undefined; an emulation function will then be used. */
MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it
must be less than the value of MATCH_LIMIT. The default is to use the same
value as MATCH_LIMIT. There is a runtime method for setting a different
- limit. */
+ limit. In the case of pcre2_dfa_match(), this limit controls the depth of
+ the internal nested function calls that are used for pattern recursions,
+ lookarounds, and atomic groups. */
#ifndef MATCH_LIMIT_DEPTH
#define MATCH_LIMIT_DEPTH MATCH_LIMIT
#endif
@@ -210,7 +214,7 @@ sure both macros are undefined; an emulation function will then be used. */
#define PACKAGE_NAME "PCRE2"
/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "PCRE2 10.31"
+#define PACKAGE_STRING "PCRE2 10.32"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "pcre2"
@@ -219,7 +223,7 @@ sure both macros are undefined; an emulation function will then be used. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
-#define PACKAGE_VERSION "10.31"
+#define PACKAGE_VERSION "10.32"
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
parentheses (of any kind) in a pattern. This limits the amount of system
@@ -339,7 +343,7 @@ sure both macros are undefined; an emulation function will then be used. */
#endif
/* Version number of package */
-#define VERSION "10.31"
+#define VERSION "10.32"
/* Define to 1 if on MINIX. */
/* #undef _MINIX */
diff --git a/src/config.h.in b/src/config.h.in
index 7a3a861..d8a5280 100644
--- a/src/config.h.in
+++ b/src/config.h.in
@@ -18,10 +18,10 @@ to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
but if you do, default values will be taken from config.h for non-boolean
macros that are not defined on the command line.
-Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be defined
-(conventionally to 1) for TRUE, and not defined at all for FALSE. All such
-macros are listed as a commented #undef in config.h.generic. Macros such as
-MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
+Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
+defined (conventionally to 1) for TRUE, or not defined at all for FALSE. All
+such macros are listed as a commented #undef in config.h.generic. Macros such
+as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
@@ -132,15 +132,16 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to 1 if you have the <zlib.h> header file. */
#undef HAVE_ZLIB_H
-/* This limits the amount of memory that pcre2_match() may use while matching
- a pattern. The value is in kilobytes. */
+/* This limits the amount of memory that may be used while matching a pattern.
+ It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply
+ to JIT matching. The value is in kibibytes (units of 1024 bytes). */
#undef HEAP_LIMIT
/* The value of LINK_SIZE determines the number of bytes used to store links
as offsets within the compiled regex. The default is 2, which allows for
- compiled patterns up to 64K long. This covers the vast majority of cases.
- However, PCRE2 can also be compiled to use 3 or 4 bytes instead. This
- allows for longer patterns in extreme cases. */
+ compiled patterns up to 65535 code units long. This covers the vast
+ majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes
+ instead. This allows for longer patterns in extreme cases. */
#undef LINK_SIZE
/* Define to the sub-directory where libtool stores uninstalled libraries. */
@@ -148,7 +149,8 @@ sure both macros are undefined; an emulation function will then be used. */
/* The value of MATCH_LIMIT determines the default number of times the
pcre2_match() function can record a backtrack position during a single
- matching attempt. There is a runtime interface for setting a different
+ matching attempt. The value is also used to limit a loop counter in
+ pcre2_dfa_match(). There is a runtime interface for setting a different
limit. The limit exists in order to catch runaway regular expressions that
take for ever to determine that they do not match. The default is set very
large so that it does not accidentally catch legitimate cases. */
@@ -161,7 +163,9 @@ sure both macros are undefined; an emulation function will then be used. */
MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it
must be less than the value of MATCH_LIMIT. The default is to use the same
value as MATCH_LIMIT. There is a runtime method for setting a different
- limit. */
+ limit. In the case of pcre2_dfa_match(), this limit controls the depth of
+ the internal nested function calls that are used for pattern recursions,
+ lookarounds, and atomic groups. */
#undef MATCH_LIMIT_DEPTH
/* This limit is parameterized just in case anybody ever wants to change it.
diff --git a/src/dftables.c b/src/dftables.c
index dfb90b5..c0af362 100644
--- a/src/dftables.c
+++ b/src/dftables.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -104,6 +104,14 @@ fprintf(f,
"tables are passed to PCRE2 by the application that calls it. The tables\n"
"are used only for characters whose code values are less than 256. */\n\n");
+fprintf(f,
+ "/*The dftables program (which is distributed with PCRE2) can be used to\n"
+ "build alternative versions of this file. This is necessary if you are\n"
+ "running in an EBCDIC environment, or if you want to default to a different\n"
+ "encoding, for example ISO-8859-1. When dftables is run, it creates these\n"
+ "tables in the current locale. This happens automatically if PCRE2 is\n"
+ "configured with --enable-rebuild-chartables. */\n\n");
+
/* Force config.h in z/OS */
#if defined NATIVE_ZOS
@@ -115,7 +123,7 @@ fprintf(f,
#endif
fprintf(f,
- "/* The following #includes are present because without them gcc 4.x may remove\n"
+ "/* The following #include is present because without it gcc 4.x may remove\n"
"the array definition from the final binary if PCRE2 is built into a static\n"
"library and dead code stripping is activated. This leads to link errors.\n"
"Pulling in the header ensures that the array gets flagged as \"someone\n"
@@ -153,11 +161,10 @@ for (i = 0; i < 256; i++)
fprintf(f, ",\n\n");
fprintf(f,
- "/* This table contains bit maps for various character classes.\n"
- "Each map is 32 bytes long and the bits run from the least\n"
- "significant end of each byte. The classes that have their own\n"
- "maps are: space, xdigit, digit, upper, lower, word, graph\n"
- "print, punct, and cntrl. Other classes are built from combinations. */\n\n");
+ "/* This table contains bit maps for various character classes. Each map is 32\n"
+ "bytes long and the bits run from the least significant end of each byte. The\n"
+ "classes that have their own maps are: space, xdigit, digit, upper, lower, word,\n"
+ "graph print, punct, and cntrl. Other classes are built from combinations. */\n\n");
fprintf(f, " ");
for (i = 0; i < cbit_length; i++)
@@ -178,10 +185,8 @@ fprintf(f,
" 0x%02x letter\n"
" 0x%02x decimal digit\n"
" 0x%02x hexadecimal digit\n"
- " 0x%02x alphanumeric or '_'\n"
- " 0x%02x regular expression metacharacter or binary zero\n*/\n\n",
- ctype_space, ctype_letter, ctype_digit, ctype_xdigit, ctype_word,
- ctype_meta);
+ " 0x%02x alphanumeric or '_'\n*/\n\n",
+ ctype_space, ctype_letter, ctype_digit, ctype_xdigit, ctype_word);
fprintf(f, " ");
for (i = 0; i < 256; i++)
diff --git a/src/pcre2.h.generic b/src/pcre2.h.generic
index fffcc30..3d2feb7 100644
--- a/src/pcre2.h.generic
+++ b/src/pcre2.h.generic
@@ -5,7 +5,7 @@
/* This is the public header file for the PCRE library, second API, to be
#included by applications that call PCRE2 functions.
- Copyright (c) 2016-2017 University of Cambridge
+ Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -41,10 +41,16 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */
-#define PCRE2_MAJOR 10
-#define PCRE2_MINOR 31
-#define PCRE2_PRERELEASE
-#define PCRE2_DATE 2018-02-12
+#define PCRE2_MAJOR 10
+#define PCRE2_MINOR 32
+#define PCRE2_PRERELEASE
+#define PCRE2_DATE 2018-09-10
+
+/* For the benefit of systems without stdint.h, an alternative is to use
+inttypes.h. The existence of these headers is checked by configure or CMake. */
+
+#define PCRE2_HAVE_STDINT_H 1
+#define PCRE2_HAVE_INTTYPES_H 1
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE2, the appropriate
@@ -81,12 +87,18 @@ set, we ensure here that it has no effect. */
#define PCRE2_CALL_CONVENTION
#endif
-/* Have to include limits.h, stdlib.h and stdint.h to ensure that size_t and
-uint8_t, UCHAR_MAX, etc are defined. */
+/* Have to include limits.h, stdlib.h and stdint.h (or inttypes.h) to ensure
+that size_t and uint8_t, UCHAR_MAX, etc are defined. If the system has neither
+header, the relevant values must be provided by some other means. */
#include <limits.h>
#include <stdlib.h>
+
+#if PCRE2_HAVE_STDINT_H
#include <stdint.h>
+#elif PCRE2_HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
/* Allow for C++ users compiling this directly. */
@@ -269,6 +281,7 @@ pcre2_pattern_convert(). */
#define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156
#define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157
#define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158
+/* Error 159 is obsolete and should now never occur */
#define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159
#define PCRE2_ERROR_VERB_UNKNOWN 160
#define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161
@@ -303,6 +316,8 @@ pcre2_pattern_convert(). */
#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190
#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191
#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192
+#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193
+#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194
/* "Expected" matching error codes: no match and partial match. */
@@ -387,6 +402,7 @@ released, the numbers must not be changed. */
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
#define PCRE2_ERROR_HEAPLIMIT (-63)
#define PCRE2_ERROR_CONVERT_SYNTAX (-64)
+#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
/* Request types for pcre2_pattern_info() */
diff --git a/src/pcre2.h.in b/src/pcre2.h.in
index a3a3fa6..a9396e0 100644
--- a/src/pcre2.h.in
+++ b/src/pcre2.h.in
@@ -5,7 +5,7 @@
/* This is the public header file for the PCRE library, second API, to be
#included by applications that call PCRE2 functions.
- Copyright (c) 2016-2017 University of Cambridge
+ Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -41,10 +41,16 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */
-#define PCRE2_MAJOR @PCRE2_MAJOR@
-#define PCRE2_MINOR @PCRE2_MINOR@
-#define PCRE2_PRERELEASE @PCRE2_PRERELEASE@
-#define PCRE2_DATE @PCRE2_DATE@
+#define PCRE2_MAJOR @PCRE2_MAJOR@
+#define PCRE2_MINOR @PCRE2_MINOR@
+#define PCRE2_PRERELEASE @PCRE2_PRERELEASE@
+#define PCRE2_DATE @PCRE2_DATE@
+
+/* For the benefit of systems without stdint.h, an alternative is to use
+inttypes.h. The existence of these headers is checked by configure or CMake. */
+
+#define PCRE2_HAVE_STDINT_H @PCRE2_HAVE_STDINT_H@
+#define PCRE2_HAVE_INTTYPES_H @PCRE2_HAVE_INTTYPES_H@
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE2, the appropriate
@@ -81,12 +87,18 @@ set, we ensure here that it has no effect. */
#define PCRE2_CALL_CONVENTION
#endif
-/* Have to include limits.h, stdlib.h and stdint.h to ensure that size_t and
-uint8_t, UCHAR_MAX, etc are defined. */
+/* Have to include limits.h, stdlib.h and stdint.h (or inttypes.h) to ensure
+that size_t and uint8_t, UCHAR_MAX, etc are defined. If the system has neither
+header, the relevant values must be provided by some other means. */
#include <limits.h>
#include <stdlib.h>
+
+#if PCRE2_HAVE_STDINT_H
#include <stdint.h>
+#elif PCRE2_HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
/* Allow for C++ users compiling this directly. */
@@ -269,6 +281,7 @@ pcre2_pattern_convert(). */
#define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156
#define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157
#define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158
+/* Error 159 is obsolete and should now never occur */
#define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159
#define PCRE2_ERROR_VERB_UNKNOWN 160
#define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161
@@ -303,6 +316,8 @@ pcre2_pattern_convert(). */
#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190
#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191
#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192
+#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193
+#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194
/* "Expected" matching error codes: no match and partial match. */
@@ -387,6 +402,7 @@ released, the numbers must not be changed. */
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
#define PCRE2_ERROR_HEAPLIMIT (-63)
#define PCRE2_ERROR_CONVERT_SYNTAX (-64)
+#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
/* Request types for pcre2_pattern_info() */
diff --git a/src/pcre2_auto_possess.c b/src/pcre2_auto_possess.c
index 23275a2..2ce152e 100644
--- a/src/pcre2_auto_possess.c
+++ b/src/pcre2_auto_possess.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2017 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -505,7 +505,7 @@ Arguments:
utf TRUE in UTF mode
cb compile data block
base_list the data list of the base opcode
- base_end the end of the data list
+ base_end the end of the base opcode
rec_limit points to recursion depth counter
Returns: TRUE if the auto-possessification is possible
@@ -730,7 +730,7 @@ for(;;)
if ((*xclass_flags & XCL_MAP) == 0)
{
/* No bits are set for characters < 256. */
- if (list[1] == 0) return TRUE;
+ if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0;
/* Might be an empty repeat. */
continue;
}
@@ -1235,6 +1235,7 @@ for (;;)
#endif
case OP_MARK:
+ case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:
diff --git a/src/pcre2_chartables.c.dist b/src/pcre2_chartables.c.dist
index 203cb1a..4046500 100644
--- a/src/pcre2_chartables.c.dist
+++ b/src/pcre2_chartables.c.dist
@@ -2,23 +2,24 @@
* Perl-Compatible Regular Expressions *
*************************************************/
-/* This file contains character tables that are used when no external tables
-are passed to PCRE2 by the application that calls it. The tables are used only
-for characters whose code values are less than 256.
-
-This is a default version of the tables that assumes ASCII encoding. A program
-called dftables (which is distributed with PCRE2) can be used to build
-alternative versions of this file. This is necessary if you are running in an
-EBCDIC environment, or if you want to default to a different encoding, for
-example ISO-8859-1. When dftables is run, it creates these tables in the
-current locale. If PCRE2 is configured with --enable-rebuild-chartables, this
-happens automatically.
-
-The following #includes are present because without them gcc 4.x may remove the
-array definition from the final binary if PCRE2 is built into a static library
-and dead code stripping is activated. This leads to link errors. Pulling in the
-header ensures that the array gets flagged as "someone outside this compilation
-unit might reference this" and so it will always be supplied to the linker. */
+/* This file was automatically written by the dftables auxiliary
+program. It contains character tables that are used when no external
+tables are passed to PCRE2 by the application that calls it. The tables
+are used only for characters whose code values are less than 256. */
+
+/* The dftables program (which is distributed with PCRE2) can be used to
+build alternative versions of this file. This is necessary if you are
+running in an EBCDIC environment, or if you want to default to a different
+encoding, for example ISO-8859-1. When dftables is run, it creates these
+tables in the current locale. This happens automatically if PCRE2 is
+configured with --enable-rebuild-chartables. */
+
+/* The following #include is present because without it gcc 4.x may remove
+the array definition from the final binary if PCRE2 is built into a static
+library and dead code stripping is activated. This leads to link errors.
+Pulling in the header ensures that the array gets flagged as "someone
+outside this compilation unit might reference this" and so it will always
+be supplied to the linker. */
#ifdef HAVE_CONFIG_H
#include "config.h"
@@ -101,7 +102,7 @@ const uint8_t PRIV(default_tables)[] = {
/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
-graph, print, punct, and cntrl. Other classes are built from combinations. */
+graph, print, punct, and cntrl. Other classes are built from combinations. */
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
@@ -159,25 +160,24 @@ graph, print, punct, and cntrl. Other classes are built from combinations. */
0x04 decimal digit
0x08 hexadecimal digit
0x10 alphanumeric or '_'
- 0x80 regular expression metacharacter or binary zero
*/
- 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
- 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
- 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
+ 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
- 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
+ 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
- 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
+ 0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x10, /* X - _ */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
- 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
+ 0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x00, /* x -127 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 87530fb..6bb1de3 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2017 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -63,8 +63,8 @@ POSSIBILITY OF SUCH DAMAGE.
/* Other debugging code can be enabled by these defines. */
-// #define DEBUG_SHOW_CAPTURES
-// #define DEBUG_SHOW_PARSED
+/* #define DEBUG_SHOW_CAPTURES */
+/* #define DEBUG_SHOW_PARSED */
/* There are a few things that vary with different code unit sizes. Handle them
by defining macros in order to minimize #if usage. */
@@ -250,34 +250,35 @@ is present where expected in a conditional group. */
#define META_LOOKBEHINDNOT 0x80250000u /* (?<! */
/* These must be kept in this order, with consecutive values, and the _ARG
-versions of PRUNE, SKIP, and THEN immediately after their non-argument
+versions of COMMIT, PRUNE, SKIP, and THEN immediately after their non-argument
versions. */
#define META_MARK 0x80260000u /* (*MARK) */
#define META_ACCEPT 0x80270000u /* (*ACCEPT) */
-#define META_COMMIT 0x80280000u /* (*COMMIT) */
-#define META_FAIL 0x80290000u /* (*FAIL) */
-#define META_PRUNE 0x802a0000u /* These pairs must */
-#define META_PRUNE_ARG 0x802b0000u /* be */
-#define META_SKIP 0x802c0000u /* kept */
-#define META_SKIP_ARG 0x802d0000u /* in */
-#define META_THEN 0x802e0000u /* this */
-#define META_THEN_ARG 0x802f0000u /* order */
+#define META_FAIL 0x80280000u /* (*FAIL) */
+#define META_COMMIT 0x80290000u /* These */
+#define META_COMMIT_ARG 0x802a0000u /* pairs */
+#define META_PRUNE 0x802b0000u /* must */
+#define META_PRUNE_ARG 0x802c0000u /* be */
+#define META_SKIP 0x802d0000u /* kept */
+#define META_SKIP_ARG 0x802e0000u /* in */
+#define META_THEN 0x802f0000u /* this */
+#define META_THEN_ARG 0x80300000u /* order */
/* These must be kept in groups of adjacent 3 values, and all together. */
-#define META_ASTERISK 0x80300000u /* * */
-#define META_ASTERISK_PLUS 0x80310000u /* *+ */
-#define META_ASTERISK_QUERY 0x80320000u /* *? */
-#define META_PLUS 0x80330000u /* + */
-#define META_PLUS_PLUS 0x80340000u /* ++ */
-#define META_PLUS_QUERY 0x80350000u /* +? */
-#define META_QUERY 0x80360000u /* ? */
-#define META_QUERY_PLUS 0x80370000u /* ?+ */
-#define META_QUERY_QUERY 0x80380000u /* ?? */
-#define META_MINMAX 0x80390000u /* {n,m} repeat */
-#define META_MINMAX_PLUS 0x803a0000u /* {n,m}+ repeat */
-#define META_MINMAX_QUERY 0x803b0000u /* {n,m}? repeat */
+#define META_ASTERISK 0x80310000u /* * */
+#define META_ASTERISK_PLUS 0x80320000u /* *+ */
+#define META_ASTERISK_QUERY 0x80330000u /* *? */
+#define META_PLUS 0x80340000u /* + */
+#define META_PLUS_PLUS 0x80350000u /* ++ */
+#define META_PLUS_QUERY 0x80360000u /* +? */
+#define META_QUERY 0x80370000u /* ? */
+#define META_QUERY_PLUS 0x80380000u /* ?+ */
+#define META_QUERY_QUERY 0x80390000u /* ?? */
+#define META_MINMAX 0x803a0000u /* {n,m} repeat */
+#define META_MINMAX_PLUS 0x803b0000u /* {n,m}+ repeat */
+#define META_MINMAX_QUERY 0x803c0000u /* {n,m}? repeat */
#define META_FIRST_QUANTIFIER META_ASTERISK
#define META_LAST_QUANTIFIER META_MINMAX_QUERY
@@ -327,8 +328,9 @@ static unsigned char meta_extra_lengths[] = {
SIZEOFFSET, /* META_LOOKBEHINDNOT */
1, /* META_MARK - plus the string length */
0, /* META_ACCEPT */
- 0, /* META_COMMIT */
0, /* META_FAIL */
+ 0, /* META_COMMIT */
+ 1, /* META_COMMIT_ARG - plus the string length */
0, /* META_PRUNE */
1, /* META_PRUNE_ARG - plus the string length */
0, /* META_SKIP */
@@ -510,17 +512,17 @@ static const short int escapes[] = {
-ESC_Z, CHAR_LEFT_SQUARE_BRACKET,
CHAR_BACKSLASH, CHAR_RIGHT_SQUARE_BRACKET,
CHAR_CIRCUMFLEX_ACCENT, CHAR_UNDERSCORE,
- CHAR_GRAVE_ACCENT, ESC_a,
+ CHAR_GRAVE_ACCENT, CHAR_BEL,
-ESC_b, 0,
- -ESC_d, ESC_e,
- ESC_f, 0,
+ -ESC_d, CHAR_ESC,
+ CHAR_FF, 0,
-ESC_h, 0,
0, -ESC_k,
0, 0,
- ESC_n, 0,
+ CHAR_LF, 0,
-ESC_p, 0,
- ESC_r, -ESC_s,
- ESC_tee, 0,
+ CHAR_CR, -ESC_s,
+ CHAR_HT, 0,
-ESC_v, -ESC_w,
0, 0,
-ESC_z
@@ -544,22 +546,22 @@ because it is defined as 'a', which of course picks up the ASCII value. */
#endif
static const short int escapes[] = {
-/* 80 */ ESC_a, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,
-/* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0,
-/* 90 */ 0, 0, -ESC_k, 0, 0, ESC_n, 0, -ESC_p,
-/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0,
-/* A0 */ 0, '~', -ESC_s, ESC_tee, 0,-ESC_v, -ESC_w, 0,
-/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0,
-/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0,
-/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-',
-/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G,
-/* C8 */-ESC_H, 0, 0, 0, 0, 0, 0, 0,
-/* D0 */ '}', 0, -ESC_K, 0, 0,-ESC_N, 0, -ESC_P,
-/* D8 */-ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0,
-/* E0 */ '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X,
-/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0,
-/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0,
-/* F8 */ 0, 0
+/* 80 */ CHAR_BEL, -ESC_b, 0, -ESC_d, CHAR_ESC, CHAR_FF, 0,
+/* 88 */ -ESC_h, 0, 0, '{', 0, 0, 0, 0,
+/* 90 */ 0, 0, -ESC_k, 0, 0, CHAR_LF, 0, -ESC_p,
+/* 98 */ 0, CHAR_CR, 0, '}', 0, 0, 0, 0,
+/* A0 */ 0, '~', -ESC_s, CHAR_HT, 0, -ESC_v, -ESC_w, 0,
+/* A8 */ 0, -ESC_z, 0, 0, 0, '[', 0, 0,
+/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0,
+/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-',
+/* C0 */ '{', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E, 0, -ESC_G,
+/* C8 */ -ESC_H, 0, 0, 0, 0, 0, 0, 0,
+/* D0 */ '}', 0, -ESC_K, 0, 0, -ESC_N, 0, -ESC_P,
+/* D8 */ -ESC_Q, -ESC_R, 0, 0, 0, 0, 0, 0,
+/* E0 */ '\\', 0, -ESC_S, 0, 0, -ESC_V, -ESC_W, -ESC_X,
+/* E8 */ 0, -ESC_Z, 0, 0, 0, 0, 0, 0,
+/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0,
+/* F8 */ 0, 0
};
/* We also need a table of characters that may follow \c in an EBCDIC
@@ -586,9 +588,9 @@ static const char verbnames[] =
"\0" /* Empty name is a shorthand for MARK */
STRING_MARK0
STRING_ACCEPT0
- STRING_COMMIT0
STRING_F0
STRING_FAIL0
+ STRING_COMMIT0
STRING_PRUNE0
STRING_SKIP0
STRING_THEN;
@@ -596,11 +598,11 @@ static const char verbnames[] =
static const verbitem verbs[] = {
{ 0, META_MARK, +1 }, /* > 0 => must have an argument */
{ 4, META_MARK, +1 },
- { 6, META_ACCEPT, -1 }, /* < 0 => must not have an argument */
- { 6, META_COMMIT, -1 },
+ { 6, META_ACCEPT, -1 }, /* < 0 => Optional argument, convert to pre-MARK */
{ 1, META_FAIL, -1 },
{ 4, META_FAIL, -1 },
- { 5, META_PRUNE, 0 }, /* Argument is optional; bump META code if found */
+ { 6, META_COMMIT, 0 },
+ { 5, META_PRUNE, 0 }, /* Optional argument; bump META code if found */
{ 4, META_SKIP, 0 },
{ 4, META_THEN, 0 }
};
@@ -610,8 +612,8 @@ static const int verbcount = sizeof(verbs)/sizeof(verbitem);
/* Verb opcodes, indexed by their META code offset from META_MARK. */
static const uint32_t verbops[] = {
- OP_MARK, OP_ACCEPT, OP_COMMIT, OP_FAIL, OP_PRUNE, OP_PRUNE_ARG, OP_SKIP,
- OP_SKIP_ARG, OP_THEN, OP_THEN_ARG };
+ OP_MARK, OP_ACCEPT, OP_FAIL, OP_COMMIT, OP_COMMIT_ARG, OP_PRUNE,
+ OP_PRUNE_ARG, OP_SKIP, OP_SKIP_ARG, OP_THEN, OP_THEN_ARG };
/* Offsets from OP_STAR for case-independent and negative repeat opcodes. */
@@ -729,7 +731,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
- ERR91, ERR92};
+ ERR91, ERR92, ERR93, ERR94 };
/* This is a table of start-of-pattern options such as (*UTF) and settings such
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@@ -976,8 +978,8 @@ for (;;)
case META_POSIX_NEG: fprintf(stderr, "META_POSIX_NEG %d", *pptr++); break;
case META_ACCEPT: fprintf(stderr, "META (*ACCEPT)"); break;
- case META_COMMIT: fprintf(stderr, "META (*COMMIT)"); break;
case META_FAIL: fprintf(stderr, "META (*FAIL)"); break;
+ case META_COMMIT: fprintf(stderr, "META (*COMMIT)"); break;
case META_PRUNE: fprintf(stderr, "META (*PRUNE)"); break;
case META_SKIP: fprintf(stderr, "META (*SKIP)"); break;
case META_THEN: fprintf(stderr, "META (*THEN)"); break;
@@ -1067,6 +1069,10 @@ for (;;)
fprintf(stderr, "META (*MARK:");
goto SHOWARG;
+ case META_COMMIT_ARG:
+ fprintf(stderr, "META (*COMMIT:");
+ goto SHOWARG;
+
case META_PRUNE_ARG:
fprintf(stderr, "META (*PRUNE:");
goto SHOWARG;
@@ -1435,6 +1441,48 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
escape = -i; /* Else return a special escape */
if (cb != NULL && (escape == ESC_P || escape == ESC_p || escape == ESC_X))
cb->external_flags |= PCRE2_HASBKPORX; /* Note \P, \p, or \X */
+
+ /* Perl supports \N{name} for character names and \N{U+dddd} for numerical
+ Unicode code points, as well as plain \N for "not newline". PCRE does not
+ support \N{name}. However, it does support quantification such as \N{2,3},
+ so if \N{ is not followed by U+dddd we check for a quantifier. */
+
+ if (escape == ESC_N && ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET)
+ {
+ PCRE2_SPTR p = ptr + 1;
+
+ /* \N{U+ can be handled by the \x{ code. However, this construction is
+ not valid in EBCDIC environments because it specifies a Unicode
+ character, not a codepoint in the local code. For example \N{U+0041}
+ must be "A" in all environments. Also, in Perl, \N{U+ forces Unicode
+ casing semantics for the entire pattern, so allow it only in UTF (i.e.
+ Unicode) mode. */
+
+ if (ptrend - p > 1 && *p == CHAR_U && p[1] == CHAR_PLUS)
+ {
+#ifdef EBCDIC
+ *errorcodeptr = ERR93;
+#else
+ if (utf)
+ {
+ ptr = p + 1;
+ escape = 0; /* Not a fancy escape after all */
+ goto COME_FROM_NU;
+ }
+ else *errorcodeptr = ERR93;
+#endif
+ }
+
+ /* Give an error if what follows is not a quantifier, but don't override
+ an error set by the quantifier reader (e.g. number overflow). */
+
+ else
+ {
+ if (!read_repeat_counts(&p, ptrend, NULL, NULL, errorcodeptr) &&
+ *errorcodeptr == 0)
+ *errorcodeptr = ERR37;
+ }
+ }
}
}
@@ -1462,6 +1510,7 @@ else
/* A number of Perl escapes are not handled by PCRE. We give an explicit
error. */
+ case CHAR_F:
case CHAR_l:
case CHAR_L:
*errorcodeptr = ERR37;
@@ -1719,6 +1768,9 @@ else
{
if (ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET)
{
+#ifndef EBCDIC
+ COME_FROM_NU:
+#endif
if (++ptr >= ptrend || *ptr == CHAR_RIGHT_CURLY_BRACKET)
{
*errorcodeptr = ERR78;
@@ -1852,19 +1904,6 @@ else
}
}
-/* Perl supports \N{name} for character names, as well as plain \N for "not
-newline". PCRE does not support \N{name}. However, it does support
-quantification such as \N{2,3}. */
-
-if (escape == ESC_N && ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET &&
- ptrend - ptr > 2)
- {
- PCRE2_SPTR p = ptr + 1;
- if (!read_repeat_counts(&p, ptrend, NULL, NULL, errorcodeptr) &&
- *errorcodeptr == 0)
- *errorcodeptr = ERR37;
- }
-
/* Set the pointer to the next character before returning. */
*ptrptr = ptr;
@@ -2251,11 +2290,14 @@ typedef struct nest_save {
#define NSF_RESET 0x0001u
#define NSF_CONDASSERT 0x0002u
-/* Of the options that are changeable within the pattern, these are tracked
-during parsing. The rest are used from META_OPTIONS items when compiling. */
+/* Options that are changeable within the pattern must be tracked during
+parsing. Some (e.g. PCRE2_EXTENDED) are implemented entirely during parsing,
+but all must be tracked so that META_OPTIONS items set the correct values for
+the main compiling phase. */
-#define PARSE_TRACKED_OPTIONS \
- (PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_NO_AUTO_CAPTURE)
+#define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \
+ PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \
+ PCRE2_UNGREEDY)
/* States used for analyzing ranges in character classes. The two OK values
must be last. */
@@ -2290,6 +2332,7 @@ uint32_t *previous_callout = NULL;
uint32_t *parsed_pattern = cb->parsed_pattern;
uint32_t *parsed_pattern_end = cb->parsed_pattern_end;
uint32_t meta_quantifier = 0;
+uint32_t add_after_mark = 0;
uint16_t nest_depth = 0;
int after_manual_callout = 0;
int expect_cond_assert = 0;
@@ -2434,11 +2477,17 @@ while (ptr < ptrend)
/* EITHER: not both options set */
((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) !=
(PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) ||
- /* OR: character > 255 */
- c > 255 ||
- /* OR: not a # comment or white space */
- (c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0)
- ))
+#ifdef SUPPORT_UNICODE
+ /* OR: character > 255 AND not Unicode Pattern White Space */
+ (c > 255 && (c|1) != 0x200f && (c|1) != 0x2029) ||
+#endif
+ /* OR: not a # comment or isspace() white space */
+ (c < 256 && c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0
+#ifdef SUPPORT_UNICODE
+ /* and not CHAR_NEL when Unicode is supported */
+ && c != CHAR_NEL
+#endif
+ )))
{
PCRE2_SIZE verbnamelength;
@@ -2461,6 +2510,16 @@ while (ptr < ptrend)
goto FAILED;
}
*verblengthptr = (uint32_t)verbnamelength;
+
+ /* If this name was on a verb such as (*ACCEPT) which does not continue,
+ a (*MARK) was generated for the name. We now add the original verb as the
+ next item. */
+
+ if (add_after_mark != 0)
+ {
+ *parsed_pattern++ = add_after_mark;
+ add_after_mark = 0;
+ }
break;
case CHAR_BACKSLASH:
@@ -2510,11 +2569,18 @@ while (ptr < ptrend)
/* Skip over whitespace and # comments in extended mode. Note that c is a
character, not a code unit, so we must not use MAX_255 to test its size
- because MAX_255 tests code units and is assumed TRUE in 8-bit mode. */
+ because MAX_255 tests code units and is assumed TRUE in 8-bit mode. The
+ whitespace characters are those designated as "Pattern White Space" by
+ Unicode, which are the isspace() characters plus CHAR_NEL (next line), which is
+ U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a
+ subset of space characters that match \h and \v. */
if ((options & PCRE2_EXTENDED) != 0)
{
if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue;
+#ifdef SUPPORT_UNICODE
+ if (c == CHAR_NEL || (c|1) == 0x200f || (c|1) == 0x2029) continue;
+#endif
if (c == CHAR_NUMBER_SIGN)
{
while (ptr < ptrend)
@@ -3206,7 +3272,6 @@ while (ptr < ptrend)
tempptr = ptr;
escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode,
options, TRUE, cb);
-
if (errorcode != 0)
{
CLASS_ESCAPE_FAILED:
@@ -3454,13 +3519,25 @@ while (ptr < ptrend)
if (*ptr++ == CHAR_COLON) /* Skip past : or ) */
{
- if (verbs[i].has_arg < 0) /* Argument is forbidden */
+ /* Some optional arguments can be treated as a preceding (*MARK) */
+
+ if (verbs[i].has_arg < 0)
{
- errorcode = ERR59;
- goto FAILED;
+ add_after_mark = verbs[i].meta;
+ *parsed_pattern++ = META_MARK;
}
- *parsed_pattern++ = verbs[i].meta +
- ((verbs[i].meta != META_MARK)? 0x00010000u:0);
+
+ /* The remaining verbs with arguments (except *MARK) need a different
+ opcode. */
+
+ else
+ {
+ *parsed_pattern++ = verbs[i].meta +
+ ((verbs[i].meta != META_MARK)? 0x00010000u:0);
+ }
+
+ /* Set up for reading the name in the main loop. */
+
verblengthptr = parsed_pattern++;
verbnamestart = ptr;
inverbname = TRUE;
@@ -3521,17 +3598,39 @@ while (ptr < ptrend)
else
{
+ BOOL hyphenok = TRUE;
+ uint32_t oldoptions = options;
+
top_nest->reset_group = 0;
top_nest->max_group = 0;
set = unset = 0;
optset = &set;
+ /* ^ at the start unsets imnsx and disables the subsequent use of - */
+
+ if (ptr < ptrend && *ptr == CHAR_CIRCUMFLEX_ACCENT)
+ {
+ options &= ~(PCRE2_CASELESS|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE|
+ PCRE2_DOTALL|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE);
+ hyphenok = FALSE;
+ ptr++;
+ }
+
while (ptr < ptrend && *ptr != CHAR_RIGHT_PARENTHESIS &&
*ptr != CHAR_COLON)
{
switch (*ptr++)
{
- case CHAR_MINUS: optset = &unset; break;
+ case CHAR_MINUS:
+ if (!hyphenok)
+ {
+ errorcode = ERR94;
+ ptr--; /* Correct the offset */
+ goto FAILED;
+ }
+ optset = &unset;
+ hyphenok = FALSE;
+ break;
case CHAR_J: /* Record that it changed in the external options */
*optset |= PCRE2_DUPNAMES;
@@ -3591,7 +3690,7 @@ while (ptr < ptrend)
/* If nothing changed, no need to record. */
- if (set != 0 || unset != 0)
+ if (options != oldoptions)
{
*parsed_pattern++ = META_OPTIONS;
*parsed_pattern++ = options;
@@ -3896,9 +3995,8 @@ while (ptr < ptrend)
if (*ptr == CHAR_DOT)
{
if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION;
- if (!read_number(&ptr, ptrend, -1, 99 , ERR79, &minor, &errorcode))
- goto FAILED;
- if (minor < 10) minor *= 10;
+ minor = (*ptr++ - CHAR_0) * 10;
+ if (IS_DIGIT(*ptr)) minor += *ptr++ - CHAR_0;
if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS)
goto BAD_VERSION_CONDITION;
}
@@ -4261,11 +4359,11 @@ goto FAILED;
/*************************************************
-* Find first significant op code *
+* Find first significant opcode *
*************************************************/
/* This is called by several functions that scan a compiled expression looking
-for a fixed first character, or an anchoring op code etc. It skips over things
+for a fixed first character, or an anchoring opcode etc. It skips over things
that do not influence this. For some calls, it makes sense to skip negative
forward and all backward assertions, and also the \b assertion; for others it
does not.
@@ -5472,7 +5570,7 @@ for (;; pptr++)
set xclass = TRUE. Then, in the pre-compile phase, accumulate the length
of the extra data and reset the pointer. This is so that very large
classes that contain a zillion wide characters or Unicode property tests
- do not overwrite the work space (which is on the stack). */
+ do not overwrite the workspace (which is on the stack). */
if (class_uchardata > class_uchardata_base)
{
@@ -5563,7 +5661,7 @@ for (;; pptr++)
if (class_has_8bitchar > 0)
{
*code++ |= XCL_MAP;
- memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
+ (void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
CU2BYTES(class_uchardata - code));
if (negate_class && !xclass_has_prop)
for (i = 0; i < 32; i++) classbits[i] = ~classbits[i];
@@ -5655,6 +5753,7 @@ for (;; pptr++)
cb->had_pruneorskip = TRUE;
/* Fall through */
case META_MARK:
+ case META_COMMIT_ARG:
VERB_ARG:
*code++ = verbops[(meta - META_MARK) >> 16];
/* The length is in characters. */
@@ -6509,7 +6608,7 @@ for (;; pptr++)
/* Wrap the recursion call in OP_BRA brackets. */
- memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE));
+ (void)memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE));
op_previous = *previous = OP_BRA;
PUT(previous, 1, 2 + 2*LINK_SIZE);
previous[2 + 2*LINK_SIZE] = OP_KET;
@@ -6589,7 +6688,7 @@ for (;; pptr++)
if (repeat_max <= 1 || repeat_max == REPEAT_UNLIMITED)
{
- memmove(previous + 1, previous, CU2BYTES(len));
+ (void)memmove(previous + 1, previous, CU2BYTES(len));
code++;
if (repeat_max == 0)
{
@@ -6610,7 +6709,7 @@ for (;; pptr++)
else
{
int linkoffset;
- memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));
+ (void)memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));
code += 2 + LINK_SIZE;
*previous++ = OP_BRAZERO + repeat_type;
*previous++ = OP_BRA;
@@ -6811,7 +6910,7 @@ for (;; pptr++)
if (*bracode == OP_COND || *bracode == OP_SCOND)
{
int nlen = (int)(code - bracode);
- memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));
+ (void)memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));
code += 1 + LINK_SIZE;
nlen += 1 + LINK_SIZE;
*bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS;
@@ -7082,7 +7181,7 @@ for (;; pptr++)
else
{
- memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));
+ (void)memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));
code += 1 + LINK_SIZE;
len += 1 + LINK_SIZE;
tempcode[0] = OP_ONCE;
@@ -7460,7 +7559,7 @@ length of the BRA and KET and any extra code units that are required at the
beginning. We accumulate in a local variable to save frequent testing of
lengthptr for NULL. We cannot do this by looking at the value of 'code' at the
start and end of each alternative, because compiled items are discarded during
-the pre-compile phase so that the work space is not exceeded. */
+the pre-compile phase so that the workspace is not exceeded. */
length = 2 + 2*LINK_SIZE + skipunits;
@@ -7622,7 +7721,7 @@ for (;;)
{
if (cb->open_caps->flag)
{
- memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
+ (void)memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
CU2BYTES(code - start_bracket));
*start_bracket = OP_ONCE;
code += 1 + LINK_SIZE;
@@ -7765,10 +7864,11 @@ do {
if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE;
}
- /* Condition */
+ /* Condition. If there is no second branch, it can't be anchored. */
- else if (op == OP_COND)
+ else if (op == OP_COND || op == OP_SCOND)
{
+ if (scode[GET(scode,1)] != OP_ALT) return FALSE;
if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
return FALSE;
}
@@ -8003,6 +8103,7 @@ for (;;)
break;
case OP_MARK:
+ case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:
@@ -8221,7 +8322,7 @@ for (i = 0; i < tablecount; i++)
if (crc < 0)
{
- memmove(slot + cb->name_entry_size, slot,
+ (void)memmove(slot + cb->name_entry_size, slot,
CU2BYTES((tablecount - i) * cb->name_entry_size));
break;
}
@@ -8311,6 +8412,7 @@ for (;; pptr++)
break;
case META_MARK: /* Add the length of the name. */
+ case META_COMMIT_ARG:
case META_PRUNE_ARG:
case META_SKIP_ARG:
case META_THEN_ARG:
@@ -8501,6 +8603,7 @@ for (;; pptr++)
goto EXIT;
case META_MARK:
+ case META_COMMIT_ARG:
case META_PRUNE_ARG:
case META_SKIP_ARG:
case META_THEN_ARG:
@@ -8572,6 +8675,32 @@ for (;; pptr++)
case META_LOOKAHEADNOT:
pptr = parsed_skip(pptr + 1, PSKIP_KET);
if (pptr == NULL) goto PARSED_SKIP_FAILED;
+
+ /* Also ignore any quantifiers that follow a lookahead assertion. */
+
+ switch (pptr[1])
+ {
+ case META_ASTERISK:
+ case META_ASTERISK_PLUS:
+ case META_ASTERISK_QUERY:
+ case META_PLUS:
+ case META_PLUS_PLUS:
+ case META_PLUS_QUERY:
+ case META_QUERY:
+ case META_QUERY_PLUS:
+ case META_QUERY_QUERY:
+ pptr++;
+ break;
+
+ case META_MINMAX:
+ case META_MINMAX_PLUS:
+ case META_MINMAX_QUERY:
+ pptr += 3;
+ break;
+
+ default:
+ break;
+ }
break;
/* Lookbehinds can be ignored, but must themselves be checked. */
@@ -8942,6 +9071,7 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
break;
case META_MARK:
+ case META_COMMIT_ARG:
case META_PRUNE_ARG:
case META_SKIP_ARG:
case META_THEN_ARG:
diff --git a/src/pcre2_convert.c b/src/pcre2_convert.c
index bdf9b86..1dd5c33 100644
--- a/src/pcre2_convert.c
+++ b/src/pcre2_convert.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2017 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -1066,11 +1066,12 @@ BOOL utf = (options & PCRE2_CONVERT_UTF) != 0;
uint32_t pattype = options & TYPE_OPTIONS;
if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL;
+
if ((options & ~ALL_OPTIONS) != 0 || /* Undefined bit set */
(pattype & (~pattype+1)) != pattype || /* More than one type set */
pattype == 0) /* No type set */
{
- *bufflenptr = 0; /* Error offset */
+ *bufflenptr = 0; /* Error offset */
return PCRE2_ERROR_BADOPTION;
}
@@ -1081,7 +1082,11 @@ if (ccontext == NULL) ccontext =
/* Check UTF if required. */
#ifndef SUPPORT_UNICODE
-if (utf) return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
+if (utf)
+ {
+ *bufflenptr = 0; /* Error offset */
+ return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
+ }
#else
if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0)
{
@@ -1126,6 +1131,7 @@ for (i = 0; i < 2; i++)
break;
default:
+ *bufflenptr = 0; /* Error offset */
return PCRE2_ERROR_INTERNAL;
}
diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
index c6184ff..9b43237 100644
--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
@@ -181,7 +181,8 @@ static const uint8_t coptable[] = {
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
- 0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
+ 0, 0, /* COMMIT, COMMIT_ARG */
+ 0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */
0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */
};
@@ -254,7 +255,8 @@ static const uint8_t poptable[] = {
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
- 0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
+ 0, 0, /* COMMIT, COMMIT_ARG */
+ 0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */
0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */
};
@@ -292,6 +294,35 @@ typedef struct stateblock {
#define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int))
+/* Before version 10.32 the recursive calls of internal_dfa_match() were passed
+local working space and output vectors that were created on the stack. This has
+caused issues for some patterns, especially in small-stack environments such as
+Windows. A new scheme is now in use which sets up a vector on the stack, but if
+this is too small, heap memory is used, up to the heap_limit. The main
+parameters are all numbers of ints because the workspace is a vector of ints.
+
+The size of the starting stack vector, DFA_START_RWS_SIZE, is in bytes, and is
+defined in pcre2_internal.h so as to be available to pcre2test when it is
+finding the minimum heap requirement for a match. */
+
+#define OVEC_UNIT (sizeof(PCRE2_SIZE)/sizeof(int))
+
+#define RWS_BASE_SIZE (DFA_START_RWS_SIZE/sizeof(int)) /* Stack vector */
+#define RWS_RSIZE 1000 /* Work size for recursion */
+#define RWS_OVEC_RSIZE (1000*OVEC_UNIT) /* Ovector for recursion */
+#define RWS_OVEC_OSIZE (2*OVEC_UNIT) /* Ovector in other cases */
+
+/* This structure is at the start of each workspace block. Blocks are chained
+together through the next field. Both counts are in units of sizeof(int); size
+covers the whole block, including the space taken by this anchor, and free is
+the number of ints at the end of the block that have not been handed out. */
+
+typedef struct RWS_anchor {
+ struct RWS_anchor *next; /* Following block in the chain, or NULL */
+ unsigned int size; /* Number of ints in the block, anchor included */
+ unsigned int free; /* Number of ints still unused at the end */
+} RWS_anchor;
+
+#define RWS_ANCHOR_SIZE (sizeof(RWS_anchor)/sizeof(int))
+
+
/*************************************************
* Process a callout *
@@ -354,6 +385,61 @@ return (mb->callout)(cb, mb->callout_data);
/*************************************************
+* Expand local workspace memory *
+*************************************************/
+
+/* This function is called when internal_dfa_match() is about to be called
+recursively and there is insufficient working space left in the current
+workspace block. If there's an existing next block, use it, provided that it
+is big enough for this request; otherwise get a new block unless the heap limit
+is reached. On success the chosen block is marked as entirely free apart from
+its anchor.
+
+Arguments:
+  rwsptr     pointer to block pointer (updated)
+  ovecsize   space needed for an ovector (number of ints)
+  mb         the match block
+
+Returns:     0 rwsptr has been updated
+            !0 an error code (PCRE2_ERROR_HEAPLIMIT or PCRE2_ERROR_NOMEMORY)
+*/
+
+static int
+more_workspace(RWS_anchor **rwsptr, unsigned int ovecsize, dfa_match_block *mb)
+{
+RWS_anchor *rws = *rwsptr;
+RWS_anchor *new;
+
+/* A block that already exists may have been cut down to fit the heap limit
+when it was created for a smaller request. It must not be handed out if it
+cannot hold the workspace and ovector that the caller is about to take from
+it. Such a block exists only when the heap allowance was used up, so the heap
+limit error is the appropriate one. */
+
+if (rws->next != NULL)
+  {
+  new = rws->next;
+  if (new->size < RWS_RSIZE + ovecsize + RWS_ANCHOR_SIZE)
+    return PCRE2_ERROR_HEAPLIMIT;
+  }
+
+/* All sizes are in units of sizeof(int), except for mb->heap_limit, which is
+in kibibytes. The amount of heap that is left is worked out as a PCRE2_SIZE
+value, saturating instead of wrapping, and is narrowed to unsigned int only
+when it is known to be smaller than newsize. */
+
+else
+  {
+  const PCRE2_SIZE ints_per_kib = 1024/sizeof(int);
+  const PCRE2_SIZE size_max = ~(PCRE2_SIZE)0;
+  unsigned int newsize = rws->size * 2;
+  PCRE2_SIZE limit = (mb->heap_limit > size_max/ints_per_kib)?
+    size_max : ints_per_kib * mb->heap_limit;
+  PCRE2_SIZE heapleft = (limit > mb->heap_used)? limit - mb->heap_used : 0;
+
+  if (newsize > heapleft) newsize = (unsigned int)heapleft;
+  if (newsize < RWS_RSIZE + ovecsize + RWS_ANCHOR_SIZE)
+    return PCRE2_ERROR_HEAPLIMIT;
+  new = mb->memctl.malloc(newsize*sizeof(int), mb->memctl.memory_data);
+  if (new == NULL) return PCRE2_ERROR_NOMEMORY;
+  mb->heap_used += newsize;
+  new->next = NULL;
+  new->size = newsize;
+  rws->next = new;
+  }
+
+/* Whether reused or new, the block starts out empty apart from its anchor. */
+
+new->free = new->size - RWS_ANCHOR_SIZE;
+*rwsptr = new;
+return 0;
+}
+
+
+
+/*************************************************
* Match a Regular Expression - DFA engine *
*************************************************/
@@ -431,7 +517,8 @@ internal_dfa_match(
uint32_t offsetcount,
int *workspace,
int wscount,
- uint32_t rlevel)
+ uint32_t rlevel,
+ int *RWS)
{
stateblock *active_states, *new_states, *temp_states;
stateblock *next_active_state, *next_new_state;
@@ -788,7 +875,7 @@ for (;;)
else if (match_count > 0 && ++match_count * 2 > (int)offsetcount)
match_count = 0;
count = ((match_count == 0)? (int)offsetcount : match_count * 2) - 2;
- if (count > 0) memmove(offsets + 2, offsets,
+ if (count > 0) (void)memmove(offsets + 2, offsets,
(size_t)count * sizeof(PCRE2_SIZE));
if (offsetcount >= 2)
{
@@ -2587,10 +2674,22 @@ for (;;)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
{
- PCRE2_SPTR endasscode = code + GET(code, 1);
- PCRE2_SIZE local_offsets[2];
int rc;
- int local_workspace[1000];
+ int *local_workspace;
+ PCRE2_SIZE *local_offsets;
+ PCRE2_SPTR endasscode = code + GET(code, 1);
+ RWS_anchor *rws = (RWS_anchor *)RWS;
+
+ if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
+ {
+ rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
+ if (rc != 0) return rc;
+ RWS = (int *)rws;
+ }
+
+ local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
+ local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
+ rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
@@ -2600,10 +2699,13 @@ for (;;)
ptr, /* where we currently are */
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */
- sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
+ RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
local_workspace, /* workspace vector */
- sizeof(local_workspace)/sizeof(int), /* size of same */
- rlevel); /* function recursion level */
+ RWS_RSIZE, /* size of same */
+ rlevel, /* function recursion level */
+ RWS); /* recursion workspace */
+
+ rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
@@ -2615,8 +2717,6 @@ for (;;)
case OP_COND:
case OP_SCOND:
{
- PCRE2_SIZE local_offsets[1000];
- int local_workspace[1000];
int codelink = (int)GET(code, 1);
PCRE2_UCHAR condcode;
@@ -2673,8 +2773,22 @@ for (;;)
else
{
int rc;
+ int *local_workspace;
+ PCRE2_SIZE *local_offsets;
PCRE2_SPTR asscode = code + LINK_SIZE + 1;
PCRE2_SPTR endasscode = asscode + GET(asscode, 1);
+ RWS_anchor *rws = (RWS_anchor *)RWS;
+
+ if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
+ {
+ rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
+ if (rc != 0) return rc;
+ RWS = (int *)rws;
+ }
+
+ local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
+ local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
+ rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
@@ -2684,10 +2798,13 @@ for (;;)
ptr, /* where we currently are */
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */
- sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
+ RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
local_workspace, /* workspace vector */
- sizeof(local_workspace)/sizeof(int), /* size of same */
- rlevel); /* function recursion level */
+ RWS_RSIZE, /* size of same */
+ rlevel, /* function recursion level */
+ RWS); /* recursion workspace */
+
+ rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
if ((rc >= 0) ==
@@ -2702,13 +2819,25 @@ for (;;)
/*-----------------------------------------------------------------*/
case OP_RECURSE:
{
+ int rc;
+ int *local_workspace;
+ PCRE2_SIZE *local_offsets;
+ RWS_anchor *rws = (RWS_anchor *)RWS;
dfa_recursion_info *ri;
- PCRE2_SIZE local_offsets[1000];
- int local_workspace[1000];
PCRE2_SPTR callpat = start_code + GET(code, 1);
uint32_t recno = (callpat == mb->start_code)? 0 :
GET2(callpat, 1 + LINK_SIZE);
- int rc;
+
+ if (rws->free < RWS_RSIZE + RWS_OVEC_RSIZE)
+ {
+ rc = more_workspace(&rws, RWS_OVEC_RSIZE, mb);
+ if (rc != 0) return rc;
+ RWS = (int *)rws;
+ }
+
+ local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
+ local_workspace = ((int *)local_offsets) + RWS_OVEC_RSIZE;
+ rws->free -= RWS_RSIZE + RWS_OVEC_RSIZE;
/* Check for repeating a recursion without advancing the subject
pointer. This should catch convoluted mutual recursions. (Some simple
@@ -2732,11 +2861,13 @@ for (;;)
ptr, /* where we currently are */
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */
- sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
+ RWS_OVEC_RSIZE/OVEC_UNIT, /* size of same */
local_workspace, /* workspace vector */
- sizeof(local_workspace)/sizeof(int), /* size of same */
- rlevel); /* function recursion level */
+ RWS_RSIZE, /* size of same */
+ rlevel, /* function recursion level */
+ RWS); /* recursion workspace */
+ rws->free += RWS_RSIZE + RWS_OVEC_RSIZE;
mb->recursive = new_recursive.prevrec; /* Done this recursion */
/* Ran out of internal offsets */
@@ -2782,10 +2913,25 @@ for (;;)
case OP_SCBRAPOS:
case OP_BRAPOSZERO:
{
+ int rc;
+ int *local_workspace;
+ PCRE2_SIZE *local_offsets;
PCRE2_SIZE charcount, matched_count;
PCRE2_SPTR local_ptr = ptr;
+ RWS_anchor *rws = (RWS_anchor *)RWS;
BOOL allow_zero;
+ if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
+ {
+ rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
+ if (rc != 0) return rc;
+ RWS = (int *)rws;
+ }
+
+ local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
+ local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
+ rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
+
if (codevalue == OP_BRAPOSZERO)
{
allow_zero = TRUE;
@@ -2798,19 +2944,17 @@ for (;;)
for (matched_count = 0;; matched_count++)
{
- PCRE2_SIZE local_offsets[2];
- int local_workspace[1000];
-
- int rc = internal_dfa_match(
+ rc = internal_dfa_match(
mb, /* fixed match data */
code, /* this subexpression's code */
local_ptr, /* where we currently are */
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */
- sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
+ RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
local_workspace, /* workspace vector */
- sizeof(local_workspace)/sizeof(int), /* size of same */
- rlevel); /* function recursion level */
+ RWS_RSIZE, /* size of same */
+ rlevel, /* function recursion level */
+ RWS); /* recursion workspace */
/* Failed to match */
@@ -2827,6 +2971,8 @@ for (;;)
local_ptr += charcount; /* Advance temporary position ptr */
}
+ rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
+
/* At this point we have matched the subpattern matched_count
times, and local_ptr is pointing to the character after the end of the
last match. */
@@ -2869,19 +3015,35 @@ for (;;)
/*-----------------------------------------------------------------*/
case OP_ONCE:
{
- PCRE2_SIZE local_offsets[2];
- int local_workspace[1000];
+ int rc;
+ int *local_workspace;
+ PCRE2_SIZE *local_offsets;
+ RWS_anchor *rws = (RWS_anchor *)RWS;
- int rc = internal_dfa_match(
+ if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
+ {
+ rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
+ if (rc != 0) return rc;
+ RWS = (int *)rws;
+ }
+
+ local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
+ local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
+ rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
+
+ rc = internal_dfa_match(
mb, /* fixed match data */
code, /* this subexpression's code */
ptr, /* where we currently are */
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */
- sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
+ RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
local_workspace, /* workspace vector */
- sizeof(local_workspace)/sizeof(int), /* size of same */
- rlevel); /* function recursion level */
+ RWS_RSIZE, /* size of same */
+ rlevel, /* function recursion level */
+ RWS); /* recursion workspace */
+
+ rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
if (rc >= 0)
{
@@ -3063,6 +3225,7 @@ pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount)
{
+int rc;
const pcre2_real_code *re = (const pcre2_real_code *)code;
PCRE2_SPTR start_match;
@@ -3071,9 +3234,9 @@ PCRE2_SPTR bumpalong_limit;
PCRE2_SPTR req_cu_ptr;
BOOL utf, anchored, startline, firstline;
-
BOOL has_first_cu = FALSE;
BOOL has_req_cu = FALSE;
+
PCRE2_UCHAR first_cu = 0;
PCRE2_UCHAR first_cu2 = 0;
PCRE2_UCHAR req_cu = 0;
@@ -3088,6 +3251,17 @@ pcre2_callout_block cb;
dfa_match_block actual_match_block;
dfa_match_block *mb = &actual_match_block;
+/* Set up a starting block of memory for use during recursive calls to
+internal_dfa_match(). By putting this on the stack, it minimizes resource use
+in the case when it is not needed. If this is too small, more memory is
+obtained from the heap. At the start of each block is an anchor structure. */
+
+int base_recursion_workspace[RWS_BASE_SIZE];
+RWS_anchor *rws = (RWS_anchor *)base_recursion_workspace;
+rws->next = NULL;
+rws->size = RWS_BASE_SIZE;
+rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE;
+
/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
subject string. */
@@ -3184,6 +3358,7 @@ if (mcontext == NULL)
mb->memctl = re->memctl;
mb->match_limit = PRIV(default_match_context).match_limit;
mb->match_limit_depth = PRIV(default_match_context).depth_limit;
+ mb->heap_limit = PRIV(default_match_context).heap_limit;
}
else
{
@@ -3198,6 +3373,7 @@ else
mb->memctl = mcontext->memctl;
mb->match_limit = mcontext->match_limit;
mb->match_limit_depth = mcontext->depth_limit;
+ mb->heap_limit = mcontext->heap_limit;
}
if (mb->match_limit > re->limit_match)
@@ -3206,6 +3382,9 @@ if (mb->match_limit > re->limit_match)
if (mb->match_limit_depth > re->limit_depth)
mb->match_limit_depth = re->limit_depth;
+if (mb->heap_limit > re->limit_heap)
+ mb->heap_limit = re->limit_heap;
+
mb->start_code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
re->name_count * re->name_entry_size;
mb->tables = re->tables;
@@ -3215,6 +3394,7 @@ mb->start_offset = start_offset;
mb->moptions = options;
mb->poptions = re->overall_options;
mb->match_call_count = 0;
+mb->heap_used = 0;
/* Process the \R and newline settings. */
@@ -3351,8 +3531,6 @@ a match. */
for (;;)
{
- int rc;
-
/* ----------------- Start of match optimizations ---------------- */
/* There are some optimizations that avoid running the match if a known
@@ -3544,7 +3722,7 @@ for (;;)
in characters, we treat it as code units to avoid spending too much time
in this optimization. */
- if (end_subject - start_match < re->minlength) return PCRE2_ERROR_NOMATCH;
+ if (end_subject - start_match < re->minlength) goto NOMATCH_EXIT;
/* If req_cu is set, we know that that code unit must appear in the
subject for the match to succeed. If the first code unit is set, req_cu
@@ -3621,7 +3799,8 @@ for (;;)
(uint32_t)match_data->oveccount * 2, /* actual size of same */
workspace, /* workspace vector */
(int)wscount, /* size of same */
- 0); /* function recurse level */
+ 0, /* function recurse level */
+ base_recursion_workspace); /* initial workspace for recursion */
/* Anything other than "no match" means we are done, always; otherwise, carry
on only if not anchored. */
@@ -3637,7 +3816,7 @@ for (;;)
match_data->rightchar = (PCRE2_SIZE)( mb->last_used_ptr - subject);
match_data->startchar = (PCRE2_SIZE)(start_match - subject);
match_data->rc = rc;
- return rc;
+ goto EXIT;
}
/* Advance to the next subject character unless we are at the end of a line
@@ -3668,8 +3847,18 @@ for (;;)
} /* "Bumpalong" loop */
+NOMATCH_EXIT:
+rc = PCRE2_ERROR_NOMATCH;
+
+EXIT:
+while (rws->next != NULL)
+ {
+ RWS_anchor *next = rws->next;
+ rws->next = next->next;
+ mb->memctl.free(next, mb->memctl.memory_data);
+ }
-return PCRE2_ERROR_NOMATCH;
+return rc;
}
/* End of pcre2_dfa_match.c */
diff --git a/src/pcre2_error.c b/src/pcre2_error.c
index d98cae9..4b3b3f1 100644
--- a/src/pcre2_error.c
+++ b/src/pcre2_error.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2017 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -107,7 +107,7 @@ static const unsigned char compile_error_texts[] =
/* 35 */
"lookbehind is too complicated\0"
"\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0"
- "PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0"
+ "PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u\0"
"number after (?C is greater than 255\0"
"closing parenthesis for (?C expected\0"
/* 40 */
@@ -133,7 +133,8 @@ static const unsigned char compile_error_texts[] =
"internal error: unknown newline setting\0"
"\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
"(?R (recursive pattern call) must be followed by a closing parenthesis\0"
- "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
+ /* "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" */
+ "obsolete error (should not occur)\0" /* Was the above */
/* 60 */
"(*VERB) not recognized or malformed\0"
"group number is too big\0"
@@ -160,7 +161,7 @@ static const unsigned char compile_error_texts[] =
"using UCP is disabled by the application\0"
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
"character code point value in \\u.... sequence is too large\0"
- "digits missing in \\x{} or \\o{}\0"
+ "digits missing in \\x{} or \\o{} or \\N{U+}\0"
"syntax error or number too big in (?(VERSION condition\0"
/* 80 */
"internal error: unknown opcode in auto_possessify()\0"
@@ -178,6 +179,8 @@ static const unsigned char compile_error_texts[] =
"internal error: bad code value in parsed_skip()\0"
"PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0"
"invalid option bits with PCRE2_LITERAL\0"
+ "\\N{U+dddd} is supported only in Unicode (UTF) mode\0"
+ "invalid hyphen in option setting\0"
;
/* Match-time and UTF error texts are in the same format. */
@@ -255,11 +258,13 @@ static const unsigned char match_error_texts[] =
"expected closing curly bracket in replacement string\0"
"bad substitution in replacement string\0"
/* 60 */
- "match with end before start is not supported\0"
+ "match with end before start or start moved backwards is not supported\0"
"too many replacements (more than INT_MAX)\0"
"bad serialized data\0"
"heap limit exceeded\0"
"invalid syntax\0"
+ /* 65 */
+ "internal error - duplicate substitution match\0"
;
diff --git a/src/pcre2_extuni.c b/src/pcre2_extuni.c
index 11a0bfb..237211a 100644
--- a/src/pcre2_extuni.c
+++ b/src/pcre2_extuni.c
@@ -129,11 +129,11 @@ while (eptr < end_subject)
if ((ricount & 1) != 0) break; /* Grapheme break required */
}
- /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
- any number of Extend before a following E_Modifier. */
+ /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
+ allows any number of them before a following Extended_Pictographic. */
- if (rgb != ucp_gbExtend ||
- (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
+ if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
+ lgb != ucp_gbExtended_Pictographic)
lgb = rgb;
eptr += len;
diff --git a/src/pcre2_find_bracket.c b/src/pcre2_find_bracket.c
index 357385a..70baa13 100644
--- a/src/pcre2_find_bracket.c
+++ b/src/pcre2_find_bracket.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -131,6 +131,7 @@ for (;;)
break;
case OP_MARK:
+ case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:
diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h
index 3db9d60..8750f2f 100644
--- a/src/pcre2_internal.h
+++ b/src/pcre2_internal.h
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2017 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -165,6 +165,16 @@ by "configure". */
#define INT64_OR_DOUBLE double
#endif
+/* External (in the C sense) functions and tables that are private to the
+libraries are always referenced using the PRIV macro. This makes it possible
+for pcre2test.c to include some of the source files from the libraries using a
+different PRIV definition to avoid name clashes. It also makes it clear in the
+code that a non-static object is being referenced. */
+
+#ifndef PRIV
+#define PRIV(name) _pcre2_##name
+#endif
+
/* When compiling for use with the Virtual Pascal compiler, these functions
need to have their names changed. PCRE2 must be compiled with the -DVPCOMPAT
option on the command line. */
@@ -178,50 +188,15 @@ option on the command line. */
#define memset(s,c,n) _memset(s,c,n)
#else /* VPCOMPAT */
-/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
-define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY
-is set. Otherwise, include an emulating function for those systems that have
-neither (there some non-Unix environments where this is the case). */
+/* Otherwise, to cope with SunOS4 and other systems that lack memmove(), define
+a macro that calls an emulating function. */
#ifndef HAVE_MEMMOVE
-#undef memmove /* some systems may have a macro */
-#ifdef HAVE_BCOPY
-#define memmove(a, b, c) bcopy(b, a, c)
-#else /* HAVE_BCOPY */
-static void *
-pcre2_memmove(void *d, const void *s, size_t n)
-{
-size_t i;
-unsigned char *dest = (unsigned char *)d;
-const unsigned char *src = (const unsigned char *)s;
-if (dest > src)
- {
- dest += n;
- src += n;
- for (i = 0; i < n; ++i) *(--dest) = *(--src);
- return (void *)dest;
- }
-else
- {
- for (i = 0; i < n; ++i) *dest++ = *src++;
- return (void *)(dest - n);
- }
-}
-#define memmove(a, b, c) pcre2_memmove(a, b, c)
-#endif /* not HAVE_BCOPY */
+#undef memmove /* Some systems may have a macro */
+#define memmove(a, b, c) PRIV(memmove)(a, b, c)
#endif /* not HAVE_MEMMOVE */
#endif /* not VPCOMPAT */
-/* External (in the C sense) functions and tables that are private to the
-libraries are always referenced using the PRIV macro. This makes it possible
-for pcre2test.c to include some of the source files from the libraries using a
-different PRIV definition to avoid name clashes. It also makes it clear in the
-code that a non-static object is being referenced. */
-
-#ifndef PRIV
-#define PRIV(name) _pcre2_##name
-#endif
-
/* This is an unsigned int value that no UTF character can ever have, as
Unicode doesn't go beyond 0x0010ffff. */
@@ -247,12 +222,17 @@ not rely on this. */
pcre2_match() is allocated on the system stack, of this size (bytes). The size
must be a multiple of sizeof(PCRE2_SPTR) in all environments, so making it a
multiple of 8 is best. Typical frame sizes are a few hundred bytes (it depends
-on the number of capturing parentheses) so 20K handles quite a few frames. A
+on the number of capturing parentheses) so 20KiB handles quite a few frames. A
larger vector on the heap is obtained for patterns that need more frames. The
maximum size of this can be limited. */
#define START_FRAMES_SIZE 20480
+/* Similarly, for DFA matching, an initial internal workspace vector is
+allocated on the stack, of this size (bytes). */
+
+#define DFA_START_RWS_SIZE 30720
+
/* Define the default BSR convention. */
#ifdef BSR_ANYCRLF
@@ -585,14 +565,15 @@ these tables. */
#define cbit_cntrl 288 /* [:cntrl:] */
#define cbit_length 320 /* Length of the cbits table */
-/* Bit definitions for entries in the ctypes table. */
+/* Bit definitions for entries in the ctypes table. Do not change these values
+without checking pcre2_jit_compile.c, which has an assertion to ensure that
+ctype_word has the value 16. */
#define ctype_space 0x01
#define ctype_letter 0x02
#define ctype_digit 0x04
-#define ctype_xdigit 0x08
+#define ctype_xdigit 0x08 /* not actually used any more */
#define ctype_word 0x10 /* alphanumeric or '_' */
-#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */
/* Offsets of the various tables from the base tables pointer, and
total length of the tables. */
@@ -1267,36 +1248,6 @@ contain characters with values greater than 255. */
#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
-/* Escape items that are just an encoding of a particular data value. These
-appear in the escapes[] table in pcre2_compile.c as positive numbers. */
-
-#ifndef ESC_a
-#define ESC_a CHAR_BEL
-#endif
-
-#ifndef ESC_e
-#define ESC_e CHAR_ESC
-#endif
-
-#ifndef ESC_f
-#define ESC_f CHAR_FF
-#endif
-
-#ifndef ESC_n
-#define ESC_n CHAR_LF
-#endif
-
-#ifndef ESC_r
-#define ESC_r CHAR_CR
-#endif
-
-/* We can't officially use ESC_t because it is a POSIX reserved identifier
-(presumably because of all the others like size_t). */
-
-#ifndef ESC_tee
-#define ESC_tee CHAR_HT
-#endif
-
/* These are escaped items that aren't just an encoding of a particular data
value such as \n. They must have non-zero values, as check_escape() returns 0
for a data character. In the escapes[] table in pcre2_compile.c their values
@@ -1578,23 +1529,26 @@ enum {
OP_THEN, /* 155 */
OP_THEN_ARG, /* 156 same, but with argument */
OP_COMMIT, /* 157 */
+ OP_COMMIT_ARG, /* 158 same, but with argument */
- /* These are forced failure and success verbs */
+ /* These are forced failure and success verbs. FAIL and ACCEPT do accept an
+ argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL)
+ without the need for a special opcode. */
- OP_FAIL, /* 158 */
- OP_ACCEPT, /* 159 */
- OP_ASSERT_ACCEPT, /* 160 Used inside assertions */
- OP_CLOSE, /* 161 Used before OP_ACCEPT to close open captures */
+ OP_FAIL, /* 159 */
+ OP_ACCEPT, /* 160 */
+ OP_ASSERT_ACCEPT, /* 161 Used inside assertions */
+ OP_CLOSE, /* 162 Used before OP_ACCEPT to close open captures */
/* This is used to skip a subpattern with a {0} quantifier */
- OP_SKIPZERO, /* 162 */
+ OP_SKIPZERO, /* 163 */
/* This is used to identify a DEFINE group during compilation so that it can
be checked for having only one branch. It is changed to OP_FALSE before
compilation finishes. */
- OP_DEFINE, /* 163 */
+ OP_DEFINE, /* 164 */
/* This is not an opcode, but is used to check that tables indexed by opcode
are the correct length, in order to catch updating errors - there have been
@@ -1650,7 +1604,7 @@ some cases doesn't actually use these names at all). */
"Cond false", "Cond true", \
"Brazero", "Braminzero", "Braposzero", \
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \
- "*THEN", "*THEN", "*COMMIT", "*FAIL", \
+ "*THEN", "*THEN", "*COMMIT", "*COMMIT", "*FAIL", \
"*ACCEPT", "*ASSERT_ACCEPT", \
"Close", "Skip zero", "Define"
@@ -1742,7 +1696,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
1, 3, /* SKIP, SKIP_ARG */ \
1, 3, /* THEN, THEN_ARG */ \
- 1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
+ 1, 3, /* COMMIT, COMMIT_ARG */ \
+ 1, 1, 1, /* FAIL, ACCEPT, ASSERT_ACCEPT */ \
1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \
1 /* DEFINE */
@@ -1896,7 +1851,7 @@ extern const ucd_record PRIV(ucd_records)[];
#if PCRE2_CODE_UNIT_WIDTH == 32
extern const ucd_record PRIV(dummy_ucd_record)[];
#endif
-extern const uint8_t PRIV(ucd_stage1)[];
+extern const uint16_t PRIV(ucd_stage1)[];
extern const uint16_t PRIV(ucd_stage2)[];
extern const uint32_t PRIV(ucp_gbtable)[];
extern const uint32_t PRIV(ucp_gentype)[];
@@ -1976,6 +1931,14 @@ extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *);
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
uint32_t *, BOOL);
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL);
+
+/* This function is needed only when memmove() is not available. */
+
+#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
+#define _pcre2_memmove PCRE2_SUFFIX(_pcre2_memmove)
+extern void * _pcre2_memmove(void *, const void *, size_t);
+#endif
+
#endif /* PCRE2_CODE_UNIT_WIDTH */
#endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */
diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h
index c4c4c3a..62626d0 100644
--- a/src/pcre2_intmodedep.h
+++ b/src/pcre2_intmodedep.h
@@ -793,11 +793,23 @@ typedef struct heapframe {
uint8_t return_id; /* Where to go on in internal "return" */
uint8_t op; /* Processing opcode */
+ /* At this point, the structure is 16-bit aligned. On most architectures
+ the alignment requirement for a pointer will ensure that the eptr field below
+ is 32-bit or 64-bit aligned. However, on m68k it is fine to have a pointer
+ that is 16-bit aligned. We must therefore ensure that what comes between here
+ and eptr is an odd multiple of 16 bits so as to get back into 32-bit
+ alignment. This happens naturally when PCRE2_UCHAR is 8 bits wide, but needs
+ fudges in the other cases. In the 32-bit case the padding comes first so that
+ the occu field itself is 32-bit aligned. Without the padding, this structure
+ is no longer a multiple of PCRE2_SIZE on m68k, and the check below fails. */
+
#if PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR occu[6]; /* Used for other case code units */
#elif PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR occu[2]; /* Used for other case code units */
+ uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */
#else
+ uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */
PCRE2_UCHAR occu[1]; /* Used for other case code units */
#endif
@@ -818,6 +830,9 @@ typedef struct heapframe {
PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
} heapframe;
+/* This typedef is a check that the size of the heapframe structure is a
+multiple of PCRE2_SIZE. See various comments above. */
+
typedef char check_heapframe_size[
((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)];
@@ -881,6 +896,8 @@ typedef struct dfa_match_block {
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
const uint8_t *tables; /* Character tables */
PCRE2_SIZE start_offset; /* The start offset value */
+ PCRE2_SIZE heap_limit; /* As it says */
+ PCRE2_SIZE heap_used; /* As it says */
uint32_t match_limit; /* As it says */
uint32_t match_limit_depth; /* As it says */
uint32_t match_call_count; /* Number of calls of internal function */
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index 80ed1c4..32e985b 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2017 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -839,6 +839,7 @@ switch(*cc)
#endif
case OP_MARK:
+ case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:
@@ -939,6 +940,7 @@ while (cc < ccend)
common->control_head_ptr = 1;
/* Fall through. */
+ case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_MARK:
if (common->mark_ptr == 0)
@@ -1553,6 +1555,7 @@ while (cc < ccend)
break;
case OP_MARK:
+ case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_THEN_ARG:
SLJIT_ASSERT(common->mark_ptr != 0);
@@ -1733,6 +1736,7 @@ while (cc < ccend)
break;
case OP_MARK:
+ case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_THEN_ARG:
SLJIT_ASSERT(common->mark_ptr != 0);
@@ -2041,6 +2045,7 @@ while (cc < ccend)
break;
case OP_MARK:
+ case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_THEN_ARG:
SLJIT_ASSERT(common->mark_ptr != 0);
@@ -2428,6 +2433,7 @@ while (cc < ccend)
break;
case OP_MARK:
+ case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_THEN_ARG:
SLJIT_ASSERT(common->mark_ptr != 0);
@@ -3666,7 +3672,8 @@ if (!common->utf)
#endif
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
-OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
+OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
+OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
@@ -5894,6 +5901,8 @@ for (i = 0; i < 32; i++)
}
}
+if (len == 0) return FALSE; /* Should never occur, but stops analyzers complaining. */
+
i = 0;
j = 0;
@@ -6627,7 +6636,8 @@ if (needstype || needsscript)
#endif
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
- OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
+ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
+ OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
@@ -7254,10 +7264,11 @@ while (cc < end_subject)
if ((ricount & 1) != 0) break; /* Grapheme break required */
}
- /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
- any number of Extend before a following E_Modifier. */
+ /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
+ allows any number of them before a following Extended_Pictographic. */
- if (rgb != ucp_gbExtend || (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
+ if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
+ lgb != ucp_gbExtended_Pictographic)
lgb = rgb;
prevcc = cc;
@@ -7309,10 +7320,11 @@ while (cc < end_subject)
if ((ricount & 1) != 0) break; /* Grapheme break required */
}
- /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
- any number of Extend before a following E_Modifier. */
+ /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
+ allows any number of them before a following Extended_Pictographic. */
- if (rgb != ucp_gbExtend || (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
+ if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
+ lgb != ucp_gbExtended_Pictographic)
lgb = rgb;
cc++;
@@ -10346,7 +10358,8 @@ backtrack_common *backtrack;
PCRE2_UCHAR opcode = *cc;
PCRE2_SPTR ccend = cc + 1;
-if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
+if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
+ opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
ccend += 2 + cc[1];
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
@@ -10358,7 +10371,7 @@ if (opcode == OP_SKIP)
return ccend;
}
-if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
+if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
{
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
@@ -10677,6 +10690,7 @@ while (cc < ccend)
case OP_THEN:
case OP_THEN_ARG:
case OP_COMMIT:
+ case OP_COMMIT_ARG:
cc = compile_control_verb_matchingpath(common, cc, parent);
break;
@@ -11751,6 +11765,7 @@ while (current)
break;
case OP_COMMIT:
+ case OP_COMMIT_ARG:
if (!common->local_quit_available)
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
if (common->quit_label == NULL)
diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
index d9916b7..a28e9a0 100644
--- a/src/pcre2_jit_test.c
+++ b/src/pcre2_jit_test.c
@@ -1331,8 +1331,9 @@ static int regression_tests(void)
ovector8_2[i] = -2;
}
if (re8) {
+ (void)pcre2_set_match_limit_8(mcontext8, 10000000);
return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
- current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, NULL);
+ current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8);
if (pcre2_jit_compile_8(re8, jit_compile_mode)) {
printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern);
@@ -1375,8 +1376,9 @@ static int regression_tests(void)
else
length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
+ (void)pcre2_set_match_limit_16(mcontext16, 10000000);
return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
- current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, NULL);
+ current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16);
if (pcre2_jit_compile_16(re16, jit_compile_mode)) {
printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern);
@@ -1419,8 +1421,9 @@ static int regression_tests(void)
else
length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
+ (void)pcre2_set_match_limit_32(mcontext32, 10000000);
return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
- current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, NULL);
+ current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32);
if (pcre2_jit_compile_32(re32, jit_compile_mode)) {
printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern);
diff --git a/src/pcre2_maketables.c b/src/pcre2_maketables.c
index 2c7ae84..537edba 100644
--- a/src/pcre2_maketables.c
+++ b/src/pcre2_maketables.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -141,13 +141,6 @@ for (i = 0; i < 256; i++)
if (isdigit(i)) x += ctype_digit;
if (isxdigit(i)) x += ctype_xdigit;
if (isalnum(i) || i == '_') x += ctype_word;
-
- /* Note: strchr includes the terminating zero in the characters it considers.
- In this instance, that is ok because we want binary zero to be flagged as a
- meta-character, which in this sense is any character that terminates a run
- of data characters. */
-
- if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta;
*p++ = x;
}
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index 79cc93f..8741e14 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -43,11 +43,11 @@ POSSIBILITY OF SUCH DAMAGE.
#include "config.h"
#endif
-/* These defines enables debugging code */
+/* These defines enable debugging code */
-//#define DEBUG_FRAMES_DISPLAY
-//#define DEBUG_SHOW_OPS
-//#define DEBUG_SHOW_RMATCH
+/* #define DEBUG_FRAMES_DISPLAY */
+/* #define DEBUG_SHOW_OPS */
+/* #define DEBUG_SHOW_RMATCH */
#ifdef DEBUG_FRAME_DISPLAY
#include <stdarg.h>
@@ -149,7 +149,7 @@ changed, the code at RETURN_SWITCH below must be updated in sync. */
enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
- RM31, RM32, RM33, RM34, RM35 };
+ RM31, RM32, RM33, RM34, RM35, RM36 };
#ifdef SUPPORT_WIDE_CHARS
enum { RM100=100, RM101 };
@@ -770,7 +770,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* ===================================================================== */
/* Real or forced end of the pattern, assertion, or recursion. In an
assertion ACCEPT, update the last used pointer and remember the current
- frame so that the captures can be fished out of it. */
+ frame so that the captures and mark can be fished out of it. */
case OP_ASSERT_ACCEPT:
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
@@ -1776,7 +1776,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* ===================================================================== */
- /* Match a bit-mapped character class, possibly repeatedly. These op codes
+ /* Match a bit-mapped character class, possibly repeatedly. These opcodes
are used when all the characters in the class have values in the range
0-255, and either the matching is caseful, or the characters are in the
range 0-127 when UTF processing is enabled. The only difference between
@@ -1962,11 +1962,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (reptype == REPTYPE_POS) continue; /* No backtracking */
+ /* After \C in UTF mode, Lstart_eptr might be in the middle of a
+ Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
+ go too far. */
+
for (;;)
{
RMATCH(Fecode, RM201);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (Feptr-- == Lstart_eptr) break; /* Tried at original position */
+ if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */
BACKCHAR(Feptr);
}
}
@@ -2126,11 +2130,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (reptype == REPTYPE_POS) continue; /* No backtracking */
+ /* After \C in UTF mode, Lstart_eptr might be in the middle of a
+ Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
+ go too far. */
+
for(;;)
{
RMATCH(Fecode, RM101);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (Feptr-- == Lstart_eptr) break; /* Tried at original position */
+ if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */
#ifdef SUPPORT_UNICODE
if (utf) BACKCHAR(Feptr);
#endif
@@ -2456,7 +2464,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* ===================================================================== */
/* Match a single character type repeatedly. Note that the property type
- does not need to be in a stack frame as it not used within an RMATCH()
+ does not need to be in a stack frame as it is not used within an RMATCH()
loop. */
#define Lstart_eptr F->temp_sptr[0]
@@ -4002,8 +4010,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (reptype == REPTYPE_POS) continue; /* No backtracking */
/* After \C in UTF mode, Lstart_eptr might be in the middle of a
- Unicode character. Use <= pp to ensure backtracking doesn't go too far.
- */
+ Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
+ go too far. */
for(;;)
{
@@ -4135,7 +4143,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
break;
- /* The "byte" (i.e. "code unit") case is the same as non-UTF */
+ /* The "byte" (i.e. "code unit") case is the same as non-UTF */
case OP_ANYBYTE:
fc = Lmax - Lmin;
@@ -5111,7 +5119,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* Positive assertions are like other groups except that PCRE doesn't allow
the effect of (*THEN) to escape beyond an assertion; it is therefore
treated as NOMATCH. (*ACCEPT) is treated as successful assertion, with its
- captures retained. Any other return is an error. */
+ captures and mark retained. Any other return is an error. */
#define Lframe_type F->temp_32[0]
@@ -5128,6 +5136,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
(char *)assert_accept_frame + offsetof(heapframe, ovector),
assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
Foffset_top = assert_accept_frame->offset_top;
+ Fmark = assert_accept_frame->mark;
break;
}
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
@@ -5416,7 +5425,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
Feptr -= number;
}
- /* Save the earliest consulted character, then skip to next op code */
+ /* Save the earliest consulted character, then skip to next opcode */
if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
Fecode += 1 + LINK_SIZE;
@@ -5501,7 +5510,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
frame so that it points to the final branch. */
case OP_ONCE:
- Fback_frame = ((char *)F - (char *)P) + frame_size;
+ Fback_frame = ((char *)F - (char *)P);
for (;;)
{
uint32_t y = GET(P->ecode,1);
@@ -5829,6 +5838,13 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
mb->verb_current_recurse = Fcurrent_recurse;
RRETURN(MATCH_COMMIT);
+ case OP_COMMIT_ARG:
+ Fmark = mb->nomatch_mark = Fecode + 2;
+ RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ mb->verb_current_recurse = Fcurrent_recurse;
+ RRETURN(MATCH_COMMIT);
+
case OP_PRUNE:
RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
@@ -5921,7 +5937,7 @@ in rrc. */
RETURN_SWITCH:
if (Frdepth == 0) return rrc; /* Exit from the top level */
-F = (heapframe *)((char *)F - Fback_frame); /* Back track */
+F = (heapframe *)((char *)F - Fback_frame); /* Backtrack */
mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
#ifdef DEBUG_SHOW_RMATCH
@@ -5934,7 +5950,7 @@ switch (Freturn_id)
LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
- LBL(33) LBL(34) LBL(35)
+ LBL(33) LBL(34) LBL(35) LBL(36)
#ifdef SUPPORT_WIDE_CHARS
LBL(100) LBL(101)
@@ -6275,7 +6291,7 @@ mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
/* If a pattern has very many capturing parentheses, the frame size may be very
large. Ensure that there are at least 10 available frames by getting an initial
vector on the heap if necessary, except when the heap limit prevents this. Get
-fewer if possible. (The heap limit is in kilobytes.) */
+fewer if possible. (The heap limit is in kibibytes.) */
if (frame_size <= START_FRAMES_SIZE/10)
{
diff --git a/src/pcre2_pattern_info.c b/src/pcre2_pattern_info.c
index 906e919..a29f5ef 100644
--- a/src/pcre2_pattern_info.c
+++ b/src/pcre2_pattern_info.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2017 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -390,6 +390,7 @@ while (TRUE)
#endif
case OP_MARK:
+ case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:
diff --git a/src/pcre2_printint.c b/src/pcre2_printint.c
index e4dd53f..bd10c6b 100644
--- a/src/pcre2_printint.c
+++ b/src/pcre2_printint.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2017 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -799,6 +799,7 @@ for(;;)
break;
case OP_MARK:
+ case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:
diff --git a/src/pcre2_serialize.c b/src/pcre2_serialize.c
index d2cc603..cec1a03 100644
--- a/src/pcre2_serialize.c
+++ b/src/pcre2_serialize.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2017 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -127,7 +127,25 @@ dst_bytes += tables_length;
for (i = 0; i < number_of_codes; i++)
{
re = (const pcre2_real_code *)(codes[i]);
- memcpy(dst_bytes, (char *)re, re->blocksize);
+ (void)memcpy(dst_bytes, (char *)re, re->blocksize);
+
+ /* Certain fields in the compiled code block are re-set during
+ deserialization. In order to ensure that the serialized data stream is always
+ the same for the same pattern, set them to zero here. We can't assume the
+ copy of the pattern is correctly aligned for accessing the fields as part of
+ a structure. Note the use of sizeof(void *) in the second of these, to
+ specify the size of a pointer. If sizeof(uint8_t *) is used (tables is a
+ pointer to uint8_t), gcc gives a warning because the first argument is also a
+ pointer to uint8_t. Casting the first argument to (void *) can stop this, but
+ it didn't stop Coverity giving the same complaint. */
+
+ (void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0,
+ sizeof(pcre2_memctl));
+ (void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0,
+ sizeof(void *));
+ (void)memset(dst_bytes + offsetof(pcre2_real_code, executable_jit), 0,
+ sizeof(void *));
+
dst_bytes += re->blocksize;
}
diff --git a/src/pcre2_string_utils.c b/src/pcre2_string_utils.c
index 2a1f282..d6be01a 100644
--- a/src/pcre2_string_utils.c
+++ b/src/pcre2_string_utils.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -51,6 +51,42 @@ functions work only on 8-bit data. */
/*************************************************
+* Emulated memmove() for systems without it *
+*************************************************/
+
+/* This function can make use of bcopy() if it is available. Otherwise do it by
+hand, as there are some non-Unix environments that lack both memmove() and
+bcopy(). */
+
+#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
+void *
+PRIV(memmove)(void *d, const void *s, size_t n)
+{
+#ifdef HAVE_BCOPY
+bcopy(s, d, n); /* Note the argument order: bcopy() takes the source first */
+return d;
+#else
+size_t i;
+unsigned char *dest = (unsigned char *)d;
+const unsigned char *src = (const unsigned char *)s;
+if (dest > src) /* Destination above source: copy from the end, backwards */
+ {
+ dest += n;
+ src += n;
+ for (i = 0; i < n; ++i) *(--dest) = *(--src);
+ return (void *)dest; /* After n decrements, dest is back at d */
+ }
+else /* Destination at or below source: copy forwards from the start */
+ {
+ for (i = 0; i < n; ++i) *dest++ = *src++;
+ return (void *)(dest - n); /* dest advanced by n; step back to d */
+ }
+#endif /* not HAVE_BCOPY */
+}
+#endif /* not VPCOMPAT && not HAVE_MEMMOVE */
+
+
+/*************************************************
* Compare two zero-terminated PCRE2 strings *
*************************************************/
diff --git a/src/pcre2_study.c b/src/pcre2_study.c
index b926867..acbf98b 100644
--- a/src/pcre2_study.c
+++ b/src/pcre2_study.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2017 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -707,6 +707,7 @@ for (;;)
/* Skip these, but we need to add in the name length. */
case OP_MARK:
+ case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:
@@ -956,6 +957,7 @@ do
case OP_CIRCM:
case OP_CLOSE:
case OP_COMMIT:
+ case OP_COMMIT_ARG:
case OP_COND:
case OP_CREF:
case OP_FALSE:
@@ -1274,7 +1276,7 @@ do
break;
/* Single character types set the bits and stop. Note that if PCRE2_UCP
- is set, we do not see these op codes because \d etc are converted to
+ is set, we do not see these opcodes because \d etc are converted to
properties. Therefore, these apply in the case when only characters less
than 256 are recognized to match the types. */
diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c
index 8da951f..ab8d109 100644
--- a/src/pcre2_substitute.c
+++ b/src/pcre2_substitute.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -238,10 +238,12 @@ PCRE2_SPTR repend;
PCRE2_SIZE extra_needed = 0;
PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
PCRE2_SIZE *ovector;
+PCRE2_SIZE ovecsave[3];
buff_offset = 0;
lengthleft = buff_length = *blength;
*blength = PCRE2_UNSET;
+ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
/* Partial matching is not valid. */
@@ -361,13 +363,33 @@ do
}
/* Handle a successful match. Matches that use \K to end before they start
- are not supported. */
-
- if (ovector[1] < ovector[0])
+ or start before the current point in the subject are not supported. */
+
+ if (ovector[1] < ovector[0] || ovector[0] < start_offset)
{
rc = PCRE2_ERROR_BADSUBSPATTERN;
goto EXIT;
}
+
+ /* Check for the same match as previous. This is legitimate after matching an
+ empty string that starts after the initial match offset. We have tried again
+ at the match point in case the pattern is one like /(?<=\G.)/ which can never
+ match at its starting point, so running the match achieves the bumpalong. If
+ we do get the same (null) match at the original match point, it isn't such a
+ pattern, so we now do the empty string magic. In all other cases, a repeat
+ match should never occur. */
+
+ if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
+ {
+ if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
+ {
+ goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
+ ovecsave[2] = start_offset;
+ continue; /* Back to the top of the loop */
+ }
+ rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
+ goto EXIT;
+ }
/* Count substitutions with a paranoid check for integer overflow; surely no
real call to this function would ever hit this! */
@@ -799,13 +821,18 @@ do
} /* End handling a literal code unit */
} /* End of loop for scanning the replacement. */
- /* The replacement has been copied to the output. Update the start offset to
- point to the rest of the subject string. If we matched an empty string,
- do the magic for global matches. */
-
- start_offset = ovector[1];
- goptions = (ovector[0] != ovector[1])? 0 :
+ /* The replacement has been copied to the output. Save the details of this
+ match. See above for how this data is used. If we matched an empty string, do
+ the magic for global matches. Finally, update the start offset to point to
+ the rest of the subject string. */
+
+ ovecsave[0] = ovector[0];
+ ovecsave[1] = ovector[1];
+ ovecsave[2] = start_offset;
+
+ goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 :
PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
+ start_offset = ovector[1];
} while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */
/* Copy the rest of the subject. */
diff --git a/src/pcre2_tables.c b/src/pcre2_tables.c
index 9f8dc29..83d6f9d 100644
--- a/src/pcre2_tables.c
+++ b/src/pcre2_tables.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2017 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -137,9 +137,10 @@ const uint32_t PRIV(ucp_gentype)[] = {
/* This table encodes the rules for finding the end of an extended grapheme
cluster. Every code point has a grapheme break property which is one of the
-ucp_gbXX values defined in pcre2_ucp.h. The 2-dimensional table is indexed by
-the properties of two adjacent code points. The left property selects a word
-from the table, and the right property selects a bit from that word like this:
+ucp_gbXX values defined in pcre2_ucp.h. These changed between Unicode versions
+10 and 11. The 2-dimensional table is indexed by the properties of two adjacent
+code points. The left property selects a word from the table, and the right
+property selects a bit from that word like this:
PRIV(ucp_gbtable)[left-property] & (1 << right-property)
@@ -166,49 +167,41 @@ are implementing).
6. Do not break after Prepend characters.
-7. Do not break within emoji modifier sequences (E_Base or E_Base_GAZ followed
- by E_Modifier). Extend characters are allowed before the modifier; this
- cannot be represented in this table, the code has to deal with it.
+7. Do not break within emoji modifier sequences or emoji zwj sequences. That
+ is, do not break between characters with the Extended_Pictographic property.
+ Extend and ZWJ characters are allowed between the characters; this cannot be
+ represented in this table, the code has to deal with it.
-8. Do not break within emoji zwj sequences (ZWJ followed by Glue_After_Zwj or
- E_Base_GAZ).
-
-9. Do not break within emoji flag sequences. That is, do not break between
+8. Do not break within emoji flag sequences. That is, do not break between
regional indicator (RI) symbols if there are an odd number of RI characters
before the break point. This table encodes "join RI characters"; the code
has to deal with checking for previous adjoining RIs.
-10. Otherwise, break everywhere.
+9. Otherwise, break everywhere.
*/
#define ESZ (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbZWJ)
const uint32_t PRIV(ucp_gbtable)[] = {
- (1<<ucp_gbLF), /* 0 CR */
- 0, /* 1 LF */
- 0, /* 2 Control */
- ESZ, /* 3 Extend */
- ESZ|(1<<ucp_gbPrepend)| /* 4 Prepend */
+ (1<<ucp_gbLF), /* 0 CR */
+ 0, /* 1 LF */
+ 0, /* 2 Control */
+ ESZ, /* 3 Extend */
+ ESZ|(1<<ucp_gbPrepend)| /* 4 Prepend */
(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbT)|
(1<<ucp_gbLV)|(1<<ucp_gbLVT)|(1<<ucp_gbOther)|
- (1<<ucp_gbRegionalIndicator)|
- (1<<ucp_gbE_Base)|(1<<ucp_gbE_Modifier)|
- (1<<ucp_gbE_Base_GAZ)|
- (1<<ucp_gbZWJ)|(1<<ucp_gbGlue_After_Zwj),
- ESZ, /* 5 SpacingMark */
- ESZ|(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)| /* 6 L */
+ (1<<ucp_gbRegionalIndicator),
+ ESZ, /* 5 SpacingMark */
+ ESZ|(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)| /* 6 L */
(1<<ucp_gbLVT),
- ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 7 V */
- ESZ|(1<<ucp_gbT), /* 8 T */
- ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 9 LV */
- ESZ|(1<<ucp_gbT), /* 10 LVT */
- (1<<ucp_gbRegionalIndicator), /* 11 RegionalIndicator */
- ESZ, /* 12 Other */
- ESZ|(1<<ucp_gbE_Modifier), /* 13 E_Base */
- ESZ, /* 14 E_Modifier */
- ESZ|(1<<ucp_gbE_Modifier), /* 15 E_Base_GAZ */
- ESZ|(1<<ucp_gbGlue_After_Zwj)|(1<<ucp_gbE_Base_GAZ), /* 16 ZWJ */
- ESZ /* 12 Glue_After_Zwj */
+ ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 7 V */
+ ESZ|(1<<ucp_gbT), /* 8 T */
+ ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 9 LV */
+ ESZ|(1<<ucp_gbT), /* 10 LVT */
+ (1<<ucp_gbRegionalIndicator), /* 11 RegionalIndicator */
+ ESZ, /* 12 Other */
+ ESZ, /* 13 ZWJ */
+ ESZ|(1<<ucp_gbExtended_Pictographic) /* 14 Extended Pictographic */
};
#undef ESZ
@@ -282,6 +275,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
+#define STRING_Dogra0 STR_D STR_o STR_g STR_r STR_a "\0"
#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
#define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0"
@@ -292,9 +286,11 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Grantha0 STR_G STR_r STR_a STR_n STR_t STR_h STR_a "\0"
#define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0"
#define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
+#define STRING_Gunjala_Gondi0 STR_G STR_u STR_n STR_j STR_a STR_l STR_a STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0"
#define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
#define STRING_Han0 STR_H STR_a STR_n "\0"
#define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0"
+#define STRING_Hanifi_Rohingya0 STR_H STR_a STR_n STR_i STR_f STR_i STR_UNDERSCORE STR_R STR_o STR_h STR_i STR_n STR_g STR_y STR_a "\0"
#define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0"
#define STRING_Hatran0 STR_H STR_a STR_t STR_r STR_a STR_n "\0"
#define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0"
@@ -330,6 +326,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
#define STRING_M0 STR_M "\0"
#define STRING_Mahajani0 STR_M STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0"
+#define STRING_Makasar0 STR_M STR_a STR_k STR_a STR_s STR_a STR_r "\0"
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
#define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0"
@@ -337,6 +334,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Masaram_Gondi0 STR_M STR_a STR_s STR_a STR_r STR_a STR_m STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0"
#define STRING_Mc0 STR_M STR_c "\0"
#define STRING_Me0 STR_M STR_e "\0"
+#define STRING_Medefaidrin0 STR_M STR_e STR_d STR_e STR_f STR_a STR_i STR_d STR_r STR_i STR_n "\0"
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
#define STRING_Mende_Kikakui0 STR_M STR_e STR_n STR_d STR_e STR_UNDERSCORE STR_K STR_i STR_k STR_a STR_k STR_u STR_i "\0"
#define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0"
@@ -364,6 +362,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
#define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0"
#define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0"
+#define STRING_Old_Sogdian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_g STR_d STR_i STR_a STR_n "\0"
#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
@@ -397,6 +396,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Sk0 STR_S STR_k "\0"
#define STRING_Sm0 STR_S STR_m "\0"
#define STRING_So0 STR_S STR_o "\0"
+#define STRING_Sogdian0 STR_S STR_o STR_g STR_d STR_i STR_a STR_n "\0"
#define STRING_Sora_Sompeng0 STR_S STR_o STR_r STR_a STR_UNDERSCORE STR_S STR_o STR_m STR_p STR_e STR_n STR_g "\0"
#define STRING_Soyombo0 STR_S STR_o STR_y STR_o STR_m STR_b STR_o "\0"
#define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0"
@@ -469,6 +469,7 @@ const char PRIV(utt_names)[] =
STRING_Cyrillic0
STRING_Deseret0
STRING_Devanagari0
+ STRING_Dogra0
STRING_Duployan0
STRING_Egyptian_Hieroglyphs0
STRING_Elbasan0
@@ -479,9 +480,11 @@ const char PRIV(utt_names)[] =
STRING_Grantha0
STRING_Greek0
STRING_Gujarati0
+ STRING_Gunjala_Gondi0
STRING_Gurmukhi0
STRING_Han0
STRING_Hangul0
+ STRING_Hanifi_Rohingya0
STRING_Hanunoo0
STRING_Hatran0
STRING_Hebrew0
@@ -517,6 +520,7 @@ const char PRIV(utt_names)[] =
STRING_Lydian0
STRING_M0
STRING_Mahajani0
+ STRING_Makasar0
STRING_Malayalam0
STRING_Mandaic0
STRING_Manichaean0
@@ -524,6 +528,7 @@ const char PRIV(utt_names)[] =
STRING_Masaram_Gondi0
STRING_Mc0
STRING_Me0
+ STRING_Medefaidrin0
STRING_Meetei_Mayek0
STRING_Mende_Kikakui0
STRING_Meroitic_Cursive0
@@ -551,6 +556,7 @@ const char PRIV(utt_names)[] =
STRING_Old_North_Arabian0
STRING_Old_Permic0
STRING_Old_Persian0
+ STRING_Old_Sogdian0
STRING_Old_South_Arabian0
STRING_Old_Turkic0
STRING_Oriya0
@@ -584,6 +590,7 @@ const char PRIV(utt_names)[] =
STRING_Sk0
STRING_Sm0
STRING_So0
+ STRING_Sogdian0
STRING_Sora_Sompeng0
STRING_Soyombo0
STRING_Sundanese0
@@ -656,154 +663,161 @@ const ucp_type_table PRIV(utt)[] = {
{ 265, PT_SC, ucp_Cyrillic },
{ 274, PT_SC, ucp_Deseret },
{ 282, PT_SC, ucp_Devanagari },
- { 293, PT_SC, ucp_Duployan },
- { 302, PT_SC, ucp_Egyptian_Hieroglyphs },
- { 323, PT_SC, ucp_Elbasan },
- { 331, PT_SC, ucp_Ethiopic },
- { 340, PT_SC, ucp_Georgian },
- { 349, PT_SC, ucp_Glagolitic },
- { 360, PT_SC, ucp_Gothic },
- { 367, PT_SC, ucp_Grantha },
- { 375, PT_SC, ucp_Greek },
- { 381, PT_SC, ucp_Gujarati },
- { 390, PT_SC, ucp_Gurmukhi },
- { 399, PT_SC, ucp_Han },
- { 403, PT_SC, ucp_Hangul },
- { 410, PT_SC, ucp_Hanunoo },
- { 418, PT_SC, ucp_Hatran },
- { 425, PT_SC, ucp_Hebrew },
- { 432, PT_SC, ucp_Hiragana },
- { 441, PT_SC, ucp_Imperial_Aramaic },
- { 458, PT_SC, ucp_Inherited },
- { 468, PT_SC, ucp_Inscriptional_Pahlavi },
- { 490, PT_SC, ucp_Inscriptional_Parthian },
- { 513, PT_SC, ucp_Javanese },
- { 522, PT_SC, ucp_Kaithi },
- { 529, PT_SC, ucp_Kannada },
- { 537, PT_SC, ucp_Katakana },
- { 546, PT_SC, ucp_Kayah_Li },
- { 555, PT_SC, ucp_Kharoshthi },
- { 566, PT_SC, ucp_Khmer },
- { 572, PT_SC, ucp_Khojki },
- { 579, PT_SC, ucp_Khudawadi },
- { 589, PT_GC, ucp_L },
- { 591, PT_LAMP, 0 },
- { 594, PT_SC, ucp_Lao },
- { 598, PT_SC, ucp_Latin },
- { 604, PT_SC, ucp_Lepcha },
- { 611, PT_SC, ucp_Limbu },
- { 617, PT_SC, ucp_Linear_A },
- { 626, PT_SC, ucp_Linear_B },
- { 635, PT_SC, ucp_Lisu },
- { 640, PT_PC, ucp_Ll },
- { 643, PT_PC, ucp_Lm },
- { 646, PT_PC, ucp_Lo },
- { 649, PT_PC, ucp_Lt },
- { 652, PT_PC, ucp_Lu },
- { 655, PT_SC, ucp_Lycian },
- { 662, PT_SC, ucp_Lydian },
- { 669, PT_GC, ucp_M },
- { 671, PT_SC, ucp_Mahajani },
- { 680, PT_SC, ucp_Malayalam },
- { 690, PT_SC, ucp_Mandaic },
- { 698, PT_SC, ucp_Manichaean },
- { 709, PT_SC, ucp_Marchen },
- { 717, PT_SC, ucp_Masaram_Gondi },
- { 731, PT_PC, ucp_Mc },
- { 734, PT_PC, ucp_Me },
- { 737, PT_SC, ucp_Meetei_Mayek },
- { 750, PT_SC, ucp_Mende_Kikakui },
- { 764, PT_SC, ucp_Meroitic_Cursive },
- { 781, PT_SC, ucp_Meroitic_Hieroglyphs },
- { 802, PT_SC, ucp_Miao },
- { 807, PT_PC, ucp_Mn },
- { 810, PT_SC, ucp_Modi },
- { 815, PT_SC, ucp_Mongolian },
- { 825, PT_SC, ucp_Mro },
- { 829, PT_SC, ucp_Multani },
- { 837, PT_SC, ucp_Myanmar },
- { 845, PT_GC, ucp_N },
- { 847, PT_SC, ucp_Nabataean },
- { 857, PT_PC, ucp_Nd },
- { 860, PT_SC, ucp_New_Tai_Lue },
- { 872, PT_SC, ucp_Newa },
- { 877, PT_SC, ucp_Nko },
- { 881, PT_PC, ucp_Nl },
- { 884, PT_PC, ucp_No },
- { 887, PT_SC, ucp_Nushu },
- { 893, PT_SC, ucp_Ogham },
- { 899, PT_SC, ucp_Ol_Chiki },
- { 908, PT_SC, ucp_Old_Hungarian },
- { 922, PT_SC, ucp_Old_Italic },
- { 933, PT_SC, ucp_Old_North_Arabian },
- { 951, PT_SC, ucp_Old_Permic },
- { 962, PT_SC, ucp_Old_Persian },
- { 974, PT_SC, ucp_Old_South_Arabian },
- { 992, PT_SC, ucp_Old_Turkic },
- { 1003, PT_SC, ucp_Oriya },
- { 1009, PT_SC, ucp_Osage },
- { 1015, PT_SC, ucp_Osmanya },
- { 1023, PT_GC, ucp_P },
- { 1025, PT_SC, ucp_Pahawh_Hmong },
- { 1038, PT_SC, ucp_Palmyrene },
- { 1048, PT_SC, ucp_Pau_Cin_Hau },
- { 1060, PT_PC, ucp_Pc },
- { 1063, PT_PC, ucp_Pd },
- { 1066, PT_PC, ucp_Pe },
- { 1069, PT_PC, ucp_Pf },
- { 1072, PT_SC, ucp_Phags_Pa },
- { 1081, PT_SC, ucp_Phoenician },
- { 1092, PT_PC, ucp_Pi },
- { 1095, PT_PC, ucp_Po },
- { 1098, PT_PC, ucp_Ps },
- { 1101, PT_SC, ucp_Psalter_Pahlavi },
- { 1117, PT_SC, ucp_Rejang },
- { 1124, PT_SC, ucp_Runic },
- { 1130, PT_GC, ucp_S },
- { 1132, PT_SC, ucp_Samaritan },
- { 1142, PT_SC, ucp_Saurashtra },
- { 1153, PT_PC, ucp_Sc },
- { 1156, PT_SC, ucp_Sharada },
- { 1164, PT_SC, ucp_Shavian },
- { 1172, PT_SC, ucp_Siddham },
- { 1180, PT_SC, ucp_SignWriting },
- { 1192, PT_SC, ucp_Sinhala },
- { 1200, PT_PC, ucp_Sk },
- { 1203, PT_PC, ucp_Sm },
- { 1206, PT_PC, ucp_So },
- { 1209, PT_SC, ucp_Sora_Sompeng },
- { 1222, PT_SC, ucp_Soyombo },
- { 1230, PT_SC, ucp_Sundanese },
- { 1240, PT_SC, ucp_Syloti_Nagri },
- { 1253, PT_SC, ucp_Syriac },
- { 1260, PT_SC, ucp_Tagalog },
- { 1268, PT_SC, ucp_Tagbanwa },
- { 1277, PT_SC, ucp_Tai_Le },
- { 1284, PT_SC, ucp_Tai_Tham },
- { 1293, PT_SC, ucp_Tai_Viet },
- { 1302, PT_SC, ucp_Takri },
- { 1308, PT_SC, ucp_Tamil },
- { 1314, PT_SC, ucp_Tangut },
- { 1321, PT_SC, ucp_Telugu },
- { 1328, PT_SC, ucp_Thaana },
- { 1335, PT_SC, ucp_Thai },
- { 1340, PT_SC, ucp_Tibetan },
- { 1348, PT_SC, ucp_Tifinagh },
- { 1357, PT_SC, ucp_Tirhuta },
- { 1365, PT_SC, ucp_Ugaritic },
- { 1374, PT_SC, ucp_Vai },
- { 1378, PT_SC, ucp_Warang_Citi },
- { 1390, PT_ALNUM, 0 },
- { 1394, PT_PXSPACE, 0 },
- { 1398, PT_SPACE, 0 },
- { 1402, PT_UCNC, 0 },
- { 1406, PT_WORD, 0 },
- { 1410, PT_SC, ucp_Yi },
- { 1413, PT_GC, ucp_Z },
- { 1415, PT_SC, ucp_Zanabazar_Square },
- { 1432, PT_PC, ucp_Zl },
- { 1435, PT_PC, ucp_Zp },
- { 1438, PT_PC, ucp_Zs }
+ { 293, PT_SC, ucp_Dogra },
+ { 299, PT_SC, ucp_Duployan },
+ { 308, PT_SC, ucp_Egyptian_Hieroglyphs },
+ { 329, PT_SC, ucp_Elbasan },
+ { 337, PT_SC, ucp_Ethiopic },
+ { 346, PT_SC, ucp_Georgian },
+ { 355, PT_SC, ucp_Glagolitic },
+ { 366, PT_SC, ucp_Gothic },
+ { 373, PT_SC, ucp_Grantha },
+ { 381, PT_SC, ucp_Greek },
+ { 387, PT_SC, ucp_Gujarati },
+ { 396, PT_SC, ucp_Gunjala_Gondi },
+ { 410, PT_SC, ucp_Gurmukhi },
+ { 419, PT_SC, ucp_Han },
+ { 423, PT_SC, ucp_Hangul },
+ { 430, PT_SC, ucp_Hanifi_Rohingya },
+ { 446, PT_SC, ucp_Hanunoo },
+ { 454, PT_SC, ucp_Hatran },
+ { 461, PT_SC, ucp_Hebrew },
+ { 468, PT_SC, ucp_Hiragana },
+ { 477, PT_SC, ucp_Imperial_Aramaic },
+ { 494, PT_SC, ucp_Inherited },
+ { 504, PT_SC, ucp_Inscriptional_Pahlavi },
+ { 526, PT_SC, ucp_Inscriptional_Parthian },
+ { 549, PT_SC, ucp_Javanese },
+ { 558, PT_SC, ucp_Kaithi },
+ { 565, PT_SC, ucp_Kannada },
+ { 573, PT_SC, ucp_Katakana },
+ { 582, PT_SC, ucp_Kayah_Li },
+ { 591, PT_SC, ucp_Kharoshthi },
+ { 602, PT_SC, ucp_Khmer },
+ { 608, PT_SC, ucp_Khojki },
+ { 615, PT_SC, ucp_Khudawadi },
+ { 625, PT_GC, ucp_L },
+ { 627, PT_LAMP, 0 },
+ { 630, PT_SC, ucp_Lao },
+ { 634, PT_SC, ucp_Latin },
+ { 640, PT_SC, ucp_Lepcha },
+ { 647, PT_SC, ucp_Limbu },
+ { 653, PT_SC, ucp_Linear_A },
+ { 662, PT_SC, ucp_Linear_B },
+ { 671, PT_SC, ucp_Lisu },
+ { 676, PT_PC, ucp_Ll },
+ { 679, PT_PC, ucp_Lm },
+ { 682, PT_PC, ucp_Lo },
+ { 685, PT_PC, ucp_Lt },
+ { 688, PT_PC, ucp_Lu },
+ { 691, PT_SC, ucp_Lycian },
+ { 698, PT_SC, ucp_Lydian },
+ { 705, PT_GC, ucp_M },
+ { 707, PT_SC, ucp_Mahajani },
+ { 716, PT_SC, ucp_Makasar },
+ { 724, PT_SC, ucp_Malayalam },
+ { 734, PT_SC, ucp_Mandaic },
+ { 742, PT_SC, ucp_Manichaean },
+ { 753, PT_SC, ucp_Marchen },
+ { 761, PT_SC, ucp_Masaram_Gondi },
+ { 775, PT_PC, ucp_Mc },
+ { 778, PT_PC, ucp_Me },
+ { 781, PT_SC, ucp_Medefaidrin },
+ { 793, PT_SC, ucp_Meetei_Mayek },
+ { 806, PT_SC, ucp_Mende_Kikakui },
+ { 820, PT_SC, ucp_Meroitic_Cursive },
+ { 837, PT_SC, ucp_Meroitic_Hieroglyphs },
+ { 858, PT_SC, ucp_Miao },
+ { 863, PT_PC, ucp_Mn },
+ { 866, PT_SC, ucp_Modi },
+ { 871, PT_SC, ucp_Mongolian },
+ { 881, PT_SC, ucp_Mro },
+ { 885, PT_SC, ucp_Multani },
+ { 893, PT_SC, ucp_Myanmar },
+ { 901, PT_GC, ucp_N },
+ { 903, PT_SC, ucp_Nabataean },
+ { 913, PT_PC, ucp_Nd },
+ { 916, PT_SC, ucp_New_Tai_Lue },
+ { 928, PT_SC, ucp_Newa },
+ { 933, PT_SC, ucp_Nko },
+ { 937, PT_PC, ucp_Nl },
+ { 940, PT_PC, ucp_No },
+ { 943, PT_SC, ucp_Nushu },
+ { 949, PT_SC, ucp_Ogham },
+ { 955, PT_SC, ucp_Ol_Chiki },
+ { 964, PT_SC, ucp_Old_Hungarian },
+ { 978, PT_SC, ucp_Old_Italic },
+ { 989, PT_SC, ucp_Old_North_Arabian },
+ { 1007, PT_SC, ucp_Old_Permic },
+ { 1018, PT_SC, ucp_Old_Persian },
+ { 1030, PT_SC, ucp_Old_Sogdian },
+ { 1042, PT_SC, ucp_Old_South_Arabian },
+ { 1060, PT_SC, ucp_Old_Turkic },
+ { 1071, PT_SC, ucp_Oriya },
+ { 1077, PT_SC, ucp_Osage },
+ { 1083, PT_SC, ucp_Osmanya },
+ { 1091, PT_GC, ucp_P },
+ { 1093, PT_SC, ucp_Pahawh_Hmong },
+ { 1106, PT_SC, ucp_Palmyrene },
+ { 1116, PT_SC, ucp_Pau_Cin_Hau },
+ { 1128, PT_PC, ucp_Pc },
+ { 1131, PT_PC, ucp_Pd },
+ { 1134, PT_PC, ucp_Pe },
+ { 1137, PT_PC, ucp_Pf },
+ { 1140, PT_SC, ucp_Phags_Pa },
+ { 1149, PT_SC, ucp_Phoenician },
+ { 1160, PT_PC, ucp_Pi },
+ { 1163, PT_PC, ucp_Po },
+ { 1166, PT_PC, ucp_Ps },
+ { 1169, PT_SC, ucp_Psalter_Pahlavi },
+ { 1185, PT_SC, ucp_Rejang },
+ { 1192, PT_SC, ucp_Runic },
+ { 1198, PT_GC, ucp_S },
+ { 1200, PT_SC, ucp_Samaritan },
+ { 1210, PT_SC, ucp_Saurashtra },
+ { 1221, PT_PC, ucp_Sc },
+ { 1224, PT_SC, ucp_Sharada },
+ { 1232, PT_SC, ucp_Shavian },
+ { 1240, PT_SC, ucp_Siddham },
+ { 1248, PT_SC, ucp_SignWriting },
+ { 1260, PT_SC, ucp_Sinhala },
+ { 1268, PT_PC, ucp_Sk },
+ { 1271, PT_PC, ucp_Sm },
+ { 1274, PT_PC, ucp_So },
+ { 1277, PT_SC, ucp_Sogdian },
+ { 1285, PT_SC, ucp_Sora_Sompeng },
+ { 1298, PT_SC, ucp_Soyombo },
+ { 1306, PT_SC, ucp_Sundanese },
+ { 1316, PT_SC, ucp_Syloti_Nagri },
+ { 1329, PT_SC, ucp_Syriac },
+ { 1336, PT_SC, ucp_Tagalog },
+ { 1344, PT_SC, ucp_Tagbanwa },
+ { 1353, PT_SC, ucp_Tai_Le },
+ { 1360, PT_SC, ucp_Tai_Tham },
+ { 1369, PT_SC, ucp_Tai_Viet },
+ { 1378, PT_SC, ucp_Takri },
+ { 1384, PT_SC, ucp_Tamil },
+ { 1390, PT_SC, ucp_Tangut },
+ { 1397, PT_SC, ucp_Telugu },
+ { 1404, PT_SC, ucp_Thaana },
+ { 1411, PT_SC, ucp_Thai },
+ { 1416, PT_SC, ucp_Tibetan },
+ { 1424, PT_SC, ucp_Tifinagh },
+ { 1433, PT_SC, ucp_Tirhuta },
+ { 1441, PT_SC, ucp_Ugaritic },
+ { 1450, PT_SC, ucp_Vai },
+ { 1454, PT_SC, ucp_Warang_Citi },
+ { 1466, PT_ALNUM, 0 },
+ { 1470, PT_PXSPACE, 0 },
+ { 1474, PT_SPACE, 0 },
+ { 1478, PT_UCNC, 0 },
+ { 1482, PT_WORD, 0 },
+ { 1486, PT_SC, ucp_Yi },
+ { 1489, PT_GC, ucp_Z },
+ { 1491, PT_SC, ucp_Zanabazar_Square },
+ { 1508, PT_PC, ucp_Zl },
+ { 1511, PT_PC, ucp_Zp },
+ { 1514, PT_PC, ucp_Zs }
};
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
diff --git a/src/pcre2_ucd.c b/src/pcre2_ucd.c
index ac7649b..275a4be 100644
--- a/src/pcre2_ucd.c
+++ b/src/pcre2_ucd.c
@@ -20,7 +20,7 @@ needed. */
/* Unicode character database. */
/* This file was autogenerated by the MultiStage2.py script. */
-/* Total size: 80808 bytes, block size: 128. */
+/* Total size: 92592 bytes, block size: 128. */
/* The tables herein are needed only when UCP support is built,
and in PCRE2 that happens automatically with UTF support.
@@ -34,12 +34,12 @@ Instead, just supply small dummy tables. */
#ifndef SUPPORT_UNICODE
const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0 }};
-const uint8_t PRIV(ucd_stage1)[] = {0};
+const uint16_t PRIV(ucd_stage1)[] = {0};
const uint16_t PRIV(ucd_stage2)[] = {0};
const uint32_t PRIV(ucd_caseless_sets)[] = {0};
#else
-const char *PRIV(unicode_version) = "10.0.0";
+const char *PRIV(unicode_version) = "11.0.0";
/* If the 32-bit library is run in non-32-bit mode, character values
greater than 0x10ffff may be encountered. For these we set up a
@@ -104,7 +104,7 @@ const uint32_t PRIV(ucd_caseless_sets)[] = {
#ifndef PCRE2_PCRE2TEST
-const ucd_record PRIV(ucd_records)[] = { /* 6568 bytes, record size 8 */
+const ucd_record PRIV(ucd_records)[] = { /* 6832 bytes, record size 8 */
{ 9, 0, 2, 0, 0, }, /* 0 */
{ 9, 0, 1, 0, 0, }, /* 1 */
{ 9, 0, 0, 0, 0, }, /* 2 */
@@ -125,1357 +125,1390 @@ const ucd_record PRIV(ucd_records)[] = { /* 6568 bytes, record size 8 */
{ 33, 5, 12, 100, -32, }, /* 17 */
{ 33, 5, 12, 1, -32, }, /* 18 */
{ 9, 26, 12, 0, 0, }, /* 19 */
- { 33, 7, 12, 0, 0, }, /* 20 */
- { 9, 20, 12, 0, 0, }, /* 21 */
- { 9, 1, 2, 0, 0, }, /* 22 */
- { 9, 15, 12, 0, 0, }, /* 23 */
- { 9, 5, 12, 26, 775, }, /* 24 */
- { 9, 19, 12, 0, 0, }, /* 25 */
- { 33, 9, 12, 104, 32, }, /* 26 */
- { 33, 5, 12, 0, 7615, }, /* 27 */
- { 33, 5, 12, 104, -32, }, /* 28 */
- { 33, 5, 12, 0, 121, }, /* 29 */
- { 33, 9, 12, 0, 1, }, /* 30 */
- { 33, 5, 12, 0, -1, }, /* 31 */
- { 33, 9, 12, 0, 0, }, /* 32 */
- { 33, 5, 12, 0, 0, }, /* 33 */
- { 33, 9, 12, 0, -121, }, /* 34 */
- { 33, 5, 12, 1, -268, }, /* 35 */
- { 33, 5, 12, 0, 195, }, /* 36 */
- { 33, 9, 12, 0, 210, }, /* 37 */
- { 33, 9, 12, 0, 206, }, /* 38 */
- { 33, 9, 12, 0, 205, }, /* 39 */
- { 33, 9, 12, 0, 79, }, /* 40 */
- { 33, 9, 12, 0, 202, }, /* 41 */
- { 33, 9, 12, 0, 203, }, /* 42 */
- { 33, 9, 12, 0, 207, }, /* 43 */
- { 33, 5, 12, 0, 97, }, /* 44 */
- { 33, 9, 12, 0, 211, }, /* 45 */
- { 33, 9, 12, 0, 209, }, /* 46 */
- { 33, 5, 12, 0, 163, }, /* 47 */
- { 33, 9, 12, 0, 213, }, /* 48 */
- { 33, 5, 12, 0, 130, }, /* 49 */
- { 33, 9, 12, 0, 214, }, /* 50 */
- { 33, 9, 12, 0, 218, }, /* 51 */
- { 33, 9, 12, 0, 217, }, /* 52 */
- { 33, 9, 12, 0, 219, }, /* 53 */
- { 33, 5, 12, 0, 56, }, /* 54 */
- { 33, 9, 12, 5, 2, }, /* 55 */
- { 33, 8, 12, 5, 1, }, /* 56 */
- { 33, 5, 12, 5, -2, }, /* 57 */
- { 33, 9, 12, 9, 2, }, /* 58 */
- { 33, 8, 12, 9, 1, }, /* 59 */
- { 33, 5, 12, 9, -2, }, /* 60 */
- { 33, 9, 12, 13, 2, }, /* 61 */
- { 33, 8, 12, 13, 1, }, /* 62 */
- { 33, 5, 12, 13, -2, }, /* 63 */
- { 33, 5, 12, 0, -79, }, /* 64 */
- { 33, 9, 12, 17, 2, }, /* 65 */
- { 33, 8, 12, 17, 1, }, /* 66 */
- { 33, 5, 12, 17, -2, }, /* 67 */
- { 33, 9, 12, 0, -97, }, /* 68 */
- { 33, 9, 12, 0, -56, }, /* 69 */
- { 33, 9, 12, 0, -130, }, /* 70 */
- { 33, 9, 12, 0, 10795, }, /* 71 */
- { 33, 9, 12, 0, -163, }, /* 72 */
- { 33, 9, 12, 0, 10792, }, /* 73 */
- { 33, 5, 12, 0, 10815, }, /* 74 */
- { 33, 9, 12, 0, -195, }, /* 75 */
- { 33, 9, 12, 0, 69, }, /* 76 */
- { 33, 9, 12, 0, 71, }, /* 77 */
- { 33, 5, 12, 0, 10783, }, /* 78 */
- { 33, 5, 12, 0, 10780, }, /* 79 */
- { 33, 5, 12, 0, 10782, }, /* 80 */
- { 33, 5, 12, 0, -210, }, /* 81 */
- { 33, 5, 12, 0, -206, }, /* 82 */
- { 33, 5, 12, 0, -205, }, /* 83 */
- { 33, 5, 12, 0, -202, }, /* 84 */
- { 33, 5, 12, 0, -203, }, /* 85 */
- { 33, 5, 12, 0, 42319, }, /* 86 */
- { 33, 5, 12, 0, 42315, }, /* 87 */
- { 33, 5, 12, 0, -207, }, /* 88 */
- { 33, 5, 12, 0, 42280, }, /* 89 */
- { 33, 5, 12, 0, 42308, }, /* 90 */
- { 33, 5, 12, 0, -209, }, /* 91 */
- { 33, 5, 12, 0, -211, }, /* 92 */
- { 33, 5, 12, 0, 10743, }, /* 93 */
- { 33, 5, 12, 0, 42305, }, /* 94 */
- { 33, 5, 12, 0, 10749, }, /* 95 */
- { 33, 5, 12, 0, -213, }, /* 96 */
- { 33, 5, 12, 0, -214, }, /* 97 */
- { 33, 5, 12, 0, 10727, }, /* 98 */
- { 33, 5, 12, 0, -218, }, /* 99 */
- { 33, 5, 12, 0, 42282, }, /* 100 */
- { 33, 5, 12, 0, -69, }, /* 101 */
- { 33, 5, 12, 0, -217, }, /* 102 */
- { 33, 5, 12, 0, -71, }, /* 103 */
- { 33, 5, 12, 0, -219, }, /* 104 */
- { 33, 5, 12, 0, 42261, }, /* 105 */
- { 33, 5, 12, 0, 42258, }, /* 106 */
- { 33, 6, 12, 0, 0, }, /* 107 */
- { 9, 6, 12, 0, 0, }, /* 108 */
- { 3, 24, 12, 0, 0, }, /* 109 */
- { 27, 12, 3, 0, 0, }, /* 110 */
- { 27, 12, 3, 21, 116, }, /* 111 */
- { 19, 9, 12, 0, 1, }, /* 112 */
- { 19, 5, 12, 0, -1, }, /* 113 */
- { 19, 24, 12, 0, 0, }, /* 114 */
- { 9, 2, 12, 0, 0, }, /* 115 */
- { 19, 6, 12, 0, 0, }, /* 116 */
- { 19, 5, 12, 0, 130, }, /* 117 */
- { 19, 9, 12, 0, 116, }, /* 118 */
- { 19, 9, 12, 0, 38, }, /* 119 */
- { 19, 9, 12, 0, 37, }, /* 120 */
- { 19, 9, 12, 0, 64, }, /* 121 */
- { 19, 9, 12, 0, 63, }, /* 122 */
- { 19, 5, 12, 0, 0, }, /* 123 */
- { 19, 9, 12, 0, 32, }, /* 124 */
- { 19, 9, 12, 34, 32, }, /* 125 */
- { 19, 9, 12, 59, 32, }, /* 126 */
- { 19, 9, 12, 38, 32, }, /* 127 */
- { 19, 9, 12, 21, 32, }, /* 128 */
- { 19, 9, 12, 51, 32, }, /* 129 */
- { 19, 9, 12, 26, 32, }, /* 130 */
- { 19, 9, 12, 47, 32, }, /* 131 */
- { 19, 9, 12, 55, 32, }, /* 132 */
- { 19, 9, 12, 30, 32, }, /* 133 */
- { 19, 9, 12, 43, 32, }, /* 134 */
- { 19, 9, 12, 96, 32, }, /* 135 */
- { 19, 5, 12, 0, -38, }, /* 136 */
- { 19, 5, 12, 0, -37, }, /* 137 */
- { 19, 5, 12, 0, -32, }, /* 138 */
- { 19, 5, 12, 34, -32, }, /* 139 */
- { 19, 5, 12, 59, -32, }, /* 140 */
- { 19, 5, 12, 38, -32, }, /* 141 */
- { 19, 5, 12, 21, -116, }, /* 142 */
- { 19, 5, 12, 51, -32, }, /* 143 */
- { 19, 5, 12, 26, -775, }, /* 144 */
- { 19, 5, 12, 47, -32, }, /* 145 */
- { 19, 5, 12, 55, -32, }, /* 146 */
- { 19, 5, 12, 30, 1, }, /* 147 */
- { 19, 5, 12, 30, -32, }, /* 148 */
- { 19, 5, 12, 43, -32, }, /* 149 */
- { 19, 5, 12, 96, -32, }, /* 150 */
- { 19, 5, 12, 0, -64, }, /* 151 */
- { 19, 5, 12, 0, -63, }, /* 152 */
- { 19, 9, 12, 0, 8, }, /* 153 */
- { 19, 5, 12, 34, -30, }, /* 154 */
- { 19, 5, 12, 38, -25, }, /* 155 */
- { 19, 9, 12, 0, 0, }, /* 156 */
- { 19, 5, 12, 43, -15, }, /* 157 */
- { 19, 5, 12, 47, -22, }, /* 158 */
- { 19, 5, 12, 0, -8, }, /* 159 */
- { 10, 9, 12, 0, 1, }, /* 160 */
- { 10, 5, 12, 0, -1, }, /* 161 */
- { 19, 5, 12, 51, -54, }, /* 162 */
- { 19, 5, 12, 55, -48, }, /* 163 */
- { 19, 5, 12, 0, 7, }, /* 164 */
- { 19, 5, 12, 0, -116, }, /* 165 */
- { 19, 9, 12, 38, -60, }, /* 166 */
- { 19, 5, 12, 59, -64, }, /* 167 */
- { 19, 25, 12, 0, 0, }, /* 168 */
- { 19, 9, 12, 0, -7, }, /* 169 */
- { 19, 9, 12, 0, -130, }, /* 170 */
- { 12, 9, 12, 0, 80, }, /* 171 */
- { 12, 9, 12, 0, 32, }, /* 172 */
- { 12, 9, 12, 63, 32, }, /* 173 */
- { 12, 9, 12, 67, 32, }, /* 174 */
- { 12, 9, 12, 71, 32, }, /* 175 */
- { 12, 9, 12, 75, 32, }, /* 176 */
- { 12, 9, 12, 79, 32, }, /* 177 */
- { 12, 9, 12, 84, 32, }, /* 178 */
- { 12, 5, 12, 0, -32, }, /* 179 */
- { 12, 5, 12, 63, -32, }, /* 180 */
- { 12, 5, 12, 67, -32, }, /* 181 */
- { 12, 5, 12, 71, -32, }, /* 182 */
- { 12, 5, 12, 75, -32, }, /* 183 */
- { 12, 5, 12, 79, -32, }, /* 184 */
- { 12, 5, 12, 84, -32, }, /* 185 */
- { 12, 5, 12, 0, -80, }, /* 186 */
- { 12, 9, 12, 0, 1, }, /* 187 */
- { 12, 5, 12, 0, -1, }, /* 188 */
- { 12, 9, 12, 88, 1, }, /* 189 */
- { 12, 5, 12, 88, -1, }, /* 190 */
- { 12, 26, 12, 0, 0, }, /* 191 */
- { 12, 12, 3, 0, 0, }, /* 192 */
- { 12, 11, 3, 0, 0, }, /* 193 */
- { 12, 9, 12, 0, 15, }, /* 194 */
- { 12, 5, 12, 0, -15, }, /* 195 */
- { 1, 9, 12, 0, 48, }, /* 196 */
- { 1, 6, 12, 0, 0, }, /* 197 */
- { 1, 21, 12, 0, 0, }, /* 198 */
- { 1, 5, 12, 0, -48, }, /* 199 */
+ { 9, 26, 14, 0, 0, }, /* 20 */
+ { 33, 7, 12, 0, 0, }, /* 21 */
+ { 9, 20, 12, 0, 0, }, /* 22 */
+ { 9, 1, 2, 0, 0, }, /* 23 */
+ { 9, 15, 12, 0, 0, }, /* 24 */
+ { 9, 5, 12, 26, 775, }, /* 25 */
+ { 9, 19, 12, 0, 0, }, /* 26 */
+ { 33, 9, 12, 104, 32, }, /* 27 */
+ { 33, 5, 12, 0, 7615, }, /* 28 */
+ { 33, 5, 12, 104, -32, }, /* 29 */
+ { 33, 5, 12, 0, 121, }, /* 30 */
+ { 33, 9, 12, 0, 1, }, /* 31 */
+ { 33, 5, 12, 0, -1, }, /* 32 */
+ { 33, 9, 12, 0, 0, }, /* 33 */
+ { 33, 5, 12, 0, 0, }, /* 34 */
+ { 33, 9, 12, 0, -121, }, /* 35 */
+ { 33, 5, 12, 1, -268, }, /* 36 */
+ { 33, 5, 12, 0, 195, }, /* 37 */
+ { 33, 9, 12, 0, 210, }, /* 38 */
+ { 33, 9, 12, 0, 206, }, /* 39 */
+ { 33, 9, 12, 0, 205, }, /* 40 */
+ { 33, 9, 12, 0, 79, }, /* 41 */
+ { 33, 9, 12, 0, 202, }, /* 42 */
+ { 33, 9, 12, 0, 203, }, /* 43 */
+ { 33, 9, 12, 0, 207, }, /* 44 */
+ { 33, 5, 12, 0, 97, }, /* 45 */
+ { 33, 9, 12, 0, 211, }, /* 46 */
+ { 33, 9, 12, 0, 209, }, /* 47 */
+ { 33, 5, 12, 0, 163, }, /* 48 */
+ { 33, 9, 12, 0, 213, }, /* 49 */
+ { 33, 5, 12, 0, 130, }, /* 50 */
+ { 33, 9, 12, 0, 214, }, /* 51 */
+ { 33, 9, 12, 0, 218, }, /* 52 */
+ { 33, 9, 12, 0, 217, }, /* 53 */
+ { 33, 9, 12, 0, 219, }, /* 54 */
+ { 33, 5, 12, 0, 56, }, /* 55 */
+ { 33, 9, 12, 5, 2, }, /* 56 */
+ { 33, 8, 12, 5, 1, }, /* 57 */
+ { 33, 5, 12, 5, -2, }, /* 58 */
+ { 33, 9, 12, 9, 2, }, /* 59 */
+ { 33, 8, 12, 9, 1, }, /* 60 */
+ { 33, 5, 12, 9, -2, }, /* 61 */
+ { 33, 9, 12, 13, 2, }, /* 62 */
+ { 33, 8, 12, 13, 1, }, /* 63 */
+ { 33, 5, 12, 13, -2, }, /* 64 */
+ { 33, 5, 12, 0, -79, }, /* 65 */
+ { 33, 9, 12, 17, 2, }, /* 66 */
+ { 33, 8, 12, 17, 1, }, /* 67 */
+ { 33, 5, 12, 17, -2, }, /* 68 */
+ { 33, 9, 12, 0, -97, }, /* 69 */
+ { 33, 9, 12, 0, -56, }, /* 70 */
+ { 33, 9, 12, 0, -130, }, /* 71 */
+ { 33, 9, 12, 0, 10795, }, /* 72 */
+ { 33, 9, 12, 0, -163, }, /* 73 */
+ { 33, 9, 12, 0, 10792, }, /* 74 */
+ { 33, 5, 12, 0, 10815, }, /* 75 */
+ { 33, 9, 12, 0, -195, }, /* 76 */
+ { 33, 9, 12, 0, 69, }, /* 77 */
+ { 33, 9, 12, 0, 71, }, /* 78 */
+ { 33, 5, 12, 0, 10783, }, /* 79 */
+ { 33, 5, 12, 0, 10780, }, /* 80 */
+ { 33, 5, 12, 0, 10782, }, /* 81 */
+ { 33, 5, 12, 0, -210, }, /* 82 */
+ { 33, 5, 12, 0, -206, }, /* 83 */
+ { 33, 5, 12, 0, -205, }, /* 84 */
+ { 33, 5, 12, 0, -202, }, /* 85 */
+ { 33, 5, 12, 0, -203, }, /* 86 */
+ { 33, 5, 12, 0, 42319, }, /* 87 */
+ { 33, 5, 12, 0, 42315, }, /* 88 */
+ { 33, 5, 12, 0, -207, }, /* 89 */
+ { 33, 5, 12, 0, 42280, }, /* 90 */
+ { 33, 5, 12, 0, 42308, }, /* 91 */
+ { 33, 5, 12, 0, -209, }, /* 92 */
+ { 33, 5, 12, 0, -211, }, /* 93 */
+ { 33, 5, 12, 0, 10743, }, /* 94 */
+ { 33, 5, 12, 0, 42305, }, /* 95 */
+ { 33, 5, 12, 0, 10749, }, /* 96 */
+ { 33, 5, 12, 0, -213, }, /* 97 */
+ { 33, 5, 12, 0, -214, }, /* 98 */
+ { 33, 5, 12, 0, 10727, }, /* 99 */
+ { 33, 5, 12, 0, -218, }, /* 100 */
+ { 33, 5, 12, 0, 42282, }, /* 101 */
+ { 33, 5, 12, 0, -69, }, /* 102 */
+ { 33, 5, 12, 0, -217, }, /* 103 */
+ { 33, 5, 12, 0, -71, }, /* 104 */
+ { 33, 5, 12, 0, -219, }, /* 105 */
+ { 33, 5, 12, 0, 42261, }, /* 106 */
+ { 33, 5, 12, 0, 42258, }, /* 107 */
+ { 33, 6, 12, 0, 0, }, /* 108 */
+ { 9, 6, 12, 0, 0, }, /* 109 */
+ { 3, 24, 12, 0, 0, }, /* 110 */
+ { 27, 12, 3, 0, 0, }, /* 111 */
+ { 27, 12, 3, 21, 116, }, /* 112 */
+ { 19, 9, 12, 0, 1, }, /* 113 */
+ { 19, 5, 12, 0, -1, }, /* 114 */
+ { 19, 24, 12, 0, 0, }, /* 115 */
+ { 9, 2, 12, 0, 0, }, /* 116 */
+ { 19, 6, 12, 0, 0, }, /* 117 */
+ { 19, 5, 12, 0, 130, }, /* 118 */
+ { 19, 9, 12, 0, 116, }, /* 119 */
+ { 19, 9, 12, 0, 38, }, /* 120 */
+ { 19, 9, 12, 0, 37, }, /* 121 */
+ { 19, 9, 12, 0, 64, }, /* 122 */
+ { 19, 9, 12, 0, 63, }, /* 123 */
+ { 19, 5, 12, 0, 0, }, /* 124 */
+ { 19, 9, 12, 0, 32, }, /* 125 */
+ { 19, 9, 12, 34, 32, }, /* 126 */
+ { 19, 9, 12, 59, 32, }, /* 127 */
+ { 19, 9, 12, 38, 32, }, /* 128 */
+ { 19, 9, 12, 21, 32, }, /* 129 */
+ { 19, 9, 12, 51, 32, }, /* 130 */
+ { 19, 9, 12, 26, 32, }, /* 131 */
+ { 19, 9, 12, 47, 32, }, /* 132 */
+ { 19, 9, 12, 55, 32, }, /* 133 */
+ { 19, 9, 12, 30, 32, }, /* 134 */
+ { 19, 9, 12, 43, 32, }, /* 135 */
+ { 19, 9, 12, 96, 32, }, /* 136 */
+ { 19, 5, 12, 0, -38, }, /* 137 */
+ { 19, 5, 12, 0, -37, }, /* 138 */
+ { 19, 5, 12, 0, -32, }, /* 139 */
+ { 19, 5, 12, 34, -32, }, /* 140 */
+ { 19, 5, 12, 59, -32, }, /* 141 */
+ { 19, 5, 12, 38, -32, }, /* 142 */
+ { 19, 5, 12, 21, -116, }, /* 143 */
+ { 19, 5, 12, 51, -32, }, /* 144 */
+ { 19, 5, 12, 26, -775, }, /* 145 */
+ { 19, 5, 12, 47, -32, }, /* 146 */
+ { 19, 5, 12, 55, -32, }, /* 147 */
+ { 19, 5, 12, 30, 1, }, /* 148 */
+ { 19, 5, 12, 30, -32, }, /* 149 */
+ { 19, 5, 12, 43, -32, }, /* 150 */
+ { 19, 5, 12, 96, -32, }, /* 151 */
+ { 19, 5, 12, 0, -64, }, /* 152 */
+ { 19, 5, 12, 0, -63, }, /* 153 */
+ { 19, 9, 12, 0, 8, }, /* 154 */
+ { 19, 5, 12, 34, -30, }, /* 155 */
+ { 19, 5, 12, 38, -25, }, /* 156 */
+ { 19, 9, 12, 0, 0, }, /* 157 */
+ { 19, 5, 12, 43, -15, }, /* 158 */
+ { 19, 5, 12, 47, -22, }, /* 159 */
+ { 19, 5, 12, 0, -8, }, /* 160 */
+ { 10, 9, 12, 0, 1, }, /* 161 */
+ { 10, 5, 12, 0, -1, }, /* 162 */
+ { 19, 5, 12, 51, -54, }, /* 163 */
+ { 19, 5, 12, 55, -48, }, /* 164 */
+ { 19, 5, 12, 0, 7, }, /* 165 */
+ { 19, 5, 12, 0, -116, }, /* 166 */
+ { 19, 9, 12, 38, -60, }, /* 167 */
+ { 19, 5, 12, 59, -64, }, /* 168 */
+ { 19, 25, 12, 0, 0, }, /* 169 */
+ { 19, 9, 12, 0, -7, }, /* 170 */
+ { 19, 9, 12, 0, -130, }, /* 171 */
+ { 12, 9, 12, 0, 80, }, /* 172 */
+ { 12, 9, 12, 0, 32, }, /* 173 */
+ { 12, 9, 12, 63, 32, }, /* 174 */
+ { 12, 9, 12, 67, 32, }, /* 175 */
+ { 12, 9, 12, 71, 32, }, /* 176 */
+ { 12, 9, 12, 75, 32, }, /* 177 */
+ { 12, 9, 12, 79, 32, }, /* 178 */
+ { 12, 9, 12, 84, 32, }, /* 179 */
+ { 12, 5, 12, 0, -32, }, /* 180 */
+ { 12, 5, 12, 63, -32, }, /* 181 */
+ { 12, 5, 12, 67, -32, }, /* 182 */
+ { 12, 5, 12, 71, -32, }, /* 183 */
+ { 12, 5, 12, 75, -32, }, /* 184 */
+ { 12, 5, 12, 79, -32, }, /* 185 */
+ { 12, 5, 12, 84, -32, }, /* 186 */
+ { 12, 5, 12, 0, -80, }, /* 187 */
+ { 12, 9, 12, 0, 1, }, /* 188 */
+ { 12, 5, 12, 0, -1, }, /* 189 */
+ { 12, 9, 12, 88, 1, }, /* 190 */
+ { 12, 5, 12, 88, -1, }, /* 191 */
+ { 12, 26, 12, 0, 0, }, /* 192 */
+ { 12, 12, 3, 0, 0, }, /* 193 */
+ { 12, 11, 3, 0, 0, }, /* 194 */
+ { 12, 9, 12, 0, 15, }, /* 195 */
+ { 12, 5, 12, 0, -15, }, /* 196 */
+ { 1, 9, 12, 0, 48, }, /* 197 */
+ { 1, 6, 12, 0, 0, }, /* 198 */
+ { 1, 21, 12, 0, 0, }, /* 199 */
{ 1, 5, 12, 0, 0, }, /* 200 */
- { 1, 17, 12, 0, 0, }, /* 201 */
- { 1, 26, 12, 0, 0, }, /* 202 */
- { 1, 23, 12, 0, 0, }, /* 203 */
- { 25, 12, 3, 0, 0, }, /* 204 */
- { 25, 17, 12, 0, 0, }, /* 205 */
- { 25, 21, 12, 0, 0, }, /* 206 */
- { 25, 7, 12, 0, 0, }, /* 207 */
- { 0, 1, 4, 0, 0, }, /* 208 */
- { 9, 1, 4, 0, 0, }, /* 209 */
- { 0, 25, 12, 0, 0, }, /* 210 */
- { 0, 21, 12, 0, 0, }, /* 211 */
- { 0, 23, 12, 0, 0, }, /* 212 */
- { 0, 26, 12, 0, 0, }, /* 213 */
- { 0, 12, 3, 0, 0, }, /* 214 */
- { 0, 1, 2, 0, 0, }, /* 215 */
- { 0, 7, 12, 0, 0, }, /* 216 */
- { 0, 13, 12, 0, 0, }, /* 217 */
- { 0, 6, 12, 0, 0, }, /* 218 */
- { 49, 21, 12, 0, 0, }, /* 219 */
- { 49, 1, 4, 0, 0, }, /* 220 */
- { 49, 7, 12, 0, 0, }, /* 221 */
- { 49, 12, 3, 0, 0, }, /* 222 */
- { 55, 7, 12, 0, 0, }, /* 223 */
- { 55, 12, 3, 0, 0, }, /* 224 */
- { 63, 13, 12, 0, 0, }, /* 225 */
- { 63, 7, 12, 0, 0, }, /* 226 */
- { 63, 12, 3, 0, 0, }, /* 227 */
- { 63, 6, 12, 0, 0, }, /* 228 */
- { 63, 26, 12, 0, 0, }, /* 229 */
- { 63, 21, 12, 0, 0, }, /* 230 */
- { 89, 7, 12, 0, 0, }, /* 231 */
- { 89, 12, 3, 0, 0, }, /* 232 */
- { 89, 6, 12, 0, 0, }, /* 233 */
- { 89, 21, 12, 0, 0, }, /* 234 */
- { 94, 7, 12, 0, 0, }, /* 235 */
- { 94, 12, 3, 0, 0, }, /* 236 */
- { 94, 21, 12, 0, 0, }, /* 237 */
- { 14, 12, 3, 0, 0, }, /* 238 */
- { 14, 10, 5, 0, 0, }, /* 239 */
- { 14, 7, 12, 0, 0, }, /* 240 */
- { 14, 13, 12, 0, 0, }, /* 241 */
- { 14, 21, 12, 0, 0, }, /* 242 */
- { 14, 6, 12, 0, 0, }, /* 243 */
- { 2, 7, 12, 0, 0, }, /* 244 */
- { 2, 12, 3, 0, 0, }, /* 245 */
- { 2, 10, 5, 0, 0, }, /* 246 */
- { 2, 10, 3, 0, 0, }, /* 247 */
- { 2, 13, 12, 0, 0, }, /* 248 */
- { 2, 23, 12, 0, 0, }, /* 249 */
- { 2, 15, 12, 0, 0, }, /* 250 */
- { 2, 26, 12, 0, 0, }, /* 251 */
- { 2, 21, 12, 0, 0, }, /* 252 */
- { 21, 12, 3, 0, 0, }, /* 253 */
- { 21, 10, 5, 0, 0, }, /* 254 */
- { 21, 7, 12, 0, 0, }, /* 255 */
- { 21, 13, 12, 0, 0, }, /* 256 */
- { 20, 12, 3, 0, 0, }, /* 257 */
- { 20, 10, 5, 0, 0, }, /* 258 */
- { 20, 7, 12, 0, 0, }, /* 259 */
- { 20, 13, 12, 0, 0, }, /* 260 */
- { 20, 21, 12, 0, 0, }, /* 261 */
- { 20, 23, 12, 0, 0, }, /* 262 */
- { 43, 12, 3, 0, 0, }, /* 263 */
- { 43, 10, 5, 0, 0, }, /* 264 */
- { 43, 7, 12, 0, 0, }, /* 265 */
- { 43, 10, 3, 0, 0, }, /* 266 */
- { 43, 13, 12, 0, 0, }, /* 267 */
- { 43, 26, 12, 0, 0, }, /* 268 */
- { 43, 15, 12, 0, 0, }, /* 269 */
- { 53, 12, 3, 0, 0, }, /* 270 */
- { 53, 7, 12, 0, 0, }, /* 271 */
- { 53, 10, 3, 0, 0, }, /* 272 */
- { 53, 10, 5, 0, 0, }, /* 273 */
- { 53, 13, 12, 0, 0, }, /* 274 */
- { 53, 15, 12, 0, 0, }, /* 275 */
- { 53, 26, 12, 0, 0, }, /* 276 */
- { 53, 23, 12, 0, 0, }, /* 277 */
- { 54, 12, 3, 0, 0, }, /* 278 */
- { 54, 10, 5, 0, 0, }, /* 279 */
- { 54, 7, 12, 0, 0, }, /* 280 */
- { 54, 13, 12, 0, 0, }, /* 281 */
- { 54, 15, 12, 0, 0, }, /* 282 */
- { 54, 26, 12, 0, 0, }, /* 283 */
- { 28, 7, 12, 0, 0, }, /* 284 */
- { 28, 12, 3, 0, 0, }, /* 285 */
- { 28, 10, 5, 0, 0, }, /* 286 */
- { 28, 10, 3, 0, 0, }, /* 287 */
- { 28, 13, 12, 0, 0, }, /* 288 */
- { 36, 12, 3, 0, 0, }, /* 289 */
- { 36, 10, 5, 0, 0, }, /* 290 */
- { 36, 7, 12, 0, 0, }, /* 291 */
- { 36, 10, 3, 0, 0, }, /* 292 */
- { 36, 7, 4, 0, 0, }, /* 293 */
- { 36, 26, 12, 0, 0, }, /* 294 */
- { 36, 15, 12, 0, 0, }, /* 295 */
- { 36, 13, 12, 0, 0, }, /* 296 */
- { 47, 10, 5, 0, 0, }, /* 297 */
- { 47, 7, 12, 0, 0, }, /* 298 */
- { 47, 12, 3, 0, 0, }, /* 299 */
- { 47, 10, 3, 0, 0, }, /* 300 */
- { 47, 13, 12, 0, 0, }, /* 301 */
- { 47, 21, 12, 0, 0, }, /* 302 */
- { 56, 7, 12, 0, 0, }, /* 303 */
- { 56, 12, 3, 0, 0, }, /* 304 */
- { 56, 7, 5, 0, 0, }, /* 305 */
- { 56, 6, 12, 0, 0, }, /* 306 */
- { 56, 21, 12, 0, 0, }, /* 307 */
- { 56, 13, 12, 0, 0, }, /* 308 */
- { 32, 7, 12, 0, 0, }, /* 309 */
- { 32, 12, 3, 0, 0, }, /* 310 */
- { 32, 7, 5, 0, 0, }, /* 311 */
- { 32, 6, 12, 0, 0, }, /* 312 */
- { 32, 13, 12, 0, 0, }, /* 313 */
- { 57, 7, 12, 0, 0, }, /* 314 */
- { 57, 26, 12, 0, 0, }, /* 315 */
- { 57, 21, 12, 0, 0, }, /* 316 */
- { 57, 12, 3, 0, 0, }, /* 317 */
- { 57, 13, 12, 0, 0, }, /* 318 */
- { 57, 15, 12, 0, 0, }, /* 319 */
- { 57, 22, 12, 0, 0, }, /* 320 */
- { 57, 18, 12, 0, 0, }, /* 321 */
- { 57, 10, 5, 0, 0, }, /* 322 */
- { 38, 7, 12, 0, 0, }, /* 323 */
- { 38, 10, 12, 0, 0, }, /* 324 */
- { 38, 12, 3, 0, 0, }, /* 325 */
- { 38, 10, 5, 0, 0, }, /* 326 */
- { 38, 13, 12, 0, 0, }, /* 327 */
- { 38, 21, 12, 0, 0, }, /* 328 */
- { 38, 26, 12, 0, 0, }, /* 329 */
- { 16, 9, 12, 0, 7264, }, /* 330 */
- { 16, 7, 12, 0, 0, }, /* 331 */
- { 16, 6, 12, 0, 0, }, /* 332 */
- { 23, 7, 6, 0, 0, }, /* 333 */
- { 23, 7, 7, 0, 0, }, /* 334 */
- { 23, 7, 8, 0, 0, }, /* 335 */
- { 15, 7, 12, 0, 0, }, /* 336 */
- { 15, 12, 3, 0, 0, }, /* 337 */
- { 15, 21, 12, 0, 0, }, /* 338 */
- { 15, 15, 12, 0, 0, }, /* 339 */
- { 15, 26, 12, 0, 0, }, /* 340 */
- { 8, 9, 12, 0, 38864, }, /* 341 */
- { 8, 9, 12, 0, 8, }, /* 342 */
- { 8, 5, 12, 0, -8, }, /* 343 */
- { 7, 17, 12, 0, 0, }, /* 344 */
- { 7, 7, 12, 0, 0, }, /* 345 */
- { 7, 21, 12, 0, 0, }, /* 346 */
- { 40, 29, 12, 0, 0, }, /* 347 */
- { 40, 7, 12, 0, 0, }, /* 348 */
- { 40, 22, 12, 0, 0, }, /* 349 */
- { 40, 18, 12, 0, 0, }, /* 350 */
- { 45, 7, 12, 0, 0, }, /* 351 */
- { 45, 14, 12, 0, 0, }, /* 352 */
- { 50, 7, 12, 0, 0, }, /* 353 */
- { 50, 12, 3, 0, 0, }, /* 354 */
- { 24, 7, 12, 0, 0, }, /* 355 */
- { 24, 12, 3, 0, 0, }, /* 356 */
- { 6, 7, 12, 0, 0, }, /* 357 */
- { 6, 12, 3, 0, 0, }, /* 358 */
- { 51, 7, 12, 0, 0, }, /* 359 */
- { 51, 12, 3, 0, 0, }, /* 360 */
- { 31, 7, 12, 0, 0, }, /* 361 */
- { 31, 12, 3, 0, 0, }, /* 362 */
- { 31, 10, 5, 0, 0, }, /* 363 */
- { 31, 21, 12, 0, 0, }, /* 364 */
- { 31, 6, 12, 0, 0, }, /* 365 */
- { 31, 23, 12, 0, 0, }, /* 366 */
- { 31, 13, 12, 0, 0, }, /* 367 */
- { 31, 15, 12, 0, 0, }, /* 368 */
- { 37, 21, 12, 0, 0, }, /* 369 */
- { 37, 17, 12, 0, 0, }, /* 370 */
- { 37, 12, 3, 0, 0, }, /* 371 */
- { 37, 1, 2, 0, 0, }, /* 372 */
- { 37, 13, 12, 0, 0, }, /* 373 */
- { 37, 7, 12, 0, 0, }, /* 374 */
- { 37, 6, 12, 0, 0, }, /* 375 */
- { 34, 7, 12, 0, 0, }, /* 376 */
- { 34, 12, 3, 0, 0, }, /* 377 */
- { 34, 10, 5, 0, 0, }, /* 378 */
- { 34, 26, 12, 0, 0, }, /* 379 */
- { 34, 21, 12, 0, 0, }, /* 380 */
- { 34, 13, 12, 0, 0, }, /* 381 */
- { 52, 7, 12, 0, 0, }, /* 382 */
- { 39, 7, 12, 0, 0, }, /* 383 */
- { 39, 13, 12, 0, 0, }, /* 384 */
- { 39, 15, 12, 0, 0, }, /* 385 */
- { 39, 26, 12, 0, 0, }, /* 386 */
- { 31, 26, 12, 0, 0, }, /* 387 */
- { 5, 7, 12, 0, 0, }, /* 388 */
- { 5, 12, 3, 0, 0, }, /* 389 */
- { 5, 10, 5, 0, 0, }, /* 390 */
- { 5, 21, 12, 0, 0, }, /* 391 */
- { 90, 7, 12, 0, 0, }, /* 392 */
- { 90, 10, 5, 0, 0, }, /* 393 */
- { 90, 12, 3, 0, 0, }, /* 394 */
- { 90, 10, 12, 0, 0, }, /* 395 */
- { 90, 13, 12, 0, 0, }, /* 396 */
- { 90, 21, 12, 0, 0, }, /* 397 */
- { 90, 6, 12, 0, 0, }, /* 398 */
- { 27, 11, 3, 0, 0, }, /* 399 */
- { 61, 12, 3, 0, 0, }, /* 400 */
- { 61, 10, 5, 0, 0, }, /* 401 */
- { 61, 7, 12, 0, 0, }, /* 402 */
- { 61, 13, 12, 0, 0, }, /* 403 */
- { 61, 21, 12, 0, 0, }, /* 404 */
- { 61, 26, 12, 0, 0, }, /* 405 */
- { 75, 12, 3, 0, 0, }, /* 406 */
- { 75, 10, 5, 0, 0, }, /* 407 */
- { 75, 7, 12, 0, 0, }, /* 408 */
- { 75, 13, 12, 0, 0, }, /* 409 */
- { 92, 7, 12, 0, 0, }, /* 410 */
- { 92, 12, 3, 0, 0, }, /* 411 */
- { 92, 10, 5, 0, 0, }, /* 412 */
- { 92, 21, 12, 0, 0, }, /* 413 */
- { 69, 7, 12, 0, 0, }, /* 414 */
- { 69, 10, 5, 0, 0, }, /* 415 */
- { 69, 12, 3, 0, 0, }, /* 416 */
- { 69, 21, 12, 0, 0, }, /* 417 */
- { 69, 13, 12, 0, 0, }, /* 418 */
- { 72, 13, 12, 0, 0, }, /* 419 */
- { 72, 7, 12, 0, 0, }, /* 420 */
- { 72, 6, 12, 0, 0, }, /* 421 */
- { 72, 21, 12, 0, 0, }, /* 422 */
- { 12, 5, 12, 63, -6222, }, /* 423 */
- { 12, 5, 12, 67, -6221, }, /* 424 */
- { 12, 5, 12, 71, -6212, }, /* 425 */
- { 12, 5, 12, 75, -6210, }, /* 426 */
- { 12, 5, 12, 79, -6210, }, /* 427 */
- { 12, 5, 12, 79, -6211, }, /* 428 */
- { 12, 5, 12, 84, -6204, }, /* 429 */
- { 12, 5, 12, 88, -6180, }, /* 430 */
- { 12, 5, 12, 108, 35267, }, /* 431 */
- { 75, 21, 12, 0, 0, }, /* 432 */
- { 9, 10, 5, 0, 0, }, /* 433 */
- { 9, 7, 12, 0, 0, }, /* 434 */
- { 12, 5, 12, 0, 0, }, /* 435 */
- { 12, 6, 12, 0, 0, }, /* 436 */
- { 33, 5, 12, 0, 35332, }, /* 437 */
- { 33, 5, 12, 0, 3814, }, /* 438 */
- { 33, 9, 12, 92, 1, }, /* 439 */
- { 33, 5, 12, 92, -1, }, /* 440 */
- { 33, 5, 12, 92, -58, }, /* 441 */
- { 33, 9, 12, 0, -7615, }, /* 442 */
- { 19, 5, 12, 0, 8, }, /* 443 */
- { 19, 9, 12, 0, -8, }, /* 444 */
- { 19, 5, 12, 0, 74, }, /* 445 */
- { 19, 5, 12, 0, 86, }, /* 446 */
- { 19, 5, 12, 0, 100, }, /* 447 */
- { 19, 5, 12, 0, 128, }, /* 448 */
- { 19, 5, 12, 0, 112, }, /* 449 */
- { 19, 5, 12, 0, 126, }, /* 450 */
- { 19, 8, 12, 0, -8, }, /* 451 */
- { 19, 5, 12, 0, 9, }, /* 452 */
- { 19, 9, 12, 0, -74, }, /* 453 */
- { 19, 8, 12, 0, -9, }, /* 454 */
- { 19, 5, 12, 21, -7173, }, /* 455 */
- { 19, 9, 12, 0, -86, }, /* 456 */
- { 19, 9, 12, 0, -100, }, /* 457 */
- { 19, 9, 12, 0, -112, }, /* 458 */
- { 19, 9, 12, 0, -128, }, /* 459 */
- { 19, 9, 12, 0, -126, }, /* 460 */
- { 27, 1, 3, 0, 0, }, /* 461 */
- { 27, 1, 16, 0, 0, }, /* 462 */
- { 9, 27, 2, 0, 0, }, /* 463 */
- { 9, 28, 2, 0, 0, }, /* 464 */
- { 9, 2, 2, 0, 0, }, /* 465 */
- { 9, 9, 12, 0, 0, }, /* 466 */
- { 9, 5, 12, 0, 0, }, /* 467 */
- { 19, 9, 12, 96, -7517, }, /* 468 */
- { 33, 9, 12, 100, -8383, }, /* 469 */
- { 33, 9, 12, 104, -8262, }, /* 470 */
- { 33, 9, 12, 0, 28, }, /* 471 */
- { 33, 5, 12, 0, -28, }, /* 472 */
- { 33, 14, 12, 0, 16, }, /* 473 */
- { 33, 14, 12, 0, -16, }, /* 474 */
- { 33, 14, 12, 0, 0, }, /* 475 */
- { 9, 26, 12, 0, 26, }, /* 476 */
- { 9, 26, 12, 0, -26, }, /* 477 */
- { 9, 26, 13, 0, 0, }, /* 478 */
- { 9, 26, 17, 0, 0, }, /* 479 */
- { 4, 26, 12, 0, 0, }, /* 480 */
- { 17, 9, 12, 0, 48, }, /* 481 */
- { 17, 5, 12, 0, -48, }, /* 482 */
- { 33, 9, 12, 0, -10743, }, /* 483 */
- { 33, 9, 12, 0, -3814, }, /* 484 */
- { 33, 9, 12, 0, -10727, }, /* 485 */
- { 33, 5, 12, 0, -10795, }, /* 486 */
- { 33, 5, 12, 0, -10792, }, /* 487 */
- { 33, 9, 12, 0, -10780, }, /* 488 */
- { 33, 9, 12, 0, -10749, }, /* 489 */
- { 33, 9, 12, 0, -10783, }, /* 490 */
- { 33, 9, 12, 0, -10782, }, /* 491 */
- { 33, 9, 12, 0, -10815, }, /* 492 */
- { 10, 5, 12, 0, 0, }, /* 493 */
- { 10, 26, 12, 0, 0, }, /* 494 */
- { 10, 12, 3, 0, 0, }, /* 495 */
- { 10, 21, 12, 0, 0, }, /* 496 */
- { 10, 15, 12, 0, 0, }, /* 497 */
- { 16, 5, 12, 0, -7264, }, /* 498 */
- { 58, 7, 12, 0, 0, }, /* 499 */
- { 58, 6, 12, 0, 0, }, /* 500 */
- { 58, 21, 12, 0, 0, }, /* 501 */
- { 58, 12, 3, 0, 0, }, /* 502 */
- { 22, 26, 12, 0, 0, }, /* 503 */
- { 22, 6, 12, 0, 0, }, /* 504 */
- { 22, 14, 12, 0, 0, }, /* 505 */
- { 23, 10, 3, 0, 0, }, /* 506 */
- { 26, 7, 12, 0, 0, }, /* 507 */
- { 26, 6, 12, 0, 0, }, /* 508 */
- { 29, 7, 12, 0, 0, }, /* 509 */
- { 29, 6, 12, 0, 0, }, /* 510 */
- { 3, 7, 12, 0, 0, }, /* 511 */
- { 23, 7, 12, 0, 0, }, /* 512 */
- { 23, 26, 12, 0, 0, }, /* 513 */
- { 29, 26, 12, 0, 0, }, /* 514 */
- { 22, 7, 12, 0, 0, }, /* 515 */
- { 60, 7, 12, 0, 0, }, /* 516 */
- { 60, 6, 12, 0, 0, }, /* 517 */
- { 60, 26, 12, 0, 0, }, /* 518 */
- { 85, 7, 12, 0, 0, }, /* 519 */
- { 85, 6, 12, 0, 0, }, /* 520 */
- { 85, 21, 12, 0, 0, }, /* 521 */
- { 76, 7, 12, 0, 0, }, /* 522 */
- { 76, 6, 12, 0, 0, }, /* 523 */
- { 76, 21, 12, 0, 0, }, /* 524 */
- { 76, 13, 12, 0, 0, }, /* 525 */
- { 12, 9, 12, 108, 1, }, /* 526 */
- { 12, 5, 12, 108, -35267, }, /* 527 */
- { 12, 7, 12, 0, 0, }, /* 528 */
- { 12, 21, 12, 0, 0, }, /* 529 */
- { 78, 7, 12, 0, 0, }, /* 530 */
- { 78, 14, 12, 0, 0, }, /* 531 */
- { 78, 12, 3, 0, 0, }, /* 532 */
- { 78, 21, 12, 0, 0, }, /* 533 */
- { 33, 9, 12, 0, -35332, }, /* 534 */
- { 33, 9, 12, 0, -42280, }, /* 535 */
- { 33, 9, 12, 0, -42308, }, /* 536 */
- { 33, 9, 12, 0, -42319, }, /* 537 */
- { 33, 9, 12, 0, -42315, }, /* 538 */
- { 33, 9, 12, 0, -42305, }, /* 539 */
- { 33, 9, 12, 0, -42258, }, /* 540 */
- { 33, 9, 12, 0, -42282, }, /* 541 */
- { 33, 9, 12, 0, -42261, }, /* 542 */
- { 33, 9, 12, 0, 928, }, /* 543 */
- { 48, 7, 12, 0, 0, }, /* 544 */
- { 48, 12, 3, 0, 0, }, /* 545 */
- { 48, 10, 5, 0, 0, }, /* 546 */
- { 48, 26, 12, 0, 0, }, /* 547 */
- { 64, 7, 12, 0, 0, }, /* 548 */
- { 64, 21, 12, 0, 0, }, /* 549 */
- { 74, 10, 5, 0, 0, }, /* 550 */
- { 74, 7, 12, 0, 0, }, /* 551 */
- { 74, 12, 3, 0, 0, }, /* 552 */
- { 74, 21, 12, 0, 0, }, /* 553 */
- { 74, 13, 12, 0, 0, }, /* 554 */
- { 68, 13, 12, 0, 0, }, /* 555 */
- { 68, 7, 12, 0, 0, }, /* 556 */
- { 68, 12, 3, 0, 0, }, /* 557 */
- { 68, 21, 12, 0, 0, }, /* 558 */
- { 73, 7, 12, 0, 0, }, /* 559 */
- { 73, 12, 3, 0, 0, }, /* 560 */
- { 73, 10, 5, 0, 0, }, /* 561 */
- { 73, 21, 12, 0, 0, }, /* 562 */
- { 83, 12, 3, 0, 0, }, /* 563 */
- { 83, 10, 5, 0, 0, }, /* 564 */
- { 83, 7, 12, 0, 0, }, /* 565 */
- { 83, 21, 12, 0, 0, }, /* 566 */
- { 83, 13, 12, 0, 0, }, /* 567 */
- { 38, 6, 12, 0, 0, }, /* 568 */
- { 67, 7, 12, 0, 0, }, /* 569 */
- { 67, 12, 3, 0, 0, }, /* 570 */
- { 67, 10, 5, 0, 0, }, /* 571 */
- { 67, 13, 12, 0, 0, }, /* 572 */
- { 67, 21, 12, 0, 0, }, /* 573 */
- { 91, 7, 12, 0, 0, }, /* 574 */
- { 91, 12, 3, 0, 0, }, /* 575 */
- { 91, 6, 12, 0, 0, }, /* 576 */
- { 91, 21, 12, 0, 0, }, /* 577 */
- { 86, 7, 12, 0, 0, }, /* 578 */
- { 86, 10, 5, 0, 0, }, /* 579 */
- { 86, 12, 3, 0, 0, }, /* 580 */
- { 86, 21, 12, 0, 0, }, /* 581 */
- { 86, 6, 12, 0, 0, }, /* 582 */
- { 33, 5, 12, 0, -928, }, /* 583 */
- { 8, 5, 12, 0, -38864, }, /* 584 */
- { 86, 13, 12, 0, 0, }, /* 585 */
- { 23, 7, 9, 0, 0, }, /* 586 */
- { 23, 7, 10, 0, 0, }, /* 587 */
- { 9, 4, 2, 0, 0, }, /* 588 */
- { 9, 3, 12, 0, 0, }, /* 589 */
- { 25, 25, 12, 0, 0, }, /* 590 */
- { 0, 24, 12, 0, 0, }, /* 591 */
- { 9, 6, 3, 0, 0, }, /* 592 */
- { 35, 7, 12, 0, 0, }, /* 593 */
- { 19, 14, 12, 0, 0, }, /* 594 */
- { 19, 15, 12, 0, 0, }, /* 595 */
- { 19, 26, 12, 0, 0, }, /* 596 */
- { 70, 7, 12, 0, 0, }, /* 597 */
- { 66, 7, 12, 0, 0, }, /* 598 */
- { 41, 7, 12, 0, 0, }, /* 599 */
- { 41, 15, 12, 0, 0, }, /* 600 */
- { 18, 7, 12, 0, 0, }, /* 601 */
- { 18, 14, 12, 0, 0, }, /* 602 */
- { 117, 7, 12, 0, 0, }, /* 603 */
- { 117, 12, 3, 0, 0, }, /* 604 */
- { 59, 7, 12, 0, 0, }, /* 605 */
- { 59, 21, 12, 0, 0, }, /* 606 */
- { 42, 7, 12, 0, 0, }, /* 607 */
- { 42, 21, 12, 0, 0, }, /* 608 */
- { 42, 14, 12, 0, 0, }, /* 609 */
- { 13, 9, 12, 0, 40, }, /* 610 */
- { 13, 5, 12, 0, -40, }, /* 611 */
- { 46, 7, 12, 0, 0, }, /* 612 */
- { 44, 7, 12, 0, 0, }, /* 613 */
- { 44, 13, 12, 0, 0, }, /* 614 */
- { 135, 9, 12, 0, 40, }, /* 615 */
- { 135, 5, 12, 0, -40, }, /* 616 */
- { 105, 7, 12, 0, 0, }, /* 617 */
- { 103, 7, 12, 0, 0, }, /* 618 */
- { 103, 21, 12, 0, 0, }, /* 619 */
- { 109, 7, 12, 0, 0, }, /* 620 */
- { 11, 7, 12, 0, 0, }, /* 621 */
- { 80, 7, 12, 0, 0, }, /* 622 */
- { 80, 21, 12, 0, 0, }, /* 623 */
- { 80, 15, 12, 0, 0, }, /* 624 */
- { 119, 7, 12, 0, 0, }, /* 625 */
- { 119, 26, 12, 0, 0, }, /* 626 */
- { 119, 15, 12, 0, 0, }, /* 627 */
- { 115, 7, 12, 0, 0, }, /* 628 */
- { 115, 15, 12, 0, 0, }, /* 629 */
- { 127, 7, 12, 0, 0, }, /* 630 */
- { 127, 15, 12, 0, 0, }, /* 631 */
- { 65, 7, 12, 0, 0, }, /* 632 */
- { 65, 15, 12, 0, 0, }, /* 633 */
- { 65, 21, 12, 0, 0, }, /* 634 */
- { 71, 7, 12, 0, 0, }, /* 635 */
- { 71, 21, 12, 0, 0, }, /* 636 */
- { 97, 7, 12, 0, 0, }, /* 637 */
- { 96, 7, 12, 0, 0, }, /* 638 */
- { 96, 15, 12, 0, 0, }, /* 639 */
- { 30, 7, 12, 0, 0, }, /* 640 */
- { 30, 12, 3, 0, 0, }, /* 641 */
- { 30, 15, 12, 0, 0, }, /* 642 */
- { 30, 21, 12, 0, 0, }, /* 643 */
- { 87, 7, 12, 0, 0, }, /* 644 */
- { 87, 15, 12, 0, 0, }, /* 645 */
- { 87, 21, 12, 0, 0, }, /* 646 */
- { 116, 7, 12, 0, 0, }, /* 647 */
- { 116, 15, 12, 0, 0, }, /* 648 */
- { 111, 7, 12, 0, 0, }, /* 649 */
- { 111, 26, 12, 0, 0, }, /* 650 */
- { 111, 12, 3, 0, 0, }, /* 651 */
- { 111, 15, 12, 0, 0, }, /* 652 */
- { 111, 21, 12, 0, 0, }, /* 653 */
- { 77, 7, 12, 0, 0, }, /* 654 */
- { 77, 21, 12, 0, 0, }, /* 655 */
- { 82, 7, 12, 0, 0, }, /* 656 */
- { 82, 15, 12, 0, 0, }, /* 657 */
- { 81, 7, 12, 0, 0, }, /* 658 */
- { 81, 15, 12, 0, 0, }, /* 659 */
- { 120, 7, 12, 0, 0, }, /* 660 */
- { 120, 21, 12, 0, 0, }, /* 661 */
- { 120, 15, 12, 0, 0, }, /* 662 */
- { 88, 7, 12, 0, 0, }, /* 663 */
- { 129, 9, 12, 0, 64, }, /* 664 */
- { 129, 5, 12, 0, -64, }, /* 665 */
- { 129, 15, 12, 0, 0, }, /* 666 */
- { 0, 15, 12, 0, 0, }, /* 667 */
- { 93, 10, 5, 0, 0, }, /* 668 */
- { 93, 12, 3, 0, 0, }, /* 669 */
- { 93, 7, 12, 0, 0, }, /* 670 */
- { 93, 21, 12, 0, 0, }, /* 671 */
- { 93, 15, 12, 0, 0, }, /* 672 */
- { 93, 13, 12, 0, 0, }, /* 673 */
- { 84, 12, 3, 0, 0, }, /* 674 */
- { 84, 10, 5, 0, 0, }, /* 675 */
- { 84, 7, 12, 0, 0, }, /* 676 */
- { 84, 21, 12, 0, 0, }, /* 677 */
- { 84, 1, 4, 0, 0, }, /* 678 */
- { 100, 7, 12, 0, 0, }, /* 679 */
- { 100, 13, 12, 0, 0, }, /* 680 */
- { 95, 12, 3, 0, 0, }, /* 681 */
- { 95, 7, 12, 0, 0, }, /* 682 */
- { 95, 10, 5, 0, 0, }, /* 683 */
- { 95, 13, 12, 0, 0, }, /* 684 */
- { 95, 21, 12, 0, 0, }, /* 685 */
- { 110, 7, 12, 0, 0, }, /* 686 */
- { 110, 12, 3, 0, 0, }, /* 687 */
- { 110, 21, 12, 0, 0, }, /* 688 */
- { 99, 12, 3, 0, 0, }, /* 689 */
- { 99, 10, 5, 0, 0, }, /* 690 */
- { 99, 7, 12, 0, 0, }, /* 691 */
- { 99, 7, 4, 0, 0, }, /* 692 */
- { 99, 21, 12, 0, 0, }, /* 693 */
- { 99, 13, 12, 0, 0, }, /* 694 */
- { 47, 15, 12, 0, 0, }, /* 695 */
- { 107, 7, 12, 0, 0, }, /* 696 */
- { 107, 10, 5, 0, 0, }, /* 697 */
- { 107, 12, 3, 0, 0, }, /* 698 */
- { 107, 21, 12, 0, 0, }, /* 699 */
- { 128, 7, 12, 0, 0, }, /* 700 */
- { 128, 21, 12, 0, 0, }, /* 701 */
- { 108, 7, 12, 0, 0, }, /* 702 */
- { 108, 12, 3, 0, 0, }, /* 703 */
- { 108, 10, 5, 0, 0, }, /* 704 */
- { 108, 13, 12, 0, 0, }, /* 705 */
- { 106, 12, 3, 0, 0, }, /* 706 */
- { 106, 10, 5, 0, 0, }, /* 707 */
- { 106, 7, 12, 0, 0, }, /* 708 */
- { 106, 10, 3, 0, 0, }, /* 709 */
- { 134, 7, 12, 0, 0, }, /* 710 */
- { 134, 10, 5, 0, 0, }, /* 711 */
- { 134, 12, 3, 0, 0, }, /* 712 */
- { 134, 21, 12, 0, 0, }, /* 713 */
- { 134, 13, 12, 0, 0, }, /* 714 */
- { 123, 7, 12, 0, 0, }, /* 715 */
- { 123, 10, 3, 0, 0, }, /* 716 */
- { 123, 10, 5, 0, 0, }, /* 717 */
- { 123, 12, 3, 0, 0, }, /* 718 */
- { 123, 21, 12, 0, 0, }, /* 719 */
- { 123, 13, 12, 0, 0, }, /* 720 */
- { 122, 7, 12, 0, 0, }, /* 721 */
- { 122, 10, 3, 0, 0, }, /* 722 */
- { 122, 10, 5, 0, 0, }, /* 723 */
- { 122, 12, 3, 0, 0, }, /* 724 */
- { 122, 21, 12, 0, 0, }, /* 725 */
- { 113, 7, 12, 0, 0, }, /* 726 */
- { 113, 10, 5, 0, 0, }, /* 727 */
- { 113, 12, 3, 0, 0, }, /* 728 */
- { 113, 21, 12, 0, 0, }, /* 729 */
- { 113, 13, 12, 0, 0, }, /* 730 */
- { 101, 7, 12, 0, 0, }, /* 731 */
- { 101, 12, 3, 0, 0, }, /* 732 */
- { 101, 10, 5, 0, 0, }, /* 733 */
- { 101, 13, 12, 0, 0, }, /* 734 */
- { 125, 7, 12, 0, 0, }, /* 735 */
- { 125, 12, 3, 0, 0, }, /* 736 */
- { 125, 10, 5, 0, 0, }, /* 737 */
- { 125, 13, 12, 0, 0, }, /* 738 */
- { 125, 15, 12, 0, 0, }, /* 739 */
- { 125, 21, 12, 0, 0, }, /* 740 */
- { 125, 26, 12, 0, 0, }, /* 741 */
- { 124, 9, 12, 0, 32, }, /* 742 */
- { 124, 5, 12, 0, -32, }, /* 743 */
- { 124, 13, 12, 0, 0, }, /* 744 */
- { 124, 15, 12, 0, 0, }, /* 745 */
- { 124, 7, 12, 0, 0, }, /* 746 */
- { 140, 7, 12, 0, 0, }, /* 747 */
- { 140, 12, 3, 0, 0, }, /* 748 */
- { 140, 10, 5, 0, 0, }, /* 749 */
- { 140, 7, 4, 0, 0, }, /* 750 */
- { 140, 21, 12, 0, 0, }, /* 751 */
- { 139, 7, 12, 0, 0, }, /* 752 */
- { 139, 12, 3, 0, 0, }, /* 753 */
- { 139, 10, 5, 0, 0, }, /* 754 */
- { 139, 7, 4, 0, 0, }, /* 755 */
- { 139, 21, 12, 0, 0, }, /* 756 */
- { 121, 7, 12, 0, 0, }, /* 757 */
- { 132, 7, 12, 0, 0, }, /* 758 */
- { 132, 10, 5, 0, 0, }, /* 759 */
- { 132, 12, 3, 0, 0, }, /* 760 */
- { 132, 21, 12, 0, 0, }, /* 761 */
- { 132, 13, 12, 0, 0, }, /* 762 */
- { 132, 15, 12, 0, 0, }, /* 763 */
- { 133, 21, 12, 0, 0, }, /* 764 */
- { 133, 7, 12, 0, 0, }, /* 765 */
- { 133, 12, 3, 0, 0, }, /* 766 */
- { 133, 10, 5, 0, 0, }, /* 767 */
- { 137, 7, 12, 0, 0, }, /* 768 */
- { 137, 12, 3, 0, 0, }, /* 769 */
- { 137, 7, 4, 0, 0, }, /* 770 */
- { 137, 13, 12, 0, 0, }, /* 771 */
- { 62, 7, 12, 0, 0, }, /* 772 */
- { 62, 14, 12, 0, 0, }, /* 773 */
- { 62, 21, 12, 0, 0, }, /* 774 */
- { 79, 7, 12, 0, 0, }, /* 775 */
- { 126, 7, 12, 0, 0, }, /* 776 */
- { 114, 7, 12, 0, 0, }, /* 777 */
- { 114, 13, 12, 0, 0, }, /* 778 */
- { 114, 21, 12, 0, 0, }, /* 779 */
- { 102, 7, 12, 0, 0, }, /* 780 */
- { 102, 12, 3, 0, 0, }, /* 781 */
- { 102, 21, 12, 0, 0, }, /* 782 */
- { 118, 7, 12, 0, 0, }, /* 783 */
- { 118, 12, 3, 0, 0, }, /* 784 */
- { 118, 21, 12, 0, 0, }, /* 785 */
- { 118, 26, 12, 0, 0, }, /* 786 */
- { 118, 6, 12, 0, 0, }, /* 787 */
- { 118, 13, 12, 0, 0, }, /* 788 */
- { 118, 15, 12, 0, 0, }, /* 789 */
- { 98, 7, 12, 0, 0, }, /* 790 */
- { 98, 10, 5, 0, 0, }, /* 791 */
- { 98, 12, 3, 0, 0, }, /* 792 */
- { 98, 6, 12, 0, 0, }, /* 793 */
- { 136, 6, 12, 0, 0, }, /* 794 */
- { 138, 6, 12, 0, 0, }, /* 795 */
- { 136, 7, 12, 0, 0, }, /* 796 */
- { 138, 7, 12, 0, 0, }, /* 797 */
- { 104, 7, 12, 0, 0, }, /* 798 */
- { 104, 26, 12, 0, 0, }, /* 799 */
- { 104, 12, 3, 0, 0, }, /* 800 */
- { 104, 21, 12, 0, 0, }, /* 801 */
- { 9, 10, 3, 0, 0, }, /* 802 */
- { 19, 12, 3, 0, 0, }, /* 803 */
- { 130, 26, 12, 0, 0, }, /* 804 */
- { 130, 12, 3, 0, 0, }, /* 805 */
- { 130, 21, 12, 0, 0, }, /* 806 */
- { 17, 12, 3, 0, 0, }, /* 807 */
- { 112, 7, 12, 0, 0, }, /* 808 */
- { 112, 15, 12, 0, 0, }, /* 809 */
- { 112, 12, 3, 0, 0, }, /* 810 */
- { 131, 9, 12, 0, 34, }, /* 811 */
- { 131, 5, 12, 0, -34, }, /* 812 */
- { 131, 12, 3, 0, 0, }, /* 813 */
- { 131, 13, 12, 0, 0, }, /* 814 */
- { 131, 21, 12, 0, 0, }, /* 815 */
- { 9, 26, 11, 0, 0, }, /* 816 */
- { 26, 26, 12, 0, 0, }, /* 817 */
- { 9, 24, 14, 0, 0, }, /* 818 */
- { 9, 26, 15, 0, 0, }, /* 819 */
- { 9, 1, 3, 0, 0, }, /* 820 */
+ { 1, 5, 12, 0, -48, }, /* 201 */
+ { 1, 17, 12, 0, 0, }, /* 202 */
+ { 1, 26, 12, 0, 0, }, /* 203 */
+ { 1, 23, 12, 0, 0, }, /* 204 */
+ { 25, 12, 3, 0, 0, }, /* 205 */
+ { 25, 17, 12, 0, 0, }, /* 206 */
+ { 25, 21, 12, 0, 0, }, /* 207 */
+ { 25, 7, 12, 0, 0, }, /* 208 */
+ { 0, 1, 4, 0, 0, }, /* 209 */
+ { 9, 1, 4, 0, 0, }, /* 210 */
+ { 0, 25, 12, 0, 0, }, /* 211 */
+ { 0, 21, 12, 0, 0, }, /* 212 */
+ { 0, 23, 12, 0, 0, }, /* 213 */
+ { 0, 26, 12, 0, 0, }, /* 214 */
+ { 0, 12, 3, 0, 0, }, /* 215 */
+ { 0, 1, 2, 0, 0, }, /* 216 */
+ { 0, 7, 12, 0, 0, }, /* 217 */
+ { 0, 13, 12, 0, 0, }, /* 218 */
+ { 0, 6, 12, 0, 0, }, /* 219 */
+ { 49, 21, 12, 0, 0, }, /* 220 */
+ { 49, 1, 4, 0, 0, }, /* 221 */
+ { 49, 7, 12, 0, 0, }, /* 222 */
+ { 49, 12, 3, 0, 0, }, /* 223 */
+ { 55, 7, 12, 0, 0, }, /* 224 */
+ { 55, 12, 3, 0, 0, }, /* 225 */
+ { 63, 13, 12, 0, 0, }, /* 226 */
+ { 63, 7, 12, 0, 0, }, /* 227 */
+ { 63, 12, 3, 0, 0, }, /* 228 */
+ { 63, 6, 12, 0, 0, }, /* 229 */
+ { 63, 26, 12, 0, 0, }, /* 230 */
+ { 63, 21, 12, 0, 0, }, /* 231 */
+ { 63, 23, 12, 0, 0, }, /* 232 */
+ { 89, 7, 12, 0, 0, }, /* 233 */
+ { 89, 12, 3, 0, 0, }, /* 234 */
+ { 89, 6, 12, 0, 0, }, /* 235 */
+ { 89, 21, 12, 0, 0, }, /* 236 */
+ { 94, 7, 12, 0, 0, }, /* 237 */
+ { 94, 12, 3, 0, 0, }, /* 238 */
+ { 94, 21, 12, 0, 0, }, /* 239 */
+ { 14, 12, 3, 0, 0, }, /* 240 */
+ { 14, 10, 5, 0, 0, }, /* 241 */
+ { 14, 7, 12, 0, 0, }, /* 242 */
+ { 14, 13, 12, 0, 0, }, /* 243 */
+ { 14, 21, 12, 0, 0, }, /* 244 */
+ { 14, 6, 12, 0, 0, }, /* 245 */
+ { 2, 7, 12, 0, 0, }, /* 246 */
+ { 2, 12, 3, 0, 0, }, /* 247 */
+ { 2, 10, 5, 0, 0, }, /* 248 */
+ { 2, 10, 3, 0, 0, }, /* 249 */
+ { 2, 13, 12, 0, 0, }, /* 250 */
+ { 2, 23, 12, 0, 0, }, /* 251 */
+ { 2, 15, 12, 0, 0, }, /* 252 */
+ { 2, 26, 12, 0, 0, }, /* 253 */
+ { 2, 21, 12, 0, 0, }, /* 254 */
+ { 21, 12, 3, 0, 0, }, /* 255 */
+ { 21, 10, 5, 0, 0, }, /* 256 */
+ { 21, 7, 12, 0, 0, }, /* 257 */
+ { 21, 13, 12, 0, 0, }, /* 258 */
+ { 21, 21, 12, 0, 0, }, /* 259 */
+ { 20, 12, 3, 0, 0, }, /* 260 */
+ { 20, 10, 5, 0, 0, }, /* 261 */
+ { 20, 7, 12, 0, 0, }, /* 262 */
+ { 20, 13, 12, 0, 0, }, /* 263 */
+ { 20, 21, 12, 0, 0, }, /* 264 */
+ { 20, 23, 12, 0, 0, }, /* 265 */
+ { 43, 12, 3, 0, 0, }, /* 266 */
+ { 43, 10, 5, 0, 0, }, /* 267 */
+ { 43, 7, 12, 0, 0, }, /* 268 */
+ { 43, 10, 3, 0, 0, }, /* 269 */
+ { 43, 13, 12, 0, 0, }, /* 270 */
+ { 43, 26, 12, 0, 0, }, /* 271 */
+ { 43, 15, 12, 0, 0, }, /* 272 */
+ { 53, 12, 3, 0, 0, }, /* 273 */
+ { 53, 7, 12, 0, 0, }, /* 274 */
+ { 53, 10, 3, 0, 0, }, /* 275 */
+ { 53, 10, 5, 0, 0, }, /* 276 */
+ { 53, 13, 12, 0, 0, }, /* 277 */
+ { 53, 15, 12, 0, 0, }, /* 278 */
+ { 53, 26, 12, 0, 0, }, /* 279 */
+ { 53, 23, 12, 0, 0, }, /* 280 */
+ { 54, 12, 3, 0, 0, }, /* 281 */
+ { 54, 10, 5, 0, 0, }, /* 282 */
+ { 54, 7, 12, 0, 0, }, /* 283 */
+ { 54, 13, 12, 0, 0, }, /* 284 */
+ { 54, 15, 12, 0, 0, }, /* 285 */
+ { 54, 26, 12, 0, 0, }, /* 286 */
+ { 28, 7, 12, 0, 0, }, /* 287 */
+ { 28, 12, 3, 0, 0, }, /* 288 */
+ { 28, 10, 5, 0, 0, }, /* 289 */
+ { 28, 21, 12, 0, 0, }, /* 290 */
+ { 28, 10, 3, 0, 0, }, /* 291 */
+ { 28, 13, 12, 0, 0, }, /* 292 */
+ { 36, 12, 3, 0, 0, }, /* 293 */
+ { 36, 10, 5, 0, 0, }, /* 294 */
+ { 36, 7, 12, 0, 0, }, /* 295 */
+ { 36, 10, 3, 0, 0, }, /* 296 */
+ { 36, 7, 4, 0, 0, }, /* 297 */
+ { 36, 26, 12, 0, 0, }, /* 298 */
+ { 36, 15, 12, 0, 0, }, /* 299 */
+ { 36, 13, 12, 0, 0, }, /* 300 */
+ { 47, 10, 5, 0, 0, }, /* 301 */
+ { 47, 7, 12, 0, 0, }, /* 302 */
+ { 47, 12, 3, 0, 0, }, /* 303 */
+ { 47, 10, 3, 0, 0, }, /* 304 */
+ { 47, 13, 12, 0, 0, }, /* 305 */
+ { 47, 21, 12, 0, 0, }, /* 306 */
+ { 56, 7, 12, 0, 0, }, /* 307 */
+ { 56, 12, 3, 0, 0, }, /* 308 */
+ { 56, 7, 5, 0, 0, }, /* 309 */
+ { 56, 6, 12, 0, 0, }, /* 310 */
+ { 56, 21, 12, 0, 0, }, /* 311 */
+ { 56, 13, 12, 0, 0, }, /* 312 */
+ { 32, 7, 12, 0, 0, }, /* 313 */
+ { 32, 12, 3, 0, 0, }, /* 314 */
+ { 32, 7, 5, 0, 0, }, /* 315 */
+ { 32, 6, 12, 0, 0, }, /* 316 */
+ { 32, 13, 12, 0, 0, }, /* 317 */
+ { 57, 7, 12, 0, 0, }, /* 318 */
+ { 57, 26, 12, 0, 0, }, /* 319 */
+ { 57, 21, 12, 0, 0, }, /* 320 */
+ { 57, 12, 3, 0, 0, }, /* 321 */
+ { 57, 13, 12, 0, 0, }, /* 322 */
+ { 57, 15, 12, 0, 0, }, /* 323 */
+ { 57, 22, 12, 0, 0, }, /* 324 */
+ { 57, 18, 12, 0, 0, }, /* 325 */
+ { 57, 10, 5, 0, 0, }, /* 326 */
+ { 38, 7, 12, 0, 0, }, /* 327 */
+ { 38, 10, 12, 0, 0, }, /* 328 */
+ { 38, 12, 3, 0, 0, }, /* 329 */
+ { 38, 10, 5, 0, 0, }, /* 330 */
+ { 38, 13, 12, 0, 0, }, /* 331 */
+ { 38, 21, 12, 0, 0, }, /* 332 */
+ { 38, 26, 12, 0, 0, }, /* 333 */
+ { 16, 9, 12, 0, 7264, }, /* 334 */
+ { 16, 5, 12, 0, 3008, }, /* 335 */
+ { 16, 6, 12, 0, 0, }, /* 336 */
+ { 23, 7, 6, 0, 0, }, /* 337 */
+ { 23, 7, 7, 0, 0, }, /* 338 */
+ { 23, 7, 8, 0, 0, }, /* 339 */
+ { 15, 7, 12, 0, 0, }, /* 340 */
+ { 15, 12, 3, 0, 0, }, /* 341 */
+ { 15, 21, 12, 0, 0, }, /* 342 */
+ { 15, 15, 12, 0, 0, }, /* 343 */
+ { 15, 26, 12, 0, 0, }, /* 344 */
+ { 8, 9, 12, 0, 38864, }, /* 345 */
+ { 8, 9, 12, 0, 8, }, /* 346 */
+ { 8, 5, 12, 0, -8, }, /* 347 */
+ { 7, 17, 12, 0, 0, }, /* 348 */
+ { 7, 7, 12, 0, 0, }, /* 349 */
+ { 7, 21, 12, 0, 0, }, /* 350 */
+ { 40, 29, 12, 0, 0, }, /* 351 */
+ { 40, 7, 12, 0, 0, }, /* 352 */
+ { 40, 22, 12, 0, 0, }, /* 353 */
+ { 40, 18, 12, 0, 0, }, /* 354 */
+ { 45, 7, 12, 0, 0, }, /* 355 */
+ { 45, 14, 12, 0, 0, }, /* 356 */
+ { 50, 7, 12, 0, 0, }, /* 357 */
+ { 50, 12, 3, 0, 0, }, /* 358 */
+ { 24, 7, 12, 0, 0, }, /* 359 */
+ { 24, 12, 3, 0, 0, }, /* 360 */
+ { 6, 7, 12, 0, 0, }, /* 361 */
+ { 6, 12, 3, 0, 0, }, /* 362 */
+ { 51, 7, 12, 0, 0, }, /* 363 */
+ { 51, 12, 3, 0, 0, }, /* 364 */
+ { 31, 7, 12, 0, 0, }, /* 365 */
+ { 31, 12, 3, 0, 0, }, /* 366 */
+ { 31, 10, 5, 0, 0, }, /* 367 */
+ { 31, 21, 12, 0, 0, }, /* 368 */
+ { 31, 6, 12, 0, 0, }, /* 369 */
+ { 31, 23, 12, 0, 0, }, /* 370 */
+ { 31, 13, 12, 0, 0, }, /* 371 */
+ { 31, 15, 12, 0, 0, }, /* 372 */
+ { 37, 21, 12, 0, 0, }, /* 373 */
+ { 37, 17, 12, 0, 0, }, /* 374 */
+ { 37, 12, 3, 0, 0, }, /* 375 */
+ { 37, 1, 2, 0, 0, }, /* 376 */
+ { 37, 13, 12, 0, 0, }, /* 377 */
+ { 37, 7, 12, 0, 0, }, /* 378 */
+ { 37, 6, 12, 0, 0, }, /* 379 */
+ { 34, 7, 12, 0, 0, }, /* 380 */
+ { 34, 12, 3, 0, 0, }, /* 381 */
+ { 34, 10, 5, 0, 0, }, /* 382 */
+ { 34, 26, 12, 0, 0, }, /* 383 */
+ { 34, 21, 12, 0, 0, }, /* 384 */
+ { 34, 13, 12, 0, 0, }, /* 385 */
+ { 52, 7, 12, 0, 0, }, /* 386 */
+ { 39, 7, 12, 0, 0, }, /* 387 */
+ { 39, 13, 12, 0, 0, }, /* 388 */
+ { 39, 15, 12, 0, 0, }, /* 389 */
+ { 39, 26, 12, 0, 0, }, /* 390 */
+ { 31, 26, 12, 0, 0, }, /* 391 */
+ { 5, 7, 12, 0, 0, }, /* 392 */
+ { 5, 12, 3, 0, 0, }, /* 393 */
+ { 5, 10, 5, 0, 0, }, /* 394 */
+ { 5, 21, 12, 0, 0, }, /* 395 */
+ { 90, 7, 12, 0, 0, }, /* 396 */
+ { 90, 10, 5, 0, 0, }, /* 397 */
+ { 90, 12, 3, 0, 0, }, /* 398 */
+ { 90, 10, 12, 0, 0, }, /* 399 */
+ { 90, 13, 12, 0, 0, }, /* 400 */
+ { 90, 21, 12, 0, 0, }, /* 401 */
+ { 90, 6, 12, 0, 0, }, /* 402 */
+ { 27, 11, 3, 0, 0, }, /* 403 */
+ { 61, 12, 3, 0, 0, }, /* 404 */
+ { 61, 10, 5, 0, 0, }, /* 405 */
+ { 61, 7, 12, 0, 0, }, /* 406 */
+ { 61, 13, 12, 0, 0, }, /* 407 */
+ { 61, 21, 12, 0, 0, }, /* 408 */
+ { 61, 26, 12, 0, 0, }, /* 409 */
+ { 75, 12, 3, 0, 0, }, /* 410 */
+ { 75, 10, 5, 0, 0, }, /* 411 */
+ { 75, 7, 12, 0, 0, }, /* 412 */
+ { 75, 13, 12, 0, 0, }, /* 413 */
+ { 92, 7, 12, 0, 0, }, /* 414 */
+ { 92, 12, 3, 0, 0, }, /* 415 */
+ { 92, 10, 5, 0, 0, }, /* 416 */
+ { 92, 21, 12, 0, 0, }, /* 417 */
+ { 69, 7, 12, 0, 0, }, /* 418 */
+ { 69, 10, 5, 0, 0, }, /* 419 */
+ { 69, 12, 3, 0, 0, }, /* 420 */
+ { 69, 21, 12, 0, 0, }, /* 421 */
+ { 69, 13, 12, 0, 0, }, /* 422 */
+ { 72, 13, 12, 0, 0, }, /* 423 */
+ { 72, 7, 12, 0, 0, }, /* 424 */
+ { 72, 6, 12, 0, 0, }, /* 425 */
+ { 72, 21, 12, 0, 0, }, /* 426 */
+ { 12, 5, 12, 63, -6222, }, /* 427 */
+ { 12, 5, 12, 67, -6221, }, /* 428 */
+ { 12, 5, 12, 71, -6212, }, /* 429 */
+ { 12, 5, 12, 75, -6210, }, /* 430 */
+ { 12, 5, 12, 79, -6210, }, /* 431 */
+ { 12, 5, 12, 79, -6211, }, /* 432 */
+ { 12, 5, 12, 84, -6204, }, /* 433 */
+ { 12, 5, 12, 88, -6180, }, /* 434 */
+ { 12, 5, 12, 108, 35267, }, /* 435 */
+ { 16, 9, 12, 0, -3008, }, /* 436 */
+ { 75, 21, 12, 0, 0, }, /* 437 */
+ { 9, 10, 5, 0, 0, }, /* 438 */
+ { 9, 7, 12, 0, 0, }, /* 439 */
+ { 12, 5, 12, 0, 0, }, /* 440 */
+ { 12, 6, 12, 0, 0, }, /* 441 */
+ { 33, 5, 12, 0, 35332, }, /* 442 */
+ { 33, 5, 12, 0, 3814, }, /* 443 */
+ { 33, 9, 12, 92, 1, }, /* 444 */
+ { 33, 5, 12, 92, -1, }, /* 445 */
+ { 33, 5, 12, 92, -58, }, /* 446 */
+ { 33, 9, 12, 0, -7615, }, /* 447 */
+ { 19, 5, 12, 0, 8, }, /* 448 */
+ { 19, 9, 12, 0, -8, }, /* 449 */
+ { 19, 5, 12, 0, 74, }, /* 450 */
+ { 19, 5, 12, 0, 86, }, /* 451 */
+ { 19, 5, 12, 0, 100, }, /* 452 */
+ { 19, 5, 12, 0, 128, }, /* 453 */
+ { 19, 5, 12, 0, 112, }, /* 454 */
+ { 19, 5, 12, 0, 126, }, /* 455 */
+ { 19, 8, 12, 0, -8, }, /* 456 */
+ { 19, 5, 12, 0, 9, }, /* 457 */
+ { 19, 9, 12, 0, -74, }, /* 458 */
+ { 19, 8, 12, 0, -9, }, /* 459 */
+ { 19, 5, 12, 21, -7173, }, /* 460 */
+ { 19, 9, 12, 0, -86, }, /* 461 */
+ { 19, 9, 12, 0, -100, }, /* 462 */
+ { 19, 9, 12, 0, -112, }, /* 463 */
+ { 19, 9, 12, 0, -128, }, /* 464 */
+ { 19, 9, 12, 0, -126, }, /* 465 */
+ { 27, 1, 3, 0, 0, }, /* 466 */
+ { 27, 1, 13, 0, 0, }, /* 467 */
+ { 9, 27, 2, 0, 0, }, /* 468 */
+ { 9, 28, 2, 0, 0, }, /* 469 */
+ { 9, 21, 14, 0, 0, }, /* 470 */
+ { 9, 2, 2, 0, 0, }, /* 471 */
+ { 9, 9, 12, 0, 0, }, /* 472 */
+ { 9, 5, 12, 0, 0, }, /* 473 */
+ { 19, 9, 12, 96, -7517, }, /* 474 */
+ { 33, 9, 12, 100, -8383, }, /* 475 */
+ { 33, 9, 12, 104, -8262, }, /* 476 */
+ { 33, 9, 12, 0, 28, }, /* 477 */
+ { 9, 5, 14, 0, 0, }, /* 478 */
+ { 33, 5, 12, 0, -28, }, /* 479 */
+ { 33, 14, 12, 0, 16, }, /* 480 */
+ { 33, 14, 12, 0, -16, }, /* 481 */
+ { 33, 14, 12, 0, 0, }, /* 482 */
+ { 9, 25, 14, 0, 0, }, /* 483 */
+ { 9, 26, 12, 0, 26, }, /* 484 */
+ { 9, 26, 14, 0, 26, }, /* 485 */
+ { 9, 26, 12, 0, -26, }, /* 486 */
+ { 4, 26, 12, 0, 0, }, /* 487 */
+ { 17, 9, 12, 0, 48, }, /* 488 */
+ { 17, 5, 12, 0, -48, }, /* 489 */
+ { 33, 9, 12, 0, -10743, }, /* 490 */
+ { 33, 9, 12, 0, -3814, }, /* 491 */
+ { 33, 9, 12, 0, -10727, }, /* 492 */
+ { 33, 5, 12, 0, -10795, }, /* 493 */
+ { 33, 5, 12, 0, -10792, }, /* 494 */
+ { 33, 9, 12, 0, -10780, }, /* 495 */
+ { 33, 9, 12, 0, -10749, }, /* 496 */
+ { 33, 9, 12, 0, -10783, }, /* 497 */
+ { 33, 9, 12, 0, -10782, }, /* 498 */
+ { 33, 9, 12, 0, -10815, }, /* 499 */
+ { 10, 5, 12, 0, 0, }, /* 500 */
+ { 10, 26, 12, 0, 0, }, /* 501 */
+ { 10, 12, 3, 0, 0, }, /* 502 */
+ { 10, 21, 12, 0, 0, }, /* 503 */
+ { 10, 15, 12, 0, 0, }, /* 504 */
+ { 16, 5, 12, 0, -7264, }, /* 505 */
+ { 58, 7, 12, 0, 0, }, /* 506 */
+ { 58, 6, 12, 0, 0, }, /* 507 */
+ { 58, 21, 12, 0, 0, }, /* 508 */
+ { 58, 12, 3, 0, 0, }, /* 509 */
+ { 22, 26, 12, 0, 0, }, /* 510 */
+ { 22, 6, 12, 0, 0, }, /* 511 */
+ { 22, 14, 12, 0, 0, }, /* 512 */
+ { 23, 10, 3, 0, 0, }, /* 513 */
+ { 9, 17, 14, 0, 0, }, /* 514 */
+ { 26, 7, 12, 0, 0, }, /* 515 */
+ { 26, 6, 12, 0, 0, }, /* 516 */
+ { 29, 7, 12, 0, 0, }, /* 517 */
+ { 29, 6, 12, 0, 0, }, /* 518 */
+ { 3, 7, 12, 0, 0, }, /* 519 */
+ { 23, 7, 12, 0, 0, }, /* 520 */
+ { 23, 26, 12, 0, 0, }, /* 521 */
+ { 29, 26, 12, 0, 0, }, /* 522 */
+ { 22, 7, 12, 0, 0, }, /* 523 */
+ { 60, 7, 12, 0, 0, }, /* 524 */
+ { 60, 6, 12, 0, 0, }, /* 525 */
+ { 60, 26, 12, 0, 0, }, /* 526 */
+ { 85, 7, 12, 0, 0, }, /* 527 */
+ { 85, 6, 12, 0, 0, }, /* 528 */
+ { 85, 21, 12, 0, 0, }, /* 529 */
+ { 76, 7, 12, 0, 0, }, /* 530 */
+ { 76, 6, 12, 0, 0, }, /* 531 */
+ { 76, 21, 12, 0, 0, }, /* 532 */
+ { 76, 13, 12, 0, 0, }, /* 533 */
+ { 12, 9, 12, 108, 1, }, /* 534 */
+ { 12, 5, 12, 108, -35267, }, /* 535 */
+ { 12, 7, 12, 0, 0, }, /* 536 */
+ { 12, 21, 12, 0, 0, }, /* 537 */
+ { 78, 7, 12, 0, 0, }, /* 538 */
+ { 78, 14, 12, 0, 0, }, /* 539 */
+ { 78, 12, 3, 0, 0, }, /* 540 */
+ { 78, 21, 12, 0, 0, }, /* 541 */
+ { 33, 9, 12, 0, -35332, }, /* 542 */
+ { 33, 9, 12, 0, -42280, }, /* 543 */
+ { 33, 9, 12, 0, -42308, }, /* 544 */
+ { 33, 9, 12, 0, -42319, }, /* 545 */
+ { 33, 9, 12, 0, -42315, }, /* 546 */
+ { 33, 9, 12, 0, -42305, }, /* 547 */
+ { 33, 9, 12, 0, -42258, }, /* 548 */
+ { 33, 9, 12, 0, -42282, }, /* 549 */
+ { 33, 9, 12, 0, -42261, }, /* 550 */
+ { 33, 9, 12, 0, 928, }, /* 551 */
+ { 48, 7, 12, 0, 0, }, /* 552 */
+ { 48, 12, 3, 0, 0, }, /* 553 */
+ { 48, 10, 5, 0, 0, }, /* 554 */
+ { 48, 26, 12, 0, 0, }, /* 555 */
+ { 64, 7, 12, 0, 0, }, /* 556 */
+ { 64, 21, 12, 0, 0, }, /* 557 */
+ { 74, 10, 5, 0, 0, }, /* 558 */
+ { 74, 7, 12, 0, 0, }, /* 559 */
+ { 74, 12, 3, 0, 0, }, /* 560 */
+ { 74, 21, 12, 0, 0, }, /* 561 */
+ { 74, 13, 12, 0, 0, }, /* 562 */
+ { 68, 13, 12, 0, 0, }, /* 563 */
+ { 68, 7, 12, 0, 0, }, /* 564 */
+ { 68, 12, 3, 0, 0, }, /* 565 */
+ { 68, 21, 12, 0, 0, }, /* 566 */
+ { 73, 7, 12, 0, 0, }, /* 567 */
+ { 73, 12, 3, 0, 0, }, /* 568 */
+ { 73, 10, 5, 0, 0, }, /* 569 */
+ { 73, 21, 12, 0, 0, }, /* 570 */
+ { 83, 12, 3, 0, 0, }, /* 571 */
+ { 83, 10, 5, 0, 0, }, /* 572 */
+ { 83, 7, 12, 0, 0, }, /* 573 */
+ { 83, 21, 12, 0, 0, }, /* 574 */
+ { 83, 13, 12, 0, 0, }, /* 575 */
+ { 38, 6, 12, 0, 0, }, /* 576 */
+ { 67, 7, 12, 0, 0, }, /* 577 */
+ { 67, 12, 3, 0, 0, }, /* 578 */
+ { 67, 10, 5, 0, 0, }, /* 579 */
+ { 67, 13, 12, 0, 0, }, /* 580 */
+ { 67, 21, 12, 0, 0, }, /* 581 */
+ { 91, 7, 12, 0, 0, }, /* 582 */
+ { 91, 12, 3, 0, 0, }, /* 583 */
+ { 91, 6, 12, 0, 0, }, /* 584 */
+ { 91, 21, 12, 0, 0, }, /* 585 */
+ { 86, 7, 12, 0, 0, }, /* 586 */
+ { 86, 10, 5, 0, 0, }, /* 587 */
+ { 86, 12, 3, 0, 0, }, /* 588 */
+ { 86, 21, 12, 0, 0, }, /* 589 */
+ { 86, 6, 12, 0, 0, }, /* 590 */
+ { 33, 5, 12, 0, -928, }, /* 591 */
+ { 8, 5, 12, 0, -38864, }, /* 592 */
+ { 86, 13, 12, 0, 0, }, /* 593 */
+ { 23, 7, 9, 0, 0, }, /* 594 */
+ { 23, 7, 10, 0, 0, }, /* 595 */
+ { 9, 4, 2, 0, 0, }, /* 596 */
+ { 9, 3, 12, 0, 0, }, /* 597 */
+ { 25, 25, 12, 0, 0, }, /* 598 */
+ { 0, 24, 12, 0, 0, }, /* 599 */
+ { 9, 6, 3, 0, 0, }, /* 600 */
+ { 35, 7, 12, 0, 0, }, /* 601 */
+ { 19, 14, 12, 0, 0, }, /* 602 */
+ { 19, 15, 12, 0, 0, }, /* 603 */
+ { 19, 26, 12, 0, 0, }, /* 604 */
+ { 70, 7, 12, 0, 0, }, /* 605 */
+ { 66, 7, 12, 0, 0, }, /* 606 */
+ { 41, 7, 12, 0, 0, }, /* 607 */
+ { 41, 15, 12, 0, 0, }, /* 608 */
+ { 18, 7, 12, 0, 0, }, /* 609 */
+ { 18, 14, 12, 0, 0, }, /* 610 */
+ { 117, 7, 12, 0, 0, }, /* 611 */
+ { 117, 12, 3, 0, 0, }, /* 612 */
+ { 59, 7, 12, 0, 0, }, /* 613 */
+ { 59, 21, 12, 0, 0, }, /* 614 */
+ { 42, 7, 12, 0, 0, }, /* 615 */
+ { 42, 21, 12, 0, 0, }, /* 616 */
+ { 42, 14, 12, 0, 0, }, /* 617 */
+ { 13, 9, 12, 0, 40, }, /* 618 */
+ { 13, 5, 12, 0, -40, }, /* 619 */
+ { 46, 7, 12, 0, 0, }, /* 620 */
+ { 44, 7, 12, 0, 0, }, /* 621 */
+ { 44, 13, 12, 0, 0, }, /* 622 */
+ { 135, 9, 12, 0, 40, }, /* 623 */
+ { 135, 5, 12, 0, -40, }, /* 624 */
+ { 105, 7, 12, 0, 0, }, /* 625 */
+ { 103, 7, 12, 0, 0, }, /* 626 */
+ { 103, 21, 12, 0, 0, }, /* 627 */
+ { 109, 7, 12, 0, 0, }, /* 628 */
+ { 11, 7, 12, 0, 0, }, /* 629 */
+ { 80, 7, 12, 0, 0, }, /* 630 */
+ { 80, 21, 12, 0, 0, }, /* 631 */
+ { 80, 15, 12, 0, 0, }, /* 632 */
+ { 119, 7, 12, 0, 0, }, /* 633 */
+ { 119, 26, 12, 0, 0, }, /* 634 */
+ { 119, 15, 12, 0, 0, }, /* 635 */
+ { 115, 7, 12, 0, 0, }, /* 636 */
+ { 115, 15, 12, 0, 0, }, /* 637 */
+ { 127, 7, 12, 0, 0, }, /* 638 */
+ { 127, 15, 12, 0, 0, }, /* 639 */
+ { 65, 7, 12, 0, 0, }, /* 640 */
+ { 65, 15, 12, 0, 0, }, /* 641 */
+ { 65, 21, 12, 0, 0, }, /* 642 */
+ { 71, 7, 12, 0, 0, }, /* 643 */
+ { 71, 21, 12, 0, 0, }, /* 644 */
+ { 97, 7, 12, 0, 0, }, /* 645 */
+ { 96, 7, 12, 0, 0, }, /* 646 */
+ { 96, 15, 12, 0, 0, }, /* 647 */
+ { 30, 7, 12, 0, 0, }, /* 648 */
+ { 30, 12, 3, 0, 0, }, /* 649 */
+ { 30, 15, 12, 0, 0, }, /* 650 */
+ { 30, 21, 12, 0, 0, }, /* 651 */
+ { 87, 7, 12, 0, 0, }, /* 652 */
+ { 87, 15, 12, 0, 0, }, /* 653 */
+ { 87, 21, 12, 0, 0, }, /* 654 */
+ { 116, 7, 12, 0, 0, }, /* 655 */
+ { 116, 15, 12, 0, 0, }, /* 656 */
+ { 111, 7, 12, 0, 0, }, /* 657 */
+ { 111, 26, 12, 0, 0, }, /* 658 */
+ { 111, 12, 3, 0, 0, }, /* 659 */
+ { 111, 15, 12, 0, 0, }, /* 660 */
+ { 111, 21, 12, 0, 0, }, /* 661 */
+ { 77, 7, 12, 0, 0, }, /* 662 */
+ { 77, 21, 12, 0, 0, }, /* 663 */
+ { 82, 7, 12, 0, 0, }, /* 664 */
+ { 82, 15, 12, 0, 0, }, /* 665 */
+ { 81, 7, 12, 0, 0, }, /* 666 */
+ { 81, 15, 12, 0, 0, }, /* 667 */
+ { 120, 7, 12, 0, 0, }, /* 668 */
+ { 120, 21, 12, 0, 0, }, /* 669 */
+ { 120, 15, 12, 0, 0, }, /* 670 */
+ { 88, 7, 12, 0, 0, }, /* 671 */
+ { 129, 9, 12, 0, 64, }, /* 672 */
+ { 129, 5, 12, 0, -64, }, /* 673 */
+ { 129, 15, 12, 0, 0, }, /* 674 */
+ { 143, 7, 12, 0, 0, }, /* 675 */
+ { 143, 12, 3, 0, 0, }, /* 676 */
+ { 143, 13, 12, 0, 0, }, /* 677 */
+ { 0, 15, 12, 0, 0, }, /* 678 */
+ { 146, 7, 12, 0, 0, }, /* 679 */
+ { 146, 15, 12, 0, 0, }, /* 680 */
+ { 147, 7, 12, 0, 0, }, /* 681 */
+ { 147, 12, 3, 0, 0, }, /* 682 */
+ { 147, 15, 12, 0, 0, }, /* 683 */
+ { 147, 21, 12, 0, 0, }, /* 684 */
+ { 93, 10, 5, 0, 0, }, /* 685 */
+ { 93, 12, 3, 0, 0, }, /* 686 */
+ { 93, 7, 12, 0, 0, }, /* 687 */
+ { 93, 21, 12, 0, 0, }, /* 688 */
+ { 93, 15, 12, 0, 0, }, /* 689 */
+ { 93, 13, 12, 0, 0, }, /* 690 */
+ { 84, 12, 3, 0, 0, }, /* 691 */
+ { 84, 10, 5, 0, 0, }, /* 692 */
+ { 84, 7, 12, 0, 0, }, /* 693 */
+ { 84, 21, 12, 0, 0, }, /* 694 */
+ { 84, 1, 4, 0, 0, }, /* 695 */
+ { 100, 7, 12, 0, 0, }, /* 696 */
+ { 100, 13, 12, 0, 0, }, /* 697 */
+ { 95, 12, 3, 0, 0, }, /* 698 */
+ { 95, 7, 12, 0, 0, }, /* 699 */
+ { 95, 10, 5, 0, 0, }, /* 700 */
+ { 95, 13, 12, 0, 0, }, /* 701 */
+ { 95, 21, 12, 0, 0, }, /* 702 */
+ { 110, 7, 12, 0, 0, }, /* 703 */
+ { 110, 12, 3, 0, 0, }, /* 704 */
+ { 110, 21, 12, 0, 0, }, /* 705 */
+ { 99, 12, 3, 0, 0, }, /* 706 */
+ { 99, 10, 5, 0, 0, }, /* 707 */
+ { 99, 7, 12, 0, 0, }, /* 708 */
+ { 99, 7, 4, 0, 0, }, /* 709 */
+ { 99, 21, 12, 0, 0, }, /* 710 */
+ { 99, 13, 12, 0, 0, }, /* 711 */
+ { 47, 15, 12, 0, 0, }, /* 712 */
+ { 107, 7, 12, 0, 0, }, /* 713 */
+ { 107, 10, 5, 0, 0, }, /* 714 */
+ { 107, 12, 3, 0, 0, }, /* 715 */
+ { 107, 21, 12, 0, 0, }, /* 716 */
+ { 128, 7, 12, 0, 0, }, /* 717 */
+ { 128, 21, 12, 0, 0, }, /* 718 */
+ { 108, 7, 12, 0, 0, }, /* 719 */
+ { 108, 12, 3, 0, 0, }, /* 720 */
+ { 108, 10, 5, 0, 0, }, /* 721 */
+ { 108, 13, 12, 0, 0, }, /* 722 */
+ { 106, 12, 3, 0, 0, }, /* 723 */
+ { 106, 10, 5, 0, 0, }, /* 724 */
+ { 106, 7, 12, 0, 0, }, /* 725 */
+ { 106, 10, 3, 0, 0, }, /* 726 */
+ { 134, 7, 12, 0, 0, }, /* 727 */
+ { 134, 10, 5, 0, 0, }, /* 728 */
+ { 134, 12, 3, 0, 0, }, /* 729 */
+ { 134, 21, 12, 0, 0, }, /* 730 */
+ { 134, 13, 12, 0, 0, }, /* 731 */
+ { 123, 7, 12, 0, 0, }, /* 732 */
+ { 123, 10, 3, 0, 0, }, /* 733 */
+ { 123, 10, 5, 0, 0, }, /* 734 */
+ { 123, 12, 3, 0, 0, }, /* 735 */
+ { 123, 21, 12, 0, 0, }, /* 736 */
+ { 123, 13, 12, 0, 0, }, /* 737 */
+ { 122, 7, 12, 0, 0, }, /* 738 */
+ { 122, 10, 3, 0, 0, }, /* 739 */
+ { 122, 10, 5, 0, 0, }, /* 740 */
+ { 122, 12, 3, 0, 0, }, /* 741 */
+ { 122, 21, 12, 0, 0, }, /* 742 */
+ { 113, 7, 12, 0, 0, }, /* 743 */
+ { 113, 10, 5, 0, 0, }, /* 744 */
+ { 113, 12, 3, 0, 0, }, /* 745 */
+ { 113, 21, 12, 0, 0, }, /* 746 */
+ { 113, 13, 12, 0, 0, }, /* 747 */
+ { 101, 7, 12, 0, 0, }, /* 748 */
+ { 101, 12, 3, 0, 0, }, /* 749 */
+ { 101, 10, 5, 0, 0, }, /* 750 */
+ { 101, 13, 12, 0, 0, }, /* 751 */
+ { 125, 7, 12, 0, 0, }, /* 752 */
+ { 125, 12, 3, 0, 0, }, /* 753 */
+ { 125, 10, 5, 0, 0, }, /* 754 */
+ { 125, 13, 12, 0, 0, }, /* 755 */
+ { 125, 15, 12, 0, 0, }, /* 756 */
+ { 125, 21, 12, 0, 0, }, /* 757 */
+ { 125, 26, 12, 0, 0, }, /* 758 */
+ { 141, 7, 12, 0, 0, }, /* 759 */
+ { 141, 10, 5, 0, 0, }, /* 760 */
+ { 141, 12, 3, 0, 0, }, /* 761 */
+ { 141, 21, 12, 0, 0, }, /* 762 */
+ { 124, 9, 12, 0, 32, }, /* 763 */
+ { 124, 5, 12, 0, -32, }, /* 764 */
+ { 124, 13, 12, 0, 0, }, /* 765 */
+ { 124, 15, 12, 0, 0, }, /* 766 */
+ { 124, 7, 12, 0, 0, }, /* 767 */
+ { 140, 7, 12, 0, 0, }, /* 768 */
+ { 140, 12, 3, 0, 0, }, /* 769 */
+ { 140, 10, 5, 0, 0, }, /* 770 */
+ { 140, 7, 4, 0, 0, }, /* 771 */
+ { 140, 21, 12, 0, 0, }, /* 772 */
+ { 139, 7, 12, 0, 0, }, /* 773 */
+ { 139, 12, 3, 0, 0, }, /* 774 */
+ { 139, 10, 5, 0, 0, }, /* 775 */
+ { 139, 7, 4, 0, 0, }, /* 776 */
+ { 139, 21, 12, 0, 0, }, /* 777 */
+ { 121, 7, 12, 0, 0, }, /* 778 */
+ { 132, 7, 12, 0, 0, }, /* 779 */
+ { 132, 10, 5, 0, 0, }, /* 780 */
+ { 132, 12, 3, 0, 0, }, /* 781 */
+ { 132, 21, 12, 0, 0, }, /* 782 */
+ { 132, 13, 12, 0, 0, }, /* 783 */
+ { 132, 15, 12, 0, 0, }, /* 784 */
+ { 133, 21, 12, 0, 0, }, /* 785 */
+ { 133, 7, 12, 0, 0, }, /* 786 */
+ { 133, 12, 3, 0, 0, }, /* 787 */
+ { 133, 10, 5, 0, 0, }, /* 788 */
+ { 137, 7, 12, 0, 0, }, /* 789 */
+ { 137, 12, 3, 0, 0, }, /* 790 */
+ { 137, 7, 4, 0, 0, }, /* 791 */
+ { 137, 13, 12, 0, 0, }, /* 792 */
+ { 142, 7, 12, 0, 0, }, /* 793 */
+ { 142, 10, 5, 0, 0, }, /* 794 */
+ { 142, 12, 3, 0, 0, }, /* 795 */
+ { 142, 13, 12, 0, 0, }, /* 796 */
+ { 144, 7, 12, 0, 0, }, /* 797 */
+ { 144, 12, 3, 0, 0, }, /* 798 */
+ { 144, 10, 5, 0, 0, }, /* 799 */
+ { 144, 21, 12, 0, 0, }, /* 800 */
+ { 62, 7, 12, 0, 0, }, /* 801 */
+ { 62, 14, 12, 0, 0, }, /* 802 */
+ { 62, 21, 12, 0, 0, }, /* 803 */
+ { 79, 7, 12, 0, 0, }, /* 804 */
+ { 126, 7, 12, 0, 0, }, /* 805 */
+ { 114, 7, 12, 0, 0, }, /* 806 */
+ { 114, 13, 12, 0, 0, }, /* 807 */
+ { 114, 21, 12, 0, 0, }, /* 808 */
+ { 102, 7, 12, 0, 0, }, /* 809 */
+ { 102, 12, 3, 0, 0, }, /* 810 */
+ { 102, 21, 12, 0, 0, }, /* 811 */
+ { 118, 7, 12, 0, 0, }, /* 812 */
+ { 118, 12, 3, 0, 0, }, /* 813 */
+ { 118, 21, 12, 0, 0, }, /* 814 */
+ { 118, 26, 12, 0, 0, }, /* 815 */
+ { 118, 6, 12, 0, 0, }, /* 816 */
+ { 118, 13, 12, 0, 0, }, /* 817 */
+ { 118, 15, 12, 0, 0, }, /* 818 */
+ { 145, 9, 12, 0, 32, }, /* 819 */
+ { 145, 5, 12, 0, -32, }, /* 820 */
+ { 145, 15, 12, 0, 0, }, /* 821 */
+ { 145, 21, 12, 0, 0, }, /* 822 */
+ { 98, 7, 12, 0, 0, }, /* 823 */
+ { 98, 10, 5, 0, 0, }, /* 824 */
+ { 98, 12, 3, 0, 0, }, /* 825 */
+ { 98, 6, 12, 0, 0, }, /* 826 */
+ { 136, 6, 12, 0, 0, }, /* 827 */
+ { 138, 6, 12, 0, 0, }, /* 828 */
+ { 136, 7, 12, 0, 0, }, /* 829 */
+ { 138, 7, 12, 0, 0, }, /* 830 */
+ { 104, 7, 12, 0, 0, }, /* 831 */
+ { 104, 26, 12, 0, 0, }, /* 832 */
+ { 104, 12, 3, 0, 0, }, /* 833 */
+ { 104, 21, 12, 0, 0, }, /* 834 */
+ { 9, 10, 3, 0, 0, }, /* 835 */
+ { 19, 12, 3, 0, 0, }, /* 836 */
+ { 130, 26, 12, 0, 0, }, /* 837 */
+ { 130, 12, 3, 0, 0, }, /* 838 */
+ { 130, 21, 12, 0, 0, }, /* 839 */
+ { 17, 12, 3, 0, 0, }, /* 840 */
+ { 112, 7, 12, 0, 0, }, /* 841 */
+ { 112, 15, 12, 0, 0, }, /* 842 */
+ { 112, 12, 3, 0, 0, }, /* 843 */
+ { 131, 9, 12, 0, 34, }, /* 844 */
+ { 131, 5, 12, 0, -34, }, /* 845 */
+ { 131, 12, 3, 0, 0, }, /* 846 */
+ { 131, 13, 12, 0, 0, }, /* 847 */
+ { 131, 21, 12, 0, 0, }, /* 848 */
+ { 9, 2, 14, 0, 0, }, /* 849 */
+ { 9, 26, 11, 0, 0, }, /* 850 */
+ { 26, 26, 12, 0, 0, }, /* 851 */
+ { 9, 24, 3, 0, 0, }, /* 852 */
+ { 9, 1, 3, 0, 0, }, /* 853 */
};
-const uint8_t PRIV(ucd_stage1)[] = { /* 8704 bytes */
+const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* U+0000 */
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /* U+0800 */
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 41, 41, 42, 43, 44, 45, /* U+1000 */
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, /* U+1800 */
62, 63, 64, 65, 66, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, /* U+2000 */
- 77, 77, 66, 78, 66, 66, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, /* U+2800 */
- 89, 90, 91, 92, 93, 94, 95, 71, 96, 96, 96, 96, 96, 96, 96, 96, /* U+3000 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+3800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+4000 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 97, 96, 96, 96, 96, /* U+4800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+5000 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+5800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+6000 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+6800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+7000 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+7800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+8000 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+8800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+9000 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 98, /* U+9800 */
- 99,100,100,100,100,100,100,100,100,101,102,102,103,104,105,106, /* U+A000 */
-107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,115, /* U+A800 */
-116,117,118,119,120,121,115,116,117,118,119,120,121,115,116,117, /* U+B000 */
-118,119,120,121,115,116,117,118,119,120,121,115,116,117,118,119, /* U+B800 */
-120,121,115,116,117,118,119,120,121,115,116,117,118,119,120,121, /* U+C000 */
-115,116,117,118,119,120,121,115,116,117,118,119,120,121,115,116, /* U+C800 */
-117,118,119,120,121,115,116,117,118,119,120,121,115,116,117,122, /* U+D000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+D800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+E000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+E800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F000 */
-124,124, 96, 96,125,126,127,128,129,129,130,131,132,133,134,135, /* U+F800 */
-136,137,138,139,140,141,142,143,144,145,146,140,147,147,148,140, /* U+10000 */
-149,150,151,152,153,154,155,156,157,158,140,140,159,140,140,140, /* U+10800 */
-160,161,162,163,164,165,166,140,167,168,140,169,170,171,172,140, /* U+11000 */
-140,173,140,140,174,175,140,140,176,177,178,140,140,140,140,140, /* U+11800 */
-179,179,179,179,179,179,179,180,181,179,182,140,140,140,140,140, /* U+12000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+12800 */
-183,183,183,183,183,183,183,183,184,140,140,140,140,140,140,140, /* U+13000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+13800 */
-140,140,140,140,140,140,140,140,185,185,185,185,186,140,140,140, /* U+14000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+14800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+15000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+15800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+16000 */
-187,187,187,187,188,189,190,191,140,140,140,140,140,140,192,193, /* U+16800 */
-194,194,194,194,194,194,194,194,194,194,194,194,194,194,194,194, /* U+17000 */
-194,194,194,194,194,194,194,194,194,194,194,194,194,194,194,194, /* U+17800 */
-194,194,194,194,194,194,194,194,194,194,194,194,194,194,194,195, /* U+18000 */
-194,194,194,194,194,196,140,140,140,140,140,140,140,140,140,140, /* U+18800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+19000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+19800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+1A000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+1A800 */
-197,198,199,200,200,201,140,140,140,140,140,140,140,140,140,140, /* U+1B000 */
-140,140,140,140,140,140,140,140,202,203,140,140,140,140,140,140, /* U+1B800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+1C000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+1C800 */
- 71,204,205,206,207,140,208,140,209,210,211,212,213,214,215,216, /* U+1D000 */
-217,217,217,217,218,219,140,140,140,140,140,140,140,140,140,140, /* U+1D800 */
-220,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+1E000 */
-221,222,223,140,140,140,140,140,140,140,140,140,224,225,140,140, /* U+1E800 */
-226,227,228,229,230,140,231,232,233,234,235,236,237,238,239,240, /* U+1F000 */
-241,242,243,244,140,140,140,140,140,140,140,140,140,140,140,140, /* U+1F800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+20000 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+20800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+21000 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+21800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+22000 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+22800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+23000 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+23800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+24000 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+24800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+25000 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+25800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+26000 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+26800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+27000 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+27800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+28000 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+28800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+29000 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+29800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,245, 96, 96, /* U+2A000 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+2A800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,246, 96, /* U+2B000 */
-247, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+2B800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+2C000 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,248, 96, 96, /* U+2C800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+2D000 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+2D800 */
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+2E000 */
- 96, 96, 96, 96, 96, 96, 96,249,140,140,140,140,140,140,140,140, /* U+2E800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+2F000 */
- 96, 96, 96, 96,250,140,140,140,140,140,140,140,140,140,140,140, /* U+2F800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+30000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+30800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+31000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+31800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+32000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+32800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+33000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+33800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+34000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+34800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+35000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+35800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+36000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+36800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+37000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+37800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+38000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+38800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+39000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+39800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3A000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3A800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3B000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3B800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3C000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3C800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3D000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3D800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3E000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3E800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3F000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3F800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+40000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+40800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+41000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+41800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+42000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+42800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+43000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+43800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+44000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+44800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+45000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+45800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+46000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+46800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+47000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+47800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+48000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+48800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+49000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+49800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4A000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4A800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4B000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4B800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4C000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4C800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4D000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4D800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4E000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4E800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4F000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4F800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+50000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+50800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+51000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+51800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+52000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+52800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+53000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+53800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+54000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+54800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+55000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+55800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+56000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+56800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+57000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+57800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+58000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+58800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+59000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+59800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5A000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5A800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5B000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5B800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5C000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5C800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5D000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5D800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5E000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5E800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5F000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5F800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+60000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+60800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+61000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+61800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+62000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+62800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+63000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+63800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+64000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+64800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+65000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+65800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+66000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+66800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+67000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+67800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+68000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+68800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+69000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+69800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6A000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6A800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6B000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6B800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6C000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6C800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6D000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6D800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6E000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6E800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6F000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6F800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+70000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+70800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+71000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+71800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+72000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+72800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+73000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+73800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+74000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+74800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+75000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+75800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+76000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+76800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+77000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+77800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+78000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+78800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+79000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+79800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7A000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7A800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7B000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7B800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7C000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7C800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7D000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7D800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7E000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7E800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7F000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7F800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+80000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+80800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+81000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+81800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+82000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+82800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+83000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+83800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+84000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+84800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+85000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+85800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+86000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+86800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+87000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+87800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+88000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+88800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+89000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+89800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8A000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8A800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8B000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8B800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8C000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8C800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8D000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8D800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8E000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8E800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8F000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8F800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+90000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+90800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+91000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+91800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+92000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+92800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+93000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+93800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+94000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+94800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+95000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+95800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+96000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+96800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+97000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+97800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+98000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+98800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+99000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+99800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9A000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9A800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9B000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9B800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9C000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9C800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9D000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9D800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9E000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9E800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9F000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9F800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A0000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A0800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A1000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A1800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A2000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A2800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A3000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A3800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A4000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A4800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A5000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A5800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A6000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A6800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A7000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A7800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A8000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A8800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A9000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A9800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AA000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AA800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AB000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AB800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AC000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AC800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AD000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AD800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AE000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AE800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AF000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AF800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B0000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B0800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B1000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B1800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B2000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B2800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B3000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B3800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B4000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B4800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B5000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B5800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B6000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B6800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B7000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B7800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B8000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B8800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B9000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B9800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BA000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BA800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BB000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BB800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BC000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BC800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BD000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BD800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BE000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BE800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BF000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BF800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C0000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C0800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C1000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C1800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C2000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C2800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C3000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C3800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C4000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C4800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C5000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C5800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C6000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C6800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C7000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C7800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C8000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C8800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C9000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C9800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CA000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CA800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CB000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CB800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CC000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CC800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CD000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CD800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CE000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CE800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CF000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CF800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D0000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D0800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D1000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D1800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D2000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D2800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D3000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D3800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D4000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D4800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D5000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D5800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D6000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D6800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D7000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D7800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D8000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D8800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D9000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D9800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DA000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DA800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DB000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DB800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DC000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DC800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DD000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DD800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DE000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DE800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DF000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DF800 */
-251,252,253,254,252,252,252,252,252,252,252,252,252,252,252,252, /* U+E0000 */
-252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252, /* U+E0800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E1000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E1800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E2000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E2800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E3000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E3800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E4000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E4800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E5000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E5800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E6000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E6800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E7000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E7800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E8000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E8800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E9000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E9800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+EA000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+EA800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+EB000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+EB800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+EC000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+EC800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+ED000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+ED800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+EE000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+EE800 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+EF000 */
-140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+EF800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F0000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F0800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F1000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F1800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F2000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F2800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F3000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F3800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F4000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F4800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F5000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F5800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F6000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F6800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F7000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F7800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F8000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F8800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F9000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F9800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FA000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FA800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FB000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FB800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FC000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FC800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FD000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FD800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FE000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FE800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FF000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,255, /* U+FF800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+100000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+100800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+101000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+101800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+102000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+102800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+103000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+103800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+104000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+104800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+105000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+105800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+106000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+106800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+107000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+107800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+108000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+108800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+109000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+109800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10A000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10A800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10B000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10B800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10C000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10C800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10D000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10D800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10E000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10E800 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10F000 */
-124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,255, /* U+10F800 */
+ 77, 77, 78, 79, 66, 66, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, /* U+2800 */
+ 90, 91, 92, 93, 94, 95, 96, 71, 97, 97, 97, 97, 97, 97, 97, 97, /* U+3000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+3800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+4000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 98, 97, 97, 97, 97, /* U+4800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+5000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+5800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+6000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+6800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+7000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+7800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+8000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+8800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+9000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 99, /* U+9800 */
+100,101,101,101,101,101,101,101,101,102,103,103,104,105,106,107, /* U+A000 */
+108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,116, /* U+A800 */
+117,118,119,120,121,122,116,117,118,119,120,121,122,116,117,118, /* U+B000 */
+119,120,121,122,116,117,118,119,120,121,122,116,117,118,119,120, /* U+B800 */
+121,122,116,117,118,119,120,121,122,116,117,118,119,120,121,122, /* U+C000 */
+116,117,118,119,120,121,122,116,117,118,119,120,121,122,116,117, /* U+C800 */
+118,119,120,121,122,116,117,118,119,120,121,122,116,117,118,123, /* U+D000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+D800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+E000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+E800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F000 */
+125,125, 97, 97,126,127,128,129,130,130,131,132,133,134,135,136, /* U+F800 */
+137,138,139,140,141,142,143,144,145,146,147,141,148,148,149,141, /* U+10000 */
+150,151,152,153,154,155,156,157,158,159,160,141,161,141,162,141, /* U+10800 */
+163,164,165,166,167,168,169,141,170,171,141,172,173,174,175,141, /* U+11000 */
+176,177,141,141,178,179,141,141,180,181,182,183,141,184,141,141, /* U+11800 */
+185,185,185,185,185,185,185,186,187,185,188,141,141,141,141,141, /* U+12000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+12800 */
+189,189,189,189,189,189,189,189,190,141,141,141,141,141,141,141, /* U+13000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+13800 */
+141,141,141,141,141,141,141,141,191,191,191,191,192,141,141,141, /* U+14000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+14800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+15000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+15800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+16000 */
+193,193,193,193,194,195,196,197,141,141,141,141,198,199,200,201, /* U+16800 */
+202,202,202,202,202,202,202,202,202,202,202,202,202,202,202,202, /* U+17000 */
+202,202,202,202,202,202,202,202,202,202,202,202,202,202,202,202, /* U+17800 */
+202,202,202,202,202,202,202,202,202,202,202,202,202,202,202,203, /* U+18000 */
+202,202,202,202,202,204,141,141,141,141,141,141,141,141,141,141, /* U+18800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+19000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+19800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+1A000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+1A800 */
+205,206,207,208,208,209,141,141,141,141,141,141,141,141,141,141, /* U+1B000 */
+141,141,141,141,141,141,141,141,210,211,141,141,141,141,141,141, /* U+1B800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+1C000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+1C800 */
+ 71,212,213,214,215,216,217,141,218,219,220,221,222,223,224,225, /* U+1D000 */
+226,226,226,226,227,228,141,141,141,141,141,141,141,141,141,141, /* U+1D800 */
+229,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+1E000 */
+230,231,232,141,141,141,141,141,233,234,141,141,235,236,141,141, /* U+1E800 */
+237,238,239,240,241,242,243,244,243,243,245,243,246,247,248,249, /* U+1F000 */
+250,251,252,253,254,242,242,242,242,242,242,242,242,242,242,255, /* U+1F800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+20000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+20800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+21000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+21800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+22000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+22800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+23000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+23800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+24000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+24800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+25000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+25800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+26000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+26800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+27000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+27800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+28000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+28800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+29000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+29800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,256, 97, 97, /* U+2A000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+2A800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,257, 97, /* U+2B000 */
+258, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+2B800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+2C000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,259, 97, 97, /* U+2C800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+2D000 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+2D800 */
+ 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, /* U+2E000 */
+ 97, 97, 97, 97, 97, 97, 97,260,141,141,141,141,141,141,141,141, /* U+2E800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+2F000 */
+ 97, 97, 97, 97,261,141,141,141,141,141,141,141,141,141,141,141, /* U+2F800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+30000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+30800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+31000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+31800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+32000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+32800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+33000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+33800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+34000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+34800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+35000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+35800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+36000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+36800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+37000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+37800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+38000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+38800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+39000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+39800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3A000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3A800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3B000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3B800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3C000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3C800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3D000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3D800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3E000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3E800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3F000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+3F800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+40000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+40800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+41000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+41800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+42000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+42800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+43000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+43800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+44000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+44800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+45000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+45800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+46000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+46800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+47000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+47800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+48000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+48800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+49000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+49800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4A000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4A800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4B000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4B800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4C000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4C800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4D000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4D800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4E000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4E800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4F000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+4F800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+50000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+50800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+51000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+51800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+52000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+52800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+53000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+53800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+54000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+54800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+55000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+55800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+56000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+56800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+57000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+57800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+58000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+58800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+59000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+59800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5A000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5A800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5B000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5B800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5C000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5C800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5D000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5D800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5E000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5E800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5F000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+5F800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+60000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+60800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+61000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+61800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+62000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+62800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+63000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+63800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+64000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+64800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+65000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+65800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+66000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+66800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+67000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+67800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+68000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+68800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+69000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+69800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6A000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6A800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6B000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6B800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6C000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6C800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6D000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6D800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6E000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6E800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6F000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+6F800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+70000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+70800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+71000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+71800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+72000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+72800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+73000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+73800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+74000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+74800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+75000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+75800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+76000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+76800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+77000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+77800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+78000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+78800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+79000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+79800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7A000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7A800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7B000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7B800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7C000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7C800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7D000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7D800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7E000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7E800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7F000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+7F800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+80000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+80800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+81000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+81800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+82000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+82800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+83000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+83800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+84000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+84800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+85000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+85800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+86000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+86800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+87000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+87800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+88000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+88800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+89000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+89800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8A000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8A800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8B000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8B800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8C000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8C800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8D000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8D800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8E000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8E800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8F000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+8F800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+90000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+90800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+91000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+91800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+92000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+92800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+93000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+93800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+94000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+94800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+95000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+95800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+96000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+96800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+97000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+97800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+98000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+98800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+99000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+99800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9A000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9A800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9B000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9B800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9C000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9C800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9D000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9D800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9E000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9E800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9F000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+9F800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A0000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A0800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A1000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A1800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A2000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A2800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A3000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A3800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A4000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A4800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A5000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A5800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A6000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A6800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A7000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A7800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A8000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A8800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A9000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+A9800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AA000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AA800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AB000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AB800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AC000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AC800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AD000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AD800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AE000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AE800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AF000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+AF800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B0000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B0800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B1000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B1800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B2000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B2800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B3000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B3800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B4000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B4800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B5000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B5800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B6000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B6800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B7000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B7800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B8000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B8800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B9000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+B9800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BA000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BA800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BB000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BB800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BC000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BC800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BD000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BD800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BE000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BE800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BF000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+BF800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C0000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C0800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C1000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C1800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C2000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C2800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C3000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C3800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C4000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C4800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C5000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C5800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C6000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C6800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C7000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C7800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C8000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C8800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C9000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+C9800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CA000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CA800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CB000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CB800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CC000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CC800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CD000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CD800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CE000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CE800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CF000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+CF800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D0000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D0800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D1000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D1800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D2000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D2800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D3000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D3800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D4000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D4800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D5000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D5800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D6000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D6800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D7000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D7800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D8000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D8800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D9000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+D9800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DA000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DA800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DB000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DB800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DC000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DC800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DD000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DD800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DE000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DE800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DF000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+DF800 */
+262,263,264,265,263,263,263,263,263,263,263,263,263,263,263,263, /* U+E0000 */
+263,263,263,263,263,263,263,263,263,263,263,263,263,263,263,263, /* U+E0800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E1000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E1800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E2000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E2800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E3000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E3800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E4000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E4800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E5000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E5800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E6000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E6800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E7000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E7800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E8000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E8800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E9000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+E9800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EA000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EA800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EB000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EB800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EC000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EC800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+ED000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+ED800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EE000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EE800 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EF000 */
+141,141,141,141,141,141,141,141,141,141,141,141,141,141,141,141, /* U+EF800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F0000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F0800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F1000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F1800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F2000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F2800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F3000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F3800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F4000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F4800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F5000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F5800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F6000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F6800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F7000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F7800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F8000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F8800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F9000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+F9800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FA000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FA800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FB000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FB800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FC000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FC800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FD000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FD800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FE000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FE800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+FF000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,266, /* U+FF800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+100000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+100800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+101000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+101800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+102000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+102800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+103000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+103800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+104000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+104800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+105000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+105800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+106000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+106800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+107000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+107800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+108000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+108800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+109000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+109800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10A000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10A800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10B000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10B800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10C000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10C800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10D000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10D800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10E000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10E800 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,125, /* U+10F000 */
+125,125,125,125,125,125,125,125,125,125,125,125,125,125,125,266, /* U+10F800 */
};
-const uint16_t PRIV(ucd_stage2)[] = { /* 65536 bytes, block = 128 */
+const uint16_t PRIV(ucd_stage2)[] = { /* 68352 bytes, block = 128 */
/* block 0 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -1489,647 +1522,647 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 65536 bytes, block = 128 */
/* block 1 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 3, 4, 5, 5, 5, 5, 19, 4, 14, 19, 20, 21, 8, 22, 19, 14,
- 19, 8, 23, 23, 14, 24, 4, 4, 14, 23, 20, 25, 23, 23, 23, 4,
- 11, 11, 11, 11, 11, 26, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
- 11, 11, 11, 11, 11, 11, 11, 8, 11, 11, 11, 11, 11, 11, 11, 27,
- 16, 16, 16, 16, 16, 28, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 8, 16, 16, 16, 16, 16, 16, 16, 29,
+ 3, 4, 5, 5, 5, 5, 19, 4, 14, 20, 21, 22, 8, 23, 20, 14,
+ 19, 8, 24, 24, 14, 25, 4, 4, 14, 24, 21, 26, 24, 24, 24, 4,
+ 11, 11, 11, 11, 11, 27, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 8, 11, 11, 11, 11, 11, 11, 11, 28,
+ 16, 16, 16, 16, 16, 29, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 8, 16, 16, 16, 16, 16, 16, 16, 30,
/* block 2 */
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 32, 33, 30, 31, 30, 31, 30, 31, 33, 30, 31, 30, 31, 30, 31, 30,
- 31, 30, 31, 30, 31, 30, 31, 30, 31, 33, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 34, 30, 31, 30, 31, 30, 31, 35,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 33, 34, 31, 32, 31, 32, 31, 32, 34, 31, 32, 31, 32, 31, 32, 31,
+ 32, 31, 32, 31, 32, 31, 32, 31, 32, 34, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 35, 31, 32, 31, 32, 31, 32, 36,
/* block 3 */
- 36, 37, 30, 31, 30, 31, 38, 30, 31, 39, 39, 30, 31, 33, 40, 41,
- 42, 30, 31, 39, 43, 44, 45, 46, 30, 31, 47, 33, 45, 48, 49, 50,
- 30, 31, 30, 31, 30, 31, 51, 30, 31, 51, 33, 33, 30, 31, 51, 30,
- 31, 52, 52, 30, 31, 30, 31, 53, 30, 31, 33, 20, 30, 31, 33, 54,
- 20, 20, 20, 20, 55, 56, 57, 58, 59, 60, 61, 62, 63, 30, 31, 30,
- 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 64, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 33, 65, 66, 67, 30, 31, 68, 69, 30, 31, 30, 31, 30, 31, 30, 31,
+ 37, 38, 31, 32, 31, 32, 39, 31, 32, 40, 40, 31, 32, 34, 41, 42,
+ 43, 31, 32, 40, 44, 45, 46, 47, 31, 32, 48, 34, 46, 49, 50, 51,
+ 31, 32, 31, 32, 31, 32, 52, 31, 32, 52, 34, 34, 31, 32, 52, 31,
+ 32, 53, 53, 31, 32, 31, 32, 54, 31, 32, 34, 21, 31, 32, 34, 55,
+ 21, 21, 21, 21, 56, 57, 58, 59, 60, 61, 62, 63, 64, 31, 32, 31,
+ 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 65, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 34, 66, 67, 68, 31, 32, 69, 70, 31, 32, 31, 32, 31, 32, 31, 32,
/* block 4 */
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 70, 33, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 33, 33, 33, 33, 33, 33, 71, 30, 31, 72, 73, 74,
- 74, 30, 31, 75, 76, 77, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 78, 79, 80, 81, 82, 33, 83, 83, 33, 84, 33, 85, 86, 33, 33, 33,
- 83, 87, 33, 88, 33, 89, 90, 33, 91, 92, 90, 93, 94, 33, 33, 92,
- 33, 95, 96, 33, 33, 97, 33, 33, 33, 33, 33, 33, 33, 98, 33, 33,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 71, 34, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 34, 34, 34, 34, 34, 34, 72, 31, 32, 73, 74, 75,
+ 75, 31, 32, 76, 77, 78, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 79, 80, 81, 82, 83, 34, 84, 84, 34, 85, 34, 86, 87, 34, 34, 34,
+ 84, 88, 34, 89, 34, 90, 91, 34, 92, 93, 91, 94, 95, 34, 34, 93,
+ 34, 96, 97, 34, 34, 98, 34, 34, 34, 34, 34, 34, 34, 99, 34, 34,
/* block 5 */
- 99, 33, 33, 99, 33, 33, 33,100, 99,101,102,102,103, 33, 33, 33,
- 33, 33,104, 33, 20, 33, 33, 33, 33, 33, 33, 33, 33,105,106, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
-107,107,107,107,107,107,107,107,107,108,108,108,108,108,108,108,
-108,108, 14, 14, 14, 14,108,108,108,108,108,108,108,108,108,108,
-108,108, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-107,107,107,107,107, 14, 14, 14, 14, 14,109,109,108, 14,108, 14,
+100, 34, 34,100, 34, 34, 34,101,100,102,103,103,104, 34, 34, 34,
+ 34, 34,105, 34, 21, 34, 34, 34, 34, 34, 34, 34, 34,106,107, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+108,108,108,108,108,108,108,108,108,109,109,109,109,109,109,109,
+109,109, 14, 14, 14, 14,109,109,109,109,109,109,109,109,109,109,
+109,109, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+108,108,108,108,108, 14, 14, 14, 14, 14,110,110,109, 14,109, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
/* block 6 */
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,111,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-112,113,112,113,108,114,112,113,115,115,116,117,117,117, 4,118,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,112,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+113,114,113,114,109,115,113,114,116,116,117,118,118,118, 4,119,
/* block 7 */
-115,115,115,115,114, 14,119, 4,120,120,120,115,121,115,122,122,
-123,124,125,124,124,126,124,124,127,128,129,124,130,124,124,124,
-131,132,115,133,124,124,134,124,124,135,124,124,136,137,137,137,
-123,138,139,138,138,140,138,138,141,142,143,138,144,138,138,138,
-145,146,147,148,138,138,149,138,138,150,138,138,151,152,152,153,
-154,155,156,156,156,157,158,159,112,113,112,113,112,113,112,113,
-112,113,160,161,160,161,160,161,160,161,160,161,160,161,160,161,
-162,163,164,165,166,167,168,112,113,169,112,113,123,170,170,170,
+116,116,116,116,115, 14,120, 4,121,121,121,116,122,116,123,123,
+124,125,126,125,125,127,125,125,128,129,130,125,131,125,125,125,
+132,133,116,134,125,125,135,125,125,136,125,125,137,138,138,138,
+124,139,140,139,139,141,139,139,142,143,144,139,145,139,139,139,
+146,147,148,149,139,139,150,139,139,151,139,139,152,153,153,154,
+155,156,157,157,157,158,159,160,113,114,113,114,113,114,113,114,
+113,114,161,162,161,162,161,162,161,162,161,162,161,162,161,162,
+163,164,165,166,167,168,169,113,114,170,113,114,124,171,171,171,
/* block 8 */
-171,171,171,171,171,171,171,171,171,171,171,171,171,171,171,171,
-172,172,173,172,174,172,172,172,172,172,172,172,172,172,175,172,
-172,176,177,172,172,172,172,172,172,172,178,172,172,172,172,172,
-179,179,180,179,181,179,179,179,179,179,179,179,179,179,182,179,
-179,183,184,179,179,179,179,179,179,179,185,179,179,179,179,179,
-186,186,186,186,186,186,186,186,186,186,186,186,186,186,186,186,
-187,188,189,190,187,188,187,188,187,188,187,188,187,188,187,188,
-187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
+172,172,172,172,172,172,172,172,172,172,172,172,172,172,172,172,
+173,173,174,173,175,173,173,173,173,173,173,173,173,173,176,173,
+173,177,178,173,173,173,173,173,173,173,179,173,173,173,173,173,
+180,180,181,180,182,180,180,180,180,180,180,180,180,180,183,180,
+180,184,185,180,180,180,180,180,180,180,186,180,180,180,180,180,
+187,187,187,187,187,187,187,187,187,187,187,187,187,187,187,187,
+188,189,190,191,188,189,188,189,188,189,188,189,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
/* block 9 */
-187,188,191,192,192,110,110,192,193,193,187,188,187,188,187,188,
-187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
-187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
-187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
-194,187,188,187,188,187,188,187,188,187,188,187,188,187,188,195,
-187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
-187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
-187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
+188,189,192,193,193,111,111,193,194,194,188,189,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
+195,188,189,188,189,188,189,188,189,188,189,188,189,188,189,196,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
/* block 10 */
-187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
-187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
-187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
-115,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,
-196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,
-196,196,196,196,196,196,196,115,115,197,198,198,198,198,198,198,
-115,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,
-199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
+116,197,197,197,197,197,197,197,197,197,197,197,197,197,197,197,
+197,197,197,197,197,197,197,197,197,197,197,197,197,197,197,197,
+197,197,197,197,197,197,197,116,116,198,199,199,199,199,199,199,
+200,201,201,201,201,201,201,201,201,201,201,201,201,201,201,201,
+201,201,201,201,201,201,201,201,201,201,201,201,201,201,201,201,
/* block 11 */
-199,199,199,199,199,199,199,200,115, 4,201,115,115,202,202,203,
-115,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,
-204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,
-204,204,204,204,204,204,204,204,204,204,204,204,204,204,205,204,
-206,204,204,206,204,204,206,204,115,115,115,115,115,115,115,115,
-207,207,207,207,207,207,207,207,207,207,207,207,207,207,207,207,
-207,207,207,207,207,207,207,207,207,207,207,115,115,115,115,115,
-207,207,207,206,206,115,115,115,115,115,115,115,115,115,115,115,
+201,201,201,201,201,201,201,200,200, 4,202,116,116,203,203,204,
+116,205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,
+205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,
+205,205,205,205,205,205,205,205,205,205,205,205,205,205,206,205,
+207,205,205,207,205,205,207,205,116,116,116,116,116,116,116,116,
+208,208,208,208,208,208,208,208,208,208,208,208,208,208,208,208,
+208,208,208,208,208,208,208,208,208,208,208,116,116,116,116,208,
+208,208,208,207,207,116,116,116,116,116,116,116,116,116,116,116,
/* block 12 */
-208,208,208,208,208,209,210,210,210,211,211,212, 4,211,213,213,
-214,214,214,214,214,214,214,214,214,214,214, 4,215,115,211, 4,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-108,216,216,216,216,216,216,216,216,216,216,110,110,110,110,110,
-110,110,110,110,110,110,214,214,214,214,214,214,214,214,214,214,
-217,217,217,217,217,217,217,217,217,217,211,211,211,211,216,216,
-110,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+209,209,209,209,209,210,211,211,211,212,212,213, 4,212,214,214,
+215,215,215,215,215,215,215,215,215,215,215, 4,216,116,212, 4,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+109,217,217,217,217,217,217,217,217,217,217,111,111,111,111,111,
+111,111,111,111,111,111,215,215,215,215,215,215,215,215,215,215,
+218,218,218,218,218,218,218,218,218,218,212,212,212,212,217,217,
+111,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
/* block 13 */
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,211,216,214,214,214,214,214,214,214,209,213,214,
-214,214,214,214,214,218,218,214,214,213,214,214,214,214,216,216,
-217,217,217,217,217,217,217,217,217,217,216,216,216,213,213,216,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,212,217,215,215,215,215,215,215,215,210,214,215,
+215,215,215,215,215,219,219,215,215,214,215,215,215,215,217,217,
+218,218,218,218,218,218,218,218,218,218,217,217,217,214,214,217,
/* block 14 */
-219,219,219,219,219,219,219,219,219,219,219,219,219,219,115,220,
-221,222,221,221,221,221,221,221,221,221,221,221,221,221,221,221,
-221,221,221,221,221,221,221,221,221,221,221,221,221,221,221,221,
+220,220,220,220,220,220,220,220,220,220,220,220,220,220,116,221,
+222,223,222,222,222,222,222,222,222,222,222,222,222,222,222,222,
222,222,222,222,222,222,222,222,222,222,222,222,222,222,222,222,
-222,222,222,222,222,222,222,222,222,222,222,115,115,221,221,221,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,
+223,223,223,223,223,223,223,223,223,223,223,116,116,222,222,222,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
/* block 15 */
-223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,
-223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,
-223,223,223,223,223,223,224,224,224,224,224,224,224,224,224,224,
-224,223,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-225,225,225,225,225,225,225,225,225,225,226,226,226,226,226,226,
-226,226,226,226,226,226,226,226,226,226,226,226,226,226,226,226,
-226,226,226,226,226,226,226,226,226,226,226,227,227,227,227,227,
-227,227,227,227,228,228,229,230,230,230,228,115,115,115,115,115,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,225,225,225,225,225,225,225,225,225,225,
+225,224,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+226,226,226,226,226,226,226,226,226,226,227,227,227,227,227,227,
+227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,227,
+227,227,227,227,227,227,227,227,227,227,227,228,228,228,228,228,
+228,228,228,228,229,229,230,231,231,231,229,116,116,228,232,232,
/* block 16 */
-231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,
-231,231,231,231,231,231,232,232,232,232,233,232,232,232,232,232,
-232,232,232,232,233,232,232,232,233,232,232,232,232,232,115,115,
-234,234,234,234,234,234,234,234,234,234,234,234,234,234,234,115,
-235,235,235,235,235,235,235,235,235,235,235,235,235,235,235,235,
-235,235,235,235,235,235,235,235,235,236,236,236,115,115,237,115,
-221,221,221,221,221,221,221,221,221,221,221,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+233,233,233,233,233,233,233,233,233,233,233,233,233,233,233,233,
+233,233,233,233,233,233,234,234,234,234,235,234,234,234,234,234,
+234,234,234,234,235,234,234,234,235,234,234,234,234,234,116,116,
+236,236,236,236,236,236,236,236,236,236,236,236,236,236,236,116,
+237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,237,
+237,237,237,237,237,237,237,237,237,238,238,238,116,116,239,116,
+222,222,222,222,222,222,222,222,222,222,222,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 17 */
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,115,216,216,216,216,216,216,216,216,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,214,214,214,214,214,214,214,214,214,214,214,214,
-214,214,209,214,214,214,214,214,214,214,214,214,214,214,214,214,
-214,214,214,214,214,214,214,214,214,214,214,214,214,214,214,214,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,116,217,217,217,217,217,217,217,217,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,215,215,215,215,215,215,215,215,215,215,215,215,215,
+215,215,210,215,215,215,215,215,215,215,215,215,215,215,215,215,
+215,215,215,215,215,215,215,215,215,215,215,215,215,215,215,215,
/* block 18 */
-238,238,238,239,240,240,240,240,240,240,240,240,240,240,240,240,
-240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,
-240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,
-240,240,240,240,240,240,240,240,240,240,238,239,238,240,239,239,
-239,238,238,238,238,238,238,238,238,239,239,239,239,238,239,239,
-240,110,110,238,238,238,238,238,240,240,240,240,240,240,240,240,
-240,240,238,238, 4, 4,241,241,241,241,241,241,241,241,241,241,
-242,243,240,240,240,240,240,240,240,240,240,240,240,240,240,240,
+240,240,240,241,242,242,242,242,242,242,242,242,242,242,242,242,
+242,242,242,242,242,242,242,242,242,242,242,242,242,242,242,242,
+242,242,242,242,242,242,242,242,242,242,242,242,242,242,242,242,
+242,242,242,242,242,242,242,242,242,242,240,241,240,242,241,241,
+241,240,240,240,240,240,240,240,240,241,241,241,241,240,241,241,
+242,111,111,240,240,240,240,240,242,242,242,242,242,242,242,242,
+242,242,240,240, 4, 4,243,243,243,243,243,243,243,243,243,243,
+244,245,242,242,242,242,242,242,242,242,242,242,242,242,242,242,
/* block 19 */
-244,245,246,246,115,244,244,244,244,244,244,244,244,115,115,244,
-244,115,115,244,244,244,244,244,244,244,244,244,244,244,244,244,
-244,244,244,244,244,244,244,244,244,115,244,244,244,244,244,244,
-244,115,244,115,115,115,244,244,244,244,115,115,245,244,247,246,
-246,245,245,245,245,115,115,246,246,115,115,246,246,245,244,115,
-115,115,115,115,115,115,115,247,115,115,115,115,244,244,115,244,
-244,244,245,245,115,115,248,248,248,248,248,248,248,248,248,248,
-244,244,249,249,250,250,250,250,250,250,251,249,244,252,115,115,
+246,247,248,248,116,246,246,246,246,246,246,246,246,116,116,246,
+246,116,116,246,246,246,246,246,246,246,246,246,246,246,246,246,
+246,246,246,246,246,246,246,246,246,116,246,246,246,246,246,246,
+246,116,246,116,116,116,246,246,246,246,116,116,247,246,249,248,
+248,247,247,247,247,116,116,248,248,116,116,248,248,247,246,116,
+116,116,116,116,116,116,116,249,116,116,116,116,246,246,116,246,
+246,246,247,247,116,116,250,250,250,250,250,250,250,250,250,250,
+246,246,251,251,252,252,252,252,252,252,253,251,246,254,247,116,
/* block 20 */
-115,253,253,254,115,255,255,255,255,255,255,115,115,115,115,255,
-255,115,115,255,255,255,255,255,255,255,255,255,255,255,255,255,
-255,255,255,255,255,255,255,255,255,115,255,255,255,255,255,255,
-255,115,255,255,115,255,255,115,255,255,115,115,253,115,254,254,
-254,253,253,115,115,115,115,253,253,115,115,253,253,253,115,115,
-115,253,115,115,115,115,115,115,115,255,255,255,255,115,255,115,
-115,115,115,115,115,115,256,256,256,256,256,256,256,256,256,256,
-253,253,255,255,255,253,115,115,115,115,115,115,115,115,115,115,
+116,255,255,256,116,257,257,257,257,257,257,116,116,116,116,257,
+257,116,116,257,257,257,257,257,257,257,257,257,257,257,257,257,
+257,257,257,257,257,257,257,257,257,116,257,257,257,257,257,257,
+257,116,257,257,116,257,257,116,257,257,116,116,255,116,256,256,
+256,255,255,116,116,116,116,255,255,116,116,255,255,255,116,116,
+116,255,116,116,116,116,116,116,116,257,257,257,257,116,257,116,
+116,116,116,116,116,116,258,258,258,258,258,258,258,258,258,258,
+255,255,257,257,257,255,259,116,116,116,116,116,116,116,116,116,
/* block 21 */
-115,257,257,258,115,259,259,259,259,259,259,259,259,259,115,259,
-259,259,115,259,259,259,259,259,259,259,259,259,259,259,259,259,
-259,259,259,259,259,259,259,259,259,115,259,259,259,259,259,259,
-259,115,259,259,115,259,259,259,259,259,115,115,257,259,258,258,
-258,257,257,257,257,257,115,257,257,258,115,258,258,257,115,115,
-259,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-259,259,257,257,115,115,260,260,260,260,260,260,260,260,260,260,
-261,262,115,115,115,115,115,115,115,259,257,257,257,257,257,257,
+116,260,260,261,116,262,262,262,262,262,262,262,262,262,116,262,
+262,262,116,262,262,262,262,262,262,262,262,262,262,262,262,262,
+262,262,262,262,262,262,262,262,262,116,262,262,262,262,262,262,
+262,116,262,262,116,262,262,262,262,262,116,116,260,262,261,261,
+261,260,260,260,260,260,116,260,260,261,116,261,261,260,116,116,
+262,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+262,262,260,260,116,116,263,263,263,263,263,263,263,263,263,263,
+264,265,116,116,116,116,116,116,116,262,260,260,260,260,260,260,
/* block 22 */
-115,263,264,264,115,265,265,265,265,265,265,265,265,115,115,265,
-265,115,115,265,265,265,265,265,265,265,265,265,265,265,265,265,
-265,265,265,265,265,265,265,265,265,115,265,265,265,265,265,265,
-265,115,265,265,115,265,265,265,265,265,115,115,263,265,266,263,
-264,263,263,263,263,115,115,264,264,115,115,264,264,263,115,115,
-115,115,115,115,115,115,263,266,115,115,115,115,265,265,115,265,
-265,265,263,263,115,115,267,267,267,267,267,267,267,267,267,267,
-268,265,269,269,269,269,269,269,115,115,115,115,115,115,115,115,
+116,266,267,267,116,268,268,268,268,268,268,268,268,116,116,268,
+268,116,116,268,268,268,268,268,268,268,268,268,268,268,268,268,
+268,268,268,268,268,268,268,268,268,116,268,268,268,268,268,268,
+268,116,268,268,116,268,268,268,268,268,116,116,266,268,269,266,
+267,266,266,266,266,116,116,267,267,116,116,267,267,266,116,116,
+116,116,116,116,116,116,266,269,116,116,116,116,268,268,116,268,
+268,268,266,266,116,116,270,270,270,270,270,270,270,270,270,270,
+271,268,272,272,272,272,272,272,116,116,116,116,116,116,116,116,
/* block 23 */
-115,115,270,271,115,271,271,271,271,271,271,115,115,115,271,271,
-271,115,271,271,271,271,115,115,115,271,271,115,271,115,271,271,
-115,115,115,271,271,115,115,115,271,271,271,115,115,115,271,271,
-271,271,271,271,271,271,271,271,271,271,115,115,115,115,272,273,
-270,273,273,115,115,115,273,273,273,115,273,273,273,270,115,115,
-271,115,115,115,115,115,115,272,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,274,274,274,274,274,274,274,274,274,274,
-275,275,275,276,276,276,276,276,276,277,276,115,115,115,115,115,
+116,116,273,274,116,274,274,274,274,274,274,116,116,116,274,274,
+274,116,274,274,274,274,116,116,116,274,274,116,274,116,274,274,
+116,116,116,274,274,116,116,116,274,274,274,116,116,116,274,274,
+274,274,274,274,274,274,274,274,274,274,116,116,116,116,275,276,
+273,276,276,116,116,116,276,276,276,116,276,276,276,273,116,116,
+274,116,116,116,116,116,116,275,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,277,277,277,277,277,277,277,277,277,277,
+278,278,278,279,279,279,279,279,279,280,279,116,116,116,116,116,
/* block 24 */
-278,279,279,279,115,280,280,280,280,280,280,280,280,115,280,280,
-280,115,280,280,280,280,280,280,280,280,280,280,280,280,280,280,
-280,280,280,280,280,280,280,280,280,115,280,280,280,280,280,280,
-280,280,280,280,280,280,280,280,280,280,115,115,115,280,278,278,
-278,279,279,279,279,115,278,278,278,115,278,278,278,278,115,115,
-115,115,115,115,115,278,278,115,280,280,280,115,115,115,115,115,
-280,280,278,278,115,115,281,281,281,281,281,281,281,281,281,281,
-115,115,115,115,115,115,115,115,282,282,282,282,282,282,282,283,
+281,282,282,282,281,283,283,283,283,283,283,283,283,116,283,283,
+283,116,283,283,283,283,283,283,283,283,283,283,283,283,283,283,
+283,283,283,283,283,283,283,283,283,116,283,283,283,283,283,283,
+283,283,283,283,283,283,283,283,283,283,116,116,116,283,281,281,
+281,282,282,282,282,116,281,281,281,116,281,281,281,281,116,116,
+116,116,116,116,116,281,281,116,283,283,283,116,116,116,116,116,
+283,283,281,281,116,116,284,284,284,284,284,284,284,284,284,284,
+116,116,116,116,116,116,116,116,285,285,285,285,285,285,285,286,
/* block 25 */
-284,285,286,286,115,284,284,284,284,284,284,284,284,115,284,284,
-284,115,284,284,284,284,284,284,284,284,284,284,284,284,284,284,
-284,284,284,284,284,284,284,284,284,115,284,284,284,284,284,284,
-284,284,284,284,115,284,284,284,284,284,115,115,285,284,286,285,
-286,286,287,286,286,115,285,286,286,115,286,286,285,285,115,115,
-115,115,115,115,115,287,287,115,115,115,115,115,115,115,284,115,
-284,284,285,285,115,115,288,288,288,288,288,288,288,288,288,288,
-115,284,284,115,115,115,115,115,115,115,115,115,115,115,115,115,
+287,288,289,289,290,287,287,287,287,287,287,287,287,116,287,287,
+287,116,287,287,287,287,287,287,287,287,287,287,287,287,287,287,
+287,287,287,287,287,287,287,287,287,116,287,287,287,287,287,287,
+287,287,287,287,116,287,287,287,287,287,116,116,288,287,289,288,
+289,289,291,289,289,116,288,289,289,116,289,289,288,288,116,116,
+116,116,116,116,116,291,291,116,116,116,116,116,116,116,287,116,
+287,287,288,288,116,116,292,292,292,292,292,292,292,292,292,292,
+116,287,287,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 26 */
-289,289,290,290,115,291,291,291,291,291,291,291,291,115,291,291,
-291,115,291,291,291,291,291,291,291,291,291,291,291,291,291,291,
-291,291,291,291,291,291,291,291,291,291,291,291,291,291,291,291,
-291,291,291,291,291,291,291,291,291,291,291,289,289,291,292,290,
-290,289,289,289,289,115,290,290,290,115,290,290,290,289,293,294,
-115,115,115,115,291,291,291,292,295,295,295,295,295,295,295,291,
-291,291,289,289,115,115,296,296,296,296,296,296,296,296,296,296,
-295,295,295,295,295,295,295,295,295,294,291,291,291,291,291,291,
+293,293,294,294,116,295,295,295,295,295,295,295,295,116,295,295,
+295,116,295,295,295,295,295,295,295,295,295,295,295,295,295,295,
+295,295,295,295,295,295,295,295,295,295,295,295,295,295,295,295,
+295,295,295,295,295,295,295,295,295,295,295,293,293,295,296,294,
+294,293,293,293,293,116,294,294,294,116,294,294,294,293,297,298,
+116,116,116,116,295,295,295,296,299,299,299,299,299,299,299,295,
+295,295,293,293,116,116,300,300,300,300,300,300,300,300,300,300,
+299,299,299,299,299,299,299,299,299,298,295,295,295,295,295,295,
/* block 27 */
-115,115,297,297,115,298,298,298,298,298,298,298,298,298,298,298,
-298,298,298,298,298,298,298,115,115,115,298,298,298,298,298,298,
-298,298,298,298,298,298,298,298,298,298,298,298,298,298,298,298,
-298,298,115,298,298,298,298,298,298,298,298,298,115,298,115,115,
-298,298,298,298,298,298,298,115,115,115,299,115,115,115,115,300,
-297,297,299,299,299,115,299,115,297,297,297,297,297,297,297,300,
-115,115,115,115,115,115,301,301,301,301,301,301,301,301,301,301,
-115,115,297,297,302,115,115,115,115,115,115,115,115,115,115,115,
+116,116,301,301,116,302,302,302,302,302,302,302,302,302,302,302,
+302,302,302,302,302,302,302,116,116,116,302,302,302,302,302,302,
+302,302,302,302,302,302,302,302,302,302,302,302,302,302,302,302,
+302,302,116,302,302,302,302,302,302,302,302,302,116,302,116,116,
+302,302,302,302,302,302,302,116,116,116,303,116,116,116,116,304,
+301,301,303,303,303,116,303,116,301,301,301,301,301,301,301,304,
+116,116,116,116,116,116,305,305,305,305,305,305,305,305,305,305,
+116,116,301,301,306,116,116,116,116,116,116,116,116,116,116,116,
/* block 28 */
-115,303,303,303,303,303,303,303,303,303,303,303,303,303,303,303,
-303,303,303,303,303,303,303,303,303,303,303,303,303,303,303,303,
-303,303,303,303,303,303,303,303,303,303,303,303,303,303,303,303,
-303,304,303,305,304,304,304,304,304,304,304,115,115,115,115, 5,
-303,303,303,303,303,303,306,304,304,304,304,304,304,304,304,307,
-308,308,308,308,308,308,308,308,308,308,307,307,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+116,307,307,307,307,307,307,307,307,307,307,307,307,307,307,307,
+307,307,307,307,307,307,307,307,307,307,307,307,307,307,307,307,
+307,307,307,307,307,307,307,307,307,307,307,307,307,307,307,307,
+307,308,307,309,308,308,308,308,308,308,308,116,116,116,116, 5,
+307,307,307,307,307,307,310,308,308,308,308,308,308,308,308,311,
+312,312,312,312,312,312,312,312,312,312,311,311,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 29 */
-115,309,309,115,309,115,115,309,309,115,309,115,115,309,115,115,
-115,115,115,115,309,309,309,309,115,309,309,309,309,309,309,309,
-115,309,309,309,115,309,115,309,115,115,309,309,115,309,309,309,
-309,310,309,311,310,310,310,310,310,310,115,310,310,309,115,115,
-309,309,309,309,309,115,312,115,310,310,310,310,310,310,115,115,
-313,313,313,313,313,313,313,313,313,313,115,115,309,309,309,309,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+116,313,313,116,313,116,116,313,313,116,313,116,116,313,116,116,
+116,116,116,116,313,313,313,313,116,313,313,313,313,313,313,313,
+116,313,313,313,116,313,116,313,116,116,313,313,116,313,313,313,
+313,314,313,315,314,314,314,314,314,314,116,314,314,313,116,116,
+313,313,313,313,313,116,316,116,314,314,314,314,314,314,116,116,
+317,317,317,317,317,317,317,317,317,317,116,116,313,313,313,313,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 30 */
-314,315,315,315,316,316,316,316,316,316,316,316,316,316,316,316,
-316,316,316,315,316,315,315,315,317,317,315,315,315,315,315,315,
-318,318,318,318,318,318,318,318,318,318,319,319,319,319,319,319,
-319,319,319,319,315,317,315,317,315,317,320,321,320,321,322,322,
-314,314,314,314,314,314,314,314,115,314,314,314,314,314,314,314,
-314,314,314,314,314,314,314,314,314,314,314,314,314,314,314,314,
-314,314,314,314,314,314,314,314,314,314,314,314,314,115,115,115,
-115,317,317,317,317,317,317,317,317,317,317,317,317,317,317,322,
+318,319,319,319,320,320,320,320,320,320,320,320,320,320,320,320,
+320,320,320,319,320,319,319,319,321,321,319,319,319,319,319,319,
+322,322,322,322,322,322,322,322,322,322,323,323,323,323,323,323,
+323,323,323,323,319,321,319,321,319,321,324,325,324,325,326,326,
+318,318,318,318,318,318,318,318,116,318,318,318,318,318,318,318,
+318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,
+318,318,318,318,318,318,318,318,318,318,318,318,318,116,116,116,
+116,321,321,321,321,321,321,321,321,321,321,321,321,321,321,326,
/* block 31 */
-317,317,317,317,317,316,317,317,314,314,314,314,314,317,317,317,
-317,317,317,317,317,317,317,317,115,317,317,317,317,317,317,317,
-317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,
-317,317,317,317,317,317,317,317,317,317,317,317,317,115,315,315,
-315,315,315,315,315,315,317,315,315,315,315,315,315,115,315,315,
-316,316,316,316,316, 19, 19, 19, 19,316,316,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+321,321,321,321,321,320,321,321,318,318,318,318,318,321,321,321,
+321,321,321,321,321,321,321,321,116,321,321,321,321,321,321,321,
+321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,321,
+321,321,321,321,321,321,321,321,321,321,321,321,321,116,319,319,
+319,319,319,319,319,319,321,319,319,319,319,319,319,116,319,319,
+320,320,320,320,320, 19, 19, 19, 19,320,320,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 32 */
-323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,
-323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,
-323,323,323,323,323,323,323,323,323,323,323,324,324,325,325,325,
-325,326,325,325,325,325,325,325,324,325,325,326,326,325,325,323,
-327,327,327,327,327,327,327,327,327,327,328,328,328,328,328,328,
-323,323,323,323,323,323,326,326,325,325,323,323,323,323,325,325,
-325,323,324,324,324,323,323,324,324,324,324,324,324,324,323,323,
-323,325,325,325,325,323,323,323,323,323,323,323,323,323,323,323,
+327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
+327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
+327,327,327,327,327,327,327,327,327,327,327,328,328,329,329,329,
+329,330,329,329,329,329,329,329,328,329,329,330,330,329,329,327,
+331,331,331,331,331,331,331,331,331,331,332,332,332,332,332,332,
+327,327,327,327,327,327,330,330,329,329,327,327,327,327,329,329,
+329,327,328,328,328,327,327,328,328,328,328,328,328,328,327,327,
+327,329,329,329,329,327,327,327,327,327,327,327,327,327,327,327,
/* block 33 */
-323,323,325,324,326,325,325,324,324,324,324,324,324,325,323,324,
-327,327,327,327,327,327,327,327,327,327,324,324,324,325,329,329,
-330,330,330,330,330,330,330,330,330,330,330,330,330,330,330,330,
-330,330,330,330,330,330,330,330,330,330,330,330,330,330,330,330,
-330,330,330,330,330,330,115,330,115,115,115,115,115,330,115,115,
-331,331,331,331,331,331,331,331,331,331,331,331,331,331,331,331,
-331,331,331,331,331,331,331,331,331,331,331,331,331,331,331,331,
-331,331,331,331,331,331,331,331,331,331,331, 4,332,331,331,331,
-
-/* block 34 */
-333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,
-333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,
-333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,
-333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,
-333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,
-333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,
-334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,
+327,327,329,328,330,329,329,328,328,328,328,328,328,329,327,328,
+331,331,331,331,331,331,331,331,331,331,328,328,328,329,333,333,
334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,
-
-/* block 35 */
334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,
-334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,
-334,334,334,334,334,334,334,334,335,335,335,335,335,335,335,335,
-335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,
-335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,
-335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,
+334,334,334,334,334,334,116,334,116,116,116,116,116,334,116,116,
335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,
335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,
+335,335,335,335,335,335,335,335,335,335,335, 4,336,335,335,335,
+
+/* block 34 */
+337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,
+337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,
+337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,
+337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,
+337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,
+337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,
+338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,
+338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,
+
+/* block 35 */
+338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,
+338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,
+338,338,338,338,338,338,338,338,339,339,339,339,339,339,339,339,
+339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,
+339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,
+339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,
+339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,
+339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,
/* block 36 */
-336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
-336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
-336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
-336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
-336,336,336,336,336,336,336,336,336,115,336,336,336,336,115,115,
-336,336,336,336,336,336,336,115,336,115,336,336,336,336,115,115,
-336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
-336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,116,340,340,340,340,116,116,
+340,340,340,340,340,340,340,116,340,116,340,340,340,340,116,116,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
/* block 37 */
-336,336,336,336,336,336,336,336,336,115,336,336,336,336,115,115,
-336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
-336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
-336,115,336,336,336,336,115,115,336,336,336,336,336,336,336,115,
-336,115,336,336,336,336,115,115,336,336,336,336,336,336,336,336,
-336,336,336,336,336,336,336,115,336,336,336,336,336,336,336,336,
-336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
-336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
+340,340,340,340,340,340,340,340,340,116,340,340,340,340,116,116,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,116,340,340,340,340,116,116,340,340,340,340,340,340,340,116,
+340,116,340,340,340,340,116,116,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,116,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
/* block 38 */
-336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
-336,115,336,336,336,336,115,115,336,336,336,336,336,336,336,336,
-336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
-336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
-336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
-336,336,336,336,336,336,336,336,336,336,336,115,115,337,337,337,
-338,338,338,338,338,338,338,338,338,339,339,339,339,339,339,339,
-339,339,339,339,339,339,339,339,339,339,339,339,339,115,115,115,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,116,340,340,340,340,116,116,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,340,340,340,340,116,116,341,341,341,
+342,342,342,342,342,342,342,342,342,343,343,343,343,343,343,343,
+343,343,343,343,343,343,343,343,343,343,343,343,343,116,116,116,
/* block 39 */
-336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
-340,340,340,340,340,340,340,340,340,340,115,115,115,115,115,115,
-341,341,341,341,341,341,341,341,341,341,341,341,341,341,341,341,
-341,341,341,341,341,341,341,341,341,341,341,341,341,341,341,341,
-341,341,341,341,341,341,341,341,341,341,341,341,341,341,341,341,
-341,341,341,341,341,341,341,341,341,341,341,341,341,341,341,341,
-341,341,341,341,341,341,341,341,341,341,341,341,341,341,341,341,
-342,342,342,342,342,342,115,115,343,343,343,343,343,343,115,115,
-
-/* block 40 */
-344,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+344,344,344,344,344,344,344,344,344,344,116,116,116,116,116,116,
345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+346,346,346,346,346,346,116,116,347,347,347,347,347,347,116,116,
+
+/* block 40 */
+348,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
/* block 41 */
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
/* block 42 */
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,345,345,345,345,345,345,345,346,346,345,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,350,350,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
/* block 43 */
-347,348,348,348,348,348,348,348,348,348,348,348,348,348,348,348,
-348,348,348,348,348,348,348,348,348,348,348,349,350,115,115,115,
-351,351,351,351,351,351,351,351,351,351,351,351,351,351,351,351,
-351,351,351,351,351,351,351,351,351,351,351,351,351,351,351,351,
-351,351,351,351,351,351,351,351,351,351,351,351,351,351,351,351,
-351,351,351,351,351,351,351,351,351,351,351,351,351,351,351,351,
-351,351,351,351,351,351,351,351,351,351,351, 4, 4, 4,352,352,
-352,351,351,351,351,351,351,351,351,115,115,115,115,115,115,115,
+351,352,352,352,352,352,352,352,352,352,352,352,352,352,352,352,
+352,352,352,352,352,352,352,352,352,352,352,353,354,116,116,116,
+355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,
+355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,
+355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,
+355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,
+355,355,355,355,355,355,355,355,355,355,355, 4, 4, 4,356,356,
+356,355,355,355,355,355,355,355,355,116,116,116,116,116,116,116,
/* block 44 */
-353,353,353,353,353,353,353,353,353,353,353,353,353,115,353,353,
-353,353,354,354,354,115,115,115,115,115,115,115,115,115,115,115,
-355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,
-355,355,356,356,356, 4, 4,115,115,115,115,115,115,115,115,115,
-357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,
-357,357,358,358,115,115,115,115,115,115,115,115,115,115,115,115,
-359,359,359,359,359,359,359,359,359,359,359,359,359,115,359,359,
-359,115,360,360,115,115,115,115,115,115,115,115,115,115,115,115,
+357,357,357,357,357,357,357,357,357,357,357,357,357,116,357,357,
+357,357,358,358,358,116,116,116,116,116,116,116,116,116,116,116,
+359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,
+359,359,360,360,360, 4, 4,116,116,116,116,116,116,116,116,116,
+361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,
+361,361,362,362,116,116,116,116,116,116,116,116,116,116,116,116,
+363,363,363,363,363,363,363,363,363,363,363,363,363,116,363,363,
+363,116,364,364,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 45 */
-361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,
-361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,
-361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,
-361,361,361,361,362,362,363,362,362,362,362,362,362,362,363,363,
-363,363,363,363,363,363,362,363,363,362,362,362,362,362,362,362,
-362,362,362,362,364,364,364,365,364,364,364,366,361,362,115,115,
-367,367,367,367,367,367,367,367,367,367,115,115,115,115,115,115,
-368,368,368,368,368,368,368,368,368,368,115,115,115,115,115,115,
+365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,
+365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,
+365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,
+365,365,365,365,366,366,367,366,366,366,366,366,366,366,367,367,
+367,367,367,367,367,367,366,367,367,366,366,366,366,366,366,366,
+366,366,366,366,368,368,368,369,368,368,368,370,365,366,116,116,
+371,371,371,371,371,371,371,371,371,371,116,116,116,116,116,116,
+372,372,372,372,372,372,372,372,372,372,116,116,116,116,116,116,
/* block 46 */
-369,369, 4, 4,369, 4,370,369,369,369,369,371,371,371,372,115,
-373,373,373,373,373,373,373,373,373,373,115,115,115,115,115,115,
-374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,
-374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,
-374,374,374,375,374,374,374,374,374,374,374,374,374,374,374,374,
-374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,
-374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,
-374,374,374,374,374,374,374,374,115,115,115,115,115,115,115,115,
+373,373, 4, 4,373, 4,374,373,373,373,373,375,375,375,376,116,
+377,377,377,377,377,377,377,377,377,377,116,116,116,116,116,116,
+378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,
+378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,
+378,378,378,379,378,378,378,378,378,378,378,378,378,378,378,378,
+378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,
+378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,
+378,378,378,378,378,378,378,378,378,116,116,116,116,116,116,116,
/* block 47 */
-374,374,374,374,374,371,371,374,374,374,374,374,374,374,374,374,
-374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,
-374,374,374,374,374,374,374,374,374,371,374,115,115,115,115,115,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,115,115,115,115,115,115,115,115,115,115,
+378,378,378,378,378,375,375,378,378,378,378,378,378,378,378,378,
+378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,378,
+378,378,378,378,378,378,378,378,378,375,378,116,116,116,116,116,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,349,
+349,349,349,349,349,349,116,116,116,116,116,116,116,116,116,116,
/* block 48 */
-376,376,376,376,376,376,376,376,376,376,376,376,376,376,376,376,
-376,376,376,376,376,376,376,376,376,376,376,376,376,376,376,115,
-377,377,377,378,378,378,378,377,377,378,378,378,115,115,115,115,
-378,378,377,378,378,378,378,378,378,377,377,377,115,115,115,115,
-379,115,115,115,380,380,381,381,381,381,381,381,381,381,381,381,
-382,382,382,382,382,382,382,382,382,382,382,382,382,382,382,382,
-382,382,382,382,382,382,382,382,382,382,382,382,382,382,115,115,
-382,382,382,382,382,115,115,115,115,115,115,115,115,115,115,115,
+380,380,380,380,380,380,380,380,380,380,380,380,380,380,380,380,
+380,380,380,380,380,380,380,380,380,380,380,380,380,380,380,116,
+381,381,381,382,382,382,382,381,381,382,382,382,116,116,116,116,
+382,382,381,382,382,382,382,382,382,381,381,381,116,116,116,116,
+383,116,116,116,384,384,385,385,385,385,385,385,385,385,385,385,
+386,386,386,386,386,386,386,386,386,386,386,386,386,386,386,386,
+386,386,386,386,386,386,386,386,386,386,386,386,386,386,116,116,
+386,386,386,386,386,116,116,116,116,116,116,116,116,116,116,116,
/* block 49 */
-383,383,383,383,383,383,383,383,383,383,383,383,383,383,383,383,
-383,383,383,383,383,383,383,383,383,383,383,383,383,383,383,383,
-383,383,383,383,383,383,383,383,383,383,383,383,115,115,115,115,
-383,383,383,383,383,383,383,383,383,383,383,383,383,383,383,383,
-383,383,383,383,383,383,383,383,383,383,115,115,115,115,115,115,
-384,384,384,384,384,384,384,384,384,384,385,115,115,115,386,386,
387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,
387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,
+387,387,387,387,387,387,387,387,387,387,387,387,116,116,116,116,
+387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,
+387,387,387,387,387,387,387,387,387,387,116,116,116,116,116,116,
+388,388,388,388,388,388,388,388,388,388,389,116,116,116,390,390,
+391,391,391,391,391,391,391,391,391,391,391,391,391,391,391,391,
+391,391,391,391,391,391,391,391,391,391,391,391,391,391,391,391,
/* block 50 */
-388,388,388,388,388,388,388,388,388,388,388,388,388,388,388,388,
-388,388,388,388,388,388,388,389,389,390,390,389,115,115,391,391,
392,392,392,392,392,392,392,392,392,392,392,392,392,392,392,392,
-392,392,392,392,392,392,392,392,392,392,392,392,392,392,392,392,
-392,392,392,392,392,392,392,392,392,392,392,392,392,392,392,392,
-392,392,392,392,392,393,394,393,394,394,394,394,394,394,394,115,
-394,395,394,395,395,394,394,394,394,394,394,394,394,393,393,393,
-393,393,393,394,394,394,394,394,394,394,394,394,394,115,115,394,
+392,392,392,392,392,392,392,393,393,394,394,393,116,116,395,395,
+396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,
+396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,
+396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,
+396,396,396,396,396,397,398,397,398,398,398,398,398,398,398,116,
+398,399,398,399,399,398,398,398,398,398,398,398,398,397,397,397,
+397,397,397,398,398,398,398,398,398,398,398,398,398,116,116,398,
/* block 51 */
-396,396,396,396,396,396,396,396,396,396,115,115,115,115,115,115,
-396,396,396,396,396,396,396,396,396,396,115,115,115,115,115,115,
-397,397,397,397,397,397,397,398,397,397,397,397,397,397,115,115,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,399,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+400,400,400,400,400,400,400,400,400,400,116,116,116,116,116,116,
+400,400,400,400,400,400,400,400,400,400,116,116,116,116,116,116,
+401,401,401,401,401,401,401,402,401,401,401,401,401,401,116,116,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,403,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 52 */
-400,400,400,400,401,402,402,402,402,402,402,402,402,402,402,402,
-402,402,402,402,402,402,402,402,402,402,402,402,402,402,402,402,
-402,402,402,402,402,402,402,402,402,402,402,402,402,402,402,402,
-402,402,402,402,400,401,400,400,400,400,400,401,400,401,401,401,
-401,401,400,401,401,402,402,402,402,402,402,402,115,115,115,115,
-403,403,403,403,403,403,403,403,403,403,404,404,404,404,404,404,
-404,405,405,405,405,405,405,405,405,405,405,400,400,400,400,400,
-400,400,400,400,405,405,405,405,405,405,405,405,405,115,115,115,
+404,404,404,404,405,406,406,406,406,406,406,406,406,406,406,406,
+406,406,406,406,406,406,406,406,406,406,406,406,406,406,406,406,
+406,406,406,406,406,406,406,406,406,406,406,406,406,406,406,406,
+406,406,406,406,404,405,404,404,404,404,404,405,404,405,405,405,
+405,405,404,405,405,406,406,406,406,406,406,406,116,116,116,116,
+407,407,407,407,407,407,407,407,407,407,408,408,408,408,408,408,
+408,409,409,409,409,409,409,409,409,409,409,404,404,404,404,404,
+404,404,404,404,409,409,409,409,409,409,409,409,409,116,116,116,
/* block 53 */
-406,406,407,408,408,408,408,408,408,408,408,408,408,408,408,408,
-408,408,408,408,408,408,408,408,408,408,408,408,408,408,408,408,
-408,407,406,406,406,406,407,407,406,406,407,406,406,406,408,408,
-409,409,409,409,409,409,409,409,409,409,408,408,408,408,408,408,
-410,410,410,410,410,410,410,410,410,410,410,410,410,410,410,410,
-410,410,410,410,410,410,410,410,410,410,410,410,410,410,410,410,
-410,410,410,410,410,410,411,412,411,411,412,412,412,411,412,411,
-411,411,412,412,115,115,115,115,115,115,115,115,413,413,413,413,
-
-/* block 54 */
+410,410,411,412,412,412,412,412,412,412,412,412,412,412,412,412,
+412,412,412,412,412,412,412,412,412,412,412,412,412,412,412,412,
+412,411,410,410,410,410,411,411,410,410,411,410,410,410,412,412,
+413,413,413,413,413,413,413,413,413,413,412,412,412,412,412,412,
414,414,414,414,414,414,414,414,414,414,414,414,414,414,414,414,
414,414,414,414,414,414,414,414,414,414,414,414,414,414,414,414,
-414,414,414,414,415,415,415,415,415,415,415,415,416,416,416,416,
-416,416,416,416,415,415,416,416,115,115,115,417,417,417,417,417,
-418,418,418,418,418,418,418,418,418,418,115,115,115,414,414,414,
-419,419,419,419,419,419,419,419,419,419,420,420,420,420,420,420,
-420,420,420,420,420,420,420,420,420,420,420,420,420,420,420,420,
-420,420,420,420,420,420,420,420,421,421,421,421,421,421,422,422,
+414,414,414,414,414,414,415,416,415,415,416,416,416,415,416,415,
+415,415,416,416,116,116,116,116,116,116,116,116,417,417,417,417,
+
+/* block 54 */
+418,418,418,418,418,418,418,418,418,418,418,418,418,418,418,418,
+418,418,418,418,418,418,418,418,418,418,418,418,418,418,418,418,
+418,418,418,418,419,419,419,419,419,419,419,419,420,420,420,420,
+420,420,420,420,419,419,420,420,116,116,116,421,421,421,421,421,
+422,422,422,422,422,422,422,422,422,422,116,116,116,418,418,418,
+423,423,423,423,423,423,423,423,423,423,424,424,424,424,424,424,
+424,424,424,424,424,424,424,424,424,424,424,424,424,424,424,424,
+424,424,424,424,424,424,424,424,425,425,425,425,425,425,426,426,
/* block 55 */
-423,424,425,426,427,428,429,430,431,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-432,432,432,432,432,432,432,432,115,115,115,115,115,115,115,115,
-110,110,110, 4,110,110,110,110,110,110,110,110,110,110,110,110,
-110,433,110,110,110,110,110,110,110,434,434,434,434,110,434,434,
-434,434,433,433,110,434,434,433,110,110,115,115,115,115,115,115,
+427,428,429,430,431,432,433,434,435,116,116,116,116,116,116,116,
+436,436,436,436,436,436,436,436,436,436,436,436,436,436,436,436,
+436,436,436,436,436,436,436,436,436,436,436,436,436,436,436,436,
+436,436,436,436,436,436,436,436,436,436,436,116,116,436,436,436,
+437,437,437,437,437,437,437,437,116,116,116,116,116,116,116,116,
+111,111,111, 4,111,111,111,111,111,111,111,111,111,111,111,111,
+111,438,111,111,111,111,111,111,111,439,439,439,439,111,439,439,
+439,439,438,438,111,439,439,438,111,111,116,116,116,116,116,116,
/* block 56 */
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33,123,123,123,123,123,435,107,107,107,107,
-107,107,107,107,107,107,107,107,107,107,107,107,107,107,107,107,
-107,107,107,107,107,107,107,107,107,107,107,107,107,107,107,107,
-107,107,107,107,107,107,107,107,107,107,107,107,107,116,116,116,
-116,116,107,107,107,107,116,116,116,116,116, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33,436,437, 33, 33, 33,438, 33, 33,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34,124,124,124,124,124,440,108,108,108,108,
+108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,
+108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,
+108,108,108,108,108,108,108,108,108,108,108,108,108,117,117,117,
+117,117,108,108,108,108,117,117,117,117,117, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34,441,442, 34, 34, 34,443, 34, 34,
/* block 57 */
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,107,107,107,107,107,
-107,107,107,107,107,107,107,107,107,107,107,107,107,107,107,107,
-107,107,107,107,107,107,107,107,107,107,107,107,107,107,107,116,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,115,110,110,110,110,110,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,108,108,108,108,108,
+108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,
+108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,117,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,116,111,111,111,111,111,
/* block 58 */
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
-439,440, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+444,445, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
/* block 59 */
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 33, 33, 33, 33, 33,441, 33, 33,442, 33,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 34, 34, 34, 34, 34,446, 34, 34,447, 34,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
/* block 60 */
-443,443,443,443,443,443,443,443,444,444,444,444,444,444,444,444,
-443,443,443,443,443,443,115,115,444,444,444,444,444,444,115,115,
-443,443,443,443,443,443,443,443,444,444,444,444,444,444,444,444,
-443,443,443,443,443,443,443,443,444,444,444,444,444,444,444,444,
-443,443,443,443,443,443,115,115,444,444,444,444,444,444,115,115,
-123,443,123,443,123,443,123,443,115,444,115,444,115,444,115,444,
-443,443,443,443,443,443,443,443,444,444,444,444,444,444,444,444,
-445,445,446,446,446,446,447,447,448,448,449,449,450,450,115,115,
+448,448,448,448,448,448,448,448,449,449,449,449,449,449,449,449,
+448,448,448,448,448,448,116,116,449,449,449,449,449,449,116,116,
+448,448,448,448,448,448,448,448,449,449,449,449,449,449,449,449,
+448,448,448,448,448,448,448,448,449,449,449,449,449,449,449,449,
+448,448,448,448,448,448,116,116,449,449,449,449,449,449,116,116,
+124,448,124,448,124,448,124,448,116,449,116,449,116,449,116,449,
+448,448,448,448,448,448,448,448,449,449,449,449,449,449,449,449,
+450,450,451,451,451,451,452,452,453,453,454,454,455,455,116,116,
/* block 61 */
-443,443,443,443,443,443,443,443,451,451,451,451,451,451,451,451,
-443,443,443,443,443,443,443,443,451,451,451,451,451,451,451,451,
-443,443,443,443,443,443,443,443,451,451,451,451,451,451,451,451,
-443,443,123,452,123,115,123,123,444,444,453,453,454,114,455,114,
-114,114,123,452,123,115,123,123,456,456,456,456,454,114,114,114,
-443,443,123,123,115,115,123,123,444,444,457,457,115,114,114,114,
-443,443,123,123,123,164,123,123,444,444,458,458,169,114,114,114,
-115,115,123,452,123,115,123,123,459,459,460,460,454,114,114,115,
+448,448,448,448,448,448,448,448,456,456,456,456,456,456,456,456,
+448,448,448,448,448,448,448,448,456,456,456,456,456,456,456,456,
+448,448,448,448,448,448,448,448,456,456,456,456,456,456,456,456,
+448,448,124,457,124,116,124,124,449,449,458,458,459,115,460,115,
+115,115,124,457,124,116,124,124,461,461,461,461,459,115,115,115,
+448,448,124,124,116,116,124,124,449,449,462,462,116,115,115,115,
+448,448,124,124,124,165,124,124,449,449,463,463,170,115,115,115,
+116,116,124,457,124,116,124,124,464,464,465,465,459,115,115,116,
/* block 62 */
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 22,461,462, 22, 22,
- 9, 9, 9, 9, 9, 9, 4, 4, 21, 25, 6, 21, 21, 25, 6, 21,
- 4, 4, 4, 4, 4, 4, 4, 4,463,464, 22, 22, 22, 22, 22, 3,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 21, 25, 4, 4, 4, 4, 15,
- 15, 4, 4, 4, 8, 6, 7, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 23,466,467, 23, 23,
+ 9, 9, 9, 9, 9, 9, 4, 4, 22, 26, 6, 22, 22, 26, 6, 22,
+ 4, 4, 4, 4, 4, 4, 4, 4,468,469, 23, 23, 23, 23, 23, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 22, 26, 4,470, 4, 4, 15,
+ 15, 4, 4, 4, 8, 6, 7, 4, 4,470, 4, 4, 4, 4, 4, 4,
4, 4, 8, 4, 15, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3,
- 22, 22, 22, 22, 22,465, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 23,107,115,115, 23, 23, 23, 23, 23, 23, 8, 8, 8, 6, 7,107,
+ 23, 23, 23, 23, 23,471, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+ 24,108,116,116, 24, 24, 24, 24, 24, 24, 8, 8, 8, 6, 7,108,
/* block 63 */
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 8, 8, 8, 6, 7,115,
-107,107,107,107,107,107,107,107,107,107,107,107,107,115,115,115,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 8, 8, 8, 6, 7,116,
+108,108,108,108,108,108,108,108,108,108,108,108,108,116,116,116,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-110,110,110,110,110,110,110,110,110,110,110,110,110,399,399,399,
-399,110,399,399,399,110,110,110,110,110,110,110,110,110,110,110,
-110,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+111,111,111,111,111,111,111,111,111,111,111,111,111,403,403,403,
+403,111,403,403,403,111,111,111,111,111,111,111,111,111,111,111,
+111,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 64 */
- 19, 19,466, 19, 19, 19, 19,466, 19, 19,467,466,466,466,467,467,
-466,466,466,467, 19,466, 19, 19, 8,466,466,466,466,466, 19, 19,
- 19, 19, 19, 19,466, 19,468, 19,466, 19,469,470,466,466, 19,467,
-466,466,471,466,467,434,434,434,434,467, 19, 19,467,467,466,466,
- 8, 8, 8, 8, 8,466,467,467,467,467, 19, 8, 19, 19,472, 19,
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
-473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
-474,474,474,474,474,474,474,474,474,474,474,474,474,474,474,474,
+ 19, 19,472, 19, 19, 19, 19,472, 19, 19,473,472,472,472,473,473,
+472,472,472,473, 19,472, 19, 19, 8,472,472,472,472,472, 19, 19,
+ 19, 19, 20, 19,472, 19,474, 19,472, 19,475,476,472,472, 19,473,
+472,472,477,472,473,439,439,439,439,478, 19, 19,473,473,472,472,
+ 8, 8, 8, 8, 8,472,473,473,473,473, 19, 8, 19, 19,479, 19,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,
+481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,
/* block 65 */
-475,475,475, 30, 31,475,475,475,475, 23, 19, 19,115,115,115,115,
- 8, 8, 8, 8, 8, 19, 19, 19, 19, 19, 8, 8, 19, 19, 19, 19,
- 8, 19, 19, 8, 19, 19, 8, 19, 19, 19, 19, 19, 19, 19, 8, 19,
+482,482,482, 31, 32,482,482,482,482, 24, 19, 19,116,116,116,116,
+ 8, 8, 8, 8,483, 20, 20, 20, 20, 20, 8, 8, 19, 19, 19, 19,
+ 8, 19, 19, 8, 19, 19, 8, 19, 19, 20, 20, 19, 19, 19, 8, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 8, 8,
19, 19, 8, 19, 8, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
@@ -2148,8 +2181,8 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 65536 bytes, block = 128 */
/* block 67 */
19, 19, 19, 19, 19, 19, 19, 19, 6, 7, 6, 7, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 8, 8, 19, 19, 19, 19, 19, 19, 19, 6, 7, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 19, 19, 19, 19,
+ 8, 8, 19, 19, 19, 19, 19, 19, 20, 6, 7, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
@@ -2157,34 +2190,34 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 65536 bytes, block = 128 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 8, 19, 19, 19,
/* block 68 */
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 20, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 8, 8, 8, 8,
- 8, 8, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 8, 8, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 19, 19, 19, 19, 20, 20, 20, 19, 19, 19, 19, 19,
/* block 69 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+ 19, 19, 19, 19, 19, 19, 19,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
/* block 70 */
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 19, 19, 19, 19,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19,476,476,476,476,476,476,476,476,476,476,
-476,476,476,476,476,476,476,476,476,476,476,476,476,476,476,476,
-477,477,477,477,477,477,477,477,477,477,477,477,477,477,477,477,
-477,477,477,477,477,477,477,477,477,477, 23, 23, 23, 23, 23, 23,
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+ 19, 19, 19, 19, 19, 19,484,484,484,484,484,484,484,484,484,484,
+484,484,485,484,484,484,484,484,484,484,484,484,484,484,484,484,
+486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,
+486,486,486,486,486,486,486,486,486,486, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
/* block 71 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
@@ -2199,64 +2232,74 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 65536 bytes, block = 128 */
/* block 72 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 20, 8, 19, 19, 19, 19, 19, 19, 19, 19,
+ 20, 8, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 8, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 8, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 8, 8, 8, 8, 8, 8, 8, 8,
+ 19, 19, 19, 19, 19, 19, 19, 19, 8, 8, 8,483,483,483,483, 8,
/* block 73 */
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,478, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-479, 19,479, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 8,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 20, 20, 20, 20, 20, 20, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,483,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
/* block 74 */
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19,479,479, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19,478, 19, 19, 19, 19, 19, 19,
+ 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
/* block 75 */
- 19, 19, 19, 19, 19, 19, 19, 19,479, 19,478,478,478,478, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19,479, 19, 19, 19, 6, 7, 6, 7, 6, 7, 6, 7,
- 6, 7, 6, 7, 6, 7, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+ 20, 20, 20, 20, 20, 20, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 19, 20, 19, 20, 19, 19, 19, 19, 19, 19, 20, 19, 19,
+ 19, 20, 19, 19, 19, 19, 19, 19, 20, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 20, 19, 19, 20, 19, 19, 19, 19, 20, 19, 20, 19,
+ 19, 19, 19, 20, 20, 20, 19, 20, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 20, 20, 20, 20, 20, 6, 7, 6, 7, 6, 7, 6, 7,
+ 6, 7, 6, 7, 6, 7, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
/* block 76 */
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
- 23, 23, 23, 23, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 19, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20,
8, 8, 8, 8, 8, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
/* block 77 */
-480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,
-480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,
-480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,
-480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,
-480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,
-480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,
-480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,
-480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,
+487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,
+487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,
+487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,
+487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,
+487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,
+487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,
+487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,
+487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,487,
/* block 78 */
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8,483,483, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+
+/* block 79 */
8, 8, 8, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6,
7, 6, 7, 6, 7, 6, 7, 6, 7, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
@@ -2266,1775 +2309,1875 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 65536 bytes, block = 128 */
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 7, 8, 8,
-/* block 79 */
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+/* block 80 */
+ 19, 19, 19, 19, 19, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 19, 19, 8, 8, 8, 8, 8, 8, 19, 19, 19,
+ 20, 19, 19, 19, 19, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19,115,115, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19,116,116, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-/* block 80 */
+/* block 81 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19,115,115, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19,116,116, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19,115, 19, 19, 19, 19, 19, 19,
- 19, 19, 19,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115, 19, 19, 19, 19,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-
-/* block 81 */
-481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,
-481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,
-481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,115,
-482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,
-482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,
-482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,115,
- 30, 31,483,484,485,486,487, 30, 31, 30, 31, 30, 31,488,489,490,
-491, 33, 30, 31, 33, 30, 31, 33, 33, 33, 33, 33,107,107,492,492,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19,116, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,116,
/* block 82 */
-160,161,160,161,160,161,160,161,160,161,160,161,160,161,160,161,
-160,161,160,161,160,161,160,161,160,161,160,161,160,161,160,161,
-160,161,160,161,160,161,160,161,160,161,160,161,160,161,160,161,
-160,161,160,161,160,161,160,161,160,161,160,161,160,161,160,161,
-160,161,160,161,160,161,160,161,160,161,160,161,160,161,160,161,
-160,161,160,161,160,161,160,161,160,161,160,161,160,161,160,161,
-160,161,160,161,493,494,494,494,494,494,494,160,161,160,161,495,
-495,495,160,161,115,115,115,115,115,496,496,496,496,497,496,496,
+488,488,488,488,488,488,488,488,488,488,488,488,488,488,488,488,
+488,488,488,488,488,488,488,488,488,488,488,488,488,488,488,488,
+488,488,488,488,488,488,488,488,488,488,488,488,488,488,488,116,
+489,489,489,489,489,489,489,489,489,489,489,489,489,489,489,489,
+489,489,489,489,489,489,489,489,489,489,489,489,489,489,489,489,
+489,489,489,489,489,489,489,489,489,489,489,489,489,489,489,116,
+ 31, 32,490,491,492,493,494, 31, 32, 31, 32, 31, 32,495,496,497,
+498, 34, 31, 32, 34, 31, 32, 34, 34, 34, 34, 34,108,108,499,499,
/* block 83 */
-498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,
-498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,
-498,498,498,498,498,498,115,498,115,115,115,115,115,498,115,115,
-499,499,499,499,499,499,499,499,499,499,499,499,499,499,499,499,
-499,499,499,499,499,499,499,499,499,499,499,499,499,499,499,499,
-499,499,499,499,499,499,499,499,499,499,499,499,499,499,499,499,
-499,499,499,499,499,499,499,499,115,115,115,115,115,115,115,500,
-501,115,115,115,115,115,115,115,115,115,115,115,115,115,115,502,
+161,162,161,162,161,162,161,162,161,162,161,162,161,162,161,162,
+161,162,161,162,161,162,161,162,161,162,161,162,161,162,161,162,
+161,162,161,162,161,162,161,162,161,162,161,162,161,162,161,162,
+161,162,161,162,161,162,161,162,161,162,161,162,161,162,161,162,
+161,162,161,162,161,162,161,162,161,162,161,162,161,162,161,162,
+161,162,161,162,161,162,161,162,161,162,161,162,161,162,161,162,
+161,162,161,162,500,501,501,501,501,501,501,161,162,161,162,502,
+502,502,161,162,116,116,116,116,116,503,503,503,503,504,503,503,
/* block 84 */
-336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
-336,336,336,336,336,336,336,115,115,115,115,115,115,115,115,115,
-336,336,336,336,336,336,336,115,336,336,336,336,336,336,336,115,
-336,336,336,336,336,336,336,115,336,336,336,336,336,336,336,115,
-336,336,336,336,336,336,336,115,336,336,336,336,336,336,336,115,
-336,336,336,336,336,336,336,115,336,336,336,336,336,336,336,115,
-192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,
-192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,
+505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,
+505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,505,
+505,505,505,505,505,505,116,505,116,116,116,116,116,505,116,116,
+506,506,506,506,506,506,506,506,506,506,506,506,506,506,506,506,
+506,506,506,506,506,506,506,506,506,506,506,506,506,506,506,506,
+506,506,506,506,506,506,506,506,506,506,506,506,506,506,506,506,
+506,506,506,506,506,506,506,506,116,116,116,116,116,116,116,507,
+508,116,116,116,116,116,116,116,116,116,116,116,116,116,116,509,
/* block 85 */
- 4, 4, 21, 25, 21, 25, 4, 4, 4, 21, 25, 4, 21, 25, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 9, 4, 4, 9, 4, 21, 25, 4, 4,
- 21, 25, 6, 7, 6, 7, 6, 7, 6, 7, 4, 4, 4, 4, 4,108,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, 9, 4, 4, 4, 4,
- 9, 4, 6, 4, 4, 4, 4, 4, 4, 4,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,340,
+340,340,340,340,340,340,340,116,116,116,116,116,116,116,116,116,
+340,340,340,340,340,340,340,116,340,340,340,340,340,340,340,116,
+340,340,340,340,340,340,340,116,340,340,340,340,340,340,340,116,
+340,340,340,340,340,340,340,116,340,340,340,340,340,340,340,116,
+340,340,340,340,340,340,340,116,340,340,340,340,340,340,340,116,
+193,193,193,193,193,193,193,193,193,193,193,193,193,193,193,193,
+193,193,193,193,193,193,193,193,193,193,193,193,193,193,193,193,
/* block 86 */
-503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
-503,503,503,503,503,503,503,503,503,503,115,503,503,503,503,503,
-503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
-503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
-503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
-503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
-503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
-503,503,503,503,115,115,115,115,115,115,115,115,115,115,115,115,
+ 4, 4, 22, 26, 22, 26, 4, 4, 4, 22, 26, 4, 22, 26, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 9, 4, 4, 9, 4, 22, 26, 4, 4,
+ 22, 26, 6, 7, 6, 7, 6, 7, 6, 7, 4, 4, 4, 4, 4,109,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, 9, 4, 4, 4, 4,
+ 9, 4, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 87 */
-503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
-503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
-503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
-503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
-503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
-503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
-503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
-503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,116,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 88 */
-503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
-503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
-503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
-503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
-503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
-503,503,503,503,503,503,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
/* block 89 */
- 3, 4, 4, 4, 19,504,434,505, 6, 7, 6, 7, 6, 7, 6, 7,
- 6, 7, 19, 19, 6, 7, 6, 7, 6, 7, 6, 7, 9, 6, 7, 7,
- 19,505,505,505,505,505,505,505,505,505,110,110,110,110,506,506,
- 9,108,108,108,108,108, 19, 19,505,505,505,504,434, 4, 19, 19,
-115,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,510,
+510,510,510,510,510,510,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,116,116,116,116,
/* block 90 */
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
-507,507,507,507,507,507,507,115,115,110,110, 14, 14,508,508,507,
- 9,509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,
-509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,
-509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,
-509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,
-509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,
-509,509,509,509,509,509,509,509,509,509,509, 4,108,510,510,509,
+ 3, 4, 4, 4, 19,511,439,512, 6, 7, 6, 7, 6, 7, 6, 7,
+ 6, 7, 19, 19, 6, 7, 6, 7, 6, 7, 6, 7, 9, 6, 7, 7,
+ 19,512,512,512,512,512,512,512,512,512,111,111,111,111,513,513,
+514,109,109,109,109,109, 19, 19,512,512,512,511,439,470, 19, 19,
+116,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
/* block 91 */
-115,115,115,115,115,511,511,511,511,511,511,511,511,511,511,511,
-511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,
-511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,115,
-115,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,
-512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,
-512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,
-512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,
-512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,116,116,111,111, 14, 14,516,516,515,
+ 9,517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,
+517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,
+517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,
+517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,
+517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,
+517,517,517,517,517,517,517,517,517,517,517, 4,109,518,518,517,
/* block 92 */
-512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,115,
- 19, 19, 23, 23, 23, 23, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,
-511,511,511,511,511,511,511,511,511,511,511,115,115,115,115,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,115,115,
-509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,
+116,116,116,116,116,519,519,519,519,519,519,519,519,519,519,519,
+519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,
+519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,
+116,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
+520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
+520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
+520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
+520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
/* block 93 */
-513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,
-513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,115,
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 19, 19, 19, 19, 19, 19,
+520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,116,
+ 19, 19, 24, 24, 24, 24, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,
+519,519,519,519,519,519,519,519,519,519,519,116,116,116,116,116,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 23, 23, 23, 23, 23, 23, 23, 23,
- 19, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
-513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,
-513,513,513,513,513,513,513,513,513,513,513,513,513,513,513, 19,
+ 19, 19, 19, 19,116,116,116,116,116,116,116,116,116,116,116,116,
+517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,
/* block 94 */
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+521,521,521,521,521,521,521,521,521,521,521,521,521,521,521,521,
+521,521,521,521,521,521,521,521,521,521,521,521,521,521,521,116,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
-514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
-514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,115,
+ 19, 19, 19, 19, 19, 19, 19, 19, 24, 24, 24, 24, 24, 24, 24, 24,
+ 19, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+521,521,521,521,521,521,521,521,521,521,521,521,521,521,521,521,
+521,521,521,521,521,521,521,521,521,521,521,521,521,521,521, 19,
/* block 95 */
-514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
-514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
-514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
-514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
-514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
-514,514,514,514,514,514,514,514, 19, 19, 19, 19, 19, 19, 19, 19,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 20, 19, 20, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,116,
/* block 96 */
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
/* block 97 */
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,115,115,115,115,115,115,115,115,115,115,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+
+/* block 98 */
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,116,116,116,116,116,116,116,116,116,116,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-/* block 98 */
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-
/* block 99 */
-516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
-516,516,516,516,516,517,516,516,516,516,516,516,516,516,516,516,
-516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
-516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
-516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
-516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
-516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
-516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 100 */
-516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
-516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
-516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
-516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
-516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
-516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
-516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
-516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,525,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
/* block 101 */
-516,516,516,516,516,516,516,516,516,516,516,516,516,115,115,115,
-518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,
-518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,
-518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,
-518,518,518,518,518,518,518,115,115,115,115,115,115,115,115,115,
-519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,
-519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,
-519,519,519,519,519,519,519,519,520,520,520,520,520,520,521,521,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
+524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
/* block 102 */
-522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
-522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
-522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
-522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
-522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
-522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
-522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
-522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+524,524,524,524,524,524,524,524,524,524,524,524,524,116,116,116,
+526,526,526,526,526,526,526,526,526,526,526,526,526,526,526,526,
+526,526,526,526,526,526,526,526,526,526,526,526,526,526,526,526,
+526,526,526,526,526,526,526,526,526,526,526,526,526,526,526,526,
+526,526,526,526,526,526,526,116,116,116,116,116,116,116,116,116,
+527,527,527,527,527,527,527,527,527,527,527,527,527,527,527,527,
+527,527,527,527,527,527,527,527,527,527,527,527,527,527,527,527,
+527,527,527,527,527,527,527,527,528,528,528,528,528,528,529,529,
/* block 103 */
-522,522,522,522,522,522,522,522,522,522,522,522,523,524,524,524,
-522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
-525,525,525,525,525,525,525,525,525,525,522,522,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-187,188,187,188,187,188,187,188,187,188,526,527,187,188,187,188,
-187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
-187,188,187,188,187,188,187,188,187,188,187,188,187,188,528,192,
-193,193,193,529,192,192,192,192,192,192,192,192,192,192,529,436,
-
-/* block 104 */
-187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
-187,188,187,188,187,188,187,188,187,188,187,188,436,436,192,192,
530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
-530,530,530,530,530,530,531,531,531,531,531,531,531,531,531,531,
-532,532,533,533,533,533,533,533,115,115,115,115,115,115,115,115,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+
+/* block 104 */
+530,530,530,530,530,530,530,530,530,530,530,530,531,532,532,532,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+533,533,533,533,533,533,533,533,533,533,530,530,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+188,189,188,189,188,189,188,189,188,189,534,535,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,536,193,
+194,194,194,537,193,193,193,193,193,193,193,193,193,193,537,441,
/* block 105 */
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14,108,108,108,108,108,108,108,108,108,
- 14, 14, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 33, 33, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
-107, 33, 33, 33, 33, 33, 33, 33, 33, 30, 31, 30, 31,534, 30, 31,
+188,189,188,189,188,189,188,189,188,189,188,189,188,189,188,189,
+188,189,188,189,188,189,188,189,188,189,188,189,441,441,193,193,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,539,539,539,539,539,539,539,539,539,539,
+540,540,541,541,541,541,541,541,116,116,116,116,116,116,116,116,
/* block 106 */
- 30, 31, 30, 31, 30, 31, 30, 31,108, 14, 14, 30, 31,535, 33, 20,
- 30, 31, 30, 31, 33, 33, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,536,537,538,539,536,115,
-540,541,542,543, 30, 31, 30, 31,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115, 20,107,107, 33, 20, 20, 20, 20, 20,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14,109,109,109,109,109,109,109,109,109,
+ 14, 14, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 34, 34, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+108, 34, 34, 34, 34, 34, 34, 34, 34, 31, 32, 31, 32,542, 31, 32,
/* block 107 */
-544,544,545,544,544,544,545,544,544,544,544,545,544,544,544,544,
-544,544,544,544,544,544,544,544,544,544,544,544,544,544,544,544,
-544,544,544,546,546,545,545,546,547,547,547,547,115,115,115,115,
- 23, 23, 23, 23, 23, 23, 19, 19, 5, 19,115,115,115,115,115,115,
-548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,
-548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,
-548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,
-548,548,548,548,549,549,549,549,115,115,115,115,115,115,115,115,
+ 31, 32, 31, 32, 31, 32, 31, 32,109, 14, 14, 31, 32,543, 34, 21,
+ 31, 32, 31, 32, 34, 34, 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,
+ 31, 32, 31, 32, 31, 32, 31, 32, 31, 32,544,545,546,547,544, 34,
+548,549,550,551, 31, 32, 31, 32, 31, 32,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116, 21,108,108, 34, 21, 21, 21, 21, 21,
/* block 108 */
-550,550,551,551,551,551,551,551,551,551,551,551,551,551,551,551,
-551,551,551,551,551,551,551,551,551,551,551,551,551,551,551,551,
-551,551,551,551,551,551,551,551,551,551,551,551,551,551,551,551,
-551,551,551,551,550,550,550,550,550,550,550,550,550,550,550,550,
-550,550,550,550,552,552,115,115,115,115,115,115,115,115,553,553,
-554,554,554,554,554,554,554,554,554,554,115,115,115,115,115,115,
-238,238,238,238,238,238,238,238,238,238,238,238,238,238,238,238,
-238,238,240,240,240,240,240,240,242,242,242,240,242,240,115,115,
+552,552,553,552,552,552,553,552,552,552,552,553,552,552,552,552,
+552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,
+552,552,552,554,554,553,553,554,555,555,555,555,116,116,116,116,
+ 24, 24, 24, 24, 24, 24, 19, 19, 5, 19,116,116,116,116,116,116,
+556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,
+556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,
+556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,
+556,556,556,556,557,557,557,557,116,116,116,116,116,116,116,116,
/* block 109 */
-555,555,555,555,555,555,555,555,555,555,556,556,556,556,556,556,
-556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,
-556,556,556,556,556,556,557,557,557,557,557,557,557,557, 4,558,
+558,558,559,559,559,559,559,559,559,559,559,559,559,559,559,559,
559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,
-559,559,559,559,559,559,559,560,560,560,560,560,560,560,560,560,
-560,560,561,561,115,115,115,115,115,115,115,115,115,115,115,562,
-333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,
-333,333,333,333,333,333,333,333,333,333,333,333,333,115,115,115,
+559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,
+559,559,559,559,558,558,558,558,558,558,558,558,558,558,558,558,
+558,558,558,558,560,560,116,116,116,116,116,116,116,116,561,561,
+562,562,562,562,562,562,562,562,562,562,116,116,116,116,116,116,
+240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,
+240,240,242,242,242,242,242,242,244,244,244,242,244,242,242,240,
/* block 110 */
-563,563,563,564,565,565,565,565,565,565,565,565,565,565,565,565,
-565,565,565,565,565,565,565,565,565,565,565,565,565,565,565,565,
-565,565,565,565,565,565,565,565,565,565,565,565,565,565,565,565,
-565,565,565,563,564,564,563,563,563,563,564,564,563,564,564,564,
-564,566,566,566,566,566,566,566,566,566,566,566,566,566,115,108,
-567,567,567,567,567,567,567,567,567,567,115,115,115,115,566,566,
-323,323,323,323,323,325,568,323,323,323,323,323,323,323,323,323,
-327,327,327,327,327,327,327,327,327,327,323,323,323,323,323,115,
+563,563,563,563,563,563,563,563,563,563,564,564,564,564,564,564,
+564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
+564,564,564,564,564,564,565,565,565,565,565,565,565,565, 4,566,
+567,567,567,567,567,567,567,567,567,567,567,567,567,567,567,567,
+567,567,567,567,567,567,567,568,568,568,568,568,568,568,568,568,
+568,568,569,569,116,116,116,116,116,116,116,116,116,116,116,570,
+337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,
+337,337,337,337,337,337,337,337,337,337,337,337,337,116,116,116,
/* block 111 */
-569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,
-569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,
-569,569,569,569,569,569,569,569,569,570,570,570,570,570,570,571,
-571,570,570,571,571,570,570,115,115,115,115,115,115,115,115,115,
-569,569,569,570,569,569,569,569,569,569,569,569,570,571,115,115,
-572,572,572,572,572,572,572,572,572,572,115,115,573,573,573,573,
-323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,
-568,323,323,323,323,323,323,329,329,329,323,324,325,324,323,323,
+571,571,571,572,573,573,573,573,573,573,573,573,573,573,573,573,
+573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,
+573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,
+573,573,573,571,572,572,571,571,571,571,572,572,571,572,572,572,
+572,574,574,574,574,574,574,574,574,574,574,574,574,574,116,109,
+575,575,575,575,575,575,575,575,575,575,116,116,116,116,574,574,
+327,327,327,327,327,329,576,327,327,327,327,327,327,327,327,327,
+331,331,331,331,331,331,331,331,331,331,327,327,327,327,327,116,
/* block 112 */
-574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,
-574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,
-574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,
-575,574,575,575,575,574,574,575,575,574,574,574,574,574,575,575,
-574,575,574,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,574,574,576,577,577,
-578,578,578,578,578,578,578,578,578,578,578,579,580,580,579,579,
-581,581,578,582,582,579,580,115,115,115,115,115,115,115,115,115,
+577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
+577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,577,
+577,577,577,577,577,577,577,577,577,578,578,578,578,578,578,579,
+579,578,578,579,579,578,578,116,116,116,116,116,116,116,116,116,
+577,577,577,578,577,577,577,577,577,577,577,577,578,579,116,116,
+580,580,580,580,580,580,580,580,580,580,116,116,581,581,581,581,
+327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
+576,327,327,327,327,327,327,333,333,333,327,328,329,328,327,327,
/* block 113 */
-115,336,336,336,336,336,336,115,115,336,336,336,336,336,336,115,
-115,336,336,336,336,336,336,115,115,115,115,115,115,115,115,115,
-336,336,336,336,336,336,336,115,336,336,336,336,336,336,336,115,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33,583, 33, 33, 33, 33, 33, 33, 33, 14,107,107,107,107,
- 33, 33, 33, 33, 33,123,115,115,115,115,115,115,115,115,115,115,
-584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
+582,582,582,582,582,582,582,582,582,582,582,582,582,582,582,582,
+582,582,582,582,582,582,582,582,582,582,582,582,582,582,582,582,
+582,582,582,582,582,582,582,582,582,582,582,582,582,582,582,582,
+583,582,583,583,583,582,582,583,583,582,582,582,582,582,583,583,
+582,583,582,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,582,582,584,585,585,
+586,586,586,586,586,586,586,586,586,586,586,587,588,588,587,587,
+589,589,586,590,590,587,588,116,116,116,116,116,116,116,116,116,
/* block 114 */
-584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
-584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
-584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
-584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
-578,578,578,579,579,580,579,579,580,579,579,581,579,580,115,115,
-585,585,585,585,585,585,585,585,585,585,115,115,115,115,115,115,
+116,340,340,340,340,340,340,116,116,340,340,340,340,340,340,116,
+116,340,340,340,340,340,340,116,116,116,116,116,116,116,116,116,
+340,340,340,340,340,340,340,116,340,340,340,340,340,340,340,116,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 34, 34, 34,591, 34, 34, 34, 34, 34, 34, 34, 14,108,108,108,108,
+ 34, 34, 34, 34, 34,124,116,116,116,116,116,116,116,116,116,116,
+592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
/* block 115 */
-586,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,586,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,586,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,586,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-586,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
+592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
+592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
+592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,592,
+586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
+586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
+586,586,586,587,587,588,587,587,588,587,587,589,587,588,116,116,
+593,593,593,593,593,593,593,593,593,593,116,116,116,116,116,116,
/* block 116 */
-587,587,587,587,587,587,587,587,587,587,587,587,586,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,586,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,586,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-586,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,586,587,587,587,
+594,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,594,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,594,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,594,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+594,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
/* block 117 */
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,586,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,586,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-586,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,586,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+595,595,595,595,595,595,595,595,595,595,595,595,594,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,594,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,594,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+594,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,594,595,595,595,
/* block 118 */
-587,587,587,587,587,587,587,587,586,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,586,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-586,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,586,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,586,587,587,587,587,587,587,587,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,594,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,594,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+594,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,594,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
/* block 119 */
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,586,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-586,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,586,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,586,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+595,595,595,595,595,595,595,595,594,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,594,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+594,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,594,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,594,595,595,595,595,595,595,595,
/* block 120 */
-587,587,587,587,586,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-586,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,586,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,586,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,586,587,587,587,587,587,587,587,587,587,587,587,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,594,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+594,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,594,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,594,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
/* block 121 */
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-586,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,586,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,586,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,586,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+595,595,595,595,594,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+594,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,594,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,594,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,594,595,595,595,595,595,595,595,595,595,595,595,
/* block 122 */
-587,587,587,587,587,587,587,587,586,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,115,115,115,115,115,115,115,115,115,115,115,115,
-334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,
-334,334,334,334,334,334,334,115,115,115,115,335,335,335,335,335,
-335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,
-335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,
-335,335,335,335,335,335,335,335,335,335,335,335,115,115,115,115,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+594,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,594,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,594,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,594,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
/* block 123 */
-588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,
-588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,
-588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,
-588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,
-588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,
-588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,
-588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,
-588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,
+595,595,595,595,595,595,595,595,594,595,595,595,595,595,595,595,
+595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,595,
+595,595,595,595,116,116,116,116,116,116,116,116,116,116,116,116,
+338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,
+338,338,338,338,338,338,338,116,116,116,116,339,339,339,339,339,
+339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,
+339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,
+339,339,339,339,339,339,339,339,339,339,339,339,116,116,116,116,
/* block 124 */
-589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
-589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
-589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
-589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
-589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
-589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
-589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
-589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
/* block 125 */
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,115,115,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
/* block 126 */
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,116,116,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
/* block 127 */
- 33, 33, 33, 33, 33, 33, 33,115,115,115,115,115,115,115,115,115,
-115,115,115,200,200,200,200,200,115,115,115,115,115,207,204,207,
-207,207,207,207,207,207,207,207,207,590,207,207,207,207,207,207,
-207,207,207,207,207,207,207,115,207,207,207,207,207,115,207,115,
-207,207,115,207,207,115,207,207,207,207,207,207,207,207,207,207,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 128 */
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,591,591,591,591,591,591,591,591,591,591,591,591,591,591,
-591,591,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+ 34, 34, 34, 34, 34, 34, 34,116,116,116,116,116,116,116,116,116,
+116,116,116,200,200,200,200,200,116,116,116,116,116,208,205,208,
+208,208,208,208,208,208,208,208,208,598,208,208,208,208,208,208,
+208,208,208,208,208,208,208,116,208,208,208,208,208,116,208,116,
+208,208,116,208,208,116,208,208,208,208,208,208,208,208,208,208,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
/* block 129 */
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,599,599,599,599,599,599,599,599,599,599,599,599,599,599,
+599,599,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
/* block 130 */
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216, 7, 6,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
/* block 131 */
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-115,115,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-216,216,216,216,216,216,216,216,216,216,216,216,212,213,115,115,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217, 7, 6,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
/* block 132 */
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
- 4, 4, 4, 4, 4, 4, 4, 6, 7, 4,115,115,115,115,115,115,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,192,192,
- 4, 9, 9, 15, 15, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6,
- 7, 6, 7, 6, 7, 4, 4, 6, 7, 4, 4, 4, 4, 15, 15, 15,
- 4, 4, 4,115, 4, 4, 4, 4, 9, 6, 7, 6, 7, 6, 7, 4,
- 4, 4, 8, 9, 8, 8, 8,115, 4, 5, 4, 4,115,115,115,115,
-216,216,216,216,216,115,216,216,216,216,216,216,216,216,216,216,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+116,116,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+217,217,217,217,217,217,217,217,217,217,217,217,213,214,116,116,
/* block 133 */
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,115,115, 22,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+ 4, 4, 4, 4, 4, 4, 4, 6, 7, 4,116,116,116,116,116,116,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,193,193,
+ 4, 9, 9, 15, 15, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6,
+ 7, 6, 7, 6, 7, 4, 4, 6, 7, 4, 4, 4, 4, 15, 15, 15,
+ 4, 4, 4,116, 4, 4, 4, 4, 9, 6, 7, 6, 7, 6, 7, 4,
+ 4, 4, 8, 9, 8, 8, 8,116, 4, 5, 4, 4,116,116,116,116,
+217,217,217,217,217,116,217,217,217,217,217,217,217,217,217,217,
/* block 134 */
-115, 4, 4, 4, 5, 4, 4, 4, 6, 7, 4, 8, 4, 9, 4, 4,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,116,116, 23,
+
+/* block 135 */
+116, 4, 4, 4, 5, 4, 4, 4, 6, 7, 4, 8, 4, 9, 4, 4,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 4, 4, 8, 8, 8, 4,
4, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 6, 4, 7, 14, 15,
14, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 6, 8, 7, 8, 6,
- 7, 4, 6, 7, 4, 4,509,509,509,509,509,509,509,509,509,509,
-108,509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,
-
-/* block 135 */
-509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,
-509,509,509,509,509,509,509,509,509,509,509,509,509,509,592,592,
-512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,
-512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,115,
-115,115,512,512,512,512,512,512,115,115,512,512,512,512,512,512,
-115,115,512,512,512,512,512,512,115,115,512,512,512,115,115,115,
- 5, 5, 8, 14, 19, 5, 5,115, 19, 8, 8, 8, 8, 19, 19,115,
-465,465,465,465,465,465,465,465,465, 22, 22, 22, 19, 19,115,115,
+ 7, 4, 6, 7, 4, 4,517,517,517,517,517,517,517,517,517,517,
+109,517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,
/* block 136 */
-593,593,593,593,593,593,593,593,593,593,593,593,115,593,593,593,
-593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,
-593,593,593,593,593,593,593,115,593,593,593,593,593,593,593,593,
-593,593,593,593,593,593,593,593,593,593,593,115,593,593,115,593,
-593,593,593,593,593,593,593,593,593,593,593,593,593,593,115,115,
-593,593,593,593,593,593,593,593,593,593,593,593,593,593,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,517,
+517,517,517,517,517,517,517,517,517,517,517,517,517,517,600,600,
+520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,
+520,520,520,520,520,520,520,520,520,520,520,520,520,520,520,116,
+116,116,520,520,520,520,520,520,116,116,520,520,520,520,520,520,
+116,116,520,520,520,520,520,520,116,116,520,520,520,116,116,116,
+ 5, 5, 8, 14, 19, 5, 5,116, 19, 8, 8, 8, 8, 19, 19,116,
+471,471,471,471,471,471,471,471,471, 23, 23, 23, 19, 19,116,116,
/* block 137 */
-593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,
-593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,
-593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,
-593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,
-593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,
-593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,
-593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,
-593,593,593,593,593,593,593,593,593,593,593,115,115,115,115,115,
+601,601,601,601,601,601,601,601,601,601,601,601,116,601,601,601,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
+601,601,601,601,601,601,601,116,601,601,601,601,601,601,601,601,
+601,601,601,601,601,601,601,601,601,601,601,116,601,601,116,601,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,116,116,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 138 */
- 4, 4, 4,115,115,115,115, 23, 23, 23, 23, 23, 23, 23, 23, 23,
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
- 23, 23, 23, 23,115,115,115, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,
-594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,
-594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,
-594,594,594,594,594,595,595,595,595,596,596,596,596,596,596,596,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
+601,601,601,601,601,601,601,601,601,601,601,116,116,116,116,116,
/* block 139 */
-596,596,596,596,596,596,596,596,596,596,595,595,596,596,596,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,
-596,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,110,115,115,
+ 4, 4, 4,116,116,116,116, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24,116,116,116, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,
+602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,
+602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,
+602,602,602,602,602,603,603,603,603,604,604,604,604,604,604,604,
/* block 140 */
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+604,604,604,604,604,604,604,604,604,604,603,603,604,604,604,116,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,116,116,116,116,
+604,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,111,116,116,
/* block 141 */
-597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
-597,597,597,597,597,597,597,597,597,597,597,597,597,115,115,115,
-598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,
-598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,
-598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,
-598,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-110, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,115,115,115,115,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 142 */
-599,599,599,599,599,599,599,599,599,599,599,599,599,599,599,599,
-599,599,599,599,599,599,599,599,599,599,599,599,599,599,599,599,
-600,600,600,600,115,115,115,115,115,115,115,115,115,599,599,599,
-601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
-601,602,601,601,601,601,601,601,601,601,602,115,115,115,115,115,
-603,603,603,603,603,603,603,603,603,603,603,603,603,603,603,603,
-603,603,603,603,603,603,603,603,603,603,603,603,603,603,603,603,
-603,603,603,603,603,603,604,604,604,604,604,115,115,115,115,115,
+605,605,605,605,605,605,605,605,605,605,605,605,605,605,605,605,
+605,605,605,605,605,605,605,605,605,605,605,605,605,116,116,116,
+606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,
+606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,
+606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,
+606,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+111, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,116,116,116,116,
/* block 143 */
-605,605,605,605,605,605,605,605,605,605,605,605,605,605,605,605,
-605,605,605,605,605,605,605,605,605,605,605,605,605,605,115,606,
607,607,607,607,607,607,607,607,607,607,607,607,607,607,607,607,
607,607,607,607,607,607,607,607,607,607,607,607,607,607,607,607,
-607,607,607,607,115,115,115,115,607,607,607,607,607,607,607,607,
-608,609,609,609,609,609,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-
-/* block 144 */
-610,610,610,610,610,610,610,610,610,610,610,610,610,610,610,610,
-610,610,610,610,610,610,610,610,610,610,610,610,610,610,610,610,
-610,610,610,610,610,610,610,610,611,611,611,611,611,611,611,611,
+608,608,608,608,116,116,116,116,116,116,116,116,116,607,607,607,
+609,609,609,609,609,609,609,609,609,609,609,609,609,609,609,609,
+609,610,609,609,609,609,609,609,609,609,610,116,116,116,116,116,
611,611,611,611,611,611,611,611,611,611,611,611,611,611,611,611,
611,611,611,611,611,611,611,611,611,611,611,611,611,611,611,611,
-612,612,612,612,612,612,612,612,612,612,612,612,612,612,612,612,
-612,612,612,612,612,612,612,612,612,612,612,612,612,612,612,612,
-612,612,612,612,612,612,612,612,612,612,612,612,612,612,612,612,
+611,611,611,611,611,611,612,612,612,612,612,116,116,116,116,116,
-/* block 145 */
+/* block 144 */
613,613,613,613,613,613,613,613,613,613,613,613,613,613,613,613,
-613,613,613,613,613,613,613,613,613,613,613,613,613,613,115,115,
-614,614,614,614,614,614,614,614,614,614,115,115,115,115,115,115,
+613,613,613,613,613,613,613,613,613,613,613,613,613,613,116,614,
615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
-615,615,615,615,115,115,115,115,616,616,616,616,616,616,616,616,
-616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,
-616,616,616,616,616,616,616,616,616,616,616,616,115,115,115,115,
+615,615,615,615,116,116,116,116,615,615,615,615,615,615,615,615,
+616,617,617,617,617,617,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
-/* block 146 */
-617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,
-617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,
-617,617,617,617,617,617,617,617,115,115,115,115,115,115,115,115,
-618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,
+/* block 145 */
618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,
618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,
-618,618,618,618,115,115,115,115,115,115,115,115,115,115,115,619,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-
-/* block 147 */
-620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
-620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
-620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
-620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
-620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
+618,618,618,618,618,618,618,618,619,619,619,619,619,619,619,619,
+619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,
+619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,
620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
+/* block 146 */
+621,621,621,621,621,621,621,621,621,621,621,621,621,621,621,621,
+621,621,621,621,621,621,621,621,621,621,621,621,621,621,116,116,
+622,622,622,622,622,622,622,622,622,622,116,116,116,116,116,116,
+623,623,623,623,623,623,623,623,623,623,623,623,623,623,623,623,
+623,623,623,623,623,623,623,623,623,623,623,623,623,623,623,623,
+623,623,623,623,116,116,116,116,624,624,624,624,624,624,624,624,
+624,624,624,624,624,624,624,624,624,624,624,624,624,624,624,624,
+624,624,624,624,624,624,624,624,624,624,624,624,116,116,116,116,
+
+/* block 147 */
+625,625,625,625,625,625,625,625,625,625,625,625,625,625,625,625,
+625,625,625,625,625,625,625,625,625,625,625,625,625,625,625,625,
+625,625,625,625,625,625,625,625,116,116,116,116,116,116,116,116,
+626,626,626,626,626,626,626,626,626,626,626,626,626,626,626,626,
+626,626,626,626,626,626,626,626,626,626,626,626,626,626,626,626,
+626,626,626,626,626,626,626,626,626,626,626,626,626,626,626,626,
+626,626,626,626,116,116,116,116,116,116,116,116,116,116,116,627,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+
/* block 148 */
-620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
-620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
-620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
-620,620,620,620,620,620,620,115,115,115,115,115,115,115,115,115,
-620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
-620,620,620,620,620,620,115,115,115,115,115,115,115,115,115,115,
-620,620,620,620,620,620,620,620,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
/* block 149 */
-621,621,621,621,621,621,115,115,621,115,621,621,621,621,621,621,
-621,621,621,621,621,621,621,621,621,621,621,621,621,621,621,621,
-621,621,621,621,621,621,621,621,621,621,621,621,621,621,621,621,
-621,621,621,621,621,621,115,621,621,115,115,115,621,115,115,621,
-622,622,622,622,622,622,622,622,622,622,622,622,622,622,622,622,
-622,622,622,622,622,622,115,623,624,624,624,624,624,624,624,624,
-625,625,625,625,625,625,625,625,625,625,625,625,625,625,625,625,
-625,625,625,625,625,625,625,626,626,627,627,627,627,627,627,627,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,116,116,116,116,116,116,116,116,116,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,116,116,116,116,116,116,116,116,116,116,
+628,628,628,628,628,628,628,628,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 150 */
-628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
-628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,115,
-115,115,115,115,115,115,115,629,629,629,629,629,629,629,629,629,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+629,629,629,629,629,629,116,116,629,116,629,629,629,629,629,629,
+629,629,629,629,629,629,629,629,629,629,629,629,629,629,629,629,
+629,629,629,629,629,629,629,629,629,629,629,629,629,629,629,629,
+629,629,629,629,629,629,116,629,629,116,116,116,629,116,116,629,
630,630,630,630,630,630,630,630,630,630,630,630,630,630,630,630,
-630,630,630,115,630,630,115,115,115,115,115,631,631,631,631,631,
+630,630,630,630,630,630,116,631,632,632,632,632,632,632,632,632,
+633,633,633,633,633,633,633,633,633,633,633,633,633,633,633,633,
+633,633,633,633,633,633,633,634,634,635,635,635,635,635,635,635,
/* block 151 */
-632,632,632,632,632,632,632,632,632,632,632,632,632,632,632,632,
-632,632,632,632,632,632,633,633,633,633,633,633,115,115,115,634,
-635,635,635,635,635,635,635,635,635,635,635,635,635,635,635,635,
-635,635,635,635,635,635,635,635,635,635,115,115,115,115,115,636,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+636,636,636,636,636,636,636,636,636,636,636,636,636,636,636,636,
+636,636,636,636,636,636,636,636,636,636,636,636,636,636,636,116,
+116,116,116,116,116,116,116,637,637,637,637,637,637,637,637,637,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+638,638,638,638,638,638,638,638,638,638,638,638,638,638,638,638,
+638,638,638,116,638,638,116,116,116,116,116,639,639,639,639,639,
/* block 152 */
-637,637,637,637,637,637,637,637,637,637,637,637,637,637,637,637,
-637,637,637,637,637,637,637,637,637,637,637,637,637,637,637,637,
-638,638,638,638,638,638,638,638,638,638,638,638,638,638,638,638,
-638,638,638,638,638,638,638,638,115,115,115,115,639,639,638,638,
-639,639,639,639,639,639,639,639,639,639,639,639,639,639,639,639,
-115,115,639,639,639,639,639,639,639,639,639,639,639,639,639,639,
-639,639,639,639,639,639,639,639,639,639,639,639,639,639,639,639,
-639,639,639,639,639,639,639,639,639,639,639,639,639,639,639,639,
+640,640,640,640,640,640,640,640,640,640,640,640,640,640,640,640,
+640,640,640,640,640,640,641,641,641,641,641,641,116,116,116,642,
+643,643,643,643,643,643,643,643,643,643,643,643,643,643,643,643,
+643,643,643,643,643,643,643,643,643,643,116,116,116,116,116,644,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 153 */
-640,641,641,641,115,641,641,115,115,115,115,115,641,641,641,641,
-640,640,640,640,115,640,640,640,115,640,640,640,640,640,640,640,
-640,640,640,640,640,640,640,640,640,640,640,640,640,640,640,640,
-640,640,640,640,115,115,115,115,641,641,641,115,115,115,115,641,
-642,642,642,642,642,642,642,642,115,115,115,115,115,115,115,115,
-643,643,643,643,643,643,643,643,643,115,115,115,115,115,115,115,
-644,644,644,644,644,644,644,644,644,644,644,644,644,644,644,644,
-644,644,644,644,644,644,644,644,644,644,644,644,644,645,645,646,
+645,645,645,645,645,645,645,645,645,645,645,645,645,645,645,645,
+645,645,645,645,645,645,645,645,645,645,645,645,645,645,645,645,
+646,646,646,646,646,646,646,646,646,646,646,646,646,646,646,646,
+646,646,646,646,646,646,646,646,116,116,116,116,647,647,646,646,
+647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,
+116,116,647,647,647,647,647,647,647,647,647,647,647,647,647,647,
+647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,
+647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,
/* block 154 */
-647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,
-647,647,647,647,647,647,647,647,647,647,647,647,647,648,648,648,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-649,649,649,649,649,649,649,649,650,649,649,649,649,649,649,649,
-649,649,649,649,649,649,649,649,649,649,649,649,649,649,649,649,
-649,649,649,649,649,651,651,115,115,115,115,652,652,652,652,652,
-653,653,653,653,653,653,653,115,115,115,115,115,115,115,115,115,
+648,649,649,649,116,649,649,116,116,116,116,116,649,649,649,649,
+648,648,648,648,116,648,648,648,116,648,648,648,648,648,648,648,
+648,648,648,648,648,648,648,648,648,648,648,648,648,648,648,648,
+648,648,648,648,648,648,116,116,649,649,649,116,116,116,116,649,
+650,650,650,650,650,650,650,650,650,116,116,116,116,116,116,116,
+651,651,651,651,651,651,651,651,651,116,116,116,116,116,116,116,
+652,652,652,652,652,652,652,652,652,652,652,652,652,652,652,652,
+652,652,652,652,652,652,652,652,652,652,652,652,652,653,653,654,
/* block 155 */
-654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,
-654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,
-654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,
-654,654,654,654,654,654,115,115,115,655,655,655,655,655,655,655,
-656,656,656,656,656,656,656,656,656,656,656,656,656,656,656,656,
-656,656,656,656,656,656,115,115,657,657,657,657,657,657,657,657,
-658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,
-658,658,658,115,115,115,115,115,659,659,659,659,659,659,659,659,
+655,655,655,655,655,655,655,655,655,655,655,655,655,655,655,655,
+655,655,655,655,655,655,655,655,655,655,655,655,655,656,656,656,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+657,657,657,657,657,657,657,657,658,657,657,657,657,657,657,657,
+657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
+657,657,657,657,657,659,659,116,116,116,116,660,660,660,660,660,
+661,661,661,661,661,661,661,116,116,116,116,116,116,116,116,116,
/* block 156 */
-660,660,660,660,660,660,660,660,660,660,660,660,660,660,660,660,
-660,660,115,115,115,115,115,115,115,661,661,661,661,115,115,115,
-115,115,115,115,115,115,115,115,115,662,662,662,662,662,662,662,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,
+662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,
+662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,
+662,662,662,662,662,662,116,116,116,663,663,663,663,663,663,663,
+664,664,664,664,664,664,664,664,664,664,664,664,664,664,664,664,
+664,664,664,664,664,664,116,116,665,665,665,665,665,665,665,665,
+666,666,666,666,666,666,666,666,666,666,666,666,666,666,666,666,
+666,666,666,116,116,116,116,116,667,667,667,667,667,667,667,667,
/* block 157 */
-663,663,663,663,663,663,663,663,663,663,663,663,663,663,663,663,
-663,663,663,663,663,663,663,663,663,663,663,663,663,663,663,663,
-663,663,663,663,663,663,663,663,663,663,663,663,663,663,663,663,
-663,663,663,663,663,663,663,663,663,663,663,663,663,663,663,663,
-663,663,663,663,663,663,663,663,663,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,
+668,668,116,116,116,116,116,116,116,669,669,669,669,116,116,116,
+116,116,116,116,116,116,116,116,116,670,670,670,670,670,670,670,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 158 */
-664,664,664,664,664,664,664,664,664,664,664,664,664,664,664,664,
-664,664,664,664,664,664,664,664,664,664,664,664,664,664,664,664,
-664,664,664,664,664,664,664,664,664,664,664,664,664,664,664,664,
-664,664,664,115,115,115,115,115,115,115,115,115,115,115,115,115,
-665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,
-665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,
-665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,
-665,665,665,115,115,115,115,115,115,115,666,666,666,666,666,666,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 159 */
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,
-667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,115,
+672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
+672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
+672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
+672,672,672,116,116,116,116,116,116,116,116,116,116,116,116,116,
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+673,673,673,116,116,116,116,116,116,116,674,674,674,674,674,674,
/* block 160 */
-668,669,668,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,669,669,669,669,669,669,669,669,
-669,669,669,669,669,669,669,671,671,671,671,671,671,671,115,115,
-115,115,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
-672,672,672,672,672,672,673,673,673,673,673,673,673,673,673,673,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,669,
+675,675,675,675,675,675,675,675,675,675,675,675,675,675,675,675,
+675,675,675,675,675,675,675,675,675,675,675,675,675,675,675,675,
+675,675,675,675,676,676,676,676,116,116,116,116,116,116,116,116,
+677,677,677,677,677,677,677,677,677,677,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 161 */
-674,674,675,676,676,676,676,676,676,676,676,676,676,676,676,676,
-676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,
-676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,
-675,675,675,674,674,674,674,675,675,674,674,677,677,678,677,677,
-677,677,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,
-679,679,679,679,679,679,679,679,679,115,115,115,115,115,115,115,
-680,680,680,680,680,680,680,680,680,680,115,115,115,115,115,115,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,
+678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,116,
/* block 162 */
-681,681,681,682,682,682,682,682,682,682,682,682,682,682,682,682,
-682,682,682,682,682,682,682,682,682,682,682,682,682,682,682,682,
-682,682,682,682,682,682,682,681,681,681,681,681,683,681,681,681,
-681,681,681,681,681,115,684,684,684,684,684,684,684,684,684,684,
-685,685,685,685,115,115,115,115,115,115,115,115,115,115,115,115,
-686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,
-686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,
-686,686,686,687,688,688,686,115,115,115,115,115,115,115,115,115,
+679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,
+679,679,679,679,679,679,679,679,679,679,679,679,679,680,680,680,
+680,680,680,680,680,680,680,679,116,116,116,116,116,116,116,116,
+681,681,681,681,681,681,681,681,681,681,681,681,681,681,681,681,
+681,681,681,681,681,681,682,682,682,682,682,682,682,682,682,682,
+682,683,683,683,683,684,684,684,684,684,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 163 */
-689,689,690,691,691,691,691,691,691,691,691,691,691,691,691,691,
-691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,
-691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,
-691,691,691,690,690,690,689,689,689,689,689,689,689,689,689,690,
-690,691,692,692,691,693,693,693,693,693,689,689,689,693,115,115,
-694,694,694,694,694,694,694,694,694,694,691,693,691,693,693,693,
-115,695,695,695,695,695,695,695,695,695,695,695,695,695,695,695,
-695,695,695,695,695,115,115,115,115,115,115,115,115,115,115,115,
+685,686,685,687,687,687,687,687,687,687,687,687,687,687,687,687,
+687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,
+687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,
+687,687,687,687,687,687,687,687,686,686,686,686,686,686,686,686,
+686,686,686,686,686,686,686,688,688,688,688,688,688,688,116,116,
+116,116,689,689,689,689,689,689,689,689,689,689,689,689,689,689,
+689,689,689,689,689,689,690,690,690,690,690,690,690,690,690,690,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,686,
/* block 164 */
+691,691,692,693,693,693,693,693,693,693,693,693,693,693,693,693,
+693,693,693,693,693,693,693,693,693,693,693,693,693,693,693,693,
+693,693,693,693,693,693,693,693,693,693,693,693,693,693,693,693,
+692,692,692,691,691,691,691,692,692,691,691,694,694,695,694,694,
+694,694,116,116,116,116,116,116,116,116,116,116,116,695,116,116,
696,696,696,696,696,696,696,696,696,696,696,696,696,696,696,696,
-696,696,115,696,696,696,696,696,696,696,696,696,696,696,696,696,
-696,696,696,696,696,696,696,696,696,696,696,696,697,697,697,698,
-698,698,697,697,698,697,698,698,699,699,699,699,699,699,698,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+696,696,696,696,696,696,696,696,696,116,116,116,116,116,116,116,
+697,697,697,697,697,697,697,697,697,697,116,116,116,116,116,116,
/* block 165 */
-700,700,700,700,700,700,700,115,700,115,700,700,700,700,115,700,
-700,700,700,700,700,700,700,700,700,700,700,700,700,700,115,700,
-700,700,700,700,700,700,700,700,700,701,115,115,115,115,115,115,
-702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,
-702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,
-702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,703,
-704,704,704,703,703,703,703,703,703,703,703,115,115,115,115,115,
-705,705,705,705,705,705,705,705,705,705,115,115,115,115,115,115,
+698,698,698,699,699,699,699,699,699,699,699,699,699,699,699,699,
+699,699,699,699,699,699,699,699,699,699,699,699,699,699,699,699,
+699,699,699,699,699,699,699,698,698,698,698,698,700,698,698,698,
+698,698,698,698,698,116,701,701,701,701,701,701,701,701,701,701,
+702,702,702,702,699,700,700,116,116,116,116,116,116,116,116,116,
+703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,
+703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,
+703,703,703,704,705,705,703,116,116,116,116,116,116,116,116,116,
/* block 166 */
-706,706,707,707,115,708,708,708,708,708,708,708,708,115,115,708,
-708,115,115,708,708,708,708,708,708,708,708,708,708,708,708,708,
-708,708,708,708,708,708,708,708,708,115,708,708,708,708,708,708,
-708,115,708,708,115,708,708,708,708,708,115,115,706,708,709,707,
-706,707,707,707,707,115,115,707,707,115,115,707,707,707,115,115,
-708,115,115,115,115,115,115,709,115,115,115,115,115,708,708,708,
-708,708,707,707,115,115,706,706,706,706,706,706,706,115,115,115,
-706,706,706,706,706,115,115,115,115,115,115,115,115,115,115,115,
+706,706,707,708,708,708,708,708,708,708,708,708,708,708,708,708,
+708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,
+708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,
+708,708,708,707,707,707,706,706,706,706,706,706,706,706,706,707,
+707,708,709,709,708,710,710,710,710,706,706,706,706,710,116,116,
+711,711,711,711,711,711,711,711,711,711,708,710,708,710,710,710,
+116,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,
+712,712,712,712,712,116,116,116,116,116,116,116,116,116,116,116,
/* block 167 */
-710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,
-710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,
-710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,
-710,710,710,710,710,711,711,711,712,712,712,712,712,712,712,712,
-711,711,712,712,712,711,712,710,710,710,710,713,713,713,713,713,
-714,714,714,714,714,714,714,714,714,714,115,713,115,713,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+713,713,713,713,713,713,713,713,713,713,713,713,713,713,713,713,
+713,713,116,713,713,713,713,713,713,713,713,713,713,713,713,713,
+713,713,713,713,713,713,713,713,713,713,713,713,714,714,714,715,
+715,715,714,714,715,714,715,715,716,716,716,716,716,716,715,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 168 */
-715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,
-715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,
-715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,
-716,717,717,718,718,718,718,718,718,717,718,717,717,716,717,718,
-718,717,718,718,715,715,719,715,115,115,115,115,115,115,115,115,
-720,720,720,720,720,720,720,720,720,720,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+717,717,717,717,717,717,717,116,717,116,717,717,717,717,116,717,
+717,717,717,717,717,717,717,717,717,717,717,717,717,717,116,717,
+717,717,717,717,717,717,717,717,717,718,116,116,116,116,116,116,
+719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,
+719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,
+719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,720,
+721,721,721,720,720,720,720,720,720,720,720,116,116,116,116,116,
+722,722,722,722,722,722,722,722,722,722,116,116,116,116,116,116,
/* block 169 */
-721,721,721,721,721,721,721,721,721,721,721,721,721,721,721,721,
-721,721,721,721,721,721,721,721,721,721,721,721,721,721,721,721,
-721,721,721,721,721,721,721,721,721,721,721,721,721,721,721,722,
-723,723,724,724,724,724,115,115,723,723,723,723,724,724,723,724,
-724,725,725,725,725,725,725,725,725,725,725,725,725,725,725,725,
-725,725,725,725,725,725,725,725,721,721,721,721,724,724,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+723,723,724,724,116,725,725,725,725,725,725,725,725,116,116,725,
+725,116,116,725,725,725,725,725,725,725,725,725,725,725,725,725,
+725,725,725,725,725,725,725,725,725,116,725,725,725,725,725,725,
+725,116,725,725,116,725,725,725,725,725,116,111,723,725,726,724,
+723,724,724,724,724,116,116,724,724,116,116,724,724,724,116,116,
+725,116,116,116,116,116,116,726,116,116,116,116,116,725,725,725,
+725,725,724,724,116,116,723,723,723,723,723,723,723,116,116,116,
+723,723,723,723,723,116,116,116,116,116,116,116,116,116,116,116,
/* block 170 */
-726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,
-726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,
-726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,
-727,727,727,728,728,728,728,728,728,728,728,727,727,728,727,728,
-728,729,729,729,726,115,115,115,115,115,115,115,115,115,115,115,
-730,730,730,730,730,730,730,730,730,730,115,115,115,115,115,115,
-369,369,369,369,369,369,369,369,369,369,369,369,369,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,
+727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,
+727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,
+727,727,727,727,727,728,728,728,729,729,729,729,729,729,729,729,
+728,728,729,729,729,728,729,727,727,727,727,730,730,730,730,730,
+731,731,731,731,731,731,731,731,731,731,116,730,116,730,729,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 171 */
-731,731,731,731,731,731,731,731,731,731,731,731,731,731,731,731,
-731,731,731,731,731,731,731,731,731,731,731,731,731,731,731,731,
-731,731,731,731,731,731,731,731,731,731,731,732,733,732,733,733,
-732,732,732,732,732,732,733,732,115,115,115,115,115,115,115,115,
-734,734,734,734,734,734,734,734,734,734,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+732,732,732,732,732,732,732,732,732,732,732,732,732,732,732,732,
+732,732,732,732,732,732,732,732,732,732,732,732,732,732,732,732,
+732,732,732,732,732,732,732,732,732,732,732,732,732,732,732,732,
+733,734,734,735,735,735,735,735,735,734,735,734,734,733,734,735,
+735,734,735,735,732,732,736,732,116,116,116,116,116,116,116,116,
+737,737,737,737,737,737,737,737,737,737,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 172 */
-735,735,735,735,735,735,735,735,735,735,735,735,735,735,735,735,
-735,735,735,735,735,735,735,735,735,735,115,115,115,736,736,736,
-737,737,736,736,736,736,737,736,736,736,736,736,115,115,115,115,
-738,738,738,738,738,738,738,738,738,738,739,739,740,740,740,741,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+738,738,738,738,738,738,738,738,738,738,738,738,738,738,738,738,
+738,738,738,738,738,738,738,738,738,738,738,738,738,738,738,738,
+738,738,738,738,738,738,738,738,738,738,738,738,738,738,738,739,
+740,740,741,741,741,741,116,116,740,740,740,740,741,741,740,741,
+741,742,742,742,742,742,742,742,742,742,742,742,742,742,742,742,
+742,742,742,742,742,742,742,742,738,738,738,738,741,741,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 173 */
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-742,742,742,742,742,742,742,742,742,742,742,742,742,742,742,742,
-742,742,742,742,742,742,742,742,742,742,742,742,742,742,742,742,
743,743,743,743,743,743,743,743,743,743,743,743,743,743,743,743,
743,743,743,743,743,743,743,743,743,743,743,743,743,743,743,743,
-744,744,744,744,744,744,744,744,744,744,745,745,745,745,745,745,
-745,745,745,115,115,115,115,115,115,115,115,115,115,115,115,746,
+743,743,743,743,743,743,743,743,743,743,743,743,743,743,743,743,
+744,744,744,745,745,745,745,745,745,745,745,744,744,745,744,745,
+745,746,746,746,743,116,116,116,116,116,116,116,116,116,116,116,
+747,747,747,747,747,747,747,747,747,747,116,116,116,116,116,116,
+373,373,373,373,373,373,373,373,373,373,373,373,373,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 174 */
-747,748,748,748,748,748,748,749,749,748,748,747,747,747,747,747,
-747,747,747,747,747,747,747,747,747,747,747,747,747,747,747,747,
-747,747,747,747,747,747,747,747,747,747,747,747,747,747,747,747,
-747,747,747,748,748,748,748,748,748,749,750,748,748,748,748,751,
-751,751,751,751,751,751,751,748,115,115,115,115,115,115,115,115,
-752,753,753,753,753,753,753,754,754,753,753,753,752,752,752,752,
-752,752,752,752,752,752,752,752,752,752,752,752,752,752,752,752,
-752,752,752,752,752,752,752,752,752,752,752,752,752,752,752,752,
+748,748,748,748,748,748,748,748,748,748,748,748,748,748,748,748,
+748,748,748,748,748,748,748,748,748,748,748,748,748,748,748,748,
+748,748,748,748,748,748,748,748,748,748,748,749,750,749,750,750,
+749,749,749,749,749,749,750,749,116,116,116,116,116,116,116,116,
+751,751,751,751,751,751,751,751,751,751,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 175 */
-752,752,752,752,115,115,755,755,755,755,753,753,753,753,753,753,
-753,753,753,753,753,753,753,754,753,753,756,756,756,115,756,756,
-756,756,756,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-757,757,757,757,757,757,757,757,757,757,757,757,757,757,757,757,
-757,757,757,757,757,757,757,757,757,757,757,757,757,757,757,757,
-757,757,757,757,757,757,757,757,757,757,757,757,757,757,757,757,
-757,757,757,757,757,757,757,757,757,115,115,115,115,115,115,115,
+752,752,752,752,752,752,752,752,752,752,752,752,752,752,752,752,
+752,752,752,752,752,752,752,752,752,752,752,116,116,753,753,753,
+754,754,753,753,753,753,754,753,753,753,753,753,116,116,116,116,
+755,755,755,755,755,755,755,755,755,755,756,756,757,757,757,758,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 176 */
-758,758,758,758,758,758,758,758,758,115,758,758,758,758,758,758,
-758,758,758,758,758,758,758,758,758,758,758,758,758,758,758,758,
-758,758,758,758,758,758,758,758,758,758,758,758,758,758,758,759,
-760,760,760,760,760,760,760,115,760,760,760,760,760,760,759,760,
-758,761,761,761,761,761,115,115,115,115,115,115,115,115,115,115,
-762,762,762,762,762,762,762,762,762,762,763,763,763,763,763,763,
-763,763,763,763,763,763,763,763,763,763,763,763,763,115,115,115,
-764,764,765,765,765,765,765,765,765,765,765,765,765,765,765,765,
+759,759,759,759,759,759,759,759,759,759,759,759,759,759,759,759,
+759,759,759,759,759,759,759,759,759,759,759,759,759,759,759,759,
+759,759,759,759,759,759,759,759,759,759,759,759,760,760,760,761,
+761,761,761,761,761,761,761,761,760,761,761,762,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 177 */
-765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,
-115,115,766,766,766,766,766,766,766,766,766,766,766,766,766,766,
-766,766,766,766,766,766,766,766,115,767,766,766,766,766,766,766,
-766,767,766,766,767,766,766,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+763,763,763,763,763,763,763,763,763,763,763,763,763,763,763,763,
+763,763,763,763,763,763,763,763,763,763,763,763,763,763,763,763,
+764,764,764,764,764,764,764,764,764,764,764,764,764,764,764,764,
+764,764,764,764,764,764,764,764,764,764,764,764,764,764,764,764,
+765,765,765,765,765,765,765,765,765,765,766,766,766,766,766,766,
+766,766,766,116,116,116,116,116,116,116,116,116,116,116,116,767,
/* block 178 */
-768,768,768,768,768,768,768,115,768,768,115,768,768,768,768,768,
+768,769,769,769,769,769,769,769,769,769,769,768,768,768,768,768,
768,768,768,768,768,768,768,768,768,768,768,768,768,768,768,768,
768,768,768,768,768,768,768,768,768,768,768,768,768,768,768,768,
-768,769,769,769,769,769,769,115,115,115,769,115,769,769,115,769,
-769,769,769,769,769,769,770,769,115,115,115,115,115,115,115,115,
-771,771,771,771,771,771,771,771,771,771,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+768,768,768,769,769,769,769,769,769,770,771,769,769,769,769,772,
+772,772,772,772,772,772,772,769,116,116,116,116,116,116,116,116,
+773,774,774,774,774,774,774,775,775,774,774,774,773,773,773,773,
+773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,
+773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,
/* block 179 */
-772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
-772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
-772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
-772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
-772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
-772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
-772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
-772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
+773,773,773,773,116,116,776,776,776,776,774,774,774,774,774,774,
+774,774,774,774,774,774,774,775,774,774,777,777,777,773,777,777,
+777,777,777,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+778,778,778,778,778,778,778,778,778,778,778,778,778,778,778,778,
+778,778,778,778,778,778,778,778,778,778,778,778,778,778,778,778,
+778,778,778,778,778,778,778,778,778,778,778,778,778,778,778,778,
+778,778,778,778,778,778,778,778,778,116,116,116,116,116,116,116,
/* block 180 */
-772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
-772,772,772,772,772,772,772,772,772,772,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+779,779,779,779,779,779,779,779,779,116,779,779,779,779,779,779,
+779,779,779,779,779,779,779,779,779,779,779,779,779,779,779,779,
+779,779,779,779,779,779,779,779,779,779,779,779,779,779,779,780,
+781,781,781,781,781,781,781,116,781,781,781,781,781,781,780,781,
+779,782,782,782,782,782,116,116,116,116,116,116,116,116,116,116,
+783,783,783,783,783,783,783,783,783,783,784,784,784,784,784,784,
+784,784,784,784,784,784,784,784,784,784,784,784,784,116,116,116,
+785,785,786,786,786,786,786,786,786,786,786,786,786,786,786,786,
/* block 181 */
-773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,
-773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,
-773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,
-773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,
-773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,
-773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,
-773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,115,
-774,774,774,774,774,115,115,115,115,115,115,115,115,115,115,115,
+786,786,786,786,786,786,786,786,786,786,786,786,786,786,786,786,
+116,116,787,787,787,787,787,787,787,787,787,787,787,787,787,787,
+787,787,787,787,787,787,787,787,116,788,787,787,787,787,787,787,
+787,788,787,787,788,787,787,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 182 */
-772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
-772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
-772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
-772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
-772,772,772,772,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+789,789,789,789,789,789,789,116,789,789,116,789,789,789,789,789,
+789,789,789,789,789,789,789,789,789,789,789,789,789,789,789,789,
+789,789,789,789,789,789,789,789,789,789,789,789,789,789,789,789,
+789,790,790,790,790,790,790,116,116,116,790,116,790,790,116,790,
+790,790,790,790,790,790,791,790,116,116,116,116,116,116,116,116,
+792,792,792,792,792,792,792,792,792,792,116,116,116,116,116,116,
+793,793,793,793,793,793,116,793,793,116,793,793,793,793,793,793,
+793,793,793,793,793,793,793,793,793,793,793,793,793,793,793,793,
/* block 183 */
-775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,
-775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,
-775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,
-775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,
-775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,
-775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,
-775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,
-775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,
+793,793,793,793,793,793,793,793,793,793,794,794,794,794,794,116,
+795,795,116,794,794,795,794,795,793,116,116,116,116,116,116,116,
+796,796,796,796,796,796,796,796,796,796,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 184 */
-775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,
-775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,
-775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
+797,797,797,798,798,799,799,800,800,116,116,116,116,116,116,116,
/* block 185 */
-776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
-776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
-776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
-776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
-776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
-776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
-776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
-776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
/* block 186 */
-776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
-776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
-776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
-776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
-776,776,776,776,776,776,776,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 187 */
-530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
-530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
-530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
-530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
-530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
-530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
-530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
-530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,
+802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,
+802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,
+802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,
+802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,
+802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,
+802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,116,
+803,803,803,803,803,116,116,116,116,116,116,116,116,116,116,116,
/* block 188 */
-530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
-530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
-530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
-530,530,530,530,530,530,530,530,530,115,115,115,115,115,115,115,
-777,777,777,777,777,777,777,777,777,777,777,777,777,777,777,777,
-777,777,777,777,777,777,777,777,777,777,777,777,777,777,777,115,
-778,778,778,778,778,778,778,778,778,778,115,115,115,115,779,779,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 189 */
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-780,780,780,780,780,780,780,780,780,780,780,780,780,780,780,780,
-780,780,780,780,780,780,780,780,780,780,780,780,780,780,115,115,
-781,781,781,781,781,782,115,115,115,115,115,115,115,115,115,115,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
/* block 190 */
-783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,
-783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,
-783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,
-784,784,784,784,784,784,784,785,785,785,785,785,786,786,786,786,
-787,787,787,787,785,786,115,115,115,115,115,115,115,115,115,115,
-788,788,788,788,788,788,788,788,788,788,115,789,789,789,789,789,
-789,789,115,783,783,783,783,783,783,783,783,783,783,783,783,783,
-783,783,783,783,783,783,783,783,115,115,115,115,115,783,783,783,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 191 */
-783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
/* block 192 */
-790,790,790,790,790,790,790,790,790,790,790,790,790,790,790,790,
-790,790,790,790,790,790,790,790,790,790,790,790,790,790,790,790,
-790,790,790,790,790,790,790,790,790,790,790,790,790,790,790,790,
-790,790,790,790,790,790,790,790,790,790,790,790,790,790,790,790,
-790,790,790,790,790,115,115,115,115,115,115,115,115,115,115,115,
-790,791,791,791,791,791,791,791,791,791,791,791,791,791,791,791,
-791,791,791,791,791,791,791,791,791,791,791,791,791,791,791,791,
-791,791,791,791,791,791,791,791,791,791,791,791,791,791,791,115,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 193 */
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,792,
-792,792,792,793,793,793,793,793,793,793,793,793,793,793,793,793,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-794,795,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
/* block 194 */
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,538,
+538,538,538,538,538,538,538,538,538,116,116,116,116,116,116,116,
+806,806,806,806,806,806,806,806,806,806,806,806,806,806,806,806,
+806,806,806,806,806,806,806,806,806,806,806,806,806,806,806,116,
+807,807,807,807,807,807,807,807,807,807,116,116,116,116,808,808,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 195 */
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,796,796,796,796,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+809,809,809,809,809,809,809,809,809,809,809,809,809,809,809,809,
+809,809,809,809,809,809,809,809,809,809,809,809,809,809,116,116,
+810,810,810,810,810,811,116,116,116,116,116,116,116,116,116,116,
/* block 196 */
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,115,115,115,115,115,115,115,115,115,115,115,115,115,
+812,812,812,812,812,812,812,812,812,812,812,812,812,812,812,812,
+812,812,812,812,812,812,812,812,812,812,812,812,812,812,812,812,
+812,812,812,812,812,812,812,812,812,812,812,812,812,812,812,812,
+813,813,813,813,813,813,813,814,814,814,814,814,815,815,815,815,
+816,816,816,816,814,815,116,116,116,116,116,116,116,116,116,116,
+817,817,817,817,817,817,817,817,817,817,116,818,818,818,818,818,
+818,818,116,812,812,812,812,812,812,812,812,812,812,812,812,812,
+812,812,812,812,812,812,812,812,116,116,116,116,116,812,812,812,
/* block 197 */
-509,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+812,812,812,812,812,812,812,812,812,812,812,812,812,812,812,812,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 198 */
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+819,819,819,819,819,819,819,819,819,819,819,819,819,819,819,819,
+819,819,819,819,819,819,819,819,819,819,819,819,819,819,819,819,
+820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,
+820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,
/* block 199 */
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
-507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
+821,821,821,821,821,821,821,821,821,821,821,821,821,821,821,821,
+821,821,821,821,821,821,821,822,822,822,822,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 200 */
-797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
-797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
-797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
-797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
-797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
-797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
-797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
-797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
+823,823,823,823,823,823,823,823,823,823,823,823,823,823,823,823,
+823,823,823,823,823,823,823,823,823,823,823,823,823,823,823,823,
+823,823,823,823,823,823,823,823,823,823,823,823,823,823,823,823,
+823,823,823,823,823,823,823,823,823,823,823,823,823,823,823,823,
+823,823,823,823,823,116,116,116,116,116,116,116,116,116,116,116,
+823,824,824,824,824,824,824,824,824,824,824,824,824,824,824,824,
+824,824,824,824,824,824,824,824,824,824,824,824,824,824,824,824,
+824,824,824,824,824,824,824,824,824,824,824,824,824,824,824,116,
/* block 201 */
-797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
-797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
-797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
-797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
-797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
-797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
-797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
-797,797,797,797,797,797,797,797,797,797,797,797,115,115,115,115,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,825,
+825,825,825,826,826,826,826,826,826,826,826,826,826,826,826,826,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+827,828,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 202 */
-798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,
-798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,
-798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,
-798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,
-798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,
-798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,
-798,798,798,798,798,798,798,798,798,798,798,115,115,115,115,115,
-798,798,798,798,798,798,798,798,798,798,798,798,798,115,115,115,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
/* block 203 */
-798,798,798,798,798,798,798,798,798,115,115,115,115,115,115,115,
-798,798,798,798,798,798,798,798,798,798,115,115,799,800,800,801,
- 22, 22, 22, 22,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 204 */
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
+829,829,829,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 205 */
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19,115,115, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19,802,433,110,110,110, 19, 19, 19,433,802,802,
-802,802,802, 22, 22, 22, 22, 22, 22, 22, 22,110,110,110,110,110,
+517,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
/* block 206 */
-110,110,110, 19, 19,110,110,110,110,110,110,110, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,110,110,110,110, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
/* block 207 */
-596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
-596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
-596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
-596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
-596,596,803,803,803,596,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
/* block 208 */
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
- 23, 23,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
/* block 209 */
-466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
-466,466,466,466,466,466,466,466,466,466,467,467,467,467,467,467,
-467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
-467,467,467,467,466,466,466,466,466,466,466,466,466,466,466,466,
-466,466,466,466,466,466,466,466,466,466,466,466,466,466,467,467,
-467,467,467,467,467,115,467,467,467,467,467,467,467,467,467,467,
-467,467,467,467,467,467,467,467,466,466,466,466,466,466,466,466,
-466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,116,116,116,116,
/* block 210 */
-466,466,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
-467,467,467,467,467,467,467,467,467,467,467,467,466,115,466,466,
-115,115,466,115,115,466,466,115,115,466,466,466,466,115,466,466,
-466,466,466,466,466,466,467,467,467,467,115,467,115,467,467,467,
-467,467,467,467,115,467,467,467,467,467,467,467,467,467,467,467,
-466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
-466,466,466,466,466,466,466,466,466,466,467,467,467,467,467,467,
-467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
+831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,
+831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,
+831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,
+831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,
+831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,
+831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,831,
+831,831,831,831,831,831,831,831,831,831,831,116,116,116,116,116,
+831,831,831,831,831,831,831,831,831,831,831,831,831,116,116,116,
/* block 211 */
-467,467,467,467,466,466,115,466,466,466,466,115,115,466,466,466,
-466,466,466,466,466,115,466,466,466,466,466,466,466,115,467,467,
-467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
-467,467,467,467,467,467,467,467,466,466,115,466,466,466,466,115,
-466,466,466,466,466,115,466,115,115,115,466,466,466,466,466,466,
-466,115,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
-467,467,467,467,467,467,467,467,467,467,467,467,466,466,466,466,
-466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
+831,831,831,831,831,831,831,831,831,116,116,116,116,116,116,116,
+831,831,831,831,831,831,831,831,831,831,116,116,832,833,833,834,
+ 23, 23, 23, 23,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 212 */
-466,466,466,466,466,466,467,467,467,467,467,467,467,467,467,467,
-467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
-466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
-466,466,466,466,466,466,466,466,466,466,467,467,467,467,467,467,
-467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
-467,467,467,467,466,466,466,466,466,466,466,466,466,466,466,466,
-466,466,466,466,466,466,466,466,466,466,466,466,466,466,467,467,
-467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19,116,116,116,116,116,116,116,116,116,116,
/* block 213 */
-467,467,467,467,467,467,467,467,466,466,466,466,466,466,466,466,
-466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
-466,466,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
-467,467,467,467,467,467,467,467,467,467,467,467,466,466,466,466,
-466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
-466,466,466,466,466,466,467,467,467,467,467,467,467,467,467,467,
-467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
-466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19,116,116, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19,835,438,111,111,111, 19, 19, 19,438,835,835,
+835,835,835, 23, 23, 23, 23, 23, 23, 23, 23,111,111,111,111,111,
/* block 214 */
-466,466,466,466,466,466,466,466,466,466,467,467,467,467,467,467,
-467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
-467,467,467,467,467,467,115,115,466,466,466,466,466,466,466,466,
-466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
-466, 8,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
-467,467,467,467,467,467,467,467,467,467,467, 8,467,467,467,467,
-467,467,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
-466,466,466,466,466,466,466,466,466,466,466, 8,467,467,467,467,
+111,111,111, 19, 19,111,111,111,111,111,111,111, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,111,111,111,111, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 215 */
-467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
-467,467,467,467,467, 8,467,467,467,467,467,467,466,466,466,466,
-466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
-466,466,466,466,466, 8,467,467,467,467,467,467,467,467,467,467,
-467,467,467,467,467,467,467,467,467,467,467,467,467,467,467, 8,
-467,467,467,467,467,467,466,466,466,466,466,466,466,466,466,466,
-466,466,466,466,466,466,466,466,466,466,466,466,466,466,466, 8,
-467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
+604,604,604,604,604,604,604,604,604,604,604,604,604,604,604,604,
+604,604,604,604,604,604,604,604,604,604,604,604,604,604,604,604,
+604,604,604,604,604,604,604,604,604,604,604,604,604,604,604,604,
+604,604,604,604,604,604,604,604,604,604,604,604,604,604,604,604,
+604,604,836,836,836,604,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 216 */
-467,467,467,467,467,467,467,467,467, 8,467,467,467,467,467,467,
-466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
-466,466,466,466,466,466,466,466,466, 8,467,467,467,467,467,467,
-467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
-467,467,467, 8,467,467,467,467,467,467,466,467,115,115, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 217 */
-804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
-804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
-804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
-804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
-804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
-804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
-804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
-804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19,116,116,116,116,116,116,116,116,116,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24,116,116,116,116,116,116,116,
/* block 218 */
-805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
-805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
-805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
-805,805,805,805,805,805,805,804,804,804,804,805,805,805,805,805,
-805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
-805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
-805,805,805,805,805,805,805,805,805,805,805,805,805,804,804,804,
-804,804,804,804,804,805,804,804,804,804,804,804,804,804,804,804,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,472,472,472,472,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,473,473,
+473,473,473,473,473,116,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
/* block 219 */
-804,804,804,804,805,804,804,806,806,806,806,806,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,805,805,805,805,805,
-115,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+472,472,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,472,116,472,472,
+116,116,472,116,116,472,472,116,116,472,472,472,472,116,472,472,
+472,472,472,472,472,472,473,473,473,473,116,473,116,473,473,473,
+473,473,473,473,116,473,473,473,473,473,473,473,473,473,473,473,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
/* block 220 */
-807,807,807,807,807,807,807,115,807,807,807,807,807,807,807,807,
-807,807,807,807,807,807,807,807,807,115,115,807,807,807,807,807,
-807,807,115,807,807,115,807,807,807,807,807,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+473,473,473,473,472,472,116,472,472,472,472,116,116,472,472,472,
+472,472,472,472,472,116,472,472,472,472,472,472,472,116,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,472,472,116,472,472,472,472,116,
+472,472,472,472,472,116,472,116,116,116,472,472,472,472,472,472,
+472,116,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
/* block 221 */
-808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
-808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
-808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
-808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
-808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
-808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
-808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
-808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
+472,472,472,472,472,472,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,472,472,472,472,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
/* block 222 */
-808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
-808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
-808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
-808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
-808,808,808,808,808,115,115,809,809,809,809,809,809,809,809,809,
-810,810,810,810,810,810,810,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+473,473,473,473,473,473,473,473,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
+472,472,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
/* block 223 */
-811,811,811,811,811,811,811,811,811,811,811,811,811,811,811,811,
-811,811,811,811,811,811,811,811,811,811,811,811,811,811,811,811,
-811,811,812,812,812,812,812,812,812,812,812,812,812,812,812,812,
-812,812,812,812,812,812,812,812,812,812,812,812,812,812,812,812,
-812,812,812,812,813,813,813,813,813,813,813,115,115,115,115,115,
-814,814,814,814,814,814,814,814,814,814,115,115,115,115,815,815,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+472,472,472,472,472,472,472,472,472,472,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473,473,116,116,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
+472, 8,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473, 8,473,473,473,473,
+473,473,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,472, 8,473,473,473,473,
/* block 224 */
-216,216,216,216,115,216,216,216,216,216,216,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
-115,216,216,115,216,115,115,216,115,216,216,216,216,216,216,216,
-216,216,216,115,216,216,216,216,115,216,115,216,115,115,115,115,
-115,115,216,115,115,115,115,216,115,216,115,216,115,216,216,216,
-115,216,216,115,216,115,115,216,115,216,115,216,115,216,115,216,
-115,216,216,115,216,115,115,216,216,216,216,115,216,216,216,216,
-216,216,216,115,216,216,216,216,115,216,216,216,216,115,216,115,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473, 8,473,473,473,473,473,473,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
+472,472,472,472,472, 8,473,473,473,473,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473, 8,
+473,473,473,473,473,473,472,472,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472, 8,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
/* block 225 */
-216,216,216,216,216,216,216,216,216,216,115,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,115,115,115,115,
-115,216,216,216,115,216,216,216,216,216,115,216,216,216,216,216,
-216,216,216,216,216,216,216,216,216,216,216,216,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-210,210,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+473,473,473,473,473,473,473,473,473, 8,473,473,473,473,473,473,
+472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,472,
+472,472,472,472,472,472,472,472,472, 8,473,473,473,473,473,473,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+473,473,473, 8,473,473,473,473,473,473,472,473,116,116, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
/* block 226 */
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,
+837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,
+837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,
+837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,
+837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,
+837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,
+837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,
+837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,837,
/* block 227 */
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,115,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,
-115, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-115, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-115, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,
+838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,
+838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,
+838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,
+838,838,838,838,838,838,838,837,837,837,837,838,838,838,838,838,
+838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,
+838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,
+838,838,838,838,838,838,838,838,838,838,838,838,838,837,837,837,
+837,837,837,837,837,838,837,837,837,837,837,837,837,837,837,837,
/* block 228 */
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,115,115,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+837,837,837,837,838,837,837,839,839,839,839,839,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,838,838,838,838,838,
+116,838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 229 */
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,816,816,816,816,816,816,816,816,816,816,
-816,816,816,816,816,816,816,816,816,816,816,816,816,816,816,816,
+840,840,840,840,840,840,840,116,840,840,840,840,840,840,840,840,
+840,840,840,840,840,840,840,840,840,116,116,840,840,840,840,840,
+840,840,116,840,840,116,840,840,840,840,840,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 230 */
-817, 19, 19,115,115,115,115,115,115,115,115,115,115,115,115,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,
- 19, 19,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
- 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
/* block 231 */
- 19, 19, 19, 19, 19, 19, 19, 19,479, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,479, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19,479, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,841,
+841,841,841,841,841,116,116,842,842,842,842,842,842,842,842,842,
+843,843,843,843,843,843,843,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 232 */
- 19, 19, 19, 19, 19,478, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19,479, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19,479, 19, 19, 19,479, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19,478,478,478, 19, 19,478, 19, 19,478,478,478, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,479, 19,479, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,818,818,818,818,818,
+844,844,844,844,844,844,844,844,844,844,844,844,844,844,844,844,
+844,844,844,844,844,844,844,844,844,844,844,844,844,844,844,844,
+844,844,845,845,845,845,845,845,845,845,845,845,845,845,845,845,
+845,845,845,845,845,845,845,845,845,845,845,845,845,845,845,845,
+845,845,845,845,846,846,846,846,846,846,846,116,116,116,116,116,
+847,847,847,847,847,847,847,847,847,847,116,116,116,116,848,848,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 233 */
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19,478,478, 19, 19,478,478,478,478,478,478,478,478,478,478,
-478, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19,819,819,819,819, 19, 19, 19, 19,478, 19,
-478,478,478,478,478,478,478,478,478, 19, 19, 19,478, 19, 19, 19,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
/* block 234 */
- 19,478,478,478, 19,478,478,478, 19, 19, 19,479, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,478, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,479,479, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 19, 24, 24, 24,
+ 5, 24, 24, 24, 24,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 235 */
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19,479, 19, 19, 19, 19,479, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19,478,478, 19, 19, 19, 19,478, 19, 19, 19, 19, 19,
+217,217,217,217,116,217,217,217,217,217,217,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,217,
+116,217,217,116,217,116,116,217,116,217,217,217,217,217,217,217,
+217,217,217,116,217,217,217,217,116,217,116,217,116,116,116,116,
+116,116,217,116,116,116,116,217,116,217,116,217,116,217,217,217,
+116,217,217,116,217,116,116,217,116,217,116,217,116,217,116,217,
+116,217,217,116,217,116,116,217,217,217,217,116,217,217,217,217,
+217,217,217,116,217,217,217,217,116,217,217,217,217,116,217,116,
/* block 236 */
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-478, 19, 19, 19, 19,478,478, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19,479, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+217,217,217,217,217,217,217,217,217,217,116,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,116,116,116,116,
+116,217,217,217,116,217,217,217,217,217,116,217,217,217,217,217,
+217,217,217,217,217,217,217,217,217,217,217,217,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+211,211,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
/* block 237 */
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19,478,478,478, 19, 19, 19,478,478,478,478,478,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,849,849,849,849,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
/* block 238 */
-479, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19,479, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19,478, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19,478,478,478, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-478, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,478, 19, 19, 19,
- 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20,849,849,849,849,849,849,849,849,849,849,849,849,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,849,
+849, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+849, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+849, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20,849,849,849,849,849,849,849,849,849,849,
/* block 239 */
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,849,849,849,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,115,115,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,849,849,849,849,
+ 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20,
/* block 240 */
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 19,
+ 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,850,850,850,850,850,850,850,850,850,850,
+850,850,850,850,850,850,850,850,850,850,850,850,850,850,850,850,
/* block 241 */
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+851, 20, 20,849,849,849,849,849,849,849,849,849,849,849,849,849,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20,
+ 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 19,849,849,849,849,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19,849,849,849,849,849,849,849,
+ 20, 20,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+ 20, 20, 20, 20, 20, 20,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
/* block 242 */
- 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
/* block 243 */
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,
- 19, 19, 19, 19, 19, 19, 19, 19,478,478,478,478,478, 19,478,478,
- 19, 19, 19, 19, 19, 19,478, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-478,478,478,478,478,478,478,478,478,478, 19, 19, 19,478,478,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
/* block 244 */
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
- 19,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
- 19,478,478,478,478,478,478,478,478,478,478,478,478,478, 19, 19,
- 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,852,852,852,852,852,
/* block 245 */
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 19, 19,
+ 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
/* block 246 */
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,115,115,115,115,115,115,115,115,115,115,115,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
/* block 247 */
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,115,115,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20,849,849,849,849,849,849,849,849,849,849,849,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,849,849,849,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,849,849,849,849,849,849,
/* block 248 */
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19,849,849,849,849,849,849,849,849,849,849,849,849,
/* block 249 */
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 20, 20, 20, 20,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
/* block 250 */
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
-515,515,515,515,515,515,515,515,515,515,515,515,515,515,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,849,849,849,849,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19,849,849,849,849,849,849,849,849,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,849,849,849,849,849,849,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
/* block 251 */
-465, 22,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
-465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
-820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,
-820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,
-820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,
-820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,
-820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,
-820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,
+ 19, 19, 19, 19, 19, 19, 19, 19,849,849,849,849,849,849,849,849,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
/* block 252 */
-465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
-465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
-465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
-465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
-465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
-465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
-465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
-465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,849,849,849,849,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 19, 20, 20, 20,849,
+ 20, 20, 20, 20, 20, 20, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20,849,849, 20, 20, 20, 20,849,849,849, 20,849, 20, 20, 20, 20,
/* block 253 */
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20,849,849,849,849,849,849,849,849,849,849,849,849,849,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,849,849,849,849,849,849,
+ 20, 20, 20,849,849,849,849,849,849,849,849,849,849,849,849,849,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
/* block 254 */
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
/* block 255 */
-589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
-589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
-589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
-589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
-589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
-589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
-589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
-589,589,589,589,589,589,589,589,589,589,589,589,589,589,115,115,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
+849,849,849,849,849,849,849,849,849,849,849,849,849,849,116,116,
+
+/* block 256 */
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+
+/* block 257 */
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,116,116,116,116,116,116,116,116,116,116,116,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+
+/* block 258 */
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,116,116,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+
+/* block 259 */
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+
+/* block 260 */
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+
+/* block 261 */
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,523,
+523,523,523,523,523,523,523,523,523,523,523,523,523,523,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,
+
+/* block 262 */
+471, 23,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
+471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
+853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,
+853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,
+853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,
+853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,
+853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,
+853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,853,
+
+/* block 263 */
+471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
+471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
+471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
+471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
+471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
+471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
+471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
+471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
+
+/* block 264 */
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+
+/* block 265 */
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
+471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,471,
+
+/* block 266 */
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,116,116,
};
diff --git a/src/pcre2_ucp.h b/src/pcre2_ucp.h
index defba4c..0c330ed 100644
--- a/src/pcre2_ucp.h
+++ b/src/pcre2_ucp.h
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -100,27 +100,25 @@ enum {
ucp_Zs /* Space separator */
};
-/* These are grapheme break properties. */
+/* These are grapheme break properties. The Extended Pictographic property
+comes from the emoji-data.txt file. */
enum {
- ucp_gbCR, /* 0 */
- ucp_gbLF, /* 1 */
- ucp_gbControl, /* 2 */
- ucp_gbExtend, /* 3 */
- ucp_gbPrepend, /* 4 */
- ucp_gbSpacingMark, /* 5 */
- ucp_gbL, /* 6 Hangul syllable type L */
- ucp_gbV, /* 7 Hangul syllable type V */
- ucp_gbT, /* 8 Hangul syllable type T */
- ucp_gbLV, /* 9 Hangul syllable type LV */
- ucp_gbLVT, /* 10 Hangul syllable type LVT */
- ucp_gbRegionalIndicator, /* 11 */
- ucp_gbOther, /* 12 */
- ucp_gbE_Base, /* 13 */
- ucp_gbE_Modifier, /* 14 */
- ucp_gbE_Base_GAZ, /* 15 */
- ucp_gbZWJ, /* 16 */
- ucp_gbGlue_After_Zwj /* 17 */
+ ucp_gbCR, /* 0 */
+ ucp_gbLF, /* 1 */
+ ucp_gbControl, /* 2 */
+ ucp_gbExtend, /* 3 */
+ ucp_gbPrepend, /* 4 */
+ ucp_gbSpacingMark, /* 5 */
+ ucp_gbL, /* 6 Hangul syllable type L */
+ ucp_gbV, /* 7 Hangul syllable type V */
+ ucp_gbT, /* 8 Hangul syllable type T */
+ ucp_gbLV, /* 9 Hangul syllable type LV */
+ ucp_gbLVT, /* 10 Hangul syllable type LVT */
+ ucp_gbRegionalIndicator, /* 11 */
+ ucp_gbOther, /* 12 */
+ ucp_gbZWJ, /* 13 */
+ ucp_gbExtended_Pictographic /* 14 */
};
/* These are the script identifications. */
@@ -274,7 +272,15 @@ enum {
ucp_Masaram_Gondi,
ucp_Nushu,
ucp_Soyombo,
- ucp_Zanabazar_Square
+ ucp_Zanabazar_Square,
+ /* New for Unicode 11.0.0 */
+ ucp_Dogra,
+ ucp_Gunjala_Gondi,
+ ucp_Hanifi_Rohingya,
+ ucp_Makasar,
+ ucp_Medefaidrin,
+ ucp_Old_Sogdian,
+ ucp_Sogdian
};
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
diff --git a/src/pcre2grep.c b/src/pcre2grep.c
index 02339f5..d5f34c8 100644
--- a/src/pcre2grep.c
+++ b/src/pcre2grep.c
@@ -13,7 +13,7 @@ distribution because other apparatus is needed to compile pcre2grep for z/OS.
The header can be found in the special z/OS distribution, which is available
from www.zaconsultants.net or from www.cbttape.org.
- Copyright (c) 1997-2017 University of Cambridge
+ Copyright (c) 1997-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -64,8 +64,8 @@ POSSIBILITY OF SUCH DAMAGE.
#endif
/* Some cmake's define it still */
-#if defined(__CYGWIN__) && !defined(WIN32)
-#define WIN32
+#if defined(__CYGWIN__) && defined(WIN32)
+#undef WIN32
#endif
#ifdef WIN32
@@ -96,6 +96,14 @@ POSSIBILITY OF SUCH DAMAGE.
#define PCRE2_CODE_UNIT_WIDTH 8
#include "pcre2.h"
+/* Older versions of MSVC lack snprintf(). This define allows for
+warning/error-free compilation and testing with MSVC compilers back to at least
+MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
+
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+#define snprintf _snprintf
+#endif
+
#define FALSE 0
#define TRUE 1
@@ -303,6 +311,7 @@ also for include/exclude patterns. */
typedef struct patstr {
struct patstr *next;
char *string;
+ PCRE2_SIZE length;
pcre2_code *compiled;
} patstr;
@@ -407,7 +416,7 @@ static option_item optionlist[] = {
{ OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
{ OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
{ OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
- { OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kilobytes)" },
+ { OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kibibytes)" },
{ OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" },
{ OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
{ OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
@@ -460,6 +469,43 @@ const char utf8_table4[] = {
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
+#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
+/*************************************************
+* Emulated memmove() for systems without it *
+*************************************************/
+
+/* This function can make use of bcopy() if it is available. Otherwise do it by
+steam, as there are some non-Unix environments that lack both memmove() and
+bcopy(). */
+
+static void *
+emulated_memmove(void *d, const void *s, size_t n)
+{
+#ifdef HAVE_BCOPY
+bcopy(s, d, n);
+return d;
+#else
+size_t i;
+unsigned char *dest = (unsigned char *)d;
+const unsigned char *src = (const unsigned char *)s;
+if (dest > src)
+ {
+ dest += n;
+ src += n;
+ for (i = 0; i < n; ++i) *(--dest) = *(--src);
+ return (void *)dest;
+ }
+else
+ {
+ for (i = 0; i < n; ++i) *dest++ = *src++;
+ return (void *)(dest - n);
+ }
+#endif /* not HAVE_BCOPY */
+}
+#undef memmove
+#define memmove(d,s,n) emulated_memmove(d,s,n)
+#endif /* not VPCOMPAT && not HAVE_MEMMOVE */
+
/*************************************************
* Case-independent string compare *
@@ -557,13 +603,14 @@ exit(rc);
Arguments:
s pattern string to add
+ patlen length of pattern
after if not NULL points to item to insert after
Returns: new pattern block or NULL on error
*/
static patstr *
-add_pattern(char *s, patstr *after)
+add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
{
patstr *p = (patstr *)malloc(sizeof(patstr));
if (p == NULL)
@@ -571,7 +618,7 @@ if (p == NULL)
fprintf(stderr, "pcre2grep: malloc failed\n");
pcre2grep_exit(2);
}
-if (strlen(s) > MAXPATLEN)
+if (patlen > MAXPATLEN)
{
fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
MAXPATLEN);
@@ -580,6 +627,7 @@ if (strlen(s) > MAXPATLEN)
}
p->next = NULL;
p->string = s;
+p->length = patlen;
p->compiled = NULL;
if (after != NULL)
@@ -1276,12 +1324,14 @@ return om;
* Read one line of input *
*************************************************/
-/* Normally, input is read using fread() (or gzread, or BZ2_read) into a large
-buffer, so many lines may be read at once. However, doing this for tty input
-means that no output appears until a lot of input has been typed. Instead, tty
-input is handled line by line. We cannot use fgets() for this, because it does
-not stop at a binary zero, and therefore there is no way of telling how many
-characters it has read, because there may be binary zeros embedded in the data.
+/* Normally, input that is to be scanned is read using fread() (or gzread, or
+BZ2_read) into a large buffer, so many lines may be read at once. However,
+doing this for tty input means that no output appears until a lot of input has
+been typed. Instead, tty input is handled line by line. We cannot use fgets()
+for this, because it does not stop at a binary zero, and therefore there is no
+way of telling how many characters it has read, because there may be binary
+zeros embedded in the data. This function is also used for reading patterns
+from files (the -f option).
Arguments:
buffer the buffer to read into
@@ -1291,7 +1341,7 @@ Arguments:
Returns: the number of characters read, zero at end of file
*/
-static unsigned int
+static PCRE2_SIZE
read_one_line(char *buffer, int length, FILE *f)
{
int c;
@@ -1651,11 +1701,11 @@ Returns: TRUE if there was a match
*/
static BOOL
-match_patterns(char *matchptr, size_t length, unsigned int options,
- size_t startoffset, int *mrc)
+match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
+ PCRE2_SIZE startoffset, int *mrc)
{
int i;
-size_t slen = length;
+PCRE2_SIZE slen = length;
patstr *p = patterns;
const char *msg = "this text:\n\n";
@@ -2314,10 +2364,10 @@ int filepos = 0;
unsigned long int linenumber = 1;
unsigned long int lastmatchnumber = 0;
unsigned long int count = 0;
-char *lastmatchrestart = NULL;
+char *lastmatchrestart = main_buffer;
char *ptr = main_buffer;
char *endptr;
-size_t bufflength;
+PCRE2_SIZE bufflength;
BOOL binary = FALSE;
BOOL endhyphenpending = FALSE;
BOOL input_line_buffered = line_buffered;
@@ -2339,7 +2389,7 @@ bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
input_line_buffered);
#ifdef SUPPORT_LIBBZ2
-if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
+if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is PCRE2_SIZE; */
#endif
endptr = main_buffer + bufflength;
@@ -2368,8 +2418,8 @@ while (ptr < endptr)
unsigned int options = 0;
BOOL match;
char *t = ptr;
- size_t length, linelength;
- size_t startoffset = 0;
+ PCRE2_SIZE length, linelength;
+ PCRE2_SIZE startoffset = 0;
/* At this point, ptr is at the start of a line. We need to find the length
of the subject string to pass to pcre2_match(). In multiline mode, it is the
@@ -2381,7 +2431,7 @@ while (ptr < endptr)
t = end_of_line(t, endptr, &endlinelength);
linelength = t - ptr - endlinelength;
- length = multiline? (size_t)(endptr - ptr) : linelength;
+ length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
/* Check to see if the line we are looking at extends right to the very end
of the buffer without a line terminator. This means the line is too long to
@@ -2560,7 +2610,7 @@ while (ptr < endptr)
{
if (!invert)
{
- size_t oldstartoffset;
+ PCRE2_SIZE oldstartoffset;
if (printname != NULL) fprintf(stdout, "%s:", printname);
if (number) fprintf(stdout, "%lu:", linenumber);
@@ -2647,7 +2697,7 @@ while (ptr < endptr)
startoffset -= (int)(linelength + endlinelength);
t = end_of_line(ptr, endptr, &endlinelength);
linelength = t - ptr - endlinelength;
- length = (size_t)(endptr - ptr);
+ length = (PCRE2_SIZE)(endptr - ptr);
}
goto ONLY_MATCHING_RESTART;
@@ -2812,7 +2862,7 @@ while (ptr < endptr)
endprevious -= (int)(linelength + endlinelength);
t = end_of_line(ptr, endptr, &endlinelength);
linelength = t - ptr - endlinelength;
- length = (size_t)(endptr - ptr);
+ length = (PCRE2_SIZE)(endptr - ptr);
}
/* If startoffset is at the exact end of the line it means this
@@ -2895,7 +2945,7 @@ while (ptr < endptr)
/* If input is line buffered, and the buffer is not yet full, read another
line and add it into the buffer. */
- if (input_line_buffered && bufflength < (size_t)bufsize)
+ if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
{
int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
bufflength += add;
@@ -2907,7 +2957,7 @@ while (ptr < endptr)
1/3 and refill it. Before we do this, if some unprinted "after" lines are
about to be lost, print them. */
- if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
+ if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
{
if (after_context > 0 &&
lastmatchnumber > 0 &&
@@ -2919,7 +2969,7 @@ while (ptr < endptr)
/* Now do the shuffle */
- memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
+ (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
ptr -= bufthird;
bufflength = 2*bufthird + fill_buffer(handle, frtype,
@@ -3395,9 +3445,8 @@ PCRE2_SIZE patlen, erroffset;
PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
if (p->compiled != NULL) return TRUE;
-
ps = p->string;
-patlen = strlen(ps);
+patlen = p->length;
if ((options & PCRE2_LITERAL) != 0)
{
@@ -3407,8 +3456,8 @@ if ((options & PCRE2_LITERAL) != 0)
if (ellength != 0)
{
- if (add_pattern(pe, p) == NULL) return FALSE;
- patlen = (int)(pe - ps - ellength);
+ patlen = pe - ps - ellength;
+ if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
}
}
@@ -3470,6 +3519,7 @@ static BOOL
read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
{
int linenumber = 0;
+PCRE2_SIZE patlen;
FILE *f;
const char *filename;
char buffer[MAXPATLEN+20];
@@ -3490,20 +3540,18 @@ else
filename = name;
}
-while (fgets(buffer, sizeof(buffer), f) != NULL)
+while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0)
{
- char *s = buffer + (int)strlen(buffer);
- while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
- *s = 0;
+ while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
linenumber++;
- if (buffer[0] == 0) continue; /* Skip blank lines */
+ if (patlen == 0) continue; /* Skip blank lines */
/* Note: this call to add_pattern() puts a pointer to the local variable
"buffer" into the pattern chain. However, that pointer is used only when
compiling the pattern, which happens immediately below, so we flatten it
afterwards, as a precaution against any later code trying to use it. */
- *patlastptr = add_pattern(buffer, *patlastptr);
+ *patlastptr = add_pattern(buffer, patlen, *patlastptr);
if (*patlastptr == NULL)
{
if (f != stdin) fclose(f);
@@ -3513,8 +3561,9 @@ while (fgets(buffer, sizeof(buffer), f) != NULL)
/* This loop is needed because compiling a "pattern" when -F is set may add
on additional literal patterns if the original contains a newline. In the
- common case, it never will, because fgets() stops at a newline. However,
- the -N option can be used to give pcre2grep a different newline setting. */
+ common case, it never will, because read_one_line() stops at a newline.
+ However, the -N option can be used to give pcre2grep a different newline
+ setting. */
for(;;)
{
@@ -3659,14 +3708,23 @@ for (i = 1; i < argc; i++)
{
char buff1[24];
char buff2[24];
+ int ret;
int baselen = (int)(opbra - op->long_name);
int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
int arglen = (argequals == NULL || equals == NULL)?
(int)strlen(arg) : (int)(argequals - arg);
- sprintf(buff1, "%.*s", baselen, op->long_name);
- sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
+ if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name),
+ ret < 0 || ret > (int)sizeof(buff1)) ||
+ (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
+ fulllen - baselen - 2, opbra + 1),
+ ret < 0 || ret > (int)sizeof(buff2)))
+ {
+ fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
+ op->long_name);
+ pcre2grep_exit(2);
+ }
if (strncmp(arg, buff1, arglen) == 0 ||
strncmp(arg, buff2, arglen) == 0)
@@ -3833,7 +3891,8 @@ for (i = 1; i < argc; i++)
else if (op->type == OP_PATLIST)
{
patdatastr *pd = (patdatastr *)op->dataptr;
- *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
+ *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
+ *(pd->lastptr));
if (*(pd->lastptr) == NULL) goto EXIT2;
if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
}
@@ -4095,7 +4154,9 @@ the first argument is the one and only pattern, and it must exist. */
if (patterns == NULL && pattern_files == NULL)
{
if (i >= argc) return usage(2);
- patterns = patterns_last = add_pattern(argv[i++], NULL);
+ patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
+ NULL);
+ i++;
if (patterns == NULL) goto EXIT2;
}
diff --git a/src/pcre2posix.c b/src/pcre2posix.c
index 026943e..7b9f477 100644
--- a/src/pcre2posix.c
+++ b/src/pcre2posix.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -93,7 +93,7 @@ information; I know nothing about MSVC myself). For example, something like
void __cdecl function(....)
-might be needed. In order so make this easy, all the exported functions have
+might be needed. In order to make this easy, all the exported functions have
PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
set, we ensure here that it has no effect. */
@@ -344,8 +344,10 @@ if (rc >= 0)
if ((size_t)rc > nmatch) rc = (int)nmatch;
for (i = 0; i < (size_t)rc; i++)
{
- pmatch[i].rm_so = ovector[i*2] + so;
- pmatch[i].rm_eo = ovector[i*2+1] + so;
+ pmatch[i].rm_so = (ovector[i*2] == PCRE2_UNSET)? -1 :
+ (int)(ovector[i*2] + so);
+ pmatch[i].rm_eo = (ovector[i*2+1] == PCRE2_UNSET)? -1 :
+ (int)(ovector[i*2+1] + so);
}
for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
return 0;
diff --git a/src/pcre2test.c b/src/pcre2test.c
index 15bf404..8cfb8e9 100644
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@@ -11,7 +11,7 @@ hacked-up (non-) design had also run out of steam.
Written by Philip Hazel
Original code Copyright (c) 1997-2012 University of Cambridge
- Rewritten code Copyright (c) 2016-2017 University of Cambridge
+ Rewritten code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -80,7 +80,7 @@ from www.cbttape.org. */
/* Debugging code enabler */
-// #define DEBUG_SHOW_MALLOC_ADDRESSES
+/* #define DEBUG_SHOW_MALLOC_ADDRESSES */
/* Both libreadline and libedit are optionally supported. The user-supplied
original patch uses readline/readline.h for libedit, but in at least one system
@@ -162,11 +162,16 @@ patterns. */
void vms_setsymbol( char *, char *, int );
#endif
-/* VC doesn't support "%td". */
-#ifdef _MSC_VER
-#define PTR_SPEC "%lu"
+/* VC and older compilers don't support %td or %zu. */
+
+#if defined(_MSC_VER) || !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
+#define PTR_FORM "lu"
+#define SIZ_FORM "lu"
+#define SIZ_CAST (unsigned long int)
#else
-#define PTR_SPEC "%td"
+#define PTR_FORM "td"
+#define SIZ_FORM "zu"
+#define SIZ_CAST
#endif
/* ------------------End of system-specific definitions -------------------- */
@@ -492,7 +497,7 @@ so many of them that they are split into two fields. */
/* These are the matching controls that may be set either on a pattern or on a
data line. They are copied from the pattern controls as initial settings for
-data line controls Note that CTL_MEMORY is not included here, because it does
+data line controls. Note that CTL_MEMORY is not included here, because it does
different things in the two cases. */
#define CTL_ALLPD (CTL_AFTERTEXT|\
@@ -2590,6 +2595,46 @@ static const uint8_t tables2[] = {
};
+
+#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
+/*************************************************
+* Emulated memmove() for systems without it *
+*************************************************/
+
+/* This function can make use of bcopy() if it is available. Otherwise do it by
+steam, as there are some non-Unix environments that lack both memmove() and
+bcopy(). */
+
+static void *
+emulated_memmove(void *d, const void *s, size_t n)
+{
+#ifdef HAVE_BCOPY
+bcopy(s, d, n);
+return d;
+#else
+size_t i;
+unsigned char *dest = (unsigned char *)d;
+const unsigned char *src = (const unsigned char *)s;
+if (dest > src)
+ {
+ dest += n;
+ src += n;
+ for (i = 0; i < n; ++i) *(--dest) = *(--src);
+ return (void *)dest;
+ }
+else
+ {
+ for (i = 0; i < n; ++i) *dest++ = *src++;
+ return (void *)(dest - n);
+ }
+#endif /* not HAVE_BCOPY */
+}
+#undef memmove
+#define memmove(d,s,n) emulated_memmove(d,s,n)
+#endif /* not VPCOMPAT && not HAVE_MEMMOVE */
+
+
+
#ifndef HAVE_STRERROR
/*************************************************
* Provide strerror() for non-ANSI libraries *
@@ -2626,11 +2671,11 @@ if (show_memory)
{
if (block == NULL)
{
- fprintf(outfile, "** malloc() failed for %zd\n", size);
+ fprintf(outfile, "** malloc() failed for %" SIZ_FORM "\n", SIZ_CAST size);
}
else
{
- fprintf(outfile, "malloc %5zd", size);
+ fprintf(outfile, "malloc %5" SIZ_FORM, SIZ_CAST size);
#ifdef DEBUG_SHOW_MALLOC_ADDRESSES
fprintf(outfile, " %p", block); /* Not portable */
#endif
@@ -2660,7 +2705,7 @@ if (show_memory)
{
if (block == malloclist[i])
{
- fprintf(outfile, " %5zd", malloclistlength[i]);
+ fprintf(outfile, " %5" SIZ_FORM, SIZ_CAST malloclistlength[i]);
malloclistptr--;
for (j = i; j < malloclistptr; j++)
{
@@ -3038,8 +3083,8 @@ if (pbuffer16_size < 2*len + 2)
pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
if (pbuffer16 == NULL)
{
- fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer16\n",
- (unsigned long int)pbuffer16_size);
+ fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
+ SIZ_CAST pbuffer16_size);
exit(1);
}
}
@@ -3125,8 +3170,8 @@ if (pbuffer32_size < 4*len + 4)
pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
if (pbuffer32 == NULL)
{
- fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer32\n",
- (unsigned long int)pbuffer32_size);
+ fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
+ SIZ_CAST pbuffer32_size);
exit(1);
}
}
@@ -4673,12 +4718,6 @@ uint16_t first_listed_newline;
const char *cmdname;
uint8_t *argptr, *serial;
-if (restrict_for_perl_test)
- {
- fprintf(outfile, "** #-commands are not allowed after #perltest\n");
- return PR_ABEND;
- }
-
yield = PR_OK;
cmd = CMD_UNKNOWN;
cmdlen = 0;
@@ -4697,6 +4736,12 @@ for (i = 0; i < cmdlistcount; i++)
argptr = buffer + cmdlen + 1;
+if (restrict_for_perl_test && cmd != CMD_PATTERN && cmd != CMD_SUBJECT)
+ {
+ fprintf(outfile, "** #%s is not allowed after #perltest\n", cmdname);
+ return PR_ABEND;
+ }
+
switch(cmd)
{
case CMD_UNKNOWN:
@@ -4844,8 +4889,8 @@ switch(cmd)
serial = malloc(serial_size);
if (serial == NULL)
{
- fprintf(outfile, "** Failed to get memory (size %lu) for #load\n",
- (unsigned long int)serial_size);
+ fprintf(outfile, "** Failed to get memory (size %" SIZ_FORM ") for #load\n",
+ SIZ_CAST serial_size);
fclose(f);
return PR_ABEND;
}
@@ -5039,7 +5084,7 @@ if ((pat_patctl.control & CTL_HEXPAT) != 0)
if (d == 0)
{
fprintf(outfile, "** Missing closing quote in hex pattern: "
- "opening quote is at offset " PTR_SPEC ".\n", pq - buffer - 2);
+ "opening quote is at offset %" PTR_FORM ".\n", pq - buffer - 2);
return PR_SKIP;
}
if (d == c) break;
@@ -5053,8 +5098,8 @@ if ((pat_patctl.control & CTL_HEXPAT) != 0)
{
if (!isxdigit(c))
{
- fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset "
- PTR_SPEC " in hex pattern: quote missing?\n", c, pp - buffer - 2);
+ fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
+ PTR_FORM " in hex pattern: quote missing?\n", c, pp - buffer - 2);
return PR_SKIP;
}
if (*pp == 0)
@@ -5065,8 +5110,8 @@ if ((pat_patctl.control & CTL_HEXPAT) != 0)
d = *pp;
if (!isxdigit(d))
{
- fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset "
- PTR_SPEC " in hex pattern: quote missing?\n", d, pp - buffer - 1);
+ fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
+ PTR_FORM " in hex pattern: quote missing?\n", d, pp - buffer - 1);
return PR_SKIP;
}
c = toupper(c);
@@ -5411,7 +5456,7 @@ switch(errorcode)
/* The pattern is now in pbuffer[8|16|32], with the length in code units in
patlen. If it is to be converted, copy the result back afterwards so that it
-it ends up back in the usual place. */
+ends up back in the usual place. */
if (pat_patctl.convert_type != CONVERT_UNSET)
{
@@ -5470,8 +5515,8 @@ if (pat_patctl.convert_type != CONVERT_UNSET)
if (rc != 0)
{
- fprintf(outfile, "** Pattern conversion error at offset %zu: ",
- converted_length);
+ fprintf(outfile, "** Pattern conversion error at offset %" SIZ_FORM ": ",
+ SIZ_CAST converted_length);
convert_return = print_error_message(rc, "", "\n")? PR_SKIP:PR_ABEND;
}
@@ -5735,7 +5780,7 @@ return PR_OK;
*************************************************/
/* This is used for DFA, normal, and JIT fast matching. For DFA matching it
-should only called with the third argument set to PCRE2_ERROR_DEPTHLIMIT.
+should only be called with the third argument set to PCRE2_ERROR_DEPTHLIMIT.
Arguments:
pp the subject string
@@ -5760,6 +5805,8 @@ PCRE2_SET_HEAP_LIMIT(dat_context, max);
for (;;)
{
+ uint32_t stack_start = 0;
+
if (errnumber == PCRE2_ERROR_HEAPLIMIT)
{
PCRE2_SET_HEAP_LIMIT(dat_context, mid);
@@ -5775,6 +5822,7 @@ for (;;)
if ((dat_datctl.control & CTL_DFA) != 0)
{
+ stack_start = DFA_START_RWS_SIZE/1024;
if (dfa_workspace == NULL)
dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
if (dfa_matched++ == 0)
@@ -5789,11 +5837,21 @@ for (;;)
dat_datctl.options, match_data, PTR(dat_context));
else
+ {
+ stack_start = START_FRAMES_SIZE/1024;
PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
dat_datctl.options, match_data, PTR(dat_context));
+ }
if (capcount == errnumber)
{
+ if ((mid & 0x80000000u) != 0)
+ {
+ fprintf(outfile, "Can't find minimum %s limit: check pattern for "
+ "restriction\n", msg);
+ break;
+ }
+
min = mid;
mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
}
@@ -5802,11 +5860,12 @@ for (;;)
capcount == PCRE2_ERROR_PARTIAL)
{
/* If we've not hit the error with a heap limit less than the size of the
- initial stack frame vector, the heap is not being used, so the minimum
- limit is zero; there's no need to go on. The other limits are always
- greater than zero. */
+ initial stack frame vector (for pcre2_match()) or the initial stack
+ workspace vector (for pcre2_dfa_match()), the heap is not being used, so
+ the minimum limit is zero; there's no need to go on. The other limits are
+ always greater than zero. */
- if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < START_FRAMES_SIZE/1024)
+ if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < stack_start)
{
fprintf(outfile, "Minimum %s limit = 0\n", msg);
break;
@@ -5889,8 +5948,8 @@ isn't a tidy way to fit it in the rest of the data. */
if (cb->callout_string != NULL)
{
uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
- fprintf(outfile, "Callout (%lu): %c",
- (unsigned long int)cb->callout_string_offset, delimiter);
+ fprintf(outfile, "Callout (%" SIZ_FORM "): %c",
+ SIZ_CAST cb->callout_string_offset, delimiter);
PCHARSV(cb->callout_string, 0,
cb->callout_string_length, utf, outfile);
for (i = 0; callout_start_delims[i] != 0; i++)
@@ -6089,12 +6148,12 @@ for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
}
else if (length2 != length)
{
- fprintf(outfile, "Mismatched substring lengths: %lu %lu\n",
- (unsigned long int)length, (unsigned long int)length2);
+ fprintf(outfile, "Mismatched substring lengths: %"
+ SIZ_FORM " %" SIZ_FORM "\n", SIZ_CAST length, SIZ_CAST length2);
}
fprintf(outfile, "%2dC ", n);
PCHARSV(copybuffer, 0, length, utf, outfile);
- fprintf(outfile, " (%lu)\n", (unsigned long)length);
+ fprintf(outfile, " (%" SIZ_FORM ")\n", SIZ_CAST length);
}
}
@@ -6144,12 +6203,12 @@ for (;;)
}
else if (length2 != length)
{
- fprintf(outfile, "Mismatched substring lengths: %lu %lu\n",
- (unsigned long int)length, (unsigned long int)length2);
+ fprintf(outfile, "Mismatched substring lengths: %"
+ SIZ_FORM " %" SIZ_FORM "\n", SIZ_CAST length, SIZ_CAST length2);
}
fprintf(outfile, " C ");
PCHARSV(copybuffer, 0, length, utf, outfile);
- fprintf(outfile, " (%lu) %s", (unsigned long)length, nptr);
+ fprintf(outfile, " (%" SIZ_FORM ") %s", SIZ_CAST length, nptr);
if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
else fprintf(outfile, " (non-unique)\n");
}
@@ -6174,7 +6233,7 @@ for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++)
{
fprintf(outfile, "%2dG ", n);
PCHARSV(gotbuffer, 0, length, utf, outfile);
- fprintf(outfile, " (%lu)\n", (unsigned long)length);
+ fprintf(outfile, " (%" SIZ_FORM ")\n", SIZ_CAST length);
PCRE2_SUBSTRING_FREE(gotbuffer);
}
}
@@ -6218,7 +6277,7 @@ for (;;)
{
fprintf(outfile, " G ");
PCHARSV(gotbuffer, 0, length, utf, outfile);
- fprintf(outfile, " (%lu) %s", (unsigned long)length, nptr);
+ fprintf(outfile, " (%" SIZ_FORM ") %s", SIZ_CAST length, nptr);
if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
else fprintf(outfile, " (non-unique)\n");
PCRE2_SUBSTRING_FREE(gotbuffer);
@@ -6283,6 +6342,7 @@ size_t needlen;
void *use_dat_context;
BOOL utf;
BOOL subject_literal;
+PCRE2_SIZE ovecsave[3];
#ifdef SUPPORT_PCRE2_8
uint8_t *q8 = NULL;
@@ -6761,13 +6821,17 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
fprintf(outfile, "Matched without capture\n");
else
{
- size_t i;
+ size_t i, j;
+ size_t last_printed = (size_t)dat_datctl.oveccount;
for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
{
if (pmatch[i].rm_so >= 0)
{
PCRE2_SIZE start = pmatch[i].rm_so;
PCRE2_SIZE end = pmatch[i].rm_eo;
+ for (j = last_printed + 1; j < i; j++)
+ fprintf(outfile, "%2d: <unset>\n", (int)j);
+ last_printed = i;
if (start > end)
{
start = pmatch[i].rm_eo;
@@ -6926,6 +6990,9 @@ if (dat_datctl.replacement[0] != 0)
if (timeitm)
fprintf(outfile, "** Timing is not supported with replace: ignored\n");
+ if ((dat_datctl.control & CTL_ALTGLOBAL) != 0)
+ fprintf(outfile, "** Altglobal is not supported with replace: ignored\n");
+
xoptions = (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
PCRE2_SUBSTITUTE_GLOBAL) |
(((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
@@ -6955,8 +7022,8 @@ if (dat_datctl.replacement[0] != 0)
}
if (n > nsize)
{
- fprintf(outfile, "Replacement buffer setting (%lu) is too large "
- "(max %lu)\n", (unsigned long int)n, (unsigned long int)nsize);
+ fprintf(outfile, "Replacement buffer setting (%" SIZ_FORM ") is too "
+ "large (max %" SIZ_FORM ")\n", SIZ_CAST n, SIZ_CAST nsize);
return PR_OK;
}
nsize = n;
@@ -7044,35 +7111,24 @@ if (dat_datctl.replacement[0] != 0)
}
fprintf(outfile, "\n");
+ show_memory = FALSE;
+ return PR_OK;
} /* End of substitution handling */
/* When a replacement string is not provided, run a loop for global matching
-with one of the basic matching functions. */
+with one of the basic matching functions. For altglobal (or first time round
+the loop), set an "unset" value for the previous match info. */
-else for (gmatched = 0;; gmatched++)
+ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
+
+for (gmatched = 0;; gmatched++)
{
PCRE2_SIZE j;
int capcount;
PCRE2_SIZE *ovector;
- PCRE2_SIZE ovecsave[2];
ovector = FLD(match_data, ovector);
- /* After the first time round a global loop, for a normal global (/g)
- iteration, save the current ovector[0,1] so that we can check that they do
- change each time. Otherwise a matching bug that returns the same string
- causes an infinite loop. It has happened! */
-
- if (gmatched > 0 && (dat_datctl.control & CTL_GLOBAL) != 0)
- {
- ovecsave[0] = ovector[0];
- ovecsave[1] = ovector[1];
- }
-
- /* For altglobal (or first time round the loop), set an "unset" value. */
-
- else ovecsave[0] = ovecsave[1] = PCRE2_UNSET;
-
/* Fill the ovector with junk to detect elements that do not get set
when they should be. */
@@ -7135,18 +7191,16 @@ else for (gmatched = 0;; gmatched++)
(double)CLOCKS_PER_SEC);
}
- /* Find the heap, match and depth limits if requested. The match and heap
- limits are not relevant for DFA matching and the depth and heap limits are
- not relevant for JIT. The return from check_match_limit() is the return from
- the final call to pcre2_match() or pcre2_dfa_match(). */
+ /* Find the heap, match and depth limits if requested. The depth and heap
+ limits are not relevant for JIT. The return from check_match_limit() is the
+ return from the final call to pcre2_match() or pcre2_dfa_match(). */
if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
{
capcount = 0; /* This stops compiler warnings */
- if ((dat_datctl.control & CTL_DFA) == 0 &&
- (FLD(compiled_code, executable_jit) == NULL ||
- (dat_datctl.options & PCRE2_NO_JIT) != 0))
+ if (FLD(compiled_code, executable_jit) == NULL ||
+ (dat_datctl.options & PCRE2_NO_JIT) != 0)
{
(void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT, "heap");
}
@@ -7161,6 +7215,12 @@ else for (gmatched = 0;; gmatched++)
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
"depth");
}
+
+ if (capcount == 0)
+ {
+ fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
+ capcount = dat_datctl.oveccount;
+ }
}
/* Otherwise just run a single match, setting up a callout if required (the
@@ -7239,12 +7299,23 @@ else for (gmatched = 0;; gmatched++)
}
/* If this is not the first time round a global loop, check that the
- returned string has changed. If not, there is a bug somewhere and we must
- break the loop because it will go on for ever. We know that there are
- always at least two elements in the ovector. */
+ returned string has changed. If it has not, check for an empty string match
+ at a different starting offset from the previous match. This is a failed test
+ retry for null-matching patterns that don't match at their starting offset,
+ for example /(?<=\G.)/. A repeated match at the same point is not such a
+ pattern, and must be discarded, and we then proceed to seek a non-null
+ match at the current point. For any other repeated match, there is a bug
+ somewhere and we must break the loop because it will go on for ever. We
+ know that there are always at least two elements in the ovector. */
if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
{
+ if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset)
+ {
+ g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
+ ovecsave[2] = dat_datctl.offset;
+ continue; /* Back to the top of the loop */
+ }
fprintf(outfile,
"** PCRE2 error: global repeat returned the same string as previous\n");
fprintf(outfile, "** Global loop abandoned\n");
@@ -7534,7 +7605,7 @@ else for (gmatched = 0;; gmatched++)
{
PCRE2_SIZE startchar;
PCRE2_GET_STARTCHAR(startchar, match_data);
- fprintf(outfile, " at offset %lu", (unsigned long int)startchar);
+ fprintf(outfile, " at offset %" SIZ_FORM, SIZ_CAST startchar);
}
fprintf(outfile, "\n");
break;
@@ -7552,6 +7623,7 @@ else for (gmatched = 0;; gmatched++)
if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else
{
+ PCRE2_SIZE match_offset = FLD(match_data, ovector)[0];
PCRE2_SIZE end_offset = FLD(match_data, ovector)[1];
/* We must now set up for the next iteration of a global search. If we have
@@ -7559,12 +7631,19 @@ else for (gmatched = 0;; gmatched++)
subject. If so, the loop is over. Otherwise, mimic what Perl's /g option
does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again
at the same point. If this fails it will be picked up above, where a fake
- match is set up so that at this point we advance to the next character. */
+ match is set up so that at this point we advance to the next character.
- if (FLD(match_data, ovector)[0] == end_offset)
+ However, in order to cope with patterns that never match at their starting
+ offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater
+ than the starting offset. This means there will be a retry with the
+ starting offset at the match offset. If this returns the same match again,
+ it is picked up above and ignored, and the special action is then taken. */
+
+ if (match_offset == end_offset)
{
- if (end_offset == ulen) break; /* End of subject */
- g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
+ if (end_offset == ulen) break; /* End of subject */
+ if (match_offset <= dat_datctl.offset)
+ g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
}
/* However, even after matching a non-empty string, there is still one
@@ -7602,10 +7681,19 @@ else for (gmatched = 0;; gmatched++)
}
}
- /* For /g (global), update the start offset, leaving the rest alone. */
+ /* For a normal global (/g) iteration, save the current ovector[0,1] and
+ the starting offset so that we can check that they do change each time.
+ Otherwise a matching bug that returns the same string causes an infinite
+ loop. It has happened! Then update the start offset, leaving other
+ parameters alone. */
if ((dat_datctl.control & CTL_GLOBAL) != 0)
+ {
+ ovecsave[0] = ovector[0];
+ ovecsave[1] = ovector[1];
+ ovecsave[2] = dat_datctl.offset;
dat_datctl.offset = end_offset;
+ }
/* For altglobal, just update the pointer and length. */
@@ -7744,7 +7832,7 @@ printf(" -LM list pattern and subject modifiers, then exit\n");
printf(" -q quiet: do not output PCRE2 version number at start\n");
printf(" -pattern <s> set default pattern modifier fields\n");
printf(" -subject <s> set default subject modifier fields\n");
-printf(" -S <n> set stack size to <n> megabytes\n");
+printf(" -S <n> set stack size to <n> mebibytes\n");
printf(" -t [<n>] time compilation and execution, repeating <n> times\n");
printf(" -tm [<n>] time execution (matching) only, repeating <n> times\n");
printf(" -T same as -t, but show total times at the end\n");
@@ -7789,7 +7877,7 @@ if (arg != NULL && arg[0] != CHAR_MINUS)
{
case CONF_BSR:
(void)PCRE2_CONFIG(coptlist[i].value, &optval);
- printf("%s\n", optval? "ANYCRLF" : "ANY");
+ printf("%s\n", (optval == PCRE2_BSR_ANYCRLF)? "ANYCRLF" : "ANY");
break;
case CONF_FIX:
@@ -7873,7 +7961,8 @@ else
(void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval);
print_newline_config(optval, FALSE);
(void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
-printf(" \\R matches %s\n", optval? "CR, LF, or CRLF only" :
+printf(" \\R matches %s\n",
+ (optval == PCRE2_BSR_ANYCRLF)? "CR, LF, or CRLF only" :
"all Unicode newlines");
(void)PCRE2_CONFIG(PCRE2_CONFIG_NEVER_BACKSLASH_C, &optval);
printf(" \\C is %ssupported\n", optval? "not ":"");
@@ -8194,15 +8283,15 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
if (rlim.rlim_cur > rlim.rlim_max)
{
fprintf(stderr,
- "pcre2test: requested stack size %luM is greater than hard limit %lu\n",
- (unsigned long int)stack_size,
- (unsigned long int)(rlim.rlim_max));
+ "pcre2test: requested stack size %luMiB is greater than hard limit "
+ "%luMiB\n", (unsigned long int)stack_size,
+ (unsigned long int)(rlim.rlim_max));
exit(1);
}
rc = setrlimit(RLIMIT_STACK, &rlim);
if (rc != 0)
{
- fprintf(stderr, "pcre2test: setting stack size %luM failed: %s\n",
+ fprintf(stderr, "pcre2test: setting stack size %luMiB failed: %s\n",
(unsigned long int)stack_size, strerror(errno));
exit(1);
}
@@ -8334,8 +8423,8 @@ least 128 code units, because it is used for retrieving error messages. */
pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
if (pbuffer16 == NULL)
{
- fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer16\n",
- (unsigned long int)pbuffer16_size);
+ fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
+ SIZ_CAST pbuffer16_size);
yield = 1;
goto EXIT;
}
@@ -8349,8 +8438,8 @@ least 128 code units, because it is used for retrieving error messages. */
pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
if (pbuffer32 == NULL)
{
- fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer32\n",
- (unsigned long int)pbuffer32_size);
+ fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
+ SIZ_CAST pbuffer32_size);
yield = 1;
goto EXIT;
}
diff --git a/src/sljit/sljitConfigInternal.h b/src/sljit/sljitConfigInternal.h
index e13282c..f5703e8 100644
--- a/src/sljit/sljitConfigInternal.h
+++ b/src/sljit/sljitConfigInternal.h
@@ -66,7 +66,7 @@
SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address
Other macros:
- SLJIT_FUNC : calling convention attribute for both calling JIT form C and C calling back from JIT
+ SLJIT_FUNC : calling convention attribute for both calling JIT from C and C calling back from JIT
SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper)
*/
@@ -147,17 +147,23 @@
#define SLJIT_CONFIG_UNSUPPORTED 1
#endif
-#else /* !_WIN32 */
+#else /* _WIN32 */
#if defined(_M_X64) || defined(__x86_64__)
#define SLJIT_CONFIG_X86_64 1
+#elif (defined(_M_ARM) && _M_ARM >= 7 && defined(_M_ARMT)) || defined(__thumb2__)
+#define SLJIT_CONFIG_ARM_THUMB2 1
+#elif (defined(_M_ARM) && _M_ARM >= 7)
+#define SLJIT_CONFIG_ARM_V7 1
#elif defined(_ARM_)
#define SLJIT_CONFIG_ARM_V5 1
+#elif defined(_M_ARM64) || defined(__aarch64__)
+#define SLJIT_CONFIG_ARM_64 1
#else
#define SLJIT_CONFIG_X86_32 1
#endif
-#endif /* !WIN32 */
+#endif /* !_WIN32 */
#endif /* SLJIT_CONFIG_AUTO */
#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
@@ -324,6 +330,11 @@
sparc_cache_flush((from), (to))
#define SLJIT_CACHE_FLUSH_OWN_IMPL 1
+#elif defined _WIN32
+
+#define SLJIT_CACHE_FLUSH(from, to) \
+ FlushInstructionCache(GetCurrentProcess(), (char*)(from), (char*)(to) - (char*)(from))
+
#else
/* Calls __ARM_NR_cacheflush on ARM-Linux. */
@@ -371,12 +382,18 @@ typedef int sljit_sw;
#define SLJIT_64BIT_ARCHITECTURE 1
#define SLJIT_WORD_SHIFT 3
#ifdef _WIN32
+#ifdef __GNUC__
+/* These types do not require windows.h */
+typedef unsigned long long sljit_uw;
+typedef long long sljit_sw;
+#else
typedef unsigned __int64 sljit_uw;
typedef __int64 sljit_sw;
-#else
+#endif
+#else /* !_WIN32 */
typedef unsigned long int sljit_uw;
typedef long int sljit_sw;
-#endif
+#endif /* _WIN32 */
#endif
typedef sljit_uw sljit_p;
@@ -590,7 +607,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_NUMBER_OF_REGISTERS 26
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
-#define SLJIT_LOCALS_OFFSET_BASE (2 * sizeof(sljit_sw))
+#define SLJIT_LOCALS_OFFSET_BASE 0
#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
diff --git a/src/sljit/sljitExecAllocator.c b/src/sljit/sljitExecAllocator.c
index f500978..7c18578 100644
--- a/src/sljit/sljitExecAllocator.c
+++ b/src/sljit/sljitExecAllocator.c
@@ -99,7 +99,14 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
void *retval;
#ifdef MAP_ANON
- retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
+
+ int flags = MAP_PRIVATE | MAP_ANON;
+
+#ifdef MAP_JIT
+ flags |= MAP_JIT;
+#endif
+
+ retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, -1, 0);
#else
if (dev_zero < 0) {
if (open_dev_zero())
diff --git a/src/sljit/sljitLir.c b/src/sljit/sljitLir.c
index 5e435f0..5bdddc1 100644
--- a/src/sljit/sljitLir.c
+++ b/src/sljit/sljitLir.c
@@ -26,6 +26,13 @@
#include "sljitLir.h"
+#ifdef _WIN32
+
+/* For SLJIT_CACHE_FLUSH, which can expand to FlushInstructionCache. */
+#include <windows.h>
+
+#endif /* _WIN32 */
+
#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED)
/* These libraries are needed for the macros below. */
@@ -2178,7 +2185,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
#endif
-#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
+ && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
diff --git a/src/sljit/sljitLir.h b/src/sljit/sljitLir.h
index 920f6d4..e71890c 100644
--- a/src/sljit/sljitLir.h
+++ b/src/sljit/sljitLir.h
@@ -138,7 +138,7 @@ of sljitConfigInternal.h */
be specified as scratch registers and the fifth one as saved register
on the CPU above and any user code which requires four scratch
registers can run unmodified. The SLJIT compiler automatically saves
- the content of the two extra scrath register on the stack. Scratch
+ the content of the two extra scratch registers on the stack. Scratch
registers can also be preserved by saving their value on the stack
but this needs to be done manually.
@@ -746,7 +746,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
be mixed. The only exception is SLJIT_MOV32 and SLJIT_MOVU32 whose source
register can hold any 32 or 64 bit value, and it is converted to a 32 bit
compatible format first. This conversion is free (no instructions are
- emitted) on most CPUs. A 32 bit value can also be coverted to a 64 bit
+ emitted) on most CPUs. A 32 bit value can also be converted to a 64 bit
value by SLJIT_MOV_S32 (sign extension) or SLJIT_MOV_U32 (zero extension).
Note: memory addressing always uses 64 bit values on 64 bit systems so
@@ -773,8 +773,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
*/
#define SLJIT_F32_OP SLJIT_I32_OP
-/* Many CPUs (x86, ARM, PPC) has status flags which can be set according
- to the result of an operation. Other CPUs (MIPS) does not have status
+/* Many CPUs (x86, ARM, PPC) have status flags which can be set according
+ to the result of an operation. Other CPUs (MIPS) do not have status
flags, and results must be stored in registers. To cover both architecture
types efficiently only two flags are defined by SLJIT:
@@ -810,14 +810,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
Using these flags can reduce the number of emitted instructions. E.g. a
fast loop can be implemented by decreasing a counter register and set the
- zero flag to jump back if the counter register is not reached zero.
+ zero flag to jump back if the counter register has not reached zero.
Motivation: although CPUs can set a large number of flags, usually their
values are ignored or only one of them is used. Emulating a large number
of flags on systems without flag register is complicated so SLJIT
instructions must specify the flag they want to use and only that flag
will be emulated. The last arithmetic instruction can be repeated if
- multiple flags needs to be checked.
+ multiple flags need to be checked.
*/
/* Set Zero status flag. */
@@ -884,7 +884,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
/* Starting index of opcodes for sljit_emit_op1. */
#define SLJIT_OP1_BASE 32
-/* The MOV instruction transfer data from source to destination.
+/* The MOV instruction transfers data from source to destination.
MOV instruction suffixes:
@@ -1156,7 +1156,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
#define SLJIT_FAST_CALL 25
/* Called function must be declared with the SLJIT_FUNC attribute. */
#define SLJIT_CALL 26
- /* Called function must be decalred with cdecl attribute.
+ /* Called function must be declared with cdecl attribute.
This is the default attribute for C functions. */
#define SLJIT_CALL_CDECL 27
@@ -1210,7 +1210,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sl
/* Set the destination address of the jump to this label. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target);
-/* Emit an indirect jump or fast call. Both direct and indirect form
+/* Emit an indirect jump or fast call.
Direct form: set src to SLJIT_IMM() and srcw to the address
Indirect form: any other valid addressing mode
type must be between SLJIT_JUMP and SLJIT_FAST_CALL
@@ -1274,7 +1274,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
#define SLJIT_MEM_POST 0x1000
/* Emit a single memory load or store with update instruction. When the
- requested instruction from is not supported by the CPU, it returns
+ requested instruction form is not supported by the CPU, it returns
with SLJIT_ERR_UNSUPPORTED instead of emulating the instruction. This
allows specializing tight loops based on the supported instruction
forms (see SLJIT_MEM_SUPP flag).
diff --git a/src/sljit/sljitNativeARM_64.c b/src/sljit/sljitNativeARM_64.c
index 8a437bd..27af741 100644
--- a/src/sljit/sljitNativeARM_64.c
+++ b/src/sljit/sljitNativeARM_64.c
@@ -37,14 +37,14 @@ typedef sljit_u32 sljit_ins;
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 4)
-#define TMP_SP (SLJIT_NUMBER_OF_REGISTERS + 5)
+#define TMP_FP (SLJIT_NUMBER_OF_REGISTERS + 5)
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
/* r18 - platform register, currently not used */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = {
- 31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 30, 31
+ 31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 31, 9, 10, 30, 29
};
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
@@ -68,6 +68,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define ADC 0x9a000000
#define ADD 0x8b000000
+#define ADDE 0x8b200000
#define ADDI 0x91000000
#define AND 0x8a000000
#define ANDI 0x92000000
@@ -96,7 +97,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define FSUB 0x1e603800
#define LDRI 0xf9400000
#define LDP 0xa9400000
-#define LDP_PST 0xa8c00000
+#define LDP_PRE 0xa9c00000
+#define LDR_PRE 0xf8400c00
#define LSLV 0x9ac02000
#define LSRV 0x9ac02400
#define MADD 0x9b000000
@@ -873,73 +875,51 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
- saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0);
- local_size += saved_regs_size + SLJIT_LOCALS_OFFSET;
+ saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
+ if (saved_regs_size & 0x8)
+ saved_regs_size += sizeof(sljit_sw);
+
local_size = (local_size + 15) & ~0xf;
- compiler->local_size = local_size;
-
- if (local_size <= (63 * sizeof(sljit_sw))) {
- FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
- | RN(TMP_SP) | ((-(local_size >> 3) & 0x7f) << 15)));
- FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
- offs = (local_size - saved_regs_size) << (15 - 3);
- } else {
- offs = 0 << 15;
- if (saved_regs_size & 0x8) {
- offs = 1 << 15;
- saved_regs_size += sizeof(sljit_sw);
- }
- local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
- if (saved_regs_size > 0)
- FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
- }
+ compiler->local_size = local_size + saved_regs_size;
+
+ FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR)
+ | RN(SLJIT_SP) | ((-(saved_regs_size >> 3) & 0x7f) << 15)));
+
+#ifdef _WIN32
+ if (local_size >= 4096)
+ FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
+ else if (local_size > 256)
+ FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (local_size << 10)));
+#endif
tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
prev = -1;
+ offs = 2 << 15;
for (i = SLJIT_S0; i >= tmp; i--) {
if (prev == -1) {
- if (!(offs & (1 << 15))) {
- prev = i;
- continue;
- }
- FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
- offs += 1 << 15;
+ prev = i;
continue;
}
- FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+ FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
offs += 2 << 15;
prev = -1;
}
for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
if (prev == -1) {
- if (!(offs & (1 << 15))) {
- prev = i;
- continue;
- }
- FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
- offs += 1 << 15;
+ prev = i;
continue;
}
- FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+ FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
offs += 2 << 15;
prev = -1;
}
- SLJIT_ASSERT(prev == -1);
+ if (prev != -1)
+ FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5)));
- if (compiler->local_size > (63 * sizeof(sljit_sw))) {
- /* The local_size is already adjusted by the saved registers. */
- if (local_size > 0xfff) {
- FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
- local_size &= 0xfff;
- }
- if (local_size)
- FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
- FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
- | RN(TMP_SP) | ((-(16 >> 3) & 0x7f) << 15)));
- FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
- }
+
+ FAIL_IF(push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10)));
args = get_arg_count(arg_types);
@@ -950,6 +930,64 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
if (args >= 3)
FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2)));
+#ifdef _WIN32
+ if (local_size >= 4096) {
+ if (local_size < 4 * 4096) {
+ /* No need for a loop. */
+ if (local_size >= 2 * 4096) {
+ FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
+ local_size -= 4096;
+ }
+
+ if (local_size >= 2 * 4096) {
+ FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
+ local_size -= 4096;
+ }
+
+ FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+ local_size -= 4096;
+ }
+ else {
+ FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG2) | (((local_size >> 12) - 1) << 5)));
+ FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
+ FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG2) | RN(TMP_REG2) | (1 << 10)));
+ FAIL_IF(push_inst(compiler, B_CC | ((((sljit_ins) -3) & 0x7ffff) << 5) | 0x1 /* not-equal */));
+ FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+
+ local_size &= 0xfff;
+ }
+
+ if (local_size > 256) {
+ FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (local_size << 10)));
+ FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+ }
+ else if (local_size > 0)
+ FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(TMP_REG1) | ((-local_size & 0x1ff) << 12)));
+
+ FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10)));
+ }
+ else if (local_size > 256) {
+ FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10)));
+ }
+ else if (local_size > 0)
+ FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(SLJIT_SP) | ((-local_size & 0x1ff) << 12)));
+
+#else /* !_WIN32 */
+
+ /* The local_size does not include saved registers size. */
+ if (local_size > 0xfff) {
+ FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22)));
+ local_size &= 0xfff;
+ }
+ if (local_size != 0)
+ FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10)));
+
+#endif /* _WIN32 */
+
return SLJIT_SUCCESS;
}
@@ -957,13 +995,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
+ sljit_s32 saved_regs_size;
+
CHECK_ERROR();
CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
- local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET;
- local_size = (local_size + 15) & ~0xf;
- compiler->local_size = local_size;
+ saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
+ if (saved_regs_size & 0x8)
+ saved_regs_size += sizeof(sljit_sw);
+
+ compiler->local_size = saved_regs_size + ((local_size + 15) & ~0xf);
return SLJIT_SUCCESS;
}
@@ -977,71 +1019,59 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
- local_size = compiler->local_size;
+ saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 2);
+ if (saved_regs_size & 0x8)
+ saved_regs_size += sizeof(sljit_sw);
- saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 0);
- if (local_size <= (63 * sizeof(sljit_sw)))
- offs = (local_size - saved_regs_size) << (15 - 3);
+ local_size = compiler->local_size - saved_regs_size;
+
+ /* Load LR as early as possible. */
+ if (local_size == 0)
+ FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
+ else if (local_size < 63 * sizeof(sljit_sw)) {
+ FAIL_IF(push_inst(compiler, LDP_PRE | RT(TMP_FP) | RT2(TMP_LR)
+ | RN(SLJIT_SP) | (local_size << (15 - 3))));
+ }
else {
- FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
- | RN(TMP_SP) | (((16 >> 3) & 0x7f) << 15)));
- offs = 0 << 15;
- if (saved_regs_size & 0x8) {
- offs = 1 << 15;
- saved_regs_size += sizeof(sljit_sw);
- }
- local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
if (local_size > 0xfff) {
- FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
+ FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22)));
local_size &= 0xfff;
}
if (local_size)
- FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
+ FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10)));
+
+ FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
}
tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
prev = -1;
+ offs = 2 << 15;
for (i = SLJIT_S0; i >= tmp; i--) {
if (prev == -1) {
- if (!(offs & (1 << 15))) {
- prev = i;
- continue;
- }
- FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
- offs += 1 << 15;
+ prev = i;
continue;
}
- FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+ FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
offs += 2 << 15;
prev = -1;
}
for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
if (prev == -1) {
- if (!(offs & (1 << 15))) {
- prev = i;
- continue;
- }
- FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
- offs += 1 << 15;
+ prev = i;
continue;
}
- FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+ FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
offs += 2 << 15;
prev = -1;
}
- SLJIT_ASSERT(prev == -1);
+ if (prev != -1)
+ FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5)));
- if (compiler->local_size <= (63 * sizeof(sljit_sw))) {
- FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
- | RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15)));
- } else if (saved_regs_size > 0) {
- FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
- }
-
- FAIL_IF(push_inst(compiler, RET | RN(TMP_LR)));
- return SLJIT_SUCCESS;
+ /* These two can be executed in parallel. */
+ FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (saved_regs_size << 10)));
+ return push_inst(compiler, RET | RN(TMP_LR));
}
/* --------------------------------------------------------------------- */
@@ -1856,6 +1886,46 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12));
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
+{
+ sljit_s32 dst_reg;
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
+
+ SLJIT_ASSERT (SLJIT_LOCALS_OFFSET_BASE == 0);
+
+ dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
+
+ if (offset <= 0xffffff && offset >= -0xffffff) {
+ ins = ADDI;
+ if (offset < 0) {
+ offset = -offset;
+ ins = SUBI;
+ }
+
+ if (offset <= 0xfff)
+ FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (offset << 10)));
+ else {
+ FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | ((offset & 0xfff000) >> (12 - 10)) | (1 << 22)));
+
+ offset &= 0xfff;
+ if (offset != 0)
+ FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(dst_reg) | (offset << 10)));
+ }
+ }
+ else {
+ FAIL_IF(load_immediate (compiler, dst_reg, offset));
+ /* Add extended register form. */
+ FAIL_IF(push_inst(compiler, ADDE | (0x3 << 13) | RD(dst_reg) | RN(SLJIT_SP) | RM(dst_reg)));
+ }
+
+ if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
+ return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1);
+ return SLJIT_SUCCESS;
+}
+
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
struct sljit_const *const_;
diff --git a/src/sljit/sljitNativeARM_T2_32.c b/src/sljit/sljitNativeARM_T2_32.c
index 75e7a38..d7024b6 100644
--- a/src/sljit/sljitNativeARM_T2_32.c
+++ b/src/sljit/sljitNativeARM_T2_32.c
@@ -110,6 +110,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define ASRSI 0x1000
#define ASR_W 0xfa40f000
#define ASR_WI 0xea4f0020
+#define BCC 0xd000
#define BICI 0xf0200000
#define BKPT 0xbe00
#define BLX 0x4780
@@ -125,6 +126,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define EORS 0x4040
#define EOR_W 0xea800000
#define IT 0xbf00
+#define LDRI 0xf8500800
#define LSLS 0x4080
#define LSLSI 0x0000
#define LSL_W 0xfa00f000
@@ -158,6 +160,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define SBCI 0xf1600000
#define SBCS 0x4180
#define SBC_W 0xeb600000
+#define SDIV 0xfb90f0f0
#define SMULL 0xfb800000
#define STR_SP 0x9000
#define SUBS 0x1a00
@@ -172,6 +175,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define SXTH 0xb200
#define SXTH_W 0xfa0ff080
#define TST 0x4200
+#define UDIV 0xfbb0f0f0
#define UMULL 0xfba00000
#define UXTB 0xb2c0
#define UXTB_W 0xfa5ff080
@@ -339,8 +343,8 @@ static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw
/* Really complex instruction form for branches. */
s = (diff >> 23) & 0x1;
- j1 = (~(diff >> 21) ^ s) & 0x1;
- j2 = (~(diff >> 22) ^ s) & 0x1;
+ j1 = (~(diff >> 22) ^ s) & 0x1;
+ j2 = (~(diff >> 21) ^ s) & 0x1;
jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10);
jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff);
@@ -520,6 +524,8 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst,
{
sljit_uw tmp;
+ /* MOVS cannot be used since it destroys flags. */
+
if (imm >= 0x10000) {
tmp = get_imm(imm);
if (tmp != INVALID_IMM)
@@ -1032,6 +1038,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
{
sljit_s32 args, size, i, tmp;
sljit_ins push = 0;
+#ifdef _WIN32
+ sljit_uw imm;
+#endif
CHECK_ERROR();
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
@@ -1052,12 +1061,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
local_size = ((size + local_size + 7) & ~7) - size;
compiler->local_size = local_size;
+
+#ifdef _WIN32
+ if (local_size >= 256) {
+ if (local_size > 4096)
+ imm = get_imm(4096);
+ else
+ imm = get_imm(local_size & ~0xff);
+
+ SLJIT_ASSERT(imm != INVALID_IMM);
+ FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(SLJIT_SP) | imm));
+ }
+#else
if (local_size > 0) {
if (local_size <= (127 << 2))
FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2)));
else
FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size));
}
+#endif
args = get_arg_count(arg_types);
@@ -1068,6 +1090,61 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
if (args >= 3)
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2)));
+#ifdef _WIN32
+ if (local_size >= 256) {
+ if (local_size > 4096) {
+ imm = get_imm(4096);
+ SLJIT_ASSERT(imm != INVALID_IMM);
+
+ if (local_size < 4 * 4096) {
+ if (local_size > 2 * 4096) {
+ FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
+ FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
+ local_size -= 4096;
+ }
+
+ if (local_size > 2 * 4096) {
+ FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
+ FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
+ local_size -= 4096;
+ }
+
+ FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
+ local_size -= 4096;
+
+ SLJIT_ASSERT(local_size > 0);
+ }
+ else {
+ FAIL_IF(load_immediate(compiler, SLJIT_R3, (local_size >> 12) - 1));
+ FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
+ FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
+ SLJIT_ASSERT(reg_map[SLJIT_R3] < 7);
+ FAIL_IF(push_inst16(compiler, SUBSI8 | RDN3(SLJIT_R3) | 1));
+ FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-7 & 0xff)));
+
+ local_size &= 0xfff;
+
+ if (local_size != 0)
+ FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
+ }
+
+ if (local_size >= 256) {
+ imm = get_imm(local_size & ~0xff);
+ SLJIT_ASSERT(imm != INVALID_IMM);
+
+ FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
+ }
+ }
+
+ local_size &= 0xff;
+ FAIL_IF(push_inst32(compiler, LDRI | 0x400 | (local_size > 0 ? 0x100 : 0) | RT4(TMP_REG2) | RN4(TMP_REG1) | local_size));
+
+ FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SP, TMP_REG1)));
+ }
+ else if (local_size > 0)
+ FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | local_size));
+#endif
+
return SLJIT_SUCCESS;
}
@@ -1119,11 +1196,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
/* Operators */
/* --------------------------------------------------------------------- */
+#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
+
#ifdef __cplusplus
extern "C" {
#endif
-#if defined(__GNUC__)
+#ifdef _WIN32
+extern unsigned long long __rt_udiv(unsigned int denominator, unsigned int numerator);
+extern long long __rt_sdiv(int denominator, int numerator);
+#elif defined(__GNUC__)
extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator);
extern int __aeabi_idivmod(int numerator, int denominator);
#else
@@ -1134,10 +1216,14 @@ extern int __aeabi_idivmod(int numerator, int denominator);
}
#endif
+#endif /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
+#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
sljit_sw saved_reg_list[3];
sljit_sw saved_reg_count;
+#endif
CHECK_ERROR();
CHECK(check_sljit_emit_op0(compiler, op));
@@ -1155,6 +1241,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
| (reg_map[SLJIT_R0] << 12)
| (reg_map[SLJIT_R0] << 16)
| reg_map[SLJIT_R1]);
+#if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__)
+ case SLJIT_DIVMOD_UW:
+ case SLJIT_DIVMOD_SW:
+ FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
+ FAIL_IF(push_inst32(compiler, (op == SLJIT_DIVMOD_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
+ FAIL_IF(push_inst32(compiler, MUL | RD4(SLJIT_R1) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
+ return push_inst32(compiler, SUB_W | RD4(SLJIT_R1) | RN4(TMP_REG1) | RM4(SLJIT_R1));
+ case SLJIT_DIV_UW:
+ case SLJIT_DIV_SW:
+ return push_inst32(compiler, (op == SLJIT_DIV_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
+#else /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
case SLJIT_DIVMOD_UW:
case SLJIT_DIVMOD_SW:
case SLJIT_DIV_UW:
@@ -1183,7 +1280,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
}
}
-#if defined(__GNUC__)
+#ifdef _WIN32
+ FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
+ FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1)));
+ FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1)));
+ FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
+ ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__rt_udiv) : SLJIT_FUNC_OFFSET(__rt_sdiv))));
+#elif defined(__GNUC__)
FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
#else
@@ -1203,6 +1306,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
}
return SLJIT_SUCCESS;
+#endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */
}
return SLJIT_SUCCESS;
diff --git a/src/sljit/sljitNativeMIPS_32.c b/src/sljit/sljitNativeMIPS_32.c
index 9f9e157..094c992 100644
--- a/src/sljit/sljitNativeMIPS_32.c
+++ b/src/sljit/sljitNativeMIPS_32.c
@@ -448,7 +448,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
sljit_ins ins = NOP;
sljit_u8 offsets[4];
- SLJIT_ASSERT(reg_map[TMP_REG3] == 4 && freg_map[TMP_FREG1] == 12);
+ SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12);
arg_types >>= SLJIT_DEF_SHIFT;
@@ -516,7 +516,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
else if (arg_count != word_arg_count)
ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (offsets[arg_count - 1] >> 2));
else if (arg_count == 1)
- ins = ADDU | S(SLJIT_R0) | TA(0) | D(TMP_REG3);
+ ins = ADDU | S(SLJIT_R0) | TA(0) | DA(4);
arg_count--;
word_arg_count--;
diff --git a/src/sljit/sljitNativeMIPS_64.c b/src/sljit/sljitNativeMIPS_64.c
index ff6f048..f841aef 100644
--- a/src/sljit/sljitNativeMIPS_64.c
+++ b/src/sljit/sljitNativeMIPS_64.c
@@ -547,7 +547,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
sljit_ins prev_ins = NOP;
sljit_ins ins = NOP;
- SLJIT_ASSERT(reg_map[TMP_REG3] == 4 && freg_map[TMP_FREG1] == 12);
+ SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12);
arg_types >>= SLJIT_DEF_SHIFT;
@@ -591,7 +591,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
if (arg_count != word_arg_count)
ins = DADDU | S(word_arg_count) | TA(0) | D(arg_count);
else if (arg_count == 1)
- ins = DADDU | S(SLJIT_R0) | TA(0) | D(TMP_REG3);
+ ins = DADDU | S(SLJIT_R0) | TA(0) | DA(4);
arg_count--;
word_arg_count--;
break;
diff --git a/src/sljit/sljitNativeMIPS_common.c b/src/sljit/sljitNativeMIPS_common.c
index e108433..894e213 100644
--- a/src/sljit/sljitNativeMIPS_common.c
+++ b/src/sljit/sljitNativeMIPS_common.c
@@ -57,14 +57,14 @@ typedef sljit_u32 sljit_ins;
#define RETURN_ADDR_REG 31
/* Flags are kept in volatile registers. */
-#define EQUAL_FLAG 31
+#define EQUAL_FLAG 3
#define OTHER_FLAG 1
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
- 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4
+ 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 4, 25, 31
};
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@@ -612,16 +612,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
/* Frequent case. */
FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-local_size), DR(SLJIT_SP)));
base = S(SLJIT_SP);
+ offs = local_size - (sljit_sw)sizeof(sljit_sw);
}
else {
- FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size));
+ FAIL_IF(load_immediate(compiler, DR(OTHER_FLAG), local_size));
FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(TMP_REG1) | D(SLJIT_SP), DR(SLJIT_SP)));
+ FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(OTHER_FLAG) | D(SLJIT_SP), DR(SLJIT_SP)));
base = S(TMP_REG2);
local_size = 0;
+ offs = -(sljit_sw)sizeof(sljit_sw);
}
- offs = local_size - (sljit_sw)(sizeof(sljit_sw));
FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(offs), MOVABLE_INS));
tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
@@ -805,7 +806,8 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) {
tmp_ar = reg_ar;
delay_slot = reg_ar;
- } else {
+ }
+ else {
tmp_ar = DR(TMP_REG1);
delay_slot = MOVABLE_INS;
}
@@ -881,11 +883,39 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw)
{
+ sljit_s32 tmp_ar, base, delay_slot;
+
if (getput_arg_fast(compiler, flags, reg_ar, arg, argw))
return compiler->error;
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
- return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0);
+
+ if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) {
+ tmp_ar = reg_ar;
+ delay_slot = reg_ar;
+ }
+ else {
+ tmp_ar = DR(TMP_REG1);
+ delay_slot = MOVABLE_INS;
+ }
+ base = arg & REG_MASK;
+
+ if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+ argw &= 0x3;
+
+ if (SLJIT_UNLIKELY(argw)) {
+ FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | DA(tmp_ar) | SH_IMM(argw), tmp_ar));
+ FAIL_IF(push_inst(compiler, ADDU_W | S(base) | TA(tmp_ar) | DA(tmp_ar), tmp_ar));
+ }
+ else
+ FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(OFFS_REG(arg)) | DA(tmp_ar), tmp_ar));
+ return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
+ }
+
+ FAIL_IF(load_immediate(compiler, tmp_ar, argw));
+
+ if (base != 0)
+ FAIL_IF(push_inst(compiler, ADDU_W | S(base) | TA(tmp_ar) | DA(tmp_ar), tmp_ar));
+
+ return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
}
static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
diff --git a/src/sljit/sljitNativeX86_32.c b/src/sljit/sljitNativeX86_32.c
index 8a83e27..074e64b 100644
--- a/src/sljit/sljitNativeX86_32.c
+++ b/src/sljit/sljitNativeX86_32.c
@@ -123,34 +123,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (args > 0) {
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
+ inst[0] = MOV_r_rm;
+ inst[1] = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
+ inst += 2;
}
if (args > 1) {
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
+ inst[0] = MOV_r_rm;
+ inst[1] = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
+ inst += 2;
}
if (args > 2) {
- *inst++ = MOV_r_rm;
- *inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
- *inst++ = 0x24;
- *inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
+ inst[0] = MOV_r_rm;
+ inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
+ inst[2] = 0x24;
+ inst[3] = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
}
#else
if (args > 0) {
- *inst++ = MOV_r_rm;
- *inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
- *inst++ = sizeof(sljit_sw) * 2;
+ inst[0] = MOV_r_rm;
+ inst[1] = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
+ inst[2] = sizeof(sljit_sw) * 2;
+ inst += 3;
}
if (args > 1) {
- *inst++ = MOV_r_rm;
- *inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
- *inst++ = sizeof(sljit_sw) * 3;
+ inst[0] = MOV_r_rm;
+ inst[1] = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
+ inst[2] = sizeof(sljit_sw) * 3;
+ inst += 3;
}
if (args > 2) {
- *inst++ = MOV_r_rm;
- *inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
- *inst++ = sizeof(sljit_sw) * 4;
+ inst[0] = MOV_r_rm;
+ inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
+ inst[2] = sizeof(sljit_sw) * 4;
}
#endif
@@ -170,17 +174,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
compiler->local_size = local_size;
#ifdef _WIN32
- if (local_size > 1024) {
-#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
- FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
-#else
- /* Space for a single argument. This amount is excluded when the stack is allocated below. */
- local_size -= sizeof(sljit_sw);
- FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
- FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
- SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, sizeof(sljit_sw)));
-#endif
- FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
+ if (local_size > 0) {
+ if (local_size <= 4 * 4096) {
+ if (local_size > 4096)
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
+ if (local_size > 2 * 4096)
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
+ if (local_size > 3 * 4096)
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
+ }
+ else {
+ EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
+ EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12);
+
+ SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);
+
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_R0), -4096);
+ FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
+ SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
+ FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
+ SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1));
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
+ FAIL_IF(!inst);
+
+ INC_SIZE(2);
+ inst[0] = JNE_i8;
+ inst[1] = (sljit_s8) -16;
+ }
+
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
}
#endif
diff --git a/src/sljit/sljitNativeX86_64.c b/src/sljit/sljitNativeX86_64.c
index 635ebd0..8506565 100644
--- a/src/sljit/sljitNativeX86_64.c
+++ b/src/sljit/sljitNativeX86_64.c
@@ -83,6 +83,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
+ compiler->mode32 = 0;
+
#ifdef _WIN64
/* Two/four register slots for parameters plus space for xmm6 register if needed. */
if (fscratches >= 6 || fsaveds >= 1)
@@ -126,35 +128,39 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
#ifndef _WIN64
if (args > 0) {
- *inst++ = REX_W;
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
+ inst[0] = REX_W;
+ inst[1] = MOV_r_rm;
+ inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
+ inst += 3;
}
if (args > 1) {
- *inst++ = REX_W | REX_R;
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
+ inst[0] = REX_W | REX_R;
+ inst[1] = MOV_r_rm;
+ inst[2] = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
+ inst += 3;
}
if (args > 2) {
- *inst++ = REX_W | REX_R;
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
+ inst[0] = REX_W | REX_R;
+ inst[1] = MOV_r_rm;
+ inst[2] = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
}
#else
if (args > 0) {
- *inst++ = REX_W;
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
+ inst[0] = REX_W;
+ inst[1] = MOV_r_rm;
+ inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
+ inst += 3;
}
if (args > 1) {
- *inst++ = REX_W;
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
+ inst[0] = REX_W;
+ inst[1] = MOV_r_rm;
+ inst[2] = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
+ inst += 3;
}
if (args > 2) {
- *inst++ = REX_W | REX_B;
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
+ inst[0] = REX_W | REX_B;
+ inst[1] = MOV_r_rm;
+ inst[2] = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
}
#endif
}
@@ -163,58 +169,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
compiler->local_size = local_size;
#ifdef _WIN64
- if (local_size > 1024) {
- /* Allocate stack for the callback, which grows the stack. */
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_s32)));
- FAIL_IF(!inst);
- INC_SIZE(4 + (3 + sizeof(sljit_s32)));
- *inst++ = REX_W;
- *inst++ = GROUP_BINARY_83;
- *inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
- /* Allocated size for registers must be divisible by 8. */
- SLJIT_ASSERT(!(saved_register_size & 0x7));
- /* Aligned to 16 byte. */
- if (saved_register_size & 0x8) {
- *inst++ = 5 * sizeof(sljit_sw);
- local_size -= 5 * sizeof(sljit_sw);
- } else {
- *inst++ = 4 * sizeof(sljit_sw);
- local_size -= 4 * sizeof(sljit_sw);
- }
- /* Second instruction */
- SLJIT_ASSERT(reg_map[SLJIT_R0] < 8);
- *inst++ = REX_W;
- *inst++ = MOV_rm_i32;
- *inst++ = MOD_REG | reg_lmap[SLJIT_R0];
- sljit_unaligned_store_s32(inst, local_size);
-#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
- || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- compiler->skip_checks = 1;
-#endif
- FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
- }
-#endif
-
if (local_size > 0) {
- if (local_size <= 127) {
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
- FAIL_IF(!inst);
- INC_SIZE(4);
- *inst++ = REX_W;
- *inst++ = GROUP_BINARY_83;
- *inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
- *inst++ = local_size;
+ if (local_size <= 4 * 4096) {
+ if (local_size > 4096)
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
+ if (local_size > 2 * 4096)
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
+ if (local_size > 3 * 4096)
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
}
else {
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
+ EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, (local_size - 1) >> 12);
+
+ SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);
+
+ EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_R0), -4096);
+ FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
+ SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
+ FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
+ TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, 1));
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
FAIL_IF(!inst);
- INC_SIZE(7);
- *inst++ = REX_W;
- *inst++ = GROUP_BINARY_81;
- *inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
- sljit_unaligned_store_s32(inst, local_size);
- inst += sizeof(sljit_s32);
+
+ INC_SIZE(2);
+ inst[0] = JNE_i8;
+ inst[1] = (sljit_s8) -19;
}
+
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
+ }
+#endif
+
+ if (local_size > 0) {
+ FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
+ SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
}
#ifdef _WIN64
diff --git a/src/sljit/sljitNativeX86_common.c b/src/sljit/sljitNativeX86_common.c
index ab7b36a..6f02ee3 100644
--- a/src/sljit/sljitNativeX86_common.c
+++ b/src/sljit/sljitNativeX86_common.c
@@ -669,23 +669,6 @@ static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
-#ifdef _WIN32
-#include <malloc.h>
-
-static void SLJIT_FUNC sljit_grow_stack(sljit_sw local_size)
-{
- /* Workaround for calling the internal _chkstk() function on Windows.
- This function touches all 4k pages belongs to the requested stack space,
- which size is passed in local_size. This is necessary on Windows where
- the stack can only grow in 4k steps. However, this function just burn
- CPU cycles if the stack is large enough. However, you don't know it in
- advance, so it must always be called. I think this is a bad design in
- general even if it has some reasons. */
- *(volatile sljit_s32*)alloca(local_size) = 0;
-}
-
-#endif
-
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
diff --git a/testdata/grepinput b/testdata/grepinput
index b01643d..1e2ceb4 100644
--- a/testdata/grepinput
+++ b/testdata/grepinput
@@ -1,6 +1,6 @@
This is a file of miscellaneous text that is used as test data for checking
-that the pcregrep command is working correctly. The file must be more than 24K
-long so that it needs more than a single read() call to process it. New
+that the pcregrep command is working correctly. The file must be more than
+24KiB long so that it needs more than a single read() call to process it. New
features should be added at the end, because some of the tests involve the
output of line numbers, and we don't want these to change.
@@ -9,7 +9,7 @@ In the middle of a line, PATTERN appears.
This pattern is in lower case.
-Here follows a whole lot of stuff that makes the file over 24K long.
+Here follows a whole lot of stuff that makes the file over 24KiB long.
-------------------------------------------------------------------------------
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
diff --git a/testdata/grepoutput b/testdata/grepoutput
index e49c2b2..2bd69be 100644
--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@@ -346,7 +346,7 @@ RC=0
./testdata/grepinput-9-
./testdata/grepinput:10:This pattern is in lower case.
./testdata/grepinput-11-
-./testdata/grepinput-12-Here follows a whole lot of stuff that makes the file over 24K long.
+./testdata/grepinput-12-Here follows a whole lot of stuff that makes the file over 24KiB long.
./testdata/grepinput-13-
--
./testdata/grepinput:623:Check up on PATTERN near the end.
@@ -379,6 +379,7 @@ RC=0
./testdata/grepinputx
RC=0
---------------------------- Test 37 -----------------------------
+24KiB long so that it needs more than a single read() call to process it. New
aaaaa0
aaaaa2
010203040506
@@ -465,11 +466,11 @@ fox jumps
This time it jumps and jumps and jumps.
RC=0
---------------------------- Test 53 ------------------------------
-36972,6
-36990,4
-37024,4
-37066,5
-37083,4
+36976,6
+36994,4
+37028,4
+37070,5
+37087,4
RC=0
---------------------------- Test 54 ------------------------------
595:15,6
@@ -519,8 +520,8 @@ RC=0
pcre2grep: pcre2_match() gave error -47 while matching text that starts:
This is a file of miscellaneous text that is used as test data for checking
-that the pcregrep command is working correctly. The file must be more than 24K
-long so that it needs more than a single read
+that the pcregrep command is working correctly. The file must be more than
+24KiB long so that it needs more than a single re
pcre2grep: Error -46, -47, -53 or -63 means that a resource limit was exceeded.
pcre2grep: Check your regex for nested unlimited loops.
@@ -529,8 +530,8 @@ RC=1
pcre2grep: pcre2_match() gave error -53 while matching text that starts:
This is a file of miscellaneous text that is used as test data for checking
-that the pcregrep command is working correctly. The file must be more than 24K
-long so that it needs more than a single read
+that the pcregrep command is working correctly. The file must be more than
+24KiB long so that it needs more than a single re
pcre2grep: Error -46, -47, -53 or -63 means that a resource limit was exceeded.
pcre2grep: Check your regex for nested unlimited loops.
@@ -814,11 +815,11 @@ RC=0
615:0,12
RC=0
---------------------------- Test 112 -----------------------------
-37168,12
-37180,12
-37192,12
-37204,12
-37216,12
+37172,12
+37184,12
+37196,12
+37208,12
+37220,12
RC=0
---------------------------- Test 113 -----------------------------
480
@@ -945,3 +946,6 @@ RC=0
RC=0
abcd
RC=0
+---------------------------- Test 126 -----------------------------
+ABC
+RC=0
diff --git a/testdata/testinput1 b/testdata/testinput1
index 9a9c5fd..d8615ee 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -2184,6 +2184,11 @@
Blah blah
blaH blah
+/((?i)blah)\s+(?m)A(?i:\1)/
+ blah ABLAH
+\= Expect no match
+ blah aBLAH
+
/(?>a*)*/
a
aa
@@ -5157,14 +5162,6 @@ name)/mark
/A(*MARK:A)A+(*SKIP:B)(B|Z) | AAC/x,mark
AAAC
-/a(*PRUNE:X)bc|qq/mark,no_start_optimize
-\= Expect no match
- axy
-
-/a(*THEN:X)bc|qq/mark,no_start_optimize
-\= Expect no match
- axy
-
/(?=a(*MARK:A)b)..x/mark
abxy
\= Expect no match
@@ -6189,4 +6186,81 @@ ef) x/x,mark
/(?=a+)a(a+)++b/
aab
+/(?<=\G.)/g,aftertext
+ abc
+
+/(?<=(?=.)?)/
+
+/(?<=(?=.)?+)/
+
+/(?<=(?=.)*)/
+
+/(?<=(?=.){4,5})/
+
+/(?<=(?=.){4,5}x)/
+
+/a(?=.(*:X))(*SKIP:X)(*F)|(.)/
+ abc
+
+/a(?>(*:X))(*SKIP:X)(*F)|(.)/
+ abc
+
+/a(?:(*:X))(*SKIP:X)(*F)|(.)/
+ abc
+
+#pattern no_start_optimize
+
+/(?>a(*:1))(?>b(*:1))(*SKIP:1)x|.*/
+ abc
+
+/(?>a(*:1))(?>b)(*SKIP:1)x|.*/
+ abc
+
+#subject mark
+
+/a(*ACCEPT:X)b/
+ abc
+
+/(?=a(*ACCEPT:QQ)bc)axyz/
+ axyz
+
+/(?(DEFINE)(a(*ACCEPT:X)))(?1)b/
+ abc
+
+/a(*F:X)b/
+ abc
+
+/(?(DEFINE)(a(*F:X)))(?1)b/
+ abc
+
+/a(*COMMIT:X)b/
+ abc
+
+/(?(DEFINE)(a(*COMMIT:X)))(?1)b/
+ abc
+
+/a+(*:Z)b(*COMMIT:X)(*SKIP:Z)c|.*/
+ aaaabd
+
+/a+(*:Z)b(*COMMIT:X)(*SKIP:X)c|.*/
+ aaaabd
+
+/a(*COMMIT:X)b/
+ axabc
+
+#pattern -no_start_optimize
+#subject -mark
+
+/(.COMMIT)(*COMMIT::::::::::interal error:::)/
+
+/(*COMMIT:)/
+
+/(*COMMIT:]w)/
+
+/(?i)A(?^)B(?^x:C D)(?^i)e f/
+ aBCDE F
+\= Expect no match
+ aBCDEF
+ AbCDe f
+
# End of testinput1
diff --git a/testdata/testinput15 b/testdata/testinput15
index cd12ad1..2ef6672 100644
--- a/testdata/testinput15
+++ b/testdata/testinput15
@@ -46,32 +46,45 @@
/(*LIMIT_DEPTH=4294967280)abc/I
/(a+)*zz/
+\= Expect no match
aaaaaaaaaaaaaz
+\= Expect limit exceeded
aaaaaaaaaaaaaz\=match_limit=3000
/(a+)*zz/
+\= Expect limit exceeded
aaaaaaaaaaaaaz\=depth_limit=10
/(*LIMIT_MATCH=3000)(a+)*zz/I
+\= Expect limit exceeded
aaaaaaaaaaaaaz
+\= Expect limit exceeded
aaaaaaaaaaaaaz\=match_limit=60000
/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
+\= Expect limit exceeded
aaaaaaaaaaaaaz
/(*LIMIT_MATCH=60000)(a+)*zz/I
+\= Expect no match
aaaaaaaaaaaaaz
+\= Expect limit exceeded
aaaaaaaaaaaaaz\=match_limit=3000
/(*LIMIT_DEPTH=10)(a+)*zz/I
+\= Expect limit exceeded
aaaaaaaaaaaaaz
+\= Expect limit exceeded
aaaaaaaaaaaaaz\=depth_limit=1000
/(*LIMIT_DEPTH=10)(*LIMIT_DEPTH=1000)(a+)*zz/I
+\= Expect no match
aaaaaaaaaaaaaz
/(*LIMIT_DEPTH=1000)(a+)*zz/I
+\= Expect no match
aaaaaaaaaaaaaz
+\= Expect limit exceeded
aaaaaaaaaaaaaz\=depth_limit=10
# These three have infinitely nested recursions.
diff --git a/testdata/testinput17 b/testdata/testinput17
index 9a73ef1..0944151 100644
--- a/testdata/testinput17
+++ b/testdata/testinput17
@@ -160,10 +160,13 @@
aaaaaaaaaaaaaz\=match_limit=3000
/(*LIMIT_MATCH=3000)(a+)*zz/I
+\= Expect limit exceeded
aaaaaaaaaaaaaz
+\= Expect limit exceeded
aaaaaaaaaaaaaz\=match_limit=60000
/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
+\= Expect limit exceeded
aaaaaaaaaaaaaz
/(*LIMIT_MATCH=60000)(a+)*zz/I
@@ -175,12 +178,15 @@
# These three have infinitely nested recursions.
/((?2))((?1))/
+\= Expect JIT stack limit reached
abc
/((?(R2)a+|(?1)b))()/
+\= Expect JIT stack limit reached
aaaabcde
/(?(R)a*(?1)|((?R))b)/
+\= Expect JIT stack limit reached
aaaabcde
# Invalid options disable JIT when called via pcre2_match(), causing the
@@ -277,7 +283,8 @@
/[axm]{7}/
/(.|.)*?bx/
- aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabax
+\= Expect limit exceeded
+ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabax\=match_limit=10000000
# Test JIT disable
diff --git a/testdata/testinput18 b/testdata/testinput18
index 755a0c9..563a506 100644
--- a/testdata/testinput18
+++ b/testdata/testinput18
@@ -134,4 +134,8 @@
/a\b(c/literal,posix,dotall
+/((a)(b)?(c))/posix
+ 123ace
+ 123ace\=posix_startend=2:6
+
# End of testdata/testinput18
diff --git a/testdata/testinput2 b/testdata/testinput2
index 5d3a80e..fc94b35 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -910,6 +910,8 @@
/[:x:]/I
+/\F/I
+
/\l/I
/\L/I
@@ -2949,10 +2951,11 @@
/abc(*:)pqr/
-/abc(*FAIL:123)xyz/
+/(*COMMIT:X)/B
# This should, and does, fail. In Perl, it does not, which I think is a
# bug because replacing the B in the pattern by (B|D) does make it fail.
+# Turning off Perl's optimization by inserting (??{""}) also makes it fail.
/A(*COMMIT)B/aftertext,mark
\= Expect no match
@@ -4007,6 +4010,9 @@
/(?(VERSION>=10.0)yes|no)/I
yesno
+/(?(VERSION>=10.04)yes|no)/
+ yesno
+
/(?(VERSION=8)yes){3}/BI,aftertext
yesno
@@ -4643,6 +4649,9 @@ B)x/alt_verbnames,mark
/(?=a\K)/replace=z
BaCaD
+
+/(?<=\K.)/g,replace=-
+ ab
/(?'abcdefghijklmnopqrstuvwxyzABCDEFG'toolong)/
@@ -4935,6 +4944,9 @@ a)"xI
//replace=0
\=offset=7
+/(?<=\G.)/g,replace=+
+ abc
+
".+\QX\E+"B,no_auto_possess
".+\QX\E+"B,auto_callout,no_auto_possess
@@ -5429,4 +5441,68 @@ a)"xI
/(?=a+)a(a+)++b/B
+/(?<=(?=.){4,5}x)/B
+
+# Perl behaves differently with these when optimization is turned off
+
+/a(*PRUNE:X)bc|qq/mark,no_start_optimize
+\= Expect no match
+ axy
+
+/a(*THEN:X)bc|qq/mark,no_start_optimize
+\= Expect no match
+ axy
+
+/(?^x-i)AB/
+
+/(?^-i)AB/
+
+/(?x-i-i)/
+
+/(?(?=^))b/I
+ abc
+
+/(?(?=^)|)b/I
+ abc
+
+/(?(?=^)|^)b/I
+ bbc
+\= Expect no match
+ abc
+
+/(?(1)^|^())/I
+
+/(?(1)^())b/I
+
+/(?(1)^())+b/I,aftertext
+ abc
+
+/(?(1)^()|^)+b/I,aftertext
+ bbc
+\= Expect no match
+ abc
+
+/(?(1)^()|^)*b/I,aftertext
+ bbc
+ abc
+ xbc
+
+/(?(1)^())+b/I,aftertext
+ abc
+
+/(?(1)^a()|^a)+b/I,aftertext
+ abc
+\= Expect no match
+ bbc
+
+/(?(1)^|^(a))+b/I,aftertext
+ abc
+\= Expect no match
+ bbc
+
+/(?(1)^a()|^a)*b/I,aftertext
+ abc
+ bbc
+ xbc
+
# End of testinput2
diff --git a/testdata/testinput22 b/testdata/testinput22
index e6d4053..5e01fdc 100644
--- a/testdata/testinput22
+++ b/testdata/testinput22
@@ -98,4 +98,10 @@
\= Expect no match - tests \C at end of subject
ab
+/\C[^\v]+\x80/utf
+ [AΏBŀC]
+
+/\C[^\d]+\x80/utf
+ [AΏBŀC]
+
# End of testinput22
diff --git a/testdata/testinput4 b/testdata/testinput4
index 0ef7b8e..a27b6af 100644
--- a/testdata/testinput4
+++ b/testdata/testinput4
@@ -1394,28 +1394,15 @@
\x{6e9}
\x{6ef}
\x{6fa}
-\= Expect no match
- \x{650}
- \x{651}
- \x{652}
- \x{653}
- \x{654}
- \x{655}
-
+
/^\p{Cyrillic}/utf
\x{1d2b}
/^\p{Common}/utf
- \x{589}
- \x{60c}
- \x{61f}
- \x{964}
- \x{965}
+ \x{2116}
+ \x{1D183}
/^\p{Inherited}/utf
- \x{64b}
- \x{654}
- \x{655}
\x{200c}
\= Expect no match
\x{64a}
@@ -2300,5 +2287,35 @@
\x{123}\x{122}\x{123}
\= Expect no match
\x{123}\x{124}\x{123}
+
+/\N{U+1234}/utf
+ \x{1234}
+
+/[\N{U+1234}]/utf
+ \x{1234}
+
+# Test the full list of Unicode "Pattern White Space" characters that are to
+# be ignored by /x. The pattern lines below may show up oddly in text editors
+# or when listed to the screen. Note that characters such as U+2002, which are
+# matched as space by \h and \v are *not* "Pattern White Space".
+
+/A…‎‏

B/x,utf
+ AB
+
+/A B/x,utf
+ A\x{2002}B
+\= Expect no match
+ AB
+
+# -------
+
+/[^\x{100}-\x{ffff}]*[\x80-\xff]/utf
+ \x{99}\x{99}\x{99}
+
+/[^\x{100}-\x{ffff}ABC]*[\x80-\xff]/utf
+ \x{99}\x{99}\x{99}
+
+/[^\x{100}-\x{ffff}]*[\x80-\xff]/i,utf
+ \x{99}\x{99}\x{99}
# End of testinput4
diff --git a/testdata/testinput5 b/testdata/testinput5
index 0366136..687de32 100644
--- a/testdata/testinput5
+++ b/testdata/testinput5
@@ -2030,8 +2030,8 @@
# to test 4.
/^(\p{Adlam}+)(\p{Bhaiksuki}+)(\p{Marchen}+)(\p{Newa}+)(\p{Osage}+)
- (\p{Tangut}+)(\p{Masaram_Gondi}+)(\p{Nushu}+)(\p{Soyombo}+)
- (\p{Zanabazar_Square}+)/x,utf
+ (\p{Tangut}+)(\p{Masaram_Gondi}+)(\p{Nushu}+)(\p{Soyombo}+)
+ (\p{Zanabazar_Square}+)/x,utf
\x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}
/^\x{1E900}\x{104B0}/i,utf
@@ -2041,23 +2041,70 @@
/^(?:(\X)(?C))+$/utf
\x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}\=callout_capture,callout_no_where
-# These two are here because JIT is not yet updated. Also, the very first data
-# line is handled differently by Perl.
+# Similarly for Unicode 11.0.0
+
+/^(\p{Dogra}+)(\p{Gunjala_Gondi}+)(\p{Hanifi_Rohingya}+)(\p{Makasar}+)
+ (\p{Medefaidrin}+)(\p{Old_Sogdian}+)(\p{Sogdian}+)/x,utf
+ \x{11800}\x{11da9}\x{10d27}\x{11ee0}\x{16e48}\x{10f27}\x{10f30}
+
+# These two are here because of differences from Perl.
/^\X/utf
A\x{200d}B A ZWJ
- \x{261D}\x{1F3FB}B E_Base E_Modifier
- \x{1F466}\x{1F3FF}B E_Base_GAZ E_Modifier
- \x{200d}\x{1F3A4}B ZWJ Glue_After_ZWJ
- \x{200d}\x{1F469}B ZWJ E_Base_GAZ
+ \x{261d}\x{261d}B Extended_Pictographic Extended_Pictographic
+ \x{261D}\x{1F3FB}B Extended_Pictographic Extend
\x{1F1E6}\x{1F1E7}B RegionalIndicator RegionalIndicator
- \x{261D}\x{E0100}\x{1F3FB}B E_Base Extend E_Modifier
+ \x{261D}\x{1F3FB}\x{261d}B Extended_Pictographic Extend E-P
+ \x{261D}\x{1F3FB}\x{200d}\x{261d}B Extended_Pictographic Extend ZWJ E-P
# Regional indicators
/^(\X)(\X)/utf,aftertext
\x{1F1E6}\x{1F1E7}\x{1F1E7}B
\x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B
+
+# More differences from Perl
+
+/^[\p{Arabic}]/utf
+\= Expect no match
+ \x{650}
+ \x{651}
+ \x{652}
+ \x{653}
+ \x{654}
+ \x{655}
+
+/^\p{Common}/utf
+ \x{589}
+ \x{60c}
+ \x{61f}
+ \x{964}
+ \x{965}
+
+/^\p{Inherited}/utf
+ \x{64b}
+ \x{654}
+ \x{655}
+ \x{1D1AA}
+/\N{U+}/
+
+/\N{U+}/utf
+
+/\N{U}/
+
+# This tests the non-UTF Unicode NEL pattern whitespace character, only
+# recognized by PCRE2 with /x when there is Unicode support.
+
+/A
+ B/x
+ AB
+
+# This tests Unicode Pattern White Space characters in verb names when they
+# are being processed with PCRE2_EXTENDED. Note: there are UTF-8 characters
+# with code points greater than 255 between A, B, and C in the pattern.
+
+/(*: A‎B
C)abc/x,utf,mark,alt_verbnames
+ abc
# End of testinput5
diff --git a/testdata/testinput6 b/testdata/testinput6
index e2f00c0..f7dedb2 100644
--- a/testdata/testinput6
+++ b/testdata/testinput6
@@ -4874,6 +4874,14 @@
\= Expect depth limit exceeded
a[00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]
+/(*LIMIT_HEAP=0)^((.)(?1)|.)$/
+\= Expect heap limit exceeded
+ a[00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]
+
+/(*LIMIT_HEAP=50000)^((.)(?1)|.)$/
+\= Expect success
+ a[00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]
+
/(02-)?[0-9]{3}-[0-9]{3}/
02-123-123
@@ -4929,8 +4937,9 @@
/(?<=|abc)/endanchored
abcde\=aftertext
-/(*LIMIT_MATCH=100).*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00 \x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););/no_dotstar_anchor
-.*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00 \x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););
+/(*LIMIT_MATCH=100).*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00 \x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););/no_dotstar_anchor
+\= Expect limit exceeded
+.*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00 \x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););
/\n/firstline
xyz\nabc
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index 9c55be9..77b9ff0 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -3346,6 +3346,14 @@ No match
0: blaH blah
1: blaH
+/((?i)blah)\s+(?m)A(?i:\1)/
+ blah ABLAH
+ 0: blah ABLAH
+ 1: blah
+\= Expect no match
+ blah aBLAH
+No match
+
/(?>a*)*/
a
0: a
@@ -8282,16 +8290,6 @@ No match, mark = m
AAAC
0: AAC
-/a(*PRUNE:X)bc|qq/mark,no_start_optimize
-\= Expect no match
- axy
-No match, mark = X
-
-/a(*THEN:X)bc|qq/mark,no_start_optimize
-\= Expect no match
- axy
-No match, mark = X
-
/(?=a(*MARK:A)b)..x/mark
abxy
0: abx
@@ -9822,4 +9820,113 @@ No match
0: aab
1: a
+/(?<=\G.)/g,aftertext
+ abc
+ 0:
+ 0+ bc
+ 0:
+ 0+ c
+ 0:
+ 0+
+
+/(?<=(?=.)?)/
+
+/(?<=(?=.)?+)/
+
+/(?<=(?=.)*)/
+
+/(?<=(?=.){4,5})/
+
+/(?<=(?=.){4,5}x)/
+
+/a(?=.(*:X))(*SKIP:X)(*F)|(.)/
+ abc
+ 0: a
+ 1: a
+
+/a(?>(*:X))(*SKIP:X)(*F)|(.)/
+ abc
+ 0: a
+ 1: a
+
+/a(?:(*:X))(*SKIP:X)(*F)|(.)/
+ abc
+ 0: b
+ 1: b
+
+#pattern no_start_optimize
+
+/(?>a(*:1))(?>b(*:1))(*SKIP:1)x|.*/
+ abc
+ 0: abc
+
+/(?>a(*:1))(?>b)(*SKIP:1)x|.*/
+ abc
+ 0: abc
+
+#subject mark
+
+/a(*ACCEPT:X)b/
+ abc
+ 0: a
+MK: X
+
+/(?=a(*ACCEPT:QQ)bc)axyz/
+ axyz
+ 0: axyz
+MK: QQ
+
+/(?(DEFINE)(a(*ACCEPT:X)))(?1)b/
+ abc
+ 0: ab
+MK: X
+
+/a(*F:X)b/
+ abc
+No match, mark = X
+
+/(?(DEFINE)(a(*F:X)))(?1)b/
+ abc
+No match, mark = X
+
+/a(*COMMIT:X)b/
+ abc
+ 0: ab
+MK: X
+
+/(?(DEFINE)(a(*COMMIT:X)))(?1)b/
+ abc
+ 0: ab
+MK: X
+
+/a+(*:Z)b(*COMMIT:X)(*SKIP:Z)c|.*/
+ aaaabd
+ 0: bd
+
+/a+(*:Z)b(*COMMIT:X)(*SKIP:X)c|.*/
+ aaaabd
+No match, mark = X
+
+/a(*COMMIT:X)b/
+ axabc
+No match, mark = X
+
+#pattern -no_start_optimize
+#subject -mark
+
+/(.COMMIT)(*COMMIT::::::::::interal error:::)/
+
+/(*COMMIT:)/
+
+/(*COMMIT:]w)/
+
+/(?i)A(?^)B(?^x:C D)(?^i)e f/
+ aBCDE F
+ 0: aBCDE F
+\= Expect no match
+ aBCDEF
+No match
+ AbCDe f
+No match
+
# End of testinput1
diff --git a/testdata/testoutput15 b/testdata/testoutput15
index b2068d0..d09e781 100644
--- a/testdata/testoutput15
+++ b/testdata/testoutput15
@@ -124,12 +124,15 @@ Last code unit = 'c'
Subject length lower bound = 3
/(a+)*zz/
+\= Expect no match
aaaaaaaaaaaaaz
No match
+\= Expect limit exceeded
aaaaaaaaaaaaaz\=match_limit=3000
Failed: error -47: match limit exceeded
/(a+)*zz/
+\= Expect limit exceeded
aaaaaaaaaaaaaz\=depth_limit=10
Failed: error -53: matching depth limit exceeded
@@ -139,8 +142,10 @@ Match limit = 3000
Starting code units: a z
Last code unit = 'z'
Subject length lower bound = 2
+\= Expect limit exceeded
aaaaaaaaaaaaaz
Failed: error -47: match limit exceeded
+\= Expect limit exceeded
aaaaaaaaaaaaaz\=match_limit=60000
Failed: error -47: match limit exceeded
@@ -150,6 +155,7 @@ Match limit = 3000
Starting code units: a z
Last code unit = 'z'
Subject length lower bound = 2
+\= Expect limit exceeded
aaaaaaaaaaaaaz
Failed: error -47: match limit exceeded
@@ -159,8 +165,10 @@ Match limit = 60000
Starting code units: a z
Last code unit = 'z'
Subject length lower bound = 2
+\= Expect no match
aaaaaaaaaaaaaz
No match
+\= Expect limit exceeded
aaaaaaaaaaaaaz\=match_limit=3000
Failed: error -47: match limit exceeded
@@ -170,8 +178,10 @@ Depth limit = 10
Starting code units: a z
Last code unit = 'z'
Subject length lower bound = 2
+\= Expect limit exceeded
aaaaaaaaaaaaaz
Failed: error -53: matching depth limit exceeded
+\= Expect limit exceeded
aaaaaaaaaaaaaz\=depth_limit=1000
Failed: error -53: matching depth limit exceeded
@@ -181,6 +191,7 @@ Depth limit = 1000
Starting code units: a z
Last code unit = 'z'
Subject length lower bound = 2
+\= Expect no match
aaaaaaaaaaaaaz
No match
@@ -190,8 +201,10 @@ Depth limit = 1000
Starting code units: a z
Last code unit = 'z'
Subject length lower bound = 2
+\= Expect no match
aaaaaaaaaaaaaz
No match
+\= Expect limit exceeded
aaaaaaaaaaaaaz\=depth_limit=10
Failed: error -53: matching depth limit exceeded
diff --git a/testdata/testoutput17 b/testdata/testoutput17
index a0606a7..acf00e0 100644
--- a/testdata/testoutput17
+++ b/testdata/testoutput17
@@ -300,8 +300,10 @@ Starting code units: a z
Last code unit = 'z'
Subject length lower bound = 2
JIT compilation was successful
+\= Expect limit exceeded
aaaaaaaaaaaaaz
Failed: error -47: match limit exceeded
+\= Expect limit exceeded
aaaaaaaaaaaaaz\=match_limit=60000
Failed: error -47: match limit exceeded
@@ -312,6 +314,7 @@ Starting code units: a z
Last code unit = 'z'
Subject length lower bound = 2
JIT compilation was successful
+\= Expect limit exceeded
aaaaaaaaaaaaaz
Failed: error -47: match limit exceeded
@@ -332,14 +335,17 @@ Failed: error -47: match limit exceeded
# These three have infinitely nested recursions.
/((?2))((?1))/
+\= Expect JIT stack limit reached
abc
Failed: error -46: JIT stack limit reached
/((?(R2)a+|(?1)b))()/
+\= Expect JIT stack limit reached
aaaabcde
Failed: error -46: JIT stack limit reached
/(?(R)a*(?1)|((?R))b)/
+\= Expect JIT stack limit reached
aaaabcde
Failed: error -46: JIT stack limit reached
@@ -516,7 +522,8 @@ Failed: error -46: JIT stack limit reached
/[axm]{7}/
/(.|.)*?bx/
- aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabax
+\= Expect limit exceeded
+ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabax\=match_limit=10000000
Failed: error -47: match limit exceeded
# Test JIT disable
diff --git a/testdata/testoutput18 b/testdata/testoutput18
index d51423d..d6e3c71 100644
--- a/testdata/testoutput18
+++ b/testdata/testoutput18
@@ -46,6 +46,7 @@
defabc\=noteol
0: def
1: def
+ 2: <unset>
3: def
/the quick brown fox/
@@ -206,4 +207,18 @@ No match: POSIX code 17: match failed
/a\b(c/literal,posix,dotall
Failed: POSIX code 16: bad argument at offset 0
+/((a)(b)?(c))/posix
+ 123ace
+ 0: ac
+ 1: ac
+ 2: a
+ 3: <unset>
+ 4: c
+ 123ace\=posix_startend=2:6
+ 0: ac
+ 1: ac
+ 2: a
+ 3: <unset>
+ 4: c
+
# End of testdata/testinput18
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index fcaac8f..ecf0d80 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -3244,20 +3244,23 @@ Failed: error 113 at offset 0: POSIX collating elements are not supported
/[:x:]/I
Failed: error 112 at offset 0: POSIX named classes are supported only within a class
+/\F/I
+Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u
+
/\l/I
-Failed: error 137 at offset 2: PCRE does not support \L, \l, \N{name}, \U, or \u
+Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u
/\L/I
-Failed: error 137 at offset 2: PCRE does not support \L, \l, \N{name}, \U, or \u
+Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u
/\N{name}/I
-Failed: error 137 at offset 2: PCRE does not support \L, \l, \N{name}, \U, or \u
+Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u
/\u/I
-Failed: error 137 at offset 2: PCRE does not support \L, \l, \N{name}, \U, or \u
+Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u
/\U/I
-Failed: error 137 at offset 2: PCRE does not support \L, \l, \N{name}, \U, or \u
+Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u
/a{1,3}b/ungreedy
ab
@@ -10154,11 +10157,17 @@ Failed: error 166 at offset 10: (*MARK) must have an argument
/abc(*:)pqr/
Failed: error 166 at offset 6: (*MARK) must have an argument
-/abc(*FAIL:123)xyz/
-Failed: error 159 at offset 10: an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)
+/(*COMMIT:X)/B
+------------------------------------------------------------------
+ Bra
+ *COMMIT X
+ Ket
+ End
+------------------------------------------------------------------
# This should, and does, fail. In Perl, it does not, which I think is a
# bug because replacing the B in the pattern by (B|D) does make it fail.
+# Turning off Perl's optimization by inserting (??{""}) also makes it fail.
/A(*COMMIT)B/aftertext,mark
\= Expect no match
@@ -13188,7 +13197,7 @@ Failed: error 167 at offset 5: non-hex character in \x{} (closing brace missing?
Failed: error 167 at offset 7: non-hex character in \x{} (closing brace missing?)
/^A\x{/
-Failed: error 178 at offset 5: digits missing in \x{} or \o{}
+Failed: error 178 at offset 5: digits missing in \x{} or \o{} or \N{U+}
/[ab]++/B,no_auto_possess
------------------------------------------------------------------
@@ -13402,7 +13411,7 @@ Failed: error 133 at offset 7: parentheses are too deeply nested (stack check)
Failed: error 155 at offset 2: missing opening brace after \o
/\o{}/
-Failed: error 178 at offset 3: digits missing in \x{} or \o{}
+Failed: error 178 at offset 3: digits missing in \x{} or \o{} or \N{U+}
/\o{whatever}/
Failed: error 164 at offset 3: non-octal character in \o{} (closing brace missing?)
@@ -13410,7 +13419,7 @@ Failed: error 164 at offset 3: non-octal character in \o{} (closing brace missin
/\xthing/
/\x{}/
-Failed: error 178 at offset 3: digits missing in \x{} or \o{}
+Failed: error 178 at offset 3: digits missing in \x{} or \o{} or \N{U+}
/\x{whatever}/
Failed: error 167 at offset 3: non-hex character in \x{} (closing brace missing?)
@@ -13483,6 +13492,10 @@ Subject length lower bound = 2
yesno
0: yes
+/(?(VERSION>=10.04)yes|no)/
+ yesno
+ 0: yes
+
/(?(VERSION=8)yes){3}/BI,aftertext
------------------------------------------------------------------
Bra
@@ -13537,7 +13550,7 @@ Failed: error 179 at offset 11: syntax error or number too big in (?(VERSION con
Failed: error 179 at offset 16: syntax error or number too big in (?(VERSION condition
/(?(VERSION=10.101)yes|no)/
-Failed: error 179 at offset 17: syntax error or number too big in (?(VERSION condition
+Failed: error 179 at offset 16: syntax error or number too big in (?(VERSION condition
/abcd/I
Capturing subpattern count = 0
@@ -14899,7 +14912,11 @@ Subject length lower bound = 1
/(?=a\K)/replace=z
BaCaD
-Failed: error -60: match with end before start is not supported
+Failed: error -60: match with end before start or start moved backwards is not supported
+
+/(?<=\K.)/g,replace=-
+ ab
+Failed: error -60: match with end before start or start moved backwards is not supported
/(?'abcdefghijklmnopqrstuvwxyzABCDEFG'toolong)/
Failed: error 148 at offset 36: subpattern name is too long (maximum 32 characters)
@@ -15545,6 +15562,10 @@ Failed: error -57 at offset 2 in replacement: bad escape sequence in replacement
\=offset=7
Failed: error -33: bad offset value
+/(?<=\G.)/g,replace=+
+ abc
+ 3: a+b+c+
+
".+\QX\E+"B,no_auto_possess
------------------------------------------------------------------
Bra
@@ -16575,8 +16596,175 @@ No match
End
------------------------------------------------------------------
+/(?<=(?=.){4,5}x)/B
+------------------------------------------------------------------
+ Bra
+ AssertB
+ Reverse
+ Assert
+ Any
+ Ket
+ x
+ Ket
+ Ket
+ End
+------------------------------------------------------------------
+
+# Perl behaves differently with these when optimization is turned off
+
+/a(*PRUNE:X)bc|qq/mark,no_start_optimize
+\= Expect no match
+ axy
+No match, mark = X
+
+/a(*THEN:X)bc|qq/mark,no_start_optimize
+\= Expect no match
+ axy
+No match, mark = X
+
+/(?^x-i)AB/
+Failed: error 194 at offset 4: invalid hyphen in option setting
+
+/(?^-i)AB/
+Failed: error 194 at offset 3: invalid hyphen in option setting
+
+/(?x-i-i)/
+Failed: error 194 at offset 5: invalid hyphen in option setting
+
+/(?(?=^))b/I
+Capturing subpattern count = 0
+Last code unit = 'b'
+Subject length lower bound = 1
+ abc
+ 0: b
+
+/(?(?=^)|)b/I
+Capturing subpattern count = 0
+First code unit = 'b'
+Subject length lower bound = 1
+ abc
+ 0: b
+
+/(?(?=^)|^)b/I
+Capturing subpattern count = 0
+Compile options: <none>
+Overall options: anchored
+First code unit = 'b'
+Subject length lower bound = 1
+ bbc
+ 0: b
+\= Expect no match
+ abc
+No match
+
+/(?(1)^|^())/I
+Capturing subpattern count = 1
+Max back reference = 1
+May match empty string
+Compile options: <none>
+Overall options: anchored
+Subject length lower bound = 0
+
+/(?(1)^())b/I
+Capturing subpattern count = 1
+Max back reference = 1
+Last code unit = 'b'
+Subject length lower bound = 1
+
+/(?(1)^())+b/I,aftertext
+Capturing subpattern count = 1
+Max back reference = 1
+Last code unit = 'b'
+Subject length lower bound = 1
+ abc
+ 0: b
+ 0+ c
+
+/(?(1)^()|^)+b/I,aftertext
+Capturing subpattern count = 1
+Max back reference = 1
+Compile options: <none>
+Overall options: anchored
+First code unit = 'b'
+Subject length lower bound = 1
+ bbc
+ 0: b
+ 0+ bc
+\= Expect no match
+ abc
+No match
+
+/(?(1)^()|^)*b/I,aftertext
+Capturing subpattern count = 1
+Max back reference = 1
+First code unit = 'b'
+Subject length lower bound = 1
+ bbc
+ 0: b
+ 0+ bc
+ abc
+ 0: b
+ 0+ c
+ xbc
+ 0: b
+ 0+ c
+
+/(?(1)^())+b/I,aftertext
+Capturing subpattern count = 1
+Max back reference = 1
+Last code unit = 'b'
+Subject length lower bound = 1
+ abc
+ 0: b
+ 0+ c
+
+/(?(1)^a()|^a)+b/I,aftertext
+Capturing subpattern count = 1
+Max back reference = 1
+Compile options: <none>
+Overall options: anchored
+First code unit = 'a'
+Last code unit = 'b'
+Subject length lower bound = 2
+ abc
+ 0: ab
+ 0+ c
+\= Expect no match
+ bbc
+No match
+
+/(?(1)^|^(a))+b/I,aftertext
+Capturing subpattern count = 1
+Max back reference = 1
+Compile options: <none>
+Overall options: anchored
+Last code unit = 'b'
+Subject length lower bound = 1
+ abc
+ 0: ab
+ 0+ c
+ 1: a
+\= Expect no match
+ bbc
+No match
+
+/(?(1)^a()|^a)*b/I,aftertext
+Capturing subpattern count = 1
+Max back reference = 1
+Last code unit = 'b'
+Subject length lower bound = 1
+ abc
+ 0: ab
+ 0+ c
+ bbc
+ 0: b
+ 0+ bc
+ xbc
+ 0: b
+ 0+ c
+
# End of testinput2
-Error -65: PCRE2_ERROR_BADDATA (unknown error number)
+Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
Error -2: partial match
Error -1: no match
diff --git a/testdata/testoutput22-16 b/testdata/testoutput22-16
index 88f827c..df29e14 100644
--- a/testdata/testoutput22-16
+++ b/testdata/testoutput22-16
@@ -171,4 +171,12 @@ No match
ab
No match
+/\C[^\v]+\x80/utf
+ [AΏBŀC]
+No match
+
+/\C[^\d]+\x80/utf
+ [AΏBŀC]
+No match
+
# End of testinput22
diff --git a/testdata/testoutput22-32 b/testdata/testoutput22-32
index ac485fc..f0b7984 100644
--- a/testdata/testoutput22-32
+++ b/testdata/testoutput22-32
@@ -169,4 +169,12 @@ No match
ab
No match
+/\C[^\v]+\x80/utf
+ [AΏBŀC]
+No match
+
+/\C[^\d]+\x80/utf
+ [AΏBŀC]
+No match
+
# End of testinput22
diff --git a/testdata/testoutput22-8 b/testdata/testoutput22-8
index 3d31fbc..0a04aa8 100644
--- a/testdata/testoutput22-8
+++ b/testdata/testoutput22-8
@@ -173,4 +173,12 @@ No match
ab
No match
+/\C[^\v]+\x80/utf
+ [AΏBŀC]
+No match
+
+/\C[^\d]+\x80/utf
+ [AΏBŀC]
+No match
+
# End of testinput22
diff --git a/testdata/testoutput4 b/testdata/testoutput4
index 6056e6d..ba3df37 100644
--- a/testdata/testoutput4
+++ b/testdata/testoutput4
@@ -2293,43 +2293,18 @@ No match
0: \x{6ef}
\x{6fa}
0: \x{6fa}
-\= Expect no match
- \x{650}
-No match
- \x{651}
-No match
- \x{652}
-No match
- \x{653}
-No match
- \x{654}
-No match
- \x{655}
-No match
-
+
/^\p{Cyrillic}/utf
\x{1d2b}
0: \x{1d2b}
/^\p{Common}/utf
- \x{589}
- 0: \x{589}
- \x{60c}
- 0: \x{60c}
- \x{61f}
- 0: \x{61f}
- \x{964}
- 0: \x{964}
- \x{965}
- 0: \x{965}
+ \x{2116}
+ 0: \x{2116}
+ \x{1D183}
+ 0: \x{1d183}
/^\p{Inherited}/utf
- \x{64b}
- 0: \x{64b}
- \x{654}
- 0: \x{654}
- \x{655}
- 0: \x{655}
\x{200c}
0: \x{200c}
\= Expect no match
@@ -3728,5 +3703,43 @@ No match
\= Expect no match
\x{123}\x{124}\x{123}
No match
+
+/\N{U+1234}/utf
+ \x{1234}
+ 0: \x{1234}
+
+/[\N{U+1234}]/utf
+ \x{1234}
+ 0: \x{1234}
+
+# Test the full list of Unicode "Pattern White Space" characters that are to
+# be ignored by /x. The pattern lines below may show up oddly in text editors
+# or when listed to the screen. Note that characters such as U+2002, which are
+# matched as space by \h and \v are *not* "Pattern White Space".
+
+/A…‎‏

B/x,utf
+ AB
+ 0: AB
+
+/A B/x,utf
+ A\x{2002}B
+ 0: A\x{2002}B
+\= Expect no match
+ AB
+No match
+
+# -------
+
+/[^\x{100}-\x{ffff}]*[\x80-\xff]/utf
+ \x{99}\x{99}\x{99}
+ 0: \x{99}\x{99}\x{99}
+
+/[^\x{100}-\x{ffff}ABC]*[\x80-\xff]/utf
+ \x{99}\x{99}\x{99}
+ 0: \x{99}\x{99}\x{99}
+
+/[^\x{100}-\x{ffff}]*[\x80-\xff]/i,utf
+ \x{99}\x{99}\x{99}
+ 0: \x{99}\x{99}\x{99}
# End of testinput4
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index 4b3171c..51caa18 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -4593,8 +4593,8 @@ No match
# to test 4.
/^(\p{Adlam}+)(\p{Bhaiksuki}+)(\p{Marchen}+)(\p{Newa}+)(\p{Osage}+)
- (\p{Tangut}+)(\p{Masaram_Gondi}+)(\p{Nushu}+)(\p{Soyombo}+)
- (\p{Zanabazar_Square}+)/x,utf
+ (\p{Tangut}+)(\p{Masaram_Gondi}+)(\p{Nushu}+)(\p{Soyombo}+)
+ (\p{Zanabazar_Square}+)/x,utf
\x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}
0: \x{1e900}\x{1e924}\x{1e953}\x{11c00}\x{11c2d}\x{11c3e}\x{11c70}\x{11c77}\x{11cab}\x{11400}\x{1142f}\x{11455}\x{104b0}\x{104d8}\x{104fb}\x{16fe0}\x{18800}\x{18af2}\x{11d00}\x{11d3a}\x{11d59}\x{16fe1}\x{1b170}\x{1b2fb}\x{11a50}\x{11a58}\x{11aa2}\x{11a00}\x{11a07}\x{11a47}
1: \x{1e900}\x{1e924}\x{1e953}
@@ -4667,24 +4667,35 @@ Callout 0: last capture = 1
0: \x{1e900}\x{1e924}\x{1e953}\x{11c00}\x{11c2d}\x{11c3e}\x{11c70}\x{11c77}\x{11cab}\x{11400}\x{1142f}\x{11455}\x{104b0}\x{104d8}\x{104fb}\x{16fe0}\x{18800}\x{18af2}\x{11d00}\x{11d3a}\x{11d59}\x{16fe1}\x{1b170}\x{1b2fb}\x{11a50}\x{11a58}\x{11aa2}\x{11a00}\x{11a07}\x{11a47}
1: \x{11a00}\x{11a07}\x{11a47}
-# These two are here because JIT is not yet updated. Also, the very first data
-# line is handled differently by Perl.
+# Similarly for Unicode 11.0.0
+
+/^(\p{Dogra}+)(\p{Gunjala_Gondi}+)(\p{Hanifi_Rohingya}+)(\p{Makasar}+)
+ (\p{Medefaidrin}+)(\p{Old_Sogdian}+)(\p{Sogdian}+)/x,utf
+ \x{11800}\x{11da9}\x{10d27}\x{11ee0}\x{16e48}\x{10f27}\x{10f30}
+ 0: \x{11800}\x{11da9}\x{10d27}\x{11ee0}\x{16e48}\x{10f27}\x{10f30}
+ 1: \x{11800}
+ 2: \x{11da9}
+ 3: \x{10d27}
+ 4: \x{11ee0}
+ 5: \x{16e48}
+ 6: \x{10f27}
+ 7: \x{10f30}
+
+# These two are here because of differences from Perl.
/^\X/utf
A\x{200d}B A ZWJ
0: A\x{200d}
- \x{261D}\x{1F3FB}B E_Base E_Modifier
+ \x{261d}\x{261d}B Extended_Pictographic Extended_Pictographic
+ 0: \x{261d}\x{261d}
+ \x{261D}\x{1F3FB}B Extended_Pictographic Extend
0: \x{261d}\x{1f3fb}
- \x{1F466}\x{1F3FF}B E_Base_GAZ E_Modifier
- 0: \x{1f466}\x{1f3ff}
- \x{200d}\x{1F3A4}B ZWJ Glue_After_ZWJ
- 0: \x{200d}\x{1f3a4}
- \x{200d}\x{1F469}B ZWJ E_Base_GAZ
- 0: \x{200d}\x{1f469}
\x{1F1E6}\x{1F1E7}B RegionalIndicator RegionalIndicator
0: \x{1f1e6}\x{1f1e7}
- \x{261D}\x{E0100}\x{1F3FB}B E_Base Extend E_Modifier
- 0: \x{261d}\x{e0100}\x{1f3fb}
+ \x{261D}\x{1F3FB}\x{261d}B Extended_Pictographic Extend E-P
+ 0: \x{261d}\x{1f3fb}\x{261d}
+ \x{261D}\x{1F3FB}\x{200d}\x{261d}B Extended_Pictographic Extend ZWJ E-P
+ 0: \x{261d}\x{1f3fb}\x{200d}\x{261d}
# Regional indicators
@@ -4699,6 +4710,70 @@ Callout 0: last capture = 1
0+ B
1: \x{1f1e6}\x{1f1e7}
2: \x{1f1e7}\x{1f1e6}
+
+# More differences from Perl
+
+/^[\p{Arabic}]/utf
+\= Expect no match
+ \x{650}
+No match
+ \x{651}
+No match
+ \x{652}
+No match
+ \x{653}
+No match
+ \x{654}
+No match
+ \x{655}
+No match
+
+/^\p{Common}/utf
+ \x{589}
+ 0: \x{589}
+ \x{60c}
+ 0: \x{60c}
+ \x{61f}
+ 0: \x{61f}
+ \x{964}
+ 0: \x{964}
+ \x{965}
+ 0: \x{965}
+
+/^\p{Inherited}/utf
+ \x{64b}
+ 0: \x{64b}
+ \x{654}
+ 0: \x{654}
+ \x{655}
+ 0: \x{655}
+ \x{1D1AA}
+ 0: \x{1d1aa}
+
+/\N{U+}/
+Failed: error 193 at offset 2: \N{U+dddd} is supported only in Unicode (UTF) mode
+
+/\N{U+}/utf
+Failed: error 178 at offset 5: digits missing in \x{} or \o{} or \N{U+}
+
+/\N{U}/
+Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u
+
+# This tests the non-UTF Unicode NEL pattern whitespace character, only
+# recognized by PCRE2 with /x when there is Unicode support.
+
+/A
+ B/x
+ AB
+ 0: AB
+
+# This tests Unicode Pattern White Space characters in verb names when they
+# are being processed with PCRE2_EXTENDED. Note: there are UTF-8 characters
+# with code points greater than 255 between A, B, and C in the pattern.
+/(*: A‎B
C)abc/x,utf,mark,alt_verbnames
+ abc
+ 0: abc
+MK: ABC
# End of testinput5
diff --git a/testdata/testoutput6 b/testdata/testoutput6
index b409fe0..caec833 100644
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@@ -7667,12 +7667,23 @@ No match
a[00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]
Failed: error -53: matching depth limit exceeded
+/(*LIMIT_HEAP=0)^((.)(?1)|.)$/
+\= Expect heap limit exceeded
+ a[00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]
+Failed: error -63: heap limit exceeded
+
+/(*LIMIT_HEAP=50000)^((.)(?1)|.)$/
+\= Expect success
+ a[00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]
+ 0: a[00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]
+
/(02-)?[0-9]{3}-[0-9]{3}/
02-123-123
0: 02-123-123
/^(a(?2))(b)(?1)/
abbab\=find_limits
+Minimum heap limit = 0
Minimum match limit = 4
Minimum depth limit = 2
0: abbab
@@ -7749,8 +7760,9 @@ No match
0:
0+
-/(*LIMIT_MATCH=100).*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00 \x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););/no_dotstar_anchor
-.*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00 \x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););
+/(*LIMIT_MATCH=100).*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00 \x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););/no_dotstar_anchor
+\= Expect limit exceeded
+.*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00 \x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););
Failed: error -47: match limit exceeded
/\n/firstline
diff --git a/testdata/testoutput8-16-4 b/testdata/testoutput8-16-4
new file mode 100644
index 0000000..722b0e1
--- /dev/null
+++ b/testdata/testoutput8-16-4
@@ -0,0 +1,1022 @@
+# There are two sorts of patterns in this test. A number of them are
+# representative patterns whose lengths and offsets are checked. This is just a
+# doublecheck test to ensure the sizes don't go horribly wrong when something
+# is changed. The operation of these patterns is checked in other tests.
+#
+# This file also contains tests whose output varies with code unit size and/or
+# link size. Unicode support is required for these tests. There are separate
+# output files for each code unit size and link size.
+
+#pattern fullbincode,memory
+
+/((?i)b)/
+Memory allocation (code space): 32
+------------------------------------------------------------------
+ 0 12 Bra
+ 3 6 CBra 1
+ 7 /i b
+ 9 6 Ket
+ 12 12 Ket
+ 15 End
+------------------------------------------------------------------
+
+/(?s)(.*X|^B)/
+Memory allocation (code space): 48
+------------------------------------------------------------------
+ 0 20 Bra
+ 3 8 CBra 1
+ 7 AllAny*
+ 9 X
+ 11 6 Alt
+ 14 ^
+ 15 B
+ 17 14 Ket
+ 20 20 Ket
+ 23 End
+------------------------------------------------------------------
+
+/(?s:.*X|^B)/
+Memory allocation (code space): 46
+------------------------------------------------------------------
+ 0 19 Bra
+ 3 7 Bra
+ 6 AllAny*
+ 8 X
+ 10 6 Alt
+ 13 ^
+ 14 B
+ 16 13 Ket
+ 19 19 Ket
+ 22 End
+------------------------------------------------------------------
+
+/^[[:alnum:]]/
+Memory allocation (code space): 50
+------------------------------------------------------------------
+ 0 21 Bra
+ 3 ^
+ 4 [0-9A-Za-z]
+ 21 21 Ket
+ 24 End
+------------------------------------------------------------------
+
+/#/Ix
+Memory allocation (code space): 14
+------------------------------------------------------------------
+ 0 3 Bra
+ 3 3 Ket
+ 6 End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+May match empty string
+Options: extended
+Subject length lower bound = 0
+
+/a#/Ix
+Memory allocation (code space): 18
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 a
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: extended
+First code unit = 'a'
+Subject length lower bound = 1
+
+/x?+/
+Memory allocation (code space): 18
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 x?+
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/x++/
+Memory allocation (code space): 18
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 x++
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/x{1,3}+/
+Memory allocation (code space): 24
+------------------------------------------------------------------
+ 0 8 Bra
+ 3 x
+ 5 x{0,2}+
+ 8 8 Ket
+ 11 End
+------------------------------------------------------------------
+
+/(x)*+/
+Memory allocation (code space): 34
+------------------------------------------------------------------
+ 0 13 Bra
+ 3 Braposzero
+ 4 6 CBraPos 1
+ 8 x
+ 10 6 KetRpos
+ 13 13 Ket
+ 16 End
+------------------------------------------------------------------
+
+/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/
+Memory allocation (code space): 166
+------------------------------------------------------------------
+ 0 79 Bra
+ 3 ^
+ 4 72 CBra 1
+ 8 6 CBra 2
+ 12 a+
+ 14 6 Ket
+ 17 22 CBra 3
+ 21 [ab]+?
+ 39 22 Ket
+ 42 22 CBra 4
+ 46 [bc]+
+ 64 22 Ket
+ 67 6 CBra 5
+ 71 \w*+
+ 73 6 Ket
+ 76 72 Ket
+ 79 79 Ket
+ 82 End
+------------------------------------------------------------------
+
+"8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
+Memory allocation (code space): 1652
+------------------------------------------------------------------
+ 0 822 Bra
+ 3 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
+821 \b
+822 822 Ket
+825 End
+------------------------------------------------------------------
+
+"\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b"
+Memory allocation (code space): 1632
+------------------------------------------------------------------
+ 0 812 Bra
+ 3 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
+811 \b
+812 812 Ket
+815 End
+------------------------------------------------------------------
+
+/(a(?1)b)/
+Memory allocation (code space): 42
+------------------------------------------------------------------
+ 0 17 Bra
+ 3 11 CBra 1
+ 7 a
+ 9 3 Recurse
+ 12 b
+ 14 11 Ket
+ 17 17 Ket
+ 20 End
+------------------------------------------------------------------
+
+/(a(?1)+b)/
+Memory allocation (code space): 54
+------------------------------------------------------------------
+ 0 23 Bra
+ 3 17 CBra 1
+ 7 a
+ 9 6 SBra
+ 12 3 Recurse
+ 15 6 KetRmax
+ 18 b
+ 20 17 Ket
+ 23 23 Ket
+ 26 End
+------------------------------------------------------------------
+
+/a(?P<name1>b|c)d(?P<longername2>e)/
+Memory allocation (code space): 68
+------------------------------------------------------------------
+ 0 30 Bra
+ 3 a
+ 5 6 CBra 1
+ 9 b
+ 11 5 Alt
+ 14 c
+ 16 11 Ket
+ 19 d
+ 21 6 CBra 2
+ 25 e
+ 27 6 Ket
+ 30 30 Ket
+ 33 End
+------------------------------------------------------------------
+
+/(?:a(?P<c>c(?P<d>d)))(?P<a>a)/
+Memory allocation (code space): 84
+------------------------------------------------------------------
+ 0 38 Bra
+ 3 23 Bra
+ 6 a
+ 8 15 CBra 1
+ 12 c
+ 14 6 CBra 2
+ 18 d
+ 20 6 Ket
+ 23 15 Ket
+ 26 23 Ket
+ 29 6 CBra 3
+ 33 a
+ 35 6 Ket
+ 38 38 Ket
+ 41 End
+------------------------------------------------------------------
+
+/(?P<a>a)...(?P=a)bbb(?P>a)d/
+Memory allocation (code space): 64
+------------------------------------------------------------------
+ 0 28 Bra
+ 3 6 CBra 1
+ 7 a
+ 9 6 Ket
+ 12 Any
+ 13 Any
+ 14 Any
+ 15 \1
+ 17 bbb
+ 23 3 Recurse
+ 26 d
+ 28 28 Ket
+ 31 End
+------------------------------------------------------------------
+
+/abc(?C255)de(?C)f/
+Memory allocation (code space): 62
+------------------------------------------------------------------
+ 0 27 Bra
+ 3 abc
+ 9 Callout 255 10 1
+ 15 de
+ 19 Callout 0 16 1
+ 25 f
+ 27 27 Ket
+ 30 End
+------------------------------------------------------------------
+
+/abcde/auto_callout
+Memory allocation (code space): 106
+------------------------------------------------------------------
+ 0 49 Bra
+ 3 Callout 255 0 1
+ 9 a
+ 11 Callout 255 1 1
+ 17 b
+ 19 Callout 255 2 1
+ 25 c
+ 27 Callout 255 3 1
+ 33 d
+ 35 Callout 255 4 1
+ 41 e
+ 43 Callout 255 5 0
+ 49 49 Ket
+ 52 End
+------------------------------------------------------------------
+
+/\x{100}/utf
+Memory allocation (code space): 18
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 \x{100}
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/\x{1000}/utf
+Memory allocation (code space): 18
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 \x{1000}
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/\x{10000}/utf
+Memory allocation (code space): 20
+------------------------------------------------------------------
+ 0 6 Bra
+ 3 \x{10000}
+ 6 6 Ket
+ 9 End
+------------------------------------------------------------------
+
+/\x{100000}/utf
+Memory allocation (code space): 20
+------------------------------------------------------------------
+ 0 6 Bra
+ 3 \x{100000}
+ 6 6 Ket
+ 9 End
+------------------------------------------------------------------
+
+/\x{10ffff}/utf
+Memory allocation (code space): 20
+------------------------------------------------------------------
+ 0 6 Bra
+ 3 \x{10ffff}
+ 6 6 Ket
+ 9 End
+------------------------------------------------------------------
+
+/\x{110000}/utf
+Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
+
+/[\x{ff}]/utf
+Memory allocation (code space): 18
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 \x{ff}
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/[\x{100}]/utf
+Memory allocation (code space): 18
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 \x{100}
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/\x80/utf
+Memory allocation (code space): 18
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 \x{80}
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/\xff/utf
+Memory allocation (code space): 18
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 \x{ff}
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/\x{0041}\x{2262}\x{0391}\x{002e}/I,utf
+Memory allocation (code space): 30
+------------------------------------------------------------------
+ 0 11 Bra
+ 3 A\x{2262}\x{391}.
+ 11 11 Ket
+ 14 End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First code unit = 'A'
+Last code unit = '.'
+Subject length lower bound = 4
+
+/\x{D55c}\x{ad6d}\x{C5B4}/I,utf
+Memory allocation (code space): 26
+------------------------------------------------------------------
+ 0 9 Bra
+ 3 \x{d55c}\x{ad6d}\x{c5b4}
+ 9 9 Ket
+ 12 End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First code unit = \x{d55c}
+Last code unit = \x{c5b4}
+Subject length lower bound = 3
+
+/\x{65e5}\x{672c}\x{8a9e}/I,utf
+Memory allocation (code space): 26
+------------------------------------------------------------------
+ 0 9 Bra
+ 3 \x{65e5}\x{672c}\x{8a9e}
+ 9 9 Ket
+ 12 End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Options: utf
+First code unit = \x{65e5}
+Last code unit = \x{8a9e}
+Subject length lower bound = 3
+
+/[\x{100}]/utf
+Memory allocation (code space): 18
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 \x{100}
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/[Z\x{100}]/utf
+Memory allocation (code space): 60
+------------------------------------------------------------------
+ 0 26 Bra
+ 3 [Z\x{100}]
+ 26 26 Ket
+ 29 End
+------------------------------------------------------------------
+
+/^[\x{100}\E-\Q\E\x{150}]/utf
+Memory allocation (code space): 32
+------------------------------------------------------------------
+ 0 12 Bra
+ 3 ^
+ 4 [\x{100}-\x{150}]
+ 12 12 Ket
+ 15 End
+------------------------------------------------------------------
+
+/^[\QĀ\E-\QŐ\E]/utf
+Memory allocation (code space): 32
+------------------------------------------------------------------
+ 0 12 Bra
+ 3 ^
+ 4 [\x{100}-\x{150}]
+ 12 12 Ket
+ 15 End
+------------------------------------------------------------------
+
+/^[\QĀ\E-\QŐ\E/utf
+Failed: error 106 at offset 13: missing terminating ] for character class
+
+/[\p{L}]/
+Memory allocation (code space): 30
+------------------------------------------------------------------
+ 0 11 Bra
+ 3 [\p{L}]
+ 11 11 Ket
+ 14 End
+------------------------------------------------------------------
+
+/[\p{^L}]/
+Memory allocation (code space): 30
+------------------------------------------------------------------
+ 0 11 Bra
+ 3 [\P{L}]
+ 11 11 Ket
+ 14 End
+------------------------------------------------------------------
+
+/[\P{L}]/
+Memory allocation (code space): 30
+------------------------------------------------------------------
+ 0 11 Bra
+ 3 [\P{L}]
+ 11 11 Ket
+ 14 End
+------------------------------------------------------------------
+
+/[\P{^L}]/
+Memory allocation (code space): 30
+------------------------------------------------------------------
+ 0 11 Bra
+ 3 [\p{L}]
+ 11 11 Ket
+ 14 End
+------------------------------------------------------------------
+
+/[abc\p{L}\x{0660}]/utf
+Memory allocation (code space): 66
+------------------------------------------------------------------
+ 0 29 Bra
+ 3 [a-c\p{L}\x{660}]
+ 29 29 Ket
+ 32 End
+------------------------------------------------------------------
+
+/[\p{Nd}]/utf
+Memory allocation (code space): 30
+------------------------------------------------------------------
+ 0 11 Bra
+ 3 [\p{Nd}]
+ 11 11 Ket
+ 14 End
+------------------------------------------------------------------
+
+/[\p{Nd}+-]+/utf
+Memory allocation (code space): 64
+------------------------------------------------------------------
+ 0 28 Bra
+ 3 [+\-\p{Nd}]++
+ 28 28 Ket
+ 31 End
+------------------------------------------------------------------
+
+/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/i,utf
+Memory allocation (code space): 36
+------------------------------------------------------------------
+ 0 14 Bra
+ 3 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
+ 14 14 Ket
+ 17 End
+------------------------------------------------------------------
+
+/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/utf
+Memory allocation (code space): 36
+------------------------------------------------------------------
+ 0 14 Bra
+ 3 A\x{391}\x{10427}\x{ff3a}\x{1fb0}
+ 14 14 Ket
+ 17 End
+------------------------------------------------------------------
+
+/[\x{105}-\x{109}]/i,utf
+Memory allocation (code space): 30
+------------------------------------------------------------------
+ 0 11 Bra
+ 3 [\x{104}-\x{109}]
+ 11 11 Ket
+ 14 End
+------------------------------------------------------------------
+
+/( ( (?(1)0|) )* )/x
+Memory allocation (code space): 70
+------------------------------------------------------------------
+ 0 31 Bra
+ 3 25 CBra 1
+ 7 Brazero
+ 8 17 SCBra 2
+ 12 7 Cond
+ 15 1 Cond ref
+ 17 0
+ 19 3 Alt
+ 22 10 Ket
+ 25 17 KetRmax
+ 28 25 Ket
+ 31 31 Ket
+ 34 End
+------------------------------------------------------------------
+
+/( (?(1)0|)* )/x
+Memory allocation (code space): 56
+------------------------------------------------------------------
+ 0 24 Bra
+ 3 18 CBra 1
+ 7 Brazero
+ 8 7 SCond
+ 11 1 Cond ref
+ 13 0
+ 15 3 Alt
+ 18 10 KetRmax
+ 21 18 Ket
+ 24 24 Ket
+ 27 End
+------------------------------------------------------------------
+
+/[a]/
+Memory allocation (code space): 18
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 a
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/[a]/utf
+Memory allocation (code space): 18
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 a
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/[\xaa]/
+Memory allocation (code space): 18
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 \x{aa}
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/[\xaa]/utf
+Memory allocation (code space): 18
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 \x{aa}
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/[^a]/
+Memory allocation (code space): 18
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 [^a]
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/[^a]/utf
+Memory allocation (code space): 18
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 [^a]
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/[^\xaa]/
+Memory allocation (code space): 18
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 [^\x{aa}]
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/[^\xaa]/utf
+Memory allocation (code space): 18
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 [^\x{aa}]
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+#pattern -memory
+
+/[^\d]/utf,ucp
+------------------------------------------------------------------
+ 0 11 Bra
+ 3 [^\p{Nd}]
+ 11 11 Ket
+ 14 End
+------------------------------------------------------------------
+
+/[[:^alpha:][:^cntrl:]]+/utf,ucp
+------------------------------------------------------------------
+ 0 15 Bra
+ 3 [\P{L}\P{Cc}]++
+ 15 15 Ket
+ 18 End
+------------------------------------------------------------------
+
+/[[:^cntrl:][:^alpha:]]+/utf,ucp
+------------------------------------------------------------------
+ 0 15 Bra
+ 3 [\P{Cc}\P{L}]++
+ 15 15 Ket
+ 18 End
+------------------------------------------------------------------
+
+/[[:alpha:]]+/utf,ucp
+------------------------------------------------------------------
+ 0 12 Bra
+ 3 [\p{L}]++
+ 12 12 Ket
+ 15 End
+------------------------------------------------------------------
+
+/[[:^alpha:]\S]+/utf,ucp
+------------------------------------------------------------------
+ 0 15 Bra
+ 3 [\P{L}\P{Xsp}]++
+ 15 15 Ket
+ 18 End
+------------------------------------------------------------------
+
+/abc(d|e)(*THEN)x(123(*THEN)4|567(b|q)(*THEN)xx)/
+------------------------------------------------------------------
+ 0 70 Bra
+ 3 abc
+ 9 6 CBra 1
+ 13 d
+ 15 5 Alt
+ 18 e
+ 20 11 Ket
+ 23 *THEN
+ 24 x
+ 26 13 CBra 2
+ 30 123
+ 36 *THEN
+ 37 4
+ 39 28 Alt
+ 42 567
+ 48 6 CBra 3
+ 52 b
+ 54 5 Alt
+ 57 q
+ 59 11 Ket
+ 62 *THEN
+ 63 xx
+ 67 41 Ket
+ 70 70 Ket
+ 73 End
+------------------------------------------------------------------
+
+/(((a\2)|(a*)\g<-1>))*a?/
+------------------------------------------------------------------
+ 0 52 Bra
+ 3 Brazero
+ 4 43 SCBra 1
+ 8 36 Once
+ 11 15 CBra 2
+ 15 8 CBra 3
+ 19 a
+ 21 \2
+ 23 8 Ket
+ 26 15 Alt
+ 29 6 CBra 4
+ 33 a*
+ 35 6 Ket
+ 38 29 Recurse
+ 41 30 Ket
+ 44 36 Ket
+ 47 43 KetRmax
+ 50 a?+
+ 52 52 Ket
+ 55 End
+------------------------------------------------------------------
+
+/((?+1)(\1))/
+------------------------------------------------------------------
+ 0 28 Bra
+ 3 22 Once
+ 6 16 CBra 1
+ 10 13 Recurse
+ 13 6 CBra 2
+ 17 \1
+ 19 6 Ket
+ 22 16 Ket
+ 25 22 Ket
+ 28 28 Ket
+ 31 End
+------------------------------------------------------------------
+
+"(?1)(?#?'){2}(a)"
+------------------------------------------------------------------
+ 0 18 Bra
+ 3 9 Recurse
+ 6 9 Recurse
+ 9 6 CBra 1
+ 13 a
+ 15 6 Ket
+ 18 18 Ket
+ 21 End
+------------------------------------------------------------------
+
+/.((?2)(?R)|\1|$)()/
+------------------------------------------------------------------
+ 0 39 Bra
+ 3 Any
+ 4 25 Once
+ 7 10 CBra 1
+ 11 32 Recurse
+ 14 0 Recurse
+ 17 5 Alt
+ 20 \1
+ 22 4 Alt
+ 25 $
+ 26 19 Ket
+ 29 25 Ket
+ 32 4 CBra 2
+ 36 4 Ket
+ 39 39 Ket
+ 42 End
+------------------------------------------------------------------
+
+/.((?3)(?R)()(?2)|\1|$)()/
+------------------------------------------------------------------
+ 0 49 Bra
+ 3 Any
+ 4 35 Once
+ 7 20 CBra 1
+ 11 42 Recurse
+ 14 0 Recurse
+ 17 4 CBra 2
+ 21 4 Ket
+ 24 17 Recurse
+ 27 5 Alt
+ 30 \1
+ 32 4 Alt
+ 35 $
+ 36 29 Ket
+ 39 35 Ket
+ 42 4 CBra 3
+ 46 4 Ket
+ 49 49 Ket
+ 52 End
+------------------------------------------------------------------
+
+/(?1)()((((((\1++))\x85)+)|))/
+------------------------------------------------------------------
+ 0 69 Bra
+ 3 6 Recurse
+ 6 4 CBra 1
+ 10 4 Ket
+ 13 53 CBra 2
+ 17 43 CBra 3
+ 21 36 CBra 4
+ 25 29 CBra 5
+ 29 20 CBra 6
+ 33 13 CBra 7
+ 37 6 Once
+ 40 \1+
+ 43 6 Ket
+ 46 13 Ket
+ 49 20 Ket
+ 52 \x{85}
+ 54 29 KetRmax
+ 57 36 Ket
+ 60 3 Alt
+ 63 46 Ket
+ 66 53 Ket
+ 69 69 Ket
+ 72 End
+------------------------------------------------------------------
+
+# Check the absolute limit on nesting (?| etc. This varies with code unit
+# width because the workspace is a different number of bytes. It will fail
+# with link size 2 in 8-bit and 16-bit but not in 32-bit.
+
+/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|
+)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))
+/parens_nest_limit=1000,-fullbincode
+
+# Use "expand" to create some very long patterns with nested parentheses, in
+# order to test workspace overflow. Again, this varies with code unit width,
+# and even when it fails in two modes, the error offset differs. It also varies
+# with link size - hence multiple tests with different values.
+
+/(?'ABC'\[[bar](]{792}*THEN:\[A]{255}\[)]{793}/expand,-fullbincode,parens_nest_limit=1000
+
+/(?'ABC'\[[bar](]{793}*THEN:\[A]{255}\[)]{794}/expand,-fullbincode,parens_nest_limit=1000
+
+/(?'ABC'\[[bar](]{1793}*THEN:\[A]{255}\[)]{1794}/expand,-fullbincode,parens_nest_limit=2000
+Failed: error 186 at offset 12820: regular expression is too complicated
+
+/(?(1)(?1)){8,}+()/debug
+------------------------------------------------------------------
+ 0 110 Bra
+ 3 97 Once
+ 6 8 Cond
+ 9 1 Cond ref
+ 11 103 Recurse
+ 14 8 Ket
+ 17 8 Cond
+ 20 1 Cond ref
+ 22 103 Recurse
+ 25 8 Ket
+ 28 8 Cond
+ 31 1 Cond ref
+ 33 103 Recurse
+ 36 8 Ket
+ 39 8 Cond
+ 42 1 Cond ref
+ 44 103 Recurse
+ 47 8 Ket
+ 50 8 Cond
+ 53 1 Cond ref
+ 55 103 Recurse
+ 58 8 Ket
+ 61 8 Cond
+ 64 1 Cond ref
+ 66 103 Recurse
+ 69 8 Ket
+ 72 8 Cond
+ 75 1 Cond ref
+ 77 103 Recurse
+ 80 8 Ket
+ 83 14 SBraPos
+ 86 8 SCond
+ 89 1 Cond ref
+ 91 103 Recurse
+ 94 8 Ket
+ 97 14 KetRpos
+100 97 Ket
+103 4 CBra 1
+107 4 Ket
+110 110 Ket
+113 End
+------------------------------------------------------------------
+Capturing subpattern count = 1
+Max back reference = 1
+May match empty string
+Subject length lower bound = 0
+ abcd
+ 0:
+ 1:
+
+/(?(1)|a(?1)b){2,}+()/debug
+------------------------------------------------------------------
+ 0 58 Bra
+ 3 45 Once
+ 6 5 Cond
+ 9 1 Cond ref
+ 11 10 Alt
+ 14 a
+ 16 51 Recurse
+ 19 b
+ 21 15 Ket
+ 24 21 SBraPos
+ 27 5 SCond
+ 30 1 Cond ref
+ 32 10 Alt
+ 35 a
+ 37 51 Recurse
+ 40 b
+ 42 15 Ket
+ 45 21 KetRpos
+ 48 45 Ket
+ 51 4 CBra 1
+ 55 4 Ket
+ 58 58 Ket
+ 61 End
+------------------------------------------------------------------
+Capturing subpattern count = 1
+Max back reference = 1
+May match empty string
+Subject length lower bound = 0
+ abcde
+No match
+
+/((?1)(?2)(?3)(?4)(?5)(?6)(?7)(?8)(?9)(?9)(?8)(?7)(?6)(?5)(?4)(?3)(?2)(?1)(?0)){2,}()()()()()()()()()/debug
+------------------------------------------------------------------
+ 0 194 Bra
+ 3 61 CBra 1
+ 7 3 Recurse
+ 10 131 Recurse
+ 13 138 Recurse
+ 16 145 Recurse
+ 19 152 Recurse
+ 22 159 Recurse
+ 25 166 Recurse
+ 28 173 Recurse
+ 31 180 Recurse
+ 34 180 Recurse
+ 37 173 Recurse
+ 40 166 Recurse
+ 43 159 Recurse
+ 46 152 Recurse
+ 49 145 Recurse
+ 52 138 Recurse
+ 55 131 Recurse
+ 58 3 Recurse
+ 61 0 Recurse
+ 64 61 Ket
+ 67 61 SCBra 1
+ 71 3 Recurse
+ 74 131 Recurse
+ 77 138 Recurse
+ 80 145 Recurse
+ 83 152 Recurse
+ 86 159 Recurse
+ 89 166 Recurse
+ 92 173 Recurse
+ 95 180 Recurse
+ 98 180 Recurse
+101 173 Recurse
+104 166 Recurse
+107 159 Recurse
+110 152 Recurse
+113 145 Recurse
+116 138 Recurse
+119 131 Recurse
+122 3 Recurse
+125 0 Recurse
+128 61 KetRmax
+131 4 CBra 2
+135 4 Ket
+138 4 CBra 3
+142 4 Ket
+145 4 CBra 4
+149 4 Ket
+152 4 CBra 5
+156 4 Ket
+159 4 CBra 6
+163 4 Ket
+166 4 CBra 7
+170 4 Ket
+173 4 CBra 8
+177 4 Ket
+180 4 CBra 9
+184 4 Ket
+187 4 CBra 10
+191 4 Ket
+194 194 Ket
+197 End
+------------------------------------------------------------------
+Capturing subpattern count = 10
+May match empty string
+Subject length lower bound = 0
+
+/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)/
+Failed: error 114 at offset 509: missing closing parenthesis
+
+/([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00]([00](*ACCEPT)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))/-fullbincode
+
+# End of testinput8
diff --git a/testdata/testoutputEBC b/testdata/testoutputEBC
index 03e179a..4edc8f9 100644
--- a/testdata/testoutputEBC
+++ b/testdata/testoutputEBC
@@ -1,3 +1,4 @@
+PCRE2 version 10.32-RC1 2018-02-19
# This is a specialized test for checking, when PCRE2 is compiled with the
# EBCDIC option but in an ASCII environment, that newline, white space, and \c
# functionality is working. It catches cases where explicit values such as 0x0a
@@ -200,6 +201,6 @@ No match
0: \xff
/\&/
-Failed: error 168 at offset 2: \c\x20must\x20be\x20followed\x20by\x20a\x20letter\x20or\x20one\x20of\x20[\]^_\x3f
+Failed: error 168 at offset 3: \c\x20must\x20be\x20followed\x20by\x20a\x20letter\x20or\x20one\x20of\x20[\]^_\x3f
# End