summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Vernon <matthew@debian.org>2020-11-27 09:24:14 +0000
committerMatthew Vernon <matthew@debian.org>2020-11-27 09:24:14 +0000
commitade48696948d66ff3ebba92f966ba554f7984fe8 (patch)
tree63245c2358864ed38352d5001adfa0639a05e018
parent6705d2473c4ddbe1551937973293b33e46b9e9fb (diff)
New upstream version 10.35
-rw-r--r--AUTHORS6
-rw-r--r--CMakeLists.txt160
-rw-r--r--ChangeLog122
-rwxr-xr-xCheckMan25
-rw-r--r--LICENCE6
-rw-r--r--Makefile.am19
-rw-r--r--Makefile.in113
-rw-r--r--NEWS21
-rw-r--r--NON-AUTOTOOLS-BUILD16
-rwxr-xr-xPrepareRelease2
-rw-r--r--README68
-rwxr-xr-xRunGrepTest4
-rw-r--r--aclocal.m450
-rwxr-xr-xar-lib17
-rwxr-xr-xcompile6
-rw-r--r--config-cmake.h.in4
-rwxr-xr-xconfig.guess359
-rwxr-xr-xconfig.sub2464
-rwxr-xr-xconfigure94
-rw-r--r--configure.ac53
-rwxr-xr-xdepcomp2
-rw-r--r--doc/html/NON-AUTOTOOLS-BUILD.txt16
-rw-r--r--doc/html/README.txt68
-rw-r--r--doc/html/pcre2_jit_free_unused_memory.html2
-rw-r--r--doc/html/pcre2_jit_match.html4
-rw-r--r--doc/html/pcre2_set_character_tables.html9
-rw-r--r--doc/html/pcre2_substitute.html29
-rw-r--r--doc/html/pcre2api.html255
-rw-r--r--doc/html/pcre2build.html50
-rw-r--r--doc/html/pcre2grep.html86
-rw-r--r--doc/html/pcre2jit.html14
-rw-r--r--doc/html/pcre2partial.html2
-rw-r--r--doc/html/pcre2pattern.html83
-rw-r--r--doc/html/pcre2syntax.html18
-rw-r--r--doc/html/pcre2test.html88
-rw-r--r--doc/html/pcre2unicode.html20
-rw-r--r--doc/pcre2.txt2869
-rw-r--r--doc/pcre2_jit_free_unused_memory.32
-rw-r--r--doc/pcre2_jit_match.36
-rw-r--r--doc/pcre2_set_character_tables.313
-rw-r--r--doc/pcre2_substitute.328
-rw-r--r--doc/pcre2api.3256
-rw-r--r--doc/pcre2build.350
-rw-r--r--doc/pcre2grep.185
-rw-r--r--doc/pcre2grep.txt100
-rw-r--r--doc/pcre2jit.314
-rw-r--r--doc/pcre2partial.32
-rw-r--r--doc/pcre2pattern.385
-rw-r--r--doc/pcre2syntax.320
-rw-r--r--doc/pcre2test.189
-rw-r--r--doc/pcre2test.txt680
-rw-r--r--doc/pcre2unicode.322
-rwxr-xr-xinstall-sh13
-rw-r--r--ltmain.sh8
-rw-r--r--m4/libtool.m41
-rw-r--r--m4/ltversion.m44
-rwxr-xr-xmissing2
-rw-r--r--src/config.h.generic12
-rw-r--r--src/config.h.in6
-rw-r--r--src/pcre2.h.generic10
-rw-r--r--src/pcre2.h.in6
-rw-r--r--src/pcre2_auto_possess.c34
-rw-r--r--src/pcre2_chartables.c.dist38
-rw-r--r--src/pcre2_compile.c194
-rw-r--r--src/pcre2_config.c10
-rw-r--r--src/pcre2_dfa_match.c38
-rw-r--r--src/pcre2_dftables.c (renamed from src/dftables.c)202
-rw-r--r--src/pcre2_internal.h21
-rw-r--r--src/pcre2_jit_compile.c1099
-rw-r--r--src/pcre2_jit_misc.c5
-rw-r--r--src/pcre2_jit_neon_inc.h30
-rw-r--r--src/pcre2_jit_simd_inc.h130
-rw-r--r--src/pcre2_jit_test.c14
-rw-r--r--src/pcre2_maketables.c64
-rw-r--r--src/pcre2_match.c160
-rw-r--r--src/pcre2_serialize.c16
-rw-r--r--src/pcre2_study.c94
-rw-r--r--src/pcre2_substitute.c195
-rw-r--r--src/pcre2_tables.c352
-rw-r--r--src/pcre2_ucd.c3959
-rw-r--r--src/pcre2_ucp.h7
-rw-r--r--src/pcre2_valid_utf.c4
-rw-r--r--src/pcre2grep.c66
-rw-r--r--src/pcre2test.c381
-rw-r--r--src/sljit/sljitConfig.h17
-rw-r--r--src/sljit/sljitConfigInternal.h69
-rw-r--r--src/sljit/sljitExecAllocator.c17
-rw-r--r--src/sljit/sljitLir.c95
-rw-r--r--src/sljit/sljitLir.h98
-rw-r--r--src/sljit/sljitNativeARM_32.c70
-rw-r--r--src/sljit/sljitNativeARM_64.c79
-rw-r--r--src/sljit/sljitNativeARM_T2_32.c58
-rw-r--r--src/sljit/sljitNativeMIPS_32.c31
-rw-r--r--src/sljit/sljitNativeMIPS_64.c21
-rw-r--r--src/sljit/sljitNativeMIPS_common.c186
-rw-r--r--src/sljit/sljitNativePPC_common.c58
-rw-r--r--src/sljit/sljitNativeSPARC_common.c57
-rw-r--r--src/sljit/sljitNativeTILEGX_64.c44
-rw-r--r--src/sljit/sljitNativeX86_32.c50
-rw-r--r--src/sljit/sljitNativeX86_64.c28
-rw-r--r--src/sljit/sljitNativeX86_common.c219
-rw-r--r--src/sljit/sljitProtExecAllocator.c66
-rw-r--r--src/sljit/sljitUtils.c161
-rwxr-xr-xtest-driver2
-rw-r--r--testdata/grepoutputN16
-rw-r--r--testdata/testbtablesbin0 -> 1088 bytes
-rw-r--r--testdata/testinput141
-rw-r--r--testdata/testinput1025
-rw-r--r--testdata/testinput1283
-rw-r--r--testdata/testinput1450
-rw-r--r--testdata/testinput2112
-rw-r--r--testdata/testinput42
-rw-r--r--testdata/testinput511
-rw-r--r--testdata/testoutput174
-rw-r--r--testdata/testoutput1060
-rw-r--r--testdata/testoutput12-16157
-rw-r--r--testdata/testoutput12-32155
-rw-r--r--testdata/testoutput14-1672
-rw-r--r--testdata/testoutput14-3270
-rw-r--r--testdata/testoutput14-870
-rw-r--r--testdata/testoutput2262
-rw-r--r--testdata/testoutput44
-rw-r--r--testdata/testoutput515
-rw-r--r--testdata/testoutput8-16-2118
-rw-r--r--testdata/testoutput8-16-3118
-rw-r--r--testdata/testoutput8-16-4118
-rw-r--r--testdata/testoutput8-32-2118
-rw-r--r--testdata/testoutput8-32-3118
-rw-r--r--testdata/testoutput8-32-4118
-rw-r--r--testdata/testoutput8-8-2118
-rw-r--r--testdata/testoutput8-8-3118
-rw-r--r--testdata/testoutput8-8-4118
-rw-r--r--testdata/wintestoutput36
133 files changed, 11669 insertions, 7955 deletions
diff --git a/AUTHORS b/AUTHORS
index 8d4e15a..645c065 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -8,7 +8,7 @@ Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England.
-Copyright (c) 1997-2019 University of Cambridge
+Copyright (c) 1997-2020 University of Cambridge
All rights reserved
@@ -19,7 +19,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
-Copyright(c) 2010-2019 Zoltan Herczeg
+Copyright(c) 2010-2020 Zoltan Herczeg
All rights reserved.
@@ -30,7 +30,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
-Copyright(c) 2009-2019 Zoltan Herczeg
+Copyright(c) 2009-2020 Zoltan Herczeg
All rights reserved.
####
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9a3e5da..86b8896 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,5 @@
# CMakeLists.txt
#
-#
# This file enables PCRE2 to be built with the CMake configuration and build
# tool. Download CMake in source or binary form from http://www.cmake.org/
# Converted to support PCRE2 from the original PCRE file, August 2014.
@@ -85,6 +84,14 @@
# 2018-11-14 PH removed unnecessary checks for stdint.h and inttypes.h
# 2018-11-16 PH added PCRE2GREP_SUPPORT_CALLOUT_FORK support and tidied
# 2019-02-16 PH hacked to avoid CMP0026 policy issue (see comments below)
+# 2020-03-16 PH renamed dftables as pcre2_dftables (as elsewhere)
+# 2020-03-24 PH changed CMAKE_MODULE_PATH definition to add, not replace
+# 2020-04-08 Carlo added function check for secure_getenv, fixed strerror
+# 2020-04-16 enh added check for __attribute__((uninitialized))
+# 2020-04-25 PH applied patches from Uwe Korn to support pkg-config and
+# library versioning.
+# 2020-04-25 Carlo added function check for mkostemp used in ProtExecAllocator
+# 2020-04-28 PH added function check for memfd_create based on Carlo's patch
PROJECT(PCRE2 C)
@@ -95,7 +102,11 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
# GET_TARGET_PROPERTY. This should no longer be required.
# CMAKE_POLICY(SET CMP0026 OLD)
-SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
+# For FindReadline.cmake. This was changed to allow setting CMAKE_MODULE_PATH
+# on the command line.
+# SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
+
+LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -I${PROJECT_SOURCE_DIR}/src")
@@ -107,8 +118,10 @@ FIND_PACKAGE( Editline )
# Configuration checks
-INCLUDE(CheckIncludeFile)
+INCLUDE(CheckCSourceCompiles)
INCLUDE(CheckFunctionExists)
+INCLUDE(CheckSymbolExists)
+INCLUDE(CheckIncludeFile)
INCLUDE(CheckTypeSize)
CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H)
@@ -119,9 +132,19 @@ CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H)
CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H)
CHECK_INCLUDE_FILE(windows.h HAVE_WINDOWS_H)
-CHECK_FUNCTION_EXISTS(bcopy HAVE_BCOPY)
-CHECK_FUNCTION_EXISTS(memmove HAVE_MEMMOVE)
-CHECK_FUNCTION_EXISTS(strerror HAVE_STRERROR)
+CHECK_FUNCTION_EXISTS(bcopy HAVE_BCOPY)
+CHECK_FUNCTION_EXISTS(memfd_create HAVE_MEMFD_CREATE)
+CHECK_FUNCTION_EXISTS(memmove HAVE_MEMMOVE)
+CHECK_FUNCTION_EXISTS(secure_getenv HAVE_SECURE_GETENV)
+CHECK_FUNCTION_EXISTS(strerror HAVE_STRERROR)
+
+set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
+set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror")
+CHECK_C_SOURCE_COMPILES(
+ "int main() { char buf[128] __attribute__((uninitialized)); (void)buf; return 0; }"
+ HAVE_ATTRIBUTE_UNINITIALIZED
+)
+set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS})
# User-configurable options
#
@@ -177,8 +200,12 @@ SET(PCRE2_HEAP_MATCH_RECURSE OFF CACHE BOOL
SET(PCRE2_SUPPORT_JIT OFF CACHE BOOL
"Enable support for Just-in-time compiling.")
-SET(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL
- "Enable SELinux compatible execmem allocator in JIT (experimental).")
+IF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
+ SET(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL
+ "Enable SELinux compatible execmem allocator in JIT (experimental).")
+ELSE(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
+ SET(PCRE2_SUPPORT_JIT_SEALLOC IGNORE)
+ENDIF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
SET(PCRE2GREP_SUPPORT_JIT ON CACHE BOOL
"Enable use of Just-in-time compiling in pcre2grep.")
@@ -304,7 +331,19 @@ IF(PCRE2_SUPPORT_JIT)
ENDIF(PCRE2_SUPPORT_JIT)
IF(PCRE2_SUPPORT_JIT_SEALLOC)
- SET(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1)
+ SET(CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE)
+ CHECK_SYMBOL_EXISTS(mkostemp stdlib.h REQUIRED)
+ UNSET(CMAKE_REQUIRED_DEFINITIONS)
+ IF(${REQUIRED})
+ IF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
+ ADD_DEFINITIONS(-D_GNU_SOURCE)
+ SET(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1)
+ ELSE(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
+ MESSAGE(FATAL_ERROR "Your configuration is not supported")
+ ENDIF(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
+ ELSE(${REQUIRED})
+ SET(PCRE2_SUPPORT_JIT_SEALLOC OFF)
+ ENDIF(${REQUIRED})
ENDIF(PCRE2_SUPPORT_JIT_SEALLOC)
IF(PCRE2GREP_SUPPORT_JIT)
@@ -400,12 +439,13 @@ file(STRINGS ${PROJECT_SOURCE_DIR}/configure.ac
LIMIT_COUNT 50 # Read only the first 50 lines of the file
)
-set(SEARCHED_VARIABLES "pcre2_major" "pcre2_minor" "pcre2_prerelease" "pcre2_date")
+set(SEARCHED_VARIABLES "pcre2_major" "pcre2_minor" "pcre2_prerelease" "pcre2_date"
+ "libpcre2_posix_version" "libpcre2_8_version" "libpcre2_16_version" "libpcre2_32_version")
foreach(configure_line ${configure_lines})
foreach(_substitution_variable ${SEARCHED_VARIABLES})
string(TOUPPER ${_substitution_variable} _substitution_variable_upper)
if (NOT ${_substitution_variable_upper})
- string(REGEX MATCH "m4_define\\(${_substitution_variable}, \\[(.*)\\]" MACTHED_STRING ${configure_line})
+ string(REGEX MATCH "m4_define\\(${_substitution_variable}, *\\[(.*)\\]" MACTHED_STRING ${configure_line})
if (CMAKE_MATCH_1)
set(${_substitution_variable_upper} ${CMAKE_MATCH_1})
endif()
@@ -413,21 +453,74 @@ foreach(configure_line ${configure_lines})
endforeach()
endforeach()
+macro(PARSE_LIB_VERSION VARIABLE_PREFIX) # Derive version variables from <prefix>_VERSION, a "current:revision:age" string
+ string(REPLACE ":" ";" ${VARIABLE_PREFIX}_VERSION_LIST ${${VARIABLE_PREFIX}_VERSION}) # ':' -> ';' yields a 3-element CMake list
+ list(GET ${VARIABLE_PREFIX}_VERSION_LIST 0 ${VARIABLE_PREFIX}_VERSION_CURRENT)
+ list(GET ${VARIABLE_PREFIX}_VERSION_LIST 1 ${VARIABLE_PREFIX}_VERSION_REVISION)
+ list(GET ${VARIABLE_PREFIX}_VERSION_LIST 2 ${VARIABLE_PREFIX}_VERSION_AGE)
+
+ math(EXPR ${VARIABLE_PREFIX}_SOVERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} - ${${VARIABLE_PREFIX}_VERSION_AGE}") # SOVERSION = current - age
+ math(EXPR ${VARIABLE_PREFIX}_MACHO_COMPATIBILITY_VERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} + 1")
+ math(EXPR ${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION "${${VARIABLE_PREFIX}_VERSION_CURRENT} + 1")
+ set(${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION "${${VARIABLE_PREFIX}_MACHO_CURRENT_VERSION}.${${VARIABLE_PREFIX}_VERSION_REVISION}") # "<current+1>.<revision>"; a stray '}' here used to leak into the value
+ set(${VARIABLE_PREFIX}_VERSION "${${VARIABLE_PREFIX}_SOVERSION}.${${VARIABLE_PREFIX}_VERSION_AGE}.${${VARIABLE_PREFIX}_VERSION_REVISION}") # overwrites the input with "<soversion>.<age>.<revision>"
+endmacro()
+
+PARSE_LIB_VERSION(LIBPCRE2_POSIX)
+PARSE_LIB_VERSION(LIBPCRE2_8)
+PARSE_LIB_VERSION(LIBPCRE2_16)
+PARSE_LIB_VERSION(LIBPCRE2_32)
+
CONFIGURE_FILE(src/pcre2.h.in
${PROJECT_BINARY_DIR}/pcre2.h
@ONLY)
-# What about pcre2-config and libpcre2.pc?
+# Generate pkg-config files
+
+SET(PACKAGE_VERSION "${PCRE2_MAJOR}.${PCRE2_MINOR}")
+SET(prefix ${CMAKE_INSTALL_PREFIX})
+
+SET(exec_prefix "\${prefix}")
+SET(libdir "\${exec_prefix}/lib")
+SET(includedir "\${prefix}/include")
+CONFIGURE_FILE(libpcre2-posix.pc.in libpcre2-posix.pc @ONLY)
+SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-posix.pc")
+
+IF(PCRE2_BUILD_PCRE2_8)
+ CONFIGURE_FILE(libpcre2-8.pc.in libpcre2-8.pc @ONLY)
+ SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-8.pc")
+ SET(enable_pcre2_8 "yes")
+ELSE()
+ SET(enable_pcre2_8 "no")
+ENDIF()
+
+IF(PCRE2_BUILD_PCRE2_16)
+ CONFIGURE_FILE(libpcre2-16.pc.in libpcre2-16.pc @ONLY)
+ SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-16.pc")
+ SET(enable_pcre2_16 "yes")
+ELSE()
+ SET(enable_pcre2_16 "no")
+ENDIF()
+
+IF(PCRE2_BUILD_PCRE2_32)
+ CONFIGURE_FILE(libpcre2-32.pc.in libpcre2-32.pc @ONLY)
+ SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-32.pc")
+ SET(enable_pcre2_32 "yes")
+ELSE()
+ SET(enable_pcre2_32 "no")
+ENDIF()
+
+CONFIGURE_FILE(pcre2-config.in pcre2-config @ONLY)
# Character table generation
OPTION(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
IF(PCRE2_REBUILD_CHARTABLES)
- ADD_EXECUTABLE(dftables src/dftables.c)
+ ADD_EXECUTABLE(pcre2_dftables src/pcre2_dftables.c)
ADD_CUSTOM_COMMAND(
COMMENT "Generating character tables (pcre2_chartables.c) for current locale"
- DEPENDS dftables
- COMMAND dftables
+ DEPENDS pcre2_dftables
+ COMMAND pcre2_dftables
ARGS ${PROJECT_BINARY_DIR}/pcre2_chartables.c
OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c
)
@@ -541,12 +634,20 @@ SET(targets)
IF(PCRE2_BUILD_PCRE2_8)
ADD_LIBRARY(pcre2-8 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
-SET_PROPERTY(TARGET pcre2-8
- PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
+SET_TARGET_PROPERTIES(pcre2-8 PROPERTIES
+ COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
+ MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}"
+ MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}"
+ VERSION ${LIBPCRE2_8_VERSION}
+ SOVERSION ${LIBPCRE2_8_SOVERSION})
SET(targets ${targets} pcre2-8)
ADD_LIBRARY(pcre2-posix ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
-SET_PROPERTY(TARGET pcre2-posix
- PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
+SET_TARGET_PROPERTIES(pcre2-posix PROPERTIES
+ COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
+ MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}"
+ MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}"
+ VERSION ${LIBPCRE2_POSIX_VERSION}
+ SOVERSION ${LIBPCRE2_POSIX_SOVERSION})
SET(targets ${targets} pcre2-posix)
TARGET_LINK_LIBRARIES(pcre2-posix pcre2-8)
@@ -564,8 +665,12 @@ ENDIF(PCRE2_BUILD_PCRE2_8)
IF(PCRE2_BUILD_PCRE2_16)
ADD_LIBRARY(pcre2-16 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
-SET_PROPERTY(TARGET pcre2-16
- PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16)
+SET_TARGET_PROPERTIES(pcre2-16 PROPERTIES # 16-bit library: every version value must come from the LIBPCRE2_16_* variables
+ COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16
+ MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_16_MACHO_COMPATIBILITY_VERSION}"
+ MACHO_CURRENT_VERSION "${LIBPCRE2_16_MACHO_CURRENT_VERSION}"
+ VERSION ${LIBPCRE2_16_VERSION}
+ SOVERSION ${LIBPCRE2_16_SOVERSION})
SET(targets ${targets} pcre2-16)
IF(MINGW AND NOT PCRE2_STATIC)
@@ -582,8 +687,12 @@ ENDIF(PCRE2_BUILD_PCRE2_16)
IF(PCRE2_BUILD_PCRE2_32)
ADD_LIBRARY(pcre2-32 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
-SET_PROPERTY(TARGET pcre2-32
- PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32)
+SET_TARGET_PROPERTIES(pcre2-32 PROPERTIES
+ COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32
+ MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
+ MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}"
+ VERSION ${LIBPCRE2_32_VERSION}
+ SOVERSION ${LIBPCRE2_32_SOVERSION})
SET(targets ${targets} pcre2-32)
IF(MINGW AND NOT PCRE2_STATIC)
@@ -756,6 +865,11 @@ INSTALL(TARGETS ${targets}
RUNTIME DESTINATION bin
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib)
+INSTALL(FILES ${pkg_config_files} DESTINATION lib/pkgconfig)
+INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2-config"
+ DESTINATION bin
+ # Set 0755 permissions
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
INSTALL(FILES ${PCRE2_HEADERS} ${PCRE2POSIX_HEADERS} DESTINATION include)
diff --git a/ChangeLog b/ChangeLog
index 0926c29..310eb60 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,128 @@
Change Log for PCRE2
--------------------
+Version 10.35 09-May-2020
+---------------------------
+
+1. Use PCRE2_MATCH_EMPTY flag to detect empty matches in JIT.
+
+2. Fix ARMv5 JIT improper handling of labels right after a constant pool.
+
+3. A JIT bug is fixed which allowed the fields of the compiled pattern to be
+read before the pattern's existence was checked.
+
+4. Back in the PCRE1 day, capturing groups that contained recursive back
+references to themselves were made atomic (version 8.01, change 18) because
+after the end of a repeated group, the captured substrings had their values from
+the final repetition, not from an earlier repetition that might be the
+destination of a backtrack. This feature was documented, and was carried over
+into PCRE2. However, it has now been realized that the major refactoring that
+was done for 10.30 has made this atomicizing unnecessary, and it is confusing
+when users are unaware of it, making some patterns appear not to be working as
+expected. Capture values of recursive back references in repeated groups are
+now correctly backtracked, so this unnecessary restriction has been removed.
+
+5. Added PCRE2_SUBSTITUTE_LITERAL.
+
+6. Avoid some VS compiler warnings.
+
+7. Added PCRE2_SUBSTITUTE_MATCHED.
+
+8. Added (?* and (?<* as synonyms for (*napla: and (*naplb: to match another
+regex engine. The Perl regex folks are aware of this usage and have made a note
+about it.
+
+9. When an assertion is repeated, PCRE2 used to limit the maximum repetition to
+1, believing that repeating an assertion is pointless. However, if a positive
+assertion contains capturing groups, repetition can be useful. In any case, an
+assertion could always be wrapped in a repeated group. The only restriction
+that is now imposed is that an unlimited maximum is changed to one more than
+the minimum.
+
+10. Fix *THEN verbs in lookahead assertions in JIT.
+
+11. Added PCRE2_SUBSTITUTE_REPLACEMENT_ONLY.
+
+12. The JIT stack should be freed when the low-level stack allocation fails.
+
+13. In pcre2grep, if the final line in a scanned file is output but does not
+end with a newline sequence, add a newline according to the --newline setting.
+
+14. (?(DEFINE)...) groups were not being handled correctly when checking for
+the fixed length of a lookbehind assertion. Such a group within a lookbehind
+should be skipped, as it does not contribute to the length of the group.
+Instead, the (DEFINE) group was being processed, and if at the end of the
+lookbehind, that end was not correctly recognized. Errors such as "lookbehind
+assertion is not fixed length" and also "internal error: bad code value in
+parsed_skip()" could result.
+
+15. Put a limit of 1000 on recursive calls in pcre2_study() when searching
+nested groups for starting code units, in order to avoid stack overflow issues.
+If the limit is reached, it just gives up trying for this optimization.
+
+16. The control verb chain list must always be restored when exiting from a
+recurse function in JIT.
+
+17. Fix a crash which occurs when the character type of an invalid UTF
+character is decoded in JIT.
+
+18. Changes in many areas of the code so that when Unicode is supported and
+PCRE2_UCP is set without PCRE2_UTF, Unicode character properties are used for
+upper/lower case computations on characters whose code points are greater than
+127.
+
+19. The function for checking UTF-16 validity was returning an incorrect offset
+for the start of the error when a high surrogate was not followed by a valid
+low surrogate. This caused incorrect behaviour, for example when
+PCRE2_MATCH_INVALID_UTF was set and a match started immediately following the
+invalid high surrogate, such as /aa/ matching "\x{d800}aa".
+
+20. If a DEFINE group immediately preceded a lookbehind assertion, the pattern
+could be mis-compiled and therefore not match correctly. This is the example
+that found this: /(?(DEFINE)(?<foo>bar))(?<![-a-z0-9])word/ which failed to
+match "word" because the "move back" value was set to zero.
+
+21. Following a request from a user, some extensions and tidies to the
+character tables handling have been done:
+
+ (a) The dftables auxiliary program is renamed pcre2_dftables, but it is still
+ not installed for public use.
+
+ (b) There is now a -b option for pcre2_dftables, which causes the tables to
+ be written in binary. There is also a -help option.
+
+ (c) PCRE2_CONFIG_TABLES_LENGTH is added to pcre2_config() so that an
+ application that wants to save tables in binary knows how long they are.
+
+22. Changed setting of CMAKE_MODULE_PATH in CMakeLists.txt from SET to
+LIST(APPEND...) to allow a setting from the command line to be included.
+
+23. Updated to Unicode 13.0.0.
+
+24. CMake build now checks for secure_getenv() and strerror(). Patch by Carlo.
+
+25. Avoid using [-1] as a suffix in pcre2test because it can provoke a compiler
+warning.
+
+26. Added tests for __attribute__((uninitialized)) to both the configure and
+CMake build files, and then applied this attribute to the variable called
+stack_frames_vector[] in pcre2_match(). When implemented, this disables
+automatic initialization (a facility in clang), which can take time on big
+variables.
+
+27. Updated CMakeLists.txt (patches by Uwe Korn) to add support for
+pcre2-config, the libpcre*.pc files, SOVERSION, VERSION and the
+MACHO_*_VERSIONS settings for CMake builds.
+
+28. Another patch to CMakeLists.txt to check for mkostemp (configure already
+does). Patch by Carlo Marcelo Arenas Belon.
+
+29. Check for the existence of memfd_create in both CMake and configure
+configurations. Patch by Carlo Marcelo Arenas Belon.
+
+30. Restrict the configuration setting for the SELinux compatible execmem
+allocator (change 10.30/44) to Linux and NetBSD.
+
Version 10.34 21-November-2019
------------------------------
diff --git a/CheckMan b/CheckMan
index 5686746..2f84f99 100755
--- a/CheckMan
+++ b/CheckMan
@@ -16,6 +16,7 @@ while (scalar(@ARGV) > 0)
while (<IN>)
{
+ $count = 0;
$line++;
if (/^\s*$/)
{
@@ -50,14 +51,24 @@ while (scalar(@ARGV) > 0)
$yield = 1;
}
}
- else
+ elsif (/\\[^ef]|\\f[^IBP]/)
{
- if (/\\[^ef]|\\f[^IBP]/)
- {
- printf "Bad backslash in line $line of $file\n";
- $yield = 1;
- }
- }
+ printf "Bad backslash in line $line of $file\n";
+ $yield = 1;
+ }
+ while (/\\f[BI]/g)
+ {
+ $count++;
+ }
+ while (/\\fP/g)
+ {
+ $count--;
+ }
+ if ($count != 0)
+ {
+ printf "Mismatching formatting in line $line of $file\n";
+ $yield = 1;
+ }
}
close(IN);
diff --git a/LICENCE b/LICENCE
index 142b3b3..1568be3 100644
--- a/LICENCE
+++ b/LICENCE
@@ -26,7 +26,7 @@ Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England.
-Copyright (c) 1997-2019 University of Cambridge
+Copyright (c) 1997-2020 University of Cambridge
All rights reserved.
@@ -37,7 +37,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
-Copyright(c) 2010-2019 Zoltan Herczeg
+Copyright(c) 2010-2020 Zoltan Herczeg
All rights reserved.
@@ -48,7 +48,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
-Copyright(c) 2009-2019 Zoltan Herczeg
+Copyright(c) 2009-2020 Zoltan Herczeg
All rights reserved.
diff --git a/Makefile.am b/Makefile.am
index db16377..bb888f2 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -325,18 +325,18 @@ include_HEADERS = src/pcre2posix.h
bin_SCRIPTS = pcre2-config
## ---------------------------------------------------------------
-## The dftables program is used to rebuild character tables before compiling
-## PCRE2, if --enable-rebuild-chartables is specified. It is not a user-visible
-## program. The default (when --enable-rebuild-chartables is not specified) is
-## to copy a distributed set of tables that are defined for ASCII code. In this
-## case, dftables is not needed.
+## The pcre2_dftables program is used to rebuild character tables before
+## compiling PCRE2, if --enable-rebuild-chartables is specified. It is not an
+## installed program. The default (when --enable-rebuild-chartables is not
+## specified) is to copy a distributed set of tables that are defined for ASCII
+## code. In this case, pcre2_dftables is not needed.
if WITH_REBUILD_CHARTABLES
-noinst_PROGRAMS += dftables
-dftables_SOURCES = src/dftables.c
-src/pcre2_chartables.c: dftables$(EXEEXT)
+noinst_PROGRAMS += pcre2_dftables
+pcre2_dftables_SOURCES = src/pcre2_dftables.c
+src/pcre2_chartables.c: pcre2_dftables$(EXEEXT)
rm -f $@
- ./dftables$(EXEEXT) $@
+ ./pcre2_dftables$(EXEEXT) $@
else
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist
rm -f $@
@@ -634,6 +634,7 @@ EXTRA_DIST += \
testdata/grepoutputCN \
testdata/grepoutputN \
testdata/greppatN4 \
+ testdata/testbtables \
testdata/testinput1 \
testdata/testinput2 \
testdata/testinput3 \
diff --git a/Makefile.in b/Makefile.in
index 4304521..2873912 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.16.1 from Makefile.am.
+# Makefile.in generated by automake 1.16.2 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2018 Free Software Foundation, Inc.
+# Copyright (C) 1994-2020 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -96,7 +96,7 @@ host_triplet = @host@
TESTS = $(am__EXEEXT_4) RunTest $(am__append_32)
bin_PROGRAMS = $(am__EXEEXT_1) pcre2test$(EXEEXT)
noinst_PROGRAMS = $(am__EXEEXT_2) $(am__EXEEXT_3) $(am__EXEEXT_4)
-@WITH_REBUILD_CHARTABLES_TRUE@am__append_1 = dftables
+@WITH_REBUILD_CHARTABLES_TRUE@am__append_1 = pcre2_dftables
@WITH_PCRE2_8_TRUE@am__append_2 = libpcre2-8.la
@WITH_PCRE2_16_TRUE@am__append_3 = libpcre2-16.la
@WITH_PCRE2_32_TRUE@am__append_4 = libpcre2-32.la
@@ -161,7 +161,7 @@ am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(libdir)" \
"$(DESTDIR)$(man3dir)" "$(DESTDIR)$(docdir)" \
"$(DESTDIR)$(htmldir)" "$(DESTDIR)$(pkgconfigdir)" \
"$(DESTDIR)$(includedir)" "$(DESTDIR)$(includedir)"
-@WITH_REBUILD_CHARTABLES_TRUE@am__EXEEXT_2 = dftables$(EXEEXT)
+@WITH_REBUILD_CHARTABLES_TRUE@am__EXEEXT_2 = pcre2_dftables$(EXEEXT)
@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am__EXEEXT_3 = pcre2fuzzcheck$(EXEEXT)
@WITH_JIT_TRUE@am__EXEEXT_4 = pcre2_jit_test$(EXEEXT)
PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS)
@@ -373,11 +373,11 @@ libpcre2_posix_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
$(libpcre2_posix_la_CFLAGS) $(CFLAGS) \
$(libpcre2_posix_la_LDFLAGS) $(LDFLAGS) -o $@
@WITH_PCRE2_8_TRUE@am_libpcre2_posix_la_rpath = -rpath $(libdir)
-am__dftables_SOURCES_DIST = src/dftables.c
-@WITH_REBUILD_CHARTABLES_TRUE@am_dftables_OBJECTS = \
-@WITH_REBUILD_CHARTABLES_TRUE@ src/dftables.$(OBJEXT)
-dftables_OBJECTS = $(am_dftables_OBJECTS)
-dftables_LDADD = $(LDADD)
+am__pcre2_dftables_SOURCES_DIST = src/pcre2_dftables.c
+@WITH_REBUILD_CHARTABLES_TRUE@am_pcre2_dftables_OBJECTS = \
+@WITH_REBUILD_CHARTABLES_TRUE@ src/pcre2_dftables.$(OBJEXT)
+pcre2_dftables_OBJECTS = $(am_pcre2_dftables_OBJECTS)
+pcre2_dftables_LDADD = $(LDADD)
am__pcre2_jit_test_SOURCES_DIST = src/pcre2_jit_test.c
@WITH_JIT_TRUE@am_pcre2_jit_test_OBJECTS = \
@WITH_JIT_TRUE@ src/pcre2_jit_test-pcre2_jit_test.$(OBJEXT)
@@ -441,7 +441,6 @@ DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__maybe_remake_depfiles = depfiles
am__depfiles_remade = src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po \
- src/$(DEPDIR)/dftables.Po \
src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo \
src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo \
src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo \
@@ -524,6 +523,7 @@ am__depfiles_remade = src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsuppo
src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Plo \
src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Plo \
src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Plo \
+ src/$(DEPDIR)/pcre2_dftables.Po \
src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po \
src/$(DEPDIR)/pcre2fuzzcheck-pcre2_fuzzsupport.Po \
src/$(DEPDIR)/pcre2grep-pcre2grep.Po \
@@ -551,7 +551,7 @@ SOURCES = $(_libs_libpcre2_fuzzsupport_a_SOURCES) \
$(libpcre2_16_la_SOURCES) $(nodist_libpcre2_16_la_SOURCES) \
$(libpcre2_32_la_SOURCES) $(nodist_libpcre2_32_la_SOURCES) \
$(libpcre2_8_la_SOURCES) $(nodist_libpcre2_8_la_SOURCES) \
- $(libpcre2_posix_la_SOURCES) $(dftables_SOURCES) \
+ $(libpcre2_posix_la_SOURCES) $(pcre2_dftables_SOURCES) \
$(pcre2_jit_test_SOURCES) $(pcre2fuzzcheck_SOURCES) \
$(pcre2grep_SOURCES) $(pcre2test_SOURCES)
DIST_SOURCES = $(am___libs_libpcre2_fuzzsupport_a_SOURCES_DIST) \
@@ -559,7 +559,7 @@ DIST_SOURCES = $(am___libs_libpcre2_fuzzsupport_a_SOURCES_DIST) \
$(am__libpcre2_32_la_SOURCES_DIST) \
$(am__libpcre2_8_la_SOURCES_DIST) \
$(am__libpcre2_posix_la_SOURCES_DIST) \
- $(am__dftables_SOURCES_DIST) \
+ $(am__pcre2_dftables_SOURCES_DIST) \
$(am__pcre2_jit_test_SOURCES_DIST) \
$(am__pcre2fuzzcheck_SOURCES_DIST) \
$(am__pcre2grep_SOURCES_DIST) $(pcre2test_SOURCES)
@@ -1223,16 +1223,17 @@ EXTRA_DIST = m4/ax_pthread.m4 m4/pcre2_visibility.m4 \
testdata/grepinputv testdata/grepinputx testdata/greplist \
testdata/grepoutput testdata/grepoutput8 testdata/grepoutputC \
testdata/grepoutputCN testdata/grepoutputN testdata/greppatN4 \
- testdata/testinput1 testdata/testinput2 testdata/testinput3 \
- testdata/testinput4 testdata/testinput5 testdata/testinput6 \
- testdata/testinput7 testdata/testinput8 testdata/testinput9 \
- testdata/testinput10 testdata/testinput11 testdata/testinput12 \
- testdata/testinput13 testdata/testinput14 testdata/testinput15 \
- testdata/testinput16 testdata/testinput17 testdata/testinput18 \
- testdata/testinput19 testdata/testinput20 testdata/testinput21 \
- testdata/testinput22 testdata/testinput23 testdata/testinput24 \
- testdata/testinput25 testdata/testinputEBC \
- testdata/testoutput1 testdata/testoutput2 testdata/testoutput3 \
+ testdata/testbtables testdata/testinput1 testdata/testinput2 \
+ testdata/testinput3 testdata/testinput4 testdata/testinput5 \
+ testdata/testinput6 testdata/testinput7 testdata/testinput8 \
+ testdata/testinput9 testdata/testinput10 testdata/testinput11 \
+ testdata/testinput12 testdata/testinput13 testdata/testinput14 \
+ testdata/testinput15 testdata/testinput16 testdata/testinput17 \
+ testdata/testinput18 testdata/testinput19 testdata/testinput20 \
+ testdata/testinput21 testdata/testinput22 testdata/testinput23 \
+ testdata/testinput24 testdata/testinput25 \
+ testdata/testinputEBC testdata/testoutput1 \
+ testdata/testoutput2 testdata/testoutput3 \
testdata/testoutput3A testdata/testoutput3B \
testdata/testoutput4 testdata/testoutput5 testdata/testoutput6 \
testdata/testoutput7 testdata/testoutput8-16-2 \
@@ -1266,7 +1267,7 @@ include_HEADERS = src/pcre2posix.h
# This is the "config" script.
bin_SCRIPTS = pcre2-config
-@WITH_REBUILD_CHARTABLES_TRUE@dftables_SOURCES = src/dftables.c
+@WITH_REBUILD_CHARTABLES_TRUE@pcre2_dftables_SOURCES = src/pcre2_dftables.c
BUILT_SOURCES = src/pcre2_chartables.c
NODIST_SOURCES = src/pcre2_chartables.c
COMMON_SOURCES = \
@@ -1750,12 +1751,12 @@ src/libpcre2_posix_la-pcre2posix.lo: src/$(am__dirstamp) \
libpcre2-posix.la: $(libpcre2_posix_la_OBJECTS) $(libpcre2_posix_la_DEPENDENCIES) $(EXTRA_libpcre2_posix_la_DEPENDENCIES)
$(AM_V_CCLD)$(libpcre2_posix_la_LINK) $(am_libpcre2_posix_la_rpath) $(libpcre2_posix_la_OBJECTS) $(libpcre2_posix_la_LIBADD) $(LIBS)
-src/dftables.$(OBJEXT): src/$(am__dirstamp) \
+src/pcre2_dftables.$(OBJEXT): src/$(am__dirstamp) \
src/$(DEPDIR)/$(am__dirstamp)
-dftables$(EXEEXT): $(dftables_OBJECTS) $(dftables_DEPENDENCIES) $(EXTRA_dftables_DEPENDENCIES)
- @rm -f dftables$(EXEEXT)
- $(AM_V_CCLD)$(LINK) $(dftables_OBJECTS) $(dftables_LDADD) $(LIBS)
+pcre2_dftables$(EXEEXT): $(pcre2_dftables_OBJECTS) $(pcre2_dftables_DEPENDENCIES) $(EXTRA_pcre2_dftables_DEPENDENCIES)
+ @rm -f pcre2_dftables$(EXEEXT)
+ $(AM_V_CCLD)$(LINK) $(pcre2_dftables_OBJECTS) $(pcre2_dftables_LDADD) $(LIBS)
src/pcre2_jit_test-pcre2_jit_test.$(OBJEXT): src/$(am__dirstamp) \
src/$(DEPDIR)/$(am__dirstamp)
@@ -1825,7 +1826,6 @@ distclean-compile:
-rm -f *.tab.c
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po@am__quote@ # am--include-marker
-@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/dftables.Po@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo@am__quote@ # am--include-marker
@@ -1908,6 +1908,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Plo@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2_dftables.Po@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2fuzzcheck-pcre2_fuzzsupport.Po@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2grep-pcre2grep.Po@am__quote@ # am--include-marker
@@ -3081,6 +3082,10 @@ dist-xz: distdir
tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz
$(am__post_remove_distdir)
+dist-zstd: distdir
+ tardir=$(distdir) && $(am__tar) | zstd -c $${ZSTD_CLEVEL-$${ZSTD_OPT--19}} >$(distdir).tar.zst
+ $(am__post_remove_distdir)
+
dist-tarZ: distdir
@echo WARNING: "Support for distribution archives compressed with" \
"legacy program 'compress' is deprecated." >&2
@@ -3122,6 +3127,8 @@ distcheck: dist
eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).shar.gz | unshar ;;\
*.zip*) \
unzip $(distdir).zip ;;\
+ *.tar.zst*) \
+ zstd -dc $(distdir).tar.zst | $(am__untar) ;;\
esac
chmod -R a-w $(distdir)
chmod u+w $(distdir)
@@ -3252,7 +3259,6 @@ clean-am: clean-binPROGRAMS clean-generic clean-libLTLIBRARIES \
distclean: distclean-am
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
-rm -f src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po
- -rm -f src/$(DEPDIR)/dftables.Po
-rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo
-rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo
-rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo
@@ -3335,6 +3341,7 @@ distclean: distclean-am
-rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Plo
-rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Plo
-rm -f src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Plo
+ -rm -f src/$(DEPDIR)/pcre2_dftables.Po
-rm -f src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po
-rm -f src/$(DEPDIR)/pcre2fuzzcheck-pcre2_fuzzsupport.Po
-rm -f src/$(DEPDIR)/pcre2grep-pcre2grep.Po
@@ -3390,7 +3397,6 @@ maintainer-clean: maintainer-clean-am
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
-rm -rf $(top_srcdir)/autom4te.cache
-rm -f src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po
- -rm -f src/$(DEPDIR)/dftables.Po
-rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo
-rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo
-rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo
@@ -3473,6 +3479,7 @@ maintainer-clean: maintainer-clean-am
-rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Plo
-rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Plo
-rm -f src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Plo
+ -rm -f src/$(DEPDIR)/pcre2_dftables.Po
-rm -f src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po
-rm -f src/$(DEPDIR)/pcre2fuzzcheck-pcre2_fuzzsupport.Po
-rm -f src/$(DEPDIR)/pcre2grep-pcre2grep.Po
@@ -3509,27 +3516,27 @@ uninstall-man: uninstall-man1 uninstall-man3
clean-noinstLIBRARIES clean-noinstPROGRAMS cscope \
cscopelist-am ctags ctags-am dist dist-all dist-bzip2 \
dist-gzip dist-lzip dist-shar dist-tarZ dist-xz dist-zip \
- distcheck distclean distclean-compile distclean-generic \
- distclean-hdr distclean-libtool distclean-local distclean-tags \
- distcleancheck distdir distuninstallcheck dvi dvi-am html \
- html-am info info-am install install-am install-binPROGRAMS \
- install-binSCRIPTS install-data install-data-am \
- install-dist_docDATA install-dist_htmlDATA install-dvi \
- install-dvi-am install-exec install-exec-am install-html \
- install-html-am install-includeHEADERS install-info \
- install-info-am install-libLTLIBRARIES install-man \
- install-man1 install-man3 install-nodist_includeHEADERS \
- install-pdf install-pdf-am install-pkgconfigDATA install-ps \
- install-ps-am install-strip installcheck installcheck-am \
- installdirs maintainer-clean maintainer-clean-generic \
- mostlyclean mostlyclean-compile mostlyclean-generic \
- mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \
- uninstall uninstall-am uninstall-binPROGRAMS \
- uninstall-binSCRIPTS uninstall-dist_docDATA \
- uninstall-dist_htmlDATA uninstall-includeHEADERS \
- uninstall-libLTLIBRARIES uninstall-man uninstall-man1 \
- uninstall-man3 uninstall-nodist_includeHEADERS \
- uninstall-pkgconfigDATA
+ dist-zstd distcheck distclean distclean-compile \
+ distclean-generic distclean-hdr distclean-libtool \
+ distclean-local distclean-tags distcleancheck distdir \
+ distuninstallcheck dvi dvi-am html html-am info info-am \
+ install install-am install-binPROGRAMS install-binSCRIPTS \
+ install-data install-data-am install-dist_docDATA \
+ install-dist_htmlDATA install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am \
+ install-includeHEADERS install-info install-info-am \
+ install-libLTLIBRARIES install-man install-man1 install-man3 \
+ install-nodist_includeHEADERS install-pdf install-pdf-am \
+ install-pkgconfigDATA install-ps install-ps-am install-strip \
+ installcheck installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ recheck tags tags-am uninstall uninstall-am \
+ uninstall-binPROGRAMS uninstall-binSCRIPTS \
+ uninstall-dist_docDATA uninstall-dist_htmlDATA \
+ uninstall-includeHEADERS uninstall-libLTLIBRARIES \
+ uninstall-man uninstall-man1 uninstall-man3 \
+ uninstall-nodist_includeHEADERS uninstall-pkgconfigDATA
.PRECIOUS: Makefile
@@ -3571,9 +3578,9 @@ src/config.h.generic: configure.ac
-e 'if(/^\s*$$/){print unless $$blank; $$blank=1;} else{print;$$blank=0;}' \
_generic/src/config.h >$@
rm -rf _generic
-@WITH_REBUILD_CHARTABLES_TRUE@src/pcre2_chartables.c: dftables$(EXEEXT)
+@WITH_REBUILD_CHARTABLES_TRUE@src/pcre2_chartables.c: pcre2_dftables$(EXEEXT)
@WITH_REBUILD_CHARTABLES_TRUE@ rm -f $@
-@WITH_REBUILD_CHARTABLES_TRUE@ ./dftables$(EXEEXT) $@
+@WITH_REBUILD_CHARTABLES_TRUE@ ./pcre2_dftables$(EXEEXT) $@
@WITH_REBUILD_CHARTABLES_FALSE@src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist
@WITH_REBUILD_CHARTABLES_FALSE@ rm -f $@
@WITH_REBUILD_CHARTABLES_FALSE@ $(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.dist $(abs_builddir)/src/pcre2_chartables.c
diff --git a/NEWS b/NEWS
index 30075d0..97b9350 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,27 @@ News about PCRE2 releases
-------------------------
+Version 10.35 15-April-2020
+---------------------------
+
+Bugfixes, tidies, and a few new enhancements.
+
+1. Capturing groups that contain recursive backreferences to themselves are no
+longer automatically atomic, because the restriction is no longer necessary
+as a result of the 10.30 restructuring.
+
+2. Several new options for pcre2_substitute().
+
+3. When Unicode is supported and PCRE2_UCP is set without PCRE2_UTF, Unicode
+character properties are used for upper/lower case computations on characters
+whose code points are greater than 127.
+
+4. The character tables (for low-valued characters) can now more easily be
+saved and restored in binary.
+
+5. Updated to Unicode 13.0.0.
+
+
Version 10.34 21-November-2019
------------------------------
diff --git a/NON-AUTOTOOLS-BUILD b/NON-AUTOTOOLS-BUILD
index 39e7620..a73c058 100644
--- a/NON-AUTOTOOLS-BUILD
+++ b/NON-AUTOTOOLS-BUILD
@@ -74,14 +74,14 @@ can skip ahead to the CMake section.
src/pcre2_chartables.c.
OR:
- Compile src/dftables.c as a stand-alone program (using -DHAVE_CONFIG_H
- if you have set up src/config.h), and then run it with the single
- argument "src/pcre2_chartables.c". This generates a set of standard
- character tables and writes them to that file. The tables are generated
- using the default C locale for your system. If you want to use a locale
- that is specified by LC_xxx environment variables, add the -L option to
- the dftables command. You must use this method if you are building on a
- system that uses EBCDIC code.
+ Compile src/pcre2_dftables.c as a stand-alone program (using
+ -DHAVE_CONFIG_H if you have set up src/config.h), and then run it with
+ the single argument "src/pcre2_chartables.c". This generates a set of
+ standard character tables and writes them to that file. The tables are
+ generated using the default C locale for your system. If you want to use
+ a locale that is specified by LC_xxx environment variables, add the -L
+ option to the pcre2_dftables command. You must use this method if you
+ are building on a system that uses EBCDIC code.
The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
specify alternative tables at run time.
diff --git a/PrepareRelease b/PrepareRelease
index 9aa6b7d..e7cf8db 100755
--- a/PrepareRelease
+++ b/PrepareRelease
@@ -190,7 +190,7 @@ files="\
libpcre2-16.pc.in \
libpcre2-32.pc.in \
libpcre2-posix.pc.in \
- src/dftables.c \
+ src/pcre2_dftables.c \
src/pcre2.h.in \
src/pcre2_auto_possess.c \
src/pcre2_compile.c \
diff --git a/README b/README
index 8ce6f96..241376f 100644
--- a/README
+++ b/README
@@ -269,9 +269,9 @@ library. They are also documented in the pcre2build man page.
--enable-rebuild-chartables
- a program called dftables is compiled and run in the default C locale when
- you obey "make". It builds a source file called pcre2_chartables.c. If you do
- not specify this option, pcre2_chartables.c is created as a copy of
+ a program called pcre2_dftables is compiled and run in the default C locale
+ when you obey "make". It builds a source file called pcre2_chartables.c. If
+ you do not specify this option, pcre2_chartables.c is created as a copy of
pcre2_chartables.c.dist. See "Character tables" below for further
information.
@@ -548,11 +548,11 @@ Cross-compiling using autotools
You can specify CC and CFLAGS in the normal way to the "configure" command, in
order to cross-compile PCRE2 for some other host. However, you should NOT
-specify --enable-rebuild-chartables, because if you do, the dftables.c source
-file is compiled and run on the local host, in order to generate the inbuilt
-character tables (the pcre2_chartables.c file). This will probably not work,
-because dftables.c needs to be compiled with the local compiler, not the cross
-compiler.
+specify --enable-rebuild-chartables, because if you do, the pcre2_dftables.c
+source file is compiled and run on the local host, in order to generate the
+inbuilt character tables (the pcre2_chartables.c file). This will probably not
+work, because pcre2_dftables.c needs to be compiled with the local compiler,
+not the cross compiler.
When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
created by making a copy of pcre2_chartables.c.dist, which is a default set of
@@ -560,9 +560,10 @@ tables that assumes ASCII code. Cross-compiling with the default tables should
not be a problem.
If you need to modify the character tables when cross-compiling, you should
-move pcre2_chartables.c.dist out of the way, then compile dftables.c by hand
-and run it on the local host to make a new version of pcre2_chartables.c.dist.
-Then when you cross-compile PCRE2 this new version of the tables will be used.
+move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by
+hand and run it on the local host to make a new version of
+pcre2_chartables.c.dist. See the pcre2build section "Creating character tables
+at build time" for more details.
Making new tarballs
@@ -721,8 +722,8 @@ compile context.
The source file called pcre2_chartables.c contains the default set of tables.
By default, this is created as a copy of pcre2_chartables.c.dist, which
contains tables for ASCII coding. However, if --enable-rebuild-chartables is
-specified for ./configure, a different version of pcre2_chartables.c is built
-by the program dftables (compiled from dftables.c), which uses the ANSI C
+specified for ./configure, a new version of pcre2_chartables.c is built by the
+program pcre2_dftables (compiled from pcre2_dftables.c), which uses the ANSI C
character handling functions such as isalnum(), isalpha(), isupper(),
islower(), etc. to build the table sources. This means that the default C
locale that is set for your system will control the contents of these default
@@ -732,32 +733,31 @@ file does not get automatically re-generated. The best way to do this is to
move pcre2_chartables.c.dist out of the way and replace it with your customized
tables.
-When the dftables program is run as a result of --enable-rebuild-chartables,
-it uses the default C locale that is set on your system. It does not pay
-attention to the LC_xxx environment variables. In other words, it uses the
-system's default locale rather than whatever the compiling user happens to have
-set. If you really do want to build a source set of character tables in a
-locale that is specified by the LC_xxx variables, you can run the dftables
-program by hand with the -L option. For example:
+When the pcre2_dftables program is run as a result of specifying
+--enable-rebuild-chartables, it uses the default C locale that is set on your
+system. It does not pay attention to the LC_xxx environment variables. In other
+words, it uses the system's default locale rather than whatever the compiling
+user happens to have set. If you really do want to build a source set of
+character tables in a locale that is specified by the LC_xxx variables, you can
+run the pcre2_dftables program by hand with the -L option. For example:
- ./dftables -L pcre2_chartables.c.special
+ ./pcre2_dftables -L pcre2_chartables.c.special
-The first two 256-byte tables provide lower casing and case flipping functions,
-respectively. The next table consists of three 32-byte bit maps which identify
-digits, "word" characters, and white space, respectively. These are used when
-building 32-byte bit maps that represent character classes for code points less
-than 256. The final 256-byte table has bits indicating various character types,
-as follows:
+The second argument names the file where the source code for the tables is
+written. The first two 256-byte tables provide lower casing and case flipping
+functions, respectively. The next table consists of a number of 32-byte bit
+maps which identify certain character classes such as digits, "word"
+characters, white space, etc. These are used when building 32-byte bit maps
+that represent character classes for code points less than 256. The final
+256-byte table has bits indicating various character types, as follows:
1 white space character
2 letter
- 4 decimal digit
- 8 hexadecimal digit
+ 4 lower case letter
+ 8 decimal digit
16 alphanumeric or '_'
- 128 regular expression metacharacter or binary zero
-You should not alter the set of characters that contain the 128 bit, as that
-will cause PCRE2 to malfunction.
+See also the pcre2build section "Creating character tables at build time".
File manifest
@@ -768,7 +768,7 @@ The distribution should contain the files listed below.
(A) Source files for the PCRE2 library functions and their headers are found in
the src directory:
- src/dftables.c auxiliary program for building pcre2_chartables.c
+ src/pcre2_dftables.c auxiliary program for building pcre2_chartables.c
when --enable-rebuild-chartables is specified
src/pcre2_chartables.c.dist a default set of character tables that assume
@@ -894,4 +894,4 @@ The distribution should contain the files listed below.
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
-Last updated: 16 April 2019
+Last updated: 20 March 2020
diff --git a/RunGrepTest b/RunGrepTest
index 1113cd4..2ff4f7c 100755
--- a/RunGrepTest
+++ b/RunGrepTest
@@ -742,11 +742,11 @@ uname=`uname`
case $uname in
Linux)
printf 'abc\0def' >testNinputgrep
- $valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | sed 's/\x00/ZERO/' >>testtrygrep
+ $valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | sed 's/\x00/ZERO/g' >>testtrygrep
echo "" >>testtrygrep
;;
*)
- echo '1:abcZERO2:def' >>testtrygrep
+ echo '1:abcZERO2:defZERO' >>testtrygrep
;;
esac
diff --git a/aclocal.m4 b/aclocal.m4
index 87e34b0..a1b8aed 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -1,6 +1,6 @@
-# generated automatically by aclocal 1.16.1 -*- Autoconf -*-
+# generated automatically by aclocal 1.16.2 -*- Autoconf -*-
-# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+# Copyright (C) 1996-2020 Free Software Foundation, Inc.
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -364,7 +364,7 @@ AS_IF([test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"],
[AC_DEFINE([HAVE_][$1], 1, [Enable ]m4_tolower([$1])[ support])])
])dnl PKG_HAVE_DEFINE_WITH_MODULES
-# Copyright (C) 2002-2018 Free Software Foundation, Inc.
+# Copyright (C) 2002-2020 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -379,7 +379,7 @@ AC_DEFUN([AM_AUTOMAKE_VERSION],
[am__api_version='1.16'
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
dnl require some minimum version. Point them to the right macro.
-m4_if([$1], [1.16.1], [],
+m4_if([$1], [1.16.2], [],
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
])
@@ -395,12 +395,12 @@ m4_define([_AM_AUTOCONF_VERSION], [])
# Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced.
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
-[AM_AUTOMAKE_VERSION([1.16.1])dnl
+[AM_AUTOMAKE_VERSION([1.16.2])dnl
m4_ifndef([AC_AUTOCONF_VERSION],
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
-# Copyright (C) 2011-2018 Free Software Foundation, Inc.
+# Copyright (C) 2011-2020 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -462,7 +462,7 @@ AC_SUBST([AR])dnl
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
-# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+# Copyright (C) 2001-2020 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -514,7 +514,7 @@ am_aux_dir=`cd "$ac_aux_dir" && pwd`
# AM_CONDITIONAL -*- Autoconf -*-
-# Copyright (C) 1997-2018 Free Software Foundation, Inc.
+# Copyright (C) 1997-2020 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -545,7 +545,7 @@ AC_CONFIG_COMMANDS_PRE(
Usually this means the macro was only invoked conditionally.]])
fi])])
-# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+# Copyright (C) 1999-2020 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -736,7 +736,7 @@ _AM_SUBST_NOTMAKE([am__nodep])dnl
# Generate code to set up dependency tracking. -*- Autoconf -*-
-# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+# Copyright (C) 1999-2020 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -775,7 +775,9 @@ AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
done
if test $am_rc -ne 0; then
AC_MSG_FAILURE([Something went wrong bootstrapping makefile fragments
- for automatic dependency tracking. Try re-running configure with the
+ for automatic dependency tracking. If GNU make was not used, consider
+ re-running the configure script with MAKE="gmake" (or whatever is
+ necessary). You can also try re-running configure with the
'--disable-dependency-tracking' option to at least be able to build
the package (albeit without support for automatic dependency tracking).])
fi
@@ -802,7 +804,7 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
# Do all the work for Automake. -*- Autoconf -*-
-# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+# Copyright (C) 1996-2020 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -999,7 +1001,7 @@ for _am_header in $config_headers :; do
done
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
-# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+# Copyright (C) 2001-2020 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1020,7 +1022,7 @@ if test x"${install_sh+set}" != xset; then
fi
AC_SUBST([install_sh])])
-# Copyright (C) 2003-2018 Free Software Foundation, Inc.
+# Copyright (C) 2003-2020 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1041,7 +1043,7 @@ AC_SUBST([am__leading_dot])])
# Check to see how 'make' treats includes. -*- Autoconf -*-
-# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+# Copyright (C) 2001-2020 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1084,7 +1086,7 @@ AC_SUBST([am__quote])])
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
-# Copyright (C) 1997-2018 Free Software Foundation, Inc.
+# Copyright (C) 1997-2020 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1123,7 +1125,7 @@ fi
# Helper functions for option handling. -*- Autoconf -*-
-# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+# Copyright (C) 2001-2020 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1152,7 +1154,7 @@ AC_DEFUN([_AM_SET_OPTIONS],
AC_DEFUN([_AM_IF_OPTION],
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
-# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+# Copyright (C) 1999-2020 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1199,7 +1201,7 @@ AC_LANG_POP([C])])
# For backward compatibility.
AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
-# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+# Copyright (C) 2001-2020 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1218,7 +1220,7 @@ AC_DEFUN([AM_RUN_LOG],
# Check to make sure that the build environment is sane. -*- Autoconf -*-
-# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+# Copyright (C) 1996-2020 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1299,7 +1301,7 @@ AC_CONFIG_COMMANDS_PRE(
rm -f conftest.file
])
-# Copyright (C) 2009-2018 Free Software Foundation, Inc.
+# Copyright (C) 2009-2020 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1359,7 +1361,7 @@ AC_SUBST([AM_BACKSLASH])dnl
_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
])
-# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+# Copyright (C) 2001-2020 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1387,7 +1389,7 @@ fi
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
AC_SUBST([INSTALL_STRIP_PROGRAM])])
-# Copyright (C) 2006-2018 Free Software Foundation, Inc.
+# Copyright (C) 2006-2020 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1406,7 +1408,7 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
# Check how to create a tarball. -*- Autoconf -*-
-# Copyright (C) 2004-2018 Free Software Foundation, Inc.
+# Copyright (C) 2004-2020 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/ar-lib b/ar-lib
index 0baa4f6..1e9388e 100755
--- a/ar-lib
+++ b/ar-lib
@@ -2,9 +2,9 @@
# Wrapper for Microsoft lib.exe
me=ar-lib
-scriptversion=2012-03-01.08; # UTC
+scriptversion=2019-07-04.01; # UTC
-# Copyright (C) 2010-2018 Free Software Foundation, Inc.
+# Copyright (C) 2010-2020 Free Software Foundation, Inc.
# Written by Peter Rosin <peda@lysator.liu.se>.
#
# This program is free software; you can redistribute it and/or modify
@@ -53,7 +53,7 @@ func_file_conv ()
MINGW*)
file_conv=mingw
;;
- CYGWIN*)
+ CYGWIN* | MSYS*)
file_conv=cygwin
;;
*)
@@ -65,7 +65,7 @@ func_file_conv ()
mingw)
file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
;;
- cygwin)
+ cygwin | msys)
file=`cygpath -m "$file" || echo "$file"`
;;
wine)
@@ -224,10 +224,11 @@ elif test -n "$extract"; then
esac
done
else
- $AR -NOLOGO -LIST "$archive" | sed -e 's/\\/\\\\/g' | while read member
- do
- $AR -NOLOGO -EXTRACT:"$member" "$archive" || exit $?
- done
+ $AR -NOLOGO -LIST "$archive" | tr -d '\r' | sed -e 's/\\/\\\\/g' \
+ | while read member
+ do
+ $AR -NOLOGO -EXTRACT:"$member" "$archive" || exit $?
+ done
fi
elif test -n "$quick$replace"; then
diff --git a/compile b/compile
index 99e5052..23fcba0 100755
--- a/compile
+++ b/compile
@@ -3,7 +3,7 @@
scriptversion=2018-03-07.03; # UTC
-# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+# Copyright (C) 1999-2020 Free Software Foundation, Inc.
# Written by Tom Tromey <tromey@cygnus.com>.
#
# This program is free software; you can redistribute it and/or modify
@@ -53,7 +53,7 @@ func_file_conv ()
MINGW*)
file_conv=mingw
;;
- CYGWIN*)
+ CYGWIN* | MSYS*)
file_conv=cygwin
;;
*)
@@ -67,7 +67,7 @@ func_file_conv ()
mingw/*)
file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
;;
- cygwin/*)
+ cygwin/* | msys/*)
file=`cygpath -m "$file" || echo "$file"`
;;
wine/*)
diff --git a/config-cmake.h.in b/config-cmake.h.in
index 529b009..7766dd7 100644
--- a/config-cmake.h.in
+++ b/config-cmake.h.in
@@ -1,5 +1,6 @@
/* config.h for CMake builds */
+#cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1
#cmakedefine HAVE_DIRENT_H 1
#cmakedefine HAVE_INTTYPES_H 1
#cmakedefine HAVE_STDINT_H 1
@@ -10,7 +11,10 @@
#cmakedefine HAVE_WINDOWS_H 1
#cmakedefine HAVE_BCOPY 1
+#cmakedefine HAVE_MEMFD_CREATE 1
#cmakedefine HAVE_MEMMOVE 1
+#cmakedefine HAVE_SECURE_GETENV 1
+#cmakedefine HAVE_STRERROR 1
#cmakedefine PCRE2_STATIC 1
diff --git a/config.guess b/config.guess
index 256083a..45001cf 100755
--- a/config.guess
+++ b/config.guess
@@ -1,8 +1,8 @@
#! /bin/sh
# Attempt to guess a canonical system name.
-# Copyright 1992-2018 Free Software Foundation, Inc.
+# Copyright 1992-2020 Free Software Foundation, Inc.
-timestamp='2018-03-08'
+timestamp='2020-01-01'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@@ -50,7 +50,7 @@ version="\
GNU config.guess ($timestamp)
Originally written by Per Bothner.
-Copyright 1992-2018 Free Software Foundation, Inc.
+Copyright 1992-2020 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -84,8 +84,6 @@ if test $# != 0; then
exit 1
fi
-trap 'exit 1' 1 2 15
-
# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
# compiler to aid in system detection is discouraged as it requires
# temporary files to be created and, as you can see below, it is a
@@ -96,34 +94,40 @@ trap 'exit 1' 1 2 15
# Portable tmp directory creation inspired by the Autoconf team.
-set_cc_for_build='
-trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
-trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
-: ${TMPDIR=/tmp} ;
- { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
- { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
- { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
- { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
-dummy=$tmp/dummy ;
-tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ;
-case $CC_FOR_BUILD,$HOST_CC,$CC in
- ,,) echo "int x;" > "$dummy.c" ;
- for c in cc gcc c89 c99 ; do
- if ($c -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then
- CC_FOR_BUILD="$c"; break ;
- fi ;
- done ;
- if test x"$CC_FOR_BUILD" = x ; then
- CC_FOR_BUILD=no_compiler_found ;
- fi
- ;;
- ,,*) CC_FOR_BUILD=$CC ;;
- ,*,*) CC_FOR_BUILD=$HOST_CC ;;
-esac ; set_cc_for_build= ;'
+tmp=
+# shellcheck disable=SC2172
+trap 'test -z "$tmp" || rm -fr "$tmp"' 0 1 2 13 15
+
+set_cc_for_build() {
+ # prevent multiple calls if $tmp is already set
+ test "$tmp" && return 0
+ : "${TMPDIR=/tmp}"
+ # shellcheck disable=SC2039
+ { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
+ { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } ||
+ { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } ||
+ { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; }
+ dummy=$tmp/dummy
+ case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in
+ ,,) echo "int x;" > "$dummy.c"
+ for driver in cc gcc c89 c99 ; do
+ if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then
+ CC_FOR_BUILD="$driver"
+ break
+ fi
+ done
+ if test x"$CC_FOR_BUILD" = x ; then
+ CC_FOR_BUILD=no_compiler_found
+ fi
+ ;;
+ ,,*) CC_FOR_BUILD=$CC ;;
+ ,*,*) CC_FOR_BUILD=$HOST_CC ;;
+ esac
+}
# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
# (ghazi@noc.rutgers.edu 1994-08-24)
-if (test -f /.attbin/uname) >/dev/null 2>&1 ; then
+if test -f /.attbin/uname ; then
PATH=$PATH:/.attbin ; export PATH
fi
@@ -138,7 +142,7 @@ Linux|GNU|GNU/*)
# We could probably try harder.
LIBC=gnu
- eval "$set_cc_for_build"
+ set_cc_for_build
cat <<-EOF > "$dummy.c"
#include <features.h>
#if defined(__UCLIBC__)
@@ -199,7 +203,7 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
os=netbsdelf
;;
arm*|i386|m68k|ns32k|sh3*|sparc|vax)
- eval "$set_cc_for_build"
+ set_cc_for_build
if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
| grep -q __ELF__
then
@@ -237,7 +241,7 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
# Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
# contains redundant information, the shorter form:
# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
- echo "$machine-${os}${release}${abi}"
+ echo "$machine-${os}${release}${abi-}"
exit ;;
*:Bitrig:*:*)
UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`
@@ -260,6 +264,9 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
*:SolidBSD:*:*)
echo "$UNAME_MACHINE"-unknown-solidbsd"$UNAME_RELEASE"
exit ;;
+ *:OS108:*:*)
+ echo "$UNAME_MACHINE"-unknown-os108_"$UNAME_RELEASE"
+ exit ;;
macppc:MirBSD:*:*)
echo powerpc-unknown-mirbsd"$UNAME_RELEASE"
exit ;;
@@ -269,12 +276,15 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
*:Sortix:*:*)
echo "$UNAME_MACHINE"-unknown-sortix
exit ;;
+ *:Twizzler:*:*)
+ echo "$UNAME_MACHINE"-unknown-twizzler
+ exit ;;
*:Redox:*:*)
echo "$UNAME_MACHINE"-unknown-redox
exit ;;
mips:OSF1:*.*)
- echo mips-dec-osf1
- exit ;;
+ echo mips-dec-osf1
+ exit ;;
alpha:OSF1:*:*)
case $UNAME_RELEASE in
*4.0)
@@ -389,7 +399,7 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
echo i386-pc-auroraux"$UNAME_RELEASE"
exit ;;
i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
- eval "$set_cc_for_build"
+ set_cc_for_build
SUN_ARCH=i386
# If there is a compiler, see if it is configured for 64-bit objects.
# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
@@ -482,7 +492,7 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
echo clipper-intergraph-clix"$UNAME_RELEASE"
exit ;;
mips:*:*:UMIPS | mips:*:*:RISCos)
- eval "$set_cc_for_build"
+ set_cc_for_build
sed 's/^ //' << EOF > "$dummy.c"
#ifdef __cplusplus
#include <stdio.h> /* for printf() prototype */
@@ -579,7 +589,7 @@ EOF
exit ;;
*:AIX:2:3)
if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
- eval "$set_cc_for_build"
+ set_cc_for_build
sed 's/^ //' << EOF > "$dummy.c"
#include <sys/systemcfg.h>
@@ -660,7 +670,7 @@ EOF
esac
fi
if [ "$HP_ARCH" = "" ]; then
- eval "$set_cc_for_build"
+ set_cc_for_build
sed 's/^ //' << EOF > "$dummy.c"
#define _HPUX_SOURCE
@@ -700,7 +710,7 @@ EOF
esac
if [ "$HP_ARCH" = hppa2.0w ]
then
- eval "$set_cc_for_build"
+ set_cc_for_build
# hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
# 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler
@@ -726,7 +736,7 @@ EOF
echo ia64-hp-hpux"$HPUX_REV"
exit ;;
3050*:HI-UX:*:*)
- eval "$set_cc_for_build"
+ set_cc_for_build
sed 's/^ //' << EOF > "$dummy.c"
#include <unistd.h>
int
@@ -840,6 +850,17 @@ EOF
*:BSD/OS:*:*)
echo "$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE"
exit ;;
+ arm:FreeBSD:*:*)
+ UNAME_PROCESSOR=`uname -p`
+ set_cc_for_build
+ if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
+ | grep -q __ARM_PCS_VFP
+ then
+ echo "${UNAME_PROCESSOR}"-unknown-freebsd"`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`"-gnueabi
+ else
+ echo "${UNAME_PROCESSOR}"-unknown-freebsd"`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`"-gnueabihf
+ fi
+ exit ;;
*:FreeBSD:*:*)
UNAME_PROCESSOR=`/usr/bin/uname -p`
case "$UNAME_PROCESSOR" in
@@ -881,7 +902,7 @@ EOF
echo "$UNAME_MACHINE"-pc-uwin
exit ;;
amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
- echo x86_64-unknown-cygwin
+ echo x86_64-pc-cygwin
exit ;;
prep*:SunOS:5.*:*)
echo powerpcle-unknown-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`"
@@ -894,8 +915,8 @@ EOF
# other systems with GNU libc and userland
echo "$UNAME_MACHINE-unknown-`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`-$LIBC"
exit ;;
- i*86:Minix:*:*)
- echo "$UNAME_MACHINE"-pc-minix
+ *:Minix:*:*)
+ echo "$UNAME_MACHINE"-unknown-minix
exit ;;
aarch64:Linux:*:*)
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
@@ -905,7 +926,7 @@ EOF
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
alpha:Linux:*:*)
- case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
+ case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null` in
EV5) UNAME_MACHINE=alphaev5 ;;
EV56) UNAME_MACHINE=alphaev56 ;;
PCA56) UNAME_MACHINE=alphapca56 ;;
@@ -922,7 +943,7 @@ EOF
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
arm*:Linux:*:*)
- eval "$set_cc_for_build"
+ set_cc_for_build
if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
| grep -q __ARM_EABI__
then
@@ -971,23 +992,51 @@ EOF
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
mips:Linux:*:* | mips64:Linux:*:*)
- eval "$set_cc_for_build"
+ set_cc_for_build
+ IS_GLIBC=0
+ test x"${LIBC}" = xgnu && IS_GLIBC=1
sed 's/^ //' << EOF > "$dummy.c"
#undef CPU
- #undef ${UNAME_MACHINE}
- #undef ${UNAME_MACHINE}el
+ #undef mips
+ #undef mipsel
+ #undef mips64
+ #undef mips64el
+ #if ${IS_GLIBC} && defined(_ABI64)
+ LIBCABI=gnuabi64
+ #else
+ #if ${IS_GLIBC} && defined(_ABIN32)
+ LIBCABI=gnuabin32
+ #else
+ LIBCABI=${LIBC}
+ #endif
+ #endif
+
+ #if ${IS_GLIBC} && defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6
+ CPU=mipsisa64r6
+ #else
+ #if ${IS_GLIBC} && !defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6
+ CPU=mipsisa32r6
+ #else
+ #if defined(__mips64)
+ CPU=mips64
+ #else
+ CPU=mips
+ #endif
+ #endif
+ #endif
+
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
- CPU=${UNAME_MACHINE}el
+ MIPS_ENDIAN=el
#else
#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
- CPU=${UNAME_MACHINE}
+ MIPS_ENDIAN=
#else
- CPU=
+ MIPS_ENDIAN=
#endif
#endif
EOF
- eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU'`"
- test "x$CPU" != x && { echo "$CPU-unknown-linux-$LIBC"; exit; }
+ eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI'`"
+ test "x$CPU" != x && { echo "$CPU${MIPS_ENDIAN}-unknown-linux-$LIBCABI"; exit; }
;;
mips64el:Linux:*:*)
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
@@ -1100,7 +1149,7 @@ EOF
*Pentium) UNAME_MACHINE=i586 ;;
*Pent*|*Celeron) UNAME_MACHINE=i686 ;;
esac
- echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}{$UNAME_VERSION}"
+ echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}"
exit ;;
i*86:*:3.2:*)
if test -f /usr/options/cb.name; then
@@ -1284,38 +1333,39 @@ EOF
echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE"
exit ;;
*:Darwin:*:*)
- UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
- eval "$set_cc_for_build"
- if test "$UNAME_PROCESSOR" = unknown ; then
- UNAME_PROCESSOR=powerpc
+ UNAME_PROCESSOR=`uname -p`
+ case $UNAME_PROCESSOR in
+ unknown) UNAME_PROCESSOR=powerpc ;;
+ esac
+ if command -v xcode-select > /dev/null 2> /dev/null && \
+ ! xcode-select --print-path > /dev/null 2> /dev/null ; then
+ # Avoid executing cc if there is no toolchain installed as
+ # cc will be a stub that puts up a graphical alert
+ # prompting the user to install developer tools.
+ CC_FOR_BUILD=no_compiler_found
+ else
+ set_cc_for_build
fi
- if test "`echo "$UNAME_RELEASE" | sed -e 's/\..*//'`" -le 10 ; then
- if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
- if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
- (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
- grep IS_64BIT_ARCH >/dev/null
- then
- case $UNAME_PROCESSOR in
- i386) UNAME_PROCESSOR=x86_64 ;;
- powerpc) UNAME_PROCESSOR=powerpc64 ;;
- esac
- fi
- # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc
- if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \
- (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
- grep IS_PPC >/dev/null
- then
- UNAME_PROCESSOR=powerpc
- fi
+ if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
+ if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
+ (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
+ grep IS_64BIT_ARCH >/dev/null
+ then
+ case $UNAME_PROCESSOR in
+ i386) UNAME_PROCESSOR=x86_64 ;;
+ powerpc) UNAME_PROCESSOR=powerpc64 ;;
+ esac
+ fi
+ # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc
+ if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \
+ (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
+ grep IS_PPC >/dev/null
+ then
+ UNAME_PROCESSOR=powerpc
fi
elif test "$UNAME_PROCESSOR" = i386 ; then
- # Avoid executing cc on OS X 10.9, as it ships with a stub
- # that puts up a graphical alert prompting to install
- # developer tools. Any system running Mac OS X 10.7 or
- # later (Darwin 11 and later) is required to have a 64-bit
- # processor. This is not true of the ARM version of Darwin
- # that Apple uses in portable devices.
- UNAME_PROCESSOR=x86_64
+ # uname -m returns i386 or x86_64
+ UNAME_PROCESSOR=$UNAME_MACHINE
fi
echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE"
exit ;;
@@ -1358,6 +1408,7 @@ EOF
# "uname -m" is not consistent, so use $cputype instead. 386
# is converted to i386 for consistency with other x86
# operating systems.
+ # shellcheck disable=SC2154
if test "$cputype" = 386; then
UNAME_MACHINE=i386
else
@@ -1414,8 +1465,148 @@ EOF
amd64:Isilon\ OneFS:*:*)
echo x86_64-unknown-onefs
exit ;;
+ *:Unleashed:*:*)
+ echo "$UNAME_MACHINE"-unknown-unleashed"$UNAME_RELEASE"
+ exit ;;
esac
+# No uname command or uname output not recognized.
+set_cc_for_build
+cat > "$dummy.c" <<EOF
+#ifdef _SEQUENT_
+#include <sys/types.h>
+#include <sys/utsname.h>
+#endif
+#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__)
+#if defined (vax) || defined (__vax) || defined (__vax__) || defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__)
+#include <signal.h>
+#if defined(_SIZE_T_) || defined(SIGLOST)
+#include <sys/utsname.h>
+#endif
+#endif
+#endif
+main ()
+{
+#if defined (sony)
+#if defined (MIPSEB)
+ /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed,
+ I don't know.... */
+ printf ("mips-sony-bsd\n"); exit (0);
+#else
+#include <sys/param.h>
+ printf ("m68k-sony-newsos%s\n",
+#ifdef NEWSOS4
+ "4"
+#else
+ ""
+#endif
+ ); exit (0);
+#endif
+#endif
+
+#if defined (NeXT)
+#if !defined (__ARCHITECTURE__)
+#define __ARCHITECTURE__ "m68k"
+#endif
+ int version;
+ version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
+ if (version < 4)
+ printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
+ else
+ printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
+ exit (0);
+#endif
+
+#if defined (MULTIMAX) || defined (n16)
+#if defined (UMAXV)
+ printf ("ns32k-encore-sysv\n"); exit (0);
+#else
+#if defined (CMU)
+ printf ("ns32k-encore-mach\n"); exit (0);
+#else
+ printf ("ns32k-encore-bsd\n"); exit (0);
+#endif
+#endif
+#endif
+
+#if defined (__386BSD__)
+ printf ("i386-pc-bsd\n"); exit (0);
+#endif
+
+#if defined (sequent)
+#if defined (i386)
+ printf ("i386-sequent-dynix\n"); exit (0);
+#endif
+#if defined (ns32000)
+ printf ("ns32k-sequent-dynix\n"); exit (0);
+#endif
+#endif
+
+#if defined (_SEQUENT_)
+ struct utsname un;
+
+ uname(&un);
+ if (strncmp(un.version, "V2", 2) == 0) {
+ printf ("i386-sequent-ptx2\n"); exit (0);
+ }
+ if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
+ printf ("i386-sequent-ptx1\n"); exit (0);
+ }
+ printf ("i386-sequent-ptx\n"); exit (0);
+#endif
+
+#if defined (vax)
+#if !defined (ultrix)
+#include <sys/param.h>
+#if defined (BSD)
+#if BSD == 43
+ printf ("vax-dec-bsd4.3\n"); exit (0);
+#else
+#if BSD == 199006
+ printf ("vax-dec-bsd4.3reno\n"); exit (0);
+#else
+ printf ("vax-dec-bsd\n"); exit (0);
+#endif
+#endif
+#else
+ printf ("vax-dec-bsd\n"); exit (0);
+#endif
+#else
+#if defined(_SIZE_T_) || defined(SIGLOST)
+ struct utsname un;
+ uname (&un);
+ printf ("vax-dec-ultrix%s\n", un.release); exit (0);
+#else
+ printf ("vax-dec-ultrix\n"); exit (0);
+#endif
+#endif
+#endif
+#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__)
+#if defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__)
+#if defined(_SIZE_T_) || defined(SIGLOST)
+ struct utsname un;
+ uname (&un);
+ printf ("mips-dec-ultrix%s\n", un.release); exit (0);
+#else
+ printf ("mips-dec-ultrix\n"); exit (0);
+#endif
+#endif
+#endif
+
+#if defined (alliant) && defined (i860)
+ printf ("i860-alliant-bsd\n"); exit (0);
+#endif
+
+ exit (1);
+}
+EOF
+
+$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=`$dummy` &&
+ { echo "$SYSTEM_NAME"; exit; }
+
+# Apollos put the system type in the environment.
+test -d /usr/apollo && { echo "$ISP-apollo-$SYSTYPE"; exit; }
+
echo "$0: unable to guess system type" >&2
case "$UNAME_MACHINE:$UNAME_SYSTEM" in
diff --git a/config.sub b/config.sub
index 9ccf09a..f02d43a 100755
--- a/config.sub
+++ b/config.sub
@@ -1,8 +1,8 @@
#! /bin/sh
# Configuration validation subroutine script.
-# Copyright 1992-2018 Free Software Foundation, Inc.
+# Copyright 1992-2020 Free Software Foundation, Inc.
-timestamp='2018-03-08'
+timestamp='2020-01-01'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@@ -67,7 +67,7 @@ Report bugs and patches to <config-patches@gnu.org>."
version="\
GNU config.sub ($timestamp)
-Copyright 1992-2018 Free Software Foundation, Inc.
+Copyright 1992-2020 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -89,7 +89,7 @@ while test $# -gt 0 ; do
- ) # Use stdin as input.
break ;;
-* )
- echo "$me: invalid option $1$help"
+ echo "$me: invalid option $1$help" >&2
exit 1 ;;
*local*)
@@ -110,1223 +110,1164 @@ case $# in
exit 1;;
esac
-# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
-# Here we must recognize all the valid KERNEL-OS combinations.
-maybe_os=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
-case $maybe_os in
- nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
- linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
- knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \
- kopensolaris*-gnu* | cloudabi*-eabi* | \
- storm-chaos* | os2-emx* | rtmk-nova*)
- os=-$maybe_os
- basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
- ;;
- android-linux)
- os=-linux-android
- basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown
- ;;
- *)
- basic_machine=`echo "$1" | sed 's/-[^-]*$//'`
- if [ "$basic_machine" != "$1" ]
- then os=`echo "$1" | sed 's/.*-/-/'`
- else os=; fi
- ;;
-esac
+# Split fields of configuration type
+# shellcheck disable=SC2162
+IFS="-" read field1 field2 field3 field4 <<EOF
+$1
+EOF
-### Let's recognize common machines as not being operating systems so
-### that things like config.sub decstation-3100 work. We also
-### recognize some manufacturers as not being operating systems, so we
-### can provide default operating systems below.
-case $os in
- -sun*os*)
- # Prevent following clause from handling this invalid input.
- ;;
- -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \
- -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \
- -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \
- -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
- -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
- -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
- -apple | -axis | -knuth | -cray | -microblaze*)
- os=
- basic_machine=$1
- ;;
- -bluegene*)
- os=-cnk
- ;;
- -sim | -cisco | -oki | -wec | -winbond)
- os=
- basic_machine=$1
- ;;
- -scout)
- ;;
- -wrs)
- os=-vxworks
- basic_machine=$1
- ;;
- -chorusos*)
- os=-chorusos
- basic_machine=$1
- ;;
- -chorusrdb)
- os=-chorusrdb
- basic_machine=$1
- ;;
- -hiux*)
- os=-hiuxwe2
- ;;
- -sco6)
- os=-sco5v6
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco5)
- os=-sco3.2v5
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco4)
- os=-sco3.2v4
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco3.2.[4-9]*)
- os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco3.2v[4-9]*)
- # Don't forget version if it is 3.2v4 or newer.
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco5v6*)
- # Don't forget version if it is 3.2v4 or newer.
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco*)
- os=-sco3.2v2
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
- ;;
- -udk*)
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
- ;;
- -isc)
- os=-isc2.2
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
- ;;
- -clix*)
- basic_machine=clipper-intergraph
- ;;
- -isc*)
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-pc/'`
- ;;
- -lynx*178)
- os=-lynxos178
- ;;
- -lynx*5)
- os=-lynxos5
+# Separate into logical components for further validation
+case $1 in
+ *-*-*-*-*)
+ echo Invalid configuration \`"$1"\': more than four components >&2
+ exit 1
;;
- -lynx*)
- os=-lynxos
+ *-*-*-*)
+ basic_machine=$field1-$field2
+ os=$field3-$field4
;;
- -ptx*)
- basic_machine=`echo "$1" | sed -e 's/86-.*/86-sequent/'`
+ *-*-*)
+ # Ambiguous whether COMPANY is present, or skipped and KERNEL-OS is two
+ # parts
+ maybe_os=$field2-$field3
+ case $maybe_os in
+ nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc \
+ | linux-newlib* | linux-musl* | linux-uclibc* | uclinux-uclibc* \
+ | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \
+ | netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \
+ | storm-chaos* | os2-emx* | rtmk-nova*)
+ basic_machine=$field1
+ os=$maybe_os
+ ;;
+ android-linux)
+ basic_machine=$field1-unknown
+ os=linux-android
+ ;;
+ *)
+ basic_machine=$field1-$field2
+ os=$field3
+ ;;
+ esac
;;
- -psos*)
- os=-psos
+ *-*)
+ # A lone config we happen to match not fitting any pattern
+ case $field1-$field2 in
+ decstation-3100)
+ basic_machine=mips-dec
+ os=
+ ;;
+ *-*)
+ # Second component is usually, but not always the OS
+ case $field2 in
+ # Prevent following clause from handling this valid os
+ sun*os*)
+ basic_machine=$field1
+ os=$field2
+ ;;
+ # Manufacturers
+ dec* | mips* | sequent* | encore* | pc532* | sgi* | sony* \
+ | att* | 7300* | 3300* | delta* | motorola* | sun[234]* \
+ | unicom* | ibm* | next | hp | isi* | apollo | altos* \
+ | convergent* | ncr* | news | 32* | 3600* | 3100* \
+ | hitachi* | c[123]* | convex* | sun | crds | omron* | dg \
+ | ultra | tti* | harris | dolphin | highlevel | gould \
+ | cbm | ns | masscomp | apple | axis | knuth | cray \
+ | microblaze* | sim | cisco \
+ | oki | wec | wrs | winbond)
+ basic_machine=$field1-$field2
+ os=
+ ;;
+ *)
+ basic_machine=$field1
+ os=$field2
+ ;;
+ esac
+ ;;
+ esac
;;
- -mint | -mint[0-9]*)
- basic_machine=m68k-atari
- os=-mint
+ *)
+ # Convert single-component short-hands not valid as part of
+ # multi-component configurations.
+ case $field1 in
+ 386bsd)
+ basic_machine=i386-pc
+ os=bsd
+ ;;
+ a29khif)
+ basic_machine=a29k-amd
+ os=udi
+ ;;
+ adobe68k)
+ basic_machine=m68010-adobe
+ os=scout
+ ;;
+ alliant)
+ basic_machine=fx80-alliant
+ os=
+ ;;
+ altos | altos3068)
+ basic_machine=m68k-altos
+ os=
+ ;;
+ am29k)
+ basic_machine=a29k-none
+ os=bsd
+ ;;
+ amdahl)
+ basic_machine=580-amdahl
+ os=sysv
+ ;;
+ amiga)
+ basic_machine=m68k-unknown
+ os=
+ ;;
+ amigaos | amigados)
+ basic_machine=m68k-unknown
+ os=amigaos
+ ;;
+ amigaunix | amix)
+ basic_machine=m68k-unknown
+ os=sysv4
+ ;;
+ apollo68)
+ basic_machine=m68k-apollo
+ os=sysv
+ ;;
+ apollo68bsd)
+ basic_machine=m68k-apollo
+ os=bsd
+ ;;
+ aros)
+ basic_machine=i386-pc
+ os=aros
+ ;;
+ aux)
+ basic_machine=m68k-apple
+ os=aux
+ ;;
+ balance)
+ basic_machine=ns32k-sequent
+ os=dynix
+ ;;
+ blackfin)
+ basic_machine=bfin-unknown
+ os=linux
+ ;;
+ cegcc)
+ basic_machine=arm-unknown
+ os=cegcc
+ ;;
+ convex-c1)
+ basic_machine=c1-convex
+ os=bsd
+ ;;
+ convex-c2)
+ basic_machine=c2-convex
+ os=bsd
+ ;;
+ convex-c32)
+ basic_machine=c32-convex
+ os=bsd
+ ;;
+ convex-c34)
+ basic_machine=c34-convex
+ os=bsd
+ ;;
+ convex-c38)
+ basic_machine=c38-convex
+ os=bsd
+ ;;
+ cray)
+ basic_machine=j90-cray
+ os=unicos
+ ;;
+ crds | unos)
+ basic_machine=m68k-crds
+ os=
+ ;;
+ da30)
+ basic_machine=m68k-da30
+ os=
+ ;;
+ decstation | pmax | pmin | dec3100 | decstatn)
+ basic_machine=mips-dec
+ os=
+ ;;
+ delta88)
+ basic_machine=m88k-motorola
+ os=sysv3
+ ;;
+ dicos)
+ basic_machine=i686-pc
+ os=dicos
+ ;;
+ djgpp)
+ basic_machine=i586-pc
+ os=msdosdjgpp
+ ;;
+ ebmon29k)
+ basic_machine=a29k-amd
+ os=ebmon
+ ;;
+ es1800 | OSE68k | ose68k | ose | OSE)
+ basic_machine=m68k-ericsson
+ os=ose
+ ;;
+ gmicro)
+ basic_machine=tron-gmicro
+ os=sysv
+ ;;
+ go32)
+ basic_machine=i386-pc
+ os=go32
+ ;;
+ h8300hms)
+ basic_machine=h8300-hitachi
+ os=hms
+ ;;
+ h8300xray)
+ basic_machine=h8300-hitachi
+ os=xray
+ ;;
+ h8500hms)
+ basic_machine=h8500-hitachi
+ os=hms
+ ;;
+ harris)
+ basic_machine=m88k-harris
+ os=sysv3
+ ;;
+ hp300 | hp300hpux)
+ basic_machine=m68k-hp
+ os=hpux
+ ;;
+ hp300bsd)
+ basic_machine=m68k-hp
+ os=bsd
+ ;;
+ hppaosf)
+ basic_machine=hppa1.1-hp
+ os=osf
+ ;;
+ hppro)
+ basic_machine=hppa1.1-hp
+ os=proelf
+ ;;
+ i386mach)
+ basic_machine=i386-mach
+ os=mach
+ ;;
+ isi68 | isi)
+ basic_machine=m68k-isi
+ os=sysv
+ ;;
+ m68knommu)
+ basic_machine=m68k-unknown
+ os=linux
+ ;;
+ magnum | m3230)
+ basic_machine=mips-mips
+ os=sysv
+ ;;
+ merlin)
+ basic_machine=ns32k-utek
+ os=sysv
+ ;;
+ mingw64)
+ basic_machine=x86_64-pc
+ os=mingw64
+ ;;
+ mingw32)
+ basic_machine=i686-pc
+ os=mingw32
+ ;;
+ mingw32ce)
+ basic_machine=arm-unknown
+ os=mingw32ce
+ ;;
+ monitor)
+ basic_machine=m68k-rom68k
+ os=coff
+ ;;
+ morphos)
+ basic_machine=powerpc-unknown
+ os=morphos
+ ;;
+ moxiebox)
+ basic_machine=moxie-unknown
+ os=moxiebox
+ ;;
+ msdos)
+ basic_machine=i386-pc
+ os=msdos
+ ;;
+ msys)
+ basic_machine=i686-pc
+ os=msys
+ ;;
+ mvs)
+ basic_machine=i370-ibm
+ os=mvs
+ ;;
+ nacl)
+ basic_machine=le32-unknown
+ os=nacl
+ ;;
+ ncr3000)
+ basic_machine=i486-ncr
+ os=sysv4
+ ;;
+ netbsd386)
+ basic_machine=i386-pc
+ os=netbsd
+ ;;
+ netwinder)
+ basic_machine=armv4l-rebel
+ os=linux
+ ;;
+ news | news700 | news800 | news900)
+ basic_machine=m68k-sony
+ os=newsos
+ ;;
+ news1000)
+ basic_machine=m68030-sony
+ os=newsos
+ ;;
+ necv70)
+ basic_machine=v70-nec
+ os=sysv
+ ;;
+ nh3000)
+ basic_machine=m68k-harris
+ os=cxux
+ ;;
+ nh[45]000)
+ basic_machine=m88k-harris
+ os=cxux
+ ;;
+ nindy960)
+ basic_machine=i960-intel
+ os=nindy
+ ;;
+ mon960)
+ basic_machine=i960-intel
+ os=mon960
+ ;;
+ nonstopux)
+ basic_machine=mips-compaq
+ os=nonstopux
+ ;;
+ os400)
+ basic_machine=powerpc-ibm
+ os=os400
+ ;;
+ OSE68000 | ose68000)
+ basic_machine=m68000-ericsson
+ os=ose
+ ;;
+ os68k)
+ basic_machine=m68k-none
+ os=os68k
+ ;;
+ paragon)
+ basic_machine=i860-intel
+ os=osf
+ ;;
+ parisc)
+ basic_machine=hppa-unknown
+ os=linux
+ ;;
+ pw32)
+ basic_machine=i586-unknown
+ os=pw32
+ ;;
+ rdos | rdos64)
+ basic_machine=x86_64-pc
+ os=rdos
+ ;;
+ rdos32)
+ basic_machine=i386-pc
+ os=rdos
+ ;;
+ rom68k)
+ basic_machine=m68k-rom68k
+ os=coff
+ ;;
+ sa29200)
+ basic_machine=a29k-amd
+ os=udi
+ ;;
+ sei)
+ basic_machine=mips-sei
+ os=seiux
+ ;;
+ sequent)
+ basic_machine=i386-sequent
+ os=
+ ;;
+ sps7)
+ basic_machine=m68k-bull
+ os=sysv2
+ ;;
+ st2000)
+ basic_machine=m68k-tandem
+ os=
+ ;;
+ stratus)
+ basic_machine=i860-stratus
+ os=sysv4
+ ;;
+ sun2)
+ basic_machine=m68000-sun
+ os=
+ ;;
+ sun2os3)
+ basic_machine=m68000-sun
+ os=sunos3
+ ;;
+ sun2os4)
+ basic_machine=m68000-sun
+ os=sunos4
+ ;;
+ sun3)
+ basic_machine=m68k-sun
+ os=
+ ;;
+ sun3os3)
+ basic_machine=m68k-sun
+ os=sunos3
+ ;;
+ sun3os4)
+ basic_machine=m68k-sun
+ os=sunos4
+ ;;
+ sun4)
+ basic_machine=sparc-sun
+ os=
+ ;;
+ sun4os3)
+ basic_machine=sparc-sun
+ os=sunos3
+ ;;
+ sun4os4)
+ basic_machine=sparc-sun
+ os=sunos4
+ ;;
+ sun4sol2)
+ basic_machine=sparc-sun
+ os=solaris2
+ ;;
+ sun386 | sun386i | roadrunner)
+ basic_machine=i386-sun
+ os=
+ ;;
+ sv1)
+ basic_machine=sv1-cray
+ os=unicos
+ ;;
+ symmetry)
+ basic_machine=i386-sequent
+ os=dynix
+ ;;
+ t3e)
+ basic_machine=alphaev5-cray
+ os=unicos
+ ;;
+ t90)
+ basic_machine=t90-cray
+ os=unicos
+ ;;
+ toad1)
+ basic_machine=pdp10-xkl
+ os=tops20
+ ;;
+ tpf)
+ basic_machine=s390x-ibm
+ os=tpf
+ ;;
+ udi29k)
+ basic_machine=a29k-amd
+ os=udi
+ ;;
+ ultra3)
+ basic_machine=a29k-nyu
+ os=sym1
+ ;;
+ v810 | necv810)
+ basic_machine=v810-nec
+ os=none
+ ;;
+ vaxv)
+ basic_machine=vax-dec
+ os=sysv
+ ;;
+ vms)
+ basic_machine=vax-dec
+ os=vms
+ ;;
+ vsta)
+ basic_machine=i386-pc
+ os=vsta
+ ;;
+ vxworks960)
+ basic_machine=i960-wrs
+ os=vxworks
+ ;;
+ vxworks68)
+ basic_machine=m68k-wrs
+ os=vxworks
+ ;;
+ vxworks29k)
+ basic_machine=a29k-wrs
+ os=vxworks
+ ;;
+ xbox)
+ basic_machine=i686-pc
+ os=mingw32
+ ;;
+ ymp)
+ basic_machine=ymp-cray
+ os=unicos
+ ;;
+ *)
+ basic_machine=$1
+ os=
+ ;;
+ esac
;;
esac
-# Decode aliases for certain CPU-COMPANY combinations.
+# Decode 1-component or ad-hoc basic machines
case $basic_machine in
- # Recognize the basic CPU types without company name.
- # Some are omitted here because they have special meanings below.
- 1750a | 580 \
- | a29k \
- | aarch64 | aarch64_be \
- | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
- | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
- | am33_2.0 \
- | arc | arceb \
- | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \
- | avr | avr32 \
- | ba \
- | be32 | be64 \
- | bfin \
- | c4x | c8051 | clipper \
- | d10v | d30v | dlx | dsp16xx \
- | e2k | epiphany \
- | fido | fr30 | frv | ft32 \
- | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
- | hexagon \
- | i370 | i860 | i960 | ia16 | ia64 \
- | ip2k | iq2000 \
- | k1om \
- | le32 | le64 \
- | lm32 \
- | m32c | m32r | m32rle | m68000 | m68k | m88k \
- | maxq | mb | microblaze | microblazeel | mcore | mep | metag \
- | mips | mipsbe | mipseb | mipsel | mipsle \
- | mips16 \
- | mips64 | mips64el \
- | mips64octeon | mips64octeonel \
- | mips64orion | mips64orionel \
- | mips64r5900 | mips64r5900el \
- | mips64vr | mips64vrel \
- | mips64vr4100 | mips64vr4100el \
- | mips64vr4300 | mips64vr4300el \
- | mips64vr5000 | mips64vr5000el \
- | mips64vr5900 | mips64vr5900el \
- | mipsisa32 | mipsisa32el \
- | mipsisa32r2 | mipsisa32r2el \
- | mipsisa32r6 | mipsisa32r6el \
- | mipsisa64 | mipsisa64el \
- | mipsisa64r2 | mipsisa64r2el \
- | mipsisa64r6 | mipsisa64r6el \
- | mipsisa64sb1 | mipsisa64sb1el \
- | mipsisa64sr71k | mipsisa64sr71kel \
- | mipsr5900 | mipsr5900el \
- | mipstx39 | mipstx39el \
- | mn10200 | mn10300 \
- | moxie \
- | mt \
- | msp430 \
- | nds32 | nds32le | nds32be \
- | nios | nios2 | nios2eb | nios2el \
- | ns16k | ns32k \
- | open8 | or1k | or1knd | or32 \
- | pdp10 | pj | pjl \
- | powerpc | powerpc64 | powerpc64le | powerpcle \
- | pru \
- | pyramid \
- | riscv32 | riscv64 \
- | rl78 | rx \
- | score \
- | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[234]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
- | sh64 | sh64le \
- | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
- | sparcv8 | sparcv9 | sparcv9b | sparcv9v \
- | spu \
- | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
- | ubicom32 \
- | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \
- | visium \
- | wasm32 \
- | x86 | xc16x | xstormy16 | xtensa \
- | z8k | z80)
- basic_machine=$basic_machine-unknown
- ;;
- c54x)
- basic_machine=tic54x-unknown
- ;;
- c55x)
- basic_machine=tic55x-unknown
- ;;
- c6x)
- basic_machine=tic6x-unknown
- ;;
- leon|leon[3-9])
- basic_machine=sparc-$basic_machine
- ;;
- m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip)
- basic_machine=$basic_machine-unknown
- os=-none
+ # Here we handle the default manufacturer of certain CPU types. It is in
+ # some cases the only manufacturer, in others, it is the most popular.
+ w89k)
+ cpu=hppa1.1
+ vendor=winbond
;;
- m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65)
+ op50n)
+ cpu=hppa1.1
+ vendor=oki
;;
- ms1)
- basic_machine=mt-unknown
+ op60c)
+ cpu=hppa1.1
+ vendor=oki
;;
-
- strongarm | thumb | xscale)
- basic_machine=arm-unknown
+ ibm*)
+ cpu=i370
+ vendor=ibm
;;
- xgate)
- basic_machine=$basic_machine-unknown
- os=-none
+ orion105)
+ cpu=clipper
+ vendor=highlevel
;;
- xscaleeb)
- basic_machine=armeb-unknown
+ mac | mpw | mac-mpw)
+ cpu=m68k
+ vendor=apple
;;
-
- xscaleel)
- basic_machine=armel-unknown
+ pmac | pmac-mpw)
+ cpu=powerpc
+ vendor=apple
;;
- # We use `pc' rather than `unknown'
- # because (1) that's what they normally are, and
- # (2) the word "unknown" tends to confuse beginning users.
- i*86 | x86_64)
- basic_machine=$basic_machine-pc
- ;;
- # Object if more than one company name word.
- *-*-*)
- echo Invalid configuration \`"$1"\': machine \`"$basic_machine"\' not recognized 1>&2
- exit 1
- ;;
- # Recognize the basic CPU types with company name.
- 580-* \
- | a29k-* \
- | aarch64-* | aarch64_be-* \
- | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
- | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
- | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \
- | arm-* | armbe-* | armle-* | armeb-* | armv*-* \
- | avr-* | avr32-* \
- | ba-* \
- | be32-* | be64-* \
- | bfin-* | bs2000-* \
- | c[123]* | c30-* | [cjt]90-* | c4x-* \
- | c8051-* | clipper-* | craynv-* | cydra-* \
- | d10v-* | d30v-* | dlx-* \
- | e2k-* | elxsi-* \
- | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
- | h8300-* | h8500-* \
- | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
- | hexagon-* \
- | i*86-* | i860-* | i960-* | ia16-* | ia64-* \
- | ip2k-* | iq2000-* \
- | k1om-* \
- | le32-* | le64-* \
- | lm32-* \
- | m32c-* | m32r-* | m32rle-* \
- | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
- | m88110-* | m88k-* | maxq-* | mcore-* | metag-* \
- | microblaze-* | microblazeel-* \
- | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
- | mips16-* \
- | mips64-* | mips64el-* \
- | mips64octeon-* | mips64octeonel-* \
- | mips64orion-* | mips64orionel-* \
- | mips64r5900-* | mips64r5900el-* \
- | mips64vr-* | mips64vrel-* \
- | mips64vr4100-* | mips64vr4100el-* \
- | mips64vr4300-* | mips64vr4300el-* \
- | mips64vr5000-* | mips64vr5000el-* \
- | mips64vr5900-* | mips64vr5900el-* \
- | mipsisa32-* | mipsisa32el-* \
- | mipsisa32r2-* | mipsisa32r2el-* \
- | mipsisa32r6-* | mipsisa32r6el-* \
- | mipsisa64-* | mipsisa64el-* \
- | mipsisa64r2-* | mipsisa64r2el-* \
- | mipsisa64r6-* | mipsisa64r6el-* \
- | mipsisa64sb1-* | mipsisa64sb1el-* \
- | mipsisa64sr71k-* | mipsisa64sr71kel-* \
- | mipsr5900-* | mipsr5900el-* \
- | mipstx39-* | mipstx39el-* \
- | mmix-* \
- | mt-* \
- | msp430-* \
- | nds32-* | nds32le-* | nds32be-* \
- | nios-* | nios2-* | nios2eb-* | nios2el-* \
- | none-* | np1-* | ns16k-* | ns32k-* \
- | open8-* \
- | or1k*-* \
- | orion-* \
- | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
- | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
- | pru-* \
- | pyramid-* \
- | riscv32-* | riscv64-* \
- | rl78-* | romp-* | rs6000-* | rx-* \
- | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
- | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
- | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
- | sparclite-* \
- | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx*-* \
- | tahoe-* \
- | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
- | tile*-* \
- | tron-* \
- | ubicom32-* \
- | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \
- | vax-* \
- | visium-* \
- | wasm32-* \
- | we32k-* \
- | x86-* | x86_64-* | xc16x-* | xps100-* \
- | xstormy16-* | xtensa*-* \
- | ymp-* \
- | z8k-* | z80-*)
- ;;
- # Recognize the basic CPU types without company name, with glob match.
- xtensa*)
- basic_machine=$basic_machine-unknown
- ;;
# Recognize the various machine names and aliases which stand
# for a CPU type and a company and sometimes even an OS.
- 386bsd)
- basic_machine=i386-pc
- os=-bsd
- ;;
3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
- basic_machine=m68000-att
+ cpu=m68000
+ vendor=att
;;
3b*)
- basic_machine=we32k-att
- ;;
- a29khif)
- basic_machine=a29k-amd
- os=-udi
- ;;
- abacus)
- basic_machine=abacus-unknown
- ;;
- adobe68k)
- basic_machine=m68010-adobe
- os=-scout
- ;;
- alliant | fx80)
- basic_machine=fx80-alliant
- ;;
- altos | altos3068)
- basic_machine=m68k-altos
- ;;
- am29k)
- basic_machine=a29k-none
- os=-bsd
- ;;
- amd64)
- basic_machine=x86_64-pc
- ;;
- amd64-*)
- basic_machine=x86_64-`echo "$basic_machine" | sed 's/^[^-]*-//'`
- ;;
- amdahl)
- basic_machine=580-amdahl
- os=-sysv
- ;;
- amiga | amiga-*)
- basic_machine=m68k-unknown
- ;;
- amigaos | amigados)
- basic_machine=m68k-unknown
- os=-amigaos
- ;;
- amigaunix | amix)
- basic_machine=m68k-unknown
- os=-sysv4
- ;;
- apollo68)
- basic_machine=m68k-apollo
- os=-sysv
- ;;
- apollo68bsd)
- basic_machine=m68k-apollo
- os=-bsd
- ;;
- aros)
- basic_machine=i386-pc
- os=-aros
- ;;
- asmjs)
- basic_machine=asmjs-unknown
- ;;
- aux)
- basic_machine=m68k-apple
- os=-aux
- ;;
- balance)
- basic_machine=ns32k-sequent
- os=-dynix
- ;;
- blackfin)
- basic_machine=bfin-unknown
- os=-linux
- ;;
- blackfin-*)
- basic_machine=bfin-`echo "$basic_machine" | sed 's/^[^-]*-//'`
- os=-linux
+ cpu=we32k
+ vendor=att
;;
bluegene*)
- basic_machine=powerpc-ibm
- os=-cnk
- ;;
- c54x-*)
- basic_machine=tic54x-`echo "$basic_machine" | sed 's/^[^-]*-//'`
- ;;
- c55x-*)
- basic_machine=tic55x-`echo "$basic_machine" | sed 's/^[^-]*-//'`
- ;;
- c6x-*)
- basic_machine=tic6x-`echo "$basic_machine" | sed 's/^[^-]*-//'`
- ;;
- c90)
- basic_machine=c90-cray
- os=-unicos
- ;;
- cegcc)
- basic_machine=arm-unknown
- os=-cegcc
- ;;
- convex-c1)
- basic_machine=c1-convex
- os=-bsd
- ;;
- convex-c2)
- basic_machine=c2-convex
- os=-bsd
- ;;
- convex-c32)
- basic_machine=c32-convex
- os=-bsd
- ;;
- convex-c34)
- basic_machine=c34-convex
- os=-bsd
- ;;
- convex-c38)
- basic_machine=c38-convex
- os=-bsd
- ;;
- cray | j90)
- basic_machine=j90-cray
- os=-unicos
- ;;
- craynv)
- basic_machine=craynv-cray
- os=-unicosmp
- ;;
- cr16 | cr16-*)
- basic_machine=cr16-unknown
- os=-elf
- ;;
- crds | unos)
- basic_machine=m68k-crds
- ;;
- crisv32 | crisv32-* | etraxfs*)
- basic_machine=crisv32-axis
- ;;
- cris | cris-* | etrax*)
- basic_machine=cris-axis
- ;;
- crx)
- basic_machine=crx-unknown
- os=-elf
- ;;
- da30 | da30-*)
- basic_machine=m68k-da30
- ;;
- decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)
- basic_machine=mips-dec
+ cpu=powerpc
+ vendor=ibm
+ os=cnk
;;
decsystem10* | dec10*)
- basic_machine=pdp10-dec
- os=-tops10
+ cpu=pdp10
+ vendor=dec
+ os=tops10
;;
decsystem20* | dec20*)
- basic_machine=pdp10-dec
- os=-tops20
+ cpu=pdp10
+ vendor=dec
+ os=tops20
;;
delta | 3300 | motorola-3300 | motorola-delta \
| 3300-motorola | delta-motorola)
- basic_machine=m68k-motorola
- ;;
- delta88)
- basic_machine=m88k-motorola
- os=-sysv3
- ;;
- dicos)
- basic_machine=i686-pc
- os=-dicos
- ;;
- djgpp)
- basic_machine=i586-pc
- os=-msdosdjgpp
- ;;
- dpx20 | dpx20-*)
- basic_machine=rs6000-bull
- os=-bosx
+ cpu=m68k
+ vendor=motorola
;;
dpx2*)
- basic_machine=m68k-bull
- os=-sysv3
- ;;
- e500v[12])
- basic_machine=powerpc-unknown
- os=$os"spe"
- ;;
- e500v[12]-*)
- basic_machine=powerpc-`echo "$basic_machine" | sed 's/^[^-]*-//'`
- os=$os"spe"
- ;;
- ebmon29k)
- basic_machine=a29k-amd
- os=-ebmon
- ;;
- elxsi)
- basic_machine=elxsi-elxsi
- os=-bsd
+ cpu=m68k
+ vendor=bull
+ os=sysv3
;;
encore | umax | mmax)
- basic_machine=ns32k-encore
+ cpu=ns32k
+ vendor=encore
;;
- es1800 | OSE68k | ose68k | ose | OSE)
- basic_machine=m68k-ericsson
- os=-ose
+ elxsi)
+ cpu=elxsi
+ vendor=elxsi
+ os=${os:-bsd}
;;
fx2800)
- basic_machine=i860-alliant
+ cpu=i860
+ vendor=alliant
;;
genix)
- basic_machine=ns32k-ns
- ;;
- gmicro)
- basic_machine=tron-gmicro
- os=-sysv
- ;;
- go32)
- basic_machine=i386-pc
- os=-go32
+ cpu=ns32k
+ vendor=ns
;;
h3050r* | hiux*)
- basic_machine=hppa1.1-hitachi
- os=-hiuxwe2
- ;;
- h8300hms)
- basic_machine=h8300-hitachi
- os=-hms
- ;;
- h8300xray)
- basic_machine=h8300-hitachi
- os=-xray
- ;;
- h8500hms)
- basic_machine=h8500-hitachi
- os=-hms
- ;;
- harris)
- basic_machine=m88k-harris
- os=-sysv3
- ;;
- hp300-*)
- basic_machine=m68k-hp
- ;;
- hp300bsd)
- basic_machine=m68k-hp
- os=-bsd
- ;;
- hp300hpux)
- basic_machine=m68k-hp
- os=-hpux
+ cpu=hppa1.1
+ vendor=hitachi
+ os=hiuxwe2
;;
hp3k9[0-9][0-9] | hp9[0-9][0-9])
- basic_machine=hppa1.0-hp
+ cpu=hppa1.0
+ vendor=hp
;;
hp9k2[0-9][0-9] | hp9k31[0-9])
- basic_machine=m68000-hp
+ cpu=m68000
+ vendor=hp
;;
hp9k3[2-9][0-9])
- basic_machine=m68k-hp
+ cpu=m68k
+ vendor=hp
;;
hp9k6[0-9][0-9] | hp6[0-9][0-9])
- basic_machine=hppa1.0-hp
+ cpu=hppa1.0
+ vendor=hp
;;
hp9k7[0-79][0-9] | hp7[0-79][0-9])
- basic_machine=hppa1.1-hp
+ cpu=hppa1.1
+ vendor=hp
;;
hp9k78[0-9] | hp78[0-9])
# FIXME: really hppa2.0-hp
- basic_machine=hppa1.1-hp
+ cpu=hppa1.1
+ vendor=hp
;;
hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
# FIXME: really hppa2.0-hp
- basic_machine=hppa1.1-hp
+ cpu=hppa1.1
+ vendor=hp
;;
hp9k8[0-9][13679] | hp8[0-9][13679])
- basic_machine=hppa1.1-hp
+ cpu=hppa1.1
+ vendor=hp
;;
hp9k8[0-9][0-9] | hp8[0-9][0-9])
- basic_machine=hppa1.0-hp
- ;;
- hppaosf)
- basic_machine=hppa1.1-hp
- os=-osf
- ;;
- hppro)
- basic_machine=hppa1.1-hp
- os=-proelf
- ;;
- i370-ibm* | ibm*)
- basic_machine=i370-ibm
+ cpu=hppa1.0
+ vendor=hp
;;
i*86v32)
- basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'`
- os=-sysv32
+ cpu=`echo "$1" | sed -e 's/86.*/86/'`
+ vendor=pc
+ os=sysv32
;;
i*86v4*)
- basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'`
- os=-sysv4
+ cpu=`echo "$1" | sed -e 's/86.*/86/'`
+ vendor=pc
+ os=sysv4
;;
i*86v)
- basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'`
- os=-sysv
+ cpu=`echo "$1" | sed -e 's/86.*/86/'`
+ vendor=pc
+ os=sysv
;;
i*86sol2)
- basic_machine=`echo "$1" | sed -e 's/86.*/86-pc/'`
- os=-solaris2
- ;;
- i386mach)
- basic_machine=i386-mach
- os=-mach
+ cpu=`echo "$1" | sed -e 's/86.*/86/'`
+ vendor=pc
+ os=solaris2
;;
- vsta)
- basic_machine=i386-unknown
- os=-vsta
+ j90 | j90-cray)
+ cpu=j90
+ vendor=cray
+ os=${os:-unicos}
;;
iris | iris4d)
- basic_machine=mips-sgi
+ cpu=mips
+ vendor=sgi
case $os in
- -irix*)
+ irix*)
;;
*)
- os=-irix4
+ os=irix4
;;
esac
;;
- isi68 | isi)
- basic_machine=m68k-isi
- os=-sysv
- ;;
- leon-*|leon[3-9]-*)
- basic_machine=sparc-`echo "$basic_machine" | sed 's/-.*//'`
- ;;
- m68knommu)
- basic_machine=m68k-unknown
- os=-linux
- ;;
- m68knommu-*)
- basic_machine=m68k-`echo "$basic_machine" | sed 's/^[^-]*-//'`
- os=-linux
- ;;
- magnum | m3230)
- basic_machine=mips-mips
- os=-sysv
- ;;
- merlin)
- basic_machine=ns32k-utek
- os=-sysv
- ;;
- microblaze*)
- basic_machine=microblaze-xilinx
- ;;
- mingw64)
- basic_machine=x86_64-pc
- os=-mingw64
- ;;
- mingw32)
- basic_machine=i686-pc
- os=-mingw32
- ;;
- mingw32ce)
- basic_machine=arm-unknown
- os=-mingw32ce
- ;;
miniframe)
- basic_machine=m68000-convergent
- ;;
- *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*)
- basic_machine=m68k-atari
- os=-mint
- ;;
- mips3*-*)
- basic_machine=`echo "$basic_machine" | sed -e 's/mips3/mips64/'`
- ;;
- mips3*)
- basic_machine=`echo "$basic_machine" | sed -e 's/mips3/mips64/'`-unknown
- ;;
- monitor)
- basic_machine=m68k-rom68k
- os=-coff
- ;;
- morphos)
- basic_machine=powerpc-unknown
- os=-morphos
- ;;
- moxiebox)
- basic_machine=moxie-unknown
- os=-moxiebox
- ;;
- msdos)
- basic_machine=i386-pc
- os=-msdos
- ;;
- ms1-*)
- basic_machine=`echo "$basic_machine" | sed -e 's/ms1-/mt-/'`
- ;;
- msys)
- basic_machine=i686-pc
- os=-msys
- ;;
- mvs)
- basic_machine=i370-ibm
- os=-mvs
- ;;
- nacl)
- basic_machine=le32-unknown
- os=-nacl
- ;;
- ncr3000)
- basic_machine=i486-ncr
- os=-sysv4
+ cpu=m68000
+ vendor=convergent
;;
- netbsd386)
- basic_machine=i386-unknown
- os=-netbsd
- ;;
- netwinder)
- basic_machine=armv4l-rebel
- os=-linux
- ;;
- news | news700 | news800 | news900)
- basic_machine=m68k-sony
- os=-newsos
- ;;
- news1000)
- basic_machine=m68030-sony
- os=-newsos
+ *mint | mint[0-9]* | *MiNT | *MiNT[0-9]*)
+ cpu=m68k
+ vendor=atari
+ os=mint
;;
news-3600 | risc-news)
- basic_machine=mips-sony
- os=-newsos
- ;;
- necv70)
- basic_machine=v70-nec
- os=-sysv
+ cpu=mips
+ vendor=sony
+ os=newsos
;;
next | m*-next)
- basic_machine=m68k-next
+ cpu=m68k
+ vendor=next
case $os in
- -nextstep* )
+ openstep*)
+ ;;
+ nextstep*)
;;
- -ns2*)
- os=-nextstep2
+ ns2*)
+ os=nextstep2
;;
*)
- os=-nextstep3
+ os=nextstep3
;;
esac
;;
- nh3000)
- basic_machine=m68k-harris
- os=-cxux
- ;;
- nh[45]000)
- basic_machine=m88k-harris
- os=-cxux
- ;;
- nindy960)
- basic_machine=i960-intel
- os=-nindy
- ;;
- mon960)
- basic_machine=i960-intel
- os=-mon960
- ;;
- nonstopux)
- basic_machine=mips-compaq
- os=-nonstopux
- ;;
np1)
- basic_machine=np1-gould
- ;;
- neo-tandem)
- basic_machine=neo-tandem
- ;;
- nse-tandem)
- basic_machine=nse-tandem
- ;;
- nsr-tandem)
- basic_machine=nsr-tandem
- ;;
- nsv-tandem)
- basic_machine=nsv-tandem
- ;;
- nsx-tandem)
- basic_machine=nsx-tandem
+ cpu=np1
+ vendor=gould
;;
op50n-* | op60c-*)
- basic_machine=hppa1.1-oki
- os=-proelf
- ;;
- openrisc | openrisc-*)
- basic_machine=or32-unknown
- ;;
- os400)
- basic_machine=powerpc-ibm
- os=-os400
- ;;
- OSE68000 | ose68000)
- basic_machine=m68000-ericsson
- os=-ose
- ;;
- os68k)
- basic_machine=m68k-none
- os=-os68k
+ cpu=hppa1.1
+ vendor=oki
+ os=proelf
;;
pa-hitachi)
- basic_machine=hppa1.1-hitachi
- os=-hiuxwe2
- ;;
- paragon)
- basic_machine=i860-intel
- os=-osf
- ;;
- parisc)
- basic_machine=hppa-unknown
- os=-linux
- ;;
- parisc-*)
- basic_machine=hppa-`echo "$basic_machine" | sed 's/^[^-]*-//'`
- os=-linux
+ cpu=hppa1.1
+ vendor=hitachi
+ os=hiuxwe2
;;
pbd)
- basic_machine=sparc-tti
+ cpu=sparc
+ vendor=tti
;;
pbb)
- basic_machine=m68k-tti
+ cpu=m68k
+ vendor=tti
;;
- pc532 | pc532-*)
- basic_machine=ns32k-pc532
- ;;
- pc98)
- basic_machine=i386-pc
- ;;
- pc98-*)
- basic_machine=i386-`echo "$basic_machine" | sed 's/^[^-]*-//'`
- ;;
- pentium | p5 | k5 | k6 | nexgen | viac3)
- basic_machine=i586-pc
- ;;
- pentiumpro | p6 | 6x86 | athlon | athlon_*)
- basic_machine=i686-pc
- ;;
- pentiumii | pentium2 | pentiumiii | pentium3)
- basic_machine=i686-pc
- ;;
- pentium4)
- basic_machine=i786-pc
- ;;
- pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
- basic_machine=i586-`echo "$basic_machine" | sed 's/^[^-]*-//'`
- ;;
- pentiumpro-* | p6-* | 6x86-* | athlon-*)
- basic_machine=i686-`echo "$basic_machine" | sed 's/^[^-]*-//'`
- ;;
- pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
- basic_machine=i686-`echo "$basic_machine" | sed 's/^[^-]*-//'`
- ;;
- pentium4-*)
- basic_machine=i786-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ pc532)
+ cpu=ns32k
+ vendor=pc532
;;
pn)
- basic_machine=pn-gould
+ cpu=pn
+ vendor=gould
;;
- power) basic_machine=power-ibm
+ power)
+ cpu=power
+ vendor=ibm
;;
- ppc | ppcbe) basic_machine=powerpc-unknown
- ;;
- ppc-* | ppcbe-*)
- basic_machine=powerpc-`echo "$basic_machine" | sed 's/^[^-]*-//'`
- ;;
- ppcle | powerpclittle)
- basic_machine=powerpcle-unknown
- ;;
- ppcle-* | powerpclittle-*)
- basic_machine=powerpcle-`echo "$basic_machine" | sed 's/^[^-]*-//'`
- ;;
- ppc64) basic_machine=powerpc64-unknown
+ ps2)
+ cpu=i386
+ vendor=ibm
;;
- ppc64-*) basic_machine=powerpc64-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ rm[46]00)
+ cpu=mips
+ vendor=siemens
;;
- ppc64le | powerpc64little)
- basic_machine=powerpc64le-unknown
+ rtpc | rtpc-*)
+ cpu=romp
+ vendor=ibm
;;
- ppc64le-* | powerpc64little-*)
- basic_machine=powerpc64le-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ sde)
+ cpu=mipsisa32
+ vendor=sde
+ os=${os:-elf}
;;
- ps2)
- basic_machine=i386-ibm
+ simso-wrs)
+ cpu=sparclite
+ vendor=wrs
+ os=vxworks
;;
- pw32)
- basic_machine=i586-unknown
- os=-pw32
+ tower | tower-32)
+ cpu=m68k
+ vendor=ncr
;;
- rdos | rdos64)
- basic_machine=x86_64-pc
- os=-rdos
+ vpp*|vx|vx-*)
+ cpu=f301
+ vendor=fujitsu
;;
- rdos32)
- basic_machine=i386-pc
- os=-rdos
+ w65)
+ cpu=w65
+ vendor=wdc
;;
- rom68k)
- basic_machine=m68k-rom68k
- os=-coff
+ w89k-*)
+ cpu=hppa1.1
+ vendor=winbond
+ os=proelf
;;
- rm[46]00)
- basic_machine=mips-siemens
+ none)
+ cpu=none
+ vendor=none
;;
- rtpc | rtpc-*)
- basic_machine=romp-ibm
+ leon|leon[3-9])
+ cpu=sparc
+ vendor=$basic_machine
;;
- s390 | s390-*)
- basic_machine=s390-ibm
+ leon-*|leon[3-9]-*)
+ cpu=sparc
+ vendor=`echo "$basic_machine" | sed 's/-.*//'`
;;
- s390x | s390x-*)
- basic_machine=s390x-ibm
+
+ *-*)
+ # shellcheck disable=SC2162
+ IFS="-" read cpu vendor <<EOF
+$basic_machine
+EOF
;;
- sa29200)
- basic_machine=a29k-amd
- os=-udi
+ # We use `pc' rather than `unknown'
+ # because (1) that's what they normally are, and
+ # (2) the word "unknown" tends to confuse beginning users.
+ i*86 | x86_64)
+ cpu=$basic_machine
+ vendor=pc
;;
- sb1)
- basic_machine=mipsisa64sb1-unknown
+ # These rules are duplicated from below for sake of the special case above;
+ # i.e. things that normalized to x86 arches should also default to "pc"
+ pc98)
+ cpu=i386
+ vendor=pc
;;
- sb1el)
- basic_machine=mipsisa64sb1el-unknown
+ x64 | amd64)
+ cpu=x86_64
+ vendor=pc
;;
- sde)
- basic_machine=mipsisa32-sde
- os=-elf
+ # Recognize the basic CPU types without company name.
+ *)
+ cpu=$basic_machine
+ vendor=unknown
;;
- sei)
- basic_machine=mips-sei
- os=-seiux
+esac
+
+unset -v basic_machine
+
+# Decode basic machines in the full and proper CPU-Company form.
+case $cpu-$vendor in
+ # Here we handle the default manufacturer of certain CPU types in canonical form. It is in
+ # some cases the only manufacturer, in others, it is the most popular.
+ craynv-unknown)
+ vendor=cray
+ os=${os:-unicosmp}
;;
- sequent)
- basic_machine=i386-sequent
+ c90-unknown | c90-cray)
+ vendor=cray
+ os=${os:-unicos}
;;
- sh5el)
- basic_machine=sh5le-unknown
+ fx80-unknown)
+ vendor=alliant
;;
- simso-wrs)
- basic_machine=sparclite-wrs
- os=-vxworks
+ romp-unknown)
+ vendor=ibm
;;
- sps7)
- basic_machine=m68k-bull
- os=-sysv2
+ mmix-unknown)
+ vendor=knuth
;;
- spur)
- basic_machine=spur-unknown
+ microblaze-unknown | microblazeel-unknown)
+ vendor=xilinx
;;
- st2000)
- basic_machine=m68k-tandem
+ rs6000-unknown)
+ vendor=ibm
;;
- stratus)
- basic_machine=i860-stratus
- os=-sysv4
+ vax-unknown)
+ vendor=dec
;;
- strongarm-* | thumb-*)
- basic_machine=arm-`echo "$basic_machine" | sed 's/^[^-]*-//'`
+ pdp11-unknown)
+ vendor=dec
;;
- sun2)
- basic_machine=m68000-sun
+ we32k-unknown)
+ vendor=att
;;
- sun2os3)
- basic_machine=m68000-sun
- os=-sunos3
+ cydra-unknown)
+ vendor=cydrome
;;
- sun2os4)
- basic_machine=m68000-sun
- os=-sunos4
+ i370-ibm*)
+ vendor=ibm
;;
- sun3os3)
- basic_machine=m68k-sun
- os=-sunos3
+ orion-unknown)
+ vendor=highlevel
;;
- sun3os4)
- basic_machine=m68k-sun
- os=-sunos4
+ xps-unknown | xps100-unknown)
+ cpu=xps100
+ vendor=honeywell
;;
- sun4os3)
- basic_machine=sparc-sun
- os=-sunos3
+
+ # Here we normalize CPU types with a missing or matching vendor
+ dpx20-unknown | dpx20-bull)
+ cpu=rs6000
+ vendor=bull
+ os=${os:-bosx}
;;
- sun4os4)
- basic_machine=sparc-sun
- os=-sunos4
+
+ # Here we normalize CPU types irrespective of the vendor
+ amd64-*)
+ cpu=x86_64
;;
- sun4sol2)
- basic_machine=sparc-sun
- os=-solaris2
+ blackfin-*)
+ cpu=bfin
+ os=linux
;;
- sun3 | sun3-*)
- basic_machine=m68k-sun
+ c54x-*)
+ cpu=tic54x
;;
- sun4)
- basic_machine=sparc-sun
+ c55x-*)
+ cpu=tic55x
;;
- sun386 | sun386i | roadrunner)
- basic_machine=i386-sun
+ c6x-*)
+ cpu=tic6x
;;
- sv1)
- basic_machine=sv1-cray
- os=-unicos
+ e500v[12]-*)
+ cpu=powerpc
+ os=$os"spe"
;;
- symmetry)
- basic_machine=i386-sequent
- os=-dynix
+ mips3*-*)
+ cpu=mips64
;;
- t3e)
- basic_machine=alphaev5-cray
- os=-unicos
+ ms1-*)
+ cpu=mt
;;
- t90)
- basic_machine=t90-cray
- os=-unicos
+ m68knommu-*)
+ cpu=m68k
+ os=linux
;;
- tile*)
- basic_machine=$basic_machine-unknown
- os=-linux-gnu
+ m9s12z-* | m68hcs12z-* | hcs12z-* | s12z-*)
+ cpu=s12z
;;
- tx39)
- basic_machine=mipstx39-unknown
+ openrisc-*)
+ cpu=or32
;;
- tx39el)
- basic_machine=mipstx39el-unknown
+ parisc-*)
+ cpu=hppa
+ os=linux
;;
- toad1)
- basic_machine=pdp10-xkl
- os=-tops20
+ pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
+ cpu=i586
;;
- tower | tower-32)
- basic_machine=m68k-ncr
+ pentiumpro-* | p6-* | 6x86-* | athlon-* | athalon_*-*)
+ cpu=i686
;;
- tpf)
- basic_machine=s390x-ibm
- os=-tpf
+ pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
+ cpu=i686
;;
- udi29k)
- basic_machine=a29k-amd
- os=-udi
+ pentium4-*)
+ cpu=i786
;;
- ultra3)
- basic_machine=a29k-nyu
- os=-sym1
+ pc98-*)
+ cpu=i386
;;
- v810 | necv810)
- basic_machine=v810-nec
- os=-none
+ ppc-* | ppcbe-*)
+ cpu=powerpc
;;
- vaxv)
- basic_machine=vax-dec
- os=-sysv
+ ppcle-* | powerpclittle-*)
+ cpu=powerpcle
;;
- vms)
- basic_machine=vax-dec
- os=-vms
+ ppc64-*)
+ cpu=powerpc64
;;
- vpp*|vx|vx-*)
- basic_machine=f301-fujitsu
+ ppc64le-* | powerpc64little-*)
+ cpu=powerpc64le
;;
- vxworks960)
- basic_machine=i960-wrs
- os=-vxworks
+ sb1-*)
+ cpu=mipsisa64sb1
;;
- vxworks68)
- basic_machine=m68k-wrs
- os=-vxworks
+ sb1el-*)
+ cpu=mipsisa64sb1el
;;
- vxworks29k)
- basic_machine=a29k-wrs
- os=-vxworks
+ sh5e[lb]-*)
+ cpu=`echo "$cpu" | sed 's/^\(sh.\)e\(.\)$/\1\2e/'`
;;
- w65*)
- basic_machine=w65-wdc
- os=-none
+ spur-*)
+ cpu=spur
;;
- w89k-*)
- basic_machine=hppa1.1-winbond
- os=-proelf
+ strongarm-* | thumb-*)
+ cpu=arm
;;
- x64)
- basic_machine=x86_64-pc
+ tx39-*)
+ cpu=mipstx39
;;
- xbox)
- basic_machine=i686-pc
- os=-mingw32
+ tx39el-*)
+ cpu=mipstx39el
;;
- xps | xps100)
- basic_machine=xps100-honeywell
+ x64-*)
+ cpu=x86_64
;;
xscale-* | xscalee[bl]-*)
- basic_machine=`echo "$basic_machine" | sed 's/^xscale/arm/'`
- ;;
- ymp)
- basic_machine=ymp-cray
- os=-unicos
- ;;
- none)
- basic_machine=none-none
- os=-none
+ cpu=`echo "$cpu" | sed 's/^xscale/arm/'`
;;
-# Here we handle the default manufacturer of certain CPU types. It is in
-# some cases the only manufacturer, in others, it is the most popular.
- w89k)
- basic_machine=hppa1.1-winbond
- ;;
- op50n)
- basic_machine=hppa1.1-oki
- ;;
- op60c)
- basic_machine=hppa1.1-oki
- ;;
- romp)
- basic_machine=romp-ibm
+ # Recognize the canonical CPU Types that limit and/or modify the
+ # company names they are paired with.
+ cr16-*)
+ os=${os:-elf}
;;
- mmix)
- basic_machine=mmix-knuth
+ crisv32-* | etraxfs*-*)
+ cpu=crisv32
+ vendor=axis
;;
- rs6000)
- basic_machine=rs6000-ibm
+ cris-* | etrax*-*)
+ cpu=cris
+ vendor=axis
;;
- vax)
- basic_machine=vax-dec
+ crx-*)
+ os=${os:-elf}
;;
- pdp11)
- basic_machine=pdp11-dec
- ;;
- we32k)
- basic_machine=we32k-att
+ neo-tandem)
+ cpu=neo
+ vendor=tandem
;;
- sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele)
- basic_machine=sh-unknown
+ nse-tandem)
+ cpu=nse
+ vendor=tandem
;;
- cydra)
- basic_machine=cydra-cydrome
+ nsr-tandem)
+ cpu=nsr
+ vendor=tandem
;;
- orion)
- basic_machine=orion-highlevel
+ nsv-tandem)
+ cpu=nsv
+ vendor=tandem
;;
- orion105)
- basic_machine=clipper-highlevel
+ nsx-tandem)
+ cpu=nsx
+ vendor=tandem
;;
- mac | mpw | mac-mpw)
- basic_machine=m68k-apple
+ s390-*)
+ cpu=s390
+ vendor=ibm
;;
- pmac | pmac-mpw)
- basic_machine=powerpc-apple
+ s390x-*)
+ cpu=s390x
+ vendor=ibm
;;
- *-unknown)
- # Make sure to match an already-canonicalized machine name.
+ tile*-*)
+ os=${os:-linux-gnu}
;;
+
*)
- echo Invalid configuration \`"$1"\': machine \`"$basic_machine"\' not recognized 1>&2
- exit 1
+ # Recognize the canonical CPU types that are allowed with any
+ # company name.
+ case $cpu in
+ 1750a | 580 \
+ | a29k \
+ | aarch64 | aarch64_be \
+ | abacus \
+ | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \
+ | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] \
+ | alphapca5[67] | alpha64pca5[67] \
+ | am33_2.0 \
+ | amdgcn \
+ | arc | arceb \
+ | arm | arm[lb]e | arme[lb] | armv* \
+ | avr | avr32 \
+ | asmjs \
+ | ba \
+ | be32 | be64 \
+ | bfin | bpf | bs2000 \
+ | c[123]* | c30 | [cjt]90 | c4x \
+ | c8051 | clipper | craynv | csky | cydra \
+ | d10v | d30v | dlx | dsp16xx \
+ | e2k | elxsi | epiphany \
+ | f30[01] | f700 | fido | fr30 | frv | ft32 | fx80 \
+ | h8300 | h8500 \
+ | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
+ | hexagon \
+ | i370 | i*86 | i860 | i960 | ia16 | ia64 \
+ | ip2k | iq2000 \
+ | k1om \
+ | le32 | le64 \
+ | lm32 \
+ | m32c | m32r | m32rle \
+ | m5200 | m68000 | m680[012346]0 | m68360 | m683?2 | m68k \
+ | m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x \
+ | m88110 | m88k | maxq | mb | mcore | mep | metag \
+ | microblaze | microblazeel \
+ | mips | mipsbe | mipseb | mipsel | mipsle \
+ | mips16 \
+ | mips64 | mips64eb | mips64el \
+ | mips64octeon | mips64octeonel \
+ | mips64orion | mips64orionel \
+ | mips64r5900 | mips64r5900el \
+ | mips64vr | mips64vrel \
+ | mips64vr4100 | mips64vr4100el \
+ | mips64vr4300 | mips64vr4300el \
+ | mips64vr5000 | mips64vr5000el \
+ | mips64vr5900 | mips64vr5900el \
+ | mipsisa32 | mipsisa32el \
+ | mipsisa32r2 | mipsisa32r2el \
+ | mipsisa32r6 | mipsisa32r6el \
+ | mipsisa64 | mipsisa64el \
+ | mipsisa64r2 | mipsisa64r2el \
+ | mipsisa64r6 | mipsisa64r6el \
+ | mipsisa64sb1 | mipsisa64sb1el \
+ | mipsisa64sr71k | mipsisa64sr71kel \
+ | mipsr5900 | mipsr5900el \
+ | mipstx39 | mipstx39el \
+ | mmix \
+ | mn10200 | mn10300 \
+ | moxie \
+ | mt \
+ | msp430 \
+ | nds32 | nds32le | nds32be \
+ | nfp \
+ | nios | nios2 | nios2eb | nios2el \
+ | none | np1 | ns16k | ns32k | nvptx \
+ | open8 \
+ | or1k* \
+ | or32 \
+ | orion \
+ | picochip \
+ | pdp10 | pdp11 | pj | pjl | pn | power \
+ | powerpc | powerpc64 | powerpc64le | powerpcle | powerpcspe \
+ | pru \
+ | pyramid \
+ | riscv | riscv32 | riscv64 \
+ | rl78 | romp | rs6000 | rx \
+ | score \
+ | sh | shl \
+ | sh[1234] | sh[24]a | sh[24]ae[lb] | sh[23]e | she[lb] | sh[lb]e \
+ | sh[1234]e[lb] | sh[12345][lb]e | sh[23]ele | sh64 | sh64le \
+ | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet \
+ | sparclite \
+ | sparcv8 | sparcv9 | sparcv9b | sparcv9v | sv1 | sx* \
+ | spu \
+ | tahoe \
+ | tic30 | tic4x | tic54x | tic55x | tic6x | tic80 \
+ | tron \
+ | ubicom32 \
+ | v70 | v850 | v850e | v850e1 | v850es | v850e2 | v850e2v3 \
+ | vax \
+ | visium \
+ | w65 \
+ | wasm32 | wasm64 \
+ | we32k \
+ | x86 | x86_64 | xc16x | xgate | xps100 \
+ | xstormy16 | xtensa* \
+ | ymp \
+ | z8k | z80)
+ ;;
+
+ *)
+ echo Invalid configuration \`"$1"\': machine \`"$cpu-$vendor"\' not recognized 1>&2
+ exit 1
+ ;;
+ esac
;;
esac
# Here we canonicalize certain aliases for manufacturers.
-case $basic_machine in
- *-digital*)
- basic_machine=`echo "$basic_machine" | sed 's/digital.*/dec/'`
+case $vendor in
+ digital*)
+ vendor=dec
;;
- *-commodore*)
- basic_machine=`echo "$basic_machine" | sed 's/commodore.*/cbm/'`
+ commodore*)
+ vendor=cbm
;;
*)
;;
@@ -1334,199 +1275,243 @@ esac
# Decode manufacturer-specific aliases for certain operating systems.
-if [ x"$os" != x"" ]
+if [ x$os != x ]
then
case $os in
# First match some system type aliases that might get confused
# with valid system types.
- # -solaris* is a basic system type, with this one exception.
- -auroraux)
- os=-auroraux
+ # solaris* is a basic system type, with this one exception.
+ auroraux)
+ os=auroraux
;;
- -solaris1 | -solaris1.*)
+ bluegene*)
+ os=cnk
+ ;;
+ solaris1 | solaris1.*)
os=`echo $os | sed -e 's|solaris1|sunos4|'`
;;
- -solaris)
- os=-solaris2
+ solaris)
+ os=solaris2
;;
- -unixware*)
- os=-sysv4.2uw
+ unixware*)
+ os=sysv4.2uw
;;
- -gnu/linux*)
+ gnu/linux*)
os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`
;;
# es1800 is here to avoid being matched by es* (a different OS)
- -es1800*)
- os=-ose
+ es1800*)
+ os=ose
+ ;;
+ # Some version numbers need modification
+ chorusos*)
+ os=chorusos
+ ;;
+ isc)
+ os=isc2.2
+ ;;
+ sco6)
+ os=sco5v6
+ ;;
+ sco5)
+ os=sco3.2v5
+ ;;
+ sco4)
+ os=sco3.2v4
+ ;;
+ sco3.2.[4-9]*)
+ os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`
+ ;;
+ sco3.2v[4-9]* | sco5v6*)
+ # Don't forget version if it is 3.2v4 or newer.
+ ;;
+ scout)
+ # Don't match below
+ ;;
+ sco*)
+ os=sco3.2v2
+ ;;
+ psos*)
+ os=psos
;;
# Now accept the basic system types.
# The portable systems comes first.
# Each alternative MUST end in a * to match a version number.
- # -sysv* is not here because it comes later, after sysvr4.
- -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
- | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
- | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
- | -sym* | -kopensolaris* | -plan9* \
- | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
- | -aos* | -aros* | -cloudabi* | -sortix* \
- | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
- | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
- | -hiux* | -knetbsd* | -mirbsd* | -netbsd* \
- | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \
- | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
- | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
- | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
- | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* | -hcos* \
- | -chorusos* | -chorusrdb* | -cegcc* | -glidix* \
- | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
- | -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
- | -linux-newlib* | -linux-musl* | -linux-uclibc* \
- | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \
- | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* \
- | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
- | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
- | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
- | -morphos* | -superux* | -rtmk* | -windiss* \
- | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
- | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \
- | -onefs* | -tirtos* | -phoenix* | -fuchsia* | -redox* | -bme* \
- | -midnightbsd*)
+ # sysv* is not here because it comes later, after sysvr4.
+ gnu* | bsd* | mach* | minix* | genix* | ultrix* | irix* \
+ | *vms* | esix* | aix* | cnk* | sunos | sunos[34]*\
+ | hpux* | unos* | osf* | luna* | dgux* | auroraux* | solaris* \
+ | sym* | kopensolaris* | plan9* \
+ | amigaos* | amigados* | msdos* | newsos* | unicos* | aof* \
+ | aos* | aros* | cloudabi* | sortix* | twizzler* \
+ | nindy* | vxsim* | vxworks* | ebmon* | hms* | mvs* \
+ | clix* | riscos* | uniplus* | iris* | isc* | rtu* | xenix* \
+ | knetbsd* | mirbsd* | netbsd* \
+ | bitrig* | openbsd* | solidbsd* | libertybsd* | os108* \
+ | ekkobsd* | kfreebsd* | freebsd* | riscix* | lynxos* \
+ | bosx* | nextstep* | cxux* | aout* | elf* | oabi* \
+ | ptx* | coff* | ecoff* | winnt* | domain* | vsta* \
+ | udi* | eabi* | lites* | ieee* | go32* | aux* | hcos* \
+ | chorusrdb* | cegcc* | glidix* \
+ | cygwin* | msys* | pe* | moss* | proelf* | rtems* \
+ | midipix* | mingw32* | mingw64* | linux-gnu* | linux-android* \
+ | linux-newlib* | linux-musl* | linux-uclibc* \
+ | uxpv* | beos* | mpeix* | udk* | moxiebox* \
+ | interix* | uwin* | mks* | rhapsody* | darwin* \
+ | openstep* | oskit* | conix* | pw32* | nonstopux* \
+ | storm-chaos* | tops10* | tenex* | tops20* | its* \
+ | os2* | vos* | palmos* | uclinux* | nucleus* \
+ | morphos* | superux* | rtmk* | windiss* \
+ | powermax* | dnix* | nx6 | nx7 | sei* | dragonfly* \
+ | skyos* | haiku* | rdos* | toppers* | drops* | es* \
+ | onefs* | tirtos* | phoenix* | fuchsia* | redox* | bme* \
+ | midnightbsd* | amdhsa* | unleashed* | emscripten* | wasi* \
+ | nsk* | powerunix)
# Remember, each alternative MUST END IN *, to match a version number.
;;
- -qnx*)
- case $basic_machine in
- x86-* | i*86-*)
+ qnx*)
+ case $cpu in
+ x86 | i*86)
;;
*)
- os=-nto$os
+ os=nto-$os
;;
esac
;;
- -nto-qnx*)
+ hiux*)
+ os=hiuxwe2
;;
- -nto*)
- os=`echo $os | sed -e 's|nto|nto-qnx|'`
+ nto-qnx*)
;;
- -sim | -xray | -os68k* | -v88r* \
- | -windows* | -osx | -abug | -netware* | -os9* \
- | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
+ nto*)
+ os=`echo $os | sed -e 's|nto|nto-qnx|'`
;;
- -mac*)
- os=`echo "$os" | sed -e 's|mac|macos|'`
+ sim | xray | os68k* | v88r* \
+ | windows* | osx | abug | netware* | os9* \
+ | macos* | mpw* | magic* | mmixware* | mon960* | lnews*)
;;
- -linux-dietlibc)
- os=-linux-dietlibc
+ linux-dietlibc)
+ os=linux-dietlibc
;;
- -linux*)
+ linux*)
os=`echo $os | sed -e 's|linux|linux-gnu|'`
;;
- -sunos5*)
- os=`echo "$os" | sed -e 's|sunos5|solaris2|'`
+ lynx*178)
+ os=lynxos178
;;
- -sunos6*)
- os=`echo "$os" | sed -e 's|sunos6|solaris3|'`
+ lynx*5)
+ os=lynxos5
+ ;;
+ lynx*)
+ os=lynxos
;;
- -opened*)
- os=-openedition
+ mac*)
+ os=`echo "$os" | sed -e 's|mac|macos|'`
;;
- -os400*)
- os=-os400
+ opened*)
+ os=openedition
;;
- -wince*)
- os=-wince
+ os400*)
+ os=os400
;;
- -utek*)
- os=-bsd
+ sunos5*)
+ os=`echo "$os" | sed -e 's|sunos5|solaris2|'`
;;
- -dynix*)
- os=-bsd
+ sunos6*)
+ os=`echo "$os" | sed -e 's|sunos6|solaris3|'`
;;
- -acis*)
- os=-aos
+ wince*)
+ os=wince
;;
- -atheos*)
- os=-atheos
+ utek*)
+ os=bsd
;;
- -syllable*)
- os=-syllable
+ dynix*)
+ os=bsd
;;
- -386bsd)
- os=-bsd
+ acis*)
+ os=aos
;;
- -ctix* | -uts*)
- os=-sysv
+ atheos*)
+ os=atheos
;;
- -nova*)
- os=-rtmk-nova
+ syllable*)
+ os=syllable
;;
- -ns2)
- os=-nextstep2
+ 386bsd)
+ os=bsd
;;
- -nsk*)
- os=-nsk
+ ctix* | uts*)
+ os=sysv
+ ;;
+ nova*)
+ os=rtmk-nova
+ ;;
+ ns2)
+ os=nextstep2
;;
# Preserve the version number of sinix5.
- -sinix5.*)
+ sinix5.*)
os=`echo $os | sed -e 's|sinix|sysv|'`
;;
- -sinix*)
- os=-sysv4
+ sinix*)
+ os=sysv4
;;
- -tpf*)
- os=-tpf
+ tpf*)
+ os=tpf
;;
- -triton*)
- os=-sysv3
+ triton*)
+ os=sysv3
;;
- -oss*)
- os=-sysv3
+ oss*)
+ os=sysv3
;;
- -svr4*)
- os=-sysv4
+ svr4*)
+ os=sysv4
;;
- -svr3)
- os=-sysv3
+ svr3)
+ os=sysv3
;;
- -sysvr4)
- os=-sysv4
+ sysvr4)
+ os=sysv4
;;
- # This must come after -sysvr4.
- -sysv*)
+ # This must come after sysvr4.
+ sysv*)
;;
- -ose*)
- os=-ose
+ ose*)
+ os=ose
;;
- -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
- os=-mint
+ *mint | mint[0-9]* | *MiNT | MiNT[0-9]*)
+ os=mint
;;
- -zvmoe)
- os=-zvmoe
+ zvmoe)
+ os=zvmoe
;;
- -dicos*)
- os=-dicos
+ dicos*)
+ os=dicos
;;
- -pikeos*)
+ pikeos*)
# Until real need of OS specific support for
# particular features comes up, bare metal
# configurations are quite functional.
- case $basic_machine in
+ case $cpu in
arm*)
- os=-eabi
+ os=eabi
;;
*)
- os=-elf
+ os=elf
;;
esac
;;
- -nacl*)
+ nacl*)
;;
- -ios)
+ ios)
;;
- -none)
+ none)
+ ;;
+ *-eabi)
;;
*)
- # Get rid of the `-' at the beginning of $os.
- os=`echo $os | sed 's/[^-]*-//'`
echo Invalid configuration \`"$1"\': system \`"$os"\' not recognized 1>&2
exit 1
;;
@@ -1543,254 +1528,261 @@ else
# will signal an error saying that MANUFACTURER isn't an operating
# system, and we'll never get to this point.
-case $basic_machine in
+case $cpu-$vendor in
score-*)
- os=-elf
+ os=elf
;;
spu-*)
- os=-elf
+ os=elf
;;
*-acorn)
- os=-riscix1.2
+ os=riscix1.2
;;
arm*-rebel)
- os=-linux
+ os=linux
;;
arm*-semi)
- os=-aout
+ os=aout
;;
c4x-* | tic4x-*)
- os=-coff
+ os=coff
;;
c8051-*)
- os=-elf
+ os=elf
+ ;;
+ clipper-intergraph)
+ os=clix
;;
hexagon-*)
- os=-elf
+ os=elf
;;
tic54x-*)
- os=-coff
+ os=coff
;;
tic55x-*)
- os=-coff
+ os=coff
;;
tic6x-*)
- os=-coff
+ os=coff
;;
# This must come before the *-dec entry.
pdp10-*)
- os=-tops20
+ os=tops20
;;
pdp11-*)
- os=-none
+ os=none
;;
*-dec | vax-*)
- os=-ultrix4.2
+ os=ultrix4.2
;;
m68*-apollo)
- os=-domain
+ os=domain
;;
i386-sun)
- os=-sunos4.0.2
+ os=sunos4.0.2
;;
m68000-sun)
- os=-sunos3
+ os=sunos3
;;
m68*-cisco)
- os=-aout
+ os=aout
;;
mep-*)
- os=-elf
+ os=elf
;;
mips*-cisco)
- os=-elf
+ os=elf
;;
mips*-*)
- os=-elf
+ os=elf
;;
or32-*)
- os=-coff
+ os=coff
;;
*-tti) # must be before sparc entry or we get the wrong os.
- os=-sysv3
+ os=sysv3
;;
sparc-* | *-sun)
- os=-sunos4.1.1
+ os=sunos4.1.1
;;
pru-*)
- os=-elf
+ os=elf
;;
*-be)
- os=-beos
+ os=beos
;;
*-ibm)
- os=-aix
+ os=aix
;;
*-knuth)
- os=-mmixware
+ os=mmixware
;;
*-wec)
- os=-proelf
+ os=proelf
;;
*-winbond)
- os=-proelf
+ os=proelf
;;
*-oki)
- os=-proelf
+ os=proelf
;;
*-hp)
- os=-hpux
+ os=hpux
;;
*-hitachi)
- os=-hiux
+ os=hiux
;;
i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
- os=-sysv
+ os=sysv
;;
*-cbm)
- os=-amigaos
+ os=amigaos
;;
*-dg)
- os=-dgux
+ os=dgux
;;
*-dolphin)
- os=-sysv3
+ os=sysv3
;;
m68k-ccur)
- os=-rtu
+ os=rtu
;;
m88k-omron*)
- os=-luna
+ os=luna
;;
*-next)
- os=-nextstep
+ os=nextstep
;;
*-sequent)
- os=-ptx
+ os=ptx
;;
*-crds)
- os=-unos
+ os=unos
;;
*-ns)
- os=-genix
+ os=genix
;;
i370-*)
- os=-mvs
+ os=mvs
;;
*-gould)
- os=-sysv
+ os=sysv
;;
*-highlevel)
- os=-bsd
+ os=bsd
;;
*-encore)
- os=-bsd
+ os=bsd
;;
*-sgi)
- os=-irix
+ os=irix
;;
*-siemens)
- os=-sysv4
+ os=sysv4
;;
*-masscomp)
- os=-rtu
+ os=rtu
;;
f30[01]-fujitsu | f700-fujitsu)
- os=-uxpv
+ os=uxpv
;;
*-rom68k)
- os=-coff
+ os=coff
;;
*-*bug)
- os=-coff
+ os=coff
;;
*-apple)
- os=-macos
+ os=macos
;;
*-atari*)
- os=-mint
+ os=mint
+ ;;
+ *-wrs)
+ os=vxworks
;;
*)
- os=-none
+ os=none
;;
esac
fi
# Here we handle the case where we know the os, and the CPU type, but not the
# manufacturer. We pick the logical manufacturer.
-vendor=unknown
-case $basic_machine in
- *-unknown)
+case $vendor in
+ unknown)
case $os in
- -riscix*)
+ riscix*)
vendor=acorn
;;
- -sunos*)
+ sunos*)
vendor=sun
;;
- -cnk*|-aix*)
+ cnk*|aix*)
vendor=ibm
;;
- -beos*)
+ beos*)
vendor=be
;;
- -hpux*)
+ hpux*)
vendor=hp
;;
- -mpeix*)
+ mpeix*)
vendor=hp
;;
- -hiux*)
+ hiux*)
vendor=hitachi
;;
- -unos*)
+ unos*)
vendor=crds
;;
- -dgux*)
+ dgux*)
vendor=dg
;;
- -luna*)
+ luna*)
vendor=omron
;;
- -genix*)
+ genix*)
vendor=ns
;;
- -mvs* | -opened*)
+ clix*)
+ vendor=intergraph
+ ;;
+ mvs* | opened*)
vendor=ibm
;;
- -os400*)
+ os400*)
vendor=ibm
;;
- -ptx*)
+ ptx*)
vendor=sequent
;;
- -tpf*)
+ tpf*)
vendor=ibm
;;
- -vxsim* | -vxworks* | -windiss*)
+ vxsim* | vxworks* | windiss*)
vendor=wrs
;;
- -aux*)
+ aux*)
vendor=apple
;;
- -hms*)
+ hms*)
vendor=hitachi
;;
- -mpw* | -macos*)
+ mpw* | macos*)
vendor=apple
;;
- -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
+ *mint | mint[0-9]* | *MiNT | MiNT[0-9]*)
vendor=atari
;;
- -vos*)
+ vos*)
vendor=stratus
;;
esac
- basic_machine=`echo "$basic_machine" | sed "s/unknown/$vendor/"`
;;
esac
-echo "$basic_machine$os"
+echo "$cpu-$vendor-$os"
exit
# Local variables:
diff --git a/configure b/configure
index 9fbe564..615f638 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for PCRE2 10.34.
+# Generated by GNU Autoconf 2.69 for PCRE2 10.35.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -587,8 +587,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='PCRE2'
PACKAGE_TARNAME='pcre2'
-PACKAGE_VERSION='10.34'
-PACKAGE_STRING='PCRE2 10.34'
+PACKAGE_VERSION='10.35'
+PACKAGE_STRING='PCRE2 10.35'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1413,7 +1413,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures PCRE2 10.34 to adapt to many kinds of systems.
+\`configure' configures PCRE2 10.35 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1483,7 +1483,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of PCRE2 10.34:";;
+ short | recursive ) echo "Configuration of PCRE2 10.35:";;
esac
cat <<\_ACEOF
@@ -1663,7 +1663,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-PCRE2 configure 10.34
+PCRE2 configure 10.35
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2158,7 +2158,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by PCRE2 $as_me 10.34, which was
+It was created by PCRE2 $as_me 10.35, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -3022,7 +3022,7 @@ fi
# Define the identity of the package.
PACKAGE='pcre2'
- VERSION='10.34'
+ VERSION='10.35'
cat >>confdefs.h <<_ACEOF
@@ -5250,7 +5250,7 @@ esac
-macro_version='2.4.6.42-b88ce'
+macro_version='2.4.6.42-b88ce-dirty'
macro_revision='2.4.6.42'
@@ -12979,12 +12979,56 @@ _ACEOF
+# Check for Clang __attribute__((uninitialized)) feature
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __attribute__((uninitialized))" >&5
+$as_echo_n "checking for __attribute__((uninitialized))... " >&6; }
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+tmp_CFLAGS=$CFLAGS
+CFLAGS="$CFLAGS -Werror"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+char buf[128] __attribute__((uninitialized));(void)buf
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ pcre2_cc_cv_attribute_uninitialized=yes
+else
+ pcre2_cc_cv_attribute_uninitialized=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pcre2_cc_cv_attribute_uninitialized" >&5
+$as_echo "$pcre2_cc_cv_attribute_uninitialized" >&6; }
+if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then
+
+$as_echo "#define HAVE_ATTRIBUTE_UNINITIALIZED 1" >>confdefs.h
+
+fi
+CFLAGS=$tmp_CFLAGS
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
# Versioning
PCRE2_MAJOR="10"
-PCRE2_MINOR="34"
+PCRE2_MINOR="35"
PCRE2_PRERELEASE=""
-PCRE2_DATE="2019-11-21"
+PCRE2_DATE="2020-05-09"
if test "$PCRE2_MINOR" = "08" -o "$PCRE2_MINOR" = "09"
then
@@ -13112,14 +13156,21 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
echo checking for JIT support on this hardware... $enable_jit
fi
-# Handle --enable-jit-sealloc (disabled by default)
-# Check whether --enable-jit-sealloc was given.
+# Handle --enable-jit-sealloc (disabled by default and only experimental)
+case $host_os in
+ linux* | netbsd*)
+ # Check whether --enable-jit-sealloc was given.
if test "${enable_jit_sealloc+set}" = set; then :
enableval=$enable_jit_sealloc;
else
enable_jit_sealloc=no
fi
+ ;;
+ *)
+ enable_jit_sealloc=unsupported
+ ;;
+esac
# Handle --disable-pcre2grep-jit (enabled by default)
# Check whether --enable-pcre2grep-jit was given.
@@ -13814,7 +13865,7 @@ fi
# Checks for library functions.
-for ac_func in bcopy memmove strerror mkostemp secure_getenv
+for ac_func in bcopy memfd_create memmove mkostemp secure_getenv strerror
do :
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
@@ -14973,13 +15024,13 @@ esac
# are m4 variables, assigned above.
EXTRA_LIBPCRE2_8_LDFLAGS="$EXTRA_LIBPCRE2_8_LDFLAGS \
- $NO_UNDEFINED -version-info 9:0:9"
+ $NO_UNDEFINED -version-info 10:0:10"
EXTRA_LIBPCRE2_16_LDFLAGS="$EXTRA_LIBPCRE2_16_LDFLAGS \
- $NO_UNDEFINED -version-info 9:0:9"
+ $NO_UNDEFINED -version-info 10:0:10"
EXTRA_LIBPCRE2_32_LDFLAGS="$EXTRA_LIBPCRE2_32_LDFLAGS \
- $NO_UNDEFINED -version-info 9:0:9"
+ $NO_UNDEFINED -version-info 10:0:10"
EXTRA_LIBPCRE2_POSIX_LDFLAGS="$EXTRA_LIBPCRE2_POSIX_LDFLAGS \
$NO_UNDEFINED -version-info 2:3:0"
@@ -16025,7 +16076,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by PCRE2 $as_me 10.34, which was
+This file was extended by PCRE2 $as_me 10.35, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -16091,7 +16142,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-PCRE2 config.status 10.34
+PCRE2 config.status 10.35
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
@@ -17194,7 +17245,9 @@ $as_echo X/"$am_mf" |
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
as_fn_error $? "Something went wrong bootstrapping makefile fragments
- for automatic dependency tracking. Try re-running configure with the
+ for automatic dependency tracking. If GNU make was not used, consider
+ re-running the configure script with MAKE=\"gmake\" (or whatever is
+ necessary). You can also try re-running configure with the
'--disable-dependency-tracking' option to at least be able to build
the package (albeit without support for automatic dependency tracking).
See \`config.log' for more details" "$LINENO" 5; }
@@ -17221,7 +17274,6 @@ See \`config.log' for more details" "$LINENO" 5; }
cat <<_LT_EOF >> "$cfgfile"
#! $SHELL
# Generated automatically by $as_me ($PACKAGE) $VERSION
-# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
# NOTE: Changes made to this file will be lost: look at ltmain.sh.
# Provide generalized library-building support services.
diff --git a/configure.ac b/configure.ac
index 30d4ddd..180d3dc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -9,19 +9,19 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
dnl be defined as -RC2, for example. For real releases, it should be empty.
m4_define(pcre2_major, [10])
-m4_define(pcre2_minor, [34])
+m4_define(pcre2_minor, [35])
m4_define(pcre2_prerelease, [])
-m4_define(pcre2_date, [2019-11-21])
-
-# NOTE: The CMakeLists.txt file searches for the above variables in the first
-# 50 lines of this file. Please update that if the variables above are moved.
+m4_define(pcre2_date, [2020-05-09])
# Libtool shared library interface versions (current:revision:age)
-m4_define(libpcre2_8_version, [9:0:9])
-m4_define(libpcre2_16_version, [9:0:9])
-m4_define(libpcre2_32_version, [9:0:9])
+m4_define(libpcre2_8_version, [10:0:10])
+m4_define(libpcre2_16_version, [10:0:10])
+m4_define(libpcre2_32_version, [10:0:10])
m4_define(libpcre2_posix_version, [2:3:0])
+# NOTE: The CMakeLists.txt file searches for the above variables in the first
+# 50 lines of this file. Please update that if the variables above are moved.
+
AC_PREREQ(2.57)
AC_INIT(PCRE2, pcre2_major.pcre2_minor[]pcre2_prerelease, , pcre2)
AC_CONFIG_SRCDIR([src/pcre2.h.in])
@@ -72,6 +72,24 @@ AC_PROG_LN_S
PCRE2_VISIBILITY
+# Check for Clang __attribute__((uninitialized)) feature
+
+AC_MSG_CHECKING([for __attribute__((uninitialized))])
+AC_LANG_PUSH([C])
+tmp_CFLAGS=$CFLAGS
+CFLAGS="$CFLAGS -Werror"
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,
+ [[char buf[128] __attribute__((uninitialized));(void)buf]])],
+ [pcre2_cc_cv_attribute_uninitialized=yes],
+ [pcre2_cc_cv_attribute_uninitialized=no])
+AC_MSG_RESULT([$pcre2_cc_cv_attribute_uninitialized])
+if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then
+ AC_DEFINE([HAVE_ATTRIBUTE_UNINITIALIZED], 1, [Define this if your compiler
+ supports __attribute__((uninitialized))])
+fi
+CFLAGS=$tmp_CFLAGS
+AC_LANG_POP([C])
+
# Versioning
PCRE2_MAJOR="pcre2_major"
@@ -158,11 +176,18 @@ if test "$enable_jit" = "auto"; then
echo checking for JIT support on this hardware... $enable_jit
fi
-# Handle --enable-jit-sealloc (disabled by default)
-AC_ARG_ENABLE(jit-sealloc,
- AS_HELP_STRING([--enable-jit-sealloc],
- [enable SELinux compatible execmem allocator in JIT (experimental)]),
- , enable_jit_sealloc=no)
+# Handle --enable-jit-sealloc (disabled by default and only experimental)
+case $host_os in
+ linux* | netbsd*)
+ AC_ARG_ENABLE(jit-sealloc,
+ AS_HELP_STRING([--enable-jit-sealloc],
+ [enable SELinux compatible execmem allocator in JIT (experimental)]),
+ ,enable_jit_sealloc=no)
+ ;;
+ *)
+ enable_jit_sealloc=unsupported
+ ;;
+esac
# Handle --disable-pcre2grep-jit (enabled by default)
AC_ARG_ENABLE(pcre2grep-jit,
@@ -489,7 +514,7 @@ AC_TYPE_SIZE_T
# Checks for library functions.
-AC_CHECK_FUNCS(bcopy memmove strerror mkostemp secure_getenv)
+AC_CHECK_FUNCS(bcopy memfd_create memmove mkostemp secure_getenv strerror)
# Check for the availability of libz (aka zlib)
diff --git a/depcomp b/depcomp
index 65cbf70..6b39162 100755
--- a/depcomp
+++ b/depcomp
@@ -3,7 +3,7 @@
scriptversion=2018-03-07.03; # UTC
-# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+# Copyright (C) 1999-2020 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
diff --git a/doc/html/NON-AUTOTOOLS-BUILD.txt b/doc/html/NON-AUTOTOOLS-BUILD.txt
index 39e7620..a73c058 100644
--- a/doc/html/NON-AUTOTOOLS-BUILD.txt
+++ b/doc/html/NON-AUTOTOOLS-BUILD.txt
@@ -74,14 +74,14 @@ can skip ahead to the CMake section.
src/pcre2_chartables.c.
OR:
- Compile src/dftables.c as a stand-alone program (using -DHAVE_CONFIG_H
- if you have set up src/config.h), and then run it with the single
- argument "src/pcre2_chartables.c". This generates a set of standard
- character tables and writes them to that file. The tables are generated
- using the default C locale for your system. If you want to use a locale
- that is specified by LC_xxx environment variables, add the -L option to
- the dftables command. You must use this method if you are building on a
- system that uses EBCDIC code.
+ Compile src/pcre2_dftables.c as a stand-alone program (using
+ -DHAVE_CONFIG_H if you have set up src/config.h), and then run it with
+ the single argument "src/pcre2_chartables.c". This generates a set of
+ standard character tables and writes them to that file. The tables are
+ generated using the default C locale for your system. If you want to use
+ a locale that is specified by LC_xxx environment variables, add the -L
+ option to the pcre2_dftables command. You must use this method if you
+ are building on a system that uses EBCDIC code.
The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can
specify alternative tables at run time.
diff --git a/doc/html/README.txt b/doc/html/README.txt
index 8ce6f96..241376f 100644
--- a/doc/html/README.txt
+++ b/doc/html/README.txt
@@ -269,9 +269,9 @@ library. They are also documented in the pcre2build man page.
--enable-rebuild-chartables
- a program called dftables is compiled and run in the default C locale when
- you obey "make". It builds a source file called pcre2_chartables.c. If you do
- not specify this option, pcre2_chartables.c is created as a copy of
+ a program called pcre2_dftables is compiled and run in the default C locale
+ when you obey "make". It builds a source file called pcre2_chartables.c. If
+ you do not specify this option, pcre2_chartables.c is created as a copy of
pcre2_chartables.c.dist. See "Character tables" below for further
information.
@@ -548,11 +548,11 @@ Cross-compiling using autotools
You can specify CC and CFLAGS in the normal way to the "configure" command, in
order to cross-compile PCRE2 for some other host. However, you should NOT
-specify --enable-rebuild-chartables, because if you do, the dftables.c source
-file is compiled and run on the local host, in order to generate the inbuilt
-character tables (the pcre2_chartables.c file). This will probably not work,
-because dftables.c needs to be compiled with the local compiler, not the cross
-compiler.
+specify --enable-rebuild-chartables, because if you do, the pcre2_dftables.c
+source file is compiled and run on the local host, in order to generate the
+inbuilt character tables (the pcre2_chartables.c file). This will probably not
+work, because pcre2_dftables.c needs to be compiled with the local compiler,
+not the cross compiler.
When --enable-rebuild-chartables is not specified, pcre2_chartables.c is
created by making a copy of pcre2_chartables.c.dist, which is a default set of
@@ -560,9 +560,10 @@ tables that assumes ASCII code. Cross-compiling with the default tables should
not be a problem.
If you need to modify the character tables when cross-compiling, you should
-move pcre2_chartables.c.dist out of the way, then compile dftables.c by hand
-and run it on the local host to make a new version of pcre2_chartables.c.dist.
-Then when you cross-compile PCRE2 this new version of the tables will be used.
+move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by
+hand and run it on the local host to make a new version of
+pcre2_chartables.c.dist. See the pcre2build section "Creating character tables
+at build time" for more details.
Making new tarballs
@@ -721,8 +722,8 @@ compile context.
The source file called pcre2_chartables.c contains the default set of tables.
By default, this is created as a copy of pcre2_chartables.c.dist, which
contains tables for ASCII coding. However, if --enable-rebuild-chartables is
-specified for ./configure, a different version of pcre2_chartables.c is built
-by the program dftables (compiled from dftables.c), which uses the ANSI C
+specified for ./configure, a new version of pcre2_chartables.c is built by the
+program pcre2_dftables (compiled from pcre2_dftables.c), which uses the ANSI C
character handling functions such as isalnum(), isalpha(), isupper(),
islower(), etc. to build the table sources. This means that the default C
locale that is set for your system will control the contents of these default
@@ -732,32 +733,31 @@ file does not get automatically re-generated. The best way to do this is to
move pcre2_chartables.c.dist out of the way and replace it with your customized
tables.
-When the dftables program is run as a result of --enable-rebuild-chartables,
-it uses the default C locale that is set on your system. It does not pay
-attention to the LC_xxx environment variables. In other words, it uses the
-system's default locale rather than whatever the compiling user happens to have
-set. If you really do want to build a source set of character tables in a
-locale that is specified by the LC_xxx variables, you can run the dftables
-program by hand with the -L option. For example:
+When the pcre2_dftables program is run as a result of specifying
+--enable-rebuild-chartables, it uses the default C locale that is set on your
+system. It does not pay attention to the LC_xxx environment variables. In other
+words, it uses the system's default locale rather than whatever the compiling
+user happens to have set. If you really do want to build a source set of
+character tables in a locale that is specified by the LC_xxx variables, you can
+run the pcre2_dftables program by hand with the -L option. For example:
- ./dftables -L pcre2_chartables.c.special
+ ./pcre2_dftables -L pcre2_chartables.c.special
-The first two 256-byte tables provide lower casing and case flipping functions,
-respectively. The next table consists of three 32-byte bit maps which identify
-digits, "word" characters, and white space, respectively. These are used when
-building 32-byte bit maps that represent character classes for code points less
-than 256. The final 256-byte table has bits indicating various character types,
-as follows:
+The second argument names the file where the source code for the tables is
+written. The first two 256-byte tables provide lower casing and case flipping
+functions, respectively. The next table consists of a number of 32-byte bit
+maps which identify certain character classes such as digits, "word"
+characters, white space, etc. These are used when building 32-byte bit maps
+that represent character classes for code points less than 256. The final
+256-byte table has bits indicating various character types, as follows:
1 white space character
2 letter
- 4 decimal digit
- 8 hexadecimal digit
+ 4 lower case letter
+ 8 decimal digit
16 alphanumeric or '_'
- 128 regular expression metacharacter or binary zero
-You should not alter the set of characters that contain the 128 bit, as that
-will cause PCRE2 to malfunction.
+See also the pcre2build section "Creating character tables at build time".
File manifest
@@ -768,7 +768,7 @@ The distribution should contain the files listed below.
(A) Source files for the PCRE2 library functions and their headers are found in
the src directory:
- src/dftables.c auxiliary program for building pcre2_chartables.c
+ src/pcre2_dftables.c auxiliary program for building pcre2_chartables.c
when --enable-rebuild-chartables is specified
src/pcre2_chartables.c.dist a default set of character tables that assume
@@ -894,4 +894,4 @@ The distribution should contain the files listed below.
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
-Last updated: 16 April 2019
+Last updated: 20 March 2020
diff --git a/doc/html/pcre2_jit_free_unused_memory.html b/doc/html/pcre2_jit_free_unused_memory.html
index 8b59b8e..7f37e58 100644
--- a/doc/html/pcre2_jit_free_unused_memory.html
+++ b/doc/html/pcre2_jit_free_unused_memory.html
@@ -29,7 +29,7 @@ This function frees unused JIT executable memory. The argument is a general
context, for custom memory management, or NULL for standard memory management.
JIT memory allocation retains some memory in order to improve future JIT
compilation speed. In low memory conditions,
-\fBpcre2_jit_free_unused_memory()\fB can be used to cause this memory to be
+<b>pcre2_jit_free_unused_memory()</b> can be used to cause this memory to be
freed.
</P>
<P>
diff --git a/doc/html/pcre2_jit_match.html b/doc/html/pcre2_jit_match.html
index 1d59667..8629e4a 100644
--- a/doc/html/pcre2_jit_match.html
+++ b/doc/html/pcre2_jit_match.html
@@ -33,7 +33,9 @@ processed by the JIT compiler against a given subject string, using a matching
algorithm that is similar to Perl's. It is a "fast path" interface to JIT, and
it bypasses some of the sanity checks that <b>pcre2_match()</b> applies.
Its arguments are exactly the same as for
-<a href="pcre2_match.html"><b>pcre2_match()</b>.</a>
+<a href="pcre2_match.html"><b>pcre2_match()</b>,</a>
+except that the subject string must be specified with a length;
+PCRE2_ZERO_TERMINATED is not supported.
</P>
<P>
The supported options are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
diff --git a/doc/html/pcre2_set_character_tables.html b/doc/html/pcre2_set_character_tables.html
index 43c02ff..8564eea 100644
--- a/doc/html/pcre2_set_character_tables.html
+++ b/doc/html/pcre2_set_character_tables.html
@@ -27,9 +27,12 @@ DESCRIPTION
</b><br>
<P>
This function sets a pointer to custom character tables within a compile
-context. The second argument must be the result of a call to
-<b>pcre2_maketables()</b> or NULL to request the default tables. The result is
-always zero.
+context. The second argument must point to a set of PCRE2 character tables or
+be NULL to request the default tables. The result is always zero. Character
+tables can be created by calling <b>pcre2_maketables()</b> or by running the
+<b>pcre2_dftables</b> maintenance command in binary mode (see the
+<a href="pcre2build.html"><b>pcre2build</b></a>
+documentation).
</P>
<P>
There is a complete description of the PCRE2 native API in the
diff --git a/doc/html/pcre2_substitute.html b/doc/html/pcre2_substitute.html
index 2215ce9..10b2267 100644
--- a/doc/html/pcre2_substitute.html
+++ b/doc/html/pcre2_substitute.html
@@ -48,8 +48,8 @@ Its arguments are:
<i>outlengthptr</i> Points to the length of the output buffer
</pre>
A match data block is needed only if you want to inspect the data from the
-match that is returned in that block. A match context is needed only if you
-want to:
+final match that is returned in that block or if PCRE2_SUBSTITUTE_MATCHED is
+set. A match context is needed only if you want to:
<pre>
Set up a callout function
Set a matching offset limit
@@ -57,9 +57,14 @@ want to:
Change the backtracking depth limit
Set custom memory management in the match context
</pre>
-The <i>length</i>, <i>startoffset</i> and <i>rlength</i> values are code
-units, not characters, as is the contents of the variable pointed at by
-<i>outlengthptr</i>, which is updated to the actual length of the new string.
+The <i>length</i>, <i>startoffset</i> and <i>rlength</i> values are code units,
+not characters, as is the contents of the variable pointed at by
+<i>outlengthptr</i>. This variable must contain the length of the output buffer
+when the function is called. If the function is successful, the value is
+changed to the length of the new string, excluding the trailing zero that is
+automatically added.
+</P>
+<P>
The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for
zero-terminated strings. The options are:
<pre>
@@ -74,12 +79,24 @@ zero-terminated strings. The options are:
PCRE2_UTF was set at compile time)
PCRE2_SUBSTITUTE_EXTENDED Do extended replacement processing
PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the subject
+ PCRE2_SUBSTITUTE_LITERAL The replacement string is literal
+ PCRE2_SUBSTITUTE_MATCHED Use pre-existing match data for 1st match
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH If overflow, compute needed length
+ PCRE2_SUBSTITUTE_REPLACEMENT_ONLY Return only replacement string(s)
PCRE2_SUBSTITUTE_UNKNOWN_UNSET Treat unknown group as unset
PCRE2_SUBSTITUTE_UNSET_EMPTY Simple unset insert = empty string
</pre>
+If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_EXTENDED,
+PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and PCRE2_SUBSTITUTE_UNSET_EMPTY are ignored.
+</P>
+<P>
+If PCRE2_SUBSTITUTE_MATCHED is set, <i>match_data</i> must be non-NULL; its
+contents must be the result of a call to <b>pcre2_match()</b> using the same
+pattern and subject.
+</P>
+<P>
The function returns the number of substitutions, which may be zero if there
-were no matches. The result can be greater than one only when
+are no matches. The result may be greater than one only when
PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code
is returned.
</P>
diff --git a/doc/html/pcre2api.html b/doc/html/pcre2api.html
index 82aabee..36c2e3d 100644
--- a/doc/html/pcre2api.html
+++ b/doc/html/pcre2api.html
@@ -252,7 +252,7 @@ document for an overview of all the PCRE2 documentation.
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
-<b> pcre2_match_context *<i>mcontext</i>, PCRE2_SPTR \fIreplacementzfP,</b>
+<b> pcre2_match_context *<i>mcontext</i>, PCRE2_SPTR <i>replacement</i>,</b>
<b> PCRE2_SIZE <i>rlength</i>, PCRE2_UCHAR *<i>outputbuffer</i>,</b>
<b> PCRE2_SIZE *<i>outlengthptr</i>);</b>
</P>
@@ -1105,10 +1105,11 @@ less than the limit set by the caller of <b>pcre2_match()</b> or
<b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>);</b>
</P>
<P>
-The function <b>pcre2_config()</b> makes it possible for a PCRE2 client to
-discover which optional features have been compiled into the PCRE2 library. The
+The function <b>pcre2_config()</b> makes it possible for a PCRE2 client to find
+the value of certain configuration parameters and to discover which optional
+features have been compiled into the PCRE2 library. The
<a href="pcre2build.html"><b>pcre2build</b></a>
-documentation has more details about these optional features.
+documentation has more details about these features.
</P>
<P>
The first argument for <b>pcre2_config()</b> specifies which information is
@@ -1225,6 +1226,13 @@ over compilation stack usage, see <b>pcre2_set_compile_recursion_guard()</b>.
This parameter is obsolete and should not be used in new code. The output is a
uint32_t integer that is always set to zero.
<pre>
+ PCRE2_CONFIG_TABLES_LENGTH
+</pre>
+The output is a uint32_t integer that gives the length of PCRE2's character
+processing tables in bytes. For details of these tables see the
+<a href="#localesupport">section on locale support</a>
+below.
+<pre>
PCRE2_CONFIG_UNICODE_VERSION
</pre>
The <i>where</i> argument should point to a buffer that is at least 24 code
@@ -1481,13 +1489,13 @@ documentation.
</pre>
If this bit is set, letters in the pattern match both upper and lower case
letters in the subject. It is equivalent to Perl's /i option, and it can be
-changed within a pattern by a (?i) option setting. If PCRE2_UTF is set, Unicode
-properties are used for all characters with more than one other case, and for
-all characters whose code points are greater than U+007F. For lower valued
-characters with only one other case, a lookup table is used for speed. When
-PCRE2_UTF is not set, a lookup table is used for all code points less than 256,
-and higher code points (available only in 16-bit or 32-bit mode) are treated as
-not having another case.
+changed within a pattern by a (?i) option setting. If either PCRE2_UTF or
+PCRE2_UCP is set, Unicode properties are used for all characters with more than
+one other case, and for all characters whose code points are greater than
+U+007F. For lower valued characters with only one other case, a lookup table is
+used for speed. When neither PCRE2_UTF nor PCRE2_UCP is set, a lookup table is
+used for all code points less than 256, and higher code points (available only
+in 16-bit or 32-bit mode) are treated as not having another case.
<pre>
PCRE2_DOLLAR_ENDONLY
</pre>
@@ -1820,16 +1828,23 @@ are not representable in UTF-16.
<pre>
PCRE2_UCP
</pre>
-This option changes the way PCRE2 processes \B, \b, \D, \d, \S, \s, \W,
-\w, and some of the POSIX character classes. By default, only ASCII characters
-are recognized, but if PCRE2_UCP is set, Unicode properties are used instead to
-classify characters. More details are given in the section on
+This option has two effects. Firstly, it changes the way PCRE2 processes \B,
+\b, \D, \d, \S, \s, \W, \w, and some of the POSIX character classes. By
+default, only ASCII characters are recognized, but if PCRE2_UCP is set, Unicode
+properties are used instead to classify characters. More details are given in
+the section on
<a href="pcre2pattern.html#genericchartypes">generic character types</a>
in the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
page. If you set PCRE2_UCP, matching one of the items it affects takes much
-longer. The option is available only if PCRE2 has been compiled with Unicode
-support (which is the default).
+longer.
+</P>
+<P>
+The second effect of PCRE2_UCP is to force the use of Unicode properties for
+upper/lower casing operations on characters with code points greater than 127,
+even when PCRE2_UTF is not set. This makes it possible, for example, to process
+strings in the 16-bit UCS-2 code. This option is available only if PCRE2 has
+been compiled with Unicode support (which is the default).
<pre>
PCRE2_UNGREEDY
</pre>
@@ -1997,14 +2012,20 @@ PCRE2 handles caseless matching, and determines whether characters are letters,
digits, or whatever, by reference to a set of tables, indexed by character code
point. However, this applies only to characters whose code points are less than
256. By default, higher-valued code points never match escapes such as \w or
-\d. When PCRE2 is built with Unicode support (the default), all characters can
-be tested with \p and \P, or, alternatively, the PCRE2_UCP option can be set
-when a pattern is compiled; this causes \w and friends to use Unicode property
-support instead of the built-in tables.
+\d.
+</P>
+<P>
+When PCRE2 is built with Unicode support (the default), the Unicode properties
+of all characters can be tested with \p and \P, or, alternatively, the
+PCRE2_UCP option can be set when a pattern is compiled; this causes \w and
+friends to use Unicode property support instead of the built-in tables.
+PCRE2_UCP also causes upper/lower casing operations on characters with code
+points greater than 127 to use Unicode properties. These effects apply even
+when PCRE2_UTF is not set.
</P>
<P>
The use of locales with Unicode is discouraged. If you are handling characters
-with code points greater than 128, you should either use Unicode support, or
+with code points greater than 127, you should either use Unicode support, or
use locales, but not try to mix the two.
</P>
<P>
@@ -2030,7 +2051,7 @@ calling <b>pcre2_set_character_tables()</b> to set the tables pointer therein.
</P>
<P>
For example, to build and use tables that are appropriate for the French locale
-(where accented characters with values greater than 128 are treated as
+(where accented characters with values greater than 127 are treated as
letters), the following code could be used:
<pre>
setlocale(LC_CTYPE, "fr_FR");
@@ -2044,10 +2065,10 @@ are using Windows, the name for the French locale is "french".
</P>
<P>
The pointer that is passed (via the compile context) to <b>pcre2_compile()</b>
-is saved with the compiled pattern, and the same tables are used by
-<b>pcre2_match()</b> and <b>pcre_dfa_match()</b>. Thus, for any single pattern,
-compilation and matching both happen in the same locale, but different patterns
-can be processed in different locales.
+is saved with the compiled pattern, and the same tables are used by the
+matching functions. Thus, for any single pattern, compilation and matching both
+happen in the same locale, but different patterns can be processed in different
+locales.
</P>
<P>
It is the caller's responsibility to ensure that the memory containing the
@@ -2055,6 +2076,23 @@ tables remains available while they are still in use. When they are no longer
needed, you can discard them using <b>pcre2_maketables_free()</b>, which should
pass as its first parameter the same global context that was used to create the
tables.
+</P>
+<br><b>
+Saving locale tables
+</b><br>
+<P>
+The tables described above are just a sequence of binary bytes, which makes
+them independent of hardware characteristics such as endianness or whether the
+processor is 32-bit or 64-bit. A copy of the result of <b>pcre2_maketables()</b>
+can therefore be saved in a file or elsewhere and re-used later, even in a
+different program or on another computer. The size of the tables (number of
+bytes) must be obtained by calling <b>pcre2_config()</b> with the
+PCRE2_CONFIG_TABLES_LENGTH option because <b>pcre2_maketables()</b> does not
+return this value. Note that the <b>pcre2_dftables</b> program, which is part of
+the PCRE2 build system, can be used stand-alone to create a file that contains
+a set of binary tables. See the
+<a href="pcre2build.html#createtables"><b>pcre2build</b></a>
+documentation for details.
<a name="infoaboutpattern"></a></P>
<br><a name="SEC23" href="#TOC1">INFORMATION ABOUT A COMPILED PATTERN</a><br>
<P>
@@ -2063,7 +2101,7 @@ tables.
<P>
The <b>pcre2_pattern_info()</b> function returns general information about a
compiled pattern. For information about callouts, see the
-<a href="pcre2pattern.html#infoaboutcallouts">next section.</a>
+<a href="#infoaboutcallouts">next section.</a>
The first argument for <b>pcre2_pattern_info()</b> is a pointer to the compiled
pattern. The second argument specifies which piece of information is required,
and the third argument is a pointer to a variable to receive the data. If the
@@ -3302,12 +3340,20 @@ same number causes an error at compile time.
<b> PCRE2_SIZE *<i>outlengthptr</i>);</b>
</P>
<P>
-This function calls <b>pcre2_match()</b> and then makes a copy of the subject
-string in <i>outputbuffer</i>, replacing one or more parts that were matched
-with the <i>replacement</i> string, whose length is supplied in <b>rlength</b>.
-This can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
-The default is to perform just one replacement, but there is an option that
-requests multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below for details).
+This function optionally calls <b>pcre2_match()</b> and then makes a copy of the
+subject string in <i>outputbuffer</i>, replacing parts that were matched with
+the <i>replacement</i> string, whose length is supplied in <b>rlength</b>. This
+can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. There is an
+option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just the
+replacement string(s). The default action is to perform just one replacement if
+the pattern matches, but there is an option that requests multiple replacements
+(see PCRE2_SUBSTITUTE_GLOBAL below).
+</P>
+<P>
+If successful, <b>pcre2_substitute()</b> returns the number of substitutions
+that were carried out. This may be zero if no match was found, and is never
+greater than one unless PCRE2_SUBSTITUTE_GLOBAL is set. A negative value is
+returned if an error is detected.
</P>
<P>
Matches in which a \K item in a lookahead in the pattern causes the match to
@@ -3325,35 +3371,86 @@ functions from the match context, if provided, or else those that were used to
allocate memory for the compiled code.
</P>
<P>
-If an external <i>match_data</i> block is provided, its contents afterwards
-are those set by the final call to <b>pcre2_match()</b>. For global changes,
-this will have ended in a matching error. The contents of the ovector within
-the match data block may or may not have been changed.
+If <i>match_data</i> is not NULL and PCRE2_SUBSTITUTE_MATCHED is not set, the
+provided block is used for all calls to <b>pcre2_match()</b>, and its contents
+afterwards are the result of the final call. For global changes, this will
+always be a no-match error. The contents of the ovector within the match data
+block may or may not have been changed.
+</P>
+<P>
+As well as the usual options for <b>pcre2_match()</b>, a number of additional
+options can be set in the <i>options</i> argument of <b>pcre2_substitute()</b>.
+One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external
+<i>match_data</i> block must be provided, and it must have been used for an
+external call to <b>pcre2_match()</b>. The data in the <i>match_data</i> block
+(return code, offset vector) is used for the first substitution instead of
+calling <b>pcre2_match()</b> from within <b>pcre2_substitute()</b>. This allows
+an application to check for a match before choosing to substitute, without
+having to repeat the match.
+</P>
+<P>
+The contents of the externally supplied match data block are not changed when
+PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTITUTE_GLOBAL is also set,
+<b>pcre2_match()</b> is called after the first substitution to check for further
+matches, but this is done using an internally obtained match data block, thus
+always leaving the external block unchanged.
+</P>
+<P>
+The <i>code</i> argument is not used for matching before the first substitution
+when PCRE2_SUBSTITUTE_MATCHED is set, but it must be provided, even when
+PCRE2_SUBSTITUTE_GLOBAL is not set, because it contains information such as the
+UTF setting and the number of capturing parentheses in the pattern.
</P>
<P>
-The <i>outlengthptr</i> argument must point to a variable that contains the
-length, in code units, of the output buffer. If the function is successful, the
-value is updated to contain the length of the new string, excluding the
-trailing zero that is automatically added.
+The default action of <b>pcre2_substitute()</b> is to return a copy of the
+subject string with matched substrings replaced. However, if
+PCRE2_SUBSTITUTE_REPLACEMENT_ONLY is set, only the replacement substrings are
+returned. In the global case, multiple replacements are concatenated in the
+output buffer. Substitution callouts (see
+<a href="#subcallouts">below)</a>
+can be used to separate them if necessary.
+</P>
+<P>
+The <i>outlengthptr</i> argument of <b>pcre2_substitute()</b> must point to a
+variable that contains the length, in code units, of the output buffer. If the
+function is successful, the value is updated to contain the length in code
+units of the new string, excluding the trailing zero that is automatically
+added.
</P>
<P>
If the function is not successful, the value set via <i>outlengthptr</i> depends
on the type of error. For syntax errors in the replacement string, the value is
the offset in the replacement string where the error was detected. For other
errors, the value is PCRE2_UNSET by default. This includes the case of the
-output buffer being too small, unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set
-(see below), in which case the value is the minimum length needed, including
-space for the trailing zero. Note that in order to compute the required length,
-<b>pcre2_substitute()</b> has to simulate all the matching and copying, instead
-of giving an error return as soon as the buffer overflows. Note also that the
-length is in code units, not bytes.
+output buffer being too small, unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set.
</P>
<P>
-In the replacement string, which is interpreted as a UTF string in UTF mode,
-and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
-dollar character is an escape character that can specify the insertion of
-characters from capture groups or names from (*MARK) or other control verbs
-in the pattern. The following forms are always recognized:
+PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is
+too small. The default action is to return PCRE2_ERROR_NOMEMORY immediately. If
+this option is set, however, <b>pcre2_substitute()</b> continues to go through
+the motions of matching and substituting (without, of course, writing anything)
+in order to compute the size of buffer that is needed. This value is passed
+back via the <i>outlengthptr</i> variable, with the result of the function still
+being PCRE2_ERROR_NOMEMORY.
+</P>
+<P>
+Passing a buffer size of zero is a permitted way of finding out how much memory
+is needed for a given substitution. However, this does mean that the entire
+operation is carried out twice. Depending on the application, it may be more
+efficient to allocate a large buffer and free the excess afterwards, instead of
+using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH.
+</P>
+<P>
+The replacement string, which is interpreted as a UTF string in UTF mode, is
+checked for UTF validity unless PCRE2_NO_UTF_CHECK is set. An invalid UTF
+replacement string causes an immediate return with the relevant UTF error code.
+</P>
+<P>
+If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not interpreted
+in any way. By default, however, a dollar character is an escape character that
+can specify the insertion of characters from capture groups and names from
+(*MARK) or other control verbs in the pattern. The following forms are always
+recognized:
<pre>
$$ insert a dollar character
$&#60;n&#62; or ${&#60;n&#62;} insert the contents of group &#60;n&#62;
@@ -3377,10 +3474,6 @@ facility can be used to perform simple simultaneous substitutions, as this
apple lemon
2: pear orange
</pre>
-As well as the usual options for <b>pcre2_match()</b>, a number of additional
-options can be set in the <i>options</i> argument of <b>pcre2_substitute()</b>.
-</P>
-<P>
PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject string,
replacing every matching substring. If this option is not set, only the first
matching substring is replaced. The search for matches takes place in the
@@ -3392,7 +3485,7 @@ set in the match context, searching stops when that limit is reached.
<P>
You can restrict the effect of a global substitution to a portion of the
subject string by setting either or both of <i>startoffset</i> and an offset
-limit. Here is a \fPpcre2test\fP example:
+limit. Here is a <b>pcre2test</b> example:
<pre>
/B/g,replace=!,use_offset_limit
ABC ABC ABC ABC\=offset=3,offset_limit=12
@@ -3405,22 +3498,6 @@ CRLF is a valid newline sequence and the next two characters are CR, LF. In
this case, the offset is advanced by two characters.
</P>
<P>
-PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is
-too small. The default action is to return PCRE2_ERROR_NOMEMORY immediately. If
-this option is set, however, <b>pcre2_substitute()</b> continues to go through
-the motions of matching and substituting (without, of course, writing anything)
-in order to compute the size of buffer that is needed. This value is passed
-back via the <i>outlengthptr</i> variable, with the result of the function still
-being PCRE2_ERROR_NOMEMORY.
-</P>
-<P>
-Passing a buffer size of zero is a permitted way of finding out how much memory
-is needed for given substitution. However, this does mean that the entire
-operation is carried out twice. Depending on the application, it may be more
-efficient to allocate a large buffer and free the excess afterwards, instead of
-using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH.
-</P>
-<P>
PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture groups that do
not appear in the pattern to be treated as unset groups. This option should be
used with care, because it means that a typo in a group name or number no
@@ -3455,7 +3532,10 @@ terminating a \Q quoted sequence) reverts to no case forcing. The sequences
\u and \l force the next character (if it is a letter) to upper or lower
case, respectively, and then the state automatically reverts to no case
forcing. Case forcing applies to all inserted characters, including those from
-capture groups and letters within \Q...\E quoted sequences.
+capture groups and letters within \Q...\E quoted sequences. If either
+PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode
+properties are used for case forcing characters whose code points are greater
+than 127.
</P>
<P>
Note that case forcing sequences such as \U...\E do not nest. For example,
@@ -3494,14 +3574,17 @@ substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause unknown
groups in the extended syntax forms to be treated as unset.
</P>
<P>
-If successful, <b>pcre2_substitute()</b> returns the number of successful
-matches. This may be zero if no matches were found, and is never greater than 1
-unless PCRE2_SUBSTITUTE_GLOBAL is set.
+If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_UNKNOWN_UNSET,
+PCRE2_SUBSTITUTE_UNSET_EMPTY, and PCRE2_SUBSTITUTE_EXTENDED are irrelevant and
+are ignored.
</P>
+<br><b>
+Substitution errors
+</b><br>
<P>
-In the event of an error, a negative error code is returned. Except for
-PCRE2_ERROR_NOMATCH (which is never returned), errors from <b>pcre2_match()</b>
-are passed straight back.
+In the event of an error, <b>pcre2_substitute()</b> returns a negative error
+code. Except for PCRE2_ERROR_NOMATCH (which is never returned), errors from
+<b>pcre2_match()</b> are passed straight back.
</P>
<P>
PCRE2_ERROR_NOSUBSTRING is returned for a non-existent substring insertion,
@@ -3519,6 +3602,10 @@ needed is returned via <i>outlengthptr</i>. Note that this does not happen by
default.
</P>
<P>
+PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the
+<i>match_data</i> argument is NULL.
+</P>
+<P>
PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in the
replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE
(invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket
@@ -3532,7 +3619,7 @@ As for all PCRE2 errors, a text message that describes the error can be
obtained by calling the <b>pcre2_get_error_message()</b> function (see
"Obtaining a textual error message"
<a href="#geterrormessage">above).</a>
-</P>
+<a name="subcallouts"></a></P>
<br><b>
Substitution callouts
</b><br>
@@ -3869,9 +3956,9 @@ Cambridge, England.
</P>
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 02 September 2019
+Last updated: 19 March 2020
<br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2020 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2build.html b/doc/html/pcre2build.html
index 13d9da2..a206b23 100644
--- a/doc/html/pcre2build.html
+++ b/doc/html/pcre2build.html
@@ -128,7 +128,7 @@ To build it without Unicode support, add
--disable-unicode
</pre>
to the <b>configure</b> command. This setting applies to all three libraries. It
-is not possible to build one library with Unicode support, and another without,
+is not possible to build one library with Unicode support and another without
in the same configuration.
</P>
<P>
@@ -188,11 +188,11 @@ which enables the use of an execmem allocator in JIT that is compatible with
SELinux. This has no effect if JIT is not enabled. See the
<a href="pcre2jit.html"><b>pcre2jit</b></a>
documentation for a discussion of JIT usage. When JIT support is enabled,
-pcre2grep automatically makes use of it, unless you add
+<b>pcre2grep</b> automatically makes use of it, unless you add
<pre>
--disable-pcre2grep-jit
</pre>
-to the "configure" command.
+to the <b>configure</b> command.
</P>
<br><a name="SEC8" href="#TOC1">NEWLINE RECOGNITION</a><br>
<P>
@@ -321,7 +321,7 @@ As well as applying to <b>pcre2_match()</b>, the depth limit also controls
the depth of recursive function calls in <b>pcre2_dfa_match()</b>. These are
used for lookaround assertions, atomic groups, and recursion within patterns.
The limit does not apply to JIT matching.
-</P>
+<a name="createtables"></a></P>
<br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
<P>
PCRE2 uses fixed tables for processing characters whose code points are less
@@ -332,12 +332,34 @@ only. If you add
--enable-rebuild-chartables
</pre>
to the <b>configure</b> command, the distributed tables are no longer used.
-Instead, a program called <b>dftables</b> is compiled and run. This outputs the
-source for new set of tables, created in the default locale of your C run-time
-system. This method of replacing the tables does not work if you are cross
-compiling, because <b>dftables</b> is run on the local host. If you need to
-create alternative tables when cross compiling, you will have to do so "by
-hand".
+Instead, a program called <b>pcre2_dftables</b> is compiled and run. This
+outputs the source for a new set of tables, created in the default locale of your
+C run-time system. This method of replacing the tables does not work if you are
+cross compiling, because <b>pcre2_dftables</b> needs to be run on the local
+host and therefore must not be compiled with the cross compiler.
+</P>
+<P>
+If you need to create alternative tables when cross compiling, you will have to
+do so "by hand". There may also be other reasons for creating tables manually.
+To cause <b>pcre2_dftables</b> to be built on the local host, run a normal
+compiling command, and then run the program with the output file as its
+argument, for example:
+<pre>
+ cc src/pcre2_dftables.c -o pcre2_dftables
+ ./pcre2_dftables src/pcre2_chartables.c
+</pre>
+This builds the tables in the default locale of the local host. If you want to
+specify a locale, you must use the -L option:
+<pre>
+ LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c
+</pre>
+You can also specify -b (with or without -L). This causes the tables to be
+written in binary instead of as source code. A set of binary tables can be
+loaded into memory by an application and passed to <b>pcre2_compile()</b> in the
+same way as tables created by calling <b>pcre2_maketables()</b>. The tables are
+just a string of bytes, independent of hardware characteristics such as
+endianness. This means they can be bundled with an application that runs in
+different environments, to ensure consistent behaviour.
</P>
<br><a name="SEC13" href="#TOC1">USING EBCDIC CODE</a><br>
<P>
@@ -414,7 +436,7 @@ default parameter values by adding, for example,
--with-pcre2grep-bufsize=51200
--with-pcre2grep-max-bufsize=2097152
</pre>
-to the <b>configure</b> command. The caller of \fPpcre2grep\fP can override
+to the <b>configure</b> command. The caller of <b>pcre2grep</b> can override
these values by using --buffer-size and --max-buffer-size on the command line.
</P>
<br><a name="SEC17" href="#TOC1">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a><br>
@@ -538,7 +560,7 @@ support these modifiers. If
<pre>
--disable-percent-zt
</pre>
-is specified, no use is made of the z or t modifiers. Instead or %td or %zu,
+is specified, no use is made of the z or t modifiers. Instead of %td or %zu,
%lu is used, with a cast for size_t values.
</P>
<br><a name="SEC22" href="#TOC1">SUPPORT FOR FUZZERS</a><br>
@@ -592,9 +614,9 @@ Cambridge, England.
</P>
<br><a name="SEC26" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 03 March 2019
+Last updated: 20 March 2020
<br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2020 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2grep.html b/doc/html/pcre2grep.html
index f5b72f3..0b2f241 100644
--- a/doc/html/pcre2grep.html
+++ b/doc/html/pcre2grep.html
@@ -148,7 +148,7 @@ ignored.
By default, a file that contains a binary zero byte within the first 1024 bytes
is identified as a binary file, and is processed specially. (GNU grep
identifies binary files in this manner.) However, if the newline type is
-specified as "nul", that is, the line terminator is a binary zero, the test for
+specified as NUL, that is, the line terminator is a binary zero, the test for
a binary file is not applied. See the <b>--binary-files</b> option for a means
of changing the way binary files are handled.
</P>
@@ -562,7 +562,7 @@ when the PCRE2 library is compiled; if they are not specified, the defaults
are very large and so effectively unlimited.
</P>
<P>
-\fB--max-buffer-size=<i>number</i>
+<b>--max-buffer-size</b>=<i>number</i>
This limits the expansion of the processing buffer, whose initial size can be
set by <b>--buffer-size</b>. The maximum buffer size is silently forced to be no
smaller than the starting buffer size.
@@ -597,29 +597,36 @@ well as possibly handling a two-character newline sequence.
There is a limit to the number of lines that can be matched, imposed by the way
that <b>pcre2grep</b> buffers the input file as it scans it. With a sufficiently
large processing buffer, this should not be a problem, but the <b>-M</b> option
-does not work when input is read line by line (see \fP--line-buffered\fP.)
+does not work when input is read line by line (see <b>--line-buffered</b>).
</P>
<P>
<b>-N</b> <i>newline-type</i>, <b>--newline</b>=<i>newline-type</i>
-The PCRE2 library supports five different conventions for indicating
-the ends of lines. They are the single-character sequences CR (carriage return)
-and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
-which recognizes any of the preceding three types, and an "any" convention, in
-which any Unicode line ending sequence is assumed to end a line. The Unicode
-sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
-(form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
-PS (paragraph separator, U+2029).
+Six different conventions for indicating the ends of lines in scanned files are
+supported. For example:
+<pre>
+ pcre2grep -N CRLF 'some pattern' &#60;file&#62;
+</pre>
+The newline type may be specified in upper, lower, or mixed case. If the
+newline type is NUL, lines are separated by binary zero characters. The other
+types are the single-character sequences CR (carriage return) and LF
+(linefeed), the two-character sequence CRLF, an "anycrlf" type, which
+recognizes any of the preceding three types, and an "any" type, for which any
+Unicode line ending sequence is assumed to end a line. The Unicode sequences
+are the three just mentioned, plus VT (vertical tab, U+000B), FF (form feed,
+U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS
+(paragraph separator, U+2029).
<br>
<br>
When the PCRE2 library is built, a default line-ending sequence is specified.
This is normally the standard sequence for the operating system. Unless
otherwise specified by this option, <b>pcre2grep</b> uses the library's default.
-The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
-makes it possible to use <b>pcre2grep</b> to scan files that have come from
-other environments without having to modify their line endings. If the data
-that is being scanned does not agree with the convention set by this option,
-<b>pcre2grep</b> may behave in strange ways. Note that this option does not
-apply to files specified by the <b>-f</b>, <b>--exclude-from</b>, or
+<br>
+<br>
+This option makes it possible to use <b>pcre2grep</b> to scan files that have
+come from other environments without having to modify their line endings. If
+the data that is being scanned does not agree with the convention set by this
+option, <b>pcre2grep</b> may behave in strange ways. Note that this option does
+not apply to files specified by the <b>-f</b>, <b>--exclude-from</b>, or
<b>--include-from</b> options, which are expected to use the operating system's
standard newline sequence.
</P>
@@ -642,10 +649,12 @@ It should never be needed in normal use.
<P>
<b>-O</b> <i>text</i>, <b>--output</b>=<i>text</i>
When there is a match, instead of outputting the whole line that matched,
-output just the given text. This option is mutually exclusive with
-<b>--only-matching</b>, <b>--file-offsets</b>, and <b>--line-offsets</b>. Escape
-sequences starting with a dollar character may be used to insert the contents
-of the matched part of the line and/or captured substrings into the text.
+output just the given text, followed by an operating-system standard newline.
+The <b>--newline</b> option has no effect on this option, which is mutually
+exclusive with <b>--only-matching</b>, <b>--file-offsets</b>, and
+<b>--line-offsets</b>. Escape sequences starting with a dollar character may be
+used to insert the contents of the matched part of the line and/or captured
+substrings into the text.
<br>
<br>
$&#60;digits&#62; or ${&#60;digits&#62;} is replaced by the captured
@@ -807,16 +816,27 @@ by the <b>--locale</b> option. If no locale is set, the PCRE2 library's default
<br><a name="SEC8" href="#TOC1">NEWLINES</a><br>
<P>
The <b>-N</b> (<b>--newline</b>) option allows <b>pcre2grep</b> to scan files with
-different newline conventions from the default. Any parts of the input files
-that are written to the standard output are copied identically, with whatever
-newline sequences they have in the input. However, the setting of this option
-affects only the way scanned files are processed. It does not affect the
-interpretation of files specified by the <b>-f</b>, <b>--file-list</b>,
-<b>--exclude-from</b>, or <b>--include-from</b> options, nor does it affect the
-way in which <b>pcre2grep</b> writes informational messages to the standard
-error and output streams. For these it uses the string "\n" to indicate
-newlines, relying on the C I/O library to convert this to an appropriate
-sequence.
+newline conventions that differ from the default. This option affects only the
+way scanned files are processed. It does not affect the interpretation of files
+specified by the <b>-f</b>, <b>--file-list</b>, <b>--exclude-from</b>, or
+<b>--include-from</b> options.
+</P>
+<P>
+Any parts of the scanned input files that are written to the standard output
+are copied with whatever newline sequences they have in the input. However, if
+the final line of a file is output, and it does not end with a newline
+sequence, a newline sequence is added. If the newline setting is CR, LF, CRLF
+or NUL, that line ending is output; for the other settings (ANYCRLF or ANY) a
+single NL is used.
+</P>
+<P>
+The newline setting does not affect the way in which <b>pcre2grep</b> writes
+newlines in informational messages to the standard output and error streams.
+Under Windows, the standard output is set to be binary, so that "\r\n" at the
+ends of output lines that are copied from the input is not converted to
+"\r\r\n" by the C I/O library. This means that any messages written to the
+standard output must end with "\r\n". For all other operating systems, and
+for all messages to the standard error stream, "\n" is used.
</P>
<br><a name="SEC9" href="#TOC1">OPTIONS COMPATIBILITY</a><br>
<P>
@@ -992,9 +1012,9 @@ Cambridge, England.
</P>
<br><a name="SEC16" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 15 June 2019
+Last updated: 25 January 2020
<br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2020 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2jit.html b/doc/html/pcre2jit.html
index 47b588e..423dfd8 100644
--- a/doc/html/pcre2jit.html
+++ b/doc/html/pcre2jit.html
@@ -90,7 +90,7 @@ or a negative error code.
There is a limit to the size of pattern that JIT supports, imposed by the size
of machine stack that it uses. The exact rules are not documented because they
may change at any time, in particular, when new optimizations are introduced.
-If a pattern is too big, a call to \fBpcre2_jit_compile()\fB returns
+If a pattern is too big, a call to <b>pcre2_jit_compile()</b> returns
PCRE2_ERROR_NOMEMORY.
</P>
<P>
@@ -339,12 +339,12 @@ stack through the JIT callback function.
You can free a JIT stack at any time, as long as it will not be used by
<b>pcre2_match()</b> again. When you assign the stack to a match context, only a
pointer is set. There is no reference counting or any other magic. You can free
-compiled patterns, contexts, and stacks in any order, anytime. Just \fIdo
-not\fP call <b>pcre2_match()</b> with a match context pointing to an already
-freed stack, as that will cause SEGFAULT. (Also, do not free a stack currently
-used by <b>pcre2_match()</b> in another thread). You can also replace the stack
-in a context at any time when it is not in use. You should free the previous
-stack before assigning a replacement.
+compiled patterns, contexts, and stacks in any order, anytime.
+Just <i>do not</i> call <b>pcre2_match()</b> with a match context pointing to an
+already freed stack, as that will cause SEGFAULT. (Also, do not free a stack
+currently used by <b>pcre2_match()</b> in another thread). You can also replace
+the stack in a context at any time when it is not in use. You should free the
+previous stack before assigning a replacement.
</P>
<P>
(5) Should I allocate/free a stack every time before/after calling
diff --git a/doc/html/pcre2partial.html b/doc/html/pcre2partial.html
index 438c52c..bb73b1d 100644
--- a/doc/html/pcre2partial.html
+++ b/doc/html/pcre2partial.html
@@ -295,7 +295,7 @@ these characters with '&#60;' if the <b>allusedtext</b> modifier is set:
Partial match: 123ab
&#60;&#60;&#60;
</pre>
-However, the \fPallusedtext\fP modifier is not available for JIT matching,
+However, the <b>allusedtext</b> modifier is not available for JIT matching,
because JIT matching does not record the first (or last) consulted characters.
For this reason, this information is not available via the API. It is therefore
not possible in general to obtain the exact number of characters that must be
diff --git a/doc/html/pcre2pattern.html b/doc/html/pcre2pattern.html
index 0aa2191..ec2e8c9 100644
--- a/doc/html/pcre2pattern.html
+++ b/doc/html/pcre2pattern.html
@@ -114,7 +114,8 @@ Another special sequence that may appear at the start of a pattern is (*UCP).
This has the same effect as setting the PCRE2_UCP option: it causes sequences
such as \d and \w to use Unicode properties to determine character types,
instead of recognizing only characters with codes less than 256 via a lookup
-table.
+table. It also causes upper/lower casing operations to use Unicode properties
+for characters with code points greater than 127, even when UTF is not set.
</P>
<P>
Some applications that allow their users to supply patterns may wish to
@@ -818,6 +819,7 @@ Caucasian_Albanian,
Chakma,
Cham,
Cherokee,
+Chorasmian,
Common,
Coptic,
Cuneiform,
@@ -825,6 +827,7 @@ Cypriot,
Cyrillic,
Deseret,
Devanagari,
+Dives_Akuru,
Dogra,
Duployan,
Egyptian_Hieroglyphs,
@@ -856,6 +859,7 @@ Kannada,
Katakana,
Kayah_Li,
Kharoshthi,
+Khitan_Small_Script,
Khmer,
Khojki,
Khudawadi,
@@ -946,6 +950,7 @@ Unknown,
Vai,
Wancho,
Warang_Citi,
+Yezidi,
Yi,
Zanabazar_Square.
</P>
@@ -1650,7 +1655,7 @@ that succeeds is used. If the alternatives are within a group
<a href="#group">(defined below),</a>
"succeeds" means matching the rest of the main pattern as well as the
alternative in the group.
-</P>
+<a name="internaloptions"></a></P>
<br><a name="SEC13" href="#TOC1">INTERNAL OPTION SETTING</a><br>
<P>
The settings of the PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL,
@@ -1901,12 +1906,19 @@ are permitted for groups with the same number, for example:
(?|(?&#60;AA&#62;aa)|(?&#60;AA&#62;bb))
</pre>
The duplicate name constraint can be disabled by setting the PCRE2_DUPNAMES
-option at compile time, or by the use of (?J) within the pattern. Duplicate
-names can be useful for patterns where only one instance of the named capture
-group can match. Suppose you want to match the name of a weekday, either as a
-3-letter abbreviation or as the full name, and in both cases you want to
-extract the abbreviation. This pattern (ignoring the line breaks) does the job:
+option at compile time, or by the use of (?J) within the pattern, as described
+in the section entitled
+<a href="#internaloptions">"Internal Option Setting"</a>
+above.
+</P>
+<P>
+Duplicate names can be useful for patterns where only one instance of the named
+capture group can match. Suppose you want to match the name of a weekday,
+either as a 3-letter abbreviation or as the full name, and in both cases you
+want to extract the abbreviation. This pattern (ignoring the line breaks) does
+the job:
<pre>
+ (?J)
(?&#60;DN&#62;Mon|Fri|Sun)(?:day)?|
(?&#60;DN&#62;Tue)(?:sday)?|
(?&#60;DN&#62;Wed)(?:nesday)?|
@@ -1927,7 +1939,7 @@ they appear in the overall pattern. The first one that is set is used for the
reference. For example, this pattern matches both "foofoo" and "barbar" but not
"foobar" or "barfoo":
<pre>
- (?:(?&#60;n&#62;foo)|(?&#60;n&#62;bar))\k&#60;n&#62;
+ (?J)(?:(?&#60;n&#62;foo)|(?&#60;n&#62;bar))\k&#60;n&#62;
</PRE>
</P>
@@ -1961,7 +1973,7 @@ items:
an escape such as \d or \pL that matches a single character
a character class
a backreference
- a parenthesized group (including most assertions)
+ a parenthesized group (including lookaround assertions)
a subroutine call (recursive or otherwise)
</pre>
The general repetition quantifier specifies a minimum and maximum number of
@@ -2349,11 +2361,11 @@ using alternation, as in the example above, or by a quantifier with a minimum
of zero.
</P>
<P>
-Backreferences of this type cause the group that they reference to be treated
-as an
+For versions of PCRE2 less than 10.25, backreferences of this type used to
+cause the group that they reference to be treated as an
<a href="#atomicgroup">atomic group.</a>
-Once the whole group has been matched, a subsequent matching failure cannot
-cause backtracking into the middle of the group.
+This restriction no longer applies, and backtracking into such groups can occur
+as normal.
<a name="bigassertions"></a></P>
<br><a name="SEC20" href="#TOC1">ASSERTIONS</a><br>
<P>
@@ -2413,26 +2425,13 @@ control passes to the previous backtracking point, thus discarding any captured
strings within the assertion.
</P>
<P>
-For compatibility with Perl, most assertion groups may be repeated; though it
-makes no sense to assert the same thing several times, the side effect of
-capturing may occasionally be useful. However, an assertion that forms the
-condition for a conditional group may not be quantified. In practice, for
-other assertions, there only three cases:
-<br>
-<br>
-(1) If the quantifier is {0}, the assertion is never obeyed during matching.
-However, it may contain internal capture groups that are called from elsewhere
-via the
-<a href="#groupsassubroutines">subroutine mechanism.</a>
-<br>
-<br>
-(2) If quantifier is {0,n} where n is greater than zero, it is treated as if it
-were {0,1}. At run time, the rest of the pattern match is tried with and
-without the assertion, the order depending on the greediness of the quantifier.
-<br>
-<br>
-(3) If the minimum repetition is greater than zero, the quantifier is ignored.
-The assertion is obeyed just once when encountered during matching.
+Most assertion groups may be repeated; though it makes no sense to assert the
+same thing several times, the side effect of capturing in positive assertions
+may occasionally be useful. However, an assertion that forms the condition for
+a conditional group may not be quantified. PCRE2 used to restrict the
+repetition of assertions, but from release 10.35 the only restriction is that
+an unlimited maximum repetition is changed to be one more than the minimum. For
+example, {3,} is treated as {3,4}.
</P>
<br><b>
Alphabetic assertion names
@@ -2624,8 +2623,8 @@ backtracking into the assertion. However, there are some cases where non-atomic
positive assertions can be useful. PCRE2 provides these using the following
syntax:
<pre>
- (*non_atomic_positive_lookahead: or (*napla:
- (*non_atomic_positive_lookbehind: or (*naplb:
+ (*non_atomic_positive_lookahead: or (*napla: or (?*
+ (*non_atomic_positive_lookbehind: or (*naplb: or (?&#60;*
</pre>
Consider the problem of finding the right-most word in a string that also
appears earlier in the string, that is, it must appear at least twice in total.
@@ -2665,9 +2664,15 @@ as before because nothing has changed, so using a non-atomic assertion just
wastes resources.
</P>
<P>
+There is one exception to backtracking into a non-atomic assertion. If an
+(*ACCEPT) control verb is triggered, the assertion succeeds atomically. That
+is, a subsequent match failure cannot backtrack into the assertion.
+</P>
+<P>
Non-atomic assertions are not supported by the alternative matching function
-<b>pcre2_dfa_match()</b>. They are also not supported by JIT (but may be in
-future). Note that assertions that appear as conditions for
+<b>pcre2_dfa_match()</b>. They are supported by JIT, but only if they do not
+contain any control verbs such as (*ACCEPT). (This may change in future). Note
+that assertions that appear as conditions for
<a href="#conditions">conditional groups</a>
(see below) must be atomic.
</P>
@@ -3833,9 +3838,9 @@ Cambridge, England.
</P>
<br><a name="SEC32" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 29 July 2019
+Last updated: 24 February 2020
<br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2020 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2syntax.html b/doc/html/pcre2syntax.html
index 00f0513..7383104 100644
--- a/doc/html/pcre2syntax.html
+++ b/doc/html/pcre2syntax.html
@@ -223,6 +223,7 @@ Caucasian_Albanian,
Chakma,
Cham,
Cherokee,
+Chorasmian,
Common,
Coptic,
Cuneiform,
@@ -230,6 +231,7 @@ Cypriot,
Cyrillic,
Deseret,
Devanagari,
+Dives_Akuru,
Dogra,
Duployan,
Egyptian_Hieroglyphs,
@@ -261,6 +263,7 @@ Kannada,
Katakana,
Kayah_Li,
Kharoshthi,
+Khitan_Small_Script,
Khmer,
Khojki,
Khudawadi,
@@ -350,6 +353,7 @@ Ugaritic,
Vai,
Wancho,
Warang_Citi,
+Yezidi,
Yi,
Zanabazar_Square.
</P>
@@ -467,7 +471,7 @@ Changes of these options within a group are automatically cancelled at the end
of the group.
<pre>
(?i) caseless
- (?J) allow duplicate names
+ (?J) allow duplicate named groups
(?m) multiline
(?n) no auto capture
(?s) single line (dotall)
@@ -553,11 +557,13 @@ Each top-level branch of a lookbehind must be of a fixed length.
<P>
These assertions are specific to PCRE2 and are not Perl-compatible.
<pre>
- (*napla:...)
- (*non_atomic_positive_lookahead:...)
+ (?*...) )
+ (*napla:...) ) synonyms
+ (*non_atomic_positive_lookahead:...) )
- (*naplb:...)
- (*non_atomic_positive_lookbehind:...)
+ (?&#60;*...) )
+ (*naplb:...) ) synonyms
+ (*non_atomic_positive_lookbehind:...) )
</PRE>
</P>
<br><a name="SEC21" href="#TOC1">SCRIPT RUNS</a><br>
@@ -683,7 +689,7 @@ Cambridge, England.
</P>
<br><a name="SEC29" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 29 July 2019
+Last updated: 28 December 2019
<br>
Copyright &copy; 1997-2019 University of Cambridge.
<br>
diff --git a/doc/html/pcre2test.html b/doc/html/pcre2test.html
index e387315..920b265 100644
--- a/doc/html/pcre2test.html
+++ b/doc/html/pcre2test.html
@@ -261,7 +261,7 @@ standard output, then exit with zero exit code. All other options are ignored.
If both -C and -LM are present, whichever is first is recognized.
</P>
<P>
-\fB-pattern\fB <i>modifier-list</i>
+<b>-pattern</b> <i>modifier-list</i>
Behave as if each pattern line contains the given modifiers.
</P>
<P>
@@ -376,6 +376,12 @@ This command is used to load a set of precompiled patterns from a file, as
described in the section entitled "Saving and restoring compiled patterns"
<a href="#saverestore">below.</a>
<pre>
+ #loadtables &#60;filename&#62;
+</pre>
+This command is used to load a set of binary character tables that can be
+accessed by the tables=3 qualifier. Such tables can be created by the
+<b>pcre2_dftables</b> program with the -b option.
+<pre>
#newline_default [&#60;newline-list&#62;]
</pre>
When PCRE2 is built, a default newline convention can be specified. This
@@ -679,7 +685,7 @@ heavily used in the test files.
pushcopy push a copy onto the stack
stackguard=&#60;number&#62; test the stackguard feature
subject_literal treat all subject lines as literal
- tables=[0|1|2] select internal tables
+ tables=[0|1|2|3] select internal tables
use_length do not zero-terminate the pattern
utf8_input treat input as UTF-8
</pre>
@@ -1027,18 +1033,20 @@ Using alternative character tables
</b><br>
<P>
The value specified for the <b>tables</b> modifier must be one of the digits 0,
-1, or 2. It causes a specific set of built-in character tables to be passed to
-<b>pcre2_compile()</b>. This is used in the PCRE2 tests to check behaviour with
-different character tables. The digit specifies the tables as follows:
+1, 2, or 3. It causes a specific set of built-in character tables to be passed
+to <b>pcre2_compile()</b>. This is used in the PCRE2 tests to check behaviour
+with different character tables. The digit specifies the tables as follows:
<pre>
0 do not pass any special character tables
1 the default ASCII tables, as distributed in
pcre2_chartables.c.dist
2 a set of tables defining ISO 8859 characters
+ 3 a set of tables loaded by the #loadtables command
</pre>
-In table 2, some characters whose codes are greater than 128 are identified as
-letters, digits, spaces, etc. Setting alternate character tables and a locale
-are mutually exclusive.
+In tables 2, some characters whose codes are greater than 128 are identified as
+letters, digits, spaces, etc. Tables 3 can be used only after a
+<b>#loadtables</b> command has loaded them from a binary file. Setting alternate
+character tables and a locale are mutually exclusive.
</P>
<br><b>
Setting certain match controls
@@ -1050,24 +1058,27 @@ modifier list, in which case they are applied to every subject line that is
processed with that pattern. These modifiers do not affect the compilation
process.
<pre>
- aftertext show text after match
- allaftertext show text after captures
- allcaptures show all captures
- allvector show the entire ovector
- allusedtext show all consulted text
- altglobal alternative global matching
- /g global global matching
- jitstack=&#60;n&#62; set size of JIT stack
- mark show mark values
- replace=&#60;string&#62; specify a replacement string
- startchar show starting character when relevant
- substitute_callout use substitution callouts
- substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
- substitute_skip=&#60;n&#62; skip substitution number n
- substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
- substitute_stop=&#60;n&#62; skip substitution number n and greater
- substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
- substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
+ aftertext show text after match
+ allaftertext show text after captures
+ allcaptures show all captures
+ allvector show the entire ovector
+ allusedtext show all consulted text
+ altglobal alternative global matching
+ /g global global matching
+ jitstack=&#60;n&#62; set size of JIT stack
+ mark show mark values
+ replace=&#60;string&#62; specify a replacement string
+ startchar show starting character when relevant
+ substitute_callout use substitution callouts
+ substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
+ substitute_literal use PCRE2_SUBSTITUTE_LITERAL
+ substitute_matched use PCRE2_SUBSTITUTE_MATCHED
+ substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+ substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
+ substitute_skip=&#60;n&#62; skip substitution &#60;n&#62;
+ substitute_stop=&#60;n&#62; skip substitution &#60;n&#62; and following
+ substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
+ substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
</pre>
These modifiers may not appear in a <b>#pattern</b> command. If you want them as
defaults, set them in a <b>#subject</b> command.
@@ -1233,8 +1244,11 @@ pattern.
startoffset=&#60;n&#62; same as offset=&#60;n&#62;
substitute_callout use substitution callouts
 substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
- substitute_skip=&#60;n&#62; skip substitution number n
+ substitute_literal use PCRE2_SUBSTITUTE_LITERAL
+ substitute_matched use PCRE2_SUBSTITUTE_MATCHED
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+ substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
+ substitute_skip=&#60;n&#62; skip substitution number n
substitute_stop=&#60;n&#62; skip substitution number n and greater
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
@@ -1395,9 +1409,10 @@ Testing the substitution function
</b><br>
<P>
If the <b>replace</b> modifier is set, the <b>pcre2_substitute()</b> function is
-called instead of one of the matching functions. Note that replacement strings
-cannot contain commas, because a comma signifies the end of a modifier. This is
-not thought to be an issue in a test program.
+called instead of one of the matching functions (or after one call of
+<b>pcre2_match()</b> in the case of PCRE2_SUBSTITUTE_MATCHED). Note that
+replacement strings cannot contain commas, because a comma signifies the end of
+a modifier. This is not thought to be an issue in a test program.
</P>
<P>
Unlike subject strings, <b>pcre2test</b> does not process replacement strings
@@ -1413,11 +1428,16 @@ for <b>pcre2_substitute()</b>:
<pre>
global PCRE2_SUBSTITUTE_GLOBAL
substitute_extended PCRE2_SUBSTITUTE_EXTENDED
+ substitute_literal PCRE2_SUBSTITUTE_LITERAL
+ substitute_matched PCRE2_SUBSTITUTE_MATCHED
substitute_overflow_length PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+ substitute_replacement_only PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
substitute_unknown_unset PCRE2_SUBSTITUTE_UNKNOWN_UNSET
substitute_unset_empty PCRE2_SUBSTITUTE_UNSET_EMPTY
-
-</PRE>
+</pre>
+See the
+<a href="pcre2api.html"><b>pcre2api</b></a>
+documentation for details of these options.
</P>
<P>
After a successful substitution, the modified string is output, preceded by the
@@ -2093,9 +2113,9 @@ Cambridge, England.
</P>
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
<P>
-Last updated: 30 July 2019
+Last updated: 20 March 2020
<br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2020 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/html/pcre2unicode.html b/doc/html/pcre2unicode.html
index 3d4e6b4..76ca6ea 100644
--- a/doc/html/pcre2unicode.html
+++ b/doc/html/pcre2unicode.html
@@ -19,7 +19,7 @@ UNICODE AND UTF SUPPORT
PCRE2 is normally built with Unicode support, though if you do not need it, you
can build it without, in which case the library will be smaller. With Unicode
support, PCRE2 has knowledge of Unicode character properties and can process
-text strings in UTF-8, UTF-16, or UTF-32 format (depending on the code unit
+strings of text in UTF-8, UTF-16, and UTF-32 format (depending on the code unit
width), but this is not the default. Unless specifically requested, PCRE2
treats each code unit in a string as one character.
</P>
@@ -134,14 +134,16 @@ However, the special horizontal and vertical white space matching escapes (\h,
not PCRE2_UCP is set.
</P>
<br><b>
-CASE-EQUIVALENCE IN UTF MODE
+UNICODE CASE-EQUIVALENCE
</b><br>
<P>
-Case-insensitive matching in UTF mode makes use of Unicode properties except
-for characters whose code points are less than 128 and that have at most two
-case-equivalent values. For these, a direct table lookup is used for speed. A
-few Unicode characters such as Greek sigma have more than two code points that
-are case-equivalent, and these are treated specially.
+If either PCRE2_UTF or PCRE2_UCP is set, upper/lower case processing makes use
+of Unicode properties except for characters whose code points are less than 128
+and that have at most two case-equivalent values. For these, a direct table
+lookup is used for speed. A few Unicode characters such as Greek sigma have
+more than two code points that are case-equivalent, and these are treated
+specially. Setting PCRE2_UCP without PCRE2_UTF allows Unicode-style case
+processing for non-UTF character encodings such as UCS-2.
<a name="scriptruns"></a></P>
<br><b>
SCRIPT RUNS
@@ -484,9 +486,9 @@ Cambridge, England.
REVISION
</b><br>
<P>
-Last updated: 24 May 2019
+Last updated: 23 February 2020
<br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2020 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
diff --git a/doc/pcre2.txt b/doc/pcre2.txt
index 948b91a..4651bba 100644
--- a/doc/pcre2.txt
+++ b/doc/pcre2.txt
@@ -351,7 +351,7 @@ PCRE2 NATIVE API STRING SUBSTITUTION FUNCTION
int pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject,
PCRE2_SIZE length, PCRE2_SIZE startoffset,
uint32_t options, pcre2_match_data *match_data,
- pcre2_match_context *mcontext, PCRE2_SPTR replacementzfP,
+ pcre2_match_context *mcontext, PCRE2_SPTR replacementz,
PCRE2_SIZE rlength, PCRE2_UCHAR *outputbuffer,
PCRE2_SIZE *outlengthptr);
@@ -1103,9 +1103,9 @@ CHECKING BUILD-TIME OPTIONS
int pcre2_config(uint32_t what, void *where);
The function pcre2_config() makes it possible for a PCRE2 client to
- discover which optional features have been compiled into the PCRE2 li-
- brary. The pcre2build documentation has more details about these op-
- tional features.
+ find the value of certain configuration parameters and to discover
+ which optional features have been compiled into the PCRE2 library. The
+ pcre2build documentation has more details about these features.
The first argument for pcre2_config() specifies which information is
required. The second argument is a pointer to memory into which the in-
@@ -1225,6 +1225,12 @@ CHECKING BUILD-TIME OPTIONS
This parameter is obsolete and should not be used in new code. The out-
put is a uint32_t integer that is always set to zero.
+ PCRE2_CONFIG_TABLES_LENGTH
+
+ The output is a uint32_t integer that gives the length of PCRE2's char-
+ acter processing tables in bytes. For details of these tables see the
+ section on locale support below.
+
PCRE2_CONFIG_UNICODE_VERSION
The where argument should point to a buffer that is at least 24 code
@@ -1454,14 +1460,14 @@ COMPILING A PATTERN
If this bit is set, letters in the pattern match both upper and lower
case letters in the subject. It is equivalent to Perl's /i option, and
- it can be changed within a pattern by a (?i) option setting. If
- PCRE2_UTF is set, Unicode properties are used for all characters with
- more than one other case, and for all characters whose code points are
- greater than U+007F. For lower valued characters with only one other
- case, a lookup table is used for speed. When PCRE2_UTF is not set, a
- lookup table is used for all code points less than 256, and higher code
- points (available only in 16-bit or 32-bit mode) are treated as not
- having another case.
+ it can be changed within a pattern by a (?i) option setting. If either
+ PCRE2_UTF or PCRE2_UCP is set, Unicode properties are used for all
+ characters with more than one other case, and for all characters whose
+ code points are greater than U+007F. For lower valued characters with
+ only one other case, a lookup table is used for speed. When neither
+ PCRE2_UTF nor PCRE2_UCP is set, a lookup table is used for all code
+ points less than 256, and higher code points (available only in 16-bit
+ or 32-bit mode) are treated as not having another case.
PCRE2_DOLLAR_ENDONLY
@@ -1786,14 +1792,20 @@ COMPILING A PATTERN
PCRE2_UCP
- This option changes the way PCRE2 processes \B, \b, \D, \d, \S, \s, \W,
- \w, and some of the POSIX character classes. By default, only ASCII
- characters are recognized, but if PCRE2_UCP is set, Unicode properties
- are used instead to classify characters. More details are given in the
- section on generic character types in the pcre2pattern page. If you set
- PCRE2_UCP, matching one of the items it affects takes much longer. The
- option is available only if PCRE2 has been compiled with Unicode sup-
- port (which is the default).
+ This option has two effects. Firstly, it changes the way PCRE2 processes
+ \B, \b, \D, \d, \S, \s, \W, \w, and some of the POSIX character
+ classes. By default, only ASCII characters are recognized, but if
+ PCRE2_UCP is set, Unicode properties are used instead to classify char-
+ acters. More details are given in the section on generic character
+ types in the pcre2pattern page. If you set PCRE2_UCP, matching one of
+ the items it affects takes much longer.
+
+ The second effect of PCRE2_UCP is to force the use of Unicode proper-
+ ties for upper/lower casing operations on characters with code points
+ greater than 127, even when PCRE2_UTF is not set. This makes it possi-
+ ble, for example, to process strings in the 16-bit UCS-2 code. This op-
+ tion is available only if PCRE2 has been compiled with Unicode support
+ (which is the default).
PCRE2_UNGREEDY
@@ -1953,14 +1965,18 @@ LOCALE SUPPORT
letters, digits, or whatever, by reference to a set of tables, indexed
by character code point. However, this applies only to characters whose
code points are less than 256. By default, higher-valued code points
- never match escapes such as \w or \d. When PCRE2 is built with Unicode
- support (the default), all characters can be tested with \p and \P, or,
- alternatively, the PCRE2_UCP option can be set when a pattern is com-
- piled; this causes \w and friends to use Unicode property support in-
- stead of the built-in tables.
+ never match escapes such as \w or \d.
+
+ When PCRE2 is built with Unicode support (the default), the Unicode
+ properties of all characters can be tested with \p and \P, or, alterna-
+ tively, the PCRE2_UCP option can be set when a pattern is compiled;
+ this causes \w and friends to use Unicode property support instead of
+ the built-in tables. PCRE2_UCP also causes upper/lower casing opera-
+ tions on characters with code points greater than 127 to use Unicode
+ properties. These effects apply even when PCRE2_UTF is not set.
The use of locales with Unicode is discouraged. If you are handling
- characters with code points greater than 128, you should either use
+ characters with code points greater than 127, you should either use
Unicode support, or use locales, but not try to mix the two.
PCRE2 contains a built-in set of character tables that are used by de-
@@ -1984,7 +2000,7 @@ LOCALE SUPPORT
therein.
For example, to build and use tables that are appropriate for the
- French locale (where accented characters with values greater than 128
+ French locale (where accented characters with values greater than 127
are treated as letters), the following code could be used:
setlocale(LC_CTYPE, "fr_FR");
@@ -1997,10 +2013,10 @@ LOCALE SUPPORT
if you are using Windows, the name for the French locale is "french".
The pointer that is passed (via the compile context) to pcre2_compile()
- is saved with the compiled pattern, and the same tables are used by
- pcre2_match() and pcre_dfa_match(). Thus, for any single pattern, com-
- pilation and matching both happen in the same locale, but different
- patterns can be processed in different locales.
+ is saved with the compiled pattern, and the same tables are used by the
+ matching functions. Thus, for any single pattern, compilation and
+ matching both happen in the same locale, but different patterns can be
+ processed in different locales.
It is the caller's responsibility to ensure that the memory containing
the tables remains available while they are still in use. When they are
@@ -2008,6 +2024,20 @@ LOCALE SUPPORT
which should pass as its first parameter the same global context that
was used to create the tables.
+ Saving locale tables
+
+ The tables described above are just a sequence of binary bytes, which
+ makes them independent of hardware characteristics such as endianness
+ or whether the processor is 32-bit or 64-bit. A copy of the result of
+ pcre2_maketables() can therefore be saved in a file or elsewhere and
+ re-used later, even in a different program or on another computer. The
+ size of the tables (number of bytes) must be obtained by calling
+ pcre2_config() with the PCRE2_CONFIG_TABLES_LENGTH option because
+ pcre2_maketables() does not return this value. Note that the
+ pcre2_dftables program, which is part of the PCRE2 build system, can be
+ used stand-alone to create a file that contains a set of binary tables.
+ See the pcre2build documentation for details.
+
INFORMATION ABOUT A COMPILED PATTERN
@@ -3193,55 +3223,109 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
PCRE2_SIZE rlength, PCRE2_UCHAR *outputbuffer,
PCRE2_SIZE *outlengthptr);
- This function calls pcre2_match() and then makes a copy of the subject
- string in outputbuffer, replacing one or more parts that were matched
+ This function optionally calls pcre2_match() and then makes a copy of
+ the subject string in outputbuffer, replacing parts that were matched
with the replacement string, whose length is supplied in rlength. This
can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
- The default is to perform just one replacement, but there is an option
- that requests multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below
- for details).
+ There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to re-
+ turn just the replacement string(s). The default action is to perform
+ just one replacement if the pattern matches, but there is an option
+ that requests multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL be-
+ low).
- Matches in which a \K item in a lookahead in the pattern causes the
- match to end before it starts are not supported, and give rise to an
+ If successful, pcre2_substitute() returns the number of substitutions
+ that were carried out. This may be zero if no match was found, and is
+ never greater than one unless PCRE2_SUBSTITUTE_GLOBAL is set. A nega-
+ tive value is returned if an error is detected.
+
+ Matches in which a \K item in a lookahead in the pattern causes the
+ match to end before it starts are not supported, and give rise to an
error return. For global replacements, matches in which \K in a lookbe-
- hind causes the match to start earlier than the point that was reached
+ hind causes the match to start earlier than the point that was reached
in the previous iteration are also not supported.
- The first seven arguments of pcre2_substitute() are the same as for
+ The first seven arguments of pcre2_substitute() are the same as for
pcre2_match(), except that the partial matching options are not permit-
- ted, and match_data may be passed as NULL, in which case a match data
- block is obtained and freed within this function, using memory manage-
- ment functions from the match context, if provided, or else those that
+ ted, and match_data may be passed as NULL, in which case a match data
+ block is obtained and freed within this function, using memory manage-
+ ment functions from the match context, if provided, or else those that
were used to allocate memory for the compiled code.
- If an external match_data block is provided, its contents afterwards
- are those set by the final call to pcre2_match(). For global changes,
- this will have ended in a matching error. The contents of the ovector
- within the match data block may or may not have been changed.
-
- The outlengthptr argument must point to a variable that contains the
- length, in code units, of the output buffer. If the function is suc-
- cessful, the value is updated to contain the length of the new string,
- excluding the trailing zero that is automatically added.
+ If match_data is not NULL and PCRE2_SUBSTITUTE_MATCHED is not set, the
+ provided block is used for all calls to pcre2_match(), and its contents
+ afterwards are the result of the final call. For global changes, this
+ will always be a no-match error. The contents of the ovector within the
+ match data block may or may not have been changed.
- If the function is not successful, the value set via outlengthptr de-
- pends on the type of error. For syntax errors in the replacement
+ As well as the usual options for pcre2_match(), a number of additional
+ options can be set in the options argument of pcre2_substitute(). One
+ such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external
+ match_data block must be provided, and it must have been used for an
+ external call to pcre2_match(). The data in the match_data block (re-
+ turn code, offset vector) is used for the first substitution instead of
+ calling pcre2_match() from within pcre2_substitute(). This allows an
+ application to check for a match before choosing to substitute, without
+ having to repeat the match.
+
+ The contents of the externally supplied match data block are not
+ changed when PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTI-
+ TUTE_GLOBAL is also set, pcre2_match() is called after the first sub-
+ stitution to check for further matches, but this is done using an in-
+ ternally obtained match data block, thus always leaving the external
+ block unchanged.
+
+ The code argument is not used for matching before the first substitu-
+ tion when PCRE2_SUBSTITUTE_MATCHED is set, but it must be provided,
+ even when PCRE2_SUBSTITUTE_GLOBAL is not set, because it contains in-
+ formation such as the UTF setting and the number of capturing parenthe-
+ ses in the pattern.
+
+ The default action of pcre2_substitute() is to return a copy of the
+ subject string with matched substrings replaced. However, if PCRE2_SUB-
+ STITUTE_REPLACEMENT_ONLY is set, only the replacement substrings are
+ returned. In the global case, multiple replacements are concatenated in
+ the output buffer. Substitution callouts (see below) can be used to
+ separate them if necessary.
+
+ The outlengthptr argument of pcre2_substitute() must point to a vari-
+ able that contains the length, in code units, of the output buffer. If
+ the function is successful, the value is updated to contain the length
+ in code units of the new string, excluding the trailing zero that is
+ automatically added.
+
+ If the function is not successful, the value set via outlengthptr de-
+ pends on the type of error. For syntax errors in the replacement
string, the value is the offset in the replacement string where the er-
- ror was detected. For other errors, the value is PCRE2_UNSET by de-
+ ror was detected. For other errors, the value is PCRE2_UNSET by de-
fault. This includes the case of the output buffer being too small, un-
- less PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set (see below), in which case
- the value is the minimum length needed, including space for the trail-
- ing zero. Note that in order to compute the required length, pcre2_sub-
- stitute() has to simulate all the matching and copying, instead of giv-
- ing an error return as soon as the buffer overflows. Note also that the
- length is in code units, not bytes.
-
- In the replacement string, which is interpreted as a UTF string in UTF
- mode, and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK op-
- tion is set, a dollar character is an escape character that can specify
- the insertion of characters from capture groups or names from (*MARK)
- or other control verbs in the pattern. The following forms are always
- recognized:
+ less PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set.
+
+ PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output
+ buffer is too small. The default action is to return PCRE2_ERROR_NOMEM-
+ ORY immediately. If this option is set, however, pcre2_substitute()
+ continues to go through the motions of matching and substituting (with-
+ out, of course, writing anything) in order to compute the size of buf-
+ fer that is needed. This value is passed back via the outlengthptr
+ variable, with the result of the function still being PCRE2_ER-
+ ROR_NOMEMORY.
+
+ Passing a buffer size of zero is a permitted way of finding out how
+ much memory is needed for a given substitution. However, this does mean
+ that the entire operation is carried out twice. Depending on the appli-
+ cation, it may be more efficient to allocate a large buffer and free
+ the excess afterwards, instead of using PCRE2_SUBSTITUTE_OVER-
+ FLOW_LENGTH.
+
+ The replacement string, which is interpreted as a UTF string in UTF
+ mode, is checked for UTF validity unless PCRE2_NO_UTF_CHECK is set. An
+ invalid UTF replacement string causes an immediate return with the rel-
+ evant UTF error code.
+
+ If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not in-
+ terpreted in any way. By default, however, a dollar character is an es-
+ cape character that can specify the insertion of characters from cap-
+ ture groups and names from (*MARK) or other control verbs in the pat-
+ tern. The following forms are always recognized:
$$ insert a dollar character
$<n> or ${<n>} insert the contents of group <n>
@@ -3265,19 +3349,16 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
apple lemon
2: pear orange
- As well as the usual options for pcre2_match(), a number of additional
- options can be set in the options argument of pcre2_substitute().
-
PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject
- string, replacing every matching substring. If this option is not set,
- only the first matching substring is replaced. The search for matches
- takes place in the original subject string (that is, previous replace-
- ments do not affect it). Iteration is implemented by advancing the
- startoffset value for each search, which is always passed the entire
+ string, replacing every matching substring. If this option is not set,
+ only the first matching substring is replaced. The search for matches
+ takes place in the original subject string (that is, previous replace-
+ ments do not affect it). Iteration is implemented by advancing the
+ startoffset value for each search, which is always passed the entire
subject string. If an offset limit is set in the match context, search-
ing stops when that limit is reached.
- You can restrict the effect of a global substitution to a portion of
+ You can restrict the effect of a global substitution to a portion of
the subject string by setting either or both of startoffset and an off-
set limit. Here is a pcre2test example:
@@ -3285,87 +3366,73 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
ABC ABC ABC ABC\=offset=3,offset_limit=12
2: ABC A!C A!C ABC
- When continuing with global substitutions after matching a substring
+ When continuing with global substitutions after matching a substring
with zero length, an attempt to find a non-empty match at the same off-
set is performed. If this is not successful, the offset is advanced by
one character except when CRLF is a valid newline sequence and the next
- two characters are CR, LF. In this case, the offset is advanced by two
+ two characters are CR, LF. In this case, the offset is advanced by two
characters.
- PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output
- buffer is too small. The default action is to return PCRE2_ERROR_NOMEM-
- ORY immediately. If this option is set, however, pcre2_substitute()
- continues to go through the motions of matching and substituting (with-
- out, of course, writing anything) in order to compute the size of buf-
- fer that is needed. This value is passed back via the outlengthptr
- variable, with the result of the function still being PCRE2_ER-
- ROR_NOMEMORY.
-
- Passing a buffer size of zero is a permitted way of finding out how
- much memory is needed for given substitution. However, this does mean
- that the entire operation is carried out twice. Depending on the appli-
- cation, it may be more efficient to allocate a large buffer and free
- the excess afterwards, instead of using PCRE2_SUBSTITUTE_OVER-
- FLOW_LENGTH.
-
PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture groups that
do not appear in the pattern to be treated as unset groups. This option
- should be used with care, because it means that a typo in a group name
+ should be used with care, because it means that a typo in a group name
or number no longer causes the PCRE2_ERROR_NOSUBSTRING error.
PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capture groups (including un-
- known groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be treated
- as empty strings when inserted as described above. If this option is
+ known groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be treated
+ as empty strings when inserted as described above. If this option is
not set, an attempt to insert an unset group causes the PCRE2_ERROR_UN-
- SET error. This option does not influence the extended substitution
+ SET error. This option does not influence the extended substitution
syntax described below.
- PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the
- replacement string. Without this option, only the dollar character is
- special, and only the group insertion forms listed above are valid.
+ PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the
+ replacement string. Without this option, only the dollar character is
+ special, and only the group insertion forms listed above are valid.
When PCRE2_SUBSTITUTE_EXTENDED is set, two things change:
- Firstly, backslash in a replacement string is interpreted as an escape
+ Firstly, backslash in a replacement string is interpreted as an escape
character. The usual forms such as \n or \x{ddd} can be used to specify
- particular character codes, and backslash followed by any non-alphanu-
- meric character quotes that character. Extended quoting can be coded
+ particular character codes, and backslash followed by any non-alphanu-
+ meric character quotes that character. Extended quoting can be coded
using \Q...\E, exactly as in pattern strings.
- There are also four escape sequences for forcing the case of inserted
- letters. The insertion mechanism has three states: no case forcing,
+ There are also four escape sequences for forcing the case of inserted
+ letters. The insertion mechanism has three states: no case forcing,
force upper case, and force lower case. The escape sequences change the
current state: \U and \L change to upper or lower case forcing, respec-
- tively, and \E (when not terminating a \Q quoted sequence) reverts to
- no case forcing. The sequences \u and \l force the next character (if
- it is a letter) to upper or lower case, respectively, and then the
+ tively, and \E (when not terminating a \Q quoted sequence) reverts to
+ no case forcing. The sequences \u and \l force the next character (if
+ it is a letter) to upper or lower case, respectively, and then the
state automatically reverts to no case forcing. Case forcing applies to
- all inserted characters, including those from capture groups and let-
- ters within \Q...\E quoted sequences.
+ all inserted characters, including those from capture groups and let-
+ ters within \Q...\E quoted sequences. If either PCRE2_UTF or PCRE2_UCP
+ was set when the pattern was compiled, Unicode properties are used for
+ case forcing characters whose code points are greater than 127.
Note that case forcing sequences such as \U...\E do not nest. For exam-
- ple, the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final
- \E has no effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EX-
+ ple, the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final
+ \E has no effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EX-
TRA_ALT_BSUX options do not apply to replacement strings.
- The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
- flexibility to capture group substitution. The syntax is similar to
+ The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
+ flexibility to capture group substitution. The syntax is similar to
that used by Bash:
${<n>:-<string>}
${<n>:+<string1>:<string2>}
- As before, <n> may be a group number or a name. The first form speci-
- fies a default value. If group <n> is set, its value is inserted; if
- not, <string> is expanded and the result inserted. The second form
- specifies strings that are expanded and inserted when group <n> is set
- or unset, respectively. The first form is just a convenient shorthand
+ As before, <n> may be a group number or a name. The first form speci-
+ fies a default value. If group <n> is set, its value is inserted; if
+ not, <string> is expanded and the result inserted. The second form
+ specifies strings that are expanded and inserted when group <n> is set
+ or unset, respectively. The first form is just a convenient shorthand
for
${<n>:+${<n>}:<string>}
- Backslash can be used to escape colons and closing curly brackets in
- the replacement strings. A change of the case forcing state within a
- replacement string remains in force afterwards, as shown in this
+ Backslash can be used to escape colons and closing curly brackets in
+ the replacement strings. A change of the case forcing state within a
+ replacement string remains in force afterwards, as shown in this
pcre2test example:
/(some)?(body)/substitute_extended,replace=${1:+\U:\L}HeLLo
@@ -3374,31 +3441,36 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
somebody
1: HELLO
- The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended
- substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause un-
+ The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended
+ substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause un-
known groups in the extended syntax forms to be treated as unset.
- If successful, pcre2_substitute() returns the number of successful
- matches. This may be zero if no matches were found, and is never
- greater than 1 unless PCRE2_SUBSTITUTE_GLOBAL is set.
+ If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_UNKNOWN_UNSET,
+ PCRE2_SUBSTITUTE_UNSET_EMPTY, and PCRE2_SUBSTITUTE_EXTENDED are irrele-
+ vant and are ignored.
- In the event of an error, a negative error code is returned. Except for
- PCRE2_ERROR_NOMATCH (which is never returned), errors from
- pcre2_match() are passed straight back.
+ Substitution errors
+
+ In the event of an error, pcre2_substitute() returns a negative error
+ code. Except for PCRE2_ERROR_NOMATCH (which is never returned), errors
+ from pcre2_match() are passed straight back.
PCRE2_ERROR_NOSUBSTRING is returned for a non-existent substring inser-
tion, unless PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set.
PCRE2_ERROR_UNSET is returned for an unset substring insertion (includ-
- ing an unknown substring when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set)
- when the simple (non-extended) syntax is used and PCRE2_SUBSTITUTE_UN-
+ ing an unknown substring when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set)
+ when the simple (non-extended) syntax is used and PCRE2_SUBSTITUTE_UN-
SET_EMPTY is not set.
- PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big
+ PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big
enough. If the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set, the size
- of buffer that is needed is returned via outlengthptr. Note that this
+ of buffer that is needed is returned via outlengthptr. Note that this
does not happen by default.
+ PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the
+ match_data argument is NULL.
+
PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in
the replacement string, with more particular errors being PCRE2_ER-
ROR_BADREPESCAPE (invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE
@@ -3721,8 +3793,8 @@ AUTHOR
REVISION
- Last updated: 02 September 2019
- Copyright (c) 1997-2019 University of Cambridge.
+ Last updated: 19 March 2020
+ Copyright (c) 1997-2020 University of Cambridge.
------------------------------------------------------------------------------
@@ -3821,8 +3893,8 @@ UNICODE AND UTF SUPPORT
--disable-unicode
to the configure command. This setting applies to all three libraries.
- It is not possible to build one library with Unicode support, and an-
- other without, in the same configuration.
+ It is not possible to build one library with Unicode support and an-
+ other without in the same configuration.
Of itself, Unicode support does not make PCRE2 treat strings as UTF-8,
UTF-16 or UTF-32. To do that, applications that use the library can set
@@ -3883,7 +3955,7 @@ JUST-IN-TIME COMPILER SUPPORT
--disable-pcre2grep-jit
- to the "configure" command.
+ to the configure command.
NEWLINE RECOGNITION
@@ -4027,39 +4099,61 @@ CREATING CHARACTER TABLES AT BUILD TIME
--enable-rebuild-chartables
to the configure command, the distributed tables are no longer used.
- Instead, a program called dftables is compiled and run. This outputs
- the source for new set of tables, created in the default locale of your
- C run-time system. This method of replacing the tables does not work if
- you are cross compiling, because dftables is run on the local host. If
- you need to create alternative tables when cross compiling, you will
- have to do so "by hand".
+ Instead, a program called pcre2_dftables is compiled and run. This out-
+ puts the source for a new set of tables, created in the default locale of
+ your C run-time system. This method of replacing the tables does not
+ work if you are cross compiling, because pcre2_dftables needs to be run
+ on the local host and therefore not compiled with the cross compiler.
+
+ If you need to create alternative tables when cross compiling, you will
+ have to do so "by hand". There may also be other reasons for creating
+ tables manually. To cause pcre2_dftables to be built on the local
+ host, run a normal compiling command, and then run the program with the
+ output file as its argument, for example:
+
+ cc src/pcre2_dftables.c -o pcre2_dftables
+ ./pcre2_dftables src/pcre2_chartables.c
+
+ This builds the tables in the default locale of the local host. If you
+ want to specify a locale, you must use the -L option:
+
+ LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c
+
+ You can also specify -b (with or without -L). This causes the tables to
+ be written in binary instead of as source code. A set of binary tables
+ can be loaded into memory by an application and passed to pcre2_com-
+ pile() in the same way as tables created by calling pcre2_maketables().
+ The tables are just a string of bytes, independent of hardware charac-
+ teristics such as endianness. This means they can be bundled with an
+ application that runs in different environments, to ensure consistent
+ behaviour.
USING EBCDIC CODE
- PCRE2 assumes by default that it will run in an environment where the
- character code is ASCII or Unicode, which is a superset of ASCII. This
+ PCRE2 assumes by default that it will run in an environment where the
+ character code is ASCII or Unicode, which is a superset of ASCII. This
is the case for most computer operating systems. PCRE2 can, however, be
compiled to run in an 8-bit EBCDIC environment by adding
--enable-ebcdic --disable-unicode
to the configure command. This setting implies --enable-rebuild-charta-
- bles. You should only use it if you know that you are in an EBCDIC en-
+ bles. You should only use it if you know that you are in an EBCDIC en-
vironment (for example, an IBM mainframe operating system).
- It is not possible to support both EBCDIC and UTF-8 codes in the same
- version of the library. Consequently, --enable-unicode and --enable-
+ It is not possible to support both EBCDIC and UTF-8 codes in the same
+ version of the library. Consequently, --enable-unicode and --enable-
ebcdic are mutually exclusive.
The EBCDIC character that corresponds to an ASCII LF is assumed to have
- the value 0x15 by default. However, in some EBCDIC environments, 0x25
+ the value 0x15 by default. However, in some EBCDIC environments, 0x25
is used. In such an environment you should use
--enable-ebcdic-nl25
as well as, or instead of, --enable-ebcdic. The EBCDIC character for CR
- has the same value as in ASCII, namely, 0x0d. Whichever of 0x15 and
+ has the same value as in ASCII, namely, 0x0d. Whichever of 0x15 and
0x25 is not chosen as LF is made to correspond to the Unicode NEL char-
acter (which, in Unicode, is 0x85).
@@ -4071,47 +4165,47 @@ USING EBCDIC CODE
PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS
By default pcre2grep supports the use of callouts with string arguments
- within the patterns it is matching. There are two kinds: one that gen-
+ within the patterns it is matching. There are two kinds: one that gen-
erates output using local code, and another that calls an external pro-
- gram or script. If --disable-pcre2grep-callout-fork is added to the
- configure command, only the first kind of callout is supported; if
- --disable-pcre2grep-callout is used, all callouts are completely ig-
- nored. For more details of pcre2grep callouts, see the pcre2grep docu-
+ gram or script. If --disable-pcre2grep-callout-fork is added to the
+ configure command, only the first kind of callout is supported; if
+ --disable-pcre2grep-callout is used, all callouts are completely ig-
+ nored. For more details of pcre2grep callouts, see the pcre2grep docu-
mentation.
PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT
- By default, pcre2grep reads all files as plain text. You can build it
- so that it recognizes files whose names end in .gz or .bz2, and reads
+ By default, pcre2grep reads all files as plain text. You can build it
+ so that it recognizes files whose names end in .gz or .bz2, and reads
them with libz or libbz2, respectively, by adding one or both of
--enable-pcre2grep-libz
--enable-pcre2grep-libbz2
to the configure command. These options naturally require that the rel-
- evant libraries are installed on your system. Configuration will fail
+ evant libraries are installed on your system. Configuration will fail
if they are not.
PCRE2GREP BUFFER SIZE
- pcre2grep uses an internal buffer to hold a "window" on the file it is
+ pcre2grep uses an internal buffer to hold a "window" on the file it is
scanning, in order to be able to output "before" and "after" lines when
it finds a match. The default starting size of the buffer is 20KiB. The
- buffer itself is three times this size, but because of the way it is
+ buffer itself is three times this size, but because of the way it is
used for holding "before" lines, the longest line that is guaranteed to
be processable is the notional buffer size. If a longer line is encoun-
- tered, pcre2grep automatically expands the buffer, up to a specified
- maximum size, whose default is 1MiB or the starting size, whichever is
- the larger. You can change the default parameter values by adding, for
+ tered, pcre2grep automatically expands the buffer, up to a specified
+ maximum size, whose default is 1MiB or the starting size, whichever is
+ the larger. You can change the default parameter values by adding, for
example,
--with-pcre2grep-bufsize=51200
--with-pcre2grep-max-bufsize=2097152
- to the configure command. The caller of pcre2grep can override these
- values by using --buffer-size and --max-buffer-size on the command
+ to the configure command. The caller of pcre2grep can override these
+ values by using --buffer-size and --max-buffer-size on the command
line.
@@ -4122,26 +4216,26 @@ PCRE2TEST OPTION FOR LIBREADLINE SUPPORT
--enable-pcre2test-libreadline
--enable-pcre2test-libedit
- to the configure command, pcre2test is linked with the libreadline or-
- libedit library, respectively, and when its input is from a terminal,
- it reads it using the readline() function. This provides line-editing
- and history facilities. Note that libreadline is GPL-licensed, so if
- you distribute a binary of pcre2test linked in this way, there may be
+ to the configure command, pcre2test is linked with the libreadline or
+ libedit library, respectively, and when its input is from a terminal,
+ it reads it using the readline() function. This provides line-editing
+ and history facilities. Note that libreadline is GPL-licensed, so if
+ you distribute a binary of pcre2test linked in this way, there may be
licensing issues. These can be avoided by linking instead with libedit,
which has a BSD licence.
- Setting --enable-pcre2test-libreadline causes the -lreadline option to
- be added to the pcre2test build. In many operating environments with a
- sytem-installed readline library this is sufficient. However, in some
+ Setting --enable-pcre2test-libreadline causes the -lreadline option to
+ be added to the pcre2test build. In many operating environments with a
+ system-installed readline library this is sufficient. However, in some
environments (e.g. if an unmodified distribution version of readline is
- in use), some extra configuration may be necessary. The INSTALL file
+ in use), some extra configuration may be necessary. The INSTALL file
for libreadline says this:
"Readline uses the termcap functions, but does not link with
the termcap or curses library itself, allowing applications
which link with readline the to choose an appropriate library."
- If your environment has not been set up so that an appropriate library
+ If your environment has not been set up so that an appropriate library
is automatically included, you may need to add something like
LIBS="-lncurses"
@@ -4155,7 +4249,7 @@ INCLUDING DEBUGGING CODE
--enable-debug
- to the configure command, additional debugging code is included in the
+ to the configure command, additional debugging code is included in the
build. This feature is intended for use by the PCRE2 maintainers.
@@ -4165,14 +4259,14 @@ DEBUGGING WITH VALGRIND SUPPORT
--enable-valgrind
- to the configure command, PCRE2 will use valgrind annotations to mark
- certain memory regions as unaddressable. This allows it to detect in-
+ to the configure command, PCRE2 will use valgrind annotations to mark
+ certain memory regions as unaddressable. This allows it to detect in-
valid memory accesses, and is mostly useful for debugging PCRE2 itself.
CODE COVERAGE REPORTING
- If your C compiler is gcc, you can build a version of PCRE2 that can
+ If your C compiler is gcc, you can build a version of PCRE2 that can
generate a code coverage report for its test suite. To enable this, you
must install lcov version 1.6 or above. Then specify
@@ -4181,20 +4275,20 @@ CODE COVERAGE REPORTING
to the configure command and build PCRE2 in the usual way.
Note that using ccache (a caching C compiler) is incompatible with code
- coverage reporting. If you have configured ccache to run automatically
+ coverage reporting. If you have configured ccache to run automatically
on your system, you must set the environment variable
CCACHE_DISABLE=1
before running make to build PCRE2, so that ccache is not used.
- When --enable-coverage is used, the following addition targets are
+ When --enable-coverage is used, the following additional targets are
added to the Makefile:
make coverage
- This creates a fresh coverage report for the PCRE2 test suite. It is
- equivalent to running "make coverage-reset", "make coverage-baseline",
+ This creates a fresh coverage report for the PCRE2 test suite. It is
+ equivalent to running "make coverage-reset", "make coverage-baseline",
"make check", and then "make coverage-report".
make coverage-reset
@@ -4211,71 +4305,71 @@ CODE COVERAGE REPORTING
make coverage-clean-report
- This removes the generated coverage report without cleaning the cover-
+ This removes the generated coverage report without cleaning the cover-
age data itself.
make coverage-clean-data
- This removes the captured coverage data without removing the coverage
+ This removes the captured coverage data without removing the coverage
files created at compile time (*.gcno).
make coverage-clean
- This cleans all coverage data including the generated coverage report.
- For more information about code coverage, see the gcov and lcov docu-
+ This cleans all coverage data including the generated coverage report.
+ For more information about code coverage, see the gcov and lcov docu-
mentation.
DISABLING THE Z AND T FORMATTING MODIFIERS
- The C99 standard defines formatting modifiers z and t for size_t and
- ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers
- in environments other than Microsoft Visual Studio when __STDC_VER-
+ The C99 standard defines formatting modifiers z and t for size_t and
+ ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers
+ in environments other than Microsoft Visual Studio when __STDC_VER-
SION__ is defined and has a value greater than or equal to 199901L (in-
- dicating C99). However, there is at least one environment that claims
+ dicating C99). However, there is at least one environment that claims
to be C99 but does not support these modifiers. If
--disable-percent-zt
- is specified, no use is made of the z or t modifiers. Instead or %td or
+ is specified, no use is made of the z or t modifiers. Instead of %td or
%zu, %lu is used, with a cast for size_t values.
SUPPORT FOR FUZZERS
- There is a special option for use by people who want to run fuzzing
+ There is a special option for use by people who want to run fuzzing
tests on PCRE2:
--enable-fuzz-support
At present this applies only to the 8-bit library. If set, it causes an
- extra library called libpcre2-fuzzsupport.a to be built, but not in-
- stalled. This contains a single function called LLVMFuzzerTestOneIn-
- put() whose arguments are a pointer to a string and the length of the
- string. When called, this function tries to compile the string as a
- pattern, and if that succeeds, to match it. This is done both with no
- options and with some random options bits that are generated from the
+ extra library called libpcre2-fuzzsupport.a to be built, but not in-
+ stalled. This contains a single function called LLVMFuzzerTestOneIn-
+ put() whose arguments are a pointer to a string and the length of the
+ string. When called, this function tries to compile the string as a
+ pattern, and if that succeeds, to match it. This is done both with no
+ options and with some random option bits that are generated from the
string.
- Setting --enable-fuzz-support also causes a binary called pcre2fuz-
- zcheck to be created. This is normally run under valgrind or used when
+ Setting --enable-fuzz-support also causes a binary called pcre2fuz-
+ zcheck to be created. This is normally run under valgrind or used when
PCRE2 is compiled with address sanitizing enabled. It calls the fuzzing
- function and outputs information about what it is doing. The input
- strings are specified by arguments: if an argument starts with "=" the
- rest of it is a literal input string. Otherwise, it is assumed to be a
+ function and outputs information about what it is doing. The input
+ strings are specified by arguments: if an argument starts with "=" the
+ rest of it is a literal input string. Otherwise, it is assumed to be a
file name, and the contents of the file are the test string.
OBSOLETE OPTION
- In versions of PCRE2 prior to 10.30, there were two ways of handling
- backtracking in the pcre2_match() function. The default was to use the
+ In versions of PCRE2 prior to 10.30, there were two ways of handling
+ backtracking in the pcre2_match() function. The default was to use the
system stack, but if
--disable-stack-for-recursion
- was set, memory on the heap was used. From release 10.30 onwards this
- has changed (the stack is no longer used) and this option now does
+ was set, memory on the heap was used. From release 10.30 onwards this
+ has changed (the stack is no longer used) and this option now does
nothing except give a warning.
@@ -4293,8 +4387,8 @@ AUTHOR
REVISION
- Last updated: 03 March 2019
- Copyright (c) 1997-2019 University of Cambridge.
+ Last updated: 20 March 2020
+ Copyright (c) 1997-2020 University of Cambridge.
------------------------------------------------------------------------------
@@ -5240,7 +5334,7 @@ JIT STACK FAQ
pcre2_match() again. When you assign the stack to a match context, only
a pointer is set. There is no reference counting or any other magic.
You can free compiled patterns, contexts, and stacks in any order, any-
- time. Just do not call pcre2_match() with a match context pointing to
+ time. Just do not call pcre2_match() with a match context pointing to
an already freed stack, as that will cause SEGFAULT. (Also, do not free
a stack currently used by pcre2_match() in another thread). You can
also replace the stack in a context at any time when it is not in use.
@@ -6105,7 +6199,9 @@ SPECIAL START-OF-PATTERN ITEMS
(*UCP). This has the same effect as setting the PCRE2_UCP option: it
causes sequences such as \d and \w to use Unicode properties to deter-
mine character types, instead of recognizing only characters with codes
- less than 256 via a lookup table.
+ less than 256 via a lookup table. It also causes upper/lower casing op-
+ erations to use Unicode properties for characters with code points
+ greater than 127, even when UTF is not set.
Some applications that allow their users to supply patterns may wish to
restrict them for security reasons. If the PCRE2_NEVER_UCP option is
@@ -6729,37 +6825,38 @@ BACKSLASH
Adlam, Ahom, Anatolian_Hieroglyphs, Arabic, Armenian, Avestan, Bali-
nese, Bamum, Bassa_Vah, Batak, Bengali, Bhaiksuki, Bopomofo, Brahmi,
Braille, Buginese, Buhid, Canadian_Aboriginal, Carian, Caucasian_Alba-
- nian, Chakma, Cham, Cherokee, Common, Coptic, Cuneiform, Cypriot,
- Cyrillic, Deseret, Devanagari, Dogra, Duployan, Egyptian_Hieroglyphs,
- Elbasan, Elymaic, Ethiopic, Georgian, Glagolitic, Gothic, Grantha,
- Greek, Gujarati, Gunjala_Gondi, Gurmukhi, Han, Hangul, Hanifi_Rohingya,
- Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Aramaic, Inherited, In-
- scriptional_Pahlavi, Inscriptional_Parthian, Javanese, Kaithi, Kannada,
- Katakana, Kayah_Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao, Latin,
- Lepcha, Limbu, Linear_A, Linear_B, Lisu, Lycian, Lydian, Mahajani,
- Makasar, Malayalam, Mandaic, Manichaean, Marchen, Masaram_Gondi, Mede-
- faidrin, Meetei_Mayek, Mende_Kikakui, Meroitic_Cursive, Meroitic_Hiero-
- glyphs, Miao, Modi, Mongolian, Mro, Multani, Myanmar, Nabataean, Nandi-
- nagari, New_Tai_Lue, Newa, Nko, Nushu, Nyakeng_Puachue_Hmong, Ogham,
- Ol_Chiki, Old_Hungarian, Old_Italic, Old_North_Arabian, Old_Permic,
- Old_Persian, Old_Sogdian, Old_South_Arabian, Old_Turkic, Oriya, Osage,
- Osmanya, Pahawh_Hmong, Palmyrene, Pau_Cin_Hau, Phags_Pa, Phoenician,
- Psalter_Pahlavi, Rejang, Runic, Samaritan, Saurashtra, Sharada, Sha-
- vian, Siddham, SignWriting, Sinhala, Sogdian, Sora_Sompeng, Soyombo,
- Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, Tai_Le, Tai_Tham,
- Tai_Viet, Takri, Tamil, Tangut, Telugu, Thaana, Thai, Tibetan, Tifi-
- nagh, Tirhuta, Ugaritic, Unknown, Vai, Wancho, Warang_Citi, Yi, Zan-
- abazar_Square.
+ nian, Chakma, Cham, Cherokee, Chorasmian, Common, Coptic, Cuneiform,
+ Cypriot, Cyrillic, Deseret, Devanagari, Dives_Akuru, Dogra, Duployan,
+ Egyptian_Hieroglyphs, Elbasan, Elymaic, Ethiopic, Georgian, Glagolitic,
+ Gothic, Grantha, Greek, Gujarati, Gunjala_Gondi, Gurmukhi, Han, Hangul,
+ Hanifi_Rohingya, Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Aramaic,
+ Inherited, Inscriptional_Pahlavi, Inscriptional_Parthian, Javanese,
+ Kaithi, Kannada, Katakana, Kayah_Li, Kharoshthi, Khitan_Small_Script,
+ Khmer, Khojki, Khudawadi, Lao, Latin, Lepcha, Limbu, Linear_A, Lin-
+ ear_B, Lisu, Lycian, Lydian, Mahajani, Makasar, Malayalam, Mandaic,
+ Manichaean, Marchen, Masaram_Gondi, Medefaidrin, Meetei_Mayek,
+ Mende_Kikakui, Meroitic_Cursive, Meroitic_Hieroglyphs, Miao, Modi, Mon-
+ golian, Mro, Multani, Myanmar, Nabataean, Nandinagari, New_Tai_Lue,
+ Newa, Nko, Nushu, Nyakeng_Puachue_Hmong, Ogham, Ol_Chiki, Old_Hungar-
+ ian, Old_Italic, Old_North_Arabian, Old_Permic, Old_Persian, Old_Sog-
+ dian, Old_South_Arabian, Old_Turkic, Oriya, Osage, Osmanya, Pa-
+ hawh_Hmong, Palmyrene, Pau_Cin_Hau, Phags_Pa, Phoenician,
+ Psalter_Pahlavi, Rejang, Runic, Samaritan, Saurashtra, Sharada, Sha-
+ vian, Siddham, SignWriting, Sinhala, Sogdian, Sora_Sompeng, Soyombo,
+ Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, Tai_Le, Tai_Tham,
+ Tai_Viet, Takri, Tamil, Tangut, Telugu, Thaana, Thai, Tibetan, Tifi-
+ nagh, Tirhuta, Ugaritic, Unknown, Vai, Wancho, Warang_Citi, Yezidi, Yi,
+ Zanabazar_Square.
Each character has exactly one Unicode general category property, spec-
- ified by a two-letter abbreviation. For compatibility with Perl, nega-
- tion can be specified by including a circumflex between the opening
- brace and the property name. For example, \p{^Lu} is the same as
+ ified by a two-letter abbreviation. For compatibility with Perl, nega-
+ tion can be specified by including a circumflex between the opening
+ brace and the property name. For example, \p{^Lu} is the same as
\P{Lu}.
If only one letter is specified with \p or \P, it includes all the gen-
- eral category properties that start with that letter. In this case, in
- the absence of negation, the curly brackets in the escape sequence are
+ eral category properties that start with that letter. In this case, in
+ the absence of negation, the curly brackets in the escape sequence are
optional; these two examples have the same effect:
\p{L}
@@ -6811,88 +6908,88 @@ BACKSLASH
Zp Paragraph separator
Zs Space separator
- The special property L& is also supported: it matches a character that
- has the Lu, Ll, or Lt property, in other words, a letter that is not
+ The special property L& is also supported: it matches a character that
+ has the Lu, Ll, or Lt property, in other words, a letter that is not
classified as a modifier or "other".
- The Cs (Surrogate) property applies only to characters whose code
- points are in the range U+D800 to U+DFFF. These characters are no dif-
- ferent to any other character when PCRE2 is not in UTF mode (using the
- 16-bit or 32-bit library). However, they are not valid in Unicode
+ The Cs (Surrogate) property applies only to characters whose code
+ points are in the range U+D800 to U+DFFF. These characters are no dif-
+ ferent to any other character when PCRE2 is not in UTF mode (using the
+ 16-bit or 32-bit library). However, they are not valid in Unicode
strings and so cannot be tested by PCRE2 in UTF mode, unless UTF valid-
- ity checking has been turned off (see the discussion of
+ ity checking has been turned off (see the discussion of
PCRE2_NO_UTF_CHECK in the pcre2api page).
- The long synonyms for property names that Perl supports (such as
- \p{Letter}) are not supported by PCRE2, nor is it permitted to prefix
+ The long synonyms for property names that Perl supports (such as
+ \p{Letter}) are not supported by PCRE2, nor is it permitted to prefix
any of these properties with "Is".
No character that is in the Unicode table has the Cn (unassigned) prop-
erty. Instead, this property is assumed for any code point that is not
in the Unicode table.
- Specifying caseless matching does not affect these escape sequences.
- For example, \p{Lu} always matches only upper case letters. This is
+ Specifying caseless matching does not affect these escape sequences.
+ For example, \p{Lu} always matches only upper case letters. This is
different from the behaviour of current versions of Perl.
- Matching characters by Unicode property is not fast, because PCRE2 has
- to do a multistage table lookup in order to find a character's prop-
+ Matching characters by Unicode property is not fast, because PCRE2 has
+ to do a multistage table lookup in order to find a character's prop-
erty. That is why the traditional escape sequences such as \d and \w do
- not use Unicode properties in PCRE2 by default, though you can make
- them do so by setting the PCRE2_UCP option or by starting the pattern
+ not use Unicode properties in PCRE2 by default, though you can make
+ them do so by setting the PCRE2_UCP option or by starting the pattern
with (*UCP).
Extended grapheme clusters
- The \X escape matches any number of Unicode characters that form an
+ The \X escape matches any number of Unicode characters that form an
"extended grapheme cluster", and treats the sequence as an atomic group
- (see below). Unicode supports various kinds of composite character by
- giving each character a grapheme breaking property, and having rules
+ (see below). Unicode supports various kinds of composite character by
+ giving each character a grapheme breaking property, and having rules
that use these properties to define the boundaries of extended grapheme
- clusters. The rules are defined in Unicode Standard Annex 29, "Unicode
- Text Segmentation". Unicode 11.0.0 abandoned the use of some previous
- properties that had been used for emojis. Instead it introduced vari-
- ous emoji-specific properties. PCRE2 uses only the Extended Picto-
+ clusters. The rules are defined in Unicode Standard Annex 29, "Unicode
+ Text Segmentation". Unicode 11.0.0 abandoned the use of some previous
+ properties that had been used for emojis. Instead it introduced vari-
+ ous emoji-specific properties. PCRE2 uses only the Extended Picto-
graphic property.
- \X always matches at least one character. Then it decides whether to
+ \X always matches at least one character. Then it decides whether to
add additional characters according to the following rules for ending a
cluster:
1. End at the end of the subject string.
- 2. Do not end between CR and LF; otherwise end after any control char-
+ 2. Do not end between CR and LF; otherwise end after any control char-
acter.
- 3. Do not break Hangul (a Korean script) syllable sequences. Hangul
- characters are of five types: L, V, T, LV, and LVT. An L character may
- be followed by an L, V, LV, or LVT character; an LV or V character may
+ 3. Do not break Hangul (a Korean script) syllable sequences. Hangul
+ characters are of five types: L, V, T, LV, and LVT. An L character may
+ be followed by an L, V, LV, or LVT character; an LV or V character may
be followed by a V or T character; an LVT or T character may be followed
only by a T character.
- 4. Do not end before extending characters or spacing marks or the
- "zero-width joiner" character. Characters with the "mark" property al-
+ 4. Do not end before extending characters or spacing marks or the
+ "zero-width joiner" character. Characters with the "mark" property al-
ways have the "extend" grapheme breaking property.
5. Do not end after prepend characters.
6. Do not break within emoji modifier sequences or emoji zwj sequences.
That is, do not break between characters with the Extended_Pictographic
- property. Extend and ZWJ characters are allowed between the charac-
+ property. Extend and ZWJ characters are allowed between the charac-
ters.
- 7. Do not break within emoji flag sequences. That is, do not break be-
- tween regional indicator (RI) characters if there are an odd number of
+ 7. Do not break within emoji flag sequences. That is, do not break be-
+ tween regional indicator (RI) characters if there are an odd number of
RI characters before the break point.
8. Otherwise, end the cluster.
PCRE2's additional properties
- As well as the standard Unicode properties described above, PCRE2 sup-
+ As well as the standard Unicode properties described above, PCRE2 sup-
ports four more that make it possible to convert traditional escape se-
- quences such as \w and \s to use Unicode properties. PCRE2 uses these
- non-standard, non-Perl properties internally when PCRE2_UCP is set.
+ quences such as \w and \s to use Unicode properties. PCRE2 uses these
+ non-standard, non-Perl properties internally when PCRE2_UCP is set.
However, they may also be used explicitly. These properties are:
Xan Any alphanumeric character
@@ -6900,69 +6997,69 @@ BACKSLASH
Xsp Any Perl space character
Xwd Any Perl "word" character
- Xan matches characters that have either the L (letter) or the N (num-
- ber) property. Xps matches the characters tab, linefeed, vertical tab,
- form feed, or carriage return, and any other character that has the Z
- (separator) property. Xsp is the same as Xps; in PCRE1 it used to ex-
- clude vertical tab, for Perl compatibility, but Perl changed. Xwd
+ Xan matches characters that have either the L (letter) or the N (num-
+ ber) property. Xps matches the characters tab, linefeed, vertical tab,
+ form feed, or carriage return, and any other character that has the Z
+ (separator) property. Xsp is the same as Xps; in PCRE1 it used to ex-
+ clude vertical tab, for Perl compatibility, but Perl changed. Xwd
matches the same characters as Xan, plus underscore.
- There is another non-standard property, Xuc, which matches any charac-
- ter that can be represented by a Universal Character Name in C++ and
- other programming languages. These are the characters $, @, ` (grave
- accent), and all characters with Unicode code points greater than or
- equal to U+00A0, except for the surrogates U+D800 to U+DFFF. Note that
- most base (ASCII) characters are excluded. (Universal Character Names
- are of the form \uHHHH or \UHHHHHHHH where H is a hexadecimal digit.
+ There is another non-standard property, Xuc, which matches any charac-
+ ter that can be represented by a Universal Character Name in C++ and
+ other programming languages. These are the characters $, @, ` (grave
+ accent), and all characters with Unicode code points greater than or
+ equal to U+00A0, except for the surrogates U+D800 to U+DFFF. Note that
+ most base (ASCII) characters are excluded. (Universal Character Names
+ are of the form \uHHHH or \UHHHHHHHH where H is a hexadecimal digit.
Note that the Xuc property does not match these sequences but the char-
acters that they represent.)
Resetting the match start
- In normal use, the escape sequence \K causes any previously matched
+ In normal use, the escape sequence \K causes any previously matched
characters not to be included in the final matched sequence that is re-
turned. For example, the pattern:
foo\Kbar
- matches "foobar", but reports that it has matched "bar". \K does not
+ matches "foobar", but reports that it has matched "bar". \K does not
interact with anchoring in any way. The pattern:
^foo\Kbar
- matches only when the subject begins with "foobar" (in single line
- mode), though it again reports the matched string as "bar". This fea-
- ture is similar to a lookbehind assertion (described below). However,
- in this case, the part of the subject before the real match does not
- have to be of fixed length, as lookbehind assertions do. The use of \K
- does not interfere with the setting of captured substrings. For exam-
+ matches only when the subject begins with "foobar" (in single line
+ mode), though it again reports the matched string as "bar". This fea-
+ ture is similar to a lookbehind assertion (described below). However,
+ in this case, the part of the subject before the real match does not
+ have to be of fixed length, as lookbehind assertions do. The use of \K
+ does not interfere with the setting of captured substrings. For exam-
ple, when the pattern
(foo)\Kbar
matches "foobar", the first substring is still set to "foo".
- Perl documents that the use of \K within assertions is "not well de-
- fined". In PCRE2, \K is acted upon when it occurs inside positive as-
- sertions, but is ignored in negative assertions. Note that when a pat-
- tern such as (?=ab\K) matches, the reported start of the match can be
- greater than the end of the match. Using \K in a lookbehind assertion
- at the start of a pattern can also lead to odd effects. For example,
+ Perl documents that the use of \K within assertions is "not well de-
+ fined". In PCRE2, \K is acted upon when it occurs inside positive as-
+ sertions, but is ignored in negative assertions. Note that when a pat-
+ tern such as (?=ab\K) matches, the reported start of the match can be
+ greater than the end of the match. Using \K in a lookbehind assertion
+ at the start of a pattern can also lead to odd effects. For example,
consider this pattern:
(?<=\Kfoo)bar
- If the subject is "foobar", a call to pcre2_match() with a starting
- offset of 3 succeeds and reports the matching string as "foobar", that
- is, the start of the reported match is earlier than where the match
+ If the subject is "foobar", a call to pcre2_match() with a starting
+ offset of 3 succeeds and reports the matching string as "foobar", that
+ is, the start of the reported match is earlier than where the match
started.
Simple assertions
- The final use of backslash is for certain simple assertions. An asser-
- tion specifies a condition that has to be met at a particular point in
- a match, without consuming any characters from the subject string. The
- use of groups for more complicated assertions is described below. The
+ The final use of backslash is for certain simple assertions. An asser-
+ tion specifies a condition that has to be met at a particular point in
+ a match, without consuming any characters from the subject string. The
+ use of groups for more complicated assertions is described below. The
backslashed assertions are:
\b matches at a word boundary
@@ -6973,191 +7070,191 @@ BACKSLASH
\z matches only at the end of the subject
\G matches at the first matching position in the subject
- Inside a character class, \b has a different meaning; it matches the
- backspace character. If any other of these assertions appears in a
+ Inside a character class, \b has a different meaning; it matches the
+ backspace character. If any other of these assertions appears in a
character class, an "invalid escape sequence" error is generated.
- A word boundary is a position in the subject string where the current
- character and the previous character do not both match \w or \W (i.e.
- one matches \w and the other matches \W), or the start or end of the
- string if the first or last character matches \w, respectively. When
- PCRE2 is built with Unicode support, the meanings of \w and \W can be
+ A word boundary is a position in the subject string where the current
+ character and the previous character do not both match \w or \W (i.e.
+ one matches \w and the other matches \W), or the start or end of the
+ string if the first or last character matches \w, respectively. When
+ PCRE2 is built with Unicode support, the meanings of \w and \W can be
changed by setting the PCRE2_UCP option. When this is done, it also af-
- fects \b and \B. Neither PCRE2 nor Perl has a separate "start of word"
- or "end of word" metasequence. However, whatever follows \b normally
- determines which it is. For example, the fragment \ba matches "a" at
+ fects \b and \B. Neither PCRE2 nor Perl has a separate "start of word"
+ or "end of word" metasequence. However, whatever follows \b normally
+ determines which it is. For example, the fragment \ba matches "a" at
the start of a word.
- The \A, \Z, and \z assertions differ from the traditional circumflex
+ The \A, \Z, and \z assertions differ from the traditional circumflex
and dollar (described in the next section) in that they only ever match
- at the very start and end of the subject string, whatever options are
- set. Thus, they are independent of multiline mode. These three asser-
- tions are not affected by the PCRE2_NOTBOL or PCRE2_NOTEOL options,
- which affect only the behaviour of the circumflex and dollar metachar-
- acters. However, if the startoffset argument of pcre2_match() is non-
- zero, indicating that matching is to start at a point other than the
- beginning of the subject, \A can never match. The difference between
- \Z and \z is that \Z matches before a newline at the end of the string
+ at the very start and end of the subject string, whatever options are
+ set. Thus, they are independent of multiline mode. These three asser-
+ tions are not affected by the PCRE2_NOTBOL or PCRE2_NOTEOL options,
+ which affect only the behaviour of the circumflex and dollar metachar-
+ acters. However, if the startoffset argument of pcre2_match() is non-
+ zero, indicating that matching is to start at a point other than the
+ beginning of the subject, \A can never match. The difference between
+ \Z and \z is that \Z matches before a newline at the end of the string
as well as at the very end, whereas \z matches only at the end.
- The \G assertion is true only when the current matching position is at
- the start point of the matching process, as specified by the startoff-
- set argument of pcre2_match(). It differs from \A when the value of
- startoffset is non-zero. By calling pcre2_match() multiple times with
- appropriate arguments, you can mimic Perl's /g option, and it is in
+ The \G assertion is true only when the current matching position is at
+ the start point of the matching process, as specified by the startoff-
+ set argument of pcre2_match(). It differs from \A when the value of
+ startoffset is non-zero. By calling pcre2_match() multiple times with
+ appropriate arguments, you can mimic Perl's /g option, and it is in
this kind of implementation where \G can be useful.
- Note, however, that PCRE2's implementation of \G, being true at the
- starting character of the matching process, is subtly different from
- Perl's, which defines it as true at the end of the previous match. In
- Perl, these can be different when the previously matched string was
+ Note, however, that PCRE2's implementation of \G, being true at the
+ starting character of the matching process, is subtly different from
+ Perl's, which defines it as true at the end of the previous match. In
+ Perl, these can be different when the previously matched string was
empty. Because PCRE2 does just one match at a time, it cannot reproduce
this behaviour.
- If all the alternatives of a pattern begin with \G, the expression is
+ If all the alternatives of a pattern begin with \G, the expression is
anchored to the starting match position, and the "anchored" flag is set
in the compiled regular expression.
CIRCUMFLEX AND DOLLAR
- The circumflex and dollar metacharacters are zero-width assertions.
- That is, they test for a particular condition being true without con-
+ The circumflex and dollar metacharacters are zero-width assertions.
+ That is, they test for a particular condition being true without con-
suming any characters from the subject string. These two metacharacters
- are concerned with matching the starts and ends of lines. If the new-
- line convention is set so that only the two-character sequence CRLF is
- recognized as a newline, isolated CR and LF characters are treated as
+ are concerned with matching the starts and ends of lines. If the new-
+ line convention is set so that only the two-character sequence CRLF is
+ recognized as a newline, isolated CR and LF characters are treated as
ordinary data characters, and are not recognized as newlines.
Outside a character class, in the default matching mode, the circumflex
- character is an assertion that is true only if the current matching
- point is at the start of the subject string. If the startoffset argu-
- ment of pcre2_match() is non-zero, or if PCRE2_NOTBOL is set, circum-
- flex can never match if the PCRE2_MULTILINE option is unset. Inside a
- character class, circumflex has an entirely different meaning (see be-
+ character is an assertion that is true only if the current matching
+ point is at the start of the subject string. If the startoffset argu-
+ ment of pcre2_match() is non-zero, or if PCRE2_NOTBOL is set, circum-
+ flex can never match if the PCRE2_MULTILINE option is unset. Inside a
+ character class, circumflex has an entirely different meaning (see be-
low).
- Circumflex need not be the first character of the pattern if a number
- of alternatives are involved, but it should be the first thing in each
- alternative in which it appears if the pattern is ever to match that
- branch. If all possible alternatives start with a circumflex, that is,
- if the pattern is constrained to match only at the start of the sub-
- ject, it is said to be an "anchored" pattern. (There are also other
+ Circumflex need not be the first character of the pattern if a number
+ of alternatives are involved, but it should be the first thing in each
+ alternative in which it appears if the pattern is ever to match that
+ branch. If all possible alternatives start with a circumflex, that is,
+ if the pattern is constrained to match only at the start of the sub-
+ ject, it is said to be an "anchored" pattern. (There are also other
constructs that can cause a pattern to be anchored.)
- The dollar character is an assertion that is true only if the current
- matching point is at the end of the subject string, or immediately be-
- fore a newline at the end of the string (by default), unless PCRE2_NO-
- TEOL is set. Note, however, that it does not actually match the new-
- line. Dollar need not be the last character of the pattern if a number
- of alternatives are involved, but it should be the last item in any
- branch in which it appears. Dollar has no special meaning in a charac-
+ The dollar character is an assertion that is true only if the current
+ matching point is at the end of the subject string, or immediately be-
+ fore a newline at the end of the string (by default), unless PCRE2_NO-
+ TEOL is set. Note, however, that it does not actually match the new-
+ line. Dollar need not be the last character of the pattern if a number
+ of alternatives are involved, but it should be the last item in any
+ branch in which it appears. Dollar has no special meaning in a charac-
ter class.
- The meaning of dollar can be changed so that it matches only at the
- very end of the string, by setting the PCRE2_DOLLAR_ENDONLY option at
+ The meaning of dollar can be changed so that it matches only at the
+ very end of the string, by setting the PCRE2_DOLLAR_ENDONLY option at
compile time. This does not affect the \Z assertion.
The meanings of the circumflex and dollar metacharacters are changed if
- the PCRE2_MULTILINE option is set. When this is the case, a dollar
- character matches before any newlines in the string, as well as at the
- very end, and a circumflex matches immediately after internal newlines
- as well as at the start of the subject string. It does not match after
- a newline that ends the string, for compatibility with Perl. However,
+ the PCRE2_MULTILINE option is set. When this is the case, a dollar
+ character matches before any newlines in the string, as well as at the
+ very end, and a circumflex matches immediately after internal newlines
+ as well as at the start of the subject string. It does not match after
+ a newline that ends the string, for compatibility with Perl. However,
this can be changed by setting the PCRE2_ALT_CIRCUMFLEX option.
- For example, the pattern /^abc$/ matches the subject string "def\nabc"
- (where \n represents a newline) in multiline mode, but not otherwise.
- Consequently, patterns that are anchored in single line mode because
- all branches start with ^ are not anchored in multiline mode, and a
- match for circumflex is possible when the startoffset argument of
- pcre2_match() is non-zero. The PCRE2_DOLLAR_ENDONLY option is ignored
+ For example, the pattern /^abc$/ matches the subject string "def\nabc"
+ (where \n represents a newline) in multiline mode, but not otherwise.
+ Consequently, patterns that are anchored in single line mode because
+ all branches start with ^ are not anchored in multiline mode, and a
+ match for circumflex is possible when the startoffset argument of
+ pcre2_match() is non-zero. The PCRE2_DOLLAR_ENDONLY option is ignored
if PCRE2_MULTILINE is set.
- When the newline convention (see "Newline conventions" below) recog-
- nizes the two-character sequence CRLF as a newline, this is preferred,
- even if the single characters CR and LF are also recognized as new-
- lines. For example, if the newline convention is "any", a multiline
- mode circumflex matches before "xyz" in the string "abc\r\nxyz" rather
- than after CR, even though CR on its own is a valid newline. (It also
+ When the newline convention (see "Newline conventions" below) recog-
+ nizes the two-character sequence CRLF as a newline, this is preferred,
+ even if the single characters CR and LF are also recognized as new-
+ lines. For example, if the newline convention is "any", a multiline
+ mode circumflex matches before "xyz" in the string "abc\r\nxyz" rather
+ than after CR, even though CR on its own is a valid newline. (It also
matches at the very start of the string, of course.)
- Note that the sequences \A, \Z, and \z can be used to match the start
- and end of the subject in both modes, and if all branches of a pattern
- start with \A it is always anchored, whether or not PCRE2_MULTILINE is
+ Note that the sequences \A, \Z, and \z can be used to match the start
+ and end of the subject in both modes, and if all branches of a pattern
+ start with \A it is always anchored, whether or not PCRE2_MULTILINE is
set.
FULL STOP (PERIOD, DOT) AND \N
Outside a character class, a dot in the pattern matches any one charac-
- ter in the subject string except (by default) a character that signi-
+ ter in the subject string except (by default) a character that signi-
fies the end of a line.
- When a line ending is defined as a single character, dot never matches
- that character; when the two-character sequence CRLF is used, dot does
- not match CR if it is immediately followed by LF, but otherwise it
- matches all characters (including isolated CRs and LFs). When any Uni-
- code line endings are being recognized, dot does not match CR or LF or
+ When a line ending is defined as a single character, dot never matches
+ that character; when the two-character sequence CRLF is used, dot does
+ not match CR if it is immediately followed by LF, but otherwise it
+ matches all characters (including isolated CRs and LFs). When any Uni-
+ code line endings are being recognized, dot does not match CR or LF or
any of the other line ending characters.
- The behaviour of dot with regard to newlines can be changed. If the
- PCRE2_DOTALL option is set, a dot matches any one character, without
- exception. If the two-character sequence CRLF is present in the sub-
+ The behaviour of dot with regard to newlines can be changed. If the
+ PCRE2_DOTALL option is set, a dot matches any one character, without
+ exception. If the two-character sequence CRLF is present in the sub-
ject string, it takes two dots to match it.
- The handling of dot is entirely independent of the handling of circum-
- flex and dollar, the only relationship being that they both involve
+ The handling of dot is entirely independent of the handling of circum-
+ flex and dollar, the only relationship being that they both involve
newlines. Dot has no special meaning in a character class.
- The escape sequence \N when not followed by an opening brace behaves
- like a dot, except that it is not affected by the PCRE2_DOTALL option.
- In other words, it matches any character except one that signifies the
+ The escape sequence \N when not followed by an opening brace behaves
+ like a dot, except that it is not affected by the PCRE2_DOTALL option.
+ In other words, it matches any character except one that signifies the
end of a line.
When \N is followed by an opening brace it has a different meaning. See
- the section entitled "Non-printing characters" above for details. Perl
- also uses \N{name} to specify characters by Unicode name; PCRE2 does
+ the section entitled "Non-printing characters" above for details. Perl
+ also uses \N{name} to specify characters by Unicode name; PCRE2 does
not support this.
MATCHING A SINGLE CODE UNIT
- Outside a character class, the escape sequence \C matches any one code
- unit, whether or not a UTF mode is set. In the 8-bit library, one code
- unit is one byte; in the 16-bit library it is a 16-bit unit; in the
- 32-bit library it is a 32-bit unit. Unlike a dot, \C always matches
- line-ending characters. The feature is provided in Perl in order to
+ Outside a character class, the escape sequence \C matches any one code
+ unit, whether or not a UTF mode is set. In the 8-bit library, one code
+ unit is one byte; in the 16-bit library it is a 16-bit unit; in the
+ 32-bit library it is a 32-bit unit. Unlike a dot, \C always matches
+ line-ending characters. The feature is provided in Perl in order to
match individual bytes in UTF-8 mode, but it is unclear how it can use-
fully be used.
- Because \C breaks up characters into individual code units, matching
- one unit with \C in UTF-8 or UTF-16 mode means that the rest of the
+ Because \C breaks up characters into individual code units, matching
+ one unit with \C in UTF-8 or UTF-16 mode means that the rest of the
string may start with a malformed UTF character. This has undefined re-
sults, because PCRE2 assumes that it is matching character by character
in a valid UTF string (by default it checks the subject string's valid-
- ity at the start of processing unless the PCRE2_NO_UTF_CHECK or
+ ity at the start of processing unless the PCRE2_NO_UTF_CHECK or
PCRE2_MATCH_INVALID_UTF option is used).
- An application can lock out the use of \C by setting the
- PCRE2_NEVER_BACKSLASH_C option when compiling a pattern. It is also
+ An application can lock out the use of \C by setting the
+ PCRE2_NEVER_BACKSLASH_C option when compiling a pattern. It is also
possible to build PCRE2 with the use of \C permanently disabled.
- PCRE2 does not allow \C to appear in lookbehind assertions (described
- below) in UTF-8 or UTF-16 modes, because this would make it impossible
- to calculate the length of the lookbehind. Neither the alternative
+ PCRE2 does not allow \C to appear in lookbehind assertions (described
+ below) in UTF-8 or UTF-16 modes, because this would make it impossible
+ to calculate the length of the lookbehind. Neither the alternative
matching function pcre2_dfa_match() nor the JIT optimizer supports \C in
these UTF modes. The former gives a match-time error; the latter fails
to optimize and so the match is always run using the interpreter.
- In the 32-bit library, however, \C is always supported (when not ex-
- plicitly locked out) because it always matches a single code unit,
+ In the 32-bit library, however, \C is always supported (when not ex-
+ plicitly locked out) because it always matches a single code unit,
whether or not UTF-32 is specified.
In general, the \C escape sequence is best avoided. However, one way of
- using it that avoids the problem of malformed UTF-8 or UTF-16 charac-
- ters is to use a lookahead to check the length of the next character,
- as in this pattern, which could be used with a UTF-8 string (ignore
+ using it that avoids the problem of malformed UTF-8 or UTF-16 charac-
+ ters is to use a lookahead to check the length of the next character,
+ as in this pattern, which could be used with a UTF-8 string (ignore
white space and line breaks):
(?| (?=[\x00-\x7f])(\C) |
@@ -7165,11 +7262,11 @@ MATCHING A SINGLE CODE UNIT
(?=[\x{800}-\x{ffff}])(\C)(\C)(\C) |
(?=[\x{10000}-\x{1fffff}])(\C)(\C)(\C)(\C))
- In this example, a group that starts with (?| resets the capturing
- parentheses numbers in each alternative (see "Duplicate Group Numbers"
+ In this example, a group that starts with (?| resets the capturing
+ parentheses numbers in each alternative (see "Duplicate Group Numbers"
below). The assertions at the start of each branch check the next UTF-8
- character for values whose encoding uses 1, 2, 3, or 4 bytes, respec-
- tively. The character's individual bytes are then captured by the ap-
+ character for values whose encoding uses 1, 2, 3, or 4 bytes, respec-
+ tively. The character's individual bytes are then captured by the ap-
propriate number of \C groups.
@@ -7177,122 +7274,122 @@ SQUARE BRACKETS AND CHARACTER CLASSES
An opening square bracket introduces a character class, terminated by a
closing square bracket. A closing square bracket on its own is not spe-
- cial by default. If a closing square bracket is required as a member
+ cial by default. If a closing square bracket is required as a member
of the class, it should be the first data character in the class (after
- an initial circumflex, if present) or escaped with a backslash. This
- means that, by default, an empty class cannot be defined. However, if
- the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing square bracket at
+ an initial circumflex, if present) or escaped with a backslash. This
+ means that, by default, an empty class cannot be defined. However, if
+ the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing square bracket at
the start does end the (empty) class.
- A character class matches a single character in the subject. A matched
+ A character class matches a single character in the subject. A matched
character must be in the set of characters defined by the class, unless
- the first character in the class definition is a circumflex, in which
+ the first character in the class definition is a circumflex, in which
case the subject character must not be in the set defined by the class.
- If a circumflex is actually required as a member of the class, ensure
+ If a circumflex is actually required as a member of the class, ensure
it is not the first character, or escape it with a backslash.
- For example, the character class [aeiou] matches any lower case vowel,
- while [^aeiou] matches any character that is not a lower case vowel.
+ For example, the character class [aeiou] matches any lower case vowel,
+ while [^aeiou] matches any character that is not a lower case vowel.
Note that a circumflex is just a convenient notation for specifying the
- characters that are in the class by enumerating those that are not. A
- class that starts with a circumflex is not an assertion; it still con-
- sumes a character from the subject string, and therefore it fails if
+ characters that are in the class by enumerating those that are not. A
+ class that starts with a circumflex is not an assertion; it still con-
+ sumes a character from the subject string, and therefore it fails if
the current pointer is at the end of the string.
- Characters in a class may be specified by their code points using \o,
- \x, or \N{U+hh..} in the usual way. When caseless matching is set, any
- letters in a class represent both their upper case and lower case ver-
- sions, so for example, a caseless [aeiou] matches "A" as well as "a",
- and a caseless [^aeiou] does not match "A", whereas a caseful version
+ Characters in a class may be specified by their code points using \o,
+ \x, or \N{U+hh..} in the usual way. When caseless matching is set, any
+ letters in a class represent both their upper case and lower case ver-
+ sions, so for example, a caseless [aeiou] matches "A" as well as "a",
+ and a caseless [^aeiou] does not match "A", whereas a caseful version
would.
- Characters that might indicate line breaks are never treated in any
- special way when matching character classes, whatever line-ending se-
- quence is in use, and whatever setting of the PCRE2_DOTALL and
- PCRE2_MULTILINE options is used. A class such as [^a] always matches
+ Characters that might indicate line breaks are never treated in any
+ special way when matching character classes, whatever line-ending se-
+ quence is in use, and whatever setting of the PCRE2_DOTALL and
+ PCRE2_MULTILINE options is used. A class such as [^a] always matches
one of these characters.
The generic character type escape sequences \d, \D, \h, \H, \p, \P, \s,
- \S, \v, \V, \w, and \W may appear in a character class, and add the
- characters that they match to the class. For example, [\dABCDEF]
- matches any hexadecimal digit. In UTF modes, the PCRE2_UCP option af-
+ \S, \v, \V, \w, and \W may appear in a character class, and add the
+ characters that they match to the class. For example, [\dABCDEF]
+ matches any hexadecimal digit. In UTF modes, the PCRE2_UCP option af-
fects the meanings of \d, \s, \w and their upper case partners, just as
it does when they appear outside a character class, as described in the
- section entitled "Generic character types" above. The escape sequence
- \b has a different meaning inside a character class; it matches the
- backspace character. The sequences \B, \R, and \X are not special in-
- side a character class. Like any other unrecognized escape sequences,
- they cause an error. The same is true for \N when not followed by an
+ section entitled "Generic character types" above. The escape sequence
+ \b has a different meaning inside a character class; it matches the
+ backspace character. The sequences \B, \R, and \X are not special in-
+ side a character class. Like any other unrecognized escape sequences,
+ they cause an error. The same is true for \N when not followed by an
opening brace.
- The minus (hyphen) character can be used to specify a range of charac-
- ters in a character class. For example, [d-m] matches any letter be-
- tween d and m, inclusive. If a minus character is required in a class,
- it must be escaped with a backslash or appear in a position where it
- cannot be interpreted as indicating a range, typically as the first or
+ The minus (hyphen) character can be used to specify a range of charac-
+ ters in a character class. For example, [d-m] matches any letter be-
+ tween d and m, inclusive. If a minus character is required in a class,
+ it must be escaped with a backslash or appear in a position where it
+ cannot be interpreted as indicating a range, typically as the first or
last character in the class, or immediately after a range. For example,
[b-d-z] matches letters in the range b to d, a hyphen character, or z.
Perl treats a hyphen as a literal if it appears before or after a POSIX
class (see below) or before or after a character type escape such as
- \d or \H. However, unless the hyphen is the last character in the
- class, Perl outputs a warning in its warning mode, as this is most
- likely a user error. As PCRE2 has no facility for warning, an error is
+ \d or \H. However, unless the hyphen is the last character in the
+ class, Perl outputs a warning in its warning mode, as this is most
+ likely a user error. As PCRE2 has no facility for warning, an error is
given in these cases.
It is not possible to have the literal character "]" as the end charac-
- ter of a range. A pattern such as [W-]46] is interpreted as a class of
- two characters ("W" and "-") followed by a literal string "46]", so it
- would match "W46]" or "-46]". However, if the "]" is escaped with a
- backslash it is interpreted as the end of range, so [W-\]46] is inter-
- preted as a class containing a range followed by two other characters.
- The octal or hexadecimal representation of "]" can also be used to end
+ ter of a range. A pattern such as [W-]46] is interpreted as a class of
+ two characters ("W" and "-") followed by a literal string "46]", so it
+ would match "W46]" or "-46]". However, if the "]" is escaped with a
+ backslash it is interpreted as the end of range, so [W-\]46] is inter-
+ preted as a class containing a range followed by two other characters.
+ The octal or hexadecimal representation of "]" can also be used to end
a range.
Ranges normally include all code points between the start and end char-
- acters, inclusive. They can also be used for code points specified nu-
- merically, for example [\000-\037]. Ranges can include any characters
- that are valid for the current mode. In any UTF mode, the so-called
- "surrogate" characters (those whose code points lie between 0xd800 and
- 0xdfff inclusive) may not be specified explicitly by default (the
- PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES option disables this check). How-
+ acters, inclusive. They can also be used for code points specified nu-
+ merically, for example [\000-\037]. Ranges can include any characters
+ that are valid for the current mode. In any UTF mode, the so-called
+ "surrogate" characters (those whose code points lie between 0xd800 and
+ 0xdfff inclusive) may not be specified explicitly by default (the
+ PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES option disables this check). How-
ever, ranges such as [\x{d7ff}-\x{e000}], which include the surrogates,
are always permitted.
- There is a special case in EBCDIC environments for ranges whose end
+ There is a special case in EBCDIC environments for ranges whose end
points are both specified as literal letters in the same case. For com-
- patibility with Perl, EBCDIC code points within the range that are not
- letters are omitted. For example, [h-k] matches only four characters,
+ patibility with Perl, EBCDIC code points within the range that are not
+ letters are omitted. For example, [h-k] matches only four characters,
even though the codes for h and k are 0x88 and 0x92, a range of 11 code
- points. However, if the range is specified numerically, for example,
+ points. However, if the range is specified numerically, for example,
[\x88-\x92] or [h-\x92], all code points are included.
If a range that includes letters is used when caseless matching is set,
it matches the letters in either case. For example, [W-c] is equivalent
- to [][\\^_`wxyzabc], matched caselessly, and in a non-UTF mode, if
- character tables for a French locale are in use, [\xc8-\xcb] matches
+ to [][\\^_`wxyzabc], matched caselessly, and in a non-UTF mode, if
+ character tables for a French locale are in use, [\xc8-\xcb] matches
accented E characters in both cases.
- A circumflex can conveniently be used with the upper case character
- types to specify a more restricted set of characters than the matching
- lower case type. For example, the class [^\W_] matches any letter or
+ A circumflex can conveniently be used with the upper case character
+ types to specify a more restricted set of characters than the matching
+ lower case type. For example, the class [^\W_] matches any letter or
digit, but not underscore, whereas [\w] includes underscore. A positive
character class should be read as "something OR something OR ..." and a
negative class as "NOT something AND NOT something AND NOT ...".
- The only metacharacters that are recognized in character classes are
- backslash, hyphen (only where it can be interpreted as specifying a
- range), circumflex (only at the start), opening square bracket (only
- when it can be interpreted as introducing a POSIX class name, or for a
- special compatibility feature - see the next two sections), and the
- terminating closing square bracket. However, escaping other non-al-
+ The only metacharacters that are recognized in character classes are
+ backslash, hyphen (only where it can be interpreted as specifying a
+ range), circumflex (only at the start), opening square bracket (only
+ when it can be interpreted as introducing a POSIX class name, or for a
+ special compatibility feature - see the next two sections), and the
+ terminating closing square bracket. However, escaping other non-al-
phanumeric characters does no harm.
POSIX CHARACTER CLASSES
Perl supports the POSIX notation for character classes. This uses names
- enclosed by [: and :] within the enclosing square brackets. PCRE2 also
+ enclosed by [: and :] within the enclosing square brackets. PCRE2 also
supports this notation. For example,
[01[:alpha:]%]
@@ -7315,13 +7412,13 @@ POSIX CHARACTER CLASSES
word "word" characters (same as \w)
xdigit hexadecimal digits
- The default "space" characters are HT (9), LF (10), VT (11), FF (12),
- CR (13), and space (32). If locale-specific matching is taking place,
- the list of space characters may be different; there may be fewer or
+ The default "space" characters are HT (9), LF (10), VT (11), FF (12),
+ CR (13), and space (32). If locale-specific matching is taking place,
+ the list of space characters may be different; there may be fewer or
more of them. "Space" and \s match the same set of characters.
- The name "word" is a Perl extension, and "blank" is a GNU extension
- from Perl 5.8. Another Perl extension is negation, which is indicated
+ The name "word" is a Perl extension, and "blank" is a GNU extension
+ from Perl 5.8. Another Perl extension is negation, which is indicated
by a ^ character after the colon. For example,
[12[:^digit:]]
@@ -7332,10 +7429,10 @@ POSIX CHARACTER CLASSES
By default, characters with values greater than 127 do not match any of
the POSIX character classes, although this may be different for charac-
- ters in the range 128-255 when locale-specific matching is happening.
- However, if the PCRE2_UCP option is passed to pcre2_compile(), some of
- the classes are changed so that Unicode character properties are used.
- This is achieved by replacing certain POSIX classes with other se-
+ ters in the range 128-255 when locale-specific matching is happening.
+ However, if the PCRE2_UCP option is passed to pcre2_compile(), some of
+ the classes are changed so that Unicode character properties are used.
+ This is achieved by replacing certain POSIX classes with other se-
quences, as follows:
[:alnum:] becomes \p{Xan}
@@ -7348,10 +7445,10 @@ POSIX CHARACTER CLASSES
[:upper:] becomes \p{Lu}
[:word:] becomes \p{Xwd}
- Negated versions, such as [:^alpha:] use \P instead of \p. Three other
+ Negated versions, such as [:^alpha:] use \P instead of \p. Three other
POSIX classes are handled specially in UCP mode:
- [:graph:] This matches characters that have glyphs that mark the page
+ [:graph:] This matches characters that have glyphs that mark the page
when printed. In Unicode property terms, it matches all char-
acters with the L, M, N, P, S, or Cf properties, except for:
@@ -7360,60 +7457,60 @@ POSIX CHARACTER CLASSES
U+2066 - U+2069 Various "isolate"s
- [:print:] This matches the same characters as [:graph:] plus space
- characters that are not controls, that is, characters with
+ [:print:] This matches the same characters as [:graph:] plus space
+ characters that are not controls, that is, characters with
the Zs property.
[:punct:] This matches all characters that have the Unicode P (punctua-
- tion) property, plus those characters with code points less
+ tion) property, plus those characters with code points less
than 256 that have the S (Symbol) property.
- The other POSIX classes are unchanged, and match only characters with
+ The other POSIX classes are unchanged, and match only characters with
code points less than 256.
COMPATIBILITY FEATURE FOR WORD BOUNDARIES
- In the POSIX.2 compliant library that was included in 4.4BSD Unix, the
- ugly syntax [[:<:]] and [[:>:]] is used for matching "start of word"
+ In the POSIX.2 compliant library that was included in 4.4BSD Unix, the
+ ugly syntax [[:<:]] and [[:>:]] is used for matching "start of word"
and "end of word". PCRE2 treats these items as follows:
[[:<:]] is converted to \b(?=\w)
[[:>:]] is converted to \b(?<=\w)
Only these exact character sequences are recognized. A sequence such as
- [a[:<:]b] provokes an error for an unrecognized POSIX class name. This
- support is not compatible with Perl. It is provided to help migrations
+ [a[:<:]b] provokes an error for an unrecognized POSIX class name. This
+ support is not compatible with Perl. It is provided to help migrations
from other environments, and is best not used in any new patterns. Note
- that \b matches at the start and the end of a word (see "Simple asser-
- tions" above), and in a Perl-style pattern the preceding or following
- character normally shows which is wanted, without the need for the as-
- sertions that are used above in order to give exactly the POSIX behav-
+ that \b matches at the start and the end of a word (see "Simple asser-
+ tions" above), and in a Perl-style pattern the preceding or following
+ character normally shows which is wanted, without the need for the as-
+ sertions that are used above in order to give exactly the POSIX behav-
iour.
VERTICAL BAR
- Vertical bar characters are used to separate alternative patterns. For
+ Vertical bar characters are used to separate alternative patterns. For
example, the pattern
gilbert|sullivan
- matches either "gilbert" or "sullivan". Any number of alternatives may
- appear, and an empty alternative is permitted (matching the empty
+ matches either "gilbert" or "sullivan". Any number of alternatives may
+ appear, and an empty alternative is permitted (matching the empty
string). The matching process tries each alternative in turn, from left
- to right, and the first one that succeeds is used. If the alternatives
- are within a group (defined below), "succeeds" means matching the rest
+ to right, and the first one that succeeds is used. If the alternatives
+ are within a group (defined below), "succeeds" means matching the rest
of the main pattern as well as the alternative in the group.
INTERNAL OPTION SETTING
- The settings of the PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL,
- PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE options
- can be changed from within the pattern by a sequence of letters en-
- closed between "(?" and ")". These options are Perl-compatible, and
- are described in detail in the pcre2api documentation. The option let-
+ The settings of the PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL,
+ PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE options
+ can be changed from within the pattern by a sequence of letters en-
+ closed between "(?" and ")". These options are Perl-compatible, and
+ are described in detail in the pcre2api documentation. The option let-
ters are:
i for PCRE2_CASELESS
@@ -7425,48 +7522,48 @@ INTERNAL OPTION SETTING
For example, (?im) sets caseless, multiline matching. It is also possi-
ble to unset these options by preceding the relevant letters with a hy-
- phen, for example (?-im). The two "extended" options are not indepen-
+ phen, for example (?-im). The two "extended" options are not indepen-
dent; unsetting either one cancels the effects of both of them.
- A combined setting and unsetting such as (?im-sx), which sets
- PCRE2_CASELESS and PCRE2_MULTILINE while unsetting PCRE2_DOTALL and
- PCRE2_EXTENDED, is also permitted. Only one hyphen may appear in the
- options string. If a letter appears both before and after the hyphen,
- the option is unset. An empty options setting "(?)" is allowed. Need-
+ A combined setting and unsetting such as (?im-sx), which sets
+ PCRE2_CASELESS and PCRE2_MULTILINE while unsetting PCRE2_DOTALL and
+ PCRE2_EXTENDED, is also permitted. Only one hyphen may appear in the
+ options string. If a letter appears both before and after the hyphen,
+ the option is unset. An empty options setting "(?)" is allowed. Need-
less to say, it has no effect.
- If the first character following (? is a circumflex, it causes all of
- the above options to be unset. Thus, (?^) is equivalent to (?-imnsx).
- Letters may follow the circumflex to cause some options to be re-in-
+ If the first character following (? is a circumflex, it causes all of
+ the above options to be unset. Thus, (?^) is equivalent to (?-imnsx).
+ Letters may follow the circumflex to cause some options to be re-in-
stated, but a hyphen may not appear.
- The PCRE2-specific options PCRE2_DUPNAMES and PCRE2_UNGREEDY can be
- changed in the same way as the Perl-compatible options by using the
+ The PCRE2-specific options PCRE2_DUPNAMES and PCRE2_UNGREEDY can be
+ changed in the same way as the Perl-compatible options by using the
characters J and U respectively. However, these are not unset by (?^).
- When one of these option changes occurs at top level (that is, not in-
- side group parentheses), the change applies to the remainder of the
- pattern that follows. An option change within a group (see below for a
+ When one of these option changes occurs at top level (that is, not in-
+ side group parentheses), the change applies to the remainder of the
+ pattern that follows. An option change within a group (see below for a
description of groups) affects only that part of the group that follows
it, so
(a(?i)b)c
- matches abc and aBc and no other strings (assuming PCRE2_CASELESS is
- not used). By this means, options can be made to have different set-
+ matches abc and aBc and no other strings (assuming PCRE2_CASELESS is
+ not used). By this means, options can be made to have different set-
tings in different parts of the pattern. Any changes made in one alter-
- native do carry on into subsequent branches within the same group. For
+ native do carry on into subsequent branches within the same group. For
example,
(a(?i)b|c)
- matches "ab", "aB", "c", and "C", even though when matching "C" the
- first branch is abandoned before the option setting. This is because
- the effects of option settings happen at compile time. There would be
+ matches "ab", "aB", "c", and "C", even though when matching "C" the
+ first branch is abandoned before the option setting. This is because
+ the effects of option settings happen at compile time. There would be
some very weird behaviour otherwise.
- As a convenient shorthand, if any option settings are required at the
- start of a non-capturing group (see the next section), the option let-
+ As a convenient shorthand, if any option settings are required at the
+ start of a non-capturing group (see the next section), the option let-
ters may appear between the "?" and the ":". Thus the two patterns
(?i:saturday|sunday)
@@ -7474,39 +7571,39 @@ INTERNAL OPTION SETTING
match exactly the same set of strings.
- Note: There are other PCRE2-specific options, applying to the whole
- pattern, which can be set by the application when the compiling func-
- tion is called. In addition, the pattern can contain special leading
- sequences such as (*CRLF) to override what the application has set or
- what has been defaulted. Details are given in the section entitled
+ Note: There are other PCRE2-specific options, applying to the whole
+ pattern, which can be set by the application when the compiling func-
+ tion is called. In addition, the pattern can contain special leading
+ sequences such as (*CRLF) to override what the application has set or
+ what has been defaulted. Details are given in the section entitled
"Newline sequences" above. There are also the (*UTF) and (*UCP) leading
- sequences that can be used to set UTF and Unicode property modes; they
- are equivalent to setting the PCRE2_UTF and PCRE2_UCP options, respec-
- tively. However, the application can set the PCRE2_NEVER_UTF and
- PCRE2_NEVER_UCP options, which lock out the use of the (*UTF) and
+ sequences that can be used to set UTF and Unicode property modes; they
+ are equivalent to setting the PCRE2_UTF and PCRE2_UCP options, respec-
+ tively. However, the application can set the PCRE2_NEVER_UTF and
+ PCRE2_NEVER_UCP options, which lock out the use of the (*UTF) and
(*UCP) sequences.
GROUPS
- Groups are delimited by parentheses (round brackets), which can be
+ Groups are delimited by parentheses (round brackets), which can be
nested. Turning part of a pattern into a group does two things:
1. It localizes a set of alternatives. For example, the pattern
cat(aract|erpillar|)
- matches "cataract", "caterpillar", or "cat". Without the parentheses,
+ matches "cataract", "caterpillar", or "cat". Without the parentheses,
it would match "cataract", "erpillar" or an empty string.
- 2. It creates a "capture group". This means that, when the whole pat-
- tern matches, the portion of the subject string that matched the group
- is passed back to the caller, separately from the portion that matched
- the whole pattern. (This applies only to the traditional matching
+ 2. It creates a "capture group". This means that, when the whole pat-
+ tern matches, the portion of the subject string that matched the group
+ is passed back to the caller, separately from the portion that matched
+ the whole pattern. (This applies only to the traditional matching
function; the DFA matching function does not support capturing.)
Opening parentheses are counted from left to right (starting from 1) to
- obtain numbers for capture groups. For example, if the string "the red
+ obtain numbers for capture groups. For example, if the string "the red
king" is matched against the pattern
the ((red|white) (king|queen))
@@ -7514,11 +7611,11 @@ GROUPS
the captured substrings are "red king", "red", and "king", and are num-
bered 1, 2, and 3, respectively.
- The fact that plain parentheses fulfil two functions is not always
- helpful. There are often times when grouping is required without cap-
- turing. If an opening parenthesis is followed by a question mark and a
- colon, the group does not do any capturing, and is not counted when
- computing the number of any subsequent capture groups. For example, if
+ The fact that plain parentheses fulfil two functions is not always
+ helpful. There are often times when grouping is required without cap-
+ turing. If an opening parenthesis is followed by a question mark and a
+ colon, the group does not do any capturing, and is not counted when
+ computing the number of any subsequent capture groups. For example, if
the string "the white queen" is matched against the pattern
the ((?:red|white) (king|queen))
@@ -7526,16 +7623,16 @@ GROUPS
the captured substrings are "white queen" and "queen", and are numbered
1 and 2. The maximum number of capture groups is 65535.
- As a convenient shorthand, if any option settings are required at the
- start of a non-capturing group, the option letters may appear between
+ As a convenient shorthand, if any option settings are required at the
+ start of a non-capturing group, the option letters may appear between
the "?" and the ":". Thus the two patterns
(?i:saturday|sunday)
(?:(?i)saturday|sunday)
match exactly the same set of strings. Because alternative branches are
- tried from left to right, and options are not reset until the end of
- the group is reached, an option setting in one branch does affect sub-
+ tried from left to right, and options are not reset until the end of
+ the group is reached, an option setting in one branch does affect sub-
sequent branches, so the above patterns match "SUNDAY" as well as "Sat-
urday".
@@ -7543,19 +7640,19 @@ GROUPS
DUPLICATE GROUP NUMBERS
Perl 5.10 introduced a feature whereby each alternative in a group uses
- the same numbers for its capturing parentheses. Such a group starts
- with (?| and is itself a non-capturing group. For example, consider
+ the same numbers for its capturing parentheses. Such a group starts
+ with (?| and is itself a non-capturing group. For example, consider
this pattern:
(?|(Sat)ur|(Sun))day
- Because the two alternatives are inside a (?| group, both sets of cap-
- turing parentheses are numbered one. Thus, when the pattern matches,
- you can look at captured substring number one, whichever alternative
- matched. This construct is useful when you want to capture part, but
+ Because the two alternatives are inside a (?| group, both sets of cap-
+ turing parentheses are numbered one. Thus, when the pattern matches,
+ you can look at captured substring number one, whichever alternative
+ matched. This construct is useful when you want to capture part, but
not all, of one of a number of alternatives. Inside a (?| group, paren-
- theses are numbered as usual, but the number is reset at the start of
- each branch. The numbers of any capturing parentheses that follow the
+ theses are numbered as usual, but the number is reset at the start of
+ each branch. The numbers of any capturing parentheses that follow the
whole group start after the highest number used in any branch. The fol-
lowing example is taken from the Perl documentation. The numbers under-
neath show in which buffer the captured content will be stored.
@@ -7564,13 +7661,13 @@ DUPLICATE GROUP NUMBERS
/ ( a ) (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
# 1 2 2 3 2 3 4
- A backreference to a capture group uses the most recent value that is
+ A backreference to a capture group uses the most recent value that is
set for the group. The following pattern matches "abcabc" or "defdef":
/(?|(abc)|(def))\1/
- In contrast, a subroutine call to a capture group always refers to the
- first one in the pattern with the given number. The following pattern
+ In contrast, a subroutine call to a capture group always refers to the
+ first one in the pattern with the given number. The following pattern
matches "abcabc" or "defabc":
/(?|(abc)|(def))(?1)/
@@ -7581,24 +7678,24 @@ DUPLICATE GROUP NUMBERS
If a condition test for a group's having matched refers to a non-unique
number, the test is true if any group with that number has matched.
- An alternative approach to using this "branch reset" feature is to use
+ An alternative approach to using this "branch reset" feature is to use
duplicate named groups, as described in the next section.
NAMED CAPTURE GROUPS
Identifying capture groups by number is simple, but it can be very hard
- to keep track of the numbers in complicated patterns. Furthermore, if
- an expression is modified, the numbers may change. To help with this
- difficulty, PCRE2 supports the naming of capture groups. This feature
- was not added to Perl until release 5.10. Python had the feature ear-
- lier, and PCRE1 introduced it at release 4.0, using the Python syntax.
+ to keep track of the numbers in complicated patterns. Furthermore, if
+ an expression is modified, the numbers may change. To help with this
+ difficulty, PCRE2 supports the naming of capture groups. This feature
+ was not added to Perl until release 5.10. Python had the feature ear-
+ lier, and PCRE1 introduced it at release 4.0, using the Python syntax.
PCRE2 supports both the Perl and the Python syntax.
- In PCRE2, a capture group can be named in one of three ways:
+ In PCRE2, a capture group can be named in one of three ways:
(?<name>...) or (?'name'...) as in Perl, or (?P<name>...) as in Python.
- Names may be up to 32 code units long. When PCRE2_UTF is not set, they
- may contain only ASCII alphanumeric characters and underscores, but
+ Names may be up to 32 code units long. When PCRE2_UTF is not set, they
+ may contain only ASCII alphanumeric characters and underscores, but
must start with a non-digit. When PCRE2_UTF is set, the syntax of group
names is extended to allow any Unicode letter or Unicode decimal digit.
In other words, group names must match one of these patterns:
@@ -7606,74 +7703,77 @@ NAMED CAPTURE GROUPS
^[_A-Za-z][_A-Za-z0-9]*\z when PCRE2_UTF is not set
^[_\p{L}][_\p{L}\p{Nd}]*\z when PCRE2_UTF is set
- References to capture groups from other parts of the pattern, such as
- backreferences, recursion, and conditions, can all be made by name as
+ References to capture groups from other parts of the pattern, such as
+ backreferences, recursion, and conditions, can all be made by name as
well as by number.
Named capture groups are allocated numbers as well as names, exactly as
- if the names were not present. In both PCRE2 and Perl, capture groups
- are primarily identified by numbers; any names are just aliases for
+ if the names were not present. In both PCRE2 and Perl, capture groups
+ are primarily identified by numbers; any names are just aliases for
these numbers. The PCRE2 API provides function calls for extracting the
- complete name-to-number translation table from a compiled pattern, as
- well as convenience functions for extracting captured substrings by
+ complete name-to-number translation table from a compiled pattern, as
+ well as convenience functions for extracting captured substrings by
name.
- Warning: When more than one capture group has the same number, as de-
+ Warning: When more than one capture group has the same number, as de-
scribed in the previous section, a name given to one of them applies to
- all of them. Perl allows identically numbered groups to have different
+ all of them. Perl allows identically numbered groups to have different
names. Consider this pattern, where there are two capture groups, both
numbered 1:
(?|(?<AA>aa)|(?<BB>bb))
- Perl allows this, with both names AA and BB as aliases of group 1.
+ Perl allows this, with both names AA and BB as aliases of group 1.
Thus, after a successful match, both names yield the same value (either
"aa" or "bb").
- In an attempt to reduce confusion, PCRE2 does not allow the same group
+ In an attempt to reduce confusion, PCRE2 does not allow the same group
number to be associated with more than one name. The example above pro-
- vokes a compile-time error. However, there is still scope for confu-
+ vokes a compile-time error. However, there is still scope for confu-
sion. Consider this pattern:
(?|(?<AA>aa)|(bb))
Although the second group number 1 is not explicitly named, the name AA
- is still an alias for any group 1. Whether the pattern matches "aa" or
+ is still an alias for any group 1. Whether the pattern matches "aa" or
"bb", a reference by name to group AA yields the matched string.
- By default, a name must be unique within a pattern, except that dupli-
+ By default, a name must be unique within a pattern, except that dupli-
cate names are permitted for groups with the same number, for example:
(?|(?<AA>aa)|(?<AA>bb))
The duplicate name constraint can be disabled by setting the PCRE2_DUP-
- NAMES option at compile time, or by the use of (?J) within the pattern.
- Duplicate names can be useful for patterns where only one instance of
- the named capture group can match. Suppose you want to match the name
- of a weekday, either as a 3-letter abbreviation or as the full name,
- and in both cases you want to extract the abbreviation. This pattern
+ NAMES option at compile time, or by the use of (?J) within the pattern,
+ as described in the section entitled "Internal Option Setting" above.
+
+ Duplicate names can be useful for patterns where only one instance of
+ the named capture group can match. Suppose you want to match the name
+ of a weekday, either as a 3-letter abbreviation or as the full name,
+ and in both cases you want to extract the abbreviation. This pattern
(ignoring the line breaks) does the job:
+ (?J)
(?<DN>Mon|Fri|Sun)(?:day)?|
(?<DN>Tue)(?:sday)?|
(?<DN>Wed)(?:nesday)?|
(?<DN>Thu)(?:rsday)?|
(?<DN>Sat)(?:urday)?
- There are five capture groups, but only one is ever set after a match.
- The convenience functions for extracting the data by name returns the
- substring for the first (and in this example, the only) group of that
+ There are five capture groups, but only one is ever set after a match.
+ The convenience functions for extracting the data by name return the
+ substring for the first (and in this example, the only) group of that
name that matched. This saves searching to find which numbered group it
- was. (An alternative way of solving this problem is to use a "branch
+ was. (An alternative way of solving this problem is to use a "branch
reset" group, as described in the previous section.)
- If you make a backreference to a non-unique named group from elsewhere
- in the pattern, the groups to which the name refers are checked in the
- order in which they appear in the overall pattern. The first one that
- is set is used for the reference. For example, this pattern matches
+ If you make a backreference to a non-unique named group from elsewhere
+ in the pattern, the groups to which the name refers are checked in the
+ order in which they appear in the overall pattern. The first one that
+ is set is used for the reference. For example, this pattern matches
both "foofoo" and "barbar" but not "foobar" or "barfoo":
- (?:(?<n>foo)|(?<n>bar))\k<n>
+ (?J)(?:(?<n>foo)|(?<n>bar))\k<n>
If you make a subroutine call to a non-unique named group, the one that
@@ -7683,15 +7783,15 @@ NAMED CAPTURE GROUPS
If you use a named reference in a condition test (see the section about
conditions below), either to check whether a capture group has matched,
or to check for recursion, all groups with the same name are tested. If
- the condition is true for any one of them, the overall condition is
- true. This is the same behaviour as testing by number. For further de-
- tails of the interfaces for handling named capture groups, see the
+ the condition is true for any one of them, the overall condition is
+ true. This is the same behaviour as testing by number. For further de-
+ tails of the interfaces for handling named capture groups, see the
pcre2api documentation.
REPETITION
- Repetition is specified by quantifiers, which can follow any of the
+ Repetition is specified by quantifiers, which can follow any of the
following items:
a literal data character
@@ -7702,20 +7802,20 @@ REPETITION
an escape such as \d or \pL that matches a single character
a character class
a backreference
- a parenthesized group (including most assertions)
+ a parenthesized group (including lookaround assertions)
a subroutine call (recursive or otherwise)
- The general repetition quantifier specifies a minimum and maximum num-
- ber of permitted matches, by giving the two numbers in curly brackets
- (braces), separated by a comma. The numbers must be less than 65536,
+ The general repetition quantifier specifies a minimum and maximum num-
+ ber of permitted matches, by giving the two numbers in curly brackets
+ (braces), separated by a comma. The numbers must be less than 65536,
and the first must be less than or equal to the second. For example,
z{2,4}
- matches "zz", "zzz", or "zzzz". A closing brace on its own is not a
- special character. If the second number is omitted, but the comma is
- present, there is no upper limit; if the second number and the comma
- are both omitted, the quantifier specifies an exact number of required
+ matches "zz", "zzz", or "zzzz". A closing brace on its own is not a
+ special character. If the second number is omitted, but the comma is
+ present, there is no upper limit; if the second number and the comma
+ are both omitted, the quantifier specifies an exact number of required
matches. Thus
[aeiou]{3,}
@@ -7724,53 +7824,53 @@ REPETITION
\d{8}
- matches exactly 8 digits. An opening curly bracket that appears in a
- position where a quantifier is not allowed, or one that does not match
- the syntax of a quantifier, is taken as a literal character. For exam-
+ matches exactly 8 digits. An opening curly bracket that appears in a
+ position where a quantifier is not allowed, or one that does not match
+ the syntax of a quantifier, is taken as a literal character. For exam-
ple, {,6} is not a quantifier, but a literal string of four characters.
In UTF modes, quantifiers apply to characters rather than to individual
- code units. Thus, for example, \x{100}{2} matches two characters, each
+ code units. Thus, for example, \x{100}{2} matches two characters, each
of which is represented by a two-byte sequence in a UTF-8 string. Simi-
- larly, \X{3} matches three Unicode extended grapheme clusters, each of
- which may be several code units long (and they may be of different
+ larly, \X{3} matches three Unicode extended grapheme clusters, each of
+ which may be several code units long (and they may be of different
lengths).
The quantifier {0} is permitted, causing the expression to behave as if
the previous item and the quantifier were not present. This may be use-
- ful for capture groups that are referenced as subroutines from else-
- where in the pattern (but see also the section entitled "Defining cap-
+ ful for capture groups that are referenced as subroutines from else-
+ where in the pattern (but see also the section entitled "Defining cap-
ture groups for use by reference only" below). Except for parenthesized
- groups, items that have a {0} quantifier are omitted from the compiled
+ groups, items that have a {0} quantifier are omitted from the compiled
pattern.
- For convenience, the three most common quantifiers have single-charac-
+ For convenience, the three most common quantifiers have single-charac-
ter abbreviations:
* is equivalent to {0,}
+ is equivalent to {1,}
? is equivalent to {0,1}
- It is possible to construct infinite loops by following a group that
- can match no characters with a quantifier that has no upper limit, for
+ It is possible to construct infinite loops by following a group that
+ can match no characters with a quantifier that has no upper limit, for
example:
(a?)*
- Earlier versions of Perl and PCRE1 used to give an error at compile
+ Earlier versions of Perl and PCRE1 used to give an error at compile
time for such patterns. However, because there are cases where this can
be useful, such patterns are now accepted, but whenever an iteration of
- such a group matches no characters, matching moves on to the next item
- in the pattern instead of repeatedly matching an empty string. This
- does not prevent backtracking into any of the iterations if a subse-
+ such a group matches no characters, matching moves on to the next item
+ in the pattern instead of repeatedly matching an empty string. This
+ does not prevent backtracking into any of the iterations if a subse-
quent item fails to match.
- By default, quantifiers are "greedy", that is, they match as much as
+ By default, quantifiers are "greedy", that is, they match as much as
possible (up to the maximum number of permitted times), without causing
- the rest of the pattern to fail. The classic example of where this
- gives problems is in trying to match comments in C programs. These ap-
+ the rest of the pattern to fail. The classic example of where this
+ gives problems is in trying to match comments in C programs. These ap-
pear between /* and */ and within the comment, individual * and / char-
- acters may appear. An attempt to match C comments by applying the pat-
+ acters may appear. An attempt to match C comments by applying the pat-
tern
/\*.*\*/
@@ -7779,17 +7879,17 @@ REPETITION
/* first comment */ not comment /* second comment */
- fails, because it matches the entire string owing to the greediness of
- the .* item. However, if a quantifier is followed by a question mark,
+ fails, because it matches the entire string owing to the greediness of
+ the .* item. However, if a quantifier is followed by a question mark,
it ceases to be greedy, and instead matches the minimum number of times
possible, so the pattern
/\*.*?\*/
- does the right thing with the C comments. The meaning of the various
- quantifiers is not otherwise changed, just the preferred number of
- matches. Do not confuse this use of question mark with its use as a
- quantifier in its own right. Because it has two uses, it can sometimes
+ does the right thing with the C comments. The meaning of the various
+ quantifiers is not otherwise changed, just the preferred number of
+ matches. Do not confuse this use of question mark with its use as a
+ quantifier in its own right. Because it has two uses, it can sometimes
appear doubled, as in
\d??\d
@@ -7798,55 +7898,55 @@ REPETITION
only way the rest of the pattern matches.
If the PCRE2_UNGREEDY option is set (an option that is not available in
- Perl), the quantifiers are not greedy by default, but individual ones
- can be made greedy by following them with a question mark. In other
+ Perl), the quantifiers are not greedy by default, but individual ones
+ can be made greedy by following them with a question mark. In other
words, it inverts the default behaviour.
- When a parenthesized group is quantified with a minimum repeat count
- that is greater than 1 or with a limited maximum, more memory is re-
+ When a parenthesized group is quantified with a minimum repeat count
+ that is greater than 1 or with a limited maximum, more memory is re-
quired for the compiled pattern, in proportion to the size of the mini-
mum or maximum.
- If a pattern starts with .* or .{0,} and the PCRE2_DOTALL option
- (equivalent to Perl's /s) is set, thus allowing the dot to match new-
- lines, the pattern is implicitly anchored, because whatever follows
- will be tried against every character position in the subject string,
- so there is no point in retrying the overall match at any position af-
- ter the first. PCRE2 normally treats such a pattern as though it were
+ If a pattern starts with .* or .{0,} and the PCRE2_DOTALL option
+ (equivalent to Perl's /s) is set, thus allowing the dot to match new-
+ lines, the pattern is implicitly anchored, because whatever follows
+ will be tried against every character position in the subject string,
+ so there is no point in retrying the overall match at any position af-
+ ter the first. PCRE2 normally treats such a pattern as though it were
preceded by \A.
- In cases where it is known that the subject string contains no new-
- lines, it is worth setting PCRE2_DOTALL in order to obtain this opti-
+ In cases where it is known that the subject string contains no new-
+ lines, it is worth setting PCRE2_DOTALL in order to obtain this opti-
mization, or alternatively, using ^ to indicate anchoring explicitly.
- However, there are some cases where the optimization cannot be used.
- When .* is inside capturing parentheses that are the subject of a
- backreference elsewhere in the pattern, a match at the start may fail
+ However, there are some cases where the optimization cannot be used.
+ When .* is inside capturing parentheses that are the subject of a
+ backreference elsewhere in the pattern, a match at the start may fail
where a later one succeeds. Consider, for example:
(.*)abc\1
- If the subject is "xyz123abc123" the match point is the fourth charac-
+ If the subject is "xyz123abc123" the match point is the fourth charac-
ter. For this reason, such a pattern is not implicitly anchored.
- Another case where implicit anchoring is not applied is when the lead-
- ing .* is inside an atomic group. Once again, a match at the start may
+ Another case where implicit anchoring is not applied is when the lead-
+ ing .* is inside an atomic group. Once again, a match at the start may
fail where a later one succeeds. Consider this pattern:
(?>.*?a)b
- It matches "ab" in the subject "aab". The use of the backtracking con-
- trol verbs (*PRUNE) and (*SKIP) also disable this optimization, and
+ It matches "ab" in the subject "aab". The use of the backtracking con-
+ trol verbs (*PRUNE) and (*SKIP) also disables this optimization, and
there is an option, PCRE2_NO_DOTSTAR_ANCHOR, to do so explicitly.
- When a capture group is repeated, the value captured is the substring
+ When a capture group is repeated, the value captured is the substring
that matched the final iteration. For example, after
(tweedle[dume]{3}\s*)+
has matched "tweedledum tweedledee" the value of the captured substring
- is "tweedledee". However, if there are nested capture groups, the cor-
- responding captured values may have been set in previous iterations.
+ is "tweedledee". However, if there are nested capture groups, the cor-
+ responding captured values may have been set in previous iterations.
For example, after
(a|(b))+
@@ -7856,57 +7956,57 @@ REPETITION
ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS
- With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy")
- repetition, failure of what follows normally causes the repeated item
- to be re-evaluated to see if a different number of repeats allows the
- rest of the pattern to match. Sometimes it is useful to prevent this,
- either to change the nature of the match, or to cause it fail earlier
- than it otherwise might, when the author of the pattern knows there is
+ With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy")
+ repetition, failure of what follows normally causes the repeated item
+ to be re-evaluated to see if a different number of repeats allows the
+ rest of the pattern to match. Sometimes it is useful to prevent this,
+ either to change the nature of the match, or to cause it to fail earlier
+ than it otherwise might, when the author of the pattern knows there is
no point in carrying on.
- Consider, for example, the pattern \d+foo when applied to the subject
+ Consider, for example, the pattern \d+foo when applied to the subject
line
123456bar
After matching all 6 digits and then failing to match "foo", the normal
- action of the matcher is to try again with only 5 digits matching the
- \d+ item, and then with 4, and so on, before ultimately failing.
- "Atomic grouping" (a term taken from Jeffrey Friedl's book) provides
+ action of the matcher is to try again with only 5 digits matching the
+ \d+ item, and then with 4, and so on, before ultimately failing.
+ "Atomic grouping" (a term taken from Jeffrey Friedl's book) provides
the means for specifying that once a group has matched, it is not to be
re-evaluated in this way.
- If we use atomic grouping for the previous example, the matcher gives
- up immediately on failing to match "foo" the first time. The notation
+ If we use atomic grouping for the previous example, the matcher gives
+ up immediately on failing to match "foo" the first time. The notation
is a kind of special parenthesis, starting with (?> as in this example:
(?>\d+)foo
- Perl 5.28 introduced an experimental alphabetic form starting with (*
+ Perl 5.28 introduced an experimental alphabetic form starting with (*
which may be easier to remember:
(*atomic:\d+)foo
This kind of parenthesized group "locks up" the part of the pattern it
contains once it has matched, and a failure further into the pattern is
- prevented from backtracking into it. Backtracking past it to previous
+ prevented from backtracking into it. Backtracking past it to previous
items, however, works as normal.
An alternative description is that a group of this type matches exactly
- the string of characters that an identical standalone pattern would
+ the string of characters that an identical standalone pattern would
match, if anchored at the current point in the subject string.
- Atomic groups are not capture groups. Simple cases such as the above
- example can be thought of as a maximizing repeat that must swallow ev-
- erything it can. So, while both \d+ and \d+? are prepared to adjust
- the number of digits they match in order to make the rest of the pat-
+ Atomic groups are not capture groups. Simple cases such as the above
+ example can be thought of as a maximizing repeat that must swallow ev-
+ erything it can. So, while both \d+ and \d+? are prepared to adjust
+ the number of digits they match in order to make the rest of the pat-
tern match, (?>\d+) can only match an entire sequence of digits.
- Atomic groups in general can of course contain arbitrarily complicated
+ Atomic groups in general can of course contain arbitrarily complicated
expressions, and can be nested. However, when the contents of an atomic
- group is just a single repeated item, as in the example above, a sim-
- pler notation, called a "possessive quantifier" can be used. This con-
- sists of an additional + character following a quantifier. Using this
+ group is just a single repeated item, as in the example above, a sim-
+ pler notation, called a "possessive quantifier" can be used. This con-
+ sists of an additional + character following a quantifier. Using this
notation, the previous example can be rewritten as
\d++foo
@@ -7916,46 +8016,46 @@ ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS
(abc|xyz){2,3}+
- Possessive quantifiers are always greedy; the setting of the PCRE2_UN-
- GREEDY option is ignored. They are a convenient notation for the sim-
- pler forms of atomic group. However, there is no difference in the
- meaning of a possessive quantifier and the equivalent atomic group,
- though there may be a performance difference; possessive quantifiers
+ Possessive quantifiers are always greedy; the setting of the PCRE2_UN-
+ GREEDY option is ignored. They are a convenient notation for the sim-
+ pler forms of atomic group. However, there is no difference in the
+ meaning of a possessive quantifier and the equivalent atomic group,
+ though there may be a performance difference; possessive quantifiers
should be slightly faster.
- The possessive quantifier syntax is an extension to the Perl 5.8 syn-
- tax. Jeffrey Friedl originated the idea (and the name) in the first
+ The possessive quantifier syntax is an extension to the Perl 5.8 syn-
+ tax. Jeffrey Friedl originated the idea (and the name) in the first
edition of his book. Mike McCloskey liked it, so implemented it when he
- built Sun's Java package, and PCRE1 copied it from there. It found its
+ built Sun's Java package, and PCRE1 copied it from there. It found its
way into Perl at release 5.10.
- PCRE2 has an optimization that automatically "possessifies" certain
- simple pattern constructs. For example, the sequence A+B is treated as
- A++B because there is no point in backtracking into a sequence of A's
+ PCRE2 has an optimization that automatically "possessifies" certain
+ simple pattern constructs. For example, the sequence A+B is treated as
+ A++B because there is no point in backtracking into a sequence of A's
when B must follow. This feature can be disabled by the PCRE2_NO_AUTO-
POSSESS option, or by starting the pattern with (*NO_AUTO_POSSESS).
When a pattern contains an unlimited repeat inside a group that can it-
- self be repeated an unlimited number of times, the use of an atomic
- group is the only way to avoid some failing matches taking a very long
+ self be repeated an unlimited number of times, the use of an atomic
+ group is the only way to avoid some failing matches taking a very long
time indeed. The pattern
(\D+|<\d+>)*[!?]
- matches an unlimited number of substrings that either consist of non-
- digits, or digits enclosed in <>, followed by either ! or ?. When it
+ matches an unlimited number of substrings that either consist of non-
+ digits, or digits enclosed in <>, followed by either ! or ?. When it
matches, it runs quickly. However, if it is applied to
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
- it takes a long time before reporting failure. This is because the
- string can be divided between the internal \D+ repeat and the external
- * repeat in a large number of ways, and all have to be tried. (The ex-
+ it takes a long time before reporting failure. This is because the
+ string can be divided between the internal \D+ repeat and the external
+ * repeat in a large number of ways, and all have to be tried. (The ex-
ample uses [!?] rather than a single character at the end, because both
PCRE2 and Perl have an optimization that allows for fast failure when a
- single character is used. They remember the last single character that
- is required for a match, and fail early if it is not present in the
- string.) If the pattern is changed so that it uses an atomic group,
+ single character is used. They remember the last single character that
+ is required for a match, and fail early if it is not present in the
+ string.) If the pattern is changed so that it uses an atomic group,
like this:
((?>\D+)|<\d+>)*[!?]
@@ -7966,28 +8066,28 @@ ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS
BACKREFERENCES
Outside a character class, a backslash followed by a digit greater than
- 0 (and possibly further digits) is a backreference to a capture group
+ 0 (and possibly further digits) is a backreference to a capture group
earlier (that is, to its left) in the pattern, provided there have been
that many previous capture groups.
- However, if the decimal number following the backslash is less than 8,
- it is always taken as a backreference, and causes an error only if
- there are not that many capture groups in the entire pattern. In other
+ However, if the decimal number following the backslash is less than 8,
+ it is always taken as a backreference, and causes an error only if
+ there are not that many capture groups in the entire pattern. In other
words, the group that is referenced need not be to the left of the ref-
- erence for numbers less than 8. A "forward backreference" of this type
+ erence for numbers less than 8. A "forward backreference" of this type
can make sense when a repetition is involved and the group to the right
has participated in an earlier iteration.
- It is not possible to have a numerical "forward backreference" to a
- group whose number is 8 or more using this syntax because a sequence
- such as \50 is interpreted as a character defined in octal. See the
+ It is not possible to have a numerical "forward backreference" to a
+ group whose number is 8 or more using this syntax because a sequence
+ such as \50 is interpreted as a character defined in octal. See the
subsection entitled "Non-printing characters" above for further details
- of the handling of digits following a backslash. Other forms of back-
- referencing do not suffer from this restriction. In particular, there
+ of the handling of digits following a backslash. Other forms of back-
+ referencing do not suffer from this restriction. In particular, there
is no problem when named capture groups are used (see below).
- Another way of avoiding the ambiguity inherent in the use of digits
- following a backslash is to use the \g escape sequence. This escape
+ Another way of avoiding the ambiguity inherent in the use of digits
+ following a backslash is to use the \g escape sequence. This escape
must be followed by a signed or unsigned number, optionally enclosed in
braces. These examples are all identical:
@@ -7995,9 +8095,9 @@ BACKREFERENCES
(ring), \g1
(ring), \g{1}
- An unsigned number specifies an absolute reference without the ambigu-
+ An unsigned number specifies an absolute reference without the ambigu-
ity that is present in the older syntax. It is also useful when literal
- digits follow the reference. A signed number is a relative reference.
+ digits follow the reference. A signed number is a relative reference.
Consider this example:
(abc(def)ghi)\g{-1}
@@ -8005,36 +8105,36 @@ BACKREFERENCES
The sequence \g{-1} is a reference to the most recently started capture
group before \g, that is, it is equivalent to \2 in this example. Simi-
larly, \g{-2} would be equivalent to \1. The use of relative references
- can be helpful in long patterns, and also in patterns that are created
- by joining together fragments that contain references within them-
+ can be helpful in long patterns, and also in patterns that are created
+ by joining together fragments that contain references within them-
selves.
The sequence \g{+1} is a reference to the next capture group. This kind
- of forward reference can be useful in patterns that repeat. Perl does
+ of forward reference can be useful in patterns that repeat. Perl does
not support the use of + in this way.
- A backreference matches whatever actually most recently matched the
- capture group in the current subject string, rather than anything at
+ A backreference matches whatever actually most recently matched the
+ capture group in the current subject string, rather than anything at
all that matches the group (see "Groups as subroutines" below for a way
of doing that). So the pattern
(sens|respons)e and \1ibility
- matches "sense and sensibility" and "response and responsibility", but
- not "sense and responsibility". If caseful matching is in force at the
- time of the backreference, the case of letters is relevant. For exam-
+ matches "sense and sensibility" and "response and responsibility", but
+ not "sense and responsibility". If caseful matching is in force at the
+ time of the backreference, the case of letters is relevant. For exam-
ple,
((?i)rah)\s+\1
- matches "rah rah" and "RAH RAH", but not "RAH rah", even though the
+ matches "rah rah" and "RAH RAH", but not "RAH rah", even though the
original capture group is matched caselessly.
- There are several different ways of writing backreferences to named
- capture groups. The .NET syntax \k{name} and the Perl syntax \k<name>
- or \k'name' are supported, as is the Python syntax (?P=name). Perl
- 5.10's unified backreference syntax, in which \g can be used for both
- numeric and named references, is also supported. We could rewrite the
+ There are several different ways of writing backreferences to named
+ capture groups. The .NET syntax \k{name} and the Perl syntax \k<name>
+ or \k'name' are supported, as is the Python syntax (?P=name). Perl
+ 5.10's unified backreference syntax, in which \g can be used for both
+ numeric and named references, is also supported. We could rewrite the
above example in any of the following ways:
(?<p1>(?i)rah)\s+\k<p1>
@@ -8042,124 +8142,114 @@ BACKREFERENCES
(?P<p1>(?i)rah)\s+(?P=p1)
(?<p1>(?i)rah)\s+\g{p1}
- A capture group that is referenced by name may appear in the pattern
+ A capture group that is referenced by name may appear in the pattern
before or after the reference.
- There may be more than one backreference to the same group. If a group
- has not actually been used in a particular match, backreferences to it
+ There may be more than one backreference to the same group. If a group
+ has not actually been used in a particular match, backreferences to it
always fail by default. For example, the pattern
(a|(bc))\2
- always fails if it starts to match "a" rather than "bc". However, if
+ always fails if it starts to match "a" rather than "bc". However, if
the PCRE2_MATCH_UNSET_BACKREF option is set at compile time, a backref-
erence to an unset value matches an empty string.
- Because there may be many capture groups in a pattern, all digits fol-
- lowing a backslash are taken as part of a potential backreference num-
- ber. If the pattern continues with a digit character, some delimiter
- must be used to terminate the backreference. If the PCRE2_EXTENDED or
- PCRE2_EXTENDED_MORE option is set, this can be white space. Otherwise,
+ Because there may be many capture groups in a pattern, all digits fol-
+ lowing a backslash are taken as part of a potential backreference num-
+ ber. If the pattern continues with a digit character, some delimiter
+ must be used to terminate the backreference. If the PCRE2_EXTENDED or
+ PCRE2_EXTENDED_MORE option is set, this can be white space. Otherwise,
the \g{} syntax or an empty comment (see "Comments" below) can be used.
Recursive backreferences
- A backreference that occurs inside the group to which it refers fails
- when the group is first used, so, for example, (a\1) never matches.
- However, such references can be useful inside repeated groups. For ex-
+ A backreference that occurs inside the group to which it refers fails
+ when the group is first used, so, for example, (a\1) never matches.
+ However, such references can be useful inside repeated groups. For ex-
ample, the pattern
(a|b\1)+
matches any number of "a"s and also "aba", "ababbaa" etc. At each iter-
ation of the group, the backreference matches the character string cor-
- responding to the previous iteration. In order for this to work, the
- pattern must be such that the first iteration does not need to match
- the backreference. This can be done using alternation, as in the exam-
+ responding to the previous iteration. In order for this to work, the
+ pattern must be such that the first iteration does not need to match
+ the backreference. This can be done using alternation, as in the exam-
ple above, or by a quantifier with a minimum of zero.
- Backreferences of this type cause the group that they reference to be
- treated as an atomic group. Once the whole group has been matched, a
- subsequent matching failure cannot cause backtracking into the middle
- of the group.
+ For versions of PCRE2 less than 10.25, backreferences of this type used
+ to cause the group that they reference to be treated as an atomic
+ group. This restriction no longer applies, and backtracking into such
+ groups can occur as normal.
ASSERTIONS
- An assertion is a test on the characters following or preceding the
+ An assertion is a test on the characters following or preceding the
current matching point that does not consume any characters. The simple
- assertions coded as \b, \B, \A, \G, \Z, \z, ^ and $ are described
+ assertions coded as \b, \B, \A, \G, \Z, \z, ^ and $ are described
above.
- More complicated assertions are coded as parenthesized groups. There
- are two kinds: those that look ahead of the current position in the
- subject string, and those that look behind it, and in each case an as-
- sertion may be positive (must match for the assertion to be true) or
- negative (must not match for the assertion to be true). An assertion
+ More complicated assertions are coded as parenthesized groups. There
+ are two kinds: those that look ahead of the current position in the
+ subject string, and those that look behind it, and in each case an as-
+ sertion may be positive (must match for the assertion to be true) or
+ negative (must not match for the assertion to be true). An assertion
group is matched in the normal way, and if it is true, matching contin-
- ues after it, but with the matching position in the subject string re-
+ ues after it, but with the matching position in the subject string re-
set to what it was before the assertion was processed.
- The Perl-compatible lookaround assertions are atomic. If an assertion
- is true, but there is a subsequent matching failure, there is no back-
- tracking into the assertion. However, there are some cases where non-
- atomic assertions can be useful. PCRE2 has some support for these, de-
+ The Perl-compatible lookaround assertions are atomic. If an assertion
+ is true, but there is a subsequent matching failure, there is no back-
+ tracking into the assertion. However, there are some cases where non-
+ atomic assertions can be useful. PCRE2 has some support for these, de-
scribed in the section entitled "Non-atomic assertions" below, but they
are not Perl-compatible.
- A lookaround assertion may appear as the condition in a conditional
- group (see below). In this case, the result of matching the assertion
+ A lookaround assertion may appear as the condition in a conditional
+ group (see below). In this case, the result of matching the assertion
determines which branch of the condition is followed.
- Assertion groups are not capture groups. If an assertion contains cap-
- ture groups within it, these are counted for the purposes of numbering
- the capture groups in the whole pattern. Within each branch of an as-
- sertion, locally captured substrings may be referenced in the usual
- way. For example, a sequence such as (.)\g{-1} can be used to check
+ Assertion groups are not capture groups. If an assertion contains cap-
+ ture groups within it, these are counted for the purposes of numbering
+ the capture groups in the whole pattern. Within each branch of an as-
+ sertion, locally captured substrings may be referenced in the usual
+ way. For example, a sequence such as (.)\g{-1} can be used to check
that two adjacent characters are the same.
- When a branch within an assertion fails to match, any substrings that
- were captured are discarded (as happens with any pattern branch that
- fails to match). A negative assertion is true only when all its
+ When a branch within an assertion fails to match, any substrings that
+ were captured are discarded (as happens with any pattern branch that
+ fails to match). A negative assertion is true only when all its
branches fail to match; this means that no captured substrings are ever
- retained after a successful negative assertion. When an assertion con-
+ retained after a successful negative assertion. When an assertion con-
tains a matching branch, what happens depends on the type of assertion.
- For a positive assertion, internally captured substrings in the suc-
- cessful branch are retained, and matching continues with the next pat-
- tern item after the assertion. For a negative assertion, a matching
- branch means that the assertion is not true. If such an assertion is
- being used as a condition in a conditional group (see below), captured
- substrings are retained, because matching continues with the "no"
+ For a positive assertion, internally captured substrings in the suc-
+ cessful branch are retained, and matching continues with the next pat-
+ tern item after the assertion. For a negative assertion, a matching
+ branch means that the assertion is not true. If such an assertion is
+ being used as a condition in a conditional group (see below), captured
+ substrings are retained, because matching continues with the "no"
branch of the condition. For other failing negative assertions, control
passes to the previous backtracking point, thus discarding any captured
strings within the assertion.
- For compatibility with Perl, most assertion groups may be repeated;
- though it makes no sense to assert the same thing several times, the
- side effect of capturing may occasionally be useful. However, an asser-
- tion that forms the condition for a conditional group may not be quan-
- tified. In practice, for other assertions, there only three cases:
-
- (1) If the quantifier is {0}, the assertion is never obeyed during
- matching. However, it may contain internal capture groups that are
- called from elsewhere via the subroutine mechanism.
-
- (2) If quantifier is {0,n} where n is greater than zero, it is treated
- as if it were {0,1}. At run time, the rest of the pattern match is
- tried with and without the assertion, the order depending on the greed-
- iness of the quantifier.
-
- (3) If the minimum repetition is greater than zero, the quantifier is
- ignored. The assertion is obeyed just once when encountered during
- matching.
+ Most assertion groups may be repeated; though it makes no sense to as-
+ sert the same thing several times, the side effect of capturing in pos-
+ itive assertions may occasionally be useful. However, an assertion that
+ forms the condition for a conditional group may not be quantified.
+ PCRE2 used to restrict the repetition of assertions, but from release
+ 10.35 the only restriction is that an unlimited maximum repetition is
+ changed to be one more than the minimum. For example, {3,} is treated
+ as {3,4}.
Alphabetic assertion names
- Traditionally, symbolic sequences such as (?= and (?<= have been used
- to specify lookaround assertions. Perl 5.28 introduced some experimen-
+ Traditionally, symbolic sequences such as (?= and (?<= have been used
+ to specify lookaround assertions. Perl 5.28 introduced some experimen-
tal alphabetic alternatives which might be easier to remember. They all
- start with (* instead of (? and must be written using lower case let-
+ start with (* instead of (? and must be written using lower case let-
ters. PCRE2 supports the following synonyms:
(*positive_lookahead: or (*pla: is the same as (?=
@@ -8167,8 +8257,8 @@ ASSERTIONS
(*positive_lookbehind: or (*plb: is the same as (?<=
(*negative_lookbehind: or (*nlb: is the same as (?<!
- For example, (*pla:foo) is the same assertion as (?=foo). In the fol-
- lowing sections, the various assertions are described using the origi-
+ For example, (*pla:foo) is the same assertion as (?=foo). In the fol-
+ lowing sections, the various assertions are described using the origi-
nal symbolic forms.
Lookahead assertions
@@ -8178,38 +8268,38 @@ ASSERTIONS
\w+(?=;)
- matches a word followed by a semicolon, but does not include the semi-
+ matches a word followed by a semicolon, but does not include the semi-
colon in the match, and
foo(?!bar)
- matches any occurrence of "foo" that is not followed by "bar". Note
+ matches any occurrence of "foo" that is not followed by "bar". Note
that the apparently similar pattern
(?!foo)bar
- does not find an occurrence of "bar" that is preceded by something
- other than "foo"; it finds any occurrence of "bar" whatsoever, because
+ does not find an occurrence of "bar" that is preceded by something
+ other than "foo"; it finds any occurrence of "bar" whatsoever, because
the assertion (?!foo) is always true when the next three characters are
"bar". A lookbehind assertion is needed to achieve the other effect.
If you want to force a matching failure at some point in a pattern, the
- most convenient way to do it is with (?!) because an empty string al-
- ways matches, so an assertion that requires there not to be an empty
+ most convenient way to do it is with (?!) because an empty string al-
+ ways matches, so an assertion that requires there not to be an empty
string must always fail. The backtracking control verb (*FAIL) or (*F)
is a synonym for (?!).
Lookbehind assertions
- Lookbehind assertions start with (?<= for positive assertions and (?<!
+ Lookbehind assertions start with (?<= for positive assertions and (?<!
for negative assertions. For example,
(?<!foo)bar
- does find an occurrence of "bar" that is not preceded by "foo". The
- contents of a lookbehind assertion are restricted such that all the
+ does find an occurrence of "bar" that is not preceded by "foo". The
+ contents of a lookbehind assertion are restricted such that all the
strings it matches must have a fixed length. However, if there are sev-
- eral top-level alternatives, they do not all have to have the same
+ eral top-level alternatives, they do not all have to have the same
fixed length. Thus
(?<=bullock|donkey)
@@ -8218,74 +8308,74 @@ ASSERTIONS
(?<!dogs?|cats?)
- causes an error at compile time. Branches that match different length
- strings are permitted only at the top level of a lookbehind assertion.
+ causes an error at compile time. Branches that match different length
+ strings are permitted only at the top level of a lookbehind assertion.
This is an extension compared with Perl, which requires all branches to
match the same length of string. An assertion such as
(?<=ab(c|de))
- is not permitted, because its single top-level branch can match two
- different lengths, but it is acceptable to PCRE2 if rewritten to use
+ is not permitted, because its single top-level branch can match two
+ different lengths, but it is acceptable to PCRE2 if rewritten to use
two top-level branches:
(?<=abc|abde)
- In some cases, the escape sequence \K (see above) can be used instead
+ In some cases, the escape sequence \K (see above) can be used instead
of a lookbehind assertion to get round the fixed-length restriction.
- The implementation of lookbehind assertions is, for each alternative,
- to temporarily move the current position back by the fixed length and
+ The implementation of lookbehind assertions is, for each alternative,
+ to temporarily move the current position back by the fixed length and
then try to match. If there are insufficient characters before the cur-
rent position, the assertion fails.
- In UTF-8 and UTF-16 modes, PCRE2 does not allow the \C escape (which
- matches a single code unit even in a UTF mode) to appear in lookbehind
- assertions, because it makes it impossible to calculate the length of
- the lookbehind. The \X and \R escapes, which can match different num-
+ In UTF-8 and UTF-16 modes, PCRE2 does not allow the \C escape (which
+ matches a single code unit even in a UTF mode) to appear in lookbehind
+ assertions, because it makes it impossible to calculate the length of
+ the lookbehind. The \X and \R escapes, which can match different num-
bers of code units, are never permitted in lookbehinds.
- "Subroutine" calls (see below) such as (?2) or (?&X) are permitted in
+ "Subroutine" calls (see below) such as (?2) or (?&X) are permitted in
lookbehinds, as long as the called capture group matches a fixed-length
- string. However, recursion, that is, a "subroutine" call into a group
+ string. However, recursion, that is, a "subroutine" call into a group
that is already active, is not supported.
Perl does not support backreferences in lookbehinds. PCRE2 does support
- them, but only if certain conditions are met. The PCRE2_MATCH_UN-
- SET_BACKREF option must not be set, there must be no use of (?| in the
- pattern (it creates duplicate group numbers), and if the backreference
- is by name, the name must be unique. Of course, the referenced group
- must itself match a fixed length substring. The following pattern
- matches words containing at least two characters that begin and end
+ them, but only if certain conditions are met. The PCRE2_MATCH_UN-
+ SET_BACKREF option must not be set, there must be no use of (?| in the
+ pattern (it creates duplicate group numbers), and if the backreference
+ is by name, the name must be unique. Of course, the referenced group
+ must itself match a fixed length substring. The following pattern
+ matches words containing at least two characters that begin and end
with the same character:
\b(\w)\w++(?<=\1)
- Possessive quantifiers can be used in conjunction with lookbehind as-
- sertions to specify efficient matching of fixed-length strings at the
+ Possessive quantifiers can be used in conjunction with lookbehind as-
+ sertions to specify efficient matching of fixed-length strings at the
end of subject strings. Consider a simple pattern such as
abcd$
- when applied to a long string that does not match. Because matching
- proceeds from left to right, PCRE2 will look for each "a" in the sub-
- ject and then see if what follows matches the rest of the pattern. If
+ when applied to a long string that does not match. Because matching
+ proceeds from left to right, PCRE2 will look for each "a" in the sub-
+ ject and then see if what follows matches the rest of the pattern. If
the pattern is specified as
^.*abcd$
- the initial .* matches the entire string at first, but when this fails
+ the initial .* matches the entire string at first, but when this fails
(because there is no following "a"), it backtracks to match all but the
- last character, then all but the last two characters, and so on. Once
- again the search for "a" covers the entire string, from right to left,
+ last character, then all but the last two characters, and so on. Once
+ again the search for "a" covers the entire string, from right to left,
so we are no better off. However, if the pattern is written as
^.*+(?<=abcd)
there can be no backtracking for the .*+ item because of the possessive
quantifier; it can match only the entire string. The subsequent lookbe-
- hind assertion does a single test on the last four characters. If it
- fails, the match fails immediately. For long strings, this approach
+ hind assertion does a single test on the last four characters. If it
+ fails, the match fails immediately. For long strings, this approach
makes a significant difference to the processing time.
Using multiple assertions
@@ -8294,18 +8384,18 @@ ASSERTIONS
(?<=\d{3})(?<!999)foo
- matches "foo" preceded by three digits that are not "999". Notice that
- each of the assertions is applied independently at the same point in
- the subject string. First there is a check that the previous three
- characters are all digits, and then there is a check that the same
+ matches "foo" preceded by three digits that are not "999". Notice that
+ each of the assertions is applied independently at the same point in
+ the subject string. First there is a check that the previous three
+ characters are all digits, and then there is a check that the same
three characters are not "999". This pattern does not match "foo" pre-
- ceded by six characters, the first of which are digits and the last
- three of which are not "999". For example, it doesn't match "123abc-
+ ceded by six characters, the first three of which are digits and the last
+ three of which are not "999". For example, it doesn't match "123abc-
foo". A pattern to do that is
(?<=\d{3}...)(?<!999)foo
- This time the first assertion looks at the preceding six characters,
+ This time the first assertion looks at the preceding six characters,
checking that the first three are digits, and then the second assertion
checks that the preceding three characters are not "999".
@@ -8313,107 +8403,113 @@ ASSERTIONS
(?<=(?<!foo)bar)baz
- matches an occurrence of "baz" that is preceded by "bar" which in turn
+ matches an occurrence of "baz" that is preceded by "bar" which in turn
is not preceded by "foo", while
(?<=\d{3}(?!999)...)foo
- is another pattern that matches "foo" preceded by three digits and any
+ is another pattern that matches "foo" preceded by three digits and any
three characters that are not "999".
NON-ATOMIC ASSERTIONS
- The traditional Perl-compatible lookaround assertions are atomic. That
- is, if an assertion is true, but there is a subsequent matching fail-
- ure, there is no backtracking into the assertion. However, there are
- some cases where non-atomic positive assertions can be useful. PCRE2
+ The traditional Perl-compatible lookaround assertions are atomic. That
+ is, if an assertion is true, but there is a subsequent matching fail-
+ ure, there is no backtracking into the assertion. However, there are
+ some cases where non-atomic positive assertions can be useful. PCRE2
provides these using the following syntax:
- (*non_atomic_positive_lookahead: or (*napla:
- (*non_atomic_positive_lookbehind: or (*naplb:
+ (*non_atomic_positive_lookahead: or (*napla: or (?*
+ (*non_atomic_positive_lookbehind: or (*naplb: or (?<*
- Consider the problem of finding the right-most word in a string that
- also appears earlier in the string, that is, it must appear at least
- twice in total. This pattern returns the required result as captured
+ Consider the problem of finding the right-most word in a string that
+ also appears earlier in the string, that is, it must appear at least
+ twice in total. This pattern returns the required result as captured
substring 1:
^(?x)(*napla: .* \b(\w++)) (?> .*? \b\1\b ){2}
- For a subject such as "word1 word2 word3 word2 word3 word4" the result
- is "word3". How does it work? At the start, ^(?x) anchors the pattern
+ For a subject such as "word1 word2 word3 word2 word3 word4" the result
+ is "word3". How does it work? At the start, ^(?x) anchors the pattern
and sets the "x" option, which causes white space (introduced for read-
- ability) to be ignored. Inside the assertion, the greedy .* at first
+ ability) to be ignored. Inside the assertion, the greedy .* at first
consumes the entire string, but then has to backtrack until the rest of
- the assertion can match a word, which is captured by group 1. In other
- words, when the assertion first succeeds, it captures the right-most
+ the assertion can match a word, which is captured by group 1. In other
+ words, when the assertion first succeeds, it captures the right-most
word in the string.
- The current matching point is then reset to the start of the subject,
- and the rest of the pattern match checks for two occurrences of the
- captured word, using an ungreedy .*? to scan from the left. If this
- succeeds, we are done, but if the last word in the string does not oc-
- cur twice, this part of the pattern fails. If a traditional atomic
+ The current matching point is then reset to the start of the subject,
+ and the rest of the pattern match checks for two occurrences of the
+ captured word, using an ungreedy .*? to scan from the left. If this
+ succeeds, we are done, but if the last word in the string does not oc-
+ cur twice, this part of the pattern fails. If a traditional atomic
lookahead (?= or (*pla: had been used, the assertion could not be re-en-
- tered, and the whole match would fail. The pattern would succeed only
+ tered, and the whole match would fail. The pattern would succeed only
if the very last word in the subject was found twice.
- Using a non-atomic lookahead, however, means that when the last word
- does not occur twice in the string, the lookahead can backtrack and
- find the second-last word, and so on, until either the match succeeds,
+ Using a non-atomic lookahead, however, means that when the last word
+ does not occur twice in the string, the lookahead can backtrack and
+ find the second-last word, and so on, until either the match succeeds,
or all words have been tested.
Two conditions must be met for a non-atomic assertion to be useful: the
- contents of one or more capturing groups must change after a backtrack
- into the assertion, and there must be a backreference to a changed
- group later in the pattern. If this is not the case, the rest of the
- pattern match fails exactly as before because nothing has changed, so
+ contents of one or more capturing groups must change after a backtrack
+ into the assertion, and there must be a backreference to a changed
+ group later in the pattern. If this is not the case, the rest of the
+ pattern match fails exactly as before because nothing has changed, so
using a non-atomic assertion just wastes resources.
+ There is one exception to backtracking into a non-atomic assertion. If
+ an (*ACCEPT) control verb is triggered, the assertion succeeds atomi-
+ cally. That is, a subsequent match failure cannot backtrack into the
+ assertion.
+
Non-atomic assertions are not supported by the alternative matching
- function pcre2_dfa_match(). They are also not supported by JIT (but may
- be in future). Note that assertions that appear as conditions for con-
- ditional groups (see below) must be atomic.
+ function pcre2_dfa_match(). They are supported by JIT, but only if they
+ do not contain any control verbs such as (*ACCEPT). (This may change in
+ future). Note that assertions that appear as conditions for conditional
+ groups (see below) must be atomic.
SCRIPT RUNS
- In concept, a script run is a sequence of characters that are all from
- the same Unicode script such as Latin or Greek. However, because some
- scripts are commonly used together, and because some diacritical and
- other marks are used with multiple scripts, it is not that simple.
+ In concept, a script run is a sequence of characters that are all from
+ the same Unicode script such as Latin or Greek. However, because some
+ scripts are commonly used together, and because some diacritical and
+ other marks are used with multiple scripts, it is not that simple.
There is a full description of the rules that PCRE2 uses in the section
entitled "Script Runs" in the pcre2unicode documentation.
- If part of a pattern is enclosed between (*script_run: or (*sr: and a
- closing parenthesis, it fails if the sequence of characters that it
- matches are not a script run. After a failure, normal backtracking oc-
- curs. Script runs can be used to detect spoofing attacks using charac-
- ters that look the same, but are from different scripts. The string
- "paypal.com" is an infamous example, where the letters could be a mix-
+ If part of a pattern is enclosed between (*script_run: or (*sr: and a
+ closing parenthesis, it fails if the sequence of characters that it
+ matches is not a script run. After a failure, normal backtracking oc-
+ curs. Script runs can be used to detect spoofing attacks using charac-
+ ters that look the same, but are from different scripts. The string
+ "paypal.com" is an infamous example, where the letters could be a mix-
ture of Latin and Cyrillic. This pattern ensures that the matched char-
acters in a sequence of non-spaces that follow white space are a script
run:
\s+(*sr:\S+)
- To be sure that they are all from the Latin script (for example), a
+ To be sure that they are all from the Latin script (for example), a
lookahead can be used:
\s+(?=\p{Latin})(*sr:\S+)
This works as long as the first character is expected to be a character
- in that script, and not (for example) punctuation, which is allowed
- with any script. If this is not the case, a more creative lookahead is
- needed. For example, if digits, underscore, and dots are permitted at
+ in that script, and not (for example) punctuation, which is allowed
+ with any script. If this is not the case, a more creative lookahead is
+ needed. For example, if digits, underscore, and dots are permitted at
the start:
\s+(?=[0-9_.]*\p{Latin})(*sr:\S+)
- In many cases, backtracking into a script run pattern fragment is not
- desirable. The script run can employ an atomic group to prevent this.
- Because this is a common requirement, a shorthand notation is provided
+ In many cases, backtracking into a script run pattern fragment is not
+ desirable. The script run can employ an atomic group to prevent this.
+ Because this is a common requirement, a shorthand notation is provided
by (*atomic_script_run: or (*asr:
(*asr:...) is the same as (*sr:(?>...))
@@ -8421,13 +8517,13 @@ SCRIPT RUNS
Note that the atomic group is inside the script run. Putting it outside
would not prevent backtracking into the script run pattern.
- Support for script runs is not available if PCRE2 is compiled without
+ Support for script runs is not available if PCRE2 is compiled without
Unicode support. A compile-time error is given if any of the above con-
- structs is encountered. Script runs are not supported by the alternate
- matching function, pcre2_dfa_match() because they use the same mecha-
+ structs is encountered. Script runs are not supported by the alternate
+ matching function, pcre2_dfa_match() because they use the same mecha-
nism as capturing parentheses.
- Warning: The (*ACCEPT) control verb (see below) should not be used
+ Warning: The (*ACCEPT) control verb (see below) should not be used
within a script run group, because it causes an immediate exit from the
group, bypassing the script run checking.
@@ -8436,116 +8532,116 @@ CONDITIONAL GROUPS
It is possible to cause the matching process to obey a pattern fragment
conditionally or to choose between two alternative fragments, depending
- on the result of an assertion, or whether a specific capture group has
+ on the result of an assertion, or whether a specific capture group has
already been matched. The two possible forms of conditional group are:
(?(condition)yes-pattern)
(?(condition)yes-pattern|no-pattern)
- If the condition is satisfied, the yes-pattern is used; otherwise the
- no-pattern (if present) is used. An absent no-pattern is equivalent to
- an empty string (it always matches). If there are more than two alter-
- natives in the group, a compile-time error occurs. Each of the two al-
+ If the condition is satisfied, the yes-pattern is used; otherwise the
+ no-pattern (if present) is used. An absent no-pattern is equivalent to
+ an empty string (it always matches). If there are more than two alter-
+ natives in the group, a compile-time error occurs. Each of the two al-
ternatives may itself contain nested groups of any form, including con-
- ditional groups; the restriction to two alternatives applies only at
- the level of the condition itself. This pattern fragment is an example
+ ditional groups; the restriction to two alternatives applies only at
+ the level of the condition itself. This pattern fragment is an example
where the alternatives are complex:
(?(1) (A|B|C) | (D | (?(2)E|F) | E) )
There are five kinds of condition: references to capture groups, refer-
- ences to recursion, two pseudo-conditions called DEFINE and VERSION,
+ ences to recursion, two pseudo-conditions called DEFINE and VERSION,
and assertions.
Checking for a used capture group by number
- If the text between the parentheses consists of a sequence of digits,
- the condition is true if a capture group of that number has previously
- matched. If there is more than one capture group with the same number
- (see the earlier section about duplicate group numbers), the condition
+ If the text between the parentheses consists of a sequence of digits,
+ the condition is true if a capture group of that number has previously
+ matched. If there is more than one capture group with the same number
+ (see the earlier section about duplicate group numbers), the condition
is true if any of them have matched. An alternative notation is to pre-
cede the digits with a plus or minus sign. In this case, the group num-
- ber is relative rather than absolute. The most recently opened capture
- group can be referenced by (?(-1), the next most recent by (?(-2), and
- so on. Inside loops it can also make sense to refer to subsequent
- groups. The next capture group can be referenced as (?(+1), and so on.
- (The value zero in any of these forms is not used; it provokes a com-
+ ber is relative rather than absolute. The most recently opened capture
+ group can be referenced by (?(-1), the next most recent by (?(-2), and
+ so on. Inside loops it can also make sense to refer to subsequent
+ groups. The next capture group can be referenced as (?(+1), and so on.
+ (The value zero in any of these forms is not used; it provokes a com-
pile-time error.)
- Consider the following pattern, which contains non-significant white
- space to make it more readable (assume the PCRE2_EXTENDED option) and
+ Consider the following pattern, which contains non-significant white
+ space to make it more readable (assume the PCRE2_EXTENDED option) and
to divide it into three parts for ease of discussion:
( \( )? [^()]+ (?(1) \) )
- The first part matches an optional opening parenthesis, and if that
+ The first part matches an optional opening parenthesis, and if that
character is present, sets it as the first captured substring. The sec-
- ond part matches one or more characters that are not parentheses. The
- third part is a conditional group that tests whether or not the first
- capture group matched. If it did, that is, if subject started with an
- opening parenthesis, the condition is true, and so the yes-pattern is
- executed and a closing parenthesis is required. Otherwise, since no-
+ ond part matches one or more characters that are not parentheses. The
+ third part is a conditional group that tests whether or not the first
+ capture group matched. If it did, that is, if the subject started with an
+ opening parenthesis, the condition is true, and so the yes-pattern is
+ executed and a closing parenthesis is required. Otherwise, since no-
pattern is not present, the conditional group matches nothing. In other
- words, this pattern matches a sequence of non-parentheses, optionally
+ words, this pattern matches a sequence of non-parentheses, optionally
enclosed in parentheses.
- If you were embedding this pattern in a larger one, you could use a
+ If you were embedding this pattern in a larger one, you could use a
relative reference:
...other stuff... ( \( )? [^()]+ (?(-1) \) ) ...
- This makes the fragment independent of the parentheses in the larger
+ This makes the fragment independent of the parentheses in the larger
pattern.
Checking for a used capture group by name
- Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a
- used capture group by name. For compatibility with earlier versions of
- PCRE1, which had this facility before Perl, the syntax (?(name)...) is
- also recognized. Note, however, that undelimited names consisting of
- the letter R followed by digits are ambiguous (see the following sec-
+ Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a
+ used capture group by name. For compatibility with earlier versions of
+ PCRE1, which had this facility before Perl, the syntax (?(name)...) is
+ also recognized. Note, however, that undelimited names consisting of
+ the letter R followed by digits are ambiguous (see the following sec-
tion). Rewriting the above example to use a named group gives this:
(?<OPEN> \( )? [^()]+ (?(<OPEN>) \) )
- If the name used in a condition of this kind is a duplicate, the test
- is applied to all groups of the same name, and is true if any one of
+ If the name used in a condition of this kind is a duplicate, the test
+ is applied to all groups of the same name, and is true if any one of
them has matched.
Checking for pattern recursion
- "Recursion" in this sense refers to any subroutine-like call from one
- part of the pattern to another, whether or not it is actually recur-
- sive. See the sections entitled "Recursive patterns" and "Groups as
+ "Recursion" in this sense refers to any subroutine-like call from one
+ part of the pattern to another, whether or not it is actually recur-
+ sive. See the sections entitled "Recursive patterns" and "Groups as
subroutines" below for details of recursion and subroutine calls.
- If a condition is the string (R), and there is no capture group with
- the name R, the condition is true if matching is currently in a recur-
- sion or subroutine call to the whole pattern or any capture group. If
- digits follow the letter R, and there is no group with that name, the
- condition is true if the most recent call is into a group with the
- given number, which must exist somewhere in the overall pattern. This
+ If a condition is the string (R), and there is no capture group with
+ the name R, the condition is true if matching is currently in a recur-
+ sion or subroutine call to the whole pattern or any capture group. If
+ digits follow the letter R, and there is no group with that name, the
+ condition is true if the most recent call is into a group with the
+ given number, which must exist somewhere in the overall pattern. This
is a contrived example that is equivalent to a+b:
((?(R1)a+|(?1)b))
- However, in both cases, if there is a capture group with a matching
- name, the condition tests for its being set, as described in the sec-
- tion above, instead of testing for recursion. For example, creating a
- group with the name R1 by adding (?<R1>) to the above pattern com-
+ However, in both cases, if there is a capture group with a matching
+ name, the condition tests for its being set, as described in the sec-
+ tion above, instead of testing for recursion. For example, creating a
+ group with the name R1 by adding (?<R1>) to the above pattern com-
pletely changes its meaning.
If a name preceded by ampersand follows the letter R, for example:
(?(R&name)...)
- the condition is true if the most recent recursion is into a group of
+ the condition is true if the most recent recursion is into a group of
that name (which must exist within the pattern).
This condition does not check the entire recursion stack. It tests only
- the current level. If the name used in a condition of this kind is a
- duplicate, the test is applied to all groups of the same name, and is
+ the current level. If the name used in a condition of this kind is a
+ duplicate, the test is applied to all groups of the same name, and is
true if any one of them is the most recent recursion.
At "top level", all these recursion test conditions are false.
@@ -8553,111 +8649,111 @@ CONDITIONAL GROUPS
Defining capture groups for use by reference only
If the condition is the string (DEFINE), the condition is always false,
- even if there is a group with the name DEFINE. In this case, there may
+ even if there is a group with the name DEFINE. In this case, there may
be only one alternative in the rest of the conditional group. It is al-
- ways skipped if control reaches this point in the pattern; the idea of
- DEFINE is that it can be used to define subroutines that can be refer-
- enced from elsewhere. (The use of subroutines is described below.) For
- example, a pattern to match an IPv4 address such as "192.168.23.245"
+ ways skipped if control reaches this point in the pattern; the idea of
+ DEFINE is that it can be used to define subroutines that can be refer-
+ enced from elsewhere. (The use of subroutines is described below.) For
+ example, a pattern to match an IPv4 address such as "192.168.23.245"
could be written like this (ignore white space and line breaks):
(?(DEFINE) (?<byte> 2[0-4]\d | 25[0-5] | 1\d\d | [1-9]?\d) )
\b (?&byte) (\.(?&byte)){3} \b
- The first part of the pattern is a DEFINE group inside which a another
- group named "byte" is defined. This matches an individual component of
- an IPv4 address (a number less than 256). When matching takes place,
- this part of the pattern is skipped because DEFINE acts like a false
- condition. The rest of the pattern uses references to the named group
- to match the four dot-separated components of an IPv4 address, insist-
+ The first part of the pattern is a DEFINE group inside which another
+ group named "byte" is defined. This matches an individual component of
+ an IPv4 address (a number less than 256). When matching takes place,
+ this part of the pattern is skipped because DEFINE acts like a false
+ condition. The rest of the pattern uses references to the named group
+ to match the four dot-separated components of an IPv4 address, insist-
ing on a word boundary at each end.
Checking the PCRE2 version
- Programs that link with a PCRE2 library can check the version by call-
- ing pcre2_config() with appropriate arguments. Users of applications
- that do not have access to the underlying code cannot do this. A spe-
- cial "condition" called VERSION exists to allow such users to discover
+ Programs that link with a PCRE2 library can check the version by call-
+ ing pcre2_config() with appropriate arguments. Users of applications
+ that do not have access to the underlying code cannot do this. A spe-
+ cial "condition" called VERSION exists to allow such users to discover
which version of PCRE2 they are dealing with by using this condition to
- match a string such as "yesno". VERSION must be followed either by "="
+ match a string such as "yesno". VERSION must be followed either by "="
or ">=" and a version number. For example:
(?(VERSION>=10.4)yes|no)
- This pattern matches "yes" if the PCRE2 version is greater or equal to
- 10.4, or "no" otherwise. The fractional part of the version number may
+ This pattern matches "yes" if the PCRE2 version is greater than or equal to
+ 10.4, or "no" otherwise. The fractional part of the version number may
not contain more than two digits.
Assertion conditions
- If the condition is not in any of the above formats, it must be a
- parenthesized assertion. This may be a positive or negative lookahead
- or lookbehind assertion. However, it must be a traditional atomic as-
+ If the condition is not in any of the above formats, it must be a
+ parenthesized assertion. This may be a positive or negative lookahead
+ or lookbehind assertion. However, it must be a traditional atomic as-
sertion, not one of the PCRE2-specific non-atomic assertions.
- Consider this pattern, again containing non-significant white space,
+ Consider this pattern, again containing non-significant white space,
and with the two alternatives on the second line:
(?(?=[^a-z]*[a-z])
\d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} )
- The condition is a positive lookahead assertion that matches an op-
+ The condition is a positive lookahead assertion that matches an op-
tional sequence of non-letters followed by a letter. In other words, it
tests for the presence of at least one letter in the subject. If a let-
- ter is found, the subject is matched against the first alternative;
- otherwise it is matched against the second. This pattern matches
- strings in one of the two forms dd-aaa-dd or dd-dd-dd, where aaa are
+ ter is found, the subject is matched against the first alternative;
+ otherwise it is matched against the second. This pattern matches
+ strings in one of the two forms dd-aaa-dd or dd-dd-dd, where aaa are
letters and dd are digits.
When an assertion that is a condition contains capture groups, any cap-
- turing that occurs in a matching branch is retained afterwards, for
- both positive and negative assertions, because matching always contin-
- ues after the assertion, whether it succeeds or fails. (Compare non-
- conditional assertions, for which captures are retained only for posi-
+ turing that occurs in a matching branch is retained afterwards, for
+ both positive and negative assertions, because matching always contin-
+ ues after the assertion, whether it succeeds or fails. (Compare non-
+ conditional assertions, for which captures are retained only for posi-
tive assertions that succeed.)
COMMENTS
There are two ways of including comments in patterns that are processed
- by PCRE2. In both cases, the start of the comment must not be in a
- character class, nor in the middle of any other sequence of related
- characters such as (?: or a group name or number. The characters that
+ by PCRE2. In both cases, the start of the comment must not be in a
+ character class, nor in the middle of any other sequence of related
+ characters such as (?: or a group name or number. The characters that
make up a comment play no part in the pattern matching.
- The sequence (?# marks the start of a comment that continues up to the
- next closing parenthesis. Nested parentheses are not permitted. If the
- PCRE2_EXTENDED or PCRE2_EXTENDED_MORE option is set, an unescaped #
- character also introduces a comment, which in this case continues to
- immediately after the next newline character or character sequence in
+ The sequence (?# marks the start of a comment that continues up to the
+ next closing parenthesis. Nested parentheses are not permitted. If the
+ PCRE2_EXTENDED or PCRE2_EXTENDED_MORE option is set, an unescaped #
+ character also introduces a comment, which in this case continues to
+ immediately after the next newline character or character sequence in
the pattern. Which characters are interpreted as newlines is controlled
- by an option passed to the compiling function or by a special sequence
+ by an option passed to the compiling function or by a special sequence
at the start of the pattern, as described in the section entitled "New-
line conventions" above. Note that the end of this type of comment is a
- literal newline sequence in the pattern; escape sequences that happen
+ literal newline sequence in the pattern; escape sequences that happen
to represent a newline do not count. For example, consider this pattern
- when PCRE2_EXTENDED is set, and the default newline convention (a sin-
+ when PCRE2_EXTENDED is set, and the default newline convention (a sin-
gle linefeed character) is in force:
abc #comment \n still comment
- On encountering the # character, pcre2_compile() skips along, looking
- for a newline in the pattern. The sequence \n is still literal at this
- stage, so it does not terminate the comment. Only an actual character
+ On encountering the # character, pcre2_compile() skips along, looking
+ for a newline in the pattern. The sequence \n is still literal at this
+ stage, so it does not terminate the comment. Only an actual character
with the code value 0x0a (the default newline) does so.
RECURSIVE PATTERNS
- Consider the problem of matching a string in parentheses, allowing for
- unlimited nested parentheses. Without the use of recursion, the best
- that can be done is to use a pattern that matches up to some fixed
- depth of nesting. It is not possible to handle an arbitrary nesting
+ Consider the problem of matching a string in parentheses, allowing for
+ unlimited nested parentheses. Without the use of recursion, the best
+ that can be done is to use a pattern that matches up to some fixed
+ depth of nesting. It is not possible to handle an arbitrary nesting
depth.
For some time, Perl has provided a facility that allows regular expres-
- sions to recurse (amongst other things). It does this by interpolating
- Perl code in the expression at run time, and the code can refer to the
+ sions to recurse (amongst other things). It does this by interpolating
+ Perl code in the expression at run time, and the code can refer to the
expression itself. A Perl pattern using code interpolation to solve the
parentheses problem can be created like this:
@@ -8666,67 +8762,67 @@ RECURSIVE PATTERNS
The (?p{...}) item interpolates Perl code at run time, and in this case
refers recursively to the pattern in which it appears.
- Obviously, PCRE2 cannot support the interpolation of Perl code. In-
- stead, it supports special syntax for recursion of the entire pattern,
+ Obviously, PCRE2 cannot support the interpolation of Perl code. In-
+ stead, it supports special syntax for recursion of the entire pattern,
and also for individual capture group recursion. After its introduction
in PCRE1 and Python, this kind of recursion was subsequently introduced
into Perl at release 5.10.
- A special item that consists of (? followed by a number greater than
- zero and a closing parenthesis is a recursive subroutine call of the
- capture group of the given number, provided that it occurs inside that
- group. (If not, it is a non-recursive subroutine call, which is de-
+ A special item that consists of (? followed by a number greater than
+ zero and a closing parenthesis is a recursive subroutine call of the
+ capture group of the given number, provided that it occurs inside that
+ group. (If not, it is a non-recursive subroutine call, which is de-
scribed in the next section.) The special item (?R) or (?0) is a recur-
sive call of the entire regular expression.
- This PCRE2 pattern solves the nested parentheses problem (assume the
+ This PCRE2 pattern solves the nested parentheses problem (assume the
PCRE2_EXTENDED option is set so that white space is ignored):
\( ( [^()]++ | (?R) )* \)
- First it matches an opening parenthesis. Then it matches any number of
- substrings which can either be a sequence of non-parentheses, or a re-
+ First it matches an opening parenthesis. Then it matches any number of
+ substrings which can either be a sequence of non-parentheses, or a re-
cursive match of the pattern itself (that is, a correctly parenthesized
- substring). Finally there is a closing parenthesis. Note the use of a
- possessive quantifier to avoid backtracking into sequences of non-
+ substring). Finally there is a closing parenthesis. Note the use of a
+ possessive quantifier to avoid backtracking into sequences of non-
parentheses.
- If this were part of a larger pattern, you would not want to recurse
+ If this were part of a larger pattern, you would not want to recurse
the entire pattern, so instead you could use this:
( \( ( [^()]++ | (?1) )* \) )
- We have put the pattern into parentheses, and caused the recursion to
+ We have put the pattern into parentheses, and caused the recursion to
refer to them instead of the whole pattern.
- In a larger pattern, keeping track of parenthesis numbers can be
- tricky. This is made easier by the use of relative references. Instead
+ In a larger pattern, keeping track of parenthesis numbers can be
+ tricky. This is made easier by the use of relative references. Instead
of (?1) in the pattern above you can write (?-2) to refer to the second
- most recently opened parentheses preceding the recursion. In other
- words, a negative number counts capturing parentheses leftwards from
+ most recently opened parentheses preceding the recursion. In other
+ words, a negative number counts capturing parentheses leftwards from
the point at which it is encountered.
- Be aware however, that if duplicate capture group numbers are in use,
- relative references refer to the earliest group with the appropriate
+ Be aware however, that if duplicate capture group numbers are in use,
+ relative references refer to the earliest group with the appropriate
number. Consider, for example:
(?|(a)|(b)) (c) (?-2)
The first two capture groups (a) and (b) are both numbered 1, and group
- (c) is number 2. When the reference (?-2) is encountered, the second
- most recently opened parentheses has the number 1, but it is the first
+ (c) is number 2. When the reference (?-2) is encountered, the second
+ most recently opened parentheses has the number 1, but it is the first
such group (the (a) group) to which the recursion refers. This would be
- the same if an absolute reference (?1) was used. In other words, rela-
+ the same if an absolute reference (?1) was used. In other words, rela-
tive references are just a shorthand for computing a group number.
- It is also possible to refer to subsequent capture groups, by writing
- references such as (?+2). However, these cannot be recursive because
- the reference is not inside the parentheses that are referenced. They
- are always non-recursive subroutine calls, as described in the next
+ It is also possible to refer to subsequent capture groups, by writing
+ references such as (?+2). However, these cannot be recursive because
+ the reference is not inside the parentheses that are referenced. They
+ are always non-recursive subroutine calls, as described in the next
section.
- An alternative approach is to use named parentheses. The Perl syntax
- for this is (?&name); PCRE1's earlier syntax (?P>name) is also sup-
+ An alternative approach is to use named parentheses. The Perl syntax
+ for this is (?&name); PCRE1's earlier syntax (?P>name) is also sup-
ported. We could rewrite the above example as follows:
(?<pn> \( ( [^()]++ | (?&pn) )* \) )
@@ -8735,57 +8831,57 @@ RECURSIVE PATTERNS
used.
The example pattern that we have been looking at contains nested unlim-
- ited repeats, and so the use of a possessive quantifier for matching
- strings of non-parentheses is important when applying the pattern to
+ ited repeats, and so the use of a possessive quantifier for matching
+ strings of non-parentheses is important when applying the pattern to
strings that do not match. For example, when this pattern is applied to
(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()
- it yields "no match" quickly. However, if a possessive quantifier is
- not used, the match runs for a very long time indeed because there are
- so many different ways the + and * repeats can carve up the subject,
+ it yields "no match" quickly. However, if a possessive quantifier is
+ not used, the match runs for a very long time indeed because there are
+ so many different ways the + and * repeats can carve up the subject,
and all have to be tested before failure can be reported.
- At the end of a match, the values of capturing parentheses are those
- from the outermost level. If you want to obtain intermediate values, a
+ At the end of a match, the values of capturing parentheses are those
+ from the outermost level. If you want to obtain intermediate values, a
callout function can be used (see below and the pcre2callout documenta-
tion). If the pattern above is matched against
(ab(cd)ef)
- the value for the inner capturing parentheses (numbered 2) is "ef",
- which is the last value taken on at the top level. If a capture group
- is not matched at the top level, its final captured value is unset,
- even if it was (temporarily) set at a deeper level during the matching
+ the value for the inner capturing parentheses (numbered 2) is "ef",
+ which is the last value taken on at the top level. If a capture group
+ is not matched at the top level, its final captured value is unset,
+ even if it was (temporarily) set at a deeper level during the matching
process.
- Do not confuse the (?R) item with the condition (R), which tests for
- recursion. Consider this pattern, which matches text in angle brack-
- ets, allowing for arbitrary nesting. Only digits are allowed in nested
- brackets (that is, when recursing), whereas any characters are permit-
+ Do not confuse the (?R) item with the condition (R), which tests for
+ recursion. Consider this pattern, which matches text in angle brack-
+ ets, allowing for arbitrary nesting. Only digits are allowed in nested
+ brackets (that is, when recursing), whereas any characters are permit-
ted at the outer level.
< (?: (?(R) \d++ | [^<>]*+) | (?R)) * >
- In this pattern, (?(R) is the start of a conditional group, with two
- different alternatives for the recursive and non-recursive cases. The
+ In this pattern, (?(R) is the start of a conditional group, with two
+ different alternatives for the recursive and non-recursive cases. The
(?R) item is the actual recursive call.
Differences in recursion processing between PCRE2 and Perl
Some former differences between PCRE2 and Perl no longer exist.
- Before release 10.30, recursion processing in PCRE2 differed from Perl
- in that a recursive subroutine call was always treated as an atomic
- group. That is, once it had matched some of the subject string, it was
- never re-entered, even if it contained untried alternatives and there
- was a subsequent matching failure. (Historical note: PCRE implemented
+ Before release 10.30, recursion processing in PCRE2 differed from Perl
+ in that a recursive subroutine call was always treated as an atomic
+ group. That is, once it had matched some of the subject string, it was
+ never re-entered, even if it contained untried alternatives and there
+ was a subsequent matching failure. (Historical note: PCRE implemented
recursion before Perl did.)
- Starting with release 10.30, recursive subroutine calls are no longer
+ Starting with release 10.30, recursive subroutine calls are no longer
treated as atomic. That is, they can be re-entered to try unused alter-
- natives if there is a matching failure later in the pattern. This is
- now compatible with the way Perl works. If you want a subroutine call
+ natives if there is a matching failure later in the pattern. This is
+ now compatible with the way Perl works. If you want a subroutine call
to be atomic, you must explicitly enclose it in an atomic group.
Supporting backtracking into recursions simplifies certain types of re-
@@ -8793,47 +8889,47 @@ RECURSIVE PATTERNS
^((.)(?1)\2|.?)$
- The second branch in the group matches a single central character in
- the palindrome when there are an odd number of characters, or nothing
- when there are an even number of characters, but in order to work it
- has to be able to try the second case when the rest of the pattern
+ The second branch in the group matches a single central character in
+ the palindrome when there are an odd number of characters, or nothing
+ when there are an even number of characters, but in order to work it
+ has to be able to try the second case when the rest of the pattern
match fails. If you want to match typical palindromic phrases, the pat-
- tern has to ignore all non-word characters, which can be done like
+ tern has to ignore all non-word characters, which can be done like
this:
^\W*+((.)\W*+(?1)\W*+\2|\W*+.?)\W*+$
- If run with the PCRE2_CASELESS option, this pattern matches phrases
- such as "A man, a plan, a canal: Panama!". Note the use of the posses-
- sive quantifier *+ to avoid backtracking into sequences of non-word
+ If run with the PCRE2_CASELESS option, this pattern matches phrases
+ such as "A man, a plan, a canal: Panama!". Note the use of the posses-
+ sive quantifier *+ to avoid backtracking into sequences of non-word
characters. Without this, PCRE2 takes a great deal longer (ten times or
- more) to match typical phrases, and Perl takes so long that you think
+ more) to match typical phrases, and Perl takes so long that you think
it has gone into a loop.
- Another way in which PCRE2 and Perl used to differ in their recursion
- processing is in the handling of captured values. Formerly in Perl,
- when a group was called recursively or as a subroutine (see the next
+ Another way in which PCRE2 and Perl used to differ in their recursion
+ processing is in the handling of captured values. Formerly in Perl,
+ when a group was called recursively or as a subroutine (see the next
section), it had no access to any values that were captured outside the
- recursion, whereas in PCRE2 these values can be referenced. Consider
+ recursion, whereas in PCRE2 these values can be referenced. Consider
this pattern:
^(.)(\1|a(?2))
- This pattern matches "bab". The first capturing parentheses match "b",
+ This pattern matches "bab". The first capturing parentheses match "b",
then in the second group, when the backreference \1 fails to match "b",
the second alternative matches "a" and then recurses. In the recursion,
- \1 does now match "b" and so the whole match succeeds. This match used
+ \1 does now match "b" and so the whole match succeeds. This match used
to fail in Perl, but in later versions (I tried 5.024) it now works.
GROUPS AS SUBROUTINES
- If the syntax for a recursive group call (either by number or by name)
- is used outside the parentheses to which it refers, it operates a bit
- like a subroutine in a programming language. More accurately, PCRE2
+ If the syntax for a recursive group call (either by number or by name)
+ is used outside the parentheses to which it refers, it operates a bit
+ like a subroutine in a programming language. More accurately, PCRE2
treats the referenced group as an independent subpattern which it tries
- to match at the current matching position. The called group may be de-
- fined before or after the reference. A numbered reference can be abso-
+ to match at the current matching position. The called group may be de-
+ fined before or after the reference. A numbered reference can be abso-
lute or relative, as in these examples:
(...(absolute)...)...(?2)...
@@ -8844,106 +8940,106 @@ GROUPS AS SUBROUTINES
(sens|respons)e and \1ibility
- matches "sense and sensibility" and "response and responsibility", but
+ matches "sense and sensibility" and "response and responsibility", but
not "sense and responsibility". If instead the pattern
(sens|respons)e and (?1)ibility
- is used, it does match "sense and responsibility" as well as the other
- two strings. Another example is given in the discussion of DEFINE
+ is used, it does match "sense and responsibility" as well as the other
+ two strings. Another example is given in the discussion of DEFINE
above.
- Like recursions, subroutine calls used to be treated as atomic, but
- this changed at PCRE2 release 10.30, so backtracking into subroutine
- calls can now occur. However, any capturing parentheses that are set
+ Like recursions, subroutine calls used to be treated as atomic, but
+ this changed at PCRE2 release 10.30, so backtracking into subroutine
+ calls can now occur. However, any capturing parentheses that are set
during the subroutine call revert to their previous values afterwards.
- Processing options such as case-independence are fixed when a group is
- defined, so if it is used as a subroutine, such options cannot be
+ Processing options such as case-independence are fixed when a group is
+ defined, so if it is used as a subroutine, such options cannot be
changed for different calls. For example, consider this pattern:
(abc)(?i:(?-1))
- It matches "abcabc". It does not match "abcABC" because the change of
+ It matches "abcabc". It does not match "abcABC" because the change of
processing option does not affect the called group.
- The behaviour of backtracking control verbs in groups when called as
+ The behaviour of backtracking control verbs in groups when called as
subroutines is described in the section entitled "Backtracking verbs in
subroutines" below.
ONIGURUMA SUBROUTINE SYNTAX
- For compatibility with Oniguruma, the non-Perl syntax \g followed by a
+ For compatibility with Oniguruma, the non-Perl syntax \g followed by a
name or a number enclosed either in angle brackets or single quotes, is
an alternative syntax for calling a group as a subroutine, possibly re-
- cursively. Here are two of the examples used above, rewritten using
+ cursively. Here are two of the examples used above, rewritten using
this syntax:
(?<pn> \( ( (?>[^()]+) | \g<pn> )* \) )
(sens|respons)e and \g'1'ibility
- PCRE2 supports an extension to Oniguruma: if a number is preceded by a
+ PCRE2 supports an extension to Oniguruma: if a number is preceded by a
plus or a minus sign it is taken as a relative reference. For example:
(abc)(?i:\g<-1>)
- Note that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are not
- synonymous. The former is a backreference; the latter is a subroutine
+ Note that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are not
+ synonymous. The former is a backreference; the latter is a subroutine
call.
CALLOUTS
Perl has a feature whereby using the sequence (?{...}) causes arbitrary
- Perl code to be obeyed in the middle of matching a regular expression.
+ Perl code to be obeyed in the middle of matching a regular expression.
This makes it possible, amongst other things, to extract different sub-
strings that match the same pair of parentheses when there is a repeti-
tion.
- PCRE2 provides a similar feature, but of course it cannot obey arbi-
- trary Perl code. The feature is called "callout". The caller of PCRE2
- provides an external function by putting its entry point in a match
- context using the function pcre2_set_callout(), and then passing that
- context to pcre2_match() or pcre2_dfa_match(). If no match context is
+ PCRE2 provides a similar feature, but of course it cannot obey arbi-
+ trary Perl code. The feature is called "callout". The caller of PCRE2
+ provides an external function by putting its entry point in a match
+ context using the function pcre2_set_callout(), and then passing that
+ context to pcre2_match() or pcre2_dfa_match(). If no match context is
passed, or if the callout entry point is set to NULL, callouts are dis-
abled.
- Within a regular expression, (?C<arg>) indicates a point at which the
- external function is to be called. There are two kinds of callout:
- those with a numerical argument and those with a string argument. (?C)
- on its own with no argument is treated as (?C0). A numerical argument
- allows the application to distinguish between different callouts.
- String arguments were added for release 10.20 to make it possible for
- script languages that use PCRE2 to embed short scripts within patterns
+ Within a regular expression, (?C<arg>) indicates a point at which the
+ external function is to be called. There are two kinds of callout:
+ those with a numerical argument and those with a string argument. (?C)
+ on its own with no argument is treated as (?C0). A numerical argument
+ allows the application to distinguish between different callouts.
+ String arguments were added for release 10.20 to make it possible for
+ script languages that use PCRE2 to embed short scripts within patterns
in a similar way to Perl.
During matching, when PCRE2 reaches a callout point, the external func-
- tion is called. It is provided with the number or string argument of
- the callout, the position in the pattern, and one item of data that is
+ tion is called. It is provided with the number or string argument of
+ the callout, the position in the pattern, and one item of data that is
also set in the match block. The callout function may cause matching to
proceed, to backtrack, or to fail.
- By default, PCRE2 implements a number of optimizations at matching
- time, and one side-effect is that sometimes callouts are skipped. If
- you need all possible callouts to happen, you need to set options that
- disable the relevant optimizations. More details, including a complete
- description of the programming interface to the callout function, are
+ By default, PCRE2 implements a number of optimizations at matching
+ time, and one side-effect is that sometimes callouts are skipped. If
+ you need all possible callouts to happen, you need to set options that
+ disable the relevant optimizations. More details, including a complete
+ description of the programming interface to the callout function, are
given in the pcre2callout documentation.
Callouts with numerical arguments
- If you just want to have a means of identifying different callout
- points, put a number less than 256 after the letter C. For example,
+ If you just want to have a means of identifying different callout
+ points, put a number less than 256 after the letter C. For example,
this pattern has two callout points:
(?C1)abc(?C2)def
- If the PCRE2_AUTO_CALLOUT flag is passed to pcre2_compile(), numerical
- callouts are automatically installed before each item in the pattern.
- They are all numbered 255. If there is a conditional group in the pat-
+ If the PCRE2_AUTO_CALLOUT flag is passed to pcre2_compile(), numerical
+ callouts are automatically installed before each item in the pattern.
+ They are all numbered 255. If there is a conditional group in the pat-
tern whose condition is an assertion, an additional callout is inserted
- just before the condition. An explicit callout may also be set at this
+ just before the condition. An explicit callout may also be set at this
position, as in this example:
(?(?C9)(?=a)abc|def)
@@ -8953,78 +9049,78 @@ CALLOUTS
Callouts with string arguments
- A delimited string may be used instead of a number as a callout argu-
- ment. The starting delimiter must be one of ` ' " ^ % # $ { and the
+ A delimited string may be used instead of a number as a callout argu-
+ ment. The starting delimiter must be one of ` ' " ^ % # $ { and the
ending delimiter is the same as the start, except for {, where the end-
- ing delimiter is }. If the ending delimiter is needed within the
+ ing delimiter is }. If the ending delimiter is needed within the
string, it must be doubled. For example:
(?C'ab ''c'' d')xyz(?C{any text})pqr
- The doubling is removed before the string is passed to the callout
+ The doubling is removed before the string is passed to the callout
function.
BACKTRACKING CONTROL
- There are a number of special "Backtracking Control Verbs" (to use
- Perl's terminology) that modify the behaviour of backtracking during
- matching. They are generally of the form (*VERB) or (*VERB:NAME). Some
+ There are a number of special "Backtracking Control Verbs" (to use
+ Perl's terminology) that modify the behaviour of backtracking during
+ matching. They are generally of the form (*VERB) or (*VERB:NAME). Some
verbs take either form, and may behave differently depending on whether
- or not a name argument is present. The names are not required to be
+ or not a name argument is present. The names are not required to be
unique within the pattern.
- By default, for compatibility with Perl, a name is any sequence of
+ By default, for compatibility with Perl, a name is any sequence of
characters that does not include a closing parenthesis. The name is not
- processed in any way, and it is not possible to include a closing
- parenthesis in the name. This can be changed by setting the
- PCRE2_ALT_VERBNAMES option, but the result is no longer Perl-compati-
+ processed in any way, and it is not possible to include a closing
+ parenthesis in the name. This can be changed by setting the
+ PCRE2_ALT_VERBNAMES option, but the result is no longer Perl-compati-
ble.
- When PCRE2_ALT_VERBNAMES is set, backslash processing is applied to
- verb names and only an unescaped closing parenthesis terminates the
- name. However, the only backslash items that are permitted are \Q, \E,
- and sequences such as \x{100} that define character code points. Char-
+ When PCRE2_ALT_VERBNAMES is set, backslash processing is applied to
+ verb names and only an unescaped closing parenthesis terminates the
+ name. However, the only backslash items that are permitted are \Q, \E,
+ and sequences such as \x{100} that define character code points. Char-
acter type escapes such as \d are faulted.
A closing parenthesis can be included in a name either as \) or between
- \Q and \E. In addition to backslash processing, if the PCRE2_EXTENDED
+ \Q and \E. In addition to backslash processing, if the PCRE2_EXTENDED
or PCRE2_EXTENDED_MORE option is also set, unescaped whitespace in verb
names is skipped, and #-comments are recognized, exactly as in the rest
- of the pattern. PCRE2_EXTENDED and PCRE2_EXTENDED_MORE do not affect
+ of the pattern. PCRE2_EXTENDED and PCRE2_EXTENDED_MORE do not affect
verb names unless PCRE2_ALT_VERBNAMES is also set.
- The maximum length of a name is 255 in the 8-bit library and 65535 in
- the 16-bit and 32-bit libraries. If the name is empty, that is, if the
- closing parenthesis immediately follows the colon, the effect is as if
+ The maximum length of a name is 255 in the 8-bit library and 65535 in
+ the 16-bit and 32-bit libraries. If the name is empty, that is, if the
+ closing parenthesis immediately follows the colon, the effect is as if
the colon were not there. Any number of these verbs may occur in a pat-
tern. Except for (*ACCEPT), they may not be quantified.
- Since these verbs are specifically related to backtracking, most of
- them can be used only when the pattern is to be matched using the tra-
+ Since these verbs are specifically related to backtracking, most of
+ them can be used only when the pattern is to be matched using the tra-
ditional matching function, because that uses a backtracking algorithm.
- With the exception of (*FAIL), which behaves like a failing negative
+ With the exception of (*FAIL), which behaves like a failing negative
assertion, the backtracking control verbs cause an error if encountered
by the DFA matching function.
- The behaviour of these verbs in repeated groups, assertions, and in
- capture groups called as subroutines (whether or not recursively) is
+ The behaviour of these verbs in repeated groups, assertions, and in
+ capture groups called as subroutines (whether or not recursively) is
documented below.
Optimizations that affect backtracking verbs
PCRE2 contains some optimizations that are used to speed up matching by
running some checks at the start of each match attempt. For example, it
- may know the minimum length of matching subject, or that a particular
+ may know the minimum length of matching subject, or that a particular
character must be present. When one of these optimizations bypasses the
- running of a match, any included backtracking verbs will not, of
+ running of a match, any included backtracking verbs will not, of
course, be processed. You can suppress the start-of-match optimizations
- by setting the PCRE2_NO_START_OPTIMIZE option when calling pcre2_com-
- pile(), or by starting the pattern with (*NO_START_OPT). There is more
+ by setting the PCRE2_NO_START_OPTIMIZE option when calling pcre2_com-
+ pile(), or by starting the pattern with (*NO_START_OPT). There is more
discussion of this option in the section entitled "Compiling a pattern"
in the pcre2api documentation.
- Experiments with Perl suggest that it too has similar optimizations,
+ Experiments with Perl suggest that it too has similar optimizations,
and like PCRE2, turning them off can change the result of a match.
Verbs that act immediately
@@ -9033,77 +9129,77 @@ BACKTRACKING CONTROL
(*ACCEPT) or (*ACCEPT:NAME)
- This verb causes the match to end successfully, skipping the remainder
- of the pattern. However, when it is inside a capture group that is
+ This verb causes the match to end successfully, skipping the remainder
+ of the pattern. However, when it is inside a capture group that is
called as a subroutine, only that group is ended successfully. Matching
then continues at the outer level. If (*ACCEPT) is triggered in a posi-
- tive assertion, the assertion succeeds; in a negative assertion, the
+ tive assertion, the assertion succeeds; in a negative assertion, the
assertion fails.
- If (*ACCEPT) is inside capturing parentheses, the data so far is cap-
+ If (*ACCEPT) is inside capturing parentheses, the data so far is cap-
tured. For example:
A((?:A|B(*ACCEPT)|C)D)
- This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is cap-
+ This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is cap-
tured by the outer parentheses.
- (*ACCEPT) is the only backtracking verb that is allowed to be quanti-
- fied because an ungreedy quantification with a minimum of zero acts
+ (*ACCEPT) is the only backtracking verb that is allowed to be quanti-
+ fied because an ungreedy quantification with a minimum of zero acts
only when a backtrack happens. Consider, for example,
(A(*ACCEPT)??B)C
- where A, B, and C may be complex expressions. After matching "A", the
- matcher processes "BC"; if that fails, causing a backtrack, (*ACCEPT)
- is triggered and the match succeeds. In both cases, all but C is cap-
- tured. Whereas (*COMMIT) (see below) means "fail on backtrack", a re-
+ where A, B, and C may be complex expressions. After matching "A", the
+ matcher processes "BC"; if that fails, causing a backtrack, (*ACCEPT)
+ is triggered and the match succeeds. In both cases, all but C is cap-
+ tured. Whereas (*COMMIT) (see below) means "fail on backtrack", a re-
peated (*ACCEPT) of this type means "succeed on backtrack".
- Warning: (*ACCEPT) should not be used within a script run group, be-
- cause it causes an immediate exit from the group, bypassing the script
+ Warning: (*ACCEPT) should not be used within a script run group, be-
+ cause it causes an immediate exit from the group, bypassing the script
run checking.
(*FAIL) or (*FAIL:NAME)
- This verb causes a matching failure, forcing backtracking to occur. It
- may be abbreviated to (*F). It is equivalent to (?!) but easier to
+ This verb causes a matching failure, forcing backtracking to occur. It
+ may be abbreviated to (*F). It is equivalent to (?!) but easier to
read. The Perl documentation notes that it is probably useful only when
combined with (?{}) or (??{}). Those are, of course, Perl features that
- are not present in PCRE2. The nearest equivalent is the callout fea-
+ are not present in PCRE2. The nearest equivalent is the callout fea-
ture, as for example in this pattern:
a+(?C)(*FAIL)
- A match with the string "aaaa" always fails, but the callout is taken
+ A match with the string "aaaa" always fails, but the callout is taken
before each backtrack happens (in this example, 10 times).
- (*ACCEPT:NAME) and (*FAIL:NAME) behave the same as (*MARK:NAME)(*AC-
- CEPT) and (*MARK:NAME)(*FAIL), respectively, that is, a (*MARK) is
+ (*ACCEPT:NAME) and (*FAIL:NAME) behave the same as (*MARK:NAME)(*AC-
+ CEPT) and (*MARK:NAME)(*FAIL), respectively, that is, a (*MARK) is
recorded just before the verb acts.
Recording which path was taken
- There is one verb whose main purpose is to track how a match was ar-
- rived at, though it also has a secondary use in conjunction with ad-
+ There is one verb whose main purpose is to track how a match was ar-
+ rived at, though it also has a secondary use in conjunction with ad-
vancing the match starting point (see (*SKIP) below).
(*MARK:NAME) or (*:NAME)
- A name is always required with this verb. For all the other backtrack-
+ A name is always required with this verb. For all the other backtrack-
ing control verbs, a NAME argument is optional.
- When a match succeeds, the name of the last-encountered mark name on
+ When a match succeeds, the name of the last-encountered mark name on
the matching path is passed back to the caller as described in the sec-
tion entitled "Other information about the match" in the pcre2api docu-
- mentation. This applies to all instances of (*MARK) and other verbs,
+ mentation. This applies to all instances of (*MARK) and other verbs,
including those inside assertions and atomic groups. However, there are
- differences in those cases when (*MARK) is used in conjunction with
+ differences in those cases when (*MARK) is used in conjunction with
(*SKIP) as described below.
- The mark name that was last encountered on the matching path is passed
- back. A verb without a NAME argument is ignored for this purpose. Here
- is an example of pcre2test output, where the "mark" modifier requests
+ The mark name that was last encountered on the matching path is passed
+ back. A verb without a NAME argument is ignored for this purpose. Here
+ is an example of pcre2test output, where the "mark" modifier requests
the retrieval and outputting of (*MARK) data:
re> /X(*MARK:A)Y|X(*MARK:B)Z/mark
@@ -9115,76 +9211,76 @@ BACKTRACKING CONTROL
MK: B
The (*MARK) name is tagged with "MK:" in this output, and in this exam-
- ple it indicates which of the two alternatives matched. This is a more
- efficient way of obtaining this information than putting each alterna-
+ ple it indicates which of the two alternatives matched. This is a more
+ efficient way of obtaining this information than putting each alterna-
tive in its own capturing parentheses.
- If a verb with a name is encountered in a positive assertion that is
- true, the name is recorded and passed back if it is the last-encoun-
+ If a verb with a name is encountered in a positive assertion that is
+ true, the name is recorded and passed back if it is the last-encoun-
tered. This does not happen for negative assertions or failing positive
assertions.
- After a partial match or a failed match, the last encountered name in
+ After a partial match or a failed match, the last encountered name in
the entire match process is returned. For example:
re> /X(*MARK:A)Y|X(*MARK:B)Z/mark
data> XP
No match, mark = B
- Note that in this unanchored example the mark is retained from the
+ Note that in this unanchored example the mark is retained from the
match attempt that started at the letter "X" in the subject. Subsequent
match attempts starting at "P" and then with an empty string do not get
as far as the (*MARK) item, but nevertheless do not reset it.
- If you are interested in (*MARK) values after failed matches, you
- should probably set the PCRE2_NO_START_OPTIMIZE option (see above) to
+ If you are interested in (*MARK) values after failed matches, you
+ should probably set the PCRE2_NO_START_OPTIMIZE option (see above) to
ensure that the match is always attempted.
Verbs that act after backtracking
The following verbs do nothing when they are encountered. Matching con-
- tinues with what follows, but if there is a subsequent match failure,
- causing a backtrack to the verb, a failure is forced. That is, back-
- tracking cannot pass to the left of the verb. However, when one of
+ tinues with what follows, but if there is a subsequent match failure,
+ causing a backtrack to the verb, a failure is forced. That is, back-
+ tracking cannot pass to the left of the verb. However, when one of
these verbs appears inside an atomic group or in a lookaround assertion
- that is true, its effect is confined to that group, because once the
- group has been matched, there is never any backtracking into it. Back-
+ that is true, its effect is confined to that group, because once the
+ group has been matched, there is never any backtracking into it. Back-
tracking from beyond an assertion or an atomic group ignores the entire
group, and seeks a preceding backtracking point.
- These verbs differ in exactly what kind of failure occurs when back-
- tracking reaches them. The behaviour described below is what happens
- when the verb is not in a subroutine or an assertion. Subsequent sec-
+ These verbs differ in exactly what kind of failure occurs when back-
+ tracking reaches them. The behaviour described below is what happens
+ when the verb is not in a subroutine or an assertion. Subsequent sec-
tions cover these special cases.
(*COMMIT) or (*COMMIT:NAME)
- This verb causes the whole match to fail outright if there is a later
+ This verb causes the whole match to fail outright if there is a later
matching failure that causes backtracking to reach it. Even if the pat-
- tern is unanchored, no further attempts to find a match by advancing
- the starting point take place. If (*COMMIT) is the only backtracking
+ tern is unanchored, no further attempts to find a match by advancing
+ the starting point take place. If (*COMMIT) is the only backtracking
verb that is encountered, once it has been passed pcre2_match() is com-
mitted to finding a match at the current starting point, or not at all.
For example:
a+(*COMMIT)b
- This matches "xxaab" but not "aacaab". It can be thought of as a kind
+ This matches "xxaab" but not "aacaab". It can be thought of as a kind
of dynamic anchor, or "I've started, so I must finish."
- The behaviour of (*COMMIT:NAME) is not the same as (*MARK:NAME)(*COM-
- MIT). It is like (*MARK:NAME) in that the name is remembered for pass-
- ing back to the caller. However, (*SKIP:NAME) searches only for names
+ The behaviour of (*COMMIT:NAME) is not the same as (*MARK:NAME)(*COM-
+ MIT). It is like (*MARK:NAME) in that the name is remembered for pass-
+ ing back to the caller. However, (*SKIP:NAME) searches only for names
that are set with (*MARK), ignoring those set by any of the other back-
tracking verbs.
- If there is more than one backtracking verb in a pattern, a different
- one that follows (*COMMIT) may be triggered first, so merely passing
+ If there is more than one backtracking verb in a pattern, a different
+ one that follows (*COMMIT) may be triggered first, so merely passing
(*COMMIT) during a match does not always guarantee that a match must be
at this starting point.
Note that (*COMMIT) at the start of a pattern is not the same as an an-
- chor, unless PCRE2's start-of-match optimizations are turned off, as
+ chor, unless PCRE2's start-of-match optimizations are turned off, as
shown in this output from pcre2test:
re> /(*COMMIT)abc/
@@ -9195,68 +9291,68 @@ BACKTRACKING CONTROL
data> xyzabc
No match
- For the first pattern, PCRE2 knows that any match must start with "a",
- so the optimization skips along the subject to "a" before applying the
- pattern to the first set of data. The match attempt then succeeds. The
- second pattern disables the optimization that skips along to the first
- character. The pattern is now applied starting at "x", and so the
- (*COMMIT) causes the match to fail without trying any other starting
+ For the first pattern, PCRE2 knows that any match must start with "a",
+ so the optimization skips along the subject to "a" before applying the
+ pattern to the first set of data. The match attempt then succeeds. The
+ second pattern disables the optimization that skips along to the first
+ character. The pattern is now applied starting at "x", and so the
+ (*COMMIT) causes the match to fail without trying any other starting
points.
(*PRUNE) or (*PRUNE:NAME)
- This verb causes the match to fail at the current starting position in
+ This verb causes the match to fail at the current starting position in
the subject if there is a later matching failure that causes backtrack-
- ing to reach it. If the pattern is unanchored, the normal "bumpalong"
- advance to the next starting character then happens. Backtracking can
- occur as usual to the left of (*PRUNE), before it is reached, or when
- matching to the right of (*PRUNE), but if there is no match to the
- right, backtracking cannot cross (*PRUNE). In simple cases, the use of
- (*PRUNE) is just an alternative to an atomic group or possessive quan-
+ ing to reach it. If the pattern is unanchored, the normal "bumpalong"
+ advance to the next starting character then happens. Backtracking can
+ occur as usual to the left of (*PRUNE), before it is reached, or when
+ matching to the right of (*PRUNE), but if there is no match to the
+ right, backtracking cannot cross (*PRUNE). In simple cases, the use of
+ (*PRUNE) is just an alternative to an atomic group or possessive quan-
tifier, but there are some uses of (*PRUNE) that cannot be expressed in
- any other way. In an anchored pattern (*PRUNE) has the same effect as
+ any other way. In an anchored pattern (*PRUNE) has the same effect as
(*COMMIT).
The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE).
It is like (*MARK:NAME) in that the name is remembered for passing back
- to the caller. However, (*SKIP:NAME) searches only for names set with
+ to the caller. However, (*SKIP:NAME) searches only for names set with
(*MARK), ignoring those set by other backtracking verbs.
(*SKIP)
- This verb, when given without a name, is like (*PRUNE), except that if
- the pattern is unanchored, the "bumpalong" advance is not to the next
+ This verb, when given without a name, is like (*PRUNE), except that if
+ the pattern is unanchored, the "bumpalong" advance is not to the next
character, but to the position in the subject where (*SKIP) was encoun-
- tered. (*SKIP) signifies that whatever text was matched leading up to
- it cannot be part of a successful match if there is a later mismatch.
+ tered. (*SKIP) signifies that whatever text was matched leading up to
+ it cannot be part of a successful match if there is a later mismatch.
Consider:
a+(*SKIP)b
- If the subject is "aaaac...", after the first match attempt fails
- (starting at the first character in the string), the starting point
+ If the subject is "aaaac...", after the first match attempt fails
+ (starting at the first character in the string), the starting point
skips on to start the next attempt at "c". Note that a possessive quan-
- tifier does not have the same effect as this example; although it would
- suppress backtracking during the first match attempt, the second at-
- tempt would start at the second character instead of skipping on to
+ tifier does not have the same effect as this example; although it would
+ suppress backtracking during the first match attempt, the second at-
+ tempt would start at the second character instead of skipping on to
"c".
- If (*SKIP) is used to specify a new starting position that is the same
- as the starting position of the current match, or (by being inside a
- lookbehind) earlier, the position specified by (*SKIP) is ignored, and
+ If (*SKIP) is used to specify a new starting position that is the same
+ as the starting position of the current match, or (by being inside a
+ lookbehind) earlier, the position specified by (*SKIP) is ignored, and
instead the normal "bumpalong" occurs.
(*SKIP:NAME)
- When (*SKIP) has an associated name, its behaviour is modified. When
- such a (*SKIP) is triggered, the previous path through the pattern is
- searched for the most recent (*MARK) that has the same name. If one is
- found, the "bumpalong" advance is to the subject position that corre-
- sponds to that (*MARK) instead of to where (*SKIP) was encountered. If
+ When (*SKIP) has an associated name, its behaviour is modified. When
+ such a (*SKIP) is triggered, the previous path through the pattern is
+ searched for the most recent (*MARK) that has the same name. If one is
+ found, the "bumpalong" advance is to the subject position that corre-
+ sponds to that (*MARK) instead of to where (*SKIP) was encountered. If
no (*MARK) with a matching name is found, the (*SKIP) is ignored.
- The search for a (*MARK) name uses the normal backtracking mechanism,
- which means that it does not see (*MARK) settings that are inside
+ The search for a (*MARK) name uses the normal backtracking mechanism,
+ which means that it does not see (*MARK) settings that are inside
atomic groups or assertions, because they are never re-entered by back-
tracking. Compare the following pcre2test examples:
@@ -9270,105 +9366,105 @@ BACKTRACKING CONTROL
0: b
1: b
- In the first example, the (*MARK) setting is in an atomic group, so it
+ In the first example, the (*MARK) setting is in an atomic group, so it
is not seen when (*SKIP:X) triggers, causing the (*SKIP) to be ignored.
- This allows the second branch of the pattern to be tried at the first
- character position. In the second example, the (*MARK) setting is not
- in an atomic group. This allows (*SKIP:X) to find the (*MARK) when it
+ This allows the second branch of the pattern to be tried at the first
+ character position. In the second example, the (*MARK) setting is not
+ in an atomic group. This allows (*SKIP:X) to find the (*MARK) when it
backtracks, and this causes a new matching attempt to start at the sec-
- ond character. This time, the (*MARK) is never seen because "a" does
+ ond character. This time, the (*MARK) is never seen because "a" does
not match "b", so the matcher immediately jumps to the second branch of
the pattern.
- Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It
+ Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It
ignores names that are set by other backtracking verbs.
(*THEN) or (*THEN:NAME)
- This verb causes a skip to the next innermost alternative when back-
- tracking reaches it. That is, it cancels any further backtracking
- within the current alternative. Its name comes from the observation
+ This verb causes a skip to the next innermost alternative when back-
+ tracking reaches it. That is, it cancels any further backtracking
+ within the current alternative. Its name comes from the observation
that it can be used for a pattern-based if-then-else block:
( COND1 (*THEN) FOO | COND2 (*THEN) BAR | COND3 (*THEN) BAZ ) ...
- If the COND1 pattern matches, FOO is tried (and possibly further items
- after the end of the group if FOO succeeds); on failure, the matcher
- skips to the second alternative and tries COND2, without backtracking
- into COND1. If that succeeds and BAR fails, COND3 is tried. If subse-
- quently BAZ fails, there are no more alternatives, so there is a back-
- track to whatever came before the entire group. If (*THEN) is not in-
+ If the COND1 pattern matches, FOO is tried (and possibly further items
+ after the end of the group if FOO succeeds); on failure, the matcher
+ skips to the second alternative and tries COND2, without backtracking
+ into COND1. If that succeeds and BAR fails, COND3 is tried. If subse-
+ quently BAZ fails, there are no more alternatives, so there is a back-
+ track to whatever came before the entire group. If (*THEN) is not in-
side an alternation, it acts like (*PRUNE).
- The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN).
+ The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN).
It is like (*MARK:NAME) in that the name is remembered for passing back
- to the caller. However, (*SKIP:NAME) searches only for names set with
+ to the caller. However, (*SKIP:NAME) searches only for names set with
(*MARK), ignoring those set by other backtracking verbs.
- A group that does not contain a | character is just a part of the en-
- closing alternative; it is not a nested alternation with only one al-
+ A group that does not contain a | character is just a part of the en-
+ closing alternative; it is not a nested alternation with only one al-
ternative. The effect of (*THEN) extends beyond such a group to the en-
- closing alternative. Consider this pattern, where A, B, etc. are com-
- plex pattern fragments that do not contain any | characters at this
+ closing alternative. Consider this pattern, where A, B, etc. are com-
+ plex pattern fragments that do not contain any | characters at this
level:
A (B(*THEN)C) | D
- If A and B are matched, but there is a failure in C, matching does not
+ If A and B are matched, but there is a failure in C, matching does not
backtrack into A; instead it moves to the next alternative, that is, D.
- However, if the group containing (*THEN) is given an alternative, it
+ However, if the group containing (*THEN) is given an alternative, it
behaves differently:
A (B(*THEN)C | (*FAIL)) | D
The effect of (*THEN) is now confined to the inner group. After a fail-
- ure in C, matching moves to (*FAIL), which causes the whole group to
- fail because there are no more alternatives to try. In this case,
+ ure in C, matching moves to (*FAIL), which causes the whole group to
+ fail because there are no more alternatives to try. In this case,
matching does backtrack into A.
- Note that a conditional group is not considered as having two alterna-
- tives, because only one is ever used. In other words, the | character
- in a conditional group has a different meaning. Ignoring white space,
+ Note that a conditional group is not considered as having two alterna-
+ tives, because only one is ever used. In other words, the | character
+ in a conditional group has a different meaning. Ignoring white space,
consider:
^.*? (?(?=a) a | b(*THEN)c )
If the subject is "ba", this pattern does not match. Because .*? is un-
- greedy, it initially matches zero characters. The condition (?=a) then
- fails, the character "b" is matched, but "c" is not. At this point,
- matching does not backtrack to .*? as might perhaps be expected from
- the presence of the | character. The conditional group is part of the
- single alternative that comprises the whole pattern, and so the match
- fails. (If there was a backtrack into .*?, allowing it to match "b",
+ greedy, it initially matches zero characters. The condition (?=a) then
+ fails, the character "b" is matched, but "c" is not. At this point,
+ matching does not backtrack to .*? as might perhaps be expected from
+ the presence of the | character. The conditional group is part of the
+ single alternative that comprises the whole pattern, and so the match
+ fails. (If there was a backtrack into .*?, allowing it to match "b",
the match would succeed.)
- The verbs just described provide four different "strengths" of control
+ The verbs just described provide four different "strengths" of control
when subsequent matching fails. (*THEN) is the weakest, carrying on the
- match at the next alternative. (*PRUNE) comes next, failing the match
- at the current starting position, but allowing an advance to the next
- character (for an unanchored pattern). (*SKIP) is similar, except that
+ match at the next alternative. (*PRUNE) comes next, failing the match
+ at the current starting position, but allowing an advance to the next
+ character (for an unanchored pattern). (*SKIP) is similar, except that
the advance may be more than one character. (*COMMIT) is the strongest,
causing the entire match to fail.
More than one backtracking verb
- If more than one backtracking verb is present in a pattern, the one
- that is backtracked onto first acts. For example, consider this pat-
+ If more than one backtracking verb is present in a pattern, the one
+ that is backtracked onto first acts. For example, consider this pat-
tern, where A, B, etc. are complex pattern fragments:
(A(*COMMIT)B(*THEN)C|ABD)
- If A matches but B fails, the backtrack to (*COMMIT) causes the entire
+ If A matches but B fails, the backtrack to (*COMMIT) causes the entire
match to fail. However, if A and B match, but C fails, the backtrack to
- (*THEN) causes the next alternative (ABD) to be tried. This behaviour
- is consistent, but is not always the same as Perl's. It means that if
- two or more backtracking verbs appear in succession, all the the last
+ (*THEN) causes the next alternative (ABD) to be tried. This behaviour
+ is consistent, but is not always the same as Perl's. It means that if
+ two or more backtracking verbs appear in succession, all but the last
of them has no effect. Consider this example:
...(*COMMIT)(*PRUNE)...
If there is a matching failure to the right, backtracking onto (*PRUNE)
- causes it to be triggered, and its action is taken. There can never be
+ causes it to be triggered, and its action is taken. There can never be
a backtrack onto (*COMMIT).
Backtracking verbs in repeated groups
@@ -9378,50 +9474,50 @@ BACKTRACKING CONTROL
/(a(*COMMIT)b)+ac/
- If the subject is "abac", Perl matches unless its optimizations are
- disabled, but PCRE2 always fails because the (*COMMIT) in the second
+ If the subject is "abac", Perl matches unless its optimizations are
+ disabled, but PCRE2 always fails because the (*COMMIT) in the second
repeat of the group acts.
Backtracking verbs in assertions
- (*FAIL) in any assertion has its normal effect: it forces an immediate
- backtrack. The behaviour of the other backtracking verbs depends on
- whether or not the assertion is standalone or acting as the condition
+ (*FAIL) in any assertion has its normal effect: it forces an immediate
+ backtrack. The behaviour of the other backtracking verbs depends on
+ whether or not the assertion is standalone or acting as the condition
in a conditional group.
- (*ACCEPT) in a standalone positive assertion causes the assertion to
- succeed without any further processing; captured strings and a mark
- name (if set) are retained. In a standalone negative assertion, (*AC-
+ (*ACCEPT) in a standalone positive assertion causes the assertion to
+ succeed without any further processing; captured strings and a mark
+ name (if set) are retained. In a standalone negative assertion, (*AC-
CEPT) causes the assertion to fail without any further processing; cap-
tured substrings and any mark name are discarded.
- If the assertion is a condition, (*ACCEPT) causes the condition to be
- true for a positive assertion and false for a negative one; captured
+ If the assertion is a condition, (*ACCEPT) causes the condition to be
+ true for a positive assertion and false for a negative one; captured
substrings are retained in both cases.
The remaining verbs act only when a later failure causes a backtrack to
- reach them. This means that, for the Perl-compatible assertions, their
+ reach them. This means that, for the Perl-compatible assertions, their
effect is confined to the assertion, because Perl lookaround assertions
are atomic. A backtrack that occurs after such an assertion is complete
- does not jump back into the assertion. Note in particular that a
- (*MARK) name that is set in an assertion is not "seen" by an instance
+ does not jump back into the assertion. Note in particular that a
+ (*MARK) name that is set in an assertion is not "seen" by an instance
of (*SKIP:NAME) later in the pattern.
- PCRE2 now supports non-atomic positive assertions, as described in the
- section entitled "Non-atomic assertions" above. These assertions must
- be standalone (not used as conditions). They are not Perl-compatible.
- For these assertions, a later backtrack does jump back into the asser-
- tion, and therefore verbs such as (*COMMIT) can be triggered by back-
+ PCRE2 now supports non-atomic positive assertions, as described in the
+ section entitled "Non-atomic assertions" above. These assertions must
+ be standalone (not used as conditions). They are not Perl-compatible.
+ For these assertions, a later backtrack does jump back into the asser-
+ tion, and therefore verbs such as (*COMMIT) can be triggered by back-
tracks from later in the pattern.
- The effect of (*THEN) is not allowed to escape beyond an assertion. If
- there are no more branches to try, (*THEN) causes a positive assertion
+ The effect of (*THEN) is not allowed to escape beyond an assertion. If
+ there are no more branches to try, (*THEN) causes a positive assertion
to be false, and a negative assertion to be true.
- The other backtracking verbs are not treated specially if they appear
- in a standalone positive assertion. In a conditional positive asser-
+ The other backtracking verbs are not treated specially if they appear
+ in a standalone positive assertion. In a conditional positive asser-
tion, backtracking (from within the assertion) into (*COMMIT), (*SKIP),
- or (*PRUNE) causes the condition to be false. However, for both stand-
+ or (*PRUNE) causes the condition to be false. However, for both stand-
alone and conditional negative assertions, backtracking into (*COMMIT),
(*SKIP), or (*PRUNE) causes the assertion to be true, without consider-
ing any further alternative branches.
@@ -9431,26 +9527,26 @@ BACKTRACKING CONTROL
These behaviours occur whether or not the group is called recursively.
(*ACCEPT) in a group called as a subroutine causes the subroutine match
- to succeed without any further processing. Matching then continues af-
- ter the subroutine call. Perl documents this behaviour. Perl's treat-
+ to succeed without any further processing. Matching then continues af-
+ ter the subroutine call. Perl documents this behaviour. Perl's treat-
ment of the other verbs in subroutines is different in some cases.
- (*FAIL) in a group called as a subroutine has its normal effect: it
+ (*FAIL) in a group called as a subroutine has its normal effect: it
forces an immediate backtrack.
- (*COMMIT), (*SKIP), and (*PRUNE) cause the subroutine match to fail
- when triggered by being backtracked to in a group called as a subrou-
+ (*COMMIT), (*SKIP), and (*PRUNE) cause the subroutine match to fail
+ when triggered by being backtracked to in a group called as a subrou-
tine. There is then a backtrack at the outer level.
(*THEN), when triggered, skips to the next alternative in the innermost
- enclosing group that has alternatives (its normal behaviour). However,
+ enclosing group that has alternatives (its normal behaviour). However,
if there is no such group within the subroutine's group, the subroutine
match fails and there is a backtrack at the outer level.
SEE ALSO
- pcre2api(3), pcre2callout(3), pcre2matching(3), pcre2syntax(3),
+ pcre2api(3), pcre2callout(3), pcre2matching(3), pcre2syntax(3),
pcre2(3).
@@ -9463,8 +9559,8 @@ AUTHOR
REVISION
- Last updated: 29 July 2019
- Copyright (c) 1997-2019 University of Cambridge.
+ Last updated: 24 February 2020
+ Copyright (c) 1997-2020 University of Cambridge.
------------------------------------------------------------------------------
@@ -10474,26 +10570,27 @@ SCRIPT NAMES FOR \p AND \P
Adlam, Ahom, Anatolian_Hieroglyphs, Arabic, Armenian, Avestan, Bali-
nese, Bamum, Bassa_Vah, Batak, Bengali, Bhaiksuki, Bopomofo, Brahmi,
Braille, Buginese, Buhid, Canadian_Aboriginal, Carian, Caucasian_Alba-
- nian, Chakma, Cham, Cherokee, Common, Coptic, Cuneiform, Cypriot,
- Cyrillic, Deseret, Devanagari, Dogra, Duployan, Egyptian_Hieroglyphs,
- Elbasan, Elymaic, Ethiopic, Georgian, Glagolitic, Gothic, Grantha,
- Greek, Gujarati, Gunjala_Gondi, Gurmukhi, Han, Hangul, Hanifi_Rohingya,
- Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Aramaic, Inherited, In-
- scriptional_Pahlavi, Inscriptional_Parthian, Javanese, Kaithi, Kannada,
- Katakana, Kayah_Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao, Latin,
- Lepcha, Limbu, Linear_A, Linear_B, Lisu, Lycian, Lydian, Mahajani,
- Makasar, Malayalam, Mandaic, Manichaean, Marchen, Masaram_Gondi, Mede-
- faidrin, Meetei_Mayek, Mende_Kikakui, Meroitic_Cursive, Meroitic_Hiero-
- glyphs, Miao, Modi, Mongolian, Mro, Multani, Myanmar, Nabataean, Nandi-
- nagari, New_Tai_Lue, Newa, Nko, Nushu, Nyakeng_Puachue_Hmong, Ogham,
- Ol_Chiki, Old_Hungarian, Old_Italic, Old_North_Arabian, Old_Permic,
- Old_Persian, Old_Sogdian, Old_South_Arabian, Old_Turkic, Oriya, Osage,
- Osmanya, Pahawh_Hmong, Palmyrene, Pau_Cin_Hau, Phags_Pa, Phoenician,
- Psalter_Pahlavi, Rejang, Runic, Samaritan, Saurashtra, Sharada, Sha-
- vian, Siddham, SignWriting, Sinhala, Sogdian, Sora_Sompeng, Soyombo,
- Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, Tai_Le, Tai_Tham,
- Tai_Viet, Takri, Tamil, Tangut, Telugu, Thaana, Thai, Tibetan, Tifi-
- nagh, Tirhuta, Ugaritic, Vai, Wancho, Warang_Citi, Yi, Zan-
+ nian, Chakma, Cham, Cherokee, Chorasmian, Common, Coptic, Cuneiform,
+ Cypriot, Cyrillic, Deseret, Devanagari, Dives_Akuru, Dogra, Duployan,
+ Egyptian_Hieroglyphs, Elbasan, Elymaic, Ethiopic, Georgian, Glagolitic,
+ Gothic, Grantha, Greek, Gujarati, Gunjala_Gondi, Gurmukhi, Han, Hangul,
+ Hanifi_Rohingya, Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Aramaic,
+ Inherited, Inscriptional_Pahlavi, Inscriptional_Parthian, Javanese,
+ Kaithi, Kannada, Katakana, Kayah_Li, Kharoshthi, Khitan_Small_Script,
+ Khmer, Khojki, Khudawadi, Lao, Latin, Lepcha, Limbu, Linear_A, Lin-
+ ear_B, Lisu, Lycian, Lydian, Mahajani, Makasar, Malayalam, Mandaic,
+ Manichaean, Marchen, Masaram_Gondi, Medefaidrin, Meetei_Mayek,
+ Mende_Kikakui, Meroitic_Cursive, Meroitic_Hieroglyphs, Miao, Modi, Mon-
+ golian, Mro, Multani, Myanmar, Nabataean, Nandinagari, New_Tai_Lue,
+ Newa, Nko, Nushu, Nyakeng_Puachue_Hmong, Ogham, Ol_Chiki, Old_Hungar-
+ ian, Old_Italic, Old_North_Arabian, Old_Permic, Old_Persian, Old_Sog-
+ dian, Old_South_Arabian, Old_Turkic, Oriya, Osage, Osmanya, Pa-
+ hawh_Hmong, Palmyrene, Pau_Cin_Hau, Phags_Pa, Phoenician,
+ Psalter_Pahlavi, Rejang, Runic, Samaritan, Saurashtra, Sharada, Sha-
+ vian, Siddham, SignWriting, Sinhala, Sogdian, Sora_Sompeng, Soyombo,
+ Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, Tai_Le, Tai_Tham,
+ Tai_Viet, Takri, Tamil, Tangut, Telugu, Thaana, Thai, Tibetan, Tifi-
+ nagh, Tirhuta, Ugaritic, Vai, Wancho, Warang_Citi, Yezidi, Yi, Zan-
abazar_Square.
@@ -10520,8 +10617,8 @@ CHARACTER CLASSES
word same as \w
xdigit hexadecimal digit
- In PCRE2, POSIX character set names recognize only ASCII characters by
- default, but some of them use Unicode properties if PCRE2_UCP is set.
+ In PCRE2, POSIX character set names recognize only ASCII characters by
+ default, but some of them use Unicode properties if PCRE2_UCP is set.
You can use \Q...\E inside a character class.
@@ -10584,8 +10681,8 @@ CAPTURING
(?|...) non-capture group; reset group numbers for
capture groups in each alternative
- In non-UTF modes, names may contain underscores and ASCII letters and
- digits; in UTF modes, any Unicode letters and Unicode decimal digits
+ In non-UTF modes, names may contain underscores and ASCII letters and
+ digits; in UTF modes, any Unicode letters and Unicode decimal digits
are permitted. In both cases, a name must not start with a digit.
@@ -10601,11 +10698,11 @@ COMMENT
OPTION SETTING
- Changes of these options within a group are automatically cancelled at
+ Changes of these options within a group are automatically cancelled at
the end of the group.
(?i) caseless
- (?J) allow duplicate names
+ (?J) allow duplicate named groups
(?m) multiline
(?n) no auto capture
(?s) single line (dotall)
@@ -10615,7 +10712,7 @@ OPTION SETTING
(?-...) unset option(s)
(?^) unset imnsx options
- Unsetting x or xx unsets both. Several options may be set at once, and
+ Unsetting x or xx unsets both. Several options may be set at once, and
a mixture of setting and unsetting such as (?i-x) is allowed, but there
may be only one hyphen. Setting (but no unsetting) is allowed after (?^
for example (?^in). An option setting may appear at the start of a non-
@@ -10637,11 +10734,11 @@ OPTION SETTING
(*UTF) set appropriate UTF mode for the library in use
(*UCP) set PCRE2_UCP (use Unicode properties for \d etc)
- Note that LIMIT_DEPTH, LIMIT_HEAP, and LIMIT_MATCH can only reduce the
- value of the limits set by the caller of pcre2_match() or
- pcre2_dfa_match(), not increase them. LIMIT_RECURSION is an obsolete
+ Note that LIMIT_DEPTH, LIMIT_HEAP, and LIMIT_MATCH can only reduce the
+ value of the limits set by the caller of pcre2_match() or
+ pcre2_dfa_match(), not increase them. LIMIT_RECURSION is an obsolete
synonym for LIMIT_DEPTH. The application can lock out the use of (*UTF)
- and (*UCP) by setting the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options,
+ and (*UCP) by setting the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options,
respectively, at compile time.
@@ -10692,11 +10789,13 @@ NON-ATOMIC LOOKAROUND ASSERTIONS
These assertions are specific to PCRE2 and are not Perl-compatible.
- (*napla:...)
- (*non_atomic_positive_lookahead:...)
+ (?*...) )
+ (*napla:...) ) synonyms
+ (*non_atomic_positive_lookahead:...) )
- (*naplb:...)
- (*non_atomic_positive_lookbehind:...)
+ (?<*...) )
+ (*naplb:...) ) synonyms
+ (*non_atomic_positive_lookbehind:...) )
SCRIPT RUNS
@@ -10760,16 +10859,16 @@ CONDITIONAL PATTERNS
(?(VERSION[>]=n.m) test PCRE2 version
(?(assert) assertion condition
- Note the ambiguity of (?(R) and (?(Rn) which might be named reference
- conditions or recursion tests. Such a condition is interpreted as a
+ Note the ambiguity of (?(R) and (?(Rn) which might be named reference
+ conditions or recursion tests. Such a condition is interpreted as a
reference condition if the relevant named group exists.
BACKTRACKING CONTROL
- All backtracking control verbs may be in the form (*VERB:NAME). For
- (*MARK) the name is mandatory, for the others it is optional. (*SKIP)
- changes its behaviour if :NAME is present. The others just set a name
+ All backtracking control verbs may be in the form (*VERB:NAME). For
+ (*MARK) the name is mandatory, for the others it is optional. (*SKIP)
+ changes its behaviour if :NAME is present. The others just set a name
for passing back to the caller, but this is not a name that (*SKIP) can
see. The following act immediately they are reached:
@@ -10777,7 +10876,7 @@ BACKTRACKING CONTROL
(*FAIL) force backtrack; synonym (*F)
(*MARK:NAME) set name to be passed back; synonym (*:NAME)
- The following act only when a subsequent match failure causes a back-
+ The following act only when a subsequent match failure causes a back-
track to reach them. They all force a match failure, but they differ in
what happens afterwards. Those that advance the start-of-match point do
so only if the pattern is not anchored.
@@ -10789,7 +10888,7 @@ BACKTRACKING CONTROL
(*MARK:NAME); if not found, the (*SKIP) is ignored
(*THEN) local failure, backtrack to next alternation
- The effect of one of these verbs in a group called as a subroutine is
+ The effect of one of these verbs in a group called as a subroutine is
confined to the subroutine call.
@@ -10800,14 +10899,14 @@ CALLOUTS
(?C"text") callout with string data
The allowed string delimiters are ` ' " ^ % # $ (which are the same for
- the start and the end), and the starting delimiter { matched with the
- ending delimiter }. To encode the ending delimiter within the string,
+ the start and the end), and the starting delimiter { matched with the
+ ending delimiter }. To encode the ending delimiter within the string,
double it.
SEE ALSO
- pcre2pattern(3), pcre2api(3), pcre2callout(3), pcre2matching(3),
+ pcre2pattern(3), pcre2api(3), pcre2callout(3), pcre2matching(3),
pcre2(3).
@@ -10820,7 +10919,7 @@ AUTHOR
REVISION
- Last updated: 29 July 2019
+ Last updated: 28 December 2019
Copyright (c) 1997-2019 University of Cambridge.
------------------------------------------------------------------------------
@@ -10837,7 +10936,7 @@ UNICODE AND UTF SUPPORT
PCRE2 is normally built with Unicode support, though if you do not need
it, you can build it without, in which case the library will be
smaller. With Unicode support, PCRE2 has knowledge of Unicode character
- properties and can process text strings in UTF-8, UTF-16, or UTF-32
+ properties and can process strings of text in UTF-8, UTF-16, and UTF-32
format (depending on the code unit width), but this is not the default.
Unless specifically requested, PCRE2 treats each code unit in a string
as one character.
@@ -10933,14 +11032,16 @@ WIDE CHARACTERS AND UTF MODES
ters, whether or not PCRE2_UCP is set.
-CASE-EQUIVALENCE IN UTF MODE
+UNICODE CASE-EQUIVALENCE
- Case-insensitive matching in UTF mode makes use of Unicode properties
- except for characters whose code points are less than 128 and that have
- at most two case-equivalent values. For these, a direct table lookup is
- used for speed. A few Unicode characters such as Greek sigma have more
- than two code points that are case-equivalent, and these are treated
- specially.
+ If either PCRE2_UTF or PCRE2_UCP is set, upper/lower case processing
+ makes use of Unicode properties except for characters whose code points
+ are less than 128 and that have at most two case-equivalent values. For
+ these, a direct table lookup is used for speed. A few Unicode charac-
+ ters such as Greek sigma have more than two code points that are case-
+ equivalent, and these are treated specially. Setting PCRE2_UCP without
+ PCRE2_UTF allows Unicode-style case processing for non-UTF character
+ encodings such as UCS-2.
SCRIPT RUNS
@@ -11253,8 +11354,8 @@ AUTHOR
REVISION
- Last updated: 24 May 2019
- Copyright (c) 1997-2019 University of Cambridge.
+ Last updated: 23 February 2020
+ Copyright (c) 1997-2020 University of Cambridge.
------------------------------------------------------------------------------
diff --git a/doc/pcre2_jit_free_unused_memory.3 b/doc/pcre2_jit_free_unused_memory.3
index bf050c8..183bba0 100644
--- a/doc/pcre2_jit_free_unused_memory.3
+++ b/doc/pcre2_jit_free_unused_memory.3
@@ -17,7 +17,7 @@ This function frees unused JIT executable memory. The argument is a general
context, for custom memory management, or NULL for standard memory management.
JIT memory allocation retains some memory in order to improve future JIT
compilation speed. In low memory conditions,
-\fBpcre2_jit_free_unused_memory()\fB can be used to cause this memory to be
+\fBpcre2_jit_free_unused_memory()\fP can be used to cause this memory to be
freed.
.P
There is a complete description of the PCRE2 native API in the
diff --git a/doc/pcre2_jit_match.3 b/doc/pcre2_jit_match.3
index b0cc197..5877fcb 100644
--- a/doc/pcre2_jit_match.3
+++ b/doc/pcre2_jit_match.3
@@ -1,4 +1,4 @@
-.TH PCRE2_JIT_MATCH 3 "03 November 2014" "PCRE2 10.0"
+.TH PCRE2_JIT_MATCH 3 "11 February 2020" "PCRE2 10.35"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -22,8 +22,10 @@ algorithm that is similar to Perl's. It is a "fast path" interface to JIT, and
it bypasses some of the sanity checks that \fBpcre2_match()\fP applies.
Its arguments are exactly the same as for
.\" HREF
-\fBpcre2_match()\fP.
+\fBpcre2_match()\fP,
.\"
+except that the subject string must be specified with a length;
+PCRE2_ZERO_TERMINATED is not supported.
.P
The supported options are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
PCRE2_NOTEMPTY_ATSTART, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Unsupported
diff --git a/doc/pcre2_set_character_tables.3 b/doc/pcre2_set_character_tables.3
index 1b740a1..1ca4134 100644
--- a/doc/pcre2_set_character_tables.3
+++ b/doc/pcre2_set_character_tables.3
@@ -1,4 +1,4 @@
-.TH PCRE2_SET_CHARACTER_TABLES 3 "22 October 2014" "PCRE2 10.00"
+.TH PCRE2_SET_CHARACTER_TABLES 3 "20 March 2020" "PCRE2 10.35"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -15,9 +15,14 @@ PCRE2 - Perl-compatible regular expressions (revised API)
.rs
.sp
This function sets a pointer to custom character tables within a compile
-context. The second argument must be the result of a call to
-\fBpcre2_maketables()\fP or NULL to request the default tables. The result is
-always zero.
+context. The second argument must point to a set of PCRE2 character tables or
+be NULL to request the default tables. The result is always zero. Character
+tables can be created by calling \fBpcre2_maketables()\fP or by running the
+\fBpcre2_dftables\fP maintenance command in binary mode (see the
+.\" HREF
+\fBpcre2build\fP
+.\"
+documentation).
.P
There is a complete description of the PCRE2 native API in the
.\" HREF
diff --git a/doc/pcre2_substitute.3 b/doc/pcre2_substitute.3
index 7da668c..cceb784 100644
--- a/doc/pcre2_substitute.3
+++ b/doc/pcre2_substitute.3
@@ -1,4 +1,4 @@
-.TH PCRE2_SUBSTITUTE 3 "04 April 2017" "PCRE2 10.30"
+.TH PCRE2_SUBSTITUTE 3 "22 January 2020" "PCRE2 10.35"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@@ -36,8 +36,8 @@ Its arguments are:
\fIoutlengthptr\fP Points to the length of the output buffer
.sp
A match data block is needed only if you want to inspect the data from the
-match that is returned in that block. A match context is needed only if you
-want to:
+final match that is returned in that block or if PCRE2_SUBSTITUTE_MATCHED is
+set. A match context is needed only if you want to:
.sp
Set up a callout function
Set a matching offset limit
@@ -45,9 +45,13 @@ want to:
Change the backtracking depth limit
Set custom memory management in the match context
.sp
-The \fIlength\fP, \fIstartoffset\fP and \fIrlength\fP values are code
-units, not characters, as is the contents of the variable pointed at by
-\fIoutlengthptr\fP, which is updated to the actual length of the new string.
+The \fIlength\fP, \fIstartoffset\fP and \fIrlength\fP values are code units,
+not characters, as are the contents of the variable pointed at by
+\fIoutlengthptr\fP. This variable must contain the length of the output buffer
+when the function is called. If the function is successful, the value is
+changed to the length of the new string, excluding the trailing zero that is
+automatically added.
+.P
The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for
zero-terminated strings. The options are:
.sp
@@ -66,12 +70,22 @@ zero-terminated strings. The options are:
PCRE2_UTF was set at compile time)
PCRE2_SUBSTITUTE_EXTENDED Do extended replacement processing
PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the subject
+ PCRE2_SUBSTITUTE_LITERAL The replacement string is literal
+ PCRE2_SUBSTITUTE_MATCHED Use pre-existing match data for 1st match
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH If overflow, compute needed length
+ PCRE2_SUBSTITUTE_REPLACEMENT_ONLY Return only replacement string(s)
PCRE2_SUBSTITUTE_UNKNOWN_UNSET Treat unknown group as unset
PCRE2_SUBSTITUTE_UNSET_EMPTY Simple unset insert = empty string
.sp
+If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_EXTENDED,
+PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and PCRE2_SUBSTITUTE_UNSET_EMPTY are ignored.
+.P
+If PCRE2_SUBSTITUTE_MATCHED is set, \fImatch_data\fP must be non-NULL; its
+contents must be the result of a call to \fBpcre2_match()\fP using the same
+pattern and subject.
+.P
The function returns the number of substitutions, which may be zero if there
-were no matches. The result can be greater than one only when
+are no matches. The result may be greater than one only when
PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code
is returned.
.P
diff --git a/doc/pcre2api.3 b/doc/pcre2api.3
index 27e3ef4..8c581a0 100644
--- a/doc/pcre2api.3
+++ b/doc/pcre2api.3
@@ -1,4 +1,4 @@
-.TH PCRE2API 3 "02 September 2019" "PCRE2 10.34"
+.TH PCRE2API 3 "19 March 2020" "PCRE2 10.35"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.sp
@@ -187,7 +187,7 @@ document for an overview of all the PCRE2 documentation.
.B int pcre2_substitute(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP,
.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP,"
.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP,"
-.B " pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacementzfP,"
+.B " pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacementz\fP,"
.B " PCRE2_SIZE \fIrlength\fP, PCRE2_UCHAR *\fIoutputbuffer\fP,"
.B " PCRE2_SIZE *\fIoutlengthptr\fP);"
.fi
@@ -1034,12 +1034,13 @@ less than the limit set by the caller of \fBpcre2_match()\fP or
.sp
.B int pcre2_config(uint32_t \fIwhat\fP, void *\fIwhere\fP);
.P
-The function \fBpcre2_config()\fP makes it possible for a PCRE2 client to
-discover which optional features have been compiled into the PCRE2 library. The
+The function \fBpcre2_config()\fP makes it possible for a PCRE2 client to find
+the value of certain configuration parameters and to discover which optional
+features have been compiled into the PCRE2 library. The
.\" HREF
\fBpcre2build\fP
.\"
-documentation has more details about these optional features.
+documentation has more details about these features.
.P
The first argument for \fBpcre2_config()\fP specifies which information is
required. The second argument is a pointer to memory into which the information
@@ -1153,6 +1154,16 @@ over compilation stack usage, see \fBpcre2_set_compile_recursion_guard()\fP.
This parameter is obsolete and should not be used in new code. The output is a
uint32_t integer that is always set to zero.
.sp
+ PCRE2_CONFIG_TABLES_LENGTH
+.sp
+The output is a uint32_t integer that gives the length of PCRE2's character
+processing tables in bytes. For details of these tables see the
+.\" HTML <a href="#localesupport">
+.\" </a>
+section on locale support
+.\"
+below.
+.sp
PCRE2_CONFIG_UNICODE_VERSION
.sp
The \fIwhere\fP argument should point to a buffer that is at least 24 code
@@ -1420,13 +1431,13 @@ documentation.
.sp
If this bit is set, letters in the pattern match both upper and lower case
letters in the subject. It is equivalent to Perl's /i option, and it can be
-changed within a pattern by a (?i) option setting. If PCRE2_UTF is set, Unicode
-properties are used for all characters with more than one other case, and for
-all characters whose code points are greater than U+007F. For lower valued
-characters with only one other case, a lookup table is used for speed. When
-PCRE2_UTF is not set, a lookup table is used for all code points less than 256,
-and higher code points (available only in 16-bit or 32-bit mode) are treated as
-not having another case.
+changed within a pattern by a (?i) option setting. If either PCRE2_UTF or
+PCRE2_UCP is set, Unicode properties are used for all characters with more than
+one other case, and for all characters whose code points are greater than
+U+007F. For lower valued characters with only one other case, a lookup table is
+used for speed. When neither PCRE2_UTF nor PCRE2_UCP is set, a lookup table is
+used for all code points less than 256, and higher code points (available only
+in 16-bit or 32-bit mode) are treated as not having another case.
.sp
PCRE2_DOLLAR_ENDONLY
.sp
@@ -1769,10 +1780,11 @@ are not representable in UTF-16.
.sp
PCRE2_UCP
.sp
-This option changes the way PCRE2 processes \eB, \eb, \eD, \ed, \eS, \es, \eW,
-\ew, and some of the POSIX character classes. By default, only ASCII characters
-are recognized, but if PCRE2_UCP is set, Unicode properties are used instead to
-classify characters. More details are given in the section on
+This option has two effects. Firstly, it changes the way PCRE2 processes \eB,
+\eb, \eD, \ed, \eS, \es, \eW, \ew, and some of the POSIX character classes. By
+default, only ASCII characters are recognized, but if PCRE2_UCP is set, Unicode
+properties are used instead to classify characters. More details are given in
+the section on
.\" HTML <a href="pcre2pattern.html#genericchartypes">
.\" </a>
generic character types
@@ -1782,8 +1794,13 @@ in the
\fBpcre2pattern\fP
.\"
page. If you set PCRE2_UCP, matching one of the items it affects takes much
-longer. The option is available only if PCRE2 has been compiled with Unicode
-support (which is the default).
+longer.
+.P
+The second effect of PCRE2_UCP is to force the use of Unicode properties for
+upper/lower casing operations on characters with code points greater than 127,
+even when PCRE2_UTF is not set. This makes it possible, for example, to process
+strings in the 16-bit UCS-2 code. This option is available only if PCRE2 has
+been compiled with Unicode support (which is the default).
.sp
PCRE2_UNGREEDY
.sp
@@ -1957,13 +1974,18 @@ PCRE2 handles caseless matching, and determines whether characters are letters,
digits, or whatever, by reference to a set of tables, indexed by character code
point. However, this applies only to characters whose code points are less than
256. By default, higher-valued code points never match escapes such as \ew or
-\ed. When PCRE2 is built with Unicode support (the default), all characters can
-be tested with \ep and \eP, or, alternatively, the PCRE2_UCP option can be set
-when a pattern is compiled; this causes \ew and friends to use Unicode property
-support instead of the built-in tables.
+\ed.
+.P
+When PCRE2 is built with Unicode support (the default), the Unicode properties
+of all characters can be tested with \ep and \eP, or, alternatively, the
+PCRE2_UCP option can be set when a pattern is compiled; this causes \ew and
+friends to use Unicode property support instead of the built-in tables.
+PCRE2_UCP also causes upper/lower casing operations on characters with code
+points greater than 127 to use Unicode properties. These effects apply even
+when PCRE2_UTF is not set.
.P
The use of locales with Unicode is discouraged. If you are handling characters
-with code points greater than 128, you should either use Unicode support, or
+with code points greater than 127, you should either use Unicode support, or
use locales, but not try to mix the two.
.P
PCRE2 contains a built-in set of character tables that are used by default.
@@ -1985,7 +2007,7 @@ the system \fBmalloc()\fP is used. The result can be passed to
calling \fBpcre2_set_character_tables()\fP to set the tables pointer therein.
.P
For example, to build and use tables that are appropriate for the French locale
-(where accented characters with values greater than 128 are treated as
+(where accented characters with values greater than 127 are treated as
letters), the following code could be used:
.sp
setlocale(LC_CTYPE, "fr_FR");
@@ -1998,10 +2020,10 @@ The locale name "fr_FR" is used on Linux and other Unix-like systems; if you
are using Windows, the name for the French locale is "french".
.P
The pointer that is passed (via the compile context) to \fBpcre2_compile()\fP
-is saved with the compiled pattern, and the same tables are used by
-\fBpcre2_match()\fP and \fBpcre_dfa_match()\fP. Thus, for any single pattern,
-compilation and matching both happen in the same locale, but different patterns
-can be processed in different locales.
+is saved with the compiled pattern, and the same tables are used by the
+matching functions. Thus, for any single pattern, compilation and matching both
+happen in the same locale, but different patterns can be processed in different
+locales.
.P
It is the caller's responsibility to ensure that the memory containing the
tables remains available while they are still in use. When they are no longer
@@ -2010,6 +2032,26 @@ pass as its first parameter the same global context that was used to create the
tables.
.
.
+.SS "Saving locale tables"
+.rs
+.sp
+The tables described above are just a sequence of binary bytes, which makes
+them independent of hardware characteristics such as endianness or whether the
+processor is 32-bit or 64-bit. A copy of the result of \fBpcre2_maketables()\fP
+can therefore be saved in a file or elsewhere and re-used later, even in a
+different program or on another computer. The size of the tables (number of
+bytes) must be obtained by calling \fBpcre2_config()\fP with the
+PCRE2_CONFIG_TABLES_LENGTH option because \fBpcre2_maketables()\fP does not
+return this value. Note that the \fBpcre2_dftables\fP program, which is part of
+the PCRE2 build system, can be used stand-alone to create a file that contains
+a set of binary tables. See the
+.\" HTML <a href="pcre2build.html#createtables">
+.\" </a>
+\fBpcre2build\fP
+.\"
+documentation for details.
+.
+.
.\" HTML <a name="infoaboutpattern"></a>
.SH "INFORMATION ABOUT A COMPILED PATTERN"
.rs
@@ -2020,7 +2062,7 @@ tables.
.P
The \fBpcre2_pattern_info()\fP function returns general information about a
compiled pattern. For information about callouts, see the
-.\" HTML <a href="pcre2pattern.html#infoaboutcallouts">
+.\" HTML <a href="#infoaboutcallouts">
.\" </a>
next section.
.\"
@@ -3321,12 +3363,19 @@ same number causes an error at compile time.
.B " PCRE2_SIZE *\fIoutlengthptr\fP);"
.fi
.P
-This function calls \fBpcre2_match()\fP and then makes a copy of the subject
-string in \fIoutputbuffer\fP, replacing one or more parts that were matched
-with the \fIreplacement\fP string, whose length is supplied in \fBrlength\fP.
-This can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
-The default is to perform just one replacement, but there is an option that
-requests multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below for details).
+This function optionally calls \fBpcre2_match()\fP and then makes a copy of the
+subject string in \fIoutputbuffer\fP, replacing parts that were matched with
+the \fIreplacement\fP string, whose length is supplied in \fIrlength\fP. This
+can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. There is an
+option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just the
+replacement string(s). The default action is to perform just one replacement if
+the pattern matches, but there is an option that requests multiple replacements
+(see PCRE2_SUBSTITUTE_GLOBAL below).
+.P
+If successful, \fBpcre2_substitute()\fP returns the number of substitutions
+that were carried out. This may be zero if no match was found, and is never
+greater than one unless PCRE2_SUBSTITUTE_GLOBAL is set. A negative value is
+returned if an error is detected.
.P
Matches in which a \eK item in a lookahead in the pattern causes the match to
end before it starts are not supported, and give rise to an error return. For
@@ -3341,32 +3390,79 @@ data block is obtained and freed within this function, using memory management
functions from the match context, if provided, or else those that were used to
allocate memory for the compiled code.
.P
-If an external \fImatch_data\fP block is provided, its contents afterwards
-are those set by the final call to \fBpcre2_match()\fP. For global changes,
-this will have ended in a matching error. The contents of the ovector within
-the match data block may or may not have been changed.
+If \fImatch_data\fP is not NULL and PCRE2_SUBSTITUTE_MATCHED is not set, the
+provided block is used for all calls to \fBpcre2_match()\fP, and its contents
+afterwards are the result of the final call. For global changes, this will
+always be a no-match error. The contents of the ovector within the match data
+block may or may not have been changed.
.P
-The \fIoutlengthptr\fP argument must point to a variable that contains the
-length, in code units, of the output buffer. If the function is successful, the
-value is updated to contain the length of the new string, excluding the
-trailing zero that is automatically added.
+As well as the usual options for \fBpcre2_match()\fP, a number of additional
+options can be set in the \fIoptions\fP argument of \fBpcre2_substitute()\fP.
+One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external
+\fImatch_data\fP block must be provided, and it must have been used for an
+external call to \fBpcre2_match()\fP. The data in the \fImatch_data\fP block
+(return code, offset vector) is used for the first substitution instead of
+calling \fBpcre2_match()\fP from within \fBpcre2_substitute()\fP. This allows
+an application to check for a match before choosing to substitute, without
+having to repeat the match.
+.P
+The contents of the externally supplied match data block are not changed when
+PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTITUTE_GLOBAL is also set,
+\fBpcre2_match()\fP is called after the first substitution to check for further
+matches, but this is done using an internally obtained match data block, thus
+always leaving the external block unchanged.
+.P
+The \fIcode\fP argument is not used for matching before the first substitution
+when PCRE2_SUBSTITUTE_MATCHED is set, but it must be provided, even when
+PCRE2_SUBSTITUTE_GLOBAL is not set, because it contains information such as the
+UTF setting and the number of capturing parentheses in the pattern.
+.P
+The default action of \fBpcre2_substitute()\fP is to return a copy of the
+subject string with matched substrings replaced. However, if
+PCRE2_SUBSTITUTE_REPLACEMENT_ONLY is set, only the replacement substrings are
+returned. In the global case, multiple replacements are concatenated in the
+output buffer. Substitution callouts (see
+.\" HTML <a href="#subcallouts">
+.\" </a>
+below)
+.\"
+can be used to separate them if necessary.
+.P
+The \fIoutlengthptr\fP argument of \fBpcre2_substitute()\fP must point to a
+variable that contains the length, in code units, of the output buffer. If the
+function is successful, the value is updated to contain the length in code
+units of the new string, excluding the trailing zero that is automatically
+added.
.P
If the function is not successful, the value set via \fIoutlengthptr\fP depends
on the type of error. For syntax errors in the replacement string, the value is
the offset in the replacement string where the error was detected. For other
errors, the value is PCRE2_UNSET by default. This includes the case of the
-output buffer being too small, unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set
-(see below), in which case the value is the minimum length needed, including
-space for the trailing zero. Note that in order to compute the required length,
-\fBpcre2_substitute()\fP has to simulate all the matching and copying, instead
-of giving an error return as soon as the buffer overflows. Note also that the
-length is in code units, not bytes.
-.P
-In the replacement string, which is interpreted as a UTF string in UTF mode,
-and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
-dollar character is an escape character that can specify the insertion of
-characters from capture groups or names from (*MARK) or other control verbs
-in the pattern. The following forms are always recognized:
+output buffer being too small, unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set.
+.P
+PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is
+too small. The default action is to return PCRE2_ERROR_NOMEMORY immediately. If
+this option is set, however, \fBpcre2_substitute()\fP continues to go through
+the motions of matching and substituting (without, of course, writing anything)
+in order to compute the size of buffer that is needed. This value is passed
+back via the \fIoutlengthptr\fP variable, with the result of the function still
+being PCRE2_ERROR_NOMEMORY.
+.P
+Passing a buffer size of zero is a permitted way of finding out how much memory
+is needed for a given substitution. However, this does mean that the entire
+operation is carried out twice. Depending on the application, it may be more
+efficient to allocate a large buffer and free the excess afterwards, instead of
+using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH.
+.P
+The replacement string, which is interpreted as a UTF string in UTF mode, is
+checked for UTF validity unless PCRE2_NO_UTF_CHECK is set. An invalid UTF
+replacement string causes an immediate return with the relevant UTF error code.
+.P
+If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not interpreted
+in any way. By default, however, a dollar character is an escape character that
+can specify the insertion of characters from capture groups and names from
+(*MARK) or other control verbs in the pattern. The following forms are always
+recognized:
.sp
$$ insert a dollar character
$<n> or ${<n>} insert the contents of group <n>
@@ -3389,9 +3485,6 @@ facility can be used to perform simple simultaneous substitutions, as this
apple lemon
2: pear orange
.sp
-As well as the usual options for \fBpcre2_match()\fP, a number of additional
-options can be set in the \fIoptions\fP argument of \fBpcre2_substitute()\fP.
-.P
PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject string,
replacing every matching substring. If this option is not set, only the first
matching substring is replaced. The search for matches takes place in the
@@ -3402,7 +3495,7 @@ set in the match context, searching stops when that limit is reached.
.P
You can restrict the effect of a global substitution to a portion of the
subject string by setting either or both of \fIstartoffset\fP and an offset
-limit. Here is a \fPpcre2test\fP example:
+limit. Here is a \fBpcre2test\fP example:
.sp
/B/g,replace=!,use_offset_limit
ABC ABC ABC ABC\e=offset=3,offset_limit=12
@@ -3414,20 +3507,6 @@ If this is not successful, the offset is advanced by one character except when
CRLF is a valid newline sequence and the next two characters are CR, LF. In
this case, the offset is advanced by two characters.
.P
-PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is
-too small. The default action is to return PCRE2_ERROR_NOMEMORY immediately. If
-this option is set, however, \fBpcre2_substitute()\fP continues to go through
-the motions of matching and substituting (without, of course, writing anything)
-in order to compute the size of buffer that is needed. This value is passed
-back via the \fIoutlengthptr\fP variable, with the result of the function still
-being PCRE2_ERROR_NOMEMORY.
-.P
-Passing a buffer size of zero is a permitted way of finding out how much memory
-is needed for given substitution. However, this does mean that the entire
-operation is carried out twice. Depending on the application, it may be more
-efficient to allocate a large buffer and free the excess afterwards, instead of
-using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH.
-.P
PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture groups that do
not appear in the pattern to be treated as unset groups. This option should be
used with care, because it means that a typo in a group name or number no
@@ -3458,7 +3537,10 @@ terminating a \eQ quoted sequence) reverts to no case forcing. The sequences
\eu and \el force the next character (if it is a letter) to upper or lower
case, respectively, and then the state automatically reverts to no case
forcing. Case forcing applies to all inserted characters, including those from
-capture groups and letters within \eQ...\eE quoted sequences.
+capture groups and letters within \eQ...\eE quoted sequences. If either
+PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode
+properties are used for case forcing characters whose code points are greater
+than 127.
.P
Note that case forcing sequences such as \eU...\eE do not nest. For example,
the result of processing "\eUaa\eLBB\eEcc\eE" is "AAbbcc"; the final \eE has no
@@ -3494,13 +3576,17 @@ The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended
substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause unknown
groups in the extended syntax forms to be treated as unset.
.P
-If successful, \fBpcre2_substitute()\fP returns the number of successful
-matches. This may be zero if no matches were found, and is never greater than 1
-unless PCRE2_SUBSTITUTE_GLOBAL is set.
-.P
-In the event of an error, a negative error code is returned. Except for
-PCRE2_ERROR_NOMATCH (which is never returned), errors from \fBpcre2_match()\fP
-are passed straight back.
+If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_UNKNOWN_UNSET,
+PCRE2_SUBSTITUTE_UNSET_EMPTY, and PCRE2_SUBSTITUTE_EXTENDED are irrelevant and
+are ignored.
+.
+.
+.SS "Substitution errors"
+.rs
+.sp
+In the event of an error, \fBpcre2_substitute()\fP returns a negative error
+code. Except for PCRE2_ERROR_NOMATCH (which is never returned), errors from
+\fBpcre2_match()\fP are passed straight back.
.P
PCRE2_ERROR_NOSUBSTRING is returned for a non-existent substring insertion,
unless PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set.
@@ -3514,6 +3600,9 @@ PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set, the size of buffer that is
needed is returned via \fIoutlengthptr\fP. Note that this does not happen by
default.
.P
+PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the
+\fImatch_data\fP argument is NULL.
+.P
PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in the
replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE
(invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket
@@ -3531,6 +3620,7 @@ above).
.\"
.
.
+.\" HTML <a name="subcallouts"></a>
.SS "Substitution callouts"
.rs
.sp
@@ -3878,6 +3968,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 02 September 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 19 March 2020
+Copyright (c) 1997-2020 University of Cambridge.
.fi
diff --git a/doc/pcre2build.3 b/doc/pcre2build.3
index f1d28f8..edea222 100644
--- a/doc/pcre2build.3
+++ b/doc/pcre2build.3
@@ -1,4 +1,4 @@
-.TH PCRE2BUILD 3 "03 March 2019" "PCRE2 10.33"
+.TH PCRE2BUILD 3 "20 March 2020" "PCRE2 10.35"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.
@@ -110,7 +110,7 @@ To build it without Unicode support, add
--disable-unicode
.sp
to the \fBconfigure\fP command. This setting applies to all three libraries. It
-is not possible to build one library with Unicode support, and another without,
+is not possible to build one library with Unicode support and another without
in the same configuration.
.P
Of itself, Unicode support does not make PCRE2 treat strings as UTF-8, UTF-16
@@ -175,11 +175,11 @@ SELinux. This has no effect if JIT is not enabled. See the
\fBpcre2jit\fP
.\"
documentation for a discussion of JIT usage. When JIT support is enabled,
-pcre2grep automatically makes use of it, unless you add
+\fBpcre2grep\fP automatically makes use of it, unless you add
.sp
--disable-pcre2grep-jit
.sp
-to the "configure" command.
+to the \fBconfigure\fP command.
.
.
.SH "NEWLINE RECOGNITION"
@@ -317,6 +317,7 @@ used for lookaround assertions, atomic groups, and recursion within patterns.
The limit does not apply to JIT matching.
.
.
+.\" HTML <a name="createtables"></a>
.SH "CREATING CHARACTER TABLES AT BUILD TIME"
.rs
.sp
@@ -328,12 +329,33 @@ only. If you add
--enable-rebuild-chartables
.sp
to the \fBconfigure\fP command, the distributed tables are no longer used.
-Instead, a program called \fBdftables\fP is compiled and run. This outputs the
-source for new set of tables, created in the default locale of your C run-time
-system. This method of replacing the tables does not work if you are cross
-compiling, because \fBdftables\fP is run on the local host. If you need to
-create alternative tables when cross compiling, you will have to do so "by
-hand".
+Instead, a program called \fBpcre2_dftables\fP is compiled and run. This
+outputs the source for a new set of tables, created in the default locale of your
+C run-time system. This method of replacing the tables does not work if you are
+cross compiling, because \fBpcre2_dftables\fP needs to be run on the local
+host and therefore must not be compiled with the cross compiler.
+.P
+If you need to create alternative tables when cross compiling, you will have to
+do so "by hand". There may also be other reasons for creating tables manually.
+To cause \fBpcre2_dftables\fP to be built on the local host, run a normal
+compiling command, and then run the program with the output file as its
+argument, for example:
+.sp
+ cc src/pcre2_dftables.c -o pcre2_dftables
+ ./pcre2_dftables src/pcre2_chartables.c
+.sp
+This builds the tables in the default locale of the local host. If you want to
+specify a locale, you must use the -L option:
+.sp
+ LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c
+.sp
+You can also specify -b (with or without -L). This causes the tables to be
+written in binary instead of as source code. A set of binary tables can be
+loaded into memory by an application and passed to \fBpcre2_compile()\fP in the
+same way as tables created by calling \fBpcre2_maketables()\fP. The tables are
+just a string of bytes, independent of hardware characteristics such as
+endianness. This means they can be bundled with an application that runs in
+different environments, to ensure consistent behaviour.
.
.
.SH "USING EBCDIC CODE"
@@ -417,7 +439,7 @@ default parameter values by adding, for example,
--with-pcre2grep-bufsize=51200
--with-pcre2grep-max-bufsize=2097152
.sp
-to the \fBconfigure\fP command. The caller of \fPpcre2grep\fP can override
+to the \fBconfigure\fP command. The caller of \fBpcre2grep\fP can override
these values by using --buffer-size and --max-buffer-size on the command line.
.
.
@@ -548,7 +570,7 @@ support these modifiers. If
.sp
--disable-percent-zt
.sp
-is specified, no use is made of the z or t modifiers. Instead or %td or %zu,
+is specified, no use is made of the z or t modifiers. Instead of %td or %zu,
%lu is used, with a cast for size_t values.
.
.
@@ -610,6 +632,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 03 March 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 20 March 2020
+Copyright (c) 1997-2020 University of Cambridge.
.fi
diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1
index 22992b1..137117a 100644
--- a/doc/pcre2grep.1
+++ b/doc/pcre2grep.1
@@ -1,4 +1,4 @@
-.TH PCRE2GREP 1 "15 June 2019" "PCRE2 10.34"
+.TH PCRE2GREP 1 "25 January 2020" "PCRE2 10.35"
.SH NAME
pcre2grep - a grep with Perl-compatible regular expressions.
.SH SYNOPSIS
@@ -117,7 +117,7 @@ ignored.
By default, a file that contains a binary zero byte within the first 1024 bytes
is identified as a binary file, and is processed specially. (GNU grep
identifies binary files in this manner.) However, if the newline type is
-specified as "nul", that is, the line terminator is a binary zero, the test for
+specified as NUL, that is, the line terminator is a binary zero, the test for
a binary file is not applied. See the \fB--binary-files\fP option for a means
of changing the way binary files are handled.
.
@@ -488,7 +488,7 @@ There are no short forms for these options. The default limits can be set
when the PCRE2 library is compiled; if they are not specified, the defaults
are very large and so effectively unlimited.
.TP
-\fB--max-buffer-size=\fInumber\fP
+\fB--max-buffer-size\fP=\fInumber\fP
This limits the expansion of the processing buffer, whose initial size can be
set by \fB--buffer-size\fP. The maximum buffer size is silently forced to be no
smaller than the starting buffer size.
@@ -520,27 +520,33 @@ well as possibly handling a two-character newline sequence.
There is a limit to the number of lines that can be matched, imposed by the way
that \fBpcre2grep\fP buffers the input file as it scans it. With a sufficiently
large processing buffer, this should not be a problem, but the \fB-M\fP option
-does not work when input is read line by line (see \fP--line-buffered\fP.)
+does not work when input is read line by line (see \fB--line-buffered\fP.)
.TP
\fB-N\fP \fInewline-type\fP, \fB--newline\fP=\fInewline-type\fP
-The PCRE2 library supports five different conventions for indicating
-the ends of lines. They are the single-character sequences CR (carriage return)
-and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
-which recognizes any of the preceding three types, and an "any" convention, in
-which any Unicode line ending sequence is assumed to end a line. The Unicode
-sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
-(form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
-PS (paragraph separator, U+2029).
+Six different conventions for indicating the ends of lines in scanned files are
+supported. For example:
+.sp
+ pcre2grep -N CRLF 'some pattern' <file>
+.sp
+The newline type may be specified in upper, lower, or mixed case. If the
+newline type is NUL, lines are separated by binary zero characters. The other
+types are the single-character sequences CR (carriage return) and LF
+(linefeed), the two-character sequence CRLF, an "anycrlf" type, which
+recognizes any of the preceding three types, and an "any" type, for which any
+Unicode line ending sequence is assumed to end a line. The Unicode sequences
+are the three just mentioned, plus VT (vertical tab, U+000B), FF (form feed,
+U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS
+(paragraph separator, U+2029).
.sp
When the PCRE2 library is built, a default line-ending sequence is specified.
This is normally the standard sequence for the operating system. Unless
otherwise specified by this option, \fBpcre2grep\fP uses the library's default.
-The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
-makes it possible to use \fBpcre2grep\fP to scan files that have come from
-other environments without having to modify their line endings. If the data
-that is being scanned does not agree with the convention set by this option,
-\fBpcre2grep\fP may behave in strange ways. Note that this option does not
-apply to files specified by the \fB-f\fP, \fB--exclude-from\fP, or
+.sp
+This option makes it possible to use \fBpcre2grep\fP to scan files that have
+come from other environments without having to modify their line endings. If
+the data that is being scanned does not agree with the convention set by this
+option, \fBpcre2grep\fP may behave in strange ways. Note that this option does
+not apply to files specified by the \fB-f\fP, \fB--exclude-from\fP, or
\fB--include-from\fP options, which are expected to use the operating system's
standard newline sequence.
.TP
@@ -560,10 +566,12 @@ It should never be needed in normal use.
.TP
\fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP
When there is a match, instead of outputting the whole line that matched,
-output just the given text. This option is mutually exclusive with
-\fB--only-matching\fP, \fB--file-offsets\fP, and \fB--line-offsets\fP. Escape
-sequences starting with a dollar character may be used to insert the contents
-of the matched part of the line and/or captured substrings into the text.
+output just the given text, followed by an operating-system standard newline.
+The \fB--newline\fP option has no effect on this option, which is mutually
+exclusive with \fB--only-matching\fP, \fB--file-offsets\fP, and
+\fB--line-offsets\fP. Escape sequences starting with a dollar character may be
+used to insert the contents of the matched part of the line and/or captured
+substrings into the text.
.sp
$<digits> or ${<digits>} is replaced by the captured
substring of the given decimal number; zero substitutes the whole match. If
@@ -709,16 +717,25 @@ by the \fB--locale\fP option. If no locale is set, the PCRE2 library's default
.rs
.sp
The \fB-N\fP (\fB--newline\fP) option allows \fBpcre2grep\fP to scan files with
-different newline conventions from the default. Any parts of the input files
-that are written to the standard output are copied identically, with whatever
-newline sequences they have in the input. However, the setting of this option
-affects only the way scanned files are processed. It does not affect the
-interpretation of files specified by the \fB-f\fP, \fB--file-list\fP,
-\fB--exclude-from\fP, or \fB--include-from\fP options, nor does it affect the
-way in which \fBpcre2grep\fP writes informational messages to the standard
-error and output streams. For these it uses the string "\en" to indicate
-newlines, relying on the C I/O library to convert this to an appropriate
-sequence.
+newline conventions that differ from the default. This option affects only the
+way scanned files are processed. It does not affect the interpretation of files
+specified by the \fB-f\fP, \fB--file-list\fP, \fB--exclude-from\fP, or
+\fB--include-from\fP options.
+.P
+Any parts of the scanned input files that are written to the standard output
+are copied with whatever newline sequences they have in the input. However, if
+the final line of a file is output, and it does not end with a newline
+sequence, a newline sequence is added. If the newline setting is CR, LF, CRLF
+or NUL, that line ending is output; for the other settings (ANYCRLF or ANY) a
+single NL is used.
+.P
+The newline setting does not affect the way in which \fBpcre2grep\fP writes
+newlines in informational messages to the standard output and error streams.
+Under Windows, the standard output is set to be binary, so that "\er\en" at the
+ends of output lines that are copied from the input is not converted to
+"\er\er\en" by the C I/O library. This means that any messages written to the
+standard output must end with "\er\en". For all other operating systems, and
+for all messages to the standard error stream, "\en" is used.
.
.
.SH "OPTIONS COMPATIBILITY"
@@ -904,6 +921,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 15 June 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 25 January 2020
+Copyright (c) 1997-2020 University of Cambridge.
.fi
diff --git a/doc/pcre2grep.txt b/doc/pcre2grep.txt
index b11092a..4d41f54 100644
--- a/doc/pcre2grep.txt
+++ b/doc/pcre2grep.txt
@@ -116,9 +116,9 @@ BINARY FILES
By default, a file that contains a binary zero byte within the first
1024 bytes is identified as a binary file, and is processed specially.
(GNU grep identifies binary files in this manner.) However, if the new-
- line type is specified as "nul", that is, the line terminator is a bi-
- nary zero, the test for a binary file is not applied. See the --binary-
- files option for a means of changing the way binary files are handled.
+ line type is specified as NUL, that is, the line terminator is a binary
+ zero, the test for a binary file is not applied. See the --binary-files
+ option for a means of changing the way binary files are handled.
BINARY ZEROS IN PATTERNS
@@ -578,30 +578,36 @@ OPTIONS
when input is read line by line (see --line-buffered.)
-N newline-type, --newline=newline-type
- The PCRE2 library supports five different conventions for in-
- dicating the ends of lines. They are the single-character se-
- quences CR (carriage return) and LF (linefeed), the two-char-
- acter sequence CRLF, an "anycrlf" convention, which recog-
- nizes any of the preceding three types, and an "any" conven-
- tion, in which any Unicode line ending sequence is assumed to
- end a line. The Unicode sequences are the three just men-
- tioned, plus VT (vertical tab, U+000B), FF (form feed,
- U+000C), NEL (next line, U+0085), LS (line separator,
- U+2028), and PS (paragraph separator, U+2029).
+ Six different conventions for indicating the ends of lines in
+ scanned files are supported. For example:
+
+ pcre2grep -N CRLF 'some pattern' <file>
+
+ The newline type may be specified in upper, lower, or mixed
+ case. If the newline type is NUL, lines are separated by bi-
+ nary zero characters. The other types are the single-charac-
+ ter sequences CR (carriage return) and LF (linefeed), the
+ two-character sequence CRLF, an "anycrlf" type, which recog-
+ nizes any of the preceding three types, and an "any" type,
+ for which any Unicode line ending sequence is assumed to end
+ a line. The Unicode sequences are the three just mentioned,
+ plus VT (vertical tab, U+000B), FF (form feed, U+000C), NEL
+ (next line, U+0085), LS (line separator, U+2028), and PS
+ (paragraph separator, U+2029).
When the PCRE2 library is built, a default line-ending se-
quence is specified. This is normally the standard sequence
for the operating system. Unless otherwise specified by this
- option, pcre2grep uses the library's default. The possible
- values for this option are CR, LF, CRLF, ANYCRLF, or ANY.
- This makes it possible to use pcre2grep to scan files that
- have come from other environments without having to modify
- their line endings. If the data that is being scanned does
- not agree with the convention set by this option, pcre2grep
- may behave in strange ways. Note that this option does not
- apply to files specified by the -f, --exclude-from, or --in-
- clude-from options, which are expected to use the operating
- system's standard newline sequence.
+ option, pcre2grep uses the library's default.
+
+ This option makes it possible to use pcre2grep to scan files
+ that have come from other environments without having to mod-
+ ify their line endings. If the data that is being scanned
+ does not agree with the convention set by this option,
+ pcre2grep may behave in strange ways. Note that this option
+ does not apply to files specified by the -f, --exclude-from,
+ or --include-from options, which are expected to use the op-
+ erating system's standard newline sequence.
-n, --line-number
Precede each output line by its line number in the file, fol-
@@ -620,11 +626,13 @@ OPTIONS
-O text, --output=text
When there is a match, instead of outputting the whole line
- that matched, output just the given text. This option is mu-
- tually exclusive with --only-matching, --file-offsets, and
- --line-offsets. Escape sequences starting with a dollar char-
- acter may be used to insert the contents of the matched part
- of the line and/or captured substrings into the text.
+ that matched, output just the given text, followed by an op-
+ erating-system standard newline. The --newline option has no
+ effect on this option, which is mutually exclusive with
+ --only-matching, --file-offsets, and --line-offsets. Escape
+ sequences starting with a dollar character may be used to in-
+ sert the contents of the matched part of the line and/or cap-
+ tured substrings into the text.
$<digits> or ${<digits>} is replaced by the captured sub-
string of the given decimal number; zero substitutes the
@@ -780,17 +788,27 @@ ENVIRONMENT VARIABLES
NEWLINES
- The -N (--newline) option allows pcre2grep to scan files with different
- newline conventions from the default. Any parts of the input files that
- are written to the standard output are copied identically, with what-
- ever newline sequences they have in the input. However, the setting of
- this option affects only the way scanned files are processed. It does
- not affect the interpretation of files specified by the -f, --file-
- list, --exclude-from, or --include-from options, nor does it affect the
- way in which pcre2grep writes informational messages to the standard
- error and output streams. For these it uses the string "\n" to indicate
- newlines, relying on the C I/O library to convert this to an appropri-
- ate sequence.
+ The -N (--newline) option allows pcre2grep to scan files with newline
+ conventions that differ from the default. This option affects only the
+ way scanned files are processed. It does not affect the interpretation
+ of files specified by the -f, --file-list, --exclude-from, or --in-
+ clude-from options.
+
+ Any parts of the scanned input files that are written to the standard
+ output are copied with whatever newline sequences they have in the in-
+ put. However, if the final line of a file is output, and it does not
+ end with a newline sequence, a newline sequence is added. If the new-
+ line setting is CR, LF, CRLF or NUL, that line ending is output; for
+ the other settings (ANYCRLF or ANY) a single NL is used.
+
+ The newline setting does not affect the way in which pcre2grep writes
+ newlines in informational messages to the standard output and error
+ streams. Under Windows, the standard output is set to be binary, so
+ that "\r\n" at the ends of output lines that are copied from the input
+ is not converted to "\r\r\n" by the C I/O library. This means that any
+ messages written to the standard output must end with "\r\n". For all
+ other operating systems, and for all messages to the standard error
+ stream, "\n" is used.
OPTIONS COMPATIBILITY
@@ -963,5 +981,5 @@ AUTHOR
REVISION
- Last updated: 15 June 2019
- Copyright (c) 1997-2019 University of Cambridge.
+ Last updated: 25 January 2020
+ Copyright (c) 1997-2020 University of Cambridge.
diff --git a/doc/pcre2jit.3 b/doc/pcre2jit.3
index f4be3fb..fab8366 100644
--- a/doc/pcre2jit.3
+++ b/doc/pcre2jit.3
@@ -64,7 +64,7 @@ or a negative error code.
There is a limit to the size of pattern that JIT supports, imposed by the size
of machine stack that it uses. The exact rules are not documented because they
may change at any time, in particular, when new optimizations are introduced.
-If a pattern is too big, a call to \fBpcre2_jit_compile()\fB returns
+If a pattern is too big, a call to \fBpcre2_jit_compile()\fP returns
PCRE2_ERROR_NOMEMORY.
.P
PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for complete
@@ -315,12 +315,12 @@ stack through the JIT callback function.
You can free a JIT stack at any time, as long as it will not be used by
\fBpcre2_match()\fP again. When you assign the stack to a match context, only a
pointer is set. There is no reference counting or any other magic. You can free
-compiled patterns, contexts, and stacks in any order, anytime. Just \fIdo
-not\fP call \fBpcre2_match()\fP with a match context pointing to an already
-freed stack, as that will cause SEGFAULT. (Also, do not free a stack currently
-used by \fBpcre2_match()\fP in another thread). You can also replace the stack
-in a context at any time when it is not in use. You should free the previous
-stack before assigning a replacement.
+compiled patterns, contexts, and stacks in any order, anytime.
+Just \fIdo not\fP call \fBpcre2_match()\fP with a match context pointing to an
+already freed stack, as that will cause SEGFAULT. (Also, do not free a stack
+currently used by \fBpcre2_match()\fP in another thread.) You can also replace
+the stack in a context at any time when it is not in use. You should free the
+previous stack before assigning a replacement.
.P
(5) Should I allocate/free a stack every time before/after calling
\fBpcre2_match()\fP?
diff --git a/doc/pcre2partial.3 b/doc/pcre2partial.3
index 3ff939f..892906a 100644
--- a/doc/pcre2partial.3
+++ b/doc/pcre2partial.3
@@ -261,7 +261,7 @@ these characters with '<' if the \fBallusedtext\fP modifier is set:
Partial match: 123ab
<<<
.sp
-However, the \fPallusedtext\fP modifier is not available for JIT matching,
+However, the \fBallusedtext\fP modifier is not available for JIT matching,
because JIT matching does not record the first (or last) consulted characters.
For this reason, this information is not available via the API. It is therefore
not possible in general to obtain the exact number of characters that must be
diff --git a/doc/pcre2pattern.3 b/doc/pcre2pattern.3
index d5228f3..c88ce03 100644
--- a/doc/pcre2pattern.3
+++ b/doc/pcre2pattern.3
@@ -1,4 +1,4 @@
-.TH PCRE2PATTERN 3 "29 July 2019" "PCRE2 10.34"
+.TH PCRE2PATTERN 3 "24 February 2020" "PCRE2 10.35"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "PCRE2 REGULAR EXPRESSION DETAILS"
@@ -75,7 +75,8 @@ Another special sequence that may appear at the start of a pattern is (*UCP).
This has the same effect as setting the PCRE2_UCP option: it causes sequences
such as \ed and \ew to use Unicode properties to determine character types,
instead of recognizing only characters with codes less than 256 via a lookup
-table.
+table. It also causes upper/lower casing operations to use Unicode properties
+for characters with code points greater than 127, even when UTF is not set.
.P
Some applications that allow their users to supply patterns may wish to
restrict them for security reasons. If the PCRE2_NEVER_UCP option is passed to
@@ -813,6 +814,7 @@ Caucasian_Albanian,
Chakma,
Cham,
Cherokee,
+Chorasmian,
Common,
Coptic,
Cuneiform,
@@ -820,6 +822,7 @@ Cypriot,
Cyrillic,
Deseret,
Devanagari,
+Dives_Akuru,
Dogra,
Duployan,
Egyptian_Hieroglyphs,
@@ -851,6 +854,7 @@ Kannada,
Katakana,
Kayah_Li,
Kharoshthi,
+Khitan_Small_Script,
Khmer,
Khojki,
Khudawadi,
@@ -941,6 +945,7 @@ Unknown,
Vai,
Wancho,
Warang_Citi,
+Yezidi,
Yi,
Zanabazar_Square.
.P
@@ -1643,6 +1648,7 @@ that succeeds is used. If the alternatives are within a group
alternative in the group.
.
.
+.\" HTML <a name="internaloptions"></a>
.SH "INTERNAL OPTION SETTING"
.rs
.sp
@@ -1901,12 +1907,21 @@ are permitted for groups with the same number, for example:
(?|(?<AA>aa)|(?<AA>bb))
.sp
The duplicate name constraint can be disabled by setting the PCRE2_DUPNAMES
-option at compile time, or by the use of (?J) within the pattern. Duplicate
-names can be useful for patterns where only one instance of the named capture
-group can match. Suppose you want to match the name of a weekday, either as a
-3-letter abbreviation or as the full name, and in both cases you want to
-extract the abbreviation. This pattern (ignoring the line breaks) does the job:
+option at compile time, or by the use of (?J) within the pattern, as described
+in the section entitled
+.\" HTML <a href="#internaloptions">
+.\" </a>
+"Internal Option Setting"
+.\"
+above.
+.P
+Duplicate names can be useful for patterns where only one instance of the named
+capture group can match. Suppose you want to match the name of a weekday,
+either as a 3-letter abbreviation or as the full name, and in both cases you
+want to extract the abbreviation. This pattern (ignoring the line breaks) does
+the job:
.sp
+ (?J)
(?<DN>Mon|Fri|Sun)(?:day)?|
(?<DN>Tue)(?:sday)?|
(?<DN>Wed)(?:nesday)?|
@@ -1926,7 +1941,7 @@ they appear in the overall pattern. The first one that is set is used for the
reference. For example, this pattern matches both "foofoo" and "barbar" but not
"foobar" or "barfoo":
.sp
- (?:(?<n>foo)|(?<n>bar))\ek<n>
+ (?J)(?:(?<n>foo)|(?<n>bar))\ek<n>
.sp
.P
If you make a subroutine call to a non-unique named group, the one that
@@ -1965,7 +1980,7 @@ items:
an escape such as \ed or \epL that matches a single character
a character class
a backreference
- a parenthesized group (including most assertions)
+ a parenthesized group (including lookaround assertions)
a subroutine call (recursive or otherwise)
.sp
The general repetition quantifier specifies a minimum and maximum number of
@@ -2346,14 +2361,14 @@ the first iteration does not need to match the backreference. This can be done
using alternation, as in the example above, or by a quantifier with a minimum
of zero.
.P
-Backreferences of this type cause the group that they reference to be treated
-as an
+For versions of PCRE2 less than 10.25, backreferences of this type used to
+cause the group that they reference to be treated as an
.\" HTML <a href="#atomicgroup">
.\" </a>
atomic group.
.\"
-Once the whole group has been matched, a subsequent matching failure cannot
-cause backtracking into the middle of the group.
+This restriction no longer applies, and backtracking into such groups can occur
+as normal.
.
.
.\" HTML <a name="bigassertions"></a>
@@ -2421,26 +2436,13 @@ the "no" branch of the condition. For other failing negative assertions,
control passes to the previous backtracking point, thus discarding any captured
strings within the assertion.
.P
-For compatibility with Perl, most assertion groups may be repeated; though it
-makes no sense to assert the same thing several times, the side effect of
-capturing may occasionally be useful. However, an assertion that forms the
-condition for a conditional group may not be quantified. In practice, for
-other assertions, there only three cases:
-.sp
-(1) If the quantifier is {0}, the assertion is never obeyed during matching.
-However, it may contain internal capture groups that are called from elsewhere
-via the
-.\" HTML <a href="#groupsassubroutines">
-.\" </a>
-subroutine mechanism.
-.\"
-.sp
-(2) If quantifier is {0,n} where n is greater than zero, it is treated as if it
-were {0,1}. At run time, the rest of the pattern match is tried with and
-without the assertion, the order depending on the greediness of the quantifier.
-.sp
-(3) If the minimum repetition is greater than zero, the quantifier is ignored.
-The assertion is obeyed just once when encountered during matching.
+Most assertion groups may be repeated; though it makes no sense to assert the
+same thing several times, the side effect of capturing in positive assertions
+may occasionally be useful. However, an assertion that forms the condition for
+a conditional group may not be quantified. PCRE2 used to restrict the
+repetition of assertions, but from release 10.35 the only restriction is that
+an unlimited maximum repetition is changed to be one more than the minimum. For
+example, {3,} is treated as {3,4}.
.
.
.SS "Alphabetic assertion names"
@@ -2637,8 +2639,8 @@ backtracking into the assertion. However, there are some cases where non-atomic
positive assertions can be useful. PCRE2 provides these using the following
syntax:
.sp
- (*non_atomic_positive_lookahead: or (*napla:
- (*non_atomic_positive_lookbehind: or (*naplb:
+ (*non_atomic_positive_lookahead: or (*napla: or (?*
+ (*non_atomic_positive_lookbehind: or (*naplb: or (?<*
.sp
Consider the problem of finding the right-most word in a string that also
appears earlier in the string, that is, it must appear at least twice in total.
@@ -2674,9 +2676,14 @@ pattern. If this is not the case, the rest of the pattern match fails exactly
as before because nothing has changed, so using a non-atomic assertion just
wastes resources.
.P
+There is one exception to backtracking into a non-atomic assertion. If an
+(*ACCEPT) control verb is triggered, the assertion succeeds atomically. That
+is, a subsequent match failure cannot backtrack into the assertion.
+.P
Non-atomic assertions are not supported by the alternative matching function
-\fBpcre2_dfa_match()\fP. They are also not supported by JIT (but may be in
-future). Note that assertions that appear as conditions for
+\fBpcre2_dfa_match()\fP. They are supported by JIT, but only if they do not
+contain any control verbs such as (*ACCEPT). (This may change in future.) Note
+that assertions that appear as conditions for
.\" HTML <a href="#conditions">
.\" </a>
conditional groups
@@ -3874,6 +3881,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 29 July 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 24 February 2020
+Copyright (c) 1997-2020 University of Cambridge.
.fi
diff --git a/doc/pcre2syntax.3 b/doc/pcre2syntax.3
index bbe418a..7076462 100644
--- a/doc/pcre2syntax.3
+++ b/doc/pcre2syntax.3
@@ -1,4 +1,4 @@
-.TH PCRE2SYNTAX 3 "29 July 2019" "PCRE2 10.34"
+.TH PCRE2SYNTAX 3 "28 December 2019" "PCRE2 10.35"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY"
@@ -193,6 +193,7 @@ Caucasian_Albanian,
Chakma,
Cham,
Cherokee,
+Chorasmian,
Common,
Coptic,
Cuneiform,
@@ -200,6 +201,7 @@ Cypriot,
Cyrillic,
Deseret,
Devanagari,
+Dives_Akuru,
Dogra,
Duployan,
Egyptian_Hieroglyphs,
@@ -231,6 +233,7 @@ Kannada,
Katakana,
Kayah_Li,
Kharoshthi,
+Khitan_Small_Script,
Khmer,
Khojki,
Khudawadi,
@@ -320,6 +323,7 @@ Ugaritic,
Vai,
Wancho,
Warang_Citi,
+Yezidi,
Yi,
Zanabazar_Square.
.
@@ -441,7 +445,7 @@ Changes of these options within a group are automatically cancelled at the end
of the group.
.sp
(?i) caseless
- (?J) allow duplicate names
+ (?J) allow duplicate named groups
(?m) multiline
(?n) no auto capture
(?s) single line (dotall)
@@ -531,11 +535,13 @@ Each top-level branch of a lookbehind must be of a fixed length.
.sp
These assertions are specific to PCRE2 and are not Perl-compatible.
.sp
- (*napla:...)
- (*non_atomic_positive_lookahead:...)
+ (?*...) )
+ (*napla:...) ) synonyms
+ (*non_atomic_positive_lookahead:...) )
.sp
- (*naplb:...)
- (*non_atomic_positive_lookbehind:...)
+ (?<*...) )
+ (*naplb:...) ) synonyms
+ (*non_atomic_positive_lookbehind:...) )
.
.
.SH "SCRIPT RUNS"
@@ -670,6 +676,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 29 July 2019
+Last updated: 28 December 2019
Copyright (c) 1997-2019 University of Cambridge.
.fi
diff --git a/doc/pcre2test.1 b/doc/pcre2test.1
index 355dc75..ec17763 100644
--- a/doc/pcre2test.1
+++ b/doc/pcre2test.1
@@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "30 July 2019" "PCRE 10.34"
+.TH PCRE2TEST 1 "20 March 2020" "PCRE2 10.35"
.SH NAME
pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
@@ -218,7 +218,7 @@ List modifiers: write a list of available pattern and subject modifiers to the
standard output, then exit with zero exit code. All other options are ignored.
If both -C and -LM are present, whichever is first is recognized.
.TP 10
-\fB-pattern\fB \fImodifier-list\fP
+\fB-pattern\fP \fImodifier-list\fP
Behave as if each pattern line contains the given modifiers.
.TP 10
\fB-q\fP
@@ -327,6 +327,12 @@ described in the section entitled "Saving and restoring compiled patterns"
below.
.\"
.sp
+ #loadtables <filename>
+.sp
+This command is used to load a set of binary character tables that can be
+accessed by the tables=3 qualifier. Such tables can be created by the
+\fBpcre2_dftables\fP program with the -b option.
+.sp
#newline_default [<newline-list>]
.sp
When PCRE2 is built, a default newline convention can be specified. This
@@ -638,7 +644,7 @@ heavily used in the test files.
pushcopy push a copy onto the stack
stackguard=<number> test the stackguard feature
subject_literal treat all subject lines as literal
- tables=[0|1|2] select internal tables
+ tables=[0|1|2|3] select internal tables
use_length do not zero-terminate the pattern
utf8_input treat input as UTF-8
.sp
@@ -988,18 +994,20 @@ be aborted.
.rs
.sp
The value specified for the \fBtables\fP modifier must be one of the digits 0,
-1, or 2. It causes a specific set of built-in character tables to be passed to
-\fBpcre2_compile()\fP. This is used in the PCRE2 tests to check behaviour with
-different character tables. The digit specifies the tables as follows:
+1, 2, or 3. It causes a specific set of built-in character tables to be passed
+to \fBpcre2_compile()\fP. This is used in the PCRE2 tests to check behaviour
+with different character tables. The digit specifies the tables as follows:
.sp
0 do not pass any special character tables
1 the default ASCII tables, as distributed in
pcre2_chartables.c.dist
2 a set of tables defining ISO 8859 characters
+ 3 a set of tables loaded by the #loadtables command
.sp
-In table 2, some characters whose codes are greater than 128 are identified as
-letters, digits, spaces, etc. Setting alternate character tables and a locale
-are mutually exclusive.
+In tables 2, some characters whose codes are greater than 128 are identified as
+letters, digits, spaces, etc. Tables 3 can be used only after a
+\fB#loadtables\fP command has loaded them from a binary file. Setting alternate
+character tables and a locale are mutually exclusive.
.
.
.SS "Setting certain match controls"
@@ -1011,24 +1019,27 @@ modifier list, in which case they are applied to every subject line that is
processed with that pattern. These modifiers do not affect the compilation
process.
.sp
- aftertext show text after match
- allaftertext show text after captures
- allcaptures show all captures
- allvector show the entire ovector
- allusedtext show all consulted text
- altglobal alternative global matching
- /g global global matching
- jitstack=<n> set size of JIT stack
- mark show mark values
- replace=<string> specify a replacement string
- startchar show starting character when relevant
- substitute_callout use substitution callouts
- substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
- substitute_skip=<n> skip substitution number n
- substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
- substitute_stop=<n> skip substitution number n and greater
- substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
- substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
+ aftertext show text after match
+ allaftertext show text after captures
+ allcaptures show all captures
+ allvector show the entire ovector
+ allusedtext show all consulted text
+ altglobal alternative global matching
+ /g global global matching
+ jitstack=<n> set size of JIT stack
+ mark show mark values
+ replace=<string> specify a replacement string
+ startchar show starting character when relevant
+ substitute_callout use substitution callouts
+ substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
+ substitute_literal use PCRE2_SUBSTITUTE_LITERAL
+ substitute_matched use PCRE2_SUBSTITUTE_MATCHED
+ substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+ substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
+ substitute_skip=<n> skip substitution <n>
+ substitute_stop=<n> skip substitution <n> and following
+ substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
+ substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
.sp
These modifiers may not appear in a \fB#pattern\fP command. If you want them as
defaults, set them in a \fB#subject\fP command.
@@ -1201,8 +1212,11 @@ pattern.
startoffset=<n> same as offset=<n>
substitute_callout use substitution callouts
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
- substitute_skip=<n> skip substitution number n
+ substitute_literal use PCRE2_SUBSTITUTE_LITERAL
+ substitute_matched use PCRE2_SUBSTITUTE_MATCHED
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+ substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
+ substitute_skip=<n> skip substitution number n
substitute_stop=<n> skip substitution number n and greater
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
@@ -1365,9 +1379,10 @@ by name.
.rs
.sp
If the \fBreplace\fP modifier is set, the \fBpcre2_substitute()\fP function is
-called instead of one of the matching functions. Note that replacement strings
-cannot contain commas, because a comma signifies the end of a modifier. This is
-not thought to be an issue in a test program.
+called instead of one of the matching functions (or after one call of
+\fBpcre2_match()\fP in the case of PCRE2_SUBSTITUTE_MATCHED). Note that
+replacement strings cannot contain commas, because a comma signifies the end of
+a modifier. This is not thought to be an issue in a test program.
.P
Unlike subject strings, \fBpcre2test\fP does not process replacement strings
for escape sequences. In UTF mode, a replacement string is checked to see if it
@@ -1381,10 +1396,18 @@ for \fBpcre2_substitute()\fP:
.sp
global PCRE2_SUBSTITUTE_GLOBAL
substitute_extended PCRE2_SUBSTITUTE_EXTENDED
+ substitute_literal PCRE2_SUBSTITUTE_LITERAL
+ substitute_matched PCRE2_SUBSTITUTE_MATCHED
substitute_overflow_length PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+ substitute_replacement_only PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
substitute_unknown_unset PCRE2_SUBSTITUTE_UNKNOWN_UNSET
substitute_unset_empty PCRE2_SUBSTITUTE_UNSET_EMPTY
.sp
+See the
+.\" HREF
+\fBpcre2api\fP
+.\"
+documentation for details of these options.
.P
After a successful substitution, the modified string is output, preceded by the
number of replacements. This may be zero if there were no matches. Here is a
@@ -2073,6 +2096,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 30 July 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 20 March 2020
+Copyright (c) 1997-2020 University of Cambridge.
.fi
diff --git a/doc/pcre2test.txt b/doc/pcre2test.txt
index fcd830f..c0ba83f 100644
--- a/doc/pcre2test.txt
+++ b/doc/pcre2test.txt
@@ -309,6 +309,12 @@ COMMAND LINES
as described in the section entitled "Saving and restoring compiled
patterns" below.
+ #loadtables <filename>
+
+ This command is used to load a set of binary character tables that can
+ be accessed by the tables=3 qualifier. Such tables can be created by
+ the pcre2_dftables program with the -b option.
+
#newline_default [<newline-list>]
When PCRE2 is built, a default newline convention can be specified.
@@ -613,7 +619,7 @@ PATTERN MODIFIERS
pushcopy push a copy onto the stack
stackguard=<number> test the stackguard feature
subject_literal treat all subject lines as literal
- tables=[0|1|2] select internal tables
+ tables=[0|1|2|3] select internal tables
use_length do not zero-terminate the pattern
utf8_input treat input as UTF-8
@@ -914,80 +920,85 @@ PATTERN MODIFIERS
Using alternative character tables
The value specified for the tables modifier must be one of the digits
- 0, 1, or 2. It causes a specific set of built-in character tables to be
- passed to pcre2_compile(). This is used in the PCRE2 tests to check be-
- haviour with different character tables. The digit specifies the tables
- as follows:
+ 0, 1, 2, or 3. It causes a specific set of built-in character tables to
+ be passed to pcre2_compile(). This is used in the PCRE2 tests to check
+ behaviour with different character tables. The digit specifies the ta-
+ bles as follows:
0 do not pass any special character tables
1 the default ASCII tables, as distributed in
pcre2_chartables.c.dist
2 a set of tables defining ISO 8859 characters
+ 3 a set of tables loaded by the #loadtables command
- In table 2, some characters whose codes are greater than 128 are iden-
- tified as letters, digits, spaces, etc. Setting alternate character ta-
- bles and a locale are mutually exclusive.
+ In tables 2, some characters whose codes are greater than 128 are iden-
+ tified as letters, digits, spaces, etc. Tables 3 can be used only after
+ a #loadtables command has loaded them from a binary file. Setting al-
+ ternate character tables and a locale are mutually exclusive.
Setting certain match controls
The following modifiers are really subject modifiers, and are described
- under "Subject Modifiers" below. However, they may be included in a
- pattern's modifier list, in which case they are applied to every sub-
- ject line that is processed with that pattern. These modifiers do not
+ under "Subject Modifiers" below. However, they may be included in a
+ pattern's modifier list, in which case they are applied to every sub-
+ ject line that is processed with that pattern. These modifiers do not
affect the compilation process.
- aftertext show text after match
- allaftertext show text after captures
- allcaptures show all captures
- allvector show the entire ovector
- allusedtext show all consulted text
- altglobal alternative global matching
- /g global global matching
- jitstack=<n> set size of JIT stack
- mark show mark values
- replace=<string> specify a replacement string
- startchar show starting character when relevant
- substitute_callout use substitution callouts
- substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
- substitute_skip=<n> skip substitution number n
- substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
- substitute_stop=<n> skip substitution number n and greater
- substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
- substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
-
- These modifiers may not appear in a #pattern command. If you want them
+ aftertext show text after match
+ allaftertext show text after captures
+ allcaptures show all captures
+ allvector show the entire ovector
+ allusedtext show all consulted text
+ altglobal alternative global matching
+ /g global global matching
+ jitstack=<n> set size of JIT stack
+ mark show mark values
+ replace=<string> specify a replacement string
+ startchar show starting character when relevant
+ substitute_callout use substitution callouts
+ substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
+ substitute_literal use PCRE2_SUBSTITUTE_LITERAL
+ substitute_matched use PCRE2_SUBSTITUTE_MATCHED
+ substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+ substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
+ substitute_skip=<n> skip substitution <n>
+ substitute_stop=<n> skip substitution <n> and following
+ substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
+ substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
+
+ These modifiers may not appear in a #pattern command. If you want them
as defaults, set them in a #subject command.
Specifying literal subject lines
- If the subject_literal modifier is present on a pattern, all the sub-
+ If the subject_literal modifier is present on a pattern, all the sub-
ject lines that it matches are taken as literal strings, with no inter-
- pretation of backslashes. It is not possible to set subject modifiers
- on such lines, but any that are set as defaults by a #subject command
+ pretation of backslashes. It is not possible to set subject modifiers
+ on such lines, but any that are set as defaults by a #subject command
are recognized.
Saving a compiled pattern
- When a pattern with the push modifier is successfully compiled, it is
- pushed onto a stack of compiled patterns, and pcre2test expects the
- next line to contain a new pattern (or a command) instead of a subject
+ When a pattern with the push modifier is successfully compiled, it is
+ pushed onto a stack of compiled patterns, and pcre2test expects the
+ next line to contain a new pattern (or a command) instead of a subject
line. This facility is used when saving compiled patterns to a file, as
- described in the section entitled "Saving and restoring compiled pat-
- terns" below. If pushcopy is used instead of push, a copy of the com-
- piled pattern is stacked, leaving the original as current, ready to
- match the following input lines. This provides a way of testing the
- pcre2_code_copy() function. The push and pushcopy modifiers are in-
- compatible with compilation modifiers such as global that act at match
+ described in the section entitled "Saving and restoring compiled pat-
+ terns" below. If pushcopy is used instead of push, a copy of the com-
+ piled pattern is stacked, leaving the original as current, ready to
+ match the following input lines. This provides a way of testing the
+ pcre2_code_copy() function. The push and pushcopy modifiers are in-
+ compatible with compilation modifiers such as global that act at match
time. Any that are specified are ignored (for the stacked copy), with a
- warning message, except for replace, which causes an error. Note that
- jitverify, which is allowed, does not carry through to any subsequent
+ warning message, except for replace, which causes an error. Note that
+ jitverify, which is allowed, does not carry through to any subsequent
matching that uses a stacked pattern.
Testing foreign pattern conversion
- The experimental foreign pattern conversion functions in PCRE2 can be
- tested by setting the convert modifier. Its argument is a colon-sepa-
- rated list of options, which set the equivalent option for the
+ The experimental foreign pattern conversion functions in PCRE2 can be
+ tested by setting the convert modifier. Its argument is a colon-sepa-
+ rated list of options, which set the equivalent option for the
pcre2_pattern_convert() function:
glob PCRE2_CONVERT_GLOB
@@ -999,19 +1010,19 @@ PATTERN MODIFIERS
The "unset" value is useful for turning off a default that has been set
by a #pattern command. When one of these options is set, the input pat-
- tern is passed to pcre2_pattern_convert(). If the conversion is suc-
- cessful, the result is reflected in the output and then passed to
+ tern is passed to pcre2_pattern_convert(). If the conversion is suc-
+ cessful, the result is reflected in the output and then passed to
pcre2_compile(). The normal utf and no_utf_check options, if set, cause
- the PCRE2_CONVERT_UTF and PCRE2_CONVERT_NO_UTF_CHECK options to be
+ the PCRE2_CONVERT_UTF and PCRE2_CONVERT_NO_UTF_CHECK options to be
passed to pcre2_pattern_convert().
By default, the conversion function is allowed to allocate a buffer for
- its output. However, if the convert_length modifier is set to a value
- greater than zero, pcre2test passes a buffer of the given length. This
+ its output. However, if the convert_length modifier is set to a value
+ greater than zero, pcre2test passes a buffer of the given length. This
makes it possible to test the length check.
- The convert_glob_escape and convert_glob_separator modifiers can be
- used to specify the escape and separator characters for glob process-
+ The convert_glob_escape and convert_glob_separator modifiers can be
+ used to specify the escape and separator characters for glob process-
ing, overriding the defaults, which are operating-system dependent.
@@ -1022,7 +1033,7 @@ SUBJECT MODIFIERS
Setting match options
- The following modifiers set options for pcre2_match() or
+ The following modifiers set options for pcre2_match() or
pcre2_dfa_match(). See pcre2api for a description of their effects.
anchored set PCRE2_ANCHORED
@@ -1038,34 +1049,34 @@ SUBJECT MODIFIERS
partial_hard (or ph) set PCRE2_PARTIAL_HARD
partial_soft (or ps) set PCRE2_PARTIAL_SOFT
- The partial matching modifiers are provided with abbreviations because
+ The partial matching modifiers are provided with abbreviations because
they appear frequently in tests.
- If the posix or posix_nosub modifier was present on the pattern, caus-
+ If the posix or posix_nosub modifier was present on the pattern, caus-
ing the POSIX wrapper API to be used, the only option-setting modifiers
that have any effect are notbol, notempty, and noteol, causing REG_NOT-
- BOL, REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to
+ BOL, REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to
regexec(). The other modifiers are ignored, with a warning message.
- There is one additional modifier that can be used with the POSIX wrap-
+ There is one additional modifier that can be used with the POSIX wrap-
per. It is ignored (with a warning) if used for non-POSIX matching.
posix_startend=<n>[:<m>]
- This causes the subject string to be passed to regexec() using the
- REG_STARTEND option, which uses offsets to specify which part of the
- string is searched. If only one number is given, the end offset is
- passed as the end of the subject string. For more detail of REG_STAR-
- TEND, see the pcre2posix documentation. If the subject string contains
- binary zeros (coded as escapes such as \x{00} because pcre2test does
+ This causes the subject string to be passed to regexec() using the
+ REG_STARTEND option, which uses offsets to specify which part of the
+ string is searched. If only one number is given, the end offset is
+ passed as the end of the subject string. For more detail of REG_STAR-
+ TEND, see the pcre2posix documentation. If the subject string contains
+ binary zeros (coded as escapes such as \x{00} because pcre2test does
not support actual binary zeros in its input), you must use posix_star-
tend to specify its length.
Setting match controls
- The following modifiers affect the matching process or request addi-
- tional information. Some of them may also be specified on a pattern
- line (see above), in which case they apply to every subject line that
+ The following modifiers affect the matching process or request addi-
+ tional information. Some of them may also be specified on a pattern
+ line (see above), in which case they apply to every subject line that
is matched against that pattern.
aftertext show text after match
@@ -1103,37 +1114,40 @@ SUBJECT MODIFIERS
startoffset=<n> same as offset=<n>
substitute_callout use substitution callouts
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
- substitute_skip=<n> skip substitution number n
+ substitute_literal use PCRE2_SUBSTITUTE_LITERAL
+ substitute_matched use PCRE2_SUBSTITUTE_MATCHED
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+ substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
+ substitute_skip=<n> skip substitution number n
substitute_stop=<n> skip substitution number n and greater
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
zero_terminate pass the subject as zero-terminated
The effects of these modifiers are described in the following sections.
- When matching via the POSIX wrapper API, the aftertext, allaftertext,
- and ovector subject modifiers work as described below. All other modi-
+ When matching via the POSIX wrapper API, the aftertext, allaftertext,
+ and ovector subject modifiers work as described below. All other modi-
fiers are either ignored, with a warning message, or cause an error.
Showing more text
- The aftertext modifier requests that as well as outputting the part of
+ The aftertext modifier requests that as well as outputting the part of
the subject string that matched the entire pattern, pcre2test should in
addition output the remainder of the subject string. This is useful for
tests where the subject contains multiple copies of the same substring.
- The allaftertext modifier requests the same action for captured sub-
+ The allaftertext modifier requests the same action for captured sub-
strings as well as the main matched substring. In each case the remain-
der is output on the following line with a plus character following the
capture number.
- The allusedtext modifier requests that all the text that was consulted
- during a successful pattern match by the interpreter should be shown,
- for both full and partial matches. This feature is not supported for
- JIT matching, and if requested with JIT it is ignored (with a warning
- message). Setting this modifier affects the output if there is a look-
- behind at the start of a match, or, for a complete match, a lookahead
+ The allusedtext modifier requests that all the text that was consulted
+ during a successful pattern match by the interpreter should be shown,
+ for both full and partial matches. This feature is not supported for
+ JIT matching, and if requested with JIT it is ignored (with a warning
+ message). Setting this modifier affects the output if there is a look-
+ behind at the start of a match, or, for a complete match, a lookahead
at the end, or if \K is used in the pattern. Characters that precede or
- follow the start and end of the actual match are indicated in the out-
+ follow the start and end of the actual match are indicated in the out-
put by '<' or '>' characters underneath them. Here is an example:
re> /(?<=pqr)abc(?=xyz)/
@@ -1144,16 +1158,16 @@ SUBJECT MODIFIERS
Partial match: pqrabcxy
<<<
- The first, complete match shows that the matched string is "abc", with
- the preceding and following strings "pqr" and "xyz" having been con-
- sulted during the match (when processing the assertions). The partial
+ The first, complete match shows that the matched string is "abc", with
+ the preceding and following strings "pqr" and "xyz" having been con-
+ sulted during the match (when processing the assertions). The partial
match can indicate only the preceding string.
- The startchar modifier requests that the starting character for the
- match be indicated, if it is different to the start of the matched
+ The startchar modifier requests that the starting character for the
+ match be indicated, if it is different to the start of the matched
string. The only time when this occurs is when \K has been processed as
part of the match. In this situation, the output for the matched string
- is displayed from the starting character instead of from the match
+ is displayed from the starting character instead of from the match
point, with circumflex characters under the earlier characters. For ex-
ample:
@@ -1162,7 +1176,7 @@ SUBJECT MODIFIERS
0: abcxyz
^^^
- Unlike allusedtext, the startchar modifier can be used with JIT. How-
+ Unlike allusedtext, the startchar modifier can be used with JIT. How-
ever, these two modifiers are mutually exclusive.
Showing the value of all capture groups
@@ -1170,108 +1184,114 @@ SUBJECT MODIFIERS
The allcaptures modifier requests that the values of all potential cap-
tured parentheses be output after a match. By default, only those up to
the highest one actually used in the match are output (corresponding to
- the return code from pcre2_match()). Groups that did not take part in
- the match are output as "<unset>". This modifier is not relevant for
- DFA matching (which does no capturing) and does not apply when replace
+ the return code from pcre2_match()). Groups that did not take part in
+ the match are output as "<unset>". This modifier is not relevant for
+ DFA matching (which does no capturing) and does not apply when replace
is specified; it is ignored, with a warning message, if present.
Showing the entire ovector, for all outcomes
The allvector modifier requests that the entire ovector be shown, what-
ever the outcome of the match. Compare allcaptures, which shows only up
- to the maximum number of capture groups for the pattern, and then only
- for a successful complete non-DFA match. This modifier, which acts af-
- ter any match result, and also for DFA matching, provides a means of
- checking that there are no unexpected modifications to ovector fields.
- Before each match attempt, the ovector is filled with a special value,
- and if this is found in both elements of a capturing pair, "<un-
- changed>" is output. After a successful match, this applies to all
- groups after the maximum capture group for the pattern. In other cases
- it applies to the entire ovector. After a partial match, the first two
- elements are the only ones that should be set. After a DFA match, the
- amount of ovector that is used depends on the number of matches that
+ to the maximum number of capture groups for the pattern, and then only
+ for a successful complete non-DFA match. This modifier, which acts af-
+ ter any match result, and also for DFA matching, provides a means of
+ checking that there are no unexpected modifications to ovector fields.
+ Before each match attempt, the ovector is filled with a special value,
+ and if this is found in both elements of a capturing pair, "<un-
+ changed>" is output. After a successful match, this applies to all
+ groups after the maximum capture group for the pattern. In other cases
+ it applies to the entire ovector. After a partial match, the first two
+ elements are the only ones that should be set. After a DFA match, the
+ amount of ovector that is used depends on the number of matches that
were found.
Testing pattern callouts
- A callout function is supplied when pcre2test calls the library match-
- ing functions, unless callout_none is specified. Its behaviour can be
- controlled by various modifiers listed above whose names begin with
- callout_. Details are given in the section entitled "Callouts" below.
- Testing callouts from pcre2_substitute() is decribed separately in
+ A callout function is supplied when pcre2test calls the library match-
+ ing functions, unless callout_none is specified. Its behaviour can be
+ controlled by various modifiers listed above whose names begin with
+ callout_. Details are given in the section entitled "Callouts" below.
+ Testing callouts from pcre2_substitute() is described separately in
"Testing the substitution function" below.
Finding all matches in a string
Searching for all possible matches within a subject can be requested by
- the global or altglobal modifier. After finding a match, the matching
- function is called again to search the remainder of the subject. The
- difference between global and altglobal is that the former uses the
- start_offset argument to pcre2_match() or pcre2_dfa_match() to start
- searching at a new point within the entire string (which is what Perl
+ the global or altglobal modifier. After finding a match, the matching
+ function is called again to search the remainder of the subject. The
+ difference between global and altglobal is that the former uses the
+ start_offset argument to pcre2_match() or pcre2_dfa_match() to start
+ searching at a new point within the entire string (which is what Perl
does), whereas the latter passes over a shortened subject. This makes a
difference to the matching process if the pattern begins with a lookbe-
hind assertion (including \b or \B).
- If an empty string is matched, the next match is done with the
+ If an empty string is matched, the next match is done with the
PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set, in order to search
for another, non-empty, match at the same point in the subject. If this
- match fails, the start offset is advanced, and the normal match is re-
- tried. This imitates the way Perl handles such cases when using the /g
- modifier or the split() function. Normally, the start offset is ad-
- vanced by one character, but if the newline convention recognizes CRLF
- as a newline, and the current character is CR followed by LF, an ad-
+ match fails, the start offset is advanced, and the normal match is re-
+ tried. This imitates the way Perl handles such cases when using the /g
+ modifier or the split() function. Normally, the start offset is ad-
+ vanced by one character, but if the newline convention recognizes CRLF
+ as a newline, and the current character is CR followed by LF, an ad-
vance of two characters occurs.
Testing substring extraction functions
- The copy and get modifiers can be used to test the pcre2_sub-
+ The copy and get modifiers can be used to test the pcre2_sub-
string_copy_xxx() and pcre2_substring_get_xxx() functions. They can be
given more than once, and each can specify a capture group name or num-
ber, for example:
abcd\=copy=1,copy=3,get=G1
- If the #subject command is used to set default copy and/or get lists,
- these can be unset by specifying a negative number to cancel all num-
+ If the #subject command is used to set default copy and/or get lists,
+ these can be unset by specifying a negative number to cancel all num-
bered groups and an empty name to cancel all named groups.
- The getall modifier tests pcre2_substring_list_get(), which extracts
+ The getall modifier tests pcre2_substring_list_get(), which extracts
all captured substrings.
- If the subject line is successfully matched, the substrings extracted
- by the convenience functions are output with C, G, or L after the
- string number instead of a colon. This is in addition to the normal
- full list. The string length (that is, the return from the extraction
+ If the subject line is successfully matched, the substrings extracted
+ by the convenience functions are output with C, G, or L after the
+ string number instead of a colon. This is in addition to the normal
+ full list. The string length (that is, the return from the extraction
function) is given in parentheses after each substring, followed by the
name when the extraction was by name.
Testing the substitution function
- If the replace modifier is set, the pcre2_substitute() function is
- called instead of one of the matching functions. Note that replacement
- strings cannot contain commas, because a comma signifies the end of a
- modifier. This is not thought to be an issue in a test program.
-
- Unlike subject strings, pcre2test does not process replacement strings
- for escape sequences. In UTF mode, a replacement string is checked to
- see if it is a valid UTF-8 string. If so, it is correctly converted to
- a UTF string of the appropriate code unit width. If it is not a valid
- UTF-8 string, the individual code units are copied directly. This pro-
+ If the replace modifier is set, the pcre2_substitute() function is
+ called instead of one of the matching functions (or after one call of
+ pcre2_match() in the case of PCRE2_SUBSTITUTE_MATCHED). Note that re-
+ placement strings cannot contain commas, because a comma signifies the
+ end of a modifier. This is not thought to be an issue in a test pro-
+ gram.
+
+ Unlike subject strings, pcre2test does not process replacement strings
+ for escape sequences. In UTF mode, a replacement string is checked to
+ see if it is a valid UTF-8 string. If so, it is correctly converted to
+ a UTF string of the appropriate code unit width. If it is not a valid
+ UTF-8 string, the individual code units are copied directly. This pro-
vides a means of passing an invalid UTF-8 string for testing purposes.
- The following modifiers set options (in additional to the normal match
+ The following modifiers set options (in addition to the normal match
options) for pcre2_substitute():
global PCRE2_SUBSTITUTE_GLOBAL
substitute_extended PCRE2_SUBSTITUTE_EXTENDED
+ substitute_literal PCRE2_SUBSTITUTE_LITERAL
+ substitute_matched PCRE2_SUBSTITUTE_MATCHED
substitute_overflow_length PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+ substitute_replacement_only PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
substitute_unknown_unset PCRE2_SUBSTITUTE_UNKNOWN_UNSET
substitute_unset_empty PCRE2_SUBSTITUTE_UNSET_EMPTY
+ See the pcre2api documentation for details of these options.
- After a successful substitution, the modified string is output, pre-
- ceded by the number of replacements. This may be zero if there were no
+ After a successful substitution, the modified string is output, pre-
+ ceded by the number of replacements. This may be zero if there were no
matches. Here is a simple example of a substitution test:
/abc/replace=xxx
@@ -1280,12 +1300,12 @@ SUBJECT MODIFIERS
=abc=abc=\=global
2: =xxx=xxx=
- Subject and replacement strings should be kept relatively short (fewer
- than 256 characters) for substitution tests, as fixed-size buffers are
- used. To make it easy to test for buffer overflow, if the replacement
- string starts with a number in square brackets, that number is passed
- to pcre2_substitute() as the size of the output buffer, with the re-
- placement string starting at the next character. Here is an example
+ Subject and replacement strings should be kept relatively short (fewer
+ than 256 characters) for substitution tests, as fixed-size buffers are
+ used. To make it easy to test for buffer overflow, if the replacement
+ string starts with a number in square brackets, that number is passed
+ to pcre2_substitute() as the size of the output buffer, with the re-
+ placement string starting at the next character. Here is an example
that tests the edge case:
/abc/
@@ -1295,12 +1315,12 @@ SUBJECT MODIFIERS
Failed: error -47: no more memory
The default action of pcre2_substitute() is to return PCRE2_ER-
- ROR_NOMEMORY when the output buffer is too small. However, if the
- PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the substi-
+ ROR_NOMEMORY when the output buffer is too small. However, if the
+ PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the substi-
tute_overflow_length modifier), pcre2_substitute() continues to go
- through the motions of matching and substituting (but not doing any
- callouts), in order to compute the size of buffer that is required.
- When this happens, pcre2test shows the required buffer length (which
+ through the motions of matching and substituting (but not doing any
+ callouts), in order to compute the size of buffer that is required.
+ When this happens, pcre2test shows the required buffer length (which
includes space for the trailing zero) as part of the error message. For
example:
@@ -1309,15 +1329,15 @@ SUBJECT MODIFIERS
Failed: error -47: no more memory: 10 code units are needed
A replacement string is ignored with POSIX and DFA matching. Specifying
- partial matching provokes an error return ("bad option value") from
+ partial matching provokes an error return ("bad option value") from
pcre2_substitute().
Testing substitute callouts
If the substitute_callout modifier is set, a substitution callout func-
- tion is set up. The null_context modifier must not be set, because the
- address of the callout function is passed in a match context. When the
- callout function is called (after each substitution), details of the
+ tion is set up. The null_context modifier must not be set, because the
+ address of the callout function is passed in a match context. When the
+ callout function is called (after each substitution), details of the
input and output strings are output. For example:
/abc/g,replace=<$0>,substitute_callout
@@ -1326,19 +1346,19 @@ SUBJECT MODIFIERS
2(1) Old 6 9 "abc" New 8 13 "<abc>"
2: <abc>def<abc>pqr
- The first number on each callout line is the count of matches. The
+ The first number on each callout line is the count of matches. The
parenthesized number is the number of pairs that are set in the ovector
- (that is, one more than the number of capturing groups that were set).
+ (that is, one more than the number of capturing groups that were set).
Then are listed the offsets of the old substring, its contents, and the
same for the replacement.
- By default, the substitution callout function returns zero, which ac-
- cepts the replacement and causes matching to continue if /g was used.
- Two further modifiers can be used to test other return values. If sub-
- stitute_skip is set to a value greater than zero the callout function
- returns +1 for the match of that number, and similarly substitute_stop
- returns -1. These cause the replacement to be rejected, and -1 causes
- no further matching to take place. If either of them are set, substi-
+ By default, the substitution callout function returns zero, which ac-
+ cepts the replacement and causes matching to continue if /g was used.
+ Two further modifiers can be used to test other return values. If sub-
+ stitute_skip is set to a value greater than zero the callout function
+ returns +1 for the match of that number, and similarly substitute_stop
+ returns -1. These cause the replacement to be rejected, and -1 causes
+ no further matching to take place. If either of them is set, substi-
tute_callout is assumed. For example:
/abc/g,replace=<$0>,substitute_skip=1
@@ -1356,160 +1376,160 @@ SUBJECT MODIFIERS
Setting the JIT stack size
- The jitstack modifier provides a way of setting the maximum stack size
- that is used by the just-in-time optimization code. It is ignored if
- JIT optimization is not being used. The value is a number of kibibytes
- (units of 1024 bytes). Setting zero reverts to the default of 32KiB.
+ The jitstack modifier provides a way of setting the maximum stack size
+ that is used by the just-in-time optimization code. It is ignored if
+ JIT optimization is not being used. The value is a number of kibibytes
+ (units of 1024 bytes). Setting zero reverts to the default of 32KiB.
Providing a stack that is larger than the default is necessary only for
- very complicated patterns. If jitstack is set non-zero on a subject
+ very complicated patterns. If jitstack is set non-zero on a subject
line it overrides any value that was set on the pattern.
Setting heap, match, and depth limits
- The heap_limit, match_limit, and depth_limit modifiers set the appro-
- priate limits in the match context. These values are ignored when the
+ The heap_limit, match_limit, and depth_limit modifiers set the appro-
+ priate limits in the match context. These values are ignored when the
find_limits modifier is specified.
Finding minimum limits
- If the find_limits modifier is present on a subject line, pcre2test
- calls the relevant matching function several times, setting different
- values in the match context via pcre2_set_heap_limit(),
- pcre2_set_match_limit(), or pcre2_set_depth_limit() until it finds the
- minimum values for each parameter that allows the match to complete
+ If the find_limits modifier is present on a subject line, pcre2test
+ calls the relevant matching function several times, setting different
+ values in the match context via pcre2_set_heap_limit(),
+ pcre2_set_match_limit(), or pcre2_set_depth_limit() until it finds the
+ minimum values for each parameter that allows the match to complete
without error. If JIT is being used, only the match limit is relevant.
When using this modifier, the pattern should not contain any limit set-
- tings such as (*LIMIT_MATCH=...) within it. If such a setting is
+ tings such as (*LIMIT_MATCH=...) within it. If such a setting is
present and is lower than the minimum matching value, the minimum value
- cannot be found because pcre2_set_match_limit() etc. are only able to
+ cannot be found because pcre2_set_match_limit() etc. are only able to
reduce the value of an in-pattern limit; they cannot increase it.
- For non-DFA matching, the minimum depth_limit number is a measure of
+ For non-DFA matching, the minimum depth_limit number is a measure of
how much nested backtracking happens (that is, how deeply the pattern's
- tree is searched). In the case of DFA matching, depth_limit controls
- the depth of recursive calls of the internal function that is used for
+ tree is searched). In the case of DFA matching, depth_limit controls
+ the depth of recursive calls of the internal function that is used for
handling pattern recursion, lookaround assertions, and atomic groups.
For non-DFA matching, the match_limit number is a measure of the amount
of backtracking that takes place, and learning the minimum value can be
- instructive. For most simple matches, the number is quite small, but
- for patterns with very large numbers of matching possibilities, it can
- become large very quickly with increasing length of subject string. In
- the case of DFA matching, match_limit controls the total number of
+ instructive. For most simple matches, the number is quite small, but
+ for patterns with very large numbers of matching possibilities, it can
+ become large very quickly with increasing length of subject string. In
+ the case of DFA matching, match_limit controls the total number of
calls, both recursive and non-recursive, to the internal matching func-
tion, thus controlling the overall amount of computing resource that is
used.
- For both kinds of matching, the heap_limit number, which is in
- kibibytes (units of 1024 bytes), limits the amount of heap memory used
+ For both kinds of matching, the heap_limit number, which is in
+ kibibytes (units of 1024 bytes), limits the amount of heap memory used
for matching. A value of zero disables the use of any heap memory; many
- simple pattern matches can be done without using the heap, so zero is
+ simple pattern matches can be done without using the heap, so zero is
not an unreasonable setting.
Showing MARK names
The mark modifier causes the names from backtracking control verbs that
- are returned from calls to pcre2_match() to be displayed. If a mark is
- returned for a match, non-match, or partial match, pcre2test shows it.
- For a match, it is on a line by itself, tagged with "MK:". Otherwise,
+ are returned from calls to pcre2_match() to be displayed. If a mark is
+ returned for a match, non-match, or partial match, pcre2test shows it.
+ For a match, it is on a line by itself, tagged with "MK:". Otherwise,
it is added to the non-match message.
Showing memory usage
- The memory modifier causes pcre2test to log the sizes of all heap mem-
- ory allocation and freeing calls that occur during a call to
- pcre2_match() or pcre2_dfa_match(). These occur only when a match re-
- quires a bigger vector than the default for remembering backtracking
- points (pcre2_match()) or for internal workspace (pcre2_dfa_match()).
- In many cases there will be no heap memory used and therefore no addi-
+ The memory modifier causes pcre2test to log the sizes of all heap mem-
+ ory allocation and freeing calls that occur during a call to
+ pcre2_match() or pcre2_dfa_match(). These occur only when a match re-
+ quires a bigger vector than the default for remembering backtracking
+ points (pcre2_match()) or for internal workspace (pcre2_dfa_match()).
+ In many cases there will be no heap memory used and therefore no addi-
tional output. No heap memory is allocated during matching with JIT, so
- in that case the memory modifier never has any effect. For this modi-
- fier to work, the null_context modifier must not be set on both the
+ in that case the memory modifier never has any effect. For this modi-
+ fier to work, the null_context modifier must not be set on both the
pattern and the subject, though it can be set on one or the other.
Setting a starting offset
- The offset modifier sets an offset in the subject string at which
+ The offset modifier sets an offset in the subject string at which
matching starts. Its value is a number of code units, not characters.
Setting an offset limit
- The offset_limit modifier sets a limit for unanchored matches. If a
+ The offset_limit modifier sets a limit for unanchored matches. If a
match cannot be found starting at or before this offset in the subject,
a "no match" return is given. The data value is a number of code units,
- not characters. When this modifier is used, the use_offset_limit modi-
+ not characters. When this modifier is used, the use_offset_limit modi-
fier must have been set for the pattern; if not, an error is generated.
Setting the size of the output vector
- The ovector modifier applies only to the subject line in which it ap-
+ The ovector modifier applies only to the subject line in which it ap-
pears, though of course it can also be used to set a default in a #sub-
- ject command. It specifies the number of pairs of offsets that are
+ ject command. It specifies the number of pairs of offsets that are
available for storing matching information. The default is 15.
- A value of zero is useful when testing the POSIX API because it causes
+ A value of zero is useful when testing the POSIX API because it causes
regexec() to be called with a NULL capture vector. When not testing the
- POSIX API, a value of zero is used to cause pcre2_match_data_cre-
- ate_from_pattern() to be called, in order to create a match block of
+ POSIX API, a value of zero is used to cause pcre2_match_data_cre-
+ ate_from_pattern() to be called, in order to create a match block of
exactly the right size for the pattern. (It is not possible to create a
- match block with a zero-length ovector; there is always at least one
+ match block with a zero-length ovector; there is always at least one
pair of offsets.)
Passing the subject as zero-terminated
By default, the subject string is passed to a native API matching func-
tion with its correct length. In order to test the facility for passing
- a zero-terminated string, the zero_terminate modifier is provided. It
- causes the length to be passed as PCRE2_ZERO_TERMINATED. When matching
+ a zero-terminated string, the zero_terminate modifier is provided. It
+ causes the length to be passed as PCRE2_ZERO_TERMINATED. When matching
via the POSIX interface, this modifier is ignored, with a warning.
- When testing pcre2_substitute(), this modifier also has the effect of
+ When testing pcre2_substitute(), this modifier also has the effect of
passing the replacement string as zero-terminated.
Passing a NULL context
- Normally, pcre2test passes a context block to pcre2_match(),
- pcre2_dfa_match(), pcre2_jit_match() or pcre2_substitute(). If the
- null_context modifier is set, however, NULL is passed. This is for
- testing that the matching and substitution functions behave correctly
- in this case (they use default values). This modifier cannot be used
+ Normally, pcre2test passes a context block to pcre2_match(),
+ pcre2_dfa_match(), pcre2_jit_match() or pcre2_substitute(). If the
+ null_context modifier is set, however, NULL is passed. This is for
+ testing that the matching and substitution functions behave correctly
+ in this case (they use default values). This modifier cannot be used
with the find_limits or substitute_callout modifiers.
THE ALTERNATIVE MATCHING FUNCTION
- By default, pcre2test uses the standard PCRE2 matching function,
+ By default, pcre2test uses the standard PCRE2 matching function,
pcre2_match() to match each subject line. PCRE2 also supports an alter-
- native matching function, pcre2_dfa_match(), which operates in a dif-
- ferent way, and has some restrictions. The differences between the two
+ native matching function, pcre2_dfa_match(), which operates in a dif-
+ ferent way, and has some restrictions. The differences between the two
functions are described in the pcre2matching documentation.
- If the dfa modifier is set, the alternative matching function is used.
- This function finds all possible matches at a given point in the sub-
- ject. If, however, the dfa_shortest modifier is set, processing stops
- after the first match is found. This is always the shortest possible
+ If the dfa modifier is set, the alternative matching function is used.
+ This function finds all possible matches at a given point in the sub-
+ ject. If, however, the dfa_shortest modifier is set, processing stops
+ after the first match is found. This is always the shortest possible
match.
DEFAULT OUTPUT FROM pcre2test
- This section describes the output when the normal matching function,
+ This section describes the output when the normal matching function,
pcre2_match(), is being used.
- When a match succeeds, pcre2test outputs the list of captured sub-
- strings, starting with number 0 for the string that matched the whole
+ When a match succeeds, pcre2test outputs the list of captured sub-
+ strings, starting with number 0 for the string that matched the whole
pattern. Otherwise, it outputs "No match" when the return is PCRE2_ER-
- ROR_NOMATCH, or "Partial match:" followed by the partially matching
- substring when the return is PCRE2_ERROR_PARTIAL. (Note that this is
- the entire substring that was inspected during the partial match; it
- may include characters before the actual match start if a lookbehind
+ ROR_NOMATCH, or "Partial match:" followed by the partially matching
+ substring when the return is PCRE2_ERROR_PARTIAL. (Note that this is
+ the entire substring that was inspected during the partial match; it
+ may include characters before the actual match start if a lookbehind
assertion, \K, \b, or \B was involved.)
For any other return, pcre2test outputs the PCRE2 negative error number
- and a short descriptive phrase. If the error is a failed UTF string
- check, the code unit offset of the start of the failing character is
+ and a short descriptive phrase. If the error is a failed UTF string
+ check, the code unit offset of the start of the failing character is
also output. Here is an example of an interactive pcre2test run.
$ pcre2test
@@ -1525,8 +1545,8 @@ DEFAULT OUTPUT FROM pcre2test
Unset capturing substrings that are not followed by one that is set are
not shown by pcre2test unless the allcaptures modifier is specified. In
the following example, there are two capturing substrings, but when the
- first data line is matched, the second, unset substring is not shown.
- An "internal" unset substring is shown as "<unset>", as for the second
+ first data line is matched, the second, unset substring is not shown.
+ An "internal" unset substring is shown as "<unset>", as for the second
data line.
re> /(a)|(b)/
@@ -1538,11 +1558,11 @@ DEFAULT OUTPUT FROM pcre2test
1: <unset>
2: b
- If the strings contain any non-printing characters, they are output as
- \xhh escapes if the value is less than 256 and UTF mode is not set.
+ If the strings contain any non-printing characters, they are output as
+ \xhh escapes if the value is less than 256 and UTF mode is not set.
Otherwise they are output as \x{hh...} escapes. See below for the defi-
- nition of non-printing characters. If the aftertext modifier is set,
- the output for substring 0 is followed by the rest of the subject
+ nition of non-printing characters. If the aftertext modifier is set,
+ the output for substring 0 is followed by the rest of the subject
string, identified by "0+" like this:
re> /cat/aftertext
@@ -1562,8 +1582,8 @@ DEFAULT OUTPUT FROM pcre2test
0: ipp
1: pp
- "No match" is output only if the first match attempt fails. Here is an
- example of a failure message (the offset 4 that is specified by the
+ "No match" is output only if the first match attempt fails. Here is an
+ example of a failure message (the offset 4 that is specified by the
offset modifier is past the end of the subject string):
re> /xyz/
@@ -1571,7 +1591,7 @@ DEFAULT OUTPUT FROM pcre2test
Error -24 (bad offset value)
Note that whereas patterns can be continued over several lines (a plain
- ">" prompt is used for continuations), subject lines may not. However
+ ">" prompt is used for continuations), subject lines may not. However
newlines can be included in a subject by means of the \n escape (or \r,
\r\n, etc., depending on the newline sequence setting).
@@ -1579,7 +1599,7 @@ DEFAULT OUTPUT FROM pcre2test
OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
When the alternative matching function, pcre2_dfa_match(), is used, the
- output consists of a list of all the matches that start at the first
+ output consists of a list of all the matches that start at the first
point in the subject where there is at least one match. For example:
re> /(tang|tangerine|tan)/
@@ -1588,11 +1608,11 @@ OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
1: tang
2: tan
- Using the normal matching function on this data finds only "tang". The
- longest matching string is always given first (and numbered zero). Af-
- ter a PCRE2_ERROR_PARTIAL return, the output is "Partial match:", fol-
+ Using the normal matching function on this data finds only "tang". The
+ longest matching string is always given first (and numbered zero). Af-
+ ter a PCRE2_ERROR_PARTIAL return, the output is "Partial match:", fol-
lowed by the partially matching substring. Note that this is the entire
- substring that was inspected during the partial match; it may include
+ substring that was inspected during the partial match; it may include
characters before the actual match start if a lookbehind assertion, \b,
or \B was involved. (\K is not supported for DFA matching.)
@@ -1608,16 +1628,16 @@ OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
1: tan
0: tan
- The alternative matching function does not support substring capture,
- so the modifiers that are concerned with captured substrings are not
+ The alternative matching function does not support substring capture,
+ so the modifiers that are concerned with captured substrings are not
relevant.
RESTARTING AFTER A PARTIAL MATCH
- When the alternative matching function has given the PCRE2_ERROR_PAR-
+ When the alternative matching function has given the PCRE2_ERROR_PAR-
TIAL return, indicating that the subject partially matched the pattern,
- you can restart the match with additional subject data by means of the
+ you can restart the match with additional subject data by means of the
dfa_restart modifier. For example:
re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
@@ -1626,37 +1646,37 @@ RESTARTING AFTER A PARTIAL MATCH
data> n05\=dfa,dfa_restart
0: n05
- For further information about partial matching, see the pcre2partial
+ For further information about partial matching, see the pcre2partial
documentation.
CALLOUTS
If the pattern contains any callout requests, pcre2test's callout func-
- tion is called during matching unless callout_none is specified. This
+ tion is called during matching unless callout_none is specified. This
works with both matching functions, and with JIT, though there are some
- differences in behaviour. The output for callouts with numerical argu-
+ differences in behaviour. The output for callouts with numerical argu-
ments and those with string arguments is slightly different.
Callouts with numerical arguments
By default, the callout function displays the callout number, the start
- and current positions in the subject text at the callout time, and the
+ and current positions in the subject text at the callout time, and the
next pattern item to be tested. For example:
--->pqrabcdef
0 ^ ^ \d
- This output indicates that callout number 0 occurred for a match at-
- tempt starting at the fourth character of the subject string, when the
- pointer was at the seventh character, and when the next pattern item
- was \d. Just one circumflex is output if the start and current posi-
+ This output indicates that callout number 0 occurred for a match at-
+ tempt starting at the fourth character of the subject string, when the
+ pointer was at the seventh character, and when the next pattern item
+ was \d. Just one circumflex is output if the start and current posi-
tions are the same, or if the current position precedes the start posi-
tion, which can happen if the callout is in a lookbehind assertion.
Callouts numbered 255 are assumed to be automatic callouts, inserted as
a result of the auto_callout pattern modifier. In this case, instead of
- showing the callout number, the offset in the pattern, preceded by a
+ showing the callout number, the offset in the pattern, preceded by a
plus, is output. For example:
re> /\d?[A-E]\*/auto_callout
@@ -1683,17 +1703,17 @@ CALLOUTS
+12 ^ ^
0: abc
- The mark changes between matching "a" and "b", but stays the same for
- the rest of the match, so nothing more is output. If, as a result of
- backtracking, the mark reverts to being unset, the text "<unset>" is
+ The mark changes between matching "a" and "b", but stays the same for
+ the rest of the match, so nothing more is output. If, as a result of
+ backtracking, the mark reverts to being unset, the text "<unset>" is
output.
Callouts with string arguments
The output for a callout with a string argument is similar, except that
- instead of outputting a callout number before the position indicators,
- the callout string and its offset in the pattern string are output be-
- fore the reflection of the subject string, and the subject string is
+ instead of outputting a callout number before the position indicators,
+ the callout string and its offset in the pattern string are output be-
+ fore the reflection of the subject string, and the subject string is
reflected for each callout. For example:
re> /^ab(?C'first')cd(?C"second")ef/
@@ -1709,26 +1729,26 @@ CALLOUTS
Callout modifiers
- The callout function in pcre2test returns zero (carry on matching) by
- default, but you can use a callout_fail modifier in a subject line to
+ The callout function in pcre2test returns zero (carry on matching) by
+ default, but you can use a callout_fail modifier in a subject line to
change this and other parameters of the callout (see below).
If the callout_capture modifier is set, the current captured groups are
output when a callout occurs. This is useful only for non-DFA matching,
- as pcre2_dfa_match() does not support capturing, so no captures are
+ as pcre2_dfa_match() does not support capturing, so no captures are
ever shown.
The normal callout output, showing the callout number or pattern offset
- (as described above) is suppressed if the callout_no_where modifier is
+ (as described above) is suppressed if the callout_no_where modifier is
set.
- When using the interpretive matching function pcre2_match() without
- JIT, setting the callout_extra modifier causes additional output from
- pcre2test's callout function to be generated. For the first callout in
- a match attempt at a new starting position in the subject, "New match
- attempt" is output. If there has been a backtrack since the last call-
+ When using the interpretive matching function pcre2_match() without
+ JIT, setting the callout_extra modifier causes additional output from
+ pcre2test's callout function to be generated. For the first callout in
+ a match attempt at a new starting position in the subject, "New match
+ attempt" is output. If there has been a backtrack since the last call-
out (or start of matching if this is the first callout), "Backtrack" is
- output, followed by "No other matching paths" if the backtrack ended
+ output, followed by "No other matching paths" if the backtrack ended
the previous match attempt. For example:
re> /(a+)b/auto_callout,no_start_optimize,no_auto_possess
@@ -1765,86 +1785,86 @@ CALLOUTS
+1 ^ a+
No match
- Notice that various optimizations must be turned off if you want all
- possible matching paths to be scanned. If no_start_optimize is not
- used, there is an immediate "no match", without any callouts, because
- the starting optimization fails to find "b" in the subject, which it
- knows must be present for any match. If no_auto_possess is not used,
- the "a+" item is turned into "a++", which reduces the number of back-
+ Notice that various optimizations must be turned off if you want all
+ possible matching paths to be scanned. If no_start_optimize is not
+ used, there is an immediate "no match", without any callouts, because
+ the starting optimization fails to find "b" in the subject, which it
+ knows must be present for any match. If no_auto_possess is not used,
+ the "a+" item is turned into "a++", which reduces the number of back-
tracks.
- The callout_extra modifier has no effect if used with the DFA matching
+ The callout_extra modifier has no effect if used with the DFA matching
function, or with JIT.
Return values from callouts
- The default return from the callout function is zero, which allows
+ The default return from the callout function is zero, which allows
matching to continue. The callout_fail modifier can be given one or two
numbers. If there is only one number, 1 is returned instead of 0 (caus-
ing matching to backtrack) when a callout of that number is reached. If
- two numbers (<n>:<m>) are given, 1 is returned when callout <n> is
- reached and there have been at least <m> callouts. The callout_error
+ two numbers (<n>:<m>) are given, 1 is returned when callout <n> is
+ reached and there have been at least <m> callouts. The callout_error
modifier is similar, except that PCRE2_ERROR_CALLOUT is returned, caus-
- ing the entire matching process to be aborted. If both these modifiers
- are set for the same callout number, callout_error takes precedence.
- Note that callouts with string arguments are always given the number
+ ing the entire matching process to be aborted. If both these modifiers
+ are set for the same callout number, callout_error takes precedence.
+ Note that callouts with string arguments are always given the number
zero.
- The callout_data modifier can be given an unsigned or a negative num-
- ber. This is set as the "user data" that is passed to the matching
- function, and passed back when the callout function is invoked. Any
- value other than zero is used as a return from pcre2test's callout
+ The callout_data modifier can be given an unsigned or a negative num-
+ ber. This is set as the "user data" that is passed to the matching
+ function, and passed back when the callout function is invoked. Any
+ value other than zero is used as a return from pcre2test's callout
function.
Inserting callouts can be helpful when using pcre2test to check compli-
- cated regular expressions. For further information about callouts, see
+ cated regular expressions. For further information about callouts, see
the pcre2callout documentation.
NON-PRINTING CHARACTERS
When pcre2test is outputting text in the compiled version of a pattern,
- bytes other than 32-126 are always treated as non-printing characters
+ bytes other than 32-126 are always treated as non-printing characters
and are therefore shown as hex escapes.
- When pcre2test is outputting text that is a matched part of a subject
- string, it behaves in the same way, unless a different locale has been
- set for the pattern (using the locale modifier). In this case, the is-
+ When pcre2test is outputting text that is a matched part of a subject
+ string, it behaves in the same way, unless a different locale has been
+ set for the pattern (using the locale modifier). In this case, the is-
print() function is used to distinguish printing and non-printing char-
acters.
SAVING AND RESTORING COMPILED PATTERNS
- It is possible to save compiled patterns on disc or elsewhere, and
+ It is possible to save compiled patterns on disc or elsewhere, and
reload them later, subject to a number of restrictions. JIT data cannot
- be saved. The host on which the patterns are reloaded must be running
+ be saved. The host on which the patterns are reloaded must be running
the same version of PCRE2, with the same code unit width, and must also
- have the same endianness, pointer width and PCRE2_SIZE type. Before
- compiled patterns can be saved they must be serialized, that is, con-
- verted to a stream of bytes. A single byte stream may contain any num-
- ber of compiled patterns, but they must all use the same character ta-
- bles. A single copy of the tables is included in the byte stream (its
+ have the same endianness, pointer width and PCRE2_SIZE type. Before
+ compiled patterns can be saved they must be serialized, that is, con-
+ verted to a stream of bytes. A single byte stream may contain any num-
+ ber of compiled patterns, but they must all use the same character ta-
+ bles. A single copy of the tables is included in the byte stream (its
size is 1088 bytes).
- The functions whose names begin with pcre2_serialize_ are used for se-
- rializing and de-serializing. They are described in the pcre2serialize
- documentation. In this section we describe the features of pcre2test
+ The functions whose names begin with pcre2_serialize_ are used for se-
+ rializing and de-serializing. They are described in the pcre2serialize
+ documentation. In this section we describe the features of pcre2test
that can be used to test these functions.
- Note that "serialization" in PCRE2 does not convert compiled patterns
- to an abstract format like Java or .NET. It just makes a reloadable
+ Note that "serialization" in PCRE2 does not convert compiled patterns
+ to an abstract format like Java or .NET. It just makes a reloadable
byte code stream. Hence the restrictions on reloading mentioned above.
- In pcre2test, when a pattern with push modifier is successfully com-
- piled, it is pushed onto a stack of compiled patterns, and pcre2test
- expects the next line to contain a new pattern (or command) instead of
+ In pcre2test, when a pattern with push modifier is successfully com-
+ piled, it is pushed onto a stack of compiled patterns, and pcre2test
+ expects the next line to contain a new pattern (or command) instead of
a subject line. By contrast, the pushcopy modifier causes a copy of the
- compiled pattern to be stacked, leaving the original available for im-
- mediate matching. By using push and/or pushcopy, a number of patterns
- can be compiled and retained. These modifiers are incompatible with
+ compiled pattern to be stacked, leaving the original available for im-
+ mediate matching. By using push and/or pushcopy, a number of patterns
+ can be compiled and retained. These modifiers are incompatible with
posix, and control modifiers that act at match time are ignored (with a
- message) for the stacked patterns. The jitverify modifier applies only
+ message) for the stacked patterns. The jitverify modifier applies only
at compile time.
The command
@@ -1852,21 +1872,21 @@ SAVING AND RESTORING COMPILED PATTERNS
#save <filename>
causes all the stacked patterns to be serialized and the result written
- to the named file. Afterwards, all the stacked patterns are freed. The
+ to the named file. Afterwards, all the stacked patterns are freed. The
command
#load <filename>
- reads the data in the file, and then arranges for it to be de-serial-
- ized, with the resulting compiled patterns added to the pattern stack.
- The pattern on the top of the stack can be retrieved by the #pop com-
- mand, which must be followed by lines of subjects that are to be
- matched with the pattern, terminated as usual by an empty line or end
- of file. This command may be followed by a modifier list containing
- only control modifiers that act after a pattern has been compiled. In
- particular, hex, posix, posix_nosub, push, and pushcopy are not al-
- lowed, nor are any option-setting modifiers. The JIT modifiers are,
- however, permitted. Here is an example that saves and reloads two pat-
+ reads the data in the file, and then arranges for it to be de-serial-
+ ized, with the resulting compiled patterns added to the pattern stack.
+ The pattern on the top of the stack can be retrieved by the #pop com-
+ mand, which must be followed by lines of subjects that are to be
+ matched with the pattern, terminated as usual by an empty line or end
+ of file. This command may be followed by a modifier list containing
+ only control modifiers that act after a pattern has been compiled. In
+ particular, hex, posix, posix_nosub, push, and pushcopy are not al-
+ lowed, nor are any option-setting modifiers. The JIT modifiers are,
+ however, permitted. Here is an example that saves and reloads two pat-
terns.
/abc/push
@@ -1879,10 +1899,10 @@ SAVING AND RESTORING COMPILED PATTERNS
#pop jit,bincode
abc
- If jitverify is used with #pop, it does not automatically imply jit,
+ If jitverify is used with #pop, it does not automatically imply jit,
which is different behaviour from when it is used on a pattern.
- The #popcopy command is analogous to the pushcopy modifier in that it
+ The #popcopy command is analogous to the pushcopy modifier in that it
makes current a copy of the topmost stack pattern, leaving the original
still on the stack.
@@ -1902,5 +1922,5 @@ AUTHOR
REVISION
- Last updated: 30 July 2019
- Copyright (c) 1997-2019 University of Cambridge.
+ Last updated: 20 March 2020
+ Copyright (c) 1997-2020 University of Cambridge.
diff --git a/doc/pcre2unicode.3 b/doc/pcre2unicode.3
index 10efd7d..055a4ce 100644
--- a/doc/pcre2unicode.3
+++ b/doc/pcre2unicode.3
@@ -1,4 +1,4 @@
-.TH PCRE2UNICODE 3 "24 May 2019" "PCRE2 10.34"
+.TH PCRE2UNICODE 3 "23 February 2020" "PCRE2 10.35"
.SH NAME
PCRE - Perl-compatible regular expressions (revised API)
.SH "UNICODE AND UTF SUPPORT"
@@ -7,7 +7,7 @@ PCRE - Perl-compatible regular expressions (revised API)
PCRE2 is normally built with Unicode support, though if you do not need it, you
can build it without, in which case the library will be smaller. With Unicode
support, PCRE2 has knowledge of Unicode character properties and can process
-text strings in UTF-8, UTF-16, or UTF-32 format (depending on the code unit
+strings of text in UTF-8, UTF-16, and UTF-32 format (depending on the code unit
width), but this is not the default. Unless specifically requested, PCRE2
treats each code unit in a string as one character.
.P
@@ -126,14 +126,16 @@ However, the special horizontal and vertical white space matching escapes (\eh,
not PCRE2_UCP is set.
.
.
-.SH "CASE-EQUIVALENCE IN UTF MODE"
+.SH "UNICODE CASE-EQUIVALENCE"
.rs
.sp
-Case-insensitive matching in UTF mode makes use of Unicode properties except
-for characters whose code points are less than 128 and that have at most two
-case-equivalent values. For these, a direct table lookup is used for speed. A
-few Unicode characters such as Greek sigma have more than two code points that
-are case-equivalent, and these are treated specially.
+If either PCRE2_UTF or PCRE2_UCP is set, upper/lower case processing makes use
+of Unicode properties except for characters whose code points are less than 128
+and that have at most two case-equivalent values. For these, a direct table
+lookup is used for speed. A few Unicode characters such as Greek sigma have
+more than two code points that are case-equivalent, and these are treated
+specially. Setting PCRE2_UCP without PCRE2_UTF allows Unicode-style case
+processing for non-UTF character encodings such as UCS-2.
.
.
.\" HTML <a name="scriptruns"></a>
@@ -455,6 +457,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 24 May 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 23 February 2020
+Copyright (c) 1997-2020 University of Cambridge.
.fi
diff --git a/install-sh b/install-sh
index 8175c64..20d8b2e 100755
--- a/install-sh
+++ b/install-sh
@@ -451,7 +451,18 @@ do
trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
# Copy the file name to the temp name.
- (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
+ (umask $cp_umask &&
+ { test -z "$stripcmd" || {
+ # Create $dsttmp read-write so that cp doesn't create it read-only,
+ # which would cause strip to fail.
+ if test -z "$doit"; then
+ : >"$dsttmp" # No need to fork-exec 'touch'.
+ else
+ $doit touch "$dsttmp"
+ fi
+ }
+ } &&
+ $doit_exec $cpprog "$src" "$dsttmp") &&
# and set any options; do chmod last to preserve setuid bits.
#
diff --git a/ltmain.sh b/ltmain.sh
index d3ab94d..48cea9b 100644
--- a/ltmain.sh
+++ b/ltmain.sh
@@ -2,7 +2,7 @@
## DO NOT EDIT - This file generated from ./build-aux/ltmain.in
## by inline-source v2018-07-24.06
-# libtool (GNU libtool) 2.4.6.42-b88ce
+# libtool (GNU libtool) 2.4.6.42-b88ce-dirty
# Provide generalized library-building support services.
# Written by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
@@ -31,7 +31,7 @@
PROGRAM=libtool
PACKAGE=libtool
-VERSION=2.4.6.42-b88ce
+VERSION=2.4.6.42-b88ce-dirty
package_revision=2.4.6.42
@@ -2176,7 +2176,7 @@ func_version ()
# End:
# Set a version string.
-scriptversion='(GNU libtool) 2.4.6.42-b88ce'
+scriptversion='(GNU libtool) 2.4.6.42-b88ce-dirty'
# func_echo ARG...
@@ -2267,7 +2267,7 @@ include the following information:
compiler: $LTCC
compiler flags: $LTCFLAGS
linker: $LD (gnu? $with_gnu_ld)
- version: $progname (GNU libtool) 2.4.6.42-b88ce
+ version: $progname (GNU libtool) 2.4.6.42-b88ce-dirty
automake: `($AUTOMAKE --version) 2>/dev/null |$SED 1q`
autoconf: `($AUTOCONF --version) 2>/dev/null |$SED 1q`
diff --git a/m4/libtool.m4 b/m4/libtool.m4
index b55a6e5..2b73e38 100644
--- a/m4/libtool.m4
+++ b/m4/libtool.m4
@@ -728,7 +728,6 @@ _LT_CONFIG_SAVE_COMMANDS([
cat <<_LT_EOF >> "$cfgfile"
#! $SHELL
# Generated automatically by $as_me ($PACKAGE) $VERSION
-# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
# NOTE: Changes made to this file will be lost: look at ltmain.sh.
# Provide generalized library-building support services.
diff --git a/m4/ltversion.m4 b/m4/ltversion.m4
index 86b2ad7..7f9a3ad 100644
--- a/m4/ltversion.m4
+++ b/m4/ltversion.m4
@@ -12,11 +12,11 @@
# serial 4221 ltversion.m4
# This file is part of GNU Libtool
-m4_define([LT_PACKAGE_VERSION], [2.4.6.42-b88ce])
+m4_define([LT_PACKAGE_VERSION], [2.4.6.42-b88ce-dirty])
m4_define([LT_PACKAGE_REVISION], [2.4.6.42])
AC_DEFUN([LTVERSION_VERSION],
-[macro_version='2.4.6.42-b88ce'
+[macro_version='2.4.6.42-b88ce-dirty'
macro_revision='2.4.6.42'
_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
_LT_DECL(, macro_revision, 0)
diff --git a/missing b/missing
index 625aeb1..8d0eaad 100755
--- a/missing
+++ b/missing
@@ -3,7 +3,7 @@
scriptversion=2018-03-07.03; # UTC
-# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+# Copyright (C) 1996-2020 Free Software Foundation, Inc.
# Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
# This program is free software; you can redistribute it and/or modify
diff --git a/src/config.h.generic b/src/config.h.generic
index 787bb9c..2c3b7f7 100644
--- a/src/config.h.generic
+++ b/src/config.h.generic
@@ -52,6 +52,9 @@ sure both macros are undefined; an emulation function will then be used. */
LF does in an ASCII/Unicode environment. */
/* #undef EBCDIC_NL25 */
+/* Define this if your compiler supports __attribute__((uninitialized)) */
+/* #undef HAVE_ATTRIBUTE_UNINITIALIZED */
+
/* Define to 1 if you have the `bcopy' function. */
/* #undef HAVE_BCOPY */
@@ -76,6 +79,9 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to 1 if you have the <limits.h> header file. */
/* #undef HAVE_LIMITS_H */
+/* Define to 1 if you have the `memfd_create' function. */
+/* #undef HAVE_MEMFD_CREATE */
+
/* Define to 1 if you have the `memmove' function. */
/* #undef HAVE_MEMMOVE */
@@ -218,7 +224,7 @@ sure both macros are undefined; an emulation function will then be used. */
#define PACKAGE_NAME "PCRE2"
/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "PCRE2 10.34"
+#define PACKAGE_STRING "PCRE2 10.35"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "pcre2"
@@ -227,7 +233,7 @@ sure both macros are undefined; an emulation function will then be used. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
-#define PACKAGE_VERSION "10.34"
+#define PACKAGE_VERSION "10.35"
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
parentheses (of any kind) in a pattern. This limits the amount of system
@@ -352,7 +358,7 @@ sure both macros are undefined; an emulation function will then be used. */
#endif
/* Version number of package */
-#define VERSION "10.34"
+#define VERSION "10.35"
/* Define to 1 if on MINIX. */
/* #undef _MINIX */
diff --git a/src/config.h.in b/src/config.h.in
index 6b8eb7e..d42cc00 100644
--- a/src/config.h.in
+++ b/src/config.h.in
@@ -52,6 +52,9 @@ sure both macros are undefined; an emulation function will then be used. */
LF does in an ASCII/Unicode environment. */
#undef EBCDIC_NL25
+/* Define this if your compiler supports __attribute__((uninitialized)) */
+#undef HAVE_ATTRIBUTE_UNINITIALIZED
+
/* Define to 1 if you have the `bcopy' function. */
#undef HAVE_BCOPY
@@ -76,6 +79,9 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to 1 if you have the <limits.h> header file. */
#undef HAVE_LIMITS_H
+/* Define to 1 if you have the `memfd_create' function. */
+#undef HAVE_MEMFD_CREATE
+
/* Define to 1 if you have the `memmove' function. */
#undef HAVE_MEMMOVE
diff --git a/src/pcre2.h.generic b/src/pcre2.h.generic
index cb9d61a..4a42a79 100644
--- a/src/pcre2.h.generic
+++ b/src/pcre2.h.generic
@@ -5,7 +5,7 @@
/* This is the public header file for the PCRE library, second API, to be
#included by applications that call PCRE2 functions.
- Copyright (c) 2016-2019 University of Cambridge
+ Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */
#define PCRE2_MAJOR 10
-#define PCRE2_MINOR 34
+#define PCRE2_MINOR 35
#define PCRE2_PRERELEASE
-#define PCRE2_DATE 2019-11-21
+#define PCRE2_DATE 2020-05-09
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE2, the appropriate
@@ -181,6 +181,9 @@ pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */
#define PCRE2_NO_JIT 0x00002000u /* Not for pcre2_dfa_match() */
#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u
+#define PCRE2_SUBSTITUTE_LITERAL 0x00008000u /* pcre2_substitute() only */
+#define PCRE2_SUBSTITUTE_MATCHED 0x00010000u /* pcre2_substitute() only */
+#define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u /* pcre2_substitute() only */
/* Options for pcre2_pattern_convert(). */
@@ -445,6 +448,7 @@ released, the numbers must not be changed. */
#define PCRE2_CONFIG_HEAPLIMIT 12
#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
+#define PCRE2_CONFIG_TABLES_LENGTH 15
/* Types for code units in patterns and subject strings. */
diff --git a/src/pcre2.h.in b/src/pcre2.h.in
index 9a0ad0b..4fd6a1e 100644
--- a/src/pcre2.h.in
+++ b/src/pcre2.h.in
@@ -5,7 +5,7 @@
/* This is the public header file for the PCRE library, second API, to be
#included by applications that call PCRE2 functions.
- Copyright (c) 2016-2019 University of Cambridge
+ Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -181,6 +181,9 @@ pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */
#define PCRE2_NO_JIT 0x00002000u /* Not for pcre2_dfa_match() */
#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u
+#define PCRE2_SUBSTITUTE_LITERAL 0x00008000u /* pcre2_substitute() only */
+#define PCRE2_SUBSTITUTE_MATCHED 0x00010000u /* pcre2_substitute() only */
+#define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u /* pcre2_substitute() only */
/* Options for pcre2_pattern_convert(). */
@@ -445,6 +448,7 @@ released, the numbers must not be changed. */
#define PCRE2_CONFIG_HEAPLIMIT 12
#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
+#define PCRE2_CONFIG_TABLES_LENGTH 15
/* Types for code units in patterns and subject strings. */
diff --git a/src/pcre2_auto_possess.c b/src/pcre2_auto_possess.c
index 5b95b9b..c64cf85 100644
--- a/src/pcre2_auto_possess.c
+++ b/src/pcre2_auto_possess.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -292,6 +292,7 @@ possessification, and if so, fills a list with its properties.
Arguments:
code points to start of expression
utf TRUE if in UTF mode
+ ucp TRUE if in UCP mode
fcc points to the case-flipping table
list points to output list
list[0] will be filled with the opcode
@@ -304,7 +305,7 @@ Returns: points to the start of the next opcode if *code is accepted
*/
static PCRE2_SPTR
-get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc,
+get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc,
uint32_t *list)
{
PCRE2_UCHAR c = *code;
@@ -316,7 +317,8 @@ uint32_t chr;
uint32_t *clist_dest;
const uint32_t *clist_src;
#else
-(void)utf; /* Suppress "unused parameter" compiler warning */
+(void)utf; /* Suppress "unused parameter" compiler warnings */
+(void)ucp;
#endif
list[0] = c;
@@ -396,7 +398,7 @@ switch(c)
list[2] = chr;
#ifdef SUPPORT_UNICODE
- if (chr < 128 || (chr < 256 && !utf))
+ if (chr < 128 || (chr < 256 && !utf && !ucp))
list[3] = fcc[chr];
else
list[3] = UCD_OTHERCASE(chr);
@@ -503,6 +505,7 @@ which case the base cannot be possessified.
Arguments:
code points to the byte code
utf TRUE in UTF mode
+ ucp TRUE in UCP mode
cb compile data block
base_list the data list of the base opcode
base_end the end of the base opcode
@@ -512,7 +515,7 @@ Returns: TRUE if the auto-possessification is possible
*/
static BOOL
-compare_opcodes(PCRE2_SPTR code, BOOL utf, const compile_block *cb,
+compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb,
const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
{
PCRE2_UCHAR c;
@@ -651,7 +654,7 @@ for(;;)
while (*next_code == OP_ALT)
{
- if (!compare_opcodes(code, utf, cb, base_list, base_end, rec_limit))
+ if (!compare_opcodes(code, utf, ucp, cb, base_list, base_end, rec_limit))
return FALSE;
code = next_code + 1 + LINK_SIZE;
next_code += GET(next_code, 1);
@@ -672,7 +675,8 @@ for(;;)
/* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
next_code += 1 + LINK_SIZE;
- if (!compare_opcodes(next_code, utf, cb, base_list, base_end, rec_limit))
+ if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end,
+ rec_limit))
return FALSE;
code += PRIV(OP_lengths)[c];
@@ -688,7 +692,7 @@ for(;;)
/* We now have the next appropriate opcode to compare with the base. Check
for a supported opcode, and load its properties. */
- code = get_chr_property_list(code, utf, cb->fcc, list);
+ code = get_chr_property_list(code, utf, ucp, cb->fcc, list);
if (code == NULL) return FALSE; /* Unsupported */
/* If either opcode is a small character list, set pointers for comparing
@@ -1100,7 +1104,6 @@ leaving the remainder of the pattern unpossessified.
Arguments:
code points to start of the byte code
- utf TRUE in UTF mode
cb compile data block
Returns: 0 for success
@@ -1108,13 +1111,15 @@ Returns: 0 for success
*/
int
-PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb)
+PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb)
{
PCRE2_UCHAR c;
PCRE2_SPTR end;
PCRE2_UCHAR *repeat_opcode;
uint32_t list[8];
int rec_limit = 1000; /* Was 10,000 but clang+ASAN uses a lot of stack. */
+BOOL utf = (cb->external_options & PCRE2_UTF) != 0;
+BOOL ucp = (cb->external_options & PCRE2_UCP) != 0;
for (;;)
{
@@ -1126,10 +1131,11 @@ for (;;)
{
c -= get_repeat_base(c) - OP_STAR;
end = (c <= OP_MINUPTO) ?
- get_chr_property_list(code, utf, cb->fcc, list) : NULL;
+ get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL;
list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
- if (end != NULL && compare_opcodes(end, utf, cb, list, end, &rec_limit))
+ if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
+ &rec_limit))
{
switch(c)
{
@@ -1181,11 +1187,11 @@ for (;;)
if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
{
/* end must not be NULL. */
- end = get_chr_property_list(code, utf, cb->fcc, list);
+ end = get_chr_property_list(code, utf, ucp, cb->fcc, list);
list[1] = (c & 1) == 0;
- if (compare_opcodes(end, utf, cb, list, end, &rec_limit))
+ if (compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
{
switch (c)
{
diff --git a/src/pcre2_chartables.c.dist b/src/pcre2_chartables.c.dist
index 0e07edb..861914d 100644
--- a/src/pcre2_chartables.c.dist
+++ b/src/pcre2_chartables.c.dist
@@ -2,17 +2,21 @@
* Perl-Compatible Regular Expressions *
*************************************************/
-/* This file was automatically written by the dftables auxiliary
+/* This file was automatically written by the pcre2_dftables auxiliary
program. It contains character tables that are used when no external
tables are passed to PCRE2 by the application that calls it. The tables
are used only for characters whose code values are less than 256. */
-/*The dftables program (which is distributed with PCRE2) can be used to
-build alternative versions of this file. This is necessary if you are
+/* This set of tables was written in the C locale. */
+
+/* The pcre2_dftables program (which is distributed with PCRE2) can be used
+to build alternative versions of this file. This is necessary if you are
running in an EBCDIC environment, or if you want to default to a different
-encoding, for example ISO-8859-1. When dftables is run, it creates these
-tables in the current locale. This happens automatically if PCRE2 is
-configured with --enable-rebuild-chartables. */
+encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates
+these tables in the "C" locale by default. This happens automatically if
+PCRE2 is configured with --enable-rebuild-chartables. However, you can run
+pcre2_dftables manually with the -L option to build tables using the LC_ALL
+locale. */
/* The following #include is present because without it gcc 4.x may remove
the array definition from the final binary if PCRE2 is built into a static
@@ -102,54 +106,54 @@ const uint8_t PRIV(default_tables)[] = {
/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
-graph print, punct, and cntrl. Other classes are built from combinations. */
+graph, print, punct, and cntrl. Other classes are built from combinations. */
- 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
+ 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit */
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper */
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower */
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word */
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
+ 0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph */
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
+ 0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print */
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
+ 0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
+ 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index f2e6b6b..62393be 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -1202,7 +1202,7 @@ in the decoded tables. */
if ((code->flags & PCRE2_DEREF_TABLES) != 0)
{
- ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
+ ref_count = (PCRE2_SIZE *)(code->tables + TABLES_LENGTH);
(*ref_count)++;
}
@@ -1232,15 +1232,15 @@ if (newcode == NULL) return NULL;
memcpy(newcode, code, code->blocksize);
newcode->executable_jit = NULL;
-newtables = code->memctl.malloc(tables_length + sizeof(PCRE2_SIZE),
+newtables = code->memctl.malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE),
code->memctl.memory_data);
if (newtables == NULL)
{
code->memctl.free((void *)newcode, code->memctl.memory_data);
return NULL;
}
-memcpy(newtables, code->tables, tables_length);
-ref_count = (PCRE2_SIZE *)(newtables + tables_length);
+memcpy(newtables, code->tables, TABLES_LENGTH);
+ref_count = (PCRE2_SIZE *)(newtables + TABLES_LENGTH);
*ref_count = 1;
newcode->tables = newtables;
@@ -1270,7 +1270,7 @@ if (code != NULL)
be freed when there are no more references to them. The *ref_count should
always be > 0. */
- ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
+ ref_count = (PCRE2_SIZE *)(code->tables + TABLES_LENGTH);
if (*ref_count > 0)
{
(*ref_count)--;
@@ -3653,7 +3653,7 @@ while (ptr < ptrend)
if (ptr >= ptrend) goto UNCLOSED_PARENTHESIS;
/* If ( is not followed by ? it is either a capture or a special verb or an
- alpha assertion. */
+ alpha assertion or a positive non-atomic lookahead. */
if (*ptr != CHAR_QUESTION_MARK)
{
@@ -3685,10 +3685,10 @@ while (ptr < ptrend)
break;
/* Handle "alpha assertions" such as (*pla:...). Most of these are
- synonyms for the historical symbolic assertions, but the script run ones
- are new. They are distinguished by starting with a lower case letter.
- Checking both ends of the alphabet makes this work in all character
- codes. */
+ synonyms for the historical symbolic assertions, but the script run and
+ non-atomic lookaround ones are new. They are distinguished by starting
+ with a lower case letter. Checking both ends of the alphabet makes this
+ work in all character codes. */
else if (CHMAX_255(c) && (cb->ctypes[c] & ctype_lcletter) != 0)
{
@@ -3747,9 +3747,7 @@ while (ptr < ptrend)
goto POSITIVE_LOOK_AHEAD;
case META_LOOKAHEAD_NA:
- *parsed_pattern++ = meta;
- ptr++;
- goto POST_ASSERTION;
+ goto POSITIVE_NONATOMIC_LOOK_AHEAD;
case META_LOOKAHEADNOT:
goto NEGATIVE_LOOK_AHEAD;
@@ -4438,6 +4436,12 @@ while (ptr < ptrend)
ptr++;
goto POST_ASSERTION;
+ case CHAR_ASTERISK:
+ POSITIVE_NONATOMIC_LOOK_AHEAD: /* Come from (*napla: */
+ *parsed_pattern++ = META_LOOKAHEAD_NA;
+ ptr++;
+ goto POST_ASSERTION;
+
case CHAR_EXCLAMATION_MARK:
NEGATIVE_LOOK_AHEAD: /* Come from (*nla: */
*parsed_pattern++ = META_LOOKAHEADNOT;
@@ -4447,20 +4451,23 @@ while (ptr < ptrend)
/* ---- Lookbehind assertions ---- */
- /* (?< followed by = or ! is a lookbehind assertion. Otherwise (?< is the
- start of the name of a capturing group. */
+ /* (?< followed by = or ! or * is a lookbehind assertion. Otherwise (?<
+ is the start of the name of a capturing group. */
case CHAR_LESS_THAN_SIGN:
if (ptrend - ptr <= 1 ||
- (ptr[1] != CHAR_EQUALS_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK))
+ (ptr[1] != CHAR_EQUALS_SIGN &&
+ ptr[1] != CHAR_EXCLAMATION_MARK &&
+ ptr[1] != CHAR_ASTERISK))
{
terminator = CHAR_GREATER_THAN_SIGN;
goto DEFINE_NAME;
}
*parsed_pattern++ = (ptr[1] == CHAR_EQUALS_SIGN)?
- META_LOOKBEHIND : META_LOOKBEHINDNOT;
+ META_LOOKBEHIND : (ptr[1] == CHAR_EXCLAMATION_MARK)?
+ META_LOOKBEHINDNOT : META_LOOKBEHIND_NA;
- POST_LOOKBEHIND: /* Come from (*plb: (*naplb: and (*nlb: */
+ POST_LOOKBEHIND: /* Come from (*plb: (*naplb: and (*nlb: */
*has_lookbehind = TRUE;
offset = (PCRE2_SIZE)(ptr - cb->start_pattern - 2);
PUTOFFSET(offset, parsed_pattern);
@@ -4633,8 +4640,6 @@ while (ptr < ptrend)
*parsed_pattern++ = META_KET;
}
-
-
if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL;
else top_nest--;
}
@@ -4899,7 +4904,7 @@ range. */
if ((options & PCRE2_CASELESS) != 0)
{
#ifdef SUPPORT_UNICODE
- if ((options & PCRE2_UTF) != 0)
+ if ((options & (PCRE2_UTF|PCRE2_UCP)) != 0)
{
int rc;
uint32_t oc, od;
@@ -5314,7 +5319,8 @@ dynamically as we process the pattern. */
#ifdef SUPPORT_UNICODE
BOOL utf = (options & PCRE2_UTF) != 0;
-#else /* No UTF support */
+BOOL ucp = (options & PCRE2_UCP) != 0;
+#else /* No Unicode support */
BOOL utf = FALSE;
#endif
@@ -5559,12 +5565,12 @@ for (;; pptr++)
zerofirstcu = firstcu;
zerofirstcuflags = firstcuflags;
- /* For caseless UTF mode, check whether this character has more than
- one other case. If so, generate a special OP_NOTPROP item instead of
+ /* For caseless UTF or UCP mode, check whether this character has more
+ than one other case. If so, generate a special OP_NOTPROP item instead of
OP_NOTI. */
#ifdef SUPPORT_UNICODE
- if (utf && (options & PCRE2_CASELESS) != 0 &&
+ if ((utf||ucp) && (options & PCRE2_CASELESS) != 0 &&
(d = UCD_CASESET(c)) != 0)
{
*code++ = OP_NOTPROP;
@@ -5597,7 +5603,7 @@ for (;; pptr++)
uint32_t d;
#ifdef SUPPORT_UNICODE
- if (utf && c > 127) d = UCD_OTHERCASE(c); else
+ if ((utf || ucp) && c > 127) d = UCD_OTHERCASE(c); else
#endif
{
#if PCRE2_CODE_UNIT_WIDTH != 8
@@ -6671,23 +6677,11 @@ for (;; pptr++)
}
/* For a back reference, update the back reference map and the
- maximum back reference. Then, for each group, we must check to
- see if it is recursive, that is, it is inside the group that it
- references. A flag is set so that the group can be made atomic.
- */
+ maximum back reference. */
cb->backref_map |= (groupnumber < 32)? (1u << groupnumber) : 1;
if (groupnumber > cb->top_backref)
cb->top_backref = groupnumber;
-
- for (oc = cb->open_caps; oc != NULL; oc = oc->next)
- {
- if (oc->number == groupnumber)
- {
- oc->flag = TRUE;
- break;
- }
- }
}
}
@@ -7081,15 +7075,18 @@ for (;; pptr++)
previous[GET(previous, 1)] != OP_ALT)
goto END_REPEAT;
- /* There is no sense in actually repeating assertions. The only
- potential use of repetition is in cases when the assertion is optional.
- Therefore, if the minimum is greater than zero, just ignore the repeat.
- If the maximum is not zero or one, set it to 1. */
+ /* Perl allows all assertions to be quantified, and when they contain
+ capturing parentheses and/or are optional there are potential uses for
+ this feature. PCRE2 used to force the maximum quantifier to 1 on the
+ invalid grounds that further repetition was never useful. This was
+ always a bit pointless, since an assertion could be wrapped with a
+ repeated group to achieve the effect. General repetition is now
+ permitted, but if the maximum is unlimited it is set to one more than
+ the minimum. */
if (op_previous < OP_ONCE) /* Assertion */
{
- if (repeat_min > 0) goto END_REPEAT;
- if (repeat_max > 1) repeat_max = 1;
+ if (repeat_max == REPEAT_UNLIMITED) repeat_max = repeat_min + 1;
}
/* The case of a zero minimum is special because of the need to stick
@@ -7682,19 +7679,6 @@ for (;; pptr++)
cb->backref_map |= (meta_arg < 32)? (1u << meta_arg) : 1;
if (meta_arg > cb->top_backref) cb->top_backref = meta_arg;
-
- /* Check to see if this back reference is recursive, that it, it
- is inside the group that it references. A flag is set so that the
- group can be made atomic. */
-
- for (oc = cb->open_caps; oc != NULL; oc = oc->next)
- {
- if (oc->number == meta_arg)
- {
- oc->flag = TRUE;
- break;
- }
- }
break;
@@ -7840,11 +7824,12 @@ for (;; pptr++)
NORMAL_CHAR_SET: /* Character is already in meta */
matched_char = TRUE;
- /* For caseless UTF mode, check whether this character has more than one
- other case. If so, generate a special OP_PROP item instead of OP_CHARI. */
+ /* For caseless UTF or UCP mode, check whether this character has more than
+ one other case. If so, generate a special OP_PROP item instead of OP_CHARI.
+ */
#ifdef SUPPORT_UNICODE
- if (utf && (options & PCRE2_CASELESS) != 0)
+ if ((utf||ucp) && (options & PCRE2_CASELESS) != 0)
{
uint32_t caseset = UCD_CASESET(meta);
if (caseset != 0)
@@ -8053,7 +8038,6 @@ if (*code == OP_CBRA)
capnumber = GET2(code, 1 + LINK_SIZE);
capitem.number = capnumber;
capitem.next = cb->open_caps;
- capitem.flag = FALSE;
capitem.assert_depth = cb->assert_depth;
cb->open_caps = &capitem;
}
@@ -8182,26 +8166,9 @@ for (;;)
PUT(code, 1, (int)(code - start_bracket));
code += 1 + LINK_SIZE;
- /* If it was a capturing subpattern, check to see if it contained any
- recursive back references. If so, we must wrap it in atomic brackets. In
- any event, remove the block from the chain. */
+ /* If it was a capturing subpattern, remove the block from the chain. */
- if (capnumber > 0)
- {
- if (cb->open_caps->flag)
- {
- (void)memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
- CU2BYTES(code - start_bracket));
- *start_bracket = OP_ONCE;
- code += 1 + LINK_SIZE;
- PUT(start_bracket, 1, (int)(code - start_bracket));
- *code = OP_KET;
- PUT(code, 1, (int)(code - start_bracket));
- code += 1 + LINK_SIZE;
- length += 2 + 2*LINK_SIZE;
- }
- cb->open_caps = cb->open_caps->next;
- }
+ if (capnumber > 0) cb->open_caps = cb->open_caps->next;
/* Set values to pass back */
@@ -8836,9 +8803,10 @@ memset(slot + IMM2_SIZE + length, 0,
/* This function is called to skip parts of the parsed pattern when finding the
length of a lookbehind branch. It is called after (*ACCEPT) and (*FAIL) to find
-the end of the branch, it is called to skip over an internal lookaround, and it
-is also called to skip to the end of a class, during which it will never
-encounter nested groups (but there's no need to have special code for that).
+the end of the branch, it is called to skip over an internal lookaround or
+(DEFINE) group, and it is also called to skip to the end of a class, during
+which it will never encounter nested groups (but there's no need to have
+special code for that).
When called to find the end of a branch or group, pptr must point to the first
meta code inside the branch, not the branch-starting code. In other cases it
@@ -9316,14 +9284,21 @@ for (;; pptr++)
itemlength = grouplength;
break;
- /* Check nested groups - advance past the initial data for each type and
- then seek a fixed length with get_grouplength(). */
+ /* A (DEFINE) group is never obeyed inline and so it does not contribute to
+ the length of this branch. Skip from the following item to the next
+ unpaired ket. */
+
+ case META_COND_DEFINE:
+ pptr = parsed_skip(pptr + 1, PSKIP_KET);
+ break;
+
+ /* Check other nested groups - advance past the initial data for each type
+ and then seek a fixed length with get_grouplength(). */
case META_COND_NAME:
case META_COND_NUMBER:
case META_COND_RNAME:
case META_COND_RNUMBER:
- case META_COND_DEFINE:
pptr += 2 + SIZEOFFSET;
goto CHECK_GROUP;
@@ -9580,6 +9555,10 @@ for (; *pptr != META_END; pptr++)
break;
case META_COND_DEFINE:
+ pptr += SIZEOFFSET;
+ nestlevel++;
+ break;
+
case META_COND_NAME:
case META_COND_NUMBER:
case META_COND_RNAME:
@@ -9660,6 +9639,7 @@ pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options,
int *errorptr, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext)
{
BOOL utf; /* Set TRUE for UTF mode */
+BOOL ucp; /* Set TRUE for UCP mode */
BOOL has_lookbehind = FALSE; /* Set TRUE if a lookbehind is found */
BOOL zero_terminated; /* Set TRUE for zero-terminated pattern */
pcre2_real_code *re = NULL; /* What we will return */
@@ -9947,8 +9927,8 @@ if (utf)
/* Check UCP lockout. */
-if ((cb.external_options & (PCRE2_UCP|PCRE2_NEVER_UCP)) ==
- (PCRE2_UCP|PCRE2_NEVER_UCP))
+ucp = (cb.external_options & PCRE2_UCP) != 0;
+if (ucp && (cb.external_options & PCRE2_NEVER_UCP) != 0)
{
errorcode = ERR75;
goto HAD_EARLY_ERROR;
@@ -10324,7 +10304,7 @@ function call. */
if (errorcode == 0 && (re->overall_options & PCRE2_NO_AUTO_POSSESS) == 0)
{
PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart;
- if (PRIV(auto_possessify)(temp, utf, &cb) != 0) errorcode = ERR80;
+ if (PRIV(auto_possessify)(temp, &cb) != 0) errorcode = ERR80;
}
/* Failed to compile, or error while post-processing. */
@@ -10372,21 +10352,25 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
if ((firstcuflags & REQ_CASELESS) != 0)
{
- if (firstcu < 128 || (!utf && firstcu < 255))
+ if (firstcu < 128 || (!utf && !ucp && firstcu < 255))
{
if (cb.fcc[firstcu] != firstcu) re->flags |= PCRE2_FIRSTCASELESS;
}
- /* The first code unit is > 128 in UTF mode, or > 255 otherwise. In
- 8-bit UTF mode, codepoints in the range 128-255 are introductory code
- points and cannot have another case. In 16-bit and 32-bit modes, we can
- check wide characters when UTF (and therefore UCP) is supported. */
+ /* The first code unit is >= 128 in UTF or UCP mode, or >= 255 otherwise.
+ In 8-bit UTF mode, codepoints in the range 128-255 are introductory code
+ points and cannot have another case, but if UCP is set they may do. */
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
- else if (firstcu <= MAX_UTF_CODE_POINT &&
+#ifdef SUPPORT_UNICODE
+#if PCRE2_CODE_UNIT_WIDTH == 8
+ else if (ucp && !utf && UCD_OTHERCASE(firstcu) != firstcu)
+ re->flags |= PCRE2_FIRSTCASELESS;
+#else
+ else if ((utf || ucp) && firstcu <= MAX_UTF_CODE_POINT &&
UCD_OTHERCASE(firstcu) != firstcu)
re->flags |= PCRE2_FIRSTCASELESS;
#endif
+#endif /* SUPPORT_UNICODE */
}
}
@@ -10435,14 +10419,20 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
if ((reqcuflags & REQ_CASELESS) != 0)
{
- if (reqcu < 128 || (!utf && reqcu < 255))
+ if (reqcu < 128 || (!utf && !ucp && reqcu < 255))
{
if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS;
}
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
- else if (reqcu <= MAX_UTF_CODE_POINT && UCD_OTHERCASE(reqcu) != reqcu)
- re->flags |= PCRE2_LASTCASELESS;
+#ifdef SUPPORT_UNICODE
+#if PCRE2_CODE_UNIT_WIDTH == 8
+ else if (ucp && !utf && UCD_OTHERCASE(reqcu) != reqcu)
+ re->flags |= PCRE2_LASTCASELESS;
+#else
+ else if ((utf || ucp) && reqcu <= MAX_UTF_CODE_POINT &&
+ UCD_OTHERCASE(reqcu) != reqcu)
+ re->flags |= PCRE2_LASTCASELESS;
#endif
+#endif /* SUPPORT_UNICODE */
}
}
}
diff --git a/src/pcre2_config.c b/src/pcre2_config.c
index e487b10..5ef103c 100644
--- a/src/pcre2_config.c
+++ b/src/pcre2_config.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2017 University of Cambridge
+ New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -43,7 +43,8 @@ POSSIBILITY OF SUCH DAMAGE.
#endif
/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
-its value gets changed by pcre2_internal.h to be in code units. */
+its value gets changed by pcre2_intmodedep.h (included by pcre2_internal.h) to
+be in code units. */
static int configured_link_size = LINK_SIZE;
@@ -94,6 +95,7 @@ if (where == NULL) /* Requests a length */
case PCRE2_CONFIG_NEWLINE:
case PCRE2_CONFIG_PARENSLIMIT:
case PCRE2_CONFIG_STACKRECURSE: /* Obsolete */
+ case PCRE2_CONFIG_TABLES_LENGTH:
case PCRE2_CONFIG_UNICODE:
return sizeof(uint32_t);
@@ -191,6 +193,10 @@ switch (what)
*((uint32_t *)where) = 0;
break;
+ case PCRE2_CONFIG_TABLES_LENGTH:
+ *((uint32_t *)where) = TABLES_LENGTH;
+ break;
+
case PCRE2_CONFIG_UNICODE_VERSION:
{
#if defined SUPPORT_UNICODE
diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
index 7d8ffe8..625695b 100644
--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -548,6 +548,7 @@ PCRE2_SPTR start_code = mb->start_code;
#ifdef SUPPORT_UNICODE
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
+BOOL utf_or_ucp = utf || (mb->poptions & PCRE2_UCP) != 0;
#else
BOOL utf = FALSE;
#endif
@@ -2190,7 +2191,7 @@ for (;;)
if (clen == 0) break;
#ifdef SUPPORT_UNICODE
- if (utf)
+ if (utf_or_ucp)
{
if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
{
@@ -2204,7 +2205,7 @@ for (;;)
}
else
#endif /* SUPPORT_UNICODE */
- /* Not UTF mode */
+ /* Not UTF or UCP mode */
{
if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
{ ADD_NEW(state_offset + 2, 0); }
@@ -2339,7 +2340,7 @@ for (;;)
{
uint32_t otherd;
#ifdef SUPPORT_UNICODE
- if (utf && d >= 128)
+ if (utf_or_ucp && d >= 128)
otherd = UCD_OTHERCASE(d);
else
#endif /* SUPPORT_UNICODE */
@@ -2374,7 +2375,7 @@ for (;;)
if (caseless)
{
#ifdef SUPPORT_UNICODE
- if (utf && d >= 128)
+ if (utf_or_ucp && d >= 128)
otherd = UCD_OTHERCASE(d);
else
#endif /* SUPPORT_UNICODE */
@@ -2417,7 +2418,7 @@ for (;;)
if (caseless)
{
#ifdef SUPPORT_UNICODE
- if (utf && d >= 128)
+ if (utf_or_ucp && d >= 128)
otherd = UCD_OTHERCASE(d);
else
#endif /* SUPPORT_UNICODE */
@@ -2458,7 +2459,7 @@ for (;;)
if (caseless)
{
#ifdef SUPPORT_UNICODE
- if (utf && d >= 128)
+ if (utf_or_ucp && d >= 128)
otherd = UCD_OTHERCASE(d);
else
#endif /* SUPPORT_UNICODE */
@@ -2491,7 +2492,7 @@ for (;;)
if (caseless)
{
#ifdef SUPPORT_UNICODE
- if (utf && d >= 128)
+ if (utf_or_ucp && d >= 128)
otherd = UCD_OTHERCASE(d);
else
#endif /* SUPPORT_UNICODE */
@@ -2531,7 +2532,7 @@ for (;;)
if (caseless)
{
#ifdef SUPPORT_UNICODE
- if (utf && d >= 128)
+ if (utf_or_ucp && d >= 128)
otherd = UCD_OTHERCASE(d);
else
#endif /* SUPPORT_UNICODE */
@@ -3526,10 +3527,15 @@ if ((re->flags & PCRE2_FIRSTSET) != 0)
if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
{
first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu);
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
- if (utf && first_cu > 127)
+#ifdef SUPPORT_UNICODE
+#if PCRE2_CODE_UNIT_WIDTH == 8
+ if (first_cu > 127 && !utf && (re->overall_options & PCRE2_UCP) != 0)
+ first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
+#else
+ if (first_cu > 127 && (utf || (re->overall_options & PCRE2_UCP) != 0))
first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
#endif
+#endif /* SUPPORT_UNICODE */
}
}
else
@@ -3545,9 +3551,15 @@ if ((re->flags & PCRE2_LASTSET) != 0)
if ((re->flags & PCRE2_LASTCASELESS) != 0)
{
req_cu2 = TABLE_GET(req_cu, mb->tables + fcc_offset, req_cu);
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
- if (utf && req_cu > 127) req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
+#ifdef SUPPORT_UNICODE
+#if PCRE2_CODE_UNIT_WIDTH == 8
+ if (req_cu > 127 && !utf && (re->overall_options & PCRE2_UCP) != 0)
+ req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
+#else
+ if (req_cu > 127 && (utf || (re->overall_options & PCRE2_UCP) != 0))
+ req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
#endif
+#endif /* SUPPORT_UNICODE */
}
}
diff --git a/src/dftables.c b/src/pcre2_dftables.c
index 02796cc..1c059e2 100644
--- a/src/dftables.c
+++ b/src/pcre2_dftables.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2018 University of Cambridge
+ New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -40,9 +40,12 @@ POSSIBILITY OF SUCH DAMAGE.
/* This is a freestanding support program to generate a file containing
-character tables for PCRE2. The tables are built according to the current
-locale using the pcre2_maketables() function, which is part of the PCRE2 API.
-*/
+character tables for PCRE2. The tables are built using the pcre2_maketables()
+function, which is part of the PCRE2 API. By default, the system's "C" locale
+is used rather than what the building user happens to have set, but the -L
+option can be used to select the current locale from the LC_ALL environment
+variable. By default, the tables are written in source form, but if -b is
+given, they are written in binary. */
#ifdef HAVE_CONFIG_H
#include "config.h"
@@ -56,73 +59,153 @@ locale using the pcre2_maketables() function, which is part of the PCRE2 API.
#define PCRE2_CODE_UNIT_WIDTH 0 /* Must be set, but not relevant here */
#include "pcre2_internal.h"
-#define DFTABLES /* pcre2_maketables.c notices this */
+#define PCRE2_DFTABLES /* pcre2_maketables.c notices this */
#include "pcre2_maketables.c"
+
+static char *classlist[] =
+ {
+ "space", "xdigit", "digit", "upper", "lower",
+ "word", "graph", "print", "punct", "cntrl"
+ };
+
+
+
+/*************************************************
+* Usage *
+*************************************************/
+
+static void
+usage(void)
+{
+(void)fprintf(stderr,
+ "Usage: pcre2_dftables [options] <output file>\n"
+ " -b Write output in binary (default is source code)\n"
+ " -L Use locale from LC_ALL (default is \"C\" locale)\n"
+ );
+}
+
+
+
+/*************************************************
+* Entry point *
+*************************************************/
+
int main(int argc, char **argv)
{
FILE *f;
-int i = 1;
+int i;
+int nclass = 0;
+BOOL binary = FALSE;
+char *env = "C";
const unsigned char *tables;
const unsigned char *base_of_tables;
-/* By default, the default C locale is used rather than what the building user
-happens to have set. However, if the -L option is given, set the locale from
-the LC_xxx environment variables. */
+/* Process options */
-if (argc > 1 && strcmp(argv[1], "-L") == 0)
+for (i = 1; i < argc; i++)
{
- setlocale(LC_ALL, ""); /* Set from environment variables */
- i++;
+ unsigned char *arg = (unsigned char *)argv[i];
+ if (*arg != '-') break;
+
+ if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0)
+ {
+ usage();
+ return 0;
+ }
+
+ else if (strcmp(arg, "-L") == 0)
+ {
+ if (setlocale(LC_ALL, "") == NULL)
+ {
+ (void)fprintf(stderr, "pcre2_dftables: setlocale() failed\n");
+ return 1;
+ }
+ env = getenv("LC_ALL");
+ }
+
+ else if (strcmp(arg, "-b") == 0)
+ binary = TRUE;
+
+ else
+ {
+ (void)fprintf(stderr, "pcre2_dftables: unrecognized option %s\n", arg);
+ return 1;
+ }
}
-if (argc < i + 1)
+if (i != argc - 1)
{
- fprintf(stderr, "dftables: one filename argument is required\n");
+ (void)fprintf(stderr, "pcre2_dftables: one filename argument is required\n");
return 1;
}
+/* Make the tables */
+
tables = maketables();
base_of_tables = tables;
f = fopen(argv[i], "wb");
if (f == NULL)
{
- fprintf(stderr, "dftables: failed to open %s for writing\n", argv[1]);
+ fprintf(stderr, "pcre2_dftables: failed to open %s for writing\n", argv[1]);
return 1;
}
-/* There are several fprintf() calls here, because gcc in pedantic mode
-complains about the very long string otherwise. */
+/* If -b was specified, we write the tables in binary. */
-fprintf(f,
+if (binary)
+ {
+ int yield = 0;
+ size_t len = fwrite(tables, 1, TABLES_LENGTH, f);
+ if (len != TABLES_LENGTH)
+ {
+ (void)fprintf(stderr, "pcre2_dftables: fwrite() returned wrong length %d "
+ "instead of %d\n", (int)len, TABLES_LENGTH);
+ yield = 1;
+ }
+ fclose(f);
+ free((void *)base_of_tables);
+ return yield;
+ }
+
+/* Write the tables as source code for inclusion in the PCRE2 library. There
+are several fprintf() calls here, because gcc in pedantic mode complains about
+the very long string otherwise. */
+
+(void)fprintf(f,
"/*************************************************\n"
"* Perl-Compatible Regular Expressions *\n"
"*************************************************/\n\n"
- "/* This file was automatically written by the dftables auxiliary\n"
+ "/* This file was automatically written by the pcre2_dftables auxiliary\n"
"program. It contains character tables that are used when no external\n"
"tables are passed to PCRE2 by the application that calls it. The tables\n"
"are used only for characters whose code values are less than 256. */\n\n");
-fprintf(f,
- "/*The dftables program (which is distributed with PCRE2) can be used to\n"
- "build alternative versions of this file. This is necessary if you are\n"
+(void)fprintf(f,
+ "/* This set of tables was written in the %s locale. */\n\n", env);
+
+(void)fprintf(f,
+ "/* The pcre2_ftables program (which is distributed with PCRE2) can be used\n"
+ "to build alternative versions of this file. This is necessary if you are\n"
"running in an EBCDIC environment, or if you want to default to a different\n"
- "encoding, for example ISO-8859-1. When dftables is run, it creates these\n"
- "tables in the current locale. This happens automatically if PCRE2 is\n"
- "configured with --enable-rebuild-chartables. */\n\n");
+ "encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates\n"
+ "these tables in the \"C\" locale by default. This happens automatically if\n"
+ "PCRE2 is configured with --enable-rebuild-chartables. However, you can run\n"
+ "pcre2_dftables manually with the -L option to build tables using the LC_ALL\n"
+ "locale. */\n\n");
/* Force config.h in z/OS */
#if defined NATIVE_ZOS
-fprintf(f,
+(void)fprintf(f,
"/* For z/OS, config.h is forced */\n"
"#ifndef HAVE_CONFIG_H\n"
"#define HAVE_CONFIG_H 1\n"
"#endif\n\n");
#endif
-fprintf(f,
+(void)fprintf(f,
"/* The following #include is present because without it gcc 4.x may remove\n"
"the array definition from the final binary if PCRE2 is built into a static\n"
"library and dead code stripping is activated. This leads to link errors.\n"
@@ -130,56 +213,57 @@ fprintf(f,
"outside this compilation unit might reference this\" and so it will always\n"
"be supplied to the linker. */\n\n");
-fprintf(f,
+(void)fprintf(f,
"#ifdef HAVE_CONFIG_H\n"
"#include \"config.h\"\n"
"#endif\n\n"
"#include \"pcre2_internal.h\"\n\n");
-fprintf(f,
+(void)fprintf(f,
"const uint8_t PRIV(default_tables)[] = {\n\n"
"/* This table is a lower casing table. */\n\n");
-fprintf(f, " ");
+(void)fprintf(f, " ");
for (i = 0; i < 256; i++)
{
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
fprintf(f, "%3d", *tables++);
if (i != 255) fprintf(f, ",");
}
-fprintf(f, ",\n\n");
+(void)fprintf(f, ",\n\n");
-fprintf(f, "/* This table is a case flipping table. */\n\n");
+(void)fprintf(f, "/* This table is a case flipping table. */\n\n");
-fprintf(f, " ");
+(void)fprintf(f, " ");
for (i = 0; i < 256; i++)
{
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
fprintf(f, "%3d", *tables++);
if (i != 255) fprintf(f, ",");
}
-fprintf(f, ",\n\n");
+(void)fprintf(f, ",\n\n");
-fprintf(f,
+(void)fprintf(f,
"/* This table contains bit maps for various character classes. Each map is 32\n"
"bytes long and the bits run from the least significant end of each byte. The\n"
"classes that have their own maps are: space, xdigit, digit, upper, lower, word,\n"
- "graph print, punct, and cntrl. Other classes are built from combinations. */\n\n");
+ "graph, print, punct, and cntrl. Other classes are built from combinations. */\n\n");
-fprintf(f, " ");
+(void)fprintf(f, " ");
for (i = 0; i < cbit_length; i++)
{
if ((i & 7) == 0 && i != 0)
{
- if ((i & 31) == 0) fprintf(f, "\n");
- fprintf(f, "\n ");
+ if ((i & 31) == 0) (void)fprintf(f, "\n");
+ if ((i & 24) == 8) (void)fprintf(f, " /* %s */", classlist[nclass++]);
+ (void)fprintf(f, "\n ");
}
- fprintf(f, "0x%02x", *tables++);
- if (i != cbit_length - 1) fprintf(f, ",");
+ (void)fprintf(f, "0x%02x", *tables++);
+ if (i != cbit_length - 1) (void)fprintf(f, ",");
}
-fprintf(f, ",\n\n");
+(void)fprintf(f, ",\n\n");
-fprintf(f,
+(void)fprintf(f,
"/* This table identifies various classes of character by individual bits:\n"
" 0x%02x white space character\n"
" 0x%02x letter\n"
@@ -188,32 +272,32 @@ fprintf(f,
" 0x%02x alphanumeric or '_'\n*/\n\n",
ctype_space, ctype_letter, ctype_lcletter, ctype_digit, ctype_word);
-fprintf(f, " ");
+(void)fprintf(f, " ");
for (i = 0; i < 256; i++)
{
if ((i & 7) == 0 && i != 0)
{
- fprintf(f, " /* ");
- if (isprint(i-8)) fprintf(f, " %c -", i-8);
- else fprintf(f, "%3d-", i-8);
- if (isprint(i-1)) fprintf(f, " %c ", i-1);
- else fprintf(f, "%3d", i-1);
- fprintf(f, " */\n ");
+ (void)fprintf(f, " /* ");
+ if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
+ else (void)fprintf(f, "%3d-", i-8);
+ if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
+ else (void)fprintf(f, "%3d", i-1);
+ (void)fprintf(f, " */\n ");
}
- fprintf(f, "0x%02x", *tables++);
- if (i != 255) fprintf(f, ",");
+ (void)fprintf(f, "0x%02x", *tables++);
+ if (i != 255) (void)fprintf(f, ",");
}
-fprintf(f, "};/* ");
-if (isprint(i-8)) fprintf(f, " %c -", i-8);
- else fprintf(f, "%3d-", i-8);
-if (isprint(i-1)) fprintf(f, " %c ", i-1);
- else fprintf(f, "%3d", i-1);
-fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n");
+(void)fprintf(f, "};/* ");
+if (isprint(i-8)) (void)fprintf(f, " %c -", i-8);
+ else (void)fprintf(f, "%3d-", i-8);
+if (isprint(i-1)) (void)fprintf(f, " %c ", i-1);
+ else (void)fprintf(f, "%3d", i-1);
+(void)fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n");
fclose(f);
free((void *)base_of_tables);
return 0;
}
-/* End of dftables.c */
+/* End of pcre2_dftables.c */
diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h
index fe8ffe5..d8fad1e 100644
--- a/src/pcre2_internal.h
+++ b/src/pcre2_internal.h
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -76,6 +76,17 @@ typedef int BOOL;
#include <valgrind/memcheck.h>
#endif
+/* The -ftrivial-auto-var-init compiler option initializes all local variables
+to avoid some classes of bug, but this can cause an unacceptable slowdown
+for large on-stack arrays in hot functions. This macro lets us annotate
+such arrays. */
+
+#ifdef HAVE_ATTRIBUTE_UNINITIALIZED
+#define PCRE2_KEEP_UNINITIALIZED __attribute__((uninitialized))
+#else
+#define PCRE2_KEEP_UNINITIALIZED
+#endif
+
/* Older versions of MSVC lack snprintf(). This define allows for
warning/error-free compilation and testing with MSVC compilers back to at least
MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
@@ -579,7 +590,7 @@ total length of the tables. */
#define fcc_offset 256 /* Flip case */
#define cbits_offset 512 /* Character classes */
#define ctypes_offset (cbits_offset + cbit_length) /* Character types */
-#define tables_length (ctypes_offset + 256)
+#define TABLES_LENGTH (ctypes_offset + 256)
/* -------------------- Character and string names ------------------------ */
@@ -1759,13 +1770,11 @@ typedef struct pcre2_memctl {
/* Structure for building a chain of open capturing subpatterns during
compiling, so that instructions to close them can be compiled when (*ACCEPT) is
-encountered. This is also used to identify subpatterns that contain recursive
-back references to themselves, so that they can be made atomic. */
+encountered. */
typedef struct open_capitem {
struct open_capitem *next; /* Chain link */
uint16_t number; /* Capture number */
- uint16_t flag; /* Set TRUE if recursive back ref */
uint16_t assert_depth; /* Assertion depth when opened */
} open_capitem;
@@ -1954,7 +1963,7 @@ is available. */
#define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_)
#define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_)
-extern int _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL,
+extern int _pcre2_auto_possessify(PCRE2_UCHAR *,
const compile_block *);
extern int _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *,
int *, uint32_t, uint32_t, BOOL, compile_block *);
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index f564127..33ad7e6 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -223,6 +223,12 @@ enum control_types {
type_then_trap = 1
};
+enum early_fail_types {
+ type_skip = 0,
+ type_fail = 1,
+ type_fail_range = 2
+};
+
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
/* The following structure is the key data type for the recursive
@@ -405,8 +411,8 @@ typedef struct compiler_common {
/* Fast forward skipping byte code pointer. */
PCRE2_SPTR fast_forward_bc_ptr;
/* Locals used by fast fail optimization. */
- sljit_s32 fast_fail_start_ptr;
- sljit_s32 fast_fail_end_ptr;
+ sljit_s32 early_fail_start_ptr;
+ sljit_s32 early_fail_end_ptr;
/* Flipped and lower case tables. */
const sljit_u8 *fcc;
@@ -476,7 +482,7 @@ typedef struct compiler_common {
#ifdef SUPPORT_UNICODE
BOOL utf;
BOOL invalid_utf;
- BOOL use_ucp;
+ BOOL ucp;
/* Points to saving area for iref. */
sljit_s32 iref_ptr;
jump_list *getucd;
@@ -607,6 +613,8 @@ the start pointers when the end of the capturing group has not yet reached. */
sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
+#define OP_SRC(op, src, srcw) \
+ sljit_emit_op_src(compiler, (op), (src), (srcw))
#define LABEL() \
sljit_emit_label(compiler)
#define JUMP(type) \
@@ -823,7 +831,7 @@ the start pointers when the end of the capturing group has not yet reached. */
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
-SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
+SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
do cc += GET(cc, 1); while (*cc == OP_ALT);
SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
cc += 1 + LINK_SIZE;
@@ -833,7 +841,7 @@ return cc;
static int no_alternatives(PCRE2_SPTR cc)
{
int count = 0;
-SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
+SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
do
{
cc += GET(cc, 1);
@@ -918,6 +926,8 @@ switch(*cc)
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
+ case OP_ASSERT_NA:
+ case OP_ASSERTBACK_NA:
case OP_ONCE:
case OP_SCRIPT_RUN:
case OP_BRA:
@@ -1050,8 +1060,7 @@ switch(*cc)
return cc + 1 + 2 + cc[1];
default:
- /* Unsupported opcodes: OP_ASSERT_NA and OP_ASSERTBACK_NA */
- /* SLJIT_UNREACHABLE(); */
+ SLJIT_UNREACHABLE();
return NULL;
}
}
@@ -1061,6 +1070,7 @@ static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPT
int count;
PCRE2_SPTR slot;
PCRE2_SPTR assert_back_end = cc - 1;
+PCRE2_SPTR assert_na_end = cc - 1;
/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
while (cc < ccend)
@@ -1087,6 +1097,14 @@ while (cc < ccend)
cc += 1 + IMM2_SIZE;
break;
+ case OP_ASSERT_NA:
+ case OP_ASSERTBACK_NA:
+ slot = bracketend(cc);
+ if (slot > assert_na_end)
+ assert_na_end = slot;
+ cc += 1 + LINK_SIZE;
+ break;
+
case OP_CBRAPOS:
case OP_SCBRAPOS:
common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
@@ -1154,6 +1172,9 @@ while (cc < ccend)
case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
+ if (cc < assert_na_end)
+ return FALSE;
+ /* Fall through */
case OP_MARK:
if (common->mark_ptr == 0)
{
@@ -1172,6 +1193,8 @@ while (cc < ccend)
case OP_SKIP:
if (cc < assert_back_end)
common->has_skip_in_assert_back = TRUE;
+ if (cc < assert_na_end)
+ return FALSE;
cc += 1;
break;
@@ -1180,9 +1203,19 @@ while (cc < ccend)
common->has_skip_arg = TRUE;
if (cc < assert_back_end)
common->has_skip_in_assert_back = TRUE;
+ if (cc < assert_na_end)
+ return FALSE;
cc += 1 + 2 + cc[1];
break;
+ case OP_PRUNE:
+ case OP_COMMIT:
+ case OP_ASSERT_ACCEPT:
+ if (cc < assert_na_end)
+ return FALSE;
+ cc++;
+ break;
+
default:
cc = next_opcode(common, cc);
if (cc == NULL)
@@ -1193,183 +1226,355 @@ while (cc < ccend)
return TRUE;
}
-static BOOL is_accelerated_repeat(PCRE2_SPTR cc)
+#define EARLY_FAIL_ENHANCE_MAX (1 + 1)
+
+/*
+start:
+ 0 - skip / early fail allowed
+ 1 - only early fail with range allowed
+ >1 - (start - 1) early fail is processed
+
+return: current number of iterators enhanced with early fail
+*/
+static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth, int start)
{
-switch(*cc)
- {
- case OP_TYPESTAR:
- case OP_TYPEMINSTAR:
- case OP_TYPEPLUS:
- case OP_TYPEMINPLUS:
- case OP_TYPEPOSSTAR:
- case OP_TYPEPOSPLUS:
- return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
+PCRE2_SPTR next_alt;
+PCRE2_SPTR end;
+PCRE2_SPTR accelerated_start;
+int result = 0;
+int count;
+BOOL fast_forward_allowed = TRUE;
- case OP_STAR:
- case OP_MINSTAR:
- case OP_PLUS:
- case OP_MINPLUS:
- case OP_POSSTAR:
- case OP_POSPLUS:
+SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
+SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
+SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);
- case OP_STARI:
- case OP_MINSTARI:
- case OP_PLUSI:
- case OP_MINPLUSI:
- case OP_POSSTARI:
- case OP_POSPLUSI:
+do
+ {
+ count = start;
+ next_alt = cc + GET(cc, 1);
+ cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
- case OP_NOTSTAR:
- case OP_NOTMINSTAR:
- case OP_NOTPLUS:
- case OP_NOTMINPLUS:
- case OP_NOTPOSSTAR:
- case OP_NOTPOSPLUS:
+ while (TRUE)
+ {
+ accelerated_start = NULL;
- case OP_NOTSTARI:
- case OP_NOTMINSTARI:
- case OP_NOTPLUSI:
- case OP_NOTMINPLUSI:
- case OP_NOTPOSSTARI:
- case OP_NOTPOSPLUSI:
- return TRUE;
+ switch(*cc)
+ {
+ case OP_SOD:
+ case OP_SOM:
+ case OP_SET_SOM:
+ case OP_NOT_WORD_BOUNDARY:
+ case OP_WORD_BOUNDARY:
+ case OP_EODN:
+ case OP_EOD:
+ case OP_CIRC:
+ case OP_CIRCM:
+ case OP_DOLL:
+ case OP_DOLLM:
+ /* Zero width assertions. */
+ cc++;
+ continue;
+
+ case OP_NOT_DIGIT:
+ case OP_DIGIT:
+ case OP_NOT_WHITESPACE:
+ case OP_WHITESPACE:
+ case OP_NOT_WORDCHAR:
+ case OP_WORDCHAR:
+ case OP_ANY:
+ case OP_ALLANY:
+ case OP_ANYBYTE:
+ case OP_NOT_HSPACE:
+ case OP_HSPACE:
+ case OP_NOT_VSPACE:
+ case OP_VSPACE:
+ fast_forward_allowed = FALSE;
+ cc++;
+ continue;
- case OP_CLASS:
- case OP_NCLASS:
-#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
- case OP_XCLASS:
- cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(PCRE2_UCHAR)));
-#else
- cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
+ case OP_ANYNL:
+ case OP_EXTUNI:
+ fast_forward_allowed = FALSE;
+ if (count == 0)
+ count = 1;
+ cc++;
+ continue;
+
+ case OP_NOTPROP:
+ case OP_PROP:
+ fast_forward_allowed = FALSE;
+ cc += 1 + 2;
+ continue;
+
+ case OP_CHAR:
+ case OP_CHARI:
+ case OP_NOT:
+ case OP_NOTI:
+ fast_forward_allowed = FALSE;
+ cc += 2;
+#ifdef SUPPORT_UNICODE
+ if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
+ continue;
- switch(*cc)
- {
- case OP_CRSTAR:
- case OP_CRMINSTAR:
- case OP_CRPLUS:
- case OP_CRMINPLUS:
- case OP_CRPOSSTAR:
- case OP_CRPOSPLUS:
- return TRUE;
- }
- break;
- }
-return FALSE;
-}
+ case OP_TYPESTAR:
+ case OP_TYPEMINSTAR:
+ case OP_TYPEPLUS:
+ case OP_TYPEMINPLUS:
+ case OP_TYPEPOSSTAR:
+ case OP_TYPEPOSPLUS:
+ /* The type or prop opcode is skipped in the next iteration. */
+ cc += 1;
-static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
-{
-PCRE2_SPTR cc = common->start;
-PCRE2_SPTR end;
+ if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
+ {
+ accelerated_start = cc - 1;
+ break;
+ }
-/* Skip not repeated brackets. */
-while (TRUE)
- {
- switch(*cc)
- {
- case OP_SOD:
- case OP_SOM:
- case OP_SET_SOM:
- case OP_NOT_WORD_BOUNDARY:
- case OP_WORD_BOUNDARY:
- case OP_EODN:
- case OP_EOD:
- case OP_CIRC:
- case OP_CIRCM:
- case OP_DOLL:
- case OP_DOLLM:
- /* Zero width assertions. */
- cc++;
- continue;
- }
+ if (count == 0)
+ count = 1;
+ fast_forward_allowed = FALSE;
+ continue;
- if (*cc != OP_BRA && *cc != OP_CBRA)
- break;
+ case OP_TYPEUPTO:
+ case OP_TYPEMINUPTO:
+ case OP_TYPEEXACT:
+ case OP_TYPEPOSUPTO:
+ cc += IMM2_SIZE;
+ /* Fall through */
+
+ case OP_TYPEQUERY:
+ case OP_TYPEMINQUERY:
+ case OP_TYPEPOSQUERY:
+ /* The type or prop opcode is skipped in the next iteration. */
+ fast_forward_allowed = FALSE;
+ if (count == 0)
+ count = 1;
+ cc += 1;
+ continue;
+
+ case OP_STAR:
+ case OP_MINSTAR:
+ case OP_PLUS:
+ case OP_MINPLUS:
+ case OP_POSSTAR:
+ case OP_POSPLUS:
+
+ case OP_STARI:
+ case OP_MINSTARI:
+ case OP_PLUSI:
+ case OP_MINPLUSI:
+ case OP_POSSTARI:
+ case OP_POSPLUSI:
+
+ case OP_NOTSTAR:
+ case OP_NOTMINSTAR:
+ case OP_NOTPLUS:
+ case OP_NOTMINPLUS:
+ case OP_NOTPOSSTAR:
+ case OP_NOTPOSPLUS:
+
+ case OP_NOTSTARI:
+ case OP_NOTMINSTARI:
+ case OP_NOTPLUSI:
+ case OP_NOTMINPLUSI:
+ case OP_NOTPOSSTARI:
+ case OP_NOTPOSPLUSI:
+ accelerated_start = cc;
+ cc += 2;
+#ifdef SUPPORT_UNICODE
+ if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+ break;
- end = cc + GET(cc, 1);
- if (*end != OP_KET || PRIVATE_DATA(end) != 0)
- return FALSE;
- if (*cc == OP_CBRA)
- {
- if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
- return FALSE;
- cc += IMM2_SIZE;
- }
- cc += 1 + LINK_SIZE;
- }
+ case OP_UPTO:
+ case OP_MINUPTO:
+ case OP_EXACT:
+ case OP_POSUPTO:
+ case OP_UPTOI:
+ case OP_MINUPTOI:
+ case OP_EXACTI:
+ case OP_POSUPTOI:
+ case OP_NOTUPTO:
+ case OP_NOTMINUPTO:
+ case OP_NOTEXACT:
+ case OP_NOTPOSUPTO:
+ case OP_NOTUPTOI:
+ case OP_NOTMINUPTOI:
+ case OP_NOTEXACTI:
+ case OP_NOTPOSUPTOI:
+ cc += IMM2_SIZE;
+ /* Fall through */
+
+ case OP_QUERY:
+ case OP_MINQUERY:
+ case OP_POSQUERY:
+ case OP_QUERYI:
+ case OP_MINQUERYI:
+ case OP_POSQUERYI:
+ case OP_NOTQUERY:
+ case OP_NOTMINQUERY:
+ case OP_NOTPOSQUERY:
+ case OP_NOTQUERYI:
+ case OP_NOTMINQUERYI:
+ case OP_NOTPOSQUERYI:
+ fast_forward_allowed = FALSE;
+ if (count == 0)
+ count = 1;
+ cc += 2;
+#ifdef SUPPORT_UNICODE
+ if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+ continue;
-if (is_accelerated_repeat(cc))
- {
- common->fast_forward_bc_ptr = cc;
- common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
- *private_data_start += sizeof(sljit_sw);
- return TRUE;
- }
-return FALSE;
-}
+ case OP_CLASS:
+ case OP_NCLASS:
+#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
+ case OP_XCLASS:
+ accelerated_start = cc;
+ cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
+#else
+ accelerated_start = cc;
+ cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
+#endif
-static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth)
-{
- PCRE2_SPTR next_alt;
+ switch (*cc)
+ {
+ case OP_CRSTAR:
+ case OP_CRMINSTAR:
+ case OP_CRPLUS:
+ case OP_CRMINPLUS:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSPLUS:
+ cc++;
+ break;
- SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
+ case OP_CRRANGE:
+ case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
+ cc += 2 * IMM2_SIZE;
+ /* Fall through */
+ case OP_CRQUERY:
+ case OP_CRMINQUERY:
+ case OP_CRPOSQUERY:
+ cc++;
+ if (count == 0)
+ count = 1;
+ /* Fall through */
+ default:
+ accelerated_start = NULL;
+ fast_forward_allowed = FALSE;
+ break;
+ }
+ continue;
- if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
- return;
+ case OP_ONCE:
+ case OP_BRA:
+ case OP_CBRA:
+ end = cc + GET(cc, 1);
- next_alt = bracketend(cc) - (1 + LINK_SIZE);
- if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
- return;
+ if (*end == OP_KET && PRIVATE_DATA(end) == 0)
+ {
+ if (*cc == OP_CBRA)
+ {
+ if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
+ break;
+ cc += IMM2_SIZE;
+ }
- do
- {
- next_alt = cc + GET(cc, 1);
+ cc += 1 + LINK_SIZE;
+ continue;
+ }
- cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
+ fast_forward_allowed = FALSE;
+ if (depth >= 4)
+ break;
- while (TRUE)
- {
- switch(*cc)
+ end = bracketend(cc) - (1 + LINK_SIZE);
+ if (*end != OP_KET || PRIVATE_DATA(end) != 0)
+ break;
+
+ if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
+ break;
+
+ count = detect_early_fail(common, cc, private_data_start, depth + 1, count);
+ if (count < EARLY_FAIL_ENHANCE_MAX)
{
- case OP_SOD:
- case OP_SOM:
- case OP_SET_SOM:
- case OP_NOT_WORD_BOUNDARY:
- case OP_WORD_BOUNDARY:
- case OP_EODN:
- case OP_EOD:
- case OP_CIRC:
- case OP_CIRCM:
- case OP_DOLL:
- case OP_DOLLM:
- /* Zero width assertions. */
- cc++;
+ cc = end + (1 + LINK_SIZE);
continue;
}
break;
- }
- if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
- detect_fast_fail(common, cc, private_data_start, depth - 1);
+ case OP_KET:
+ SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
+ if (cc >= next_alt)
+ break;
+ cc += 1 + LINK_SIZE;
+ continue;
+ }
- if (is_accelerated_repeat(cc))
+ if (accelerated_start != NULL)
{
- common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
+ if (count == 0)
+ {
+ count++;
+
+ if (fast_forward_allowed && *next_alt == OP_KET)
+ {
+ common->fast_forward_bc_ptr = accelerated_start;
+ common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
+ *private_data_start += sizeof(sljit_sw);
+ }
+ else
+ {
+ common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;
- if (common->fast_fail_start_ptr == 0)
- common->fast_fail_start_ptr = *private_data_start;
+ if (common->early_fail_start_ptr == 0)
+ common->early_fail_start_ptr = *private_data_start;
- *private_data_start += sizeof(sljit_sw);
- common->fast_fail_end_ptr = *private_data_start;
+ *private_data_start += sizeof(sljit_sw);
+ common->early_fail_end_ptr = *private_data_start;
- if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
- return;
+ if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
+ return EARLY_FAIL_ENHANCE_MAX;
+ }
+ }
+ else
+ {
+ common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;
+
+ if (common->early_fail_start_ptr == 0)
+ common->early_fail_start_ptr = *private_data_start;
+
+ *private_data_start += 2 * sizeof(sljit_sw);
+ common->early_fail_end_ptr = *private_data_start;
+
+ if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
+ return EARLY_FAIL_ENHANCE_MAX;
+ }
+
+ count++;
+
+ if (count < EARLY_FAIL_ENHANCE_MAX)
+ continue;
}
- cc = next_alt;
+ break;
}
- while (*cc == OP_ALT);
+
+ if (*cc != OP_ALT && *cc != OP_KET)
+ result = EARLY_FAIL_ENHANCE_MAX;
+ else if (result < count)
+ result = count;
+
+ fast_forward_allowed = FALSE;
+ cc = next_alt;
+ }
+while (*cc == OP_ALT);
+
+return result;
}
static int get_class_iterator_size(PCRE2_SPTR cc)
@@ -1586,6 +1791,8 @@ while (cc < ccend)
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
+ case OP_ASSERT_NA:
+ case OP_ASSERTBACK_NA:
case OP_ONCE:
case OP_SCRIPT_RUN:
case OP_BRAPOS:
@@ -2163,6 +2370,8 @@ while (cc < ccend)
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
+ case OP_ASSERT_NA:
+ case OP_ASSERTBACK_NA:
case OP_ONCE:
case OP_SCRIPT_RUN:
case OP_BRAPOS:
@@ -2487,6 +2696,8 @@ while (cc < ccend)
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
+ case OP_ASSERT_NA:
+ case OP_ASSERTBACK_NA:
case OP_ONCE:
case OP_SCRIPT_RUN:
case OP_BRAPOS:
@@ -2660,8 +2871,8 @@ while (cc < ccend)
}
if (common->control_head_ptr != 0 && !control_head_found)
{
- shared_srcw[0] = common->control_head_ptr;
- shared_count = 1;
+ private_srcw[0] = common->control_head_ptr;
+ private_count = 1;
control_head_found = TRUE;
}
cc += 1 + 2 + cc[1];
@@ -2671,8 +2882,8 @@ while (cc < ccend)
SLJIT_ASSERT(common->control_head_ptr != 0);
if (!control_head_found)
{
- shared_srcw[0] = common->control_head_ptr;
- shared_count = 1;
+ private_srcw[0] = common->control_head_ptr;
+ private_count = 1;
control_head_found = TRUE;
}
cc++;
@@ -2756,7 +2967,7 @@ PCRE2_SPTR end = bracketend(cc);
BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
/* Assert captures then. */
-if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
+if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
current_offset = NULL;
/* Conditional block does not. */
if (*cc == OP_COND || *cc == OP_SCOND)
@@ -2768,7 +2979,7 @@ if (has_alternatives)
while (cc < end)
{
- if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
+ if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
cc = set_then_offsets(common, cc, current_offset);
else
{
@@ -2938,16 +3149,54 @@ else
}
}
-static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
+static SLJIT_INLINE void reset_early_fail(compiler_common *common)
{
DEFINE_COMPILER;
+sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
+sljit_u32 uncleared_size;
+sljit_s32 src = SLJIT_IMM;
sljit_s32 i;
+struct sljit_label *loop;
+
+SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
-SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
+if (size == sizeof(sljit_sw))
+ {
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
+ return;
+ }
+
+if (sljit_get_register_index(TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
+ {
+ OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
+ src = TMP3;
+ }
+
+if (size <= 6 * sizeof(sljit_sw))
+ {
+ for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
+ return;
+ }
+
+GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
+
+uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
-OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
+OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
+
+loop = LABEL();
+OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
+OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
+OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * (sljit_sw)sizeof(sljit_sw), src, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * (sljit_sw)sizeof(sljit_sw), src, 0);
+CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
+
+if (uncleared_size >= sizeof(sljit_sw))
+ OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
+
+if (uncleared_size >= 2 * sizeof(sljit_sw))
+ OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
}
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
@@ -3193,16 +3442,19 @@ static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR
unsigned int c;
#ifdef SUPPORT_UNICODE
-if (common->utf)
+if (common->utf || common->ucp)
{
- GETCHAR(c, cc);
- if (c > 127)
+ if (common->utf)
{
- return c != UCD_OTHERCASE(c);
+ GETCHAR(c, cc);
}
-#if PCRE2_CODE_UNIT_WIDTH != 8
+ else
+ c = *cc;
+
+ if (c > 127)
+ return c != UCD_OTHERCASE(c);
+
return common->fcc[c] != c;
-#endif
}
else
#endif
@@ -3214,10 +3466,8 @@ static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigne
{
/* Returns with the othercase. */
#ifdef SUPPORT_UNICODE
-if (common->utf && c > 127)
- {
+if ((common->utf || common->ucp) && c > 127)
return UCD_OTHERCASE(c);
- }
#endif
return TABLE_GET(c, common->fcc, c);
}
@@ -3231,15 +3481,19 @@ int n;
#endif
#ifdef SUPPORT_UNICODE
-if (common->utf)
+if (common->utf || common->ucp)
{
- GETCHAR(c, cc);
+ if (common->utf)
+ {
+ GETCHAR(c, cc);
+ }
+ else
+ c = *cc;
+
if (c <= 127)
oc = common->fcc[c];
else
- {
oc = UCD_OTHERCASE(c);
- }
}
else
{
@@ -4083,7 +4337,7 @@ jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
@@ -4096,7 +4350,7 @@ jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
/* Four byte sequence. */
JUMPHERE(jump);
@@ -4106,7 +4360,7 @@ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
static void do_utfreadtype8(compiler_common *common)
@@ -4131,18 +4385,18 @@ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
/* We only have types for characters less than 256. */
JUMPHERE(jump);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
static void do_utfreadchar_invalid(compiler_common *common)
@@ -4182,7 +4436,7 @@ OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
JUMPHERE(jump);
@@ -4225,7 +4479,7 @@ if (has_cmov)
}
else
exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
JUMPHERE(jump);
@@ -4254,7 +4508,7 @@ else
exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
JUMPHERE(buffer_end_close);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
@@ -4271,7 +4525,7 @@ exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
/* Three-byte sequence. */
JUMPHERE(jump);
@@ -4301,7 +4555,7 @@ for (i = 0; i < 11; i++)
sljit_set_label(exit_invalid[i], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
static void do_utfreadnewline_invalid(compiler_common *common)
@@ -4332,7 +4586,7 @@ if (common->nltype != NLTYPE_ANY)
JUMPHERE(jump[0]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
- sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+ OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
return;
}
@@ -4363,14 +4617,14 @@ JUMPHERE(jump[0]);
JUMPHERE(jump[4]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
/* Two byte long newline: 0x85. */
JUMPHERE(jump[1]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
/* Three byte long newlines: 0x2028 and 0x2029. */
JUMPHERE(jump[2]);
@@ -4385,7 +4639,7 @@ CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
static void do_utfmoveback_invalid(compiler_common *common)
@@ -4414,7 +4668,7 @@ jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
/* Three-byte sequence. */
JUMPHERE(jump);
@@ -4427,7 +4681,7 @@ jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
/* Four-byte sequence. */
JUMPHERE(jump);
@@ -4440,7 +4694,7 @@ exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);
exit_ok_label = LABEL();
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
/* Two-byte sequence. */
JUMPHERE(buffer_start_close);
@@ -4470,7 +4724,7 @@ sljit_set_label(exit_invalid[5], exit_invalid_label);
sljit_set_label(exit_invalid[6], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
JUMPHERE(exit_invalid[4]);
/* -2 + 4 = 2 */
@@ -4481,7 +4735,7 @@ for (i = 0; i < 4; i++)
sljit_set_label(exit_invalid[i], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
static void do_utfpeakcharback(compiler_common *common)
@@ -4518,7 +4772,7 @@ OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
static void do_utfpeakcharback_invalid(compiler_common *common)
@@ -4548,7 +4802,7 @@ two_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
JUMPHERE(jump[1]);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
@@ -4586,7 +4840,7 @@ if (has_cmov)
else
exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
JUMPHERE(jump[1]);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
@@ -4612,7 +4866,7 @@ else
exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
JUMPHERE(jump[0]);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
@@ -4635,7 +4889,7 @@ OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
JUMPHERE(jump[0]);
exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);
@@ -4650,7 +4904,7 @@ for (i = 0; i < 8; i++)
sljit_set_label(exit_invalid[i], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
@@ -4680,13 +4934,13 @@ OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
static void do_utfreadnewline_invalid(compiler_common *common)
@@ -4713,12 +4967,12 @@ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
static void do_utfmoveback_invalid(compiler_common *common)
@@ -4738,7 +4992,7 @@ exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
@@ -4746,7 +5000,7 @@ JUMPHERE(exit_invalid[2]);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
static void do_utfpeakcharback_invalid(compiler_common *common)
@@ -4771,14 +5025,14 @@ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
JUMPHERE(jump);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
JUMPHERE(exit_invalid[0]);
JUMPHERE(exit_invalid[1]);
JUMPHERE(exit_invalid[2]);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
@@ -4824,7 +5078,7 @@ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
static void do_getucdtype(compiler_common *common)
@@ -4871,7 +5125,7 @@ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
#endif /* SUPPORT_UNICODE */
@@ -5159,6 +5413,8 @@ while (TRUE)
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
+ case OP_ASSERT_NA:
+ case OP_ASSERTBACK_NA:
cc = bracketend(cc);
continue;
@@ -5458,7 +5714,12 @@ while (TRUE)
#endif
{
chr = *cc;
- othercase[0] = TABLE_GET(chr, common->fcc, chr);
+#ifdef SUPPORT_UNICODE
+ if (common->ucp && chr > 127)
+ othercase[0] = UCD_OTHERCASE(chr);
+ else
+#endif
+ othercase[0] = TABLE_GET(chr, common->fcc, chr);
}
}
else
@@ -5887,8 +6148,8 @@ oc = first_char;
if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
{
oc = TABLE_GET(first_char, common->fcc, first_char);
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
- if (first_char > 127 && common->utf)
+#if defined SUPPORT_UNICODE
+ if (first_char > 127 && (common->utf || common->ucp))
oc = UCD_OTHERCASE(first_char);
#endif
}
@@ -6072,67 +6333,80 @@ if (common->match_end_ptr != 0)
OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}
-static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
+static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *toolong;
-struct sljit_jump *alreadyfound;
+struct sljit_jump *already_found;
struct sljit_jump *found;
-struct sljit_jump *foundoc = NULL;
-struct sljit_jump *notfound;
+struct sljit_jump *found_oc = NULL;
+jump_list *not_found = NULL;
sljit_u32 oc, bit;
SLJIT_ASSERT(common->req_char_ptr != 0);
-OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
-OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_CU_MAX);
-toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
-alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
+OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
+toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
+already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
if (has_firstchar)
OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
else
OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
-loop = LABEL();
-notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
-
-OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
oc = req_char;
if (caseless)
{
oc = TABLE_GET(req_char, common->fcc, req_char);
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
- if (req_char > 127 && common->utf)
+#if defined SUPPORT_UNICODE
+ if (req_char > 127 && (common->utf || common->ucp))
oc = UCD_OTHERCASE(req_char);
#endif
}
-if (req_char == oc)
- found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
+
+#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
+if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
+ {
+ not_found = fast_requested_char_simd(common, req_char, oc);
+ }
else
+#endif
{
- bit = req_char ^ oc;
- if (is_powerof2(bit))
- {
- OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
- found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
- }
+ loop = LABEL();
+ add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
+
+ OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
+
+ if (req_char == oc)
+ found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
else
{
- found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
- foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
+ bit = req_char ^ oc;
+ if (is_powerof2(bit))
+ {
+ OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
+ found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
+ }
+ else
+ {
+ found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
+ found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
+ }
}
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
+ JUMPTO(SLJIT_JUMP, loop);
+
+ JUMPHERE(found);
+ if (found_oc)
+ JUMPHERE(found_oc);
}
-OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
-JUMPTO(SLJIT_JUMP, loop);
-JUMPHERE(found);
-if (foundoc)
- JUMPHERE(foundoc);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
-JUMPHERE(alreadyfound);
+
+JUMPHERE(already_found);
JUMPHERE(toolong);
-return notfound;
+return not_found;
}
static void do_revertframes(compiler_common *common)
@@ -6170,7 +6444,7 @@ JUMPTO(SLJIT_JUMP, mainloop);
JUMPHERE(jump);
jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
/* End of reverting values. */
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
JUMPHERE(jump);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
@@ -6240,7 +6514,7 @@ else
/* Testing char type. */
#ifdef SUPPORT_UNICODE
-if (common->use_ucp)
+if (common->ucp)
{
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
@@ -6286,7 +6560,7 @@ peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);
valid_utf = LABEL();
-if (common->use_ucp)
+if (common->ucp)
{
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
@@ -6326,7 +6600,7 @@ set_jumps(skipread_list, LABEL());
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
-sljit_emit_fast_return(compiler, TMP1, 0);
+OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
@@ -6338,12 +6612,12 @@ if (common->invalid_utf)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
- sljit_emit_fast_return(compiler, TMP1, 0);
+ OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
set_jumps(invalid_utf2, LABEL());
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
- sljit_emit_fast_return(compiler, TMP1, 0);
+ OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
#endif /* SUPPORT_UNICODE */
}
@@ -6633,7 +6907,7 @@ if (common->utf)
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
static void check_hspace(compiler_common *common)
@@ -6672,7 +6946,7 @@ if (common->utf)
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
static void check_vspace(compiler_common *common)
@@ -6700,7 +6974,7 @@ if (common->utf)
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
-sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
+OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
static void do_casefulcmp(compiler_common *common)
@@ -6780,7 +7054,7 @@ if (char1_reg == STR_END)
OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
}
-sljit_emit_fast_return(compiler, TMP1, 0);
+OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
static void do_caselesscmp(compiler_common *common)
@@ -6878,7 +7152,7 @@ if (char2_reg == STACK_TOP)
}
OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
-sljit_emit_fast_return(compiler, TMP1, 0);
+OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
}
static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
@@ -7189,7 +7463,13 @@ cc = ccbegin;
if ((cc[-1] & XCL_NOT) != 0)
read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
else
+ {
+#ifdef SUPPORT_UNICODE
+ read_char(common, min, max, (needstype || needsscript) ? backtracks : NULL, 0);
+#else /* !SUPPORT_UNICODE */
read_char(common, min, max, NULL, 0);
+#endif /* SUPPORT_UNICODE */
+ }
if ((cc[-1] & XCL_HASPROP) == 0)
{
@@ -7275,16 +7555,11 @@ if (needstype || needsscript)
/* Before anything else, we deal with scripts. */
if (needsscript)
{
-// PH hacking
- OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
-
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
-
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0);
+ OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
+ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
- // OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
ccbegin = cc;
@@ -7328,28 +7603,19 @@ if (needstype || needsscript)
{
if (!needschar)
{
-// PH hacking
- OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
- OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP1, 0);
+ OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
+ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
-
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0);
-
-// OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
}
else
{
-// PH hacking
- OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
-
+ OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
+ OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
- OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
-
+ OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
typereg = RETURN_ADDR;
}
@@ -8728,16 +8994,13 @@ if (common->utf && *cc == OP_REFI)
CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
-// PH hacking
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
- OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
-
+ OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
-
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
+ OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));
@@ -9597,7 +9860,8 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
}
else
{
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
+ SLJIT_ASSERT(extrasize == 3);
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
}
}
@@ -9929,7 +10193,7 @@ if (opcode == OP_CBRA || opcode == OP_SCBRA)
BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
matchingpath += IMM2_SIZE;
}
-else if (opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
+else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
{
/* Other brackets simply allocate the next entry. */
private_data_ptr = PRIVATE_DATA(ccbegin);
@@ -10114,7 +10378,7 @@ else if (opcode == OP_CBRA || opcode == OP_SCBRA)
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
}
}
-else if (opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
+else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
{
/* Saving the previous value. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
@@ -10240,6 +10504,9 @@ compile_matchingpath(common, matchingpath, cc, backtrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
return NULL;
+if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+
if (opcode == OP_ONCE)
match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
@@ -10840,8 +11107,8 @@ backtrack_common *backtrack;
PCRE2_UCHAR opcode;
PCRE2_UCHAR type;
sljit_u32 max = 0, exact;
-BOOL fast_fail;
-sljit_s32 fast_str_ptr;
+sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
+sljit_s32 early_fail_type;
BOOL charpos_enabled;
PCRE2_UCHAR charpos_char;
unsigned int charpos_othercasebit;
@@ -10855,21 +11122,27 @@ int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
int tmp_base, tmp_offset;
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+BOOL use_tmp;
+#endif
PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
-fast_str_ptr = PRIVATE_DATA(cc + 1);
-fast_fail = TRUE;
+early_fail_type = (early_fail_ptr & 0x7);
+early_fail_ptr >>= 3;
-SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr);
+/* During recursion, these optimizations are disabled. */
+if (common->early_fail_start_ptr == 0)
+ {
+ early_fail_ptr = 0;
+ early_fail_type = type_skip;
+ }
-if (cc == common->fast_forward_bc_ptr)
- fast_fail = FALSE;
-else if (common->fast_fail_start_ptr == 0)
- fast_str_ptr = 0;
+SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
+ || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
-SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0
- || (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr));
+if (early_fail_type == type_fail)
+ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
@@ -10884,13 +11157,11 @@ else
tmp_offset = POSSESSIVE0;
}
-if (fast_fail && fast_str_ptr != 0)
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr));
-
/* Handle fixed part first. */
if (exact > 1)
{
- SLJIT_ASSERT(fast_str_ptr == 0);
+ SLJIT_ASSERT(early_fail_ptr == 0);
+
if (common->mode == PCRE2_JIT_COMPLETE
#ifdef SUPPORT_UNICODE
&& !common->utf
@@ -10915,18 +11186,31 @@ if (exact > 1)
}
}
else if (exact == 1)
+ {
compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
+ if (early_fail_type == type_fail_range)
+ {
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw));
+ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
+ OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
+ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
+
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0);
+ }
+ }
+
switch(opcode)
{
case OP_STAR:
case OP_UPTO:
- SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR);
+ SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
if (type == OP_ANYNL || type == OP_EXTUNI)
{
SLJIT_ASSERT(private_data_ptr == 0);
- SLJIT_ASSERT(fast_str_ptr == 0);
+ SLJIT_ASSERT(early_fail_ptr == 0);
allocate_stack(common, 2);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
@@ -10945,7 +11229,7 @@ switch(opcode)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
}
- /* We cannot use TMP3 because of this allocate_stack. */
+ /* We cannot use TMP3 because of allocate_stack. */
allocate_stack(common, 1);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
JUMPTO(SLJIT_JUMP, label);
@@ -10971,8 +11255,8 @@ switch(opcode)
OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
process_partial_match(common);
- if (fast_str_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_END, 0);
+ if (early_fail_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
break;
}
@@ -11002,8 +11286,8 @@ switch(opcode)
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- if (fast_str_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
+ if (early_fail_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
break;
}
@@ -11030,7 +11314,7 @@ switch(opcode)
if (charpos_enabled)
{
charpos_char = end[1];
- /* Consumpe the OP_CHAR opcode. */
+ /* Consume the OP_CHAR opcode. */
end += 2;
#if PCRE2_CODE_UNIT_WIDTH == 8
SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
@@ -11062,8 +11346,8 @@ switch(opcode)
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
}
compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
- if (fast_str_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
+ if (early_fail_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
JUMPHERE(jump);
detect_partial_match(common, &backtrack->topbacktracks);
@@ -11076,6 +11360,7 @@ switch(opcode)
allocate_stack(common, 2);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
+
if (opcode == OP_UPTO)
{
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
@@ -11085,53 +11370,55 @@ switch(opcode)
/* Search the last instance of charpos_char. */
label = LABEL();
compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
- if (fast_str_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
+ if (early_fail_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
detect_partial_match(common, &no_match);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
if (charpos_othercasebit != 0)
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
+
if (opcode == OP_STAR)
{
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+ JUMPTO(SLJIT_JUMP, label);
}
else
{
jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
JUMPHERE(jump);
- }
-
- if (opcode == OP_UPTO)
- {
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
}
- else
- JUMPTO(SLJIT_JUMP, label);
set_jumps(no_match, LABEL());
- OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
}
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- else if (common->utf)
+ else
{
if (private_data_ptr == 0)
allocate_stack(common, 2);
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+ use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
+ SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);
+ if (common->utf)
+ OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
+#endif
if (opcode == OP_UPTO)
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
detect_partial_match(common, &no_match);
label = LABEL();
- compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
+ compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+ if (common->utf)
+ OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
+#endif
if (opcode == OP_UPTO)
{
@@ -11142,39 +11429,29 @@ switch(opcode)
detect_partial_match_to(common, label);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- set_jumps(no_match, LABEL());
- OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
- if (fast_str_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
- }
+ set_jumps(no_char1_match, LABEL());
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+ if (common->utf)
+ {
+ set_jumps(no_match, LABEL());
+ if (use_tmp)
+ {
+ OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
+ OP1(SLJIT_MOV, base, offset0, TMP3, 0);
+ }
+ else
+ OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
+ }
+ else
#endif
- else
- {
- if (private_data_ptr == 0)
- allocate_stack(common, 2);
-
- OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
- if (opcode == OP_UPTO)
- OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
-
- detect_partial_match(common, &no_match);
- label = LABEL();
- compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
- if (opcode == OP_UPTO)
{
- OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
- add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ set_jumps(no_match, LABEL());
+ OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
}
- detect_partial_match_to(common, label);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-
- set_jumps(no_char1_match, LABEL());
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- set_jumps(no_match, LABEL());
- OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
- if (fast_str_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
+ if (early_fail_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
}
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
@@ -11185,12 +11462,12 @@ switch(opcode)
allocate_stack(common, 1);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
- if (fast_str_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
+ if (early_fail_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
break;
case OP_MINUPTO:
- SLJIT_ASSERT(fast_str_ptr == 0);
+ SLJIT_ASSERT(early_fail_ptr == 0);
if (private_data_ptr == 0)
allocate_stack(common, 2);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
@@ -11200,7 +11477,7 @@ switch(opcode)
case OP_QUERY:
case OP_MINQUERY:
- SLJIT_ASSERT(fast_str_ptr == 0);
+ SLJIT_ASSERT(early_fail_ptr == 0);
if (private_data_ptr == 0)
allocate_stack(common, 1);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
@@ -11221,8 +11498,8 @@ switch(opcode)
{
OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
process_partial_match(common);
- if (fast_str_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_END, 0);
+ if (early_fail_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
break;
}
@@ -11238,16 +11515,17 @@ switch(opcode)
set_jumps(no_match, LABEL());
OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
- if (fast_str_ptr != 0)
+ if (early_fail_ptr != 0)
{
- if (tmp_base == TMP3)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, TMP3, 0);
+ if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
else
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
}
break;
}
#endif
+
detect_partial_match(common, &no_match);
label = LABEL();
compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
@@ -11257,12 +11535,12 @@ switch(opcode)
set_jumps(no_char1_match, LABEL());
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
set_jumps(no_match, LABEL());
- if (fast_str_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
+ if (early_fail_ptr != 0)
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
break;
case OP_POSUPTO:
- SLJIT_ASSERT(fast_str_ptr == 0);
+ SLJIT_ASSERT(early_fail_ptr == 0);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
{
@@ -11298,9 +11576,6 @@ switch(opcode)
process_partial_match(common);
JUMPHERE(jump);
}
-
- if (fast_str_ptr != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
break;
}
@@ -11320,7 +11595,7 @@ switch(opcode)
break;
case OP_POSQUERY:
- SLJIT_ASSERT(fast_str_ptr == 0);
+ SLJIT_ASSERT(early_fail_ptr == 0);
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
@@ -11695,6 +11970,8 @@ while (cc < ccend)
count_match(common);
break;
+ case OP_ASSERT_NA:
+ case OP_ASSERTBACK_NA:
case OP_ONCE:
case OP_SCRIPT_RUN:
case OP_BRA:
@@ -12232,6 +12509,7 @@ else if (has_alternatives)
SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label);
sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL());
+ sljit_emit_op0(compiler, SLJIT_ENDBR);
}
else
next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
@@ -12298,6 +12576,9 @@ if (has_alternatives)
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
return;
+ if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+
if (opcode == OP_SCRIPT_RUN)
match_script_run_common(common, private_data_ptr, current);
}
@@ -12379,7 +12660,10 @@ if (has_alternatives)
}
}
else
+ {
sljit_set_put_label(put_label, LABEL());
+ sljit_emit_op0(compiler, SLJIT_ENDBR);
+ }
}
COMPILE_BACKTRACKINGPATH(current->top);
@@ -12427,7 +12711,7 @@ if (offset != 0)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
}
}
-else if (opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
+else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
{
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
free_stack(common, 1);
@@ -12775,6 +13059,8 @@ while (current)
compile_assert_backtrackingpath(common, current);
break;
+ case OP_ASSERT_NA:
+ case OP_ASSERTBACK_NA:
case OP_ONCE:
case OP_SCRIPT_RUN:
case OP_BRA:
@@ -12872,7 +13158,7 @@ jump_list *match = NULL;
struct sljit_jump *next_alt = NULL;
struct sljit_jump *accept_exit = NULL;
struct sljit_label *quit;
-struct sljit_put_label *put_label;
+struct sljit_put_label *put_label = NULL;
/* Recurse captures then. */
common->then_trap = NULL;
@@ -12969,6 +13255,7 @@ while (1)
{
sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
sljit_set_put_label(put_label, LABEL());
+ sljit_emit_op0(compiler, SLJIT_ENDBR);
}
else
next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
@@ -12977,7 +13264,10 @@ while (1)
free_stack(common, has_accept ? 2 : 1);
}
else if (alt_max > 3)
+ {
sljit_set_put_label(put_label, LABEL());
+ sljit_emit_op0(compiler, SLJIT_ENDBR);
+ }
else
{
JUMPHERE(next_alt);
@@ -13011,7 +13301,7 @@ copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
free_stack(common, private_data_size + local_size);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
-sljit_emit_fast_return(compiler, TMP2, 0);
+OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
if (common->quit != NULL)
{
@@ -13036,7 +13326,7 @@ if (has_accept)
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
free_stack(common, private_data_size + local_size);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
- sljit_emit_fast_return(compiler, TMP2, 0);
+ OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
}
if (common->accept != NULL)
@@ -13060,7 +13350,7 @@ copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, priva
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
-sljit_emit_fast_return(compiler, TMP2, 0);
+OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
}
#undef COMPILE_BACKTRACKINGPATH
@@ -13092,9 +13382,9 @@ struct sljit_label *reset_match_label;
struct sljit_label *quit_label;
struct sljit_jump *jump;
struct sljit_jump *minlength_check_failed = NULL;
-struct sljit_jump *reqbyte_notfound = NULL;
struct sljit_jump *empty_match = NULL;
struct sljit_jump *end_anchor_failed = NULL;
+jump_list *reqcu_not_found = NULL;
SLJIT_ASSERT(tables);
@@ -13122,8 +13412,8 @@ common->read_only_data_head = NULL;
common->fcc = tables + fcc_offset;
common->lcc = (sljit_sw)(tables + lcc_offset);
common->mode = mode;
-common->might_be_empty = re->minlength == 0;
-common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY) != 0;
+common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
+common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
common->nltype = NLTYPE_FIXED;
switch(re->newline_convention)
{
@@ -13160,7 +13450,7 @@ common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
#ifdef SUPPORT_UNICODE
/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
common->utf = (re->overall_options & PCRE2_UTF) != 0;
-common->use_ucp = (re->overall_options & PCRE2_UCP) != 0;
+common->ucp = (re->overall_options & PCRE2_UCP) != 0;
if (common->utf)
{
if (common->nltype == NLTYPE_ANY)
@@ -13272,13 +13562,10 @@ memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
set_private_data_ptrs(common, &private_data_size, ccend);
-if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
- {
- if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back)
- detect_fast_fail(common, common->start, &private_data_size, 4);
- }
+if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
+ detect_early_fail(common, common->start, &private_data_size, 0, 0);
-SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr);
+SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
{
@@ -13322,8 +13609,8 @@ OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_sta
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
-if (common->fast_fail_start_ptr < common->fast_fail_end_ptr)
- reset_fast_fail(common);
+if (common->early_fail_start_ptr < common->early_fail_end_ptr)
+ reset_early_fail(common);
if (mode == PCRE2_JIT_PARTIAL_SOFT)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
@@ -13360,7 +13647,7 @@ if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PC
minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
}
if (common->req_char_ptr != 0)
- reqbyte_notfound = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
+ reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
/* Store the current STR_PTR in OVECTOR(0). */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
@@ -13369,7 +13656,7 @@ OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
if (common->capture_last_ptr != 0)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
if (common->fast_forward_bc_ptr != NULL)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
if (common->start_ptr != OVECTOR(0))
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
@@ -13416,6 +13703,8 @@ if (common->abort != NULL)
set_jumps(common->abort, common->abort_label);
if (minlength_check_failed != NULL)
SET_LABEL(minlength_check_failed, common->abort_label);
+
+sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
if (common->failed_match != NULL)
@@ -13468,7 +13757,7 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
}
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
- (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr);
+ (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
if ((re->overall_options & PCRE2_ANCHORED) == 0)
{
@@ -13493,8 +13782,8 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0)
}
/* No more remaining characters. */
-if (reqbyte_notfound != NULL)
- JUMPHERE(reqbyte_notfound);
+if (reqcu_not_found != NULL)
+ set_jumps(reqcu_not_found, LABEL());
if (mode == PCRE2_JIT_PARTIAL_SOFT)
CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
@@ -13519,8 +13808,8 @@ if (common->might_be_empty)
}
common->fast_forward_bc_ptr = NULL;
-common->fast_fail_start_ptr = 0;
-common->fast_fail_end_ptr = 0;
+common->early_fail_start_ptr = 0;
+common->early_fail_end_ptr = 0;
common->currententry = common->entries;
common->local_quit_available = TRUE;
quit_label = common->quit_label;
@@ -13563,7 +13852,7 @@ OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
-sljit_emit_fast_return(compiler, TMP1, 0);
+OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
/* Allocation failed. */
JUMPHERE(jump);
@@ -13742,11 +14031,6 @@ pcre2_jit_compile(pcre2_code *code, uint32_t options)
{
pcre2_real_code *re = (pcre2_real_code *)code;
-#ifdef SUPPORT_JIT
-executable_functions *functions = (executable_functions *)re->executable_jit;
-static int executable_allocator_is_working = 0;
-#endif
-
if (code == NULL)
return PCRE2_ERROR_NULL;
@@ -13779,6 +14063,11 @@ actions are needed:
avoid compiler warnings.
*/
+#ifdef SUPPORT_JIT
+executable_functions *functions = (executable_functions *)re->executable_jit;
+static int executable_allocator_is_working = 0;
+#endif
+
if ((options & PCRE2_JIT_INVALID_UTF) != 0)
{
if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
diff --git a/src/pcre2_jit_misc.c b/src/pcre2_jit_misc.c
index efdb055..36abdba 100644
--- a/src/pcre2_jit_misc.c
+++ b/src/pcre2_jit_misc.c
@@ -145,6 +145,11 @@ maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
jit_stack = PRIV(memctl_malloc)(sizeof(pcre2_real_jit_stack), (pcre2_memctl *)gcontext);
if (jit_stack == NULL) return NULL;
jit_stack->stack = sljit_allocate_stack(startsize, maxsize, &jit_stack->memctl);
+if (jit_stack->stack == NULL)
+ {
+ jit_stack->memctl.free(jit_stack, jit_stack->memctl.memory_data);
+ return NULL;
+ }
return jit_stack;
#endif
diff --git a/src/pcre2_jit_neon_inc.h b/src/pcre2_jit_neon_inc.h
index 55b1f32..66373b6 100644
--- a/src/pcre2_jit_neon_inc.h
+++ b/src/pcre2_jit_neon_inc.h
@@ -117,11 +117,16 @@ PCRE2_UCHAR char2a = ic.c.c3;
# ifdef FFCPS_CHAR1A2A
cmp1a = VDUPQ(char1a);
cmp2a = VDUPQ(char2a);
+cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
+cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
# else
PCRE2_UCHAR char1b = ic.c.c2;
PCRE2_UCHAR char2b = ic.c.c4;
if (char1a == char1b)
+ {
cmp1a = VDUPQ(char1a);
+ cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
+ }
else
{
sljit_u32 bit1 = char1a ^ char1b;
@@ -140,7 +145,10 @@ else
}
if (char2a == char2b)
+ {
cmp2a = VDUPQ(char2a);
+ cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
+ }
else
{
sljit_u32 bit2 = char2a ^ char2b;
@@ -208,8 +216,16 @@ if (p1 < str_ptr)
else
data2 = shift_left_n_lanes(data, offs1 - offs2);
-data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
-data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
+if (compare1_type == compare_match1)
+ data = VCEQQ(data, cmp1a);
+else
+ data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
+
+if (compare2_type == compare_match1)
+ data2 = VCEQQ(data2, cmp2a);
+else
+ data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
+
vect_t eq = VANDQ(data, data2);
#endif
@@ -275,8 +291,14 @@ while (str_ptr < str_end)
data = VCEQQ(data, cmp1a);
data2 = VCEQQ(data2, cmp2a);
# else
- data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
- data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
+ if (compare1_type == compare_match1)
+ data = VCEQQ(data, cmp1a);
+ else
+ data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
+ if (compare2_type == compare_match1)
+ data2 = VCEQQ(data2, cmp2a);
+ else
+ data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
# endif
eq = VANDQ(data, data2);
diff --git a/src/pcre2_jit_simd_inc.h b/src/pcre2_jit_simd_inc.h
index f7d56b2..5673d33 100644
--- a/src/pcre2_jit_simd_inc.h
+++ b/src/pcre2_jit_simd_inc.h
@@ -344,6 +344,136 @@ if (common->utf && offset > 0)
#endif
}
+#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
+
+static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
+{
+DEFINE_COMPILER;
+struct sljit_label *start;
+struct sljit_jump *quit;
+jump_list *not_found = NULL; /* collects every "no match" exit; returned to the caller */
+sse2_compare_type compare_type = sse2_compare_match1; /* default: char1 == char2, one constant suffices */
+sljit_u8 instruction[8]; /* scratch buffer for the hand-encoded x86 instructions below */
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
+sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
+sljit_s32 data_ind = 0; /* xmm number that receives the loaded subject data */
+sljit_s32 tmp_ind = 1; /* xmm number handed to the compare helper as a temporary */
+sljit_s32 cmp1_ind = 2; /* xmm number holding the first comparison constant */
+sljit_s32 cmp2_ind = 3; /* xmm number holding the second constant (only set when char1 != char2) */
+sljit_u32 bit = 0;
+int i;
+/* Emit SSE2 code that scans from the address in TMP1 up to STR_END for char1 or char2. */
+if (char1 != char2)
+ {
+ bit = char1 ^ char2;
+ compare_type = sse2_compare_match1i; /* assume the two chars differ in exactly one bit */
+
+ if (!is_powerof2(bit))
+ {
+ bit = 0; /* bit == 0 makes the constant setup below load char1 and char2 unchanged */
+ compare_type = sse2_compare_match2; /* more than one bit differs: use both constants */
+ }
+ }
+
+add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); /* start is already at/after STR_END */
+OP1(SLJIT_MOV, TMP2, 0, TMP1, 0); /* keep the start address; TMP1 is reused as a scratch register */
+OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); /* save STR_PTR; it is restored just before returning */
+
+/* First part (unaligned start): load the constants, then test the 16-byte aligned block containing the start */
+
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); /* NOTE(review): character_to_int32 presumably replicates the code unit across 32 bits - confirm */
+
+SLJIT_ASSERT(tmp1_reg_ind < 8); /* the encodings below give each register only a 3-bit field */
+
+/* MOVD xmm, r/m32 - copy TMP1 into the low dword of the cmp1 register */
+instruction[0] = 0x66;
+instruction[1] = 0x0f;
+instruction[2] = 0x6e;
+instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_reg_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+if (char1 != char2)
+ {
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2)); /* second constant: the differing bit, or char2 itself */
+
+ /* MOVD xmm, r/m32 - same opcode bytes as above, only the destination changes to cmp2 */
+ instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_reg_ind;
+ sljit_emit_op_custom(compiler, instruction, 4);
+ }
+
+OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0); /* STR_PTR now holds the scan start address */
+
+/* PSHUFD xmm1, xmm2/m128, imm8 - copy the low dword of cmp1 into all four dwords */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0x70;
+instruction[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind;
+instruction[4] = 0; /* imm8 = 0: every result dword is a copy of source dword 0 */
+sljit_emit_op_custom(compiler, instruction, 5);
+
+if (char1 != char2)
+ {
+ /* PSHUFD xmm1, xmm2/m128, imm8 - same broadcast for cmp2; imm8 is still 0 */
+ instruction[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind;
+ sljit_emit_op_custom(compiler, instruction, 5);
+ }
+
+OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); /* round the address down to a 16-byte boundary */
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); /* TMP2 = byte offset of the real start inside that block */
+
+load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0);
+for (i = 0; i < 4; i++) /* the helper is invoked once per step index 0..3 */
+ fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+/* PMOVMSKB reg, xmm - collect the per-byte result mask of the data register into TMP1 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0xd7;
+instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); /* back to the real (unaligned) start address */
+OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); /* drop the mask bits of bytes that lie before the start */
+
+quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); /* a hit in the first block: skip the aligned loop */
+
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); /* no hit: return to the aligned address for the loop */
+
+/* Second part (aligned): step through the subject 16 bytes at a time */
+start = LABEL();
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
+
+add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); /* ran off the end without a hit */
+
+load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0);
+for (i = 0; i < 4; i++) /* same four compare steps as in the first part */
+ fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+/* PMOVMSKB reg, xmm - result mask of this block into TMP1 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0xd7;
+instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); /* empty mask: try the next block */
+
+JUMPHERE(quit); /* both parts arrive here with a non-zero mask in TMP1 */
+
+/* BSF r32, r/m32 - replace the mask in TMP1 with the index of its lowest set bit */
+instruction[0] = 0x0f;
+instruction[1] = 0xbc;
+instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
+sljit_emit_op_custom(compiler, instruction, 3);
+
+OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0); /* TMP1 = address of the first matching position */
+add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); /* a 16-byte load can extend past STR_END; reject hits there */
+
+OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); /* restore the caller's STR_PTR; the hit address stays in TMP1 */
+return not_found; /* the caller decides where the collected "not found" jumps land */
+}
+
#ifndef _WIN64
static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
index a9b3880..a29fffa 100644
--- a/src/pcre2_jit_test.c
+++ b/src/pcre2_jit_test.c
@@ -638,6 +638,7 @@ static struct regression_test_case regression_test_cases[] = {
{ MU, A, 0, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
{ MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
{ MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
+ { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" },
/* Conditional blocks. */
{ MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
@@ -859,6 +860,8 @@ static struct regression_test_case regression_test_cases[] = {
{ MU, A, 0, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
{ MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
{ MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
+ { MU, A, 0, 0, "(?=(*THEN: ))* ", " " },
+ { MU, A, 0, 0, "a(*THEN)(?R) |", "a" },
/* Recurse and control verbs. */
{ MU, A, 0, 0, "(a(*ACCEPT)b){0}a(?1)b", "aacaabb" },
@@ -1353,10 +1356,11 @@ static int regression_tests(void)
ovector8_1[i] = -2;
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
ovector8_2[i] = -2;
+ pcre2_set_match_limit_8(mcontext8, 10000000);
}
if (re8) {
return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
- current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, NULL);
+ current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8);
if (pcre2_jit_compile_8(re8, jit_compile_mode)) {
printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern);
@@ -1392,6 +1396,7 @@ static int regression_tests(void)
ovector16_1[i] = -2;
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
ovector16_2[i] = -2;
+ pcre2_set_match_limit_16(mcontext16, 10000000);
}
if (re16) {
if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
@@ -1400,7 +1405,7 @@ static int regression_tests(void)
length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
- current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, NULL);
+ current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16);
if (pcre2_jit_compile_16(re16, jit_compile_mode)) {
printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern);
@@ -1436,6 +1441,7 @@ static int regression_tests(void)
ovector32_1[i] = -2;
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
ovector32_2[i] = -2;
+ pcre2_set_match_limit_32(mcontext32, 10000000);
}
if (re32) {
if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
@@ -1444,7 +1450,7 @@ static int regression_tests(void)
length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
- current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, NULL);
+ current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32);
if (pcre2_jit_compile_32(re32, jit_compile_mode)) {
printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern);
@@ -1962,6 +1968,8 @@ static struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cas
{ PCRE2_UTF, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
{ PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
+ { PCRE2_UTF | PCRE2_UCP, CI, 0, 0, 0, -1, -1, { "[\\s]", NULL }, "\xed\xa0\x80" },
+
/* These two are not invalid UTF tests, but this infrastructure fits better for them. */
{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" },
{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\R{2}", NULL }, "\r\n\n" },
diff --git a/src/pcre2_maketables.c b/src/pcre2_maketables.c
index 8c93b4b..56d2494 100644
--- a/src/pcre2_maketables.c
+++ b/src/pcre2_maketables.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -41,10 +41,11 @@ POSSIBILITY OF SUCH DAMAGE.
/* This module contains the external function pcre2_maketables(), which builds
character tables for PCRE2 in the current locale. The file is compiled on its
-own as part of the PCRE2 library. However, it is also included in the
-compilation of dftables.c, in which case the macro DFTABLES is defined. */
+own as part of the PCRE2 library. It is also included in the compilation of
+pcre2_dftables.c as a freestanding program, in which case the macro
+PCRE2_DFTABLES is defined. */
-#ifndef DFTABLES
+#ifndef PCRE2_DFTABLES /* Compiling the library */
# ifdef HAVE_CONFIG_H
# include "config.h"
# endif
@@ -61,28 +62,29 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */
a pointer to them. They are build using the ctype functions, and consequently
their contents will depend upon the current locale setting. When compiled as
part of the library, the store is obtained via a general context malloc, if
-supplied, but when DFTABLES is defined (when compiling the dftables auxiliary
-program) malloc() is used, and the function has a different name so as not to
-clash with the prototype in pcre2.h.
+supplied, but when PCRE2_DFTABLES is defined (when compiling the pcre2_dftables
+freestanding auxiliary program) malloc() is used, and the function has a
+different name so as not to clash with the prototype in pcre2.h.
-Arguments: none when DFTABLES is defined
- else a PCRE2 general context or NULL
+Arguments: none when PCRE2_DFTABLES is defined
+ else a PCRE2 general context or NULL
Returns: pointer to the contiguous block of data
+ else NULL if memory allocation failed
*/
-#ifdef DFTABLES /* Included in freestanding dftables.c program */
+#ifdef PCRE2_DFTABLES /* Included in freestanding pcre2_dftables program */
static const uint8_t *maketables(void)
{
-uint8_t *yield = (uint8_t *)malloc(tables_length);
+uint8_t *yield = (uint8_t *)malloc(TABLES_LENGTH);
-#else /* Not DFTABLES, compiling the library */
+#else /* Not PCRE2_DFTABLES, that is, compiling the library */
PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION
pcre2_maketables(pcre2_general_context *gcontext)
{
uint8_t *yield = (uint8_t *)((gcontext != NULL)?
- gcontext->memctl.malloc(tables_length, gcontext->memctl.memory_data) :
- malloc(tables_length));
-#endif /* DFTABLES */
+ gcontext->memctl.malloc(TABLES_LENGTH, gcontext->memctl.memory_data) :
+ malloc(TABLES_LENGTH));
+#endif /* PCRE2_DFTABLES */
int i;
uint8_t *p;
@@ -103,8 +105,8 @@ exclusive ones - in some locales things may be different.
Note that the table for "space" includes everything "isspace" gives, including
VT in the default locale. This makes it work for the POSIX class [:space:].
-From release 8.34 is is also correct for Perl space, because Perl added VT at
-release 5.18.
+From PCRE1 release 8.34 and for all PCRE2 releases it is also correct for Perl
+space, because Perl added VT at release 5.18.
Note also that it is possible for a character to be alnum or alpha without
being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
@@ -114,24 +116,24 @@ test for alnum specially. */
memset(p, 0, cbit_length);
for (i = 0; i < 256; i++)
{
- if (isdigit(i)) p[cbit_digit + i/8] |= 1u << (i&7);
- if (isupper(i)) p[cbit_upper + i/8] |= 1u << (i&7);
- if (islower(i)) p[cbit_lower + i/8] |= 1u << (i&7);
- if (isalnum(i)) p[cbit_word + i/8] |= 1u << (i&7);
- if (i == '_') p[cbit_word + i/8] |= 1u << (i&7);
- if (isspace(i)) p[cbit_space + i/8] |= 1u << (i&7);
- if (isxdigit(i))p[cbit_xdigit + i/8] |= 1u << (i&7);
- if (isgraph(i)) p[cbit_graph + i/8] |= 1u << (i&7);
- if (isprint(i)) p[cbit_print + i/8] |= 1u << (i&7);
- if (ispunct(i)) p[cbit_punct + i/8] |= 1u << (i&7);
- if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1u << (i&7);
+ if (isdigit(i)) p[cbit_digit + i/8] |= 1u << (i&7);
+ if (isupper(i)) p[cbit_upper + i/8] |= 1u << (i&7);
+ if (islower(i)) p[cbit_lower + i/8] |= 1u << (i&7);
+ if (isalnum(i)) p[cbit_word + i/8] |= 1u << (i&7);
+ if (i == '_') p[cbit_word + i/8] |= 1u << (i&7);
+ if (isspace(i)) p[cbit_space + i/8] |= 1u << (i&7);
+ if (isxdigit(i)) p[cbit_xdigit + i/8] |= 1u << (i&7);
+ if (isgraph(i)) p[cbit_graph + i/8] |= 1u << (i&7);
+ if (isprint(i)) p[cbit_print + i/8] |= 1u << (i&7);
+ if (ispunct(i)) p[cbit_punct + i/8] |= 1u << (i&7);
+ if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1u << (i&7);
}
p += cbit_length;
/* Finally, the character type table. In this, we used to exclude VT from the
white space chars, because Perl didn't recognize it as such for \s and for
-comments within regexes. However, Perl changed at release 5.18, so PCRE changed
-at release 8.34. */
+comments within regexes. However, Perl changed at release 5.18, so PCRE1
+changed at release 8.34 and it's always been this way for PCRE2. */
for (i = 0; i < 256; i++)
{
@@ -147,7 +149,7 @@ for (i = 0; i < 256; i++)
return yield;
}
-#ifndef DFTABLES
+#ifndef PCRE2_DFTABLES /* Compiling the library */
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
{
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index 48e7b9d..11289d5 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2015-2019 University of Cambridge
+ New API code Copyright (c) 2015-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -381,8 +381,12 @@ length = Fovector[offset+1] - Fovector[offset];
if (caseless)
{
#if defined SUPPORT_UNICODE
- if ((mb->poptions & PCRE2_UTF) != 0)
+ BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
+
+ if (utf || (mb->poptions & PCRE2_UCP) != 0)
{
+ PCRE2_SPTR endptr = p + length;
+
/* Match characters up to the end of the reference. NOTE: the number of
code units matched may differ, because in UTF-8 there are some characters
whose upper and lower case codes have different numbers of bytes. For
@@ -390,16 +394,25 @@ if (caseless)
bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
sequence of two of the latter. It is important, therefore, to check the
length along the reference, not along the subject (earlier code did this
- wrong). */
+ wrong). UCP without UTF uses Unicode properties but not UTF encoding. */
- PCRE2_SPTR endptr = p + length;
while (p < endptr)
{
uint32_t c, d;
const ucd_record *ur;
if (eptr >= mb->end_subject) return 1; /* Partial match */
- GETCHARINC(c, eptr);
- GETCHARINC(d, p);
+
+ if (utf)
+ {
+ GETCHARINC(c, eptr);
+ GETCHARINC(d, p);
+ }
+ else
+ {
+ c = *eptr++;
+ d = *p++;
+ }
+
ur = GET_UCD(d);
if (c != d && c != (uint32_t)((int)d + ur->other_case))
{
@@ -415,7 +428,7 @@ if (caseless)
else
#endif
- /* Not in UTF mode */
+ /* Not in UTF or UCP mode */
{
for (; length > 0; length--)
{
@@ -432,7 +445,8 @@ if (caseless)
}
/* In the caseful case, we can just compare the code units, whether or not we
-are in UTF mode. When partial matching, we have to do this unit-by-unit. */
+are in UTF and/or UCP mode. When partial matching, we have to do this unit by
+unit. */
else
{
@@ -574,8 +588,8 @@ match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, PCRE2_SIZE *ovector,
heapframe *F; /* Current frame pointer */
heapframe *N = NULL; /* Temporary frame pointers */
heapframe *P = NULL;
-heapframe *assert_accept_frame; /* For passing back the frame with captures */
-PCRE2_SIZE frame_copy_size; /* Amount to copy when creating a new frame */
+heapframe *assert_accept_frame = NULL; /* For passing back a frame with captures */
+PCRE2_SIZE frame_copy_size; /* Amount to copy when creating a new frame */
/* Local variables that do not need to be preserved over calls to RRMATCH(). */
@@ -598,12 +612,13 @@ BOOL condition; /* Used in conditional groups */
BOOL cur_is_word; /* Used in "word" tests */
BOOL prev_is_word; /* Used in "word" tests */
-/* UTF flag */
+/* UTF and UCP flags */
#ifdef SUPPORT_UNICODE
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
+BOOL ucp = (mb->poptions & PCRE2_UCP) != 0;
#else
-BOOL utf = FALSE;
+BOOL utf = FALSE; /* Required for convenience even when no Unicode support */
#endif
/* This is the length of the last part of a backtracking frame that must be
@@ -928,6 +943,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
else
#endif
+
/* Not UTF mode */
{
if (mb->end_subject - Feptr < 1)
@@ -987,10 +1003,30 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
}
}
+
+ /* If UCP is set without UTF we must do the same as above, but with one
+ character per code unit. */
+
+ else if (ucp)
+ {
+ uint32_t cc = UCHAR21(Feptr);
+ fc = Fecode[1];
+ if (fc < 128)
+ {
+ if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
+ }
+ else
+ {
+ if (cc != fc && cc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
+ }
+ Feptr++;
+ Fecode += 2;
+ }
+
else
#endif /* SUPPORT_UNICODE */
- /* Not UTF mode; use the table for characters < 256. */
+ /* Not UTF or UCP mode; use the table for characters < 256. */
{
if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
!= TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);
@@ -1010,6 +1046,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
+
#ifdef SUPPORT_UNICODE
if (utf)
{
@@ -1026,15 +1063,42 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (ch > 127)
ch = UCD_OTHERCASE(ch);
else
- ch = TABLE_GET(ch, mb->fcc, ch);
+ ch = (mb->fcc)[ch];
+ if (ch == fc) RRETURN(MATCH_NOMATCH);
+ }
+ }
+
+ /* UCP without UTF is as above, but with one character per code unit. */
+
+ else if (ucp)
+ {
+ uint32_t ch;
+ fc = UCHAR21INC(Feptr);
+ ch = Fecode[1];
+ Fecode += 2;
+
+ if (ch == fc)
+ {
+ RRETURN(MATCH_NOMATCH); /* Caseful match */
+ }
+ else if (Fop == OP_NOTI) /* If caseless */
+ {
+ if (ch > 127)
+ ch = UCD_OTHERCASE(ch);
+ else
+ ch = (mb->fcc)[ch];
if (ch == fc) RRETURN(MATCH_NOMATCH);
}
}
+
else
#endif /* SUPPORT_UNICODE */
+
+ /* Neither UTF nor UCP is set */
+
{
uint32_t ch = Fecode[1];
- fc = *Feptr++;
+ fc = UCHAR21INC(Feptr);
if (ch == fc || (Fop == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == fc))
RRETURN(MATCH_NOMATCH);
Fecode += 2;
@@ -1244,7 +1308,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
#endif /* SUPPORT_UNICODE */
/* When not in UTF mode, load a single-code-unit character. Then proceed as
- above. */
+ above, using Unicode casing if either UTF or UCP is set. */
Lc = *Fecode++;
@@ -1253,11 +1317,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (Fop >= OP_STARI)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
- /* Lc must be < 128 in UTF-8 mode. */
+#ifdef SUPPORT_UNICODE
+ if (ucp && !utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
+ else
+#endif /* SUPPORT_UNICODE */
+ /* Lc will be < 128 in UTF-8 mode. */
Loc = mb->fcc[Lc];
#else /* 16-bit & 32-bit */
#ifdef SUPPORT_UNICODE
- if (utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
+ if ((utf || ucp) && Lc > 127) Loc = UCD_OTHERCASE(Lc);
else
#endif /* SUPPORT_UNICODE */
Loc = TABLE_GET(Lc, mb->fcc, Lc);
@@ -1490,7 +1558,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (Fop >= OP_NOTSTARI) /* Caseless */
{
#ifdef SUPPORT_UNICODE
- if (utf && Lc > 127)
+ if ((utf || ucp) && Lc > 127)
Loc = UCD_OTHERCASE(Lc);
else
#endif /* SUPPORT_UNICODE */
@@ -6045,7 +6113,6 @@ BOOL firstline;
BOOL has_first_cu = FALSE;
BOOL has_req_cu = FALSE;
BOOL startline;
-BOOL utf;
#if PCRE2_CODE_UNIT_WIDTH == 8
BOOL memchr_not_found_first_cu = FALSE;
@@ -6069,13 +6136,19 @@ PCRE2_SPTR match_partial;
BOOL use_jit;
#endif
+/* This flag is needed even when Unicode is not supported for convenience
+(it is used by the IS_NEWLINE macro). */
+
+BOOL utf = FALSE;
+
#ifdef SUPPORT_UNICODE
+BOOL ucp = FALSE;
BOOL allow_invalid;
uint32_t fragment_options = 0;
#ifdef SUPPORT_JIT
BOOL jit_checked_utf = FALSE;
#endif
-#endif
+#endif /* SUPPORT_UNICODE */
PCRE2_SIZE frame_size;
@@ -6091,7 +6164,8 @@ proves to be too small, it is replaced by a larger one on the heap. To get a
vector of the size required that is aligned for pointers, allocate it as a
vector of pointers. */
-PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)];
+PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)]
+ PCRE2_KEEP_UNINITIALIZED;
mb->stack_frames = (heapframe *)stack_frames_vector;
/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
@@ -6147,12 +6221,13 @@ use_jit = (re->executable_jit != NULL &&
(options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0);
#endif
-/* Initialize UTF parameters. */
+/* Initialize UTF/UCP parameters. */
-utf = (re->overall_options & PCRE2_UTF) != 0;
#ifdef SUPPORT_UNICODE
+utf = (re->overall_options & PCRE2_UTF) != 0;
allow_invalid = (re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0;
-#endif
+ucp = (re->overall_options & PCRE2_UCP) != 0;
+#endif /* SUPPORT_UNICODE */
/* Convert the partial matching flags into an integer. */
@@ -6589,9 +6664,13 @@ if ((re->flags & PCRE2_FIRSTSET) != 0)
if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
{
first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
- if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
+#ifdef SUPPORT_UNICODE
+#if PCRE2_CODE_UNIT_WIDTH == 8
+ if (first_cu > 127 && ucp && !utf) first_cu2 = UCD_OTHERCASE(first_cu);
+#else
+ if (first_cu > 127 && (utf || ucp)) first_cu2 = UCD_OTHERCASE(first_cu);
#endif
+#endif /* SUPPORT_UNICODE */
}
}
else
@@ -6607,9 +6686,13 @@ if ((re->flags & PCRE2_LASTSET) != 0)
if ((re->flags & PCRE2_LASTCASELESS) != 0)
{
req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
- if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu);
+#ifdef SUPPORT_UNICODE
+#if PCRE2_CODE_UNIT_WIDTH == 8
+ if (req_cu > 127 && ucp && !utf) req_cu2 = UCD_OTHERCASE(req_cu);
+#else
+ if (req_cu > 127 && (utf || ucp)) req_cu2 = UCD_OTHERCASE(req_cu);
#endif
+#endif /* SUPPORT_UNICODE */
}
}
@@ -6756,15 +6839,16 @@ for(;;)
#endif
}
- /* If we can't find the required code unit, having reached the true end
- of the subject, break the bumpalong loop, to force a match failure,
- except when doing partial matching, when we let the next cycle run at
- the end of the subject. To see why, consider the pattern /(?<=abc)def/,
- which partially matches "abc", even though the string does not contain
- the starting character "d". If we have not reached the true end of the
- subject (PCRE2_FIRSTLINE caused end_subject to be temporarily modified)
- we also let the cycle run, because the matching string is legitimately
- allowed to start with the first code unit of a newline. */
+ /* If we can't find the required first code unit, having reached the
+ true end of the subject, break the bumpalong loop, to force a match
+ failure, except when doing partial matching, when we let the next cycle
+ run at the end of the subject. To see why, consider the pattern
+ /(?<=abc)def/, which partially matches "abc", even though the string
+ does not contain the starting character "d". If we have not reached the
+ true end of the subject (PCRE2_FIRSTLINE caused end_subject to be
+ temporarily modified) we also let the cycle run, because the matching
+ string is legitimately allowed to start with the first code unit of a
+ newline. */
if (mb->partial == 0 && start_match >= mb->end_subject)
{
diff --git a/src/pcre2_serialize.c b/src/pcre2_serialize.c
index cec1a03..ba17a26 100644
--- a/src/pcre2_serialize.c
+++ b/src/pcre2_serialize.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2018 University of Cambridge
+ New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -90,7 +90,7 @@ if (codes == NULL || serialized_bytes == NULL || serialized_size == NULL)
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
/* Compute total size. */
-total_size = sizeof(pcre2_serialized_data) + tables_length;
+total_size = sizeof(pcre2_serialized_data) + TABLES_LENGTH;
tables = NULL;
for (i = 0; i < number_of_codes; i++)
@@ -121,8 +121,8 @@ data->number_of_codes = number_of_codes;
/* Copy all compiled code data. */
dst_bytes = bytes + sizeof(pcre2_serialized_data);
-memcpy(dst_bytes, tables, tables_length);
-dst_bytes += tables_length;
+memcpy(dst_bytes, tables, TABLES_LENGTH);
+dst_bytes += TABLES_LENGTH;
for (i = 0; i < number_of_codes; i++)
{
@@ -189,12 +189,12 @@ src_bytes = bytes + sizeof(pcre2_serialized_data);
/* Decode tables. The reference count for the tables is stored immediately
following them. */
-tables = memctl->malloc(tables_length + sizeof(PCRE2_SIZE), memctl->memory_data);
+tables = memctl->malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE), memctl->memory_data);
if (tables == NULL) return PCRE2_ERROR_NOMEMORY;
-memcpy(tables, src_bytes, tables_length);
-*(PCRE2_SIZE *)(tables + tables_length) = number_of_codes;
-src_bytes += tables_length;
+memcpy(tables, src_bytes, TABLES_LENGTH);
+*(PCRE2_SIZE *)(tables + TABLES_LENGTH) = number_of_codes;
+src_bytes += TABLES_LENGTH;
/* Decode the byte stream. We must not try to read the size from the compiled
code block in the stream, because it might be unaligned, which causes errors on
diff --git a/src/pcre2_study.c b/src/pcre2_study.c
index 2883868..9bbb375 100644
--- a/src/pcre2_study.c
+++ b/src/pcre2_study.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -58,7 +58,7 @@ collecting data (e.g. minimum matching length). */
/* Returns from set_start_bits() */
-enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN };
+enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN, SSB_TOODEEP };
/*************************************************
@@ -772,15 +772,19 @@ Arguments:
p points to the first code unit of the character
caseless TRUE if caseless
utf TRUE for UTF mode
+ ucp TRUE for UCP mode
Returns: pointer after the character
*/
static PCRE2_SPTR
-set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf)
+set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf,
+ BOOL ucp)
{
uint32_t c = *p++; /* First code unit */
-(void)utf; /* Stop compiler warning when UTF not supported */
+
+(void)utf; /* Stop compiler warnings when UTF not supported */
+(void)ucp;
/* In 16-bit and 32-bit modes, code units greater than 0xff set the bit for
0xff. */
@@ -810,22 +814,26 @@ if (utf)
if (caseless)
{
#ifdef SUPPORT_UNICODE
- if (utf)
+ if (utf || ucp)
{
-#if PCRE2_CODE_UNIT_WIDTH == 8
- PCRE2_UCHAR buff[6];
c = UCD_OTHERCASE(c);
- (void)PRIV(ord2utf)(c, buff);
- SET_BIT(buff[0]);
+#if PCRE2_CODE_UNIT_WIDTH == 8
+ if (utf)
+ {
+ PCRE2_UCHAR buff[6];
+ (void)PRIV(ord2utf)(c, buff);
+ SET_BIT(buff[0]);
+ }
+ else if (c < 256) SET_BIT(c);
#else /* 16-bit or 32-bit mode */
- c = UCD_OTHERCASE(c);
if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
#endif
}
+
else
#endif /* SUPPORT_UNICODE */
- /* Not UTF */
+ /* Not UTF or UCP */
if (MAX_255(c)) SET_BIT(re->tables[fcc_offset + c]);
}
@@ -924,19 +932,26 @@ The SSB_CONTINUE return is useful for parenthesized groups in patterns such as
must continue at the outer level to find at least one mandatory code unit. At
the outermost level, this function fails unless the result is SSB_DONE.
+We restrict recursion (for nested groups) to 1000 calls in total (the counter
+is only ever incremented) to avoid stack overflow issues.
+
Arguments:
re points to the compiled regex block
code points to an expression
utf TRUE if in UTF mode
+ ucp TRUE if in UCP mode
+ depthptr pointer to the recursion depth counter
Returns: SSB_FAIL => Failed to find any starting code units
SSB_DONE => Found mandatory starting code units
SSB_CONTINUE => Found optional starting code units
SSB_UNKNOWN => Hit an unrecognized opcode
+ SSB_TOODEEP => Recursion is too deep
*/
static int
-set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf)
+set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf, BOOL ucp,
+ int *depthptr)
{
uint32_t c;
int yield = SSB_DONE;
@@ -947,6 +962,9 @@ int table_limit = utf? 16:32;
int table_limit = 32;
#endif
+*depthptr += 1;
+if (*depthptr > 1000) return SSB_TOODEEP;
+
do
{
BOOL try_next = TRUE;
@@ -1103,13 +1121,17 @@ do
case OP_SCRIPT_RUN:
case OP_ASSERT:
case OP_ASSERT_NA:
- rc = set_start_bits(re, tcode, utf);
- if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
- if (rc == SSB_DONE) try_next = FALSE; else
+ rc = set_start_bits(re, tcode, utf, ucp, depthptr);
+ if (rc == SSB_DONE)
+ {
+ try_next = FALSE;
+ }
+ else if (rc == SSB_CONTINUE)
{
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE;
}
+ else return rc; /* FAIL, UNKNOWN, or TOODEEP */
break;
/* If we hit ALT or KET, it means we haven't found anything mandatory in
@@ -1155,8 +1177,8 @@ do
case OP_BRAZERO:
case OP_BRAMINZERO:
case OP_BRAPOSZERO:
- rc = set_start_bits(re, ++tcode, utf);
- if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
+ rc = set_start_bits(re, ++tcode, utf, ucp, depthptr);
+ if (rc == SSB_FAIL || rc == SSB_UNKNOWN || rc == SSB_TOODEEP) return rc;
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE;
break;
@@ -1177,7 +1199,7 @@ do
case OP_QUERY:
case OP_MINQUERY:
case OP_POSQUERY:
- tcode = set_table_bit(re, tcode + 1, FALSE, utf);
+ tcode = set_table_bit(re, tcode + 1, FALSE, utf, ucp);
break;
case OP_STARI:
@@ -1186,7 +1208,7 @@ do
case OP_QUERYI:
case OP_MINQUERYI:
case OP_POSQUERYI:
- tcode = set_table_bit(re, tcode + 1, TRUE, utf);
+ tcode = set_table_bit(re, tcode + 1, TRUE, utf, ucp);
break;
/* Single-char upto sets the bit and tries the next */
@@ -1194,13 +1216,13 @@ do
case OP_UPTO:
case OP_MINUPTO:
case OP_POSUPTO:
- tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, FALSE, utf);
+ tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, FALSE, utf, ucp);
break;
case OP_UPTOI:
case OP_MINUPTOI:
case OP_POSUPTOI:
- tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, TRUE, utf);
+ tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, TRUE, utf, ucp);
break;
/* At least one single char sets the bit and stops */
@@ -1212,7 +1234,7 @@ do
case OP_PLUS:
case OP_MINPLUS:
case OP_POSPLUS:
- (void)set_table_bit(re, tcode + 1, FALSE, utf);
+ (void)set_table_bit(re, tcode + 1, FALSE, utf, ucp);
try_next = FALSE;
break;
@@ -1223,7 +1245,7 @@ do
case OP_PLUSI:
case OP_MINPLUSI:
case OP_POSPLUSI:
- (void)set_table_bit(re, tcode + 1, TRUE, utf);
+ (void)set_table_bit(re, tcode + 1, TRUE, utf, ucp);
try_next = FALSE;
break;
@@ -1652,6 +1674,7 @@ PRIV(study)(pcre2_real_code *re)
int count = 0;
PCRE2_UCHAR *code;
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
+BOOL ucp = (re->overall_options & PCRE2_UCP) != 0;
/* Find start of compiled code */
@@ -1664,7 +1687,8 @@ code units. */
if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
{
- int rc = set_start_bits(re, code, utf);
+ int depth = 0;
+ int rc = set_start_bits(re, code, utf, ucp, &depth);
if (rc == SSB_UNKNOWN) return 1;
/* If a list of starting code units was set up, scan the list to see if only
@@ -1712,27 +1736,27 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
}
/* c contains the code unit value, in the range 0-255. In 8-bit UTF
- mode, only values < 128 can be used. */
+ mode, only values < 128 can be used. In all the other cases, c is a
+ character value. */
#if PCRE2_CODE_UNIT_WIDTH == 8
- if (c > 127) goto DONE;
+ if (utf && c > 127) goto DONE;
#endif
- if (a < 0) a = c; /* First one found */
+ if (a < 0) a = c; /* First one found, save in a */
else if (b < 0) /* Second one found */
{
int d = TABLE_GET((unsigned int)c, re->tables + fcc_offset, c);
#ifdef SUPPORT_UNICODE
-#if PCRE2_CODE_UNIT_WIDTH == 8
- if (utf && UCD_CASESET(c) != 0) goto DONE; /* Multiple case set */
-#else /* 16-bit or 32-bit */
- if (UCD_CASESET(c) != 0) goto DONE; /* Multiple case set */
- if (utf && c > 127) d = UCD_OTHERCASE(c);
-#endif /* Code width */
+ if (utf || ucp)
+ {
+ if (UCD_CASESET(c) != 0) goto DONE; /* Multiple case set */
+ if (c > 127) d = UCD_OTHERCASE(c);
+ }
#endif /* SUPPORT_UNICODE */
- if (d != a) goto DONE; /* Not other case of a */
- b = c;
+ if (d != a) goto DONE; /* Not the other case of a */
+ b = c; /* Save second in b */
}
else goto DONE; /* More than two characters found */
}
diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c
index ec3dd66..981a106 100644
--- a/src/pcre2_substitute.c
+++ b/src/pcre2_substitute.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -49,8 +49,9 @@ POSSIBILITY OF SUCH DAMAGE.
#define SUBSTITUTE_OPTIONS \
(PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
- PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_UNKNOWN_UNSET| \
- PCRE2_SUBSTITUTE_UNSET_EMPTY)
+ PCRE2_SUBSTITUTE_LITERAL|PCRE2_SUBSTITUTE_MATCHED| \
+ PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_REPLACEMENT_ONLY| \
+ PCRE2_SUBSTITUTE_UNKNOWN_UNSET|PCRE2_SUBSTITUTE_UNSET_EMPTY)
@@ -194,6 +195,7 @@ overflow, either give an error immediately, or keep on, accumulating the
length. */
#define CHECKMEMCPY(from,length) \
+ { \
if (!overflowed && lengthleft < length) \
{ \
if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
@@ -209,7 +211,8 @@ length. */
memcpy(buffer + buff_offset, from, CU2BYTES(length)); \
buff_offset += length; \
lengthleft -= length; \
- }
+ } \
+ }
/* Here's the function */
@@ -226,11 +229,14 @@ int forcecasereset = 0;
uint32_t ovector_count;
uint32_t goptions = 0;
uint32_t suboptions;
-BOOL match_data_created = FALSE;
-BOOL literal = FALSE;
+pcre2_match_data *internal_match_data = NULL;
+BOOL escaped_literal = FALSE;
BOOL overflowed = FALSE;
+BOOL use_existing_match;
+BOOL replacement_only;
#ifdef SUPPORT_UNICODE
BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
+BOOL ucp = (code->overall_options & PCRE2_UCP) != 0;
#endif
PCRE2_UCHAR temp[6];
PCRE2_SPTR ptr;
@@ -248,23 +254,54 @@ lengthleft = buff_length = *blength;
*blength = PCRE2_UNSET;
ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
-/* Partial matching is not valid. This must come after setting *blength to
+/* Partial matching is not valid. This must come after setting *blength to
PCRE2_UNSET, so as not to imply an offset in the replacement. */
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
return PCRE2_ERROR_BADOPTION;
-/* If no match data block is provided, create one. */
+/* Check for using a match that has already happened. Note that the subject
+pointer in the match data may be NULL after a no-match. */
+
+use_existing_match = ((options & PCRE2_SUBSTITUTE_MATCHED) != 0);
+replacement_only = ((options & PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) != 0);
+
+/* If starting from an existing match, there must be an externally provided
+match data block. We create an internal match_data block in two cases: (a) an
+external one is not supplied (and we are not starting from an existing match);
+(b) an existing match is to be used for the first substitution. In the latter
+case, we copy the existing match into the internal block. This ensures that no
+changes are made to the existing match data block. */
if (match_data == NULL)
{
+ pcre2_general_context *gcontext;
+ if (use_existing_match) return PCRE2_ERROR_NULL;
+ gcontext = (mcontext == NULL)?
+ (pcre2_general_context *)code :
+ (pcre2_general_context *)mcontext;
+ match_data = internal_match_data =
+ pcre2_match_data_create_from_pattern(code, gcontext);
+ if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
+ }
+
+else if (use_existing_match)
+ {
pcre2_general_context *gcontext = (mcontext == NULL)?
(pcre2_general_context *)code :
(pcre2_general_context *)mcontext;
- match_data = pcre2_match_data_create_from_pattern(code, gcontext);
- if (match_data == NULL) return PCRE2_ERROR_NOMEMORY;
- match_data_created = TRUE;
+ int pairs = (code->top_bracket + 1 < match_data->oveccount)?
+ code->top_bracket + 1 : match_data->oveccount;
+ internal_match_data = pcre2_match_data_create(match_data->oveccount,
+ gcontext);
+ if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
+ memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector)
+ + 2*pairs*sizeof(PCRE2_SIZE));
+ match_data = internal_match_data;
}
+
+/* Remember ovector details */
+
ovector = pcre2_get_ovector_pointer(match_data);
ovector_count = pcre2_get_ovector_count(match_data);
@@ -286,7 +323,7 @@ repend = replacement + rlength;
#ifdef SUPPORT_UNICODE
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
{
- rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
+ rc = PRIV(valid_utf)(replacement, rlength, &(match_data->startchar));
if (rc != 0)
{
match_data->leftchar = 0;
@@ -300,7 +337,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
suboptions = options & SUBSTITUTE_OPTIONS;
options &= ~SUBSTITUTE_OPTIONS;
-/* Copy up to the start offset */
+/* Error if the start match offset is greater than the length of the subject. */
if (start_offset > length)
{
@@ -308,9 +345,13 @@ if (start_offset > length)
rc = PCRE2_ERROR_BADOFFSET;
goto EXIT;
}
-CHECKMEMCPY(subject, start_offset);
-/* Loop for global substituting. */
+/* Copy up to the start offset, unless only the replacement is required. */
+
+if (!replacement_only) CHECKMEMCPY(subject, start_offset);
+
+/* Loop for global substituting. If PCRE2_SUBSTITUTE_MATCHED is set, the first
+match is taken from the match_data that was passed in. */
subs = 0;
do
@@ -318,7 +359,12 @@ do
PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
uint32_t ptrstackptr = 0;
- rc = pcre2_match(code, subject, length, start_offset, options|goptions,
+ if (use_existing_match)
+ {
+ rc = match_data->rc;
+ use_existing_match = FALSE;
+ }
+ else rc = pcre2_match(code, subject, length, start_offset, options|goptions,
match_data, mcontext);
#ifdef SUPPORT_UNICODE
@@ -364,44 +410,44 @@ do
#endif
}
- /* Copy what we have advanced past, reset the special global options, and
- continue to the next match. */
+ /* Copy what we have advanced past (unless not required), reset the special
+ global options, and continue to the next match. */
fraglength = start_offset - save_start;
- CHECKMEMCPY(subject + save_start, fraglength);
+ if (!replacement_only) CHECKMEMCPY(subject + save_start, fraglength);
goptions = 0;
continue;
}
/* Handle a successful match. Matches that use \K to end before they start
or start before the current point in the subject are not supported. */
-
+
if (ovector[1] < ovector[0] || ovector[0] < start_offset)
{
rc = PCRE2_ERROR_BADSUBSPATTERN;
goto EXIT;
}
-
- /* Check for the same match as previous. This is legitimate after matching an
+
+ /* Check for the same match as previous. This is legitimate after matching an
empty string that starts after the initial match offset. We have tried again
at the match point in case the pattern is one like /(?<=\G.)/ which can never
match at its starting point, so running the match achieves the bumpalong. If
we do get the same (null) match at the original match point, it isn't such a
pattern, so we now do the empty string magic. In all other cases, a repeat
match should never occur. */
-
+
if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
- {
- if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
- {
- goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
- ovecsave[2] = start_offset;
- continue; /* Back to the top of the loop */
+ {
+ if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
+ {
+ goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
+ ovecsave[2] = start_offset;
+ continue; /* Back to the top of the loop */
}
rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
- goto EXIT;
- }
-
+ goto EXIT;
+ }
+
/* Count substitutions with a paranoid check for integer overflow; surely no
real call to this function would ever hit this! */
@@ -412,21 +458,30 @@ do
}
subs++;
- /* Copy the text leading up to the match, and remember where the insert
- begins and how many ovector pairs are set. */
+ /* Copy the text leading up to the match (unless not required), and remember
+ where the insert begins and how many ovector pairs are set. */
if (rc == 0) rc = ovector_count;
fraglength = ovector[0] - start_offset;
- CHECKMEMCPY(subject + start_offset, fraglength);
+ if (!replacement_only) CHECKMEMCPY(subject + start_offset, fraglength);
scb.output_offsets[0] = buff_offset;
scb.oveccount = rc;
- /* Process the replacement string. Literal mode is set by \Q, but only in
- extended mode when backslashes are being interpreted. In extended mode we
- must handle nested substrings that are to be reprocessed. */
+ /* Process the replacement string. If the entire replacement is literal, just
+ copy it with length check. */
ptr = replacement;
- for (;;)
+ if ((suboptions & PCRE2_SUBSTITUTE_LITERAL) != 0)
+ {
+ CHECKMEMCPY(ptr, rlength);
+ }
+
+ /* Within a non-literal replacement, which must be scanned character by
+ character, local literal mode can be set by \Q, but only in extended mode
+ when backslashes are being interpreted. In extended mode we must handle
+ nested substrings that are to be reprocessed. */
+
+ else for (;;)
{
uint32_t ch;
unsigned int chlen;
@@ -443,11 +498,11 @@ do
/* Handle the next character */
- if (literal)
+ if (escaped_literal)
{
if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
{
- literal = FALSE;
+ escaped_literal = FALSE;
ptr += 2;
continue;
}
@@ -704,7 +759,7 @@ do
if (forcecase != 0)
{
#ifdef SUPPORT_UNICODE
- if (utf)
+ if (utf || ucp)
{
uint32_t type = UCD_CHARTYPE(ch);
if (PRIV(ucp_gentype)[type] == ucp_L &&
@@ -784,7 +839,7 @@ do
continue;
case ESC_Q:
- literal = TRUE;
+ escaped_literal = TRUE;
continue;
case 0: /* Data character */
@@ -806,7 +861,7 @@ do
if (forcecase != 0)
{
#ifdef SUPPORT_UNICODE
- if (utf)
+ if (utf || ucp)
{
uint32_t type = UCD_CHARTYPE(ch);
if (PRIV(ucp_gentype)[type] == ucp_L &&
@@ -835,53 +890,59 @@ do
} /* End handling a literal code unit */
} /* End of loop for scanning the replacement. */
- /* The replacement has been copied to the output, or its size has been
- remembered. Do the callout if there is one and we have done an actual
+ /* The replacement has been copied to the output, or its size has been
+ remembered. Do the callout if there is one and we have done an actual
replacement. */
-
+
if (!overflowed && mcontext != NULL && mcontext->substitute_callout != NULL)
{
- scb.subscount = subs;
+ scb.subscount = subs;
scb.output_offsets[1] = buff_offset;
- rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data);
+ rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data);
- /* A non-zero return means cancel this substitution. Instead, copy the
+ /* A non-zero return means cancel this substitution. Instead, copy the
matched string fragment. */
if (rc != 0)
{
PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0];
PCRE2_SIZE oldlength = ovector[1] - ovector[0];
-
+
buff_offset -= newlength;
lengthleft += newlength;
- CHECKMEMCPY(subject + ovector[0], oldlength);
-
+ if (!replacement_only) CHECKMEMCPY(subject + ovector[0], oldlength);
+
/* A negative return means do not do any more. */
-
+
if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL);
}
- }
-
+ }
+
/* Save the details of this match. See above for how this data is used. If we
- matched an empty string, do the magic for global matches. Finally, update the
- start offset to point to the rest of the subject string. */
-
- ovecsave[0] = ovector[0];
- ovecsave[1] = ovector[1];
+ matched an empty string, do the magic for global matches. Update the start
+ offset to point to the rest of the subject string. If we re-used an existing
+ match for the first match, match_data already points to the internal copy. */
+
+ ovecsave[0] = ovector[0];
+ ovecsave[1] = ovector[1];
ovecsave[2] = start_offset;
-
+
goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 :
PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
start_offset = ovector[1];
} while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */
-/* Copy the rest of the subject. */
+/* Copy the rest of the subject unless not required, and terminate the output
+with a binary zero. */
+
+if (!replacement_only)
+ {
+ fraglength = length - start_offset;
+ CHECKMEMCPY(subject + start_offset, fraglength);
+ }
-fraglength = length - start_offset;
-CHECKMEMCPY(subject + start_offset, fraglength);
temp[0] = 0;
-CHECKMEMCPY(temp , 1);
+CHECKMEMCPY(temp, 1);
/* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
and matching has carried on after a full buffer, in order to compute the length
@@ -903,7 +964,7 @@ else
}
EXIT:
-if (match_data_created) pcre2_match_data_free(match_data);
+if (internal_match_data != NULL) pcre2_match_data_free(internal_match_data);
else match_data->rc = rc;
return rc;
diff --git a/src/pcre2_tables.c b/src/pcre2_tables.c
index 25531d9..b10de45 100644
--- a/src/pcre2_tables.c
+++ b/src/pcre2_tables.c
@@ -265,6 +265,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0"
#define STRING_Cham0 STR_C STR_h STR_a STR_m "\0"
#define STRING_Cherokee0 STR_C STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0"
+#define STRING_Chorasmian0 STR_C STR_h STR_o STR_r STR_a STR_s STR_m STR_i STR_a STR_n "\0"
#define STRING_Cn0 STR_C STR_n "\0"
#define STRING_Co0 STR_C STR_o "\0"
#define STRING_Common0 STR_C STR_o STR_m STR_m STR_o STR_n "\0"
@@ -275,6 +276,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
+#define STRING_Dives_Akuru0 STR_D STR_i STR_v STR_e STR_s STR_UNDERSCORE STR_A STR_k STR_u STR_r STR_u "\0"
#define STRING_Dogra0 STR_D STR_o STR_g STR_r STR_a "\0"
#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
@@ -306,6 +308,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Katakana0 STR_K STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0"
#define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0"
#define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
+#define STRING_Khitan_Small_Script0 STR_K STR_h STR_i STR_t STR_a STR_n STR_UNDERSCORE STR_S STR_m STR_a STR_l STR_l STR_UNDERSCORE STR_S STR_c STR_r STR_i STR_p STR_t "\0"
#define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0"
#define STRING_Khojki0 STR_K STR_h STR_o STR_j STR_k STR_i "\0"
#define STRING_Khudawadi0 STR_K STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0"
@@ -429,6 +432,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Xsp0 STR_X STR_s STR_p "\0"
#define STRING_Xuc0 STR_X STR_u STR_c "\0"
#define STRING_Xwd0 STR_X STR_w STR_d "\0"
+#define STRING_Yezidi0 STR_Y STR_e STR_z STR_i STR_d STR_i "\0"
#define STRING_Yi0 STR_Y STR_i "\0"
#define STRING_Z0 STR_Z "\0"
#define STRING_Zanabazar_Square0 STR_Z STR_a STR_n STR_a STR_b STR_a STR_z STR_a STR_r STR_UNDERSCORE STR_S STR_q STR_u STR_a STR_r STR_e "\0"
@@ -464,6 +468,7 @@ const char PRIV(utt_names)[] =
STRING_Chakma0
STRING_Cham0
STRING_Cherokee0
+ STRING_Chorasmian0
STRING_Cn0
STRING_Co0
STRING_Common0
@@ -474,6 +479,7 @@ const char PRIV(utt_names)[] =
STRING_Cyrillic0
STRING_Deseret0
STRING_Devanagari0
+ STRING_Dives_Akuru0
STRING_Dogra0
STRING_Duployan0
STRING_Egyptian_Hieroglyphs0
@@ -505,6 +511,7 @@ const char PRIV(utt_names)[] =
STRING_Katakana0
STRING_Kayah_Li0
STRING_Kharoshthi0
+ STRING_Khitan_Small_Script0
STRING_Khmer0
STRING_Khojki0
STRING_Khudawadi0
@@ -628,6 +635,7 @@ const char PRIV(utt_names)[] =
STRING_Xsp0
STRING_Xuc0
STRING_Xwd0
+ STRING_Yezidi0
STRING_Yi0
STRING_Z0
STRING_Zanabazar_Square0
@@ -663,176 +671,180 @@ const ucp_type_table PRIV(utt)[] = {
{ 203, PT_SC, ucp_Chakma },
{ 210, PT_SC, ucp_Cham },
{ 215, PT_SC, ucp_Cherokee },
- { 224, PT_PC, ucp_Cn },
- { 227, PT_PC, ucp_Co },
- { 230, PT_SC, ucp_Common },
- { 237, PT_SC, ucp_Coptic },
- { 244, PT_PC, ucp_Cs },
- { 247, PT_SC, ucp_Cuneiform },
- { 257, PT_SC, ucp_Cypriot },
- { 265, PT_SC, ucp_Cyrillic },
- { 274, PT_SC, ucp_Deseret },
- { 282, PT_SC, ucp_Devanagari },
- { 293, PT_SC, ucp_Dogra },
- { 299, PT_SC, ucp_Duployan },
- { 308, PT_SC, ucp_Egyptian_Hieroglyphs },
- { 329, PT_SC, ucp_Elbasan },
- { 337, PT_SC, ucp_Elymaic },
- { 345, PT_SC, ucp_Ethiopic },
- { 354, PT_SC, ucp_Georgian },
- { 363, PT_SC, ucp_Glagolitic },
- { 374, PT_SC, ucp_Gothic },
- { 381, PT_SC, ucp_Grantha },
- { 389, PT_SC, ucp_Greek },
- { 395, PT_SC, ucp_Gujarati },
- { 404, PT_SC, ucp_Gunjala_Gondi },
- { 418, PT_SC, ucp_Gurmukhi },
- { 427, PT_SC, ucp_Han },
- { 431, PT_SC, ucp_Hangul },
- { 438, PT_SC, ucp_Hanifi_Rohingya },
- { 454, PT_SC, ucp_Hanunoo },
- { 462, PT_SC, ucp_Hatran },
- { 469, PT_SC, ucp_Hebrew },
- { 476, PT_SC, ucp_Hiragana },
- { 485, PT_SC, ucp_Imperial_Aramaic },
- { 502, PT_SC, ucp_Inherited },
- { 512, PT_SC, ucp_Inscriptional_Pahlavi },
- { 534, PT_SC, ucp_Inscriptional_Parthian },
- { 557, PT_SC, ucp_Javanese },
- { 566, PT_SC, ucp_Kaithi },
- { 573, PT_SC, ucp_Kannada },
- { 581, PT_SC, ucp_Katakana },
- { 590, PT_SC, ucp_Kayah_Li },
- { 599, PT_SC, ucp_Kharoshthi },
- { 610, PT_SC, ucp_Khmer },
- { 616, PT_SC, ucp_Khojki },
- { 623, PT_SC, ucp_Khudawadi },
- { 633, PT_GC, ucp_L },
- { 635, PT_LAMP, 0 },
- { 638, PT_SC, ucp_Lao },
- { 642, PT_SC, ucp_Latin },
- { 648, PT_SC, ucp_Lepcha },
- { 655, PT_SC, ucp_Limbu },
- { 661, PT_SC, ucp_Linear_A },
- { 670, PT_SC, ucp_Linear_B },
- { 679, PT_SC, ucp_Lisu },
- { 684, PT_PC, ucp_Ll },
- { 687, PT_PC, ucp_Lm },
- { 690, PT_PC, ucp_Lo },
- { 693, PT_PC, ucp_Lt },
- { 696, PT_PC, ucp_Lu },
- { 699, PT_SC, ucp_Lycian },
- { 706, PT_SC, ucp_Lydian },
- { 713, PT_GC, ucp_M },
- { 715, PT_SC, ucp_Mahajani },
- { 724, PT_SC, ucp_Makasar },
- { 732, PT_SC, ucp_Malayalam },
- { 742, PT_SC, ucp_Mandaic },
- { 750, PT_SC, ucp_Manichaean },
- { 761, PT_SC, ucp_Marchen },
- { 769, PT_SC, ucp_Masaram_Gondi },
- { 783, PT_PC, ucp_Mc },
- { 786, PT_PC, ucp_Me },
- { 789, PT_SC, ucp_Medefaidrin },
- { 801, PT_SC, ucp_Meetei_Mayek },
- { 814, PT_SC, ucp_Mende_Kikakui },
- { 828, PT_SC, ucp_Meroitic_Cursive },
- { 845, PT_SC, ucp_Meroitic_Hieroglyphs },
- { 866, PT_SC, ucp_Miao },
- { 871, PT_PC, ucp_Mn },
- { 874, PT_SC, ucp_Modi },
- { 879, PT_SC, ucp_Mongolian },
- { 889, PT_SC, ucp_Mro },
- { 893, PT_SC, ucp_Multani },
- { 901, PT_SC, ucp_Myanmar },
- { 909, PT_GC, ucp_N },
- { 911, PT_SC, ucp_Nabataean },
- { 921, PT_SC, ucp_Nandinagari },
- { 933, PT_PC, ucp_Nd },
- { 936, PT_SC, ucp_New_Tai_Lue },
- { 948, PT_SC, ucp_Newa },
- { 953, PT_SC, ucp_Nko },
- { 957, PT_PC, ucp_Nl },
- { 960, PT_PC, ucp_No },
- { 963, PT_SC, ucp_Nushu },
- { 969, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
- { 992, PT_SC, ucp_Ogham },
- { 998, PT_SC, ucp_Ol_Chiki },
- { 1007, PT_SC, ucp_Old_Hungarian },
- { 1021, PT_SC, ucp_Old_Italic },
- { 1032, PT_SC, ucp_Old_North_Arabian },
- { 1050, PT_SC, ucp_Old_Permic },
- { 1061, PT_SC, ucp_Old_Persian },
- { 1073, PT_SC, ucp_Old_Sogdian },
- { 1085, PT_SC, ucp_Old_South_Arabian },
- { 1103, PT_SC, ucp_Old_Turkic },
- { 1114, PT_SC, ucp_Oriya },
- { 1120, PT_SC, ucp_Osage },
- { 1126, PT_SC, ucp_Osmanya },
- { 1134, PT_GC, ucp_P },
- { 1136, PT_SC, ucp_Pahawh_Hmong },
- { 1149, PT_SC, ucp_Palmyrene },
- { 1159, PT_SC, ucp_Pau_Cin_Hau },
- { 1171, PT_PC, ucp_Pc },
- { 1174, PT_PC, ucp_Pd },
- { 1177, PT_PC, ucp_Pe },
- { 1180, PT_PC, ucp_Pf },
- { 1183, PT_SC, ucp_Phags_Pa },
- { 1192, PT_SC, ucp_Phoenician },
- { 1203, PT_PC, ucp_Pi },
- { 1206, PT_PC, ucp_Po },
- { 1209, PT_PC, ucp_Ps },
- { 1212, PT_SC, ucp_Psalter_Pahlavi },
- { 1228, PT_SC, ucp_Rejang },
- { 1235, PT_SC, ucp_Runic },
- { 1241, PT_GC, ucp_S },
- { 1243, PT_SC, ucp_Samaritan },
- { 1253, PT_SC, ucp_Saurashtra },
- { 1264, PT_PC, ucp_Sc },
- { 1267, PT_SC, ucp_Sharada },
- { 1275, PT_SC, ucp_Shavian },
- { 1283, PT_SC, ucp_Siddham },
- { 1291, PT_SC, ucp_SignWriting },
- { 1303, PT_SC, ucp_Sinhala },
- { 1311, PT_PC, ucp_Sk },
- { 1314, PT_PC, ucp_Sm },
- { 1317, PT_PC, ucp_So },
- { 1320, PT_SC, ucp_Sogdian },
- { 1328, PT_SC, ucp_Sora_Sompeng },
- { 1341, PT_SC, ucp_Soyombo },
- { 1349, PT_SC, ucp_Sundanese },
- { 1359, PT_SC, ucp_Syloti_Nagri },
- { 1372, PT_SC, ucp_Syriac },
- { 1379, PT_SC, ucp_Tagalog },
- { 1387, PT_SC, ucp_Tagbanwa },
- { 1396, PT_SC, ucp_Tai_Le },
- { 1403, PT_SC, ucp_Tai_Tham },
- { 1412, PT_SC, ucp_Tai_Viet },
- { 1421, PT_SC, ucp_Takri },
- { 1427, PT_SC, ucp_Tamil },
- { 1433, PT_SC, ucp_Tangut },
- { 1440, PT_SC, ucp_Telugu },
- { 1447, PT_SC, ucp_Thaana },
- { 1454, PT_SC, ucp_Thai },
- { 1459, PT_SC, ucp_Tibetan },
- { 1467, PT_SC, ucp_Tifinagh },
- { 1476, PT_SC, ucp_Tirhuta },
- { 1484, PT_SC, ucp_Ugaritic },
- { 1493, PT_SC, ucp_Unknown },
- { 1501, PT_SC, ucp_Vai },
- { 1505, PT_SC, ucp_Wancho },
- { 1512, PT_SC, ucp_Warang_Citi },
- { 1524, PT_ALNUM, 0 },
- { 1528, PT_PXSPACE, 0 },
- { 1532, PT_SPACE, 0 },
- { 1536, PT_UCNC, 0 },
- { 1540, PT_WORD, 0 },
- { 1544, PT_SC, ucp_Yi },
- { 1547, PT_GC, ucp_Z },
- { 1549, PT_SC, ucp_Zanabazar_Square },
- { 1566, PT_PC, ucp_Zl },
- { 1569, PT_PC, ucp_Zp },
- { 1572, PT_PC, ucp_Zs }
+ { 224, PT_SC, ucp_Chorasmian },
+ { 235, PT_PC, ucp_Cn },
+ { 238, PT_PC, ucp_Co },
+ { 241, PT_SC, ucp_Common },
+ { 248, PT_SC, ucp_Coptic },
+ { 255, PT_PC, ucp_Cs },
+ { 258, PT_SC, ucp_Cuneiform },
+ { 268, PT_SC, ucp_Cypriot },
+ { 276, PT_SC, ucp_Cyrillic },
+ { 285, PT_SC, ucp_Deseret },
+ { 293, PT_SC, ucp_Devanagari },
+ { 304, PT_SC, ucp_Dives_Akuru },
+ { 316, PT_SC, ucp_Dogra },
+ { 322, PT_SC, ucp_Duployan },
+ { 331, PT_SC, ucp_Egyptian_Hieroglyphs },
+ { 352, PT_SC, ucp_Elbasan },
+ { 360, PT_SC, ucp_Elymaic },
+ { 368, PT_SC, ucp_Ethiopic },
+ { 377, PT_SC, ucp_Georgian },
+ { 386, PT_SC, ucp_Glagolitic },
+ { 397, PT_SC, ucp_Gothic },
+ { 404, PT_SC, ucp_Grantha },
+ { 412, PT_SC, ucp_Greek },
+ { 418, PT_SC, ucp_Gujarati },
+ { 427, PT_SC, ucp_Gunjala_Gondi },
+ { 441, PT_SC, ucp_Gurmukhi },
+ { 450, PT_SC, ucp_Han },
+ { 454, PT_SC, ucp_Hangul },
+ { 461, PT_SC, ucp_Hanifi_Rohingya },
+ { 477, PT_SC, ucp_Hanunoo },
+ { 485, PT_SC, ucp_Hatran },
+ { 492, PT_SC, ucp_Hebrew },
+ { 499, PT_SC, ucp_Hiragana },
+ { 508, PT_SC, ucp_Imperial_Aramaic },
+ { 525, PT_SC, ucp_Inherited },
+ { 535, PT_SC, ucp_Inscriptional_Pahlavi },
+ { 557, PT_SC, ucp_Inscriptional_Parthian },
+ { 580, PT_SC, ucp_Javanese },
+ { 589, PT_SC, ucp_Kaithi },
+ { 596, PT_SC, ucp_Kannada },
+ { 604, PT_SC, ucp_Katakana },
+ { 613, PT_SC, ucp_Kayah_Li },
+ { 622, PT_SC, ucp_Kharoshthi },
+ { 633, PT_SC, ucp_Khitan_Small_Script },
+ { 653, PT_SC, ucp_Khmer },
+ { 659, PT_SC, ucp_Khojki },
+ { 666, PT_SC, ucp_Khudawadi },
+ { 676, PT_GC, ucp_L },
+ { 678, PT_LAMP, 0 },
+ { 681, PT_SC, ucp_Lao },
+ { 685, PT_SC, ucp_Latin },
+ { 691, PT_SC, ucp_Lepcha },
+ { 698, PT_SC, ucp_Limbu },
+ { 704, PT_SC, ucp_Linear_A },
+ { 713, PT_SC, ucp_Linear_B },
+ { 722, PT_SC, ucp_Lisu },
+ { 727, PT_PC, ucp_Ll },
+ { 730, PT_PC, ucp_Lm },
+ { 733, PT_PC, ucp_Lo },
+ { 736, PT_PC, ucp_Lt },
+ { 739, PT_PC, ucp_Lu },
+ { 742, PT_SC, ucp_Lycian },
+ { 749, PT_SC, ucp_Lydian },
+ { 756, PT_GC, ucp_M },
+ { 758, PT_SC, ucp_Mahajani },
+ { 767, PT_SC, ucp_Makasar },
+ { 775, PT_SC, ucp_Malayalam },
+ { 785, PT_SC, ucp_Mandaic },
+ { 793, PT_SC, ucp_Manichaean },
+ { 804, PT_SC, ucp_Marchen },
+ { 812, PT_SC, ucp_Masaram_Gondi },
+ { 826, PT_PC, ucp_Mc },
+ { 829, PT_PC, ucp_Me },
+ { 832, PT_SC, ucp_Medefaidrin },
+ { 844, PT_SC, ucp_Meetei_Mayek },
+ { 857, PT_SC, ucp_Mende_Kikakui },
+ { 871, PT_SC, ucp_Meroitic_Cursive },
+ { 888, PT_SC, ucp_Meroitic_Hieroglyphs },
+ { 909, PT_SC, ucp_Miao },
+ { 914, PT_PC, ucp_Mn },
+ { 917, PT_SC, ucp_Modi },
+ { 922, PT_SC, ucp_Mongolian },
+ { 932, PT_SC, ucp_Mro },
+ { 936, PT_SC, ucp_Multani },
+ { 944, PT_SC, ucp_Myanmar },
+ { 952, PT_GC, ucp_N },
+ { 954, PT_SC, ucp_Nabataean },
+ { 964, PT_SC, ucp_Nandinagari },
+ { 976, PT_PC, ucp_Nd },
+ { 979, PT_SC, ucp_New_Tai_Lue },
+ { 991, PT_SC, ucp_Newa },
+ { 996, PT_SC, ucp_Nko },
+ { 1000, PT_PC, ucp_Nl },
+ { 1003, PT_PC, ucp_No },
+ { 1006, PT_SC, ucp_Nushu },
+ { 1012, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
+ { 1035, PT_SC, ucp_Ogham },
+ { 1041, PT_SC, ucp_Ol_Chiki },
+ { 1050, PT_SC, ucp_Old_Hungarian },
+ { 1064, PT_SC, ucp_Old_Italic },
+ { 1075, PT_SC, ucp_Old_North_Arabian },
+ { 1093, PT_SC, ucp_Old_Permic },
+ { 1104, PT_SC, ucp_Old_Persian },
+ { 1116, PT_SC, ucp_Old_Sogdian },
+ { 1128, PT_SC, ucp_Old_South_Arabian },
+ { 1146, PT_SC, ucp_Old_Turkic },
+ { 1157, PT_SC, ucp_Oriya },
+ { 1163, PT_SC, ucp_Osage },
+ { 1169, PT_SC, ucp_Osmanya },
+ { 1177, PT_GC, ucp_P },
+ { 1179, PT_SC, ucp_Pahawh_Hmong },
+ { 1192, PT_SC, ucp_Palmyrene },
+ { 1202, PT_SC, ucp_Pau_Cin_Hau },
+ { 1214, PT_PC, ucp_Pc },
+ { 1217, PT_PC, ucp_Pd },
+ { 1220, PT_PC, ucp_Pe },
+ { 1223, PT_PC, ucp_Pf },
+ { 1226, PT_SC, ucp_Phags_Pa },
+ { 1235, PT_SC, ucp_Phoenician },
+ { 1246, PT_PC, ucp_Pi },
+ { 1249, PT_PC, ucp_Po },
+ { 1252, PT_PC, ucp_Ps },
+ { 1255, PT_SC, ucp_Psalter_Pahlavi },
+ { 1271, PT_SC, ucp_Rejang },
+ { 1278, PT_SC, ucp_Runic },
+ { 1284, PT_GC, ucp_S },
+ { 1286, PT_SC, ucp_Samaritan },
+ { 1296, PT_SC, ucp_Saurashtra },
+ { 1307, PT_PC, ucp_Sc },
+ { 1310, PT_SC, ucp_Sharada },
+ { 1318, PT_SC, ucp_Shavian },
+ { 1326, PT_SC, ucp_Siddham },
+ { 1334, PT_SC, ucp_SignWriting },
+ { 1346, PT_SC, ucp_Sinhala },
+ { 1354, PT_PC, ucp_Sk },
+ { 1357, PT_PC, ucp_Sm },
+ { 1360, PT_PC, ucp_So },
+ { 1363, PT_SC, ucp_Sogdian },
+ { 1371, PT_SC, ucp_Sora_Sompeng },
+ { 1384, PT_SC, ucp_Soyombo },
+ { 1392, PT_SC, ucp_Sundanese },
+ { 1402, PT_SC, ucp_Syloti_Nagri },
+ { 1415, PT_SC, ucp_Syriac },
+ { 1422, PT_SC, ucp_Tagalog },
+ { 1430, PT_SC, ucp_Tagbanwa },
+ { 1439, PT_SC, ucp_Tai_Le },
+ { 1446, PT_SC, ucp_Tai_Tham },
+ { 1455, PT_SC, ucp_Tai_Viet },
+ { 1464, PT_SC, ucp_Takri },
+ { 1470, PT_SC, ucp_Tamil },
+ { 1476, PT_SC, ucp_Tangut },
+ { 1483, PT_SC, ucp_Telugu },
+ { 1490, PT_SC, ucp_Thaana },
+ { 1497, PT_SC, ucp_Thai },
+ { 1502, PT_SC, ucp_Tibetan },
+ { 1510, PT_SC, ucp_Tifinagh },
+ { 1519, PT_SC, ucp_Tirhuta },
+ { 1527, PT_SC, ucp_Ugaritic },
+ { 1536, PT_SC, ucp_Unknown },
+ { 1544, PT_SC, ucp_Vai },
+ { 1548, PT_SC, ucp_Wancho },
+ { 1555, PT_SC, ucp_Warang_Citi },
+ { 1567, PT_ALNUM, 0 },
+ { 1571, PT_PXSPACE, 0 },
+ { 1575, PT_SPACE, 0 },
+ { 1579, PT_UCNC, 0 },
+ { 1583, PT_WORD, 0 },
+ { 1587, PT_SC, ucp_Yezidi },
+ { 1594, PT_SC, ucp_Yi },
+ { 1597, PT_GC, ucp_Z },
+ { 1599, PT_SC, ucp_Zanabazar_Square },
+ { 1616, PT_PC, ucp_Zl },
+ { 1619, PT_PC, ucp_Zp },
+ { 1622, PT_PC, ucp_Zs }
};
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
diff --git a/src/pcre2_ucd.c b/src/pcre2_ucd.c
index 55ba03b..46e23ff 100644
--- a/src/pcre2_ucd.c
+++ b/src/pcre2_ucd.c
@@ -20,7 +20,7 @@ needed. */
/* Unicode character database. */
/* This file was autogenerated by the MultiStage2.py script. */
-/* Total size: 99316 bytes, block size: 128. */
+/* Total size: 101044 bytes, block size: 128. */
/* The tables herein are needed only when UCP support is built,
and in PCRE2 that happens automatically with UTF support.
@@ -39,7 +39,7 @@ const uint16_t PRIV(ucd_stage2)[] = {0};
const uint32_t PRIV(ucd_caseless_sets)[] = {0};
#else
-const char *PRIV(unicode_version) = "12.1.0";
+const char *PRIV(unicode_version) = "13.0.0";
/* If the 32-bit library is run in non-32-bit mode, character values
greater than 0x10ffff may be encountered. For these we set up a
@@ -116,15 +116,16 @@ set of decimal digits. It is used to ensure that all the digits in
a script run come from the same set. */
const uint32_t PRIV(ucd_digit_sets)[] = {
- 63, /* Number of subsequent values */
+ 65, /* Number of subsequent values */
0x00039, 0x00669, 0x006f9, 0x007c9, 0x0096f, 0x009ef, 0x00a6f, 0x00aef,
0x00b6f, 0x00bef, 0x00c6f, 0x00cef, 0x00d6f, 0x00def, 0x00e59, 0x00ed9,
0x00f29, 0x01049, 0x01099, 0x017e9, 0x01819, 0x0194f, 0x019d9, 0x01a89,
0x01a99, 0x01b59, 0x01bb9, 0x01c49, 0x01c59, 0x0a629, 0x0a8d9, 0x0a909,
0x0a9d9, 0x0a9f9, 0x0aa59, 0x0abf9, 0x0ff19, 0x104a9, 0x10d39, 0x1106f,
0x110f9, 0x1113f, 0x111d9, 0x112f9, 0x11459, 0x114d9, 0x11659, 0x116c9,
- 0x11739, 0x118e9, 0x11c59, 0x11d59, 0x11da9, 0x16a69, 0x16b59, 0x1d7d7,
- 0x1d7e1, 0x1d7eb, 0x1d7f5, 0x1d7ff, 0x1e149, 0x1e2f9, 0x1e959,
+ 0x11739, 0x118e9, 0x11959, 0x11c59, 0x11d59, 0x11da9, 0x16a69, 0x16b59,
+ 0x1d7d7, 0x1d7e1, 0x1d7eb, 0x1d7f5, 0x1d7ff, 0x1e149, 0x1e2f9, 0x1e959,
+ 0x1fbf9,
};
/* This vector is a list of lists of scripts for the Script Extension
@@ -136,14 +137,14 @@ const uint8_t PRIV(ucd_script_sets)[] = {
/* 4 */ 1, 144, 0,
/* 7 */ 1, 50, 0,
/* 10 */ 1, 56, 0,
- /* 13 */ 2, 17, 0,
- /* 16 */ 3, 15, 0,
- /* 19 */ 4, 23, 0,
- /* 22 */ 6, 84, 0,
- /* 25 */ 12, 36, 0,
- /* 28 */ 13, 18, 0,
- /* 31 */ 13, 34, 0,
- /* 34 */ 13, 118, 0,
+ /* 13 */ 3, 15, 0,
+ /* 16 */ 4, 23, 0,
+ /* 19 */ 6, 84, 0,
+ /* 22 */ 12, 36, 0,
+ /* 25 */ 13, 18, 0,
+ /* 28 */ 13, 34, 0,
+ /* 31 */ 13, 118, 0,
+ /* 34 */ 13, 50, 0,
/* 37 */ 15, 107, 0,
/* 40 */ 15, 150, 0,
/* 43 */ 15, 100, 0,
@@ -152,35 +153,37 @@ const uint8_t PRIV(ucd_script_sets)[] = {
/* 52 */ 107, 54, 0,
/* 55 */ 21, 108, 0,
/* 58 */ 22, 129, 0,
- /* 61 */ 27, 30, 0,
- /* 64 */ 29, 150, 0,
- /* 67 */ 34, 38, 0,
- /* 70 */ 38, 65, 0,
- /* 73 */ 1, 50, 56, 0,
- /* 77 */ 3, 96, 49, 0,
- /* 81 */ 96, 39, 53, 0,
- /* 85 */ 12, 110, 36, 0,
- /* 89 */ 15, 107, 29, 0,
- /* 93 */ 15, 107, 34, 0,
- /* 97 */ 23, 27, 30, 0,
- /* 101 */ 69, 34, 39, 0,
- /* 105 */ 1, 144, 50, 56, 0,
- /* 110 */ 3, 15, 107, 29, 0,
- /* 115 */ 7, 25, 52, 51, 0,
- /* 120 */ 15, 142, 85, 111, 0,
- /* 125 */ 4, 24, 23, 27, 30, 0,
- /* 131 */ 4, 24, 23, 27, 30, 61, 0,
- /* 138 */ 15, 29, 37, 44, 54, 55, 0,
- /* 145 */ 132, 1, 95, 112, 121, 144, 148, 50, 0,
- /* 154 */ 3, 15, 107, 29, 150, 44, 55, 124, 0,
- /* 163 */ 15, 142, 21, 22, 108, 85, 111, 114, 109, 102, 124, 0,
- /* 175 */ 3, 15, 107, 21, 22, 29, 34, 37, 44, 54, 55, 124, 0,
- /* 188 */ 3, 15, 107, 21, 22, 29, 34, 37, 44, 100, 54, 55, 124, 0,
- /* 202 */ 15, 142, 21, 22, 108, 29, 85, 111, 114, 150, 109, 102, 124, 0,
- /* 216 */ 15, 142, 21, 22, 108, 29, 85, 111, 37, 114, 150, 109, 102, 124, 0,
- /* 231 */ 3, 15, 142, 143, 138, 107, 21, 22, 29, 111, 37, 150, 44, 109, 48, 49, 102, 54, 55, 124, 0,
- /* 252 */ 3, 15, 142, 143, 138, 107, 21, 22, 29, 35, 111, 37, 150, 44, 109, 48, 49, 102, 54, 55, 124, 0,
- /* 274 */
+ /* 61 */ 23, 34, 0,
+ /* 64 */ 27, 30, 0,
+ /* 67 */ 29, 150, 0,
+ /* 70 */ 34, 38, 0,
+ /* 73 */ 38, 65, 0,
+ /* 76 */ 1, 50, 56, 0,
+ /* 80 */ 1, 56, 156, 0,
+ /* 84 */ 3, 96, 49, 0,
+ /* 88 */ 96, 39, 53, 0,
+ /* 92 */ 12, 110, 36, 0,
+ /* 96 */ 15, 107, 29, 0,
+ /* 100 */ 15, 107, 34, 0,
+ /* 104 */ 23, 27, 30, 0,
+ /* 108 */ 69, 34, 39, 0,
+ /* 112 */ 3, 15, 107, 29, 0,
+ /* 117 */ 7, 25, 52, 51, 0,
+ /* 122 */ 15, 142, 85, 111, 0,
+ /* 127 */ 1, 144, 50, 56, 156, 0,
+ /* 133 */ 4, 24, 23, 27, 30, 0,
+ /* 139 */ 4, 24, 23, 27, 30, 61, 0,
+ /* 146 */ 15, 29, 37, 44, 54, 55, 0,
+ /* 153 */ 132, 1, 95, 112, 121, 144, 148, 50, 0,
+ /* 162 */ 3, 15, 107, 29, 150, 44, 55, 124, 0,
+ /* 171 */ 15, 142, 21, 22, 108, 85, 111, 114, 109, 102, 124, 0,
+ /* 183 */ 3, 15, 107, 21, 22, 29, 34, 37, 44, 54, 55, 124, 0,
+ /* 196 */ 3, 15, 107, 21, 22, 29, 34, 37, 44, 100, 54, 55, 124, 0,
+ /* 210 */ 15, 142, 21, 22, 108, 29, 85, 111, 114, 150, 109, 102, 124, 0,
+ /* 224 */ 15, 142, 21, 22, 108, 29, 85, 111, 37, 114, 150, 109, 102, 124, 0,
+ /* 239 */ 3, 15, 142, 143, 138, 107, 21, 22, 29, 111, 37, 150, 44, 109, 48, 49, 102, 54, 55, 124, 0,
+ /* 260 */ 3, 15, 142, 143, 138, 107, 21, 22, 29, 35, 111, 37, 150, 44, 109, 48, 49, 102, 54, 55, 124, 0,
+ /* 282 */
};
/* These are the main two-stage UCD tables. The fields in each record are:
@@ -189,7 +192,7 @@ offset to multichar other cases or zero (8 bits), offset to other case
or zero (32 bits, signed), script extension (16 bits, signed), and a dummy
16-bit field to make the whole thing a multiple of 4 bytes. */
-const ucd_record PRIV(ucd_records)[] = { /* 11508 bytes, record size 12 */
+const ucd_record PRIV(ucd_records)[] = { /* 11700 bytes, record size 12 */
{ 10, 0, 2, 0, 0, 10, 256, }, /* 0 */
{ 10, 0, 2, 0, 0, 10, 0, }, /* 1 */
{ 10, 0, 1, 0, 0, 10, 0, }, /* 2 */
@@ -387,9 +390,9 @@ const ucd_record PRIV(ucd_records)[] = { /* 11508 bytes, record size 12 */
{ 13, 9, 12, 88, 1, 13, 0, }, /* 194 */
{ 13, 5, 12, 88, -1, 13, 0, }, /* 195 */
{ 13, 26, 12, 0, 0, 13, 0, }, /* 196 */
- { 13, 12, 3, 0, 0, -34, 0, }, /* 197 */
- { 13, 12, 3, 0, 0, -28, 0, }, /* 198 */
- { 28, 12, 3, 0, 0, -31, 0, }, /* 199 */
+ { 13, 12, 3, 0, 0, -31, 0, }, /* 197 */
+ { 13, 12, 3, 0, 0, -25, 0, }, /* 198 */
+ { 28, 12, 3, 0, 0, -28, 0, }, /* 199 */
{ 13, 11, 3, 0, 0, 13, 0, }, /* 200 */
{ 13, 9, 12, 0, 15, 13, 0, }, /* 201 */
{ 13, 5, 12, 0, -15, 13, 0, }, /* 202 */
@@ -398,281 +401,281 @@ const ucd_record PRIV(ucd_records)[] = { /* 11508 bytes, record size 12 */
{ 2, 21, 12, 0, 0, 2, 0, }, /* 205 */
{ 2, 5, 12, 0, 0, 2, 0, }, /* 206 */
{ 2, 5, 12, 0, -48, 2, 0, }, /* 207 */
- { 10, 21, 12, 0, 0, -13, 0, }, /* 208 */
- { 2, 17, 12, 0, 0, 2, 0, }, /* 209 */
- { 2, 26, 12, 0, 0, 2, 0, }, /* 210 */
- { 2, 23, 12, 0, 0, 2, 0, }, /* 211 */
- { 26, 12, 3, 0, 0, 26, 0, }, /* 212 */
- { 26, 17, 12, 0, 0, 26, 0, }, /* 213 */
- { 26, 21, 12, 0, 0, 26, 0, }, /* 214 */
- { 26, 7, 12, 0, 0, 26, 0, }, /* 215 */
- { 1, 1, 4, 0, 0, 1, 0, }, /* 216 */
- { 10, 1, 4, 0, 0, 10, 0, }, /* 217 */
- { 1, 25, 12, 0, 0, 1, 0, }, /* 218 */
- { 1, 21, 12, 0, 0, 1, 0, }, /* 219 */
- { 1, 23, 12, 0, 0, 1, 0, }, /* 220 */
- { 10, 21, 12, 0, 0, -105, 0, }, /* 221 */
- { 1, 26, 12, 0, 0, 1, 0, }, /* 222 */
- { 1, 12, 3, 0, 0, 1, 0, }, /* 223 */
- { 1, 1, 2, 0, 0, -73, 0, }, /* 224 */
- { 1, 7, 12, 0, 0, 1, 0, }, /* 225 */
- { 10, 6, 12, 0, 0, -145, 0, }, /* 226 */
- { 28, 12, 3, 0, 0, -7, 0, }, /* 227 */
- { 1, 13, 12, 0, 0, -10, 0, }, /* 228 */
- { 1, 21, 12, 0, 0, -4, 0, }, /* 229 */
- { 1, 6, 12, 0, 0, 1, 0, }, /* 230 */
- { 1, 13, 12, 0, 0, 1, 0, }, /* 231 */
- { 50, 21, 12, 0, 0, 50, 0, }, /* 232 */
- { 50, 1, 4, 0, 0, 50, 0, }, /* 233 */
- { 50, 7, 12, 0, 0, 50, 0, }, /* 234 */
- { 50, 12, 3, 0, 0, 50, 0, }, /* 235 */
- { 56, 7, 12, 0, 0, 56, 0, }, /* 236 */
- { 56, 12, 3, 0, 0, 56, 0, }, /* 237 */
- { 64, 13, 12, 0, 0, 64, 0, }, /* 238 */
- { 64, 7, 12, 0, 0, 64, 0, }, /* 239 */
- { 64, 12, 3, 0, 0, 64, 0, }, /* 240 */
- { 64, 6, 12, 0, 0, 64, 0, }, /* 241 */
- { 64, 26, 12, 0, 0, 64, 0, }, /* 242 */
- { 64, 21, 12, 0, 0, 64, 0, }, /* 243 */
- { 64, 23, 12, 0, 0, 64, 0, }, /* 244 */
- { 90, 7, 12, 0, 0, 90, 0, }, /* 245 */
- { 90, 12, 3, 0, 0, 90, 0, }, /* 246 */
- { 90, 6, 12, 0, 0, 90, 0, }, /* 247 */
- { 90, 21, 12, 0, 0, 90, 0, }, /* 248 */
- { 95, 7, 12, 0, 0, 95, 0, }, /* 249 */
- { 95, 12, 3, 0, 0, 95, 0, }, /* 250 */
- { 95, 21, 12, 0, 0, 95, 0, }, /* 251 */
- { 15, 12, 3, 0, 0, 15, 0, }, /* 252 */
- { 15, 10, 5, 0, 0, 15, 0, }, /* 253 */
- { 15, 7, 12, 0, 0, 15, 0, }, /* 254 */
- { 28, 12, 3, 0, 0, -188, 0, }, /* 255 */
- { 28, 12, 3, 0, 0, -175, 0, }, /* 256 */
- { 10, 21, 12, 0, 0, -231, 0, }, /* 257 */
- { 10, 21, 12, 0, 0, -252, 0, }, /* 258 */
- { 15, 13, 12, 0, 0, -120, 0, }, /* 259 */
- { 15, 21, 12, 0, 0, 15, 0, }, /* 260 */
- { 15, 6, 12, 0, 0, 15, 0, }, /* 261 */
- { 3, 7, 12, 0, 0, 3, 0, }, /* 262 */
- { 3, 12, 3, 0, 0, 3, 0, }, /* 263 */
- { 3, 10, 5, 0, 0, 3, 0, }, /* 264 */
- { 3, 10, 3, 0, 0, 3, 0, }, /* 265 */
- { 3, 13, 12, 0, 0, -77, 0, }, /* 266 */
- { 3, 23, 12, 0, 0, 3, 0, }, /* 267 */
- { 3, 15, 12, 0, 0, 3, 0, }, /* 268 */
- { 3, 26, 12, 0, 0, 3, 0, }, /* 269 */
- { 3, 21, 12, 0, 0, 3, 0, }, /* 270 */
- { 22, 12, 3, 0, 0, 22, 0, }, /* 271 */
- { 22, 10, 5, 0, 0, 22, 0, }, /* 272 */
- { 22, 7, 12, 0, 0, 22, 0, }, /* 273 */
- { 22, 13, 12, 0, 0, -58, 0, }, /* 274 */
- { 22, 21, 12, 0, 0, 22, 0, }, /* 275 */
- { 21, 12, 3, 0, 0, 21, 0, }, /* 276 */
- { 21, 10, 5, 0, 0, 21, 0, }, /* 277 */
- { 21, 7, 12, 0, 0, 21, 0, }, /* 278 */
- { 21, 13, 12, 0, 0, -55, 0, }, /* 279 */
- { 21, 21, 12, 0, 0, 21, 0, }, /* 280 */
- { 21, 23, 12, 0, 0, 21, 0, }, /* 281 */
- { 44, 12, 3, 0, 0, 44, 0, }, /* 282 */
- { 44, 10, 5, 0, 0, 44, 0, }, /* 283 */
- { 44, 7, 12, 0, 0, 44, 0, }, /* 284 */
- { 44, 10, 3, 0, 0, 44, 0, }, /* 285 */
- { 44, 13, 12, 0, 0, 44, 0, }, /* 286 */
- { 44, 26, 12, 0, 0, 44, 0, }, /* 287 */
- { 44, 15, 12, 0, 0, 44, 0, }, /* 288 */
- { 54, 12, 3, 0, 0, 54, 0, }, /* 289 */
- { 54, 7, 12, 0, 0, 54, 0, }, /* 290 */
- { 54, 10, 3, 0, 0, 54, 0, }, /* 291 */
- { 54, 10, 5, 0, 0, 54, 0, }, /* 292 */
- { 54, 13, 12, 0, 0, -52, 0, }, /* 293 */
- { 54, 15, 12, 0, 0, -52, 0, }, /* 294 */
- { 54, 26, 12, 0, 0, -52, 0, }, /* 295 */
- { 54, 26, 12, 0, 0, 54, 0, }, /* 296 */
- { 54, 23, 12, 0, 0, 54, 0, }, /* 297 */
- { 55, 12, 3, 0, 0, 55, 0, }, /* 298 */
- { 55, 10, 5, 0, 0, 55, 0, }, /* 299 */
- { 55, 7, 12, 0, 0, 55, 0, }, /* 300 */
- { 55, 13, 12, 0, 0, 55, 0, }, /* 301 */
- { 55, 21, 12, 0, 0, 55, 0, }, /* 302 */
- { 55, 15, 12, 0, 0, 55, 0, }, /* 303 */
- { 55, 26, 12, 0, 0, 55, 0, }, /* 304 */
- { 29, 7, 12, 0, 0, 29, 0, }, /* 305 */
- { 29, 12, 3, 0, 0, 29, 0, }, /* 306 */
- { 29, 10, 5, 0, 0, 29, 0, }, /* 307 */
- { 29, 21, 12, 0, 0, 29, 0, }, /* 308 */
- { 29, 10, 3, 0, 0, 29, 0, }, /* 309 */
- { 29, 13, 12, 0, 0, -64, 0, }, /* 310 */
- { 37, 12, 3, 0, 0, 37, 0, }, /* 311 */
- { 37, 10, 5, 0, 0, 37, 0, }, /* 312 */
- { 37, 7, 12, 0, 0, 37, 0, }, /* 313 */
- { 37, 10, 3, 0, 0, 37, 0, }, /* 314 */
- { 37, 7, 4, 0, 0, 37, 0, }, /* 315 */
- { 37, 26, 12, 0, 0, 37, 0, }, /* 316 */
- { 37, 15, 12, 0, 0, 37, 0, }, /* 317 */
- { 37, 13, 12, 0, 0, 37, 0, }, /* 318 */
+ { 2, 17, 12, 0, 0, 2, 0, }, /* 208 */
+ { 2, 26, 12, 0, 0, 2, 0, }, /* 209 */
+ { 2, 23, 12, 0, 0, 2, 0, }, /* 210 */
+ { 26, 12, 3, 0, 0, 26, 0, }, /* 211 */
+ { 26, 17, 12, 0, 0, 26, 0, }, /* 212 */
+ { 26, 21, 12, 0, 0, 26, 0, }, /* 213 */
+ { 26, 7, 12, 0, 0, 26, 0, }, /* 214 */
+ { 1, 1, 4, 0, 0, 1, 0, }, /* 215 */
+ { 10, 1, 4, 0, 0, 10, 0, }, /* 216 */
+ { 1, 25, 12, 0, 0, 1, 0, }, /* 217 */
+ { 1, 21, 12, 0, 0, 1, 0, }, /* 218 */
+ { 1, 23, 12, 0, 0, 1, 0, }, /* 219 */
+ { 10, 21, 12, 0, 0, -127, 0, }, /* 220 */
+ { 1, 26, 12, 0, 0, 1, 0, }, /* 221 */
+ { 1, 12, 3, 0, 0, 1, 0, }, /* 222 */
+ { 1, 1, 2, 0, 0, -76, 0, }, /* 223 */
+ { 1, 7, 12, 0, 0, 1, 0, }, /* 224 */
+ { 10, 6, 12, 0, 0, -153, 0, }, /* 225 */
+ { 28, 12, 3, 0, 0, -7, 0, }, /* 226 */
+ { 1, 13, 12, 0, 0, -80, 0, }, /* 227 */
+ { 1, 21, 12, 0, 0, -4, 0, }, /* 228 */
+ { 1, 6, 12, 0, 0, 1, 0, }, /* 229 */
+ { 1, 13, 12, 0, 0, 1, 0, }, /* 230 */
+ { 50, 21, 12, 0, 0, 50, 0, }, /* 231 */
+ { 50, 1, 4, 0, 0, 50, 0, }, /* 232 */
+ { 50, 7, 12, 0, 0, 50, 0, }, /* 233 */
+ { 50, 12, 3, 0, 0, 50, 0, }, /* 234 */
+ { 56, 7, 12, 0, 0, 56, 0, }, /* 235 */
+ { 56, 12, 3, 0, 0, 56, 0, }, /* 236 */
+ { 64, 13, 12, 0, 0, 64, 0, }, /* 237 */
+ { 64, 7, 12, 0, 0, 64, 0, }, /* 238 */
+ { 64, 12, 3, 0, 0, 64, 0, }, /* 239 */
+ { 64, 6, 12, 0, 0, 64, 0, }, /* 240 */
+ { 64, 26, 12, 0, 0, 64, 0, }, /* 241 */
+ { 64, 21, 12, 0, 0, 64, 0, }, /* 242 */
+ { 64, 23, 12, 0, 0, 64, 0, }, /* 243 */
+ { 90, 7, 12, 0, 0, 90, 0, }, /* 244 */
+ { 90, 12, 3, 0, 0, 90, 0, }, /* 245 */
+ { 90, 6, 12, 0, 0, 90, 0, }, /* 246 */
+ { 90, 21, 12, 0, 0, 90, 0, }, /* 247 */
+ { 95, 7, 12, 0, 0, 95, 0, }, /* 248 */
+ { 95, 12, 3, 0, 0, 95, 0, }, /* 249 */
+ { 95, 21, 12, 0, 0, 95, 0, }, /* 250 */
+ { 15, 12, 3, 0, 0, 15, 0, }, /* 251 */
+ { 15, 10, 5, 0, 0, 15, 0, }, /* 252 */
+ { 15, 7, 12, 0, 0, 15, 0, }, /* 253 */
+ { 28, 12, 3, 0, 0, -196, 0, }, /* 254 */
+ { 28, 12, 3, 0, 0, -183, 0, }, /* 255 */
+ { 10, 21, 12, 0, 0, -239, 0, }, /* 256 */
+ { 10, 21, 12, 0, 0, -260, 0, }, /* 257 */
+ { 15, 13, 12, 0, 0, -122, 0, }, /* 258 */
+ { 15, 21, 12, 0, 0, 15, 0, }, /* 259 */
+ { 15, 6, 12, 0, 0, 15, 0, }, /* 260 */
+ { 3, 7, 12, 0, 0, 3, 0, }, /* 261 */
+ { 3, 12, 3, 0, 0, 3, 0, }, /* 262 */
+ { 3, 10, 5, 0, 0, 3, 0, }, /* 263 */
+ { 3, 10, 3, 0, 0, 3, 0, }, /* 264 */
+ { 3, 13, 12, 0, 0, -84, 0, }, /* 265 */
+ { 3, 23, 12, 0, 0, 3, 0, }, /* 266 */
+ { 3, 15, 12, 0, 0, 3, 0, }, /* 267 */
+ { 3, 26, 12, 0, 0, 3, 0, }, /* 268 */
+ { 3, 21, 12, 0, 0, 3, 0, }, /* 269 */
+ { 22, 12, 3, 0, 0, 22, 0, }, /* 270 */
+ { 22, 10, 5, 0, 0, 22, 0, }, /* 271 */
+ { 22, 7, 12, 0, 0, 22, 0, }, /* 272 */
+ { 22, 13, 12, 0, 0, -58, 0, }, /* 273 */
+ { 22, 21, 12, 0, 0, 22, 0, }, /* 274 */
+ { 21, 12, 3, 0, 0, 21, 0, }, /* 275 */
+ { 21, 10, 5, 0, 0, 21, 0, }, /* 276 */
+ { 21, 7, 12, 0, 0, 21, 0, }, /* 277 */
+ { 21, 13, 12, 0, 0, -55, 0, }, /* 278 */
+ { 21, 21, 12, 0, 0, 21, 0, }, /* 279 */
+ { 21, 23, 12, 0, 0, 21, 0, }, /* 280 */
+ { 44, 12, 3, 0, 0, 44, 0, }, /* 281 */
+ { 44, 10, 5, 0, 0, 44, 0, }, /* 282 */
+ { 44, 7, 12, 0, 0, 44, 0, }, /* 283 */
+ { 44, 10, 3, 0, 0, 44, 0, }, /* 284 */
+ { 44, 13, 12, 0, 0, 44, 0, }, /* 285 */
+ { 44, 26, 12, 0, 0, 44, 0, }, /* 286 */
+ { 44, 15, 12, 0, 0, 44, 0, }, /* 287 */
+ { 54, 12, 3, 0, 0, 54, 0, }, /* 288 */
+ { 54, 7, 12, 0, 0, 54, 0, }, /* 289 */
+ { 54, 10, 3, 0, 0, 54, 0, }, /* 290 */
+ { 54, 10, 5, 0, 0, 54, 0, }, /* 291 */
+ { 54, 13, 12, 0, 0, -52, 0, }, /* 292 */
+ { 54, 15, 12, 0, 0, -52, 0, }, /* 293 */
+ { 54, 26, 12, 0, 0, -52, 0, }, /* 294 */
+ { 54, 26, 12, 0, 0, 54, 0, }, /* 295 */
+ { 54, 23, 12, 0, 0, 54, 0, }, /* 296 */
+ { 55, 12, 3, 0, 0, 55, 0, }, /* 297 */
+ { 55, 10, 5, 0, 0, 55, 0, }, /* 298 */
+ { 55, 7, 12, 0, 0, 55, 0, }, /* 299 */
+ { 55, 13, 12, 0, 0, 55, 0, }, /* 300 */
+ { 55, 21, 12, 0, 0, 55, 0, }, /* 301 */
+ { 55, 15, 12, 0, 0, 55, 0, }, /* 302 */
+ { 55, 26, 12, 0, 0, 55, 0, }, /* 303 */
+ { 29, 7, 12, 0, 0, 29, 0, }, /* 304 */
+ { 29, 12, 3, 0, 0, 29, 0, }, /* 305 */
+ { 29, 10, 5, 0, 0, 29, 0, }, /* 306 */
+ { 29, 21, 12, 0, 0, 29, 0, }, /* 307 */
+ { 29, 10, 3, 0, 0, 29, 0, }, /* 308 */
+ { 29, 13, 12, 0, 0, -67, 0, }, /* 309 */
+ { 37, 12, 3, 0, 0, 37, 0, }, /* 310 */
+ { 37, 10, 5, 0, 0, 37, 0, }, /* 311 */
+ { 37, 7, 12, 0, 0, 37, 0, }, /* 312 */
+ { 37, 10, 3, 0, 0, 37, 0, }, /* 313 */
+ { 37, 7, 4, 0, 0, 37, 0, }, /* 314 */
+ { 37, 26, 12, 0, 0, 37, 0, }, /* 315 */
+ { 37, 15, 12, 0, 0, 37, 0, }, /* 316 */
+ { 37, 13, 12, 0, 0, 37, 0, }, /* 317 */
+ { 48, 12, 3, 0, 0, 48, 0, }, /* 318 */
{ 48, 10, 5, 0, 0, 48, 0, }, /* 319 */
{ 48, 7, 12, 0, 0, 48, 0, }, /* 320 */
- { 48, 12, 3, 0, 0, 48, 0, }, /* 321 */
- { 48, 10, 3, 0, 0, 48, 0, }, /* 322 */
- { 48, 13, 12, 0, 0, 48, 0, }, /* 323 */
- { 48, 21, 12, 0, 0, 48, 0, }, /* 324 */
- { 57, 7, 12, 0, 0, 57, 0, }, /* 325 */
- { 57, 12, 3, 0, 0, 57, 0, }, /* 326 */
- { 57, 7, 5, 0, 0, 57, 0, }, /* 327 */
- { 57, 6, 12, 0, 0, 57, 0, }, /* 328 */
- { 57, 21, 12, 0, 0, 57, 0, }, /* 329 */
- { 57, 13, 12, 0, 0, 57, 0, }, /* 330 */
- { 33, 7, 12, 0, 0, 33, 0, }, /* 331 */
- { 33, 12, 3, 0, 0, 33, 0, }, /* 332 */
- { 33, 7, 5, 0, 0, 33, 0, }, /* 333 */
- { 33, 6, 12, 0, 0, 33, 0, }, /* 334 */
- { 33, 13, 12, 0, 0, 33, 0, }, /* 335 */
- { 58, 7, 12, 0, 0, 58, 0, }, /* 336 */
- { 58, 26, 12, 0, 0, 58, 0, }, /* 337 */
- { 58, 21, 12, 0, 0, 58, 0, }, /* 338 */
- { 58, 12, 3, 0, 0, 58, 0, }, /* 339 */
- { 58, 13, 12, 0, 0, 58, 0, }, /* 340 */
- { 58, 15, 12, 0, 0, 58, 0, }, /* 341 */
- { 58, 22, 12, 0, 0, 58, 0, }, /* 342 */
- { 58, 18, 12, 0, 0, 58, 0, }, /* 343 */
- { 58, 10, 5, 0, 0, 58, 0, }, /* 344 */
- { 39, 7, 12, 0, 0, 39, 0, }, /* 345 */
- { 39, 10, 12, 0, 0, 39, 0, }, /* 346 */
- { 39, 12, 3, 0, 0, 39, 0, }, /* 347 */
- { 39, 10, 5, 0, 0, 39, 0, }, /* 348 */
- { 39, 13, 12, 0, 0, -81, 0, }, /* 349 */
- { 39, 21, 12, 0, 0, 39, 0, }, /* 350 */
- { 39, 13, 12, 0, 0, 39, 0, }, /* 351 */
- { 39, 26, 12, 0, 0, 39, 0, }, /* 352 */
- { 17, 9, 12, 0, 7264, 17, 0, }, /* 353 */
- { 17, 5, 12, 0, 3008, 17, 0, }, /* 354 */
- { 10, 21, 12, 0, 0, -49, 0, }, /* 355 */
- { 17, 6, 12, 0, 0, 17, 0, }, /* 356 */
- { 24, 7, 6, 0, 0, 24, 0, }, /* 357 */
- { 24, 7, 7, 0, 0, 24, 0, }, /* 358 */
- { 24, 7, 8, 0, 0, 24, 0, }, /* 359 */
- { 16, 7, 12, 0, 0, 16, 0, }, /* 360 */
- { 16, 12, 3, 0, 0, 16, 0, }, /* 361 */
- { 16, 21, 12, 0, 0, 16, 0, }, /* 362 */
- { 16, 15, 12, 0, 0, 16, 0, }, /* 363 */
- { 16, 26, 12, 0, 0, 16, 0, }, /* 364 */
- { 9, 9, 12, 0, 38864, 9, 0, }, /* 365 */
- { 9, 9, 12, 0, 8, 9, 0, }, /* 366 */
- { 9, 5, 12, 0, -8, 9, 0, }, /* 367 */
- { 8, 17, 12, 0, 0, 8, 0, }, /* 368 */
- { 8, 7, 12, 0, 0, 8, 0, }, /* 369 */
- { 8, 26, 12, 0, 0, 8, 0, }, /* 370 */
- { 8, 21, 12, 0, 0, 8, 0, }, /* 371 */
- { 41, 29, 12, 0, 0, 41, 0, }, /* 372 */
- { 41, 7, 12, 0, 0, 41, 0, }, /* 373 */
- { 41, 22, 12, 0, 0, 41, 0, }, /* 374 */
- { 41, 18, 12, 0, 0, 41, 0, }, /* 375 */
- { 46, 7, 12, 0, 0, 46, 0, }, /* 376 */
- { 46, 14, 12, 0, 0, 46, 0, }, /* 377 */
- { 51, 7, 12, 0, 0, 51, 0, }, /* 378 */
- { 51, 12, 3, 0, 0, 51, 0, }, /* 379 */
- { 25, 7, 12, 0, 0, 25, 0, }, /* 380 */
- { 25, 12, 3, 0, 0, 25, 0, }, /* 381 */
- { 10, 21, 12, 0, 0, -115, 0, }, /* 382 */
- { 7, 7, 12, 0, 0, 7, 0, }, /* 383 */
- { 7, 12, 3, 0, 0, 7, 0, }, /* 384 */
- { 52, 7, 12, 0, 0, 52, 0, }, /* 385 */
- { 52, 12, 3, 0, 0, 52, 0, }, /* 386 */
- { 32, 7, 12, 0, 0, 32, 0, }, /* 387 */
- { 32, 12, 3, 0, 0, 32, 0, }, /* 388 */
- { 32, 10, 5, 0, 0, 32, 0, }, /* 389 */
- { 32, 21, 12, 0, 0, 32, 0, }, /* 390 */
- { 32, 6, 12, 0, 0, 32, 0, }, /* 391 */
- { 32, 23, 12, 0, 0, 32, 0, }, /* 392 */
- { 32, 13, 12, 0, 0, 32, 0, }, /* 393 */
- { 32, 15, 12, 0, 0, 32, 0, }, /* 394 */
- { 38, 21, 12, 0, 0, 38, 0, }, /* 395 */
- { 10, 21, 12, 0, 0, -70, 0, }, /* 396 */
- { 38, 17, 12, 0, 0, 38, 0, }, /* 397 */
- { 38, 12, 3, 0, 0, 38, 0, }, /* 398 */
- { 38, 1, 2, 0, 0, 38, 0, }, /* 399 */
- { 38, 13, 12, 0, 0, 38, 0, }, /* 400 */
- { 38, 7, 12, 0, 0, 38, 0, }, /* 401 */
- { 38, 6, 12, 0, 0, 38, 0, }, /* 402 */
- { 35, 7, 12, 0, 0, 35, 0, }, /* 403 */
- { 35, 12, 3, 0, 0, 35, 0, }, /* 404 */
- { 35, 10, 5, 0, 0, 35, 0, }, /* 405 */
- { 35, 26, 12, 0, 0, 35, 0, }, /* 406 */
- { 35, 21, 12, 0, 0, 35, 0, }, /* 407 */
- { 35, 13, 12, 0, 0, 35, 0, }, /* 408 */
- { 53, 7, 12, 0, 0, 53, 0, }, /* 409 */
- { 40, 7, 12, 0, 0, 40, 0, }, /* 410 */
- { 40, 13, 12, 0, 0, 40, 0, }, /* 411 */
- { 40, 15, 12, 0, 0, 40, 0, }, /* 412 */
- { 40, 26, 12, 0, 0, 40, 0, }, /* 413 */
- { 32, 26, 12, 0, 0, 32, 0, }, /* 414 */
- { 6, 7, 12, 0, 0, 6, 0, }, /* 415 */
- { 6, 12, 3, 0, 0, 6, 0, }, /* 416 */
- { 6, 10, 5, 0, 0, 6, 0, }, /* 417 */
- { 6, 21, 12, 0, 0, 6, 0, }, /* 418 */
- { 91, 7, 12, 0, 0, 91, 0, }, /* 419 */
- { 91, 10, 5, 0, 0, 91, 0, }, /* 420 */
- { 91, 12, 3, 0, 0, 91, 0, }, /* 421 */
- { 91, 10, 12, 0, 0, 91, 0, }, /* 422 */
- { 91, 13, 12, 0, 0, 91, 0, }, /* 423 */
- { 91, 21, 12, 0, 0, 91, 0, }, /* 424 */
- { 91, 6, 12, 0, 0, 91, 0, }, /* 425 */
- { 28, 11, 3, 0, 0, 28, 0, }, /* 426 */
- { 62, 12, 3, 0, 0, 62, 0, }, /* 427 */
- { 62, 10, 5, 0, 0, 62, 0, }, /* 428 */
- { 62, 7, 12, 0, 0, 62, 0, }, /* 429 */
- { 62, 10, 3, 0, 0, 62, 0, }, /* 430 */
- { 62, 13, 12, 0, 0, 62, 0, }, /* 431 */
- { 62, 21, 12, 0, 0, 62, 0, }, /* 432 */
- { 62, 26, 12, 0, 0, 62, 0, }, /* 433 */
- { 76, 12, 3, 0, 0, 76, 0, }, /* 434 */
- { 76, 10, 5, 0, 0, 76, 0, }, /* 435 */
- { 76, 7, 12, 0, 0, 76, 0, }, /* 436 */
- { 76, 13, 12, 0, 0, 76, 0, }, /* 437 */
- { 93, 7, 12, 0, 0, 93, 0, }, /* 438 */
- { 93, 12, 3, 0, 0, 93, 0, }, /* 439 */
- { 93, 10, 5, 0, 0, 93, 0, }, /* 440 */
- { 93, 21, 12, 0, 0, 93, 0, }, /* 441 */
- { 70, 7, 12, 0, 0, 70, 0, }, /* 442 */
- { 70, 10, 5, 0, 0, 70, 0, }, /* 443 */
- { 70, 12, 3, 0, 0, 70, 0, }, /* 444 */
- { 70, 21, 12, 0, 0, 70, 0, }, /* 445 */
- { 70, 13, 12, 0, 0, 70, 0, }, /* 446 */
- { 73, 13, 12, 0, 0, 73, 0, }, /* 447 */
- { 73, 7, 12, 0, 0, 73, 0, }, /* 448 */
- { 73, 6, 12, 0, 0, 73, 0, }, /* 449 */
- { 73, 21, 12, 0, 0, 73, 0, }, /* 450 */
- { 13, 5, 12, 63, -6222, 13, 0, }, /* 451 */
- { 13, 5, 12, 67, -6221, 13, 0, }, /* 452 */
- { 13, 5, 12, 71, -6212, 13, 0, }, /* 453 */
- { 13, 5, 12, 75, -6210, 13, 0, }, /* 454 */
- { 13, 5, 12, 79, -6210, 13, 0, }, /* 455 */
- { 13, 5, 12, 79, -6211, 13, 0, }, /* 456 */
- { 13, 5, 12, 84, -6204, 13, 0, }, /* 457 */
- { 13, 5, 12, 88, -6180, 13, 0, }, /* 458 */
- { 13, 5, 12, 108, 35267, 13, 0, }, /* 459 */
- { 17, 9, 12, 0, -3008, 17, 0, }, /* 460 */
- { 76, 21, 12, 0, 0, 76, 0, }, /* 461 */
- { 28, 12, 3, 0, 0, -110, 0, }, /* 462 */
- { 28, 12, 3, 0, 0, 15, 0, }, /* 463 */
- { 10, 21, 12, 0, 0, -37, 0, }, /* 464 */
- { 28, 12, 3, 0, 0, -16, 0, }, /* 465 */
- { 28, 12, 3, 0, 0, -43, 0, }, /* 466 */
- { 28, 12, 3, 0, 0, -138, 0, }, /* 467 */
- { 10, 10, 5, 0, 0, -16, 0, }, /* 468 */
- { 10, 7, 12, 0, 0, -40, 0, }, /* 469 */
- { 10, 7, 12, 0, 0, -16, 0, }, /* 470 */
- { 10, 7, 12, 0, 0, 15, 0, }, /* 471 */
- { 10, 7, 12, 0, 0, -154, 0, }, /* 472 */
- { 10, 7, 12, 0, 0, -37, 0, }, /* 473 */
- { 28, 12, 3, 0, 0, -89, 0, }, /* 474 */
- { 10, 10, 5, 0, 0, 3, 0, }, /* 475 */
- { 28, 12, 3, 0, 0, -37, 0, }, /* 476 */
- { 10, 7, 12, 0, 0, 150, 0, }, /* 477 */
- { 13, 5, 12, 0, 0, 13, 0, }, /* 478 */
- { 13, 6, 12, 0, 0, 13, 0, }, /* 479 */
- { 34, 5, 12, 0, 35332, 34, 0, }, /* 480 */
- { 34, 5, 12, 0, 3814, 34, 0, }, /* 481 */
- { 34, 5, 12, 0, 35384, 34, 0, }, /* 482 */
+ { 48, 10, 3, 0, 0, 48, 0, }, /* 321 */
+ { 48, 13, 12, 0, 0, 48, 0, }, /* 322 */
+ { 48, 21, 12, 0, 0, 48, 0, }, /* 323 */
+ { 57, 7, 12, 0, 0, 57, 0, }, /* 324 */
+ { 57, 12, 3, 0, 0, 57, 0, }, /* 325 */
+ { 57, 7, 5, 0, 0, 57, 0, }, /* 326 */
+ { 57, 6, 12, 0, 0, 57, 0, }, /* 327 */
+ { 57, 21, 12, 0, 0, 57, 0, }, /* 328 */
+ { 57, 13, 12, 0, 0, 57, 0, }, /* 329 */
+ { 33, 7, 12, 0, 0, 33, 0, }, /* 330 */
+ { 33, 12, 3, 0, 0, 33, 0, }, /* 331 */
+ { 33, 7, 5, 0, 0, 33, 0, }, /* 332 */
+ { 33, 6, 12, 0, 0, 33, 0, }, /* 333 */
+ { 33, 13, 12, 0, 0, 33, 0, }, /* 334 */
+ { 58, 7, 12, 0, 0, 58, 0, }, /* 335 */
+ { 58, 26, 12, 0, 0, 58, 0, }, /* 336 */
+ { 58, 21, 12, 0, 0, 58, 0, }, /* 337 */
+ { 58, 12, 3, 0, 0, 58, 0, }, /* 338 */
+ { 58, 13, 12, 0, 0, 58, 0, }, /* 339 */
+ { 58, 15, 12, 0, 0, 58, 0, }, /* 340 */
+ { 58, 22, 12, 0, 0, 58, 0, }, /* 341 */
+ { 58, 18, 12, 0, 0, 58, 0, }, /* 342 */
+ { 58, 10, 5, 0, 0, 58, 0, }, /* 343 */
+ { 39, 7, 12, 0, 0, 39, 0, }, /* 344 */
+ { 39, 10, 12, 0, 0, 39, 0, }, /* 345 */
+ { 39, 12, 3, 0, 0, 39, 0, }, /* 346 */
+ { 39, 10, 5, 0, 0, 39, 0, }, /* 347 */
+ { 39, 13, 12, 0, 0, -88, 0, }, /* 348 */
+ { 39, 21, 12, 0, 0, 39, 0, }, /* 349 */
+ { 39, 13, 12, 0, 0, 39, 0, }, /* 350 */
+ { 39, 26, 12, 0, 0, 39, 0, }, /* 351 */
+ { 17, 9, 12, 0, 7264, 17, 0, }, /* 352 */
+ { 17, 5, 12, 0, 3008, 17, 0, }, /* 353 */
+ { 10, 21, 12, 0, 0, -49, 0, }, /* 354 */
+ { 17, 6, 12, 0, 0, 17, 0, }, /* 355 */
+ { 24, 7, 6, 0, 0, 24, 0, }, /* 356 */
+ { 24, 7, 7, 0, 0, 24, 0, }, /* 357 */
+ { 24, 7, 8, 0, 0, 24, 0, }, /* 358 */
+ { 16, 7, 12, 0, 0, 16, 0, }, /* 359 */
+ { 16, 12, 3, 0, 0, 16, 0, }, /* 360 */
+ { 16, 21, 12, 0, 0, 16, 0, }, /* 361 */
+ { 16, 15, 12, 0, 0, 16, 0, }, /* 362 */
+ { 16, 26, 12, 0, 0, 16, 0, }, /* 363 */
+ { 9, 9, 12, 0, 38864, 9, 0, }, /* 364 */
+ { 9, 9, 12, 0, 8, 9, 0, }, /* 365 */
+ { 9, 5, 12, 0, -8, 9, 0, }, /* 366 */
+ { 8, 17, 12, 0, 0, 8, 0, }, /* 367 */
+ { 8, 7, 12, 0, 0, 8, 0, }, /* 368 */
+ { 8, 26, 12, 0, 0, 8, 0, }, /* 369 */
+ { 8, 21, 12, 0, 0, 8, 0, }, /* 370 */
+ { 41, 29, 12, 0, 0, 41, 0, }, /* 371 */
+ { 41, 7, 12, 0, 0, 41, 0, }, /* 372 */
+ { 41, 22, 12, 0, 0, 41, 0, }, /* 373 */
+ { 41, 18, 12, 0, 0, 41, 0, }, /* 374 */
+ { 46, 7, 12, 0, 0, 46, 0, }, /* 375 */
+ { 46, 14, 12, 0, 0, 46, 0, }, /* 376 */
+ { 51, 7, 12, 0, 0, 51, 0, }, /* 377 */
+ { 51, 12, 3, 0, 0, 51, 0, }, /* 378 */
+ { 25, 7, 12, 0, 0, 25, 0, }, /* 379 */
+ { 25, 12, 3, 0, 0, 25, 0, }, /* 380 */
+ { 10, 21, 12, 0, 0, -117, 0, }, /* 381 */
+ { 7, 7, 12, 0, 0, 7, 0, }, /* 382 */
+ { 7, 12, 3, 0, 0, 7, 0, }, /* 383 */
+ { 52, 7, 12, 0, 0, 52, 0, }, /* 384 */
+ { 52, 12, 3, 0, 0, 52, 0, }, /* 385 */
+ { 32, 7, 12, 0, 0, 32, 0, }, /* 386 */
+ { 32, 12, 3, 0, 0, 32, 0, }, /* 387 */
+ { 32, 10, 5, 0, 0, 32, 0, }, /* 388 */
+ { 32, 21, 12, 0, 0, 32, 0, }, /* 389 */
+ { 32, 6, 12, 0, 0, 32, 0, }, /* 390 */
+ { 32, 23, 12, 0, 0, 32, 0, }, /* 391 */
+ { 32, 13, 12, 0, 0, 32, 0, }, /* 392 */
+ { 32, 15, 12, 0, 0, 32, 0, }, /* 393 */
+ { 38, 21, 12, 0, 0, 38, 0, }, /* 394 */
+ { 10, 21, 12, 0, 0, -73, 0, }, /* 395 */
+ { 38, 17, 12, 0, 0, 38, 0, }, /* 396 */
+ { 38, 12, 3, 0, 0, 38, 0, }, /* 397 */
+ { 38, 1, 2, 0, 0, 38, 0, }, /* 398 */
+ { 38, 13, 12, 0, 0, 38, 0, }, /* 399 */
+ { 38, 7, 12, 0, 0, 38, 0, }, /* 400 */
+ { 38, 6, 12, 0, 0, 38, 0, }, /* 401 */
+ { 35, 7, 12, 0, 0, 35, 0, }, /* 402 */
+ { 35, 12, 3, 0, 0, 35, 0, }, /* 403 */
+ { 35, 10, 5, 0, 0, 35, 0, }, /* 404 */
+ { 35, 26, 12, 0, 0, 35, 0, }, /* 405 */
+ { 35, 21, 12, 0, 0, 35, 0, }, /* 406 */
+ { 35, 13, 12, 0, 0, 35, 0, }, /* 407 */
+ { 53, 7, 12, 0, 0, 53, 0, }, /* 408 */
+ { 40, 7, 12, 0, 0, 40, 0, }, /* 409 */
+ { 40, 13, 12, 0, 0, 40, 0, }, /* 410 */
+ { 40, 15, 12, 0, 0, 40, 0, }, /* 411 */
+ { 40, 26, 12, 0, 0, 40, 0, }, /* 412 */
+ { 32, 26, 12, 0, 0, 32, 0, }, /* 413 */
+ { 6, 7, 12, 0, 0, 6, 0, }, /* 414 */
+ { 6, 12, 3, 0, 0, 6, 0, }, /* 415 */
+ { 6, 10, 5, 0, 0, 6, 0, }, /* 416 */
+ { 6, 21, 12, 0, 0, 6, 0, }, /* 417 */
+ { 91, 7, 12, 0, 0, 91, 0, }, /* 418 */
+ { 91, 10, 5, 0, 0, 91, 0, }, /* 419 */
+ { 91, 12, 3, 0, 0, 91, 0, }, /* 420 */
+ { 91, 10, 12, 0, 0, 91, 0, }, /* 421 */
+ { 91, 13, 12, 0, 0, 91, 0, }, /* 422 */
+ { 91, 21, 12, 0, 0, 91, 0, }, /* 423 */
+ { 91, 6, 12, 0, 0, 91, 0, }, /* 424 */
+ { 28, 11, 3, 0, 0, 28, 0, }, /* 425 */
+ { 62, 12, 3, 0, 0, 62, 0, }, /* 426 */
+ { 62, 10, 5, 0, 0, 62, 0, }, /* 427 */
+ { 62, 7, 12, 0, 0, 62, 0, }, /* 428 */
+ { 62, 10, 3, 0, 0, 62, 0, }, /* 429 */
+ { 62, 13, 12, 0, 0, 62, 0, }, /* 430 */
+ { 62, 21, 12, 0, 0, 62, 0, }, /* 431 */
+ { 62, 26, 12, 0, 0, 62, 0, }, /* 432 */
+ { 76, 12, 3, 0, 0, 76, 0, }, /* 433 */
+ { 76, 10, 5, 0, 0, 76, 0, }, /* 434 */
+ { 76, 7, 12, 0, 0, 76, 0, }, /* 435 */
+ { 76, 13, 12, 0, 0, 76, 0, }, /* 436 */
+ { 93, 7, 12, 0, 0, 93, 0, }, /* 437 */
+ { 93, 12, 3, 0, 0, 93, 0, }, /* 438 */
+ { 93, 10, 5, 0, 0, 93, 0, }, /* 439 */
+ { 93, 21, 12, 0, 0, 93, 0, }, /* 440 */
+ { 70, 7, 12, 0, 0, 70, 0, }, /* 441 */
+ { 70, 10, 5, 0, 0, 70, 0, }, /* 442 */
+ { 70, 12, 3, 0, 0, 70, 0, }, /* 443 */
+ { 70, 21, 12, 0, 0, 70, 0, }, /* 444 */
+ { 70, 13, 12, 0, 0, 70, 0, }, /* 445 */
+ { 73, 13, 12, 0, 0, 73, 0, }, /* 446 */
+ { 73, 7, 12, 0, 0, 73, 0, }, /* 447 */
+ { 73, 6, 12, 0, 0, 73, 0, }, /* 448 */
+ { 73, 21, 12, 0, 0, 73, 0, }, /* 449 */
+ { 13, 5, 12, 63, -6222, 13, 0, }, /* 450 */
+ { 13, 5, 12, 67, -6221, 13, 0, }, /* 451 */
+ { 13, 5, 12, 71, -6212, 13, 0, }, /* 452 */
+ { 13, 5, 12, 75, -6210, 13, 0, }, /* 453 */
+ { 13, 5, 12, 79, -6210, 13, 0, }, /* 454 */
+ { 13, 5, 12, 79, -6211, 13, 0, }, /* 455 */
+ { 13, 5, 12, 84, -6204, 13, 0, }, /* 456 */
+ { 13, 5, 12, 88, -6180, 13, 0, }, /* 457 */
+ { 13, 5, 12, 108, 35267, 13, 0, }, /* 458 */
+ { 17, 9, 12, 0, -3008, 17, 0, }, /* 459 */
+ { 76, 21, 12, 0, 0, 76, 0, }, /* 460 */
+ { 28, 12, 3, 0, 0, -112, 0, }, /* 461 */
+ { 28, 12, 3, 0, 0, 15, 0, }, /* 462 */
+ { 10, 21, 12, 0, 0, -37, 0, }, /* 463 */
+ { 28, 12, 3, 0, 0, -13, 0, }, /* 464 */
+ { 28, 12, 3, 0, 0, -43, 0, }, /* 465 */
+ { 28, 12, 3, 0, 0, -146, 0, }, /* 466 */
+ { 10, 10, 5, 0, 0, -13, 0, }, /* 467 */
+ { 10, 7, 12, 0, 0, -40, 0, }, /* 468 */
+ { 10, 7, 12, 0, 0, -13, 0, }, /* 469 */
+ { 10, 7, 12, 0, 0, 15, 0, }, /* 470 */
+ { 10, 7, 12, 0, 0, -162, 0, }, /* 471 */
+ { 10, 7, 12, 0, 0, -37, 0, }, /* 472 */
+ { 28, 12, 3, 0, 0, -96, 0, }, /* 473 */
+ { 10, 10, 5, 0, 0, 3, 0, }, /* 474 */
+ { 28, 12, 3, 0, 0, -37, 0, }, /* 475 */
+ { 10, 7, 12, 0, 0, 150, 0, }, /* 476 */
+ { 13, 5, 12, 0, 0, 13, 0, }, /* 477 */
+ { 13, 6, 12, 0, 0, 13, 0, }, /* 478 */
+ { 34, 5, 12, 0, 35332, 34, 0, }, /* 479 */
+ { 34, 5, 12, 0, 3814, 34, 0, }, /* 480 */
+ { 34, 5, 12, 0, 35384, 34, 0, }, /* 481 */
+ { 28, 12, 3, 0, 0, -34, 0, }, /* 482 */
{ 34, 9, 12, 92, 1, 34, 0, }, /* 483 */
{ 34, 5, 12, 92, -1, 34, 0, }, /* 484 */
{ 34, 5, 12, 92, -58, 34, 0, }, /* 485 */
@@ -699,10 +702,10 @@ const ucd_record PRIV(ucd_records)[] = { /* 11508 bytes, record size 12 */
{ 28, 1, 13, 0, 0, 28, 0, }, /* 506 */
{ 10, 27, 2, 0, 0, 10, 0, }, /* 507 */
{ 10, 28, 2, 0, 0, 10, 0, }, /* 508 */
- { 10, 29, 12, 0, 0, -67, 0, }, /* 509 */
+ { 10, 29, 12, 0, 0, -70, 0, }, /* 509 */
{ 10, 21, 14, 0, 0, 10, 0, }, /* 510 */
{ 0, 2, 2, 0, 0, 0, 0, }, /* 511 */
- { 28, 12, 3, 0, 0, -93, 0, }, /* 512 */
+ { 28, 12, 3, 0, 0, -100, 0, }, /* 512 */
{ 10, 9, 12, 0, 0, 10, 0, }, /* 513 */
{ 10, 5, 12, 0, 0, 10, 0, }, /* 514 */
{ 20, 9, 12, 96, -7517, 20, 0, }, /* 515 */
@@ -743,31 +746,31 @@ const ucd_record PRIV(ucd_records)[] = { /* 11508 bytes, record size 12 */
{ 59, 21, 12, 0, 0, 59, 0, }, /* 550 */
{ 59, 12, 3, 0, 0, 59, 0, }, /* 551 */
{ 13, 12, 3, 0, 0, 13, 0, }, /* 552 */
- { 10, 21, 12, 0, 0, -28, 0, }, /* 553 */
+ { 10, 21, 12, 0, 0, -25, 0, }, /* 553 */
{ 23, 26, 12, 0, 0, 23, 0, }, /* 554 */
- { 10, 21, 12, 0, 0, -131, 0, }, /* 555 */
- { 10, 21, 12, 0, 0, -125, 0, }, /* 556 */
+ { 10, 21, 12, 0, 0, -139, 0, }, /* 555 */
+ { 10, 21, 12, 0, 0, -133, 0, }, /* 556 */
{ 23, 6, 12, 0, 0, 23, 0, }, /* 557 */
{ 10, 7, 12, 0, 0, 23, 0, }, /* 558 */
{ 23, 14, 12, 0, 0, 23, 0, }, /* 559 */
- { 10, 22, 12, 0, 0, -131, 0, }, /* 560 */
- { 10, 18, 12, 0, 0, -131, 0, }, /* 561 */
- { 10, 26, 12, 0, 0, -125, 0, }, /* 562 */
- { 10, 17, 12, 0, 0, -125, 0, }, /* 563 */
- { 10, 22, 12, 0, 0, -125, 0, }, /* 564 */
- { 10, 18, 12, 0, 0, -125, 0, }, /* 565 */
- { 28, 12, 3, 0, 0, -19, 0, }, /* 566 */
+ { 10, 22, 12, 0, 0, -139, 0, }, /* 560 */
+ { 10, 18, 12, 0, 0, -139, 0, }, /* 561 */
+ { 10, 26, 12, 0, 0, -133, 0, }, /* 562 */
+ { 10, 17, 12, 0, 0, -133, 0, }, /* 563 */
+ { 10, 22, 12, 0, 0, -133, 0, }, /* 564 */
+ { 10, 18, 12, 0, 0, -133, 0, }, /* 565 */
+ { 28, 12, 3, 0, 0, -16, 0, }, /* 566 */
{ 24, 10, 3, 0, 0, 24, 0, }, /* 567 */
- { 10, 17, 14, 0, 0, -125, 0, }, /* 568 */
- { 10, 6, 12, 0, 0, -61, 0, }, /* 569 */
- { 10, 7, 12, 0, 0, -97, 0, }, /* 570 */
- { 10, 21, 14, 0, 0, -97, 0, }, /* 571 */
+ { 10, 17, 14, 0, 0, -133, 0, }, /* 568 */
+ { 10, 6, 12, 0, 0, -64, 0, }, /* 569 */
+ { 10, 7, 12, 0, 0, -104, 0, }, /* 570 */
+ { 10, 21, 14, 0, 0, -104, 0, }, /* 571 */
{ 10, 26, 12, 0, 0, 23, 0, }, /* 572 */
{ 27, 7, 12, 0, 0, 27, 0, }, /* 573 */
- { 28, 12, 3, 0, 0, -61, 0, }, /* 574 */
- { 10, 24, 12, 0, 0, -61, 0, }, /* 575 */
+ { 28, 12, 3, 0, 0, -64, 0, }, /* 574 */
+ { 10, 24, 12, 0, 0, -64, 0, }, /* 575 */
{ 27, 6, 12, 0, 0, 27, 0, }, /* 576 */
- { 10, 17, 12, 0, 0, -61, 0, }, /* 577 */
+ { 10, 17, 12, 0, 0, -64, 0, }, /* 577 */
{ 30, 7, 12, 0, 0, 30, 0, }, /* 578 */
{ 30, 6, 12, 0, 0, 30, 0, }, /* 579 */
{ 4, 7, 12, 0, 0, 4, 0, }, /* 580 */
@@ -795,360 +798,376 @@ const ucd_record PRIV(ucd_records)[] = { /* 11508 bytes, record size 12 */
{ 79, 14, 12, 0, 0, 79, 0, }, /* 602 */
{ 79, 12, 3, 0, 0, 79, 0, }, /* 603 */
{ 79, 21, 12, 0, 0, 79, 0, }, /* 604 */
- { 34, 9, 12, 0, -35332, 34, 0, }, /* 605 */
- { 34, 9, 12, 0, -42280, 34, 0, }, /* 606 */
- { 34, 5, 12, 0, 48, 34, 0, }, /* 607 */
- { 34, 9, 12, 0, -42308, 34, 0, }, /* 608 */
- { 34, 9, 12, 0, -42319, 34, 0, }, /* 609 */
- { 34, 9, 12, 0, -42315, 34, 0, }, /* 610 */
- { 34, 9, 12, 0, -42305, 34, 0, }, /* 611 */
- { 34, 9, 12, 0, -42258, 34, 0, }, /* 612 */
- { 34, 9, 12, 0, -42282, 34, 0, }, /* 613 */
- { 34, 9, 12, 0, -42261, 34, 0, }, /* 614 */
- { 34, 9, 12, 0, 928, 34, 0, }, /* 615 */
- { 34, 9, 12, 0, -48, 34, 0, }, /* 616 */
- { 34, 9, 12, 0, -42307, 34, 0, }, /* 617 */
- { 34, 9, 12, 0, -35384, 34, 0, }, /* 618 */
- { 49, 7, 12, 0, 0, 49, 0, }, /* 619 */
- { 49, 12, 3, 0, 0, 49, 0, }, /* 620 */
- { 49, 10, 5, 0, 0, 49, 0, }, /* 621 */
- { 49, 26, 12, 0, 0, 49, 0, }, /* 622 */
- { 10, 15, 12, 0, 0, -216, 0, }, /* 623 */
- { 10, 15, 12, 0, 0, -202, 0, }, /* 624 */
- { 10, 26, 12, 0, 0, -163, 0, }, /* 625 */
- { 10, 23, 12, 0, 0, -163, 0, }, /* 626 */
- { 65, 7, 12, 0, 0, 65, 0, }, /* 627 */
- { 65, 21, 12, 0, 0, 65, 0, }, /* 628 */
- { 75, 10, 5, 0, 0, 75, 0, }, /* 629 */
- { 75, 7, 12, 0, 0, 75, 0, }, /* 630 */
- { 75, 12, 3, 0, 0, 75, 0, }, /* 631 */
- { 75, 21, 12, 0, 0, 75, 0, }, /* 632 */
- { 75, 13, 12, 0, 0, 75, 0, }, /* 633 */
- { 15, 12, 3, 0, 0, -16, 0, }, /* 634 */
- { 15, 7, 12, 0, 0, -46, 0, }, /* 635 */
- { 69, 13, 12, 0, 0, 69, 0, }, /* 636 */
- { 69, 7, 12, 0, 0, 69, 0, }, /* 637 */
- { 69, 12, 3, 0, 0, 69, 0, }, /* 638 */
- { 10, 21, 12, 0, 0, -101, 0, }, /* 639 */
- { 69, 21, 12, 0, 0, 69, 0, }, /* 640 */
- { 74, 7, 12, 0, 0, 74, 0, }, /* 641 */
- { 74, 12, 3, 0, 0, 74, 0, }, /* 642 */
- { 74, 10, 5, 0, 0, 74, 0, }, /* 643 */
- { 74, 21, 12, 0, 0, 74, 0, }, /* 644 */
- { 84, 12, 3, 0, 0, 84, 0, }, /* 645 */
- { 84, 10, 5, 0, 0, 84, 0, }, /* 646 */
- { 84, 7, 12, 0, 0, 84, 0, }, /* 647 */
- { 84, 21, 12, 0, 0, 84, 0, }, /* 648 */
- { 10, 6, 12, 0, 0, -22, 0, }, /* 649 */
- { 84, 13, 12, 0, 0, 84, 0, }, /* 650 */
- { 39, 6, 12, 0, 0, 39, 0, }, /* 651 */
- { 68, 7, 12, 0, 0, 68, 0, }, /* 652 */
- { 68, 12, 3, 0, 0, 68, 0, }, /* 653 */
- { 68, 10, 5, 0, 0, 68, 0, }, /* 654 */
- { 68, 13, 12, 0, 0, 68, 0, }, /* 655 */
- { 68, 21, 12, 0, 0, 68, 0, }, /* 656 */
- { 92, 7, 12, 0, 0, 92, 0, }, /* 657 */
- { 92, 12, 3, 0, 0, 92, 0, }, /* 658 */
- { 92, 6, 12, 0, 0, 92, 0, }, /* 659 */
- { 92, 21, 12, 0, 0, 92, 0, }, /* 660 */
- { 87, 7, 12, 0, 0, 87, 0, }, /* 661 */
- { 87, 10, 5, 0, 0, 87, 0, }, /* 662 */
- { 87, 12, 3, 0, 0, 87, 0, }, /* 663 */
- { 87, 21, 12, 0, 0, 87, 0, }, /* 664 */
- { 87, 6, 12, 0, 0, 87, 0, }, /* 665 */
- { 34, 5, 12, 0, -928, 34, 0, }, /* 666 */
- { 9, 5, 12, 0, -38864, 9, 0, }, /* 667 */
- { 87, 13, 12, 0, 0, 87, 0, }, /* 668 */
- { 24, 7, 9, 0, 0, 24, 0, }, /* 669 */
- { 24, 7, 10, 0, 0, 24, 0, }, /* 670 */
- { 0, 4, 12, 0, 0, 0, 0, }, /* 671 */
- { 0, 3, 12, 0, 0, 0, 0, }, /* 672 */
- { 26, 25, 12, 0, 0, 26, 0, }, /* 673 */
- { 1, 24, 12, 0, 0, 1, 0, }, /* 674 */
- { 1, 7, 12, 0, 0, -10, 0, }, /* 675 */
- { 1, 26, 12, 0, 0, -10, 0, }, /* 676 */
- { 10, 6, 3, 0, 0, -61, 0, }, /* 677 */
- { 36, 7, 12, 0, 0, 36, 0, }, /* 678 */
- { 10, 21, 12, 0, 0, -25, 0, }, /* 679 */
- { 10, 15, 12, 0, 0, -85, 0, }, /* 680 */
- { 10, 26, 12, 0, 0, -25, 0, }, /* 681 */
- { 20, 14, 12, 0, 0, 20, 0, }, /* 682 */
- { 20, 15, 12, 0, 0, 20, 0, }, /* 683 */
- { 20, 26, 12, 0, 0, 20, 0, }, /* 684 */
- { 71, 7, 12, 0, 0, 71, 0, }, /* 685 */
- { 67, 7, 12, 0, 0, 67, 0, }, /* 686 */
- { 28, 12, 3, 0, 0, -1, 0, }, /* 687 */
- { 10, 15, 12, 0, 0, -1, 0, }, /* 688 */
- { 42, 7, 12, 0, 0, 42, 0, }, /* 689 */
- { 42, 15, 12, 0, 0, 42, 0, }, /* 690 */
- { 19, 7, 12, 0, 0, 19, 0, }, /* 691 */
- { 19, 14, 12, 0, 0, 19, 0, }, /* 692 */
- { 118, 7, 12, 0, 0, 118, 0, }, /* 693 */
- { 118, 12, 3, 0, 0, 118, 0, }, /* 694 */
- { 60, 7, 12, 0, 0, 60, 0, }, /* 695 */
- { 60, 21, 12, 0, 0, 60, 0, }, /* 696 */
- { 43, 7, 12, 0, 0, 43, 0, }, /* 697 */
- { 43, 21, 12, 0, 0, 43, 0, }, /* 698 */
- { 43, 14, 12, 0, 0, 43, 0, }, /* 699 */
- { 14, 9, 12, 0, 40, 14, 0, }, /* 700 */
- { 14, 5, 12, 0, -40, 14, 0, }, /* 701 */
- { 47, 7, 12, 0, 0, 47, 0, }, /* 702 */
- { 45, 7, 12, 0, 0, 45, 0, }, /* 703 */
- { 45, 13, 12, 0, 0, 45, 0, }, /* 704 */
- { 136, 9, 12, 0, 40, 136, 0, }, /* 705 */
- { 136, 5, 12, 0, -40, 136, 0, }, /* 706 */
- { 106, 7, 12, 0, 0, 106, 0, }, /* 707 */
- { 104, 7, 12, 0, 0, 104, 0, }, /* 708 */
- { 104, 21, 12, 0, 0, 104, 0, }, /* 709 */
- { 110, 7, 12, 0, 0, 110, 0, }, /* 710 */
- { 12, 7, 12, 0, 0, 12, 0, }, /* 711 */
- { 81, 7, 12, 0, 0, 81, 0, }, /* 712 */
- { 81, 21, 12, 0, 0, 81, 0, }, /* 713 */
- { 81, 15, 12, 0, 0, 81, 0, }, /* 714 */
- { 120, 7, 12, 0, 0, 120, 0, }, /* 715 */
- { 120, 26, 12, 0, 0, 120, 0, }, /* 716 */
- { 120, 15, 12, 0, 0, 120, 0, }, /* 717 */
- { 116, 7, 12, 0, 0, 116, 0, }, /* 718 */
- { 116, 15, 12, 0, 0, 116, 0, }, /* 719 */
- { 128, 7, 12, 0, 0, 128, 0, }, /* 720 */
- { 128, 15, 12, 0, 0, 128, 0, }, /* 721 */
- { 66, 7, 12, 0, 0, 66, 0, }, /* 722 */
- { 66, 15, 12, 0, 0, 66, 0, }, /* 723 */
- { 66, 21, 12, 0, 0, 66, 0, }, /* 724 */
- { 72, 7, 12, 0, 0, 72, 0, }, /* 725 */
- { 72, 21, 12, 0, 0, 72, 0, }, /* 726 */
- { 98, 7, 12, 0, 0, 98, 0, }, /* 727 */
- { 97, 7, 12, 0, 0, 97, 0, }, /* 728 */
- { 97, 15, 12, 0, 0, 97, 0, }, /* 729 */
- { 31, 7, 12, 0, 0, 31, 0, }, /* 730 */
- { 31, 12, 3, 0, 0, 31, 0, }, /* 731 */
- { 31, 15, 12, 0, 0, 31, 0, }, /* 732 */
- { 31, 21, 12, 0, 0, 31, 0, }, /* 733 */
- { 88, 7, 12, 0, 0, 88, 0, }, /* 734 */
- { 88, 15, 12, 0, 0, 88, 0, }, /* 735 */
- { 88, 21, 12, 0, 0, 88, 0, }, /* 736 */
- { 117, 7, 12, 0, 0, 117, 0, }, /* 737 */
- { 117, 15, 12, 0, 0, 117, 0, }, /* 738 */
- { 112, 7, 12, 0, 0, 112, 0, }, /* 739 */
- { 112, 26, 12, 0, 0, 112, 0, }, /* 740 */
- { 112, 12, 3, 0, 0, 112, 0, }, /* 741 */
- { 112, 15, 12, 0, 0, 112, 0, }, /* 742 */
- { 112, 21, 12, 0, 0, 112, 0, }, /* 743 */
- { 78, 7, 12, 0, 0, 78, 0, }, /* 744 */
- { 78, 21, 12, 0, 0, 78, 0, }, /* 745 */
- { 83, 7, 12, 0, 0, 83, 0, }, /* 746 */
- { 83, 15, 12, 0, 0, 83, 0, }, /* 747 */
- { 82, 7, 12, 0, 0, 82, 0, }, /* 748 */
- { 82, 15, 12, 0, 0, 82, 0, }, /* 749 */
- { 121, 7, 12, 0, 0, 121, 0, }, /* 750 */
- { 121, 21, 12, 0, 0, 121, 0, }, /* 751 */
- { 121, 15, 12, 0, 0, 121, 0, }, /* 752 */
- { 89, 7, 12, 0, 0, 89, 0, }, /* 753 */
- { 130, 9, 12, 0, 64, 130, 0, }, /* 754 */
- { 130, 5, 12, 0, -64, 130, 0, }, /* 755 */
- { 130, 15, 12, 0, 0, 130, 0, }, /* 756 */
- { 144, 7, 12, 0, 0, 144, 0, }, /* 757 */
- { 144, 12, 3, 0, 0, 144, 0, }, /* 758 */
- { 144, 13, 12, 0, 0, 144, 0, }, /* 759 */
- { 1, 15, 12, 0, 0, 1, 0, }, /* 760 */
- { 147, 7, 12, 0, 0, 147, 0, }, /* 761 */
- { 147, 15, 12, 0, 0, 147, 0, }, /* 762 */
- { 148, 7, 12, 0, 0, 148, 0, }, /* 763 */
- { 148, 12, 3, 0, 0, 148, 0, }, /* 764 */
- { 148, 15, 12, 0, 0, 148, 0, }, /* 765 */
- { 148, 21, 12, 0, 0, 148, 0, }, /* 766 */
- { 149, 7, 12, 0, 0, 149, 0, }, /* 767 */
- { 94, 10, 5, 0, 0, 94, 0, }, /* 768 */
- { 94, 12, 3, 0, 0, 94, 0, }, /* 769 */
- { 94, 7, 12, 0, 0, 94, 0, }, /* 770 */
- { 94, 21, 12, 0, 0, 94, 0, }, /* 771 */
- { 94, 15, 12, 0, 0, 94, 0, }, /* 772 */
- { 94, 13, 12, 0, 0, 94, 0, }, /* 773 */
- { 85, 12, 3, 0, 0, 85, 0, }, /* 774 */
- { 85, 10, 5, 0, 0, 85, 0, }, /* 775 */
- { 85, 7, 12, 0, 0, 85, 0, }, /* 776 */
- { 85, 21, 12, 0, 0, 85, 0, }, /* 777 */
- { 85, 1, 4, 0, 0, 85, 0, }, /* 778 */
- { 101, 7, 12, 0, 0, 101, 0, }, /* 779 */
- { 101, 13, 12, 0, 0, 101, 0, }, /* 780 */
- { 96, 12, 3, 0, 0, 96, 0, }, /* 781 */
- { 96, 7, 12, 0, 0, 96, 0, }, /* 782 */
- { 96, 10, 5, 0, 0, 96, 0, }, /* 783 */
- { 96, 13, 12, 0, 0, 96, 0, }, /* 784 */
- { 96, 21, 12, 0, 0, 96, 0, }, /* 785 */
- { 111, 7, 12, 0, 0, 111, 0, }, /* 786 */
- { 111, 12, 3, 0, 0, 111, 0, }, /* 787 */
- { 111, 21, 12, 0, 0, 111, 0, }, /* 788 */
- { 100, 12, 3, 0, 0, 100, 0, }, /* 789 */
- { 100, 10, 5, 0, 0, 100, 0, }, /* 790 */
- { 100, 7, 12, 0, 0, 100, 0, }, /* 791 */
- { 100, 7, 4, 0, 0, 100, 0, }, /* 792 */
- { 100, 21, 12, 0, 0, 100, 0, }, /* 793 */
- { 100, 13, 12, 0, 0, 100, 0, }, /* 794 */
- { 48, 15, 12, 0, 0, 48, 0, }, /* 795 */
- { 108, 7, 12, 0, 0, 108, 0, }, /* 796 */
- { 108, 10, 5, 0, 0, 108, 0, }, /* 797 */
- { 108, 12, 3, 0, 0, 108, 0, }, /* 798 */
- { 108, 21, 12, 0, 0, 108, 0, }, /* 799 */
- { 129, 7, 12, 0, 0, 129, 0, }, /* 800 */
- { 129, 21, 12, 0, 0, 129, 0, }, /* 801 */
- { 109, 7, 12, 0, 0, 109, 0, }, /* 802 */
- { 109, 12, 3, 0, 0, 109, 0, }, /* 803 */
- { 109, 10, 5, 0, 0, 109, 0, }, /* 804 */
- { 109, 13, 12, 0, 0, 109, 0, }, /* 805 */
- { 107, 12, 3, 0, 0, 107, 0, }, /* 806 */
- { 107, 12, 3, 0, 0, -52, 0, }, /* 807 */
- { 107, 10, 5, 0, 0, 107, 0, }, /* 808 */
- { 107, 10, 5, 0, 0, -52, 0, }, /* 809 */
- { 107, 7, 12, 0, 0, 107, 0, }, /* 810 */
- { 28, 12, 3, 0, 0, -52, 0, }, /* 811 */
- { 107, 10, 3, 0, 0, 107, 0, }, /* 812 */
- { 135, 7, 12, 0, 0, 135, 0, }, /* 813 */
- { 135, 10, 5, 0, 0, 135, 0, }, /* 814 */
- { 135, 12, 3, 0, 0, 135, 0, }, /* 815 */
- { 135, 21, 12, 0, 0, 135, 0, }, /* 816 */
- { 135, 13, 12, 0, 0, 135, 0, }, /* 817 */
- { 124, 7, 12, 0, 0, 124, 0, }, /* 818 */
- { 124, 10, 3, 0, 0, 124, 0, }, /* 819 */
- { 124, 10, 5, 0, 0, 124, 0, }, /* 820 */
- { 124, 12, 3, 0, 0, 124, 0, }, /* 821 */
- { 124, 21, 12, 0, 0, 124, 0, }, /* 822 */
- { 124, 13, 12, 0, 0, 124, 0, }, /* 823 */
- { 123, 7, 12, 0, 0, 123, 0, }, /* 824 */
- { 123, 10, 3, 0, 0, 123, 0, }, /* 825 */
- { 123, 10, 5, 0, 0, 123, 0, }, /* 826 */
- { 123, 12, 3, 0, 0, 123, 0, }, /* 827 */
- { 123, 21, 12, 0, 0, 123, 0, }, /* 828 */
- { 114, 7, 12, 0, 0, 114, 0, }, /* 829 */
- { 114, 10, 5, 0, 0, 114, 0, }, /* 830 */
- { 114, 12, 3, 0, 0, 114, 0, }, /* 831 */
- { 114, 21, 12, 0, 0, 114, 0, }, /* 832 */
- { 114, 13, 12, 0, 0, 114, 0, }, /* 833 */
- { 102, 7, 12, 0, 0, 102, 0, }, /* 834 */
- { 102, 12, 3, 0, 0, 102, 0, }, /* 835 */
- { 102, 10, 5, 0, 0, 102, 0, }, /* 836 */
- { 102, 13, 12, 0, 0, 102, 0, }, /* 837 */
- { 126, 7, 12, 0, 0, 126, 0, }, /* 838 */
- { 126, 12, 3, 0, 0, 126, 0, }, /* 839 */
- { 126, 10, 5, 0, 0, 126, 0, }, /* 840 */
- { 126, 13, 12, 0, 0, 126, 0, }, /* 841 */
- { 126, 15, 12, 0, 0, 126, 0, }, /* 842 */
- { 126, 21, 12, 0, 0, 126, 0, }, /* 843 */
- { 126, 26, 12, 0, 0, 126, 0, }, /* 844 */
- { 142, 7, 12, 0, 0, 142, 0, }, /* 845 */
- { 142, 10, 5, 0, 0, 142, 0, }, /* 846 */
- { 142, 12, 3, 0, 0, 142, 0, }, /* 847 */
- { 142, 21, 12, 0, 0, 142, 0, }, /* 848 */
- { 125, 9, 12, 0, 32, 125, 0, }, /* 849 */
- { 125, 5, 12, 0, -32, 125, 0, }, /* 850 */
- { 125, 13, 12, 0, 0, 125, 0, }, /* 851 */
- { 125, 15, 12, 0, 0, 125, 0, }, /* 852 */
- { 125, 7, 12, 0, 0, 125, 0, }, /* 853 */
- { 150, 7, 12, 0, 0, 150, 0, }, /* 854 */
- { 150, 10, 5, 0, 0, 150, 0, }, /* 855 */
- { 150, 12, 3, 0, 0, 150, 0, }, /* 856 */
- { 150, 21, 12, 0, 0, 150, 0, }, /* 857 */
- { 141, 7, 12, 0, 0, 141, 0, }, /* 858 */
- { 141, 12, 3, 0, 0, 141, 0, }, /* 859 */
- { 141, 10, 5, 0, 0, 141, 0, }, /* 860 */
- { 141, 7, 4, 0, 0, 141, 0, }, /* 861 */
- { 141, 21, 12, 0, 0, 141, 0, }, /* 862 */
- { 140, 7, 12, 0, 0, 140, 0, }, /* 863 */
- { 140, 12, 3, 0, 0, 140, 0, }, /* 864 */
- { 140, 10, 5, 0, 0, 140, 0, }, /* 865 */
- { 140, 7, 4, 0, 0, 140, 0, }, /* 866 */
- { 140, 21, 12, 0, 0, 140, 0, }, /* 867 */
- { 122, 7, 12, 0, 0, 122, 0, }, /* 868 */
- { 133, 7, 12, 0, 0, 133, 0, }, /* 869 */
- { 133, 10, 5, 0, 0, 133, 0, }, /* 870 */
- { 133, 12, 3, 0, 0, 133, 0, }, /* 871 */
- { 133, 21, 12, 0, 0, 133, 0, }, /* 872 */
- { 133, 13, 12, 0, 0, 133, 0, }, /* 873 */
- { 133, 15, 12, 0, 0, 133, 0, }, /* 874 */
- { 134, 21, 12, 0, 0, 134, 0, }, /* 875 */
- { 134, 7, 12, 0, 0, 134, 0, }, /* 876 */
- { 134, 12, 3, 0, 0, 134, 0, }, /* 877 */
- { 134, 10, 5, 0, 0, 134, 0, }, /* 878 */
- { 138, 7, 12, 0, 0, 138, 0, }, /* 879 */
- { 138, 12, 3, 0, 0, 138, 0, }, /* 880 */
- { 138, 7, 4, 0, 0, 138, 0, }, /* 881 */
- { 138, 13, 12, 0, 0, 138, 0, }, /* 882 */
- { 143, 7, 12, 0, 0, 143, 0, }, /* 883 */
- { 143, 10, 5, 0, 0, 143, 0, }, /* 884 */
- { 143, 12, 3, 0, 0, 143, 0, }, /* 885 */
- { 143, 13, 12, 0, 0, 143, 0, }, /* 886 */
- { 145, 7, 12, 0, 0, 145, 0, }, /* 887 */
- { 145, 12, 3, 0, 0, 145, 0, }, /* 888 */
- { 145, 10, 5, 0, 0, 145, 0, }, /* 889 */
- { 145, 21, 12, 0, 0, 145, 0, }, /* 890 */
- { 54, 15, 12, 0, 0, 54, 0, }, /* 891 */
- { 54, 21, 12, 0, 0, 54, 0, }, /* 892 */
- { 63, 7, 12, 0, 0, 63, 0, }, /* 893 */
- { 63, 14, 12, 0, 0, 63, 0, }, /* 894 */
- { 63, 21, 12, 0, 0, 63, 0, }, /* 895 */
- { 80, 7, 12, 0, 0, 80, 0, }, /* 896 */
- { 80, 1, 2, 0, 0, 80, 0, }, /* 897 */
- { 127, 7, 12, 0, 0, 127, 0, }, /* 898 */
- { 115, 7, 12, 0, 0, 115, 0, }, /* 899 */
- { 115, 13, 12, 0, 0, 115, 0, }, /* 900 */
- { 115, 21, 12, 0, 0, 115, 0, }, /* 901 */
- { 103, 7, 12, 0, 0, 103, 0, }, /* 902 */
- { 103, 12, 3, 0, 0, 103, 0, }, /* 903 */
- { 103, 21, 12, 0, 0, 103, 0, }, /* 904 */
- { 119, 7, 12, 0, 0, 119, 0, }, /* 905 */
- { 119, 12, 3, 0, 0, 119, 0, }, /* 906 */
- { 119, 21, 12, 0, 0, 119, 0, }, /* 907 */
- { 119, 26, 12, 0, 0, 119, 0, }, /* 908 */
- { 119, 6, 12, 0, 0, 119, 0, }, /* 909 */
- { 119, 13, 12, 0, 0, 119, 0, }, /* 910 */
- { 119, 15, 12, 0, 0, 119, 0, }, /* 911 */
- { 146, 9, 12, 0, 32, 146, 0, }, /* 912 */
- { 146, 5, 12, 0, -32, 146, 0, }, /* 913 */
- { 146, 15, 12, 0, 0, 146, 0, }, /* 914 */
- { 146, 21, 12, 0, 0, 146, 0, }, /* 915 */
- { 99, 7, 12, 0, 0, 99, 0, }, /* 916 */
- { 99, 12, 3, 0, 0, 99, 0, }, /* 917 */
- { 99, 10, 5, 0, 0, 99, 0, }, /* 918 */
- { 99, 6, 12, 0, 0, 99, 0, }, /* 919 */
- { 137, 6, 12, 0, 0, 137, 0, }, /* 920 */
- { 139, 6, 12, 0, 0, 139, 0, }, /* 921 */
- { 137, 7, 12, 0, 0, 137, 0, }, /* 922 */
- { 139, 7, 12, 0, 0, 139, 0, }, /* 923 */
- { 105, 7, 12, 0, 0, 105, 0, }, /* 924 */
- { 105, 26, 12, 0, 0, 105, 0, }, /* 925 */
- { 105, 12, 3, 0, 0, 105, 0, }, /* 926 */
- { 105, 21, 12, 0, 0, 105, 0, }, /* 927 */
- { 10, 1, 2, 0, 0, 105, 0, }, /* 928 */
- { 10, 10, 3, 0, 0, 10, 0, }, /* 929 */
- { 10, 10, 5, 0, 0, 10, 0, }, /* 930 */
- { 20, 12, 3, 0, 0, 20, 0, }, /* 931 */
- { 131, 26, 12, 0, 0, 131, 0, }, /* 932 */
- { 131, 12, 3, 0, 0, 131, 0, }, /* 933 */
- { 131, 21, 12, 0, 0, 131, 0, }, /* 934 */
- { 18, 12, 3, 0, 0, 18, 0, }, /* 935 */
- { 151, 7, 12, 0, 0, 151, 0, }, /* 936 */
- { 151, 12, 3, 0, 0, 151, 0, }, /* 937 */
- { 151, 6, 12, 0, 0, 151, 0, }, /* 938 */
- { 151, 13, 12, 0, 0, 151, 0, }, /* 939 */
- { 151, 26, 12, 0, 0, 151, 0, }, /* 940 */
- { 152, 7, 12, 0, 0, 152, 0, }, /* 941 */
- { 152, 12, 3, 0, 0, 152, 0, }, /* 942 */
- { 152, 13, 12, 0, 0, 152, 0, }, /* 943 */
- { 152, 23, 12, 0, 0, 152, 0, }, /* 944 */
- { 113, 7, 12, 0, 0, 113, 0, }, /* 945 */
- { 113, 15, 12, 0, 0, 113, 0, }, /* 946 */
- { 113, 12, 3, 0, 0, 113, 0, }, /* 947 */
- { 132, 9, 12, 0, 34, 132, 0, }, /* 948 */
- { 132, 5, 12, 0, -34, 132, 0, }, /* 949 */
- { 132, 12, 3, 0, 0, 132, 0, }, /* 950 */
- { 132, 6, 12, 0, 0, 132, 0, }, /* 951 */
- { 132, 13, 12, 0, 0, 132, 0, }, /* 952 */
- { 132, 21, 12, 0, 0, 132, 0, }, /* 953 */
- { 0, 2, 14, 0, 0, 0, 0, }, /* 954 */
- { 10, 26, 11, 0, 0, 10, 0, }, /* 955 */
- { 27, 26, 12, 0, 0, 27, 0, }, /* 956 */
- { 10, 24, 3, 0, 0, 10, 0, }, /* 957 */
- { 10, 1, 3, 0, 0, 10, 0, }, /* 958 */
+ { 10, 24, 12, 0, 0, -61, 0, }, /* 605 */
+ { 34, 9, 12, 0, -35332, 34, 0, }, /* 606 */
+ { 34, 9, 12, 0, -42280, 34, 0, }, /* 607 */
+ { 34, 5, 12, 0, 48, 34, 0, }, /* 608 */
+ { 34, 9, 12, 0, -42308, 34, 0, }, /* 609 */
+ { 34, 9, 12, 0, -42319, 34, 0, }, /* 610 */
+ { 34, 9, 12, 0, -42315, 34, 0, }, /* 611 */
+ { 34, 9, 12, 0, -42305, 34, 0, }, /* 612 */
+ { 34, 9, 12, 0, -42258, 34, 0, }, /* 613 */
+ { 34, 9, 12, 0, -42282, 34, 0, }, /* 614 */
+ { 34, 9, 12, 0, -42261, 34, 0, }, /* 615 */
+ { 34, 9, 12, 0, 928, 34, 0, }, /* 616 */
+ { 34, 9, 12, 0, -48, 34, 0, }, /* 617 */
+ { 34, 9, 12, 0, -42307, 34, 0, }, /* 618 */
+ { 34, 9, 12, 0, -35384, 34, 0, }, /* 619 */
+ { 49, 7, 12, 0, 0, 49, 0, }, /* 620 */
+ { 49, 12, 3, 0, 0, 49, 0, }, /* 621 */
+ { 49, 10, 5, 0, 0, 49, 0, }, /* 622 */
+ { 49, 26, 12, 0, 0, 49, 0, }, /* 623 */
+ { 10, 15, 12, 0, 0, -224, 0, }, /* 624 */
+ { 10, 15, 12, 0, 0, -210, 0, }, /* 625 */
+ { 10, 26, 12, 0, 0, -171, 0, }, /* 626 */
+ { 10, 23, 12, 0, 0, -171, 0, }, /* 627 */
+ { 65, 7, 12, 0, 0, 65, 0, }, /* 628 */
+ { 65, 21, 12, 0, 0, 65, 0, }, /* 629 */
+ { 75, 10, 5, 0, 0, 75, 0, }, /* 630 */
+ { 75, 7, 12, 0, 0, 75, 0, }, /* 631 */
+ { 75, 12, 3, 0, 0, 75, 0, }, /* 632 */
+ { 75, 21, 12, 0, 0, 75, 0, }, /* 633 */
+ { 75, 13, 12, 0, 0, 75, 0, }, /* 634 */
+ { 15, 12, 3, 0, 0, -13, 0, }, /* 635 */
+ { 15, 7, 12, 0, 0, -46, 0, }, /* 636 */
+ { 69, 13, 12, 0, 0, 69, 0, }, /* 637 */
+ { 69, 7, 12, 0, 0, 69, 0, }, /* 638 */
+ { 69, 12, 3, 0, 0, 69, 0, }, /* 639 */
+ { 10, 21, 12, 0, 0, -108, 0, }, /* 640 */
+ { 69, 21, 12, 0, 0, 69, 0, }, /* 641 */
+ { 74, 7, 12, 0, 0, 74, 0, }, /* 642 */
+ { 74, 12, 3, 0, 0, 74, 0, }, /* 643 */
+ { 74, 10, 5, 0, 0, 74, 0, }, /* 644 */
+ { 74, 21, 12, 0, 0, 74, 0, }, /* 645 */
+ { 84, 12, 3, 0, 0, 84, 0, }, /* 646 */
+ { 84, 10, 5, 0, 0, 84, 0, }, /* 647 */
+ { 84, 7, 12, 0, 0, 84, 0, }, /* 648 */
+ { 84, 21, 12, 0, 0, 84, 0, }, /* 649 */
+ { 10, 6, 12, 0, 0, -19, 0, }, /* 650 */
+ { 84, 13, 12, 0, 0, 84, 0, }, /* 651 */
+ { 39, 6, 12, 0, 0, 39, 0, }, /* 652 */
+ { 68, 7, 12, 0, 0, 68, 0, }, /* 653 */
+ { 68, 12, 3, 0, 0, 68, 0, }, /* 654 */
+ { 68, 10, 5, 0, 0, 68, 0, }, /* 655 */
+ { 68, 13, 12, 0, 0, 68, 0, }, /* 656 */
+ { 68, 21, 12, 0, 0, 68, 0, }, /* 657 */
+ { 92, 7, 12, 0, 0, 92, 0, }, /* 658 */
+ { 92, 12, 3, 0, 0, 92, 0, }, /* 659 */
+ { 92, 6, 12, 0, 0, 92, 0, }, /* 660 */
+ { 92, 21, 12, 0, 0, 92, 0, }, /* 661 */
+ { 87, 7, 12, 0, 0, 87, 0, }, /* 662 */
+ { 87, 10, 5, 0, 0, 87, 0, }, /* 663 */
+ { 87, 12, 3, 0, 0, 87, 0, }, /* 664 */
+ { 87, 21, 12, 0, 0, 87, 0, }, /* 665 */
+ { 87, 6, 12, 0, 0, 87, 0, }, /* 666 */
+ { 34, 5, 12, 0, -928, 34, 0, }, /* 667 */
+ { 9, 5, 12, 0, -38864, 9, 0, }, /* 668 */
+ { 87, 13, 12, 0, 0, 87, 0, }, /* 669 */
+ { 24, 7, 9, 0, 0, 24, 0, }, /* 670 */
+ { 24, 7, 10, 0, 0, 24, 0, }, /* 671 */
+ { 0, 4, 12, 0, 0, 0, 0, }, /* 672 */
+ { 0, 3, 12, 0, 0, 0, 0, }, /* 673 */
+ { 26, 25, 12, 0, 0, 26, 0, }, /* 674 */
+ { 1, 24, 12, 0, 0, 1, 0, }, /* 675 */
+ { 1, 7, 12, 0, 0, -10, 0, }, /* 676 */
+ { 1, 26, 12, 0, 0, -10, 0, }, /* 677 */
+ { 10, 6, 3, 0, 0, -64, 0, }, /* 678 */
+ { 36, 7, 12, 0, 0, 36, 0, }, /* 679 */
+ { 10, 21, 12, 0, 0, -22, 0, }, /* 680 */
+ { 10, 15, 12, 0, 0, -92, 0, }, /* 681 */
+ { 10, 26, 12, 0, 0, -22, 0, }, /* 682 */
+ { 20, 14, 12, 0, 0, 20, 0, }, /* 683 */
+ { 20, 15, 12, 0, 0, 20, 0, }, /* 684 */
+ { 20, 26, 12, 0, 0, 20, 0, }, /* 685 */
+ { 71, 7, 12, 0, 0, 71, 0, }, /* 686 */
+ { 67, 7, 12, 0, 0, 67, 0, }, /* 687 */
+ { 28, 12, 3, 0, 0, -1, 0, }, /* 688 */
+ { 10, 15, 12, 0, 0, -1, 0, }, /* 689 */
+ { 42, 7, 12, 0, 0, 42, 0, }, /* 690 */
+ { 42, 15, 12, 0, 0, 42, 0, }, /* 691 */
+ { 19, 7, 12, 0, 0, 19, 0, }, /* 692 */
+ { 19, 14, 12, 0, 0, 19, 0, }, /* 693 */
+ { 118, 7, 12, 0, 0, 118, 0, }, /* 694 */
+ { 118, 12, 3, 0, 0, 118, 0, }, /* 695 */
+ { 60, 7, 12, 0, 0, 60, 0, }, /* 696 */
+ { 60, 21, 12, 0, 0, 60, 0, }, /* 697 */
+ { 43, 7, 12, 0, 0, 43, 0, }, /* 698 */
+ { 43, 21, 12, 0, 0, 43, 0, }, /* 699 */
+ { 43, 14, 12, 0, 0, 43, 0, }, /* 700 */
+ { 14, 9, 12, 0, 40, 14, 0, }, /* 701 */
+ { 14, 5, 12, 0, -40, 14, 0, }, /* 702 */
+ { 47, 7, 12, 0, 0, 47, 0, }, /* 703 */
+ { 45, 7, 12, 0, 0, 45, 0, }, /* 704 */
+ { 45, 13, 12, 0, 0, 45, 0, }, /* 705 */
+ { 136, 9, 12, 0, 40, 136, 0, }, /* 706 */
+ { 136, 5, 12, 0, -40, 136, 0, }, /* 707 */
+ { 106, 7, 12, 0, 0, 106, 0, }, /* 708 */
+ { 104, 7, 12, 0, 0, 104, 0, }, /* 709 */
+ { 104, 21, 12, 0, 0, 104, 0, }, /* 710 */
+ { 110, 7, 12, 0, 0, 110, 0, }, /* 711 */
+ { 12, 7, 12, 0, 0, 12, 0, }, /* 712 */
+ { 81, 7, 12, 0, 0, 81, 0, }, /* 713 */
+ { 81, 21, 12, 0, 0, 81, 0, }, /* 714 */
+ { 81, 15, 12, 0, 0, 81, 0, }, /* 715 */
+ { 120, 7, 12, 0, 0, 120, 0, }, /* 716 */
+ { 120, 26, 12, 0, 0, 120, 0, }, /* 717 */
+ { 120, 15, 12, 0, 0, 120, 0, }, /* 718 */
+ { 116, 7, 12, 0, 0, 116, 0, }, /* 719 */
+ { 116, 15, 12, 0, 0, 116, 0, }, /* 720 */
+ { 128, 7, 12, 0, 0, 128, 0, }, /* 721 */
+ { 128, 15, 12, 0, 0, 128, 0, }, /* 722 */
+ { 66, 7, 12, 0, 0, 66, 0, }, /* 723 */
+ { 66, 15, 12, 0, 0, 66, 0, }, /* 724 */
+ { 66, 21, 12, 0, 0, 66, 0, }, /* 725 */
+ { 72, 7, 12, 0, 0, 72, 0, }, /* 726 */
+ { 72, 21, 12, 0, 0, 72, 0, }, /* 727 */
+ { 98, 7, 12, 0, 0, 98, 0, }, /* 728 */
+ { 97, 7, 12, 0, 0, 97, 0, }, /* 729 */
+ { 97, 15, 12, 0, 0, 97, 0, }, /* 730 */
+ { 31, 7, 12, 0, 0, 31, 0, }, /* 731 */
+ { 31, 12, 3, 0, 0, 31, 0, }, /* 732 */
+ { 31, 15, 12, 0, 0, 31, 0, }, /* 733 */
+ { 31, 21, 12, 0, 0, 31, 0, }, /* 734 */
+ { 88, 7, 12, 0, 0, 88, 0, }, /* 735 */
+ { 88, 15, 12, 0, 0, 88, 0, }, /* 736 */
+ { 88, 21, 12, 0, 0, 88, 0, }, /* 737 */
+ { 117, 7, 12, 0, 0, 117, 0, }, /* 738 */
+ { 117, 15, 12, 0, 0, 117, 0, }, /* 739 */
+ { 112, 7, 12, 0, 0, 112, 0, }, /* 740 */
+ { 112, 26, 12, 0, 0, 112, 0, }, /* 741 */
+ { 112, 12, 3, 0, 0, 112, 0, }, /* 742 */
+ { 112, 15, 12, 0, 0, 112, 0, }, /* 743 */
+ { 112, 21, 12, 0, 0, 112, 0, }, /* 744 */
+ { 78, 7, 12, 0, 0, 78, 0, }, /* 745 */
+ { 78, 21, 12, 0, 0, 78, 0, }, /* 746 */
+ { 83, 7, 12, 0, 0, 83, 0, }, /* 747 */
+ { 83, 15, 12, 0, 0, 83, 0, }, /* 748 */
+ { 82, 7, 12, 0, 0, 82, 0, }, /* 749 */
+ { 82, 15, 12, 0, 0, 82, 0, }, /* 750 */
+ { 121, 7, 12, 0, 0, 121, 0, }, /* 751 */
+ { 121, 21, 12, 0, 0, 121, 0, }, /* 752 */
+ { 121, 15, 12, 0, 0, 121, 0, }, /* 753 */
+ { 89, 7, 12, 0, 0, 89, 0, }, /* 754 */
+ { 130, 9, 12, 0, 64, 130, 0, }, /* 755 */
+ { 130, 5, 12, 0, -64, 130, 0, }, /* 756 */
+ { 130, 15, 12, 0, 0, 130, 0, }, /* 757 */
+ { 144, 7, 12, 0, 0, 144, 0, }, /* 758 */
+ { 144, 12, 3, 0, 0, 144, 0, }, /* 759 */
+ { 144, 13, 12, 0, 0, 144, 0, }, /* 760 */
+ { 1, 15, 12, 0, 0, 1, 0, }, /* 761 */
+ { 156, 7, 12, 0, 0, 156, 0, }, /* 762 */
+ { 156, 12, 3, 0, 0, 156, 0, }, /* 763 */
+ { 156, 17, 12, 0, 0, 156, 0, }, /* 764 */
+ { 147, 7, 12, 0, 0, 147, 0, }, /* 765 */
+ { 147, 15, 12, 0, 0, 147, 0, }, /* 766 */
+ { 148, 7, 12, 0, 0, 148, 0, }, /* 767 */
+ { 148, 12, 3, 0, 0, 148, 0, }, /* 768 */
+ { 148, 15, 12, 0, 0, 148, 0, }, /* 769 */
+ { 148, 21, 12, 0, 0, 148, 0, }, /* 770 */
+ { 153, 7, 12, 0, 0, 153, 0, }, /* 771 */
+ { 153, 15, 12, 0, 0, 153, 0, }, /* 772 */
+ { 149, 7, 12, 0, 0, 149, 0, }, /* 773 */
+ { 94, 10, 5, 0, 0, 94, 0, }, /* 774 */
+ { 94, 12, 3, 0, 0, 94, 0, }, /* 775 */
+ { 94, 7, 12, 0, 0, 94, 0, }, /* 776 */
+ { 94, 21, 12, 0, 0, 94, 0, }, /* 777 */
+ { 94, 15, 12, 0, 0, 94, 0, }, /* 778 */
+ { 94, 13, 12, 0, 0, 94, 0, }, /* 779 */
+ { 85, 12, 3, 0, 0, 85, 0, }, /* 780 */
+ { 85, 10, 5, 0, 0, 85, 0, }, /* 781 */
+ { 85, 7, 12, 0, 0, 85, 0, }, /* 782 */
+ { 85, 21, 12, 0, 0, 85, 0, }, /* 783 */
+ { 85, 1, 4, 0, 0, 85, 0, }, /* 784 */
+ { 101, 7, 12, 0, 0, 101, 0, }, /* 785 */
+ { 101, 13, 12, 0, 0, 101, 0, }, /* 786 */
+ { 96, 12, 3, 0, 0, 96, 0, }, /* 787 */
+ { 96, 7, 12, 0, 0, 96, 0, }, /* 788 */
+ { 96, 10, 5, 0, 0, 96, 0, }, /* 789 */
+ { 96, 13, 12, 0, 0, 96, 0, }, /* 790 */
+ { 96, 21, 12, 0, 0, 96, 0, }, /* 791 */
+ { 111, 7, 12, 0, 0, 111, 0, }, /* 792 */
+ { 111, 12, 3, 0, 0, 111, 0, }, /* 793 */
+ { 111, 21, 12, 0, 0, 111, 0, }, /* 794 */
+ { 100, 12, 3, 0, 0, 100, 0, }, /* 795 */
+ { 100, 10, 5, 0, 0, 100, 0, }, /* 796 */
+ { 100, 7, 12, 0, 0, 100, 0, }, /* 797 */
+ { 100, 7, 4, 0, 0, 100, 0, }, /* 798 */
+ { 100, 21, 12, 0, 0, 100, 0, }, /* 799 */
+ { 100, 13, 12, 0, 0, 100, 0, }, /* 800 */
+ { 48, 15, 12, 0, 0, 48, 0, }, /* 801 */
+ { 108, 7, 12, 0, 0, 108, 0, }, /* 802 */
+ { 108, 10, 5, 0, 0, 108, 0, }, /* 803 */
+ { 108, 12, 3, 0, 0, 108, 0, }, /* 804 */
+ { 108, 21, 12, 0, 0, 108, 0, }, /* 805 */
+ { 129, 7, 12, 0, 0, 129, 0, }, /* 806 */
+ { 129, 21, 12, 0, 0, 129, 0, }, /* 807 */
+ { 109, 7, 12, 0, 0, 109, 0, }, /* 808 */
+ { 109, 12, 3, 0, 0, 109, 0, }, /* 809 */
+ { 109, 10, 5, 0, 0, 109, 0, }, /* 810 */
+ { 109, 13, 12, 0, 0, 109, 0, }, /* 811 */
+ { 107, 12, 3, 0, 0, 107, 0, }, /* 812 */
+ { 107, 12, 3, 0, 0, -52, 0, }, /* 813 */
+ { 107, 10, 5, 0, 0, 107, 0, }, /* 814 */
+ { 107, 10, 5, 0, 0, -52, 0, }, /* 815 */
+ { 107, 7, 12, 0, 0, 107, 0, }, /* 816 */
+ { 28, 12, 3, 0, 0, -52, 0, }, /* 817 */
+ { 107, 10, 3, 0, 0, 107, 0, }, /* 818 */
+ { 135, 7, 12, 0, 0, 135, 0, }, /* 819 */
+ { 135, 10, 5, 0, 0, 135, 0, }, /* 820 */
+ { 135, 12, 3, 0, 0, 135, 0, }, /* 821 */
+ { 135, 21, 12, 0, 0, 135, 0, }, /* 822 */
+ { 135, 13, 12, 0, 0, 135, 0, }, /* 823 */
+ { 124, 7, 12, 0, 0, 124, 0, }, /* 824 */
+ { 124, 10, 3, 0, 0, 124, 0, }, /* 825 */
+ { 124, 10, 5, 0, 0, 124, 0, }, /* 826 */
+ { 124, 12, 3, 0, 0, 124, 0, }, /* 827 */
+ { 124, 21, 12, 0, 0, 124, 0, }, /* 828 */
+ { 124, 13, 12, 0, 0, 124, 0, }, /* 829 */
+ { 123, 7, 12, 0, 0, 123, 0, }, /* 830 */
+ { 123, 10, 3, 0, 0, 123, 0, }, /* 831 */
+ { 123, 10, 5, 0, 0, 123, 0, }, /* 832 */
+ { 123, 12, 3, 0, 0, 123, 0, }, /* 833 */
+ { 123, 21, 12, 0, 0, 123, 0, }, /* 834 */
+ { 114, 7, 12, 0, 0, 114, 0, }, /* 835 */
+ { 114, 10, 5, 0, 0, 114, 0, }, /* 836 */
+ { 114, 12, 3, 0, 0, 114, 0, }, /* 837 */
+ { 114, 21, 12, 0, 0, 114, 0, }, /* 838 */
+ { 114, 13, 12, 0, 0, 114, 0, }, /* 839 */
+ { 102, 7, 12, 0, 0, 102, 0, }, /* 840 */
+ { 102, 12, 3, 0, 0, 102, 0, }, /* 841 */
+ { 102, 10, 5, 0, 0, 102, 0, }, /* 842 */
+ { 102, 13, 12, 0, 0, 102, 0, }, /* 843 */
+ { 126, 7, 12, 0, 0, 126, 0, }, /* 844 */
+ { 126, 12, 3, 0, 0, 126, 0, }, /* 845 */
+ { 126, 10, 5, 0, 0, 126, 0, }, /* 846 */
+ { 126, 13, 12, 0, 0, 126, 0, }, /* 847 */
+ { 126, 15, 12, 0, 0, 126, 0, }, /* 848 */
+ { 126, 21, 12, 0, 0, 126, 0, }, /* 849 */
+ { 126, 26, 12, 0, 0, 126, 0, }, /* 850 */
+ { 142, 7, 12, 0, 0, 142, 0, }, /* 851 */
+ { 142, 10, 5, 0, 0, 142, 0, }, /* 852 */
+ { 142, 12, 3, 0, 0, 142, 0, }, /* 853 */
+ { 142, 21, 12, 0, 0, 142, 0, }, /* 854 */
+ { 125, 9, 12, 0, 32, 125, 0, }, /* 855 */
+ { 125, 5, 12, 0, -32, 125, 0, }, /* 856 */
+ { 125, 13, 12, 0, 0, 125, 0, }, /* 857 */
+ { 125, 15, 12, 0, 0, 125, 0, }, /* 858 */
+ { 125, 7, 12, 0, 0, 125, 0, }, /* 859 */
+ { 154, 7, 12, 0, 0, 154, 0, }, /* 860 */
+ { 154, 10, 3, 0, 0, 154, 0, }, /* 861 */
+ { 154, 10, 5, 0, 0, 154, 0, }, /* 862 */
+ { 154, 12, 3, 0, 0, 154, 0, }, /* 863 */
+ { 154, 7, 4, 0, 0, 154, 0, }, /* 864 */
+ { 154, 21, 12, 0, 0, 154, 0, }, /* 865 */
+ { 154, 13, 12, 0, 0, 154, 0, }, /* 866 */
+ { 150, 7, 12, 0, 0, 150, 0, }, /* 867 */
+ { 150, 10, 5, 0, 0, 150, 0, }, /* 868 */
+ { 150, 12, 3, 0, 0, 150, 0, }, /* 869 */
+ { 150, 21, 12, 0, 0, 150, 0, }, /* 870 */
+ { 141, 7, 12, 0, 0, 141, 0, }, /* 871 */
+ { 141, 12, 3, 0, 0, 141, 0, }, /* 872 */
+ { 141, 10, 5, 0, 0, 141, 0, }, /* 873 */
+ { 141, 7, 4, 0, 0, 141, 0, }, /* 874 */
+ { 141, 21, 12, 0, 0, 141, 0, }, /* 875 */
+ { 140, 7, 12, 0, 0, 140, 0, }, /* 876 */
+ { 140, 12, 3, 0, 0, 140, 0, }, /* 877 */
+ { 140, 10, 5, 0, 0, 140, 0, }, /* 878 */
+ { 140, 7, 4, 0, 0, 140, 0, }, /* 879 */
+ { 140, 21, 12, 0, 0, 140, 0, }, /* 880 */
+ { 122, 7, 12, 0, 0, 122, 0, }, /* 881 */
+ { 133, 7, 12, 0, 0, 133, 0, }, /* 882 */
+ { 133, 10, 5, 0, 0, 133, 0, }, /* 883 */
+ { 133, 12, 3, 0, 0, 133, 0, }, /* 884 */
+ { 133, 21, 12, 0, 0, 133, 0, }, /* 885 */
+ { 133, 13, 12, 0, 0, 133, 0, }, /* 886 */
+ { 133, 15, 12, 0, 0, 133, 0, }, /* 887 */
+ { 134, 21, 12, 0, 0, 134, 0, }, /* 888 */
+ { 134, 7, 12, 0, 0, 134, 0, }, /* 889 */
+ { 134, 12, 3, 0, 0, 134, 0, }, /* 890 */
+ { 134, 10, 5, 0, 0, 134, 0, }, /* 891 */
+ { 138, 7, 12, 0, 0, 138, 0, }, /* 892 */
+ { 138, 12, 3, 0, 0, 138, 0, }, /* 893 */
+ { 138, 7, 4, 0, 0, 138, 0, }, /* 894 */
+ { 138, 13, 12, 0, 0, 138, 0, }, /* 895 */
+ { 143, 7, 12, 0, 0, 143, 0, }, /* 896 */
+ { 143, 10, 5, 0, 0, 143, 0, }, /* 897 */
+ { 143, 12, 3, 0, 0, 143, 0, }, /* 898 */
+ { 143, 13, 12, 0, 0, 143, 0, }, /* 899 */
+ { 145, 7, 12, 0, 0, 145, 0, }, /* 900 */
+ { 145, 12, 3, 0, 0, 145, 0, }, /* 901 */
+ { 145, 10, 5, 0, 0, 145, 0, }, /* 902 */
+ { 145, 21, 12, 0, 0, 145, 0, }, /* 903 */
+ { 54, 15, 12, 0, 0, 54, 0, }, /* 904 */
+ { 54, 21, 12, 0, 0, 54, 0, }, /* 905 */
+ { 63, 7, 12, 0, 0, 63, 0, }, /* 906 */
+ { 63, 14, 12, 0, 0, 63, 0, }, /* 907 */
+ { 63, 21, 12, 0, 0, 63, 0, }, /* 908 */
+ { 80, 7, 12, 0, 0, 80, 0, }, /* 909 */
+ { 80, 1, 2, 0, 0, 80, 0, }, /* 910 */
+ { 127, 7, 12, 0, 0, 127, 0, }, /* 911 */
+ { 115, 7, 12, 0, 0, 115, 0, }, /* 912 */
+ { 115, 13, 12, 0, 0, 115, 0, }, /* 913 */
+ { 115, 21, 12, 0, 0, 115, 0, }, /* 914 */
+ { 103, 7, 12, 0, 0, 103, 0, }, /* 915 */
+ { 103, 12, 3, 0, 0, 103, 0, }, /* 916 */
+ { 103, 21, 12, 0, 0, 103, 0, }, /* 917 */
+ { 119, 7, 12, 0, 0, 119, 0, }, /* 918 */
+ { 119, 12, 3, 0, 0, 119, 0, }, /* 919 */
+ { 119, 21, 12, 0, 0, 119, 0, }, /* 920 */
+ { 119, 26, 12, 0, 0, 119, 0, }, /* 921 */
+ { 119, 6, 12, 0, 0, 119, 0, }, /* 922 */
+ { 119, 13, 12, 0, 0, 119, 0, }, /* 923 */
+ { 119, 15, 12, 0, 0, 119, 0, }, /* 924 */
+ { 146, 9, 12, 0, 32, 146, 0, }, /* 925 */
+ { 146, 5, 12, 0, -32, 146, 0, }, /* 926 */
+ { 146, 15, 12, 0, 0, 146, 0, }, /* 927 */
+ { 146, 21, 12, 0, 0, 146, 0, }, /* 928 */
+ { 99, 7, 12, 0, 0, 99, 0, }, /* 929 */
+ { 99, 12, 3, 0, 0, 99, 0, }, /* 930 */
+ { 99, 10, 5, 0, 0, 99, 0, }, /* 931 */
+ { 99, 6, 12, 0, 0, 99, 0, }, /* 932 */
+ { 137, 6, 12, 0, 0, 137, 0, }, /* 933 */
+ { 139, 6, 12, 0, 0, 139, 0, }, /* 934 */
+ { 155, 12, 3, 0, 0, 155, 0, }, /* 935 */
+ { 23, 10, 5, 0, 0, 23, 0, }, /* 936 */
+ { 137, 7, 12, 0, 0, 137, 0, }, /* 937 */
+ { 155, 7, 12, 0, 0, 155, 0, }, /* 938 */
+ { 139, 7, 12, 0, 0, 139, 0, }, /* 939 */
+ { 105, 7, 12, 0, 0, 105, 0, }, /* 940 */
+ { 105, 26, 12, 0, 0, 105, 0, }, /* 941 */
+ { 105, 12, 3, 0, 0, 105, 0, }, /* 942 */
+ { 105, 21, 12, 0, 0, 105, 0, }, /* 943 */
+ { 10, 1, 2, 0, 0, 105, 0, }, /* 944 */
+ { 10, 10, 3, 0, 0, 10, 0, }, /* 945 */
+ { 10, 10, 5, 0, 0, 10, 0, }, /* 946 */
+ { 20, 12, 3, 0, 0, 20, 0, }, /* 947 */
+ { 131, 26, 12, 0, 0, 131, 0, }, /* 948 */
+ { 131, 12, 3, 0, 0, 131, 0, }, /* 949 */
+ { 131, 21, 12, 0, 0, 131, 0, }, /* 950 */
+ { 18, 12, 3, 0, 0, 18, 0, }, /* 951 */
+ { 151, 7, 12, 0, 0, 151, 0, }, /* 952 */
+ { 151, 12, 3, 0, 0, 151, 0, }, /* 953 */
+ { 151, 6, 12, 0, 0, 151, 0, }, /* 954 */
+ { 151, 13, 12, 0, 0, 151, 0, }, /* 955 */
+ { 151, 26, 12, 0, 0, 151, 0, }, /* 956 */
+ { 152, 7, 12, 0, 0, 152, 0, }, /* 957 */
+ { 152, 12, 3, 0, 0, 152, 0, }, /* 958 */
+ { 152, 13, 12, 0, 0, 152, 0, }, /* 959 */
+ { 152, 23, 12, 0, 0, 152, 0, }, /* 960 */
+ { 113, 7, 12, 0, 0, 113, 0, }, /* 961 */
+ { 113, 15, 12, 0, 0, 113, 0, }, /* 962 */
+ { 113, 12, 3, 0, 0, 113, 0, }, /* 963 */
+ { 132, 9, 12, 0, 34, 132, 0, }, /* 964 */
+ { 132, 5, 12, 0, -34, 132, 0, }, /* 965 */
+ { 132, 12, 3, 0, 0, 132, 0, }, /* 966 */
+ { 132, 6, 12, 0, 0, 132, 0, }, /* 967 */
+ { 132, 13, 12, 0, 0, 132, 0, }, /* 968 */
+ { 132, 21, 12, 0, 0, 132, 0, }, /* 969 */
+ { 0, 2, 14, 0, 0, 0, 0, }, /* 970 */
+ { 10, 26, 11, 0, 0, 10, 0, }, /* 971 */
+ { 27, 26, 12, 0, 0, 27, 0, }, /* 972 */
+ { 10, 24, 3, 0, 0, 10, 0, }, /* 973 */
+ { 10, 1, 3, 0, 0, 10, 0, }, /* 974 */
};
const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */
@@ -1185,37 +1204,37 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */
126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+F000 */
126,126, 98, 98,127,128,129,130,131,131,132,133,134,135,136,137, /* U+F800 */
138,139,140,141,142,143,144,145,146,147,148,142,149,149,150,142, /* U+10000 */
-151,152,153,154,155,156,157,158,159,160,161,142,162,142,163,164, /* U+10800 */
-165,166,167,168,169,170,171,142,172,173,142,174,175,176,177,142, /* U+11000 */
-178,179,142,180,181,182,142,142,183,184,185,186,142,187,142,188, /* U+11800 */
-189,189,189,189,189,189,189,190,191,189,192,142,142,142,142,142, /* U+12000 */
+151,152,153,154,155,156,157,158,159,160,161,142,162,163,164,165, /* U+10800 */
+166,167,168,169,170,171,172,142,173,174,142,175,176,177,178,142, /* U+11000 */
+179,180,181,182,183,184,142,142,185,186,187,188,142,189,142,190, /* U+11800 */
+191,191,191,191,191,191,191,192,193,191,194,142,142,142,142,142, /* U+12000 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+12800 */
-193,193,193,193,193,193,193,193,194,142,142,142,142,142,142,142, /* U+13000 */
+195,195,195,195,195,195,195,195,196,142,142,142,142,142,142,142, /* U+13000 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+13800 */
-142,142,142,142,142,142,142,142,195,195,195,195,196,142,142,142, /* U+14000 */
+142,142,142,142,142,142,142,142,197,197,197,197,198,142,142,142, /* U+14000 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+14800 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+15000 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+15800 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+16000 */
-197,197,197,197,198,199,200,201,142,142,142,142,202,203,204,205, /* U+16800 */
-206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206, /* U+17000 */
-206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206, /* U+17800 */
-206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,207, /* U+18000 */
-206,206,206,206,206,208,142,142,142,142,142,142,142,142,142,142, /* U+18800 */
+199,199,199,199,200,201,202,203,142,142,142,142,204,205,206,207, /* U+16800 */
+208,208,208,208,208,208,208,208,208,208,208,208,208,208,208,208, /* U+17000 */
+208,208,208,208,208,208,208,208,208,208,208,208,208,208,208,208, /* U+17800 */
+208,208,208,208,208,208,208,208,208,208,208,208,208,208,208,209, /* U+18000 */
+208,208,208,208,208,208,210,210,210,211,212,142,142,142,142,142, /* U+18800 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+19000 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+19800 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+1A000 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+1A800 */
-209,210,211,212,212,213,142,142,142,142,142,142,142,142,142,142, /* U+1B000 */
-142,142,142,142,142,142,142,142,214,215,142,142,142,142,142,142, /* U+1B800 */
+213,214,215,216,216,217,142,142,142,142,142,142,142,142,142,142, /* U+1B000 */
+142,142,142,142,142,142,142,142,218,219,142,142,142,142,142,142, /* U+1B800 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+1C000 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+1C800 */
- 71,216,217,218,219,220,221,142,222,223,224,225,226,227,228,229, /* U+1D000 */
-230,230,230,230,231,232,142,142,142,142,142,142,142,142,142,142, /* U+1D800 */
-233,142,234,142,142,235,142,142,142,142,142,142,142,142,142,142, /* U+1E000 */
-236,237,238,142,142,142,142,142,239,240,241,142,242,243,142,142, /* U+1E800 */
-244,245,246,247,248,249,250,251,250,250,252,250,253,254,255,256, /* U+1F000 */
-257,258,259,260,261,262,249,249,249,249,249,249,249,249,249,263, /* U+1F800 */
+ 71,220,221,222,223,224,225,142,226,227,228,229,230,231,232,233, /* U+1D000 */
+234,234,234,234,235,236,142,142,142,142,142,142,142,142,142,142, /* U+1D800 */
+237,142,238,142,142,239,142,142,142,142,142,142,142,142,142,142, /* U+1E000 */
+240,241,242,142,142,142,142,142,243,244,245,142,246,247,142,142, /* U+1E800 */
+248,249,250,251,252,253,254,255,254,254,256,254,257,258,259,260, /* U+1F000 */
+261,262,263,264,265,266, 71,267,253,253,253,253,253,253,253,268, /* U+1F800 */
98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+20000 */
98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+20800 */
98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+21000 */
@@ -1236,21 +1255,21 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */
98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+28800 */
98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+29000 */
98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+29800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,264, 98, 98, /* U+2A000 */
+ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,269, 98, 98, /* U+2A000 */
98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+2A800 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,265, 98, /* U+2B000 */
-266, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+2B800 */
+ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,270, 98, /* U+2B000 */
+271, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+2B800 */
98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+2C000 */
- 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,267, 98, 98, /* U+2C800 */
+ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,272, 98, 98, /* U+2C800 */
98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+2D000 */
98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+2D800 */
98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+2E000 */
- 98, 98, 98, 98, 98, 98, 98,268,142,142,142,142,142,142,142,142, /* U+2E800 */
+ 98, 98, 98, 98, 98, 98, 98,273,142,142,142,142,142,142,142,142, /* U+2E800 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+2F000 */
- 98, 98, 98, 98,269,142,142,142,142,142,142,142,142,142,142,142, /* U+2F800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+30000 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+30800 */
-142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+31000 */
+ 98, 98, 98, 98,274,142,142,142,142,142,142,142,142,142,142,142, /* U+2F800 */
+ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+30000 */
+ 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, /* U+30800 */
+ 98, 98, 98, 98, 98, 98,275,142,142,142,142,142,142,142,142,142, /* U+31000 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+31800 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+32000 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+32800 */
@@ -1600,8 +1619,8 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DE800 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DF000 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+DF800 */
-270,271,272,273,271,271,271,271,271,271,271,271,271,271,271,271, /* U+E0000 */
-271,271,271,271,271,271,271,271,271,271,271,271,271,271,271,271, /* U+E0800 */
+276,277,278,279,277,277,277,277,277,277,277,277,277,277,277,277, /* U+E0000 */
+277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,277, /* U+E0800 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E1000 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E1800 */
142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, /* U+E2000 */
@@ -1663,7 +1682,7 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */
126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FE000 */
126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FE800 */
126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+FF000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,274, /* U+FF800 */
+126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,280, /* U+FF800 */
126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+100000 */
126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+100800 */
126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+101000 */
@@ -1695,10 +1714,10 @@ const uint16_t PRIV(ucd_stage1)[] = { /* 17408 bytes */
126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10E000 */
126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10E800 */
126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, /* U+10F000 */
-126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,274, /* U+10F800 */
+126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,280, /* U+10F800 */
};
-const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
+const uint16_t PRIV(ucd_stage2)[] = { /* 71936 bytes, block = 128 */
/* block 0 */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 3, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -1810,474 +1829,474 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
207,207,207,207,207,207,207,207,207,207,207,207,207,207,207,207,
/* block 11 */
-207,207,207,207,207,207,207,206,206,208,209,120,120,210,210,211,
-120,212,212,212,212,212,212,212,212,212,212,212,212,212,212,212,
-212,212,212,212,212,212,212,212,212,212,212,212,212,212,212,212,
-212,212,212,212,212,212,212,212,212,212,212,212,212,212,213,212,
-214,212,212,214,212,212,214,212,120,120,120,120,120,120,120,120,
-215,215,215,215,215,215,215,215,215,215,215,215,215,215,215,215,
-215,215,215,215,215,215,215,215,215,215,215,120,120,120,120,215,
-215,215,215,214,214,120,120,120,120,120,120,120,120,120,120,120,
+207,207,207,207,207,207,207,206,206,205,208,120,120,209,209,210,
+120,211,211,211,211,211,211,211,211,211,211,211,211,211,211,211,
+211,211,211,211,211,211,211,211,211,211,211,211,211,211,211,211,
+211,211,211,211,211,211,211,211,211,211,211,211,211,211,212,211,
+213,211,211,213,211,211,213,211,120,120,120,120,120,120,120,120,
+214,214,214,214,214,214,214,214,214,214,214,214,214,214,214,214,
+214,214,214,214,214,214,214,214,214,214,214,120,120,120,120,214,
+214,214,214,213,213,120,120,120,120,120,120,120,120,120,120,120,
/* block 12 */
-216,216,216,216,216,217,218,218,218,219,219,220,221,219,222,222,
-223,223,223,223,223,223,223,223,223,223,223,221,224,120,219,221,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-226,225,225,225,225,225,225,225,225,225,225,227,227,227,227,227,
-227,227,227,227,227,227,223,223,223,223,223,223,223,223,223,223,
-228,228,228,228,228,228,228,228,228,228,219,219,219,219,225,225,
-227,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
+215,215,215,215,215,216,217,217,217,218,218,219,220,218,221,221,
+222,222,222,222,222,222,222,222,222,222,222,220,223,120,218,220,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+225,224,224,224,224,224,224,224,224,224,224,226,226,226,226,226,
+226,226,226,226,226,226,222,222,222,222,222,222,222,222,222,222,
+227,227,227,227,227,227,227,227,227,227,218,218,218,218,224,224,
+226,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
/* block 13 */
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,229,225,223,223,223,223,223,223,223,217,222,223,
-223,223,223,223,223,230,230,223,223,222,223,223,223,223,225,225,
-231,231,231,231,231,231,231,231,231,231,225,225,225,222,222,225,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,228,224,222,222,222,222,222,222,222,216,221,222,
+222,222,222,222,222,229,229,222,222,221,222,222,222,222,224,224,
+230,230,230,230,230,230,230,230,230,230,224,224,224,221,221,224,
/* block 14 */
-232,232,232,232,232,232,232,232,232,232,232,232,232,232,120,233,
-234,235,234,234,234,234,234,234,234,234,234,234,234,234,234,234,
+231,231,231,231,231,231,231,231,231,231,231,231,231,231,120,232,
+233,234,233,233,233,233,233,233,233,233,233,233,233,233,233,233,
+233,233,233,233,233,233,233,233,233,233,233,233,233,233,233,233,
234,234,234,234,234,234,234,234,234,234,234,234,234,234,234,234,
-235,235,235,235,235,235,235,235,235,235,235,235,235,235,235,235,
-235,235,235,235,235,235,235,235,235,235,235,120,120,234,234,234,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
+234,234,234,234,234,234,234,234,234,234,234,120,120,233,233,233,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
/* block 15 */
-236,236,236,236,236,236,236,236,236,236,236,236,236,236,236,236,
-236,236,236,236,236,236,236,236,236,236,236,236,236,236,236,236,
-236,236,236,236,236,236,237,237,237,237,237,237,237,237,237,237,
-237,236,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-238,238,238,238,238,238,238,238,238,238,239,239,239,239,239,239,
-239,239,239,239,239,239,239,239,239,239,239,239,239,239,239,239,
-239,239,239,239,239,239,239,239,239,239,239,240,240,240,240,240,
-240,240,240,240,241,241,242,243,243,243,241,120,120,240,244,244,
+235,235,235,235,235,235,235,235,235,235,235,235,235,235,235,235,
+235,235,235,235,235,235,235,235,235,235,235,235,235,235,235,235,
+235,235,235,235,235,235,236,236,236,236,236,236,236,236,236,236,
+236,235,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+237,237,237,237,237,237,237,237,237,237,238,238,238,238,238,238,
+238,238,238,238,238,238,238,238,238,238,238,238,238,238,238,238,
+238,238,238,238,238,238,238,238,238,238,238,239,239,239,239,239,
+239,239,239,239,240,240,241,242,242,242,240,120,120,239,243,243,
/* block 16 */
-245,245,245,245,245,245,245,245,245,245,245,245,245,245,245,245,
-245,245,245,245,245,245,246,246,246,246,247,246,246,246,246,246,
-246,246,246,246,247,246,246,246,247,246,246,246,246,246,120,120,
-248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,120,
-249,249,249,249,249,249,249,249,249,249,249,249,249,249,249,249,
-249,249,249,249,249,249,249,249,249,250,250,250,120,120,251,120,
-234,234,234,234,234,234,234,234,234,234,234,120,120,120,120,120,
+244,244,244,244,244,244,244,244,244,244,244,244,244,244,244,244,
+244,244,244,244,244,244,245,245,245,245,246,245,245,245,245,245,
+245,245,245,245,246,245,245,245,246,245,245,245,245,245,120,120,
+247,247,247,247,247,247,247,247,247,247,247,247,247,247,247,120,
+248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,
+248,248,248,248,248,248,248,248,248,249,249,249,120,120,250,120,
+233,233,233,233,233,233,233,233,233,233,233,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 17 */
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,120,225,225,225,225,225,225,225,225,120,120,
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-120,120,120,223,223,223,223,223,223,223,223,223,223,223,223,223,
-223,223,217,223,223,223,223,223,223,223,223,223,223,223,223,223,
-223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,120,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,120,120,120,120,120,120,120,120,
+120,120,120,222,222,222,222,222,222,222,222,222,222,222,222,222,
+222,222,216,222,222,222,222,222,222,222,222,222,222,222,222,222,
+222,222,222,222,222,222,222,222,222,222,222,222,222,222,222,222,
/* block 18 */
-252,252,252,253,254,254,254,254,254,254,254,254,254,254,254,254,
-254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,
-254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,
-254,254,254,254,254,254,254,254,254,254,252,253,252,254,253,253,
-253,252,252,252,252,252,252,252,252,253,253,253,253,252,253,253,
-254,255,256,113,113,252,252,252,254,254,254,254,254,254,254,254,
-254,254,252,252,257,258,259,259,259,259,259,259,259,259,259,259,
-260,261,254,254,254,254,254,254,254,254,254,254,254,254,254,254,
+251,251,251,252,253,253,253,253,253,253,253,253,253,253,253,253,
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,
+253,253,253,253,253,253,253,253,253,253,251,252,251,253,252,252,
+252,251,251,251,251,251,251,251,251,252,252,252,252,251,252,252,
+253,254,255,113,113,251,251,251,253,253,253,253,253,253,253,253,
+253,253,251,251,256,257,258,258,258,258,258,258,258,258,258,258,
+259,260,253,253,253,253,253,253,253,253,253,253,253,253,253,253,
/* block 19 */
-262,263,264,264,120,262,262,262,262,262,262,262,262,120,120,262,
-262,120,120,262,262,262,262,262,262,262,262,262,262,262,262,262,
-262,262,262,262,262,262,262,262,262,120,262,262,262,262,262,262,
-262,120,262,120,120,120,262,262,262,262,120,120,263,262,265,264,
-264,263,263,263,263,120,120,264,264,120,120,264,264,263,262,120,
-120,120,120,120,120,120,120,265,120,120,120,120,262,262,120,262,
-262,262,263,263,120,120,266,266,266,266,266,266,266,266,266,266,
-262,262,267,267,268,268,268,268,268,268,269,267,262,270,263,120,
+261,262,263,263,120,261,261,261,261,261,261,261,261,120,120,261,
+261,120,120,261,261,261,261,261,261,261,261,261,261,261,261,261,
+261,261,261,261,261,261,261,261,261,120,261,261,261,261,261,261,
+261,120,261,120,120,120,261,261,261,261,120,120,262,261,264,263,
+263,262,262,262,262,120,120,263,263,120,120,263,263,262,261,120,
+120,120,120,120,120,120,120,264,120,120,120,120,261,261,120,261,
+261,261,262,262,120,120,265,265,265,265,265,265,265,265,265,265,
+261,261,266,266,267,267,267,267,267,267,268,266,261,269,262,120,
/* block 20 */
-120,271,271,272,120,273,273,273,273,273,273,120,120,120,120,273,
-273,120,120,273,273,273,273,273,273,273,273,273,273,273,273,273,
-273,273,273,273,273,273,273,273,273,120,273,273,273,273,273,273,
-273,120,273,273,120,273,273,120,273,273,120,120,271,120,272,272,
-272,271,271,120,120,120,120,271,271,120,120,271,271,271,120,120,
-120,271,120,120,120,120,120,120,120,273,273,273,273,120,273,120,
-120,120,120,120,120,120,274,274,274,274,274,274,274,274,274,274,
-271,271,273,273,273,271,275,120,120,120,120,120,120,120,120,120,
+120,270,270,271,120,272,272,272,272,272,272,120,120,120,120,272,
+272,120,120,272,272,272,272,272,272,272,272,272,272,272,272,272,
+272,272,272,272,272,272,272,272,272,120,272,272,272,272,272,272,
+272,120,272,272,120,272,272,120,272,272,120,120,270,120,271,271,
+271,270,270,120,120,120,120,270,270,120,120,270,270,270,120,120,
+120,270,120,120,120,120,120,120,120,272,272,272,272,120,272,120,
+120,120,120,120,120,120,273,273,273,273,273,273,273,273,273,273,
+270,270,272,272,272,270,274,120,120,120,120,120,120,120,120,120,
/* block 21 */
-120,276,276,277,120,278,278,278,278,278,278,278,278,278,120,278,
-278,278,120,278,278,278,278,278,278,278,278,278,278,278,278,278,
-278,278,278,278,278,278,278,278,278,120,278,278,278,278,278,278,
-278,120,278,278,120,278,278,278,278,278,120,120,276,278,277,277,
-277,276,276,276,276,276,120,276,276,277,120,277,277,276,120,120,
-278,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-278,278,276,276,120,120,279,279,279,279,279,279,279,279,279,279,
-280,281,120,120,120,120,120,120,120,278,276,276,276,276,276,276,
+120,275,275,276,120,277,277,277,277,277,277,277,277,277,120,277,
+277,277,120,277,277,277,277,277,277,277,277,277,277,277,277,277,
+277,277,277,277,277,277,277,277,277,120,277,277,277,277,277,277,
+277,120,277,277,120,277,277,277,277,277,120,120,275,277,276,276,
+276,275,275,275,275,275,120,275,275,276,120,276,276,275,120,120,
+277,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+277,277,275,275,120,120,278,278,278,278,278,278,278,278,278,278,
+279,280,120,120,120,120,120,120,120,277,275,275,275,275,275,275,
/* block 22 */
-120,282,283,283,120,284,284,284,284,284,284,284,284,120,120,284,
-284,120,120,284,284,284,284,284,284,284,284,284,284,284,284,284,
-284,284,284,284,284,284,284,284,284,120,284,284,284,284,284,284,
-284,120,284,284,120,284,284,284,284,284,120,120,282,284,285,282,
-283,282,282,282,282,120,120,283,283,120,120,283,283,282,120,120,
-120,120,120,120,120,120,282,285,120,120,120,120,284,284,120,284,
-284,284,282,282,120,120,286,286,286,286,286,286,286,286,286,286,
-287,284,288,288,288,288,288,288,120,120,120,120,120,120,120,120,
+120,281,282,282,120,283,283,283,283,283,283,283,283,120,120,283,
+283,120,120,283,283,283,283,283,283,283,283,283,283,283,283,283,
+283,283,283,283,283,283,283,283,283,120,283,283,283,283,283,283,
+283,120,283,283,120,283,283,283,283,283,120,120,281,283,284,281,
+282,281,281,281,281,120,120,282,282,120,120,282,282,281,120,120,
+120,120,120,120,120,281,281,284,120,120,120,120,283,283,120,283,
+283,283,281,281,120,120,285,285,285,285,285,285,285,285,285,285,
+286,283,287,287,287,287,287,287,120,120,120,120,120,120,120,120,
/* block 23 */
-120,120,289,290,120,290,290,290,290,290,290,120,120,120,290,290,
-290,120,290,290,290,290,120,120,120,290,290,120,290,120,290,290,
-120,120,120,290,290,120,120,120,290,290,290,120,120,120,290,290,
-290,290,290,290,290,290,290,290,290,290,120,120,120,120,291,292,
-289,292,292,120,120,120,292,292,292,120,292,292,292,289,120,120,
-290,120,120,120,120,120,120,291,120,120,120,120,120,120,120,120,
-120,120,120,120,120,120,293,293,293,293,293,293,293,293,293,293,
-294,294,294,295,296,296,296,296,296,297,296,120,120,120,120,120,
+120,120,288,289,120,289,289,289,289,289,289,120,120,120,289,289,
+289,120,289,289,289,289,120,120,120,289,289,120,289,120,289,289,
+120,120,120,289,289,120,120,120,289,289,289,120,120,120,289,289,
+289,289,289,289,289,289,289,289,289,289,120,120,120,120,290,291,
+288,291,291,120,120,120,291,291,291,120,291,291,291,288,120,120,
+289,120,120,120,120,120,120,290,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,292,292,292,292,292,292,292,292,292,292,
+293,293,293,294,295,295,295,295,295,296,295,120,120,120,120,120,
/* block 24 */
-298,299,299,299,298,300,300,300,300,300,300,300,300,120,300,300,
-300,120,300,300,300,300,300,300,300,300,300,300,300,300,300,300,
-300,300,300,300,300,300,300,300,300,120,300,300,300,300,300,300,
-300,300,300,300,300,300,300,300,300,300,120,120,120,300,298,298,
-298,299,299,299,299,120,298,298,298,120,298,298,298,298,120,120,
-120,120,120,120,120,298,298,120,300,300,300,120,120,120,120,120,
-300,300,298,298,120,120,301,301,301,301,301,301,301,301,301,301,
-120,120,120,120,120,120,120,302,303,303,303,303,303,303,303,304,
+297,298,298,298,297,299,299,299,299,299,299,299,299,120,299,299,
+299,120,299,299,299,299,299,299,299,299,299,299,299,299,299,299,
+299,299,299,299,299,299,299,299,299,120,299,299,299,299,299,299,
+299,299,299,299,299,299,299,299,299,299,120,120,120,299,297,297,
+297,298,298,298,298,120,297,297,297,120,297,297,297,297,120,120,
+120,120,120,120,120,297,297,120,299,299,299,120,120,120,120,120,
+299,299,297,297,120,120,300,300,300,300,300,300,300,300,300,300,
+120,120,120,120,120,120,120,301,302,302,302,302,302,302,302,303,
/* block 25 */
-305,306,307,307,308,305,305,305,305,305,305,305,305,120,305,305,
-305,120,305,305,305,305,305,305,305,305,305,305,305,305,305,305,
-305,305,305,305,305,305,305,305,305,120,305,305,305,305,305,305,
-305,305,305,305,120,305,305,305,305,305,120,120,306,305,307,306,
-307,307,309,307,307,120,306,307,307,120,307,307,306,306,120,120,
-120,120,120,120,120,309,309,120,120,120,120,120,120,120,305,120,
-305,305,306,306,120,120,310,310,310,310,310,310,310,310,310,310,
-120,305,305,120,120,120,120,120,120,120,120,120,120,120,120,120,
+304,305,306,306,307,304,304,304,304,304,304,304,304,120,304,304,
+304,120,304,304,304,304,304,304,304,304,304,304,304,304,304,304,
+304,304,304,304,304,304,304,304,304,120,304,304,304,304,304,304,
+304,304,304,304,120,304,304,304,304,304,120,120,305,304,306,305,
+306,306,308,306,306,120,305,306,306,120,306,306,305,305,120,120,
+120,120,120,120,120,308,308,120,120,120,120,120,120,120,304,120,
+304,304,305,305,120,120,309,309,309,309,309,309,309,309,309,309,
+120,304,304,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 26 */
-311,311,312,312,120,313,313,313,313,313,313,313,313,120,313,313,
-313,120,313,313,313,313,313,313,313,313,313,313,313,313,313,313,
-313,313,313,313,313,313,313,313,313,313,313,313,313,313,313,313,
-313,313,313,313,313,313,313,313,313,313,313,311,311,313,314,312,
-312,311,311,311,311,120,312,312,312,120,312,312,312,311,315,316,
-120,120,120,120,313,313,313,314,317,317,317,317,317,317,317,313,
-313,313,311,311,120,120,318,318,318,318,318,318,318,318,318,318,
-317,317,317,317,317,317,317,317,317,316,313,313,313,313,313,313,
+310,310,311,311,312,312,312,312,312,312,312,312,312,120,312,312,
+312,120,312,312,312,312,312,312,312,312,312,312,312,312,312,312,
+312,312,312,312,312,312,312,312,312,312,312,312,312,312,312,312,
+312,312,312,312,312,312,312,312,312,312,312,310,310,312,313,311,
+311,310,310,310,310,120,311,311,311,120,311,311,311,310,314,315,
+120,120,120,120,312,312,312,313,316,316,316,316,316,316,316,312,
+312,312,310,310,120,120,317,317,317,317,317,317,317,317,317,317,
+316,316,316,316,316,316,316,316,316,315,312,312,312,312,312,312,
/* block 27 */
-120,120,319,319,120,320,320,320,320,320,320,320,320,320,320,320,
+120,318,319,319,120,320,320,320,320,320,320,320,320,320,320,320,
320,320,320,320,320,320,320,120,120,120,320,320,320,320,320,320,
320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,320,
320,320,120,320,320,320,320,320,320,320,320,320,120,320,120,120,
-320,320,320,320,320,320,320,120,120,120,321,120,120,120,120,322,
-319,319,321,321,321,120,321,120,319,319,319,319,319,319,319,322,
-120,120,120,120,120,120,323,323,323,323,323,323,323,323,323,323,
-120,120,319,319,324,120,120,120,120,120,120,120,120,120,120,120,
+320,320,320,320,320,320,320,120,120,120,318,120,120,120,120,321,
+319,319,318,318,318,120,318,120,319,319,319,319,319,319,319,321,
+120,120,120,120,120,120,322,322,322,322,322,322,322,322,322,322,
+120,120,319,319,323,120,120,120,120,120,120,120,120,120,120,120,
/* block 28 */
-120,325,325,325,325,325,325,325,325,325,325,325,325,325,325,325,
-325,325,325,325,325,325,325,325,325,325,325,325,325,325,325,325,
-325,325,325,325,325,325,325,325,325,325,325,325,325,325,325,325,
-325,326,325,327,326,326,326,326,326,326,326,120,120,120,120, 6,
-325,325,325,325,325,325,328,326,326,326,326,326,326,326,326,329,
-330,330,330,330,330,330,330,330,330,330,329,329,120,120,120,120,
+120,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,
+324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,
+324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,324,
+324,325,324,326,325,325,325,325,325,325,325,120,120,120,120, 6,
+324,324,324,324,324,324,327,325,325,325,325,325,325,325,325,328,
+329,329,329,329,329,329,329,329,329,329,328,328,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 29 */
-120,331,331,120,331,120,331,331,331,331,331,120,331,331,331,331,
-331,331,331,331,331,331,331,331,331,331,331,331,331,331,331,331,
-331,331,331,331,120,331,120,331,331,331,331,331,331,331,331,331,
-331,332,331,333,332,332,332,332,332,332,332,332,332,331,120,120,
-331,331,331,331,331,120,334,120,332,332,332,332,332,332,120,120,
-335,335,335,335,335,335,335,335,335,335,120,120,331,331,331,331,
+120,330,330,120,330,120,330,330,330,330,330,120,330,330,330,330,
+330,330,330,330,330,330,330,330,330,330,330,330,330,330,330,330,
+330,330,330,330,120,330,120,330,330,330,330,330,330,330,330,330,
+330,331,330,332,331,331,331,331,331,331,331,331,331,330,120,120,
+330,330,330,330,330,120,333,120,331,331,331,331,331,331,120,120,
+334,334,334,334,334,334,334,334,334,334,120,120,330,330,330,330,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 30 */
-336,337,337,337,338,338,338,338,338,338,338,338,338,338,338,338,
-338,338,338,337,338,337,337,337,339,339,337,337,337,337,337,337,
-340,340,340,340,340,340,340,340,340,340,341,341,341,341,341,341,
-341,341,341,341,337,339,337,339,337,339,342,343,342,343,344,344,
-336,336,336,336,336,336,336,336,120,336,336,336,336,336,336,336,
-336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
-336,336,336,336,336,336,336,336,336,336,336,336,336,120,120,120,
-120,339,339,339,339,339,339,339,339,339,339,339,339,339,339,344,
+335,336,336,336,337,337,337,337,337,337,337,337,337,337,337,337,
+337,337,337,336,337,336,336,336,338,338,336,336,336,336,336,336,
+339,339,339,339,339,339,339,339,339,339,340,340,340,340,340,340,
+340,340,340,340,336,338,336,338,336,338,341,342,341,342,343,343,
+335,335,335,335,335,335,335,335,120,335,335,335,335,335,335,335,
+335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,
+335,335,335,335,335,335,335,335,335,335,335,335,335,120,120,120,
+120,338,338,338,338,338,338,338,338,338,338,338,338,338,338,343,
/* block 31 */
-339,339,339,339,339,338,339,339,336,336,336,336,336,339,339,339,
-339,339,339,339,339,339,339,339,120,339,339,339,339,339,339,339,
-339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,
-339,339,339,339,339,339,339,339,339,339,339,339,339,120,337,337,
-337,337,337,337,337,337,339,337,337,337,337,337,337,120,337,337,
-338,338,338,338,338, 20, 20, 20, 20,338,338,120,120,120,120,120,
+338,338,338,338,338,337,338,338,335,335,335,335,335,338,338,338,
+338,338,338,338,338,338,338,338,120,338,338,338,338,338,338,338,
+338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,338,
+338,338,338,338,338,338,338,338,338,338,338,338,338,120,336,336,
+336,336,336,336,336,336,338,336,336,336,336,336,336,120,336,336,
+337,337,337,337,337, 20, 20, 20, 20,337,337,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 32 */
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-345,345,345,345,345,345,345,345,345,345,345,346,346,347,347,347,
-347,348,347,347,347,347,347,347,346,347,347,348,348,347,347,345,
-349,349,349,349,349,349,349,349,349,349,350,350,350,350,350,350,
-345,345,345,345,345,345,348,348,347,347,345,345,345,345,347,347,
-347,345,346,346,346,345,345,346,346,346,346,346,346,346,345,345,
-345,347,347,347,347,345,345,345,345,345,345,345,345,345,345,345,
+344,344,344,344,344,344,344,344,344,344,344,344,344,344,344,344,
+344,344,344,344,344,344,344,344,344,344,344,344,344,344,344,344,
+344,344,344,344,344,344,344,344,344,344,344,345,345,346,346,346,
+346,347,346,346,346,346,346,346,345,346,346,347,347,346,346,344,
+348,348,348,348,348,348,348,348,348,348,349,349,349,349,349,349,
+344,344,344,344,344,344,347,347,346,346,344,344,344,344,346,346,
+346,344,345,345,345,344,344,345,345,345,345,345,345,345,344,344,
+344,346,346,346,346,344,344,344,344,344,344,344,344,344,344,344,
/* block 33 */
-345,345,347,346,348,347,347,346,346,346,346,346,346,347,345,346,
-351,351,351,351,351,351,351,351,351,351,346,346,346,347,352,352,
+344,344,346,345,347,346,346,345,345,345,345,345,345,346,344,345,
+350,350,350,350,350,350,350,350,350,350,345,345,345,346,351,351,
+352,352,352,352,352,352,352,352,352,352,352,352,352,352,352,352,
+352,352,352,352,352,352,352,352,352,352,352,352,352,352,352,352,
+352,352,352,352,352,352,120,352,120,120,120,120,120,352,120,120,
353,353,353,353,353,353,353,353,353,353,353,353,353,353,353,353,
353,353,353,353,353,353,353,353,353,353,353,353,353,353,353,353,
-353,353,353,353,353,353,120,353,120,120,120,120,120,353,120,120,
-354,354,354,354,354,354,354,354,354,354,354,354,354,354,354,354,
-354,354,354,354,354,354,354,354,354,354,354,354,354,354,354,354,
-354,354,354,354,354,354,354,354,354,354,354,355,356,354,354,354,
+353,353,353,353,353,353,353,353,353,353,353,354,355,353,353,353,
/* block 34 */
+356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,
+356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,
+356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,
+356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,
+356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,
+356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,
357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,
357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,
+
+/* block 35 */
357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,
357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,
-357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,
-357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,
+357,357,357,357,357,357,357,357,358,358,358,358,358,358,358,358,
358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
-
-/* block 35 */
358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
-358,358,358,358,358,358,358,358,359,359,359,359,359,359,359,359,
+358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
+
+/* block 36 */
359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,
359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,
359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,
359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,
+359,359,359,359,359,359,359,359,359,120,359,359,359,359,120,120,
+359,359,359,359,359,359,359,120,359,120,359,359,359,359,120,120,
+359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,
359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,
-
-/* block 36 */
-360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,
-360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,
-360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,
-360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,
-360,360,360,360,360,360,360,360,360,120,360,360,360,360,120,120,
-360,360,360,360,360,360,360,120,360,120,360,360,360,360,120,120,
-360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,
-360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,
/* block 37 */
-360,360,360,360,360,360,360,360,360,120,360,360,360,360,120,120,
-360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,
-360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,
-360,120,360,360,360,360,120,120,360,360,360,360,360,360,360,120,
-360,120,360,360,360,360,120,120,360,360,360,360,360,360,360,360,
-360,360,360,360,360,360,360,120,360,360,360,360,360,360,360,360,
-360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,
-360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,
+359,359,359,359,359,359,359,359,359,120,359,359,359,359,120,120,
+359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,
+359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,
+359,120,359,359,359,359,120,120,359,359,359,359,359,359,359,120,
+359,120,359,359,359,359,120,120,359,359,359,359,359,359,359,359,
+359,359,359,359,359,359,359,120,359,359,359,359,359,359,359,359,
+359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,
+359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,
/* block 38 */
-360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,
-360,120,360,360,360,360,120,120,360,360,360,360,360,360,360,360,
-360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,
-360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,
-360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,
-360,360,360,360,360,360,360,360,360,360,360,120,120,361,361,361,
-362,362,362,362,362,362,362,362,362,363,363,363,363,363,363,363,
-363,363,363,363,363,363,363,363,363,363,363,363,363,120,120,120,
+359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,
+359,120,359,359,359,359,120,120,359,359,359,359,359,359,359,359,
+359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,
+359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,
+359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,
+359,359,359,359,359,359,359,359,359,359,359,120,120,360,360,360,
+361,361,361,361,361,361,361,361,361,362,362,362,362,362,362,362,
+362,362,362,362,362,362,362,362,362,362,362,362,362,120,120,120,
/* block 39 */
-360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,
-364,364,364,364,364,364,364,364,364,364,120,120,120,120,120,120,
-365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,
-365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,
-365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,
-365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,
-365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,
-366,366,366,366,366,366,120,120,367,367,367,367,367,367,120,120,
+359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,
+363,363,363,363,363,363,363,363,363,363,120,120,120,120,120,120,
+364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,
+364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,
+364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,
+364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,
+364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,
+365,365,365,365,365,365,120,120,366,366,366,366,366,366,120,120,
/* block 40 */
-368,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
+367,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
/* block 41 */
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
/* block 42 */
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,370,371,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,369,370,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
/* block 43 */
-372,373,373,373,373,373,373,373,373,373,373,373,373,373,373,373,
-373,373,373,373,373,373,373,373,373,373,373,374,375,120,120,120,
-376,376,376,376,376,376,376,376,376,376,376,376,376,376,376,376,
-376,376,376,376,376,376,376,376,376,376,376,376,376,376,376,376,
-376,376,376,376,376,376,376,376,376,376,376,376,376,376,376,376,
-376,376,376,376,376,376,376,376,376,376,376,376,376,376,376,376,
-376,376,376,376,376,376,376,376,376,376,376, 5, 5, 5,377,377,
-377,376,376,376,376,376,376,376,376,120,120,120,120,120,120,120,
+371,372,372,372,372,372,372,372,372,372,372,372,372,372,372,372,
+372,372,372,372,372,372,372,372,372,372,372,373,374,120,120,120,
+375,375,375,375,375,375,375,375,375,375,375,375,375,375,375,375,
+375,375,375,375,375,375,375,375,375,375,375,375,375,375,375,375,
+375,375,375,375,375,375,375,375,375,375,375,375,375,375,375,375,
+375,375,375,375,375,375,375,375,375,375,375,375,375,375,375,375,
+375,375,375,375,375,375,375,375,375,375,375, 5, 5, 5,376,376,
+376,375,375,375,375,375,375,375,375,120,120,120,120,120,120,120,
/* block 44 */
-378,378,378,378,378,378,378,378,378,378,378,378,378,120,378,378,
-378,378,379,379,379,120,120,120,120,120,120,120,120,120,120,120,
-380,380,380,380,380,380,380,380,380,380,380,380,380,380,380,380,
-380,380,381,381,381,382,382,120,120,120,120,120,120,120,120,120,
-383,383,383,383,383,383,383,383,383,383,383,383,383,383,383,383,
-383,383,384,384,120,120,120,120,120,120,120,120,120,120,120,120,
-385,385,385,385,385,385,385,385,385,385,385,385,385,120,385,385,
-385,120,386,386,120,120,120,120,120,120,120,120,120,120,120,120,
+377,377,377,377,377,377,377,377,377,377,377,377,377,120,377,377,
+377,377,378,378,378,120,120,120,120,120,120,120,120,120,120,120,
+379,379,379,379,379,379,379,379,379,379,379,379,379,379,379,379,
+379,379,380,380,380,381,381,120,120,120,120,120,120,120,120,120,
+382,382,382,382,382,382,382,382,382,382,382,382,382,382,382,382,
+382,382,383,383,120,120,120,120,120,120,120,120,120,120,120,120,
+384,384,384,384,384,384,384,384,384,384,384,384,384,120,384,384,
+384,120,385,385,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 45 */
-387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,
-387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,
-387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,
-387,387,387,387,388,388,389,388,388,388,388,388,388,388,389,389,
-389,389,389,389,389,389,388,389,389,388,388,388,388,388,388,388,
-388,388,388,388,390,390,390,391,390,390,390,392,387,388,120,120,
+386,386,386,386,386,386,386,386,386,386,386,386,386,386,386,386,
+386,386,386,386,386,386,386,386,386,386,386,386,386,386,386,386,
+386,386,386,386,386,386,386,386,386,386,386,386,386,386,386,386,
+386,386,386,386,387,387,388,387,387,387,387,387,387,387,388,388,
+388,388,388,388,388,388,387,388,388,387,387,387,387,387,387,387,
+387,387,387,387,389,389,389,390,389,389,389,391,386,387,120,120,
+392,392,392,392,392,392,392,392,392,392,120,120,120,120,120,120,
393,393,393,393,393,393,393,393,393,393,120,120,120,120,120,120,
-394,394,394,394,394,394,394,394,394,394,120,120,120,120,120,120,
/* block 46 */
-395,395,396,396,395,396,397,395,395,395,395,398,398,398,399,120,
-400,400,400,400,400,400,400,400,400,400,120,120,120,120,120,120,
-401,401,401,401,401,401,401,401,401,401,401,401,401,401,401,401,
-401,401,401,401,401,401,401,401,401,401,401,401,401,401,401,401,
-401,401,401,402,401,401,401,401,401,401,401,401,401,401,401,401,
-401,401,401,401,401,401,401,401,401,401,401,401,401,401,401,401,
-401,401,401,401,401,401,401,401,401,401,401,401,401,401,401,401,
-401,401,401,401,401,401,401,401,401,120,120,120,120,120,120,120,
+394,394,395,395,394,395,396,394,394,394,394,397,397,397,398,120,
+399,399,399,399,399,399,399,399,399,399,120,120,120,120,120,120,
+400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,
+400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,
+400,400,400,401,400,400,400,400,400,400,400,400,400,400,400,400,
+400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,
+400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,
+400,400,400,400,400,400,400,400,400,120,120,120,120,120,120,120,
/* block 47 */
-401,401,401,401,401,398,398,401,401,401,401,401,401,401,401,401,
-401,401,401,401,401,401,401,401,401,401,401,401,401,401,401,401,
-401,401,401,401,401,401,401,401,401,398,401,120,120,120,120,120,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,120,120,120,120,120,120,120,120,120,120,
+400,400,400,400,400,397,397,400,400,400,400,400,400,400,400,400,
+400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,400,
+400,400,400,400,400,400,400,400,400,397,400,120,120,120,120,120,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,368,
+368,368,368,368,368,368,120,120,120,120,120,120,120,120,120,120,
/* block 48 */
-403,403,403,403,403,403,403,403,403,403,403,403,403,403,403,403,
-403,403,403,403,403,403,403,403,403,403,403,403,403,403,403,120,
-404,404,404,405,405,405,405,404,404,405,405,405,120,120,120,120,
-405,405,404,405,405,405,405,405,405,404,404,404,120,120,120,120,
-406,120,120,120,407,407,408,408,408,408,408,408,408,408,408,408,
-409,409,409,409,409,409,409,409,409,409,409,409,409,409,409,409,
-409,409,409,409,409,409,409,409,409,409,409,409,409,409,120,120,
-409,409,409,409,409,120,120,120,120,120,120,120,120,120,120,120,
+402,402,402,402,402,402,402,402,402,402,402,402,402,402,402,402,
+402,402,402,402,402,402,402,402,402,402,402,402,402,402,402,120,
+403,403,403,404,404,404,404,403,403,404,404,404,120,120,120,120,
+404,404,403,404,404,404,404,404,404,403,403,403,120,120,120,120,
+405,120,120,120,406,406,407,407,407,407,407,407,407,407,407,407,
+408,408,408,408,408,408,408,408,408,408,408,408,408,408,408,408,
+408,408,408,408,408,408,408,408,408,408,408,408,408,408,120,120,
+408,408,408,408,408,120,120,120,120,120,120,120,120,120,120,120,
/* block 49 */
-410,410,410,410,410,410,410,410,410,410,410,410,410,410,410,410,
-410,410,410,410,410,410,410,410,410,410,410,410,410,410,410,410,
-410,410,410,410,410,410,410,410,410,410,410,410,120,120,120,120,
-410,410,410,410,410,410,410,410,410,410,410,410,410,410,410,410,
-410,410,410,410,410,410,410,410,410,410,120,120,120,120,120,120,
-411,411,411,411,411,411,411,411,411,411,412,120,120,120,413,413,
-414,414,414,414,414,414,414,414,414,414,414,414,414,414,414,414,
-414,414,414,414,414,414,414,414,414,414,414,414,414,414,414,414,
+409,409,409,409,409,409,409,409,409,409,409,409,409,409,409,409,
+409,409,409,409,409,409,409,409,409,409,409,409,409,409,409,409,
+409,409,409,409,409,409,409,409,409,409,409,409,120,120,120,120,
+409,409,409,409,409,409,409,409,409,409,409,409,409,409,409,409,
+409,409,409,409,409,409,409,409,409,409,120,120,120,120,120,120,
+410,410,410,410,410,410,410,410,410,410,411,120,120,120,412,412,
+413,413,413,413,413,413,413,413,413,413,413,413,413,413,413,413,
+413,413,413,413,413,413,413,413,413,413,413,413,413,413,413,413,
/* block 50 */
-415,415,415,415,415,415,415,415,415,415,415,415,415,415,415,415,
-415,415,415,415,415,415,415,416,416,417,417,416,120,120,418,418,
-419,419,419,419,419,419,419,419,419,419,419,419,419,419,419,419,
-419,419,419,419,419,419,419,419,419,419,419,419,419,419,419,419,
-419,419,419,419,419,419,419,419,419,419,419,419,419,419,419,419,
-419,419,419,419,419,420,421,420,421,421,421,421,421,421,421,120,
-421,422,421,422,422,421,421,421,421,421,421,421,421,420,420,420,
-420,420,420,421,421,421,421,421,421,421,421,421,421,120,120,421,
+414,414,414,414,414,414,414,414,414,414,414,414,414,414,414,414,
+414,414,414,414,414,414,414,415,415,416,416,415,120,120,417,417,
+418,418,418,418,418,418,418,418,418,418,418,418,418,418,418,418,
+418,418,418,418,418,418,418,418,418,418,418,418,418,418,418,418,
+418,418,418,418,418,418,418,418,418,418,418,418,418,418,418,418,
+418,418,418,418,418,419,420,419,420,420,420,420,420,420,420,120,
+420,421,420,421,421,420,420,420,420,420,420,420,420,419,419,419,
+419,419,419,420,420,420,420,420,420,420,420,420,420,120,120,420,
/* block 51 */
-423,423,423,423,423,423,423,423,423,423,120,120,120,120,120,120,
-423,423,423,423,423,423,423,423,423,423,120,120,120,120,120,120,
-424,424,424,424,424,424,424,425,424,424,424,424,424,424,120,120,
-113,113,113,113,113,113,113,113,113,113,113,113,113,113,426,120,
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+422,422,422,422,422,422,422,422,422,422,120,120,120,120,120,120,
+422,422,422,422,422,422,422,422,422,422,120,120,120,120,120,120,
+423,423,423,423,423,423,423,424,423,423,423,423,423,423,120,120,
+113,113,113,113,113,113,113,113,113,113,113,113,113,113,425,113,
+113,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 52 */
-427,427,427,427,428,429,429,429,429,429,429,429,429,429,429,429,
-429,429,429,429,429,429,429,429,429,429,429,429,429,429,429,429,
-429,429,429,429,429,429,429,429,429,429,429,429,429,429,429,429,
-429,429,429,429,427,430,427,427,427,427,427,428,427,428,428,428,
-428,428,427,428,428,429,429,429,429,429,429,429,120,120,120,120,
-431,431,431,431,431,431,431,431,431,431,432,432,432,432,432,432,
-432,433,433,433,433,433,433,433,433,433,433,427,427,427,427,427,
-427,427,427,427,433,433,433,433,433,433,433,433,433,120,120,120,
+426,426,426,426,427,428,428,428,428,428,428,428,428,428,428,428,
+428,428,428,428,428,428,428,428,428,428,428,428,428,428,428,428,
+428,428,428,428,428,428,428,428,428,428,428,428,428,428,428,428,
+428,428,428,428,426,429,426,426,426,426,426,427,426,427,427,427,
+427,427,426,427,427,428,428,428,428,428,428,428,120,120,120,120,
+430,430,430,430,430,430,430,430,430,430,431,431,431,431,431,431,
+431,432,432,432,432,432,432,432,432,432,432,426,426,426,426,426,
+426,426,426,426,432,432,432,432,432,432,432,432,432,120,120,120,
/* block 53 */
-434,434,435,436,436,436,436,436,436,436,436,436,436,436,436,436,
-436,436,436,436,436,436,436,436,436,436,436,436,436,436,436,436,
-436,435,434,434,434,434,435,435,434,434,435,434,434,434,436,436,
-437,437,437,437,437,437,437,437,437,437,436,436,436,436,436,436,
-438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,
-438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,
-438,438,438,438,438,438,439,440,439,439,440,440,440,439,440,439,
-439,439,440,440,120,120,120,120,120,120,120,120,441,441,441,441,
+433,433,434,435,435,435,435,435,435,435,435,435,435,435,435,435,
+435,435,435,435,435,435,435,435,435,435,435,435,435,435,435,435,
+435,434,433,433,433,433,434,434,433,433,434,433,433,433,435,435,
+436,436,436,436,436,436,436,436,436,436,435,435,435,435,435,435,
+437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,
+437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,
+437,437,437,437,437,437,438,439,438,438,439,439,439,438,439,438,
+438,438,439,439,120,120,120,120,120,120,120,120,440,440,440,440,
/* block 54 */
-442,442,442,442,442,442,442,442,442,442,442,442,442,442,442,442,
-442,442,442,442,442,442,442,442,442,442,442,442,442,442,442,442,
-442,442,442,442,443,443,443,443,443,443,443,443,444,444,444,444,
-444,444,444,444,443,443,444,444,120,120,120,445,445,445,445,445,
-446,446,446,446,446,446,446,446,446,446,120,120,120,442,442,442,
-447,447,447,447,447,447,447,447,447,447,448,448,448,448,448,448,
-448,448,448,448,448,448,448,448,448,448,448,448,448,448,448,448,
-448,448,448,448,448,448,448,448,449,449,449,449,449,449,450,450,
+441,441,441,441,441,441,441,441,441,441,441,441,441,441,441,441,
+441,441,441,441,441,441,441,441,441,441,441,441,441,441,441,441,
+441,441,441,441,442,442,442,442,442,442,442,442,443,443,443,443,
+443,443,443,443,442,442,443,443,120,120,120,444,444,444,444,444,
+445,445,445,445,445,445,445,445,445,445,120,120,120,441,441,441,
+446,446,446,446,446,446,446,446,446,446,447,447,447,447,447,447,
+447,447,447,447,447,447,447,447,447,447,447,447,447,447,447,447,
+447,447,447,447,447,447,447,447,448,448,448,448,448,448,449,449,
/* block 55 */
-451,452,453,454,455,456,457,458,459,120,120,120,120,120,120,120,
-460,460,460,460,460,460,460,460,460,460,460,460,460,460,460,460,
-460,460,460,460,460,460,460,460,460,460,460,460,460,460,460,460,
-460,460,460,460,460,460,460,460,460,460,460,120,120,460,460,460,
-461,461,461,461,461,461,461,461,120,120,120,120,120,120,120,120,
-462,463,462,464,463,465,465,466,465,466,467,463,466,466,463,463,
-466,468,463,463,463,463,463,463,463,469,470,471,471,465,471,471,
-471,471,472,473,474,470,470,475,476,476,477,120,120,120,120,120,
+450,451,452,453,454,455,456,457,458,120,120,120,120,120,120,120,
+459,459,459,459,459,459,459,459,459,459,459,459,459,459,459,459,
+459,459,459,459,459,459,459,459,459,459,459,459,459,459,459,459,
+459,459,459,459,459,459,459,459,459,459,459,120,120,459,459,459,
+460,460,460,460,460,460,460,460,120,120,120,120,120,120,120,120,
+461,462,461,463,462,464,464,465,464,465,466,462,465,465,462,462,
+465,467,462,462,462,462,462,462,462,468,469,470,470,464,470,470,
+470,470,471,472,473,469,469,474,475,475,476,120,120,120,120,120,
/* block 56 */
35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35,128,128,128,128,128,478,110,110,110,110,
+ 35, 35, 35, 35, 35, 35,128,128,128,128,128,477,110,110,110,110,
110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
110,110,110,110,110,110,110,110,110,110,110,110,110,121,121,121,
121,121,110,110,110,110,121,121,121,121,121, 35, 35, 35, 35, 35,
- 35, 35, 35, 35, 35, 35, 35, 35,479,480, 35, 35, 35,481, 35, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35,478,479, 35, 35, 35,480, 35, 35,
/* block 57 */
- 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,482, 35,
+ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,481, 35,
35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,110,110,110,110,110,
110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,121,
114,114,113,113,113,113,113,113,113,113,113,113,113,113,113,113,
113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,
113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,
-113,113,113,113,113,113,113,113,113,113,120,113,113,113,113,113,
+113,113,113,113,113,113,113,113,482,113,120,113,113,113,113,113,
/* block 58 */
32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
@@ -2335,8 +2354,8 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-113,113,113,113,113,113,113,113,113,113,113,113,113,426,426,426,
-426,113,426,426,426,113,113,113,113,113,113,113,113,113,113,113,
+113,113,113,113,113,113,113,113,113,113,113,113,113,425,425,425,
+425,113,425,425,425,113,113,113,113,113,113,113,113,113,113,113,
512,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 64 */
@@ -2511,7 +2530,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
/* block 81 */
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20,120,120, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20,120, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
@@ -2550,12 +2569,12 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
550,120,120,120,120,120,120,120,120,120,120,120,120,120,120,551,
/* block 85 */
-360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,360,
-360,360,360,360,360,360,360,120,120,120,120,120,120,120,120,120,
-360,360,360,360,360,360,360,120,360,360,360,360,360,360,360,120,
-360,360,360,360,360,360,360,120,360,360,360,360,360,360,360,120,
-360,360,360,360,360,360,360,120,360,360,360,360,360,360,360,120,
-360,360,360,360,360,360,360,120,360,360,360,360,360,360,360,120,
+359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,
+359,359,359,359,359,359,359,120,120,120,120,120,120,120,120,120,
+359,359,359,359,359,359,359,120,359,359,359,359,359,359,359,120,
+359,359,359,359,359,359,359,120,359,359,359,359,359,359,359,120,
+359,359,359,359,359,359,359,120,359,359,359,359,359,359,359,120,
+359,359,359,359,359,359,359,120,359,359,359,359,359,359,359,120,
552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,
552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,
@@ -2565,7 +2584,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
23, 27, 7, 8, 7, 8, 7, 8, 7, 8, 5, 5, 5, 5, 5,111,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, 10, 5, 5, 5, 5,
10, 5, 7,553, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+ 20, 20, 5,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
@@ -2633,7 +2652,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,120,
572,572,582,582,582,582,572,572,572,572,572,572,572,572,572,572,
580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,
-580,580,580,580,580,580,580,580,580,580,580,120,120,120,120,120,
+580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,
572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,
572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,572,
572,572,572,572,120,120,120,120,120,120,120,120,120,120,120,120,
@@ -2693,7 +2712,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
-586,586,586,586,586,586,120,120,120,120,120,120,120,120,120,120,
+586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
@@ -2707,7 +2726,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+586,586,586,586,586,586,586,586,586,586,586,586,586,120,120,120,
/* block 101 */
587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
@@ -2757,11 +2776,11 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
192,193,192,193,192,193,192,193,192,193,597,598,192,193,192,193,
192,193,192,193,192,193,192,193,192,193,192,193,192,193,192,193,
192,193,192,193,192,193,192,193,192,193,192,193,192,193,599,198,
-200,200,200,600,552,552,552,552,552,552,552,552,552,552,600,479,
+200,200,200,600,552,552,552,552,552,552,552,552,552,552,600,478,
/* block 106 */
192,193,192,193,192,193,192,193,192,193,192,193,192,193,192,193,
-192,193,192,193,192,193,192,193,192,193,192,193,479,479,552,552,
+192,193,192,193,192,193,192,193,192,193,192,193,478,478,552,552,
601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
@@ -2770,196 +2789,186 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
603,603,604,604,604,604,604,604,120,120,120,120,120,120,120,120,
/* block 107 */
- 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+605,605,605,605,605,605,605,605, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15,111,111,111,111,111,111,111,111,111,
15, 15, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
35, 35, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
-110, 35, 35, 35, 35, 35, 35, 35, 35, 32, 33, 32, 33,605, 32, 33,
+110, 35, 35, 35, 35, 35, 35, 35, 35, 32, 33, 32, 33,606, 32, 33,
/* block 108 */
- 32, 33, 32, 33, 32, 33, 32, 33,111, 15, 15, 32, 33,606, 35, 22,
- 32, 33, 32, 33,607, 35, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
- 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,608,609,610,611,608, 35,
-612,613,614,615, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
-120,120, 32, 33,616,617,618,120,120,120,120,120,120,120,120,120,
+ 32, 33, 32, 33, 32, 33, 32, 33,111, 15, 15, 32, 33,607, 35, 22,
+ 32, 33, 32, 33,608, 35, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
+ 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,609,610,611,612,609, 35,
+613,614,615,616, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33, 32, 33,
+120,120, 32, 33,617,618,619, 32, 33, 32, 33,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-120,120,120,120,120,120,120, 22,110,110, 35, 22, 22, 22, 22, 22,
+120,120,120,120,120, 32, 33, 22,110,110, 35, 22, 22, 22, 22, 22,
/* block 109 */
-619,619,620,619,619,619,620,619,619,619,619,620,619,619,619,619,
-619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,619,
-619,619,619,621,621,620,620,621,622,622,622,622,120,120,120,120,
-623,623,623,624,624,624,625,625,626,625,120,120,120,120,120,120,
-627,627,627,627,627,627,627,627,627,627,627,627,627,627,627,627,
-627,627,627,627,627,627,627,627,627,627,627,627,627,627,627,627,
-627,627,627,627,627,627,627,627,627,627,627,627,627,627,627,627,
-627,627,627,627,628,628,628,628,120,120,120,120,120,120,120,120,
+620,620,621,620,620,620,621,620,620,620,620,621,620,620,620,620,
+620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
+620,620,620,622,622,621,621,622,623,623,623,623,621,120,120,120,
+624,624,624,625,625,625,626,626,627,626,120,120,120,120,120,120,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,629,629,629,629,120,120,120,120,120,120,120,120,
/* block 110 */
-629,629,630,630,630,630,630,630,630,630,630,630,630,630,630,630,
-630,630,630,630,630,630,630,630,630,630,630,630,630,630,630,630,
-630,630,630,630,630,630,630,630,630,630,630,630,630,630,630,630,
-630,630,630,630,629,629,629,629,629,629,629,629,629,629,629,629,
-629,629,629,629,631,631,120,120,120,120,120,120,120,120,632,632,
-633,633,633,633,633,633,633,633,633,633,120,120,120,120,120,120,
-252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,
-252,634,254,635,254,254,254,254,260,260,260,254,260,254,254,252,
+630,630,631,631,631,631,631,631,631,631,631,631,631,631,631,631,
+631,631,631,631,631,631,631,631,631,631,631,631,631,631,631,631,
+631,631,631,631,631,631,631,631,631,631,631,631,631,631,631,631,
+631,631,631,631,630,630,630,630,630,630,630,630,630,630,630,630,
+630,630,630,630,632,632,120,120,120,120,120,120,120,120,633,633,
+634,634,634,634,634,634,634,634,634,634,120,120,120,120,120,120,
+251,251,251,251,251,251,251,251,251,251,251,251,251,251,251,251,
+251,635,253,636,253,253,253,253,259,259,259,253,259,253,253,251,
/* block 111 */
-636,636,636,636,636,636,636,636,636,636,637,637,637,637,637,637,
-637,637,637,637,637,637,637,637,637,637,637,637,637,637,637,637,
-637,637,637,637,637,637,638,638,638,638,638,638,638,638,639,640,
-641,641,641,641,641,641,641,641,641,641,641,641,641,641,641,641,
-641,641,641,641,641,641,641,642,642,642,642,642,642,642,642,642,
-642,642,643,643,120,120,120,120,120,120,120,120,120,120,120,644,
-357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,
-357,357,357,357,357,357,357,357,357,357,357,357,357,120,120,120,
+637,637,637,637,637,637,637,637,637,637,638,638,638,638,638,638,
+638,638,638,638,638,638,638,638,638,638,638,638,638,638,638,638,
+638,638,638,638,638,638,639,639,639,639,639,639,639,639,640,641,
+642,642,642,642,642,642,642,642,642,642,642,642,642,642,642,642,
+642,642,642,642,642,642,642,643,643,643,643,643,643,643,643,643,
+643,643,644,644,120,120,120,120,120,120,120,120,120,120,120,645,
+356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,
+356,356,356,356,356,356,356,356,356,356,356,356,356,120,120,120,
/* block 112 */
-645,645,645,646,647,647,647,647,647,647,647,647,647,647,647,647,
-647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,
-647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,
-647,647,647,645,646,646,645,645,645,645,646,646,645,645,646,646,
-646,648,648,648,648,648,648,648,648,648,648,648,648,648,120,649,
-650,650,650,650,650,650,650,650,650,650,120,120,120,120,648,648,
-345,345,345,345,345,347,651,345,345,345,345,345,345,345,345,345,
-351,351,351,351,351,351,351,351,351,351,345,345,345,345,345,120,
+646,646,646,647,648,648,648,648,648,648,648,648,648,648,648,648,
+648,648,648,648,648,648,648,648,648,648,648,648,648,648,648,648,
+648,648,648,648,648,648,648,648,648,648,648,648,648,648,648,648,
+648,648,648,646,647,647,646,646,646,646,647,647,646,646,647,647,
+647,649,649,649,649,649,649,649,649,649,649,649,649,649,120,650,
+651,651,651,651,651,651,651,651,651,651,120,120,120,120,649,649,
+344,344,344,344,344,346,652,344,344,344,344,344,344,344,344,344,
+350,350,350,350,350,350,350,350,350,350,344,344,344,344,344,120,
/* block 113 */
-652,652,652,652,652,652,652,652,652,652,652,652,652,652,652,652,
-652,652,652,652,652,652,652,652,652,652,652,652,652,652,652,652,
-652,652,652,652,652,652,652,652,652,653,653,653,653,653,653,654,
-654,653,653,654,654,653,653,120,120,120,120,120,120,120,120,120,
-652,652,652,653,652,652,652,652,652,652,652,652,653,654,120,120,
-655,655,655,655,655,655,655,655,655,655,120,120,656,656,656,656,
-345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
-651,345,345,345,345,345,345,352,352,352,345,346,347,346,345,345,
+653,653,653,653,653,653,653,653,653,653,653,653,653,653,653,653,
+653,653,653,653,653,653,653,653,653,653,653,653,653,653,653,653,
+653,653,653,653,653,653,653,653,653,654,654,654,654,654,654,655,
+655,654,654,655,655,654,654,120,120,120,120,120,120,120,120,120,
+653,653,653,654,653,653,653,653,653,653,653,653,654,655,120,120,
+656,656,656,656,656,656,656,656,656,656,120,120,657,657,657,657,
+344,344,344,344,344,344,344,344,344,344,344,344,344,344,344,344,
+652,344,344,344,344,344,344,351,351,351,344,345,346,345,344,344,
/* block 114 */
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-658,657,658,658,658,657,657,658,658,657,657,657,657,657,658,658,
-657,658,657,120,120,120,120,120,120,120,120,120,120,120,120,120,
-120,120,120,120,120,120,120,120,120,120,120,657,657,659,660,660,
-661,661,661,661,661,661,661,661,661,661,661,662,663,663,662,662,
-664,664,661,665,665,662,663,120,120,120,120,120,120,120,120,120,
+658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,
+658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,
+658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,
+659,658,659,659,659,658,658,659,659,658,658,658,658,658,659,659,
+658,659,658,120,120,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,658,658,660,661,661,
+662,662,662,662,662,662,662,662,662,662,662,663,664,664,663,663,
+665,665,662,666,666,663,664,120,120,120,120,120,120,120,120,120,
/* block 115 */
-120,360,360,360,360,360,360,120,120,360,360,360,360,360,360,120,
-120,360,360,360,360,360,360,120,120,120,120,120,120,120,120,120,
-360,360,360,360,360,360,360,120,360,360,360,360,360,360,360,120,
+120,359,359,359,359,359,359,120,120,359,359,359,359,359,359,120,
+120,359,359,359,359,359,359,120,120,120,120,120,120,120,120,120,
+359,359,359,359,359,359,359,120,359,359,359,359,359,359,359,120,
35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
- 35, 35, 35,666, 35, 35, 35, 35, 35, 35, 35, 15,110,110,110,110,
- 35, 35, 35, 35, 35,128, 35, 35,120,120,120,120,120,120,120,120,
-667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,
+ 35, 35, 35,667, 35, 35, 35, 35, 35, 35, 35, 15,110,110,110,110,
+ 35, 35, 35, 35, 35,128, 35, 35, 35,110, 15, 15,120,120,120,120,
+668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,
/* block 116 */
-667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,
-667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,
-667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,
-667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,
-661,661,661,661,661,661,661,661,661,661,661,661,661,661,661,661,
-661,661,661,661,661,661,661,661,661,661,661,661,661,661,661,661,
-661,661,661,662,662,663,662,662,663,662,662,664,662,663,120,120,
-668,668,668,668,668,668,668,668,668,668,120,120,120,120,120,120,
+668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,
+668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,
+668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,
+668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,668,
+662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,
+662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,662,
+662,662,662,663,663,664,663,663,664,663,663,665,663,664,120,120,
+669,669,669,669,669,669,669,669,669,669,120,120,120,120,120,120,
/* block 117 */
-669,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,669,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,669,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,669,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-669,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
+670,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,670,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,670,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,670,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+670,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
/* block 118 */
-670,670,670,670,670,670,670,670,670,670,670,670,669,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,669,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,669,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-669,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,669,670,670,670,
+671,671,671,671,671,671,671,671,671,671,671,671,670,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,670,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,670,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+670,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,670,671,671,671,
/* block 119 */
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,669,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,669,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-669,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,669,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,670,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,670,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+670,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,670,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
/* block 120 */
-670,670,670,670,670,670,670,670,669,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,669,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-669,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,669,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,669,670,670,670,670,670,670,670,
+671,671,671,671,671,671,671,671,670,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,670,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+670,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,670,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,670,671,671,671,671,671,671,671,
/* block 121 */
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,669,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-669,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,669,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,669,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,670,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+670,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,670,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,670,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
/* block 122 */
-670,670,670,670,669,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-669,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,669,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,669,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,669,670,670,670,670,670,670,670,670,670,670,670,
-
-/* block 123 */
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-669,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,669,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,669,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,669,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-
-/* block 124 */
-670,670,670,670,670,670,670,670,669,670,670,670,670,670,670,670,
-670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
-670,670,670,670,120,120,120,120,120,120,120,120,120,120,120,120,
-358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
-358,358,358,358,358,358,358,120,120,120,120,359,359,359,359,359,
-359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,
-359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,359,
-359,359,359,359,359,359,359,359,359,359,359,359,120,120,120,120,
-
-/* block 125 */
+671,671,671,671,670,671,671,671,671,671,671,671,671,671,671,671,
671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+670,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,670,671,671,671,
671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,670,671,671,671,671,671,671,671,
671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,670,671,671,671,671,671,671,671,671,671,671,671,
+
+/* block 123 */
671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+670,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,671,671,671,671,670,671,671,671,
671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,671,671,671,671,670,671,671,671,671,671,671,671,
671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,670,671,671,671,671,671,671,671,671,671,671,671,
671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+
+/* block 124 */
+671,671,671,671,671,671,671,671,670,671,671,671,671,671,671,671,
671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,671,
+671,671,671,671,120,120,120,120,120,120,120,120,120,120,120,120,
+357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,
+357,357,357,357,357,357,357,120,120,120,120,358,358,358,358,358,
+358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
+358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
+358,358,358,358,358,358,358,358,358,358,358,358,120,120,120,120,
-/* block 126 */
+/* block 125 */
672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
@@ -2969,6 +2978,16 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
+/* block 126 */
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+
/* block 127 */
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
@@ -2991,53 +3010,53 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
/* block 129 */
35, 35, 35, 35, 35, 35, 35,120,120,120,120,120,120,120,120,120,
-120,120,120,206,206,206,206,206,120,120,120,120,120,215,212,215,
-215,215,215,215,215,215,215,215,215,673,215,215,215,215,215,215,
-215,215,215,215,215,215,215,120,215,215,215,215,215,120,215,120,
-215,215,120,215,215,120,215,215,215,215,215,215,215,215,215,215,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
+120,120,120,206,206,206,206,206,120,120,120,120,120,214,211,214,
+214,214,214,214,214,214,214,214,214,674,214,214,214,214,214,214,
+214,214,214,214,214,214,214,120,214,214,214,214,214,120,214,120,
+214,214,120,214,214,120,214,214,214,214,214,214,214,214,214,214,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
/* block 130 */
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,674,674,674,674,674,674,674,674,674,674,674,674,674,674,
-674,674,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-120,120,120,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,675,675,675,675,675,675,675,675,675,675,675,675,675,675,
+675,675,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
/* block 131 */
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
/* block 132 */
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225, 8, 7,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224, 8, 7,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
/* block 133 */
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-120,120,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,120,120,120,120,120,120,120,120,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+120,120,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-225,225,675,225,225,225,225,225,225,225,225,225,220,676,120,120,
+224,224,676,224,224,224,224,224,224,224,224,224,219,677,120,120,
/* block 134 */
113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,
@@ -3047,17 +3066,17 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
8, 7, 8, 7, 8,556,556, 7, 8, 5, 5, 5, 5, 16, 16, 16,
5, 5, 5,120, 5, 5, 5, 5, 10, 7, 8, 7, 8, 7, 8, 5,
5, 5, 9, 10, 9, 9, 9,120, 5, 6, 5, 5,120,120,120,120,
-225,225,225,225,225,120,225,225,225,225,225,225,225,225,225,225,
+224,224,224,224,224,120,224,224,224,224,224,224,224,224,224,224,
/* block 135 */
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,120,120, 24,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,120,120, 24,
/* block 136 */
120, 5, 5, 5, 6, 5, 5, 5, 7, 8, 5, 9, 5, 10, 5, 5,
@@ -3071,7 +3090,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
/* block 137 */
578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,677,677,
+578,578,578,578,578,578,578,578,578,578,578,578,578,578,678,678,
581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,
581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,120,
120,120,581,581,581,581,581,581,120,120,581,581,581,581,581,581,
@@ -3080,39 +3099,39 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
511,511,511,511,511,511,511,511,511, 24, 24, 24, 20, 20,120,120,
/* block 138 */
-678,678,678,678,678,678,678,678,678,678,678,678,120,678,678,678,
-678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,
-678,678,678,678,678,678,678,120,678,678,678,678,678,678,678,678,
-678,678,678,678,678,678,678,678,678,678,678,120,678,678,120,678,
-678,678,678,678,678,678,678,678,678,678,678,678,678,678,120,120,
-678,678,678,678,678,678,678,678,678,678,678,678,678,678,120,120,
+679,679,679,679,679,679,679,679,679,679,679,679,120,679,679,679,
+679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,
+679,679,679,679,679,679,679,120,679,679,679,679,679,679,679,679,
+679,679,679,679,679,679,679,679,679,679,679,120,679,679,120,679,
+679,679,679,679,679,679,679,679,679,679,679,679,679,679,120,120,
+679,679,679,679,679,679,679,679,679,679,679,679,679,679,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 139 */
-678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,
-678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,
-678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,
-678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,
-678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,
-678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,
-678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,678,
-678,678,678,678,678,678,678,678,678,678,678,120,120,120,120,120,
+679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,
+679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,
+679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,
+679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,
+679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,
+679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,
+679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,
+679,679,679,679,679,679,679,679,679,679,679,120,120,120,120,120,
/* block 140 */
-679,679,679,120,120,120,120,680,680,680,680,680,680,680,680,680,
-680,680,680,680,680,680,680,680,680,680,680,680,680,680,680,680,
-680,680,680,680,680,680,680,680,680,680,680,680,680,680,680,680,
-680,680,680,680,120,120,120,681,681,681,681,681,681,681,681,681,
-682,682,682,682,682,682,682,682,682,682,682,682,682,682,682,682,
-682,682,682,682,682,682,682,682,682,682,682,682,682,682,682,682,
-682,682,682,682,682,682,682,682,682,682,682,682,682,682,682,682,
-682,682,682,682,682,683,683,683,683,684,684,684,684,684,684,684,
+680,680,680,120,120,120,120,681,681,681,681,681,681,681,681,681,
+681,681,681,681,681,681,681,681,681,681,681,681,681,681,681,681,
+681,681,681,681,681,681,681,681,681,681,681,681,681,681,681,681,
+681,681,681,681,120,120,120,682,682,682,682,682,682,682,682,682,
+683,683,683,683,683,683,683,683,683,683,683,683,683,683,683,683,
+683,683,683,683,683,683,683,683,683,683,683,683,683,683,683,683,
+683,683,683,683,683,683,683,683,683,683,683,683,683,683,683,683,
+683,683,683,683,683,684,684,684,684,685,685,685,685,685,685,685,
/* block 141 */
-684,684,684,684,684,684,684,684,684,684,683,683,684,684,684,120,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,120,120,120,120,
-684,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+685,685,685,685,685,685,685,685,685,685,684,684,685,685,685,120,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,120,120,120,
+685,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
@@ -3130,159 +3149,159 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 143 */
-685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,
-685,685,685,685,685,685,685,685,685,685,685,685,685,120,120,120,
-686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,
686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,
-686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,
-686,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-687,688,688,688,688,688,688,688,688,688,688,688,688,688,688,688,
-688,688,688,688,688,688,688,688,688,688,688,688,120,120,120,120,
+686,686,686,686,686,686,686,686,686,686,686,686,686,120,120,120,
+687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,
+687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,
+687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,687,
+687,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+688,689,689,689,689,689,689,689,689,689,689,689,689,689,689,689,
+689,689,689,689,689,689,689,689,689,689,689,689,120,120,120,120,
/* block 144 */
-689,689,689,689,689,689,689,689,689,689,689,689,689,689,689,689,
-689,689,689,689,689,689,689,689,689,689,689,689,689,689,689,689,
-690,690,690,690,120,120,120,120,120,120,120,120,120,689,689,689,
-691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,
-691,692,691,691,691,691,691,691,691,691,692,120,120,120,120,120,
-693,693,693,693,693,693,693,693,693,693,693,693,693,693,693,693,
-693,693,693,693,693,693,693,693,693,693,693,693,693,693,693,693,
-693,693,693,693,693,693,694,694,694,694,694,120,120,120,120,120,
+690,690,690,690,690,690,690,690,690,690,690,690,690,690,690,690,
+690,690,690,690,690,690,690,690,690,690,690,690,690,690,690,690,
+691,691,691,691,120,120,120,120,120,120,120,120,120,690,690,690,
+692,692,692,692,692,692,692,692,692,692,692,692,692,692,692,692,
+692,693,692,692,692,692,692,692,692,692,693,120,120,120,120,120,
+694,694,694,694,694,694,694,694,694,694,694,694,694,694,694,694,
+694,694,694,694,694,694,694,694,694,694,694,694,694,694,694,694,
+694,694,694,694,694,694,695,695,695,695,695,120,120,120,120,120,
/* block 145 */
-695,695,695,695,695,695,695,695,695,695,695,695,695,695,695,695,
-695,695,695,695,695,695,695,695,695,695,695,695,695,695,120,696,
-697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,
-697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,697,
-697,697,697,697,120,120,120,120,697,697,697,697,697,697,697,697,
-698,699,699,699,699,699,120,120,120,120,120,120,120,120,120,120,
+696,696,696,696,696,696,696,696,696,696,696,696,696,696,696,696,
+696,696,696,696,696,696,696,696,696,696,696,696,696,696,120,697,
+698,698,698,698,698,698,698,698,698,698,698,698,698,698,698,698,
+698,698,698,698,698,698,698,698,698,698,698,698,698,698,698,698,
+698,698,698,698,120,120,120,120,698,698,698,698,698,698,698,698,
+699,700,700,700,700,700,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 146 */
-700,700,700,700,700,700,700,700,700,700,700,700,700,700,700,700,
-700,700,700,700,700,700,700,700,700,700,700,700,700,700,700,700,
-700,700,700,700,700,700,700,700,701,701,701,701,701,701,701,701,
701,701,701,701,701,701,701,701,701,701,701,701,701,701,701,701,
701,701,701,701,701,701,701,701,701,701,701,701,701,701,701,701,
+701,701,701,701,701,701,701,701,702,702,702,702,702,702,702,702,
702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,
702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,
-702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,
+703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,
+703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,
+703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,
/* block 147 */
-703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,
-703,703,703,703,703,703,703,703,703,703,703,703,703,703,120,120,
-704,704,704,704,704,704,704,704,704,704,120,120,120,120,120,120,
-705,705,705,705,705,705,705,705,705,705,705,705,705,705,705,705,
-705,705,705,705,705,705,705,705,705,705,705,705,705,705,705,705,
-705,705,705,705,120,120,120,120,706,706,706,706,706,706,706,706,
+704,704,704,704,704,704,704,704,704,704,704,704,704,704,704,704,
+704,704,704,704,704,704,704,704,704,704,704,704,704,704,120,120,
+705,705,705,705,705,705,705,705,705,705,120,120,120,120,120,120,
+706,706,706,706,706,706,706,706,706,706,706,706,706,706,706,706,
706,706,706,706,706,706,706,706,706,706,706,706,706,706,706,706,
-706,706,706,706,706,706,706,706,706,706,706,706,120,120,120,120,
+706,706,706,706,120,120,120,120,707,707,707,707,707,707,707,707,
+707,707,707,707,707,707,707,707,707,707,707,707,707,707,707,707,
+707,707,707,707,707,707,707,707,707,707,707,707,120,120,120,120,
/* block 148 */
-707,707,707,707,707,707,707,707,707,707,707,707,707,707,707,707,
-707,707,707,707,707,707,707,707,707,707,707,707,707,707,707,707,
-707,707,707,707,707,707,707,707,120,120,120,120,120,120,120,120,
-708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,
708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,
708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,
-708,708,708,708,120,120,120,120,120,120,120,120,120,120,120,709,
+708,708,708,708,708,708,708,708,120,120,120,120,120,120,120,120,
+709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,
+709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,
+709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,
+709,709,709,709,120,120,120,120,120,120,120,120,120,120,120,710,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 149 */
-710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,
-710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,
-710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,
-710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,
-710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,
-710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,
-710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,
-710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,
+711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
+711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
+711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
+711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
+711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
+711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
+711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
+711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
/* block 150 */
-710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,
-710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,
-710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,
-710,710,710,710,710,710,710,120,120,120,120,120,120,120,120,120,
-710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,
-710,710,710,710,710,710,120,120,120,120,120,120,120,120,120,120,
-710,710,710,710,710,710,710,710,120,120,120,120,120,120,120,120,
+711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
+711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
+711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
+711,711,711,711,711,711,711,120,120,120,120,120,120,120,120,120,
+711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
+711,711,711,711,711,711,120,120,120,120,120,120,120,120,120,120,
+711,711,711,711,711,711,711,711,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 151 */
-711,711,711,711,711,711,120,120,711,120,711,711,711,711,711,711,
-711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
-711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
-711,711,711,711,711,711,120,711,711,120,120,120,711,120,120,711,
+712,712,712,712,712,712,120,120,712,120,712,712,712,712,712,712,
+712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,
712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,
-712,712,712,712,712,712,120,713,714,714,714,714,714,714,714,714,
-715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,
-715,715,715,715,715,715,715,716,716,717,717,717,717,717,717,717,
+712,712,712,712,712,712,120,712,712,120,120,120,712,120,120,712,
+713,713,713,713,713,713,713,713,713,713,713,713,713,713,713,713,
+713,713,713,713,713,713,120,714,715,715,715,715,715,715,715,715,
+716,716,716,716,716,716,716,716,716,716,716,716,716,716,716,716,
+716,716,716,716,716,716,716,717,717,718,718,718,718,718,718,718,
/* block 152 */
-718,718,718,718,718,718,718,718,718,718,718,718,718,718,718,718,
-718,718,718,718,718,718,718,718,718,718,718,718,718,718,718,120,
-120,120,120,120,120,120,120,719,719,719,719,719,719,719,719,719,
+719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,
+719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,120,
+120,120,120,120,120,120,120,720,720,720,720,720,720,720,720,720,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-720,720,720,720,720,720,720,720,720,720,720,720,720,720,720,720,
-720,720,720,120,720,720,120,120,120,120,120,721,721,721,721,721,
+721,721,721,721,721,721,721,721,721,721,721,721,721,721,721,721,
+721,721,721,120,721,721,120,120,120,120,120,722,722,722,722,722,
/* block 153 */
-722,722,722,722,722,722,722,722,722,722,722,722,722,722,722,722,
-722,722,722,722,722,722,723,723,723,723,723,723,120,120,120,724,
-725,725,725,725,725,725,725,725,725,725,725,725,725,725,725,725,
-725,725,725,725,725,725,725,725,725,725,120,120,120,120,120,726,
+723,723,723,723,723,723,723,723,723,723,723,723,723,723,723,723,
+723,723,723,723,723,723,724,724,724,724,724,724,120,120,120,725,
+726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,
+726,726,726,726,726,726,726,726,726,726,120,120,120,120,120,727,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 154 */
-727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,
-727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,
728,728,728,728,728,728,728,728,728,728,728,728,728,728,728,728,
-728,728,728,728,728,728,728,728,120,120,120,120,729,729,728,728,
-729,729,729,729,729,729,729,729,729,729,729,729,729,729,729,729,
-120,120,729,729,729,729,729,729,729,729,729,729,729,729,729,729,
-729,729,729,729,729,729,729,729,729,729,729,729,729,729,729,729,
+728,728,728,728,728,728,728,728,728,728,728,728,728,728,728,728,
729,729,729,729,729,729,729,729,729,729,729,729,729,729,729,729,
+729,729,729,729,729,729,729,729,120,120,120,120,730,730,729,729,
+730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,
+120,120,730,730,730,730,730,730,730,730,730,730,730,730,730,730,
+730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,
+730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,
/* block 155 */
-730,731,731,731,120,731,731,120,120,120,120,120,731,731,731,731,
-730,730,730,730,120,730,730,730,120,730,730,730,730,730,730,730,
-730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,
-730,730,730,730,730,730,120,120,731,731,731,120,120,120,120,731,
-732,732,732,732,732,732,732,732,732,120,120,120,120,120,120,120,
+731,732,732,732,120,732,732,120,120,120,120,120,732,732,732,732,
+731,731,731,731,120,731,731,731,120,731,731,731,731,731,731,731,
+731,731,731,731,731,731,731,731,731,731,731,731,731,731,731,731,
+731,731,731,731,731,731,120,120,732,732,732,120,120,120,120,732,
733,733,733,733,733,733,733,733,733,120,120,120,120,120,120,120,
-734,734,734,734,734,734,734,734,734,734,734,734,734,734,734,734,
-734,734,734,734,734,734,734,734,734,734,734,734,734,735,735,736,
+734,734,734,734,734,734,734,734,734,120,120,120,120,120,120,120,
+735,735,735,735,735,735,735,735,735,735,735,735,735,735,735,735,
+735,735,735,735,735,735,735,735,735,735,735,735,735,736,736,737,
/* block 156 */
-737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,
-737,737,737,737,737,737,737,737,737,737,737,737,737,738,738,738,
+738,738,738,738,738,738,738,738,738,738,738,738,738,738,738,738,
+738,738,738,738,738,738,738,738,738,738,738,738,738,739,739,739,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-739,739,739,739,739,739,739,739,740,739,739,739,739,739,739,739,
-739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,
-739,739,739,739,739,741,741,120,120,120,120,742,742,742,742,742,
-743,743,743,743,743,743,743,120,120,120,120,120,120,120,120,120,
+740,740,740,740,740,740,740,740,741,740,740,740,740,740,740,740,
+740,740,740,740,740,740,740,740,740,740,740,740,740,740,740,740,
+740,740,740,740,740,742,742,120,120,120,120,743,743,743,743,743,
+744,744,744,744,744,744,744,120,120,120,120,120,120,120,120,120,
/* block 157 */
-744,744,744,744,744,744,744,744,744,744,744,744,744,744,744,744,
-744,744,744,744,744,744,744,744,744,744,744,744,744,744,744,744,
-744,744,744,744,744,744,744,744,744,744,744,744,744,744,744,744,
-744,744,744,744,744,744,120,120,120,745,745,745,745,745,745,745,
-746,746,746,746,746,746,746,746,746,746,746,746,746,746,746,746,
-746,746,746,746,746,746,120,120,747,747,747,747,747,747,747,747,
-748,748,748,748,748,748,748,748,748,748,748,748,748,748,748,748,
-748,748,748,120,120,120,120,120,749,749,749,749,749,749,749,749,
+745,745,745,745,745,745,745,745,745,745,745,745,745,745,745,745,
+745,745,745,745,745,745,745,745,745,745,745,745,745,745,745,745,
+745,745,745,745,745,745,745,745,745,745,745,745,745,745,745,745,
+745,745,745,745,745,745,120,120,120,746,746,746,746,746,746,746,
+747,747,747,747,747,747,747,747,747,747,747,747,747,747,747,747,
+747,747,747,747,747,747,120,120,748,748,748,748,748,748,748,748,
+749,749,749,749,749,749,749,749,749,749,749,749,749,749,749,749,
+749,749,749,120,120,120,120,120,750,750,750,750,750,750,750,750,
/* block 158 */
-750,750,750,750,750,750,750,750,750,750,750,750,750,750,750,750,
-750,750,120,120,120,120,120,120,120,751,751,751,751,120,120,120,
-120,120,120,120,120,120,120,120,120,752,752,752,752,752,752,752,
+751,751,751,751,751,751,751,751,751,751,751,751,751,751,751,751,
+751,751,120,120,120,120,120,120,120,752,752,752,752,120,120,120,
+120,120,120,120,120,120,120,120,120,753,753,753,753,753,753,753,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
@@ -3290,30 +3309,30 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 159 */
-753,753,753,753,753,753,753,753,753,753,753,753,753,753,753,753,
-753,753,753,753,753,753,753,753,753,753,753,753,753,753,753,753,
-753,753,753,753,753,753,753,753,753,753,753,753,753,753,753,753,
-753,753,753,753,753,753,753,753,753,753,753,753,753,753,753,753,
-753,753,753,753,753,753,753,753,753,120,120,120,120,120,120,120,
+754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,
+754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,
+754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,
+754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,
+754,754,754,754,754,754,754,754,754,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 160 */
-754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,
-754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,
-754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,754,
-754,754,754,120,120,120,120,120,120,120,120,120,120,120,120,120,
755,755,755,755,755,755,755,755,755,755,755,755,755,755,755,755,
755,755,755,755,755,755,755,755,755,755,755,755,755,755,755,755,
755,755,755,755,755,755,755,755,755,755,755,755,755,755,755,755,
-755,755,755,120,120,120,120,120,120,120,756,756,756,756,756,756,
+755,755,755,120,120,120,120,120,120,120,120,120,120,120,120,120,
+756,756,756,756,756,756,756,756,756,756,756,756,756,756,756,756,
+756,756,756,756,756,756,756,756,756,756,756,756,756,756,756,756,
+756,756,756,756,756,756,756,756,756,756,756,756,756,756,756,756,
+756,756,756,120,120,120,120,120,120,120,757,757,757,757,757,757,
/* block 161 */
-757,757,757,757,757,757,757,757,757,757,757,757,757,757,757,757,
-757,757,757,757,757,757,757,757,757,757,757,757,757,757,757,757,
-757,757,757,757,758,758,758,758,120,120,120,120,120,120,120,120,
-759,759,759,759,759,759,759,759,759,759,120,120,120,120,120,120,
+758,758,758,758,758,758,758,758,758,758,758,758,758,758,758,758,
+758,758,758,758,758,758,758,758,758,758,758,758,758,758,758,758,
+758,758,758,758,759,759,759,759,120,120,120,120,120,120,120,120,
+760,760,760,760,760,760,760,760,760,760,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
@@ -3326,350 +3345,370 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-760,760,760,760,760,760,760,760,760,760,760,760,760,760,760,760,
-760,760,760,760,760,760,760,760,760,760,760,760,760,760,760,120,
+761,761,761,761,761,761,761,761,761,761,761,761,761,761,761,761,
+761,761,761,761,761,761,761,761,761,761,761,761,761,761,761,120,
/* block 163 */
-761,761,761,761,761,761,761,761,761,761,761,761,761,761,761,761,
-761,761,761,761,761,761,761,761,761,761,761,761,761,762,762,762,
-762,762,762,762,762,762,762,761,120,120,120,120,120,120,120,120,
-763,763,763,763,763,763,763,763,763,763,763,763,763,763,763,763,
-763,763,763,763,763,763,764,764,764,764,764,764,764,764,764,764,
-764,765,765,765,765,766,766,766,766,766,120,120,120,120,120,120,
+762,762,762,762,762,762,762,762,762,762,762,762,762,762,762,762,
+762,762,762,762,762,762,762,762,762,762,762,762,762,762,762,762,
+762,762,762,762,762,762,762,762,762,762,120,763,763,764,120,120,
+762,762,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 164 */
+765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,
+765,765,765,765,765,765,765,765,765,765,765,765,765,766,766,766,
+766,766,766,766,766,766,766,765,120,120,120,120,120,120,120,120,
+767,767,767,767,767,767,767,767,767,767,767,767,767,767,767,767,
+767,767,767,767,767,767,768,768,768,768,768,768,768,768,768,768,
+768,769,769,769,769,770,770,770,770,770,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+
+/* block 165 */
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+771,771,771,771,771,771,771,771,771,771,771,771,771,771,771,771,
+771,771,771,771,771,772,772,772,772,772,772,772,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-767,767,767,767,767,767,767,767,767,767,767,767,767,767,767,767,
-767,767,767,767,767,767,767,120,120,120,120,120,120,120,120,120,
-
-/* block 165 */
-768,769,768,770,770,770,770,770,770,770,770,770,770,770,770,770,
-770,770,770,770,770,770,770,770,770,770,770,770,770,770,770,770,
-770,770,770,770,770,770,770,770,770,770,770,770,770,770,770,770,
-770,770,770,770,770,770,770,770,769,769,769,769,769,769,769,769,
-769,769,769,769,769,769,769,771,771,771,771,771,771,771,120,120,
-120,120,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
-772,772,772,772,772,772,773,773,773,773,773,773,773,773,773,773,
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,769,
+773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,
+773,773,773,773,773,773,773,120,120,120,120,120,120,120,120,120,
/* block 166 */
-774,774,775,776,776,776,776,776,776,776,776,776,776,776,776,776,
+774,775,774,776,776,776,776,776,776,776,776,776,776,776,776,776,
776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
-775,775,775,774,774,774,774,775,775,774,774,777,777,778,777,777,
-777,777,120,120,120,120,120,120,120,120,120,120,120,778,120,120,
-779,779,779,779,779,779,779,779,779,779,779,779,779,779,779,779,
-779,779,779,779,779,779,779,779,779,120,120,120,120,120,120,120,
-780,780,780,780,780,780,780,780,780,780,120,120,120,120,120,120,
+776,776,776,776,776,776,776,776,775,775,775,775,775,775,775,775,
+775,775,775,775,775,775,775,777,777,777,777,777,777,777,120,120,
+120,120,778,778,778,778,778,778,778,778,778,778,778,778,778,778,
+778,778,778,778,778,778,779,779,779,779,779,779,779,779,779,779,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,775,
/* block 167 */
-781,781,781,782,782,782,782,782,782,782,782,782,782,782,782,782,
+780,780,781,782,782,782,782,782,782,782,782,782,782,782,782,782,
782,782,782,782,782,782,782,782,782,782,782,782,782,782,782,782,
-782,782,782,782,782,782,782,781,781,781,781,781,783,781,781,781,
-781,781,781,781,781,120,784,784,784,784,784,784,784,784,784,784,
-785,785,785,785,782,783,783,120,120,120,120,120,120,120,120,120,
-786,786,786,786,786,786,786,786,786,786,786,786,786,786,786,786,
-786,786,786,786,786,786,786,786,786,786,786,786,786,786,786,786,
-786,786,786,787,788,788,786,120,120,120,120,120,120,120,120,120,
+782,782,782,782,782,782,782,782,782,782,782,782,782,782,782,782,
+781,781,781,780,780,780,780,781,781,780,780,783,783,784,783,783,
+783,783,120,120,120,120,120,120,120,120,120,120,120,784,120,120,
+785,785,785,785,785,785,785,785,785,785,785,785,785,785,785,785,
+785,785,785,785,785,785,785,785,785,120,120,120,120,120,120,120,
+786,786,786,786,786,786,786,786,786,786,120,120,120,120,120,120,
/* block 168 */
-789,789,790,791,791,791,791,791,791,791,791,791,791,791,791,791,
-791,791,791,791,791,791,791,791,791,791,791,791,791,791,791,791,
-791,791,791,791,791,791,791,791,791,791,791,791,791,791,791,791,
-791,791,791,790,790,790,789,789,789,789,789,789,789,789,789,790,
-790,791,792,792,791,793,793,793,793,789,789,789,789,793,120,120,
-794,794,794,794,794,794,794,794,794,794,791,793,791,793,793,793,
-120,795,795,795,795,795,795,795,795,795,795,795,795,795,795,795,
-795,795,795,795,795,120,120,120,120,120,120,120,120,120,120,120,
+787,787,787,788,788,788,788,788,788,788,788,788,788,788,788,788,
+788,788,788,788,788,788,788,788,788,788,788,788,788,788,788,788,
+788,788,788,788,788,788,788,787,787,787,787,787,789,787,787,787,
+787,787,787,787,787,120,790,790,790,790,790,790,790,790,790,790,
+791,791,791,791,788,789,789,788,120,120,120,120,120,120,120,120,
+792,792,792,792,792,792,792,792,792,792,792,792,792,792,792,792,
+792,792,792,792,792,792,792,792,792,792,792,792,792,792,792,792,
+792,792,792,793,794,794,792,120,120,120,120,120,120,120,120,120,
/* block 169 */
-796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,120,796,796,796,796,796,796,796,796,796,796,796,796,796,
-796,796,796,796,796,796,796,796,796,796,796,796,797,797,797,798,
-798,798,797,797,798,797,798,798,799,799,799,799,799,799,798,120,
+795,795,796,797,797,797,797,797,797,797,797,797,797,797,797,797,
+797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
+797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
+797,797,797,796,796,796,795,795,795,795,795,795,795,795,795,796,
+796,797,798,798,797,799,799,799,799,795,795,795,795,799,796,795,
+800,800,800,800,800,800,800,800,800,800,797,799,797,799,799,799,
+120,801,801,801,801,801,801,801,801,801,801,801,801,801,801,801,
+801,801,801,801,801,120,120,120,120,120,120,120,120,120,120,120,
+
+/* block 170 */
+802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,
+802,802,120,802,802,802,802,802,802,802,802,802,802,802,802,802,
+802,802,802,802,802,802,802,802,802,802,802,802,803,803,803,804,
+804,804,803,803,804,803,804,804,805,805,805,805,805,805,804,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-/* block 170 */
-800,800,800,800,800,800,800,120,800,120,800,800,800,800,120,800,
-800,800,800,800,800,800,800,800,800,800,800,800,800,800,120,800,
-800,800,800,800,800,800,800,800,800,801,120,120,120,120,120,120,
-802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,
-802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,
-802,802,802,802,802,802,802,802,802,802,802,802,802,802,802,803,
-804,804,804,803,803,803,803,803,803,803,803,120,120,120,120,120,
-805,805,805,805,805,805,805,805,805,805,120,120,120,120,120,120,
-
/* block 171 */
-806,807,808,809,120,810,810,810,810,810,810,810,810,120,120,810,
-810,120,120,810,810,810,810,810,810,810,810,810,810,810,810,810,
-810,810,810,810,810,810,810,810,810,120,810,810,810,810,810,810,
-810,120,810,810,120,810,810,810,810,810,120,811,807,810,812,808,
-806,808,808,808,808,120,120,808,808,120,120,808,808,808,120,120,
-810,120,120,120,120,120,120,812,120,120,120,120,120,810,810,810,
-810,810,808,808,120,120,806,806,806,806,806,806,806,120,120,120,
-806,806,806,806,806,120,120,120,120,120,120,120,120,120,120,120,
+806,806,806,806,806,806,806,120,806,120,806,806,806,806,120,806,
+806,806,806,806,806,806,806,806,806,806,806,806,806,806,120,806,
+806,806,806,806,806,806,806,806,806,807,120,120,120,120,120,120,
+808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
+808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
+808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,809,
+810,810,810,809,809,809,809,809,809,809,809,120,120,120,120,120,
+811,811,811,811,811,811,811,811,811,811,120,120,120,120,120,120,
/* block 172 */
-813,813,813,813,813,813,813,813,813,813,813,813,813,813,813,813,
-813,813,813,813,813,813,813,813,813,813,813,813,813,813,813,813,
-813,813,813,813,813,813,813,813,813,813,813,813,813,813,813,813,
-813,813,813,813,813,814,814,814,815,815,815,815,815,815,815,815,
-814,814,815,815,815,814,815,813,813,813,813,816,816,816,816,816,
-817,817,817,817,817,817,817,817,817,817,120,816,120,816,815,813,
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+812,813,814,815,120,816,816,816,816,816,816,816,816,120,120,816,
+816,120,120,816,816,816,816,816,816,816,816,816,816,816,816,816,
+816,816,816,816,816,816,816,816,816,120,816,816,816,816,816,816,
+816,120,816,816,120,816,816,816,816,816,120,817,813,816,818,814,
+812,814,814,814,814,120,120,814,814,120,120,814,814,814,120,120,
+816,120,120,120,120,120,120,818,120,120,120,120,120,816,816,816,
+816,816,814,814,120,120,812,812,812,812,812,812,812,120,120,120,
+812,812,812,812,812,120,120,120,120,120,120,120,120,120,120,120,
/* block 173 */
-818,818,818,818,818,818,818,818,818,818,818,818,818,818,818,818,
-818,818,818,818,818,818,818,818,818,818,818,818,818,818,818,818,
-818,818,818,818,818,818,818,818,818,818,818,818,818,818,818,818,
-819,820,820,821,821,821,821,821,821,820,821,820,820,819,820,821,
-821,820,821,821,818,818,822,818,120,120,120,120,120,120,120,120,
-823,823,823,823,823,823,823,823,823,823,120,120,120,120,120,120,
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+819,819,819,819,819,819,819,819,819,819,819,819,819,819,819,819,
+819,819,819,819,819,819,819,819,819,819,819,819,819,819,819,819,
+819,819,819,819,819,819,819,819,819,819,819,819,819,819,819,819,
+819,819,819,819,819,820,820,820,821,821,821,821,821,821,821,821,
+820,820,821,821,821,820,821,819,819,819,819,822,822,822,822,822,
+823,823,823,823,823,823,823,823,823,823,822,822,120,822,821,819,
+819,819,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 174 */
824,824,824,824,824,824,824,824,824,824,824,824,824,824,824,824,
824,824,824,824,824,824,824,824,824,824,824,824,824,824,824,824,
-824,824,824,824,824,824,824,824,824,824,824,824,824,824,824,825,
-826,826,827,827,827,827,120,120,826,826,826,826,827,827,826,827,
-827,828,828,828,828,828,828,828,828,828,828,828,828,828,828,828,
-828,828,828,828,828,828,828,828,824,824,824,824,827,827,120,120,
+824,824,824,824,824,824,824,824,824,824,824,824,824,824,824,824,
+825,826,826,827,827,827,827,827,827,826,827,826,826,825,826,827,
+827,826,827,827,824,824,828,824,120,120,120,120,120,120,120,120,
+829,829,829,829,829,829,829,829,829,829,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 175 */
-829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
-829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
-829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,829,
-830,830,830,831,831,831,831,831,831,831,831,830,830,831,830,831,
-831,832,832,832,829,120,120,120,120,120,120,120,120,120,120,120,
-833,833,833,833,833,833,833,833,833,833,120,120,120,120,120,120,
-395,395,395,395,395,395,395,395,395,395,395,395,395,120,120,120,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,
+830,830,830,830,830,830,830,830,830,830,830,830,830,830,830,831,
+832,832,833,833,833,833,120,120,832,832,832,832,833,833,832,833,
+833,834,834,834,834,834,834,834,834,834,834,834,834,834,834,834,
+834,834,834,834,834,834,834,834,830,830,830,830,833,833,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 176 */
-834,834,834,834,834,834,834,834,834,834,834,834,834,834,834,834,
-834,834,834,834,834,834,834,834,834,834,834,834,834,834,834,834,
-834,834,834,834,834,834,834,834,834,834,834,835,836,835,836,836,
-835,835,835,835,835,835,836,835,834,120,120,120,120,120,120,120,
-837,837,837,837,837,837,837,837,837,837,120,120,120,120,120,120,
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+835,835,835,835,835,835,835,835,835,835,835,835,835,835,835,835,
+835,835,835,835,835,835,835,835,835,835,835,835,835,835,835,835,
+835,835,835,835,835,835,835,835,835,835,835,835,835,835,835,835,
+836,836,836,837,837,837,837,837,837,837,837,836,836,837,836,837,
+837,838,838,838,835,120,120,120,120,120,120,120,120,120,120,120,
+839,839,839,839,839,839,839,839,839,839,120,120,120,120,120,120,
+394,394,394,394,394,394,394,394,394,394,394,394,394,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 177 */
-838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,838,
-838,838,838,838,838,838,838,838,838,838,838,120,120,839,839,839,
-840,840,839,839,839,839,840,839,839,839,839,839,120,120,120,120,
-841,841,841,841,841,841,841,841,841,841,842,842,843,843,843,844,
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+840,840,840,840,840,840,840,840,840,840,840,840,840,840,840,840,
+840,840,840,840,840,840,840,840,840,840,840,840,840,840,840,840,
+840,840,840,840,840,840,840,840,840,840,840,841,842,841,842,842,
+841,841,841,841,841,841,842,841,840,120,120,120,120,120,120,120,
+843,843,843,843,843,843,843,843,843,843,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 178 */
-845,845,845,845,845,845,845,845,845,845,845,845,845,845,845,845,
-845,845,845,845,845,845,845,845,845,845,845,845,845,845,845,845,
-845,845,845,845,845,845,845,845,845,845,845,845,846,846,846,847,
-847,847,847,847,847,847,847,847,846,847,847,848,120,120,120,120,
+844,844,844,844,844,844,844,844,844,844,844,844,844,844,844,844,
+844,844,844,844,844,844,844,844,844,844,844,120,120,845,845,845,
+846,846,845,845,845,845,846,845,845,845,845,845,120,120,120,120,
+847,847,847,847,847,847,847,847,847,847,848,848,849,849,849,850,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 179 */
+851,851,851,851,851,851,851,851,851,851,851,851,851,851,851,851,
+851,851,851,851,851,851,851,851,851,851,851,851,851,851,851,851,
+851,851,851,851,851,851,851,851,851,851,851,851,852,852,852,853,
+853,853,853,853,853,853,853,853,852,853,853,854,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
-849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,849,
-850,850,850,850,850,850,850,850,850,850,850,850,850,850,850,850,
-850,850,850,850,850,850,850,850,850,850,850,850,850,850,850,850,
-851,851,851,851,851,851,851,851,851,851,852,852,852,852,852,852,
-852,852,852,120,120,120,120,120,120,120,120,120,120,120,120,853,
/* block 180 */
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-854,854,854,854,854,854,854,854,120,120,854,854,854,854,854,854,
-854,854,854,854,854,854,854,854,854,854,854,854,854,854,854,854,
-854,854,854,854,854,854,854,854,854,854,854,854,854,854,854,854,
-854,855,855,855,856,856,856,856,120,120,856,856,855,855,855,855,
-856,854,857,854,855,120,120,120,120,120,120,120,120,120,120,120,
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+855,855,855,855,855,855,855,855,855,855,855,855,855,855,855,855,
+855,855,855,855,855,855,855,855,855,855,855,855,855,855,855,855,
+856,856,856,856,856,856,856,856,856,856,856,856,856,856,856,856,
+856,856,856,856,856,856,856,856,856,856,856,856,856,856,856,856,
+857,857,857,857,857,857,857,857,857,857,858,858,858,858,858,858,
+858,858,858,120,120,120,120,120,120,120,120,120,120,120,120,859,
/* block 181 */
-858,859,859,859,859,859,859,859,859,859,859,858,858,858,858,858,
-858,858,858,858,858,858,858,858,858,858,858,858,858,858,858,858,
-858,858,858,858,858,858,858,858,858,858,858,858,858,858,858,858,
-858,858,858,859,859,859,859,859,859,860,861,859,859,859,859,862,
-862,862,862,862,862,862,862,859,120,120,120,120,120,120,120,120,
-863,864,864,864,864,864,864,865,865,864,864,864,863,863,863,863,
-863,863,863,863,863,863,863,863,863,863,863,863,863,863,863,863,
-863,863,863,863,863,863,863,863,863,863,863,863,863,863,863,863,
+860,860,860,860,860,860,860,120,120,860,120,120,860,860,860,860,
+860,860,860,860,120,860,860,120,860,860,860,860,860,860,860,860,
+860,860,860,860,860,860,860,860,860,860,860,860,860,860,860,860,
+861,862,862,862,862,862,120,862,862,120,120,863,863,862,863,864,
+862,864,862,863,865,865,865,120,120,120,120,120,120,120,120,120,
+866,866,866,866,866,866,866,866,866,866,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 182 */
-863,863,863,863,866,866,866,866,866,866,864,864,864,864,864,864,
-864,864,864,864,864,864,864,865,864,864,867,867,867,863,867,867,
-867,867,867,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-868,868,868,868,868,868,868,868,868,868,868,868,868,868,868,868,
-868,868,868,868,868,868,868,868,868,868,868,868,868,868,868,868,
-868,868,868,868,868,868,868,868,868,868,868,868,868,868,868,868,
-868,868,868,868,868,868,868,868,868,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+867,867,867,867,867,867,867,867,120,120,867,867,867,867,867,867,
+867,867,867,867,867,867,867,867,867,867,867,867,867,867,867,867,
+867,867,867,867,867,867,867,867,867,867,867,867,867,867,867,867,
+867,868,868,868,869,869,869,869,120,120,869,869,868,868,868,868,
+869,867,870,867,868,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 183 */
-869,869,869,869,869,869,869,869,869,120,869,869,869,869,869,869,
-869,869,869,869,869,869,869,869,869,869,869,869,869,869,869,869,
-869,869,869,869,869,869,869,869,869,869,869,869,869,869,869,870,
-871,871,871,871,871,871,871,120,871,871,871,871,871,871,870,871,
-869,872,872,872,872,872,120,120,120,120,120,120,120,120,120,120,
-873,873,873,873,873,873,873,873,873,873,874,874,874,874,874,874,
-874,874,874,874,874,874,874,874,874,874,874,874,874,120,120,120,
-875,875,876,876,876,876,876,876,876,876,876,876,876,876,876,876,
+871,872,872,872,872,872,872,872,872,872,872,871,871,871,871,871,
+871,871,871,871,871,871,871,871,871,871,871,871,871,871,871,871,
+871,871,871,871,871,871,871,871,871,871,871,871,871,871,871,871,
+871,871,871,872,872,872,872,872,872,873,874,872,872,872,872,875,
+875,875,875,875,875,875,875,872,120,120,120,120,120,120,120,120,
+876,877,877,877,877,877,877,878,878,877,877,877,876,876,876,876,
+876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,
+876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,
/* block 184 */
-876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,876,
-120,120,877,877,877,877,877,877,877,877,877,877,877,877,877,877,
-877,877,877,877,877,877,877,877,120,878,877,877,877,877,877,877,
-877,878,877,877,878,877,877,120,120,120,120,120,120,120,120,120,
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+876,876,876,876,879,879,879,879,879,879,877,877,877,877,877,877,
+877,877,877,877,877,877,877,878,877,877,880,880,880,876,880,880,
+880,880,880,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+881,881,881,881,881,881,881,881,881,881,881,881,881,881,881,881,
+881,881,881,881,881,881,881,881,881,881,881,881,881,881,881,881,
+881,881,881,881,881,881,881,881,881,881,881,881,881,881,881,881,
+881,881,881,881,881,881,881,881,881,120,120,120,120,120,120,120,
/* block 185 */
-879,879,879,879,879,879,879,120,879,879,120,879,879,879,879,879,
-879,879,879,879,879,879,879,879,879,879,879,879,879,879,879,879,
-879,879,879,879,879,879,879,879,879,879,879,879,879,879,879,879,
-879,880,880,880,880,880,880,120,120,120,880,120,880,880,120,880,
-880,880,880,880,880,880,881,880,120,120,120,120,120,120,120,120,
-882,882,882,882,882,882,882,882,882,882,120,120,120,120,120,120,
-883,883,883,883,883,883,120,883,883,120,883,883,883,883,883,883,
-883,883,883,883,883,883,883,883,883,883,883,883,883,883,883,883,
+882,882,882,882,882,882,882,882,882,120,882,882,882,882,882,882,
+882,882,882,882,882,882,882,882,882,882,882,882,882,882,882,882,
+882,882,882,882,882,882,882,882,882,882,882,882,882,882,882,883,
+884,884,884,884,884,884,884,120,884,884,884,884,884,884,883,884,
+882,885,885,885,885,885,120,120,120,120,120,120,120,120,120,120,
+886,886,886,886,886,886,886,886,886,886,887,887,887,887,887,887,
+887,887,887,887,887,887,887,887,887,887,887,887,887,120,120,120,
+888,888,889,889,889,889,889,889,889,889,889,889,889,889,889,889,
/* block 186 */
-883,883,883,883,883,883,883,883,883,883,884,884,884,884,884,120,
-885,885,120,884,884,885,884,885,883,120,120,120,120,120,120,120,
-886,886,886,886,886,886,886,886,886,886,120,120,120,120,120,120,
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+889,889,889,889,889,889,889,889,889,889,889,889,889,889,889,889,
+120,120,890,890,890,890,890,890,890,890,890,890,890,890,890,890,
+890,890,890,890,890,890,890,890,120,891,890,890,890,890,890,890,
+890,891,890,890,891,890,890,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 187 */
+892,892,892,892,892,892,892,120,892,892,120,892,892,892,892,892,
+892,892,892,892,892,892,892,892,892,892,892,892,892,892,892,892,
+892,892,892,892,892,892,892,892,892,892,892,892,892,892,892,892,
+892,893,893,893,893,893,893,120,120,120,893,120,893,893,120,893,
+893,893,893,893,893,893,894,893,120,120,120,120,120,120,120,120,
+895,895,895,895,895,895,895,895,895,895,120,120,120,120,120,120,
+896,896,896,896,896,896,120,896,896,120,896,896,896,896,896,896,
+896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,
+
+/* block 188 */
+896,896,896,896,896,896,896,896,896,896,897,897,897,897,897,120,
+898,898,120,897,897,898,897,898,896,120,120,120,120,120,120,120,
+899,899,899,899,899,899,899,899,899,899,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-887,887,887,887,887,887,887,887,887,887,887,887,887,887,887,887,
-887,887,887,888,888,889,889,890,890,120,120,120,120,120,120,120,
-/* block 188 */
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+/* block 189 */
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,891,
-294,294,891,294,891,296,296,296,296,296,296,296,296,297,297,297,
-297,296,296,296,296,296,296,296,296,296,296,296,296,296,296,296,
-296,296,120,120,120,120,120,120,120,120,120,120,120,120,120,892,
-
-/* block 189 */
-893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,
-893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,
-893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,
-893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,
-893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,
-893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,
-893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,
-893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,
-
-/* block 190 */
-893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,
-893,893,893,893,893,893,893,893,893,893,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+900,900,900,900,900,900,900,900,900,900,900,900,900,900,900,900,
+900,900,900,901,901,902,902,903,903,120,120,120,120,120,120,120,
+
+/* block 190 */
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+590,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+904,904,904,904,904,904,904,904,904,904,904,904,904,904,904,904,
+293,293,904,293,904,295,295,295,295,295,295,295,295,296,296,296,
+296,295,295,295,295,295,295,295,295,295,295,295,295,295,295,295,
+295,295,120,120,120,120,120,120,120,120,120,120,120,120,120,905,
/* block 191 */
-894,894,894,894,894,894,894,894,894,894,894,894,894,894,894,894,
-894,894,894,894,894,894,894,894,894,894,894,894,894,894,894,894,
-894,894,894,894,894,894,894,894,894,894,894,894,894,894,894,894,
-894,894,894,894,894,894,894,894,894,894,894,894,894,894,894,894,
-894,894,894,894,894,894,894,894,894,894,894,894,894,894,894,894,
-894,894,894,894,894,894,894,894,894,894,894,894,894,894,894,894,
-894,894,894,894,894,894,894,894,894,894,894,894,894,894,894,120,
-895,895,895,895,895,120,120,120,120,120,120,120,120,120,120,120,
+906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,
+906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,
+906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,
+906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,
+906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,
+906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,
+906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,
+906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,
/* block 192 */
-893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,
-893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,
-893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,
-893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,893,
-893,893,893,893,120,120,120,120,120,120,120,120,120,120,120,120,
+906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,
+906,906,906,906,906,906,906,906,906,906,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 193 */
-896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,
-896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,
-896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,
-896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,
-896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,
-896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,
-896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,
-896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,
+907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,
+907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,
+907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,
+907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,
+907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,
+907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,
+907,907,907,907,907,907,907,907,907,907,907,907,907,907,907,120,
+908,908,908,908,908,120,120,120,120,120,120,120,120,120,120,120,
/* block 194 */
-896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,
-896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,
-896,896,896,896,896,896,896,896,896,896,896,896,896,896,896,120,
-897,897,897,897,897,897,897,897,897,120,120,120,120,120,120,120,
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,
+906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,
+906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,
+906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,906,
+906,906,906,906,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 195 */
-898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,
-898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,
-898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,
-898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,
-898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,
-898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,
-898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,
-898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,
+909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,
+909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,
+909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,
+909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,
+909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,
+909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,
+909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,
+909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,
/* block 196 */
-898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,
-898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,
-898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,
-898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,898,
-898,898,898,898,898,898,898,120,120,120,120,120,120,120,120,120,
+909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,
+909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,
+909,909,909,909,909,909,909,909,909,909,909,909,909,909,909,120,
+910,910,910,910,910,910,910,910,910,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 197 */
+911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,
+911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,
+911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,
+911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,
+911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,
+911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,
+911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,
+911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,
+
+/* block 198 */
+911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,
+911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,
+911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,
+911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,911,
+911,911,911,911,911,911,911,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+
+/* block 199 */
601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
@@ -3679,38 +3718,38 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
-/* block 198 */
+/* block 200 */
601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
601,601,601,601,601,601,601,601,601,120,120,120,120,120,120,120,
-899,899,899,899,899,899,899,899,899,899,899,899,899,899,899,899,
-899,899,899,899,899,899,899,899,899,899,899,899,899,899,899,120,
-900,900,900,900,900,900,900,900,900,900,120,120,120,120,901,901,
+912,912,912,912,912,912,912,912,912,912,912,912,912,912,912,912,
+912,912,912,912,912,912,912,912,912,912,912,912,912,912,912,120,
+913,913,913,913,913,913,913,913,913,913,120,120,120,120,914,914,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-/* block 199 */
+/* block 201 */
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,902,
-902,902,902,902,902,902,902,902,902,902,902,902,902,902,120,120,
-903,903,903,903,903,904,120,120,120,120,120,120,120,120,120,120,
+915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,915,
+915,915,915,915,915,915,915,915,915,915,915,915,915,915,120,120,
+916,916,916,916,916,917,120,120,120,120,120,120,120,120,120,120,
-/* block 200 */
-905,905,905,905,905,905,905,905,905,905,905,905,905,905,905,905,
-905,905,905,905,905,905,905,905,905,905,905,905,905,905,905,905,
-905,905,905,905,905,905,905,905,905,905,905,905,905,905,905,905,
-906,906,906,906,906,906,906,907,907,907,907,907,908,908,908,908,
-909,909,909,909,907,908,120,120,120,120,120,120,120,120,120,120,
-910,910,910,910,910,910,910,910,910,910,120,911,911,911,911,911,
-911,911,120,905,905,905,905,905,905,905,905,905,905,905,905,905,
-905,905,905,905,905,905,905,905,120,120,120,120,120,905,905,905,
+/* block 202 */
+918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,
+918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,
+918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,
+919,919,919,919,919,919,919,920,920,920,920,920,921,921,921,921,
+922,922,922,922,920,921,120,120,120,120,120,120,120,120,120,120,
+923,923,923,923,923,923,923,923,923,923,120,924,924,924,924,924,
+924,924,120,918,918,918,918,918,918,918,918,918,918,918,918,918,
+918,918,918,918,918,918,918,918,120,120,120,120,120,918,918,918,
-/* block 201 */
-905,905,905,905,905,905,905,905,905,905,905,905,905,905,905,905,
+/* block 203 */
+918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
@@ -3719,19 +3758,19 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-/* block 202 */
+/* block 204 */
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-912,912,912,912,912,912,912,912,912,912,912,912,912,912,912,912,
-912,912,912,912,912,912,912,912,912,912,912,912,912,912,912,912,
-913,913,913,913,913,913,913,913,913,913,913,913,913,913,913,913,
-913,913,913,913,913,913,913,913,913,913,913,913,913,913,913,913,
+925,925,925,925,925,925,925,925,925,925,925,925,925,925,925,925,
+925,925,925,925,925,925,925,925,925,925,925,925,925,925,925,925,
+926,926,926,926,926,926,926,926,926,926,926,926,926,926,926,926,
+926,926,926,926,926,926,926,926,926,926,926,926,926,926,926,926,
-/* block 203 */
-914,914,914,914,914,914,914,914,914,914,914,914,914,914,914,914,
-914,914,914,914,914,914,914,915,915,915,915,120,120,120,120,120,
+/* block 205 */
+927,927,927,927,927,927,927,927,927,927,927,927,927,927,927,927,
+927,927,927,927,927,927,927,928,928,928,928,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
@@ -3739,57 +3778,77 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-/* block 204 */
-916,916,916,916,916,916,916,916,916,916,916,916,916,916,916,916,
-916,916,916,916,916,916,916,916,916,916,916,916,916,916,916,916,
-916,916,916,916,916,916,916,916,916,916,916,916,916,916,916,916,
-916,916,916,916,916,916,916,916,916,916,916,916,916,916,916,916,
-916,916,916,916,916,916,916,916,916,916,916,120,120,120,120,917,
-916,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,
-918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,
-918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,918,
+/* block 206 */
+929,929,929,929,929,929,929,929,929,929,929,929,929,929,929,929,
+929,929,929,929,929,929,929,929,929,929,929,929,929,929,929,929,
+929,929,929,929,929,929,929,929,929,929,929,929,929,929,929,929,
+929,929,929,929,929,929,929,929,929,929,929,929,929,929,929,929,
+929,929,929,929,929,929,929,929,929,929,929,120,120,120,120,930,
+929,931,931,931,931,931,931,931,931,931,931,931,931,931,931,931,
+931,931,931,931,931,931,931,931,931,931,931,931,931,931,931,931,
+931,931,931,931,931,931,931,931,931,931,931,931,931,931,931,931,
-/* block 205 */
-918,918,918,918,918,918,918,918,120,120,120,120,120,120,120,917,
-917,917,917,919,919,919,919,919,919,919,919,919,919,919,919,919,
-120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+/* block 207 */
+931,931,931,931,931,931,931,931,120,120,120,120,120,120,120,930,
+930,930,930,932,932,932,932,932,932,932,932,932,932,932,932,932,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-920,921, 5,111,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-
-/* block 206 */
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-
-/* block 207 */
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,922,922,922,922,922,120,120,120,120,120,120,120,120,
+933,934, 5,111,935,120,120,120,120,120,120,120,120,120,120,120,
+936,936,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
/* block 208 */
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,922,
-922,922,922,120,120,120,120,120,120,120,120,120,120,120,120,120,
+937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,
+937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,
+937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,
+937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,
+937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,
+937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,
+937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,
+937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,
/* block 209 */
+937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,
+937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,
+937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,
+937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,
+937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,
+937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,
+937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,937,
+937,937,937,937,937,937,937,937,120,120,120,120,120,120,120,120,
+
+/* block 210 */
+938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,
+938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,
+938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,
+938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,
+938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,
+938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,
+938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,
+938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,
+
+/* block 211 */
+938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,
+938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,
+938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,
+938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,
+938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,938,
+938,938,938,938,938,938,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+
+/* block 212 */
+937,937,937,937,937,937,937,937,937,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+
+/* block 213 */
578,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,
573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,
573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,
@@ -3799,7 +3858,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,
573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,
-/* block 210 */
+/* block 214 */
573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,
573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,
573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,
@@ -3809,7 +3868,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,
573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,
-/* block 211 */
+/* block 215 */
573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,
573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
@@ -3817,49 +3876,49 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
573,573,573,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,578,578,578,578,120,120,120,120,120,120,120,120,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
+939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,
-/* block 212 */
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
+/* block 216 */
+939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,
+939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,
+939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,
+939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,
+939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,
+939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,
+939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,
+939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,
-/* block 213 */
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,923,
-923,923,923,923,923,923,923,923,923,923,923,923,120,120,120,120,
+/* block 217 */
+939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,
+939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,
+939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,
+939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,
+939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,
+939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,
+939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,939,
+939,939,939,939,939,939,939,939,939,939,939,939,120,120,120,120,
-/* block 214 */
-924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,
-924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,
-924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,
-924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,
-924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,
-924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,924,
-924,924,924,924,924,924,924,924,924,924,924,120,120,120,120,120,
-924,924,924,924,924,924,924,924,924,924,924,924,924,120,120,120,
+/* block 218 */
+940,940,940,940,940,940,940,940,940,940,940,940,940,940,940,940,
+940,940,940,940,940,940,940,940,940,940,940,940,940,940,940,940,
+940,940,940,940,940,940,940,940,940,940,940,940,940,940,940,940,
+940,940,940,940,940,940,940,940,940,940,940,940,940,940,940,940,
+940,940,940,940,940,940,940,940,940,940,940,940,940,940,940,940,
+940,940,940,940,940,940,940,940,940,940,940,940,940,940,940,940,
+940,940,940,940,940,940,940,940,940,940,940,120,120,120,120,120,
+940,940,940,940,940,940,940,940,940,940,940,940,940,120,120,120,
-/* block 215 */
-924,924,924,924,924,924,924,924,924,120,120,120,120,120,120,120,
-924,924,924,924,924,924,924,924,924,924,120,120,925,926,926,927,
-928,928,928,928,120,120,120,120,120,120,120,120,120,120,120,120,
+/* block 219 */
+940,940,940,940,940,940,940,940,940,120,120,120,120,120,120,120,
+940,940,940,940,940,940,940,940,940,940,120,120,941,942,942,943,
+944,944,944,944,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-/* block 216 */
+/* block 220 */
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
@@ -3869,17 +3928,17 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20,120,120,120,120,120,120,120,120,120,120,
-/* block 217 */
+/* block 221 */
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20,120,120, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20,929,930,113,113,113, 20, 20, 20,930,929,929,
-929,929,929, 24, 24, 24, 24, 24, 24, 24, 24,113,113,113,113,113,
+ 20, 20, 20, 20, 20,945,946,113,113,113, 20, 20, 20,946,945,945,
+945,945,945, 24, 24, 24, 24, 24, 24, 24, 24,113,113,113,113,113,
-/* block 218 */
+/* block 222 */
113,113,113, 20, 20,113,113,113,113,113,113,113, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20,113,113,113,113, 20, 20,
@@ -3889,17 +3948,17 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
20, 20, 20, 20, 20, 20, 20, 20, 20,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-/* block 219 */
-684,684,684,684,684,684,684,684,684,684,684,684,684,684,684,684,
-684,684,684,684,684,684,684,684,684,684,684,684,684,684,684,684,
-684,684,684,684,684,684,684,684,684,684,684,684,684,684,684,684,
-684,684,684,684,684,684,684,684,684,684,684,684,684,684,684,684,
-684,684,931,931,931,684,120,120,120,120,120,120,120,120,120,120,
+/* block 223 */
+685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,
+685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,
+685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,
+685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,
+685,685,947,947,947,685,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-/* block 220 */
+/* block 224 */
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
@@ -3909,7 +3968,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25,120,120,120,120,120,120,120,120,120,120,120,120,
-/* block 221 */
+/* block 225 */
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
@@ -3919,7 +3978,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
582,582,582,582,582,582,582,582,582,582,582,582,582,582,582,582,
582,582, 25, 25, 25, 25, 25, 25, 25,120,120,120,120,120,120,120,
-/* block 222 */
+/* block 226 */
513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,
513,513,513,513,513,513,513,513,513,513,514,514,514,514,514,514,
514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
@@ -3929,7 +3988,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
514,514,514,514,514,514,514,514,513,513,513,513,513,513,513,513,
513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,
-/* block 223 */
+/* block 227 */
513,513,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
514,514,514,514,514,514,514,514,514,514,514,514,513,120,513,513,
120,120,513,120,120,513,513,120,120,513,513,513,513,120,513,513,
@@ -3939,7 +3998,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
513,513,513,513,513,513,513,513,513,513,514,514,514,514,514,514,
514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
-/* block 224 */
+/* block 228 */
514,514,514,514,513,513,120,513,513,513,513,120,120,513,513,513,
513,513,513,513,513,120,513,513,513,513,513,513,513,120,514,514,
514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
@@ -3949,7 +4008,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
514,514,514,514,514,514,514,514,514,514,514,514,513,513,513,513,
513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,
-/* block 225 */
+/* block 229 */
513,513,513,513,513,513,514,514,514,514,514,514,514,514,514,514,
514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,
@@ -3959,7 +4018,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
513,513,513,513,513,513,513,513,513,513,513,513,513,513,514,514,
514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
-/* block 226 */
+/* block 230 */
514,514,514,514,514,514,514,514,513,513,513,513,513,513,513,513,
513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,
513,513,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
@@ -3969,7 +4028,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,
-/* block 227 */
+/* block 231 */
513,513,513,513,513,513,513,513,513,513,514,514,514,514,514,514,
514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
514,514,514,514,514,514,120,120,513,513,513,513,513,513,513,513,
@@ -3979,7 +4038,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
514,514,513,513,513,513,513,513,513,513,513,513,513,513,513,513,
513,513,513,513,513,513,513,513,513,513,513, 9,514,514,514,514,
-/* block 228 */
+/* block 232 */
514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
514,514,514,514,514, 9,514,514,514,514,514,514,513,513,513,513,
513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,
@@ -3989,7 +4048,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
513,513,513,513,513,513,513,513,513,513,513,513,513,513,513, 9,
514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
-/* block 229 */
+/* block 233 */
514,514,514,514,514,514,514,514,514, 9,514,514,514,514,514,514,
513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,
513,513,513,513,513,513,513,513,513, 9,514,514,514,514,514,514,
@@ -3999,97 +4058,97 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-/* block 230 */
-932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,
-932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,
-932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,
-932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,
-932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,
-932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,
-932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,
-932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,932,
+/* block 234 */
+948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,
+948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,
+948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,
+948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,
+948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,
+948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,
+948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,
+948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,
-/* block 231 */
-933,933,933,933,933,933,933,933,933,933,933,933,933,933,933,933,
-933,933,933,933,933,933,933,933,933,933,933,933,933,933,933,933,
-933,933,933,933,933,933,933,933,933,933,933,933,933,933,933,933,
-933,933,933,933,933,933,933,932,932,932,932,933,933,933,933,933,
-933,933,933,933,933,933,933,933,933,933,933,933,933,933,933,933,
-933,933,933,933,933,933,933,933,933,933,933,933,933,933,933,933,
-933,933,933,933,933,933,933,933,933,933,933,933,933,932,932,932,
-932,932,932,932,932,933,932,932,932,932,932,932,932,932,932,932,
+/* block 235 */
+949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,
+949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,
+949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,
+949,949,949,949,949,949,949,948,948,948,948,949,949,949,949,949,
+949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,
+949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,
+949,949,949,949,949,949,949,949,949,949,949,949,949,948,948,948,
+948,948,948,948,948,949,948,948,948,948,948,948,948,948,948,948,
-/* block 232 */
-932,932,932,932,933,932,932,934,934,934,934,934,120,120,120,120,
-120,120,120,120,120,120,120,120,120,120,120,933,933,933,933,933,
-120,933,933,933,933,933,933,933,933,933,933,933,933,933,933,933,
+/* block 236 */
+948,948,948,948,949,948,948,950,950,950,950,950,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,949,949,949,949,949,
+120,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-/* block 233 */
-935,935,935,935,935,935,935,120,935,935,935,935,935,935,935,935,
-935,935,935,935,935,935,935,935,935,120,120,935,935,935,935,935,
-935,935,120,935,935,120,935,935,935,935,935,120,120,120,120,120,
+/* block 237 */
+951,951,951,951,951,951,951,120,951,951,951,951,951,951,951,951,
+951,951,951,951,951,951,951,951,951,120,120,951,951,951,951,951,
+951,951,120,951,951,120,951,951,951,951,951,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-/* block 234 */
-936,936,936,936,936,936,936,936,936,936,936,936,936,936,936,936,
-936,936,936,936,936,936,936,936,936,936,936,936,936,936,936,936,
-936,936,936,936,936,936,936,936,936,936,936,936,936,120,120,120,
-937,937,937,937,937,937,937,938,938,938,938,938,938,938,120,120,
-939,939,939,939,939,939,939,939,939,939,120,120,120,120,936,940,
+/* block 238 */
+952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,
+952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,952,
+952,952,952,952,952,952,952,952,952,952,952,952,952,120,120,120,
+953,953,953,953,953,953,953,954,954,954,954,954,954,954,120,120,
+955,955,955,955,955,955,955,955,955,955,120,120,120,120,952,956,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-/* block 235 */
+/* block 239 */
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-941,941,941,941,941,941,941,941,941,941,941,941,941,941,941,941,
-941,941,941,941,941,941,941,941,941,941,941,941,941,941,941,941,
-941,941,941,941,941,941,941,941,941,941,941,941,942,942,942,942,
-943,943,943,943,943,943,943,943,943,943,120,120,120,120,120,944,
+957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,
+957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,957,
+957,957,957,957,957,957,957,957,957,957,957,957,958,958,958,958,
+959,959,959,959,959,959,959,959,959,959,120,120,120,120,120,960,
-/* block 236 */
-945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,
-945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,
-945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,
-945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,
-945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,
-945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,
-945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,
-945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,
+/* block 240 */
+961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,
+961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,
+961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,
+961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,
+961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,
+961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,
+961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,
+961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,
-/* block 237 */
-945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,
-945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,
-945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,
-945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,945,
-945,945,945,945,945,120,120,946,946,946,946,946,946,946,946,946,
-947,947,947,947,947,947,947,120,120,120,120,120,120,120,120,120,
+/* block 241 */
+961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,
+961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,
+961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,
+961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,961,
+961,961,961,961,961,120,120,962,962,962,962,962,962,962,962,962,
+963,963,963,963,963,963,963,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-/* block 238 */
-948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,
-948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,948,
-948,948,949,949,949,949,949,949,949,949,949,949,949,949,949,949,
-949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,949,
-949,949,949,949,950,950,950,950,950,950,950,951,120,120,120,120,
-952,952,952,952,952,952,952,952,952,952,120,120,120,120,953,953,
+/* block 242 */
+964,964,964,964,964,964,964,964,964,964,964,964,964,964,964,964,
+964,964,964,964,964,964,964,964,964,964,964,964,964,964,964,964,
+964,964,965,965,965,965,965,965,965,965,965,965,965,965,965,965,
+965,965,965,965,965,965,965,965,965,965,965,965,965,965,965,965,
+965,965,965,965,966,966,966,966,966,966,966,967,120,120,120,120,
+968,968,968,968,968,968,968,968,968,968,120,120,120,120,969,969,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-/* block 239 */
+/* block 243 */
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
@@ -4099,7 +4158,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
-/* block 240 */
+/* block 244 */
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 20, 25, 25, 25,
@@ -4109,7 +4168,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-/* block 241 */
+/* block 245 */
120, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 20, 25,
@@ -4119,87 +4178,87 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-/* block 242 */
-225,225,225,225,120,225,225,225,225,225,225,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,225,
-120,225,225,120,225,120,120,225,120,225,225,225,225,225,225,225,
-225,225,225,120,225,225,225,225,120,225,120,225,120,120,120,120,
-120,120,225,120,120,120,120,225,120,225,120,225,120,225,225,225,
-120,225,225,120,225,120,120,225,120,225,120,225,120,225,120,225,
-120,225,225,120,225,120,120,225,225,225,225,120,225,225,225,225,
-225,225,225,120,225,225,225,225,120,225,225,225,225,120,225,120,
+/* block 246 */
+224,224,224,224,120,224,224,224,224,224,224,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+120,224,224,120,224,120,120,224,120,224,224,224,224,224,224,224,
+224,224,224,120,224,224,224,224,120,224,120,224,120,120,120,120,
+120,120,224,120,120,120,120,224,120,224,120,224,120,224,224,224,
+120,224,224,120,224,120,120,224,120,224,120,224,120,224,120,224,
+120,224,224,120,224,120,120,224,224,224,224,120,224,224,224,224,
+224,224,224,120,224,224,224,224,120,224,224,224,224,120,224,120,
-/* block 243 */
-225,225,225,225,225,225,225,225,225,225,120,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,120,120,120,120,
-120,225,225,225,120,225,225,225,225,225,120,225,225,225,225,225,
-225,225,225,225,225,225,225,225,225,225,225,225,120,120,120,120,
+/* block 247 */
+224,224,224,224,224,224,224,224,224,224,120,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,120,120,120,120,
+120,224,224,224,120,224,224,224,224,224,120,224,224,224,224,224,
+224,224,224,224,224,224,224,224,224,224,224,224,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-218,218,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+217,217,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-/* block 244 */
+/* block 248 */
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,954,954,954,954,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,970,970,970,970,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-/* block 245 */
+/* block 249 */
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21,954,954,954,954,954,954,954,954,954,954,954,954,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,954,
-954, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-954, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-954, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21,970,970,970,970,970,970,970,970,970,970,970,970,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,970,
+970, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+970, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+970, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21,954,954,954,954,954,954,954,954,954,954,
+ 21, 21, 21, 21, 21, 21,970,970,970,970,970,970,970,970,970,970,
-/* block 246 */
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,954,954,954,
+/* block 250 */
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 21, 21, 21,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21,954,954,954,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21,
21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21,
-/* block 247 */
+/* block 251 */
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 20,
20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,955,955,955,955,955,955,955,955,955,955,
-955,955,955,955,955,955,955,955,955,955,955,955,955,955,955,955,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,971,971,971,971,971,971,971,971,971,971,
+971,971,971,971,971,971,971,971,971,971,971,971,971,971,971,971,
-/* block 248 */
-956, 21, 21,954,954,954,954,954,954,954,954,954,954,954,954,954,
+/* block 252 */
+972, 21, 21,970,970,970,970,970,970,970,970,970,970,970,970,970,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21,
- 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 20,954,954,954,954,
- 20, 20, 20, 20, 20, 20, 20, 20, 20,954,954,954,954,954,954,954,
-584,584,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
- 21, 21, 21, 21, 21, 21,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
+ 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 20,970,970,970,970,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20,970,970,970,970,970,970,970,
+584,584,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+ 21, 21, 21, 21, 21, 21,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
-/* block 249 */
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
+/* block 253 */
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
-/* block 250 */
+/* block 254 */
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
@@ -4209,7 +4268,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-/* block 251 */
+/* block 255 */
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
@@ -4217,9 +4276,9 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,957,957,957,957,957,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,973,973,973,973,973,
-/* block 252 */
+/* block 256 */
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
@@ -4229,7 +4288,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-/* block 253 */
+/* block 257 */
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
@@ -4239,17 +4298,17 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
-/* block 254 */
+/* block 258 */
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21,954,954,954,954,954,954,954,954,954,954,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,954,954,954,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,954,954,954,954,954,
+ 21, 21, 21, 21, 21, 21, 21, 21,970,970,970,970,970,970,970,970,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,970,970,970,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,970,970,970,
-/* block 255 */
+/* block 259 */
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
@@ -4257,99 +4316,109 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20,954,954,954,954,954,954,954,954,954,954,954,954,
+ 20, 20, 20, 20,970,970,970,970,970,970,970,970,970,970,970,970,
-/* block 256 */
+/* block 260 */
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 21, 21, 21, 21,954,954,954,954,954,954,954,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
+ 20, 20, 20, 20, 20, 21, 21, 21, 21,970,970,970,970,970,970,970,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
-/* block 257 */
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,954,954,954,954,
+/* block 261 */
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,970,970,970,970,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20,954,954,954,954,954,954,954,954,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,954,954,954,954,954,954,
+ 20, 20, 20, 20, 20, 20, 20, 20,970,970,970,970,970,970,970,970,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,970,970,970,970,970,970,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
-/* block 258 */
- 20, 20, 20, 20, 20, 20, 20, 20,954,954,954,954,954,954,954,954,
+/* block 262 */
+ 20, 20, 20, 20, 20, 20, 20, 20,970,970,970,970,970,970,970,970,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,970,970,
+ 21, 21,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
-/* block 259 */
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,954, 21, 21, 21,
+/* block 263 */
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 20, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21,954, 21, 21, 21, 21,954,954,954, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21,970, 21, 21, 21, 21, 21, 21,
-/* block 260 */
+/* block 264 */
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21,954,954, 21, 21, 21, 21, 21, 21,954,954,954, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,954,954, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,970, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
-/* block 261 */
+/* block 265 */
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21,954,954,954,954,954,954,954,954,954,954,954,954,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,954,954,
- 21, 21, 21, 21,954,954,954,954, 21, 21, 21,954,954,954,954,954,
+ 21, 21, 21, 21,970,970,970,970,970,970,970,970,970,970,970,970,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,970,970,
+ 21, 21, 21, 21, 21,970,970,970, 21, 21, 21,970,970,970,970,970,
-/* block 262 */
- 21, 21, 21,954,954,954,954,954,954,954,954,954,954,954,954,954,
- 21, 21, 21, 21, 21, 21,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
+/* block 266 */
+ 21, 21, 21, 21, 21, 21, 21,970,970,970,970,970,970,970,970,970,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21,970,970,970,970,970,970,970,
+ 21, 21, 21, 21, 21, 21, 21,970,970,970,970,970,970,970,970,970,
+ 21, 21, 21,970,970,970,970,970,970,970,970,970,970,970,970,970,
+ 21, 21, 21, 21, 21, 21, 21,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
-/* block 263 */
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,954,
-954,954,954,954,954,954,954,954,954,954,954,954,954,954,120,120,
+/* block 267 */
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20,120, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,120,120,120,120,120,120,
-/* block 264 */
+/* block 268 */
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,970,
+970,970,970,970,970,970,970,970,970,970,970,970,970,970,120,120,
+
+/* block 269 */
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
-586,586,586,586,586,586,586,120,120,120,120,120,120,120,120,120,
+586,586,586,586,586,586,586,586,586,586,586,586,586,586,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-/* block 265 */
+/* block 270 */
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
@@ -4359,7 +4428,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
-/* block 266 */
+/* block 271 */
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
586,586,586,586,586,586,586,586,586,586,586,586,586,586,120,120,
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
@@ -4369,7 +4438,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
-/* block 267 */
+/* block 272 */
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
586,586,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
@@ -4379,7 +4448,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
-/* block 268 */
+/* block 273 */
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
@@ -4389,7 +4458,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
586,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-/* block 269 */
+/* block 274 */
586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
586,586,586,586,586,586,586,586,586,586,586,586,586,586,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
@@ -4399,17 +4468,27 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
-/* block 270 */
+/* block 275 */
+586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
+586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
+586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
+586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
+586,586,586,586,586,586,586,586,586,586,586,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,120,
+
+/* block 276 */
511, 24,511,511,511,511,511,511,511,511,511,511,511,511,511,511,
511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,
-958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,
-958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,
-958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,
-958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,
-958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,
-958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,958,
-
-/* block 271 */
+974,974,974,974,974,974,974,974,974,974,974,974,974,974,974,974,
+974,974,974,974,974,974,974,974,974,974,974,974,974,974,974,974,
+974,974,974,974,974,974,974,974,974,974,974,974,974,974,974,974,
+974,974,974,974,974,974,974,974,974,974,974,974,974,974,974,974,
+974,974,974,974,974,974,974,974,974,974,974,974,974,974,974,974,
+974,974,974,974,974,974,974,974,974,974,974,974,974,974,974,974,
+
+/* block 277 */
511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,
511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,
511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,
@@ -4419,7 +4498,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,
511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,
-/* block 272 */
+/* block 278 */
113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,
113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,
113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,
@@ -4429,7 +4508,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,
113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,
-/* block 273 */
+/* block 279 */
113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,
113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,
113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,
@@ -4439,15 +4518,15 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 70400 bytes, block = 128 */
113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,
511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,
-/* block 274 */
-672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
-672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
-672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
-672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
-672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
-672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
-672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
-672,672,672,672,672,672,672,672,672,672,672,672,672,672,120,120,
+/* block 280 */
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,673,
+673,673,673,673,673,673,673,673,673,673,673,673,673,673,120,120,
};
diff --git a/src/pcre2_ucp.h b/src/pcre2_ucp.h
index 84b22fb..9538062 100644
--- a/src/pcre2_ucp.h
+++ b/src/pcre2_ucp.h
@@ -286,7 +286,12 @@ enum {
ucp_Elymaic,
ucp_Nandinagari,
ucp_Nyiakeng_Puachue_Hmong,
- ucp_Wancho
+ ucp_Wancho,
+ /* New for Unicode 13.0.0 */
+ ucp_Chorasmian,
+ ucp_Dives_Akuru,
+ ucp_Khitan_Small_Script,
+ ucp_Yezidi
};
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
diff --git a/src/pcre2_valid_utf.c b/src/pcre2_valid_utf.c
index 96e8bff..e47ea78 100644
--- a/src/pcre2_valid_utf.c
+++ b/src/pcre2_valid_utf.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2017 University of Cambridge
+ New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -347,7 +347,7 @@ for (p = string; length > 0; p++)
length--;
if ((*p & 0xfc00) != 0xdc00)
{
- *erroroffset = p - string;
+ *erroroffset = p - string - 1;
return PCRE2_ERROR_UTF16_ERR2;
}
}
diff --git a/src/pcre2grep.c b/src/pcre2grep.c
index 12fe95e..10314a5 100644
--- a/src/pcre2grep.c
+++ b/src/pcre2grep.c
@@ -13,7 +13,7 @@ distribution because other apparatus is needed to compile pcre2grep for z/OS.
The header can be found in the special z/OS distribution, which is available
from www.zaconsultants.net or from www.cbttape.org.
- Copyright (c) 1997-2019 University of Cambridge
+ Copyright (c) 1997-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -1666,6 +1666,44 @@ switch(endlinetype)
/*************************************************
+* Output newline at end *
+*************************************************/
+
+/* This function is called if the final line of a file has been written to
+stdout, but it does not have a terminating newline.
+
+Arguments: none
+Returns: nothing
+*/
+
+static void
+write_final_newline(void)
+{ /* Write one line terminator to stdout, chosen by the file-level endlinetype. */
+switch(endlinetype)
+ {
+ default: /* Just in case: any unrecognised endlinetype falls through to "\n" */
+ case PCRE2_NEWLINE_LF:
+ case PCRE2_NEWLINE_ANY: /* shares the "\n" output with LF */
+ case PCRE2_NEWLINE_ANYCRLF: /* shares the "\n" output with LF */
+ fprintf(stdout, "\n");
+ break;
+
+ case PCRE2_NEWLINE_CR:
+ fprintf(stdout, "\r"); /* carriage return only */
+ break;
+
+ case PCRE2_NEWLINE_CRLF:
+ fprintf(stdout, "\r\n"); /* carriage return followed by linefeed */
+ break;
+
+ case PCRE2_NEWLINE_NUL:
+ fprintf(stdout, "%c", 0); /* %c emits one zero byte; a NUL inside the format string would end it */
+ break;
+ }
+}
+
+
+/*************************************************
* Print the previous "after" lines *
*************************************************/
@@ -1689,9 +1727,9 @@ do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
if (after_context > 0 && lastmatchnumber > 0)
{
int count = 0;
+ int ellength = 0;
while (lastmatchrestart < endptr && count < after_context)
{
- int ellength;
char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
if (ellength == 0 && pp == main_buffer + bufsize) break;
if (printname != NULL) fprintf(stdout, "%s-", printname);
@@ -1700,7 +1738,17 @@ if (after_context > 0 && lastmatchnumber > 0)
lastmatchrestart = pp;
count++;
}
- if (count > 0) hyphenpending = TRUE;
+
+ /* If we have printed any lines, arrange for a hyphen separator if anything
+ else follows. Also, if the last line is the final line in the file and it had
+ no newline, add one. */
+
+ if (count > 0)
+ {
+ hyphenpending = TRUE;
+ if (ellength == 0 && lastmatchrestart >= endptr)
+ write_final_newline();
+ }
}
}
@@ -2437,6 +2485,7 @@ char *endptr;
PCRE2_SIZE bufflength;
BOOL binary = FALSE;
BOOL endhyphenpending = FALSE;
+BOOL lines_printed = FALSE;
BOOL input_line_buffered = line_buffered;
FILE *in = NULL; /* Ensure initialized */
@@ -2777,6 +2826,8 @@ while (ptr < endptr)
else
{
+ lines_printed = TRUE;
+
/* See if there is a requirement to print some "after" lines from a
previous match. We never print any overlaps. */
@@ -2825,7 +2876,8 @@ while (ptr < endptr)
int linecount = 0;
char *p = ptr;
- while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
+ while (p > main_buffer &&
+ (lastmatchnumber == 0 || p > lastmatchrestart) &&
linecount < before_context)
{
linecount++;
@@ -2981,6 +3033,12 @@ while (ptr < endptr)
lastmatchrestart = ptr + linelength + endlinelength;
lastmatchnumber = linenumber + 1;
+
+ /* If a line was printed and we are now at the end of the file and the last
+ line had no newline, output one. */
+
+ if (lines_printed && lastmatchrestart >= endptr && endlinelength == 0)
+ write_final_newline();
}
/* For a match in multiline inverted mode (which of course did not cause
diff --git a/src/pcre2test.c b/src/pcre2test.c
index 57bd110..3f4fef4 100644
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@@ -11,7 +11,7 @@ hacked-up (non-) design had also run out of steam.
Written by Philip Hazel
Original code Copyright (c) 1997-2012 University of Cambridge
- Rewritten code Copyright (c) 2016-2019 University of Cambridge
+ Rewritten code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -389,12 +389,14 @@ typedef struct cmdstruct {
int value;
} cmdstruct;
-enum { CMD_FORBID_UTF, CMD_LOAD, CMD_NEWLINE_DEFAULT, CMD_PATTERN,
- CMD_PERLTEST, CMD_POP, CMD_POPCOPY, CMD_SAVE, CMD_SUBJECT, CMD_UNKNOWN };
+enum { CMD_FORBID_UTF, CMD_LOAD, CMD_LOADTABLES, CMD_NEWLINE_DEFAULT,
+ CMD_PATTERN, CMD_PERLTEST, CMD_POP, CMD_POPCOPY, CMD_SAVE, CMD_SUBJECT,
+ CMD_UNKNOWN };
static cmdstruct cmdlist[] = {
{ "forbid_utf", CMD_FORBID_UTF },
{ "load", CMD_LOAD },
+ { "loadtables", CMD_LOADTABLES },
{ "newline_default", CMD_NEWLINE_DEFAULT },
{ "pattern", CMD_PATTERN },
{ "perltest", CMD_PERLTEST },
@@ -502,13 +504,16 @@ so many of them that they are split into two fields. */
#define CTL2_SUBSTITUTE_CALLOUT 0x00000001u
#define CTL2_SUBSTITUTE_EXTENDED 0x00000002u
-#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH 0x00000004u
-#define CTL2_SUBSTITUTE_UNKNOWN_UNSET 0x00000008u
-#define CTL2_SUBSTITUTE_UNSET_EMPTY 0x00000010u
-#define CTL2_SUBJECT_LITERAL 0x00000020u
-#define CTL2_CALLOUT_NO_WHERE 0x00000040u
-#define CTL2_CALLOUT_EXTRA 0x00000080u
-#define CTL2_ALLVECTOR 0x00000100u
+#define CTL2_SUBSTITUTE_LITERAL 0x00000004u
+#define CTL2_SUBSTITUTE_MATCHED 0x00000008u
+#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH 0x00000010u
+#define CTL2_SUBSTITUTE_REPLACEMENT_ONLY 0x00000020u
+#define CTL2_SUBSTITUTE_UNKNOWN_UNSET 0x00000040u
+#define CTL2_SUBSTITUTE_UNSET_EMPTY 0x00000080u
+#define CTL2_SUBJECT_LITERAL 0x00000100u
+#define CTL2_CALLOUT_NO_WHERE 0x00000200u
+#define CTL2_CALLOUT_EXTRA 0x00000400u
+#define CTL2_ALLVECTOR 0x00000800u
#define CTL2_NL_SET 0x40000000u /* Informational */
#define CTL2_BSR_SET 0x80000000u /* Informational */
@@ -530,7 +535,10 @@ different things in the two cases. */
#define CTL2_ALLPD (CTL2_SUBSTITUTE_CALLOUT|\
CTL2_SUBSTITUTE_EXTENDED|\
+ CTL2_SUBSTITUTE_LITERAL|\
+ CTL2_SUBSTITUTE_MATCHED|\
CTL2_SUBSTITUTE_OVERFLOW_LENGTH|\
+ CTL2_SUBSTITUTE_REPLACEMENT_ONLY|\
CTL2_SUBSTITUTE_UNKNOWN_UNSET|\
CTL2_SUBSTITUTE_UNSET_EMPTY|\
CTL2_ALLVECTOR)
@@ -610,127 +618,130 @@ typedef struct modstruct {
} modstruct;
static modstruct modlist[] = {
- { "aftertext", MOD_PNDP, MOD_CTL, CTL_AFTERTEXT, PO(control) },
- { "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) },
- { "allcaptures", MOD_PND, MOD_CTL, CTL_ALLCAPTURES, PO(control) },
- { "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) },
- { "allow_surrogate_escapes", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) },
- { "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) },
- { "allvector", MOD_PND, MOD_CTL, CTL2_ALLVECTOR, PO(control2) },
- { "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) },
- { "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) },
- { "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) },
- { "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) },
- { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
- { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) },
- { "bad_escape_is_literal", MOD_CTC, MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) },
- { "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) },
- { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) },
- { "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) },
- { "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) },
- { "callout_error", MOD_DAT, MOD_IN2, 0, DO(cerror) },
- { "callout_extra", MOD_DAT, MOD_CTL, CTL2_CALLOUT_EXTRA, DO(control2) },
- { "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) },
- { "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) },
- { "callout_no_where", MOD_DAT, MOD_CTL, CTL2_CALLOUT_NO_WHERE, DO(control2) },
- { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
- { "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) },
- { "convert", MOD_PAT, MOD_CON, 0, PO(convert_type) },
- { "convert_glob_escape", MOD_PAT, MOD_CHR, 0, PO(convert_glob_escape) },
- { "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) },
- { "convert_length", MOD_PAT, MOD_INT, 0, PO(convert_length) },
- { "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) },
- { "copy_matched_subject", MOD_DAT, MOD_OPT, PCRE2_COPY_MATCHED_SUBJECT, DO(options) },
- { "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) },
- { "depth_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) },
- { "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) },
- { "dfa_restart", MOD_DAT, MOD_OPT, PCRE2_DFA_RESTART, DO(options) },
- { "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) },
- { "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) },
- { "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) },
- { "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) },
- { "endanchored", MOD_PD, MOD_OPT, PCRE2_ENDANCHORED, PD(options) },
- { "escaped_cr_is_lf", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ESCAPED_CR_IS_LF, CO(extra_options) },
- { "expand", MOD_PAT, MOD_CTL, CTL_EXPAND, PO(control) },
- { "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) },
- { "extended_more", MOD_PATP, MOD_OPT, PCRE2_EXTENDED_MORE, PO(options) },
- { "extra_alt_bsux", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALT_BSUX, CO(extra_options) },
- { "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) },
- { "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) },
- { "framesize", MOD_PAT, MOD_CTL, CTL_FRAMESIZE, PO(control) },
- { "fullbincode", MOD_PAT, MOD_CTL, CTL_FULLBINCODE, PO(control) },
- { "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) },
- { "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) },
- { "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) },
- { "heap_limit", MOD_CTM, MOD_INT, 0, MO(heap_limit) },
- { "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
- { "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
- { "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
- { "jitfast", MOD_PAT, MOD_CTL, CTL_JITFAST, PO(control) },
- { "jitstack", MOD_PNDP, MOD_INT, 0, PO(jitstack) },
- { "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) },
- { "literal", MOD_PAT, MOD_OPT, PCRE2_LITERAL, PO(options) },
- { "locale", MOD_PAT, MOD_STR, LOCALESIZE, PO(locale) },
- { "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) },
- { "match_invalid_utf", MOD_PAT, MOD_OPT, PCRE2_MATCH_INVALID_UTF, PO(options) },
- { "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) },
- { "match_line", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_LINE, CO(extra_options) },
- { "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) },
- { "match_word", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_WORD, CO(extra_options) },
- { "max_pattern_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_length) },
- { "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) },
- { "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) },
- { "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) },
- { "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) },
- { "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) },
- { "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) },
- { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) },
- { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) },
- { "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) },
- { "no_jit", MOD_DAT, MOD_OPT, PCRE2_NO_JIT, DO(options) },
- { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) },
- { "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) },
- { "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) },
- { "notempty", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY, DO(options) },
- { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) },
- { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) },
- { "null_context", MOD_PD, MOD_CTL, CTL_NULLCONTEXT, PO(control) },
- { "offset", MOD_DAT, MOD_INT, 0, DO(offset) },
- { "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)},
- { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) },
- { "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) },
- { "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
- { "partial_soft", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
- { "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
- { "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) },
- { "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) },
- { "posix_startend", MOD_DAT, MOD_IN2, 0, DO(startend) },
- { "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
- { "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) },
- { "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) },
- { "pushtablescopy", MOD_PAT, MOD_CTL, CTL_PUSHTABLESCOPY, PO(control) },
- { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) }, /* Obsolete synonym */
- { "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) },
- { "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) },
- { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) },
- { "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
- { "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) },
- { "subject_literal", MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL, PO(control2) },
- { "substitute_callout", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_CALLOUT, PO(control2) },
- { "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) },
- { "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
- { "substitute_skip", MOD_PND, MOD_INT, 0, PO(substitute_skip) },
- { "substitute_stop", MOD_PND, MOD_INT, 0, PO(substitute_stop) },
- { "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
- { "substitute_unset_empty", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) },
- { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
- { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
- { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
- { "use_length", MOD_PAT, MOD_CTL, CTL_USE_LENGTH, PO(control) },
- { "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) },
- { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
- { "utf8_input", MOD_PAT, MOD_CTL, CTL_UTF8_INPUT, PO(control) },
- { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) }
+ { "aftertext", MOD_PNDP, MOD_CTL, CTL_AFTERTEXT, PO(control) },
+ { "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) },
+ { "allcaptures", MOD_PND, MOD_CTL, CTL_ALLCAPTURES, PO(control) },
+ { "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) },
+ { "allow_surrogate_escapes", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) },
+ { "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) },
+ { "allvector", MOD_PND, MOD_CTL, CTL2_ALLVECTOR, PO(control2) },
+ { "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) },
+ { "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) },
+ { "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) },
+ { "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) },
+ { "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
+ { "auto_callout", MOD_PAT, MOD_OPT, PCRE2_AUTO_CALLOUT, PO(options) },
+ { "bad_escape_is_literal", MOD_CTC, MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) },
+ { "bincode", MOD_PAT, MOD_CTL, CTL_BINCODE, PO(control) },
+ { "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) },
+ { "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) },
+ { "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) },
+ { "callout_error", MOD_DAT, MOD_IN2, 0, DO(cerror) },
+ { "callout_extra", MOD_DAT, MOD_CTL, CTL2_CALLOUT_EXTRA, DO(control2) },
+ { "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) },
+ { "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) },
+ { "callout_no_where", MOD_DAT, MOD_CTL, CTL2_CALLOUT_NO_WHERE, DO(control2) },
+ { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
+ { "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) },
+ { "convert", MOD_PAT, MOD_CON, 0, PO(convert_type) },
+ { "convert_glob_escape", MOD_PAT, MOD_CHR, 0, PO(convert_glob_escape) },
+ { "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) },
+ { "convert_length", MOD_PAT, MOD_INT, 0, PO(convert_length) },
+ { "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) },
+ { "copy_matched_subject", MOD_DAT, MOD_OPT, PCRE2_COPY_MATCHED_SUBJECT, DO(options) },
+ { "debug", MOD_PAT, MOD_CTL, CTL_DEBUG, PO(control) },
+ { "depth_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) },
+ { "dfa", MOD_DAT, MOD_CTL, CTL_DFA, DO(control) },
+ { "dfa_restart", MOD_DAT, MOD_OPT, PCRE2_DFA_RESTART, DO(options) },
+ { "dfa_shortest", MOD_DAT, MOD_OPT, PCRE2_DFA_SHORTEST, DO(options) },
+ { "dollar_endonly", MOD_PAT, MOD_OPT, PCRE2_DOLLAR_ENDONLY, PO(options) },
+ { "dotall", MOD_PATP, MOD_OPT, PCRE2_DOTALL, PO(options) },
+ { "dupnames", MOD_PATP, MOD_OPT, PCRE2_DUPNAMES, PO(options) },
+ { "endanchored", MOD_PD, MOD_OPT, PCRE2_ENDANCHORED, PD(options) },
+ { "escaped_cr_is_lf", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ESCAPED_CR_IS_LF, CO(extra_options) },
+ { "expand", MOD_PAT, MOD_CTL, CTL_EXPAND, PO(control) },
+ { "extended", MOD_PATP, MOD_OPT, PCRE2_EXTENDED, PO(options) },
+ { "extended_more", MOD_PATP, MOD_OPT, PCRE2_EXTENDED_MORE, PO(options) },
+ { "extra_alt_bsux", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALT_BSUX, CO(extra_options) },
+ { "find_limits", MOD_DAT, MOD_CTL, CTL_FINDLIMITS, DO(control) },
+ { "firstline", MOD_PAT, MOD_OPT, PCRE2_FIRSTLINE, PO(options) },
+ { "framesize", MOD_PAT, MOD_CTL, CTL_FRAMESIZE, PO(control) },
+ { "fullbincode", MOD_PAT, MOD_CTL, CTL_FULLBINCODE, PO(control) },
+ { "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) },
+ { "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) },
+ { "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) },
+ { "heap_limit", MOD_CTM, MOD_INT, 0, MO(heap_limit) },
+ { "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
+ { "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
+ { "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
+ { "jitfast", MOD_PAT, MOD_CTL, CTL_JITFAST, PO(control) },
+ { "jitstack", MOD_PNDP, MOD_INT, 0, PO(jitstack) },
+ { "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) },
+ { "literal", MOD_PAT, MOD_OPT, PCRE2_LITERAL, PO(options) },
+ { "locale", MOD_PAT, MOD_STR, LOCALESIZE, PO(locale) },
+ { "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) },
+ { "match_invalid_utf", MOD_PAT, MOD_OPT, PCRE2_MATCH_INVALID_UTF, PO(options) },
+ { "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) },
+ { "match_line", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_LINE, CO(extra_options) },
+ { "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) },
+ { "match_word", MOD_CTC, MOD_OPT, PCRE2_EXTRA_MATCH_WORD, CO(extra_options) },
+ { "max_pattern_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_length) },
+ { "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) },
+ { "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) },
+ { "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) },
+ { "never_ucp", MOD_PAT, MOD_OPT, PCRE2_NEVER_UCP, PO(options) },
+ { "never_utf", MOD_PAT, MOD_OPT, PCRE2_NEVER_UTF, PO(options) },
+ { "newline", MOD_CTC, MOD_NL, 0, CO(newline_convention) },
+ { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) },
+ { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) },
+ { "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) },
+ { "no_jit", MOD_DAT, MOD_OPT, PCRE2_NO_JIT, DO(options) },
+ { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) },
+ { "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) },
+ { "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) },
+ { "notempty", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY, DO(options) },
+ { "notempty_atstart", MOD_DAT, MOD_OPT, PCRE2_NOTEMPTY_ATSTART, DO(options) },
+ { "noteol", MOD_DAT, MOD_OPT, PCRE2_NOTEOL, DO(options) },
+ { "null_context", MOD_PD, MOD_CTL, CTL_NULLCONTEXT, PO(control) },
+ { "offset", MOD_DAT, MOD_INT, 0, DO(offset) },
+ { "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)},
+ { "ovector", MOD_DAT, MOD_INT, 0, DO(oveccount) },
+ { "parens_nest_limit", MOD_CTC, MOD_INT, 0, CO(parens_nest_limit) },
+ { "partial_hard", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
+ { "partial_soft", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
+ { "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
+ { "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) },
+ { "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) },
+ { "posix_startend", MOD_DAT, MOD_IN2, 0, DO(startend) },
+ { "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
+ { "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) },
+ { "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) },
+ { "pushtablescopy", MOD_PAT, MOD_CTL, CTL_PUSHTABLESCOPY, PO(control) },
+ { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(depth_limit) }, /* Obsolete synonym */
+ { "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) },
+ { "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) },
+ { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) },
+ { "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
+ { "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) },
+ { "subject_literal", MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL, PO(control2) },
+ { "substitute_callout", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_CALLOUT, PO(control2) },
+ { "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) },
+ { "substitute_literal", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_LITERAL, PO(control2) },
+ { "substitute_matched", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_MATCHED, PO(control2) },
+ { "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
+ { "substitute_replacement_only", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_REPLACEMENT_ONLY, PO(control2) },
+ { "substitute_skip", MOD_PND, MOD_INT, 0, PO(substitute_skip) },
+ { "substitute_stop", MOD_PND, MOD_INT, 0, PO(substitute_stop) },
+ { "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
+ { "substitute_unset_empty", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) },
+ { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
+ { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
+ { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
+ { "use_length", MOD_PAT, MOD_CTL, CTL_USE_LENGTH, PO(control) },
+ { "use_offset_limit", MOD_PAT, MOD_OPT, PCRE2_USE_OFFSET_LIMIT, PO(options) },
+ { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
+ { "utf8_input", MOD_PAT, MOD_CTL, CTL_UTF8_INPUT, PO(control) },
+ { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) }
};
#define MODLISTCOUNT sizeof(modlist)/sizeof(modstruct)
@@ -948,6 +959,8 @@ static int *dfa_workspace = NULL;
static const uint8_t *locale_tables = NULL;
static const uint8_t *use_tables = NULL;
static uint8_t locale_name[32];
+static uint8_t *tables3 = NULL; /* For binary-loaded tables */
+static uint32_t loadtables_length = 0;
/* We need buffers for building 16/32-bit strings; 8-bit strings don't need
rebuilding, but set up the same naming scheme for use in macros. The "buffer"
@@ -2967,15 +2980,15 @@ return (int)(pp - p);
*************************************************/
/* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
-For printing *MARK strings, a negative length is given. If handed a NULL file,
-just counts chars without printing (because pchar() does that). */
+For printing *MARK strings, a negative length is given, indicating that the
+length is in the first code unit. If handed a NULL file, this function just
+counts chars without printing (because pchar() does that). */
static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
{
uint32_t c = 0;
int yield = 0;
-
-if (length < 0) length = p[-1];
+if (length < 0) length = *p++;
while (length-- > 0)
{
if (utf)
@@ -3004,13 +3017,14 @@ return yield;
*************************************************/
/* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
-For printing *MARK strings, a negative length is given. If handed a NULL file,
-just counts chars without printing. */
+For printing *MARK strings, a negative length is given, indicating that the
+length is in the first code unit. If handed a NULL file, just counts chars
+without printing. */
static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
{
int yield = 0;
-if (length < 0) length = p[-1];
+if (length < 0) length = *p++;
while (length-- > 0)
{
uint32_t c = *p++ & 0xffff;
@@ -3038,15 +3052,15 @@ return yield;
*************************************************/
/* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
-For printing *MARK strings, a negative length is given. If handed a NULL file,
-just counts chars without printing. */
+For printing *MARK strings, a negative length is given, indicating that the
+length is in the first code unit. If handed a NULL file, just counts chars
+without printing. */
static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
{
int yield = 0;
(void)(utf); /* Avoid compiler warning */
-
-if (length < 0) length = p[-1];
+if (length < 0) length = *p++;
while (length-- > 0)
{
uint32_t c = *p++;
@@ -4085,7 +4099,7 @@ Returns: nothing
static void
show_controls(uint32_t controls, uint32_t controls2, const char *before)
{
-fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
before,
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
@@ -4123,7 +4137,10 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s
((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
((controls2 & CTL2_SUBSTITUTE_CALLOUT) != 0)? " substitute_callout" : "",
((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
+ ((controls2 & CTL2_SUBSTITUTE_LITERAL) != 0)? " substitute_literal" : "",
+ ((controls2 & CTL2_SUBSTITUTE_MATCHED) != 0)? " substitute_matched" : "",
((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
+ ((controls2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) != 0)? " substitute_replacement_only" : "",
((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
@@ -4783,12 +4800,13 @@ Arguments:
buffptr point after the #command
mode open mode
fptr points to the FILE variable
+ name name of # command
Returns: PR_OK or PR_ABEND
*/
static int
-open_file(uint8_t *buffptr, const char *mode, FILE **fptr)
+open_file(uint8_t *buffptr, const char *mode, FILE **fptr, const char *name)
{
char *endf;
char *filename = (char *)buffptr;
@@ -4798,7 +4816,7 @@ while (endf > filename && isspace(endf[-1])) endf--;
if (endf == filename)
{
- fprintf(outfile, "** File name expected after #save\n");
+ fprintf(outfile, "** File name expected after %s\n", name);
return PR_ABEND;
}
@@ -4964,7 +4982,7 @@ switch(cmd)
return PR_OK;
}
- rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f);
+ rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f, "#save");
if (rc != PR_OK) return rc;
PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
@@ -5003,7 +5021,7 @@ switch(cmd)
/* Load a set of compiled patterns from a file onto the stack */
case CMD_LOAD:
- rc = open_file(argptr+1, BINARY_INPUT_MODE, &f);
+ rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#load");
if (rc != PR_OK) return rc;
serial_size = 0;
@@ -5055,6 +5073,32 @@ switch(cmd)
free(serial);
break;
+
+ /* Load a set of binary tables into tables3. */
+
+ case CMD_LOADTABLES:
+ rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#loadtables");
+ if (rc != PR_OK) return rc;
+
+ if (tables3 == NULL)
+ {
+ (void)PCRE2_CONFIG(PCRE2_CONFIG_TABLES_LENGTH, &loadtables_length);
+ tables3 = malloc(loadtables_length);
+ }
+
+ if (tables3 == NULL)
+ {
+ fprintf(outfile, "** Failed: malloc failed for #loadtables\n");
+ yield = PR_ABEND;
+ }
+ else if (fread(tables3, 1, loadtables_length, f) != loadtables_length)
+ {
+ fprintf(outfile, "** Wrong return from fread()\n");
+ yield = PR_ABEND;
+ }
+
+ fclose(f);
+ break;
}
return yield;
@@ -5370,8 +5414,19 @@ else switch (pat_patctl.tables_id)
case 0: use_tables = NULL; break;
case 1: use_tables = tables1; break;
case 2: use_tables = tables2; break;
+
+ case 3:
+ if (tables3 == NULL)
+ {
+ fprintf(outfile, "** 'Tables = 3' is invalid: binary tables have not "
+ "been loaded\n");
+ return PR_SKIP;
+ }
+ use_tables = tables3;
+ break;
+
default:
- fprintf(outfile, "** 'Tables' must specify 0, 1, or 2.\n");
+ fprintf(outfile, "** 'Tables' must specify 0, 1, 2, or 3.\n");
return PR_SKIP;
}
@@ -6256,7 +6311,7 @@ if (cb->mark != last_callout_mark)
else
{
fprintf(outfile, "Latest Mark: ");
- PCHARSV(cb->mark, 0, -1, utf, outfile);
+ PCHARSV(cb->mark, -1, -1, utf, outfile);
putc('\n', outfile);
}
last_callout_mark = cb->mark;
@@ -7228,6 +7283,7 @@ if (dat_datctl.replacement[0] != 0)
uint8_t rbuffer[REPLACE_BUFFSIZE];
uint8_t nbuffer[REPLACE_BUFFSIZE];
uint32_t xoptions;
+ uint32_t emoption; /* External match option */
PCRE2_SIZE j, rlen, nsize, erroroffset;
BOOL badutf = FALSE;
@@ -7252,12 +7308,30 @@ if (dat_datctl.replacement[0] != 0)
if ((dat_datctl.control & CTL_ALTGLOBAL) != 0)
fprintf(outfile, "** Altglobal is not supported with replace: ignored\n");
- xoptions = (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
+ /* Check for a test that does substitution after an initial external match.
+ If this is set, we run the external match, but leave the interpretation of
+ its output to pcre2_substitute(). */
+
+ emoption = ((dat_datctl.control2 & CTL2_SUBSTITUTE_MATCHED) == 0)? 0 :
+ PCRE2_SUBSTITUTE_MATCHED;
+
+ if (emoption != 0)
+ {
+ PCRE2_MATCH(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
+ dat_datctl.options, match_data, use_dat_context);
+ }
+
+ xoptions = emoption |
+ (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
PCRE2_SUBSTITUTE_GLOBAL) |
(((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
PCRE2_SUBSTITUTE_EXTENDED) |
+ (((dat_datctl.control2 & CTL2_SUBSTITUTE_LITERAL) == 0)? 0 :
+ PCRE2_SUBSTITUTE_LITERAL) |
(((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 :
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) |
+ (((dat_datctl.control2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) == 0)? 0 :
+ PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) |
(((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 :
PCRE2_SUBSTITUTE_UNKNOWN_UNSET) |
(((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 :
@@ -7758,7 +7832,7 @@ for (gmatched = 0;; gmatched++)
TESTFLD(match_data, mark, !=, NULL))
{
fprintf(outfile, "MK: ");
- PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile);
+ PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile);
fprintf(outfile, "\n");
}
@@ -7790,7 +7864,7 @@ for (gmatched = 0;; gmatched++)
TESTFLD(match_data, mark, !=, NULL))
{
fprintf(outfile, ", mark=");
- PCHARS(rubriclength, CASTFLD(void *, match_data, mark), 0, -1, utf,
+ PCHARS(rubriclength, CASTFLD(void *, match_data, mark), -1, -1, utf,
outfile);
rubriclength += 7;
}
@@ -7889,7 +7963,7 @@ for (gmatched = 0;; gmatched++)
TESTFLD(match_data, mark, !=, NULL))
{
fprintf(outfile, ", mark = ");
- PCHARSV(CASTFLD(void *, match_data, mark), 0, -1, utf, outfile);
+ PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile);
}
if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
fprintf(outfile, " (JIT)");
@@ -9081,6 +9155,7 @@ free(dbuffer);
free(pbuffer8);
free(dfa_workspace);
free((void *)locale_tables);
+free(tables3);
PCRE2_MATCH_DATA_FREE(match_data);
SUB1(pcre2_code_free, compiled_code);
diff --git a/src/sljit/sljitConfig.h b/src/sljit/sljitConfig.h
index d54b5e6..4560450 100644
--- a/src/sljit/sljitConfig.h
+++ b/src/sljit/sljitConfig.h
@@ -27,6 +27,10 @@
#ifndef _SLJIT_CONFIG_H_
#define _SLJIT_CONFIG_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/* --------------------------------------------------------------------- */
/* Custom defines */
/* --------------------------------------------------------------------- */
@@ -65,12 +69,19 @@
#define SLJIT_UTIL_GLOBAL_LOCK 1
#endif
-/* Implements a stack like data structure (by using mmap / VirtualAlloc). */
+/* Implements a stack like data structure (by using mmap / VirtualAlloc */
+/* or a custom allocator). */
#ifndef SLJIT_UTIL_STACK
/* Enabled by default */
#define SLJIT_UTIL_STACK 1
#endif
+/* Uses user provided allocator to allocate the stack (see SLJIT_UTIL_STACK) */
+#ifndef SLJIT_UTIL_SIMPLE_STACK_ALLOCATION
+/* Disabled by default */
+#define SLJIT_UTIL_SIMPLE_STACK_ALLOCATION 0
+#endif
+
/* Single threaded application. Does not require any locks. */
#ifndef SLJIT_SINGLE_THREADED
/* Disabled by default. */
@@ -144,4 +155,8 @@
/* For further configurations, see the beginning of sljitConfigInternal.h */
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
#endif
diff --git a/src/sljit/sljitConfigInternal.h b/src/sljit/sljitConfigInternal.h
index acba9da..049ed2f 100644
--- a/src/sljit/sljitConfigInternal.h
+++ b/src/sljit/sljitConfigInternal.h
@@ -27,6 +27,20 @@
#ifndef _SLJIT_CONFIG_INTERNAL_H_
#define _SLJIT_CONFIG_INTERNAL_H_
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_DEBUG && SLJIT_DEBUG && (!defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE)))
+#include <stdio.h>
+#endif
+
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG \
+ && (!defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE) || !defined(SLJIT_HALT_PROCESS)))
+#include <stdlib.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/*
SLJIT defines the following architecture dependent types and macros:
@@ -191,6 +205,24 @@
#define SLJIT_CONFIG_SPARC 1
#endif
+/************************************************************/
+/* Intel Control-flow Enforcement Technology (CET) support. */
+/************************************************************/
+
+#ifdef SLJIT_CONFIG_X86
+#if defined(__CET__)
+#define SLJIT_CONFIG_X86_CET 1
+#endif
+#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
+#if defined(__GNUC__)
+#if !defined (__SHSTK__)
+#error "-mshstk is needed to compile with -fcf-protection"
+#endif
+#include <x86intrin.h>
+#endif
+#endif
+#endif
+
/**********************************/
/* External function definitions. */
/**********************************/
@@ -265,6 +297,7 @@
/* Type of public API functions. */
/*********************************/
+#ifndef SLJIT_API_FUNC_ATTRIBUTE
#if (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC)
/* Static ABI functions. For all-in-one programs. */
@@ -278,6 +311,7 @@
#else
#define SLJIT_API_FUNC_ATTRIBUTE
#endif /* (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC) */
+#endif /* !SLJIT_API_FUNC_ATTRIBUTE */
/****************************/
/* Instruction cache flush. */
@@ -287,7 +321,7 @@
#if __has_builtin(__builtin___clear_cache)
#define SLJIT_CACHE_FLUSH(from, to) \
- __builtin___clear_cache((char*)from, (char*)to)
+ __builtin___clear_cache((char*)(from), (char*)(to))
#endif /* __has_builtin(__builtin___clear_cache) */
#endif /* (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) */
@@ -318,7 +352,7 @@
#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)))
#define SLJIT_CACHE_FLUSH(from, to) \
- __builtin___clear_cache((char*)from, (char*)to)
+ __builtin___clear_cache((char*)(from), (char*)(to))
#elif defined __ANDROID__
@@ -451,6 +485,25 @@ typedef double sljit_f64;
#define SLJIT_BIG_ENDIAN 1
#endif
+#ifndef SLJIT_MIPS_REV
+
+/* Auto detecting mips revision. */
+#if (defined __mips_isa_rev) && (__mips_isa_rev >= 6)
+#define SLJIT_MIPS_REV 6
+#elif (defined __mips_isa_rev && __mips_isa_rev >= 1) \
+ || (defined __clang__ && defined _MIPS_ARCH_OCTEON) \
+ || (defined __clang__ && defined _MIPS_ARCH_P5600)
+/* clang either forgets to define (clang-7) __mips_isa_rev at all
+ * or sets it to zero (clang-8,-9) for -march=octeon (MIPS64 R2+)
+ * and -march=p5600 (MIPS32 R5).
+ * It also sets the __mips macro to 64 or 32 for -mipsN when N <= 5
+ * (should be set to N exactly) so we cannot rely on this either.
+ */
+#define SLJIT_MIPS_REV 1
+#endif
+
+#endif /* !SLJIT_MIPS_REV */
+
#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
#define SLJIT_BIG_ENDIAN 1
@@ -679,24 +732,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
/* Debug and verbose related macros. */
/*************************************/
-#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
-#include <stdio.h>
-#endif
-
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
#if !defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE)
/* SLJIT_HALT_PROCESS must halt the process. */
#ifndef SLJIT_HALT_PROCESS
-#include <stdlib.h>
-
#define SLJIT_HALT_PROCESS() \
abort();
#endif /* !SLJIT_HALT_PROCESS */
-#include <stdio.h>
-
#endif /* !SLJIT_ASSERT || !SLJIT_UNREACHABLE */
/* Feel free to redefine these two macros. */
@@ -742,4 +787,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#endif /* !SLJIT_COMPILE_ASSERT */
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
#endif
diff --git a/src/sljit/sljitExecAllocator.c b/src/sljit/sljitExecAllocator.c
index 92ddb94..7653907 100644
--- a/src/sljit/sljitExecAllocator.c
+++ b/src/sljit/sljitExecAllocator.c
@@ -106,10 +106,10 @@ static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
static SLJIT_INLINE int get_map_jit_flag()
{
+/* On macOS systems, returns MAP_JIT if it is defined _and_ we're running on a version
+ of macOS where it's OK to have more than one JIT block.
+ On non-macOS systems, returns MAP_JIT if it is defined. */
#if TARGET_OS_OSX
- /* On macOS systems, returns MAP_JIT if it is defined _and_ we're running on a version
- of macOS where it's OK to have more than one JIT block. On non-macOS systems, returns
- MAP_JIT if it is defined. */
static int map_jit_flag = -1;
/* The following code is thread safe because multiple initialization
@@ -124,12 +124,19 @@ static SLJIT_INLINE int get_map_jit_flag()
/* Kernel version for 10.14.0 (Mojave) */
if (atoi(name.release) >= 18) {
/* Only use MAP_JIT if a hardened runtime is used, because MAP_JIT is incompatible with fork(). */
- void *ptr = mmap(NULL, getpagesize(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+
+ /* mirroring page size detection from sljit_allocate_stack */
+ long page_size = sysconf(_SC_PAGESIZE);
+ /* Should never happen */
+ if (page_size < 0)
+ page_size = 4096;
+
+ void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
if (ptr == MAP_FAILED) {
map_jit_flag = MAP_JIT;
} else {
- munmap(ptr, getpagesize());
+ munmap(ptr, page_size);
}
}
}
diff --git a/src/sljit/sljitLir.c b/src/sljit/sljitLir.c
index 9bab0c3..86772cc 100644
--- a/src/sljit/sljitLir.c
+++ b/src/sljit/sljitLir.c
@@ -926,7 +926,8 @@ static void sljit_verbose_fparam(struct sljit_compiler *compiler, sljit_s32 p, s
static const char* op0_names[] = {
(char*)"breakpoint", (char*)"nop", (char*)"lmul.uw", (char*)"lmul.sw",
- (char*)"divmod.u", (char*)"divmod.s", (char*)"div.u", (char*)"div.s"
+ (char*)"divmod.u", (char*)"divmod.s", (char*)"div.u", (char*)"div.s",
+ (char*)"endbr", (char*)"skip_frames_before_return"
};
static const char* op1_names[] = {
@@ -943,6 +944,12 @@ static const char* op2_names[] = {
(char*)"shl", (char*)"lshr", (char*)"ashr",
};
+static const char* op_src_names[] = {
+ (char*)"fast_return", (char*)"skip_frames_before_fast_return",
+ (char*)"prefetch_l1", (char*)"prefetch_l2",
+ (char*)"prefetch_l3", (char*)"prefetch_once",
+};
+
static const char* fop1_names[] = {
(char*)"mov", (char*)"conv", (char*)"conv", (char*)"conv",
(char*)"conv", (char*)"conv", (char*)"cmp", (char*)"neg",
@@ -1152,37 +1159,21 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_enter(struct sljit_c
CHECK_RETURN_OK;
}
-static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
-{
-#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- FUNCTION_CHECK_SRC(src, srcw);
- CHECK_ARGUMENT(src != SLJIT_IMM);
- compiler->last_flags = 0;
-#endif
-#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
- if (SLJIT_UNLIKELY(!!compiler->verbose)) {
- fprintf(compiler->verbose, " fast_return ");
- sljit_verbose_param(compiler, src, srcw);
- fprintf(compiler->verbose, "\n");
- }
-#endif
- CHECK_RETURN_OK;
-}
-
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT((op >= SLJIT_BREAKPOINT && op <= SLJIT_LMUL_SW)
- || ((op & ~SLJIT_I32_OP) >= SLJIT_DIVMOD_UW && (op & ~SLJIT_I32_OP) <= SLJIT_DIV_SW));
- CHECK_ARGUMENT(op < SLJIT_LMUL_UW || compiler->scratches >= 2);
- if (op >= SLJIT_LMUL_UW)
+ || ((op & ~SLJIT_I32_OP) >= SLJIT_DIVMOD_UW && (op & ~SLJIT_I32_OP) <= SLJIT_DIV_SW)
+ || (op >= SLJIT_ENDBR && op <= SLJIT_SKIP_FRAMES_BEFORE_RETURN));
+ CHECK_ARGUMENT(GET_OPCODE(op) < SLJIT_LMUL_UW || GET_OPCODE(op) >= SLJIT_ENDBR || compiler->scratches >= 2);
+ if ((GET_OPCODE(op) >= SLJIT_LMUL_UW && GET_OPCODE(op) <= SLJIT_DIV_SW) || op == SLJIT_SKIP_FRAMES_BEFORE_RETURN)
compiler->last_flags = 0;
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose))
{
fprintf(compiler->verbose, " %s", op0_names[GET_OPCODE(op) - SLJIT_OP0_BASE]);
- if (GET_OPCODE(op) >= SLJIT_DIVMOD_UW) {
+ if (GET_OPCODE(op) >= SLJIT_DIVMOD_UW && GET_OPCODE(op) <= SLJIT_DIV_SW) {
fprintf(compiler->verbose, (op & SLJIT_I32_OP) ? "32" : "w");
}
fprintf(compiler->verbose, "\n");
@@ -1224,7 +1215,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler
break;
}
- FUNCTION_CHECK_DST(dst, dstw, 1);
+ FUNCTION_CHECK_DST(dst, dstw, HAS_FLAGS(op));
FUNCTION_CHECK_SRC(src, srcw);
if (GET_OPCODE(op) >= SLJIT_NOT) {
@@ -1304,7 +1295,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler
break;
}
- FUNCTION_CHECK_DST(dst, dstw, 1);
+ FUNCTION_CHECK_DST(dst, dstw, HAS_FLAGS(op));
FUNCTION_CHECK_SRC(src1, src1w);
FUNCTION_CHECK_SRC(src2, src2w);
compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z));
@@ -1325,6 +1316,33 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler
CHECK_RETURN_OK;
}
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src, sljit_sw srcw)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(op >= SLJIT_FAST_RETURN && op <= SLJIT_PREFETCH_ONCE);
+ FUNCTION_CHECK_SRC(src, srcw);
+
+ if (op == SLJIT_FAST_RETURN || op == SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN)
+ {
+ CHECK_ARGUMENT(src != SLJIT_IMM);
+ compiler->last_flags = 0;
+ }
+ else if (op >= SLJIT_PREFETCH_L1 && op <= SLJIT_PREFETCH_ONCE)
+ {
+ CHECK_ARGUMENT(src & SLJIT_MEM);
+ }
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ fprintf(compiler->verbose, " %s ", op_src_names[op - SLJIT_OP_SRC_BASE]);
+ sljit_verbose_param(compiler, src, srcw);
+ fprintf(compiler->verbose, "\n");
+ }
+#endif
+ CHECK_RETURN_OK;
+}
+
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_register_index(sljit_s32 reg)
{
SLJIT_UNUSED_ARG(reg);
@@ -2016,7 +2034,7 @@ static SLJIT_INLINE sljit_s32 emit_mov_before_return(struct sljit_compiler *comp
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
|| (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \
|| (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
- || ((defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) && !(defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1))
+ || ((defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) && !(defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1))
static SLJIT_INLINE sljit_s32 sljit_emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 dst_reg,
@@ -2381,15 +2399,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
return SLJIT_ERR_UNSUPPORTED;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
-{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(src);
- SLJIT_UNUSED_ARG(srcw);
- SLJIT_UNREACHABLE();
- return SLJIT_ERR_UNSUPPORTED;
-}
-
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
SLJIT_UNUSED_ARG(compiler);
@@ -2429,6 +2438,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return SLJIT_ERR_UNSUPPORTED;
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src, sljit_sw srcw)
+{
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(op);
+ SLJIT_UNUSED_ARG(src);
+ SLJIT_UNUSED_ARG(srcw);
+ SLJIT_UNREACHABLE();
+ return SLJIT_ERR_UNSUPPORTED;
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
SLJIT_UNREACHABLE();
@@ -2549,6 +2569,13 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw
SLJIT_UNREACHABLE();
}
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_label, struct sljit_label *label)
+{
+ SLJIT_UNUSED_ARG(put_label);
+ SLJIT_UNUSED_ARG(label);
+ SLJIT_UNREACHABLE();
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
SLJIT_UNUSED_ARG(compiler);
diff --git a/src/sljit/sljitLir.h b/src/sljit/sljitLir.h
index 836d25c..72595bb 100644
--- a/src/sljit/sljitLir.h
+++ b/src/sljit/sljitLir.h
@@ -80,6 +80,10 @@ of sljitConfigInternal.h */
#include "sljitConfigInternal.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/* --------------------------------------------------------------------- */
/* Error codes */
/* --------------------------------------------------------------------- */
@@ -154,10 +158,10 @@ of sljitConfigInternal.h */
*/
/* When SLJIT_UNUSED is specified as the destination of sljit_emit_op1
- or sljit_emit_op2 operations the result is discarded. If no status
- flags are set, no instructions are emitted for these operations. Data
- prefetch is a special exception, see SLJIT_MOV operation. Other SLJIT
- operations do not support SLJIT_UNUSED as a destination operand. */
+ or sljit_emit_op2 operations the result is discarded. Some status
+ flags must be set when the destination is SLJIT_UNUSED, because the
+ operation would have no effect otherwise. Other SLJIT operations do
+ not support SLJIT_UNUSED as a destination operand. */
#define SLJIT_UNUSED 0
/* Scratch registers. */
@@ -567,10 +571,14 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler
#define SLJIT_HAS_FPU 0
/* [Limitation] Some registers are virtual registers. */
#define SLJIT_HAS_VIRTUAL_REGISTERS 1
+/* [Emulated] Has zero register (setting a memory location to zero is efficient). */
+#define SLJIT_HAS_ZERO_REGISTER 2
/* [Emulated] Count leading zero is supported. */
-#define SLJIT_HAS_CLZ 2
+#define SLJIT_HAS_CLZ 3
+/* [Emulated] Conditional move is supported. */
+#define SLJIT_HAS_CMOV 4
/* [Emulated] Conditional move is supported. */
-#define SLJIT_HAS_CMOV 3
+#define SLJIT_HAS_PREFETCH 5
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
/* [Not emulated] SSE2 support is available on x86. */
@@ -658,10 +666,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
sljit_s32 src, sljit_sw srcw);
/* Generating entry and exit points for fast call functions (see SLJIT_FAST_CALL).
- Both sljit_emit_fast_enter and sljit_emit_fast_return functions preserve the
+ Both sljit_emit_fast_enter and SLJIT_FAST_RETURN operations preserve the
values of all registers and stack frame. The return address is stored in the
dst argument of sljit_emit_fast_enter, and this return address can be passed
- to sljit_emit_fast_return to continue the execution after the fast call.
+ to SLJIT_FAST_RETURN to continue the execution after the fast call.
Fast calls are cheap operations (usually only a single call instruction is
emitted) but they do not preserve any registers. However the callee function
@@ -669,16 +677,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
efficiently exploited by various optimizations. Registers can be saved
manually by the callee function if needed.
- Although returning to different address by sljit_emit_fast_return is possible,
+ Although returning to different address by SLJIT_FAST_RETURN is possible,
this address usually cannot be predicted by the return address predictor of
- modern CPUs which may reduce performance. Furthermore using sljit_emit_ijump
- to return is also inefficient since return address prediction is usually
- triggered by a specific form of ijump.
+ modern CPUs which may reduce performance. Furthermore certain security
+ enhancement technologies such as Intel Control-flow Enforcement Technology
+ (CET) may disallow returning to a different address.
Flags: - (does not modify flags). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw);
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw);
/*
Source and destination operands for arithmetical instructions
@@ -887,6 +894,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
the behaviour is undefined. */
#define SLJIT_DIV_SW (SLJIT_OP0_BASE + 7)
#define SLJIT_DIV_S32 (SLJIT_DIV_SW | SLJIT_I32_OP)
+/* Flags: - (does not modify flags)
+ ENDBR32 instruction for x86-32 and ENDBR64 instruction for x86-64
+ when Intel Control-flow Enforcement Technology (CET) is enabled.
+ No instruction for other architectures. */
+#define SLJIT_ENDBR (SLJIT_OP0_BASE + 8)
+/* Flags: may destroy flags
+ Skip stack frames before return. */
+#define SLJIT_SKIP_FRAMES_BEFORE_RETURN (SLJIT_OP0_BASE + 9)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op);
@@ -904,15 +919,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
U32 - unsigned int (32 bit) data transfer
S32 - signed int (32 bit) data transfer
P - pointer (sljit_p) data transfer
-
- If the destination of a MOV instruction is SLJIT_UNUSED and the source
- operand is a memory address the compiler emits a prefetch instruction
- if this instruction is supported by the current CPU. Higher data sizes
- bring the data closer to the core: a MOV with word size loads the data
- into a higher level cache than a byte size. Otherwise the type does not
- affect the prefetch instruction. Furthermore a prefetch instruction
- never fails, so it can be used to prefetch a data from an address and
- check whether that address is NULL afterwards.
*/
/* Flags: - (does not modify flags) */
@@ -1017,8 +1023,46 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w);
+/* Starting index of opcodes for sljit_emit_op_src. */
+#define SLJIT_OP_SRC_BASE 128
+
+/* Note: src cannot be an immediate value
+ Flags: - (does not modify flags) */
+#define SLJIT_FAST_RETURN (SLJIT_OP_SRC_BASE + 0)
+/* Skip stack frames before fast return.
+ Note: src cannot be an immediate value
+ Flags: may destroy flags. */
+#define SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN (SLJIT_OP_SRC_BASE + 1)
+/* Prefetch value into the level 1 data cache
+ Note: if the target CPU does not support data prefetch,
+ no instructions are emitted.
+ Note: this instruction never fails, even if the memory address is invalid.
+ Flags: - (does not modify flags) */
+#define SLJIT_PREFETCH_L1 (SLJIT_OP_SRC_BASE + 2)
+/* Prefetch value into the level 2 data cache
+ Note: same as SLJIT_PREFETCH_L1 if the target CPU
+ does not support this instruction form.
+ Note: this instruction never fails, even if the memory address is invalid.
+ Flags: - (does not modify flags) */
+#define SLJIT_PREFETCH_L2 (SLJIT_OP_SRC_BASE + 3)
+/* Prefetch value into the level 3 data cache
+ Note: same as SLJIT_PREFETCH_L2 if the target CPU
+ does not support this instruction form.
+ Note: this instruction never fails, even if the memory address is invalid.
+ Flags: - (does not modify flags) */
+#define SLJIT_PREFETCH_L3 (SLJIT_OP_SRC_BASE + 4)
+/* Prefetch a value which is only used once (and can be discarded afterwards)
+ Note: same as SLJIT_PREFETCH_L1 if the target CPU
+ does not support this instruction form.
+ Note: this instruction never fails, even if the memory address is invalid.
+ Flags: - (does not modify flags) */
+#define SLJIT_PREFETCH_ONCE (SLJIT_OP_SRC_BASE + 5)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src, sljit_sw srcw);
+
/* Starting index of opcodes for sljit_emit_fop1. */
-#define SLJIT_FOP1_BASE 128
+#define SLJIT_FOP1_BASE 160
/* Flags: - (does not modify flags) */
#define SLJIT_MOV_F64 (SLJIT_FOP1_BASE + 0)
@@ -1057,7 +1101,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
sljit_s32 src, sljit_sw srcw);
/* Starting index of opcodes for sljit_emit_fop2. */
-#define SLJIT_FOP2_BASE 160
+#define SLJIT_FOP2_BASE 192
/* Flags: - (does not modify flags) */
#define SLJIT_ADD_F64 (SLJIT_FOP2_BASE + 0)
@@ -1161,7 +1205,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
/* Unconditional jump types. */
#define SLJIT_JUMP 24
- /* Fast calling method. See sljit_emit_fast_enter / sljit_emit_fast_return. */
+ /* Fast calling method. See sljit_emit_fast_enter / SLJIT_FAST_RETURN. */
#define SLJIT_FAST_CALL 25
/* Called function must be declared with the SLJIT_FUNC attribute. */
#define SLJIT_CALL 26
@@ -1490,4 +1534,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler,
sljit_s32 current_flags);
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
#endif /* _SLJIT_LIR_H_ */
diff --git a/src/sljit/sljitNativeARM_32.c b/src/sljit/sljitNativeARM_32.c
index 71f7bcd..5d180c2 100644
--- a/src/sljit/sljitNativeARM_32.c
+++ b/src/sljit/sljitNativeARM_32.c
@@ -666,6 +666,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
label->size = code_ptr - code;
label = label->next;
+
+ next_addr = compute_next_addr(label, jump, const_, put_label);
}
}
}
@@ -870,6 +872,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
case SLJIT_HAS_CLZ:
case SLJIT_HAS_CMOV:
+#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+ case SLJIT_HAS_PREFETCH:
+#endif
return 1;
default:
@@ -1676,6 +1681,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
}
return SLJIT_SUCCESS;
+ case SLJIT_ENDBR:
+ case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
+ return SLJIT_SUCCESS;
}
return SLJIT_SUCCESS;
@@ -1690,14 +1698,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(dst, dstw);
ADJUST_LOCAL_OFFSET(src, srcw);
- if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
-#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
- if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
- return emit_op_mem(compiler, PRELOAD | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1);
-#endif
- return SLJIT_SUCCESS;
- }
-
switch (GET_OPCODE(op)) {
case SLJIT_MOV:
case SLJIT_MOV_U32:
@@ -1779,6 +1779,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src, sljit_sw srcw)
+{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ switch (op) {
+ case SLJIT_FAST_RETURN:
+ SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
+
+ if (FAST_IS_REG(src))
+ FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src)));
+ else
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG1));
+
+ return push_inst(compiler, BX | RM(TMP_REG2));
+ case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
+ return SLJIT_SUCCESS;
+ case SLJIT_PREFETCH_L1:
+ case SLJIT_PREFETCH_L2:
+ case SLJIT_PREFETCH_L3:
+ case SLJIT_PREFETCH_ONCE:
+#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+ SLJIT_ASSERT(src & SLJIT_MEM);
+ return emit_op_mem(compiler, PRELOAD | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1);
+#else /* !SLJIT_CONFIG_ARM_V7 */
+ return SLJIT_SUCCESS;
+#endif /* SLJIT_CONFIG_ARM_V7 */
+ }
+
+ return SLJIT_SUCCESS;
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
CHECK_REG_INDEX(check_sljit_get_register_index(reg));
@@ -2041,22 +2075,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1);
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
-{
- CHECK_ERROR();
- CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
- ADJUST_LOCAL_OFFSET(src, srcw);
-
- SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
-
- if (FAST_IS_REG(src))
- FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src)));
- else
- FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG1));
-
- return push_inst(compiler, BX | RM(TMP_REG2));
-}
-
/* --------------------------------------------------------------------- */
/* Conditional instructions */
/* --------------------------------------------------------------------- */
@@ -2615,11 +2633,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
}
else {
if (is_type1_transfer) {
- if (memw > 4095 && memw < -4095)
+ if (memw > 4095 || memw < -4095)
return SLJIT_ERR_UNSUPPORTED;
}
else {
- if (memw > 255 && memw < -255)
+ if (memw > 255 || memw < -255)
return SLJIT_ERR_UNSUPPORTED;
}
}
diff --git a/src/sljit/sljitNativeARM_64.c b/src/sljit/sljitNativeARM_64.c
index e15b345..eaca095 100644
--- a/src/sljit/sljitNativeARM_64.c
+++ b/src/sljit/sljitNativeARM_64.c
@@ -396,6 +396,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
case SLJIT_HAS_CLZ:
case SLJIT_HAS_CMOV:
+ case SLJIT_HAS_PREFETCH:
return 1;
default:
@@ -1154,6 +1155,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
case SLJIT_DIV_UW:
case SLJIT_DIV_SW:
return push_inst(compiler, ((op == SLJIT_DIV_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1));
+ case SLJIT_ENDBR:
+ case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
+ return SLJIT_SUCCESS;
}
return SLJIT_SUCCESS;
@@ -1171,23 +1175,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(dst, dstw);
ADJUST_LOCAL_OFFSET(src, srcw);
- if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
- if (op <= SLJIT_MOV_P && (src & SLJIT_MEM)) {
- SLJIT_ASSERT(reg_map[1] == 0 && reg_map[3] == 2 && reg_map[5] == 4);
-
- if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8)
- dst = 5;
- else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16)
- dst = 3;
- else
- dst = 1;
-
- /* Signed word sized load is the prefetch instruction. */
- return emit_op_mem(compiler, WORD_SIZE | SIGNED, dst, src, srcw, TMP_REG1);
- }
- return SLJIT_SUCCESS;
- }
-
dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
op = GET_OPCODE(op);
@@ -1327,6 +1314,46 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src, sljit_sw srcw)
+{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ switch (op) {
+ case SLJIT_FAST_RETURN:
+ if (FAST_IS_REG(src))
+ FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src)));
+ else
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw, TMP_REG1));
+
+ return push_inst(compiler, RET | RN(TMP_LR));
+ case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
+ return SLJIT_SUCCESS;
+ case SLJIT_PREFETCH_L1:
+ case SLJIT_PREFETCH_L2:
+ case SLJIT_PREFETCH_L3:
+ case SLJIT_PREFETCH_ONCE:
+ SLJIT_ASSERT(reg_map[1] == 0 && reg_map[3] == 2 && reg_map[5] == 4);
+
+ /* The reg_map[op] should provide the appropriate constant. */
+ if (op == SLJIT_PREFETCH_L1)
+ op = 1;
+ else if (op == SLJIT_PREFETCH_L2)
+ op = 3;
+ else if (op == SLJIT_PREFETCH_L3)
+ op = 5;
+ else
+ op = 2;
+
+ /* Signed word sized load is the prefetch instruction. */
+ return emit_op_mem(compiler, WORD_SIZE | SIGNED, op, src, srcw, TMP_REG1);
+ }
+
+ return SLJIT_SUCCESS;
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
CHECK_REG_INDEX(check_sljit_get_register_index(reg));
@@ -1578,20 +1605,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_LR, dst, dstw, TMP_REG1);
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
-{
- CHECK_ERROR();
- CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
- ADJUST_LOCAL_OFFSET(src, srcw);
-
- if (FAST_IS_REG(src))
- FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src)));
- else
- FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw, TMP_REG1));
-
- return push_inst(compiler, RET | RN(TMP_LR));
-}
-
/* --------------------------------------------------------------------- */
/* Conditional instructions */
/* --------------------------------------------------------------------- */
@@ -1865,7 +1878,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
CHECK_ERROR();
CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
- if ((mem & OFFS_REG_MASK) || (memw > 255 && memw < -256))
+ if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256))
return SLJIT_ERR_UNSUPPORTED;
if (type & SLJIT_MEM_SUPP)
@@ -1915,7 +1928,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
CHECK_ERROR();
CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));
- if ((mem & OFFS_REG_MASK) || (memw > 255 && memw < -256))
+ if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256))
return SLJIT_ERR_UNSUPPORTED;
if (type & SLJIT_MEM_SUPP)
diff --git a/src/sljit/sljitNativeARM_T2_32.c b/src/sljit/sljitNativeARM_T2_32.c
index cdfe4a4..a81e008 100644
--- a/src/sljit/sljitNativeARM_T2_32.c
+++ b/src/sljit/sljitNativeARM_T2_32.c
@@ -480,6 +480,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
case SLJIT_HAS_CLZ:
case SLJIT_HAS_CMOV:
+ case SLJIT_HAS_PREFETCH:
return 1;
default:
@@ -1328,6 +1329,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
}
return SLJIT_SUCCESS;
#endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */
+ case SLJIT_ENDBR:
+ case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
+ return SLJIT_SUCCESS;
}
return SLJIT_SUCCESS;
@@ -1345,13 +1349,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(dst, dstw);
ADJUST_LOCAL_OFFSET(src, srcw);
- if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
- /* Since TMP_PC has index 15, IS_2_LO_REGS and IS_3_LO_REGS checks always fail. */
- if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
- return emit_op_mem(compiler, PRELOAD, TMP_PC, src, srcw, TMP_REG1);
- return SLJIT_SUCCESS;
- }
-
dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
op = GET_OPCODE(op);
@@ -1475,6 +1472,35 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG2);
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src, sljit_sw srcw)
+{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ switch (op) {
+ case SLJIT_FAST_RETURN:
+ SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
+
+ if (FAST_IS_REG(src))
+ FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, src)));
+ else
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2));
+
+ return push_inst16(compiler, BX | RN3(TMP_REG2));
+ case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
+ return SLJIT_SUCCESS;
+ case SLJIT_PREFETCH_L1:
+ case SLJIT_PREFETCH_L2:
+ case SLJIT_PREFETCH_L3:
+ case SLJIT_PREFETCH_ONCE:
+ return emit_op_mem(compiler, PRELOAD, TMP_PC, src, srcw, TMP_REG1);
+ }
+
+ return SLJIT_SUCCESS;
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
CHECK_REG_INDEX(check_sljit_get_register_index(reg));
@@ -1728,22 +1754,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, TMP_REG1);
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
-{
- CHECK_ERROR();
- CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
- ADJUST_LOCAL_OFFSET(src, srcw);
-
- SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
-
- if (FAST_IS_REG(src))
- FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, src)));
- else
- FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2));
-
- return push_inst16(compiler, BX | RN3(TMP_REG2));
-}
-
/* --------------------------------------------------------------------- */
/* Conditional instructions */
/* --------------------------------------------------------------------- */
@@ -2264,7 +2274,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
CHECK_ERROR();
CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
- if ((mem & OFFS_REG_MASK) || (memw > 255 && memw < -255))
+ if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -255))
return SLJIT_ERR_UNSUPPORTED;
if (type & SLJIT_MEM_SUPP)
diff --git a/src/sljit/sljitNativeMIPS_32.c b/src/sljit/sljitNativeMIPS_32.c
index 16dec05..777627b 100644
--- a/src/sljit/sljitNativeMIPS_32.c
+++ b/src/sljit/sljitNativeMIPS_32.c
@@ -86,12 +86,12 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
if (op == SLJIT_MOV_S8) {
-#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst));
-#else
+#else /* SLJIT_MIPS_REV < 1 */
FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst));
-#endif
+#endif /* SLJIT_MIPS_REV >= 1 */
}
return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
}
@@ -105,12 +105,12 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
if (op == SLJIT_MOV_S16) {
-#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst));
-#else
+#else /* SLJIT_MIPS_REV < 1 */
FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst));
-#endif
+#endif /* SLJIT_MIPS_REV >= 1 */
}
return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
}
@@ -129,12 +129,12 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
-#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (!(flags & UNUSED_DEST))
FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst)));
-#else
+#else /* SLJIT_MIPS_REV < 1 */
if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
FAIL_IF(push_inst(compiler, SRL | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG));
return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG);
@@ -149,7 +149,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
FAIL_IF(push_inst(compiler, ADDIU | S(dst) | T(dst) | IMM(1), DR(dst)));
FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
-#endif
+#endif /* SLJIT_MIPS_REV >= 1 */
return SLJIT_SUCCESS;
case SLJIT_ADD:
@@ -368,21 +368,22 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
SLJIT_ASSERT(!(flags & SRC2_IMM));
if (GET_FLAG_TYPE(op) != SLJIT_MUL_OVERFLOW) {
-#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) || (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
-#else /* !SLJIT_MIPS_R1 && !SLJIT_MIPS_R6 */
+#else /* SLJIT_MIPS_REV < 1 */
FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
return push_inst(compiler, MFLO | D(dst), DR(dst));
-#endif /* SLJIT_MIPS_R1 || SLJIT_MIPS_R6 */
+#endif /* SLJIT_MIPS_REV >= 1 */
}
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
FAIL_IF(push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)));
FAIL_IF(push_inst(compiler, MUH | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
-#else /* !SLJIT_MIPS_R6 */
+#else /* SLJIT_MIPS_REV < 6 */
FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
-#endif /* SLJIT_MIPS_R6 */
+#endif /* SLJIT_MIPS_REV >= 6 */
FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG));
return push_inst(compiler, SUBU | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
diff --git a/src/sljit/sljitNativeMIPS_64.c b/src/sljit/sljitNativeMIPS_64.c
index a6a2bcc..479244d 100644
--- a/src/sljit/sljitNativeMIPS_64.c
+++ b/src/sljit/sljitNativeMIPS_64.c
@@ -220,12 +220,12 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
-#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (!(flags & UNUSED_DEST))
FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | T(dst) | D(dst), DR(dst)));
-#else
+#else /* SLJIT_MIPS_REV < 1 */
if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG));
return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG);
@@ -240,7 +240,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst)));
FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, SELECT_OP(DSLL, SLL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
-#endif
+#endif /* SLJIT_MIPS_REV >= 1 */
return SLJIT_SUCCESS;
case SLJIT_ADD:
@@ -459,26 +459,27 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
SLJIT_ASSERT(!(flags & SRC2_IMM));
if (GET_FLAG_TYPE(op) != SLJIT_MUL_OVERFLOW) {
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
return push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst));
-#elif (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
if (op & SLJIT_I32_OP)
return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS));
return push_inst(compiler, MFLO | D(dst), DR(dst));
-#else /* !SLJIT_MIPS_R6 && !SLJIT_MIPS_R1 */
+#else /* SLJIT_MIPS_REV < 1 */
FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
return push_inst(compiler, MFLO | D(dst), DR(dst));
-#endif /* SLJIT_MIPS_R6 */
+#endif /* SLJIT_MIPS_REV >= 6 */
}
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
FAIL_IF(push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst)));
FAIL_IF(push_inst(compiler, SELECT_OP(DMUH, MUH) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
-#else /* !SLJIT_MIPS_R6 */
+#else /* SLJIT_MIPS_REV < 6 */
FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
-#endif /* SLJIT_MIPS_R6 */
+#endif /* SLJIT_MIPS_REV >= 6 */
FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG));
return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
diff --git a/src/sljit/sljitNativeMIPS_common.c b/src/sljit/sljitNativeMIPS_common.c
index 7d1d087..88df904 100644
--- a/src/sljit/sljitNativeMIPS_common.c
+++ b/src/sljit/sljitNativeMIPS_common.c
@@ -25,15 +25,16 @@
*/
/* Latest MIPS architecture. */
-/* Automatically detect SLJIT_MIPS_R1 */
-#if (defined __mips_isa_rev) && (__mips_isa_rev >= 6)
-#define SLJIT_MIPS_R6 1
+#ifndef __mips_hard_float
+/* Disable automatic detection, covers both -msoft-float and -mno-float */
+#undef SLJIT_IS_FPU_AVAILABLE
+#define SLJIT_IS_FPU_AVAILABLE 0
#endif
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
return "MIPS32-R6" SLJIT_CPUINFO;
@@ -41,7 +42,7 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
return "MIPS64-R6" SLJIT_CPUINFO;
#endif /* SLJIT_CONFIG_MIPS_32 */
-#elif (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
return "MIPS32-R1" SLJIT_CPUINFO;
@@ -49,9 +50,9 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
return "MIPS64-R1" SLJIT_CPUINFO;
#endif /* SLJIT_CONFIG_MIPS_32 */
-#else /* SLJIT_MIPS_R1 */
+#else /* SLJIT_MIPS_REV < 1 */
return "MIPS III" SLJIT_CPUINFO;
-#endif /* SLJIT_MIPS_R6 */
+#endif /* SLJIT_MIPS_REV >= 6 */
}
/* Length of an instruction word
@@ -117,11 +118,11 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
#define FR(dr) (freg_map[dr])
#define HI(opcode) ((opcode) << 26)
#define LO(opcode) (opcode)
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
/* CMP.cond.fmt */
/* S = (20 << 21) D = (21 << 21) */
#define CMP_FMT_S (20 << 21)
-#endif /* SLJIT_MIPS_R6 */
+#endif /* SLJIT_MIPS_REV >= 6 */
/* S = (16 << 21) D = (17 << 21) */
#define FMT_S (16 << 21)
#define FMT_D (17 << 21)
@@ -134,13 +135,13 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
#define ANDI (HI(12))
#define B (HI(4))
#define BAL (HI(1) | (17 << 16))
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
#define BC1EQZ (HI(17) | (9 << 21) | FT(TMP_FREG3))
#define BC1NEZ (HI(17) | (13 << 21) | FT(TMP_FREG3))
-#else /* !SLJIT_MIPS_R6 */
+#else /* SLJIT_MIPS_REV < 6 */
#define BC1F (HI(17) | (8 << 21))
#define BC1T (HI(17) | (8 << 21) | (1 << 16))
-#endif /* SLJIT_MIPS_R6 */
+#endif /* SLJIT_MIPS_REV >= 6 */
#define BEQ (HI(4))
#define BGEZ (HI(1) | (1 << 16))
#define BGTZ (HI(7))
@@ -149,23 +150,23 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
#define BNE (HI(5))
#define BREAK (HI(0) | LO(13))
#define CFC1 (HI(17) | (2 << 21))
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
#define C_UEQ_S (HI(17) | CMP_FMT_S | LO(3))
#define C_ULE_S (HI(17) | CMP_FMT_S | LO(7))
#define C_ULT_S (HI(17) | CMP_FMT_S | LO(5))
#define C_UN_S (HI(17) | CMP_FMT_S | LO(1))
#define C_FD (FD(TMP_FREG3))
-#else /* !SLJIT_MIPS_R6 */
+#else /* SLJIT_MIPS_REV < 6 */
#define C_UEQ_S (HI(17) | FMT_S | LO(51))
#define C_ULE_S (HI(17) | FMT_S | LO(55))
#define C_ULT_S (HI(17) | FMT_S | LO(53))
#define C_UN_S (HI(17) | FMT_S | LO(49))
#define C_FD (0)
-#endif /* SLJIT_MIPS_R6 */
+#endif /* SLJIT_MIPS_REV >= 6 */
#define CVT_S_S (HI(17) | FMT_S | LO(32))
#define DADDIU (HI(25))
#define DADDU (HI(0) | LO(45))
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
#define DDIV (HI(0) | (2 << 6) | LO(30))
#define DDIVU (HI(0) | (2 << 6) | LO(31))
#define DMOD (HI(0) | (3 << 6) | LO(30))
@@ -176,14 +177,14 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
#define DMUHU (HI(0) | (3 << 6) | LO(29))
#define DMUL (HI(0) | (2 << 6) | LO(28))
#define DMULU (HI(0) | (2 << 6) | LO(29))
-#else /* !SLJIT_MIPS_R6 */
+#else /* SLJIT_MIPS_REV < 6 */
#define DDIV (HI(0) | LO(30))
#define DDIVU (HI(0) | LO(31))
#define DIV (HI(0) | LO(26))
#define DIVU (HI(0) | LO(27))
#define DMULT (HI(0) | LO(28))
#define DMULTU (HI(0) | LO(29))
-#endif /* SLJIT_MIPS_R6 */
+#endif /* SLJIT_MIPS_REV >= 6 */
#define DIV_S (HI(17) | FMT_S | LO(3))
#define DSLL (HI(0) | LO(56))
#define DSLL32 (HI(0) | LO(60))
@@ -198,33 +199,33 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
#define J (HI(2))
#define JAL (HI(3))
#define JALR (HI(0) | LO(9))
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
#define JR (HI(0) | LO(9))
-#else /* !SLJIT_MIPS_R6 */
+#else /* SLJIT_MIPS_REV < 6 */
#define JR (HI(0) | LO(8))
-#endif /* SLJIT_MIPS_R6 */
+#endif /* SLJIT_MIPS_REV >= 6 */
#define LD (HI(55))
#define LUI (HI(15))
#define LW (HI(35))
#define MFC1 (HI(17))
-#if !(defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
-#define MFHI (HI(0) | LO(16))
-#define MFLO (HI(0) | LO(18))
-#else /* SLJIT_MIPS_R6 */
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
#define MOD (HI(0) | (3 << 6) | LO(26))
#define MODU (HI(0) | (3 << 6) | LO(27))
-#endif /* !SLJIT_MIPS_R6 */
+#else /* SLJIT_MIPS_REV < 6 */
+#define MFHI (HI(0) | LO(16))
+#define MFLO (HI(0) | LO(18))
+#endif /* SLJIT_MIPS_REV >= 6 */
#define MOV_S (HI(17) | FMT_S | LO(6))
#define MTC1 (HI(17) | (4 << 21))
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
#define MUH (HI(0) | (3 << 6) | LO(24))
#define MUHU (HI(0) | (3 << 6) | LO(25))
#define MUL (HI(0) | (2 << 6) | LO(24))
#define MULU (HI(0) | (2 << 6) | LO(25))
-#else /* !SLJIT_MIPS_R6 */
+#else /* SLJIT_MIPS_REV < 6 */
#define MULT (HI(0) | LO(24))
#define MULTU (HI(0) | LO(25))
-#endif /* SLJIT_MIPS_R6 */
+#endif /* SLJIT_MIPS_REV >= 6 */
#define MUL_S (HI(17) | FMT_S | LO(2))
#define NEG_S (HI(17) | FMT_S | LO(7))
#define NOP (HI(0) | LO(0))
@@ -251,23 +252,23 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
#define XOR (HI(0) | LO(38))
#define XORI (HI(14))
-#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) || (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
#define CLZ (HI(28) | LO(32))
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
#define DCLZ (LO(18))
-#else /* !SLJIT_MIPS_R6 */
+#else /* SLJIT_MIPS_REV < 6 */
#define DCLZ (HI(28) | LO(36))
#define MOVF (HI(0) | (0 << 16) | LO(1))
#define MOVN (HI(0) | LO(11))
#define MOVT (HI(0) | (1 << 16) | LO(1))
#define MOVZ (HI(0) | LO(10))
#define MUL (HI(28) | LO(2))
-#endif /* SLJIT_MIPS_R6 */
+#endif /* SLJIT_MIPS_REV >= 6 */
#define PREF (HI(51))
#define PREFX (HI(19) | LO(15))
#define SEB (HI(31) | (16 << 6) | LO(32))
#define SEH (HI(31) | (24 << 6) | LO(32))
-#endif
+#endif /* SLJIT_MIPS_REV >= 1 */
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
#define ADDU_W ADDU
@@ -303,10 +304,10 @@ static SLJIT_INLINE sljit_ins invert_branch(sljit_s32 flags)
{
if (flags & IS_BIT26_COND)
return (1 << 26);
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
if (flags & IS_BIT23_COND)
return (1 << 23);
-#endif /* SLJIT_MIPS_R6 */
+#endif /* SLJIT_MIPS_REV >= 6 */
return (1 << 16);
}
@@ -683,12 +684,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
#else
#error "FIR check is not implemented for this architecture"
#endif
+ case SLJIT_HAS_ZERO_REGISTER:
+ return 1;
-#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
case SLJIT_HAS_CLZ:
case SLJIT_HAS_CMOV:
+ case SLJIT_HAS_PREFETCH:
return 1;
-#endif
+#endif /* SLJIT_MIPS_REV >= 1 */
default:
return fir;
@@ -1230,7 +1234,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
return push_inst(compiler, NOP, UNMOVABLE_INS);
case SLJIT_LMUL_UW:
case SLJIT_LMUL_SW:
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? DMULU : DMUL) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3)));
FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? DMUHU : DMUH) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1)));
@@ -1240,7 +1244,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
#endif /* SLJIT_CONFIG_MIPS_64 */
FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | TA(0) | D(SLJIT_R0), DR(SLJIT_R0)));
return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_R1), DR(SLJIT_R1));
-#else /* !SLJIT_MIPS_R6 */
+#else /* SLJIT_MIPS_REV < 6 */
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? DMULTU : DMULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
#else /* !SLJIT_CONFIG_MIPS_64 */
@@ -1248,13 +1252,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
#endif /* SLJIT_CONFIG_MIPS_64 */
FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0)));
return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1));
-#endif /* SLJIT_MIPS_R6 */
+#endif /* SLJIT_MIPS_REV >= 6 */
case SLJIT_DIVMOD_UW:
case SLJIT_DIVMOD_SW:
case SLJIT_DIV_UW:
case SLJIT_DIV_SW:
SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
if (int_op) {
FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3)));
@@ -1270,11 +1274,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
#endif /* SLJIT_CONFIG_MIPS_64 */
FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | TA(0) | D(SLJIT_R0), DR(SLJIT_R0)));
return (op >= SLJIT_DIV_UW) ? SLJIT_SUCCESS : push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_R1), DR(SLJIT_R1));
-#else /* !SLJIT_MIPS_R6 */
-#if !(defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+#else /* SLJIT_MIPS_REV < 6 */
+#if !(defined SLJIT_MIPS_REV)
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif /* !SLJIT_MIPS_R1 */
+#endif /* !SLJIT_MIPS_REV */
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
if (int_op)
FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
@@ -1285,13 +1289,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
#endif /* SLJIT_CONFIG_MIPS_64 */
FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0)));
return (op >= SLJIT_DIV_UW) ? SLJIT_SUCCESS : push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1));
-#endif /* SLJIT_MIPS_R6 */
+#endif /* SLJIT_MIPS_REV >= 6 */
+ case SLJIT_ENDBR:
+ case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
+ return SLJIT_SUCCESS;
}
return SLJIT_SUCCESS;
}
-#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
static sljit_s32 emit_prefetch(struct sljit_compiler *compiler,
sljit_s32 src, sljit_sw srcw)
{
@@ -1312,7 +1319,7 @@ static sljit_s32 emit_prefetch(struct sljit_compiler *compiler,
return push_inst(compiler, PREFX | S(src & REG_MASK) | T(OFFS_REG(src)), MOVABLE_INS);
}
-#endif
+#endif /* SLJIT_MIPS_REV >= 1 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
@@ -1329,14 +1336,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(dst, dstw);
ADJUST_LOCAL_OFFSET(src, srcw);
- if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
-#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
- if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
- return emit_prefetch(compiler, src, srcw);
-#endif
- return SLJIT_SUCCESS;
- }
-
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
if ((op & SLJIT_I32_OP) && GET_OPCODE(op) >= SLJIT_NOT)
flags |= INT_DATA | SIGNED_DATA;
@@ -1463,6 +1462,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
#endif
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src, sljit_sw srcw)
+{ /* MIPS: emits the code for the source-only operation selected by op (fast return, frame-skip request, prefetch hints). */
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ switch (op) {
+ case SLJIT_FAST_RETURN: /* place the return address in RETURN_ADDR_REG, then jump through it */
+ if (FAST_IS_REG(src))
+ FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | DA(RETURN_ADDR_REG), RETURN_ADDR_REG)); /* RETURN_ADDR_REG = src + TA(0); presumably TA(0) is the zero register, i.e. a plain move -- confirm */
+ else
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw)); /* src is not a register: word-sized load of src/srcw into RETURN_ADDR_REG */
+
+ FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); /* jump through RETURN_ADDR_REG */
+ return push_inst(compiler, NOP, UNMOVABLE_INS); /* NOTE(review): presumably fills the branch delay slot of the JR above -- confirm */
+ case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: /* nothing is emitted for this op in this backend */
+ return SLJIT_SUCCESS;
+ case SLJIT_PREFETCH_L1: /* all four prefetch ops share one path; op itself is not passed to emit_prefetch */
+ case SLJIT_PREFETCH_L2:
+ case SLJIT_PREFETCH_L3:
+ case SLJIT_PREFETCH_ONCE:
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
+ return emit_prefetch(compiler, src, srcw);
+#else /* SLJIT_MIPS_REV < 1 */
+ return SLJIT_SUCCESS; /* without SLJIT_MIPS_REV >= 1 the prefetch request is accepted but emits nothing */
+#endif /* SLJIT_MIPS_REV >= 1 */
+ }
+
+ return SLJIT_SUCCESS; /* any op not listed in the switch emits nothing and reports success */
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
CHECK_REG_INDEX(check_sljit_get_register_index(reg));
@@ -1732,25 +1763,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
ADJUST_LOCAL_OFFSET(dst, dstw);
if (FAST_IS_REG(dst))
- return push_inst(compiler, ADDU_W | SA(RETURN_ADDR_REG) | TA(0) | D(dst), DR(dst));
+ return push_inst(compiler, ADDU_W | SA(RETURN_ADDR_REG) | TA(0) | D(dst), UNMOVABLE_INS);
/* Memory. */
- return emit_op_mem(compiler, WORD_DATA, RETURN_ADDR_REG, dst, dstw);
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
-{
- CHECK_ERROR();
- CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
- ADJUST_LOCAL_OFFSET(src, srcw);
-
- if (FAST_IS_REG(src))
- FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | DA(RETURN_ADDR_REG), RETURN_ADDR_REG));
- else
- FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw));
-
- FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS));
- return push_inst(compiler, NOP, UNMOVABLE_INS);
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA, RETURN_ADDR_REG, dst, dstw));
+ compiler->delay_slot = UNMOVABLE_INS;
+ return SLJIT_SUCCESS;
}
/* --------------------------------------------------------------------- */
@@ -1790,7 +1808,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
flags = IS_BIT26_COND; \
delay_check = src;
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
#define BR_T() \
inst = BC1NEZ; \
@@ -1801,7 +1819,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
flags = IS_BIT23_COND; \
delay_check = FCSR_FCC;
-#else /* !SLJIT_MIPS_R6 */
+#else /* SLJIT_MIPS_REV < 6 */
#define BR_T() \
inst = BC1T | JUMP_LENGTH; \
@@ -1812,7 +1830,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
flags = IS_BIT16_COND; \
delay_check = FCSR_FCC;
-#endif /* SLJIT_MIPS_R6 */
+#endif /* SLJIT_MIPS_REV >= 6 */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
@@ -2123,11 +2141,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
case SLJIT_GREATER_EQUAL_F64:
case SLJIT_UNORDERED_F64:
case SLJIT_ORDERED_F64:
-#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
FAIL_IF(push_inst(compiler, MFC1 | TA(dst_ar) | FS(TMP_FREG3), dst_ar));
-#else /* !SLJIT_MIPS_R6 */
+#else /* SLJIT_MIPS_REV < 6 */
FAIL_IF(push_inst(compiler, CFC1 | TA(dst_ar) | DA(FCSR_REG), dst_ar));
-#endif /* SLJIT_MIPS_R6 */
+#endif /* SLJIT_MIPS_REV >= 6 */
FAIL_IF(push_inst(compiler, SRL | TA(dst_ar) | DA(dst_ar) | SH_IMM(23), dst_ar));
FAIL_IF(push_inst(compiler, ANDI | SA(dst_ar) | TA(dst_ar) | IMM(1), dst_ar));
src_ar = dst_ar;
@@ -2167,14 +2185,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
sljit_s32 dst_reg,
sljit_s32 src, sljit_sw srcw)
{
-#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
sljit_ins ins;
-#endif
+#endif /* SLJIT_MIPS_REV >= 1 */
CHECK_ERROR();
CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
-#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
@@ -2231,9 +2249,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
return push_inst(compiler, ins | S(src) | D(dst_reg), DR(dst_reg));
-#else
+#else /* SLJIT_MIPS_REV < 1 */
return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
-#endif
+#endif /* SLJIT_MIPS_REV >= 1 */
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
diff --git a/src/sljit/sljitNativePPC_common.c b/src/sljit/sljitNativePPC_common.c
index e827514..590f91c 100644
--- a/src/sljit/sljitNativePPC_common.c
+++ b/src/sljit/sljitNativePPC_common.c
@@ -626,7 +626,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
return 1;
#endif
+ /* A saved register is set to a zero value. */
+ case SLJIT_HAS_ZERO_REGISTER:
case SLJIT_HAS_CLZ:
+ case SLJIT_HAS_PREFETCH:
return 1;
default:
@@ -1158,6 +1161,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
#else
return push_inst(compiler, (op == SLJIT_DIV_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
#endif
+ case SLJIT_ENDBR:
+ case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
+ return SLJIT_SUCCESS;
}
return SLJIT_SUCCESS;
@@ -1203,13 +1209,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(dst, dstw);
ADJUST_LOCAL_OFFSET(src, srcw);
- if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
- if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
- return emit_prefetch(compiler, src, srcw);
-
- return SLJIT_SUCCESS;
- }
-
op = GET_OPCODE(op);
if ((src & SLJIT_IMM) && srcw == 0)
src = TMP_ZERO;
@@ -1536,6 +1535,35 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src, sljit_sw srcw)
+{ /* PPC: emits the code for the source-only operation selected by op (fast return, frame-skip request, prefetch hints). */
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ switch (op) {
+ case SLJIT_FAST_RETURN: /* feed the return address to MTLR, then emit BLR */
+ if (FAST_IS_REG(src))
+ FAIL_IF(push_inst(compiler, MTLR | S(src))); /* src is already a register: MTLR takes it directly (presumably "move to link register" -- confirm) */
+ else {
+ FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); /* otherwise SLJIT_MOV the word at src/srcw into TMP_REG2 first */
+ FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
+ }
+
+ return push_inst(compiler, BLR); /* NOTE(review): presumably branches to the address just placed in the link register -- confirm */
+ case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: /* nothing is emitted for this op in this backend */
+ return SLJIT_SUCCESS;
+ case SLJIT_PREFETCH_L1: /* all four prefetch ops share one path; op itself is not passed to emit_prefetch */
+ case SLJIT_PREFETCH_L2:
+ case SLJIT_PREFETCH_L3:
+ case SLJIT_PREFETCH_ONCE:
+ return emit_prefetch(compiler, src, srcw);
+ }
+
+ return SLJIT_SUCCESS; /* any op not listed in the switch emits nothing and reports success */
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
CHECK_REG_INDEX(check_sljit_get_register_index(reg));
@@ -1854,22 +1882,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
-{
- CHECK_ERROR();
- CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
- ADJUST_LOCAL_OFFSET(src, srcw);
-
- if (FAST_IS_REG(src))
- FAIL_IF(push_inst(compiler, MTLR | S(src)));
- else {
- FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
- FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
- }
-
- return push_inst(compiler, BLR);
-}
-
/* --------------------------------------------------------------------- */
/* Conditional instructions */
/* --------------------------------------------------------------------- */
diff --git a/src/sljit/sljitNativeSPARC_common.c b/src/sljit/sljitNativeSPARC_common.c
index bfa4ece..7d6be6c 100644
--- a/src/sljit/sljitNativeSPARC_common.c
+++ b/src/sljit/sljitNativeSPARC_common.c
@@ -451,6 +451,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
return 1;
#endif
+ case SLJIT_HAS_ZERO_REGISTER:
+ return 1;
+
#if (defined SLJIT_CONFIG_SPARC_64 && SLJIT_CONFIG_SPARC_64)
case SLJIT_HAS_CMOV:
return 1;
@@ -872,6 +875,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
#else
#error "Implementation required"
#endif
+ case SLJIT_ENDBR:
+ case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
+ return SLJIT_SUCCESS;
}
return SLJIT_SUCCESS;
@@ -888,9 +894,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(dst, dstw);
ADJUST_LOCAL_OFFSET(src, srcw);
- if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
- return SLJIT_SUCCESS;
-
op = GET_OPCODE(op);
switch (op) {
case SLJIT_MOV:
@@ -971,6 +974,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src, sljit_sw srcw)
+{ /* SPARC: emits the code for the source-only operation selected by op; only SLJIT_FAST_RETURN produces instructions here. */
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ switch (op) {
+ case SLJIT_FAST_RETURN: /* place the return address in TMP_LINK, then jump relative to it */
+ if (FAST_IS_REG(src))
+ FAIL_IF(push_inst(compiler, OR | D(TMP_LINK) | S1(0) | S2(src), DR(TMP_LINK))); /* TMP_LINK = S1(0) | src; presumably S1(0) is the zero register, i.e. a plain move -- confirm */
+ else
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_LINK, src, srcw)); /* src is not a register: word-sized load of src/srcw into TMP_LINK */
+
+ FAIL_IF(push_inst(compiler, JMPL | D(0) | S1(TMP_LINK) | IMM(8), UNMOVABLE_INS)); /* jump to TMP_LINK + 8 with destination D(0) */
+ return push_inst(compiler, NOP, UNMOVABLE_INS); /* NOTE(review): presumably fills the delay slot of the JMPL above -- confirm */
+ case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: /* this op and every prefetch op below emit nothing in this backend */
+ case SLJIT_PREFETCH_L1:
+ case SLJIT_PREFETCH_L2:
+ case SLJIT_PREFETCH_L3:
+ case SLJIT_PREFETCH_ONCE:
+ return SLJIT_SUCCESS;
+ }
+
+ return SLJIT_SUCCESS; /* any op not listed in the switch emits nothing and reports success */
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
CHECK_REG_INDEX(check_sljit_get_register_index(reg));
@@ -1215,25 +1245,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
ADJUST_LOCAL_OFFSET(dst, dstw);
if (FAST_IS_REG(dst))
- return push_inst(compiler, OR | D(dst) | S1(0) | S2(TMP_LINK), DR(dst));
+ return push_inst(compiler, OR | D(dst) | S1(0) | S2(TMP_LINK), UNMOVABLE_INS);
/* Memory. */
- return emit_op_mem(compiler, WORD_DATA, TMP_LINK, dst, dstw);
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
-{
- CHECK_ERROR();
- CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
- ADJUST_LOCAL_OFFSET(src, srcw);
-
- if (FAST_IS_REG(src))
- FAIL_IF(push_inst(compiler, OR | D(TMP_LINK) | S1(0) | S2(src), DR(TMP_LINK)));
- else
- FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_LINK, src, srcw));
-
- FAIL_IF(push_inst(compiler, JMPL | D(0) | S1(TMP_LINK) | IMM(8), UNMOVABLE_INS));
- return push_inst(compiler, NOP, UNMOVABLE_INS);
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_LINK, dst, dstw));
+ compiler->delay_slot = UNMOVABLE_INS;
+ return SLJIT_SUCCESS;
}
/* --------------------------------------------------------------------- */
diff --git a/src/sljit/sljitNativeTILEGX_64.c b/src/sljit/sljitNativeTILEGX_64.c
index 003f43a..d69ecd6 100644
--- a/src/sljit/sljitNativeTILEGX_64.c
+++ b/src/sljit/sljitNativeTILEGX_64.c
@@ -1564,24 +1564,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
return emit_op_mem(compiler, WORD_DATA, RA, dst, dstw);
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
-{
- CHECK_ERROR();
- CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
- ADJUST_LOCAL_OFFSET(src, srcw);
-
- if (FAST_IS_REG(src))
- FAIL_IF(ADD(RA, reg_map[src], ZERO));
-
- else if (src & SLJIT_MEM)
- FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RA, src, srcw));
-
- else if (src & SLJIT_IMM)
- FAIL_IF(load_immediate(compiler, RA, srcw));
-
- return JR(RA);
-}
-
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
{
sljit_s32 overflow_ra = 0;
@@ -2184,6 +2166,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
case SLJIT_DIV_UW:
case SLJIT_DIV_SW:
SLJIT_UNREACHABLE();
+ case SLJIT_ENDBR:
+ case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
+ return SLJIT_SUCCESS;
}
return SLJIT_SUCCESS;
@@ -2293,6 +2278,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src, sljit_sw srcw)
+{ /* TILE-Gx: emits the code for the source-only operation selected by op; only SLJIT_FAST_RETURN produces instructions here. */
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ switch (op) {
+ case SLJIT_FAST_RETURN: /* place the return address in RA, then jump through it */
+ if (FAST_IS_REG(src))
+ FAIL_IF(ADD(RA, reg_map[src], ZERO)); /* RA = mapped src register + ZERO, i.e. a register copy */
+
+ else
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RA, src, srcw)); /* every non-register src goes through a word-sized load into RA */
+
+ return JR(RA); /* jump through RA */
+ case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: /* nothing is emitted for this op in this backend */
+ return SLJIT_SUCCESS;
+ }
+
+ return SLJIT_SUCCESS; /* ops without a case above (the switch lists no prefetch ops) emit nothing and report success */
+}
+
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label * sljit_emit_label(struct sljit_compiler *compiler)
{
struct sljit_label *label;
diff --git a/src/sljit/sljitNativeX86_32.c b/src/sljit/sljitNativeX86_32.c
index 34a3a3d..79a7e8b 100644
--- a/src/sljit/sljitNativeX86_32.c
+++ b/src/sljit/sljitNativeX86_32.c
@@ -76,6 +76,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
+ /* Emit ENDBR32 at function entry if needed. */
+ FAIL_IF(emit_endbranch(compiler));
+
args = get_arg_count(arg_types);
compiler->args = args;
@@ -307,14 +310,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#endif
- size = 2 + (compiler->scratches > 7 ? (compiler->scratches - 7) : 0) +
+ size = 2 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) +
(compiler->saveds <= 3 ? compiler->saveds : 3);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (compiler->args > 2)
size += 2;
-#else
- if (compiler->args > 0)
- size += 2;
#endif
inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
FAIL_IF(!inst);
@@ -367,6 +367,8 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32
SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
+ /* We don't support (%ebp). */
+ SLJIT_ASSERT(!(b & SLJIT_MEM) || immb || reg_map[b & REG_MASK] != 5);
size &= 0xf;
inst_size = size;
@@ -863,14 +865,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
+static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
sljit_u8 *inst;
- CHECK_ERROR();
- CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
- ADJUST_LOCAL_OFFSET(src, srcw);
-
CHECK_EXTRA_REGS(src, srcw, (void)0);
if (FAST_IS_REG(src)) {
@@ -894,3 +892,37 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
RET();
return SLJIT_SUCCESS;
}
+
+static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
+{
+ sljit_s32 size, saved_size;
+ sljit_s32 has_f64_aligment;
+
+ /* Don't adjust shadow stack if it isn't enabled. */
+ if (!cpu_has_shadow_stack ())
+ return SLJIT_SUCCESS;
+
+ SLJIT_ASSERT(compiler->args >= 0);
+ SLJIT_ASSERT(compiler->local_size > 0);
+
+#if !defined(__APPLE__)
+ has_f64_aligment = compiler->options & SLJIT_F64_ALIGNMENT;
+#else
+ has_f64_aligment = 0;
+#endif
+
+ size = compiler->local_size;
+ saved_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + (compiler->saveds <= 3 ? compiler->saveds : 3)) * sizeof(sljit_uw);
+ if (has_f64_aligment) {
+ /* mov TMP_REG1, [esp + local_size]. */
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), size);
+ /* mov TMP_REG1, [TMP_REG1+ saved_size]. */
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), saved_size);
+ /* Move return address to [esp]. */
+ EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, TMP_REG1, 0);
+ size = 0;
+ } else
+ size += saved_size;
+
+ return adjust_shadow_stack(compiler, SLJIT_UNUSED, 0, SLJIT_SP, size);
+}
diff --git a/src/sljit/sljitNativeX86_64.c b/src/sljit/sljitNativeX86_64.c
index 5758711..e85b56a 100644
--- a/src/sljit/sljitNativeX86_64.c
+++ b/src/sljit/sljitNativeX86_64.c
@@ -135,6 +135,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
+ /* Emit ENDBR64 at function entry if needed. */
+ FAIL_IF(emit_endbranch(compiler));
+
compiler->mode32 = 0;
#ifdef _WIN64
@@ -796,14 +799,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
+static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
sljit_u8 *inst;
- CHECK_ERROR();
- CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
- ADJUST_LOCAL_OFFSET(src, srcw);
-
if (FAST_IS_REG(src)) {
if (reg_map[src] < 8) {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
@@ -898,3 +897,22 @@ static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
return SLJIT_SUCCESS;
}
+
+static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
+{
+ sljit_s32 tmp, size;
+
+ /* Don't adjust shadow stack if it isn't enabled. */
+ if (!cpu_has_shadow_stack ())
+ return SLJIT_SUCCESS;
+
+ size = compiler->local_size;
+ tmp = compiler->scratches;
+ if (tmp >= SLJIT_FIRST_SAVED_REG)
+ size += (tmp - SLJIT_FIRST_SAVED_REG + 1) * sizeof(sljit_uw);
+ tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
+ if (SLJIT_S0 >= tmp)
+ size += (SLJIT_S0 - tmp + 1) * sizeof(sljit_uw);
+
+ return adjust_shadow_stack(compiler, SLJIT_UNUSED, 0, SLJIT_SP, size);
+}
diff --git a/src/sljit/sljitNativeX86_common.c b/src/sljit/sljitNativeX86_common.c
index 6296da5..74965e3 100644
--- a/src/sljit/sljitNativeX86_common.c
+++ b/src/sljit/sljitNativeX86_common.c
@@ -657,6 +657,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
get_cpu_features();
return cpu_has_cmov;
+ case SLJIT_HAS_PREFETCH:
+ return 1;
+
case SLJIT_HAS_SSE2:
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
if (cpu_has_sse2 == -1)
@@ -702,6 +705,171 @@ static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
+static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2, sljit_sw src2w);
+
+static SLJIT_INLINE sljit_s32 emit_endbranch(struct sljit_compiler *compiler)
+{
+#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
+ /* Emit endbr32/endbr64 when CET is enabled. */
+ sljit_u8 *inst;
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
+ FAIL_IF(!inst);
+ INC_SIZE(4);
+ *inst++ = 0xf3;
+ *inst++ = 0x0f;
+ *inst++ = 0x1e;
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ *inst = 0xfb;
+#else
+ *inst = 0xfa;
+#endif
+#else
+ SLJIT_UNUSED_ARG(compiler);
+#endif
+ return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_s32 emit_rdssp(struct sljit_compiler *compiler, sljit_s32 reg)
+{
+#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
+ sljit_u8 *inst;
+ sljit_s32 size;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ size = 5;
+#else
+ size = 4;
+#endif
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
+ FAIL_IF(!inst);
+ INC_SIZE(size);
+ *inst++ = 0xf3;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
+#endif
+ *inst++ = 0x0f;
+ *inst++ = 0x1e;
+ *inst = (0x3 << 6) | (0x1 << 3) | (reg_map[reg] & 0x7);
+#else
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(reg);
+#endif
+ return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_s32 emit_incssp(struct sljit_compiler *compiler, sljit_s32 reg)
+{
+#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
+ sljit_u8 *inst;
+ sljit_s32 size;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ size = 5;
+#else
+ size = 4;
+#endif
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
+ FAIL_IF(!inst);
+ INC_SIZE(size);
+ *inst++ = 0xf3;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
+#endif
+ *inst++ = 0x0f;
+ *inst++ = 0xae;
+ *inst = (0x3 << 6) | (0x5 << 3) | (reg_map[reg] & 0x7);
+#else
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(reg);
+#endif
+ return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_s32 cpu_has_shadow_stack(void)
+{
+#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
+ return _get_ssp() != 0;
+#else
+ return 0;
+#endif
+}
+
+/*
+ * Emits a short loop that pops entries off the CET shadow stack until the
+ * entry on top equals the return address that is about to be used.
+ *
+ * The return address is read from src/srcw; when src is SLJIT_UNUSED it is
+ * read from memory at [base + disp] instead. TMP_REG1 is used as scratch.
+ * Nothing is emitted unless SLJIT_CONFIG_X86_CET is enabled.
+ *
+ * Returns SLJIT_SUCCESS on the normal path; FAIL_IF leaves early when an
+ * emit step fails.
+ */
+static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compiler,
+ sljit_s32 src, sljit_sw srcw, sljit_s32 base, sljit_sw disp)
+{
+#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
+ /* All locals are declared up front: declarations after statements
+ are not valid C89. */
+ sljit_u8 *inst;
+ sljit_u8 *jz_after_cmp_inst;
+ sljit_uw size_jz_after_cmp_inst;
+ /* Code size at the loop head, used for the backward jump below. */
+ sljit_uw size_before_rdssp_inst = compiler->size;
+
+ /* Generate "RDSSP TMP_REG1". */
+ FAIL_IF(emit_rdssp(compiler, TMP_REG1));
+
+ /* Load return address on shadow stack into TMP_REG1. */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ SLJIT_ASSERT(reg_map[TMP_REG1] == 5);
+
+ /* Hand code unsupported "mov 0x0(%ebp),%ebp". */
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
+ FAIL_IF(!inst);
+ INC_SIZE(3);
+ *inst++ = 0x8b;
+ *inst++ = 0x6d;
+ *inst = 0;
+#else /* !SLJIT_CONFIG_X86_32 */
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0);
+#endif /* SLJIT_CONFIG_X86_32 */
+
+ if (src == SLJIT_UNUSED) {
+ /* Return address is on stack. */
+ src = SLJIT_MEM1(base);
+ srcw = disp;
+ }
+
+ /* Compare return address against TMP_REG1. */
+ FAIL_IF(emit_cmp_binary (compiler, TMP_REG1, 0, src, srcw));
+
+ /* Generate JZ to skip shadow stack adjustment when shadow
+ stack matches normal stack. */
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
+ FAIL_IF(!inst);
+ INC_SIZE(2);
+ *inst++ = get_jump_code(SLJIT_EQUAL) - 0x10;
+ /* Remember where the JZ displacement byte lives and the code size
+ right after the JZ; the displacement is patched at the end. */
+ size_jz_after_cmp_inst = compiler->size;
+ jz_after_cmp_inst = inst;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ /* REX_W is not necessary. */
+ compiler->mode32 = 1;
+#endif
+ /* Load 1 into TMP_REG1. */
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
+
+ /* Generate "INCSSP TMP_REG1". */
+ FAIL_IF(emit_incssp(compiler, TMP_REG1));
+
+ /* Jump back to "RDSSP TMP_REG1" to check shadow stack again. */
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
+ FAIL_IF(!inst);
+ INC_SIZE(2);
+ *inst++ = JMP_i8;
+ /* Backward 8-bit displacement: only the low byte of the (wrapped)
+ unsigned difference is kept. */
+ *inst = (sljit_u8)(size_before_rdssp_inst - compiler->size);
+
+ /* Forward 8-bit displacement of the JZ: jump past the loop body. */
+ *jz_after_cmp_inst = (sljit_u8)(compiler->size - size_jz_after_cmp_inst);
+#else /* SLJIT_CONFIG_X86_CET */
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(src);
+ SLJIT_UNUSED_ARG(srcw);
+ SLJIT_UNUSED_ARG(base);
+ SLJIT_UNUSED_ARG(disp);
+#endif /* SLJIT_CONFIG_X86_CET */
+ return SLJIT_SUCCESS;
+}
+
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
@@ -905,6 +1073,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#endif
break;
+ case SLJIT_ENDBR:
+ return emit_endbranch(compiler);
+ case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
+ return skip_frames_before_return(compiler);
}
return SLJIT_SUCCESS;
@@ -1074,12 +1246,12 @@ static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
*inst++ = GROUP_0F;
*inst++ = PREFETCH;
- if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8)
- *inst |= (3 << 3);
- else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16)
- *inst |= (2 << 3);
- else
+ if (op == SLJIT_PREFETCH_L1)
*inst |= (1 << 3);
+ else if (op == SLJIT_PREFETCH_L2)
+ *inst |= (2 << 3);
+ else if (op == SLJIT_PREFETCH_L3)
+ *inst |= (3 << 3);
return SLJIT_SUCCESS;
}
@@ -1284,12 +1456,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
compiler->mode32 = op_flags & SLJIT_I32_OP;
#endif
- if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
- if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
- return emit_prefetch(compiler, op, src, srcw);
- return SLJIT_SUCCESS;
- }
-
op = GET_OPCODE(op);
if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
@@ -2150,6 +2316,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
if (!HAS_FLAGS(op)) {
if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
return compiler->error;
+ if (SLOW_IS_REG(dst) && src2 == dst) {
+ FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w));
+ return emit_unary(compiler, NEG_rm, dst, 0, dst, 0);
+ }
}
if (dst == SLJIT_UNUSED)
@@ -2186,6 +2356,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src, sljit_sw srcw)
+{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ CHECK_EXTRA_REGS(src, srcw, (void)0);
+
+ switch (op) {
+ case SLJIT_FAST_RETURN:
+ return emit_fast_return(compiler, src, srcw);
+ case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
+ /* Don't adjust shadow stack if it isn't enabled. */
+ if (!cpu_has_shadow_stack ())
+ return SLJIT_SUCCESS;
+ return adjust_shadow_stack(compiler, src, srcw, SLJIT_UNUSED, 0);
+ case SLJIT_PREFETCH_L1:
+ case SLJIT_PREFETCH_L2:
+ case SLJIT_PREFETCH_L3:
+ case SLJIT_PREFETCH_ONCE:
+ return emit_prefetch(compiler, op, src, srcw);
+ }
+
+ return SLJIT_SUCCESS;
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
CHECK_REG_INDEX(check_sljit_get_register_index(reg));
diff --git a/src/sljit/sljitProtExecAllocator.c b/src/sljit/sljitProtExecAllocator.c
index 8a5b2b3..3f412fe 100644
--- a/src/sljit/sljitProtExecAllocator.c
+++ b/src/sljit/sljitProtExecAllocator.c
@@ -70,7 +70,6 @@
struct chunk_header {
void *executable;
- int fd;
};
/*
@@ -96,8 +95,20 @@ struct chunk_header {
#endif
#endif
+#if !(defined(__NetBSD__) && defined(MAP_REMAPDUP))
int mkostemp(char *template, int flags);
+
+#ifdef __NetBSD__
+/*
+ * Workaround for NetBSD < 8, which lacks a system-provided
+ * secure_getenv function. Ideally this should never be used, as the
+ * standard allocator is the preferred option on those systems. The
+ * expansion is parenthesized so it stays one operand in any expression.
+ */
+#define secure_getenv(name) (issetugid() ? NULL : getenv(name))
+#else
char *secure_getenv(const char *name);
+#endif
static SLJIT_INLINE int create_tempfile(void)
{
@@ -108,6 +119,13 @@ static SLJIT_INLINE int create_tempfile(void)
char *dir;
size_t len;
+#ifdef HAVE_MEMFD_CREATE
+ /* this is a GNU extension, make sure to use -D_GNU_SOURCE */
+ fd = memfd_create("sljit", MFD_CLOEXEC);
+ if (fd != -1)
+ return fd;
+#endif
+
#ifdef P_tmpdir
len = (P_tmpdir != NULL) ? strlen(P_tmpdir) : 0;
@@ -125,6 +143,7 @@ static SLJIT_INLINE int create_tempfile(void)
#endif
dir = secure_getenv("TMPDIR");
+
if (dir) {
len = strlen(dir);
if (len > 0 && len < sizeof(tmp_name)) {
@@ -189,23 +208,50 @@ static SLJIT_INLINE struct chunk_header* alloc_chunk(sljit_uw size)
retval->executable = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
if (retval->executable == MAP_FAILED) {
- munmap(retval, size);
+ munmap((void *)retval, size);
close(fd);
return NULL;
}
- retval->fd = fd;
+ close(fd);
+ return retval;
+}
+#else
+static SLJIT_INLINE struct chunk_header* alloc_chunk(sljit_uw size)
+{
+ struct chunk_header *retval;
+ void *maprx;
+
+ retval = (struct chunk_header *)mmap(NULL, size,
+ PROT_MPROTECT(PROT_EXEC|PROT_WRITE|PROT_READ),
+ MAP_ANON, -1, 0);
+
+ if (retval == MAP_FAILED)
+ return NULL;
+
+ maprx = mremap(retval, size, NULL, size, MAP_REMAPDUP);
+ if (maprx == MAP_FAILED) {
+ munmap((void *)retval, size);
+ return NULL;
+ }
+
+ if (mprotect(retval, size, PROT_READ | PROT_WRITE) == -1 ||
+ mprotect(maprx, size, PROT_READ | PROT_EXEC) == -1) {
+ munmap(maprx, size);
+ munmap((void *)retval, size);
+ return NULL;
+ }
+ retval->executable = maprx;
return retval;
}
+#endif /* NetBSD >= 8 */
static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
{
struct chunk_header *header = ((struct chunk_header *)chunk) - 1;
- int fd = header->fd;
munmap(header->executable, size);
- munmap(header, size);
- close(fd);
+ munmap((void *)header, size);
}
/* --------------------------------------------------------------------- */
@@ -385,7 +431,9 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr)
if (total_size - free_block->size > (allocated_size * 3 / 2)) {
total_size -= free_block->size;
sljit_remove_free_block(free_block);
- free_chunk(free_block, free_block->size + sizeof(struct block_header));
+ free_chunk(free_block, free_block->size +
+ sizeof(struct chunk_header) +
+ sizeof(struct block_header));
}
}
@@ -406,7 +454,9 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) {
total_size -= free_block->size;
sljit_remove_free_block(free_block);
- free_chunk(free_block, free_block->size + sizeof(struct block_header));
+ free_chunk(free_block, free_block->size +
+ sizeof(struct chunk_header) +
+ sizeof(struct block_header));
}
free_block = next_free_block;
}
diff --git a/src/sljit/sljitUtils.c b/src/sljit/sljitUtils.c
index 857492a..0276fa1 100644
--- a/src/sljit/sljitUtils.c
+++ b/src/sljit/sljitUtils.c
@@ -152,15 +152,23 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_release_lock(void)
#ifdef _WIN32
#include "windows.h"
-#else
+#else /* !_WIN32 */
/* Provides mmap function. */
#include <sys/types.h>
#include <sys/mman.h>
+
#ifndef MAP_ANON
#ifdef MAP_ANONYMOUS
#define MAP_ANON MAP_ANONYMOUS
-#endif
-#endif
+#endif /* MAP_ANONYMOUS */
+#endif /* !MAP_ANON */
+
+#ifndef MADV_DONTNEED
+#ifdef POSIX_MADV_DONTNEED
+#define MADV_DONTNEED POSIX_MADV_DONTNEED
+#endif /* POSIX_MADV_DONTNEED */
+#endif /* !MADV_DONTNEED */
+
/* For detecting the page size. */
#include <unistd.h>
@@ -198,35 +206,85 @@ static SLJIT_INLINE sljit_s32 open_dev_zero(void)
#endif /* SLJIT_SINGLE_THREADED */
-#endif
+#endif /* !MAP_ANON */
-#endif
+#endif /* _WIN32 */
#endif /* SLJIT_UTIL_STACK || SLJIT_EXECUTABLE_ALLOCATOR */
+#endif /* SLJIT_EXECUTABLE_ALLOCATOR || SLJIT_UTIL_GLOBAL_LOCK */
+
#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK)
-/* Planning to make it even more clever in the future. */
-static sljit_sw sljit_page_align = 0;
+#if (defined SLJIT_UTIL_SIMPLE_STACK_ALLOCATION && SLJIT_UTIL_SIMPLE_STACK_ALLOCATION)
SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(sljit_uw start_size, sljit_uw max_size, void *allocator_data)
{
struct sljit_stack *stack;
void *ptr;
-#ifdef _WIN32
- SYSTEM_INFO si;
-#endif
SLJIT_UNUSED_ARG(allocator_data);
+
if (start_size > max_size || start_size < 1)
return NULL;
+ stack = (struct sljit_stack*)SLJIT_MALLOC(sizeof(struct sljit_stack), allocator_data);
+ if (stack == NULL)
+ return NULL;
+
+ ptr = SLJIT_MALLOC(max_size, allocator_data);
+ if (ptr == NULL) {
+ SLJIT_FREE(stack, allocator_data);
+ return NULL;
+ }
+
+ stack->min_start = (sljit_u8 *)ptr;
+ stack->end = stack->min_start + max_size;
+ stack->start = stack->end - start_size;
+ stack->top = stack->end;
+ return stack;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data)
+{
+ SLJIT_UNUSED_ARG(allocator_data);
+ SLJIT_FREE((void*)stack->min_start, allocator_data);
+ SLJIT_FREE(stack, allocator_data);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_start)
+{
+ if ((new_start < stack->min_start) || (new_start >= stack->end))
+ return NULL;
+ stack->start = new_start;
+ return new_start;
+}
+
+#else /* !SLJIT_UTIL_SIMPLE_STACK_ALLOCATION */
+
#ifdef _WIN32
+
+SLJIT_INLINE static sljit_sw get_page_alignment(void) {
+ SYSTEM_INFO si;
+ static sljit_sw sljit_page_align;
if (!sljit_page_align) {
GetSystemInfo(&si);
sljit_page_align = si.dwPageSize - 1;
}
-#else
+ return sljit_page_align;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data)
+{
+ SLJIT_UNUSED_ARG(allocator_data);
+ VirtualFree((void*)stack->min_start, 0, MEM_RELEASE);
+ SLJIT_FREE(stack, allocator_data);
+}
+
+#else /* ! defined _WIN32 */
+
+SLJIT_INLINE static sljit_sw get_page_alignment(void) {
+ static sljit_sw sljit_page_align;
if (!sljit_page_align) {
sljit_page_align = sysconf(_SC_PAGESIZE);
/* Should never happen. */
@@ -234,14 +292,36 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(slj
sljit_page_align = 4096;
sljit_page_align--;
}
-#endif
+ return sljit_page_align;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data)
+{
+ SLJIT_UNUSED_ARG(allocator_data);
+ munmap((void*)stack->min_start, stack->end - stack->min_start);
+ SLJIT_FREE(stack, allocator_data);
+}
+
+#endif /* defined _WIN32 */
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(sljit_uw start_size, sljit_uw max_size, void *allocator_data)
+{
+ struct sljit_stack *stack;
+ void *ptr;
+ sljit_sw page_align;
+
+ SLJIT_UNUSED_ARG(allocator_data);
+
+ if (start_size > max_size || start_size < 1)
+ return NULL;
stack = (struct sljit_stack*)SLJIT_MALLOC(sizeof(struct sljit_stack), allocator_data);
- if (!stack)
+ if (stack == NULL)
return NULL;
/* Align max_size. */
- max_size = (max_size + sljit_page_align) & ~sljit_page_align;
+ page_align = get_page_alignment();
+ max_size = (max_size + page_align) & ~page_align;
#ifdef _WIN32
ptr = VirtualAlloc(NULL, max_size, MEM_RESERVE, PAGE_READWRITE);
@@ -258,18 +338,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(slj
sljit_free_stack(stack, allocator_data);
return NULL;
}
-#else
+#else /* !_WIN32 */
#ifdef MAP_ANON
ptr = mmap(NULL, max_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
-#else
+#else /* !MAP_ANON */
if (dev_zero < 0) {
- if (open_dev_zero()) {
+ if (open_dev_zero() != 0) {
SLJIT_FREE(stack, allocator_data);
return NULL;
}
}
ptr = mmap(NULL, max_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero, 0);
-#endif
+#endif /* MAP_ANON */
if (ptr == MAP_FAILED) {
SLJIT_FREE(stack, allocator_data);
return NULL;
@@ -277,35 +357,28 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(slj
stack->min_start = (sljit_u8 *)ptr;
stack->end = stack->min_start + max_size;
stack->start = stack->end - start_size;
-#endif
+#endif /* _WIN32 */
+
stack->top = stack->end;
return stack;
}
-#undef PAGE_ALIGN
-
-SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data)
-{
- SLJIT_UNUSED_ARG(allocator_data);
-#ifdef _WIN32
- VirtualFree((void*)stack->min_start, 0, MEM_RELEASE);
-#else
- munmap((void*)stack->min_start, stack->end - stack->min_start);
-#endif
- SLJIT_FREE(stack, allocator_data);
-}
-
SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_start)
{
+#if defined _WIN32 || defined(MADV_DONTNEED)
sljit_uw aligned_old_start;
sljit_uw aligned_new_start;
+ sljit_sw page_align;
+#endif
if ((new_start < stack->min_start) || (new_start >= stack->end))
return NULL;
#ifdef _WIN32
- aligned_new_start = (sljit_uw)new_start & ~sljit_page_align;
- aligned_old_start = ((sljit_uw)stack->start) & ~sljit_page_align;
+ page_align = get_page_alignment();
+
+ aligned_new_start = (sljit_uw)new_start & ~page_align;
+ aligned_old_start = ((sljit_uw)stack->start) & ~page_align;
if (aligned_new_start != aligned_old_start) {
if (aligned_new_start < aligned_old_start) {
if (!VirtualAlloc((void*)aligned_new_start, aligned_old_start - aligned_new_start, MEM_COMMIT, PAGE_READWRITE))
@@ -316,24 +389,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_st
return NULL;
}
}
-#else
+#elif defined(MADV_DONTNEED)
if (stack->start < new_start) {
- aligned_new_start = (sljit_uw)new_start & ~sljit_page_align;
- aligned_old_start = ((sljit_uw)stack->start) & ~sljit_page_align;
+ page_align = get_page_alignment();
+
+ aligned_new_start = (sljit_uw)new_start & ~page_align;
+ aligned_old_start = ((sljit_uw)stack->start) & ~page_align;
/* If madvise is available, we release the unnecessary space. */
-#if defined(MADV_DONTNEED)
if (aligned_new_start > aligned_old_start)
madvise((void*)aligned_old_start, aligned_new_start - aligned_old_start, MADV_DONTNEED);
-#elif defined(POSIX_MADV_DONTNEED)
- if (aligned_new_start > aligned_old_start)
- posix_madvise((void*)aligned_old_start, aligned_new_start - aligned_old_start, POSIX_MADV_DONTNEED);
-#endif
}
-#endif
+#endif /* _WIN32 */
+
stack->start = new_start;
return new_start;
}
-#endif /* SLJIT_UTIL_STACK */
+#endif /* SLJIT_UTIL_SIMPLE_STACK_ALLOCATION */
-#endif
+#endif /* SLJIT_UTIL_STACK */
diff --git a/test-driver b/test-driver
index b8521a4..89dba1e 100755
--- a/test-driver
+++ b/test-driver
@@ -3,7 +3,7 @@
scriptversion=2018-03-07.03; # UTC
-# Copyright (C) 2011-2018 Free Software Foundation, Inc.
+# Copyright (C) 2011-2020 Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
diff --git a/testdata/grepoutputN b/testdata/grepoutputN
index ba97e90..caaeb75 100644
--- a/testdata/grepoutputN
+++ b/testdata/grepoutputN
@@ -2,16 +2,20 @@
1:abc 2:def ---------------------------- Test N2 ------------------------------
1:abc def
2:ghi
-jkl---------------------------- Test N3 ------------------------------
+jkl
+---------------------------- Test N3 ------------------------------
2:def 3:
ghi
-jkl---------------------------- Test N4 ------------------------------
+jkl ---------------------------- Test N4 ------------------------------
2:ghi
-jkl---------------------------- Test N5 ------------------------------
+jkl
+---------------------------- Test N5 ------------------------------
1:abc 2:def
3:ghi
-4:jkl---------------------------- Test N6 ------------------------------
+4:jkl
+---------------------------- Test N6 ------------------------------
1:abc 2:def
3:ghi
-4:jkl---------------------------- Test N7 ------------------------------
-1:abcZERO2:def
+4:jkl
+---------------------------- Test N7 ------------------------------
+1:abcZERO2:defZERO
diff --git a/testdata/testbtables b/testdata/testbtables
new file mode 100644
index 0000000..b7aeeaf
--- /dev/null
+++ b/testdata/testbtables
Binary files differ
diff --git a/testdata/testinput1 b/testdata/testinput1
index f5159d6..8d952e2 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -6386,4 +6386,45 @@ ef) x/x,mark
/^(?<A>a)(?(<A>)b)((?<=b).*)$/
abc
+/^(a\1?){4}$/
+ aaaa
+ aaaaaa
+
+/^((\1+)|\d)+133X$/
+ 111133X
+
+/^(?=.*(?=(([A-Z]).*(?(1)\1)))(?!.+\2)){26}/i
+ The quick brown fox jumps over the lazy dog.
+ Jackdaws love my big sphinx of quartz.
+ Pack my box with five dozen liquor jugs.
+\= Expect no match
+ The quick brown fox jumps over the lazy cat.
+ Hackdaws love my big sphinx of quartz.
+ Pack my fox with five dozen liquor jugs.
+
+/^(?>.*?([A-Z])(?!.*\1)){26}/i
+ The quick brown fox jumps over the lazy dog.
+ Jackdaws love my big sphinx of quartz.
+ Pack my box with five dozen liquor jugs.
+\= Expect no match
+ The quick brown fox jumps over the lazy cat.
+ Hackdaws love my big sphinx of quartz.
+ Pack my fox with five dozen liquor jugs.
+
+"(?<=X(?(DEFINE)(A)))X(*F)"
+\= Expect no match
+ AXYZ
+
+"(?<=X(?(DEFINE)(A)))."
+ AXYZ
+
+"(?<=X(?(DEFINE)(.*))Y)."
+ AXYZ
+
+"(?<=X(?(DEFINE)(Y))(?1))."
+ AXYZ
+
+"(?(DEFINE)(?<foo>bar))(?<![-a-z0-9])word"
+ word
+
# End of testinput1
diff --git a/testdata/testinput10 b/testdata/testinput10
index 3813709..b3c3197 100644
--- a/testdata/testinput10
+++ b/testdata/testinput10
@@ -570,8 +570,10 @@
/[\xff\x{ffff}]/I,utf
/[\xff\x{ff}]/I,utf
+ abc\x{ff}def
/[\xff\x{ff}]/I
+ abc\x{ff}def
/[Ss]/I
@@ -585,4 +587,27 @@
abc\x80\=startchar
abc\x80\=startchar,offset=3
+/\x{c1}+\x{e1}/iIB,ucp
+ \x{c1}\x{c1}\x{c1}
+ \x{e1}\x{e1}\x{e1}
+
+/a|\x{c1}/iI,ucp
+ \x{e1}xxx
+
+/a|\x{c1}/iI,utf
+ \x{e1}xxx
+
+/\x{c1}|\x{e1}/iI,ucp
+
+/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended
+ X\x{e1}Y
+
+/X(\x{e1})Y/i,ucp,replace=>\L$1<,substitute_extended
+ X\x{c1}Y
+
+# Without UTF or UCP characters > 127 have only one case in the default locale.
+
+/X(\x{e1})Y/replace=>\U$1<,substitute_extended
+ X\x{e1}Y
+
# End of testinput10
diff --git a/testdata/testinput12 b/testdata/testinput12
index bed00a5..9b4f8d3 100644
--- a/testdata/testinput12
+++ b/testdata/testinput12
@@ -444,6 +444,12 @@
\= Expect no match
A\x{d800}B
A\x{110000}B
+
+/aa/utf,ucp,match_invalid_utf,global
+ aa\x{d800}aa
+
+/aa/utf,ucp,match_invalid_utf,global
+ \x{d800}aa
# ----------------------------------------------------
@@ -463,4 +469,81 @@
/(?:\x{ff}|\x{3000})/I,utf
+# ----------------------------------------------------
+# UCP and casing tests
+
+/\x{120}/i,I
+
+/\x{c1}/i,I,ucp
+
+/[\x{120}\x{121}]/iB,ucp
+
+/[ab\x{120}]+/iB,ucp
+ aABb\x{121}\x{120}
+
+/\x{c1}/i,no_start_optimize
+\= Expect no match
+ \x{e1}
+
+/\x{120}\x{c1}/i,ucp,no_start_optimize
+ \x{121}\x{e1}
+
+/\x{120}\x{c1}/i,ucp
+ \x{121}\x{e1}
+
+/[^\x{120}]/i,no_start_optimize
+ \x{121}
+
+/[^\x{120}]/i,ucp,no_start_optimize
+\= Expect no match
+ \x{121}
+
+/[^\x{120}]/i
+ \x{121}
+
+/[^\x{120}]/i,ucp
+\= Expect no match
+ \x{121}
+
+/\x{120}{2}/i,ucp
+ \x{121}\x{121}
+
+/[^\x{120}]{2}/i,ucp
+\= Expect no match
+ \x{121}\x{121}
+
+/\x{c1}+\x{e1}/iB,ucp
+ \x{c1}\x{c1}\x{c1}
+
+/\x{c1}+\x{e1}/iIB,ucp
+ \x{c1}\x{c1}\x{c1}
+ \x{e1}\x{e1}\x{e1}
+
+/a|\x{c1}/iI,ucp
+ \x{e1}xxx
+
+/\x{c1}|\x{e1}/iI,ucp
+
+/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended
+ X\x{e1}Y
+
+/X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended
+ X\x{121}Y
+
+/s/i,ucp
+ \x{17f}
+
+/s/i,utf
+ \x{17f}
+
+/[^s]/i,ucp
+\= Expect no match
+ \x{17f}
+
+/[^s]/i,utf
+\= Expect no match
+ \x{17f}
+
+# ----------------------------------------------------
+
# End of testinput12
diff --git a/testdata/testinput14 b/testdata/testinput14
index f97f3ec..8a17ae7 100644
--- a/testdata/testinput14
+++ b/testdata/testinput14
@@ -1,9 +1,12 @@
-# These test special (mostly error) UTF features of DFA matching. They are a
-# selection of the more comprehensive tests that are run for non-DFA matching.
-# The output is different for the different widths.
+# These test special UTF and UCP features of DFA matching. The output is
+# different for the different widths.
#subject dfa
+# ----------------------------------------------------
+# These are a selection of the more comprehensive tests that are run for
+# non-DFA matching.
+
/X/utf
XX\x{d800}
XX\x{d800}\=offset=3
@@ -33,5 +36,46 @@
XX\xef\x80\=ph
\xf7\=ph
\xf7\x80\=ph
+
+# ----------------------------------------------------
+# UCP and casing tests - except for the first two, these will all fail in 8-bit
+# mode because they are testing UCP without UTF and use characters > 255.
+
+/\x{c1}/i,no_start_optimize
+\= Expect no match
+ \x{e1}
+
+/\x{c1}+\x{e1}/iB,ucp
+ \x{c1}\x{c1}\x{c1}
+ \x{e1}\x{e1}\x{e1}
+
+/\x{120}\x{c1}/i,ucp,no_start_optimize
+ \x{121}\x{e1}
+
+/\x{120}\x{c1}/i,ucp
+ \x{121}\x{e1}
+
+/[^\x{120}]/i,no_start_optimize
+ \x{121}
+
+/[^\x{120}]/i,ucp,no_start_optimize
+\= Expect no match
+ \x{121}
+
+/[^\x{120}]/i
+ \x{121}
+
+/[^\x{120}]/i,ucp
+\= Expect no match
+ \x{121}
+
+/\x{120}{2}/i,ucp
+ \x{121}\x{121}
+
+/[^\x{120}]{2}/i,ucp
+\= Expect no match
+ \x{121}\x{121}
+
+# ----------------------------------------------------
# End of testinput14
diff --git a/testdata/testinput2 b/testdata/testinput2
index 655e519..c816c5f 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -324,16 +324,7 @@
\= Expect no match
fooabar
-# This one is here because Perl behaves differently; see also the following.
-
-/^(a\1?){4}$/I
-\= Expect no match
- aaaa
- aaaaaa
-
-# Perl does not fail these two for the final subjects. Neither did PCRE until
-# release 8.01. The problem is in backtracking into a subpattern that contains
-# a recursive reference to itself. PCRE has now made these into atomic patterns.
+# Perl does not fail these two for the final subjects.
/^(xa|=?\1a){2}$/
xa=xaa
@@ -4592,6 +4583,12 @@ B)x/alt_verbnames,mark
/abcd/replace=w\rx\x82y\o{333}z(\Q12\$34$$\x34\E5$$),substitute_extended
abcd
+/abcd/replace=w\rx\x82y\o{333}z(\Q12\$34$$\x34\E5$$),substitute_extended,substitute_literal
+ >>abcd<<
+
+/abcd/g,replace=\$1$2\,substitute_literal
+ XabcdYabcdZ
+
/a(bc)(DE)/replace=a\u$1\U$1\E$1\l$2\L$2\Eab\Uab\LYZ\EDone,substitute_extended
abcDE
@@ -4603,6 +4600,7 @@ B)x/alt_verbnames,mark
ac
ab\=replace=${1:+$1\:$1:$2}
ac\=replace=${1:+$1\:$1:$2}
+ >>ac<<\=replace=${1:+$1\:$1:$2},substitute_literal
/a(?:(b)|(c))/substitute_extended,replace=X${1:-1:-1}X${2:-2:-2}
ab
@@ -4642,6 +4640,13 @@ B)x/alt_verbnames,mark
/(aa)(BB)/substitute_extended,replace=\U$1\L$2\E$1..\U$1\l$2$1
aaBB
+
+/abcd/replace=wxyz,substitute_matched
+ abcd
+ pqrs
+
+/abcd/g
+ >abcd1234abcd5678<\=replace=wxyz,substitute_matched
/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I
@@ -5665,6 +5670,9 @@ a)"xI
/\A(*napla:.*\b(\w++))(?>.*?\b\1\b){3}/
word1 word3 word1 word2 word3 word2 word2 word1 word3 word4
+/\A(?*.*\b(\w++))(?>.*?\b\1\b){3}/
+ word1 word3 word1 word2 word3 word2 word2 word1 word3 word4
+
/(*plb:(.)..|(.)...)(\1|\2)/
abcdb\=offset=4
abcda\=offset=4
@@ -5673,6 +5681,10 @@ a)"xI
abcdb\=offset=4
abcda\=offset=4
+/(?<*(.)..|(.)...)(\1|\2)/
+ abcdb\=offset=4
+ abcda\=offset=4
+
/(*non_atomic_positive_lookahead:ab)/B
/(*non_atomic_positive_lookbehind:ab)/B
@@ -5772,4 +5784,84 @@ a)"xI
/(a)?a/I
manm
+/^(?|(\*)(*napla:\S*_(\2?+.+))|(\w)(?=\S*_(\2?+\1)))+_\2$/
+ *abc_12345abc
+
+/^(?|(\*)(*napla:\S*_(\3?+.+))|(\w)(?=\S*_((\2?+\1))))+_\2$/
+ *abc_12345abc
+
+/^((\1+)(?C)|\d)+133X$/
+ 111133X\=callout_capture
+
+/abc/replace=xyz,substitute_replacement_only
+ 123abc456
+
+/a(?<ONE>b)c(?<TWO>d)e/g,replace=X$ONE+${TWO}Z,substitute_replacement_only
+ "abcde-abcde-"
+
+/a(b)c|xyz/g,replace=<$0>,substitute_callout,substitute_replacement_only
+ abcdefabcpqr
+ abxyzpqrabcxyz
+ 12abc34xyz99abc55\=substitute_stop=2
+ 12abc34xyz99abc55\=substitute_skip=1
+ 12abc34xyz99abc55\=substitute_skip=2
+
+/a(..)d/replace=>$1<,substitute_matched
+ xyzabcdxyzabcdxyz
+ xyzabcdxyzabcdxyz\=ovector=2
+\= Expect error
+ xyzabcdxyzabcdxyz\=ovector=1
+
+/a(..)d/g,replace=>$1<,substitute_matched
+ xyzabcdxyzabcdxyz
+ xyzabcdxyzabcdxyz\=ovector=2
+\= Expect error
+ xyzabcdxyzabcdxyz\=ovector=1
+ xyzabcdxyzabcdxyz\=ovector=1,substitute_unset_empty
+
+/55|a(..)d/g,replace=>$1<,substitute_matched
+ xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty
+\= Expect error
+ xyz55abcdxyzabcdxyz\=ovector=2
+
+/55|a(..)d/replace=>$1<,substitute_matched
+ xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty
+
+/55|a(..)d/replace=>$1<
+ xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty
+
+/55|a(..)d/g,replace=>$1<
+ xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty
+
+# Expect non-fixed-length error
+
+"(?<=X(?(DEFINE)(.*))(?1))."
+
+/\sxxx\s/tables=1
+\= Expect no match
+ AB\x{85}xxx\x{a0}XYZ
+
+/\sxxx\s/tables=2
+ AB\x{85}xxx\x{a0}XYZ
+
+/^\w+/tables=2
+ École
+
+/^\w+/tables=3
+ École
+
+#loadtables ./testdata/testbtables
+
+/^\w+/tables=3
+ École
+
+/"(*MARK:>" 00 "<).."/hex,mark,no_start_optimize
+ AB
+ A\=ph
+\= Expect no match
+ A
+
+/"(*MARK:>" 00 "<).(?C1)."/hex,mark,no_start_optimize
+ AB
+
# End of testinput2
diff --git a/testdata/testinput4 b/testdata/testinput4
index 0871835..0bdac57 100644
--- a/testdata/testinput4
+++ b/testdata/testinput4
@@ -804,10 +804,10 @@
\x{4d00}
\x{4db4}
\x{4db5}
+ \x{4db6}
\= Expect no match
a
\x{2b0}
- \x{4db6}
/^\p{Lt}/utf
\x{1c5}
diff --git a/testdata/testinput5 b/testdata/testinput5
index d90b66d..50dfda1 100644
--- a/testdata/testinput5
+++ b/testdata/testinput5
@@ -2081,7 +2081,6 @@
\x{655}
/^\p{Common}/utf
- \x{589}
\x{60c}
\x{61f}
\x{964}
@@ -2158,6 +2157,11 @@
/\p{Elymaic}\p{Nandinagari}\p{Nyiakeng_Puachue_Hmong}\p{Wancho}/utf
\x{10fe5}\x{119AC}\x{1E10E}\x{1E2D1}
+# Some Unicode 13.0.0 new script characters
+
+/\p{Chorasmian}\p{Dives_Akuru}\p{Khitan_Small_Script}\p{Yezidi}/utf
+ \x{10FB0}\x{11900}\x{18B00}\x{10E80}
+
# -------
# Test reference and errors in non-ASCII characters in group names
@@ -2179,4 +2183,9 @@
/\p{Any}*xyz/I
+/(|ß)7/caseless,ucp
+
+/(\xc1)\1/i,ucp
+ \xc1\xe1\=no_jit
+
# End of testinput5
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index ad2175b..470e412 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -10112,4 +10112,78 @@ No match
1: a
2: c
+/^(a\1?){4}$/
+ aaaa
+ 0: aaaa
+ 1: a
+ aaaaaa
+ 0: aaaaaa
+ 1: aa
+
+/^((\1+)|\d)+133X$/
+ 111133X
+ 0: 111133X
+ 1: 11
+ 2: 11
+
+/^(?=.*(?=(([A-Z]).*(?(1)\1)))(?!.+\2)){26}/i
+ The quick brown fox jumps over the lazy dog.
+ 0:
+ 1: quick brown fox jumps over the lazy dog.
+ 2: q
+ Jackdaws love my big sphinx of quartz.
+ 0:
+ 1: Jackdaws love my big sphinx of quartz.
+ 2: J
+ Pack my box with five dozen liquor jugs.
+ 0:
+ 1: Pack my box with five dozen liquor jugs.
+ 2: P
+\= Expect no match
+ The quick brown fox jumps over the lazy cat.
+No match
+ Hackdaws love my big sphinx of quartz.
+No match
+ Pack my fox with five dozen liquor jugs.
+No match
+
+/^(?>.*?([A-Z])(?!.*\1)){26}/i
+ The quick brown fox jumps over the lazy dog.
+ 0: The quick brown fox jumps over the lazy dog
+ 1: g
+ Jackdaws love my big sphinx of quartz.
+ 0: Jackdaws love my big sphinx of quartz
+ 1: z
+ Pack my box with five dozen liquor jugs.
+ 0: Pack my box with five dozen liquor jugs
+ 1: s
+\= Expect no match
+ The quick brown fox jumps over the lazy cat.
+No match
+ Hackdaws love my big sphinx of quartz.
+No match
+ Pack my fox with five dozen liquor jugs.
+No match
+
+"(?<=X(?(DEFINE)(A)))X(*F)"
+\= Expect no match
+ AXYZ
+No match
+
+"(?<=X(?(DEFINE)(A)))."
+ AXYZ
+ 0: Y
+
+"(?<=X(?(DEFINE)(.*))Y)."
+ AXYZ
+ 0: Z
+
+"(?<=X(?(DEFINE)(Y))(?1))."
+ AXYZ
+ 0: Z
+
+"(?(DEFINE)(?<foo>bar))(?<![-a-z0-9])word"
+ word
+ 0: word
+
# End of testinput1
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index 775c2ab..59af535 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -1780,11 +1780,15 @@ Capture group count = 0
Options: utf
Starting code units: \xc3
Subject length lower bound = 1
+ abc\x{ff}def
+ 0: \x{ff}
/[\xff\x{ff}]/I
Capture group count = 0
-Starting code units: \xff
+First code unit = \xff
Subject length lower bound = 1
+ abc\x{ff}def
+ 0: \xff
/[Ss]/I
Capture group count = 0
@@ -1813,4 +1817,58 @@ Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3
abc\x80\=startchar,offset=3
Error -36 (bad UTF-8 offset)
+/\x{c1}+\x{e1}/iIB,ucp
+------------------------------------------------------------------
+ Bra
+ /i \x{c1}+
+ /i \x{e1}
+ Ket
+ End
+------------------------------------------------------------------
+Capture group count = 0
+Options: caseless ucp
+First code unit = \xc1 (caseless)
+Last code unit = \xe1 (caseless)
+Subject length lower bound = 2
+ \x{c1}\x{c1}\x{c1}
+ 0: \xc1\xc1\xc1
+ \x{e1}\x{e1}\x{e1}
+ 0: \xe1\xe1\xe1
+
+/a|\x{c1}/iI,ucp
+Capture group count = 0
+Options: caseless ucp
+Starting code units: A a \xc1 \xe1
+Subject length lower bound = 1
+ \x{e1}xxx
+ 0: \xe1
+
+/a|\x{c1}/iI,utf
+Capture group count = 0
+Options: caseless utf
+Starting code units: A a \xc3
+Subject length lower bound = 1
+ \x{e1}xxx
+ 0: \x{e1}
+
+/\x{c1}|\x{e1}/iI,ucp
+Capture group count = 0
+Options: caseless ucp
+First code unit = \xc1 (caseless)
+Subject length lower bound = 1
+
+/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended
+ X\x{e1}Y
+ 1: >\xc1<
+
+/X(\x{e1})Y/i,ucp,replace=>\L$1<,substitute_extended
+ X\x{c1}Y
+ 1: >\xe1<
+
+# Without UTF or UCP characters > 127 have only one case in the default locale.
+
+/X(\x{e1})Y/replace=>\U$1<,substitute_extended
+ X\x{e1}Y
+ 1: >\xe1<
+
# End of testinput10
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
index 3006bc1..84c4858 100644
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@@ -533,7 +533,7 @@ Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
XX\x{110000}
** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
XX\x{d800}\x{1234}
-Failed: error -25: UTF-16 error: invalid low surrogate at offset 3
+Failed: error -25: UTF-16 error: invalid low surrogate at offset 2
\= Expect no match
XX\x{d800}\=offset=3
No match
@@ -1576,6 +1576,15 @@ No match
No match
A\x{110000}B
** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
+
+/aa/utf,ucp,match_invalid_utf,global
+ aa\x{d800}aa
+ 0: aa
+ 0: aa
+
+/aa/utf,ucp,match_invalid_utf,global
+ \x{d800}aa
+ 0: aa
# ----------------------------------------------------
@@ -1613,7 +1622,7 @@ Subject length lower bound = 1
/[Ss]/I
Capture group count = 0
-Starting code units: S s
+First code unit = 'S' (caseless)
Subject length lower bound = 1
/[Ss]/I,utf
@@ -1628,4 +1637,148 @@ Options: utf
Starting code units: \xff
Subject length lower bound = 1
+# ----------------------------------------------------
+# UCP and casing tests
+
+/\x{120}/i,I
+Capture group count = 0
+Options: caseless
+First code unit = \x{120}
+Subject length lower bound = 1
+
+/\x{c1}/i,I,ucp
+Capture group count = 0
+Options: caseless ucp
+First code unit = \xc1 (caseless)
+Subject length lower bound = 1
+
+/[\x{120}\x{121}]/iB,ucp
+------------------------------------------------------------------
+ Bra
+ /i \x{120}
+ Ket
+ End
+------------------------------------------------------------------
+
+/[ab\x{120}]+/iB,ucp
+------------------------------------------------------------------
+ Bra
+ [ABab\x{120}-\x{121}]++
+ Ket
+ End
+------------------------------------------------------------------
+ aABb\x{121}\x{120}
+ 0: aABb\x{121}\x{120}
+
+/\x{c1}/i,no_start_optimize
+\= Expect no match
+ \x{e1}
+No match
+
+/\x{120}\x{c1}/i,ucp,no_start_optimize
+ \x{121}\x{e1}
+ 0: \x{121}\xe1
+
+/\x{120}\x{c1}/i,ucp
+ \x{121}\x{e1}
+ 0: \x{121}\xe1
+
+/[^\x{120}]/i,no_start_optimize
+ \x{121}
+ 0: \x{121}
+
+/[^\x{120}]/i,ucp,no_start_optimize
+\= Expect no match
+ \x{121}
+No match
+
+/[^\x{120}]/i
+ \x{121}
+ 0: \x{121}
+
+/[^\x{120}]/i,ucp
+\= Expect no match
+ \x{121}
+No match
+
+/\x{120}{2}/i,ucp
+ \x{121}\x{121}
+ 0: \x{121}\x{121}
+
+/[^\x{120}]{2}/i,ucp
+\= Expect no match
+ \x{121}\x{121}
+No match
+
+/\x{c1}+\x{e1}/iB,ucp
+------------------------------------------------------------------
+ Bra
+ /i \x{c1}+
+ /i \x{e1}
+ Ket
+ End
+------------------------------------------------------------------
+ \x{c1}\x{c1}\x{c1}
+ 0: \xc1\xc1\xc1
+
+/\x{c1}+\x{e1}/iIB,ucp
+------------------------------------------------------------------
+ Bra
+ /i \x{c1}+
+ /i \x{e1}
+ Ket
+ End
+------------------------------------------------------------------
+Capture group count = 0
+Options: caseless ucp
+First code unit = \xc1 (caseless)
+Last code unit = \xe1 (caseless)
+Subject length lower bound = 2
+ \x{c1}\x{c1}\x{c1}
+ 0: \xc1\xc1\xc1
+ \x{e1}\x{e1}\x{e1}
+ 0: \xe1\xe1\xe1
+
+/a|\x{c1}/iI,ucp
+Capture group count = 0
+Options: caseless ucp
+Starting code units: A a \xc1 \xe1
+Subject length lower bound = 1
+ \x{e1}xxx
+ 0: \xe1
+
+/\x{c1}|\x{e1}/iI,ucp
+Capture group count = 0
+Options: caseless ucp
+First code unit = \xc1 (caseless)
+Subject length lower bound = 1
+
+/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended
+ X\x{e1}Y
+ 1: >\xc1<
+
+/X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended
+ X\x{121}Y
+ 1: >\x{120}<
+
+/s/i,ucp
+ \x{17f}
+ 0: \x{17f}
+
+/s/i,utf
+ \x{17f}
+ 0: \x{17f}
+
+/[^s]/i,ucp
+\= Expect no match
+ \x{17f}
+No match
+
+/[^s]/i,utf
+\= Expect no match
+ \x{17f}
+No match
+
+# ----------------------------------------------------
+
# End of testinput12
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
index ad240e2..03b6e39 100644
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@@ -1574,6 +1574,15 @@ No match
No match
A\x{110000}B
No match
+
+/aa/utf,ucp,match_invalid_utf,global
+ aa\x{d800}aa
+ 0: aa
+ 0: aa
+
+/aa/utf,ucp,match_invalid_utf,global
+ \x{d800}aa
+ 0: aa
# ----------------------------------------------------
@@ -1611,7 +1620,7 @@ Subject length lower bound = 1
/[Ss]/I
Capture group count = 0
-Starting code units: S s
+First code unit = 'S' (caseless)
Subject length lower bound = 1
/[Ss]/I,utf
@@ -1626,4 +1635,148 @@ Options: utf
Starting code units: \xff
Subject length lower bound = 1
+# ----------------------------------------------------
+# UCP and casing tests
+
+/\x{120}/i,I
+Capture group count = 0
+Options: caseless
+First code unit = \x{120}
+Subject length lower bound = 1
+
+/\x{c1}/i,I,ucp
+Capture group count = 0
+Options: caseless ucp
+First code unit = \xc1 (caseless)
+Subject length lower bound = 1
+
+/[\x{120}\x{121}]/iB,ucp
+------------------------------------------------------------------
+ Bra
+ /i \x{120}
+ Ket
+ End
+------------------------------------------------------------------
+
+/[ab\x{120}]+/iB,ucp
+------------------------------------------------------------------
+ Bra
+ [ABab\x{120}-\x{121}]++
+ Ket
+ End
+------------------------------------------------------------------
+ aABb\x{121}\x{120}
+ 0: aABb\x{121}\x{120}
+
+/\x{c1}/i,no_start_optimize
+\= Expect no match
+ \x{e1}
+No match
+
+/\x{120}\x{c1}/i,ucp,no_start_optimize
+ \x{121}\x{e1}
+ 0: \x{121}\xe1
+
+/\x{120}\x{c1}/i,ucp
+ \x{121}\x{e1}
+ 0: \x{121}\xe1
+
+/[^\x{120}]/i,no_start_optimize
+ \x{121}
+ 0: \x{121}
+
+/[^\x{120}]/i,ucp,no_start_optimize
+\= Expect no match
+ \x{121}
+No match
+
+/[^\x{120}]/i
+ \x{121}
+ 0: \x{121}
+
+/[^\x{120}]/i,ucp
+\= Expect no match
+ \x{121}
+No match
+
+/\x{120}{2}/i,ucp
+ \x{121}\x{121}
+ 0: \x{121}\x{121}
+
+/[^\x{120}]{2}/i,ucp
+\= Expect no match
+ \x{121}\x{121}
+No match
+
+/\x{c1}+\x{e1}/iB,ucp
+------------------------------------------------------------------
+ Bra
+ /i \x{c1}+
+ /i \x{e1}
+ Ket
+ End
+------------------------------------------------------------------
+ \x{c1}\x{c1}\x{c1}
+ 0: \xc1\xc1\xc1
+
+/\x{c1}+\x{e1}/iIB,ucp
+------------------------------------------------------------------
+ Bra
+ /i \x{c1}+
+ /i \x{e1}
+ Ket
+ End
+------------------------------------------------------------------
+Capture group count = 0
+Options: caseless ucp
+First code unit = \xc1 (caseless)
+Last code unit = \xe1 (caseless)
+Subject length lower bound = 2
+ \x{c1}\x{c1}\x{c1}
+ 0: \xc1\xc1\xc1
+ \x{e1}\x{e1}\x{e1}
+ 0: \xe1\xe1\xe1
+
+/a|\x{c1}/iI,ucp
+Capture group count = 0
+Options: caseless ucp
+Starting code units: A a \xc1 \xe1
+Subject length lower bound = 1
+ \x{e1}xxx
+ 0: \xe1
+
+/\x{c1}|\x{e1}/iI,ucp
+Capture group count = 0
+Options: caseless ucp
+First code unit = \xc1 (caseless)
+Subject length lower bound = 1
+
+/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended
+ X\x{e1}Y
+ 1: >\xc1<
+
+/X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended
+ X\x{121}Y
+ 1: >\x{120}<
+
+/s/i,ucp
+ \x{17f}
+ 0: \x{17f}
+
+/s/i,utf
+ \x{17f}
+ 0: \x{17f}
+
+/[^s]/i,ucp
+\= Expect no match
+ \x{17f}
+No match
+
+/[^s]/i,utf
+\= Expect no match
+ \x{17f}
+No match
+
+# ----------------------------------------------------
+
# End of testinput12
diff --git a/testdata/testoutput14-16 b/testdata/testoutput14-16
index 05b7d48..61541f6 100644
--- a/testdata/testoutput14-16
+++ b/testdata/testoutput14-16
@@ -1,9 +1,12 @@
-# These test special (mostly error) UTF features of DFA matching. They are a
-# selection of the more comprehensive tests that are run for non-DFA matching.
-# The output is different for the different widths.
+# These test special UTF and UCP features of DFA matching. The output is
+# different for the different widths.
#subject dfa
+# ----------------------------------------------------
+# These are a selection of the more comprehensive tests that are run for
+# non-DFA matching.
+
/X/utf
XX\x{d800}
Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
@@ -30,7 +33,7 @@ Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
XX\x{110000}
** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
XX\x{d800}\x{1234}
-Failed: error -25: UTF-16 error: invalid low surrogate at offset 3
+Failed: error -25: UTF-16 error: invalid low surrogate at offset 2
/badutf/utf
X\xdf
@@ -57,5 +60,66 @@ No match
No match
\xf7\x80\=ph
No match
+
+# ----------------------------------------------------
+# UCP and casing tests - except for the first two, these will all fail in 8-bit
+# mode because they are testing UCP without UTF and use characters > 255.
+
+/\x{c1}/i,no_start_optimize
+\= Expect no match
+ \x{e1}
+No match
+
+/\x{c1}+\x{e1}/iB,ucp
+------------------------------------------------------------------
+ Bra
+ /i \x{c1}+
+ /i \x{e1}
+ Ket
+ End
+------------------------------------------------------------------
+ \x{c1}\x{c1}\x{c1}
+ 0: \xc1\xc1\xc1
+ 1: \xc1\xc1
+ \x{e1}\x{e1}\x{e1}
+ 0: \xe1\xe1\xe1
+ 1: \xe1\xe1
+
+/\x{120}\x{c1}/i,ucp,no_start_optimize
+ \x{121}\x{e1}
+ 0: \x{121}\xe1
+
+/\x{120}\x{c1}/i,ucp
+ \x{121}\x{e1}
+ 0: \x{121}\xe1
+
+/[^\x{120}]/i,no_start_optimize
+ \x{121}
+ 0: \x{121}
+
+/[^\x{120}]/i,ucp,no_start_optimize
+\= Expect no match
+ \x{121}
+No match
+
+/[^\x{120}]/i
+ \x{121}
+ 0: \x{121}
+
+/[^\x{120}]/i,ucp
+\= Expect no match
+ \x{121}
+No match
+
+/\x{120}{2}/i,ucp
+ \x{121}\x{121}
+ 0: \x{121}\x{121}
+
+/[^\x{120}]{2}/i,ucp
+\= Expect no match
+ \x{121}\x{121}
+No match
+
+# ----------------------------------------------------
# End of testinput14
diff --git a/testdata/testoutput14-32 b/testdata/testoutput14-32
index 30d7fa6..f1f65b7 100644
--- a/testdata/testoutput14-32
+++ b/testdata/testoutput14-32
@@ -1,9 +1,12 @@
-# These test special (mostly error) UTF features of DFA matching. They are a
-# selection of the more comprehensive tests that are run for non-DFA matching.
-# The output is different for the different widths.
+# These test special UTF and UCP features of DFA matching. The output is
+# different for the different widths.
#subject dfa
+# ----------------------------------------------------
+# These are a selection of the more comprehensive tests that are run for
+# non-DFA matching.
+
/X/utf
XX\x{d800}
Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2
@@ -57,5 +60,66 @@ No match
No match
\xf7\x80\=ph
No match
+
+# ----------------------------------------------------
+# UCP and casing tests - except for the first two, these will all fail in 8-bit
+# mode because they are testing UCP without UTF and use characters > 255.
+
+/\x{c1}/i,no_start_optimize
+\= Expect no match
+ \x{e1}
+No match
+
+/\x{c1}+\x{e1}/iB,ucp
+------------------------------------------------------------------
+ Bra
+ /i \x{c1}+
+ /i \x{e1}
+ Ket
+ End
+------------------------------------------------------------------
+ \x{c1}\x{c1}\x{c1}
+ 0: \xc1\xc1\xc1
+ 1: \xc1\xc1
+ \x{e1}\x{e1}\x{e1}
+ 0: \xe1\xe1\xe1
+ 1: \xe1\xe1
+
+/\x{120}\x{c1}/i,ucp,no_start_optimize
+ \x{121}\x{e1}
+ 0: \x{121}\xe1
+
+/\x{120}\x{c1}/i,ucp
+ \x{121}\x{e1}
+ 0: \x{121}\xe1
+
+/[^\x{120}]/i,no_start_optimize
+ \x{121}
+ 0: \x{121}
+
+/[^\x{120}]/i,ucp,no_start_optimize
+\= Expect no match
+ \x{121}
+No match
+
+/[^\x{120}]/i
+ \x{121}
+ 0: \x{121}
+
+/[^\x{120}]/i,ucp
+\= Expect no match
+ \x{121}
+No match
+
+/\x{120}{2}/i,ucp
+ \x{121}\x{121}
+ 0: \x{121}\x{121}
+
+/[^\x{120}]{2}/i,ucp
+\= Expect no match
+ \x{121}\x{121}
+No match
+
+# ----------------------------------------------------
# End of testinput14
diff --git a/testdata/testoutput14-8 b/testdata/testoutput14-8
index 1fb0dc1..aa62414 100644
--- a/testdata/testoutput14-8
+++ b/testdata/testoutput14-8
@@ -1,9 +1,12 @@
-# These test special (mostly error) UTF features of DFA matching. They are a
-# selection of the more comprehensive tests that are run for non-DFA matching.
-# The output is different for the different widths.
+# These test special UTF and UCP features of DFA matching. The output is
+# different for the different widths.
#subject dfa
+# ----------------------------------------------------
+# These are a selection of the more comprehensive tests that are run for
+# non-DFA matching.
+
/X/utf
XX\x{d800}
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
@@ -57,5 +60,66 @@ Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
\xf7\x80\=ph
Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
+
+# ----------------------------------------------------
+# UCP and casing tests - except for the first two, these will all fail in 8-bit
+# mode because they are testing UCP without UTF and use characters > 255.
+
+/\x{c1}/i,no_start_optimize
+\= Expect no match
+ \x{e1}
+No match
+
+/\x{c1}+\x{e1}/iB,ucp
+------------------------------------------------------------------
+ Bra
+ /i \x{c1}+
+ /i \x{e1}
+ Ket
+ End
+------------------------------------------------------------------
+ \x{c1}\x{c1}\x{c1}
+ 0: \xc1\xc1\xc1
+ 1: \xc1\xc1
+ \x{e1}\x{e1}\x{e1}
+ 0: \xe1\xe1\xe1
+ 1: \xe1\xe1
+
+/\x{120}\x{c1}/i,ucp,no_start_optimize
+Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large
+ \x{121}\x{e1}
+
+/\x{120}\x{c1}/i,ucp
+Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large
+ \x{121}\x{e1}
+
+/[^\x{120}]/i,no_start_optimize
+Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large
+ \x{121}
+
+/[^\x{120}]/i,ucp,no_start_optimize
+Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large
+\= Expect no match
+ \x{121}
+
+/[^\x{120}]/i
+Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large
+ \x{121}
+
+/[^\x{120}]/i,ucp
+Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large
+\= Expect no match
+ \x{121}
+
+/\x{120}{2}/i,ucp
+Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large
+ \x{121}\x{121}
+
+/[^\x{120}]{2}/i,ucp
+Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large
+\= Expect no match
+ \x{121}\x{121}
+
+# ----------------------------------------------------
# End of testinput14
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index c733c12..c90efef 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -809,24 +809,7 @@ Subject length lower bound = 3
fooabar
No match
-# This one is here because Perl behaves differently; see also the following.
-
-/^(a\1?){4}$/I
-Capture group count = 1
-Max back reference = 1
-Compile options: <none>
-Overall options: anchored
-First code unit = 'a'
-Subject length lower bound = 4
-\= Expect no match
- aaaa
-No match
- aaaaaa
-No match
-
-# Perl does not fail these two for the final subjects. Neither did PCRE until
-# release 8.01. The problem is in backtracking into a subpattern that contains
-# a recursive reference to itself. PCRE has now made these into atomic patterns.
+# Perl does not fail these two for the final subjects.
/^(xa|=?\1a){2}$/
xa=xaa
@@ -10060,7 +10043,6 @@ No match
------------------------------------------------------------------
Bra
^
- Once
CBra 1
ab
CBra 2
@@ -10071,8 +10053,6 @@ No match
Alt
x
Ket
- Ket
- Once
CBra 1
ab
CBra 2
@@ -10083,7 +10063,6 @@ No match
Alt
x
Ket
- Ket
$
Ket
End
@@ -10479,27 +10458,23 @@ Failed: error 168 at offset 3: \c must be followed by a printable ASCII characte
/(?P<abn>(?P=abn)xxx)/B
------------------------------------------------------------------
Bra
- Once
CBra 1
\1
xxx
Ket
Ket
- Ket
End
------------------------------------------------------------------
/(a\1z)/B
------------------------------------------------------------------
Bra
- Once
CBra 1
a
\1
z
Ket
Ket
- Ket
End
------------------------------------------------------------------
@@ -10987,6 +10962,12 @@ Matched, but too many substrings
Assert
abc
Ket
+ Assert
+ abc
+ Ket
+ Assert
+ abc
+ Ket
abc
Ket
End
@@ -10998,6 +10979,10 @@ Matched, but too many substrings
Assert
abc
Ket
+ Brazero
+ Assert
+ abc
+ Ket
abc
Ket
End
@@ -11006,9 +10991,15 @@ Matched, but too many substrings
/(?=abc)++abc/B
------------------------------------------------------------------
Bra
+ Once
Assert
abc
Ket
+ Brazero
+ Assert
+ abc
+ Ket
+ Ket
abc
Ket
End
@@ -11299,27 +11290,23 @@ No match
/(?P<abn>(?P=abn)xxx)/B
------------------------------------------------------------------
Bra
- Once
CBra 1
\1
xxx
Ket
Ket
- Ket
End
------------------------------------------------------------------
/(a\1z)/B
------------------------------------------------------------------
Bra
- Once
CBra 1
a
\1
z
Ket
Ket
- Ket
End
------------------------------------------------------------------
@@ -13319,7 +13306,6 @@ Failed: error 144 at offset 5: subpattern name must start with a non-digit
Bra
Brazero
SCBra 1
- Once
CBra 2
CBra 3
a
@@ -13331,7 +13317,6 @@ Failed: error 144 at offset 5: subpattern name must start with a non-digit
Ket
Recurse
Ket
- Ket
KetRmax
a?+
Ket
@@ -13999,7 +13984,6 @@ Matched, but too many substrings
/((?+1)(\1))/B
------------------------------------------------------------------
Bra
- Once
CBra 1
Recurse
CBra 2
@@ -14007,7 +13991,6 @@ Matched, but too many substrings
Ket
Ket
Ket
- Ket
End
------------------------------------------------------------------
@@ -14425,7 +14408,6 @@ Subject length lower bound = 1
------------------------------------------------------------------
Bra
Any
- Once
CBra 1
Recurse
Recurse
@@ -14434,7 +14416,6 @@ Subject length lower bound = 1
Alt
$
Ket
- Ket
CBra 2
Ket
Ket
@@ -14445,7 +14426,6 @@ Subject length lower bound = 1
------------------------------------------------------------------
Bra
Any
- Once
CBra 1
Recurse
Recurse
@@ -14457,7 +14437,6 @@ Subject length lower bound = 1
Alt
$
Ket
- Ket
CBra 3
Ket
Ket
@@ -14815,6 +14794,14 @@ No match
abcd
1: w\x0dx\x82y\xdbz(12\$34$$\x345$)
+/abcd/replace=w\rx\x82y\o{333}z(\Q12\$34$$\x34\E5$$),substitute_extended,substitute_literal
+ >>abcd<<
+ 1: >>w\rx\x82y\o{333}z(\Q12\$34$$\x34\E5$$)<<
+
+/abcd/g,replace=\$1$2\,substitute_literal
+ XabcdYabcdZ
+ 2: X\$1$2\Y\$1$2\Z
+
/a(bc)(DE)/replace=a\u$1\U$1\E$1\l$2\L$2\Eab\Uab\LYZ\EDone,substitute_extended
abcDE
1: aBcBCbcdEdeabAByzDone
@@ -14832,6 +14819,8 @@ Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement
1: b:b
ac\=replace=${1:+$1\:$1:$2}
1: c
+ >>ac<<\=replace=${1:+$1\:$1:$2},substitute_literal
+ 1: >>${1:+$1\:$1:$2}<<
/a(?:(b)|(c))/substitute_extended,replace=X${1:-1:-1}X${2:-2:-2}
ab
@@ -14886,6 +14875,16 @@ Failed: error -55 at offset 3 in replacement: requested value is not set
/(aa)(BB)/substitute_extended,replace=\U$1\L$2\E$1..\U$1\l$2$1
aaBB
1: AAbbaa..AAbBaa
+
+/abcd/replace=wxyz,substitute_matched
+ abcd
+ 1: wxyz
+ pqrs
+ 0: pqrs
+
+/abcd/g
+ >abcd1234abcd5678<\=replace=wxyz,substitute_matched
+ 2: >wxyz1234wxyz5678<
/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I
Capture group count = 2
@@ -16627,6 +16626,19 @@ No match
Assert
Any
Ket
+ Assert
+ Any
+ Ket
+ Assert
+ Any
+ Ket
+ Assert
+ Any
+ Ket
+ Brazero
+ Assert
+ Any
+ Ket
x
Ket
Ket
@@ -17105,6 +17117,11 @@ No match
0: word1 word3 word1 word2 word3 word2 word2 word1 word3
1: word3
+/\A(?*.*\b(\w++))(?>.*?\b\1\b){3}/
+ word1 word3 word1 word2 word3 word2 word2 word1 word3 word4
+ 0: word1 word3 word1 word2 word3 word2 word2 word1 word3
+ 1: word3
+
/(*plb:(.)..|(.)...)(\1|\2)/
abcdb\=offset=4
0: b
@@ -17126,6 +17143,18 @@ No match
2: a
3: a
+/(?<*(.)..|(.)...)(\1|\2)/
+ abcdb\=offset=4
+ 0: b
+ 1: b
+ 2: <unset>
+ 3: b
+ abcda\=offset=4
+ 0: a
+ 1: <unset>
+ 2: a
+ 3: a
+
/(*non_atomic_positive_lookahead:ab)/B
------------------------------------------------------------------
Bra
@@ -17435,6 +17464,163 @@ Subject length lower bound = 1
manm
0: a
+/^(?|(\*)(*napla:\S*_(\2?+.+))|(\w)(?=\S*_(\2?+\1)))+_\2$/
+ *abc_12345abc
+ 0: *abc_12345abc
+ 1: c
+ 2: 12345abc
+
+/^(?|(\*)(*napla:\S*_(\3?+.+))|(\w)(?=\S*_((\2?+\1))))+_\2$/
+ *abc_12345abc
+ 0: *abc_12345abc
+ 1: c
+ 2: 12345abc
+ 3: 12345abc
+
+/^((\1+)(?C)|\d)+133X$/
+ 111133X\=callout_capture
+Callout 0: last capture = 2
+ 1: 1
+ 2: 111
+--->111133X
+ ^ ^ |
+Callout 0: last capture = 2
+ 1: 3
+ 2: 3
+--->111133X
+ ^ ^ |
+Callout 0: last capture = 2
+ 1: 1
+ 2: 11
+--->111133X
+ ^ ^ |
+Callout 0: last capture = 2
+ 1: 3
+ 2: 3
+--->111133X
+ ^ ^ |
+ 0: 111133X
+ 1: 11
+ 2: 11
+
+/abc/replace=xyz,substitute_replacement_only
+ 123abc456
+ 1: xyz
+
+/a(?<ONE>b)c(?<TWO>d)e/g,replace=X$ONE+${TWO}Z,substitute_replacement_only
+ "abcde-abcde-"
+ 2: Xb+dZXb+dZ
+
+/a(b)c|xyz/g,replace=<$0>,substitute_callout,substitute_replacement_only
+ abcdefabcpqr
+ 1(2) Old 0 3 "abc" New 0 5 "<abc>"
+ 2(2) Old 6 9 "abc" New 5 10 "<abc>"
+ 2: <abc><abc>
+ abxyzpqrabcxyz
+ 1(1) Old 2 5 "xyz" New 0 5 "<xyz>"
+ 2(2) Old 8 11 "abc" New 5 10 "<abc>"
+ 3(1) Old 11 14 "xyz" New 10 15 "<xyz>"
+ 3: <xyz><abc><xyz>
+ 12abc34xyz99abc55\=substitute_stop=2
+ 1(2) Old 2 5 "abc" New 0 5 "<abc>"
+ 2(1) Old 7 10 "xyz" New 5 10 "<xyz> STOPPED"
+ 2: <abc>
+ 12abc34xyz99abc55\=substitute_skip=1
+ 1(2) Old 2 5 "abc" New 0 5 "<abc> SKIPPED"
+ 2(1) Old 7 10 "xyz" New 0 5 "<xyz>"
+ 3(2) Old 12 15 "abc" New 5 10 "<abc>"
+ 3: <xyz><abc>
+ 12abc34xyz99abc55\=substitute_skip=2
+ 1(2) Old 2 5 "abc" New 0 5 "<abc>"
+ 2(1) Old 7 10 "xyz" New 5 10 "<xyz> SKIPPED"
+ 3(2) Old 12 15 "abc" New 5 10 "<abc>"
+ 3: <abc><abc>
+
+/a(..)d/replace=>$1<,substitute_matched
+ xyzabcdxyzabcdxyz
+ 1: xyz>bc<xyzabcdxyz
+ xyzabcdxyzabcdxyz\=ovector=2
+ 1: xyz>bc<xyzabcdxyz
+\= Expect error
+ xyzabcdxyzabcdxyz\=ovector=1
+Failed: error -54 at offset 3 in replacement: requested value is not available
+
+/a(..)d/g,replace=>$1<,substitute_matched
+ xyzabcdxyzabcdxyz
+ 2: xyz>bc<xyz>bc<xyz
+ xyzabcdxyzabcdxyz\=ovector=2
+ 2: xyz>bc<xyz>bc<xyz
+\= Expect error
+ xyzabcdxyzabcdxyz\=ovector=1
+Failed: error -54 at offset 3 in replacement: requested value is not available
+ xyzabcdxyzabcdxyz\=ovector=1,substitute_unset_empty
+Failed: error -54 at offset 3 in replacement: requested value is not available
+
+/55|a(..)d/g,replace=>$1<,substitute_matched
+ xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty
+ 3: xyz><>bc<xyz>bc<xyz
+\= Expect error
+ xyz55abcdxyzabcdxyz\=ovector=2
+Failed: error -55 at offset 3 in replacement: requested value is not set
+
+/55|a(..)d/replace=>$1<,substitute_matched
+ xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty
+ 1: xyz><abcdxyzabcdxyz
+
+/55|a(..)d/replace=>$1<
+ xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty
+ 1: xyz><abcdxyzabcdxyz
+
+/55|a(..)d/g,replace=>$1<
+ xyz55abcdxyzabcdxyz\=ovector=2,substitute_unset_empty
+ 3: xyz><>bc<xyz>bc<xyz
+
+# Expect non-fixed-length error
+
+"(?<=X(?(DEFINE)(.*))(?1))."
+Failed: error 125 at offset 0: lookbehind assertion is not fixed length
+
+/\sxxx\s/tables=1
+\= Expect no match
+ AB\x{85}xxx\x{a0}XYZ
+No match
+
+/\sxxx\s/tables=2
+ AB\x{85}xxx\x{a0}XYZ
+ 0: \x85xxx\xa0
+
+/^\w+/tables=2
+ École
+ 0: \xc3
+
+/^\w+/tables=3
+** 'Tables = 3' is invalid: binary tables have not been loaded
+ École
+
+#loadtables ./testdata/testbtables
+
+/^\w+/tables=3
+ École
+ 0: \xc3
+
+/"(*MARK:>" 00 "<).."/hex,mark,no_start_optimize
+ AB
+ 0: AB
+MK: >\x00<
+ A\=ph
+Partial match, mark=>\x00<: A
+\= Expect no match
+ A
+No match, mark = >\x00<
+
+/"(*MARK:>" 00 "<).(?C1)."/hex,mark,no_start_optimize
+ AB
+--->AB
+ 1 ^^ .
+Latest Mark: >\x00<
+ 0: AB
+MK: >\x00<
+
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
diff --git a/testdata/testoutput4 b/testdata/testoutput4
index 2c8037b..245a036 100644
--- a/testdata/testoutput4
+++ b/testdata/testoutput4
@@ -1342,13 +1342,13 @@ No match
0: \x{4db4}
\x{4db5}
0: \x{4db5}
+ \x{4db6}
+ 0: \x{4db6}
\= Expect no match
a
No match
\x{2b0}
No match
- \x{4db6}
-No match
/^\p{Lt}/utf
\x{1c5}
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index ecf01fb..c2f8c3d 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -4736,8 +4736,6 @@ No match
No match
/^\p{Common}/utf
- \x{589}
- 0: \x{589}
\x{60c}
0: \x{60c}
\x{61f}
@@ -4900,6 +4898,12 @@ MK: ABC
\x{10fe5}\x{119AC}\x{1E10E}\x{1E2D1}
0: \x{10fe5}\x{119ac}\x{1e10e}\x{1e2d1}
+# Some Unicode 13.0.0 new script characters
+
+/\p{Chorasmian}\p{Dives_Akuru}\p{Khitan_Small_Script}\p{Yezidi}/utf
+ \x{10FB0}\x{11900}\x{18B00}\x{10E80}
+ 0: \x{10fb0}\x{11900}\x{18b00}\x{10e80}
+
# -------
# Test reference and errors in non-ASCII characters in group names
@@ -4938,4 +4942,11 @@ Overall options: anchored
Last code unit = 'z'
Subject length lower bound = 3
+/(|ß)7/caseless,ucp
+
+/(\xc1)\1/i,ucp
+ \xc1\xe1\=no_jit
+ 0: \xc1\xe1
+ 1: \xc1
+
# End of testinput5
diff --git a/testdata/testoutput8-16-2 b/testdata/testoutput8-16-2
index ff3474b..569a860 100644
--- a/testdata/testoutput8-16-2
+++ b/testdata/testoutput8-16-2
@@ -720,41 +720,37 @@ Memory allocation (code space): 14
/(((a\2)|(a*)\g<-1>))*a?/
------------------------------------------------------------------
- 0 39 Bra
+ 0 35 Bra
2 Brazero
- 3 32 SCBra 1
- 6 27 Once
- 8 12 CBra 2
- 11 7 CBra 3
- 14 a
- 16 \2
- 18 7 Ket
- 20 11 Alt
- 22 5 CBra 4
- 25 a*
- 27 5 Ket
- 29 22 Recurse
- 31 23 Ket
- 33 27 Ket
- 35 32 KetRmax
- 37 a?+
- 39 39 Ket
- 41 End
+ 3 28 SCBra 1
+ 6 12 CBra 2
+ 9 7 CBra 3
+ 12 a
+ 14 \2
+ 16 7 Ket
+ 18 11 Alt
+ 20 5 CBra 4
+ 23 a*
+ 25 5 Ket
+ 27 20 Recurse
+ 29 23 Ket
+ 31 28 KetRmax
+ 33 a?+
+ 35 35 Ket
+ 37 End
------------------------------------------------------------------
/((?+1)(\1))/
------------------------------------------------------------------
- 0 20 Bra
- 2 16 Once
- 4 12 CBra 1
- 7 9 Recurse
- 9 5 CBra 2
- 12 \1
- 14 5 Ket
- 16 12 Ket
- 18 16 Ket
- 20 20 Ket
- 22 End
+ 0 16 Bra
+ 2 12 CBra 1
+ 5 7 Recurse
+ 7 5 CBra 2
+ 10 \1
+ 12 5 Ket
+ 14 12 Ket
+ 16 16 Ket
+ 18 End
------------------------------------------------------------------
"(?1)(?#?'){2}(a)"
@@ -771,45 +767,41 @@ Memory allocation (code space): 14
/.((?2)(?R)|\1|$)()/
------------------------------------------------------------------
- 0 28 Bra
+ 0 24 Bra
2 Any
- 3 18 Once
- 5 7 CBra 1
- 8 23 Recurse
- 10 0 Recurse
- 12 4 Alt
- 14 \1
- 16 3 Alt
- 18 $
- 19 14 Ket
- 21 18 Ket
- 23 3 CBra 2
- 26 3 Ket
- 28 28 Ket
- 30 End
+ 3 7 CBra 1
+ 6 19 Recurse
+ 8 0 Recurse
+ 10 4 Alt
+ 12 \1
+ 14 3 Alt
+ 16 $
+ 17 14 Ket
+ 19 3 CBra 2
+ 22 3 Ket
+ 24 24 Ket
+ 26 End
------------------------------------------------------------------
/.((?3)(?R)()(?2)|\1|$)()/
------------------------------------------------------------------
- 0 35 Bra
+ 0 31 Bra
2 Any
- 3 25 Once
- 5 14 CBra 1
- 8 30 Recurse
- 10 0 Recurse
- 12 3 CBra 2
- 15 3 Ket
- 17 12 Recurse
- 19 4 Alt
- 21 \1
- 23 3 Alt
- 25 $
- 26 21 Ket
- 28 25 Ket
- 30 3 CBra 3
- 33 3 Ket
- 35 35 Ket
- 37 End
+ 3 14 CBra 1
+ 6 26 Recurse
+ 8 0 Recurse
+ 10 3 CBra 2
+ 13 3 Ket
+ 15 10 Recurse
+ 17 4 Alt
+ 19 \1
+ 21 3 Alt
+ 23 $
+ 24 21 Ket
+ 26 3 CBra 3
+ 29 3 Ket
+ 31 31 Ket
+ 33 End
------------------------------------------------------------------
/(?1)()((((((\1++))\x85)+)|))/
diff --git a/testdata/testoutput8-16-3 b/testdata/testoutput8-16-3
index dab9fa8..80ee1c9 100644
--- a/testdata/testoutput8-16-3
+++ b/testdata/testoutput8-16-3
@@ -720,41 +720,37 @@ Memory allocation (code space): 18
/(((a\2)|(a*)\g<-1>))*a?/
------------------------------------------------------------------
- 0 52 Bra
+ 0 46 Bra
3 Brazero
- 4 43 SCBra 1
- 8 36 Once
- 11 15 CBra 2
- 15 8 CBra 3
- 19 a
- 21 \2
- 23 8 Ket
- 26 15 Alt
- 29 6 CBra 4
- 33 a*
- 35 6 Ket
- 38 29 Recurse
- 41 30 Ket
- 44 36 Ket
- 47 43 KetRmax
- 50 a?+
- 52 52 Ket
- 55 End
+ 4 37 SCBra 1
+ 8 15 CBra 2
+ 12 8 CBra 3
+ 16 a
+ 18 \2
+ 20 8 Ket
+ 23 15 Alt
+ 26 6 CBra 4
+ 30 a*
+ 32 6 Ket
+ 35 26 Recurse
+ 38 30 Ket
+ 41 37 KetRmax
+ 44 a?+
+ 46 46 Ket
+ 49 End
------------------------------------------------------------------
/((?+1)(\1))/
------------------------------------------------------------------
- 0 28 Bra
- 3 22 Once
- 6 16 CBra 1
- 10 13 Recurse
- 13 6 CBra 2
- 17 \1
- 19 6 Ket
- 22 16 Ket
- 25 22 Ket
- 28 28 Ket
- 31 End
+ 0 22 Bra
+ 3 16 CBra 1
+ 7 10 Recurse
+ 10 6 CBra 2
+ 14 \1
+ 16 6 Ket
+ 19 16 Ket
+ 22 22 Ket
+ 25 End
------------------------------------------------------------------
"(?1)(?#?'){2}(a)"
@@ -771,45 +767,41 @@ Memory allocation (code space): 18
/.((?2)(?R)|\1|$)()/
------------------------------------------------------------------
- 0 39 Bra
+ 0 33 Bra
3 Any
- 4 25 Once
- 7 10 CBra 1
- 11 32 Recurse
- 14 0 Recurse
- 17 5 Alt
- 20 \1
- 22 4 Alt
- 25 $
- 26 19 Ket
- 29 25 Ket
- 32 4 CBra 2
- 36 4 Ket
- 39 39 Ket
- 42 End
+ 4 10 CBra 1
+ 8 26 Recurse
+ 11 0 Recurse
+ 14 5 Alt
+ 17 \1
+ 19 4 Alt
+ 22 $
+ 23 19 Ket
+ 26 4 CBra 2
+ 30 4 Ket
+ 33 33 Ket
+ 36 End
------------------------------------------------------------------
/.((?3)(?R)()(?2)|\1|$)()/
------------------------------------------------------------------
- 0 49 Bra
+ 0 43 Bra
3 Any
- 4 35 Once
- 7 20 CBra 1
- 11 42 Recurse
- 14 0 Recurse
- 17 4 CBra 2
- 21 4 Ket
- 24 17 Recurse
- 27 5 Alt
- 30 \1
- 32 4 Alt
- 35 $
- 36 29 Ket
- 39 35 Ket
- 42 4 CBra 3
- 46 4 Ket
- 49 49 Ket
- 52 End
+ 4 20 CBra 1
+ 8 36 Recurse
+ 11 0 Recurse
+ 14 4 CBra 2
+ 18 4 Ket
+ 21 14 Recurse
+ 24 5 Alt
+ 27 \1
+ 29 4 Alt
+ 32 $
+ 33 29 Ket
+ 36 4 CBra 3
+ 40 4 Ket
+ 43 43 Ket
+ 46 End
------------------------------------------------------------------
/(?1)()((((((\1++))\x85)+)|))/
diff --git a/testdata/testoutput8-16-4 b/testdata/testoutput8-16-4
index dab9fa8..80ee1c9 100644
--- a/testdata/testoutput8-16-4
+++ b/testdata/testoutput8-16-4
@@ -720,41 +720,37 @@ Memory allocation (code space): 18
/(((a\2)|(a*)\g<-1>))*a?/
------------------------------------------------------------------
- 0 52 Bra
+ 0 46 Bra
3 Brazero
- 4 43 SCBra 1
- 8 36 Once
- 11 15 CBra 2
- 15 8 CBra 3
- 19 a
- 21 \2
- 23 8 Ket
- 26 15 Alt
- 29 6 CBra 4
- 33 a*
- 35 6 Ket
- 38 29 Recurse
- 41 30 Ket
- 44 36 Ket
- 47 43 KetRmax
- 50 a?+
- 52 52 Ket
- 55 End
+ 4 37 SCBra 1
+ 8 15 CBra 2
+ 12 8 CBra 3
+ 16 a
+ 18 \2
+ 20 8 Ket
+ 23 15 Alt
+ 26 6 CBra 4
+ 30 a*
+ 32 6 Ket
+ 35 26 Recurse
+ 38 30 Ket
+ 41 37 KetRmax
+ 44 a?+
+ 46 46 Ket
+ 49 End
------------------------------------------------------------------
/((?+1)(\1))/
------------------------------------------------------------------
- 0 28 Bra
- 3 22 Once
- 6 16 CBra 1
- 10 13 Recurse
- 13 6 CBra 2
- 17 \1
- 19 6 Ket
- 22 16 Ket
- 25 22 Ket
- 28 28 Ket
- 31 End
+ 0 22 Bra
+ 3 16 CBra 1
+ 7 10 Recurse
+ 10 6 CBra 2
+ 14 \1
+ 16 6 Ket
+ 19 16 Ket
+ 22 22 Ket
+ 25 End
------------------------------------------------------------------
"(?1)(?#?'){2}(a)"
@@ -771,45 +767,41 @@ Memory allocation (code space): 18
/.((?2)(?R)|\1|$)()/
------------------------------------------------------------------
- 0 39 Bra
+ 0 33 Bra
3 Any
- 4 25 Once
- 7 10 CBra 1
- 11 32 Recurse
- 14 0 Recurse
- 17 5 Alt
- 20 \1
- 22 4 Alt
- 25 $
- 26 19 Ket
- 29 25 Ket
- 32 4 CBra 2
- 36 4 Ket
- 39 39 Ket
- 42 End
+ 4 10 CBra 1
+ 8 26 Recurse
+ 11 0 Recurse
+ 14 5 Alt
+ 17 \1
+ 19 4 Alt
+ 22 $
+ 23 19 Ket
+ 26 4 CBra 2
+ 30 4 Ket
+ 33 33 Ket
+ 36 End
------------------------------------------------------------------
/.((?3)(?R)()(?2)|\1|$)()/
------------------------------------------------------------------
- 0 49 Bra
+ 0 43 Bra
3 Any
- 4 35 Once
- 7 20 CBra 1
- 11 42 Recurse
- 14 0 Recurse
- 17 4 CBra 2
- 21 4 Ket
- 24 17 Recurse
- 27 5 Alt
- 30 \1
- 32 4 Alt
- 35 $
- 36 29 Ket
- 39 35 Ket
- 42 4 CBra 3
- 46 4 Ket
- 49 49 Ket
- 52 End
+ 4 20 CBra 1
+ 8 36 Recurse
+ 11 0 Recurse
+ 14 4 CBra 2
+ 18 4 Ket
+ 21 14 Recurse
+ 24 5 Alt
+ 27 \1
+ 29 4 Alt
+ 32 $
+ 33 29 Ket
+ 36 4 CBra 3
+ 40 4 Ket
+ 43 43 Ket
+ 46 End
------------------------------------------------------------------
/(?1)()((((((\1++))\x85)+)|))/
diff --git a/testdata/testoutput8-32-2 b/testdata/testoutput8-32-2
index 7d1c931..91d96c9 100644
--- a/testdata/testoutput8-32-2
+++ b/testdata/testoutput8-32-2
@@ -720,41 +720,37 @@ Memory allocation (code space): 28
/(((a\2)|(a*)\g<-1>))*a?/
------------------------------------------------------------------
- 0 39 Bra
+ 0 35 Bra
2 Brazero
- 3 32 SCBra 1
- 6 27 Once
- 8 12 CBra 2
- 11 7 CBra 3
- 14 a
- 16 \2
- 18 7 Ket
- 20 11 Alt
- 22 5 CBra 4
- 25 a*
- 27 5 Ket
- 29 22 Recurse
- 31 23 Ket
- 33 27 Ket
- 35 32 KetRmax
- 37 a?+
- 39 39 Ket
- 41 End
+ 3 28 SCBra 1
+ 6 12 CBra 2
+ 9 7 CBra 3
+ 12 a
+ 14 \2
+ 16 7 Ket
+ 18 11 Alt
+ 20 5 CBra 4
+ 23 a*
+ 25 5 Ket
+ 27 20 Recurse
+ 29 23 Ket
+ 31 28 KetRmax
+ 33 a?+
+ 35 35 Ket
+ 37 End
------------------------------------------------------------------
/((?+1)(\1))/
------------------------------------------------------------------
- 0 20 Bra
- 2 16 Once
- 4 12 CBra 1
- 7 9 Recurse
- 9 5 CBra 2
- 12 \1
- 14 5 Ket
- 16 12 Ket
- 18 16 Ket
- 20 20 Ket
- 22 End
+ 0 16 Bra
+ 2 12 CBra 1
+ 5 7 Recurse
+ 7 5 CBra 2
+ 10 \1
+ 12 5 Ket
+ 14 12 Ket
+ 16 16 Ket
+ 18 End
------------------------------------------------------------------
"(?1)(?#?'){2}(a)"
@@ -771,45 +767,41 @@ Memory allocation (code space): 28
/.((?2)(?R)|\1|$)()/
------------------------------------------------------------------
- 0 28 Bra
+ 0 24 Bra
2 Any
- 3 18 Once
- 5 7 CBra 1
- 8 23 Recurse
- 10 0 Recurse
- 12 4 Alt
- 14 \1
- 16 3 Alt
- 18 $
- 19 14 Ket
- 21 18 Ket
- 23 3 CBra 2
- 26 3 Ket
- 28 28 Ket
- 30 End
+ 3 7 CBra 1
+ 6 19 Recurse
+ 8 0 Recurse
+ 10 4 Alt
+ 12 \1
+ 14 3 Alt
+ 16 $
+ 17 14 Ket
+ 19 3 CBra 2
+ 22 3 Ket
+ 24 24 Ket
+ 26 End
------------------------------------------------------------------
/.((?3)(?R)()(?2)|\1|$)()/
------------------------------------------------------------------
- 0 35 Bra
+ 0 31 Bra
2 Any
- 3 25 Once
- 5 14 CBra 1
- 8 30 Recurse
- 10 0 Recurse
- 12 3 CBra 2
- 15 3 Ket
- 17 12 Recurse
- 19 4 Alt
- 21 \1
- 23 3 Alt
- 25 $
- 26 21 Ket
- 28 25 Ket
- 30 3 CBra 3
- 33 3 Ket
- 35 35 Ket
- 37 End
+ 3 14 CBra 1
+ 6 26 Recurse
+ 8 0 Recurse
+ 10 3 CBra 2
+ 13 3 Ket
+ 15 10 Recurse
+ 17 4 Alt
+ 19 \1
+ 21 3 Alt
+ 23 $
+ 24 21 Ket
+ 26 3 CBra 3
+ 29 3 Ket
+ 31 31 Ket
+ 33 End
------------------------------------------------------------------
/(?1)()((((((\1++))\x85)+)|))/
diff --git a/testdata/testoutput8-32-3 b/testdata/testoutput8-32-3
index 7d1c931..91d96c9 100644
--- a/testdata/testoutput8-32-3
+++ b/testdata/testoutput8-32-3
@@ -720,41 +720,37 @@ Memory allocation (code space): 28
/(((a\2)|(a*)\g<-1>))*a?/
------------------------------------------------------------------
- 0 39 Bra
+ 0 35 Bra
2 Brazero
- 3 32 SCBra 1
- 6 27 Once
- 8 12 CBra 2
- 11 7 CBra 3
- 14 a
- 16 \2
- 18 7 Ket
- 20 11 Alt
- 22 5 CBra 4
- 25 a*
- 27 5 Ket
- 29 22 Recurse
- 31 23 Ket
- 33 27 Ket
- 35 32 KetRmax
- 37 a?+
- 39 39 Ket
- 41 End
+ 3 28 SCBra 1
+ 6 12 CBra 2
+ 9 7 CBra 3
+ 12 a
+ 14 \2
+ 16 7 Ket
+ 18 11 Alt
+ 20 5 CBra 4
+ 23 a*
+ 25 5 Ket
+ 27 20 Recurse
+ 29 23 Ket
+ 31 28 KetRmax
+ 33 a?+
+ 35 35 Ket
+ 37 End
------------------------------------------------------------------
/((?+1)(\1))/
------------------------------------------------------------------
- 0 20 Bra
- 2 16 Once
- 4 12 CBra 1
- 7 9 Recurse
- 9 5 CBra 2
- 12 \1
- 14 5 Ket
- 16 12 Ket
- 18 16 Ket
- 20 20 Ket
- 22 End
+ 0 16 Bra
+ 2 12 CBra 1
+ 5 7 Recurse
+ 7 5 CBra 2
+ 10 \1
+ 12 5 Ket
+ 14 12 Ket
+ 16 16 Ket
+ 18 End
------------------------------------------------------------------
"(?1)(?#?'){2}(a)"
@@ -771,45 +767,41 @@ Memory allocation (code space): 28
/.((?2)(?R)|\1|$)()/
------------------------------------------------------------------
- 0 28 Bra
+ 0 24 Bra
2 Any
- 3 18 Once
- 5 7 CBra 1
- 8 23 Recurse
- 10 0 Recurse
- 12 4 Alt
- 14 \1
- 16 3 Alt
- 18 $
- 19 14 Ket
- 21 18 Ket
- 23 3 CBra 2
- 26 3 Ket
- 28 28 Ket
- 30 End
+ 3 7 CBra 1
+ 6 19 Recurse
+ 8 0 Recurse
+ 10 4 Alt
+ 12 \1
+ 14 3 Alt
+ 16 $
+ 17 14 Ket
+ 19 3 CBra 2
+ 22 3 Ket
+ 24 24 Ket
+ 26 End
------------------------------------------------------------------
/.((?3)(?R)()(?2)|\1|$)()/
------------------------------------------------------------------
- 0 35 Bra
+ 0 31 Bra
2 Any
- 3 25 Once
- 5 14 CBra 1
- 8 30 Recurse
- 10 0 Recurse
- 12 3 CBra 2
- 15 3 Ket
- 17 12 Recurse
- 19 4 Alt
- 21 \1
- 23 3 Alt
- 25 $
- 26 21 Ket
- 28 25 Ket
- 30 3 CBra 3
- 33 3 Ket
- 35 35 Ket
- 37 End
+ 3 14 CBra 1
+ 6 26 Recurse
+ 8 0 Recurse
+ 10 3 CBra 2
+ 13 3 Ket
+ 15 10 Recurse
+ 17 4 Alt
+ 19 \1
+ 21 3 Alt
+ 23 $
+ 24 21 Ket
+ 26 3 CBra 3
+ 29 3 Ket
+ 31 31 Ket
+ 33 End
------------------------------------------------------------------
/(?1)()((((((\1++))\x85)+)|))/
diff --git a/testdata/testoutput8-32-4 b/testdata/testoutput8-32-4
index 7d1c931..91d96c9 100644
--- a/testdata/testoutput8-32-4
+++ b/testdata/testoutput8-32-4
@@ -720,41 +720,37 @@ Memory allocation (code space): 28
/(((a\2)|(a*)\g<-1>))*a?/
------------------------------------------------------------------
- 0 39 Bra
+ 0 35 Bra
2 Brazero
- 3 32 SCBra 1
- 6 27 Once
- 8 12 CBra 2
- 11 7 CBra 3
- 14 a
- 16 \2
- 18 7 Ket
- 20 11 Alt
- 22 5 CBra 4
- 25 a*
- 27 5 Ket
- 29 22 Recurse
- 31 23 Ket
- 33 27 Ket
- 35 32 KetRmax
- 37 a?+
- 39 39 Ket
- 41 End
+ 3 28 SCBra 1
+ 6 12 CBra 2
+ 9 7 CBra 3
+ 12 a
+ 14 \2
+ 16 7 Ket
+ 18 11 Alt
+ 20 5 CBra 4
+ 23 a*
+ 25 5 Ket
+ 27 20 Recurse
+ 29 23 Ket
+ 31 28 KetRmax
+ 33 a?+
+ 35 35 Ket
+ 37 End
------------------------------------------------------------------
/((?+1)(\1))/
------------------------------------------------------------------
- 0 20 Bra
- 2 16 Once
- 4 12 CBra 1
- 7 9 Recurse
- 9 5 CBra 2
- 12 \1
- 14 5 Ket
- 16 12 Ket
- 18 16 Ket
- 20 20 Ket
- 22 End
+ 0 16 Bra
+ 2 12 CBra 1
+ 5 7 Recurse
+ 7 5 CBra 2
+ 10 \1
+ 12 5 Ket
+ 14 12 Ket
+ 16 16 Ket
+ 18 End
------------------------------------------------------------------
"(?1)(?#?'){2}(a)"
@@ -771,45 +767,41 @@ Memory allocation (code space): 28
/.((?2)(?R)|\1|$)()/
------------------------------------------------------------------
- 0 28 Bra
+ 0 24 Bra
2 Any
- 3 18 Once
- 5 7 CBra 1
- 8 23 Recurse
- 10 0 Recurse
- 12 4 Alt
- 14 \1
- 16 3 Alt
- 18 $
- 19 14 Ket
- 21 18 Ket
- 23 3 CBra 2
- 26 3 Ket
- 28 28 Ket
- 30 End
+ 3 7 CBra 1
+ 6 19 Recurse
+ 8 0 Recurse
+ 10 4 Alt
+ 12 \1
+ 14 3 Alt
+ 16 $
+ 17 14 Ket
+ 19 3 CBra 2
+ 22 3 Ket
+ 24 24 Ket
+ 26 End
------------------------------------------------------------------
/.((?3)(?R)()(?2)|\1|$)()/
------------------------------------------------------------------
- 0 35 Bra
+ 0 31 Bra
2 Any
- 3 25 Once
- 5 14 CBra 1
- 8 30 Recurse
- 10 0 Recurse
- 12 3 CBra 2
- 15 3 Ket
- 17 12 Recurse
- 19 4 Alt
- 21 \1
- 23 3 Alt
- 25 $
- 26 21 Ket
- 28 25 Ket
- 30 3 CBra 3
- 33 3 Ket
- 35 35 Ket
- 37 End
+ 3 14 CBra 1
+ 6 26 Recurse
+ 8 0 Recurse
+ 10 3 CBra 2
+ 13 3 Ket
+ 15 10 Recurse
+ 17 4 Alt
+ 19 \1
+ 21 3 Alt
+ 23 $
+ 24 21 Ket
+ 26 3 CBra 3
+ 29 3 Ket
+ 31 31 Ket
+ 33 End
------------------------------------------------------------------
/(?1)()((((((\1++))\x85)+)|))/
diff --git a/testdata/testoutput8-8-2 b/testdata/testoutput8-8-2
index 4c4e6a8..8393d5c 100644
--- a/testdata/testoutput8-8-2
+++ b/testdata/testoutput8-8-2
@@ -720,41 +720,37 @@ Memory allocation (code space): 10
/(((a\2)|(a*)\g<-1>))*a?/
------------------------------------------------------------------
- 0 57 Bra
+ 0 51 Bra
3 Brazero
- 4 48 SCBra 1
- 9 40 Once
- 12 18 CBra 2
- 17 10 CBra 3
- 22 a
- 24 \2
- 27 10 Ket
- 30 16 Alt
- 33 7 CBra 4
- 38 a*
- 40 7 Ket
- 43 33 Recurse
- 46 34 Ket
- 49 40 Ket
- 52 48 KetRmax
- 55 a?+
- 57 57 Ket
- 60 End
+ 4 42 SCBra 1
+ 9 18 CBra 2
+ 14 10 CBra 3
+ 19 a
+ 21 \2
+ 24 10 Ket
+ 27 16 Alt
+ 30 7 CBra 4
+ 35 a*
+ 37 7 Ket
+ 40 30 Recurse
+ 43 34 Ket
+ 46 42 KetRmax
+ 49 a?+
+ 51 51 Ket
+ 54 End
------------------------------------------------------------------
/((?+1)(\1))/
------------------------------------------------------------------
- 0 31 Bra
- 3 25 Once
- 6 19 CBra 1
- 11 14 Recurse
- 14 8 CBra 2
- 19 \1
- 22 8 Ket
- 25 19 Ket
- 28 25 Ket
- 31 31 Ket
- 34 End
+ 0 25 Bra
+ 3 19 CBra 1
+ 8 11 Recurse
+ 11 8 CBra 2
+ 16 \1
+ 19 8 Ket
+ 22 19 Ket
+ 25 25 Ket
+ 28 End
------------------------------------------------------------------
"(?1)(?#?'){2}(a)"
@@ -771,45 +767,41 @@ Memory allocation (code space): 10
/.((?2)(?R)|\1|$)()/
------------------------------------------------------------------
- 0 42 Bra
+ 0 36 Bra
3 Any
- 4 27 Once
- 7 11 CBra 1
- 12 34 Recurse
- 15 0 Recurse
- 18 6 Alt
- 21 \1
- 24 4 Alt
- 27 $
- 28 21 Ket
- 31 27 Ket
- 34 5 CBra 2
- 39 5 Ket
- 42 42 Ket
- 45 End
+ 4 11 CBra 1
+ 9 28 Recurse
+ 12 0 Recurse
+ 15 6 Alt
+ 18 \1
+ 21 4 Alt
+ 24 $
+ 25 21 Ket
+ 28 5 CBra 2
+ 33 5 Ket
+ 36 36 Ket
+ 39 End
------------------------------------------------------------------
/.((?3)(?R)()(?2)|\1|$)()/
------------------------------------------------------------------
- 0 53 Bra
+ 0 47 Bra
3 Any
- 4 38 Once
- 7 22 CBra 1
- 12 45 Recurse
- 15 0 Recurse
- 18 5 CBra 2
- 23 5 Ket
- 26 18 Recurse
- 29 6 Alt
- 32 \1
- 35 4 Alt
- 38 $
- 39 32 Ket
- 42 38 Ket
- 45 5 CBra 3
- 50 5 Ket
- 53 53 Ket
- 56 End
+ 4 22 CBra 1
+ 9 39 Recurse
+ 12 0 Recurse
+ 15 5 CBra 2
+ 20 5 Ket
+ 23 15 Recurse
+ 26 6 Alt
+ 29 \1
+ 32 4 Alt
+ 35 $
+ 36 32 Ket
+ 39 5 CBra 3
+ 44 5 Ket
+ 47 47 Ket
+ 50 End
------------------------------------------------------------------
/(?1)()((((((\1++))\x85)+)|))/
diff --git a/testdata/testoutput8-8-3 b/testdata/testoutput8-8-3
index 7eb5142..963700a 100644
--- a/testdata/testoutput8-8-3
+++ b/testdata/testoutput8-8-3
@@ -720,41 +720,37 @@ Memory allocation (code space): 12
/(((a\2)|(a*)\g<-1>))*a?/
------------------------------------------------------------------
- 0 70 Bra
+ 0 62 Bra
4 Brazero
- 5 59 SCBra 1
- 11 49 Once
- 15 21 CBra 2
- 21 11 CBra 3
- 27 a
- 29 \2
- 32 11 Ket
- 36 20 Alt
- 40 8 CBra 4
- 46 a*
- 48 8 Ket
- 52 40 Recurse
- 56 41 Ket
- 60 49 Ket
- 64 59 KetRmax
- 68 a?+
- 70 70 Ket
- 74 End
+ 5 51 SCBra 1
+ 11 21 CBra 2
+ 17 11 CBra 3
+ 23 a
+ 25 \2
+ 28 11 Ket
+ 32 20 Alt
+ 36 8 CBra 4
+ 42 a*
+ 44 8 Ket
+ 48 36 Recurse
+ 52 41 Ket
+ 56 51 KetRmax
+ 60 a?+
+ 62 62 Ket
+ 66 End
------------------------------------------------------------------
/((?+1)(\1))/
------------------------------------------------------------------
- 0 39 Bra
- 4 31 Once
- 8 23 CBra 1
- 14 18 Recurse
- 18 9 CBra 2
- 24 \1
- 27 9 Ket
- 31 23 Ket
- 35 31 Ket
- 39 39 Ket
- 43 End
+ 0 31 Bra
+ 4 23 CBra 1
+ 10 14 Recurse
+ 14 9 CBra 2
+ 20 \1
+ 23 9 Ket
+ 27 23 Ket
+ 31 31 Ket
+ 35 End
------------------------------------------------------------------
"(?1)(?#?'){2}(a)"
@@ -771,45 +767,41 @@ Memory allocation (code space): 12
/.((?2)(?R)|\1|$)()/
------------------------------------------------------------------
- 0 53 Bra
+ 0 45 Bra
4 Any
- 5 34 Once
- 9 14 CBra 1
- 15 43 Recurse
- 19 0 Recurse
- 23 7 Alt
- 27 \1
- 30 5 Alt
- 34 $
- 35 26 Ket
- 39 34 Ket
- 43 6 CBra 2
- 49 6 Ket
- 53 53 Ket
- 57 End
+ 5 14 CBra 1
+ 11 35 Recurse
+ 15 0 Recurse
+ 19 7 Alt
+ 23 \1
+ 26 5 Alt
+ 30 $
+ 31 26 Ket
+ 35 6 CBra 2
+ 41 6 Ket
+ 45 45 Ket
+ 49 End
------------------------------------------------------------------
/.((?3)(?R)()(?2)|\1|$)()/
------------------------------------------------------------------
- 0 67 Bra
+ 0 59 Bra
4 Any
- 5 48 Once
- 9 28 CBra 1
- 15 57 Recurse
- 19 0 Recurse
- 23 6 CBra 2
- 29 6 Ket
- 33 23 Recurse
- 37 7 Alt
- 41 \1
- 44 5 Alt
- 48 $
- 49 40 Ket
- 53 48 Ket
- 57 6 CBra 3
- 63 6 Ket
- 67 67 Ket
- 71 End
+ 5 28 CBra 1
+ 11 49 Recurse
+ 15 0 Recurse
+ 19 6 CBra 2
+ 25 6 Ket
+ 29 19 Recurse
+ 33 7 Alt
+ 37 \1
+ 40 5 Alt
+ 44 $
+ 45 40 Ket
+ 49 6 CBra 3
+ 55 6 Ket
+ 59 59 Ket
+ 63 End
------------------------------------------------------------------
/(?1)()((((((\1++))\x85)+)|))/
diff --git a/testdata/testoutput8-8-4 b/testdata/testoutput8-8-4
index 4d9bcd6..8e19908 100644
--- a/testdata/testoutput8-8-4
+++ b/testdata/testoutput8-8-4
@@ -720,41 +720,37 @@ Memory allocation (code space): 14
/(((a\2)|(a*)\g<-1>))*a?/
------------------------------------------------------------------
- 0 83 Bra
+ 0 73 Bra
5 Brazero
- 6 70 SCBra 1
- 13 58 Once
- 18 24 CBra 2
- 25 12 CBra 3
- 32 a
- 34 \2
- 37 12 Ket
- 42 24 Alt
- 47 9 CBra 4
- 54 a*
- 56 9 Ket
- 61 47 Recurse
- 66 48 Ket
- 71 58 Ket
- 76 70 KetRmax
- 81 a?+
- 83 83 Ket
- 88 End
+ 6 60 SCBra 1
+ 13 24 CBra 2
+ 20 12 CBra 3
+ 27 a
+ 29 \2
+ 32 12 Ket
+ 37 24 Alt
+ 42 9 CBra 4
+ 49 a*
+ 51 9 Ket
+ 56 42 Recurse
+ 61 48 Ket
+ 66 60 KetRmax
+ 71 a?+
+ 73 73 Ket
+ 78 End
------------------------------------------------------------------
/((?+1)(\1))/
------------------------------------------------------------------
- 0 47 Bra
- 5 37 Once
- 10 27 CBra 1
- 17 22 Recurse
- 22 10 CBra 2
- 29 \1
- 32 10 Ket
- 37 27 Ket
- 42 37 Ket
- 47 47 Ket
- 52 End
+ 0 37 Bra
+ 5 27 CBra 1
+ 12 17 Recurse
+ 17 10 CBra 2
+ 24 \1
+ 27 10 Ket
+ 32 27 Ket
+ 37 37 Ket
+ 42 End
------------------------------------------------------------------
"(?1)(?#?'){2}(a)"
@@ -771,45 +767,41 @@ Memory allocation (code space): 14
/.((?2)(?R)|\1|$)()/
------------------------------------------------------------------
- 0 64 Bra
+ 0 54 Bra
5 Any
- 6 41 Once
- 11 17 CBra 1
- 18 52 Recurse
- 23 0 Recurse
- 28 8 Alt
- 33 \1
- 36 6 Alt
- 41 $
- 42 31 Ket
- 47 41 Ket
- 52 7 CBra 2
- 59 7 Ket
- 64 64 Ket
- 69 End
+ 6 17 CBra 1
+ 13 42 Recurse
+ 18 0 Recurse
+ 23 8 Alt
+ 28 \1
+ 31 6 Alt
+ 36 $
+ 37 31 Ket
+ 42 7 CBra 2
+ 49 7 Ket
+ 54 54 Ket
+ 59 End
------------------------------------------------------------------
/.((?3)(?R)()(?2)|\1|$)()/
------------------------------------------------------------------
- 0 81 Bra
+ 0 71 Bra
5 Any
- 6 58 Once
- 11 34 CBra 1
- 18 69 Recurse
- 23 0 Recurse
- 28 7 CBra 2
- 35 7 Ket
- 40 28 Recurse
- 45 8 Alt
- 50 \1
- 53 6 Alt
- 58 $
- 59 48 Ket
- 64 58 Ket
- 69 7 CBra 3
- 76 7 Ket
- 81 81 Ket
- 86 End
+ 6 34 CBra 1
+ 13 59 Recurse
+ 18 0 Recurse
+ 23 7 CBra 2
+ 30 7 Ket
+ 35 23 Recurse
+ 40 8 Alt
+ 45 \1
+ 48 6 Alt
+ 53 $
+ 54 48 Ket
+ 59 7 CBra 3
+ 66 7 Ket
+ 71 71 Ket
+ 76 End
------------------------------------------------------------------
/(?1)()((((((\1++))\x85)+)|))/
diff --git a/testdata/wintestoutput3 b/testdata/wintestoutput3
index be856b1..b1894b6 100644
--- a/testdata/wintestoutput3
+++ b/testdata/wintestoutput3
@@ -88,13 +88,13 @@ No match
0: école
/\w/I
-Capturing subpattern count = 0
+Capture group count = 0
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
Subject length lower bound = 1
/\w/I,locale=french
-Capturing subpattern count = 0
+Capture group count = 0
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
ƒ Š Œ Ž š œ ž Ÿ ª ² ³ µ ¹ º À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö
@@ -165,7 +165,7 @@ No match
Ket
End
------------------------------------------------------------------
-Capturing subpattern count = 0
+Capture group count = 0
Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
a b c d e f g h i j k l m n o p q r s t u v w x y z ƒ Š Œ Ž š œ ž Ÿ ª µ º
À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å