summary | refs | log | tree | commit | diff
path: root/doc/pcre2demo.3
diff options
context:
space:
mode:
Diffstat (limited to 'doc/pcre2demo.3')
-rw-r--r-- doc/pcre2demo.3 56
1 files changed, 34 insertions, 22 deletions
diff --git a/doc/pcre2demo.3 b/doc/pcre2demo.3
index 5deed0a..c02dcd9 100644
--- a/doc/pcre2demo.3
+++ b/doc/pcre2demo.3
@@ -20,28 +20,31 @@
*************************************************/
/* This is a demonstration program to illustrate a straightforward way of
-calling the PCRE2 regular expression library from a C program. See the
+using the PCRE2 regular expression library from a C program. See the
pcre2sample documentation for a short discussion ("man pcre2sample" if you have
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
incompatible with the original PCRE API.
There are actually three libraries, each supporting a different code unit
-width. This demonstration program uses the 8-bit library.
+width. This demonstration program uses the 8-bit library. The default is to
+process each code unit as a separate character, but if the pattern begins with
+"(*UTF)", both it and the subject are treated as UTF-8 strings, where
+characters may occupy multiple code units.
In Unix-like environments, if PCRE2 is installed in your standard system
libraries, you should be able to compile this program using this command:
-gcc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo
+cc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo
If PCRE2 is not installed in a standard place, it is likely to be installed
with support for the pkg-config mechanism. If you have pkg-config, you can
compile this program using this command:
-gcc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo
+cc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo
-If you do not have pkg-config, you may have to use this:
+If you do not have pkg-config, you may have to use something like this:
-gcc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \e
+cc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \e
-R/usr/local/lib -lpcre2-8 -o pcre2demo
Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
@@ -56,9 +59,14 @@ the following line. */
/* #define PCRE2_STATIC */
-/* This macro must be defined before including pcre2.h. For a program that uses
-only one code unit width, it makes it possible to use generic function names
-such as pcre2_compile(). */
+/* The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h.
+For a program that uses only one code unit width, setting it to 8, 16, or 32
+makes it possible to use generic function names such as pcre2_compile(). Note
+that just changing 8 to 16 (for example) is not sufficient to convert this
+program to process 16-bit characters. Even in a fully 16-bit environment, where
+string-handling functions such as strcmp() and printf() work with 16-bit
+characters, the code for handling the table of named substrings will still need
+to be modified. */
#define PCRE2_CODE_UNIT_WIDTH 8
@@ -79,19 +87,19 @@ int main(int argc, char **argv)
{
pcre2_code *re;
PCRE2_SPTR pattern; /* PCRE2_SPTR is a pointer to unsigned code units of */
-PCRE2_SPTR subject; /* the appropriate width (8, 16, or 32 bits). */
+PCRE2_SPTR subject; /* the appropriate width (in this case, 8 bits). */
PCRE2_SPTR name_table;
int crlf_is_newline;
int errornumber;
int find_all;
int i;
-int namecount;
-int name_entry_size;
int rc;
int utf8;
uint32_t option_bits;
+uint32_t namecount;
+uint32_t name_entry_size;
uint32_t newline;
PCRE2_SIZE erroroffset;
@@ -106,15 +114,19 @@ pcre2_match_data *match_data;
* First, sort out the command line. There is only one possible option at *
* the moment, "-g" to request repeated matching to find all occurrences, *
* like Perl's /g option. We set the variable find_all to a non-zero value *
-* if the -g option is present. Apart from that, there must be exactly two *
-* arguments. *
+* if the -g option is present. *
**************************************************************************/
find_all = 0;
for (i = 1; i < argc; i++)
{
if (strcmp(argv[i], "-g") == 0) find_all = 1;
- else break;
+ else if (argv[i][0] == '-')
+ {
+ printf("Unrecognised option %s\en", argv[i]);
+ return 1;
+ }
+ else break;
}
/* After the options, we require exactly two arguments, which are the pattern,
@@ -122,7 +134,7 @@ and the subject string. */
if (argc - i != 2)
{
- printf("Two arguments required: a regex and a subject string\en");
+ printf("Exactly two arguments required: a regex and a subject string\en");
return 1;
}
@@ -201,7 +213,7 @@ if (rc < 0)
stored. */
ovector = pcre2_get_ovector_pointer(match_data);
-printf("\enMatch succeeded at offset %d\en", (int)ovector[0]);
+printf("Match succeeded at offset %d\en", (int)ovector[0]);
/*************************************************************************
@@ -242,7 +254,7 @@ we have to extract the count of named parentheses from the pattern. */
PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
&namecount); /* where to put the answer */
-if (namecount <= 0) printf("No named substrings\en"); else
+if (namecount == 0) printf("No named substrings\en"); else
{
PCRE2_SPTR tabptr;
printf("Named substrings\en");
@@ -330,8 +342,8 @@ crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
for (;;)
{
- uint32_t options = 0; /* Normally no options */
- PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */
+ uint32_t options = 0; /* Normally no options */
+ PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */
/* If the previous match was for an empty string, we are finished if we are
at the end of the subject. Otherwise, arrange to run another match at the
@@ -371,7 +383,7 @@ for (;;)
{
if (options == 0) break; /* All matches found */
ovector[1] = start_offset + 1; /* Advance one code unit */
- if (crlf_is_newline && /* If CRLF is newline & */
+ if (crlf_is_newline && /* If CRLF is a newline & */
start_offset < subject_length - 1 && /* we are at CRLF, */
subject[start_offset] == '\er' &&
subject[start_offset + 1] == '\en')
@@ -417,7 +429,7 @@ for (;;)
printf("%2d: %.*s\en", i, (int)substring_length, (char *)substring_start);
}
- if (namecount <= 0) printf("No named substrings\en"); else
+ if (namecount == 0) printf("No named substrings\en"); else
{
PCRE2_SPTR tabptr = name_table;
printf("Named substrings\en");