summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWill Estes <wlestes@users.sourceforge.net>2001-07-24 20:23:36 +0000
committerWill Estes <wlestes@users.sourceforge.net>2001-07-24 20:23:36 +0000
commit5fd717cd0c50e392c39623824d412a6548d5c836 (patch)
treebe50d2d21f9d5890c2b5a3162f246836850ca308
parent940f3dbdbb8fb1d366fca01858a9287fe43f7db0 (diff)
re-add these files
-rw-r--r--examples/fastwc/README56
-rw-r--r--examples/fastwc/mywc.c26
-rw-r--r--examples/fastwc/wc1.l18
-rw-r--r--examples/fastwc/wc2.l20
-rw-r--r--examples/fastwc/wc3.l24
-rw-r--r--examples/fastwc/wc4.l27
-rw-r--r--examples/fastwc/wc5.l24
7 files changed, 195 insertions, 0 deletions
diff --git a/examples/fastwc/README b/examples/fastwc/README
new file mode 100644
index 0000000..0dd3afe
--- /dev/null
+++ b/examples/fastwc/README
@@ -0,0 +1,56 @@
+This directory contains some examples illustrating techniques for extracting
+high-performance from flex scanners. Each program implements a simplified
+version of the Unix "wc" tool: read text from stdin and print the number of
+characters, words, and lines present in the text. All programs were compiled
+using gcc (version unavailable, sorry) with the -O flag, and run on a
+SPARCstation 1+. The input used was a PostScript file, mainly containing
+figures, with the following "wc" counts:
+
+ lines words characters
+ 214217 635954 2592172
+
+
+The basic principles illustrated by these programs are:
+
+ - match as much text with each rule as possible
+ - adding rules does not slow you down!
+ - avoid backing up
+
+and the big caveat that comes with them is:
+
+ - you buy performance with decreased maintainability; make
+ sure you really need it before applying the above techniques.
+
+See the "Performance Considerations" section of flexdoc for more
+details regarding these principles.
+
+
+The different versions of "wc":
+
+ mywc.c
+ a simple but fairly efficient C version
+
+ wc1.l a naive flex "wc" implementation
+
+ wc2.l somewhat faster; adds rules to match multiple tokens at once
+
+ wc3.l faster still; adds more rules to match longer runs of tokens
+
+ wc4.l fastest; still more rules added; hard to do much better
+ using flex (or, I suspect, hand-coding)
+
+ wc5.l identical to wc3.l except one rule has been slightly
+ shortened, introducing backing-up
+
+Timing results (all times in user CPU seconds):
+
+ program time notes
+ ------- ---- -----
+ wc1 16.4 default flex table compression (= -Cem)
+ wc1 6.7 -Cf compression option
+ /bin/wc 5.8 Sun's standard "wc" tool
+ mywc 4.6 simple but better C implementation!
+ wc2 4.6 as good as C implementation; built using -Cf
+ wc3 3.8 -Cf
+ wc4 3.3 -Cf
+ wc5 5.7 -Cf; ouch, backing up is expensive
diff --git a/examples/fastwc/mywc.c b/examples/fastwc/mywc.c
new file mode 100644
index 0000000..92e5a36
--- /dev/null
+++ b/examples/fastwc/mywc.c
@@ -0,0 +1,26 @@
+/* A simple but fairly efficient C version of the Unix "wc" tool */
+
+#include <stdio.h>
+#include <ctype.h>
+
+main()
+{
+ register int c, cc = 0, wc = 0, lc = 0;
+ FILE *f = stdin;
+
+ while ((c = getc(f)) != EOF) {
+ ++cc;
+ if (isgraph(c)) {
+ ++wc;
+ do {
+ c = getc(f);
+ if (c == EOF)
+ goto done;
+ ++cc;
+ } while (isgraph(c));
+ }
+ if (c == '\n')
+ ++lc;
+ }
+done: printf( "%8d%8d%8d\n", lc, wc, cc );
+}
diff --git a/examples/fastwc/wc1.l b/examples/fastwc/wc1.l
new file mode 100644
index 0000000..d6696bc
--- /dev/null
+++ b/examples/fastwc/wc1.l
@@ -0,0 +1,18 @@
+/* First cut at a flex-based "wc" tool. */
+
+ws [ \t]
+nonws [^ \t\n]
+
+%%
+ int cc = 0, wc = 0, lc = 0;
+
+{nonws}+ cc += yyleng; ++wc;
+
+{ws}+ cc += yyleng;
+
+\n ++lc; ++cc;
+
+<<EOF>> {
+ printf( "%8d %8d %8d\n", lc, wc, cc );
+ yyterminate();
+ }
diff --git a/examples/fastwc/wc2.l b/examples/fastwc/wc2.l
new file mode 100644
index 0000000..bd63cd4
--- /dev/null
+++ b/examples/fastwc/wc2.l
@@ -0,0 +1,20 @@
+/* Somewhat faster "wc" tool: match more text with each rule */
+
+ws [ \t]
+nonws [^ \t\n]
+word {ws}*{nonws}+
+
+%%
+ int cc = 0, wc = 0, lc = 0;
+
+{word}{ws}* cc += yyleng; ++wc;
+{word}{ws}*\n cc += yyleng; ++wc; ++lc;
+
+{ws}+ cc += yyleng;
+
+\n+ cc += yyleng; lc += yyleng;
+
+<<EOF>> {
+ printf( "%8d %8d %8d\n", lc, wc, cc );
+ yyterminate();
+ }
diff --git a/examples/fastwc/wc3.l b/examples/fastwc/wc3.l
new file mode 100644
index 0000000..7c5f2e2
--- /dev/null
+++ b/examples/fastwc/wc3.l
@@ -0,0 +1,24 @@
+/* Somewhat faster still: potentially match a lot of text with each rule */
+
+ws [ \t]
+nonws [^ \t\n]
+word {ws}*{nonws}+
+words {word}{ws}+
+
+%%
+ int cc = 0, wc = 0, lc = 0;
+
+{word}{ws}* cc += yyleng; ++wc;
+{word}{ws}*\n cc += yyleng; ++wc; ++lc;
+{words}{word}{ws}* cc += yyleng; wc += 2;
+{words}{2}{word}{ws}* cc += yyleng; wc += 3;
+{words}{3}{word}{ws}* cc += yyleng; wc += 4;
+
+{ws}+ cc += yyleng;
+
+\n+ cc += yyleng; lc += yyleng;
+
+<<EOF>> {
+ printf( "%8d %8d %8d\n", lc, wc, cc );
+ yyterminate();
+ }
diff --git a/examples/fastwc/wc4.l b/examples/fastwc/wc4.l
new file mode 100644
index 0000000..cbe56f6
--- /dev/null
+++ b/examples/fastwc/wc4.l
@@ -0,0 +1,27 @@
+/* Fastest version of wc: add rules to pick up newlines, too */
+
+ws [ \t]
+nonws [^ \t\n]
+word {ws}*{nonws}+
+words {word}{ws}+
+
+%%
+ int cc = 0, wc = 0, lc = 0;
+
+{word}{ws}* ++wc; cc += yyleng;
+{word}{ws}*\n ++wc; cc += yyleng; ++lc;
+{words}{word}{ws}* wc += 2; cc += yyleng;
+{words}{word}{ws}*\n wc += 2; cc += yyleng; ++lc;
+{words}{2}{word}{ws}* wc += 3; cc += yyleng;
+{words}{2}{word}{ws}*\n wc += 3; cc += yyleng; ++lc;
+{words}{3}{word}{ws}* wc += 4; cc += yyleng;
+{words}{3}{word}{ws}*\n wc += 4; cc += yyleng; ++lc;
+
+{ws}+ cc += yyleng;
+
+\n+ cc += yyleng; lc += yyleng;
+
+<<EOF>> {
+ printf( "%8d %8d %8d\n", lc, wc, cc );
+ yyterminate();
+ }
diff --git a/examples/fastwc/wc5.l b/examples/fastwc/wc5.l
new file mode 100644
index 0000000..8fe17b6
--- /dev/null
+++ b/examples/fastwc/wc5.l
@@ -0,0 +1,24 @@
+/* Oops; slight change from wc3.l introduces backtracking */
+
+ws [ \t]
+nonws [^ \t\n]
+word {ws}*{nonws}+
+words {word}{ws}+
+
+%%
+ int cc = 0, wc = 0, lc = 0;
+
+{word}{ws}* cc += yyleng; ++wc;
+{word}{ws}*\n cc += yyleng; ++wc; ++lc;
+{words}{word} cc += yyleng; wc += 2; /* oops */
+{words}{2}{word}{ws}* cc += yyleng; wc += 3;
+{words}{3}{word}{ws}* cc += yyleng; wc += 4;
+
+{ws}+ cc += yyleng;
+
+\n+ cc += yyleng; lc += yyleng;
+
+<<EOF>> {
+ printf( "%8d %8d %8d\n", lc, wc, cc );
+ yyterminate();
+ }