diff options
author | Will Estes <wlestes@users.sourceforge.net> | 2001-07-24 20:23:36 +0000 |
---|---|---|
committer | Will Estes <wlestes@users.sourceforge.net> | 2001-07-24 20:23:36 +0000 |
commit | 5fd717cd0c50e392c39623824d412a6548d5c836 (patch) | |
tree | be50d2d21f9d5890c2b5a3162f246836850ca308 | |
parent | 940f3dbdbb8fb1d366fca01858a9287fe43f7db0 (diff) |
re-add these files
-rw-r--r-- | examples/fastwc/README | 56 | ||||
-rw-r--r-- | examples/fastwc/mywc.c | 26 | ||||
-rw-r--r-- | examples/fastwc/wc1.l | 18 | ||||
-rw-r--r-- | examples/fastwc/wc2.l | 20 | ||||
-rw-r--r-- | examples/fastwc/wc3.l | 24 | ||||
-rw-r--r-- | examples/fastwc/wc4.l | 27 | ||||
-rw-r--r-- | examples/fastwc/wc5.l | 24 |
7 files changed, 195 insertions, 0 deletions
diff --git a/examples/fastwc/README b/examples/fastwc/README new file mode 100644 index 0000000..0dd3afe --- /dev/null +++ b/examples/fastwc/README @@ -0,0 +1,56 @@ +This directory contains some examples illustrating techniques for extracting +high-performance from flex scanners. Each program implements a simplified +version of the Unix "wc" tool: read text from stdin and print the number of +characters, words, and lines present in the text. All programs were compiled +using gcc (version unavailable, sorry) with the -O flag, and run on a +SPARCstation 1+. The input used was a PostScript file, mainly containing +figures, with the following "wc" counts: + + lines words characters + 214217 635954 2592172 + + +The basic principles illustrated by these programs are: + + - match as much text with each rule as possible + - adding rules does not slow you down! + - avoid backing up + +and the big caveat that comes with them is: + + - you buy performance with decreased maintainability; make + sure you really need it before applying the above techniques. + +See the "Performance Considerations" section of flexdoc for more +details regarding these principles. + + +The different versions of "wc": + + mywc.c + a simple but fairly efficient C version + + wc1.l a naive flex "wc" implementation + + wc2.l somewhat faster; adds rules to match multiple tokens at once + + wc3.l faster still; adds more rules to match longer runs of tokens + + wc4.l fastest; still more rules added; hard to do much better + using flex (or, I suspect, hand-coding) + + wc5.l identical to wc3.l except one rule has been slightly + shortened, introducing backing-up + +Timing results (all times in user CPU seconds): + + program time notes + ------- ---- ----- + wc1 16.4 default flex table compression (= -Cem) + wc1 6.7 -Cf compression option + /bin/wc 5.8 Sun's standard "wc" tool + mywc 4.6 simple but better C implementation! + wc2 4.6 as good as C implementation; built using -Cf + wc3 3.8 -Cf + wc4 3.3 -Cf + wc5 5.7 -Cf; ouch, backing up is expensive diff --git a/examples/fastwc/mywc.c b/examples/fastwc/mywc.c new file mode 100644 index 0000000..92e5a36 --- /dev/null +++ b/examples/fastwc/mywc.c @@ -0,0 +1,26 @@ +/* A simple but fairly efficient C version of the Unix "wc" tool */ + +#include <stdio.h> +#include <ctype.h> + +main() +{ + register int c, cc = 0, wc = 0, lc = 0; + FILE *f = stdin; + + while ((c = getc(f)) != EOF) { + ++cc; + if (isgraph(c)) { + ++wc; + do { + c = getc(f); + if (c == EOF) + goto done; + ++cc; + } while (isgraph(c)); + } + if (c == '\n') + ++lc; + } +done: printf( "%8d%8d%8d\n", lc, wc, cc ); +} diff --git a/examples/fastwc/wc1.l b/examples/fastwc/wc1.l new file mode 100644 index 0000000..d6696bc --- /dev/null +++ b/examples/fastwc/wc1.l @@ -0,0 +1,18 @@ +/* First cut at a flex-based "wc" tool. */ + +ws [ \t] +nonws [^ \t\n] + +%% + int cc = 0, wc = 0, lc = 0; + +{nonws}+ cc += yyleng; ++wc; + +{ws}+ cc += yyleng; + +\n ++lc; ++cc; + +<<EOF>> { + printf( "%8d %8d %8d\n", lc, wc, cc ); + yyterminate(); + } diff --git a/examples/fastwc/wc2.l b/examples/fastwc/wc2.l new file mode 100644 index 0000000..bd63cd4 --- /dev/null +++ b/examples/fastwc/wc2.l @@ -0,0 +1,20 @@ +/* Somewhat faster "wc" tool: match more text with each rule */ + +ws [ \t] +nonws [^ \t\n] +word {ws}*{nonws}+ + +%% + int cc = 0, wc = 0, lc = 0; + +{word}{ws}* cc += yyleng; ++wc; +{word}{ws}*\n cc += yyleng; ++wc; ++lc; + +{ws}+ cc += yyleng; + +\n+ cc += yyleng; lc += yyleng; + +<<EOF>> { + printf( "%8d %8d %8d\n", lc, wc, cc ); + yyterminate(); + } diff --git a/examples/fastwc/wc3.l b/examples/fastwc/wc3.l new file mode 100644 index 0000000..7c5f2e2 --- /dev/null +++ b/examples/fastwc/wc3.l @@ -0,0 +1,24 @@ +/* Somewhat faster still: potentially match a lot of text with each rule */ + +ws [ \t] +nonws [^ \t\n] +word {ws}*{nonws}+ +words {word}{ws}+ + +%% + int cc = 0, wc = 0, lc = 0; + +{word}{ws}* cc += yyleng; ++wc; +{word}{ws}*\n cc += yyleng; ++wc; ++lc; +{words}{word}{ws}* cc += yyleng; wc += 2; +{words}{2}{word}{ws}* cc += yyleng; wc += 3; +{words}{3}{word}{ws}* cc += yyleng; wc += 4; + +{ws}+ cc += yyleng; + +\n+ cc += yyleng; lc += yyleng; + +<<EOF>> { + printf( "%8d %8d %8d\n", lc, wc, cc ); + yyterminate(); + } diff --git a/examples/fastwc/wc4.l b/examples/fastwc/wc4.l new file mode 100644 index 0000000..cbe56f6 --- /dev/null +++ b/examples/fastwc/wc4.l @@ -0,0 +1,27 @@ +/* Fastest version of wc: add rules to pick up newlines, too */ + +ws [ \t] +nonws [^ \t\n] +word {ws}*{nonws}+ +words {word}{ws}+ + +%% + int cc = 0, wc = 0, lc = 0; + +{word}{ws}* ++wc; cc += yyleng; +{word}{ws}*\n ++wc; cc += yyleng; ++lc; +{words}{word}{ws}* wc += 2; cc += yyleng; +{words}{word}{ws}*\n wc += 2; cc += yyleng; ++lc; +{words}{2}{word}{ws}* wc += 3; cc += yyleng; +{words}{2}{word}{ws}*\n wc += 3; cc += yyleng; ++lc; +{words}{3}{word}{ws}* wc += 4; cc += yyleng; +{words}{3}{word}{ws}*\n wc += 4; cc += yyleng; ++lc; + +{ws}+ cc += yyleng; + +\n+ cc += yyleng; lc += yyleng; + +<<EOF>> { + printf( "%8d %8d %8d\n", lc, wc, cc ); + yyterminate(); + } diff --git a/examples/fastwc/wc5.l b/examples/fastwc/wc5.l new file mode 100644 index 0000000..8fe17b6 --- /dev/null +++ b/examples/fastwc/wc5.l @@ -0,0 +1,24 @@ +/* Oops; slight change from wc3.l introduces backtracking */ + +ws [ \t] +nonws [^ \t\n] +word {ws}*{nonws}+ +words {word}{ws}+ + +%% + int cc = 0, wc = 0, lc = 0; + +{word}{ws}* cc += yyleng; ++wc; +{word}{ws}*\n cc += yyleng; ++wc; ++lc; +{words}{word} cc += yyleng; wc += 2; /* oops */ +{words}{2}{word}{ws}* cc += yyleng; wc += 3; +{words}{3}{word}{ws}* cc += yyleng; wc += 4; + +{ws}+ cc += yyleng; + +\n+ cc += yyleng; lc += yyleng; + +<<EOF>> { + printf( "%8d %8d %8d\n", lc, wc, cc ); + yyterminate(); + } |