From 5fd717cd0c50e392c39623824d412a6548d5c836 Mon Sep 17 00:00:00 2001 From: Will Estes Date: Tue, 24 Jul 2001 20:23:36 +0000 Subject: re-add these files --- examples/fastwc/README | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++ examples/fastwc/mywc.c | 26 +++++++++++++++++++++++ examples/fastwc/wc1.l | 18 ++++++++++++++++ examples/fastwc/wc2.l | 20 ++++++++++++++++++ examples/fastwc/wc3.l | 24 ++++++++++++++++++++++ examples/fastwc/wc4.l | 27 ++++++++++++++++++++++++ examples/fastwc/wc5.l | 24 ++++++++++++++++++++++ 7 files changed, 195 insertions(+) create mode 100644 examples/fastwc/README create mode 100644 examples/fastwc/mywc.c create mode 100644 examples/fastwc/wc1.l create mode 100644 examples/fastwc/wc2.l create mode 100644 examples/fastwc/wc3.l create mode 100644 examples/fastwc/wc4.l create mode 100644 examples/fastwc/wc5.l (limited to 'examples') diff --git a/examples/fastwc/README b/examples/fastwc/README new file mode 100644 index 0000000..0dd3afe --- /dev/null +++ b/examples/fastwc/README @@ -0,0 +1,56 @@ +This directory contains some examples illustrating techniques for extracting +high-performance from flex scanners. Each program implements a simplified +version of the Unix "wc" tool: read text from stdin and print the number of +characters, words, and lines present in the text. All programs were compiled +using gcc (version unavailable, sorry) with the -O flag, and run on a +SPARCstation 1+. The input used was a PostScript file, mainly containing +figures, with the following "wc" counts: + + lines words characters + 214217 635954 2592172 + + +The basic principles illustrated by these programs are: + + - match as much text with each rule as possible + - adding rules does not slow you down! + - avoid backing up + +and the big caveat that comes with them is: + + - you buy performance with decreased maintainability; make + sure you really need it before applying the above techniques. + +See the "Performance Considerations" section of flexdoc for more +details regarding these principles. + + +The different versions of "wc": + + mywc.c + a simple but fairly efficient C version + + wc1.l a naive flex "wc" implementation + + wc2.l somewhat faster; adds rules to match multiple tokens at once + + wc3.l faster still; adds more rules to match longer runs of tokens + + wc4.l fastest; still more rules added; hard to do much better + using flex (or, I suspect, hand-coding) + + wc5.l identical to wc3.l except one rule has been slightly + shortened, introducing backing-up + +Timing results (all times in user CPU seconds): + + program time notes + ------- ---- ----- + wc1 16.4 default flex table compression (= -Cem) + wc1 6.7 -Cf compression option + /bin/wc 5.8 Sun's standard "wc" tool + mywc 4.6 simple but better C implementation! + wc2 4.6 as good as C implementation; built using -Cf + wc3 3.8 -Cf + wc4 3.3 -Cf + wc5 5.7 -Cf; ouch, backing up is expensive diff --git a/examples/fastwc/mywc.c b/examples/fastwc/mywc.c new file mode 100644 index 0000000..92e5a36 --- /dev/null +++ b/examples/fastwc/mywc.c @@ -0,0 +1,26 @@ +/* A simple but fairly efficient C version of the Unix "wc" tool */ + +#include +#include + +main() +{ + register int c, cc = 0, wc = 0, lc = 0; + FILE *f = stdin; + + while ((c = getc(f)) != EOF) { + ++cc; + if (isgraph(c)) { + ++wc; + do { + c = getc(f); + if (c == EOF) + goto done; + ++cc; + } while (isgraph(c)); + } + if (c == '\n') + ++lc; + } +done: printf( "%8d%8d%8d\n", lc, wc, cc ); +} diff --git a/examples/fastwc/wc1.l b/examples/fastwc/wc1.l new file mode 100644 index 0000000..d6696bc --- /dev/null +++ b/examples/fastwc/wc1.l @@ -0,0 +1,18 @@ +/* First cut at a flex-based "wc" tool. */ + +ws [ \t] +nonws [^ \t\n] + +%% + int cc = 0, wc = 0, lc = 0; + +{nonws}+ cc += yyleng; ++wc; + +{ws}+ cc += yyleng; + +\n ++lc; ++cc; + +<> { + printf( "%8d %8d %8d\n", lc, wc, cc ); + yyterminate(); + } diff --git a/examples/fastwc/wc2.l b/examples/fastwc/wc2.l new file mode 100644 index 0000000..bd63cd4 --- /dev/null +++ b/examples/fastwc/wc2.l @@ -0,0 +1,20 @@ +/* Somewhat faster "wc" tool: match more text with each rule */ + +ws [ \t] +nonws [^ \t\n] +word {ws}*{nonws}+ + +%% + int cc = 0, wc = 0, lc = 0; + +{word}{ws}* cc += yyleng; ++wc; +{word}{ws}*\n cc += yyleng; ++wc; ++lc; + +{ws}+ cc += yyleng; + +\n+ cc += yyleng; lc += yyleng; + +<> { + printf( "%8d %8d %8d\n", lc, wc, cc ); + yyterminate(); + } diff --git a/examples/fastwc/wc3.l b/examples/fastwc/wc3.l new file mode 100644 index 0000000..7c5f2e2 --- /dev/null +++ b/examples/fastwc/wc3.l @@ -0,0 +1,24 @@ +/* Somewhat faster still: potentially match a lot of text with each rule */ + +ws [ \t] +nonws [^ \t\n] +word {ws}*{nonws}+ +words {word}{ws}+ + +%% + int cc = 0, wc = 0, lc = 0; + +{word}{ws}* cc += yyleng; ++wc; +{word}{ws}*\n cc += yyleng; ++wc; ++lc; +{words}{word}{ws}* cc += yyleng; wc += 2; +{words}{2}{word}{ws}* cc += yyleng; wc += 3; +{words}{3}{word}{ws}* cc += yyleng; wc += 4; + +{ws}+ cc += yyleng; + +\n+ cc += yyleng; lc += yyleng; + +<> { + printf( "%8d %8d %8d\n", lc, wc, cc ); + yyterminate(); + } diff --git a/examples/fastwc/wc4.l b/examples/fastwc/wc4.l new file mode 100644 index 0000000..cbe56f6 --- /dev/null +++ b/examples/fastwc/wc4.l @@ -0,0 +1,27 @@ +/* Fastest version of wc: add rules to pick up newlines, too */ + +ws [ \t] +nonws [^ \t\n] +word {ws}*{nonws}+ +words {word}{ws}+ + +%% + int cc = 0, wc = 0, lc = 0; + +{word}{ws}* ++wc; cc += yyleng; +{word}{ws}*\n ++wc; cc += yyleng; ++lc; +{words}{word}{ws}* wc += 2; cc += yyleng; +{words}{word}{ws}*\n wc += 2; cc += yyleng; ++lc; +{words}{2}{word}{ws}* wc += 3; cc += yyleng; +{words}{2}{word}{ws}*\n wc += 3; cc += yyleng; ++lc; +{words}{3}{word}{ws}* wc += 4; cc += yyleng; +{words}{3}{word}{ws}*\n wc += 4; cc += yyleng; ++lc; + +{ws}+ cc += yyleng; + +\n+ cc += yyleng; lc += yyleng; + +<> { + printf( "%8d %8d %8d\n", lc, wc, cc ); + yyterminate(); + } diff --git a/examples/fastwc/wc5.l b/examples/fastwc/wc5.l new file mode 100644 index 0000000..8fe17b6 --- /dev/null +++ b/examples/fastwc/wc5.l @@ -0,0 +1,24 @@ +/* Oops; slight change from wc3.l introduces backtracking */ + +ws [ \t] +nonws [^ \t\n] +word {ws}*{nonws}+ +words {word}{ws}+ + +%% + int cc = 0, wc = 0, lc = 0; + +{word}{ws}* cc += yyleng; ++wc; +{word}{ws}*\n cc += yyleng; ++wc; ++lc; +{words}{word} cc += yyleng; wc += 2; /* oops */ +{words}{2}{word}{ws}* cc += yyleng; wc += 3; +{words}{3}{word}{ws}* cc += yyleng; wc += 4; + +{ws}+ cc += yyleng; + +\n+ cc += yyleng; lc += yyleng; + +<> { + printf( "%8d %8d %8d\n", lc, wc, cc ); + yyterminate(); + } -- cgit v1.2.3