re-add these files

author: Will Estes <wlestes@users.sourceforge.net> 2001-07-24 20:23:36 +0000
committer: Will Estes <wlestes@users.sourceforge.net> 2001-07-24 20:23:36 +0000
commit: 5fd717cd0c50e392c39623824d412a6548d5c836 (patch)
tree: be50d2d21f9d5890c2b5a3162f246836850ca308
parent: 940f3dbdbb8fb1d366fca01858a9287fe43f7db0 (diff)
7 files changed, 195 insertions, 0 deletions
diff --git a/examples/fastwc/README b/examples/fastwc/README
new file mode 100644
index 0000000..0dd3afe
--- /dev/null
+++ b/examples/fastwc/README
@@ -0,0 +1,56 @@
+This directory contains some examples illustrating techniques for extracting
+high-performance from flex scanners.  Each program implements a simplified
+version of the Unix "wc" tool: read text from stdin and print the number of
+characters, words, and lines present in the text.  All programs were compiled
+using gcc (version unavailable, sorry) with the -O flag, and run on a
+SPARCstation 1+.  The input used was a PostScript file, mainly containing
+figures, with the following "wc" counts:
+
+	lines  words  characters
+	214217 635954 2592172
+
+
+The basic principles illustrated by these programs are:
+
+	- match as much text with each rule as possible
+	- adding rules does not slow you down!
+	- avoid backing up
+
+and the big caveat that comes with them is:
+
+	- you buy performance with decreased maintainability; make
+	  sure you really need it before applying the above techniques.
+
+See the "Performance Considerations" section of flexdoc for more
+details regarding these principles.
+
+
+The different versions of "wc":
+
+	mywc.c
+		a simple but fairly efficient C version
+
+	wc1.l	a naive flex "wc" implementation
+
+	wc2.l	somewhat faster; adds rules to match multiple tokens at once
+
+	wc3.l	faster still; adds more rules to match longer runs of tokens
+
+	wc4.l	fastest; still more rules added; hard to do much better
+		using flex (or, I suspect, hand-coding)
+
+	wc5.l	identical to wc3.l except one rule has been slightly
+		shortened, introducing backing-up
+
+Timing results (all times in user CPU seconds):
+
+	program	  time 	 notes
+	-------   ----   -----
+	wc1       16.4   default flex table compression (= -Cem)
+	wc1        6.7   -Cf compression option
+	/bin/wc	   5.8	 Sun's standard "wc" tool
+	mywc	   4.6   simple but better C implementation!
+	wc2	   4.6   as good as C implementation; built using -Cf
+	wc3	   3.8   -Cf
+	wc4	   3.3   -Cf
+	wc5	   5.7   -Cf; ouch, backing up is expensive
diff --git a/examples/fastwc/mywc.c b/examples/fastwc/mywc.c
new file mode 100644
index 0000000..92e5a36
--- /dev/null
+++ b/examples/fastwc/mywc.c
@@ -0,0 +1,26 @@
+/* A simple but fairly efficient C version of the Unix "wc" tool */
+
+#include <stdio.h>
+#include <ctype.h>
+
+main()
+{
+	register int c, cc = 0, wc = 0, lc = 0;
+	FILE *f = stdin;
+
+	while ((c = getc(f)) != EOF) {
+		++cc;
+		if (isgraph(c)) {
+			++wc;
+			do {
+				c = getc(f);
+				if (c == EOF)
+					goto done;
+				++cc;
+			} while (isgraph(c));
+		}
+		if (c == '\n')
+			++lc;
+	}
+done:	printf( "%8d%8d%8d\n", lc, wc, cc );
+}
diff --git a/examples/fastwc/wc1.l b/examples/fastwc/wc1.l
new file mode 100644
index 0000000..d6696bc
--- /dev/null
+++ b/examples/fastwc/wc1.l
@@ -0,0 +1,18 @@
+/* First cut at a flex-based "wc" tool. */
+
+ws    [ \t]
+nonws [^ \t\n]
+
+%%
+	int cc = 0, wc = 0, lc = 0;
+
+{nonws}+	cc += yyleng; ++wc;
+
+{ws}+		cc += yyleng;
+
+\n		++lc; ++cc;
+
+<<EOF>>		{
+		printf( "%8d %8d %8d\n", lc, wc, cc );
+		yyterminate();
+		}
diff --git a/examples/fastwc/wc2.l b/examples/fastwc/wc2.l
new file mode 100644
index 0000000..bd63cd4
--- /dev/null
+++ b/examples/fastwc/wc2.l
@@ -0,0 +1,20 @@
+/* Somewhat faster "wc" tool: match more text with each rule */
+
+ws    [ \t]
+nonws [^ \t\n]
+word  {ws}*{nonws}+
+
+%%
+	int cc = 0, wc = 0, lc = 0;
+
+{word}{ws}*	cc += yyleng; ++wc;
+{word}{ws}*\n	cc += yyleng; ++wc; ++lc;
+
+{ws}+		cc += yyleng;
+
+\n+		cc += yyleng; lc += yyleng;
+
+<<EOF>>		{
+		printf( "%8d %8d %8d\n", lc, wc, cc );
+		yyterminate();
+		}
diff --git a/examples/fastwc/wc3.l b/examples/fastwc/wc3.l
new file mode 100644
index 0000000..7c5f2e2
--- /dev/null
+++ b/examples/fastwc/wc3.l
@@ -0,0 +1,24 @@
+/* Somewhat faster still: potentially match a lot of text with each rule */
+
+ws    [ \t]
+nonws [^ \t\n]
+word  {ws}*{nonws}+
+words {word}{ws}+
+
+%%
+	int cc = 0, wc = 0, lc = 0;
+
+{word}{ws}*		cc += yyleng; ++wc;
+{word}{ws}*\n		cc += yyleng; ++wc; ++lc;
+{words}{word}{ws}*	cc += yyleng; wc += 2;
+{words}{2}{word}{ws}*	cc += yyleng; wc += 3;
+{words}{3}{word}{ws}*	cc += yyleng; wc += 4;
+
+{ws}+			cc += yyleng;
+
+\n+			cc += yyleng; lc += yyleng;
+
+<<EOF>>		{
+		printf( "%8d %8d %8d\n", lc, wc, cc );
+		yyterminate();
+		}
diff --git a/examples/fastwc/wc4.l b/examples/fastwc/wc4.l
new file mode 100644
index 0000000..cbe56f6
--- /dev/null
+++ b/examples/fastwc/wc4.l
@@ -0,0 +1,27 @@
+/* Fastest version of wc: add rules to pick up newlines, too */
+
+ws    [ \t]
+nonws [^ \t\n]
+word  {ws}*{nonws}+
+words {word}{ws}+
+
+%%
+	int cc = 0, wc = 0, lc = 0;
+
+{word}{ws}*		++wc; cc += yyleng;
+{word}{ws}*\n		++wc; cc += yyleng; ++lc;
+{words}{word}{ws}*	wc += 2; cc += yyleng;
+{words}{word}{ws}*\n	wc += 2; cc += yyleng; ++lc;
+{words}{2}{word}{ws}*	wc += 3; cc += yyleng;
+{words}{2}{word}{ws}*\n	wc += 3; cc += yyleng; ++lc;
+{words}{3}{word}{ws}*	wc += 4; cc += yyleng;
+{words}{3}{word}{ws}*\n	wc += 4; cc += yyleng; ++lc;
+
+{ws}+			cc += yyleng;
+
+\n+			cc += yyleng; lc += yyleng;
+
+<<EOF>>		{
+		printf( "%8d %8d %8d\n", lc, wc, cc );
+		yyterminate();
+		}
diff --git a/examples/fastwc/wc5.l b/examples/fastwc/wc5.l
new file mode 100644
index 0000000..8fe17b6
--- /dev/null
+++ b/examples/fastwc/wc5.l
@@ -0,0 +1,24 @@
+/* Oops; slight change from wc3.l introduces backtracking */
+
+ws    [ \t]
+nonws [^ \t\n]
+word  {ws}*{nonws}+
+words {word}{ws}+
+
+%%
+	int cc = 0, wc = 0, lc = 0;
+
+{word}{ws}*		cc += yyleng; ++wc;
+{word}{ws}*\n		cc += yyleng; ++wc; ++lc;
+{words}{word}		cc += yyleng; wc += 2;	/* oops */
+{words}{2}{word}{ws}*	cc += yyleng; wc += 3;
+{words}{3}{word}{ws}*	cc += yyleng; wc += 4;
+
+{ws}+			cc += yyleng;
+
+\n+			cc += yyleng; lc += yyleng;
+
+<<EOF>>		{
+		printf( "%8d %8d %8d\n", lc, wc, cc );
+		yyterminate();
+		}
author	Will Estes <wlestes@users.sourceforge.net>	2001-07-24 20:23:36 +0000
committer	Will Estes <wlestes@users.sourceforge.net>	2001-07-24 20:23:36 +0000
commit	5fd717cd0c50e392c39623824d412a6548d5c836 (patch)
tree	be50d2d21f9d5890c2b5a3162f246836850ca308
parent	940f3dbdbb8fb1d366fca01858a9287fe43f7db0 (diff)