summaryrefslogtreecommitdiff
path: root/include/swbasicfilter.h
blob: 29ea8a07b229c84d3407ad6287841c3060ca9147 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
/******************************************************************************
 *
 *  swbasicfilter.h -	definition of class SWBasicFilter.  An SWFilter
 *  		       	implementation that provides some basic methods
 *  	       		that many filters will need and can use as a
 *     			starting point
 *
 * $Id: swbasicfilter.h 2833 2013-06-29 06:40:28Z chrislit $
 *
 * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org)
 *	CrossWire Bible Society
 *	P. O. Box 2528
 *	Tempe, AZ  85280-2528
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 */

#ifndef SWBASICFILTER_H
#define SWBASICFILTER_H

#include <swfilter.h>
#include <swbuf.h>

SWORD_NAMESPACE_START


// not a protected inner class because MSVC++ sucks and can't handle it
/** Scratch state handed to the token / escape-string handlers of
 * SWBasicFilter (see the <code>userData</code> parameter of
 * SWBasicFilter::handleToken() and SWBasicFilter::handleEscapeString()).
 *
 * SWBasicFilter::createUserData() allocates one of these; subclasses may
 * derive from it to carry additional per-parse state, which is why the
 * destructor is virtual.
 */
class SWDLLEXPORT BasicFilterUserData {
public:
	/** Remembers the module and key being filtered and clears both flags.
	 * The two SWBuf members start out default-constructed.
	 * @param module module the text belongs to (stored, not copied)
	 * @param key key of the entry being filtered (stored, not copied)
	 */
	BasicFilterUserData(const SWModule *module, const SWKey *key)
		: module(module)
		, key(key)
		, suspendTextPassThru(false)
		, supressAdjacentWhitespace(false)
	{
	}

	virtual ~BasicFilterUserData() {}

	// Pointers exactly as passed to the constructor.
	const SWModule *module;
	const SWKey *key;

	// NOTE(review): presumably the most recent text run / suspended segment
	// seen by the filter -- the code that fills these in is not in this header.
	SWBuf lastTextNode;
	SWBuf lastSuspendSegment;

	// Both flags are false after construction.
	bool suspendTextPassThru;
	bool supressAdjacentWhitespace;
};

/** A filter providing commonly used functionality.
 * This filter has facilities for handling SGML/HTML/XML like tokens and
 * escape strings (like SGML entities). It has the facility for just
 * substituting the given tokens and escape strings to other strings and for
 * "manual" custom token handling.
 *
 * In this class the functions with arguments looking as <code>char
 * **buf</code> write a character sequence at the address specified by
 * <code>*buf</code> and change <code>*buf</code> to point past
 * the last char of the written sequence.
 */
class SWDLLEXPORT SWBasicFilter : public virtual SWFilter {

// Opaque helper class; only forward-declared in this header.
class Private;

	// Everything down to "public:" is private (default access of a class).
	//
	// NOTE(review): the class holds raw pointers and declares no copy
	// constructor / assignment operator in this header. Whether copying a
	// filter is safe depends on how the .cpp manages these -- confirm.

	// Delimiter strings; presumably maintained by setTokenStart(),
	// setTokenEnd(), setEscapeStart() and setEscapeEnd() -- confirm in the .cpp.
	char *tokenStart;
	char *tokenEnd;
	char *escStart;
	char *escEnd;

	// Presumably the lengths of the four delimiter strings above. They are
	// stored in a char, so very long delimiters would not fit -- TODO confirm.
	char escStartLen;
	char escEndLen;
	char tokenStartLen;
	char tokenEndLen;

	// Option flags; presumably backing the set*CaseSensitive() and
	// setPassThru*() setters declared below.
	bool escStringCaseSensitive;
	bool tokenCaseSensitive;
	bool passThruUnknownToken;
	bool passThruUnknownEsc;
	bool passThruNumericEsc;

	// Written by setStageProcessing(); see the STAGE constants below.
	char processStages;


	// Pointer to the opaque Private helper; managed outside this header.
	Private *p;
public:

	SWBasicFilter();

	/** Runs the filter over <code>text</code>.
	 * @param text the buffer to process (passed by non-const reference)
	 * @param key optional key of the entry being processed
	 * @param module optional module the text belongs to
	 * @return a status char; its meaning is defined by the implementation,
	 * which is not part of this header.
	 */
	virtual char processText(SWBuf &text, const SWKey *key = 0, const SWModule *module = 0);
	virtual ~SWBasicFilter();

protected:

	/** Factory for the user data object passed to the handlers.
	 * The default returns a plain BasicFilterUserData allocated with
	 * <code>new</code>; subclasses can override this to return a derived type.
	 * NOTE(review): the code that deletes the returned object is not visible
	 * in this header -- confirm ownership in the .cpp.
	 */
	virtual BasicFilterUserData *createUserData(const SWModule *module, const SWKey *key) {
		return new BasicFilterUserData(module, key);
	}

	// STAGEs (values are defined outside this header)
	static const char INITIALIZE;	// flag for indicating processing before char loop
	static const char PRECHAR;	// flag for indicating processing at top in char loop
	static const char POSTCHAR;	// flag for indicating processing at bottom in char loop
	static const char FINALIZE;	// flag for indicating processing after char loop


	/** Sets the escape start sequence (by default "&").*/
	void setEscapeStart(const char *escStart);

	/** Sets the escape end sequence (by default ";").*/
	void setEscapeEnd(const char *escEnd);

	/** Sets the token start sequence (by default "<").*/
	void setTokenStart(const char *tokenStart);

	/** Sets the token end sequence (by default ">").*/
	void setTokenEnd(const char *tokenEnd);

	/** Sets whether to pass thru an unknown token unchanged
	 *	or just remove it.
	 *	Default is false.
	 */
	void setPassThruUnknownToken(bool val);

	/** Sets whether to pass thru an unknown escape sequence unchanged
	 *	or just remove it.
	 *	Default is false.
	 */
	void setPassThruUnknownEscapeString(bool val);

	/** Sets whether to pass thru a numeric escape sequence unchanged
	 *	or allow it to be handled otherwise.
	 *	Default is false.
	 */
	void setPassThruNumericEscapeString(bool val);

	/** Are escapeStrings case sensitive or not? Call this
	 *	function before addEscapeStringSubstitute()
	 */
	void setEscapeStringCaseSensitive(bool val);

	/** Registers an esc control sequence that can pass unchanged
	 */
	void addAllowedEscapeString(const char *findString);

	/** Unregisters an esc control sequence that can pass unchanged
	 */
	void removeAllowedEscapeString(const char *findString);

	/** Registers an esc control sequence and the string that replaces it
	 */
	void addEscapeStringSubstitute(const char *findString, const char *replaceString);

	/** Unregisters an esc control sequence
	 */
	void removeEscapeStringSubstitute(const char *findString);

	/** This function performs the substitution of escapeStrings */
	bool substituteEscapeString(SWBuf &buf, const char *escString);

	/** This passes allowed escapeStrings */
	bool passAllowedEscapeString(SWBuf &buf, const char *escString);

	/** This appends escString to buf as an entity */
	void appendEscapeString(SWBuf &buf, const char *escString);

	/** Are tokens case sensitive (like in GBF) or not? Call this
	 *	function before addTokenSubstitute()
	 */
	void setTokenCaseSensitive(bool val);

	/** Registers a simple token substitution.  Usually called from the
	 *	c-tor of a subclass
	 */
	void addTokenSubstitute(const char *findString, const char *replaceString);

	/** Unregisters a simple token substitute
	 */
	void removeTokenSubstitute(const char *findString);

	/** This function performs the substitution of tokens */
	bool substituteToken(SWBuf &buf, const char *token);

	/** This function is called for every token encountered in the input text.
	 * @param buf the output buffer
	 * @param token the token (e.g. <code>"p align='left'"</code>)
	 * @param userData user storage space for data transient to 1 full buffer parse
	 * @return subclasses should return true if they handled the token, or false if they did not.
	 */
	virtual bool handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData);

	/** Hook for stage-specific processing. The default implementation
	 * ignores all of its arguments and returns false.
	 * The commented-out parameter names are, in order: stage, text, from
	 * and userData.
	 * NOTE(review): presumably invoked by processText() for the stages
	 * enabled through setStageProcessing() -- confirm in the .cpp.
	 */
	virtual bool processStage(char /*stage*/, SWBuf &/*text*/, char *&/*from*/, BasicFilterUserData * /*userData*/) { return false; }

	/** Stores <code>stages</code> in processStages.
	 * NOTE(review): judging by the "flag" wording on the STAGE constants
	 * above, this is presumably a combination of them -- confirm.
	 */
	virtual void setStageProcessing(char stages) { processStages = stages; }	// see STAGE constants above

	/** This function is called for every escape sequence encountered in the input text.
	 * @param buf the output buffer
	 * @param escString the escape sequence (e.g. <code>"amp"</code> for &amp;amp;)
	 * @param userData user storage space for data transient to 1 full buffer parse
	 * @return subclasses should return true if they handled the esc seq, or false if they did not.
	 */
	virtual bool handleEscapeString(SWBuf &buf, const char *escString, BasicFilterUserData *userData);

	/** This function is called for all numeric escape sequences.
	 * See setPassThruNumericEscapeString() for the option that lets such
	 * sequences pass thru unchanged instead.
	 * @param buf the output buffer
	 * @param escString the escape sequence (e.g. <code>"#235"</code> for &amp;#235;)
	 * @return subclasses should return true if they handled the esc seq, or false if they did not.
	 */
	virtual bool handleNumericEscapeString(SWBuf &buf, const char *escString);


};

SWORD_NAMESPACE_END
#endif