/ OSX / libsecurity_codesigning / antlr2 / antlr / TokenStreamRewriteEngine.hpp
TokenStreamRewriteEngine.hpp
  1  #ifndef INC_TokenStreamRewriteEngine_hpp__
  2  #define INC_TokenStreamRewriteEngine_hpp__
  3  
  4  /* ANTLR Translator Generator
  5   * Project led by Terence Parr at http://www.jGuru.com
  6   * Software rights: http://www.antlr.org/license.html
  7   */
  8  
  9  #include <string>
 10  #include <list>
 11  #include <vector>
 12  #include <map>
 13  #include <utility>
 14  #include <ostream>
 15  #include <iterator>
 16  #include <cassert>
 17  #include <algorithm>
 18  
 19  #include <antlr/config.hpp>
 20  
 21  #include <antlr/TokenStream.hpp>
 22  #include <antlr/TokenWithIndex.hpp>
 23  #include <antlr/BitSet.hpp>
 24  
 25  #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
 26  namespace antlr {
 27  #endif
 28  
 29  /** This token stream tracks the *entire* token stream coming from
 30   *	 a lexer, but does not pass on the whitespace (or whatever else
 31   *	 you want to discard) to the parser.
 32   *
 33   *	 This class can then be asked for the ith token in the input stream.
 34   *	 Useful for dumping out the input stream exactly after doing some
  35   *	 augmentation or other manipulations.	Tokens are indexed from 0..n-1
 36   *
 37   *	 You can insert stuff, replace, and delete chunks.	 Note that the
 38   *	 operations are done lazily--only if you convert the buffer to a
 39   *	 String.	 This is very efficient because you are not moving data around
 40   *	 all the time.	 As the buffer of tokens is converted to strings, the
 41   *	 toString() method(s) check to see if there is an operation at the
 42   *	 current index.  If so, the operation is done and then normal String
 43   *	 rendering continues on the buffer.	 This is like having multiple Turing
 44   *	 machine instruction streams (programs) operating on a single input tape. :)
 45   *
 46   *	 Since the operations are done lazily at toString-time, operations do not
 47   *	 screw up the token index values.  That is, an insert operation at token
 48   *	 index i does not change the index values for tokens i+1..n-1.
 49   *
 50   *	 Because operations never actually alter the buffer, you may always get
 51   *	 the original token stream back without undoing anything.  Since
 52   *	 the instructions are queued up, you can easily simulate transactions and
 53   *	 roll back any changes if there is an error just by removing instructions.
 54   *	 For example,
 55   *
 56   *			TokenStreamRewriteEngine rewriteEngine =
 57   *				new TokenStreamRewriteEngine(lexer);
 58   *		  JavaRecognizer parser = new JavaRecognizer(rewriteEngine);
 59   *		  ...
  60   *			rewriteEngine.insertAfter("pass1", t, "foobar");
  61   *			rewriteEngine.insertAfter("pass2", u, "start");
 62   *			System.out.println(rewriteEngine.toString("pass1"));
 63   *			System.out.println(rewriteEngine.toString("pass2"));
 64   *
 65   *	 You can also have multiple "instruction streams" and get multiple
 66   *	 rewrites from a single pass over the input.	 Just name the instruction
 67   *	 streams and use that name again when printing the buffer.	This could be
 68   *	 useful for generating a C file and also its header file--all from the
 69   *	 same buffer.
 70   *
 71   *	 If you don't use named rewrite streams, a "default" stream is used.
 72   *
 73   *	 Terence Parr, parrt@cs.usfca.edu
 74   *	 University of San Francisco
 75   *	 February 2004
 76   */
 77  class TokenStreamRewriteEngine : public TokenStream
 78  {
 79  public:
 80  	typedef ANTLR_USE_NAMESPACE(std)vector<antlr::RefTokenWithIndex> token_list;
 81  	static const char* DEFAULT_PROGRAM_NAME;
 82  #ifndef NO_STATIC_CONSTS
 83  	static const size_t MIN_TOKEN_INDEX;
 84  	static const int PROGRAM_INIT_SIZE;
 85  #else
 86  	enum {
 87  		MIN_TOKEN_INDEX = 0,
 88  		PROGRAM_INIT_SIZE = 100
 89  	};
 90  #endif
 91  
 92  	struct tokenToStream {
 93  		tokenToStream( ANTLR_USE_NAMESPACE(std)ostream& o ) : out(o) {}
 94  		template <typename T> void operator() ( const T& t ) {
 95  			out << t->getText();
 96  		}
 97  		ANTLR_USE_NAMESPACE(std)ostream& out;
 98  	};
 99  
100  	class RewriteOperation {
101  	protected:
102  		RewriteOperation( size_t idx, const ANTLR_USE_NAMESPACE(std)string& txt )
103  		: index(idx), text(txt)
104  		{
105  		}
106  	public:
107  		virtual ~RewriteOperation()
108  		{
109  		}
110  		/** Execute the rewrite operation by possibly adding to the buffer.
111  		 *	 Return the index of the next token to operate on.
112  		 */
113  		virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& /* out */ ) {
114  			return index;
115  		}
116  		virtual size_t getIndex() const {
117  			return index;
118  		}
119  		virtual const char* type() const {
120  			return "RewriteOperation";
121  		}
122  	protected:
123  		size_t index;
124  		ANTLR_USE_NAMESPACE(std)string text;
125  	};
126  
127  	struct executeOperation {
128  		ANTLR_USE_NAMESPACE(std)ostream& out;
129  		executeOperation( ANTLR_USE_NAMESPACE(std)ostream& s ) : out(s) {}
130  		void operator () ( RewriteOperation* t ) {
131  			t->execute(out);
132  		}
133  	};
134  
135  	/// list of rewrite operations
136  	typedef ANTLR_USE_NAMESPACE(std)list<RewriteOperation*> operation_list;
137  	/// map program name to <program counter,program> tuple
138  	typedef ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,operation_list> program_map;
139  
140  	class InsertBeforeOp : public RewriteOperation
141  	{
142  	public:
143  		InsertBeforeOp( size_t index, const ANTLR_USE_NAMESPACE(std)string& text )
144  		: RewriteOperation(index, text)
145  		{
146  		}
147  		virtual ~InsertBeforeOp() {}
148  		virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out )
149  		{
150  			out << text;
151  			return index;
152  		}
153  		virtual const char* type() const {
154  			return "InsertBeforeOp";
155  		}
156  	};
157  
158  	class ReplaceOp : public RewriteOperation
159  	{
160  	public:
161  		ReplaceOp(size_t from, size_t to, ANTLR_USE_NAMESPACE(std)string text)
162  		: RewriteOperation(from,text)
163  		, lastIndex(to)
164  		{
165  		}
166  		virtual ~ReplaceOp() {}
167  		virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out ) {
168  			out << text;
169  			return lastIndex+1;
170  		}
171  		virtual const char* type() const {
172  			return "ReplaceOp";
173  		}
174  	protected:
175  		size_t lastIndex;
176  	};
177  
178  	class DeleteOp : public ReplaceOp {
179  	public:
180  		DeleteOp(size_t from, size_t to)
181  		: ReplaceOp(from,to,"")
182  		{
183  		}
184  		virtual const char* type() const {
185  			return "DeleteOp";
186  		}
187  	};
188  
189  	TokenStreamRewriteEngine(TokenStream& upstream);
190  
191  	TokenStreamRewriteEngine(TokenStream& upstream, size_t initialSize);
192  
193  	RefToken nextToken( void );
194  
195  	void rollback(size_t instructionIndex) {
196  		rollback(DEFAULT_PROGRAM_NAME, instructionIndex);
197  	}
198  
199  	/** Rollback the instruction stream for a program so that
200  	 *	 the indicated instruction (via instructionIndex) is no
201  	 *	 longer in the stream.	UNTESTED!
202  	 */
203  	void rollback(const ANTLR_USE_NAMESPACE(std)string& programName,
204  					  size_t instructionIndex );
205  
206  	void deleteProgram() {
207  		deleteProgram(DEFAULT_PROGRAM_NAME);
208  	}
209  
210  	/** Reset the program so that no instructions exist */
211  	void deleteProgram(const ANTLR_USE_NAMESPACE(std)string& programName) {
212  		rollback(programName, MIN_TOKEN_INDEX);
213  	}
214  
215  	void insertAfter( RefTokenWithIndex t,
216  							const ANTLR_USE_NAMESPACE(std)string& text )
217  	{
218  		insertAfter(DEFAULT_PROGRAM_NAME, t, text);
219  	}
220  
221  	void insertAfter(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) {
222  		insertAfter(DEFAULT_PROGRAM_NAME, index, text);
223  	}
224  
225  	void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName,
226  							RefTokenWithIndex t,
227  							const ANTLR_USE_NAMESPACE(std)string& text )
228  	{
229  		insertAfter(programName, t->getIndex(), text);
230  	}
231  
232  	void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName,
233  							size_t index,
234  							const ANTLR_USE_NAMESPACE(std)string& text )
235  	{
236  		// to insert after, just insert before next index (even if past end)
237  		insertBefore(programName,index+1, text);
238  	}
239  
240  	void insertBefore( RefTokenWithIndex t,
241  							 const ANTLR_USE_NAMESPACE(std)string& text )
242  	{
243  		// std::cout << "insertBefore index " << t->getIndex() << " " << text << std::endl;
244  		insertBefore(DEFAULT_PROGRAM_NAME, t, text);
245  	}
246  
247  	void insertBefore(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) {
248  		insertBefore(DEFAULT_PROGRAM_NAME, index, text);
249  	}
250  
251  	void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName,
252  							 RefTokenWithIndex t,
253  							 const ANTLR_USE_NAMESPACE(std)string& text )
254  	{
255  		insertBefore(programName, t->getIndex(), text);
256  	}
257  
258  	void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName,
259  							 size_t index,
260  							 const ANTLR_USE_NAMESPACE(std)string& text )
261  	{
262  		addToSortedRewriteList(programName, new InsertBeforeOp(index,text));
263  	}
264  
265  	void replace(size_t index, const ANTLR_USE_NAMESPACE(std)string& text)
266  	{
267  		replace(DEFAULT_PROGRAM_NAME, index, index, text);
268  	}
269  
270  	void replace( size_t from, size_t to,
271  					  const ANTLR_USE_NAMESPACE(std)string& text)
272  	{
273  		replace(DEFAULT_PROGRAM_NAME, from, to, text);
274  	}
275  
276  	void replace( RefTokenWithIndex indexT,
277  					  const ANTLR_USE_NAMESPACE(std)string& text )
278  	{
279  		replace(DEFAULT_PROGRAM_NAME, indexT->getIndex(), indexT->getIndex(), text);
280  	}
281  
282  	void replace( RefTokenWithIndex from,
283  					  RefTokenWithIndex to,
284  					  const ANTLR_USE_NAMESPACE(std)string& text )
285  	{
286  		replace(DEFAULT_PROGRAM_NAME, from, to, text);
287  	}
288  
289  	void replace(const ANTLR_USE_NAMESPACE(std)string& programName,
290  					 size_t from, size_t to,
291  					 const ANTLR_USE_NAMESPACE(std)string& text )
292  	{
293  		addToSortedRewriteList(programName,new ReplaceOp(from, to, text));
294  	}
295  
296  	void replace( const ANTLR_USE_NAMESPACE(std)string& programName,
297  					  RefTokenWithIndex from,
298  					  RefTokenWithIndex to,
299  					  const ANTLR_USE_NAMESPACE(std)string& text )
300  	{
301  		replace(programName,
302  				  from->getIndex(),
303  				  to->getIndex(),
304  				  text);
305  	}
306  
307  	void remove(size_t index) {
308  		remove(DEFAULT_PROGRAM_NAME, index, index);
309  	}
310  
311  	void remove(size_t from, size_t to) {
312  		remove(DEFAULT_PROGRAM_NAME, from, to);
313  	}
314  
315  	void remove(RefTokenWithIndex indexT) {
316  		remove(DEFAULT_PROGRAM_NAME, indexT, indexT);
317  	}
318  
319  	void remove(RefTokenWithIndex from, RefTokenWithIndex to) {
320  		remove(DEFAULT_PROGRAM_NAME, from, to);
321  	}
322  
323  	void remove( const ANTLR_USE_NAMESPACE(std)string& programName,
324  					 size_t from, size_t to)
325  	{
326  		replace(programName,from,to,"");
327  	}
328  
329  	void remove( const ANTLR_USE_NAMESPACE(std)string& programName,
330  					 RefTokenWithIndex from, RefTokenWithIndex to )
331  	{
332  		replace(programName,from,to,"");
333  	}
334  
335  	void discard(int ttype) {
336  		discardMask.add(ttype);
337  	}
338  
339  	RefToken getToken( size_t i )
340  	{
341  		return RefToken(tokens.at(i));
342  	}
343  
344  	size_t getTokenStreamSize() const {
345  		return tokens.size();
346  	}
347  
348  	void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
349  		ANTLR_USE_NAMESPACE(std)for_each( tokens.begin(), tokens.end(), tokenToStream(out) );
350  	}
351  
352  	void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out,
353  								  size_t start, size_t end ) const;
354  
355  	void toStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
356  		toStream( out, MIN_TOKEN_INDEX, getTokenStreamSize());
357  	}
358  
359  	void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
360  						const ANTLR_USE_NAMESPACE(std)string& programName ) const
361  	{
362  		toStream( out, programName, MIN_TOKEN_INDEX, getTokenStreamSize());
363  	}
364  
365  	void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
366  						size_t start, size_t end ) const
367  	{
368  		toStream(out, DEFAULT_PROGRAM_NAME, start, end);
369  	}
370  
371  	void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
372  						const ANTLR_USE_NAMESPACE(std)string& programName,
373  						size_t firstToken, size_t lastToken ) const;
374  
375  	void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
376  		toDebugStream( out, MIN_TOKEN_INDEX, getTokenStreamSize());
377  	}
378  
379  	void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out,
380  							  size_t start, size_t end ) const;
381  
382  	size_t getLastRewriteTokenIndex() const {
383  		return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
384  	}
385  
386  	/** Return the last index for the program named programName
387  	 * return 0 if the program does not exist or the program is empty.
388  	 * (Note this is different from the java implementation that returns -1)
389  	 */
390  	size_t getLastRewriteTokenIndex(const ANTLR_USE_NAMESPACE(std)string& programName) const {
391  		program_map::const_iterator rewrites = programs.find(programName);
392  
393  		if( rewrites == programs.end() )
394  			return 0;
395  
396  		const operation_list& prog = rewrites->second;
397  		if( !prog.empty() )
398  		{
399  			operation_list::const_iterator last = prog.end();
400  			--last;
401  			return (*last)->getIndex();
402  		}
403  		return 0;
404  	}
405  
406  protected:
407  	/** If op.index > lastRewriteTokenIndexes, just add to the end.
408  	 *	 Otherwise, do linear */
409  	void addToSortedRewriteList(RewriteOperation* op) {
410  		addToSortedRewriteList(DEFAULT_PROGRAM_NAME, op);
411  	}
412  
413  	void addToSortedRewriteList( const ANTLR_USE_NAMESPACE(std)string& programName,
414  										  RewriteOperation* op );
415  
416  protected:
417  	/** Who do we suck tokens from? */
418  	TokenStream& stream;
419  	/** track index of tokens */
420  	size_t index;
421  
422  	/** Track the incoming list of tokens */
423  	token_list tokens;
424  
425  	/** You may have multiple, named streams of rewrite operations.
426  	 *  I'm calling these things "programs."
427  	 *  Maps String (name) -> rewrite (List)
428  	 */
429  	program_map programs;
430  
431  	/** Which (whitespace) token(s) to throw out */
432  	BitSet discardMask;
433  };
434  
435  #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
436  }
437  #endif
438  
439  #endif