ANTLRUtil.cpp
1 /* ANTLR Translator Generator 2 * Project led by Terence Parr at http://www.jGuru.com 3 * Software rights: http://www.antlr.org/license.html 4 * 5 * $Id:$ 6 */ 7 8 #include <antlr/config.hpp> 9 #include <antlr/IOException.hpp> 10 #include <antlr/ANTLRUtil.hpp> 11 12 #include <istream> 13 #include <cctype> 14 #include <string> 15 16 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE 17 namespace antlr { 18 #endif 19 20 /** Eat whitespace from the input stream 21 * @param is the stream to read from 22 */ 23 ANTLR_USE_NAMESPACE(std)istream& eatwhite( ANTLR_USE_NAMESPACE(std)istream& is ) 24 { 25 char c; 26 while( is.get(c) ) 27 { 28 #ifdef ANTLR_CCTYPE_NEEDS_STD 29 if( !ANTLR_USE_NAMESPACE(std)isspace(c) ) 30 #else 31 if( !isspace(c) ) 32 #endif 33 { 34 is.putback(c); 35 break; 36 } 37 } 38 return is; 39 } 40 41 /** Read a string enclosed by '"' from a stream. Also handles escaping of \". 42 * Skips leading whitespace. 43 * @param in the istream to read from. 44 * @returns the string read from file exclusive the '"' 45 * @throws IOException if string is badly formatted 46 */ 47 ANTLR_USE_NAMESPACE(std)string read_string( ANTLR_USE_NAMESPACE(std)istream& in ) 48 { 49 char ch; 50 ANTLR_USE_NAMESPACE(std)string ret(""); 51 // States for a simple state machine... 52 enum { START, READING, ESCAPE, FINISHED }; 53 int state = START; 54 55 eatwhite(in); 56 57 while( state != FINISHED && in.get(ch) ) 58 { 59 switch( state ) 60 { 61 case START: 62 // start state: check wether starting with " then switch to READING 63 if( ch != '"' ) 64 throw IOException("string must start with '\"'"); 65 state = READING; 66 continue; 67 case READING: 68 // reading state: look out for escape sequences and closing " 69 if( ch == '\\' ) // got escape sequence 70 { 71 state = ESCAPE; 72 continue; 73 } 74 if( ch == '"' ) // close quote -> stop 75 { 76 state = FINISHED; 77 continue; 78 } 79 ret += ch; // else append... 80 continue; 81 case ESCAPE: 82 switch(ch) 83 { 84 case '\\': 85 ret += ch; 86 state = READING; 87 continue; 88 case '"': 89 ret += ch; 90 state = READING; 91 continue; 92 case '0': 93 ret += '\0'; 94 state = READING; 95 continue; 96 default: // unrecognized escape is not mapped 97 ret += '\\'; 98 ret += ch; 99 state = READING; 100 continue; 101 } 102 } 103 } 104 if( state != FINISHED ) 105 throw IOException("badly formatted string: "+ret); 106 107 return ret; 108 } 109 110 /* Read a ([A-Z][0-9][a-z]_)* kindoff thing. Skips leading whitespace. 111 * @param in the istream to read from. 112 */ 113 ANTLR_USE_NAMESPACE(std)string read_identifier( ANTLR_USE_NAMESPACE(std)istream& in ) 114 { 115 char ch; 116 ANTLR_USE_NAMESPACE(std)string ret(""); 117 118 eatwhite(in); 119 120 while( in.get(ch) ) 121 { 122 #ifdef ANTLR_CCTYPE_NEEDS_STD 123 if( ANTLR_USE_NAMESPACE(std)isupper(ch) || 124 ANTLR_USE_NAMESPACE(std)islower(ch) || 125 ANTLR_USE_NAMESPACE(std)isdigit(ch) || 126 ch == '_' ) 127 #else 128 if( isupper(ch) || islower(ch) || isdigit(ch) || ch == '_' ) 129 #endif 130 ret += ch; 131 else 132 { 133 in.putback(ch); 134 break; 135 } 136 } 137 return ret; 138 } 139 140 /** Read a attribute="value" thing. Leading whitespace is skipped. 141 * Between attribute and '=' no whitespace is allowed. After the '=' it is 142 * permitted. 143 * @param in the istream to read from. 144 * @param attribute string the attribute name is put in 145 * @param value string the value of the attribute is put in 146 * @throws IOException if something is fishy. E.g. malformed quoting 147 * or missing '=' 148 */ 149 void read_AttributeNValue( ANTLR_USE_NAMESPACE(std)istream& in, 150 ANTLR_USE_NAMESPACE(std)string& attribute, 151 ANTLR_USE_NAMESPACE(std)string& value ) 152 { 153 attribute = read_identifier(in); 154 155 char ch; 156 if( in.get(ch) && ch == '=' ) 157 value = read_string(in); 158 else 159 throw IOException("invalid attribute=value thing "+attribute); 160 } 161 162 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE 163 } 164 #endif