parser.rs
#![allow(unused)]
// The lint above is silenced because this module is work in progress: several
// AST-side types are declared before the code that will construct them exists.

/// Front end that will eventually turn source text into an AST.
///
/// Only tokenization is wired up so far, so `ast` is never filled.
#[derive(Debug)]
pub struct Parser<'a> {
    data: &'a str,
    ast: Vec<AstNode<'a>>,
}

impl<'a> Parser<'a> {
    /// Creates a parser over `data` with an empty AST.
    pub fn new(data: &'a str) -> Self {
        Self { data, ast: vec![] }
    }

    /// Tokenizes the source and prints the resulting [`Tokenizer`] to stdout.
    ///
    /// Returns `self` unchanged; no AST nodes are produced yet.
    ///
    /// # Panics
    ///
    /// Panics whenever [`Tokenizer::tokenize`] panics.
    pub fn parse(self) -> Self {
        let tokenizer = Tokenizer::new(self.data).tokenize();
        println!("{tokenizer:?}");

        // TODO: fold `let <name> = <value>;` token runs into
        // `AstNode::Variable((name, value))` entries of `self.ast`.
        self
    }
}

/// Splits source text into [`Token`]s that borrow from the input.
#[derive(Debug)]
pub struct Tokenizer<'a> {
    data: &'a str,
    tokens: Vec<Token<'a>>,
}

/// Top-level modes of the tokenizer: looking for a keyword, or inside the
/// statement introduced by `let` / `struct`.
#[derive(Debug)]
pub enum TokenizerState {
    Searching,
    Variable,
    Struct,
}

impl<'a> Tokenizer<'a> {
    /// Creates a tokenizer over `data` with no tokens.
    pub fn new(data: &'a str) -> Self {
        Self {
            data,
            tokens: vec![],
        }
    }

    /// Appends `token` to the token stream.
    pub fn push(&mut self, token: Token<'a>) {
        self.tokens.push(token);
    }

    /// Tokenizes the whole input and returns the tokenizer holding the tokens.
    ///
    /// The input is scanned for words (starting at an ASCII letter and ending
    /// at whitespace). The keywords `let` and `struct` each emit their keyword
    /// token followed by one [`Token::WhiteSpace`], and the rest of the
    /// statement is handed to the matching sub-scanner. Any other word is
    /// skipped. Scanning resumes right after whatever the sub-scanner
    /// consumed, so several statements in one input are all tokenized.
    ///
    /// All positions are byte offsets into the original input, so leading
    /// whitespace and non-ASCII text are sliced correctly.
    ///
    /// # Panics
    ///
    /// Panics on malformed statements; see the messages raised in
    /// `variable_state_machine` and `struct_state_machine`.
    pub fn tokenize(mut self) -> Self {
        let src: &'a str = self.data;
        let mut pos = 0;

        while let Some(skip) = src[pos..].find(|c: char| c.is_ascii_alphabetic()) {
            let word_start = pos + skip;
            // A keyword only counts when whitespace follows it.
            let word_end = match src[word_start..].find(char::is_whitespace) {
                Some(len) => word_start + len,
                None => break,
            };
            // Step over the single separator character after the word.
            let sep_len = src[word_end..].chars().next().map_or(0, char::len_utf8);
            let body = word_end + sep_len;

            pos = match &src[word_start..word_end] {
                "let" => {
                    self.push(Token::Let);
                    self.push(Token::WhiteSpace);
                    self.variable_state_machine(body)
                }
                "struct" => {
                    self.push(Token::Struct);
                    self.push(Token::WhiteSpace);
                    self.struct_state_machine(body)
                }
                _ => body,
            };
        }

        self
    }

    /// Tokenizes `<name> = <value>... ;` starting at byte offset `idx`.
    ///
    /// Emits `Str(name)`, `Equal`, one token per value and `Semicolon`.
    /// Values are quoted strings (`Str` without the quotes), digit runs
    /// (`Number`) or bare words (`Str`). Returns the byte offset just past
    /// the consumed text (the input length if no `;` is found).
    ///
    /// # Panics
    ///
    /// Panics if a string literal is not closed, if a number is directly
    /// followed by anything other than whitespace or `;`, or if a bare word
    /// is directly followed by anything other than whitespace, `;` or `"`.
    fn variable_state_machine(&mut self, idx: usize) -> usize {
        let src: &'a str = self.data;

        // The name is everything from the first letter up to `=` (or `;`).
        let stop = match src[idx..].find(|c: char| c == '=' || c == ';') {
            Some(off) => idx + off,
            None => src.len(),
        };
        let head = &src[idx..stop];
        if let Some(first) = head.find(|c: char| c.is_ascii_alphabetic()) {
            self.push(Token::Str(head[first..].trim()));
        }

        match src[stop..].chars().next() {
            None => return src.len(),
            Some(';') => {
                self.push(Token::Semicolon);
                return stop + 1;
            }
            // `find` above only stops on `=` or `;`.
            Some(_) => self.push(Token::Equal),
        }
        let mut pos = stop + 1;

        loop {
            let rest = &src[pos..];
            let ch = match rest.chars().next() {
                Some(c) => c,
                None => return src.len(),
            };

            match ch {
                ';' => {
                    // Emitted immediately so a trailing `;` is never lost.
                    self.push(Token::Semicolon);
                    return pos + 1;
                }
                '"' => {
                    let body = &rest[1..];
                    let close = match body.find('"') {
                        Some(off) => off,
                        None => panic!("unterminated string literal: {rest}"),
                    };
                    self.push(Token::Str(&body[..close]));
                    // Opening quote + contents + closing quote.
                    pos += 1 + close + 1;
                }
                c if c.is_ascii_digit() => {
                    let len = rest
                        .find(|c: char| !c.is_ascii_digit())
                        .unwrap_or(rest.len());
                    self.push(Token::Number(&rest[..len]));
                    pos += len;
                    match src[pos..].chars().next() {
                        None | Some(';') => {}
                        Some(c) if c.is_whitespace() => {}
                        Some(c) if c.is_ascii_alphabetic() => {
                            panic!("expected number found string")
                        }
                        Some(c) => panic!("found {c}"),
                    }
                }
                c if c.is_ascii_alphabetic() => {
                    let len = rest
                        .find(|c: char| !(c.is_ascii_alphanumeric() || c == '_'))
                        .unwrap_or(rest.len());
                    self.push(Token::Str(&rest[..len]));
                    pos += len;
                    match src[pos..].chars().next() {
                        None | Some(';') | Some('"') => {}
                        Some(c) if c.is_whitespace() => {}
                        Some(ch) => panic!("{} {ch}", &src[..pos]),
                    }
                }
                // Whitespace and any other character between values is skipped.
                c => pos += c.len_utf8(),
            }
        }
    }

    /// Tokenizes `<name> { <field>: <type>, ... }` starting at byte offset `idx`.
    ///
    /// Emits `Str(name)`, `LeftAngleBracket`, then per field an optional
    /// `WhiteSpace` (only when whitespace precedes the field name),
    /// `Str(field)`, `Colon`, `Str(type)`, and finally `RightAngleBracket`.
    /// Returns the byte offset just past the closing `}` (the input length
    /// if it is missing).
    ///
    /// # Panics
    ///
    /// Panics on an unexpected character in or after the struct name, and
    /// with `expect <name>:` when a `:` has no field name before it.
    fn struct_state_machine(&mut self, idx: usize) -> usize {
        #[derive(Debug, Clone, Copy, PartialEq)]
        enum StructState {
            Name,
            LeftBracket,
            FieldName,
            FieldType,
        }

        let src: &'a str = self.data;
        let mut state = StructState::Name;
        // Byte offset where the identifier currently being read starts.
        let mut ident: Option<usize> = None;
        let mut saw_space = false;

        for (off, ch) in src[idx..].char_indices() {
            let at = idx + off;
            match state {
                StructState::Name => match ch {
                    c if c.is_ascii_alphabetic() => {
                        if ident.is_none() {
                            ident = Some(at);
                        }
                    }
                    c if ident.is_some() && (c.is_ascii_alphanumeric() || c == '_') => {}
                    c if c.is_whitespace() => {
                        if let Some(start) = ident.take() {
                            self.push(Token::Str(&src[start..at]));
                            state = StructState::LeftBracket;
                        }
                    }
                    // `struct foo{` — the brace directly ends the name.
                    '{' => match ident.take() {
                        Some(start) => {
                            self.push(Token::Str(&src[start..at]));
                            self.push(Token::LeftAngleBracket);
                            state = StructState::FieldName;
                        }
                        None => panic!("{ch:?}"),
                    },
                    c => panic!("{c:?}"),
                },
                StructState::LeftBracket => match ch {
                    '{' => {
                        self.push(Token::LeftAngleBracket);
                        state = StructState::FieldName;
                    }
                    c if c.is_whitespace() => {}
                    c => panic!("{c:?}"),
                },
                StructState::FieldName => match ch {
                    '}' => {
                        self.push(Token::RightAngleBracket);
                        return at + 1;
                    }
                    ':' => {
                        match ident.take() {
                            Some(start) => self.push(Token::Str(src[start..at].trim())),
                            None => panic!("expect <name>:"),
                        }
                        self.push(Token::Colon);
                        state = StructState::FieldType;
                        saw_space = false;
                    }
                    c if c.is_whitespace() => {
                        // Whitespace after the name is trimmed off the slice instead.
                        if ident.is_none() {
                            saw_space = true;
                        }
                    }
                    c if c.is_ascii_alphabetic() => {
                        if ident.is_none() {
                            // Emit the separator lazily so whitespace before `}`
                            // does not produce a stray token.
                            if saw_space {
                                self.push(Token::WhiteSpace);
                                saw_space = false;
                            }
                            ident = Some(at);
                        }
                    }
                    _ => {}
                },
                StructState::FieldType => match ch {
                    c if c.is_ascii_alphabetic() => {
                        if ident.is_none() {
                            ident = Some(at);
                        }
                    }
                    ',' => {
                        if let Some(start) = ident.take() {
                            self.push(Token::Str(src[start..at].trim()));
                        }
                        // Go back to reading a field name so further fields
                        // are tokenized separately.
                        state = StructState::FieldName;
                        saw_space = false;
                    }
                    '}' => {
                        if let Some(start) = ident.take() {
                            self.push(Token::Str(src[start..at].trim()));
                        }
                        self.push(Token::RightAngleBracket);
                        return at + 1;
                    }
                    _ => {}
                },
            }
        }

        src.len()
    }
}

/// A lexical token; string payloads borrow from the tokenized input.
#[derive(Debug, PartialEq)]
pub enum Token<'a> {
    /// The `let` keyword.
    Let,
    /// `=`.
    Equal,
    /// A run of ASCII digits, kept as text.
    Number(&'a str),
    /// Whitespace separating a keyword or field from the identifier after it.
    WhiteSpace,
    /// An identifier, type name, bare word or the contents of a quoted string.
    Str(&'a str),
    /// `;`.
    Semicolon,
    /// Emitted for `{` (the name is historical).
    LeftAngleBracket,
    /// Emitted for `}` (the name is historical).
    RightAngleBracket,
    /// The `struct` keyword.
    Struct,
    /// `:`.
    Colon,
    /// A resolved type; not produced by the tokenizer yet.
    TypeID(TypeID),
}

/// Kinds of declarations.
#[derive(Debug)]
pub enum Declaration {
    Variable,
    Struct,
}

/// Built-in operations.
#[derive(Debug)]
pub enum Builtin {
    Link,
    Move,
    Exec,
}

/// A typed value held by a variable.
#[derive(Debug)]
pub enum Variable {
    I8(i8),
    I16(i16),
    I32(i32),
    I64(i64),

    U8(u8),
    U16(u16),
    U32(u32),
    U64(u64),

    String(String),
}

impl Variable {
    /// Returns the [`TypeID`] matching this value's variant.
    pub fn id(&self) -> TypeID {
        match self {
            Variable::I8(_) => TypeID::I8,
            Variable::I16(_) => TypeID::I16,
            Variable::I32(_) => TypeID::I32,
            Variable::I64(_) => TypeID::I64,
            Variable::U8(_) => TypeID::U8,
            Variable::U16(_) => TypeID::U16,
            Variable::U32(_) => TypeID::U32,
            Variable::U64(_) => TypeID::U64,
            Variable::String(_) => TypeID::String,
        }
    }
}

/// Type tag, one per [`Variable`] variant.
#[derive(Debug, PartialEq)]
pub enum TypeID {
    I8,
    I16,
    I32,
    I64,

    U8,
    U16,
    U32,
    U64,

    String,
}

/// A node of the abstract syntax tree.
#[derive(Debug)]
pub enum AstNode<'a> {
    Builtin(Builtin),
    /// A variable binding: `(name, value)`.
    Variable((&'a str, Variable)),
}

#[cfg(test)]
mod test {
    // `super` keeps the tests independent of where this module is mounted.
    use super::{Token, Tokenizer};

    #[test]
    fn str_var() {
        let src = r#"let foo="bar";"#;
        let tokenizer = Tokenizer::new(src).tokenize();
        println!("{:?}", tokenizer.tokens);
        assert_eq!(*tokenizer.tokens.get(4).unwrap(), Token::Str("bar"))
    }

    #[test]
    fn num_var() {
        let src = "let foo = 10; ";
        let tokenizer = Tokenizer::new(src).tokenize();
        assert_eq!(*tokenizer.tokens.get(4).unwrap(), Token::Number("10"))
    }

    #[test]
    fn multiple_vars() {
        let src = "let foo = \"bar\"; let foo = 10;";
        let tokenizer = Tokenizer::new(src).tokenize();
        println!("{tokenizer:?}");

        assert_eq!(
            *tokenizer.tokens.as_slice(),
            [
                Token::Let,
                Token::WhiteSpace,
                Token::Str("foo"),
                Token::Equal,
                Token::Str("bar"),
                Token::Semicolon,
                Token::Let,
                Token::WhiteSpace,
                Token::Str("foo"),
                Token::Equal,
                Token::Number("10"),
                Token::Semicolon
            ]
        );
    }

    #[test]
    fn parse_struct() {
        let src = "struct foo {\n    bar: i32,\n}";

        let tokenizer = Tokenizer::new(src).tokenize();

        assert_eq!(
            *tokenizer.tokens.as_slice(),
            [
                Token::Struct,
                Token::WhiteSpace,
                Token::Str("foo"),
                Token::LeftAngleBracket,
                Token::WhiteSpace,
                Token::Str("bar"),
                Token::Colon,
                Token::Str("i32"),
                Token::RightAngleBracket
            ]
        );

        println!("{tokenizer:?}");
    }
}