// src/parser.rs
  1  #![allow(unused)]
  2  
  3  #[derive(Debug)]
  4  pub struct Parser<'a> {
  5      data: &'a str,
  6      ast: Vec<AstNode<'a>>,
  7  }
  8  
  9  impl<'a> Parser<'a> {
 10      pub fn new(data: &'a str) -> Self {
 11          Self { data, ast: vec![] }
 12      }
 13  
 14      pub fn parse(self) -> Self {
 15          let tokenizer = Tokenizer::new(self.data).tokenize();
 16          println!("{tokenizer:?}");
 17          //
 18          // let mut found_let = false;
 19          // let mut is_var_name: Option<&'a str> = None;
 20          // let mut found_equal = false;
 21          //
 22          // for token in tokenizer.tokens {
 23          //     match token {
 24          //         Token::Let => found_let = true,
 25          //         Token::Str(s) => {
 26          //             if let Some(v) = is_var_name
 27          //                 && found_equal
 28          //             {
 29          //                 let var = Variable::String(v.to_string());
 30          //                 self.ast
 31          //                     .push(AstNode::Variable((is_var_name.unwrap(), var)));
 32          //             }
 33          //             if found_let {
 34          //                 is_var_name = Some(s);
 35          //             }
 36          //         }
 37          //         Token::Equal => found_equal = true,
 38          //         Token::Number(s) => {
 39          //             if let Some(v) = is_var_name
 40          //                 && found_equal
 41          //             {
 42          //                 let var = Variable::U64(v.parse::<u64>().unwrap());
 43          //                 self.ast
 44          //                     .push(AstNode::Variable((is_var_name.unwrap(), var)));
 45          //             }
 46          //             if found_let {
 47          //                 is_var_name = Some(s);
 48          //             }
 49          //         }
 50          //         Token::Semicolon => {
 51          //             found_let = false;
 52          //             found_equal = false;
 53          //             is_var_name = None;
 54          //         }
 55          //     }
 56          // }
 57  
 58          self
 59      }
 60  }
 61  
 62  #[derive(Debug)]
 63  pub struct Tokenizer<'a> {
 64      data: &'a str,
 65      tokens: Vec<Token<'a>>,
 66  }
 67  
 68  #[derive(Debug)]
 69  pub enum TokenizerState {
 70      Searching,
 71      Variable,
 72      Struct,
 73  }
 74  
 75  impl<'a> Tokenizer<'a> {
 76      pub fn new(data: &'a str) -> Self {
 77          Self {
 78              data,
 79              tokens: vec![],
 80          }
 81      }
 82  
 83      pub fn push(&mut self, token: Token<'a>) {
 84          // println!("pushing {token:?}");
 85          self.tokens.push(token);
 86      }
 87  
 88      pub fn tokenize(mut self) -> Self {
 89          let mut word = None;
 90  
 91          let mut state = TokenizerState::Searching;
 92          let src = self.data.trim();
 93          println!("src {src:?}");
 94  
 95          for (i, c) in src.chars().enumerate() {
 96              match state {
 97                  TokenizerState::Searching => match c {
 98                      'a'..='z' | 'A'..='Z' => {
 99                          if word.is_none() {
100                              word = Some(i);
101                          }
102                      }
103                      ' ' => {
104                          if let Some(s) = word {
105                              let w = self.data[s..i].trim();
106                              match w {
107                                  "let" => {
108                                      self.push(Token::Let);
109                                      state = TokenizerState::Variable;
110                                      word = None;
111                                  }
112                                  "struct" => {
113                                      self.push(Token::Struct);
114                                      state = TokenizerState::Struct;
115                                      word = None;
116                                  }
117                                  _ => {}
118                              }
119                          }
120                      }
121  
122                      c => println!("found: {c:?}"),
123                  },
124                  // found let
125                  TokenizerState::Variable => {
126                      println!("tokenizing variable");
127                      self.variable_state_machine(i);
128                      state = TokenizerState::Searching;
129                  }
130                  TokenizerState::Struct => {
131                      println!("tokenizing struct");
132                      self.struct_state_machine(i);
133                      state = TokenizerState::Searching;
134                      println!("finished tokenizing struct");
135                  }
136              }
137          }
138  
139          self
140      }
141  
142      fn variable_state_machine(&mut self, idx: usize) {
143          #[derive(Debug)]
144          enum VariableState {
145              Equal,
146              VarIdent,
147              VarValue,
148              Semicolon,
149              Number,
150              Str,
151          }
152  
153          #[derive(Debug)]
154          enum Ident {
155              Str(usize),
156              Number(usize),
157          }
158  
159          let mut state = VariableState::VarIdent;
160  
161          let mut ident = None;
162  
163          let mut quote = None;
164          println!("src {:?}", self.data[idx..].trim());
165  
166          for (i, ch) in self.data[idx..].chars().enumerate() {
167              println!("{state:?}");
168              match state {
169                  VariableState::VarIdent => match ch {
170                      '=' => {
171                          state = VariableState::Equal;
172                      }
173                      'a'..='z' | 'A'..='Z' => {
174                          if ident.is_none() {
175                              ident = Some(Ident::Str(i));
176                          }
177                      }
178                      _ => {}
179                  },
180                  VariableState::Equal => {
181                      if let Some(iden) = ident {
182                          match iden {
183                              Ident::Str(id) => {
184                                  self.push(Token::Str(
185                                      self.data[idx + id..idx + i.saturating_sub(1)].trim(),
186                                  ));
187                              }
188                              Ident::Number(id) => self.push(Token::Number(
189                                  self.data[idx + id..idx + i.saturating_sub(1)].trim(),
190                              )),
191                          }
192                          self.push(Token::Equal);
193                          state = VariableState::VarValue;
194                          ident = None;
195                      }
196                  }
197                  VariableState::Semicolon => {
198                      self.push(Token::Semicolon);
199                      return;
200                  }
201                  VariableState::Number => match ch {
202                      ' ' => {
203                          if let Some(iden) = ident {
204                              match iden {
205                                  Ident::Str(id) => panic!("incorrect ident type"),
206  
207                                  Ident::Number(id) => self.push(Token::Number(
208                                      self.data[idx + id..idx + i.saturating_sub(1)].trim(),
209                                  )),
210                              }
211  
212                              ident = None;
213                          }
214                      }
215                      ';' => {
216                          if let Some(iden) = ident {
217                              match iden {
218                                  Ident::Str(id) => panic!("incorrect ident type"),
219  
220                                  Ident::Number(id) => {
221                                      self.push(Token::Number(self.data[idx + id..idx + i].trim()))
222                                  }
223                              }
224  
225                              ident = None;
226                          }
227                          state = VariableState::Semicolon;
228                      }
229  
230                      'a'..='z' | 'A'..='Z' => {
231                          panic!("expected number found string");
232                      }
233  
234                      '0'..='9' => continue,
235  
236                      c => panic!("found {c}"),
237                  },
238                  VariableState::Str => match ch {
239                      ' ' => {
240                          if let Some(iden) = ident {
241                              match iden {
242                                  Ident::Str(id) => self.push(Token::Str(
243                                      self.data[idx + id..idx + i.saturating_sub(1)].trim(),
244                                  )),
245  
246                                  Ident::Number(id) => panic!("incorrect ident type"),
247                              }
248  
249                              ident = None;
250                          }
251                      }
252                      ';' => {
253                          if let Some(iden) = ident {
254                              match iden {
255                                  Ident::Str(id) => self.push(Token::Str(
256                                      self.data[idx + id..idx + i.saturating_sub(1)].trim(),
257                                  )),
258  
259                                  Ident::Number(id) => panic!("incorrect ident type"),
260                              }
261  
262                              ident = None;
263                          }
264                          state = VariableState::Semicolon;
265                      }
266  
267                      '"' => match quote {
268                          None => quote = Some(i),
269                          Some(ix) => {
270                              let val = self.data[idx + ix..idx + i.saturating_sub(1)].trim();
271                              println!("val {val:?}");
272                              self.push(Token::Str(val));
273                              ident = None;
274                          }
275                      },
276                      'a'..='z' | 'A'..='Z' => {
277                          if ident.is_none() {
278                              ident = Some(Ident::Str(i));
279                          }
280                          continue;
281                      }
282                      '0'..='9' => {
283                          panic!(
284                              "trying to tokenize string: number or number in a string is unsupported"
285                          );
286                      }
287                      _ => panic!("{} {ch}", &self.data[..idx + i]),
288                  },
289                  VariableState::VarValue => match ch {
290                      'a'..='z' | 'A'..='Z' => {
291                          ident = Some(Ident::Str(i));
292                          state = VariableState::Str;
293                      }
294                      '0'..='9' => {
295                          ident = Some(Ident::Number(i));
296                          state = VariableState::Number;
297                      }
298                      _ => {}
299                  },
300              }
301          }
302      }
303  
304      fn struct_state_machine(&mut self, idx: usize) {
305          #[derive(Debug, Clone, Copy, PartialEq)]
306          enum StructState {
307              Name,
308              LeftBracket,
309              VarIdent,
310              Colon,
311          }
312  
313          let mut state = StructState::Name;
314  
315          let mut found_white_space = false;
316          let mut ident = None;
317  
318          println!("src {:?}", self.data[idx..].trim());
319  
320          for (i, ch) in self.data[idx..].chars().enumerate() {
321              println!("state {state:?}");
322              match state {
323                  StructState::Name => match ch {
324                      'a'..='z' | 'A'..='Z' => {
325                          if ident.is_none() {
326                              ident = Some(i);
327                          }
328                      }
329                      ' ' | '\n' => {
330                          self.push(Token::WhiteSpace);
331                          if let Some(iden) = ident {
332                              let var = self.data[idx + iden..idx + i].trim();
333                              println!("pushing var {var}");
334                              self.push(Token::Str(var));
335                              state = StructState::LeftBracket;
336                              ident = None;
337                          }
338                      }
339                      c => panic!("{c:?}"),
340                  },
341                  StructState::LeftBracket => match ch {
342                      '{' => {
343                          if let Some(iden) = ident {
344                              self.push(Token::Str(&self.data[idx + iden..idx + i]));
345                          }
346  
347                          self.push(Token::LeftAngleBracket);
348                          state = StructState::VarIdent;
349                      }
350                      ' ' | '\n' => continue,
351                      c => panic!("{c:?}"),
352                  },
353                  StructState::VarIdent => match ch {
354                      '}' => self.push(Token::RightAngleBracket),
355                      ' ' => {
356                          if !found_white_space {
357                              self.push(Token::WhiteSpace);
358                              found_white_space = true;
359                          }
360                      }
361  
362                      ':' => {
363                          match ident {
364                              Some(iden) => {
365                                  let var = self.data[idx + iden..idx + i].trim();
366                                  println!("var {var:?}");
367                                  self.push(Token::Str(var));
368                                  ident = None;
369                              }
370                              None => {
371                                  panic!("expect <name>:");
372                              }
373                          }
374                          self.push(Token::Colon);
375                          state = StructState::Colon;
376                          found_white_space = false;
377                      }
378                      'a'..='z' | 'A'..='Z' => {
379                          if ident.is_none() {
380                              ident = Some(i);
381                          }
382                      }
383                      _ => {}
384                  },
385                  StructState::Colon => match ch {
386                      'a'..='z' | 'A'..='Z' => {
387                          if ident.is_none() {
388                              ident = Some(i);
389                          }
390                      }
391                      ',' => match ident {
392                          Some(iden) => {
393                              self.push(Token::Str(self.data[idx + iden..idx + i].trim()));
394                              ident = None;
395                              found_white_space = false;
396                          }
397                          None => {
398                              continue;
399                          }
400                      },
401                      '}' => {
402                          println!("ident {ident:?}");
403                          if let Some(iden) = ident {
404                              self.push(Token::Str(self.data[idx + iden..idx + i].trim()));
405                              ident = None;
406                          }
407                          self.push(Token::RightAngleBracket);
408                          return;
409                      }
410  
411                      _ => {}
412                  },
413              }
414          }
415      }
416  }
417  
418  #[derive(Debug, PartialEq)]
419  pub enum Token<'a> {
420      Let,
421      Equal,
422      Number(&'a str),
423      WhiteSpace,
424      Str(&'a str),
425      Semicolon,
426      LeftAngleBracket,
427      RightAngleBracket,
428      Struct,
429      Colon,
430      TypeID(TypeID),
431  }
432  
/// Kind of declaration.
///
/// The enum is fieldless, so it is `Copy` and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Declaration {
    /// A variable declaration.
    Variable,
    /// A struct declaration.
    Struct,
}
438  
/// Built-in operations.
///
/// The enum is fieldless, so it is `Copy` and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Builtin {
    Link,
    Move,
    Exec,
}
445  
/// A typed value held by a variable.
///
/// Every payload is an integer or a `String`, so values can be cloned and
/// compared for equality.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Variable {
    I8(i8),
    I16(i16),
    I32(i32),
    I64(i64),

    U8(u8),
    U16(u16),
    U32(u32),
    U64(u64),

    String(String),
}
460  
461  impl Variable {
462      pub fn id(&self) -> TypeID {
463          match self {
464              Variable::I8(_) => TypeID::I8,
465              Variable::I16(_) => TypeID::I16,
466              Variable::I32(_) => TypeID::I32,
467              Variable::I64(_) => TypeID::I64,
468              Variable::U8(_) => TypeID::U8,
469              Variable::U16(_) => TypeID::U16,
470              Variable::U32(_) => TypeID::U32,
471              Variable::U64(_) => TypeID::U64,
472              Variable::String(_) => TypeID::String,
473          }
474      }
475  }
476  
/// Type tag with one variant per kind of value.
///
/// The enum is fieldless, so it is `Copy`, fully comparable and hashable.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TypeID {
    I8,
    I16,
    I32,
    I64,

    U8,
    U16,
    U32,
    U64,

    String,
}
491  
492  #[derive(Debug)]
493  pub enum AstNode<'a> {
494      Builtin(Builtin),
495      Variable((&'a str, Variable)),
496  }
497  
#[cfg(test)]
mod test {
    use crate::parser::{Token, Tokenizer};

    /// A quoted string value is tokenized without its quotes.
    #[test]
    fn str_var() {
        let src = r#"let foo="bar";"#;
        let tokenizer = Tokenizer::new(src).tokenize();
        println!("{:?}", tokenizer.tokens);
        // A `let` statement produces `Let, Str(name), Equal, <value>`, with no
        // `WhiteSpace` token, so the value is the fourth token (index 3).
        assert_eq!(tokenizer.tokens.get(3), Some(&Token::Str("bar")));
    }

    /// A numeric value is tokenized as `Number` with its source text.
    #[test]
    fn num_var() {
        let src = "let foo = 10; ";
        let tokenizer = Tokenizer::new(src).tokenize();
        // Same layout as in `str_var`: the value sits at index 3.
        assert_eq!(tokenizer.tokens.get(3), Some(&Token::Number("10")));
    }

    /// The first of several statements is tokenized in full.
    #[test]
    fn multiple_vars() {
        let src = "let foo = \"bar\"; let foo = 10;";
        let tokenizer = Tokenizer::new(src).tokenize();
        println!("{tokenizer:?}");
        assert!(tokenizer.tokens.starts_with(&[
            Token::Let,
            Token::Str("foo"),
            Token::Equal,
            Token::Str("bar"),
            Token::Semicolon,
        ]));
    }

    /// A single-field struct yields the full expected token sequence.
    #[test]
    fn parse_struct() {
        let src = "struct foo {
            bar: i32,
        }";

        let tokenizer = Tokenizer::new(src).tokenize();

        assert_eq!(
            *tokenizer.tokens.as_slice(),
            [
                Token::Struct,
                Token::WhiteSpace,
                Token::Str("foo"),
                Token::LeftAngleBracket,
                Token::WhiteSpace,
                Token::Str("bar"),
                Token::Colon,
                Token::Str("i32"),
                Token::RightAngleBracket
            ]
        );

        println!("{tokenizer:?}");
    }
}