/ src / parser.rs
parser.rs
  1  #![allow(clippy::range_plus_one)]
  2  
  3  use crate::arborist::{self as a, PlainToken as P, TokenTree as Tt};
  4  use crate::ast::*;
  5  use crate::compiler_types::{Name, Span, Spanned};
  6  use num_derive::FromPrimitive;
  7  use num_traits::FromPrimitive;
  8  
  9  type Error = Spanned<ErrorKind>;
 10  type Result<T> = std::result::Result<T, Error>;
 11  type Parsed<T> = Result<Option<T>>;
 12  
 13  #[derive(Clone, Debug)]
 14  pub enum ErrorKind {
 15      Custom(&'static str),
 16  }
 17  
 18  const fn err_span(message: &'static str, span: Span) -> Error {
 19      Error {
 20          span,
 21          kind: ErrorKind::Custom(message),
 22      }
 23  }
 24  
 25  #[derive(Clone, Copy, Debug, FromPrimitive, Eq, Ord, PartialEq, PartialOrd)]
 26  #[repr(u8)]
 27  enum Level {
 28      Min = 0,
 29      Or = 1,
 30      And = 2,
 31      Equal = 3,
 32      Add = 4,
 33      Mul = 5,
 34      Prefix = 6,
 35      Postfix = 7,
 36      Max = 8,
 37  }
 38  
 39  impl Level {
 40      pub fn higher(self) -> Self {
 41          Self::from_u8(self as u8 + 1).expect("can't go higher than Level::Max")
 42      }
 43  }
 44  
 45  struct Parser<'src> {
 46      tokens: &'src [Spanned<Tt>],
 47      i: usize,
 48      source: &'src str,
 49      start_span: Span,
 50      end_span: Span,
 51  }
 52  
 53  impl<'src> Parser<'src> {
 54      fn peek(&self) -> Option<Spanned<&'src Tt>> {
 55          self.tokens
 56              .get(self.i)
 57              .map(|Spanned { kind, span }| Spanned {
 58                  kind,
 59                  span: span.clone(),
 60              })
 61      }
 62      fn next(&mut self) -> Option<Spanned<&'src Tt>> {
 63          let tt = self.peek()?;
 64          self.i += 1;
 65          Some(tt)
 66      }
 67      fn just(&mut self, token: P) -> Option<Span> {
 68          let Spanned { kind, span } = self.peek()?;
 69          if *kind == Tt::Plain(token) {
 70              self.next().unwrap();
 71              Some(span)
 72          } else {
 73              None
 74          }
 75      }
 76      fn get_previous_span(&self) -> Span {
 77          if self.i == 0 {
 78              self.start_span.clone()
 79          } else {
 80              self.tokens[self.i - 1].span.clone()
 81          }
 82      }
 83      fn get_span_checked(&self) -> Option<Span> {
 84          self.peek().map(|t| t.span)
 85      }
 86      fn get_span(&self) -> Span {
 87          self.get_span_checked()
 88              .unwrap_or_else(|| self.end_span.clone())
 89      }
 90      fn err(&self, message: &'static str) -> Error {
 91          err_span(message, self.get_span())
 92      }
 93      fn err_previous(&self, message: &'static str) -> Error {
 94          err_span(message, self.get_previous_span())
 95      }
 96      fn name(&mut self) -> Option<Name> {
 97          self.just(P::Name).map(|span| Name {
 98              kind: self.source[span.clone()].into(),
 99              span,
100          })
101      }
102      fn int(&mut self) -> Option<(u64, Option<Name>)> {
103          let span = self.just(P::Int)?;
104          let mut src_str = &self.source[span.clone()];
105          let original_len = src_str.len();
106          let mut int: u64 = 0;
107          while let Some(c) = src_str.chars().next() {
108              match c {
109                  '_' => {}
110                  '0'..='9' => {
111                      let digit = u64::from(c as u8 - b'0');
112                      int = int.wrapping_mul(10).wrapping_add(digit);
113                  }
114                  _ => break,
115              }
116              src_str = &src_str[c.len_utf8()..];
117          }
118          let suffix = if src_str.is_empty() {
119              None
120          } else {
121              let kind = src_str.into();
122              let span = span.start + (original_len - src_str.len())..span.end;
123              Some(Name { kind, span })
124          };
125          Some((int, suffix))
126      }
127      fn spanned<O>(&mut self, f: impl FnOnce(&mut Self) -> Result<O>) -> Result<Spanned<O>> {
128          let span_start = self.get_span().start;
129          f(self).map(|kind| {
130              let span_end = self.get_previous_span().end;
131              let span = span_start..span_end;
132              Spanned { kind, span }
133          })
134      }
135      fn spanned2<O>(&mut self, f: impl FnOnce(&mut Self) -> Parsed<O>) -> Parsed<Spanned<O>> {
136          self.spanned(f)
137              .map(|Spanned { kind, span }| kind.map(|kind| Spanned { kind, span }))
138      }
139      fn parse_item<O>(
140          mut item_parser: impl FnMut(&mut Self) -> Result<O>,
141          tokens: &'src a::Item,
142          source: &'src str,
143          start_span: Span,
144          end_span: Span,
145      ) -> Result<O> {
146          let mut this = Self {
147              tokens,
148              i: 0,
149              source,
150              start_span,
151              end_span,
152          };
153          let o = item_parser(&mut this)?;
154          match this.get_span_checked() {
155              Some(span) => Err(Spanned {
156                  kind: ErrorKind::Custom("expected end of item"),
157                  span,
158              }),
159              None => Ok(o),
160          }
161      }
162      fn item<O>(
163          &self,
164          item_parser: impl FnMut(&mut Self) -> Result<O>,
165          item: &'src a::Item,
166          start_span: Span,
167          end_span: Span,
168      ) -> Result<O> {
169          Self::parse_item(item_parser, item, self.source, start_span, end_span)
170      }
171      fn parse_block<O>(
172          mut item_parser: impl FnMut(&mut Self) -> Result<O>,
173          block: &'src a::Block,
174          source: &'src str,
175          start_span: Span,
176          end_span: Span,
177      ) -> Result<Vec<O>> {
178          block
179              .iter()
180              .map(|tokens| {
181                  let mut this = Self {
182                      tokens,
183                      i: 0,
184                      source,
185                      start_span: start_span.clone(),
186                      end_span: end_span.clone(),
187                  };
188                  let o = item_parser(&mut this)?;
189                  match this.get_span_checked() {
190                      Some(span) => Err(Spanned {
191                          kind: ErrorKind::Custom("expected end of item"),
192                          span,
193                      }),
194                      None => Ok(o),
195                  }
196              })
197              .collect()
198      }
199      // These lifetimes are a little too strict, but it doesn't matter at all.
200      fn block<O>(
201          &self,
202          item_parser: impl FnMut(&mut Self) -> Result<O>,
203          block: &'src a::Block,
204          start_span: Span,
205          end_span: Span,
206      ) -> Result<Vec<O>> {
207          Self::parse_block(item_parser, block, self.source, start_span, end_span)
208      }
209      #[allow(clippy::unnecessary_wraps)]
210      fn ty(&mut self) -> Parsed<Ty> {
211          if let Some(fn_span) = self.just(P::Fn) {
212              let Some(Spanned {
213                  kind: Tt::Paren(params, _),
214                  span,
215              }) = self.peek()
216              else {
217                  return Err(self.err("expected function type parameters"));
218              };
219              self.next().unwrap();
220              let params = self.block(
221                  |this| {
222                      this.ty()
223                          .transpose()
224                          .unwrap_or_else(|| Err(this.err("expected parameter type")))
225                  },
226                  params,
227                  span.start..span.start,
228                  span.end..span.end,
229              )?;
230              let returns = self.ty()?.map(Box::new);
231              return Ok(Some(Spanned {
232                  kind: TyKind::Function(params, returns),
233                  span: fn_span.start..self.get_previous_span().end,
234              }));
235          }
236          let Some(name) = self.name() else {
237              return Ok(None);
238          };
239          let mut ty = Ty {
240              span: name.span.clone(),
241              kind: TyKind::Name(name),
242          };
243          while let Some(deref_span) = self.just(P::Hat) {
244              let span = ty.span.start..deref_span.end;
245              ty = Ty {
246                  kind: TyKind::Pointer(Box::new(ty)),
247                  span,
248              };
249          }
250          Ok(Some(ty))
251      }
252      fn param(&mut self) -> Result<(Name, Ty)> {
253          let Some(name) = self.name() else {
254              return Err(self.err("expected function parameter"));
255          };
256          let ty = self
257              .ty()
258              .transpose()
259              .unwrap_or_else(|| Err(self.err("expected function parameter type")))?;
260          Ok((name, ty))
261      }
262      fn expr(&mut self) -> Result<Expr> {
263          self.expr_at(Level::Min)
264      }
265      fn expr_at(&mut self, level: Level) -> Result<Expr> {
266          const BIN_OPS: &[(P, Level, BinOpKind)] = &[
267              (P::LessThanEqual, Level::Equal, BinOpKind::CmpLe),
268              (P::Plus, Level::Add, BinOpKind::Add),
269              (P::Minus, Level::Add, BinOpKind::Sub),
270              (P::Asterisk, Level::Mul, BinOpKind::Mul),
271          ];
272          const UNARY_OPS: &[(P, UnaryOpKind)] = &[(P::Minus, UnaryOpKind::Neg)];
273          let span_start = self.get_span().start;
274          let atom = if let Some(Spanned {
275              kind: Tt::Paren(block, multi),
276              span,
277          }) = self.peek()
278          {
279              if *multi {
280                  return Err(self.err("tuples are not yet implemented"));
281              }
282              assert_eq!(block.len(), 1);
283              self.next().unwrap();
284              let start_span = span.start + 1..span.start + 1;
285              let end_span = span.end - 1..span.end - 1;
286              let expr = self.item(Self::expr, &block[0], start_span, end_span)?;
287              ExprKind::Paren(Box::new(expr))
288          } else if let Some(Spanned {
289              kind: Tt::IndentedBlock(_),
290              ..
291          }) = self.peek()
292          {
293              let block = self.colon_block()?;
294              ExprKind::Block(block)
295          } else if let Some(name) = self.name() {
296              ExprKind::Place(PlaceKind::Var(name))
297          } else if let Some((int, suffix)) = self.int() {
298              ExprKind::Int(int, suffix)
299          } else if self.just(P::If).is_some() {
300              let condition = self.expr()?;
301              let then_block = self.colon_block()?;
302              let else_block = self.else_block()?;
303              ExprKind::If(Box::new(condition), then_block, else_block)
304          } else if self.just(P::While).is_some() {
305              let condition = self.expr()?;
306              let body = self.colon_block()?;
307              if matches!(
308                  self.peek(),
309                  Some(Spanned {
310                      kind: Tt::ElseBlock(_),
311                      ..
312                  })
313              ) {
314                  return Err(self.err("else blocks for while loops not yet implemented"));
315              }
316              ExprKind::While(Box::new(condition), body)
317          } else {
318              let b = 'b: {
319                  let Some(Spanned {
320                      kind: &Tt::Plain(token),
321                      span,
322                  }) = self.next()
323                  else {
324                      break 'b None;
325                  };
326                  let Some(kind) = UNARY_OPS
327                      .iter()
328                      .find_map(|&(t, k)| (t == token).then_some(k))
329                  else {
330                      break 'b None;
331                  };
332                  if level > Level::Prefix {
333                      // We will surely never hit this error path. We never call .expr() with a precedence level higher than Prefix.
334                      return Err(self.err("this prefix operator's precedence is too low"));
335                  }
336                  let rhs = self.expr_at(Level::Prefix)?;
337                  Some(ExprKind::UnaryOp(UnaryOp { kind, span }, Box::new(rhs)))
338              };
339              b.ok_or_else(|| self.err_previous("unexpected token while parsing expression"))?
340          };
341          let mut e = Expr {
342              kind: atom,
343              span: span_start..self.get_previous_span().end,
344          };
345          // NOTE: These postfix operators have the highest precedence, so we skip the precedence level check and do this in a separate loop.
346          loop {
347              let span: Span;
348              let kind: ExprKind;
349              if let Some(deref_span) = self.just(P::Hat) {
350                  span = e.span.start..deref_span.end;
351                  kind = ExprKind::Place(PlaceKind::Deref(Box::new(e), deref_span));
352              } else if let Some(ref_span) = self.just(P::At) {
353                  span = e.span.start..ref_span.end;
354                  let op = UnaryOp {
355                      kind: UnaryOpKind::Ref,
356                      span: ref_span,
357                  };
358                  kind = ExprKind::UnaryOp(op, Box::new(e));
359              } else if self.just(P::Dot).is_some() {
360                  let field = self
361                      .name()
362                      .ok_or_else(|| self.err("expected field or method name after dot"))?;
363                  if matches!(
364                      self.peek(),
365                      Some(Spanned {
366                          kind: Tt::Paren(..),
367                          span: _
368                      })
369                  ) {
370                      return Err(self.err("not yet implemented: method calls"));
371                  }
372                  span = e.span.start..field.span.end;
373                  kind = ExprKind::Place(PlaceKind::Field(Box::new(e), field));
374              } else if self.just(P::As).is_some() {
375                  let Some(ty) = self.ty()? else {
376                      return Err(self.err_previous("expected type after `as`"));
377                  };
378                  span = e.span.start..ty.span.end;
379                  kind = ExprKind::As(Box::new(e), ty);
380              } else if let Some(Spanned {
381                  kind: Tt::Paren(args, _),
382                  span: args_span,
383              }) = self.peek()
384              {
385                  self.next().unwrap();
386                  let args = self.block(
387                      Self::expr,
388                      args,
389                      args_span.start + 1..args_span.start + 1,
390                      args_span.end - 1..args_span.end - 1,
391                  )?;
392                  span = e.span.start..args_span.end;
393                  kind = ExprKind::Call(Box::new(e), args);
394              } else if let Some(Spanned {
395                  kind: Tt::Square(block, multi),
396                  span: index_span,
397              }) = self.peek()
398              {
399                  if *multi {
400                      return Err(self.err("indexing cannot have multiple arguments"));
401                  }
402                  assert_eq!(block.len(), 1);
403                  self.next().unwrap();
404                  let start_span = index_span.start + 1..index_span.start + 1;
405                  let end_span = index_span.end - 1..index_span.end - 1;
406                  let expr = self.item(Self::expr, &block[0], start_span, end_span)?;
407                  span = e.span.start..index_span.end;
408                  kind = ExprKind::Place(PlaceKind::Index(Box::new(e), Box::new(expr), index_span));
409              } else {
410                  break;
411              }
412              e = Expr { kind, span };
413          }
414          loop {
415              let Some(Spanned {
416                  kind: Tt::Plain(kind),
417                  span,
418              }) = self.peek()
419              else {
420                  break;
421              };
422              let Some(&(_, op_level, op_kind)) = BIN_OPS.iter().find(|(op, _, _)| kind == op) else {
423                  break;
424              };
425              if op_level < level {
426                  break;
427              }
428              self.next().unwrap();
429              let rhs = self.expr_at(op_level.higher())?;
430              let op = BinOp {
431                  kind: op_kind,
432                  span,
433              };
434              let span = e.span.start..rhs.span.end;
435              e = Expr {
436                  kind: ExprKind::BinOp(op, Box::new(e), Box::new(rhs)),
437                  span,
438              };
439          }
440          // assignment
441          if level <= Level::Min && self.just(P::Equals).is_some() {
442              let rhs = self.expr()?;
443              match e.kind {
444                  ExprKind::Place(p) => {
445                      let lhs = Place {
446                          kind: p,
447                          span: e.span,
448                      };
449                      let span = lhs.span.start..rhs.span.end;
450                      e = Expr {
451                          kind: ExprKind::Assign(lhs, Box::new(rhs)),
452                          span,
453                      };
454                  }
455                  _ => return Err(err_span("cannot assign to this kind of expression", e.span)),
456              }
457          }
458          Ok(e)
459      }
460      fn stmt(&mut self) -> Result<Stmt> {
461          self.spanned(Self::stmt_kind)
462      }
463      fn stmt_kind(&mut self) -> Result<StmtKind> {
464          if self.just(P::Let).is_some() {
465              let name = self
466                  .name()
467                  .ok_or_else(|| self.err("expected name for let statement"))?;
468              let ty = self.ty()?;
469              if self.just(P::Equals).is_none() {
470                  return Err(self.err("expected `=` for let statement"));
471              }
472              let body = self.expr()?;
473              Ok(StmtKind::Let(name, ty, body))
474          } else {
475              self.expr().map(StmtKind::Expr)
476          }
477      }
478      fn colon_block(&mut self) -> Result<Block> {
479          let Some(Spanned {
480              kind: Tt::IndentedBlock(stmts),
481              span,
482          }) = self.peek()
483          else {
484              return Err(self.err("expected beginning of block"));
485          };
486          self.next().unwrap();
487          let start_span = span.start + 1..span.start + 1;
488          let end_span = span.end..span.end;
489          self.block(Self::stmt, stmts, start_span, end_span)
490              .map(Block)
491      }
492      fn else_block(&mut self) -> Parsed<Block> {
493          let Some(Spanned {
494              kind: Tt::ElseBlock(stmts),
495              span,
496          }) = self.peek()
497          else {
498              return Ok(None);
499          };
500          self.next().unwrap();
501          let start_span = span.start + 4..span.start + 4;
502          let end_span = span.end..span.end;
503          self.block(Self::stmt, stmts, start_span, end_span)
504              .map(|x| Some(Block(x)))
505      }
506      fn decl(&mut self) -> Parsed<Decl> {
507          self.spanned2(Self::decl_kind)
508      }
509      #[allow(clippy::single_match_else)]
510      fn decl_kind(&mut self) -> Parsed<DeclKind> {
511          let Some(Spanned { kind, .. }) = self.next() else {
512              return Ok(None);
513          };
514          let decl = match kind {
515              Tt::Plain(P::Fn) => {
516                  let name = self
517                      .name()
518                      .ok_or_else(|| self.err("expected function name"))?;
519                  // use .peek() so we get the correct span for the error
520                  let Some(Spanned {
521                      kind: Tt::Paren(params, _),
522                      span,
523                  }) = self.peek()
524                  else {
525                      return Err(self.err("expected paren list of function parameters"));
526                  };
527                  self.next().unwrap();
528                  let start_span = span.start + 1..span.start + 1;
529                  let end_span = span.end - 1..span.end - 1;
530                  let parameters = self.block(Self::param, params, start_span, end_span)?;
531                  let returns = self.ty()?;
532                  let body = self.colon_block()?;
533                  DeclKind::Function(Function {
534                      name,
535                      parameters,
536                      returns,
537                      body,
538                  })
539              }
540              Tt::Plain(P::Extern) => {
541                  self.just(P::Fn)
542                      .ok_or_else(|| self.err("expected fn keyword after extern"))?;
543                  let name = self
544                      .name()
545                      .ok_or_else(|| self.err("expected extern function name"))?;
546                  let Some(Spanned {
547                      kind: Tt::Paren(params, _),
548                      span,
549                  }) = self.peek()
550                  else {
551                      return Err(self.err("expected paren list of extern function parameters"));
552                  };
553                  self.next().unwrap();
554                  let start_span = span.start + 1..span.start + 1;
555                  let end_span = span.end - 1..span.end - 1;
556                  let parameters = self.block(
557                      |this| {
558                          this.ty().and_then(|x| {
559                              x.ok_or_else(|| this.err("expected extern function parameter type"))
560                          })
561                      },
562                      params,
563                      start_span,
564                      end_span,
565                  )?;
566                  let returns = self.ty()?;
567                  DeclKind::ExternFunction(ExternFunction {
568                      name,
569                      parameters,
570                      returns,
571                  })
572              }
573              Tt::Plain(P::Struct) => {
574                  let name = self
575                      .name()
576                      .ok_or_else(|| self.err("expected struct name"))?;
577  
578                  let Some(Spanned {
579                      kind: Tt::IndentedBlock(struct_body),
580                      span,
581                  }) = self.peek()
582                  else {
583                      return Err(self.err("expected beginning of struct block"));
584                  };
585                  self.next().unwrap();
586                  let start_span = span.start + 1..span.start + 1;
587                  let end_span = span.end..span.end;
588                  let fields = self.block(Self::param, struct_body, start_span, end_span)?;
589                  DeclKind::Struct(Struct { name, fields })
590              }
591              _ => {
592                  self.i -= 1; // Hacky, I know.
593                  return Ok(None);
594              }
595          };
596          Ok(Some(decl))
597      }
598      fn parse_program(block: &'src a::Block, source: &'src str) -> Result<Program> {
599          let start_span = 0..0;
600          let end_span = source.len()..source.len();
601          Self::parse_block(
602              |this| {
603                  this.decl().transpose().unwrap_or_else(|| {
604                      Err(this.err("unexpected token while parsing top level declaration"))
605                  })
606              },
607              block,
608              source,
609              start_span,
610              end_span,
611          )
612          .map(|decls| Program { decls })
613      }
614  }
615  
616  pub fn parse(block: &a::Block, source: &str) -> Result<Program> {
617      Parser::parse_program(block, source)
618  }