parser.rs
1 #![allow(clippy::range_plus_one)] 2 3 use crate::arborist::{self as a, PlainToken as P, TokenTree as Tt}; 4 use crate::ast::*; 5 use crate::compiler_types::{Name, Span, Spanned}; 6 use num_derive::FromPrimitive; 7 use num_traits::FromPrimitive; 8 9 type Error = Spanned<ErrorKind>; 10 type Result<T> = std::result::Result<T, Error>; 11 type Parsed<T> = Result<Option<T>>; 12 13 #[derive(Clone, Debug)] 14 pub enum ErrorKind { 15 Custom(&'static str), 16 } 17 18 const fn err_span(message: &'static str, span: Span) -> Error { 19 Error { 20 span, 21 kind: ErrorKind::Custom(message), 22 } 23 } 24 25 #[derive(Clone, Copy, Debug, FromPrimitive, Eq, Ord, PartialEq, PartialOrd)] 26 #[repr(u8)] 27 enum Level { 28 Min = 0, 29 Or = 1, 30 And = 2, 31 Equal = 3, 32 Add = 4, 33 Mul = 5, 34 Prefix = 6, 35 Postfix = 7, 36 Max = 8, 37 } 38 39 impl Level { 40 pub fn higher(self) -> Self { 41 Self::from_u8(self as u8 + 1).expect("can't go higher than Level::Max") 42 } 43 } 44 45 struct Parser<'src> { 46 tokens: &'src [Spanned<Tt>], 47 i: usize, 48 source: &'src str, 49 start_span: Span, 50 end_span: Span, 51 } 52 53 impl<'src> Parser<'src> { 54 fn peek(&self) -> Option<Spanned<&'src Tt>> { 55 self.tokens 56 .get(self.i) 57 .map(|Spanned { kind, span }| Spanned { 58 kind, 59 span: span.clone(), 60 }) 61 } 62 fn next(&mut self) -> Option<Spanned<&'src Tt>> { 63 let tt = self.peek()?; 64 self.i += 1; 65 Some(tt) 66 } 67 fn just(&mut self, token: P) -> Option<Span> { 68 let Spanned { kind, span } = self.peek()?; 69 if *kind == Tt::Plain(token) { 70 self.next().unwrap(); 71 Some(span) 72 } else { 73 None 74 } 75 } 76 fn get_previous_span(&self) -> Span { 77 if self.i == 0 { 78 self.start_span.clone() 79 } else { 80 self.tokens[self.i - 1].span.clone() 81 } 82 } 83 fn get_span_checked(&self) -> Option<Span> { 84 self.peek().map(|t| t.span) 85 } 86 fn get_span(&self) -> Span { 87 self.get_span_checked() 88 .unwrap_or_else(|| self.end_span.clone()) 89 } 90 fn err(&self, message: &'static str) -> Error { 91 err_span(message, self.get_span()) 92 } 93 fn err_previous(&self, message: &'static str) -> Error { 94 err_span(message, self.get_previous_span()) 95 } 96 fn name(&mut self) -> Option<Name> { 97 self.just(P::Name).map(|span| Name { 98 kind: self.source[span.clone()].into(), 99 span, 100 }) 101 } 102 fn int(&mut self) -> Option<(u64, Option<Name>)> { 103 let span = self.just(P::Int)?; 104 let mut src_str = &self.source[span.clone()]; 105 let original_len = src_str.len(); 106 let mut int: u64 = 0; 107 while let Some(c) = src_str.chars().next() { 108 match c { 109 '_' => {} 110 '0'..='9' => { 111 let digit = u64::from(c as u8 - b'0'); 112 int = int.wrapping_mul(10).wrapping_add(digit); 113 } 114 _ => break, 115 } 116 src_str = &src_str[c.len_utf8()..]; 117 } 118 let suffix = if src_str.is_empty() { 119 None 120 } else { 121 let kind = src_str.into(); 122 let span = span.start + (original_len - src_str.len())..span.end; 123 Some(Name { kind, span }) 124 }; 125 Some((int, suffix)) 126 } 127 fn spanned<O>(&mut self, f: impl FnOnce(&mut Self) -> Result<O>) -> Result<Spanned<O>> { 128 let span_start = self.get_span().start; 129 f(self).map(|kind| { 130 let span_end = self.get_previous_span().end; 131 let span = span_start..span_end; 132 Spanned { kind, span } 133 }) 134 } 135 fn spanned2<O>(&mut self, f: impl FnOnce(&mut Self) -> Parsed<O>) -> Parsed<Spanned<O>> { 136 self.spanned(f) 137 .map(|Spanned { kind, span }| kind.map(|kind| Spanned { kind, span })) 138 } 139 fn parse_item<O>( 140 mut item_parser: impl FnMut(&mut Self) -> Result<O>, 141 tokens: &'src a::Item, 142 source: &'src str, 143 start_span: Span, 144 end_span: Span, 145 ) -> Result<O> { 146 let mut this = Self { 147 tokens, 148 i: 0, 149 source, 150 start_span, 151 end_span, 152 }; 153 let o = item_parser(&mut this)?; 154 match this.get_span_checked() { 155 Some(span) => Err(Spanned { 156 kind: ErrorKind::Custom("expected end of item"), 157 span, 158 }), 159 None => Ok(o), 160 } 161 } 162 fn item<O>( 163 &self, 164 item_parser: impl FnMut(&mut Self) -> Result<O>, 165 item: &'src a::Item, 166 start_span: Span, 167 end_span: Span, 168 ) -> Result<O> { 169 Self::parse_item(item_parser, item, self.source, start_span, end_span) 170 } 171 fn parse_block<O>( 172 mut item_parser: impl FnMut(&mut Self) -> Result<O>, 173 block: &'src a::Block, 174 source: &'src str, 175 start_span: Span, 176 end_span: Span, 177 ) -> Result<Vec<O>> { 178 block 179 .iter() 180 .map(|tokens| { 181 let mut this = Self { 182 tokens, 183 i: 0, 184 source, 185 start_span: start_span.clone(), 186 end_span: end_span.clone(), 187 }; 188 let o = item_parser(&mut this)?; 189 match this.get_span_checked() { 190 Some(span) => Err(Spanned { 191 kind: ErrorKind::Custom("expected end of item"), 192 span, 193 }), 194 None => Ok(o), 195 } 196 }) 197 .collect() 198 } 199 // These lifetimes are a little too strict, but it doesn't matter at all. 200 fn block<O>( 201 &self, 202 item_parser: impl FnMut(&mut Self) -> Result<O>, 203 block: &'src a::Block, 204 start_span: Span, 205 end_span: Span, 206 ) -> Result<Vec<O>> { 207 Self::parse_block(item_parser, block, self.source, start_span, end_span) 208 } 209 #[allow(clippy::unnecessary_wraps)] 210 fn ty(&mut self) -> Parsed<Ty> { 211 if let Some(fn_span) = self.just(P::Fn) { 212 let Some(Spanned { 213 kind: Tt::Paren(params, _), 214 span, 215 }) = self.peek() 216 else { 217 return Err(self.err("expected function type parameters")); 218 }; 219 self.next().unwrap(); 220 let params = self.block( 221 |this| { 222 this.ty() 223 .transpose() 224 .unwrap_or_else(|| Err(this.err("expected parameter type"))) 225 }, 226 params, 227 span.start..span.start, 228 span.end..span.end, 229 )?; 230 let returns = self.ty()?.map(Box::new); 231 return Ok(Some(Spanned { 232 kind: TyKind::Function(params, returns), 233 span: fn_span.start..self.get_previous_span().end, 234 })); 235 } 236 let Some(name) = self.name() else { 237 return Ok(None); 238 }; 239 let mut ty = Ty { 240 span: name.span.clone(), 241 kind: TyKind::Name(name), 242 }; 243 while let Some(deref_span) = self.just(P::Hat) { 244 let span = ty.span.start..deref_span.end; 245 ty = Ty { 246 kind: TyKind::Pointer(Box::new(ty)), 247 span, 248 }; 249 } 250 Ok(Some(ty)) 251 } 252 fn param(&mut self) -> Result<(Name, Ty)> { 253 let Some(name) = self.name() else { 254 return Err(self.err("expected function parameter")); 255 }; 256 let ty = self 257 .ty() 258 .transpose() 259 .unwrap_or_else(|| Err(self.err("expected function parameter type")))?; 260 Ok((name, ty)) 261 } 262 fn expr(&mut self) -> Result<Expr> { 263 self.expr_at(Level::Min) 264 } 265 fn expr_at(&mut self, level: Level) -> Result<Expr> { 266 const BIN_OPS: &[(P, Level, BinOpKind)] = &[ 267 (P::LessThanEqual, Level::Equal, BinOpKind::CmpLe), 268 (P::Plus, Level::Add, BinOpKind::Add), 269 (P::Minus, Level::Add, BinOpKind::Sub), 270 (P::Asterisk, Level::Mul, BinOpKind::Mul), 271 ]; 272 const UNARY_OPS: &[(P, UnaryOpKind)] = &[(P::Minus, UnaryOpKind::Neg)]; 273 let span_start = self.get_span().start; 274 let atom = if let Some(Spanned { 275 kind: Tt::Paren(block, multi), 276 span, 277 }) = self.peek() 278 { 279 if *multi { 280 return Err(self.err("tuples are not yet implemented")); 281 } 282 assert_eq!(block.len(), 1); 283 self.next().unwrap(); 284 let start_span = span.start + 1..span.start + 1; 285 let end_span = span.end - 1..span.end - 1; 286 let expr = self.item(Self::expr, &block[0], start_span, end_span)?; 287 ExprKind::Paren(Box::new(expr)) 288 } else if let Some(Spanned { 289 kind: Tt::IndentedBlock(_), 290 .. 291 }) = self.peek() 292 { 293 let block = self.colon_block()?; 294 ExprKind::Block(block) 295 } else if let Some(name) = self.name() { 296 ExprKind::Place(PlaceKind::Var(name)) 297 } else if let Some((int, suffix)) = self.int() { 298 ExprKind::Int(int, suffix) 299 } else if self.just(P::If).is_some() { 300 let condition = self.expr()?; 301 let then_block = self.colon_block()?; 302 let else_block = self.else_block()?; 303 ExprKind::If(Box::new(condition), then_block, else_block) 304 } else if self.just(P::While).is_some() { 305 let condition = self.expr()?; 306 let body = self.colon_block()?; 307 if matches!( 308 self.peek(), 309 Some(Spanned { 310 kind: Tt::ElseBlock(_), 311 .. 312 }) 313 ) { 314 return Err(self.err("else blocks for while loops not yet implemented")); 315 } 316 ExprKind::While(Box::new(condition), body) 317 } else { 318 let b = 'b: { 319 let Some(Spanned { 320 kind: &Tt::Plain(token), 321 span, 322 }) = self.next() 323 else { 324 break 'b None; 325 }; 326 let Some(kind) = UNARY_OPS 327 .iter() 328 .find_map(|&(t, k)| (t == token).then_some(k)) 329 else { 330 break 'b None; 331 }; 332 if level > Level::Prefix { 333 // We will surely never hit this error path. We never call .expr() with a precedence level higher than Prefix. 334 return Err(self.err("this prefix operator's precedence is too low")); 335 } 336 let rhs = self.expr_at(Level::Prefix)?; 337 Some(ExprKind::UnaryOp(UnaryOp { kind, span }, Box::new(rhs))) 338 }; 339 b.ok_or_else(|| self.err_previous("unexpected token while parsing expression"))? 340 }; 341 let mut e = Expr { 342 kind: atom, 343 span: span_start..self.get_previous_span().end, 344 }; 345 // NOTE: These postfix operators have the highest precedence, so we skip the precedence level check and do this in a separate loop. 346 loop { 347 let span: Span; 348 let kind: ExprKind; 349 if let Some(deref_span) = self.just(P::Hat) { 350 span = e.span.start..deref_span.end; 351 kind = ExprKind::Place(PlaceKind::Deref(Box::new(e), deref_span)); 352 } else if let Some(ref_span) = self.just(P::At) { 353 span = e.span.start..ref_span.end; 354 let op = UnaryOp { 355 kind: UnaryOpKind::Ref, 356 span: ref_span, 357 }; 358 kind = ExprKind::UnaryOp(op, Box::new(e)); 359 } else if self.just(P::Dot).is_some() { 360 let field = self 361 .name() 362 .ok_or_else(|| self.err("expected field or method name after dot"))?; 363 if matches!( 364 self.peek(), 365 Some(Spanned { 366 kind: Tt::Paren(..), 367 span: _ 368 }) 369 ) { 370 return Err(self.err("not yet implemented: method calls")); 371 } 372 span = e.span.start..field.span.end; 373 kind = ExprKind::Place(PlaceKind::Field(Box::new(e), field)); 374 } else if self.just(P::As).is_some() { 375 let Some(ty) = self.ty()? else { 376 return Err(self.err_previous("expected type after `as`")); 377 }; 378 span = e.span.start..ty.span.end; 379 kind = ExprKind::As(Box::new(e), ty); 380 } else if let Some(Spanned { 381 kind: Tt::Paren(args, _), 382 span: args_span, 383 }) = self.peek() 384 { 385 self.next().unwrap(); 386 let args = self.block( 387 Self::expr, 388 args, 389 args_span.start + 1..args_span.start + 1, 390 args_span.end - 1..args_span.end - 1, 391 )?; 392 span = e.span.start..args_span.end; 393 kind = ExprKind::Call(Box::new(e), args); 394 } else if let Some(Spanned { 395 kind: Tt::Square(block, multi), 396 span: index_span, 397 }) = self.peek() 398 { 399 if *multi { 400 return Err(self.err("indexing cannot have multiple arguments")); 401 } 402 assert_eq!(block.len(), 1); 403 self.next().unwrap(); 404 let start_span = index_span.start + 1..index_span.start + 1; 405 let end_span = index_span.end - 1..index_span.end - 1; 406 let expr = self.item(Self::expr, &block[0], start_span, end_span)?; 407 span = e.span.start..index_span.end; 408 kind = ExprKind::Place(PlaceKind::Index(Box::new(e), Box::new(expr), index_span)); 409 } else { 410 break; 411 } 412 e = Expr { kind, span }; 413 } 414 loop { 415 let Some(Spanned { 416 kind: Tt::Plain(kind), 417 span, 418 }) = self.peek() 419 else { 420 break; 421 }; 422 let Some(&(_, op_level, op_kind)) = BIN_OPS.iter().find(|(op, _, _)| kind == op) else { 423 break; 424 }; 425 if op_level < level { 426 break; 427 } 428 self.next().unwrap(); 429 let rhs = self.expr_at(op_level.higher())?; 430 let op = BinOp { 431 kind: op_kind, 432 span, 433 }; 434 let span = e.span.start..rhs.span.end; 435 e = Expr { 436 kind: ExprKind::BinOp(op, Box::new(e), Box::new(rhs)), 437 span, 438 }; 439 } 440 // assignment 441 if level <= Level::Min && self.just(P::Equals).is_some() { 442 let rhs = self.expr()?; 443 match e.kind { 444 ExprKind::Place(p) => { 445 let lhs = Place { 446 kind: p, 447 span: e.span, 448 }; 449 let span = lhs.span.start..rhs.span.end; 450 e = Expr { 451 kind: ExprKind::Assign(lhs, Box::new(rhs)), 452 span, 453 }; 454 } 455 _ => return Err(err_span("cannot assign to this kind of expression", e.span)), 456 } 457 } 458 Ok(e) 459 } 460 fn stmt(&mut self) -> Result<Stmt> { 461 self.spanned(Self::stmt_kind) 462 } 463 fn stmt_kind(&mut self) -> Result<StmtKind> { 464 if self.just(P::Let).is_some() { 465 let name = self 466 .name() 467 .ok_or_else(|| self.err("expected name for let statement"))?; 468 let ty = self.ty()?; 469 if self.just(P::Equals).is_none() { 470 return Err(self.err("expected `=` for let statement")); 471 } 472 let body = self.expr()?; 473 Ok(StmtKind::Let(name, ty, body)) 474 } else { 475 self.expr().map(StmtKind::Expr) 476 } 477 } 478 fn colon_block(&mut self) -> Result<Block> { 479 let Some(Spanned { 480 kind: Tt::IndentedBlock(stmts), 481 span, 482 }) = self.peek() 483 else { 484 return Err(self.err("expected beginning of block")); 485 }; 486 self.next().unwrap(); 487 let start_span = span.start + 1..span.start + 1; 488 let end_span = span.end..span.end; 489 self.block(Self::stmt, stmts, start_span, end_span) 490 .map(Block) 491 } 492 fn else_block(&mut self) -> Parsed<Block> { 493 let Some(Spanned { 494 kind: Tt::ElseBlock(stmts), 495 span, 496 }) = self.peek() 497 else { 498 return Ok(None); 499 }; 500 self.next().unwrap(); 501 let start_span = span.start + 4..span.start + 4; 502 let end_span = span.end..span.end; 503 self.block(Self::stmt, stmts, start_span, end_span) 504 .map(|x| Some(Block(x))) 505 } 506 fn decl(&mut self) -> Parsed<Decl> { 507 self.spanned2(Self::decl_kind) 508 } 509 #[allow(clippy::single_match_else)] 510 fn decl_kind(&mut self) -> Parsed<DeclKind> { 511 let Some(Spanned { kind, .. }) = self.next() else { 512 return Ok(None); 513 }; 514 let decl = match kind { 515 Tt::Plain(P::Fn) => { 516 let name = self 517 .name() 518 .ok_or_else(|| self.err("expected function name"))?; 519 // use .peek() so we get the correct span for the error 520 let Some(Spanned { 521 kind: Tt::Paren(params, _), 522 span, 523 }) = self.peek() 524 else { 525 return Err(self.err("expected paren list of function parameters")); 526 }; 527 self.next().unwrap(); 528 let start_span = span.start + 1..span.start + 1; 529 let end_span = span.end - 1..span.end - 1; 530 let parameters = self.block(Self::param, params, start_span, end_span)?; 531 let returns = self.ty()?; 532 let body = self.colon_block()?; 533 DeclKind::Function(Function { 534 name, 535 parameters, 536 returns, 537 body, 538 }) 539 } 540 Tt::Plain(P::Extern) => { 541 self.just(P::Fn) 542 .ok_or_else(|| self.err("expected fn keyword after extern"))?; 543 let name = self 544 .name() 545 .ok_or_else(|| self.err("expected extern function name"))?; 546 let Some(Spanned { 547 kind: Tt::Paren(params, _), 548 span, 549 }) = self.peek() 550 else { 551 return Err(self.err("expected paren list of extern function parameters")); 552 }; 553 self.next().unwrap(); 554 let start_span = span.start + 1..span.start + 1; 555 let end_span = span.end - 1..span.end - 1; 556 let parameters = self.block( 557 |this| { 558 this.ty().and_then(|x| { 559 x.ok_or_else(|| this.err("expected extern function parameter type")) 560 }) 561 }, 562 params, 563 start_span, 564 end_span, 565 )?; 566 let returns = self.ty()?; 567 DeclKind::ExternFunction(ExternFunction { 568 name, 569 parameters, 570 returns, 571 }) 572 } 573 Tt::Plain(P::Struct) => { 574 let name = self 575 .name() 576 .ok_or_else(|| self.err("expected struct name"))?; 577 578 let Some(Spanned { 579 kind: Tt::IndentedBlock(struct_body), 580 span, 581 }) = self.peek() 582 else { 583 return Err(self.err("expected beginning of struct block")); 584 }; 585 self.next().unwrap(); 586 let start_span = span.start + 1..span.start + 1; 587 let end_span = span.end..span.end; 588 let fields = self.block(Self::param, struct_body, start_span, end_span)?; 589 DeclKind::Struct(Struct { name, fields }) 590 } 591 _ => { 592 self.i -= 1; // Hacky, I know. 593 return Ok(None); 594 } 595 }; 596 Ok(Some(decl)) 597 } 598 fn parse_program(block: &'src a::Block, source: &'src str) -> Result<Program> { 599 let start_span = 0..0; 600 let end_span = source.len()..source.len(); 601 Self::parse_block( 602 |this| { 603 this.decl().transpose().unwrap_or_else(|| { 604 Err(this.err("unexpected token while parsing top level declaration")) 605 }) 606 }, 607 block, 608 source, 609 start_span, 610 end_span, 611 ) 612 .map(|decls| Program { decls }) 613 } 614 } 615 616 pub fn parse(block: &a::Block, source: &str) -> Result<Program> { 617 Parser::parse_program(block, source) 618 }