lib.rs
1 // Copyright (C) 2019-2025 ADnet Contributors 2 // This file is part of the ADL library. 3 4 // The ADL library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 9 // The ADL library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU General Public License for more details. 13 14 // You should have received a copy of the GNU General Public License 15 // along with the ADL library. If not, see <https://www.gnu.org/licenses/>. 16 17 //! The lossless syntax tree and parser for Leo. 18 19 use adl_errors::{AdlError, Handler, ParserError, Result}; 20 use adl_span::Span; 21 use itertools::Itertools as _; 22 23 // Comment me when running `cargo publish`. 24 use lalrpop_util::lalrpop_mod; 25 lalrpop_mod!(pub grammar); 26 // Uncomment me when running `cargo publish` and be sure to generate and copy `grammar.rs` from `target/` into `src/`. 27 //pub mod grammar; 28 29 pub mod tokens; 30 31 use tokens::*; 32 33 /// A tag indicating the nature of a syntax node. 34 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 35 pub enum SyntaxKind { 36 Whitespace, 37 Linebreak, 38 CommentLine, 39 CommentBlock, 40 41 Expression(ExpressionKind), 42 StructMemberInitializer, 43 44 Statement(StatementKind), 45 Type(TypeKind), 46 Token, 47 48 Annotation, 49 AnnotationMember, 50 AnnotationList, 51 52 Parameter, 53 ParameterList, 54 FunctionOutput, 55 FunctionOutputs, 56 Function, 57 Constructor, 58 59 ConstParameter, 60 ConstParameterList, 61 ConstArgumentList, 62 63 StructDeclaration, 64 StructMemberDeclaration, 65 StructMemberDeclarationList, 66 67 Mapping, 68 Storage, 69 70 GlobalConst, 71 72 Import, 73 MainContents, 74 ModuleContents, 75 ProgramDeclaration, 76 } 77 78 #[derive(Copy, Clone, Debug, PartialEq, Eq)] 79 pub enum IntegerLiteralKind { 80 U8, 81 U16, 82 U32, 83 U64, 84 U128, 85 86 I8, 87 I16, 88 I32, 89 I64, 90 I128, 91 } 92 93 #[derive(Copy, Clone, Debug, PartialEq, Eq)] 94 pub enum IntegerTypeKind { 95 U8, 96 U16, 97 U32, 98 U64, 99 U128, 100 101 I8, 102 I16, 103 I32, 104 I64, 105 I128, 106 } 107 108 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 109 pub enum TypeKind { 110 Address, 111 Array, 112 Boolean, 113 Composite, 114 Field, 115 Future, 116 Group, 117 Identifier, 118 Integer(IntegerTypeKind), 119 Mapping, 120 Optional, 121 Scalar, 122 Signature, 123 String, 124 Tuple, 125 Vector, 126 Numeric, 127 Unit, 128 } 129 130 impl From<TypeKind> for SyntaxKind { 131 fn from(value: TypeKind) -> Self { 132 SyntaxKind::Type(value) 133 } 134 } 135 136 impl From<IntegerTypeKind> for TypeKind { 137 fn from(value: IntegerTypeKind) -> Self { 138 TypeKind::Integer(value) 139 } 140 } 141 142 impl From<IntegerTypeKind> for SyntaxKind { 143 fn from(value: IntegerTypeKind) -> Self { 144 SyntaxKind::Type(TypeKind::Integer(value)) 145 } 146 } 147 148 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 149 pub enum ExpressionKind { 150 ArrayAccess, 151 AssociatedConstant, 152 AssociatedFunctionCall, 153 Async, 154 Array, 155 Binary, 156 Call, 157 Cast, 158 Path, 159 Literal(LiteralKind), 160 Locator, 161 MemberAccess, 162 MethodCall, 163 Parenthesized, 164 Repeat, 165 Intrinsic, 166 SpecialAccess, // TODO: fold into Intrinsic 167 Struct, 168 Ternary, 169 Tuple, 170 TupleAccess, 171 Unary, 172 Unit, 173 } 174 175 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 176 pub enum LiteralKind { 177 Address, 178 Boolean, 179 Field, 180 Group, 181 Integer(IntegerLiteralKind), 182 None, 183 Scalar, 184 Unsuffixed, 185 String, 186 } 187 188 impl From<ExpressionKind> for SyntaxKind { 189 fn from(value: ExpressionKind) -> Self { 190 SyntaxKind::Expression(value) 191 } 192 } 193 194 impl From<LiteralKind> for ExpressionKind { 195 fn from(value: LiteralKind) -> Self { 196 ExpressionKind::Literal(value) 197 } 198 } 199 200 impl From<LiteralKind> for SyntaxKind { 201 fn from(value: LiteralKind) -> Self { 202 SyntaxKind::Expression(ExpressionKind::Literal(value)) 203 } 204 } 205 206 impl From<IntegerLiteralKind> for LiteralKind { 207 fn from(value: IntegerLiteralKind) -> Self { 208 LiteralKind::Integer(value) 209 } 210 } 211 212 impl From<IntegerLiteralKind> for ExpressionKind { 213 fn from(value: IntegerLiteralKind) -> Self { 214 ExpressionKind::Literal(LiteralKind::Integer(value)) 215 } 216 } 217 218 impl From<IntegerLiteralKind> for SyntaxKind { 219 fn from(value: IntegerLiteralKind) -> Self { 220 SyntaxKind::Expression(ExpressionKind::Literal(LiteralKind::Integer(value))) 221 } 222 } 223 224 #[derive(Clone, Copy, Debug, PartialEq, Eq)] 225 pub enum StatementKind { 226 Assert, 227 AssertEq, 228 AssertNeq, 229 Assign, 230 Block, 231 Conditional, 232 Const, 233 Definition, 234 Expression, 235 Iteration, 236 Return, 237 } 238 239 impl From<StatementKind> for SyntaxKind { 240 fn from(value: StatementKind) -> Self { 241 SyntaxKind::Statement(value) 242 } 243 } 244 245 /// An untyped node in the lossless syntax tree. 246 #[derive(Debug, Clone)] 247 pub struct SyntaxNode<'a> { 248 /// A tag indicating the nature of the node. 249 pub kind: SyntaxKind, 250 /// The text from the source if applicable. 251 pub text: &'a str, 252 pub span: adl_span::Span, 253 pub children: Vec<SyntaxNode<'a>>, 254 } 255 256 impl<'a> SyntaxNode<'a> { 257 fn new_token(kind: SyntaxKind, token: LalrToken<'a>, children: Vec<Self>) -> Self { 258 Self { kind, text: token.text, span: token.span, children } 259 } 260 261 fn new(kind: impl Into<SyntaxKind>, children: impl IntoIterator<Item = Self>) -> Self { 262 let children: Vec<Self> = children.into_iter().collect(); 263 let lo = children.first().unwrap().span.lo; 264 let hi = children.last().unwrap().span.hi; 265 let span = adl_span::Span { lo, hi }; 266 Self { kind: kind.into(), text: "", span, children } 267 } 268 269 fn suffixed_literal(integer: LalrToken<'a>, suffix: LalrToken<'a>, children: Vec<Self>) -> Self { 270 let kind: SyntaxKind = match suffix.token { 271 Token::Field => LiteralKind::Field.into(), 272 Token::Group => LiteralKind::Group.into(), 273 Token::Scalar => LiteralKind::Scalar.into(), 274 Token::I8 => IntegerLiteralKind::I8.into(), 275 Token::I16 => IntegerLiteralKind::I16.into(), 276 Token::I32 => IntegerLiteralKind::I32.into(), 277 Token::I64 => IntegerLiteralKind::I64.into(), 278 Token::I128 => IntegerLiteralKind::I128.into(), 279 Token::U8 => IntegerLiteralKind::U8.into(), 280 Token::U16 => IntegerLiteralKind::U16.into(), 281 Token::U32 => IntegerLiteralKind::U32.into(), 282 Token::U64 => IntegerLiteralKind::U64.into(), 283 Token::U128 => IntegerLiteralKind::U128.into(), 284 x => panic!("Error in grammar.lalrpop: {x:?}"), 285 }; 286 287 let lo = integer.span.lo; 288 let hi = suffix.span.hi; 289 let span = adl_span::Span { lo, hi }; 290 291 Self { kind, text: integer.text, span, children } 292 } 293 294 fn binary_expression(lhs: Self, op: Self, rhs: Self) -> Self { 295 let span = adl_span::Span { lo: lhs.span.lo, hi: rhs.span.hi }; 296 let children = vec![lhs, op, rhs]; 297 SyntaxNode { kind: ExpressionKind::Binary.into(), text: "", span, children } 298 } 299 } 300 301 fn two_path_components(text: &str) -> Option<(&str, &str)> { 302 let mut iter = text.split("::"); 303 304 match (iter.next(), iter.next(), iter.next()) { 305 (Some(first), Some(second), _) => Some((first, second)), 306 _ => None, 307 } 308 } 309 310 pub fn parse_expression<'a>(handler: Handler, source: &'a str, start_pos: u32) -> Result<SyntaxNode<'a>> { 311 let parser = grammar::ExprParser::new(); 312 parse_general(handler.clone(), source, start_pos, |lexer| parser.parse(&handler, lexer)) 313 } 314 315 pub fn parse_statement<'a>(handler: Handler, source: &'a str, start_pos: u32) -> Result<SyntaxNode<'a>> { 316 let parser = grammar::StatementParser::new(); 317 parse_general(handler.clone(), source, start_pos, |lexer| parser.parse(&handler, lexer)) 318 } 319 320 pub fn parse_module<'a>(handler: Handler, source: &'a str, start_pos: u32) -> Result<SyntaxNode<'a>> { 321 let parser = grammar::ModuleContentsParser::new(); 322 parse_general(handler.clone(), source, start_pos, |lexer| parser.parse(&handler, lexer)) 323 } 324 325 pub fn parse_main<'a>(handler: Handler, source: &'a str, start_pos: u32) -> Result<SyntaxNode<'a>> { 326 let parser = grammar::MainContentsParser::new(); 327 parse_general(handler.clone(), source, start_pos, |lexer| parser.parse(&handler, lexer)) 328 } 329 330 fn check_identifier(token: &LalrToken<'_>, handler: &Handler) { 331 const MAX_IDENTIFIER_LEN: usize = 31usize; 332 if token.token == Token::IdVariants(IdVariants::Identifier) { 333 if token.text.len() > MAX_IDENTIFIER_LEN { 334 handler.emit_err(adl_errors::ParserError::identifier_too_long( 335 token.text, 336 token.text.len(), 337 MAX_IDENTIFIER_LEN, 338 token.span, 339 )); 340 } 341 // These are reserved for compiler-generated names. 342 if token.text.contains("__") { 343 handler.emit_err(ParserError::identifier_cannot_contain_double_underscore(token.text, token.span)); 344 } 345 } 346 } 347 348 fn parse_general<'a>( 349 handler: Handler, 350 source: &'a str, 351 start_pos: u32, 352 parse: impl FnOnce( 353 &mut Lexer<'a>, 354 ) -> Result<SyntaxNode<'a>, lalrpop_util::ParseError<usize, LalrToken<'a>, &'static str>>, 355 ) -> Result<SyntaxNode<'a>> { 356 let mut lexer = tokens::Lexer::new(source, start_pos, handler.clone()); 357 match parse(&mut lexer) { 358 Ok(val) => { 359 handler.last_err()?; 360 Ok(val) 361 } 362 Err(e) => { 363 if matches!(e, lalrpop_util::ParseError::UnrecognizedEof { .. }) { 364 // We don't want to redundantly report the EOF error, when the meaningfull 365 // errors are recorded in the handler. 366 handler.last_err()?; 367 } 368 Err(convert(e, source, start_pos)) 369 } 370 } 371 } 372 373 // We can't implement From<lalrpop_util::ParseError> since both that 374 // trait and adl_errors::Error are defined in other crates. 375 fn convert( 376 error: lalrpop_util::ParseError<usize, LalrToken<'_>, &'static str>, 377 source: &str, 378 start_pos: u32, 379 ) -> AdlError { 380 match error { 381 lalrpop_util::ParseError::UnrecognizedToken { token, expected } => { 382 let expected = expected.iter().flat_map(|s| tokens::Token::str_user(s)).format(", "); 383 ParserError::unexpected(token.1.text, expected, token.1.span).into() 384 } 385 lalrpop_util::ParseError::UnrecognizedEof { location, .. } => { 386 let (lo, hi) = if source.is_empty() { 387 (start_pos, start_pos) 388 } else if location >= source.len() + start_pos as usize { 389 // Generally lalrpop reports the `location` for this error as 390 // one character past the end of the source. So let's 391 // back up one character. 392 // Can't just subtract 1 as we may not be on a character boundary. 393 let lo = source.char_indices().last().unwrap().0 as u32 + start_pos; 394 (lo, lo + 1) 395 } else { 396 (location as u32, location as u32 + 1) 397 }; 398 ParserError::unexpected_eof(Span { lo, hi }).into() 399 } 400 x => panic!("ERR: {x:?}"), 401 } 402 }