lib.rs
  1  // Copyright (C) 2019-2025 ADnet Contributors
  2  // This file is part of the ADL library.
  3  
  4  // The ADL library is free software: you can redistribute it and/or modify
  5  // it under the terms of the GNU General Public License as published by
  6  // the Free Software Foundation, either version 3 of the License, or
  7  // (at your option) any later version.
  8  
  9  // The ADL library is distributed in the hope that it will be useful,
 10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12  // GNU General Public License for more details.
 13  
 14  // You should have received a copy of the GNU General Public License
 15  // along with the ADL library. If not, see <https://www.gnu.org/licenses/>.
 16  
 17  //! The lossless syntax tree and parser for Leo.
 18  
 19  use adl_errors::{AdlError, Handler, ParserError, Result};
 20  use adl_span::Span;
 21  use itertools::Itertools as _;
 22  
 23  // Comment me when running `cargo publish`.
 24  use lalrpop_util::lalrpop_mod;
 25  lalrpop_mod!(pub grammar);
 26  // Uncomment me when running `cargo publish` and be sure to generate and copy `grammar.rs` from `target/` into `src/`.
 27  //pub mod grammar;
 28  
 29  pub mod tokens;
 30  
 31  use tokens::*;
 32  
/// A tag indicating the nature of a syntax node.
///
/// Expression, statement and type nodes carry a nested kind that refines
/// them further; every other variant is a plain tag.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SyntaxKind {
    Whitespace,
    Linebreak,
    CommentLine,
    CommentBlock,

    /// An expression node, refined by its [`ExpressionKind`].
    Expression(ExpressionKind),
    StructMemberInitializer,

    /// A statement node, refined by its [`StatementKind`].
    Statement(StatementKind),
    /// A type node, refined by its [`TypeKind`].
    Type(TypeKind),
    Token,

    Annotation,
    AnnotationMember,
    AnnotationList,

    Parameter,
    ParameterList,
    FunctionOutput,
    FunctionOutputs,
    Function,
    Constructor,

    ConstParameter,
    ConstParameterList,
    ConstArgumentList,

    StructDeclaration,
    StructMemberDeclaration,
    StructMemberDeclarationList,

    Mapping,
    Storage,

    GlobalConst,

    Import,
    MainContents,
    ModuleContents,
    ProgramDeclaration,
}
 77  
/// The kind of an integer literal.
///
/// `SyntaxNode::suffixed_literal` selects the variant from the literal's
/// suffix token (e.g. `Token::U8` yields `IntegerLiteralKind::U8`).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum IntegerLiteralKind {
    U8,
    U16,
    U32,
    U64,
    U128,

    I8,
    I16,
    I32,
    I64,
    I128,
}
 92  
/// The kind of an integer type, carried by [`TypeKind::Integer`].
///
/// The variants parallel those of [`IntegerLiteralKind`].
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum IntegerTypeKind {
    U8,
    U16,
    U32,
    U64,
    U128,

    I8,
    I16,
    I32,
    I64,
    I128,
}
107  
/// A tag refining [`SyntaxKind::Type`]: which sort of type a type node is.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TypeKind {
    Address,
    Array,
    Boolean,
    Composite,
    Field,
    Future,
    Group,
    Identifier,
    /// An integer type, refined by its [`IntegerTypeKind`].
    Integer(IntegerTypeKind),
    Mapping,
    Optional,
    Scalar,
    Signature,
    String,
    Tuple,
    Vector,
    Numeric,
    Unit,
}
129  
130  impl From<TypeKind> for SyntaxKind {
131      fn from(value: TypeKind) -> Self {
132          SyntaxKind::Type(value)
133      }
134  }
135  
136  impl From<IntegerTypeKind> for TypeKind {
137      fn from(value: IntegerTypeKind) -> Self {
138          TypeKind::Integer(value)
139      }
140  }
141  
142  impl From<IntegerTypeKind> for SyntaxKind {
143      fn from(value: IntegerTypeKind) -> Self {
144          SyntaxKind::Type(TypeKind::Integer(value))
145      }
146  }
147  
/// A tag refining [`SyntaxKind::Expression`]: which sort of expression an
/// expression node is.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ExpressionKind {
    ArrayAccess,
    AssociatedConstant,
    AssociatedFunctionCall,
    Async,
    Array,
    Binary,
    Call,
    Cast,
    Path,
    /// A literal expression, refined by its [`LiteralKind`].
    Literal(LiteralKind),
    Locator,
    MemberAccess,
    MethodCall,
    Parenthesized,
    Repeat,
    Intrinsic,
    SpecialAccess, // TODO: fold into Intrinsic
    Struct,
    Ternary,
    Tuple,
    TupleAccess,
    Unary,
    Unit,
}
174  
/// A tag refining [`ExpressionKind::Literal`]: which sort of literal a
/// literal expression is.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LiteralKind {
    Address,
    Boolean,
    Field,
    Group,
    /// An integer literal, refined by its [`IntegerLiteralKind`].
    Integer(IntegerLiteralKind),
    None,
    Scalar,
    Unsuffixed,
    String,
}
187  
188  impl From<ExpressionKind> for SyntaxKind {
189      fn from(value: ExpressionKind) -> Self {
190          SyntaxKind::Expression(value)
191      }
192  }
193  
194  impl From<LiteralKind> for ExpressionKind {
195      fn from(value: LiteralKind) -> Self {
196          ExpressionKind::Literal(value)
197      }
198  }
199  
200  impl From<LiteralKind> for SyntaxKind {
201      fn from(value: LiteralKind) -> Self {
202          SyntaxKind::Expression(ExpressionKind::Literal(value))
203      }
204  }
205  
206  impl From<IntegerLiteralKind> for LiteralKind {
207      fn from(value: IntegerLiteralKind) -> Self {
208          LiteralKind::Integer(value)
209      }
210  }
211  
212  impl From<IntegerLiteralKind> for ExpressionKind {
213      fn from(value: IntegerLiteralKind) -> Self {
214          ExpressionKind::Literal(LiteralKind::Integer(value))
215      }
216  }
217  
218  impl From<IntegerLiteralKind> for SyntaxKind {
219      fn from(value: IntegerLiteralKind) -> Self {
220          SyntaxKind::Expression(ExpressionKind::Literal(LiteralKind::Integer(value)))
221      }
222  }
223  
/// A tag refining [`SyntaxKind::Statement`]: which sort of statement a
/// statement node is.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StatementKind {
    Assert,
    AssertEq,
    AssertNeq,
    Assign,
    Block,
    Conditional,
    Const,
    Definition,
    Expression,
    Iteration,
    Return,
}
238  
239  impl From<StatementKind> for SyntaxKind {
240      fn from(value: StatementKind) -> Self {
241          SyntaxKind::Statement(value)
242      }
243  }
244  
/// An untyped node in the lossless syntax tree.
#[derive(Debug, Clone)]
pub struct SyntaxNode<'a> {
    /// A tag indicating the nature of the node.
    pub kind: SyntaxKind,
    /// The text from the source if applicable.
    ///
    /// Nodes assembled purely from children (see `SyntaxNode::new`) carry the
    /// empty string here.
    pub text: &'a str,
    /// The source span of the node. For nodes assembled from children it runs
    /// from the first child's `lo` to the last child's `hi`.
    pub span: adl_span::Span,
    /// The child nodes, in the order they were handed to the constructor.
    pub children: Vec<SyntaxNode<'a>>,
}
255  
256  impl<'a> SyntaxNode<'a> {
257      fn new_token(kind: SyntaxKind, token: LalrToken<'a>, children: Vec<Self>) -> Self {
258          Self { kind, text: token.text, span: token.span, children }
259      }
260  
261      fn new(kind: impl Into<SyntaxKind>, children: impl IntoIterator<Item = Self>) -> Self {
262          let children: Vec<Self> = children.into_iter().collect();
263          let lo = children.first().unwrap().span.lo;
264          let hi = children.last().unwrap().span.hi;
265          let span = adl_span::Span { lo, hi };
266          Self { kind: kind.into(), text: "", span, children }
267      }
268  
269      fn suffixed_literal(integer: LalrToken<'a>, suffix: LalrToken<'a>, children: Vec<Self>) -> Self {
270          let kind: SyntaxKind = match suffix.token {
271              Token::Field => LiteralKind::Field.into(),
272              Token::Group => LiteralKind::Group.into(),
273              Token::Scalar => LiteralKind::Scalar.into(),
274              Token::I8 => IntegerLiteralKind::I8.into(),
275              Token::I16 => IntegerLiteralKind::I16.into(),
276              Token::I32 => IntegerLiteralKind::I32.into(),
277              Token::I64 => IntegerLiteralKind::I64.into(),
278              Token::I128 => IntegerLiteralKind::I128.into(),
279              Token::U8 => IntegerLiteralKind::U8.into(),
280              Token::U16 => IntegerLiteralKind::U16.into(),
281              Token::U32 => IntegerLiteralKind::U32.into(),
282              Token::U64 => IntegerLiteralKind::U64.into(),
283              Token::U128 => IntegerLiteralKind::U128.into(),
284              x => panic!("Error in grammar.lalrpop: {x:?}"),
285          };
286  
287          let lo = integer.span.lo;
288          let hi = suffix.span.hi;
289          let span = adl_span::Span { lo, hi };
290  
291          Self { kind, text: integer.text, span, children }
292      }
293  
294      fn binary_expression(lhs: Self, op: Self, rhs: Self) -> Self {
295          let span = adl_span::Span { lo: lhs.span.lo, hi: rhs.span.hi };
296          let children = vec![lhs, op, rhs];
297          SyntaxNode { kind: ExpressionKind::Binary.into(), text: "", span, children }
298      }
299  }
300  
/// Splits `text` on `::` and returns its components if there are exactly two.
///
/// Returns `Some((first, second))` when `text` has the form `first::second`
/// (either part may be empty). Returns `None` when `text` contains no `::`
/// or when it has three or more components.
fn two_path_components(text: &str) -> Option<(&str, &str)> {
    let mut components = text.split("::");

    match (components.next(), components.next(), components.next()) {
        // The third `next()` must come back empty: accepting anything there
        // would silently truncate a longer path to its first two components.
        (Some(first), Some(second), None) => Some((first, second)),
        _ => None,
    }
}
309  
310  pub fn parse_expression<'a>(handler: Handler, source: &'a str, start_pos: u32) -> Result<SyntaxNode<'a>> {
311      let parser = grammar::ExprParser::new();
312      parse_general(handler.clone(), source, start_pos, |lexer| parser.parse(&handler, lexer))
313  }
314  
315  pub fn parse_statement<'a>(handler: Handler, source: &'a str, start_pos: u32) -> Result<SyntaxNode<'a>> {
316      let parser = grammar::StatementParser::new();
317      parse_general(handler.clone(), source, start_pos, |lexer| parser.parse(&handler, lexer))
318  }
319  
320  pub fn parse_module<'a>(handler: Handler, source: &'a str, start_pos: u32) -> Result<SyntaxNode<'a>> {
321      let parser = grammar::ModuleContentsParser::new();
322      parse_general(handler.clone(), source, start_pos, |lexer| parser.parse(&handler, lexer))
323  }
324  
325  pub fn parse_main<'a>(handler: Handler, source: &'a str, start_pos: u32) -> Result<SyntaxNode<'a>> {
326      let parser = grammar::MainContentsParser::new();
327      parse_general(handler.clone(), source, start_pos, |lexer| parser.parse(&handler, lexer))
328  }
329  
330  fn check_identifier(token: &LalrToken<'_>, handler: &Handler) {
331      const MAX_IDENTIFIER_LEN: usize = 31usize;
332      if token.token == Token::IdVariants(IdVariants::Identifier) {
333          if token.text.len() > MAX_IDENTIFIER_LEN {
334              handler.emit_err(adl_errors::ParserError::identifier_too_long(
335                  token.text,
336                  token.text.len(),
337                  MAX_IDENTIFIER_LEN,
338                  token.span,
339              ));
340          }
341          // These are reserved for compiler-generated names.
342          if token.text.contains("__") {
343              handler.emit_err(ParserError::identifier_cannot_contain_double_underscore(token.text, token.span));
344          }
345      }
346  }
347  
348  fn parse_general<'a>(
349      handler: Handler,
350      source: &'a str,
351      start_pos: u32,
352      parse: impl FnOnce(
353          &mut Lexer<'a>,
354      ) -> Result<SyntaxNode<'a>, lalrpop_util::ParseError<usize, LalrToken<'a>, &'static str>>,
355  ) -> Result<SyntaxNode<'a>> {
356      let mut lexer = tokens::Lexer::new(source, start_pos, handler.clone());
357      match parse(&mut lexer) {
358          Ok(val) => {
359              handler.last_err()?;
360              Ok(val)
361          }
362          Err(e) => {
363              if matches!(e, lalrpop_util::ParseError::UnrecognizedEof { .. }) {
364                  // We don't want to redundantly report the EOF error, when the meaningfull
365                  // errors are recorded in the handler.
366                  handler.last_err()?;
367              }
368              Err(convert(e, source, start_pos))
369          }
370      }
371  }
372  
373  // We can't implement From<lalrpop_util::ParseError> since both that
374  // trait and adl_errors::Error are defined in other crates.
375  fn convert(
376      error: lalrpop_util::ParseError<usize, LalrToken<'_>, &'static str>,
377      source: &str,
378      start_pos: u32,
379  ) -> AdlError {
380      match error {
381          lalrpop_util::ParseError::UnrecognizedToken { token, expected } => {
382              let expected = expected.iter().flat_map(|s| tokens::Token::str_user(s)).format(", ");
383              ParserError::unexpected(token.1.text, expected, token.1.span).into()
384          }
385          lalrpop_util::ParseError::UnrecognizedEof { location, .. } => {
386              let (lo, hi) = if source.is_empty() {
387                  (start_pos, start_pos)
388              } else if location >= source.len() + start_pos as usize {
389                  // Generally lalrpop reports the `location` for this error as
390                  // one character past the end of the source. So let's
391                  // back up one character.
392                  // Can't just subtract 1 as we may not be on a character boundary.
393                  let lo = source.char_indices().last().unwrap().0 as u32 + start_pos;
394                  (lo, lo + 1)
395              } else {
396                  (location as u32, location as u32 + 1)
397              };
398              ParserError::unexpected_eof(Span { lo, hi }).into()
399          }
400          x => panic!("ERR: {x:?}"),
401      }
402  }