/ console / program / src / data / identifier / parse.rs
parse.rs
  1  // Copyright (c) 2019-2025 Alpha-Delta Network Inc.
  2  // This file is part of the alphavm library.
  3  
  4  // Licensed under the Apache License, Version 2.0 (the "License");
  5  // you may not use this file except in compliance with the License.
  6  // You may obtain a copy of the License at:
  7  
  8  // http://www.apache.org/licenses/LICENSE-2.0
  9  
 10  // Unless required by applicable law or agreed to in writing, software
 11  // distributed under the License is distributed on an "AS IS" BASIS,
 12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  // See the License for the specific language governing permissions and
 14  // limitations under the License.
 15  
 16  use super::*;
 17  
 18  impl<N: Network> Parser for Identifier<N> {
 19      /// Parses a string into an identifier.
 20      ///
 21      /// # Requirements
 22      /// The identifier must be alphanumeric (or underscore).
 23      /// The identifier must not start with a number.
 24      /// The identifier must not be a keyword.
 25      #[inline]
 26      fn parse(string: &str) -> ParserResult<Self> {
 27          // Check for alphanumeric characters and underscores.
 28          map_res(recognize(pair(alpha1, many0(alt((alphanumeric1, tag("_")))))), |identifier: &str| {
 29              Self::from_str(identifier)
 30          })(string)
 31      }
 32  }
 33  
 34  impl<N: Network> FromStr for Identifier<N> {
 35      type Err = Error;
 36  
 37      /// Reads in an identifier from a string.
 38      fn from_str(identifier: &str) -> Result<Self, Self::Err> {
 39          // Ensure the identifier is not an empty string, and starts with an ASCII letter.
 40          match identifier.chars().next() {
 41              Some(character) => ensure!(character.is_ascii_alphabetic(), "Identifier must start with a letter"),
 42              None => bail!("Identifier cannot be empty"),
 43          }
 44  
 45          // Ensure the identifier consists of ASCII letters, ASCII digits, and underscores.
 46          if identifier.chars().any(|character| !character.is_ascii_alphanumeric() && character != '_') {
 47              bail!("Identifier '{identifier}' must consist of letters, digits, and underscores")
 48          }
 49  
 50          // Ensure identifier fits within the data capacity of the base field.
 51          let max_bytes = Field::<N>::size_in_data_bits() / 8; // Note: This intentionally rounds down.
 52          if identifier.len() > max_bytes {
 53              bail!("Identifier is too large. Identifiers must be <= {max_bytes} bytes long")
 54          }
 55  
 56          // Ensure that the identifier is not a literal.
 57          ensure!(
 58              !enum_iterator::all::<crate::LiteralType>().any(|lt| lt.type_name() == identifier),
 59              "Identifier '{identifier}' is a reserved literal type"
 60          );
 61  
 62          // Note: The string bytes themselves are **not** little-endian. Rather, they are order-preserving
 63          // for reconstructing the string when recovering the field element back into bytes.
 64          Ok(Self(
 65              Field::<N>::from_bits_le(&identifier.as_bytes().to_bits_le())?,
 66              u8::try_from(identifier.len()).or_halt_with::<N>("Identifier `from_str` exceeds maximum length"),
 67          ))
 68      }
 69  }
 70  
 71  impl<N: Network> Debug for Identifier<N> {
 72      fn fmt(&self, f: &mut Formatter) -> fmt::Result {
 73          Display::fmt(self, f)
 74      }
 75  }
 76  
 77  impl<N: Network> Display for Identifier<N> {
 78      /// Prints the identifier as a string.
 79      fn fmt(&self, f: &mut Formatter) -> fmt::Result {
 80          // Convert the identifier to bytes.
 81          let bytes = self.0.to_bytes_le().map_err(|_| fmt::Error)?;
 82  
 83          // Parse the bytes as a UTF-8 string.
 84          let string = String::from_utf8(bytes).map_err(|_| fmt::Error)?;
 85  
 86          // Truncate the UTF-8 string at the first instance of '\0'.
 87          match string.split('\0').next() {
 88              // Check that the UTF-8 string matches the expected length.
 89              Some(string) => match string.len() == self.1 as usize {
 90                  // Return the string.
 91                  true => write!(f, "{string}"),
 92                  false => Err(fmt::Error),
 93              },
 94              None => Err(fmt::Error),
 95          }
 96      }
 97  }
 98  
 99  #[cfg(test)]
100  mod tests {
101      use super::*;
102      use crate::data::identifier::tests::{sample_identifier, sample_identifier_as_string};
103      use alphavm_console_network::MainnetV0;
104  
105      type CurrentNetwork = MainnetV0;
106  
107      const ITERATIONS: usize = 100;
108  
109      #[test]
110      fn test_parse() -> Result<()> {
111          // Quick sanity check.
112          let (remainder, candidate) = Identifier::<CurrentNetwork>::parse("foo_bar1")?;
113          assert_eq!("foo_bar1", candidate.to_string());
114          assert_eq!("", remainder);
115  
116          // Must be alphanumeric or underscore.
117          let (remainder, candidate) = Identifier::<CurrentNetwork>::parse("foo_bar~baz")?;
118          assert_eq!("foo_bar", candidate.to_string());
119          assert_eq!("~baz", remainder);
120  
121          // Must be alphanumeric or underscore.
122          let (remainder, candidate) = Identifier::<CurrentNetwork>::parse("foo_bar-baz")?;
123          assert_eq!("foo_bar", candidate.to_string());
124          assert_eq!("-baz", remainder);
125  
126          let mut rng = TestRng::default();
127  
128          // Check random identifiers.
129          for _ in 0..ITERATIONS {
130              // Sample a random fixed-length alphanumeric string, that always starts with an alphabetic character.
131              let expected_string = sample_identifier_as_string::<CurrentNetwork>(&mut rng)?;
132              // Recover the field element from the bits.
133              let expected_field = Field::<CurrentNetwork>::from_bits_le(&expected_string.to_bits_le())?;
134  
135              let (remainder, candidate) = Identifier::<CurrentNetwork>::parse(expected_string.as_str()).unwrap();
136              assert_eq!(expected_string, candidate.to_string());
137              assert_eq!(expected_field, candidate.0);
138              assert_eq!(expected_string.len(), candidate.1 as usize);
139              assert_eq!("", remainder);
140          }
141          Ok(())
142      }
143  
144      #[test]
145      fn test_parse_fails() {
146          // Must not be solely underscores.
147          assert!(Identifier::<CurrentNetwork>::parse("_").is_err());
148          assert!(Identifier::<CurrentNetwork>::parse("__").is_err());
149          assert!(Identifier::<CurrentNetwork>::parse("___").is_err());
150          assert!(Identifier::<CurrentNetwork>::parse("____").is_err());
151  
152          // Must not start with a number.
153          assert!(Identifier::<CurrentNetwork>::parse("1").is_err());
154          assert!(Identifier::<CurrentNetwork>::parse("2").is_err());
155          assert!(Identifier::<CurrentNetwork>::parse("3").is_err());
156          assert!(Identifier::<CurrentNetwork>::parse("1foo").is_err());
157          assert!(Identifier::<CurrentNetwork>::parse("12").is_err());
158          assert!(Identifier::<CurrentNetwork>::parse("111").is_err());
159  
160          // Must fit within the data capacity of a base field element.
161          let identifier =
162              Identifier::<CurrentNetwork>::parse("foo_bar_baz_qux_quux_quuz_corge_grault_garply_waldo_fred_plugh_xyzzy");
163          assert!(identifier.is_err());
164      }
165  
166      #[test]
167      fn test_from_str() -> Result<()> {
168          let candidate = Identifier::<CurrentNetwork>::from_str("foo_bar").unwrap();
169          assert_eq!("foo_bar", candidate.to_string());
170  
171          let mut rng = TestRng::default();
172  
173          for _ in 0..ITERATIONS {
174              // Sample a random fixed-length alphanumeric string, that always starts with an alphabetic character.
175              let expected_string = sample_identifier_as_string::<CurrentNetwork>(&mut rng)?;
176              // Recover the field element from the bits.
177              let expected_field = Field::<CurrentNetwork>::from_bits_le(&expected_string.to_bits_le())?;
178  
179              let candidate = Identifier::<CurrentNetwork>::from_str(&expected_string)?;
180              assert_eq!(expected_string, candidate.to_string());
181              assert_eq!(expected_field, candidate.0);
182              assert_eq!(expected_string.len(), candidate.1 as usize);
183          }
184          Ok(())
185      }
186  
187      #[test]
188      fn test_from_str_fails() {
189          // Must be non-empty.
190          assert!(Identifier::<CurrentNetwork>::from_str("").is_err());
191  
192          // Must be alphanumeric or underscore.
193          assert!(Identifier::<CurrentNetwork>::from_str("foo_bar~baz").is_err());
194          assert!(Identifier::<CurrentNetwork>::from_str("foo_bar-baz").is_err());
195  
196          // Must not be solely underscores.
197          assert!(Identifier::<CurrentNetwork>::from_str("_").is_err());
198          assert!(Identifier::<CurrentNetwork>::from_str("__").is_err());
199          assert!(Identifier::<CurrentNetwork>::from_str("___").is_err());
200          assert!(Identifier::<CurrentNetwork>::from_str("____").is_err());
201  
202          // Must not start with a number.
203          assert!(Identifier::<CurrentNetwork>::from_str("1").is_err());
204          assert!(Identifier::<CurrentNetwork>::from_str("2").is_err());
205          assert!(Identifier::<CurrentNetwork>::from_str("3").is_err());
206          assert!(Identifier::<CurrentNetwork>::from_str("1foo").is_err());
207          assert!(Identifier::<CurrentNetwork>::from_str("12").is_err());
208          assert!(Identifier::<CurrentNetwork>::from_str("111").is_err());
209  
210          // Must not start with underscore.
211          assert!(Identifier::<CurrentNetwork>::from_str("_foo").is_err());
212  
213          // Must be ASCII.
214          assert!(Identifier::<CurrentNetwork>::from_str("\u{03b1}").is_err()); // Greek alpha
215          assert!(Identifier::<CurrentNetwork>::from_str("\u{03b2}").is_err()); // Greek beta
216  
217          // Must fit within the data capacity of a base field element.
218          let identifier = Identifier::<CurrentNetwork>::from_str(
219              "foo_bar_baz_qux_quux_quuz_corge_grault_garply_waldo_fred_plugh_xyzzy",
220          );
221          assert!(identifier.is_err());
222      }
223  
224      #[test]
225      fn test_display() -> Result<()> {
226          let identifier = Identifier::<CurrentNetwork>::from_str("foo_bar")?;
227          assert_eq!("foo_bar", format!("{identifier}"));
228          Ok(())
229      }
230  
231      #[test]
232      fn test_proxy_bits_equivalence() -> Result<()> {
233          let mut rng = TestRng::default();
234          let identifier: Identifier<CurrentNetwork> = sample_identifier(&mut rng)?;
235  
236          // Direct conversion to bytes.
237          let bytes1 = identifier.0.to_bytes_le()?;
238  
239          // Combined conversion via bits.
240          let bits_le = identifier.0.to_bits_le();
241          let bytes2 = bits_le.chunks(8).map(u8::from_bits_le).collect::<Result<Vec<u8>, _>>()?;
242  
243          assert_eq!(bytes1, bytes2);
244  
245          Ok(())
246      }
247  }