parse.rs
1 // Copyright (c) 2019-2025 Alpha-Delta Network Inc. 2 // This file is part of the alphavm library. 3 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at: 7 8 // http://www.apache.org/licenses/LICENSE-2.0 9 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 use super::*; 17 18 impl<N: Network> Parser for Identifier<N> { 19 /// Parses a string into an identifier. 20 /// 21 /// # Requirements 22 /// The identifier must be alphanumeric (or underscore). 23 /// The identifier must not start with a number. 24 /// The identifier must not be a keyword. 25 #[inline] 26 fn parse(string: &str) -> ParserResult<Self> { 27 // Check for alphanumeric characters and underscores. 28 map_res(recognize(pair(alpha1, many0(alt((alphanumeric1, tag("_")))))), |identifier: &str| { 29 Self::from_str(identifier) 30 })(string) 31 } 32 } 33 34 impl<N: Network> FromStr for Identifier<N> { 35 type Err = Error; 36 37 /// Reads in an identifier from a string. 38 fn from_str(identifier: &str) -> Result<Self, Self::Err> { 39 // Ensure the identifier is not an empty string, and starts with an ASCII letter. 40 match identifier.chars().next() { 41 Some(character) => ensure!(character.is_ascii_alphabetic(), "Identifier must start with a letter"), 42 None => bail!("Identifier cannot be empty"), 43 } 44 45 // Ensure the identifier consists of ASCII letters, ASCII digits, and underscores. 46 if identifier.chars().any(|character| !character.is_ascii_alphanumeric() && character != '_') { 47 bail!("Identifier '{identifier}' must consist of letters, digits, and underscores") 48 } 49 50 // Ensure identifier fits within the data capacity of the base field. 51 let max_bytes = Field::<N>::size_in_data_bits() / 8; // Note: This intentionally rounds down. 52 if identifier.len() > max_bytes { 53 bail!("Identifier is too large. Identifiers must be <= {max_bytes} bytes long") 54 } 55 56 // Ensure that the identifier is not a literal. 57 ensure!( 58 !enum_iterator::all::<crate::LiteralType>().any(|lt| lt.type_name() == identifier), 59 "Identifier '{identifier}' is a reserved literal type" 60 ); 61 62 // Note: The string bytes themselves are **not** little-endian. Rather, they are order-preserving 63 // for reconstructing the string when recovering the field element back into bytes. 64 Ok(Self( 65 Field::<N>::from_bits_le(&identifier.as_bytes().to_bits_le())?, 66 u8::try_from(identifier.len()).or_halt_with::<N>("Identifier `from_str` exceeds maximum length"), 67 )) 68 } 69 } 70 71 impl<N: Network> Debug for Identifier<N> { 72 fn fmt(&self, f: &mut Formatter) -> fmt::Result { 73 Display::fmt(self, f) 74 } 75 } 76 77 impl<N: Network> Display for Identifier<N> { 78 /// Prints the identifier as a string. 79 fn fmt(&self, f: &mut Formatter) -> fmt::Result { 80 // Convert the identifier to bytes. 81 let bytes = self.0.to_bytes_le().map_err(|_| fmt::Error)?; 82 83 // Parse the bytes as a UTF-8 string. 84 let string = String::from_utf8(bytes).map_err(|_| fmt::Error)?; 85 86 // Truncate the UTF-8 string at the first instance of '\0'. 87 match string.split('\0').next() { 88 // Check that the UTF-8 string matches the expected length. 89 Some(string) => match string.len() == self.1 as usize { 90 // Return the string. 91 true => write!(f, "{string}"), 92 false => Err(fmt::Error), 93 }, 94 None => Err(fmt::Error), 95 } 96 } 97 } 98 99 #[cfg(test)] 100 mod tests { 101 use super::*; 102 use crate::data::identifier::tests::{sample_identifier, sample_identifier_as_string}; 103 use alphavm_console_network::MainnetV0; 104 105 type CurrentNetwork = MainnetV0; 106 107 const ITERATIONS: usize = 100; 108 109 #[test] 110 fn test_parse() -> Result<()> { 111 // Quick sanity check. 112 let (remainder, candidate) = Identifier::<CurrentNetwork>::parse("foo_bar1")?; 113 assert_eq!("foo_bar1", candidate.to_string()); 114 assert_eq!("", remainder); 115 116 // Must be alphanumeric or underscore. 117 let (remainder, candidate) = Identifier::<CurrentNetwork>::parse("foo_bar~baz")?; 118 assert_eq!("foo_bar", candidate.to_string()); 119 assert_eq!("~baz", remainder); 120 121 // Must be alphanumeric or underscore. 122 let (remainder, candidate) = Identifier::<CurrentNetwork>::parse("foo_bar-baz")?; 123 assert_eq!("foo_bar", candidate.to_string()); 124 assert_eq!("-baz", remainder); 125 126 let mut rng = TestRng::default(); 127 128 // Check random identifiers. 129 for _ in 0..ITERATIONS { 130 // Sample a random fixed-length alphanumeric string, that always starts with an alphabetic character. 131 let expected_string = sample_identifier_as_string::<CurrentNetwork>(&mut rng)?; 132 // Recover the field element from the bits. 133 let expected_field = Field::<CurrentNetwork>::from_bits_le(&expected_string.to_bits_le())?; 134 135 let (remainder, candidate) = Identifier::<CurrentNetwork>::parse(expected_string.as_str()).unwrap(); 136 assert_eq!(expected_string, candidate.to_string()); 137 assert_eq!(expected_field, candidate.0); 138 assert_eq!(expected_string.len(), candidate.1 as usize); 139 assert_eq!("", remainder); 140 } 141 Ok(()) 142 } 143 144 #[test] 145 fn test_parse_fails() { 146 // Must not be solely underscores. 147 assert!(Identifier::<CurrentNetwork>::parse("_").is_err()); 148 assert!(Identifier::<CurrentNetwork>::parse("__").is_err()); 149 assert!(Identifier::<CurrentNetwork>::parse("___").is_err()); 150 assert!(Identifier::<CurrentNetwork>::parse("____").is_err()); 151 152 // Must not start with a number. 153 assert!(Identifier::<CurrentNetwork>::parse("1").is_err()); 154 assert!(Identifier::<CurrentNetwork>::parse("2").is_err()); 155 assert!(Identifier::<CurrentNetwork>::parse("3").is_err()); 156 assert!(Identifier::<CurrentNetwork>::parse("1foo").is_err()); 157 assert!(Identifier::<CurrentNetwork>::parse("12").is_err()); 158 assert!(Identifier::<CurrentNetwork>::parse("111").is_err()); 159 160 // Must fit within the data capacity of a base field element. 161 let identifier = 162 Identifier::<CurrentNetwork>::parse("foo_bar_baz_qux_quux_quuz_corge_grault_garply_waldo_fred_plugh_xyzzy"); 163 assert!(identifier.is_err()); 164 } 165 166 #[test] 167 fn test_from_str() -> Result<()> { 168 let candidate = Identifier::<CurrentNetwork>::from_str("foo_bar").unwrap(); 169 assert_eq!("foo_bar", candidate.to_string()); 170 171 let mut rng = TestRng::default(); 172 173 for _ in 0..ITERATIONS { 174 // Sample a random fixed-length alphanumeric string, that always starts with an alphabetic character. 175 let expected_string = sample_identifier_as_string::<CurrentNetwork>(&mut rng)?; 176 // Recover the field element from the bits. 177 let expected_field = Field::<CurrentNetwork>::from_bits_le(&expected_string.to_bits_le())?; 178 179 let candidate = Identifier::<CurrentNetwork>::from_str(&expected_string)?; 180 assert_eq!(expected_string, candidate.to_string()); 181 assert_eq!(expected_field, candidate.0); 182 assert_eq!(expected_string.len(), candidate.1 as usize); 183 } 184 Ok(()) 185 } 186 187 #[test] 188 fn test_from_str_fails() { 189 // Must be non-empty. 190 assert!(Identifier::<CurrentNetwork>::from_str("").is_err()); 191 192 // Must be alphanumeric or underscore. 193 assert!(Identifier::<CurrentNetwork>::from_str("foo_bar~baz").is_err()); 194 assert!(Identifier::<CurrentNetwork>::from_str("foo_bar-baz").is_err()); 195 196 // Must not be solely underscores. 197 assert!(Identifier::<CurrentNetwork>::from_str("_").is_err()); 198 assert!(Identifier::<CurrentNetwork>::from_str("__").is_err()); 199 assert!(Identifier::<CurrentNetwork>::from_str("___").is_err()); 200 assert!(Identifier::<CurrentNetwork>::from_str("____").is_err()); 201 202 // Must not start with a number. 203 assert!(Identifier::<CurrentNetwork>::from_str("1").is_err()); 204 assert!(Identifier::<CurrentNetwork>::from_str("2").is_err()); 205 assert!(Identifier::<CurrentNetwork>::from_str("3").is_err()); 206 assert!(Identifier::<CurrentNetwork>::from_str("1foo").is_err()); 207 assert!(Identifier::<CurrentNetwork>::from_str("12").is_err()); 208 assert!(Identifier::<CurrentNetwork>::from_str("111").is_err()); 209 210 // Must not start with underscore. 211 assert!(Identifier::<CurrentNetwork>::from_str("_foo").is_err()); 212 213 // Must be ASCII. 214 assert!(Identifier::<CurrentNetwork>::from_str("\u{03b1}").is_err()); // Greek alpha 215 assert!(Identifier::<CurrentNetwork>::from_str("\u{03b2}").is_err()); // Greek beta 216 217 // Must fit within the data capacity of a base field element. 218 let identifier = Identifier::<CurrentNetwork>::from_str( 219 "foo_bar_baz_qux_quux_quuz_corge_grault_garply_waldo_fred_plugh_xyzzy", 220 ); 221 assert!(identifier.is_err()); 222 } 223 224 #[test] 225 fn test_display() -> Result<()> { 226 let identifier = Identifier::<CurrentNetwork>::from_str("foo_bar")?; 227 assert_eq!("foo_bar", format!("{identifier}")); 228 Ok(()) 229 } 230 231 #[test] 232 fn test_proxy_bits_equivalence() -> Result<()> { 233 let mut rng = TestRng::default(); 234 let identifier: Identifier<CurrentNetwork> = sample_identifier(&mut rng)?; 235 236 // Direct conversion to bytes. 237 let bytes1 = identifier.0.to_bytes_le()?; 238 239 // Combined conversion via bits. 240 let bits_le = identifier.0.to_bits_le(); 241 let bytes2 = bits_le.chunks(8).map(u8::from_bits_le).collect::<Result<Vec<u8>, _>>()?; 242 243 assert_eq!(bytes1, bytes2); 244 245 Ok(()) 246 } 247 }