slug.rs
//! "Slugs" used as part of on-disk filenames and other similar purposes
//!
//! Arti uses "slugs" as parts of filenames in many places.
//! Slugs are fixed or variable strings which either
//! designate the kind of a thing, or which of various things this is.
//!
//! Slugs have a restricted character set:
//! Lowercase ASCII alphanumerics, underscore, hyphen.
//! We may extend this to allow additional characters in the future,
//! but /, +, and . (the slug separators) will never be valid slug characters.
//! Additionally, : will never be a valid slug character,
//! because Windows does not allow colons in filenames[^1].
//!
//! Slugs may not be empty, and they may not start with a hyphen.
//!
//! Slugs can be concatenated to build file names.
//! When concatenating slugs to make filenames,
//! they should be separated using `/`, `+`, or `.`
//! ([`SLUG_SEPARATOR_CHARS`]).
//! Slugs should not be concatenated without separators (for security reasons).
//!
//! On Windows only, the following slugs are forbidden,
//! because of [absurd Windows filename behaviours](https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file):
//! `con` `prn` `aux` `nul`
//! `com1` `com2` `com3` `com4` `com5` `com6` `com7` `com8` `com9` `com0`
//! `lpt1` `lpt2` `lpt3` `lpt4` `lpt5` `lpt6` `lpt7` `lpt8` `lpt9` `lpt0`.
//!
//! [^1]: <https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions>

pub mod timestamp;

use std::borrow::Borrow;
use std::ffi::OsStr;
use std::fmt::{self, Display};
use std::mem;
use std::ops::Deref;
use std::path::Path;

use paste::paste;
use serde::{Deserialize, Serialize};
use thiserror::Error;

#[cfg(target_family = "windows")]
#[cfg_attr(docsrs, doc(cfg(target_family = "windows")))]
pub use os::ForbiddenOnWindows;

/// An owned slug, checked for syntax
///
/// The syntax check can be relied on for safety/soundness.
// We adopt this rule so that eventually we could have AsRef<[std::ascii::Char]>, etc.
#[derive(Debug, Clone, Serialize, Deserialize)] //
#[derive(Eq, PartialEq, Ord, PartialOrd, Hash)] //
#[derive(derive_more::Display)]
// (De)serialization goes via `String`: deserializing uses `TryFrom<String>`,
// which runs the same syntax check as `Slug::new`.
#[serde(try_from = "String", into = "String")]
// Box<str> since we don't expect to change the size; that makes it 2 words rather than 3
// (But our public APIs are in terms of String.)
pub struct Slug(Box<str>);

/// A borrowed slug, checked for syntax
///
/// The syntax check can be relied on for safety/soundness.
// Only `Serialize` is derived here (no `Deserialize`).
#[derive(Debug, Serialize)] //
#[derive(Eq, PartialEq, Ord, PartialOrd, Hash)] //
#[derive(derive_more::Display)]
#[serde(transparent)]
#[repr(transparent)] // SAFETY: this attribute is needed for unsafe in new_unchecked
pub struct SlugRef(str);

/// Characters which are good to use to separate slugs
///
/// Guaranteed to never overlap with the valid slug character set.
///
/// We might expand this set, but not ever reduce it.
74 pub const SLUG_SEPARATOR_CHARS: &str = "/+."; 75 76 /// Error for an invalid slug 77 #[derive(Error, Debug, Clone, Eq, PartialEq, Hash)] 78 #[non_exhaustive] 79 pub enum BadSlug { 80 /// Slug contains a forbidden character 81 BadCharacter(char), 82 /// Slug starts with a disallowed character 83 BadFirstCharacter(char), 84 /// An empty slug was supplied where a nonempty one is required 85 EmptySlugNotAllowed, 86 /// We are on Windows and the slug is one of the forbidden ones 87 /// 88 /// On platforms other than Windows, this variant is absent. 89 #[cfg_attr(docsrs, doc(cfg(target_family = "windows")))] 90 #[cfg(target_family = "windows")] 91 ForbiddenOnWindows(ForbiddenOnWindows), 92 } 93 94 /// Types which can perhaps be used as a slug 95 /// 96 /// This is a trait implemented by `str`, `std::fmt::Arguments`, 97 /// and other implementors of `ToString`, for the convenience of call sites: 98 /// APIs can have functions taking an `&(impl TryIntoSlug + ?Sized)` or `&dyn TryIntoSlug` 99 /// and callers then don't need error-handling boilerplate. 100 /// 101 /// Functions that take a `TryIntoSlug` will need to do a runtime syntax check. 102 pub trait TryIntoSlug { 103 /// Convert `self` into a `Slug`, if it has the right syntax 104 fn try_into_slug(&self) -> Result<Slug, BadSlug>; 105 } 106 107 impl<T: ToString + ?Sized> TryIntoSlug for T { 108 fn try_into_slug(&self) -> Result<Slug, BadSlug> { 109 self.to_string().try_into() 110 } 111 } 112 113 impl Slug { 114 /// Make a Slug out of an owned `String`, if it has the correct syntax 115 pub fn new(s: String) -> Result<Slug, BadSlug> { 116 Ok(unsafe { 117 // SAFETY: we check, and then call new_unchecked 118 check_syntax(&s)?; 119 Slug::new_unchecked(s) 120 }) 121 } 122 123 /// Make a Slug out of an owned `String`, without checking the syntax 124 /// 125 /// # Safety 126 /// 127 /// It's the caller's responsibility to check the syntax of the input string. 
128 pub unsafe fn new_unchecked(s: String) -> Slug { 129 Slug(s.into()) 130 } 131 } 132 133 impl SlugRef { 134 /// Make a SlugRef out of a `str`, if it has the correct syntax 135 pub fn new(s: &str) -> Result<&SlugRef, BadSlug> { 136 Ok(unsafe { 137 // SAFETY: we check, and then call new_unchecked 138 check_syntax(s)?; 139 SlugRef::new_unchecked(s) 140 }) 141 } 142 143 /// Make a SlugRef out of a `str`, without checking the syntax 144 /// 145 /// # Safety 146 /// 147 /// It's the caller's responsibility to check the syntax of the input string. 148 pub unsafe fn new_unchecked<'s>(s: &'s str) -> &'s SlugRef { 149 unsafe { 150 // SAFETY 151 // SlugRef is repr(transparent). So the alignment and memory layout 152 // are the same, and the pointer metadata is the same too. 153 // The lifetimes is correct by construction. 154 // 155 // We do this, rather than `struct SlugRef<'r>(&'r str)`, 156 // because that way we couldn't impl Deref. 157 mem::transmute::<&'s str, &'s SlugRef>(s) 158 } 159 } 160 161 /// Make an owned `Slug` 162 fn to_slug(&self) -> Slug { 163 unsafe { 164 // SAFETY: self is a SlugRef so our syntax is right 165 Slug::new_unchecked(self.0.into()) 166 } 167 } 168 } 169 170 impl TryFrom<String> for Slug { 171 type Error = BadSlug; 172 fn try_from(s: String) -> Result<Slug, BadSlug> { 173 Slug::new(s) 174 } 175 } 176 177 impl From<Slug> for String { 178 fn from(s: Slug) -> String { 179 s.0.into() 180 } 181 } 182 183 impl<'s> TryFrom<&'s str> for &'s SlugRef { 184 type Error = BadSlug; 185 fn try_from(s: &'s str) -> Result<&'s SlugRef, BadSlug> { 186 SlugRef::new(s) 187 } 188 } 189 190 impl Deref for Slug { 191 type Target = SlugRef; 192 fn deref(&self) -> &SlugRef { 193 unsafe { 194 // SAFETY: self is a Slug so our syntax is right 195 SlugRef::new_unchecked(&self.0) 196 } 197 } 198 } 199 200 impl Borrow<SlugRef> for Slug { 201 fn borrow(&self) -> &SlugRef { 202 self 203 } 204 } 205 impl Borrow<str> for Slug { 206 fn borrow(&self) -> &str { 207 self.as_ref() 
208 } 209 } 210 211 impl ToOwned for SlugRef { 212 type Owned = Slug; 213 fn to_owned(&self) -> Slug { 214 self.to_slug() 215 } 216 } 217 218 /// Implement `fn as_...(&self) -> ...` and `AsRef` 219 macro_rules! impl_as_with_inherent { { $ty:ident } => { paste!{ 220 impl SlugRef { 221 #[doc = concat!("Obtain this slug as a `", stringify!($ty), "`")] 222 pub fn [<as_ $ty:snake>](&self) -> &$ty { 223 self.as_ref() 224 } 225 } 226 impl_as_ref!($ty); 227 } } } 228 /// Implement `AsRef` 229 macro_rules! impl_as_ref { { $ty:ty } => { paste!{ 230 impl AsRef<$ty> for SlugRef { 231 fn as_ref(&self) -> &$ty { 232 self.0.as_ref() 233 } 234 } 235 impl AsRef<$ty> for Slug { 236 fn as_ref(&self) -> &$ty { 237 self.deref().as_ref() 238 } 239 } 240 } } } 241 242 impl_as_with_inherent!(str); 243 impl_as_with_inherent!(Path); 244 impl_as_ref!(OsStr); 245 impl_as_ref!([u8]); 246 247 /// Check the string `s` to see if it would be valid as a slug 248 /// 249 /// This is a low-level method for special cases. 250 /// Usually, use [`Slug::new`] etc. 251 // 252 // SAFETY 253 // This function checks the syntax, and is relied on by unsafe code 254 #[allow(clippy::if_same_then_else)] // clippy objects to the repeated Ok(()) 255 pub fn check_syntax(s: &str) -> Result<(), BadSlug> { 256 if s.is_empty() { 257 return Err(BadSlug::EmptySlugNotAllowed); 258 } 259 260 // Slugs are not allowed to start with a hyphen. 
261 if s.starts_with('-') { 262 return Err(BadSlug::BadFirstCharacter('-')); 263 } 264 265 // check legal character set 266 for c in s.chars() { 267 if c.is_ascii_lowercase() { 268 Ok(()) 269 } else if c.is_ascii_digit() { 270 Ok(()) 271 } else if c == '_' || c == '-' { 272 Ok(()) 273 } else { 274 Err(BadSlug::BadCharacter(c)) 275 }?; 276 } 277 278 os::check_forbidden(s)?; 279 280 Ok(()) 281 } 282 283 impl Display for BadSlug { 284 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 285 match self { 286 BadSlug::BadCharacter(c) => { 287 let num = u32::from(*c); 288 write!(f, "character {c:?} (U+{num:04X}) is not allowed") 289 } 290 BadSlug::BadFirstCharacter(c) => { 291 let num = u32::from(*c); 292 write!( 293 f, 294 "character {c:?} (U+{num:04X}) is not allowed as the first character" 295 ) 296 } 297 BadSlug::EmptySlugNotAllowed => { 298 write!(f, "empty identifier (empty slug) not allowed") 299 } 300 #[cfg(target_family = "windows")] 301 BadSlug::ForbiddenOnWindows(e) => os::fmt_error(e, f), 302 } 303 } 304 } 305 306 /// Forbidden slug support for Windows 307 #[cfg(target_family = "windows")] 308 mod os { 309 use super::*; 310 311 /// A slug which is forbidden because we are on Windows (as found in an invalid slug error) 312 /// 313 /// This type is available only on Windows platforms. 314 // 315 // Double reference so that BadSlug has to contain only one word, not two 316 pub type ForbiddenOnWindows = &'static &'static str; 317 318 /// The forbidden slugs - windows thinks "C:\\Program Files\lpt0.json" is a printer. 
319 const FORBIDDEN: &[&str] = &[ 320 "con", "prn", "aux", "nul", // 321 "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9", "com0", // 322 "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9", "lpt0", 323 ]; 324 325 /// Check whether this slug is forbidden here 326 pub(super) fn check_forbidden(s: &str) -> Result<(), BadSlug> { 327 for bad in FORBIDDEN { 328 if s == *bad { 329 return Err(BadSlug::ForbiddenOnWindows(bad)); 330 } 331 } 332 Ok(()) 333 } 334 335 /// Display a forbidden slug error 336 pub(super) fn fmt_error(s: &ForbiddenOnWindows, f: &mut fmt::Formatter) -> fmt::Result { 337 write!(f, "slug (name) {s:?} is not allowed on Windows") 338 } 339 } 340 /// Forbidden slug support for non-Windows 341 #[cfg(not(target_family = "windows"))] 342 mod os { 343 use super::*; 344 345 /// Check whether this slug is forbidden here 346 #[allow(clippy::unnecessary_wraps)] 347 pub(super) fn check_forbidden(_s: &str) -> Result<(), BadSlug> { 348 Ok(()) 349 } 350 } 351 352 #[cfg(test)] 353 mod test { 354 // @@ begin test lint list maintained by maint/add_warning @@ 355 #![allow(clippy::bool_assert_comparison)] 356 #![allow(clippy::clone_on_copy)] 357 #![allow(clippy::dbg_macro)] 358 #![allow(clippy::mixed_attributes_style)] 359 #![allow(clippy::print_stderr)] 360 #![allow(clippy::print_stdout)] 361 #![allow(clippy::single_char_pattern)] 362 #![allow(clippy::unwrap_used)] 363 #![allow(clippy::unchecked_duration_subtraction)] 364 #![allow(clippy::useless_vec)] 365 #![allow(clippy::needless_pass_by_value)] 366 //! 
<!-- @@ end test lint list maintained by maint/add_warning @@ --> 367 368 use super::*; 369 use itertools::chain; 370 371 #[test] 372 fn bad() { 373 for c in chain!( 374 SLUG_SEPARATOR_CHARS.chars(), // 375 ['\\', ' ', '\n', '\0'] 376 ) { 377 let s = format!("x{c}y"); 378 let e_ref = SlugRef::new(&s).unwrap_err(); 379 assert_eq!(e_ref, BadSlug::BadCharacter(c)); 380 let e_own = Slug::new(s).unwrap_err(); 381 assert_eq!(e_ref, e_own); 382 } 383 } 384 385 #[test] 386 fn good() { 387 let all = chain!( 388 b'a'..=b'z', // 389 b'0'..=b'9', 390 [b'_'], 391 ) 392 .map(char::from); 393 394 let chk = |s: String| { 395 let sref = SlugRef::new(&s).unwrap(); 396 let slug = Slug::new(s.clone()).unwrap(); 397 assert_eq!(sref.to_string(), s); 398 assert_eq!(slug.to_string(), s); 399 }; 400 401 chk(all.clone().collect()); 402 403 for c in all { 404 chk(format!("{c}")); 405 } 406 407 // Hyphens are allowed, but not as the first character 408 chk("a-".into()); 409 chk("a-b".into()); 410 } 411 412 #[test] 413 fn badchar_msg() { 414 let chk = |s: &str, m: &str| { 415 assert_eq!( 416 SlugRef::new(s).unwrap_err().to_string(), 417 m, // 418 ); 419 }; 420 421 chk(".", "character '.' 
(U+002E) is not allowed"); 422 chk("\0", "character '\\0' (U+0000) is not allowed"); 423 chk( 424 "\u{12345}", 425 "character '\u{12345}' (U+12345) is not allowed", 426 ); 427 chk( 428 "-", 429 "character '-' (U+002D) is not allowed as the first character", 430 ); 431 chk("A", "character 'A' (U+0041) is not allowed"); 432 } 433 434 #[test] 435 fn windows_forbidden() { 436 for s in ["con", "prn", "lpt0"] { 437 let r = SlugRef::new(s); 438 if cfg!(target_family = "windows") { 439 assert_eq!( 440 r.unwrap_err().to_string(), 441 format!("slug (name) \"{s}\" is not allowed on Windows"), 442 ); 443 } else { 444 assert_eq!(r.unwrap().as_str(), s); 445 } 446 } 447 } 448 449 #[test] 450 fn empty_slug() { 451 assert_eq!( 452 SlugRef::new("").unwrap_err().to_string(), 453 "empty identifier (empty slug) not allowed" 454 ); 455 } 456 }