/ crates / tor-persist / src / slug.rs
slug.rs
  1  //! "Slugs" used as part of on-disk filenames and other similar purposes
  2  //!
  3  //! Arti uses "slugs" as parts of filenames in many places.
  4  //! Slugs are fixed or variable strings which either
  5  //! designate the kind of a thing, or which of various things this is.
  6  //!
  7  //! Slugs have a restricted character set:
  8  //! Lowercase ASCII alphanumerics, underscore, hyphen.
  9  //! We may extend this to allow additional characters in the future,
 10  //! but /, +, and . (the slug separators) will never be valid slug characters.
 11  //! Additionally, : will never be a valid slug character,
 12  //! because Windows does not allow colons in filenames[^1].
 13  //!
 14  //! Slugs may not be empty, and they may not start with a hyphen.
 15  //!
 16  //! Slugs can be concatenated to build file names.
 17  //! When concatenating slugs to make filenames,
 18  //! they should be separated using `/`, `+`, or `.`
 19  //! ([`SLUG_SEPARATOR_CHARS`]).
 20  //! Slugs should not be concatenated without separators (for security reasons).
 21  //!
 22  //! On Windows only, the following slugs are forbidden,
 23  //! because of [absurd Windows filename behaviours](https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file):
 24  //! `con` `prn` `aux` `nul`
 25  //! `com1` `com2` `com3` `com4` `com5` `com6` `com7` `com8` `com9` `com0`
 26  //! `lpt1` `lpt2` `lpt3` `lpt4` `lpt5` `lpt6` `lpt7` `lpt8` `lpt9` `lpt0`.
 27  //!
 28  //! [^1]: <https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions>
 29  
 30  pub mod timestamp;
 31  
 32  use std::borrow::Borrow;
 33  use std::ffi::OsStr;
 34  use std::fmt::{self, Display};
 35  use std::mem;
 36  use std::ops::Deref;
 37  use std::path::Path;
 38  
 39  use paste::paste;
 40  use serde::{Deserialize, Serialize};
 41  use thiserror::Error;
 42  
 43  #[cfg(target_family = "windows")]
 44  #[cfg_attr(docsrs, doc(cfg(target_family = "windows")))]
 45  pub use os::ForbiddenOnWindows;
 46  
 47  /// An owned slug, checked for syntax
 48  ///
 49  /// The syntax check can be relied on for safety/soundness.
 50  // We adopt this rule so that eventually we could have AsRef<[std::ascii::Char]>, etc.
 51  #[derive(Debug, Clone, Serialize, Deserialize)] //
 52  #[derive(Eq, PartialEq, Ord, PartialOrd, Hash)] //
 53  #[derive(derive_more::Display)]
 54  #[serde(try_from = "String", into = "String")]
 55  // Box<str> since we don't expect to change the size; that makes it 2 words rather than 3
 56  // (But our public APIs are in terms of String.)
 57  pub struct Slug(Box<str>);
 58  
 59  /// A borrwed slug, checked for syntax
 60  ///
 61  /// The syntax check can be relied on for safety/soundness.
 62  #[derive(Debug, Serialize)] //
 63  #[derive(Eq, PartialEq, Ord, PartialOrd, Hash)] //
 64  #[derive(derive_more::Display)]
 65  #[serde(transparent)]
 66  #[repr(transparent)] // SAFETY: this attribute is needed for unsafe in new_unchecked
 67  pub struct SlugRef(str);
 68  
 69  /// Characters which are good to use to separate slugs
 70  ///
 71  /// Guaranteed to never overlap with the valid slug character set.
 72  ///
 73  /// We might expand this set, but not ever reduce it.
 74  pub const SLUG_SEPARATOR_CHARS: &str = "/+.";
 75  
 76  /// Error for an invalid slug
 77  #[derive(Error, Debug, Clone, Eq, PartialEq, Hash)]
 78  #[non_exhaustive]
 79  pub enum BadSlug {
 80      /// Slug contains a forbidden character
 81      BadCharacter(char),
 82      /// Slug starts with a disallowed character
 83      BadFirstCharacter(char),
 84      /// An empty slug was supplied where a nonempty one is required
 85      EmptySlugNotAllowed,
 86      /// We are on Windows and the slug is one of the forbidden ones
 87      ///
 88      /// On platforms other than Windows, this variant is absent.
 89      #[cfg_attr(docsrs, doc(cfg(target_family = "windows")))]
 90      #[cfg(target_family = "windows")]
 91      ForbiddenOnWindows(ForbiddenOnWindows),
 92  }
 93  
 94  /// Types which can perhaps be used as a slug
 95  ///
 96  /// This is a trait implemented by `str`, `std::fmt::Arguments`,
 97  /// and other implementors of `ToString`, for the convenience of call sites:
 98  /// APIs can have functions taking an `&(impl TryIntoSlug + ?Sized)` or `&dyn TryIntoSlug`
 99  /// and callers then don't need error-handling boilerplate.
100  ///
101  /// Functions that take a `TryIntoSlug` will need to do a runtime syntax check.
102  pub trait TryIntoSlug {
103      /// Convert `self` into a `Slug`, if it has the right syntax
104      fn try_into_slug(&self) -> Result<Slug, BadSlug>;
105  }
106  
107  impl<T: ToString + ?Sized> TryIntoSlug for T {
108      fn try_into_slug(&self) -> Result<Slug, BadSlug> {
109          self.to_string().try_into()
110      }
111  }
112  
113  impl Slug {
114      /// Make a Slug out of an owned `String`, if it has the correct syntax
115      pub fn new(s: String) -> Result<Slug, BadSlug> {
116          Ok(unsafe {
117              // SAFETY: we check, and then call new_unchecked
118              check_syntax(&s)?;
119              Slug::new_unchecked(s)
120          })
121      }
122  
123      /// Make a Slug out of an owned `String`, without checking the syntax
124      ///
125      /// # Safety
126      ///
127      /// It's the caller's responsibility to check the syntax of the input string.
128      pub unsafe fn new_unchecked(s: String) -> Slug {
129          Slug(s.into())
130      }
131  }
132  
133  impl SlugRef {
134      /// Make a SlugRef out of a `str`, if it has the correct syntax
135      pub fn new(s: &str) -> Result<&SlugRef, BadSlug> {
136          Ok(unsafe {
137              // SAFETY: we check, and then call new_unchecked
138              check_syntax(s)?;
139              SlugRef::new_unchecked(s)
140          })
141      }
142  
143      /// Make a SlugRef out of a `str`, without checking the syntax
144      ///
145      /// # Safety
146      ///
147      /// It's the caller's responsibility to check the syntax of the input string.
148      pub unsafe fn new_unchecked<'s>(s: &'s str) -> &'s SlugRef {
149          unsafe {
150              // SAFETY
151              // SlugRef is repr(transparent).  So the alignment and memory layout
152              // are the same, and the pointer metadata is the same too.
153              // The lifetimes is correct by construction.
154              //
155              // We do this, rather than `struct SlugRef<'r>(&'r str)`,
156              // because that way we couldn't impl Deref.
157              mem::transmute::<&'s str, &'s SlugRef>(s)
158          }
159      }
160  
161      /// Make an owned `Slug`
162      fn to_slug(&self) -> Slug {
163          unsafe {
164              // SAFETY: self is a SlugRef so our syntax is right
165              Slug::new_unchecked(self.0.into())
166          }
167      }
168  }
169  
170  impl TryFrom<String> for Slug {
171      type Error = BadSlug;
172      fn try_from(s: String) -> Result<Slug, BadSlug> {
173          Slug::new(s)
174      }
175  }
176  
177  impl From<Slug> for String {
178      fn from(s: Slug) -> String {
179          s.0.into()
180      }
181  }
182  
183  impl<'s> TryFrom<&'s str> for &'s SlugRef {
184      type Error = BadSlug;
185      fn try_from(s: &'s str) -> Result<&'s SlugRef, BadSlug> {
186          SlugRef::new(s)
187      }
188  }
189  
190  impl Deref for Slug {
191      type Target = SlugRef;
192      fn deref(&self) -> &SlugRef {
193          unsafe {
194              // SAFETY: self is a Slug so our syntax is right
195              SlugRef::new_unchecked(&self.0)
196          }
197      }
198  }
199  
200  impl Borrow<SlugRef> for Slug {
201      fn borrow(&self) -> &SlugRef {
202          self
203      }
204  }
205  impl Borrow<str> for Slug {
206      fn borrow(&self) -> &str {
207          self.as_ref()
208      }
209  }
210  
211  impl ToOwned for SlugRef {
212      type Owned = Slug;
213      fn to_owned(&self) -> Slug {
214          self.to_slug()
215      }
216  }
217  
/// Implement `fn as_...(&self) -> ...` and `AsRef`
///
/// Given a type name `$ty` (a single identifier), this generates
/// an inherent method on `SlugRef` named `as_` followed by the snake-case form of `$ty`,
/// and then invokes `impl_as_ref!` for the same type.
macro_rules! impl_as_with_inherent {
    ($ty:ident) => {
        paste! {
            impl SlugRef {
                #[doc = concat!("Obtain this slug as a `", stringify!($ty), "`")]
                pub fn [<as_ $ty:snake>](&self) -> &$ty {
                    AsRef::<$ty>::as_ref(self)
                }
            }
            impl_as_ref!($ty);
        }
    };
}
/// Implement `AsRef`
///
/// Generates `AsRef<$ty>` for both `SlugRef` and `Slug`.
/// The `SlugRef` impl defers to the `AsRef<$ty>` impl of the inner `str`;
/// the `Slug` impl dereferences to `SlugRef` and defers to that one.
macro_rules! impl_as_ref {
    ($ty:ty) => {
        impl AsRef<$ty> for SlugRef {
            fn as_ref(&self) -> &$ty {
                AsRef::<$ty>::as_ref(&self.0)
            }
        }
        impl AsRef<$ty> for Slug {
            fn as_ref(&self) -> &$ty {
                AsRef::<$ty>::as_ref(&**self)
            }
        }
    };
}
241  
242  impl_as_with_inherent!(str);
243  impl_as_with_inherent!(Path);
244  impl_as_ref!(OsStr);
245  impl_as_ref!([u8]);
246  
247  /// Check the string `s` to see if it would be valid as a slug
248  ///
249  /// This is a low-level method for special cases.
250  /// Usually, use [`Slug::new`] etc.
251  //
252  // SAFETY
253  // This function checks the syntax, and is relied on by unsafe code
254  #[allow(clippy::if_same_then_else)] // clippy objects to the repeated Ok(())
255  pub fn check_syntax(s: &str) -> Result<(), BadSlug> {
256      if s.is_empty() {
257          return Err(BadSlug::EmptySlugNotAllowed);
258      }
259  
260      // Slugs are not allowed to start with a hyphen.
261      if s.starts_with('-') {
262          return Err(BadSlug::BadFirstCharacter('-'));
263      }
264  
265      // check legal character set
266      for c in s.chars() {
267          if c.is_ascii_lowercase() {
268              Ok(())
269          } else if c.is_ascii_digit() {
270              Ok(())
271          } else if c == '_' || c == '-' {
272              Ok(())
273          } else {
274              Err(BadSlug::BadCharacter(c))
275          }?;
276      }
277  
278      os::check_forbidden(s)?;
279  
280      Ok(())
281  }
282  
283  impl Display for BadSlug {
284      fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
285          match self {
286              BadSlug::BadCharacter(c) => {
287                  let num = u32::from(*c);
288                  write!(f, "character {c:?} (U+{num:04X}) is not allowed")
289              }
290              BadSlug::BadFirstCharacter(c) => {
291                  let num = u32::from(*c);
292                  write!(
293                      f,
294                      "character {c:?} (U+{num:04X}) is not allowed as the first character"
295                  )
296              }
297              BadSlug::EmptySlugNotAllowed => {
298                  write!(f, "empty identifier (empty slug) not allowed")
299              }
300              #[cfg(target_family = "windows")]
301              BadSlug::ForbiddenOnWindows(e) => os::fmt_error(e, f),
302          }
303      }
304  }
305  
/// Forbidden slug support for Windows
#[cfg(target_family = "windows")]
mod os {
    use super::*;

    /// A slug which is forbidden because we are on Windows (as found in an invalid slug error)
    ///
    /// This type is available only on Windows platforms.
    //
    // A reference to a `&'static str`, rather than the `&'static str` itself,
    // so that BadSlug has to contain only one word, not two
    pub type ForbiddenOnWindows = &'static &'static str;

    /// The forbidden slugs - windows thinks "C:\\Program Files\lpt0.json" is a printer.
    const FORBIDDEN: &[&str] = &[
        // Console, printer, auxiliary and null devices
        "con", "prn", "aux", "nul", //
        // Serial ports
        "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9", "com0", //
        // Parallel ports
        "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9", "lpt0",
    ];

    /// Check whether this slug is forbidden here
    ///
    /// Fails if `s` is exactly equal to one of the entries in `FORBIDDEN`;
    /// the error refers to the matching entry.
    pub(super) fn check_forbidden(s: &str) -> Result<(), BadSlug> {
        match FORBIDDEN.iter().find(|name| **name == s) {
            Some(name) => Err(BadSlug::ForbiddenOnWindows(name)),
            None => Ok(()),
        }
    }

    /// Display a forbidden slug error
    pub(super) fn fmt_error(s: &ForbiddenOnWindows, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_fmt(format_args!(
            "slug (name) {s:?} is not allowed on Windows"
        ))
    }
}
340  /// Forbidden slug support for non-Windows
341  #[cfg(not(target_family = "windows"))]
342  mod os {
343      use super::*;
344  
345      /// Check whether this slug is forbidden here
346      #[allow(clippy::unnecessary_wraps)]
347      pub(super) fn check_forbidden(_s: &str) -> Result<(), BadSlug> {
348          Ok(())
349      }
350  }
351  
#[cfg(test)]
mod test {
    // @@ begin test lint list maintained by maint/add_warning @@
    #![allow(clippy::bool_assert_comparison)]
    #![allow(clippy::clone_on_copy)]
    #![allow(clippy::dbg_macro)]
    #![allow(clippy::mixed_attributes_style)]
    #![allow(clippy::print_stderr)]
    #![allow(clippy::print_stdout)]
    #![allow(clippy::single_char_pattern)]
    #![allow(clippy::unwrap_used)]
    #![allow(clippy::unchecked_duration_subtraction)]
    #![allow(clippy::useless_vec)]
    #![allow(clippy::needless_pass_by_value)]
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->

    use super::*;
    use itertools::chain;

    /// Separators and assorted other illegal characters are rejected,
    /// with the same error from the borrowed and the owned constructor
    #[test]
    fn bad() {
        let illegal = chain!(
            SLUG_SEPARATOR_CHARS.chars(), //
            ['\\', ' ', '\n', '\0']
        );

        for c in illegal {
            let s = format!("x{c}y");
            let from_ref = SlugRef::new(&s).unwrap_err();
            assert_eq!(from_ref, BadSlug::BadCharacter(c));
            let from_owned = Slug::new(s).unwrap_err();
            assert_eq!(from_ref, from_owned);
        }
    }

    /// Valid slugs are accepted, and display as the string they were made from
    #[test]
    fn good() {
        /// Check that `s` is accepted by both constructors and round-trips via `Display`
        fn accepts(s: &str) {
            let borrowed = SlugRef::new(s).unwrap();
            let owned = Slug::new(s.to_owned()).unwrap();
            assert_eq!(borrowed.to_string(), s);
            assert_eq!(owned.to_string(), s);
        }

        let alphabet: Vec<char> = chain!(
            b'a'..=b'z', //
            b'0'..=b'9',
            [b'_'],
        )
        .map(char::from)
        .collect();

        // Every one of those characters together, in a single slug ...
        accepts(&alphabet.iter().collect::<String>());

        // ... and each of them as a slug on its own
        for c in &alphabet {
            accepts(&format!("{c}"));
        }

        // Hyphens are allowed, but not as the first character
        for s in ["a-", "a-b"] {
            accepts(s);
        }
    }

    /// The error messages for bad characters have the expected text
    #[test]
    fn badchar_msg() {
        let cases = [
            (".", "character '.' (U+002E) is not allowed"),
            ("\0", "character '\\0' (U+0000) is not allowed"),
            (
                "\u{12345}",
                "character '\u{12345}' (U+12345) is not allowed",
            ),
            (
                "-",
                "character '-' (U+002D) is not allowed as the first character",
            ),
            ("A", "character 'A' (U+0041) is not allowed"),
        ];

        for (input, message) in cases {
            let got = SlugRef::new(input).unwrap_err().to_string();
            assert_eq!(got, message);
        }
    }

    /// The Windows device names are rejected on Windows, and accepted elsewhere
    #[test]
    fn windows_forbidden() {
        let on_windows = cfg!(target_family = "windows");

        for s in ["con", "prn", "lpt0"] {
            match (on_windows, SlugRef::new(s)) {
                (true, r) => assert_eq!(
                    r.unwrap_err().to_string(),
                    format!("slug (name) \"{s}\" is not allowed on Windows"),
                ),
                (false, r) => assert_eq!(r.unwrap().as_str(), s),
            }
        }
    }

    /// The empty string is rejected, with its own message
    #[test]
    fn empty_slug() {
        let message = SlugRef::new("").unwrap_err().to_string();
        assert_eq!(message, "empty identifier (empty slug) not allowed");
    }
}