/ cab / syntax / segment.rs
segment.rs
  1  // For the next poor soul that will step in this file:
  2  //
// Beware that changing even the slightest thing will break 500 other cases. Way
  4  // too many hours have been spent on perfecting this, and every single invariant
  5  // is (probably) intended. Please reconsider editing this file.
  6  //
  7  // Comments? Ha!
  8  
  9  use std::{
 10     mem,
 11     ops,
 12  };
 13  
 14  use cab_util::{
 15     Lazy,
 16     force_ref,
 17     reffed,
 18  };
 19  use ranged::{
 20     IntoSize as _,
 21     IntoSpan as _,
 22     Span,
 23  };
 24  use smallvec::SmallVec;
 25  use ust::{
 26     report::Report,
 27     style::{
 28        self,
 29        StyledExt as _,
 30     },
 31  };
 32  
 33  use crate::{
 34     Kind,
 35     node,
 36     red,
 37     token,
 38  };
 39  
 40  #[must_use]
 41  pub fn unescape(c: char) -> Option<char> {
 42     Some(match c {
 43        ' ' => ' ',
 44        '0' => '\x00', // Null.
 45        'a' => '\x07', // Bell.
 46        'b' => '\x08', // Backspace.
 47        't' => '\x09', // Horizontal tab.
 48        'n' => '\x0A', // New line.
 49        'v' => '\x0B', // Vertical tab.
 50        'f' => '\x0C', // Form feed.
 51        'r' => '\x0D', // Carriage return.
 52        '=' => '=',
 53        '`' => '`',
 54        '"' => '\"',
 55        '\'' => '\'',
 56        '\\' => '\\',
 57  
 58        _ => return None,
 59     })
 60  }
 61  
 62  pub fn unescape_string(s: &str) -> Result<(String, bool), SmallVec<Span, 4>> {
 63     let mut string = String::with_capacity(s.len());
 64     let mut escaped_newline = false;
 65     let mut invalids = SmallVec::<Span, 4>::new();
 66  
 67     let mut chars = s.char_indices().peekable();
 68     while let Some((index, c)) = chars.next() {
 69        if c != '\\' {
 70           string.push(c);
 71           continue;
 72        }
 73  
 74        let Some((_, next)) = chars.next() else {
 75           // When a string ends with '\', it has to be followed by a newline.
 76           // And that escapes the newline.
 77           escaped_newline = true;
 78           continue;
 79        };
 80  
 81        let Some(unescaped) = unescape(next) else {
 82           invalids.push(Span::at(index, '\\'.size() + next.size()));
 83           continue;
 84        };
 85  
 86        string.push(unescaped);
 87     }
 88  
 89     if invalids.is_empty() {
 90        Ok((string, escaped_newline))
 91     } else {
 92        Err(invalids)
 93     }
 94  }
 95  
 96  #[bon::builder]
 97  pub fn escape(
 98     #[builder(start_fn)] c: char,
 99     delimiter: Option<(char, &'static str)>,
100     is_first: bool,
101  ) -> Option<&'static str> {
102     Some(match c {
103        // Turn one line of the `unescape` match to an `escape` match in Helix.
104        // Copy this to your @ register using "@y. Execute using Q.
105        // gst,<S-S><space>=<gt><space><ret><A-)>,t,<right><left><left>mr'"i\\<esc>gs
106        '\x00' => "\\0", // Null.
107        '\x07' => "\\a", // Bell.
108        '\x08' => "\\b", // Backspace.
109        '\x09' => "\\t", // Horizontal tab.
110        '\x0A' => "\\n", // New line.
111        '\x0B' => "\\v", // Vertical tab.
112        '\x0C' => "\\f", // Form feed.
113        '\x0D' => "\\r", // Carriage return.
114  
115        c if let Some((delimiter, delimiter_escaped)) = delimiter
116           && c == delimiter =>
117        {
118           delimiter_escaped
119        },
120  
121        // "=" is not a valid string, but "\=" is.
122        // However, "\==" is also valid and we don't want to over-escape.
123        '=' if is_first => "\\=",
124  
125        _ => return None,
126     })
127  }
128  
/// Builder entry point for escaping a whole string.
///
/// Forwards `s`, the two styles and the optional `delimiter` pair unchanged to
/// `escape_string_impl` and returns the iterator of styled pieces it produces.
/// Both styles fall back to `style::Style::default()` when not set on the
/// builder.
#[bon::builder]
pub fn escape_string<'a>(
   #[builder(start_fn)] s: &'a str,
   #[builder(default)] normal_style: style::Style,
   #[builder(default)] escaped_style: style::Style,
   delimiter: Option<(char, &'static str)>,
) -> impl Iterator<Item = style::Styled<&'a str>> {
   // Bon doesn't like generator syntax.
   escape_string_impl(s, normal_style, escaped_style, delimiter)
}
139  
140  fn escape_string_impl<'a>(
141     s: &'a str,
142     normal: style::Style,
143     escaped: style::Style,
144     delimiter: Option<(char, &'static str)>,
145  ) -> impl Iterator<Item = style::Styled<&'a str>> {
146     gen move {
147        let mut literal_start_offset = 0;
148  
149        for (offset, c) in s.char_indices() {
150           let Some(escaped_) = escape(c)
151              .is_first(offset == 0)
152              .maybe_delimiter(delimiter)
153              .call()
154           else {
155              continue;
156           };
157  
158           yield s[literal_start_offset..offset].style(normal);
159           literal_start_offset = offset;
160  
161           yield escaped_.style(escaped);
162           literal_start_offset += c.len_utf8();
163        }
164  
165        yield s[literal_start_offset..s.len()].style(normal);
166     }
167  }
168  
// A raw child of a segmented node: either a content token or an
// interpolation node.
//
// NOTE(review): `reffed!` comes from `cab_util`; the rest of this file uses a
// `SegmentRawRef<'_>` type, which is presumably the borrowed counterpart this
// macro generates. Confirm against the macro definition.
reffed! {
   #[derive(Debug, Clone, PartialEq, Eq, Hash)]
   enum SegmentRaw {
      Content(token::Content),
      Interpolation(node::Interpolation),
   }
}
176  
177  impl SegmentRawRef<'_> {
178     #[must_use]
179     fn span_first_line(self) -> Span {
180        match self {
181           SegmentRawRef::Content(content) => {
182              match content.text().find('\n') {
183                 Some(len) => Span::at(content.span().start, len),
184                 None => content.span(),
185              }
186           },
187  
188           SegmentRawRef::Interpolation(interpolation) => {
189              match interpolation.text().find_char('\n') {
190                 Some(len) => Span::at(interpolation.span().start, len),
191                 None => interpolation.span(),
192              }
193           },
194        }
195     }
196  
197     #[must_use]
198     fn span_last_line(self) -> Span {
199        match self {
200           SegmentRawRef::Content(content) => {
201              match content.text().rfind('\n') {
202                 Some(len) => {
203                    Span::at_end(
204                       content.span().end,
205                       content.text().size() - len - '\n'.size(),
206                    )
207                 },
208                 None => content.span(),
209              }
210           },
211  
212           SegmentRawRef::Interpolation(interpolation) => {
213              match interpolation.text().rfind_char('\n') {
214                 Some(len) => {
215                    Span::at_end(
216                       interpolation.span().end,
217                       interpolation.text().size() - len - '\n'.size(),
218                    )
219                 },
220                 None => interpolation.span(),
221              }
222           },
223        }
224     }
225  }
226  
/// A processed piece of a segmented node, as yielded by iterating
/// [`Segments`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Segment<'a> {
   /// Unescaped text together with the span covering the source lines it was
   /// built from.
   Content { span: Span, content: String },
   /// An interpolation node embedded between content.
   Interpolation(&'a node::Interpolation),
}
232  
233  impl Segment<'_> {
234     #[must_use]
235     pub fn is_content(&self) -> bool {
236        matches!(self, &Self::Content { .. })
237     }
238  
239     #[must_use]
240     pub fn is_interpolation(&self) -> bool {
241        matches!(self, &Self::Interpolation(_))
242     }
243  }
244  
/// An unprocessed piece of a segmented node: one line of a content token, or
/// an interpolation. Built by `Segmented::segments`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Straight<'a> {
   /// A single `'\n'`-delimited line of a content token (newline excluded).
   Line {
      /// Span of this line.
      span: Span,
      /// Raw (still escaped) text of this line.
      text: &'a str,

      /// `false` for the first line of the first segment and for a line that
      /// directly continues after a previous segment; `true` otherwise.
      is_from_line_start: bool,
      /// `true` unless this is the last line of its content token.
      is_to_line_end:     bool,

      /// `true` for the first line of the first segment.
      is_first: bool,
      /// `true` for the last line of the last segment.
      is_last:  bool,
   },

   /// An interpolation node.
   Interpolation(&'a node::Interpolation),
}
260  
/// The segments of a segmented node, collected by `Segmented::segments`.
///
/// Iterate it (via `IntoIterator`) to get processed [`Segment`]s, or call
/// `validate` to report problems.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Segments<'a> {
   /// Span of the whole segmented node.
   span: Span,

   /// Whether any content token of the node spans more than one line.
   pub is_multiline: bool,

   /// Offending span on the first line, if any. Reported by `validate` for
   /// multiline strings ("first and last lines must be empty").
   line_span_first: Option<Span>,
   /// Offending span on the last line, if any. Reported the same way.
   line_span_last:  Option<Span>,

   /// The unprocessed lines and interpolations, in source order.
   straights: SmallVec<Straight<'a>, 4>,
}
272  
273  impl<'a> IntoIterator for Segments<'a> {
274     type Item = Segment<'a>;
275  
276     type IntoIter = impl Iterator<Item = Segment<'a>>;
277  
278     fn into_iter(self) -> Self::IntoIter {
279        gen move {
280           let mut buffer = String::new();
281           let mut buffer_span = None::<Span>;
282  
283           let (indent, indent_width) = self
284              .indent()
285              .expect("string must be valid and not mix indents");
286  
287           for straight in self.straights {
288              match straight {
289                 Straight::Line {
290                    span,
291                    mut text,
292                    is_from_line_start,
293                    is_to_line_end,
294                    is_first,
295                    is_last,
296                 } => {
297                    if self.is_multiline {
298                       // Multiline strings' first and last lines are ignored:
299                       //
300                       // "<ignored>
301                       // <content>
302                       // <ignored>"
303                       if is_first || is_last {
304                          assert!(
305                             text.chars().all(char::is_whitespace),
306                             "multiline string must be valid and not have non-whitespace characters \
307                              in first and last lines"
308                          );
309                          continue;
310                       }
311  
312                       if is_to_line_end {
313                          text = text.trim_end();
314                       }
315  
316                       if is_from_line_start {
317                          text = if text.chars().all(char::is_whitespace) {
318                             ""
319                          } else {
320                             assert!(
321                                text[..indent_width].chars().all(|c| c == indent.unwrap()),
322                                "multiline string must be valid and not mix indents"
323                             );
324                             &text[indent_width..]
325                          }
326                       }
327                    }
328  
329                    let (unescaped, escaped_newline) =
330                       unescape_string(text).expect("string content must be valid");
331  
332                    buffer.push_str(&unescaped);
333  
334                    // Not asserting `escaped_newline -> is_to_line_end`,
335                    // because we still process invalid syntax and
336                    // yield valid segments.
337                    //
338                    // For example, in this code:
339                    //
340                    //   "\
341                    //
342                    // That part with only a \ will `escaped_newline`, but
343                    // it won't be a `is_to_line_end` because the way
344                    // we decide that is just `!line_is_last`, which is false
345                    // as that "line" is the last as there is no closing delimiter.
346                    //
347                    // That's fine for actually valid syntax trees though.
348  
349                    if is_to_line_end && !escaped_newline {
350                       buffer.push('\n');
351                    }
352  
353                    buffer_span.replace(buffer_span.map_or(span, |span_| span_.cover(span)));
354                 },
355  
356                 Straight::Interpolation(interpolation) => {
357                    yield Segment::Content {
358                       span:    buffer_span
359                          .take()
360                          .expect("interpolation must never be the first or last segment"),
361                       content: mem::take(&mut buffer),
362                    };
363  
364                    yield Segment::Interpolation(interpolation);
365                 },
366              }
367           }
368  
369           if let Some(span) = buffer_span {
370              yield Segment::Content {
371                 span,
372                 content: buffer,
373              };
374           }
375        }
376     }
377  }
378  
379  impl Segments<'_> {
380     fn indent(&self) -> Result<(Option<char>, usize), SmallVec<char, 4>> {
381        let mut indents = SmallVec::<char, 4>::new();
382        let mut indent_width = None::<usize>;
383  
384        for straight in &self.straights {
385           let &Straight::Line {
386              text,
387              is_from_line_start: true,
388              is_last: false,
389              ..
390           } = straight
391           else {
392              continue;
393           };
394  
395           if text.chars().all(char::is_whitespace) {
396              continue;
397           }
398  
399           let mut line_indent_width: usize = 0;
400  
401           for c in text.chars() {
402              if !c.is_whitespace() {
403                 break;
404              }
405  
406              line_indent_width += 1;
407  
408              if !indents.contains(&c) {
409                 indents.push(c);
410              }
411           }
412  
413           if let Some(width) = indent_width {
414              indent_width.replace(width.min(line_indent_width));
415           } else {
416              indent_width.replace(line_indent_width);
417           }
418        }
419  
420        if indents.len() > 1 {
421           return Err(indents);
422        }
423  
424        Ok((indents.first().copied(), indent_width.unwrap_or(0)))
425     }
426  
427     pub fn validate(&self, to: &mut Vec<Report>, report: &mut Lazy!(Report)) {
428        for straight in &self.straights {
429           match *straight {
430              Straight::Line { span, text, .. } => {
431                 if let Err(invalids) = unescape_string(text) {
432                    for invalid in invalids {
433                       force_ref!(report).push_primary(invalid.offset(span.start), "invalid escape");
434                    }
435                 }
436              },
437  
438              Straight::Interpolation(interpolation) => interpolation.expression().validate(to),
439           }
440        }
441  
442        if let Err(indents) = self.indent() {
443           force_ref!(report).push_primary(
444              self.span,
445              format!(
446                 "cannot mix different kinds of space in indents: {indents}",
447                 indents = indents
448                    .into_iter()
449                    .map(|c| {
450                       match escape(c).is_first(true).delimiter(('\'', "\\'")).call() {
451                          Some(escaped) => escaped.to_owned(),
452                          None => format!("'{c}'"),
453                       }
454                    })
455                    .intersperse(", ".to_owned())
456                    .collect::<String>(),
457              ),
458           );
459        }
460  
461        if self.is_multiline {
462           for span in [self.line_span_first, self.line_span_last]
463              .into_iter()
464              .flatten()
465           {
466              force_ref!(report).push_primary(span, "first and last lines must be empty");
467           }
468        }
469     }
470  }
471  
/// Shared behavior of nodes made of content tokens and interpolation nodes.
pub trait Segmented: ops::Deref<Target = red::Node> {
   /// Collects the children of this node into [`Segments`].
   ///
   /// Child nodes are taken as interpolations and content tokens are split on
   /// `'\n'` into one `Straight::Line` per line; other tokens are skipped.
   /// Along the way this records whether any content token spans multiple
   /// lines, and which spans make the first and last lines non-empty.
   ///
   /// # Panics
   ///
   /// Panics when a child node is not an interpolation.
   fn segments(&self) -> Segments<'_> {
      let mut is_multiline = false;

      let mut line_span_first = None::<Span>;
      let mut line_span_last = None::<Span>;

      let mut straights = SmallVec::new();

      // Span of the last line of the segment handled in the previous
      // iteration; `None` only while handling the first segment.
      let mut previous_segment_span_last_line = None::<Span>;
      let mut segments = self
         .children_with_tokens()
         .filter_map(|child| {
            match child {
               red::ElementRef::Node(node) => {
                  Some(SegmentRawRef::Interpolation(
                     <&node::Interpolation>::try_from(node)
                        .expect("child node of segmented node must be interpolation"),
                  ))
               },

               // The reason we are not asserting here is because invalid
               // segmented nodes sometimes contain non-content tokens,
               // it's not worth it to fix this as it'll error anyway.
               red::ElementRef::Token(token) => {
                  <&token::Content>::try_from(token)
                     .map(SegmentRawRef::Content)
                     .ok()
               },
            }
         })
         .enumerate()
         .peekable();

      while let Some((segment_index, segment)) = segments.next() {
         let mut segment_is_multiline = false;

         let segment_is_first = segment_index == 0;
         let segment_is_last = segments.peek().is_none();

         match segment {
            SegmentRawRef::Content(content) => {
               let span = content.span();

               // Byte offset of the current line within the content text.
               let mut offset: usize = 0;
               let mut lines = content.text().split('\n').enumerate().peekable();
               while let Some((line_index, line)) = lines.next() {
                  let line_is_first = line_index == 0;
                  let line_is_last = lines.peek().is_none();

                  // More than one line in this content token.
                  if line_is_first && !line_is_last {
                     segment_is_multiline = true;
                  }

                  if segment_is_first && line_is_first {
                     // When the first content token is a single line, the
                     // following segment (if any) continues that same line.
                     let suffix_interpolation_span = line_is_last
                        .then(|| {
                           segments
                              .peek()
                              .map(|&(_, segment)| segment.span_first_line())
                        })
                        .flatten();

                     if let Some(interpolation_span) = suffix_interpolation_span {
                        line_span_first.replace(span.cover(interpolation_span));
                     } else {
                        let line = line.trim_end();

                        if !line.is_empty() {
                           line_span_first.replace(Span::at(span.start, line.size()));
                        }
                     }
                  }

                  if segment_is_last && line_is_last {
                     // When the last content token is a single line, the
                     // previous segment (if any) ends on that same line.
                     let prefix_interpolation_span_last_line = line_is_first
                        .then_some(previous_segment_span_last_line)
                        .flatten();

                     if let Some(interpolation_span_last_line) = prefix_interpolation_span_last_line
                     {
                        line_span_last.replace(span.cover(interpolation_span_last_line));
                     } else {
                        let line = line.trim_start();

                        if !line.is_empty() {
                           line_span_last.replace(Span::at_end(span.end, line.size()));
                        }
                     }
                  }

                  #[expect(clippy::nonminimal_bool)]
                  straights.push(Straight::Line {
                     span: Span::at(content.span().start + offset, line.size()),

                     text: &content.text()[offset..offset + line.len()],

                     is_from_line_start: !(segment_is_first && line_is_first)
                        && !(previous_segment_span_last_line.is_some() && line_is_first),
                     is_to_line_end:     !line_is_last,

                     is_first: segment_is_first && line_is_first,
                     is_last:  segment_is_last && line_is_last,
                  });

                  // Skip past this line and the newline that `split` removed.
                  offset += line.len() + '\n'.len_utf8();
               }
            },

            SegmentRawRef::Interpolation(interpolation) => {
               let span = interpolation.span();

               if segment_is_first {
                  line_span_first.replace(span);
               }

               if segment_is_last {
                  line_span_last.replace(span);
               }

               straights.push(Straight::Interpolation(interpolation));
            },
         }

         previous_segment_span_last_line.replace(segment.span_last_line());

         if segment_is_multiline {
            is_multiline = true;
         }
      }

      Segments {
         span: self.span(),

         is_multiline,

         line_span_first,
         line_span_last,

         straights,
      }
   }

   /// Whether the processed segments consist of exactly one content segment.
   fn is_trivial(&self) -> bool {
      let mut segments = self.segments().into_iter().peekable();

      segments.next().is_some_and(|segment| segment.is_content()) && segments.peek().is_none()
   }

   /// Pushes an "unclosed {type_}" error to `to` unless the last child of
   /// this node has the kind `end`.
   ///
   /// # Panics
   ///
   /// Panics when the node is unclosed and has no children at all.
   fn validate_closing(&self, to: &mut Vec<Report>, end: Kind, type_: &str) {
      if self
         .children_with_tokens()
         .last()
         .is_some_and(|token| token.kind() == end)
      {
         return;
      }

      // The first child is used to point at where the construct starts.
      let start = self
         .children_with_tokens()
         .next()
         .expect("delimited must have tokens");

      to.push(
         Report::error(format!("unclosed {type_}"))
            .secondary(start.span(), format!("{type_} starts here"))
            .primary(Span::empty(self.span().end), format!("expected {end} here")),
      );
   }
}