segment.rs
1 // For the next poor soul that will step in this file: 2 // 3 // Beware that changing even the slighest thing will break 500 other cases. Way 4 // too many hours have been spent on perfecting this, and every single invariant 5 // is (probably) intended. Please reconsider editing this file. 6 // 7 // Comments? Ha! 8 9 use std::{ 10 mem, 11 ops, 12 }; 13 14 use cab_util::{ 15 Lazy, 16 force_ref, 17 reffed, 18 }; 19 use ranged::{ 20 IntoSize as _, 21 IntoSpan as _, 22 Span, 23 }; 24 use smallvec::SmallVec; 25 use ust::{ 26 report::Report, 27 style::{ 28 self, 29 StyledExt as _, 30 }, 31 }; 32 33 use crate::{ 34 Kind, 35 node, 36 red, 37 token, 38 }; 39 40 #[must_use] 41 pub fn unescape(c: char) -> Option<char> { 42 Some(match c { 43 ' ' => ' ', 44 '0' => '\x00', // Null. 45 'a' => '\x07', // Bell. 46 'b' => '\x08', // Backspace. 47 't' => '\x09', // Horizontal tab. 48 'n' => '\x0A', // New line. 49 'v' => '\x0B', // Vertical tab. 50 'f' => '\x0C', // Form feed. 51 'r' => '\x0D', // Carriage return. 52 '=' => '=', 53 '`' => '`', 54 '"' => '\"', 55 '\'' => '\'', 56 '\\' => '\\', 57 58 _ => return None, 59 }) 60 } 61 62 pub fn unescape_string(s: &str) -> Result<(String, bool), SmallVec<Span, 4>> { 63 let mut string = String::with_capacity(s.len()); 64 let mut escaped_newline = false; 65 let mut invalids = SmallVec::<Span, 4>::new(); 66 67 let mut chars = s.char_indices().peekable(); 68 while let Some((index, c)) = chars.next() { 69 if c != '\\' { 70 string.push(c); 71 continue; 72 } 73 74 let Some((_, next)) = chars.next() else { 75 // When a string ends with '\', it has to be followed by a newline. 76 // And that escapes the newline. 77 escaped_newline = true; 78 continue; 79 }; 80 81 let Some(unescaped) = unescape(next) else { 82 invalids.push(Span::at(index, '\\'.size() + next.size())); 83 continue; 84 }; 85 86 string.push(unescaped); 87 } 88 89 if invalids.is_empty() { 90 Ok((string, escaped_newline)) 91 } else { 92 Err(invalids) 93 } 94 } 95 96 #[bon::builder] 97 pub fn escape( 98 #[builder(start_fn)] c: char, 99 delimiter: Option<(char, &'static str)>, 100 is_first: bool, 101 ) -> Option<&'static str> { 102 Some(match c { 103 // Turn one line of the `unescape` match to an `escape` match in Helix. 104 // Copy this to your @ register using "@y. Execute using Q. 105 // gst,<S-S><space>=<gt><space><ret><A-)>,t,<right><left><left>mr'"i\\<esc>gs 106 '\x00' => "\\0", // Null. 107 '\x07' => "\\a", // Bell. 108 '\x08' => "\\b", // Backspace. 109 '\x09' => "\\t", // Horizontal tab. 110 '\x0A' => "\\n", // New line. 111 '\x0B' => "\\v", // Vertical tab. 112 '\x0C' => "\\f", // Form feed. 113 '\x0D' => "\\r", // Carriage return. 114 115 c if let Some((delimiter, delimiter_escaped)) = delimiter 116 && c == delimiter => 117 { 118 delimiter_escaped 119 }, 120 121 // "=" is not a valid string, but "\=" is. 122 // However, "\==" is also valid and we don't want to over-escape. 123 '=' if is_first => "\\=", 124 125 _ => return None, 126 }) 127 } 128 129 #[bon::builder] 130 pub fn escape_string<'a>( 131 #[builder(start_fn)] s: &'a str, 132 #[builder(default)] normal_style: style::Style, 133 #[builder(default)] escaped_style: style::Style, 134 delimiter: Option<(char, &'static str)>, 135 ) -> impl Iterator<Item = style::Styled<&'a str>> { 136 // Bon doesn't like generator syntax. 137 escape_string_impl(s, normal_style, escaped_style, delimiter) 138 } 139 140 fn escape_string_impl<'a>( 141 s: &'a str, 142 normal: style::Style, 143 escaped: style::Style, 144 delimiter: Option<(char, &'static str)>, 145 ) -> impl Iterator<Item = style::Styled<&'a str>> { 146 gen move { 147 let mut literal_start_offset = 0; 148 149 for (offset, c) in s.char_indices() { 150 let Some(escaped_) = escape(c) 151 .is_first(offset == 0) 152 .maybe_delimiter(delimiter) 153 .call() 154 else { 155 continue; 156 }; 157 158 yield s[literal_start_offset..offset].style(normal); 159 literal_start_offset = offset; 160 161 yield escaped_.style(escaped); 162 literal_start_offset += c.len_utf8(); 163 } 164 165 yield s[literal_start_offset..s.len()].style(normal); 166 } 167 } 168 169 reffed! { 170 #[derive(Debug, Clone, PartialEq, Eq, Hash)] 171 enum SegmentRaw { 172 Content(token::Content), 173 Interpolation(node::Interpolation), 174 } 175 } 176 177 impl SegmentRawRef<'_> { 178 #[must_use] 179 fn span_first_line(self) -> Span { 180 match self { 181 SegmentRawRef::Content(content) => { 182 match content.text().find('\n') { 183 Some(len) => Span::at(content.span().start, len), 184 None => content.span(), 185 } 186 }, 187 188 SegmentRawRef::Interpolation(interpolation) => { 189 match interpolation.text().find_char('\n') { 190 Some(len) => Span::at(interpolation.span().start, len), 191 None => interpolation.span(), 192 } 193 }, 194 } 195 } 196 197 #[must_use] 198 fn span_last_line(self) -> Span { 199 match self { 200 SegmentRawRef::Content(content) => { 201 match content.text().rfind('\n') { 202 Some(len) => { 203 Span::at_end( 204 content.span().end, 205 content.text().size() - len - '\n'.size(), 206 ) 207 }, 208 None => content.span(), 209 } 210 }, 211 212 SegmentRawRef::Interpolation(interpolation) => { 213 match interpolation.text().rfind_char('\n') { 214 Some(len) => { 215 Span::at_end( 216 interpolation.span().end, 217 interpolation.text().size() - len - '\n'.size(), 218 ) 219 }, 220 None => interpolation.span(), 221 } 222 }, 223 } 224 } 225 } 226 227 #[derive(Debug, Clone, PartialEq, Eq)] 228 pub enum Segment<'a> { 229 Content { span: Span, content: String }, 230 Interpolation(&'a node::Interpolation), 231 } 232 233 impl Segment<'_> { 234 #[must_use] 235 pub fn is_content(&self) -> bool { 236 matches!(self, &Self::Content { .. }) 237 } 238 239 #[must_use] 240 pub fn is_interpolation(&self) -> bool { 241 matches!(self, &Self::Interpolation(_)) 242 } 243 } 244 245 #[derive(Debug, Clone, PartialEq, Eq)] 246 enum Straight<'a> { 247 Line { 248 span: Span, 249 text: &'a str, 250 251 is_from_line_start: bool, 252 is_to_line_end: bool, 253 254 is_first: bool, 255 is_last: bool, 256 }, 257 258 Interpolation(&'a node::Interpolation), 259 } 260 261 #[derive(Debug, Clone, PartialEq, Eq)] 262 pub struct Segments<'a> { 263 span: Span, 264 265 pub is_multiline: bool, 266 267 line_span_first: Option<Span>, 268 line_span_last: Option<Span>, 269 270 straights: SmallVec<Straight<'a>, 4>, 271 } 272 273 impl<'a> IntoIterator for Segments<'a> { 274 type Item = Segment<'a>; 275 276 type IntoIter = impl Iterator<Item = Segment<'a>>; 277 278 fn into_iter(self) -> Self::IntoIter { 279 gen move { 280 let mut buffer = String::new(); 281 let mut buffer_span = None::<Span>; 282 283 let (indent, indent_width) = self 284 .indent() 285 .expect("string must be valid and not mix indents"); 286 287 for straight in self.straights { 288 match straight { 289 Straight::Line { 290 span, 291 mut text, 292 is_from_line_start, 293 is_to_line_end, 294 is_first, 295 is_last, 296 } => { 297 if self.is_multiline { 298 // Multiline strings' first and last lines are ignored: 299 // 300 // "<ignored> 301 // <content> 302 // <ignored>" 303 if is_first || is_last { 304 assert!( 305 text.chars().all(char::is_whitespace), 306 "multiline string must be valid and not have non-whitespace characters \ 307 in first and last lines" 308 ); 309 continue; 310 } 311 312 if is_to_line_end { 313 text = text.trim_end(); 314 } 315 316 if is_from_line_start { 317 text = if text.chars().all(char::is_whitespace) { 318 "" 319 } else { 320 assert!( 321 text[..indent_width].chars().all(|c| c == indent.unwrap()), 322 "multiline string must be valid and not mix indents" 323 ); 324 &text[indent_width..] 325 } 326 } 327 } 328 329 let (unescaped, escaped_newline) = 330 unescape_string(text).expect("string content must be valid"); 331 332 buffer.push_str(&unescaped); 333 334 // Not asserting `escaped_newline -> is_to_line_end`, 335 // because we still process invalid syntax and 336 // yield valid segments. 337 // 338 // For example, in this code: 339 // 340 // "\ 341 // 342 // That part with only a \ will `escaped_newline`, but 343 // it won't be a `is_to_line_end` because the way 344 // we decide that is just `!line_is_last`, which is false 345 // as that "line" is the last as there is no closing delimiter. 346 // 347 // That's fine for actually valid syntax trees though. 348 349 if is_to_line_end && !escaped_newline { 350 buffer.push('\n'); 351 } 352 353 buffer_span.replace(buffer_span.map_or(span, |span_| span_.cover(span))); 354 }, 355 356 Straight::Interpolation(interpolation) => { 357 yield Segment::Content { 358 span: buffer_span 359 .take() 360 .expect("interpolation must never be the first or last segment"), 361 content: mem::take(&mut buffer), 362 }; 363 364 yield Segment::Interpolation(interpolation); 365 }, 366 } 367 } 368 369 if let Some(span) = buffer_span { 370 yield Segment::Content { 371 span, 372 content: buffer, 373 }; 374 } 375 } 376 } 377 } 378 379 impl Segments<'_> { 380 fn indent(&self) -> Result<(Option<char>, usize), SmallVec<char, 4>> { 381 let mut indents = SmallVec::<char, 4>::new(); 382 let mut indent_width = None::<usize>; 383 384 for straight in &self.straights { 385 let &Straight::Line { 386 text, 387 is_from_line_start: true, 388 is_last: false, 389 .. 390 } = straight 391 else { 392 continue; 393 }; 394 395 if text.chars().all(char::is_whitespace) { 396 continue; 397 } 398 399 let mut line_indent_width: usize = 0; 400 401 for c in text.chars() { 402 if !c.is_whitespace() { 403 break; 404 } 405 406 line_indent_width += 1; 407 408 if !indents.contains(&c) { 409 indents.push(c); 410 } 411 } 412 413 if let Some(width) = indent_width { 414 indent_width.replace(width.min(line_indent_width)); 415 } else { 416 indent_width.replace(line_indent_width); 417 } 418 } 419 420 if indents.len() > 1 { 421 return Err(indents); 422 } 423 424 Ok((indents.first().copied(), indent_width.unwrap_or(0))) 425 } 426 427 pub fn validate(&self, to: &mut Vec<Report>, report: &mut Lazy!(Report)) { 428 for straight in &self.straights { 429 match *straight { 430 Straight::Line { span, text, .. } => { 431 if let Err(invalids) = unescape_string(text) { 432 for invalid in invalids { 433 force_ref!(report).push_primary(invalid.offset(span.start), "invalid escape"); 434 } 435 } 436 }, 437 438 Straight::Interpolation(interpolation) => interpolation.expression().validate(to), 439 } 440 } 441 442 if let Err(indents) = self.indent() { 443 force_ref!(report).push_primary( 444 self.span, 445 format!( 446 "cannot mix different kinds of space in indents: {indents}", 447 indents = indents 448 .into_iter() 449 .map(|c| { 450 match escape(c).is_first(true).delimiter(('\'', "\\'")).call() { 451 Some(escaped) => escaped.to_owned(), 452 None => format!("'{c}'"), 453 } 454 }) 455 .intersperse(", ".to_owned()) 456 .collect::<String>(), 457 ), 458 ); 459 } 460 461 if self.is_multiline { 462 for span in [self.line_span_first, self.line_span_last] 463 .into_iter() 464 .flatten() 465 { 466 force_ref!(report).push_primary(span, "first and last lines must be empty"); 467 } 468 } 469 } 470 } 471 472 pub trait Segmented: ops::Deref<Target = red::Node> { 473 fn segments(&self) -> Segments<'_> { 474 let mut is_multiline = false; 475 476 let mut line_span_first = None::<Span>; 477 let mut line_span_last = None::<Span>; 478 479 let mut straights = SmallVec::new(); 480 481 let mut previous_segment_span_last_line = None::<Span>; 482 let mut segments = self 483 .children_with_tokens() 484 .filter_map(|child| { 485 match child { 486 red::ElementRef::Node(node) => { 487 Some(SegmentRawRef::Interpolation( 488 <&node::Interpolation>::try_from(node) 489 .expect("child node of segmented node must be interpolation"), 490 )) 491 }, 492 493 // The reason we are not asserting here is because invalid 494 // segmented nodes sometimes contain non-content tokens, 495 // it's not worth it to fix this as it'll error anyway. 496 red::ElementRef::Token(token) => { 497 <&token::Content>::try_from(token) 498 .map(SegmentRawRef::Content) 499 .ok() 500 }, 501 } 502 }) 503 .enumerate() 504 .peekable(); 505 506 while let Some((segment_index, segment)) = segments.next() { 507 let mut segment_is_multiline = false; 508 509 let segment_is_first = segment_index == 0; 510 let segment_is_last = segments.peek().is_none(); 511 512 match segment { 513 SegmentRawRef::Content(content) => { 514 let span = content.span(); 515 516 let mut offset: usize = 0; 517 let mut lines = content.text().split('\n').enumerate().peekable(); 518 while let Some((line_index, line)) = lines.next() { 519 let line_is_first = line_index == 0; 520 let line_is_last = lines.peek().is_none(); 521 522 if line_is_first && !line_is_last { 523 segment_is_multiline = true; 524 } 525 526 if segment_is_first && line_is_first { 527 let suffix_interpolation_span = line_is_last 528 .then(|| { 529 segments 530 .peek() 531 .map(|&(_, segment)| segment.span_first_line()) 532 }) 533 .flatten(); 534 535 if let Some(interpolation_span) = suffix_interpolation_span { 536 line_span_first.replace(span.cover(interpolation_span)); 537 } else { 538 let line = line.trim_end(); 539 540 if !line.is_empty() { 541 line_span_first.replace(Span::at(span.start, line.size())); 542 } 543 } 544 } 545 546 if segment_is_last && line_is_last { 547 let prefix_interpolation_span_last_line = line_is_first 548 .then_some(previous_segment_span_last_line) 549 .flatten(); 550 551 if let Some(interpolation_span_last_line) = prefix_interpolation_span_last_line 552 { 553 line_span_last.replace(span.cover(interpolation_span_last_line)); 554 } else { 555 let line = line.trim_start(); 556 557 if !line.is_empty() { 558 line_span_last.replace(Span::at_end(span.end, line.size())); 559 } 560 } 561 } 562 563 #[expect(clippy::nonminimal_bool)] 564 straights.push(Straight::Line { 565 span: Span::at(content.span().start + offset, line.size()), 566 567 text: &content.text()[offset..offset + line.len()], 568 569 is_from_line_start: !(segment_is_first && line_is_first) 570 && !(previous_segment_span_last_line.is_some() && line_is_first), 571 is_to_line_end: !line_is_last, 572 573 is_first: segment_is_first && line_is_first, 574 is_last: segment_is_last && line_is_last, 575 }); 576 577 offset += line.len() + '\n'.len_utf8(); 578 } 579 }, 580 581 SegmentRawRef::Interpolation(interpolation) => { 582 let span = interpolation.span(); 583 584 if segment_is_first { 585 line_span_first.replace(span); 586 } 587 588 if segment_is_last { 589 line_span_last.replace(span); 590 } 591 592 straights.push(Straight::Interpolation(interpolation)); 593 }, 594 } 595 596 previous_segment_span_last_line.replace(segment.span_last_line()); 597 598 if segment_is_multiline { 599 is_multiline = true; 600 } 601 } 602 603 Segments { 604 span: self.span(), 605 606 is_multiline, 607 608 line_span_first, 609 line_span_last, 610 611 straights, 612 } 613 } 614 615 fn is_trivial(&self) -> bool { 616 let mut segments = self.segments().into_iter().peekable(); 617 618 segments.next().is_some_and(|segment| segment.is_content()) && segments.peek().is_none() 619 } 620 621 fn validate_closing(&self, to: &mut Vec<Report>, end: Kind, type_: &str) { 622 if self 623 .children_with_tokens() 624 .last() 625 .is_some_and(|token| token.kind() == end) 626 { 627 return; 628 } 629 630 let start = self 631 .children_with_tokens() 632 .next() 633 .expect("delimited must have tokens"); 634 635 to.push( 636 Report::error(format!("unclosed {type_}")) 637 .secondary(start.span(), format!("{type_} starts here")) 638 .primary(Span::empty(self.span().end), format!("expected {end} here")), 639 ); 640 } 641 }