unified_diff.rs
1 //! Formatting support for Git's [diff format](https://git-scm.com/docs/diff-format). 2 use std::fmt; 3 use std::io; 4 use std::path::PathBuf; 5 6 use radicle_surf::diff::FileStats; 7 use thiserror::Error; 8 9 use radicle::git; 10 use radicle::git::raw::Oid; 11 use radicle_surf::diff; 12 use radicle_surf::diff::{Diff, DiffContent, DiffFile, FileDiff, Hunk, Hunks, Line, Modification}; 13 14 use crate::terminal as term; 15 16 #[derive(Debug, Error)] 17 pub enum Error { 18 /// Attempt to decode from a source with no data left. 19 #[error("unexpected end of file")] 20 UnexpectedEof, 21 #[error(transparent)] 22 Io(#[from] io::Error), 23 /// Catchall for syntax error messages. 24 #[error("{0}")] 25 Syntax(String), 26 #[error(transparent)] 27 ParseInt(#[from] std::num::ParseIntError), 28 #[error(transparent)] 29 Utf8(#[from] std::string::FromUtf8Error), 30 } 31 32 impl Error { 33 pub fn syntax(msg: impl ToString) -> Self { 34 Self::Syntax(msg.to_string()) 35 } 36 37 pub fn is_eof(&self) -> bool { 38 match self { 39 Self::UnexpectedEof => true, 40 Self::Io(e) => e.kind() == io::ErrorKind::UnexpectedEof, 41 _ => false, 42 } 43 } 44 } 45 46 /// The kind of FileDiff Header which can be used to print the FileDiff information which precedes 47 /// `Hunks`. 48 #[derive(Debug, Clone, PartialEq)] 49 pub enum FileHeader { 50 Added { 51 path: PathBuf, 52 new: DiffFile, 53 }, 54 Copied { 55 old_path: PathBuf, 56 new_path: PathBuf, 57 }, 58 Deleted { 59 path: PathBuf, 60 old: DiffFile, 61 }, 62 Modified { 63 path: PathBuf, 64 old: DiffFile, 65 new: DiffFile, 66 }, 67 Moved { 68 old_path: PathBuf, 69 new_path: PathBuf, 70 }, 71 } 72 73 impl std::convert::From<&FileDiff> for FileHeader { 74 // TODO: Pathnames with 'unusual names' need to be quoted. 75 fn from(value: &FileDiff) -> Self { 76 match value { 77 FileDiff::Modified(v) => FileHeader::Modified { 78 path: v.path.clone(), 79 old: v.old.clone(), 80 new: v.new.clone(), 81 }, 82 FileDiff::Added(v) => FileHeader::Added { 83 path: v.path.clone(), 84 new: v.new.clone(), 85 }, 86 FileDiff::Copied(_) => todo!(), 87 FileDiff::Deleted(v) => FileHeader::Deleted { 88 path: v.path.clone(), 89 old: v.old.clone(), 90 }, 91 FileDiff::Moved(v) => FileHeader::Moved { 92 old_path: v.old_path.clone(), 93 new_path: v.new_path.clone(), 94 }, 95 } 96 } 97 } 98 99 /// Meta data which precedes a `Hunk`s content. 100 /// 101 /// For example: 102 /// @@ -24,8 +24,6 @@ use radicle_surf::diff::*; 103 #[derive(Clone, Debug, Default, PartialEq)] 104 pub struct HunkHeader { 105 /// Line the hunk started in the old file. 106 pub old_line_no: u32, 107 /// Number of removed and context lines. 108 pub old_size: u32, 109 /// Line the hunk started in the new file. 110 pub new_line_no: u32, 111 /// Number of added and context lines. 112 pub new_size: u32, 113 /// Trailing text for the Hunk Header. 114 /// 115 /// From Git's documentation "Hunk headers mention the name of the function to which the hunk 116 /// applies. See "Defining a custom hunk-header" in gitattributes for details of how to tailor 117 /// to this to specific languages.". It is likely best to leave this empty when generating 118 /// diffs. 119 pub text: Vec<u8>, 120 } 121 122 impl HunkHeader { 123 pub fn old_line_range(&self) -> std::ops::Range<u32> { 124 let start: u32 = self.old_line_no; 125 let end: u32 = self.old_line_no + self.old_size; 126 start..end 127 } 128 129 pub fn new_line_range(&self) -> std::ops::Range<u32> { 130 let start: u32 = self.new_line_no; 131 let end: u32 = self.new_line_no + self.new_size; 132 start..end 133 } 134 } 135 136 /// Diff-related types that can be decoded from the unified diff format. 137 pub trait Decode: Sized { 138 /// Decode, and fail if we reach the end of the stream. 139 fn decode(r: &mut impl io::BufRead) -> Result<Self, Error>; 140 141 /// Decode, and return a `None` if we reached the end of the stream. 142 fn try_decode(r: &mut impl io::BufRead) -> Result<Option<Self>, Error> { 143 match Self::decode(r) { 144 Ok(v) => Ok(Some(v)), 145 Err(Error::UnexpectedEof) => Ok(None), 146 Err(e) => Err(e), 147 } 148 } 149 150 /// Decode from a string input. 151 fn parse(s: &str) -> Result<Self, Error> { 152 Self::from_bytes(s.as_bytes()) 153 } 154 155 /// Decode from a string input. 156 fn from_bytes(bytes: &[u8]) -> Result<Self, Error> { 157 let mut r = io::BufReader::new(bytes); 158 Self::decode(&mut r) 159 } 160 } 161 162 /// Diff-related types that can be encoded intro the unified diff format. 163 pub trait Encode: Sized { 164 /// Encode type into diff writer. 165 fn encode(&self, w: &mut Writer) -> Result<(), Error>; 166 167 /// Encode into unified diff string. 168 fn to_unified_string(&self) -> Result<String, Error> { 169 let mut buf = Vec::new(); 170 let mut w = Writer::new(&mut buf); 171 172 w.encode(self)?; 173 drop(w); 174 175 String::from_utf8(buf).map_err(Error::from) 176 } 177 } 178 179 impl Decode for Diff { 180 /// Decode from git's unified diff format, consuming the entire input. 181 fn decode(r: &mut impl io::BufRead) -> Result<Self, Error> { 182 let mut s = String::new(); 183 184 r.read_to_string(&mut s)?; 185 186 let d = git::raw::Diff::from_buffer(s.as_ref()) 187 .map_err(|e| Error::syntax(format!("decoding unified diff: {}", e)))?; 188 let d = Diff::try_from(d) 189 .map_err(|e| Error::syntax(format!("decoding unified diff: {}", e)))?; 190 191 Ok(d) 192 } 193 } 194 195 impl Encode for Diff { 196 fn encode(&self, w: &mut Writer) -> Result<(), Error> { 197 for fdiff in self.files() { 198 fdiff.encode(w)?; 199 } 200 Ok(()) 201 } 202 } 203 204 impl Decode for DiffContent { 205 fn decode(r: &mut impl io::BufRead) -> Result<Self, Error> { 206 let mut hunks = Vec::default(); 207 let mut additions = 0; 208 let mut deletions = 0; 209 210 while let Some(h) = Hunk::try_decode(r)? { 211 for l in &h.lines { 212 match l { 213 Modification::Addition(_) => additions += 1, 214 Modification::Deletion(_) => deletions += 1, 215 _ => {} 216 } 217 } 218 hunks.push(h); 219 } 220 221 if hunks.is_empty() { 222 Ok(DiffContent::Empty) 223 } else { 224 // TODO: Handle case for binary. 225 Ok(DiffContent::Plain { 226 hunks: Hunks::from(hunks), 227 stats: FileStats { 228 additions, 229 deletions, 230 }, 231 // TODO: Properly handle EndOfLine field 232 eof: diff::EofNewLine::NoneMissing, 233 }) 234 } 235 } 236 } 237 238 impl Encode for DiffContent { 239 fn encode(&self, w: &mut Writer) -> Result<(), Error> { 240 match self { 241 DiffContent::Plain { hunks, .. } => { 242 for h in hunks.iter() { 243 h.encode(w)?; 244 } 245 } 246 DiffContent::Empty => {} 247 DiffContent::Binary => todo!("DiffContent::Binary encoding not implemented"), 248 } 249 Ok(()) 250 } 251 } 252 253 impl Encode for FileDiff { 254 fn encode(&self, w: &mut Writer) -> Result<(), Error> { 255 w.encode(&FileHeader::from(self))?; 256 match self { 257 FileDiff::Modified(f) => { 258 w.encode(&f.diff)?; 259 } 260 FileDiff::Added(f) => { 261 w.encode(&f.diff)?; 262 } 263 FileDiff::Copied(f) => { 264 w.encode(&f.diff)?; 265 } 266 FileDiff::Deleted(f) => { 267 w.encode(&f.diff)?; 268 } 269 FileDiff::Moved(f) => { 270 // Nb. We only display diffs as moves when the file was not changed. 271 w.encode(&f.diff)?; 272 } 273 } 274 275 Ok(()) 276 } 277 } 278 279 impl Encode for FileHeader { 280 fn encode(&self, w: &mut Writer) -> Result<(), Error> { 281 match self { 282 FileHeader::Modified { path, old, new } => { 283 w.meta(format!( 284 "diff --git a/{} b/{}", 285 path.display(), 286 path.display() 287 ))?; 288 289 if old.mode == new.mode { 290 w.meta(format!( 291 "index {}..{} {:o}", 292 term::format::oid(old.oid), 293 term::format::oid(new.oid), 294 u32::from(old.mode.clone()), 295 ))?; 296 } else { 297 w.meta(format!("old mode {:o}", u32::from(old.mode.clone())))?; 298 w.meta(format!("new mode {:o}", u32::from(new.mode.clone())))?; 299 w.meta(format!( 300 "index {}..{}", 301 term::format::oid(old.oid), 302 term::format::oid(new.oid) 303 ))?; 304 } 305 306 w.meta(format!("--- a/{}", path.display()))?; 307 w.meta(format!("+++ b/{}", path.display()))?; 308 } 309 FileHeader::Added { path, new } => { 310 w.meta(format!( 311 "diff --git a/{} b/{}", 312 path.display(), 313 path.display() 314 ))?; 315 316 w.meta(format!("new file mode {:o}", u32::from(new.mode.clone())))?; 317 w.meta(format!( 318 "index {}..{}", 319 term::format::oid(Oid::zero()), 320 term::format::oid(new.oid), 321 ))?; 322 323 w.meta("--- /dev/null")?; 324 w.meta(format!("+++ b/{}", path.display()))?; 325 } 326 FileHeader::Copied { .. } => todo!(), 327 FileHeader::Deleted { path, old } => { 328 w.meta(format!( 329 "diff --git a/{} b/{}", 330 path.display(), 331 path.display() 332 ))?; 333 334 w.meta(format!( 335 "deleted file mode {:o}", 336 u32::from(old.mode.clone()) 337 ))?; 338 w.meta(format!( 339 "index {}..{}", 340 term::format::oid(old.oid), 341 term::format::oid(Oid::zero()) 342 ))?; 343 344 w.meta(format!("--- a/{}", path.display()))?; 345 w.meta("+++ /dev/null".to_string())?; 346 } 347 FileHeader::Moved { old_path, new_path } => { 348 w.meta(format!( 349 "diff --git a/{} b/{}", 350 old_path.display(), 351 new_path.display() 352 ))?; 353 w.meta("similarity index 100%")?; 354 w.meta(format!("rename from {}", old_path.display()))?; 355 w.meta(format!("rename to {}", new_path.display()))?; 356 } 357 }; 358 Ok(()) 359 } 360 } 361 362 impl Decode for HunkHeader { 363 fn decode(r: &mut impl io::BufRead) -> Result<Self, Error> { 364 let mut line = String::default(); 365 if r.read_line(&mut line)? == 0 { 366 return Err(Error::UnexpectedEof); 367 }; 368 369 let mut header = HunkHeader::default(); 370 let s = line 371 .strip_prefix("@@ -") 372 .ok_or(Error::syntax("missing '@@ -'"))?; 373 374 let (old, s) = s 375 .split_once(" +") 376 .ok_or(Error::syntax("missing new line information"))?; 377 let (line_no, size) = old.split_once(',').unwrap_or((old, "1")); 378 379 header.old_line_no = line_no.parse()?; 380 header.old_size = size.parse()?; 381 382 let (new, s) = s 383 .split_once(" @@") 384 .ok_or(Error::syntax("closing '@@' is missing"))?; 385 let (line_no, size) = new.split_once(',').unwrap_or((new, "1")); 386 387 header.new_line_no = line_no.parse()?; 388 header.new_size = size.parse()?; 389 390 let s = s.strip_prefix(' ').unwrap_or(s); 391 header.text = s.as_bytes().to_vec(); 392 393 Ok(header) 394 } 395 } 396 397 impl Encode for HunkHeader { 398 fn encode(&self, w: &mut Writer) -> Result<(), Error> { 399 let old = if self.old_size == 1 { 400 format!("{}", self.old_line_no) 401 } else { 402 format!("{},{}", self.old_line_no, self.old_size) 403 }; 404 let new = if self.new_size == 1 { 405 format!("{}", self.new_line_no) 406 } else { 407 format!("{},{}", self.new_line_no, self.new_size) 408 }; 409 let text = if self.text.is_empty() { 410 "".to_string() 411 } else { 412 format!(" {}", String::from_utf8_lossy(&self.text)) 413 }; 414 w.meta(format!("@@ -{old} +{new} @@{text}"))?; 415 416 Ok(()) 417 } 418 } 419 420 impl Decode for Hunk<Modification> { 421 fn decode(r: &mut impl io::BufRead) -> Result<Self, Error> { 422 let header = HunkHeader::decode(r)?; 423 424 let mut lines = Vec::new(); 425 let mut new_line: u32 = 0; 426 let mut old_line: u32 = 0; 427 428 while old_line < header.old_size || new_line < header.new_size { 429 if old_line > header.old_size { 430 return Err(Error::syntax(format!( 431 "expected '{}' old lines", 432 header.old_size 433 ))); 434 } else if new_line > header.new_size { 435 return Err(Error::syntax(format!( 436 "expected '{0}' new lines", 437 header.new_size 438 ))); 439 } 440 441 let Some(line) = Modification::try_decode(r)? else { 442 return Err(Error::syntax(format!( 443 "expected '{}' old lines and '{}' new lines, but found '{}' and '{}'", 444 header.old_size, header.new_size, old_line, new_line, 445 ))); 446 }; 447 448 let line = match line { 449 Modification::Addition(v) => { 450 let l = Modification::addition(v.line, header.new_line_no + new_line); 451 new_line += 1; 452 l 453 } 454 Modification::Deletion(v) => { 455 let l = Modification::deletion(v.line, header.old_line_no + old_line); 456 old_line += 1; 457 l 458 } 459 Modification::Context { line, .. } => { 460 let l = Modification::Context { 461 line, 462 line_no_old: header.old_line_no + old_line, 463 line_no_new: header.new_line_no + new_line, 464 }; 465 new_line += 1; 466 old_line += 1; 467 l 468 } 469 }; 470 471 lines.push(line); 472 } 473 474 Ok(Hunk { 475 header: Line::from(header.to_unified_string()?), 476 lines, 477 old: header.old_line_range(), 478 new: header.new_line_range(), 479 }) 480 } 481 } 482 483 impl Encode for Hunk<Modification> { 484 fn encode(&self, w: &mut Writer) -> Result<(), Error> { 485 // TODO: Remove trailing newlines accurately. 486 // `trim_end()` will destroy diff information if the diff has a trailing whitespace on 487 // purpose. 488 w.magenta(self.header.from_utf8_lossy().trim_end())?; 489 for l in &self.lines { 490 l.encode(w)?; 491 } 492 493 Ok(()) 494 } 495 } 496 497 impl Decode for Modification { 498 fn decode(r: &mut impl io::BufRead) -> Result<Self, Error> { 499 let mut line = String::new(); 500 if r.read_line(&mut line)? == 0 { 501 return Err(Error::UnexpectedEof); 502 }; 503 504 let mut chars = line.chars(); 505 let l = match chars.next() { 506 Some('+') => Modification::addition(chars.as_str().to_string(), 0), 507 Some('-') => Modification::deletion(chars.as_str().to_string(), 0), 508 Some(' ') => Modification::Context { 509 line: chars.as_str().to_string().into(), 510 line_no_old: 0, 511 line_no_new: 0, 512 }, 513 Some(c) => { 514 return Err(Error::syntax(format!( 515 "indicator character expected, but got '{c}'", 516 ))) 517 } 518 None => return Err(Error::UnexpectedEof), 519 }; 520 521 Ok(l) 522 } 523 } 524 525 impl Encode for Modification { 526 fn encode(&self, w: &mut Writer) -> Result<(), Error> { 527 match self { 528 Modification::Deletion(radicle_surf::diff::Deletion { line, .. }) => { 529 let s = format!("-{}", String::from_utf8_lossy(line.as_bytes()).trim_end()); 530 w.write(s, term::Style::new(term::Color::Red))?; 531 } 532 Modification::Addition(radicle_surf::diff::Addition { line, .. }) => { 533 let s = format!("+{}", String::from_utf8_lossy(line.as_bytes()).trim_end()); 534 w.write(s, term::Style::new(term::Color::Green))?; 535 } 536 Modification::Context { line, .. } => { 537 let s = format!(" {}", String::from_utf8_lossy(line.as_bytes()).trim_end()); 538 w.write(s, term::Style::default().dim())?; 539 } 540 } 541 542 Ok(()) 543 } 544 } 545 546 /// An IO Writer with color printing to the terminal. 547 pub struct Writer<'a> { 548 styled: bool, 549 stream: Box<dyn io::Write + 'a>, 550 } 551 552 impl<'a> Writer<'a> { 553 pub fn new(w: impl io::Write + 'a) -> Self { 554 Self { 555 styled: false, 556 stream: Box::new(w), 557 } 558 } 559 560 pub fn encode<T: Encode>(&mut self, arg: &T) -> Result<(), Error> { 561 arg.encode(self)?; 562 Ok(()) 563 } 564 565 pub fn styled(mut self, value: bool) -> Self { 566 self.styled = value; 567 self 568 } 569 570 pub fn write(&mut self, s: impl fmt::Display, style: term::Style) -> io::Result<()> { 571 if self.styled { 572 writeln!(self.stream, "{}", term::Paint::new(s).with_style(style)) 573 } else { 574 writeln!(self.stream, "{s}") 575 } 576 } 577 578 pub fn meta(&mut self, s: impl fmt::Display) -> io::Result<()> { 579 self.write(s, term::Style::new(term::Color::Yellow)) 580 } 581 582 pub fn magenta(&mut self, s: impl fmt::Display) -> io::Result<()> { 583 self.write(s, term::Style::new(term::Color::Magenta)) 584 } 585 } 586 587 #[cfg(test)] 588 mod test { 589 use super::*; 590 591 #[test] 592 fn test_diff_encode_decode_diff() { 593 let diff_a = diff::Diff::parse(include_str!(concat!( 594 env!("CARGO_MANIFEST_DIR"), 595 "/tests/data/diff.diff" 596 ))) 597 .unwrap(); 598 assert_eq!( 599 include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/diff.diff")), 600 diff_a.to_unified_string().unwrap() 601 ); 602 } 603 604 #[test] 605 fn test_diff_content_encode_decode_content() { 606 let diff_content = diff::DiffContent::parse(include_str!(concat!( 607 env!("CARGO_MANIFEST_DIR"), 608 "/tests/data/diff_body.diff" 609 ))) 610 .unwrap(); 611 assert_eq!( 612 include_str!(concat!( 613 env!("CARGO_MANIFEST_DIR"), 614 "/tests/data/diff_body.diff" 615 )), 616 diff_content.to_unified_string().unwrap() 617 ); 618 } 619 620 // TODO: Test parsing a real diff from this repository. 621 }