/ radicle-cli / src / git / unified_diff.rs
unified_diff.rs
  1  //! Formatting support for Git's [diff format](https://git-scm.com/docs/diff-format).
  2  use std::fmt;
  3  use std::io;
  4  use std::path::PathBuf;
  5  
  6  use radicle_surf::diff::FileStats;
  7  use thiserror::Error;
  8  
  9  use radicle::git;
 10  use radicle::git::raw::Oid;
 11  use radicle_surf::diff;
 12  use radicle_surf::diff::{Diff, DiffContent, DiffFile, FileDiff, Hunk, Hunks, Line, Modification};
 13  
 14  use crate::terminal as term;
 15  
 16  #[derive(Debug, Error)]
 17  pub enum Error {
 18      /// Attempt to decode from a source with no data left.
 19      #[error("unexpected end of file")]
 20      UnexpectedEof,
 21      #[error(transparent)]
 22      Io(#[from] io::Error),
 23      /// Catchall for syntax error messages.
 24      #[error("{0}")]
 25      Syntax(String),
 26      #[error(transparent)]
 27      ParseInt(#[from] std::num::ParseIntError),
 28      #[error(transparent)]
 29      Utf8(#[from] std::string::FromUtf8Error),
 30  }
 31  
 32  impl Error {
 33      pub fn syntax(msg: impl ToString) -> Self {
 34          Self::Syntax(msg.to_string())
 35      }
 36  
 37      pub fn is_eof(&self) -> bool {
 38          match self {
 39              Self::UnexpectedEof => true,
 40              Self::Io(e) => e.kind() == io::ErrorKind::UnexpectedEof,
 41              _ => false,
 42          }
 43      }
 44  }
 45  
 46  /// The kind of FileDiff Header which can be used to print the FileDiff information which precedes
 47  /// `Hunks`.
 48  #[derive(Debug, Clone, PartialEq)]
 49  pub enum FileHeader {
 50      Added {
 51          path: PathBuf,
 52          new: DiffFile,
 53      },
 54      Copied {
 55          old_path: PathBuf,
 56          new_path: PathBuf,
 57      },
 58      Deleted {
 59          path: PathBuf,
 60          old: DiffFile,
 61      },
 62      Modified {
 63          path: PathBuf,
 64          old: DiffFile,
 65          new: DiffFile,
 66      },
 67      Moved {
 68          old_path: PathBuf,
 69          new_path: PathBuf,
 70      },
 71  }
 72  
 73  impl std::convert::From<&FileDiff> for FileHeader {
 74      // TODO: Pathnames with 'unusual names' need to be quoted.
 75      fn from(value: &FileDiff) -> Self {
 76          match value {
 77              FileDiff::Modified(v) => FileHeader::Modified {
 78                  path: v.path.clone(),
 79                  old: v.old.clone(),
 80                  new: v.new.clone(),
 81              },
 82              FileDiff::Added(v) => FileHeader::Added {
 83                  path: v.path.clone(),
 84                  new: v.new.clone(),
 85              },
 86              FileDiff::Copied(_) => todo!(),
 87              FileDiff::Deleted(v) => FileHeader::Deleted {
 88                  path: v.path.clone(),
 89                  old: v.old.clone(),
 90              },
 91              FileDiff::Moved(v) => FileHeader::Moved {
 92                  old_path: v.old_path.clone(),
 93                  new_path: v.new_path.clone(),
 94              },
 95          }
 96      }
 97  }
 98  
 99  /// Meta data which precedes a `Hunk`s content.
100  ///
101  /// For example:
102  /// @@ -24,8 +24,6 @@ use radicle_surf::diff::*;
103  #[derive(Clone, Debug, Default, PartialEq)]
104  pub struct HunkHeader {
105      /// Line the hunk started in the old file.
106      pub old_line_no: u32,
107      /// Number of removed and context lines.
108      pub old_size: u32,
109      /// Line the hunk started in the new file.
110      pub new_line_no: u32,
111      /// Number of added and context lines.
112      pub new_size: u32,
113      /// Trailing text for the Hunk Header.
114      ///
115      /// From Git's documentation "Hunk headers mention the name of the function to which the hunk
116      /// applies. See "Defining a custom hunk-header" in gitattributes for details of how to tailor
117      /// to this to specific languages.".  It is likely best to leave this empty when generating
118      /// diffs.
119      pub text: Vec<u8>,
120  }
121  
122  impl HunkHeader {
123      pub fn old_line_range(&self) -> std::ops::Range<u32> {
124          let start: u32 = self.old_line_no;
125          let end: u32 = self.old_line_no + self.old_size;
126          start..end
127      }
128  
129      pub fn new_line_range(&self) -> std::ops::Range<u32> {
130          let start: u32 = self.new_line_no;
131          let end: u32 = self.new_line_no + self.new_size;
132          start..end
133      }
134  }
135  
136  /// Diff-related types that can be decoded from the unified diff format.
137  pub trait Decode: Sized {
138      /// Decode, and fail if we reach the end of the stream.
139      fn decode(r: &mut impl io::BufRead) -> Result<Self, Error>;
140  
141      /// Decode, and return a `None` if we reached the end of the stream.
142      fn try_decode(r: &mut impl io::BufRead) -> Result<Option<Self>, Error> {
143          match Self::decode(r) {
144              Ok(v) => Ok(Some(v)),
145              Err(Error::UnexpectedEof) => Ok(None),
146              Err(e) => Err(e),
147          }
148      }
149  
150      /// Decode from a string input.
151      fn parse(s: &str) -> Result<Self, Error> {
152          Self::from_bytes(s.as_bytes())
153      }
154  
155      /// Decode from a string input.
156      fn from_bytes(bytes: &[u8]) -> Result<Self, Error> {
157          let mut r = io::BufReader::new(bytes);
158          Self::decode(&mut r)
159      }
160  }
161  
162  /// Diff-related types that can be encoded intro the unified diff format.
163  pub trait Encode: Sized {
164      /// Encode type into diff writer.
165      fn encode(&self, w: &mut Writer) -> Result<(), Error>;
166  
167      /// Encode into unified diff string.
168      fn to_unified_string(&self) -> Result<String, Error> {
169          let mut buf = Vec::new();
170          let mut w = Writer::new(&mut buf);
171  
172          w.encode(self)?;
173          drop(w);
174  
175          String::from_utf8(buf).map_err(Error::from)
176      }
177  }
178  
179  impl Decode for Diff {
180      /// Decode from git's unified diff format, consuming the entire input.
181      fn decode(r: &mut impl io::BufRead) -> Result<Self, Error> {
182          let mut s = String::new();
183  
184          r.read_to_string(&mut s)?;
185  
186          let d = git::raw::Diff::from_buffer(s.as_ref())
187              .map_err(|e| Error::syntax(format!("decoding unified diff: {}", e)))?;
188          let d = Diff::try_from(d)
189              .map_err(|e| Error::syntax(format!("decoding unified diff: {}", e)))?;
190  
191          Ok(d)
192      }
193  }
194  
195  impl Encode for Diff {
196      fn encode(&self, w: &mut Writer) -> Result<(), Error> {
197          for fdiff in self.files() {
198              fdiff.encode(w)?;
199          }
200          Ok(())
201      }
202  }
203  
204  impl Decode for DiffContent {
205      fn decode(r: &mut impl io::BufRead) -> Result<Self, Error> {
206          let mut hunks = Vec::default();
207          let mut additions = 0;
208          let mut deletions = 0;
209  
210          while let Some(h) = Hunk::try_decode(r)? {
211              for l in &h.lines {
212                  match l {
213                      Modification::Addition(_) => additions += 1,
214                      Modification::Deletion(_) => deletions += 1,
215                      _ => {}
216                  }
217              }
218              hunks.push(h);
219          }
220  
221          if hunks.is_empty() {
222              Ok(DiffContent::Empty)
223          } else {
224              // TODO: Handle case for binary.
225              Ok(DiffContent::Plain {
226                  hunks: Hunks::from(hunks),
227                  stats: FileStats {
228                      additions,
229                      deletions,
230                  },
231                  // TODO: Properly handle EndOfLine field
232                  eof: diff::EofNewLine::NoneMissing,
233              })
234          }
235      }
236  }
237  
238  impl Encode for DiffContent {
239      fn encode(&self, w: &mut Writer) -> Result<(), Error> {
240          match self {
241              DiffContent::Plain { hunks, .. } => {
242                  for h in hunks.iter() {
243                      h.encode(w)?;
244                  }
245              }
246              DiffContent::Empty => {}
247              DiffContent::Binary => todo!("DiffContent::Binary encoding not implemented"),
248          }
249          Ok(())
250      }
251  }
252  
253  impl Encode for FileDiff {
254      fn encode(&self, w: &mut Writer) -> Result<(), Error> {
255          w.encode(&FileHeader::from(self))?;
256          match self {
257              FileDiff::Modified(f) => {
258                  w.encode(&f.diff)?;
259              }
260              FileDiff::Added(f) => {
261                  w.encode(&f.diff)?;
262              }
263              FileDiff::Copied(f) => {
264                  w.encode(&f.diff)?;
265              }
266              FileDiff::Deleted(f) => {
267                  w.encode(&f.diff)?;
268              }
269              FileDiff::Moved(f) => {
270                  // Nb. We only display diffs as moves when the file was not changed.
271                  w.encode(&f.diff)?;
272              }
273          }
274  
275          Ok(())
276      }
277  }
278  
279  impl Encode for FileHeader {
280      fn encode(&self, w: &mut Writer) -> Result<(), Error> {
281          match self {
282              FileHeader::Modified { path, old, new } => {
283                  w.meta(format!(
284                      "diff --git a/{} b/{}",
285                      path.display(),
286                      path.display()
287                  ))?;
288  
289                  if old.mode == new.mode {
290                      w.meta(format!(
291                          "index {}..{} {:o}",
292                          term::format::oid(old.oid),
293                          term::format::oid(new.oid),
294                          u32::from(old.mode.clone()),
295                      ))?;
296                  } else {
297                      w.meta(format!("old mode {:o}", u32::from(old.mode.clone())))?;
298                      w.meta(format!("new mode {:o}", u32::from(new.mode.clone())))?;
299                      w.meta(format!(
300                          "index {}..{}",
301                          term::format::oid(old.oid),
302                          term::format::oid(new.oid)
303                      ))?;
304                  }
305  
306                  w.meta(format!("--- a/{}", path.display()))?;
307                  w.meta(format!("+++ b/{}", path.display()))?;
308              }
309              FileHeader::Added { path, new } => {
310                  w.meta(format!(
311                      "diff --git a/{} b/{}",
312                      path.display(),
313                      path.display()
314                  ))?;
315  
316                  w.meta(format!("new file mode {:o}", u32::from(new.mode.clone())))?;
317                  w.meta(format!(
318                      "index {}..{}",
319                      term::format::oid(Oid::zero()),
320                      term::format::oid(new.oid),
321                  ))?;
322  
323                  w.meta("--- /dev/null")?;
324                  w.meta(format!("+++ b/{}", path.display()))?;
325              }
326              FileHeader::Copied { .. } => todo!(),
327              FileHeader::Deleted { path, old } => {
328                  w.meta(format!(
329                      "diff --git a/{} b/{}",
330                      path.display(),
331                      path.display()
332                  ))?;
333  
334                  w.meta(format!(
335                      "deleted file mode {:o}",
336                      u32::from(old.mode.clone())
337                  ))?;
338                  w.meta(format!(
339                      "index {}..{}",
340                      term::format::oid(old.oid),
341                      term::format::oid(Oid::zero())
342                  ))?;
343  
344                  w.meta(format!("--- a/{}", path.display()))?;
345                  w.meta("+++ /dev/null".to_string())?;
346              }
347              FileHeader::Moved { old_path, new_path } => {
348                  w.meta(format!(
349                      "diff --git a/{} b/{}",
350                      old_path.display(),
351                      new_path.display()
352                  ))?;
353                  w.meta("similarity index 100%")?;
354                  w.meta(format!("rename from {}", old_path.display()))?;
355                  w.meta(format!("rename to {}", new_path.display()))?;
356              }
357          };
358          Ok(())
359      }
360  }
361  
362  impl Decode for HunkHeader {
363      fn decode(r: &mut impl io::BufRead) -> Result<Self, Error> {
364          let mut line = String::default();
365          if r.read_line(&mut line)? == 0 {
366              return Err(Error::UnexpectedEof);
367          };
368  
369          let mut header = HunkHeader::default();
370          let s = line
371              .strip_prefix("@@ -")
372              .ok_or(Error::syntax("missing '@@ -'"))?;
373  
374          let (old, s) = s
375              .split_once(" +")
376              .ok_or(Error::syntax("missing new line information"))?;
377          let (line_no, size) = old.split_once(',').unwrap_or((old, "1"));
378  
379          header.old_line_no = line_no.parse()?;
380          header.old_size = size.parse()?;
381  
382          let (new, s) = s
383              .split_once(" @@")
384              .ok_or(Error::syntax("closing '@@' is missing"))?;
385          let (line_no, size) = new.split_once(',').unwrap_or((new, "1"));
386  
387          header.new_line_no = line_no.parse()?;
388          header.new_size = size.parse()?;
389  
390          let s = s.strip_prefix(' ').unwrap_or(s);
391          header.text = s.as_bytes().to_vec();
392  
393          Ok(header)
394      }
395  }
396  
397  impl Encode for HunkHeader {
398      fn encode(&self, w: &mut Writer) -> Result<(), Error> {
399          let old = if self.old_size == 1 {
400              format!("{}", self.old_line_no)
401          } else {
402              format!("{},{}", self.old_line_no, self.old_size)
403          };
404          let new = if self.new_size == 1 {
405              format!("{}", self.new_line_no)
406          } else {
407              format!("{},{}", self.new_line_no, self.new_size)
408          };
409          let text = if self.text.is_empty() {
410              "".to_string()
411          } else {
412              format!(" {}", String::from_utf8_lossy(&self.text))
413          };
414          w.meta(format!("@@ -{old} +{new} @@{text}"))?;
415  
416          Ok(())
417      }
418  }
419  
420  impl Decode for Hunk<Modification> {
421      fn decode(r: &mut impl io::BufRead) -> Result<Self, Error> {
422          let header = HunkHeader::decode(r)?;
423  
424          let mut lines = Vec::new();
425          let mut new_line: u32 = 0;
426          let mut old_line: u32 = 0;
427  
428          while old_line < header.old_size || new_line < header.new_size {
429              if old_line > header.old_size {
430                  return Err(Error::syntax(format!(
431                      "expected '{}' old lines",
432                      header.old_size
433                  )));
434              } else if new_line > header.new_size {
435                  return Err(Error::syntax(format!(
436                      "expected '{0}' new lines",
437                      header.new_size
438                  )));
439              }
440  
441              let Some(line) = Modification::try_decode(r)? else {
442                      return Err(Error::syntax(format!(
443                          "expected '{}' old lines and '{}' new lines, but found '{}' and '{}'",
444                          header.old_size, header.new_size, old_line, new_line,
445                      )));
446              };
447  
448              let line = match line {
449                  Modification::Addition(v) => {
450                      let l = Modification::addition(v.line, header.new_line_no + new_line);
451                      new_line += 1;
452                      l
453                  }
454                  Modification::Deletion(v) => {
455                      let l = Modification::deletion(v.line, header.old_line_no + old_line);
456                      old_line += 1;
457                      l
458                  }
459                  Modification::Context { line, .. } => {
460                      let l = Modification::Context {
461                          line,
462                          line_no_old: header.old_line_no + old_line,
463                          line_no_new: header.new_line_no + new_line,
464                      };
465                      new_line += 1;
466                      old_line += 1;
467                      l
468                  }
469              };
470  
471              lines.push(line);
472          }
473  
474          Ok(Hunk {
475              header: Line::from(header.to_unified_string()?),
476              lines,
477              old: header.old_line_range(),
478              new: header.new_line_range(),
479          })
480      }
481  }
482  
483  impl Encode for Hunk<Modification> {
484      fn encode(&self, w: &mut Writer) -> Result<(), Error> {
485          // TODO: Remove trailing newlines accurately.
486          // `trim_end()` will destroy diff information if the diff has a trailing whitespace on
487          // purpose.
488          w.magenta(self.header.from_utf8_lossy().trim_end())?;
489          for l in &self.lines {
490              l.encode(w)?;
491          }
492  
493          Ok(())
494      }
495  }
496  
497  impl Decode for Modification {
498      fn decode(r: &mut impl io::BufRead) -> Result<Self, Error> {
499          let mut line = String::new();
500          if r.read_line(&mut line)? == 0 {
501              return Err(Error::UnexpectedEof);
502          };
503  
504          let mut chars = line.chars();
505          let l = match chars.next() {
506              Some('+') => Modification::addition(chars.as_str().to_string(), 0),
507              Some('-') => Modification::deletion(chars.as_str().to_string(), 0),
508              Some(' ') => Modification::Context {
509                  line: chars.as_str().to_string().into(),
510                  line_no_old: 0,
511                  line_no_new: 0,
512              },
513              Some(c) => {
514                  return Err(Error::syntax(format!(
515                      "indicator character expected, but got '{c}'",
516                  )))
517              }
518              None => return Err(Error::UnexpectedEof),
519          };
520  
521          Ok(l)
522      }
523  }
524  
525  impl Encode for Modification {
526      fn encode(&self, w: &mut Writer) -> Result<(), Error> {
527          match self {
528              Modification::Deletion(radicle_surf::diff::Deletion { line, .. }) => {
529                  let s = format!("-{}", String::from_utf8_lossy(line.as_bytes()).trim_end());
530                  w.write(s, term::Style::new(term::Color::Red))?;
531              }
532              Modification::Addition(radicle_surf::diff::Addition { line, .. }) => {
533                  let s = format!("+{}", String::from_utf8_lossy(line.as_bytes()).trim_end());
534                  w.write(s, term::Style::new(term::Color::Green))?;
535              }
536              Modification::Context { line, .. } => {
537                  let s = format!(" {}", String::from_utf8_lossy(line.as_bytes()).trim_end());
538                  w.write(s, term::Style::default().dim())?;
539              }
540          }
541  
542          Ok(())
543      }
544  }
545  
/// An IO Writer with color printing to the terminal.
pub struct Writer<'a> {
    /// Whether lines are painted with their style before being written.
    styled: bool,
    /// Destination all output is written to.
    stream: Box<dyn io::Write + 'a>,
}
551  
552  impl<'a> Writer<'a> {
553      pub fn new(w: impl io::Write + 'a) -> Self {
554          Self {
555              styled: false,
556              stream: Box::new(w),
557          }
558      }
559  
560      pub fn encode<T: Encode>(&mut self, arg: &T) -> Result<(), Error> {
561          arg.encode(self)?;
562          Ok(())
563      }
564  
565      pub fn styled(mut self, value: bool) -> Self {
566          self.styled = value;
567          self
568      }
569  
570      pub fn write(&mut self, s: impl fmt::Display, style: term::Style) -> io::Result<()> {
571          if self.styled {
572              writeln!(self.stream, "{}", term::Paint::new(s).with_style(style))
573          } else {
574              writeln!(self.stream, "{s}")
575          }
576      }
577  
578      pub fn meta(&mut self, s: impl fmt::Display) -> io::Result<()> {
579          self.write(s, term::Style::new(term::Color::Yellow))
580      }
581  
582      pub fn magenta(&mut self, s: impl fmt::Display) -> io::Result<()> {
583          self.write(s, term::Style::new(term::Color::Magenta))
584      }
585  }
586  
#[cfg(test)]
mod test {
    use super::*;

    /// A complete diff, including the file headers.
    const DIFF: &str = include_str!(concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/tests/data/diff.diff"
    ));

    /// Only the hunks of a diff, without any file header.
    const DIFF_BODY: &str = include_str!(concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/tests/data/diff_body.diff"
    ));

    /// Decoding a diff and encoding it again must reproduce the input.
    #[test]
    fn test_diff_encode_decode_diff() {
        let diff_a = diff::Diff::parse(DIFF).unwrap();
        let encoded = diff_a.to_unified_string().unwrap();

        assert_eq!(DIFF, encoded);
    }

    /// Decoding diff content and encoding it again must reproduce the input.
    #[test]
    fn test_diff_content_encode_decode_content() {
        let diff_content = diff::DiffContent::parse(DIFF_BODY).unwrap();
        let encoded = diff_content.to_unified_string().unwrap();

        assert_eq!(DIFF_BODY, encoded);
    }

    // TODO: Test parsing a real diff from this repository.
}