Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 110 additions & 39 deletions gix-diff/tests/diff/blob/slider.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
use gix_diff::blob::intern::TokenSource;
use gix_diff::blob::unified_diff::ContextSize;
use gix_diff::blob::{Algorithm, UnifiedDiff};
use gix_diff::blob::Algorithm;
use gix_object::bstr::ByteSlice;
use gix_testtools::bstr::{BString, ByteVec};
use pretty_assertions::StrComparison;
use std::ffi::OsStr;
use std::path::Path;

#[test]
fn baseline() -> gix_testtools::Result {
fn baseline_v1() -> gix_testtools::Result {
use gix_diff::blob::{unified_diff::ContextSize, UnifiedDiff};

let worktree_path = gix_testtools::scripted_fixture_read_only_standalone("make_diff_for_sliders_repo.sh")?;
let asset_dir = worktree_path.join("assets");

Expand All @@ -15,29 +18,9 @@ fn baseline() -> gix_testtools::Result {

for entry in dir {
let entry = entry?;
let file_name = entry.file_name().into_string().expect("to be string");

if !file_name.ends_with(".baseline") {
let Some((file_name, algorithm, old_data, new_data)) = parse_dir_entry(&asset_dir, &entry.file_name()) else {
continue;
}

let parts: Vec<_> = file_name.split('.').collect();
let [name, algorithm, ..] = parts[..] else {
unreachable!()
};
let algorithm = match algorithm {
"myers" => Algorithm::Myers,
"histogram" => Algorithm::Histogram,
_ => unreachable!(),
};

let parts: Vec<_> = name.split('-').collect();
let [old_blob_id, new_blob_id] = parts[..] else {
unreachable!();
};

let old_data = std::fs::read(asset_dir.join(format!("{old_blob_id}.blob")))?;
let new_data = std::fs::read(asset_dir.join(format!("{new_blob_id}.blob")))?;

let interner = gix_diff::blob::intern::InternedInput::new(
tokens_for_diffing(old_data.as_slice()),
Expand Down Expand Up @@ -70,17 +53,65 @@ fn baseline() -> gix_testtools::Result {
})
.to_string();

let baseline = baseline
.fold(BString::default(), |mut acc, diff_hunk| {
acc.push_str(diff_hunk.header.to_string().as_str());
acc.push(b'\n');
let baseline = baseline.fold_to_unidiff().to_string();
let actual_matches_baseline = actual == baseline;
diffs.push((actual, baseline, actual_matches_baseline, file_name));
}

acc.extend_from_slice(&diff_hunk.lines);
if diffs.is_empty() {
eprintln!("Slider baseline isn't setup - look at ./gix-diff/tests/README.md for instructions");
}

acc
})
assert_diffs(&diffs);

Ok(())
}

fn tokens_for_diffing(data: &[u8]) -> impl gix_diff::blob::intern::TokenSource<Token = &[u8]> {
gix_diff::blob::sources::byte_lines(data)
}

#[test]
fn baseline_v2() -> gix_testtools::Result {
use gix_diff::blob::v2::{Algorithm, BasicLineDiffPrinter, Diff, InternedInput, UnifiedDiffConfig};

let worktree_path = gix_testtools::scripted_fixture_read_only_standalone("make_diff_for_sliders_repo.sh")?;
let asset_dir = worktree_path.join("assets");

let dir = std::fs::read_dir(&worktree_path)?;

let mut diffs = Vec::new();

for entry in dir {
let entry = entry?;
let Some((file_name, algorithm, old_data, new_data)) = parse_dir_entry(&asset_dir, &entry.file_name()) else {
continue;
};

let input = InternedInput::new(old_data.to_str().unwrap(), new_data.to_str().unwrap());
Copy link

Copilot AI Dec 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The .unwrap() calls on .to_str() are problematic. The project guidelines prohibit .unwrap() in favor of .expect() with context or proper error handling with ?. Additionally, this assumes the blob data is valid UTF-8, which may not always be true for binary files or files with different encodings. This could cause the test to panic instead of handling non-UTF-8 data gracefully.

Copilot generated this review using guidance from repository custom instructions.
let algorithm = match algorithm {
gix_diff::blob::Algorithm::Myers => Algorithm::Myers,
gix_diff::blob::Algorithm::Histogram => Algorithm::Histogram,
gix_diff::blob::Algorithm::MyersMinimal => Algorithm::MyersMinimal,
};
Comment on lines +92 to +96
Copy link

Copilot AI Dec 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The algorithm mapping is duplicated between parse_dir_entry (lines 138-142) and this section. Since parse_dir_entry already returns the Algorithm enum from the v1 API, this remapping to v2 Algorithm could be extracted into a helper function to avoid duplication and make the relationship between v1 and v2 algorithms clearer.

Copilot uses AI. Check for mistakes.

let mut diff = Diff::compute(algorithm, &input);
diff.postprocess_lines(&input);
Comment on lines +98 to +99
Copy link

Copilot AI Dec 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The variable name diff is shadowed after mutation (line 98 creates it, line 99 mutates it). Consider renaming to clarify the mutation, or combine the operations if the intermediate state isn't needed.

Copilot uses AI. Check for mistakes.

let actual = diff
.unified_diff(
&BasicLineDiffPrinter(&input.interner),
UnifiedDiffConfig::default(),
&input,
)
.to_string();

let baseline_path = worktree_path.join(&file_name);
let baseline = std::fs::read(baseline_path)?;
let baseline = baseline::Baseline::new(&baseline);

let baseline = baseline.fold_to_unidiff().to_string();

let actual_matches_baseline = actual == baseline;
diffs.push((actual, baseline, actual_matches_baseline, file_name));
}
Expand All @@ -89,6 +120,38 @@ fn baseline() -> gix_testtools::Result {
eprintln!("Slider baseline isn't setup - look at ./gix-diff/tests/README.md for instructions");
}

assert_diffs(&diffs);
Ok(())
}

fn parse_dir_entry(asset_dir: &Path, file_name: &OsStr) -> Option<(String, Algorithm, Vec<u8>, Vec<u8>)> {
let file_name = file_name.to_str().expect("ascii filename").to_owned();

if !file_name.ends_with(".baseline") {
return None;
}

let parts: Vec<_> = file_name.split('.').collect();
let [name, algorithm, ..] = parts[..] else {
unreachable!("BUG: Need file named '<name>.<algorithm>'")
};
let algorithm = match algorithm {
"myers" => Algorithm::Myers,
"histogram" => Algorithm::Histogram,
other => unreachable!("'{other}' is not a supported algorithm"),
};

let parts: Vec<_> = name.split('-').collect();
let [old_blob_id, new_blob_id] = parts[..] else {
unreachable!("BUG: name part of filename must be <old_blob_id>-<new_blob_id>");
Comment on lines +136 to +146
Copy link

Copilot AI Dec 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error message format is inconsistent. Line 136 uses "BUG: Need file named", line 141 uses single quotes around the variable without "BUG:", and line 146 uses "BUG: name part of filename must be". Consider using a consistent format for all unreachable error messages, such as starting all with "BUG:" prefix.

Copilot uses AI. Check for mistakes.
};

let old_data = std::fs::read(asset_dir.join(format!("{old_blob_id}.blob"))).unwrap();
let new_data = std::fs::read(asset_dir.join(format!("{new_blob_id}.blob"))).unwrap();
Comment on lines +149 to +150
Copy link

Copilot AI Dec 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The .unwrap() calls violate the project's error handling guidelines. According to the coding guidelines, tests should avoid .unwrap() in favor of .expect() with meaningful context or ?. These file reads should either be propagated with ? or use .expect("meaningful context") to explain why the files are expected to exist.

Copilot generated this review using guidance from repository custom instructions.
(file_name, algorithm, old_data, new_data).into()
}

fn assert_diffs(diffs: &[(String, String, bool, String)]) {
let total_diffs = diffs.len();
let matching_diffs = diffs
.iter()
Expand All @@ -115,16 +178,10 @@ fn baseline() -> gix_testtools::Result {
)
}
);

Ok(())
}

fn tokens_for_diffing(data: &[u8]) -> impl TokenSource<Token = &[u8]> {
gix_diff::blob::sources::byte_lines(data)
}

mod baseline {
use gix_object::bstr::ByteSlice;
use gix_object::bstr::{ByteSlice, ByteVec};
use std::iter::Peekable;

use gix_diff::blob::unified_diff::{ConsumeHunk, HunkHeader};
Expand Down Expand Up @@ -193,6 +250,20 @@ mod baseline {
}
}

impl Baseline<'_> {
/// Fold all [`DiffHunk`]s we produce into a unified_diff string
Copy link

Copilot AI Dec 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The doc comment uses backticks around DiffHunk and refers to "we produce" but doesn't fully explain what this method does or when it should be used. Consider clarifying that this method converts the baseline hunks into a unified diff format string for comparison purposes.

Suggested change
/// Fold all [`DiffHunk`]s we produce into a unified_diff string
/// Converts all baseline [`DiffHunk`]s into a single unified diff format string.
///
/// This method is typically used to produce a unified diff representation of the baseline hunks,
/// suitable for comparison with other diff outputs in tests or analysis.

Copilot uses AI. Check for mistakes.
pub fn fold_to_unidiff(self) -> BString {
self.fold(BString::default(), |mut acc, diff_hunk| {
acc.push_str(diff_hunk.header.to_string().as_str());
acc.push(b'\n');

acc.extend_from_slice(&diff_hunk.lines);

acc
})
}
}

impl Iterator for Baseline<'_> {
type Item = DiffHunk;

Expand Down
Loading