Skip to content

Preserve line terminator type when using --crlf and -r #3100

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions crates/printer/src/standard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3839,4 +3839,41 @@ e
let expected = "4:d\n5-e\n6:d\n";
assert_eq_printed!(expected, got);
}

#[test]
fn regression_crlf_preserve() {
    // The input deliberately mixes a bare `\n` line with a `\r\n` line. In
    // CRLF mode the printer is expected to emit each line with the same
    // terminator it had in the input, both with and without a replacement.
    let input = "hello\nworld\r\n";
    let mut crlf_searcher = SearcherBuilder::new()
        .line_number(false)
        .line_terminator(LineTerminator::crlf())
        .build();
    let any_char =
        RegexMatcherBuilder::new().crlf(true).build(r".").unwrap();

    // Pass 1: plain printing, no replacement configured.
    let mut plain = StandardBuilder::new().build(NoColor::new(vec![]));
    crlf_searcher
        .search_reader(&any_char, input.as_bytes(), plain.sink(&any_char))
        .unwrap();
    let printed = printer_contents(&mut plain);
    let want = "hello\nworld\r\n";
    assert_eq_printed!(want, printed);

    // Pass 2: replace every match with itself (`$0`). The output must be
    // byte-for-byte the same as in the pass without a replacement.
    let mut replacing = StandardBuilder::new()
        .replacement(Some(b"$0".to_vec()))
        .build(NoColor::new(vec![]));
    crlf_searcher
        .search_reader(
            &any_char,
            input.as_bytes(),
            replacing.sink(&any_char),
        )
        .unwrap();
    let printed = printer_contents(&mut replacing);
    let want = "hello\nworld\r\n";
    assert_eq_printed!(want, printed);
}
}
26 changes: 19 additions & 7 deletions crates/printer/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,19 +59,23 @@ impl<M: Matcher> Replacer<M> {
// See the giant comment in 'find_iter_at_in_context' below for why we
// do this dance.
let is_multi_line = searcher.multi_line_with_matcher(&matcher);
if is_multi_line {
// Get the line_terminator that was removed (if any) so we can add it back
let line_terminator = if is_multi_line {
if haystack[range.end..].len() >= MAX_LOOK_AHEAD {
haystack = &haystack[..range.end + MAX_LOOK_AHEAD];
}
&[]
} else {
// When searching a single line, we should remove the line
// terminator. Otherwise, it's possible for the regex (via
// look-around) to observe the line terminator and not match
// because of it.
let mut m = Match::new(0, range.end);
trim_line_terminator(searcher, haystack, &mut m);
let line_terminator =
trim_line_terminator(searcher, haystack, &mut m);
haystack = &haystack[..m.end()];
}
line_terminator
};
{
let &mut Space { ref mut dst, ref mut caps, ref mut matches } =
self.allocate(matcher)?;
Expand All @@ -96,6 +100,7 @@ impl<M: Matcher> Replacer<M> {
matches.push(Match::new(start, end));
true
},
line_terminator,
)
.map_err(io::Error::error_message)?;
}
Expand Down Expand Up @@ -508,6 +513,7 @@ where
// Otherwise, it's possible for the regex (via look-around) to observe
// the line terminator and not match because of it.
let mut m = Match::new(0, range.end);
// No need to remember the line terminator as we aren't doing a replace here
trim_line_terminator(searcher, bytes, &mut m);
bytes = &bytes[..m.end()];
}
Expand All @@ -523,19 +529,23 @@ where

/// Given a buf and some bounds, if there is a line terminator at the end of
/// the given bounds in buf, then the bounds are trimmed to remove the line
/// terminator.
pub(crate) fn trim_line_terminator(
/// terminator, returning the removed line terminator bytes (empty if none).
pub(crate) fn trim_line_terminator<'b>(
searcher: &Searcher,
buf: &[u8],
buf: &'b [u8],
line: &mut Match,
) {
) -> &'b [u8] {
let lineterm = searcher.line_terminator();
if lineterm.is_suffix(&buf[*line]) {
let mut end = line.end() - 1;
if lineterm.is_crlf() && end > 0 && buf.get(end - 1) == Some(&b'\r') {
end -= 1;
}
let orig_end = line.end();
*line = line.with_end(end);
&buf[end..orig_end]
} else {
&[]
}
}

Expand All @@ -549,6 +559,7 @@ fn replace_with_captures_in_context<M, F>(
caps: &mut M::Captures,
dst: &mut Vec<u8>,
mut append: F,
line_terminator: &[u8],
) -> Result<(), M::Error>
where
M: Matcher,
Expand All @@ -566,6 +577,7 @@ where
})?;
let end = std::cmp::min(bytes.len(), range.end);
dst.extend(&bytes[last_match..end]);
dst.extend(line_terminator); // Add back any line terminator
Ok(())
}

Expand Down