Skip to content

Commit 310715c

Browse files
author
mtolman
committed
Fixed bug with empty newline at end of output
1 parent 7cae106 commit 310715c

File tree

2 files changed

+124
-6
lines changed

2 files changed

+124
-6
lines changed

src/zero_allocs/slice.zig

Lines changed: 74 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -138,12 +138,15 @@ pub const Parser = struct {
138138
return null;
139139
}
140140
defer index += 1;
141+
141142
self.err = self._field_parser.err;
142143
if (self.err) |_| {
143144
return null;
144145
}
146+
145147
end += f.field._data.len + 1;
146148
end = @min(end, self._text.len);
149+
147150
if (f.row_end) {
148151
break;
149152
}
@@ -154,8 +157,27 @@ pub const Parser = struct {
154157
assert(end <= self._text.len);
155158
assert(end >= start);
156159

160+
var data = self._text[start..end];
161+
if (data.len > 0 and data[data.len - 1] == self._opts.column_line_end_prefix) {
162+
data = data[0..(data.len - 1)];
163+
}
164+
165+
if (self._field_parser.done()) {
166+
if (data.len == 0) {
167+
return null;
168+
}
169+
if (self._opts.column_line_end_prefix) |cr| {
170+
if (data.len == 2 and data[0] == cr and data[1] == self._opts.column_line_end) {
171+
return null;
172+
}
173+
}
174+
if (data.len == 1 and data[0] == self._opts.column_line_end) {
175+
return null;
176+
}
177+
}
178+
157179
return Row{
158-
._data = self._text[start..end],
180+
._data = data,
159181
._opts = self._opts,
160182
._len = index,
161183
};
@@ -830,33 +852,33 @@ test "row iterator" {
830852
\\abc,"def",
831853
\\abc"def""geh",
832854
,
833-
.count = 1,
855+
.count = 0,
834856
.err = CsvReadError.UnexpectedQuote,
835857
},
836858
.{
837859
.input =
838860
\\abc,"def",
839861
\\"def"geh",
840862
,
841-
.count = 1,
863+
.count = 0,
842864
.err = CsvReadError.UnexpectedEndOfFile,
843865
},
844866
.{
845867
.input =
846868
\\abc,"def",
847869
\\"def""geh,
848870
,
849-
.count = 1,
871+
.count = 0,
850872
.err = CsvReadError.UnexpectedEndOfFile,
851873
},
852874
.{
853875
.input = "abc,serkj\r",
854-
.count = 1,
876+
.count = 0,
855877
.err = CsvReadError.InvalidLineEnding,
856878
},
857879
.{
858880
.input = "abc,serkj\r1232,232",
859-
.count = 1,
881+
.count = 0,
860882
.err = CsvReadError.InvalidLineEnding,
861883
},
862884
};
@@ -895,3 +917,49 @@ test "row and field iterator" {
895917

896918
try testing.expectEqual(fieldCount, cnt);
897919
}
920+
921+
test "row end empty row" {
922+
const repeat = 9441;
923+
const testing = @import("std").testing;
924+
const input = "2321234423412345678902322\r\n3\r\n4\r\n5\r\n6\r\n7\r\n8\r\n9\r\n1\r\n2\r\n3124,\r\n" ** repeat;
925+
const rows = 11 * repeat;
926+
927+
var parser = Parser.init(input, .{});
928+
var cnt : usize = 0;
929+
while (parser.next()) |_| {
930+
cnt += 1;
931+
}
932+
933+
try testing.expectEqual(rows, cnt);
934+
}
935+
936+
// For local testing only: trips.csv (a 13MB test file) cannot be distributed at this time,
937+
// so the test below stays commented out and is uncommented only when running locally.
938+
939+
// test "csv" {
940+
// var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
941+
// defer arena.deinit();
942+
// const allocator = arena.allocator();
943+
// var path_buffer: [std.fs.max_path_bytes]u8 = undefined;
944+
// const path = try std.fs.realpathZ("trips.csv", &path_buffer);
945+
//
946+
// const file = try std.fs.openFileAbsolute(path, .{});
947+
// defer file.close();
948+
//
949+
// const mb = (1 << 10) << 10;
950+
// const csv = try file.readToEndAlloc(allocator, 500 * mb);
951+
// var parser = Parser.init(csv, .{});
952+
// var count: usize = 0;
953+
// var lines: usize = 0;
954+
// while (parser.next()) |row| {
955+
// var iter = row.iter();
956+
// lines += 1;
957+
// while (iter.next()) |_| {
958+
// count += 1;
959+
// }
960+
// }
961+
//
962+
// try std.testing.expectEqual(114393, lines);
963+
// try std.testing.expectEqual(915144, count);
964+
// }
965+

src/zero_allocs/stream.zig

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,16 @@ const Chunk = struct {
156156
fn consumed(self: *const @This()) bool {
157157
return self.offset >= self.len;
158158
}
159+
160+
fn isEnd(self:*const @This()) bool {
161+
const chunk = self.bytes[self.offset .. self.len];
162+
if (chunk.len == 0) return true;
163+
if (chunk.len == 1 and chunk[0] == self.opts.column_line_end) return true;
164+
if (self.opts.column_line_end_prefix) |cr| {
165+
if (chunk.len == 2 and chunk[0] == cr and chunk[1] == self.opts.column_line_end) return true;
166+
}
167+
return false;
168+
}
159169
};
160170

161171
const ParserState = struct {
@@ -225,6 +235,8 @@ pub fn Parser(comptime Reader: type, comptime Writer: type) type {
225235
if (self._state.next_chunk.len == 0) {
226236
return true;
227237
}
238+
} else if (self._state.cur_chunk.atEnd() and self._state.cur_chunk.isEnd()) {
239+
return true;
228240
}
229241
return false;
230242
}
@@ -806,3 +818,41 @@ test "End with quote" {
806818

807819
try testing.expectEqual(fieldCount, cnt);
808820
}
821+
822+
// For local testing only: trips.csv (a 13MB test file) cannot be distributed at this time,
823+
// so the test below stays commented out and is uncommented only when running locally.
824+
825+
// test "csv" {
826+
// var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
827+
// defer arena.deinit();
828+
//
829+
// const allocator = arena.allocator();
830+
// var path_buffer: [std.fs.max_path_bytes]u8 = undefined;
831+
// const path = try std.fs.realpathZ("trips.csv", &path_buffer);
832+
//
833+
// const file = try std.fs.openFileAbsolute(path, .{});
834+
// defer file.close();
835+
//
836+
// const mb = (1 << 10) << 10;
837+
// const csv = try file.readToEndAlloc(allocator, 500 * mb);
838+
// var input = std.io.fixedBufferStream(csv);
839+
//
840+
// var buff = std.ArrayList(u8).init(std.testing.allocator);
841+
// defer buff.deinit();
842+
//
843+
// var stream = init(input.reader(), @TypeOf(buff.writer()), .{});
844+
// var count: usize = 0;
845+
// var lines: usize = 0;
846+
// while (!stream.done()) {
847+
// defer { buff.clearRetainingCapacity(); }
848+
// try stream.next(buff.writer());
849+
// count += 1;
850+
// if (stream.atRowEnd()) {
851+
// lines += 1;
852+
// }
853+
// }
854+
//
855+
// try std.testing.expectEqual(114393, lines);
856+
// try std.testing.expectEqual(915144, count);
857+
// }
858+

0 commit comments

Comments
 (0)