@@ -2,24 +2,24 @@ use std::fs::File;
2
2
use std:: mem;
3
3
use std:: path:: PathBuf ;
4
4
5
- use csv:: ByteRecord ;
5
+ use csv:: { ByteRecord , WriterBuilder } ;
6
6
7
7
pub struct CsvChunker {
8
8
pub ( crate ) reader : csv:: Reader < File > ,
9
9
pub ( crate ) headers : ByteRecord ,
10
- pub ( crate ) buffer : Vec < u8 > ,
10
+ pub ( crate ) writer : csv :: Writer < Vec < u8 > > ,
11
11
pub ( crate ) record : ByteRecord ,
12
12
pub ( crate ) size : usize ,
13
+ pub ( crate ) delimiter : u8 ,
13
14
}
14
15
15
16
impl CsvChunker {
16
- pub fn new ( file : PathBuf , size : usize ) -> Self {
17
+ pub fn new ( file : PathBuf , size : usize , delimiter : u8 ) -> Self {
17
18
let mut reader = csv:: Reader :: from_path ( file) . unwrap ( ) ;
18
- let mut buffer = Vec :: new ( ) ;
19
+ let mut writer = WriterBuilder :: new ( ) . delimiter ( delimiter ) . from_writer ( Vec :: new ( ) ) ;
19
20
let headers = reader. byte_headers ( ) . unwrap ( ) . clone ( ) ;
20
- buffer. extend_from_slice ( headers. as_slice ( ) ) ;
21
- buffer. push ( b'\n' ) ;
22
- Self { reader, headers, buffer, record : ByteRecord :: new ( ) , size }
21
+ writer. write_byte_record ( & headers) . unwrap ( ) ;
22
+ Self { reader, headers, writer, record : ByteRecord :: new ( ) , size, delimiter }
23
23
}
24
24
}
25
25
@@ -28,30 +28,33 @@ impl Iterator for CsvChunker {
28
28
29
29
fn next ( & mut self ) -> Option < Self :: Item > {
30
30
while self . reader . read_byte_record ( & mut self . record ) . unwrap ( ) {
31
- if self . buffer . len ( ) + self . record . len ( ) >= self . size {
32
- let buffer = mem:: take ( & mut self . buffer ) ;
31
+ if self . writer . get_ref ( ) . len ( ) + self . record . len ( ) >= self . size {
32
+ let mut writer =
33
+ WriterBuilder :: new ( ) . delimiter ( self . delimiter ) . from_writer ( Vec :: new ( ) ) ;
34
+ writer. write_byte_record ( & self . headers ) . unwrap ( ) ;
35
+ let writer = mem:: replace ( & mut self . writer , writer) ;
33
36
34
37
// Insert the header and out of bound record
35
- self . buffer . extend_from_slice ( self . headers . as_slice ( ) ) ;
36
- self . buffer . push ( b'\n' ) ;
37
- self . buffer . extend_from_slice ( self . record . as_slice ( ) ) ;
38
- self . buffer . push ( b'\n' ) ;
38
+ self . writer . write_byte_record ( & self . headers ) . unwrap ( ) ;
39
+ self . writer . write_byte_record ( & self . record ) . unwrap ( ) ;
39
40
40
- return Some ( buffer ) ;
41
+ return Some ( writer . into_inner ( ) . unwrap ( ) ) ;
41
42
} else {
42
43
// Insert only the record
43
- self . buffer . extend_from_slice ( self . record . as_slice ( ) ) ;
44
- self . buffer . push ( b'\n' ) ;
44
+ self . writer . write_byte_record ( & self . record ) . unwrap ( ) ;
45
45
}
46
46
}
47
47
// If there only less than or the headers in the buffer and a
48
48
// newline character it means that there are no documents in it.
49
- if self . buffer . len ( ) <= self . headers . len ( ) + 1 {
49
+ if self . writer . get_ref ( ) . len ( ) <= self . headers . len ( ) + 1 {
50
50
None
51
51
} else {
52
+ let mut writer = WriterBuilder :: new ( ) . delimiter ( self . delimiter ) . from_writer ( Vec :: new ( ) ) ;
53
+ writer. write_byte_record ( & self . headers ) . unwrap ( ) ;
52
54
// We make the buffer empty by doing that and next time we will
53
55
// come back to this _if else_ condition to then return None.
54
- Some ( mem:: take ( & mut self . buffer ) )
56
+ let writer = mem:: replace ( & mut self . writer , writer) ;
57
+ Some ( writer. into_inner ( ) . unwrap ( ) )
55
58
}
56
59
}
57
60
}
0 commit comments