8
8
using WebReaper . Core . CookieStorage . Abstract ;
9
9
using WebReaper . Core . LinkTracker . Abstract ;
10
10
using WebReaper . Core . LinkTracker . Concrete ;
11
+ using WebReaper . Core . Parser . Abstract ;
11
12
using WebReaper . Core . Scheduler . Abstract ;
12
13
using WebReaper . Core . Scheduler . Concrete ;
13
14
using WebReaper . Domain ;
@@ -35,9 +36,14 @@ public class ScraperEngineBuilder
35
36
36
37
/// <summary>Scheduler passed to the <c>ScraperEngine</c> constructor in <c>BuildAsync</c>; initialised to a new <c>InMemoryScheduler</c>.</summary>
private IScheduler Scheduler { get ; set ; } = new InMemoryScheduler ( ) ;
37
38
/// <summary>
/// Config storage that <c>BuildAsync</c> hands to the spider builder and the engine and uses to store the built config;
/// initialised to a new <c>InMemoryScraperConfigStorage</c>. Declared nullable, so it may be unset.
/// </summary>
private IScraperConfigStorage ? ConfigStorage { get ; set ; } = new InMemoryScraperConfigStorage ( ) ;
38
-
39
39
/// <summary>Optional proxy provider; no initial value is assigned here.</summary>
// NOTE(review): the code that reads or sets this property is outside this section — confirm how it is consumed.
protected IProxyProvider ? ProxyProvider { get ; set ; }
40
40
41
/// <summary>
/// Forwards the given content parser to the spider builder.
/// </summary>
/// <param name="contentParser">Parser instance passed on to <c>SpiderBuilder.WithContentParser</c>.</param>
/// <returns>This builder, so further calls can be chained.</returns>
public ScraperEngineBuilder WithContentParser(IContentParser contentParser)
{
    // Delegate only; the engine builder keeps no reference to the parser itself.
    SpiderBuilder.WithContentParser(contentParser);

    return this;
}
46
+
41
47
public ScraperEngineBuilder AddSink ( IScraperSink sink )
42
48
{
43
49
SpiderBuilder . AddSink ( sink ) ;
@@ -186,7 +192,6 @@ public ScraperEngineBuilder GetWithBrowser(
186
192
ConfigBuilder . GetWithBrowser ( startUrls , actionBuilder ? . Invoke ( new PageActionBuilder ( ) ) ) ;
187
193
return this ;
188
194
}
189
-
190
195
public ScraperEngineBuilder GetWithBrowser ( params string [ ] startUrls )
191
196
{
192
197
ConfigBuilder . GetWithBrowser ( startUrls ) ;
@@ -201,7 +206,7 @@ public ScraperEngineBuilder Follow(string linkSelector)
201
206
202
207
public ScraperEngineBuilder FollowWithBrowser (
203
208
string linkSelector ,
204
- Func < PageActionBuilder ,
209
+ Func < PageActionBuilder ,
205
210
List < PageAction > > ? actionBuilder = null )
206
211
{
207
212
ConfigBuilder . FollowWithBrowser ( linkSelector , actionBuilder ? . Invoke ( new PageActionBuilder ( ) ) ) ;
@@ -278,7 +283,6 @@ public ScraperEngineBuilder WithMongoDbCookieStorage(string connectionString, st
278
283
logger ) ;
279
284
return this ;
280
285
}
281
-
282
286
public ScraperEngineBuilder WithFileCookieStorage ( string fileName )
283
287
{
284
288
SpiderBuilder . WithFileCookieStorage ( fileName ) ;
@@ -335,12 +339,10 @@ public ScraperEngineBuilder WithParallelismDegree(int parallelismDegree)
335
339
/// <summary>
/// Assembles the configured pieces into a <see cref="ScraperEngine"/>.
/// </summary>
/// <remarks>
/// Hands the config storage to the spider builder, builds the scraper config and the spider,
/// stores the config through the config storage, and then constructs the engine.
/// </remarks>
/// <returns>A task whose result is the constructed <see cref="ScraperEngine"/>.</returns>
/// <exception cref="InvalidOperationException">The config storage is <c>null</c>.</exception>
public async Task<ScraperEngine> BuildAsync()
{
    // ConfigStorage is declared nullable; fail with a clear message up front
    // instead of hitting a NullReferenceException when it is dereferenced below.
    var configStorage = ConfigStorage
        ?? throw new InvalidOperationException(
            $"{nameof(ConfigStorage)} must be set before calling {nameof(BuildAsync)}.");

    SpiderBuilder.WithConfigStorage(configStorage);

    var config = ConfigBuilder.Build();
    var spider = SpiderBuilder.Build();

    // Store the built config before the engine is created.
    await configStorage.CreateConfigAsync(config);

    return new ScraperEngine(_parallelismDegree, configStorage, Scheduler, spider, Logger);
}
346
- }
348
+ }
0 commit comments