Skip to content

Commit 988ea8c

Browse files
authored
Merge pull request #24 from justynhunter/feature/allow-contentparser-set
Adds methods to set the IContentParser
2 parents: c5d4b7c + 0f84f72; commit: 988ea8c

File tree

2 files changed

+17
-9
lines changed

2 files changed

+17
-9
lines changed

WebReaper/Builders/ScraperEngineBuilder.cs

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
using WebReaper.Core.CookieStorage.Abstract;
99
using WebReaper.Core.LinkTracker.Abstract;
1010
using WebReaper.Core.LinkTracker.Concrete;
11+
using WebReaper.Core.Parser.Abstract;
1112
using WebReaper.Core.Scheduler.Abstract;
1213
using WebReaper.Core.Scheduler.Concrete;
1314
using WebReaper.Domain;
@@ -35,9 +36,14 @@ public class ScraperEngineBuilder
3536

3637
private IScheduler Scheduler { get; set; } = new InMemoryScheduler();
3738
private IScraperConfigStorage? ConfigStorage { get; set; } = new InMemoryScraperConfigStorage();
38-
3939
protected IProxyProvider? ProxyProvider { get; set; }
4040

41+
public ScraperEngineBuilder WithContentParser(IContentParser contentParser)
42+
{
43+
SpiderBuilder.WithContentParser(contentParser);
44+
return this;
45+
}
46+
4147
public ScraperEngineBuilder AddSink(IScraperSink sink)
4248
{
4349
SpiderBuilder.AddSink(sink);
@@ -186,7 +192,6 @@ public ScraperEngineBuilder GetWithBrowser(
186192
ConfigBuilder.GetWithBrowser(startUrls, actionBuilder?.Invoke(new PageActionBuilder()));
187193
return this;
188194
}
189-
190195
public ScraperEngineBuilder GetWithBrowser(params string[] startUrls)
191196
{
192197
ConfigBuilder.GetWithBrowser(startUrls);
@@ -201,7 +206,7 @@ public ScraperEngineBuilder Follow(string linkSelector)
201206

202207
public ScraperEngineBuilder FollowWithBrowser(
203208
string linkSelector,
204-
Func<PageActionBuilder,
209+
Func<PageActionBuilder,
205210
List<PageAction>>? actionBuilder = null)
206211
{
207212
ConfigBuilder.FollowWithBrowser(linkSelector, actionBuilder?.Invoke(new PageActionBuilder()));
@@ -278,7 +283,6 @@ public ScraperEngineBuilder WithMongoDbCookieStorage(string connectionString, st
278283
logger);
279284
return this;
280285
}
281-
282286
public ScraperEngineBuilder WithFileCookieStorage(string fileName)
283287
{
284288
SpiderBuilder.WithFileCookieStorage(fileName);
@@ -335,12 +339,10 @@ public ScraperEngineBuilder WithParallelismDegree(int parallelismDegree)
335339
public async Task<ScraperEngine> BuildAsync()
336340
{
337341
SpiderBuilder.WithConfigStorage(ConfigStorage);
338-
339342
var config = ConfigBuilder.Build();
340343
var spider = SpiderBuilder.Build();
341-
342344
await ConfigStorage.CreateConfigAsync(config);
343345

344346
return new ScraperEngine(_parallelismDegree, ConfigStorage, Scheduler, spider, Logger);
345347
}
346-
}
348+
}

WebReaper/Builders/SpiderBuilder.cs

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,12 @@ public class SpiderBuilder
5151

5252
protected event Action<ParsedData> ScrapedData;
5353

54+
public SpiderBuilder WithContentParser(IContentParser contentParser)
55+
{
56+
ContentParser = contentParser;
57+
return this;
58+
}
59+
5460
public SpiderBuilder WithLogger(ILogger logger)
5561
{
5662
Logger = logger;
@@ -166,7 +172,7 @@ public SpiderBuilder WithRedisCookieStorage(string connectionString, string redi
166172
CookieStorage = new RedisCookieStorage(connectionString, redisKey, Logger);
167173
return this;
168174
}
169-
175+
170176
public SpiderBuilder WithFileCookieStorage(string fileName)
171177
{
172178
CookieStorage = new FileCookieStorage(fileName, Logger);
@@ -235,4 +241,4 @@ public ISpider Build()
235241

236242
return spider;
237243
}
238-
}
244+
}

0 commit comments

Comments
 (0)