Skip to content

Commit 1f39466

Browse files
committed
Add error handling to prevent a single failing URL from breaking loops
1 parent 5ee7498 commit 1f39466

File tree

1 file changed

+19
-13
lines changed

1 file changed

+19
-13
lines changed

lazyops/lazysources/gdelt/models.py

Lines changed: 19 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -36,17 +36,21 @@ def _run(self):
3636
if self.extracted:
3737
return
3838
_init_newsplease()
39-
data = _NP.from_url(self.url)
40-
self.title = data.title
41-
self.description = data.description
42-
self.image_url = data.image_url
43-
self.language = data.language
44-
self.domain = data.source_domain
45-
self.text = data.maintext
46-
self.authors = data.authors
47-
self.date_publish = data.date_publish
48-
self.date_modify = data.date_modify
49-
self.extracted = True
39+
try:
40+
data = _NP.from_url(self.url)
41+
self.title = data.title
42+
self.description = data.description
43+
self.image_url = data.image_url
44+
self.language = data.language
45+
self.domain = data.source_domain
46+
self.text = data.maintext
47+
self.authors = data.authors
48+
self.date_publish = data.date_publish
49+
self.date_modify = data.date_modify
50+
self.extracted = True
51+
except Exception as e:
52+
logger.error(f'Error Parsing URL: {self.url}.\n{str(e)}')
53+
5054

5155

5256
@lazyclass
@@ -68,11 +72,13 @@ def parse(self):
6872
return
6973
self.extraction = Article(url=self.url)
7074
self.extraction._run()
71-
self.text = self.extraction.text
75+
if self.extraction.extracted:
76+
self.text = self.extraction.text
7277

7378
async def async_parse(self):
7479
if self.extraction is not None:
7580
return
7681
self.extraction = Article(url=self.url)
7782
self.extraction._run()
78-
self.text = self.extraction.text
83+
if self.extraction.extracted:
84+
self.text = self.extraction.text

0 commit comments

Comments
 (0)