@@ -45,22 +45,33 @@ async def alternate_news_sources() -> list[dict[str, list[dict[str, str]]]]:
     search for news from alternate sources
     :return:
     """
-    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
     articles_list: list[RssArticle] = await parse_feeds()
     for i, article in enumerate(articles_list):
+
         summary, body, images = await parse_article(article)
+        # NOTE - probably nothing to lose sleep over, but if an article does not
+        # have images it won't be saved
+        if not all([summary, body, images]):
+            continue
+
         article.body = body
         article.summary = summary
-        article.thumbnail = images.get('thumbnail')
+        article.thumbnail = images
+
         articles_list[i] = article
+
     return articles_list


-async def parse_article(article: RssArticle) -> tuple[str, str, dict[str, str | int]]:
+async def parse_article(article: RssArticle) -> tuple[str, str, list[dict[str, str | int]]]:
     headers = {
         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
     }
+
     html = await download_article(link=article.link, headers=headers, timeout=60)
+    if html is None:
+        return None, None, None
+
     soup = BeautifulSoup(html, 'html.parser')
     summary = soup.find('p').get_text()
     body = ''
@@ -71,5 +82,3 @@ async def parse_article(article: RssArticle) -> tuple[str, str, dict[str, str |
         elif elem.name == 'img':
             images.append(dict(src=elem['src'], alt=elem['alt'], width=elem['width'], height=elem['height']))
     return summary, body, images
-
-
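A quick standalone illustration (not from the repo) of how the new guard behaves: `all([summary, body, images])` is falsy when any element is `None` (the failed-download path) or empty, so a page with no `<img>` tags yields an empty `images` list and the article is skipped, which is exactly the case the NOTE comment calls out.

```python
# Illustration only: truthiness of the new all([...]) guard.
summary, body = 'A summary.', 'Full body text.'

images: list[dict[str, str | int]] = []      # page had no <img> tags
print(all([summary, body, images]))          # False -> article is skipped

images = [{'src': 'a.jpg', 'alt': '', 'width': 640, 'height': 480}]
print(all([summary, body, images]))          # True  -> article is kept

print(all([None, None, None]))               # False -> failed download, skipped
```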
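One consequence of the early `return None, None, None` is that the annotated return type `tuple[str, str, list[dict[str, str | int]]]` no longer covers the failure path; note also that `article.thumbnail` now receives the whole image list rather than a single entry. A minimal sketch of a stricter signature, with `RssArticle` and `download_article` stubbed out here as hypothetical stand-ins for the repo's real helpers:

```python
# Sketch only: a return annotation that also covers the None path.
# RssArticle and download_article below are hypothetical stubs.
import asyncio
from dataclasses import dataclass

Image = dict[str, str | int]


@dataclass
class RssArticle:
    link: str


async def download_article(link: str, headers: dict[str, str], timeout: int) -> str | None:
    return None  # stub: simulate a failed download


async def parse_article(article: RssArticle) -> tuple[str | None, str | None, list[Image] | None]:
    headers = {'User-Agent': 'Mozilla/5.0'}
    html = await download_article(link=article.link, headers=headers, timeout=60)
    if html is None:
        return None, None, None  # matches the caller's all([...]) check
    # ...parsing elided; on success return (summary, body, images)
    return '', '', []


print(asyncio.run(parse_article(RssArticle(link='https://example.com/story'))))
# -> (None, None, None)
```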