Skip to content

Commit d1a9d5b

Browse files
Refactored News API
1 parent 22021bc commit d1a9d5b

File tree

2 files changed

+15
-5
lines changed

2 files changed

+15
-5
lines changed

src/main/main.py

+1
Original file line number | Diff line number | Diff line change
@@ -92,3 +92,4 @@ async def scheduled_task():
9292
@app.on_event("startup")
9393
async def startup_event():
9494
asyncio.create_task(scheduled_task())
95+

src/tasks/news_scraper.py

+14 -5
Original file line number | Diff line number | Diff line change
@@ -45,22 +45,33 @@ async def alternate_news_sources() -> list[dict[str, list[dict[str, str]]]]:
4545
search for news from alternate sources
4646
:return:
4747
"""
48-
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
4948
articles_list: list[RssArticle] = await parse_feeds()
5049
for i, article in enumerate(articles_list):
50+
5151
summary, body, images = await parse_article(article)
52+
# NOTE - probably nothing to lose sleep over, but if an article does not
53+
# have images it won't be saved
54+
if not all([summary, body, images]):
55+
continue
56+
5257
article.body = body
5358
article.summary = summary
54-
article.thumbnail = images.get('thumbnail')
59+
article.thumbnail = images
60+
5561
articles_list[i] = article
62+
5663
return articles_list
5764

5865

59-
async def parse_article(article: RssArticle) -> tuple[str, str, dict[str, str | int]]:
66+
async def parse_article(article: RssArticle) -> tuple[str, str, list[dict[str, str | int]]]:
6067
headers = {
6168
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
6269
}
70+
6371
html = await download_article(link=article.link, headers=headers, timeout=60)
72+
if html is None:
73+
return None, None, None
74+
6475
soup = BeautifulSoup(html, 'html.parser')
6576
summary = soup.find('p').get_text()
6677
body = ''
@@ -71,5 +82,3 @@ async def parse_article(article: RssArticle) -> tuple[str, str, dict[str, str |
7182
elif elem.name == 'img':
7283
images.append(dict(src=elem['src'], alt=elem['alt'], width=elem['width'], height=elem['height']))
7384
return summary, body, images
74-
75-

0 commit comments

Comments
 (0)