Skip to content
7 changes: 7 additions & 0 deletions lib/routes/nfl/namespace.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import type { Namespace } from '@/types';

/**
 * Namespace metadata for the NFL routes: display name, site URL (without
 * scheme) and content language.
 */
export const namespace: Namespace = {
    name: 'NFL',
    url: 'nfl.com',
    lang: 'en',
};
214 changes: 214 additions & 0 deletions lib/routes/nfl/news.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
import { load } from 'cheerio';

import type { DataItem, Route } from '@/types';
import cache from '@/utils/cache';
import ofetch from '@/utils/ofetch';
import { parseDate } from '@/utils/parse-date';

/**
 * Maps each supported team key (the `:team` route parameter) to the host name
 * of that team's official website.
 *
 * The map is built on a prototype-less object so that indexing it with
 * arbitrary request input (e.g. `constructor`, `toString`, `__proto__`)
 * yields `undefined` instead of an inherited `Object.prototype` member.
 * `Object.keys` / `Object.entries` behave exactly as on a plain object and
 * the insertion order of the entries below is preserved.
 */
const teamDomains: Record<string, string> = Object.assign(Object.create(null) as Record<string, string>, {
    '49ers': 'www.49ers.com',
    bears: 'www.chicagobears.com',
    bengals: 'www.bengals.com',
    bills: 'www.buffalobills.com',
    broncos: 'www.denverbroncos.com',
    browns: 'www.clevelandbrowns.com',
    buccaneers: 'www.buccaneers.com',
    cardinals: 'www.azcardinals.com',
    chargers: 'www.chargers.com',
    chiefs: 'www.chiefs.com',
    colts: 'www.colts.com',
    commanders: 'www.commanders.com',
    cowboys: 'www.dallascowboys.com',
    dolphins: 'www.miamidolphins.com',
    eagles: 'www.philadelphiaeagles.com',
    falcons: 'www.atlantafalcons.com',
    giants: 'www.giants.com',
    jaguars: 'www.jaguars.com',
    jets: 'www.newyorkjets.com',
    lions: 'www.detroitlions.com',
    packers: 'www.packers.com',
    panthers: 'www.panthers.com',
    patriots: 'www.patriots.com',
    raiders: 'www.raiders.com',
    rams: 'www.therams.com',
    ravens: 'www.baltimoreravens.com',
    saints: 'www.neworleanssaints.com',
    seahawks: 'www.seahawks.com',
    steelers: 'www.steelers.com',
    texans: 'www.houstontexans.com',
    titans: 'www.tennesseetitans.com',
    vikings: 'www.vikings.com',
});

/**
 * Route definition for `/news/:team`.
 *
 * One radar rule is generated for every entry of `teamDomains`, mapping
 * `{domain}/news/*` to `/news/{team}`. The description carries the table of
 * accepted team keys; requests are served by `handler`.
 */
export const route: Route = {
    path: '/news/:team',
    categories: ['traditional-media'],
    example: '/nfl/news/seahawks',
    parameters: { team: 'Team name as used in the route key, see table below' },
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    radar: Object.keys(teamDomains).map((team) => ({
        source: [`${teamDomains[team]}/news/*`],
        target: `/news/${team}`,
    })),
    name: 'Team News',
    maintainers: ['nickyfoto'],
    description: `Fetches news from official NFL team websites.

| Team | Key | Domain |
|------|-----|--------|
| Arizona Cardinals | cardinals | azcardinals.com |
| Atlanta Falcons | falcons | atlantafalcons.com |
| Baltimore Ravens | ravens | baltimoreravens.com |
| Buffalo Bills | bills | buffalobills.com |
| Carolina Panthers | panthers | panthers.com |
| Chicago Bears | bears | chicagobears.com |
| Cincinnati Bengals | bengals | bengals.com |
| Cleveland Browns | browns | clevelandbrowns.com |
| Dallas Cowboys | cowboys | dallascowboys.com |
| Denver Broncos | broncos | denverbroncos.com |
| Detroit Lions | lions | detroitlions.com |
| Green Bay Packers | packers | packers.com |
| Houston Texans | texans | houstontexans.com |
| Indianapolis Colts | colts | colts.com |
| Jacksonville Jaguars | jaguars | jaguars.com |
| Kansas City Chiefs | chiefs | chiefs.com |
| Las Vegas Raiders | raiders | raiders.com |
| Los Angeles Chargers | chargers | chargers.com |
| Los Angeles Rams | rams | therams.com |
| Miami Dolphins | dolphins | miamidolphins.com |
| Minnesota Vikings | vikings | vikings.com |
| New England Patriots | patriots | patriots.com |
| New Orleans Saints | saints | neworleanssaints.com |
| New York Giants | giants | giants.com |
| New York Jets | jets | newyorkjets.com |
| Philadelphia Eagles | eagles | philadelphiaeagles.com |
| Pittsburgh Steelers | steelers | steelers.com |
| San Francisco 49ers | 49ers | 49ers.com |
| Seattle Seahawks | seahawks | seahawks.com |
| Tampa Bay Buccaneers | buccaneers | buccaneers.com |
| Tennessee Titans | titans | tennesseetitans.com |
| Washington Commanders | commanders | commanders.com |`,
    handler,
};

/**
 * Builds the news feed for one team.
 *
 * Steps: look the `team` route parameter up in `teamDomains`, fetch the
 * team's `/news/` listing page, collect the unique article links found on it,
 * fetch every article (through `cache.tryGet`, keyed by the article link),
 * read its `NewsArticle` JSON-LD metadata and body HTML, and return the feed.
 *
 * @param ctx Request context; only `ctx.req.param('team')` is read.
 * @returns Feed object with `title`, `link`, `language` and `item`.
 * @throws Error when `team` has no entry in `teamDomains`.
 */
async function handler(ctx) {
const team = ctx.req.param('team');
const domain = teamDomains[team];
if (!domain) {
throw new Error(`Unknown NFL team: ${team}. Valid teams: ${Object.keys(teamDomains).join(', ')}`);
}

const baseUrl = `https://${domain}`;
const listingUrl = `${baseUrl}/news/`;

const response = await ofetch(listingUrl);
const $ = load(response);

// `seen` de-duplicates hrefs; `list` collects the title/link pairs to fetch.
const seen = new Set<string>();
const list: Array<{ title: string; link: string }> = [];

$('a[href^="/news/"]').each((_, el) => {
const $el = $(el);
const href = $el.attr('href');
// Only match article links: /news/{single-slug} (no extra path segments or trailing slash)
if (!href || !/^\/news\/[^/]+$/.test(href) || seen.has(href)) {
return;
}
// The href is marked as seen before the title check, so a link whose first
// anchor has neither text nor image alt is dropped even if a later anchor
// for the same href would have provided a title.
seen.add(href);
// Prefer the anchor text; fall back to the alt text of a contained image.
const title = $el.text().trim() || $el.find('img').attr('alt') || '';
if (title) {
list.push({
title,
link: `${baseUrl}${href}`,
});
}
});
Comment on lines +117 to +135
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Almost all examples provided for testing contain > 100 links which is a big NO. Use the API at /api/lazy/load instead.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure how to implement /api/lazy/load I see https://www.seahawks.com/api/lazy/load return 200 with empty body. Can you specify how we can reduce the items we load? Thanks

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Visit https://www.49ers.com/news/latest-headlines/ and click that load more button. You should be able to see the payload and it works for which doesn’t have latest headlines like seahawks.com.


// Every listed article is fetched concurrently; entries that resolve to
// null (failed fetch) are removed by the trailing filter(Boolean).
const items = (
await Promise.all(
list.map((item) =>
// NOTE(review): presumably cache.tryGet stores the callback result under
// the article link — confirm against @/utils/cache.
cache.tryGet(item.link, async () => {
let articleResponse;
try {
articleResponse = await ofetch(item.link);
} catch {
// A failed article request yields null instead of failing the whole feed.
return null;
}
const $article = load(articleResponse);

// Scan every JSON-LD script tag; each NewsArticle found overwrites
// `jsonLd`, so the last matching script wins.
let jsonLd: Record<string, any> | null = null;
$article('script[type="application/ld+json"]').each((_, el) => {
try {
const data = JSON.parse($article(el).text());
// Array payload: take its first NewsArticle element; otherwise use the parsed value itself.
const candidate = Array.isArray(data) ? data.find((d) => d['@type'] === 'NewsArticle') : data;
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Provide the URL to the article where you found an array of JSON-LD objects within a single script[type="application/ld+json"] element which makes you use the Array.isArray(data) check.

if (candidate?.['@type'] === 'NewsArticle') {
jsonLd = candidate;
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Parse NewsArticle entries nested under @graph

The JSON-LD extraction only accepts a top-level NewsArticle object or a top-level array of such objects; when a page uses the common {"@graph": [...]} shape, candidate becomes the wrapper object and the @type check fails, so headline, datePublished, and other metadata are dropped even though they are present. In that case items fall back to link text and can miss pubDate, which reduces feed quality and can break date-based ordering.

Useful? React with 👍 / 👎.

}
} catch {
// skip malformed JSON-LD
}
});

// JSON-LD values take precedence; the listing title is the fallback.
// `author` / `image` are read from an object's `.name` / `.url`, or from
// the first element when the JSON-LD field is an array.
const result: DataItem = {
title: jsonLd?.headline || item.title,
link: item.link,
pubDate: jsonLd?.datePublished ? parseDate(jsonLd.datePublished) : undefined,
author: jsonLd?.author?.name || (Array.isArray(jsonLd?.author) ? jsonLd.author[0]?.name : undefined),
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Provide the URL to the article where you found an array of author which makes you use the Array.isArray(jsonLd?.author) check.

image: jsonLd?.image?.url || (Array.isArray(jsonLd?.image) ? jsonLd.image[0]?.url : undefined),
};

// Try to extract article body HTML
// Selectors are tried in order; the first one that yields non-empty HTML
// becomes the description and ends the loop.
const contentSelectors = ['article .nfl-c-body-part', '.article-body', 'article [data-module="content"]', 'article'];
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Provide the URL to the article where you found the article is located in .article-body or article [data-module="content"].

for (const selector of contentSelectors) {
const content = $article(selector);
if (content.length) {
// Remove scripts, styles, ad containers, share widgets and related-content
// blocks from the matched nodes (this mutates the parsed document).
content.find('script, style, [data-ad], .ad, .social-share, .related-content').remove();
// Fix lazy-loaded images: real URLs live in data-src/data-srcset, src is a placeholder GIF.
// Also strip the Cloudinary "/t_lazy/" transformation which returns a blurred placeholder.
const unlazy = (url: string) => url.replaceAll('/t_lazy/', '/');

Check warning

Code scanning / oxlint

eslint-plugin-unicorn(consistent-function-scoping) Warning

Function unlazy does not capture any variables from its parent scope
Move unlazy to the outer scope to avoid recreating it on every call.
// The non-null assertions below rely on the attribute selectors, which
// only match elements that carry data-src / data-srcset.
content.find('img[data-src]').each((_, img) => {
const $img = $article(img);
$img.attr('src', unlazy($img.attr('data-src')!));
$img.removeAttr('data-src');
});
content.find('source[data-srcset]').each((_, src) => {
const $src = $article(src);
$src.attr('srcset', unlazy($src.attr('data-srcset')!));
$src.removeAttr('data-srcset');
});
// Concatenate the inner HTML of every matched element, skipping empty ones.
const html = content
.toArray()
.map((el) => $article(el).html())
.filter(Boolean)
.join('');
if (html) {
result.description = html;
break;
}
}
}

// Fall back to the JSON-LD description when no body HTML was extracted.
if (!result.description && jsonLd?.description) {
result.description = jsonLd.description;
}

return result;
})
)
)
).filter(Boolean) as DataItem[];

// Feed title: the listing page's <title> text, else the capitalized team key plus " News".
return {
title: $('title').text() || `${team.charAt(0).toUpperCase() + team.slice(1)} News`,
link: listingUrl,
language: 'en',
item: items,
};
}
Loading