-
Notifications
You must be signed in to change notification settings - Fork 9.6k
feat(route/zhipuai): add research route #21596
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 2 commits
6c02f1e
f0071b9
e31c823
8d7a6ff
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| import type { Namespace } from '@/types'; | ||
|
|
||
/**
 * Namespace descriptor for the Zhipu AI routes: the display name and the
 * host of the site the routes read from.
 */
export const namespace: Namespace = {
    name: 'Zhipu AI',
    url: 'zhipuai.cn',
};
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
| @@ -0,0 +1,139 @@ | ||||
| import { load } from 'cheerio'; | ||||
|
|
||||
| import type { Route } from '@/types'; | ||||
| import ofetch from '@/utils/ofetch'; | ||||
| import { parseDate } from '@/utils/parse-date'; | ||||
|
|
||||
/**
 * Maps the `tag` route parameter (URL slug) to the Chinese tag label that is
 * matched against each article's `tag_zh` list.
 *
 * The table is built on a null-prototype object so that membership checks
 * written as `slug in tagMap` only succeed for the slugs listed here.
 * With an ordinary object literal, inherited names such as `constructor` or
 * `toString` would also pass an `in` check and yield a function instead of a
 * label.
 */
const tagMap: Record<string, string> = Object.assign(Object.create(null) as Record<string, string>, {
    basemodel: '基座模型',
    multimodal: '多模态',
    reasoning: '推理模型',
    agent: 'Agent',
    codemodel: '代码模型',
});
|
|
||||
| export const route: Route = { | ||||
| path: '/research/:language?/:tag?', | ||||
| categories: ['programming'], | ||||
| example: '/zhipuai/research', | ||||
| parameters: { | ||||
| language: { | ||||
| description: 'Language', | ||||
| options: [ | ||||
| { value: 'zh', label: '中文' }, | ||||
| { value: 'en', label: 'English' }, | ||||
| ], | ||||
| default: 'zh', | ||||
| }, | ||||
| tag: { | ||||
| description: 'Filter by tag', | ||||
| options: [ | ||||
| { value: 'basemodel', label: '基座模型' }, | ||||
| { value: 'multimodal', label: '多模态' }, | ||||
| { value: 'reasoning', label: '推理模型' }, | ||||
| { value: 'agent', label: 'Agent' }, | ||||
| { value: 'codemodel', label: '代码模型' }, | ||||
| ], | ||||
| }, | ||||
| }, | ||||
| features: { | ||||
| requireConfig: false, | ||||
| requirePuppeteer: false, | ||||
| antiCrawler: false, | ||||
| supportBT: false, | ||||
| supportPodcast: false, | ||||
| supportScihub: false, | ||||
| }, | ||||
| radar: [ | ||||
| { | ||||
| source: ['zhipuai.cn/zh/research', 'zhipuai.cn/en/research'], | ||||
| target: '/zhipuai/research', | ||||
| }, | ||||
| ], | ||||
| name: 'Research', | ||||
| maintainers: ['27Aaron'], | ||||
| url: 'zhipuai.cn/zh/research', | ||||
| handler: async (ctx) => { | ||||
| const language = ctx.req.param('language') ?? 'zh'; | ||||
| const validLanguages = ['zh', 'en']; | ||||
| const lang = validLanguages.includes(language) ? language : 'zh'; | ||||
| const locale = lang === 'zh' ? 'zh' : 'en'; | ||||
|
|
||||
| const tag = ctx.req.param('tag'); | ||||
| const filterTag = tag && tag in tagMap ? tagMap[tag] : undefined; | ||||
|
|
||||
| // Fetch SSR page - article data is embedded in Next.js RSC payload | ||||
| const html = await ofetch(`https://www.zhipuai.cn/${locale}/research`, { responseType: 'text' }); | ||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No need to specify the default value https://github.com/unjs/ofetch/blob/dfbe3ca4ef8a22fc023fca5a5ef530e525f5e523/src/utils.ts#L78-L80 |
||||
|
|
||||
/**
 * Extract the `blogsItems` array embedded in the page's Next.js RSC payload.
 *
 * Two encodings of the marker are recognised:
 *  - string-escaped (`blogsItems\":[`), where the JSON sits inside a quoted
 *    string literal and every quote/backslash carries an extra escape level;
 *  - plain JSON (`blogsItems":[`).
 *
 * For the escaped form the enclosing string literal is decoded exactly once
 * with `JSON.parse`, which undoes every escape correctly regardless of order
 * (a chain of `replaceAll` calls corrupts sequences such as `\\n`). The array
 * is then delimited by a bracket scan that skips over JSON string contents,
 * so `[` / `]` characters inside titles or summaries do not affect nesting.
 *
 * @param html Raw HTML of the research listing page.
 * @returns The parsed `blogsItems` entries.
 * @throws Error if the marker is missing, the escaped payload cannot be
 *         decoded, the array is never closed, or the parsed value is not an array.
 */
const extractArticles = (html: string): Array<Record<string, unknown>> => {
    const escapedMarker = String.raw`blogsItems\":`;
    const plainMarker = 'blogsItems":';

    let isEscaped = true;
    let markerIdx = html.indexOf(`${escapedMarker}[`);
    let arrStart = markerIdx + escapedMarker.length;
    if (markerIdx === -1) {
        isEscaped = false;
        markerIdx = html.indexOf(`${plainMarker}[`);
        arrStart = markerIdx + plainMarker.length;
    }
    if (markerIdx === -1) {
        throw new Error('blogsItems not found in page');
    }

    // `text` is plain JSON text; `from` is the index of the array's opening bracket in it.
    let text = html;
    let from = arrStart;
    if (isEscaped) {
        // Walk to the unescaped quote that closes the surrounding string literal.
        let literalEnd = arrStart;
        while (literalEnd < html.length && html[literalEnd] !== '"') {
            literalEnd += html[literalEnd] === '\\' ? 2 : 1;
        }
        literalEnd = Math.min(literalEnd, html.length);
        try {
            // Re-wrapping the slice in quotes lets JSON.parse remove exactly one escape level.
            text = JSON.parse(`"${html.slice(arrStart, literalEnd)}"`) as string;
        } catch {
            throw new Error('blogsItems payload could not be unescaped');
        }
        from = 0;
    }

    let depth = 0;
    let inString = false;
    let arrEnd = -1;
    for (let i = from; i < text.length; i++) {
        const c = text[i];
        if (inString) {
            if (c === '\\') {
                i++; // skip the escaped character, e.g. \" or \\
            } else if (c === '"') {
                inString = false;
            }
        } else if (c === '"') {
            inString = true;
        } else if (c === '[') {
            depth++;
        } else if (c === ']') {
            depth--;
            if (depth === 0) {
                arrEnd = i + 1;
                break;
            }
        }
    }
    if (arrEnd === -1) {
        throw new Error('blogsItems array is not terminated');
    }

    const parsed: unknown = JSON.parse(text.slice(from, arrEnd));
    if (!Array.isArray(parsed)) {
        throw new TypeError('blogsItems is not an array');
    }
    return parsed as Array<Record<string, unknown>>;
};
|
|
||||
| const items = extractArticles(html); | ||||
|
|
||||
| const filtered = filterTag ? items.filter((item) => (item.tag_zh as string[])?.includes(filterTag)) : items; | ||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Line 77 in 42c6b49
|
||||
|
|
||||
| const titleKey = lang === 'zh' ? 'title_zh' : 'title_en'; | ||||
| const resumeKey = lang === 'zh' ? 'resume_zh' : 'resume_en'; | ||||
| const thumbnailKey = lang === 'zh' ? 'thumbnail_zh' : 'thumbnail_en'; | ||||
| const tagKey = lang === 'zh' ? 'tag_zh' : 'tag_en'; | ||||
|
Comment on lines
+108
to
+111
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. -const titleKey = lang === 'zh' ? 'title_zh' : 'title_en';
+const titleKey = `title_${lang}`; and so on |
||||
|
|
||||
| const result = filtered.map((item) => { | ||||
| const thumbnail = item[thumbnailKey] as string | undefined; | ||||
| const resume = item[resumeKey] as string | undefined; | ||||
| const coverHtml = thumbnail ? `<img src="${thumbnail}">` : ''; | ||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Double the feed output. None of them has a single image as you can see from #21596 (comment) |
||||
| const $ = load(`<div>${coverHtml}<p>${resume ?? ''}</p></div>`); | ||||
| $('div').find('script').remove(); | ||||
|
Comment on lines
+117
to
+118
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Redundant use of cheerio
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Provide the object with its id where you do find it contains |
||||
|
|
||||
| return { | ||||
| title: item[titleKey] as string, | ||||
| link: `https://www.zhipuai.cn/${locale}/research/${item.id}`, | ||||
| pubDate: parseDate(item.createAt as string), | ||||
| author: '智谱AI', | ||||
| category: item[tagKey] as string[], | ||||
| description: $('div').html() ?? '', | ||||
| }; | ||||
| }); | ||||
|
|
||||
| const titlePrefix = lang === 'zh' ? '智谱AI 研究' : 'Zhipu AI Research'; | ||||
| const titleSuffix = filterTag ? ` - ${filterTag}` : ''; | ||||
|
|
||||
| return { | ||||
| title: `${titlePrefix}${titleSuffix}`, | ||||
| link: `https://www.zhipuai.cn/${locale}/research`, | ||||
| item: result, | ||||
| }; | ||||
| }, | ||||
| }; | ||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.