diff --git a/.changeset/curvy-pillows-argue.md b/.changeset/curvy-pillows-argue.md new file mode 100644 index 00000000000..fadec736659 --- /dev/null +++ b/.changeset/curvy-pillows-argue.md @@ -0,0 +1,6 @@ +--- +'@modern-js/runtime': minor +--- + +feat: enhance streaming ssr control for SEO and compliance by introducing dynamic wait options +feat: 通过引入动态等待选项,增强 streaming SSR 控制,以提升 SEO. diff --git a/packages/document/main-doc/docs/en/guides/basic-features/render/streaming-ssr.mdx b/packages/document/main-doc/docs/en/guides/basic-features/render/streaming-ssr.mdx index 33ee5f10a80..d1bbc02ca4e 100644 --- a/packages/document/main-doc/docs/en/guides/basic-features/render/streaming-ssr.mdx +++ b/packages/document/main-doc/docs/en/guides/basic-features/render/streaming-ssr.mdx @@ -212,13 +212,30 @@ function ErrorElement() { } ``` -## Waiting for All Content to Load for Crawlers +## Controlling When to Wait for Full HTML -Streaming can enhance user experience by allowing users to perceive content as it becomes available. +Streaming improves perceived speed, but in some cases (SEO crawlers, A/B buckets, compliance pages) you may want to wait for all content before sending the response. -However, when a crawler visits the page, it might need to load all content and output the entire HTML at once, rather than progressively loading it. +Modern.js decides the streaming mode with this priority: -Modern.js uses [isbot](https://www.npmjs.com/package/isbot) to determine if a request is from a crawler based on the `user-agent` header. +1. Request header `x-should-stream-all` (set per-request in middleware). +2. Env `MODERN_JS_STREAM_TO_STRING` (forces full HTML). +3. [isbot](https://www.npmjs.com/package/isbot) check on `user-agent` (bots get full HTML). +4. Default: stream shell first. 
+ +Set the header in your middleware to choose the behavior dynamically: + +```ts title="middleware example" +export const middleware = async (ctx, next) => { + const ua = ctx.req.header('user-agent') || ''; + const shouldWaitAll = /Lighthouse|Googlebot/i.test(ua) || ctx.req.path === '/marketing'; + + // Write a boolean string: true -> onAllReady, false -> onShellReady + ctx.req.headers.set('x-should-stream-all', String(shouldWaitAll)); + + await next(); +}; +``` import StreamSSRPerformance from '@site-docs-en/components/stream-ssr-performance'; diff --git a/packages/document/main-doc/docs/zh/guides/basic-features/render/streaming-ssr.mdx b/packages/document/main-doc/docs/zh/guides/basic-features/render/streaming-ssr.mdx index 2ead641ab39..a6b25104377 100644 --- a/packages/document/main-doc/docs/zh/guides/basic-features/render/streaming-ssr.mdx +++ b/packages/document/main-doc/docs/zh/guides/basic-features/render/streaming-ssr.mdx @@ -218,14 +218,30 @@ function ErrorElement() { } ``` -## 为爬虫等待所有内容加载完毕 +## 控制是否等待全部内容再输出 -流式传输可以提高用户体验,因为当页面内容可用时,用户可以及时感知到它们。 +流式传输可以提高用户体验,因为当页面内容可用时,用户可以及时感知到它们。但在部分场景下(例如 SEO 爬虫、特定 AB 实验或合规页面)希望等所有内容完成后再一次性输出。 -然而,当一个爬虫访问该页面时,它可能需要先加载所有内容,直接输出整个 HTML,而不是渐进式地加载它。 +Modern.js 默认行为的判定优先级为: -Modern.js 使用 [isbot](https://www.npmjs.com/package/isbot) 对请求的 `uesr-agent`,以判断请求是否来自爬虫。 +1. 请求头 `x-should-stream-all`(中间件可写)。 +2. 环境变量 `MODERN_JS_STREAM_TO_STRING`(强制全量)。 +3. [isbot](https://www.npmjs.com/package/isbot) 检测 `user-agent`(爬虫全量)。 +4. 
默认流式(先 shell 后内容)。 +你可以在自定义中间件里按请求动态写入标记,控制是否等待全部内容: + +```ts title="middleware 示例" +export const middleware = async (ctx, next) => { + const ua = ctx.req.header('user-agent') || ''; + const shouldWaitAll = /Lighthouse|Googlebot/i.test(ua) || ctx.req.path === '/marketing'; + + // 写入布尔值字符串,true 表示使用 onAllReady,false 表示使用 onShellReady + ctx.req.headers.set('x-should-stream-all', String(shouldWaitAll)); + + await next(); +}; +``` import StreamSSRPerformance from '@site-docs/components/stream-ssr-performance'; diff --git a/packages/runtime/plugin-runtime/src/core/server/stream/createReadableStream.ts b/packages/runtime/plugin-runtime/src/core/server/stream/createReadableStream.ts index 696d3ae8983..8c8ba008067 100644 --- a/packages/runtime/plugin-runtime/src/core/server/stream/createReadableStream.ts +++ b/packages/runtime/plugin-runtime/src/core/server/stream/createReadableStream.ts @@ -1,10 +1,8 @@ import { PassThrough, Transform } from 'stream'; -import type { DeferredData } from '@modern-js/runtime-utils/browser'; import { createReadableStreamFromReadable, storage, } from '@modern-js/runtime-utils/node'; -import checkIsBot from 'isbot'; import { ServerStyleSheet } from 'styled-components'; import { ESCAPED_SHELL_STREAM_END_MARK } from '../../../common'; import { RenderLevel } from '../../constants'; @@ -14,6 +12,7 @@ import { type CreateReadableStreamFromElement, ShellChunkStatus, getReadableStreamFromString, + resolveStreamingMode, } from './shared'; import { getTemplates } from './template'; @@ -27,10 +26,7 @@ export const createReadableStreamFromElement: CreateReadableStreamFromElement = let renderLevel = RenderLevel.SERVER_RENDER; const forceStream2String = Boolean(process.env.MODERN_JS_STREAM_TO_STRING); - // When a crawler visit the page, we should waiting for entrie content of page - - const isbot = checkIsBot(request.headers.get('user-agent')); - const onReady = isbot || forceStream2String ? 
'onAllReady' : 'onShellReady'; + const { onReady } = resolveStreamingMode(request, forceStream2String); const sheet = new ServerStyleSheet(); diff --git a/packages/runtime/plugin-runtime/src/core/server/stream/createReadableStream.worker.ts b/packages/runtime/plugin-runtime/src/core/server/stream/createReadableStream.worker.ts index ed494c6394f..ecf5045e31e 100644 --- a/packages/runtime/plugin-runtime/src/core/server/stream/createReadableStream.worker.ts +++ b/packages/runtime/plugin-runtime/src/core/server/stream/createReadableStream.worker.ts @@ -1,6 +1,5 @@ import { renderSSRStream } from '@modern-js/render/ssr'; import { storage } from '@modern-js/runtime-utils/node'; -import checkIsBot from 'isbot'; import { ESCAPED_SHELL_STREAM_END_MARK } from '../../../common'; import { RenderLevel } from '../../constants'; import { enqueueFromEntries } from './deferredScript'; @@ -9,6 +8,7 @@ import { ShellChunkStatus, encodeForWebStream, getReadableStreamFromString, + resolveStreamingMode, } from './shared'; import { getTemplates } from './template'; @@ -55,11 +55,17 @@ export const createReadableStreamFromElement: CreateReadableStreamFromElement = options?.onAllReady?.(); }); - const isbot = checkIsBot(request.headers.get('user-agent')); - if (isbot) { - // However, when a crawler visits your page, or if you’re generating the pages at the build time, - // you might want to let all of the content load first and then produce the final HTML output instead of revealing it progressively. - // from: https://react.dev/reference/react-dom/server/renderToReadableStream#handling-different-errors-in-different-ways + const forceStreamToString = Boolean( + typeof process !== 'undefined' && + process.env?.MODERN_JS_STREAM_TO_STRING, + ); + const { waitForAllReady } = resolveStreamingMode( + request, + forceStreamToString, + ); + + if (waitForAllReady) { + // Prefer to wait for full content when instructed by middleware marker/env/isbot. 
await readableOriginal.allReady; }
diff --git a/packages/runtime/plugin-runtime/src/core/server/stream/shared.tsx b/packages/runtime/plugin-runtime/src/core/server/stream/shared.tsx
index 5213740b9df..fcf4c3f1359 100644
--- a/packages/runtime/plugin-runtime/src/core/server/stream/shared.tsx
+++ b/packages/runtime/plugin-runtime/src/core/server/stream/shared.tsx
@@ -5,6 +5,7 @@ import type {
   SSRManifest as RscSSRManifest,
   ServerManifest as RscServerManifest,
 } from '@modern-js/types/server';
+import checkIsBot from 'isbot';
 import type React from 'react';
 import { JSX_SHELL_STREAM_END_MARK } from '../../../common';
 import type { RuntimeContext } from '../../context';
@@ -42,6 +43,68 @@ export enum ShellChunkStatus {
   FINISH = 1,
 }
 
+/** Request header used to choose the streaming mode for a single request. */
+const SHOULD_STREAM_ALL_HEADER = 'x-should-stream-all';
+
+/**
+ * Parse the raw value of the `x-should-stream-all` request header.
+ *
+ * @param value - Header value, or `null` when the header is absent.
+ * @returns `undefined` when the header is absent or blank, `false` when it is
+ * `false` (case-insensitive, surrounding whitespace ignored), otherwise `true`.
+ */
+function parseShouldStreamAllFlag(value: string | null): boolean | undefined {
+  const normalized = value?.trim().toLowerCase();
+  if (!normalized) {
+    return undefined;
+  }
+  // Only the literal string 'false' opts out; any other value opts in.
+  return normalized !== 'false';
+}
+
+/**
+ * Decide whether to flush once the shell is ready or wait for all content.
+ *
+ * Priority, highest first:
+ * 1. `x-should-stream-all` header: `false` selects `onShellReady`, any other
+ *    non-blank value selects `onAllReady`.
+ * 2. `forceStreamToString`: selects `onAllReady`.
+ * 3. `isbot` match on the `user-agent` header: selects `onAllReady`.
+ * 4. Otherwise `onShellReady`.
+ *
+ * NOTE(review): the header is read from the incoming request as-is, so a value
+ * sent by the client is honored unless middleware overwrites it; confirm that
+ * this is intended.
+ *
+ * @param request - Request whose headers are inspected.
+ * @param forceStreamToString - Caller-supplied flag that asks for full content.
+ * @returns `onReady` and `waitForAllReady`; `waitForAllReady` is `true` exactly
+ * when `onReady` is `'onAllReady'`.
+ */
+export function resolveStreamingMode(
+  request: Request,
+  forceStreamToString: boolean,
+): {
+  onReady: 'onAllReady' | 'onShellReady';
+  waitForAllReady: boolean;
+} {
+  const shouldStreamAll = parseShouldStreamAllFlag(
+    request.headers.get(SHOULD_STREAM_ALL_HEADER),
+  );
+
+  // An explicit header value decides in both directions: falling through on
+  // `false` would let the env flag or bot detection override the opt-out.
+  // The bot check is only evaluated when nothing above it has decided.
+  const waitForAllReady =
+    shouldStreamAll ??
+    (forceStreamToString || checkIsBot(request.headers.get('user-agent')));
+
+  return {
+    onReady: waitForAllReady ? 'onAllReady' : 'onShellReady',
+    waitForAllReady,
+  };
+}
+
 let encoder: TextEncoder;
 export function encodeForWebStream(thing: unknown) {
   if (!encoder) {