diff --git a/packages/document/docs/en/guides/basic-features/render/streaming-ssr.mdx b/packages/document/docs/en/guides/basic-features/render/streaming-ssr.mdx index 6b74829e3af..98b37f86252 100644 --- a/packages/document/docs/en/guides/basic-features/render/streaming-ssr.mdx +++ b/packages/document/docs/en/guides/basic-features/render/streaming-ssr.mdx @@ -229,13 +229,30 @@ function ErrorElement() { } ``` -## Waiting for All Content to Load for Crawlers +## Controlling When to Wait for Full HTML -Streaming can enhance user experience by allowing users to perceive content as it becomes available. +Streaming improves perceived speed, but in some cases (SEO crawlers, A/B buckets, compliance pages) you may want to wait for all content before sending the response. -However, when a crawler visits the page, it might need to load all content and output the entire HTML at once, rather than progressively loading it. +Modern.js decides the streaming mode with this priority: -Modern.js uses [isbot](https://www.npmjs.com/package/isbot) to determine if a request is from a crawler based on the `user-agent` header. +1. Request header `x-should-stream-all` (set per-request in middleware). +2. Env `MODERN_JS_STREAM_TO_STRING` (forces full HTML). +3. [isbot](https://www.npmjs.com/package/isbot) check on `user-agent` (bots get full HTML). +4. Default: stream shell first. 
+ +Set the header in your middleware to choose the behavior dynamically: + +```ts title="middleware example" +export const middleware = async (ctx, next) => { + const ua = ctx.req.header('user-agent') || ''; + const shouldWaitAll = /Lighthouse|Googlebot/i.test(ua) || ctx.req.path === '/marketing'; + + // Write a boolean string: true -> onAllReady, false -> onShellReady + ctx.req.headers.set('x-should-stream-all', String(shouldWaitAll)); + + await next(); +}; +``` import StreamSSRPerformance from '@site-docs-en/components/stream-ssr-performance'; diff --git a/packages/document/docs/zh/guides/basic-features/render/streaming-ssr.mdx b/packages/document/docs/zh/guides/basic-features/render/streaming-ssr.mdx index 282c8c562ce..a6a21ae5bc5 100644 --- a/packages/document/docs/zh/guides/basic-features/render/streaming-ssr.mdx +++ b/packages/document/docs/zh/guides/basic-features/render/streaming-ssr.mdx @@ -235,14 +235,33 @@ function ErrorElement() { } ``` -## 为爬虫等待所有内容加载完毕 +## 控制是否等待全部内容再输出 -流式传输可以提高用户体验,因为当页面内容可用时,用户可以及时感知到它们。 +流式传输可以提高用户体验,因为当页面内容可用时,用户可以及时感知到它们。但在部分场景下(例如 SEO 爬虫、特定 AB 实验或合规页面)希望等所有内容完成后再一次性输出。 -然而,当一个爬虫访问该页面时,它可能需要先加载所有内容,直接输出整个 HTML,而不是渐进式地加载它。 +Modern.js 默认行为的判定优先级为: Modern.js 使用 [isbot](https://www.npmjs.com/package/isbot) 对请求的 `user-agent`,以判断请求是否来自爬虫。 +1. 请求头 `x-should-stream-all`(中间件可写)。 +2. 环境变量 `MODERN_JS_STREAM_TO_STRING`(强制全量)。 +3. [isbot](https://www.npmjs.com/package/isbot) 检测 `user-agent`(爬虫全量)。 +4. 
默认流式(先 shell 后内容)。 + +你可以在自定义中间件里按请求动态写入标记,控制是否等待全部内容: + +```ts title="middleware 示例" +export const middleware = async (ctx, next) => { + const ua = ctx.req.header('user-agent') || ''; + const shouldWaitAll = + /Lighthouse|Googlebot/i.test(ua) || ctx.req.path === '/marketing'; + + // 写入布尔值字符串,true 表示使用 onAllReady,false 表示使用 onShellReady + ctx.req.headers.set('x-should-stream-all', String(shouldWaitAll)); + + await next(); +}; +``` import StreamSSRPerformance from '@site-docs/components/stream-ssr-performance'; diff --git a/packages/runtime/plugin-runtime/src/core/server/stream/createReadableStream.ts b/packages/runtime/plugin-runtime/src/core/server/stream/createReadableStream.ts index 00d5d9620f8..dac4d75bfe3 100644 --- a/packages/runtime/plugin-runtime/src/core/server/stream/createReadableStream.ts +++ b/packages/runtime/plugin-runtime/src/core/server/stream/createReadableStream.ts @@ -3,7 +3,6 @@ import { createReadableStreamFromReadable, storage, } from '@modern-js/runtime-utils/node'; -import checkIsBot from 'isbot'; import type { ReactElement } from 'react'; import { ESCAPED_SHELL_STREAM_END_MARK } from '../../../common'; import { RenderLevel } from '../../constants'; @@ -14,6 +13,7 @@ import { type CreateReadableStreamFromElement, ShellChunkStatus, getReadableStreamFromString, + resolveStreamingMode, } from './shared'; import { getTemplates } from './template'; @@ -35,12 +35,7 @@ export const createReadableStreamFromElement: CreateReadableStreamFromElement = const forceStream2String = Boolean(process.env.MODERN_JS_STREAM_TO_STRING); // When a crawler visit the page, we should waiting for entrie content of page - const isbot = checkIsBot(request.headers.get('user-agent')); - const isSsgRender = request.headers.get('x-modern-ssg-render') === 'true'; - const onReady = - isbot || isSsgRender || forceStream2String - ? 
'onAllReady' - : 'onShellReady'; + const { onReady } = resolveStreamingMode(request, forceStream2String); const internalRuntimeContext = getGlobalInternalRuntimeContext(); const hooks = internalRuntimeContext.hooks; diff --git a/packages/runtime/plugin-runtime/src/core/server/stream/createReadableStream.worker.ts b/packages/runtime/plugin-runtime/src/core/server/stream/createReadableStream.worker.ts index de6e3e65895..a0447cc647b 100644 --- a/packages/runtime/plugin-runtime/src/core/server/stream/createReadableStream.worker.ts +++ b/packages/runtime/plugin-runtime/src/core/server/stream/createReadableStream.worker.ts @@ -1,6 +1,5 @@ import { renderSSRStream } from '@modern-js/render/ssr'; import { storage } from '@modern-js/runtime-utils/node'; -import checkIsBot from 'isbot'; import { ESCAPED_SHELL_STREAM_END_MARK } from '../../../common'; import { RenderLevel } from '../../constants'; import { enqueueFromEntries } from './deferredScript'; @@ -9,6 +8,7 @@ import { ShellChunkStatus, encodeForWebStream, getReadableStreamFromString, + resolveStreamingMode, } from './shared'; import { getTemplates } from './template'; @@ -55,12 +55,20 @@ export const createReadableStreamFromElement: CreateReadableStreamFromElement = options?.onAllReady?.(); }); - const isbot = checkIsBot(request.headers.get('user-agent')); - const isSsgRender = request.headers.get('x-modern-ssg-render') === 'true'; - if (isbot || isSsgRender) { - // However, when a crawler visits your page, or if you're generating the pages at the build time, - // you might want to let all of the content load first and then produce the final HTML output instead of revealing it progressively. 
- // from: https://react.dev/reference/react-dom/server/renderToReadableStream#handling-different-errors-in-different-ways + // However, when a crawler visits your page, or if you're generating the pages at the build time, + // you might want to let all of the content load first and then produce the final HTML output instead of revealing it progressively. + // from: https://react.dev/reference/react-dom/server/renderToReadableStream#handling-different-errors-in-different-ways + const forceStreamToString = Boolean( + typeof process !== 'undefined' && + process.env?.MODERN_JS_STREAM_TO_STRING, + ); + const { waitForAllReady } = resolveStreamingMode( + request, + forceStreamToString, + ); + + if (waitForAllReady) { + // Prefer to wait for full content when instructed by middleware marker/env/isbot. await readableOriginal.allReady; } diff --git a/packages/runtime/plugin-runtime/src/core/server/stream/shared.tsx b/packages/runtime/plugin-runtime/src/core/server/stream/shared.tsx index a3bc570edaf..2c43b60af27 100644 --- a/packages/runtime/plugin-runtime/src/core/server/stream/shared.tsx +++ b/packages/runtime/plugin-runtime/src/core/server/stream/shared.tsx @@ -5,6 +5,7 @@ import type { SSRManifest as RscSSRManifest, ServerManifest as RscServerManifest, } from '@modern-js/types/server'; +import checkIsBot from 'isbot'; import type React from 'react'; import { JSX_SHELL_STREAM_END_MARK } from '../../../common'; import type { TRuntimeContext } from '../../context'; @@ -42,6 +43,56 @@ export enum ShellChunkStatus { FINISH = 1, } +const SHOULD_STREAM_ALL_HEADER = 'x-should-stream-all'; + +function parseShouldStreamAllFlag(value: string | null): boolean | undefined { + if (!value) { + return undefined; + } + const normalized = value.trim().toLowerCase(); + + // if the header is set to 'false', treat it as false, runtime will not stream all. + // Otherwise, treat it as true. 
+  if (normalized === 'false') {
+    return false;
+  }
+  return true;
+}
+
+/**
+ * Decides whether a streaming SSR response waits for the full document.
+ *
+ * SSG renders always wait. Otherwise an explicit `x-should-stream-all` header
+ * wins in either direction, then `forceStreamToString`, then the isbot check.
+ *
+ * @param request - Request whose headers are inspected.
+ * @param forceStreamToString - Caller-supplied flag that forces full HTML.
+ * @returns The readiness hook name and whether to wait for all content.
+ */
+export function resolveStreamingMode(
+  request: Request,
+  forceStreamToString: boolean,
+): {
+  onReady: 'onAllReady' | 'onShellReady';
+  waitForAllReady: boolean;
+} {
+  const { headers } = request;
+  const explicit = parseShouldStreamAllFlag(
+    headers.get(SHOULD_STREAM_ALL_HEADER),
+  );
+  const isSsgRender = headers.get('x-modern-ssg-render') === 'true';
+
+  // An explicit 'false' must force shell streaming instead of falling
+  // through to the env/isbot checks, so only consult those when it is unset.
+  const waitForAllReady =
+    isSsgRender ||
+    (explicit ??
+      (forceStreamToString || checkIsBot(headers.get('user-agent'))));
+  return waitForAllReady
+    ? { onReady: 'onAllReady', waitForAllReady }
+    : { onReady: 'onShellReady', waitForAllReady };
+}
+
 let encoder: TextEncoder;
 export function encodeForWebStream(thing: unknown) {
   if (!encoder) {