Skip to content

Commit 5fbb904

Browse files
authored
Single style capture (#1437)
Support a contrived/rare case where a <style> element has multiple text node children (this is usually only possible to recreate via JavaScript append) ... this PR fixes cases where there are subsequent text mutations to these nodes; previously these would have been lost * In this scenario, a new CSS comment may now be inserted into the captured `_cssText` for a <style> element to show where it should be broken up into text elements upon replay: `/* rr_split */` * The new 'can record and replay style mutations' test is the principal way to exercise the problematic scenarios, and is a detailed 'catch-all' test with many checks to cover most of the ways things can fail * There are new tests for splitting/rebuilding the css using the rr_split marker * The prior 'dynamic stylesheet' route is now the main route for serializing a stylesheet; dynamic stylesheets were missed out in #1533 but that case is now covered with this PR This PR was originally extracted from #1475 so the initial motivation was to change the approach on stringifying <style> elements to do so in a single place. This is also the motivating factor for always serializing <style> elements via the `_cssText` attribute rather than in its childNodes; in #1475 we will be delaying populating `_cssText` for performance and instead recording them as assets. Thanks for the detailed review to Justin Halsall <[email protected]> & Yun Feng <https://github.com/YunFeng0817>
1 parent 8837fe3 commit 5fbb904

19 files changed

+1606
-398
lines changed

.changeset/single-style-capture.md

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
"rrweb-snapshot": patch
3+
"rrweb": patch
4+
---
5+
6+
Edge case: Provide support for mutations on a <style> element which (unusually) has multiple text nodes

package.json

+1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
"eslint-plugin-compat": "^5.0.0",
3535
"eslint-plugin-jest": "^27.6.0",
3636
"eslint-plugin-tsdoc": "^0.2.17",
37+
"happy-dom": "^14.12.0",
3738
"markdownlint": "^0.25.1",
3839
"markdownlint-cli": "^0.31.1",
3940
"prettier": "2.8.4",

packages/rrdom/package.json

-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@
4646
"@typescript-eslint/eslint-plugin": "^5.23.0",
4747
"@typescript-eslint/parser": "^5.23.0",
4848
"eslint": "^8.15.0",
49-
"happy-dom": "^14.12.0",
5049
"puppeteer": "^17.1.3",
5150
"typescript": "^5.4.5",
5251
"vite": "^5.3.1",

packages/rrweb-snapshot/src/rebuild.ts

+85-13
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import { mediaSelectorPlugin, pseudoClassPlugin } from './css';
22
import {
33
type serializedNodeWithId,
4+
type serializedElementNodeWithId,
5+
type serializedTextNodeWithId,
46
NodeType,
57
type tagMap,
68
type elementNode,
@@ -78,6 +80,77 @@ export function createCache(): BuildCache {
7880
};
7981
}
8082

83+
/**
84+
* undo splitCssText/markCssSplits
85+
* (would move to utils.ts but uses `adaptCssForReplay`)
86+
*/
87+
export function applyCssSplits(
88+
n: serializedElementNodeWithId,
89+
cssText: string,
90+
hackCss: boolean,
91+
cache: BuildCache,
92+
): void {
93+
const childTextNodes: serializedTextNodeWithId[] = [];
94+
for (const scn of n.childNodes) {
95+
if (scn.type === NodeType.Text) {
96+
childTextNodes.push(scn);
97+
}
98+
}
99+
const cssTextSplits = cssText.split('/* rr_split */');
100+
while (
101+
cssTextSplits.length > 1 &&
102+
cssTextSplits.length > childTextNodes.length
103+
) {
104+
// unexpected: remerge the last two so that we don't discard any css
105+
cssTextSplits.splice(-2, 2, cssTextSplits.slice(-2).join(''));
106+
}
107+
for (let i = 0; i < childTextNodes.length; i++) {
108+
const childTextNode = childTextNodes[i];
109+
const cssTextSection = cssTextSplits[i];
110+
if (childTextNode && cssTextSection) {
111+
// id will be assigned when these child nodes are
112+
// iterated over in buildNodeWithSN
113+
childTextNode.textContent = hackCss
114+
? adaptCssForReplay(cssTextSection, cache)
115+
: cssTextSection;
116+
}
117+
}
118+
}
119+
120+
/**
121+
* Normally a <style> element has a single textNode containing the rules.
122+
* During serialization, we bypass this (`styleEl.sheet`) to get the rules the
123+
* browser sees and serialize this to a special _cssText attribute, blanking
124+
* out any text nodes. This function reverses that and also handles cases where
125+
* there were no textNode children present (dynamic css/or a <link> element) as
126+
* well as multiple textNodes, which need to be repopulated (based on presence of
127+
* a special `rr_split` marker) in case they are modified by subsequent mutations.
128+
*/
129+
export function buildStyleNode(
130+
n: serializedElementNodeWithId,
131+
styleEl: HTMLStyleElement, // when inlined, a <link type="stylesheet"> also gets rebuilt as a <style>
132+
cssText: string,
133+
options: {
134+
doc: Document;
135+
hackCss: boolean;
136+
cache: BuildCache;
137+
},
138+
) {
139+
const { doc, hackCss, cache } = options;
140+
if (n.childNodes.length) {
141+
applyCssSplits(n, cssText, hackCss, cache);
142+
} else {
143+
if (hackCss) {
144+
cssText = adaptCssForReplay(cssText, cache);
145+
}
146+
/**
147+
<link> element or dynamic <style> are serialized without any child nodes
148+
we create the text node without an ID or presence in mirror as it can't
149+
*/
150+
styleEl.appendChild(doc.createTextNode(cssText));
151+
}
152+
}
153+
81154
function buildNode(
82155
n: serializedNodeWithId,
83156
options: {
@@ -154,14 +227,13 @@ function buildNode(
154227
continue;
155228
}
156229

157-
const isTextarea = tagName === 'textarea' && name === 'value';
158-
const isRemoteOrDynamicCss = tagName === 'style' && name === '_cssText';
159-
if (isRemoteOrDynamicCss && hackCss && typeof value === 'string') {
160-
value = adaptCssForReplay(value, cache);
161-
}
162-
if ((isTextarea || isRemoteOrDynamicCss) && typeof value === 'string') {
163-
// https://github.com/rrweb-io/rrweb/issues/112
164-
// https://github.com/rrweb-io/rrweb/pull/1351
230+
if (typeof value !== 'string') {
231+
// pass
232+
} else if (tagName === 'style' && name === '_cssText') {
233+
buildStyleNode(n, node as HTMLStyleElement, value, options);
234+
continue; // no need to set _cssText as attribute
235+
} else if (tagName === 'textarea' && name === 'value') {
236+
// create without an ID or presence in mirror
165237
node.appendChild(doc.createTextNode(value));
166238
n.childNodes = []; // value overrides childNodes
167239
continue;
@@ -317,11 +389,11 @@ function buildNode(
317389
return node;
318390
}
319391
case NodeType.Text:
320-
return doc.createTextNode(
321-
n.isStyle && hackCss
322-
? adaptCssForReplay(n.textContent, cache)
323-
: n.textContent,
324-
);
392+
if (n.isStyle && hackCss) {
393+
// support legacy style
394+
return doc.createTextNode(adaptCssForReplay(n.textContent, cache));
395+
}
396+
return doc.createTextNode(n.textContent);
325397
case NodeType.CDATA:
326398
return doc.createCDATASection(n.textContent);
327399
case NodeType.Comment:

packages/rrweb-snapshot/src/snapshot.ts

+38-37
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import {
2727
toLowerCase,
2828
extractFileExtension,
2929
absolutifyURLs,
30+
markCssSplits,
3031
} from './utils';
3132
import dom from '@rrweb/utils';
3233

@@ -403,6 +404,7 @@ function serializeNode(
403404
* `newlyAddedElement: true` skips scrollTop and scrollLeft check
404405
*/
405406
newlyAddedElement?: boolean;
407+
cssCaptured?: boolean;
406408
},
407409
): serializedNode | false {
408410
const {
@@ -420,6 +422,7 @@ function serializeNode(
420422
recordCanvas,
421423
keepIframeSrcFn,
422424
newlyAddedElement = false,
425+
cssCaptured = false,
423426
} = options;
424427
// Only record root id when document object is not the base document
425428
const rootId = getRootId(doc, mirror);
@@ -466,6 +469,7 @@ function serializeNode(
466469
needsMask,
467470
maskTextFn,
468471
rootId,
472+
cssCaptured,
469473
});
470474
case n.CDATA_SECTION_NODE:
471475
return {
@@ -497,48 +501,38 @@ function serializeTextNode(
497501
needsMask: boolean;
498502
maskTextFn: MaskTextFn | undefined;
499503
rootId: number | undefined;
504+
cssCaptured?: boolean;
500505
},
501506
): serializedNode {
502-
const { needsMask, maskTextFn, rootId } = options;
507+
const { needsMask, maskTextFn, rootId, cssCaptured } = options;
503508
// The parent node may not be a html element which has a tagName attribute.
504509
// So just let it be undefined which is ok in this use case.
505510
const parent = dom.parentNode(n);
506511
const parentTagName = parent && (parent as HTMLElement).tagName;
507-
let text = dom.textContent(n);
512+
let textContent: string | null = '';
508513
const isStyle = parentTagName === 'STYLE' ? true : undefined;
509514
const isScript = parentTagName === 'SCRIPT' ? true : undefined;
510-
if (isStyle && text) {
511-
try {
512-
// try to read style sheet
513-
if (n.nextSibling || n.previousSibling) {
514-
// This is not the only child of the stylesheet.
515-
// We can't read all of the sheet's .cssRules and expect them
516-
// to _only_ include the current rule(s) added by the text node.
517-
// So we'll be conservative and keep textContent as-is.
518-
} else if ((parent as HTMLStyleElement).sheet?.cssRules) {
519-
text = stringifyStylesheet((parent as HTMLStyleElement).sheet!);
520-
}
521-
} catch (err) {
522-
console.warn(
523-
`Cannot get CSS styles from text's parentNode. Error: ${err as string}`,
524-
n,
525-
);
526-
}
527-
text = absolutifyURLs(text, getHref(options.doc));
528-
}
529515
if (isScript) {
530-
text = 'SCRIPT_PLACEHOLDER';
516+
textContent = 'SCRIPT_PLACEHOLDER';
517+
} else if (!cssCaptured) {
518+
textContent = dom.textContent(n);
519+
if (isStyle && textContent) {
520+
// mutation only: we don't need to use stringifyStylesheet
521+
// as a <style> text node mutation obliterates any previous
522+
// programmatic rule manipulation (.insertRule etc.)
523+
// so the current textContent represents the most up to date state
524+
textContent = absolutifyURLs(textContent, getHref(options.doc));
525+
}
531526
}
532-
if (!isStyle && !isScript && text && needsMask) {
533-
text = maskTextFn
534-
? maskTextFn(text, dom.parentElement(n))
535-
: text.replace(/[\S]/g, '*');
527+
if (!isStyle && !isScript && textContent && needsMask) {
528+
textContent = maskTextFn
529+
? maskTextFn(textContent, dom.parentElement(n))
530+
: textContent.replace(/[\S]/g, '*');
536531
}
537532

538533
return {
539534
type: NodeType.Text,
540-
textContent: text || '',
541-
isStyle,
535+
textContent: textContent || '',
542536
rootId,
543537
};
544538
}
@@ -608,17 +602,14 @@ function serializeElementNode(
608602
attributes._cssText = cssText;
609603
}
610604
}
611-
// dynamic stylesheet
612-
if (
613-
tagName === 'style' &&
614-
(n as HTMLStyleElement).sheet &&
615-
// TODO: Currently we only try to get dynamic stylesheet when it is an empty style element
616-
!(n.innerText || dom.textContent(n) || '').trim().length
617-
) {
618-
const cssText = stringifyStylesheet(
605+
if (tagName === 'style' && (n as HTMLStyleElement).sheet) {
606+
let cssText = stringifyStylesheet(
619607
(n as HTMLStyleElement).sheet as CSSStyleSheet,
620608
);
621609
if (cssText) {
610+
if (n.childNodes.length > 1) {
611+
cssText = markCssSplits(cssText, n as HTMLStyleElement);
612+
}
622613
attributes._cssText = cssText;
623614
}
624615
}
@@ -937,6 +928,7 @@ export function serializeNodeWithId(
937928
node: serializedElementNodeWithId,
938929
) => unknown;
939930
stylesheetLoadTimeout?: number;
931+
cssCaptured?: boolean;
940932
},
941933
): serializedNodeWithId | null {
942934
const {
@@ -962,6 +954,7 @@ export function serializeNodeWithId(
962954
stylesheetLoadTimeout = 5000,
963955
keepIframeSrcFn = () => false,
964956
newlyAddedElement = false,
957+
cssCaptured = false,
965958
} = options;
966959
let { needsMask } = options;
967960
let { preserveWhiteSpace = true } = options;
@@ -992,6 +985,7 @@ export function serializeNodeWithId(
992985
recordCanvas,
993986
keepIframeSrcFn,
994987
newlyAddedElement,
988+
cssCaptured,
995989
});
996990
if (!_serializedNode) {
997991
// TODO: dev only
@@ -1007,7 +1001,6 @@ export function serializeNodeWithId(
10071001
slimDOMExcluded(_serializedNode, slimDOMOptions) ||
10081002
(!preserveWhiteSpace &&
10091003
_serializedNode.type === NodeType.Text &&
1010-
!_serializedNode.isStyle &&
10111004
!_serializedNode.textContent.replace(/^\s+|\s+$/gm, '').length)
10121005
) {
10131006
id = IGNORED_NODE;
@@ -1072,6 +1065,7 @@ export function serializeNodeWithId(
10721065
onStylesheetLoad,
10731066
stylesheetLoadTimeout,
10741067
keepIframeSrcFn,
1068+
cssCaptured: false,
10751069
};
10761070

10771071
if (
@@ -1081,6 +1075,13 @@ export function serializeNodeWithId(
10811075
) {
10821076
// value parameter in DOM reflects the correct value, so ignore childNode
10831077
} else {
1078+
if (
1079+
serializedNode.type === NodeType.Element &&
1080+
(serializedNode as elementNode).attributes._cssText !== undefined &&
1081+
typeof serializedNode.attributes._cssText === 'string'
1082+
) {
1083+
bypassOptions.cssCaptured = true;
1084+
}
10841085
for (const childN of Array.from(dom.childNodes(n))) {
10851086
const serializedChildNode = serializeNodeWithId(childN, bypassOptions);
10861087
if (serializedChildNode) {

packages/rrweb-snapshot/src/types.ts

+20-2
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,18 @@ export type documentTypeNode = {
2020
systemId: string;
2121
};
2222

23-
export type attributes = {
24-
[key: string]: string | number | true | null;
23+
type cssTextKeyAttr = {
24+
_cssText?: string;
2525
};
26+
27+
export type attributes = cssTextKeyAttr & {
28+
[key: string]:
29+
| string
30+
| number // properties e.g. rr_scrollLeft or rr_mediaCurrentTime
31+
| true // e.g. checked on <input type="radio">
32+
| null; // an indication that an attribute was removed (during a mutation)
33+
};
34+
2635
export type legacyAttributes = {
2736
/**
2837
* @deprecated old bug in rrweb was causing these to always be set
@@ -45,6 +54,10 @@ export type elementNode = {
4554
export type textNode = {
4655
type: NodeType.Text;
4756
textContent: string;
57+
/**
58+
* @deprecated styles are now always snapshotted against parent <style> element
59+
* style mutations can still happen via an added textNode, but they don't need this attribute for correct replay
60+
*/
4861
isStyle?: true;
4962
};
5063

@@ -78,6 +91,11 @@ export type serializedElementNodeWithId = Extract<
7891
Record<'type', NodeType.Element>
7992
>;
8093

94+
export type serializedTextNodeWithId = Extract<
95+
serializedNodeWithId,
96+
Record<'type', NodeType.Text>
97+
>;
98+
8199
export type tagMap = {
82100
[key: string]: string;
83101
};

0 commit comments

Comments
 (0)