Skip to content

Commit cd4b975

Browse files
authored
Add support for C14N (#119)
* wip: add support for C14N
* Refactor C14N API
  - Replace xmlC14NDocDumpMemory with xmlC14NExecute + xmlOutputBufferCreateIO
  - Remove document cloning workaround (xmlCopyNode, xmlNewDoc) by implementing proper node-level canonicalization
  - Add canonicalize() and canonicalizeToString() methods to both XmlDocument and XmlNode classes
  - Introduce XmlC14NIsVisibleCallback type for custom node filtering during canonicalization
  - Update C14NOptions interface with better type definitions and documentation
  - Remove obsolete utility classes (XmlNodeSetWrapper) and simplify memory management
  - Expand test coverage with comprehensive tests for all C14N modes, node-level canonicalization, comments handling, and inclusive namespaces
* revert unneeded changes
* export full c14n api and types
* refactor: refine canonicalization API, expose types, integrate document/node methods, adjust memory handling and callbacks
* replaced DisposableMalloc and CStringArrayWrapper with allocCStringArray(string[]) and free(Pointer)
* refactor: remove unused functions
* refactor: c14n options
* refactor: format, remove unnecessary exports, ignore unreachable code in coverage
* tests: add tests for all supported node types
* add explanation for error
1 parent 2d5193f commit cd4b975

File tree

8 files changed

+849
-0
lines changed

8 files changed

+849
-0
lines changed

binding/exported-functions.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ _malloc
33
_xmlAddChild
44
_xmlAddNextSibling
55
_xmlAddPrevSibling
6+
_xmlC14NExecute
67
_xmlCleanupInputCallbacks
78
_xmlCtxtParseDtd
89
_xmlCtxtReadMemory
@@ -31,6 +32,8 @@ _xmlNewParserCtxt
3132
_xmlNewReference
3233
_xmlNodeGetContent
3334
_xmlNodeSetContentLen
35+
_xmlOutputBufferClose
36+
_xmlOutputBufferCreateIO
3437
_xmlRegisterInputCallbacks
3538
_xmlRelaxNGFree
3639
_xmlRelaxNGFreeParserCtxt

src/c14n.mts

Lines changed: 300 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,300 @@
1+
import {
2+
addFunction,
3+
allocCStringArray,
4+
free,
5+
xmlC14NExecute,
6+
xmlOutputBufferCreateIO,
7+
xmlOutputBufferClose,
8+
XmlError,
9+
XmlOutputBufferHandler,
10+
XmlTreeCommonStruct,
11+
} from './libxml2.mjs';
12+
import type { XmlNode } from './nodes.mjs';
13+
import type {
14+
XmlDocPtr, XmlOutputBufferPtr, Pointer, XmlNodePtr,
15+
} from './libxml2raw.mjs';
16+
import type { XmlDocument } from './document.mjs';
17+
import { ContextStorage } from './utils.mjs';
18+
19+
/**
 * Per-call state consulted by the C14N visibility callback.
 *
 * A context is built only when the caller supplied either an `isVisible`
 * callback or a `nodeSet`; the two are mutually exclusive, so at most one of
 * `jsCallback` / `rootPtrs` is non-null.
 * @internal
 */
interface C14NCallbackContext {
  /** User-supplied visibility predicate, or null when filtering by node set. */
  jsCallback: XmlC14NIsVisibleCallback | null;
  /** Node-set mode: raw pointers of the selected subtree roots; null otherwise. */
  rootPtrs: Set<number> | null;
  /** When true, a node whose parent was rejected is rejected without asking `jsCallback`. */
  cascade: boolean;
  /** Pointers of nodes already rejected; only allocated when `cascade` is true. */
  invisible: Set<number> | null;
}
33+
34+
// Registry of live callback contexts. A context is stored under an integer
// handle so that the handle, rather than a JS object, can be passed to
// xmlC14NExecute as `user_data` and looked up again inside the callback.
const c14nCallbackStorage = new ContextStorage<C14NCallbackContext>();
35+
36+
/**
 * Shared C14N visibility callback, registered once when this module loads and
 * handed to `xmlC14NExecute` whenever filtering is requested.
 *
 * Native signature: `int (void *user_data, xmlNodePtr node, xmlNodePtr parent)`.
 * `user_data` is not a real pointer: it carries the handle under which the
 * per-call context was stored in `c14nCallbackStorage`.
 *
 * Returns 1 when the node is visible and 0 when it must be omitted.
 * @internal
 */
const c14nIsVisibleCallback = addFunction(
  (userDataIndex: number, nodePtr: number, parentPtr: number): number => {
    const ctx = c14nCallbackStorage.get(userDataIndex);

    // Node-set mode: visible if the node is a selected root or sits below one.
    const roots = ctx.rootPtrs;
    if (roots !== null) {
      if (roots.has(nodePtr)) return 1;
      // Walk up from the parent supplied by libxml2 instead of dereferencing
      // the node itself (namespace declaration nodes have no normal parent field).
      for (let cur = parentPtr; cur !== 0; cur = XmlTreeCommonStruct.parent(cur)) {
        if (roots.has(cur)) return 1;
      }
      return 0;
    }

    // Callback mode
    if (ctx.jsCallback !== null) {
      // Non-null only when invisibility should propagate to descendants.
      const rejected = ctx.cascade ? ctx.invisible : null;

      // A rejected parent rejects the child without consulting the callback.
      if (rejected !== null && parentPtr !== 0 && rejected.has(parentPtr)) {
        rejected.add(nodePtr);
        return 0;
      }

      const res = ctx.jsCallback(nodePtr, parentPtr) ? 1 : 0;
      if (rejected !== null && res === 0) {
        // Remember the rejection so descendants can be cascaded later.
        rejected.add(nodePtr);
      }
      return res;
    }
    /* c8 ignore next 2, callback is not registered if neither is present */
    return 1;
  },
  'iiii',
) as Pointer;
77+
78+
/**
 * Canonicalization algorithms supported by libxml2.
 *
 * The numeric values are forwarded unchanged as the `mode` argument of
 * `xmlC14NExecute`.
 * @see http://www.w3.org/TR/xml-c14n
 * @see http://www.w3.org/TR/xml-exc-c14n
 */
export const XmlC14NMode = {
  /** Original C14N 1.0 specification */
  XML_C14N_1_0: 0,
  /** Exclusive C14N 1.0 (omits unused namespace declarations) */
  XML_C14N_EXCLUSIVE_1_0: 1,
  /** C14N 1.1 specification */
  XML_C14N_1_1: 2,
} as const;
91+
92+
/**
 * Predicate deciding whether a node takes part in canonicalization.
 *
 * Both arguments are raw libxml2 node pointers, not `XmlNode` wrappers.
 *
 * @param node Pointer to the node being evaluated
 * @param parent Pointer to the parent of the node being evaluated
 * @returns true if the node should be included, false otherwise
 */
export type XmlC14NIsVisibleCallback = (node: XmlNodePtr, parent: XmlNodePtr) => boolean;
100+
101+
/**
 * Options for XML canonicalization.
 *
 * `isVisible` and `nodeSet` are alternative ways of filtering the output and
 * must not be combined.
 */
export interface C14NOptions {
  /** The canonicalization mode to use
   * @default XmlC14NMode.XML_C14N_1_0
   */
  mode?: (typeof XmlC14NMode)[keyof typeof XmlC14NMode];

  /** Whether to include comments in the canonicalized output
   * @default false
   */
  withComments?: boolean;

  /** List of inclusive namespace prefixes for exclusive canonicalization
   * Only applies when mode is XML_C14N_EXCLUSIVE_1_0
   */
  inclusiveNamespacePrefixes?: string[];

  /** Custom callback to determine node visibility
   * Must not be used together with {@link nodeSet}
   */
  isVisible?: XmlC14NIsVisibleCallback;

  /** Set of nodes to include in canonicalization; everything inside the
   * subtree of a listed node is included as well.
   * Must not be used together with {@link isVisible}
   */
  nodeSet?: Set<XmlNode>;
}
130+
131+
/**
 * C14N options for subtree canonicalization: the same as {@link C14NOptions}
 * minus the filtering options (`isVisible`, `nodeSet`).
 */
export type SubtreeC14NOptions = Omit<C14NOptions, 'isVisible' | 'nodeSet'>;
135+
136+
/**
 * Check whether a node belongs to the subtree rooted at `rootPtr`.
 *
 * The node matches if it is the root itself, or if the root is found while
 * walking up the ancestor chain that begins at `parentPtr`.
 *
 * Important: Namespace declaration nodes (xmlNs) are not part of the tree and
 * don't have a normal parent field. libxml2 calls the visibility callback with
 * the owning element as `parentPtr`, so the walk must start from `parentPtr`
 * rather than dereferencing the node.
 *
 * @param nodePtr Pointer to the node being tested
 * @param parentPtr Pointer to the node's parent as reported by libxml2 (0 if none)
 * @param rootPtr Pointer to the subtree root
 * @returns true if the node is the root or one of its descendants
 * @internal
 */
function isNodeInSubtree(nodePtr: number, parentPtr: number, rootPtr: number): boolean {
  if (nodePtr === rootPtr) return true;

  for (
    let ancestor = parentPtr;
    ancestor !== 0;
    ancestor = XmlTreeCommonStruct.parent(ancestor)
  ) {
    if (ancestor === rootPtr) return true;
  }
  return false;
}
159+
160+
/**
 * Internal implementation shared by the public helpers; drives `xmlC14NExecute`.
 *
 * @param handler Receives the canonicalized output through an IO output buffer
 * @param docPtr Pointer to the document to canonicalize
 * @param options Canonicalization options; `isVisible` and `nodeSet` are mutually exclusive
 * @param cascade When true, a node rejected by `isVisible` also hides its
 *   descendants without consulting the callback again
 * @throws XmlError if both `isVisible` and `nodeSet` are supplied, or if
 *   `xmlC14NExecute` reports a negative result
 * @internal
 */
function canonicalizeInternal(
  handler: XmlOutputBufferHandler,
  docPtr: XmlDocPtr,
  options: C14NOptions = {},
  cascade: boolean = true,
): void {
  // Runtime checks (not just the static types) so untyped callers are handled too.
  const isVisible = typeof options.isVisible === 'function' ? options.isVisible : null;
  const nodeSet = options.nodeSet instanceof Set ? options.nodeSet : null;

  // Validate mutually exclusive options
  if (isVisible !== null && nodeSet !== null) {
    throw new XmlError('Cannot specify both isVisible and nodeSet');
  }

  let outputBufferPtr: XmlOutputBufferPtr | null = null;
  let prefixArrayPtr: Pointer = 0;
  // 0 doubles as "no filtering context registered".
  let contextIndex = 0;

  try {
    // Create output buffer using IO callbacks
    outputBufferPtr = xmlOutputBufferCreateIO(handler);

    // Register a callback context only when some filtering was requested
    if (isVisible !== null || nodeSet !== null) {
      const context: C14NCallbackContext = {
        jsCallback: isVisible,
        rootPtrs: nodeSet !== null
          ? new Set(Array.from(nodeSet, (n) => n._nodePtr))
          : null,
        cascade,
        invisible: cascade ? new Set<number>() : null,
      };
      contextIndex = c14nCallbackStorage.allocate(context);
    }

    // Handle inclusive namespace prefixes
    if (options.inclusiveNamespacePrefixes) {
      prefixArrayPtr = allocCStringArray(options.inclusiveNamespacePrefixes);
    }

    const mode = options.mode ?? XmlC14NMode.XML_C14N_1_0;
    const withComments = options.withComments ? 1 : 0;

    const result = xmlC14NExecute(
      docPtr,
      contextIndex !== 0 ? c14nIsVisibleCallback : 0 as Pointer,
      contextIndex, // user_data is the storage index
      mode,
      prefixArrayPtr,
      withComments,
      outputBufferPtr,
    );

    /* c8 ignore next 3, defensive code */
    if (result < 0) {
      throw new XmlError('Failed to canonicalize XML document');
    }
  } finally {
    // Release the prefix array and callback context before closing the buffer,
    // so they are reclaimed even if closing the buffer throws.
    if (prefixArrayPtr) free(prefixArrayPtr);
    if (contextIndex !== 0) {
      c14nCallbackStorage.free(contextIndex);
    }
    if (outputBufferPtr) {
      xmlOutputBufferClose(outputBufferPtr);
    }
  }
}
237+
238+
/**
 * Canonicalize an entire XML document to a buffer and invoke callbacks to process.
 *
 * @param handler Callback to receive the canonicalized output
 * @param doc The XML document to canonicalize
 * @param options Canonicalization options
 * @throws XmlError if `options` sets both `isVisible` and `nodeSet`, or if
 *   canonicalization fails
 *
 * @example
 * ```typescript
 * const handler = new XmlStringOutputBufferHandler();
 * canonicalizeDocument(handler, doc, {
 *   mode: XmlC14NMode.XML_C14N_1_0,
 *   withComments: false
 * });
 * ```
 */
export function canonicalizeDocument(
  handler: XmlOutputBufferHandler,
  doc: XmlDocument,
  options: C14NOptions = {},
): void {
  canonicalizeInternal(handler, doc._ptr, options);
}
262+
263+
/**
 * Canonicalize a subtree of an XML document to a buffer and invoke callbacks to process.
 *
 * This is a convenience helper that creates an isVisible callback to filter
 * only nodes within the specified subtree.
 *
 * @param handler Callback to receive the canonicalized output
 * @param doc The document containing the subtree
 * @param subtreeRoot The root node of the subtree to canonicalize
 * @param options Canonicalization options (cannot include isVisible or nodeSet)
 *
 * @example
 * ```typescript
 * const element = doc.get('//my-element');
 * const handler = new XmlStringOutputBufferHandler();
 * canonicalizeSubtree(handler, doc, element!, {
 *   mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0,
 *   inclusiveNamespacePrefixes: ['ns1', 'ns2'],
 *   withComments: false
 * });
 * ```
 */
export function canonicalizeSubtree(
  handler: XmlOutputBufferHandler,
  doc: XmlDocument,
  subtreeRoot: XmlNode,
  options: SubtreeC14NOptions = {},
): void {
  const subtreeRootPtr = subtreeRoot._nodePtr;
  const isVisible = (nodePtr: number, parentPtr: number): boolean => (
    isNodeInSubtree(nodePtr, parentPtr, subtreeRootPtr)
  );
  // Cascading is switched off: the predicate already decides every node on
  // its own by walking the ancestor chain.
  canonicalizeInternal(
    handler,
    doc._ptr,
    {
      ...options,
      isVisible: isVisible as unknown as XmlC14NIsVisibleCallback,
    },
    /* cascade */ false,
  );
}

src/document.mts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import type { XmlDocPtr, XmlParserCtxtPtr } from './libxml2raw.mjs';
3232
import { disposeBy, XmlDisposable } from './disposable.mjs';
3333
import { XmlDtd } from './dtd.mjs';
3434
import { XmlStringOutputBufferHandler } from './utils.mjs';
35+
import { type C14NOptions, canonicalizeDocument } from './c14n.mjs';
3536

3637
export enum ParseOption {
3738
XML_PARSE_DEFAULT = 0,
@@ -494,4 +495,29 @@ export class XmlDocument extends XmlDisposable<XmlDocument> {
494495
xmlXIncludeFreeContext(xinc);
495496
}
496497
}
498+
499+
/**
 * Canonicalize the document and pass the output to the handler.
 *
 * @param handler handler that processes the content written to the output buffer
 * @param options options to adjust the canonicalization behavior
 * @see {@link canonicalizeDocument}
 * @see {@link canonicalizeToString}
 */
canonicalize(handler: XmlOutputBufferHandler, options?: C14NOptions): void {
  canonicalizeDocument(handler, this, options);
}
510+
511+
/**
 * Canonicalize the document to a string.
 *
 * Collects the output in an {@link XmlStringOutputBufferHandler} and returns
 * its accumulated result.
 *
 * @param options options to adjust the canonicalization behavior
 * @returns The canonicalized XML string
 * @see {@link canonicalize}
 */
canonicalizeToString(options?: C14NOptions): string {
  const handler = new XmlStringOutputBufferHandler();
  this.canonicalize(handler, options);
  return handler.result;
}
497523
}

src/index.mts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,4 +52,11 @@ export {
5252
readBuffer,
5353
closeBuffer,
5454
XmlBufferInputProvider,
55+
XmlStringOutputBufferHandler,
5556
} from './utils.mjs';
57+
export {
58+
XmlC14NMode,
59+
type C14NOptions,
60+
type SubtreeC14NOptions,
61+
type XmlC14NIsVisibleCallback,
62+
} from './c14n.mjs';

0 commit comments

Comments
 (0)