-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathextract-dfns.mjs
925 lines (838 loc) · 34.6 KB
/
extract-dfns.mjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
import extractWebIdl from './extract-webidl.mjs';
import informativeSelector from './informative-selector.mjs';
import {parse} from "../../node_modules/webidl2/index.js";
/**
* Extract definitions in the spec that follow the "Definitions data model":
* https://tabatkins.github.io/bikeshed/#dfn-contract
*
* Each definition returned by the function will have the following properties:
* - id: The local ID in the DOM. Should be unique within a spec page.
* - href: The absolute URL to the definition.
* - linkingText: List of linking phrases for references.
* - localLinkingText: List of linking phrases for local references only.
* - type: The definition type. One of the values in
* https://tabatkins.github.io/bikeshed/#dfn-types
* - for: The list of namespaces for the definition
* - access: "public" when definition can be referenced by other specifications,
* "private" when it should be viewed as a local definition.
* - informative: true when definition appears in an informative section,
* false if it is normative
* - heading: Heading under which the term is to be found. An object with "id",
* "title", and "number" properties
* - definedIn: An indication of where the definition appears in the spec. Value
* can be one of "dt", "pre", "table", "heading", "note", "example", or
* "prose" (last one indicates that definition appears in the main body of
* the spec)
*
* The extraction ignores definitions with an unknown type. A warning is issued
* to the console when that happens.
*
* The extraction uses the first definition it finds when it bumps into a term
* that is defined more than once (same "linkingText", same "type", same "for").
* A warning is issued to the console when that happens.
*
* @function
* @public
* @return {Array<Object>} An Array of definitions
*/
import cloneAndClean from './clone-and-clean.mjs';
/**
 * Normalize whitespace in a string: leading and trailing whitespace is
 * dropped, and every inner run of whitespace becomes a single space.
 *
 * @param {string} str The string to normalize
 * @return {string} The normalized string
 */
function normalize(str) {
  const words = str.trim().split(/\s+/);
  return words.join(' ');
}
/**
 * Tell whether an element carries a known definition type.
 *
 * Valid types are those defined in
 * https://tabatkins.github.io/bikeshed/#dfn-types
 * (+ "namespace", "event" and "permission" which are not yet in the doc).
 * An element without a "data-dfn-type" attribute is treated as a "dfn".
 * A warning is logged to the console when the type is not known.
 *
 * @param {Element} el The element to check
 * @return {boolean} true when the type is known, false otherwise
 */
function hasValidType(el) {
  const typesByFamily = {
    css: [
      'property', 'descriptor', 'value', 'type', 'at-rule', 'function',
      'selector'
    ],
    webidl: [
      'namespace', 'interface', 'constructor', 'method', 'argument',
      'attribute', 'callback', 'dictionary', 'dict-member', 'enum',
      'enum-value', 'exception', 'const', 'typedef', 'stringifier',
      'serializer', 'iterator', 'maplike', 'setlike', 'extended-attribute',
      'event', 'permission'
    ],
    element: ['element', 'element-state', 'element-attr', 'attr-value'],
    cddl: [
      'cddl-module', 'cddl-type', 'cddl-parameter', 'cddl-key', 'cddl-value'
    ],
    urlScheme: ['scheme'],
    httpHeader: ['http-header'],
    grammar: ['grammar'],
    // "English" terms
    english: ['abstract-op', 'dfn']
  };
  const knownTypes = Object.values(typesByFamily).flat();

  const declaredType = el.getAttribute('data-dfn-type') ?? 'dfn';
  if (knownTypes.includes(declaredType)) {
    return true;
  }

  console.warn('[reffy]', `"${declaredType}" is an invalid dfn type for "${normalize(el.textContent)}"`);
  return false;
}
/**
 * Filter callback that keeps only the first occurrence of an exported
 * definition.
 *
 * Two definitions are considered duplicates when they are both public, have
 * the same type, the same set of linking texts and the same set of "for"
 * namespaces. Private definitions are only ever equal to themselves.
 *
 * Membership is tested with `includes` rather than with the truthiness of
 * `find()`: `find()` returns the matched item, so an empty string entry (as
 * produced by `data-lt="foo|"` for instance) was reported as "not found" and
 * the duplicate went undetected.
 *
 * @param {Object} dfn The definition being tested
 * @param {number} idx Index of the definition in the list (unused)
 * @param {Array<Object>} list The full list of definitions
 * @return {boolean} true when the definition is the first of its kind in the
 *   list, false (with a console warning) when an equivalent one precedes it
 */
function isNotAlreadyExported(dfn, idx, list) {
  // Same length + every item of "a" present in "b"
  const haveSameItems = (a, b) =>
    a.length === b.length && a.every(item => b.includes(item));

  const first = list.find(d => d === dfn ||
    (d.access === 'public' && dfn.access === 'public' &&
      d.type === dfn.type &&
      haveSameItems(d.linkingText, dfn.linkingText) &&
      haveSameItems(d.for, dfn.for)));
  if (first !== dfn) {
    console.warn('[reffy]', `Duplicate dfn found for "${dfn.linkingText[0]}", type="${dfn.type}", for="${dfn.for[0]}", dupl=${dfn.href}, first=${first.href}`);
  }
  return first === dfn;
}
/**
 * Serialize the content of a prose definition element as simple HTML that can
 * be injected elsewhere without creating problems.
 *
 * The function works on the copy returned by `cloneAndClean`. Elements that
 * are neither simple grouping content nor text-level semantics are removed
 * (with a console warning), and all attributes but "href", "dir", "lang" and
 * "title" are dropped. "href" values are turned into absolute URLs.
 *
 * @param {Element} proseEl The element that contains the prose definition
 * @return {string} The trimmed inner HTML of the simplified copy
 */
function getHtmlProseDefinition(proseEl) {
  // Work on a copy stripped of annotations
  const cleaned = cloneAndClean(proseEl);

  // Simple grouping content and text-level semantics elements are kept
  const groupingTags = [
    'blockquote', 'dd', 'div', 'dl', 'dt', 'figcaption', 'figure', 'hr', 'li',
    'ol', 'p', 'pre', 'ul'
  ];
  const textLevelTags = [
    'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite', 'code', 'data', 'dfn', 'em',
    'i', 'kbd', 'mark', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'small', 'span',
    'strong', 'sub', 'sup', 'time', 'u', 'var', 'wbr'
  ];
  const keepSelector = [...groupingTags, ...textLevelTags].join(',');
  const unexpectedSelector = `:not(${keepSelector})`;

  // Anything else means the content is more complex than anticipated. It may
  // be worth looking at the definition to see whether the extraction logic
  // should get smarter. A warning is the only reporting mechanism for now.
  for (let stray = cleaned.querySelector(unexpectedSelector);
      stray;
      stray = cleaned.querySelector(unexpectedSelector)) {
    console.warn('[reffy]', `Unexpected element "${stray.nodeName}" found in textual definition of "${cleaned.getAttribute('data-defines')}"`);
    stray.remove();
  }

  // Only "href", "dir", "lang" and "title" attributes survive, and "href"
  // is resolved against the page URL to get an absolute URL
  const preservedAttributes = ['dir', 'lang', 'title'];
  for (const node of cleaned.querySelectorAll('*')) {
    for (const name of node.getAttributeNames()) {
      if (name === 'href') {
        const page = node.closest('[data-reffy-page]')?.getAttribute('data-reffy-page');
        const absolute = new URL(node.getAttribute('href'), page ?? window.location.href);
        node.setAttribute('href', absolute.toString());
        continue;
      }
      if (!preservedAttributes.includes(name)) {
        node.removeAttribute(name);
      }
    }
  }

  return cleaned.innerHTML.trim();
}
/**
 * Convert a definition element into the definition object that gets reported
 * in the extract.
 *
 * @param {Element} el The element that defines the term
 * @param {Object} idToHeading Mapping between the absolute URL of a definition
 *   and the heading under which it appears
 * @param {boolean} usesDfnDataModel Whether the spec uses the definitions
 *   data model. When it does not, all definitions are considered public.
 * @return {Object} The definition, with "id", "href", "linkingText",
 *   "localLinkingText", "type", "for", "access", "informative", "heading",
 *   "definedIn" properties, and an "htmlProse" property when a non-empty
 *   prose definition exists for the term.
 */
function definitionMapper(el, idToHeading, usesDfnDataModel) {
  // Enclosing element under which the definition appears. Value can be one of
  // "dt", "pre", "table", "heading", "note", "example", or "prose" (last one
  // indicates that definition appears in the main body of the specification)
  const container = el.closest('dt,pre,table,h1,h2,h3,h4,h5,h6,.note,.example') || el;
  const containerName = container.nodeName;
  let definedIn;
  if (['DT', 'PRE', 'TABLE'].includes(containerName)) {
    definedIn = containerName.toLowerCase();
  }
  else if (['H1', 'H2', 'H3', 'H4', 'H5', 'H6'].includes(containerName)) {
    definedIn = 'heading';
  }
  else if (container.classList.contains('note')) {
    definedIn = 'note';
  }
  else if (container.classList.contains('example')) {
    definedIn = 'example';
  }
  else {
    definedIn = 'prose';
  }

  // The crawler merges the pages of a multi-page spec in the first page to
  // ease parsing logic. The URL of the actual page is needed to compute the
  // absolute URL (with fragment) of the definition.
  const page = el.closest('[data-reffy-page]')?.getAttribute('data-reffy-page');
  const id = el.getAttribute('id');
  const url = new URL(page ?? window.location.href);
  url.hash = '#' + encodeURIComponent(id);
  const href = url.toString();

  const splitPhrases = value => value.split('|').map(normalize);

  // Linking text comes from data-lt when the attribute is present, from the
  // textual content otherwise
  const linkingText = el.hasAttribute('data-lt') ?
    splitPhrases(el.getAttribute('data-lt')) :
    [normalize(el.textContent)];

  // Additional linking text may exist for local references
  const localLt = el.getAttribute('data-local-lt');
  const localLinkingText = localLt ? splitPhrases(localLt) : [];

  // Namespaces are not purely comma-separated due to function parameters.
  // For instance, attribute value may be "method(foo,bar), method()"
  const dfnFor = el.getAttribute('data-dfn-for');
  const namespaces = dfnFor ?
    dfnFor.split(/,(?![^\(]*\))/).map(normalize) :
    [];

  // A definition is public if explicitly marked as exportable, or if export
  // has not been explicitly disallowed and its type is neither "dfn" nor a
  // CDDL type, or if the spec is an old spec that does not use the
  // "data-dfn-type" convention.
  const dfnType = el.getAttribute('data-dfn-type');
  let isPublic = false;
  if (!usesDfnDataModel || el.hasAttribute('data-export')) {
    isPublic = true;
  }
  else if (!el.hasAttribute('data-noexport') && dfnType !== null) {
    isPublic = dfnType !== 'dfn' && !dfnType.startsWith('cddl-');
  }

  const dfn = {
    // (ID may not be unique in a multi-page spec)
    id,
    href,
    linkingText,
    localLinkingText,
    // Type must be specified, or it is "dfn"
    type: dfnType || 'dfn',
    for: namespaces,
    access: isPublic ? 'public' : 'private',
    // Whether the term is defined in a normative/informative section
    informative: !!el.closest(informativeSelector),
    // Heading under which the term is to be found. Falls back to the page
    // or document URL and the spec's title
    heading: idToHeading[href] ?? {
      href: (new URL(page ?? window.location.href)).toString(),
      title: document.title
    },
    definedIn
  };

  // Attach the prose definition of the term in HTML, if there is one
  const proseEl = document.querySelector(`[data-defines="#${dfn.id}"]`);
  const htmlProse = proseEl ? getHtmlProseDefinition(proseEl) : null;
  if (htmlProse) {
    dfn.htmlProse = htmlProse;
  }
  return dfn;
}
/**
 * Extract the definitions of the current document.
 *
 * A spec-specific pre-processing step runs first for a handful of specs whose
 * markup does not follow the definitions data model.
 *
 * @param {string|Object} spec The spec's shortname, or an object with a
 *   "shortname" property
 * @param {Object} idToHeading Mapping between the absolute URL of a definition
 *   and the heading under which it appears
 * @return {Array<Object>} The list of definitions
 */
export default function (spec, idToHeading = {}) {
  const definitionsSelector = [
    // re data-lt, see https://github.com/w3c/reffy/issues/336#issuecomment-650339747
    // As for `<dfn>` we'll consider that headings without a `data-dfn-type`
    // have an implicit `"data-dfn-type"="dfn"` attribute, provided they also
    // have some other definition related attribute (because we only want to
    // extract headings that want to be seen as definitions)
    'dfn[id]:not([data-lt=""])',
    ':is(h2,h3,h4,h5,h6)[id]:is([data-dfn-type],[data-dfn-for],[data-export],[data-noexport],[data-lt]):not([data-lt=""])'
  ].join(',');

  // Specs that need to be massaged before definitions can be extracted
  const preProcessors = new Map([
    ["CSS21", preProcessCSS21],
    ["html", preProcessHTML],
    ["ecmascript", preProcessEcmascript],
    ["SVG2", preProcessSVG2],
    // RFC8610 defines CDDL
    ["rfc8610", preProcessRFC8610]
  ]);
  const shortname = (typeof spec === 'string') ? spec : spec.shortname;
  const preProcess = preProcessors.get(shortname);
  if (preProcess) {
    preProcess();
  }

  const candidates = [...document.querySelectorAll(definitionsSelector)];
  const dataModelAttributes = [
    'data-dfn-type', 'data-dfn-for', 'data-export', 'data-noexport'
  ];
  const usesDfnDataModel = candidates.some(node =>
    dataModelAttributes.some(attr => node.hasAttribute(attr)));

  // 2021-06-21: Temporary preprocessing of invalid "idl" dfn type (used for
  // internal slots) while fix for https://github.com/w3c/respec/issues/3644
  // propagates to all EDs and /TR specs. To be dropped once crawls no
  // longer produce warnings.
  for (const node of candidates) {
    if (node.getAttribute('data-dfn-type') !== 'idl') {
      continue;
    }
    const linkingText = node.hasAttribute('data-lt') ?
      node.getAttribute('data-lt').split('|').map(normalize) :
      [normalize(node.textContent)];
    const fixedType = linkingText[0].endsWith(')') ? 'method' : 'attribute';
    node.setAttribute('data-dfn-type', fixedType);
    console.warn('[reffy]', `Fixed invalid "idl" dfn type "${normalize(node.textContent)}"`);
  }

  // IDL terms defined in a block that is flagged as to be excluded or inside
  // a <del> are not definitions
  const isExcluded = node => Boolean(node.closest('.exclude,del'));

  // When the whole term links to an external spec, the definition is an
  // imported definition. Such definitions are not "real" definitions.
  // One hardcoded exception-to-the-rule, see:
  // https://github.com/w3c/webref/issues/882
  // (pending a proper dfns curation process, see:
  // https://github.com/w3c/webref/issues/789)
  const isImported = node => {
    const link = node.querySelector('a[href^="http"]');
    if (!link) {
      return false;
    }
    if (node.textContent.trim() !== link.textContent.trim()) {
      return false;
    }
    return link.href !== 'https://www.w3.org/TR/CSS2/syndata.html#vendor-keywords';
  };

  return candidates
    .filter(hasValidType)
    .filter(node => !isExcluded(node))
    .filter(node => !isImported(node))
    .map(node => definitionMapper(node, idToHeading, usesDfnDataModel))
    .filter(isNotAlreadyExported);
}
/**
 * Pre-process the ECMAScript spec so that its definitions follow the
 * definitions data model.
 *
 * The function updates the document in place:
 * - the content of relevant section headings (`<h1>`) gets wrapped in a
 *   `<dfn>` with appropriate `data-lt`, `data-dfn-type` and `data-dfn-for`
 *   attributes;
 * - `<emu-eqn aoid>` elements and first cells of "state components" tables
 *   get wrapped in a `<dfn>` as well;
 * - existing `<dfn>` elements get completed with an ID, a type or an export
 *   flag, or lose their ID when they should not be extracted.
 *
 * Elements in sections marked as legacy (`[legacy]`) are skipped.
 */
function preProcessEcmascript() {
  // Skip elements in sections marked as legacy
  const legacySectionFilter = n => !n.closest("[legacy]");

  // Move the content of the element into a new <dfn> child. The <dfn>
  // inherits the "id" and "aoid" attributes of the element's parent.
  const wrapWithDfn = (el) => {
    // wrap with a dfn
    const dfn = document.createElement("dfn");
    for (let child of [...el.childNodes]) {
      dfn.appendChild(child);
    }
    el.appendChild(dfn);
    // set id
    dfn.setAttribute("id", el.parentNode.getAttribute("id"));
    if (el.parentNode.hasAttribute("aoid")) {
      dfn.setAttribute("aoid", el.parentNode.getAttribute("aoid"));
    }
    return dfn;
  };

  const cleanMethodName = (name) => {
    return name.replace(/\[/g, '')
      .replace(/\]/g, '') // removing brackets used to mark optional args
      .replace(/ \( */, '(')
      .replace(/ *\)/, ')')
      .replace(/ *,/g, ','); // trimming internal spaces
  };

  let definitionNames = new Set();
  let idlTypes = {};

  // We find the list of abstract methods
  // to help with scoping abstract operations
  let abstractMethods = {};
  const abstractMethodCaptions = [...document.querySelectorAll("figcaption")]
    .filter(el => el.textContent.match(/(abstract|additional) method/i) && el.parentNode.querySelector("emu-xref"));
  for (const figcaption of abstractMethodCaptions) {
    let scope = figcaption.querySelector("emu-xref").textContent;
    if (scope.endsWith('Environment Records')) {
      // Environment records come with an abstract class, and subclasses:
      // https://tc39.es/ecma262/multipage/executable-code-and-execution-contexts.html#sec-environment-records
      // Methods are defined for each class. We pretend that the scope is the
      // abstract class for now. Exact scope will be determined by looking at
      // the title of the section under which the method is found.
      scope = 'Environment Records';
    }
    const table = figcaption.parentNode.querySelector("tbody");
    for (const td of table.querySelectorAll("tr td:first-child")) {
      // We only consider the name of the method, not the potential parameters
      // as they're not necessarily consistently named across
      // the list and the definition
      const methodName = td.textContent.split('(')[0].trim();
      abstractMethods[methodName] = scope;
    }
  }

  // Regular expression used to drop section numbers from section titles
  const sectionNumberRegExp = /^([A-Z]\.)?[0-9\.]+ /;

  // Regular expression that matches scoped methods à la "JSON.parse"
  const scopedNameRegExp = /^[a-z0-9]+\.[a-z0-9]+/i;

  // Regular expression that matches general unscoped method names à la
  // "ArrayCreate (", "ToInt32 (" or "decodeURI (". The expression also matches
  // constructors.
  // (A single quantifier is used on purpose: a nested "(...+)+" group matches
  // the same strings but backtracks exponentially on long alphanumeric
  // headings that do not contain a parenthesis.)
  const methodNameRegExp = /^[a-z0-9]+ *\(/i;

  // More specific regular expression that matches abstract operations methods
  // à la "ToInt32 (". Does not match "decodeURI (" for instance as it does not
  // start with an upper case character.
  const abstractOpRegExp = /^[A-Z][a-zA-Z0-9]+ *\(/;

  [...document.querySelectorAll("h1")]
    .filter(legacySectionFilter)
    .forEach(el => {
      let dfnName = el.textContent.replace(sectionNumberRegExp, '').trim(); // remove section number
      const dfnId = el.parentNode.id;
      if (dfnId.match(/-objects?$/) && dfnName.match(/ Objects?$/)) {
        // Skip headings that look like object definitions, but aren't
        const notObjectIds = ["sec-global-object", "sec-fundamental-objects", "sec-waiterlist-objects"];
        if (notObjectIds.includes(dfnId)) return;

        // only keep ids that match a credible pattern for object names
        // i.e. a single word
        // there are exceptions to that simple rule
        // RegExp includes its expansion (regular expansion) in the id
        // WeakRef is translated into weak-ref in the id
        const objectsIdsExceptions = ["sec-regexp-regular-expression-objects", "sec-weak-ref-objects", "sec-aggregate-error-objects", "sec-finalization-registry-objects", "sec-async-function-objects"];

        if (!dfnId.match(/sec-[a-z]+-objects?/)
          && !objectsIdsExceptions.includes(dfnId)
        ) return;

        const dfn = wrapWithDfn(el);
        // set data-lt
        dfnName = dfnName
          .replace(/^The /, '')
          .replace(/ Objects?$/, '')
          // regexp def includes "(Regular Expression)"
          .replace(/ \([^\)]*\)/, '');
        dfn.dataset.lt = dfnName;

        // FIXME
        // These interfaces are also defined in WebIDL, which in general is
        // the prefered source for these terms
        // Because bikeshed does not yet support spec-specific imports,
        // we hide these terms as not exported
        // cf https://github.com/w3c/reffy/pull/732#issuecomment-925950287
        const exportExceptions = [ "Promise", "DataView", "ArrayBuffer" ];
        if (exportExceptions.includes(dfnName)) {
          dfn.dataset.noexport = "";
        }

        if (dfnName.match(/^[A-Z]/)) {
          // set dfn-type
          if (dfnName.match(/Error$/)) {
            dfn.dataset.dfnType = "exception";
          } else if (!el.parentNode.querySelector('[id$="constructor"]')) {
            // Objects without constructors match to the namespace type
            dfn.dataset.dfnType = "namespace";
          } else {
            dfn.dataset.dfnType = "interface";
          }
          // We keep track of types associated with a name
          // to associate the same type to the relevant intrinsic object
          // à la %Math%
          idlTypes[dfnName] = dfn.dataset.dfnType;
        }
        definitionNames.add(dfnName);
      } else if (dfnId.match(/-[a-z]+error$/) && !dfnName.match(/\(/)) {
        const dfn = wrapWithDfn(el);
        dfn.dataset.lt = dfnName;
        dfn.dataset.dfnType = "exception";
        definitionNames.add(dfnName);
        idlTypes[dfnName] = dfn.dataset.dfnType;
      } else if (dfnId.match(/[-\.]prototype[-\.]/)) {
        // methods and attributes on objects

        // Skip headings with a space and no parenthesis
        // (they mention prototype but aren't a prototype property def)
        // with the exception of "set " and "get " headings
        // (which describe setters and getters)
        if (!dfnName.match(/\(/) && (dfnName.match(/ /) && !dfnName.match(/^[gs]et /))) return;

        // Skip unscoped internal methods à la [[SetPrototypeOf]](V)
        if (dfnName.match(/\[\[/)) return;

        // Skip symbol-based property definitions;
        // not clear they're useful as externally referenceable names
        if (dfnName.match(/@@/)) return;

        // Skip .constructor as that cannot be considered as an attribute
        if (dfnName.match(/\.constructor$/)) return;

        const dfn = wrapWithDfn(el);
        // set definition scope
        dfn.dataset.dfnFor = dfnName.replace(/\.prototype\..*/, '')
          .replace(/^[gs]et /, ''); // remove "get"/"set" markers
        // Remove parent object prototype (set as scope)
        dfnName = dfnName.replace(/.*\.prototype\./, '');
        dfn.dataset.lt = dfnName;

        // set dfn-type
        if (dfn.dataset.lt.match(/\(/)) {
          dfnName = cleanMethodName(dfnName);
          dfn.dataset.lt = dfnName;
          dfn.dataset.dfnType = "method";
        } else {
          dfn.dataset.dfnType = "attribute";
        }
      } else if (el.closest("#sec-value-properties-of-the-global-object")) {
        // properties of the global object
        // (the heading of the section itself is not a property; the section
        // ID is carried by the parent of the heading and has no "#" prefix)
        if (dfnId !== "sec-value-properties-of-the-global-object") {
          const dfn = wrapWithDfn(el);
          dfn.dataset.lt = dfnName;
          dfn.dataset.dfnType = "attribute";
          dfn.dataset.dfnFor = "globalThis";
        }
      } else {
        // We handle other headings that look like a method / property
        // on an object instance (rather than its prototype)
        // or an abstract op

        // if there is already a dfn element, we move on
        if (el.querySelector("dfn")) return;

        // only dealing with well-known patterns
        if (!dfnName.match(scopedNameRegExp)
          && !dfnName.match(methodNameRegExp)
        ) return;

        // Skip symbol-based property definitions
        if (dfnName.match(/@@/)) return;

        // Skip .prototype as that cannot be considered
        // as an attribute
        if (dfnName.match(/\.prototype$/)) return;

        // Skip headings where foo.bar appears as part of a longer phrase
        if (!dfnName.match(/\(/) && dfnName.match(/ /)) return;

        // redundant definitions of constructors on the global object
        // e.g. "Array ( . . . )"
        if (dfnName.match(/\. \. \./)) return;

        const dfn = wrapWithDfn(el);

        if (dfnName.match(scopedNameRegExp)) {
          // set definition scope
          // This assumes that such methods and attributes are only defined
          // one-level deep from the global scope
          dfn.dataset.dfnFor = dfnName.replace(/\..*$/, '');
          dfnName = dfnName.replace(dfn.dataset.dfnFor + ".", '');
          if (dfnName.match(/\(/)) {
            dfnName = cleanMethodName(dfnName);
            dfn.dataset.lt = dfnName;
            dfn.dataset.dfnType = "method";
          } else {
            dfn.dataset.lt = dfnName;
            if (dfnName.match(/^[A-Z]+$/)) {
              dfn.dataset.dfnType = "const";
            } else {
              dfn.dataset.dfnType = "attribute";
            }
          }
        } else if (dfnName.match(abstractOpRegExp)) {
          dfnName = cleanMethodName(dfnName);
          dfn.dataset.lt = dfnName;
          const opName = dfnName.split('(')[0];

          // distinguish global constructors from abstract operations
          if (idlTypes[opName]) {
            dfn.dataset.dfnType = "constructor";
            dfn.dataset.dfnFor = opName;
          } else {
            // If the name is listed as an Abstract Method
            // we set the dfn-for accordingly
            // Note we look for a possibly more specific scope by looking at the
            // title of the containing section. This is useful for
            // "Environment Records" methods.
            if (abstractMethods[opName]) {
              const baseClass = abstractMethods[opName];
              let parent = dfn.parentNode.closest('emu-clause');
              while (parent) {
                const title = parent.querySelector('h1')?.textContent.replace(sectionNumberRegExp, '').trim();
                if (title?.toLowerCase().endsWith(baseClass.toLowerCase())) {
                  dfn.dataset.dfnFor = title;
                  break;
                }
                parent = parent.parentNode.closest('emu-clause');
              }
              if (!dfn.dataset.dfnFor) {
                dfn.dataset.dfnFor = baseClass;
              }
            }
            if (dfn.getAttribute("aoid")) {
              dfn.dataset.lt = dfn.getAttribute("aoid") + '|' + dfn.dataset.lt;
            }
            dfn.dataset.dfnType = "abstract-op";
          }
        } else { // methods of the global object
          dfnName = cleanMethodName(dfnName);
          dfn.dataset.lt = dfnName;
          dfn.dataset.dfnType = "method";
          dfn.dataset.dfnFor = "globalThis";
        }
        definitionNames.add(dfnName);
      }
    });

  // Extract abstract operations from <emu-eqn> with aoid attribute
  [...document.querySelectorAll("emu-eqn[aoid]")]
    .filter(legacySectionFilter)
    .forEach(el => {
      // Skip definitions of constant values (e.g. msPerDay)
      if (el.textContent.match(/=/)) return;
      const dfn = wrapWithDfn(el);
      dfn.dataset.lt = el.getAttribute("aoid");
      dfn.dataset.dfnType = "abstract-op";
      dfn.id = el.id;
    });

  // Extract State Components from tables
  [...document.querySelectorAll("figure > table")]
    .filter(legacySectionFilter)
    .forEach(el => {
      const title = el.parentNode.querySelector("figcaption")?.textContent || "";
      if (!title.match(/state components for/i)) return;
      const scope = title.replace(/^.*state components for/i, '').trim();
      for (const td of el.querySelectorAll("tr td:first-child")) {
        const dfn = wrapWithDfn(td);
        dfn.dataset.dfnFor = scope;
        dfn.id = el.closest("emu-table[id],emu-clause[id]").id;
      }
    });

  [...document.querySelectorAll("dfn")]
    .filter(legacySectionFilter)
    .forEach(el => {
      // Skip definitions in conformance page and conventions page
      if (el.closest('section[data-reffy-page$="conformance.html"]') ||
          el.closest('section[data-reffy-page$="notational-conventions.html"]')) {
        el.removeAttribute("id");
        return;
      }

      // rely on the aoid attribute as a hint we're dealing
      // with an abstract-op
      if (el.getAttribute("aoid")) {
        el.dataset.dfnType = "abstract-op";
      }

      // Mark well-known intrinsic objects as the same type as their visible object (if set), defaulting to "interface"
      if (el.textContent.match(/^%[A-Z].*%$/)) {
        el.dataset.dfnType = idlTypes[el.textContent.replace(/%/g, '')] || "interface";
        definitionNames.add(el.textContent.trim());
      }

      // %names% in the global object section are operations of the globalThis object
      if (el.closest('[data-reffy-page$="global-object.html"]') && el.textContent.match(/^%[a-z]+%/i)) {
        el.dataset.dfnFor = "globalThis";
        // TODO: this doesn't capture the arguments
        el.dataset.dfnType = "method";
      }

      // Mark well-known symbols as "const"
      // for lack of a better type, and as the WebIDL spec has been doing
      if (el.textContent.match(/^@@[a-z]*$/i)) {
        el.dataset.dfnType = "const";
      }

      if (el.getAttribute("variants")) {
        el.dataset.lt = (el.dataset.lt ?? el.textContent.trim()) + "|" + el.getAttribute("variants");
      }

      // Skip definitions that have already been identified
      // with a more specific typing
      if (!el.dataset.dfnType) {
        // we already have a matching typed definition
        if (definitionNames.has(el.textContent.trim())) return;
      }

      // If the <dfn> has no id, we attach it the one from the closest
      // <emu-clause> with an id
      // Note that this means several definitions can share the same id
      // (The ID is read from the very element that was matched: the closest
      // <emu-clause> may not have an ID, in which case reading its "id"
      // attribute would set the ID of the <dfn> to "null")
      if (!el.getAttribute("id")) {
        const clauseWithId = el.closest("emu-clause[id]");
        if (clauseWithId) {
          el.setAttribute("id", clauseWithId.getAttribute("id"));
        }
      }

      // Any generic <dfn> not previously filtered out
      // is deemed to be exported, scoped to ECMAScript
      if (!el.dataset.dfnType) {
        if (!el.dataset.dfnFor) {
          el.dataset.dfnFor = "ECMAScript";
        }
        el.dataset.export = "";
      }
    });

  // Another pass of clean up for duplicates
  // This cannot be done in the first pass
  // because %Foo.prototype% does not necessarily get identified before
  // the equivalent " prototype object" dfn
  [...document.querySelectorAll("dfn[id][data-export]")]
    .filter(legacySectionFilter)
    .forEach(dfn => {
      // we have the syntactic equivalent %x.prototype%
      let m = dfn.textContent.trim().match(/^(.*) prototype( object)?$/);
      if (m && definitionNames.has(`%${m[1].trim()}.prototype%`)) {
        dfn.removeAttribute("id");
        delete dfn.dataset.export;
        return;
      }
    });
}
/**
 * Pre-process the HTML spec: a `<dfn>` without an ID that appears in a
 * heading that has an ID, but no definition related attribute of its own,
 * receives the ID of the heading.
 */
function preProcessHTML() {
  const headingSelector = ':is(h2,h3,h4,h5,h6)[id]:not(:is([data-dfn-type],[data-dfn-for],[data-export],[data-noexport],[data-lt])) dfn';
  for (const dfn of document.querySelectorAll(headingSelector)) {
    const headingId = dfn.closest("h2, h3, h4, h5, h6").id;
    if (dfn.id) {
      // The dfn already has its own ID, nothing to copy
      continue;
    }
    dfn.id = headingId;
  }
}
/**
 * Pre-process the CSS 2.1 spec: terms flagged with a `span.index-def` get
 * converted to exported `<dfn>` elements that follow the definitions data
 * model, unless they already are.
 */
function preProcessCSS21() {
  // Drop suffixes such "::definition of" and wrapping quotes
  const cleanPhrase = text => text
    .replace(/::definition of$/, '')
    .replace(/, definition of$/, '')
    .replace(/^'(.*)'$/, '$1');

  for (const span of document.querySelectorAll('span.index-def')) {
    // A nearby anchor carries the ID of the definition
    const anchor = span.querySelector('a[name]') ?? span.closest('a[name]');
    if (!anchor) {
      continue;
    }

    // There may be a "<dfn>" already, which may even follow the dfn data
    // model already, in which case there is nothing to do.
    let dfn = span.querySelector('dfn') ?? span.closest('dfn');
    if (dfn?.id) {
      continue;
    }

    // Otherwise, wrap the content of the span in a new "<dfn>"
    if (!dfn) {
      dfn = document.createElement('dfn');
      [...span.childNodes].forEach(child => dfn.appendChild(child));
      span.appendChild(dfn);
    }

    // Complete the "<dfn>" with expected attributes
    dfn.id = anchor.getAttribute('name');
    dfn.dataset.export = '';

    // Linking phrases, cleaned up and without duplicates
    const rawPhrases = span.getAttribute('title') ?? dfn.textContent;
    const phrases = rawPhrases.split('|').map(normalize).map(cleanPhrase);
    dfn.dataset.lt = [...new Set(phrases)].join('|');

    // The class of the anchor tells the type of definition, if any
    const anchorClass = anchor.getAttribute('class') ?? '';
    if (anchorClass === 'propdef-title') {
      dfn.dataset.dfnType = 'property';
    }
    else if (anchorClass === 'value-def') {
      dfn.dataset.dfnType = /^<.*>$/.test(dfn.dataset.lt) ? 'type' : 'value';
    }
  }
}
/**
 * Pre-process the SVG 2 spec so that its definitions follow the definitions
 * data model.
 *
 * The function parses the Web IDL extracted from the spec and updates the
 * document in place: attribute and property definitions get a type and a
 * scope (looked up in the attribute/property index pages), and `<b>`
 * elements and interface headings that map to IDL constructs get converted
 * to definitions of the appropriate type.
 */
function preProcessSVG2() {
  const idl = extractWebIdl();
  const idlTree = parse(idl);

  // the only element definition not properly marked up in the SVG spec
  const linkHeading = document.getElementById("LinkElement");
  if (linkHeading && !linkHeading.dataset.dfnType) {
    linkHeading.dataset.dfnType = "element";
    linkHeading.dataset.lt = "link";
  }

  // Attribute definitions: the scope is given by the attribute index page
  document.querySelectorAll(".attrdef dfn[id]:not([data-dfn-type]):not([data-skip])")
    .forEach(el => {
      el.dataset.dfnType = "element-attr";
      const attrDesc = document.querySelector('[data-reffy-page$="attindex.html"] th span.attr-name a[href$="#' + el.id + '"]');
      if (attrDesc) {
        el.dataset.dfnFor = attrDesc.closest('tr').querySelector('td').textContent;
      } else {
        console.error("Could not find description for " + el.textContent);
      }
    });

  // Attributes and properties defined in a <dt>: the <dt> gets replaced by
  // a new <dt> that wraps a <dfn> with the ID of the original <dt>
  document.querySelectorAll("dt[id] > .adef, dt[id] > .property")
    .forEach(el => {
      const dt = el.parentNode;
      const newDt = document.createElement("dt");
      const dfn = document.createElement("dfn");
      dfn.id = dt.id;
      dfn.dataset.dfnType = el.classList.contains("adef") ? "element-attr" : "property";
      const indexPage = el.classList.contains("adef") ? "attindex.html" : "propidx.html";
      const attrDesc = document.querySelector('[data-reffy-page$="' + indexPage + '"] th a[href$="#' + dfn.id + '"]');
      if (attrDesc) {
        // TODO: this doesn't deal with grouping of elements, e.g. "text content elements"
        dfn.dataset.dfnFor = [...attrDesc.closest('tr').querySelectorAll('span.element-name a')].map(n => n.textContent).join(',');
      } else {
        console.error("Could not find description for " + el.textContent + "/" + dfn.id);
      }
      dfn.textContent = el.textContent;
      newDt.appendChild(dfn);
      dt.replaceWith(newDt);
    });

  // IDL members: the ID follows the "__svg__[container]__[member]" pattern
  document.querySelectorAll('b[id^="__svg__"]').forEach(el => {
    const [,, containername, membername] = el.id.split('__');
    if (!containername || !membername) {
      return;
    }
    // Not all IDL constructs have members (enums and typedefs do not for
    // instance), whence the optional chaining
    const container = idlTree.find(i => i.name === containername);
    const member = container?.members?.find(m => m.name === membername);
    if (!member) {
      return;
    }
    const dfn = document.createElement("dfn");
    dfn.id = el.id;
    dfn.textContent = el.textContent;
    dfn.dataset.dfnFor = containername;
    dfn.dataset.dfnType = member.type === "operation" ? "method" : member.type;
    el.replaceWith(dfn);
  });

  // Interface headings: the ID is "Interface" followed by the interface name
  document.querySelectorAll('h3[id^="Interface"]:not([data-dfn-type])').forEach(el => {
    const name = el.id.slice("Interface".length);
    if (idlTree.find(i => i.name === name && i.type === "interface")) {
      el.dataset.dfnType = "interface";
      el.dataset.lt = name;
    }
  });

  // Other <b> elements whose text is the name of an IDL construct
  document.querySelectorAll('b[id]:not([data-dfn-type])').forEach(el => {
    const name = el.textContent;
    const idlItem = idlTree.find(i => i.name === name);
    if (idlItem) {
      const dfn = document.createElement("dfn");
      dfn.id = el.id;
      dfn.dataset.dfnType = idlItem.type;
      dfn.textContent = el.textContent;
      el.replaceWith(dfn);
    }
  });
}
/**
 * Pre-process RFC 8610 (CDDL) to expose the types of its standard prelude.
 *
 * Specs that define CDDL make extensive use of the CDDL types defined in the
 * standard prelude of the RFC. These types need to appear in the dfns extract
 * of the RFC for other specs to be able to link back to them.
 *
 * The RFC only has one ID, set on the appendix that contains the prelude, and
 * that ID needs to be "shared" across all types. Creating one definition per
 * type would yield definitions that have the same ID and href, which could
 * perhaps confuse tools that ingest the definitions. The function rather
 * creates a single definition that has all the types as linking text.
 */
function preProcessRFC8610() {
  // The RFC is defined as a set of pages (yuck!)
  const pages = Array.from(document.querySelectorAll('pre.newpage'));

  // The standard prelude is an appendix that starts with a heading and ends
  // with a figure caption
  const preludeStart = /<a [^>]*id=[^>]*>Appendix .<\/a>\.\s+Standard Prelude/;
  const preludeEnd = /Figure \d+: CDDL Prelude/;

  const firstPage = pages.findIndex(page => preludeStart.test(page.innerHTML));
  if (firstPage < 0) {
    // Can't find the expected prelude start text, not a good start!
    return;
  }
  const lastPage = pages.findIndex((page, pos) =>
    pos >= firstPage && preludeEnd.test(page.innerHTML));
  if (lastPage < 0) {
    // Can't find the expected prelude ending text, not a good start!
    return;
  }

  // Collect the names of the types defined in the appendix
  const typeNames = [];
  for (const page of pages.slice(firstPage, lastPage + 1)) {
    for (const match of page.innerHTML.matchAll(/^\s+([a-z0-9\-]+) = .*$/mg)) {
      typeNames.push(match[1]);
    }
  }

  // Replace the appendix heading with a cddl-type definition that lists all
  // CDDL types.
  const heading = pages[firstPage].querySelector(`a[id]`);
  const dfn = document.createElement("dfn");
  dfn.id = heading.id;
  dfn.textContent = heading.textContent;
  dfn.dataset.dfnType = 'cddl-type';
  dfn.dataset.lt = typeNames.join('|');
  dfn.dataset.export = '';
  heading.replaceWith(dfn);
}