Skip to content

Commit 068b78c

Browse files
committed
implement json curly bracket attributes (for v0.2.0)
1 parent 29db9c1 commit 068b78c

File tree

3 files changed

+142
-8
lines changed

3 files changed

+142
-8
lines changed

README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,25 @@
11
This is a fork of part of the golang.org/x/net/html package.
22

3+
## v0.2.0
4+
5+
For v0.2.0 we made a more radical change to the [tokenizer](https://pkg.go.dev/golang.org/x/net/html#Tokenizer) package.
6+
7+
We added a new syntax to allow attributes to be set with '{}' syntax.
8+
Any valid JSON expression is allowed within the curly brackets (this more
9+
closely matches JSX syntax).
10+
11+
```
12+
<div data-num={5}></div>
13+
```
14+
15+
To support proper decoding in the client, attributes now have an `IsJson bool` field
16+
which is set to true if an attribute was parsed with the new {} syntax.
17+
18+
If you only need the case-sensitive tokenization for tags/attributes it is
19+
recommended to use v0.1.0 and not v0.2.0.
20+
21+
## v0.1.0
22+
323
It is not a complete fork as we only want to modify and change https://pkg.go.dev/golang.org/x/net/html#Tokenizer. So this is the minimal amount of code to get html.Tokenizer working.
424

525
The reason for the fork is to allow returning case-sensitive tag names and attribute names. The current package normalizes the tag names and attribute names by calling (the equivalent of) strings.ToLower on them before returning them to the caller. We made a very small two-line change in token.go to remove those ToLower calls. Other changes involve copying enough code from other files to get all the dependencies satisfied and get it compiling again.

parsebraceattr.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
// Copyright 2024 Command Line Inc. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package htmltoken
6+
7+
import "fmt"
8+
9+
func (z *Tokenizer) parseBraceAttr() {
10+
braceCount := 1
11+
inString := false
12+
prevStrBackslash := false
13+
14+
z.pendingAttr[1].start = z.raw.end
15+
for {
16+
ch := z.readByte()
17+
if z.err != nil {
18+
z.pendingAttr[1].end = z.raw.end
19+
return
20+
}
21+
if inString {
22+
if prevStrBackslash {
23+
prevStrBackslash = false
24+
continue
25+
}
26+
if ch == '\\' {
27+
prevStrBackslash = true
28+
continue
29+
}
30+
if ch == '"' {
31+
inString = false
32+
continue
33+
}
34+
continue
35+
}
36+
if ch == '{' {
37+
braceCount++
38+
continue
39+
}
40+
if ch == '"' {
41+
inString = true
42+
continue
43+
}
44+
if ch == '}' {
45+
braceCount--
46+
if braceCount == 0 {
47+
z.pendingAttr[1].end = z.raw.end - 1
48+
return
49+
}
50+
continue
51+
}
52+
}
53+
}
54+
55+
func (z *Tokenizer) parseBraceAttrEx(input string) (string, error) {
56+
var result []rune
57+
braceCount := 0
58+
inString := false
59+
60+
for i := 0; i < len(input); i++ {
61+
ch := rune(input[i])
62+
63+
if inString {
64+
// Handle string escape sequences
65+
if ch == '\\' && i+1 < len(input) {
66+
result = append(result, ch, rune(input[i+1]))
67+
i++
68+
continue
69+
}
70+
if ch == '"' {
71+
inString = false
72+
}
73+
result = append(result, ch)
74+
continue
75+
}
76+
77+
switch ch {
78+
case '{':
79+
braceCount++
80+
case '}':
81+
braceCount--
82+
if braceCount == 0 {
83+
return string(result), nil
84+
}
85+
case '"':
86+
inString = true
87+
}
88+
89+
result = append(result, ch)
90+
}
91+
92+
if braceCount != 0 {
93+
return "", fmt.Errorf("unbalanced braces")
94+
}
95+
96+
return string(result), nil
97+
}

token.go

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
// and modified to be used in the vdom package
77
// we are producing a JSX-like parser, which requires us to have case sensitivity for attributes and tags
88
// the sole changes in this package are to remove the lower() calls.
9+
// modifications are marked with a comment starting with "MOD"
910

1011
package htmltoken
1112

@@ -72,6 +73,7 @@ func (t TokenType) String() string {
7273
// Namespace is only used by the parser, not the tokenizer.
7374
type Attribute struct {
7475
Namespace, Key, Val string
76+
IsJson bool // MOD - added to support json attributes
7577
}
7678

7779
// A Token consists of a TokenType and some Data (tag name for start and end
@@ -297,9 +299,11 @@ type Tokenizer struct {
297299
// pendingAttr is the attribute key and value currently being tokenized.
298300
// When complete, pendingAttr is pushed onto attr. nAttrReturned is
299301
// incremented on each call to TagAttr.
300-
pendingAttr [2]span
301-
attr [][2]span
302-
nAttrReturned int
302+
pendingAttr [2]span
303+
pendingAttrIsJson bool // MOD - added to support json attributes
304+
attr [][2]span
305+
jsonAttr []bool // MOD - added to support json attributes (made parallel to reduce code changes)
306+
nAttrReturned int
303307
// rawTag is the "script" in "</script>" that closes the next token. If
304308
// non-empty, the subsequent call to Next will return a raw or RCDATA text
305309
// token: one that treats "<p>" as text instead of an element.
@@ -995,6 +999,7 @@ func (z *Tokenizer) readStartTag() TokenType {
995999
// in [A-Za-z].
9961000
func (z *Tokenizer) readTag(saveAttr bool) {
9971001
z.attr = z.attr[:0]
1002+
z.jsonAttr = z.jsonAttr[:0]
9981003
z.nAttrReturned = 0
9991004
// Read the tag name and attribute key/value pairs.
10001005
z.readTagName()
@@ -1006,12 +1011,14 @@ func (z *Tokenizer) readTag(saveAttr bool) {
10061011
if z.err != nil || c == '>' {
10071012
break
10081013
}
1014+
z.pendingAttrIsJson = false
10091015
z.raw.end--
10101016
z.readTagAttrKey()
10111017
z.readTagAttrVal()
10121018
// Save pendingAttr if saveAttr and that attribute has a non-empty key.
10131019
if saveAttr && z.pendingAttr[0].start != z.pendingAttr[0].end {
10141020
z.attr = append(z.attr, z.pendingAttr)
1021+
z.jsonAttr = append(z.jsonAttr, z.pendingAttrIsJson)
10151022
}
10161023
if z.skipWhiteSpace(); z.err != nil {
10171024
break
@@ -1116,6 +1123,12 @@ func (z *Tokenizer) readTagAttrVal() {
11161123
}
11171124
}
11181125

1126+
case '{':
1127+
// MOD -- added support for brace-enclosed JSON attributes
1128+
z.pendingAttrIsJson = true
1129+
z.parseBraceAttr()
1130+
return
1131+
11191132
default:
11201133
z.pendingAttr[1].start = z.raw.end - 1
11211134
for {
@@ -1345,20 +1358,22 @@ func (z *Tokenizer) TagName() (name []byte, hasAttr bool) {
13451358
// TagAttr returns the lower-cased key and unescaped value of the next unparsed
13461359
// attribute for the current tag token and whether there are more attributes.
13471360
// The contents of the returned slices may change on the next call to Next.
1348-
func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
1361+
// MOD -- added isJson bool return value
1362+
func (z *Tokenizer) TagAttr() (key, val []byte, isJson bool, moreAttr bool) {
13491363
if z.nAttrReturned < len(z.attr) {
13501364
switch z.tt {
13511365
case StartTagToken, SelfClosingTagToken:
13521366
x := z.attr[z.nAttrReturned]
1367+
isJson := z.jsonAttr[z.nAttrReturned]
13531368
z.nAttrReturned++
13541369
key = z.buf[x[0].start:x[0].end]
13551370
val = z.buf[x[1].start:x[1].end]
13561371
// MOD -- remove lower(s)
1357-
return key, unescape(convertNewlines(val), true), z.nAttrReturned < len(z.attr)
1372+
return key, unescape(convertNewlines(val), true), isJson, z.nAttrReturned < len(z.attr)
13581373
// return lower(key), unescape(convertNewlines(val), true), z.nAttrReturned < len(z.attr)
13591374
}
13601375
}
1361-
return nil, nil, false
1376+
return nil, nil, false, false
13621377
}
13631378

13641379
// Token returns the current Token. The result's Data and Attr values remain
@@ -1372,8 +1387,10 @@ func (z *Tokenizer) Token() Token {
13721387
name, moreAttr := z.TagName()
13731388
for moreAttr {
13741389
var key, val []byte
1375-
key, val, moreAttr = z.TagAttr()
1376-
t.Attr = append(t.Attr, Attribute{"", atom.String(key), string(val)})
1390+
var isJson bool
1391+
// MOD -- added isJson
1392+
key, val, isJson, moreAttr = z.TagAttr()
1393+
t.Attr = append(t.Attr, Attribute{"", atom.String(key), string(val), isJson})
13771394
}
13781395
if a := atom.Lookup(name); a != 0 {
13791396
t.DataAtom, t.Data = a, a.String()

0 commit comments

Comments
 (0)