Skip to content

Commit 3f22486

Browse files
authored
Merge pull request #1034 from ClickHouse/json_string_column
feat: json string column
2 parents 71e1bb8 + 1353ef1 commit 3f22486

11 files changed

+330
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
00000000 34 39 ad b3 8d 96 d2 87 bb 3b aa 1e 3f 4b 64 f5 |49.......;..?Kd.|
2+
00000010 82 1d 00 00 00 af 00 00 00 8f 48 65 6c 6c 6f 21 |..........Hello!|
3+
00000020 0a 48 07 00 8d 70 48 65 6c 6c 6f 21 0a |.H...pHello!.|
45 Bytes
Binary file not shown.

proto/_golden/col_json_str.hex

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
00000000 01 00 00 00 00 00 00 00 11 7b 22 78 22 3a 20 35 |.........{"x": 5|
2+
00000010 2c 20 22 79 22 3a 20 31 30 7d 1b 7b 22 61 22 3a |, "y": 10}.{"a":|
3+
00000020 20 22 74 65 73 74 22 2c 20 22 62 22 3a 20 22 74 | "test", "b": "t|
4+
00000030 65 73 74 32 22 7d 21 7b 22 61 22 3a 20 22 6f 62 |est2"}!{"a": "ob|
5+
00000040 6a 20 74 65 73 74 22 2c 20 22 62 22 3a 20 7b 22 |j test", "b": {"|
6+
00000050 63 22 3a 20 32 30 7d 7d |c": 20}}|

proto/_golden/col_json_str.raw

88 Bytes
Binary file not shown.

proto/_golden/col_json_str_bytes.hex

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
00000000 01 00 00 00 00 00 00 00 11 7b 22 78 22 3a 20 35 |.........{"x": 5|
2+
00000010 2c 20 22 79 22 3a 20 31 30 7d 1b 7b 22 61 22 3a |, "y": 10}.{"a":|
3+
00000020 20 22 74 65 73 74 22 2c 20 22 62 22 3a 20 22 74 | "test", "b": "t|
4+
00000030 65 73 74 32 22 7d 21 7b 22 61 22 3a 20 22 6f 62 |est2"}!{"a": "ob|
5+
00000040 6a 20 74 65 73 74 22 2c 20 22 62 22 3a 20 7b 22 |j test", "b": {"|
6+
00000050 63 22 3a 20 32 30 7d 7d |c": 20}}|

proto/_golden/col_json_str_bytes.raw

88 Bytes
Binary file not shown.
+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
00000000 01 00 00 00 00 00 00 00 11 7b 22 78 22 3a 20 35 |.........{"x": 5|
2+
00000010 2c 20 22 79 22 3a 20 31 30 7d 1b 7b 22 61 22 3a |, "y": 10}.{"a":|
3+
00000020 20 22 74 65 73 74 22 2c 20 22 62 22 3a 20 22 74 | "test", "b": "t|
4+
00000030 65 73 74 32 22 7d 21 7b 22 61 22 3a 20 22 6f 62 |est2"}!{"a": "ob|
5+
00000040 6a 20 74 65 73 74 22 2c 20 22 62 22 3a 20 7b 22 |j test", "b": {"|
6+
00000050 63 22 3a 20 32 30 7d 7d |c": 20}}|
88 Bytes
Binary file not shown.

proto/col_json_str.go

+158
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
package proto
2+
3+
import (
4+
"github.com/go-faster/errors"
5+
)
6+
7+
// JSONStringSerializationVersion is the version marker that ColJSONStr writes
// as a UInt64 ahead of the string data and verifies when decoding.
const JSONStringSerializationVersion uint64 = 1
8+
9+
// ColJSONStr represents a JSON column whose rows are carried as strings.
10+
//
11+
// Rows are held in a ColStr field; the encoded form is prefixed with
// JSONStringSerializationVersion. Use ColJSONBytes for the []byte ColumnOf
// implementation.
12+
type ColJSONStr struct {
13+
str ColStr // underlying row storage
14+
}
15+
16+
// Append appends string v to the underlying string column.
//
// v is forwarded as-is; this method performs no JSON validation.
17+
func (c *ColJSONStr) Append(v string) {
18+
c.str.Append(v)
19+
}
20+
21+
// AppendBytes appends byte slice v as a string to the underlying string column.
//
// v is forwarded as-is; this method performs no JSON validation.
22+
func (c *ColJSONStr) AppendBytes(v []byte) {
23+
c.str.AppendBytes(v)
24+
}
25+
26+
// AppendArr forwards the slice v to AppendArr of the underlying string column.
func (c *ColJSONStr) AppendArr(v []string) {
27+
c.str.AppendArr(v)
28+
}
29+
30+
// Compile-time assertions for ColJSONStr: the build fails if the type stops
// satisfying any of the listed interfaces.
31+
var (
32+
_ ColInput = ColJSONStr{}
33+
_ ColResult = (*ColJSONStr)(nil)
34+
_ Column = (*ColJSONStr)(nil)
35+
_ ColumnOf[string] = (*ColJSONStr)(nil)
36+
_ Arrayable[string] = (*ColJSONStr)(nil)
37+
)
38+
39+
// Type returns the ColumnType of the column, which is always ColumnTypeJSON.
40+
func (ColJSONStr) Type() ColumnType {
41+
return ColumnTypeJSON
42+
}
43+
44+
// Rows returns the count of rows in the column, as reported by the underlying
// string column.
45+
func (c ColJSONStr) Rows() int {
46+
return c.str.Rows()
47+
}
48+
49+
// Reset resets the data in the column by resetting the underlying string column.
//
// NOTE(review): capacity is presumably preserved by ColStr.Reset — confirm there.
50+
func (c *ColJSONStr) Reset() {
51+
c.str.Reset()
52+
}
53+
54+
// EncodeColumn encodes JSON rows to *Buffer.
//
// The output is JSONStringSerializationVersion as a UInt64 followed by the
// encoding of the underlying string column.
55+
func (c ColJSONStr) EncodeColumn(b *Buffer) {
56+
b.PutUInt64(JSONStringSerializationVersion)
57+
58+
c.str.EncodeColumn(b)
59+
}
60+
61+
// WriteColumn writes JSON rows to *Writer.
//
// The serialization version is put through ChainBuffer first, then the
// underlying string column writes its rows, mirroring the layout produced by
// EncodeColumn.
62+
func (c ColJSONStr) WriteColumn(w *Writer) {
63+
w.ChainBuffer(func(b *Buffer) {
64+
b.PutUInt64(JSONStringSerializationVersion)
65+
})
66+
67+
c.str.WriteColumn(w)
68+
}
69+
70+
// ForEach calls f on each string from the column.
//
// Iteration and error handling are delegated to ForEach of the underlying
// string column; its result is returned unchanged.
71+
func (c ColJSONStr) ForEach(f func(i int, s string) error) error {
72+
return c.str.ForEach(f)
73+
}
74+
75+
// First returns the first row of the column as a string, as reported by the
// underlying string column.
76+
func (c ColJSONStr) First() string {
77+
return c.str.First()
78+
}
79+
80+
// Row returns the row with number i as a string.
//
// The index is passed straight to the underlying string column; no bounds
// check is performed in this method.
81+
func (c ColJSONStr) Row(i int) string {
82+
return c.str.Row(i)
83+
}
84+
85+
// RowBytes returns the row with number i as a byte slice.
//
// NOTE(review): the slice presumably aliases the column's internal buffer —
// confirm against ColStr.RowBytes before retaining or mutating it.
86+
func (c ColJSONStr) RowBytes(i int) []byte {
87+
return c.str.RowBytes(i)
88+
}
89+
90+
// ForEachBytes calls f on each row of the column as a byte slice.
//
// Iteration and error handling are delegated to ForEachBytes of the
// underlying string column; its result is returned unchanged.
91+
func (c ColJSONStr) ForEachBytes(f func(i int, b []byte) error) error {
92+
return c.str.ForEachBytes(f)
93+
}
94+
95+
// DecodeColumn decodes String rows from *Reader.
96+
func (c *ColJSONStr) DecodeColumn(r *Reader, rows int) error {
97+
jsonSerializationVersion, err := r.UInt64()
98+
if err != nil {
99+
return errors.Wrap(err, "failed to read json serialization version")
100+
}
101+
102+
if jsonSerializationVersion != JSONStringSerializationVersion {
103+
return errors.Errorf("received invalid JSON string serialization version %d. Setting \"output_format_native_write_json_as_string\" must be enabled.", jsonSerializationVersion)
104+
}
105+
106+
return c.str.DecodeColumn(r, rows)
107+
}
108+
109+
// LowCardinality returns the LowCardinality helper of the underlying string
// column.
//
// NOTE(review): the result is built from c.str rather than from c, so it
// presumably describes LowCardinality(String) and omits the JSON version
// prefix — confirm before relying on it as LowCardinality(JSON).
110+
func (c *ColJSONStr) LowCardinality() *ColLowCardinality[string] {
111+
return c.str.LowCardinality()
112+
}
113+
114+
// Array is helper that creates Array(JSON).
115+
func (c *ColJSONStr) Array() *ColArr[string] {
116+
return c.str.Array()
117+
}
118+
119+
// Nullable is helper that creates Nullable(JSON).
120+
func (c *ColJSONStr) Nullable() *ColNullable[string] {
121+
return c.str.Nullable()
122+
}
123+
124+
// ColJSONBytes is a ColJSONStr wrapper to be ColumnOf for []byte.
//
// It embeds ColJSONStr and overrides the row accessors and append helpers
// declared below so they work with []byte instead of string.
125+
type ColJSONBytes struct {
126+
ColJSONStr
127+
}
128+
129+
// Row returns the row with number i as a byte slice by calling RowBytes of
// the embedded ColJSONStr.
130+
func (c ColJSONBytes) Row(i int) []byte {
131+
return c.RowBytes(i)
132+
}
133+
134+
// Append appends byte slice v to the column by calling AppendBytes of the
// embedded ColJSONStr.
135+
func (c *ColJSONBytes) Append(v []byte) {
136+
c.AppendBytes(v)
137+
}
138+
139+
// AppendArr append slice of byte slices to column.
140+
func (c *ColJSONBytes) AppendArr(v [][]byte) {
141+
for _, s := range v {
142+
c.Append(s)
143+
}
144+
}
145+
146+
// Array is helper that creates Array(JSON).
//
// The returned ColArr uses c itself as its Data column.
147+
func (c *ColJSONBytes) Array() *ColArr[[]byte] {
148+
return &ColArr[[]byte]{
149+
Data: c,
150+
}
151+
}
152+
153+
// Nullable is helper that creates Nullable(JSON).
//
// The returned ColNullable uses c itself as its Values column.
154+
func (c *ColJSONBytes) Nullable() *ColNullable[[]byte] {
155+
return &ColNullable[[]byte]{
156+
Values: c,
157+
}
158+
}

proto/col_json_str_test.go

+150
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
package proto
2+
3+
import (
4+
"bytes"
5+
"io"
6+
"testing"
7+
8+
"github.com/stretchr/testify/require"
9+
10+
"github.com/ClickHouse/ch-go/internal/gold"
11+
)
12+
13+
// testJSONValues holds the JSON documents shared by the ColJSONStr and
// ColJSONBytes tests: two flat objects and one object with a nested object.
var testJSONValues = []string{
14+
"{\"x\": 5, \"y\": 10}",
15+
"{\"a\": \"test\", \"b\": \"test2\"}",
16+
"{\"a\": \"obj test\", \"b\": {\"c\": 20}}",
17+
}
18+
19+
// TestColJSONBytes runs the shared testColumn helper against ColJSONBytes
// under the name "json_bytes", feeding it the three testJSONValues as []byte.
func TestColJSONBytes(t *testing.T) {
20+
testColumn(t, "json_bytes", func() ColumnOf[[]byte] {
21+
return new(ColJSONBytes)
22+
}, []byte(testJSONValues[0]), []byte(testJSONValues[1]), []byte(testJSONValues[2]))
23+
}
24+
25+
func TestColJSONStr_AppendBytes(t *testing.T) {
26+
var data ColJSONStr
27+
28+
data.AppendBytes([]byte(testJSONValues[0]))
29+
data.AppendBytes([]byte(testJSONValues[1]))
30+
data.AppendBytes([]byte(testJSONValues[2]))
31+
32+
var buf Buffer
33+
data.EncodeColumn(&buf)
34+
35+
t.Run("Golden", func(t *testing.T) {
36+
gold.Bytes(t, buf.Buf, "col_json_str_bytes")
37+
})
38+
t.Run("Ok", func(t *testing.T) {
39+
br := bytes.NewReader(buf.Buf)
40+
r := NewReader(br)
41+
42+
var dec ColJSONStr
43+
require.NoError(t, dec.DecodeColumn(r, 3))
44+
require.Equal(t, data, dec)
45+
46+
t.Run("ForEach", func(t *testing.T) {
47+
var output []string
48+
f := func(i int, s string) error {
49+
output = append(output, s)
50+
return nil
51+
}
52+
require.NoError(t, dec.ForEach(f))
53+
require.Equal(t, testJSONValues, output)
54+
})
55+
})
56+
}
57+
58+
// TestColJSONStr_EncodeColumn appends testJSONValues as strings, encodes the
// column, and verifies three things: the bytes match the "col_json_str"
// golden data, decoding restores an equal column, and decoding from an empty
// reader fails with io.EOF.
func TestColJSONStr_EncodeColumn(t *testing.T) {
59+
var data ColJSONStr
60+
61+
input := testJSONValues
62+
rows := len(input)
63+
for _, s := range input {
64+
data.Append(s)
65+
}
66+
67+
var buf Buffer
68+
data.EncodeColumn(&buf)
69+
70+
t.Run("Golden", func(t *testing.T) {
71+
gold.Bytes(t, buf.Buf, "col_json_str")
72+
})
73+
t.Run("Ok", func(t *testing.T) {
74+
br := bytes.NewReader(buf.Buf)
75+
r := NewReader(br)
76+
77+
var dec ColJSONStr
78+
require.NoError(t, dec.DecodeColumn(r, rows))
79+
require.Equal(t, data, dec)
80+
81+
t.Run("ForEach", func(t *testing.T) {
82+
// Collect every decoded row and compare against the input.
var output []string
83+
f := func(i int, s string) error {
84+
output = append(output, s)
85+
return nil
86+
}
87+
require.NoError(t, dec.ForEach(f))
88+
require.Equal(t, input, output)
89+
})
90+
})
91+
t.Run("EOF", func(t *testing.T) {
92+
// An empty reader cannot supply the serialization version.
r := NewReader(bytes.NewReader(nil))
93+
94+
var dec ColJSONStr
95+
require.ErrorIs(t, dec.DecodeColumn(r, rows), io.EOF)
96+
})
97+
}
98+
99+
// BenchmarkColJSONStr_DecodeColumn measures decoding of 1000 identical JSON
// rows from a pre-encoded buffer, reusing the same reader and column on every
// iteration.
func BenchmarkColJSONStr_DecodeColumn(b *testing.B) {
100+
const rows = 1_000
101+
var data ColJSONStr
102+
for i := 0; i < rows; i++ {
103+
data.Append("{\"x\": 5}")
104+
}
105+
106+
var buf Buffer
107+
data.EncodeColumn(&buf)
108+
109+
br := bytes.NewReader(buf.Buf)
110+
r := NewReader(br)
111+
112+
var dec ColJSONStr
113+
// Decode once before the timer is reset; this also fails fast on a broken
// setup.
if err := dec.DecodeColumn(r, rows); err != nil {
114+
b.Fatal(err)
115+
}
116+
117+
b.SetBytes(int64(len(buf.Buf)))
118+
b.ResetTimer()
119+
b.ReportAllocs()
120+
121+
for i := 0; i < b.N; i++ {
122+
// Rewind the input and clear the column so each iteration decodes the
// full buffer again.
br.Reset(buf.Buf)
123+
r.raw.Reset(br)
124+
dec.Reset()
125+
126+
if err := dec.DecodeColumn(r, rows); err != nil {
127+
b.Fatal(err)
128+
}
129+
}
130+
}
131+
132+
// BenchmarkColJSONStr_EncodeColumn measures encoding of 1000 identical JSON
// rows into a Buffer that is reset and reused on every iteration.
func BenchmarkColJSONStr_EncodeColumn(b *testing.B) {
133+
const rows = 1_000
134+
var data ColJSONStr
135+
for i := 0; i < rows; i++ {
136+
data.Append("{\"x\": 5}")
137+
}
138+
139+
var buf Buffer
140+
// Encode once before the timer is reset so SetBytes can use the encoded
// size.
data.EncodeColumn(&buf)
141+
142+
b.SetBytes(int64(len(buf.Buf)))
143+
b.ResetTimer()
144+
b.ReportAllocs()
145+
146+
for i := 0; i < b.N; i++ {
147+
buf.Reset()
148+
data.EncodeColumn(&buf)
149+
}
150+
}

proto/column.go

+1
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ const (
250250
ColumnTypePoint ColumnType = "Point"
251251
ColumnTypeInterval ColumnType = "Interval"
252252
ColumnTypeNothing ColumnType = "Nothing"
253+
ColumnTypeJSON ColumnType = "JSON"
253254
)
254255

255256
// colWrap wraps Column with type t.

0 commit comments

Comments
 (0)