Skip to content

Commit

Permalink
Slight improvement to numeric parser performance
Browse files Browse the repository at this point in the history
Before:
```
BenchmarkNumberType/no-num
BenchmarkNumberType/no-num-10         	  332910	      3478 ns/op	  69.87 MB/s	    3162 B/op	      45 allocs/op
BenchmarkNumberType/num
BenchmarkNumberType/num-10            	  313408	      3839 ns/op	  63.29 MB/s	    3555 B/op	      64 allocs/op
```

After
```
BenchmarkNumberType/no-num
BenchmarkNumberType/no-num-10         	  345669	      3483 ns/op	  69.78 MB/s	    3162 B/op	      45 allocs/op
BenchmarkNumberType/num
BenchmarkNumberType/num-10            	  328444	      3728 ns/op	  65.18 MB/s	    3426 B/op	      57 allocs/op
```

---

Pull Request resolved: #195
commit_hash:8025beb33e4613a407790818f282571d3f29446b
  • Loading branch information
laskoviymishka authored and robot-piglet committed Feb 4, 2025
1 parent d69e1f0 commit 79a785d
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 1 deletion.
3 changes: 2 additions & 1 deletion pkg/parsers/generic/generic_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -623,7 +623,8 @@ func wrapIntoEmptyInterface(v *fastjson.Value, useNumbers bool) interface{} {
return v.GetBool()
case fastjson.TypeNumber:
if useNumbers {
return json.Number(v.MarshalTo(nil))
// v.String() does a weird magic trick that prevents the string from escaping to the heap
return json.Number(v.String())
}
return v.GetFloat64()
default:
Expand Down
85 changes: 85 additions & 0 deletions pkg/parsers/generic/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,91 @@ func makePersqueueReadMessage(i int, rawLine string) parsers.Message {
}
}

// BenchmarkNumberType benchmarks the generic JSON parser over the
// parserTestNumbers fixture in two configurations:
//
//   - "no-num": UseNumbersInAny is false;
//   - "num":    UseNumbersInAny is true.
//
// Every non-empty fixture line is wrapped into a message and passed through
// parser.Do; the benchmark fails if any line yields an empty result.
// Throughput is reported relative to the total message payload processed per
// iteration, and allocation statistics are always reported.
func BenchmarkNumberType(b *testing.B) {
	rawLines := strings.Split(string(parserTestNumbers), "\n")

	fields := []abstract.ColSchema{
		{ColumnName: "id", DataType: schema.TypeInt8.String()},
		{ColumnName: "number_field", DataType: schema.TypeInt64.String()},
		{ColumnName: "float_field", DataType: schema.TypeFloat64.String()},
		{ColumnName: "obj_field", DataType: schema.TypeAny.String()},
		{ColumnName: "array_field", DataType: schema.TypeAny.String()},
	}

	// The payload size per iteration is loop-invariant, so compute it once
	// here instead of re-deriving it (and calling SetBytes) on every pass of
	// the timed loop.
	var bytesPerOp int64
	for i, line := range rawLines {
		if line == "" {
			continue
		}
		bytesPerOp += int64(len(makePersqueueReadMessage(i, line).Value))
	}

	// bench builds a parser with the requested UseNumbersInAny setting and
	// returns the sub-benchmark body that drives it. Both sub-benchmarks share
	// this single implementation so they cannot drift apart.
	bench := func(useNumbers bool) func(b *testing.B) {
		parserConfig := &GenericParserConfig{
			Format:             "json",
			SchemaResourceName: "",
			Fields:             fields,
			AuxOpts: AuxParserOpts{
				Topic:           "my_topic_name",
				UseNumbersInAny: useNumbers,
			},
		}
		parser := NewGenericParser(parserConfig, fields, logger.Log, stats.NewSourceStats(metrics.NewRegistry().WithTags(map[string]string{"id": "TestParserNumberTypes"})))

		return func(b *testing.B) {
			// Configure reporting before the measured loop starts.
			b.ReportAllocs()
			b.SetBytes(bytesPerOp)

			for n := 0; n < b.N; n++ {
				for i, line := range rawLines {
					if line == "" {
						continue
					}
					// Message construction is kept inside the timed region so
					// results stay comparable with earlier runs of this
					// benchmark.
					msg := makePersqueueReadMessage(i, line)

					result := parser.Do(msg, abstract.Partition{Cluster: "", Partition: 0, Topic: ""})
					require.True(b, len(result) > 0)
				}
			}
		}
	}

	b.Run("no-num", bench(false))
	b.Run("num", bench(true))
}

func TestParserNumberTypes(t *testing.T) {
rawLines := strings.Split(string(parserTestNumbers), "\n")

Expand Down

0 comments on commit 79a785d

Please sign in to comment.