Skip to content
This repository was archived by the owner on Jul 19, 2023. It is now read-only.

Commit ebc3e04

Browse files
kolesnikovae, cyriltovena, simonswine
authored
Introduce symdb (#767)
* Increase parquet writer PageBufferSize * reduce by 2 page buffer size * Introduce symdb * Add chunk format description * Add chunk format description * Improve naming * Implement stack trace appender * Limit chunk by number of nodes * Stacktrace ID is uint32 * Add in-memory stacktrace resolver * Add writer * Add writer * Fix stacktrace resolver * Single pass write * Index file refactoring * Fixes, improvements, notes * Ignore empty stacktraces * Fix chunk boundary check * Fix tests * Store chunk headers sorted * Make chunk index explicit * Add file reader * Use group varint encoding * Refine stacktrace tree * Stacktrace tree race condition elimination * Remove unused stacktracesResolve.do * Better nil coalescence in stack trace appender * Format imports * Use the new symDB package (#770) * Ingest stacktraces in the new symdb * Setup read in memory read path * Fix up a comment placement * Start setting up the read path * Update to uint32 * Introduce stacktrace partition (#775) * Introduce stacktrace partition This determines the partition of a particular profile, by looking first at its metadata: * If there is a `Filename` on the main mapping use its filepath.Base(Filename) * Failing that take the externally supplied `service_name` * Fall back to `unknown` Take the underlying string value and hash. * After a chat with cyril we decided to no longer mod and to use the hash straight away. We didn't want to risk collisions between two very big stacktrace applications. 
* Remove reconstructMeta from singleBlockQuerier * support multiple versions of stacktraces resolver * Integrate v2 reader for stacktraces in block reader * Fixes tests * Rewrite locations Ids * Rewrite test for counting uniq stacktraces * lint and fmt * Fixes more tests * Fixes leftover from todo --------- Co-authored-by: Christian Simon <[email protected]> * Use prefixed bucket for symbols * Initialize locationsIdsByStacktraceID * Initialize locationsIdsByStacktraceID for pprof as well * Fix chunk headers sort * Inline node alloc * Mapping filename extraction * Tidy go.mod * Fix TestHeadIngestStacktraces * Use symdb.DefaultDirName * Sort mappings on write * Make column iterator to respect the context * Fix unexpected EOF on stacktrace chunk unmarshal * Fix symbols upload * Fix symbols upload * Release fetched data * 3MB Page Buffer Size * Sort stacktraces IDs as expected by the resolver --------- Co-authored-by: Cyril Tovena <[email protected]> Co-authored-by: Christian Simon <[email protected]>
1 parent 4d21cbe commit ebc3e04

29 files changed

+3138
-456
lines changed

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ require (
77
github.com/bufbuild/connect-grpchealth-go v1.0.0
88
github.com/cespare/xxhash/v2 v2.2.0
99
github.com/dennwc/varint v1.0.0
10+
github.com/dgryski/go-groupvarint v0.0.0-20211110171037-f5865eef0641
1011
github.com/drone/envsubst v1.0.3
1112
github.com/dustin/go-humanize v1.0.0
1213
github.com/felixge/fgprof v0.9.4-0.20221116204635-ececf7638e93

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,8 @@ github.com/dgraph-io/ristretto v0.1.1 h1:6CWw5tJNgpegArSHpNHJKldNeq03FQCwYvfMVWa
571571
github.com/dgraph-io/ristretto v0.1.1/go.mod h1:S1GPSBCYCIhmVNfcth17y2zZtQT6wzkzgwUve0VDWWA=
572572
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA=
573573
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
574+
github.com/dgryski/go-groupvarint v0.0.0-20211110171037-f5865eef0641 h1:z0nAHq7J9u9XvP/iyxka0dG6vGobRH5dAz38APXRCKo=
575+
github.com/dgryski/go-groupvarint v0.0.0-20211110171037-f5865eef0641/go.mod h1:MlkUQveSLEDbIgq2r1e++tSf0zfzU9mQpa9Qkczl+9Y=
574576
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
575577
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
576578
github.com/digitalocean/godo v1.98.0 h1:potyC1eD0N9n5/P4/WmJuKgg+OGYZOBWEW+/aKTX6QQ=

pkg/model/profile.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
package model
22

33
import (
4+
"path/filepath"
45
"strings"
56

7+
"github.com/cespare/xxhash/v2"
68
"github.com/gogo/status"
79
"github.com/google/pprof/profile"
810
"github.com/prometheus/prometheus/model/labels"
911
"google.golang.org/grpc/codes"
1012

13+
profilev1 "github.com/grafana/phlare/api/gen/proto/go/google/v1"
1114
ingestv1 "github.com/grafana/phlare/api/gen/proto/go/ingester/v1"
1215
typesv1 "github.com/grafana/phlare/api/gen/proto/go/types/v1"
1316
)
@@ -61,3 +64,43 @@ func SetProfileMetadata(p *profile.Profile, ty *typesv1.ProfileType) {
6164
p.Period = 1
6265
}
6366
}
67+
68+
func StacktracePartitionFromProfile(lbls []Labels, p *profilev1.Profile) uint64 {
69+
return xxhash.Sum64String(stacktracePartitionKeyFromProfile(lbls, p))
70+
}
71+
72+
func stacktracePartitionKeyFromProfile(lbls []Labels, p *profilev1.Profile) string {
73+
// take the first mapping (which is the main binary's file basename)
74+
if len(p.Mapping) > 0 {
75+
if filenameID := p.Mapping[0].Filename; filenameID > 0 {
76+
if filename := extractMappingFilename(p.StringTable[filenameID]); filename != "" {
77+
return filename
78+
}
79+
}
80+
}
81+
82+
// failing that look through the labels for the ServiceName
83+
if len(lbls) > 0 {
84+
for _, lbl := range lbls[0] {
85+
if lbl.Name == LabelNameServiceName {
86+
return lbl.Value
87+
}
88+
}
89+
}
90+
91+
return "unknown"
92+
}
93+
94+
// extractMappingFilename reduces a mapping file name to its trimmed base
// name. It returns "" for an empty name and for names starting with "[",
// "linux-vdso" or "/dev/dri/".
//
// NOTE(review): backslashes are turned into slashes by hand so the result
// should not depend on the OS, but filepath.Clean and filepath.Base are
// themselves OS-specific; confirm whether package path was intended.
func extractMappingFilename(filename string) string {
	// See github.com/google/pprof/profile/profile.go
	// It's unlikely that the main binary mapping is one of them.
	switch {
	case filename == "",
		strings.HasPrefix(filename, "["),
		strings.HasPrefix(filename, "linux-vdso"),
		strings.HasPrefix(filename, "/dev/dri/"):
		return ""
	}

	// Like filepath.ToSlash but doesn't rely on OS.
	slashed := strings.ReplaceAll(filename, `\`, `/`)
	base := filepath.Base(filepath.Clean(slashed))
	return strings.TrimSpace(base)
}

pkg/model/profile_test.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
package model
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
)
8+
9+
func Test_filename_extraction(t *testing.T) {
10+
assert.Equal(t, "app", extractMappingFilename(`app`))
11+
assert.Equal(t, "app", extractMappingFilename(`./app`))
12+
assert.Equal(t, "app", extractMappingFilename(`/usr/bin/app`))
13+
assert.Equal(t, "app", extractMappingFilename(`../../../app`))
14+
assert.Equal(t, "app", extractMappingFilename(`/usr/bin/app\`))
15+
assert.Equal(t, "app", extractMappingFilename(`/usr/bin/app\\`))
16+
assert.Equal(t, "my awesome app", extractMappingFilename(`/usr/bin/my awesome app`))
17+
assert.Equal(t, "app", extractMappingFilename(`/usr/bin/my\ awesome\ app`))
18+
19+
assert.Equal(t, "app.exe", extractMappingFilename(`C:\\app.exe`))
20+
assert.Equal(t, "app.exe", extractMappingFilename(`C:\\./app.exe`))
21+
assert.Equal(t, "app.exe", extractMappingFilename(`./app.exe`))
22+
assert.Equal(t, "app.exe", extractMappingFilename(`./../app.exe`))
23+
assert.Equal(t, "app.exe", extractMappingFilename(`C:\\build\app.exe`))
24+
assert.Equal(t, "My App.exe", extractMappingFilename(`C:\\build\My App.exe`))
25+
assert.Equal(t, "Not My App.exe", extractMappingFilename(`C:\\build\Not My App.exe`))
26+
assert.Equal(t, "app.exe", extractMappingFilename(`\\app.exe`))
27+
assert.Equal(t, "app.exe", extractMappingFilename(`\\build\app.exe`))
28+
29+
assert.Equal(t, "bin", extractMappingFilename(`/usr/bin/`))
30+
assert.Equal(t, "build", extractMappingFilename(`\\build\`))
31+
32+
assert.Equal(t, "", extractMappingFilename(""))
33+
assert.Equal(t, "", extractMappingFilename(`[vdso]`))
34+
assert.Equal(t, "", extractMappingFilename(`[vsyscall]`))
35+
assert.Equal(t, "not a path actually", extractMappingFilename(`not a path actually`))
36+
}

pkg/phlaredb/block/metadata.go

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ type MetaVersion int
3838
const (
3939
// MetaVersion1 is an enumeration of the Phlare section of TSDB meta supported by Phlare.
4040
MetaVersion1 = MetaVersion(1)
41+
MetaVersion2 = MetaVersion(2)
4142
)
4243

4344
type BlockStats struct {
@@ -130,6 +131,7 @@ func NewMeta() *Meta {
130131
MinTime: math.MaxInt64,
131132
MaxTime: 0,
132133
Labels: make(map[string]string),
134+
Version: MetaVersion2,
133135
}
134136
}
135137

@@ -143,7 +145,10 @@ func MetaFromDir(dir string) (*Meta, int64, error) {
143145
if err := json.Unmarshal(b, &m); err != nil {
144146
return nil, 0, err
145147
}
146-
if m.Version != MetaVersion1 {
148+
switch m.Version {
149+
case MetaVersion1:
150+
case MetaVersion2:
151+
default:
147152
return nil, 0, errors.Errorf("unexpected meta file version %d", m.Version)
148153
}
149154

@@ -174,8 +179,6 @@ func (meta *Meta) WriteTo(w io.Writer) (int64, error) {
174179
}
175180

176181
func (meta *Meta) WriteToFile(logger log.Logger, dir string) (int64, error) {
177-
meta.Version = MetaVersion1
178-
179182
// Make any changes to the file appear atomic.
180183
path := filepath.Join(dir, MetaFilename)
181184
tmp := path + ".tmp"
@@ -249,7 +252,10 @@ func Read(rc io.ReadCloser) (_ *Meta, err error) {
249252
return nil, err
250253
}
251254

252-
if m.Version != MetaVersion1 {
255+
switch m.Version {
256+
case MetaVersion1:
257+
case MetaVersion2:
258+
default:
253259
return nil, errors.Errorf("unexpected meta file version %d", m.Version)
254260
}
255261

0 commit comments

Comments
 (0)