Skip to content
This repository has been archived by the owner on Nov 7, 2022. It is now read-only.

Commit

Permalink
Add trace head-sampling (#583)
Browse files Browse the repository at this point in the history
* Add trace head-sampling

This is to complete the sampling feature since OC Service already offers tail-sampling. This is implemented using an extra hashing as an attempt to avoid bias from trace ID generation and also to correctly spread traces for backends that also do hashing.

Tests to enforce hash of different lengths.

Make hashing seed configurable

* Direct recommendation about hash seeding setting

* More precise wording for hash-seed comment
  • Loading branch information
Paulo Janotti authored Jun 19, 2019
1 parent 610d5f6 commit dee9655
Show file tree
Hide file tree
Showing 6 changed files with 559 additions and 1 deletion.
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,29 @@ global:
keep: true # keep the attribute with the original key
```

### <a name="probabilistic-trace-sampling"></a>Probabilistic Head-based Trace Sampling

In some scenarios it may be desirable to perform probabilistic head-based trace sampling on the collector.
This can be done by specifying a `probabilistic` policy section under the `sampling` section of the collector configuration file.

```yaml
sampling:
# mode indicates if the sampling is head or tail based. For probabilistic the mode is head-based.
mode: head
policies:
  # section below defines a probabilistic trace sampler based on hashing the trace ID associated with
  # each span and sampling the span according to the configured sampling percentage.
probabilistic:
configuration:
      # sampling-percentage is the percentage of sampling to be applied to all spans; values greater than
      # or equal to 100 sample all spans.
sampling-percentage: 5
# hash-seed allows choosing the seed for the hash function used in the trace sampling. This is important when
# multiple layers of collectors are being used with head sampling, in such scenarios make sure to
# choose different seeds for each layer.
hash-seed: 1
```

### <a name="tail-sampling"></a>Intelligent Sampling

```yaml
Expand Down
3 changes: 3 additions & 0 deletions cmd/occollector/app/builder/sampling_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ const (
// NoSampling mode is the default and means that all data arriving at the collector
// is passed ahead.
NoSampling Mode = "no-sampling"
// HeadSampling is the mode in which trace data is sampled at ingestion, without seeing
// the whole trace data.
HeadSampling Mode = "head"
// TailSampling is the mode in which trace data is temporarily retained until an evaluation
// if the trace should be sampled is performed.
TailSampling Mode = "tail"
Expand Down
29 changes: 28 additions & 1 deletion cmd/occollector/app/collector/processors.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import (
"github.com/census-instrumentation/opencensus-service/processor/addattributesprocessor"
"github.com/census-instrumentation/opencensus-service/processor/attributekeyprocessor"
"github.com/census-instrumentation/opencensus-service/processor/multiconsumer"
"github.com/census-instrumentation/opencensus-service/processor/tracesamplerprocessor"
)

func createExporters(v *viper.Viper, logger *zap.Logger) ([]func(), []consumer.TraceConsumer, []consumer.MetricsConsumer) {
Expand Down Expand Up @@ -281,7 +282,12 @@ func startProcessor(v *viper.Viper, logger *zap.Logger) (consumer.TraceConsumer,

var tailSamplingProcessor consumer.TraceConsumer
samplingProcessorCfg := builder.NewDefaultSamplingCfg().InitFromViper(v)
if samplingProcessorCfg.Mode == builder.TailSampling {
useHeadSamplingProcessor := false
if samplingProcessorCfg.Mode == builder.HeadSampling {
// Head-sampling should be the first processor in the pipeline to avoid global operations on data
// that is not going to be sampled, for now just set a flag to add the sampler later.
useHeadSamplingProcessor = true
} else if samplingProcessorCfg.Mode == builder.TailSampling {
var err error
tailSamplingProcessor, err = buildSamplingProcessor(samplingProcessorCfg, nameToTraceConsumer, v, logger)
if err != nil {
Expand Down Expand Up @@ -331,5 +337,26 @@ func startProcessor(v *viper.Viper, logger *zap.Logger) (consumer.TraceConsumer,
tp, _ = attributekeyprocessor.NewTraceProcessor(tp, multiProcessorCfg.Global.Attributes.KeyReplacements...)
}
}

if useHeadSamplingProcessor {
vTraceSampler := v.Sub("sampling.policies.probabilistic.configuration")
if vTraceSampler == nil {
logger.Error("Trace head-based sampling mode is enabled but there is no valid policy section defined")
os.Exit(1)
}

cfg := &tracesamplerprocessor.TraceSamplerCfg{}
samplerCfg, err := cfg.InitFromViper(vTraceSampler)
if err != nil {
logger.Error("Trace head-based sampling configuration error", zap.Error(err))
os.Exit(1)
}
logger.Info(
"Trace head-sampling enabled",
zap.Float32("sampling-percentage", samplerCfg.SamplingPercentage),
)
tp, _ = tracesamplerprocessor.NewTraceProcessor(tp, *samplerCfg)
}

return tp, closeFns
}
19 changes: 19 additions & 0 deletions cmd/occollector/app/collector/processors_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/census-instrumentation/opencensus-service/processor/attributekeyprocessor"
"github.com/census-instrumentation/opencensus-service/processor/multiconsumer"
"github.com/census-instrumentation/opencensus-service/processor/processortest"
"github.com/census-instrumentation/opencensus-service/processor/tracesamplerprocessor"
)

func Test_startProcessor(t *testing.T) {
Expand Down Expand Up @@ -85,6 +86,24 @@ func Test_startProcessor(t *testing.T) {
return attributeKeyProcessor
},
},
{
name: "sampling_config_trace_sampler",
setupViperCfg: func() *viper.Viper {
v := viper.New()
v.Set("logging-exporter", true)
v.Set("sampling.mode", "head")
v.Set("sampling.policies.probabilistic.configuration.sampling-percentage", 5)
return v
},
wantExamplar: func(t *testing.T) interface{} {
nopProcessor := processortest.NewNopTraceProcessor(nil)
tracesamplerprocessor, err := tracesamplerprocessor.NewTraceProcessor(nopProcessor, tracesamplerprocessor.TraceSamplerCfg{})
if err != nil {
t.Fatalf("tracesamplerprocessor.NewTraceProcessor() = %v", err)
}
return tracesamplerprocessor
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down
168 changes: 168 additions & 0 deletions processor/tracesamplerprocessor/tracesamplerprocessor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
// Copyright 2019, OpenCensus Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package tracesamplerprocessor

import (
"context"
"errors"
"fmt"

tracepb "github.com/census-instrumentation/opencensus-proto/gen-go/trace/v1"
"github.com/spf13/viper"

"github.com/census-instrumentation/opencensus-service/consumer"
"github.com/census-instrumentation/opencensus-service/data"
"github.com/census-instrumentation/opencensus-service/processor"
)

const (
	// The constants below are the tags used to read the sampler configuration via viper.
	samplingPercentageCfgTag = "sampling-percentage"
	hashSeedCfgTag           = "hash-seed"

	// The constants below help translate user-friendly percentages to the numbers directly used in sampling.
	numHashBuckets        = 0x4000 // Using a power of 2 to avoid division.
	bitMaskHashBuckets    = numHashBuckets - 1
	percentageScaleFactor = numHashBuckets / 100.0
)

// TraceSamplerCfg has the configuration guiding the trace sampler processor.
type TraceSamplerCfg struct {
	// SamplingPercentage is the percentage rate at which traces are going to be sampled. Defaults to zero, i.e.: no sampling.
	// Values greater than or equal to 100 are treated as "sample all traces".
	SamplingPercentage float32
	// HashSeed allows one to configure the hashing seed. This is important in scenarios where multiple layers of collectors
	// have different sampling rates: if they use the same seed all spans passing one layer may pass the other even if they have
	// different sampling rates; configuring different seeds avoids that.
	HashSeed uint32
}

// InitFromViper updates TraceSamplerCfg according to the viper configuration.
// It returns the updated receiver on success so calls can be chained, or an
// error if v is nil or any key fails to unmarshal.
func (tsc *TraceSamplerCfg) InitFromViper(v *viper.Viper) (*TraceSamplerCfg, error) {
	if v == nil {
		return nil, errors.New("v is nil")
	}

	// Each configuration tag is unmarshaled into its destination field in a
	// fixed order so the first failing key determines the returned error.
	keys := []struct {
		tag string
		dst interface{}
	}{
		{samplingPercentageCfgTag, &tsc.SamplingPercentage},
		{hashSeedCfgTag, &tsc.HashSeed},
	}
	for _, k := range keys {
		if err := v.UnmarshalKey(k.tag, k.dst); err != nil {
			return nil, fmt.Errorf("failed to unmarshal %q: %v", k.tag, err)
		}
	}
	return tsc, nil
}

// tracesamplerprocessor applies probabilistic head sampling to incoming trace
// data before forwarding it to the next consumer in the pipeline.
type tracesamplerprocessor struct {
	// nextConsumer receives the (possibly reduced) trace data after sampling.
	nextConsumer consumer.TraceConsumer
	// scaledSamplingRate is the configured sampling percentage already translated
	// to the hash-bucket space (via percentageScaleFactor) so the per-span decision
	// is a plain integer comparison.
	scaledSamplingRate uint32
	// hashSeed seeds the trace-ID hash; see TraceSamplerCfg.HashSeed.
	hashSeed uint32
}

// Compile-time check that tracesamplerprocessor implements processor.TraceProcessor.
var _ processor.TraceProcessor = (*tracesamplerprocessor)(nil)

// NewTraceProcessor returns a processor.TraceProcessor that will perform head
// sampling according to the given configuration. It returns an error if
// nextConsumer is nil.
func NewTraceProcessor(nextConsumer consumer.TraceConsumer, cfg TraceSamplerCfg) (processor.TraceProcessor, error) {
	if nextConsumer == nil {
		return nil, errors.New("nextConsumer is nil")
	}

	// The percentage is pre-scaled to the hash-bucket space here so no
	// recalculation is needed on the per-span path.
	tsp := &tracesamplerprocessor{
		nextConsumer:       nextConsumer,
		scaledSamplingRate: uint32(cfg.SamplingPercentage * percentageScaleFactor),
		hashSeed:           cfg.HashSeed,
	}
	return tsp, nil
}

// ConsumeTraceData probabilistically filters the spans in td by hashing each
// span's trace ID, then forwards the sampled spans to the next consumer.
func (tsp *tracesamplerprocessor) ConsumeTraceData(ctx context.Context, td data.TraceData) error {
	rate := tsp.scaledSamplingRate
	// A rate at or above the number of buckets means "sample everything":
	// forward the data unchanged.
	if rate >= numHashBuckets {
		return tsp.nextConsumer.ConsumeTraceData(ctx, td)
	}

	sampled := data.TraceData{
		Node:         td.Node,
		Resource:     td.Resource,
		SourceFormat: td.SourceFormat,
		Spans:        make([]*tracepb.Span, 0, len(td.Spans)),
	}
	for _, span := range td.Spans {
		// Even if one assumes random trace IDs, hashing is still needed: traces
		// can come from sources with varying criteria for trace-ID generation and
		// may already have been sampled without hashing. Hashing here prevents
		// bias from such systems.
		if hash(span.TraceId, tsp.hashSeed)&bitMaskHashBuckets < rate {
			sampled.Spans = append(sampled.Spans, span)
		}
	}

	return tsp.nextConsumer.ConsumeTraceData(ctx, sampled)
}

// hash is a murmur3 hash function, see http://en.wikipedia.org/wiki/MurmurHash.
// It returns the 32-bit hash of key mixed with the given seed.
func hash(key []byte, seed uint32) uint32 {
	const (
		mulA     = 0xcc9e2d51
		mulB     = 0x1b873593
		finalA   = 0x85ebca6b
		finalB   = 0xc2b2ae35
		rotA     = 15
		rotB     = 13
		blockMul = 5
		blockAdd = 0xe6546b64
	)

	h := seed

	// Body: consume the key in little-endian 4-byte blocks.
	nblocks := len(key) / 4
	for b := 0; b < nblocks; b++ {
		chunk := key[b*4 : b*4+4]
		k := uint32(chunk[0]) | uint32(chunk[1])<<8 | uint32(chunk[2])<<16 | uint32(chunk[3])<<24
		k *= mulA
		k = (k << rotA) | (k >> (32 - rotA))
		k *= mulB
		h ^= k
		h = (h << rotB) | (h >> (32 - rotB))
		h = h*blockMul + blockAdd
	}

	// Tail: TraceId and SpanId have lengths that are multiples of 4, so this
	// path is never expected to be hit when sampling traces. It is preserved
	// here to keep a correct murmur3 implementation, enforced via tests.
	tail := key[nblocks*4:]
	if len(tail) > 0 {
		var k uint32
		switch len(tail) {
		case 3:
			k |= uint32(tail[2]) << 16
			fallthrough
		case 2:
			k |= uint32(tail[1]) << 8
			fallthrough
		case 1:
			k |= uint32(tail[0])
		}
		k *= mulA
		k = (k << rotA) | (k >> (32 - rotA))
		k *= mulB
		h ^= k
	}

	// Finalization: mix in the length and apply the avalanche steps.
	h ^= uint32(len(key))
	h ^= h >> 16
	h *= finalA
	h ^= h >> 13
	h *= finalB
	h ^= h >> 16

	return h
}
Loading

0 comments on commit dee9655

Please sign in to comment.