-
Notifications
You must be signed in to change notification settings - Fork 2
/
minsketch.go
87 lines (72 loc) · 1.61 KB
/
minsketch.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
package abacus
import "sync"
import (
"github.com/spaolacci/murmur3"
"unsafe"
)
// CountType is the integer type of a single sketch counter.
type CountType uint32

// Max is the largest value a CountType can hold (all bits set).
const Max = ^CountType(0)
// Sketch is a table of counters addressed by hashing: every key maps to one
// cell in each of the Depth rows, and each row has Width cells.
type Sketch struct {
	// Width is the number of cells per row and Depth is the number of rows.
	Width, Depth uint32

	// Count holds the counters, indexed as Count[row][column].
	Count [][]CountType

	// mutex is taken by the methods that read or write Count.
	mutex sync.RWMutex
}
// sizeOfCell reports the size in bytes of one CountType counter.
func sizeOfCell() uintptr {
	return unsafe.Sizeof(CountType(0))
}
// NewSketch allocates a Sketch with depth rows of width zeroed counters each.
func NewSketch(width, depth uint32) (sk *Sketch) {
	rows := make([][]CountType, depth)
	for r := range rows {
		rows[r] = make([]CountType, width)
	}
	return &Sketch{
		Width: width,
		Depth: depth,
		Count: rows,
	}
}
// Incr is shorthand for Add(dat, 1): it counts one occurrence of dat and
// returns whatever Add returns.
func (sk *Sketch) Incr(dat []byte) (min CountType) {
	min = sk.Add(dat, 1)
	return min
}
// positions returns, for each of the sk.Depth rows, the column index that dat
// maps to in that row.
//
// Two 32-bit murmur3 hashes are combined as (h1 + row*h2) % Width; h2 is
// seeded with h1. The arithmetic is done in uint32 and wraps on overflow.
//
// reference: https://github.com/addthis/stream-lib/blob/master/src/main/java/com/clearspring/analytics/stream/membership/Filter.java
func (sk *Sketch) positions(dat []byte) (pos []uint32) {
	base := murmur3.Sum32WithSeed(dat, 0)
	step := murmur3.Sum32WithSeed(dat, base)

	pos = make([]uint32, sk.Depth)
	for row := range pos {
		pos[row] = (base + uint32(row)*step) % sk.Width
	}
	return pos
}
// Add records cnt additional occurrences of dat and returns the updated
// estimate for dat.
//
// The update is conservative: the current estimate (the smallest of the cells
// dat maps to, one per row) is raised by cnt, and only cells that are below
// the new estimate are lifted to it.
//
// The current estimate is read and the new one written under a single write
// lock, so concurrent Add calls that touch the same cells cannot both start
// from the same stale minimum and lose an update. The estimate saturates at
// the largest CountType value instead of wrapping around to a small number.
func (sk *Sketch) Add(dat []byte, cnt CountType) (min CountType) {
	pos := sk.positions(dat)

	sk.mutex.Lock()
	defer sk.mutex.Unlock()

	// Scan for the current minimum inline: sk.query takes the read lock and
	// would block forever against the write lock already held here.
	min = ^CountType(0)
	for i := uint32(0); i < sk.Depth; i++ {
		if v := sk.Count[i][pos[i]]; v < min {
			min = v
		}
	}

	// Saturating add: unsigned overflow shows up as a sum below the operand.
	if sum := min + cnt; sum >= min {
		min = sum
	} else {
		min = ^CountType(0)
	}

	for i := uint32(0); i < sk.Depth; i++ {
		if sk.Count[i][pos[i]] < min {
			sk.Count[i][pos[i]] = min
		}
	}
	return min
}
// Query returns the current estimate for dat: the smallest of the counters
// that dat maps to, one per row.
func (sk *Sketch) Query(dat []byte) (min CountType) {
	return sk.query(sk.positions(dat))
}
// query returns the smallest counter among Count[row][pos[row]] for every row
// below sk.Depth, reading under the read lock. It returns Max when there are
// no rows to inspect.
func (sk *Sketch) query(pos []uint32) (min CountType) {
	lowest := Max

	sk.mutex.RLock()
	for row := uint32(0); row < sk.Depth; row++ {
		if cell := sk.Count[row][pos[row]]; cell < lowest {
			lowest = cell
		}
	}
	sk.mutex.RUnlock()

	return lowest
}