Skip to content

Commit 627f366

Browse files
committed
update signature interface
1 parent d3a0bd6 commit 627f366

File tree

4 files changed

+36
-13
lines changed

4 files changed

+36
-13
lines changed

lsh.go

+5-5
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ const (
1111
integrationPrecision = 0.01
1212
)
1313

14-
type hashKeyFunc func(Signature) string
14+
// hashKeyFunc is the type of a function that turns a slice of hash values
// into a string key.
type hashKeyFunc func([]uint64) string
1515

1616
func hashKeyFuncGen(hashValueSize int) hashKeyFunc {
17-
return func(sig Signature) string {
17+
return func(sig []uint64) string {
1818
s := make([]byte, hashValueSize*len(sig))
1919
buf := make([]byte, 8)
2020
for i, v := range sig {
@@ -163,7 +163,7 @@ func (f *MinhashLSH) Params() (k, l int) {
163163

164164
// Add a key with MinHash signature into the index.
165165
// The key won't be searchable until Index() is called.
166-
func (f *MinhashLSH) Add(key interface{}, sig Signature) {
166+
func (f *MinhashLSH) Add(key interface{}, sig []uint64) {
167167
// Generate hash keys
168168
Hs := make([]string, f.l)
169169
for i := 0; i < f.l; i++ {
@@ -213,7 +213,7 @@ func (f *MinhashLSH) Index() {
213213
}
214214

215215
// Query returns candidate keys given the query signature.
216-
func (f *MinhashLSH) Query(sig Signature) []interface{} {
216+
func (f *MinhashLSH) Query(sig []uint64) []interface{} {
217217
set := f.query(sig, f.k)
218218
results := make([]interface{}, 0, len(set))
219219
for key := range set {
@@ -222,7 +222,7 @@ func (f *MinhashLSH) Query(sig Signature) []interface{} {
222222
return results
223223
}
224224

225-
func (f *MinhashLSH) query(sig Signature, minK int) map[interface{}]bool {
225+
func (f *MinhashLSH) query(sig []uint64, minK int) map[interface{}]bool {
226226
results := make(map[interface{}]bool)
227227
for K := f.k; K >= minK; K-- {
228228
prefixSize := f.hashValueSize * K

lsh_benchmark_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import (
66
)
77

88
func Benchmark_Insert10000(b *testing.B) {
9-
sigs := make([]Signature, 10000)
9+
sigs := make([][]uint64, 10000)
1010
for i := range sigs {
1111
sigs[i] = randomSignature(64, int64(i))
1212
}

lsh_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ import (
55
"testing"
66
)
77

8-
func randomSignature(size int, seed int64) Signature {
8+
func randomSignature(size int, seed int64) []uint64 {
99
r := rand.New(rand.NewSource(seed))
10-
sig := make(Signature, size)
10+
sig := make([]uint64, size)
1111
for i := range sig {
1212
sig[i] = uint64(r.Int63())
1313
}

minhash.go

+28-5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package minhashlsh
22

33
import (
4+
"bytes"
45
"encoding/binary"
56
"hash/fnv"
67
"math/rand"
@@ -11,15 +12,13 @@ import (
1112
// The number of bytes in a hash value for Minhash
1213
const hashValueSize = 8
1314

14-
type Signature []uint64
15-
16-
// Represents a MinHash object
15+
// Minhash represents a MinHash object
1716
type Minhash struct {
1817
mw *minwise.MinWise
1918
seed int64
2019
}
2120

22-
// Initialize a MinHash object with a seed and the number of
21+
// NewMinhash initializes a MinHash object with a seed and the number of
2322
// hash functions.
2423
func NewMinhash(seed int64, numHash int) *Minhash {
2524
r := rand.New(rand.NewSource(seed))
@@ -54,7 +53,7 @@ func (m *Minhash) Push(b []byte) {
5453
m.mw.Push(b)
5554
}
5655

57-
// Export the MinHash signature.
56+
// Signature exports the MinHash as a list of hash values.
5857
func (m *Minhash) Signature() []uint64 {
5958
return m.mw.Signature()
6059
}
@@ -68,3 +67,27 @@ func (m *Minhash) Merge(o *Minhash) {
6867
}
6968
m.mw.Merge(o.mw)
7069
}
70+
71+
// SigToBytes serializes a signature into a byte slice.
//
// Each hash value is encoded as 8 bytes in big-endian order, in the same
// order as it appears in sig, so the result is 8*len(sig) bytes long.
// An empty or nil signature yields a nil slice.
func SigToBytes(sig []uint64) []byte {
	if len(sig) == 0 {
		return nil
	}
	// Width in bytes of one encoded uint64.
	const wordSize = 8
	out := make([]byte, wordSize*len(sig))
	for i, v := range sig {
		// Writing straight into the pre-sized slice cannot fail, so there
		// is no error to drop (binary.Write returns one that must be
		// checked).
		binary.BigEndian.PutUint64(out[i*wordSize:], v)
	}
	return out
}
79+
80+
// BytesToSig converts a byte slice into a signature
81+
func BytesToSig(data []byte) ([]uint64, error) {
82+
size := len(data) / hashValueSize
83+
sig := make([]uint64, size)
84+
buf := bytes.NewReader(data)
85+
var v uint64
86+
for i := range sig {
87+
if err := binary.Read(buf, binary.BigEndian, &v); err != nil {
88+
return nil, err
89+
}
90+
sig[i] = v
91+
}
92+
return sig, nil
93+
}

0 commit comments

Comments
 (0)