-
Notifications
You must be signed in to change notification settings - Fork 0
/
filter.go
189 lines (161 loc) · 3.99 KB
/
filter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
/*
Package cuckoo provides a Cuckoo Filter (Practically Better Than Bloom).
Cuckoo filters provide the flexibility to add and remove items dynamically.
A cuckoo filter is based on cuckoo hashing (and therefore named as cuckoo filter).
It is essentially a cuckoo hash table storing each key's fingerprint.
Implementation is based heavily on whitepaper: "Cuckoo Filter: Better Than Bloom" by Bin Fan, Dave Andersen and Michael Kaminsky
(https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf).
*/
package cuckoo
import (
"fmt"
"hash/fnv"
"math/rand"
"github.com/kuba--/cuckoo/internal"
)
// MaxNumKicks stands for maximum number of cuckoo kicks before insert failure.
var MaxNumKicks uint32 = 512
// DefaultHash32 is a default hash function.
var DefaultHash32 = fnv.New32a()
// Filter keeps hashtable with fingerprints.
type Filter struct {
buckets []internal.Bucket
count uint32
}
// NewFilter creates a new Cuckoo Filter with given capacity (rounding up to the nearest power of 2).
func NewFilter(capacity uint32) *Filter {
var len uint32 = 1
if capacity > internal.BucketSize {
len = power2(capacity) / internal.BucketSize
}
return &Filter{buckets: make([]internal.Bucket, len)}
}
/*
Insert inserts an item to the filter.
Pseudo code:
f = fingerprint(x);
i1 = hash(x);
i2 = i1 ⊕ hash(f);
if bucket[i1] or bucket[i2] has an empty entry then
add f to that bucket;
return Done;
// must relocate existing items;
i = randomly pick i1 or i2;
for n = 0; n < MaxNumKicks; n++ do
randomly select an entry e from bucket[i];
swap f and the fingerprint stored in entry e;
i = i ⊕ hash( f );
if bucket[i] has an empty entry then
add f to bucket[i];
return Done;
// Hashtable is considered full;
return Failure;
*/
func (f *Filter) Insert(item []byte) error {
x := sum32(item)
fp := f.fingerprint(x)
i := f.index1(x)
ok := f.buckets[i].Insert(fp)
if !ok {
i = f.index2(i, fp)
ok = f.buckets[i].Insert(fp)
}
for n := uint32(0); !ok && n < MaxNumKicks; n++ {
j := rand.Intn(internal.BucketSize)
e := f.buckets[i][j]
f.buckets[i][j] = fp
fp = e
i = f.index2(i, fp)
ok = f.buckets[i].Insert(fp)
}
if !ok {
return fmt.Errorf("Hashtable is considered full (MaxNumKicks: %v, Count: %v)", MaxNumKicks, f.Count())
}
f.count++
return nil
}
/*
Lookup reports if the item is inserted, with false positive rate.
Pseudo code:
f = fingerprint(x);
i1 = hash(x);
i2 = i1 ⊕ hash(f);
if bucket[i1] or bucket[i2] has f then
return True;
return False;
*/
func (f *Filter) Lookup(item []byte) bool {
x := sum32(item)
fp := f.fingerprint(x)
i := f.index1(x)
ok := f.buckets[i].Lookup(fp)
if !ok {
i = f.index2(i, fp)
ok = f.buckets[i].Lookup(fp)
}
return ok
}
/*
Delete deletes an item from the filter.
Pseudo code:
f = fingerprint(x);
i1 = hash(x);
i2 = i1 ⊕ hash(f);
if bucket[i1] or bucket[i2] has f then
remove a copy of f from this bucket;
return True;
return False;
*/
func (f *Filter) Delete(item []byte) bool {
x := sum32(item)
fp := f.fingerprint(x)
i := f.index1(x)
ok := f.buckets[i].Delete(fp)
if !ok {
i = f.index2(i, fp)
ok = f.buckets[i].Delete(fp)
}
if ok {
f.count--
}
return ok
}
/*
Count returns how many items are in the filter.
*/
func (f *Filter) Count() uint32 {
return f.count
}
// fingerprint generates a byte tag for given Hash32 value.
func (*Filter) fingerprint(x uint32) byte {
fp := x & 0xff
if fp == 0 {
fp++
}
return byte(fp)
}
// index1 generates an index for given Hash32 value
func (f *Filter) index1(x uint32) uint32 {
return x & uint32(len(f.buckets)-1)
}
// index2 generates alternative index.
func (f *Filter) index2(i1 uint32, fp byte) uint32 {
return i1 ^ f.index1(sum32([]byte{fp}))
}
// sum32 generates a Hash32 value for given item.
func sum32(item []byte) uint32 {
DefaultHash32.Reset()
DefaultHash32.Write(item)
return DefaultHash32.Sum32()
}
// power2 returns nearest power of 2 for given number.
func power2(n uint32) uint32 {
n--
n |= n >> 1
n |= n >> 2
n |= n >> 4
n |= n >> 8
n |= n >> 16
n++
return n
}