-
Notifications
You must be signed in to change notification settings - Fork 164
/
Copy pathsliding_window.go
234 lines (201 loc) · 7.09 KB
/
sliding_window.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
package flow
import (
"sort"
"sync"
"time"
"github.com/reugn/go-streams"
)
// SlidingWindowOpts represents SlidingWindow configuration options.
// T is the type of the elements flowing into the window.
//
// The zero value is valid: elements are stamped with processing time and no
// window is emitted before the first full window duration has elapsed.
type SlidingWindowOpts[T any] struct {
// EventTimeExtractor is a function that extracts the event time from an element.
// Event time is the time at which the event occurred on its producing device.
// Using event time enables correct windowing even when events arrive out of order
// or with delays.
//
// If EventTimeExtractor is not specified, processing time is used. Processing time
// refers to the system time of the machine executing the window operation.
EventTimeExtractor func(T) time.Time
// EmitPartialWindow determines whether to emit window elements before the first
// full window duration has elapsed. If false, the first window will only be
// emitted after the full window duration.
EmitPartialWindow bool
}
// timedElement stores an incoming element along with its event time.
// The event time is produced by SlidingWindowOpts.EventTimeExtractor when one is
// configured, and is otherwise the system time at which the element was received.
type timedElement[T any] struct {
// element is the original value received on the input channel.
element T
// eventTime is the timestamp used to order the element and to assign it to windows.
eventTime time.Time
}
// SlidingWindow assigns elements to windows of fixed length configured by the window
// size parameter.
// An additional window slide parameter controls how frequently a sliding window is started.
// Hence, sliding windows can be overlapping if the slide is smaller than the window size.
// In this case elements are assigned to multiple windows.
// T indicates the incoming element type, and the outgoing element type is []T.
type SlidingWindow[T any] struct {
// mu guards queue, which is appended to by receive and consumed by dispatchWindow.
mu sync.Mutex
// windowSize is the duration covered by each emitted window.
windowSize time.Duration
// slidingInterval is the period between two consecutive window emissions.
slidingInterval time.Duration
// queue buffers received elements, together with their event times, until they
// fall out of every future window.
queue []timedElement[T]
// in is the channel on which upstream elements arrive.
in chan any
// out is the channel on which completed windows ([]T values) are emitted.
out chan any
// done is closed by receive once the input channel has been closed and drained.
done chan struct{}
// opts holds the options the operator was constructed with.
opts SlidingWindowOpts[T]
}
// Compile-time check that *SlidingWindow satisfies the streams.Flow interface.
// The blank identifier discards the value; only the assignability check matters.
var _ streams.Flow = (*SlidingWindow[any])(nil)
// NewSlidingWindow creates a SlidingWindow operator driven by processing time,
// that is, the system time of the machine executing the window operation.
// It is shorthand for NewSlidingWindowWithOpts with zero-valued options.
// T is the incoming element type; emitted windows have type []T.
//
// windowSize is the duration of each generated window.
// slidingInterval is the interval at which new windows are started.
//
// NewSlidingWindow panics if slidingInterval is larger than windowSize.
func NewSlidingWindow[T any](windowSize, slidingInterval time.Duration) *SlidingWindow[T] {
	var defaults SlidingWindowOpts[T]
	return NewSlidingWindowWithOpts(windowSize, slidingInterval, defaults)
}
// NewSlidingWindowWithOpts returns a new SlidingWindow operator configured with the
// provided configuration options.
// T specifies the incoming element type, and the outgoing element type is []T.
//
// windowSize is the Duration of generated windows.
// slidingInterval is the sliding interval of generated windows.
// opts are the sliding window configuration options.
//
// The returned operator immediately starts a goroutine that buffers elements
// sent to its input channel.
//
// NewSlidingWindowWithOpts panics if slidingInterval is not positive or is
// larger than windowSize.
func NewSlidingWindowWithOpts[T any](
	windowSize, slidingInterval time.Duration, opts SlidingWindowOpts[T]) *SlidingWindow[T] {
	// Fail fast: emit hands slidingInterval to time.NewTicker, which panics on a
	// non-positive duration. Left unchecked, that panic would only surface later
	// in a background goroutine, where the caller has no way to recover from it.
	if slidingInterval <= 0 {
		panic("sliding interval must be positive")
	}
	if windowSize < slidingInterval {
		panic("sliding interval is larger than window size")
	}

	slidingWindow := &SlidingWindow[T]{
		windowSize:      windowSize,
		slidingInterval: slidingInterval,
		in:              make(chan any),
		out:             make(chan any),
		done:            make(chan struct{}),
		opts:            opts,
	}

	// start buffering incoming stream elements
	go slidingWindow.receive()

	return slidingWindow
}
// Via connects the SlidingWindow to the given Flow and returns that Flow.
// Window emission and forwarding both run in background goroutines, so Via
// itself does not block.
func (sw *SlidingWindow[T]) Via(flow streams.Flow) streams.Flow {
	// forward emitted windows to the downstream flow
	go sw.transmit(flow)
	// produce windows on the sliding schedule
	go sw.emit()
	return flow
}
// To streams data to the given Sink and blocks until the Sink has completed
// processing all data.
//
// Windows are produced by emit in a background goroutine while the calling
// goroutine forwards them to the Sink.
func (sw *SlidingWindow[T]) To(sink streams.Sink) {
go sw.emit()
// transmit returns once sw.out has been closed and drained; it also closes
// the sink's input channel before returning.
sw.transmit(sink)
sink.AwaitCompletion()
}
// Out exposes the receive-only side of the channel on which the SlidingWindow
// operator emits its windows.
func (sw *SlidingWindow[T]) Out() <-chan any {
	var outlet <-chan any = sw.out
	return outlet
}
// In exposes the send-only side of the channel on which the SlidingWindow
// operator accepts incoming elements.
func (sw *SlidingWindow[T]) In() chan<- any {
	var inlet chan<- any = sw.in
	return inlet
}
// transmit forwards every window emitted on sw.out to the given Inlet and
// closes the Inlet's input channel once sw.out has been closed and drained.
func (sw *SlidingWindow[T]) transmit(inlet streams.Inlet) {
	for {
		window, open := <-sw.out
		if !open {
			// no more windows will be produced
			break
		}
		inlet.In() <- window
	}
	close(inlet.In())
}
// eventTime returns the timestamp to associate with element: the result of the
// configured EventTimeExtractor when one is set, or the current system time
// (processing time) otherwise.
func (sw *SlidingWindow[T]) eventTime(element T) time.Time {
	if extractor := sw.opts.EventTimeExtractor; extractor != nil {
		return extractor(element)
	}
	return time.Now()
}
// receive drains the input channel, pairing each element with its event time
// and appending the pair to the queue. When the input channel is closed it
// closes sw.done to signal that no further elements will arrive.
//
// An element that is not of type T causes a type-assertion panic.
func (sw *SlidingWindow[T]) receive() {
	for raw := range sw.in {
		value := raw.(T)
		// The timestamp is resolved before taking the lock so that a slow
		// extractor does not hold up window dispatching.
		stamped := timedElement[T]{
			element:   value,
			eventTime: sw.eventTime(value),
		}

		sw.mu.Lock()
		sw.queue = append(sw.queue, stamped)
		sw.mu.Unlock()
	}
	close(sw.done)
}
// emit drives window production: once per sw.slidingInterval it dispatches the
// window ending at the ticker's timestamp. Unless EmitPartialWindow is set, the
// ticker is started only after an initial delay of windowSize - slidingInterval,
// so the first dispatch happens one full window duration after emit begins.
//
// When the input is exhausted (sw.done is closed) emit dispatches one final
// window ending one slidingInterval after the last tick and closes sw.out. If
// the input is exhausted during the initial delay, sw.out is closed without
// dispatching anything.
func (sw *SlidingWindow[T]) emit() {
	if !sw.opts.EmitPartialWindow {
		warmup := time.NewTimer(sw.windowSize - sw.slidingInterval)
		select {
		case <-sw.done:
			warmup.Stop()
			close(sw.out)
			return
		case <-warmup.C:
			// initial delay elapsed; start the regular schedule
		}
	}

	previousTick := time.Now()
	ticker := time.NewTicker(sw.slidingInterval)
	defer ticker.Stop()

	for {
		select {
		case <-sw.done:
			// flush whatever is still buffered as the last window
			finalTick := previousTick.Add(sw.slidingInterval)
			sw.dispatchWindow(finalTick)
			close(sw.out)
			return
		case now := <-ticker.C:
			previousTick = now
			sw.dispatchWindow(now)
		}
	}
}
// dispatchWindow sends the elements of the window ending at tick to the output
// channel and advances the queue to the next window position.
// Empty windows are not emitted.
func (sw *SlidingWindow[T]) dispatchWindow(tick time.Time) {
	sw.mu.Lock()
	// Order the queue by event time. A stable sort keeps elements that share a
	// timestamp in arrival order, so overlapping windows always present them in
	// the same sequence; sort.Slice gives no such guarantee.
	sort.SliceStable(sw.queue, func(i, j int) bool {
		return sw.queue[i].eventTime.Before(sw.queue[j].eventTime)
	})
	// extract current window elements
	windowElements := sw.extractWindowElements(tick)
	sw.mu.Unlock()

	// Send after releasing the lock so that a slow consumer does not block
	// receive from buffering new elements.
	if len(windowElements) > 0 {
		sw.out <- windowElements
	}
}
// extractWindowElements returns the elements of the window ending at tick and
// drops from the queue everything that precedes the start of the next window.
//
// Elements with an event time earlier than tick are returned; elements at or
// after tick stay queued for a later window. Elements whose event time is at or
// after the next window start (tick - windowSize + slidingInterval) are
// retained, so with overlapping windows they are emitted again by subsequent
// calls. The lower bound is inclusive so that an element stamped exactly on a
// window boundary is never dropped without having been emitted.
//
// The queue must be sorted by event time and sw.mu must be held by the caller.
func (sw *SlidingWindow[T]) extractWindowElements(tick time.Time) []T {
	// calculate the next window start time
	nextWindowStartTime := tick.Add(-sw.windowSize).Add(sw.slidingInterval)

	queue := sw.queue
	// The queue is ordered, so each boundary is the first index whose event time
	// is not before the corresponding instant; no full scan is required.
	windowEnd := sort.Search(len(queue), func(i int) bool {
		return !queue[i].eventTime.Before(tick)
	})
	retainFrom := sort.Search(len(queue), func(i int) bool {
		return !queue[i].eventTime.Before(nextWindowStartTime)
	})

	elements := make([]T, 0, windowEnd)
	for _, timed := range queue[:windowEnd] {
		elements = append(elements, timed.element)
	}

	// Move the window. The retained tail is copied rather than resliced so the
	// old backing array, including the dropped elements, can be collected.
	sw.queue = append([]timedElement[T](nil), queue[retainFrom:]...)

	return elements
}