Skip to content

Commit 19f8ccb

Browse files
feat: query split (#301)
1 parent 7327752 commit 19f8ccb

File tree

8 files changed

+764
-46
lines changed

8 files changed

+764
-46
lines changed

config/config.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -221,9 +221,11 @@ type ClickHouse struct {
221221
TagsAdaptiveQueries int `toml:"tags-adaptive-queries" json:"tags-adaptive-queries" comment:"Tags adaptive queries (based on load average) for increase/decrease concurrent queries"`
222222
TagsLimiter limiter.ServerLimiter `toml:"-" json:"-"`
223223

224-
WildcardMinDistance int `toml:"wildcard-min-distance" json:"wildcard-min-distance" comment:"If a wildcard appears both at the start and the end of a plain query at a distance (in terms of nodes) less than wildcard-min-distance, then it will be discarded. This parameter can be used to discard expensive queries."`
225-
TagsMinInQuery int `toml:"tags-min-in-query" json:"tags-min-in-query" comment:"Minimum tags in seriesByTag query"`
226-
TagsMinInAutocomplete int `toml:"tags-min-in-autocomplete" json:"tags-min-in-autocomplete" comment:"Minimum tags in autocomplete query"`
224+
WildcardMinDistance int `toml:"wildcard-min-distance" json:"wildcard-min-distance" comment:"If a wildcard appears both at the start and the end of a plain query at a distance (in terms of nodes) less than wildcard-min-distance, then it will be discarded. This parameter can be used to discard expensive queries."`
225+
TrySplitQuery bool `toml:"try-split-query" json:"try-split-query" comment:"Plain queries like '{first,second}.custom.metric.*' are also a subject to wildcard-min-distance restriction. But can be split into 2 queries: 'first.custom.metric.*', 'second.custom.metric.*'. Note that: only one list will be split; if there are wildcard in query before (after) list then reverse (direct) notation will be preferred; if there are wildcards before and after list, then query will not be split"`
226+
MaxNodeToSplitIndex int `toml:"max-node-to-split-index" json:"max-node-to-split-index" comment:"Used only if try-split-query is true. Query that contains list will be split if its (list) node index is less or equal to max-node-to-split-index. By default is 0. It is recommended to have this value set to 2 or 3 and increase it very carefully, because 3 or 4 plain nodes without wildcards have good selectivity"`
227+
TagsMinInQuery int `toml:"tags-min-in-query" json:"tags-min-in-query" comment:"Minimum tags in seriesByTag query"`
228+
TagsMinInAutocomplete int `toml:"tags-min-in-autocomplete" json:"tags-min-in-autocomplete" comment:"Minimum tags in autocomplete query"`
227229

228230
UserLimits map[string]UserLimits `toml:"user-limits" json:"user-limits" comment:"customized query limiter for some users" commented:"true"`
229231
DateFormat string `toml:"date-format" json:"date-format" comment:"Date format (default, utc, both)"`

doc/config.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,10 @@ Only one tag used as filter for index field Tag1, see graphite_tagged table [str
313313
tags-adaptive-queries = 0
314314
# If a wildcard appears both at the start and the end of a plain query at a distance (in terms of nodes) less than wildcard-min-distance, then it will be discarded. This parameter can be used to discard expensive queries.
315315
wildcard-min-distance = 0
316+
# Plain queries like '{first,second}.custom.metric.*' are also subject to the wildcard-min-distance restriction, but they can be split into 2 queries: 'first.custom.metric.*' and 'second.custom.metric.*'. Note that only one list will be split; if there is a wildcard in the query before (after) the list, then reverse (direct) notation will be preferred; if there are wildcards both before and after the list, then the query will not be split.
317+
try-split-query = false
318+
# Used only if try-split-query is true. A query that contains a list will be split if the list's node index is less than or equal to max-node-to-split-index. The default is 0. It is recommended to set this value to 2 or 3 and to increase it very carefully, because 3 or 4 plain nodes without wildcards have good selectivity.
319+
max-node-to-split-index = 0
316320
# Minimum tags in seriesByTag query
317321
tags-min-in-query = 0
318322
# Minimum tags in autocomplete query

finder/finder.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,20 @@ func newPlainFinder(ctx context.Context, config *config.Config, query string, fr
6666
opts,
6767
useCache,
6868
)
69+
70+
if config.ClickHouse.TrySplitQuery {
71+
f = WrapSplitIndex(
72+
f,
73+
config.ClickHouse.WildcardMinDistance,
74+
config.ClickHouse.URL,
75+
config.ClickHouse.IndexTable,
76+
config.ClickHouse.IndexUseDaily,
77+
config.ClickHouse.IndexReverse,
78+
config.ClickHouse.IndexReverses,
79+
opts,
80+
useCache,
81+
)
82+
}
6983
} else {
7084
if from > 0 && until > 0 && config.ClickHouse.DateTreeTable != "" {
7185
f = NewDateFinder(config.ClickHouse.URL, config.ClickHouse.DateTreeTable, config.ClickHouse.DateTreeTableVersion, opts)

finder/index.go

Lines changed: 63 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -119,20 +119,13 @@ func (idx *IndexFinder) useReverse(query string) bool {
119119
return idx.useReverse(query)
120120
}
121121

122-
func (idx *IndexFinder) whereFilter(query string, from int64, until int64) *where.Where {
123-
reverse := idx.useReverse(query)
124-
if reverse {
125-
query = ReverseString(query)
126-
}
127-
128-
if idx.dailyEnabled && from > 0 && until > 0 {
129-
idx.useDaily = true
130-
} else {
131-
idx.useDaily = false
132-
}
122+
func useDaily(dailyEnabled bool, from, until int64) bool {
123+
return dailyEnabled && from > 0 && until > 0
124+
}
133125

126+
func calculateIndexLevelOffset(useDaily, reverse bool) int {
134127
var levelOffset int
135-
if idx.useDaily {
128+
if useDaily {
136129
if reverse {
137130
levelOffset = ReverseLevelOffset
138131
}
@@ -142,8 +135,11 @@ func (idx *IndexFinder) whereFilter(query string, from int64, until int64) *wher
142135
levelOffset = TreeLevelOffset
143136
}
144137

145-
w := idx.where(query, levelOffset)
146-
if idx.useDaily {
138+
return levelOffset
139+
}
140+
141+
func addDatesToWhere(w *where.Where, useDaily bool, from, until int64) {
142+
if useDaily {
147143
w.Andf(
148144
"Date >='%s' AND Date <= '%s'",
149145
date.FromTimestampToDaysFormat(from),
@@ -152,10 +148,24 @@ func (idx *IndexFinder) whereFilter(query string, from int64, until int64) *wher
152148
} else {
153149
w.And(where.Eq("Date", DefaultTreeDate))
154150
}
151+
}
152+
153+
func (idx *IndexFinder) whereFilter(query string, from int64, until int64) *where.Where {
154+
reverse := idx.useReverse(query)
155+
if reverse {
156+
query = ReverseString(query)
157+
}
158+
159+
idx.useDaily = useDaily(idx.dailyEnabled, from, until)
160+
161+
levelOffset := calculateIndexLevelOffset(idx.useDaily, reverse)
162+
163+
w := idx.where(query, levelOffset)
164+
addDatesToWhere(w, idx.useDaily, from, until)
155165
return w
156166
}
157167

158-
func (idx *IndexFinder) validatePlainQuery(query string, wildcardMinDistance int) error {
168+
func validatePlainQuery(query string, wildcardMinDistance int) error {
159169
if where.HasUnmatchedBrackets(query) {
160170
return errs.NewErrorWithCode("query has unmatched brackets", http.StatusBadRequest)
161171
}
@@ -175,7 +185,7 @@ func (idx *IndexFinder) validatePlainQuery(query string, wildcardMinDistance int
175185
}
176186

177187
func (idx *IndexFinder) Execute(ctx context.Context, config *config.Config, query string, from int64, until int64, stat *FinderStat) (err error) {
178-
err = idx.validatePlainQuery(query, config.ClickHouse.WildcardMinDistance)
188+
err = validatePlainQuery(query, config.ClickHouse.WildcardMinDistance)
179189
if err != nil {
180190
return err
181191
}
@@ -202,45 +212,61 @@ func (idx *IndexFinder) Abs(v []byte) []byte {
202212
return v
203213
}
204214

205-
func (idx *IndexFinder) bodySplit() {
206-
if len(idx.body) == 0 {
207-
return
215+
func splitIndexBody(body []byte, useReverse, useCache bool) ([]byte, [][]byte, bool) {
216+
if len(body) == 0 {
217+
return body, [][]byte{}, false
208218
}
209219

210-
idx.rows = bytes.Split(bytes.TrimSuffix(idx.body, []byte{'\n'}), []byte{'\n'})
220+
rows := bytes.Split(bytes.TrimSuffix(body, []byte{'\n'}), []byte{'\n'})
221+
setDirect := false
211222

212-
if idx.useReverse("") {
213-
// rotate names for reduce
223+
if useReverse {
214224
var buf bytes.Buffer
215-
if idx.useCache {
216-
buf.Grow(len(idx.body))
225+
if useCache {
226+
buf.Grow(len(body))
217227
}
218-
for i := 0; i < len(idx.rows); i++ {
219-
idx.rows[i] = ReverseBytes(idx.rows[i])
220-
if idx.useCache {
221-
buf.Write(idx.rows[i])
228+
229+
for i := range rows {
230+
rows[i] = ReverseBytes(rows[i])
231+
if useCache {
232+
buf.Write(rows[i])
222233
buf.WriteByte('\n')
223234
}
224235
}
225-
if idx.useCache {
226-
idx.body = buf.Bytes()
227-
idx.reverse = queryDirect
236+
237+
if useCache {
238+
body = buf.Bytes()
239+
setDirect = true
228240
}
229241
}
242+
243+
return body, rows, setDirect
244+
}
245+
246+
func (idx *IndexFinder) bodySplit() {
247+
setDirect := false
248+
idx.body, idx.rows, setDirect = splitIndexBody(idx.body, idx.useReverse(""), idx.useCache)
249+
if setDirect {
250+
idx.reverse = queryDirect
251+
}
230252
}
231253

232-
func (idx *IndexFinder) makeList(onlySeries bool) [][]byte {
233-
if len(idx.rows) == 0 {
254+
func makeList(rows [][]byte, onlySeries bool) [][]byte {
255+
if len(rows) == 0 {
234256
return [][]byte{}
235257
}
236258

237-
rows := make([][]byte, len(idx.rows))
259+
resRows := make([][]byte, len(rows))
238260

239-
for i := 0; i < len(idx.rows); i++ {
240-
rows[i] = idx.rows[i]
261+
for i := 0; i < len(rows); i++ {
262+
resRows[i] = rows[i]
241263
}
242264

243-
return rows
265+
return resRows
266+
}
267+
268+
func (idx *IndexFinder) makeList(onlySeries bool) [][]byte {
269+
return makeList(idx.rows, onlySeries)
244270
}
245271

246272
func (idx *IndexFinder) List() [][]byte {

0 commit comments

Comments
 (0)