Skip to content

File tree

2 files changed

+68
-43
lines changed

2 files changed

+68
-43
lines changed
 

‎go/summarize/summarize-keys.go

+67-42
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ type (
5151
}
5252

5353
Summary struct {
54-
tables []TableSummary
54+
tables []*TableSummary
5555
failures []FailuresSummary
5656
hotQueries []keys.QueryAnalysisResult
5757
}
@@ -60,7 +60,7 @@ type (
6060
Table string
6161
ReadQueryCount int
6262
WriteQueryCount int
63-
Columns map[ColumnInformation]ColumnUsage
63+
ColumnUses map[ColumnInformation]ColumnUsage
6464
JoinPredicates []operators.JoinPredicate
6565
Failed bool
6666
RowCount int
@@ -78,6 +78,19 @@ type (
7878
queryGraph map[graphKey]map[operators.JoinPredicate]int
7979
)
8080

81+
func (s *Summary) GetTable(name string) *TableSummary {
82+
for _, table := range s.tables {
83+
if table.Table == name {
84+
return table
85+
}
86+
}
87+
return nil
88+
}
89+
90+
func (s *Summary) AddTable(table *TableSummary) {
91+
s.tables = append(s.tables, table)
92+
}
93+
8194
const (
8295
Join Position = iota
8396
JoinRange
@@ -112,9 +125,9 @@ func (ts TableSummary) GetColumns() iter.Seq2[ColumnInformation, ColumnUsage] {
112125
ci ColumnInformation
113126
cu ColumnUsage
114127
}
115-
columns := make([]colDetails, 0, len(ts.Columns))
128+
columns := make([]colDetails, 0, len(ts.ColumnUses))
116129
maxColUse := make(map[string]float64)
117-
for colInfo, usage := range ts.Columns {
130+
for colInfo, usage := range ts.ColumnUses {
118131
columns = append(columns, colDetails{ci: colInfo, cu: usage})
119132
if maxColUse[colInfo.Name] < usage.Percentage {
120133
maxColUse[colInfo.Name] = usage.Percentage
@@ -156,19 +169,16 @@ func (ts TableSummary) UseCount() int {
156169
// and prints this summary information to the output.
157170
func printKeysSummary(out io.Writer, fileName string, analysedQueries *keys.Output, now time.Time, hotMetric, schemaInfoPath string) {
158171
var err error
172+
summary := &Summary{}
159173

160174
metricReader := getMetricForHotness(hotMetric)
161175

162-
var schemaInfo *schema.Info
163-
if schemaInfoPath != "" {
164-
schemaInfo, err = schema.Load(schemaInfoPath)
165-
if err != nil {
166-
panic(err)
167-
}
176+
err = summarizeSchemaInfo(summary, schemaInfoPath)
177+
if err != nil {
178+
panic(err)
168179
}
169-
summary := &Summary{}
170-
summarizeKeysQueries(summary, analysedQueries, metricReader, schemaInfo)
171180

181+
summarizeKeysQueries(summary, analysedQueries, metricReader)
172182
md := &markdown.MarkDown{}
173183
msg := `# Query Analysis Report
174184
@@ -178,7 +188,7 @@ func printKeysSummary(out io.Writer, fileName string, analysedQueries *keys.Outp
178188
`
179189
md.Printf(msg, now.Format(time.DateTime), fileName)
180190
renderHotQueries(md, summary.hotQueries, metricReader)
181-
renderTableUsage(summary.tables, md, schemaInfo != nil)
191+
renderTableUsage(summary.tables, md, schemaInfoPath != "")
182192
renderTablesJoined(md, analysedQueries)
183193
renderFailures(md, summary.failures)
184194

@@ -188,6 +198,26 @@ func printKeysSummary(out io.Writer, fileName string, analysedQueries *keys.Outp
188198
}
189199
}
190200

201+
func summarizeSchemaInfo(summary *Summary, schemaInfoPath string) error {
202+
if schemaInfoPath == "" {
203+
return nil
204+
}
205+
schemaInfo, err := schema.Load(schemaInfoPath)
206+
if err != nil {
207+
return err
208+
}
209+
for _, ti := range schemaInfo.Tables {
210+
table := summary.GetTable(ti.Name)
211+
if table == nil {
212+
table = &TableSummary{Table: ti.Name}
213+
summary.AddTable(table)
214+
}
215+
table.RowCount = ti.Rows
216+
}
217+
218+
return nil
219+
}
220+
191221
type getMetric = func(q keys.QueryAnalysisResult) float64
192222

193223
func getMetricForHotness(metric string) getMetric {
@@ -269,12 +299,15 @@ func renderHotQueries(md *markdown.MarkDown, queries []keys.QueryAnalysisResult,
269299
}
270300
}
271301

272-
func renderTableUsage(tableSummaries []TableSummary, md *markdown.MarkDown, includeRowCount bool) {
302+
func renderTableUsage(tableSummaries []*TableSummary, md *markdown.MarkDown, includeRowCount bool) {
273303
if len(tableSummaries) == 0 {
274304
return
275305
}
276306

277307
sort.Slice(tableSummaries, func(i, j int) bool {
308+
if tableSummaries[i].UseCount() == tableSummaries[j].UseCount() {
309+
return tableSummaries[i].Table < tableSummaries[j].Table
310+
}
278311
return tableSummaries[i].UseCount() > tableSummaries[j].UseCount()
279312
})
280313

@@ -287,7 +320,7 @@ func renderTableUsage(tableSummaries []TableSummary, md *markdown.MarkDown, incl
287320
}
288321
}
289322

290-
func renderTableOverview(md *markdown.MarkDown, tableSummaries []TableSummary, includeRowCount bool) {
323+
func renderTableOverview(md *markdown.MarkDown, tableSummaries []*TableSummary, includeRowCount bool) {
291324
headers := []string{"Table Name", "Reads", "Writes"}
292325
if includeRowCount {
293326
headers = append(headers, "Number of Rows")
@@ -308,7 +341,7 @@ func renderTableOverview(md *markdown.MarkDown, tableSummaries []TableSummary, i
308341
md.PrintTable(headers, rows)
309342
}
310343

311-
func renderColumnUsageTable(md *markdown.MarkDown, summary TableSummary) {
344+
func renderColumnUsageTable(md *markdown.MarkDown, summary *TableSummary) {
312345
md.PrintHeader(fmt.Sprintf("Table: `%s` (%d reads and %d writes)", summary.Table, summary.ReadQueryCount, summary.WriteQueryCount), 4)
313346

314347
headers := []string{"Column", "Position", "Used %"}
@@ -427,7 +460,7 @@ func makeKey(lhs, rhs operators.Column) graphKey {
427460
return graphKey{rhs.Table, lhs.Table}
428461
}
429462

430-
func summarizeKeysQueries(summary *Summary, queries *keys.Output, metricReader getMetric, schemaInfo *schema.Info) {
463+
func summarizeKeysQueries(summary *Summary, queries *keys.Output, metricReader getMetric) {
431464
tableSummaries := make(map[string]*TableSummary)
432465
tableUsageWriteCounts := make(map[string]int)
433466
tableUsageReadCounts := make(map[string]int)
@@ -438,39 +471,31 @@ func summarizeKeysQueries(summary *Summary, queries *keys.Output, metricReader g
438471
checkQueryForHotness(&summary.hotQueries, query, metricReader)
439472
}
440473

441-
tableRows := make(map[string]int)
442-
if schemaInfo != nil {
443-
for _, ti := range schemaInfo.Tables {
444-
tableRows[ti.Name] = ti.Rows
445-
}
446-
}
447-
448474
// Second pass: calculate percentages
449475
for _, tblSummary := range tableSummaries {
450476
tblSummary.ReadQueryCount = tableUsageReadCounts[tblSummary.Table]
451477
tblSummary.WriteQueryCount = tableUsageWriteCounts[tblSummary.Table]
452478
count := tblSummary.ReadQueryCount + tblSummary.WriteQueryCount
453-
if schemaInfo != nil {
454-
if rowCount, ok := tableRows[tblSummary.Table]; ok {
455-
tblSummary.RowCount = rowCount
456-
}
457-
}
458479
countF := float64(count)
459-
for colName, usage := range tblSummary.Columns {
480+
for colName, usage := range tblSummary.ColumnUses {
460481
usage.Percentage = (float64(usage.Count) / countF) * 100
461-
tblSummary.Columns[colName] = usage
482+
tblSummary.ColumnUses[colName] = usage
462483
}
463484
}
464485

465486
// Convert map to slice
466-
result := make([]TableSummary, 0, len(tableSummaries))
467-
for _, summary := range tableSummaries {
468-
result = append(result, *summary)
487+
for _, tblSummary := range tableSummaries {
488+
table := summary.GetTable(tblSummary.Table)
489+
if table == nil {
490+
summary.AddTable(tblSummary)
491+
continue
492+
}
493+
table.ReadQueryCount = tblSummary.ReadQueryCount
494+
table.WriteQueryCount = tblSummary.WriteQueryCount
495+
if table.ColumnUses != nil {
496+
panic("ColumnUses already set for table" + tblSummary.Table)
497+
}
469498
}
470-
sort.Slice(result, func(i, j int) bool {
471-
return result[i].Table < result[j].Table
472-
})
473-
summary.tables = result
474499

475500
// Collect failed queries
476501
var failures []FailuresSummary
@@ -508,8 +533,8 @@ func gatherTableInfo(query keys.QueryAnalysisResult, tableSummaries map[string]*
508533
for _, table := range query.TableNames {
509534
if _, exists := tableSummaries[table]; !exists {
510535
tableSummaries[table] = &TableSummary{
511-
Table: table,
512-
Columns: make(map[ColumnInformation]ColumnUsage),
536+
Table: table,
537+
ColumnUses: make(map[ColumnInformation]ColumnUsage),
513538
}
514539
}
515540

@@ -536,9 +561,9 @@ func summarizeColumnUsage(tableSummary *TableSummary, query keys.QueryAnalysisRe
536561
columns = slices.Compact(columns)
537562

538563
for _, col := range columns {
539-
usage := tableSummary.Columns[col]
564+
usage := tableSummary.ColumnUses[col]
540565
usage.Count += query.UsageCount
541-
tableSummary.Columns[col] = usage
566+
tableSummary.ColumnUses[col] = usage
542567
}
543568
}
544569

‎go/summarize/summarize-keys_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ func TestTableSummary(t *testing.T) {
4343
}
4444

4545
ts := TableSummary{
46-
Columns: map[ColumnInformation]ColumnUsage{
46+
ColumnUses: map[ColumnInformation]ColumnUsage{
4747
{Name: "l_shipmode", Pos: WhereRange}: {Percentage: 6},
4848
{Name: "l_receiptdate", Pos: WhereRange}: {Percentage: 28},
4949
{Name: "l_shipdate", Pos: WhereRange}: {Percentage: 22},

0 commit comments

Comments
 (0)
Please sign in to comment.