Skip to content

Commit c262c41

Browse files
authored
Merge pull request #94 from codefuse-ai/lhk_dev
[feat] improve inst combine pass, optimize generated soufflé code
2 parents 89bac38 + 127ea85 commit c262c41

File tree

8 files changed

+199
-39
lines changed

8 files changed

+199
-39
lines changed

godel-script/README.md

+9-13
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,8 @@ git reset HEAD~
7979
Use command below:
8080

8181
```bash
82-
mkdir build
83-
cd build
84-
cmake ..
85-
make -j
82+
mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release
83+
make -j6
8684
```
8785

8886
After building, you'll find `build/godel` in the `build` folder.
@@ -91,20 +89,18 @@ After building, you'll find `build/godel` in the `build` folder.
9189

9290
Use this command for help:
9391

94-
> ./build/godel -h
92+
> godel -h
9593
96-
### Compile Target Soufflé
94+
### Compile GödelScript to Target Soufflé
9795

98-
> ./build/godel -p {godel library directory} {input file} -s {soufflé output file} -Of
96+
> godel -p {godel library directory} {input file} -s {soufflé output file} -O2
9997
100-
`-Of` is an optimization for join order, we suggest to switch it on.
98+
We suggest using `-O2` for stable optimizations.
10199

102-
### Directly Run Soufflé
100+
### Directly Run GödelScript
103101

104-
> ./build/godel -p {godel library directory} {input file} -r -Of -f {database directory}
102+
> godel -p {godel library directory} {input file} -r -O2 -f {database directory}
105103
106-
`-Of` is an optimization for join order, we suggest to switch it on.
104+
We suggest using `-O2` for stable optimizations.
107105

108106
`-r` means directly run soufflé.
109-
110-
`-v` could be used for getting verbose info.

godel-script/godel-frontend/src/error/error.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -231,13 +231,13 @@ void error::warn_ignored_DO_schema(const std::unordered_set<std::string>& vec) {
231231
size_t ignored_count = 0;
232232
for(const auto& i : vec) {
233233
++ignored_count;
234-
if (ignored_count > 4) {
234+
if (ignored_count > 8) {
235235
break;
236236
}
237237
std::clog << reset << " " << i << "\n";
238238
}
239-
if (vec.size() > 4) {
240-
std::clog << reset << " ...(" << vec.size()-4 << ")\n";
239+
if (vec.size() > 8) {
240+
std::clog << reset << " ...(" << vec.size() - 8 << ")\n";
241241
}
242242
std::clog << std::endl;
243243
}

godel-script/godel-frontend/src/ir/inst_combine.cpp

+122-13
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ void inst_combine_pass::visit_store(lir::store* s) {
1212
//
1313
// (
1414
// ssa_temp_0 = a,
15-
// b = ssa_temp_1,
15+
// ssa_temp_1 = b,
1616
// call(ssa_temp_2, ssa_temp_0, ssa_temp_1)
1717
// )
1818
//
@@ -86,26 +86,67 @@ void inst_combine_pass::visit_compare(lir::compare* c) {
8686
}
8787
}
8888

89+
// Records `key_eq` comparisons whose two operands are both variables.
//
// The pattern of interest is:
//
//   a.key_eq(b.getParent())
//   -->
//   (
//     getParent(ssa_temp_0, b),
//     a = ssa_temp_0
//   )
//
// which is meant to be optimized into:
//
//   getParent(a, b)
//
// This visitor only records the relation between the two variables (in both
// directions) in `variable_reference_graph`, together with the call itself.
void inst_combine_pass::visit_call(lir::call* c) {
    // anything that is not a `key_eq` key comparison is ignored
    const bool is_key_eq =
        c->get_func_kind() == lir::call::kind::key_cmp &&
        c->get_function_name() == "key_eq";
    if (!is_key_eq) {
        return;
    }

    const auto& operands = c->get_arguments();
    const auto& lhs = operands[0];
    const auto& rhs = operands[1];

    // both sides must be variables, otherwise there is nothing to record
    if (lhs.kind != lir::inst_value_kind::variable ||
        rhs.kind != lir::inst_value_kind::variable) {
        return;
    }

    // the relation is symmetric, so register it from both ends
    variable_reference_graph[lhs.content].insert({rhs.content, c});
    variable_reference_graph[rhs.content].insert({lhs.content, c});
}
119+
89120
bool inst_combine_pass::run() {
90-
for(auto impl : ctx->rule_impls) {
91-
scan(impl);
92-
inst_elimination_worker().copy(impl);
121+
for (auto impl : ctx->rule_impls) {
122+
run_on_single_impl(impl);
93123
}
94-
for(auto impl : ctx->database_get_table) {
95-
scan(impl);
96-
inst_elimination_worker().copy(impl);
124+
for (auto impl : ctx->database_get_table) {
125+
run_on_single_impl(impl);
97126
}
98-
for(auto impl : ctx->schema_get_field) {
99-
scan(impl);
100-
inst_elimination_worker().copy(impl);
127+
for (auto impl : ctx->schema_get_field) {
128+
run_on_single_impl(impl);
101129
}
102-
for(auto impl : ctx->schema_data_constraint_impls) {
103-
scan(impl);
104-
inst_elimination_worker().copy(impl);
130+
for (auto impl : ctx->schema_data_constraint_impls) {
131+
run_on_single_impl(impl);
105132
}
106133
return true;
107134
}
108135

136+
// Runs scan + elimination on one rule implementation repeatedly.
//
// A round consists of `scan(b)` followed by `copy(b)` on the elimination
// worker. At least one round is always executed; further rounds run only
// while the previous round reported eliminated instructions, and the total
// number of rounds never exceeds `round_limit`.
void inst_combine_pass::run_on_single_impl(souffle_rule_impl* b) {
    constexpr size_t round_limit = 16;

    inst_elimination_worker eliminator{};
    size_t rounds_done = 0;
    do {
        scan(b);
        eliminator.copy(b);
        ++rounds_done;
    } while (eliminator.get_eliminated_count() != 0 && rounds_done < round_limit);
}
149+
109150
void inst_combine_pass::scan(souffle_rule_impl* b) {
110151
variable_reference_graph.clear();
111152
b->get_block()->accept(this);
@@ -265,6 +306,7 @@ void inst_elimination_worker::visit_block(lir::block* node) {
265306
for(auto i : node->get_content()) {
266307
// skip eliminated instruction
267308
if (i->get_flag_eliminated()) {
309+
++ eliminated_count;
268310
continue;
269311
}
270312

@@ -338,6 +380,8 @@ void inst_elimination_worker::visit_aggregator(lir::aggregator* node) {
338380
}
339381

340382
void inst_elimination_worker::copy(souffle_rule_impl* impl) {
383+
eliminated_count = 0;
384+
blk.clear();
341385
auto impl_blk = new lir::block(impl->get_block()->get_location());
342386

343387
blk.push_back(impl_blk);
@@ -354,4 +398,69 @@ void inst_elimination_worker::copy(souffle_rule_impl* impl) {
354398
delete impl_blk;
355399
}
356400

401+
// Rewrites every `find` call directly contained in `node` and recurses into
// all other instructions of the block.
//
// Each matching call (function kind `find`, function name "find") is replaced
// in place by a new comma-joined block holding two stores built from the
// call's return value and its first two arguments; the original call
// instruction is deleted.
//
// Every instruction that is not a `find` call is kept as is and visited, so
// that `find` calls nested deeper are rewritten as well. Previously a block
// that directly contained a `find` call never visited its other instructions,
// so nested `find` calls under such a block were left untouched.
void replace_find_call::visit_block(lir::block* node) {
    std::vector<lir::inst*> new_content;

    for (auto i : node->get_content()) {
        // NOTE(review): static_cast assumes lir::call derives (non-virtually)
        // from lir::inst, as the original downcast already relied on.
        auto call = i->get_kind() == lir::inst_kind::inst_call
            ? static_cast<lir::call*>(i)
            : nullptr;
        const bool is_find_call = call &&
            call->get_func_kind() == lir::call::kind::find &&
            call->get_function_name() == "find";

        if (!is_find_call) {
            // keep the instruction, but still look for nested find calls
            i->accept(this);
            new_content.push_back(i);
            continue;
        }

        // take copies: the call is deleted below, so nothing may keep
        // referring to its internals afterwards
        auto dst = call->get_return();
        auto arg0 = call->get_arguments()[0];
        auto arg1 = call->get_arguments()[1];

        auto new_block = new lir::block(call->get_location());
        new_block->set_use_comma();
        new_block->add_new_content(new lir::store(arg0, dst, call->get_location()));
        new_block->add_new_content(new lir::store(arg1, arg0, call->get_location()));
        new_content.push_back(new_block);

        delete i;
    }

    node->get_mutable_content().swap(new_content);
}
449+
450+
bool replace_find_call::run() {
451+
for (auto impl : ctx->rule_impls) {
452+
impl->get_block()->accept(this);
453+
}
454+
for (auto impl : ctx->database_get_table) {
455+
impl->get_block()->accept(this);
456+
}
457+
for (auto impl : ctx->schema_get_field) {
458+
impl->get_block()->accept(this);
459+
}
460+
for (auto impl : ctx->schema_data_constraint_impls) {
461+
impl->get_block()->accept(this);
462+
}
463+
return true;
464+
}
465+
357466
}

godel-script/godel-frontend/src/ir/inst_combine.h

+18
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,11 @@ class inst_combine_pass: public pass {
2020
private:
2121
void visit_store(lir::store*) override;
2222
void visit_compare(lir::compare*) override;
23+
void visit_call(lir::call*) override;
2324

2425
private:
2526
void scan(souffle_rule_impl*);
27+
void run_on_single_impl(souffle_rule_impl*);
2628

2729
public:
2830
inst_combine_pass(ir_context& c): pass(pass_kind::ps_inst_combine, c) {}
@@ -65,6 +67,7 @@ class combine_worker: public lir::inst_visitor {
6567
class inst_elimination_worker: public lir::inst_visitor {
6668
private:
6769
std::vector<lir::block*> blk;
70+
size_t eliminated_count = 0;
6871

6972
private:
7073
void visit_boolean(lir::boolean* node) override {
@@ -111,6 +114,21 @@ class inst_elimination_worker: public lir::inst_visitor {
111114

112115
public:
113116
void copy(souffle_rule_impl*);
117+
auto get_eliminated_count() const {
118+
return eliminated_count;
119+
}
120+
};
121+
122+
class replace_find_call: public pass {
123+
private:
124+
void visit_block(lir::block*) override;
125+
126+
public:
127+
replace_find_call(ir_context& c): pass(pass_kind::ps_replace_find_call, c) {}
128+
const char* get_name() const override {
129+
return "[Transform] Replace Find Call";
130+
}
131+
bool run() override;
114132
};
115133

116134
}

godel-script/godel-frontend/src/ir/ir_gen.h

+5
Original file line numberDiff line numberDiff line change
@@ -226,9 +226,14 @@ class ir_gen: public ast_visitor {
226226
std::vector<lir::call*>&,
227227
bool);
228228
bool visit_for_stmt(for_stmt*) override;
229+
// adjust the order of the generated IR to change the join order and make it run faster
230+
// for statement often uses a large set, so this optimization is useful in most cases
229231
void optimized_for_stmt_gen(for_stmt*);
230232
void unoptimized_for_stmt_gen(for_stmt*);
231233
bool visit_let_stmt(let_stmt*) override;
234+
// adjust the order of the generated IR to change the join order and make it run faster
235+
// a let statement often uses a single value or a small set
236+
// so this optimization is not very useful, or even harmful
232237
void optimized_let_stmt_gen(let_stmt*);
233238
void unoptimized_let_stmt_gen(let_stmt*);
234239

godel-script/godel-frontend/src/ir/pass.h

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ enum class pass_kind {
1313
ps_remove_unused,
1414
ps_remove_unused_type,
1515
ps_inst_combine,
16+
ps_replace_find_call,
1617
ps_flatten_nested_block,
1718
ps_aggregator_inline_remark,
1819
ps_ungrounded_check,

godel-script/godel-frontend/src/ir/pass_manager.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ void pass_manager::run(ir_context& ctx, const cli::configure& conf) {
2929
ordered_pass_list.push_back(new unused_type_alias_remove_pass(ctx));
3030
}
3131
if (!conf.count(cli::option::cli_disable_inst_combine)) {
32+
ordered_pass_list.push_back(new replace_find_call(ctx));
3233
ordered_pass_list.push_back(new inst_combine_pass(ctx));
3334
}
3435
ordered_pass_list.push_back(new flatten_nested_block(ctx));

0 commit comments

Comments
 (0)