Skip to content

Commit baa7138

Browse files
author
Mark Hale
committed
Add support for n-ary unions.
1 parent 099955e commit baa7138

17 files changed

Lines changed: 437 additions & 37 deletions

strategy/src/main/java/com/msd/gin/halyard/algebra/AbstractExtendedQueryModelVisitor.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ public void meetOther(QueryModelNode node) throws X {
1212
meet((ExtendedTupleFunctionCall)node);
1313
} else if (node instanceof StarJoin) {
1414
meet((StarJoin)node);
15+
} else if (node instanceof NAryUnion) {
16+
meet((NAryUnion) node);
1517
} else {
1618
super.meetOther(node);
1719
}
@@ -29,4 +31,8 @@ public void meet(StarJoin node) throws X {
2931
meetNAryTupleOperator(node);
3032
}
3133

34+
public void meet(NAryUnion node) throws X {
35+
meetNAryTupleOperator(node);
36+
}
37+
3238
}

strategy/src/main/java/com/msd/gin/halyard/algebra/NAryTupleOperator.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ public int getArgCount() {
4040
return args.length;
4141
}
4242

43+
@Override
44+
public <X extends Exception> void visit(QueryModelVisitor<X> visitor) throws X {
45+
visitor.meetOther(this);
46+
}
47+
4348
@Override
4449
public <X extends Exception> void visitChildren(final QueryModelVisitor<X> visitor) throws X {
4550
for (TupleExpr arg : args) {
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package com.msd.gin.halyard.algebra;
2+
3+
import java.util.LinkedHashSet;
4+
import java.util.List;
5+
import java.util.Set;
6+
7+
import org.eclipse.rdf4j.query.algebra.TupleExpr;
8+
9+
public class NAryUnion extends NAryTupleOperator {
10+
private static final long serialVersionUID = 3627321682402493645L;
11+
12+
public NAryUnion(List<TupleExpr> exprs) {
13+
assert exprs.size() > 1;
14+
setArgs(exprs);
15+
}
16+
17+
@Override
18+
public Set<String> getAssuredBindingNames() {
19+
Set<String> bindingNames = new LinkedHashSet<>(16);
20+
bindingNames.addAll(getArg(0).getAssuredBindingNames());
21+
for (int i=1; i<getArgCount(); i++) {
22+
bindingNames.retainAll(getArg(i).getAssuredBindingNames());
23+
}
24+
return bindingNames;
25+
}
26+
27+
@Override
28+
public boolean equals(Object other) {
29+
if (other instanceof NAryUnion) {
30+
NAryUnion o = (NAryUnion) other;
31+
return super.equals(other);
32+
}
33+
return false;
34+
}
35+
36+
@Override
37+
public int hashCode() {
38+
return super.hashCode() ^ "NAryUnion".hashCode();
39+
}
40+
41+
@Override
42+
public NAryUnion clone() {
43+
return (NAryUnion) super.clone();
44+
}
45+
}

strategy/src/main/java/com/msd/gin/halyard/algebra/StarJoin.java

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@
2626
import org.eclipse.rdf4j.query.algebra.QueryModelNode;
2727
import org.eclipse.rdf4j.query.algebra.QueryModelVisitor;
2828
import org.eclipse.rdf4j.query.algebra.StatementPattern;
29+
import org.eclipse.rdf4j.query.algebra.StatementPattern.Scope;
2930
import org.eclipse.rdf4j.query.algebra.TupleExpr;
3031
import org.eclipse.rdf4j.query.algebra.Var;
31-
import org.eclipse.rdf4j.query.algebra.StatementPattern.Scope;
3232
import org.eclipse.rdf4j.query.algebra.helpers.collectors.StatementPatternCollector;
3333

3434
/**
@@ -93,11 +93,6 @@ public <L extends Collection<Var>> L getVars(L varCollection) {
9393
return varCollection;
9494
}
9595

96-
@Override
97-
public <X extends Exception> void visit(QueryModelVisitor<X> visitor) throws X {
98-
visitor.meetOther(this);
99-
}
100-
10196
@Override
10297
public <X extends Exception> void visitChildren(final QueryModelVisitor<X> visitor) throws X {
10398
commonVar.visit(visitor);

strategy/src/main/java/com/msd/gin/halyard/optimizers/HalyardFilterOptimizer.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
package com.msd.gin.halyard.optimizers;
1818

1919
import com.msd.gin.halyard.algebra.AbstractExtendedQueryModelVisitor;
20+
import com.msd.gin.halyard.algebra.NAryUnion;
2021
import com.msd.gin.halyard.algebra.SkipVarsQueryModelVisitor;
2122
import com.msd.gin.halyard.algebra.StarJoin;
2223

@@ -217,6 +218,27 @@ public void meet(Union union) {
217218
FilterRelocator.optimize(clone);
218219
}
219220

221+
// Halyard
222+
@Override
223+
public void meet(NAryUnion union) {
224+
int n = union.getArgCount();
225+
Filter[] clones = new Filter[n];
226+
clones[0] = filter;
227+
for (int i=1; i<n; i++) {
228+
Filter clone = new Filter();
229+
clone.setCondition(filter.getCondition().clone());
230+
clones[i] = clone;
231+
}
232+
233+
for (int i=0; i<n; i++) {
234+
relocate(clones[i], union.getArg(i));
235+
}
236+
237+
for (int i=0; i<n; i++) {
238+
FilterRelocator.optimize(clones[i]);
239+
}
240+
}
241+
220242
@Override
221243
public void meet(Difference node) {
222244
Filter clone = new Filter();

strategy/src/main/java/com/msd/gin/halyard/optimizers/HalyardIterativeEvaluationOptimizer.java

Lines changed: 77 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
11
package com.msd.gin.halyard.optimizers;
22

3+
import com.msd.gin.halyard.algebra.NAryUnion;
34
import com.msd.gin.halyard.algebra.SkipVarsQueryModelVisitor;
45

6+
import java.util.ArrayList;
7+
import java.util.List;
8+
59
import org.eclipse.rdf4j.query.BindingSet;
610
import org.eclipse.rdf4j.query.Dataset;
11+
import org.eclipse.rdf4j.query.algebra.AbstractQueryModelNode;
712
import org.eclipse.rdf4j.query.algebra.Join;
813
import org.eclipse.rdf4j.query.algebra.TupleExpr;
914
import org.eclipse.rdf4j.query.algebra.Union;
@@ -24,36 +29,85 @@ public void meet(Union union) {
2429

2530
TupleExpr leftArg = union.getLeftArg();
2631
TupleExpr rightArg = union.getRightArg();
27-
28-
if (leftArg instanceof Join && rightArg instanceof Join) {
32+
if ((leftArg instanceof Join) && (rightArg instanceof Join)) {
2933
Join leftJoinArg = (Join) leftArg;
30-
Join rightJoin = (Join) rightArg;
31-
TupleExpr leftSide = leftJoinArg.getRightArg();
32-
TupleExpr rightSide = rightJoin.getRightArg();
34+
Join rightJoinArg = (Join) rightArg;
35+
TupleExpr newLeftSide = leftJoinArg.getRightArg();
36+
TupleExpr newRightSide = rightJoinArg.getRightArg();
37+
if (!TupleExprs.isVariableScopeChange(newLeftSide) && !TupleExprs.isVariableScopeChange(newRightSide)) {
38+
TupleExpr commonFactor = leftJoinArg.getLeftArg();
39+
if (commonFactor.equals(rightJoinArg.getLeftArg())) {
40+
// factor out the left-most join argument
41+
Join newJoin = new Join();
42+
union.replaceWith(newJoin);
43+
newJoin.setLeftArg(commonFactor);
44+
newJoin.setRightArg(union);
45+
union.setLeftArg(newLeftSide);
46+
union.setRightArg(newRightSide);
47+
48+
adjustVariableScopes(union, newJoin);
49+
reestimateResultSizes(union, commonFactor, newJoin);
50+
51+
union.visit(this);
52+
}
53+
}
54+
}
55+
}
56+
57+
private void adjustVariableScopes(AbstractQueryModelNode union, Join newJoin) {
58+
if (union.isVariableScopeChange()) {
59+
newJoin.setVariableScopeChange(true);
60+
union.setVariableScopeChange(false);
61+
}
62+
}
63+
64+
private void reestimateResultSizes(AbstractQueryModelNode union, TupleExpr commonFactor, Join newJoin) {
65+
double commonEstimate = commonFactor.getResultSizeEstimate();
66+
double unionEstimate = union.getResultSizeEstimate();
67+
if (commonEstimate > 0.0 && unionEstimate >= 0.0) {
68+
union.setResultSizeEstimate(unionEstimate/commonEstimate);
69+
newJoin.setResultSizeEstimate(unionEstimate);
70+
} else {
71+
union.setResultSizeEstimate(-1);
72+
}
73+
}
74+
75+
@Override
76+
public void meet(NAryUnion union) {
77+
super.meet(union);
78+
79+
TupleExpr firstArg = union.getArg(0);
80+
if (firstArg instanceof Join) {
81+
Join firstJoin = (Join) firstArg;
82+
TupleExpr commonFactor = firstJoin.getLeftArg();
83+
TupleExpr newLeftSide = firstJoin.getRightArg();
84+
if (!TupleExprs.isVariableScopeChange(newLeftSide)) {
85+
int n = union.getArgCount();
86+
List<TupleExpr> newArgs = new ArrayList<>(n);
87+
newArgs.add(newLeftSide);
88+
for (int i=1; i<n; i++) {
89+
TupleExpr arg = union.getArg(i);
90+
if (!(arg instanceof Join)) {
91+
return;
92+
}
93+
Join join = (Join) arg;
94+
TupleExpr newArg = join.getRightArg();
95+
if (commonFactor.equals(join.getLeftArg()) && !TupleExprs.isVariableScopeChange(newArg)) {
96+
newArgs.add(newArg);
97+
} else {
98+
return;
99+
}
100+
}
33101

34-
if (leftJoinArg.getLeftArg().equals(rightJoin.getLeftArg()) && !(TupleExprs.isVariableScopeChange(leftSide) && TupleExprs.isVariableScopeChange(rightSide))) {
35102
// factor out the left-most join argument
36-
TupleExpr commonFactor = leftJoinArg.getLeftArg();
37103
Join newJoin = new Join();
38104
union.replaceWith(newJoin);
39105
newJoin.setLeftArg(commonFactor);
40106
newJoin.setRightArg(union);
41-
union.setLeftArg(leftSide);
42-
union.setRightArg(rightSide);
43-
// Halyard
44-
if (union.isVariableScopeChange()) {
45-
newJoin.setVariableScopeChange(true);
46-
union.setVariableScopeChange(false);
47-
}
48-
// re-estimate
49-
double commonEstimate = commonFactor.getResultSizeEstimate();
50-
double unionEstimate = union.getResultSizeEstimate();
51-
if (commonEstimate > 0.0 && unionEstimate >= 0.0) {
52-
union.setResultSizeEstimate(unionEstimate/commonEstimate);
53-
newJoin.setResultSizeEstimate(unionEstimate);
54-
} else {
55-
union.setResultSizeEstimate(-1);
56-
}
107+
union.setArgs(newArgs);
108+
109+
adjustVariableScopes(union, newJoin);
110+
reestimateResultSizes(union, commonFactor, newJoin);
57111

58112
union.visit(this);
59113
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
package com.msd.gin.halyard.optimizers;
2+
3+
import com.msd.gin.halyard.algebra.AbstractExtendedQueryModelVisitor;
4+
import com.msd.gin.halyard.algebra.NAryUnion;
5+
import com.msd.gin.halyard.algebra.SkipVarsQueryModelVisitor;
6+
7+
import java.util.ArrayList;
8+
import java.util.Collections;
9+
import java.util.List;
10+
11+
import org.eclipse.rdf4j.common.exception.RDF4JException;
12+
import org.eclipse.rdf4j.query.BindingSet;
13+
import org.eclipse.rdf4j.query.Dataset;
14+
import org.eclipse.rdf4j.query.algebra.QueryModelVisitor;
15+
import org.eclipse.rdf4j.query.algebra.TupleExpr;
16+
import org.eclipse.rdf4j.query.algebra.Union;
17+
import org.eclipse.rdf4j.query.algebra.evaluation.QueryOptimizer;
18+
import org.eclipse.rdf4j.query.algebra.helpers.TupleExprs;
19+
20+
public class NAryUnionOptimizer implements QueryOptimizer {
21+
private final int minUnions;
22+
23+
public NAryUnionOptimizer(int minJoins) {
24+
if (minJoins < 1) {
25+
throw new IllegalArgumentException("Minimum unions must be greater than or equal to one");
26+
}
27+
this.minUnions = minJoins;
28+
}
29+
30+
@Override
31+
public void optimize(TupleExpr tupleExpr, Dataset dataset, BindingSet bindings) {
32+
tupleExpr.visit(new NAryUnionJoinFinder());
33+
}
34+
35+
final class NAryUnionJoinFinder extends SkipVarsQueryModelVisitor<RDF4JException> {
36+
@Override
37+
public void meet(Union node) throws RDF4JException {
38+
UnionCollector<RDF4JException> collector = new UnionCollector<>(this);
39+
node.visit(collector);
40+
if (!collector.getExpressions().isEmpty()) {
41+
processUnions(node, collector.getExpressions());
42+
}
43+
}
44+
45+
private void processUnions(Union top, List<TupleExpr> exprs) {
46+
if (exprs.size() > minUnions) {
47+
NAryUnion nunion = new NAryUnion(exprs);
48+
nunion.setVariableScopeChange(top.isVariableScopeChange());
49+
top.replaceWith(nunion);
50+
}
51+
}
52+
}
53+
54+
static final class UnionCollector<X extends Exception> extends AbstractExtendedQueryModelVisitor<X> {
55+
private final QueryModelVisitor<X> visitor;
56+
private List<TupleExpr> exprs;
57+
58+
UnionCollector(QueryModelVisitor<X> visitor) {
59+
this.visitor = visitor;
60+
}
61+
62+
List<TupleExpr> getExpressions() {
63+
return (exprs != null) ? exprs : Collections.emptyList();
64+
}
65+
66+
void addExpression(TupleExpr expr) throws X {
67+
if (exprs == null) {
68+
exprs = new ArrayList<>();
69+
}
70+
exprs.add(expr);
71+
// resume previous visitor
72+
expr.visit(visitor);
73+
}
74+
75+
@Override
76+
public void meet(Union node) throws X {
77+
TupleExpr left = node.getLeftArg();
78+
TupleExpr right = node.getRightArg();
79+
if (!TupleExprs.isVariableScopeChange(left)) {
80+
if (left instanceof Union) {
81+
left.visit(this);
82+
} else {
83+
addExpression(left);
84+
}
85+
}
86+
if (!TupleExprs.isVariableScopeChange(right)) {
87+
if (right instanceof Union) {
88+
right.visit(this);
89+
} else {
90+
addExpression(right);
91+
}
92+
}
93+
}
94+
}
95+
}

strategy/src/main/java/com/msd/gin/halyard/strategy/HalyardQueryOptimizerPipeline.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import com.msd.gin.halyard.optimizers.HalyardEvaluationStatistics;
2323
import com.msd.gin.halyard.optimizers.HalyardQueryJoinOptimizer;
2424
import com.msd.gin.halyard.optimizers.JoinAlgorithmOptimizer;
25+
import com.msd.gin.halyard.optimizers.NAryUnionOptimizer;
2526
import com.msd.gin.halyard.optimizers.QueryJoinOptimizer;
2627
import com.msd.gin.halyard.optimizers.StarJoinOptimizer;
2728

@@ -43,6 +44,7 @@ public final class HalyardQueryOptimizerPipeline implements QueryOptimizerPipeli
4344
private final EvaluationStrategy strategy;
4445
private final ValueFactory valueFactory;
4546
private final StarJoinOptimizer starJoinOptimizer;
47+
private final NAryUnionOptimizer naryUnionOptimizer;
4648
private final JoinAlgorithmOptimizer joinAlgoOptimizer;
4749

4850
public HalyardQueryOptimizerPipeline(HalyardEvaluationStrategy strategy, ValueFactory valueFactory, ExtendedEvaluationStatistics statistics) {
@@ -51,6 +53,8 @@ public HalyardQueryOptimizerPipeline(HalyardEvaluationStrategy strategy, ValueFa
5153
this.statistics = statistics;
5254
int minJoins = strategy.getConfig().starJoinMinJoins;
5355
this.starJoinOptimizer = new StarJoinOptimizer(minJoins);
56+
int minUnions = strategy.getConfig().naryUnionMinUnions;
57+
this.naryUnionOptimizer = new NAryUnionOptimizer(minUnions);
5458
int hashJoinLimit = strategy.getConfig().hashJoinLimit;
5559
float costRatio = strategy.getConfig().hashJoinCostRatio;
5660
this.joinAlgoOptimizer = new JoinAlgorithmOptimizer(statistics, hashJoinLimit, costRatio);
@@ -77,6 +81,7 @@ public Iterable<QueryOptimizer> getOptimizers() {
7781
new ConstrainedValueOptimizer(),
7882
starJoinOptimizer,
7983
(statistics instanceof HalyardEvaluationStatistics) ? new HalyardQueryJoinOptimizer((HalyardEvaluationStatistics) statistics) : new QueryJoinOptimizer(statistics),
84+
naryUnionOptimizer,
8085
ExtendedQueryOptimizerPipeline.ITERATIVE_EVALUATION_OPTIMIZER,
8186
ExtendedQueryOptimizerPipeline.FILTER_OPTIMIZER, // after join optimizer so we push down on the best statements
8287
StandardQueryOptimizerPipeline.ORDER_LIMIT_OPTIMIZER,

0 commit comments

Comments
 (0)