forked from apache/impala
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsemi-joins.test
More file actions
535 lines (534 loc) · 12.4 KB
/
semi-joins.test
File metadata and controls
535 lines (534 loc) · 12.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
====
---- QUERY
# Verification that the setup of SEMIJOIN tables was correct. (SemiJoinTblA)
SELECT * FROM SemiJoinTblA
---- RESULTS
1,1,1
1,1,10
1,2,10
1,3,10
NULL,NULL,30
2,4,30
2,NULL,20
---- TYPES
INT, INT, INT
====
---- QUERY
# Verification that the setup of SEMIJOIN tables was correct. (SemiJoinTblB)
SELECT * FROM SemiJoinTblB
---- RESULTS
1,1,1
1,1,10
1,2,5
1,NULL,10
2,10,NULL
3,NULL,NULL
3,NULL,50
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing left anti join
select j.* from functional_parquet.JoinTbl j
left anti join functional_parquet.DimTbl d
on j.test_id = d.id
---- RESULTS
1106,'Name6',94612,5000
1106,'Name16',94612,5000
1106,'Name6',94616,5000
1106,'Name16',94616,5000
1106,'Name6',94612,15000
1106,'Name16',94612,15000
1106,'Name6',94616,15000
1106,'Name16',94616,15000
---- TYPES
bigint, string, int, int
====
---- QUERY
# Testing left anti join on string column
select j.* from functional_parquet.JoinTbl j
left anti join functional_parquet.DimTbl d
on j.test_name = d.name
---- RESULTS
1006,'Name16',94612,5000
1106,'Name16',94612,5000
1006,'Name16',94616,5000
1106,'Name16',94616,5000
1006,'Name16',94612,15000
1106,'Name16',94612,15000
1006,'Name16',94616,15000
1106,'Name16',94616,15000
---- TYPES
bigint, string, int, int
====
---- QUERY
# Testing multi-way joins that include a left anti join
select count(*) from functional_parquet.JoinTbl j
left anti join functional_parquet.DimTbl d
on j.test_id = d.id
left outer join functional_parquet.JoinTbl k
on j.test_id = k.test_id
---- RESULTS
64
---- TYPES
BIGINT
====
---- QUERY
# Testing multi-way joins that include only left anti joins
select count(*) from functional_parquet.JoinTbl j
left anti join functional_parquet.DimTbl d
on j.test_id = d.id
left anti join functional_parquet.JoinTbl k
on j.test_id = k.test_id
---- RESULTS
0
---- TYPES
BIGINT
====
---- QUERY
# Regression test for IMPALA-1160. Proper handling of left anti joins with NULLs
SELECT a.* FROM SemiJoinTblA a
LEFT ANTI JOIN SemiJoinTblB b ON a.b = b.b
---- RESULTS
2,4,30
1,3,10
2,NULL,20
NULL,NULL,30
---- TYPES
INT, INT, INT
====
---- QUERY
# Regression test for IMPALA-1175: Anti join query crashes Impala.
SELECT * FROM SemiJoinTblA A LEFT ANTI JOIN
(SELECT count(*) `$c$2`, B.b `$c$1` FROM SemiJoinTblB B GROUP BY B.b) `$a$1`
ON A.a != `$a$1`.`$c$2` AND `$a$1`.`$c$1` = A.b
---- RESULTS
1,2,10
1,3,10
NULL,NULL,30
2,4,30
2,NULL,20
---- TYPES
INT, INT, INT
====
---- QUERY
# Regression tests for IMPALA-1177: Incorrect results in query with ANTI JOIN on tinyint
# column with nulls.
SELECT COUNT(*) FROM functional_parquet.alltypesagg t1
LEFT ANTI JOIN functional_parquet.alltypes t2
ON t2.tinyint_col = t1.tinyint_col
---- RESULTS
2000
---- TYPES
BIGINT
====
---- QUERY
SELECT COUNT(*) FROM functional_parquet.alltypesagg t1
LEFT ANTI JOIN functional_parquet.alltypes t2
ON t2.tinyint_col = t1.tinyint_col and t1.day = 1
---- RESULTS
10100
---- TYPES
BIGINT
====
---- QUERY
SELECT COUNT(*) FROM functional_parquet.alltypesagg t1
LEFT ANTI JOIN functional_parquet.alltypes t2
ON t2.tinyint_col = t1.tinyint_col and t2.month = 1
---- RESULTS
2000
====
---- QUERY
# Regression test for IMPALA-1204: ANTI JOIN crash running complicated query with right
# joins.
SELECT 1 FROM functional_parquet.alltypestiny t1
INNER JOIN functional_parquet.alltypestiny t2
ON t2.bigint_col = t1.tinyint_col AND t2.tinyint_col = t1.id
LEFT ANTI JOIN
(SELECT 1 `$c$2`, tt6.id `$c$1`
FROM functional_parquet.alltypes tt5
RIGHT OUTER JOIN functional_parquet.alltypestiny tt6
ON tt6.month = tt5.bigint_col) `$a$1` ON t2.int_col = `$a$1`.`$c$1`
---- RESULTS
---- TYPES
TINYINT
====
---- QUERY
# left semi-join on bigint
select d.* from functional_parquet.DimTbl d
left semi join functional_parquet.JoinTbl j
on (d.id = j.test_id)
---- RESULTS
1001,'Name1',94611
1002,'Name2',94611
1003,'Name3',94612
1004,'Name4',94612
1005,'Name5',94613
1006,'Name6',94613
---- TYPES
bigint, string, int
====
---- QUERY
# left semi-join on string
select d.* from functional_parquet.DimTbl d
left semi join functional_parquet.JoinTbl j
on (j.test_name = d.name)
---- RESULTS
1001,'Name1',94611
1002,'Name2',94611
1003,'Name3',94612
1004,'Name4',94612
1005,'Name5',94613
1006,'Name6',94613
---- TYPES
bigint, string, int
====
---- QUERY
# left semi-join on int
select d.* from functional_parquet.DimTbl d
left semi join functional_parquet.JoinTbl j
on (j.test_zip = d.zip)
---- RESULTS
1001,'Name1',94611
1002,'Name2',94611
1003,'Name3',94612
1004,'Name4',94612
---- TYPES
bigint, string, int
====
---- QUERY
# Regression test for IMPALA-1249. Left anti join on empty build side.
SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN
(SELECT b.* FROM SemiJoinTblB b WHERE b.a > 10) v ON a.b = v.b
---- RESULTS
1,1,1
1,1,10
1,2,10
1,3,10
NULL,NULL,30
2,4,30
2,NULL,20
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing right semi join
select b.* FROM SemiJoinTblA a
right semi join SemiJoinTblB b on a.b = b.b
---- RESULTS
1,1,10
1,1,1
1,2,5
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing right semi join with duplicates
SELECT b.int_col FROM functional_parquet.tinyinttable a
RIGHT SEMI JOIN functional_parquet.tinyinttable b
ON a.int_col % 2 = b.int_col % 2
---- RESULTS
0
1
2
3
4
5
6
7
8
9
---- TYPES
INT
====
---- QUERY
# Testing right semi join with duplicates and other conjuncts
SELECT b.int_col FROM functional_parquet.tinyinttable a
RIGHT SEMI JOIN functional_parquet.tinyinttable b
ON a.int_col % 2 = b.int_col % 2 AND a.int_col + b.int_col > 9
---- RESULTS
1
2
3
4
5
6
7
8
9
---- TYPES
INT
====
---- QUERY
# Testing right anti joins
select b.* FROM SemiJoinTblA a
right anti join SemiJoinTblB b on a.b = b.b
---- RESULTS
2,10,NULL
1,NULL,10
3,NULL,50
3,NULL,NULL
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing right anti join with duplicates and other conjuncts
SELECT b.int_col FROM functional_parquet.tinyinttable a
RIGHT ANTI JOIN functional_parquet.tinyinttable b
ON a.int_col % 2 = b.int_col % 2 AND a.int_col + b.int_col > 9
---- RESULTS
0
---- TYPES
INT
====
---- QUERY
# Anti joins have a uni-directional value transfer (IMPALA-1249).
select a.* FROM SemiJoinTblA a
left anti join
(select * from SemiJoinTblB where b <= 3) b
on a.b = b.b
---- RESULTS
1,3,10
2,4,30
2,NULL,20
NULL,NULL,30
---- TYPES
INT, INT, INT
====
---- QUERY
# Regression test for IMPALA-1351: PHJ::GetNext() returns -1 in NAAJ w/o error.
# NOTE: Commented out because it takes a few hours to run on a single node.
# TODO: Casey will try to simplify, also consider moving to semi-joins-exhaustive.
#SELECT COUNT(t2.month) AS int_col_1
#FROM alltypesagg t1 CROSS
#JOIN alltypesagg t2 FULL
#OUTER JOIN alltypestiny t3 ON t3.year = t2.int_col
#AND t3.int_col = t2.year
#WHERE t1.day NOT IN
# (SELECT tt1.year AS int_col_1
# FROM alltypes tt1
# RIGHT JOIN alltypes tt2 ON tt2.month = tt1.year
# WHERE t3.tinyint_col = tt2.tinyint_col
# AND t3.bigint_col = tt2.bigint_col
# AND t3.int_col = tt1.id)
#AND t1.day NOT IN
# (SELECT ttt1.tinyint_col AS tinyint_col_1
# FROM alltypestiny ttt1
# WHERE t1.tinyint_col = ttt1.month
# AND ttt1.month IS NULL
# AND ttt1.tinyint_col NOT IN
# (SELECT tttt2.int_col AS int_col_1
# FROM alltypesagg tttt1
# RIGHT JOIN alltypes tttt2 ON tttt2.id = tttt1.year)
# AND ttt1.month NOT IN
# (SELECT MIN(tttt2.year + tttt1.smallint_col * tttt1.int_col - tttt2.bigint_col) +
# MAX(tttt2.tinyint_col) AS bigint_col_1
# FROM alltypestiny tttt1
# INNER JOIN alltypestiny tttt2 ON tttt2.month = tttt1.smallint_col)
# AND ttt1.month IS NOT NULL
# AND t2.smallint_col = ttt1.bigint_col)
#---- RESULTS
#121000000
#---- TYPES
#BIGINT
#====
# Testing right anti join with empty probe side.
SELECT b.* FROM (SELECT a.* from SemiJoinTblA a where a.a > 10) v
RIGHT ANTI JOIN SemiJoinTblB b on v.b = b.b
---- RESULTS
1,1,1
1,1,10
1,2,5
1,NULL,10
2,10,NULL
3,NULL,NULL
3,NULL,50
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing LEFT ANTI JOIN with ON FALSE should return all rows from left table
# because no rows from right table can match.
SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON false
---- RESULTS
1,1,1
1,1,10
1,2,10
1,3,10
NULL,NULL,30
2,4,30
2,NULL,20
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing RIGHT ANTI JOIN with ON FALSE should return all rows from right table
# because no rows from left table can match.
SELECT b.* FROM SemiJoinTblA a RIGHT ANTI JOIN SemiJoinTblB b ON false
---- RESULTS
1,1,1
1,1,10
1,2,5
1,NULL,10
2,10,NULL
3,NULL,NULL
3,NULL,50
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing LEFT ANTI JOIN with ON TRUE should return empty result set
# because all rows from right table match.
SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON true
---- RESULTS
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing RIGHT ANTI JOIN with ON TRUE should return empty result set
# because all rows from left table match.
SELECT b.* FROM SemiJoinTblA a RIGHT ANTI JOIN SemiJoinTblB b ON true
---- RESULTS
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing LEFT ANTI JOIN with constant expression 1=0 (equivalent to FALSE)
SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON 1=0
---- RESULTS
1,1,1
1,1,10
1,2,10
1,3,10
NULL,NULL,30
2,4,30
2,NULL,20
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing LEFT ANTI JOIN with constant expression 1=1 (equivalent to TRUE)
SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON 1=1
---- RESULTS
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing LEFT ANTI JOIN with ON FALSE and WHERE clause
# Should return rows from left table that satisfy WHERE condition
SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON false
WHERE a.a = 1
---- RESULTS
1,1,1
1,1,10
1,2,10
1,3,10
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing LEFT ANTI JOIN with ON TRUE and WHERE clause
# Should return empty result (anti-join filters all, then WHERE is applied)
SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON true
WHERE a.a = 1
---- RESULTS
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing LEFT SEMI JOIN with ON FALSE should return empty result
# because no rows from right table can match.
SELECT a.* FROM SemiJoinTblA a LEFT SEMI JOIN SemiJoinTblB b ON false
---- RESULTS
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing LEFT SEMI JOIN with ON TRUE should return all rows from left table
SELECT a.* FROM SemiJoinTblA a LEFT SEMI JOIN SemiJoinTblB b ON true
---- RESULTS
1,1,1
1,1,10
1,2,10
1,3,10
NULL,NULL,30
2,4,30
2,NULL,20
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing nested query with LEFT ANTI JOIN ON FALSE
SELECT * FROM (
SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON false
) v WHERE v.a = 2
---- RESULTS
2,4,30
2,NULL,20
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing multiple anti-joins with constant predicates
SELECT a.* FROM SemiJoinTblA a
LEFT ANTI JOIN SemiJoinTblB b ON false
LEFT ANTI JOIN SemiJoinTblB c ON true
---- RESULTS
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing anti-join with mixed constant and regular predicates (ON FALSE OR a.a = b.a)
# The OR with FALSE should be optimized away, leaving just a.a = b.a
# Only rows with NULL in column a are returned because NULL doesn't match anything
SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON (false OR a.a = b.a)
---- RESULTS
NULL,NULL,30
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing anti-join with mixed constant and regular predicates (ON TRUE AND a.a = b.a)
# The AND with TRUE should be optimized away, leaving just a.a = b.a
# Only rows with NULL in column a are returned because NULL doesn't match anything
SELECT a.* FROM SemiJoinTblA a LEFT ANTI JOIN SemiJoinTblB b ON (true AND a.a = b.a)
---- RESULTS
NULL,NULL,30
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing anti-join with inline view on the left side and ON FALSE
# The inline view should not be marked as having an empty result set
SELECT v.* FROM (SELECT * FROM SemiJoinTblA WHERE a = 1) v
LEFT ANTI JOIN SemiJoinTblB b ON false
---- RESULTS
1,1,1
1,1,10
1,2,10
1,3,10
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing nested inline views with anti-join and ON FALSE
# Verifies that constant predicates don't propagate incorrectly through nested views
SELECT * FROM (
SELECT v.* FROM (SELECT * FROM SemiJoinTblA WHERE a = 2) v
LEFT ANTI JOIN SemiJoinTblB b ON false
) v2 WHERE v2.a = 2
---- RESULTS
2,4,30
2,NULL,20
---- TYPES
INT, INT, INT
====
---- QUERY
# Testing anti-join with inline view and mixed predicates
# Constant FALSE should not be migrated into the inline view
SELECT v.* FROM (SELECT * FROM SemiJoinTblA WHERE a = 1) v
LEFT ANTI JOIN SemiJoinTblB b ON (false OR v.a = b.a)
---- RESULTS
---- TYPES
INT, INT, INT
====