Skip to content

Commit 2ba0f86

Browse files
committed
Add testing for partitioned BZ2 tables in Hive
In the past, partitioned BZ2 tables in Hive failed. The error is no longer reproducible.
1 parent 2a8da5e commit 2ba0f86

File tree

3 files changed

+75
-6
lines changed

3 files changed

+75
-6
lines changed

presto-product-tests/src/main/java/com/facebook/presto/tests/TestGroups.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ public final class TestGroups
5252
public static final String AUTHORIZATION = "authorization";
5353
public static final String POST_HIVE_1_0_1 = "post_hive_1_0_1";
5454
public static final String PREPARED_STATEMENTS = "prepared_statements";
55+
public static final String BIG_QUERY = "big_query";
5556

5657
private TestGroups() {}
5758
}

presto-product-tests/src/main/java/com/facebook/presto/tests/hive/TestHiveStorageFormats.java

Lines changed: 69 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,27 @@
1313
*/
1414
package com.facebook.presto.tests.hive;
1515

16+
import com.facebook.presto.tests.ImmutableTpchTablesRequirements.ImmutableLineItemTable;
1617
import com.google.common.base.MoreObjects;
1718
import com.google.common.base.Throwables;
1819
import com.google.common.collect.ImmutableMap;
1920
import com.teradata.tempto.ProductTest;
21+
import com.teradata.tempto.Requires;
2022
import com.teradata.tempto.assertions.QueryAssert.Row;
2123
import com.teradata.tempto.query.QueryResult;
2224
import org.testng.annotations.DataProvider;
2325
import org.testng.annotations.Test;
2426

2527
import java.sql.Connection;
2628
import java.sql.SQLException;
29+
import java.sql.Statement;
2730
import java.util.List;
2831
import java.util.Map;
2932

33+
import static com.facebook.presto.tests.TestGroups.BIG_QUERY;
3034
import static com.facebook.presto.tests.TestGroups.STORAGE_FORMATS;
3135
import static com.facebook.presto.tests.utils.JdbcDriverUtils.setSessionProperty;
36+
import static com.facebook.presto.tests.utils.QueryExecutors.onHive;
3237
import static com.facebook.presto.util.ImmutableCollectors.toImmutableList;
3338
import static com.teradata.tempto.assertions.QueryAssert.Row.row;
3439
import static com.teradata.tempto.assertions.QueryAssert.assertThat;
@@ -41,6 +46,7 @@ public class TestHiveStorageFormats
4146
extends ProductTest
4247
{
4348
private static final String TPCH_SCHEMA = "tiny";
49+
private static final String TEST_TPCH_LINIETEM = "tpch." + TPCH_SCHEMA + ".lineitem";
4450

4551
@DataProvider(name = "storage_formats")
4652
public static Object[][] storageFormats()
@@ -97,7 +103,7 @@ public void testInsertIntoTable(StorageFormat storageFormat)
97103
query(insertInto);
98104

99105
// SELECT FROM TABLE
100-
assertSelect("select sum(tax), sum(discount), sum(linenumber) from %s", tableName);
106+
assertSelect("select sum(tax), sum(discount), sum(linenumber) from %s", tableName, TEST_TPCH_LINIETEM);
101107

102108
// DROP TABLE
103109
query(format("DROP TABLE %s", tableName));
@@ -125,7 +131,7 @@ public void testCreateTableAs(StorageFormat storageFormat)
125131
query(createTableAsSelect);
126132

127133
// SELECT FROM TABLE
128-
assertSelect("select sum(extendedprice), sum(suppkey), count(partkey) from %s", tableName);
134+
assertSelect("select sum(extendedprice), sum(suppkey), count(partkey) from %s", tableName, TEST_TPCH_LINIETEM);
129135

130136
// DROP TABLE
131137
query(format("DROP TABLE %s", tableName));
@@ -171,7 +177,7 @@ public void testInsertIntoPartitionedTable(StorageFormat storageFormat)
171177
query(insertInto);
172178

173179
// SELECT FROM TABLE
174-
assertSelect("select sum(tax), sum(discount), sum(length(returnflag)) from %s", tableName);
180+
assertSelect("select sum(tax), sum(discount), sum(length(returnflag)) from %s", tableName, TEST_TPCH_LINIETEM);
175181

176182
// DROP TABLE
177183
query(format("DROP TABLE %s", tableName));
@@ -199,15 +205,72 @@ public void testCreatePartitionedTableAs(StorageFormat storageFormat)
199205
query(createTableAsSelect);
200206

201207
// SELECT FROM TABLE
202-
assertSelect("select sum(tax), sum(discount), sum(length(returnflag)) from %s", tableName);
208+
assertSelect("select sum(tax), sum(discount), sum(length(returnflag)) from %s", tableName, TEST_TPCH_LINIETEM);
203209

204210
// DROP TABLE
205211
query(format("DROP TABLE %s", tableName));
206212
}
207213

208-
private static void assertSelect(String query, String tableName)
214+
@Requires(ImmutableLineItemTable.class)
215+
@Test(groups = {STORAGE_FORMATS, BIG_QUERY})
216+
public void testSelectFromPartitionedBzipTable() throws Exception
209217
{
210-
QueryResult expected = query(format(query, "tpch." + TPCH_SCHEMA + ".lineitem"));
218+
// This test is marked as "big_query" because INSERT OVERWRITE TABLE is very slow, but that
219+
// is the only way to get bzip tables in Hive.
220+
221+
String tableName = "storage_formats_test_select_partitioned_bzip";
222+
query(format("DROP TABLE IF EXISTS %s", tableName));
223+
224+
// The BZIP part of the table comes from the configs that are set during insert
225+
String createTable = format(
226+
"CREATE TABLE %s(" +
227+
" l_orderkey BIGINT," +
228+
" l_partkey BIGINT," +
229+
" l_suppkey BIGINT," +
230+
" l_linenumber INT," +
231+
" l_quantity DOUBLE," +
232+
" l_extendedprice DOUBLE," +
233+
" l_discount DOUBLE," +
234+
" l_tax DOUBLE," +
235+
" l_linestatus VARCHAR(1)," +
236+
" l_shipinstruct VARCHAR(25)," +
237+
" l_shipmode VARCHAR(10)," +
238+
" l_comment VARCHAR(44)" +
239+
") PARTITIONED BY (l_returnflag VARCHAR(1)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE",
240+
tableName);
241+
onHive().executeQuery(createTable);
242+
243+
try {
244+
String insertInto = format(
245+
"INSERT OVERWRITE TABLE %s PARTITION(l_returnflag) " +
246+
"SELECT l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, " +
247+
"l_linestatus, l_shipinstruct, l_shipmode, l_comment, l_returnflag " +
248+
"FROM default.lineitem", tableName);
249+
Statement statement = onHive().getConnection().createStatement();
250+
setHiveConfigsForBzipInsert(statement);
251+
statement.execute(insertInto);
252+
statement.close();
253+
254+
assertSelect("select sum(l_tax), sum(l_discount), sum(length(l_returnflag)) from %s", tableName, "hive.default.lineitem");
255+
}
256+
finally {
257+
query(format("DROP TABLE %s", tableName));
258+
}
259+
}
260+
261+
private void setHiveConfigsForBzipInsert(Statement statement)
262+
throws SQLException
263+
{
264+
statement.execute("SET hive.exec.compress.output=true;");
265+
statement.execute("SET mapreduce.output.fileoutputformat.compress=true;");
266+
statement.execute("SET mapred.output.compress=true");
267+
statement.execute("SET mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.BZip2Codec");
268+
statement.execute("SET hive.exec.dynamic.partition.mode=nonstrict;");
269+
}
270+
271+
private static void assertSelect(String query, String tableName, String expectedTable)
272+
{
273+
QueryResult expected = query(format(query, expectedTable));
211274
List<Row> expectedRows = expected.rows().stream()
212275
.map((columns) -> row(columns.toArray()))
213276
.collect(toImmutableList());

presto-product-tests/src/main/java/com/facebook/presto/tests/utils/QueryExecutors.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ public static QueryExecutor onPresto()
2424
return testContext().getDependency(QueryExecutor.class, "presto");
2525
}
2626

27+
public static QueryExecutor onHive()
28+
{
29+
return testContext().getDependency(QueryExecutor.class, "hive");
30+
}
31+
2732
public static QueryExecutor connectToPresto(String prestoConfig)
2833
{
2934
return testContext().getDependency(QueryExecutor.class, prestoConfig);

0 commit comments

Comments
 (0)