IMPALA-11806: Fix TestIcebergTable.test_load E2E test
The test had a flaky part: it referred to a randomly generated directory
in an expected error message. Removed the reference to this directory.
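
The new regex-based expectation can be sanity-checked outside the test
framework. A minimal sketch in plain Python follows; the error string with the
made-up database suffix is hypothetical, only the pattern itself comes from the
patched .test file:

import re

# The new CATCH pattern from the .test file, written as a plain Python regex.
# The '.*' around the table name tolerates the randomly generated database
# name that made the old exact-match expectation flaky.
PATTERN = (r".*AnalysisException: Target table .* has fewer columns \(1\) than "
           r"the SELECT / VALUES clause returns \(4\).*")

# Hypothetical error text with a made-up random suffix, for illustration only.
error = ("AnalysisException: Target table "
         "'test_load_deadbeef01.test_iceberg_load_schema_mismatch' has fewer "
         "columns (1) than the SELECT / VALUES clause returns (4)")

assert re.search(PATTERN, error) is not None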

The test was failing on filesystems other than HDFS because of the
hdfs_client dependency; replaced the hdfs_client calls with
filesystem_client calls.
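
As a minimal sketch of the filesystem-agnostic pattern the patch adopts: the
helper functions below are hypothetical and not part of the change; only the
make_dir / copy_from_local / delete_file_dir calls are the ones exercised by
the patched test.

# Hypothetical helpers; 'filesystem_client' is assumed to expose the same
# make_dir / copy_from_local / delete_file_dir methods used in the diff below,
# so the same code path works on HDFS, Ozone, S3, etc.
def stage_test_files(filesystem_client, unique_database, src_template, file_names,
                     sub_dir="parquet"):
  """Copies local test data into /tmp/<unique_database>/<sub_dir>/ on the
  filesystem under test."""
  dst_dir = "/tmp/" + unique_database + "/" + sub_dir + "/"
  filesystem_client.make_dir(dst_dir, permission=777)
  for name in file_names:
    filesystem_client.copy_from_local(src_template.format(name), dst_dir)
  return dst_dir

def cleanup_test_files(filesystem_client, unique_database):
  """Recursively deletes the temporary staging directory."""
  filesystem_client.delete_file_dir("/tmp/{0}".format(unique_database), True)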

Testing:
 - Executed the test locally (HDFS/Minicluster)
 - Triggered an Ozone build to verify it with a different FS

Change-Id: Id95523949aab7dc2417a3d06cf780d3de2e44ee3
Reviewed-on: http://gerrit.cloudera.org:8080/19385
Reviewed-by: Tamas Mate <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
tmater authored and Impala Public Jenkins committed Dec 21, 2022
1 parent 390a932 commit 6ff9943
Showing 2 changed files with 14 additions and 15 deletions.
@@ -118,7 +118,7 @@ stored as iceberg;
 ---- QUERY
 load data inpath '/tmp/$DATABASE/mismatching_schema/' overwrite into table test_iceberg_load_schema_mismatch;
 ---- CATCH
-AnalysisException: Target table 'test_load_a61184e9.test_iceberg_load_schema_mismatch' has fewer columns (1) than the SELECT / VALUES clause returns (4)
+row_regex:.*AnalysisException: Target table .* has fewer columns \(1\) than the SELECT / VALUES clause returns \(4\).*
 ====
 ---- QUERY
 # Test 9: Partitioned Iceberg table
27 changes: 13 additions & 14 deletions tests/query_test/test_iceberg.py
@@ -790,7 +790,6 @@ def test_mixed_file_format(self, vector, unique_database):
     self.run_test_case('QueryTest/iceberg-mixed-file-format', vector,
                        unique_database)
 
-  @SkipIfLocal.hdfs_client
   def test_load(self, vector, unique_database):
     """Test LOAD DATA INPATH for Iceberg tables, the first part of this method inits the
     target directory, copies existing test data to HDFS. The second part runs the test
@@ -800,36 +799,36 @@ def test_load(self, vector, unique_database):
     SRC_DIR = os.path.join(os.environ['IMPALA_HOME'],
         "testdata/data/iceberg_test/iceberg_mixed_file_format_test/data/{0}")
     DST_DIR = "/tmp/" + unique_database + "/parquet/"
-    self.hdfs_client.make_dir(DST_DIR, permission=777)
+    self.filesystem_client.make_dir(DST_DIR, permission=777)
     file_parq1 = "00000-0-data-gfurnstahl_20220906113044_157fc172-f5d3-4c70-8653-" \
         "fff150b6136a-job_16619542960420_0002-1-00001.parquet"
     file_parq2 = "00000-0-data-gfurnstahl_20220906114830_907f72c7-36ac-4135-8315-" \
         "27ff880faff0-job_16619542960420_0004-1-00001.parquet"
-    self.hdfs_client.copy_from_local(SRC_DIR.format(file_parq1), DST_DIR)
-    self.hdfs_client.copy_from_local(SRC_DIR.format(file_parq2), DST_DIR)
+    self.filesystem_client.copy_from_local(SRC_DIR.format(file_parq1), DST_DIR)
+    self.filesystem_client.copy_from_local(SRC_DIR.format(file_parq2), DST_DIR)
     DST_DIR = "/tmp/" + unique_database + "/orc/"
-    self.hdfs_client.make_dir(DST_DIR, permission=777)
+    self.filesystem_client.make_dir(DST_DIR, permission=777)
     file_orc1 = "00000-0-data-gfurnstahl_20220906113255_8d49367d-e338-4996-ade5-" \
         "ee500a19c1d1-job_16619542960420_0003-1-00001.orc"
     file_orc2 = "00000-0-data-gfurnstahl_20220906114900_9c1b7b46-5643-428f-a007-" \
         "519c5500ed04-job_16619542960420_0004-1-00001.orc"
-    self.hdfs_client.copy_from_local(SRC_DIR.format(file_orc1), DST_DIR)
-    self.hdfs_client.copy_from_local(SRC_DIR.format(file_orc2), DST_DIR)
+    self.filesystem_client.copy_from_local(SRC_DIR.format(file_orc1), DST_DIR)
+    self.filesystem_client.copy_from_local(SRC_DIR.format(file_orc2), DST_DIR)
     # Test 7 init: overwrite
     DST_DIR = "/tmp/" + unique_database + "/overwrite/"
-    self.hdfs_client.make_dir(DST_DIR, permission=777)
-    self.hdfs_client.copy_from_local(SRC_DIR.format(file_parq1), DST_DIR)
+    self.filesystem_client.make_dir(DST_DIR, permission=777)
+    self.filesystem_client.copy_from_local(SRC_DIR.format(file_parq1), DST_DIR)
     # Test 8 init: mismatching parquet schema format
     SRC_DIR = os.path.join(os.environ['IMPALA_HOME'], "testdata/data/iceberg_test/"
         "iceberg_partitioned/data/event_time_hour=2020-01-01-08/action=view/{0}")
     DST_DIR = "/tmp/" + unique_database + "/mismatching_schema/"
-    self.hdfs_client.make_dir(DST_DIR, permission=777)
+    self.filesystem_client.make_dir(DST_DIR, permission=777)
     file = "00001-1-b975a171-0911-47c2-90c8-300f23c28772-00000.parquet"
-    self.hdfs_client.copy_from_local(SRC_DIR.format(file), DST_DIR)
+    self.filesystem_client.copy_from_local(SRC_DIR.format(file), DST_DIR)
     # Test 9 init: partitioned
     DST_DIR = "/tmp/" + unique_database + "/partitioned/"
-    self.hdfs_client.make_dir(DST_DIR, permission=777)
-    self.hdfs_client.copy_from_local(SRC_DIR.format(file), DST_DIR)
+    self.filesystem_client.make_dir(DST_DIR, permission=777)
+    self.filesystem_client.copy_from_local(SRC_DIR.format(file), DST_DIR)
 
     # Init test table
     create_iceberg_table_from_directory(self.client, unique_database,
@@ -838,7 +837,7 @@ def test_load(self, vector, unique_database):
     # Execute tests
     self.run_test_case('QueryTest/iceberg-load', vector, use_db=unique_database)
     # Clean up temporary directory
-    self.hdfs_client.delete_file_dir("/tmp/{0}".format(unique_database), True)
+    self.filesystem_client.delete_file_dir("/tmp/{0}".format(unique_database), True)
 
   def test_table_sampling(self, vector):
     self.run_test_case('QueryTest/iceberg-tablesample', vector,
