Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions presto-native-execution/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,68 @@
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-core</artifactId>
Comment thread
Joe-Abraham marked this conversation as resolved.
<version>${dep.iceberg.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-api</artifactId>
<version>${dep.iceberg.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-bundled-guava</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-parquet</artifactId>
<version>${dep.iceberg.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-avro</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>com.facebook.presto.hadoop</groupId>
<artifactId>hadoop-apache</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-column</artifactId>
<version>${dep.parquet.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.apache.yetus</groupId>
<artifactId>audience-annotations</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>com.facebook.airlift</groupId>
<artifactId>http-server</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "presto_cpp/presto_protocol/connector/iceberg/IcebergConnectorProtocol.h"
#include "velox/connectors/hive/iceberg/IcebergDataSink.h"
#include "velox/connectors/hive/iceberg/IcebergMetadataColumns.h"
#include "velox/connectors/hive/iceberg/IcebergSplit.h"
#include "velox/type/fbhive/HiveTypeParser.h"

Expand Down Expand Up @@ -93,6 +94,26 @@ std::unique_ptr<velox::connector::ConnectorTableHandle> toIcebergTableHandle(
types.push_back(VELOX_DYNAMIC_TYPE_DISPATCH(
fieldNamesToLowerCase, parsedType->kind(), parsedType));
}

// Iceberg metadata columns for row lineage (_row_id,
// _last_updated_sequence_number) are not included in the table's
// dataColumns but may exist physically in the written files (e.g. after
// MERGE/UPDATE). We add them to finalDataColumns if they are requested in
// columnHandles, so the reader can match them with Parquet schema.
for (const auto& handle : columnHandles) {
if ((handle->name() ==
velox::connector::hive::iceberg::IcebergMetadataColumn::
kRowIdColumnName ||
handle->name() ==
velox::connector::hive::iceberg::IcebergMetadataColumn::
kLastUpdatedSequenceNumberColumnName) &&
std::find(names.begin(), names.end(), handle->name()) ==
names.end()) {
names.emplace_back(handle->name());
types.push_back(velox::BIGINT());
}
}

finalDataColumns = ROW(std::move(names), std::move(types));
}

Expand Down Expand Up @@ -210,10 +231,18 @@ IcebergPrestoToVeloxConnector::toVeloxSplit(
deletes.emplace_back(icebergDeleteFile);
}

// Row-lineage info columns are V3-only. Omitting them for V1/V2 causes the
// reader to output NULL for both _row_id and _last_updated_sequence_number.
std::unordered_map<std::string, std::string> infoColumns = {
{"$data_sequence_number",
std::to_string(icebergSplit->dataSequenceNumber)},
{"$path", icebergSplit->path}};
if (icebergSplit->firstRowId >= 0) {
infoColumns[velox::connector::hive::iceberg::IcebergMetadataColumn::
kDataSequenceNumberInfoColumn] =
std::to_string(icebergSplit->dataSequenceNumber);
infoColumns[velox::connector::hive::iceberg::IcebergMetadataColumn::
kFirstRowIdInfoColumn] =
std::to_string(icebergSplit->firstRowId);
}

return std::make_unique<velox::connector::hive::iceberg::HiveIcebergSplit>(
catalogId,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ private void createTestTables()
QueryRunner javaQueryRunner = ((QueryRunner) getExpectedQueryRunner());

javaQueryRunner.execute("DROP TABLE IF EXISTS test_hidden_columns");
javaQueryRunner.execute("CREATE TABLE test_hidden_columns AS SELECT * FROM tpch.tiny.region WHERE regionkey=0");
javaQueryRunner.execute("CREATE TABLE test_hidden_columns WITH (\"format-version\" = '3') AS SELECT * FROM tpch.tiny.region WHERE regionkey=0");
javaQueryRunner.execute("INSERT INTO test_hidden_columns SELECT * FROM tpch.tiny.region WHERE regionkey=1");

javaQueryRunner.execute("DROP TABLE IF EXISTS ice_table_partitioned");
Expand Down
Loading
Loading