Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 12 additions & 13 deletions cpp/src/io/parquet/decode_fixed.cu
Original file line number Diff line number Diff line change
Expand Up @@ -930,6 +930,8 @@ constexpr bool is_split_decode()
* @param num_rows Maximum number of rows to read
* @param page_mask Boolean vector indicating which pages need to be decoded
* @param initial_str_offsets Vector to store the initial offsets for large nested string cols
* @param page_string_offset_indices Device span of offsets, indexed per-page, into the column's
* string offset buffer
* @param error_code Error code to set if an error is encountered
*/
template <typename level_t, int decode_block_size_t, decode_kernel_mask kernel_mask_t>
Expand All @@ -940,6 +942,7 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size_t, 8)
size_t num_rows,
cudf::device_span<bool const> page_mask,
cudf::device_span<size_t> initial_str_offsets,
cudf::device_span<size_t const> page_string_offset_indices,
kernel_error::pointer error_code)
{
constexpr bool has_dict_t = has_dict<kernel_mask_t>();
Expand All @@ -954,10 +957,10 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size_t, 8)
constexpr int rle_run_buffer_size = rle_stream_required_run_buffer_size<decode_block_size_t>();

__shared__ __align__(16) page_state_s state_g;
constexpr bool use_dict_buffers = has_dict_t || has_bools_t || has_strings_t;
constexpr bool use_dict_buffers = has_dict_t || has_bools_t;
using state_buf_t = page_state_buffers_s<rolling_buf_size, // size of nz_idx buffer
use_dict_buffers ? rolling_buf_size : 1,
has_strings_t ? rolling_buf_size : 1>;
1>;
__shared__ __align__(16) state_buf_t state_buffers;

auto const block = cg::this_thread_block();
Expand Down Expand Up @@ -1083,6 +1086,8 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size_t, 8)
int valid_count = 0;
size_t string_output_offset = 0;
int const init_valid_map_offset = s->nesting_info[s->col.max_nesting_depth - 1].valid_map_offset;
uint32_t* const str_offsets =
s->col.column_string_offset_base + page_string_offset_indices[page_idx];

// Skip ahead in the decoding so that we don't repeat work (skipped_leaf_values = 0 for non-lists)
auto const skipped_leaf_values = s->page.skipped_leaf_values;
Expand All @@ -1094,10 +1099,6 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size_t, 8)
processed_count = skip_decode<rolling_buf_size>(rep_decoder, skipped_leaf_values, t);
if constexpr (has_dict_t) {
skip_decode<rolling_buf_size>(dict_stream, skipped_leaf_values, t);
} else if constexpr (has_strings_t) {
initialize_string_descriptors<is_calc_sizes_only::YES>(s, sb, skipped_leaf_values, block);
if (t == 0) { s->dict_pos = processed_count; }
block.sync();
} else if constexpr (has_bools_t) {
if (bools_are_rle_stream) {
skip_decode<rolling_buf_size>(bool_stream, skipped_leaf_values, t);
Expand Down Expand Up @@ -1165,11 +1166,6 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size_t, 8)
if constexpr (has_dict_t) {
dict_stream.decode_next(t, next_valid_count - valid_count);
block.sync();
} else if constexpr (has_strings_t) {
auto const target_pos = next_valid_count + skipped_leaf_values;
initialize_string_descriptors<is_calc_sizes_only::NO>(s, sb, target_pos, block);
if (t == 0) { s->dict_pos = target_pos; }
block.sync();
} else if constexpr (has_bools_t) {
if (bools_are_rle_stream) {
bool_stream.decode_next(t, next_valid_count - valid_count);
Expand All @@ -1184,8 +1180,8 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size_t, 8)
auto decode_values = [&]<copy_mode copy_mode_t>() {
if constexpr (has_strings_t) {
string_output_offset =
decode_strings<decode_block_size_t, has_lists_t, split_decode_t, copy_mode_t>(
s, sb, valid_count, next_valid_count, t, string_output_offset);
decode_strings<decode_block_size_t, has_dict_t, has_lists_t, split_decode_t, copy_mode_t>(
s, sb, valid_count, next_valid_count, t, str_offsets, string_output_offset);
} else if constexpr (split_decode_t) {
decode_fixed_width_split_values<decode_block_size_t, has_lists_t, copy_mode_t>(
s, sb, valid_count, next_valid_count, t);
Expand Down Expand Up @@ -1269,6 +1265,7 @@ void decode_page_data(cudf::detail::hostdevice_span<PageInfo> pages,
decode_kernel_mask kernel_mask,
cudf::device_span<bool const> page_mask,
cudf::device_span<size_t> initial_str_offsets,
cudf::device_span<size_t const> page_string_offset_indices,
kernel_error::pointer error_code,
rmm::cuda_stream_view stream)
{
Expand All @@ -1288,6 +1285,7 @@ void decode_page_data(cudf::detail::hostdevice_span<PageInfo> pages,
num_rows,
page_mask,
initial_str_offsets,
page_string_offset_indices,
error_code);
} else {
decode_page_data_generic<uint16_t, decode_block_size, mask>
Expand All @@ -1297,6 +1295,7 @@ void decode_page_data(cudf::detail::hostdevice_span<PageInfo> pages,
num_rows,
page_mask,
initial_str_offsets,
page_string_offset_indices,
error_code);
}
};
Expand Down
Loading
Loading