Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Support for Binaryview and StringView types #367

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/nanoarrow/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,13 @@ static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view,
}
}

if (array_view->storage_type == NANOARROW_TYPE_STRING_VIEW) {
array_view->n_varidic_buffers = array->n_buffers - 3;
array_view->variadic_buffer_sizes = array->buffers[array->n_buffers - 1];
// array_view->variadic_buffers = ...
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// array_view->variadic_buffers = ...
array_view->variadic_buffers = array-> buffers + 2

...I think?

buffers_required += array_view->n_varidic_buffers + 1;
}

// Check the number of buffers
if (buffers_required != array->n_buffers) {
ArrowErrorSet(error, "Expected array with %d buffer(s) but found %d buffer(s)",
Expand Down
12 changes: 11 additions & 1 deletion src/nanoarrow/nanoarrow_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,9 @@ enum ArrowType {
NANOARROW_TYPE_LARGE_STRING,
NANOARROW_TYPE_LARGE_BINARY,
NANOARROW_TYPE_LARGE_LIST,
NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO
NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO,
NANOARROW_TYPE_BINARY_VIEW,
NANOARROW_TYPE_STRING_VIEW,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this ArrowType enum meant to be stable? (i.e. can I only add new types at the end, or can I put them at a more logical place in the list?)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's probably best to put it at the end to keep the existing values (we can always rearrange them in places where they're more likely to be seen).

};

/// \brief Get a string value of an enum ArrowType value
Expand Down Expand Up @@ -482,10 +484,14 @@ static inline const char* ArrowTypeString(enum ArrowType type) {
return "double";
case NANOARROW_TYPE_STRING:
return "string";
case NANOARROW_TYPE_STRING_VIEW:
return "string_view";
case NANOARROW_TYPE_BINARY:
return "binary";
case NANOARROW_TYPE_FIXED_SIZE_BINARY:
return "fixed_size_binary";
case NANOARROW_TYPE_BINARY_VIEW:
return "binary_view";
case NANOARROW_TYPE_DATE32:
return "date32";
case NANOARROW_TYPE_DATE64:
Expand Down Expand Up @@ -784,6 +790,10 @@ struct ArrowArrayView {
/// type_id == union_type_id_map[128 + child_index]. This value may be
/// NULL in the case where child_id == type_id.
int8_t* union_type_id_map;

int64_t n_varidic_buffers;
int64_t* variadic_buffer_sizes;
const void** variadic_buffers;
};

// Used as the private data member for ArrowArrays allocated here and accessed
Expand Down
25 changes: 25 additions & 0 deletions src/nanoarrow/schema.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,14 @@ static const char* ArrowSchemaFormatTemplate(enum ArrowType type) {
return "u";
case NANOARROW_TYPE_LARGE_STRING:
return "U";
case NANOARROW_TYPE_STRING_VIEW:
return "vu";
case NANOARROW_TYPE_BINARY:
return "z";
case NANOARROW_TYPE_LARGE_BINARY:
return "Z";
case NANOARROW_TYPE_BINARY_VIEW:
return "vz";

case NANOARROW_TYPE_DATE32:
return "tdD";
Expand Down Expand Up @@ -690,6 +694,25 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
*format_end_out = format + 1;
return NANOARROW_OK;

// view types
case 'v':
switch (format[1]) {
case 'z':
schema_view->type = NANOARROW_TYPE_BINARY_VIEW;
schema_view->storage_type = NANOARROW_TYPE_BINARY_VIEW;
*format_end_out = format + 2;
return NANOARROW_OK;
case 'u':
schema_view->type = NANOARROW_TYPE_STRING_VIEW;
schema_view->storage_type = NANOARROW_TYPE_STRING_VIEW;
*format_end_out = format + 2;
return NANOARROW_OK;
default:
ArrowErrorSet(error, "Expected 'v' or 'u' following 'z' but found '%s'",
format + 1);
return EINVAL;
}

// nested types
case '+':
switch (format[1]) {
Expand Down Expand Up @@ -1055,8 +1078,10 @@ static ArrowErrorCode ArrowSchemaViewValidate(struct ArrowSchemaView* schema_vie
case NANOARROW_TYPE_DECIMAL256:
case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_LARGE_STRING:
case NANOARROW_TYPE_STRING_VIEW:
case NANOARROW_TYPE_BINARY:
case NANOARROW_TYPE_LARGE_BINARY:
case NANOARROW_TYPE_BINARY_VIEW:
case NANOARROW_TYPE_DATE32:
case NANOARROW_TYPE_DATE64:
case NANOARROW_TYPE_INTERVAL_MONTHS:
Expand Down
15 changes: 15 additions & 0 deletions src/nanoarrow/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,21 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) {
layout->buffer_data_type[2] = NANOARROW_TYPE_BINARY;
break;

// case NANOARROW_TYPE_STRING_VIEW:
// layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
// layout->buffer_data_type[1] = NANOARROW_TYPE_INT64;
// layout->element_size_bits[1] = 64;
// layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
// layout->buffer_data_type[2] = NANOARROW_TYPE_STRING;
// break;
// case NANOARROW_TYPE_BINARY_VIEW:
// layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
// layout->buffer_data_type[1] = NANOARROW_TYPE_INT64;
// layout->element_size_bits[1] = 64;
// layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
// layout->buffer_data_type[2] = NANOARROW_TYPE_BINARY;
// break;

default:
break;
}
Expand Down
Loading