Skip to content

Commit

Permalink
feat(c): Implement ingestion and testing for float16, string_view, an…
Browse files Browse the repository at this point in the history
…d binary_view (#2234)

This PR tests ingestion of half float, string view, and binary view, for
which support was added between nanoarrow 0.5.0 and nanoarrow 0.6.0. It
also removes test skips for several numeric types that were already
supported; I missed removing those skips when support for those types
was added.
  • Loading branch information
paleolimbot authored Oct 17, 2024
1 parent c8e7691 commit 0366632
Show file tree
Hide file tree
Showing 10 changed files with 117 additions and 14 deletions.
10 changes: 8 additions & 2 deletions c/driver/postgresql/copy/writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -590,8 +590,9 @@ static inline ArrowErrorCode MakeCopyFieldWriter(
*out = T::Create<T>(array_view);
return NANOARROW_OK;
}
case NANOARROW_TYPE_UINT32:
case NANOARROW_TYPE_INT64:
case NANOARROW_TYPE_UINT32: {
case NANOARROW_TYPE_UINT64: {
using T = PostgresCopyNetworkEndianFieldWriter<int64_t>;
*out = T::Create<T>(array_view);
return NANOARROW_OK;
Expand All @@ -612,6 +613,7 @@ static inline ArrowErrorCode MakeCopyFieldWriter(
return ADBC_STATUS_NOT_IMPLEMENTED;
}
}
case NANOARROW_TYPE_HALF_FLOAT:
case NANOARROW_TYPE_FLOAT: {
using T = PostgresCopyFloatFieldWriter;
*out = T::Create<T>(array_view);
Expand All @@ -637,8 +639,12 @@ static inline ArrowErrorCode MakeCopyFieldWriter(
return NANOARROW_OK;
}
case NANOARROW_TYPE_BINARY:
case NANOARROW_TYPE_LARGE_BINARY:
case NANOARROW_TYPE_FIXED_SIZE_BINARY:
case NANOARROW_TYPE_BINARY_VIEW:
case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_LARGE_STRING: {
case NANOARROW_TYPE_LARGE_STRING:
case NANOARROW_TYPE_STRING_VIEW: {
using T = PostgresCopyBinaryFieldWriter;
*out = T::Create<T>(array_view);
return NANOARROW_OK;
Expand Down
4 changes: 4 additions & 0 deletions c/driver/postgresql/postgres_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -558,17 +558,21 @@ inline ArrowErrorCode PostgresType::FromSchema(const PostgresTypeResolver& resol
return resolver.Find(resolver.GetOID(PostgresTypeId::kInt4), out, error);
case NANOARROW_TYPE_UINT32:
case NANOARROW_TYPE_INT64:
case NANOARROW_TYPE_UINT64:
return resolver.Find(resolver.GetOID(PostgresTypeId::kInt8), out, error);
case NANOARROW_TYPE_HALF_FLOAT:
case NANOARROW_TYPE_FLOAT:
return resolver.Find(resolver.GetOID(PostgresTypeId::kFloat4), out, error);
case NANOARROW_TYPE_DOUBLE:
return resolver.Find(resolver.GetOID(PostgresTypeId::kFloat8), out, error);
case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_LARGE_STRING:
case NANOARROW_TYPE_STRING_VIEW:
return resolver.Find(resolver.GetOID(PostgresTypeId::kText), out, error);
case NANOARROW_TYPE_BINARY:
case NANOARROW_TYPE_LARGE_BINARY:
case NANOARROW_TYPE_FIXED_SIZE_BINARY:
case NANOARROW_TYPE_BINARY_VIEW:
return resolver.Find(resolver.GetOID(PostgresTypeId::kBytea), out, error);
case NANOARROW_TYPE_DATE32:
case NANOARROW_TYPE_DATE64:
Expand Down
18 changes: 13 additions & 5 deletions c/driver/postgresql/postgresql_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,24 @@ class PostgresQuirks : public adbc_validation::DriverQuirks {
ArrowType IngestSelectRoundTripType(ArrowType ingest_type) const override {
switch (ingest_type) {
case NANOARROW_TYPE_INT8:
case NANOARROW_TYPE_UINT8:
return NANOARROW_TYPE_INT16;
case NANOARROW_TYPE_UINT16:
return NANOARROW_TYPE_INT32;
case NANOARROW_TYPE_UINT32:
case NANOARROW_TYPE_UINT64:
return NANOARROW_TYPE_INT64;
case NANOARROW_TYPE_HALF_FLOAT:
return NANOARROW_TYPE_FLOAT;
case NANOARROW_TYPE_DURATION:
return NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO;
case NANOARROW_TYPE_LARGE_STRING:
case NANOARROW_TYPE_STRING_VIEW:
return NANOARROW_TYPE_STRING;
case NANOARROW_TYPE_LARGE_BINARY:
case NANOARROW_TYPE_FIXED_SIZE_BINARY:
case NANOARROW_TYPE_BINARY_VIEW:
return NANOARROW_TYPE_BINARY;
case NANOARROW_TYPE_DECIMAL128:
case NANOARROW_TYPE_DECIMAL256:
return NANOARROW_TYPE_STRING;
Expand Down Expand Up @@ -886,11 +899,6 @@ class PostgresStatementTest : public ::testing::Test,
void SetUp() override { ASSERT_NO_FATAL_FAILURE(SetUpTest()); }
void TearDown() override { ASSERT_NO_FATAL_FAILURE(TearDownTest()); }

void TestSqlIngestUInt8() { GTEST_SKIP() << "Not implemented"; }
void TestSqlIngestUInt16() { GTEST_SKIP() << "Not implemented"; }
void TestSqlIngestUInt32() { GTEST_SKIP() << "Not implemented"; }
void TestSqlIngestUInt64() { GTEST_SKIP() << "Not implemented"; }

void TestSqlPrepareErrorParamCountMismatch() { GTEST_SKIP() << "Not yet implemented"; }
void TestSqlPrepareGetParameterSchema() { GTEST_SKIP() << "Not yet implemented"; }
void TestSqlPrepareSelectParams() { GTEST_SKIP() << "Not yet implemented"; }
Expand Down
8 changes: 7 additions & 1 deletion c/driver/sqlite/sqlite_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,16 @@ class SqliteQuirks : public adbc_validation::DriverQuirks {
case NANOARROW_TYPE_UINT32:
case NANOARROW_TYPE_UINT64:
return NANOARROW_TYPE_INT64;
case NANOARROW_TYPE_HALF_FLOAT:
case NANOARROW_TYPE_FLOAT:
case NANOARROW_TYPE_DOUBLE:
return NANOARROW_TYPE_DOUBLE;
case NANOARROW_TYPE_LARGE_STRING:
case NANOARROW_TYPE_STRING_VIEW:
return NANOARROW_TYPE_STRING;
case NANOARROW_TYPE_LARGE_BINARY:
case NANOARROW_TYPE_FIXED_SIZE_BINARY:
case NANOARROW_TYPE_BINARY_VIEW:
return NANOARROW_TYPE_BINARY;
case NANOARROW_TYPE_DATE32:
case NANOARROW_TYPE_TIMESTAMP:
return NANOARROW_TYPE_STRING;
Expand Down
11 changes: 9 additions & 2 deletions c/driver/sqlite/statement_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,11 @@ AdbcStatusCode AdbcSqliteBinderSet(struct AdbcSqliteBinder* binder,
switch (value_view.type) {
case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_LARGE_STRING:
case NANOARROW_TYPE_STRING_VIEW:
case NANOARROW_TYPE_BINARY:
case NANOARROW_TYPE_LARGE_BINARY:
case NANOARROW_TYPE_FIXED_SIZE_BINARY:
case NANOARROW_TYPE_BINARY_VIEW:
break;
default:
SetError(error, "Column %d dictionary has unsupported type %s", i,
Expand Down Expand Up @@ -326,7 +329,9 @@ AdbcStatusCode AdbcSqliteBinderBindNext(struct AdbcSqliteBinder* binder, sqlite3
} else {
switch (binder->types[col]) {
case NANOARROW_TYPE_BINARY:
case NANOARROW_TYPE_LARGE_BINARY: {
case NANOARROW_TYPE_LARGE_BINARY:
case NANOARROW_TYPE_FIXED_SIZE_BINARY:
case NANOARROW_TYPE_BINARY_VIEW: {
struct ArrowBufferView value =
ArrowArrayViewGetBytesUnsafe(binder->batch.children[col], binder->next_row);
status = sqlite3_bind_blob(stmt, col + 1, value.data.as_char, value.size_bytes,
Expand Down Expand Up @@ -359,6 +364,7 @@ AdbcStatusCode AdbcSqliteBinderBindNext(struct AdbcSqliteBinder* binder, sqlite3
status = sqlite3_bind_int64(stmt, col + 1, value);
break;
}
case NANOARROW_TYPE_HALF_FLOAT:
case NANOARROW_TYPE_FLOAT:
case NANOARROW_TYPE_DOUBLE: {
double value = ArrowArrayViewGetDoubleUnsafe(binder->batch.children[col],
Expand All @@ -367,7 +373,8 @@ AdbcStatusCode AdbcSqliteBinderBindNext(struct AdbcSqliteBinder* binder, sqlite3
break;
}
case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_LARGE_STRING: {
case NANOARROW_TYPE_LARGE_STRING:
case NANOARROW_TYPE_STRING_VIEW: {
struct ArrowBufferView value =
ArrowArrayViewGetBytesUnsafe(binder->batch.children[col], binder->next_row);
status = sqlite3_bind_text(stmt, col + 1, value.data.as_char, value.size_bytes,
Expand Down
12 changes: 9 additions & 3 deletions c/driver_manager/adbc_driver_manager_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -187,10 +187,18 @@ class SqliteQuirks : public adbc_validation::DriverQuirks {
case NANOARROW_TYPE_UINT32:
case NANOARROW_TYPE_UINT64:
return NANOARROW_TYPE_INT64;
case NANOARROW_TYPE_HALF_FLOAT:
case NANOARROW_TYPE_FLOAT:
case NANOARROW_TYPE_DOUBLE:
return NANOARROW_TYPE_DOUBLE;
case NANOARROW_TYPE_LARGE_STRING:
case NANOARROW_TYPE_STRING_VIEW:
return NANOARROW_TYPE_STRING;
case NANOARROW_TYPE_LARGE_BINARY:
case NANOARROW_TYPE_FIXED_SIZE_BINARY:
case NANOARROW_TYPE_BINARY_VIEW:
return NANOARROW_TYPE_BINARY;
case NANOARROW_TYPE_DATE32:
case NANOARROW_TYPE_TIMESTAMP:
return NANOARROW_TYPE_STRING;
default:
return ingest_type;
Expand Down Expand Up @@ -267,8 +275,6 @@ class SqliteStatementTest : public ::testing::Test,
void TearDown() override { ASSERT_NO_FATAL_FAILURE(TearDownTest()); }

void TestSqlIngestUInt64() { GTEST_SKIP() << "Cannot ingest UINT64 (out of range)"; }
void TestSqlIngestBinary() { GTEST_SKIP() << "Cannot ingest BINARY (not implemented)"; }
void TestSqlIngestDate32() { GTEST_SKIP() << "Cannot ingest DATE (not implemented)"; }
void TestSqlIngestTimestamp() {
GTEST_SKIP() << "Cannot ingest TIMESTAMP (not implemented)";
}
Expand Down
10 changes: 10 additions & 0 deletions c/validation/adbc_validation.h
Original file line number Diff line number Diff line change
Expand Up @@ -373,13 +373,18 @@ class StatementTest {
void TestSqlIngestUInt64();

// Floats
void TestSqlIngestFloat16();
void TestSqlIngestFloat32();
void TestSqlIngestFloat64();

// Strings
void TestSqlIngestString();
void TestSqlIngestLargeString();
void TestSqlIngestStringView();
void TestSqlIngestBinary();
void TestSqlIngestLargeBinary();
void TestSqlIngestFixedSizeBinary();
void TestSqlIngestBinaryView();

// Temporal
void TestSqlIngestDuration();
Expand Down Expand Up @@ -494,11 +499,16 @@ class StatementTest {
TEST_F(FIXTURE, SqlIngestUInt16) { TestSqlIngestUInt16(); } \
TEST_F(FIXTURE, SqlIngestUInt32) { TestSqlIngestUInt32(); } \
TEST_F(FIXTURE, SqlIngestUInt64) { TestSqlIngestUInt64(); } \
TEST_F(FIXTURE, SqlIngestFloat16) { TestSqlIngestFloat16(); } \
TEST_F(FIXTURE, SqlIngestFloat32) { TestSqlIngestFloat32(); } \
TEST_F(FIXTURE, SqlIngestFloat64) { TestSqlIngestFloat64(); } \
TEST_F(FIXTURE, SqlIngestString) { TestSqlIngestString(); } \
TEST_F(FIXTURE, SqlIngestLargeString) { TestSqlIngestLargeString(); } \
TEST_F(FIXTURE, SqlIngestStringView) { TestSqlIngestStringView(); } \
TEST_F(FIXTURE, SqlIngestBinary) { TestSqlIngestBinary(); } \
TEST_F(FIXTURE, SqlIngestLargeBinary) { TestSqlIngestLargeBinary(); } \
TEST_F(FIXTURE, SqlIngestFixedSizeBinary) { TestSqlIngestFixedSizeBinary(); } \
TEST_F(FIXTURE, SqlIngestBinaryView) { TestSqlIngestBinaryView(); } \
TEST_F(FIXTURE, SqlIngestDuration) { TestSqlIngestDuration(); } \
TEST_F(FIXTURE, SqlIngestDate32) { TestSqlIngestDate32(); } \
TEST_F(FIXTURE, SqlIngestTimestamp) { TestSqlIngestTimestamp(); } \
Expand Down
38 changes: 38 additions & 0 deletions c/validation/adbc_validation_statement.cc
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,10 @@ void StatementTest::TestSqlIngestInt64() {
ASSERT_NO_FATAL_FAILURE(TestSqlIngestNumericType<int64_t>(NANOARROW_TYPE_INT64));
}

void StatementTest::TestSqlIngestFloat16() {
ASSERT_NO_FATAL_FAILURE(TestSqlIngestNumericType<float>(NANOARROW_TYPE_HALF_FLOAT));
}

void StatementTest::TestSqlIngestFloat32() {
ASSERT_NO_FATAL_FAILURE(TestSqlIngestNumericType<float>(NANOARROW_TYPE_FLOAT));
}
Expand All @@ -263,6 +267,12 @@ void StatementTest::TestSqlIngestLargeString() {
NANOARROW_TYPE_LARGE_STRING, {std::nullopt, "", "", "1234", ""}, false));
}

void StatementTest::TestSqlIngestStringView() {
ASSERT_NO_FATAL_FAILURE(TestSqlIngestType<std::string>(
NANOARROW_TYPE_STRING_VIEW, {std::nullopt, "", "", "longer than 12 bytes", ""},
false));
}

void StatementTest::TestSqlIngestBinary() {
ASSERT_NO_FATAL_FAILURE(TestSqlIngestType<std::vector<std::byte>>(
NANOARROW_TYPE_BINARY,
Expand All @@ -274,6 +284,34 @@ void StatementTest::TestSqlIngestBinary() {
false));
}

// Ingest a large-binary column: one null, one empty value, and three short
// byte sequences (including 0x00 and 0xff bytes).
void StatementTest::TestSqlIngestLargeBinary() {
  using Bytes = std::vector<std::byte>;

  const Bytes empty{};
  const Bytes leading_zero{std::byte{0x00}, std::byte{0x01}};
  const Bytes four_bytes{std::byte{0x01}, std::byte{0x02}, std::byte{0x03},
                         std::byte{0x04}};
  const Bytes high_bytes{std::byte{0xfe}, std::byte{0xff}};

  ASSERT_NO_FATAL_FAILURE(TestSqlIngestType<Bytes>(
      NANOARROW_TYPE_LARGE_BINARY,
      {std::nullopt, empty, leading_zero, four_bytes, high_bytes}, false));
}

// Ingest a fixed-size-binary column named "col". Every non-null value is
// exactly kByteWidth bytes long, matching the declared fixed size.
void StatementTest::TestSqlIngestFixedSizeBinary() {
  constexpr int32_t kByteWidth = 4;
  SchemaField field =
      SchemaField::FixedSize("col", NANOARROW_TYPE_FIXED_SIZE_BINARY, kByteWidth);
  ASSERT_NO_FATAL_FAILURE(TestSqlIngestType<std::string>(
      field, {std::nullopt, "abcd", "efgh", "ijkl", "mnop"}, false));
}

void StatementTest::TestSqlIngestBinaryView() {
ASSERT_NO_FATAL_FAILURE(TestSqlIngestType<std::vector<std::byte>>(
NANOARROW_TYPE_LARGE_BINARY,
{std::nullopt, std::vector<std::byte>{},
std::vector<std::byte>{std::byte{0x00}, std::byte{0x01}},
std::vector<std::byte>{std::byte{0x01}, std::byte{0x02}, std::byte{0x03},
std::byte{0x04}},
std::vector<std::byte>{std::byte{0xfe}, std::byte{0xff}}},
false));
}

void StatementTest::TestSqlIngestDate32() {
ASSERT_NO_FATAL_FAILURE(TestSqlIngestNumericType<int32_t>(NANOARROW_TYPE_DATE32));
}
Expand Down
12 changes: 11 additions & 1 deletion c/validation/adbc_validation_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,16 @@ ::testing::Matcher<AdbcStatusCode> IsStatus(AdbcStatusCode code,
} while (false);

static int MakeSchemaColumnImpl(struct ArrowSchema* column, const SchemaField& field) {
CHECK_ERRNO(ArrowSchemaSetType(column, field.type));
switch (field.type) {
case NANOARROW_TYPE_FIXED_SIZE_BINARY:
case NANOARROW_TYPE_FIXED_SIZE_LIST:
CHECK_ERRNO(ArrowSchemaSetTypeFixedSize(column, field.type, field.fixed_size));
break;
default:
CHECK_ERRNO(ArrowSchemaSetType(column, field.type));
break;
}

CHECK_ERRNO(ArrowSchemaSetName(column, field.name.c_str()));

if (!field.nullable) {
Expand All @@ -181,6 +190,7 @@ static int MakeSchemaColumnImpl(struct ArrowSchema* column, const SchemaField& f
// SetType for a list will allocate and initialize children
case NANOARROW_TYPE_LIST:
case NANOARROW_TYPE_LARGE_LIST:
case NANOARROW_TYPE_FIXED_SIZE_LIST:
case NANOARROW_TYPE_MAP: {
size_t i = 0;
for (const SchemaField& child : field.children) {
Expand Down
8 changes: 8 additions & 0 deletions c/validation/adbc_validation_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ struct GetObjectsReader {
struct SchemaField {
std::string name;
ArrowType type = NANOARROW_TYPE_UNINITIALIZED;
int32_t fixed_size = 0;
bool nullable = true;
std::vector<SchemaField> children;

Expand All @@ -271,6 +272,13 @@ struct SchemaField {
out.children = std::move(children);
return out;
}

/// \brief Make a field whose type takes a fixed-size parameter.
///
/// Builds the field via Nested() and then records `fixed_size` on it.
///
/// \param name Field name; moved into the result.
/// \param type Arrow type of the field.
/// \param fixed_size Value stored in the field's `fixed_size` member.
/// \param children Child fields, if any; moved into the result.
/// \return The constructed field.
static SchemaField FixedSize(std::string name, ArrowType type, int32_t fixed_size,
                             std::vector<SchemaField> children = {}) {
  // `name` is a by-value sink parameter: move it rather than copying it again.
  SchemaField out = Nested(std::move(name), type, std::move(children));
  out.fixed_size = fixed_size;
  return out;
}
};

/// \brief Make a schema from a vector of (name, type, nullable) tuples.
Expand Down

0 comments on commit 0366632

Please sign in to comment.