From 4a4ebc9a8661d2234322c2fbe60d6bcc77620e3c Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 29 Jun 2024 07:00:08 -0300 Subject: [PATCH 01/61] update thrift --- cpp/src/parquet/parquet.thrift | 186 +++++++++++++++++++++++++++++---- 1 file changed, 165 insertions(+), 21 deletions(-) diff --git a/cpp/src/parquet/parquet.thrift b/cpp/src/parquet/parquet.thrift index a1883d335aa23..1653d7365ea49 100644 --- a/cpp/src/parquet/parquet.thrift +++ b/cpp/src/parquet/parquet.thrift @@ -20,8 +20,7 @@ /** * File format description for the parquet file format */ -cpp_include "parquet/windows_compatibility.h" -namespace cpp parquet.format +namespace cpp parquet namespace java org.apache.parquet.format /** @@ -238,6 +237,83 @@ struct SizeStatistics { 3: optional list<i64> definition_level_histogram; } +/** + * Interpretation for edges of GEOMETRY logical type, i.e. whether the edge + * between points represents a straight cartesian line or the shortest line on + * the sphere. Please note that it only applies to polygons. + */ +enum Edges { + PLANAR = 0; + SPHERICAL = 1; +} + +/** + * A custom WKB-encoded polygon or multi-polygon to represent a covering of + * geometries. For example, it may be a bounding box, or an envelope of geometries + * when a bounding box cannot be built (e.g. a geometry has spherical edges, or if + * an edge of geographic coordinates crosses the antimeridian). In addition, it can + * also be used to provide vendor-agnostic coverings like S2 or H3 grids. + */ +struct Covering { + /** Bytes of a WKB-encoded geometry */ + 1: required binary geometry; + /** Edges of the geometry, which is independent of edges from the logical type */ + 2: required Edges edges; +} + +/** + * Bounding box of geometries in the representation of min/max value pair of + * coordinates from each axis. Values of Z and M are omitted for 2D geometries. 
+ */ +struct BoundingBox { + 1: required double xmin; + 2: required double xmax; + 3: required double ymin; + 4: required double ymax; + 5: optional double zmin; + 6: optional double zmax; + 7: optional double mmin; + 8: optional double mmax; +} + +/** Statistics specific to GEOMETRY logical type */ +struct GeometryStatistics { + /** A bounding box of geometries */ + 1: optional BoundingBox bbox; + + /** A covering polygon of geometries */ + 2: optional Covering covering; + + /** + * The geometry types of all geometries, or an empty array if they are not + * known. This is borrowed from `geometry_types` column metadata of GeoParquet [1] + * except that values in the list are WKB (ISO variant) integer codes [2]. Table + * below shows the most common geometry types and their codes: + * + * | Type | XY | XYZ | XYM | XYZM | + * | :----------------- | :--- | :--- | :--- | :--: | + * | Point | 0001 | 1001 | 2001 | 3001 | + * | LineString | 0002 | 1002 | 2002 | 3002 | + * | Polygon | 0003 | 1003 | 2003 | 3003 | + * | MultiPoint | 0004 | 1004 | 2004 | 3004 | + * | MultiLineString | 0005 | 1005 | 2005 | 3005 | + * | MultiPolygon | 0006 | 1006 | 2006 | 3006 | + * | GeometryCollection | 0007 | 1007 | 2007 | 3007 | + * + * In addition, the following rules are used: + * - A list of multiple values indicates that multiple geometry types are + * present (e.g. `[0003, 0006]`). + * - An empty array explicitly signals that the geometry types are not known. + * - The geometry types in the list must be unique (e.g. `[0001, 0001]` + * is not valid). + * + * Please refer to links below for more detail: + * [1] https://github.com/opengeospatial/geoparquet/blob/v1.0.0/format-specs/geoparquet.md?plain=1#L91 + * [2] https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary + */ + 3: optional list<i32> geometry_types; +} + /** * Statistics per row group and per page * All fields are optional. 
@@ -280,6 +356,9 @@ struct Statistics { 7: optional bool is_max_value_exact; /** If true, min_value is the actual minimum value for a column */ 8: optional bool is_min_value_exact; + + /** statistics specific to geometry logical type */ + 9: optional GeometryStatistics geometry_stats; } /** Empty structs to use as logical type annotations */ @@ -374,6 +453,51 @@ struct JsonType { struct BsonType { } +/** + * Physical type and encoding for the geometry type. + */ +enum GeometryEncoding { + /** + * Allowed for physical type: BYTE_ARRAY. + * + * Well-known binary (WKB) representations of geometries. It supports 2D or + * 3D geometries of the standard geometry types (Point, LineString, Polygon, + * MultiPoint, MultiLineString, MultiPolygon, and GeometryCollection). This + * is the preferred option for maximum portability. + * + * This encoding enables GeometryStatistics to be set in the column chunk + * and page index. + */ + WKB = 0; + + // TODO: add native encoding from GeoParquet/GeoArrow +} + +/** + * Geometry logical type annotation (added in 2.11.0) + */ +struct GeometryType { + /** + * Physical type and encoding for the geometry type. Please refer to the + * definition of GeometryEncoding for more detail. + */ + 1: required GeometryEncoding encoding; + /** + * Edges of polygon. + */ + 2: required Edges edges; + /** + * Coordinate Reference System, i.e. mapping of how coordinates refer to + * precise locations on earth, e.g. OGC:CRS84 + */ + 3: optional string crs; + /** + * Additional informative metadata. + * It can be used by GeoParquet to offload some of the column metadata. + */ + 4: optional binary metadata; +} + /** * LogicalType annotations to replace ConvertedType. 
* @@ -404,6 +528,7 @@ union LogicalType { 13: BsonType BSON // use ConvertedType BSON 14: UUIDType UUID // no compatible ConvertedType 15: Float16Type FLOAT16 // no compatible ConvertedType + 16: GeometryType GEOMETRY // no compatible ConvertedType } /** @@ -527,12 +652,15 @@ enum Encoding { */ RLE_DICTIONARY = 8; - /** Encoding for floating-point data. + /** Encoding for fixed-width data (FLOAT, DOUBLE, INT32, INT64, FIXED_LEN_BYTE_ARRAY). K byte-streams are created where K is the size in bytes of the data type. - The individual bytes of an FP value are scattered to the corresponding stream and + The individual bytes of a value are scattered to the corresponding stream and the streams are concatenated. This itself does not reduce the size of the data but can lead to better compression afterwards. + + Added in 2.8 for FLOAT and DOUBLE. + Support for INT32, INT64 and FIXED_LEN_BYTE_ARRAY added in 2.11. */ BYTE_STREAM_SPLIT = 9; } @@ -736,10 +864,10 @@ struct PageHeader { } /** - * Wrapper struct to specify sort order + * Sort order within a RowGroup of a leaf column */ struct SortingColumn { - /** The column index (in this row group) **/ + /** The ordinal position of the column (in this row group) **/ 1: required i32 column_idx /** If true, indicates this column is sorted in descending order. 
**/ @@ -789,7 +917,7 @@ struct ColumnMetaData { /** total byte size of all uncompressed pages in this column chunk (including the headers) **/ 6: required i64 total_uncompressed_size - /** total byte size of all compressed, and potentially encrypted, pages + /** total byte size of all compressed, and potentially encrypted, pages * in this column chunk (including the headers) **/ 7: required i64 total_compressed_size @@ -904,10 +1032,10 @@ struct RowGroup { * in this row group **/ 5: optional i64 file_offset - /** Total byte size of all compressed (and potentially encrypted) column data + /** Total byte size of all compressed (and potentially encrypted) column data * in this row group **/ 6: optional i64 total_compressed_size - + /** Row group ordinal in the file **/ 7: optional i16 ordinal } @@ -946,12 +1074,13 @@ union ColumnOrder { * TIME_MICROS - signed comparison * TIMESTAMP_MILLIS - signed comparison * TIMESTAMP_MICROS - signed comparison - * INTERVAL - unsigned comparison + * INTERVAL - undefined * JSON - unsigned byte-wise comparison * BSON - unsigned byte-wise comparison * ENUM - unsigned byte-wise comparison * LIST - undefined * MAP - undefined + * GEOMETRY - undefined, use GeometryStatistics instead. * * In the absence of logical types, the sort order is determined by the physical type: * BOOLEAN - false, true @@ -971,7 +1100,7 @@ union ColumnOrder { * - If the min is +0, the row group may contain -0 values as well. * - If the max is -0, the row group may contain +0 values as well. * - When looking for NaN values, min and max should be ignored. - * + * * When writing statistics the following rules should be followed: * - NaNs should not be written to min or max statistics fields. * - If the computed max value is zero (whether negative or positive), @@ -999,6 +1128,13 @@ struct PageLocation { 3: required i64 first_row_index } +/** + * Optional offsets for each data page in a ColumnChunk. + * + * Forms part of the page index, along with ColumnIndex. 
+ * + * OffsetIndex may be present even if ColumnIndex is not. + */ struct OffsetIndex { /** * PageLocations, ordered by increasing PageLocation.offset. It is required @@ -1015,8 +1151,14 @@ struct OffsetIndex { } /** - * Description for ColumnIndex. - * Each [i] refers to the page at OffsetIndex.page_locations[i] + * Optional statistics for each data page in a ColumnChunk. + * + * Forms part of the page index, along with OffsetIndex. + * + * If this structure is present, OffsetIndex must also be present. + * + * For each field in this structure, [i] refers to the page at + * OffsetIndex.page_locations[i] */ struct ColumnIndex { /** @@ -1070,6 +1212,8 @@ struct ColumnIndex { **/ 7: optional list<i64> definition_level_histograms; + /** A list containing statistics of GEOMETRY logical type for each page */ + 8: optional list<GeometryStatistics> geometry_stats; } struct AesGcmV1 { @@ -1149,30 +1293,30 @@ struct FileMetaData { */ 7: optional list<ColumnOrder> column_orders; - /** + /** * Encryption algorithm. This field is set only in encrypted files * with plaintext footer. Files with encrypted footer store algorithm id * in FileCryptoMetaData structure. */ 8: optional EncryptionAlgorithm encryption_algorithm - /** - * Retrieval metadata of key used for signing the footer. - * Used only in encrypted files with plaintext footer. - */ + /** + * Retrieval metadata of key used for signing the footer. + * Used only in encrypted files with plaintext footer. + */ 9: optional binary footer_signing_key_metadata } /** Crypto metadata for files with encrypted footer **/ struct FileCryptoMetaData { - /** + /** * Encryption algorithm. This field is only used for files * with encrypted footer. Files with plaintext footer store algorithm id * inside footer (FileMetaData structure). 
*/ 1: required EncryptionAlgorithm encryption_algorithm - - /** Retrieval metadata of key used for encryption of footer, + + /** Retrieval metadata of key used for encryption of footer, * and (possibly) columns **/ 2: optional binary key_metadata } From 6f4e4b71fa11279550b788959e375ad715482789 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 29 Jun 2024 07:22:17 -0300 Subject: [PATCH 02/61] update thrift defs --- cpp/src/generated/parquet_types.cpp | 2519 ++++++++++++++++----------- cpp/src/generated/parquet_types.h | 650 ++++++- cpp/src/generated/parquet_types.tcc | 976 ++++++++--- 3 files changed, 2770 insertions(+), 1375 deletions(-) diff --git a/cpp/src/generated/parquet_types.cpp b/cpp/src/generated/parquet_types.cpp index 1ba0c4626233f..565e5f57622e1 100644 --- a/cpp/src/generated/parquet_types.cpp +++ b/cpp/src/generated/parquet_types.cpp @@ -1,5 +1,5 @@ /** - * Autogenerated by Thrift Compiler (0.19.0) + * Autogenerated by Thrift Compiler (0.20.0) * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * @generated @@ -78,12 +78,12 @@ int _kConvertedTypeValues[] = { ConvertedType::ENUM, /** * A decimal value. - * + * * This may be used to annotate binary or fixed primitive types. The * underlying byte array stores the unscaled value encoded as two's * complement using big-endian byte order (the most significant byte is the * zeroth element). The value of the decimal is the value * 10^{-scale}. - * + * * This must be accompanied by a (maximum) precision and a scale in the * SchemaElement. The precision specifies the number of digits in the decimal * and the scale stores the location of the decimal point. For example 1.23 @@ -93,47 +93,47 @@ int _kConvertedTypeValues[] = { ConvertedType::DECIMAL, /** * A Date - * + * * Stored as days since Unix epoch, encoded as the INT32 physical type. - * + * */ ConvertedType::DATE, /** * A time - * + * * The total number of milliseconds since midnight. The value is stored * as an INT32 physical type. 
*/ ConvertedType::TIME_MILLIS, /** * A time. - * + * * The total number of microseconds since midnight. The value is stored as * an INT64 physical type. */ ConvertedType::TIME_MICROS, /** * A date/time combination - * + * * Date and time recorded as milliseconds since the Unix epoch. Recorded as * a physical type of INT64. */ ConvertedType::TIMESTAMP_MILLIS, /** * A date/time combination - * + * * Date and time recorded as microseconds since the Unix epoch. The value is * stored as an INT64 physical type. */ ConvertedType::TIMESTAMP_MICROS, /** * An unsigned integer value. - * + * * The number describes the maximum number of meaningful data bits in * the stored value. 8, 16 and 32 bit values are stored using the * INT32 physical type. 64 bit values are stored using the INT64 * physical type. - * + * */ ConvertedType::UINT_8, ConvertedType::UINT_16, @@ -141,12 +141,12 @@ int _kConvertedTypeValues[] = { ConvertedType::UINT_64, /** * A signed integer value. - * + * * The number describes the maximum number of meaningful data bits in * the stored value. 8, 16 and 32 bit values are stored using the * INT32 physical type. 64 bit values are stored using the INT64 * physical type. - * + * */ ConvertedType::INT_8, ConvertedType::INT_16, @@ -154,19 +154,19 @@ int _kConvertedTypeValues[] = { ConvertedType::INT_64, /** * An embedded JSON document - * + * * A JSON document embedded within a single UTF8 column. */ ConvertedType::JSON, /** * An embedded BSON document - * + * * A BSON document embedded within a single BINARY column. */ ConvertedType::BSON, /** * An interval of time - * + * * This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12 * This data is composed of three separate little endian unsigned * integers. Each stores a component of a duration of time. The first @@ -202,12 +202,12 @@ const char* _kConvertedTypeNames[] = { "ENUM", /** * A decimal value. - * + * * This may be used to annotate binary or fixed primitive types. 
The * underlying byte array stores the unscaled value encoded as two's * complement using big-endian byte order (the most significant byte is the * zeroth element). The value of the decimal is the value * 10^{-scale}. - * + * * This must be accompanied by a (maximum) precision and a scale in the * SchemaElement. The precision specifies the number of digits in the decimal * and the scale stores the location of the decimal point. For example 1.23 @@ -217,47 +217,47 @@ const char* _kConvertedTypeNames[] = { "DECIMAL", /** * A Date - * + * * Stored as days since Unix epoch, encoded as the INT32 physical type. - * + * */ "DATE", /** * A time - * + * * The total number of milliseconds since midnight. The value is stored * as an INT32 physical type. */ "TIME_MILLIS", /** * A time. - * + * * The total number of microseconds since midnight. The value is stored as * an INT64 physical type. */ "TIME_MICROS", /** * A date/time combination - * + * * Date and time recorded as milliseconds since the Unix epoch. Recorded as * a physical type of INT64. */ "TIMESTAMP_MILLIS", /** * A date/time combination - * + * * Date and time recorded as microseconds since the Unix epoch. The value is * stored as an INT64 physical type. */ "TIMESTAMP_MICROS", /** * An unsigned integer value. - * + * * The number describes the maximum number of meaningful data bits in * the stored value. 8, 16 and 32 bit values are stored using the * INT32 physical type. 64 bit values are stored using the INT64 * physical type. - * + * */ "UINT_8", "UINT_16", @@ -265,12 +265,12 @@ const char* _kConvertedTypeNames[] = { "UINT_64", /** * A signed integer value. - * + * * The number describes the maximum number of meaningful data bits in * the stored value. 8, 16 and 32 bit values are stored using the * INT32 physical type. 64 bit values are stored using the INT64 * physical type. 
- * + * */ "INT_8", "INT_16", @@ -278,19 +278,19 @@ const char* _kConvertedTypeNames[] = { "INT_64", /** * An embedded JSON document - * + * * A JSON document embedded within a single UTF8 column. */ "JSON", /** * An embedded BSON document - * + * * A BSON document embedded within a single BINARY column. */ "BSON", /** * An interval of time - * + * * This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12 * This data is composed of three separate little endian unsigned * integers. Each stores a component of a duration of time. The first @@ -372,6 +372,84 @@ std::string to_string(const FieldRepetitionType::type& val) { } } +int _kEdgesValues[] = { + Edges::PLANAR, + Edges::SPHERICAL +}; +const char* _kEdgesNames[] = { + "PLANAR", + "SPHERICAL" +}; +const std::map _Edges_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(2, _kEdgesValues, _kEdgesNames), ::apache::thrift::TEnumIterator(-1, nullptr, nullptr)); + +std::ostream& operator<<(std::ostream& out, const Edges::type& val) { + std::map::const_iterator it = _Edges_VALUES_TO_NAMES.find(val); + if (it != _Edges_VALUES_TO_NAMES.end()) { + out << it->second; + } else { + out << static_cast(val); + } + return out; +} + +std::string to_string(const Edges::type& val) { + std::map::const_iterator it = _Edges_VALUES_TO_NAMES.find(val); + if (it != _Edges_VALUES_TO_NAMES.end()) { + return std::string(it->second); + } else { + return std::to_string(static_cast(val)); + } +} + +int _kGeometryEncodingValues[] = { + /** + * Allowed for physical type: BYTE_ARRAY. + * + * Well-known binary (WKB) representations of geometries. It supports 2D or + * 3D geometries of the standard geometry types (Point, LineString, Polygon, + * MultiPoint, MultiLineString, MultiPolygon, and GeometryCollection). This + * is the preferred option for maximum portability. + * + * This encoding enables GeometryStatistics to be set in the column chunk + * and page index. 
+ */ + GeometryEncoding::WKB +}; +const char* _kGeometryEncodingNames[] = { + /** + * Allowed for physical type: BYTE_ARRAY. + * + * Well-known binary (WKB) representations of geometries. It supports 2D or + * 3D geometries of the standard geometry types (Point, LineString, Polygon, + * MultiPoint, MultiLineString, MultiPolygon, and GeometryCollection). This + * is the preferred option for maximum portability. + * + * This encoding enables GeometryStatistics to be set in the column chunk + * and page index. + */ + "WKB" +}; +const std::map _GeometryEncoding_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(1, _kGeometryEncodingValues, _kGeometryEncodingNames), ::apache::thrift::TEnumIterator(-1, nullptr, nullptr)); + +std::ostream& operator<<(std::ostream& out, const GeometryEncoding::type& val) { + std::map::const_iterator it = _GeometryEncoding_VALUES_TO_NAMES.find(val); + if (it != _GeometryEncoding_VALUES_TO_NAMES.end()) { + out << it->second; + } else { + out << static_cast(val); + } + return out; +} + +std::string to_string(const GeometryEncoding::type& val) { + std::map::const_iterator it = _GeometryEncoding_VALUES_TO_NAMES.find(val); + if (it != _GeometryEncoding_VALUES_TO_NAMES.end()) { + return std::string(it->second); + } else { + return std::to_string(static_cast(val)); + } +} + int _kEncodingValues[] = { /** * Default encoding. @@ -421,12 +499,15 @@ int _kEncodingValues[] = { */ Encoding::RLE_DICTIONARY, /** - * Encoding for floating-point data. + * Encoding for fixed-width data (FLOAT, DOUBLE, INT32, INT64, FIXED_LEN_BYTE_ARRAY). * K byte-streams are created where K is the size in bytes of the data type. - * The individual bytes of an FP value are scattered to the corresponding stream and + * The individual bytes of a value are scattered to the corresponding stream and * the streams are concatenated. * This itself does not reduce the size of the data but can lead to better compression * afterwards. + * + * Added in 2.8 for FLOAT and DOUBLE. 
+ * Support for INT32, INT64 and FIXED_LEN_BYTE_ARRAY added in 2.11. */ Encoding::BYTE_STREAM_SPLIT }; @@ -479,12 +560,15 @@ const char* _kEncodingNames[] = { */ "RLE_DICTIONARY", /** - * Encoding for floating-point data. + * Encoding for fixed-width data (FLOAT, DOUBLE, INT32, INT64, FIXED_LEN_BYTE_ARRAY). * K byte-streams are created where K is the size in bytes of the data type. - * The individual bytes of an FP value are scattered to the corresponding stream and + * The individual bytes of a value are scattered to the corresponding stream and * the streams are concatenated. * This itself does not reduce the size of the data but can lead to better compression * afterwards. + * + * Added in 2.8 for FLOAT and DOUBLE. + * Support for INT32, INT64 and FIXED_LEN_BYTE_ARRAY added in 2.11. */ "BYTE_STREAM_SPLIT" }; @@ -684,6 +768,246 @@ void SizeStatistics::printTo(std::ostream& out) const { } +Covering::~Covering() noexcept { +} + + +void Covering::__set_geometry(const std::string& val) { + this->geometry = val; +} + +void Covering::__set_edges(const Edges::type val) { + this->edges = val; +} +std::ostream& operator<<(std::ostream& out, const Covering& obj) +{ + obj.printTo(out); + return out; +} + + +void swap(Covering &a, Covering &b) { + using ::std::swap; + swap(a.geometry, b.geometry); + swap(a.edges, b.edges); +} + +Covering::Covering(const Covering& other17) { + geometry = other17.geometry; + edges = other17.edges; +} +Covering::Covering(Covering&& other18) noexcept { + geometry = std::move(other18.geometry); + edges = other18.edges; +} +Covering& Covering::operator=(const Covering& other19) { + geometry = other19.geometry; + edges = other19.edges; + return *this; +} +Covering& Covering::operator=(Covering&& other20) noexcept { + geometry = std::move(other20.geometry); + edges = other20.edges; + return *this; +} +void Covering::printTo(std::ostream& out) const { + using ::apache::thrift::to_string; + out << "Covering("; + out << "geometry=" << 
to_string(geometry); + out << ", " << "edges=" << to_string(edges); + out << ")"; +} + + +BoundingBox::~BoundingBox() noexcept { +} + + +void BoundingBox::__set_xmin(const double val) { + this->xmin = val; +} + +void BoundingBox::__set_xmax(const double val) { + this->xmax = val; +} + +void BoundingBox::__set_ymin(const double val) { + this->ymin = val; +} + +void BoundingBox::__set_ymax(const double val) { + this->ymax = val; +} + +void BoundingBox::__set_zmin(const double val) { + this->zmin = val; +__isset.zmin = true; +} + +void BoundingBox::__set_zmax(const double val) { + this->zmax = val; +__isset.zmax = true; +} + +void BoundingBox::__set_mmin(const double val) { + this->mmin = val; +__isset.mmin = true; +} + +void BoundingBox::__set_mmax(const double val) { + this->mmax = val; +__isset.mmax = true; +} +std::ostream& operator<<(std::ostream& out, const BoundingBox& obj) +{ + obj.printTo(out); + return out; +} + + +void swap(BoundingBox &a, BoundingBox &b) { + using ::std::swap; + swap(a.xmin, b.xmin); + swap(a.xmax, b.xmax); + swap(a.ymin, b.ymin); + swap(a.ymax, b.ymax); + swap(a.zmin, b.zmin); + swap(a.zmax, b.zmax); + swap(a.mmin, b.mmin); + swap(a.mmax, b.mmax); + swap(a.__isset, b.__isset); +} + +BoundingBox::BoundingBox(const BoundingBox& other21) noexcept { + xmin = other21.xmin; + xmax = other21.xmax; + ymin = other21.ymin; + ymax = other21.ymax; + zmin = other21.zmin; + zmax = other21.zmax; + mmin = other21.mmin; + mmax = other21.mmax; + __isset = other21.__isset; +} +BoundingBox::BoundingBox(BoundingBox&& other22) noexcept { + xmin = other22.xmin; + xmax = other22.xmax; + ymin = other22.ymin; + ymax = other22.ymax; + zmin = other22.zmin; + zmax = other22.zmax; + mmin = other22.mmin; + mmax = other22.mmax; + __isset = other22.__isset; +} +BoundingBox& BoundingBox::operator=(const BoundingBox& other23) noexcept { + xmin = other23.xmin; + xmax = other23.xmax; + ymin = other23.ymin; + ymax = other23.ymax; + zmin = other23.zmin; + zmax = other23.zmax; 
+ mmin = other23.mmin; + mmax = other23.mmax; + __isset = other23.__isset; + return *this; +} +BoundingBox& BoundingBox::operator=(BoundingBox&& other24) noexcept { + xmin = other24.xmin; + xmax = other24.xmax; + ymin = other24.ymin; + ymax = other24.ymax; + zmin = other24.zmin; + zmax = other24.zmax; + mmin = other24.mmin; + mmax = other24.mmax; + __isset = other24.__isset; + return *this; +} +void BoundingBox::printTo(std::ostream& out) const { + using ::apache::thrift::to_string; + out << "BoundingBox("; + out << "xmin=" << to_string(xmin); + out << ", " << "xmax=" << to_string(xmax); + out << ", " << "ymin=" << to_string(ymin); + out << ", " << "ymax=" << to_string(ymax); + out << ", " << "zmin="; (__isset.zmin ? (out << to_string(zmin)) : (out << "")); + out << ", " << "zmax="; (__isset.zmax ? (out << to_string(zmax)) : (out << "")); + out << ", " << "mmin="; (__isset.mmin ? (out << to_string(mmin)) : (out << "")); + out << ", " << "mmax="; (__isset.mmax ? (out << to_string(mmax)) : (out << "")); + out << ")"; +} + + +GeometryStatistics::~GeometryStatistics() noexcept { +} + + +void GeometryStatistics::__set_bbox(const BoundingBox& val) { + this->bbox = val; +__isset.bbox = true; +} + +void GeometryStatistics::__set_covering(const Covering& val) { + this->covering = val; +__isset.covering = true; +} + +void GeometryStatistics::__set_geometry_types(const std::vector & val) { + this->geometry_types = val; +__isset.geometry_types = true; +} +std::ostream& operator<<(std::ostream& out, const GeometryStatistics& obj) +{ + obj.printTo(out); + return out; +} + + +void swap(GeometryStatistics &a, GeometryStatistics &b) { + using ::std::swap; + swap(a.bbox, b.bbox); + swap(a.covering, b.covering); + swap(a.geometry_types, b.geometry_types); + swap(a.__isset, b.__isset); +} + +GeometryStatistics::GeometryStatistics(const GeometryStatistics& other31) { + bbox = other31.bbox; + covering = other31.covering; + geometry_types = other31.geometry_types; + __isset = 
other31.__isset; +} +GeometryStatistics::GeometryStatistics(GeometryStatistics&& other32) noexcept { + bbox = std::move(other32.bbox); + covering = std::move(other32.covering); + geometry_types = std::move(other32.geometry_types); + __isset = other32.__isset; +} +GeometryStatistics& GeometryStatistics::operator=(const GeometryStatistics& other33) { + bbox = other33.bbox; + covering = other33.covering; + geometry_types = other33.geometry_types; + __isset = other33.__isset; + return *this; +} +GeometryStatistics& GeometryStatistics::operator=(GeometryStatistics&& other34) noexcept { + bbox = std::move(other34.bbox); + covering = std::move(other34.covering); + geometry_types = std::move(other34.geometry_types); + __isset = other34.__isset; + return *this; +} +void GeometryStatistics::printTo(std::ostream& out) const { + using ::apache::thrift::to_string; + out << "GeometryStatistics("; + out << "bbox="; (__isset.bbox ? (out << to_string(bbox)) : (out << "")); + out << ", " << "covering="; (__isset.covering ? (out << to_string(covering)) : (out << "")); + out << ", " << "geometry_types="; (__isset.geometry_types ? 
(out << to_string(geometry_types)) : (out << "")); + out << ")"; +} + + Statistics::~Statistics() noexcept { } @@ -727,6 +1051,11 @@ void Statistics::__set_is_min_value_exact(const bool val) { this->is_min_value_exact = val; __isset.is_min_value_exact = true; } + +void Statistics::__set_geometry_stats(const GeometryStatistics& val) { + this->geometry_stats = val; +__isset.geometry_stats = true; +} std::ostream& operator<<(std::ostream& out, const Statistics& obj) { obj.printTo(out); @@ -744,53 +1073,58 @@ void swap(Statistics &a, Statistics &b) { swap(a.min_value, b.min_value); swap(a.is_max_value_exact, b.is_max_value_exact); swap(a.is_min_value_exact, b.is_min_value_exact); + swap(a.geometry_stats, b.geometry_stats); swap(a.__isset, b.__isset); } -Statistics::Statistics(const Statistics& other16) { - max = other16.max; - min = other16.min; - null_count = other16.null_count; - distinct_count = other16.distinct_count; - max_value = other16.max_value; - min_value = other16.min_value; - is_max_value_exact = other16.is_max_value_exact; - is_min_value_exact = other16.is_min_value_exact; - __isset = other16.__isset; -} -Statistics::Statistics(Statistics&& other17) noexcept { - max = std::move(other17.max); - min = std::move(other17.min); - null_count = other17.null_count; - distinct_count = other17.distinct_count; - max_value = std::move(other17.max_value); - min_value = std::move(other17.min_value); - is_max_value_exact = other17.is_max_value_exact; - is_min_value_exact = other17.is_min_value_exact; - __isset = other17.__isset; -} -Statistics& Statistics::operator=(const Statistics& other18) { - max = other18.max; - min = other18.min; - null_count = other18.null_count; - distinct_count = other18.distinct_count; - max_value = other18.max_value; - min_value = other18.min_value; - is_max_value_exact = other18.is_max_value_exact; - is_min_value_exact = other18.is_min_value_exact; - __isset = other18.__isset; +Statistics::Statistics(const Statistics& other35) { + max = 
other35.max; + min = other35.min; + null_count = other35.null_count; + distinct_count = other35.distinct_count; + max_value = other35.max_value; + min_value = other35.min_value; + is_max_value_exact = other35.is_max_value_exact; + is_min_value_exact = other35.is_min_value_exact; + geometry_stats = other35.geometry_stats; + __isset = other35.__isset; +} +Statistics::Statistics(Statistics&& other36) noexcept { + max = std::move(other36.max); + min = std::move(other36.min); + null_count = other36.null_count; + distinct_count = other36.distinct_count; + max_value = std::move(other36.max_value); + min_value = std::move(other36.min_value); + is_max_value_exact = other36.is_max_value_exact; + is_min_value_exact = other36.is_min_value_exact; + geometry_stats = std::move(other36.geometry_stats); + __isset = other36.__isset; +} +Statistics& Statistics::operator=(const Statistics& other37) { + max = other37.max; + min = other37.min; + null_count = other37.null_count; + distinct_count = other37.distinct_count; + max_value = other37.max_value; + min_value = other37.min_value; + is_max_value_exact = other37.is_max_value_exact; + is_min_value_exact = other37.is_min_value_exact; + geometry_stats = other37.geometry_stats; + __isset = other37.__isset; return *this; } -Statistics& Statistics::operator=(Statistics&& other19) noexcept { - max = std::move(other19.max); - min = std::move(other19.min); - null_count = other19.null_count; - distinct_count = other19.distinct_count; - max_value = std::move(other19.max_value); - min_value = std::move(other19.min_value); - is_max_value_exact = other19.is_max_value_exact; - is_min_value_exact = other19.is_min_value_exact; - __isset = other19.__isset; +Statistics& Statistics::operator=(Statistics&& other38) noexcept { + max = std::move(other38.max); + min = std::move(other38.min); + null_count = other38.null_count; + distinct_count = other38.distinct_count; + max_value = std::move(other38.max_value); + min_value = std::move(other38.min_value); + 
is_max_value_exact = other38.is_max_value_exact; + is_min_value_exact = other38.is_min_value_exact; + geometry_stats = std::move(other38.geometry_stats); + __isset = other38.__isset; return *this; } void Statistics::printTo(std::ostream& out) const { @@ -804,6 +1138,7 @@ void Statistics::printTo(std::ostream& out) const { out << ", " << "min_value="; (__isset.min_value ? (out << to_string(min_value)) : (out << "")); out << ", " << "is_max_value_exact="; (__isset.is_max_value_exact ? (out << to_string(is_max_value_exact)) : (out << "")); out << ", " << "is_min_value_exact="; (__isset.is_min_value_exact ? (out << to_string(is_min_value_exact)) : (out << "")); + out << ", " << "geometry_stats="; (__isset.geometry_stats ? (out << to_string(geometry_stats)) : (out << "")); out << ")"; } @@ -824,18 +1159,18 @@ void swap(StringType &a, StringType &b) { (void) b; } -StringType::StringType(const StringType& other20) noexcept { - (void) other20; +StringType::StringType(const StringType& other39) noexcept { + (void) other39; } -StringType::StringType(StringType&& other21) noexcept { - (void) other21; +StringType::StringType(StringType&& other40) noexcept { + (void) other40; } -StringType& StringType::operator=(const StringType& other22) noexcept { - (void) other22; +StringType& StringType::operator=(const StringType& other41) noexcept { + (void) other41; return *this; } -StringType& StringType::operator=(StringType&& other23) noexcept { - (void) other23; +StringType& StringType::operator=(StringType&& other42) noexcept { + (void) other42; return *this; } void StringType::printTo(std::ostream& out) const { @@ -861,18 +1196,18 @@ void swap(UUIDType &a, UUIDType &b) { (void) b; } -UUIDType::UUIDType(const UUIDType& other24) noexcept { - (void) other24; +UUIDType::UUIDType(const UUIDType& other43) noexcept { + (void) other43; } -UUIDType::UUIDType(UUIDType&& other25) noexcept { - (void) other25; +UUIDType::UUIDType(UUIDType&& other44) noexcept { + (void) other44; } -UUIDType& 
UUIDType::operator=(const UUIDType& other26) noexcept { - (void) other26; +UUIDType& UUIDType::operator=(const UUIDType& other45) noexcept { + (void) other45; return *this; } -UUIDType& UUIDType::operator=(UUIDType&& other27) noexcept { - (void) other27; +UUIDType& UUIDType::operator=(UUIDType&& other46) noexcept { + (void) other46; return *this; } void UUIDType::printTo(std::ostream& out) const { @@ -898,18 +1233,18 @@ void swap(MapType &a, MapType &b) { (void) b; } -MapType::MapType(const MapType& other28) noexcept { - (void) other28; +MapType::MapType(const MapType& other47) noexcept { + (void) other47; } -MapType::MapType(MapType&& other29) noexcept { - (void) other29; +MapType::MapType(MapType&& other48) noexcept { + (void) other48; } -MapType& MapType::operator=(const MapType& other30) noexcept { - (void) other30; +MapType& MapType::operator=(const MapType& other49) noexcept { + (void) other49; return *this; } -MapType& MapType::operator=(MapType&& other31) noexcept { - (void) other31; +MapType& MapType::operator=(MapType&& other50) noexcept { + (void) other50; return *this; } void MapType::printTo(std::ostream& out) const { @@ -935,18 +1270,18 @@ void swap(ListType &a, ListType &b) { (void) b; } -ListType::ListType(const ListType& other32) noexcept { - (void) other32; +ListType::ListType(const ListType& other51) noexcept { + (void) other51; } -ListType::ListType(ListType&& other33) noexcept { - (void) other33; +ListType::ListType(ListType&& other52) noexcept { + (void) other52; } -ListType& ListType::operator=(const ListType& other34) noexcept { - (void) other34; +ListType& ListType::operator=(const ListType& other53) noexcept { + (void) other53; return *this; } -ListType& ListType::operator=(ListType&& other35) noexcept { - (void) other35; +ListType& ListType::operator=(ListType&& other54) noexcept { + (void) other54; return *this; } void ListType::printTo(std::ostream& out) const { @@ -972,18 +1307,18 @@ void swap(EnumType &a, EnumType &b) { (void) b; } 
-EnumType::EnumType(const EnumType& other36) noexcept { - (void) other36; +EnumType::EnumType(const EnumType& other55) noexcept { + (void) other55; } -EnumType::EnumType(EnumType&& other37) noexcept { - (void) other37; +EnumType::EnumType(EnumType&& other56) noexcept { + (void) other56; } -EnumType& EnumType::operator=(const EnumType& other38) noexcept { - (void) other38; +EnumType& EnumType::operator=(const EnumType& other57) noexcept { + (void) other57; return *this; } -EnumType& EnumType::operator=(EnumType&& other39) noexcept { - (void) other39; +EnumType& EnumType::operator=(EnumType&& other58) noexcept { + (void) other58; return *this; } void EnumType::printTo(std::ostream& out) const { @@ -1009,18 +1344,18 @@ void swap(DateType &a, DateType &b) { (void) b; } -DateType::DateType(const DateType& other40) noexcept { - (void) other40; +DateType::DateType(const DateType& other59) noexcept { + (void) other59; } -DateType::DateType(DateType&& other41) noexcept { - (void) other41; +DateType::DateType(DateType&& other60) noexcept { + (void) other60; } -DateType& DateType::operator=(const DateType& other42) noexcept { - (void) other42; +DateType& DateType::operator=(const DateType& other61) noexcept { + (void) other61; return *this; } -DateType& DateType::operator=(DateType&& other43) noexcept { - (void) other43; +DateType& DateType::operator=(DateType&& other62) noexcept { + (void) other62; return *this; } void DateType::printTo(std::ostream& out) const { @@ -1046,18 +1381,18 @@ void swap(Float16Type &a, Float16Type &b) { (void) b; } -Float16Type::Float16Type(const Float16Type& other44) noexcept { - (void) other44; +Float16Type::Float16Type(const Float16Type& other63) noexcept { + (void) other63; } -Float16Type::Float16Type(Float16Type&& other45) noexcept { - (void) other45; +Float16Type::Float16Type(Float16Type&& other64) noexcept { + (void) other64; } -Float16Type& Float16Type::operator=(const Float16Type& other46) noexcept { - (void) other46; +Float16Type& 
Float16Type::operator=(const Float16Type& other65) noexcept { + (void) other65; return *this; } -Float16Type& Float16Type::operator=(Float16Type&& other47) noexcept { - (void) other47; +Float16Type& Float16Type::operator=(Float16Type&& other66) noexcept { + (void) other66; return *this; } void Float16Type::printTo(std::ostream& out) const { @@ -1083,18 +1418,18 @@ void swap(NullType &a, NullType &b) { (void) b; } -NullType::NullType(const NullType& other48) noexcept { - (void) other48; +NullType::NullType(const NullType& other67) noexcept { + (void) other67; } -NullType::NullType(NullType&& other49) noexcept { - (void) other49; +NullType::NullType(NullType&& other68) noexcept { + (void) other68; } -NullType& NullType::operator=(const NullType& other50) noexcept { - (void) other50; +NullType& NullType::operator=(const NullType& other69) noexcept { + (void) other69; return *this; } -NullType& NullType::operator=(NullType&& other51) noexcept { - (void) other51; +NullType& NullType::operator=(NullType&& other70) noexcept { + (void) other70; return *this; } void NullType::printTo(std::ostream& out) const { @@ -1128,22 +1463,22 @@ void swap(DecimalType &a, DecimalType &b) { swap(a.precision, b.precision); } -DecimalType::DecimalType(const DecimalType& other52) noexcept { - scale = other52.scale; - precision = other52.precision; +DecimalType::DecimalType(const DecimalType& other71) noexcept { + scale = other71.scale; + precision = other71.precision; } -DecimalType::DecimalType(DecimalType&& other53) noexcept { - scale = other53.scale; - precision = other53.precision; +DecimalType::DecimalType(DecimalType&& other72) noexcept { + scale = other72.scale; + precision = other72.precision; } -DecimalType& DecimalType::operator=(const DecimalType& other54) noexcept { - scale = other54.scale; - precision = other54.precision; +DecimalType& DecimalType::operator=(const DecimalType& other73) noexcept { + scale = other73.scale; + precision = other73.precision; return *this; } 
-DecimalType& DecimalType::operator=(DecimalType&& other55) noexcept { - scale = other55.scale; - precision = other55.precision; +DecimalType& DecimalType::operator=(DecimalType&& other74) noexcept { + scale = other74.scale; + precision = other74.precision; return *this; } void DecimalType::printTo(std::ostream& out) const { @@ -1171,18 +1506,18 @@ void swap(MilliSeconds &a, MilliSeconds &b) { (void) b; } -MilliSeconds::MilliSeconds(const MilliSeconds& other56) noexcept { - (void) other56; +MilliSeconds::MilliSeconds(const MilliSeconds& other75) noexcept { + (void) other75; } -MilliSeconds::MilliSeconds(MilliSeconds&& other57) noexcept { - (void) other57; +MilliSeconds::MilliSeconds(MilliSeconds&& other76) noexcept { + (void) other76; } -MilliSeconds& MilliSeconds::operator=(const MilliSeconds& other58) noexcept { - (void) other58; +MilliSeconds& MilliSeconds::operator=(const MilliSeconds& other77) noexcept { + (void) other77; return *this; } -MilliSeconds& MilliSeconds::operator=(MilliSeconds&& other59) noexcept { - (void) other59; +MilliSeconds& MilliSeconds::operator=(MilliSeconds&& other78) noexcept { + (void) other78; return *this; } void MilliSeconds::printTo(std::ostream& out) const { @@ -1208,18 +1543,18 @@ void swap(MicroSeconds &a, MicroSeconds &b) { (void) b; } -MicroSeconds::MicroSeconds(const MicroSeconds& other60) noexcept { - (void) other60; +MicroSeconds::MicroSeconds(const MicroSeconds& other79) noexcept { + (void) other79; } -MicroSeconds::MicroSeconds(MicroSeconds&& other61) noexcept { - (void) other61; +MicroSeconds::MicroSeconds(MicroSeconds&& other80) noexcept { + (void) other80; } -MicroSeconds& MicroSeconds::operator=(const MicroSeconds& other62) noexcept { - (void) other62; +MicroSeconds& MicroSeconds::operator=(const MicroSeconds& other81) noexcept { + (void) other81; return *this; } -MicroSeconds& MicroSeconds::operator=(MicroSeconds&& other63) noexcept { - (void) other63; +MicroSeconds& MicroSeconds::operator=(MicroSeconds&& other82) 
noexcept { + (void) other82; return *this; } void MicroSeconds::printTo(std::ostream& out) const { @@ -1245,18 +1580,18 @@ void swap(NanoSeconds &a, NanoSeconds &b) { (void) b; } -NanoSeconds::NanoSeconds(const NanoSeconds& other64) noexcept { - (void) other64; +NanoSeconds::NanoSeconds(const NanoSeconds& other83) noexcept { + (void) other83; } -NanoSeconds::NanoSeconds(NanoSeconds&& other65) noexcept { - (void) other65; +NanoSeconds::NanoSeconds(NanoSeconds&& other84) noexcept { + (void) other84; } -NanoSeconds& NanoSeconds::operator=(const NanoSeconds& other66) noexcept { - (void) other66; +NanoSeconds& NanoSeconds::operator=(const NanoSeconds& other85) noexcept { + (void) other85; return *this; } -NanoSeconds& NanoSeconds::operator=(NanoSeconds&& other67) noexcept { - (void) other67; +NanoSeconds& NanoSeconds::operator=(NanoSeconds&& other86) noexcept { + (void) other86; return *this; } void NanoSeconds::printTo(std::ostream& out) const { @@ -1299,30 +1634,30 @@ void swap(TimeUnit &a, TimeUnit &b) { swap(a.__isset, b.__isset); } -TimeUnit::TimeUnit(const TimeUnit& other68) noexcept { - MILLIS = other68.MILLIS; - MICROS = other68.MICROS; - NANOS = other68.NANOS; - __isset = other68.__isset; +TimeUnit::TimeUnit(const TimeUnit& other87) noexcept { + MILLIS = other87.MILLIS; + MICROS = other87.MICROS; + NANOS = other87.NANOS; + __isset = other87.__isset; } -TimeUnit::TimeUnit(TimeUnit&& other69) noexcept { - MILLIS = std::move(other69.MILLIS); - MICROS = std::move(other69.MICROS); - NANOS = std::move(other69.NANOS); - __isset = other69.__isset; +TimeUnit::TimeUnit(TimeUnit&& other88) noexcept { + MILLIS = std::move(other88.MILLIS); + MICROS = std::move(other88.MICROS); + NANOS = std::move(other88.NANOS); + __isset = other88.__isset; } -TimeUnit& TimeUnit::operator=(const TimeUnit& other70) noexcept { - MILLIS = other70.MILLIS; - MICROS = other70.MICROS; - NANOS = other70.NANOS; - __isset = other70.__isset; +TimeUnit& TimeUnit::operator=(const TimeUnit& other89) 
noexcept { + MILLIS = other89.MILLIS; + MICROS = other89.MICROS; + NANOS = other89.NANOS; + __isset = other89.__isset; return *this; } -TimeUnit& TimeUnit::operator=(TimeUnit&& other71) noexcept { - MILLIS = std::move(other71.MILLIS); - MICROS = std::move(other71.MICROS); - NANOS = std::move(other71.NANOS); - __isset = other71.__isset; +TimeUnit& TimeUnit::operator=(TimeUnit&& other90) noexcept { + MILLIS = std::move(other90.MILLIS); + MICROS = std::move(other90.MICROS); + NANOS = std::move(other90.NANOS); + __isset = other90.__isset; return *this; } void TimeUnit::printTo(std::ostream& out) const { @@ -1359,22 +1694,22 @@ void swap(TimestampType &a, TimestampType &b) { swap(a.unit, b.unit); } -TimestampType::TimestampType(const TimestampType& other72) noexcept { - isAdjustedToUTC = other72.isAdjustedToUTC; - unit = other72.unit; +TimestampType::TimestampType(const TimestampType& other91) noexcept { + isAdjustedToUTC = other91.isAdjustedToUTC; + unit = other91.unit; } -TimestampType::TimestampType(TimestampType&& other73) noexcept { - isAdjustedToUTC = other73.isAdjustedToUTC; - unit = std::move(other73.unit); +TimestampType::TimestampType(TimestampType&& other92) noexcept { + isAdjustedToUTC = other92.isAdjustedToUTC; + unit = std::move(other92.unit); } -TimestampType& TimestampType::operator=(const TimestampType& other74) noexcept { - isAdjustedToUTC = other74.isAdjustedToUTC; - unit = other74.unit; +TimestampType& TimestampType::operator=(const TimestampType& other93) noexcept { + isAdjustedToUTC = other93.isAdjustedToUTC; + unit = other93.unit; return *this; } -TimestampType& TimestampType::operator=(TimestampType&& other75) noexcept { - isAdjustedToUTC = other75.isAdjustedToUTC; - unit = std::move(other75.unit); +TimestampType& TimestampType::operator=(TimestampType&& other94) noexcept { + isAdjustedToUTC = other94.isAdjustedToUTC; + unit = std::move(other94.unit); return *this; } void TimestampType::printTo(std::ostream& out) const { @@ -1410,22 +1745,22 @@ 
void swap(TimeType &a, TimeType &b) { swap(a.unit, b.unit); } -TimeType::TimeType(const TimeType& other76) noexcept { - isAdjustedToUTC = other76.isAdjustedToUTC; - unit = other76.unit; +TimeType::TimeType(const TimeType& other95) noexcept { + isAdjustedToUTC = other95.isAdjustedToUTC; + unit = other95.unit; } -TimeType::TimeType(TimeType&& other77) noexcept { - isAdjustedToUTC = other77.isAdjustedToUTC; - unit = std::move(other77.unit); +TimeType::TimeType(TimeType&& other96) noexcept { + isAdjustedToUTC = other96.isAdjustedToUTC; + unit = std::move(other96.unit); } -TimeType& TimeType::operator=(const TimeType& other78) noexcept { - isAdjustedToUTC = other78.isAdjustedToUTC; - unit = other78.unit; +TimeType& TimeType::operator=(const TimeType& other97) noexcept { + isAdjustedToUTC = other97.isAdjustedToUTC; + unit = other97.unit; return *this; } -TimeType& TimeType::operator=(TimeType&& other79) noexcept { - isAdjustedToUTC = other79.isAdjustedToUTC; - unit = std::move(other79.unit); +TimeType& TimeType::operator=(TimeType&& other98) noexcept { + isAdjustedToUTC = other98.isAdjustedToUTC; + unit = std::move(other98.unit); return *this; } void TimeType::printTo(std::ostream& out) const { @@ -1461,22 +1796,22 @@ void swap(IntType &a, IntType &b) { swap(a.isSigned, b.isSigned); } -IntType::IntType(const IntType& other80) noexcept { - bitWidth = other80.bitWidth; - isSigned = other80.isSigned; +IntType::IntType(const IntType& other99) noexcept { + bitWidth = other99.bitWidth; + isSigned = other99.isSigned; } -IntType::IntType(IntType&& other81) noexcept { - bitWidth = other81.bitWidth; - isSigned = other81.isSigned; +IntType::IntType(IntType&& other100) noexcept { + bitWidth = other100.bitWidth; + isSigned = other100.isSigned; } -IntType& IntType::operator=(const IntType& other82) noexcept { - bitWidth = other82.bitWidth; - isSigned = other82.isSigned; +IntType& IntType::operator=(const IntType& other101) noexcept { + bitWidth = other101.bitWidth; + isSigned = 
other101.isSigned; return *this; } -IntType& IntType::operator=(IntType&& other83) noexcept { - bitWidth = other83.bitWidth; - isSigned = other83.isSigned; +IntType& IntType::operator=(IntType&& other102) noexcept { + bitWidth = other102.bitWidth; + isSigned = other102.isSigned; return *this; } void IntType::printTo(std::ostream& out) const { @@ -1504,18 +1839,18 @@ void swap(JsonType &a, JsonType &b) { (void) b; } -JsonType::JsonType(const JsonType& other84) noexcept { - (void) other84; +JsonType::JsonType(const JsonType& other103) noexcept { + (void) other103; } -JsonType::JsonType(JsonType&& other85) noexcept { - (void) other85; +JsonType::JsonType(JsonType&& other104) noexcept { + (void) other104; } -JsonType& JsonType::operator=(const JsonType& other86) noexcept { - (void) other86; +JsonType& JsonType::operator=(const JsonType& other105) noexcept { + (void) other105; return *this; } -JsonType& JsonType::operator=(JsonType&& other87) noexcept { - (void) other87; +JsonType& JsonType::operator=(JsonType&& other106) noexcept { + (void) other106; return *this; } void JsonType::printTo(std::ostream& out) const { @@ -1541,18 +1876,18 @@ void swap(BsonType &a, BsonType &b) { (void) b; } -BsonType::BsonType(const BsonType& other88) noexcept { - (void) other88; +BsonType::BsonType(const BsonType& other107) noexcept { + (void) other107; } -BsonType::BsonType(BsonType&& other89) noexcept { - (void) other89; +BsonType::BsonType(BsonType&& other108) noexcept { + (void) other108; } -BsonType& BsonType::operator=(const BsonType& other90) noexcept { - (void) other90; +BsonType& BsonType::operator=(const BsonType& other109) noexcept { + (void) other109; return *this; } -BsonType& BsonType::operator=(BsonType&& other91) noexcept { - (void) other91; +BsonType& BsonType::operator=(BsonType&& other110) noexcept { + (void) other110; return *this; } void BsonType::printTo(std::ostream& out) const { @@ -1562,6 +1897,84 @@ void BsonType::printTo(std::ostream& out) const { } 
+GeometryType::~GeometryType() noexcept { +} + + +void GeometryType::__set_encoding(const GeometryEncoding::type val) { + this->encoding = val; +} + +void GeometryType::__set_edges(const Edges::type val) { + this->edges = val; +} + +void GeometryType::__set_crs(const std::string& val) { + this->crs = val; +__isset.crs = true; +} + +void GeometryType::__set_metadata(const std::string& val) { + this->metadata = val; +__isset.metadata = true; +} +std::ostream& operator<<(std::ostream& out, const GeometryType& obj) +{ + obj.printTo(out); + return out; +} + + +void swap(GeometryType &a, GeometryType &b) { + using ::std::swap; + swap(a.encoding, b.encoding); + swap(a.edges, b.edges); + swap(a.crs, b.crs); + swap(a.metadata, b.metadata); + swap(a.__isset, b.__isset); +} + +GeometryType::GeometryType(const GeometryType& other113) { + encoding = other113.encoding; + edges = other113.edges; + crs = other113.crs; + metadata = other113.metadata; + __isset = other113.__isset; +} +GeometryType::GeometryType(GeometryType&& other114) noexcept { + encoding = other114.encoding; + edges = other114.edges; + crs = std::move(other114.crs); + metadata = std::move(other114.metadata); + __isset = other114.__isset; +} +GeometryType& GeometryType::operator=(const GeometryType& other115) { + encoding = other115.encoding; + edges = other115.edges; + crs = other115.crs; + metadata = other115.metadata; + __isset = other115.__isset; + return *this; +} +GeometryType& GeometryType::operator=(GeometryType&& other116) noexcept { + encoding = other116.encoding; + edges = other116.edges; + crs = std::move(other116.crs); + metadata = std::move(other116.metadata); + __isset = other116.__isset; + return *this; +} +void GeometryType::printTo(std::ostream& out) const { + using ::apache::thrift::to_string; + out << "GeometryType("; + out << "encoding=" << to_string(encoding); + out << ", " << "edges=" << to_string(edges); + out << ", " << "crs="; (__isset.crs ? 
(out << to_string(crs)) : (out << "")); + out << ", " << "metadata="; (__isset.metadata ? (out << to_string(metadata)) : (out << "")); + out << ")"; +} + + LogicalType::~LogicalType() noexcept { } @@ -1635,6 +2048,11 @@ void LogicalType::__set_FLOAT16(const Float16Type& val) { this->FLOAT16 = val; __isset.FLOAT16 = true; } + +void LogicalType::__set_GEOMETRY(const GeometryType& val) { + this->GEOMETRY = val; +__isset.GEOMETRY = true; +} std::ostream& operator<<(std::ostream& out, const LogicalType& obj) { obj.printTo(out); @@ -1658,77 +2076,82 @@ void swap(LogicalType &a, LogicalType &b) { swap(a.BSON, b.BSON); swap(a.UUID, b.UUID); swap(a.FLOAT16, b.FLOAT16); + swap(a.GEOMETRY, b.GEOMETRY); swap(a.__isset, b.__isset); } -LogicalType::LogicalType(const LogicalType& other92) noexcept { - STRING = other92.STRING; - MAP = other92.MAP; - LIST = other92.LIST; - ENUM = other92.ENUM; - DECIMAL = other92.DECIMAL; - DATE = other92.DATE; - TIME = other92.TIME; - TIMESTAMP = other92.TIMESTAMP; - INTEGER = other92.INTEGER; - UNKNOWN = other92.UNKNOWN; - JSON = other92.JSON; - BSON = other92.BSON; - UUID = other92.UUID; - FLOAT16 = other92.FLOAT16; - __isset = other92.__isset; -} -LogicalType::LogicalType(LogicalType&& other93) noexcept { - STRING = std::move(other93.STRING); - MAP = std::move(other93.MAP); - LIST = std::move(other93.LIST); - ENUM = std::move(other93.ENUM); - DECIMAL = std::move(other93.DECIMAL); - DATE = std::move(other93.DATE); - TIME = std::move(other93.TIME); - TIMESTAMP = std::move(other93.TIMESTAMP); - INTEGER = std::move(other93.INTEGER); - UNKNOWN = std::move(other93.UNKNOWN); - JSON = std::move(other93.JSON); - BSON = std::move(other93.BSON); - UUID = std::move(other93.UUID); - FLOAT16 = std::move(other93.FLOAT16); - __isset = other93.__isset; -} -LogicalType& LogicalType::operator=(const LogicalType& other94) noexcept { - STRING = other94.STRING; - MAP = other94.MAP; - LIST = other94.LIST; - ENUM = other94.ENUM; - DECIMAL = other94.DECIMAL; - DATE = 
other94.DATE; - TIME = other94.TIME; - TIMESTAMP = other94.TIMESTAMP; - INTEGER = other94.INTEGER; - UNKNOWN = other94.UNKNOWN; - JSON = other94.JSON; - BSON = other94.BSON; - UUID = other94.UUID; - FLOAT16 = other94.FLOAT16; - __isset = other94.__isset; +LogicalType::LogicalType(const LogicalType& other117) { + STRING = other117.STRING; + MAP = other117.MAP; + LIST = other117.LIST; + ENUM = other117.ENUM; + DECIMAL = other117.DECIMAL; + DATE = other117.DATE; + TIME = other117.TIME; + TIMESTAMP = other117.TIMESTAMP; + INTEGER = other117.INTEGER; + UNKNOWN = other117.UNKNOWN; + JSON = other117.JSON; + BSON = other117.BSON; + UUID = other117.UUID; + FLOAT16 = other117.FLOAT16; + GEOMETRY = other117.GEOMETRY; + __isset = other117.__isset; +} +LogicalType::LogicalType(LogicalType&& other118) noexcept { + STRING = std::move(other118.STRING); + MAP = std::move(other118.MAP); + LIST = std::move(other118.LIST); + ENUM = std::move(other118.ENUM); + DECIMAL = std::move(other118.DECIMAL); + DATE = std::move(other118.DATE); + TIME = std::move(other118.TIME); + TIMESTAMP = std::move(other118.TIMESTAMP); + INTEGER = std::move(other118.INTEGER); + UNKNOWN = std::move(other118.UNKNOWN); + JSON = std::move(other118.JSON); + BSON = std::move(other118.BSON); + UUID = std::move(other118.UUID); + FLOAT16 = std::move(other118.FLOAT16); + GEOMETRY = std::move(other118.GEOMETRY); + __isset = other118.__isset; +} +LogicalType& LogicalType::operator=(const LogicalType& other119) { + STRING = other119.STRING; + MAP = other119.MAP; + LIST = other119.LIST; + ENUM = other119.ENUM; + DECIMAL = other119.DECIMAL; + DATE = other119.DATE; + TIME = other119.TIME; + TIMESTAMP = other119.TIMESTAMP; + INTEGER = other119.INTEGER; + UNKNOWN = other119.UNKNOWN; + JSON = other119.JSON; + BSON = other119.BSON; + UUID = other119.UUID; + FLOAT16 = other119.FLOAT16; + GEOMETRY = other119.GEOMETRY; + __isset = other119.__isset; return *this; } -LogicalType& LogicalType::operator=(LogicalType&& other95) noexcept 
{ - STRING = std::move(other95.STRING); - MAP = std::move(other95.MAP); - LIST = std::move(other95.LIST); - ENUM = std::move(other95.ENUM); - DECIMAL = std::move(other95.DECIMAL); - DATE = std::move(other95.DATE); - TIME = std::move(other95.TIME); - TIMESTAMP = std::move(other95.TIMESTAMP); - INTEGER = std::move(other95.INTEGER); - UNKNOWN = std::move(other95.UNKNOWN); - JSON = std::move(other95.JSON); - BSON = std::move(other95.BSON); - UUID = std::move(other95.UUID); - FLOAT16 = std::move(other95.FLOAT16); - __isset = other95.__isset; +LogicalType& LogicalType::operator=(LogicalType&& other120) noexcept { + STRING = std::move(other120.STRING); + MAP = std::move(other120.MAP); + LIST = std::move(other120.LIST); + ENUM = std::move(other120.ENUM); + DECIMAL = std::move(other120.DECIMAL); + DATE = std::move(other120.DATE); + TIME = std::move(other120.TIME); + TIMESTAMP = std::move(other120.TIMESTAMP); + INTEGER = std::move(other120.INTEGER); + UNKNOWN = std::move(other120.UNKNOWN); + JSON = std::move(other120.JSON); + BSON = std::move(other120.BSON); + UUID = std::move(other120.UUID); + FLOAT16 = std::move(other120.FLOAT16); + GEOMETRY = std::move(other120.GEOMETRY); + __isset = other120.__isset; return *this; } void LogicalType::printTo(std::ostream& out) const { @@ -1748,6 +2171,7 @@ void LogicalType::printTo(std::ostream& out) const { out << ", " << "BSON="; (__isset.BSON ? (out << to_string(BSON)) : (out << "")); out << ", " << "UUID="; (__isset.UUID ? (out << to_string(UUID)) : (out << "")); out << ", " << "FLOAT16="; (__isset.FLOAT16 ? (out << to_string(FLOAT16)) : (out << "")); + out << ", " << "GEOMETRY="; (__isset.GEOMETRY ? 
(out << to_string(GEOMETRY)) : (out << "")); out << ")"; } @@ -1826,58 +2250,58 @@ void swap(SchemaElement &a, SchemaElement &b) { swap(a.__isset, b.__isset); } -SchemaElement::SchemaElement(const SchemaElement& other99) { - type = other99.type; - type_length = other99.type_length; - repetition_type = other99.repetition_type; - name = other99.name; - num_children = other99.num_children; - converted_type = other99.converted_type; - scale = other99.scale; - precision = other99.precision; - field_id = other99.field_id; - logicalType = other99.logicalType; - __isset = other99.__isset; -} -SchemaElement::SchemaElement(SchemaElement&& other100) noexcept { - type = other100.type; - type_length = other100.type_length; - repetition_type = other100.repetition_type; - name = std::move(other100.name); - num_children = other100.num_children; - converted_type = other100.converted_type; - scale = other100.scale; - precision = other100.precision; - field_id = other100.field_id; - logicalType = std::move(other100.logicalType); - __isset = other100.__isset; -} -SchemaElement& SchemaElement::operator=(const SchemaElement& other101) { - type = other101.type; - type_length = other101.type_length; - repetition_type = other101.repetition_type; - name = other101.name; - num_children = other101.num_children; - converted_type = other101.converted_type; - scale = other101.scale; - precision = other101.precision; - field_id = other101.field_id; - logicalType = other101.logicalType; - __isset = other101.__isset; +SchemaElement::SchemaElement(const SchemaElement& other124) { + type = other124.type; + type_length = other124.type_length; + repetition_type = other124.repetition_type; + name = other124.name; + num_children = other124.num_children; + converted_type = other124.converted_type; + scale = other124.scale; + precision = other124.precision; + field_id = other124.field_id; + logicalType = other124.logicalType; + __isset = other124.__isset; +} +SchemaElement::SchemaElement(SchemaElement&& 
other125) noexcept { + type = other125.type; + type_length = other125.type_length; + repetition_type = other125.repetition_type; + name = std::move(other125.name); + num_children = other125.num_children; + converted_type = other125.converted_type; + scale = other125.scale; + precision = other125.precision; + field_id = other125.field_id; + logicalType = std::move(other125.logicalType); + __isset = other125.__isset; +} +SchemaElement& SchemaElement::operator=(const SchemaElement& other126) { + type = other126.type; + type_length = other126.type_length; + repetition_type = other126.repetition_type; + name = other126.name; + num_children = other126.num_children; + converted_type = other126.converted_type; + scale = other126.scale; + precision = other126.precision; + field_id = other126.field_id; + logicalType = other126.logicalType; + __isset = other126.__isset; return *this; } -SchemaElement& SchemaElement::operator=(SchemaElement&& other102) noexcept { - type = other102.type; - type_length = other102.type_length; - repetition_type = other102.repetition_type; - name = std::move(other102.name); - num_children = other102.num_children; - converted_type = other102.converted_type; - scale = other102.scale; - precision = other102.precision; - field_id = other102.field_id; - logicalType = std::move(other102.logicalType); - __isset = other102.__isset; +SchemaElement& SchemaElement::operator=(SchemaElement&& other127) noexcept { + type = other127.type; + type_length = other127.type_length; + repetition_type = other127.repetition_type; + name = std::move(other127.name); + num_children = other127.num_children; + converted_type = other127.converted_type; + scale = other127.scale; + precision = other127.precision; + field_id = other127.field_id; + logicalType = std::move(other127.logicalType); + __isset = other127.__isset; return *this; } void SchemaElement::printTo(std::ostream& out) const { @@ -1938,38 +2362,38 @@ void swap(DataPageHeader &a, DataPageHeader &b) { 
swap(a.__isset, b.__isset); } -DataPageHeader::DataPageHeader(const DataPageHeader& other106) { - num_values = other106.num_values; - encoding = other106.encoding; - definition_level_encoding = other106.definition_level_encoding; - repetition_level_encoding = other106.repetition_level_encoding; - statistics = other106.statistics; - __isset = other106.__isset; -} -DataPageHeader::DataPageHeader(DataPageHeader&& other107) noexcept { - num_values = other107.num_values; - encoding = other107.encoding; - definition_level_encoding = other107.definition_level_encoding; - repetition_level_encoding = other107.repetition_level_encoding; - statistics = std::move(other107.statistics); - __isset = other107.__isset; -} -DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other108) { - num_values = other108.num_values; - encoding = other108.encoding; - definition_level_encoding = other108.definition_level_encoding; - repetition_level_encoding = other108.repetition_level_encoding; - statistics = other108.statistics; - __isset = other108.__isset; +DataPageHeader::DataPageHeader(const DataPageHeader& other131) { + num_values = other131.num_values; + encoding = other131.encoding; + definition_level_encoding = other131.definition_level_encoding; + repetition_level_encoding = other131.repetition_level_encoding; + statistics = other131.statistics; + __isset = other131.__isset; +} +DataPageHeader::DataPageHeader(DataPageHeader&& other132) noexcept { + num_values = other132.num_values; + encoding = other132.encoding; + definition_level_encoding = other132.definition_level_encoding; + repetition_level_encoding = other132.repetition_level_encoding; + statistics = std::move(other132.statistics); + __isset = other132.__isset; +} +DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other133) { + num_values = other133.num_values; + encoding = other133.encoding; + definition_level_encoding = other133.definition_level_encoding; + repetition_level_encoding = 
other133.repetition_level_encoding; + statistics = other133.statistics; + __isset = other133.__isset; return *this; } -DataPageHeader& DataPageHeader::operator=(DataPageHeader&& other109) noexcept { - num_values = other109.num_values; - encoding = other109.encoding; - definition_level_encoding = other109.definition_level_encoding; - repetition_level_encoding = other109.repetition_level_encoding; - statistics = std::move(other109.statistics); - __isset = other109.__isset; +DataPageHeader& DataPageHeader::operator=(DataPageHeader&& other134) noexcept { + num_values = other134.num_values; + encoding = other134.encoding; + definition_level_encoding = other134.definition_level_encoding; + repetition_level_encoding = other134.repetition_level_encoding; + statistics = std::move(other134.statistics); + __isset = other134.__isset; return *this; } void DataPageHeader::printTo(std::ostream& out) const { @@ -2000,18 +2424,18 @@ void swap(IndexPageHeader &a, IndexPageHeader &b) { (void) b; } -IndexPageHeader::IndexPageHeader(const IndexPageHeader& other110) noexcept { - (void) other110; +IndexPageHeader::IndexPageHeader(const IndexPageHeader& other135) noexcept { + (void) other135; } -IndexPageHeader::IndexPageHeader(IndexPageHeader&& other111) noexcept { - (void) other111; +IndexPageHeader::IndexPageHeader(IndexPageHeader&& other136) noexcept { + (void) other136; } -IndexPageHeader& IndexPageHeader::operator=(const IndexPageHeader& other112) noexcept { - (void) other112; +IndexPageHeader& IndexPageHeader::operator=(const IndexPageHeader& other137) noexcept { + (void) other137; return *this; } -IndexPageHeader& IndexPageHeader::operator=(IndexPageHeader&& other113) noexcept { - (void) other113; +IndexPageHeader& IndexPageHeader::operator=(IndexPageHeader&& other138) noexcept { + (void) other138; return *this; } void IndexPageHeader::printTo(std::ostream& out) const { @@ -2052,30 +2476,30 @@ void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) { swap(a.__isset, 
b.__isset); } -DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other115) noexcept { - num_values = other115.num_values; - encoding = other115.encoding; - is_sorted = other115.is_sorted; - __isset = other115.__isset; +DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other140) noexcept { + num_values = other140.num_values; + encoding = other140.encoding; + is_sorted = other140.is_sorted; + __isset = other140.__isset; } -DictionaryPageHeader::DictionaryPageHeader(DictionaryPageHeader&& other116) noexcept { - num_values = other116.num_values; - encoding = other116.encoding; - is_sorted = other116.is_sorted; - __isset = other116.__isset; +DictionaryPageHeader::DictionaryPageHeader(DictionaryPageHeader&& other141) noexcept { + num_values = other141.num_values; + encoding = other141.encoding; + is_sorted = other141.is_sorted; + __isset = other141.__isset; } -DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other117) noexcept { - num_values = other117.num_values; - encoding = other117.encoding; - is_sorted = other117.is_sorted; - __isset = other117.__isset; +DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other142) noexcept { + num_values = other142.num_values; + encoding = other142.encoding; + is_sorted = other142.is_sorted; + __isset = other142.__isset; return *this; } -DictionaryPageHeader& DictionaryPageHeader::operator=(DictionaryPageHeader&& other118) noexcept { - num_values = other118.num_values; - encoding = other118.encoding; - is_sorted = other118.is_sorted; - __isset = other118.__isset; +DictionaryPageHeader& DictionaryPageHeader::operator=(DictionaryPageHeader&& other143) noexcept { + num_values = other143.num_values; + encoding = other143.encoding; + is_sorted = other143.is_sorted; + __isset = other143.__isset; return *this; } void DictionaryPageHeader::printTo(std::ostream& out) const { @@ -2145,50 +2569,50 @@ void swap(DataPageHeaderV2 &a, 
DataPageHeaderV2 &b) { swap(a.__isset, b.__isset); } -DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other120) { - num_values = other120.num_values; - num_nulls = other120.num_nulls; - num_rows = other120.num_rows; - encoding = other120.encoding; - definition_levels_byte_length = other120.definition_levels_byte_length; - repetition_levels_byte_length = other120.repetition_levels_byte_length; - is_compressed = other120.is_compressed; - statistics = other120.statistics; - __isset = other120.__isset; +DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other145) { + num_values = other145.num_values; + num_nulls = other145.num_nulls; + num_rows = other145.num_rows; + encoding = other145.encoding; + definition_levels_byte_length = other145.definition_levels_byte_length; + repetition_levels_byte_length = other145.repetition_levels_byte_length; + is_compressed = other145.is_compressed; + statistics = other145.statistics; + __isset = other145.__isset; } -DataPageHeaderV2::DataPageHeaderV2(DataPageHeaderV2&& other121) noexcept { - num_values = other121.num_values; - num_nulls = other121.num_nulls; - num_rows = other121.num_rows; - encoding = other121.encoding; - definition_levels_byte_length = other121.definition_levels_byte_length; - repetition_levels_byte_length = other121.repetition_levels_byte_length; - is_compressed = other121.is_compressed; - statistics = std::move(other121.statistics); - __isset = other121.__isset; -} -DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& other122) { - num_values = other122.num_values; - num_nulls = other122.num_nulls; - num_rows = other122.num_rows; - encoding = other122.encoding; - definition_levels_byte_length = other122.definition_levels_byte_length; - repetition_levels_byte_length = other122.repetition_levels_byte_length; - is_compressed = other122.is_compressed; - statistics = other122.statistics; - __isset = other122.__isset; +DataPageHeaderV2::DataPageHeaderV2(DataPageHeaderV2&& other146) 
noexcept { + num_values = other146.num_values; + num_nulls = other146.num_nulls; + num_rows = other146.num_rows; + encoding = other146.encoding; + definition_levels_byte_length = other146.definition_levels_byte_length; + repetition_levels_byte_length = other146.repetition_levels_byte_length; + is_compressed = other146.is_compressed; + statistics = std::move(other146.statistics); + __isset = other146.__isset; +} +DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& other147) { + num_values = other147.num_values; + num_nulls = other147.num_nulls; + num_rows = other147.num_rows; + encoding = other147.encoding; + definition_levels_byte_length = other147.definition_levels_byte_length; + repetition_levels_byte_length = other147.repetition_levels_byte_length; + is_compressed = other147.is_compressed; + statistics = other147.statistics; + __isset = other147.__isset; return *this; } -DataPageHeaderV2& DataPageHeaderV2::operator=(DataPageHeaderV2&& other123) noexcept { - num_values = other123.num_values; - num_nulls = other123.num_nulls; - num_rows = other123.num_rows; - encoding = other123.encoding; - definition_levels_byte_length = other123.definition_levels_byte_length; - repetition_levels_byte_length = other123.repetition_levels_byte_length; - is_compressed = other123.is_compressed; - statistics = std::move(other123.statistics); - __isset = other123.__isset; +DataPageHeaderV2& DataPageHeaderV2::operator=(DataPageHeaderV2&& other148) noexcept { + num_values = other148.num_values; + num_nulls = other148.num_nulls; + num_rows = other148.num_rows; + encoding = other148.encoding; + definition_levels_byte_length = other148.definition_levels_byte_length; + repetition_levels_byte_length = other148.repetition_levels_byte_length; + is_compressed = other148.is_compressed; + statistics = std::move(other148.statistics); + __isset = other148.__isset; return *this; } void DataPageHeaderV2::printTo(std::ostream& out) const { @@ -2222,18 +2646,18 @@ void 
swap(SplitBlockAlgorithm &a, SplitBlockAlgorithm &b) { (void) b; } -SplitBlockAlgorithm::SplitBlockAlgorithm(const SplitBlockAlgorithm& other124) noexcept { - (void) other124; +SplitBlockAlgorithm::SplitBlockAlgorithm(const SplitBlockAlgorithm& other149) noexcept { + (void) other149; } -SplitBlockAlgorithm::SplitBlockAlgorithm(SplitBlockAlgorithm&& other125) noexcept { - (void) other125; +SplitBlockAlgorithm::SplitBlockAlgorithm(SplitBlockAlgorithm&& other150) noexcept { + (void) other150; } -SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(const SplitBlockAlgorithm& other126) noexcept { - (void) other126; +SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(const SplitBlockAlgorithm& other151) noexcept { + (void) other151; return *this; } -SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(SplitBlockAlgorithm&& other127) noexcept { - (void) other127; +SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(SplitBlockAlgorithm&& other152) noexcept { + (void) other152; return *this; } void SplitBlockAlgorithm::printTo(std::ostream& out) const { @@ -2264,22 +2688,22 @@ void swap(BloomFilterAlgorithm &a, BloomFilterAlgorithm &b) { swap(a.__isset, b.__isset); } -BloomFilterAlgorithm::BloomFilterAlgorithm(const BloomFilterAlgorithm& other128) noexcept { - BLOCK = other128.BLOCK; - __isset = other128.__isset; +BloomFilterAlgorithm::BloomFilterAlgorithm(const BloomFilterAlgorithm& other153) noexcept { + BLOCK = other153.BLOCK; + __isset = other153.__isset; } -BloomFilterAlgorithm::BloomFilterAlgorithm(BloomFilterAlgorithm&& other129) noexcept { - BLOCK = std::move(other129.BLOCK); - __isset = other129.__isset; +BloomFilterAlgorithm::BloomFilterAlgorithm(BloomFilterAlgorithm&& other154) noexcept { + BLOCK = std::move(other154.BLOCK); + __isset = other154.__isset; } -BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(const BloomFilterAlgorithm& other130) noexcept { - BLOCK = other130.BLOCK; - __isset = other130.__isset; +BloomFilterAlgorithm& 
BloomFilterAlgorithm::operator=(const BloomFilterAlgorithm& other155) noexcept { + BLOCK = other155.BLOCK; + __isset = other155.__isset; return *this; } -BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(BloomFilterAlgorithm&& other131) noexcept { - BLOCK = std::move(other131.BLOCK); - __isset = other131.__isset; +BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(BloomFilterAlgorithm&& other156) noexcept { + BLOCK = std::move(other156.BLOCK); + __isset = other156.__isset; return *this; } void BloomFilterAlgorithm::printTo(std::ostream& out) const { @@ -2306,18 +2730,18 @@ void swap(XxHash &a, XxHash &b) { (void) b; } -XxHash::XxHash(const XxHash& other132) noexcept { - (void) other132; +XxHash::XxHash(const XxHash& other157) noexcept { + (void) other157; } -XxHash::XxHash(XxHash&& other133) noexcept { - (void) other133; +XxHash::XxHash(XxHash&& other158) noexcept { + (void) other158; } -XxHash& XxHash::operator=(const XxHash& other134) noexcept { - (void) other134; +XxHash& XxHash::operator=(const XxHash& other159) noexcept { + (void) other159; return *this; } -XxHash& XxHash::operator=(XxHash&& other135) noexcept { - (void) other135; +XxHash& XxHash::operator=(XxHash&& other160) noexcept { + (void) other160; return *this; } void XxHash::printTo(std::ostream& out) const { @@ -2348,22 +2772,22 @@ void swap(BloomFilterHash &a, BloomFilterHash &b) { swap(a.__isset, b.__isset); } -BloomFilterHash::BloomFilterHash(const BloomFilterHash& other136) noexcept { - XXHASH = other136.XXHASH; - __isset = other136.__isset; +BloomFilterHash::BloomFilterHash(const BloomFilterHash& other161) noexcept { + XXHASH = other161.XXHASH; + __isset = other161.__isset; } -BloomFilterHash::BloomFilterHash(BloomFilterHash&& other137) noexcept { - XXHASH = std::move(other137.XXHASH); - __isset = other137.__isset; +BloomFilterHash::BloomFilterHash(BloomFilterHash&& other162) noexcept { + XXHASH = std::move(other162.XXHASH); + __isset = other162.__isset; } -BloomFilterHash& 
BloomFilterHash::operator=(const BloomFilterHash& other138) noexcept { - XXHASH = other138.XXHASH; - __isset = other138.__isset; +BloomFilterHash& BloomFilterHash::operator=(const BloomFilterHash& other163) noexcept { + XXHASH = other163.XXHASH; + __isset = other163.__isset; return *this; } -BloomFilterHash& BloomFilterHash::operator=(BloomFilterHash&& other139) noexcept { - XXHASH = std::move(other139.XXHASH); - __isset = other139.__isset; +BloomFilterHash& BloomFilterHash::operator=(BloomFilterHash&& other164) noexcept { + XXHASH = std::move(other164.XXHASH); + __isset = other164.__isset; return *this; } void BloomFilterHash::printTo(std::ostream& out) const { @@ -2390,18 +2814,18 @@ void swap(Uncompressed &a, Uncompressed &b) { (void) b; } -Uncompressed::Uncompressed(const Uncompressed& other140) noexcept { - (void) other140; +Uncompressed::Uncompressed(const Uncompressed& other165) noexcept { + (void) other165; } -Uncompressed::Uncompressed(Uncompressed&& other141) noexcept { - (void) other141; +Uncompressed::Uncompressed(Uncompressed&& other166) noexcept { + (void) other166; } -Uncompressed& Uncompressed::operator=(const Uncompressed& other142) noexcept { - (void) other142; +Uncompressed& Uncompressed::operator=(const Uncompressed& other167) noexcept { + (void) other167; return *this; } -Uncompressed& Uncompressed::operator=(Uncompressed&& other143) noexcept { - (void) other143; +Uncompressed& Uncompressed::operator=(Uncompressed&& other168) noexcept { + (void) other168; return *this; } void Uncompressed::printTo(std::ostream& out) const { @@ -2432,22 +2856,22 @@ void swap(BloomFilterCompression &a, BloomFilterCompression &b) { swap(a.__isset, b.__isset); } -BloomFilterCompression::BloomFilterCompression(const BloomFilterCompression& other144) noexcept { - UNCOMPRESSED = other144.UNCOMPRESSED; - __isset = other144.__isset; +BloomFilterCompression::BloomFilterCompression(const BloomFilterCompression& other169) noexcept { + UNCOMPRESSED = other169.UNCOMPRESSED; 
+ __isset = other169.__isset; } -BloomFilterCompression::BloomFilterCompression(BloomFilterCompression&& other145) noexcept { - UNCOMPRESSED = std::move(other145.UNCOMPRESSED); - __isset = other145.__isset; +BloomFilterCompression::BloomFilterCompression(BloomFilterCompression&& other170) noexcept { + UNCOMPRESSED = std::move(other170.UNCOMPRESSED); + __isset = other170.__isset; } -BloomFilterCompression& BloomFilterCompression::operator=(const BloomFilterCompression& other146) noexcept { - UNCOMPRESSED = other146.UNCOMPRESSED; - __isset = other146.__isset; +BloomFilterCompression& BloomFilterCompression::operator=(const BloomFilterCompression& other171) noexcept { + UNCOMPRESSED = other171.UNCOMPRESSED; + __isset = other171.__isset; return *this; } -BloomFilterCompression& BloomFilterCompression::operator=(BloomFilterCompression&& other147) noexcept { - UNCOMPRESSED = std::move(other147.UNCOMPRESSED); - __isset = other147.__isset; +BloomFilterCompression& BloomFilterCompression::operator=(BloomFilterCompression&& other172) noexcept { + UNCOMPRESSED = std::move(other172.UNCOMPRESSED); + __isset = other172.__isset; return *this; } void BloomFilterCompression::printTo(std::ostream& out) const { @@ -2492,30 +2916,30 @@ void swap(BloomFilterHeader &a, BloomFilterHeader &b) { swap(a.compression, b.compression); } -BloomFilterHeader::BloomFilterHeader(const BloomFilterHeader& other148) noexcept { - numBytes = other148.numBytes; - algorithm = other148.algorithm; - hash = other148.hash; - compression = other148.compression; +BloomFilterHeader::BloomFilterHeader(const BloomFilterHeader& other173) noexcept { + numBytes = other173.numBytes; + algorithm = other173.algorithm; + hash = other173.hash; + compression = other173.compression; } -BloomFilterHeader::BloomFilterHeader(BloomFilterHeader&& other149) noexcept { - numBytes = other149.numBytes; - algorithm = std::move(other149.algorithm); - hash = std::move(other149.hash); - compression = std::move(other149.compression); 
+BloomFilterHeader::BloomFilterHeader(BloomFilterHeader&& other174) noexcept { + numBytes = other174.numBytes; + algorithm = std::move(other174.algorithm); + hash = std::move(other174.hash); + compression = std::move(other174.compression); } -BloomFilterHeader& BloomFilterHeader::operator=(const BloomFilterHeader& other150) noexcept { - numBytes = other150.numBytes; - algorithm = other150.algorithm; - hash = other150.hash; - compression = other150.compression; +BloomFilterHeader& BloomFilterHeader::operator=(const BloomFilterHeader& other175) noexcept { + numBytes = other175.numBytes; + algorithm = other175.algorithm; + hash = other175.hash; + compression = other175.compression; return *this; } -BloomFilterHeader& BloomFilterHeader::operator=(BloomFilterHeader&& other151) noexcept { - numBytes = other151.numBytes; - algorithm = std::move(other151.algorithm); - hash = std::move(other151.hash); - compression = std::move(other151.compression); +BloomFilterHeader& BloomFilterHeader::operator=(BloomFilterHeader&& other176) noexcept { + numBytes = other176.numBytes; + algorithm = std::move(other176.algorithm); + hash = std::move(other176.hash); + compression = std::move(other176.compression); return *this; } void BloomFilterHeader::printTo(std::ostream& out) const { @@ -2589,50 +3013,50 @@ void swap(PageHeader &a, PageHeader &b) { swap(a.__isset, b.__isset); } -PageHeader::PageHeader(const PageHeader& other153) { - type = other153.type; - uncompressed_page_size = other153.uncompressed_page_size; - compressed_page_size = other153.compressed_page_size; - crc = other153.crc; - data_page_header = other153.data_page_header; - index_page_header = other153.index_page_header; - dictionary_page_header = other153.dictionary_page_header; - data_page_header_v2 = other153.data_page_header_v2; - __isset = other153.__isset; -} -PageHeader::PageHeader(PageHeader&& other154) noexcept { - type = other154.type; - uncompressed_page_size = other154.uncompressed_page_size; - 
compressed_page_size = other154.compressed_page_size; - crc = other154.crc; - data_page_header = std::move(other154.data_page_header); - index_page_header = std::move(other154.index_page_header); - dictionary_page_header = std::move(other154.dictionary_page_header); - data_page_header_v2 = std::move(other154.data_page_header_v2); - __isset = other154.__isset; -} -PageHeader& PageHeader::operator=(const PageHeader& other155) { - type = other155.type; - uncompressed_page_size = other155.uncompressed_page_size; - compressed_page_size = other155.compressed_page_size; - crc = other155.crc; - data_page_header = other155.data_page_header; - index_page_header = other155.index_page_header; - dictionary_page_header = other155.dictionary_page_header; - data_page_header_v2 = other155.data_page_header_v2; - __isset = other155.__isset; +PageHeader::PageHeader(const PageHeader& other178) { + type = other178.type; + uncompressed_page_size = other178.uncompressed_page_size; + compressed_page_size = other178.compressed_page_size; + crc = other178.crc; + data_page_header = other178.data_page_header; + index_page_header = other178.index_page_header; + dictionary_page_header = other178.dictionary_page_header; + data_page_header_v2 = other178.data_page_header_v2; + __isset = other178.__isset; +} +PageHeader::PageHeader(PageHeader&& other179) noexcept { + type = other179.type; + uncompressed_page_size = other179.uncompressed_page_size; + compressed_page_size = other179.compressed_page_size; + crc = other179.crc; + data_page_header = std::move(other179.data_page_header); + index_page_header = std::move(other179.index_page_header); + dictionary_page_header = std::move(other179.dictionary_page_header); + data_page_header_v2 = std::move(other179.data_page_header_v2); + __isset = other179.__isset; +} +PageHeader& PageHeader::operator=(const PageHeader& other180) { + type = other180.type; + uncompressed_page_size = other180.uncompressed_page_size; + compressed_page_size = 
other180.compressed_page_size; + crc = other180.crc; + data_page_header = other180.data_page_header; + index_page_header = other180.index_page_header; + dictionary_page_header = other180.dictionary_page_header; + data_page_header_v2 = other180.data_page_header_v2; + __isset = other180.__isset; return *this; } -PageHeader& PageHeader::operator=(PageHeader&& other156) noexcept { - type = other156.type; - uncompressed_page_size = other156.uncompressed_page_size; - compressed_page_size = other156.compressed_page_size; - crc = other156.crc; - data_page_header = std::move(other156.data_page_header); - index_page_header = std::move(other156.index_page_header); - dictionary_page_header = std::move(other156.dictionary_page_header); - data_page_header_v2 = std::move(other156.data_page_header_v2); - __isset = other156.__isset; +PageHeader& PageHeader::operator=(PageHeader&& other181) noexcept { + type = other181.type; + uncompressed_page_size = other181.uncompressed_page_size; + compressed_page_size = other181.compressed_page_size; + crc = other181.crc; + data_page_header = std::move(other181.data_page_header); + index_page_header = std::move(other181.index_page_header); + dictionary_page_header = std::move(other181.dictionary_page_header); + data_page_header_v2 = std::move(other181.data_page_header_v2); + __isset = other181.__isset; return *this; } void PageHeader::printTo(std::ostream& out) const { @@ -2676,26 +3100,26 @@ void swap(KeyValue &a, KeyValue &b) { swap(a.__isset, b.__isset); } -KeyValue::KeyValue(const KeyValue& other157) { - key = other157.key; - value = other157.value; - __isset = other157.__isset; +KeyValue::KeyValue(const KeyValue& other182) { + key = other182.key; + value = other182.value; + __isset = other182.__isset; } -KeyValue::KeyValue(KeyValue&& other158) noexcept { - key = std::move(other158.key); - value = std::move(other158.value); - __isset = other158.__isset; +KeyValue::KeyValue(KeyValue&& other183) noexcept { + key = std::move(other183.key); + 
value = std::move(other183.value); + __isset = other183.__isset; } -KeyValue& KeyValue::operator=(const KeyValue& other159) { - key = other159.key; - value = other159.value; - __isset = other159.__isset; +KeyValue& KeyValue::operator=(const KeyValue& other184) { + key = other184.key; + value = other184.value; + __isset = other184.__isset; return *this; } -KeyValue& KeyValue::operator=(KeyValue&& other160) noexcept { - key = std::move(other160.key); - value = std::move(other160.value); - __isset = other160.__isset; +KeyValue& KeyValue::operator=(KeyValue&& other185) noexcept { + key = std::move(other185.key); + value = std::move(other185.value); + __isset = other185.__isset; return *this; } void KeyValue::printTo(std::ostream& out) const { @@ -2736,26 +3160,26 @@ void swap(SortingColumn &a, SortingColumn &b) { swap(a.nulls_first, b.nulls_first); } -SortingColumn::SortingColumn(const SortingColumn& other161) noexcept { - column_idx = other161.column_idx; - descending = other161.descending; - nulls_first = other161.nulls_first; +SortingColumn::SortingColumn(const SortingColumn& other186) noexcept { + column_idx = other186.column_idx; + descending = other186.descending; + nulls_first = other186.nulls_first; } -SortingColumn::SortingColumn(SortingColumn&& other162) noexcept { - column_idx = other162.column_idx; - descending = other162.descending; - nulls_first = other162.nulls_first; +SortingColumn::SortingColumn(SortingColumn&& other187) noexcept { + column_idx = other187.column_idx; + descending = other187.descending; + nulls_first = other187.nulls_first; } -SortingColumn& SortingColumn::operator=(const SortingColumn& other163) noexcept { - column_idx = other163.column_idx; - descending = other163.descending; - nulls_first = other163.nulls_first; +SortingColumn& SortingColumn::operator=(const SortingColumn& other188) noexcept { + column_idx = other188.column_idx; + descending = other188.descending; + nulls_first = other188.nulls_first; return *this; } -SortingColumn& 
SortingColumn::operator=(SortingColumn&& other164) noexcept { - column_idx = other164.column_idx; - descending = other164.descending; - nulls_first = other164.nulls_first; +SortingColumn& SortingColumn::operator=(SortingColumn&& other189) noexcept { + column_idx = other189.column_idx; + descending = other189.descending; + nulls_first = other189.nulls_first; return *this; } void SortingColumn::printTo(std::ostream& out) const { @@ -2797,26 +3221,26 @@ void swap(PageEncodingStats &a, PageEncodingStats &b) { swap(a.count, b.count); } -PageEncodingStats::PageEncodingStats(const PageEncodingStats& other167) noexcept { - page_type = other167.page_type; - encoding = other167.encoding; - count = other167.count; +PageEncodingStats::PageEncodingStats(const PageEncodingStats& other192) noexcept { + page_type = other192.page_type; + encoding = other192.encoding; + count = other192.count; } -PageEncodingStats::PageEncodingStats(PageEncodingStats&& other168) noexcept { - page_type = other168.page_type; - encoding = other168.encoding; - count = other168.count; +PageEncodingStats::PageEncodingStats(PageEncodingStats&& other193) noexcept { + page_type = other193.page_type; + encoding = other193.encoding; + count = other193.count; } -PageEncodingStats& PageEncodingStats::operator=(const PageEncodingStats& other169) noexcept { - page_type = other169.page_type; - encoding = other169.encoding; - count = other169.count; +PageEncodingStats& PageEncodingStats::operator=(const PageEncodingStats& other194) noexcept { + page_type = other194.page_type; + encoding = other194.encoding; + count = other194.count; return *this; } -PageEncodingStats& PageEncodingStats::operator=(PageEncodingStats&& other170) noexcept { - page_type = other170.page_type; - encoding = other170.encoding; - count = other170.count; +PageEncodingStats& PageEncodingStats::operator=(PageEncodingStats&& other195) noexcept { + page_type = other195.page_type; + encoding = other195.encoding; + count = other195.count; return 
*this; } void PageEncodingStats::printTo(std::ostream& out) const { @@ -2932,82 +3356,82 @@ void swap(ColumnMetaData &a, ColumnMetaData &b) { swap(a.__isset, b.__isset); } -ColumnMetaData::ColumnMetaData(const ColumnMetaData& other198) { - type = other198.type; - encodings = other198.encodings; - path_in_schema = other198.path_in_schema; - codec = other198.codec; - num_values = other198.num_values; - total_uncompressed_size = other198.total_uncompressed_size; - total_compressed_size = other198.total_compressed_size; - key_value_metadata = other198.key_value_metadata; - data_page_offset = other198.data_page_offset; - index_page_offset = other198.index_page_offset; - dictionary_page_offset = other198.dictionary_page_offset; - statistics = other198.statistics; - encoding_stats = other198.encoding_stats; - bloom_filter_offset = other198.bloom_filter_offset; - bloom_filter_length = other198.bloom_filter_length; - size_statistics = other198.size_statistics; - __isset = other198.__isset; -} -ColumnMetaData::ColumnMetaData(ColumnMetaData&& other199) noexcept { - type = other199.type; - encodings = std::move(other199.encodings); - path_in_schema = std::move(other199.path_in_schema); - codec = other199.codec; - num_values = other199.num_values; - total_uncompressed_size = other199.total_uncompressed_size; - total_compressed_size = other199.total_compressed_size; - key_value_metadata = std::move(other199.key_value_metadata); - data_page_offset = other199.data_page_offset; - index_page_offset = other199.index_page_offset; - dictionary_page_offset = other199.dictionary_page_offset; - statistics = std::move(other199.statistics); - encoding_stats = std::move(other199.encoding_stats); - bloom_filter_offset = other199.bloom_filter_offset; - bloom_filter_length = other199.bloom_filter_length; - size_statistics = std::move(other199.size_statistics); - __isset = other199.__isset; -} -ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other200) { - type = other200.type; - 
encodings = other200.encodings; - path_in_schema = other200.path_in_schema; - codec = other200.codec; - num_values = other200.num_values; - total_uncompressed_size = other200.total_uncompressed_size; - total_compressed_size = other200.total_compressed_size; - key_value_metadata = other200.key_value_metadata; - data_page_offset = other200.data_page_offset; - index_page_offset = other200.index_page_offset; - dictionary_page_offset = other200.dictionary_page_offset; - statistics = other200.statistics; - encoding_stats = other200.encoding_stats; - bloom_filter_offset = other200.bloom_filter_offset; - bloom_filter_length = other200.bloom_filter_length; - size_statistics = other200.size_statistics; - __isset = other200.__isset; +ColumnMetaData::ColumnMetaData(const ColumnMetaData& other223) { + type = other223.type; + encodings = other223.encodings; + path_in_schema = other223.path_in_schema; + codec = other223.codec; + num_values = other223.num_values; + total_uncompressed_size = other223.total_uncompressed_size; + total_compressed_size = other223.total_compressed_size; + key_value_metadata = other223.key_value_metadata; + data_page_offset = other223.data_page_offset; + index_page_offset = other223.index_page_offset; + dictionary_page_offset = other223.dictionary_page_offset; + statistics = other223.statistics; + encoding_stats = other223.encoding_stats; + bloom_filter_offset = other223.bloom_filter_offset; + bloom_filter_length = other223.bloom_filter_length; + size_statistics = other223.size_statistics; + __isset = other223.__isset; +} +ColumnMetaData::ColumnMetaData(ColumnMetaData&& other224) noexcept { + type = other224.type; + encodings = std::move(other224.encodings); + path_in_schema = std::move(other224.path_in_schema); + codec = other224.codec; + num_values = other224.num_values; + total_uncompressed_size = other224.total_uncompressed_size; + total_compressed_size = other224.total_compressed_size; + key_value_metadata = std::move(other224.key_value_metadata); + 
data_page_offset = other224.data_page_offset; + index_page_offset = other224.index_page_offset; + dictionary_page_offset = other224.dictionary_page_offset; + statistics = std::move(other224.statistics); + encoding_stats = std::move(other224.encoding_stats); + bloom_filter_offset = other224.bloom_filter_offset; + bloom_filter_length = other224.bloom_filter_length; + size_statistics = std::move(other224.size_statistics); + __isset = other224.__isset; +} +ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other225) { + type = other225.type; + encodings = other225.encodings; + path_in_schema = other225.path_in_schema; + codec = other225.codec; + num_values = other225.num_values; + total_uncompressed_size = other225.total_uncompressed_size; + total_compressed_size = other225.total_compressed_size; + key_value_metadata = other225.key_value_metadata; + data_page_offset = other225.data_page_offset; + index_page_offset = other225.index_page_offset; + dictionary_page_offset = other225.dictionary_page_offset; + statistics = other225.statistics; + encoding_stats = other225.encoding_stats; + bloom_filter_offset = other225.bloom_filter_offset; + bloom_filter_length = other225.bloom_filter_length; + size_statistics = other225.size_statistics; + __isset = other225.__isset; return *this; } -ColumnMetaData& ColumnMetaData::operator=(ColumnMetaData&& other201) noexcept { - type = other201.type; - encodings = std::move(other201.encodings); - path_in_schema = std::move(other201.path_in_schema); - codec = other201.codec; - num_values = other201.num_values; - total_uncompressed_size = other201.total_uncompressed_size; - total_compressed_size = other201.total_compressed_size; - key_value_metadata = std::move(other201.key_value_metadata); - data_page_offset = other201.data_page_offset; - index_page_offset = other201.index_page_offset; - dictionary_page_offset = other201.dictionary_page_offset; - statistics = std::move(other201.statistics); - encoding_stats = 
std::move(other201.encoding_stats); - bloom_filter_offset = other201.bloom_filter_offset; - bloom_filter_length = other201.bloom_filter_length; - size_statistics = std::move(other201.size_statistics); - __isset = other201.__isset; +ColumnMetaData& ColumnMetaData::operator=(ColumnMetaData&& other226) noexcept { + type = other226.type; + encodings = std::move(other226.encodings); + path_in_schema = std::move(other226.path_in_schema); + codec = other226.codec; + num_values = other226.num_values; + total_uncompressed_size = other226.total_uncompressed_size; + total_compressed_size = other226.total_compressed_size; + key_value_metadata = std::move(other226.key_value_metadata); + data_page_offset = other226.data_page_offset; + index_page_offset = other226.index_page_offset; + dictionary_page_offset = other226.dictionary_page_offset; + statistics = std::move(other226.statistics); + encoding_stats = std::move(other226.encoding_stats); + bloom_filter_offset = other226.bloom_filter_offset; + bloom_filter_length = other226.bloom_filter_length; + size_statistics = std::move(other226.size_statistics); + __isset = other226.__isset; return *this; } void ColumnMetaData::printTo(std::ostream& out) const { @@ -3049,18 +3473,18 @@ void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b) { (void) b; } -EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& other202) noexcept { - (void) other202; +EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& other227) noexcept { + (void) other227; } -EncryptionWithFooterKey::EncryptionWithFooterKey(EncryptionWithFooterKey&& other203) noexcept { - (void) other203; +EncryptionWithFooterKey::EncryptionWithFooterKey(EncryptionWithFooterKey&& other228) noexcept { + (void) other228; } -EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(const EncryptionWithFooterKey& other204) noexcept { - (void) other204; +EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(const 
EncryptionWithFooterKey& other229) noexcept { + (void) other229; return *this; } -EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(EncryptionWithFooterKey&& other205) noexcept { - (void) other205; +EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(EncryptionWithFooterKey&& other230) noexcept { + (void) other230; return *this; } void EncryptionWithFooterKey::printTo(std::ostream& out) const { @@ -3096,26 +3520,26 @@ void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b) { swap(a.__isset, b.__isset); } -EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other212) { - path_in_schema = other212.path_in_schema; - key_metadata = other212.key_metadata; - __isset = other212.__isset; +EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other237) { + path_in_schema = other237.path_in_schema; + key_metadata = other237.key_metadata; + __isset = other237.__isset; } -EncryptionWithColumnKey::EncryptionWithColumnKey(EncryptionWithColumnKey&& other213) noexcept { - path_in_schema = std::move(other213.path_in_schema); - key_metadata = std::move(other213.key_metadata); - __isset = other213.__isset; +EncryptionWithColumnKey::EncryptionWithColumnKey(EncryptionWithColumnKey&& other238) noexcept { + path_in_schema = std::move(other238.path_in_schema); + key_metadata = std::move(other238.key_metadata); + __isset = other238.__isset; } -EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(const EncryptionWithColumnKey& other214) { - path_in_schema = other214.path_in_schema; - key_metadata = other214.key_metadata; - __isset = other214.__isset; +EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(const EncryptionWithColumnKey& other239) { + path_in_schema = other239.path_in_schema; + key_metadata = other239.key_metadata; + __isset = other239.__isset; return *this; } -EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(EncryptionWithColumnKey&& other215) noexcept { - path_in_schema 
= std::move(other215.path_in_schema); - key_metadata = std::move(other215.key_metadata); - __isset = other215.__isset; +EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(EncryptionWithColumnKey&& other240) noexcept { + path_in_schema = std::move(other240.path_in_schema); + key_metadata = std::move(other240.key_metadata); + __isset = other240.__isset; return *this; } void EncryptionWithColumnKey::printTo(std::ostream& out) const { @@ -3154,26 +3578,26 @@ void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b) { swap(a.__isset, b.__isset); } -ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other216) { - ENCRYPTION_WITH_FOOTER_KEY = other216.ENCRYPTION_WITH_FOOTER_KEY; - ENCRYPTION_WITH_COLUMN_KEY = other216.ENCRYPTION_WITH_COLUMN_KEY; - __isset = other216.__isset; +ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other241) { + ENCRYPTION_WITH_FOOTER_KEY = other241.ENCRYPTION_WITH_FOOTER_KEY; + ENCRYPTION_WITH_COLUMN_KEY = other241.ENCRYPTION_WITH_COLUMN_KEY; + __isset = other241.__isset; } -ColumnCryptoMetaData::ColumnCryptoMetaData(ColumnCryptoMetaData&& other217) noexcept { - ENCRYPTION_WITH_FOOTER_KEY = std::move(other217.ENCRYPTION_WITH_FOOTER_KEY); - ENCRYPTION_WITH_COLUMN_KEY = std::move(other217.ENCRYPTION_WITH_COLUMN_KEY); - __isset = other217.__isset; +ColumnCryptoMetaData::ColumnCryptoMetaData(ColumnCryptoMetaData&& other242) noexcept { + ENCRYPTION_WITH_FOOTER_KEY = std::move(other242.ENCRYPTION_WITH_FOOTER_KEY); + ENCRYPTION_WITH_COLUMN_KEY = std::move(other242.ENCRYPTION_WITH_COLUMN_KEY); + __isset = other242.__isset; } -ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(const ColumnCryptoMetaData& other218) { - ENCRYPTION_WITH_FOOTER_KEY = other218.ENCRYPTION_WITH_FOOTER_KEY; - ENCRYPTION_WITH_COLUMN_KEY = other218.ENCRYPTION_WITH_COLUMN_KEY; - __isset = other218.__isset; +ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(const ColumnCryptoMetaData& other243) { + 
ENCRYPTION_WITH_FOOTER_KEY = other243.ENCRYPTION_WITH_FOOTER_KEY; + ENCRYPTION_WITH_COLUMN_KEY = other243.ENCRYPTION_WITH_COLUMN_KEY; + __isset = other243.__isset; return *this; } -ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(ColumnCryptoMetaData&& other219) noexcept { - ENCRYPTION_WITH_FOOTER_KEY = std::move(other219.ENCRYPTION_WITH_FOOTER_KEY); - ENCRYPTION_WITH_COLUMN_KEY = std::move(other219.ENCRYPTION_WITH_COLUMN_KEY); - __isset = other219.__isset; +ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(ColumnCryptoMetaData&& other244) noexcept { + ENCRYPTION_WITH_FOOTER_KEY = std::move(other244.ENCRYPTION_WITH_FOOTER_KEY); + ENCRYPTION_WITH_COLUMN_KEY = std::move(other244.ENCRYPTION_WITH_COLUMN_KEY); + __isset = other244.__isset; return *this; } void ColumnCryptoMetaData::printTo(std::ostream& out) const { @@ -3253,54 +3677,54 @@ void swap(ColumnChunk &a, ColumnChunk &b) { swap(a.__isset, b.__isset); } -ColumnChunk::ColumnChunk(const ColumnChunk& other220) { - file_path = other220.file_path; - file_offset = other220.file_offset; - meta_data = other220.meta_data; - offset_index_offset = other220.offset_index_offset; - offset_index_length = other220.offset_index_length; - column_index_offset = other220.column_index_offset; - column_index_length = other220.column_index_length; - crypto_metadata = other220.crypto_metadata; - encrypted_column_metadata = other220.encrypted_column_metadata; - __isset = other220.__isset; -} -ColumnChunk::ColumnChunk(ColumnChunk&& other221) noexcept { - file_path = std::move(other221.file_path); - file_offset = other221.file_offset; - meta_data = std::move(other221.meta_data); - offset_index_offset = other221.offset_index_offset; - offset_index_length = other221.offset_index_length; - column_index_offset = other221.column_index_offset; - column_index_length = other221.column_index_length; - crypto_metadata = std::move(other221.crypto_metadata); - encrypted_column_metadata = std::move(other221.encrypted_column_metadata); - 
__isset = other221.__isset; -} -ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other222) { - file_path = other222.file_path; - file_offset = other222.file_offset; - meta_data = other222.meta_data; - offset_index_offset = other222.offset_index_offset; - offset_index_length = other222.offset_index_length; - column_index_offset = other222.column_index_offset; - column_index_length = other222.column_index_length; - crypto_metadata = other222.crypto_metadata; - encrypted_column_metadata = other222.encrypted_column_metadata; - __isset = other222.__isset; +ColumnChunk::ColumnChunk(const ColumnChunk& other245) { + file_path = other245.file_path; + file_offset = other245.file_offset; + meta_data = other245.meta_data; + offset_index_offset = other245.offset_index_offset; + offset_index_length = other245.offset_index_length; + column_index_offset = other245.column_index_offset; + column_index_length = other245.column_index_length; + crypto_metadata = other245.crypto_metadata; + encrypted_column_metadata = other245.encrypted_column_metadata; + __isset = other245.__isset; +} +ColumnChunk::ColumnChunk(ColumnChunk&& other246) noexcept { + file_path = std::move(other246.file_path); + file_offset = other246.file_offset; + meta_data = std::move(other246.meta_data); + offset_index_offset = other246.offset_index_offset; + offset_index_length = other246.offset_index_length; + column_index_offset = other246.column_index_offset; + column_index_length = other246.column_index_length; + crypto_metadata = std::move(other246.crypto_metadata); + encrypted_column_metadata = std::move(other246.encrypted_column_metadata); + __isset = other246.__isset; +} +ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other247) { + file_path = other247.file_path; + file_offset = other247.file_offset; + meta_data = other247.meta_data; + offset_index_offset = other247.offset_index_offset; + offset_index_length = other247.offset_index_length; + column_index_offset = other247.column_index_offset; + 
column_index_length = other247.column_index_length; + crypto_metadata = other247.crypto_metadata; + encrypted_column_metadata = other247.encrypted_column_metadata; + __isset = other247.__isset; return *this; } -ColumnChunk& ColumnChunk::operator=(ColumnChunk&& other223) noexcept { - file_path = std::move(other223.file_path); - file_offset = other223.file_offset; - meta_data = std::move(other223.meta_data); - offset_index_offset = other223.offset_index_offset; - offset_index_length = other223.offset_index_length; - column_index_offset = other223.column_index_offset; - column_index_length = other223.column_index_length; - crypto_metadata = std::move(other223.crypto_metadata); - encrypted_column_metadata = std::move(other223.encrypted_column_metadata); - __isset = other223.__isset; +ColumnChunk& ColumnChunk::operator=(ColumnChunk&& other248) noexcept { + file_path = std::move(other248.file_path); + file_offset = other248.file_offset; + meta_data = std::move(other248.meta_data); + offset_index_offset = other248.offset_index_offset; + offset_index_length = other248.offset_index_length; + column_index_offset = other248.column_index_offset; + column_index_length = other248.column_index_length; + crypto_metadata = std::move(other248.crypto_metadata); + encrypted_column_metadata = std::move(other248.encrypted_column_metadata); + __isset = other248.__isset; return *this; } void ColumnChunk::printTo(std::ostream& out) const { @@ -3373,46 +3797,46 @@ void swap(RowGroup &a, RowGroup &b) { swap(a.__isset, b.__isset); } -RowGroup::RowGroup(const RowGroup& other236) { - columns = other236.columns; - total_byte_size = other236.total_byte_size; - num_rows = other236.num_rows; - sorting_columns = other236.sorting_columns; - file_offset = other236.file_offset; - total_compressed_size = other236.total_compressed_size; - ordinal = other236.ordinal; - __isset = other236.__isset; -} -RowGroup::RowGroup(RowGroup&& other237) noexcept { - columns = std::move(other237.columns); - 
total_byte_size = other237.total_byte_size; - num_rows = other237.num_rows; - sorting_columns = std::move(other237.sorting_columns); - file_offset = other237.file_offset; - total_compressed_size = other237.total_compressed_size; - ordinal = other237.ordinal; - __isset = other237.__isset; -} -RowGroup& RowGroup::operator=(const RowGroup& other238) { - columns = other238.columns; - total_byte_size = other238.total_byte_size; - num_rows = other238.num_rows; - sorting_columns = other238.sorting_columns; - file_offset = other238.file_offset; - total_compressed_size = other238.total_compressed_size; - ordinal = other238.ordinal; - __isset = other238.__isset; +RowGroup::RowGroup(const RowGroup& other261) { + columns = other261.columns; + total_byte_size = other261.total_byte_size; + num_rows = other261.num_rows; + sorting_columns = other261.sorting_columns; + file_offset = other261.file_offset; + total_compressed_size = other261.total_compressed_size; + ordinal = other261.ordinal; + __isset = other261.__isset; +} +RowGroup::RowGroup(RowGroup&& other262) noexcept { + columns = std::move(other262.columns); + total_byte_size = other262.total_byte_size; + num_rows = other262.num_rows; + sorting_columns = std::move(other262.sorting_columns); + file_offset = other262.file_offset; + total_compressed_size = other262.total_compressed_size; + ordinal = other262.ordinal; + __isset = other262.__isset; +} +RowGroup& RowGroup::operator=(const RowGroup& other263) { + columns = other263.columns; + total_byte_size = other263.total_byte_size; + num_rows = other263.num_rows; + sorting_columns = other263.sorting_columns; + file_offset = other263.file_offset; + total_compressed_size = other263.total_compressed_size; + ordinal = other263.ordinal; + __isset = other263.__isset; return *this; } -RowGroup& RowGroup::operator=(RowGroup&& other239) noexcept { - columns = std::move(other239.columns); - total_byte_size = other239.total_byte_size; - num_rows = other239.num_rows; - sorting_columns = 
std::move(other239.sorting_columns); - file_offset = other239.file_offset; - total_compressed_size = other239.total_compressed_size; - ordinal = other239.ordinal; - __isset = other239.__isset; +RowGroup& RowGroup::operator=(RowGroup&& other264) noexcept { + columns = std::move(other264.columns); + total_byte_size = other264.total_byte_size; + num_rows = other264.num_rows; + sorting_columns = std::move(other264.sorting_columns); + file_offset = other264.file_offset; + total_compressed_size = other264.total_compressed_size; + ordinal = other264.ordinal; + __isset = other264.__isset; return *this; } void RowGroup::printTo(std::ostream& out) const { @@ -3445,18 +3869,18 @@ void swap(TypeDefinedOrder &a, TypeDefinedOrder &b) { (void) b; } -TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other240) noexcept { - (void) other240; +TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other265) noexcept { + (void) other265; } -TypeDefinedOrder::TypeDefinedOrder(TypeDefinedOrder&& other241) noexcept { - (void) other241; +TypeDefinedOrder::TypeDefinedOrder(TypeDefinedOrder&& other266) noexcept { + (void) other266; } -TypeDefinedOrder& TypeDefinedOrder::operator=(const TypeDefinedOrder& other242) noexcept { - (void) other242; +TypeDefinedOrder& TypeDefinedOrder::operator=(const TypeDefinedOrder& other267) noexcept { + (void) other267; return *this; } -TypeDefinedOrder& TypeDefinedOrder::operator=(TypeDefinedOrder&& other243) noexcept { - (void) other243; +TypeDefinedOrder& TypeDefinedOrder::operator=(TypeDefinedOrder&& other268) noexcept { + (void) other268; return *this; } void TypeDefinedOrder::printTo(std::ostream& out) const { @@ -3487,22 +3911,22 @@ void swap(ColumnOrder &a, ColumnOrder &b) { swap(a.__isset, b.__isset); } -ColumnOrder::ColumnOrder(const ColumnOrder& other244) noexcept { - TYPE_ORDER = other244.TYPE_ORDER; - __isset = other244.__isset; +ColumnOrder::ColumnOrder(const ColumnOrder& other269) noexcept { + TYPE_ORDER = other269.TYPE_ORDER; + 
__isset = other269.__isset; } -ColumnOrder::ColumnOrder(ColumnOrder&& other245) noexcept { - TYPE_ORDER = std::move(other245.TYPE_ORDER); - __isset = other245.__isset; +ColumnOrder::ColumnOrder(ColumnOrder&& other270) noexcept { + TYPE_ORDER = std::move(other270.TYPE_ORDER); + __isset = other270.__isset; } -ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other246) noexcept { - TYPE_ORDER = other246.TYPE_ORDER; - __isset = other246.__isset; +ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other271) noexcept { + TYPE_ORDER = other271.TYPE_ORDER; + __isset = other271.__isset; return *this; } -ColumnOrder& ColumnOrder::operator=(ColumnOrder&& other247) noexcept { - TYPE_ORDER = std::move(other247.TYPE_ORDER); - __isset = other247.__isset; +ColumnOrder& ColumnOrder::operator=(ColumnOrder&& other272) noexcept { + TYPE_ORDER = std::move(other272.TYPE_ORDER); + __isset = other272.__isset; return *this; } void ColumnOrder::printTo(std::ostream& out) const { @@ -3542,26 +3966,26 @@ void swap(PageLocation &a, PageLocation &b) { swap(a.first_row_index, b.first_row_index); } -PageLocation::PageLocation(const PageLocation& other248) noexcept { - offset = other248.offset; - compressed_page_size = other248.compressed_page_size; - first_row_index = other248.first_row_index; +PageLocation::PageLocation(const PageLocation& other273) noexcept { + offset = other273.offset; + compressed_page_size = other273.compressed_page_size; + first_row_index = other273.first_row_index; } -PageLocation::PageLocation(PageLocation&& other249) noexcept { - offset = other249.offset; - compressed_page_size = other249.compressed_page_size; - first_row_index = other249.first_row_index; +PageLocation::PageLocation(PageLocation&& other274) noexcept { + offset = other274.offset; + compressed_page_size = other274.compressed_page_size; + first_row_index = other274.first_row_index; } -PageLocation& PageLocation::operator=(const PageLocation& other250) noexcept { - offset = other250.offset; - 
compressed_page_size = other250.compressed_page_size; - first_row_index = other250.first_row_index; +PageLocation& PageLocation::operator=(const PageLocation& other275) noexcept { + offset = other275.offset; + compressed_page_size = other275.compressed_page_size; + first_row_index = other275.first_row_index; return *this; } -PageLocation& PageLocation::operator=(PageLocation&& other251) noexcept { - offset = other251.offset; - compressed_page_size = other251.compressed_page_size; - first_row_index = other251.first_row_index; +PageLocation& PageLocation::operator=(PageLocation&& other276) noexcept { + offset = other276.offset; + compressed_page_size = other276.compressed_page_size; + first_row_index = other276.first_row_index; return *this; } void PageLocation::printTo(std::ostream& out) const { @@ -3600,26 +4024,26 @@ void swap(OffsetIndex &a, OffsetIndex &b) { swap(a.__isset, b.__isset); } -OffsetIndex::OffsetIndex(const OffsetIndex& other264) { - page_locations = other264.page_locations; - unencoded_byte_array_data_bytes = other264.unencoded_byte_array_data_bytes; - __isset = other264.__isset; +OffsetIndex::OffsetIndex(const OffsetIndex& other289) { + page_locations = other289.page_locations; + unencoded_byte_array_data_bytes = other289.unencoded_byte_array_data_bytes; + __isset = other289.__isset; } -OffsetIndex::OffsetIndex(OffsetIndex&& other265) noexcept { - page_locations = std::move(other265.page_locations); - unencoded_byte_array_data_bytes = std::move(other265.unencoded_byte_array_data_bytes); - __isset = other265.__isset; +OffsetIndex::OffsetIndex(OffsetIndex&& other290) noexcept { + page_locations = std::move(other290.page_locations); + unencoded_byte_array_data_bytes = std::move(other290.unencoded_byte_array_data_bytes); + __isset = other290.__isset; } -OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other266) { - page_locations = other266.page_locations; - unencoded_byte_array_data_bytes = other266.unencoded_byte_array_data_bytes; - __isset = 
other266.__isset; +OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other291) { + page_locations = other291.page_locations; + unencoded_byte_array_data_bytes = other291.unencoded_byte_array_data_bytes; + __isset = other291.__isset; return *this; } -OffsetIndex& OffsetIndex::operator=(OffsetIndex&& other267) noexcept { - page_locations = std::move(other267.page_locations); - unencoded_byte_array_data_bytes = std::move(other267.unencoded_byte_array_data_bytes); - __isset = other267.__isset; +OffsetIndex& OffsetIndex::operator=(OffsetIndex&& other292) noexcept { + page_locations = std::move(other292.page_locations); + unencoded_byte_array_data_bytes = std::move(other292.unencoded_byte_array_data_bytes); + __isset = other292.__isset; return *this; } void OffsetIndex::printTo(std::ostream& out) const { @@ -3665,6 +4089,11 @@ void ColumnIndex::__set_definition_level_histograms(const std::vector & this->definition_level_histograms = val; __isset.definition_level_histograms = true; } + +void ColumnIndex::__set_geometry_stats(const std::vector & val) { + this->geometry_stats = val; +__isset.geometry_stats = true; +} std::ostream& operator<<(std::ostream& out, const ColumnIndex& obj) { obj.printTo(out); @@ -3681,49 +4110,54 @@ void swap(ColumnIndex &a, ColumnIndex &b) { swap(a.null_counts, b.null_counts); swap(a.repetition_level_histograms, b.repetition_level_histograms); swap(a.definition_level_histograms, b.definition_level_histograms); + swap(a.geometry_stats, b.geometry_stats); swap(a.__isset, b.__isset); } -ColumnIndex::ColumnIndex(const ColumnIndex& other305) { - null_pages = other305.null_pages; - min_values = other305.min_values; - max_values = other305.max_values; - boundary_order = other305.boundary_order; - null_counts = other305.null_counts; - repetition_level_histograms = other305.repetition_level_histograms; - definition_level_histograms = other305.definition_level_histograms; - __isset = other305.__isset; -} -ColumnIndex::ColumnIndex(ColumnIndex&& 
other306) noexcept { - null_pages = std::move(other306.null_pages); - min_values = std::move(other306.min_values); - max_values = std::move(other306.max_values); - boundary_order = other306.boundary_order; - null_counts = std::move(other306.null_counts); - repetition_level_histograms = std::move(other306.repetition_level_histograms); - definition_level_histograms = std::move(other306.definition_level_histograms); - __isset = other306.__isset; -} -ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other307) { - null_pages = other307.null_pages; - min_values = other307.min_values; - max_values = other307.max_values; - boundary_order = other307.boundary_order; - null_counts = other307.null_counts; - repetition_level_histograms = other307.repetition_level_histograms; - definition_level_histograms = other307.definition_level_histograms; - __isset = other307.__isset; +ColumnIndex::ColumnIndex(const ColumnIndex& other336) { + null_pages = other336.null_pages; + min_values = other336.min_values; + max_values = other336.max_values; + boundary_order = other336.boundary_order; + null_counts = other336.null_counts; + repetition_level_histograms = other336.repetition_level_histograms; + definition_level_histograms = other336.definition_level_histograms; + geometry_stats = other336.geometry_stats; + __isset = other336.__isset; +} +ColumnIndex::ColumnIndex(ColumnIndex&& other337) noexcept { + null_pages = std::move(other337.null_pages); + min_values = std::move(other337.min_values); + max_values = std::move(other337.max_values); + boundary_order = other337.boundary_order; + null_counts = std::move(other337.null_counts); + repetition_level_histograms = std::move(other337.repetition_level_histograms); + definition_level_histograms = std::move(other337.definition_level_histograms); + geometry_stats = std::move(other337.geometry_stats); + __isset = other337.__isset; +} +ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other338) { + null_pages = other338.null_pages; + 
min_values = other338.min_values; + max_values = other338.max_values; + boundary_order = other338.boundary_order; + null_counts = other338.null_counts; + repetition_level_histograms = other338.repetition_level_histograms; + definition_level_histograms = other338.definition_level_histograms; + geometry_stats = other338.geometry_stats; + __isset = other338.__isset; return *this; } -ColumnIndex& ColumnIndex::operator=(ColumnIndex&& other308) noexcept { - null_pages = std::move(other308.null_pages); - min_values = std::move(other308.min_values); - max_values = std::move(other308.max_values); - boundary_order = other308.boundary_order; - null_counts = std::move(other308.null_counts); - repetition_level_histograms = std::move(other308.repetition_level_histograms); - definition_level_histograms = std::move(other308.definition_level_histograms); - __isset = other308.__isset; +ColumnIndex& ColumnIndex::operator=(ColumnIndex&& other339) noexcept { + null_pages = std::move(other339.null_pages); + min_values = std::move(other339.min_values); + max_values = std::move(other339.max_values); + boundary_order = other339.boundary_order; + null_counts = std::move(other339.null_counts); + repetition_level_histograms = std::move(other339.repetition_level_histograms); + definition_level_histograms = std::move(other339.definition_level_histograms); + geometry_stats = std::move(other339.geometry_stats); + __isset = other339.__isset; return *this; } void ColumnIndex::printTo(std::ostream& out) const { @@ -3736,6 +4170,7 @@ void ColumnIndex::printTo(std::ostream& out) const { out << ", " << "null_counts="; (__isset.null_counts ? (out << to_string(null_counts)) : (out << "")); out << ", " << "repetition_level_histograms="; (__isset.repetition_level_histograms ? (out << to_string(repetition_level_histograms)) : (out << "")); out << ", " << "definition_level_histograms="; (__isset.definition_level_histograms ? 
(out << to_string(definition_level_histograms)) : (out << "")); + out << ", " << "geometry_stats="; (__isset.geometry_stats ? (out << to_string(geometry_stats)) : (out << "")); out << ")"; } @@ -3773,30 +4208,30 @@ void swap(AesGcmV1 &a, AesGcmV1 &b) { swap(a.__isset, b.__isset); } -AesGcmV1::AesGcmV1(const AesGcmV1& other309) { - aad_prefix = other309.aad_prefix; - aad_file_unique = other309.aad_file_unique; - supply_aad_prefix = other309.supply_aad_prefix; - __isset = other309.__isset; +AesGcmV1::AesGcmV1(const AesGcmV1& other340) { + aad_prefix = other340.aad_prefix; + aad_file_unique = other340.aad_file_unique; + supply_aad_prefix = other340.supply_aad_prefix; + __isset = other340.__isset; } -AesGcmV1::AesGcmV1(AesGcmV1&& other310) noexcept { - aad_prefix = std::move(other310.aad_prefix); - aad_file_unique = std::move(other310.aad_file_unique); - supply_aad_prefix = other310.supply_aad_prefix; - __isset = other310.__isset; +AesGcmV1::AesGcmV1(AesGcmV1&& other341) noexcept { + aad_prefix = std::move(other341.aad_prefix); + aad_file_unique = std::move(other341.aad_file_unique); + supply_aad_prefix = other341.supply_aad_prefix; + __isset = other341.__isset; } -AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other311) { - aad_prefix = other311.aad_prefix; - aad_file_unique = other311.aad_file_unique; - supply_aad_prefix = other311.supply_aad_prefix; - __isset = other311.__isset; +AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other342) { + aad_prefix = other342.aad_prefix; + aad_file_unique = other342.aad_file_unique; + supply_aad_prefix = other342.supply_aad_prefix; + __isset = other342.__isset; return *this; } -AesGcmV1& AesGcmV1::operator=(AesGcmV1&& other312) noexcept { - aad_prefix = std::move(other312.aad_prefix); - aad_file_unique = std::move(other312.aad_file_unique); - supply_aad_prefix = other312.supply_aad_prefix; - __isset = other312.__isset; +AesGcmV1& AesGcmV1::operator=(AesGcmV1&& other343) noexcept { + aad_prefix = std::move(other343.aad_prefix); + 
aad_file_unique = std::move(other343.aad_file_unique); + supply_aad_prefix = other343.supply_aad_prefix; + __isset = other343.__isset; return *this; } void AesGcmV1::printTo(std::ostream& out) const { @@ -3842,30 +4277,30 @@ void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b) { swap(a.__isset, b.__isset); } -AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other313) { - aad_prefix = other313.aad_prefix; - aad_file_unique = other313.aad_file_unique; - supply_aad_prefix = other313.supply_aad_prefix; - __isset = other313.__isset; +AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other344) { + aad_prefix = other344.aad_prefix; + aad_file_unique = other344.aad_file_unique; + supply_aad_prefix = other344.supply_aad_prefix; + __isset = other344.__isset; } -AesGcmCtrV1::AesGcmCtrV1(AesGcmCtrV1&& other314) noexcept { - aad_prefix = std::move(other314.aad_prefix); - aad_file_unique = std::move(other314.aad_file_unique); - supply_aad_prefix = other314.supply_aad_prefix; - __isset = other314.__isset; +AesGcmCtrV1::AesGcmCtrV1(AesGcmCtrV1&& other345) noexcept { + aad_prefix = std::move(other345.aad_prefix); + aad_file_unique = std::move(other345.aad_file_unique); + supply_aad_prefix = other345.supply_aad_prefix; + __isset = other345.__isset; } -AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other315) { - aad_prefix = other315.aad_prefix; - aad_file_unique = other315.aad_file_unique; - supply_aad_prefix = other315.supply_aad_prefix; - __isset = other315.__isset; +AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other346) { + aad_prefix = other346.aad_prefix; + aad_file_unique = other346.aad_file_unique; + supply_aad_prefix = other346.supply_aad_prefix; + __isset = other346.__isset; return *this; } -AesGcmCtrV1& AesGcmCtrV1::operator=(AesGcmCtrV1&& other316) noexcept { - aad_prefix = std::move(other316.aad_prefix); - aad_file_unique = std::move(other316.aad_file_unique); - supply_aad_prefix = other316.supply_aad_prefix; - __isset = other316.__isset; +AesGcmCtrV1& 
AesGcmCtrV1::operator=(AesGcmCtrV1&& other347) noexcept { + aad_prefix = std::move(other347.aad_prefix); + aad_file_unique = std::move(other347.aad_file_unique); + supply_aad_prefix = other347.supply_aad_prefix; + __isset = other347.__isset; return *this; } void AesGcmCtrV1::printTo(std::ostream& out) const { @@ -3905,26 +4340,26 @@ void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b) { swap(a.__isset, b.__isset); } -EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other317) { - AES_GCM_V1 = other317.AES_GCM_V1; - AES_GCM_CTR_V1 = other317.AES_GCM_CTR_V1; - __isset = other317.__isset; +EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other348) { + AES_GCM_V1 = other348.AES_GCM_V1; + AES_GCM_CTR_V1 = other348.AES_GCM_CTR_V1; + __isset = other348.__isset; } -EncryptionAlgorithm::EncryptionAlgorithm(EncryptionAlgorithm&& other318) noexcept { - AES_GCM_V1 = std::move(other318.AES_GCM_V1); - AES_GCM_CTR_V1 = std::move(other318.AES_GCM_CTR_V1); - __isset = other318.__isset; +EncryptionAlgorithm::EncryptionAlgorithm(EncryptionAlgorithm&& other349) noexcept { + AES_GCM_V1 = std::move(other349.AES_GCM_V1); + AES_GCM_CTR_V1 = std::move(other349.AES_GCM_CTR_V1); + __isset = other349.__isset; } -EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other319) { - AES_GCM_V1 = other319.AES_GCM_V1; - AES_GCM_CTR_V1 = other319.AES_GCM_CTR_V1; - __isset = other319.__isset; +EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other350) { + AES_GCM_V1 = other350.AES_GCM_V1; + AES_GCM_CTR_V1 = other350.AES_GCM_CTR_V1; + __isset = other350.__isset; return *this; } -EncryptionAlgorithm& EncryptionAlgorithm::operator=(EncryptionAlgorithm&& other320) noexcept { - AES_GCM_V1 = std::move(other320.AES_GCM_V1); - AES_GCM_CTR_V1 = std::move(other320.AES_GCM_CTR_V1); - __isset = other320.__isset; +EncryptionAlgorithm& EncryptionAlgorithm::operator=(EncryptionAlgorithm&& other351) noexcept { + 
AES_GCM_V1 = std::move(other351.AES_GCM_V1); + AES_GCM_CTR_V1 = std::move(other351.AES_GCM_CTR_V1); + __isset = other351.__isset; return *this; } void EncryptionAlgorithm::printTo(std::ostream& out) const { @@ -4001,54 +4436,54 @@ void swap(FileMetaData &a, FileMetaData &b) { swap(a.__isset, b.__isset); } -FileMetaData::FileMetaData(const FileMetaData& other345) { - version = other345.version; - schema = other345.schema; - num_rows = other345.num_rows; - row_groups = other345.row_groups; - key_value_metadata = other345.key_value_metadata; - created_by = other345.created_by; - column_orders = other345.column_orders; - encryption_algorithm = other345.encryption_algorithm; - footer_signing_key_metadata = other345.footer_signing_key_metadata; - __isset = other345.__isset; -} -FileMetaData::FileMetaData(FileMetaData&& other346) noexcept { - version = other346.version; - schema = std::move(other346.schema); - num_rows = other346.num_rows; - row_groups = std::move(other346.row_groups); - key_value_metadata = std::move(other346.key_value_metadata); - created_by = std::move(other346.created_by); - column_orders = std::move(other346.column_orders); - encryption_algorithm = std::move(other346.encryption_algorithm); - footer_signing_key_metadata = std::move(other346.footer_signing_key_metadata); - __isset = other346.__isset; -} -FileMetaData& FileMetaData::operator=(const FileMetaData& other347) { - version = other347.version; - schema = other347.schema; - num_rows = other347.num_rows; - row_groups = other347.row_groups; - key_value_metadata = other347.key_value_metadata; - created_by = other347.created_by; - column_orders = other347.column_orders; - encryption_algorithm = other347.encryption_algorithm; - footer_signing_key_metadata = other347.footer_signing_key_metadata; - __isset = other347.__isset; +FileMetaData::FileMetaData(const FileMetaData& other376) { + version = other376.version; + schema = other376.schema; + num_rows = other376.num_rows; + row_groups = 
other376.row_groups; + key_value_metadata = other376.key_value_metadata; + created_by = other376.created_by; + column_orders = other376.column_orders; + encryption_algorithm = other376.encryption_algorithm; + footer_signing_key_metadata = other376.footer_signing_key_metadata; + __isset = other376.__isset; +} +FileMetaData::FileMetaData(FileMetaData&& other377) noexcept { + version = other377.version; + schema = std::move(other377.schema); + num_rows = other377.num_rows; + row_groups = std::move(other377.row_groups); + key_value_metadata = std::move(other377.key_value_metadata); + created_by = std::move(other377.created_by); + column_orders = std::move(other377.column_orders); + encryption_algorithm = std::move(other377.encryption_algorithm); + footer_signing_key_metadata = std::move(other377.footer_signing_key_metadata); + __isset = other377.__isset; +} +FileMetaData& FileMetaData::operator=(const FileMetaData& other378) { + version = other378.version; + schema = other378.schema; + num_rows = other378.num_rows; + row_groups = other378.row_groups; + key_value_metadata = other378.key_value_metadata; + created_by = other378.created_by; + column_orders = other378.column_orders; + encryption_algorithm = other378.encryption_algorithm; + footer_signing_key_metadata = other378.footer_signing_key_metadata; + __isset = other378.__isset; return *this; } -FileMetaData& FileMetaData::operator=(FileMetaData&& other348) noexcept { - version = other348.version; - schema = std::move(other348.schema); - num_rows = other348.num_rows; - row_groups = std::move(other348.row_groups); - key_value_metadata = std::move(other348.key_value_metadata); - created_by = std::move(other348.created_by); - column_orders = std::move(other348.column_orders); - encryption_algorithm = std::move(other348.encryption_algorithm); - footer_signing_key_metadata = std::move(other348.footer_signing_key_metadata); - __isset = other348.__isset; +FileMetaData& FileMetaData::operator=(FileMetaData&& other379) 
noexcept { + version = other379.version; + schema = std::move(other379.schema); + num_rows = other379.num_rows; + row_groups = std::move(other379.row_groups); + key_value_metadata = std::move(other379.key_value_metadata); + created_by = std::move(other379.created_by); + column_orders = std::move(other379.column_orders); + encryption_algorithm = std::move(other379.encryption_algorithm); + footer_signing_key_metadata = std::move(other379.footer_signing_key_metadata); + __isset = other379.__isset; return *this; } void FileMetaData::printTo(std::ostream& out) const { @@ -4093,26 +4528,26 @@ void swap(FileCryptoMetaData &a, FileCryptoMetaData &b) { swap(a.__isset, b.__isset); } -FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other349) { - encryption_algorithm = other349.encryption_algorithm; - key_metadata = other349.key_metadata; - __isset = other349.__isset; +FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other380) { + encryption_algorithm = other380.encryption_algorithm; + key_metadata = other380.key_metadata; + __isset = other380.__isset; } -FileCryptoMetaData::FileCryptoMetaData(FileCryptoMetaData&& other350) noexcept { - encryption_algorithm = std::move(other350.encryption_algorithm); - key_metadata = std::move(other350.key_metadata); - __isset = other350.__isset; +FileCryptoMetaData::FileCryptoMetaData(FileCryptoMetaData&& other381) noexcept { + encryption_algorithm = std::move(other381.encryption_algorithm); + key_metadata = std::move(other381.key_metadata); + __isset = other381.__isset; } -FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other351) { - encryption_algorithm = other351.encryption_algorithm; - key_metadata = other351.key_metadata; - __isset = other351.__isset; +FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other382) { + encryption_algorithm = other382.encryption_algorithm; + key_metadata = other382.key_metadata; + __isset = other382.__isset; return *this; } 
-FileCryptoMetaData& FileCryptoMetaData::operator=(FileCryptoMetaData&& other352) noexcept { - encryption_algorithm = std::move(other352.encryption_algorithm); - key_metadata = std::move(other352.key_metadata); - __isset = other352.__isset; +FileCryptoMetaData& FileCryptoMetaData::operator=(FileCryptoMetaData&& other383) noexcept { + encryption_algorithm = std::move(other383.encryption_algorithm); + key_metadata = std::move(other383.key_metadata); + __isset = other383.__isset; return *this; } void FileCryptoMetaData::printTo(std::ostream& out) const { @@ -4122,5 +4557,5 @@ void FileCryptoMetaData::printTo(std::ostream& out) const { out << ", " << "key_metadata="; (__isset.key_metadata ? (out << to_string(key_metadata)) : (out << "")); out << ")"; } - -}} // namespace +} +} // namespace diff --git a/cpp/src/generated/parquet_types.h b/cpp/src/generated/parquet_types.h index 6cf85fe5e73cc..c09a3a1d11e5f 100644 --- a/cpp/src/generated/parquet_types.h +++ b/cpp/src/generated/parquet_types.h @@ -1,5 +1,5 @@ /** - * Autogenerated by Thrift Compiler (0.19.0) + * Autogenerated by Thrift Compiler (0.20.0) * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * @generated @@ -20,6 +20,7 @@ #include "parquet/windows_compatibility.h" + namespace parquet { namespace format { /** @@ -50,7 +51,7 @@ std::string to_string(const Type::type& val); /** * DEPRECATED: Common types used by frameworks(e.g. hive, pig) using parquet. * ConvertedType is superseded by LogicalType. This enum should not be extended. - * + * * See LogicalTypes.md for conversion between ConvertedType and LogicalType. */ struct ConvertedType { @@ -78,12 +79,12 @@ struct ConvertedType { ENUM = 4, /** * A decimal value. - * + * * This may be used to annotate binary or fixed primitive types. The * underlying byte array stores the unscaled value encoded as two's * complement using big-endian byte order (the most significant byte is the * zeroth element). 
The value of the decimal is the value * 10^{-scale}. - * + * * This must be accompanied by a (maximum) precision and a scale in the * SchemaElement. The precision specifies the number of digits in the decimal * and the scale stores the location of the decimal point. For example 1.23 @@ -93,47 +94,47 @@ struct ConvertedType { DECIMAL = 5, /** * A Date - * + * * Stored as days since Unix epoch, encoded as the INT32 physical type. - * + * */ DATE = 6, /** * A time - * + * * The total number of milliseconds since midnight. The value is stored * as an INT32 physical type. */ TIME_MILLIS = 7, /** * A time. - * + * * The total number of microseconds since midnight. The value is stored as * an INT64 physical type. */ TIME_MICROS = 8, /** * A date/time combination - * + * * Date and time recorded as milliseconds since the Unix epoch. Recorded as * a physical type of INT64. */ TIMESTAMP_MILLIS = 9, /** * A date/time combination - * + * * Date and time recorded as microseconds since the Unix epoch. The value is * stored as an INT64 physical type. */ TIMESTAMP_MICROS = 10, /** * An unsigned integer value. - * + * * The number describes the maximum number of meaningful data bits in * the stored value. 8, 16 and 32 bit values are stored using the * INT32 physical type. 64 bit values are stored using the INT64 * physical type. - * + * */ UINT_8 = 11, UINT_16 = 12, @@ -141,12 +142,12 @@ struct ConvertedType { UINT_64 = 14, /** * A signed integer value. - * + * * The number describes the maximum number of meaningful data bits in * the stored value. 8, 16 and 32 bit values are stored using the * INT32 physical type. 64 bit values are stored using the INT64 * physical type. - * + * */ INT_8 = 15, INT_16 = 16, @@ -154,19 +155,19 @@ struct ConvertedType { INT_64 = 18, /** * An embedded JSON document - * + * * A JSON document embedded within a single UTF8 column. */ JSON = 19, /** * An embedded BSON document - * + * * A BSON document embedded within a single BINARY column. 
*/ BSON = 20, /** * An interval of time - * + * * This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12 * This data is composed of three separate little endian unsigned * integers. Each stores a component of a duration of time. The first @@ -212,6 +213,50 @@ std::ostream& operator<<(std::ostream& out, const FieldRepetitionType::type& val std::string to_string(const FieldRepetitionType::type& val); +/** + * Interpretation for edges of GEOMETRY logical type, i.e. whether the edge + * between points represent a straight cartesian line or the shortest line on + * the sphere. Please note that it only applies to polygons. + */ +struct Edges { + enum type { + PLANAR = 0, + SPHERICAL = 1 + }; +}; + +extern const std::map _Edges_VALUES_TO_NAMES; + +std::ostream& operator<<(std::ostream& out, const Edges::type& val); + +std::string to_string(const Edges::type& val); + +/** + * Physical type and encoding for the geometry type. + */ +struct GeometryEncoding { + enum type { + /** + * Allowed for physical type: BYTE_ARRAY. + * + * Well-known binary (WKB) representations of geometries. It supports 2D or + * 3D geometries of the standard geometry types (Point, LineString, Polygon, + * MultiPoint, MultiLineString, MultiPolygon, and GeometryCollection). This + * is the preferred option for maximum portability. + * + * This encoding enables GeometryStatistics to be set in the column chunk + * and page index. + */ + WKB = 0 + }; +}; + +extern const std::map _GeometryEncoding_VALUES_TO_NAMES; + +std::ostream& operator<<(std::ostream& out, const GeometryEncoding::type& val); + +std::string to_string(const GeometryEncoding::type& val); + /** * Encodings supported by Parquet. Not all encodings are valid for all types. These * enums are also used to specify the encoding of definition and repetition levels. @@ -267,12 +312,15 @@ struct Encoding { */ RLE_DICTIONARY = 8, /** - * Encoding for floating-point data. 
+ * Encoding for fixed-width data (FLOAT, DOUBLE, INT32, INT64, FIXED_LEN_BYTE_ARRAY). * K byte-streams are created where K is the size in bytes of the data type. - * The individual bytes of an FP value are scattered to the corresponding stream and + * The individual bytes of a value are scattered to the corresponding stream and * the streams are concatenated. * This itself does not reduce the size of the data but can lead to better compression * afterwards. + * + * Added in 2.8 for FLOAT and DOUBLE. + * Support for INT32, INT64 and FIXED_LEN_BYTE_ARRAY added in 2.11. */ BYTE_STREAM_SPLIT = 9 }; @@ -286,11 +334,11 @@ std::string to_string(const Encoding::type& val); /** * Supported compression algorithms. - * + * * Codecs added in format version X.Y can be read by readers based on X.Y and later. * Codec support may vary between readers based on the format version and * libraries available at runtime. - * + * * See Compression.md for a detailed specification of these algorithms. */ struct CompressionCodec { @@ -347,6 +395,12 @@ std::string to_string(const BoundaryOrder::type& val); class SizeStatistics; +class Covering; + +class BoundingBox; + +class GeometryStatistics; + class Statistics; class StringType; @@ -385,6 +439,8 @@ class JsonType; class BsonType; +class GeometryType; + class LogicalType; class SchemaElement; @@ -487,11 +543,11 @@ class SizeStatistics { * schema information multiplied by the number of non-null and null values. * The number of null/non-null values can be inferred from the histograms * below. - * + * * For example, if a column chunk is dictionary-encoded with dictionary * ["a", "bc", "cde"], and a data page contains the indices [0, 0, 1, 2], * then this value for that data page should be 7 (1 + 1 + 2 + 3). - * + * * This field should only be set for types that use BYTE_ARRAY as their * physical type. */ @@ -501,18 +557,18 @@ class SizeStatistics { * repetition (i.e. 
size=max repetition_level+1) where each element * represents the number of times the repetition level was observed in the * data. - * + * * This field may be omitted if max_repetition_level is 0 without loss * of information. - * + * */ std::vector repetition_level_histogram; /** * Same as repetition_level_histogram except for definition levels. - * + * * This field may be omitted if max_definition_level is 0 or 1 without * loss of information. - * + * */ std::vector definition_level_histogram; @@ -558,8 +614,274 @@ void swap(SizeStatistics &a, SizeStatistics &b); std::ostream& operator<<(std::ostream& out, const SizeStatistics& obj); + +/** + * A custom WKB-encoded polygon or multi-polygon to represent a covering of + * geometries. For example, it may be a bounding box, or an evelope of geometries + * when a bounding box cannot be built (e.g. a geometry has spherical edges, or if + * an edge of geographic coordinates crosses the antimeridian). In addition, it can + * also be used to provide vendor-agnostic coverings like S2 or H3 grids. 
+ */ +class Covering { + public: + + Covering(const Covering&); + Covering(Covering&&) noexcept; + Covering& operator=(const Covering&); + Covering& operator=(Covering&&) noexcept; + Covering() noexcept + : geometry(), + edges(static_cast(0)) { + } + + virtual ~Covering() noexcept; + /** + * Bytes of a WKB-encoded geometry + */ + std::string geometry; + /** + * Edges of the geometry, which is independent of edges from the logical type + * + * @see Edges + */ + Edges::type edges; + + void __set_geometry(const std::string& val); + + void __set_edges(const Edges::type val); + + bool operator == (const Covering & rhs) const + { + if (!(geometry == rhs.geometry)) + return false; + if (!(edges == rhs.edges)) + return false; + return true; + } + bool operator != (const Covering &rhs) const { + return !(*this == rhs); + } + + bool operator < (const Covering & ) const; + + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; + + virtual void printTo(std::ostream& out) const; +}; + +void swap(Covering &a, Covering &b); + +std::ostream& operator<<(std::ostream& out, const Covering& obj); + +typedef struct _BoundingBox__isset { + _BoundingBox__isset() : zmin(false), zmax(false), mmin(false), mmax(false) {} + bool zmin :1; + bool zmax :1; + bool mmin :1; + bool mmax :1; +} _BoundingBox__isset; + +/** + * Bounding box of geometries in the representation of min/max value pair of + * coordinates from each axis. Values of Z and M are omitted for 2D geometries. 
+ */ +class BoundingBox { + public: + + BoundingBox(const BoundingBox&) noexcept; + BoundingBox(BoundingBox&&) noexcept; + BoundingBox& operator=(const BoundingBox&) noexcept; + BoundingBox& operator=(BoundingBox&&) noexcept; + BoundingBox() noexcept + : xmin(0), + xmax(0), + ymin(0), + ymax(0), + zmin(0), + zmax(0), + mmin(0), + mmax(0) { + } + + virtual ~BoundingBox() noexcept; + double xmin; + double xmax; + double ymin; + double ymax; + double zmin; + double zmax; + double mmin; + double mmax; + + _BoundingBox__isset __isset; + + void __set_xmin(const double val); + + void __set_xmax(const double val); + + void __set_ymin(const double val); + + void __set_ymax(const double val); + + void __set_zmin(const double val); + + void __set_zmax(const double val); + + void __set_mmin(const double val); + + void __set_mmax(const double val); + + bool operator == (const BoundingBox & rhs) const + { + if (!(xmin == rhs.xmin)) + return false; + if (!(xmax == rhs.xmax)) + return false; + if (!(ymin == rhs.ymin)) + return false; + if (!(ymax == rhs.ymax)) + return false; + if (__isset.zmin != rhs.__isset.zmin) + return false; + else if (__isset.zmin && !(zmin == rhs.zmin)) + return false; + if (__isset.zmax != rhs.__isset.zmax) + return false; + else if (__isset.zmax && !(zmax == rhs.zmax)) + return false; + if (__isset.mmin != rhs.__isset.mmin) + return false; + else if (__isset.mmin && !(mmin == rhs.mmin)) + return false; + if (__isset.mmax != rhs.__isset.mmax) + return false; + else if (__isset.mmax && !(mmax == rhs.mmax)) + return false; + return true; + } + bool operator != (const BoundingBox &rhs) const { + return !(*this == rhs); + } + + bool operator < (const BoundingBox & ) const; + + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; + + virtual void printTo(std::ostream& out) const; +}; + +void swap(BoundingBox &a, BoundingBox &b); + +std::ostream& operator<<(std::ostream& out, const BoundingBox& obj); + +typedef struct 
_GeometryStatistics__isset { + _GeometryStatistics__isset() : bbox(false), covering(false), geometry_types(false) {} + bool bbox :1; + bool covering :1; + bool geometry_types :1; +} _GeometryStatistics__isset; + +/** + * Statistics specific to GEOMETRY logical type + */ +class GeometryStatistics { + public: + + GeometryStatistics(const GeometryStatistics&); + GeometryStatistics(GeometryStatistics&&) noexcept; + GeometryStatistics& operator=(const GeometryStatistics&); + GeometryStatistics& operator=(GeometryStatistics&&) noexcept; + GeometryStatistics() noexcept { + } + + virtual ~GeometryStatistics() noexcept; + /** + * A bounding box of geometries + */ + BoundingBox bbox; + /** + * A covering polygon of geometries + */ + Covering covering; + /** + * The geometry types of all geometries, or an empty array if they are not + * known. This is borrowed from `geometry_types` column metadata of GeoParquet [1] + * except that values in the list are WKB (ISO variant) integer codes [2]. Table + * below shows the most common geometry types and their codes: + * + * | Type | XY | XYZ | XYM | XYZM | + * | :----------------- | :--- | :--- | :--- | :--: | + * | Point | 0001 | 1001 | 2001 | 3001 | + * | LineString | 0002 | 1002 | 2002 | 3002 | + * | Polygon | 0003 | 1003 | 2003 | 3003 | + * | MultiPoint | 0004 | 1004 | 2004 | 3004 | + * | MultiLineString | 0005 | 1005 | 2005 | 3005 | + * | MultiPolygon | 0006 | 1006 | 2006 | 3006 | + * | GeometryCollection | 0007 | 1007 | 2007 | 3007 | + * + * In addition, the following rules are used: + * - A list of multiple values indicates that multiple geometry types are + * present (e.g. `[0003, 0006]`). + * - An empty array explicitly signals that the geometry types are not known. + * - The geometry types in the list must be unique (e.g. `[0001, 0001]` + * is not valid). 
+ * + * Please refer to links below for more detail: + * [1] https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary + * [2] https://github.com/opengeospatial/geoparquet/blob/v1.0.0/format-specs/geoparquet.md?plain=1#L91 + */ + std::vector geometry_types; + + _GeometryStatistics__isset __isset; + + void __set_bbox(const BoundingBox& val); + + void __set_covering(const Covering& val); + + void __set_geometry_types(const std::vector & val); + + bool operator == (const GeometryStatistics & rhs) const + { + if (__isset.bbox != rhs.__isset.bbox) + return false; + else if (__isset.bbox && !(bbox == rhs.bbox)) + return false; + if (__isset.covering != rhs.__isset.covering) + return false; + else if (__isset.covering && !(covering == rhs.covering)) + return false; + if (__isset.geometry_types != rhs.__isset.geometry_types) + return false; + else if (__isset.geometry_types && !(geometry_types == rhs.geometry_types)) + return false; + return true; + } + bool operator != (const GeometryStatistics &rhs) const { + return !(*this == rhs); + } + + bool operator < (const GeometryStatistics & ) const; + + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; + + virtual void printTo(std::ostream& out) const; +}; + +void swap(GeometryStatistics &a, GeometryStatistics &b); + +std::ostream& operator<<(std::ostream& out, const GeometryStatistics& obj); + typedef struct _Statistics__isset { - _Statistics__isset() : max(false), min(false), null_count(false), distinct_count(false), max_value(false), min_value(false), is_max_value_exact(false), is_min_value_exact(false) {} + _Statistics__isset() : max(false), min(false), null_count(false), distinct_count(false), max_value(false), min_value(false), is_max_value_exact(false), is_min_value_exact(false), geometry_stats(false) {} bool max :1; bool min :1; bool null_count :1; @@ -568,6 +890,7 @@ typedef struct _Statistics__isset { bool min_value :1; bool 
is_max_value_exact :1; bool is_min_value_exact :1; + bool geometry_stats :1; } _Statistics__isset; /** @@ -595,14 +918,14 @@ class Statistics { virtual ~Statistics() noexcept; /** * DEPRECATED: min and max value of the column. Use min_value and max_value. - * + * * Values are encoded using PLAIN encoding, except that variable-length byte * arrays do not include a length prefix. - * + * * These fields encode min and max values determined by signed comparison * only. New files should use the correct order for a column's logical type * and store the values in the min_value and max_value fields. - * + * * To support older readers, these may be set when the column order is * signed. */ @@ -618,13 +941,13 @@ class Statistics { int64_t distinct_count; /** * Lower and upper bound values for the column, determined by its ColumnOrder. - * + * * These may be the actual minimum and maximum values found on a page or column * chunk, but can also be (more compact) values that do not exist on a page or * column chunk. For example, instead of storing "Blart Versenwald III", a writer * may set min_value="B", max_value="C". Such more compact values must still be * valid values within the column's logical type. - * + * * Values are encoded using PLAIN encoding, except that variable-length byte * arrays do not include a length prefix. 
*/ @@ -638,6 +961,10 @@ class Statistics { * If true, min_value is the actual minimum value for a column */ bool is_min_value_exact; + /** + * statistics specific to geometry logical type + */ + GeometryStatistics geometry_stats; _Statistics__isset __isset; @@ -657,6 +984,8 @@ class Statistics { void __set_is_min_value_exact(const bool val); + void __set_geometry_stats(const GeometryStatistics& val); + bool operator == (const Statistics & rhs) const { if (__isset.max != rhs.__isset.max) @@ -691,6 +1020,10 @@ class Statistics { return false; else if (__isset.is_min_value_exact && !(is_min_value_exact == rhs.is_min_value_exact)) return false; + if (__isset.geometry_stats != rhs.__isset.geometry_stats) + return false; + else if (__isset.geometry_stats && !(geometry_stats == rhs.geometry_stats)) + return false; return true; } bool operator != (const Statistics &rhs) const { @@ -962,7 +1295,7 @@ std::ostream& operator<<(std::ostream& out, const Float16Type& obj); /** * Logical type to annotate a column that is always null. - * + * * Sometimes when discovering the schema of existing data, values are always * null and the physical type can't be determined. This annotation signals * the case where the physical type was guessed from all null values. @@ -1004,13 +1337,13 @@ std::ostream& operator<<(std::ostream& out, const NullType& obj); /** * Decimal logical type annotation - * + * * Scale must be zero or a positive integer less than or equal to the precision. * Precision must be a non-zero positive integer. - * + * * To maintain forward-compatibility in v1, implementations using this logical * type must also set scale and precision on the annotated SchemaElement. 
- * + * * Allowed for physical types: INT32, INT64, FIXED, and BINARY */ class DecimalType { @@ -1234,7 +1567,7 @@ std::ostream& operator<<(std::ostream& out, const TimeUnit& obj); /** * Timestamp logical type annotation - * + * * Allowed for physical types: INT64 */ class TimestampType { @@ -1285,7 +1618,7 @@ std::ostream& operator<<(std::ostream& out, const TimestampType& obj); /** * Time logical type annotation - * + * * Allowed for physical types: INT32 (millis), INT64 (micros, nanos) */ class TimeType { @@ -1336,9 +1669,9 @@ std::ostream& operator<<(std::ostream& out, const TimeType& obj); /** * Integer logical type annotation - * + * * bitWidth must be 8, 16, 32, or 64. - * + * * Allowed for physical types: INT32, INT64 */ class IntType { @@ -1390,7 +1723,7 @@ std::ostream& operator<<(std::ostream& out, const IntType& obj); /** * Embedded JSON logical type annotation - * + * * Allowed for physical types: BINARY */ class JsonType { @@ -1430,7 +1763,7 @@ std::ostream& operator<<(std::ostream& out, const JsonType& obj); /** * Embedded BSON logical type annotation - * + * * Allowed for physical types: BINARY */ class BsonType { @@ -1467,8 +1800,100 @@ void swap(BsonType &a, BsonType &b); std::ostream& operator<<(std::ostream& out, const BsonType& obj); +typedef struct _GeometryType__isset { + _GeometryType__isset() : crs(false), metadata(false) {} + bool crs :1; + bool metadata :1; +} _GeometryType__isset; + +/** + * Geometry logical type annotation (added in 2.11.0) + */ +class GeometryType { + public: + + GeometryType(const GeometryType&); + GeometryType(GeometryType&&) noexcept; + GeometryType& operator=(const GeometryType&); + GeometryType& operator=(GeometryType&&) noexcept; + GeometryType() noexcept + : encoding(static_cast(0)), + edges(static_cast(0)), + crs(), + metadata() { + } + + virtual ~GeometryType() noexcept; + /** + * Physical type and encoding for the geometry type. Please refer to the + * definition of GeometryEncoding for more detail. 
+ * + * @see GeometryEncoding + */ + GeometryEncoding::type encoding; + /** + * Edges of polygon. + * + * @see Edges + */ + Edges::type edges; + /** + * Coordinate Reference System, i.e. mapping of how coordinates refer to + * precise locations on earth, e.g. OGC:CRS84 + */ + std::string crs; + /** + * Additional informative metadata. + * It can be used by GeoParquet to offload some of the column metadata. + */ + std::string metadata; + + _GeometryType__isset __isset; + + void __set_encoding(const GeometryEncoding::type val); + + void __set_edges(const Edges::type val); + + void __set_crs(const std::string& val); + + void __set_metadata(const std::string& val); + + bool operator == (const GeometryType & rhs) const + { + if (!(encoding == rhs.encoding)) + return false; + if (!(edges == rhs.edges)) + return false; + if (__isset.crs != rhs.__isset.crs) + return false; + else if (__isset.crs && !(crs == rhs.crs)) + return false; + if (__isset.metadata != rhs.__isset.metadata) + return false; + else if (__isset.metadata && !(metadata == rhs.metadata)) + return false; + return true; + } + bool operator != (const GeometryType &rhs) const { + return !(*this == rhs); + } + + bool operator < (const GeometryType & ) const; + + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; + + virtual void printTo(std::ostream& out) const; +}; + +void swap(GeometryType &a, GeometryType &b); + +std::ostream& operator<<(std::ostream& out, const GeometryType& obj); + typedef struct _LogicalType__isset { - _LogicalType__isset() : STRING(false), MAP(false), LIST(false), ENUM(false), DECIMAL(false), DATE(false), TIME(false), TIMESTAMP(false), INTEGER(false), UNKNOWN(false), JSON(false), BSON(false), UUID(false), FLOAT16(false) {} + _LogicalType__isset() : STRING(false), MAP(false), LIST(false), ENUM(false), DECIMAL(false), DATE(false), TIME(false), TIMESTAMP(false), INTEGER(false), UNKNOWN(false), JSON(false), BSON(false), UUID(false), 
FLOAT16(false), GEOMETRY(false) {} bool STRING :1; bool MAP :1; bool LIST :1; @@ -1483,11 +1908,12 @@ typedef struct _LogicalType__isset { bool BSON :1; bool UUID :1; bool FLOAT16 :1; + bool GEOMETRY :1; } _LogicalType__isset; /** * LogicalType annotations to replace ConvertedType. - * + * * To maintain compatibility, implementations using LogicalType for a * SchemaElement must also set the corresponding ConvertedType (if any) * from the following table. @@ -1495,9 +1921,9 @@ typedef struct _LogicalType__isset { class LogicalType { public: - LogicalType(const LogicalType&) noexcept; + LogicalType(const LogicalType&); LogicalType(LogicalType&&) noexcept; - LogicalType& operator=(const LogicalType&) noexcept; + LogicalType& operator=(const LogicalType&); LogicalType& operator=(LogicalType&&) noexcept; LogicalType() noexcept { } @@ -1517,6 +1943,7 @@ class LogicalType { BsonType BSON; UUIDType UUID; Float16Type FLOAT16; + GeometryType GEOMETRY; _LogicalType__isset __isset; @@ -1548,6 +1975,8 @@ class LogicalType { void __set_FLOAT16(const Float16Type& val); + void __set_GEOMETRY(const GeometryType& val); + bool operator == (const LogicalType & rhs) const { if (__isset.STRING != rhs.__isset.STRING) @@ -1606,6 +2035,10 @@ class LogicalType { return false; else if (__isset.FLOAT16 && !(FLOAT16 == rhs.FLOAT16)) return false; + if (__isset.GEOMETRY != rhs.__isset.GEOMETRY) + return false; + else if (__isset.GEOMETRY && !(GEOMETRY == rhs.GEOMETRY)) + return false; return true; } bool operator != (const LogicalType &rhs) const { @@ -1667,7 +2100,7 @@ class SchemaElement { virtual ~SchemaElement() noexcept; /** * Data type for this field. Not set if the current element is a non-leaf node - * + * * @see Type */ Type::type type; @@ -1681,7 +2114,7 @@ class SchemaElement { /** * repetition of the field. The root of the schema does not have a repetition_type. 
* All other nodes must have one - * + * * @see FieldRepetitionType */ FieldRepetitionType::type repetition_type; @@ -1699,16 +2132,16 @@ class SchemaElement { /** * DEPRECATED: When the schema is the result of a conversion from another model. * Used to record the original type to help with cross conversion. - * + * * This is superseded by logicalType. - * + * * @see ConvertedType */ ConvertedType::type converted_type; /** * DEPRECATED: Used when this column contains decimal data. * See the DECIMAL converted type for more details. - * + * * This is superseded by using the DecimalType annotation in logicalType. */ int32_t scale; @@ -1720,7 +2153,7 @@ class SchemaElement { int32_t field_id; /** * The logical type of this SchemaElement - * + * * LogicalType replaces ConvertedType, but ConvertedType is still required * for some logical types to ensure forward-compatibility in format v1. */ @@ -1837,19 +2270,19 @@ class DataPageHeader { int32_t num_values; /** * Encoding used for this data page * - * + * * @see Encoding */ Encoding::type encoding; /** * Encoding used for definition levels * - * + * * @see Encoding */ Encoding::type definition_level_encoding; /** * Encoding used for repetition levels * - * + * * @see Encoding */ Encoding::type repetition_level_encoding; @@ -1948,7 +2381,7 @@ typedef struct _DictionaryPageHeader__isset { * The dictionary page must be placed at the first position of the column chunk * if it is partly or completely dictionary encoded. At most one dictionary page * can be placed in a column chunk. 
- * + * */ class DictionaryPageHeader { public: @@ -1970,7 +2403,7 @@ class DictionaryPageHeader { int32_t num_values; /** * Encoding using this dictionary page * - * + * * @see Encoding */ Encoding::type encoding; @@ -2027,7 +2460,7 @@ typedef struct _DataPageHeaderV2__isset { * New page format allowing reading levels without decompressing the data * Repetition and definition levels are uncompressed * The remaining section containing the data is compressed if is_compressed is true - * + * */ class DataPageHeaderV2 { public: @@ -2062,7 +2495,7 @@ class DataPageHeaderV2 { int32_t num_rows; /** * Encoding used for data in this page * - * + * * @see Encoding */ Encoding::type encoding; @@ -2243,7 +2676,7 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterAlgorithm& obj); /** * Hash strategy type annotation. xxHash is an extremely fast non-cryptographic hash * algorithm. It uses 64 bits version of xxHash. - * + * */ class XxHash { public: @@ -2287,7 +2720,7 @@ typedef struct _BloomFilterHash__isset { /** * The hash function used in Bloom filter. This function takes the hash of a column value * using plain encoding. - * + * */ class BloomFilterHash { public: @@ -2338,7 +2771,7 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterHash& obj); /** * The compression used in the Bloom filter. - * + * */ class Uncompressed { public: @@ -2426,7 +2859,7 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterCompression& obj); /** * Bloom filter header is stored at beginning of Bloom filter data of each column * and followed by its bitset. 
- * + * */ class BloomFilterHeader { public: @@ -2521,7 +2954,7 @@ class PageHeader { virtual ~PageHeader() noexcept; /** * the type of the page: indicates which of the *_header fields is set * - * + * * @see PageType */ PageType::type type; @@ -2535,7 +2968,7 @@ class PageHeader { int32_t compressed_page_size; /** * The 32-bit CRC checksum for the page, to be be calculated as follows: - * + * * - The standard CRC32 algorithm is used (with polynomial 0x04C11DB7, * the same as in e.g. GZip). * - All page types can have a CRC (v1 and v2 data pages, dictionary pages, @@ -2547,7 +2980,7 @@ class PageHeader { * encrypted). * - The CRC computation therefore takes place after any compression * and encryption steps, if any. - * + * * If enabled, this allows for disabling checksumming in HDFS if only a few * pages need to be read. */ @@ -2683,7 +3116,7 @@ std::ostream& operator<<(std::ostream& out, const KeyValue& obj); /** - * Wrapper struct to specify sort order + * Sort order within a RowGroup of a leaf column */ class SortingColumn { public: @@ -2700,7 +3133,7 @@ class SortingColumn { virtual ~SortingColumn() noexcept; /** - * The column index (in this row group) * + * The ordinal position of the column (in this row group) * */ int32_t column_idx; /** @@ -2767,13 +3200,13 @@ class PageEncodingStats { virtual ~PageEncodingStats() noexcept; /** * the page type (data/dic/...) * - * + * * @see PageType */ PageType::type page_type; /** * encoding of the page * - * + * * @see Encoding */ Encoding::type encoding; @@ -2854,7 +3287,7 @@ class ColumnMetaData { virtual ~ColumnMetaData() noexcept; /** * Type of this column * - * + * * @see Type */ Type::type type; @@ -2869,7 +3302,7 @@ class ColumnMetaData { std::vector path_in_schema; /** * Compression codec * - * + * * @see CompressionCodec */ CompressionCodec::type codec; @@ -3220,7 +3653,7 @@ class ColumnChunk { /** * File where column data is stored. If not set, assumed to be same file as * metadata. 
This path is relative to the current file. - * + * */ std::string file_path; /** @@ -3231,7 +3664,7 @@ class ColumnChunk { * Column metadata for this chunk. This is the same content as what is at * file_path/file_offset. Having it here has it replicated in the file * metadata. - * + * */ ColumnMetaData meta_data; /** @@ -3362,7 +3795,7 @@ class RowGroup { /** * Metadata for each column chunk in this row group. * This list must have the same order as the SchemaElement list in FileMetaData. - * + * */ std::vector columns; /** @@ -3500,11 +3933,11 @@ typedef struct _ColumnOrder__isset { * Union to specify the order used for the min_value and max_value fields for a * column. This union takes the role of an enhanced enum that allows rich * elements (which will be needed for a collation-based ordering in the future). - * + * * Possible values are: * * TypeDefinedOrder - the column uses the order defined by its logical or * physical type (if there is no logical type). - * + * * If the reader does not support the value of this union, min and max stats * for this column should be ignored. */ @@ -3536,13 +3969,14 @@ class ColumnOrder { * TIME_MICROS - signed comparison * TIMESTAMP_MILLIS - signed comparison * TIMESTAMP_MICROS - signed comparison - * INTERVAL - unsigned comparison + * INTERVAL - undefined * JSON - unsigned byte-wise comparison * BSON - unsigned byte-wise comparison * ENUM - unsigned byte-wise comparison * LIST - undefined * MAP - undefined - * + * GEOMETRY - undefined, use GeometryStatistics instead. + * * In the absence of logical types, the sort order is determined by the physical type: * BOOLEAN - false, true * INT32 - signed comparison @@ -3552,7 +3986,7 @@ class ColumnOrder { * DOUBLE - signed comparison of the represented value (*) * BYTE_ARRAY - unsigned byte-wise comparison * FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison - * + * * (*) Because the sorting order is not specified properly for floating * point values (relations vs. 
total ordering) the following * compatibility rules should be applied when reading statistics: @@ -3561,7 +3995,7 @@ class ColumnOrder { * - If the min is +0, the row group may contain -0 values as well. * - If the max is -0, the row group may contain +0 values as well. * - When looking for NaN values, min and max should be ignored. - * + * * When writing statistics the following rules should be followed: * - NaNs should not be written to min or max statistics fields. * - If the computed max value is zero (whether negative or positive), @@ -3670,6 +4104,13 @@ typedef struct _OffsetIndex__isset { bool unencoded_byte_array_data_bytes :1; } _OffsetIndex__isset; +/** + * Optional offsets for each data page in a ColumnChunk. + * + * Forms part of the page index, along with ColumnIndex. + * + * OffsetIndex may be present even if ColumnIndex is not. + */ class OffsetIndex { public: @@ -3688,7 +4129,7 @@ class OffsetIndex { std::vector page_locations; /** * Unencoded/uncompressed size for BYTE_ARRAY types. - * + * * See documention for unencoded_byte_array_data_bytes in SizeStatistics for * more details on this field. */ @@ -3729,15 +4170,22 @@ void swap(OffsetIndex &a, OffsetIndex &b); std::ostream& operator<<(std::ostream& out, const OffsetIndex& obj); typedef struct _ColumnIndex__isset { - _ColumnIndex__isset() : null_counts(false), repetition_level_histograms(false), definition_level_histograms(false) {} + _ColumnIndex__isset() : null_counts(false), repetition_level_histograms(false), definition_level_histograms(false), geometry_stats(false) {} bool null_counts :1; bool repetition_level_histograms :1; bool definition_level_histograms :1; + bool geometry_stats :1; } _ColumnIndex__isset; /** - * Description for ColumnIndex. - * Each [i] refers to the page at OffsetIndex.page_locations[i] + * Optional statistics for each data page in a ColumnChunk. + * + * Forms part the page index, along with OffsetIndex. 
+ * + * If this structure is present, OffsetIndex must also be present. + * + * For each field in this structure, [i] refers to the page at + * OffsetIndex.page_locations[i] */ class ColumnIndex { public: @@ -3776,7 +4224,7 @@ class ColumnIndex { * which direction. This allows readers to perform binary searches in both * lists. Readers cannot assume that max_values[i] <= min_values[i+1], even * if the lists are ordered. - * + * * @see BoundaryOrder */ BoundaryOrder::type boundary_order; @@ -3788,21 +4236,25 @@ class ColumnIndex { * Contains repetition level histograms for each page * concatenated together. The repetition_level_histogram field on * SizeStatistics contains more details. - * + * * When present the length should always be (number of pages * * (max_repetition_level + 1)) elements. - * + * * Element 0 is the first element of the histogram for the first page. * Element (max_repetition_level + 1) is the first element of the histogram * for the second page. - * + * */ std::vector repetition_level_histograms; /** * Same as repetition_level_histograms except for definitions levels. 
- * + * */ std::vector definition_level_histograms; + /** + * A list containing statistics of GEOMETRY logical type for each page + */ + std::vector geometry_stats; _ColumnIndex__isset __isset; @@ -3820,6 +4272,8 @@ class ColumnIndex { void __set_definition_level_histograms(const std::vector & val); + void __set_geometry_stats(const std::vector & val); + bool operator == (const ColumnIndex & rhs) const { if (!(null_pages == rhs.null_pages)) @@ -3842,6 +4296,10 @@ class ColumnIndex { return false; else if (__isset.definition_level_histograms && !(definition_level_histograms == rhs.definition_level_histograms)) return false; + if (__isset.geometry_stats != rhs.__isset.geometry_stats) + return false; + else if (__isset.geometry_stats && !(geometry_stats == rhs.geometry_stats)) + return false; return true; } bool operator != (const ColumnIndex &rhs) const { @@ -4128,7 +4586,7 @@ class FileMetaData { * String for application that wrote this file. This should be in the format * version (build ). * e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55) - * + * */ std::string created_by; /** @@ -4138,12 +4596,12 @@ class FileMetaData { * matching the columns in the schema. The indexes are not necessary the same * though, because only leaf nodes of the schema are represented in the list * of sort orders. - * + * * Without column_orders, the meaning of the min_value and max_value fields * in the Statistics object and the ColumnIndex object is undefined. To ensure * well-defined behaviour, if these fields are written to a Parquet file, * column_orders must be written as well. - * + * * The obsolete min and max fields in the Statistics object are always sorted * by signed comparison regardless of column_orders. 
*/ @@ -4295,8 +4753,8 @@ class FileCryptoMetaData { void swap(FileCryptoMetaData &a, FileCryptoMetaData &b); std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj); - -}} // namespace +} +} // namespace #include "parquet_types.tcc" diff --git a/cpp/src/generated/parquet_types.tcc b/cpp/src/generated/parquet_types.tcc index ee02d7f0139fc..4436a5ecde8d6 100644 --- a/cpp/src/generated/parquet_types.tcc +++ b/cpp/src/generated/parquet_types.tcc @@ -1,5 +1,5 @@ /** - * Autogenerated by Thrift Compiler (0.19.0) + * Autogenerated by Thrift Compiler (0.20.0) * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * @generated @@ -135,6 +135,344 @@ uint32_t SizeStatistics::write(Protocol_* oprot) const { return xfer; } +template +uint32_t Covering::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_geometry = false; + bool isset_edges = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->geometry); + isset_geometry = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast16; + xfer += iprot->readI32(ecast16); + this->edges = static_cast(ecast16); + isset_edges = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_geometry) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_edges) + throw 
TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t Covering::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("Covering"); + + xfer += oprot->writeFieldBegin("geometry", ::apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeBinary(this->geometry); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("edges", ::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(static_cast(this->edges)); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t BoundingBox::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_xmin = false; + bool isset_xmax = false; + bool isset_ymin = false; + bool isset_ymax = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_DOUBLE) { + xfer += iprot->readDouble(this->xmin); + isset_xmin = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_DOUBLE) { + xfer += iprot->readDouble(this->xmax); + isset_xmax = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_DOUBLE) { + xfer += iprot->readDouble(this->ymin); + isset_ymin = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_DOUBLE) { + xfer += iprot->readDouble(this->ymax); + isset_ymax = true; + } else { + xfer += 
iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_DOUBLE) { + xfer += iprot->readDouble(this->zmin); + this->__isset.zmin = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_DOUBLE) { + xfer += iprot->readDouble(this->zmax); + this->__isset.zmax = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_DOUBLE) { + xfer += iprot->readDouble(this->mmin); + this->__isset.mmin = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::apache::thrift::protocol::T_DOUBLE) { + xfer += iprot->readDouble(this->mmax); + this->__isset.mmax = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_xmin) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_xmax) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_ymin) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_ymax) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t BoundingBox::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("BoundingBox"); + + xfer += oprot->writeFieldBegin("xmin", ::apache::thrift::protocol::T_DOUBLE, 1); + xfer += oprot->writeDouble(this->xmin); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("xmax", ::apache::thrift::protocol::T_DOUBLE, 2); + xfer += oprot->writeDouble(this->xmax); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("ymin", ::apache::thrift::protocol::T_DOUBLE, 3); + xfer += oprot->writeDouble(this->ymin); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("ymax", 
::apache::thrift::protocol::T_DOUBLE, 4); + xfer += oprot->writeDouble(this->ymax); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.zmin) { + xfer += oprot->writeFieldBegin("zmin", ::apache::thrift::protocol::T_DOUBLE, 5); + xfer += oprot->writeDouble(this->zmin); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.zmax) { + xfer += oprot->writeFieldBegin("zmax", ::apache::thrift::protocol::T_DOUBLE, 6); + xfer += oprot->writeDouble(this->zmax); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.mmin) { + xfer += oprot->writeFieldBegin("mmin", ::apache::thrift::protocol::T_DOUBLE, 7); + xfer += oprot->writeDouble(this->mmin); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.mmax) { + xfer += oprot->writeFieldBegin("mmax", ::apache::thrift::protocol::T_DOUBLE, 8); + xfer += oprot->writeDouble(this->mmax); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t GeometryStatistics::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->bbox.read(iprot); + this->__isset.bbox = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->covering.read(iprot); + this->__isset.covering = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->geometry_types.clear(); + uint32_t _size25; + ::apache::thrift::protocol::TType 
_etype28; + xfer += iprot->readListBegin(_etype28, _size25); + this->geometry_types.resize(_size25); + uint32_t _i29; + for (_i29 = 0; _i29 < _size25; ++_i29) + { + xfer += iprot->readI32(this->geometry_types[_i29]); + } + xfer += iprot->readListEnd(); + } + this->__isset.geometry_types = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t GeometryStatistics::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("GeometryStatistics"); + + if (this->__isset.bbox) { + xfer += oprot->writeFieldBegin("bbox", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->bbox.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.covering) { + xfer += oprot->writeFieldBegin("covering", ::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->covering.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.geometry_types) { + xfer += oprot->writeFieldBegin("geometry_types", ::apache::thrift::protocol::T_LIST, 3); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast(this->geometry_types.size())); + std::vector ::const_iterator _iter30; + for (_iter30 = this->geometry_types.begin(); _iter30 != this->geometry_types.end(); ++_iter30) + { + xfer += oprot->writeI32((*_iter30)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + template uint32_t Statistics::read(Protocol_* iprot) { @@ -221,6 +559,14 @@ uint32_t Statistics::read(Protocol_* iprot) { xfer += iprot->skip(ftype); } break; + case 9: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->geometry_stats.read(iprot); + this->__isset.geometry_stats = 
true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -279,6 +625,11 @@ uint32_t Statistics::write(Protocol_* oprot) const { xfer += oprot->writeBool(this->is_min_value_exact); xfer += oprot->writeFieldEnd(); } + if (this->__isset.geometry_stats) { + xfer += oprot->writeFieldBegin("geometry_stats", ::apache::thrift::protocol::T_STRUCT, 9); + xfer += this->geometry_stats.write(oprot); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -1188,6 +1539,111 @@ uint32_t BsonType::write(Protocol_* oprot) const { return xfer; } +template +uint32_t GeometryType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_encoding = false; + bool isset_edges = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast111; + xfer += iprot->readI32(ecast111); + this->encoding = static_cast(ecast111); + isset_encoding = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast112; + xfer += iprot->readI32(ecast112); + this->edges = static_cast(ecast112); + isset_edges = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->crs); + this->__isset.crs = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->metadata); + this->__isset.metadata = true; 
+ } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_encoding) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_edges) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t GeometryType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("GeometryType"); + + xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(static_cast(this->encoding)); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("edges", ::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(static_cast(this->edges)); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.crs) { + xfer += oprot->writeFieldBegin("crs", ::apache::thrift::protocol::T_STRING, 3); + xfer += oprot->writeString(this->crs); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.metadata) { + xfer += oprot->writeFieldBegin("metadata", ::apache::thrift::protocol::T_STRING, 4); + xfer += oprot->writeBinary(this->metadata); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + template uint32_t LogicalType::read(Protocol_* iprot) { @@ -1322,6 +1778,14 @@ uint32_t LogicalType::read(Protocol_* iprot) { xfer += iprot->skip(ftype); } break; + case 16: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->GEOMETRY.read(iprot); + this->__isset.GEOMETRY = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -1410,6 +1874,11 @@ uint32_t LogicalType::write(Protocol_* oprot) const { xfer += this->FLOAT16.write(oprot); xfer += oprot->writeFieldEnd(); } + if (this->__isset.GEOMETRY) { + xfer += 
oprot->writeFieldBegin("GEOMETRY", ::apache::thrift::protocol::T_STRUCT, 16); + xfer += this->GEOMETRY.write(oprot); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -1440,9 +1909,9 @@ uint32_t SchemaElement::read(Protocol_* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast96; - xfer += iprot->readI32(ecast96); - this->type = static_cast(ecast96); + int32_t ecast121; + xfer += iprot->readI32(ecast121); + this->type = static_cast(ecast121); this->__isset.type = true; } else { xfer += iprot->skip(ftype); @@ -1458,9 +1927,9 @@ uint32_t SchemaElement::read(Protocol_* iprot) { break; case 3: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast97; - xfer += iprot->readI32(ecast97); - this->repetition_type = static_cast(ecast97); + int32_t ecast122; + xfer += iprot->readI32(ecast122); + this->repetition_type = static_cast(ecast122); this->__isset.repetition_type = true; } else { xfer += iprot->skip(ftype); @@ -1484,9 +1953,9 @@ uint32_t SchemaElement::read(Protocol_* iprot) { break; case 6: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast98; - xfer += iprot->readI32(ecast98); - this->converted_type = static_cast(ecast98); + int32_t ecast123; + xfer += iprot->readI32(ecast123); + this->converted_type = static_cast(ecast123); this->__isset.converted_type = true; } else { xfer += iprot->skip(ftype); @@ -1634,9 +2103,9 @@ uint32_t DataPageHeader::read(Protocol_* iprot) { break; case 2: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast103; - xfer += iprot->readI32(ecast103); - this->encoding = static_cast(ecast103); + int32_t ecast128; + xfer += iprot->readI32(ecast128); + this->encoding = static_cast(ecast128); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -1644,9 +2113,9 @@ uint32_t DataPageHeader::read(Protocol_* iprot) { break; case 3: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast104; - 
xfer += iprot->readI32(ecast104); - this->definition_level_encoding = static_cast(ecast104); + int32_t ecast129; + xfer += iprot->readI32(ecast129); + this->definition_level_encoding = static_cast(ecast129); isset_definition_level_encoding = true; } else { xfer += iprot->skip(ftype); @@ -1654,9 +2123,9 @@ uint32_t DataPageHeader::read(Protocol_* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast105; - xfer += iprot->readI32(ecast105); - this->repetition_level_encoding = static_cast(ecast105); + int32_t ecast130; + xfer += iprot->readI32(ecast130); + this->repetition_level_encoding = static_cast(ecast130); isset_repetition_level_encoding = true; } else { xfer += iprot->skip(ftype); @@ -1796,9 +2265,9 @@ uint32_t DictionaryPageHeader::read(Protocol_* iprot) { break; case 2: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast114; - xfer += iprot->readI32(ecast114); - this->encoding = static_cast(ecast114); + int32_t ecast139; + xfer += iprot->readI32(ecast139); + this->encoding = static_cast(ecast139); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -1906,9 +2375,9 @@ uint32_t DataPageHeaderV2::read(Protocol_* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast119; - xfer += iprot->readI32(ecast119); - this->encoding = static_cast(ecast119); + int32_t ecast144; + xfer += iprot->readI32(ecast144); + this->encoding = static_cast(ecast144); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -2441,9 +2910,9 @@ uint32_t PageHeader::read(Protocol_* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast152; - xfer += iprot->readI32(ecast152); - this->type = static_cast(ecast152); + int32_t ecast177; + xfer += iprot->readI32(ecast177); + this->type = static_cast(ecast177); isset_type = true; } else { xfer += iprot->skip(ftype); @@ -2761,9 +3230,9 @@ uint32_t PageEncodingStats::read(Protocol_* iprot) { { case 1: if (ftype == 
::apache::thrift::protocol::T_I32) { - int32_t ecast165; - xfer += iprot->readI32(ecast165); - this->page_type = static_cast(ecast165); + int32_t ecast190; + xfer += iprot->readI32(ecast190); + this->page_type = static_cast(ecast190); isset_page_type = true; } else { xfer += iprot->skip(ftype); @@ -2771,9 +3240,9 @@ uint32_t PageEncodingStats::read(Protocol_* iprot) { break; case 2: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast166; - xfer += iprot->readI32(ecast166); - this->encoding = static_cast(ecast166); + int32_t ecast191; + xfer += iprot->readI32(ecast191); + this->encoding = static_cast(ecast191); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -2860,9 +3329,9 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast171; - xfer += iprot->readI32(ecast171); - this->type = static_cast(ecast171); + int32_t ecast196; + xfer += iprot->readI32(ecast196); + this->type = static_cast(ecast196); isset_type = true; } else { xfer += iprot->skip(ftype); @@ -2872,16 +3341,16 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->encodings.clear(); - uint32_t _size172; - ::apache::thrift::protocol::TType _etype175; - xfer += iprot->readListBegin(_etype175, _size172); - this->encodings.resize(_size172); - uint32_t _i176; - for (_i176 = 0; _i176 < _size172; ++_i176) + uint32_t _size197; + ::apache::thrift::protocol::TType _etype200; + xfer += iprot->readListBegin(_etype200, _size197); + this->encodings.resize(_size197); + uint32_t _i201; + for (_i201 = 0; _i201 < _size197; ++_i201) { - int32_t ecast177; - xfer += iprot->readI32(ecast177); - this->encodings[_i176] = static_cast(ecast177); + int32_t ecast202; + xfer += iprot->readI32(ecast202); + this->encodings[_i201] = static_cast(ecast202); } xfer += iprot->readListEnd(); } @@ -2894,14 +3363,14 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { if (ftype == 
::apache::thrift::protocol::T_LIST) { { this->path_in_schema.clear(); - uint32_t _size178; - ::apache::thrift::protocol::TType _etype181; - xfer += iprot->readListBegin(_etype181, _size178); - this->path_in_schema.resize(_size178); - uint32_t _i182; - for (_i182 = 0; _i182 < _size178; ++_i182) + uint32_t _size203; + ::apache::thrift::protocol::TType _etype206; + xfer += iprot->readListBegin(_etype206, _size203); + this->path_in_schema.resize(_size203); + uint32_t _i207; + for (_i207 = 0; _i207 < _size203; ++_i207) { - xfer += iprot->readString(this->path_in_schema[_i182]); + xfer += iprot->readString(this->path_in_schema[_i207]); } xfer += iprot->readListEnd(); } @@ -2912,9 +3381,9 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast183; - xfer += iprot->readI32(ecast183); - this->codec = static_cast(ecast183); + int32_t ecast208; + xfer += iprot->readI32(ecast208); + this->codec = static_cast(ecast208); isset_codec = true; } else { xfer += iprot->skip(ftype); @@ -2948,14 +3417,14 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->key_value_metadata.clear(); - uint32_t _size184; - ::apache::thrift::protocol::TType _etype187; - xfer += iprot->readListBegin(_etype187, _size184); - this->key_value_metadata.resize(_size184); - uint32_t _i188; - for (_i188 = 0; _i188 < _size184; ++_i188) + uint32_t _size209; + ::apache::thrift::protocol::TType _etype212; + xfer += iprot->readListBegin(_etype212, _size209); + this->key_value_metadata.resize(_size209); + uint32_t _i213; + for (_i213 = 0; _i213 < _size209; ++_i213) { - xfer += this->key_value_metadata[_i188].read(iprot); + xfer += this->key_value_metadata[_i213].read(iprot); } xfer += iprot->readListEnd(); } @@ -3000,14 +3469,14 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->encoding_stats.clear(); - uint32_t _size189; 
- ::apache::thrift::protocol::TType _etype192; - xfer += iprot->readListBegin(_etype192, _size189); - this->encoding_stats.resize(_size189); - uint32_t _i193; - for (_i193 = 0; _i193 < _size189; ++_i193) + uint32_t _size214; + ::apache::thrift::protocol::TType _etype217; + xfer += iprot->readListBegin(_etype217, _size214); + this->encoding_stats.resize(_size214); + uint32_t _i218; + for (_i218 = 0; _i218 < _size214; ++_i218) { - xfer += this->encoding_stats[_i193].read(iprot); + xfer += this->encoding_stats[_i218].read(iprot); } xfer += iprot->readListEnd(); } @@ -3081,10 +3550,10 @@ uint32_t ColumnMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("encodings", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast(this->encodings.size())); - std::vector ::const_iterator _iter194; - for (_iter194 = this->encodings.begin(); _iter194 != this->encodings.end(); ++_iter194) + std::vector ::const_iterator _iter219; + for (_iter219 = this->encodings.begin(); _iter219 != this->encodings.end(); ++_iter219) { - xfer += oprot->writeI32(static_cast((*_iter194))); + xfer += oprot->writeI32(static_cast((*_iter219))); } xfer += oprot->writeListEnd(); } @@ -3093,10 +3562,10 @@ uint32_t ColumnMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 3); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); - std::vector ::const_iterator _iter195; - for (_iter195 = this->path_in_schema.begin(); _iter195 != this->path_in_schema.end(); ++_iter195) + std::vector ::const_iterator _iter220; + for (_iter220 = this->path_in_schema.begin(); _iter220 != this->path_in_schema.end(); ++_iter220) { - xfer += oprot->writeString((*_iter195)); + xfer += oprot->writeString((*_iter220)); } xfer += oprot->writeListEnd(); } @@ -3122,10 +3591,10 @@ uint32_t ColumnMetaData::write(Protocol_* 
oprot) const { xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 8); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter196; - for (_iter196 = this->key_value_metadata.begin(); _iter196 != this->key_value_metadata.end(); ++_iter196) + std::vector ::const_iterator _iter221; + for (_iter221 = this->key_value_metadata.begin(); _iter221 != this->key_value_metadata.end(); ++_iter221) { - xfer += (*_iter196).write(oprot); + xfer += (*_iter221).write(oprot); } xfer += oprot->writeListEnd(); } @@ -3154,10 +3623,10 @@ uint32_t ColumnMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("encoding_stats", ::apache::thrift::protocol::T_LIST, 13); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->encoding_stats.size())); - std::vector ::const_iterator _iter197; - for (_iter197 = this->encoding_stats.begin(); _iter197 != this->encoding_stats.end(); ++_iter197) + std::vector ::const_iterator _iter222; + for (_iter222 = this->encoding_stats.begin(); _iter222 != this->encoding_stats.end(); ++_iter222) { - xfer += (*_iter197).write(oprot); + xfer += (*_iter222).write(oprot); } xfer += oprot->writeListEnd(); } @@ -3250,14 +3719,14 @@ uint32_t EncryptionWithColumnKey::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->path_in_schema.clear(); - uint32_t _size206; - ::apache::thrift::protocol::TType _etype209; - xfer += iprot->readListBegin(_etype209, _size206); - this->path_in_schema.resize(_size206); - uint32_t _i210; - for (_i210 = 0; _i210 < _size206; ++_i210) + uint32_t _size231; + ::apache::thrift::protocol::TType _etype234; + xfer += iprot->readListBegin(_etype234, _size231); + this->path_in_schema.resize(_size231); + uint32_t _i235; + for (_i235 = 0; _i235 < _size231; ++_i235) { - xfer += iprot->readString(this->path_in_schema[_i210]); + xfer += 
iprot->readString(this->path_in_schema[_i235]); } xfer += iprot->readListEnd(); } @@ -3297,10 +3766,10 @@ uint32_t EncryptionWithColumnKey::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); - std::vector ::const_iterator _iter211; - for (_iter211 = this->path_in_schema.begin(); _iter211 != this->path_in_schema.end(); ++_iter211) + std::vector ::const_iterator _iter236; + for (_iter236 = this->path_in_schema.begin(); _iter236 != this->path_in_schema.end(); ++_iter236) { - xfer += oprot->writeString((*_iter211)); + xfer += oprot->writeString((*_iter236)); } xfer += oprot->writeListEnd(); } @@ -3580,14 +4049,14 @@ uint32_t RowGroup::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->columns.clear(); - uint32_t _size224; - ::apache::thrift::protocol::TType _etype227; - xfer += iprot->readListBegin(_etype227, _size224); - this->columns.resize(_size224); - uint32_t _i228; - for (_i228 = 0; _i228 < _size224; ++_i228) + uint32_t _size249; + ::apache::thrift::protocol::TType _etype252; + xfer += iprot->readListBegin(_etype252, _size249); + this->columns.resize(_size249); + uint32_t _i253; + for (_i253 = 0; _i253 < _size249; ++_i253) { - xfer += this->columns[_i228].read(iprot); + xfer += this->columns[_i253].read(iprot); } xfer += iprot->readListEnd(); } @@ -3616,14 +4085,14 @@ uint32_t RowGroup::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->sorting_columns.clear(); - uint32_t _size229; - ::apache::thrift::protocol::TType _etype232; - xfer += iprot->readListBegin(_etype232, _size229); - this->sorting_columns.resize(_size229); - uint32_t _i233; - for (_i233 = 0; _i233 < _size229; ++_i233) + uint32_t _size254; + ::apache::thrift::protocol::TType _etype257; + xfer += iprot->readListBegin(_etype257, _size254); + 
this->sorting_columns.resize(_size254); + uint32_t _i258; + for (_i258 = 0; _i258 < _size254; ++_i258) { - xfer += this->sorting_columns[_i233].read(iprot); + xfer += this->sorting_columns[_i258].read(iprot); } xfer += iprot->readListEnd(); } @@ -3683,10 +4152,10 @@ uint32_t RowGroup::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("columns", ::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->columns.size())); - std::vector ::const_iterator _iter234; - for (_iter234 = this->columns.begin(); _iter234 != this->columns.end(); ++_iter234) + std::vector ::const_iterator _iter259; + for (_iter259 = this->columns.begin(); _iter259 != this->columns.end(); ++_iter259) { - xfer += (*_iter234).write(oprot); + xfer += (*_iter259).write(oprot); } xfer += oprot->writeListEnd(); } @@ -3704,10 +4173,10 @@ uint32_t RowGroup::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("sorting_columns", ::apache::thrift::protocol::T_LIST, 4); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->sorting_columns.size())); - std::vector ::const_iterator _iter235; - for (_iter235 = this->sorting_columns.begin(); _iter235 != this->sorting_columns.end(); ++_iter235) + std::vector ::const_iterator _iter260; + for (_iter260 = this->sorting_columns.begin(); _iter260 != this->sorting_columns.end(); ++_iter260) { - xfer += (*_iter235).write(oprot); + xfer += (*_iter260).write(oprot); } xfer += oprot->writeListEnd(); } @@ -3948,14 +4417,14 @@ uint32_t OffsetIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->page_locations.clear(); - uint32_t _size252; - ::apache::thrift::protocol::TType _etype255; - xfer += iprot->readListBegin(_etype255, _size252); - this->page_locations.resize(_size252); - uint32_t _i256; - for (_i256 = 0; _i256 < _size252; ++_i256) + uint32_t _size277; + ::apache::thrift::protocol::TType _etype280; + xfer += 
iprot->readListBegin(_etype280, _size277); + this->page_locations.resize(_size277); + uint32_t _i281; + for (_i281 = 0; _i281 < _size277; ++_i281) { - xfer += this->page_locations[_i256].read(iprot); + xfer += this->page_locations[_i281].read(iprot); } xfer += iprot->readListEnd(); } @@ -3968,14 +4437,14 @@ uint32_t OffsetIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->unencoded_byte_array_data_bytes.clear(); - uint32_t _size257; - ::apache::thrift::protocol::TType _etype260; - xfer += iprot->readListBegin(_etype260, _size257); - this->unencoded_byte_array_data_bytes.resize(_size257); - uint32_t _i261; - for (_i261 = 0; _i261 < _size257; ++_i261) + uint32_t _size282; + ::apache::thrift::protocol::TType _etype285; + xfer += iprot->readListBegin(_etype285, _size282); + this->unencoded_byte_array_data_bytes.resize(_size282); + uint32_t _i286; + for (_i286 = 0; _i286 < _size282; ++_i286) { - xfer += iprot->readI64(this->unencoded_byte_array_data_bytes[_i261]); + xfer += iprot->readI64(this->unencoded_byte_array_data_bytes[_i286]); } xfer += iprot->readListEnd(); } @@ -4007,10 +4476,10 @@ uint32_t OffsetIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("page_locations", ::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->page_locations.size())); - std::vector ::const_iterator _iter262; - for (_iter262 = this->page_locations.begin(); _iter262 != this->page_locations.end(); ++_iter262) + std::vector ::const_iterator _iter287; + for (_iter287 = this->page_locations.begin(); _iter287 != this->page_locations.end(); ++_iter287) { - xfer += (*_iter262).write(oprot); + xfer += (*_iter287).write(oprot); } xfer += oprot->writeListEnd(); } @@ -4020,10 +4489,10 @@ uint32_t OffsetIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("unencoded_byte_array_data_bytes", ::apache::thrift::protocol::T_LIST, 2); { xfer += 
oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->unencoded_byte_array_data_bytes.size())); - std::vector ::const_iterator _iter263; - for (_iter263 = this->unencoded_byte_array_data_bytes.begin(); _iter263 != this->unencoded_byte_array_data_bytes.end(); ++_iter263) + std::vector ::const_iterator _iter288; + for (_iter288 = this->unencoded_byte_array_data_bytes.begin(); _iter288 != this->unencoded_byte_array_data_bytes.end(); ++_iter288) { - xfer += oprot->writeI64((*_iter263)); + xfer += oprot->writeI64((*_iter288)); } xfer += oprot->writeListEnd(); } @@ -4064,14 +4533,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->null_pages.clear(); - uint32_t _size268; - ::apache::thrift::protocol::TType _etype271; - xfer += iprot->readListBegin(_etype271, _size268); - this->null_pages.resize(_size268); - uint32_t _i272; - for (_i272 = 0; _i272 < _size268; ++_i272) + uint32_t _size293; + ::apache::thrift::protocol::TType _etype296; + xfer += iprot->readListBegin(_etype296, _size293); + this->null_pages.resize(_size293); + uint32_t _i297; + for (_i297 = 0; _i297 < _size293; ++_i297) { - xfer += iprot->readBool(this->null_pages[_i272]); + xfer += iprot->readBool(this->null_pages[_i297]); } xfer += iprot->readListEnd(); } @@ -4084,14 +4553,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->min_values.clear(); - uint32_t _size273; - ::apache::thrift::protocol::TType _etype276; - xfer += iprot->readListBegin(_etype276, _size273); - this->min_values.resize(_size273); - uint32_t _i277; - for (_i277 = 0; _i277 < _size273; ++_i277) + uint32_t _size298; + ::apache::thrift::protocol::TType _etype301; + xfer += iprot->readListBegin(_etype301, _size298); + this->min_values.resize(_size298); + uint32_t _i302; + for (_i302 = 0; _i302 < _size298; ++_i302) { - xfer += iprot->readBinary(this->min_values[_i277]); + xfer += 
iprot->readBinary(this->min_values[_i302]); } xfer += iprot->readListEnd(); } @@ -4104,14 +4573,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->max_values.clear(); - uint32_t _size278; - ::apache::thrift::protocol::TType _etype281; - xfer += iprot->readListBegin(_etype281, _size278); - this->max_values.resize(_size278); - uint32_t _i282; - for (_i282 = 0; _i282 < _size278; ++_i282) + uint32_t _size303; + ::apache::thrift::protocol::TType _etype306; + xfer += iprot->readListBegin(_etype306, _size303); + this->max_values.resize(_size303); + uint32_t _i307; + for (_i307 = 0; _i307 < _size303; ++_i307) { - xfer += iprot->readBinary(this->max_values[_i282]); + xfer += iprot->readBinary(this->max_values[_i307]); } xfer += iprot->readListEnd(); } @@ -4122,9 +4591,9 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast283; - xfer += iprot->readI32(ecast283); - this->boundary_order = static_cast(ecast283); + int32_t ecast308; + xfer += iprot->readI32(ecast308); + this->boundary_order = static_cast(ecast308); isset_boundary_order = true; } else { xfer += iprot->skip(ftype); @@ -4134,14 +4603,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->null_counts.clear(); - uint32_t _size284; - ::apache::thrift::protocol::TType _etype287; - xfer += iprot->readListBegin(_etype287, _size284); - this->null_counts.resize(_size284); - uint32_t _i288; - for (_i288 = 0; _i288 < _size284; ++_i288) + uint32_t _size309; + ::apache::thrift::protocol::TType _etype312; + xfer += iprot->readListBegin(_etype312, _size309); + this->null_counts.resize(_size309); + uint32_t _i313; + for (_i313 = 0; _i313 < _size309; ++_i313) { - xfer += iprot->readI64(this->null_counts[_i288]); + xfer += iprot->readI64(this->null_counts[_i313]); } xfer += iprot->readListEnd(); } @@ -4154,14 +4623,14 @@ uint32_t 
ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->repetition_level_histograms.clear(); - uint32_t _size289; - ::apache::thrift::protocol::TType _etype292; - xfer += iprot->readListBegin(_etype292, _size289); - this->repetition_level_histograms.resize(_size289); - uint32_t _i293; - for (_i293 = 0; _i293 < _size289; ++_i293) + uint32_t _size314; + ::apache::thrift::protocol::TType _etype317; + xfer += iprot->readListBegin(_etype317, _size314); + this->repetition_level_histograms.resize(_size314); + uint32_t _i318; + for (_i318 = 0; _i318 < _size314; ++_i318) { - xfer += iprot->readI64(this->repetition_level_histograms[_i293]); + xfer += iprot->readI64(this->repetition_level_histograms[_i318]); } xfer += iprot->readListEnd(); } @@ -4174,14 +4643,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->definition_level_histograms.clear(); - uint32_t _size294; - ::apache::thrift::protocol::TType _etype297; - xfer += iprot->readListBegin(_etype297, _size294); - this->definition_level_histograms.resize(_size294); - uint32_t _i298; - for (_i298 = 0; _i298 < _size294; ++_i298) + uint32_t _size319; + ::apache::thrift::protocol::TType _etype322; + xfer += iprot->readListBegin(_etype322, _size319); + this->definition_level_histograms.resize(_size319); + uint32_t _i323; + for (_i323 = 0; _i323 < _size319; ++_i323) { - xfer += iprot->readI64(this->definition_level_histograms[_i298]); + xfer += iprot->readI64(this->definition_level_histograms[_i323]); } xfer += iprot->readListEnd(); } @@ -4190,6 +4659,26 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { xfer += iprot->skip(ftype); } break; + case 8: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->geometry_stats.clear(); + uint32_t _size324; + ::apache::thrift::protocol::TType _etype327; + xfer += iprot->readListBegin(_etype327, _size324); + this->geometry_stats.resize(_size324); + uint32_t _i328; + for (_i328 = 
0; _i328 < _size324; ++_i328) + { + xfer += this->geometry_stats[_i328].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.geometry_stats = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -4219,10 +4708,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("null_pages", ::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_BOOL, static_cast(this->null_pages.size())); - std::vector ::const_iterator _iter299; - for (_iter299 = this->null_pages.begin(); _iter299 != this->null_pages.end(); ++_iter299) + std::vector ::const_iterator _iter329; + for (_iter329 = this->null_pages.begin(); _iter329 != this->null_pages.end(); ++_iter329) { - xfer += oprot->writeBool((*_iter299)); + xfer += oprot->writeBool((*_iter329)); } xfer += oprot->writeListEnd(); } @@ -4231,10 +4720,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("min_values", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->min_values.size())); - std::vector ::const_iterator _iter300; - for (_iter300 = this->min_values.begin(); _iter300 != this->min_values.end(); ++_iter300) + std::vector ::const_iterator _iter330; + for (_iter330 = this->min_values.begin(); _iter330 != this->min_values.end(); ++_iter330) { - xfer += oprot->writeBinary((*_iter300)); + xfer += oprot->writeBinary((*_iter330)); } xfer += oprot->writeListEnd(); } @@ -4243,10 +4732,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("max_values", ::apache::thrift::protocol::T_LIST, 3); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->max_values.size())); - std::vector ::const_iterator _iter301; - for (_iter301 = this->max_values.begin(); _iter301 != this->max_values.end(); ++_iter301) + std::vector 
::const_iterator _iter331; + for (_iter331 = this->max_values.begin(); _iter331 != this->max_values.end(); ++_iter331) { - xfer += oprot->writeBinary((*_iter301)); + xfer += oprot->writeBinary((*_iter331)); } xfer += oprot->writeListEnd(); } @@ -4260,10 +4749,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("null_counts", ::apache::thrift::protocol::T_LIST, 5); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->null_counts.size())); - std::vector ::const_iterator _iter302; - for (_iter302 = this->null_counts.begin(); _iter302 != this->null_counts.end(); ++_iter302) + std::vector ::const_iterator _iter332; + for (_iter332 = this->null_counts.begin(); _iter332 != this->null_counts.end(); ++_iter332) { - xfer += oprot->writeI64((*_iter302)); + xfer += oprot->writeI64((*_iter332)); } xfer += oprot->writeListEnd(); } @@ -4273,10 +4762,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("repetition_level_histograms", ::apache::thrift::protocol::T_LIST, 6); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->repetition_level_histograms.size())); - std::vector ::const_iterator _iter303; - for (_iter303 = this->repetition_level_histograms.begin(); _iter303 != this->repetition_level_histograms.end(); ++_iter303) + std::vector ::const_iterator _iter333; + for (_iter333 = this->repetition_level_histograms.begin(); _iter333 != this->repetition_level_histograms.end(); ++_iter333) { - xfer += oprot->writeI64((*_iter303)); + xfer += oprot->writeI64((*_iter333)); } xfer += oprot->writeListEnd(); } @@ -4286,10 +4775,23 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("definition_level_histograms", ::apache::thrift::protocol::T_LIST, 7); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->definition_level_histograms.size())); - std::vector ::const_iterator _iter304; - 
for (_iter304 = this->definition_level_histograms.begin(); _iter304 != this->definition_level_histograms.end(); ++_iter304) + std::vector ::const_iterator _iter334; + for (_iter334 = this->definition_level_histograms.begin(); _iter334 != this->definition_level_histograms.end(); ++_iter334) { - xfer += oprot->writeI64((*_iter304)); + xfer += oprot->writeI64((*_iter334)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.geometry_stats) { + xfer += oprot->writeFieldBegin("geometry_stats", ::apache::thrift::protocol::T_LIST, 8); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->geometry_stats.size())); + std::vector ::const_iterator _iter335; + for (_iter335 = this->geometry_stats.begin(); _iter335 != this->geometry_stats.end(); ++_iter335) + { + xfer += (*_iter335).write(oprot); } xfer += oprot->writeListEnd(); } @@ -4577,14 +5079,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->schema.clear(); - uint32_t _size321; - ::apache::thrift::protocol::TType _etype324; - xfer += iprot->readListBegin(_etype324, _size321); - this->schema.resize(_size321); - uint32_t _i325; - for (_i325 = 0; _i325 < _size321; ++_i325) + uint32_t _size352; + ::apache::thrift::protocol::TType _etype355; + xfer += iprot->readListBegin(_etype355, _size352); + this->schema.resize(_size352); + uint32_t _i356; + for (_i356 = 0; _i356 < _size352; ++_i356) { - xfer += this->schema[_i325].read(iprot); + xfer += this->schema[_i356].read(iprot); } xfer += iprot->readListEnd(); } @@ -4605,14 +5107,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->row_groups.clear(); - uint32_t _size326; - ::apache::thrift::protocol::TType _etype329; - xfer += iprot->readListBegin(_etype329, _size326); - this->row_groups.resize(_size326); - uint32_t _i330; - for (_i330 = 0; _i330 < _size326; ++_i330) + uint32_t 
_size357; + ::apache::thrift::protocol::TType _etype360; + xfer += iprot->readListBegin(_etype360, _size357); + this->row_groups.resize(_size357); + uint32_t _i361; + for (_i361 = 0; _i361 < _size357; ++_i361) { - xfer += this->row_groups[_i330].read(iprot); + xfer += this->row_groups[_i361].read(iprot); } xfer += iprot->readListEnd(); } @@ -4625,14 +5127,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->key_value_metadata.clear(); - uint32_t _size331; - ::apache::thrift::protocol::TType _etype334; - xfer += iprot->readListBegin(_etype334, _size331); - this->key_value_metadata.resize(_size331); - uint32_t _i335; - for (_i335 = 0; _i335 < _size331; ++_i335) + uint32_t _size362; + ::apache::thrift::protocol::TType _etype365; + xfer += iprot->readListBegin(_etype365, _size362); + this->key_value_metadata.resize(_size362); + uint32_t _i366; + for (_i366 = 0; _i366 < _size362; ++_i366) { - xfer += this->key_value_metadata[_i335].read(iprot); + xfer += this->key_value_metadata[_i366].read(iprot); } xfer += iprot->readListEnd(); } @@ -4653,14 +5155,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->column_orders.clear(); - uint32_t _size336; - ::apache::thrift::protocol::TType _etype339; - xfer += iprot->readListBegin(_etype339, _size336); - this->column_orders.resize(_size336); - uint32_t _i340; - for (_i340 = 0; _i340 < _size336; ++_i340) + uint32_t _size367; + ::apache::thrift::protocol::TType _etype370; + xfer += iprot->readListBegin(_etype370, _size367); + this->column_orders.resize(_size367); + uint32_t _i371; + for (_i371 = 0; _i371 < _size367; ++_i371) { - xfer += this->column_orders[_i340].read(iprot); + xfer += this->column_orders[_i371].read(iprot); } xfer += iprot->readListEnd(); } @@ -4718,10 +5220,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 
2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->schema.size())); - std::vector ::const_iterator _iter341; - for (_iter341 = this->schema.begin(); _iter341 != this->schema.end(); ++_iter341) + std::vector ::const_iterator _iter372; + for (_iter372 = this->schema.begin(); _iter372 != this->schema.end(); ++_iter372) { - xfer += (*_iter341).write(oprot); + xfer += (*_iter372).write(oprot); } xfer += oprot->writeListEnd(); } @@ -4734,10 +5236,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->row_groups.size())); - std::vector ::const_iterator _iter342; - for (_iter342 = this->row_groups.begin(); _iter342 != this->row_groups.end(); ++_iter342) + std::vector ::const_iterator _iter373; + for (_iter373 = this->row_groups.begin(); _iter373 != this->row_groups.end(); ++_iter373) { - xfer += (*_iter342).write(oprot); + xfer += (*_iter373).write(oprot); } xfer += oprot->writeListEnd(); } @@ -4747,10 +5249,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter343; - for (_iter343 = this->key_value_metadata.begin(); _iter343 != this->key_value_metadata.end(); ++_iter343) + std::vector ::const_iterator _iter374; + for (_iter374 = this->key_value_metadata.begin(); _iter374 != this->key_value_metadata.end(); ++_iter374) { - xfer += (*_iter343).write(oprot); + xfer += (*_iter374).write(oprot); } xfer += oprot->writeListEnd(); } @@ -4765,10 +5267,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("column_orders", ::apache::thrift::protocol::T_LIST, 7); { xfer += 
oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->column_orders.size())); - std::vector ::const_iterator _iter344; - for (_iter344 = this->column_orders.begin(); _iter344 != this->column_orders.end(); ++_iter344) + std::vector ::const_iterator _iter375; + for (_iter375 = this->column_orders.begin(); _iter375 != this->column_orders.end(); ++_iter375) { - xfer += (*_iter344).write(oprot); + xfer += (*_iter375).write(oprot); } xfer += oprot->writeListEnd(); } @@ -4861,7 +5363,7 @@ uint32_t FileCryptoMetaData::write(Protocol_* oprot) const { xfer += oprot->writeStructEnd(); return xfer; } - -}} // namespace +} +} // namespace #endif From 6acb2f290b4ca13fc0fbd7254f84d4ee90aa09a2 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 29 Jun 2024 22:24:35 -0300 Subject: [PATCH 03/61] add stubs --- cpp/src/parquet/types.cc | 71 ++++++++++++++++++++++++++++++++++++++++ cpp/src/parquet/types.h | 33 +++++++++++++++++++ 2 files changed, 104 insertions(+) diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index 7b50ed48d06b0..bf5f556e633d7 100644 --- a/cpp/src/parquet/types.cc +++ b/cpp/src/parquet/types.cc @@ -520,6 +520,12 @@ std::shared_ptr LogicalType::Float16() { return Float16LogicalType::Make(); } +std::shared_ptr LogicalType::Geometry( + std::string crs, LogicalType::GeometryEdges::edges edges, + LogicalType::GeometryEncoding::geometry_encoding encoding, std::string metadata) { + return GeometryLogicalType::Make(std::move(crs), edges, encoding, std::move(metadata)); +} + std::shared_ptr LogicalType::None() { return NoLogicalType::Make(); } /* @@ -602,6 +608,7 @@ class LogicalType::Impl { class BSON; class UUID; class Float16; + class Geometry; class No; class Undefined; @@ -674,6 +681,9 @@ bool LogicalType::is_UUID() const { return impl_->type() == LogicalType::Type::U bool LogicalType::is_float16() const { return impl_->type() == LogicalType::Type::FLOAT16; } +bool LogicalType::is_geometry() const { + return impl_->type() 
== LogicalType::Type::GEOMETRY; +} bool LogicalType::is_none() const { return impl_->type() == LogicalType::Type::NONE; } bool LogicalType::is_valid() const { return impl_->type() != LogicalType::Type::UNDEFINED; @@ -1603,6 +1613,67 @@ class LogicalType::Impl::Float16 final : public LogicalType::Impl::Incompatible, GENERATE_MAKE(Float16) +class LogicalType::Impl::Geometry final : public LogicalType::Impl::Incompatible, + public LogicalType::Impl::SimpleApplicable { + public: + friend class GeometryLogicalType; + + std::string ToString() const override { throw std::runtime_error("not implemented"); } + std::string ToJSON() const override { throw std::runtime_error("not implemented"); } + format::LogicalType ToThrift() const override { + throw std::runtime_error("not implemented"); + } + bool Equals(const LogicalType& other) const override { + throw std::runtime_error("not implemented"); + } + + const std::string& crs() const { return crs_; } + LogicalType::GeometryEdges::edges edges() const { return edges_; } + LogicalType::GeometryEncoding::geometry_encoding encoding() const { return encoding_; } + const std::string& metadata() const { return metadata_; } + + private: + Geometry(std::string crs, LogicalType::GeometryEdges::edges edges, + LogicalType::GeometryEncoding::geometry_encoding encoding, + std::string metadata) + : LogicalType::Impl(LogicalType::Type::GEOMETRY, SortOrder::UNKNOWN), + LogicalType::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY), + crs_(std::move(crs)), + edges_(edges), + encoding_(encoding), + metadata_(std::move(metadata)) {} + + std::string crs_; + LogicalType::GeometryEdges::edges edges_; + LogicalType::GeometryEncoding::geometry_encoding encoding_; + std::string metadata_; +}; + +const std::string& GeometryLogicalType::crs() const { + return (dynamic_cast(*impl_)).crs(); +} + +LogicalType::GeometryEdges::edges GeometryLogicalType::edges() const { + return (dynamic_cast(*impl_)).edges(); +} + 
+LogicalType::GeometryEncoding::geometry_encoding GeometryLogicalType::encoding() const { + return (dynamic_cast(*impl_)).encoding(); +} + +const std::string& GeometryLogicalType::metadata() const { + return (dynamic_cast(*impl_)).metadata(); +} + +std::shared_ptr GeometryLogicalType::Make( + std::string crs, LogicalType::GeometryEdges::edges edges, + LogicalType::GeometryEncoding::geometry_encoding encoding, std::string metadata) { + auto* logical_type = new GeometryLogicalType(); + logical_type->impl_.reset(new LogicalType::Impl::Geometry( + std::move(crs), edges, encoding, std::move(metadata))); + return std::shared_ptr(logical_type); +} + class LogicalType::Impl::No final : public LogicalType::Impl::SimpleCompatible, public LogicalType::Impl::UniversalApplicable { public: diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h index 70a1c4b73ec45..cc107dfa633b2 100644 --- a/cpp/src/parquet/types.h +++ b/cpp/src/parquet/types.h @@ -158,6 +158,7 @@ class PARQUET_EXPORT LogicalType { BSON, UUID, FLOAT16, + GEOMETRY, NONE // Not a real logical type; should always be last element }; }; @@ -166,6 +167,14 @@ class PARQUET_EXPORT LogicalType { enum unit { UNKNOWN = 0, MILLIS = 1, MICROS, NANOS }; }; + struct GeometryEncoding { + enum geometry_encoding { WKB = 0 }; + }; + + struct GeometryEdges { + enum edges { PLANAR = 0, SPHERICAL = 1 }; + }; + /// \brief If possible, return a logical type equivalent to the given legacy /// converted type (and decimal metadata if applicable). 
static std::shared_ptr FromConvertedType( @@ -213,6 +222,12 @@ class PARQUET_EXPORT LogicalType { static std::shared_ptr UUID(); static std::shared_ptr Float16(); + static std::shared_ptr Geometry( + std::string crs = "", + LogicalType::GeometryEdges::edges edges = GeometryEdges::PLANAR, + LogicalType::GeometryEncoding::geometry_encoding encoding = GeometryEncoding::WKB, + std::string metadata = ""); + /// \brief Create a placeholder for when no logical type is specified static std::shared_ptr None(); @@ -266,6 +281,7 @@ class PARQUET_EXPORT LogicalType { bool is_BSON() const; bool is_UUID() const; bool is_float16() const; + bool is_geometry() const; bool is_none() const; /// \brief Return true if this logical type is of a known type. bool is_valid() const; @@ -446,6 +462,23 @@ class PARQUET_EXPORT Float16LogicalType : public LogicalType { Float16LogicalType() = default; }; +class PARQUET_EXPORT GeometryLogicalType : public LogicalType { + public: + static std::shared_ptr Make( + std::string crs = "", + LogicalType::GeometryEdges::edges edges = GeometryEdges::PLANAR, + LogicalType::GeometryEncoding::geometry_encoding encoding = GeometryEncoding::WKB, + std::string metadata = ""); + + const std::string& crs() const; + LogicalType::GeometryEdges::edges edges() const; + LogicalType::GeometryEncoding::geometry_encoding encoding() const; + const std::string& metadata() const; + + private: + GeometryLogicalType() = default; +}; + /// \brief Allowed for any physical type. 
class PARQUET_EXPORT NoLogicalType : public LogicalType { public: From 6ff585535af180ad7e2fd78d002781ecebdc4a5c Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 29 Jun 2024 22:35:01 -0300 Subject: [PATCH 04/61] split methods out of line --- cpp/src/parquet/types.cc | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index bf5f556e633d7..fd8c72e082fdb 100644 --- a/cpp/src/parquet/types.cc +++ b/cpp/src/parquet/types.cc @@ -1618,14 +1618,10 @@ class LogicalType::Impl::Geometry final : public LogicalType::Impl::Incompatible public: friend class GeometryLogicalType; - std::string ToString() const override { throw std::runtime_error("not implemented"); } - std::string ToJSON() const override { throw std::runtime_error("not implemented"); } - format::LogicalType ToThrift() const override { - throw std::runtime_error("not implemented"); - } - bool Equals(const LogicalType& other) const override { - throw std::runtime_error("not implemented"); - } + std::string ToString() const override; + std::string ToJSON() const override; + format::LogicalType ToThrift() const override; + bool Equals(const LogicalType& other) const override; const std::string& crs() const { return crs_; } LogicalType::GeometryEdges::edges edges() const { return edges_; } @@ -1649,6 +1645,22 @@ class LogicalType::Impl::Geometry final : public LogicalType::Impl::Incompatible std::string metadata_; }; +std::string LogicalType::Impl::Geometry::ToString() const { + throw std::runtime_error("not implemented"); +} + +std::string LogicalType::Impl::Geometry::ToJSON() const { + throw std::runtime_error("not implemented"); +} + +format::LogicalType LogicalType::Impl::Geometry::ToThrift() const { + throw std::runtime_error("not implemented"); +} + +bool LogicalType::Impl::Geometry::Equals(const LogicalType& other) const { + throw std::runtime_error("not implemented"); +} + const std::string& 
GeometryLogicalType::crs() const { return (dynamic_cast(*impl_)).crs(); } From 0ac5d84737ebbfe2cafa82081113e1829bfb60da Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 9 Jul 2024 12:38:20 -0700 Subject: [PATCH 05/61] maybe to/from thrift --- cpp/src/parquet/types.cc | 51 +++++++++++++++++++++++++++++++++++++++- cpp/src/parquet/types.h | 4 ++-- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index fd8c72e082fdb..a54e83984e3b7 100644 --- a/cpp/src/parquet/types.cc +++ b/cpp/src/parquet/types.cc @@ -463,6 +463,31 @@ std::shared_ptr LogicalType::FromThrift( return UUIDLogicalType::Make(); } else if (type.__isset.FLOAT16) { return Float16LogicalType::Make(); + } else if (type.__isset.GEOMETRY) { + std::string crs; + if (type.GEOMETRY.__isset.crs) { + crs = type.GEOMETRY.crs; + } + + LogicalType::GeometryEdges::edges edges = LogicalType::GeometryEdges::UNKNOWN; + if (type.GEOMETRY.edges == format::Edges::PLANAR) { + edges = LogicalType::GeometryEdges::PLANAR; + } else if (type.GEOMETRY.edges == format::Edges::SPHERICAL) { + edges = LogicalType::GeometryEdges::SPHERICAL; + } + + LogicalType::GeometryEncoding::geometry_encoding encoding = + LogicalType::GeometryEncoding::UNKNOWN; + if (type.GEOMETRY.encoding == format::GeometryEncoding::WKB) { + encoding = LogicalType::GeometryEncoding::WKB; + } + + std::string metadata; + if (type.GEOMETRY.__isset.crs) { + metadata = type.GEOMETRY.metadata; + } + + return GeometryLogicalType::Make(crs, edges, encoding, metadata); } else { throw ParquetException("Metadata contains Thrift LogicalType that is not recognized"); } @@ -1654,7 +1679,31 @@ std::string LogicalType::Impl::Geometry::ToJSON() const { } format::LogicalType LogicalType::Impl::Geometry::ToThrift() const { - throw std::runtime_error("not implemented"); + format::LogicalType type; + format::GeometryType geometry_type; + + // Canonially export crs of "" as an unset CRS + if (crs_.size() > 0) { + 
geometry_type.__set_crs(crs_); + } + + DCHECK(edges_ != LogicalType::GeometryEdges::UNKNOWN); + if (edges_ == LogicalType::GeometryEdges::SPHERICAL) { + geometry_type.__set_edges(format::Edges::SPHERICAL); + } else { + geometry_type.__set_edges(format::Edges::PLANAR); + } + + DCHECK_EQ(encoding_, LogicalType::GeometryEncoding::WKB); + geometry_type.__set_encoding(format::GeometryEncoding::WKB); + + // Canonically export empty metadata as unset + if (metadata_.size() > 0) { + geometry_type.__set_metadata(metadata_); + } + + type.__set_GEOMETRY(geometry_type); + return type; } bool LogicalType::Impl::Geometry::Equals(const LogicalType& other) const { diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h index cc107dfa633b2..d708a558943a3 100644 --- a/cpp/src/parquet/types.h +++ b/cpp/src/parquet/types.h @@ -168,11 +168,11 @@ class PARQUET_EXPORT LogicalType { }; struct GeometryEncoding { - enum geometry_encoding { WKB = 0 }; + enum geometry_encoding { UNKNOWN = 0, WKB = 1 }; }; struct GeometryEdges { - enum edges { PLANAR = 0, SPHERICAL = 1 }; + enum edges { UNKNOWN = 0, PLANAR = 1, SPHERICAL = 2 }; }; /// \brief If possible, return a logical type equivalent to the given legacy From 8a80ac77179030776d131cde621d5d0162282b7a Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 9 Jul 2024 16:53:41 -0700 Subject: [PATCH 06/61] a few more serializers --- cpp/src/parquet/types.cc | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index a54e83984e3b7..0b3e4b637d9c5 100644 --- a/cpp/src/parquet/types.cc +++ b/cpp/src/parquet/types.cc @@ -1638,6 +1638,14 @@ class LogicalType::Impl::Float16 final : public LogicalType::Impl::Incompatible, GENERATE_MAKE(Float16) +#define geometry_edges_string(u___) \ + ((u___) == LogicalType::GeometryEdges::PLANAR \ + ? "planar" \ + : ((u___) == LogicalType::GeometryEdges::SPHERICAL ? 
"spherical" : "unknown")) + +#define geometry_encoding_string(u___) \ + ((u___) == LogicalType::GeometryEncoding::WKB ? "wkb" : "unknown") + class LogicalType::Impl::Geometry final : public LogicalType::Impl::Incompatible, public LogicalType::Impl::SimpleApplicable { public: @@ -1671,11 +1679,32 @@ class LogicalType::Impl::Geometry final : public LogicalType::Impl::Incompatible }; std::string LogicalType::Impl::Geometry::ToString() const { - throw std::runtime_error("not implemented"); + std::stringstream type; + type << "Geometry(crs=" << crs_ << ", edges=" << geometry_edges_string(edges_) + << ", encoding=" << geometry_encoding_string(encoding_) + << ", metadata=" << metadata_ << ")"; + return type.str(); } std::string LogicalType::Impl::Geometry::ToJSON() const { - throw std::runtime_error("not implemented"); + std::stringstream json; + json << R"({"Type": "Geometry")"; + + if (crs_.size() > 0) { + // TODO(paleolimbot): we'll need to escape the crs or assume that it's valid JSON + json << R"(, "crs": )" << crs_; + } + + json << R"(, "edges": )" << geometry_edges_string(edges_); + json << R"(, "encoding": )" << geometry_encoding_string(encoding_); + + if (metadata_.size() > 0) { + // TODO(paleolimbot): we'll need to escape the metadata or assume that it's valid JSON + json << R"(, "metadata": )" << crs_; + } + + json << "}"; + return json.str(); } format::LogicalType LogicalType::Impl::Geometry::ToThrift() const { From f3309972668c331ac7adda610b032652459842a9 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 6 Aug 2024 23:33:29 -0300 Subject: [PATCH 07/61] add basic test for serialization --- cpp/src/parquet/schema_test.cc | 5 +++++ cpp/src/parquet/types.cc | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/cpp/src/parquet/schema_test.cc b/cpp/src/parquet/schema_test.cc index 2532a8656e69f..a3f367a49322c 100644 --- a/cpp/src/parquet/schema_test.cc +++ b/cpp/src/parquet/schema_test.cc @@ -1544,6 +1544,11 @@ 
TEST(TestLogicalTypeOperation, LogicalTypeRepresentation) { {LogicalType::BSON(), "BSON", R"({"Type": "BSON"})"}, {LogicalType::UUID(), "UUID", R"({"Type": "UUID"})"}, {LogicalType::Float16(), "Float16", R"({"Type": "Float16"})"}, + {LogicalType::Geometry(), "Geometry(crs=, edges=planar, encoding=wkb, metadata=)", + R"({"Type": "Geometry", "edges": "planar", "encoding": "wkb"})"}, + {LogicalType::Geometry("{}", LogicalType::GeometryEdges::SPHERICAL), + "Geometry(crs={}, edges=spherical, encoding=wkb, metadata=)", + R"({"Type": "Geometry", "crs": {}, "edges": "spherical", "encoding": "wkb"})"}, {LogicalType::None(), "None", R"({"Type": "None"})"}, }; diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index 0b3e4b637d9c5..e9e339adaebfb 100644 --- a/cpp/src/parquet/types.cc +++ b/cpp/src/parquet/types.cc @@ -1695,8 +1695,8 @@ std::string LogicalType::Impl::Geometry::ToJSON() const { json << R"(, "crs": )" << crs_; } - json << R"(, "edges": )" << geometry_edges_string(edges_); - json << R"(, "encoding": )" << geometry_encoding_string(encoding_); + json << R"(, "edges": ")" << geometry_edges_string(edges_) << R"(")"; + json << R"(, "encoding": ")" << geometry_encoding_string(encoding_) << R"(")"; if (metadata_.size() > 0) { // TODO(paleolimbot): we'll need to escape the metadata or assume that it's valid JSON From b34b1c271bd99d2d33183dd14f909b5f9d377632 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 6 Aug 2024 23:37:11 -0300 Subject: [PATCH 08/61] add sort order check --- cpp/src/parquet/schema_test.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/parquet/schema_test.cc b/cpp/src/parquet/schema_test.cc index a3f367a49322c..381c2e730facd 100644 --- a/cpp/src/parquet/schema_test.cc +++ b/cpp/src/parquet/schema_test.cc @@ -1599,6 +1599,7 @@ TEST(TestLogicalTypeOperation, LogicalTypeSortOrder) { {LogicalType::BSON(), SortOrder::UNSIGNED}, {LogicalType::UUID(), SortOrder::UNSIGNED}, {LogicalType::Float16(), SortOrder::SIGNED}, + 
{LogicalType::Geometry(), SortOrder::UNKNOWN}, {LogicalType::None(), SortOrder::UNKNOWN}}; for (const ExpectedSortOrder& c : cases) { From 7e9f9b288f588f8fc5e0026deb68e9c176ef8636 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 12 Aug 2024 16:21:35 -0300 Subject: [PATCH 09/61] update thrift --- cpp/src/generated/parquet_types.cpp | 2219 ++++++++++++++------------- cpp/src/generated/parquet_types.h | 62 +- cpp/src/generated/parquet_types.tcc | 596 +++---- cpp/src/parquet/parquet.thrift | 31 +- 4 files changed, 1489 insertions(+), 1419 deletions(-) diff --git a/cpp/src/generated/parquet_types.cpp b/cpp/src/generated/parquet_types.cpp index 565e5f57622e1..f580c7905e8b4 100644 --- a/cpp/src/generated/parquet_types.cpp +++ b/cpp/src/generated/parquet_types.cpp @@ -772,12 +772,12 @@ Covering::~Covering() noexcept { } -void Covering::__set_geometry(const std::string& val) { - this->geometry = val; +void Covering::__set_kind(const std::string& val) { + this->kind = val; } -void Covering::__set_edges(const Edges::type val) { - this->edges = val; +void Covering::__set_value(const std::string& val) { + this->value = val; } std::ostream& operator<<(std::ostream& out, const Covering& obj) { @@ -788,33 +788,33 @@ std::ostream& operator<<(std::ostream& out, const Covering& obj) void swap(Covering &a, Covering &b) { using ::std::swap; - swap(a.geometry, b.geometry); - swap(a.edges, b.edges); + swap(a.kind, b.kind); + swap(a.value, b.value); } -Covering::Covering(const Covering& other17) { - geometry = other17.geometry; - edges = other17.edges; +Covering::Covering(const Covering& other16) { + kind = other16.kind; + value = other16.value; } -Covering::Covering(Covering&& other18) noexcept { - geometry = std::move(other18.geometry); - edges = other18.edges; +Covering::Covering(Covering&& other17) noexcept { + kind = std::move(other17.kind); + value = std::move(other17.value); } -Covering& Covering::operator=(const Covering& other19) { - geometry = other19.geometry; - edges 
= other19.edges; +Covering& Covering::operator=(const Covering& other18) { + kind = other18.kind; + value = other18.value; return *this; } -Covering& Covering::operator=(Covering&& other20) noexcept { - geometry = std::move(other20.geometry); - edges = other20.edges; +Covering& Covering::operator=(Covering&& other19) noexcept { + kind = std::move(other19.kind); + value = std::move(other19.value); return *this; } void Covering::printTo(std::ostream& out) const { using ::apache::thrift::to_string; out << "Covering("; - out << "geometry=" << to_string(geometry); - out << ", " << "edges=" << to_string(edges); + out << "kind=" << to_string(kind); + out << ", " << "value=" << to_string(value); out << ")"; } @@ -878,7 +878,18 @@ void swap(BoundingBox &a, BoundingBox &b) { swap(a.__isset, b.__isset); } -BoundingBox::BoundingBox(const BoundingBox& other21) noexcept { +BoundingBox::BoundingBox(const BoundingBox& other20) noexcept { + xmin = other20.xmin; + xmax = other20.xmax; + ymin = other20.ymin; + ymax = other20.ymax; + zmin = other20.zmin; + zmax = other20.zmax; + mmin = other20.mmin; + mmax = other20.mmax; + __isset = other20.__isset; +} +BoundingBox::BoundingBox(BoundingBox&& other21) noexcept { xmin = other21.xmin; xmax = other21.xmax; ymin = other21.ymin; @@ -889,7 +900,7 @@ BoundingBox::BoundingBox(const BoundingBox& other21) noexcept { mmax = other21.mmax; __isset = other21.__isset; } -BoundingBox::BoundingBox(BoundingBox&& other22) noexcept { +BoundingBox& BoundingBox::operator=(const BoundingBox& other22) noexcept { xmin = other22.xmin; xmax = other22.xmax; ymin = other22.ymin; @@ -899,8 +910,9 @@ BoundingBox::BoundingBox(BoundingBox&& other22) noexcept { mmin = other22.mmin; mmax = other22.mmax; __isset = other22.__isset; + return *this; } -BoundingBox& BoundingBox::operator=(const BoundingBox& other23) noexcept { +BoundingBox& BoundingBox::operator=(BoundingBox&& other23) noexcept { xmin = other23.xmin; xmax = other23.xmax; ymin = other23.ymin; @@ -912,18 
+924,6 @@ BoundingBox& BoundingBox::operator=(const BoundingBox& other23) noexcept { __isset = other23.__isset; return *this; } -BoundingBox& BoundingBox::operator=(BoundingBox&& other24) noexcept { - xmin = other24.xmin; - xmax = other24.xmax; - ymin = other24.ymin; - ymax = other24.ymax; - zmin = other24.zmin; - zmax = other24.zmax; - mmin = other24.mmin; - mmax = other24.mmax; - __isset = other24.__isset; - return *this; -} void BoundingBox::printTo(std::ostream& out) const { using ::apache::thrift::to_string; out << "BoundingBox("; @@ -948,9 +948,9 @@ void GeometryStatistics::__set_bbox(const BoundingBox& val) { __isset.bbox = true; } -void GeometryStatistics::__set_covering(const Covering& val) { - this->covering = val; -__isset.covering = true; +void GeometryStatistics::__set_coverings(const std::vector & val) { + this->coverings = val; +__isset.coverings = true; } void GeometryStatistics::__set_geometry_types(const std::vector & val) { @@ -967,42 +967,42 @@ std::ostream& operator<<(std::ostream& out, const GeometryStatistics& obj) void swap(GeometryStatistics &a, GeometryStatistics &b) { using ::std::swap; swap(a.bbox, b.bbox); - swap(a.covering, b.covering); + swap(a.coverings, b.coverings); swap(a.geometry_types, b.geometry_types); swap(a.__isset, b.__isset); } -GeometryStatistics::GeometryStatistics(const GeometryStatistics& other31) { - bbox = other31.bbox; - covering = other31.covering; - geometry_types = other31.geometry_types; - __isset = other31.__isset; +GeometryStatistics::GeometryStatistics(const GeometryStatistics& other36) { + bbox = other36.bbox; + coverings = other36.coverings; + geometry_types = other36.geometry_types; + __isset = other36.__isset; } -GeometryStatistics::GeometryStatistics(GeometryStatistics&& other32) noexcept { - bbox = std::move(other32.bbox); - covering = std::move(other32.covering); - geometry_types = std::move(other32.geometry_types); - __isset = other32.__isset; 
+GeometryStatistics::GeometryStatistics(GeometryStatistics&& other37) noexcept { + bbox = std::move(other37.bbox); + coverings = std::move(other37.coverings); + geometry_types = std::move(other37.geometry_types); + __isset = other37.__isset; } -GeometryStatistics& GeometryStatistics::operator=(const GeometryStatistics& other33) { - bbox = other33.bbox; - covering = other33.covering; - geometry_types = other33.geometry_types; - __isset = other33.__isset; +GeometryStatistics& GeometryStatistics::operator=(const GeometryStatistics& other38) { + bbox = other38.bbox; + coverings = other38.coverings; + geometry_types = other38.geometry_types; + __isset = other38.__isset; return *this; } -GeometryStatistics& GeometryStatistics::operator=(GeometryStatistics&& other34) noexcept { - bbox = std::move(other34.bbox); - covering = std::move(other34.covering); - geometry_types = std::move(other34.geometry_types); - __isset = other34.__isset; +GeometryStatistics& GeometryStatistics::operator=(GeometryStatistics&& other39) noexcept { + bbox = std::move(other39.bbox); + coverings = std::move(other39.coverings); + geometry_types = std::move(other39.geometry_types); + __isset = other39.__isset; return *this; } void GeometryStatistics::printTo(std::ostream& out) const { using ::apache::thrift::to_string; out << "GeometryStatistics("; out << "bbox="; (__isset.bbox ? (out << to_string(bbox)) : (out << "")); - out << ", " << "covering="; (__isset.covering ? (out << to_string(covering)) : (out << "")); + out << ", " << "coverings="; (__isset.coverings ? (out << to_string(coverings)) : (out << "")); out << ", " << "geometry_types="; (__isset.geometry_types ? 
(out << to_string(geometry_types)) : (out << "")); out << ")"; } @@ -1077,54 +1077,54 @@ void swap(Statistics &a, Statistics &b) { swap(a.__isset, b.__isset); } -Statistics::Statistics(const Statistics& other35) { - max = other35.max; - min = other35.min; - null_count = other35.null_count; - distinct_count = other35.distinct_count; - max_value = other35.max_value; - min_value = other35.min_value; - is_max_value_exact = other35.is_max_value_exact; - is_min_value_exact = other35.is_min_value_exact; - geometry_stats = other35.geometry_stats; - __isset = other35.__isset; -} -Statistics::Statistics(Statistics&& other36) noexcept { - max = std::move(other36.max); - min = std::move(other36.min); - null_count = other36.null_count; - distinct_count = other36.distinct_count; - max_value = std::move(other36.max_value); - min_value = std::move(other36.min_value); - is_max_value_exact = other36.is_max_value_exact; - is_min_value_exact = other36.is_min_value_exact; - geometry_stats = std::move(other36.geometry_stats); - __isset = other36.__isset; -} -Statistics& Statistics::operator=(const Statistics& other37) { - max = other37.max; - min = other37.min; - null_count = other37.null_count; - distinct_count = other37.distinct_count; - max_value = other37.max_value; - min_value = other37.min_value; - is_max_value_exact = other37.is_max_value_exact; - is_min_value_exact = other37.is_min_value_exact; - geometry_stats = other37.geometry_stats; - __isset = other37.__isset; +Statistics::Statistics(const Statistics& other40) { + max = other40.max; + min = other40.min; + null_count = other40.null_count; + distinct_count = other40.distinct_count; + max_value = other40.max_value; + min_value = other40.min_value; + is_max_value_exact = other40.is_max_value_exact; + is_min_value_exact = other40.is_min_value_exact; + geometry_stats = other40.geometry_stats; + __isset = other40.__isset; +} +Statistics::Statistics(Statistics&& other41) noexcept { + max = std::move(other41.max); + min = 
std::move(other41.min); + null_count = other41.null_count; + distinct_count = other41.distinct_count; + max_value = std::move(other41.max_value); + min_value = std::move(other41.min_value); + is_max_value_exact = other41.is_max_value_exact; + is_min_value_exact = other41.is_min_value_exact; + geometry_stats = std::move(other41.geometry_stats); + __isset = other41.__isset; +} +Statistics& Statistics::operator=(const Statistics& other42) { + max = other42.max; + min = other42.min; + null_count = other42.null_count; + distinct_count = other42.distinct_count; + max_value = other42.max_value; + min_value = other42.min_value; + is_max_value_exact = other42.is_max_value_exact; + is_min_value_exact = other42.is_min_value_exact; + geometry_stats = other42.geometry_stats; + __isset = other42.__isset; return *this; } -Statistics& Statistics::operator=(Statistics&& other38) noexcept { - max = std::move(other38.max); - min = std::move(other38.min); - null_count = other38.null_count; - distinct_count = other38.distinct_count; - max_value = std::move(other38.max_value); - min_value = std::move(other38.min_value); - is_max_value_exact = other38.is_max_value_exact; - is_min_value_exact = other38.is_min_value_exact; - geometry_stats = std::move(other38.geometry_stats); - __isset = other38.__isset; +Statistics& Statistics::operator=(Statistics&& other43) noexcept { + max = std::move(other43.max); + min = std::move(other43.min); + null_count = other43.null_count; + distinct_count = other43.distinct_count; + max_value = std::move(other43.max_value); + min_value = std::move(other43.min_value); + is_max_value_exact = other43.is_max_value_exact; + is_min_value_exact = other43.is_min_value_exact; + geometry_stats = std::move(other43.geometry_stats); + __isset = other43.__isset; return *this; } void Statistics::printTo(std::ostream& out) const { @@ -1159,18 +1159,18 @@ void swap(StringType &a, StringType &b) { (void) b; } -StringType::StringType(const StringType& other39) noexcept { - 
(void) other39; +StringType::StringType(const StringType& other44) noexcept { + (void) other44; } -StringType::StringType(StringType&& other40) noexcept { - (void) other40; +StringType::StringType(StringType&& other45) noexcept { + (void) other45; } -StringType& StringType::operator=(const StringType& other41) noexcept { - (void) other41; +StringType& StringType::operator=(const StringType& other46) noexcept { + (void) other46; return *this; } -StringType& StringType::operator=(StringType&& other42) noexcept { - (void) other42; +StringType& StringType::operator=(StringType&& other47) noexcept { + (void) other47; return *this; } void StringType::printTo(std::ostream& out) const { @@ -1196,18 +1196,18 @@ void swap(UUIDType &a, UUIDType &b) { (void) b; } -UUIDType::UUIDType(const UUIDType& other43) noexcept { - (void) other43; +UUIDType::UUIDType(const UUIDType& other48) noexcept { + (void) other48; } -UUIDType::UUIDType(UUIDType&& other44) noexcept { - (void) other44; +UUIDType::UUIDType(UUIDType&& other49) noexcept { + (void) other49; } -UUIDType& UUIDType::operator=(const UUIDType& other45) noexcept { - (void) other45; +UUIDType& UUIDType::operator=(const UUIDType& other50) noexcept { + (void) other50; return *this; } -UUIDType& UUIDType::operator=(UUIDType&& other46) noexcept { - (void) other46; +UUIDType& UUIDType::operator=(UUIDType&& other51) noexcept { + (void) other51; return *this; } void UUIDType::printTo(std::ostream& out) const { @@ -1233,18 +1233,18 @@ void swap(MapType &a, MapType &b) { (void) b; } -MapType::MapType(const MapType& other47) noexcept { - (void) other47; +MapType::MapType(const MapType& other52) noexcept { + (void) other52; } -MapType::MapType(MapType&& other48) noexcept { - (void) other48; +MapType::MapType(MapType&& other53) noexcept { + (void) other53; } -MapType& MapType::operator=(const MapType& other49) noexcept { - (void) other49; +MapType& MapType::operator=(const MapType& other54) noexcept { + (void) other54; return *this; } 
-MapType& MapType::operator=(MapType&& other50) noexcept { - (void) other50; +MapType& MapType::operator=(MapType&& other55) noexcept { + (void) other55; return *this; } void MapType::printTo(std::ostream& out) const { @@ -1270,18 +1270,18 @@ void swap(ListType &a, ListType &b) { (void) b; } -ListType::ListType(const ListType& other51) noexcept { - (void) other51; +ListType::ListType(const ListType& other56) noexcept { + (void) other56; } -ListType::ListType(ListType&& other52) noexcept { - (void) other52; +ListType::ListType(ListType&& other57) noexcept { + (void) other57; } -ListType& ListType::operator=(const ListType& other53) noexcept { - (void) other53; +ListType& ListType::operator=(const ListType& other58) noexcept { + (void) other58; return *this; } -ListType& ListType::operator=(ListType&& other54) noexcept { - (void) other54; +ListType& ListType::operator=(ListType&& other59) noexcept { + (void) other59; return *this; } void ListType::printTo(std::ostream& out) const { @@ -1307,18 +1307,18 @@ void swap(EnumType &a, EnumType &b) { (void) b; } -EnumType::EnumType(const EnumType& other55) noexcept { - (void) other55; +EnumType::EnumType(const EnumType& other60) noexcept { + (void) other60; } -EnumType::EnumType(EnumType&& other56) noexcept { - (void) other56; +EnumType::EnumType(EnumType&& other61) noexcept { + (void) other61; } -EnumType& EnumType::operator=(const EnumType& other57) noexcept { - (void) other57; +EnumType& EnumType::operator=(const EnumType& other62) noexcept { + (void) other62; return *this; } -EnumType& EnumType::operator=(EnumType&& other58) noexcept { - (void) other58; +EnumType& EnumType::operator=(EnumType&& other63) noexcept { + (void) other63; return *this; } void EnumType::printTo(std::ostream& out) const { @@ -1344,18 +1344,18 @@ void swap(DateType &a, DateType &b) { (void) b; } -DateType::DateType(const DateType& other59) noexcept { - (void) other59; +DateType::DateType(const DateType& other64) noexcept { + (void) other64; } 
-DateType::DateType(DateType&& other60) noexcept { - (void) other60; +DateType::DateType(DateType&& other65) noexcept { + (void) other65; } -DateType& DateType::operator=(const DateType& other61) noexcept { - (void) other61; +DateType& DateType::operator=(const DateType& other66) noexcept { + (void) other66; return *this; } -DateType& DateType::operator=(DateType&& other62) noexcept { - (void) other62; +DateType& DateType::operator=(DateType&& other67) noexcept { + (void) other67; return *this; } void DateType::printTo(std::ostream& out) const { @@ -1381,18 +1381,18 @@ void swap(Float16Type &a, Float16Type &b) { (void) b; } -Float16Type::Float16Type(const Float16Type& other63) noexcept { - (void) other63; +Float16Type::Float16Type(const Float16Type& other68) noexcept { + (void) other68; } -Float16Type::Float16Type(Float16Type&& other64) noexcept { - (void) other64; +Float16Type::Float16Type(Float16Type&& other69) noexcept { + (void) other69; } -Float16Type& Float16Type::operator=(const Float16Type& other65) noexcept { - (void) other65; +Float16Type& Float16Type::operator=(const Float16Type& other70) noexcept { + (void) other70; return *this; } -Float16Type& Float16Type::operator=(Float16Type&& other66) noexcept { - (void) other66; +Float16Type& Float16Type::operator=(Float16Type&& other71) noexcept { + (void) other71; return *this; } void Float16Type::printTo(std::ostream& out) const { @@ -1418,18 +1418,18 @@ void swap(NullType &a, NullType &b) { (void) b; } -NullType::NullType(const NullType& other67) noexcept { - (void) other67; +NullType::NullType(const NullType& other72) noexcept { + (void) other72; } -NullType::NullType(NullType&& other68) noexcept { - (void) other68; +NullType::NullType(NullType&& other73) noexcept { + (void) other73; } -NullType& NullType::operator=(const NullType& other69) noexcept { - (void) other69; +NullType& NullType::operator=(const NullType& other74) noexcept { + (void) other74; return *this; } -NullType& 
NullType::operator=(NullType&& other70) noexcept { - (void) other70; +NullType& NullType::operator=(NullType&& other75) noexcept { + (void) other75; return *this; } void NullType::printTo(std::ostream& out) const { @@ -1463,22 +1463,22 @@ void swap(DecimalType &a, DecimalType &b) { swap(a.precision, b.precision); } -DecimalType::DecimalType(const DecimalType& other71) noexcept { - scale = other71.scale; - precision = other71.precision; +DecimalType::DecimalType(const DecimalType& other76) noexcept { + scale = other76.scale; + precision = other76.precision; } -DecimalType::DecimalType(DecimalType&& other72) noexcept { - scale = other72.scale; - precision = other72.precision; +DecimalType::DecimalType(DecimalType&& other77) noexcept { + scale = other77.scale; + precision = other77.precision; } -DecimalType& DecimalType::operator=(const DecimalType& other73) noexcept { - scale = other73.scale; - precision = other73.precision; +DecimalType& DecimalType::operator=(const DecimalType& other78) noexcept { + scale = other78.scale; + precision = other78.precision; return *this; } -DecimalType& DecimalType::operator=(DecimalType&& other74) noexcept { - scale = other74.scale; - precision = other74.precision; +DecimalType& DecimalType::operator=(DecimalType&& other79) noexcept { + scale = other79.scale; + precision = other79.precision; return *this; } void DecimalType::printTo(std::ostream& out) const { @@ -1506,18 +1506,18 @@ void swap(MilliSeconds &a, MilliSeconds &b) { (void) b; } -MilliSeconds::MilliSeconds(const MilliSeconds& other75) noexcept { - (void) other75; +MilliSeconds::MilliSeconds(const MilliSeconds& other80) noexcept { + (void) other80; } -MilliSeconds::MilliSeconds(MilliSeconds&& other76) noexcept { - (void) other76; +MilliSeconds::MilliSeconds(MilliSeconds&& other81) noexcept { + (void) other81; } -MilliSeconds& MilliSeconds::operator=(const MilliSeconds& other77) noexcept { - (void) other77; +MilliSeconds& MilliSeconds::operator=(const MilliSeconds& other82) 
noexcept { + (void) other82; return *this; } -MilliSeconds& MilliSeconds::operator=(MilliSeconds&& other78) noexcept { - (void) other78; +MilliSeconds& MilliSeconds::operator=(MilliSeconds&& other83) noexcept { + (void) other83; return *this; } void MilliSeconds::printTo(std::ostream& out) const { @@ -1543,18 +1543,18 @@ void swap(MicroSeconds &a, MicroSeconds &b) { (void) b; } -MicroSeconds::MicroSeconds(const MicroSeconds& other79) noexcept { - (void) other79; +MicroSeconds::MicroSeconds(const MicroSeconds& other84) noexcept { + (void) other84; } -MicroSeconds::MicroSeconds(MicroSeconds&& other80) noexcept { - (void) other80; +MicroSeconds::MicroSeconds(MicroSeconds&& other85) noexcept { + (void) other85; } -MicroSeconds& MicroSeconds::operator=(const MicroSeconds& other81) noexcept { - (void) other81; +MicroSeconds& MicroSeconds::operator=(const MicroSeconds& other86) noexcept { + (void) other86; return *this; } -MicroSeconds& MicroSeconds::operator=(MicroSeconds&& other82) noexcept { - (void) other82; +MicroSeconds& MicroSeconds::operator=(MicroSeconds&& other87) noexcept { + (void) other87; return *this; } void MicroSeconds::printTo(std::ostream& out) const { @@ -1580,18 +1580,18 @@ void swap(NanoSeconds &a, NanoSeconds &b) { (void) b; } -NanoSeconds::NanoSeconds(const NanoSeconds& other83) noexcept { - (void) other83; +NanoSeconds::NanoSeconds(const NanoSeconds& other88) noexcept { + (void) other88; } -NanoSeconds::NanoSeconds(NanoSeconds&& other84) noexcept { - (void) other84; +NanoSeconds::NanoSeconds(NanoSeconds&& other89) noexcept { + (void) other89; } -NanoSeconds& NanoSeconds::operator=(const NanoSeconds& other85) noexcept { - (void) other85; +NanoSeconds& NanoSeconds::operator=(const NanoSeconds& other90) noexcept { + (void) other90; return *this; } -NanoSeconds& NanoSeconds::operator=(NanoSeconds&& other86) noexcept { - (void) other86; +NanoSeconds& NanoSeconds::operator=(NanoSeconds&& other91) noexcept { + (void) other91; return *this; } void 
NanoSeconds::printTo(std::ostream& out) const { @@ -1634,30 +1634,30 @@ void swap(TimeUnit &a, TimeUnit &b) { swap(a.__isset, b.__isset); } -TimeUnit::TimeUnit(const TimeUnit& other87) noexcept { - MILLIS = other87.MILLIS; - MICROS = other87.MICROS; - NANOS = other87.NANOS; - __isset = other87.__isset; +TimeUnit::TimeUnit(const TimeUnit& other92) noexcept { + MILLIS = other92.MILLIS; + MICROS = other92.MICROS; + NANOS = other92.NANOS; + __isset = other92.__isset; } -TimeUnit::TimeUnit(TimeUnit&& other88) noexcept { - MILLIS = std::move(other88.MILLIS); - MICROS = std::move(other88.MICROS); - NANOS = std::move(other88.NANOS); - __isset = other88.__isset; +TimeUnit::TimeUnit(TimeUnit&& other93) noexcept { + MILLIS = std::move(other93.MILLIS); + MICROS = std::move(other93.MICROS); + NANOS = std::move(other93.NANOS); + __isset = other93.__isset; } -TimeUnit& TimeUnit::operator=(const TimeUnit& other89) noexcept { - MILLIS = other89.MILLIS; - MICROS = other89.MICROS; - NANOS = other89.NANOS; - __isset = other89.__isset; +TimeUnit& TimeUnit::operator=(const TimeUnit& other94) noexcept { + MILLIS = other94.MILLIS; + MICROS = other94.MICROS; + NANOS = other94.NANOS; + __isset = other94.__isset; return *this; } -TimeUnit& TimeUnit::operator=(TimeUnit&& other90) noexcept { - MILLIS = std::move(other90.MILLIS); - MICROS = std::move(other90.MICROS); - NANOS = std::move(other90.NANOS); - __isset = other90.__isset; +TimeUnit& TimeUnit::operator=(TimeUnit&& other95) noexcept { + MILLIS = std::move(other95.MILLIS); + MICROS = std::move(other95.MICROS); + NANOS = std::move(other95.NANOS); + __isset = other95.__isset; return *this; } void TimeUnit::printTo(std::ostream& out) const { @@ -1694,22 +1694,22 @@ void swap(TimestampType &a, TimestampType &b) { swap(a.unit, b.unit); } -TimestampType::TimestampType(const TimestampType& other91) noexcept { - isAdjustedToUTC = other91.isAdjustedToUTC; - unit = other91.unit; +TimestampType::TimestampType(const TimestampType& other96) noexcept { 
+ isAdjustedToUTC = other96.isAdjustedToUTC; + unit = other96.unit; } -TimestampType::TimestampType(TimestampType&& other92) noexcept { - isAdjustedToUTC = other92.isAdjustedToUTC; - unit = std::move(other92.unit); +TimestampType::TimestampType(TimestampType&& other97) noexcept { + isAdjustedToUTC = other97.isAdjustedToUTC; + unit = std::move(other97.unit); } -TimestampType& TimestampType::operator=(const TimestampType& other93) noexcept { - isAdjustedToUTC = other93.isAdjustedToUTC; - unit = other93.unit; +TimestampType& TimestampType::operator=(const TimestampType& other98) noexcept { + isAdjustedToUTC = other98.isAdjustedToUTC; + unit = other98.unit; return *this; } -TimestampType& TimestampType::operator=(TimestampType&& other94) noexcept { - isAdjustedToUTC = other94.isAdjustedToUTC; - unit = std::move(other94.unit); +TimestampType& TimestampType::operator=(TimestampType&& other99) noexcept { + isAdjustedToUTC = other99.isAdjustedToUTC; + unit = std::move(other99.unit); return *this; } void TimestampType::printTo(std::ostream& out) const { @@ -1745,22 +1745,22 @@ void swap(TimeType &a, TimeType &b) { swap(a.unit, b.unit); } -TimeType::TimeType(const TimeType& other95) noexcept { - isAdjustedToUTC = other95.isAdjustedToUTC; - unit = other95.unit; +TimeType::TimeType(const TimeType& other100) noexcept { + isAdjustedToUTC = other100.isAdjustedToUTC; + unit = other100.unit; } -TimeType::TimeType(TimeType&& other96) noexcept { - isAdjustedToUTC = other96.isAdjustedToUTC; - unit = std::move(other96.unit); +TimeType::TimeType(TimeType&& other101) noexcept { + isAdjustedToUTC = other101.isAdjustedToUTC; + unit = std::move(other101.unit); } -TimeType& TimeType::operator=(const TimeType& other97) noexcept { - isAdjustedToUTC = other97.isAdjustedToUTC; - unit = other97.unit; +TimeType& TimeType::operator=(const TimeType& other102) noexcept { + isAdjustedToUTC = other102.isAdjustedToUTC; + unit = other102.unit; return *this; } -TimeType& TimeType::operator=(TimeType&& 
other98) noexcept { - isAdjustedToUTC = other98.isAdjustedToUTC; - unit = std::move(other98.unit); +TimeType& TimeType::operator=(TimeType&& other103) noexcept { + isAdjustedToUTC = other103.isAdjustedToUTC; + unit = std::move(other103.unit); return *this; } void TimeType::printTo(std::ostream& out) const { @@ -1796,22 +1796,22 @@ void swap(IntType &a, IntType &b) { swap(a.isSigned, b.isSigned); } -IntType::IntType(const IntType& other99) noexcept { - bitWidth = other99.bitWidth; - isSigned = other99.isSigned; +IntType::IntType(const IntType& other104) noexcept { + bitWidth = other104.bitWidth; + isSigned = other104.isSigned; } -IntType::IntType(IntType&& other100) noexcept { - bitWidth = other100.bitWidth; - isSigned = other100.isSigned; +IntType::IntType(IntType&& other105) noexcept { + bitWidth = other105.bitWidth; + isSigned = other105.isSigned; } -IntType& IntType::operator=(const IntType& other101) noexcept { - bitWidth = other101.bitWidth; - isSigned = other101.isSigned; +IntType& IntType::operator=(const IntType& other106) noexcept { + bitWidth = other106.bitWidth; + isSigned = other106.isSigned; return *this; } -IntType& IntType::operator=(IntType&& other102) noexcept { - bitWidth = other102.bitWidth; - isSigned = other102.isSigned; +IntType& IntType::operator=(IntType&& other107) noexcept { + bitWidth = other107.bitWidth; + isSigned = other107.isSigned; return *this; } void IntType::printTo(std::ostream& out) const { @@ -1839,18 +1839,18 @@ void swap(JsonType &a, JsonType &b) { (void) b; } -JsonType::JsonType(const JsonType& other103) noexcept { - (void) other103; +JsonType::JsonType(const JsonType& other108) noexcept { + (void) other108; } -JsonType::JsonType(JsonType&& other104) noexcept { - (void) other104; +JsonType::JsonType(JsonType&& other109) noexcept { + (void) other109; } -JsonType& JsonType::operator=(const JsonType& other105) noexcept { - (void) other105; +JsonType& JsonType::operator=(const JsonType& other110) noexcept { + (void) other110; 
return *this; } -JsonType& JsonType::operator=(JsonType&& other106) noexcept { - (void) other106; +JsonType& JsonType::operator=(JsonType&& other111) noexcept { + (void) other111; return *this; } void JsonType::printTo(std::ostream& out) const { @@ -1876,18 +1876,18 @@ void swap(BsonType &a, BsonType &b) { (void) b; } -BsonType::BsonType(const BsonType& other107) noexcept { - (void) other107; +BsonType::BsonType(const BsonType& other112) noexcept { + (void) other112; } -BsonType::BsonType(BsonType&& other108) noexcept { - (void) other108; +BsonType::BsonType(BsonType&& other113) noexcept { + (void) other113; } -BsonType& BsonType::operator=(const BsonType& other109) noexcept { - (void) other109; +BsonType& BsonType::operator=(const BsonType& other114) noexcept { + (void) other114; return *this; } -BsonType& BsonType::operator=(BsonType&& other110) noexcept { - (void) other110; +BsonType& BsonType::operator=(BsonType&& other115) noexcept { + (void) other115; return *this; } void BsonType::printTo(std::ostream& out) const { @@ -1914,6 +1914,11 @@ void GeometryType::__set_crs(const std::string& val) { __isset.crs = true; } +void GeometryType::__set_crs_encoding(const std::string& val) { + this->crs_encoding = val; +__isset.crs_encoding = true; +} + void GeometryType::__set_metadata(const std::string& val) { this->metadata = val; __isset.metadata = true; @@ -1930,38 +1935,43 @@ void swap(GeometryType &a, GeometryType &b) { swap(a.encoding, b.encoding); swap(a.edges, b.edges); swap(a.crs, b.crs); + swap(a.crs_encoding, b.crs_encoding); swap(a.metadata, b.metadata); swap(a.__isset, b.__isset); } -GeometryType::GeometryType(const GeometryType& other113) { - encoding = other113.encoding; - edges = other113.edges; - crs = other113.crs; - metadata = other113.metadata; - __isset = other113.__isset; -} -GeometryType::GeometryType(GeometryType&& other114) noexcept { - encoding = other114.encoding; - edges = other114.edges; - crs = std::move(other114.crs); - metadata = 
std::move(other114.metadata); - __isset = other114.__isset; -} -GeometryType& GeometryType::operator=(const GeometryType& other115) { - encoding = other115.encoding; - edges = other115.edges; - crs = other115.crs; - metadata = other115.metadata; - __isset = other115.__isset; +GeometryType::GeometryType(const GeometryType& other118) { + encoding = other118.encoding; + edges = other118.edges; + crs = other118.crs; + crs_encoding = other118.crs_encoding; + metadata = other118.metadata; + __isset = other118.__isset; +} +GeometryType::GeometryType(GeometryType&& other119) noexcept { + encoding = other119.encoding; + edges = other119.edges; + crs = std::move(other119.crs); + crs_encoding = std::move(other119.crs_encoding); + metadata = std::move(other119.metadata); + __isset = other119.__isset; +} +GeometryType& GeometryType::operator=(const GeometryType& other120) { + encoding = other120.encoding; + edges = other120.edges; + crs = other120.crs; + crs_encoding = other120.crs_encoding; + metadata = other120.metadata; + __isset = other120.__isset; return *this; } -GeometryType& GeometryType::operator=(GeometryType&& other116) noexcept { - encoding = other116.encoding; - edges = other116.edges; - crs = std::move(other116.crs); - metadata = std::move(other116.metadata); - __isset = other116.__isset; +GeometryType& GeometryType::operator=(GeometryType&& other121) noexcept { + encoding = other121.encoding; + edges = other121.edges; + crs = std::move(other121.crs); + crs_encoding = std::move(other121.crs_encoding); + metadata = std::move(other121.metadata); + __isset = other121.__isset; return *this; } void GeometryType::printTo(std::ostream& out) const { @@ -1970,6 +1980,7 @@ void GeometryType::printTo(std::ostream& out) const { out << "encoding=" << to_string(encoding); out << ", " << "edges=" << to_string(edges); out << ", " << "crs="; (__isset.crs ? (out << to_string(crs)) : (out << "")); + out << ", " << "crs_encoding="; (__isset.crs_encoding ? 
(out << to_string(crs_encoding)) : (out << "")); out << ", " << "metadata="; (__isset.metadata ? (out << to_string(metadata)) : (out << "")); out << ")"; } @@ -2080,78 +2091,78 @@ void swap(LogicalType &a, LogicalType &b) { swap(a.__isset, b.__isset); } -LogicalType::LogicalType(const LogicalType& other117) { - STRING = other117.STRING; - MAP = other117.MAP; - LIST = other117.LIST; - ENUM = other117.ENUM; - DECIMAL = other117.DECIMAL; - DATE = other117.DATE; - TIME = other117.TIME; - TIMESTAMP = other117.TIMESTAMP; - INTEGER = other117.INTEGER; - UNKNOWN = other117.UNKNOWN; - JSON = other117.JSON; - BSON = other117.BSON; - UUID = other117.UUID; - FLOAT16 = other117.FLOAT16; - GEOMETRY = other117.GEOMETRY; - __isset = other117.__isset; -} -LogicalType::LogicalType(LogicalType&& other118) noexcept { - STRING = std::move(other118.STRING); - MAP = std::move(other118.MAP); - LIST = std::move(other118.LIST); - ENUM = std::move(other118.ENUM); - DECIMAL = std::move(other118.DECIMAL); - DATE = std::move(other118.DATE); - TIME = std::move(other118.TIME); - TIMESTAMP = std::move(other118.TIMESTAMP); - INTEGER = std::move(other118.INTEGER); - UNKNOWN = std::move(other118.UNKNOWN); - JSON = std::move(other118.JSON); - BSON = std::move(other118.BSON); - UUID = std::move(other118.UUID); - FLOAT16 = std::move(other118.FLOAT16); - GEOMETRY = std::move(other118.GEOMETRY); - __isset = other118.__isset; -} -LogicalType& LogicalType::operator=(const LogicalType& other119) { - STRING = other119.STRING; - MAP = other119.MAP; - LIST = other119.LIST; - ENUM = other119.ENUM; - DECIMAL = other119.DECIMAL; - DATE = other119.DATE; - TIME = other119.TIME; - TIMESTAMP = other119.TIMESTAMP; - INTEGER = other119.INTEGER; - UNKNOWN = other119.UNKNOWN; - JSON = other119.JSON; - BSON = other119.BSON; - UUID = other119.UUID; - FLOAT16 = other119.FLOAT16; - GEOMETRY = other119.GEOMETRY; - __isset = other119.__isset; +LogicalType::LogicalType(const LogicalType& other122) { + STRING = other122.STRING; + 
MAP = other122.MAP; + LIST = other122.LIST; + ENUM = other122.ENUM; + DECIMAL = other122.DECIMAL; + DATE = other122.DATE; + TIME = other122.TIME; + TIMESTAMP = other122.TIMESTAMP; + INTEGER = other122.INTEGER; + UNKNOWN = other122.UNKNOWN; + JSON = other122.JSON; + BSON = other122.BSON; + UUID = other122.UUID; + FLOAT16 = other122.FLOAT16; + GEOMETRY = other122.GEOMETRY; + __isset = other122.__isset; +} +LogicalType::LogicalType(LogicalType&& other123) noexcept { + STRING = std::move(other123.STRING); + MAP = std::move(other123.MAP); + LIST = std::move(other123.LIST); + ENUM = std::move(other123.ENUM); + DECIMAL = std::move(other123.DECIMAL); + DATE = std::move(other123.DATE); + TIME = std::move(other123.TIME); + TIMESTAMP = std::move(other123.TIMESTAMP); + INTEGER = std::move(other123.INTEGER); + UNKNOWN = std::move(other123.UNKNOWN); + JSON = std::move(other123.JSON); + BSON = std::move(other123.BSON); + UUID = std::move(other123.UUID); + FLOAT16 = std::move(other123.FLOAT16); + GEOMETRY = std::move(other123.GEOMETRY); + __isset = other123.__isset; +} +LogicalType& LogicalType::operator=(const LogicalType& other124) { + STRING = other124.STRING; + MAP = other124.MAP; + LIST = other124.LIST; + ENUM = other124.ENUM; + DECIMAL = other124.DECIMAL; + DATE = other124.DATE; + TIME = other124.TIME; + TIMESTAMP = other124.TIMESTAMP; + INTEGER = other124.INTEGER; + UNKNOWN = other124.UNKNOWN; + JSON = other124.JSON; + BSON = other124.BSON; + UUID = other124.UUID; + FLOAT16 = other124.FLOAT16; + GEOMETRY = other124.GEOMETRY; + __isset = other124.__isset; return *this; } -LogicalType& LogicalType::operator=(LogicalType&& other120) noexcept { - STRING = std::move(other120.STRING); - MAP = std::move(other120.MAP); - LIST = std::move(other120.LIST); - ENUM = std::move(other120.ENUM); - DECIMAL = std::move(other120.DECIMAL); - DATE = std::move(other120.DATE); - TIME = std::move(other120.TIME); - TIMESTAMP = std::move(other120.TIMESTAMP); - INTEGER = std::move(other120.INTEGER); 
- UNKNOWN = std::move(other120.UNKNOWN); - JSON = std::move(other120.JSON); - BSON = std::move(other120.BSON); - UUID = std::move(other120.UUID); - FLOAT16 = std::move(other120.FLOAT16); - GEOMETRY = std::move(other120.GEOMETRY); - __isset = other120.__isset; +LogicalType& LogicalType::operator=(LogicalType&& other125) noexcept { + STRING = std::move(other125.STRING); + MAP = std::move(other125.MAP); + LIST = std::move(other125.LIST); + ENUM = std::move(other125.ENUM); + DECIMAL = std::move(other125.DECIMAL); + DATE = std::move(other125.DATE); + TIME = std::move(other125.TIME); + TIMESTAMP = std::move(other125.TIMESTAMP); + INTEGER = std::move(other125.INTEGER); + UNKNOWN = std::move(other125.UNKNOWN); + JSON = std::move(other125.JSON); + BSON = std::move(other125.BSON); + UUID = std::move(other125.UUID); + FLOAT16 = std::move(other125.FLOAT16); + GEOMETRY = std::move(other125.GEOMETRY); + __isset = other125.__isset; return *this; } void LogicalType::printTo(std::ostream& out) const { @@ -2250,58 +2261,58 @@ void swap(SchemaElement &a, SchemaElement &b) { swap(a.__isset, b.__isset); } -SchemaElement::SchemaElement(const SchemaElement& other124) { - type = other124.type; - type_length = other124.type_length; - repetition_type = other124.repetition_type; - name = other124.name; - num_children = other124.num_children; - converted_type = other124.converted_type; - scale = other124.scale; - precision = other124.precision; - field_id = other124.field_id; - logicalType = other124.logicalType; - __isset = other124.__isset; -} -SchemaElement::SchemaElement(SchemaElement&& other125) noexcept { - type = other125.type; - type_length = other125.type_length; - repetition_type = other125.repetition_type; - name = std::move(other125.name); - num_children = other125.num_children; - converted_type = other125.converted_type; - scale = other125.scale; - precision = other125.precision; - field_id = other125.field_id; - logicalType = std::move(other125.logicalType); - __isset = 
other125.__isset; -} -SchemaElement& SchemaElement::operator=(const SchemaElement& other126) { - type = other126.type; - type_length = other126.type_length; - repetition_type = other126.repetition_type; - name = other126.name; - num_children = other126.num_children; - converted_type = other126.converted_type; - scale = other126.scale; - precision = other126.precision; - field_id = other126.field_id; - logicalType = other126.logicalType; - __isset = other126.__isset; +SchemaElement::SchemaElement(const SchemaElement& other129) { + type = other129.type; + type_length = other129.type_length; + repetition_type = other129.repetition_type; + name = other129.name; + num_children = other129.num_children; + converted_type = other129.converted_type; + scale = other129.scale; + precision = other129.precision; + field_id = other129.field_id; + logicalType = other129.logicalType; + __isset = other129.__isset; +} +SchemaElement::SchemaElement(SchemaElement&& other130) noexcept { + type = other130.type; + type_length = other130.type_length; + repetition_type = other130.repetition_type; + name = std::move(other130.name); + num_children = other130.num_children; + converted_type = other130.converted_type; + scale = other130.scale; + precision = other130.precision; + field_id = other130.field_id; + logicalType = std::move(other130.logicalType); + __isset = other130.__isset; +} +SchemaElement& SchemaElement::operator=(const SchemaElement& other131) { + type = other131.type; + type_length = other131.type_length; + repetition_type = other131.repetition_type; + name = other131.name; + num_children = other131.num_children; + converted_type = other131.converted_type; + scale = other131.scale; + precision = other131.precision; + field_id = other131.field_id; + logicalType = other131.logicalType; + __isset = other131.__isset; return *this; } -SchemaElement& SchemaElement::operator=(SchemaElement&& other127) noexcept { - type = other127.type; - type_length = other127.type_length; - 
repetition_type = other127.repetition_type; - name = std::move(other127.name); - num_children = other127.num_children; - converted_type = other127.converted_type; - scale = other127.scale; - precision = other127.precision; - field_id = other127.field_id; - logicalType = std::move(other127.logicalType); - __isset = other127.__isset; +SchemaElement& SchemaElement::operator=(SchemaElement&& other132) noexcept { + type = other132.type; + type_length = other132.type_length; + repetition_type = other132.repetition_type; + name = std::move(other132.name); + num_children = other132.num_children; + converted_type = other132.converted_type; + scale = other132.scale; + precision = other132.precision; + field_id = other132.field_id; + logicalType = std::move(other132.logicalType); + __isset = other132.__isset; return *this; } void SchemaElement::printTo(std::ostream& out) const { @@ -2362,38 +2373,38 @@ void swap(DataPageHeader &a, DataPageHeader &b) { swap(a.__isset, b.__isset); } -DataPageHeader::DataPageHeader(const DataPageHeader& other131) { - num_values = other131.num_values; - encoding = other131.encoding; - definition_level_encoding = other131.definition_level_encoding; - repetition_level_encoding = other131.repetition_level_encoding; - statistics = other131.statistics; - __isset = other131.__isset; -} -DataPageHeader::DataPageHeader(DataPageHeader&& other132) noexcept { - num_values = other132.num_values; - encoding = other132.encoding; - definition_level_encoding = other132.definition_level_encoding; - repetition_level_encoding = other132.repetition_level_encoding; - statistics = std::move(other132.statistics); - __isset = other132.__isset; -} -DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other133) { - num_values = other133.num_values; - encoding = other133.encoding; - definition_level_encoding = other133.definition_level_encoding; - repetition_level_encoding = other133.repetition_level_encoding; - statistics = other133.statistics; - __isset = 
other133.__isset; +DataPageHeader::DataPageHeader(const DataPageHeader& other136) { + num_values = other136.num_values; + encoding = other136.encoding; + definition_level_encoding = other136.definition_level_encoding; + repetition_level_encoding = other136.repetition_level_encoding; + statistics = other136.statistics; + __isset = other136.__isset; +} +DataPageHeader::DataPageHeader(DataPageHeader&& other137) noexcept { + num_values = other137.num_values; + encoding = other137.encoding; + definition_level_encoding = other137.definition_level_encoding; + repetition_level_encoding = other137.repetition_level_encoding; + statistics = std::move(other137.statistics); + __isset = other137.__isset; +} +DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other138) { + num_values = other138.num_values; + encoding = other138.encoding; + definition_level_encoding = other138.definition_level_encoding; + repetition_level_encoding = other138.repetition_level_encoding; + statistics = other138.statistics; + __isset = other138.__isset; return *this; } -DataPageHeader& DataPageHeader::operator=(DataPageHeader&& other134) noexcept { - num_values = other134.num_values; - encoding = other134.encoding; - definition_level_encoding = other134.definition_level_encoding; - repetition_level_encoding = other134.repetition_level_encoding; - statistics = std::move(other134.statistics); - __isset = other134.__isset; +DataPageHeader& DataPageHeader::operator=(DataPageHeader&& other139) noexcept { + num_values = other139.num_values; + encoding = other139.encoding; + definition_level_encoding = other139.definition_level_encoding; + repetition_level_encoding = other139.repetition_level_encoding; + statistics = std::move(other139.statistics); + __isset = other139.__isset; return *this; } void DataPageHeader::printTo(std::ostream& out) const { @@ -2424,18 +2435,18 @@ void swap(IndexPageHeader &a, IndexPageHeader &b) { (void) b; } -IndexPageHeader::IndexPageHeader(const IndexPageHeader& 
other135) noexcept { - (void) other135; +IndexPageHeader::IndexPageHeader(const IndexPageHeader& other140) noexcept { + (void) other140; } -IndexPageHeader::IndexPageHeader(IndexPageHeader&& other136) noexcept { - (void) other136; +IndexPageHeader::IndexPageHeader(IndexPageHeader&& other141) noexcept { + (void) other141; } -IndexPageHeader& IndexPageHeader::operator=(const IndexPageHeader& other137) noexcept { - (void) other137; +IndexPageHeader& IndexPageHeader::operator=(const IndexPageHeader& other142) noexcept { + (void) other142; return *this; } -IndexPageHeader& IndexPageHeader::operator=(IndexPageHeader&& other138) noexcept { - (void) other138; +IndexPageHeader& IndexPageHeader::operator=(IndexPageHeader&& other143) noexcept { + (void) other143; return *this; } void IndexPageHeader::printTo(std::ostream& out) const { @@ -2476,30 +2487,30 @@ void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) { swap(a.__isset, b.__isset); } -DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other140) noexcept { - num_values = other140.num_values; - encoding = other140.encoding; - is_sorted = other140.is_sorted; - __isset = other140.__isset; +DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other145) noexcept { + num_values = other145.num_values; + encoding = other145.encoding; + is_sorted = other145.is_sorted; + __isset = other145.__isset; } -DictionaryPageHeader::DictionaryPageHeader(DictionaryPageHeader&& other141) noexcept { - num_values = other141.num_values; - encoding = other141.encoding; - is_sorted = other141.is_sorted; - __isset = other141.__isset; +DictionaryPageHeader::DictionaryPageHeader(DictionaryPageHeader&& other146) noexcept { + num_values = other146.num_values; + encoding = other146.encoding; + is_sorted = other146.is_sorted; + __isset = other146.__isset; } -DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other142) noexcept { - num_values = other142.num_values; - encoding = 
other142.encoding; - is_sorted = other142.is_sorted; - __isset = other142.__isset; +DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other147) noexcept { + num_values = other147.num_values; + encoding = other147.encoding; + is_sorted = other147.is_sorted; + __isset = other147.__isset; return *this; } -DictionaryPageHeader& DictionaryPageHeader::operator=(DictionaryPageHeader&& other143) noexcept { - num_values = other143.num_values; - encoding = other143.encoding; - is_sorted = other143.is_sorted; - __isset = other143.__isset; +DictionaryPageHeader& DictionaryPageHeader::operator=(DictionaryPageHeader&& other148) noexcept { + num_values = other148.num_values; + encoding = other148.encoding; + is_sorted = other148.is_sorted; + __isset = other148.__isset; return *this; } void DictionaryPageHeader::printTo(std::ostream& out) const { @@ -2569,50 +2580,50 @@ void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) { swap(a.__isset, b.__isset); } -DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other145) { - num_values = other145.num_values; - num_nulls = other145.num_nulls; - num_rows = other145.num_rows; - encoding = other145.encoding; - definition_levels_byte_length = other145.definition_levels_byte_length; - repetition_levels_byte_length = other145.repetition_levels_byte_length; - is_compressed = other145.is_compressed; - statistics = other145.statistics; - __isset = other145.__isset; -} -DataPageHeaderV2::DataPageHeaderV2(DataPageHeaderV2&& other146) noexcept { - num_values = other146.num_values; - num_nulls = other146.num_nulls; - num_rows = other146.num_rows; - encoding = other146.encoding; - definition_levels_byte_length = other146.definition_levels_byte_length; - repetition_levels_byte_length = other146.repetition_levels_byte_length; - is_compressed = other146.is_compressed; - statistics = std::move(other146.statistics); - __isset = other146.__isset; -} -DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& 
other147) { - num_values = other147.num_values; - num_nulls = other147.num_nulls; - num_rows = other147.num_rows; - encoding = other147.encoding; - definition_levels_byte_length = other147.definition_levels_byte_length; - repetition_levels_byte_length = other147.repetition_levels_byte_length; - is_compressed = other147.is_compressed; - statistics = other147.statistics; - __isset = other147.__isset; +DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other150) { + num_values = other150.num_values; + num_nulls = other150.num_nulls; + num_rows = other150.num_rows; + encoding = other150.encoding; + definition_levels_byte_length = other150.definition_levels_byte_length; + repetition_levels_byte_length = other150.repetition_levels_byte_length; + is_compressed = other150.is_compressed; + statistics = other150.statistics; + __isset = other150.__isset; +} +DataPageHeaderV2::DataPageHeaderV2(DataPageHeaderV2&& other151) noexcept { + num_values = other151.num_values; + num_nulls = other151.num_nulls; + num_rows = other151.num_rows; + encoding = other151.encoding; + definition_levels_byte_length = other151.definition_levels_byte_length; + repetition_levels_byte_length = other151.repetition_levels_byte_length; + is_compressed = other151.is_compressed; + statistics = std::move(other151.statistics); + __isset = other151.__isset; +} +DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& other152) { + num_values = other152.num_values; + num_nulls = other152.num_nulls; + num_rows = other152.num_rows; + encoding = other152.encoding; + definition_levels_byte_length = other152.definition_levels_byte_length; + repetition_levels_byte_length = other152.repetition_levels_byte_length; + is_compressed = other152.is_compressed; + statistics = other152.statistics; + __isset = other152.__isset; return *this; } -DataPageHeaderV2& DataPageHeaderV2::operator=(DataPageHeaderV2&& other148) noexcept { - num_values = other148.num_values; - num_nulls = other148.num_nulls; - 
num_rows = other148.num_rows; - encoding = other148.encoding; - definition_levels_byte_length = other148.definition_levels_byte_length; - repetition_levels_byte_length = other148.repetition_levels_byte_length; - is_compressed = other148.is_compressed; - statistics = std::move(other148.statistics); - __isset = other148.__isset; +DataPageHeaderV2& DataPageHeaderV2::operator=(DataPageHeaderV2&& other153) noexcept { + num_values = other153.num_values; + num_nulls = other153.num_nulls; + num_rows = other153.num_rows; + encoding = other153.encoding; + definition_levels_byte_length = other153.definition_levels_byte_length; + repetition_levels_byte_length = other153.repetition_levels_byte_length; + is_compressed = other153.is_compressed; + statistics = std::move(other153.statistics); + __isset = other153.__isset; return *this; } void DataPageHeaderV2::printTo(std::ostream& out) const { @@ -2646,18 +2657,18 @@ void swap(SplitBlockAlgorithm &a, SplitBlockAlgorithm &b) { (void) b; } -SplitBlockAlgorithm::SplitBlockAlgorithm(const SplitBlockAlgorithm& other149) noexcept { - (void) other149; +SplitBlockAlgorithm::SplitBlockAlgorithm(const SplitBlockAlgorithm& other154) noexcept { + (void) other154; } -SplitBlockAlgorithm::SplitBlockAlgorithm(SplitBlockAlgorithm&& other150) noexcept { - (void) other150; +SplitBlockAlgorithm::SplitBlockAlgorithm(SplitBlockAlgorithm&& other155) noexcept { + (void) other155; } -SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(const SplitBlockAlgorithm& other151) noexcept { - (void) other151; +SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(const SplitBlockAlgorithm& other156) noexcept { + (void) other156; return *this; } -SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(SplitBlockAlgorithm&& other152) noexcept { - (void) other152; +SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(SplitBlockAlgorithm&& other157) noexcept { + (void) other157; return *this; } void SplitBlockAlgorithm::printTo(std::ostream& out) const { @@ -2688,22 
+2699,22 @@ void swap(BloomFilterAlgorithm &a, BloomFilterAlgorithm &b) { swap(a.__isset, b.__isset); } -BloomFilterAlgorithm::BloomFilterAlgorithm(const BloomFilterAlgorithm& other153) noexcept { - BLOCK = other153.BLOCK; - __isset = other153.__isset; +BloomFilterAlgorithm::BloomFilterAlgorithm(const BloomFilterAlgorithm& other158) noexcept { + BLOCK = other158.BLOCK; + __isset = other158.__isset; } -BloomFilterAlgorithm::BloomFilterAlgorithm(BloomFilterAlgorithm&& other154) noexcept { - BLOCK = std::move(other154.BLOCK); - __isset = other154.__isset; +BloomFilterAlgorithm::BloomFilterAlgorithm(BloomFilterAlgorithm&& other159) noexcept { + BLOCK = std::move(other159.BLOCK); + __isset = other159.__isset; } -BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(const BloomFilterAlgorithm& other155) noexcept { - BLOCK = other155.BLOCK; - __isset = other155.__isset; +BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(const BloomFilterAlgorithm& other160) noexcept { + BLOCK = other160.BLOCK; + __isset = other160.__isset; return *this; } -BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(BloomFilterAlgorithm&& other156) noexcept { - BLOCK = std::move(other156.BLOCK); - __isset = other156.__isset; +BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(BloomFilterAlgorithm&& other161) noexcept { + BLOCK = std::move(other161.BLOCK); + __isset = other161.__isset; return *this; } void BloomFilterAlgorithm::printTo(std::ostream& out) const { @@ -2730,18 +2741,18 @@ void swap(XxHash &a, XxHash &b) { (void) b; } -XxHash::XxHash(const XxHash& other157) noexcept { - (void) other157; +XxHash::XxHash(const XxHash& other162) noexcept { + (void) other162; } -XxHash::XxHash(XxHash&& other158) noexcept { - (void) other158; +XxHash::XxHash(XxHash&& other163) noexcept { + (void) other163; } -XxHash& XxHash::operator=(const XxHash& other159) noexcept { - (void) other159; +XxHash& XxHash::operator=(const XxHash& other164) noexcept { + (void) other164; return *this; } -XxHash& 
XxHash::operator=(XxHash&& other160) noexcept { - (void) other160; +XxHash& XxHash::operator=(XxHash&& other165) noexcept { + (void) other165; return *this; } void XxHash::printTo(std::ostream& out) const { @@ -2772,22 +2783,22 @@ void swap(BloomFilterHash &a, BloomFilterHash &b) { swap(a.__isset, b.__isset); } -BloomFilterHash::BloomFilterHash(const BloomFilterHash& other161) noexcept { - XXHASH = other161.XXHASH; - __isset = other161.__isset; +BloomFilterHash::BloomFilterHash(const BloomFilterHash& other166) noexcept { + XXHASH = other166.XXHASH; + __isset = other166.__isset; } -BloomFilterHash::BloomFilterHash(BloomFilterHash&& other162) noexcept { - XXHASH = std::move(other162.XXHASH); - __isset = other162.__isset; +BloomFilterHash::BloomFilterHash(BloomFilterHash&& other167) noexcept { + XXHASH = std::move(other167.XXHASH); + __isset = other167.__isset; } -BloomFilterHash& BloomFilterHash::operator=(const BloomFilterHash& other163) noexcept { - XXHASH = other163.XXHASH; - __isset = other163.__isset; +BloomFilterHash& BloomFilterHash::operator=(const BloomFilterHash& other168) noexcept { + XXHASH = other168.XXHASH; + __isset = other168.__isset; return *this; } -BloomFilterHash& BloomFilterHash::operator=(BloomFilterHash&& other164) noexcept { - XXHASH = std::move(other164.XXHASH); - __isset = other164.__isset; +BloomFilterHash& BloomFilterHash::operator=(BloomFilterHash&& other169) noexcept { + XXHASH = std::move(other169.XXHASH); + __isset = other169.__isset; return *this; } void BloomFilterHash::printTo(std::ostream& out) const { @@ -2814,18 +2825,18 @@ void swap(Uncompressed &a, Uncompressed &b) { (void) b; } -Uncompressed::Uncompressed(const Uncompressed& other165) noexcept { - (void) other165; +Uncompressed::Uncompressed(const Uncompressed& other170) noexcept { + (void) other170; } -Uncompressed::Uncompressed(Uncompressed&& other166) noexcept { - (void) other166; +Uncompressed::Uncompressed(Uncompressed&& other171) noexcept { + (void) other171; } 
-Uncompressed& Uncompressed::operator=(const Uncompressed& other167) noexcept { - (void) other167; +Uncompressed& Uncompressed::operator=(const Uncompressed& other172) noexcept { + (void) other172; return *this; } -Uncompressed& Uncompressed::operator=(Uncompressed&& other168) noexcept { - (void) other168; +Uncompressed& Uncompressed::operator=(Uncompressed&& other173) noexcept { + (void) other173; return *this; } void Uncompressed::printTo(std::ostream& out) const { @@ -2856,22 +2867,22 @@ void swap(BloomFilterCompression &a, BloomFilterCompression &b) { swap(a.__isset, b.__isset); } -BloomFilterCompression::BloomFilterCompression(const BloomFilterCompression& other169) noexcept { - UNCOMPRESSED = other169.UNCOMPRESSED; - __isset = other169.__isset; +BloomFilterCompression::BloomFilterCompression(const BloomFilterCompression& other174) noexcept { + UNCOMPRESSED = other174.UNCOMPRESSED; + __isset = other174.__isset; } -BloomFilterCompression::BloomFilterCompression(BloomFilterCompression&& other170) noexcept { - UNCOMPRESSED = std::move(other170.UNCOMPRESSED); - __isset = other170.__isset; +BloomFilterCompression::BloomFilterCompression(BloomFilterCompression&& other175) noexcept { + UNCOMPRESSED = std::move(other175.UNCOMPRESSED); + __isset = other175.__isset; } -BloomFilterCompression& BloomFilterCompression::operator=(const BloomFilterCompression& other171) noexcept { - UNCOMPRESSED = other171.UNCOMPRESSED; - __isset = other171.__isset; +BloomFilterCompression& BloomFilterCompression::operator=(const BloomFilterCompression& other176) noexcept { + UNCOMPRESSED = other176.UNCOMPRESSED; + __isset = other176.__isset; return *this; } -BloomFilterCompression& BloomFilterCompression::operator=(BloomFilterCompression&& other172) noexcept { - UNCOMPRESSED = std::move(other172.UNCOMPRESSED); - __isset = other172.__isset; +BloomFilterCompression& BloomFilterCompression::operator=(BloomFilterCompression&& other177) noexcept { + UNCOMPRESSED = 
std::move(other177.UNCOMPRESSED); + __isset = other177.__isset; return *this; } void BloomFilterCompression::printTo(std::ostream& out) const { @@ -2916,30 +2927,30 @@ void swap(BloomFilterHeader &a, BloomFilterHeader &b) { swap(a.compression, b.compression); } -BloomFilterHeader::BloomFilterHeader(const BloomFilterHeader& other173) noexcept { - numBytes = other173.numBytes; - algorithm = other173.algorithm; - hash = other173.hash; - compression = other173.compression; +BloomFilterHeader::BloomFilterHeader(const BloomFilterHeader& other178) noexcept { + numBytes = other178.numBytes; + algorithm = other178.algorithm; + hash = other178.hash; + compression = other178.compression; } -BloomFilterHeader::BloomFilterHeader(BloomFilterHeader&& other174) noexcept { - numBytes = other174.numBytes; - algorithm = std::move(other174.algorithm); - hash = std::move(other174.hash); - compression = std::move(other174.compression); +BloomFilterHeader::BloomFilterHeader(BloomFilterHeader&& other179) noexcept { + numBytes = other179.numBytes; + algorithm = std::move(other179.algorithm); + hash = std::move(other179.hash); + compression = std::move(other179.compression); } -BloomFilterHeader& BloomFilterHeader::operator=(const BloomFilterHeader& other175) noexcept { - numBytes = other175.numBytes; - algorithm = other175.algorithm; - hash = other175.hash; - compression = other175.compression; +BloomFilterHeader& BloomFilterHeader::operator=(const BloomFilterHeader& other180) noexcept { + numBytes = other180.numBytes; + algorithm = other180.algorithm; + hash = other180.hash; + compression = other180.compression; return *this; } -BloomFilterHeader& BloomFilterHeader::operator=(BloomFilterHeader&& other176) noexcept { - numBytes = other176.numBytes; - algorithm = std::move(other176.algorithm); - hash = std::move(other176.hash); - compression = std::move(other176.compression); +BloomFilterHeader& BloomFilterHeader::operator=(BloomFilterHeader&& other181) noexcept { + numBytes = 
other181.numBytes; + algorithm = std::move(other181.algorithm); + hash = std::move(other181.hash); + compression = std::move(other181.compression); return *this; } void BloomFilterHeader::printTo(std::ostream& out) const { @@ -3013,50 +3024,50 @@ void swap(PageHeader &a, PageHeader &b) { swap(a.__isset, b.__isset); } -PageHeader::PageHeader(const PageHeader& other178) { - type = other178.type; - uncompressed_page_size = other178.uncompressed_page_size; - compressed_page_size = other178.compressed_page_size; - crc = other178.crc; - data_page_header = other178.data_page_header; - index_page_header = other178.index_page_header; - dictionary_page_header = other178.dictionary_page_header; - data_page_header_v2 = other178.data_page_header_v2; - __isset = other178.__isset; -} -PageHeader::PageHeader(PageHeader&& other179) noexcept { - type = other179.type; - uncompressed_page_size = other179.uncompressed_page_size; - compressed_page_size = other179.compressed_page_size; - crc = other179.crc; - data_page_header = std::move(other179.data_page_header); - index_page_header = std::move(other179.index_page_header); - dictionary_page_header = std::move(other179.dictionary_page_header); - data_page_header_v2 = std::move(other179.data_page_header_v2); - __isset = other179.__isset; -} -PageHeader& PageHeader::operator=(const PageHeader& other180) { - type = other180.type; - uncompressed_page_size = other180.uncompressed_page_size; - compressed_page_size = other180.compressed_page_size; - crc = other180.crc; - data_page_header = other180.data_page_header; - index_page_header = other180.index_page_header; - dictionary_page_header = other180.dictionary_page_header; - data_page_header_v2 = other180.data_page_header_v2; - __isset = other180.__isset; +PageHeader::PageHeader(const PageHeader& other183) { + type = other183.type; + uncompressed_page_size = other183.uncompressed_page_size; + compressed_page_size = other183.compressed_page_size; + crc = other183.crc; + data_page_header = 
other183.data_page_header; + index_page_header = other183.index_page_header; + dictionary_page_header = other183.dictionary_page_header; + data_page_header_v2 = other183.data_page_header_v2; + __isset = other183.__isset; +} +PageHeader::PageHeader(PageHeader&& other184) noexcept { + type = other184.type; + uncompressed_page_size = other184.uncompressed_page_size; + compressed_page_size = other184.compressed_page_size; + crc = other184.crc; + data_page_header = std::move(other184.data_page_header); + index_page_header = std::move(other184.index_page_header); + dictionary_page_header = std::move(other184.dictionary_page_header); + data_page_header_v2 = std::move(other184.data_page_header_v2); + __isset = other184.__isset; +} +PageHeader& PageHeader::operator=(const PageHeader& other185) { + type = other185.type; + uncompressed_page_size = other185.uncompressed_page_size; + compressed_page_size = other185.compressed_page_size; + crc = other185.crc; + data_page_header = other185.data_page_header; + index_page_header = other185.index_page_header; + dictionary_page_header = other185.dictionary_page_header; + data_page_header_v2 = other185.data_page_header_v2; + __isset = other185.__isset; return *this; } -PageHeader& PageHeader::operator=(PageHeader&& other181) noexcept { - type = other181.type; - uncompressed_page_size = other181.uncompressed_page_size; - compressed_page_size = other181.compressed_page_size; - crc = other181.crc; - data_page_header = std::move(other181.data_page_header); - index_page_header = std::move(other181.index_page_header); - dictionary_page_header = std::move(other181.dictionary_page_header); - data_page_header_v2 = std::move(other181.data_page_header_v2); - __isset = other181.__isset; +PageHeader& PageHeader::operator=(PageHeader&& other186) noexcept { + type = other186.type; + uncompressed_page_size = other186.uncompressed_page_size; + compressed_page_size = other186.compressed_page_size; + crc = other186.crc; + data_page_header = 
std::move(other186.data_page_header); + index_page_header = std::move(other186.index_page_header); + dictionary_page_header = std::move(other186.dictionary_page_header); + data_page_header_v2 = std::move(other186.data_page_header_v2); + __isset = other186.__isset; return *this; } void PageHeader::printTo(std::ostream& out) const { @@ -3100,26 +3111,26 @@ void swap(KeyValue &a, KeyValue &b) { swap(a.__isset, b.__isset); } -KeyValue::KeyValue(const KeyValue& other182) { - key = other182.key; - value = other182.value; - __isset = other182.__isset; +KeyValue::KeyValue(const KeyValue& other187) { + key = other187.key; + value = other187.value; + __isset = other187.__isset; } -KeyValue::KeyValue(KeyValue&& other183) noexcept { - key = std::move(other183.key); - value = std::move(other183.value); - __isset = other183.__isset; +KeyValue::KeyValue(KeyValue&& other188) noexcept { + key = std::move(other188.key); + value = std::move(other188.value); + __isset = other188.__isset; } -KeyValue& KeyValue::operator=(const KeyValue& other184) { - key = other184.key; - value = other184.value; - __isset = other184.__isset; +KeyValue& KeyValue::operator=(const KeyValue& other189) { + key = other189.key; + value = other189.value; + __isset = other189.__isset; return *this; } -KeyValue& KeyValue::operator=(KeyValue&& other185) noexcept { - key = std::move(other185.key); - value = std::move(other185.value); - __isset = other185.__isset; +KeyValue& KeyValue::operator=(KeyValue&& other190) noexcept { + key = std::move(other190.key); + value = std::move(other190.value); + __isset = other190.__isset; return *this; } void KeyValue::printTo(std::ostream& out) const { @@ -3160,26 +3171,26 @@ void swap(SortingColumn &a, SortingColumn &b) { swap(a.nulls_first, b.nulls_first); } -SortingColumn::SortingColumn(const SortingColumn& other186) noexcept { - column_idx = other186.column_idx; - descending = other186.descending; - nulls_first = other186.nulls_first; +SortingColumn::SortingColumn(const 
SortingColumn& other191) noexcept { + column_idx = other191.column_idx; + descending = other191.descending; + nulls_first = other191.nulls_first; } -SortingColumn::SortingColumn(SortingColumn&& other187) noexcept { - column_idx = other187.column_idx; - descending = other187.descending; - nulls_first = other187.nulls_first; +SortingColumn::SortingColumn(SortingColumn&& other192) noexcept { + column_idx = other192.column_idx; + descending = other192.descending; + nulls_first = other192.nulls_first; } -SortingColumn& SortingColumn::operator=(const SortingColumn& other188) noexcept { - column_idx = other188.column_idx; - descending = other188.descending; - nulls_first = other188.nulls_first; +SortingColumn& SortingColumn::operator=(const SortingColumn& other193) noexcept { + column_idx = other193.column_idx; + descending = other193.descending; + nulls_first = other193.nulls_first; return *this; } -SortingColumn& SortingColumn::operator=(SortingColumn&& other189) noexcept { - column_idx = other189.column_idx; - descending = other189.descending; - nulls_first = other189.nulls_first; +SortingColumn& SortingColumn::operator=(SortingColumn&& other194) noexcept { + column_idx = other194.column_idx; + descending = other194.descending; + nulls_first = other194.nulls_first; return *this; } void SortingColumn::printTo(std::ostream& out) const { @@ -3221,26 +3232,26 @@ void swap(PageEncodingStats &a, PageEncodingStats &b) { swap(a.count, b.count); } -PageEncodingStats::PageEncodingStats(const PageEncodingStats& other192) noexcept { - page_type = other192.page_type; - encoding = other192.encoding; - count = other192.count; +PageEncodingStats::PageEncodingStats(const PageEncodingStats& other197) noexcept { + page_type = other197.page_type; + encoding = other197.encoding; + count = other197.count; } -PageEncodingStats::PageEncodingStats(PageEncodingStats&& other193) noexcept { - page_type = other193.page_type; - encoding = other193.encoding; - count = other193.count; 
+PageEncodingStats::PageEncodingStats(PageEncodingStats&& other198) noexcept { + page_type = other198.page_type; + encoding = other198.encoding; + count = other198.count; } -PageEncodingStats& PageEncodingStats::operator=(const PageEncodingStats& other194) noexcept { - page_type = other194.page_type; - encoding = other194.encoding; - count = other194.count; +PageEncodingStats& PageEncodingStats::operator=(const PageEncodingStats& other199) noexcept { + page_type = other199.page_type; + encoding = other199.encoding; + count = other199.count; return *this; } -PageEncodingStats& PageEncodingStats::operator=(PageEncodingStats&& other195) noexcept { - page_type = other195.page_type; - encoding = other195.encoding; - count = other195.count; +PageEncodingStats& PageEncodingStats::operator=(PageEncodingStats&& other200) noexcept { + page_type = other200.page_type; + encoding = other200.encoding; + count = other200.count; return *this; } void PageEncodingStats::printTo(std::ostream& out) const { @@ -3356,82 +3367,82 @@ void swap(ColumnMetaData &a, ColumnMetaData &b) { swap(a.__isset, b.__isset); } -ColumnMetaData::ColumnMetaData(const ColumnMetaData& other223) { - type = other223.type; - encodings = other223.encodings; - path_in_schema = other223.path_in_schema; - codec = other223.codec; - num_values = other223.num_values; - total_uncompressed_size = other223.total_uncompressed_size; - total_compressed_size = other223.total_compressed_size; - key_value_metadata = other223.key_value_metadata; - data_page_offset = other223.data_page_offset; - index_page_offset = other223.index_page_offset; - dictionary_page_offset = other223.dictionary_page_offset; - statistics = other223.statistics; - encoding_stats = other223.encoding_stats; - bloom_filter_offset = other223.bloom_filter_offset; - bloom_filter_length = other223.bloom_filter_length; - size_statistics = other223.size_statistics; - __isset = other223.__isset; -} -ColumnMetaData::ColumnMetaData(ColumnMetaData&& other224) 
noexcept { - type = other224.type; - encodings = std::move(other224.encodings); - path_in_schema = std::move(other224.path_in_schema); - codec = other224.codec; - num_values = other224.num_values; - total_uncompressed_size = other224.total_uncompressed_size; - total_compressed_size = other224.total_compressed_size; - key_value_metadata = std::move(other224.key_value_metadata); - data_page_offset = other224.data_page_offset; - index_page_offset = other224.index_page_offset; - dictionary_page_offset = other224.dictionary_page_offset; - statistics = std::move(other224.statistics); - encoding_stats = std::move(other224.encoding_stats); - bloom_filter_offset = other224.bloom_filter_offset; - bloom_filter_length = other224.bloom_filter_length; - size_statistics = std::move(other224.size_statistics); - __isset = other224.__isset; -} -ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other225) { - type = other225.type; - encodings = other225.encodings; - path_in_schema = other225.path_in_schema; - codec = other225.codec; - num_values = other225.num_values; - total_uncompressed_size = other225.total_uncompressed_size; - total_compressed_size = other225.total_compressed_size; - key_value_metadata = other225.key_value_metadata; - data_page_offset = other225.data_page_offset; - index_page_offset = other225.index_page_offset; - dictionary_page_offset = other225.dictionary_page_offset; - statistics = other225.statistics; - encoding_stats = other225.encoding_stats; - bloom_filter_offset = other225.bloom_filter_offset; - bloom_filter_length = other225.bloom_filter_length; - size_statistics = other225.size_statistics; - __isset = other225.__isset; +ColumnMetaData::ColumnMetaData(const ColumnMetaData& other228) { + type = other228.type; + encodings = other228.encodings; + path_in_schema = other228.path_in_schema; + codec = other228.codec; + num_values = other228.num_values; + total_uncompressed_size = other228.total_uncompressed_size; + total_compressed_size = 
other228.total_compressed_size; + key_value_metadata = other228.key_value_metadata; + data_page_offset = other228.data_page_offset; + index_page_offset = other228.index_page_offset; + dictionary_page_offset = other228.dictionary_page_offset; + statistics = other228.statistics; + encoding_stats = other228.encoding_stats; + bloom_filter_offset = other228.bloom_filter_offset; + bloom_filter_length = other228.bloom_filter_length; + size_statistics = other228.size_statistics; + __isset = other228.__isset; +} +ColumnMetaData::ColumnMetaData(ColumnMetaData&& other229) noexcept { + type = other229.type; + encodings = std::move(other229.encodings); + path_in_schema = std::move(other229.path_in_schema); + codec = other229.codec; + num_values = other229.num_values; + total_uncompressed_size = other229.total_uncompressed_size; + total_compressed_size = other229.total_compressed_size; + key_value_metadata = std::move(other229.key_value_metadata); + data_page_offset = other229.data_page_offset; + index_page_offset = other229.index_page_offset; + dictionary_page_offset = other229.dictionary_page_offset; + statistics = std::move(other229.statistics); + encoding_stats = std::move(other229.encoding_stats); + bloom_filter_offset = other229.bloom_filter_offset; + bloom_filter_length = other229.bloom_filter_length; + size_statistics = std::move(other229.size_statistics); + __isset = other229.__isset; +} +ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other230) { + type = other230.type; + encodings = other230.encodings; + path_in_schema = other230.path_in_schema; + codec = other230.codec; + num_values = other230.num_values; + total_uncompressed_size = other230.total_uncompressed_size; + total_compressed_size = other230.total_compressed_size; + key_value_metadata = other230.key_value_metadata; + data_page_offset = other230.data_page_offset; + index_page_offset = other230.index_page_offset; + dictionary_page_offset = other230.dictionary_page_offset; + statistics = 
other230.statistics; + encoding_stats = other230.encoding_stats; + bloom_filter_offset = other230.bloom_filter_offset; + bloom_filter_length = other230.bloom_filter_length; + size_statistics = other230.size_statistics; + __isset = other230.__isset; return *this; } -ColumnMetaData& ColumnMetaData::operator=(ColumnMetaData&& other226) noexcept { - type = other226.type; - encodings = std::move(other226.encodings); - path_in_schema = std::move(other226.path_in_schema); - codec = other226.codec; - num_values = other226.num_values; - total_uncompressed_size = other226.total_uncompressed_size; - total_compressed_size = other226.total_compressed_size; - key_value_metadata = std::move(other226.key_value_metadata); - data_page_offset = other226.data_page_offset; - index_page_offset = other226.index_page_offset; - dictionary_page_offset = other226.dictionary_page_offset; - statistics = std::move(other226.statistics); - encoding_stats = std::move(other226.encoding_stats); - bloom_filter_offset = other226.bloom_filter_offset; - bloom_filter_length = other226.bloom_filter_length; - size_statistics = std::move(other226.size_statistics); - __isset = other226.__isset; +ColumnMetaData& ColumnMetaData::operator=(ColumnMetaData&& other231) noexcept { + type = other231.type; + encodings = std::move(other231.encodings); + path_in_schema = std::move(other231.path_in_schema); + codec = other231.codec; + num_values = other231.num_values; + total_uncompressed_size = other231.total_uncompressed_size; + total_compressed_size = other231.total_compressed_size; + key_value_metadata = std::move(other231.key_value_metadata); + data_page_offset = other231.data_page_offset; + index_page_offset = other231.index_page_offset; + dictionary_page_offset = other231.dictionary_page_offset; + statistics = std::move(other231.statistics); + encoding_stats = std::move(other231.encoding_stats); + bloom_filter_offset = other231.bloom_filter_offset; + bloom_filter_length = other231.bloom_filter_length; + 
size_statistics = std::move(other231.size_statistics); + __isset = other231.__isset; return *this; } void ColumnMetaData::printTo(std::ostream& out) const { @@ -3473,18 +3484,18 @@ void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b) { (void) b; } -EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& other227) noexcept { - (void) other227; +EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& other232) noexcept { + (void) other232; } -EncryptionWithFooterKey::EncryptionWithFooterKey(EncryptionWithFooterKey&& other228) noexcept { - (void) other228; +EncryptionWithFooterKey::EncryptionWithFooterKey(EncryptionWithFooterKey&& other233) noexcept { + (void) other233; } -EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(const EncryptionWithFooterKey& other229) noexcept { - (void) other229; +EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(const EncryptionWithFooterKey& other234) noexcept { + (void) other234; return *this; } -EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(EncryptionWithFooterKey&& other230) noexcept { - (void) other230; +EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(EncryptionWithFooterKey&& other235) noexcept { + (void) other235; return *this; } void EncryptionWithFooterKey::printTo(std::ostream& out) const { @@ -3520,26 +3531,26 @@ void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b) { swap(a.__isset, b.__isset); } -EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other237) { - path_in_schema = other237.path_in_schema; - key_metadata = other237.key_metadata; - __isset = other237.__isset; +EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other242) { + path_in_schema = other242.path_in_schema; + key_metadata = other242.key_metadata; + __isset = other242.__isset; } -EncryptionWithColumnKey::EncryptionWithColumnKey(EncryptionWithColumnKey&& other238) noexcept { - 
path_in_schema = std::move(other238.path_in_schema); - key_metadata = std::move(other238.key_metadata); - __isset = other238.__isset; +EncryptionWithColumnKey::EncryptionWithColumnKey(EncryptionWithColumnKey&& other243) noexcept { + path_in_schema = std::move(other243.path_in_schema); + key_metadata = std::move(other243.key_metadata); + __isset = other243.__isset; } -EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(const EncryptionWithColumnKey& other239) { - path_in_schema = other239.path_in_schema; - key_metadata = other239.key_metadata; - __isset = other239.__isset; +EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(const EncryptionWithColumnKey& other244) { + path_in_schema = other244.path_in_schema; + key_metadata = other244.key_metadata; + __isset = other244.__isset; return *this; } -EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(EncryptionWithColumnKey&& other240) noexcept { - path_in_schema = std::move(other240.path_in_schema); - key_metadata = std::move(other240.key_metadata); - __isset = other240.__isset; +EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(EncryptionWithColumnKey&& other245) noexcept { + path_in_schema = std::move(other245.path_in_schema); + key_metadata = std::move(other245.key_metadata); + __isset = other245.__isset; return *this; } void EncryptionWithColumnKey::printTo(std::ostream& out) const { @@ -3578,26 +3589,26 @@ void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b) { swap(a.__isset, b.__isset); } -ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other241) { - ENCRYPTION_WITH_FOOTER_KEY = other241.ENCRYPTION_WITH_FOOTER_KEY; - ENCRYPTION_WITH_COLUMN_KEY = other241.ENCRYPTION_WITH_COLUMN_KEY; - __isset = other241.__isset; +ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other246) { + ENCRYPTION_WITH_FOOTER_KEY = other246.ENCRYPTION_WITH_FOOTER_KEY; + ENCRYPTION_WITH_COLUMN_KEY = other246.ENCRYPTION_WITH_COLUMN_KEY; + __isset = 
other246.__isset; } -ColumnCryptoMetaData::ColumnCryptoMetaData(ColumnCryptoMetaData&& other242) noexcept { - ENCRYPTION_WITH_FOOTER_KEY = std::move(other242.ENCRYPTION_WITH_FOOTER_KEY); - ENCRYPTION_WITH_COLUMN_KEY = std::move(other242.ENCRYPTION_WITH_COLUMN_KEY); - __isset = other242.__isset; +ColumnCryptoMetaData::ColumnCryptoMetaData(ColumnCryptoMetaData&& other247) noexcept { + ENCRYPTION_WITH_FOOTER_KEY = std::move(other247.ENCRYPTION_WITH_FOOTER_KEY); + ENCRYPTION_WITH_COLUMN_KEY = std::move(other247.ENCRYPTION_WITH_COLUMN_KEY); + __isset = other247.__isset; } -ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(const ColumnCryptoMetaData& other243) { - ENCRYPTION_WITH_FOOTER_KEY = other243.ENCRYPTION_WITH_FOOTER_KEY; - ENCRYPTION_WITH_COLUMN_KEY = other243.ENCRYPTION_WITH_COLUMN_KEY; - __isset = other243.__isset; +ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(const ColumnCryptoMetaData& other248) { + ENCRYPTION_WITH_FOOTER_KEY = other248.ENCRYPTION_WITH_FOOTER_KEY; + ENCRYPTION_WITH_COLUMN_KEY = other248.ENCRYPTION_WITH_COLUMN_KEY; + __isset = other248.__isset; return *this; } -ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(ColumnCryptoMetaData&& other244) noexcept { - ENCRYPTION_WITH_FOOTER_KEY = std::move(other244.ENCRYPTION_WITH_FOOTER_KEY); - ENCRYPTION_WITH_COLUMN_KEY = std::move(other244.ENCRYPTION_WITH_COLUMN_KEY); - __isset = other244.__isset; +ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(ColumnCryptoMetaData&& other249) noexcept { + ENCRYPTION_WITH_FOOTER_KEY = std::move(other249.ENCRYPTION_WITH_FOOTER_KEY); + ENCRYPTION_WITH_COLUMN_KEY = std::move(other249.ENCRYPTION_WITH_COLUMN_KEY); + __isset = other249.__isset; return *this; } void ColumnCryptoMetaData::printTo(std::ostream& out) const { @@ -3677,54 +3688,54 @@ void swap(ColumnChunk &a, ColumnChunk &b) { swap(a.__isset, b.__isset); } -ColumnChunk::ColumnChunk(const ColumnChunk& other245) { - file_path = other245.file_path; - file_offset = other245.file_offset; - 
meta_data = other245.meta_data; - offset_index_offset = other245.offset_index_offset; - offset_index_length = other245.offset_index_length; - column_index_offset = other245.column_index_offset; - column_index_length = other245.column_index_length; - crypto_metadata = other245.crypto_metadata; - encrypted_column_metadata = other245.encrypted_column_metadata; - __isset = other245.__isset; -} -ColumnChunk::ColumnChunk(ColumnChunk&& other246) noexcept { - file_path = std::move(other246.file_path); - file_offset = other246.file_offset; - meta_data = std::move(other246.meta_data); - offset_index_offset = other246.offset_index_offset; - offset_index_length = other246.offset_index_length; - column_index_offset = other246.column_index_offset; - column_index_length = other246.column_index_length; - crypto_metadata = std::move(other246.crypto_metadata); - encrypted_column_metadata = std::move(other246.encrypted_column_metadata); - __isset = other246.__isset; -} -ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other247) { - file_path = other247.file_path; - file_offset = other247.file_offset; - meta_data = other247.meta_data; - offset_index_offset = other247.offset_index_offset; - offset_index_length = other247.offset_index_length; - column_index_offset = other247.column_index_offset; - column_index_length = other247.column_index_length; - crypto_metadata = other247.crypto_metadata; - encrypted_column_metadata = other247.encrypted_column_metadata; - __isset = other247.__isset; +ColumnChunk::ColumnChunk(const ColumnChunk& other250) { + file_path = other250.file_path; + file_offset = other250.file_offset; + meta_data = other250.meta_data; + offset_index_offset = other250.offset_index_offset; + offset_index_length = other250.offset_index_length; + column_index_offset = other250.column_index_offset; + column_index_length = other250.column_index_length; + crypto_metadata = other250.crypto_metadata; + encrypted_column_metadata = other250.encrypted_column_metadata; + __isset = 
other250.__isset; +} +ColumnChunk::ColumnChunk(ColumnChunk&& other251) noexcept { + file_path = std::move(other251.file_path); + file_offset = other251.file_offset; + meta_data = std::move(other251.meta_data); + offset_index_offset = other251.offset_index_offset; + offset_index_length = other251.offset_index_length; + column_index_offset = other251.column_index_offset; + column_index_length = other251.column_index_length; + crypto_metadata = std::move(other251.crypto_metadata); + encrypted_column_metadata = std::move(other251.encrypted_column_metadata); + __isset = other251.__isset; +} +ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other252) { + file_path = other252.file_path; + file_offset = other252.file_offset; + meta_data = other252.meta_data; + offset_index_offset = other252.offset_index_offset; + offset_index_length = other252.offset_index_length; + column_index_offset = other252.column_index_offset; + column_index_length = other252.column_index_length; + crypto_metadata = other252.crypto_metadata; + encrypted_column_metadata = other252.encrypted_column_metadata; + __isset = other252.__isset; return *this; } -ColumnChunk& ColumnChunk::operator=(ColumnChunk&& other248) noexcept { - file_path = std::move(other248.file_path); - file_offset = other248.file_offset; - meta_data = std::move(other248.meta_data); - offset_index_offset = other248.offset_index_offset; - offset_index_length = other248.offset_index_length; - column_index_offset = other248.column_index_offset; - column_index_length = other248.column_index_length; - crypto_metadata = std::move(other248.crypto_metadata); - encrypted_column_metadata = std::move(other248.encrypted_column_metadata); - __isset = other248.__isset; +ColumnChunk& ColumnChunk::operator=(ColumnChunk&& other253) noexcept { + file_path = std::move(other253.file_path); + file_offset = other253.file_offset; + meta_data = std::move(other253.meta_data); + offset_index_offset = other253.offset_index_offset; + offset_index_length = 
other253.offset_index_length; + column_index_offset = other253.column_index_offset; + column_index_length = other253.column_index_length; + crypto_metadata = std::move(other253.crypto_metadata); + encrypted_column_metadata = std::move(other253.encrypted_column_metadata); + __isset = other253.__isset; return *this; } void ColumnChunk::printTo(std::ostream& out) const { @@ -3797,46 +3808,46 @@ void swap(RowGroup &a, RowGroup &b) { swap(a.__isset, b.__isset); } -RowGroup::RowGroup(const RowGroup& other261) { - columns = other261.columns; - total_byte_size = other261.total_byte_size; - num_rows = other261.num_rows; - sorting_columns = other261.sorting_columns; - file_offset = other261.file_offset; - total_compressed_size = other261.total_compressed_size; - ordinal = other261.ordinal; - __isset = other261.__isset; -} -RowGroup::RowGroup(RowGroup&& other262) noexcept { - columns = std::move(other262.columns); - total_byte_size = other262.total_byte_size; - num_rows = other262.num_rows; - sorting_columns = std::move(other262.sorting_columns); - file_offset = other262.file_offset; - total_compressed_size = other262.total_compressed_size; - ordinal = other262.ordinal; - __isset = other262.__isset; -} -RowGroup& RowGroup::operator=(const RowGroup& other263) { - columns = other263.columns; - total_byte_size = other263.total_byte_size; - num_rows = other263.num_rows; - sorting_columns = other263.sorting_columns; - file_offset = other263.file_offset; - total_compressed_size = other263.total_compressed_size; - ordinal = other263.ordinal; - __isset = other263.__isset; +RowGroup::RowGroup(const RowGroup& other266) { + columns = other266.columns; + total_byte_size = other266.total_byte_size; + num_rows = other266.num_rows; + sorting_columns = other266.sorting_columns; + file_offset = other266.file_offset; + total_compressed_size = other266.total_compressed_size; + ordinal = other266.ordinal; + __isset = other266.__isset; +} +RowGroup::RowGroup(RowGroup&& other267) noexcept { + 
columns = std::move(other267.columns); + total_byte_size = other267.total_byte_size; + num_rows = other267.num_rows; + sorting_columns = std::move(other267.sorting_columns); + file_offset = other267.file_offset; + total_compressed_size = other267.total_compressed_size; + ordinal = other267.ordinal; + __isset = other267.__isset; +} +RowGroup& RowGroup::operator=(const RowGroup& other268) { + columns = other268.columns; + total_byte_size = other268.total_byte_size; + num_rows = other268.num_rows; + sorting_columns = other268.sorting_columns; + file_offset = other268.file_offset; + total_compressed_size = other268.total_compressed_size; + ordinal = other268.ordinal; + __isset = other268.__isset; return *this; } -RowGroup& RowGroup::operator=(RowGroup&& other264) noexcept { - columns = std::move(other264.columns); - total_byte_size = other264.total_byte_size; - num_rows = other264.num_rows; - sorting_columns = std::move(other264.sorting_columns); - file_offset = other264.file_offset; - total_compressed_size = other264.total_compressed_size; - ordinal = other264.ordinal; - __isset = other264.__isset; +RowGroup& RowGroup::operator=(RowGroup&& other269) noexcept { + columns = std::move(other269.columns); + total_byte_size = other269.total_byte_size; + num_rows = other269.num_rows; + sorting_columns = std::move(other269.sorting_columns); + file_offset = other269.file_offset; + total_compressed_size = other269.total_compressed_size; + ordinal = other269.ordinal; + __isset = other269.__isset; return *this; } void RowGroup::printTo(std::ostream& out) const { @@ -3869,18 +3880,18 @@ void swap(TypeDefinedOrder &a, TypeDefinedOrder &b) { (void) b; } -TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other265) noexcept { - (void) other265; +TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other270) noexcept { + (void) other270; } -TypeDefinedOrder::TypeDefinedOrder(TypeDefinedOrder&& other266) noexcept { - (void) other266; 
+TypeDefinedOrder::TypeDefinedOrder(TypeDefinedOrder&& other271) noexcept { + (void) other271; } -TypeDefinedOrder& TypeDefinedOrder::operator=(const TypeDefinedOrder& other267) noexcept { - (void) other267; +TypeDefinedOrder& TypeDefinedOrder::operator=(const TypeDefinedOrder& other272) noexcept { + (void) other272; return *this; } -TypeDefinedOrder& TypeDefinedOrder::operator=(TypeDefinedOrder&& other268) noexcept { - (void) other268; +TypeDefinedOrder& TypeDefinedOrder::operator=(TypeDefinedOrder&& other273) noexcept { + (void) other273; return *this; } void TypeDefinedOrder::printTo(std::ostream& out) const { @@ -3911,22 +3922,22 @@ void swap(ColumnOrder &a, ColumnOrder &b) { swap(a.__isset, b.__isset); } -ColumnOrder::ColumnOrder(const ColumnOrder& other269) noexcept { - TYPE_ORDER = other269.TYPE_ORDER; - __isset = other269.__isset; +ColumnOrder::ColumnOrder(const ColumnOrder& other274) noexcept { + TYPE_ORDER = other274.TYPE_ORDER; + __isset = other274.__isset; } -ColumnOrder::ColumnOrder(ColumnOrder&& other270) noexcept { - TYPE_ORDER = std::move(other270.TYPE_ORDER); - __isset = other270.__isset; +ColumnOrder::ColumnOrder(ColumnOrder&& other275) noexcept { + TYPE_ORDER = std::move(other275.TYPE_ORDER); + __isset = other275.__isset; } -ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other271) noexcept { - TYPE_ORDER = other271.TYPE_ORDER; - __isset = other271.__isset; +ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other276) noexcept { + TYPE_ORDER = other276.TYPE_ORDER; + __isset = other276.__isset; return *this; } -ColumnOrder& ColumnOrder::operator=(ColumnOrder&& other272) noexcept { - TYPE_ORDER = std::move(other272.TYPE_ORDER); - __isset = other272.__isset; +ColumnOrder& ColumnOrder::operator=(ColumnOrder&& other277) noexcept { + TYPE_ORDER = std::move(other277.TYPE_ORDER); + __isset = other277.__isset; return *this; } void ColumnOrder::printTo(std::ostream& out) const { @@ -3966,26 +3977,26 @@ void swap(PageLocation &a, PageLocation &b) 
{ swap(a.first_row_index, b.first_row_index); } -PageLocation::PageLocation(const PageLocation& other273) noexcept { - offset = other273.offset; - compressed_page_size = other273.compressed_page_size; - first_row_index = other273.first_row_index; +PageLocation::PageLocation(const PageLocation& other278) noexcept { + offset = other278.offset; + compressed_page_size = other278.compressed_page_size; + first_row_index = other278.first_row_index; } -PageLocation::PageLocation(PageLocation&& other274) noexcept { - offset = other274.offset; - compressed_page_size = other274.compressed_page_size; - first_row_index = other274.first_row_index; +PageLocation::PageLocation(PageLocation&& other279) noexcept { + offset = other279.offset; + compressed_page_size = other279.compressed_page_size; + first_row_index = other279.first_row_index; } -PageLocation& PageLocation::operator=(const PageLocation& other275) noexcept { - offset = other275.offset; - compressed_page_size = other275.compressed_page_size; - first_row_index = other275.first_row_index; +PageLocation& PageLocation::operator=(const PageLocation& other280) noexcept { + offset = other280.offset; + compressed_page_size = other280.compressed_page_size; + first_row_index = other280.first_row_index; return *this; } -PageLocation& PageLocation::operator=(PageLocation&& other276) noexcept { - offset = other276.offset; - compressed_page_size = other276.compressed_page_size; - first_row_index = other276.first_row_index; +PageLocation& PageLocation::operator=(PageLocation&& other281) noexcept { + offset = other281.offset; + compressed_page_size = other281.compressed_page_size; + first_row_index = other281.first_row_index; return *this; } void PageLocation::printTo(std::ostream& out) const { @@ -4024,26 +4035,26 @@ void swap(OffsetIndex &a, OffsetIndex &b) { swap(a.__isset, b.__isset); } -OffsetIndex::OffsetIndex(const OffsetIndex& other289) { - page_locations = other289.page_locations; - unencoded_byte_array_data_bytes = 
other289.unencoded_byte_array_data_bytes; - __isset = other289.__isset; +OffsetIndex::OffsetIndex(const OffsetIndex& other294) { + page_locations = other294.page_locations; + unencoded_byte_array_data_bytes = other294.unencoded_byte_array_data_bytes; + __isset = other294.__isset; } -OffsetIndex::OffsetIndex(OffsetIndex&& other290) noexcept { - page_locations = std::move(other290.page_locations); - unencoded_byte_array_data_bytes = std::move(other290.unencoded_byte_array_data_bytes); - __isset = other290.__isset; +OffsetIndex::OffsetIndex(OffsetIndex&& other295) noexcept { + page_locations = std::move(other295.page_locations); + unencoded_byte_array_data_bytes = std::move(other295.unencoded_byte_array_data_bytes); + __isset = other295.__isset; } -OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other291) { - page_locations = other291.page_locations; - unencoded_byte_array_data_bytes = other291.unencoded_byte_array_data_bytes; - __isset = other291.__isset; +OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other296) { + page_locations = other296.page_locations; + unencoded_byte_array_data_bytes = other296.unencoded_byte_array_data_bytes; + __isset = other296.__isset; return *this; } -OffsetIndex& OffsetIndex::operator=(OffsetIndex&& other292) noexcept { - page_locations = std::move(other292.page_locations); - unencoded_byte_array_data_bytes = std::move(other292.unencoded_byte_array_data_bytes); - __isset = other292.__isset; +OffsetIndex& OffsetIndex::operator=(OffsetIndex&& other297) noexcept { + page_locations = std::move(other297.page_locations); + unencoded_byte_array_data_bytes = std::move(other297.unencoded_byte_array_data_bytes); + __isset = other297.__isset; return *this; } void OffsetIndex::printTo(std::ostream& out) const { @@ -4114,50 +4125,50 @@ void swap(ColumnIndex &a, ColumnIndex &b) { swap(a.__isset, b.__isset); } -ColumnIndex::ColumnIndex(const ColumnIndex& other336) { - null_pages = other336.null_pages; - min_values = other336.min_values; 
- max_values = other336.max_values; - boundary_order = other336.boundary_order; - null_counts = other336.null_counts; - repetition_level_histograms = other336.repetition_level_histograms; - definition_level_histograms = other336.definition_level_histograms; - geometry_stats = other336.geometry_stats; - __isset = other336.__isset; -} -ColumnIndex::ColumnIndex(ColumnIndex&& other337) noexcept { - null_pages = std::move(other337.null_pages); - min_values = std::move(other337.min_values); - max_values = std::move(other337.max_values); - boundary_order = other337.boundary_order; - null_counts = std::move(other337.null_counts); - repetition_level_histograms = std::move(other337.repetition_level_histograms); - definition_level_histograms = std::move(other337.definition_level_histograms); - geometry_stats = std::move(other337.geometry_stats); - __isset = other337.__isset; -} -ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other338) { - null_pages = other338.null_pages; - min_values = other338.min_values; - max_values = other338.max_values; - boundary_order = other338.boundary_order; - null_counts = other338.null_counts; - repetition_level_histograms = other338.repetition_level_histograms; - definition_level_histograms = other338.definition_level_histograms; - geometry_stats = other338.geometry_stats; - __isset = other338.__isset; +ColumnIndex::ColumnIndex(const ColumnIndex& other341) { + null_pages = other341.null_pages; + min_values = other341.min_values; + max_values = other341.max_values; + boundary_order = other341.boundary_order; + null_counts = other341.null_counts; + repetition_level_histograms = other341.repetition_level_histograms; + definition_level_histograms = other341.definition_level_histograms; + geometry_stats = other341.geometry_stats; + __isset = other341.__isset; +} +ColumnIndex::ColumnIndex(ColumnIndex&& other342) noexcept { + null_pages = std::move(other342.null_pages); + min_values = std::move(other342.min_values); + max_values = 
std::move(other342.max_values); + boundary_order = other342.boundary_order; + null_counts = std::move(other342.null_counts); + repetition_level_histograms = std::move(other342.repetition_level_histograms); + definition_level_histograms = std::move(other342.definition_level_histograms); + geometry_stats = std::move(other342.geometry_stats); + __isset = other342.__isset; +} +ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other343) { + null_pages = other343.null_pages; + min_values = other343.min_values; + max_values = other343.max_values; + boundary_order = other343.boundary_order; + null_counts = other343.null_counts; + repetition_level_histograms = other343.repetition_level_histograms; + definition_level_histograms = other343.definition_level_histograms; + geometry_stats = other343.geometry_stats; + __isset = other343.__isset; return *this; } -ColumnIndex& ColumnIndex::operator=(ColumnIndex&& other339) noexcept { - null_pages = std::move(other339.null_pages); - min_values = std::move(other339.min_values); - max_values = std::move(other339.max_values); - boundary_order = other339.boundary_order; - null_counts = std::move(other339.null_counts); - repetition_level_histograms = std::move(other339.repetition_level_histograms); - definition_level_histograms = std::move(other339.definition_level_histograms); - geometry_stats = std::move(other339.geometry_stats); - __isset = other339.__isset; +ColumnIndex& ColumnIndex::operator=(ColumnIndex&& other344) noexcept { + null_pages = std::move(other344.null_pages); + min_values = std::move(other344.min_values); + max_values = std::move(other344.max_values); + boundary_order = other344.boundary_order; + null_counts = std::move(other344.null_counts); + repetition_level_histograms = std::move(other344.repetition_level_histograms); + definition_level_histograms = std::move(other344.definition_level_histograms); + geometry_stats = std::move(other344.geometry_stats); + __isset = other344.__isset; return *this; } void 
ColumnIndex::printTo(std::ostream& out) const { @@ -4208,30 +4219,30 @@ void swap(AesGcmV1 &a, AesGcmV1 &b) { swap(a.__isset, b.__isset); } -AesGcmV1::AesGcmV1(const AesGcmV1& other340) { - aad_prefix = other340.aad_prefix; - aad_file_unique = other340.aad_file_unique; - supply_aad_prefix = other340.supply_aad_prefix; - __isset = other340.__isset; +AesGcmV1::AesGcmV1(const AesGcmV1& other345) { + aad_prefix = other345.aad_prefix; + aad_file_unique = other345.aad_file_unique; + supply_aad_prefix = other345.supply_aad_prefix; + __isset = other345.__isset; } -AesGcmV1::AesGcmV1(AesGcmV1&& other341) noexcept { - aad_prefix = std::move(other341.aad_prefix); - aad_file_unique = std::move(other341.aad_file_unique); - supply_aad_prefix = other341.supply_aad_prefix; - __isset = other341.__isset; +AesGcmV1::AesGcmV1(AesGcmV1&& other346) noexcept { + aad_prefix = std::move(other346.aad_prefix); + aad_file_unique = std::move(other346.aad_file_unique); + supply_aad_prefix = other346.supply_aad_prefix; + __isset = other346.__isset; } -AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other342) { - aad_prefix = other342.aad_prefix; - aad_file_unique = other342.aad_file_unique; - supply_aad_prefix = other342.supply_aad_prefix; - __isset = other342.__isset; +AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other347) { + aad_prefix = other347.aad_prefix; + aad_file_unique = other347.aad_file_unique; + supply_aad_prefix = other347.supply_aad_prefix; + __isset = other347.__isset; return *this; } -AesGcmV1& AesGcmV1::operator=(AesGcmV1&& other343) noexcept { - aad_prefix = std::move(other343.aad_prefix); - aad_file_unique = std::move(other343.aad_file_unique); - supply_aad_prefix = other343.supply_aad_prefix; - __isset = other343.__isset; +AesGcmV1& AesGcmV1::operator=(AesGcmV1&& other348) noexcept { + aad_prefix = std::move(other348.aad_prefix); + aad_file_unique = std::move(other348.aad_file_unique); + supply_aad_prefix = other348.supply_aad_prefix; + __isset = other348.__isset; return 
*this; } void AesGcmV1::printTo(std::ostream& out) const { @@ -4277,30 +4288,30 @@ void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b) { swap(a.__isset, b.__isset); } -AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other344) { - aad_prefix = other344.aad_prefix; - aad_file_unique = other344.aad_file_unique; - supply_aad_prefix = other344.supply_aad_prefix; - __isset = other344.__isset; +AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other349) { + aad_prefix = other349.aad_prefix; + aad_file_unique = other349.aad_file_unique; + supply_aad_prefix = other349.supply_aad_prefix; + __isset = other349.__isset; } -AesGcmCtrV1::AesGcmCtrV1(AesGcmCtrV1&& other345) noexcept { - aad_prefix = std::move(other345.aad_prefix); - aad_file_unique = std::move(other345.aad_file_unique); - supply_aad_prefix = other345.supply_aad_prefix; - __isset = other345.__isset; +AesGcmCtrV1::AesGcmCtrV1(AesGcmCtrV1&& other350) noexcept { + aad_prefix = std::move(other350.aad_prefix); + aad_file_unique = std::move(other350.aad_file_unique); + supply_aad_prefix = other350.supply_aad_prefix; + __isset = other350.__isset; } -AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other346) { - aad_prefix = other346.aad_prefix; - aad_file_unique = other346.aad_file_unique; - supply_aad_prefix = other346.supply_aad_prefix; - __isset = other346.__isset; +AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other351) { + aad_prefix = other351.aad_prefix; + aad_file_unique = other351.aad_file_unique; + supply_aad_prefix = other351.supply_aad_prefix; + __isset = other351.__isset; return *this; } -AesGcmCtrV1& AesGcmCtrV1::operator=(AesGcmCtrV1&& other347) noexcept { - aad_prefix = std::move(other347.aad_prefix); - aad_file_unique = std::move(other347.aad_file_unique); - supply_aad_prefix = other347.supply_aad_prefix; - __isset = other347.__isset; +AesGcmCtrV1& AesGcmCtrV1::operator=(AesGcmCtrV1&& other352) noexcept { + aad_prefix = std::move(other352.aad_prefix); + aad_file_unique = std::move(other352.aad_file_unique); + 
supply_aad_prefix = other352.supply_aad_prefix; + __isset = other352.__isset; return *this; } void AesGcmCtrV1::printTo(std::ostream& out) const { @@ -4340,26 +4351,26 @@ void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b) { swap(a.__isset, b.__isset); } -EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other348) { - AES_GCM_V1 = other348.AES_GCM_V1; - AES_GCM_CTR_V1 = other348.AES_GCM_CTR_V1; - __isset = other348.__isset; +EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other353) { + AES_GCM_V1 = other353.AES_GCM_V1; + AES_GCM_CTR_V1 = other353.AES_GCM_CTR_V1; + __isset = other353.__isset; } -EncryptionAlgorithm::EncryptionAlgorithm(EncryptionAlgorithm&& other349) noexcept { - AES_GCM_V1 = std::move(other349.AES_GCM_V1); - AES_GCM_CTR_V1 = std::move(other349.AES_GCM_CTR_V1); - __isset = other349.__isset; +EncryptionAlgorithm::EncryptionAlgorithm(EncryptionAlgorithm&& other354) noexcept { + AES_GCM_V1 = std::move(other354.AES_GCM_V1); + AES_GCM_CTR_V1 = std::move(other354.AES_GCM_CTR_V1); + __isset = other354.__isset; } -EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other350) { - AES_GCM_V1 = other350.AES_GCM_V1; - AES_GCM_CTR_V1 = other350.AES_GCM_CTR_V1; - __isset = other350.__isset; +EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other355) { + AES_GCM_V1 = other355.AES_GCM_V1; + AES_GCM_CTR_V1 = other355.AES_GCM_CTR_V1; + __isset = other355.__isset; return *this; } -EncryptionAlgorithm& EncryptionAlgorithm::operator=(EncryptionAlgorithm&& other351) noexcept { - AES_GCM_V1 = std::move(other351.AES_GCM_V1); - AES_GCM_CTR_V1 = std::move(other351.AES_GCM_CTR_V1); - __isset = other351.__isset; +EncryptionAlgorithm& EncryptionAlgorithm::operator=(EncryptionAlgorithm&& other356) noexcept { + AES_GCM_V1 = std::move(other356.AES_GCM_V1); + AES_GCM_CTR_V1 = std::move(other356.AES_GCM_CTR_V1); + __isset = other356.__isset; return *this; } void 
EncryptionAlgorithm::printTo(std::ostream& out) const { @@ -4436,54 +4447,54 @@ void swap(FileMetaData &a, FileMetaData &b) { swap(a.__isset, b.__isset); } -FileMetaData::FileMetaData(const FileMetaData& other376) { - version = other376.version; - schema = other376.schema; - num_rows = other376.num_rows; - row_groups = other376.row_groups; - key_value_metadata = other376.key_value_metadata; - created_by = other376.created_by; - column_orders = other376.column_orders; - encryption_algorithm = other376.encryption_algorithm; - footer_signing_key_metadata = other376.footer_signing_key_metadata; - __isset = other376.__isset; -} -FileMetaData::FileMetaData(FileMetaData&& other377) noexcept { - version = other377.version; - schema = std::move(other377.schema); - num_rows = other377.num_rows; - row_groups = std::move(other377.row_groups); - key_value_metadata = std::move(other377.key_value_metadata); - created_by = std::move(other377.created_by); - column_orders = std::move(other377.column_orders); - encryption_algorithm = std::move(other377.encryption_algorithm); - footer_signing_key_metadata = std::move(other377.footer_signing_key_metadata); - __isset = other377.__isset; -} -FileMetaData& FileMetaData::operator=(const FileMetaData& other378) { - version = other378.version; - schema = other378.schema; - num_rows = other378.num_rows; - row_groups = other378.row_groups; - key_value_metadata = other378.key_value_metadata; - created_by = other378.created_by; - column_orders = other378.column_orders; - encryption_algorithm = other378.encryption_algorithm; - footer_signing_key_metadata = other378.footer_signing_key_metadata; - __isset = other378.__isset; +FileMetaData::FileMetaData(const FileMetaData& other381) { + version = other381.version; + schema = other381.schema; + num_rows = other381.num_rows; + row_groups = other381.row_groups; + key_value_metadata = other381.key_value_metadata; + created_by = other381.created_by; + column_orders = other381.column_orders; + 
encryption_algorithm = other381.encryption_algorithm; + footer_signing_key_metadata = other381.footer_signing_key_metadata; + __isset = other381.__isset; +} +FileMetaData::FileMetaData(FileMetaData&& other382) noexcept { + version = other382.version; + schema = std::move(other382.schema); + num_rows = other382.num_rows; + row_groups = std::move(other382.row_groups); + key_value_metadata = std::move(other382.key_value_metadata); + created_by = std::move(other382.created_by); + column_orders = std::move(other382.column_orders); + encryption_algorithm = std::move(other382.encryption_algorithm); + footer_signing_key_metadata = std::move(other382.footer_signing_key_metadata); + __isset = other382.__isset; +} +FileMetaData& FileMetaData::operator=(const FileMetaData& other383) { + version = other383.version; + schema = other383.schema; + num_rows = other383.num_rows; + row_groups = other383.row_groups; + key_value_metadata = other383.key_value_metadata; + created_by = other383.created_by; + column_orders = other383.column_orders; + encryption_algorithm = other383.encryption_algorithm; + footer_signing_key_metadata = other383.footer_signing_key_metadata; + __isset = other383.__isset; return *this; } -FileMetaData& FileMetaData::operator=(FileMetaData&& other379) noexcept { - version = other379.version; - schema = std::move(other379.schema); - num_rows = other379.num_rows; - row_groups = std::move(other379.row_groups); - key_value_metadata = std::move(other379.key_value_metadata); - created_by = std::move(other379.created_by); - column_orders = std::move(other379.column_orders); - encryption_algorithm = std::move(other379.encryption_algorithm); - footer_signing_key_metadata = std::move(other379.footer_signing_key_metadata); - __isset = other379.__isset; +FileMetaData& FileMetaData::operator=(FileMetaData&& other384) noexcept { + version = other384.version; + schema = std::move(other384.schema); + num_rows = other384.num_rows; + row_groups = std::move(other384.row_groups); 
+ key_value_metadata = std::move(other384.key_value_metadata); + created_by = std::move(other384.created_by); + column_orders = std::move(other384.column_orders); + encryption_algorithm = std::move(other384.encryption_algorithm); + footer_signing_key_metadata = std::move(other384.footer_signing_key_metadata); + __isset = other384.__isset; return *this; } void FileMetaData::printTo(std::ostream& out) const { @@ -4528,26 +4539,26 @@ void swap(FileCryptoMetaData &a, FileCryptoMetaData &b) { swap(a.__isset, b.__isset); } -FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other380) { - encryption_algorithm = other380.encryption_algorithm; - key_metadata = other380.key_metadata; - __isset = other380.__isset; +FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other385) { + encryption_algorithm = other385.encryption_algorithm; + key_metadata = other385.key_metadata; + __isset = other385.__isset; } -FileCryptoMetaData::FileCryptoMetaData(FileCryptoMetaData&& other381) noexcept { - encryption_algorithm = std::move(other381.encryption_algorithm); - key_metadata = std::move(other381.key_metadata); - __isset = other381.__isset; +FileCryptoMetaData::FileCryptoMetaData(FileCryptoMetaData&& other386) noexcept { + encryption_algorithm = std::move(other386.encryption_algorithm); + key_metadata = std::move(other386.key_metadata); + __isset = other386.__isset; } -FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other382) { - encryption_algorithm = other382.encryption_algorithm; - key_metadata = other382.key_metadata; - __isset = other382.__isset; +FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other387) { + encryption_algorithm = other387.encryption_algorithm; + key_metadata = other387.key_metadata; + __isset = other387.__isset; return *this; } -FileCryptoMetaData& FileCryptoMetaData::operator=(FileCryptoMetaData&& other383) noexcept { - encryption_algorithm = 
std::move(other383.encryption_algorithm); - key_metadata = std::move(other383.key_metadata); - __isset = other383.__isset; +FileCryptoMetaData& FileCryptoMetaData::operator=(FileCryptoMetaData&& other388) noexcept { + encryption_algorithm = std::move(other388.encryption_algorithm); + key_metadata = std::move(other388.key_metadata); + __isset = other388.__isset; return *this; } void FileCryptoMetaData::printTo(std::ostream& out) const { diff --git a/cpp/src/generated/parquet_types.h b/cpp/src/generated/parquet_types.h index c09a3a1d11e5f..0a857c4c6e8dd 100644 --- a/cpp/src/generated/parquet_types.h +++ b/cpp/src/generated/parquet_types.h @@ -617,8 +617,8 @@ std::ostream& operator<<(std::ostream& out, const SizeStatistics& obj); /** * A custom WKB-encoded polygon or multi-polygon to represent a covering of - * geometries. For example, it may be a bounding box, or an evelope of geometries - * when a bounding box cannot be built (e.g. a geometry has spherical edges, or if + * geometries. For example, it may be a bounding box or an envelope of geometries + * when a bounding box cannot be built (e.g., a geometry has spherical edges, or if * an edge of geographic coordinates crosses the antimeridian). In addition, it can * also be used to provide vendor-agnostic coverings like S2 or H3 grids. */ @@ -630,31 +630,32 @@ class Covering { Covering& operator=(const Covering&); Covering& operator=(Covering&&) noexcept; Covering() noexcept - : geometry(), - edges(static_cast(0)) { + : kind(), + value() { } virtual ~Covering() noexcept; /** - * Bytes of a WKB-encoded geometry + * A type of covering. Currently accepted values: "WKB". */ - std::string geometry; + std::string kind; /** - * Edges of the geometry, which is independent of edges from the logical type - * - * @see Edges + * A payload specific to kind: + * - WKB: well-known binary of a POLYGON that completely covers the contents. 
+ * This will be interpreted according to the same CRS and edges defined by + * the logical type. */ - Edges::type edges; + std::string value; - void __set_geometry(const std::string& val); + void __set_kind(const std::string& val); - void __set_edges(const Edges::type val); + void __set_value(const std::string& val); bool operator == (const Covering & rhs) const { - if (!(geometry == rhs.geometry)) + if (!(kind == rhs.kind)) return false; - if (!(edges == rhs.edges)) + if (!(value == rhs.value)) return false; return true; } @@ -781,9 +782,9 @@ void swap(BoundingBox &a, BoundingBox &b); std::ostream& operator<<(std::ostream& out, const BoundingBox& obj); typedef struct _GeometryStatistics__isset { - _GeometryStatistics__isset() : bbox(false), covering(false), geometry_types(false) {} + _GeometryStatistics__isset() : bbox(false), coverings(false), geometry_types(false) {} bool bbox :1; - bool covering :1; + bool coverings :1; bool geometry_types :1; } _GeometryStatistics__isset; @@ -806,9 +807,9 @@ class GeometryStatistics { */ BoundingBox bbox; /** - * A covering polygon of geometries + * A list of coverings of geometries */ - Covering covering; + std::vector coverings; /** * The geometry types of all geometries, or an empty array if they are not * known. 
This is borrowed from `geometry_types` column metadata of GeoParquet [1] @@ -842,7 +843,7 @@ class GeometryStatistics { void __set_bbox(const BoundingBox& val); - void __set_covering(const Covering& val); + void __set_coverings(const std::vector & val); void __set_geometry_types(const std::vector & val); @@ -852,9 +853,9 @@ class GeometryStatistics { return false; else if (__isset.bbox && !(bbox == rhs.bbox)) return false; - if (__isset.covering != rhs.__isset.covering) + if (__isset.coverings != rhs.__isset.coverings) return false; - else if (__isset.covering && !(covering == rhs.covering)) + else if (__isset.coverings && !(coverings == rhs.coverings)) return false; if (__isset.geometry_types != rhs.__isset.geometry_types) return false; @@ -1801,8 +1802,9 @@ void swap(BsonType &a, BsonType &b); std::ostream& operator<<(std::ostream& out, const BsonType& obj); typedef struct _GeometryType__isset { - _GeometryType__isset() : crs(false), metadata(false) {} + _GeometryType__isset() : crs(false), crs_encoding(false), metadata(false) {} bool crs :1; + bool crs_encoding :1; bool metadata :1; } _GeometryType__isset; @@ -1820,6 +1822,7 @@ class GeometryType { : encoding(static_cast(0)), edges(static_cast(0)), crs(), + crs_encoding(), metadata() { } @@ -1839,9 +1842,14 @@ class GeometryType { Edges::type edges; /** * Coordinate Reference System, i.e. mapping of how coordinates refer to - * precise locations on earth, e.g. OGC:CRS84 + * precise locations on earth. */ std::string crs; + /** + * Encoding used in the above crs field. + * Currently the only allowed value is "PROJJSON". + */ + std::string crs_encoding; /** * Additional informative metadata. * It can be used by GeoParquet to offload some of the column metadata. 
@@ -1856,6 +1864,8 @@ class GeometryType { void __set_crs(const std::string& val); + void __set_crs_encoding(const std::string& val); + void __set_metadata(const std::string& val); bool operator == (const GeometryType & rhs) const @@ -1868,6 +1878,10 @@ class GeometryType { return false; else if (__isset.crs && !(crs == rhs.crs)) return false; + if (__isset.crs_encoding != rhs.__isset.crs_encoding) + return false; + else if (__isset.crs_encoding && !(crs_encoding == rhs.crs_encoding)) + return false; if (__isset.metadata != rhs.__isset.metadata) return false; else if (__isset.metadata && !(metadata == rhs.metadata)) @@ -4753,9 +4767,11 @@ class FileCryptoMetaData { void swap(FileCryptoMetaData &a, FileCryptoMetaData &b); std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj); + } } // namespace + #include "parquet_types.tcc" #endif diff --git a/cpp/src/generated/parquet_types.tcc b/cpp/src/generated/parquet_types.tcc index 4436a5ecde8d6..f0e5b5ea37c6c 100644 --- a/cpp/src/generated/parquet_types.tcc +++ b/cpp/src/generated/parquet_types.tcc @@ -148,8 +148,8 @@ uint32_t Covering::read(Protocol_* iprot) { using ::apache::thrift::protocol::TProtocolException; - bool isset_geometry = false; - bool isset_edges = false; + bool isset_kind = false; + bool isset_value = false; while (true) { @@ -161,18 +161,16 @@ uint32_t Covering::read(Protocol_* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->geometry); - isset_geometry = true; + xfer += iprot->readString(this->kind); + isset_kind = true; } else { xfer += iprot->skip(ftype); } break; case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast16; - xfer += iprot->readI32(ecast16); - this->edges = static_cast(ecast16); - isset_edges = true; + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->value); + isset_value = true; } else { xfer += iprot->skip(ftype); } @@ -186,9 +184,9 @@ uint32_t 
Covering::read(Protocol_* iprot) { xfer += iprot->readStructEnd(); - if (!isset_geometry) + if (!isset_kind) throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_edges) + if (!isset_value) throw TProtocolException(TProtocolException::INVALID_DATA); return xfer; } @@ -199,12 +197,12 @@ uint32_t Covering::write(Protocol_* oprot) const { ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); xfer += oprot->writeStructBegin("Covering"); - xfer += oprot->writeFieldBegin("geometry", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->geometry); + xfer += oprot->writeFieldBegin("kind", ::apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeString(this->kind); xfer += oprot->writeFieldEnd(); - xfer += oprot->writeFieldBegin("edges", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(static_cast(this->edges)); + xfer += oprot->writeFieldBegin("value", ::apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeBinary(this->value); xfer += oprot->writeFieldEnd(); xfer += oprot->writeFieldStop(); @@ -400,9 +398,21 @@ uint32_t GeometryStatistics::read(Protocol_* iprot) { } break; case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->covering.read(iprot); - this->__isset.covering = true; + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->coverings.clear(); + uint32_t _size24; + ::apache::thrift::protocol::TType _etype27; + xfer += iprot->readListBegin(_etype27, _size24); + this->coverings.resize(_size24); + uint32_t _i28; + for (_i28 = 0; _i28 < _size24; ++_i28) + { + xfer += this->coverings[_i28].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.coverings = true; } else { xfer += iprot->skip(ftype); } @@ -411,14 +421,14 @@ uint32_t GeometryStatistics::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->geometry_types.clear(); - uint32_t _size25; - ::apache::thrift::protocol::TType _etype28; - xfer += 
iprot->readListBegin(_etype28, _size25); - this->geometry_types.resize(_size25); - uint32_t _i29; - for (_i29 = 0; _i29 < _size25; ++_i29) + uint32_t _size29; + ::apache::thrift::protocol::TType _etype32; + xfer += iprot->readListBegin(_etype32, _size29); + this->geometry_types.resize(_size29); + uint32_t _i33; + for (_i33 = 0; _i33 < _size29; ++_i33) { - xfer += iprot->readI32(this->geometry_types[_i29]); + xfer += iprot->readI32(this->geometry_types[_i33]); } xfer += iprot->readListEnd(); } @@ -450,19 +460,27 @@ uint32_t GeometryStatistics::write(Protocol_* oprot) const { xfer += this->bbox.write(oprot); xfer += oprot->writeFieldEnd(); } - if (this->__isset.covering) { - xfer += oprot->writeFieldBegin("covering", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->covering.write(oprot); + if (this->__isset.coverings) { + xfer += oprot->writeFieldBegin("coverings", ::apache::thrift::protocol::T_LIST, 2); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->coverings.size())); + std::vector ::const_iterator _iter34; + for (_iter34 = this->coverings.begin(); _iter34 != this->coverings.end(); ++_iter34) + { + xfer += (*_iter34).write(oprot); + } + xfer += oprot->writeListEnd(); + } xfer += oprot->writeFieldEnd(); } if (this->__isset.geometry_types) { xfer += oprot->writeFieldBegin("geometry_types", ::apache::thrift::protocol::T_LIST, 3); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast(this->geometry_types.size())); - std::vector ::const_iterator _iter30; - for (_iter30 = this->geometry_types.begin(); _iter30 != this->geometry_types.end(); ++_iter30) + std::vector ::const_iterator _iter35; + for (_iter35 = this->geometry_types.begin(); _iter35 != this->geometry_types.end(); ++_iter35) { - xfer += oprot->writeI32((*_iter30)); + xfer += oprot->writeI32((*_iter35)); } xfer += oprot->writeListEnd(); } @@ -1565,9 +1583,9 @@ uint32_t GeometryType::read(Protocol_* iprot) { { case 1: if (ftype == 
::apache::thrift::protocol::T_I32) { - int32_t ecast111; - xfer += iprot->readI32(ecast111); - this->encoding = static_cast(ecast111); + int32_t ecast116; + xfer += iprot->readI32(ecast116); + this->encoding = static_cast(ecast116); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -1575,9 +1593,9 @@ uint32_t GeometryType::read(Protocol_* iprot) { break; case 2: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast112; - xfer += iprot->readI32(ecast112); - this->edges = static_cast(ecast112); + int32_t ecast117; + xfer += iprot->readI32(ecast117); + this->edges = static_cast(ecast117); isset_edges = true; } else { xfer += iprot->skip(ftype); @@ -1592,6 +1610,14 @@ uint32_t GeometryType::read(Protocol_* iprot) { } break; case 4: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->crs_encoding); + this->__isset.crs_encoding = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: if (ftype == ::apache::thrift::protocol::T_STRING) { xfer += iprot->readBinary(this->metadata); this->__isset.metadata = true; @@ -1634,8 +1660,13 @@ uint32_t GeometryType::write(Protocol_* oprot) const { xfer += oprot->writeString(this->crs); xfer += oprot->writeFieldEnd(); } + if (this->__isset.crs_encoding) { + xfer += oprot->writeFieldBegin("crs_encoding", ::apache::thrift::protocol::T_STRING, 4); + xfer += oprot->writeString(this->crs_encoding); + xfer += oprot->writeFieldEnd(); + } if (this->__isset.metadata) { - xfer += oprot->writeFieldBegin("metadata", ::apache::thrift::protocol::T_STRING, 4); + xfer += oprot->writeFieldBegin("metadata", ::apache::thrift::protocol::T_STRING, 5); xfer += oprot->writeBinary(this->metadata); xfer += oprot->writeFieldEnd(); } @@ -1909,9 +1940,9 @@ uint32_t SchemaElement::read(Protocol_* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast121; - xfer += iprot->readI32(ecast121); - this->type = static_cast(ecast121); + int32_t ecast126; + xfer += 
iprot->readI32(ecast126); + this->type = static_cast(ecast126); this->__isset.type = true; } else { xfer += iprot->skip(ftype); @@ -1927,9 +1958,9 @@ uint32_t SchemaElement::read(Protocol_* iprot) { break; case 3: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast122; - xfer += iprot->readI32(ecast122); - this->repetition_type = static_cast(ecast122); + int32_t ecast127; + xfer += iprot->readI32(ecast127); + this->repetition_type = static_cast(ecast127); this->__isset.repetition_type = true; } else { xfer += iprot->skip(ftype); @@ -1953,9 +1984,9 @@ uint32_t SchemaElement::read(Protocol_* iprot) { break; case 6: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast123; - xfer += iprot->readI32(ecast123); - this->converted_type = static_cast(ecast123); + int32_t ecast128; + xfer += iprot->readI32(ecast128); + this->converted_type = static_cast(ecast128); this->__isset.converted_type = true; } else { xfer += iprot->skip(ftype); @@ -2103,9 +2134,9 @@ uint32_t DataPageHeader::read(Protocol_* iprot) { break; case 2: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast128; - xfer += iprot->readI32(ecast128); - this->encoding = static_cast(ecast128); + int32_t ecast133; + xfer += iprot->readI32(ecast133); + this->encoding = static_cast(ecast133); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -2113,9 +2144,9 @@ uint32_t DataPageHeader::read(Protocol_* iprot) { break; case 3: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast129; - xfer += iprot->readI32(ecast129); - this->definition_level_encoding = static_cast(ecast129); + int32_t ecast134; + xfer += iprot->readI32(ecast134); + this->definition_level_encoding = static_cast(ecast134); isset_definition_level_encoding = true; } else { xfer += iprot->skip(ftype); @@ -2123,9 +2154,9 @@ uint32_t DataPageHeader::read(Protocol_* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast130; - xfer += iprot->readI32(ecast130); - 
this->repetition_level_encoding = static_cast(ecast130); + int32_t ecast135; + xfer += iprot->readI32(ecast135); + this->repetition_level_encoding = static_cast(ecast135); isset_repetition_level_encoding = true; } else { xfer += iprot->skip(ftype); @@ -2265,9 +2296,9 @@ uint32_t DictionaryPageHeader::read(Protocol_* iprot) { break; case 2: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast139; - xfer += iprot->readI32(ecast139); - this->encoding = static_cast(ecast139); + int32_t ecast144; + xfer += iprot->readI32(ecast144); + this->encoding = static_cast(ecast144); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -2375,9 +2406,9 @@ uint32_t DataPageHeaderV2::read(Protocol_* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast144; - xfer += iprot->readI32(ecast144); - this->encoding = static_cast(ecast144); + int32_t ecast149; + xfer += iprot->readI32(ecast149); + this->encoding = static_cast(ecast149); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -2910,9 +2941,9 @@ uint32_t PageHeader::read(Protocol_* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast177; - xfer += iprot->readI32(ecast177); - this->type = static_cast(ecast177); + int32_t ecast182; + xfer += iprot->readI32(ecast182); + this->type = static_cast(ecast182); isset_type = true; } else { xfer += iprot->skip(ftype); @@ -3230,9 +3261,9 @@ uint32_t PageEncodingStats::read(Protocol_* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast190; - xfer += iprot->readI32(ecast190); - this->page_type = static_cast(ecast190); + int32_t ecast195; + xfer += iprot->readI32(ecast195); + this->page_type = static_cast(ecast195); isset_page_type = true; } else { xfer += iprot->skip(ftype); @@ -3240,9 +3271,9 @@ uint32_t PageEncodingStats::read(Protocol_* iprot) { break; case 2: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast191; - xfer += 
iprot->readI32(ecast191); - this->encoding = static_cast(ecast191); + int32_t ecast196; + xfer += iprot->readI32(ecast196); + this->encoding = static_cast(ecast196); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -3329,9 +3360,9 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast196; - xfer += iprot->readI32(ecast196); - this->type = static_cast(ecast196); + int32_t ecast201; + xfer += iprot->readI32(ecast201); + this->type = static_cast(ecast201); isset_type = true; } else { xfer += iprot->skip(ftype); @@ -3341,16 +3372,16 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->encodings.clear(); - uint32_t _size197; - ::apache::thrift::protocol::TType _etype200; - xfer += iprot->readListBegin(_etype200, _size197); - this->encodings.resize(_size197); - uint32_t _i201; - for (_i201 = 0; _i201 < _size197; ++_i201) + uint32_t _size202; + ::apache::thrift::protocol::TType _etype205; + xfer += iprot->readListBegin(_etype205, _size202); + this->encodings.resize(_size202); + uint32_t _i206; + for (_i206 = 0; _i206 < _size202; ++_i206) { - int32_t ecast202; - xfer += iprot->readI32(ecast202); - this->encodings[_i201] = static_cast(ecast202); + int32_t ecast207; + xfer += iprot->readI32(ecast207); + this->encodings[_i206] = static_cast(ecast207); } xfer += iprot->readListEnd(); } @@ -3363,14 +3394,14 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->path_in_schema.clear(); - uint32_t _size203; - ::apache::thrift::protocol::TType _etype206; - xfer += iprot->readListBegin(_etype206, _size203); - this->path_in_schema.resize(_size203); - uint32_t _i207; - for (_i207 = 0; _i207 < _size203; ++_i207) + uint32_t _size208; + ::apache::thrift::protocol::TType _etype211; + xfer += iprot->readListBegin(_etype211, _size208); + this->path_in_schema.resize(_size208); + uint32_t 
_i212; + for (_i212 = 0; _i212 < _size208; ++_i212) { - xfer += iprot->readString(this->path_in_schema[_i207]); + xfer += iprot->readString(this->path_in_schema[_i212]); } xfer += iprot->readListEnd(); } @@ -3381,9 +3412,9 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast208; - xfer += iprot->readI32(ecast208); - this->codec = static_cast(ecast208); + int32_t ecast213; + xfer += iprot->readI32(ecast213); + this->codec = static_cast(ecast213); isset_codec = true; } else { xfer += iprot->skip(ftype); @@ -3417,14 +3448,14 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->key_value_metadata.clear(); - uint32_t _size209; - ::apache::thrift::protocol::TType _etype212; - xfer += iprot->readListBegin(_etype212, _size209); - this->key_value_metadata.resize(_size209); - uint32_t _i213; - for (_i213 = 0; _i213 < _size209; ++_i213) + uint32_t _size214; + ::apache::thrift::protocol::TType _etype217; + xfer += iprot->readListBegin(_etype217, _size214); + this->key_value_metadata.resize(_size214); + uint32_t _i218; + for (_i218 = 0; _i218 < _size214; ++_i218) { - xfer += this->key_value_metadata[_i213].read(iprot); + xfer += this->key_value_metadata[_i218].read(iprot); } xfer += iprot->readListEnd(); } @@ -3469,14 +3500,14 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->encoding_stats.clear(); - uint32_t _size214; - ::apache::thrift::protocol::TType _etype217; - xfer += iprot->readListBegin(_etype217, _size214); - this->encoding_stats.resize(_size214); - uint32_t _i218; - for (_i218 = 0; _i218 < _size214; ++_i218) + uint32_t _size219; + ::apache::thrift::protocol::TType _etype222; + xfer += iprot->readListBegin(_etype222, _size219); + this->encoding_stats.resize(_size219); + uint32_t _i223; + for (_i223 = 0; _i223 < _size219; ++_i223) { - xfer += 
this->encoding_stats[_i218].read(iprot); + xfer += this->encoding_stats[_i223].read(iprot); } xfer += iprot->readListEnd(); } @@ -3550,10 +3581,10 @@ uint32_t ColumnMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("encodings", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast(this->encodings.size())); - std::vector ::const_iterator _iter219; - for (_iter219 = this->encodings.begin(); _iter219 != this->encodings.end(); ++_iter219) + std::vector ::const_iterator _iter224; + for (_iter224 = this->encodings.begin(); _iter224 != this->encodings.end(); ++_iter224) { - xfer += oprot->writeI32(static_cast((*_iter219))); + xfer += oprot->writeI32(static_cast((*_iter224))); } xfer += oprot->writeListEnd(); } @@ -3562,10 +3593,10 @@ uint32_t ColumnMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 3); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); - std::vector ::const_iterator _iter220; - for (_iter220 = this->path_in_schema.begin(); _iter220 != this->path_in_schema.end(); ++_iter220) + std::vector ::const_iterator _iter225; + for (_iter225 = this->path_in_schema.begin(); _iter225 != this->path_in_schema.end(); ++_iter225) { - xfer += oprot->writeString((*_iter220)); + xfer += oprot->writeString((*_iter225)); } xfer += oprot->writeListEnd(); } @@ -3591,10 +3622,10 @@ uint32_t ColumnMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 8); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter221; - for (_iter221 = this->key_value_metadata.begin(); _iter221 != this->key_value_metadata.end(); ++_iter221) + std::vector ::const_iterator _iter226; + for (_iter226 = 
this->key_value_metadata.begin(); _iter226 != this->key_value_metadata.end(); ++_iter226) { - xfer += (*_iter221).write(oprot); + xfer += (*_iter226).write(oprot); } xfer += oprot->writeListEnd(); } @@ -3623,10 +3654,10 @@ uint32_t ColumnMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("encoding_stats", ::apache::thrift::protocol::T_LIST, 13); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->encoding_stats.size())); - std::vector ::const_iterator _iter222; - for (_iter222 = this->encoding_stats.begin(); _iter222 != this->encoding_stats.end(); ++_iter222) + std::vector ::const_iterator _iter227; + for (_iter227 = this->encoding_stats.begin(); _iter227 != this->encoding_stats.end(); ++_iter227) { - xfer += (*_iter222).write(oprot); + xfer += (*_iter227).write(oprot); } xfer += oprot->writeListEnd(); } @@ -3719,14 +3750,14 @@ uint32_t EncryptionWithColumnKey::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->path_in_schema.clear(); - uint32_t _size231; - ::apache::thrift::protocol::TType _etype234; - xfer += iprot->readListBegin(_etype234, _size231); - this->path_in_schema.resize(_size231); - uint32_t _i235; - for (_i235 = 0; _i235 < _size231; ++_i235) + uint32_t _size236; + ::apache::thrift::protocol::TType _etype239; + xfer += iprot->readListBegin(_etype239, _size236); + this->path_in_schema.resize(_size236); + uint32_t _i240; + for (_i240 = 0; _i240 < _size236; ++_i240) { - xfer += iprot->readString(this->path_in_schema[_i235]); + xfer += iprot->readString(this->path_in_schema[_i240]); } xfer += iprot->readListEnd(); } @@ -3766,10 +3797,10 @@ uint32_t EncryptionWithColumnKey::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); - std::vector ::const_iterator _iter236; - for (_iter236 = 
this->path_in_schema.begin(); _iter236 != this->path_in_schema.end(); ++_iter236) + std::vector ::const_iterator _iter241; + for (_iter241 = this->path_in_schema.begin(); _iter241 != this->path_in_schema.end(); ++_iter241) { - xfer += oprot->writeString((*_iter236)); + xfer += oprot->writeString((*_iter241)); } xfer += oprot->writeListEnd(); } @@ -4049,14 +4080,14 @@ uint32_t RowGroup::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->columns.clear(); - uint32_t _size249; - ::apache::thrift::protocol::TType _etype252; - xfer += iprot->readListBegin(_etype252, _size249); - this->columns.resize(_size249); - uint32_t _i253; - for (_i253 = 0; _i253 < _size249; ++_i253) + uint32_t _size254; + ::apache::thrift::protocol::TType _etype257; + xfer += iprot->readListBegin(_etype257, _size254); + this->columns.resize(_size254); + uint32_t _i258; + for (_i258 = 0; _i258 < _size254; ++_i258) { - xfer += this->columns[_i253].read(iprot); + xfer += this->columns[_i258].read(iprot); } xfer += iprot->readListEnd(); } @@ -4085,14 +4116,14 @@ uint32_t RowGroup::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->sorting_columns.clear(); - uint32_t _size254; - ::apache::thrift::protocol::TType _etype257; - xfer += iprot->readListBegin(_etype257, _size254); - this->sorting_columns.resize(_size254); - uint32_t _i258; - for (_i258 = 0; _i258 < _size254; ++_i258) + uint32_t _size259; + ::apache::thrift::protocol::TType _etype262; + xfer += iprot->readListBegin(_etype262, _size259); + this->sorting_columns.resize(_size259); + uint32_t _i263; + for (_i263 = 0; _i263 < _size259; ++_i263) { - xfer += this->sorting_columns[_i258].read(iprot); + xfer += this->sorting_columns[_i263].read(iprot); } xfer += iprot->readListEnd(); } @@ -4152,10 +4183,10 @@ uint32_t RowGroup::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("columns", ::apache::thrift::protocol::T_LIST, 1); { xfer += 
oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->columns.size())); - std::vector ::const_iterator _iter259; - for (_iter259 = this->columns.begin(); _iter259 != this->columns.end(); ++_iter259) + std::vector ::const_iterator _iter264; + for (_iter264 = this->columns.begin(); _iter264 != this->columns.end(); ++_iter264) { - xfer += (*_iter259).write(oprot); + xfer += (*_iter264).write(oprot); } xfer += oprot->writeListEnd(); } @@ -4173,10 +4204,10 @@ uint32_t RowGroup::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("sorting_columns", ::apache::thrift::protocol::T_LIST, 4); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->sorting_columns.size())); - std::vector ::const_iterator _iter260; - for (_iter260 = this->sorting_columns.begin(); _iter260 != this->sorting_columns.end(); ++_iter260) + std::vector ::const_iterator _iter265; + for (_iter265 = this->sorting_columns.begin(); _iter265 != this->sorting_columns.end(); ++_iter265) { - xfer += (*_iter260).write(oprot); + xfer += (*_iter265).write(oprot); } xfer += oprot->writeListEnd(); } @@ -4417,14 +4448,14 @@ uint32_t OffsetIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->page_locations.clear(); - uint32_t _size277; - ::apache::thrift::protocol::TType _etype280; - xfer += iprot->readListBegin(_etype280, _size277); - this->page_locations.resize(_size277); - uint32_t _i281; - for (_i281 = 0; _i281 < _size277; ++_i281) + uint32_t _size282; + ::apache::thrift::protocol::TType _etype285; + xfer += iprot->readListBegin(_etype285, _size282); + this->page_locations.resize(_size282); + uint32_t _i286; + for (_i286 = 0; _i286 < _size282; ++_i286) { - xfer += this->page_locations[_i281].read(iprot); + xfer += this->page_locations[_i286].read(iprot); } xfer += iprot->readListEnd(); } @@ -4437,14 +4468,14 @@ uint32_t OffsetIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { 
this->unencoded_byte_array_data_bytes.clear(); - uint32_t _size282; - ::apache::thrift::protocol::TType _etype285; - xfer += iprot->readListBegin(_etype285, _size282); - this->unencoded_byte_array_data_bytes.resize(_size282); - uint32_t _i286; - for (_i286 = 0; _i286 < _size282; ++_i286) + uint32_t _size287; + ::apache::thrift::protocol::TType _etype290; + xfer += iprot->readListBegin(_etype290, _size287); + this->unencoded_byte_array_data_bytes.resize(_size287); + uint32_t _i291; + for (_i291 = 0; _i291 < _size287; ++_i291) { - xfer += iprot->readI64(this->unencoded_byte_array_data_bytes[_i286]); + xfer += iprot->readI64(this->unencoded_byte_array_data_bytes[_i291]); } xfer += iprot->readListEnd(); } @@ -4476,10 +4507,10 @@ uint32_t OffsetIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("page_locations", ::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->page_locations.size())); - std::vector ::const_iterator _iter287; - for (_iter287 = this->page_locations.begin(); _iter287 != this->page_locations.end(); ++_iter287) + std::vector ::const_iterator _iter292; + for (_iter292 = this->page_locations.begin(); _iter292 != this->page_locations.end(); ++_iter292) { - xfer += (*_iter287).write(oprot); + xfer += (*_iter292).write(oprot); } xfer += oprot->writeListEnd(); } @@ -4489,10 +4520,10 @@ uint32_t OffsetIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("unencoded_byte_array_data_bytes", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->unencoded_byte_array_data_bytes.size())); - std::vector ::const_iterator _iter288; - for (_iter288 = this->unencoded_byte_array_data_bytes.begin(); _iter288 != this->unencoded_byte_array_data_bytes.end(); ++_iter288) + std::vector ::const_iterator _iter293; + for (_iter293 = this->unencoded_byte_array_data_bytes.begin(); _iter293 != 
this->unencoded_byte_array_data_bytes.end(); ++_iter293) { - xfer += oprot->writeI64((*_iter288)); + xfer += oprot->writeI64((*_iter293)); } xfer += oprot->writeListEnd(); } @@ -4533,14 +4564,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->null_pages.clear(); - uint32_t _size293; - ::apache::thrift::protocol::TType _etype296; - xfer += iprot->readListBegin(_etype296, _size293); - this->null_pages.resize(_size293); - uint32_t _i297; - for (_i297 = 0; _i297 < _size293; ++_i297) + uint32_t _size298; + ::apache::thrift::protocol::TType _etype301; + xfer += iprot->readListBegin(_etype301, _size298); + this->null_pages.resize(_size298); + uint32_t _i302; + for (_i302 = 0; _i302 < _size298; ++_i302) { - xfer += iprot->readBool(this->null_pages[_i297]); + xfer += iprot->readBool(this->null_pages[_i302]); } xfer += iprot->readListEnd(); } @@ -4553,14 +4584,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->min_values.clear(); - uint32_t _size298; - ::apache::thrift::protocol::TType _etype301; - xfer += iprot->readListBegin(_etype301, _size298); - this->min_values.resize(_size298); - uint32_t _i302; - for (_i302 = 0; _i302 < _size298; ++_i302) + uint32_t _size303; + ::apache::thrift::protocol::TType _etype306; + xfer += iprot->readListBegin(_etype306, _size303); + this->min_values.resize(_size303); + uint32_t _i307; + for (_i307 = 0; _i307 < _size303; ++_i307) { - xfer += iprot->readBinary(this->min_values[_i302]); + xfer += iprot->readBinary(this->min_values[_i307]); } xfer += iprot->readListEnd(); } @@ -4573,14 +4604,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->max_values.clear(); - uint32_t _size303; - ::apache::thrift::protocol::TType _etype306; - xfer += iprot->readListBegin(_etype306, _size303); - this->max_values.resize(_size303); - uint32_t _i307; - for (_i307 = 0; _i307 < 
_size303; ++_i307) + uint32_t _size308; + ::apache::thrift::protocol::TType _etype311; + xfer += iprot->readListBegin(_etype311, _size308); + this->max_values.resize(_size308); + uint32_t _i312; + for (_i312 = 0; _i312 < _size308; ++_i312) { - xfer += iprot->readBinary(this->max_values[_i307]); + xfer += iprot->readBinary(this->max_values[_i312]); } xfer += iprot->readListEnd(); } @@ -4591,9 +4622,9 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast308; - xfer += iprot->readI32(ecast308); - this->boundary_order = static_cast(ecast308); + int32_t ecast313; + xfer += iprot->readI32(ecast313); + this->boundary_order = static_cast(ecast313); isset_boundary_order = true; } else { xfer += iprot->skip(ftype); @@ -4603,14 +4634,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->null_counts.clear(); - uint32_t _size309; - ::apache::thrift::protocol::TType _etype312; - xfer += iprot->readListBegin(_etype312, _size309); - this->null_counts.resize(_size309); - uint32_t _i313; - for (_i313 = 0; _i313 < _size309; ++_i313) + uint32_t _size314; + ::apache::thrift::protocol::TType _etype317; + xfer += iprot->readListBegin(_etype317, _size314); + this->null_counts.resize(_size314); + uint32_t _i318; + for (_i318 = 0; _i318 < _size314; ++_i318) { - xfer += iprot->readI64(this->null_counts[_i313]); + xfer += iprot->readI64(this->null_counts[_i318]); } xfer += iprot->readListEnd(); } @@ -4623,14 +4654,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->repetition_level_histograms.clear(); - uint32_t _size314; - ::apache::thrift::protocol::TType _etype317; - xfer += iprot->readListBegin(_etype317, _size314); - this->repetition_level_histograms.resize(_size314); - uint32_t _i318; - for (_i318 = 0; _i318 < _size314; ++_i318) + uint32_t _size319; + ::apache::thrift::protocol::TType _etype322; 
+ xfer += iprot->readListBegin(_etype322, _size319); + this->repetition_level_histograms.resize(_size319); + uint32_t _i323; + for (_i323 = 0; _i323 < _size319; ++_i323) { - xfer += iprot->readI64(this->repetition_level_histograms[_i318]); + xfer += iprot->readI64(this->repetition_level_histograms[_i323]); } xfer += iprot->readListEnd(); } @@ -4643,14 +4674,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->definition_level_histograms.clear(); - uint32_t _size319; - ::apache::thrift::protocol::TType _etype322; - xfer += iprot->readListBegin(_etype322, _size319); - this->definition_level_histograms.resize(_size319); - uint32_t _i323; - for (_i323 = 0; _i323 < _size319; ++_i323) + uint32_t _size324; + ::apache::thrift::protocol::TType _etype327; + xfer += iprot->readListBegin(_etype327, _size324); + this->definition_level_histograms.resize(_size324); + uint32_t _i328; + for (_i328 = 0; _i328 < _size324; ++_i328) { - xfer += iprot->readI64(this->definition_level_histograms[_i323]); + xfer += iprot->readI64(this->definition_level_histograms[_i328]); } xfer += iprot->readListEnd(); } @@ -4663,14 +4694,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->geometry_stats.clear(); - uint32_t _size324; - ::apache::thrift::protocol::TType _etype327; - xfer += iprot->readListBegin(_etype327, _size324); - this->geometry_stats.resize(_size324); - uint32_t _i328; - for (_i328 = 0; _i328 < _size324; ++_i328) + uint32_t _size329; + ::apache::thrift::protocol::TType _etype332; + xfer += iprot->readListBegin(_etype332, _size329); + this->geometry_stats.resize(_size329); + uint32_t _i333; + for (_i333 = 0; _i333 < _size329; ++_i333) { - xfer += this->geometry_stats[_i328].read(iprot); + xfer += this->geometry_stats[_i333].read(iprot); } xfer += iprot->readListEnd(); } @@ -4708,10 +4739,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += 
oprot->writeFieldBegin("null_pages", ::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_BOOL, static_cast(this->null_pages.size())); - std::vector ::const_iterator _iter329; - for (_iter329 = this->null_pages.begin(); _iter329 != this->null_pages.end(); ++_iter329) + std::vector ::const_iterator _iter334; + for (_iter334 = this->null_pages.begin(); _iter334 != this->null_pages.end(); ++_iter334) { - xfer += oprot->writeBool((*_iter329)); + xfer += oprot->writeBool((*_iter334)); } xfer += oprot->writeListEnd(); } @@ -4720,10 +4751,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("min_values", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->min_values.size())); - std::vector ::const_iterator _iter330; - for (_iter330 = this->min_values.begin(); _iter330 != this->min_values.end(); ++_iter330) + std::vector ::const_iterator _iter335; + for (_iter335 = this->min_values.begin(); _iter335 != this->min_values.end(); ++_iter335) { - xfer += oprot->writeBinary((*_iter330)); + xfer += oprot->writeBinary((*_iter335)); } xfer += oprot->writeListEnd(); } @@ -4732,10 +4763,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("max_values", ::apache::thrift::protocol::T_LIST, 3); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->max_values.size())); - std::vector ::const_iterator _iter331; - for (_iter331 = this->max_values.begin(); _iter331 != this->max_values.end(); ++_iter331) + std::vector ::const_iterator _iter336; + for (_iter336 = this->max_values.begin(); _iter336 != this->max_values.end(); ++_iter336) { - xfer += oprot->writeBinary((*_iter331)); + xfer += oprot->writeBinary((*_iter336)); } xfer += oprot->writeListEnd(); } @@ -4749,10 +4780,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += 
oprot->writeFieldBegin("null_counts", ::apache::thrift::protocol::T_LIST, 5); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->null_counts.size())); - std::vector ::const_iterator _iter332; - for (_iter332 = this->null_counts.begin(); _iter332 != this->null_counts.end(); ++_iter332) + std::vector ::const_iterator _iter337; + for (_iter337 = this->null_counts.begin(); _iter337 != this->null_counts.end(); ++_iter337) { - xfer += oprot->writeI64((*_iter332)); + xfer += oprot->writeI64((*_iter337)); } xfer += oprot->writeListEnd(); } @@ -4762,10 +4793,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("repetition_level_histograms", ::apache::thrift::protocol::T_LIST, 6); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->repetition_level_histograms.size())); - std::vector ::const_iterator _iter333; - for (_iter333 = this->repetition_level_histograms.begin(); _iter333 != this->repetition_level_histograms.end(); ++_iter333) + std::vector ::const_iterator _iter338; + for (_iter338 = this->repetition_level_histograms.begin(); _iter338 != this->repetition_level_histograms.end(); ++_iter338) { - xfer += oprot->writeI64((*_iter333)); + xfer += oprot->writeI64((*_iter338)); } xfer += oprot->writeListEnd(); } @@ -4775,10 +4806,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("definition_level_histograms", ::apache::thrift::protocol::T_LIST, 7); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->definition_level_histograms.size())); - std::vector ::const_iterator _iter334; - for (_iter334 = this->definition_level_histograms.begin(); _iter334 != this->definition_level_histograms.end(); ++_iter334) + std::vector ::const_iterator _iter339; + for (_iter339 = this->definition_level_histograms.begin(); _iter339 != this->definition_level_histograms.end(); ++_iter339) { - xfer += 
oprot->writeI64((*_iter334)); + xfer += oprot->writeI64((*_iter339)); } xfer += oprot->writeListEnd(); } @@ -4788,10 +4819,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("geometry_stats", ::apache::thrift::protocol::T_LIST, 8); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->geometry_stats.size())); - std::vector ::const_iterator _iter335; - for (_iter335 = this->geometry_stats.begin(); _iter335 != this->geometry_stats.end(); ++_iter335) + std::vector ::const_iterator _iter340; + for (_iter340 = this->geometry_stats.begin(); _iter340 != this->geometry_stats.end(); ++_iter340) { - xfer += (*_iter335).write(oprot); + xfer += (*_iter340).write(oprot); } xfer += oprot->writeListEnd(); } @@ -5079,14 +5110,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->schema.clear(); - uint32_t _size352; - ::apache::thrift::protocol::TType _etype355; - xfer += iprot->readListBegin(_etype355, _size352); - this->schema.resize(_size352); - uint32_t _i356; - for (_i356 = 0; _i356 < _size352; ++_i356) + uint32_t _size357; + ::apache::thrift::protocol::TType _etype360; + xfer += iprot->readListBegin(_etype360, _size357); + this->schema.resize(_size357); + uint32_t _i361; + for (_i361 = 0; _i361 < _size357; ++_i361) { - xfer += this->schema[_i356].read(iprot); + xfer += this->schema[_i361].read(iprot); } xfer += iprot->readListEnd(); } @@ -5107,14 +5138,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->row_groups.clear(); - uint32_t _size357; - ::apache::thrift::protocol::TType _etype360; - xfer += iprot->readListBegin(_etype360, _size357); - this->row_groups.resize(_size357); - uint32_t _i361; - for (_i361 = 0; _i361 < _size357; ++_i361) + uint32_t _size362; + ::apache::thrift::protocol::TType _etype365; + xfer += iprot->readListBegin(_etype365, _size362); + 
this->row_groups.resize(_size362); + uint32_t _i366; + for (_i366 = 0; _i366 < _size362; ++_i366) { - xfer += this->row_groups[_i361].read(iprot); + xfer += this->row_groups[_i366].read(iprot); } xfer += iprot->readListEnd(); } @@ -5127,14 +5158,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->key_value_metadata.clear(); - uint32_t _size362; - ::apache::thrift::protocol::TType _etype365; - xfer += iprot->readListBegin(_etype365, _size362); - this->key_value_metadata.resize(_size362); - uint32_t _i366; - for (_i366 = 0; _i366 < _size362; ++_i366) + uint32_t _size367; + ::apache::thrift::protocol::TType _etype370; + xfer += iprot->readListBegin(_etype370, _size367); + this->key_value_metadata.resize(_size367); + uint32_t _i371; + for (_i371 = 0; _i371 < _size367; ++_i371) { - xfer += this->key_value_metadata[_i366].read(iprot); + xfer += this->key_value_metadata[_i371].read(iprot); } xfer += iprot->readListEnd(); } @@ -5155,14 +5186,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->column_orders.clear(); - uint32_t _size367; - ::apache::thrift::protocol::TType _etype370; - xfer += iprot->readListBegin(_etype370, _size367); - this->column_orders.resize(_size367); - uint32_t _i371; - for (_i371 = 0; _i371 < _size367; ++_i371) + uint32_t _size372; + ::apache::thrift::protocol::TType _etype375; + xfer += iprot->readListBegin(_etype375, _size372); + this->column_orders.resize(_size372); + uint32_t _i376; + for (_i376 = 0; _i376 < _size372; ++_i376) { - xfer += this->column_orders[_i371].read(iprot); + xfer += this->column_orders[_i376].read(iprot); } xfer += iprot->readListEnd(); } @@ -5220,10 +5251,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->schema.size())); - 
std::vector ::const_iterator _iter372; - for (_iter372 = this->schema.begin(); _iter372 != this->schema.end(); ++_iter372) + std::vector ::const_iterator _iter377; + for (_iter377 = this->schema.begin(); _iter377 != this->schema.end(); ++_iter377) { - xfer += (*_iter372).write(oprot); + xfer += (*_iter377).write(oprot); } xfer += oprot->writeListEnd(); } @@ -5236,10 +5267,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->row_groups.size())); - std::vector ::const_iterator _iter373; - for (_iter373 = this->row_groups.begin(); _iter373 != this->row_groups.end(); ++_iter373) + std::vector ::const_iterator _iter378; + for (_iter378 = this->row_groups.begin(); _iter378 != this->row_groups.end(); ++_iter378) { - xfer += (*_iter373).write(oprot); + xfer += (*_iter378).write(oprot); } xfer += oprot->writeListEnd(); } @@ -5249,10 +5280,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter374; - for (_iter374 = this->key_value_metadata.begin(); _iter374 != this->key_value_metadata.end(); ++_iter374) + std::vector ::const_iterator _iter379; + for (_iter379 = this->key_value_metadata.begin(); _iter379 != this->key_value_metadata.end(); ++_iter379) { - xfer += (*_iter374).write(oprot); + xfer += (*_iter379).write(oprot); } xfer += oprot->writeListEnd(); } @@ -5267,10 +5298,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("column_orders", ::apache::thrift::protocol::T_LIST, 7); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->column_orders.size())); - std::vector 
::const_iterator _iter375; - for (_iter375 = this->column_orders.begin(); _iter375 != this->column_orders.end(); ++_iter375) + std::vector ::const_iterator _iter380; + for (_iter380 = this->column_orders.begin(); _iter380 != this->column_orders.end(); ++_iter380) { - xfer += (*_iter375).write(oprot); + xfer += (*_iter380).write(oprot); } xfer += oprot->writeListEnd(); } @@ -5363,6 +5394,7 @@ uint32_t FileCryptoMetaData::write(Protocol_* oprot) const { xfer += oprot->writeStructEnd(); return xfer; } + } } // namespace diff --git a/cpp/src/parquet/parquet.thrift b/cpp/src/parquet/parquet.thrift index 1653d7365ea49..61a40882a6155 100644 --- a/cpp/src/parquet/parquet.thrift +++ b/cpp/src/parquet/parquet.thrift @@ -249,16 +249,22 @@ enum Edges { /** * A custom WKB-encoded polygon or multi-polygon to represent a covering of - * geometries. For example, it may be a bounding box, or an evelope of geometries - * when a bounding box cannot be built (e.g. a geometry has spherical edges, or if + * geometries. For example, it may be a bounding box or an envelope of geometries + * when a bounding box cannot be built (e.g., a geometry has spherical edges, or if * an edge of geographic coordinates crosses the antimeridian). In addition, it can * also be used to provide vendor-agnostic coverings like S2 or H3 grids. */ struct Covering { - /** Bytes of a WKB-encoded geometry */ - 1: required binary geometry; - /** Edges of the geometry, which is independent of edges from the logical type */ - 2: required Edges edges; + /** + * A type of covering. Currently accepted values: "WKB". + */ + 1: required string kind; + /** A payload specific to kind: + * - WKB: well-known binary of a POLYGON that completely covers the contents. + * This will be interpreted according to the same CRS and edges defined by + * the logical type. 
+ */ + 2: required binary value; } /** @@ -281,8 +287,8 @@ struct GeometryStatistics { /** A bounding box of geometries */ 1: optional BoundingBox bbox; - /** A covering polygon of geometries */ - 2: optional Covering covering; + /** A list of coverings of geometries */ + 2: optional list coverings; /** * The geometry types of all geometries, or an empty array if they are not @@ -488,14 +494,19 @@ struct GeometryType { 2: required Edges edges; /** * Coordinate Reference System, i.e. mapping of how coordinates refer to - * precise locations on earth, e.g. OGC:CRS84 + * precise locations on earth. */ 3: optional string crs; + /** + * Encoding used in the above crs field. + * Currently the only allowed value is "PROJJSON". + */ + 4: optional string crs_encoding; /** * Additional informative metadata. * It can be used by GeoParquet to offload some of the column metadata. */ - 4: optional binary metadata; + 5: optional binary metadata; } /** From 6a9ac3a0e0fc68a8fdf3fd78fe60c9e2e1189bb2 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 12 Aug 2024 17:17:01 -0300 Subject: [PATCH 10/61] start geom utiles --- cpp/src/parquet/geometry_util.h | 81 +++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 cpp/src/parquet/geometry_util.h diff --git a/cpp/src/parquet/geometry_util.h b/cpp/src/parquet/geometry_util.h new file mode 100644 index 0000000000000..99eb40bf3f14c --- /dev/null +++ b/cpp/src/parquet/geometry_util.h @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "arrow/util/logging.h" +#include "arrow/util/ubsan.h" + +namespace parquet { + +namespace geometry { + +constexpr double kInf = std::numeric_limits::infinity(); + +struct BoundingBox { + BoundingBox(): min{kInf, kInf, kInf, kInf}, max{-kInf, -kInf, -kInf, -kInf} {} + double min[4]; + double max[4]; +}; + +class WKBCoordSeq { +public: + size_t Init(const uint8_t* data, size_t data_size) { + if (data_size < sizeof(uint32_t)) { + // error + } + + // Read uint32_t size_coords + memcpy(&num_coords_, data, sizeof(uint32_t)); + + size_t data_size_required = sizeof(uint32_t) + (num_coords_ * num_dims_ * sizeof(double)); + + if (data_size_required > data_size) { + // error + } + + data_ = data; + coord_data_size_ = data_size_required - sizeof(uint32_t); + return data_size_required; + } + + void UpdateBox(BoundingBox* box) { + size_t coord_size_bytes = num_dims_ * sizeof(double); + double coord[4]; + for (size_t offset = 0; offset < coord_data_size_; offset += coord_size_bytes) { + memcpy(coord, data_ + offset, coord_size_bytes); + for (uint32_t i = 0; i < num_dims_; i++) { + box->max[i] = std::max(box->max[i], coord[i]); + } + } + } + + private: + const uint8_t* data_; + size_t coord_data_size_; + uint32_t num_coords_; + uint32_t num_dims_; +}; + + + +} // namespace geometry + +} // namespace parquet From a22908f9a014a0f9ff9a100a65e07d9d6eafe810 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 13 Aug 2024 21:53:39 -0300 Subject: [PATCH 11/61] test roundtrip thrift cases --- 
cpp/src/parquet/schema_test.cc | 4 ++++ cpp/src/parquet/types.cc | 10 +++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/cpp/src/parquet/schema_test.cc b/cpp/src/parquet/schema_test.cc index 381c2e730facd..fb498c543cdbb 100644 --- a/cpp/src/parquet/schema_test.cc +++ b/cpp/src/parquet/schema_test.cc @@ -2271,6 +2271,10 @@ TEST(TestLogicalTypeSerialization, Roundtrips) { {LogicalType::BSON(), Type::BYTE_ARRAY, -1}, {LogicalType::UUID(), Type::FIXED_LEN_BYTE_ARRAY, 16}, {LogicalType::Float16(), Type::FIXED_LEN_BYTE_ARRAY, 2}, + {LogicalType::Geometry(), Type::BYTE_ARRAY, -1}, + {LogicalType::Geometry("non-empty crs", LogicalType::GeometryEdges::SPHERICAL, + LogicalType::GeometryEncoding::WKB, "non-empty metadata"), + Type::BYTE_ARRAY, -1}, {LogicalType::None(), Type::BOOLEAN, -1}}; for (const AnnotatedPrimitiveNodeFactoryArguments& c : cases) { diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index e9e339adaebfb..5f5d65e54537a 100644 --- a/cpp/src/parquet/types.cc +++ b/cpp/src/parquet/types.cc @@ -1736,7 +1736,15 @@ format::LogicalType LogicalType::Impl::Geometry::ToThrift() const { } bool LogicalType::Impl::Geometry::Equals(const LogicalType& other) const { - throw std::runtime_error("not implemented"); + if (other.is_geometry()) { + const auto& other_geometry = checked_cast(other); + return crs() == other_geometry.crs() && + edges() == other_geometry.edges() && + encoding() == other_geometry.encoding() && + metadata() == other_geometry.metadata(); + } else { + return false; + } } const std::string& GeometryLogicalType::crs() const { From dcc083a2aad2f96655deecf98fa0ebe5e755d793 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 14 Aug 2024 01:08:41 -0300 Subject: [PATCH 12/61] more geometry utils --- cpp/src/parquet/geometry_util.h | 423 +++++++++++++++++++++++++++++--- 1 file changed, 393 insertions(+), 30 deletions(-) diff --git a/cpp/src/parquet/geometry_util.h b/cpp/src/parquet/geometry_util.h index 
99eb40bf3f14c..f620c9fcda290 100644 --- a/cpp/src/parquet/geometry_util.h +++ b/cpp/src/parquet/geometry_util.h @@ -20,62 +20,425 @@ #include #include +#include "arrow/util/endian.h" #include "arrow/util/logging.h" #include "arrow/util/ubsan.h" +#include "parquet/exception.h" -namespace parquet { - -namespace geometry { +namespace parquet::geometry { constexpr double kInf = std::numeric_limits::infinity(); +struct Dimensions { + enum dimensions { XY = 1, XYZ = 2, XYM = 3, XYZM = 4 }; + + static dimensions FromWKB(uint32_t wkb_geometry_type) { + switch (wkb_geometry_type / 1000) { + case 0: + return XY; + case 1: + return XYZ; + case 2: + return XYM; + case 3: + return XYZM; + default: + throw ParquetException("Invalid wkb_geometry_type: ", wkb_geometry_type); + } + } + + template + constexpr static uint32_t size(); + + template <> + constexpr uint32_t size() { + return 2; + } + + template <> + constexpr uint32_t size() { + return 3; + } + + template <> + constexpr uint32_t size() { + return 3; + } + + template <> + constexpr uint32_t size() { + return 4; + } +}; + +struct GeometryType { + enum geometry_type { + POINT = 1, + LINESTRING = 2, + POLYGON = 3, + MULTIPOINT = 4, + MULTILINESTRING = 5, + MULTIPOLYGON = 6, + GEOMETRYCOLLECTION = 7 + }; + + static geometry_type FromWKB(uint32_t wkb_geometry_type) { + switch (wkb_geometry_type % 1000) { + case 1: + return POINT; + case 2: + return LINESTRING; + case 3: + return POLYGON; + case 4: + return MULTIPOINT; + case 5: + return MULTILINESTRING; + case 6: + return MULTIPOLYGON; + case 7: + return GEOMETRYCOLLECTION; + default: + throw ParquetException("Invalid wkb_geometry_type: ", wkb_geometry_type); + } + } +}; + +struct WKBGeometryHeader { + GeometryType::geometry_type geometry_type; + Dimensions::dimensions dimensions; + bool swap; +}; + +class WKBBuffer { + public: + WKBBuffer(const uint8_t* data, int64_t size) : data_(data), size_(size) {} + + WKBGeometryHeader ReadGeometryHeader() { + WKBGeometryHeader out; + + 
uint8_t endian = ReadUInt8(); +#if defined(ARROW_LITTLE_ENDIAN) + out.swap = endian != 0x01; +#else + out.swap = endian != 0x00; +#endif + + uint32_t wkb_geometry_type = ReadUInt32(out.swap); + out.geometry_type = GeometryType::FromWKB(wkb_geometry_type); + out.dimensions = Dimensions::FromWKB(wkb_geometry_type); + + return out; + } + + uint8_t ReadUInt8() { + if (size_ < 1) { + throw ParquetException("Can't read 1 byte from empty WKBBuffer"); + } + + size_ -= 1; + return *data_++; + } + + uint32_t ReadUInt32(bool swap) { + if (ARROW_PREDICT_FALSE(swap)) { + return ReadUInt32(); + } else { + return ReadUInt32(); + } + } + + template + uint32_t ReadUInt32() { + if (size_ < sizeof(uint32_t)) { + throw ParquetException("Can't read 4 bytes from WKBBuffer with ", size_, + "remaining"); + } + + uint32_t value; + memcpy(&value, data_, sizeof(uint32_t)); + data_ += sizeof(uint32_t); + size_ -= sizeof(uint32_t); + + if constexpr (swap) { + value = arrow::bit_util::ByteSwap(value); + } + + return value; + } + + template + void ReadDoubles(uint32_t n, double* out) { + if (n == 0) { + return; + } + + size_t total_bytes = n * sizeof(double); + if (size_ < total_bytes) { + throw ParquetException("Can't read ", total_bytes, " bytes from WKBBuffer with ", + size_, "remaining"); + } + + memcpy(out, data_, total_bytes); + data_ += total_bytes; + size_ -= total_bytes; + + if constexpr (swap) { + for (uint32_t i = 0; i < n; i++) { + out[i] = arrow::bit_util::ByteSwap(out[i]); + } + } + } + + size_t size() { return size_; } + + private: + const uint8_t* data_; + size_t size_; +}; + struct BoundingBox { - BoundingBox(): min{kInf, kInf, kInf, kInf}, max{-kInf, -kInf, -kInf, -kInf} {} + explicit BoundingBox(Dimensions::dimensions dimensions) + : dimensions(dimensions), + min{kInf, kInf, kInf, kInf}, + max{-kInf, -kInf, -kInf, -kInf} {} + + Dimensions::dimensions dimensions; double min[4]; double max[4]; }; -class WKBCoordSeq { -public: - size_t Init(const uint8_t* data, size_t data_size) 
{ - if (data_size < sizeof(uint32_t)) { - // error +template +class WKBSequenceBounder { + public: + explicit WKBSequenceBounder(double* chunk) : box_(dims), chunk_(chunk) {} + + void BoundPoint(WKBBuffer* src) { + constexpr uint32_t coord_size = Dimensions::size(); + src->ReadDoubles(coord_size, chunk_); + for (uint32_t dim = 0; dim < coord_size; dim++) { + if (ARROW_PREDICT_TRUE(!std::isnan(chunk_[dim]))) { + box_.min[dim] = std::min(box_.min[dim], chunk_[dim]); + box_.max[dim] = std::max(box_.max[dim], chunk_[dim]); + } + } + } + + void BoundSequence(WKBBuffer* src) { + constexpr uint32_t coord_size = Dimensions::size(); + constexpr uint32_t coords_per_chunk = chunk_size / sizeof(double) / coord_size; + + uint32_t n_coords = src->ReadUInt32(); + uint32_t n_chunks = n_coords / coords_per_chunk; + for (uint32_t i = 0; i < n_chunks; i++) { + src->ReadDoubles(coords_per_chunk, chunk_); + BoundChunk(coords_per_chunk); } - // Read uint32_t size_coords - memcpy(&num_coords_, data, sizeof(uint32_t)); + uint32_t remaining_coords = n_coords - (n_chunks * coords_per_chunk); + src->ReadDoubles(remaining_coords, chunk_); + BoundChunk(remaining_coords); + } + + void BoundRings(WKBBuffer* src) { + uint32_t n_rings = src->ReadUInt32(); + for (uint32_t i = 0; i < n_rings; i++) { + BoundSequence(src); + } + } + + void Finish(BoundingBox* out) { ParquetException::NYI(); } + + private: + BoundingBox box_; + double* chunk_; + + void BoundChunk(uint32_t n_coords) { + constexpr uint32_t coord_size = Dimensions::size(); + for (uint32_t dim = 0; dim < coord_size; dim++) { + for (uint32_t i = 0; i < n_coords; i++) { + box_.min[dim] = std::min(box_.min[dim], chunk_[i * coord_size + dim]); + box_.max[dim] = std::max(box_.max[dim], chunk_[i * coord_size + dim]); + } + } + } +}; - size_t data_size_required = sizeof(uint32_t) + (num_coords_ * num_dims_ * sizeof(double)); +class WKBGenericSequenceBounder { + public: + WKBGenericSequenceBounder() + : xy_(chunk_), + xyz_(chunk_), + xym_(chunk_), 
+ xyzm_(chunk_), + xy_swap_(chunk_), + xyz_swap_(chunk_), + xym_swap_(chunk_), + xyzm_swap_(chunk_) {} - if (data_size_required > data_size) { - // error + void BoundPoint(WKBBuffer* src, Dimensions::dimensions dimensions, bool swap) { + if (ARROW_PREDICT_FALSE(swap)) { + switch (dimensions) { + case Dimensions::XY: + xy_.BoundPoint(src); + break; + case Dimensions::XYZ: + xyz_.BoundPoint(src); + break; + case Dimensions::XYM: + xym_.BoundPoint(src); + break; + case Dimensions::XYZM: + xyzm_.BoundPoint(src); + break; + } + } else { + switch (dimensions) { + case Dimensions::XY: + xy_swap_.BoundPoint(src); + break; + case Dimensions::XYZ: + xyz_swap_.BoundPoint(src); + break; + case Dimensions::XYM: + xym_swap_.BoundPoint(src); + break; + case Dimensions::XYZM: + xyzm_swap_.BoundPoint(src); + break; + } } + } - data_ = data; - coord_data_size_ = data_size_required - sizeof(uint32_t); - return data_size_required; + void BoundSequence(WKBBuffer* src, Dimensions::dimensions dimensions, bool swap) { + if (ARROW_PREDICT_FALSE(swap)) { + switch (dimensions) { + case Dimensions::XY: + xy_.BoundSequence(src); + break; + case Dimensions::XYZ: + xyz_.BoundSequence(src); + break; + case Dimensions::XYM: + xym_.BoundSequence(src); + break; + case Dimensions::XYZM: + xyzm_.BoundSequence(src); + break; + } + } else { + switch (dimensions) { + case Dimensions::XY: + xy_swap_.BoundSequence(src); + break; + case Dimensions::XYZ: + xyz_swap_.BoundSequence(src); + break; + case Dimensions::XYM: + xym_swap_.BoundSequence(src); + break; + case Dimensions::XYZM: + xyzm_swap_.BoundSequence(src); + break; + } + } } - void UpdateBox(BoundingBox* box) { - size_t coord_size_bytes = num_dims_ * sizeof(double); - double coord[4]; - for (size_t offset = 0; offset < coord_data_size_; offset += coord_size_bytes) { - memcpy(coord, data_ + offset, coord_size_bytes); - for (uint32_t i = 0; i < num_dims_; i++) { - box->max[i] = std::max(box->max[i], coord[i]); + void BoundRings(WKBBuffer* src, 
Dimensions::dimensions dimensions, bool swap) { + if (ARROW_PREDICT_FALSE(swap)) { + switch (dimensions) { + case Dimensions::XY: + xy_.BoundRings(src); + break; + case Dimensions::XYZ: + xyz_.BoundRings(src); + break; + case Dimensions::XYM: + xym_.BoundRings(src); + break; + case Dimensions::XYZM: + xyzm_.BoundRings(src); + break; + } + } else { + switch (dimensions) { + case Dimensions::XY: + xy_swap_.BoundRings(src); + break; + case Dimensions::XYZ: + xyz_swap_.BoundRings(src); + break; + case Dimensions::XYM: + xym_swap_.BoundRings(src); + break; + case Dimensions::XYZM: + xyzm_swap_.BoundRings(src); + break; } } } + void Finish(BoundingBox* out) { + xy_.Finish(out); + xyz_.Finish(out); + xym_.Finish(out); + xyzm_.Finish(out); + xy_swap_.Finish(out); + xyz_swap_.Finish(out); + xym_swap_.Finish(out); + xyzm_swap_.Finish(out); + } + private: - const uint8_t* data_; - size_t coord_data_size_; - uint32_t num_coords_; - uint32_t num_dims_; + double chunk_[64]; + WKBSequenceBounder xy_; + WKBSequenceBounder xyz_; + WKBSequenceBounder xym_; + WKBSequenceBounder xyzm_; + WKBSequenceBounder xy_swap_; + WKBSequenceBounder xyz_swap_; + WKBSequenceBounder xym_swap_; + WKBSequenceBounder xyzm_swap_; }; +class WKBGeometryBounder { + public: + WKBGeometryBounder() : box_(Dimensions::XYZM) {} + void BoundGeometry(WKBBuffer* src) { + WKBGeometryHeader header = src->ReadGeometryHeader(); + switch (header.geometry_type) { + case GeometryType::POINT: + bounder_.BoundPoint(src, header.dimensions, header.swap); + break; + case GeometryType::LINESTRING: + bounder_.BoundSequence(src, header.dimensions, header.swap); + break; + case GeometryType::POLYGON: + bounder_.BoundRings(src, header.dimensions, header.swap); + break; -} // namespace geometry + // These are all encoded the same in WKB, even though this encoding would + // allow for parts to be of a different geometry type. For the purposes of + // bounding, this does not cause us problems. 
+ case GeometryType::MULTIPOINT: + case GeometryType::MULTILINESTRING: + case GeometryType::MULTIPOLYGON: + case GeometryType::GEOMETRYCOLLECTION: { + uint32_t n_parts = src->ReadUInt32(header.swap); + for (uint32_t i = 0; i < n_parts; i++) { + BoundGeometry(src); + } + break; + } + } + } + + void Finish(BoundingBox* out) { bounder_.Finish(out); } + + private: + BoundingBox box_; + WKBGenericSequenceBounder bounder_; +}; -} // namespace parquet +} // namespace parquet::geometry From 9acc840b61ba91368e819523610b301d4412e9f6 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 14 Aug 2024 01:15:20 -0300 Subject: [PATCH 13/61] more --- cpp/src/parquet/geometry_util.h | 46 ++++++++++++++++----------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/cpp/src/parquet/geometry_util.h b/cpp/src/parquet/geometry_util.h index f620c9fcda290..794ea1cb81451 100644 --- a/cpp/src/parquet/geometry_util.h +++ b/cpp/src/parquet/geometry_util.h @@ -114,23 +114,6 @@ class WKBBuffer { public: WKBBuffer(const uint8_t* data, int64_t size) : data_(data), size_(size) {} - WKBGeometryHeader ReadGeometryHeader() { - WKBGeometryHeader out; - - uint8_t endian = ReadUInt8(); -#if defined(ARROW_LITTLE_ENDIAN) - out.swap = endian != 0x01; -#else - out.swap = endian != 0x00; -#endif - - uint32_t wkb_geometry_type = ReadUInt32(out.swap); - out.geometry_type = GeometryType::FromWKB(wkb_geometry_type); - out.dimensions = Dimensions::FromWKB(wkb_geometry_type); - - return out; - } - uint8_t ReadUInt8() { if (size_ < 1) { throw ParquetException("Can't read 1 byte from empty WKBBuffer"); @@ -247,7 +230,11 @@ class WKBSequenceBounder { } } - void Finish(BoundingBox* out) { ParquetException::NYI(); } + void Finish(BoundingBox* out) { + // Probably a more elgant way to do this, but we need to map the dimensions + // we have to the dimensions of the bounding box. 
+ ParquetException::NYI(); + } private: BoundingBox box_; @@ -264,6 +251,7 @@ class WKBSequenceBounder { } }; +// We could avoid this madness by not templating the WKBSequenceBounder class WKBGenericSequenceBounder { public: WKBGenericSequenceBounder() @@ -406,16 +394,26 @@ class WKBGeometryBounder { WKBGeometryBounder() : box_(Dimensions::XYZM) {} void BoundGeometry(WKBBuffer* src) { - WKBGeometryHeader header = src->ReadGeometryHeader(); - switch (header.geometry_type) { + uint8_t endian = src->ReadUInt8(); +#if defined(ARROW_LITTLE_ENDIAN) + bool swap = endian != 0x01; +#else + bool swap = endian != 0x00; +#endif + + uint32_t wkb_geometry_type = src->ReadUInt32(swap); + auto geometry_type = GeometryType::FromWKB(wkb_geometry_type); + auto dimensions = Dimensions::FromWKB(wkb_geometry_type); + + switch (geometry_type) { case GeometryType::POINT: - bounder_.BoundPoint(src, header.dimensions, header.swap); + bounder_.BoundPoint(src, dimensions, swap); break; case GeometryType::LINESTRING: - bounder_.BoundSequence(src, header.dimensions, header.swap); + bounder_.BoundSequence(src, dimensions, swap); break; case GeometryType::POLYGON: - bounder_.BoundRings(src, header.dimensions, header.swap); + bounder_.BoundRings(src, dimensions, swap); break; // These are all encoded the same in WKB, even though this encoding would @@ -425,7 +423,7 @@ class WKBGeometryBounder { case GeometryType::MULTILINESTRING: case GeometryType::MULTIPOLYGON: case GeometryType::GEOMETRYCOLLECTION: { - uint32_t n_parts = src->ReadUInt32(header.swap); + uint32_t n_parts = src->ReadUInt32(swap); for (uint32_t i = 0; i < n_parts; i++) { BoundGeometry(src); } From e18412c80f0d10a6082511cf8c639ad6af63b765 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 14 Aug 2024 11:08:42 -0300 Subject: [PATCH 14/61] bounder --- cpp/src/parquet/geometry_util.h | 148 +++++++++++++++++++++----------- 1 file changed, 100 insertions(+), 48 deletions(-) diff --git a/cpp/src/parquet/geometry_util.h 
b/cpp/src/parquet/geometry_util.h index 794ea1cb81451..2e97f964cdc42 100644 --- a/cpp/src/parquet/geometry_util.h +++ b/cpp/src/parquet/geometry_util.h @@ -19,6 +19,7 @@ #include #include +#include #include "arrow/util/endian.h" #include "arrow/util/logging.h" @@ -69,6 +70,23 @@ struct Dimensions { constexpr uint32_t size() { return 4; } + + // Where to look in a coordinate with this dimension + // for the X, Y, Z, and M dimensions, respectively. + static std::array ToXYZM(dimensions dims) { + switch (dims) { + case XY: + return {0, 1, -1, -1}; + case XYZ: + return {0, 1, 2, -1}; + case XYM: + return {0, 1, -1, 2}; + case XYZM: + return {0, 1, 2, 3}; + default: + return {-1, -1, -1, -1}; + } + } }; struct GeometryType { @@ -181,11 +199,45 @@ class WKBBuffer { }; struct BoundingBox { - explicit BoundingBox(Dimensions::dimensions dimensions) + explicit BoundingBox(Dimensions::dimensions dimensions = Dimensions::XYZM) : dimensions(dimensions), min{kInf, kInf, kInf, kInf}, max{-kInf, -kInf, -kInf, -kInf} {} + BoundingBox(const BoundingBox& other) = default; + + void Merge(const BoundingBox& other) { + if (ARROW_PREDICT_TRUE(dimensions == other.dimensions)) { + for (int i = 0; i < 4; i++) { + min[i] = std::min(min[i], other.min[i]); + max[i] = std::max(max[i], other.max[i]); + } + + return; + } else if (dimensions == Dimensions::XYZM) { + Merge(other.Canonicalize()); + } else { + ParquetException::NYI(); + } + } + + BoundingBox Canonicalize() const { + BoundingBox xyzm(Dimensions::XYZM); + auto to_xyzm = Dimensions::ToXYZM(dimensions); + for (int i = 0; i < 4; i++) { + int dim_to_xyzm = to_xyzm[i]; + if (dim_to_xyzm == -1) { + xyzm.min[i] = kInf; + xyzm.max[i] = -kInf; + } else { + xyzm.min[i] = min[dim_to_xyzm]; + xyzm.max[i] = max[dim_to_xyzm]; + } + } + + return xyzm; + } + Dimensions::dimensions dimensions; double min[4]; double max[4]; @@ -196,7 +248,7 @@ class WKBSequenceBounder { public: explicit WKBSequenceBounder(double* chunk) : box_(dims), chunk_(chunk) {} - 
void BoundPoint(WKBBuffer* src) { + void ReadPoint(WKBBuffer* src) { constexpr uint32_t coord_size = Dimensions::size(); src->ReadDoubles(coord_size, chunk_); for (uint32_t dim = 0; dim < coord_size; dim++) { @@ -207,7 +259,7 @@ class WKBSequenceBounder { } } - void BoundSequence(WKBBuffer* src) { + void ReadSequence(WKBBuffer* src) { constexpr uint32_t coord_size = Dimensions::size(); constexpr uint32_t coords_per_chunk = chunk_size / sizeof(double) / coord_size; @@ -215,32 +267,28 @@ class WKBSequenceBounder { uint32_t n_chunks = n_coords / coords_per_chunk; for (uint32_t i = 0; i < n_chunks; i++) { src->ReadDoubles(coords_per_chunk, chunk_); - BoundChunk(coords_per_chunk); + ReadChunk(coords_per_chunk); } uint32_t remaining_coords = n_coords - (n_chunks * coords_per_chunk); src->ReadDoubles(remaining_coords, chunk_); - BoundChunk(remaining_coords); + ReadChunk(remaining_coords); } - void BoundRings(WKBBuffer* src) { + void ReadRings(WKBBuffer* src) { uint32_t n_rings = src->ReadUInt32(); for (uint32_t i = 0; i < n_rings; i++) { - BoundSequence(src); + ReadSequence(src); } } - void Finish(BoundingBox* out) { - // Probably a more elgant way to do this, but we need to map the dimensions - // we have to the dimensions of the bounding box. - ParquetException::NYI(); - } + void Finish(BoundingBox* out) { out->Merge(box_); } private: BoundingBox box_; double* chunk_; - void BoundChunk(uint32_t n_coords) { + void ReadChunk(uint32_t n_coords) { constexpr uint32_t coord_size = Dimensions::size(); for (uint32_t dim = 0; dim < coord_size; dim++) { for (uint32_t i = 0; i < n_coords; i++) { @@ -251,7 +299,7 @@ class WKBSequenceBounder { } }; -// We could avoid this madness by not templating the WKBSequenceBounder +// We could (should?) 
avoid this madness by not templating the WKBSequenceBounder class WKBGenericSequenceBounder { public: WKBGenericSequenceBounder() @@ -264,103 +312,103 @@ class WKBGenericSequenceBounder { xym_swap_(chunk_), xyzm_swap_(chunk_) {} - void BoundPoint(WKBBuffer* src, Dimensions::dimensions dimensions, bool swap) { + void ReadPoint(WKBBuffer* src, Dimensions::dimensions dimensions, bool swap) { if (ARROW_PREDICT_FALSE(swap)) { switch (dimensions) { case Dimensions::XY: - xy_.BoundPoint(src); + xy_.ReadPoint(src); break; case Dimensions::XYZ: - xyz_.BoundPoint(src); + xyz_.ReadPoint(src); break; case Dimensions::XYM: - xym_.BoundPoint(src); + xym_.ReadPoint(src); break; case Dimensions::XYZM: - xyzm_.BoundPoint(src); + xyzm_.ReadPoint(src); break; } } else { switch (dimensions) { case Dimensions::XY: - xy_swap_.BoundPoint(src); + xy_swap_.ReadPoint(src); break; case Dimensions::XYZ: - xyz_swap_.BoundPoint(src); + xyz_swap_.ReadPoint(src); break; case Dimensions::XYM: - xym_swap_.BoundPoint(src); + xym_swap_.ReadPoint(src); break; case Dimensions::XYZM: - xyzm_swap_.BoundPoint(src); + xyzm_swap_.ReadPoint(src); break; } } } - void BoundSequence(WKBBuffer* src, Dimensions::dimensions dimensions, bool swap) { + void ReadSequence(WKBBuffer* src, Dimensions::dimensions dimensions, bool swap) { if (ARROW_PREDICT_FALSE(swap)) { switch (dimensions) { case Dimensions::XY: - xy_.BoundSequence(src); + xy_.ReadSequence(src); break; case Dimensions::XYZ: - xyz_.BoundSequence(src); + xyz_.ReadSequence(src); break; case Dimensions::XYM: - xym_.BoundSequence(src); + xym_.ReadSequence(src); break; case Dimensions::XYZM: - xyzm_.BoundSequence(src); + xyzm_.ReadSequence(src); break; } } else { switch (dimensions) { case Dimensions::XY: - xy_swap_.BoundSequence(src); + xy_swap_.ReadSequence(src); break; case Dimensions::XYZ: - xyz_swap_.BoundSequence(src); + xyz_swap_.ReadSequence(src); break; case Dimensions::XYM: - xym_swap_.BoundSequence(src); + xym_swap_.ReadSequence(src); break; case 
Dimensions::XYZM: - xyzm_swap_.BoundSequence(src); + xyzm_swap_.ReadSequence(src); break; } } } - void BoundRings(WKBBuffer* src, Dimensions::dimensions dimensions, bool swap) { + void ReadRings(WKBBuffer* src, Dimensions::dimensions dimensions, bool swap) { if (ARROW_PREDICT_FALSE(swap)) { switch (dimensions) { case Dimensions::XY: - xy_.BoundRings(src); + xy_.ReadRings(src); break; case Dimensions::XYZ: - xyz_.BoundRings(src); + xyz_.ReadRings(src); break; case Dimensions::XYM: - xym_.BoundRings(src); + xym_.ReadRings(src); break; case Dimensions::XYZM: - xyzm_.BoundRings(src); + xyzm_.ReadRings(src); break; } } else { switch (dimensions) { case Dimensions::XY: - xy_swap_.BoundRings(src); + xy_swap_.ReadRings(src); break; case Dimensions::XYZ: - xyz_swap_.BoundRings(src); + xyz_swap_.ReadRings(src); break; case Dimensions::XYM: - xym_swap_.BoundRings(src); + xym_swap_.ReadRings(src); break; case Dimensions::XYZM: - xyzm_swap_.BoundRings(src); + xyzm_swap_.ReadRings(src); break; } } @@ -393,7 +441,7 @@ class WKBGeometryBounder { public: WKBGeometryBounder() : box_(Dimensions::XYZM) {} - void BoundGeometry(WKBBuffer* src) { + void ReadGeometry(WKBBuffer* src) { uint8_t endian = src->ReadUInt8(); #if defined(ARROW_LITTLE_ENDIAN) bool swap = endian != 0x01; @@ -405,27 +453,30 @@ class WKBGeometryBounder { auto geometry_type = GeometryType::FromWKB(wkb_geometry_type); auto dimensions = Dimensions::FromWKB(wkb_geometry_type); + // Keep track of geometry types encountered + wkb_types_.insert(wkb_geometry_type); + switch (geometry_type) { case GeometryType::POINT: - bounder_.BoundPoint(src, dimensions, swap); + bounder_.ReadPoint(src, dimensions, swap); break; case GeometryType::LINESTRING: - bounder_.BoundSequence(src, dimensions, swap); + bounder_.ReadSequence(src, dimensions, swap); break; case GeometryType::POLYGON: - bounder_.BoundRings(src, dimensions, swap); + bounder_.ReadRings(src, dimensions, swap); break; // These are all encoded the same in WKB, even though 
this encoding would - // allow for parts to be of a different geometry type. For the purposes of - // bounding, this does not cause us problems. + // allow for parts to be of a different geometry type or different dimensions. + // For the purposes of bounding, this does not cause us problems. case GeometryType::MULTIPOINT: case GeometryType::MULTILINESTRING: case GeometryType::MULTIPOLYGON: case GeometryType::GEOMETRYCOLLECTION: { uint32_t n_parts = src->ReadUInt32(swap); for (uint32_t i = 0; i < n_parts; i++) { - BoundGeometry(src); + ReadGeometry(src); } break; } @@ -437,6 +488,7 @@ class WKBGeometryBounder { private: BoundingBox box_; WKBGenericSequenceBounder bounder_; + std::unordered_set wkb_types_; }; } // namespace parquet::geometry From 76b3f59ec633464f620c9bf73aa22ee111a49b83 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 14 Aug 2024 11:41:27 -0300 Subject: [PATCH 15/61] add basic test --- cpp/src/parquet/CMakeLists.txt | 2 ++ cpp/src/parquet/geometry_util.h | 30 +++++++++++----- cpp/src/parquet/geometry_util_test.cc | 52 +++++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 8 deletions(-) create mode 100644 cpp/src/parquet/geometry_util_test.cc diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index 17574261d891d..8110e43f83352 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -378,6 +378,8 @@ add_parquet_test(internals-test public_api_test.cc types_test.cc) +add_parquet_test(geometry-test SOURCES geometry_util_test.cc) + set_source_files_properties(public_api_test.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON SKIP_UNITY_BUILD_INCLUSION ON) diff --git a/cpp/src/parquet/geometry_util.h b/cpp/src/parquet/geometry_util.h index 2e97f964cdc42..233ac2cbc42e0 100644 --- a/cpp/src/parquet/geometry_util.h +++ b/cpp/src/parquet/geometry_util.h @@ -122,12 +122,6 @@ struct GeometryType { } }; -struct WKBGeometryHeader { - GeometryType::geometry_type geometry_type; - Dimensions::dimensions 
dimensions; - bool swap; -}; - class WKBBuffer { public: WKBBuffer(const uint8_t* data, int64_t size) : data_(data), size_(size) {} @@ -206,6 +200,13 @@ struct BoundingBox { BoundingBox(const BoundingBox& other) = default; + void Reset() { + for (int i = 0; i < 4; i++) { + min[i] = kInf; + max[i] = -kInf; + } + } + void Merge(const BoundingBox& other) { if (ARROW_PREDICT_TRUE(dimensions == other.dimensions)) { for (int i = 0; i < 4; i++) { @@ -215,13 +216,13 @@ struct BoundingBox { return; } else if (dimensions == Dimensions::XYZM) { - Merge(other.Canonicalize()); + Merge(other.ToXYZM()); } else { ParquetException::NYI(); } } - BoundingBox Canonicalize() const { + BoundingBox ToXYZM() const { BoundingBox xyzm(Dimensions::XYZM); auto to_xyzm = Dimensions::ToXYZM(dimensions); for (int i = 0; i < 4; i++) { @@ -282,6 +283,8 @@ class WKBSequenceBounder { } } + void Reset() { box_.Reset(); } + void Finish(BoundingBox* out) { out->Merge(box_); } private: @@ -425,6 +428,17 @@ class WKBGenericSequenceBounder { xyzm_swap_.Finish(out); } + void Reset() { + xy_.Reset(); + xyz_.Reset(); + xym_.Reset(); + xyzm_.Reset(); + xy_swap_.Reset(); + xyz_swap_.Reset(); + xym_swap_.Reset(); + xyzm_swap_.Reset(); + } + private: double chunk_[64]; WKBSequenceBounder xy_; diff --git a/cpp/src/parquet/geometry_util_test.cc b/cpp/src/parquet/geometry_util_test.cc new file mode 100644 index 0000000000000..02aed4e6d9f14 --- /dev/null +++ b/cpp/src/parquet/geometry_util_test.cc @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include "arrow/testing/gtest_compat.h" + +#include "parquet/geometry_util.h" + +namespace parquet::geometry { + +TEST(TestGeometryUtil, TestDimensions) { + EXPECT_EQ(Dimensions::size(), 2); + EXPECT_EQ(Dimensions::size(), 3); + EXPECT_EQ(Dimensions::size(), 3); + EXPECT_EQ(Dimensions::size(), 4); + + EXPECT_EQ(Dimensions::FromWKB(1), Dimensions::XY); + EXPECT_EQ(Dimensions::FromWKB(1001), Dimensions::XYZ); + EXPECT_EQ(Dimensions::FromWKB(2001), Dimensions::XYM); + EXPECT_EQ(Dimensions::FromWKB(3001), Dimensions::XYZM); + EXPECT_THROW(Dimensions::FromWKB(4001), ParquetException); +} + +TEST(TestGeometryUtil, TestGeometryType) { + EXPECT_EQ(GeometryType::FromWKB(1), GeometryType::POINT); + EXPECT_EQ(GeometryType::FromWKB(1001), GeometryType::POINT); + EXPECT_EQ(GeometryType::FromWKB(1002), GeometryType::LINESTRING); + EXPECT_EQ(GeometryType::FromWKB(1003), GeometryType::POLYGON); + EXPECT_EQ(GeometryType::FromWKB(1004), GeometryType::MULTIPOINT); + EXPECT_EQ(GeometryType::FromWKB(1005), GeometryType::MULTILINESTRING); + EXPECT_EQ(GeometryType::FromWKB(1006), GeometryType::MULTIPOLYGON); + EXPECT_EQ(GeometryType::FromWKB(1007), GeometryType::GEOMETRYCOLLECTION); + EXPECT_THROW(GeometryType::FromWKB(4001), ParquetException); +} + +} // namespace parquet::geometry From fc77ff227adc2caf76a07571715e50bf0c3538ac Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 14 Aug 2024 12:08:48 -0300 Subject: [PATCH 16/61] a few more strings --- cpp/src/parquet/geometry_util.h | 60 +++++++++++++++++++++++++++ 
cpp/src/parquet/geometry_util_test.cc | 21 +++++++++- 2 files changed, 80 insertions(+), 1 deletion(-) diff --git a/cpp/src/parquet/geometry_util.h b/cpp/src/parquet/geometry_util.h index 233ac2cbc42e0..1461ae6361daa 100644 --- a/cpp/src/parquet/geometry_util.h +++ b/cpp/src/parquet/geometry_util.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include #include @@ -87,6 +88,21 @@ struct Dimensions { return {-1, -1, -1, -1}; } } + + static std::string ToString(dimensions dims) { + switch (dims) { + case XY: + return "XY"; + case XYZ: + return "XYZ"; + case XYM: + return "XYM"; + case XYZM: + return "XYZM"; + default: + return ""; + } + } }; struct GeometryType { @@ -120,6 +136,27 @@ struct GeometryType { throw ParquetException("Invalid wkb_geometry_type: ", wkb_geometry_type); } } + + static std::string ToString(geometry_type geometry_type) { + switch (geometry_type) { + case POINT: + return "POINT"; + case LINESTRING: + return "LINESTRING"; + case POLYGON: + return "POLYGON"; + case MULTIPOINT: + return "MULTIPOINT"; + case MULTILINESTRING: + return "MULTILINESTRING"; + case MULTIPOLYGON: + return "MULTIPOLYGON"; + case GEOMETRYCOLLECTION: + return "GEOMETRYCOLLECTION"; + default: + return ""; + } + } }; class WKBBuffer { @@ -193,6 +230,12 @@ class WKBBuffer { }; struct BoundingBox { + BoundingBox(Dimensions::dimensions dimensions, const std::array& mins, + const std::array& maxes) + : dimensions(dimensions) { + std::memcpy(min, mins.data(), sizeof(min)); + std::memcpy(max, maxes.data(), sizeof(max)); + } explicit BoundingBox(Dimensions::dimensions dimensions = Dimensions::XYZM) : dimensions(dimensions), min{kInf, kInf, kInf, kInf}, @@ -239,11 +282,28 @@ struct BoundingBox { return xyzm; } + std::string ToString() const { + std::stringstream ss; + ss << "BoundingBox " << Dimensions::ToString(dimensions) << " [" << min[0] << " => " + << max[0]; + for (int i = 1; i < 4; i++) { + ss << ", " << min[i] << " => " << max[i]; + } + + return ss.str(); + } + 
Dimensions::dimensions dimensions; double min[4]; double max[4]; }; +bool operator==(const BoundingBox& lhs, const BoundingBox& rhs) { + return lhs.dimensions == rhs.dimensions && + std::memcmp(lhs.min, rhs.min, sizeof(lhs.min)) == 0 && + std::memcmp(lhs.max, rhs.max, sizeof(lhs.max)) == 0; +} + template class WKBSequenceBounder { public: diff --git a/cpp/src/parquet/geometry_util_test.cc b/cpp/src/parquet/geometry_util_test.cc index 02aed4e6d9f14..fa23500a3801b 100644 --- a/cpp/src/parquet/geometry_util_test.cc +++ b/cpp/src/parquet/geometry_util_test.cc @@ -30,6 +30,11 @@ TEST(TestGeometryUtil, TestDimensions) { EXPECT_EQ(Dimensions::size(), 3); EXPECT_EQ(Dimensions::size(), 4); + EXPECT_EQ(Dimensions::ToString(Dimensions::XY), "XY"); + EXPECT_EQ(Dimensions::ToString(Dimensions::XYZ), "XYZ"); + EXPECT_EQ(Dimensions::ToString(Dimensions::XYM), "XYM"); + EXPECT_EQ(Dimensions::ToString(Dimensions::XYZM), "XYZM"); + EXPECT_EQ(Dimensions::FromWKB(1), Dimensions::XY); EXPECT_EQ(Dimensions::FromWKB(1001), Dimensions::XYZ); EXPECT_EQ(Dimensions::FromWKB(2001), Dimensions::XYM); @@ -38,6 +43,15 @@ TEST(TestGeometryUtil, TestDimensions) { } TEST(TestGeometryUtil, TestGeometryType) { + EXPECT_EQ(GeometryType::ToString(GeometryType::POINT), "POINT"); + EXPECT_EQ(GeometryType::ToString(GeometryType::LINESTRING), "LINESTRING"); + EXPECT_EQ(GeometryType::ToString(GeometryType::POLYGON), "POLYGON"); + EXPECT_EQ(GeometryType::ToString(GeometryType::MULTIPOINT), "MULTIPOINT"); + EXPECT_EQ(GeometryType::ToString(GeometryType::MULTILINESTRING), "MULTILINESTRING"); + EXPECT_EQ(GeometryType::ToString(GeometryType::MULTIPOLYGON), "MULTIPOLYGON"); + EXPECT_EQ(GeometryType::ToString(GeometryType::GEOMETRYCOLLECTION), + "GEOMETRYCOLLECTION"); + EXPECT_EQ(GeometryType::FromWKB(1), GeometryType::POINT); EXPECT_EQ(GeometryType::FromWKB(1001), GeometryType::POINT); EXPECT_EQ(GeometryType::FromWKB(1002), GeometryType::LINESTRING); @@ -46,7 +60,12 @@ TEST(TestGeometryUtil, TestGeometryType) { 
EXPECT_EQ(GeometryType::FromWKB(1005), GeometryType::MULTILINESTRING); EXPECT_EQ(GeometryType::FromWKB(1006), GeometryType::MULTIPOLYGON); EXPECT_EQ(GeometryType::FromWKB(1007), GeometryType::GEOMETRYCOLLECTION); - EXPECT_THROW(GeometryType::FromWKB(4001), ParquetException); + EXPECT_THROW(GeometryType::FromWKB(1100), ParquetException); +} + +TEST(TestGeometryUtil, TestBoundingBox) { + EXPECT_EQ(BoundingBox(), BoundingBox(Dimensions::XYZM, {kInf, kInf, kInf, kInf}, + {-kInf, -kInf, -kInf, -kInf})); } } // namespace parquet::geometry From 21f11a1b7270d73f0443e5acf855e8010effd797 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 14 Aug 2024 12:19:05 -0300 Subject: [PATCH 17/61] test some bounding box things --- cpp/src/parquet/geometry_util.h | 2 ++ cpp/src/parquet/geometry_util_test.cc | 29 +++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/cpp/src/parquet/geometry_util.h b/cpp/src/parquet/geometry_util.h index 1461ae6361daa..17bce71226370 100644 --- a/cpp/src/parquet/geometry_util.h +++ b/cpp/src/parquet/geometry_util.h @@ -290,6 +290,8 @@ struct BoundingBox { ss << ", " << min[i] << " => " << max[i]; } + ss << "]"; + return ss.str(); } diff --git a/cpp/src/parquet/geometry_util_test.cc b/cpp/src/parquet/geometry_util_test.cc index fa23500a3801b..0ed6a58cc43d6 100644 --- a/cpp/src/parquet/geometry_util_test.cc +++ b/cpp/src/parquet/geometry_util_test.cc @@ -64,8 +64,33 @@ TEST(TestGeometryUtil, TestGeometryType) { } TEST(TestGeometryUtil, TestBoundingBox) { - EXPECT_EQ(BoundingBox(), BoundingBox(Dimensions::XYZM, {kInf, kInf, kInf, kInf}, - {-kInf, -kInf, -kInf, -kInf})); + BoundingBox box; + EXPECT_EQ(box, BoundingBox(Dimensions::XYZM, {kInf, kInf, kInf, kInf}, + {-kInf, -kInf, -kInf, -kInf})); + EXPECT_EQ(box.ToString(), + "BoundingBox XYZM [inf => -inf, inf => -inf, inf => -inf, inf => -inf]"); + + BoundingBox box_xyzm(Dimensions::XYZM, {-1, -2, -3, -4}, {1, 2, 3, 4}); + + BoundingBox box_xy(Dimensions::XY, {-10, 
-20, kInf, kInf}, {10, 20, -kInf, -kInf}); + BoundingBox box_xyz(Dimensions::XYZ, {kInf, kInf, -30, kInf}, + {-kInf, -kInf, 30, -kInf}); + BoundingBox box_xym(Dimensions::XYM, {kInf, kInf, -40, kInf}, + {-kInf, -kInf, 40, -kInf}); + + box_xyzm.Merge(box_xy); + EXPECT_EQ(box_xyzm, BoundingBox(Dimensions::XYZM, {-10, -20, -3, -4}, {10, 20, 3, 4})); + + box_xyzm.Merge(box_xyz); + EXPECT_EQ(box_xyzm, + BoundingBox(Dimensions::XYZM, {-10, -20, -30, -4}, {10, 20, 30, 4})); + + box_xyzm.Merge(box_xym); + EXPECT_EQ(box_xyzm, + BoundingBox(Dimensions::XYZM, {-10, -20, -30, -40}, {10, 20, 30, 40})); + + box_xyzm.Reset(); + EXPECT_EQ(box_xyzm, BoundingBox()); } } // namespace parquet::geometry From 6bbce5cd6d995cdf6207219cf8d1caeb58de4f37 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 14 Aug 2024 13:40:39 -0300 Subject: [PATCH 18/61] more tests --- cpp/src/parquet/geometry_util.h | 22 +++++++-- cpp/src/parquet/geometry_util_test.cc | 71 +++++++++++++++++++++++++-- 2 files changed, 86 insertions(+), 7 deletions(-) diff --git a/cpp/src/parquet/geometry_util.h b/cpp/src/parquet/geometry_util.h index 17bce71226370..2c0b235faf8e0 100644 --- a/cpp/src/parquet/geometry_util.h +++ b/cpp/src/parquet/geometry_util.h @@ -72,6 +72,21 @@ struct Dimensions { return 4; } + static uint32_t size(dimensions dims) { + switch (dims) { + case XY: + return size(); + case XYZ: + return size(); + case XYM: + return size(); + case XYZM: + return size(); + default: + return 0; + } + } + // Where to look in a coordinate with this dimension // for the X, Y, Z, and M dimensions, respectively. 
static std::array ToXYZM(dimensions dims) { @@ -242,6 +257,7 @@ struct BoundingBox { max{-kInf, -kInf, -kInf, -kInf} {} BoundingBox(const BoundingBox& other) = default; + BoundingBox& operator=(const BoundingBox&) = default; void Reset() { for (int i = 0; i < 4; i++) { @@ -378,7 +394,7 @@ class WKBGenericSequenceBounder { xyzm_swap_(chunk_) {} void ReadPoint(WKBBuffer* src, Dimensions::dimensions dimensions, bool swap) { - if (ARROW_PREDICT_FALSE(swap)) { + if (ARROW_PREDICT_TRUE(!swap)) { switch (dimensions) { case Dimensions::XY: xy_.ReadPoint(src); @@ -412,7 +428,7 @@ class WKBGenericSequenceBounder { } void ReadSequence(WKBBuffer* src, Dimensions::dimensions dimensions, bool swap) { - if (ARROW_PREDICT_FALSE(swap)) { + if (ARROW_PREDICT_TRUE(!swap)) { switch (dimensions) { case Dimensions::XY: xy_.ReadSequence(src); @@ -446,7 +462,7 @@ class WKBGenericSequenceBounder { } void ReadRings(WKBBuffer* src, Dimensions::dimensions dimensions, bool swap) { - if (ARROW_PREDICT_FALSE(swap)) { + if (ARROW_PREDICT_TRUE(!swap)) { switch (dimensions) { case Dimensions::XY: xy_.ReadRings(src); diff --git a/cpp/src/parquet/geometry_util_test.cc b/cpp/src/parquet/geometry_util_test.cc index 0ed6a58cc43d6..b35a515532d96 100644 --- a/cpp/src/parquet/geometry_util_test.cc +++ b/cpp/src/parquet/geometry_util_test.cc @@ -25,10 +25,10 @@ namespace parquet::geometry { TEST(TestGeometryUtil, TestDimensions) { - EXPECT_EQ(Dimensions::size(), 2); - EXPECT_EQ(Dimensions::size(), 3); - EXPECT_EQ(Dimensions::size(), 3); - EXPECT_EQ(Dimensions::size(), 4); + EXPECT_EQ(Dimensions::size(Dimensions::XY), 2); + EXPECT_EQ(Dimensions::size(Dimensions::XYZ), 3); + EXPECT_EQ(Dimensions::size(Dimensions::XYM), 3); + EXPECT_EQ(Dimensions::size(Dimensions::XYZM), 4); EXPECT_EQ(Dimensions::ToString(Dimensions::XY), "XY"); EXPECT_EQ(Dimensions::ToString(Dimensions::XYZ), "XYZ"); @@ -93,4 +93,67 @@ TEST(TestGeometryUtil, TestBoundingBox) { EXPECT_EQ(box_xyzm, BoundingBox()); } +struct WKBTestCase { + 
WKBTestCase() = default; + WKBTestCase(GeometryType::geometry_type x, Dimensions::dimensions y, + const std::vector& z, const std::vector& box_values = {}) + : geometry_type(x), dimensions(y), wkb(z) { + std::array mins = {kInf, kInf, kInf, kInf}; + std::array maxes{-kInf, -kInf, -kInf, -kInf}; + for (uint32_t i = 0; i < Dimensions::size(y); i++) { + mins[i] = box_values[i * 2]; + maxes[i] = box_values[i * 2 + 1]; + } + box = BoundingBox(y, mins, maxes).ToXYZM(); + } + WKBTestCase(const WKBTestCase& other) = default; + + GeometryType::geometry_type geometry_type; + Dimensions::dimensions dimensions; + std::vector wkb; + BoundingBox box; +}; + +std::ostream& operator<<(std::ostream& os, const WKBTestCase& obj) { + os << GeometryType::ToString(obj.geometry_type) << " " + << Dimensions::ToString(obj.dimensions); + return os; +} + +std::ostream& operator<<(std::ostream& os, const BoundingBox& obj) { + os << obj.ToString(); + return os; +} + +class WKBTestFixture : public ::testing::TestWithParam { + protected: + WKBTestCase test_case; +}; + +TEST_P(WKBTestFixture, TestWKBBounderNonEmpty) { + auto item = GetParam(); + + BoundingBox box; + WKBGeometryBounder bounder; + bounder.Finish(&box); + EXPECT_EQ(box, BoundingBox()); + + WKBBuffer buf(item.wkb.data(), item.wkb.size()); + bounder.ReadGeometry(&buf); + EXPECT_EQ(buf.size(), 0); + + bounder.Finish(&box); + EXPECT_EQ(box, item.box); +} + +INSTANTIATE_TEST_SUITE_P( + TestGeometryUtil, WKBTestFixture, + ::testing::Values(WKBTestCase(GeometryType::POINT, Dimensions::XY, + {0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x62, 0x64, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x64}, + {30, 10, 30, 10}) + // foofy + )); + } // namespace parquet::geometry From bb41b064bf58b060e0f8cea9f98c97767a90171e Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 14 Aug 2024 13:57:34 -0300 Subject: [PATCH 19/61] fix test --- cpp/src/parquet/geometry_util_test.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 
deletions(-) diff --git a/cpp/src/parquet/geometry_util_test.cc b/cpp/src/parquet/geometry_util_test.cc index b35a515532d96..8bda14e7e6f42 100644 --- a/cpp/src/parquet/geometry_util_test.cc +++ b/cpp/src/parquet/geometry_util_test.cc @@ -101,8 +101,8 @@ struct WKBTestCase { std::array mins = {kInf, kInf, kInf, kInf}; std::array maxes{-kInf, -kInf, -kInf, -kInf}; for (uint32_t i = 0; i < Dimensions::size(y); i++) { - mins[i] = box_values[i * 2]; - maxes[i] = box_values[i * 2 + 1]; + mins[i] = box_values[i]; + maxes[i] = box_values[Dimensions::size(y) + i]; } box = BoundingBox(y, mins, maxes).ToXYZM(); } @@ -150,8 +150,8 @@ INSTANTIATE_TEST_SUITE_P( TestGeometryUtil, WKBTestFixture, ::testing::Values(WKBTestCase(GeometryType::POINT, Dimensions::XY, {0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x62, 0x64, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x64}, + 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40}, {30, 10, 30, 10}) // foofy )); From 88a42f5e685b86307e835c4e352cc31c99650c20 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 14 Aug 2024 14:25:06 -0300 Subject: [PATCH 20/61] with passing tests --- cpp/src/parquet/geometry_util.h | 4 +- cpp/src/parquet/geometry_util_test.cc | 272 +++++++++++++++++++++++++- 2 files changed, 267 insertions(+), 9 deletions(-) diff --git a/cpp/src/parquet/geometry_util.h b/cpp/src/parquet/geometry_util.h index 2c0b235faf8e0..262159095027d 100644 --- a/cpp/src/parquet/geometry_util.h +++ b/cpp/src/parquet/geometry_util.h @@ -340,7 +340,7 @@ class WKBSequenceBounder { void ReadSequence(WKBBuffer* src) { constexpr uint32_t coord_size = Dimensions::size(); - constexpr uint32_t coords_per_chunk = chunk_size / sizeof(double) / coord_size; + constexpr uint32_t coords_per_chunk = chunk_size / coord_size; uint32_t n_coords = src->ReadUInt32(); uint32_t n_chunks = n_coords / coords_per_chunk; @@ -350,7 +350,7 @@ class WKBSequenceBounder { } uint32_t remaining_coords = 
n_coords - (n_chunks * coords_per_chunk); - src->ReadDoubles(remaining_coords, chunk_); + src->ReadDoubles(remaining_coords * coord_size, chunk_); ReadChunk(remaining_coords); } diff --git a/cpp/src/parquet/geometry_util_test.cc b/cpp/src/parquet/geometry_util_test.cc index 8bda14e7e6f42..ba03626997c14 100644 --- a/cpp/src/parquet/geometry_util_test.cc +++ b/cpp/src/parquet/geometry_util_test.cc @@ -148,12 +148,270 @@ TEST_P(WKBTestFixture, TestWKBBounderNonEmpty) { INSTANTIATE_TEST_SUITE_P( TestGeometryUtil, WKBTestFixture, - ::testing::Values(WKBTestCase(GeometryType::POINT, Dimensions::XY, - {0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40}, - {30, 10, 30, 10}) - // foofy - )); + ::testing::Values( + WKBTestCase(GeometryType::POINT, Dimensions::XY, + {0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40}, + {30, 10, 30, 10}), + WKBTestCase(GeometryType::POINT, Dimensions::XYZ, + {0x01, 0xe9, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, + {30, 10, 40, 30, 10, 40}), + WKBTestCase(GeometryType::POINT, Dimensions::XYM, + {0x01, 0xd1, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, + {30, 10, 300, 30, 10, 300}), + WKBTestCase(GeometryType::POINT, Dimensions::XYZM, + {0x01, 0xb9, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, + {30, 10, 40, 300, 30, 10, 40, 300}), + WKBTestCase(GeometryType::LINESTRING, Dimensions::XY, + {0x01, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 
0x3e, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, + {10, 10, 40, 40}), + WKBTestCase(GeometryType::LINESTRING, Dimensions::XYZ, + {0x01, 0xea, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x54, 0x40}, + {10, 10, 40, 40, 40, 80}), + WKBTestCase(GeometryType::LINESTRING, Dimensions::XYM, + {0x01, 0xd2, 0x07, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, + 0x72, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x99, 0x40}, + {10, 10, 300, 40, 40, 1600}), + WKBTestCase(GeometryType::LINESTRING, Dimensions::XYZM, + {0x01, 0xba, 0x0b, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, 0x00, 0x00, 
+ 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x99, 0x40}, + {10, 10, 40, 300, 40, 40, 80, 1600}), + WKBTestCase(GeometryType::POLYGON, Dimensions::XY, + {0x01, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x24, 0x40}, + {10, 10, 40, 40}), + WKBTestCase( + GeometryType::POLYGON, Dimensions::XYZ, + {0x01, 0xeb, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x44, 0x40}, + {10, 10, 30, 40, 40, 80}), + WKBTestCase( + GeometryType::POLYGON, Dimensions::XYM, + {0x01, 0xd3, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x89, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x69, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xc0, 0x72, 0x40}, + {10, 10, 200, 40, 40, 1600}), + WKBTestCase( + GeometryType::POLYGON, Dimensions::XYZM, + {0x01, 0xbb, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x54, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x89, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x69, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xc0, 0x72, 0x40}, + {10, 10, 30, 200, 40, 40, 80, 1600}), + WKBTestCase(GeometryType::MULTIPOINT, Dimensions::XY, + {0x01, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, + 0x01, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40}, + {30, 10, 30, 10}), + WKBTestCase(GeometryType::MULTIPOINT, Dimensions::XYZ, + {0x01, 0xec, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, + 0xe9, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, + {30, 10, 40, 30, 10, 40}), + WKBTestCase(GeometryType::MULTIPOINT, Dimensions::XYM, + {0x01, 0xd4, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, + 0xd1, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, + {30, 10, 300, 30, 10, 300}), + WKBTestCase(GeometryType::MULTIPOINT, Dimensions::XYZM, + {0x01, 0xbc, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, + 0xb9, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, + {30, 10, 40, 300, 30, 10, 40, 300}), + WKBTestCase(GeometryType::MULTILINESTRING, Dimensions::XY, + {0x01, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x02, + 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, + {10, 10, 40, 40}), + WKBTestCase( + GeometryType::MULTILINESTRING, Dimensions::XYZ, + {0x01, 0xed, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xea, 0x03, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x40}, + {10, 10, 40, 40, 40, 80}), + WKBTestCase( + GeometryType::MULTILINESTRING, Dimensions::XYM, + {0x01, 0xd5, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xd2, 0x07, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xc0, 0x72, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x40}, + {10, 10, 300, 40, 40, 1600}), + WKBTestCase( + GeometryType::MULTILINESTRING, Dimensions::XYZM, + {0x01, 0xbd, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xba, 0x0b, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x54, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x40}, + {10, 10, 40, 300, 40, 40, 80, 1600}), + WKBTestCase( + GeometryType::MULTIPOLYGON, Dimensions::XY, + {0x01, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x03, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 
0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40}, + {10, 10, 40, 40}), + WKBTestCase( + GeometryType::MULTIPOLYGON, Dimensions::XYZ, + {0x01, 0xee, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xeb, 0x03, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x54, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x4e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, + {10, 10, 30, 40, 40, 80}), + WKBTestCase( + GeometryType::MULTIPOLYGON, Dimensions::XYM, + {0x01, 0xd6, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xd3, 0x07, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x99, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, + 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x89, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x69, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, + {10, 10, 200, 40, 40, 1600}), + WKBTestCase(GeometryType::MULTIPOLYGON, Dimensions::XYZM, + {0x01, 0xbe, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xbb, + 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x89, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x69, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, + {10, 10, 30, 200, 40, 40, 80, 1600}), + WKBTestCase(GeometryType::GEOMETRYCOLLECTION, Dimensions::XY, + {0x01, 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40}, + {30, 10, 30, 10}), + WKBTestCase(GeometryType::GEOMETRYCOLLECTION, Dimensions::XYZ, + {0x01, 0xef, 
0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, + 0xe9, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, + {30, 10, 40, 30, 10, 40}), + WKBTestCase(GeometryType::GEOMETRYCOLLECTION, Dimensions::XYM, + {0x01, 0xd7, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, + 0xd1, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, + {30, 10, 300, 30, 10, 300}), + WKBTestCase(GeometryType::GEOMETRYCOLLECTION, Dimensions::XYZM, + {0x01, 0xbf, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, + 0xb9, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, + {30, 10, 40, 300, 30, 10, 40, 300}) + // foofy + )); } // namespace parquet::geometry From 3798ef1afa876cc3daecbdfa21a8efb4af3ed835 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 14 Aug 2024 14:26:40 -0300 Subject: [PATCH 21/61] add in WKT equiv --- cpp/src/parquet/geometry_util_test.cc | 34 ++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/cpp/src/parquet/geometry_util_test.cc b/cpp/src/parquet/geometry_util_test.cc index ba03626997c14..953c2644e0480 100644 --- a/cpp/src/parquet/geometry_util_test.cc +++ b/cpp/src/parquet/geometry_util_test.cc @@ -149,26 +149,31 @@ TEST_P(WKBTestFixture, TestWKBBounderNonEmpty) { INSTANTIATE_TEST_SUITE_P( TestGeometryUtil, WKBTestFixture, ::testing::Values( + // POINT (30 10) WKBTestCase(GeometryType::POINT, Dimensions::XY, {0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40}, {30, 10, 30, 10}), + // POINT Z (30 10 40) WKBTestCase(GeometryType::POINT, Dimensions::XYZ, {0x01, 0xe9, 
0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, {30, 10, 40, 30, 10, 40}), + // POINT M (30 10 300) WKBTestCase(GeometryType::POINT, Dimensions::XYM, {0x01, 0xd1, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, {30, 10, 300, 30, 10, 300}), + // POINT ZM (30 10 40 300) WKBTestCase(GeometryType::POINT, Dimensions::XYZM, {0x01, 0xb9, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, {30, 10, 40, 300, 30, 10, 40, 300}), + // LINESTRING (30 10, 10 30, 40 40) WKBTestCase(GeometryType::LINESTRING, Dimensions::XY, {0x01, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, @@ -177,6 +182,7 @@ INSTANTIATE_TEST_SUITE_P( 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, {10, 10, 40, 40}), + // LINESTRING Z (30 10 40, 10 30 40, 40 40 80) WKBTestCase(GeometryType::LINESTRING, Dimensions::XYZ, {0x01, 0xea, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -187,6 +193,7 @@ INSTANTIATE_TEST_SUITE_P( 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x40}, {10, 10, 40, 40, 40, 80}), + // LINESTRING M (30 10 300, 10 30 300, 40 40 1600) WKBTestCase(GeometryType::LINESTRING, Dimensions::XYM, {0x01, 0xd2, 0x07, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -197,6 +204,7 @@ INSTANTIATE_TEST_SUITE_P( 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x40}, {10, 10, 300, 40, 40, 
1600}), + // LINESTRING ZM (30 10 40 300, 10 30 40 300, 40 40 80 1600) WKBTestCase(GeometryType::LINESTRING, Dimensions::XYZM, {0x01, 0xba, 0x0b, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -209,6 +217,7 @@ INSTANTIATE_TEST_SUITE_P( 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x40}, {10, 10, 40, 300, 40, 40, 80, 1600}), + // POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10)) WKBTestCase(GeometryType::POLYGON, Dimensions::XY, {0x01, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, @@ -220,6 +229,7 @@ INSTANTIATE_TEST_SUITE_P( 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40}, {10, 10, 40, 40}), + // POLYGON Z ((30 10 40, 40 40 80, 20 40 60, 10 20 30, 30 10 40)) WKBTestCase( GeometryType::POLYGON, Dimensions::XYZ, {0x01, 0xeb, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, @@ -234,6 +244,7 @@ INSTANTIATE_TEST_SUITE_P( 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, {10, 10, 30, 40, 40, 80}), + // POLYGON M ((30 10 300, 40 40 1600, 20 40 800, 10 20 200, 30 10 300)) WKBTestCase( GeometryType::POLYGON, Dimensions::XYM, {0x01, 0xd3, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, @@ -248,6 +259,8 @@ INSTANTIATE_TEST_SUITE_P( 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, {10, 10, 200, 40, 40, 1600}), + // POLYGON ZM ((30 10 40 300, 40 40 80 1600, 20 40 60 800, 10 20 30 200, 30 10 40 + // 300)) WKBTestCase( GeometryType::POLYGON, Dimensions::XYZM, {0x01, 0xbb, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, @@ -265,23 +278,27 @@ INSTANTIATE_TEST_SUITE_P( 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, {10, 10, 30, 200, 40, 40, 
80, 1600}), + // MULTIPOINT ((30 10)) WKBTestCase(GeometryType::MULTIPOINT, Dimensions::XY, {0x01, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40}, {30, 10, 30, 10}), + // MULTIPOINT Z ((30 10 40)) WKBTestCase(GeometryType::MULTIPOINT, Dimensions::XYZ, {0x01, 0xec, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xe9, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, {30, 10, 40, 30, 10, 40}), + // MULTIPOINT M ((30 10 300)) WKBTestCase(GeometryType::MULTIPOINT, Dimensions::XYM, {0x01, 0xd4, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xd1, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, {30, 10, 300, 30, 10, 300}), + // MULTIPOINT ZM ((30 10 40 300)) WKBTestCase(GeometryType::MULTIPOINT, Dimensions::XYZM, {0x01, 0xbc, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xb9, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -289,6 +306,7 @@ INSTANTIATE_TEST_SUITE_P( 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, {30, 10, 40, 300, 30, 10, 40, 300}), + // MULTILINESTRING ((30 10, 10 30, 40 40)) WKBTestCase(GeometryType::MULTILINESTRING, Dimensions::XY, {0x01, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -297,6 +315,7 @@ INSTANTIATE_TEST_SUITE_P( 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, {10, 10, 40, 40}), + // MULTILINESTRING Z ((30 10 40, 10 30 40, 40 40 80)) WKBTestCase( GeometryType::MULTILINESTRING, Dimensions::XYZ, {0x01, 0xed, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xea, 0x03, 
0x00, @@ -307,6 +326,7 @@ INSTANTIATE_TEST_SUITE_P( 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x40}, {10, 10, 40, 40, 40, 80}), + // MULTILINESTRING M ((30 10 300, 10 30 300, 40 40 1600)) WKBTestCase( GeometryType::MULTILINESTRING, Dimensions::XYM, {0x01, 0xd5, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xd2, 0x07, 0x00, @@ -317,6 +337,7 @@ INSTANTIATE_TEST_SUITE_P( 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x40}, {10, 10, 300, 40, 40, 1600}), + // MULTILINESTRING ZM ((30 10 40 300, 10 30 40 300, 40 40 80 1600)) WKBTestCase( GeometryType::MULTILINESTRING, Dimensions::XYZM, {0x01, 0xbd, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xba, 0x0b, 0x00, @@ -329,6 +350,7 @@ INSTANTIATE_TEST_SUITE_P( 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x40}, {10, 10, 40, 300, 40, 40, 80, 1600}), + // MULTIPOLYGON (((30 10, 40 40, 20 40, 10 20, 30 10))) WKBTestCase( GeometryType::MULTIPOLYGON, Dimensions::XY, {0x01, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x03, 0x00, 0x00, @@ -340,6 +362,7 @@ INSTANTIATE_TEST_SUITE_P( 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40}, {10, 10, 40, 40}), + // MULTIPOLYGON Z (((30 10 40, 40 40 80, 20 40 60, 10 20 30, 30 10 40))) WKBTestCase( GeometryType::MULTIPOLYGON, Dimensions::XYZ, {0x01, 0xee, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xeb, 0x03, 0x00, @@ -354,6 +377,7 @@ INSTANTIATE_TEST_SUITE_P( 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, {10, 10, 30, 40, 40, 80}), + // MULTIPOLYGON M (((30 10 300, 40 40 1600, 20 40 800, 10 20 200, 
30 10 300))) WKBTestCase( GeometryType::MULTIPOLYGON, Dimensions::XYM, {0x01, 0xd6, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xd3, 0x07, 0x00, @@ -368,6 +392,8 @@ INSTANTIATE_TEST_SUITE_P( 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, {10, 10, 200, 40, 40, 1600}), + // MULTIPOLYGON ZM (((30 10 40 300, 40 40 80 1600, 20 40 60 800, 10 20 30 200, 30 + // 10 40 300))) WKBTestCase(GeometryType::MULTIPOLYGON, Dimensions::XYZM, {0x01, 0xbe, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xbb, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, @@ -387,31 +413,33 @@ INSTANTIATE_TEST_SUITE_P( 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, {10, 10, 30, 200, 40, 40, 80, 1600}), + // GEOMETRYCOLLECTION (POINT (30 10)) WKBTestCase(GeometryType::GEOMETRYCOLLECTION, Dimensions::XY, {0x01, 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40}, {30, 10, 30, 10}), + // GEOMETRYCOLLECTION Z (POINT Z (30 10 40)) WKBTestCase(GeometryType::GEOMETRYCOLLECTION, Dimensions::XYZ, {0x01, 0xef, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xe9, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, {30, 10, 40, 30, 10, 40}), + // GEOMETRYCOLLECTION M (POINT M (30 10 300)) WKBTestCase(GeometryType::GEOMETRYCOLLECTION, Dimensions::XYM, {0x01, 0xd7, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xd1, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, {30, 10, 300, 30, 10, 300}), + // GEOMETRYCOLLECTION ZM (POINT ZM (30 10 40 300)) WKBTestCase(GeometryType::GEOMETRYCOLLECTION, 
Dimensions::XYZM, {0x01, 0xbf, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xb9, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, - {30, 10, 40, 300, 30, 10, 40, 300}) - // foofy - )); + {30, 10, 40, 300, 30, 10, 40, 300}))); } // namespace parquet::geometry From ccbd61650402510122aafc3d6d17b87290317a9f Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 14 Aug 2024 14:41:29 -0300 Subject: [PATCH 22/61] more tests --- cpp/src/parquet/geometry_util.h | 29 +++++++++++++++++++++------ cpp/src/parquet/geometry_util_test.cc | 13 +++++++----- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/cpp/src/parquet/geometry_util.h b/cpp/src/parquet/geometry_util.h index 262159095027d..c76c500efc299 100644 --- a/cpp/src/parquet/geometry_util.h +++ b/cpp/src/parquet/geometry_util.h @@ -32,7 +32,7 @@ namespace parquet::geometry { constexpr double kInf = std::numeric_limits::infinity(); struct Dimensions { - enum dimensions { XY = 1, XYZ = 2, XYM = 3, XYZM = 4 }; + enum dimensions { XY = 0, XYZ = 1, XYM = 2, XYZM = 3 }; static dimensions FromWKB(uint32_t wkb_geometry_type) { switch (wkb_geometry_type / 1000) { @@ -533,7 +533,7 @@ class WKBGeometryBounder { public: WKBGeometryBounder() : box_(Dimensions::XYZM) {} - void ReadGeometry(WKBBuffer* src) { + void ReadGeometry(WKBBuffer* src, bool record_wkb_type = true) { uint8_t endian = src->ReadUInt8(); #if defined(ARROW_LITTLE_ENDIAN) bool swap = endian != 0x01; @@ -545,8 +545,10 @@ class WKBGeometryBounder { auto geometry_type = GeometryType::FromWKB(wkb_geometry_type); auto dimensions = Dimensions::FromWKB(wkb_geometry_type); - // Keep track of geometry types encountered - wkb_types_.insert(wkb_geometry_type); + // Keep track of geometry types encountered if at the top level + if (record_wkb_type) { + wkb_types_.insert(wkb_geometry_type); + } switch 
(geometry_type) { case GeometryType::POINT: @@ -568,14 +570,29 @@ class WKBGeometryBounder { case GeometryType::GEOMETRYCOLLECTION: { uint32_t n_parts = src->ReadUInt32(swap); for (uint32_t i = 0; i < n_parts; i++) { - ReadGeometry(src); + ReadGeometry(src, /*record_wkb_type*/ false); } break; } } } - void Finish(BoundingBox* out) { bounder_.Finish(out); } + const BoundingBox& Bounds() { + bounder_.Finish(&box_); + return box_; + } + + std::vector WkbTypes() { + std::vector out(wkb_types_.begin(), wkb_types_.end()); + std::sort(out.begin(), out.end()); + return out; + } + + void Reset() { + box_.Reset(); + bounder_.Reset(); + wkb_types_.clear(); + } private: BoundingBox box_; diff --git a/cpp/src/parquet/geometry_util_test.cc b/cpp/src/parquet/geometry_util_test.cc index 953c2644e0480..8d65e820dc531 100644 --- a/cpp/src/parquet/geometry_util_test.cc +++ b/cpp/src/parquet/geometry_util_test.cc @@ -133,17 +133,20 @@ class WKBTestFixture : public ::testing::TestWithParam { TEST_P(WKBTestFixture, TestWKBBounderNonEmpty) { auto item = GetParam(); - BoundingBox box; WKBGeometryBounder bounder; - bounder.Finish(&box); - EXPECT_EQ(box, BoundingBox()); + EXPECT_EQ(bounder.Bounds(), BoundingBox()); WKBBuffer buf(item.wkb.data(), item.wkb.size()); bounder.ReadGeometry(&buf); EXPECT_EQ(buf.size(), 0); - bounder.Finish(&box); - EXPECT_EQ(box, item.box); + EXPECT_EQ(bounder.Bounds(), item.box); + uint32_t wkb_type = item.dimensions * 1000 + item.geometry_type; + EXPECT_THAT(bounder.WkbTypes(), ::testing::ElementsAre(::testing::Eq(wkb_type))); + + bounder.Reset(); + EXPECT_EQ(bounder.Bounds(), BoundingBox()); + EXPECT_TRUE(bounder.WkbTypes().empty()); } INSTANTIATE_TEST_SUITE_P( From 3752f837bf4bfec625937c95df7ccedf8cf024c4 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 17 Aug 2024 00:00:49 -0300 Subject: [PATCH 23/61] start on stats --- cpp/src/parquet/geometry_util.h | 6 +++--- cpp/src/parquet/statistics.cc | 20 ++++++++++++++++++++ cpp/src/parquet/statistics.h | 19 
+++++++++++++++++++ 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/cpp/src/parquet/geometry_util.h b/cpp/src/parquet/geometry_util.h index c76c500efc299..31c2b35784a19 100644 --- a/cpp/src/parquet/geometry_util.h +++ b/cpp/src/parquet/geometry_util.h @@ -208,7 +208,7 @@ class WKBBuffer { size_ -= sizeof(uint32_t); if constexpr (swap) { - value = arrow::bit_util::ByteSwap(value); + value = ::arrow::bit_util::ByteSwap(value); } return value; @@ -232,7 +232,7 @@ class WKBBuffer { if constexpr (swap) { for (uint32_t i = 0; i < n; i++) { - out[i] = arrow::bit_util::ByteSwap(out[i]); + out[i] = ::arrow::bit_util::ByteSwap(out[i]); } } } @@ -316,7 +316,7 @@ struct BoundingBox { double max[4]; }; -bool operator==(const BoundingBox& lhs, const BoundingBox& rhs) { +inline bool operator==(const BoundingBox& lhs, const BoundingBox& rhs) { return lhs.dimensions == rhs.dimensions && std::memcmp(lhs.min, rhs.min, sizeof(lhs.min)) == 0 && std::memcmp(lhs.max, rhs.max, sizeof(lhs.max)) == 0; diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index e54b94f1a861a..a5385558806b4 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -36,6 +36,7 @@ #include "arrow/visit_data_inline.h" #include "parquet/encoding.h" #include "parquet/exception.h" +#include "parquet/geometry_util.h" #include "parquet/platform.h" #include "parquet/schema.h" @@ -618,6 +619,7 @@ class TypedStatisticsImpl : public TypedStatistics { bool HasDistinctCount() const override { return has_distinct_count_; }; bool HasMinMax() const override { return has_min_max_; } bool HasNullCount() const override { return has_null_count_; }; + bool HasGeometryStatistics() const override { return geometry_statistics_ != nullptr; } void IncrementNullCount(int64_t n) override { statistics_.null_count += n; @@ -630,6 +632,8 @@ class TypedStatisticsImpl : public TypedStatistics { switch (type) { case LogicalType::Type::FLOAT16: return true; + case 
LogicalType::Type::GEOMETRY: + return true; default: return false; } @@ -654,6 +658,15 @@ class TypedStatisticsImpl : public TypedStatistics { if (!MinMaxEqual(other)) return false; } + if (HasGeometryStatistics() != other.HasGeometryStatistics()) { + return false; + } + + if (HasGeometryStatistics() && + !geometry_statistics_->Equals(*other.GeometryStatistics())) { + return false; + } + return null_count() == other.null_count() && distinct_count() == other.distinct_count() && num_values() == other.num_values(); @@ -773,6 +786,7 @@ class TypedStatisticsImpl : public TypedStatistics { std::shared_ptr> comparator_; std::shared_ptr min_buffer_, max_buffer_; LogicalType::Type::type logical_type_ = LogicalType::Type::NONE; + std::shared_ptr geometry_statistics_; void PlainEncode(const T& src, std::string* dst) const; void PlainDecode(const std::string& src, T* dst) const; @@ -865,6 +879,12 @@ void TypedStatisticsImpl::Update(const T* values, int64_t num_values, if (num_values == 0) return; SetMinMaxPair(comparator_->GetMinMax(values, num_values)); + + if constexpr (std::is_same::value) { + if (logical_type_ == LogicalType::Type::GEOMETRY) { + geometry_statistics_->Update(values, num_values, null_count); + } + } } template diff --git a/cpp/src/parquet/statistics.h b/cpp/src/parquet/statistics.h index c5da44a7b6665..a122d9b54d60f 100644 --- a/cpp/src/parquet/statistics.h +++ b/cpp/src/parquet/statistics.h @@ -24,6 +24,7 @@ #include #include +#include "parquet/geometry_util.h" #include "parquet/platform.h" #include "parquet/types.h" @@ -114,6 +115,20 @@ std::shared_ptr> MakeComparator(const ColumnDescriptor* d return std::static_pointer_cast>(Comparator::Make(descr)); } +class GeometryStatistics { + public: + bool Equals(const GeometryStatistics& other) const { ParquetException::NYI(); } + + void Merge(const GeometryStatistics& other) { ParquetException::NYI(); } + + void Update(const ByteArray* values, int64_t num_values, int64_t null_count) { + ParquetException::NYI(); + 
} + + private: + geometry::WKBGeometryBounder bounder_; +}; + // ---------------------------------------------------------------------- /// \brief Structure represented encoded statistics to be written to @@ -250,6 +265,10 @@ class PARQUET_EXPORT Statistics { /// with TypedStatistics::min and max virtual bool HasMinMax() const = 0; + virtual bool HasGeometryStatistics() const { return false; }; + + virtual const GeometryStatistics* GeometryStatistics() const { return nullptr; } + /// \brief Reset state of object to initial (no data observed) state virtual void Reset() = 0; From 2ff6078a285aece19c8833cb5f8823643a2c4c3b Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 19 Aug 2024 10:15:40 -0300 Subject: [PATCH 24/61] implement update/merge for geometry statistics --- cpp/src/parquet/geometry_util.h | 17 ++++-- cpp/src/parquet/geometry_util_test.cc | 1 + cpp/src/parquet/statistics.cc | 76 +++++++++++++++++++++++++++ cpp/src/parquet/statistics.h | 15 +++--- 4 files changed, 97 insertions(+), 12 deletions(-) diff --git a/cpp/src/parquet/geometry_util.h b/cpp/src/parquet/geometry_util.h index 31c2b35784a19..b166270af5bc5 100644 --- a/cpp/src/parquet/geometry_util.h +++ b/cpp/src/parquet/geometry_util.h @@ -176,8 +176,14 @@ struct GeometryType { class WKBBuffer { public: + WKBBuffer() : data_(nullptr), size_(0) {} WKBBuffer(const uint8_t* data, int64_t size) : data_(data), size_(size) {} + void Init(const uint8_t* data, int64_t size) { + data_ = data; + size_ = size; + } + uint8_t ReadUInt8() { if (size_ < 1) { throw ParquetException("Can't read 1 byte from empty WKBBuffer"); @@ -577,17 +583,18 @@ class WKBGeometryBounder { } } - const BoundingBox& Bounds() { - bounder_.Finish(&box_); - return box_; - } + void ReadBox(const BoundingBox& box) { box_.Merge(box); } - std::vector WkbTypes() { + const BoundingBox& Bounds() const { return box_; } + + std::vector WkbTypes() const { std::vector out(wkb_types_.begin(), wkb_types_.end()); std::sort(out.begin(), 
out.end()); return out; } + void Flush() { bounder_.Finish(&box_); } + void Reset() { box_.Reset(); bounder_.Reset(); diff --git a/cpp/src/parquet/geometry_util_test.cc b/cpp/src/parquet/geometry_util_test.cc index 8d65e820dc531..b92c9b6a8d5af 100644 --- a/cpp/src/parquet/geometry_util_test.cc +++ b/cpp/src/parquet/geometry_util_test.cc @@ -140,6 +140,7 @@ TEST_P(WKBTestFixture, TestWKBBounderNonEmpty) { bounder.ReadGeometry(&buf); EXPECT_EQ(buf.size(), 0); + bounder.Flush(); EXPECT_EQ(bounder.Bounds(), item.box); uint32_t wkb_type = item.dimensions * 1000 + item.geometry_type; EXPECT_THAT(bounder.WkbTypes(), ::testing::ElementsAre(::testing::Eq(wkb_type))); diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index a5385558806b4..272cd2bfe1745 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -48,6 +48,81 @@ using arrow::util::SafeCopy; using arrow::util::SafeLoad; namespace parquet { + +class GeometryStatisticsImpl { + public: + bool Equals(const GeometryStatisticsImpl& other) const { + if (is_valid_ != other.is_valid_) { + return false; + } + + if (!is_valid_ && !other.is_valid_) { + return true; + } + + auto wkb_types = bounder_.WkbTypes(); + auto other_wkb_types = other.bounder_.WkbTypes(); + if (wkb_types.size() != other_wkb_types.size()) { + return false; + } + + for (size_t i = 0; i < wkb_types.size(); i++) { + if (wkb_types[i] != other_wkb_types[i]) { + return false; + } + } + + return bounder_.Bounds() == other.bounder_.Bounds(); + } + + void Merge(const GeometryStatisticsImpl& other) { + if (!is_valid_ || !other.is_valid_) { + is_valid_ = false; + return; + } + + bounder_.ReadBox(other.bounder_.Bounds()); + } + + void Update(const ByteArray* values, int64_t num_values, int64_t null_count) { + if (!is_valid_) { + return; + } + + geometry::WKBBuffer buf; + try { + for (int64_t i = 0; i < num_values; i++) { + const ByteArray& item = values[i]; + buf.Init(item.ptr, item.len); + 
bounder_.ReadGeometry(&buf); + } + } catch (ParquetException& e) { + is_valid_ = false; + } + } + + private: + geometry::WKBGeometryBounder bounder_; + bool is_valid_{}; +}; + +GeometryStatistics::GeometryStatistics() { + impl_ = std::make_unique(); +} + +bool GeometryStatistics::Equals(const GeometryStatistics& other) const { + return impl_->Equals(*other.impl_); +} + +void GeometryStatistics::Merge(const GeometryStatistics& other) { + impl_->Merge(*other.impl_); +} + +void GeometryStatistics::Update(const ByteArray* values, int64_t num_values, + int64_t null_count) { + impl_->Update(values, num_values, null_count); +} + namespace { // ---------------------------------------------------------------------- @@ -554,6 +629,7 @@ LogicalType::Type::type LogicalTypeId(const ColumnDescriptor* descr) { } return LogicalType::Type::NONE; } + LogicalType::Type::type LogicalTypeId(const Statistics& stats) { return LogicalTypeId(stats.descr()); } diff --git a/cpp/src/parquet/statistics.h b/cpp/src/parquet/statistics.h index a122d9b54d60f..649fef84a613b 100644 --- a/cpp/src/parquet/statistics.h +++ b/cpp/src/parquet/statistics.h @@ -24,7 +24,6 @@ #include #include -#include "parquet/geometry_util.h" #include "parquet/platform.h" #include "parquet/types.h" @@ -115,18 +114,20 @@ std::shared_ptr> MakeComparator(const ColumnDescriptor* d return std::static_pointer_cast>(Comparator::Make(descr)); } +class GeometryStatisticsImpl; + class GeometryStatistics { public: - bool Equals(const GeometryStatistics& other) const { ParquetException::NYI(); } + GeometryStatistics(); - void Merge(const GeometryStatistics& other) { ParquetException::NYI(); } + bool Equals(const GeometryStatistics& other) const; - void Update(const ByteArray* values, int64_t num_values, int64_t null_count) { - ParquetException::NYI(); - } + void Merge(const GeometryStatistics& other); + + void Update(const ByteArray* values, int64_t num_values, int64_t null_count); private: - geometry::WKBGeometryBounder bounder_; + 
std::unique_ptr impl_; }; // ---------------------------------------------------------------------- From 79e471509776cb90e59ef5165ff93c04390c0f7f Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 19 Aug 2024 11:02:26 -0300 Subject: [PATCH 25/61] more complete stats --- cpp/src/parquet/statistics.cc | 2 ++ cpp/src/parquet/statistics.h | 38 ++++++++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index 272cd2bfe1745..7fc3009ce2148 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -96,6 +96,8 @@ class GeometryStatisticsImpl { buf.Init(item.ptr, item.len); bounder_.ReadGeometry(&buf); } + + bounder_.Flush(); } catch (ParquetException& e) { is_valid_ = false; } diff --git a/cpp/src/parquet/statistics.h b/cpp/src/parquet/statistics.h index 649fef84a613b..572d6a6c7bd99 100644 --- a/cpp/src/parquet/statistics.h +++ b/cpp/src/parquet/statistics.h @@ -114,9 +114,34 @@ std::shared_ptr> MakeComparator(const ColumnDescriptor* d return std::static_pointer_cast>(Comparator::Make(descr)); } +class PARQUET_EXPORT EncodedGeometryStatistics { + public: + static constexpr double kInf = std::numeric_limits::infinity(); + + EncodedGeometryStatistics() = default; + EncodedGeometryStatistics(const EncodedGeometryStatistics&) = default; + EncodedGeometryStatistics(EncodedGeometryStatistics&&) = default; + EncodedGeometryStatistics& operator=(const EncodedGeometryStatistics&) = default; + + double xmin{kInf}; + double xmax{-kInf}; + double ymin{kInf}; + double ymax{-kInf}; + double zmin{kInf}; + double zmax{-kInf}; + double mmin{kInf}; + double mmax{-kInf}; + std::vector> coverings; + std::vector geometry_types; + + bool has_z() const { return (zmax - zmin) > 0; } + + bool has_m() const { return (mmax - mmin) > 0; } +}; + class GeometryStatisticsImpl; -class GeometryStatistics { +class PARQUET_EXPORT GeometryStatistics { public: 
GeometryStatistics(); @@ -137,12 +162,16 @@ class GeometryStatistics { class PARQUET_EXPORT EncodedStatistics { std::string max_, min_; bool is_signed_ = false; + EncodedGeometryStatistics geometry_statistics_; public: EncodedStatistics() = default; const std::string& max() const { return max_; } const std::string& min() const { return min_; } + const EncodedGeometryStatistics& geometry_statistics() const { + return geometry_statistics_; + } int64_t null_count = 0; int64_t distinct_count = 0; @@ -151,6 +180,7 @@ class PARQUET_EXPORT EncodedStatistics { bool has_max = false; bool has_null_count = false; bool has_distinct_count = false; + bool has_geometry_statistics = false; // When all values in the statistics are null, it is set to true. // Otherwise, at least one value is not null, or we are not sure at all. @@ -205,6 +235,12 @@ class PARQUET_EXPORT EncodedStatistics { has_distinct_count = true; return *this; } + + EncodedStatistics& set_geometry(EncodedGeometryStatistics geometry_statistics) { + geometry_statistics_ = std::move(geometry_statistics); + has_geometry_statistics = true; + return *this; + } }; /// \brief Base type for computing column statistics while writing a file From 44447576bdbfcbb25c26345178324b51396b29d7 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 19 Aug 2024 11:30:31 -0300 Subject: [PATCH 26/61] start on factory methods --- cpp/src/parquet/metadata.cc | 10 ++++++---- cpp/src/parquet/statistics.cc | 18 +++++++++--------- cpp/src/parquet/statistics.h | 18 +++++++++++------- cpp/src/parquet/statistics_test.cc | 8 ++++---- 4 files changed, 30 insertions(+), 24 deletions(-) diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index 10c8afaf37507..a0e1d382cc3a4 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -96,18 +96,20 @@ static std::shared_ptr MakeTypedColumnStats( return MakeStatistics( descr, metadata.statistics.min_value, metadata.statistics.max_value, metadata.num_values - 
metadata.statistics.null_count, - metadata.statistics.null_count, metadata.statistics.distinct_count, + metadata.statistics.null_count, metadata.statistics.distinct_count, {}, metadata.statistics.__isset.max_value && metadata.statistics.__isset.min_value, metadata.statistics.__isset.null_count, - metadata.statistics.__isset.distinct_count); + metadata.statistics.__isset.distinct_count, + metadata.statistics.__isset.geometry_stats); } // Default behavior return MakeStatistics( descr, metadata.statistics.min, metadata.statistics.max, metadata.num_values - metadata.statistics.null_count, - metadata.statistics.null_count, metadata.statistics.distinct_count, + metadata.statistics.null_count, metadata.statistics.distinct_count, {}, metadata.statistics.__isset.max && metadata.statistics.__isset.min, - metadata.statistics.__isset.null_count, metadata.statistics.__isset.distinct_count); + metadata.statistics.__isset.null_count, metadata.statistics.__isset.distinct_count, + metadata.statistics.__isset.geometry_stats); } std::shared_ptr MakeColumnStats(const format::ColumnMetaData& meta_data, diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index 7fc3009ce2148..a0096fca7b059 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -1132,18 +1132,18 @@ std::shared_ptr Statistics::Make(const ColumnDescriptor* descr, ::arrow::MemoryPool* pool) { DCHECK(encoded_stats != nullptr); return Make(descr, encoded_stats->min(), encoded_stats->max(), num_values, - encoded_stats->null_count, encoded_stats->distinct_count, + encoded_stats->null_count, encoded_stats->distinct_count, {}, encoded_stats->has_min && encoded_stats->has_max, - encoded_stats->has_null_count, encoded_stats->has_distinct_count, pool); + encoded_stats->has_null_count, encoded_stats->has_distinct_count, false, + pool); } -std::shared_ptr Statistics::Make(const ColumnDescriptor* descr, - const std::string& encoded_min, - const std::string& encoded_max, - int64_t 
num_values, int64_t null_count, - int64_t distinct_count, bool has_min_max, - bool has_null_count, bool has_distinct_count, - ::arrow::MemoryPool* pool) { +std::shared_ptr Statistics::Make( + const ColumnDescriptor* descr, const std::string& encoded_min, + const std::string& encoded_max, int64_t num_values, int64_t null_count, + int64_t distinct_count, const EncodedGeometryStatistics& geometry_statistics, + bool has_min_max, bool has_null_count, bool has_distinct_count, + bool has_geometry_statistics, ::arrow::MemoryPool* pool) { #define MAKE_STATS(CAP_TYPE, KLASS) \ case Type::CAP_TYPE: \ return std::make_shared>( \ diff --git a/cpp/src/parquet/statistics.h b/cpp/src/parquet/statistics.h index 572d6a6c7bd99..8bca36063b9a6 100644 --- a/cpp/src/parquet/statistics.h +++ b/cpp/src/parquet/statistics.h @@ -271,8 +271,9 @@ class PARQUET_EXPORT Statistics { static std::shared_ptr Make( const ColumnDescriptor* descr, const std::string& encoded_min, const std::string& encoded_max, int64_t num_values, int64_t null_count, - int64_t distinct_count, bool has_min_max, bool has_null_count, - bool has_distinct_count, + int64_t distinct_count, const EncodedGeometryStatistics& geometry_statistics, + bool has_min_max, bool has_null_count, bool has_distinct_count, + bool has_geometry_statistics, ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()); // Helper function to convert EncodedStatistics to Statistics. 
@@ -428,11 +429,14 @@ template std::shared_ptr> MakeStatistics( const ColumnDescriptor* descr, const std::string& encoded_min, const std::string& encoded_max, int64_t num_values, int64_t null_count, - int64_t distinct_count, bool has_min_max, bool has_null_count, - bool has_distinct_count, ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) { - return std::static_pointer_cast>(Statistics::Make( - descr, encoded_min, encoded_max, num_values, null_count, distinct_count, - has_min_max, has_null_count, has_distinct_count, pool)); + int64_t distinct_count, const EncodedGeometryStatistics& geometry_statistics, + bool has_min_max, bool has_null_count, bool has_distinct_count, + bool has_geometry_statistics, + ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) { + return std::static_pointer_cast>( + Statistics::Make(descr, encoded_min, encoded_max, num_values, null_count, + distinct_count, geometry_statistics, has_min_max, has_null_count, + has_distinct_count, has_geometry_statistics, pool)); } } // namespace parquet diff --git a/cpp/src/parquet/statistics_test.cc b/cpp/src/parquet/statistics_test.cc index dad414ac89b47..5df1a3340c13a 100644 --- a/cpp/src/parquet/statistics_test.cc +++ b/cpp/src/parquet/statistics_test.cc @@ -320,7 +320,7 @@ class TestStatistics : public PrimitiveTypedTest { auto statistics2 = MakeStatistics(this->schema_.Column(0), encoded_min, encoded_max, - this->values_.size(), 0, 0, true, true, true); + this->values_.size(), 0, 0, {}, true, true, true, false); auto statistics3 = MakeStatistics(this->schema_.Column(0)); std::vector valid_bits( @@ -541,9 +541,9 @@ void TestStatistics::TestMinMaxEncode() { std::string(reinterpret_cast(statistics1->max().ptr), statistics1->max().len)); - auto statistics2 = - MakeStatistics(this->schema_.Column(0), encoded_min, encoded_max, - this->values_.size(), 0, 0, true, true, true); + auto statistics2 = MakeStatistics(this->schema_.Column(0), encoded_min, + encoded_max, this->values_.size(), 0, + 0, 
{}, true, true, true, false); ASSERT_EQ(encoded_min, statistics2->EncodeMin()); ASSERT_EQ(encoded_max, statistics2->EncodeMax()); From 176c99765738ae6d5c70f927770f53da26454ca5 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 19 Aug 2024 12:09:52 -0300 Subject: [PATCH 27/61] more stats things --- cpp/src/parquet/geometry_util.h | 4 ++ cpp/src/parquet/statistics.cc | 75 +++++++++++++++++++++++++++++++++ cpp/src/parquet/statistics.h | 6 +++ 3 files changed, 85 insertions(+) diff --git a/cpp/src/parquet/geometry_util.h b/cpp/src/parquet/geometry_util.h index b166270af5bc5..4dccc0cf3bf3f 100644 --- a/cpp/src/parquet/geometry_util.h +++ b/cpp/src/parquet/geometry_util.h @@ -585,6 +585,10 @@ class WKBGeometryBounder { void ReadBox(const BoundingBox& box) { box_.Merge(box); } + void ReadGeometryTypes(const std::vector& geometry_types) { + wkb_types_.insert(geometry_types.begin(), geometry_types.end()); + } + const BoundingBox& Bounds() const { return box_; } std::vector WkbTypes() const { diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index a0096fca7b059..96b22f6a129f6 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -82,6 +82,7 @@ class GeometryStatisticsImpl { } bounder_.ReadBox(other.bounder_.Bounds()); + bounder_.ReadGeometryTypes(other.bounder_.WkbTypes()); } void Update(const ByteArray* values, int64_t num_values, int64_t null_count) { @@ -103,6 +104,66 @@ class GeometryStatisticsImpl { } } + EncodedGeometryStatistics Encode() const { + const double* mins = bounder_.Bounds().min; + const double* maxes = bounder_.Bounds().max; + + EncodedGeometryStatistics out; + out.geometry_types = bounder_.WkbTypes(); + + out.xmin = mins[0]; + out.xmax = maxes[0]; + out.ymin = mins[1]; + out.ymax = maxes[1]; + out.zmin = mins[2]; + out.zmax = maxes[2]; + out.mmin = mins[3]; + out.mmax = maxes[3]; + + return out; + } + + void Update(const EncodedGeometryStatistics& encoded) { + if (!is_valid_) { + return; + } + + 
geometry::BoundingBox box; + box.min[0] = encoded.xmin; + box.max[0] = encoded.xmax; + box.min[1] = encoded.ymin; + box.max[1] = encoded.ymax; + + if (encoded.has_z()) { + box.min[2] = encoded.zmin; + box.max[2] = encoded.zmax; + } + + if (encoded.has_m()) { + box.min[3] = encoded.mmin; + box.max[3] = encoded.mmax; + } + + bounder_.ReadBox(box); + bounder_.ReadGeometryTypes(encoded.geometry_types); + + try { + for (const auto& covering : encoded.coverings) { + if (covering.first == "WKB") { + geometry::WKBBuffer buf( + reinterpret_cast(covering.second.data()), + covering.second.size()); + bounder_.ReadGeometry(&buf, false); + } + } + } catch (ParquetException& e) { + is_valid_ = false; + return; + } + } + + bool is_valid() const { return is_valid_; } + private: geometry::WKBGeometryBounder bounder_; bool is_valid_{}; @@ -125,6 +186,17 @@ void GeometryStatistics::Update(const ByteArray* values, int64_t num_values, impl_->Update(values, num_values, null_count); } +bool GeometryStatistics::is_valid() const { return impl_->is_valid(); } + +EncodedGeometryStatistics GeometryStatistics::Encode() { return impl_->Encode(); } + +std::unique_ptr GeometryStatistics::Decode( + const EncodedGeometryStatistics& encoded) { + auto out = std::make_unique(); + out->impl_->Update(encoded); + return out; +} + namespace { // ---------------------------------------------------------------------- @@ -839,6 +911,9 @@ class TypedStatisticsImpl : public TypedStatistics { if (HasDistinctCount()) { s.set_distinct_count(this->distinct_count()); } + if (HasGeometryStatistics() && geometry_statistics_->is_valid()) { + s.set_geometry(geometry_statistics_->Encode()); + } return s; } diff --git a/cpp/src/parquet/statistics.h b/cpp/src/parquet/statistics.h index 8bca36063b9a6..9ffbbc109c88e 100644 --- a/cpp/src/parquet/statistics.h +++ b/cpp/src/parquet/statistics.h @@ -151,6 +151,12 @@ class PARQUET_EXPORT GeometryStatistics { void Update(const ByteArray* values, int64_t num_values, int64_t 
null_count); + EncodedGeometryStatistics Encode(); + + bool is_valid() const; + + static std::unique_ptr Decode(const EncodedGeometryStatistics& encoded); + private: std::unique_ptr impl_; }; From 1de193e31202e7d8f5af4ac9d33d20c294999f6d Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 19 Aug 2024 14:09:51 -0300 Subject: [PATCH 28/61] maybe work with serde --- cpp/src/parquet/metadata.cc | 38 +++++++++++++++++++++++++++++++++-- cpp/src/parquet/statistics.cc | 7 ++++--- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index a0e1d382cc3a4..5c7e28aca11ba 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -88,6 +88,38 @@ std::string ParquetVersionToString(ParquetVersion::type ver) { return "UNKNOWN"; } +static EncodedGeometryStatistics MakeEncodedGeometryStatistics( + const format::Statistics& stats) { + EncodedGeometryStatistics out; + + if (stats.__isset.geometry_stats) { + const format::GeometryStatistics& geom_stats = stats.geometry_stats; + out.geometry_types = std::vector(geom_stats.geometry_types.begin(), + geom_stats.geometry_types.end()); + + out.xmin = geom_stats.bbox.xmin; + out.xmax = geom_stats.bbox.xmax; + out.ymin = geom_stats.bbox.ymin; + out.ymax = geom_stats.bbox.ymax; + + if (geom_stats.bbox.__isset.zmin && geom_stats.bbox.__isset.zmax) { + out.zmin = geom_stats.bbox.zmin; + out.zmax = geom_stats.bbox.zmax; + } + + if (geom_stats.bbox.__isset.mmin && geom_stats.bbox.__isset.mmax) { + out.mmin = geom_stats.bbox.mmin; + out.mmax = geom_stats.bbox.mmax; + } + + for (const auto& covering : geom_stats.coverings) { + out.coverings.emplace_back(covering.kind, covering.value); + } + } + + return out; +} + template static std::shared_ptr MakeTypedColumnStats( const format::ColumnMetaData& metadata, const ColumnDescriptor* descr) { @@ -96,7 +128,8 @@ static std::shared_ptr MakeTypedColumnStats( return MakeStatistics( descr, 
metadata.statistics.min_value, metadata.statistics.max_value, metadata.num_values - metadata.statistics.null_count, - metadata.statistics.null_count, metadata.statistics.distinct_count, {}, + metadata.statistics.null_count, metadata.statistics.distinct_count, + MakeEncodedGeometryStatistics(metadata.statistics), metadata.statistics.__isset.max_value && metadata.statistics.__isset.min_value, metadata.statistics.__isset.null_count, metadata.statistics.__isset.distinct_count, @@ -106,7 +139,8 @@ static std::shared_ptr MakeTypedColumnStats( return MakeStatistics( descr, metadata.statistics.min, metadata.statistics.max, metadata.num_values - metadata.statistics.null_count, - metadata.statistics.null_count, metadata.statistics.distinct_count, {}, + metadata.statistics.null_count, metadata.statistics.distinct_count, + MakeEncodedGeometryStatistics(metadata.statistics), metadata.statistics.__isset.max && metadata.statistics.__isset.min, metadata.statistics.__isset.null_count, metadata.statistics.__isset.distinct_count, metadata.statistics.__isset.geometry_stats); diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index 96b22f6a129f6..eeed161d9e093 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -1207,10 +1207,11 @@ std::shared_ptr Statistics::Make(const ColumnDescriptor* descr, ::arrow::MemoryPool* pool) { DCHECK(encoded_stats != nullptr); return Make(descr, encoded_stats->min(), encoded_stats->max(), num_values, - encoded_stats->null_count, encoded_stats->distinct_count, {}, + encoded_stats->null_count, encoded_stats->distinct_count, + encoded_stats->geometry_statistics(), encoded_stats->has_min && encoded_stats->has_max, - encoded_stats->has_null_count, encoded_stats->has_distinct_count, false, - pool); + encoded_stats->has_null_count, encoded_stats->has_distinct_count, + encoded_stats->has_geometry_statistics, pool); } std::shared_ptr Statistics::Make( From d2f815708c053718d55058178e6fca1b333dbd1a Mon Sep 17 
00:00:00 2001 From: Dewey Dunnington Date: Mon, 19 Aug 2024 14:21:06 -0300 Subject: [PATCH 29/61] Update cpp/src/parquet/types.cc Co-authored-by: Gang Wu --- cpp/src/parquet/types.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index 5f5d65e54537a..6707fb78ed4e2 100644 --- a/cpp/src/parquet/types.cc +++ b/cpp/src/parquet/types.cc @@ -483,7 +483,7 @@ std::shared_ptr LogicalType::FromThrift( } std::string metadata; - if (type.GEOMETRY.__isset.crs) { + if (type.GEOMETRY.__isset.metadata) { metadata = type.GEOMETRY.metadata; } From b525e845e568cdee9de5b538bb5abeb5d49bf9bb Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Tue, 3 Sep 2024 15:34:42 +0800 Subject: [PATCH 30/61] Updated parquet.thrift and re-generated cpp sources --- cpp/src/generated/parquet_types.cpp | 38 ++++++---- cpp/src/generated/parquet_types.h | 97 +++++++++++++++++++------- cpp/src/generated/parquet_types.tcc | 7 +- cpp/src/parquet/parquet.thrift | 103 +++++++++++++++++++++------- 4 files changed, 176 insertions(+), 69 deletions(-) diff --git a/cpp/src/generated/parquet_types.cpp b/cpp/src/generated/parquet_types.cpp index f580c7905e8b4..c1102440a107a 100644 --- a/cpp/src/generated/parquet_types.cpp +++ b/cpp/src/generated/parquet_types.cpp @@ -405,13 +405,18 @@ int _kGeometryEncodingValues[] = { /** * Allowed for physical type: BYTE_ARRAY. * - * Well-known binary (WKB) representations of geometries. It supports 2D or - * 3D geometries of the standard geometry types (Point, LineString, Polygon, - * MultiPoint, MultiLineString, MultiPolygon, and GeometryCollection). This - * is the preferred option for maximum portability. + * Well-known binary (WKB) representations of geometries. * - * This encoding enables GeometryStatistics to be set in the column chunk - * and page index. + * To be clear, we follow the same rule of WKB and coordinate axis order from + * GeoParquet [1][2]. 
It is the ISO WKB supporting XY, XYZ, XYM, XYZM and the + * standard geometry types (Point, LineString, Polygon, MultiPoint, + * MultiLineString, MultiPolygon, and GeometryCollection). + * + * This is the preferred encoding for maximum portability. It also supports + * GeometryStatistics to be set in the column chunk and page index. + * + * [1] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L92 + * [2] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L155 */ GeometryEncoding::WKB }; @@ -419,13 +424,18 @@ const char* _kGeometryEncodingNames[] = { /** * Allowed for physical type: BYTE_ARRAY. * - * Well-known binary (WKB) representations of geometries. It supports 2D or - * 3D geometries of the standard geometry types (Point, LineString, Polygon, - * MultiPoint, MultiLineString, MultiPolygon, and GeometryCollection). This - * is the preferred option for maximum portability. + * Well-known binary (WKB) representations of geometries. + * + * To be clear, we follow the same rule of WKB and coordinate axis order from + * GeoParquet [1][2]. It is the ISO WKB supporting XY, XYZ, XYM, XYZM and the + * standard geometry types (Point, LineString, Polygon, MultiPoint, + * MultiLineString, MultiPolygon, and GeometryCollection). * - * This encoding enables GeometryStatistics to be set in the column chunk - * and page index. + * This is the preferred encoding for maximum portability. It also supports + * GeometryStatistics to be set in the column chunk and page index. + * + * [1] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L92 + * [2] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L155 */ "WKB" }; @@ -4568,5 +4578,5 @@ void FileCryptoMetaData::printTo(std::ostream& out) const { out << ", " << "key_metadata="; (__isset.key_metadata ? 
(out << to_string(key_metadata)) : (out << "")); out << ")"; } -} -} // namespace + +}} // namespace diff --git a/cpp/src/generated/parquet_types.h b/cpp/src/generated/parquet_types.h index 0a857c4c6e8dd..b190f9d21997b 100644 --- a/cpp/src/generated/parquet_types.h +++ b/cpp/src/generated/parquet_types.h @@ -20,7 +20,6 @@ #include "parquet/windows_compatibility.h" - namespace parquet { namespace format { /** @@ -216,7 +215,7 @@ std::string to_string(const FieldRepetitionType::type& val); /** * Interpretation for edges of GEOMETRY logical type, i.e. whether the edge * between points represent a straight cartesian line or the shortest line on - * the sphere. Please note that it only applies to polygons. + * the sphere. It applies to all non-point geometry objects. */ struct Edges { enum type { @@ -239,13 +238,18 @@ struct GeometryEncoding { /** * Allowed for physical type: BYTE_ARRAY. * - * Well-known binary (WKB) representations of geometries. It supports 2D or - * 3D geometries of the standard geometry types (Point, LineString, Polygon, - * MultiPoint, MultiLineString, MultiPolygon, and GeometryCollection). This - * is the preferred option for maximum portability. + * Well-known binary (WKB) representations of geometries. + * + * To be clear, we follow the same rule of WKB and coordinate axis order from + * GeoParquet [1][2]. It is the ISO WKB supporting XY, XYZ, XYM, XYZM and the + * standard geometry types (Point, LineString, Polygon, MultiPoint, + * MultiLineString, MultiPolygon, and GeometryCollection). * - * This encoding enables GeometryStatistics to be set in the column chunk - * and page index. + * This is the preferred encoding for maximum portability. It also supports + * GeometryStatistics to be set in the column chunk and page index. 
+ * + * [1] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L92 + * [2] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L155 */ WKB = 0 }; @@ -616,9 +620,9 @@ std::ostream& operator<<(std::ostream& out, const SizeStatistics& obj); /** - * A custom WKB-encoded polygon or multi-polygon to represent a covering of + * A custom binary-encoded polygon or multi-polygon to represent a covering of * geometries. For example, it may be a bounding box or an envelope of geometries - * when a bounding box cannot be built (e.g., a geometry has spherical edges, or if + * when a bounding box cannot be built (e.g. a geometry has spherical edges, or if * an edge of geographic coordinates crosses the antimeridian). In addition, it can * also be used to provide vendor-agnostic coverings like S2 or H3 grids. */ @@ -640,10 +644,10 @@ class Covering { */ std::string kind; /** - * A payload specific to kind: - * - WKB: well-known binary of a POLYGON that completely covers the contents. - * This will be interpreted according to the same CRS and edges defined by - * the logical type. + * A payload specific to kind. Below are the supported values: + * - WKB: well-known binary of a POLYGON or MULTI-POLYGON that completely + * covers the contents. This will be interpreted according to the same CRS + * and edges defined by the logical type. */ std::string value; @@ -688,6 +692,9 @@ typedef struct _BoundingBox__isset { /** * Bounding box of geometries in the representation of min/max value pair of * coordinates from each axis. Values of Z and M are omitted for 2D geometries. + * Filter pushdown on geometries are only safe for planar spatial predicate + * but it is recommended that the writer always generates bounding box statistics, + * regardless of whether the geometries are planar or spherical. 
*/ class BoundingBox { public: @@ -807,7 +814,10 @@ class GeometryStatistics { */ BoundingBox bbox; /** - * A list of coverings of geometries + * A list of coverings of geometries. + * Note that It is allowed to have more than one covering of the same kind and + * implementation is free to use any of them. It is recommended to have at most + * one covering for each kind. */ std::vector coverings; /** @@ -835,7 +845,7 @@ class GeometryStatistics { * * Please refer to links below for more detail: * [1] https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary - * [2] https://github.com/opengeospatial/geoparquet/blob/v1.0.0/format-specs/geoparquet.md?plain=1#L91 + * [2] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L159 */ std::vector geometry_types; @@ -1828,31 +1838,70 @@ class GeometryType { virtual ~GeometryType() noexcept; /** - * Physical type and encoding for the geometry type. Please refer to the - * definition of GeometryEncoding for more detail. + * Physical type and encoding for the geometry type. + * Please refer to the definition of GeometryEncoding for more detail. * * @see GeometryEncoding */ GeometryEncoding::type encoding; /** - * Edges of polygon. + * Edges of geometry type. + * Please refer to the definition of Edges for more detail. * * @see Edges */ Edges::type edges; /** * Coordinate Reference System, i.e. mapping of how coordinates refer to - * precise locations on earth. + * precise locations on earth. Writers are not required to set this field. + * Once crs is set, crs_encoding field below MUST be set together. 
+ * For example, "OGC:CRS84" can be set in the form of PROJJSON as below: + * { + * "$schema": "https://proj.org/schemas/v0.5/projjson.schema.json", + * "type": "GeographicCRS", + * "name": "WGS 84 longitude-latitude", + * "datum": { + * "type": "GeodeticReferenceFrame", + * "name": "World Geodetic System 1984", + * "ellipsoid": { + * "name": "WGS 84", + * "semi_major_axis": 6378137, + * "inverse_flattening": 298.257223563 + * } + * }, + * "coordinate_system": { + * "subtype": "ellipsoidal", + * "axis": [ + * { + * "name": "Geodetic longitude", + * "abbreviation": "Lon", + * "direction": "east", + * "unit": "degree" + * }, + * { + * "name": "Geodetic latitude", + * "abbreviation": "Lat", + * "direction": "north", + * "unit": "degree" + * } + * ] + * }, + * "id": { + * "authority": "OGC", + * "code": "CRS84" + * } + * } */ std::string crs; /** - * Encoding used in the above crs field. + * Encoding used in the above crs field. It MUST be set if crs field is set. * Currently the only allowed value is "PROJJSON". */ std::string crs_encoding; /** * Additional informative metadata. - * It can be used by GeoParquet to offload some of the column metadata. 
+ * GeoParquet could offload its column metadata in a JSON-encoded UTF-8 string: + * https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L46 */ std::string metadata; @@ -4768,9 +4817,7 @@ void swap(FileCryptoMetaData &a, FileCryptoMetaData &b); std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj); -} -} // namespace - +}} // namespace #include "parquet_types.tcc" diff --git a/cpp/src/generated/parquet_types.tcc b/cpp/src/generated/parquet_types.tcc index f0e5b5ea37c6c..33a457e4ad954 100644 --- a/cpp/src/generated/parquet_types.tcc +++ b/cpp/src/generated/parquet_types.tcc @@ -1619,7 +1619,7 @@ uint32_t GeometryType::read(Protocol_* iprot) { break; case 5: if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->metadata); + xfer += iprot->readString(this->metadata); this->__isset.metadata = true; } else { xfer += iprot->skip(ftype); @@ -1667,7 +1667,7 @@ uint32_t GeometryType::write(Protocol_* oprot) const { } if (this->__isset.metadata) { xfer += oprot->writeFieldBegin("metadata", ::apache::thrift::protocol::T_STRING, 5); - xfer += oprot->writeBinary(this->metadata); + xfer += oprot->writeString(this->metadata); xfer += oprot->writeFieldEnd(); } xfer += oprot->writeFieldStop(); @@ -5395,7 +5395,6 @@ uint32_t FileCryptoMetaData::write(Protocol_* oprot) const { return xfer; } -} -} // namespace +}} // namespace #endif diff --git a/cpp/src/parquet/parquet.thrift b/cpp/src/parquet/parquet.thrift index 61a40882a6155..82ad4ff789369 100644 --- a/cpp/src/parquet/parquet.thrift +++ b/cpp/src/parquet/parquet.thrift @@ -20,7 +20,8 @@ /** * File format description for the parquet file format */ -namespace cpp parquet +cpp_include "parquet/windows_compatibility.h" +namespace cpp parquet.format namespace java org.apache.parquet.format /** @@ -240,7 +241,7 @@ struct SizeStatistics { /** * Interpretation for edges of GEOMETRY logical type, i.e. 
whether the edge * between points represent a straight cartesian line or the shortest line on - * the sphere. Please note that it only applies to polygons. + * the sphere. It applies to all non-point geometry objects. */ enum Edges { PLANAR = 0; @@ -248,9 +249,9 @@ enum Edges { } /** - * A custom WKB-encoded polygon or multi-polygon to represent a covering of + * A custom binary-encoded polygon or multi-polygon to represent a covering of * geometries. For example, it may be a bounding box or an envelope of geometries - * when a bounding box cannot be built (e.g., a geometry has spherical edges, or if + * when a bounding box cannot be built (e.g. a geometry has spherical edges, or if * an edge of geographic coordinates crosses the antimeridian). In addition, it can * also be used to provide vendor-agnostic coverings like S2 or H3 grids. */ @@ -259,10 +260,11 @@ struct Covering { * A type of covering. Currently accepted values: "WKB". */ 1: required string kind; - /** A payload specific to kind: - * - WKB: well-known binary of a POLYGON that completely covers the contents. - * This will be interpreted according to the same CRS and edges defined by - * the logical type. + /** + * A payload specific to kind. Below are the supported values: + * - WKB: well-known binary of a POLYGON or MULTI-POLYGON that completely + * covers the contents. This will be interpreted according to the same CRS + * and edges defined by the logical type. */ 2: required binary value; } @@ -270,6 +272,9 @@ struct Covering { /** * Bounding box of geometries in the representation of min/max value pair of * coordinates from each axis. Values of Z and M are omitted for 2D geometries. + * Filter pushdown on geometries are only safe for planar spatial predicate + * but it is recommended that the writer always generates bounding box statistics, + * regardless of whether the geometries are planar or spherical. 
*/ struct BoundingBox { 1: required double xmin; @@ -287,7 +292,12 @@ struct GeometryStatistics { /** A bounding box of geometries */ 1: optional BoundingBox bbox; - /** A list of coverings of geometries */ + /** + * A list of coverings of geometries. + * Note that It is allowed to have more than one covering of the same kind and + * implementation is free to use any of them. It is recommended to have at most + * one covering for each kind. + */ 2: optional list coverings; /** @@ -315,7 +325,7 @@ struct GeometryStatistics { * * Please refer to links below for more detail: * [1] https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary - * [2] https://github.com/opengeospatial/geoparquet/blob/v1.0.0/format-specs/geoparquet.md?plain=1#L91 + * [2] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L159 */ 3: optional list geometry_types; } @@ -466,17 +476,20 @@ enum GeometryEncoding { /** * Allowed for physical type: BYTE_ARRAY. * - * Well-known binary (WKB) representations of geometries. It supports 2D or - * 3D geometries of the standard geometry types (Point, LineString, Polygon, - * MultiPoint, MultiLineString, MultiPolygon, and GeometryCollection). This - * is the preferred option for maximum portability. + * Well-known binary (WKB) representations of geometries. + * + * To be clear, we follow the same rule of WKB and coordinate axis order from + * GeoParquet [1][2]. It is the ISO WKB supporting XY, XYZ, XYM, XYZM and the + * standard geometry types (Point, LineString, Polygon, MultiPoint, + * MultiLineString, MultiPolygon, and GeometryCollection). * - * This encoding enables GeometryStatistics to be set in the column chunk - * and page index. + * This is the preferred encoding for maximum portability. It also supports + * GeometryStatistics to be set in the column chunk and page index. 
+ * + * [1] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L92 + * [2] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L155 */ WKB = 0; - - // TODO: add native encoding from GeoParquet/GeoArrow } /** @@ -484,29 +497,68 @@ enum GeometryEncoding { */ struct GeometryType { /** - * Physical type and encoding for the geometry type. Please refer to the - * definition of GeometryEncoding for more detail. + * Physical type and encoding for the geometry type. + * Please refer to the definition of GeometryEncoding for more detail. */ 1: required GeometryEncoding encoding; /** - * Edges of polygon. + * Edges of geometry type. + * Please refer to the definition of Edges for more detail. */ 2: required Edges edges; /** * Coordinate Reference System, i.e. mapping of how coordinates refer to - * precise locations on earth. + * precise locations on earth. Writers are not required to set this field. + * Once crs is set, crs_encoding field below MUST be set together. + * For example, "OGC:CRS84" can be set in the form of PROJJSON as below: + * { + * "$schema": "https://proj.org/schemas/v0.5/projjson.schema.json", + * "type": "GeographicCRS", + * "name": "WGS 84 longitude-latitude", + * "datum": { + * "type": "GeodeticReferenceFrame", + * "name": "World Geodetic System 1984", + * "ellipsoid": { + * "name": "WGS 84", + * "semi_major_axis": 6378137, + * "inverse_flattening": 298.257223563 + * } + * }, + * "coordinate_system": { + * "subtype": "ellipsoidal", + * "axis": [ + * { + * "name": "Geodetic longitude", + * "abbreviation": "Lon", + * "direction": "east", + * "unit": "degree" + * }, + * { + * "name": "Geodetic latitude", + * "abbreviation": "Lat", + * "direction": "north", + * "unit": "degree" + * } + * ] + * }, + * "id": { + * "authority": "OGC", + * "code": "CRS84" + * } + * } */ 3: optional string crs; /** - * Encoding used in the above crs field. + * Encoding used in the above crs field. 
It MUST be set if crs field is set. * Currently the only allowed value is "PROJJSON". */ 4: optional string crs_encoding; /** * Additional informative metadata. - * It can be used by GeoParquet to offload some of the column metadata. + * GeoParquet could offload its column metadata in a JSON-encoded UTF-8 string: + * https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L46 */ - 5: optional binary metadata; + 5: optional string metadata; } /** @@ -1331,4 +1383,3 @@ struct FileCryptoMetaData { * and (possibly) columns **/ 2: optional binary key_metadata } - From 6f0500e9bd65c9886ba75ff218acb5b9f5b92b08 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Wed, 4 Sep 2024 00:15:55 +0800 Subject: [PATCH 31/61] Geometry value writer could make use of the geometry statistics class to populate geometry statistics --- cpp/src/parquet/column_writer.cc | 5 +- cpp/src/parquet/column_writer_test.cc | 112 ++++++++++++++++++++++++++ cpp/src/parquet/geometry_util.h | 3 + cpp/src/parquet/statistics.cc | 35 +++++++- cpp/src/parquet/statistics.h | 5 +- cpp/src/parquet/test_util.h | 6 +- cpp/src/parquet/thrift_internal.h | 22 +++++ 7 files changed, 182 insertions(+), 6 deletions(-) diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index f859ec9653f78..998d644623777 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -1219,8 +1219,11 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< // Will be null if not using dictionary, but that's ok current_dict_encoder_ = dynamic_cast*>(current_encoder_.get()); + bool is_geometry = + (descr_->logical_type() != nullptr && descr_->logical_type()->is_geometry()); + bool has_sort_order = SortOrder::UNKNOWN != descr_->sort_order(); if (properties->statistics_enabled(descr_->path()) && - (SortOrder::UNKNOWN != descr_->sort_order())) { + (is_geometry || has_sort_order)) { page_statistics_ = MakeStatistics(descr_, 
allocator_); chunk_statistics_ = MakeStatistics(descr_, allocator_); } diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index c99efd17961aa..095037057823f 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -19,6 +19,7 @@ #include #include +#include <_types/_uint32_t.h> #include #include @@ -385,6 +386,10 @@ class TestPrimitiveWriter : public PrimitiveTypedTest { return metadata_accessor->encoding_stats(); } + std::unique_ptr metadata_accessor() { + return ColumnChunkMetaData::Make(metadata_->contents(), this->descr_); + } + protected: int64_t values_read_; // Keep the reader alive as for ByteArray the lifetime of the ByteArray @@ -1705,5 +1710,112 @@ TEST(TestColumnWriter, WriteDataPageV2HeaderNullCount) { } } +// Test writing and reading geometry columns +class TestGeometryValuesWriter : public TestPrimitiveWriter { + public: + static const char *CRS; + static const char* METADATA; + + void SetUpSchema(Repetition::type repetition, int num_columns) override { + std::vector fields; + + for (int i = 0; i < num_columns; ++i) { + std::string name = TestColumnName(i); + std::shared_ptr logical_type = GeometryLogicalType::Make( + CRS, LogicalType::GeometryEdges::PLANAR, LogicalType::GeometryEncoding::WKB, METADATA); + fields.push_back(schema::PrimitiveNode::Make(name, repetition, logical_type, + ByteArrayType::type_num)); + } + node_ = schema::GroupNode::Make("schema", Repetition::REQUIRED, fields); + schema_.Init(node_); + } + + void GenerateData(int64_t num_values, uint32_t seed = 0) { + def_levels_.resize(num_values); + values_.resize(num_values); + + uint32_t point_wkb_size = 21; + buffer_.resize(num_values * point_wkb_size); + uint8_t *ptr = buffer_.data(); + for (int k = 0; k < num_values; k++) { + // Point with coordinates (k, k + 1), encoded as WKB + ptr[0] = 0x01; // 1: little endian + uint32_t geom_type = 1; // 1: POINT (2D) + memcpy(&ptr[1], &geom_type, 4); + double x = k; 
+ double y = k + 1; + memcpy(&ptr[5], &x, 8); + memcpy(&ptr[13], &y, 8); + + // Set this WKB value to values_[k] + values_[k].len = point_wkb_size; + values_[k].ptr = ptr; + ptr += point_wkb_size; + } + + values_ptr_ = values_.data(); + + std::fill(def_levels_.begin(), def_levels_.end(), 1); + } + + void TestWriteAndRead(ParquetVersion::type version, + ParquetDataPageVersion data_page_version) { + this->SetUpSchema(Repetition::REQUIRED, 1); + this->GenerateData(SMALL_SIZE); + size_t num_values = this->values_.size(); + auto writer = + this->BuildWriter(num_values, ColumnProperties(), version, data_page_version, + /*enable_checksum*/ false); + std::vector definition_levels(num_values, 0); + std::vector repetition_levels(num_values, 0); + writer->WriteBatch(this->values_.size(), definition_levels.data(), + repetition_levels.data(), this->values_.data()); + + writer->Close(); + this->ReadColumn(); + for (size_t i = 0; i < num_values; i++) { + // ASSERT_EQ((i % 2 == 0) ? true : false, this->values_out_[i]) << i; + const ByteArray &value = this->values_out_[i]; + EXPECT_EQ(21, value.len); + EXPECT_EQ(1, value.ptr[0]); + uint32_t geom_type = 0; + double x = 0; + double y = 0; + memcpy(&geom_type, &value.ptr[1], 4); + memcpy(&x, &value.ptr[5], 8); + memcpy(&y, &value.ptr[13], 8); + EXPECT_EQ(1, geom_type); + EXPECT_DOUBLE_EQ(i, x); + EXPECT_DOUBLE_EQ(i + 1, y); + } + + auto metadata_accessor = this->metadata_accessor(); + // auto statistics = metadata_accessor->statistics(); + + // auto metadata_encodings = this->metadata_encodings(); + // std::set metadata_encodings_set{metadata_encodings.begin(), + // metadata_encodings.end()}; + // EXPECT_EQ(expected_encodings, metadata_encodings_set); + } +}; + +const char* TestGeometryValuesWriter::CRS = R"({"id": {"authority": "OGC", "code": "CRS84"}})"; +const char* TestGeometryValuesWriter::METADATA = "test_metadata"; + + +TEST_F(TestGeometryValuesWriter, TestWriteAndReadV1) { + for (auto data_page_version : + 
{ParquetDataPageVersion::V1, ParquetDataPageVersion::V2}) { + TestWriteAndRead(ParquetVersion::PARQUET_1_0, data_page_version); + } +} + +TEST_F(TestGeometryValuesWriter, TestWriteAndReadV2) { + for (auto data_page_version : + {ParquetDataPageVersion::V1, ParquetDataPageVersion::V2}) { + TestWriteAndRead(ParquetVersion::PARQUET_2_4, data_page_version); + } +} + } // namespace test } // namespace parquet diff --git a/cpp/src/parquet/geometry_util.h b/cpp/src/parquet/geometry_util.h index 4dccc0cf3bf3f..3cba84016b044 100644 --- a/cpp/src/parquet/geometry_util.h +++ b/cpp/src/parquet/geometry_util.h @@ -332,6 +332,7 @@ template class WKBSequenceBounder { public: explicit WKBSequenceBounder(double* chunk) : box_(dims), chunk_(chunk) {} + WKBSequenceBounder(const WKBSequenceBounder &) = default; void ReadPoint(WKBBuffer* src) { constexpr uint32_t coord_size = Dimensions::size(); @@ -398,6 +399,7 @@ class WKBGenericSequenceBounder { xyz_swap_(chunk_), xym_swap_(chunk_), xyzm_swap_(chunk_) {} + WKBGenericSequenceBounder(const WKBGenericSequenceBounder &) = default; void ReadPoint(WKBBuffer* src, Dimensions::dimensions dimensions, bool swap) { if (ARROW_PREDICT_TRUE(!swap)) { @@ -538,6 +540,7 @@ class WKBGenericSequenceBounder { class WKBGeometryBounder { public: WKBGeometryBounder() : box_(Dimensions::XYZM) {} + WKBGeometryBounder(const WKBGeometryBounder &) = default; void ReadGeometry(WKBBuffer* src, bool record_wkb_type = true) { uint8_t endian = src->ReadUInt8(); diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index eeed161d9e093..e642378d98905 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -39,6 +40,7 @@ #include "parquet/geometry_util.h" #include "parquet/platform.h" #include "parquet/schema.h" +#include "parquet/types.h" using arrow::default_memory_pool; using arrow::MemoryPool; @@ -51,6 +53,9 @@ namespace parquet { class 
GeometryStatisticsImpl { public: + GeometryStatisticsImpl() = default; + GeometryStatisticsImpl(const GeometryStatisticsImpl&) = default; + bool Equals(const GeometryStatisticsImpl& other) const { if (is_valid_ != other.is_valid_) { return false; @@ -166,13 +171,17 @@ class GeometryStatisticsImpl { private: geometry::WKBGeometryBounder bounder_; - bool is_valid_{}; + bool is_valid_ = true; }; GeometryStatistics::GeometryStatistics() { impl_ = std::make_unique(); } +GeometryStatistics::GeometryStatistics(std::unique_ptr impl) + : impl_(std::move(impl)) { +} + bool GeometryStatistics::Equals(const GeometryStatistics& other) const { return impl_->Equals(*other.impl_); } @@ -197,6 +206,11 @@ std::unique_ptr GeometryStatistics::Decode( return out; } +std::shared_ptr GeometryStatistics::clone() const { + std::unique_ptr impl = std::make_unique(*impl_); + return std::make_shared(std::move(impl)); +} + namespace { // ---------------------------------------------------------------------- @@ -770,6 +784,7 @@ class TypedStatisticsImpl : public TypedStatistics { bool HasMinMax() const override { return has_min_max_; } bool HasNullCount() const override { return has_null_count_; }; bool HasGeometryStatistics() const override { return geometry_statistics_ != nullptr; } + const GeometryStatistics* geometry_statistics() const override { return geometry_statistics_.get(); } void IncrementNullCount(int64_t n) override { statistics_.null_count += n; @@ -813,7 +828,7 @@ class TypedStatisticsImpl : public TypedStatistics { } if (HasGeometryStatistics() && - !geometry_statistics_->Equals(*other.GeometryStatistics())) { + !geometry_statistics_->Equals(*other.geometry_statistics())) { return false; } @@ -857,6 +872,12 @@ class TypedStatisticsImpl : public TypedStatistics { if (other.HasMinMax()) { SetMinMax(other.min(), other.max()); } + + if (this->HasGeometryStatistics() && other.HasGeometryStatistics()) { + this->geometry_statistics_->Merge(*other.geometry_statistics()); + } else if 
(other.HasGeometryStatistics()) { + this->geometry_statistics_ = other.geometry_statistics()->clone(); + } } void Update(const T* values, int64_t num_values, int64_t null_count) override; @@ -1035,6 +1056,9 @@ void TypedStatisticsImpl::Update(const T* values, int64_t num_values, if constexpr (std::is_same::value) { if (logical_type_ == LogicalType::Type::GEOMETRY) { + if (geometry_statistics_ == nullptr) { + geometry_statistics_ = std::make_unique(); + } geometry_statistics_->Update(values, num_values, null_count); } } @@ -1131,7 +1155,12 @@ std::shared_ptr DoMakeComparator(Type::type physical_type, ParquetException::NYI("Unsigned Compare not implemented"); } } else { - throw ParquetException("UNKNOWN Sort Order"); + if (logical_type == LogicalType::Type::GEOMETRY && + physical_type == Type::BYTE_ARRAY) { + return std::make_shared>(); + } else { + throw ParquetException("UNKNOWN Sort Order"); + } } return nullptr; } diff --git a/cpp/src/parquet/statistics.h b/cpp/src/parquet/statistics.h index 9ffbbc109c88e..d45024a7fbabf 100644 --- a/cpp/src/parquet/statistics.h +++ b/cpp/src/parquet/statistics.h @@ -144,6 +144,7 @@ class GeometryStatisticsImpl; class PARQUET_EXPORT GeometryStatistics { public: GeometryStatistics(); + explicit GeometryStatistics(std::unique_ptr impl); bool Equals(const GeometryStatistics& other) const; @@ -155,6 +156,8 @@ class PARQUET_EXPORT GeometryStatistics { bool is_valid() const; + std::shared_ptr clone() const; + static std::unique_ptr Decode(const EncodedGeometryStatistics& encoded); private: @@ -311,7 +314,7 @@ class PARQUET_EXPORT Statistics { virtual bool HasGeometryStatistics() const { return false; }; - virtual const GeometryStatistics* GeometryStatistics() const { return nullptr; } + virtual const GeometryStatistics* geometry_statistics() const { return nullptr; } /// \brief Reset state of object to initial (no data observed) state virtual void Reset() = 0; diff --git a/cpp/src/parquet/test_util.h b/cpp/src/parquet/test_util.h index 
59728cf53f699..4ce6b5295f47e 100644 --- a/cpp/src/parquet/test_util.h +++ b/cpp/src/parquet/test_util.h @@ -660,7 +660,7 @@ class PrimitiveTypedTest : public ::testing::Test { public: using c_type = typename TestType::c_type; - void SetUpSchema(Repetition::type repetition, int num_columns = 1) { + virtual void SetUpSchema(Repetition::type repetition, int num_columns) { std::vector fields; for (int i = 0; i < num_columns; ++i) { @@ -672,6 +672,10 @@ class PrimitiveTypedTest : public ::testing::Test { schema_.Init(node_); } + void SetUpSchema(Repetition::type repetition) { + this->SetUpSchema(repetition, 1); + } + void GenerateData(int64_t num_values, uint32_t seed = 0); void SetupValuesOut(int64_t num_values); void SyncValuesOut(); diff --git a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h index b21b0e07afba2..f1ac1f6e5c80b 100644 --- a/cpp/src/parquet/thrift_internal.h +++ b/cpp/src/parquet/thrift_internal.h @@ -348,6 +348,28 @@ static inline format::Statistics ToThrift(const EncodedStatistics& stats) { statistics.__set_distinct_count(stats.distinct_count); } + if (stats.has_geometry_statistics) { + const EncodedGeometryStatistics& encoded_geometry_stats = stats.geometry_statistics(); + format::GeometryStatistics geometry_statistics; + std::vector geometry_types(encoded_geometry_stats.geometry_types.size()); + std::transform(encoded_geometry_stats.geometry_types.begin(), encoded_geometry_stats.geometry_types.end(), + geometry_types.begin(), [](uint32_t value) { + return static_cast(value); + }); + geometry_statistics.__set_geometry_types(geometry_types); + format::BoundingBox bbox; + bbox.__set_xmin(encoded_geometry_stats.xmin); + bbox.__set_xmax(encoded_geometry_stats.xmax); + bbox.__set_ymin(encoded_geometry_stats.ymin); + bbox.__set_ymax(encoded_geometry_stats.ymax); + bbox.__set_zmin(encoded_geometry_stats.zmin); + bbox.__set_zmax(encoded_geometry_stats.zmax); + bbox.__set_mmin(encoded_geometry_stats.mmin); + 
bbox.__set_mmax(encoded_geometry_stats.mmax); + geometry_statistics.__set_bbox(bbox); + statistics.__set_geometry_stats(geometry_statistics); + } + return statistics; } From c052ae08b78f3d3257b994c9a50021a1938eb205 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Wed, 4 Sep 2024 10:59:43 +0800 Subject: [PATCH 32/61] Geometry column writer now populates correct statistics --- cpp/src/parquet/column_writer_test.cc | 23 ++++++++++++--------- cpp/src/parquet/metadata.cc | 6 ++++-- cpp/src/parquet/statistics.cc | 29 +++++++++++++++++++++------ cpp/src/parquet/statistics.h | 2 +- cpp/src/parquet/thrift_internal.h | 18 ++++++++--------- 5 files changed, 51 insertions(+), 27 deletions(-) diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index 095037057823f..f873bf5715114 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -386,8 +386,11 @@ class TestPrimitiveWriter : public PrimitiveTypedTest { return metadata_accessor->encoding_stats(); } - std::unique_ptr metadata_accessor() { - return ColumnChunkMetaData::Make(metadata_->contents(), this->descr_); + EncodedStatistics metadata_encoded_stats() { + ApplicationVersion app_version(this->writer_properties_->created_by()); + auto metadata_accessor = ColumnChunkMetaData::Make( + metadata_->contents(), this->descr_, default_reader_properties(), &app_version); + return metadata_accessor->statistics()->Encode(); } protected: @@ -1789,13 +1792,15 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { EXPECT_DOUBLE_EQ(i + 1, y); } - auto metadata_accessor = this->metadata_accessor(); - // auto statistics = metadata_accessor->statistics(); - - // auto metadata_encodings = this->metadata_encodings(); - // std::set metadata_encodings_set{metadata_encodings.begin(), - // metadata_encodings.end()}; - // EXPECT_EQ(expected_encodings, metadata_encodings_set); + auto encoded_statistics = metadata_encoded_stats(); + 
EXPECT_TRUE(encoded_statistics.has_geometry_statistics); + auto geometry_statistics = encoded_statistics.geometry_statistics(); + EXPECT_EQ(1, geometry_statistics.geometry_types.size()); + EXPECT_EQ(1, geometry_statistics.geometry_types[0]); + EXPECT_DOUBLE_EQ(0, geometry_statistics.xmin); + EXPECT_DOUBLE_EQ(1, geometry_statistics.ymin); + EXPECT_DOUBLE_EQ(99, geometry_statistics.xmax); + EXPECT_DOUBLE_EQ(100, geometry_statistics.ymax); } }; diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index 5c7e28aca11ba..29b934192ef8e 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -294,8 +294,10 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl { DCHECK(writer_version_ != nullptr); // If the column statistics don't exist or column sort order is unknown // we cannot use the column stats + auto logical_type = descr_->logical_type(); + bool is_geometry = (logical_type != nullptr && logical_type->is_geometry()); if (!column_metadata_->__isset.statistics || - descr_->sort_order() == SortOrder::UNKNOWN) { + (descr_->sort_order() == SortOrder::UNKNOWN && !is_geometry)) { return false; } if (possible_stats_ == nullptr) { @@ -1522,7 +1524,7 @@ bool ApplicationVersion::HasCorrectStatistics(Type::type col_type, } // Unknown sort order has incorrect stats - if (SortOrder::UNKNOWN == sort_order) { + if (SortOrder::UNKNOWN == sort_order && !statistics.has_geometry_statistics) { return false; } diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index e642378d98905..81af57f622b79 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -199,11 +199,8 @@ bool GeometryStatistics::is_valid() const { return impl_->is_valid(); } EncodedGeometryStatistics GeometryStatistics::Encode() { return impl_->Encode(); } -std::unique_ptr GeometryStatistics::Decode( - const EncodedGeometryStatistics& encoded) { - auto out = std::make_unique(); - out->impl_->Update(encoded); - return out; +void 
GeometryStatistics::Decode(const EncodedGeometryStatistics& encoded) { + impl_->Update(encoded); } std::shared_ptr GeometryStatistics::clone() const { @@ -780,6 +777,22 @@ class TypedStatisticsImpl : public TypedStatistics { has_min_max_ = has_min_max; } + // Create stats from a thrift Statistics object. + TypedStatisticsImpl(const ColumnDescriptor* descr, const std::string& encoded_min, + const std::string& encoded_max, int64_t num_values, + int64_t null_count, int64_t distinct_count, + const EncodedGeometryStatistics& geometry_statistics, + bool has_min_max, bool has_null_count, bool has_distinct_count, + bool has_geometry_statistics, MemoryPool* pool) + : TypedStatisticsImpl(descr, encoded_min, encoded_max, num_values, null_count, + distinct_count, has_min_max, has_null_count, + has_distinct_count, pool) { + if (has_geometry_statistics) { + geometry_statistics_ = std::make_shared(); + geometry_statistics_->Decode(geometry_statistics); + } + } + bool HasDistinctCount() const override { return has_distinct_count_; }; bool HasMinMax() const override { return has_min_max_; } bool HasNullCount() const override { return has_null_count_; }; @@ -1261,8 +1274,12 @@ std::shared_ptr Statistics::Make( MAKE_STATS(INT64, Int64Type); MAKE_STATS(FLOAT, FloatType); MAKE_STATS(DOUBLE, DoubleType); - MAKE_STATS(BYTE_ARRAY, ByteArrayType); MAKE_STATS(FIXED_LEN_BYTE_ARRAY, FLBAType); + case Type::BYTE_ARRAY: + return std::make_shared>( + descr, encoded_min, encoded_max, num_values, null_count, distinct_count, + geometry_statistics, has_min_max, has_null_count, has_distinct_count, + has_geometry_statistics, pool); default: break; } diff --git a/cpp/src/parquet/statistics.h b/cpp/src/parquet/statistics.h index d45024a7fbabf..a62c860b89190 100644 --- a/cpp/src/parquet/statistics.h +++ b/cpp/src/parquet/statistics.h @@ -158,7 +158,7 @@ class PARQUET_EXPORT GeometryStatistics { std::shared_ptr clone() const; - static std::unique_ptr Decode(const EncodedGeometryStatistics& encoded); + 
void Decode(const EncodedGeometryStatistics& encoded); private: std::unique_ptr impl_; diff --git a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h index f1ac1f6e5c80b..040f11d5a3da2 100644 --- a/cpp/src/parquet/thrift_internal.h +++ b/cpp/src/parquet/thrift_internal.h @@ -351,21 +351,21 @@ static inline format::Statistics ToThrift(const EncodedStatistics& stats) { if (stats.has_geometry_statistics) { const EncodedGeometryStatistics& encoded_geometry_stats = stats.geometry_statistics(); format::GeometryStatistics geometry_statistics; - std::vector geometry_types(encoded_geometry_stats.geometry_types.size()); - std::transform(encoded_geometry_stats.geometry_types.begin(), encoded_geometry_stats.geometry_types.end(), - geometry_types.begin(), [](uint32_t value) { - return static_cast(value); - }); + std::vector geometry_types(encoded_geometry_stats.geometry_types.begin(), encoded_geometry_stats.geometry_types.end()); geometry_statistics.__set_geometry_types(geometry_types); format::BoundingBox bbox; bbox.__set_xmin(encoded_geometry_stats.xmin); bbox.__set_xmax(encoded_geometry_stats.xmax); bbox.__set_ymin(encoded_geometry_stats.ymin); bbox.__set_ymax(encoded_geometry_stats.ymax); - bbox.__set_zmin(encoded_geometry_stats.zmin); - bbox.__set_zmax(encoded_geometry_stats.zmax); - bbox.__set_mmin(encoded_geometry_stats.mmin); - bbox.__set_mmax(encoded_geometry_stats.mmax); + if (encoded_geometry_stats.has_z()) { + bbox.__set_zmin(encoded_geometry_stats.zmin); + bbox.__set_zmax(encoded_geometry_stats.zmax); + } + if (encoded_geometry_stats.has_m()) { + bbox.__set_mmin(encoded_geometry_stats.mmin); + bbox.__set_mmax(encoded_geometry_stats.mmax); + } geometry_statistics.__set_bbox(bbox); statistics.__set_geometry_stats(geometry_statistics); } From 3c6b2223f774f0913a7fc64b02ef9dc58d24d97c Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Wed, 4 Sep 2024 11:15:35 +0800 Subject: [PATCH 33/61] format/tidy --- cpp/src/parquet/column_writer_test.cc | 20 
+++++++++++--------- cpp/src/parquet/geometry_util.h | 3 ++- cpp/src/parquet/statistics.cc | 13 ++++++++----- cpp/src/parquet/statistics.h | 5 +++-- cpp/src/parquet/thrift_internal.h | 7 ++++--- 5 files changed, 28 insertions(+), 20 deletions(-) diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index f873bf5715114..402cfb7986e3e 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -390,7 +390,7 @@ class TestPrimitiveWriter : public PrimitiveTypedTest { ApplicationVersion app_version(this->writer_properties_->created_by()); auto metadata_accessor = ColumnChunkMetaData::Make( metadata_->contents(), this->descr_, default_reader_properties(), &app_version); - return metadata_accessor->statistics()->Encode(); + return metadata_accessor->statistics()->Encode(); } protected: @@ -1725,7 +1725,9 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { for (int i = 0; i < num_columns; ++i) { std::string name = TestColumnName(i); std::shared_ptr logical_type = GeometryLogicalType::Make( - CRS, LogicalType::GeometryEdges::PLANAR, LogicalType::GeometryEncoding::WKB, METADATA); + CRS, LogicalType::GeometryEdges::PLANAR, + LogicalType::GeometryEncoding::WKB, + METADATA); fields.push_back(schema::PrimitiveNode::Make(name, repetition, logical_type, ByteArrayType::type_num)); } @@ -1755,7 +1757,7 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { values_[k].ptr = ptr; ptr += point_wkb_size; } - + values_ptr_ = values_.data(); std::fill(def_levels_.begin(), def_levels_.end(), 1); @@ -1770,17 +1772,17 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { this->BuildWriter(num_values, ColumnProperties(), version, data_page_version, /*enable_checksum*/ false); std::vector definition_levels(num_values, 0); - std::vector repetition_levels(num_values, 0); + std::vector repetition_levels(num_values, 0); writer->WriteBatch(this->values_.size(), definition_levels.data(), 
repetition_levels.data(), this->values_.data()); - + writer->Close(); this->ReadColumn(); for (size_t i = 0; i < num_values; i++) { // ASSERT_EQ((i % 2 == 0) ? true : false, this->values_out_[i]) << i; const ByteArray &value = this->values_out_[i]; EXPECT_EQ(21, value.len); - EXPECT_EQ(1, value.ptr[0]); + EXPECT_EQ(1, value.ptr[0]); uint32_t geom_type = 0; double x = 0; double y = 0; @@ -1800,8 +1802,8 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { EXPECT_DOUBLE_EQ(0, geometry_statistics.xmin); EXPECT_DOUBLE_EQ(1, geometry_statistics.ymin); EXPECT_DOUBLE_EQ(99, geometry_statistics.xmax); - EXPECT_DOUBLE_EQ(100, geometry_statistics.ymax); - } + EXPECT_DOUBLE_EQ(100, geometry_statistics.ymax); + } }; const char* TestGeometryValuesWriter::CRS = R"({"id": {"authority": "OGC", "code": "CRS84"}})"; @@ -1819,7 +1821,7 @@ TEST_F(TestGeometryValuesWriter, TestWriteAndReadV2) { for (auto data_page_version : {ParquetDataPageVersion::V1, ParquetDataPageVersion::V2}) { TestWriteAndRead(ParquetVersion::PARQUET_2_4, data_page_version); - } + } } } // namespace test diff --git a/cpp/src/parquet/geometry_util.h b/cpp/src/parquet/geometry_util.h index 3cba84016b044..7680378a3cec3 100644 --- a/cpp/src/parquet/geometry_util.h +++ b/cpp/src/parquet/geometry_util.h @@ -24,6 +24,7 @@ #include "arrow/util/endian.h" #include "arrow/util/logging.h" +#include "arrow/util/macros.h" #include "arrow/util/ubsan.h" #include "parquet/exception.h" @@ -176,7 +177,7 @@ struct GeometryType { class WKBBuffer { public: - WKBBuffer() : data_(nullptr), size_(0) {} + WKBBuffer() : data_(NULLPTR), size_(0) {} WKBBuffer(const uint8_t* data, int64_t size) : data_(data), size_(size) {} void Init(const uint8_t* data, int64_t size) { diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index 81af57f622b79..7e4731abb5491 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -53,9 +53,9 @@ namespace parquet { class GeometryStatisticsImpl { public: 
- GeometryStatisticsImpl() = default; + GeometryStatisticsImpl() = default; GeometryStatisticsImpl(const GeometryStatisticsImpl&) = default; - + bool Equals(const GeometryStatisticsImpl& other) const { if (is_valid_ != other.is_valid_) { return false; @@ -204,7 +204,8 @@ void GeometryStatistics::Decode(const EncodedGeometryStatistics& encoded) { } std::shared_ptr GeometryStatistics::clone() const { - std::unique_ptr impl = std::make_unique(*impl_); + std::unique_ptr impl = + std::make_unique(*impl_); return std::make_shared(std::move(impl)); } @@ -797,7 +798,9 @@ class TypedStatisticsImpl : public TypedStatistics { bool HasMinMax() const override { return has_min_max_; } bool HasNullCount() const override { return has_null_count_; }; bool HasGeometryStatistics() const override { return geometry_statistics_ != nullptr; } - const GeometryStatistics* geometry_statistics() const override { return geometry_statistics_.get(); } + const GeometryStatistics* geometry_statistics() const override { + return geometry_statistics_.get(); + } void IncrementNullCount(int64_t n) override { statistics_.null_count += n; @@ -1172,7 +1175,7 @@ std::shared_ptr DoMakeComparator(Type::type physical_type, physical_type == Type::BYTE_ARRAY) { return std::make_shared>(); } else { - throw ParquetException("UNKNOWN Sort Order"); + throw ParquetException("UNKNOWN Sort Order"); } } return nullptr; diff --git a/cpp/src/parquet/statistics.h b/cpp/src/parquet/statistics.h index a62c860b89190..4e785d6a15f6d 100644 --- a/cpp/src/parquet/statistics.h +++ b/cpp/src/parquet/statistics.h @@ -24,6 +24,7 @@ #include #include +#include "arrow/util/macros.h" #include "parquet/platform.h" #include "parquet/types.h" @@ -312,9 +313,9 @@ class PARQUET_EXPORT Statistics { /// with TypedStatistics::min and max virtual bool HasMinMax() const = 0; - virtual bool HasGeometryStatistics() const { return false; }; + virtual bool HasGeometryStatistics() const { return false; } - virtual const GeometryStatistics* 
geometry_statistics() const { return nullptr; } + virtual const GeometryStatistics* geometry_statistics() const { return NULLPTR; } /// \brief Reset state of object to initial (no data observed) state virtual void Reset() = 0; diff --git a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h index 040f11d5a3da2..6e82c3f454f38 100644 --- a/cpp/src/parquet/thrift_internal.h +++ b/cpp/src/parquet/thrift_internal.h @@ -351,7 +351,8 @@ static inline format::Statistics ToThrift(const EncodedStatistics& stats) { if (stats.has_geometry_statistics) { const EncodedGeometryStatistics& encoded_geometry_stats = stats.geometry_statistics(); format::GeometryStatistics geometry_statistics; - std::vector geometry_types(encoded_geometry_stats.geometry_types.begin(), encoded_geometry_stats.geometry_types.end()); + std::vector geometry_types(encoded_geometry_stats.geometry_types.begin(), + encoded_geometry_stats.geometry_types.end()); geometry_statistics.__set_geometry_types(geometry_types); format::BoundingBox bbox; bbox.__set_xmin(encoded_geometry_stats.xmin); @@ -364,8 +365,8 @@ static inline format::Statistics ToThrift(const EncodedStatistics& stats) { } if (encoded_geometry_stats.has_m()) { bbox.__set_mmin(encoded_geometry_stats.mmin); - bbox.__set_mmax(encoded_geometry_stats.mmax); - } + bbox.__set_mmax(encoded_geometry_stats.mmax); + } geometry_statistics.__set_bbox(bbox); statistics.__set_geometry_stats(geometry_statistics); } From f6ae9aec4d1d0f8fe08beb8a80aff5559e044518 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Wed, 4 Sep 2024 13:14:40 +0800 Subject: [PATCH 34/61] Run clang-tidy --- cpp/src/parquet/column_writer_test.cc | 19 +++++++++---------- cpp/src/parquet/geometry_util.h | 6 +++--- cpp/src/parquet/statistics.cc | 3 +-- cpp/src/parquet/test_util.h | 4 +--- cpp/src/parquet/types.cc | 7 +++---- 5 files changed, 17 insertions(+), 22 deletions(-) diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index 
402cfb7986e3e..7916f9ce70fba 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -1716,7 +1716,7 @@ TEST(TestColumnWriter, WriteDataPageV2HeaderNullCount) { // Test writing and reading geometry columns class TestGeometryValuesWriter : public TestPrimitiveWriter { public: - static const char *CRS; + static const char* CRS; static const char* METADATA; void SetUpSchema(Repetition::type repetition, int num_columns) override { @@ -1724,10 +1724,9 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { for (int i = 0; i < num_columns; ++i) { std::string name = TestColumnName(i); - std::shared_ptr logical_type = GeometryLogicalType::Make( - CRS, LogicalType::GeometryEdges::PLANAR, - LogicalType::GeometryEncoding::WKB, - METADATA); + std::shared_ptr logical_type = + GeometryLogicalType::Make(CRS, LogicalType::GeometryEdges::PLANAR, + LogicalType::GeometryEncoding::WKB, METADATA); fields.push_back(schema::PrimitiveNode::Make(name, repetition, logical_type, ByteArrayType::type_num)); } @@ -1741,10 +1740,10 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { uint32_t point_wkb_size = 21; buffer_.resize(num_values * point_wkb_size); - uint8_t *ptr = buffer_.data(); + uint8_t* ptr = buffer_.data(); for (int k = 0; k < num_values; k++) { // Point with coordinates (k, k + 1), encoded as WKB - ptr[0] = 0x01; // 1: little endian + ptr[0] = 0x01; // 1: little endian uint32_t geom_type = 1; // 1: POINT (2D) memcpy(&ptr[1], &geom_type, 4); double x = k; @@ -1780,7 +1779,7 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { this->ReadColumn(); for (size_t i = 0; i < num_values; i++) { // ASSERT_EQ((i % 2 == 0) ? 
true : false, this->values_out_[i]) << i; - const ByteArray &value = this->values_out_[i]; + const ByteArray& value = this->values_out_[i]; EXPECT_EQ(21, value.len); EXPECT_EQ(1, value.ptr[0]); uint32_t geom_type = 0; @@ -1806,10 +1805,10 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { } }; -const char* TestGeometryValuesWriter::CRS = R"({"id": {"authority": "OGC", "code": "CRS84"}})"; +const char* TestGeometryValuesWriter::CRS = + R"({"id": {"authority": "OGC", "code": "CRS84"}})"; const char* TestGeometryValuesWriter::METADATA = "test_metadata"; - TEST_F(TestGeometryValuesWriter, TestWriteAndReadV1) { for (auto data_page_version : {ParquetDataPageVersion::V1, ParquetDataPageVersion::V2}) { diff --git a/cpp/src/parquet/geometry_util.h b/cpp/src/parquet/geometry_util.h index 7680378a3cec3..4ba77c822d7f6 100644 --- a/cpp/src/parquet/geometry_util.h +++ b/cpp/src/parquet/geometry_util.h @@ -333,7 +333,7 @@ template class WKBSequenceBounder { public: explicit WKBSequenceBounder(double* chunk) : box_(dims), chunk_(chunk) {} - WKBSequenceBounder(const WKBSequenceBounder &) = default; + WKBSequenceBounder(const WKBSequenceBounder&) = default; void ReadPoint(WKBBuffer* src) { constexpr uint32_t coord_size = Dimensions::size(); @@ -400,7 +400,7 @@ class WKBGenericSequenceBounder { xyz_swap_(chunk_), xym_swap_(chunk_), xyzm_swap_(chunk_) {} - WKBGenericSequenceBounder(const WKBGenericSequenceBounder &) = default; + WKBGenericSequenceBounder(const WKBGenericSequenceBounder&) = default; void ReadPoint(WKBBuffer* src, Dimensions::dimensions dimensions, bool swap) { if (ARROW_PREDICT_TRUE(!swap)) { @@ -541,7 +541,7 @@ class WKBGenericSequenceBounder { class WKBGeometryBounder { public: WKBGeometryBounder() : box_(Dimensions::XYZM) {} - WKBGeometryBounder(const WKBGeometryBounder &) = default; + WKBGeometryBounder(const WKBGeometryBounder&) = default; void ReadGeometry(WKBBuffer* src, bool record_wkb_type = true) { uint8_t endian = src->ReadUInt8(); diff --git 
a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index 7e4731abb5491..af40838e8dba4 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -179,8 +179,7 @@ GeometryStatistics::GeometryStatistics() { } GeometryStatistics::GeometryStatistics(std::unique_ptr impl) - : impl_(std::move(impl)) { -} + : impl_(std::move(impl)) {} bool GeometryStatistics::Equals(const GeometryStatistics& other) const { return impl_->Equals(*other.impl_); diff --git a/cpp/src/parquet/test_util.h b/cpp/src/parquet/test_util.h index 4ce6b5295f47e..d22cea7b431ad 100644 --- a/cpp/src/parquet/test_util.h +++ b/cpp/src/parquet/test_util.h @@ -672,9 +672,7 @@ class PrimitiveTypedTest : public ::testing::Test { schema_.Init(node_); } - void SetUpSchema(Repetition::type repetition) { - this->SetUpSchema(repetition, 1); - } + void SetUpSchema(Repetition::type repetition) { this->SetUpSchema(repetition, 1); } void GenerateData(int64_t num_values, uint32_t seed = 0); void SetupValuesOut(int64_t num_values); diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index 6707fb78ed4e2..db8400a80c0d8 100644 --- a/cpp/src/parquet/types.cc +++ b/cpp/src/parquet/types.cc @@ -1738,10 +1738,9 @@ format::LogicalType LogicalType::Impl::Geometry::ToThrift() const { bool LogicalType::Impl::Geometry::Equals(const LogicalType& other) const { if (other.is_geometry()) { const auto& other_geometry = checked_cast(other); - return crs() == other_geometry.crs() && - edges() == other_geometry.edges() && - encoding() == other_geometry.encoding() && - metadata() == other_geometry.metadata(); + return crs() == other_geometry.crs() && edges() == other_geometry.edges() && + encoding() == other_geometry.encoding() && + metadata() == other_geometry.metadata(); } else { return false; } From 1c9523ba33b3d55d4c34782a04b5a1019f016aec Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Thu, 5 Sep 2024 18:04:06 +0800 Subject: [PATCH 35/61] Added a test that writes and reads a parquet 
file containing a geometry column --- cpp/src/parquet/column_writer_test.cc | 1 - cpp/src/parquet/reader_test.cc | 116 ++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 1 deletion(-) diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index 7916f9ce70fba..5e78f1138e7ce 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -1778,7 +1778,6 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { writer->Close(); this->ReadColumn(); for (size_t i = 0; i < num_values; i++) { - // ASSERT_EQ((i % 2 == 0) ? true : false, this->values_out_[i]) << i; const ByteArray& value = this->values_out_[i]; EXPECT_EQ(21, value.len); EXPECT_EQ(1, value.ptr[0]); diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index fb77ba6cbc178..f99d2437b2843 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -28,6 +28,8 @@ #include #include "arrow/array.h" +#include "arrow/array/array_binary.h" +#include "arrow/array/builder_binary.h" #include "arrow/buffer.h" #include "arrow/io/file.h" #include "arrow/testing/future_util.h" @@ -39,13 +41,16 @@ #include "parquet/column_reader.h" #include "parquet/column_scanner.h" +#include "parquet/column_writer.h" #include "parquet/file_reader.h" #include "parquet/file_writer.h" #include "parquet/metadata.h" #include "parquet/page_index.h" #include "parquet/platform.h" #include "parquet/printer.h" +#include "parquet/statistics.h" #include "parquet/test_util.h" +#include "parquet/types.h" using arrow::internal::checked_pointer_cast; using arrow::internal::Zip; @@ -1812,4 +1817,115 @@ TEST(PageIndexReaderTest, ReadFileWithoutPageIndex) { ASSERT_EQ(nullptr, row_group_index_reader); } +TEST(TestFileReader, GeometryLogicalType) { + const int num_rows = 1000; + + // Make schema + schema::NodeVector fields; + fields.push_back(PrimitiveNode::Make( + "g", Repetition::REQUIRED, + 
GeometryLogicalType::Make(R"({"id": {"authority": "OGC", "code": "CRS84"}})", + LogicalType::GeometryEdges::PLANAR, + LogicalType::GeometryEncoding::WKB, "metadata0"), + Type::BYTE_ARRAY)); + auto schema = std::static_pointer_cast( + GroupNode::Make("schema", Repetition::REQUIRED, fields)); + + // Write small batches and small data pages + std::shared_ptr writer_props = + WriterProperties::Builder().write_batch_size(64)->data_pagesize(128)->build(); + + ASSERT_OK_AND_ASSIGN(auto out_file, ::arrow::io::BufferOutputStream::Create()); + std::shared_ptr file_writer = + ParquetFileWriter::Open(out_file, schema, writer_props); + RowGroupWriter* rg_writer = file_writer->AppendRowGroup(); + + // write WKB points to columns + auto* writer = static_cast(rg_writer->NextColumn()); + uint32_t point_wkb_size = 21; + std::vector buffer(point_wkb_size * num_rows); + uint8_t* ptr = buffer.data(); + std::vector values(num_rows); + for (int k = 0; k < num_rows; k++) { + // Point with coordinates (k, k + 1), encoded as WKB + ptr[0] = 0x01; // 1: little endian + uint32_t geom_type = 1; // 1: POINT (2D) + memcpy(&ptr[1], &geom_type, 4); + double x = k; + double y = k + 1; + memcpy(&ptr[5], &x, 8); + memcpy(&ptr[13], &y, 8); + + // Set this WKB value to values_[k] + values[k].len = point_wkb_size; + values[k].ptr = ptr; + ptr += point_wkb_size; + } + writer->WriteBatch(num_rows, nullptr, nullptr, values.data()); + + rg_writer->Close(); + file_writer->Close(); + + // Open the reader + ASSERT_OK_AND_ASSIGN(auto file_buf, out_file->Finish()); + auto in_file = std::make_shared<::arrow::io::BufferReader>(file_buf); + + ReaderProperties reader_props; + reader_props.enable_buffered_stream(); + reader_props.set_buffer_size(64); + std::unique_ptr file_reader = + ParquetFileReader::Open(in_file, reader_props); + + // Check that the geometry statistics are correctly written and read + std::shared_ptr metadata = file_reader->metadata(); + int num_row_groups = metadata->num_row_groups(); + for (int i 
= 0; i < num_row_groups; i++) { + std::unique_ptr row_group_metadata = metadata->RowGroup(i); + std::unique_ptr column_chunk_metadata = + row_group_metadata->ColumnChunk(0); + EncodedStatistics encoded_statistics = column_chunk_metadata->statistics()->Encode(); + EXPECT_TRUE(encoded_statistics.has_geometry_statistics); + const EncodedGeometryStatistics& geom_stats = + encoded_statistics.geometry_statistics(); + EXPECT_EQ(1, geom_stats.geometry_types.size()); + EXPECT_EQ(1, geom_stats.geometry_types[0]); + EXPECT_GE(geom_stats.xmin, 0); + EXPECT_GT(geom_stats.xmax, geom_stats.xmin); + EXPECT_GT(geom_stats.ymin, 0); + EXPECT_GT(geom_stats.ymax, geom_stats.ymin); + } + + // Check the geometry values + auto row_group = file_reader->RowGroup(0); + std::shared_ptr reader = + std::static_pointer_cast(row_group->Column(0)); + int64_t total_values_read = 0; + while (total_values_read < num_rows) { + std::vector out(num_rows); + int64_t values_read = 0; + int64_t levels_read = + reader->ReadBatch(num_rows, nullptr, nullptr, out.data(), &values_read); + ASSERT_GE(levels_read, 1); + ASSERT_GE(values_read, 1); + + // Check the batch + for (int64_t i = 0; i < values_read; i++) { + const ByteArray& value = out[i]; + EXPECT_EQ(21, value.len); + EXPECT_EQ(1, value.ptr[0]); + uint32_t geom_type = 0; + double x = 0; + double y = 0; + memcpy(&geom_type, &value.ptr[1], 4); + memcpy(&x, &value.ptr[5], 8); + memcpy(&y, &value.ptr[13], 8); + EXPECT_EQ(1, geom_type); + EXPECT_DOUBLE_EQ(i + total_values_read, x); + EXPECT_DOUBLE_EQ(i + 1 + total_values_read, y); + } + + total_values_read += values_read; + } +} + } // namespace parquet From 5a50790f76e1bd564f7e41df4b55363d43c85157 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Fri, 6 Sep 2024 08:01:04 +0800 Subject: [PATCH 36/61] Remove redundant include --- cpp/src/parquet/column_writer_test.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index 
5e78f1138e7ce..a3de31c6c10b0 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -19,7 +19,6 @@ #include #include -#include <_types/_uint32_t.h> #include #include From 51e4ab84ffa6b00cfd935a35002d67dd340e18b7 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Fri, 6 Sep 2024 09:35:33 +0800 Subject: [PATCH 37/61] Fix problems found by reviewers --- cpp/src/parquet/column_writer_test.cc | 26 ++++------------ cpp/src/parquet/geometry_util.h | 42 ++++++++++++++++++++++++-- cpp/src/parquet/reader_test.cc | 26 ++++------------ cpp/src/parquet/test_util.h | 43 +++++++++++++++++++++++++++ 4 files changed, 93 insertions(+), 44 deletions(-) diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index a3de31c6c10b0..1a0398bf6d811 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -1737,23 +1737,13 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { def_levels_.resize(num_values); values_.resize(num_values); - uint32_t point_wkb_size = 21; - buffer_.resize(num_values * point_wkb_size); + buffer_.resize(num_values * WKB_POINT_SIZE); uint8_t* ptr = buffer_.data(); for (int k = 0; k < num_values; k++) { - // Point with coordinates (k, k + 1), encoded as WKB - ptr[0] = 0x01; // 1: little endian - uint32_t geom_type = 1; // 1: POINT (2D) - memcpy(&ptr[1], &geom_type, 4); - double x = k; - double y = k + 1; - memcpy(&ptr[5], &x, 8); - memcpy(&ptr[13], &y, 8); - - // Set this WKB value to values_[k] - values_[k].len = point_wkb_size; + GenerateWKBPoint(ptr, k, k + 1); + values_[k].len = WKB_POINT_SIZE; values_[k].ptr = ptr; - ptr += point_wkb_size; + ptr += WKB_POINT_SIZE; } values_ptr_ = values_.data(); @@ -1778,15 +1768,9 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { this->ReadColumn(); for (size_t i = 0; i < num_values; i++) { const ByteArray& value = this->values_out_[i]; - EXPECT_EQ(21, value.len); - EXPECT_EQ(1, 
value.ptr[0]); - uint32_t geom_type = 0; double x = 0; double y = 0; - memcpy(&geom_type, &value.ptr[1], 4); - memcpy(&x, &value.ptr[5], 8); - memcpy(&y, &value.ptr[13], 8); - EXPECT_EQ(1, geom_type); + EXPECT_TRUE(GetWKBPointCoordinate(value, &x, &y)); EXPECT_DOUBLE_EQ(i, x); EXPECT_DOUBLE_EQ(i + 1, y); } diff --git a/cpp/src/parquet/geometry_util.h b/cpp/src/parquet/geometry_util.h index 4ba77c822d7f6..1ba4b909d3ae2 100644 --- a/cpp/src/parquet/geometry_util.h +++ b/cpp/src/parquet/geometry_util.h @@ -153,6 +153,42 @@ struct GeometryType { } } + static uint32_t ToWKB(geometry_type geometry_type, bool has_z, bool has_m) { + uint32_t wkb_geom_type = 0; + switch (geometry_type) { + case POINT: + wkb_geom_type = 1; + break; + case LINESTRING: + wkb_geom_type = 2; + break; + case POLYGON: + wkb_geom_type = 3; + break; + case MULTIPOINT: + wkb_geom_type = 4; + break; + case MULTILINESTRING: + wkb_geom_type = 5; + break; + case MULTIPOLYGON: + wkb_geom_type = 6; + break; + case GEOMETRYCOLLECTION: + wkb_geom_type = 7; + break; + default: + throw ParquetException("Invalid geometry_type: ", geometry_type); + } + if (has_z) { + wkb_geom_type += 1000; + } + if (has_m) { + wkb_geom_type += 2000; + } + return wkb_geom_type; + } + static std::string ToString(geometry_type geometry_type) { switch (geometry_type) { case POINT: @@ -177,6 +213,8 @@ struct GeometryType { class WKBBuffer { public: + enum Endianness { WKB_BIG_ENDIAN = 0, WKB_LITTLE_ENDIAN = 1 }; + WKBBuffer() : data_(NULLPTR), size_(0) {} WKBBuffer(const uint8_t* data, int64_t size) : data_(data), size_(size) {} @@ -546,9 +584,9 @@ class WKBGeometryBounder { void ReadGeometry(WKBBuffer* src, bool record_wkb_type = true) { uint8_t endian = src->ReadUInt8(); #if defined(ARROW_LITTLE_ENDIAN) - bool swap = endian != 0x01; + bool swap = endian != WKBBuffer::WKB_LITTLE_ENDIAN; #else - bool swap = endian != 0x00; + bool swap = endian != WKBBuffer::WKB_BIG_ENDIAN; #endif uint32_t wkb_geometry_type = src->ReadUInt32(swap); 
diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index f99d2437b2843..4084b38e4116b 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -1842,24 +1842,14 @@ TEST(TestFileReader, GeometryLogicalType) { // write WKB points to columns auto* writer = static_cast(rg_writer->NextColumn()); - uint32_t point_wkb_size = 21; - std::vector buffer(point_wkb_size * num_rows); + std::vector buffer(test::WKB_POINT_SIZE * num_rows); uint8_t* ptr = buffer.data(); std::vector values(num_rows); for (int k = 0; k < num_rows; k++) { - // Point with coordinates (k, k + 1), encoded as WKB - ptr[0] = 0x01; // 1: little endian - uint32_t geom_type = 1; // 1: POINT (2D) - memcpy(&ptr[1], &geom_type, 4); - double x = k; - double y = k + 1; - memcpy(&ptr[5], &x, 8); - memcpy(&ptr[13], &y, 8); - - // Set this WKB value to values_[k] - values[k].len = point_wkb_size; + test::GenerateWKBPoint(ptr, k, k + 1); + values[k].len = test::WKB_POINT_SIZE; values[k].ptr = ptr; - ptr += point_wkb_size; + ptr += test::WKB_POINT_SIZE; } writer->WriteBatch(num_rows, nullptr, nullptr, values.data()); @@ -1911,15 +1901,9 @@ TEST(TestFileReader, GeometryLogicalType) { // Check the batch for (int64_t i = 0; i < values_read; i++) { const ByteArray& value = out[i]; - EXPECT_EQ(21, value.len); - EXPECT_EQ(1, value.ptr[0]); - uint32_t geom_type = 0; double x = 0; double y = 0; - memcpy(&geom_type, &value.ptr[1], 4); - memcpy(&x, &value.ptr[5], 8); - memcpy(&y, &value.ptr[13], 8); - EXPECT_EQ(1, geom_type); + EXPECT_TRUE(test::GetWKBPointCoordinate(value, &x, &y)); EXPECT_DOUBLE_EQ(i + total_values_read, x); EXPECT_DOUBLE_EQ(i + 1 + total_values_read, y); } diff --git a/cpp/src/parquet/test_util.h b/cpp/src/parquet/test_util.h index d22cea7b431ad..1f91fde26706d 100644 --- a/cpp/src/parquet/test_util.h +++ b/cpp/src/parquet/test_util.h @@ -39,6 +39,7 @@ #include "parquet/column_reader.h" #include "parquet/column_writer.h" #include "parquet/encoding.h" 
+#include "parquet/geometry_util.h" #include "parquet/platform.h" // https://github.com/google/googletest/pull/2904 might not be available @@ -832,5 +833,47 @@ inline void GenerateData(int num_values, FLBA* out, std::vector* random_fixed_byte_array(num_values, 0, heap->data(), kGenerateDataFLBALength, out); } +// ---------------------------------------------------------------------- +// Test utility functions for geometry + +static constexpr int WKB_NATIVE_ENDIANNESS = +#if defined(ARROW_LITTLE_ENDIAN) + geometry::WKBBuffer::WKB_LITTLE_ENDIAN +#else + geometry::WKBBuffer::WKB_BIG_ENDIAN +#endif + ; + +static constexpr int WKB_POINT_SIZE = 21; // 1:endianness + 4:type + 8:x + 8:y + +inline int GenerateWKBPoint(uint8_t* ptr, double x, double y) { + ptr[0] = WKB_NATIVE_ENDIANNESS; + uint32_t geom_type = + geometry::GeometryType::ToWKB(geometry::GeometryType::POINT, false, false); + memcpy(&ptr[1], &geom_type, 4); + memcpy(&ptr[5], &x, 8); + memcpy(&ptr[13], &y, 8); + return WKB_POINT_SIZE; +} + +inline bool GetWKBPointCoordinate(const ByteArray& value, double* out_x, double* out_y) { + if (value.len != WKB_POINT_SIZE) { + return false; + } + if (value.ptr[0] != WKB_NATIVE_ENDIANNESS) { + return false; + } + uint32_t expected_geom_type = + geometry::GeometryType::ToWKB(geometry::GeometryType::POINT, false, false); + uint32_t geom_type = 0; + memcpy(&geom_type, &value.ptr[1], 4); + if (geom_type != expected_geom_type) { + return false; + } + memcpy(out_x, &value.ptr[5], 8); + memcpy(out_y, &value.ptr[13], 8); + return true; +} + } // namespace test } // namespace parquet From c40c04e269f0e0da80aad19986ab5a1921cefa02 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Fri, 6 Sep 2024 10:08:32 +0800 Subject: [PATCH 38/61] Try to make it build properly on other platforms --- cpp/src/parquet/geometry_util.h | 70 +++++++++++++++++---------------- cpp/src/parquet/test_util.h | 6 +-- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git 
a/cpp/src/parquet/geometry_util.h b/cpp/src/parquet/geometry_util.h index 1ba4b909d3ae2..024a131298c71 100644 --- a/cpp/src/parquet/geometry_util.h +++ b/cpp/src/parquet/geometry_util.h @@ -53,40 +53,7 @@ struct Dimensions { template constexpr static uint32_t size(); - template <> - constexpr uint32_t size() { - return 2; - } - - template <> - constexpr uint32_t size() { - return 3; - } - - template <> - constexpr uint32_t size() { - return 3; - } - - template <> - constexpr uint32_t size() { - return 4; - } - - static uint32_t size(dimensions dims) { - switch (dims) { - case XY: - return size(); - case XYZ: - return size(); - case XYM: - return size(); - case XYZM: - return size(); - default: - return 0; - } - } + static uint32_t size(dimensions dims); // Where to look in a coordinate with this dimension // for the X, Y, Z, and M dimensions, respectively. @@ -121,6 +88,41 @@ struct Dimensions { } }; +template <> +constexpr uint32_t Dimensions::size() { + return 2; +} + +template <> +constexpr uint32_t Dimensions::size() { + return 3; +} + +template <> +constexpr uint32_t Dimensions::size() { + return 3; +} + +template <> +constexpr uint32_t Dimensions::size() { + return 4; +} + +inline uint32_t Dimensions::size(dimensions dims) { + switch (dims) { + case XY: + return size(); + case XYZ: + return size(); + case XYM: + return size(); + case XYZM: + return size(); + default: + return 0; + } +} + struct GeometryType { enum geometry_type { POINT = 1, diff --git a/cpp/src/parquet/test_util.h b/cpp/src/parquet/test_util.h index 1f91fde26706d..08319f524e14a 100644 --- a/cpp/src/parquet/test_util.h +++ b/cpp/src/parquet/test_util.h @@ -836,13 +836,11 @@ inline void GenerateData(int num_values, FLBA* out, std::vector* // ---------------------------------------------------------------------- // Test utility functions for geometry -static constexpr int WKB_NATIVE_ENDIANNESS = #if defined(ARROW_LITTLE_ENDIAN) - geometry::WKBBuffer::WKB_LITTLE_ENDIAN +static constexpr int 
WKB_NATIVE_ENDIANNESS = geometry::WKBBuffer::WKB_LITTLE_ENDIAN; #else - geometry::WKBBuffer::WKB_BIG_ENDIAN +static constexpr int WKB_NATIVE_ENDIANNESS = geometry::WKBBuffer::WKB_BIG_ENDIAN; #endif - ; static constexpr int WKB_POINT_SIZE = 21; // 1:endianness + 4:type + 8:x + 8:y From fb134d3af036ee5e240c7475477049c28bd6951c Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Fri, 6 Sep 2024 11:18:10 +0800 Subject: [PATCH 39/61] Address review comments in https://github.com/apache/arrow/pull/43196 --- cpp/src/parquet/CMakeLists.txt | 2 +- cpp/src/parquet/column_writer_test.cc | 4 +-- ...ometry_util.h => geometry_util_internal.h} | 0 ...test.cc => geometry_util_internal_test.cc} | 2 +- cpp/src/parquet/statistics.cc | 34 ++++++++++++++++++- cpp/src/parquet/statistics.h | 4 +++ cpp/src/parquet/test_util.h | 2 +- cpp/src/parquet/types.cc | 8 +++++ 8 files changed, 50 insertions(+), 6 deletions(-) rename cpp/src/parquet/{geometry_util.h => geometry_util_internal.h} (100%) rename cpp/src/parquet/{geometry_util_test.cc => geometry_util_internal_test.cc} (99%) diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index 6a00394659fb3..a6e92eabcebfd 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -379,7 +379,7 @@ add_parquet_test(internals-test public_api_test.cc types_test.cc) -add_parquet_test(geometry-test SOURCES geometry_util_test.cc) +add_parquet_test(geometry-test SOURCES geometry_util_internal_test.cc) set_source_files_properties(public_api_test.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON SKIP_UNITY_BUILD_INCLUSION ON) diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index a976e9776ce09..225bcf4c65e88 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -398,7 +398,7 @@ class TestPrimitiveWriter : public PrimitiveTypedTest { ColumnChunkMetaData::Make(metadata_->contents(), this->descr_); return 
metadata_accessor->key_value_metadata(); } - + EncodedStatistics metadata_encoded_stats() { ApplicationVersion app_version(this->writer_properties_->created_by()); auto metadata_accessor = ColumnChunkMetaData::Make( @@ -1780,7 +1780,7 @@ TEST_F(TestInt32Writer, WriteKeyValueMetadataEndToEnd) { ASSERT_OK_AND_ASSIGN(auto value, key_value_metadata->Get("foo")); ASSERT_EQ("bar", value); } - + // Test writing and reading geometry columns class TestGeometryValuesWriter : public TestPrimitiveWriter { public: diff --git a/cpp/src/parquet/geometry_util.h b/cpp/src/parquet/geometry_util_internal.h similarity index 100% rename from cpp/src/parquet/geometry_util.h rename to cpp/src/parquet/geometry_util_internal.h diff --git a/cpp/src/parquet/geometry_util_test.cc b/cpp/src/parquet/geometry_util_internal_test.cc similarity index 99% rename from cpp/src/parquet/geometry_util_test.cc rename to cpp/src/parquet/geometry_util_internal_test.cc index b92c9b6a8d5af..66d7639d6f852 100644 --- a/cpp/src/parquet/geometry_util_test.cc +++ b/cpp/src/parquet/geometry_util_internal_test.cc @@ -20,7 +20,7 @@ #include "arrow/testing/gtest_compat.h" -#include "parquet/geometry_util.h" +#include "parquet/geometry_util_internal.h" namespace parquet::geometry { diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index af40838e8dba4..87dc56aa693ec 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -37,7 +37,7 @@ #include "arrow/visit_data_inline.h" #include "parquet/encoding.h" #include "parquet/exception.h" -#include "parquet/geometry_util.h" +#include "parquet/geometry_util_internal.h" #include "parquet/platform.h" #include "parquet/schema.h" #include "parquet/types.h" @@ -109,6 +109,28 @@ class GeometryStatisticsImpl { } } + void UpdateSpaced(const ByteArray* values, const uint8_t* valid_bits, + int64_t valid_bits_offset, int64_t num_spaced_values, + int64_t num_values, int64_t null_count) { + DCHECK_GT(num_spaced_values, 0); + + 
geometry::WKBBuffer buf; + try { + ::arrow::internal::VisitSetBitRunsVoid( + valid_bits, valid_bits_offset, num_spaced_values, + [&](int64_t position, int64_t length) { + for (int64_t i = 0; i < num_spaced_values; i++) { + ByteArray item = SafeLoad(values + i + position); + buf.Init(item.ptr, item.len); + bounder_.ReadGeometry(&buf); + } + }); + bounder_.Flush(); + } catch (ParquetException& e) { + is_valid_ = false; + } + } + EncodedGeometryStatistics Encode() const { const double* mins = bounder_.Bounds().min; const double* maxes = bounder_.Bounds().max; @@ -1093,6 +1115,16 @@ void TypedStatisticsImpl::UpdateSpaced(const T* values, const uint8_t* va if (num_values == 0) return; SetMinMaxPair(comparator_->GetMinMaxSpaced(values, num_spaced_values, valid_bits, valid_bits_offset)); + + if constexpr (std::is_same::value) { + if (logical_type_ == LogicalType::Type::GEOMETRY) { + if (geometry_statistics_ == nullptr) { + geometry_statistics_ = std::make_unique(); + } + geometry_statistics_->UpdateSpaced(values, valid_bits, valid_bits_offset, + num_spaced_values, num_values, null_count); + } + } } template diff --git a/cpp/src/parquet/statistics.h b/cpp/src/parquet/statistics.h index 4e785d6a15f6d..072970e913344 100644 --- a/cpp/src/parquet/statistics.h +++ b/cpp/src/parquet/statistics.h @@ -153,6 +153,10 @@ class PARQUET_EXPORT GeometryStatistics { void Update(const ByteArray* values, int64_t num_values, int64_t null_count); + void UpdateSpaced(const ByteArray* values, const uint8_t* valid_bits, + int64_t valid_bits_offset, int64_t num_spaced_values, + int64_t num_values, int64_t null_count); + EncodedGeometryStatistics Encode(); bool is_valid() const; diff --git a/cpp/src/parquet/test_util.h b/cpp/src/parquet/test_util.h index 08319f524e14a..2d38b27fe015e 100644 --- a/cpp/src/parquet/test_util.h +++ b/cpp/src/parquet/test_util.h @@ -39,7 +39,7 @@ #include "parquet/column_reader.h" #include "parquet/column_writer.h" #include "parquet/encoding.h" -#include 
"parquet/geometry_util.h" +#include "parquet/geometry_util_internal.h" #include "parquet/platform.h" // https://github.com/google/googletest/pull/2904 might not be available diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index db8400a80c0d8..5dd3d79eda060 100644 --- a/cpp/src/parquet/types.cc +++ b/cpp/src/parquet/types.cc @@ -474,12 +474,20 @@ std::shared_ptr LogicalType::FromThrift( edges = LogicalType::GeometryEdges::PLANAR; } else if (type.GEOMETRY.edges == format::Edges::SPHERICAL) { edges = LogicalType::GeometryEdges::SPHERICAL; + } else { + std::stringstream ss; + ss << "Unknown value for geometry edges: " << type.GEOMETRY.edges; + throw ParquetException(ss.str()); } LogicalType::GeometryEncoding::geometry_encoding encoding = LogicalType::GeometryEncoding::UNKNOWN; if (type.GEOMETRY.encoding == format::GeometryEncoding::WKB) { encoding = LogicalType::GeometryEncoding::WKB; + } else { + std::stringstream ss; + ss << "Unknown value for geometry encoding: " << type.GEOMETRY.edges; + throw ParquetException(ss.str()); } std::string metadata; From 2f4329e21d07ca159a499e6f8fe825e0c507fba4 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Fri, 6 Sep 2024 12:06:26 +0800 Subject: [PATCH 40/61] Resolve compile errors for MSVC --- cpp/src/parquet/column_writer_test.cc | 6 ++++-- cpp/src/parquet/reader_test.cc | 6 ++++-- cpp/src/parquet/statistics.cc | 16 +++++++++++++--- cpp/src/parquet/statistics.h | 2 ++ 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index 225bcf4c65e88..38f7eb67c54a0 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -1840,8 +1840,10 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { double x = 0; double y = 0; EXPECT_TRUE(GetWKBPointCoordinate(value, &x, &y)); - EXPECT_DOUBLE_EQ(i, x); - EXPECT_DOUBLE_EQ(i + 1, y); + double expected_x = i; + double expected_y = i + 1; + 
EXPECT_DOUBLE_EQ(expected_x, x); + EXPECT_DOUBLE_EQ(expected_y, y); } auto encoded_statistics = metadata_encoded_stats(); diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index 4084b38e4116b..dd1738f890b79 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -1904,8 +1904,10 @@ TEST(TestFileReader, GeometryLogicalType) { double x = 0; double y = 0; EXPECT_TRUE(test::GetWKBPointCoordinate(value, &x, &y)); - EXPECT_DOUBLE_EQ(i + total_values_read, x); - EXPECT_DOUBLE_EQ(i + 1 + total_values_read, y); + double expected_x = i + total_values_read; + double expected_y = i + 1 + total_values_read; + EXPECT_DOUBLE_EQ(expected_x, x); + EXPECT_DOUBLE_EQ(expected_y, y); } total_values_read += values_read; diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index 87dc56aa693ec..e83d2ae37bbad 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -104,7 +104,7 @@ class GeometryStatisticsImpl { } bounder_.Flush(); - } catch (ParquetException& e) { + } catch (ParquetException&) { is_valid_ = false; } } @@ -126,7 +126,7 @@ class GeometryStatisticsImpl { } }); bounder_.Flush(); - } catch (ParquetException& e) { + } catch (ParquetException&) { is_valid_ = false; } } @@ -183,7 +183,7 @@ class GeometryStatisticsImpl { bounder_.ReadGeometry(&buf, false); } } - } catch (ParquetException& e) { + } catch (ParquetException&) { is_valid_ = false; return; } @@ -203,6 +203,8 @@ GeometryStatistics::GeometryStatistics() { GeometryStatistics::GeometryStatistics(std::unique_ptr impl) : impl_(std::move(impl)) {} +GeometryStatistics::~GeometryStatistics() = default; + bool GeometryStatistics::Equals(const GeometryStatistics& other) const { return impl_->Equals(*other.impl_); } @@ -216,6 +218,14 @@ void GeometryStatistics::Update(const ByteArray* values, int64_t num_values, impl_->Update(values, num_values, null_count); } +void GeometryStatistics::UpdateSpaced(const ByteArray* values, const 
uint8_t* valid_bits, + int64_t valid_bits_offset, + int64_t num_spaced_values, int64_t num_values, + int64_t null_count) { + impl_->UpdateSpaced(values, valid_bits, valid_bits_offset, num_spaced_values, + num_values, null_count); +} + bool GeometryStatistics::is_valid() const { return impl_->is_valid(); } EncodedGeometryStatistics GeometryStatistics::Encode() { return impl_->Encode(); } diff --git a/cpp/src/parquet/statistics.h b/cpp/src/parquet/statistics.h index 072970e913344..eaa3cacc5d57b 100644 --- a/cpp/src/parquet/statistics.h +++ b/cpp/src/parquet/statistics.h @@ -147,6 +147,8 @@ class PARQUET_EXPORT GeometryStatistics { GeometryStatistics(); explicit GeometryStatistics(std::unique_ptr impl); + ~GeometryStatistics(); + bool Equals(const GeometryStatistics& other) const; void Merge(const GeometryStatistics& other); From 1db855f0a648dcca5174fe7b1861ae99661b6740 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Tue, 10 Sep 2024 19:09:07 +0800 Subject: [PATCH 41/61] Expose getters in GeometryStatistics, Change geometry_types from std::vector<uint32_t> to std::vector<int32_t>, several other minor fixes. 
--- cpp/src/parquet/column_writer_test.cc | 29 ++++---- cpp/src/parquet/geometry_util_internal.h | 16 ++-- .../parquet/geometry_util_internal_test.cc | 6 +- cpp/src/parquet/metadata.cc | 3 +- cpp/src/parquet/reader_test.cc | 4 +- cpp/src/parquet/statistics.cc | 74 +++++++++++++++++-- cpp/src/parquet/statistics.h | 28 +++++-- cpp/src/parquet/thrift_internal.h | 4 +- 8 files changed, 122 insertions(+), 42 deletions(-) diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index 38f7eb67c54a0..37599cf4f4c59 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -399,11 +399,11 @@ class TestPrimitiveWriter : public PrimitiveTypedTest { return metadata_accessor->key_value_metadata(); } - EncodedStatistics metadata_encoded_stats() { + std::shared_ptr metadata_stats() { ApplicationVersion app_version(this->writer_properties_->created_by()); auto metadata_accessor = ColumnChunkMetaData::Make( metadata_->contents(), this->descr_, default_reader_properties(), &app_version); - return metadata_accessor->statistics()->Encode(); + return metadata_accessor->statistics(); } protected: @@ -1840,21 +1840,24 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { double x = 0; double y = 0; EXPECT_TRUE(GetWKBPointCoordinate(value, &x, &y)); - double expected_x = i; - double expected_y = i + 1; + auto expected_x = static_cast(i); + auto expected_y = static_cast(i + 1); EXPECT_DOUBLE_EQ(expected_x, x); EXPECT_DOUBLE_EQ(expected_y, y); } - auto encoded_statistics = metadata_encoded_stats(); - EXPECT_TRUE(encoded_statistics.has_geometry_statistics); - auto geometry_statistics = encoded_statistics.geometry_statistics(); - EXPECT_EQ(1, geometry_statistics.geometry_types.size()); - EXPECT_EQ(1, geometry_statistics.geometry_types[0]); - EXPECT_DOUBLE_EQ(0, geometry_statistics.xmin); - EXPECT_DOUBLE_EQ(1, geometry_statistics.ymin); - EXPECT_DOUBLE_EQ(99, geometry_statistics.xmax); - EXPECT_DOUBLE_EQ(100, 
geometry_statistics.ymax); + std::shared_ptr statistics = metadata_stats(); + EXPECT_TRUE(statistics->HasGeometryStatistics()); + const GeometryStatistics* geometry_statistics = statistics->geometry_statistics(); + std::vector geometry_types = geometry_statistics->GetGeometryTypes(); + EXPECT_EQ(1, geometry_types.size()); + EXPECT_EQ(1, geometry_types[0]); + EXPECT_DOUBLE_EQ(0, geometry_statistics->GetXMin()); + EXPECT_DOUBLE_EQ(1, geometry_statistics->GetYMin()); + EXPECT_DOUBLE_EQ(99, geometry_statistics->GetXMax()); + EXPECT_DOUBLE_EQ(100, geometry_statistics->GetYMax()); + EXPECT_FALSE(geometry_statistics->HasZ()); + EXPECT_FALSE(geometry_statistics->HasM()); } }; diff --git a/cpp/src/parquet/geometry_util_internal.h b/cpp/src/parquet/geometry_util_internal.h index 024a131298c71..0ad02b073afec 100644 --- a/cpp/src/parquet/geometry_util_internal.h +++ b/cpp/src/parquet/geometry_util_internal.h @@ -597,7 +597,9 @@ class WKBGeometryBounder { // Keep track of geometry types encountered if at the top level if (record_wkb_type) { - wkb_types_.insert(wkb_geometry_type); + GeometryType::geometry_type geometry_type = + GeometryType::FromWKB(wkb_geometry_type); + geometry_types_.insert(geometry_type); } switch (geometry_type) { @@ -629,14 +631,14 @@ class WKBGeometryBounder { void ReadBox(const BoundingBox& box) { box_.Merge(box); } - void ReadGeometryTypes(const std::vector& geometry_types) { - wkb_types_.insert(geometry_types.begin(), geometry_types.end()); + void ReadGeometryTypes(const std::vector& geometry_types) { + geometry_types_.insert(geometry_types.begin(), geometry_types.end()); } const BoundingBox& Bounds() const { return box_; } - std::vector WkbTypes() const { - std::vector out(wkb_types_.begin(), wkb_types_.end()); + std::vector GeometryTypes() const { + std::vector out(geometry_types_.begin(), geometry_types_.end()); std::sort(out.begin(), out.end()); return out; } @@ -646,13 +648,13 @@ class WKBGeometryBounder { void Reset() { box_.Reset(); 
bounder_.Reset(); - wkb_types_.clear(); + geometry_types_.clear(); } private: BoundingBox box_; WKBGenericSequenceBounder bounder_; - std::unordered_set wkb_types_; + std::unordered_set geometry_types_; }; } // namespace parquet::geometry diff --git a/cpp/src/parquet/geometry_util_internal_test.cc b/cpp/src/parquet/geometry_util_internal_test.cc index 66d7639d6f852..4d4c907f37e80 100644 --- a/cpp/src/parquet/geometry_util_internal_test.cc +++ b/cpp/src/parquet/geometry_util_internal_test.cc @@ -142,12 +142,12 @@ TEST_P(WKBTestFixture, TestWKBBounderNonEmpty) { bounder.Flush(); EXPECT_EQ(bounder.Bounds(), item.box); - uint32_t wkb_type = item.dimensions * 1000 + item.geometry_type; - EXPECT_THAT(bounder.WkbTypes(), ::testing::ElementsAre(::testing::Eq(wkb_type))); + EXPECT_THAT(bounder.GeometryTypes(), + ::testing::ElementsAre(::testing::Eq(item.geometry_type))); bounder.Reset(); EXPECT_EQ(bounder.Bounds(), BoundingBox()); - EXPECT_TRUE(bounder.WkbTypes().empty()); + EXPECT_TRUE(bounder.GeometryTypes().empty()); } INSTANTIATE_TEST_SUITE_P( diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index 7c69b97e2d49d..b58a6b0e6f50e 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -94,8 +94,7 @@ static EncodedGeometryStatistics MakeEncodedGeometryStatistics( if (stats.__isset.geometry_stats) { const format::GeometryStatistics& geom_stats = stats.geometry_stats; - out.geometry_types = std::vector(geom_stats.geometry_types.begin(), - geom_stats.geometry_types.end()); + out.geometry_types = geom_stats.geometry_types; out.xmin = geom_stats.bbox.xmin; out.xmax = geom_stats.bbox.xmax; diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index dd1738f890b79..b28fec1b717ba 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -1904,8 +1904,8 @@ TEST(TestFileReader, GeometryLogicalType) { double x = 0; double y = 0; EXPECT_TRUE(test::GetWKBPointCoordinate(value, &x, &y)); - double 
expected_x = i + total_values_read; - double expected_y = i + 1 + total_values_read; + auto expected_x = static_cast(i + total_values_read); + auto expected_y = static_cast(i + 1 + total_values_read); EXPECT_DOUBLE_EQ(expected_x, x); EXPECT_DOUBLE_EQ(expected_y, y); } diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index e83d2ae37bbad..a92974b308026 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -65,14 +65,14 @@ class GeometryStatisticsImpl { return true; } - auto wkb_types = bounder_.WkbTypes(); - auto other_wkb_types = other.bounder_.WkbTypes(); - if (wkb_types.size() != other_wkb_types.size()) { + auto geometry_types = bounder_.GeometryTypes(); + auto other_geometry_types = other.bounder_.GeometryTypes(); + if (geometry_types.size() != other_geometry_types.size()) { return false; } - for (size_t i = 0; i < wkb_types.size(); i++) { - if (wkb_types[i] != other_wkb_types[i]) { + for (size_t i = 0; i < geometry_types.size(); i++) { + if (geometry_types[i] != other_geometry_types[i]) { return false; } } @@ -87,7 +87,7 @@ class GeometryStatisticsImpl { } bounder_.ReadBox(other.bounder_.Bounds()); - bounder_.ReadGeometryTypes(other.bounder_.WkbTypes()); + bounder_.ReadGeometryTypes(other.bounder_.GeometryTypes()); } void Update(const ByteArray* values, int64_t num_values, int64_t null_count) { @@ -136,7 +136,7 @@ class GeometryStatisticsImpl { const double* maxes = bounder_.Bounds().max; EncodedGeometryStatistics out; - out.geometry_types = bounder_.WkbTypes(); + out.geometry_types = bounder_.GeometryTypes(); out.xmin = mins[0]; out.xmax = maxes[0]; @@ -191,6 +191,12 @@ class GeometryStatisticsImpl { bool is_valid() const { return is_valid_; } + const double* GetMinBounds() { return bounder_.Bounds().min; } + + const double* GetMaxBounds() { return bounder_.Bounds().max; } + + std::vector GetGeometryTypes() const { return bounder_.GeometryTypes(); } + private: geometry::WKBGeometryBounder bounder_; bool 
is_valid_ = true; @@ -240,6 +246,60 @@ std::shared_ptr GeometryStatistics::clone() const { return std::make_shared(std::move(impl)); } +double GeometryStatistics::GetXMin() const { + const double* mins = impl_->GetMinBounds(); + return mins[0]; +} + +double GeometryStatistics::GetXMax() const { + const double* maxes = impl_->GetMaxBounds(); + return maxes[0]; +} + +double GeometryStatistics::GetYMin() const { + const double* mins = impl_->GetMinBounds(); + return mins[1]; +} + +double GeometryStatistics::GetYMax() const { + const double* maxes = impl_->GetMaxBounds(); + return maxes[1]; +} + +double GeometryStatistics::GetZMin() const { + const double* mins = impl_->GetMinBounds(); + return mins[2]; +} + +double GeometryStatistics::GetZMax() const { + const double* maxes = impl_->GetMaxBounds(); + return maxes[2]; +} + +double GeometryStatistics::GetMMin() const { + const double* mins = impl_->GetMinBounds(); + return mins[3]; +} + +double GeometryStatistics::GetMMax() const { + const double* maxes = impl_->GetMaxBounds(); + return maxes[3]; +} + +bool GeometryStatistics::HasZ() const { return (GetZMax() - GetZMin()) > 0; } + +bool GeometryStatistics::HasM() const { return (GetMMax() - GetMMin()) > 0; } + +std::vector GeometryStatistics::GetGeometryTypes() const { + return impl_->GetGeometryTypes(); +} + +std::vector> GeometryStatistics::GetCoverings() + const { + // TODO (kontinuation): support coverings + return {}; +} + namespace { // ---------------------------------------------------------------------- diff --git a/cpp/src/parquet/statistics.h b/cpp/src/parquet/statistics.h index eaa3cacc5d57b..2a395da97a9d0 100644 --- a/cpp/src/parquet/statistics.h +++ b/cpp/src/parquet/statistics.h @@ -133,11 +133,11 @@ class PARQUET_EXPORT EncodedGeometryStatistics { double mmin{kInf}; double mmax{-kInf}; std::vector> coverings; - std::vector geometry_types; + std::vector geometry_types; - bool has_z() const { return (zmax - zmin) > 0; } + bool has_z() const { return (zmax 
- zmin) >= 0; } - bool has_m() const { return (mmax - mmin) > 0; } + bool has_m() const { return (mmax - mmin) >= 0; } }; class GeometryStatisticsImpl; @@ -167,6 +167,21 @@ class PARQUET_EXPORT GeometryStatistics { void Decode(const EncodedGeometryStatistics& encoded); + double GetXMin() const; + double GetXMax() const; + double GetYMin() const; + double GetYMax() const; + double GetZMin() const; + double GetZMax() const; + double GetMMin() const; + double GetMMax() const; + + bool HasZ() const; + bool HasM() const; + + std::vector GetGeometryTypes() const; + std::vector> GetCoverings() const; + private: std::unique_ptr impl_; }; @@ -178,7 +193,6 @@ class PARQUET_EXPORT GeometryStatistics { class PARQUET_EXPORT EncodedStatistics { std::string max_, min_; bool is_signed_ = false; - EncodedGeometryStatistics geometry_statistics_; public: EncodedStatistics() = default; @@ -196,7 +210,6 @@ class PARQUET_EXPORT EncodedStatistics { bool has_max = false; bool has_null_count = false; bool has_distinct_count = false; - bool has_geometry_statistics = false; // When all values in the statistics are null, it is set to true. // Otherwise, at least one value is not null, or we are not sure at all. @@ -204,6 +217,11 @@ class PARQUET_EXPORT EncodedStatistics { // is a null page or not. bool all_null_value = false; + // Statistics for geometry column. geometry_statistics_ is only valid when + // has_geometry_statistics is true. + EncodedGeometryStatistics geometry_statistics_; + bool has_geometry_statistics = false; + // From parquet-mr // Don't write stats larger than the max size rather than truncating. 
The // rationale is that some engines may use the minimum value in the page as diff --git a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h index 7b9e37aa1ea63..92aba0ef30bdf 100644 --- a/cpp/src/parquet/thrift_internal.h +++ b/cpp/src/parquet/thrift_internal.h @@ -351,9 +351,7 @@ static inline format::Statistics ToThrift(const EncodedStatistics& stats) { if (stats.has_geometry_statistics) { const EncodedGeometryStatistics& encoded_geometry_stats = stats.geometry_statistics(); format::GeometryStatistics geometry_statistics; - std::vector geometry_types(encoded_geometry_stats.geometry_types.begin(), - encoded_geometry_stats.geometry_types.end()); - geometry_statistics.__set_geometry_types(geometry_types); + geometry_statistics.__set_geometry_types(encoded_geometry_stats.geometry_types); format::BoundingBox bbox; bbox.__set_xmin(encoded_geometry_stats.xmin); bbox.__set_xmax(encoded_geometry_stats.xmax); From ad92bb6d113121565719fc12b9955bdf6775758c Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Wed, 11 Sep 2024 15:59:10 +0800 Subject: [PATCH 42/61] Add test case for UpdateSpaced, don't generate min/max stats for geometry columns. 
--- cpp/src/parquet/column_writer_test.cc | 84 ++++++++++++++++++++++++--- cpp/src/parquet/reader_test.cc | 23 ++++---- cpp/src/parquet/statistics.cc | 54 +++++++++++++++-- cpp/src/parquet/statistics.h | 2 + 4 files changed, 141 insertions(+), 22 deletions(-) diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index 37599cf4f4c59..ee0cc30e97dc8 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -1803,7 +1803,6 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { } void GenerateData(int64_t num_values, uint32_t seed = 0) { - def_levels_.resize(num_values); values_.resize(num_values); buffer_.resize(num_values * WKB_POINT_SIZE); @@ -1816,8 +1815,6 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { } values_ptr_ = values_.data(); - - std::fill(def_levels_.begin(), def_levels_.end(), 1); } void TestWriteAndRead(ParquetVersion::type version, @@ -1828,10 +1825,7 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { auto writer = this->BuildWriter(num_values, ColumnProperties(), version, data_page_version, /*enable_checksum*/ false); - std::vector definition_levels(num_values, 0); - std::vector repetition_levels(num_values, 0); - writer->WriteBatch(this->values_.size(), definition_levels.data(), - repetition_levels.data(), this->values_.data()); + writer->WriteBatch(this->values_.size(), nullptr, nullptr, this->values_.data()); writer->Close(); this->ReadColumn(); @@ -1847,6 +1841,7 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { } std::shared_ptr statistics = metadata_stats(); + EXPECT_FALSE(statistics->HasMinMax()); EXPECT_TRUE(statistics->HasGeometryStatistics()); const GeometryStatistics* geometry_statistics = statistics->geometry_statistics(); std::vector geometry_types = geometry_statistics->GetGeometryTypes(); @@ -1859,6 +1854,67 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { 
EXPECT_FALSE(geometry_statistics->HasZ()); EXPECT_FALSE(geometry_statistics->HasM()); } + + void TestWriteAndReadSpaced(ParquetVersion::type version, + ParquetDataPageVersion data_page_version) { + this->SetUpSchema(Repetition::OPTIONAL, 1); + this->GenerateData(SMALL_SIZE); + size_t num_values = this->values_.size(); + + std::vector definition_levels(num_values, 1); + std::vector repetition_levels(num_values, 0); + std::vector non_null_indices; + + // Replace some of the generated data with NULL + for (size_t i = 0; i < num_values; i++) { + if (i % 3 == 0) { + definition_levels[i] = 0; + } else { + non_null_indices.push_back(i); + } + } + + // Construct valid bits using definition levels + std::vector valid_bytes = + std::vector(definition_levels.begin(), definition_levels.end()); + std::shared_ptr valid_bits; + ASSERT_OK_AND_ASSIGN(valid_bits, ::arrow::internal::BytesToBits(valid_bytes)); + + auto writer = + this->BuildWriter(num_values, ColumnProperties(), version, data_page_version, + /*enable_checksum*/ false); + writer->WriteBatchSpaced(this->values_.size(), definition_levels.data(), + repetition_levels.data(), valid_bits->data(), 0, + this->values_.data()); + + writer->Close(); + this->ReadColumn(); + size_t expected_values_read = non_null_indices.size(); + EXPECT_EQ(expected_values_read, values_read_); + for (int64_t i = 0; i < values_read_; i++) { + const ByteArray& value = this->values_out_[i]; + double x = 0; + double y = 0; + EXPECT_TRUE(GetWKBPointCoordinate(value, &x, &y)); + auto expected_x = static_cast(non_null_indices[i]); + auto expected_y = static_cast(non_null_indices[i] + 1); + EXPECT_DOUBLE_EQ(expected_x, x); + EXPECT_DOUBLE_EQ(expected_y, y); + } + + std::shared_ptr statistics = metadata_stats(); + EXPECT_TRUE(statistics->HasGeometryStatistics()); + const GeometryStatistics* geometry_statistics = statistics->geometry_statistics(); + std::vector geometry_types = geometry_statistics->GetGeometryTypes(); + EXPECT_EQ(1, geometry_types.size()); + 
EXPECT_EQ(1, geometry_types[0]); + EXPECT_DOUBLE_EQ(1, geometry_statistics->GetXMin()); + EXPECT_DOUBLE_EQ(2, geometry_statistics->GetYMin()); + EXPECT_DOUBLE_EQ(98, geometry_statistics->GetXMax()); + EXPECT_DOUBLE_EQ(99, geometry_statistics->GetYMax()); + EXPECT_FALSE(geometry_statistics->HasZ()); + EXPECT_FALSE(geometry_statistics->HasM()); + } }; const char* TestGeometryValuesWriter::CRS = @@ -1879,5 +1935,19 @@ TEST_F(TestGeometryValuesWriter, TestWriteAndReadV2) { } } +TEST_F(TestGeometryValuesWriter, TestWriteAndReadV1Spaced) { + for (auto data_page_version : + {ParquetDataPageVersion::V1, ParquetDataPageVersion::V2}) { + TestWriteAndReadSpaced(ParquetVersion::PARQUET_1_0, data_page_version); + } +} + +TEST_F(TestGeometryValuesWriter, TestWriteAndReadV2Spaced) { + for (auto data_page_version : + {ParquetDataPageVersion::V1, ParquetDataPageVersion::V2}) { + TestWriteAndReadSpaced(ParquetVersion::PARQUET_2_4, data_page_version); + } +} + } // namespace test } // namespace parquet diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index b28fec1b717ba..c5af6ae6368eb 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -1873,16 +1873,19 @@ TEST(TestFileReader, GeometryLogicalType) { std::unique_ptr row_group_metadata = metadata->RowGroup(i); std::unique_ptr column_chunk_metadata = row_group_metadata->ColumnChunk(0); - EncodedStatistics encoded_statistics = column_chunk_metadata->statistics()->Encode(); - EXPECT_TRUE(encoded_statistics.has_geometry_statistics); - const EncodedGeometryStatistics& geom_stats = - encoded_statistics.geometry_statistics(); - EXPECT_EQ(1, geom_stats.geometry_types.size()); - EXPECT_EQ(1, geom_stats.geometry_types[0]); - EXPECT_GE(geom_stats.xmin, 0); - EXPECT_GT(geom_stats.xmax, geom_stats.xmin); - EXPECT_GT(geom_stats.ymin, 0); - EXPECT_GT(geom_stats.ymax, geom_stats.ymin); + std::shared_ptr statistics = column_chunk_metadata->statistics(); + EXPECT_FALSE(statistics->HasMinMax()); 
+ EXPECT_TRUE(statistics->HasGeometryStatistics()); + const GeometryStatistics* geom_stats = statistics->geometry_statistics(); + std::vector geometry_types = geom_stats->GetGeometryTypes(); + EXPECT_EQ(1, geometry_types.size()); + EXPECT_EQ(1, geometry_types[0]); + EXPECT_GE(geom_stats->GetXMin(), 0); + EXPECT_GT(geom_stats->GetXMax(), geom_stats->GetXMin()); + EXPECT_GT(geom_stats->GetYMin(), 0); + EXPECT_GT(geom_stats->GetYMax(), geom_stats->GetYMin()); + EXPECT_FALSE(geom_stats->HasZ()); + EXPECT_FALSE(geom_stats->HasM()); } // Check the geometry values diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index a92974b308026..02074bfecfa75 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -27,12 +27,14 @@ #include #include "arrow/array.h" +#include "arrow/array/array_binary.h" #include "arrow/type.h" #include "arrow/type_traits.h" #include "arrow/util/bit_run_reader.h" #include "arrow/util/checked_cast.h" #include "arrow/util/float16.h" #include "arrow/util/logging.h" +#include "arrow/util/macros.h" #include "arrow/util/ubsan.h" #include "arrow/visit_data_inline.h" #include "parquet/encoding.h" @@ -119,7 +121,7 @@ class GeometryStatisticsImpl { ::arrow::internal::VisitSetBitRunsVoid( valid_bits, valid_bits_offset, num_spaced_values, [&](int64_t position, int64_t length) { - for (int64_t i = 0; i < num_spaced_values; i++) { + for (int64_t i = 0; i < length; i++) { ByteArray item = SafeLoad(values + i + position); buf.Init(item.ptr, item.len); bounder_.ReadGeometry(&buf); @@ -131,6 +133,26 @@ class GeometryStatisticsImpl { } } + void Update(const ::arrow::Array& values, bool update_counts) { + ARROW_UNUSED(update_counts); + + const auto& binary_array = static_cast(values); + geometry::WKBBuffer buf; + try { + for (int64_t i = 0; i < binary_array.length(); ++i) { + if (!binary_array.IsNull(i)) { + std::string_view byte_array = binary_array.GetView(i); + buf.Init(reinterpret_cast(byte_array.data()), + 
byte_array.length()); + bounder_.ReadGeometry(&buf); + bounder_.Flush(); + } + } + } catch (ParquetException&) { + is_valid_ = false; + } + } + EncodedGeometryStatistics Encode() const { const double* mins = bounder_.Bounds().min; const double* maxes = bounder_.Bounds().max; @@ -232,6 +254,10 @@ void GeometryStatistics::UpdateSpaced(const ByteArray* values, const uint8_t* va num_values, null_count); } +void GeometryStatistics::Update(const ::arrow::Array& values, bool update_counts) { + impl_->Update(values, update_counts); +} + bool GeometryStatistics::is_valid() const { return impl_->is_valid(); } EncodedGeometryStatistics GeometryStatistics::Encode() { return impl_->Encode(); } @@ -1002,7 +1028,18 @@ class TypedStatisticsImpl : public TypedStatistics { return; } - SetMinMaxPair(comparator_->GetMinMax(values)); + if constexpr (std::is_same::value) { + if (logical_type_ == LogicalType::Type::GEOMETRY) { + if (geometry_statistics_ == nullptr) { + geometry_statistics_ = std::make_unique(); + } + geometry_statistics_->Update(values, update_counts); + } else { + SetMinMaxPair(comparator_->GetMinMax(values)); + } + } else { + SetMinMaxPair(comparator_->GetMinMax(values)); + } } const T& min() const override { return min_; } @@ -1159,7 +1196,6 @@ void TypedStatisticsImpl::Update(const T* values, int64_t num_values, IncrementNumValues(num_values); if (num_values == 0) return; - SetMinMaxPair(comparator_->GetMinMax(values, num_values)); if constexpr (std::is_same::value) { if (logical_type_ == LogicalType::Type::GEOMETRY) { @@ -1167,7 +1203,11 @@ void TypedStatisticsImpl::Update(const T* values, int64_t num_values, geometry_statistics_ = std::make_unique(); } geometry_statistics_->Update(values, num_values, null_count); + } else { + SetMinMaxPair(comparator_->GetMinMax(values, num_values)); } + } else { + SetMinMaxPair(comparator_->GetMinMax(values, num_values)); } } @@ -1183,8 +1223,6 @@ void TypedStatisticsImpl::UpdateSpaced(const T* values, const uint8_t* va 
IncrementNumValues(num_values); if (num_values == 0) return; - SetMinMaxPair(comparator_->GetMinMaxSpaced(values, num_spaced_values, valid_bits, - valid_bits_offset)); if constexpr (std::is_same::value) { if (logical_type_ == LogicalType::Type::GEOMETRY) { @@ -1193,7 +1231,13 @@ void TypedStatisticsImpl::UpdateSpaced(const T* values, const uint8_t* va } geometry_statistics_->UpdateSpaced(values, valid_bits, valid_bits_offset, num_spaced_values, num_values, null_count); + } else { + SetMinMaxPair(comparator_->GetMinMaxSpaced(values, num_spaced_values, valid_bits, + valid_bits_offset)); } + } else { + SetMinMaxPair(comparator_->GetMinMaxSpaced(values, num_spaced_values, valid_bits, + valid_bits_offset)); } } diff --git a/cpp/src/parquet/statistics.h b/cpp/src/parquet/statistics.h index 2a395da97a9d0..6e6aea932ce18 100644 --- a/cpp/src/parquet/statistics.h +++ b/cpp/src/parquet/statistics.h @@ -159,6 +159,8 @@ class PARQUET_EXPORT GeometryStatistics { int64_t valid_bits_offset, int64_t num_spaced_values, int64_t num_values, int64_t null_count); + void Update(const ::arrow::Array& values, bool update_counts); + EncodedGeometryStatistics Encode(); bool is_valid() const; From f782e309b4f80b2eb11f97ec8aa08fb19fb52df8 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Wed, 11 Sep 2024 20:20:32 +0800 Subject: [PATCH 43/61] Support covering --- cpp/src/parquet/column_writer_test.cc | 23 +++++++++++++++++-- cpp/src/parquet/geometry_util_internal.h | 29 ++++++++++++++++++++++++ cpp/src/parquet/statistics.cc | 19 ++++++++++++++-- cpp/src/parquet/thrift_internal.h | 10 ++++++++ 4 files changed, 77 insertions(+), 4 deletions(-) diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index ee0cc30e97dc8..37c8116cc605d 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -35,6 +35,7 @@ #include "parquet/column_writer.h" #include "parquet/file_reader.h" #include "parquet/file_writer.h" +#include 
"parquet/geometry_util_internal.h" #include "parquet/metadata.h" #include "parquet/platform.h" #include "parquet/properties.h" @@ -1853,6 +1854,21 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { EXPECT_DOUBLE_EQ(100, geometry_statistics->GetYMax()); EXPECT_FALSE(geometry_statistics->HasZ()); EXPECT_FALSE(geometry_statistics->HasM()); + + auto coverings = geometry_statistics->GetCoverings(); + EXPECT_EQ(1, coverings.size()); + EXPECT_EQ("WKB", coverings[0].first); + geometry::WKBGeometryBounder bounder; + const std::string& wkb = coverings[0].second; + geometry::WKBBuffer wkb_buffer(reinterpret_cast(wkb.data()), + wkb.size()); + bounder.ReadGeometry(&wkb_buffer); + bounder.Flush(); + auto bounds = bounder.Bounds(); + EXPECT_DOUBLE_EQ(0, bounds.min[0]); + EXPECT_DOUBLE_EQ(1, bounds.min[1]); + EXPECT_DOUBLE_EQ(99, bounds.max[0]); + EXPECT_DOUBLE_EQ(100, bounds.max[1]); } void TestWriteAndReadSpaced(ParquetVersion::type version, @@ -1875,8 +1891,10 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { } // Construct valid bits using definition levels - std::vector valid_bytes = - std::vector(definition_levels.begin(), definition_levels.end()); + std::vector valid_bytes(num_values); + std::transform(definition_levels.begin(), definition_levels.end(), + valid_bytes.begin(), + [&](int64_t level) { return static_cast(level); }); std::shared_ptr valid_bits; ASSERT_OK_AND_ASSIGN(valid_bits, ::arrow::internal::BytesToBits(valid_bytes)); @@ -1903,6 +1921,7 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { } std::shared_ptr statistics = metadata_stats(); + EXPECT_FALSE(statistics->HasMinMax()); EXPECT_TRUE(statistics->HasGeometryStatistics()); const GeometryStatistics* geometry_statistics = statistics->geometry_statistics(); std::vector geometry_types = geometry_statistics->GetGeometryTypes(); diff --git a/cpp/src/parquet/geometry_util_internal.h b/cpp/src/parquet/geometry_util_internal.h index 0ad02b073afec..28ca4849e779b 100644 --- 
a/cpp/src/parquet/geometry_util_internal.h +++ b/cpp/src/parquet/geometry_util_internal.h @@ -657,4 +657,33 @@ class WKBGeometryBounder { std::unordered_set geometry_types_; }; +inline std::string MakeCoveringWKBFromBound(double xmin, double xmax, double ymin, + double ymax) { + std::string wkb_data(93, 0); + + // endianness and header + auto data = reinterpret_cast(wkb_data.data()); + data[0] = ARROW_LITTLE_ENDIAN; + uint32_t wkb_type = 3; // POLYGON + memcpy(&data[1], &wkb_type, 4); + + // n_rings and n_coords + uint32_t n_rings = 1; + uint32_t n_coords = 5; + memcpy(&data[5], &n_rings, 4); + memcpy(&data[9], &n_coords, 4); + + // coordinates + double coords[5][2] = { + {xmin, ymin}, {xmax, ymin}, {xmax, ymax}, {xmin, ymax}, {xmin, ymin}}; + uint8_t* ptr = &data[13]; + for (auto coord : coords) { + memcpy(ptr, &coord[0], 8); + memcpy(ptr + 8, &coord[1], 8); + ptr += 16; + } + + return wkb_data; +} + } // namespace parquet::geometry diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index 02074bfecfa75..f2fbdf456d410 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -169,6 +169,16 @@ class GeometryStatisticsImpl { out.mmin = mins[3]; out.mmax = maxes[3]; + if (coverings_.empty()) { + // Generate coverings from bounding box if coverings is not present + std::string kind = "WKB"; + std::string value = + geometry::MakeCoveringWKBFromBound(out.xmin, out.xmax, out.ymin, out.ymax); + out.coverings.emplace_back(kind, value); + } else { + out.coverings = coverings_; + } + return out; } @@ -195,6 +205,7 @@ class GeometryStatisticsImpl { bounder_.ReadBox(box); bounder_.ReadGeometryTypes(encoded.geometry_types); + coverings_ = encoded.coverings; try { for (const auto& covering : encoded.coverings) { @@ -219,8 +230,13 @@ class GeometryStatisticsImpl { std::vector GetGeometryTypes() const { return bounder_.GeometryTypes(); } + std::vector> GetCoverings() const { + return coverings_; + } + private: 
geometry::WKBGeometryBounder bounder_; + std::vector> coverings_; bool is_valid_ = true; }; @@ -322,8 +338,7 @@ std::vector GeometryStatistics::GetGeometryTypes() const { std::vector> GeometryStatistics::GetCoverings() const { - // TODO (kontinuation): support coverings - return {}; + return impl_->GetCoverings(); } namespace { diff --git a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h index 92aba0ef30bdf..33d43221efef5 100644 --- a/cpp/src/parquet/thrift_internal.h +++ b/cpp/src/parquet/thrift_internal.h @@ -366,6 +366,16 @@ static inline format::Statistics ToThrift(const EncodedStatistics& stats) { bbox.__set_mmax(encoded_geometry_stats.mmax); } geometry_statistics.__set_bbox(bbox); + + std::vector coverings; + coverings.reserve(encoded_geometry_stats.coverings.size()); + for (const auto& pair : encoded_geometry_stats.coverings) { + format::Covering covering; + covering.__set_kind(pair.first); + covering.__set_value(pair.second); + coverings.push_back(std::move(covering)); + } + geometry_statistics.__set_coverings(coverings); statistics.__set_geometry_stats(geometry_statistics); } From 9813f48a412a35631e1a605a34400d287f96ac2d Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Thu, 12 Sep 2024 10:27:50 +0800 Subject: [PATCH 44/61] MakeStatistics and Statistics::Make should not be a breaking change --- cpp/src/parquet/statistics.cc | 36 ++++++++++++++++++++++++----------- cpp/src/parquet/statistics.h | 33 ++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 11 deletions(-) diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index f2fbdf456d410..3f96afb6241fd 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -1419,12 +1419,13 @@ std::shared_ptr Statistics::Make(const ColumnDescriptor* descr, encoded_stats->has_geometry_statistics, pool); } -std::shared_ptr Statistics::Make( - const ColumnDescriptor* descr, const std::string& encoded_min, - const std::string& 
encoded_max, int64_t num_values, int64_t null_count, - int64_t distinct_count, const EncodedGeometryStatistics& geometry_statistics, - bool has_min_max, bool has_null_count, bool has_distinct_count, - bool has_geometry_statistics, ::arrow::MemoryPool* pool) { +std::shared_ptr Statistics::Make(const ColumnDescriptor* descr, + const std::string& encoded_min, + const std::string& encoded_max, + int64_t num_values, int64_t null_count, + int64_t distinct_count, bool has_min_max, + bool has_null_count, bool has_distinct_count, + ::arrow::MemoryPool* pool) { #define MAKE_STATS(CAP_TYPE, KLASS) \ case Type::CAP_TYPE: \ return std::make_shared>( \ @@ -1437,12 +1438,8 @@ std::shared_ptr Statistics::Make( MAKE_STATS(INT64, Int64Type); MAKE_STATS(FLOAT, FloatType); MAKE_STATS(DOUBLE, DoubleType); + MAKE_STATS(BYTE_ARRAY, ByteArrayType); MAKE_STATS(FIXED_LEN_BYTE_ARRAY, FLBAType); - case Type::BYTE_ARRAY: - return std::make_shared>( - descr, encoded_min, encoded_max, num_values, null_count, distinct_count, - geometry_statistics, has_min_max, has_null_count, has_distinct_count, - has_geometry_statistics, pool); default: break; } @@ -1451,4 +1448,21 @@ std::shared_ptr Statistics::Make( return nullptr; } +std::shared_ptr Statistics::Make( + const ColumnDescriptor* descr, const std::string& encoded_min, + const std::string& encoded_max, int64_t num_values, int64_t null_count, + int64_t distinct_count, const EncodedGeometryStatistics& geometry_statistics, + bool has_min_max, bool has_null_count, bool has_distinct_count, + bool has_geometry_statistics, ::arrow::MemoryPool* pool) { + if (descr->physical_type() == Type::BYTE_ARRAY) { + return std::make_shared>( + descr, encoded_min, encoded_max, num_values, null_count, distinct_count, + geometry_statistics, has_min_max, has_null_count, has_distinct_count, + has_geometry_statistics, pool); + } else { + return Make(descr, encoded_min, encoded_max, num_values, null_count, distinct_count, + has_min_max, has_null_count, has_distinct_count, 
pool); + } +} + } // namespace parquet diff --git a/cpp/src/parquet/statistics.h b/cpp/src/parquet/statistics.h index 6e6aea932ce18..fc8d914bbc24f 100644 --- a/cpp/src/parquet/statistics.h +++ b/cpp/src/parquet/statistics.h @@ -304,6 +304,27 @@ class PARQUET_EXPORT Statistics { /// \param[in] has_null_count whether the null_count statistics are set /// \param[in] has_distinct_count whether the distinct_count statistics are set /// \param[in] pool a memory pool to use for any memory allocations, optional + static std::shared_ptr Make( + const ColumnDescriptor* descr, const std::string& encoded_min, + const std::string& encoded_max, int64_t num_values, int64_t null_count, + int64_t distinct_count, bool has_min_max, bool has_null_count, + bool has_distinct_count, + ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()); + + /// \brief Create a new statistics instance given a column schema + /// definition and preexisting state + /// \param[in] descr the column schema + /// \param[in] encoded_min the encoded minimum value + /// \param[in] encoded_max the encoded maximum value + /// \param[in] num_values total number of values + /// \param[in] null_count number of null values + /// \param[in] distinct_count number of distinct values + /// \param[in] geometry_statistics the geometry statistics + /// \param[in] has_min_max whether the min/max statistics are set + /// \param[in] has_null_count whether the null_count statistics are set + /// \param[in] has_distinct_count whether the distinct_count statistics are set + /// \param[in] has_geometry_statistics whether the geometry statistics are set + /// \param[in] pool a memory pool to use for any memory allocations, optional static std::shared_ptr Make( const ColumnDescriptor* descr, const std::string& encoded_min, const std::string& encoded_max, int64_t num_values, int64_t null_count, @@ -460,6 +481,18 @@ std::shared_ptr> MakeStatistics(const typename DType::c_t DType::type_num, &min, &max, num_values, null_count, 
distinct_count)); } +/// \brief Typed version of Statistics::Make +template +std::shared_ptr> MakeStatistics( + const ColumnDescriptor* descr, const std::string& encoded_min, + const std::string& encoded_max, int64_t num_values, int64_t null_count, + int64_t distinct_count, bool has_min_max, bool has_null_count, + bool has_distinct_count, ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) { + return std::static_pointer_cast>(Statistics::Make( + descr, encoded_min, encoded_max, num_values, null_count, distinct_count, + has_min_max, has_null_count, has_distinct_count, pool)); +} + /// \brief Typed version of Statistics::Make template std::shared_ptr> MakeStatistics( From c56133c7573092aa074d7822e19d8e06dd0977bf Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Thu, 12 Sep 2024 19:03:29 +0800 Subject: [PATCH 45/61] ColumnIndex, as well as some other fixes and refacturings --- cpp/src/parquet/CMakeLists.txt | 3 +- cpp/src/parquet/column_reader.cc | 4 + cpp/src/parquet/geometry_util_internal.h | 4 +- .../parquet/geometry_util_internal_test.cc | 16 ++ cpp/src/parquet/metadata.cc | 37 +-- cpp/src/parquet/page_index.cc | 102 ++++++-- cpp/src/parquet/page_index.h | 15 ++ cpp/src/parquet/page_index_test.cc | 56 +++- cpp/src/parquet/reader_test.cc | 244 ++++++++++++------ cpp/src/parquet/statistics.cc | 13 + cpp/src/parquet/statistics.h | 3 + cpp/src/parquet/thrift_internal.h | 90 +++++-- 12 files changed, 424 insertions(+), 163 deletions(-) diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index a6e92eabcebfd..f5d9a77dfb54f 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -375,12 +375,11 @@ add_parquet_test(internals-test statistics_test.cc encoding_test.cc metadata_test.cc + geometry_util_internal_test.cc page_index_test.cc public_api_test.cc types_test.cc) -add_parquet_test(geometry-test SOURCES geometry_util_internal_test.cc) - set_source_files_properties(public_api_test.cc PROPERTIES 
SKIP_PRECOMPILE_HEADERS ON SKIP_UNITY_BUILD_INCLUSION ON) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index 60a8a2176b0a8..e982675ed390f 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -243,6 +243,10 @@ EncodedStatistics ExtractStatsFromHeader(const H& header) { if (stats.__isset.distinct_count) { page_statistics.set_distinct_count(stats.distinct_count); } + if (stats.__isset.geometry_stats) { + page_statistics.set_geometry( + FromThrift(stats.geometry_stats, stats.__isset.geometry_stats)); + } return page_statistics; } diff --git a/cpp/src/parquet/geometry_util_internal.h b/cpp/src/parquet/geometry_util_internal.h index 28ca4849e779b..a1dd9bb6da711 100644 --- a/cpp/src/parquet/geometry_util_internal.h +++ b/cpp/src/parquet/geometry_util_internal.h @@ -657,8 +657,8 @@ class WKBGeometryBounder { std::unordered_set geometry_types_; }; -inline std::string MakeCoveringWKBFromBound(double xmin, double xmax, double ymin, - double ymax) { +static inline std::string MakeCoveringWKBFromBound(double xmin, double xmax, double ymin, + double ymax) { std::string wkb_data(93, 0); // endianness and header diff --git a/cpp/src/parquet/geometry_util_internal_test.cc b/cpp/src/parquet/geometry_util_internal_test.cc index 4d4c907f37e80..3763e7ff5de2b 100644 --- a/cpp/src/parquet/geometry_util_internal_test.cc +++ b/cpp/src/parquet/geometry_util_internal_test.cc @@ -17,6 +17,7 @@ #include #include +#include #include "arrow/testing/gtest_compat.h" @@ -446,4 +447,19 @@ INSTANTIATE_TEST_SUITE_P( 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, {30, 10, 40, 300, 30, 10, 40, 300}))); +TEST(TestGeometryUtil, MakeCoveringWKBFromBound) { + std::string wkb_covering = MakeCoveringWKBFromBound(10, 20, 30, 40); + // POLYGON ((10 30, 20 30, 20 40, 10 40, 10 30)) + std::vector expected_wkb = { + 0x01, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x3e, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40}; + EXPECT_EQ(expected_wkb.size(), wkb_covering.size()); + EXPECT_EQ(0, memcmp(wkb_covering.data(), expected_wkb.data(), expected_wkb.size())); +} + } // namespace parquet::geometry diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index b58a6b0e6f50e..e6ecf3a866484 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -88,37 +88,6 @@ std::string ParquetVersionToString(ParquetVersion::type ver) { return "UNKNOWN"; } -static EncodedGeometryStatistics MakeEncodedGeometryStatistics( - const format::Statistics& stats) { - EncodedGeometryStatistics out; - - if (stats.__isset.geometry_stats) { - const format::GeometryStatistics& geom_stats = stats.geometry_stats; - out.geometry_types = geom_stats.geometry_types; - - out.xmin = geom_stats.bbox.xmin; - out.xmax = geom_stats.bbox.xmax; - out.ymin = geom_stats.bbox.ymin; - out.ymax = geom_stats.bbox.ymax; - - if (geom_stats.bbox.__isset.zmin && geom_stats.bbox.__isset.zmax) { - out.zmin = geom_stats.bbox.zmin; - out.zmax = geom_stats.bbox.zmax; - } - - if (geom_stats.bbox.__isset.mmin && geom_stats.bbox.__isset.mmax) { - out.mmin = geom_stats.bbox.mmin; - out.mmax = geom_stats.bbox.mmax; - } - - for (const auto& covering : geom_stats.coverings) { - out.coverings.emplace_back(covering.kind, covering.value); - } - } - - return out; -} - template static std::shared_ptr MakeTypedColumnStats( const format::ColumnMetaData& metadata, const ColumnDescriptor* descr) { @@ -128,7 +97,8 @@ static std::shared_ptr MakeTypedColumnStats( descr, metadata.statistics.min_value, 
metadata.statistics.max_value, metadata.num_values - metadata.statistics.null_count, metadata.statistics.null_count, metadata.statistics.distinct_count, - MakeEncodedGeometryStatistics(metadata.statistics), + FromThrift(metadata.statistics.geometry_stats, + metadata.statistics.__isset.geometry_stats), metadata.statistics.__isset.max_value && metadata.statistics.__isset.min_value, metadata.statistics.__isset.null_count, metadata.statistics.__isset.distinct_count, @@ -139,7 +109,8 @@ static std::shared_ptr MakeTypedColumnStats( descr, metadata.statistics.min, metadata.statistics.max, metadata.num_values - metadata.statistics.null_count, metadata.statistics.null_count, metadata.statistics.distinct_count, - MakeEncodedGeometryStatistics(metadata.statistics), + FromThrift(metadata.statistics.geometry_stats, + metadata.statistics.__isset.geometry_stats), metadata.statistics.__isset.max && metadata.statistics.__isset.min, metadata.statistics.__isset.null_count, metadata.statistics.__isset.distinct_count, metadata.statistics.__isset.geometry_stats); diff --git a/cpp/src/parquet/page_index.cc b/cpp/src/parquet/page_index.cc index afda4c6064b36..cb9ebdd4296c8 100644 --- a/cpp/src/parquet/page_index.cc +++ b/cpp/src/parquet/page_index.cc @@ -94,12 +94,16 @@ class TypedColumnIndexImpl : public TypedColumnIndex { TypedColumnIndexImpl(const ColumnDescriptor& descr, format::ColumnIndex column_index) : column_index_(std::move(column_index)) { // Make sure the number of pages is valid and it does not overflow to int32_t. 
+ bool is_geometry = + (descr.logical_type() != nullptr && descr.logical_type()->is_geometry()); const size_t num_pages = column_index_.null_pages.size(); if (num_pages >= static_cast(std::numeric_limits::max()) || - column_index_.min_values.size() != num_pages || - column_index_.max_values.size() != num_pages || + (!is_geometry && (column_index_.min_values.size() != num_pages || + column_index_.max_values.size() != num_pages)) || (column_index_.__isset.null_counts && - column_index_.null_counts.size() != num_pages)) { + column_index_.null_counts.size() != num_pages) || + (column_index_.__isset.geometry_stats && + column_index_.geometry_stats.size() != num_pages)) { throw ParquetException("Invalid column index"); } @@ -110,23 +114,49 @@ class TypedColumnIndexImpl : public TypedColumnIndex { })); DCHECK_LE(num_non_null_pages, num_pages); - // Allocate slots for decoded values. - min_values_.resize(num_pages); - max_values_.resize(num_pages); non_null_page_indices_.reserve(num_non_null_pages); - - // Decode min and max values according to the physical type. - // Note that null page are skipped. - auto plain_decoder = MakeTypedDecoder(Encoding::PLAIN, &descr); for (size_t i = 0; i < num_pages; ++i) { if (!column_index_.null_pages[i]) { - // The check on `num_pages` has guaranteed the cast below is safe. non_null_page_indices_.emplace_back(static_cast(i)); - Decode(plain_decoder, column_index_.min_values[i], &min_values_, i); - Decode(plain_decoder, column_index_.max_values[i], &max_values_, i); } } DCHECK_EQ(num_non_null_pages, non_null_page_indices_.size()); + + if (!is_geometry) { + // Allocate slots for decoded values. + min_values_.resize(num_pages); + max_values_.resize(num_pages); + + // Decode min and max values according to the physical type. + // Note that null page are skipped. 
+ auto plain_decoder = MakeTypedDecoder(Encoding::PLAIN, &descr); + for (size_t i = 0; i < num_pages; ++i) { + if (!column_index_.null_pages[i]) { + // The check on `num_pages` has guaranteed the cast below is safe. + Decode(plain_decoder, column_index_.min_values[i], &min_values_, i); + Decode(plain_decoder, column_index_.max_values[i], &max_values_, i); + } + } + } else { + // Decode geometry statistics. + // Note that null pages are skipped. + if (column_index_.__isset.geometry_stats) { + encoded_geometry_statistics_.resize(num_pages); + for (size_t i = 0; i < num_pages; ++i) { + if (!column_index_.null_pages[i]) { + encoded_geometry_statistics_[i] = + FromThrift(column_index_.geometry_stats[i], true); + } + } + + geometry_statistics_.reserve(num_pages); + for (const auto& encoded_geom_stat : encoded_geometry_statistics_) { + GeometryStatistics geom_stat; + geom_stat.Decode(encoded_geom_stat); + geometry_statistics_.push_back(std::move(geom_stat)); + } + } + } } const std::vector& null_pages() const override { @@ -159,6 +189,15 @@ class TypedColumnIndexImpl : public TypedColumnIndex { const std::vector& max_values() const override { return max_values_; } + const std::vector& encoded_geometry_statistics() + const override { + return encoded_geometry_statistics_; + } + + const std::vector& geometry_statistics() const override { + return geometry_statistics_; + } + private: /// Wrapped thrift column index. const format::ColumnIndex column_index_; @@ -167,6 +206,10 @@ class TypedColumnIndexImpl : public TypedColumnIndex { std::vector max_values_; /// A list of page indices for non-null pages. 
std::vector non_null_page_indices_; + /// A list of encoded geometry statistics + std::vector encoded_geometry_statistics_; + /// A list of geometry statistics + std::vector geometry_statistics_; }; class OffsetIndexImpl : public OffsetIndex { @@ -474,17 +517,29 @@ class ColumnIndexBuilderImpl final : public ColumnIndexBuilder { column_index_.null_pages.emplace_back(true); column_index_.min_values.emplace_back(""); column_index_.max_values.emplace_back(""); - } else if (stats.has_min && stats.has_max) { - const size_t page_ordinal = column_index_.null_pages.size(); - non_null_page_indices_.emplace_back(page_ordinal); - column_index_.min_values.emplace_back(stats.min()); - column_index_.max_values.emplace_back(stats.max()); - column_index_.null_pages.emplace_back(false); } else { - /// This is a non-null page but it lacks of meaningful min/max values. - /// Discard the column index. - state_ = BuilderState::kDiscarded; - return; + bool discard = true; + if (stats.has_min && stats.has_max) { + const size_t page_ordinal = column_index_.null_pages.size(); + non_null_page_indices_.emplace_back(page_ordinal); + column_index_.min_values.emplace_back(stats.min()); + column_index_.max_values.emplace_back(stats.max()); + discard = false; + } + if (stats.has_geometry_statistics) { + column_index_.__isset.geometry_stats = true; + column_index_.geometry_stats.emplace_back(ToThrift(stats.geometry_statistics())); + discard = false; + } + + if (!discard) { + column_index_.null_pages.emplace_back(false); + } else { + /// This is a non-null page but it lacks of meaningful min/max values + /// or geometry statistics. Discard the column index. 
+ state_ = BuilderState::kDiscarded; + return; + } } if (column_index_.__isset.null_counts && stats.has_null_count) { @@ -897,6 +952,7 @@ std::unique_ptr ColumnIndex::Make(const ColumnDescriptor& descr, case Type::BYTE_ARRAY: return std::make_unique>( descr, std::move(column_index)); + case Type::FIXED_LEN_BYTE_ARRAY: return std::make_unique>(descr, std::move(column_index)); diff --git a/cpp/src/parquet/page_index.h b/cpp/src/parquet/page_index.h index d45c59cab223f..0866a06fe1559 100644 --- a/cpp/src/parquet/page_index.h +++ b/cpp/src/parquet/page_index.h @@ -27,6 +27,8 @@ namespace parquet { class EncodedStatistics; +class EncodedGeometryStatistics; +class GeometryStatistics; struct PageIndexLocation; /// \brief ColumnIndex is a proxy around format::ColumnIndex. @@ -76,6 +78,13 @@ class PARQUET_EXPORT ColumnIndex { /// \brief A vector of page indices for non-null pages. virtual const std::vector& non_null_page_indices() const = 0; + + /// \brief A vector of encoded geometry statistics for each data page in this column. + /// + /// `null_pages` should be inspected first, as only pages with non-null values + /// may have their upper bounds populated. + virtual const std::vector& encoded_geometry_statistics() + const = 0; }; /// \brief Typed implementation of ColumnIndex. @@ -96,6 +105,12 @@ class PARQUET_EXPORT TypedColumnIndex : public ColumnIndex { /// /// Just like `min_values`, but for upper bounds instead of lower bounds. virtual const std::vector& max_values() const = 0; + + /// \brief A vector of geometry statistics for each data page in this column. 
+ /// + /// This is like `min_values` and `max_values`, but for geometry statistics + /// instead of lower/upper bounds + virtual const std::vector& geometry_statistics() const = 0; }; using BoolColumnIndex = TypedColumnIndex; diff --git a/cpp/src/parquet/page_index_test.cc b/cpp/src/parquet/page_index_test.cc index 4db49b4267415..ea4396a15ebb0 100644 --- a/cpp/src/parquet/page_index_test.cc +++ b/cpp/src/parquet/page_index_test.cc @@ -23,10 +23,13 @@ #include "arrow/io/file.h" #include "arrow/util/float16.h" #include "parquet/file_reader.h" +#include "parquet/geometry_util_internal.h" #include "parquet/metadata.h" #include "parquet/schema.h" +#include "parquet/statistics.h" #include "parquet/test_util.h" #include "parquet/thrift_internal.h" +#include "parquet/types.h" namespace parquet { @@ -459,6 +462,8 @@ void TestWriteTypedColumnIndex(schema::NodePtr node, const std::vector& page_stats, BoundaryOrder::type boundary_order, bool has_null_counts) { auto descr = std::make_unique(node, /*max_definition_level=*/1, 0); + bool is_geometry = + (descr->logical_type() != nullptr && descr->logical_type()->is_geometry()); auto builder = ColumnIndexBuilder::Make(descr.get()); for (const auto& stats : page_stats) { @@ -484,11 +489,27 @@ void TestWriteTypedColumnIndex(schema::NodePtr node, const size_t num_pages = column_index->null_pages().size(); for (size_t i = 0; i < num_pages; ++i) { ASSERT_EQ(page_stats[i].all_null_value, column_index->null_pages()[i]); - ASSERT_EQ(page_stats[i].min(), column_index->encoded_min_values()[i]); - ASSERT_EQ(page_stats[i].max(), column_index->encoded_max_values()[i]); + if (!is_geometry) { + ASSERT_EQ(page_stats[i].min(), column_index->encoded_min_values()[i]); + ASSERT_EQ(page_stats[i].max(), column_index->encoded_max_values()[i]); + } if (has_null_counts) { ASSERT_EQ(page_stats[i].null_count, column_index->null_counts()[i]); } + if (page_stats[i].has_geometry_statistics) { + const auto& expected_stats = page_stats[i].geometry_statistics(); 
+ const auto& actual_stats = column_index->encoded_geometry_statistics()[i]; + ASSERT_EQ(expected_stats.geometry_types, actual_stats.geometry_types); + ASSERT_EQ(expected_stats.coverings, actual_stats.coverings); + ASSERT_DOUBLE_EQ(expected_stats.xmin, actual_stats.xmin); + ASSERT_DOUBLE_EQ(expected_stats.xmax, actual_stats.xmax); + ASSERT_DOUBLE_EQ(expected_stats.ymin, actual_stats.ymin); + ASSERT_DOUBLE_EQ(expected_stats.ymax, actual_stats.ymax); + ASSERT_DOUBLE_EQ(expected_stats.zmin, actual_stats.zmin); + ASSERT_DOUBLE_EQ(expected_stats.zmax, actual_stats.zmax); + ASSERT_DOUBLE_EQ(expected_stats.mmin, actual_stats.mmin); + ASSERT_DOUBLE_EQ(expected_stats.mmax, actual_stats.mmax); + } } } } @@ -601,6 +622,37 @@ TEST(PageIndex, WriteFloat16ColumnIndex) { /*has_null_counts=*/false); } +TEST(PageIndex, WriteGeometryColumnIndex) { + std::vector page_stats(3); + + EncodedGeometryStatistics geom_stats[3]; + for (int i = 0; i < 3; i++) { + geom_stats[i].xmin = i + 1; + geom_stats[i].xmax = i + 2; + geom_stats[i].ymin = i + 3; + geom_stats[i].ymax = i + 4; + geom_stats[i].zmin = i + 5; + geom_stats[i].zmax = i + 6; + geom_stats[i].mmin = i + 7; + geom_stats[i].mmax = i + 8; + geom_stats[i].geometry_types = {i + 1}; + std::string covering = geometry::MakeCoveringWKBFromBound( + geom_stats[i].xmin, geom_stats[i].xmax, geom_stats[i].ymin, geom_stats[i].ymax); + geom_stats[i].coverings = {{"WKB", covering}}; + page_stats.at(i).set_geometry(geom_stats[i]); + } + + schema::NodePtr node = schema::PrimitiveNode::Make( + "c1", Repetition::OPTIONAL, + GeometryLogicalType::Make(R"({"id": {"authority": "OGC", "code": "CRS84"}})", + LogicalType::GeometryEdges::PLANAR, + LogicalType::GeometryEncoding::WKB, "metadata0"), + Type::BYTE_ARRAY); + + TestWriteTypedColumnIndex(node, page_stats, BoundaryOrder::Unordered, + /*has_null_counts=*/false); +} + TEST(PageIndex, WriteColumnIndexWithAllNullPages) { // All values are null. 
std::vector page_stats(3); diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index c5af6ae6368eb..887ebdb4a52bd 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -1817,63 +1817,125 @@ TEST(PageIndexReaderTest, ReadFileWithoutPageIndex) { ASSERT_EQ(nullptr, row_group_index_reader); } -TEST(TestFileReader, GeometryLogicalType) { - const int num_rows = 1000; - - // Make schema - schema::NodeVector fields; - fields.push_back(PrimitiveNode::Make( - "g", Repetition::REQUIRED, - GeometryLogicalType::Make(R"({"id": {"authority": "OGC", "code": "CRS84"}})", - LogicalType::GeometryEdges::PLANAR, - LogicalType::GeometryEncoding::WKB, "metadata0"), - Type::BYTE_ARRAY)); - auto schema = std::static_pointer_cast( - GroupNode::Make("schema", Repetition::REQUIRED, fields)); +class TestGeometryLogicalType : public ::testing::Test { + public: + const int NUM_ROWS = 1000; + + void WriteTestData(ParquetDataPageVersion data_page_version, + bool enable_write_page_index) { + // Make schema + schema::NodeVector fields; + fields.push_back(PrimitiveNode::Make( + "g", Repetition::REQUIRED, + GeometryLogicalType::Make(R"({"id": {"authority": "OGC", "code": "CRS84"}})", + LogicalType::GeometryEdges::PLANAR, + LogicalType::GeometryEncoding::WKB, "metadata0"), + Type::BYTE_ARRAY)); + auto schema = std::static_pointer_cast( + GroupNode::Make("schema", Repetition::REQUIRED, fields)); + + // Write small batches and small data pages + + auto writer_props_builder = WriterProperties::Builder() + .write_batch_size(64) + ->data_pagesize(128) + ->data_page_version(data_page_version); + if (enable_write_page_index) { + writer_props_builder->enable_write_page_index(); + } - // Write small batches and small data pages - std::shared_ptr writer_props = - WriterProperties::Builder().write_batch_size(64)->data_pagesize(128)->build(); + std::shared_ptr writer_props = writer_props_builder->build(); + + ASSERT_OK_AND_ASSIGN(auto out_file, 
::arrow::io::BufferOutputStream::Create()); + std::shared_ptr file_writer = + ParquetFileWriter::Open(out_file, schema, writer_props); + RowGroupWriter* rg_writer = file_writer->AppendRowGroup(); + + // write WKB points to columns + auto* writer = static_cast(rg_writer->NextColumn()); + std::vector buffer(test::WKB_POINT_SIZE * NUM_ROWS); + uint8_t* ptr = buffer.data(); + std::vector values(NUM_ROWS); + for (int k = 0; k < NUM_ROWS; k++) { + test::GenerateWKBPoint(ptr, k, k + 1); + values[k].len = test::WKB_POINT_SIZE; + values[k].ptr = ptr; + ptr += test::WKB_POINT_SIZE; + } + writer->WriteBatch(NUM_ROWS, nullptr, nullptr, values.data()); - ASSERT_OK_AND_ASSIGN(auto out_file, ::arrow::io::BufferOutputStream::Create()); - std::shared_ptr file_writer = - ParquetFileWriter::Open(out_file, schema, writer_props); - RowGroupWriter* rg_writer = file_writer->AppendRowGroup(); + rg_writer->Close(); + file_writer->Close(); - // write WKB points to columns - auto* writer = static_cast(rg_writer->NextColumn()); - std::vector buffer(test::WKB_POINT_SIZE * num_rows); - uint8_t* ptr = buffer.data(); - std::vector values(num_rows); - for (int k = 0; k < num_rows; k++) { - test::GenerateWKBPoint(ptr, k, k + 1); - values[k].len = test::WKB_POINT_SIZE; - values[k].ptr = ptr; - ptr += test::WKB_POINT_SIZE; + ASSERT_OK_AND_ASSIGN(file_buf, out_file->Finish()); } - writer->WriteBatch(num_rows, nullptr, nullptr, values.data()); - rg_writer->Close(); - file_writer->Close(); + void TestWriteAndRead(ParquetDataPageVersion data_page_version, + bool enable_write_page_index) { + WriteTestData(data_page_version, enable_write_page_index); + + auto in_file = std::make_shared<::arrow::io::BufferReader>(file_buf); + + ReaderProperties reader_props; + reader_props.enable_buffered_stream(); + reader_props.set_buffer_size(64); + auto file_reader = ParquetFileReader::Open(in_file, reader_props); + + // Check that the geometry statistics are correctly written and read + auto metadata = 
file_reader->metadata(); + auto page_index_reader = file_reader->GetPageIndexReader(); + int num_row_groups = metadata->num_row_groups(); + for (int i = 0; i < num_row_groups; i++) { + auto row_group_metadata = metadata->RowGroup(i); + auto column_chunk_metadata = row_group_metadata->ColumnChunk(0); + auto statistics = column_chunk_metadata->statistics(); + CheckStatistics(statistics); + + if (enable_write_page_index) { + // Check column index + auto row_group_index_reader = page_index_reader->RowGroup(i); + auto column_index = row_group_index_reader->GetColumnIndex(0); + auto geometry_column_index = + std::static_pointer_cast(column_index); + CheckColumnIndex(geometry_column_index); + } else { + // Check per-page statistics + auto row_group_reader = file_reader->RowGroup(i); + auto page_reader = row_group_reader->GetColumnPageReader(0); + CheckPageStatistics(page_reader.get()); + } + } - // Open the reader - ASSERT_OK_AND_ASSIGN(auto file_buf, out_file->Finish()); - auto in_file = std::make_shared<::arrow::io::BufferReader>(file_buf); + // Check the geometry values + auto row_group = file_reader->RowGroup(0); + std::shared_ptr reader = + std::static_pointer_cast(row_group->Column(0)); + int64_t total_values_read = 0; + while (total_values_read < NUM_ROWS) { + std::vector out(NUM_ROWS); + int64_t values_read = 0; + int64_t levels_read = + reader->ReadBatch(NUM_ROWS, nullptr, nullptr, out.data(), &values_read); + ASSERT_GE(levels_read, 1); + ASSERT_GE(values_read, 1); + + // Check the batch + for (int64_t i = 0; i < values_read; i++) { + const ByteArray& value = out[i]; + double x = 0; + double y = 0; + EXPECT_TRUE(test::GetWKBPointCoordinate(value, &x, &y)); + auto expected_x = static_cast(i + total_values_read); + auto expected_y = static_cast(i + 1 + total_values_read); + EXPECT_DOUBLE_EQ(expected_x, x); + EXPECT_DOUBLE_EQ(expected_y, y); + } - ReaderProperties reader_props; - reader_props.enable_buffered_stream(); - reader_props.set_buffer_size(64); - 
std::unique_ptr file_reader = - ParquetFileReader::Open(in_file, reader_props); + total_values_read += values_read; + } + } - // Check that the geometry statistics are correctly written and read - std::shared_ptr metadata = file_reader->metadata(); - int num_row_groups = metadata->num_row_groups(); - for (int i = 0; i < num_row_groups; i++) { - std::unique_ptr row_group_metadata = metadata->RowGroup(i); - std::unique_ptr column_chunk_metadata = - row_group_metadata->ColumnChunk(0); - std::shared_ptr statistics = column_chunk_metadata->statistics(); + void CheckStatistics(std::shared_ptr statistics) { EXPECT_FALSE(statistics->HasMinMax()); EXPECT_TRUE(statistics->HasGeometryStatistics()); const GeometryStatistics* geom_stats = statistics->geometry_statistics(); @@ -1886,34 +1948,70 @@ TEST(TestFileReader, GeometryLogicalType) { EXPECT_GT(geom_stats->GetYMax(), geom_stats->GetYMin()); EXPECT_FALSE(geom_stats->HasZ()); EXPECT_FALSE(geom_stats->HasM()); + EXPECT_EQ(1, geom_stats->GetCoverings().size()); + EXPECT_EQ("WKB", geom_stats->GetCoverings().front().first); } - // Check the geometry values - auto row_group = file_reader->RowGroup(0); - std::shared_ptr reader = - std::static_pointer_cast(row_group->Column(0)); - int64_t total_values_read = 0; - while (total_values_read < num_rows) { - std::vector out(num_rows); - int64_t values_read = 0; - int64_t levels_read = - reader->ReadBatch(num_rows, nullptr, nullptr, out.data(), &values_read); - ASSERT_GE(levels_read, 1); - ASSERT_GE(values_read, 1); - - // Check the batch - for (int64_t i = 0; i < values_read; i++) { - const ByteArray& value = out[i]; - double x = 0; - double y = 0; - EXPECT_TRUE(test::GetWKBPointCoordinate(value, &x, &y)); - auto expected_x = static_cast(i + total_values_read); - auto expected_y = static_cast(i + 1 + total_values_read); - EXPECT_DOUBLE_EQ(expected_x, x); - EXPECT_DOUBLE_EQ(expected_y, y); + void CheckColumnIndex(std::shared_ptr geometry_column_index) { + 
EXPECT_FALSE(geometry_column_index->geometry_statistics().empty()); + double last_xmin = -geometry::kInf; + double last_ymin = -geometry::kInf; + for (const auto& geom_stats : geometry_column_index->geometry_statistics()) { + std::vector geometry_types = geom_stats.GetGeometryTypes(); + EXPECT_EQ(1, geometry_types.size()); + EXPECT_EQ(1, geometry_types[0]); + EXPECT_GE(geom_stats.GetXMin(), last_xmin); + EXPECT_GT(geom_stats.GetXMax(), geom_stats.GetXMin()); + EXPECT_GT(geom_stats.GetYMin(), last_ymin); + EXPECT_GT(geom_stats.GetYMax(), geom_stats.GetYMin()); + EXPECT_FALSE(geom_stats.HasZ()); + EXPECT_FALSE(geom_stats.HasM()); + EXPECT_EQ(1, geom_stats.GetCoverings().size()); + EXPECT_EQ("WKB", geom_stats.GetCoverings().front().first); + last_xmin = geom_stats.GetXMin(); + last_ymin = geom_stats.GetYMin(); } + } + + void CheckPageStatistics(PageReader* page_reader) { + while (true) { + auto page = page_reader->NextPage(); + if (!page) { + break; // No more pages + } + // Check if the page has statistics + if (page->type() == parquet::PageType::DATA_PAGE || + page->type() == parquet::PageType::DATA_PAGE_V2) { + std::shared_ptr data_page = + std::static_pointer_cast(page); + const EncodedStatistics& statistics = data_page->statistics(); + EXPECT_TRUE(statistics.has_geometry_statistics); + EncodedGeometryStatistics geom_stats = statistics.geometry_statistics(); + EXPECT_EQ(1, geom_stats.geometry_types.size()); + EXPECT_EQ(1, geom_stats.coverings.size()); + EXPECT_GE(geom_stats.xmin, 0); + EXPECT_GT(geom_stats.xmax, geom_stats.xmin); + EXPECT_GT(geom_stats.ymin, 0); + EXPECT_GT(geom_stats.ymax, geom_stats.ymin); + } + } + } + + protected: + std::shared_ptr file_buf; +}; + +TEST_F(TestGeometryLogicalType, TestWriteAndReadWithPageStatistics) { + for (auto data_page_version : + {ParquetDataPageVersion::V1, ParquetDataPageVersion::V2}) { + TestWriteAndRead(data_page_version, false); + } +} - total_values_read += values_read; +TEST_F(TestGeometryLogicalType, 
TestWriteAndReadWithColumnIndex) { + for (auto data_page_version : + {ParquetDataPageVersion::V1, ParquetDataPageVersion::V2}) { + TestWriteAndRead(data_page_version, true); } } diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index 3f96afb6241fd..65346a487c5ed 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -153,6 +153,12 @@ class GeometryStatisticsImpl { } } + void Reset() { + bounder_.Reset(); + coverings_.clear(); + is_valid_ = true; + } + EncodedGeometryStatistics Encode() const { const double* mins = bounder_.Bounds().min; const double* maxes = bounder_.Bounds().max; @@ -247,6 +253,8 @@ GeometryStatistics::GeometryStatistics() { GeometryStatistics::GeometryStatistics(std::unique_ptr impl) : impl_(std::move(impl)) {} +GeometryStatistics::GeometryStatistics(GeometryStatistics&&) = default; + GeometryStatistics::~GeometryStatistics() = default; bool GeometryStatistics::Equals(const GeometryStatistics& other) const { @@ -274,6 +282,8 @@ void GeometryStatistics::Update(const ::arrow::Array& values, bool update_counts impl_->Update(values, update_counts); } +void GeometryStatistics::Reset() { impl_->Reset(); } + bool GeometryStatistics::is_valid() const { return impl_->is_valid(); } EncodedGeometryStatistics GeometryStatistics::Encode() { return impl_->Encode(); } @@ -990,6 +1000,9 @@ class TypedStatisticsImpl : public TypedStatistics { void Reset() override { ResetCounts(); ResetHasFlags(); + if (HasGeometryStatistics()) { + geometry_statistics_->Reset(); + } } void SetMinMax(const T& arg_min, const T& arg_max) override { diff --git a/cpp/src/parquet/statistics.h b/cpp/src/parquet/statistics.h index fc8d914bbc24f..430bff9fd4594 100644 --- a/cpp/src/parquet/statistics.h +++ b/cpp/src/parquet/statistics.h @@ -146,6 +146,7 @@ class PARQUET_EXPORT GeometryStatistics { public: GeometryStatistics(); explicit GeometryStatistics(std::unique_ptr impl); + GeometryStatistics(GeometryStatistics&&); 
~GeometryStatistics(); @@ -161,6 +162,8 @@ class PARQUET_EXPORT GeometryStatistics { void Update(const ::arrow::Array& values, bool update_counts); + void Reset(); + EncodedGeometryStatistics Encode(); bool is_valid() const; diff --git a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h index 33d43221efef5..5769c99f0bd3d 100644 --- a/cpp/src/parquet/thrift_internal.h +++ b/cpp/src/parquet/thrift_internal.h @@ -231,6 +231,36 @@ static inline AadMetadata FromThrift(format::AesGcmCtrV1 aesGcmCtrV1) { aesGcmCtrV1.supply_aad_prefix}; } +static inline EncodedGeometryStatistics FromThrift( + const format::GeometryStatistics& geometry_stats, bool has_geometry_stats) { + EncodedGeometryStatistics out; + + if (has_geometry_stats) { + out.geometry_types = geometry_stats.geometry_types; + + out.xmin = geometry_stats.bbox.xmin; + out.xmax = geometry_stats.bbox.xmax; + out.ymin = geometry_stats.bbox.ymin; + out.ymax = geometry_stats.bbox.ymax; + + if (geometry_stats.bbox.__isset.zmin && geometry_stats.bbox.__isset.zmax) { + out.zmin = geometry_stats.bbox.zmin; + out.zmax = geometry_stats.bbox.zmax; + } + + if (geometry_stats.bbox.__isset.mmin && geometry_stats.bbox.__isset.mmax) { + out.mmin = geometry_stats.bbox.mmin; + out.mmax = geometry_stats.bbox.mmax; + } + + for (const auto& covering : geometry_stats.coverings) { + out.coverings.emplace_back(covering.kind, covering.value); + } + } + + return out; +} + static inline EncryptionAlgorithm FromThrift(format::EncryptionAlgorithm encryption) { EncryptionAlgorithm encryption_algorithm; @@ -323,6 +353,37 @@ static inline format::SortingColumn ToThrift(SortingColumn sorting_column) { return thrift_sorting_column; } +static inline format::GeometryStatistics ToThrift( + const EncodedGeometryStatistics& encoded_geometry_stats) { + format::GeometryStatistics geometry_statistics; + geometry_statistics.__set_geometry_types(encoded_geometry_stats.geometry_types); + format::BoundingBox bbox; + 
bbox.__set_xmin(encoded_geometry_stats.xmin); + bbox.__set_xmax(encoded_geometry_stats.xmax); + bbox.__set_ymin(encoded_geometry_stats.ymin); + bbox.__set_ymax(encoded_geometry_stats.ymax); + if (encoded_geometry_stats.has_z()) { + bbox.__set_zmin(encoded_geometry_stats.zmin); + bbox.__set_zmax(encoded_geometry_stats.zmax); + } + if (encoded_geometry_stats.has_m()) { + bbox.__set_mmin(encoded_geometry_stats.mmin); + bbox.__set_mmax(encoded_geometry_stats.mmax); + } + geometry_statistics.__set_bbox(bbox); + + std::vector coverings; + coverings.reserve(encoded_geometry_stats.coverings.size()); + for (const auto& pair : encoded_geometry_stats.coverings) { + format::Covering covering; + covering.__set_kind(pair.first); + covering.__set_value(pair.second); + coverings.push_back(std::move(covering)); + } + geometry_statistics.__set_coverings(coverings); + return geometry_statistics; +} + static inline format::Statistics ToThrift(const EncodedStatistics& stats) { format::Statistics statistics; if (stats.has_min) { @@ -349,34 +410,7 @@ static inline format::Statistics ToThrift(const EncodedStatistics& stats) { } if (stats.has_geometry_statistics) { - const EncodedGeometryStatistics& encoded_geometry_stats = stats.geometry_statistics(); - format::GeometryStatistics geometry_statistics; - geometry_statistics.__set_geometry_types(encoded_geometry_stats.geometry_types); - format::BoundingBox bbox; - bbox.__set_xmin(encoded_geometry_stats.xmin); - bbox.__set_xmax(encoded_geometry_stats.xmax); - bbox.__set_ymin(encoded_geometry_stats.ymin); - bbox.__set_ymax(encoded_geometry_stats.ymax); - if (encoded_geometry_stats.has_z()) { - bbox.__set_zmin(encoded_geometry_stats.zmin); - bbox.__set_zmax(encoded_geometry_stats.zmax); - } - if (encoded_geometry_stats.has_m()) { - bbox.__set_mmin(encoded_geometry_stats.mmin); - bbox.__set_mmax(encoded_geometry_stats.mmax); - } - geometry_statistics.__set_bbox(bbox); - - std::vector coverings; - 
coverings.reserve(encoded_geometry_stats.coverings.size()); - for (const auto& pair : encoded_geometry_stats.coverings) { - format::Covering covering; - covering.__set_kind(pair.first); - covering.__set_value(pair.second); - coverings.push_back(std::move(covering)); - } - geometry_statistics.__set_coverings(coverings); - statistics.__set_geometry_stats(geometry_statistics); + statistics.__set_geometry_stats(ToThrift(stats.geometry_statistics())); } return statistics; From 174e1e19231eb09e549557c7048d41a253bfb8f3 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Thu, 12 Sep 2024 19:59:49 +0800 Subject: [PATCH 46/61] Fix compiler warnings on AMD platforms as well as sanitizer warnings --- cpp/src/parquet/geometry_util_internal.h | 3 ++- cpp/src/parquet/reader_test.cc | 11 +++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/src/parquet/geometry_util_internal.h b/cpp/src/parquet/geometry_util_internal.h index a1dd9bb6da711..b2c81606f4eab 100644 --- a/cpp/src/parquet/geometry_util_internal.h +++ b/cpp/src/parquet/geometry_util_internal.h @@ -432,7 +432,8 @@ class WKBSequenceBounder { class WKBGenericSequenceBounder { public: WKBGenericSequenceBounder() - : xy_(chunk_), + : chunk_{0.0}, + xy_(chunk_), xyz_(chunk_), xym_(chunk_), xyzm_(chunk_), diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index 887ebdb4a52bd..a84cee60cb85e 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -1836,15 +1836,14 @@ class TestGeometryLogicalType : public ::testing::Test { // Write small batches and small data pages - auto writer_props_builder = WriterProperties::Builder() - .write_batch_size(64) - ->data_pagesize(128) - ->data_page_version(data_page_version); + auto writer_props_builder = WriterProperties::Builder(); + writer_props_builder.write_batch_size(64)->data_pagesize(128)->data_page_version( + data_page_version); if (enable_write_page_index) { - 
writer_props_builder->enable_write_page_index(); + writer_props_builder.enable_write_page_index(); } - std::shared_ptr writer_props = writer_props_builder->build(); + std::shared_ptr writer_props = writer_props_builder.build(); ASSERT_OK_AND_ASSIGN(auto out_file, ::arrow::io::BufferOutputStream::Create()); std::shared_ptr file_writer = From bd0e2ad2c857f281042eeed74fb7196183a5cf62 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Thu, 12 Sep 2024 20:36:45 +0800 Subject: [PATCH 47/61] Remove all newly added include directives --- cpp/src/parquet/statistics.cc | 4 ---- cpp/src/parquet/statistics.h | 1 - 2 files changed, 5 deletions(-) diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index 65346a487c5ed..23e4d230b228d 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -21,20 +21,17 @@ #include #include #include -#include #include #include #include #include "arrow/array.h" -#include "arrow/array/array_binary.h" #include "arrow/type.h" #include "arrow/type_traits.h" #include "arrow/util/bit_run_reader.h" #include "arrow/util/checked_cast.h" #include "arrow/util/float16.h" #include "arrow/util/logging.h" -#include "arrow/util/macros.h" #include "arrow/util/ubsan.h" #include "arrow/visit_data_inline.h" #include "parquet/encoding.h" @@ -42,7 +39,6 @@ #include "parquet/geometry_util_internal.h" #include "parquet/platform.h" #include "parquet/schema.h" -#include "parquet/types.h" using arrow::default_memory_pool; using arrow::MemoryPool; diff --git a/cpp/src/parquet/statistics.h b/cpp/src/parquet/statistics.h index 430bff9fd4594..b80bf50d9abf6 100644 --- a/cpp/src/parquet/statistics.h +++ b/cpp/src/parquet/statistics.h @@ -24,7 +24,6 @@ #include #include -#include "arrow/util/macros.h" #include "parquet/platform.h" #include "parquet/types.h" From 1521bac133350da3d671ee2c4308549560f31ee1 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Thu, 12 Sep 2024 23:19:18 +0800 Subject: [PATCH 48/61] include cmath for 
std::isnan --- cpp/src/parquet/geometry_util_internal.h | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/parquet/geometry_util_internal.h b/cpp/src/parquet/geometry_util_internal.h index b2c81606f4eab..b943ff6e204cd 100644 --- a/cpp/src/parquet/geometry_util_internal.h +++ b/cpp/src/parquet/geometry_util_internal.h @@ -18,6 +18,7 @@ #pragma once #include +#include #include #include #include From 572e865c90697b110cb7c84c6ebeb8e831a70177 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Mon, 16 Sep 2024 12:24:46 +0800 Subject: [PATCH 49/61] Test writing WKB encoded geometries using WriteArrow --- cpp/src/parquet/reader_test.cc | 54 +++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index a84cee60cb85e..32ad89ff27a97 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -1822,7 +1822,7 @@ class TestGeometryLogicalType : public ::testing::Test { const int NUM_ROWS = 1000; void WriteTestData(ParquetDataPageVersion data_page_version, - bool enable_write_page_index) { + bool enable_write_page_index, bool write_arrow) { // Make schema schema::NodeVector fields; fields.push_back(PrimitiveNode::Make( @@ -1835,7 +1835,6 @@ class TestGeometryLogicalType : public ::testing::Test { GroupNode::Make("schema", Repetition::REQUIRED, fields)); // Write small batches and small data pages - auto writer_props_builder = WriterProperties::Builder(); writer_props_builder.write_batch_size(64)->data_pagesize(128)->data_page_version( data_page_version); @@ -1852,6 +1851,19 @@ class TestGeometryLogicalType : public ::testing::Test { // write WKB points to columns auto* writer = static_cast(rg_writer->NextColumn()); + if (!write_arrow) { + WriteTestDataUsingWriteBatch(writer); + } else { + WriteTestDataUsingWriteArrow(writer); + } + + rg_writer->Close(); + file_writer->Close(); + + ASSERT_OK_AND_ASSIGN(file_buf, out_file->Finish()); + } + + 
void WriteTestDataUsingWriteBatch(ByteArrayWriter* writer) { std::vector buffer(test::WKB_POINT_SIZE * NUM_ROWS); uint8_t* ptr = buffer.data(); std::vector values(NUM_ROWS); @@ -1862,16 +1874,30 @@ class TestGeometryLogicalType : public ::testing::Test { ptr += test::WKB_POINT_SIZE; } writer->WriteBatch(NUM_ROWS, nullptr, nullptr, values.data()); + } - rg_writer->Close(); - file_writer->Close(); - - ASSERT_OK_AND_ASSIGN(file_buf, out_file->Finish()); + void WriteTestDataUsingWriteArrow(ByteArrayWriter* writer) { + ::arrow::BinaryBuilder builder; + std::vector buffer(test::WKB_POINT_SIZE * NUM_ROWS); + uint8_t* ptr = buffer.data(); + for (int k = 0; k < NUM_ROWS; k++) { + test::GenerateWKBPoint(ptr, k, k + 1); + ASSERT_OK(builder.Append(ptr, test::WKB_POINT_SIZE)); + ptr += test::WKB_POINT_SIZE; + } + std::shared_ptr<::arrow::BinaryArray> array; + ASSERT_OK(builder.Finish(&array)); + + std::shared_ptr properties = + ArrowWriterProperties::Builder().build(); + MemoryPool* pool = ::arrow::default_memory_pool(); + auto ctx = std::make_unique(pool, properties.get()); + ASSERT_OK(writer->WriteArrow(nullptr, nullptr, NUM_ROWS, *array, ctx.get(), true)); } void TestWriteAndRead(ParquetDataPageVersion data_page_version, - bool enable_write_page_index) { - WriteTestData(data_page_version, enable_write_page_index); + bool enable_write_page_index, bool write_arrow) { + WriteTestData(data_page_version, enable_write_page_index, write_arrow); auto in_file = std::make_shared<::arrow::io::BufferReader>(file_buf); @@ -2003,14 +2029,22 @@ class TestGeometryLogicalType : public ::testing::Test { TEST_F(TestGeometryLogicalType, TestWriteAndReadWithPageStatistics) { for (auto data_page_version : {ParquetDataPageVersion::V1, ParquetDataPageVersion::V2}) { - TestWriteAndRead(data_page_version, false); + TestWriteAndRead(data_page_version, false, false); } } TEST_F(TestGeometryLogicalType, TestWriteAndReadWithColumnIndex) { for (auto data_page_version : {ParquetDataPageVersion::V1, 
ParquetDataPageVersion::V2}) { - TestWriteAndRead(data_page_version, true); + TestWriteAndRead(data_page_version, true, false); + } +} + +TEST_F(TestGeometryLogicalType, TestWriteArrowAndRead) { + for (auto data_page_version : + {ParquetDataPageVersion::V1, ParquetDataPageVersion::V2}) { + TestWriteAndRead(data_page_version, false, true); + TestWriteAndRead(data_page_version, true, true); } } From 6c322d5639d0e5c2fd26880123044dd28bd5b5b9 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Thu, 19 Sep 2024 15:27:11 +0800 Subject: [PATCH 50/61] Change the sort order of geometry from unknown to unsigned; resolved several review comments --- cpp/src/parquet/column_reader.cc | 7 +- cpp/src/parquet/column_writer.cc | 5 +- cpp/src/parquet/column_writer_test.cc | 22 +++--- cpp/src/parquet/geometry_util_internal.h | 6 +- cpp/src/parquet/metadata.cc | 17 +++-- cpp/src/parquet/page_index.cc | 93 ++++++++---------------- cpp/src/parquet/page_index.h | 7 -- cpp/src/parquet/page_index_test.cc | 37 +++++----- cpp/src/parquet/reader_test.cc | 85 +++++++++++----------- cpp/src/parquet/schema_test.cc | 2 +- cpp/src/parquet/statistics.cc | 29 ++------ cpp/src/parquet/statistics.h | 2 +- cpp/src/parquet/test_util.h | 14 ++-- cpp/src/parquet/thrift_internal.h | 35 ++++----- cpp/src/parquet/types.cc | 60 +++++++++------ 15 files changed, 193 insertions(+), 228 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index e982675ed390f..b25235970da6d 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -244,8 +244,11 @@ EncodedStatistics ExtractStatsFromHeader(const H& header) { page_statistics.set_distinct_count(stats.distinct_count); } if (stats.__isset.geometry_stats) { - page_statistics.set_geometry( - FromThrift(stats.geometry_stats, stats.__isset.geometry_stats)); + EncodedGeometryStatistics encoded_geometry_stats; + if (stats.__isset.geometry_stats) { + encoded_geometry_stats = 
FromThrift(stats.geometry_stats); + } + page_statistics.set_geometry(encoded_geometry_stats); } return page_statistics; } diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index d5d250b0f26bf..40d19d38e10ab 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -1224,11 +1224,8 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< // Will be null if not using dictionary, but that's ok current_dict_encoder_ = dynamic_cast*>(current_encoder_.get()); - bool is_geometry = - (descr_->logical_type() != nullptr && descr_->logical_type()->is_geometry()); - bool has_sort_order = SortOrder::UNKNOWN != descr_->sort_order(); if (properties->statistics_enabled(descr_->path()) && - (is_geometry || has_sort_order)) { + (SortOrder::UNKNOWN != descr_->sort_order())) { page_statistics_ = MakeStatistics(descr_, allocator_); chunk_statistics_ = MakeStatistics(descr_, allocator_); } diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index 37c8116cc605d..7a8038e80651a 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -1785,8 +1785,8 @@ TEST_F(TestInt32Writer, WriteKeyValueMetadataEndToEnd) { // Test writing and reading geometry columns class TestGeometryValuesWriter : public TestPrimitiveWriter { public: - static const char* CRS; - static const char* METADATA; + static const char* kCrs; + static const char* kMetadata; void SetUpSchema(Repetition::type repetition, int num_columns) override { std::vector fields; @@ -1794,8 +1794,8 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { for (int i = 0; i < num_columns; ++i) { std::string name = TestColumnName(i); std::shared_ptr logical_type = - GeometryLogicalType::Make(CRS, LogicalType::GeometryEdges::PLANAR, - LogicalType::GeometryEncoding::WKB, METADATA); + GeometryLogicalType::Make(kCrs, LogicalType::GeometryEdges::PLANAR, + 
LogicalType::GeometryEncoding::WKB, kMetadata); fields.push_back(schema::PrimitiveNode::Make(name, repetition, logical_type, ByteArrayType::type_num)); } @@ -1806,13 +1806,13 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { void GenerateData(int64_t num_values, uint32_t seed = 0) { values_.resize(num_values); - buffer_.resize(num_values * WKB_POINT_SIZE); + buffer_.resize(num_values * kWkbPointSize); uint8_t* ptr = buffer_.data(); for (int k = 0; k < num_values; k++) { GenerateWKBPoint(ptr, k, k + 1); - values_[k].len = WKB_POINT_SIZE; + values_[k].len = kWkbPointSize; values_[k].ptr = ptr; - ptr += WKB_POINT_SIZE; + ptr += kWkbPointSize; } values_ptr_ = values_.data(); @@ -1842,7 +1842,7 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { } std::shared_ptr statistics = metadata_stats(); - EXPECT_FALSE(statistics->HasMinMax()); + EXPECT_TRUE(statistics->HasMinMax()); EXPECT_TRUE(statistics->HasGeometryStatistics()); const GeometryStatistics* geometry_statistics = statistics->geometry_statistics(); std::vector geometry_types = geometry_statistics->GetGeometryTypes(); @@ -1921,7 +1921,7 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { } std::shared_ptr statistics = metadata_stats(); - EXPECT_FALSE(statistics->HasMinMax()); + EXPECT_TRUE(statistics->HasMinMax()); EXPECT_TRUE(statistics->HasGeometryStatistics()); const GeometryStatistics* geometry_statistics = statistics->geometry_statistics(); std::vector geometry_types = geometry_statistics->GetGeometryTypes(); @@ -1936,9 +1936,9 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { } }; -const char* TestGeometryValuesWriter::CRS = +const char* TestGeometryValuesWriter::kCrs = R"({"id": {"authority": "OGC", "code": "CRS84"}})"; -const char* TestGeometryValuesWriter::METADATA = "test_metadata"; +const char* TestGeometryValuesWriter::kMetadata = "test_metadata"; TEST_F(TestGeometryValuesWriter, TestWriteAndReadV1) { for (auto data_page_version : diff --git 
a/cpp/src/parquet/geometry_util_internal.h b/cpp/src/parquet/geometry_util_internal.h index b943ff6e204cd..dad695ba72b43 100644 --- a/cpp/src/parquet/geometry_util_internal.h +++ b/cpp/src/parquet/geometry_util_internal.h @@ -69,7 +69,7 @@ struct Dimensions { case XYZM: return {0, 1, 2, 3}; default: - return {-1, -1, -1, -1}; + throw ParquetException("Unknown geometry dimension: ", dims); } } @@ -84,7 +84,7 @@ struct Dimensions { case XYZM: return "XYZM"; default: - return ""; + throw ParquetException("Unknown geometry dimension: ", dims); } } }; @@ -120,7 +120,7 @@ inline uint32_t Dimensions::size(dimensions dims) { case XYZM: return size(); default: - return 0; + throw ParquetException("Unknown geometry dimension: ", dims); } } diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index e6ecf3a866484..9d4f4176d3058 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -37,6 +37,7 @@ #include "parquet/exception.h" #include "parquet/schema.h" #include "parquet/schema_internal.h" +#include "parquet/statistics.h" #include "parquet/thrift_internal.h" namespace parquet { @@ -92,13 +93,16 @@ template static std::shared_ptr MakeTypedColumnStats( const format::ColumnMetaData& metadata, const ColumnDescriptor* descr) { // If ColumnOrder is defined, return max_value and min_value + EncodedGeometryStatistics encoded_geometry_stats; + if (metadata.statistics.__isset.geometry_stats) { + encoded_geometry_stats = FromThrift(metadata.statistics.geometry_stats); + } if (descr->column_order().get_order() == ColumnOrder::TYPE_DEFINED_ORDER) { return MakeStatistics( descr, metadata.statistics.min_value, metadata.statistics.max_value, metadata.num_values - metadata.statistics.null_count, metadata.statistics.null_count, metadata.statistics.distinct_count, - FromThrift(metadata.statistics.geometry_stats, - metadata.statistics.__isset.geometry_stats), + encoded_geometry_stats, metadata.statistics.__isset.max_value && 
metadata.statistics.__isset.min_value, metadata.statistics.__isset.null_count, metadata.statistics.__isset.distinct_count, @@ -109,8 +113,7 @@ static std::shared_ptr MakeTypedColumnStats( descr, metadata.statistics.min, metadata.statistics.max, metadata.num_values - metadata.statistics.null_count, metadata.statistics.null_count, metadata.statistics.distinct_count, - FromThrift(metadata.statistics.geometry_stats, - metadata.statistics.__isset.geometry_stats), + encoded_geometry_stats, metadata.statistics.__isset.max && metadata.statistics.__isset.min, metadata.statistics.__isset.null_count, metadata.statistics.__isset.distinct_count, metadata.statistics.__isset.geometry_stats); @@ -298,10 +301,8 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl { DCHECK(writer_version_ != nullptr); // If the column statistics don't exist or column sort order is unknown // we cannot use the column stats - auto logical_type = descr_->logical_type(); - bool is_geometry = (logical_type != nullptr && logical_type->is_geometry()); if (!column_metadata_->__isset.statistics || - (descr_->sort_order() == SortOrder::UNKNOWN && !is_geometry)) { + descr_->sort_order() == SortOrder::UNKNOWN) { return false; } if (possible_stats_ == nullptr) { @@ -1535,7 +1536,7 @@ bool ApplicationVersion::HasCorrectStatistics(Type::type col_type, } // Unknown sort order has incorrect stats - if (SortOrder::UNKNOWN == sort_order && !statistics.has_geometry_statistics) { + if (SortOrder::UNKNOWN == sort_order) { return false; } diff --git a/cpp/src/parquet/page_index.cc b/cpp/src/parquet/page_index.cc index cb9ebdd4296c8..7ee9761d35635 100644 --- a/cpp/src/parquet/page_index.cc +++ b/cpp/src/parquet/page_index.cc @@ -94,12 +94,10 @@ class TypedColumnIndexImpl : public TypedColumnIndex { TypedColumnIndexImpl(const ColumnDescriptor& descr, format::ColumnIndex column_index) : column_index_(std::move(column_index)) { // Make sure the number of pages is valid and it does not overflow to int32_t. 
- bool is_geometry = - (descr.logical_type() != nullptr && descr.logical_type()->is_geometry()); const size_t num_pages = column_index_.null_pages.size(); if (num_pages >= static_cast(std::numeric_limits::max()) || - (!is_geometry && (column_index_.min_values.size() != num_pages || - column_index_.max_values.size() != num_pages)) || + column_index_.min_values.size() != num_pages || + column_index_.max_values.size() != num_pages || (column_index_.__isset.null_counts && column_index_.null_counts.size() != num_pages) || (column_index_.__isset.geometry_stats && @@ -114,43 +112,31 @@ class TypedColumnIndexImpl : public TypedColumnIndex { })); DCHECK_LE(num_non_null_pages, num_pages); + // Allocate slots for decoded values. + min_values_.resize(num_pages); + max_values_.resize(num_pages); non_null_page_indices_.reserve(num_non_null_pages); + + // Decode min and max values according to the physical type. + // Note that null page are skipped. + auto plain_decoder = MakeTypedDecoder(Encoding::PLAIN, &descr); for (size_t i = 0; i < num_pages; ++i) { if (!column_index_.null_pages[i]) { + // The check on `num_pages` has guaranteed the cast below is safe. non_null_page_indices_.emplace_back(static_cast(i)); + Decode(plain_decoder, column_index_.min_values[i], &min_values_, i); + Decode(plain_decoder, column_index_.max_values[i], &max_values_, i); } } DCHECK_EQ(num_non_null_pages, non_null_page_indices_.size()); - if (!is_geometry) { - // Allocate slots for decoded values. - min_values_.resize(num_pages); - max_values_.resize(num_pages); - - // Decode min and max values according to the physical type. - // Note that null page are skipped. - auto plain_decoder = MakeTypedDecoder(Encoding::PLAIN, &descr); + // Decode geometry statistics. + // Note that null pages are skipped. 
+ if (column_index_.__isset.geometry_stats) { + geometry_statistics_.reserve(num_pages); for (size_t i = 0; i < num_pages; ++i) { if (!column_index_.null_pages[i]) { - // The check on `num_pages` has guaranteed the cast below is safe. - Decode(plain_decoder, column_index_.min_values[i], &min_values_, i); - Decode(plain_decoder, column_index_.max_values[i], &max_values_, i); - } - } - } else { - // Decode geometry statistics. - // Note that null pages are skipped. - if (column_index_.__isset.geometry_stats) { - encoded_geometry_statistics_.resize(num_pages); - for (size_t i = 0; i < num_pages; ++i) { - if (!column_index_.null_pages[i]) { - encoded_geometry_statistics_[i] = - FromThrift(column_index_.geometry_stats[i], true); - } - } - - geometry_statistics_.reserve(num_pages); - for (const auto& encoded_geom_stat : encoded_geometry_statistics_) { + auto encoded_geom_stat = FromThrift(column_index_.geometry_stats[i]); GeometryStatistics geom_stat; geom_stat.Decode(encoded_geom_stat); geometry_statistics_.push_back(std::move(geom_stat)); @@ -189,11 +175,6 @@ class TypedColumnIndexImpl : public TypedColumnIndex { const std::vector& max_values() const override { return max_values_; } - const std::vector& encoded_geometry_statistics() - const override { - return encoded_geometry_statistics_; - } - const std::vector& geometry_statistics() const override { return geometry_statistics_; } @@ -206,8 +187,6 @@ class TypedColumnIndexImpl : public TypedColumnIndex { std::vector max_values_; /// A list of page indices for non-null pages. 
std::vector non_null_page_indices_; - /// A list of encoded geometry statistics - std::vector encoded_geometry_statistics_; /// A list of geometry statistics std::vector geometry_statistics_; }; @@ -517,29 +496,17 @@ class ColumnIndexBuilderImpl final : public ColumnIndexBuilder { column_index_.null_pages.emplace_back(true); column_index_.min_values.emplace_back(""); column_index_.max_values.emplace_back(""); + } else if (stats.has_min && stats.has_max) { + const size_t page_ordinal = column_index_.null_pages.size(); + non_null_page_indices_.emplace_back(page_ordinal); + column_index_.min_values.emplace_back(stats.min()); + column_index_.max_values.emplace_back(stats.max()); + column_index_.null_pages.emplace_back(false); } else { - bool discard = true; - if (stats.has_min && stats.has_max) { - const size_t page_ordinal = column_index_.null_pages.size(); - non_null_page_indices_.emplace_back(page_ordinal); - column_index_.min_values.emplace_back(stats.min()); - column_index_.max_values.emplace_back(stats.max()); - discard = false; - } - if (stats.has_geometry_statistics) { - column_index_.__isset.geometry_stats = true; - column_index_.geometry_stats.emplace_back(ToThrift(stats.geometry_statistics())); - discard = false; - } - - if (!discard) { - column_index_.null_pages.emplace_back(false); - } else { - /// This is a non-null page but it lacks of meaningful min/max values - /// or geometry statistics. Discard the column index. - state_ = BuilderState::kDiscarded; - return; - } + /// This is a non-null page but it lacks of meaningful min/max values. + /// Discard the column index. 
+ state_ = BuilderState::kDiscarded; + return; } if (column_index_.__isset.null_counts && stats.has_null_count) { @@ -548,6 +515,11 @@ class ColumnIndexBuilderImpl final : public ColumnIndexBuilder { column_index_.__isset.null_counts = false; column_index_.null_counts.clear(); } + + if (stats.has_geometry_statistics) { + column_index_.__isset.geometry_stats = true; + column_index_.geometry_stats.emplace_back(ToThrift(stats.geometry_statistics())); + } } void Finish() override { @@ -952,7 +924,6 @@ std::unique_ptr ColumnIndex::Make(const ColumnDescriptor& descr, case Type::BYTE_ARRAY: return std::make_unique>( descr, std::move(column_index)); - case Type::FIXED_LEN_BYTE_ARRAY: return std::make_unique>(descr, std::move(column_index)); diff --git a/cpp/src/parquet/page_index.h b/cpp/src/parquet/page_index.h index 0866a06fe1559..3b0174ed0403a 100644 --- a/cpp/src/parquet/page_index.h +++ b/cpp/src/parquet/page_index.h @@ -78,13 +78,6 @@ class PARQUET_EXPORT ColumnIndex { /// \brief A vector of page indices for non-null pages. virtual const std::vector& non_null_page_indices() const = 0; - - /// \brief A vector of encoded geometry statistics for each data page in this column. - /// - /// `null_pages` should be inspected first, as only pages with non-null values - /// may have their upper bounds populated. - virtual const std::vector& encoded_geometry_statistics() - const = 0; }; /// \brief Typed implementation of ColumnIndex. 
diff --git a/cpp/src/parquet/page_index_test.cc b/cpp/src/parquet/page_index_test.cc index ea4396a15ebb0..3bce6d4240f07 100644 --- a/cpp/src/parquet/page_index_test.cc +++ b/cpp/src/parquet/page_index_test.cc @@ -462,8 +462,6 @@ void TestWriteTypedColumnIndex(schema::NodePtr node, const std::vector& page_stats, BoundaryOrder::type boundary_order, bool has_null_counts) { auto descr = std::make_unique(node, /*max_definition_level=*/1, 0); - bool is_geometry = - (descr->logical_type() != nullptr && descr->logical_type()->is_geometry()); auto builder = ColumnIndexBuilder::Make(descr.get()); for (const auto& stats : page_stats) { @@ -489,26 +487,26 @@ void TestWriteTypedColumnIndex(schema::NodePtr node, const size_t num_pages = column_index->null_pages().size(); for (size_t i = 0; i < num_pages; ++i) { ASSERT_EQ(page_stats[i].all_null_value, column_index->null_pages()[i]); - if (!is_geometry) { - ASSERT_EQ(page_stats[i].min(), column_index->encoded_min_values()[i]); - ASSERT_EQ(page_stats[i].max(), column_index->encoded_max_values()[i]); - } + ASSERT_EQ(page_stats[i].min(), column_index->encoded_min_values()[i]); + ASSERT_EQ(page_stats[i].max(), column_index->encoded_max_values()[i]); if (has_null_counts) { ASSERT_EQ(page_stats[i].null_count, column_index->null_counts()[i]); } if (page_stats[i].has_geometry_statistics) { const auto& expected_stats = page_stats[i].geometry_statistics(); - const auto& actual_stats = column_index->encoded_geometry_statistics()[i]; - ASSERT_EQ(expected_stats.geometry_types, actual_stats.geometry_types); - ASSERT_EQ(expected_stats.coverings, actual_stats.coverings); - ASSERT_DOUBLE_EQ(expected_stats.xmin, actual_stats.xmin); - ASSERT_DOUBLE_EQ(expected_stats.xmax, actual_stats.xmax); - ASSERT_DOUBLE_EQ(expected_stats.ymin, actual_stats.ymin); - ASSERT_DOUBLE_EQ(expected_stats.ymax, actual_stats.ymax); - ASSERT_DOUBLE_EQ(expected_stats.zmin, actual_stats.zmin); - ASSERT_DOUBLE_EQ(expected_stats.zmax, actual_stats.zmax); - 
ASSERT_DOUBLE_EQ(expected_stats.mmin, actual_stats.mmin); - ASSERT_DOUBLE_EQ(expected_stats.mmax, actual_stats.mmax); + const auto* byte_array_column_index = + static_cast(column_index.get()); + const auto& actual_stats = byte_array_column_index->geometry_statistics()[i]; + ASSERT_EQ(expected_stats.geometry_types, actual_stats.GetGeometryTypes()); + ASSERT_EQ(expected_stats.coverings, actual_stats.GetCoverings()); + ASSERT_DOUBLE_EQ(expected_stats.xmin, actual_stats.GetXMin()); + ASSERT_DOUBLE_EQ(expected_stats.xmax, actual_stats.GetXMax()); + ASSERT_DOUBLE_EQ(expected_stats.ymin, actual_stats.GetYMin()); + ASSERT_DOUBLE_EQ(expected_stats.ymax, actual_stats.GetYMax()); + ASSERT_DOUBLE_EQ(expected_stats.zmin, actual_stats.GetZMin()); + ASSERT_DOUBLE_EQ(expected_stats.zmax, actual_stats.GetZMax()); + ASSERT_DOUBLE_EQ(expected_stats.mmin, actual_stats.GetMMin()); + ASSERT_DOUBLE_EQ(expected_stats.mmax, actual_stats.GetMMax()); } } } @@ -626,6 +624,8 @@ TEST(PageIndex, WriteGeometryColumnIndex) { std::vector page_stats(3); EncodedGeometryStatistics geom_stats[3]; + std::string dummy_min = "dummy_min"; + std::string dummy_max = "dummy_max"; for (int i = 0; i < 3; i++) { geom_stats[i].xmin = i + 1; geom_stats[i].xmax = i + 2; @@ -639,6 +639,7 @@ TEST(PageIndex, WriteGeometryColumnIndex) { std::string covering = geometry::MakeCoveringWKBFromBound( geom_stats[i].xmin, geom_stats[i].xmax, geom_stats[i].ymin, geom_stats[i].ymax); geom_stats[i].coverings = {{"WKB", covering}}; + page_stats.at(i).set_min(dummy_min).set_max(dummy_max); page_stats.at(i).set_geometry(geom_stats[i]); } @@ -649,7 +650,7 @@ TEST(PageIndex, WriteGeometryColumnIndex) { LogicalType::GeometryEncoding::WKB, "metadata0"), Type::BYTE_ARRAY); - TestWriteTypedColumnIndex(node, page_stats, BoundaryOrder::Unordered, + TestWriteTypedColumnIndex(node, page_stats, BoundaryOrder::Ascending, /*has_null_counts=*/false); } diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index 
32ad89ff27a97..4eca392030b45 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -627,7 +627,7 @@ TEST(TestFileReader, GetRecordReader) { } TEST(TestFileReader, RecordReaderWithExposingDictionary) { - const int num_rows = 1000; + const int kNumRows = 1000; // Make schema schema::NodeVector fields; @@ -654,11 +654,11 @@ TEST(TestFileReader, RecordReaderWithExposingDictionary) { ByteArrayWriter* writer = static_cast(rg_writer->NextColumn()); std::vector raw_unique_data = {"a", "bc", "defg"}; std::vector col_typed; - for (int i = 0; i < num_rows; i++) { + for (int i = 0; i < kNumRows; i++) { std::string_view chosed_data = raw_unique_data[i % raw_unique_data.size()]; col_typed.emplace_back(chosed_data); } - writer->WriteBatch(num_rows, nullptr, nullptr, col_typed.data()); + writer->WriteBatch(kNumRows, nullptr, nullptr, col_typed.data()); rg_writer->Close(); file_writer->Close(); @@ -683,7 +683,7 @@ TEST(TestFileReader, RecordReaderWithExposingDictionary) { reinterpret_cast(record_reader->ReadDictionary(&dict_len)); ASSERT_NE(dict, nullptr); ASSERT_EQ(dict_len, raw_unique_data.size()); - ASSERT_EQ(record_reader->ReadRecords(num_rows), num_rows); + ASSERT_EQ(record_reader->ReadRecords(kNumRows), kNumRows); std::shared_ptr<::arrow::ChunkedArray> result_array = record_reader->GetResult(); ASSERT_EQ(result_array->num_chunks(), 1); const std::shared_ptr<::arrow::Array> chunk = result_array->chunk(0); @@ -694,7 +694,7 @@ TEST(TestFileReader, RecordReaderWithExposingDictionary) { // Verify values based on the dictionary from ReadDictionary(). 
int64_t indices_read = chunk->length(); - ASSERT_EQ(indices_read, num_rows); + ASSERT_EQ(indices_read, kNumRows); for (int i = 0; i < indices_read; ++i) { ASSERT_LT(indices[i], dict_len); ASSERT_EQ(std::string_view(reinterpret_cast(dict[indices[i]].ptr), @@ -1819,7 +1819,7 @@ TEST(PageIndexReaderTest, ReadFileWithoutPageIndex) { class TestGeometryLogicalType : public ::testing::Test { public: - const int NUM_ROWS = 1000; + const int kNumRows = 1000; void WriteTestData(ParquetDataPageVersion data_page_version, bool enable_write_page_index, bool write_arrow) { @@ -1864,26 +1864,26 @@ class TestGeometryLogicalType : public ::testing::Test { } void WriteTestDataUsingWriteBatch(ByteArrayWriter* writer) { - std::vector buffer(test::WKB_POINT_SIZE * NUM_ROWS); + std::vector buffer(test::kWkbPointSize * kNumRows); uint8_t* ptr = buffer.data(); - std::vector values(NUM_ROWS); - for (int k = 0; k < NUM_ROWS; k++) { + std::vector values(kNumRows); + for (int k = 0; k < kNumRows; k++) { test::GenerateWKBPoint(ptr, k, k + 1); - values[k].len = test::WKB_POINT_SIZE; + values[k].len = test::kWkbPointSize; values[k].ptr = ptr; - ptr += test::WKB_POINT_SIZE; + ptr += test::kWkbPointSize; } - writer->WriteBatch(NUM_ROWS, nullptr, nullptr, values.data()); + writer->WriteBatch(kNumRows, nullptr, nullptr, values.data()); } void WriteTestDataUsingWriteArrow(ByteArrayWriter* writer) { ::arrow::BinaryBuilder builder; - std::vector buffer(test::WKB_POINT_SIZE * NUM_ROWS); + std::vector buffer(test::kWkbPointSize * kNumRows); uint8_t* ptr = buffer.data(); - for (int k = 0; k < NUM_ROWS; k++) { + for (int k = 0; k < kNumRows; k++) { test::GenerateWKBPoint(ptr, k, k + 1); - ASSERT_OK(builder.Append(ptr, test::WKB_POINT_SIZE)); - ptr += test::WKB_POINT_SIZE; + ASSERT_OK(builder.Append(ptr, test::kWkbPointSize)); + ptr += test::kWkbPointSize; } std::shared_ptr<::arrow::BinaryArray> array; ASSERT_OK(builder.Finish(&array)); @@ -1892,7 +1892,7 @@ class TestGeometryLogicalType : public 
::testing::Test { ArrowWriterProperties::Builder().build(); MemoryPool* pool = ::arrow::default_memory_pool(); auto ctx = std::make_unique(pool, properties.get()); - ASSERT_OK(writer->WriteArrow(nullptr, nullptr, NUM_ROWS, *array, ctx.get(), true)); + ASSERT_OK(writer->WriteArrow(nullptr, nullptr, kNumRows, *array, ctx.get(), true)); } void TestWriteAndRead(ParquetDataPageVersion data_page_version, @@ -1932,36 +1932,39 @@ class TestGeometryLogicalType : public ::testing::Test { } // Check the geometry values - auto row_group = file_reader->RowGroup(0); - std::shared_ptr reader = - std::static_pointer_cast(row_group->Column(0)); int64_t total_values_read = 0; - while (total_values_read < NUM_ROWS) { - std::vector out(NUM_ROWS); - int64_t values_read = 0; - int64_t levels_read = - reader->ReadBatch(NUM_ROWS, nullptr, nullptr, out.data(), &values_read); - ASSERT_GE(levels_read, 1); - ASSERT_GE(values_read, 1); - - // Check the batch - for (int64_t i = 0; i < values_read; i++) { - const ByteArray& value = out[i]; - double x = 0; - double y = 0; - EXPECT_TRUE(test::GetWKBPointCoordinate(value, &x, &y)); - auto expected_x = static_cast(i + total_values_read); - auto expected_y = static_cast(i + 1 + total_values_read); - EXPECT_DOUBLE_EQ(expected_x, x); - EXPECT_DOUBLE_EQ(expected_y, y); + for (int i = 0; i < num_row_groups; i++) { + auto row_group = file_reader->RowGroup(i); + std::shared_ptr reader = + std::static_pointer_cast(row_group->Column(0)); + while (reader->HasNext()) { + std::vector out(kNumRows); + int64_t values_read = 0; + int64_t levels_read = + reader->ReadBatch(kNumRows, nullptr, nullptr, out.data(), &values_read); + ASSERT_GE(levels_read, 1); + ASSERT_GE(values_read, 1); + + // Check the batch + for (int64_t i = 0; i < values_read; i++) { + const ByteArray& value = out[i]; + double x = 0; + double y = 0; + EXPECT_TRUE(test::GetWKBPointCoordinate(value, &x, &y)); + auto expected_x = static_cast(i + total_values_read); + auto expected_y = static_cast(i + 
1 + total_values_read); + EXPECT_DOUBLE_EQ(expected_x, x); + EXPECT_DOUBLE_EQ(expected_y, y); + } + + total_values_read += values_read; } - - total_values_read += values_read; } + EXPECT_EQ(kNumRows, total_values_read); } void CheckStatistics(std::shared_ptr statistics) { - EXPECT_FALSE(statistics->HasMinMax()); + EXPECT_TRUE(statistics->HasMinMax()); EXPECT_TRUE(statistics->HasGeometryStatistics()); const GeometryStatistics* geom_stats = statistics->geometry_statistics(); std::vector geometry_types = geom_stats->GetGeometryTypes(); diff --git a/cpp/src/parquet/schema_test.cc b/cpp/src/parquet/schema_test.cc index fb498c543cdbb..437f1a7ff95cb 100644 --- a/cpp/src/parquet/schema_test.cc +++ b/cpp/src/parquet/schema_test.cc @@ -1599,7 +1599,7 @@ TEST(TestLogicalTypeOperation, LogicalTypeSortOrder) { {LogicalType::BSON(), SortOrder::UNSIGNED}, {LogicalType::UUID(), SortOrder::UNSIGNED}, {LogicalType::Float16(), SortOrder::SIGNED}, - {LogicalType::Geometry(), SortOrder::UNKNOWN}, + {LogicalType::Geometry(), SortOrder::UNSIGNED}, {LogicalType::None(), SortOrder::UNKNOWN}}; for (const ExpectedSortOrder& c : cases) { diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index 23e4d230b228d..f09fc37e752e5 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -129,9 +129,7 @@ class GeometryStatisticsImpl { } } - void Update(const ::arrow::Array& values, bool update_counts) { - ARROW_UNUSED(update_counts); - + void Update(const ::arrow::Array& values) { const auto& binary_array = static_cast(values); geometry::WKBBuffer buf; try { @@ -274,9 +272,7 @@ void GeometryStatistics::UpdateSpaced(const ByteArray* values, const uint8_t* va num_values, null_count); } -void GeometryStatistics::Update(const ::arrow::Array& values, bool update_counts) { - impl_->Update(values, update_counts); -} +void GeometryStatistics::Update(const ::arrow::Array& values) { impl_->Update(values); } void GeometryStatistics::Reset() { impl_->Reset(); } @@ 
-1052,17 +1048,15 @@ class TypedStatisticsImpl : public TypedStatistics { return; } + SetMinMaxPair(comparator_->GetMinMax(values)); + if constexpr (std::is_same::value) { if (logical_type_ == LogicalType::Type::GEOMETRY) { if (geometry_statistics_ == nullptr) { geometry_statistics_ = std::make_unique(); } - geometry_statistics_->Update(values, update_counts); - } else { - SetMinMaxPair(comparator_->GetMinMax(values)); + geometry_statistics_->Update(values); } - } else { - SetMinMaxPair(comparator_->GetMinMax(values)); } } @@ -1220,6 +1214,7 @@ void TypedStatisticsImpl::Update(const T* values, int64_t num_values, IncrementNumValues(num_values); if (num_values == 0) return; + SetMinMaxPair(comparator_->GetMinMax(values, num_values)); if constexpr (std::is_same::value) { if (logical_type_ == LogicalType::Type::GEOMETRY) { @@ -1227,11 +1222,7 @@ void TypedStatisticsImpl::Update(const T* values, int64_t num_values, geometry_statistics_ = std::make_unique(); } geometry_statistics_->Update(values, num_values, null_count); - } else { - SetMinMaxPair(comparator_->GetMinMax(values, num_values)); } - } else { - SetMinMaxPair(comparator_->GetMinMax(values, num_values)); } } @@ -1247,6 +1238,8 @@ void TypedStatisticsImpl::UpdateSpaced(const T* values, const uint8_t* va IncrementNumValues(num_values); if (num_values == 0) return; + SetMinMaxPair(comparator_->GetMinMaxSpaced(values, num_spaced_values, valid_bits, + valid_bits_offset)); if constexpr (std::is_same::value) { if (logical_type_ == LogicalType::Type::GEOMETRY) { @@ -1255,13 +1248,7 @@ void TypedStatisticsImpl::UpdateSpaced(const T* values, const uint8_t* va } geometry_statistics_->UpdateSpaced(values, valid_bits, valid_bits_offset, num_spaced_values, num_values, null_count); - } else { - SetMinMaxPair(comparator_->GetMinMaxSpaced(values, num_spaced_values, valid_bits, - valid_bits_offset)); } - } else { - SetMinMaxPair(comparator_->GetMinMaxSpaced(values, num_spaced_values, valid_bits, - valid_bits_offset)); } } diff 
--git a/cpp/src/parquet/statistics.h b/cpp/src/parquet/statistics.h index b80bf50d9abf6..ba04841fde2ed 100644 --- a/cpp/src/parquet/statistics.h +++ b/cpp/src/parquet/statistics.h @@ -159,7 +159,7 @@ class PARQUET_EXPORT GeometryStatistics { int64_t valid_bits_offset, int64_t num_spaced_values, int64_t num_values, int64_t null_count); - void Update(const ::arrow::Array& values, bool update_counts); + void Update(const ::arrow::Array& values); void Reset(); diff --git a/cpp/src/parquet/test_util.h b/cpp/src/parquet/test_util.h index 2d38b27fe015e..d06efd59e9075 100644 --- a/cpp/src/parquet/test_util.h +++ b/cpp/src/parquet/test_util.h @@ -837,28 +837,28 @@ inline void GenerateData(int num_values, FLBA* out, std::vector* // Test utility functions for geometry #if defined(ARROW_LITTLE_ENDIAN) -static constexpr int WKB_NATIVE_ENDIANNESS = geometry::WKBBuffer::WKB_LITTLE_ENDIAN; +static constexpr int kWkbNativeEndianness = geometry::WKBBuffer::WKB_LITTLE_ENDIAN; #else -static constexpr int WKB_NATIVE_ENDIANNESS = geometry::WKBBuffer::WKB_BIG_ENDIAN; +static constexpr int kWkbNativeEndianness = geometry::WKBBuffer::WKB_BIG_ENDIAN; #endif -static constexpr int WKB_POINT_SIZE = 21; // 1:endianness + 4:type + 8:x + 8:y +static constexpr int kWkbPointSize = 21; // 1:endianness + 4:type + 8:x + 8:y inline int GenerateWKBPoint(uint8_t* ptr, double x, double y) { - ptr[0] = WKB_NATIVE_ENDIANNESS; + ptr[0] = kWkbNativeEndianness; uint32_t geom_type = geometry::GeometryType::ToWKB(geometry::GeometryType::POINT, false, false); memcpy(&ptr[1], &geom_type, 4); memcpy(&ptr[5], &x, 8); memcpy(&ptr[13], &y, 8); - return WKB_POINT_SIZE; + return kWkbPointSize; } inline bool GetWKBPointCoordinate(const ByteArray& value, double* out_x, double* out_y) { - if (value.len != WKB_POINT_SIZE) { + if (value.len != kWkbPointSize) { return false; } - if (value.ptr[0] != WKB_NATIVE_ENDIANNESS) { + if (value.ptr[0] != kWkbNativeEndianness) { return false; } uint32_t expected_geom_type = diff --git 
a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h index 5769c99f0bd3d..f7984f6681d27 100644 --- a/cpp/src/parquet/thrift_internal.h +++ b/cpp/src/parquet/thrift_internal.h @@ -232,30 +232,27 @@ static inline AadMetadata FromThrift(format::AesGcmCtrV1 aesGcmCtrV1) { } static inline EncodedGeometryStatistics FromThrift( - const format::GeometryStatistics& geometry_stats, bool has_geometry_stats) { + const format::GeometryStatistics& geometry_stats) { EncodedGeometryStatistics out; - if (has_geometry_stats) { - out.geometry_types = geometry_stats.geometry_types; + out.geometry_types = geometry_stats.geometry_types; + out.xmin = geometry_stats.bbox.xmin; + out.xmax = geometry_stats.bbox.xmax; + out.ymin = geometry_stats.bbox.ymin; + out.ymax = geometry_stats.bbox.ymax; - out.xmin = geometry_stats.bbox.xmin; - out.xmax = geometry_stats.bbox.xmax; - out.ymin = geometry_stats.bbox.ymin; - out.ymax = geometry_stats.bbox.ymax; - - if (geometry_stats.bbox.__isset.zmin && geometry_stats.bbox.__isset.zmax) { - out.zmin = geometry_stats.bbox.zmin; - out.zmax = geometry_stats.bbox.zmax; - } + if (geometry_stats.bbox.__isset.zmin && geometry_stats.bbox.__isset.zmax) { + out.zmin = geometry_stats.bbox.zmin; + out.zmax = geometry_stats.bbox.zmax; + } - if (geometry_stats.bbox.__isset.mmin && geometry_stats.bbox.__isset.mmax) { - out.mmin = geometry_stats.bbox.mmin; - out.mmax = geometry_stats.bbox.mmax; - } + if (geometry_stats.bbox.__isset.mmin && geometry_stats.bbox.__isset.mmax) { + out.mmin = geometry_stats.bbox.mmin; + out.mmax = geometry_stats.bbox.mmax; + } - for (const auto& covering : geometry_stats.coverings) { - out.coverings.emplace_back(covering.kind, covering.value); - } + for (const auto& covering : geometry_stats.coverings) { + out.coverings.emplace_back(covering.kind, covering.value); } return out; diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index 5dd3d79eda060..2ead035b65f65 100644 --- a/cpp/src/parquet/types.cc +++ 
b/cpp/src/parquet/types.cc @@ -475,9 +475,7 @@ std::shared_ptr LogicalType::FromThrift( } else if (type.GEOMETRY.edges == format::Edges::SPHERICAL) { edges = LogicalType::GeometryEdges::SPHERICAL; } else { - std::stringstream ss; - ss << "Unknown value for geometry edges: " << type.GEOMETRY.edges; - throw ParquetException(ss.str()); + throw ParquetException("Unknown value for geometry edges: ", type.GEOMETRY.edges); } LogicalType::GeometryEncoding::geometry_encoding encoding = @@ -485,9 +483,8 @@ std::shared_ptr LogicalType::FromThrift( if (type.GEOMETRY.encoding == format::GeometryEncoding::WKB) { encoding = LogicalType::GeometryEncoding::WKB; } else { - std::stringstream ss; - ss << "Unknown value for geometry encoding: " << type.GEOMETRY.edges; - throw ParquetException(ss.str()); + throw ParquetException("Unknown value for geometry encoding: ", + type.GEOMETRY.encoding); } std::string metadata; @@ -1646,13 +1643,25 @@ class LogicalType::Impl::Float16 final : public LogicalType::Impl::Incompatible, GENERATE_MAKE(Float16) -#define geometry_edges_string(u___) \ - ((u___) == LogicalType::GeometryEdges::PLANAR \ - ? "planar" \ - : ((u___) == LogicalType::GeometryEdges::SPHERICAL ? "spherical" : "unknown")) +namespace { -#define geometry_encoding_string(u___) \ - ((u___) == LogicalType::GeometryEncoding::WKB ? "wkb" : "unknown") +static inline const char* geometry_edges_string(LogicalType::GeometryEdges::edges edges) { + switch (edges) { + case LogicalType::GeometryEdges::PLANAR: + return "planar"; + case LogicalType::GeometryEdges::SPHERICAL: + return "spherical"; + default: + return "unknown"; + } +} + +static inline const char* geometry_encoding_string( + LogicalType::GeometryEncoding::geometry_encoding encoding) { + return (encoding == LogicalType::GeometryEncoding::WKB ? 
"wkb" : "unknown"); +} + +} // namespace class LogicalType::Impl::Geometry final : public LogicalType::Impl::Incompatible, public LogicalType::Impl::SimpleApplicable { @@ -1673,7 +1682,7 @@ class LogicalType::Impl::Geometry final : public LogicalType::Impl::Incompatible Geometry(std::string crs, LogicalType::GeometryEdges::edges edges, LogicalType::GeometryEncoding::geometry_encoding encoding, std::string metadata) - : LogicalType::Impl(LogicalType::Type::GEOMETRY, SortOrder::UNKNOWN), + : LogicalType::Impl(LogicalType::Type::GEOMETRY, SortOrder::UNSIGNED), LogicalType::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY), crs_(std::move(crs)), edges_(edges), @@ -1698,7 +1707,7 @@ std::string LogicalType::Impl::Geometry::ToJSON() const { std::stringstream json; json << R"({"Type": "Geometry")"; - if (crs_.size() > 0) { + if (!crs_.empty()) { // TODO(paleolimbot): we'll need to escape the crs or assume that it's valid JSON json << R"(, "crs": )" << crs_; } @@ -1706,7 +1715,7 @@ std::string LogicalType::Impl::Geometry::ToJSON() const { json << R"(, "edges": ")" << geometry_edges_string(edges_) << R"(")"; json << R"(, "encoding": ")" << geometry_encoding_string(encoding_) << R"(")"; - if (metadata_.size() > 0) { + if (!metadata_.empty()) { // TODO(paleolimbot): we'll need to escape the metadata or assume that it's valid JSON json << R"(, "metadata": )" << crs_; } @@ -1720,22 +1729,25 @@ format::LogicalType LogicalType::Impl::Geometry::ToThrift() const { format::GeometryType geometry_type; // Canonially export crs of "" as an unset CRS - if (crs_.size() > 0) { + if (!crs_.empty()) { geometry_type.__set_crs(crs_); } - DCHECK(edges_ != LogicalType::GeometryEdges::UNKNOWN); if (edges_ == LogicalType::GeometryEdges::SPHERICAL) { geometry_type.__set_edges(format::Edges::SPHERICAL); - } else { + } else if (edges_ == LogicalType::GeometryEdges::PLANAR) { geometry_type.__set_edges(format::Edges::PLANAR); + } else { + throw ParquetException("Unknown value for geometry edges: ", 
edges_); } - DCHECK_EQ(encoding_, LogicalType::GeometryEncoding::WKB); + if (encoding_ != LogicalType::GeometryEncoding::WKB) { + throw ParquetException("Unknown value for geometry encoding: ", encoding_); + } geometry_type.__set_encoding(format::GeometryEncoding::WKB); // Canonically export empty metadata as unset - if (metadata_.size() > 0) { + if (!metadata_.empty()) { geometry_type.__set_metadata(metadata_); } @@ -1755,19 +1767,19 @@ bool LogicalType::Impl::Geometry::Equals(const LogicalType& other) const { } const std::string& GeometryLogicalType::crs() const { - return (dynamic_cast(*impl_)).crs(); + return (checked_cast(*impl_)).crs(); } LogicalType::GeometryEdges::edges GeometryLogicalType::edges() const { - return (dynamic_cast(*impl_)).edges(); + return (checked_cast(*impl_)).edges(); } LogicalType::GeometryEncoding::geometry_encoding GeometryLogicalType::encoding() const { - return (dynamic_cast(*impl_)).encoding(); + return (checked_cast(*impl_)).encoding(); } const std::string& GeometryLogicalType::metadata() const { - return (dynamic_cast(*impl_)).metadata(); + return (checked_cast(*impl_)).metadata(); } std::shared_ptr GeometryLogicalType::Make( From cd43ba5f1356d73659206277e7d0a308694dc44b Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Thu, 19 Sep 2024 15:59:05 +0800 Subject: [PATCH 51/61] Add generate_coverings_ member to be explicit that we'll generate the coverings from bounding box when populating the encoded statistics --- cpp/src/parquet/statistics.cc | 10 +++++++--- cpp/src/parquet/types.cc | 8 ++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index f09fc37e752e5..c4ad11e8495c4 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -169,8 +169,7 @@ class GeometryStatisticsImpl { out.mmin = mins[3]; out.mmax = maxes[3]; - if (coverings_.empty()) { - // Generate coverings from bounding box if coverings is not present + if
(generate_coverings_) { std::string kind = "WKB"; std::string value = geometry::MakeCoveringWKBFromBound(out.xmin, out.xmax, out.ymin, out.ymax); @@ -187,6 +186,11 @@ class GeometryStatisticsImpl { return; } + // Don't generate coverings when encoding since this statistics object is + // initialized from an encoded geometry statistics. We'll simply use the + // coverings in the encoded geometry statistics. + generate_coverings_ = false; + geometry::BoundingBox box; box.min[0] = encoded.xmin; box.max[0] = encoded.xmax; @@ -218,7 +222,6 @@ class GeometryStatisticsImpl { } } catch (ParquetException&) { is_valid_ = false; - return; } } @@ -238,6 +241,7 @@ class GeometryStatisticsImpl { geometry::WKBGeometryBounder bounder_; std::vector> coverings_; bool is_valid_ = true; + bool generate_coverings_ = true; }; GeometryStatistics::GeometryStatistics() { diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index 2ead035b65f65..b14eb9a830e9d 100644 --- a/cpp/src/parquet/types.cc +++ b/cpp/src/parquet/types.cc @@ -1767,19 +1767,19 @@ bool LogicalType::Impl::Geometry::Equals(const LogicalType& other) const { } const std::string& GeometryLogicalType::crs() const { - return (checked_cast(*impl_)).crs(); + return (dynamic_cast(*impl_)).crs(); } LogicalType::GeometryEdges::edges GeometryLogicalType::edges() const { - return (checked_cast(*impl_)).edges(); + return (dynamic_cast(*impl_)).edges(); } LogicalType::GeometryEncoding::geometry_encoding GeometryLogicalType::encoding() const { - return (checked_cast(*impl_)).encoding(); + return (dynamic_cast(*impl_)).encoding(); } const std::string& GeometryLogicalType::metadata() const { - return (checked_cast(*impl_)).metadata(); + return (dynamic_cast(*impl_)).metadata(); } std::shared_ptr GeometryLogicalType::Make( From 33803bce806f62b3f03672da9ab25f41df251273 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Thu, 19 Sep 2024 17:11:05 +0800 Subject: [PATCH 52/61] Refactor unscoped enums in geometry_util_internal to 
enum classes --- cpp/src/parquet/geometry_util_internal.h | 192 +++++++++--------- .../parquet/geometry_util_internal_test.cc | 147 ++++++++------ cpp/src/parquet/test_util.h | 8 +- 3 files changed, 179 insertions(+), 168 deletions(-) diff --git a/cpp/src/parquet/geometry_util_internal.h b/cpp/src/parquet/geometry_util_internal.h index dad695ba72b43..d903d06c747d9 100644 --- a/cpp/src/parquet/geometry_util_internal.h +++ b/cpp/src/parquet/geometry_util_internal.h @@ -34,18 +34,18 @@ namespace parquet::geometry { constexpr double kInf = std::numeric_limits::infinity(); struct Dimensions { - enum dimensions { XY = 0, XYZ = 1, XYM = 2, XYZM = 3 }; + enum class dimensions { XY = 0, XYZ = 1, XYM = 2, XYZM = 3 }; static dimensions FromWKB(uint32_t wkb_geometry_type) { switch (wkb_geometry_type / 1000) { case 0: - return XY; + return dimensions::XY; case 1: - return XYZ; + return dimensions::XYZ; case 2: - return XYM; + return dimensions::XYM; case 3: - return XYZM; + return dimensions::XYZM; default: throw ParquetException("Invalid wkb_geometry_type: ", wkb_geometry_type); } @@ -60,72 +60,72 @@ struct Dimensions { // for the X, Y, Z, and M dimensions, respectively. 
static std::array ToXYZM(dimensions dims) { switch (dims) { - case XY: + case dimensions::XY: return {0, 1, -1, -1}; - case XYZ: + case dimensions::XYZ: return {0, 1, 2, -1}; - case XYM: + case dimensions::XYM: return {0, 1, -1, 2}; - case XYZM: + case dimensions::XYZM: return {0, 1, 2, 3}; default: - throw ParquetException("Unknown geometry dimension: ", dims); + throw ParquetException("Unknown geometry dimension"); } } static std::string ToString(dimensions dims) { switch (dims) { - case XY: + case dimensions::XY: return "XY"; - case XYZ: + case dimensions::XYZ: return "XYZ"; - case XYM: + case dimensions::XYM: return "XYM"; - case XYZM: + case dimensions::XYZM: return "XYZM"; default: - throw ParquetException("Unknown geometry dimension: ", dims); + throw ParquetException("Unknown geometry dimension"); } } }; template <> -constexpr uint32_t Dimensions::size() { +constexpr uint32_t Dimensions::size() { return 2; } template <> -constexpr uint32_t Dimensions::size() { +constexpr uint32_t Dimensions::size() { return 3; } template <> -constexpr uint32_t Dimensions::size() { +constexpr uint32_t Dimensions::size() { return 3; } template <> -constexpr uint32_t Dimensions::size() { +constexpr uint32_t Dimensions::size() { return 4; } inline uint32_t Dimensions::size(dimensions dims) { switch (dims) { - case XY: - return size(); - case XYZ: - return size(); - case XYM: - return size(); - case XYZM: - return size(); + case dimensions::XY: + return size(); + case dimensions::XYZ: + return size(); + case dimensions::XYM: + return size(); + case dimensions::XYZM: + return size(); default: - throw ParquetException("Unknown geometry dimension: ", dims); + throw ParquetException("Unknown geometry dimension"); } } struct GeometryType { - enum geometry_type { + enum class geometry_type { POINT = 1, LINESTRING = 2, POLYGON = 3, @@ -138,19 +138,19 @@ struct GeometryType { static geometry_type FromWKB(uint32_t wkb_geometry_type) { switch (wkb_geometry_type % 1000) { case 1: - return 
POINT; + return geometry_type::POINT; case 2: - return LINESTRING; + return geometry_type::LINESTRING; case 3: - return POLYGON; + return geometry_type::POLYGON; case 4: - return MULTIPOINT; + return geometry_type::MULTIPOINT; case 5: - return MULTILINESTRING; + return geometry_type::MULTILINESTRING; case 6: - return MULTIPOLYGON; + return geometry_type::MULTIPOLYGON; case 7: - return GEOMETRYCOLLECTION; + return geometry_type::GEOMETRYCOLLECTION; default: throw ParquetException("Invalid wkb_geometry_type: ", wkb_geometry_type); } @@ -159,29 +159,29 @@ struct GeometryType { static uint32_t ToWKB(geometry_type geometry_type, bool has_z, bool has_m) { uint32_t wkb_geom_type = 0; switch (geometry_type) { - case POINT: + case geometry_type::POINT: wkb_geom_type = 1; break; - case LINESTRING: + case geometry_type::LINESTRING: wkb_geom_type = 2; break; - case POLYGON: + case geometry_type::POLYGON: wkb_geom_type = 3; break; - case MULTIPOINT: + case geometry_type::MULTIPOINT: wkb_geom_type = 4; break; - case MULTILINESTRING: + case geometry_type::MULTILINESTRING: wkb_geom_type = 5; break; - case MULTIPOLYGON: + case geometry_type::MULTIPOLYGON: wkb_geom_type = 6; break; - case GEOMETRYCOLLECTION: + case geometry_type::GEOMETRYCOLLECTION: wkb_geom_type = 7; break; default: - throw ParquetException("Invalid geometry_type: ", geometry_type); + throw ParquetException("Invalid geometry_type"); } if (has_z) { wkb_geom_type += 1000; @@ -194,19 +194,19 @@ struct GeometryType { static std::string ToString(geometry_type geometry_type) { switch (geometry_type) { - case POINT: + case geometry_type::POINT: return "POINT"; - case LINESTRING: + case geometry_type::LINESTRING: return "LINESTRING"; - case POLYGON: + case geometry_type::POLYGON: return "POLYGON"; - case MULTIPOINT: + case geometry_type::MULTIPOINT: return "MULTIPOINT"; - case MULTILINESTRING: + case geometry_type::MULTILINESTRING: return "MULTILINESTRING"; - case MULTIPOLYGON: + case geometry_type::MULTIPOLYGON: return 
"MULTIPOLYGON"; - case GEOMETRYCOLLECTION: + case geometry_type::GEOMETRYCOLLECTION: return "GEOMETRYCOLLECTION"; default: return ""; @@ -299,7 +299,7 @@ struct BoundingBox { std::memcpy(min, mins.data(), sizeof(min)); std::memcpy(max, maxes.data(), sizeof(max)); } - explicit BoundingBox(Dimensions::dimensions dimensions = Dimensions::XYZM) + explicit BoundingBox(Dimensions::dimensions dimensions = Dimensions::dimensions::XYZM) : dimensions(dimensions), min{kInf, kInf, kInf, kInf}, max{-kInf, -kInf, -kInf, -kInf} {} @@ -322,7 +322,7 @@ struct BoundingBox { } return; - } else if (dimensions == Dimensions::XYZM) { + } else if (dimensions == Dimensions::dimensions::XYZM) { Merge(other.ToXYZM()); } else { ParquetException::NYI(); @@ -330,7 +330,7 @@ struct BoundingBox { } BoundingBox ToXYZM() const { - BoundingBox xyzm(Dimensions::XYZM); + BoundingBox xyzm(Dimensions::dimensions::XYZM); auto to_xyzm = Dimensions::ToXYZM(dimensions); for (int i = 0; i < 4; i++) { int dim_to_xyzm = to_xyzm[i]; @@ -447,31 +447,31 @@ class WKBGenericSequenceBounder { void ReadPoint(WKBBuffer* src, Dimensions::dimensions dimensions, bool swap) { if (ARROW_PREDICT_TRUE(!swap)) { switch (dimensions) { - case Dimensions::XY: + case Dimensions::dimensions::XY: xy_.ReadPoint(src); break; - case Dimensions::XYZ: + case Dimensions::dimensions::XYZ: xyz_.ReadPoint(src); break; - case Dimensions::XYM: + case Dimensions::dimensions::XYM: xym_.ReadPoint(src); break; - case Dimensions::XYZM: + case Dimensions::dimensions::XYZM: xyzm_.ReadPoint(src); break; } } else { switch (dimensions) { - case Dimensions::XY: + case Dimensions::dimensions::XY: xy_swap_.ReadPoint(src); break; - case Dimensions::XYZ: + case Dimensions::dimensions::XYZ: xyz_swap_.ReadPoint(src); break; - case Dimensions::XYM: + case Dimensions::dimensions::XYM: xym_swap_.ReadPoint(src); break; - case Dimensions::XYZM: + case Dimensions::dimensions::XYZM: xyzm_swap_.ReadPoint(src); break; } @@ -481,31 +481,31 @@ class 
WKBGenericSequenceBounder { void ReadSequence(WKBBuffer* src, Dimensions::dimensions dimensions, bool swap) { if (ARROW_PREDICT_TRUE(!swap)) { switch (dimensions) { - case Dimensions::XY: + case Dimensions::dimensions::XY: xy_.ReadSequence(src); break; - case Dimensions::XYZ: + case Dimensions::dimensions::XYZ: xyz_.ReadSequence(src); break; - case Dimensions::XYM: + case Dimensions::dimensions::XYM: xym_.ReadSequence(src); break; - case Dimensions::XYZM: + case Dimensions::dimensions::XYZM: xyzm_.ReadSequence(src); break; } } else { switch (dimensions) { - case Dimensions::XY: + case Dimensions::dimensions::XY: xy_swap_.ReadSequence(src); break; - case Dimensions::XYZ: + case Dimensions::dimensions::XYZ: xyz_swap_.ReadSequence(src); break; - case Dimensions::XYM: + case Dimensions::dimensions::XYM: xym_swap_.ReadSequence(src); break; - case Dimensions::XYZM: + case Dimensions::dimensions::XYZM: xyzm_swap_.ReadSequence(src); break; } @@ -515,31 +515,31 @@ class WKBGenericSequenceBounder { void ReadRings(WKBBuffer* src, Dimensions::dimensions dimensions, bool swap) { if (ARROW_PREDICT_TRUE(!swap)) { switch (dimensions) { - case Dimensions::XY: + case Dimensions::dimensions::XY: xy_.ReadRings(src); break; - case Dimensions::XYZ: + case Dimensions::dimensions::XYZ: xyz_.ReadRings(src); break; - case Dimensions::XYM: + case Dimensions::dimensions::XYM: xym_.ReadRings(src); break; - case Dimensions::XYZM: + case Dimensions::dimensions::XYZM: xyzm_.ReadRings(src); break; } } else { switch (dimensions) { - case Dimensions::XY: + case Dimensions::dimensions::XY: xy_swap_.ReadRings(src); break; - case Dimensions::XYZ: + case Dimensions::dimensions::XYZ: xyz_swap_.ReadRings(src); break; - case Dimensions::XYM: + case Dimensions::dimensions::XYM: xym_swap_.ReadRings(src); break; - case Dimensions::XYZM: + case Dimensions::dimensions::XYZM: xyzm_swap_.ReadRings(src); break; } @@ -570,19 +570,19 @@ class WKBGenericSequenceBounder { private: double chunk_[64]; - 
WKBSequenceBounder xy_; - WKBSequenceBounder xyz_; - WKBSequenceBounder xym_; - WKBSequenceBounder xyzm_; - WKBSequenceBounder xy_swap_; - WKBSequenceBounder xyz_swap_; - WKBSequenceBounder xym_swap_; - WKBSequenceBounder xyzm_swap_; + WKBSequenceBounder xy_; + WKBSequenceBounder xyz_; + WKBSequenceBounder xym_; + WKBSequenceBounder xyzm_; + WKBSequenceBounder xy_swap_; + WKBSequenceBounder xyz_swap_; + WKBSequenceBounder xym_swap_; + WKBSequenceBounder xyzm_swap_; }; class WKBGeometryBounder { public: - WKBGeometryBounder() : box_(Dimensions::XYZM) {} + WKBGeometryBounder() : box_(Dimensions::dimensions::XYZM) {} WKBGeometryBounder(const WKBGeometryBounder&) = default; void ReadGeometry(WKBBuffer* src, bool record_wkb_type = true) { @@ -599,29 +599,27 @@ class WKBGeometryBounder { // Keep track of geometry types encountered if at the top level if (record_wkb_type) { - GeometryType::geometry_type geometry_type = - GeometryType::FromWKB(wkb_geometry_type); - geometry_types_.insert(geometry_type); + geometry_types_.insert(static_cast(wkb_geometry_type)); } switch (geometry_type) { - case GeometryType::POINT: + case GeometryType::geometry_type::POINT: bounder_.ReadPoint(src, dimensions, swap); break; - case GeometryType::LINESTRING: + case GeometryType::geometry_type::LINESTRING: bounder_.ReadSequence(src, dimensions, swap); break; - case GeometryType::POLYGON: + case GeometryType::geometry_type::POLYGON: bounder_.ReadRings(src, dimensions, swap); break; // These are all encoded the same in WKB, even though this encoding would // allow for parts to be of a different geometry type or different dimensions. // For the purposes of bounding, this does not cause us problems. 
- case GeometryType::MULTIPOINT: - case GeometryType::MULTILINESTRING: - case GeometryType::MULTIPOLYGON: - case GeometryType::GEOMETRYCOLLECTION: { + case GeometryType::geometry_type::MULTIPOINT: + case GeometryType::geometry_type::MULTILINESTRING: + case GeometryType::geometry_type::MULTIPOLYGON: + case GeometryType::geometry_type::GEOMETRYCOLLECTION: { uint32_t n_parts = src->ReadUInt32(swap); for (uint32_t i = 0; i < n_parts; i++) { ReadGeometry(src, /*record_wkb_type*/ false); diff --git a/cpp/src/parquet/geometry_util_internal_test.cc b/cpp/src/parquet/geometry_util_internal_test.cc index 3763e7ff5de2b..3a144f9e383f2 100644 --- a/cpp/src/parquet/geometry_util_internal_test.cc +++ b/cpp/src/parquet/geometry_util_internal_test.cc @@ -26,69 +26,75 @@ namespace parquet::geometry { TEST(TestGeometryUtil, TestDimensions) { - EXPECT_EQ(Dimensions::size(Dimensions::XY), 2); - EXPECT_EQ(Dimensions::size(Dimensions::XYZ), 3); - EXPECT_EQ(Dimensions::size(Dimensions::XYM), 3); - EXPECT_EQ(Dimensions::size(Dimensions::XYZM), 4); + EXPECT_EQ(Dimensions::size(Dimensions::dimensions::XY), 2); + EXPECT_EQ(Dimensions::size(Dimensions::dimensions::XYZ), 3); + EXPECT_EQ(Dimensions::size(Dimensions::dimensions::XYM), 3); + EXPECT_EQ(Dimensions::size(Dimensions::dimensions::XYZM), 4); - EXPECT_EQ(Dimensions::ToString(Dimensions::XY), "XY"); - EXPECT_EQ(Dimensions::ToString(Dimensions::XYZ), "XYZ"); - EXPECT_EQ(Dimensions::ToString(Dimensions::XYM), "XYM"); - EXPECT_EQ(Dimensions::ToString(Dimensions::XYZM), "XYZM"); + EXPECT_EQ(Dimensions::ToString(Dimensions::dimensions::XY), "XY"); + EXPECT_EQ(Dimensions::ToString(Dimensions::dimensions::XYZ), "XYZ"); + EXPECT_EQ(Dimensions::ToString(Dimensions::dimensions::XYM), "XYM"); + EXPECT_EQ(Dimensions::ToString(Dimensions::dimensions::XYZM), "XYZM"); - EXPECT_EQ(Dimensions::FromWKB(1), Dimensions::XY); - EXPECT_EQ(Dimensions::FromWKB(1001), Dimensions::XYZ); - EXPECT_EQ(Dimensions::FromWKB(2001), Dimensions::XYM); - 
EXPECT_EQ(Dimensions::FromWKB(3001), Dimensions::XYZM); + EXPECT_EQ(Dimensions::FromWKB(1), Dimensions::dimensions::XY); + EXPECT_EQ(Dimensions::FromWKB(1001), Dimensions::dimensions::XYZ); + EXPECT_EQ(Dimensions::FromWKB(2001), Dimensions::dimensions::XYM); + EXPECT_EQ(Dimensions::FromWKB(3001), Dimensions::dimensions::XYZM); EXPECT_THROW(Dimensions::FromWKB(4001), ParquetException); } TEST(TestGeometryUtil, TestGeometryType) { - EXPECT_EQ(GeometryType::ToString(GeometryType::POINT), "POINT"); - EXPECT_EQ(GeometryType::ToString(GeometryType::LINESTRING), "LINESTRING"); - EXPECT_EQ(GeometryType::ToString(GeometryType::POLYGON), "POLYGON"); - EXPECT_EQ(GeometryType::ToString(GeometryType::MULTIPOINT), "MULTIPOINT"); - EXPECT_EQ(GeometryType::ToString(GeometryType::MULTILINESTRING), "MULTILINESTRING"); - EXPECT_EQ(GeometryType::ToString(GeometryType::MULTIPOLYGON), "MULTIPOLYGON"); - EXPECT_EQ(GeometryType::ToString(GeometryType::GEOMETRYCOLLECTION), + EXPECT_EQ(GeometryType::ToString(GeometryType::geometry_type::POINT), "POINT"); + EXPECT_EQ(GeometryType::ToString(GeometryType::geometry_type::LINESTRING), + "LINESTRING"); + EXPECT_EQ(GeometryType::ToString(GeometryType::geometry_type::POLYGON), "POLYGON"); + EXPECT_EQ(GeometryType::ToString(GeometryType::geometry_type::MULTIPOINT), + "MULTIPOINT"); + EXPECT_EQ(GeometryType::ToString(GeometryType::geometry_type::MULTILINESTRING), + "MULTILINESTRING"); + EXPECT_EQ(GeometryType::ToString(GeometryType::geometry_type::MULTIPOLYGON), + "MULTIPOLYGON"); + EXPECT_EQ(GeometryType::ToString(GeometryType::geometry_type::GEOMETRYCOLLECTION), "GEOMETRYCOLLECTION"); - EXPECT_EQ(GeometryType::FromWKB(1), GeometryType::POINT); - EXPECT_EQ(GeometryType::FromWKB(1001), GeometryType::POINT); - EXPECT_EQ(GeometryType::FromWKB(1002), GeometryType::LINESTRING); - EXPECT_EQ(GeometryType::FromWKB(1003), GeometryType::POLYGON); - EXPECT_EQ(GeometryType::FromWKB(1004), GeometryType::MULTIPOINT); - EXPECT_EQ(GeometryType::FromWKB(1005), 
GeometryType::MULTILINESTRING); - EXPECT_EQ(GeometryType::FromWKB(1006), GeometryType::MULTIPOLYGON); - EXPECT_EQ(GeometryType::FromWKB(1007), GeometryType::GEOMETRYCOLLECTION); + EXPECT_EQ(GeometryType::FromWKB(1), GeometryType::geometry_type::POINT); + EXPECT_EQ(GeometryType::FromWKB(1001), GeometryType::geometry_type::POINT); + EXPECT_EQ(GeometryType::FromWKB(1002), GeometryType::geometry_type::LINESTRING); + EXPECT_EQ(GeometryType::FromWKB(1003), GeometryType::geometry_type::POLYGON); + EXPECT_EQ(GeometryType::FromWKB(1004), GeometryType::geometry_type::MULTIPOINT); + EXPECT_EQ(GeometryType::FromWKB(1005), GeometryType::geometry_type::MULTILINESTRING); + EXPECT_EQ(GeometryType::FromWKB(1006), GeometryType::geometry_type::MULTIPOLYGON); + EXPECT_EQ(GeometryType::FromWKB(1007), GeometryType::geometry_type::GEOMETRYCOLLECTION); EXPECT_THROW(GeometryType::FromWKB(1100), ParquetException); } TEST(TestGeometryUtil, TestBoundingBox) { BoundingBox box; - EXPECT_EQ(box, BoundingBox(Dimensions::XYZM, {kInf, kInf, kInf, kInf}, + EXPECT_EQ(box, BoundingBox(Dimensions::dimensions::XYZM, {kInf, kInf, kInf, kInf}, {-kInf, -kInf, -kInf, -kInf})); EXPECT_EQ(box.ToString(), "BoundingBox XYZM [inf => -inf, inf => -inf, inf => -inf, inf => -inf]"); - BoundingBox box_xyzm(Dimensions::XYZM, {-1, -2, -3, -4}, {1, 2, 3, 4}); + BoundingBox box_xyzm(Dimensions::dimensions::XYZM, {-1, -2, -3, -4}, {1, 2, 3, 4}); - BoundingBox box_xy(Dimensions::XY, {-10, -20, kInf, kInf}, {10, 20, -kInf, -kInf}); - BoundingBox box_xyz(Dimensions::XYZ, {kInf, kInf, -30, kInf}, + BoundingBox box_xy(Dimensions::dimensions::XY, {-10, -20, kInf, kInf}, + {10, 20, -kInf, -kInf}); + BoundingBox box_xyz(Dimensions::dimensions::XYZ, {kInf, kInf, -30, kInf}, {-kInf, -kInf, 30, -kInf}); - BoundingBox box_xym(Dimensions::XYM, {kInf, kInf, -40, kInf}, + BoundingBox box_xym(Dimensions::dimensions::XYM, {kInf, kInf, -40, kInf}, {-kInf, -kInf, 40, -kInf}); box_xyzm.Merge(box_xy); - EXPECT_EQ(box_xyzm, 
BoundingBox(Dimensions::XYZM, {-10, -20, -3, -4}, {10, 20, 3, 4})); + EXPECT_EQ(box_xyzm, BoundingBox(Dimensions::dimensions::XYZM, {-10, -20, -3, -4}, + {10, 20, 3, 4})); box_xyzm.Merge(box_xyz); - EXPECT_EQ(box_xyzm, - BoundingBox(Dimensions::XYZM, {-10, -20, -30, -4}, {10, 20, 30, 4})); + EXPECT_EQ(box_xyzm, BoundingBox(Dimensions::dimensions::XYZM, {-10, -20, -30, -4}, + {10, 20, 30, 4})); box_xyzm.Merge(box_xym); - EXPECT_EQ(box_xyzm, - BoundingBox(Dimensions::XYZM, {-10, -20, -30, -40}, {10, 20, 30, 40})); + EXPECT_EQ(box_xyzm, BoundingBox(Dimensions::dimensions::XYZM, {-10, -20, -30, -40}, + {10, 20, 30, 40})); box_xyzm.Reset(); EXPECT_EQ(box_xyzm, BoundingBox()); @@ -143,8 +149,9 @@ TEST_P(WKBTestFixture, TestWKBBounderNonEmpty) { bounder.Flush(); EXPECT_EQ(bounder.Bounds(), item.box); - EXPECT_THAT(bounder.GeometryTypes(), - ::testing::ElementsAre(::testing::Eq(item.geometry_type))); + uint32_t wkb_type = + static_cast(item.dimensions) * 1000 + static_cast(item.geometry_type); + EXPECT_THAT(bounder.GeometryTypes(), ::testing::ElementsAre(::testing::Eq(wkb_type))); bounder.Reset(); EXPECT_EQ(bounder.Bounds(), BoundingBox()); @@ -155,31 +162,31 @@ INSTANTIATE_TEST_SUITE_P( TestGeometryUtil, WKBTestFixture, ::testing::Values( // POINT (30 10) - WKBTestCase(GeometryType::POINT, Dimensions::XY, + WKBTestCase(GeometryType::geometry_type::POINT, Dimensions::dimensions::XY, {0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40}, {30, 10, 30, 10}), // POINT Z (30 10 40) - WKBTestCase(GeometryType::POINT, Dimensions::XYZ, + WKBTestCase(GeometryType::geometry_type::POINT, Dimensions::dimensions::XYZ, {0x01, 0xe9, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, {30, 10, 40, 30, 10, 40}), // POINT M (30 10 300) - WKBTestCase(GeometryType::POINT, Dimensions::XYM, + 
WKBTestCase(GeometryType::geometry_type::POINT, Dimensions::dimensions::XYM, {0x01, 0xd1, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, {30, 10, 300, 30, 10, 300}), // POINT ZM (30 10 40 300) - WKBTestCase(GeometryType::POINT, Dimensions::XYZM, + WKBTestCase(GeometryType::geometry_type::POINT, Dimensions::dimensions::XYZM, {0x01, 0xb9, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, {30, 10, 40, 300, 30, 10, 40, 300}), // LINESTRING (30 10, 10 30, 40 40) - WKBTestCase(GeometryType::LINESTRING, Dimensions::XY, + WKBTestCase(GeometryType::geometry_type::LINESTRING, Dimensions::dimensions::XY, {0x01, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -188,7 +195,7 @@ INSTANTIATE_TEST_SUITE_P( 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, {10, 10, 40, 40}), // LINESTRING Z (30 10 40, 10 30 40, 40 40 80) - WKBTestCase(GeometryType::LINESTRING, Dimensions::XYZ, + WKBTestCase(GeometryType::geometry_type::LINESTRING, Dimensions::dimensions::XYZ, {0x01, 0xea, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, @@ -199,7 +206,7 @@ INSTANTIATE_TEST_SUITE_P( 0x00, 0x00, 0x54, 0x40}, {10, 10, 40, 40, 40, 80}), // LINESTRING M (30 10 300, 10 30 300, 40 40 1600) - WKBTestCase(GeometryType::LINESTRING, Dimensions::XYM, + WKBTestCase(GeometryType::geometry_type::LINESTRING, Dimensions::dimensions::XYM, {0x01, 0xd2, 0x07, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 
0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, @@ -210,7 +217,7 @@ INSTANTIATE_TEST_SUITE_P( 0x00, 0x00, 0x99, 0x40}, {10, 10, 300, 40, 40, 1600}), // LINESTRING ZM (30 10 40 300, 10 30 40 300, 40 40 80 1600) - WKBTestCase(GeometryType::LINESTRING, Dimensions::XYZM, + WKBTestCase(GeometryType::geometry_type::LINESTRING, Dimensions::dimensions::XYZM, {0x01, 0xba, 0x0b, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, @@ -223,7 +230,7 @@ INSTANTIATE_TEST_SUITE_P( 0x00, 0x00, 0x00, 0x00, 0x99, 0x40}, {10, 10, 40, 300, 40, 40, 80, 1600}), // POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10)) - WKBTestCase(GeometryType::POLYGON, Dimensions::XY, + WKBTestCase(GeometryType::geometry_type::POLYGON, Dimensions::dimensions::XY, {0x01, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, @@ -236,7 +243,7 @@ INSTANTIATE_TEST_SUITE_P( {10, 10, 40, 40}), // POLYGON Z ((30 10 40, 40 40 80, 20 40 60, 10 20 30, 30 10 40)) WKBTestCase( - GeometryType::POLYGON, Dimensions::XYZ, + GeometryType::geometry_type::POLYGON, Dimensions::dimensions::XYZ, {0x01, 0xeb, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, @@ -251,7 +258,7 @@ INSTANTIATE_TEST_SUITE_P( {10, 10, 30, 40, 40, 80}), // POLYGON M ((30 10 300, 40 40 1600, 20 40 800, 10 20 200, 30 10 300)) WKBTestCase( - GeometryType::POLYGON, Dimensions::XYM, + GeometryType::geometry_type::POLYGON, Dimensions::dimensions::XYM, {0x01, 0xd3, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 
0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, @@ -267,7 +274,7 @@ INSTANTIATE_TEST_SUITE_P( // POLYGON ZM ((30 10 40 300, 40 40 80 1600, 20 40 60 800, 10 20 30 200, 30 10 40 // 300)) WKBTestCase( - GeometryType::POLYGON, Dimensions::XYZM, + GeometryType::geometry_type::POLYGON, Dimensions::dimensions::XYZM, {0x01, 0xbb, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, @@ -284,27 +291,27 @@ INSTANTIATE_TEST_SUITE_P( 0x00, 0xc0, 0x72, 0x40}, {10, 10, 30, 200, 40, 40, 80, 1600}), // MULTIPOINT ((30 10)) - WKBTestCase(GeometryType::MULTIPOINT, Dimensions::XY, + WKBTestCase(GeometryType::geometry_type::MULTIPOINT, Dimensions::dimensions::XY, {0x01, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40}, {30, 10, 30, 10}), // MULTIPOINT Z ((30 10 40)) - WKBTestCase(GeometryType::MULTIPOINT, Dimensions::XYZ, + WKBTestCase(GeometryType::geometry_type::MULTIPOINT, Dimensions::dimensions::XYZ, {0x01, 0xec, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xe9, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, {30, 10, 40, 30, 10, 40}), // MULTIPOINT M ((30 10 300)) - WKBTestCase(GeometryType::MULTIPOINT, Dimensions::XYM, + WKBTestCase(GeometryType::geometry_type::MULTIPOINT, Dimensions::dimensions::XYM, {0x01, 0xd4, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xd1, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, {30, 10, 300, 30, 10, 300}), // MULTIPOINT ZM ((30 10 40 300)) - WKBTestCase(GeometryType::MULTIPOINT, Dimensions::XYZM, + 
WKBTestCase(GeometryType::geometry_type::MULTIPOINT, Dimensions::dimensions::XYZM, {0x01, 0xbc, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xb9, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, @@ -312,7 +319,8 @@ INSTANTIATE_TEST_SUITE_P( 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, {30, 10, 40, 300, 30, 10, 40, 300}), // MULTILINESTRING ((30 10, 10 30, 40 40)) - WKBTestCase(GeometryType::MULTILINESTRING, Dimensions::XY, + WKBTestCase(GeometryType::geometry_type::MULTILINESTRING, + Dimensions::dimensions::XY, {0x01, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, @@ -322,7 +330,7 @@ INSTANTIATE_TEST_SUITE_P( {10, 10, 40, 40}), // MULTILINESTRING Z ((30 10 40, 10 30 40, 40 40 80)) WKBTestCase( - GeometryType::MULTILINESTRING, Dimensions::XYZ, + GeometryType::geometry_type::MULTILINESTRING, Dimensions::dimensions::XYZ, {0x01, 0xed, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xea, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -333,7 +341,7 @@ INSTANTIATE_TEST_SUITE_P( {10, 10, 40, 40, 40, 80}), // MULTILINESTRING M ((30 10 300, 10 30 300, 40 40 1600)) WKBTestCase( - GeometryType::MULTILINESTRING, Dimensions::XYM, + GeometryType::geometry_type::MULTILINESTRING, Dimensions::dimensions::XYM, {0x01, 0xd5, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xd2, 0x07, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -344,7 +352,7 @@ INSTANTIATE_TEST_SUITE_P( {10, 10, 300, 40, 40, 1600}), // MULTILINESTRING ZM ((30 10 40 300, 10 30 40 300, 40 40 80 1600)) WKBTestCase( - GeometryType::MULTILINESTRING, Dimensions::XYZM, + 
GeometryType::geometry_type::MULTILINESTRING, Dimensions::dimensions::XYZM, {0x01, 0xbd, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xba, 0x0b, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -357,7 +365,7 @@ INSTANTIATE_TEST_SUITE_P( {10, 10, 40, 300, 40, 40, 80, 1600}), // MULTIPOLYGON (((30 10, 40 40, 20 40, 10 20, 30 10))) WKBTestCase( - GeometryType::MULTIPOLYGON, Dimensions::XY, + GeometryType::geometry_type::MULTIPOLYGON, Dimensions::dimensions::XY, {0x01, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, @@ -369,7 +377,7 @@ INSTANTIATE_TEST_SUITE_P( {10, 10, 40, 40}), // MULTIPOLYGON Z (((30 10 40, 40 40 80, 20 40 60, 10 20 30, 30 10 40))) WKBTestCase( - GeometryType::MULTIPOLYGON, Dimensions::XYZ, + GeometryType::geometry_type::MULTIPOLYGON, Dimensions::dimensions::XYZ, {0x01, 0xee, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xeb, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, @@ -384,7 +392,7 @@ INSTANTIATE_TEST_SUITE_P( {10, 10, 30, 40, 40, 80}), // MULTIPOLYGON M (((30 10 300, 40 40 1600, 20 40 800, 10 20 200, 30 10 300))) WKBTestCase( - GeometryType::MULTIPOLYGON, Dimensions::XYM, + GeometryType::geometry_type::MULTIPOLYGON, Dimensions::dimensions::XYM, {0x01, 0xd6, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xd3, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, @@ -399,7 +407,8 @@ INSTANTIATE_TEST_SUITE_P( {10, 10, 200, 40, 40, 1600}), // MULTIPOLYGON ZM (((30 10 40 300, 40 40 80 1600, 20 40 60 800, 10 20 30 
200, 30 // 10 40 300))) - WKBTestCase(GeometryType::MULTIPOLYGON, Dimensions::XYZM, + WKBTestCase(GeometryType::geometry_type::MULTIPOLYGON, + Dimensions::dimensions::XYZM, {0x01, 0xbe, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xbb, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, @@ -419,27 +428,31 @@ INSTANTIATE_TEST_SUITE_P( 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, {10, 10, 30, 200, 40, 40, 80, 1600}), // GEOMETRYCOLLECTION (POINT (30 10)) - WKBTestCase(GeometryType::GEOMETRYCOLLECTION, Dimensions::XY, + WKBTestCase(GeometryType::geometry_type::GEOMETRYCOLLECTION, + Dimensions::dimensions::XY, {0x01, 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40}, {30, 10, 30, 10}), // GEOMETRYCOLLECTION Z (POINT Z (30 10 40)) - WKBTestCase(GeometryType::GEOMETRYCOLLECTION, Dimensions::XYZ, + WKBTestCase(GeometryType::geometry_type::GEOMETRYCOLLECTION, + Dimensions::dimensions::XYZ, {0x01, 0xef, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xe9, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, {30, 10, 40, 30, 10, 40}), // GEOMETRYCOLLECTION M (POINT M (30 10 300)) - WKBTestCase(GeometryType::GEOMETRYCOLLECTION, Dimensions::XYM, + WKBTestCase(GeometryType::geometry_type::GEOMETRYCOLLECTION, + Dimensions::dimensions::XYM, {0x01, 0xd7, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xd1, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, {30, 10, 300, 30, 10, 300}), // GEOMETRYCOLLECTION ZM (POINT ZM (30 10 40 300)) - WKBTestCase(GeometryType::GEOMETRYCOLLECTION, Dimensions::XYZM, + WKBTestCase(GeometryType::geometry_type::GEOMETRYCOLLECTION, + 
Dimensions::dimensions::XYZM, {0x01, 0xbf, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xb9, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, diff --git a/cpp/src/parquet/test_util.h b/cpp/src/parquet/test_util.h index d06efd59e9075..9d265fb625e75 100644 --- a/cpp/src/parquet/test_util.h +++ b/cpp/src/parquet/test_util.h @@ -846,8 +846,8 @@ static constexpr int kWkbPointSize = 21; // 1:endianness + 4:type + 8:x + 8:y inline int GenerateWKBPoint(uint8_t* ptr, double x, double y) { ptr[0] = kWkbNativeEndianness; - uint32_t geom_type = - geometry::GeometryType::ToWKB(geometry::GeometryType::POINT, false, false); + uint32_t geom_type = geometry::GeometryType::ToWKB( + geometry::GeometryType::geometry_type::POINT, false, false); memcpy(&ptr[1], &geom_type, 4); memcpy(&ptr[5], &x, 8); memcpy(&ptr[13], &y, 8); @@ -861,8 +861,8 @@ inline bool GetWKBPointCoordinate(const ByteArray& value, double* out_x, double* if (value.ptr[0] != kWkbNativeEndianness) { return false; } - uint32_t expected_geom_type = - geometry::GeometryType::ToWKB(geometry::GeometryType::POINT, false, false); + uint32_t expected_geom_type = geometry::GeometryType::ToWKB( + geometry::GeometryType::geometry_type::POINT, false, false); uint32_t geom_type = 0; memcpy(&geom_type, &value.ptr[1], 4); if (geom_type != expected_geom_type) { From 31a70c5174d27db33604e698b75a5fa7e5510f6a Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Thu, 19 Sep 2024 17:52:39 +0800 Subject: [PATCH 53/61] Revert more special case handling for unknown sort order --- cpp/src/parquet/statistics.cc | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index c4ad11e8495c4..3c5655500df15 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -853,7 +853,6 @@ LogicalType::Type::type LogicalTypeId(const ColumnDescriptor* descr) { } return 
LogicalType::Type::NONE; } - LogicalType::Type::type LogicalTypeId(const Statistics& stats) { return LogicalTypeId(stats.descr()); } @@ -1331,12 +1330,7 @@ std::shared_ptr DoMakeComparator(Type::type physical_type, ParquetException::NYI("Unsigned Compare not implemented"); } } else { - if (logical_type == LogicalType::Type::GEOMETRY && - physical_type == Type::BYTE_ARRAY) { - return std::make_shared>(); - } else { - throw ParquetException("UNKNOWN Sort Order"); - } + throw ParquetException("UNKNOWN Sort Order"); } return nullptr; } From 7dfbf4bd6716f9303022d5d21aab51aa99fd4af9 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Thu, 19 Sep 2024 20:59:56 +0800 Subject: [PATCH 54/61] Fix WKB covering test to take native endianness into consideration --- cpp/src/parquet/geometry_util_internal.h | 33 ++++++++++++++++++- .../parquet/geometry_util_internal_test.cc | 11 +++++++ cpp/src/parquet/test_util.h | 12 +++---- 3 files changed, 47 insertions(+), 9 deletions(-) diff --git a/cpp/src/parquet/geometry_util_internal.h b/cpp/src/parquet/geometry_util_internal.h index d903d06c747d9..0f6cafdf2c8e0 100644 --- a/cpp/src/parquet/geometry_util_internal.h +++ b/cpp/src/parquet/geometry_util_internal.h @@ -657,13 +657,44 @@ class WKBGeometryBounder { std::unordered_set geometry_types_; }; +#if defined(ARROW_LITTLE_ENDIAN) +static constexpr int kWkbNativeEndianness = geometry::WKBBuffer::WKB_LITTLE_ENDIAN; +#else +static constexpr int kWkbNativeEndianness = geometry::WKBBuffer::WKB_BIG_ENDIAN; +#endif + +static inline std::string MakeWKBPoint(const double* xyzm, bool has_z, bool has_m) { + // 1:endianness + 4:type + 8:x + 8:y + int num_bytes = 21 + (has_z ? 8 : 0) + (has_m ? 
8 : 0); + std::string wkb(num_bytes, 0); + char* ptr = wkb.data(); + + ptr[0] = kWkbNativeEndianness; + uint32_t geom_type = geometry::GeometryType::ToWKB( + geometry::GeometryType::geometry_type::POINT, has_z, has_m); + memcpy(&ptr[1], &geom_type, 4); + memcpy(&ptr[5], &xyzm[0], 8); + memcpy(&ptr[13], &xyzm[1], 8); + ptr += 21; + + if (has_z) { + memcpy(ptr, &xyzm[2], 8); + ptr += 8; + } + if (has_m) { + memcpy(ptr, &xyzm[3], 8); + } + + return wkb; +} + static inline std::string MakeCoveringWKBFromBound(double xmin, double xmax, double ymin, double ymax) { std::string wkb_data(93, 0); // endianness and header auto data = reinterpret_cast(wkb_data.data()); - data[0] = ARROW_LITTLE_ENDIAN; + data[0] = kWkbNativeEndianness; uint32_t wkb_type = 3; // POLYGON memcpy(&data[1], &wkb_type, 4); diff --git a/cpp/src/parquet/geometry_util_internal_test.cc b/cpp/src/parquet/geometry_util_internal_test.cc index 3a144f9e383f2..27e1f338a89c0 100644 --- a/cpp/src/parquet/geometry_util_internal_test.cc +++ b/cpp/src/parquet/geometry_util_internal_test.cc @@ -463,6 +463,7 @@ INSTANTIATE_TEST_SUITE_P( TEST(TestGeometryUtil, MakeCoveringWKBFromBound) { std::string wkb_covering = MakeCoveringWKBFromBound(10, 20, 30, 40); // POLYGON ((10 30, 20 30, 20 40, 10 40, 10 30)) +#ifdef ARROW_LITTLE_ENDIAN std::vector expected_wkb = { 0x01, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, @@ -471,6 +472,16 @@ TEST(TestGeometryUtil, MakeCoveringWKBFromBound) { 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40}; +#else + std::vector expected_wkb = { + 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x40, + 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x3e, 0x00, 0x00, 0x00, 0x00, 
0x00, + 0x00, 0x40, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x3e, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x40, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x44, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, + 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x40, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; +#endif EXPECT_EQ(expected_wkb.size(), wkb_covering.size()); EXPECT_EQ(0, memcmp(wkb_covering.data(), expected_wkb.data(), expected_wkb.size())); } diff --git a/cpp/src/parquet/test_util.h b/cpp/src/parquet/test_util.h index 9d265fb625e75..55a56e132f650 100644 --- a/cpp/src/parquet/test_util.h +++ b/cpp/src/parquet/test_util.h @@ -844,14 +844,10 @@ static constexpr int kWkbNativeEndianness = geometry::WKBBuffer::WKB_BIG_ENDIAN; static constexpr int kWkbPointSize = 21; // 1:endianness + 4:type + 8:x + 8:y -inline int GenerateWKBPoint(uint8_t* ptr, double x, double y) { - ptr[0] = kWkbNativeEndianness; - uint32_t geom_type = geometry::GeometryType::ToWKB( - geometry::GeometryType::geometry_type::POINT, false, false); - memcpy(&ptr[1], &geom_type, 4); - memcpy(&ptr[5], &x, 8); - memcpy(&ptr[13], &y, 8); - return kWkbPointSize; +inline void GenerateWKBPoint(uint8_t* ptr, double x, double y) { + double xyzm[] = {x, y, geometry::kInf, geometry::kInf}; + std::string wkb = geometry::MakeWKBPoint(xyzm, false, false); + memcpy(ptr, wkb.data(), kWkbPointSize); } inline bool GetWKBPointCoordinate(const ByteArray& value, double* out_x, double* out_y) { From 5d8ab77df302d431e433e84e2e25420a541e16a5 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Thu, 19 Sep 2024 20:54:50 +0800 Subject: [PATCH 55/61] min/max of geometry columns are the WKB representation of lower-left and upper-right points --- cpp/src/parquet/column_writer_test.cc | 13 ++++++ .../parquet/geometry_util_internal_test.cc | 44 +++++++++++++++++++ cpp/src/parquet/reader_test.cc | 19 +++++++- 
cpp/src/parquet/statistics.cc | 44 ++++++++++++++++--- cpp/src/parquet/statistics.h | 4 +- 5 files changed, 116 insertions(+), 8 deletions(-) diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index 7a8038e80651a..093916651607f 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -1855,6 +1855,19 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { EXPECT_FALSE(geometry_statistics->HasZ()); EXPECT_FALSE(geometry_statistics->HasM()); + auto byte_array_statistics = + std::static_pointer_cast(statistics); + double min_x = 0; + double min_y = 0; + double max_x = 0; + double max_y = 0; + GetWKBPointCoordinate(byte_array_statistics->min(), &min_x, &min_y); + GetWKBPointCoordinate(byte_array_statistics->max(), &max_x, &max_y); + EXPECT_DOUBLE_EQ(0, min_x); + EXPECT_DOUBLE_EQ(1, min_y); + EXPECT_DOUBLE_EQ(99, max_x); + EXPECT_DOUBLE_EQ(100, max_y); + auto coverings = geometry_statistics->GetCoverings(); EXPECT_EQ(1, coverings.size()); EXPECT_EQ("WKB", coverings[0].first); diff --git a/cpp/src/parquet/geometry_util_internal_test.cc b/cpp/src/parquet/geometry_util_internal_test.cc index 27e1f338a89c0..ea5edc4b4d688 100644 --- a/cpp/src/parquet/geometry_util_internal_test.cc +++ b/cpp/src/parquet/geometry_util_internal_test.cc @@ -17,6 +17,7 @@ #include #include +#include #include #include "arrow/testing/gtest_compat.h" @@ -486,4 +487,47 @@ TEST(TestGeometryUtil, MakeCoveringWKBFromBound) { EXPECT_EQ(0, memcmp(wkb_covering.data(), expected_wkb.data(), expected_wkb.size())); } +struct MakeWKBPointTestCase { + MakeWKBPointTestCase() = default; + MakeWKBPointTestCase(const std::vector xyzm, bool has_z, bool has_m) + : has_z(has_z), has_m(has_m) { + memcpy(this->xyzm, xyzm.data(), sizeof(this->xyzm)); + } + + double xyzm[4]; + bool has_z; + bool has_m; +}; + +class MakeWKBPointTestFixture : public testing::TestWithParam {}; + +TEST_P(MakeWKBPointTestFixture, MakeWKBPoint) { + auto 
param = GetParam(); + std::string wkb = MakeWKBPoint(param.xyzm, param.has_z, param.has_m); + WKBGeometryBounder bounder; + WKBBuffer buf(reinterpret_cast(wkb.data()), wkb.size()); + bounder.ReadGeometry(&buf); + bounder.Flush(); + const double* mins = bounder.Bounds().min; + EXPECT_DOUBLE_EQ(param.xyzm[0], mins[0]); + EXPECT_DOUBLE_EQ(param.xyzm[1], mins[1]); + if (param.has_z) { + EXPECT_DOUBLE_EQ(param.xyzm[2], mins[2]); + } else { + EXPECT_TRUE(std::isinf(mins[2])); + } + if (param.has_m) { + EXPECT_DOUBLE_EQ(param.xyzm[3], mins[3]); + } else { + EXPECT_TRUE(std::isinf(mins[3])); + } +} + +INSTANTIATE_TEST_SUITE_P( + TestGeometryUtil, MakeWKBPointTestFixture, + ::testing::Values(MakeWKBPointTestCase({30, 10, 40, 300}, false, false), + MakeWKBPointTestCase({30, 10, 40, 300}, true, false), + MakeWKBPointTestCase({30, 10, 40, 300}, false, true), + MakeWKBPointTestCase({30, 10, 40, 300}, true, true))); + } // namespace parquet::geometry diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index 4eca392030b45..4bee9570e2022 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -1984,7 +1984,11 @@ class TestGeometryLogicalType : public ::testing::Test { EXPECT_FALSE(geometry_column_index->geometry_statistics().empty()); double last_xmin = -geometry::kInf; double last_ymin = -geometry::kInf; - for (const auto& geom_stats : geometry_column_index->geometry_statistics()) { + + size_t num_pages = geometry_column_index->geometry_statistics().size(); + EXPECT_GT(num_pages, 0); + for (size_t i = 0; i < num_pages; i++) { + const auto& geom_stats = geometry_column_index->geometry_statistics()[i]; std::vector geometry_types = geom_stats.GetGeometryTypes(); EXPECT_EQ(1, geometry_types.size()); EXPECT_EQ(1, geometry_types[0]); @@ -1998,6 +2002,19 @@ class TestGeometryLogicalType : public ::testing::Test { EXPECT_EQ("WKB", geom_stats.GetCoverings().front().first); last_xmin = geom_stats.GetXMin(); last_ymin = 
geom_stats.GetYMin(); + + const auto& min = geometry_column_index->min_values()[i]; + const auto& max = geometry_column_index->max_values()[i]; + double min_x = 0; + double min_y = 0; + double max_x = 0; + double max_y = 0; + test::GetWKBPointCoordinate(min, &min_x, &min_y); + test::GetWKBPointCoordinate(max, &max_x, &max_y); + EXPECT_DOUBLE_EQ(geom_stats.GetXMin(), min_x); + EXPECT_DOUBLE_EQ(geom_stats.GetYMin(), min_y); + EXPECT_DOUBLE_EQ(geom_stats.GetXMax(), max_x); + EXPECT_DOUBLE_EQ(geom_stats.GetYMax(), max_y); } } diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index 3c5655500df15..8714d06b29019 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -181,6 +181,20 @@ class GeometryStatisticsImpl { return out; } + std::string EncodeMin() const { + const double* mins = bounder_.Bounds().min; + bool has_z = !std::isinf(mins[2]); + bool has_m = !std::isinf(mins[3]); + return geometry::MakeWKBPoint(mins, has_z, has_m); + } + + std::string EncodeMax() const { + const double* maxes = bounder_.Bounds().max; + bool has_z = !std::isinf(maxes[2]); + bool has_m = !std::isinf(maxes[3]); + return geometry::MakeWKBPoint(maxes, has_z, has_m); + } + void Update(const EncodedGeometryStatistics& encoded) { if (!is_valid_) { return; @@ -282,7 +296,11 @@ void GeometryStatistics::Reset() { impl_->Reset(); } bool GeometryStatistics::is_valid() const { return impl_->is_valid(); } -EncodedGeometryStatistics GeometryStatistics::Encode() { return impl_->Encode(); } +EncodedGeometryStatistics GeometryStatistics::Encode() const { return impl_->Encode(); } + +std::string GeometryStatistics::EncodeMin() const { return impl_->EncodeMin(); } + +std::string GeometryStatistics::EncodeMax() const { return impl_->EncodeMax(); } void GeometryStatistics::Decode(const EncodedGeometryStatistics& encoded) { impl_->Update(encoded); @@ -1051,15 +1069,17 @@ class TypedStatisticsImpl : public TypedStatistics { return; } - 
SetMinMaxPair(comparator_->GetMinMax(values)); - if constexpr (std::is_same::value) { if (logical_type_ == LogicalType::Type::GEOMETRY) { if (geometry_statistics_ == nullptr) { geometry_statistics_ = std::make_unique(); } geometry_statistics_->Update(values); + } else { + SetMinMaxPair(comparator_->GetMinMax(values)); } + } else { + SetMinMaxPair(comparator_->GetMinMax(values)); } } @@ -1099,6 +1119,11 @@ class TypedStatisticsImpl : public TypedStatistics { } if (HasGeometryStatistics() && geometry_statistics_->is_valid()) { s.set_geometry(geometry_statistics_->Encode()); + + // Override min and max with the WKB representation of lower-left corner + // and upper-right corner + s.set_min(geometry_statistics_->EncodeMin()); + s.set_max(geometry_statistics_->EncodeMax()); } return s; } @@ -1217,7 +1242,6 @@ void TypedStatisticsImpl::Update(const T* values, int64_t num_values, IncrementNumValues(num_values); if (num_values == 0) return; - SetMinMaxPair(comparator_->GetMinMax(values, num_values)); if constexpr (std::is_same::value) { if (logical_type_ == LogicalType::Type::GEOMETRY) { @@ -1225,7 +1249,11 @@ void TypedStatisticsImpl::Update(const T* values, int64_t num_values, geometry_statistics_ = std::make_unique(); } geometry_statistics_->Update(values, num_values, null_count); + } else { + SetMinMaxPair(comparator_->GetMinMax(values, num_values)); } + } else { + SetMinMaxPair(comparator_->GetMinMax(values, num_values)); } } @@ -1241,8 +1269,6 @@ void TypedStatisticsImpl::UpdateSpaced(const T* values, const uint8_t* va IncrementNumValues(num_values); if (num_values == 0) return; - SetMinMaxPair(comparator_->GetMinMaxSpaced(values, num_spaced_values, valid_bits, - valid_bits_offset)); if constexpr (std::is_same::value) { if (logical_type_ == LogicalType::Type::GEOMETRY) { @@ -1251,7 +1277,13 @@ void TypedStatisticsImpl::UpdateSpaced(const T* values, const uint8_t* va } geometry_statistics_->UpdateSpaced(values, valid_bits, valid_bits_offset, num_spaced_values, 
num_values, null_count); + } else { + SetMinMaxPair(comparator_->GetMinMaxSpaced(values, num_spaced_values, valid_bits, + valid_bits_offset)); } + } else { + SetMinMaxPair(comparator_->GetMinMaxSpaced(values, num_spaced_values, valid_bits, + valid_bits_offset)); } } diff --git a/cpp/src/parquet/statistics.h b/cpp/src/parquet/statistics.h index ba04841fde2ed..0840fb59f7c1c 100644 --- a/cpp/src/parquet/statistics.h +++ b/cpp/src/parquet/statistics.h @@ -163,7 +163,9 @@ class PARQUET_EXPORT GeometryStatistics { void Reset(); - EncodedGeometryStatistics Encode(); + EncodedGeometryStatistics Encode() const; + std::string EncodeMin() const; + std::string EncodeMax() const; bool is_valid() const; From ae5926fb56814bc7c92f71450397758a7e595498 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Fri, 20 Sep 2024 10:38:09 +0800 Subject: [PATCH 56/61] Address latest review comments --- cpp/src/parquet/column_reader.cc | 6 +-- cpp/src/parquet/metadata.cc | 10 ++--- cpp/src/parquet/statistics.cc | 60 +++++++++--------------------- cpp/src/parquet/statistics.h | 47 +++-------------------- cpp/src/parquet/statistics_test.cc | 8 ++-- 5 files changed, 34 insertions(+), 97 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index b25235970da6d..6b71338477af4 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -244,11 +244,7 @@ EncodedStatistics ExtractStatsFromHeader(const H& header) { page_statistics.set_distinct_count(stats.distinct_count); } if (stats.__isset.geometry_stats) { - EncodedGeometryStatistics encoded_geometry_stats; - if (stats.__isset.geometry_stats) { - encoded_geometry_stats = FromThrift(stats.geometry_stats); - } - page_statistics.set_geometry(encoded_geometry_stats); + page_statistics.set_geometry(FromThrift(stats.geometry_stats)); } return page_statistics; } diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index 9d4f4176d3058..0aa04e6f912ab 100644 --- 
a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -94,29 +94,29 @@ static std::shared_ptr MakeTypedColumnStats( const format::ColumnMetaData& metadata, const ColumnDescriptor* descr) { // If ColumnOrder is defined, return max_value and min_value EncodedGeometryStatistics encoded_geometry_stats; + const EncodedGeometryStatistics* geometry_statistics = nullptr; if (metadata.statistics.__isset.geometry_stats) { encoded_geometry_stats = FromThrift(metadata.statistics.geometry_stats); + geometry_statistics = &encoded_geometry_stats; } if (descr->column_order().get_order() == ColumnOrder::TYPE_DEFINED_ORDER) { return MakeStatistics( descr, metadata.statistics.min_value, metadata.statistics.max_value, metadata.num_values - metadata.statistics.null_count, metadata.statistics.null_count, metadata.statistics.distinct_count, - encoded_geometry_stats, metadata.statistics.__isset.max_value && metadata.statistics.__isset.min_value, metadata.statistics.__isset.null_count, - metadata.statistics.__isset.distinct_count, - metadata.statistics.__isset.geometry_stats); + metadata.statistics.__isset.distinct_count, ::arrow::default_memory_pool(), + geometry_statistics); } // Default behavior return MakeStatistics( descr, metadata.statistics.min, metadata.statistics.max, metadata.num_values - metadata.statistics.null_count, metadata.statistics.null_count, metadata.statistics.distinct_count, - encoded_geometry_stats, metadata.statistics.__isset.max && metadata.statistics.__isset.min, metadata.statistics.__isset.null_count, metadata.statistics.__isset.distinct_count, - metadata.statistics.__isset.geometry_stats); + ::arrow::default_memory_pool(), geometry_statistics); } std::shared_ptr MakeColumnStats(const format::ColumnMetaData& meta_data, diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index 8714d06b29019..60702b39f0d2d 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -910,7 +910,8 @@ class TypedStatisticsImpl 
: public TypedStatistics { TypedStatisticsImpl(const ColumnDescriptor* descr, const std::string& encoded_min, const std::string& encoded_max, int64_t num_values, int64_t null_count, int64_t distinct_count, bool has_min_max, - bool has_null_count, bool has_distinct_count, MemoryPool* pool) + bool has_null_count, bool has_distinct_count, MemoryPool* pool, + const EncodedGeometryStatistics* geometry_statistics) : TypedStatisticsImpl(descr, pool) { TypedStatisticsImpl::IncrementNumValues(num_values); if (has_null_count) { @@ -931,21 +932,10 @@ class TypedStatisticsImpl : public TypedStatistics { PlainDecode(encoded_max, &max_); } has_min_max_ = has_min_max; - } - // Create stats from a thrift Statistics object. - TypedStatisticsImpl(const ColumnDescriptor* descr, const std::string& encoded_min, - const std::string& encoded_max, int64_t num_values, - int64_t null_count, int64_t distinct_count, - const EncodedGeometryStatistics& geometry_statistics, - bool has_min_max, bool has_null_count, bool has_distinct_count, - bool has_geometry_statistics, MemoryPool* pool) - : TypedStatisticsImpl(descr, encoded_min, encoded_max, num_values, null_count, - distinct_count, has_min_max, has_null_count, - has_distinct_count, pool) { - if (has_geometry_statistics) { + if (geometry_statistics != nullptr) { geometry_statistics_ = std::make_shared(); - geometry_statistics_->Decode(geometry_statistics); + geometry_statistics_->Decode(*geometry_statistics); } } @@ -1437,26 +1427,29 @@ std::shared_ptr Statistics::Make(const ColumnDescriptor* descr, int64_t num_values, ::arrow::MemoryPool* pool) { DCHECK(encoded_stats != nullptr); + + const EncodedGeometryStatistics* geometry_statistics = nullptr; + if (encoded_stats->has_geometry_statistics) { + geometry_statistics = &encoded_stats->geometry_statistics(); + } return Make(descr, encoded_stats->min(), encoded_stats->max(), num_values, encoded_stats->null_count, encoded_stats->distinct_count, - encoded_stats->geometry_statistics(), 
encoded_stats->has_min && encoded_stats->has_max, - encoded_stats->has_null_count, encoded_stats->has_distinct_count, - encoded_stats->has_geometry_statistics, pool); + encoded_stats->has_null_count, encoded_stats->has_distinct_count, pool, + geometry_statistics); } -std::shared_ptr Statistics::Make(const ColumnDescriptor* descr, - const std::string& encoded_min, - const std::string& encoded_max, - int64_t num_values, int64_t null_count, - int64_t distinct_count, bool has_min_max, - bool has_null_count, bool has_distinct_count, - ::arrow::MemoryPool* pool) { +std::shared_ptr Statistics::Make( + const ColumnDescriptor* descr, const std::string& encoded_min, + const std::string& encoded_max, int64_t num_values, int64_t null_count, + int64_t distinct_count, bool has_min_max, bool has_null_count, + bool has_distinct_count, ::arrow::MemoryPool* pool, + const EncodedGeometryStatistics* geometry_statistics) { #define MAKE_STATS(CAP_TYPE, KLASS) \ case Type::CAP_TYPE: \ return std::make_shared>( \ descr, encoded_min, encoded_max, num_values, null_count, distinct_count, \ - has_min_max, has_null_count, has_distinct_count, pool) + has_min_max, has_null_count, has_distinct_count, pool, geometry_statistics) switch (descr->physical_type()) { MAKE_STATS(BOOLEAN, BooleanType); @@ -1474,21 +1467,4 @@ std::shared_ptr Statistics::Make(const ColumnDescriptor* descr, return nullptr; } -std::shared_ptr Statistics::Make( - const ColumnDescriptor* descr, const std::string& encoded_min, - const std::string& encoded_max, int64_t num_values, int64_t null_count, - int64_t distinct_count, const EncodedGeometryStatistics& geometry_statistics, - bool has_min_max, bool has_null_count, bool has_distinct_count, - bool has_geometry_statistics, ::arrow::MemoryPool* pool) { - if (descr->physical_type() == Type::BYTE_ARRAY) { - return std::make_shared>( - descr, encoded_min, encoded_max, num_values, null_count, distinct_count, - geometry_statistics, has_min_max, has_null_count, has_distinct_count, - 
has_geometry_statistics, pool); - } else { - return Make(descr, encoded_min, encoded_max, num_values, null_count, distinct_count, - has_min_max, has_null_count, has_distinct_count, pool); - } -} - } // namespace parquet diff --git a/cpp/src/parquet/statistics.h b/cpp/src/parquet/statistics.h index 0840fb59f7c1c..bef4bdf811dab 100644 --- a/cpp/src/parquet/statistics.h +++ b/cpp/src/parquet/statistics.h @@ -308,34 +308,13 @@ class PARQUET_EXPORT Statistics { /// \param[in] has_null_count whether the null_count statistics are set /// \param[in] has_distinct_count whether the distinct_count statistics are set /// \param[in] pool a memory pool to use for any memory allocations, optional - static std::shared_ptr Make( - const ColumnDescriptor* descr, const std::string& encoded_min, - const std::string& encoded_max, int64_t num_values, int64_t null_count, - int64_t distinct_count, bool has_min_max, bool has_null_count, - bool has_distinct_count, - ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()); - - /// \brief Create a new statistics instance given a column schema - /// definition and preexisting state - /// \param[in] descr the column schema - /// \param[in] encoded_min the encoded minimum value - /// \param[in] encoded_max the encoded maximum value - /// \param[in] num_values total number of values - /// \param[in] null_count number of null values - /// \param[in] distinct_count number of distinct values /// \param[in] geometry_statistics the geometry statistics - /// \param[in] has_min_max whether the min/max statistics are set - /// \param[in] has_null_count whether the null_count statistics are set - /// \param[in] has_distinct_count whether the distinct_count statistics are set - /// \param[in] has_geometry_statistics whether the geometry statistics are set - /// \param[in] pool a memory pool to use for any memory allocations, optional static std::shared_ptr Make( const ColumnDescriptor* descr, const std::string& encoded_min, const std::string& 
encoded_max, int64_t num_values, int64_t null_count, - int64_t distinct_count, const EncodedGeometryStatistics& geometry_statistics, - bool has_min_max, bool has_null_count, bool has_distinct_count, - bool has_geometry_statistics, - ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()); + int64_t distinct_count, bool has_min_max, bool has_null_count, + bool has_distinct_count, ::arrow::MemoryPool* pool = ::arrow::default_memory_pool(), + const EncodedGeometryStatistics* geometry_statistics = NULLPTR); // Helper function to convert EncodedStatistics to Statistics. // EncodedStatistics does not contain number of non-null values, and it can be @@ -491,25 +470,11 @@ std::shared_ptr> MakeStatistics( const ColumnDescriptor* descr, const std::string& encoded_min, const std::string& encoded_max, int64_t num_values, int64_t null_count, int64_t distinct_count, bool has_min_max, bool has_null_count, - bool has_distinct_count, ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) { + bool has_distinct_count, ::arrow::MemoryPool* pool = ::arrow::default_memory_pool(), + const EncodedGeometryStatistics* geometry_statistics = NULLPTR) { return std::static_pointer_cast>(Statistics::Make( descr, encoded_min, encoded_max, num_values, null_count, distinct_count, - has_min_max, has_null_count, has_distinct_count, pool)); -} - -/// \brief Typed version of Statistics::Make -template -std::shared_ptr> MakeStatistics( - const ColumnDescriptor* descr, const std::string& encoded_min, - const std::string& encoded_max, int64_t num_values, int64_t null_count, - int64_t distinct_count, const EncodedGeometryStatistics& geometry_statistics, - bool has_min_max, bool has_null_count, bool has_distinct_count, - bool has_geometry_statistics, - ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) { - return std::static_pointer_cast>( - Statistics::Make(descr, encoded_min, encoded_max, num_values, null_count, - distinct_count, geometry_statistics, has_min_max, has_null_count, - 
has_distinct_count, has_geometry_statistics, pool)); + has_min_max, has_null_count, has_distinct_count, pool, geometry_statistics)); } } // namespace parquet diff --git a/cpp/src/parquet/statistics_test.cc b/cpp/src/parquet/statistics_test.cc index 5df1a3340c13a..dad414ac89b47 100644 --- a/cpp/src/parquet/statistics_test.cc +++ b/cpp/src/parquet/statistics_test.cc @@ -320,7 +320,7 @@ class TestStatistics : public PrimitiveTypedTest { auto statistics2 = MakeStatistics(this->schema_.Column(0), encoded_min, encoded_max, - this->values_.size(), 0, 0, {}, true, true, true, false); + this->values_.size(), 0, 0, true, true, true); auto statistics3 = MakeStatistics(this->schema_.Column(0)); std::vector valid_bits( @@ -541,9 +541,9 @@ void TestStatistics::TestMinMaxEncode() { std::string(reinterpret_cast(statistics1->max().ptr), statistics1->max().len)); - auto statistics2 = MakeStatistics(this->schema_.Column(0), encoded_min, - encoded_max, this->values_.size(), 0, - 0, {}, true, true, true, false); + auto statistics2 = + MakeStatistics(this->schema_.Column(0), encoded_min, encoded_max, + this->values_.size(), 0, 0, true, true, true); ASSERT_EQ(encoded_min, statistics2->EncodeMin()); ASSERT_EQ(encoded_max, statistics2->EncodeMax()); From 672f19cd08c0739c55fb32f880832648ccfe7823 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Fri, 20 Sep 2024 17:35:42 +0800 Subject: [PATCH 57/61] A better implementation of geometry min/max statistics --- cpp/src/parquet/statistics.cc | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index 60702b39f0d2d..8821130a341ca 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -1065,6 +1065,7 @@ class TypedStatisticsImpl : public TypedStatistics { geometry_statistics_ = std::make_unique(); } geometry_statistics_->Update(values); + SetGeometryMinMax(); } else { SetMinMaxPair(comparator_->GetMinMax(values)); 
} @@ -1109,11 +1110,6 @@ class TypedStatisticsImpl : public TypedStatistics { } if (HasGeometryStatistics() && geometry_statistics_->is_valid()) { s.set_geometry(geometry_statistics_->Encode()); - - // Override min and max with the WKB representation of lower-left corner - // and upper-right corner - s.set_min(geometry_statistics_->EncodeMin()); - s.set_max(geometry_statistics_->EncodeMax()); } return s; } @@ -1187,6 +1183,9 @@ class TypedStatisticsImpl : public TypedStatistics { Copy(comparator_->Compare(max_, max) ? max : max_, &max_, max_buffer_.get()); } } + + // Set the minimum and maximum values for geometry columns. + void SetGeometryMinMax(); }; template <> @@ -1222,6 +1221,24 @@ inline void TypedStatisticsImpl::Copy(const ByteArray& src, ByteA *dst = ByteArray(src.len, buffer->data()); } +template +void TypedStatisticsImpl::SetGeometryMinMax() {} + +template <> +void TypedStatisticsImpl::SetGeometryMinMax() { + DCHECK_EQ(logical_type_, LogicalType::Type::GEOMETRY); + + if (HasGeometryStatistics() && geometry_statistics_->is_valid()) { + std::string min = geometry_statistics_->EncodeMin(); + std::string max = geometry_statistics_->EncodeMax(); + Copy(ByteArray(min), &min_, min_buffer_.get()); + Copy(ByteArray(max), &max_, max_buffer_.get()); + has_min_max_ = true; + } else { + has_min_max_ = false; + } +} + template void TypedStatisticsImpl::Update(const T* values, int64_t num_values, int64_t null_count) { @@ -1239,6 +1256,7 @@ void TypedStatisticsImpl::Update(const T* values, int64_t num_values, geometry_statistics_ = std::make_unique(); } geometry_statistics_->Update(values, num_values, null_count); + SetGeometryMinMax(); } else { SetMinMaxPair(comparator_->GetMinMax(values, num_values)); } @@ -1267,6 +1285,7 @@ void TypedStatisticsImpl::UpdateSpaced(const T* values, const uint8_t* va } geometry_statistics_->UpdateSpaced(values, valid_bits, valid_bits_offset, num_spaced_values, num_values, null_count); + SetGeometryMinMax(); } else { 
SetMinMaxPair(comparator_->GetMinMaxSpaced(values, num_spaced_values, valid_bits, valid_bits_offset)); From 26ba1623f5041bba3a85e782ac63c00558cb751c Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Mon, 7 Oct 2024 15:57:36 +0800 Subject: [PATCH 58/61] Update the code to accomodate the latest changes of the standard: 1. Remove metadata property of geometry logical types 2. Remove covering from geometry statistics --- cpp/src/generated/parquet_types.cpp | 2360 ++++++++--------- cpp/src/generated/parquet_types.h | 245 +- cpp/src/generated/parquet_types.tcc | 656 ++--- cpp/src/parquet/column_writer_test.cc | 26 +- cpp/src/parquet/geometry_util_internal.h | 29 - .../parquet/geometry_util_internal_test.cc | 26 - cpp/src/parquet/page_index_test.cc | 9 +- cpp/src/parquet/parquet.thrift | 175 +- cpp/src/parquet/reader_test.cc | 10 +- cpp/src/parquet/schema_test.cc | 6 +- cpp/src/parquet/statistics.cc | 40 - cpp/src/parquet/statistics.h | 2 - cpp/src/parquet/thrift_internal.h | 14 - cpp/src/parquet/types.cc | 45 +- cpp/src/parquet/types.h | 7 +- 15 files changed, 1563 insertions(+), 2087 deletions(-) diff --git a/cpp/src/generated/parquet_types.cpp b/cpp/src/generated/parquet_types.cpp index c1102440a107a..a8f98116277fa 100644 --- a/cpp/src/generated/parquet_types.cpp +++ b/cpp/src/generated/parquet_types.cpp @@ -372,35 +372,6 @@ std::string to_string(const FieldRepetitionType::type& val) { } } -int _kEdgesValues[] = { - Edges::PLANAR, - Edges::SPHERICAL -}; -const char* _kEdgesNames[] = { - "PLANAR", - "SPHERICAL" -}; -const std::map _Edges_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(2, _kEdgesValues, _kEdgesNames), ::apache::thrift::TEnumIterator(-1, nullptr, nullptr)); - -std::ostream& operator<<(std::ostream& out, const Edges::type& val) { - std::map::const_iterator it = _Edges_VALUES_TO_NAMES.find(val); - if (it != _Edges_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - -std::string to_string(const 
Edges::type& val) { - std::map::const_iterator it = _Edges_VALUES_TO_NAMES.find(val); - if (it != _Edges_VALUES_TO_NAMES.end()) { - return std::string(it->second); - } else { - return std::to_string(static_cast(val)); - } -} - int _kGeometryEncodingValues[] = { /** * Allowed for physical type: BYTE_ARRAY. @@ -408,15 +379,22 @@ int _kGeometryEncodingValues[] = { * Well-known binary (WKB) representations of geometries. * * To be clear, we follow the same rule of WKB and coordinate axis order from - * GeoParquet [1][2]. It is the ISO WKB supporting XY, XYZ, XYM, XYZM and the - * standard geometry types (Point, LineString, Polygon, MultiPoint, - * MultiLineString, MultiPolygon, and GeometryCollection). + * GeoParquet [1][2]. Geometries SHOULD be encoded as ISO WKB [3][4] + * supporting XY, XYZ, XYM, XYZM and the standard geometry types + * Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon, + * and GeometryCollection). Coordinate order is always (x, y) where x is + * easting or longitude and y is northing or latitude. This ordering explicitly + * overrides the axis order as specified in the CRS following the GeoPackage + * specification [5]. * * This is the preferred encoding for maximum portability. It also supports * GeometryStatistics to be set in the column chunk and page index. * * [1] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L92 * [2] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L155 + * [3] https://portal.ogc.org/files/?artifact_id=18241 + * [4] https://www.iso.org/standard/60343.html + * [5] https://www.geopackage.org/spec130/#gpb_spec */ GeometryEncoding::WKB }; @@ -427,15 +405,22 @@ const char* _kGeometryEncodingNames[] = { * Well-known binary (WKB) representations of geometries. * * To be clear, we follow the same rule of WKB and coordinate axis order from - * GeoParquet [1][2]. 
It is the ISO WKB supporting XY, XYZ, XYM, XYZM and the - * standard geometry types (Point, LineString, Polygon, MultiPoint, - * MultiLineString, MultiPolygon, and GeometryCollection). + * GeoParquet [1][2]. Geometries SHOULD be encoded as ISO WKB [3][4] + * supporting XY, XYZ, XYM, XYZM and the standard geometry types + * Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon, + * and GeometryCollection). Coordinate order is always (x, y) where x is + * easting or longitude and y is northing or latitude. This ordering explicitly + * overrides the axis order as specified in the CRS following the GeoPackage + * specification [5]. * * This is the preferred encoding for maximum portability. It also supports * GeometryStatistics to be set in the column chunk and page index. * * [1] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L92 * [2] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L155 + * [3] https://portal.ogc.org/files/?artifact_id=18241 + * [4] https://www.iso.org/standard/60343.html + * [5] https://www.geopackage.org/spec130/#gpb_spec */ "WKB" }; @@ -460,6 +445,35 @@ std::string to_string(const GeometryEncoding::type& val) { } } +int _kEdgesValues[] = { + Edges::PLANAR, + Edges::SPHERICAL +}; +const char* _kEdgesNames[] = { + "PLANAR", + "SPHERICAL" +}; +const std::map _Edges_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(2, _kEdgesValues, _kEdgesNames), ::apache::thrift::TEnumIterator(-1, nullptr, nullptr)); + +std::ostream& operator<<(std::ostream& out, const Edges::type& val) { + std::map::const_iterator it = _Edges_VALUES_TO_NAMES.find(val); + if (it != _Edges_VALUES_TO_NAMES.end()) { + out << it->second; + } else { + out << static_cast(val); + } + return out; +} + +std::string to_string(const Edges::type& val) { + std::map::const_iterator it = _Edges_VALUES_TO_NAMES.find(val); + if (it != _Edges_VALUES_TO_NAMES.end()) { + return 
std::string(it->second); + } else { + return std::to_string(static_cast(val)); + } +} + int _kEncodingValues[] = { /** * Default encoding. @@ -778,57 +792,6 @@ void SizeStatistics::printTo(std::ostream& out) const { } -Covering::~Covering() noexcept { -} - - -void Covering::__set_kind(const std::string& val) { - this->kind = val; -} - -void Covering::__set_value(const std::string& val) { - this->value = val; -} -std::ostream& operator<<(std::ostream& out, const Covering& obj) -{ - obj.printTo(out); - return out; -} - - -void swap(Covering &a, Covering &b) { - using ::std::swap; - swap(a.kind, b.kind); - swap(a.value, b.value); -} - -Covering::Covering(const Covering& other16) { - kind = other16.kind; - value = other16.value; -} -Covering::Covering(Covering&& other17) noexcept { - kind = std::move(other17.kind); - value = std::move(other17.value); -} -Covering& Covering::operator=(const Covering& other18) { - kind = other18.kind; - value = other18.value; - return *this; -} -Covering& Covering::operator=(Covering&& other19) noexcept { - kind = std::move(other19.kind); - value = std::move(other19.value); - return *this; -} -void Covering::printTo(std::ostream& out) const { - using ::apache::thrift::to_string; - out << "Covering("; - out << "kind=" << to_string(kind); - out << ", " << "value=" << to_string(value); - out << ")"; -} - - BoundingBox::~BoundingBox() noexcept { } @@ -888,50 +851,50 @@ void swap(BoundingBox &a, BoundingBox &b) { swap(a.__isset, b.__isset); } -BoundingBox::BoundingBox(const BoundingBox& other20) noexcept { - xmin = other20.xmin; - xmax = other20.xmax; - ymin = other20.ymin; - ymax = other20.ymax; - zmin = other20.zmin; - zmax = other20.zmax; - mmin = other20.mmin; - mmax = other20.mmax; - __isset = other20.__isset; -} -BoundingBox::BoundingBox(BoundingBox&& other21) noexcept { - xmin = other21.xmin; - xmax = other21.xmax; - ymin = other21.ymin; - ymax = other21.ymax; - zmin = other21.zmin; - zmax = other21.zmax; - mmin = other21.mmin; - mmax 
= other21.mmax; - __isset = other21.__isset; -} -BoundingBox& BoundingBox::operator=(const BoundingBox& other22) noexcept { - xmin = other22.xmin; - xmax = other22.xmax; - ymin = other22.ymin; - ymax = other22.ymax; - zmin = other22.zmin; - zmax = other22.zmax; - mmin = other22.mmin; - mmax = other22.mmax; - __isset = other22.__isset; +BoundingBox::BoundingBox(const BoundingBox& other16) noexcept { + xmin = other16.xmin; + xmax = other16.xmax; + ymin = other16.ymin; + ymax = other16.ymax; + zmin = other16.zmin; + zmax = other16.zmax; + mmin = other16.mmin; + mmax = other16.mmax; + __isset = other16.__isset; +} +BoundingBox::BoundingBox(BoundingBox&& other17) noexcept { + xmin = other17.xmin; + xmax = other17.xmax; + ymin = other17.ymin; + ymax = other17.ymax; + zmin = other17.zmin; + zmax = other17.zmax; + mmin = other17.mmin; + mmax = other17.mmax; + __isset = other17.__isset; +} +BoundingBox& BoundingBox::operator=(const BoundingBox& other18) noexcept { + xmin = other18.xmin; + xmax = other18.xmax; + ymin = other18.ymin; + ymax = other18.ymax; + zmin = other18.zmin; + zmax = other18.zmax; + mmin = other18.mmin; + mmax = other18.mmax; + __isset = other18.__isset; return *this; } -BoundingBox& BoundingBox::operator=(BoundingBox&& other23) noexcept { - xmin = other23.xmin; - xmax = other23.xmax; - ymin = other23.ymin; - ymax = other23.ymax; - zmin = other23.zmin; - zmax = other23.zmax; - mmin = other23.mmin; - mmax = other23.mmax; - __isset = other23.__isset; +BoundingBox& BoundingBox::operator=(BoundingBox&& other19) noexcept { + xmin = other19.xmin; + xmax = other19.xmax; + ymin = other19.ymin; + ymax = other19.ymax; + zmin = other19.zmin; + zmax = other19.zmax; + mmin = other19.mmin; + mmax = other19.mmax; + __isset = other19.__isset; return *this; } void BoundingBox::printTo(std::ostream& out) const { @@ -958,11 +921,6 @@ void GeometryStatistics::__set_bbox(const BoundingBox& val) { __isset.bbox = true; } -void GeometryStatistics::__set_coverings(const 
std::vector & val) { - this->coverings = val; -__isset.coverings = true; -} - void GeometryStatistics::__set_geometry_types(const std::vector & val) { this->geometry_types = val; __isset.geometry_types = true; @@ -977,42 +935,36 @@ std::ostream& operator<<(std::ostream& out, const GeometryStatistics& obj) void swap(GeometryStatistics &a, GeometryStatistics &b) { using ::std::swap; swap(a.bbox, b.bbox); - swap(a.coverings, b.coverings); swap(a.geometry_types, b.geometry_types); swap(a.__isset, b.__isset); } -GeometryStatistics::GeometryStatistics(const GeometryStatistics& other36) { - bbox = other36.bbox; - coverings = other36.coverings; - geometry_types = other36.geometry_types; - __isset = other36.__isset; +GeometryStatistics::GeometryStatistics(const GeometryStatistics& other26) { + bbox = other26.bbox; + geometry_types = other26.geometry_types; + __isset = other26.__isset; } -GeometryStatistics::GeometryStatistics(GeometryStatistics&& other37) noexcept { - bbox = std::move(other37.bbox); - coverings = std::move(other37.coverings); - geometry_types = std::move(other37.geometry_types); - __isset = other37.__isset; +GeometryStatistics::GeometryStatistics(GeometryStatistics&& other27) noexcept { + bbox = std::move(other27.bbox); + geometry_types = std::move(other27.geometry_types); + __isset = other27.__isset; } -GeometryStatistics& GeometryStatistics::operator=(const GeometryStatistics& other38) { - bbox = other38.bbox; - coverings = other38.coverings; - geometry_types = other38.geometry_types; - __isset = other38.__isset; +GeometryStatistics& GeometryStatistics::operator=(const GeometryStatistics& other28) { + bbox = other28.bbox; + geometry_types = other28.geometry_types; + __isset = other28.__isset; return *this; } -GeometryStatistics& GeometryStatistics::operator=(GeometryStatistics&& other39) noexcept { - bbox = std::move(other39.bbox); - coverings = std::move(other39.coverings); - geometry_types = std::move(other39.geometry_types); - __isset = 
other39.__isset; +GeometryStatistics& GeometryStatistics::operator=(GeometryStatistics&& other29) noexcept { + bbox = std::move(other29.bbox); + geometry_types = std::move(other29.geometry_types); + __isset = other29.__isset; return *this; } void GeometryStatistics::printTo(std::ostream& out) const { using ::apache::thrift::to_string; out << "GeometryStatistics("; out << "bbox="; (__isset.bbox ? (out << to_string(bbox)) : (out << "")); - out << ", " << "coverings="; (__isset.coverings ? (out << to_string(coverings)) : (out << "")); out << ", " << "geometry_types="; (__isset.geometry_types ? (out << to_string(geometry_types)) : (out << "")); out << ")"; } @@ -1087,54 +1039,54 @@ void swap(Statistics &a, Statistics &b) { swap(a.__isset, b.__isset); } -Statistics::Statistics(const Statistics& other40) { - max = other40.max; - min = other40.min; - null_count = other40.null_count; - distinct_count = other40.distinct_count; - max_value = other40.max_value; - min_value = other40.min_value; - is_max_value_exact = other40.is_max_value_exact; - is_min_value_exact = other40.is_min_value_exact; - geometry_stats = other40.geometry_stats; - __isset = other40.__isset; -} -Statistics::Statistics(Statistics&& other41) noexcept { - max = std::move(other41.max); - min = std::move(other41.min); - null_count = other41.null_count; - distinct_count = other41.distinct_count; - max_value = std::move(other41.max_value); - min_value = std::move(other41.min_value); - is_max_value_exact = other41.is_max_value_exact; - is_min_value_exact = other41.is_min_value_exact; - geometry_stats = std::move(other41.geometry_stats); - __isset = other41.__isset; -} -Statistics& Statistics::operator=(const Statistics& other42) { - max = other42.max; - min = other42.min; - null_count = other42.null_count; - distinct_count = other42.distinct_count; - max_value = other42.max_value; - min_value = other42.min_value; - is_max_value_exact = other42.is_max_value_exact; - is_min_value_exact = 
other42.is_min_value_exact; - geometry_stats = other42.geometry_stats; - __isset = other42.__isset; +Statistics::Statistics(const Statistics& other30) { + max = other30.max; + min = other30.min; + null_count = other30.null_count; + distinct_count = other30.distinct_count; + max_value = other30.max_value; + min_value = other30.min_value; + is_max_value_exact = other30.is_max_value_exact; + is_min_value_exact = other30.is_min_value_exact; + geometry_stats = other30.geometry_stats; + __isset = other30.__isset; +} +Statistics::Statistics(Statistics&& other31) noexcept { + max = std::move(other31.max); + min = std::move(other31.min); + null_count = other31.null_count; + distinct_count = other31.distinct_count; + max_value = std::move(other31.max_value); + min_value = std::move(other31.min_value); + is_max_value_exact = other31.is_max_value_exact; + is_min_value_exact = other31.is_min_value_exact; + geometry_stats = std::move(other31.geometry_stats); + __isset = other31.__isset; +} +Statistics& Statistics::operator=(const Statistics& other32) { + max = other32.max; + min = other32.min; + null_count = other32.null_count; + distinct_count = other32.distinct_count; + max_value = other32.max_value; + min_value = other32.min_value; + is_max_value_exact = other32.is_max_value_exact; + is_min_value_exact = other32.is_min_value_exact; + geometry_stats = other32.geometry_stats; + __isset = other32.__isset; return *this; } -Statistics& Statistics::operator=(Statistics&& other43) noexcept { - max = std::move(other43.max); - min = std::move(other43.min); - null_count = other43.null_count; - distinct_count = other43.distinct_count; - max_value = std::move(other43.max_value); - min_value = std::move(other43.min_value); - is_max_value_exact = other43.is_max_value_exact; - is_min_value_exact = other43.is_min_value_exact; - geometry_stats = std::move(other43.geometry_stats); - __isset = other43.__isset; +Statistics& Statistics::operator=(Statistics&& other33) noexcept { + max = 
std::move(other33.max); + min = std::move(other33.min); + null_count = other33.null_count; + distinct_count = other33.distinct_count; + max_value = std::move(other33.max_value); + min_value = std::move(other33.min_value); + is_max_value_exact = other33.is_max_value_exact; + is_min_value_exact = other33.is_min_value_exact; + geometry_stats = std::move(other33.geometry_stats); + __isset = other33.__isset; return *this; } void Statistics::printTo(std::ostream& out) const { @@ -1169,18 +1121,18 @@ void swap(StringType &a, StringType &b) { (void) b; } -StringType::StringType(const StringType& other44) noexcept { - (void) other44; +StringType::StringType(const StringType& other34) noexcept { + (void) other34; } -StringType::StringType(StringType&& other45) noexcept { - (void) other45; +StringType::StringType(StringType&& other35) noexcept { + (void) other35; } -StringType& StringType::operator=(const StringType& other46) noexcept { - (void) other46; +StringType& StringType::operator=(const StringType& other36) noexcept { + (void) other36; return *this; } -StringType& StringType::operator=(StringType&& other47) noexcept { - (void) other47; +StringType& StringType::operator=(StringType&& other37) noexcept { + (void) other37; return *this; } void StringType::printTo(std::ostream& out) const { @@ -1206,18 +1158,18 @@ void swap(UUIDType &a, UUIDType &b) { (void) b; } -UUIDType::UUIDType(const UUIDType& other48) noexcept { - (void) other48; +UUIDType::UUIDType(const UUIDType& other38) noexcept { + (void) other38; } -UUIDType::UUIDType(UUIDType&& other49) noexcept { - (void) other49; +UUIDType::UUIDType(UUIDType&& other39) noexcept { + (void) other39; } -UUIDType& UUIDType::operator=(const UUIDType& other50) noexcept { - (void) other50; +UUIDType& UUIDType::operator=(const UUIDType& other40) noexcept { + (void) other40; return *this; } -UUIDType& UUIDType::operator=(UUIDType&& other51) noexcept { - (void) other51; +UUIDType& UUIDType::operator=(UUIDType&& other41) noexcept { + 
(void) other41; return *this; } void UUIDType::printTo(std::ostream& out) const { @@ -1243,18 +1195,18 @@ void swap(MapType &a, MapType &b) { (void) b; } -MapType::MapType(const MapType& other52) noexcept { - (void) other52; +MapType::MapType(const MapType& other42) noexcept { + (void) other42; } -MapType::MapType(MapType&& other53) noexcept { - (void) other53; +MapType::MapType(MapType&& other43) noexcept { + (void) other43; } -MapType& MapType::operator=(const MapType& other54) noexcept { - (void) other54; +MapType& MapType::operator=(const MapType& other44) noexcept { + (void) other44; return *this; } -MapType& MapType::operator=(MapType&& other55) noexcept { - (void) other55; +MapType& MapType::operator=(MapType&& other45) noexcept { + (void) other45; return *this; } void MapType::printTo(std::ostream& out) const { @@ -1280,18 +1232,18 @@ void swap(ListType &a, ListType &b) { (void) b; } -ListType::ListType(const ListType& other56) noexcept { - (void) other56; +ListType::ListType(const ListType& other46) noexcept { + (void) other46; } -ListType::ListType(ListType&& other57) noexcept { - (void) other57; +ListType::ListType(ListType&& other47) noexcept { + (void) other47; } -ListType& ListType::operator=(const ListType& other58) noexcept { - (void) other58; +ListType& ListType::operator=(const ListType& other48) noexcept { + (void) other48; return *this; } -ListType& ListType::operator=(ListType&& other59) noexcept { - (void) other59; +ListType& ListType::operator=(ListType&& other49) noexcept { + (void) other49; return *this; } void ListType::printTo(std::ostream& out) const { @@ -1317,18 +1269,18 @@ void swap(EnumType &a, EnumType &b) { (void) b; } -EnumType::EnumType(const EnumType& other60) noexcept { - (void) other60; +EnumType::EnumType(const EnumType& other50) noexcept { + (void) other50; } -EnumType::EnumType(EnumType&& other61) noexcept { - (void) other61; +EnumType::EnumType(EnumType&& other51) noexcept { + (void) other51; } -EnumType& 
EnumType::operator=(const EnumType& other62) noexcept { - (void) other62; +EnumType& EnumType::operator=(const EnumType& other52) noexcept { + (void) other52; return *this; } -EnumType& EnumType::operator=(EnumType&& other63) noexcept { - (void) other63; +EnumType& EnumType::operator=(EnumType&& other53) noexcept { + (void) other53; return *this; } void EnumType::printTo(std::ostream& out) const { @@ -1354,18 +1306,18 @@ void swap(DateType &a, DateType &b) { (void) b; } -DateType::DateType(const DateType& other64) noexcept { - (void) other64; +DateType::DateType(const DateType& other54) noexcept { + (void) other54; } -DateType::DateType(DateType&& other65) noexcept { - (void) other65; +DateType::DateType(DateType&& other55) noexcept { + (void) other55; } -DateType& DateType::operator=(const DateType& other66) noexcept { - (void) other66; +DateType& DateType::operator=(const DateType& other56) noexcept { + (void) other56; return *this; } -DateType& DateType::operator=(DateType&& other67) noexcept { - (void) other67; +DateType& DateType::operator=(DateType&& other57) noexcept { + (void) other57; return *this; } void DateType::printTo(std::ostream& out) const { @@ -1391,18 +1343,18 @@ void swap(Float16Type &a, Float16Type &b) { (void) b; } -Float16Type::Float16Type(const Float16Type& other68) noexcept { - (void) other68; +Float16Type::Float16Type(const Float16Type& other58) noexcept { + (void) other58; } -Float16Type::Float16Type(Float16Type&& other69) noexcept { - (void) other69; +Float16Type::Float16Type(Float16Type&& other59) noexcept { + (void) other59; } -Float16Type& Float16Type::operator=(const Float16Type& other70) noexcept { - (void) other70; +Float16Type& Float16Type::operator=(const Float16Type& other60) noexcept { + (void) other60; return *this; } -Float16Type& Float16Type::operator=(Float16Type&& other71) noexcept { - (void) other71; +Float16Type& Float16Type::operator=(Float16Type&& other61) noexcept { + (void) other61; return *this; } void 
Float16Type::printTo(std::ostream& out) const { @@ -1428,18 +1380,18 @@ void swap(NullType &a, NullType &b) { (void) b; } -NullType::NullType(const NullType& other72) noexcept { - (void) other72; +NullType::NullType(const NullType& other62) noexcept { + (void) other62; } -NullType::NullType(NullType&& other73) noexcept { - (void) other73; +NullType::NullType(NullType&& other63) noexcept { + (void) other63; } -NullType& NullType::operator=(const NullType& other74) noexcept { - (void) other74; +NullType& NullType::operator=(const NullType& other64) noexcept { + (void) other64; return *this; } -NullType& NullType::operator=(NullType&& other75) noexcept { - (void) other75; +NullType& NullType::operator=(NullType&& other65) noexcept { + (void) other65; return *this; } void NullType::printTo(std::ostream& out) const { @@ -1473,22 +1425,22 @@ void swap(DecimalType &a, DecimalType &b) { swap(a.precision, b.precision); } -DecimalType::DecimalType(const DecimalType& other76) noexcept { - scale = other76.scale; - precision = other76.precision; +DecimalType::DecimalType(const DecimalType& other66) noexcept { + scale = other66.scale; + precision = other66.precision; } -DecimalType::DecimalType(DecimalType&& other77) noexcept { - scale = other77.scale; - precision = other77.precision; +DecimalType::DecimalType(DecimalType&& other67) noexcept { + scale = other67.scale; + precision = other67.precision; } -DecimalType& DecimalType::operator=(const DecimalType& other78) noexcept { - scale = other78.scale; - precision = other78.precision; +DecimalType& DecimalType::operator=(const DecimalType& other68) noexcept { + scale = other68.scale; + precision = other68.precision; return *this; } -DecimalType& DecimalType::operator=(DecimalType&& other79) noexcept { - scale = other79.scale; - precision = other79.precision; +DecimalType& DecimalType::operator=(DecimalType&& other69) noexcept { + scale = other69.scale; + precision = other69.precision; return *this; } void 
DecimalType::printTo(std::ostream& out) const { @@ -1516,18 +1468,18 @@ void swap(MilliSeconds &a, MilliSeconds &b) { (void) b; } -MilliSeconds::MilliSeconds(const MilliSeconds& other80) noexcept { - (void) other80; +MilliSeconds::MilliSeconds(const MilliSeconds& other70) noexcept { + (void) other70; } -MilliSeconds::MilliSeconds(MilliSeconds&& other81) noexcept { - (void) other81; +MilliSeconds::MilliSeconds(MilliSeconds&& other71) noexcept { + (void) other71; } -MilliSeconds& MilliSeconds::operator=(const MilliSeconds& other82) noexcept { - (void) other82; +MilliSeconds& MilliSeconds::operator=(const MilliSeconds& other72) noexcept { + (void) other72; return *this; } -MilliSeconds& MilliSeconds::operator=(MilliSeconds&& other83) noexcept { - (void) other83; +MilliSeconds& MilliSeconds::operator=(MilliSeconds&& other73) noexcept { + (void) other73; return *this; } void MilliSeconds::printTo(std::ostream& out) const { @@ -1553,18 +1505,18 @@ void swap(MicroSeconds &a, MicroSeconds &b) { (void) b; } -MicroSeconds::MicroSeconds(const MicroSeconds& other84) noexcept { - (void) other84; +MicroSeconds::MicroSeconds(const MicroSeconds& other74) noexcept { + (void) other74; } -MicroSeconds::MicroSeconds(MicroSeconds&& other85) noexcept { - (void) other85; +MicroSeconds::MicroSeconds(MicroSeconds&& other75) noexcept { + (void) other75; } -MicroSeconds& MicroSeconds::operator=(const MicroSeconds& other86) noexcept { - (void) other86; +MicroSeconds& MicroSeconds::operator=(const MicroSeconds& other76) noexcept { + (void) other76; return *this; } -MicroSeconds& MicroSeconds::operator=(MicroSeconds&& other87) noexcept { - (void) other87; +MicroSeconds& MicroSeconds::operator=(MicroSeconds&& other77) noexcept { + (void) other77; return *this; } void MicroSeconds::printTo(std::ostream& out) const { @@ -1590,18 +1542,18 @@ void swap(NanoSeconds &a, NanoSeconds &b) { (void) b; } -NanoSeconds::NanoSeconds(const NanoSeconds& other88) noexcept { - (void) other88; 
+NanoSeconds::NanoSeconds(const NanoSeconds& other78) noexcept { + (void) other78; } -NanoSeconds::NanoSeconds(NanoSeconds&& other89) noexcept { - (void) other89; +NanoSeconds::NanoSeconds(NanoSeconds&& other79) noexcept { + (void) other79; } -NanoSeconds& NanoSeconds::operator=(const NanoSeconds& other90) noexcept { - (void) other90; +NanoSeconds& NanoSeconds::operator=(const NanoSeconds& other80) noexcept { + (void) other80; return *this; } -NanoSeconds& NanoSeconds::operator=(NanoSeconds&& other91) noexcept { - (void) other91; +NanoSeconds& NanoSeconds::operator=(NanoSeconds&& other81) noexcept { + (void) other81; return *this; } void NanoSeconds::printTo(std::ostream& out) const { @@ -1644,30 +1596,30 @@ void swap(TimeUnit &a, TimeUnit &b) { swap(a.__isset, b.__isset); } -TimeUnit::TimeUnit(const TimeUnit& other92) noexcept { - MILLIS = other92.MILLIS; - MICROS = other92.MICROS; - NANOS = other92.NANOS; - __isset = other92.__isset; +TimeUnit::TimeUnit(const TimeUnit& other82) noexcept { + MILLIS = other82.MILLIS; + MICROS = other82.MICROS; + NANOS = other82.NANOS; + __isset = other82.__isset; } -TimeUnit::TimeUnit(TimeUnit&& other93) noexcept { - MILLIS = std::move(other93.MILLIS); - MICROS = std::move(other93.MICROS); - NANOS = std::move(other93.NANOS); - __isset = other93.__isset; +TimeUnit::TimeUnit(TimeUnit&& other83) noexcept { + MILLIS = std::move(other83.MILLIS); + MICROS = std::move(other83.MICROS); + NANOS = std::move(other83.NANOS); + __isset = other83.__isset; } -TimeUnit& TimeUnit::operator=(const TimeUnit& other94) noexcept { - MILLIS = other94.MILLIS; - MICROS = other94.MICROS; - NANOS = other94.NANOS; - __isset = other94.__isset; +TimeUnit& TimeUnit::operator=(const TimeUnit& other84) noexcept { + MILLIS = other84.MILLIS; + MICROS = other84.MICROS; + NANOS = other84.NANOS; + __isset = other84.__isset; return *this; } -TimeUnit& TimeUnit::operator=(TimeUnit&& other95) noexcept { - MILLIS = std::move(other95.MILLIS); - MICROS = 
std::move(other95.MICROS); - NANOS = std::move(other95.NANOS); - __isset = other95.__isset; +TimeUnit& TimeUnit::operator=(TimeUnit&& other85) noexcept { + MILLIS = std::move(other85.MILLIS); + MICROS = std::move(other85.MICROS); + NANOS = std::move(other85.NANOS); + __isset = other85.__isset; return *this; } void TimeUnit::printTo(std::ostream& out) const { @@ -1704,22 +1656,22 @@ void swap(TimestampType &a, TimestampType &b) { swap(a.unit, b.unit); } -TimestampType::TimestampType(const TimestampType& other96) noexcept { - isAdjustedToUTC = other96.isAdjustedToUTC; - unit = other96.unit; +TimestampType::TimestampType(const TimestampType& other86) noexcept { + isAdjustedToUTC = other86.isAdjustedToUTC; + unit = other86.unit; } -TimestampType::TimestampType(TimestampType&& other97) noexcept { - isAdjustedToUTC = other97.isAdjustedToUTC; - unit = std::move(other97.unit); +TimestampType::TimestampType(TimestampType&& other87) noexcept { + isAdjustedToUTC = other87.isAdjustedToUTC; + unit = std::move(other87.unit); } -TimestampType& TimestampType::operator=(const TimestampType& other98) noexcept { - isAdjustedToUTC = other98.isAdjustedToUTC; - unit = other98.unit; +TimestampType& TimestampType::operator=(const TimestampType& other88) noexcept { + isAdjustedToUTC = other88.isAdjustedToUTC; + unit = other88.unit; return *this; } -TimestampType& TimestampType::operator=(TimestampType&& other99) noexcept { - isAdjustedToUTC = other99.isAdjustedToUTC; - unit = std::move(other99.unit); +TimestampType& TimestampType::operator=(TimestampType&& other89) noexcept { + isAdjustedToUTC = other89.isAdjustedToUTC; + unit = std::move(other89.unit); return *this; } void TimestampType::printTo(std::ostream& out) const { @@ -1755,22 +1707,22 @@ void swap(TimeType &a, TimeType &b) { swap(a.unit, b.unit); } -TimeType::TimeType(const TimeType& other100) noexcept { - isAdjustedToUTC = other100.isAdjustedToUTC; - unit = other100.unit; +TimeType::TimeType(const TimeType& other90) noexcept { + 
isAdjustedToUTC = other90.isAdjustedToUTC; + unit = other90.unit; } -TimeType::TimeType(TimeType&& other101) noexcept { - isAdjustedToUTC = other101.isAdjustedToUTC; - unit = std::move(other101.unit); +TimeType::TimeType(TimeType&& other91) noexcept { + isAdjustedToUTC = other91.isAdjustedToUTC; + unit = std::move(other91.unit); } -TimeType& TimeType::operator=(const TimeType& other102) noexcept { - isAdjustedToUTC = other102.isAdjustedToUTC; - unit = other102.unit; +TimeType& TimeType::operator=(const TimeType& other92) noexcept { + isAdjustedToUTC = other92.isAdjustedToUTC; + unit = other92.unit; return *this; } -TimeType& TimeType::operator=(TimeType&& other103) noexcept { - isAdjustedToUTC = other103.isAdjustedToUTC; - unit = std::move(other103.unit); +TimeType& TimeType::operator=(TimeType&& other93) noexcept { + isAdjustedToUTC = other93.isAdjustedToUTC; + unit = std::move(other93.unit); return *this; } void TimeType::printTo(std::ostream& out) const { @@ -1806,22 +1758,22 @@ void swap(IntType &a, IntType &b) { swap(a.isSigned, b.isSigned); } -IntType::IntType(const IntType& other104) noexcept { - bitWidth = other104.bitWidth; - isSigned = other104.isSigned; +IntType::IntType(const IntType& other94) noexcept { + bitWidth = other94.bitWidth; + isSigned = other94.isSigned; } -IntType::IntType(IntType&& other105) noexcept { - bitWidth = other105.bitWidth; - isSigned = other105.isSigned; +IntType::IntType(IntType&& other95) noexcept { + bitWidth = other95.bitWidth; + isSigned = other95.isSigned; } -IntType& IntType::operator=(const IntType& other106) noexcept { - bitWidth = other106.bitWidth; - isSigned = other106.isSigned; +IntType& IntType::operator=(const IntType& other96) noexcept { + bitWidth = other96.bitWidth; + isSigned = other96.isSigned; return *this; } -IntType& IntType::operator=(IntType&& other107) noexcept { - bitWidth = other107.bitWidth; - isSigned = other107.isSigned; +IntType& IntType::operator=(IntType&& other97) noexcept { + bitWidth = 
other97.bitWidth; + isSigned = other97.isSigned; return *this; } void IntType::printTo(std::ostream& out) const { @@ -1849,18 +1801,18 @@ void swap(JsonType &a, JsonType &b) { (void) b; } -JsonType::JsonType(const JsonType& other108) noexcept { - (void) other108; +JsonType::JsonType(const JsonType& other98) noexcept { + (void) other98; } -JsonType::JsonType(JsonType&& other109) noexcept { - (void) other109; +JsonType::JsonType(JsonType&& other99) noexcept { + (void) other99; } -JsonType& JsonType::operator=(const JsonType& other110) noexcept { - (void) other110; +JsonType& JsonType::operator=(const JsonType& other100) noexcept { + (void) other100; return *this; } -JsonType& JsonType::operator=(JsonType&& other111) noexcept { - (void) other111; +JsonType& JsonType::operator=(JsonType&& other101) noexcept { + (void) other101; return *this; } void JsonType::printTo(std::ostream& out) const { @@ -1886,18 +1838,18 @@ void swap(BsonType &a, BsonType &b) { (void) b; } -BsonType::BsonType(const BsonType& other112) noexcept { - (void) other112; +BsonType::BsonType(const BsonType& other102) noexcept { + (void) other102; } -BsonType::BsonType(BsonType&& other113) noexcept { - (void) other113; +BsonType::BsonType(BsonType&& other103) noexcept { + (void) other103; } -BsonType& BsonType::operator=(const BsonType& other114) noexcept { - (void) other114; +BsonType& BsonType::operator=(const BsonType& other104) noexcept { + (void) other104; return *this; } -BsonType& BsonType::operator=(BsonType&& other115) noexcept { - (void) other115; +BsonType& BsonType::operator=(BsonType&& other105) noexcept { + (void) other105; return *this; } void BsonType::printTo(std::ostream& out) const { @@ -1923,16 +1875,6 @@ void GeometryType::__set_crs(const std::string& val) { this->crs = val; __isset.crs = true; } - -void GeometryType::__set_crs_encoding(const std::string& val) { - this->crs_encoding = val; -__isset.crs_encoding = true; -} - -void GeometryType::__set_metadata(const std::string& val) 
{ - this->metadata = val; -__isset.metadata = true; -} std::ostream& operator<<(std::ostream& out, const GeometryType& obj) { obj.printTo(out); @@ -1945,43 +1887,33 @@ void swap(GeometryType &a, GeometryType &b) { swap(a.encoding, b.encoding); swap(a.edges, b.edges); swap(a.crs, b.crs); - swap(a.crs_encoding, b.crs_encoding); - swap(a.metadata, b.metadata); swap(a.__isset, b.__isset); } -GeometryType::GeometryType(const GeometryType& other118) { - encoding = other118.encoding; - edges = other118.edges; - crs = other118.crs; - crs_encoding = other118.crs_encoding; - metadata = other118.metadata; - __isset = other118.__isset; -} -GeometryType::GeometryType(GeometryType&& other119) noexcept { - encoding = other119.encoding; - edges = other119.edges; - crs = std::move(other119.crs); - crs_encoding = std::move(other119.crs_encoding); - metadata = std::move(other119.metadata); - __isset = other119.__isset; +GeometryType::GeometryType(const GeometryType& other108) { + encoding = other108.encoding; + edges = other108.edges; + crs = other108.crs; + __isset = other108.__isset; } -GeometryType& GeometryType::operator=(const GeometryType& other120) { - encoding = other120.encoding; - edges = other120.edges; - crs = other120.crs; - crs_encoding = other120.crs_encoding; - metadata = other120.metadata; - __isset = other120.__isset; +GeometryType::GeometryType(GeometryType&& other109) noexcept { + encoding = other109.encoding; + edges = other109.edges; + crs = std::move(other109.crs); + __isset = other109.__isset; +} +GeometryType& GeometryType::operator=(const GeometryType& other110) { + encoding = other110.encoding; + edges = other110.edges; + crs = other110.crs; + __isset = other110.__isset; return *this; } -GeometryType& GeometryType::operator=(GeometryType&& other121) noexcept { - encoding = other121.encoding; - edges = other121.edges; - crs = std::move(other121.crs); - crs_encoding = std::move(other121.crs_encoding); - metadata = std::move(other121.metadata); - __isset = 
other121.__isset; +GeometryType& GeometryType::operator=(GeometryType&& other111) noexcept { + encoding = other111.encoding; + edges = other111.edges; + crs = std::move(other111.crs); + __isset = other111.__isset; return *this; } void GeometryType::printTo(std::ostream& out) const { @@ -1990,8 +1922,6 @@ void GeometryType::printTo(std::ostream& out) const { out << "encoding=" << to_string(encoding); out << ", " << "edges=" << to_string(edges); out << ", " << "crs="; (__isset.crs ? (out << to_string(crs)) : (out << "")); - out << ", " << "crs_encoding="; (__isset.crs_encoding ? (out << to_string(crs_encoding)) : (out << "")); - out << ", " << "metadata="; (__isset.metadata ? (out << to_string(metadata)) : (out << "")); out << ")"; } @@ -2101,78 +2031,78 @@ void swap(LogicalType &a, LogicalType &b) { swap(a.__isset, b.__isset); } -LogicalType::LogicalType(const LogicalType& other122) { - STRING = other122.STRING; - MAP = other122.MAP; - LIST = other122.LIST; - ENUM = other122.ENUM; - DECIMAL = other122.DECIMAL; - DATE = other122.DATE; - TIME = other122.TIME; - TIMESTAMP = other122.TIMESTAMP; - INTEGER = other122.INTEGER; - UNKNOWN = other122.UNKNOWN; - JSON = other122.JSON; - BSON = other122.BSON; - UUID = other122.UUID; - FLOAT16 = other122.FLOAT16; - GEOMETRY = other122.GEOMETRY; - __isset = other122.__isset; -} -LogicalType::LogicalType(LogicalType&& other123) noexcept { - STRING = std::move(other123.STRING); - MAP = std::move(other123.MAP); - LIST = std::move(other123.LIST); - ENUM = std::move(other123.ENUM); - DECIMAL = std::move(other123.DECIMAL); - DATE = std::move(other123.DATE); - TIME = std::move(other123.TIME); - TIMESTAMP = std::move(other123.TIMESTAMP); - INTEGER = std::move(other123.INTEGER); - UNKNOWN = std::move(other123.UNKNOWN); - JSON = std::move(other123.JSON); - BSON = std::move(other123.BSON); - UUID = std::move(other123.UUID); - FLOAT16 = std::move(other123.FLOAT16); - GEOMETRY = std::move(other123.GEOMETRY); - __isset = other123.__isset; -} 
-LogicalType& LogicalType::operator=(const LogicalType& other124) { - STRING = other124.STRING; - MAP = other124.MAP; - LIST = other124.LIST; - ENUM = other124.ENUM; - DECIMAL = other124.DECIMAL; - DATE = other124.DATE; - TIME = other124.TIME; - TIMESTAMP = other124.TIMESTAMP; - INTEGER = other124.INTEGER; - UNKNOWN = other124.UNKNOWN; - JSON = other124.JSON; - BSON = other124.BSON; - UUID = other124.UUID; - FLOAT16 = other124.FLOAT16; - GEOMETRY = other124.GEOMETRY; - __isset = other124.__isset; +LogicalType::LogicalType(const LogicalType& other112) { + STRING = other112.STRING; + MAP = other112.MAP; + LIST = other112.LIST; + ENUM = other112.ENUM; + DECIMAL = other112.DECIMAL; + DATE = other112.DATE; + TIME = other112.TIME; + TIMESTAMP = other112.TIMESTAMP; + INTEGER = other112.INTEGER; + UNKNOWN = other112.UNKNOWN; + JSON = other112.JSON; + BSON = other112.BSON; + UUID = other112.UUID; + FLOAT16 = other112.FLOAT16; + GEOMETRY = other112.GEOMETRY; + __isset = other112.__isset; +} +LogicalType::LogicalType(LogicalType&& other113) noexcept { + STRING = std::move(other113.STRING); + MAP = std::move(other113.MAP); + LIST = std::move(other113.LIST); + ENUM = std::move(other113.ENUM); + DECIMAL = std::move(other113.DECIMAL); + DATE = std::move(other113.DATE); + TIME = std::move(other113.TIME); + TIMESTAMP = std::move(other113.TIMESTAMP); + INTEGER = std::move(other113.INTEGER); + UNKNOWN = std::move(other113.UNKNOWN); + JSON = std::move(other113.JSON); + BSON = std::move(other113.BSON); + UUID = std::move(other113.UUID); + FLOAT16 = std::move(other113.FLOAT16); + GEOMETRY = std::move(other113.GEOMETRY); + __isset = other113.__isset; +} +LogicalType& LogicalType::operator=(const LogicalType& other114) { + STRING = other114.STRING; + MAP = other114.MAP; + LIST = other114.LIST; + ENUM = other114.ENUM; + DECIMAL = other114.DECIMAL; + DATE = other114.DATE; + TIME = other114.TIME; + TIMESTAMP = other114.TIMESTAMP; + INTEGER = other114.INTEGER; + UNKNOWN = other114.UNKNOWN; + 
JSON = other114.JSON; + BSON = other114.BSON; + UUID = other114.UUID; + FLOAT16 = other114.FLOAT16; + GEOMETRY = other114.GEOMETRY; + __isset = other114.__isset; return *this; } -LogicalType& LogicalType::operator=(LogicalType&& other125) noexcept { - STRING = std::move(other125.STRING); - MAP = std::move(other125.MAP); - LIST = std::move(other125.LIST); - ENUM = std::move(other125.ENUM); - DECIMAL = std::move(other125.DECIMAL); - DATE = std::move(other125.DATE); - TIME = std::move(other125.TIME); - TIMESTAMP = std::move(other125.TIMESTAMP); - INTEGER = std::move(other125.INTEGER); - UNKNOWN = std::move(other125.UNKNOWN); - JSON = std::move(other125.JSON); - BSON = std::move(other125.BSON); - UUID = std::move(other125.UUID); - FLOAT16 = std::move(other125.FLOAT16); - GEOMETRY = std::move(other125.GEOMETRY); - __isset = other125.__isset; +LogicalType& LogicalType::operator=(LogicalType&& other115) noexcept { + STRING = std::move(other115.STRING); + MAP = std::move(other115.MAP); + LIST = std::move(other115.LIST); + ENUM = std::move(other115.ENUM); + DECIMAL = std::move(other115.DECIMAL); + DATE = std::move(other115.DATE); + TIME = std::move(other115.TIME); + TIMESTAMP = std::move(other115.TIMESTAMP); + INTEGER = std::move(other115.INTEGER); + UNKNOWN = std::move(other115.UNKNOWN); + JSON = std::move(other115.JSON); + BSON = std::move(other115.BSON); + UUID = std::move(other115.UUID); + FLOAT16 = std::move(other115.FLOAT16); + GEOMETRY = std::move(other115.GEOMETRY); + __isset = other115.__isset; return *this; } void LogicalType::printTo(std::ostream& out) const { @@ -2271,58 +2201,58 @@ void swap(SchemaElement &a, SchemaElement &b) { swap(a.__isset, b.__isset); } -SchemaElement::SchemaElement(const SchemaElement& other129) { - type = other129.type; - type_length = other129.type_length; - repetition_type = other129.repetition_type; - name = other129.name; - num_children = other129.num_children; - converted_type = other129.converted_type; - scale = other129.scale; - 
precision = other129.precision; - field_id = other129.field_id; - logicalType = other129.logicalType; - __isset = other129.__isset; +SchemaElement::SchemaElement(const SchemaElement& other119) { + type = other119.type; + type_length = other119.type_length; + repetition_type = other119.repetition_type; + name = other119.name; + num_children = other119.num_children; + converted_type = other119.converted_type; + scale = other119.scale; + precision = other119.precision; + field_id = other119.field_id; + logicalType = other119.logicalType; + __isset = other119.__isset; } -SchemaElement::SchemaElement(SchemaElement&& other130) noexcept { - type = other130.type; - type_length = other130.type_length; - repetition_type = other130.repetition_type; - name = std::move(other130.name); - num_children = other130.num_children; - converted_type = other130.converted_type; - scale = other130.scale; - precision = other130.precision; - field_id = other130.field_id; - logicalType = std::move(other130.logicalType); - __isset = other130.__isset; -} -SchemaElement& SchemaElement::operator=(const SchemaElement& other131) { - type = other131.type; - type_length = other131.type_length; - repetition_type = other131.repetition_type; - name = other131.name; - num_children = other131.num_children; - converted_type = other131.converted_type; - scale = other131.scale; - precision = other131.precision; - field_id = other131.field_id; - logicalType = other131.logicalType; - __isset = other131.__isset; +SchemaElement::SchemaElement(SchemaElement&& other120) noexcept { + type = other120.type; + type_length = other120.type_length; + repetition_type = other120.repetition_type; + name = std::move(other120.name); + num_children = other120.num_children; + converted_type = other120.converted_type; + scale = other120.scale; + precision = other120.precision; + field_id = other120.field_id; + logicalType = std::move(other120.logicalType); + __isset = other120.__isset; +} +SchemaElement& 
SchemaElement::operator=(const SchemaElement& other121) { + type = other121.type; + type_length = other121.type_length; + repetition_type = other121.repetition_type; + name = other121.name; + num_children = other121.num_children; + converted_type = other121.converted_type; + scale = other121.scale; + precision = other121.precision; + field_id = other121.field_id; + logicalType = other121.logicalType; + __isset = other121.__isset; return *this; } -SchemaElement& SchemaElement::operator=(SchemaElement&& other132) noexcept { - type = other132.type; - type_length = other132.type_length; - repetition_type = other132.repetition_type; - name = std::move(other132.name); - num_children = other132.num_children; - converted_type = other132.converted_type; - scale = other132.scale; - precision = other132.precision; - field_id = other132.field_id; - logicalType = std::move(other132.logicalType); - __isset = other132.__isset; +SchemaElement& SchemaElement::operator=(SchemaElement&& other122) noexcept { + type = other122.type; + type_length = other122.type_length; + repetition_type = other122.repetition_type; + name = std::move(other122.name); + num_children = other122.num_children; + converted_type = other122.converted_type; + scale = other122.scale; + precision = other122.precision; + field_id = other122.field_id; + logicalType = std::move(other122.logicalType); + __isset = other122.__isset; return *this; } void SchemaElement::printTo(std::ostream& out) const { @@ -2383,38 +2313,38 @@ void swap(DataPageHeader &a, DataPageHeader &b) { swap(a.__isset, b.__isset); } -DataPageHeader::DataPageHeader(const DataPageHeader& other136) { - num_values = other136.num_values; - encoding = other136.encoding; - definition_level_encoding = other136.definition_level_encoding; - repetition_level_encoding = other136.repetition_level_encoding; - statistics = other136.statistics; - __isset = other136.__isset; -} -DataPageHeader::DataPageHeader(DataPageHeader&& other137) noexcept { - num_values = 
other137.num_values; - encoding = other137.encoding; - definition_level_encoding = other137.definition_level_encoding; - repetition_level_encoding = other137.repetition_level_encoding; - statistics = std::move(other137.statistics); - __isset = other137.__isset; -} -DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other138) { - num_values = other138.num_values; - encoding = other138.encoding; - definition_level_encoding = other138.definition_level_encoding; - repetition_level_encoding = other138.repetition_level_encoding; - statistics = other138.statistics; - __isset = other138.__isset; +DataPageHeader::DataPageHeader(const DataPageHeader& other126) { + num_values = other126.num_values; + encoding = other126.encoding; + definition_level_encoding = other126.definition_level_encoding; + repetition_level_encoding = other126.repetition_level_encoding; + statistics = other126.statistics; + __isset = other126.__isset; +} +DataPageHeader::DataPageHeader(DataPageHeader&& other127) noexcept { + num_values = other127.num_values; + encoding = other127.encoding; + definition_level_encoding = other127.definition_level_encoding; + repetition_level_encoding = other127.repetition_level_encoding; + statistics = std::move(other127.statistics); + __isset = other127.__isset; +} +DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other128) { + num_values = other128.num_values; + encoding = other128.encoding; + definition_level_encoding = other128.definition_level_encoding; + repetition_level_encoding = other128.repetition_level_encoding; + statistics = other128.statistics; + __isset = other128.__isset; return *this; } -DataPageHeader& DataPageHeader::operator=(DataPageHeader&& other139) noexcept { - num_values = other139.num_values; - encoding = other139.encoding; - definition_level_encoding = other139.definition_level_encoding; - repetition_level_encoding = other139.repetition_level_encoding; - statistics = std::move(other139.statistics); - __isset = 
other139.__isset; +DataPageHeader& DataPageHeader::operator=(DataPageHeader&& other129) noexcept { + num_values = other129.num_values; + encoding = other129.encoding; + definition_level_encoding = other129.definition_level_encoding; + repetition_level_encoding = other129.repetition_level_encoding; + statistics = std::move(other129.statistics); + __isset = other129.__isset; return *this; } void DataPageHeader::printTo(std::ostream& out) const { @@ -2445,18 +2375,18 @@ void swap(IndexPageHeader &a, IndexPageHeader &b) { (void) b; } -IndexPageHeader::IndexPageHeader(const IndexPageHeader& other140) noexcept { - (void) other140; +IndexPageHeader::IndexPageHeader(const IndexPageHeader& other130) noexcept { + (void) other130; } -IndexPageHeader::IndexPageHeader(IndexPageHeader&& other141) noexcept { - (void) other141; +IndexPageHeader::IndexPageHeader(IndexPageHeader&& other131) noexcept { + (void) other131; } -IndexPageHeader& IndexPageHeader::operator=(const IndexPageHeader& other142) noexcept { - (void) other142; +IndexPageHeader& IndexPageHeader::operator=(const IndexPageHeader& other132) noexcept { + (void) other132; return *this; } -IndexPageHeader& IndexPageHeader::operator=(IndexPageHeader&& other143) noexcept { - (void) other143; +IndexPageHeader& IndexPageHeader::operator=(IndexPageHeader&& other133) noexcept { + (void) other133; return *this; } void IndexPageHeader::printTo(std::ostream& out) const { @@ -2497,30 +2427,30 @@ void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) { swap(a.__isset, b.__isset); } -DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other145) noexcept { - num_values = other145.num_values; - encoding = other145.encoding; - is_sorted = other145.is_sorted; - __isset = other145.__isset; +DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other135) noexcept { + num_values = other135.num_values; + encoding = other135.encoding; + is_sorted = other135.is_sorted; + __isset = other135.__isset; } 
-DictionaryPageHeader::DictionaryPageHeader(DictionaryPageHeader&& other146) noexcept { - num_values = other146.num_values; - encoding = other146.encoding; - is_sorted = other146.is_sorted; - __isset = other146.__isset; +DictionaryPageHeader::DictionaryPageHeader(DictionaryPageHeader&& other136) noexcept { + num_values = other136.num_values; + encoding = other136.encoding; + is_sorted = other136.is_sorted; + __isset = other136.__isset; } -DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other147) noexcept { - num_values = other147.num_values; - encoding = other147.encoding; - is_sorted = other147.is_sorted; - __isset = other147.__isset; +DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other137) noexcept { + num_values = other137.num_values; + encoding = other137.encoding; + is_sorted = other137.is_sorted; + __isset = other137.__isset; return *this; } -DictionaryPageHeader& DictionaryPageHeader::operator=(DictionaryPageHeader&& other148) noexcept { - num_values = other148.num_values; - encoding = other148.encoding; - is_sorted = other148.is_sorted; - __isset = other148.__isset; +DictionaryPageHeader& DictionaryPageHeader::operator=(DictionaryPageHeader&& other138) noexcept { + num_values = other138.num_values; + encoding = other138.encoding; + is_sorted = other138.is_sorted; + __isset = other138.__isset; return *this; } void DictionaryPageHeader::printTo(std::ostream& out) const { @@ -2590,50 +2520,50 @@ void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) { swap(a.__isset, b.__isset); } -DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other150) { - num_values = other150.num_values; - num_nulls = other150.num_nulls; - num_rows = other150.num_rows; - encoding = other150.encoding; - definition_levels_byte_length = other150.definition_levels_byte_length; - repetition_levels_byte_length = other150.repetition_levels_byte_length; - is_compressed = other150.is_compressed; - statistics = 
other150.statistics; - __isset = other150.__isset; -} -DataPageHeaderV2::DataPageHeaderV2(DataPageHeaderV2&& other151) noexcept { - num_values = other151.num_values; - num_nulls = other151.num_nulls; - num_rows = other151.num_rows; - encoding = other151.encoding; - definition_levels_byte_length = other151.definition_levels_byte_length; - repetition_levels_byte_length = other151.repetition_levels_byte_length; - is_compressed = other151.is_compressed; - statistics = std::move(other151.statistics); - __isset = other151.__isset; -} -DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& other152) { - num_values = other152.num_values; - num_nulls = other152.num_nulls; - num_rows = other152.num_rows; - encoding = other152.encoding; - definition_levels_byte_length = other152.definition_levels_byte_length; - repetition_levels_byte_length = other152.repetition_levels_byte_length; - is_compressed = other152.is_compressed; - statistics = other152.statistics; - __isset = other152.__isset; +DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other140) { + num_values = other140.num_values; + num_nulls = other140.num_nulls; + num_rows = other140.num_rows; + encoding = other140.encoding; + definition_levels_byte_length = other140.definition_levels_byte_length; + repetition_levels_byte_length = other140.repetition_levels_byte_length; + is_compressed = other140.is_compressed; + statistics = other140.statistics; + __isset = other140.__isset; +} +DataPageHeaderV2::DataPageHeaderV2(DataPageHeaderV2&& other141) noexcept { + num_values = other141.num_values; + num_nulls = other141.num_nulls; + num_rows = other141.num_rows; + encoding = other141.encoding; + definition_levels_byte_length = other141.definition_levels_byte_length; + repetition_levels_byte_length = other141.repetition_levels_byte_length; + is_compressed = other141.is_compressed; + statistics = std::move(other141.statistics); + __isset = other141.__isset; +} +DataPageHeaderV2& 
DataPageHeaderV2::operator=(const DataPageHeaderV2& other142) { + num_values = other142.num_values; + num_nulls = other142.num_nulls; + num_rows = other142.num_rows; + encoding = other142.encoding; + definition_levels_byte_length = other142.definition_levels_byte_length; + repetition_levels_byte_length = other142.repetition_levels_byte_length; + is_compressed = other142.is_compressed; + statistics = other142.statistics; + __isset = other142.__isset; return *this; } -DataPageHeaderV2& DataPageHeaderV2::operator=(DataPageHeaderV2&& other153) noexcept { - num_values = other153.num_values; - num_nulls = other153.num_nulls; - num_rows = other153.num_rows; - encoding = other153.encoding; - definition_levels_byte_length = other153.definition_levels_byte_length; - repetition_levels_byte_length = other153.repetition_levels_byte_length; - is_compressed = other153.is_compressed; - statistics = std::move(other153.statistics); - __isset = other153.__isset; +DataPageHeaderV2& DataPageHeaderV2::operator=(DataPageHeaderV2&& other143) noexcept { + num_values = other143.num_values; + num_nulls = other143.num_nulls; + num_rows = other143.num_rows; + encoding = other143.encoding; + definition_levels_byte_length = other143.definition_levels_byte_length; + repetition_levels_byte_length = other143.repetition_levels_byte_length; + is_compressed = other143.is_compressed; + statistics = std::move(other143.statistics); + __isset = other143.__isset; return *this; } void DataPageHeaderV2::printTo(std::ostream& out) const { @@ -2667,18 +2597,18 @@ void swap(SplitBlockAlgorithm &a, SplitBlockAlgorithm &b) { (void) b; } -SplitBlockAlgorithm::SplitBlockAlgorithm(const SplitBlockAlgorithm& other154) noexcept { - (void) other154; +SplitBlockAlgorithm::SplitBlockAlgorithm(const SplitBlockAlgorithm& other144) noexcept { + (void) other144; } -SplitBlockAlgorithm::SplitBlockAlgorithm(SplitBlockAlgorithm&& other155) noexcept { - (void) other155; 
+SplitBlockAlgorithm::SplitBlockAlgorithm(SplitBlockAlgorithm&& other145) noexcept { + (void) other145; } -SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(const SplitBlockAlgorithm& other156) noexcept { - (void) other156; +SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(const SplitBlockAlgorithm& other146) noexcept { + (void) other146; return *this; } -SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(SplitBlockAlgorithm&& other157) noexcept { - (void) other157; +SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(SplitBlockAlgorithm&& other147) noexcept { + (void) other147; return *this; } void SplitBlockAlgorithm::printTo(std::ostream& out) const { @@ -2709,22 +2639,22 @@ void swap(BloomFilterAlgorithm &a, BloomFilterAlgorithm &b) { swap(a.__isset, b.__isset); } -BloomFilterAlgorithm::BloomFilterAlgorithm(const BloomFilterAlgorithm& other158) noexcept { - BLOCK = other158.BLOCK; - __isset = other158.__isset; +BloomFilterAlgorithm::BloomFilterAlgorithm(const BloomFilterAlgorithm& other148) noexcept { + BLOCK = other148.BLOCK; + __isset = other148.__isset; } -BloomFilterAlgorithm::BloomFilterAlgorithm(BloomFilterAlgorithm&& other159) noexcept { - BLOCK = std::move(other159.BLOCK); - __isset = other159.__isset; +BloomFilterAlgorithm::BloomFilterAlgorithm(BloomFilterAlgorithm&& other149) noexcept { + BLOCK = std::move(other149.BLOCK); + __isset = other149.__isset; } -BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(const BloomFilterAlgorithm& other160) noexcept { - BLOCK = other160.BLOCK; - __isset = other160.__isset; +BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(const BloomFilterAlgorithm& other150) noexcept { + BLOCK = other150.BLOCK; + __isset = other150.__isset; return *this; } -BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(BloomFilterAlgorithm&& other161) noexcept { - BLOCK = std::move(other161.BLOCK); - __isset = other161.__isset; +BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(BloomFilterAlgorithm&& other151) noexcept { + 
BLOCK = std::move(other151.BLOCK); + __isset = other151.__isset; return *this; } void BloomFilterAlgorithm::printTo(std::ostream& out) const { @@ -2751,18 +2681,18 @@ void swap(XxHash &a, XxHash &b) { (void) b; } -XxHash::XxHash(const XxHash& other162) noexcept { - (void) other162; +XxHash::XxHash(const XxHash& other152) noexcept { + (void) other152; } -XxHash::XxHash(XxHash&& other163) noexcept { - (void) other163; +XxHash::XxHash(XxHash&& other153) noexcept { + (void) other153; } -XxHash& XxHash::operator=(const XxHash& other164) noexcept { - (void) other164; +XxHash& XxHash::operator=(const XxHash& other154) noexcept { + (void) other154; return *this; } -XxHash& XxHash::operator=(XxHash&& other165) noexcept { - (void) other165; +XxHash& XxHash::operator=(XxHash&& other155) noexcept { + (void) other155; return *this; } void XxHash::printTo(std::ostream& out) const { @@ -2793,22 +2723,22 @@ void swap(BloomFilterHash &a, BloomFilterHash &b) { swap(a.__isset, b.__isset); } -BloomFilterHash::BloomFilterHash(const BloomFilterHash& other166) noexcept { - XXHASH = other166.XXHASH; - __isset = other166.__isset; +BloomFilterHash::BloomFilterHash(const BloomFilterHash& other156) noexcept { + XXHASH = other156.XXHASH; + __isset = other156.__isset; } -BloomFilterHash::BloomFilterHash(BloomFilterHash&& other167) noexcept { - XXHASH = std::move(other167.XXHASH); - __isset = other167.__isset; +BloomFilterHash::BloomFilterHash(BloomFilterHash&& other157) noexcept { + XXHASH = std::move(other157.XXHASH); + __isset = other157.__isset; } -BloomFilterHash& BloomFilterHash::operator=(const BloomFilterHash& other168) noexcept { - XXHASH = other168.XXHASH; - __isset = other168.__isset; +BloomFilterHash& BloomFilterHash::operator=(const BloomFilterHash& other158) noexcept { + XXHASH = other158.XXHASH; + __isset = other158.__isset; return *this; } -BloomFilterHash& BloomFilterHash::operator=(BloomFilterHash&& other169) noexcept { - XXHASH = std::move(other169.XXHASH); - __isset = 
other169.__isset; +BloomFilterHash& BloomFilterHash::operator=(BloomFilterHash&& other159) noexcept { + XXHASH = std::move(other159.XXHASH); + __isset = other159.__isset; return *this; } void BloomFilterHash::printTo(std::ostream& out) const { @@ -2835,18 +2765,18 @@ void swap(Uncompressed &a, Uncompressed &b) { (void) b; } -Uncompressed::Uncompressed(const Uncompressed& other170) noexcept { - (void) other170; +Uncompressed::Uncompressed(const Uncompressed& other160) noexcept { + (void) other160; } -Uncompressed::Uncompressed(Uncompressed&& other171) noexcept { - (void) other171; +Uncompressed::Uncompressed(Uncompressed&& other161) noexcept { + (void) other161; } -Uncompressed& Uncompressed::operator=(const Uncompressed& other172) noexcept { - (void) other172; +Uncompressed& Uncompressed::operator=(const Uncompressed& other162) noexcept { + (void) other162; return *this; } -Uncompressed& Uncompressed::operator=(Uncompressed&& other173) noexcept { - (void) other173; +Uncompressed& Uncompressed::operator=(Uncompressed&& other163) noexcept { + (void) other163; return *this; } void Uncompressed::printTo(std::ostream& out) const { @@ -2877,22 +2807,22 @@ void swap(BloomFilterCompression &a, BloomFilterCompression &b) { swap(a.__isset, b.__isset); } -BloomFilterCompression::BloomFilterCompression(const BloomFilterCompression& other174) noexcept { - UNCOMPRESSED = other174.UNCOMPRESSED; - __isset = other174.__isset; +BloomFilterCompression::BloomFilterCompression(const BloomFilterCompression& other164) noexcept { + UNCOMPRESSED = other164.UNCOMPRESSED; + __isset = other164.__isset; } -BloomFilterCompression::BloomFilterCompression(BloomFilterCompression&& other175) noexcept { - UNCOMPRESSED = std::move(other175.UNCOMPRESSED); - __isset = other175.__isset; +BloomFilterCompression::BloomFilterCompression(BloomFilterCompression&& other165) noexcept { + UNCOMPRESSED = std::move(other165.UNCOMPRESSED); + __isset = other165.__isset; } -BloomFilterCompression& 
BloomFilterCompression::operator=(const BloomFilterCompression& other176) noexcept { - UNCOMPRESSED = other176.UNCOMPRESSED; - __isset = other176.__isset; +BloomFilterCompression& BloomFilterCompression::operator=(const BloomFilterCompression& other166) noexcept { + UNCOMPRESSED = other166.UNCOMPRESSED; + __isset = other166.__isset; return *this; } -BloomFilterCompression& BloomFilterCompression::operator=(BloomFilterCompression&& other177) noexcept { - UNCOMPRESSED = std::move(other177.UNCOMPRESSED); - __isset = other177.__isset; +BloomFilterCompression& BloomFilterCompression::operator=(BloomFilterCompression&& other167) noexcept { + UNCOMPRESSED = std::move(other167.UNCOMPRESSED); + __isset = other167.__isset; return *this; } void BloomFilterCompression::printTo(std::ostream& out) const { @@ -2937,30 +2867,30 @@ void swap(BloomFilterHeader &a, BloomFilterHeader &b) { swap(a.compression, b.compression); } -BloomFilterHeader::BloomFilterHeader(const BloomFilterHeader& other178) noexcept { - numBytes = other178.numBytes; - algorithm = other178.algorithm; - hash = other178.hash; - compression = other178.compression; +BloomFilterHeader::BloomFilterHeader(const BloomFilterHeader& other168) noexcept { + numBytes = other168.numBytes; + algorithm = other168.algorithm; + hash = other168.hash; + compression = other168.compression; } -BloomFilterHeader::BloomFilterHeader(BloomFilterHeader&& other179) noexcept { - numBytes = other179.numBytes; - algorithm = std::move(other179.algorithm); - hash = std::move(other179.hash); - compression = std::move(other179.compression); +BloomFilterHeader::BloomFilterHeader(BloomFilterHeader&& other169) noexcept { + numBytes = other169.numBytes; + algorithm = std::move(other169.algorithm); + hash = std::move(other169.hash); + compression = std::move(other169.compression); } -BloomFilterHeader& BloomFilterHeader::operator=(const BloomFilterHeader& other180) noexcept { - numBytes = other180.numBytes; - algorithm = other180.algorithm; - hash = 
other180.hash; - compression = other180.compression; +BloomFilterHeader& BloomFilterHeader::operator=(const BloomFilterHeader& other170) noexcept { + numBytes = other170.numBytes; + algorithm = other170.algorithm; + hash = other170.hash; + compression = other170.compression; return *this; } -BloomFilterHeader& BloomFilterHeader::operator=(BloomFilterHeader&& other181) noexcept { - numBytes = other181.numBytes; - algorithm = std::move(other181.algorithm); - hash = std::move(other181.hash); - compression = std::move(other181.compression); +BloomFilterHeader& BloomFilterHeader::operator=(BloomFilterHeader&& other171) noexcept { + numBytes = other171.numBytes; + algorithm = std::move(other171.algorithm); + hash = std::move(other171.hash); + compression = std::move(other171.compression); return *this; } void BloomFilterHeader::printTo(std::ostream& out) const { @@ -3034,50 +2964,50 @@ void swap(PageHeader &a, PageHeader &b) { swap(a.__isset, b.__isset); } -PageHeader::PageHeader(const PageHeader& other183) { - type = other183.type; - uncompressed_page_size = other183.uncompressed_page_size; - compressed_page_size = other183.compressed_page_size; - crc = other183.crc; - data_page_header = other183.data_page_header; - index_page_header = other183.index_page_header; - dictionary_page_header = other183.dictionary_page_header; - data_page_header_v2 = other183.data_page_header_v2; - __isset = other183.__isset; -} -PageHeader::PageHeader(PageHeader&& other184) noexcept { - type = other184.type; - uncompressed_page_size = other184.uncompressed_page_size; - compressed_page_size = other184.compressed_page_size; - crc = other184.crc; - data_page_header = std::move(other184.data_page_header); - index_page_header = std::move(other184.index_page_header); - dictionary_page_header = std::move(other184.dictionary_page_header); - data_page_header_v2 = std::move(other184.data_page_header_v2); - __isset = other184.__isset; -} -PageHeader& PageHeader::operator=(const PageHeader& other185) { 
- type = other185.type; - uncompressed_page_size = other185.uncompressed_page_size; - compressed_page_size = other185.compressed_page_size; - crc = other185.crc; - data_page_header = other185.data_page_header; - index_page_header = other185.index_page_header; - dictionary_page_header = other185.dictionary_page_header; - data_page_header_v2 = other185.data_page_header_v2; - __isset = other185.__isset; +PageHeader::PageHeader(const PageHeader& other173) { + type = other173.type; + uncompressed_page_size = other173.uncompressed_page_size; + compressed_page_size = other173.compressed_page_size; + crc = other173.crc; + data_page_header = other173.data_page_header; + index_page_header = other173.index_page_header; + dictionary_page_header = other173.dictionary_page_header; + data_page_header_v2 = other173.data_page_header_v2; + __isset = other173.__isset; +} +PageHeader::PageHeader(PageHeader&& other174) noexcept { + type = other174.type; + uncompressed_page_size = other174.uncompressed_page_size; + compressed_page_size = other174.compressed_page_size; + crc = other174.crc; + data_page_header = std::move(other174.data_page_header); + index_page_header = std::move(other174.index_page_header); + dictionary_page_header = std::move(other174.dictionary_page_header); + data_page_header_v2 = std::move(other174.data_page_header_v2); + __isset = other174.__isset; +} +PageHeader& PageHeader::operator=(const PageHeader& other175) { + type = other175.type; + uncompressed_page_size = other175.uncompressed_page_size; + compressed_page_size = other175.compressed_page_size; + crc = other175.crc; + data_page_header = other175.data_page_header; + index_page_header = other175.index_page_header; + dictionary_page_header = other175.dictionary_page_header; + data_page_header_v2 = other175.data_page_header_v2; + __isset = other175.__isset; return *this; } -PageHeader& PageHeader::operator=(PageHeader&& other186) noexcept { - type = other186.type; - uncompressed_page_size = 
other186.uncompressed_page_size; - compressed_page_size = other186.compressed_page_size; - crc = other186.crc; - data_page_header = std::move(other186.data_page_header); - index_page_header = std::move(other186.index_page_header); - dictionary_page_header = std::move(other186.dictionary_page_header); - data_page_header_v2 = std::move(other186.data_page_header_v2); - __isset = other186.__isset; +PageHeader& PageHeader::operator=(PageHeader&& other176) noexcept { + type = other176.type; + uncompressed_page_size = other176.uncompressed_page_size; + compressed_page_size = other176.compressed_page_size; + crc = other176.crc; + data_page_header = std::move(other176.data_page_header); + index_page_header = std::move(other176.index_page_header); + dictionary_page_header = std::move(other176.dictionary_page_header); + data_page_header_v2 = std::move(other176.data_page_header_v2); + __isset = other176.__isset; return *this; } void PageHeader::printTo(std::ostream& out) const { @@ -3121,26 +3051,26 @@ void swap(KeyValue &a, KeyValue &b) { swap(a.__isset, b.__isset); } -KeyValue::KeyValue(const KeyValue& other187) { - key = other187.key; - value = other187.value; - __isset = other187.__isset; +KeyValue::KeyValue(const KeyValue& other177) { + key = other177.key; + value = other177.value; + __isset = other177.__isset; } -KeyValue::KeyValue(KeyValue&& other188) noexcept { - key = std::move(other188.key); - value = std::move(other188.value); - __isset = other188.__isset; +KeyValue::KeyValue(KeyValue&& other178) noexcept { + key = std::move(other178.key); + value = std::move(other178.value); + __isset = other178.__isset; } -KeyValue& KeyValue::operator=(const KeyValue& other189) { - key = other189.key; - value = other189.value; - __isset = other189.__isset; +KeyValue& KeyValue::operator=(const KeyValue& other179) { + key = other179.key; + value = other179.value; + __isset = other179.__isset; return *this; } -KeyValue& KeyValue::operator=(KeyValue&& other190) noexcept { - key = 
std::move(other190.key); - value = std::move(other190.value); - __isset = other190.__isset; +KeyValue& KeyValue::operator=(KeyValue&& other180) noexcept { + key = std::move(other180.key); + value = std::move(other180.value); + __isset = other180.__isset; return *this; } void KeyValue::printTo(std::ostream& out) const { @@ -3181,26 +3111,26 @@ void swap(SortingColumn &a, SortingColumn &b) { swap(a.nulls_first, b.nulls_first); } -SortingColumn::SortingColumn(const SortingColumn& other191) noexcept { - column_idx = other191.column_idx; - descending = other191.descending; - nulls_first = other191.nulls_first; +SortingColumn::SortingColumn(const SortingColumn& other181) noexcept { + column_idx = other181.column_idx; + descending = other181.descending; + nulls_first = other181.nulls_first; } -SortingColumn::SortingColumn(SortingColumn&& other192) noexcept { - column_idx = other192.column_idx; - descending = other192.descending; - nulls_first = other192.nulls_first; +SortingColumn::SortingColumn(SortingColumn&& other182) noexcept { + column_idx = other182.column_idx; + descending = other182.descending; + nulls_first = other182.nulls_first; } -SortingColumn& SortingColumn::operator=(const SortingColumn& other193) noexcept { - column_idx = other193.column_idx; - descending = other193.descending; - nulls_first = other193.nulls_first; +SortingColumn& SortingColumn::operator=(const SortingColumn& other183) noexcept { + column_idx = other183.column_idx; + descending = other183.descending; + nulls_first = other183.nulls_first; return *this; } -SortingColumn& SortingColumn::operator=(SortingColumn&& other194) noexcept { - column_idx = other194.column_idx; - descending = other194.descending; - nulls_first = other194.nulls_first; +SortingColumn& SortingColumn::operator=(SortingColumn&& other184) noexcept { + column_idx = other184.column_idx; + descending = other184.descending; + nulls_first = other184.nulls_first; return *this; } void SortingColumn::printTo(std::ostream& out) const 
{ @@ -3242,26 +3172,26 @@ void swap(PageEncodingStats &a, PageEncodingStats &b) { swap(a.count, b.count); } -PageEncodingStats::PageEncodingStats(const PageEncodingStats& other197) noexcept { - page_type = other197.page_type; - encoding = other197.encoding; - count = other197.count; +PageEncodingStats::PageEncodingStats(const PageEncodingStats& other187) noexcept { + page_type = other187.page_type; + encoding = other187.encoding; + count = other187.count; } -PageEncodingStats::PageEncodingStats(PageEncodingStats&& other198) noexcept { - page_type = other198.page_type; - encoding = other198.encoding; - count = other198.count; +PageEncodingStats::PageEncodingStats(PageEncodingStats&& other188) noexcept { + page_type = other188.page_type; + encoding = other188.encoding; + count = other188.count; } -PageEncodingStats& PageEncodingStats::operator=(const PageEncodingStats& other199) noexcept { - page_type = other199.page_type; - encoding = other199.encoding; - count = other199.count; +PageEncodingStats& PageEncodingStats::operator=(const PageEncodingStats& other189) noexcept { + page_type = other189.page_type; + encoding = other189.encoding; + count = other189.count; return *this; } -PageEncodingStats& PageEncodingStats::operator=(PageEncodingStats&& other200) noexcept { - page_type = other200.page_type; - encoding = other200.encoding; - count = other200.count; +PageEncodingStats& PageEncodingStats::operator=(PageEncodingStats&& other190) noexcept { + page_type = other190.page_type; + encoding = other190.encoding; + count = other190.count; return *this; } void PageEncodingStats::printTo(std::ostream& out) const { @@ -3377,82 +3307,82 @@ void swap(ColumnMetaData &a, ColumnMetaData &b) { swap(a.__isset, b.__isset); } -ColumnMetaData::ColumnMetaData(const ColumnMetaData& other228) { - type = other228.type; - encodings = other228.encodings; - path_in_schema = other228.path_in_schema; - codec = other228.codec; - num_values = other228.num_values; - total_uncompressed_size = 
other228.total_uncompressed_size; - total_compressed_size = other228.total_compressed_size; - key_value_metadata = other228.key_value_metadata; - data_page_offset = other228.data_page_offset; - index_page_offset = other228.index_page_offset; - dictionary_page_offset = other228.dictionary_page_offset; - statistics = other228.statistics; - encoding_stats = other228.encoding_stats; - bloom_filter_offset = other228.bloom_filter_offset; - bloom_filter_length = other228.bloom_filter_length; - size_statistics = other228.size_statistics; - __isset = other228.__isset; -} -ColumnMetaData::ColumnMetaData(ColumnMetaData&& other229) noexcept { - type = other229.type; - encodings = std::move(other229.encodings); - path_in_schema = std::move(other229.path_in_schema); - codec = other229.codec; - num_values = other229.num_values; - total_uncompressed_size = other229.total_uncompressed_size; - total_compressed_size = other229.total_compressed_size; - key_value_metadata = std::move(other229.key_value_metadata); - data_page_offset = other229.data_page_offset; - index_page_offset = other229.index_page_offset; - dictionary_page_offset = other229.dictionary_page_offset; - statistics = std::move(other229.statistics); - encoding_stats = std::move(other229.encoding_stats); - bloom_filter_offset = other229.bloom_filter_offset; - bloom_filter_length = other229.bloom_filter_length; - size_statistics = std::move(other229.size_statistics); - __isset = other229.__isset; -} -ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other230) { - type = other230.type; - encodings = other230.encodings; - path_in_schema = other230.path_in_schema; - codec = other230.codec; - num_values = other230.num_values; - total_uncompressed_size = other230.total_uncompressed_size; - total_compressed_size = other230.total_compressed_size; - key_value_metadata = other230.key_value_metadata; - data_page_offset = other230.data_page_offset; - index_page_offset = other230.index_page_offset; - 
dictionary_page_offset = other230.dictionary_page_offset; - statistics = other230.statistics; - encoding_stats = other230.encoding_stats; - bloom_filter_offset = other230.bloom_filter_offset; - bloom_filter_length = other230.bloom_filter_length; - size_statistics = other230.size_statistics; - __isset = other230.__isset; +ColumnMetaData::ColumnMetaData(const ColumnMetaData& other218) { + type = other218.type; + encodings = other218.encodings; + path_in_schema = other218.path_in_schema; + codec = other218.codec; + num_values = other218.num_values; + total_uncompressed_size = other218.total_uncompressed_size; + total_compressed_size = other218.total_compressed_size; + key_value_metadata = other218.key_value_metadata; + data_page_offset = other218.data_page_offset; + index_page_offset = other218.index_page_offset; + dictionary_page_offset = other218.dictionary_page_offset; + statistics = other218.statistics; + encoding_stats = other218.encoding_stats; + bloom_filter_offset = other218.bloom_filter_offset; + bloom_filter_length = other218.bloom_filter_length; + size_statistics = other218.size_statistics; + __isset = other218.__isset; +} +ColumnMetaData::ColumnMetaData(ColumnMetaData&& other219) noexcept { + type = other219.type; + encodings = std::move(other219.encodings); + path_in_schema = std::move(other219.path_in_schema); + codec = other219.codec; + num_values = other219.num_values; + total_uncompressed_size = other219.total_uncompressed_size; + total_compressed_size = other219.total_compressed_size; + key_value_metadata = std::move(other219.key_value_metadata); + data_page_offset = other219.data_page_offset; + index_page_offset = other219.index_page_offset; + dictionary_page_offset = other219.dictionary_page_offset; + statistics = std::move(other219.statistics); + encoding_stats = std::move(other219.encoding_stats); + bloom_filter_offset = other219.bloom_filter_offset; + bloom_filter_length = other219.bloom_filter_length; + size_statistics = 
std::move(other219.size_statistics); + __isset = other219.__isset; +} +ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other220) { + type = other220.type; + encodings = other220.encodings; + path_in_schema = other220.path_in_schema; + codec = other220.codec; + num_values = other220.num_values; + total_uncompressed_size = other220.total_uncompressed_size; + total_compressed_size = other220.total_compressed_size; + key_value_metadata = other220.key_value_metadata; + data_page_offset = other220.data_page_offset; + index_page_offset = other220.index_page_offset; + dictionary_page_offset = other220.dictionary_page_offset; + statistics = other220.statistics; + encoding_stats = other220.encoding_stats; + bloom_filter_offset = other220.bloom_filter_offset; + bloom_filter_length = other220.bloom_filter_length; + size_statistics = other220.size_statistics; + __isset = other220.__isset; return *this; } -ColumnMetaData& ColumnMetaData::operator=(ColumnMetaData&& other231) noexcept { - type = other231.type; - encodings = std::move(other231.encodings); - path_in_schema = std::move(other231.path_in_schema); - codec = other231.codec; - num_values = other231.num_values; - total_uncompressed_size = other231.total_uncompressed_size; - total_compressed_size = other231.total_compressed_size; - key_value_metadata = std::move(other231.key_value_metadata); - data_page_offset = other231.data_page_offset; - index_page_offset = other231.index_page_offset; - dictionary_page_offset = other231.dictionary_page_offset; - statistics = std::move(other231.statistics); - encoding_stats = std::move(other231.encoding_stats); - bloom_filter_offset = other231.bloom_filter_offset; - bloom_filter_length = other231.bloom_filter_length; - size_statistics = std::move(other231.size_statistics); - __isset = other231.__isset; +ColumnMetaData& ColumnMetaData::operator=(ColumnMetaData&& other221) noexcept { + type = other221.type; + encodings = std::move(other221.encodings); + path_in_schema = 
std::move(other221.path_in_schema); + codec = other221.codec; + num_values = other221.num_values; + total_uncompressed_size = other221.total_uncompressed_size; + total_compressed_size = other221.total_compressed_size; + key_value_metadata = std::move(other221.key_value_metadata); + data_page_offset = other221.data_page_offset; + index_page_offset = other221.index_page_offset; + dictionary_page_offset = other221.dictionary_page_offset; + statistics = std::move(other221.statistics); + encoding_stats = std::move(other221.encoding_stats); + bloom_filter_offset = other221.bloom_filter_offset; + bloom_filter_length = other221.bloom_filter_length; + size_statistics = std::move(other221.size_statistics); + __isset = other221.__isset; return *this; } void ColumnMetaData::printTo(std::ostream& out) const { @@ -3494,18 +3424,18 @@ void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b) { (void) b; } -EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& other232) noexcept { - (void) other232; +EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& other222) noexcept { + (void) other222; } -EncryptionWithFooterKey::EncryptionWithFooterKey(EncryptionWithFooterKey&& other233) noexcept { - (void) other233; +EncryptionWithFooterKey::EncryptionWithFooterKey(EncryptionWithFooterKey&& other223) noexcept { + (void) other223; } -EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(const EncryptionWithFooterKey& other234) noexcept { - (void) other234; +EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(const EncryptionWithFooterKey& other224) noexcept { + (void) other224; return *this; } -EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(EncryptionWithFooterKey&& other235) noexcept { - (void) other235; +EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(EncryptionWithFooterKey&& other225) noexcept { + (void) other225; return *this; } void EncryptionWithFooterKey::printTo(std::ostream& out) 
const { @@ -3541,26 +3471,26 @@ void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b) { swap(a.__isset, b.__isset); } -EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other242) { - path_in_schema = other242.path_in_schema; - key_metadata = other242.key_metadata; - __isset = other242.__isset; +EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other232) { + path_in_schema = other232.path_in_schema; + key_metadata = other232.key_metadata; + __isset = other232.__isset; } -EncryptionWithColumnKey::EncryptionWithColumnKey(EncryptionWithColumnKey&& other243) noexcept { - path_in_schema = std::move(other243.path_in_schema); - key_metadata = std::move(other243.key_metadata); - __isset = other243.__isset; +EncryptionWithColumnKey::EncryptionWithColumnKey(EncryptionWithColumnKey&& other233) noexcept { + path_in_schema = std::move(other233.path_in_schema); + key_metadata = std::move(other233.key_metadata); + __isset = other233.__isset; } -EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(const EncryptionWithColumnKey& other244) { - path_in_schema = other244.path_in_schema; - key_metadata = other244.key_metadata; - __isset = other244.__isset; +EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(const EncryptionWithColumnKey& other234) { + path_in_schema = other234.path_in_schema; + key_metadata = other234.key_metadata; + __isset = other234.__isset; return *this; } -EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(EncryptionWithColumnKey&& other245) noexcept { - path_in_schema = std::move(other245.path_in_schema); - key_metadata = std::move(other245.key_metadata); - __isset = other245.__isset; +EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(EncryptionWithColumnKey&& other235) noexcept { + path_in_schema = std::move(other235.path_in_schema); + key_metadata = std::move(other235.key_metadata); + __isset = other235.__isset; return *this; } void 
EncryptionWithColumnKey::printTo(std::ostream& out) const { @@ -3599,26 +3529,26 @@ void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b) { swap(a.__isset, b.__isset); } -ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other246) { - ENCRYPTION_WITH_FOOTER_KEY = other246.ENCRYPTION_WITH_FOOTER_KEY; - ENCRYPTION_WITH_COLUMN_KEY = other246.ENCRYPTION_WITH_COLUMN_KEY; - __isset = other246.__isset; +ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other236) { + ENCRYPTION_WITH_FOOTER_KEY = other236.ENCRYPTION_WITH_FOOTER_KEY; + ENCRYPTION_WITH_COLUMN_KEY = other236.ENCRYPTION_WITH_COLUMN_KEY; + __isset = other236.__isset; } -ColumnCryptoMetaData::ColumnCryptoMetaData(ColumnCryptoMetaData&& other247) noexcept { - ENCRYPTION_WITH_FOOTER_KEY = std::move(other247.ENCRYPTION_WITH_FOOTER_KEY); - ENCRYPTION_WITH_COLUMN_KEY = std::move(other247.ENCRYPTION_WITH_COLUMN_KEY); - __isset = other247.__isset; +ColumnCryptoMetaData::ColumnCryptoMetaData(ColumnCryptoMetaData&& other237) noexcept { + ENCRYPTION_WITH_FOOTER_KEY = std::move(other237.ENCRYPTION_WITH_FOOTER_KEY); + ENCRYPTION_WITH_COLUMN_KEY = std::move(other237.ENCRYPTION_WITH_COLUMN_KEY); + __isset = other237.__isset; } -ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(const ColumnCryptoMetaData& other248) { - ENCRYPTION_WITH_FOOTER_KEY = other248.ENCRYPTION_WITH_FOOTER_KEY; - ENCRYPTION_WITH_COLUMN_KEY = other248.ENCRYPTION_WITH_COLUMN_KEY; - __isset = other248.__isset; +ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(const ColumnCryptoMetaData& other238) { + ENCRYPTION_WITH_FOOTER_KEY = other238.ENCRYPTION_WITH_FOOTER_KEY; + ENCRYPTION_WITH_COLUMN_KEY = other238.ENCRYPTION_WITH_COLUMN_KEY; + __isset = other238.__isset; return *this; } -ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(ColumnCryptoMetaData&& other249) noexcept { - ENCRYPTION_WITH_FOOTER_KEY = std::move(other249.ENCRYPTION_WITH_FOOTER_KEY); - ENCRYPTION_WITH_COLUMN_KEY = 
std::move(other249.ENCRYPTION_WITH_COLUMN_KEY); - __isset = other249.__isset; +ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(ColumnCryptoMetaData&& other239) noexcept { + ENCRYPTION_WITH_FOOTER_KEY = std::move(other239.ENCRYPTION_WITH_FOOTER_KEY); + ENCRYPTION_WITH_COLUMN_KEY = std::move(other239.ENCRYPTION_WITH_COLUMN_KEY); + __isset = other239.__isset; return *this; } void ColumnCryptoMetaData::printTo(std::ostream& out) const { @@ -3698,54 +3628,54 @@ void swap(ColumnChunk &a, ColumnChunk &b) { swap(a.__isset, b.__isset); } -ColumnChunk::ColumnChunk(const ColumnChunk& other250) { - file_path = other250.file_path; - file_offset = other250.file_offset; - meta_data = other250.meta_data; - offset_index_offset = other250.offset_index_offset; - offset_index_length = other250.offset_index_length; - column_index_offset = other250.column_index_offset; - column_index_length = other250.column_index_length; - crypto_metadata = other250.crypto_metadata; - encrypted_column_metadata = other250.encrypted_column_metadata; - __isset = other250.__isset; -} -ColumnChunk::ColumnChunk(ColumnChunk&& other251) noexcept { - file_path = std::move(other251.file_path); - file_offset = other251.file_offset; - meta_data = std::move(other251.meta_data); - offset_index_offset = other251.offset_index_offset; - offset_index_length = other251.offset_index_length; - column_index_offset = other251.column_index_offset; - column_index_length = other251.column_index_length; - crypto_metadata = std::move(other251.crypto_metadata); - encrypted_column_metadata = std::move(other251.encrypted_column_metadata); - __isset = other251.__isset; -} -ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other252) { - file_path = other252.file_path; - file_offset = other252.file_offset; - meta_data = other252.meta_data; - offset_index_offset = other252.offset_index_offset; - offset_index_length = other252.offset_index_length; - column_index_offset = other252.column_index_offset; - column_index_length = 
other252.column_index_length; - crypto_metadata = other252.crypto_metadata; - encrypted_column_metadata = other252.encrypted_column_metadata; - __isset = other252.__isset; +ColumnChunk::ColumnChunk(const ColumnChunk& other240) { + file_path = other240.file_path; + file_offset = other240.file_offset; + meta_data = other240.meta_data; + offset_index_offset = other240.offset_index_offset; + offset_index_length = other240.offset_index_length; + column_index_offset = other240.column_index_offset; + column_index_length = other240.column_index_length; + crypto_metadata = other240.crypto_metadata; + encrypted_column_metadata = other240.encrypted_column_metadata; + __isset = other240.__isset; +} +ColumnChunk::ColumnChunk(ColumnChunk&& other241) noexcept { + file_path = std::move(other241.file_path); + file_offset = other241.file_offset; + meta_data = std::move(other241.meta_data); + offset_index_offset = other241.offset_index_offset; + offset_index_length = other241.offset_index_length; + column_index_offset = other241.column_index_offset; + column_index_length = other241.column_index_length; + crypto_metadata = std::move(other241.crypto_metadata); + encrypted_column_metadata = std::move(other241.encrypted_column_metadata); + __isset = other241.__isset; +} +ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other242) { + file_path = other242.file_path; + file_offset = other242.file_offset; + meta_data = other242.meta_data; + offset_index_offset = other242.offset_index_offset; + offset_index_length = other242.offset_index_length; + column_index_offset = other242.column_index_offset; + column_index_length = other242.column_index_length; + crypto_metadata = other242.crypto_metadata; + encrypted_column_metadata = other242.encrypted_column_metadata; + __isset = other242.__isset; return *this; } -ColumnChunk& ColumnChunk::operator=(ColumnChunk&& other253) noexcept { - file_path = std::move(other253.file_path); - file_offset = other253.file_offset; - meta_data = 
std::move(other253.meta_data); - offset_index_offset = other253.offset_index_offset; - offset_index_length = other253.offset_index_length; - column_index_offset = other253.column_index_offset; - column_index_length = other253.column_index_length; - crypto_metadata = std::move(other253.crypto_metadata); - encrypted_column_metadata = std::move(other253.encrypted_column_metadata); - __isset = other253.__isset; +ColumnChunk& ColumnChunk::operator=(ColumnChunk&& other243) noexcept { + file_path = std::move(other243.file_path); + file_offset = other243.file_offset; + meta_data = std::move(other243.meta_data); + offset_index_offset = other243.offset_index_offset; + offset_index_length = other243.offset_index_length; + column_index_offset = other243.column_index_offset; + column_index_length = other243.column_index_length; + crypto_metadata = std::move(other243.crypto_metadata); + encrypted_column_metadata = std::move(other243.encrypted_column_metadata); + __isset = other243.__isset; return *this; } void ColumnChunk::printTo(std::ostream& out) const { @@ -3818,46 +3748,46 @@ void swap(RowGroup &a, RowGroup &b) { swap(a.__isset, b.__isset); } -RowGroup::RowGroup(const RowGroup& other266) { - columns = other266.columns; - total_byte_size = other266.total_byte_size; - num_rows = other266.num_rows; - sorting_columns = other266.sorting_columns; - file_offset = other266.file_offset; - total_compressed_size = other266.total_compressed_size; - ordinal = other266.ordinal; - __isset = other266.__isset; -} -RowGroup::RowGroup(RowGroup&& other267) noexcept { - columns = std::move(other267.columns); - total_byte_size = other267.total_byte_size; - num_rows = other267.num_rows; - sorting_columns = std::move(other267.sorting_columns); - file_offset = other267.file_offset; - total_compressed_size = other267.total_compressed_size; - ordinal = other267.ordinal; - __isset = other267.__isset; -} -RowGroup& RowGroup::operator=(const RowGroup& other268) { - columns = other268.columns; - 
total_byte_size = other268.total_byte_size; - num_rows = other268.num_rows; - sorting_columns = other268.sorting_columns; - file_offset = other268.file_offset; - total_compressed_size = other268.total_compressed_size; - ordinal = other268.ordinal; - __isset = other268.__isset; +RowGroup::RowGroup(const RowGroup& other256) { + columns = other256.columns; + total_byte_size = other256.total_byte_size; + num_rows = other256.num_rows; + sorting_columns = other256.sorting_columns; + file_offset = other256.file_offset; + total_compressed_size = other256.total_compressed_size; + ordinal = other256.ordinal; + __isset = other256.__isset; +} +RowGroup::RowGroup(RowGroup&& other257) noexcept { + columns = std::move(other257.columns); + total_byte_size = other257.total_byte_size; + num_rows = other257.num_rows; + sorting_columns = std::move(other257.sorting_columns); + file_offset = other257.file_offset; + total_compressed_size = other257.total_compressed_size; + ordinal = other257.ordinal; + __isset = other257.__isset; +} +RowGroup& RowGroup::operator=(const RowGroup& other258) { + columns = other258.columns; + total_byte_size = other258.total_byte_size; + num_rows = other258.num_rows; + sorting_columns = other258.sorting_columns; + file_offset = other258.file_offset; + total_compressed_size = other258.total_compressed_size; + ordinal = other258.ordinal; + __isset = other258.__isset; return *this; } -RowGroup& RowGroup::operator=(RowGroup&& other269) noexcept { - columns = std::move(other269.columns); - total_byte_size = other269.total_byte_size; - num_rows = other269.num_rows; - sorting_columns = std::move(other269.sorting_columns); - file_offset = other269.file_offset; - total_compressed_size = other269.total_compressed_size; - ordinal = other269.ordinal; - __isset = other269.__isset; +RowGroup& RowGroup::operator=(RowGroup&& other259) noexcept { + columns = std::move(other259.columns); + total_byte_size = other259.total_byte_size; + num_rows = other259.num_rows; + 
sorting_columns = std::move(other259.sorting_columns); + file_offset = other259.file_offset; + total_compressed_size = other259.total_compressed_size; + ordinal = other259.ordinal; + __isset = other259.__isset; return *this; } void RowGroup::printTo(std::ostream& out) const { @@ -3890,18 +3820,18 @@ void swap(TypeDefinedOrder &a, TypeDefinedOrder &b) { (void) b; } -TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other270) noexcept { - (void) other270; +TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other260) noexcept { + (void) other260; } -TypeDefinedOrder::TypeDefinedOrder(TypeDefinedOrder&& other271) noexcept { - (void) other271; +TypeDefinedOrder::TypeDefinedOrder(TypeDefinedOrder&& other261) noexcept { + (void) other261; } -TypeDefinedOrder& TypeDefinedOrder::operator=(const TypeDefinedOrder& other272) noexcept { - (void) other272; +TypeDefinedOrder& TypeDefinedOrder::operator=(const TypeDefinedOrder& other262) noexcept { + (void) other262; return *this; } -TypeDefinedOrder& TypeDefinedOrder::operator=(TypeDefinedOrder&& other273) noexcept { - (void) other273; +TypeDefinedOrder& TypeDefinedOrder::operator=(TypeDefinedOrder&& other263) noexcept { + (void) other263; return *this; } void TypeDefinedOrder::printTo(std::ostream& out) const { @@ -3932,22 +3862,22 @@ void swap(ColumnOrder &a, ColumnOrder &b) { swap(a.__isset, b.__isset); } -ColumnOrder::ColumnOrder(const ColumnOrder& other274) noexcept { - TYPE_ORDER = other274.TYPE_ORDER; - __isset = other274.__isset; +ColumnOrder::ColumnOrder(const ColumnOrder& other264) noexcept { + TYPE_ORDER = other264.TYPE_ORDER; + __isset = other264.__isset; } -ColumnOrder::ColumnOrder(ColumnOrder&& other275) noexcept { - TYPE_ORDER = std::move(other275.TYPE_ORDER); - __isset = other275.__isset; +ColumnOrder::ColumnOrder(ColumnOrder&& other265) noexcept { + TYPE_ORDER = std::move(other265.TYPE_ORDER); + __isset = other265.__isset; } -ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other276) 
noexcept { - TYPE_ORDER = other276.TYPE_ORDER; - __isset = other276.__isset; +ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other266) noexcept { + TYPE_ORDER = other266.TYPE_ORDER; + __isset = other266.__isset; return *this; } -ColumnOrder& ColumnOrder::operator=(ColumnOrder&& other277) noexcept { - TYPE_ORDER = std::move(other277.TYPE_ORDER); - __isset = other277.__isset; +ColumnOrder& ColumnOrder::operator=(ColumnOrder&& other267) noexcept { + TYPE_ORDER = std::move(other267.TYPE_ORDER); + __isset = other267.__isset; return *this; } void ColumnOrder::printTo(std::ostream& out) const { @@ -3987,26 +3917,26 @@ void swap(PageLocation &a, PageLocation &b) { swap(a.first_row_index, b.first_row_index); } -PageLocation::PageLocation(const PageLocation& other278) noexcept { - offset = other278.offset; - compressed_page_size = other278.compressed_page_size; - first_row_index = other278.first_row_index; +PageLocation::PageLocation(const PageLocation& other268) noexcept { + offset = other268.offset; + compressed_page_size = other268.compressed_page_size; + first_row_index = other268.first_row_index; } -PageLocation::PageLocation(PageLocation&& other279) noexcept { - offset = other279.offset; - compressed_page_size = other279.compressed_page_size; - first_row_index = other279.first_row_index; +PageLocation::PageLocation(PageLocation&& other269) noexcept { + offset = other269.offset; + compressed_page_size = other269.compressed_page_size; + first_row_index = other269.first_row_index; } -PageLocation& PageLocation::operator=(const PageLocation& other280) noexcept { - offset = other280.offset; - compressed_page_size = other280.compressed_page_size; - first_row_index = other280.first_row_index; +PageLocation& PageLocation::operator=(const PageLocation& other270) noexcept { + offset = other270.offset; + compressed_page_size = other270.compressed_page_size; + first_row_index = other270.first_row_index; return *this; } -PageLocation& PageLocation::operator=(PageLocation&& 
other281) noexcept { - offset = other281.offset; - compressed_page_size = other281.compressed_page_size; - first_row_index = other281.first_row_index; +PageLocation& PageLocation::operator=(PageLocation&& other271) noexcept { + offset = other271.offset; + compressed_page_size = other271.compressed_page_size; + first_row_index = other271.first_row_index; return *this; } void PageLocation::printTo(std::ostream& out) const { @@ -4045,26 +3975,26 @@ void swap(OffsetIndex &a, OffsetIndex &b) { swap(a.__isset, b.__isset); } -OffsetIndex::OffsetIndex(const OffsetIndex& other294) { - page_locations = other294.page_locations; - unencoded_byte_array_data_bytes = other294.unencoded_byte_array_data_bytes; - __isset = other294.__isset; +OffsetIndex::OffsetIndex(const OffsetIndex& other284) { + page_locations = other284.page_locations; + unencoded_byte_array_data_bytes = other284.unencoded_byte_array_data_bytes; + __isset = other284.__isset; } -OffsetIndex::OffsetIndex(OffsetIndex&& other295) noexcept { - page_locations = std::move(other295.page_locations); - unencoded_byte_array_data_bytes = std::move(other295.unencoded_byte_array_data_bytes); - __isset = other295.__isset; +OffsetIndex::OffsetIndex(OffsetIndex&& other285) noexcept { + page_locations = std::move(other285.page_locations); + unencoded_byte_array_data_bytes = std::move(other285.unencoded_byte_array_data_bytes); + __isset = other285.__isset; } -OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other296) { - page_locations = other296.page_locations; - unencoded_byte_array_data_bytes = other296.unencoded_byte_array_data_bytes; - __isset = other296.__isset; +OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other286) { + page_locations = other286.page_locations; + unencoded_byte_array_data_bytes = other286.unencoded_byte_array_data_bytes; + __isset = other286.__isset; return *this; } -OffsetIndex& OffsetIndex::operator=(OffsetIndex&& other297) noexcept { - page_locations = std::move(other297.page_locations); 
- unencoded_byte_array_data_bytes = std::move(other297.unencoded_byte_array_data_bytes); - __isset = other297.__isset; +OffsetIndex& OffsetIndex::operator=(OffsetIndex&& other287) noexcept { + page_locations = std::move(other287.page_locations); + unencoded_byte_array_data_bytes = std::move(other287.unencoded_byte_array_data_bytes); + __isset = other287.__isset; return *this; } void OffsetIndex::printTo(std::ostream& out) const { @@ -4135,50 +4065,50 @@ void swap(ColumnIndex &a, ColumnIndex &b) { swap(a.__isset, b.__isset); } -ColumnIndex::ColumnIndex(const ColumnIndex& other341) { - null_pages = other341.null_pages; - min_values = other341.min_values; - max_values = other341.max_values; - boundary_order = other341.boundary_order; - null_counts = other341.null_counts; - repetition_level_histograms = other341.repetition_level_histograms; - definition_level_histograms = other341.definition_level_histograms; - geometry_stats = other341.geometry_stats; - __isset = other341.__isset; -} -ColumnIndex::ColumnIndex(ColumnIndex&& other342) noexcept { - null_pages = std::move(other342.null_pages); - min_values = std::move(other342.min_values); - max_values = std::move(other342.max_values); - boundary_order = other342.boundary_order; - null_counts = std::move(other342.null_counts); - repetition_level_histograms = std::move(other342.repetition_level_histograms); - definition_level_histograms = std::move(other342.definition_level_histograms); - geometry_stats = std::move(other342.geometry_stats); - __isset = other342.__isset; -} -ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other343) { - null_pages = other343.null_pages; - min_values = other343.min_values; - max_values = other343.max_values; - boundary_order = other343.boundary_order; - null_counts = other343.null_counts; - repetition_level_histograms = other343.repetition_level_histograms; - definition_level_histograms = other343.definition_level_histograms; - geometry_stats = other343.geometry_stats; - __isset = 
other343.__isset; +ColumnIndex::ColumnIndex(const ColumnIndex& other331) { + null_pages = other331.null_pages; + min_values = other331.min_values; + max_values = other331.max_values; + boundary_order = other331.boundary_order; + null_counts = other331.null_counts; + repetition_level_histograms = other331.repetition_level_histograms; + definition_level_histograms = other331.definition_level_histograms; + geometry_stats = other331.geometry_stats; + __isset = other331.__isset; +} +ColumnIndex::ColumnIndex(ColumnIndex&& other332) noexcept { + null_pages = std::move(other332.null_pages); + min_values = std::move(other332.min_values); + max_values = std::move(other332.max_values); + boundary_order = other332.boundary_order; + null_counts = std::move(other332.null_counts); + repetition_level_histograms = std::move(other332.repetition_level_histograms); + definition_level_histograms = std::move(other332.definition_level_histograms); + geometry_stats = std::move(other332.geometry_stats); + __isset = other332.__isset; +} +ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other333) { + null_pages = other333.null_pages; + min_values = other333.min_values; + max_values = other333.max_values; + boundary_order = other333.boundary_order; + null_counts = other333.null_counts; + repetition_level_histograms = other333.repetition_level_histograms; + definition_level_histograms = other333.definition_level_histograms; + geometry_stats = other333.geometry_stats; + __isset = other333.__isset; return *this; } -ColumnIndex& ColumnIndex::operator=(ColumnIndex&& other344) noexcept { - null_pages = std::move(other344.null_pages); - min_values = std::move(other344.min_values); - max_values = std::move(other344.max_values); - boundary_order = other344.boundary_order; - null_counts = std::move(other344.null_counts); - repetition_level_histograms = std::move(other344.repetition_level_histograms); - definition_level_histograms = std::move(other344.definition_level_histograms); - geometry_stats 
= std::move(other344.geometry_stats); - __isset = other344.__isset; +ColumnIndex& ColumnIndex::operator=(ColumnIndex&& other334) noexcept { + null_pages = std::move(other334.null_pages); + min_values = std::move(other334.min_values); + max_values = std::move(other334.max_values); + boundary_order = other334.boundary_order; + null_counts = std::move(other334.null_counts); + repetition_level_histograms = std::move(other334.repetition_level_histograms); + definition_level_histograms = std::move(other334.definition_level_histograms); + geometry_stats = std::move(other334.geometry_stats); + __isset = other334.__isset; return *this; } void ColumnIndex::printTo(std::ostream& out) const { @@ -4229,30 +4159,30 @@ void swap(AesGcmV1 &a, AesGcmV1 &b) { swap(a.__isset, b.__isset); } -AesGcmV1::AesGcmV1(const AesGcmV1& other345) { - aad_prefix = other345.aad_prefix; - aad_file_unique = other345.aad_file_unique; - supply_aad_prefix = other345.supply_aad_prefix; - __isset = other345.__isset; +AesGcmV1::AesGcmV1(const AesGcmV1& other335) { + aad_prefix = other335.aad_prefix; + aad_file_unique = other335.aad_file_unique; + supply_aad_prefix = other335.supply_aad_prefix; + __isset = other335.__isset; } -AesGcmV1::AesGcmV1(AesGcmV1&& other346) noexcept { - aad_prefix = std::move(other346.aad_prefix); - aad_file_unique = std::move(other346.aad_file_unique); - supply_aad_prefix = other346.supply_aad_prefix; - __isset = other346.__isset; +AesGcmV1::AesGcmV1(AesGcmV1&& other336) noexcept { + aad_prefix = std::move(other336.aad_prefix); + aad_file_unique = std::move(other336.aad_file_unique); + supply_aad_prefix = other336.supply_aad_prefix; + __isset = other336.__isset; } -AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other347) { - aad_prefix = other347.aad_prefix; - aad_file_unique = other347.aad_file_unique; - supply_aad_prefix = other347.supply_aad_prefix; - __isset = other347.__isset; +AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other337) { + aad_prefix = other337.aad_prefix; + 
aad_file_unique = other337.aad_file_unique; + supply_aad_prefix = other337.supply_aad_prefix; + __isset = other337.__isset; return *this; } -AesGcmV1& AesGcmV1::operator=(AesGcmV1&& other348) noexcept { - aad_prefix = std::move(other348.aad_prefix); - aad_file_unique = std::move(other348.aad_file_unique); - supply_aad_prefix = other348.supply_aad_prefix; - __isset = other348.__isset; +AesGcmV1& AesGcmV1::operator=(AesGcmV1&& other338) noexcept { + aad_prefix = std::move(other338.aad_prefix); + aad_file_unique = std::move(other338.aad_file_unique); + supply_aad_prefix = other338.supply_aad_prefix; + __isset = other338.__isset; return *this; } void AesGcmV1::printTo(std::ostream& out) const { @@ -4298,30 +4228,30 @@ void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b) { swap(a.__isset, b.__isset); } -AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other349) { - aad_prefix = other349.aad_prefix; - aad_file_unique = other349.aad_file_unique; - supply_aad_prefix = other349.supply_aad_prefix; - __isset = other349.__isset; +AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other339) { + aad_prefix = other339.aad_prefix; + aad_file_unique = other339.aad_file_unique; + supply_aad_prefix = other339.supply_aad_prefix; + __isset = other339.__isset; } -AesGcmCtrV1::AesGcmCtrV1(AesGcmCtrV1&& other350) noexcept { - aad_prefix = std::move(other350.aad_prefix); - aad_file_unique = std::move(other350.aad_file_unique); - supply_aad_prefix = other350.supply_aad_prefix; - __isset = other350.__isset; +AesGcmCtrV1::AesGcmCtrV1(AesGcmCtrV1&& other340) noexcept { + aad_prefix = std::move(other340.aad_prefix); + aad_file_unique = std::move(other340.aad_file_unique); + supply_aad_prefix = other340.supply_aad_prefix; + __isset = other340.__isset; } -AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other351) { - aad_prefix = other351.aad_prefix; - aad_file_unique = other351.aad_file_unique; - supply_aad_prefix = other351.supply_aad_prefix; - __isset = other351.__isset; +AesGcmCtrV1& 
AesGcmCtrV1::operator=(const AesGcmCtrV1& other341) { + aad_prefix = other341.aad_prefix; + aad_file_unique = other341.aad_file_unique; + supply_aad_prefix = other341.supply_aad_prefix; + __isset = other341.__isset; return *this; } -AesGcmCtrV1& AesGcmCtrV1::operator=(AesGcmCtrV1&& other352) noexcept { - aad_prefix = std::move(other352.aad_prefix); - aad_file_unique = std::move(other352.aad_file_unique); - supply_aad_prefix = other352.supply_aad_prefix; - __isset = other352.__isset; +AesGcmCtrV1& AesGcmCtrV1::operator=(AesGcmCtrV1&& other342) noexcept { + aad_prefix = std::move(other342.aad_prefix); + aad_file_unique = std::move(other342.aad_file_unique); + supply_aad_prefix = other342.supply_aad_prefix; + __isset = other342.__isset; return *this; } void AesGcmCtrV1::printTo(std::ostream& out) const { @@ -4361,26 +4291,26 @@ void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b) { swap(a.__isset, b.__isset); } -EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other353) { - AES_GCM_V1 = other353.AES_GCM_V1; - AES_GCM_CTR_V1 = other353.AES_GCM_CTR_V1; - __isset = other353.__isset; +EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other343) { + AES_GCM_V1 = other343.AES_GCM_V1; + AES_GCM_CTR_V1 = other343.AES_GCM_CTR_V1; + __isset = other343.__isset; } -EncryptionAlgorithm::EncryptionAlgorithm(EncryptionAlgorithm&& other354) noexcept { - AES_GCM_V1 = std::move(other354.AES_GCM_V1); - AES_GCM_CTR_V1 = std::move(other354.AES_GCM_CTR_V1); - __isset = other354.__isset; +EncryptionAlgorithm::EncryptionAlgorithm(EncryptionAlgorithm&& other344) noexcept { + AES_GCM_V1 = std::move(other344.AES_GCM_V1); + AES_GCM_CTR_V1 = std::move(other344.AES_GCM_CTR_V1); + __isset = other344.__isset; } -EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other355) { - AES_GCM_V1 = other355.AES_GCM_V1; - AES_GCM_CTR_V1 = other355.AES_GCM_CTR_V1; - __isset = other355.__isset; +EncryptionAlgorithm& 
EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other345) { + AES_GCM_V1 = other345.AES_GCM_V1; + AES_GCM_CTR_V1 = other345.AES_GCM_CTR_V1; + __isset = other345.__isset; return *this; } -EncryptionAlgorithm& EncryptionAlgorithm::operator=(EncryptionAlgorithm&& other356) noexcept { - AES_GCM_V1 = std::move(other356.AES_GCM_V1); - AES_GCM_CTR_V1 = std::move(other356.AES_GCM_CTR_V1); - __isset = other356.__isset; +EncryptionAlgorithm& EncryptionAlgorithm::operator=(EncryptionAlgorithm&& other346) noexcept { + AES_GCM_V1 = std::move(other346.AES_GCM_V1); + AES_GCM_CTR_V1 = std::move(other346.AES_GCM_CTR_V1); + __isset = other346.__isset; return *this; } void EncryptionAlgorithm::printTo(std::ostream& out) const { @@ -4457,54 +4387,54 @@ void swap(FileMetaData &a, FileMetaData &b) { swap(a.__isset, b.__isset); } -FileMetaData::FileMetaData(const FileMetaData& other381) { - version = other381.version; - schema = other381.schema; - num_rows = other381.num_rows; - row_groups = other381.row_groups; - key_value_metadata = other381.key_value_metadata; - created_by = other381.created_by; - column_orders = other381.column_orders; - encryption_algorithm = other381.encryption_algorithm; - footer_signing_key_metadata = other381.footer_signing_key_metadata; - __isset = other381.__isset; -} -FileMetaData::FileMetaData(FileMetaData&& other382) noexcept { - version = other382.version; - schema = std::move(other382.schema); - num_rows = other382.num_rows; - row_groups = std::move(other382.row_groups); - key_value_metadata = std::move(other382.key_value_metadata); - created_by = std::move(other382.created_by); - column_orders = std::move(other382.column_orders); - encryption_algorithm = std::move(other382.encryption_algorithm); - footer_signing_key_metadata = std::move(other382.footer_signing_key_metadata); - __isset = other382.__isset; -} -FileMetaData& FileMetaData::operator=(const FileMetaData& other383) { - version = other383.version; - schema = other383.schema; - num_rows 
= other383.num_rows; - row_groups = other383.row_groups; - key_value_metadata = other383.key_value_metadata; - created_by = other383.created_by; - column_orders = other383.column_orders; - encryption_algorithm = other383.encryption_algorithm; - footer_signing_key_metadata = other383.footer_signing_key_metadata; - __isset = other383.__isset; +FileMetaData::FileMetaData(const FileMetaData& other371) { + version = other371.version; + schema = other371.schema; + num_rows = other371.num_rows; + row_groups = other371.row_groups; + key_value_metadata = other371.key_value_metadata; + created_by = other371.created_by; + column_orders = other371.column_orders; + encryption_algorithm = other371.encryption_algorithm; + footer_signing_key_metadata = other371.footer_signing_key_metadata; + __isset = other371.__isset; +} +FileMetaData::FileMetaData(FileMetaData&& other372) noexcept { + version = other372.version; + schema = std::move(other372.schema); + num_rows = other372.num_rows; + row_groups = std::move(other372.row_groups); + key_value_metadata = std::move(other372.key_value_metadata); + created_by = std::move(other372.created_by); + column_orders = std::move(other372.column_orders); + encryption_algorithm = std::move(other372.encryption_algorithm); + footer_signing_key_metadata = std::move(other372.footer_signing_key_metadata); + __isset = other372.__isset; +} +FileMetaData& FileMetaData::operator=(const FileMetaData& other373) { + version = other373.version; + schema = other373.schema; + num_rows = other373.num_rows; + row_groups = other373.row_groups; + key_value_metadata = other373.key_value_metadata; + created_by = other373.created_by; + column_orders = other373.column_orders; + encryption_algorithm = other373.encryption_algorithm; + footer_signing_key_metadata = other373.footer_signing_key_metadata; + __isset = other373.__isset; return *this; } -FileMetaData& FileMetaData::operator=(FileMetaData&& other384) noexcept { - version = other384.version; - schema = 
std::move(other384.schema); - num_rows = other384.num_rows; - row_groups = std::move(other384.row_groups); - key_value_metadata = std::move(other384.key_value_metadata); - created_by = std::move(other384.created_by); - column_orders = std::move(other384.column_orders); - encryption_algorithm = std::move(other384.encryption_algorithm); - footer_signing_key_metadata = std::move(other384.footer_signing_key_metadata); - __isset = other384.__isset; +FileMetaData& FileMetaData::operator=(FileMetaData&& other374) noexcept { + version = other374.version; + schema = std::move(other374.schema); + num_rows = other374.num_rows; + row_groups = std::move(other374.row_groups); + key_value_metadata = std::move(other374.key_value_metadata); + created_by = std::move(other374.created_by); + column_orders = std::move(other374.column_orders); + encryption_algorithm = std::move(other374.encryption_algorithm); + footer_signing_key_metadata = std::move(other374.footer_signing_key_metadata); + __isset = other374.__isset; return *this; } void FileMetaData::printTo(std::ostream& out) const { @@ -4549,26 +4479,26 @@ void swap(FileCryptoMetaData &a, FileCryptoMetaData &b) { swap(a.__isset, b.__isset); } -FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other385) { - encryption_algorithm = other385.encryption_algorithm; - key_metadata = other385.key_metadata; - __isset = other385.__isset; +FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other375) { + encryption_algorithm = other375.encryption_algorithm; + key_metadata = other375.key_metadata; + __isset = other375.__isset; } -FileCryptoMetaData::FileCryptoMetaData(FileCryptoMetaData&& other386) noexcept { - encryption_algorithm = std::move(other386.encryption_algorithm); - key_metadata = std::move(other386.key_metadata); - __isset = other386.__isset; +FileCryptoMetaData::FileCryptoMetaData(FileCryptoMetaData&& other376) noexcept { + encryption_algorithm = std::move(other376.encryption_algorithm); + key_metadata = 
std::move(other376.key_metadata); + __isset = other376.__isset; } -FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other387) { - encryption_algorithm = other387.encryption_algorithm; - key_metadata = other387.key_metadata; - __isset = other387.__isset; +FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other377) { + encryption_algorithm = other377.encryption_algorithm; + key_metadata = other377.key_metadata; + __isset = other377.__isset; return *this; } -FileCryptoMetaData& FileCryptoMetaData::operator=(FileCryptoMetaData&& other388) noexcept { - encryption_algorithm = std::move(other388.encryption_algorithm); - key_metadata = std::move(other388.key_metadata); - __isset = other388.__isset; +FileCryptoMetaData& FileCryptoMetaData::operator=(FileCryptoMetaData&& other378) noexcept { + encryption_algorithm = std::move(other378.encryption_algorithm); + key_metadata = std::move(other378.key_metadata); + __isset = other378.__isset; return *this; } void FileCryptoMetaData::printTo(std::ostream& out) const { diff --git a/cpp/src/generated/parquet_types.h b/cpp/src/generated/parquet_types.h index b190f9d21997b..fce29472addb8 100644 --- a/cpp/src/generated/parquet_types.h +++ b/cpp/src/generated/parquet_types.h @@ -212,24 +212,6 @@ std::ostream& operator<<(std::ostream& out, const FieldRepetitionType::type& val std::string to_string(const FieldRepetitionType::type& val); -/** - * Interpretation for edges of GEOMETRY logical type, i.e. whether the edge - * between points represent a straight cartesian line or the shortest line on - * the sphere. It applies to all non-point geometry objects. - */ -struct Edges { - enum type { - PLANAR = 0, - SPHERICAL = 1 - }; -}; - -extern const std::map _Edges_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const Edges::type& val); - -std::string to_string(const Edges::type& val); - /** * Physical type and encoding for the geometry type. 
*/ @@ -241,15 +223,22 @@ struct GeometryEncoding { * Well-known binary (WKB) representations of geometries. * * To be clear, we follow the same rule of WKB and coordinate axis order from - * GeoParquet [1][2]. It is the ISO WKB supporting XY, XYZ, XYM, XYZM and the - * standard geometry types (Point, LineString, Polygon, MultiPoint, - * MultiLineString, MultiPolygon, and GeometryCollection). + * GeoParquet [1][2]. Geometries SHOULD be encoded as ISO WKB [3][4] + * supporting XY, XYZ, XYM, XYZM and the standard geometry types + * Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon, + * and GeometryCollection). Coordinate order is always (x, y) where x is + * easting or longitude and y is northing or latitude. This ordering explicitly + * overrides the axis order as specified in the CRS following the GeoPackage + * specification [5]. * * This is the preferred encoding for maximum portability. It also supports * GeometryStatistics to be set in the column chunk and page index. * * [1] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L92 * [2] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L155 + * [3] https://portal.ogc.org/files/?artifact_id=18241 + * [4] https://www.iso.org/standard/60343.html + * [5] https://www.geopackage.org/spec130/#gpb_spec */ WKB = 0 }; @@ -261,6 +250,31 @@ std::ostream& operator<<(std::ostream& out, const GeometryEncoding::type& val); std::string to_string(const GeometryEncoding::type& val); +/** + * Interpretation for edges of elements of a GEOMETRY logical type. In other + * words, whether a point between two vertices should be interpolated in + * its XY dimensions as if it were a Cartesian line connecting the two + * vertices (planar) or the shortest spherical arc between the longitude + * and latitude represented by the two vertices (spherical). 
This value + * applies to all non-point geometry objects and is independent of the + * coordinate reference system. + * + * Because most systems currently assume planar edges and do not support + * spherical edges, planar should be used as the default value. + */ +struct Edges { + enum type { + PLANAR = 0, + SPHERICAL = 1 + }; +}; + +extern const std::map _Edges_VALUES_TO_NAMES; + +std::ostream& operator<<(std::ostream& out, const Edges::type& val); + +std::string to_string(const Edges::type& val); + /** * Encodings supported by Parquet. Not all encodings are valid for all types. These * enums are also used to specify the encoding of definition and repetition levels. @@ -399,8 +413,6 @@ std::string to_string(const BoundaryOrder::type& val); class SizeStatistics; -class Covering; - class BoundingBox; class GeometryStatistics; @@ -618,69 +630,6 @@ void swap(SizeStatistics &a, SizeStatistics &b); std::ostream& operator<<(std::ostream& out, const SizeStatistics& obj); - -/** - * A custom binary-encoded polygon or multi-polygon to represent a covering of - * geometries. For example, it may be a bounding box or an envelope of geometries - * when a bounding box cannot be built (e.g. a geometry has spherical edges, or if - * an edge of geographic coordinates crosses the antimeridian). In addition, it can - * also be used to provide vendor-agnostic coverings like S2 or H3 grids. - */ -class Covering { - public: - - Covering(const Covering&); - Covering(Covering&&) noexcept; - Covering& operator=(const Covering&); - Covering& operator=(Covering&&) noexcept; - Covering() noexcept - : kind(), - value() { - } - - virtual ~Covering() noexcept; - /** - * A type of covering. Currently accepted values: "WKB". - */ - std::string kind; - /** - * A payload specific to kind. Below are the supported values: - * - WKB: well-known binary of a POLYGON or MULTI-POLYGON that completely - * covers the contents. 
This will be interpreted according to the same CRS - * and edges defined by the logical type. - */ - std::string value; - - void __set_kind(const std::string& val); - - void __set_value(const std::string& val); - - bool operator == (const Covering & rhs) const - { - if (!(kind == rhs.kind)) - return false; - if (!(value == rhs.value)) - return false; - return true; - } - bool operator != (const Covering &rhs) const { - return !(*this == rhs); - } - - bool operator < (const Covering & ) const; - - template - uint32_t read(Protocol_* iprot); - template - uint32_t write(Protocol_* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(Covering &a, Covering &b); - -std::ostream& operator<<(std::ostream& out, const Covering& obj); - typedef struct _BoundingBox__isset { _BoundingBox__isset() : zmin(false), zmax(false), mmin(false), mmax(false) {} bool zmin :1; @@ -691,10 +640,10 @@ typedef struct _BoundingBox__isset { /** * Bounding box of geometries in the representation of min/max value pair of - * coordinates from each axis. Values of Z and M are omitted for 2D geometries. - * Filter pushdown on geometries are only safe for planar spatial predicate - * but it is recommended that the writer always generates bounding box statistics, - * regardless of whether the geometries are planar or spherical. + * coordinates from each axis when Edges is planar. Values of Z and M are omitted + * for 2D geometries. When Edges is spherical, the bounding box is in the form of + * [westmost, eastmost, southmost, northmost], with necessary min/max values for + * Z and M if needed. 
*/ class BoundingBox { public: @@ -715,9 +664,21 @@ class BoundingBox { } virtual ~BoundingBox() noexcept; + /** + * Westmost value if edges = spherical * + */ double xmin; + /** + * Eastmost value if edges = spherical * + */ double xmax; + /** + * Southmost value if edges = spherical * + */ double ymin; + /** + * Northmost value if edges = spherical * + */ double ymax; double zmin; double zmax; @@ -789,9 +750,8 @@ void swap(BoundingBox &a, BoundingBox &b); std::ostream& operator<<(std::ostream& out, const BoundingBox& obj); typedef struct _GeometryStatistics__isset { - _GeometryStatistics__isset() : bbox(false), coverings(false), geometry_types(false) {} + _GeometryStatistics__isset() : bbox(false), geometry_types(false) {} bool bbox :1; - bool coverings :1; bool geometry_types :1; } _GeometryStatistics__isset; @@ -813,13 +773,6 @@ class GeometryStatistics { * A bounding box of geometries */ BoundingBox bbox; - /** - * A list of coverings of geometries. - * Note that It is allowed to have more than one covering of the same kind and - * implementation is free to use any of them. It is recommended to have at most - * one covering for each kind. - */ - std::vector coverings; /** * The geometry types of all geometries, or an empty array if they are not * known. 
This is borrowed from `geometry_types` column metadata of GeoParquet [1] @@ -853,8 +806,6 @@ class GeometryStatistics { void __set_bbox(const BoundingBox& val); - void __set_coverings(const std::vector & val); - void __set_geometry_types(const std::vector & val); bool operator == (const GeometryStatistics & rhs) const @@ -863,10 +814,6 @@ class GeometryStatistics { return false; else if (__isset.bbox && !(bbox == rhs.bbox)) return false; - if (__isset.coverings != rhs.__isset.coverings) - return false; - else if (__isset.coverings && !(coverings == rhs.coverings)) - return false; if (__isset.geometry_types != rhs.__isset.geometry_types) return false; else if (__isset.geometry_types && !(geometry_types == rhs.geometry_types)) @@ -1812,10 +1759,8 @@ void swap(BsonType &a, BsonType &b); std::ostream& operator<<(std::ostream& out, const BsonType& obj); typedef struct _GeometryType__isset { - _GeometryType__isset() : crs(false), crs_encoding(false), metadata(false) {} + _GeometryType__isset() : crs(false) {} bool crs :1; - bool crs_encoding :1; - bool metadata :1; } _GeometryType__isset; /** @@ -1831,9 +1776,7 @@ class GeometryType { GeometryType() noexcept : encoding(static_cast(0)), edges(static_cast(0)), - crs(), - crs_encoding(), - metadata() { + crs() { } virtual ~GeometryType() noexcept; @@ -1845,65 +1788,31 @@ class GeometryType { */ GeometryEncoding::type encoding; /** - * Edges of geometry type. + * Interpretation for edges of elements of a GEOMETRY logical type, i.e. whether + * the interpolation between points along an edge represents a straight cartesian + * line or the shortest line on the sphere. * Please refer to the definition of Edges for more detail. * * @see Edges */ Edges::type edges; /** - * Coordinate Reference System, i.e. mapping of how coordinates refer to - * precise locations on earth. Writers are not required to set this field. - * Once crs is set, crs_encoding field below MUST be set together. 
- * For example, "OGC:CRS84" can be set in the form of PROJJSON as below: - * { - * "$schema": "https://proj.org/schemas/v0.5/projjson.schema.json", - * "type": "GeographicCRS", - * "name": "WGS 84 longitude-latitude", - * "datum": { - * "type": "GeodeticReferenceFrame", - * "name": "World Geodetic System 1984", - * "ellipsoid": { - * "name": "WGS 84", - * "semi_major_axis": 6378137, - * "inverse_flattening": 298.257223563 - * } - * }, - * "coordinate_system": { - * "subtype": "ellipsoidal", - * "axis": [ - * { - * "name": "Geodetic longitude", - * "abbreviation": "Lon", - * "direction": "east", - * "unit": "degree" - * }, - * { - * "name": "Geodetic latitude", - * "abbreviation": "Lat", - * "direction": "north", - * "unit": "degree" - * } - * ] - * }, - * "id": { - * "authority": "OGC", - * "code": "CRS84" - * } - * } + * CRS (coordinate reference system) is a mapping of how coordinates refer to + * precise locations on earth. A crs is specified by a string, which is a Parquet + * file metadata field whose value is the crs representation. An additional field + * with the suffix '.type' describes the encoding of this CRS representation. + * + * For example, if a geometry column (e.g., 'geom1') uses the CRS 'OGC:CRS84', the + * writer may create 2 file metadata fields: 'geom1_crs' and 'geom1_crs.type', and + * set the 'crs' field to 'geom1_crs'. The 'geom1_crs' field will contain the + * PROJJSON representation of OGC:CRS84 + * (https://github.com/opengeospatial/geoparquet/blob/main/format-specs/geoparquet.md#ogccrs84-details), + * and the 'geom1_crs.type' field will contain the string 'PROJJSON'. + * + * Multiple geometry columns can refer to the same CRS metadata field + * (e.g., 'geom1_crs') if they share the same CRS. */ std::string crs; - /** - * Encoding used in the above crs field. It MUST be set if crs field is set. - * Currently the only allowed value is "PROJJSON". - */ - std::string crs_encoding; - /** - * Additional informative metadata. 
- * GeoParquet could offload its column metadata in a JSON-encoded UTF-8 string: - * https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L46 - */ - std::string metadata; _GeometryType__isset __isset; @@ -1913,10 +1822,6 @@ class GeometryType { void __set_crs(const std::string& val); - void __set_crs_encoding(const std::string& val); - - void __set_metadata(const std::string& val); - bool operator == (const GeometryType & rhs) const { if (!(encoding == rhs.encoding)) @@ -1927,14 +1832,6 @@ class GeometryType { return false; else if (__isset.crs && !(crs == rhs.crs)) return false; - if (__isset.crs_encoding != rhs.__isset.crs_encoding) - return false; - else if (__isset.crs_encoding && !(crs_encoding == rhs.crs_encoding)) - return false; - if (__isset.metadata != rhs.__isset.metadata) - return false; - else if (__isset.metadata && !(metadata == rhs.metadata)) - return false; return true; } bool operator != (const GeometryType &rhs) const { diff --git a/cpp/src/generated/parquet_types.tcc b/cpp/src/generated/parquet_types.tcc index 33a457e4ad954..6caf040c7c874 100644 --- a/cpp/src/generated/parquet_types.tcc +++ b/cpp/src/generated/parquet_types.tcc @@ -135,81 +135,6 @@ uint32_t SizeStatistics::write(Protocol_* oprot) const { return xfer; } -template -uint32_t Covering::read(Protocol_* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_kind = false; - bool isset_value = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->kind); - isset_kind = true; - } else { - xfer += 
iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->value); - isset_value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_kind) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_value) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -template -uint32_t Covering::write(Protocol_* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("Covering"); - - xfer += oprot->writeFieldBegin("kind", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeString(this->kind); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("value", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->value); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - template uint32_t BoundingBox::read(Protocol_* iprot) { @@ -398,37 +323,17 @@ uint32_t GeometryStatistics::read(Protocol_* iprot) { } break; case 2: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->coverings.clear(); - uint32_t _size24; - ::apache::thrift::protocol::TType _etype27; - xfer += iprot->readListBegin(_etype27, _size24); - this->coverings.resize(_size24); - uint32_t _i28; - for (_i28 = 0; _i28 < _size24; ++_i28) - { - xfer += this->coverings[_i28].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.coverings = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: if (ftype == ::apache::thrift::protocol::T_LIST) { { this->geometry_types.clear(); - uint32_t _size29; - ::apache::thrift::protocol::TType _etype32; - xfer += iprot->readListBegin(_etype32, _size29); - 
this->geometry_types.resize(_size29); - uint32_t _i33; - for (_i33 = 0; _i33 < _size29; ++_i33) + uint32_t _size20; + ::apache::thrift::protocol::TType _etype23; + xfer += iprot->readListBegin(_etype23, _size20); + this->geometry_types.resize(_size20); + uint32_t _i24; + for (_i24 = 0; _i24 < _size20; ++_i24) { - xfer += iprot->readI32(this->geometry_types[_i33]); + xfer += iprot->readI32(this->geometry_types[_i24]); } xfer += iprot->readListEnd(); } @@ -460,27 +365,14 @@ uint32_t GeometryStatistics::write(Protocol_* oprot) const { xfer += this->bbox.write(oprot); xfer += oprot->writeFieldEnd(); } - if (this->__isset.coverings) { - xfer += oprot->writeFieldBegin("coverings", ::apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->coverings.size())); - std::vector ::const_iterator _iter34; - for (_iter34 = this->coverings.begin(); _iter34 != this->coverings.end(); ++_iter34) - { - xfer += (*_iter34).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } if (this->__isset.geometry_types) { - xfer += oprot->writeFieldBegin("geometry_types", ::apache::thrift::protocol::T_LIST, 3); + xfer += oprot->writeFieldBegin("geometry_types", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast(this->geometry_types.size())); - std::vector ::const_iterator _iter35; - for (_iter35 = this->geometry_types.begin(); _iter35 != this->geometry_types.end(); ++_iter35) + std::vector ::const_iterator _iter25; + for (_iter25 = this->geometry_types.begin(); _iter25 != this->geometry_types.end(); ++_iter25) { - xfer += oprot->writeI32((*_iter35)); + xfer += oprot->writeI32((*_iter25)); } xfer += oprot->writeListEnd(); } @@ -1583,9 +1475,9 @@ uint32_t GeometryType::read(Protocol_* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast116; - xfer += iprot->readI32(ecast116); - 
this->encoding = static_cast(ecast116); + int32_t ecast106; + xfer += iprot->readI32(ecast106); + this->encoding = static_cast(ecast106); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -1593,9 +1485,9 @@ uint32_t GeometryType::read(Protocol_* iprot) { break; case 2: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast117; - xfer += iprot->readI32(ecast117); - this->edges = static_cast(ecast117); + int32_t ecast107; + xfer += iprot->readI32(ecast107); + this->edges = static_cast(ecast107); isset_edges = true; } else { xfer += iprot->skip(ftype); @@ -1609,22 +1501,6 @@ uint32_t GeometryType::read(Protocol_* iprot) { xfer += iprot->skip(ftype); } break; - case 4: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->crs_encoding); - this->__isset.crs_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->metadata); - this->__isset.metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; default: xfer += iprot->skip(ftype); break; @@ -1660,16 +1536,6 @@ uint32_t GeometryType::write(Protocol_* oprot) const { xfer += oprot->writeString(this->crs); xfer += oprot->writeFieldEnd(); } - if (this->__isset.crs_encoding) { - xfer += oprot->writeFieldBegin("crs_encoding", ::apache::thrift::protocol::T_STRING, 4); - xfer += oprot->writeString(this->crs_encoding); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.metadata) { - xfer += oprot->writeFieldBegin("metadata", ::apache::thrift::protocol::T_STRING, 5); - xfer += oprot->writeString(this->metadata); - xfer += oprot->writeFieldEnd(); - } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -1940,9 +1806,9 @@ uint32_t SchemaElement::read(Protocol_* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast126; - xfer += iprot->readI32(ecast126); - this->type = 
static_cast(ecast126); + int32_t ecast116; + xfer += iprot->readI32(ecast116); + this->type = static_cast(ecast116); this->__isset.type = true; } else { xfer += iprot->skip(ftype); @@ -1958,9 +1824,9 @@ uint32_t SchemaElement::read(Protocol_* iprot) { break; case 3: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast127; - xfer += iprot->readI32(ecast127); - this->repetition_type = static_cast(ecast127); + int32_t ecast117; + xfer += iprot->readI32(ecast117); + this->repetition_type = static_cast(ecast117); this->__isset.repetition_type = true; } else { xfer += iprot->skip(ftype); @@ -1984,9 +1850,9 @@ uint32_t SchemaElement::read(Protocol_* iprot) { break; case 6: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast128; - xfer += iprot->readI32(ecast128); - this->converted_type = static_cast(ecast128); + int32_t ecast118; + xfer += iprot->readI32(ecast118); + this->converted_type = static_cast(ecast118); this->__isset.converted_type = true; } else { xfer += iprot->skip(ftype); @@ -2134,9 +2000,9 @@ uint32_t DataPageHeader::read(Protocol_* iprot) { break; case 2: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast133; - xfer += iprot->readI32(ecast133); - this->encoding = static_cast(ecast133); + int32_t ecast123; + xfer += iprot->readI32(ecast123); + this->encoding = static_cast(ecast123); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -2144,9 +2010,9 @@ uint32_t DataPageHeader::read(Protocol_* iprot) { break; case 3: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast134; - xfer += iprot->readI32(ecast134); - this->definition_level_encoding = static_cast(ecast134); + int32_t ecast124; + xfer += iprot->readI32(ecast124); + this->definition_level_encoding = static_cast(ecast124); isset_definition_level_encoding = true; } else { xfer += iprot->skip(ftype); @@ -2154,9 +2020,9 @@ uint32_t DataPageHeader::read(Protocol_* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - 
int32_t ecast135; - xfer += iprot->readI32(ecast135); - this->repetition_level_encoding = static_cast(ecast135); + int32_t ecast125; + xfer += iprot->readI32(ecast125); + this->repetition_level_encoding = static_cast(ecast125); isset_repetition_level_encoding = true; } else { xfer += iprot->skip(ftype); @@ -2296,9 +2162,9 @@ uint32_t DictionaryPageHeader::read(Protocol_* iprot) { break; case 2: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast144; - xfer += iprot->readI32(ecast144); - this->encoding = static_cast(ecast144); + int32_t ecast134; + xfer += iprot->readI32(ecast134); + this->encoding = static_cast(ecast134); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -2406,9 +2272,9 @@ uint32_t DataPageHeaderV2::read(Protocol_* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast149; - xfer += iprot->readI32(ecast149); - this->encoding = static_cast(ecast149); + int32_t ecast139; + xfer += iprot->readI32(ecast139); + this->encoding = static_cast(ecast139); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -2941,9 +2807,9 @@ uint32_t PageHeader::read(Protocol_* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast182; - xfer += iprot->readI32(ecast182); - this->type = static_cast(ecast182); + int32_t ecast172; + xfer += iprot->readI32(ecast172); + this->type = static_cast(ecast172); isset_type = true; } else { xfer += iprot->skip(ftype); @@ -3261,9 +3127,9 @@ uint32_t PageEncodingStats::read(Protocol_* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast195; - xfer += iprot->readI32(ecast195); - this->page_type = static_cast(ecast195); + int32_t ecast185; + xfer += iprot->readI32(ecast185); + this->page_type = static_cast(ecast185); isset_page_type = true; } else { xfer += iprot->skip(ftype); @@ -3271,9 +3137,9 @@ uint32_t PageEncodingStats::read(Protocol_* iprot) { break; case 2: if (ftype == 
::apache::thrift::protocol::T_I32) { - int32_t ecast196; - xfer += iprot->readI32(ecast196); - this->encoding = static_cast(ecast196); + int32_t ecast186; + xfer += iprot->readI32(ecast186); + this->encoding = static_cast(ecast186); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -3360,9 +3226,9 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast201; - xfer += iprot->readI32(ecast201); - this->type = static_cast(ecast201); + int32_t ecast191; + xfer += iprot->readI32(ecast191); + this->type = static_cast(ecast191); isset_type = true; } else { xfer += iprot->skip(ftype); @@ -3372,16 +3238,16 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->encodings.clear(); - uint32_t _size202; - ::apache::thrift::protocol::TType _etype205; - xfer += iprot->readListBegin(_etype205, _size202); - this->encodings.resize(_size202); - uint32_t _i206; - for (_i206 = 0; _i206 < _size202; ++_i206) + uint32_t _size192; + ::apache::thrift::protocol::TType _etype195; + xfer += iprot->readListBegin(_etype195, _size192); + this->encodings.resize(_size192); + uint32_t _i196; + for (_i196 = 0; _i196 < _size192; ++_i196) { - int32_t ecast207; - xfer += iprot->readI32(ecast207); - this->encodings[_i206] = static_cast(ecast207); + int32_t ecast197; + xfer += iprot->readI32(ecast197); + this->encodings[_i196] = static_cast(ecast197); } xfer += iprot->readListEnd(); } @@ -3394,14 +3260,14 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->path_in_schema.clear(); - uint32_t _size208; - ::apache::thrift::protocol::TType _etype211; - xfer += iprot->readListBegin(_etype211, _size208); - this->path_in_schema.resize(_size208); - uint32_t _i212; - for (_i212 = 0; _i212 < _size208; ++_i212) + uint32_t _size198; + ::apache::thrift::protocol::TType _etype201; + xfer += 
iprot->readListBegin(_etype201, _size198); + this->path_in_schema.resize(_size198); + uint32_t _i202; + for (_i202 = 0; _i202 < _size198; ++_i202) { - xfer += iprot->readString(this->path_in_schema[_i212]); + xfer += iprot->readString(this->path_in_schema[_i202]); } xfer += iprot->readListEnd(); } @@ -3412,9 +3278,9 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast213; - xfer += iprot->readI32(ecast213); - this->codec = static_cast(ecast213); + int32_t ecast203; + xfer += iprot->readI32(ecast203); + this->codec = static_cast(ecast203); isset_codec = true; } else { xfer += iprot->skip(ftype); @@ -3448,14 +3314,14 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->key_value_metadata.clear(); - uint32_t _size214; - ::apache::thrift::protocol::TType _etype217; - xfer += iprot->readListBegin(_etype217, _size214); - this->key_value_metadata.resize(_size214); - uint32_t _i218; - for (_i218 = 0; _i218 < _size214; ++_i218) + uint32_t _size204; + ::apache::thrift::protocol::TType _etype207; + xfer += iprot->readListBegin(_etype207, _size204); + this->key_value_metadata.resize(_size204); + uint32_t _i208; + for (_i208 = 0; _i208 < _size204; ++_i208) { - xfer += this->key_value_metadata[_i218].read(iprot); + xfer += this->key_value_metadata[_i208].read(iprot); } xfer += iprot->readListEnd(); } @@ -3500,14 +3366,14 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->encoding_stats.clear(); - uint32_t _size219; - ::apache::thrift::protocol::TType _etype222; - xfer += iprot->readListBegin(_etype222, _size219); - this->encoding_stats.resize(_size219); - uint32_t _i223; - for (_i223 = 0; _i223 < _size219; ++_i223) + uint32_t _size209; + ::apache::thrift::protocol::TType _etype212; + xfer += iprot->readListBegin(_etype212, _size209); + this->encoding_stats.resize(_size209); + 
uint32_t _i213; + for (_i213 = 0; _i213 < _size209; ++_i213) { - xfer += this->encoding_stats[_i223].read(iprot); + xfer += this->encoding_stats[_i213].read(iprot); } xfer += iprot->readListEnd(); } @@ -3581,10 +3447,10 @@ uint32_t ColumnMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("encodings", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast(this->encodings.size())); - std::vector ::const_iterator _iter224; - for (_iter224 = this->encodings.begin(); _iter224 != this->encodings.end(); ++_iter224) + std::vector ::const_iterator _iter214; + for (_iter214 = this->encodings.begin(); _iter214 != this->encodings.end(); ++_iter214) { - xfer += oprot->writeI32(static_cast((*_iter224))); + xfer += oprot->writeI32(static_cast((*_iter214))); } xfer += oprot->writeListEnd(); } @@ -3593,10 +3459,10 @@ uint32_t ColumnMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 3); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); - std::vector ::const_iterator _iter225; - for (_iter225 = this->path_in_schema.begin(); _iter225 != this->path_in_schema.end(); ++_iter225) + std::vector ::const_iterator _iter215; + for (_iter215 = this->path_in_schema.begin(); _iter215 != this->path_in_schema.end(); ++_iter215) { - xfer += oprot->writeString((*_iter225)); + xfer += oprot->writeString((*_iter215)); } xfer += oprot->writeListEnd(); } @@ -3622,10 +3488,10 @@ uint32_t ColumnMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 8); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter226; - for (_iter226 = this->key_value_metadata.begin(); _iter226 != this->key_value_metadata.end(); 
++_iter226) + std::vector ::const_iterator _iter216; + for (_iter216 = this->key_value_metadata.begin(); _iter216 != this->key_value_metadata.end(); ++_iter216) { - xfer += (*_iter226).write(oprot); + xfer += (*_iter216).write(oprot); } xfer += oprot->writeListEnd(); } @@ -3654,10 +3520,10 @@ uint32_t ColumnMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("encoding_stats", ::apache::thrift::protocol::T_LIST, 13); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->encoding_stats.size())); - std::vector ::const_iterator _iter227; - for (_iter227 = this->encoding_stats.begin(); _iter227 != this->encoding_stats.end(); ++_iter227) + std::vector ::const_iterator _iter217; + for (_iter217 = this->encoding_stats.begin(); _iter217 != this->encoding_stats.end(); ++_iter217) { - xfer += (*_iter227).write(oprot); + xfer += (*_iter217).write(oprot); } xfer += oprot->writeListEnd(); } @@ -3750,14 +3616,14 @@ uint32_t EncryptionWithColumnKey::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->path_in_schema.clear(); - uint32_t _size236; - ::apache::thrift::protocol::TType _etype239; - xfer += iprot->readListBegin(_etype239, _size236); - this->path_in_schema.resize(_size236); - uint32_t _i240; - for (_i240 = 0; _i240 < _size236; ++_i240) + uint32_t _size226; + ::apache::thrift::protocol::TType _etype229; + xfer += iprot->readListBegin(_etype229, _size226); + this->path_in_schema.resize(_size226); + uint32_t _i230; + for (_i230 = 0; _i230 < _size226; ++_i230) { - xfer += iprot->readString(this->path_in_schema[_i240]); + xfer += iprot->readString(this->path_in_schema[_i230]); } xfer += iprot->readListEnd(); } @@ -3797,10 +3663,10 @@ uint32_t EncryptionWithColumnKey::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, 
static_cast(this->path_in_schema.size())); - std::vector ::const_iterator _iter241; - for (_iter241 = this->path_in_schema.begin(); _iter241 != this->path_in_schema.end(); ++_iter241) + std::vector ::const_iterator _iter231; + for (_iter231 = this->path_in_schema.begin(); _iter231 != this->path_in_schema.end(); ++_iter231) { - xfer += oprot->writeString((*_iter241)); + xfer += oprot->writeString((*_iter231)); } xfer += oprot->writeListEnd(); } @@ -4080,14 +3946,14 @@ uint32_t RowGroup::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->columns.clear(); - uint32_t _size254; - ::apache::thrift::protocol::TType _etype257; - xfer += iprot->readListBegin(_etype257, _size254); - this->columns.resize(_size254); - uint32_t _i258; - for (_i258 = 0; _i258 < _size254; ++_i258) + uint32_t _size244; + ::apache::thrift::protocol::TType _etype247; + xfer += iprot->readListBegin(_etype247, _size244); + this->columns.resize(_size244); + uint32_t _i248; + for (_i248 = 0; _i248 < _size244; ++_i248) { - xfer += this->columns[_i258].read(iprot); + xfer += this->columns[_i248].read(iprot); } xfer += iprot->readListEnd(); } @@ -4116,14 +3982,14 @@ uint32_t RowGroup::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->sorting_columns.clear(); - uint32_t _size259; - ::apache::thrift::protocol::TType _etype262; - xfer += iprot->readListBegin(_etype262, _size259); - this->sorting_columns.resize(_size259); - uint32_t _i263; - for (_i263 = 0; _i263 < _size259; ++_i263) + uint32_t _size249; + ::apache::thrift::protocol::TType _etype252; + xfer += iprot->readListBegin(_etype252, _size249); + this->sorting_columns.resize(_size249); + uint32_t _i253; + for (_i253 = 0; _i253 < _size249; ++_i253) { - xfer += this->sorting_columns[_i263].read(iprot); + xfer += this->sorting_columns[_i253].read(iprot); } xfer += iprot->readListEnd(); } @@ -4183,10 +4049,10 @@ uint32_t RowGroup::write(Protocol_* oprot) const { xfer += 
oprot->writeFieldBegin("columns", ::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->columns.size())); - std::vector ::const_iterator _iter264; - for (_iter264 = this->columns.begin(); _iter264 != this->columns.end(); ++_iter264) + std::vector ::const_iterator _iter254; + for (_iter254 = this->columns.begin(); _iter254 != this->columns.end(); ++_iter254) { - xfer += (*_iter264).write(oprot); + xfer += (*_iter254).write(oprot); } xfer += oprot->writeListEnd(); } @@ -4204,10 +4070,10 @@ uint32_t RowGroup::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("sorting_columns", ::apache::thrift::protocol::T_LIST, 4); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->sorting_columns.size())); - std::vector ::const_iterator _iter265; - for (_iter265 = this->sorting_columns.begin(); _iter265 != this->sorting_columns.end(); ++_iter265) + std::vector ::const_iterator _iter255; + for (_iter255 = this->sorting_columns.begin(); _iter255 != this->sorting_columns.end(); ++_iter255) { - xfer += (*_iter265).write(oprot); + xfer += (*_iter255).write(oprot); } xfer += oprot->writeListEnd(); } @@ -4448,14 +4314,14 @@ uint32_t OffsetIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->page_locations.clear(); - uint32_t _size282; - ::apache::thrift::protocol::TType _etype285; - xfer += iprot->readListBegin(_etype285, _size282); - this->page_locations.resize(_size282); - uint32_t _i286; - for (_i286 = 0; _i286 < _size282; ++_i286) + uint32_t _size272; + ::apache::thrift::protocol::TType _etype275; + xfer += iprot->readListBegin(_etype275, _size272); + this->page_locations.resize(_size272); + uint32_t _i276; + for (_i276 = 0; _i276 < _size272; ++_i276) { - xfer += this->page_locations[_i286].read(iprot); + xfer += this->page_locations[_i276].read(iprot); } xfer += iprot->readListEnd(); } @@ -4468,14 +4334,14 @@ uint32_t 
OffsetIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->unencoded_byte_array_data_bytes.clear(); - uint32_t _size287; - ::apache::thrift::protocol::TType _etype290; - xfer += iprot->readListBegin(_etype290, _size287); - this->unencoded_byte_array_data_bytes.resize(_size287); - uint32_t _i291; - for (_i291 = 0; _i291 < _size287; ++_i291) + uint32_t _size277; + ::apache::thrift::protocol::TType _etype280; + xfer += iprot->readListBegin(_etype280, _size277); + this->unencoded_byte_array_data_bytes.resize(_size277); + uint32_t _i281; + for (_i281 = 0; _i281 < _size277; ++_i281) { - xfer += iprot->readI64(this->unencoded_byte_array_data_bytes[_i291]); + xfer += iprot->readI64(this->unencoded_byte_array_data_bytes[_i281]); } xfer += iprot->readListEnd(); } @@ -4507,10 +4373,10 @@ uint32_t OffsetIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("page_locations", ::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->page_locations.size())); - std::vector ::const_iterator _iter292; - for (_iter292 = this->page_locations.begin(); _iter292 != this->page_locations.end(); ++_iter292) + std::vector ::const_iterator _iter282; + for (_iter282 = this->page_locations.begin(); _iter282 != this->page_locations.end(); ++_iter282) { - xfer += (*_iter292).write(oprot); + xfer += (*_iter282).write(oprot); } xfer += oprot->writeListEnd(); } @@ -4520,10 +4386,10 @@ uint32_t OffsetIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("unencoded_byte_array_data_bytes", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->unencoded_byte_array_data_bytes.size())); - std::vector ::const_iterator _iter293; - for (_iter293 = this->unencoded_byte_array_data_bytes.begin(); _iter293 != this->unencoded_byte_array_data_bytes.end(); ++_iter293) + std::vector ::const_iterator _iter283; 
+ for (_iter283 = this->unencoded_byte_array_data_bytes.begin(); _iter283 != this->unencoded_byte_array_data_bytes.end(); ++_iter283) { - xfer += oprot->writeI64((*_iter293)); + xfer += oprot->writeI64((*_iter283)); } xfer += oprot->writeListEnd(); } @@ -4564,14 +4430,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->null_pages.clear(); - uint32_t _size298; - ::apache::thrift::protocol::TType _etype301; - xfer += iprot->readListBegin(_etype301, _size298); - this->null_pages.resize(_size298); - uint32_t _i302; - for (_i302 = 0; _i302 < _size298; ++_i302) + uint32_t _size288; + ::apache::thrift::protocol::TType _etype291; + xfer += iprot->readListBegin(_etype291, _size288); + this->null_pages.resize(_size288); + uint32_t _i292; + for (_i292 = 0; _i292 < _size288; ++_i292) { - xfer += iprot->readBool(this->null_pages[_i302]); + xfer += iprot->readBool(this->null_pages[_i292]); } xfer += iprot->readListEnd(); } @@ -4584,14 +4450,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->min_values.clear(); - uint32_t _size303; - ::apache::thrift::protocol::TType _etype306; - xfer += iprot->readListBegin(_etype306, _size303); - this->min_values.resize(_size303); - uint32_t _i307; - for (_i307 = 0; _i307 < _size303; ++_i307) + uint32_t _size293; + ::apache::thrift::protocol::TType _etype296; + xfer += iprot->readListBegin(_etype296, _size293); + this->min_values.resize(_size293); + uint32_t _i297; + for (_i297 = 0; _i297 < _size293; ++_i297) { - xfer += iprot->readBinary(this->min_values[_i307]); + xfer += iprot->readBinary(this->min_values[_i297]); } xfer += iprot->readListEnd(); } @@ -4604,14 +4470,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->max_values.clear(); - uint32_t _size308; - ::apache::thrift::protocol::TType _etype311; - xfer += iprot->readListBegin(_etype311, _size308); - 
this->max_values.resize(_size308); - uint32_t _i312; - for (_i312 = 0; _i312 < _size308; ++_i312) + uint32_t _size298; + ::apache::thrift::protocol::TType _etype301; + xfer += iprot->readListBegin(_etype301, _size298); + this->max_values.resize(_size298); + uint32_t _i302; + for (_i302 = 0; _i302 < _size298; ++_i302) { - xfer += iprot->readBinary(this->max_values[_i312]); + xfer += iprot->readBinary(this->max_values[_i302]); } xfer += iprot->readListEnd(); } @@ -4622,9 +4488,9 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast313; - xfer += iprot->readI32(ecast313); - this->boundary_order = static_cast(ecast313); + int32_t ecast303; + xfer += iprot->readI32(ecast303); + this->boundary_order = static_cast(ecast303); isset_boundary_order = true; } else { xfer += iprot->skip(ftype); @@ -4634,14 +4500,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->null_counts.clear(); - uint32_t _size314; - ::apache::thrift::protocol::TType _etype317; - xfer += iprot->readListBegin(_etype317, _size314); - this->null_counts.resize(_size314); - uint32_t _i318; - for (_i318 = 0; _i318 < _size314; ++_i318) + uint32_t _size304; + ::apache::thrift::protocol::TType _etype307; + xfer += iprot->readListBegin(_etype307, _size304); + this->null_counts.resize(_size304); + uint32_t _i308; + for (_i308 = 0; _i308 < _size304; ++_i308) { - xfer += iprot->readI64(this->null_counts[_i318]); + xfer += iprot->readI64(this->null_counts[_i308]); } xfer += iprot->readListEnd(); } @@ -4654,14 +4520,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->repetition_level_histograms.clear(); - uint32_t _size319; - ::apache::thrift::protocol::TType _etype322; - xfer += iprot->readListBegin(_etype322, _size319); - this->repetition_level_histograms.resize(_size319); - uint32_t _i323; - for (_i323 = 0; _i323 < 
_size319; ++_i323) + uint32_t _size309; + ::apache::thrift::protocol::TType _etype312; + xfer += iprot->readListBegin(_etype312, _size309); + this->repetition_level_histograms.resize(_size309); + uint32_t _i313; + for (_i313 = 0; _i313 < _size309; ++_i313) { - xfer += iprot->readI64(this->repetition_level_histograms[_i323]); + xfer += iprot->readI64(this->repetition_level_histograms[_i313]); } xfer += iprot->readListEnd(); } @@ -4674,14 +4540,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->definition_level_histograms.clear(); - uint32_t _size324; - ::apache::thrift::protocol::TType _etype327; - xfer += iprot->readListBegin(_etype327, _size324); - this->definition_level_histograms.resize(_size324); - uint32_t _i328; - for (_i328 = 0; _i328 < _size324; ++_i328) + uint32_t _size314; + ::apache::thrift::protocol::TType _etype317; + xfer += iprot->readListBegin(_etype317, _size314); + this->definition_level_histograms.resize(_size314); + uint32_t _i318; + for (_i318 = 0; _i318 < _size314; ++_i318) { - xfer += iprot->readI64(this->definition_level_histograms[_i328]); + xfer += iprot->readI64(this->definition_level_histograms[_i318]); } xfer += iprot->readListEnd(); } @@ -4694,14 +4560,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->geometry_stats.clear(); - uint32_t _size329; - ::apache::thrift::protocol::TType _etype332; - xfer += iprot->readListBegin(_etype332, _size329); - this->geometry_stats.resize(_size329); - uint32_t _i333; - for (_i333 = 0; _i333 < _size329; ++_i333) + uint32_t _size319; + ::apache::thrift::protocol::TType _etype322; + xfer += iprot->readListBegin(_etype322, _size319); + this->geometry_stats.resize(_size319); + uint32_t _i323; + for (_i323 = 0; _i323 < _size319; ++_i323) { - xfer += this->geometry_stats[_i333].read(iprot); + xfer += this->geometry_stats[_i323].read(iprot); } xfer += iprot->readListEnd(); } @@ -4739,10 
+4605,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("null_pages", ::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_BOOL, static_cast(this->null_pages.size())); - std::vector ::const_iterator _iter334; - for (_iter334 = this->null_pages.begin(); _iter334 != this->null_pages.end(); ++_iter334) + std::vector ::const_iterator _iter324; + for (_iter324 = this->null_pages.begin(); _iter324 != this->null_pages.end(); ++_iter324) { - xfer += oprot->writeBool((*_iter334)); + xfer += oprot->writeBool((*_iter324)); } xfer += oprot->writeListEnd(); } @@ -4751,10 +4617,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("min_values", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->min_values.size())); - std::vector ::const_iterator _iter335; - for (_iter335 = this->min_values.begin(); _iter335 != this->min_values.end(); ++_iter335) + std::vector ::const_iterator _iter325; + for (_iter325 = this->min_values.begin(); _iter325 != this->min_values.end(); ++_iter325) { - xfer += oprot->writeBinary((*_iter335)); + xfer += oprot->writeBinary((*_iter325)); } xfer += oprot->writeListEnd(); } @@ -4763,10 +4629,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("max_values", ::apache::thrift::protocol::T_LIST, 3); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->max_values.size())); - std::vector ::const_iterator _iter336; - for (_iter336 = this->max_values.begin(); _iter336 != this->max_values.end(); ++_iter336) + std::vector ::const_iterator _iter326; + for (_iter326 = this->max_values.begin(); _iter326 != this->max_values.end(); ++_iter326) { - xfer += oprot->writeBinary((*_iter336)); + xfer += oprot->writeBinary((*_iter326)); } xfer += oprot->writeListEnd(); } @@ -4780,10 +4646,10 @@ uint32_t 
ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("null_counts", ::apache::thrift::protocol::T_LIST, 5); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->null_counts.size())); - std::vector ::const_iterator _iter337; - for (_iter337 = this->null_counts.begin(); _iter337 != this->null_counts.end(); ++_iter337) + std::vector ::const_iterator _iter327; + for (_iter327 = this->null_counts.begin(); _iter327 != this->null_counts.end(); ++_iter327) { - xfer += oprot->writeI64((*_iter337)); + xfer += oprot->writeI64((*_iter327)); } xfer += oprot->writeListEnd(); } @@ -4793,10 +4659,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("repetition_level_histograms", ::apache::thrift::protocol::T_LIST, 6); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->repetition_level_histograms.size())); - std::vector ::const_iterator _iter338; - for (_iter338 = this->repetition_level_histograms.begin(); _iter338 != this->repetition_level_histograms.end(); ++_iter338) + std::vector ::const_iterator _iter328; + for (_iter328 = this->repetition_level_histograms.begin(); _iter328 != this->repetition_level_histograms.end(); ++_iter328) { - xfer += oprot->writeI64((*_iter338)); + xfer += oprot->writeI64((*_iter328)); } xfer += oprot->writeListEnd(); } @@ -4806,10 +4672,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("definition_level_histograms", ::apache::thrift::protocol::T_LIST, 7); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->definition_level_histograms.size())); - std::vector ::const_iterator _iter339; - for (_iter339 = this->definition_level_histograms.begin(); _iter339 != this->definition_level_histograms.end(); ++_iter339) + std::vector ::const_iterator _iter329; + for (_iter329 = this->definition_level_histograms.begin(); _iter329 != 
this->definition_level_histograms.end(); ++_iter329) { - xfer += oprot->writeI64((*_iter339)); + xfer += oprot->writeI64((*_iter329)); } xfer += oprot->writeListEnd(); } @@ -4819,10 +4685,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("geometry_stats", ::apache::thrift::protocol::T_LIST, 8); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->geometry_stats.size())); - std::vector ::const_iterator _iter340; - for (_iter340 = this->geometry_stats.begin(); _iter340 != this->geometry_stats.end(); ++_iter340) + std::vector ::const_iterator _iter330; + for (_iter330 = this->geometry_stats.begin(); _iter330 != this->geometry_stats.end(); ++_iter330) { - xfer += (*_iter340).write(oprot); + xfer += (*_iter330).write(oprot); } xfer += oprot->writeListEnd(); } @@ -5110,14 +4976,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->schema.clear(); - uint32_t _size357; - ::apache::thrift::protocol::TType _etype360; - xfer += iprot->readListBegin(_etype360, _size357); - this->schema.resize(_size357); - uint32_t _i361; - for (_i361 = 0; _i361 < _size357; ++_i361) + uint32_t _size347; + ::apache::thrift::protocol::TType _etype350; + xfer += iprot->readListBegin(_etype350, _size347); + this->schema.resize(_size347); + uint32_t _i351; + for (_i351 = 0; _i351 < _size347; ++_i351) { - xfer += this->schema[_i361].read(iprot); + xfer += this->schema[_i351].read(iprot); } xfer += iprot->readListEnd(); } @@ -5138,14 +5004,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->row_groups.clear(); - uint32_t _size362; - ::apache::thrift::protocol::TType _etype365; - xfer += iprot->readListBegin(_etype365, _size362); - this->row_groups.resize(_size362); - uint32_t _i366; - for (_i366 = 0; _i366 < _size362; ++_i366) + uint32_t _size352; + ::apache::thrift::protocol::TType _etype355; + xfer += 
iprot->readListBegin(_etype355, _size352); + this->row_groups.resize(_size352); + uint32_t _i356; + for (_i356 = 0; _i356 < _size352; ++_i356) { - xfer += this->row_groups[_i366].read(iprot); + xfer += this->row_groups[_i356].read(iprot); } xfer += iprot->readListEnd(); } @@ -5158,14 +5024,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->key_value_metadata.clear(); - uint32_t _size367; - ::apache::thrift::protocol::TType _etype370; - xfer += iprot->readListBegin(_etype370, _size367); - this->key_value_metadata.resize(_size367); - uint32_t _i371; - for (_i371 = 0; _i371 < _size367; ++_i371) + uint32_t _size357; + ::apache::thrift::protocol::TType _etype360; + xfer += iprot->readListBegin(_etype360, _size357); + this->key_value_metadata.resize(_size357); + uint32_t _i361; + for (_i361 = 0; _i361 < _size357; ++_i361) { - xfer += this->key_value_metadata[_i371].read(iprot); + xfer += this->key_value_metadata[_i361].read(iprot); } xfer += iprot->readListEnd(); } @@ -5186,14 +5052,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->column_orders.clear(); - uint32_t _size372; - ::apache::thrift::protocol::TType _etype375; - xfer += iprot->readListBegin(_etype375, _size372); - this->column_orders.resize(_size372); - uint32_t _i376; - for (_i376 = 0; _i376 < _size372; ++_i376) + uint32_t _size362; + ::apache::thrift::protocol::TType _etype365; + xfer += iprot->readListBegin(_etype365, _size362); + this->column_orders.resize(_size362); + uint32_t _i366; + for (_i366 = 0; _i366 < _size362; ++_i366) { - xfer += this->column_orders[_i376].read(iprot); + xfer += this->column_orders[_i366].read(iprot); } xfer += iprot->readListEnd(); } @@ -5251,10 +5117,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2); { xfer += 
oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->schema.size())); - std::vector ::const_iterator _iter377; - for (_iter377 = this->schema.begin(); _iter377 != this->schema.end(); ++_iter377) + std::vector ::const_iterator _iter367; + for (_iter367 = this->schema.begin(); _iter367 != this->schema.end(); ++_iter367) { - xfer += (*_iter377).write(oprot); + xfer += (*_iter367).write(oprot); } xfer += oprot->writeListEnd(); } @@ -5267,10 +5133,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->row_groups.size())); - std::vector ::const_iterator _iter378; - for (_iter378 = this->row_groups.begin(); _iter378 != this->row_groups.end(); ++_iter378) + std::vector ::const_iterator _iter368; + for (_iter368 = this->row_groups.begin(); _iter368 != this->row_groups.end(); ++_iter368) { - xfer += (*_iter378).write(oprot); + xfer += (*_iter368).write(oprot); } xfer += oprot->writeListEnd(); } @@ -5280,10 +5146,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter379; - for (_iter379 = this->key_value_metadata.begin(); _iter379 != this->key_value_metadata.end(); ++_iter379) + std::vector ::const_iterator _iter369; + for (_iter369 = this->key_value_metadata.begin(); _iter369 != this->key_value_metadata.end(); ++_iter369) { - xfer += (*_iter379).write(oprot); + xfer += (*_iter369).write(oprot); } xfer += oprot->writeListEnd(); } @@ -5298,10 +5164,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("column_orders", ::apache::thrift::protocol::T_LIST, 7); { xfer += 
oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->column_orders.size())); - std::vector ::const_iterator _iter380; - for (_iter380 = this->column_orders.begin(); _iter380 != this->column_orders.end(); ++_iter380) + std::vector ::const_iterator _iter370; + for (_iter370 = this->column_orders.begin(); _iter370 != this->column_orders.end(); ++_iter370) { - xfer += (*_iter380).write(oprot); + xfer += (*_iter370).write(oprot); } xfer += oprot->writeListEnd(); } diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index 093916651607f..8c40d0b63669f 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -1785,17 +1785,14 @@ TEST_F(TestInt32Writer, WriteKeyValueMetadataEndToEnd) { // Test writing and reading geometry columns class TestGeometryValuesWriter : public TestPrimitiveWriter { public: - static const char* kCrs; - static const char* kMetadata; - void SetUpSchema(Repetition::type repetition, int num_columns) override { std::vector fields; for (int i = 0; i < num_columns; ++i) { std::string name = TestColumnName(i); std::shared_ptr logical_type = - GeometryLogicalType::Make(kCrs, LogicalType::GeometryEdges::PLANAR, - LogicalType::GeometryEncoding::WKB, kMetadata); + GeometryLogicalType::Make("OGC:CRS84", LogicalType::GeometryEdges::PLANAR, + LogicalType::GeometryEncoding::WKB); fields.push_back(schema::PrimitiveNode::Make(name, repetition, logical_type, ByteArrayType::type_num)); } @@ -1867,21 +1864,6 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { EXPECT_DOUBLE_EQ(1, min_y); EXPECT_DOUBLE_EQ(99, max_x); EXPECT_DOUBLE_EQ(100, max_y); - - auto coverings = geometry_statistics->GetCoverings(); - EXPECT_EQ(1, coverings.size()); - EXPECT_EQ("WKB", coverings[0].first); - geometry::WKBGeometryBounder bounder; - const std::string& wkb = coverings[0].second; - geometry::WKBBuffer wkb_buffer(reinterpret_cast(wkb.data()), - wkb.size()); - 
bounder.ReadGeometry(&wkb_buffer); - bounder.Flush(); - auto bounds = bounder.Bounds(); - EXPECT_DOUBLE_EQ(0, bounds.min[0]); - EXPECT_DOUBLE_EQ(1, bounds.min[1]); - EXPECT_DOUBLE_EQ(99, bounds.max[0]); - EXPECT_DOUBLE_EQ(100, bounds.max[1]); } void TestWriteAndReadSpaced(ParquetVersion::type version, @@ -1949,10 +1931,6 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { } }; -const char* TestGeometryValuesWriter::kCrs = - R"({"id": {"authority": "OGC", "code": "CRS84"}})"; -const char* TestGeometryValuesWriter::kMetadata = "test_metadata"; - TEST_F(TestGeometryValuesWriter, TestWriteAndReadV1) { for (auto data_page_version : {ParquetDataPageVersion::V1, ParquetDataPageVersion::V2}) { diff --git a/cpp/src/parquet/geometry_util_internal.h b/cpp/src/parquet/geometry_util_internal.h index 0f6cafdf2c8e0..10bd591f91c3a 100644 --- a/cpp/src/parquet/geometry_util_internal.h +++ b/cpp/src/parquet/geometry_util_internal.h @@ -688,33 +688,4 @@ static inline std::string MakeWKBPoint(const double* xyzm, bool has_z, bool has_ return wkb; } -static inline std::string MakeCoveringWKBFromBound(double xmin, double xmax, double ymin, - double ymax) { - std::string wkb_data(93, 0); - - // endianness and header - auto data = reinterpret_cast(wkb_data.data()); - data[0] = kWkbNativeEndianness; - uint32_t wkb_type = 3; // POLYGON - memcpy(&data[1], &wkb_type, 4); - - // n_rings and n_coords - uint32_t n_rings = 1; - uint32_t n_coords = 5; - memcpy(&data[5], &n_rings, 4); - memcpy(&data[9], &n_coords, 4); - - // coordinates - double coords[5][2] = { - {xmin, ymin}, {xmax, ymin}, {xmax, ymax}, {xmin, ymax}, {xmin, ymin}}; - uint8_t* ptr = &data[13]; - for (auto coord : coords) { - memcpy(ptr, &coord[0], 8); - memcpy(ptr + 8, &coord[1], 8); - ptr += 16; - } - - return wkb_data; -} - } // namespace parquet::geometry diff --git a/cpp/src/parquet/geometry_util_internal_test.cc b/cpp/src/parquet/geometry_util_internal_test.cc index ea5edc4b4d688..08cac7602ad34 100644 --- 
a/cpp/src/parquet/geometry_util_internal_test.cc +++ b/cpp/src/parquet/geometry_util_internal_test.cc @@ -461,32 +461,6 @@ INSTANTIATE_TEST_SUITE_P( 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, {30, 10, 40, 300, 30, 10, 40, 300}))); -TEST(TestGeometryUtil, MakeCoveringWKBFromBound) { - std::string wkb_covering = MakeCoveringWKBFromBound(10, 20, 30, 40); - // POLYGON ((10 30, 20 30, 20 40, 10 40, 10 30)) -#ifdef ARROW_LITTLE_ENDIAN - std::vector expected_wkb = { - 0x01, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, - 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, - 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40}; -#else - std::vector expected_wkb = { - 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x40, - 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x40, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x3e, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x40, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x44, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, - 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x40, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; -#endif - EXPECT_EQ(expected_wkb.size(), wkb_covering.size()); - EXPECT_EQ(0, memcmp(wkb_covering.data(), expected_wkb.data(), expected_wkb.size())); -} - struct MakeWKBPointTestCase { MakeWKBPointTestCase() = default; MakeWKBPointTestCase(const std::vector xyzm, bool has_z, bool has_m) diff --git a/cpp/src/parquet/page_index_test.cc 
b/cpp/src/parquet/page_index_test.cc index 3bce6d4240f07..2e780e17981e0 100644 --- a/cpp/src/parquet/page_index_test.cc +++ b/cpp/src/parquet/page_index_test.cc @@ -498,7 +498,6 @@ void TestWriteTypedColumnIndex(schema::NodePtr node, static_cast(column_index.get()); const auto& actual_stats = byte_array_column_index->geometry_statistics()[i]; ASSERT_EQ(expected_stats.geometry_types, actual_stats.GetGeometryTypes()); - ASSERT_EQ(expected_stats.coverings, actual_stats.GetCoverings()); ASSERT_DOUBLE_EQ(expected_stats.xmin, actual_stats.GetXMin()); ASSERT_DOUBLE_EQ(expected_stats.xmax, actual_stats.GetXMax()); ASSERT_DOUBLE_EQ(expected_stats.ymin, actual_stats.GetYMin()); @@ -636,18 +635,14 @@ TEST(PageIndex, WriteGeometryColumnIndex) { geom_stats[i].mmin = i + 7; geom_stats[i].mmax = i + 8; geom_stats[i].geometry_types = {i + 1}; - std::string covering = geometry::MakeCoveringWKBFromBound( - geom_stats[i].xmin, geom_stats[i].xmax, geom_stats[i].ymin, geom_stats[i].ymax); - geom_stats[i].coverings = {{"WKB", covering}}; page_stats.at(i).set_min(dummy_min).set_max(dummy_max); page_stats.at(i).set_geometry(geom_stats[i]); } schema::NodePtr node = schema::PrimitiveNode::Make( "c1", Repetition::OPTIONAL, - GeometryLogicalType::Make(R"({"id": {"authority": "OGC", "code": "CRS84"}})", - LogicalType::GeometryEdges::PLANAR, - LogicalType::GeometryEncoding::WKB, "metadata0"), + GeometryLogicalType::Make("OGC:CRS84", LogicalType::GeometryEdges::PLANAR, + LogicalType::GeometryEncoding::WKB), Type::BYTE_ARRAY); TestWriteTypedColumnIndex(node, page_stats, BoundaryOrder::Ascending, diff --git a/cpp/src/parquet/parquet.thrift b/cpp/src/parquet/parquet.thrift index 82ad4ff789369..f0ed1483fde13 100644 --- a/cpp/src/parquet/parquet.thrift +++ b/cpp/src/parquet/parquet.thrift @@ -239,47 +239,67 @@ struct SizeStatistics { } /** - * Interpretation for edges of GEOMETRY logical type, i.e. 
whether the edge - * between points represent a straight cartesian line or the shortest line on - * the sphere. It applies to all non-point geometry objects. + * Physical type and encoding for the geometry type. */ -enum Edges { - PLANAR = 0; - SPHERICAL = 1; +enum GeometryEncoding { + /** + * Allowed for physical type: BYTE_ARRAY. + * + * Well-known binary (WKB) representations of geometries. + * + * To be clear, we follow the same rule of WKB and coordinate axis order from + * GeoParquet [1][2]. Geometries SHOULD be encoded as ISO WKB [3][4] + * supporting XY, XYZ, XYM, XYZM and the standard geometry types + * Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon, + * and GeometryCollection). Coordinate order is always (x, y) where x is + * easting or longitude and y is northing or latitude. This ordering explicitly + * overrides the axis order as specified in the CRS following the GeoPackage + * specification [5]. + * + * This is the preferred encoding for maximum portability. It also supports + * GeometryStatistics to be set in the column chunk and page index. + * + * [1] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L92 + * [2] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L155 + * [3] https://portal.ogc.org/files/?artifact_id=18241 + * [4] https://www.iso.org/standard/60343.html + * [5] https://www.geopackage.org/spec130/#gpb_spec + */ + WKB = 0; } /** - * A custom binary-encoded polygon or multi-polygon to represent a covering of - * geometries. For example, it may be a bounding box or an envelope of geometries - * when a bounding box cannot be built (e.g. a geometry has spherical edges, or if - * an edge of geographic coordinates crosses the antimeridian). In addition, it can - * also be used to provide vendor-agnostic coverings like S2 or H3 grids. + * Interpretation for edges of elements of a GEOMETRY logical type. 
In other + * words, whether a point between two vertices should be interpolated in + * its XY dimensions as if it were a Cartesian line connecting the two + * vertices (planar) or the shortest spherical arc between the longitude + * and latitude represented by the two vertices (spherical). This value + * applies to all non-point geometry objects and is independent of the + * coordinate reference system. + * + * Because most systems currently assume planar edges and do not support + * spherical edges, planar should be used as the default value. */ -struct Covering { - /** - * A type of covering. Currently accepted values: "WKB". - */ - 1: required string kind; - /** - * A payload specific to kind. Below are the supported values: - * - WKB: well-known binary of a POLYGON or MULTI-POLYGON that completely - * covers the contents. This will be interpreted according to the same CRS - * and edges defined by the logical type. - */ - 2: required binary value; +enum Edges { + PLANAR = 0; + SPHERICAL = 1; } /** * Bounding box of geometries in the representation of min/max value pair of - * coordinates from each axis. Values of Z and M are omitted for 2D geometries. - * Filter pushdown on geometries are only safe for planar spatial predicate - * but it is recommended that the writer always generates bounding box statistics, - * regardless of whether the geometries are planar or spherical. + * coordinates from each axis when Edges is planar. Values of Z and M are omitted + * for 2D geometries. When Edges is spherical, the bounding box is in the form of + * [westmost, eastmost, southmost, northmost], with necessary min/max values for + * Z and M if needed. 
*/ struct BoundingBox { + /** Westmost value if edges = spherical **/ 1: required double xmin; + /** Eastmost value if edges = spherical **/ 2: required double xmax; + /** Southmost value if edges = spherical **/ 3: required double ymin; + /** Northmost value if edges = spherical **/ 4: required double ymax; 5: optional double zmin; 6: optional double zmax; @@ -292,14 +312,6 @@ struct GeometryStatistics { /** A bounding box of geometries */ 1: optional BoundingBox bbox; - /** - * A list of coverings of geometries. - * Note that It is allowed to have more than one covering of the same kind and - * implementation is free to use any of them. It is recommended to have at most - * one covering for each kind. - */ - 2: optional list coverings; - /** * The geometry types of all geometries, or an empty array if they are not * known. This is borrowed from `geometry_types` column metadata of GeoParquet [1] @@ -327,7 +339,7 @@ struct GeometryStatistics { * [1] https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary * [2] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L159 */ - 3: optional list geometry_types; + 2: optional list geometry_types; } /** @@ -469,29 +481,6 @@ struct JsonType { struct BsonType { } -/** - * Physical type and encoding for the geometry type. - */ -enum GeometryEncoding { - /** - * Allowed for physical type: BYTE_ARRAY. - * - * Well-known binary (WKB) representations of geometries. - * - * To be clear, we follow the same rule of WKB and coordinate axis order from - * GeoParquet [1][2]. It is the ISO WKB supporting XY, XYZ, XYM, XYZM and the - * standard geometry types (Point, LineString, Polygon, MultiPoint, - * MultiLineString, MultiPolygon, and GeometryCollection). - * - * This is the preferred encoding for maximum portability. It also supports - * GeometryStatistics to be set in the column chunk and page index. 
- * - * [1] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L92 - * [2] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L155 - */ - WKB = 0; -} - /** * Geometry logical type annotation (added in 2.11.0) */ @@ -502,63 +491,29 @@ struct GeometryType { */ 1: required GeometryEncoding encoding; /** - * Edges of geometry type. + * Interpretation for edges of elements of a GEOMETRY logical type, i.e. whether + * the interpolation between points along an edge represents a straight cartesian + * line or the shortest line on the sphere. * Please refer to the definition of Edges for more detail. */ 2: required Edges edges; /** - * Coordinate Reference System, i.e. mapping of how coordinates refer to - * precise locations on earth. Writers are not required to set this field. - * Once crs is set, crs_encoding field below MUST be set together. - * For example, "OGC:CRS84" can be set in the form of PROJJSON as below: - * { - * "$schema": "https://proj.org/schemas/v0.5/projjson.schema.json", - * "type": "GeographicCRS", - * "name": "WGS 84 longitude-latitude", - * "datum": { - * "type": "GeodeticReferenceFrame", - * "name": "World Geodetic System 1984", - * "ellipsoid": { - * "name": "WGS 84", - * "semi_major_axis": 6378137, - * "inverse_flattening": 298.257223563 - * } - * }, - * "coordinate_system": { - * "subtype": "ellipsoidal", - * "axis": [ - * { - * "name": "Geodetic longitude", - * "abbreviation": "Lon", - * "direction": "east", - * "unit": "degree" - * }, - * { - * "name": "Geodetic latitude", - * "abbreviation": "Lat", - * "direction": "north", - * "unit": "degree" - * } - * ] - * }, - * "id": { - * "authority": "OGC", - * "code": "CRS84" - * } - * } + * CRS (coordinate reference system) is a mapping of how coordinates refer to + * precise locations on earth. A crs is specified by a string, which is a Parquet + * file metadata field whose value is the crs representation. 
An additional field + * with the suffix '.type' describes the encoding of this CRS representation. + * + * For example, if a geometry column (e.g., 'geom1') uses the CRS 'OGC:CRS84', the + * writer may create 2 file metadata fields: 'geom1_crs' and 'geom1_crs.type', and + * set the 'crs' field to 'geom1_crs'. The 'geom1_crs' field will contain the + * PROJJSON representation of OGC:CRS84 + * (https://github.com/opengeospatial/geoparquet/blob/main/format-specs/geoparquet.md#ogccrs84-details), + * and the 'geom1_crs.type' field will contain the string 'PROJJSON'. + * + * Multiple geometry columns can refer to the same CRS metadata field + * (e.g., 'geom1_crs') if they share the same CRS. */ 3: optional string crs; - /** - * Encoding used in the above crs field. It MUST be set if crs field is set. - * Currently the only allowed value is "PROJJSON". - */ - 4: optional string crs_encoding; - /** - * Additional informative metadata. - * GeoParquet could offload its column metadata in a JSON-encoded UTF-8 string: - * https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L46 - */ - 5: optional string metadata; } /** diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index 4bee9570e2022..9f15c8670a3a6 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -1827,9 +1827,8 @@ class TestGeometryLogicalType : public ::testing::Test { schema::NodeVector fields; fields.push_back(PrimitiveNode::Make( "g", Repetition::REQUIRED, - GeometryLogicalType::Make(R"({"id": {"authority": "OGC", "code": "CRS84"}})", - LogicalType::GeometryEdges::PLANAR, - LogicalType::GeometryEncoding::WKB, "metadata0"), + GeometryLogicalType::Make("OGC:CRS84", LogicalType::GeometryEdges::PLANAR, + LogicalType::GeometryEncoding::WKB), Type::BYTE_ARRAY)); auto schema = std::static_pointer_cast( GroupNode::Make("schema", Repetition::REQUIRED, fields)); @@ -1976,8 +1975,6 @@ class TestGeometryLogicalType : public 
::testing::Test { EXPECT_GT(geom_stats->GetYMax(), geom_stats->GetYMin()); EXPECT_FALSE(geom_stats->HasZ()); EXPECT_FALSE(geom_stats->HasM()); - EXPECT_EQ(1, geom_stats->GetCoverings().size()); - EXPECT_EQ("WKB", geom_stats->GetCoverings().front().first); } void CheckColumnIndex(std::shared_ptr geometry_column_index) { @@ -1998,8 +1995,6 @@ class TestGeometryLogicalType : public ::testing::Test { EXPECT_GT(geom_stats.GetYMax(), geom_stats.GetYMin()); EXPECT_FALSE(geom_stats.HasZ()); EXPECT_FALSE(geom_stats.HasM()); - EXPECT_EQ(1, geom_stats.GetCoverings().size()); - EXPECT_EQ("WKB", geom_stats.GetCoverings().front().first); last_xmin = geom_stats.GetXMin(); last_ymin = geom_stats.GetYMin(); @@ -2033,7 +2028,6 @@ class TestGeometryLogicalType : public ::testing::Test { EXPECT_TRUE(statistics.has_geometry_statistics); EncodedGeometryStatistics geom_stats = statistics.geometry_statistics(); EXPECT_EQ(1, geom_stats.geometry_types.size()); - EXPECT_EQ(1, geom_stats.coverings.size()); EXPECT_GE(geom_stats.xmin, 0); EXPECT_GT(geom_stats.xmax, geom_stats.xmin); EXPECT_GT(geom_stats.ymin, 0); diff --git a/cpp/src/parquet/schema_test.cc b/cpp/src/parquet/schema_test.cc index 437f1a7ff95cb..31268c118d895 100644 --- a/cpp/src/parquet/schema_test.cc +++ b/cpp/src/parquet/schema_test.cc @@ -1544,10 +1544,10 @@ TEST(TestLogicalTypeOperation, LogicalTypeRepresentation) { {LogicalType::BSON(), "BSON", R"({"Type": "BSON"})"}, {LogicalType::UUID(), "UUID", R"({"Type": "UUID"})"}, {LogicalType::Float16(), "Float16", R"({"Type": "Float16"})"}, - {LogicalType::Geometry(), "Geometry(crs=, edges=planar, encoding=wkb, metadata=)", + {LogicalType::Geometry(), "Geometry(crs=, edges=planar, encoding=wkb)", R"({"Type": "Geometry", "edges": "planar", "encoding": "wkb"})"}, {LogicalType::Geometry("{}", LogicalType::GeometryEdges::SPHERICAL), - "Geometry(crs={}, edges=spherical, encoding=wkb, metadata=)", + "Geometry(crs={}, edges=spherical, encoding=wkb)", R"({"Type": "Geometry", "crs": {}, 
"edges": "spherical", "encoding": "wkb"})"}, {LogicalType::None(), "None", R"({"Type": "None"})"}, }; @@ -2273,7 +2273,7 @@ TEST(TestLogicalTypeSerialization, Roundtrips) { {LogicalType::Float16(), Type::FIXED_LEN_BYTE_ARRAY, 2}, {LogicalType::Geometry(), Type::BYTE_ARRAY, -1}, {LogicalType::Geometry("non-empty crs", LogicalType::GeometryEdges::SPHERICAL, - LogicalType::GeometryEncoding::WKB, "non-empty metadata"), + LogicalType::GeometryEncoding::WKB), Type::BYTE_ARRAY, -1}, {LogicalType::None(), Type::BOOLEAN, -1}}; diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index 8821130a341ca..5306c1c9a0ba3 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -149,7 +149,6 @@ class GeometryStatisticsImpl { void Reset() { bounder_.Reset(); - coverings_.clear(); is_valid_ = true; } @@ -169,15 +168,6 @@ class GeometryStatisticsImpl { out.mmin = mins[3]; out.mmax = maxes[3]; - if (generate_coverings_) { - std::string kind = "WKB"; - std::string value = - geometry::MakeCoveringWKBFromBound(out.xmin, out.xmax, out.ymin, out.ymax); - out.coverings.emplace_back(kind, value); - } else { - out.coverings = coverings_; - } - return out; } @@ -200,11 +190,6 @@ class GeometryStatisticsImpl { return; } - // Don't generate coverings when encoding since this statistics object is - // initialized from an encoded geometry statistics. We'll simply use the - // coverings in the encoded geometry statistics. 
- generate_coverings_ = false; - geometry::BoundingBox box; box.min[0] = encoded.xmin; box.max[0] = encoded.xmax; @@ -223,20 +208,6 @@ class GeometryStatisticsImpl { bounder_.ReadBox(box); bounder_.ReadGeometryTypes(encoded.geometry_types); - coverings_ = encoded.coverings; - - try { - for (const auto& covering : encoded.coverings) { - if (covering.first == "WKB") { - geometry::WKBBuffer buf( - reinterpret_cast(covering.second.data()), - covering.second.size()); - bounder_.ReadGeometry(&buf, false); - } - } - } catch (ParquetException&) { - is_valid_ = false; - } } bool is_valid() const { return is_valid_; } @@ -247,15 +218,9 @@ class GeometryStatisticsImpl { std::vector GetGeometryTypes() const { return bounder_.GeometryTypes(); } - std::vector> GetCoverings() const { - return coverings_; - } - private: geometry::WKBGeometryBounder bounder_; - std::vector> coverings_; bool is_valid_ = true; - bool generate_coverings_ = true; }; GeometryStatistics::GeometryStatistics() { @@ -360,11 +325,6 @@ std::vector GeometryStatistics::GetGeometryTypes() const { return impl_->GetGeometryTypes(); } -std::vector> GeometryStatistics::GetCoverings() - const { - return impl_->GetCoverings(); -} - namespace { // ---------------------------------------------------------------------- diff --git a/cpp/src/parquet/statistics.h b/cpp/src/parquet/statistics.h index bef4bdf811dab..4900b46c85cd3 100644 --- a/cpp/src/parquet/statistics.h +++ b/cpp/src/parquet/statistics.h @@ -131,7 +131,6 @@ class PARQUET_EXPORT EncodedGeometryStatistics { double zmax{-kInf}; double mmin{kInf}; double mmax{-kInf}; - std::vector> coverings; std::vector geometry_types; bool has_z() const { return (zmax - zmin) >= 0; } @@ -186,7 +185,6 @@ class PARQUET_EXPORT GeometryStatistics { bool HasM() const; std::vector GetGeometryTypes() const; - std::vector> GetCoverings() const; private: std::unique_ptr impl_; diff --git a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h index 
f7984f6681d27..c2f79d19ba993 100644 --- a/cpp/src/parquet/thrift_internal.h +++ b/cpp/src/parquet/thrift_internal.h @@ -251,10 +251,6 @@ static inline EncodedGeometryStatistics FromThrift( out.mmax = geometry_stats.bbox.mmax; } - for (const auto& covering : geometry_stats.coverings) { - out.coverings.emplace_back(covering.kind, covering.value); - } - return out; } @@ -368,16 +364,6 @@ static inline format::GeometryStatistics ToThrift( bbox.__set_mmax(encoded_geometry_stats.mmax); } geometry_statistics.__set_bbox(bbox); - - std::vector coverings; - coverings.reserve(encoded_geometry_stats.coverings.size()); - for (const auto& pair : encoded_geometry_stats.coverings) { - format::Covering covering; - covering.__set_kind(pair.first); - covering.__set_value(pair.second); - coverings.push_back(std::move(covering)); - } - geometry_statistics.__set_coverings(coverings); return geometry_statistics; } diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index b14eb9a830e9d..db1b61f00f369 100644 --- a/cpp/src/parquet/types.cc +++ b/cpp/src/parquet/types.cc @@ -487,12 +487,7 @@ std::shared_ptr LogicalType::FromThrift( type.GEOMETRY.encoding); } - std::string metadata; - if (type.GEOMETRY.__isset.metadata) { - metadata = type.GEOMETRY.metadata; - } - - return GeometryLogicalType::Make(crs, edges, encoding, metadata); + return GeometryLogicalType::Make(crs, edges, encoding); } else { throw ParquetException("Metadata contains Thrift LogicalType that is not recognized"); } @@ -552,8 +547,8 @@ std::shared_ptr LogicalType::Float16() { std::shared_ptr LogicalType::Geometry( std::string crs, LogicalType::GeometryEdges::edges edges, - LogicalType::GeometryEncoding::geometry_encoding encoding, std::string metadata) { - return GeometryLogicalType::Make(std::move(crs), edges, encoding, std::move(metadata)); + LogicalType::GeometryEncoding::geometry_encoding encoding) { + return GeometryLogicalType::Make(std::move(crs), edges, encoding); } std::shared_ptr LogicalType::None() { 
return NoLogicalType::Make(); } @@ -1676,30 +1671,25 @@ class LogicalType::Impl::Geometry final : public LogicalType::Impl::Incompatible const std::string& crs() const { return crs_; } LogicalType::GeometryEdges::edges edges() const { return edges_; } LogicalType::GeometryEncoding::geometry_encoding encoding() const { return encoding_; } - const std::string& metadata() const { return metadata_; } private: Geometry(std::string crs, LogicalType::GeometryEdges::edges edges, - LogicalType::GeometryEncoding::geometry_encoding encoding, - std::string metadata) + LogicalType::GeometryEncoding::geometry_encoding encoding) : LogicalType::Impl(LogicalType::Type::GEOMETRY, SortOrder::UNSIGNED), LogicalType::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY), crs_(std::move(crs)), edges_(edges), - encoding_(encoding), - metadata_(std::move(metadata)) {} + encoding_(encoding) {} std::string crs_; LogicalType::GeometryEdges::edges edges_; LogicalType::GeometryEncoding::geometry_encoding encoding_; - std::string metadata_; }; std::string LogicalType::Impl::Geometry::ToString() const { std::stringstream type; type << "Geometry(crs=" << crs_ << ", edges=" << geometry_edges_string(edges_) - << ", encoding=" << geometry_encoding_string(encoding_) - << ", metadata=" << metadata_ << ")"; + << ", encoding=" << geometry_encoding_string(encoding_) << ")"; return type.str(); } @@ -1715,11 +1705,6 @@ std::string LogicalType::Impl::Geometry::ToJSON() const { json << R"(, "edges": ")" << geometry_edges_string(edges_) << R"(")"; json << R"(, "encoding": ")" << geometry_encoding_string(encoding_) << R"(")"; - if (!metadata_.empty()) { - // TODO(paleolimbot): we'll need to escape the metadata or assume that it's valid JSON - json << R"(, "metadata": )" << crs_; - } - json << "}"; return json.str(); } @@ -1746,11 +1731,6 @@ format::LogicalType LogicalType::Impl::Geometry::ToThrift() const { } geometry_type.__set_encoding(format::GeometryEncoding::WKB); - // Canonically export empty metadata as 
unset - if (!metadata_.empty()) { - geometry_type.__set_metadata(metadata_); - } - type.__set_GEOMETRY(geometry_type); return type; } @@ -1759,8 +1739,7 @@ bool LogicalType::Impl::Geometry::Equals(const LogicalType& other) const { if (other.is_geometry()) { const auto& other_geometry = checked_cast(other); return crs() == other_geometry.crs() && edges() == other_geometry.edges() && - encoding() == other_geometry.encoding() && - metadata() == other_geometry.metadata(); + encoding() == other_geometry.encoding(); } else { return false; } @@ -1778,16 +1757,12 @@ LogicalType::GeometryEncoding::geometry_encoding GeometryLogicalType::encoding() return (dynamic_cast(*impl_)).encoding(); } -const std::string& GeometryLogicalType::metadata() const { - return (dynamic_cast(*impl_)).metadata(); -} - std::shared_ptr GeometryLogicalType::Make( std::string crs, LogicalType::GeometryEdges::edges edges, - LogicalType::GeometryEncoding::geometry_encoding encoding, std::string metadata) { + LogicalType::GeometryEncoding::geometry_encoding encoding) { auto* logical_type = new GeometryLogicalType(); - logical_type->impl_.reset(new LogicalType::Impl::Geometry( - std::move(crs), edges, encoding, std::move(metadata))); + logical_type->impl_.reset( + new LogicalType::Impl::Geometry(std::move(crs), edges, encoding)); return std::shared_ptr(logical_type); } diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h index d708a558943a3..ded241ed35cdf 100644 --- a/cpp/src/parquet/types.h +++ b/cpp/src/parquet/types.h @@ -225,8 +225,7 @@ class PARQUET_EXPORT LogicalType { static std::shared_ptr Geometry( std::string crs = "", LogicalType::GeometryEdges::edges edges = GeometryEdges::PLANAR, - LogicalType::GeometryEncoding::geometry_encoding encoding = GeometryEncoding::WKB, - std::string metadata = ""); + LogicalType::GeometryEncoding::geometry_encoding encoding = GeometryEncoding::WKB); /// \brief Create a placeholder for when no logical type is specified static std::shared_ptr None(); @@ 
-467,13 +466,11 @@ class PARQUET_EXPORT GeometryLogicalType : public LogicalType { static std::shared_ptr Make( std::string crs = "", LogicalType::GeometryEdges::edges edges = GeometryEdges::PLANAR, - LogicalType::GeometryEncoding::geometry_encoding encoding = GeometryEncoding::WKB, - std::string metadata = ""); + LogicalType::GeometryEncoding::geometry_encoding encoding = GeometryEncoding::WKB); const std::string& crs() const; LogicalType::GeometryEdges::edges edges() const; LogicalType::GeometryEncoding::geometry_encoding encoding() const; - const std::string& metadata() const; private: GeometryLogicalType() = default; From fe8a3e57381f22a352fcaa738461443c36898063 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Wed, 16 Oct 2024 00:01:38 +0800 Subject: [PATCH 59/61] Fix problem decoding WKB geometries with more than 32 coordinates --- cpp/src/parquet/geometry_util_internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/parquet/geometry_util_internal.h b/cpp/src/parquet/geometry_util_internal.h index 10bd591f91c3a..ebafaaed13514 100644 --- a/cpp/src/parquet/geometry_util_internal.h +++ b/cpp/src/parquet/geometry_util_internal.h @@ -394,7 +394,7 @@ class WKBSequenceBounder { uint32_t n_coords = src->ReadUInt32(); uint32_t n_chunks = n_coords / coords_per_chunk; for (uint32_t i = 0; i < n_chunks; i++) { - src->ReadDoubles(coords_per_chunk, chunk_); + src->ReadDoubles(coords_per_chunk * coord_size, chunk_); ReadChunk(coords_per_chunk); } From ba80f3ed3b0c4bae64fbfd52da5ebf9ba8dc89e9 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Wed, 30 Oct 2024 22:34:39 +0800 Subject: [PATCH 60/61] Re-implemented geometry statistics according to the updated spec: 1. geometry statistics moved out of statistics, it is now a field of column metadata 2. 
geometry statistics is removed from page index --- cpp/src/generated/parquet_types.cpp | 1445 ++++++++++++++++++++---- cpp/src/generated/parquet_types.h | 1303 ++++----------------- cpp/src/generated/parquet_types.tcc | 177 ++- cpp/src/parquet/CMakeLists.txt | 1 + cpp/src/parquet/column_reader.cc | 3 - cpp/src/parquet/column_writer.cc | 51 +- cpp/src/parquet/column_writer_test.cc | 29 +- cpp/src/parquet/geometry_statistics.cc | 310 +++++ cpp/src/parquet/geometry_statistics.h | 111 ++ cpp/src/parquet/metadata.cc | 53 +- cpp/src/parquet/metadata.h | 7 + cpp/src/parquet/page_index.cc | 29 +- cpp/src/parquet/page_index.h | 6 - cpp/src/parquet/page_index_test.cc | 48 - cpp/src/parquet/parquet.thrift | 237 ++-- cpp/src/parquet/reader_test.cc | 84 +- cpp/src/parquet/schema_test.cc | 2 +- cpp/src/parquet/statistics.cc | 400 +------ cpp/src/parquet/statistics.h | 104 +- cpp/src/parquet/thrift_internal.h | 5 +- cpp/src/parquet/types.cc | 2 +- 21 files changed, 2139 insertions(+), 2268 deletions(-) create mode 100644 cpp/src/parquet/geometry_statistics.cc create mode 100644 cpp/src/parquet/geometry_statistics.h diff --git a/cpp/src/generated/parquet_types.cpp b/cpp/src/generated/parquet_types.cpp index a8f98116277fa..9c2771d18223c 100644 --- a/cpp/src/generated/parquet_types.cpp +++ b/cpp/src/generated/parquet_types.cpp @@ -1,5 +1,5 @@ /** - * Autogenerated by Thrift Compiler (0.20.0) + * Autogenerated by Thrift Compiler (0.21.0) * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * @generated @@ -73,14 +73,14 @@ int _kConvertedTypeValues[] = { */ ConvertedType::LIST, /** - * an enum is converted into a binary field + * an enum is converted into a BYTE_ARRAY field */ ConvertedType::ENUM, /** * A decimal value. * - * This may be used to annotate binary or fixed primitive types. The - * underlying byte array stores the unscaled value encoded as two's + * This may be used to annotate BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY primitive + * types. 
The underlying byte array stores the unscaled value encoded as two's * complement using big-endian byte order (the most significant byte is the * zeroth element). The value of the decimal is the value * 10^{-scale}. * @@ -161,7 +161,7 @@ int _kConvertedTypeValues[] = { /** * An embedded BSON document * - * A BSON document embedded within a single BINARY column. + * A BSON document embedded within a single BYTE_ARRAY column. */ ConvertedType::BSON, /** @@ -197,14 +197,14 @@ const char* _kConvertedTypeNames[] = { */ "LIST", /** - * an enum is converted into a binary field + * an enum is converted into a BYTE_ARRAY field */ "ENUM", /** * A decimal value. * - * This may be used to annotate binary or fixed primitive types. The - * underlying byte array stores the unscaled value encoded as two's + * This may be used to annotate BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY primitive + * types. The underlying byte array stores the unscaled value encoded as two's * complement using big-endian byte order (the most significant byte is the * zeroth element). The value of the decimal is the value * 10^{-scale}. * @@ -285,7 +285,7 @@ const char* _kConvertedTypeNames[] = { /** * An embedded BSON document * - * A BSON document embedded within a single BINARY column. + * A BSON document embedded within a single BYTE_ARRAY column. */ "BSON", /** @@ -325,11 +325,11 @@ std::string to_string(const ConvertedType::type& val) { int _kFieldRepetitionTypeValues[] = { /** - * This field is required (can not be null) and each record has exactly 1 value. + * This field is required (can not be null) and each row has exactly 1 value. */ FieldRepetitionType::REQUIRED, /** - * The field is optional (can be null) and each record has 0 or 1 values. + * The field is optional (can be null) and each row has 0 or 1 values. 
*/ FieldRepetitionType::OPTIONAL, /** @@ -339,11 +339,11 @@ int _kFieldRepetitionTypeValues[] = { }; const char* _kFieldRepetitionTypeNames[] = { /** - * This field is required (can not be null) and each record has exactly 1 value. + * This field is required (can not be null) and each row has exactly 1 value. */ "REQUIRED", /** - * The field is optional (can be null) and each record has 0 or 1 values. + * The field is optional (can be null) and each row has 0 or 1 values. */ "OPTIONAL", /** @@ -377,24 +377,6 @@ int _kGeometryEncodingValues[] = { * Allowed for physical type: BYTE_ARRAY. * * Well-known binary (WKB) representations of geometries. - * - * To be clear, we follow the same rule of WKB and coordinate axis order from - * GeoParquet [1][2]. Geometries SHOULD be encoded as ISO WKB [3][4] - * supporting XY, XYZ, XYM, XYZM and the standard geometry types - * Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon, - * and GeometryCollection). Coordinate order is always (x, y) where x is - * easting or longitude and y is northing or latitude. This ordering explicitly - * overrides the axis order as specified in the CRS following the GeoPackage - * specification [5]. - * - * This is the preferred encoding for maximum portability. It also supports - * GeometryStatistics to be set in the column chunk and page index. - * - * [1] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L92 - * [2] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L155 - * [3] https://portal.ogc.org/files/?artifact_id=18241 - * [4] https://www.iso.org/standard/60343.html - * [5] https://www.geopackage.org/spec130/#gpb_spec */ GeometryEncoding::WKB }; @@ -403,24 +385,6 @@ const char* _kGeometryEncodingNames[] = { * Allowed for physical type: BYTE_ARRAY. * * Well-known binary (WKB) representations of geometries. 
- * - * To be clear, we follow the same rule of WKB and coordinate axis order from - * GeoParquet [1][2]. Geometries SHOULD be encoded as ISO WKB [3][4] - * supporting XY, XYZ, XYM, XYZM and the standard geometry types - * Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon, - * and GeometryCollection). Coordinate order is always (x, y) where x is - * easting or longitude and y is northing or latitude. This ordering explicitly - * overrides the axis order as specified in the CRS following the GeoPackage - * specification [5]. - * - * This is the preferred encoding for maximum portability. It also supports - * GeometryStatistics to be set in the column chunk and page index. - * - * [1] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L92 - * [2] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L155 - * [3] https://portal.ogc.org/files/?artifact_id=18241 - * [4] https://www.iso.org/standard/60343.html - * [5] https://www.geopackage.org/spec130/#gpb_spec */ "WKB" }; @@ -726,6 +690,9 @@ std::string to_string(const BoundaryOrder::type& val) { SizeStatistics::~SizeStatistics() noexcept { } +SizeStatistics::SizeStatistics() noexcept + : unencoded_byte_array_data_bytes(0) { +} void SizeStatistics::__set_unencoded_byte_array_data_bytes(const int64_t val) { this->unencoded_byte_array_data_bytes = val; @@ -756,6 +723,23 @@ void swap(SizeStatistics &a, SizeStatistics &b) { swap(a.__isset, b.__isset); } +bool SizeStatistics::operator==(const SizeStatistics & rhs) const +{ + if (__isset.unencoded_byte_array_data_bytes != rhs.__isset.unencoded_byte_array_data_bytes) + return false; + else if (__isset.unencoded_byte_array_data_bytes && !(unencoded_byte_array_data_bytes == rhs.unencoded_byte_array_data_bytes)) + return false; + if (__isset.repetition_level_histogram != rhs.__isset.repetition_level_histogram) + return false; + else if (__isset.repetition_level_histogram && 
!(repetition_level_histogram == rhs.repetition_level_histogram)) + return false; + if (__isset.definition_level_histogram != rhs.__isset.definition_level_histogram) + return false; + else if (__isset.definition_level_histogram && !(definition_level_histogram == rhs.definition_level_histogram)) + return false; + return true; +} + SizeStatistics::SizeStatistics(const SizeStatistics& other12) { unencoded_byte_array_data_bytes = other12.unencoded_byte_array_data_bytes; repetition_level_histogram = other12.repetition_level_histogram; @@ -795,6 +779,16 @@ void SizeStatistics::printTo(std::ostream& out) const { BoundingBox::~BoundingBox() noexcept { } +BoundingBox::BoundingBox() noexcept + : xmin(0), + xmax(0), + ymin(0), + ymax(0), + zmin(0), + zmax(0), + mmin(0), + mmax(0) { +} void BoundingBox::__set_xmin(const double val) { this->xmin = val; @@ -851,6 +845,35 @@ void swap(BoundingBox &a, BoundingBox &b) { swap(a.__isset, b.__isset); } +bool BoundingBox::operator==(const BoundingBox & rhs) const +{ + if (!(xmin == rhs.xmin)) + return false; + if (!(xmax == rhs.xmax)) + return false; + if (!(ymin == rhs.ymin)) + return false; + if (!(ymax == rhs.ymax)) + return false; + if (__isset.zmin != rhs.__isset.zmin) + return false; + else if (__isset.zmin && !(zmin == rhs.zmin)) + return false; + if (__isset.zmax != rhs.__isset.zmax) + return false; + else if (__isset.zmax && !(zmax == rhs.zmax)) + return false; + if (__isset.mmin != rhs.__isset.mmin) + return false; + else if (__isset.mmin && !(mmin == rhs.mmin)) + return false; + if (__isset.mmax != rhs.__isset.mmax) + return false; + else if (__isset.mmax && !(mmax == rhs.mmax)) + return false; + return true; +} + BoundingBox::BoundingBox(const BoundingBox& other16) noexcept { xmin = other16.xmin; xmax = other16.xmax; @@ -915,6 +938,8 @@ void BoundingBox::printTo(std::ostream& out) const { GeometryStatistics::~GeometryStatistics() noexcept { } +GeometryStatistics::GeometryStatistics() noexcept { +} void 
GeometryStatistics::__set_bbox(const BoundingBox& val) { this->bbox = val; @@ -939,6 +964,19 @@ void swap(GeometryStatistics &a, GeometryStatistics &b) { swap(a.__isset, b.__isset); } +bool GeometryStatistics::operator==(const GeometryStatistics & rhs) const +{ + if (__isset.bbox != rhs.__isset.bbox) + return false; + else if (__isset.bbox && !(bbox == rhs.bbox)) + return false; + if (__isset.geometry_types != rhs.__isset.geometry_types) + return false; + else if (__isset.geometry_types && !(geometry_types == rhs.geometry_types)) + return false; + return true; +} + GeometryStatistics::GeometryStatistics(const GeometryStatistics& other26) { bbox = other26.bbox; geometry_types = other26.geometry_types; @@ -973,6 +1011,16 @@ void GeometryStatistics::printTo(std::ostream& out) const { Statistics::~Statistics() noexcept { } +Statistics::Statistics() noexcept + : max(), + min(), + null_count(0), + distinct_count(0), + max_value(), + min_value(), + is_max_value_exact(0), + is_min_value_exact(0) { +} void Statistics::__set_max(const std::string& val) { this->max = val; @@ -1013,11 +1061,6 @@ void Statistics::__set_is_min_value_exact(const bool val) { this->is_min_value_exact = val; __isset.is_min_value_exact = true; } - -void Statistics::__set_geometry_stats(const GeometryStatistics& val) { - this->geometry_stats = val; -__isset.geometry_stats = true; -} std::ostream& operator<<(std::ostream& out, const Statistics& obj) { obj.printTo(out); @@ -1035,10 +1078,46 @@ void swap(Statistics &a, Statistics &b) { swap(a.min_value, b.min_value); swap(a.is_max_value_exact, b.is_max_value_exact); swap(a.is_min_value_exact, b.is_min_value_exact); - swap(a.geometry_stats, b.geometry_stats); swap(a.__isset, b.__isset); } +bool Statistics::operator==(const Statistics & rhs) const +{ + if (__isset.max != rhs.__isset.max) + return false; + else if (__isset.max && !(max == rhs.max)) + return false; + if (__isset.min != rhs.__isset.min) + return false; + else if (__isset.min && !(min == 
rhs.min)) + return false; + if (__isset.null_count != rhs.__isset.null_count) + return false; + else if (__isset.null_count && !(null_count == rhs.null_count)) + return false; + if (__isset.distinct_count != rhs.__isset.distinct_count) + return false; + else if (__isset.distinct_count && !(distinct_count == rhs.distinct_count)) + return false; + if (__isset.max_value != rhs.__isset.max_value) + return false; + else if (__isset.max_value && !(max_value == rhs.max_value)) + return false; + if (__isset.min_value != rhs.__isset.min_value) + return false; + else if (__isset.min_value && !(min_value == rhs.min_value)) + return false; + if (__isset.is_max_value_exact != rhs.__isset.is_max_value_exact) + return false; + else if (__isset.is_max_value_exact && !(is_max_value_exact == rhs.is_max_value_exact)) + return false; + if (__isset.is_min_value_exact != rhs.__isset.is_min_value_exact) + return false; + else if (__isset.is_min_value_exact && !(is_min_value_exact == rhs.is_min_value_exact)) + return false; + return true; +} + Statistics::Statistics(const Statistics& other30) { max = other30.max; min = other30.min; @@ -1048,7 +1127,6 @@ Statistics::Statistics(const Statistics& other30) { min_value = other30.min_value; is_max_value_exact = other30.is_max_value_exact; is_min_value_exact = other30.is_min_value_exact; - geometry_stats = other30.geometry_stats; __isset = other30.__isset; } Statistics::Statistics(Statistics&& other31) noexcept { @@ -1060,7 +1138,6 @@ Statistics::Statistics(Statistics&& other31) noexcept { min_value = std::move(other31.min_value); is_max_value_exact = other31.is_max_value_exact; is_min_value_exact = other31.is_min_value_exact; - geometry_stats = std::move(other31.geometry_stats); __isset = other31.__isset; } Statistics& Statistics::operator=(const Statistics& other32) { @@ -1072,7 +1149,6 @@ Statistics& Statistics::operator=(const Statistics& other32) { min_value = other32.min_value; is_max_value_exact = other32.is_max_value_exact; 
is_min_value_exact = other32.is_min_value_exact; - geometry_stats = other32.geometry_stats; __isset = other32.__isset; return *this; } @@ -1085,7 +1161,6 @@ Statistics& Statistics::operator=(Statistics&& other33) noexcept { min_value = std::move(other33.min_value); is_max_value_exact = other33.is_max_value_exact; is_min_value_exact = other33.is_min_value_exact; - geometry_stats = std::move(other33.geometry_stats); __isset = other33.__isset; return *this; } @@ -1100,7 +1175,6 @@ void Statistics::printTo(std::ostream& out) const { out << ", " << "min_value="; (__isset.min_value ? (out << to_string(min_value)) : (out << "")); out << ", " << "is_max_value_exact="; (__isset.is_max_value_exact ? (out << to_string(is_max_value_exact)) : (out << "")); out << ", " << "is_min_value_exact="; (__isset.is_min_value_exact ? (out << to_string(is_min_value_exact)) : (out << "")); - out << ", " << "geometry_stats="; (__isset.geometry_stats ? (out << to_string(geometry_stats)) : (out << "")); out << ")"; } @@ -1108,6 +1182,8 @@ void Statistics::printTo(std::ostream& out) const { StringType::~StringType() noexcept { } +StringType::StringType() noexcept { +} std::ostream& operator<<(std::ostream& out, const StringType& obj) { obj.printTo(out); @@ -1121,6 +1197,11 @@ void swap(StringType &a, StringType &b) { (void) b; } +bool StringType::operator==(const StringType & /* rhs */) const +{ + return true; +} + StringType::StringType(const StringType& other34) noexcept { (void) other34; } @@ -1145,6 +1226,8 @@ void StringType::printTo(std::ostream& out) const { UUIDType::~UUIDType() noexcept { } +UUIDType::UUIDType() noexcept { +} std::ostream& operator<<(std::ostream& out, const UUIDType& obj) { obj.printTo(out); @@ -1158,6 +1241,11 @@ void swap(UUIDType &a, UUIDType &b) { (void) b; } +bool UUIDType::operator==(const UUIDType & /* rhs */) const +{ + return true; +} + UUIDType::UUIDType(const UUIDType& other38) noexcept { (void) other38; } @@ -1182,6 +1270,8 @@ void 
UUIDType::printTo(std::ostream& out) const { MapType::~MapType() noexcept { } +MapType::MapType() noexcept { +} std::ostream& operator<<(std::ostream& out, const MapType& obj) { obj.printTo(out); @@ -1195,6 +1285,11 @@ void swap(MapType &a, MapType &b) { (void) b; } +bool MapType::operator==(const MapType & /* rhs */) const +{ + return true; +} + MapType::MapType(const MapType& other42) noexcept { (void) other42; } @@ -1219,6 +1314,8 @@ void MapType::printTo(std::ostream& out) const { ListType::~ListType() noexcept { } +ListType::ListType() noexcept { +} std::ostream& operator<<(std::ostream& out, const ListType& obj) { obj.printTo(out); @@ -1232,6 +1329,11 @@ void swap(ListType &a, ListType &b) { (void) b; } +bool ListType::operator==(const ListType & /* rhs */) const +{ + return true; +} + ListType::ListType(const ListType& other46) noexcept { (void) other46; } @@ -1256,6 +1358,8 @@ void ListType::printTo(std::ostream& out) const { EnumType::~EnumType() noexcept { } +EnumType::EnumType() noexcept { +} std::ostream& operator<<(std::ostream& out, const EnumType& obj) { obj.printTo(out); @@ -1269,6 +1373,11 @@ void swap(EnumType &a, EnumType &b) { (void) b; } +bool EnumType::operator==(const EnumType & /* rhs */) const +{ + return true; +} + EnumType::EnumType(const EnumType& other50) noexcept { (void) other50; } @@ -1293,6 +1402,8 @@ void EnumType::printTo(std::ostream& out) const { DateType::~DateType() noexcept { } +DateType::DateType() noexcept { +} std::ostream& operator<<(std::ostream& out, const DateType& obj) { obj.printTo(out); @@ -1306,6 +1417,11 @@ void swap(DateType &a, DateType &b) { (void) b; } +bool DateType::operator==(const DateType & /* rhs */) const +{ + return true; +} + DateType::DateType(const DateType& other54) noexcept { (void) other54; } @@ -1330,6 +1446,8 @@ void DateType::printTo(std::ostream& out) const { Float16Type::~Float16Type() noexcept { } +Float16Type::Float16Type() noexcept { +} std::ostream& operator<<(std::ostream& out, const 
Float16Type& obj) { obj.printTo(out); @@ -1343,6 +1461,11 @@ void swap(Float16Type &a, Float16Type &b) { (void) b; } +bool Float16Type::operator==(const Float16Type & /* rhs */) const +{ + return true; +} + Float16Type::Float16Type(const Float16Type& other58) noexcept { (void) other58; } @@ -1367,6 +1490,8 @@ void Float16Type::printTo(std::ostream& out) const { NullType::~NullType() noexcept { } +NullType::NullType() noexcept { +} std::ostream& operator<<(std::ostream& out, const NullType& obj) { obj.printTo(out); @@ -1380,6 +1505,11 @@ void swap(NullType &a, NullType &b) { (void) b; } +bool NullType::operator==(const NullType & /* rhs */) const +{ + return true; +} + NullType::NullType(const NullType& other62) noexcept { (void) other62; } @@ -1404,6 +1534,10 @@ void NullType::printTo(std::ostream& out) const { DecimalType::~DecimalType() noexcept { } +DecimalType::DecimalType() noexcept + : scale(0), + precision(0) { +} void DecimalType::__set_scale(const int32_t val) { this->scale = val; @@ -1425,6 +1559,15 @@ void swap(DecimalType &a, DecimalType &b) { swap(a.precision, b.precision); } +bool DecimalType::operator==(const DecimalType & rhs) const +{ + if (!(scale == rhs.scale)) + return false; + if (!(precision == rhs.precision)) + return false; + return true; +} + DecimalType::DecimalType(const DecimalType& other66) noexcept { scale = other66.scale; precision = other66.precision; @@ -1455,6 +1598,8 @@ void DecimalType::printTo(std::ostream& out) const { MilliSeconds::~MilliSeconds() noexcept { } +MilliSeconds::MilliSeconds() noexcept { +} std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj) { obj.printTo(out); @@ -1468,6 +1613,11 @@ void swap(MilliSeconds &a, MilliSeconds &b) { (void) b; } +bool MilliSeconds::operator==(const MilliSeconds & /* rhs */) const +{ + return true; +} + MilliSeconds::MilliSeconds(const MilliSeconds& other70) noexcept { (void) other70; } @@ -1492,6 +1642,8 @@ void MilliSeconds::printTo(std::ostream& out) const { 
MicroSeconds::~MicroSeconds() noexcept { } +MicroSeconds::MicroSeconds() noexcept { +} std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj) { obj.printTo(out); @@ -1505,6 +1657,11 @@ void swap(MicroSeconds &a, MicroSeconds &b) { (void) b; } +bool MicroSeconds::operator==(const MicroSeconds & /* rhs */) const +{ + return true; +} + MicroSeconds::MicroSeconds(const MicroSeconds& other74) noexcept { (void) other74; } @@ -1529,6 +1686,8 @@ void MicroSeconds::printTo(std::ostream& out) const { NanoSeconds::~NanoSeconds() noexcept { } +NanoSeconds::NanoSeconds() noexcept { +} std::ostream& operator<<(std::ostream& out, const NanoSeconds& obj) { obj.printTo(out); @@ -1542,6 +1701,11 @@ void swap(NanoSeconds &a, NanoSeconds &b) { (void) b; } +bool NanoSeconds::operator==(const NanoSeconds & /* rhs */) const +{ + return true; +} + NanoSeconds::NanoSeconds(const NanoSeconds& other78) noexcept { (void) other78; } @@ -1566,6 +1730,8 @@ void NanoSeconds::printTo(std::ostream& out) const { TimeUnit::~TimeUnit() noexcept { } +TimeUnit::TimeUnit() noexcept { +} void TimeUnit::__set_MILLIS(const MilliSeconds& val) { this->MILLIS = val; @@ -1596,6 +1762,23 @@ void swap(TimeUnit &a, TimeUnit &b) { swap(a.__isset, b.__isset); } +bool TimeUnit::operator==(const TimeUnit & rhs) const +{ + if (__isset.MILLIS != rhs.__isset.MILLIS) + return false; + else if (__isset.MILLIS && !(MILLIS == rhs.MILLIS)) + return false; + if (__isset.MICROS != rhs.__isset.MICROS) + return false; + else if (__isset.MICROS && !(MICROS == rhs.MICROS)) + return false; + if (__isset.NANOS != rhs.__isset.NANOS) + return false; + else if (__isset.NANOS && !(NANOS == rhs.NANOS)) + return false; + return true; +} + TimeUnit::TimeUnit(const TimeUnit& other82) noexcept { MILLIS = other82.MILLIS; MICROS = other82.MICROS; @@ -1635,6 +1818,9 @@ void TimeUnit::printTo(std::ostream& out) const { TimestampType::~TimestampType() noexcept { } +TimestampType::TimestampType() noexcept + : isAdjustedToUTC(0) { +} 
void TimestampType::__set_isAdjustedToUTC(const bool val) { this->isAdjustedToUTC = val; @@ -1656,6 +1842,15 @@ void swap(TimestampType &a, TimestampType &b) { swap(a.unit, b.unit); } +bool TimestampType::operator==(const TimestampType & rhs) const +{ + if (!(isAdjustedToUTC == rhs.isAdjustedToUTC)) + return false; + if (!(unit == rhs.unit)) + return false; + return true; +} + TimestampType::TimestampType(const TimestampType& other86) noexcept { isAdjustedToUTC = other86.isAdjustedToUTC; unit = other86.unit; @@ -1686,6 +1881,9 @@ void TimestampType::printTo(std::ostream& out) const { TimeType::~TimeType() noexcept { } +TimeType::TimeType() noexcept + : isAdjustedToUTC(0) { +} void TimeType::__set_isAdjustedToUTC(const bool val) { this->isAdjustedToUTC = val; @@ -1707,6 +1905,15 @@ void swap(TimeType &a, TimeType &b) { swap(a.unit, b.unit); } +bool TimeType::operator==(const TimeType & rhs) const +{ + if (!(isAdjustedToUTC == rhs.isAdjustedToUTC)) + return false; + if (!(unit == rhs.unit)) + return false; + return true; +} + TimeType::TimeType(const TimeType& other90) noexcept { isAdjustedToUTC = other90.isAdjustedToUTC; unit = other90.unit; @@ -1737,6 +1944,10 @@ void TimeType::printTo(std::ostream& out) const { IntType::~IntType() noexcept { } +IntType::IntType() noexcept + : bitWidth(0), + isSigned(0) { +} void IntType::__set_bitWidth(const int8_t val) { this->bitWidth = val; @@ -1758,6 +1969,15 @@ void swap(IntType &a, IntType &b) { swap(a.isSigned, b.isSigned); } +bool IntType::operator==(const IntType & rhs) const +{ + if (!(bitWidth == rhs.bitWidth)) + return false; + if (!(isSigned == rhs.isSigned)) + return false; + return true; +} + IntType::IntType(const IntType& other94) noexcept { bitWidth = other94.bitWidth; isSigned = other94.isSigned; @@ -1788,6 +2008,8 @@ void IntType::printTo(std::ostream& out) const { JsonType::~JsonType() noexcept { } +JsonType::JsonType() noexcept { +} std::ostream& operator<<(std::ostream& out, const JsonType& obj) { 
obj.printTo(out); @@ -1801,6 +2023,11 @@ void swap(JsonType &a, JsonType &b) { (void) b; } +bool JsonType::operator==(const JsonType & /* rhs */) const +{ + return true; +} + JsonType::JsonType(const JsonType& other98) noexcept { (void) other98; } @@ -1825,6 +2052,8 @@ void JsonType::printTo(std::ostream& out) const { BsonType::~BsonType() noexcept { } +BsonType::BsonType() noexcept { +} std::ostream& operator<<(std::ostream& out, const BsonType& obj) { obj.printTo(out); @@ -1838,6 +2067,11 @@ void swap(BsonType &a, BsonType &b) { (void) b; } +bool BsonType::operator==(const BsonType & /* rhs */) const +{ + return true; +} + BsonType::BsonType(const BsonType& other102) noexcept { (void) other102; } @@ -1862,6 +2096,11 @@ void BsonType::printTo(std::ostream& out) const { GeometryType::~GeometryType() noexcept { } +GeometryType::GeometryType() noexcept + : encoding(static_cast(0)), + edges(static_cast(0)), + crs() { +} void GeometryType::__set_encoding(const GeometryEncoding::type val) { this->encoding = val; @@ -1890,6 +2129,19 @@ void swap(GeometryType &a, GeometryType &b) { swap(a.__isset, b.__isset); } +bool GeometryType::operator==(const GeometryType & rhs) const +{ + if (!(encoding == rhs.encoding)) + return false; + if (!(edges == rhs.edges)) + return false; + if (__isset.crs != rhs.__isset.crs) + return false; + else if (__isset.crs && !(crs == rhs.crs)) + return false; + return true; +} + GeometryType::GeometryType(const GeometryType& other108) { encoding = other108.encoding; edges = other108.edges; @@ -1929,6 +2181,8 @@ void GeometryType::printTo(std::ostream& out) const { LogicalType::~LogicalType() noexcept { } +LogicalType::LogicalType() noexcept { +} void LogicalType::__set_STRING(const StringType& val) { this->STRING = val; @@ -2031,6 +2285,71 @@ void swap(LogicalType &a, LogicalType &b) { swap(a.__isset, b.__isset); } +bool LogicalType::operator==(const LogicalType & rhs) const +{ + if (__isset.STRING != rhs.__isset.STRING) + return false; + else if 
(__isset.STRING && !(STRING == rhs.STRING)) + return false; + if (__isset.MAP != rhs.__isset.MAP) + return false; + else if (__isset.MAP && !(MAP == rhs.MAP)) + return false; + if (__isset.LIST != rhs.__isset.LIST) + return false; + else if (__isset.LIST && !(LIST == rhs.LIST)) + return false; + if (__isset.ENUM != rhs.__isset.ENUM) + return false; + else if (__isset.ENUM && !(ENUM == rhs.ENUM)) + return false; + if (__isset.DECIMAL != rhs.__isset.DECIMAL) + return false; + else if (__isset.DECIMAL && !(DECIMAL == rhs.DECIMAL)) + return false; + if (__isset.DATE != rhs.__isset.DATE) + return false; + else if (__isset.DATE && !(DATE == rhs.DATE)) + return false; + if (__isset.TIME != rhs.__isset.TIME) + return false; + else if (__isset.TIME && !(TIME == rhs.TIME)) + return false; + if (__isset.TIMESTAMP != rhs.__isset.TIMESTAMP) + return false; + else if (__isset.TIMESTAMP && !(TIMESTAMP == rhs.TIMESTAMP)) + return false; + if (__isset.INTEGER != rhs.__isset.INTEGER) + return false; + else if (__isset.INTEGER && !(INTEGER == rhs.INTEGER)) + return false; + if (__isset.UNKNOWN != rhs.__isset.UNKNOWN) + return false; + else if (__isset.UNKNOWN && !(UNKNOWN == rhs.UNKNOWN)) + return false; + if (__isset.JSON != rhs.__isset.JSON) + return false; + else if (__isset.JSON && !(JSON == rhs.JSON)) + return false; + if (__isset.BSON != rhs.__isset.BSON) + return false; + else if (__isset.BSON && !(BSON == rhs.BSON)) + return false; + if (__isset.UUID != rhs.__isset.UUID) + return false; + else if (__isset.UUID && !(UUID == rhs.UUID)) + return false; + if (__isset.FLOAT16 != rhs.__isset.FLOAT16) + return false; + else if (__isset.FLOAT16 && !(FLOAT16 == rhs.FLOAT16)) + return false; + if (__isset.GEOMETRY != rhs.__isset.GEOMETRY) + return false; + else if (__isset.GEOMETRY && !(GEOMETRY == rhs.GEOMETRY)) + return false; + return true; +} + LogicalType::LogicalType(const LogicalType& other112) { STRING = other112.STRING; MAP = other112.MAP; @@ -2130,6 +2449,17 @@ void 
LogicalType::printTo(std::ostream& out) const { SchemaElement::~SchemaElement() noexcept { } +SchemaElement::SchemaElement() noexcept + : type(static_cast(0)), + type_length(0), + repetition_type(static_cast(0)), + name(), + num_children(0), + converted_type(static_cast(0)), + scale(0), + precision(0), + field_id(0) { +} void SchemaElement::__set_type(const Type::type val) { this->type = val; @@ -2201,6 +2531,49 @@ void swap(SchemaElement &a, SchemaElement &b) { swap(a.__isset, b.__isset); } +bool SchemaElement::operator==(const SchemaElement & rhs) const +{ + if (__isset.type != rhs.__isset.type) + return false; + else if (__isset.type && !(type == rhs.type)) + return false; + if (__isset.type_length != rhs.__isset.type_length) + return false; + else if (__isset.type_length && !(type_length == rhs.type_length)) + return false; + if (__isset.repetition_type != rhs.__isset.repetition_type) + return false; + else if (__isset.repetition_type && !(repetition_type == rhs.repetition_type)) + return false; + if (!(name == rhs.name)) + return false; + if (__isset.num_children != rhs.__isset.num_children) + return false; + else if (__isset.num_children && !(num_children == rhs.num_children)) + return false; + if (__isset.converted_type != rhs.__isset.converted_type) + return false; + else if (__isset.converted_type && !(converted_type == rhs.converted_type)) + return false; + if (__isset.scale != rhs.__isset.scale) + return false; + else if (__isset.scale && !(scale == rhs.scale)) + return false; + if (__isset.precision != rhs.__isset.precision) + return false; + else if (__isset.precision && !(precision == rhs.precision)) + return false; + if (__isset.field_id != rhs.__isset.field_id) + return false; + else if (__isset.field_id && !(field_id == rhs.field_id)) + return false; + if (__isset.logicalType != rhs.__isset.logicalType) + return false; + else if (__isset.logicalType && !(logicalType == rhs.logicalType)) + return false; + return true; +} + 
SchemaElement::SchemaElement(const SchemaElement& other119) { type = other119.type; type_length = other119.type_length; @@ -2275,6 +2648,12 @@ void SchemaElement::printTo(std::ostream& out) const { DataPageHeader::~DataPageHeader() noexcept { } +DataPageHeader::DataPageHeader() noexcept + : num_values(0), + encoding(static_cast(0)), + definition_level_encoding(static_cast(0)), + repetition_level_encoding(static_cast(0)) { +} void DataPageHeader::__set_num_values(const int32_t val) { this->num_values = val; @@ -2313,6 +2692,23 @@ void swap(DataPageHeader &a, DataPageHeader &b) { swap(a.__isset, b.__isset); } +bool DataPageHeader::operator==(const DataPageHeader & rhs) const +{ + if (!(num_values == rhs.num_values)) + return false; + if (!(encoding == rhs.encoding)) + return false; + if (!(definition_level_encoding == rhs.definition_level_encoding)) + return false; + if (!(repetition_level_encoding == rhs.repetition_level_encoding)) + return false; + if (__isset.statistics != rhs.__isset.statistics) + return false; + else if (__isset.statistics && !(statistics == rhs.statistics)) + return false; + return true; +} + DataPageHeader::DataPageHeader(const DataPageHeader& other126) { num_values = other126.num_values; encoding = other126.encoding; @@ -2362,6 +2758,8 @@ void DataPageHeader::printTo(std::ostream& out) const { IndexPageHeader::~IndexPageHeader() noexcept { } +IndexPageHeader::IndexPageHeader() noexcept { +} std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj) { obj.printTo(out); @@ -2375,6 +2773,11 @@ void swap(IndexPageHeader &a, IndexPageHeader &b) { (void) b; } +bool IndexPageHeader::operator==(const IndexPageHeader & /* rhs */) const +{ + return true; +} + IndexPageHeader::IndexPageHeader(const IndexPageHeader& other130) noexcept { (void) other130; } @@ -2399,6 +2802,11 @@ void IndexPageHeader::printTo(std::ostream& out) const { DictionaryPageHeader::~DictionaryPageHeader() noexcept { } +DictionaryPageHeader::DictionaryPageHeader() 
noexcept + : num_values(0), + encoding(static_cast(0)), + is_sorted(0) { +} void DictionaryPageHeader::__set_num_values(const int32_t val) { this->num_values = val; @@ -2427,6 +2835,19 @@ void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) { swap(a.__isset, b.__isset); } +bool DictionaryPageHeader::operator==(const DictionaryPageHeader & rhs) const +{ + if (!(num_values == rhs.num_values)) + return false; + if (!(encoding == rhs.encoding)) + return false; + if (__isset.is_sorted != rhs.__isset.is_sorted) + return false; + else if (__isset.is_sorted && !(is_sorted == rhs.is_sorted)) + return false; + return true; +} + DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other135) noexcept { num_values = other135.num_values; encoding = other135.encoding; @@ -2466,6 +2887,15 @@ void DictionaryPageHeader::printTo(std::ostream& out) const { DataPageHeaderV2::~DataPageHeaderV2() noexcept { } +DataPageHeaderV2::DataPageHeaderV2() noexcept + : num_values(0), + num_nulls(0), + num_rows(0), + encoding(static_cast(0)), + definition_levels_byte_length(0), + repetition_levels_byte_length(0), + is_compressed(true) { +} void DataPageHeaderV2::__set_num_values(const int32_t val) { this->num_values = val; @@ -2520,6 +2950,31 @@ void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) { swap(a.__isset, b.__isset); } +bool DataPageHeaderV2::operator==(const DataPageHeaderV2 & rhs) const +{ + if (!(num_values == rhs.num_values)) + return false; + if (!(num_nulls == rhs.num_nulls)) + return false; + if (!(num_rows == rhs.num_rows)) + return false; + if (!(encoding == rhs.encoding)) + return false; + if (!(definition_levels_byte_length == rhs.definition_levels_byte_length)) + return false; + if (!(repetition_levels_byte_length == rhs.repetition_levels_byte_length)) + return false; + if (__isset.is_compressed != rhs.__isset.is_compressed) + return false; + else if (__isset.is_compressed && !(is_compressed == rhs.is_compressed)) + return false; + if 
(__isset.statistics != rhs.__isset.statistics) + return false; + else if (__isset.statistics && !(statistics == rhs.statistics)) + return false; + return true; +} + DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other140) { num_values = other140.num_values; num_nulls = other140.num_nulls; @@ -2584,6 +3039,8 @@ void DataPageHeaderV2::printTo(std::ostream& out) const { SplitBlockAlgorithm::~SplitBlockAlgorithm() noexcept { } +SplitBlockAlgorithm::SplitBlockAlgorithm() noexcept { +} std::ostream& operator<<(std::ostream& out, const SplitBlockAlgorithm& obj) { obj.printTo(out); @@ -2597,6 +3054,11 @@ void swap(SplitBlockAlgorithm &a, SplitBlockAlgorithm &b) { (void) b; } +bool SplitBlockAlgorithm::operator==(const SplitBlockAlgorithm & /* rhs */) const +{ + return true; +} + SplitBlockAlgorithm::SplitBlockAlgorithm(const SplitBlockAlgorithm& other144) noexcept { (void) other144; } @@ -2621,6 +3083,8 @@ void SplitBlockAlgorithm::printTo(std::ostream& out) const { BloomFilterAlgorithm::~BloomFilterAlgorithm() noexcept { } +BloomFilterAlgorithm::BloomFilterAlgorithm() noexcept { +} void BloomFilterAlgorithm::__set_BLOCK(const SplitBlockAlgorithm& val) { this->BLOCK = val; @@ -2639,6 +3103,15 @@ void swap(BloomFilterAlgorithm &a, BloomFilterAlgorithm &b) { swap(a.__isset, b.__isset); } +bool BloomFilterAlgorithm::operator==(const BloomFilterAlgorithm & rhs) const +{ + if (__isset.BLOCK != rhs.__isset.BLOCK) + return false; + else if (__isset.BLOCK && !(BLOCK == rhs.BLOCK)) + return false; + return true; +} + BloomFilterAlgorithm::BloomFilterAlgorithm(const BloomFilterAlgorithm& other148) noexcept { BLOCK = other148.BLOCK; __isset = other148.__isset; @@ -2668,6 +3141,8 @@ void BloomFilterAlgorithm::printTo(std::ostream& out) const { XxHash::~XxHash() noexcept { } +XxHash::XxHash() noexcept { +} std::ostream& operator<<(std::ostream& out, const XxHash& obj) { obj.printTo(out); @@ -2681,6 +3156,11 @@ void swap(XxHash &a, XxHash &b) { (void) b; } +bool 
XxHash::operator==(const XxHash & /* rhs */) const +{ + return true; +} + XxHash::XxHash(const XxHash& other152) noexcept { (void) other152; } @@ -2705,6 +3185,8 @@ void XxHash::printTo(std::ostream& out) const { BloomFilterHash::~BloomFilterHash() noexcept { } +BloomFilterHash::BloomFilterHash() noexcept { +} void BloomFilterHash::__set_XXHASH(const XxHash& val) { this->XXHASH = val; @@ -2723,6 +3205,15 @@ void swap(BloomFilterHash &a, BloomFilterHash &b) { swap(a.__isset, b.__isset); } +bool BloomFilterHash::operator==(const BloomFilterHash & rhs) const +{ + if (__isset.XXHASH != rhs.__isset.XXHASH) + return false; + else if (__isset.XXHASH && !(XXHASH == rhs.XXHASH)) + return false; + return true; +} + BloomFilterHash::BloomFilterHash(const BloomFilterHash& other156) noexcept { XXHASH = other156.XXHASH; __isset = other156.__isset; @@ -2752,6 +3243,8 @@ void BloomFilterHash::printTo(std::ostream& out) const { Uncompressed::~Uncompressed() noexcept { } +Uncompressed::Uncompressed() noexcept { +} std::ostream& operator<<(std::ostream& out, const Uncompressed& obj) { obj.printTo(out); @@ -2765,6 +3258,11 @@ void swap(Uncompressed &a, Uncompressed &b) { (void) b; } +bool Uncompressed::operator==(const Uncompressed & /* rhs */) const +{ + return true; +} + Uncompressed::Uncompressed(const Uncompressed& other160) noexcept { (void) other160; } @@ -2789,6 +3287,8 @@ void Uncompressed::printTo(std::ostream& out) const { BloomFilterCompression::~BloomFilterCompression() noexcept { } +BloomFilterCompression::BloomFilterCompression() noexcept { +} void BloomFilterCompression::__set_UNCOMPRESSED(const Uncompressed& val) { this->UNCOMPRESSED = val; @@ -2807,6 +3307,15 @@ void swap(BloomFilterCompression &a, BloomFilterCompression &b) { swap(a.__isset, b.__isset); } +bool BloomFilterCompression::operator==(const BloomFilterCompression & rhs) const +{ + if (__isset.UNCOMPRESSED != rhs.__isset.UNCOMPRESSED) + return false; + else if (__isset.UNCOMPRESSED && !(UNCOMPRESSED == 
rhs.UNCOMPRESSED)) + return false; + return true; +} + BloomFilterCompression::BloomFilterCompression(const BloomFilterCompression& other164) noexcept { UNCOMPRESSED = other164.UNCOMPRESSED; __isset = other164.__isset; @@ -2836,6 +3345,9 @@ void BloomFilterCompression::printTo(std::ostream& out) const { BloomFilterHeader::~BloomFilterHeader() noexcept { } +BloomFilterHeader::BloomFilterHeader() noexcept + : numBytes(0) { +} void BloomFilterHeader::__set_numBytes(const int32_t val) { this->numBytes = val; @@ -2867,6 +3379,19 @@ void swap(BloomFilterHeader &a, BloomFilterHeader &b) { swap(a.compression, b.compression); } +bool BloomFilterHeader::operator==(const BloomFilterHeader & rhs) const +{ + if (!(numBytes == rhs.numBytes)) + return false; + if (!(algorithm == rhs.algorithm)) + return false; + if (!(hash == rhs.hash)) + return false; + if (!(compression == rhs.compression)) + return false; + return true; +} + BloomFilterHeader::BloomFilterHeader(const BloomFilterHeader& other168) noexcept { numBytes = other168.numBytes; algorithm = other168.algorithm; @@ -2907,6 +3432,12 @@ void BloomFilterHeader::printTo(std::ostream& out) const { PageHeader::~PageHeader() noexcept { } +PageHeader::PageHeader() noexcept + : type(static_cast(0)), + uncompressed_page_size(0), + compressed_page_size(0), + crc(0) { +} void PageHeader::__set_type(const PageType::type val) { this->type = val; @@ -2964,6 +3495,37 @@ void swap(PageHeader &a, PageHeader &b) { swap(a.__isset, b.__isset); } +bool PageHeader::operator==(const PageHeader & rhs) const +{ + if (!(type == rhs.type)) + return false; + if (!(uncompressed_page_size == rhs.uncompressed_page_size)) + return false; + if (!(compressed_page_size == rhs.compressed_page_size)) + return false; + if (__isset.crc != rhs.__isset.crc) + return false; + else if (__isset.crc && !(crc == rhs.crc)) + return false; + if (__isset.data_page_header != rhs.__isset.data_page_header) + return false; + else if (__isset.data_page_header && 
!(data_page_header == rhs.data_page_header)) + return false; + if (__isset.index_page_header != rhs.__isset.index_page_header) + return false; + else if (__isset.index_page_header && !(index_page_header == rhs.index_page_header)) + return false; + if (__isset.dictionary_page_header != rhs.__isset.dictionary_page_header) + return false; + else if (__isset.dictionary_page_header && !(dictionary_page_header == rhs.dictionary_page_header)) + return false; + if (__isset.data_page_header_v2 != rhs.__isset.data_page_header_v2) + return false; + else if (__isset.data_page_header_v2 && !(data_page_header_v2 == rhs.data_page_header_v2)) + return false; + return true; +} + PageHeader::PageHeader(const PageHeader& other173) { type = other173.type; uncompressed_page_size = other173.uncompressed_page_size; @@ -3028,6 +3590,10 @@ void PageHeader::printTo(std::ostream& out) const { KeyValue::~KeyValue() noexcept { } +KeyValue::KeyValue() noexcept + : key(), + value() { +} void KeyValue::__set_key(const std::string& val) { this->key = val; @@ -3051,6 +3617,17 @@ void swap(KeyValue &a, KeyValue &b) { swap(a.__isset, b.__isset); } +bool KeyValue::operator==(const KeyValue & rhs) const +{ + if (!(key == rhs.key)) + return false; + if (__isset.value != rhs.__isset.value) + return false; + else if (__isset.value && !(value == rhs.value)) + return false; + return true; +} + KeyValue::KeyValue(const KeyValue& other177) { key = other177.key; value = other177.value; @@ -3085,6 +3662,11 @@ void KeyValue::printTo(std::ostream& out) const { SortingColumn::~SortingColumn() noexcept { } +SortingColumn::SortingColumn() noexcept + : column_idx(0), + descending(0), + nulls_first(0) { +} void SortingColumn::__set_column_idx(const int32_t val) { this->column_idx = val; @@ -3111,6 +3693,17 @@ void swap(SortingColumn &a, SortingColumn &b) { swap(a.nulls_first, b.nulls_first); } +bool SortingColumn::operator==(const SortingColumn & rhs) const +{ + if (!(column_idx == rhs.column_idx)) + return false; + 
if (!(descending == rhs.descending)) + return false; + if (!(nulls_first == rhs.nulls_first)) + return false; + return true; +} + SortingColumn::SortingColumn(const SortingColumn& other181) noexcept { column_idx = other181.column_idx; descending = other181.descending; @@ -3146,6 +3739,11 @@ void SortingColumn::printTo(std::ostream& out) const { PageEncodingStats::~PageEncodingStats() noexcept { } +PageEncodingStats::PageEncodingStats() noexcept + : page_type(static_cast(0)), + encoding(static_cast(0)), + count(0) { +} void PageEncodingStats::__set_page_type(const PageType::type val) { this->page_type = val; @@ -3172,6 +3770,17 @@ void swap(PageEncodingStats &a, PageEncodingStats &b) { swap(a.count, b.count); } +bool PageEncodingStats::operator==(const PageEncodingStats & rhs) const +{ + if (!(page_type == rhs.page_type)) + return false; + if (!(encoding == rhs.encoding)) + return false; + if (!(count == rhs.count)) + return false; + return true; +} + PageEncodingStats::PageEncodingStats(const PageEncodingStats& other187) noexcept { page_type = other187.page_type; encoding = other187.encoding; @@ -3207,6 +3816,18 @@ void PageEncodingStats::printTo(std::ostream& out) const { ColumnMetaData::~ColumnMetaData() noexcept { } +ColumnMetaData::ColumnMetaData() noexcept + : type(static_cast(0)), + codec(static_cast(0)), + num_values(0), + total_uncompressed_size(0), + total_compressed_size(0), + data_page_offset(0), + index_page_offset(0), + dictionary_page_offset(0), + bloom_filter_offset(0), + bloom_filter_length(0) { +} void ColumnMetaData::__set_type(const Type::type val) { this->type = val; @@ -3279,6 +3900,11 @@ void ColumnMetaData::__set_size_statistics(const SizeStatistics& val) { this->size_statistics = val; __isset.size_statistics = true; } + +void ColumnMetaData::__set_geometry_stats(const GeometryStatistics& val) { + this->geometry_stats = val; +__isset.geometry_stats = true; +} std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj) { 
obj.printTo(out); @@ -3304,9 +3930,67 @@ void swap(ColumnMetaData &a, ColumnMetaData &b) { swap(a.bloom_filter_offset, b.bloom_filter_offset); swap(a.bloom_filter_length, b.bloom_filter_length); swap(a.size_statistics, b.size_statistics); + swap(a.geometry_stats, b.geometry_stats); swap(a.__isset, b.__isset); } +bool ColumnMetaData::operator==(const ColumnMetaData & rhs) const +{ + if (!(type == rhs.type)) + return false; + if (!(encodings == rhs.encodings)) + return false; + if (!(path_in_schema == rhs.path_in_schema)) + return false; + if (!(codec == rhs.codec)) + return false; + if (!(num_values == rhs.num_values)) + return false; + if (!(total_uncompressed_size == rhs.total_uncompressed_size)) + return false; + if (!(total_compressed_size == rhs.total_compressed_size)) + return false; + if (__isset.key_value_metadata != rhs.__isset.key_value_metadata) + return false; + else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata)) + return false; + if (!(data_page_offset == rhs.data_page_offset)) + return false; + if (__isset.index_page_offset != rhs.__isset.index_page_offset) + return false; + else if (__isset.index_page_offset && !(index_page_offset == rhs.index_page_offset)) + return false; + if (__isset.dictionary_page_offset != rhs.__isset.dictionary_page_offset) + return false; + else if (__isset.dictionary_page_offset && !(dictionary_page_offset == rhs.dictionary_page_offset)) + return false; + if (__isset.statistics != rhs.__isset.statistics) + return false; + else if (__isset.statistics && !(statistics == rhs.statistics)) + return false; + if (__isset.encoding_stats != rhs.__isset.encoding_stats) + return false; + else if (__isset.encoding_stats && !(encoding_stats == rhs.encoding_stats)) + return false; + if (__isset.bloom_filter_offset != rhs.__isset.bloom_filter_offset) + return false; + else if (__isset.bloom_filter_offset && !(bloom_filter_offset == rhs.bloom_filter_offset)) + return false; + if 
(__isset.bloom_filter_length != rhs.__isset.bloom_filter_length) + return false; + else if (__isset.bloom_filter_length && !(bloom_filter_length == rhs.bloom_filter_length)) + return false; + if (__isset.size_statistics != rhs.__isset.size_statistics) + return false; + else if (__isset.size_statistics && !(size_statistics == rhs.size_statistics)) + return false; + if (__isset.geometry_stats != rhs.__isset.geometry_stats) + return false; + else if (__isset.geometry_stats && !(geometry_stats == rhs.geometry_stats)) + return false; + return true; +} + ColumnMetaData::ColumnMetaData(const ColumnMetaData& other218) { type = other218.type; encodings = other218.encodings; @@ -3324,6 +4008,7 @@ ColumnMetaData::ColumnMetaData(const ColumnMetaData& other218) { bloom_filter_offset = other218.bloom_filter_offset; bloom_filter_length = other218.bloom_filter_length; size_statistics = other218.size_statistics; + geometry_stats = other218.geometry_stats; __isset = other218.__isset; } ColumnMetaData::ColumnMetaData(ColumnMetaData&& other219) noexcept { @@ -3343,6 +4028,7 @@ ColumnMetaData::ColumnMetaData(ColumnMetaData&& other219) noexcept { bloom_filter_offset = other219.bloom_filter_offset; bloom_filter_length = other219.bloom_filter_length; size_statistics = std::move(other219.size_statistics); + geometry_stats = std::move(other219.geometry_stats); __isset = other219.__isset; } ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other220) { @@ -3362,6 +4048,7 @@ ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other220) { bloom_filter_offset = other220.bloom_filter_offset; bloom_filter_length = other220.bloom_filter_length; size_statistics = other220.size_statistics; + geometry_stats = other220.geometry_stats; __isset = other220.__isset; return *this; } @@ -3382,6 +4069,7 @@ ColumnMetaData& ColumnMetaData::operator=(ColumnMetaData&& other221) noexcept { bloom_filter_offset = other221.bloom_filter_offset; bloom_filter_length = 
other221.bloom_filter_length; size_statistics = std::move(other221.size_statistics); + geometry_stats = std::move(other221.geometry_stats); __isset = other221.__isset; return *this; } @@ -3404,6 +4092,7 @@ void ColumnMetaData::printTo(std::ostream& out) const { out << ", " << "bloom_filter_offset="; (__isset.bloom_filter_offset ? (out << to_string(bloom_filter_offset)) : (out << "")); out << ", " << "bloom_filter_length="; (__isset.bloom_filter_length ? (out << to_string(bloom_filter_length)) : (out << "")); out << ", " << "size_statistics="; (__isset.size_statistics ? (out << to_string(size_statistics)) : (out << "")); + out << ", " << "geometry_stats="; (__isset.geometry_stats ? (out << to_string(geometry_stats)) : (out << "")); out << ")"; } @@ -3411,6 +4100,8 @@ void ColumnMetaData::printTo(std::ostream& out) const { EncryptionWithFooterKey::~EncryptionWithFooterKey() noexcept { } +EncryptionWithFooterKey::EncryptionWithFooterKey() noexcept { +} std::ostream& operator<<(std::ostream& out, const EncryptionWithFooterKey& obj) { obj.printTo(out); @@ -3424,6 +4115,11 @@ void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b) { (void) b; } +bool EncryptionWithFooterKey::operator==(const EncryptionWithFooterKey & /* rhs */) const +{ + return true; +} + EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& other222) noexcept { (void) other222; } @@ -3448,6 +4144,9 @@ void EncryptionWithFooterKey::printTo(std::ostream& out) const { EncryptionWithColumnKey::~EncryptionWithColumnKey() noexcept { } +EncryptionWithColumnKey::EncryptionWithColumnKey() noexcept + : key_metadata() { +} void EncryptionWithColumnKey::__set_path_in_schema(const std::vector & val) { this->path_in_schema = val; @@ -3471,6 +4170,17 @@ void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b) { swap(a.__isset, b.__isset); } +bool EncryptionWithColumnKey::operator==(const EncryptionWithColumnKey & rhs) const +{ + if (!(path_in_schema == rhs.path_in_schema)) 
+ return false; + if (__isset.key_metadata != rhs.__isset.key_metadata) + return false; + else if (__isset.key_metadata && !(key_metadata == rhs.key_metadata)) + return false; + return true; +} + EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other232) { path_in_schema = other232.path_in_schema; key_metadata = other232.key_metadata; @@ -3505,6 +4215,8 @@ void EncryptionWithColumnKey::printTo(std::ostream& out) const { ColumnCryptoMetaData::~ColumnCryptoMetaData() noexcept { } +ColumnCryptoMetaData::ColumnCryptoMetaData() noexcept { +} void ColumnCryptoMetaData::__set_ENCRYPTION_WITH_FOOTER_KEY(const EncryptionWithFooterKey& val) { this->ENCRYPTION_WITH_FOOTER_KEY = val; @@ -3529,6 +4241,19 @@ void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b) { swap(a.__isset, b.__isset); } +bool ColumnCryptoMetaData::operator==(const ColumnCryptoMetaData & rhs) const +{ + if (__isset.ENCRYPTION_WITH_FOOTER_KEY != rhs.__isset.ENCRYPTION_WITH_FOOTER_KEY) + return false; + else if (__isset.ENCRYPTION_WITH_FOOTER_KEY && !(ENCRYPTION_WITH_FOOTER_KEY == rhs.ENCRYPTION_WITH_FOOTER_KEY)) + return false; + if (__isset.ENCRYPTION_WITH_COLUMN_KEY != rhs.__isset.ENCRYPTION_WITH_COLUMN_KEY) + return false; + else if (__isset.ENCRYPTION_WITH_COLUMN_KEY && !(ENCRYPTION_WITH_COLUMN_KEY == rhs.ENCRYPTION_WITH_COLUMN_KEY)) + return false; + return true; +} + ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other236) { ENCRYPTION_WITH_FOOTER_KEY = other236.ENCRYPTION_WITH_FOOTER_KEY; ENCRYPTION_WITH_COLUMN_KEY = other236.ENCRYPTION_WITH_COLUMN_KEY; @@ -3563,6 +4288,15 @@ void ColumnCryptoMetaData::printTo(std::ostream& out) const { ColumnChunk::~ColumnChunk() noexcept { } +ColumnChunk::ColumnChunk() noexcept + : file_path(), + file_offset(0LL), + offset_index_offset(0), + offset_index_length(0), + column_index_offset(0), + column_index_length(0), + encrypted_column_metadata() { +} void ColumnChunk::__set_file_path(const std::string& 
val) { this->file_path = val; @@ -3628,6 +4362,45 @@ void swap(ColumnChunk &a, ColumnChunk &b) { swap(a.__isset, b.__isset); } +bool ColumnChunk::operator==(const ColumnChunk & rhs) const +{ + if (__isset.file_path != rhs.__isset.file_path) + return false; + else if (__isset.file_path && !(file_path == rhs.file_path)) + return false; + if (!(file_offset == rhs.file_offset)) + return false; + if (__isset.meta_data != rhs.__isset.meta_data) + return false; + else if (__isset.meta_data && !(meta_data == rhs.meta_data)) + return false; + if (__isset.offset_index_offset != rhs.__isset.offset_index_offset) + return false; + else if (__isset.offset_index_offset && !(offset_index_offset == rhs.offset_index_offset)) + return false; + if (__isset.offset_index_length != rhs.__isset.offset_index_length) + return false; + else if (__isset.offset_index_length && !(offset_index_length == rhs.offset_index_length)) + return false; + if (__isset.column_index_offset != rhs.__isset.column_index_offset) + return false; + else if (__isset.column_index_offset && !(column_index_offset == rhs.column_index_offset)) + return false; + if (__isset.column_index_length != rhs.__isset.column_index_length) + return false; + else if (__isset.column_index_length && !(column_index_length == rhs.column_index_length)) + return false; + if (__isset.crypto_metadata != rhs.__isset.crypto_metadata) + return false; + else if (__isset.crypto_metadata && !(crypto_metadata == rhs.crypto_metadata)) + return false; + if (__isset.encrypted_column_metadata != rhs.__isset.encrypted_column_metadata) + return false; + else if (__isset.encrypted_column_metadata && !(encrypted_column_metadata == rhs.encrypted_column_metadata)) + return false; + return true; +} + ColumnChunk::ColumnChunk(const ColumnChunk& other240) { file_path = other240.file_path; file_offset = other240.file_offset; @@ -3697,6 +4470,13 @@ void ColumnChunk::printTo(std::ostream& out) const { RowGroup::~RowGroup() noexcept { } +RowGroup::RowGroup() 
noexcept + : total_byte_size(0), + num_rows(0), + file_offset(0), + total_compressed_size(0), + ordinal(0) { +} void RowGroup::__set_columns(const std::vector & val) { this->columns = val; @@ -3748,6 +4528,33 @@ void swap(RowGroup &a, RowGroup &b) { swap(a.__isset, b.__isset); } +bool RowGroup::operator==(const RowGroup & rhs) const +{ + if (!(columns == rhs.columns)) + return false; + if (!(total_byte_size == rhs.total_byte_size)) + return false; + if (!(num_rows == rhs.num_rows)) + return false; + if (__isset.sorting_columns != rhs.__isset.sorting_columns) + return false; + else if (__isset.sorting_columns && !(sorting_columns == rhs.sorting_columns)) + return false; + if (__isset.file_offset != rhs.__isset.file_offset) + return false; + else if (__isset.file_offset && !(file_offset == rhs.file_offset)) + return false; + if (__isset.total_compressed_size != rhs.__isset.total_compressed_size) + return false; + else if (__isset.total_compressed_size && !(total_compressed_size == rhs.total_compressed_size)) + return false; + if (__isset.ordinal != rhs.__isset.ordinal) + return false; + else if (__isset.ordinal && !(ordinal == rhs.ordinal)) + return false; + return true; +} + RowGroup::RowGroup(const RowGroup& other256) { columns = other256.columns; total_byte_size = other256.total_byte_size; @@ -3807,6 +4614,8 @@ void RowGroup::printTo(std::ostream& out) const { TypeDefinedOrder::~TypeDefinedOrder() noexcept { } +TypeDefinedOrder::TypeDefinedOrder() noexcept { +} std::ostream& operator<<(std::ostream& out, const TypeDefinedOrder& obj) { obj.printTo(out); @@ -3820,6 +4629,11 @@ void swap(TypeDefinedOrder &a, TypeDefinedOrder &b) { (void) b; } +bool TypeDefinedOrder::operator==(const TypeDefinedOrder & /* rhs */) const +{ + return true; +} + TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other260) noexcept { (void) other260; } @@ -3844,6 +4658,8 @@ void TypeDefinedOrder::printTo(std::ostream& out) const { ColumnOrder::~ColumnOrder() noexcept { } 
+ColumnOrder::ColumnOrder() noexcept { +} void ColumnOrder::__set_TYPE_ORDER(const TypeDefinedOrder& val) { this->TYPE_ORDER = val; @@ -3862,6 +4678,15 @@ void swap(ColumnOrder &a, ColumnOrder &b) { swap(a.__isset, b.__isset); } +bool ColumnOrder::operator==(const ColumnOrder & rhs) const +{ + if (__isset.TYPE_ORDER != rhs.__isset.TYPE_ORDER) + return false; + else if (__isset.TYPE_ORDER && !(TYPE_ORDER == rhs.TYPE_ORDER)) + return false; + return true; +} + ColumnOrder::ColumnOrder(const ColumnOrder& other264) noexcept { TYPE_ORDER = other264.TYPE_ORDER; __isset = other264.__isset; @@ -3891,6 +4716,11 @@ void ColumnOrder::printTo(std::ostream& out) const { PageLocation::~PageLocation() noexcept { } +PageLocation::PageLocation() noexcept + : offset(0), + compressed_page_size(0), + first_row_index(0) { +} void PageLocation::__set_offset(const int64_t val) { this->offset = val; @@ -3917,6 +4747,17 @@ void swap(PageLocation &a, PageLocation &b) { swap(a.first_row_index, b.first_row_index); } +bool PageLocation::operator==(const PageLocation & rhs) const +{ + if (!(offset == rhs.offset)) + return false; + if (!(compressed_page_size == rhs.compressed_page_size)) + return false; + if (!(first_row_index == rhs.first_row_index)) + return false; + return true; +} + PageLocation::PageLocation(const PageLocation& other268) noexcept { offset = other268.offset; compressed_page_size = other268.compressed_page_size; @@ -3952,6 +4793,8 @@ void PageLocation::printTo(std::ostream& out) const { OffsetIndex::~OffsetIndex() noexcept { } +OffsetIndex::OffsetIndex() noexcept { +} void OffsetIndex::__set_page_locations(const std::vector & val) { this->page_locations = val; @@ -3975,6 +4818,17 @@ void swap(OffsetIndex &a, OffsetIndex &b) { swap(a.__isset, b.__isset); } +bool OffsetIndex::operator==(const OffsetIndex & rhs) const +{ + if (!(page_locations == rhs.page_locations)) + return false; + if (__isset.unencoded_byte_array_data_bytes != rhs.__isset.unencoded_byte_array_data_bytes) + 
return false; + else if (__isset.unencoded_byte_array_data_bytes && !(unencoded_byte_array_data_bytes == rhs.unencoded_byte_array_data_bytes)) + return false; + return true; +} + OffsetIndex::OffsetIndex(const OffsetIndex& other284) { page_locations = other284.page_locations; unencoded_byte_array_data_bytes = other284.unencoded_byte_array_data_bytes; @@ -4009,6 +4863,9 @@ void OffsetIndex::printTo(std::ostream& out) const { ColumnIndex::~ColumnIndex() noexcept { } +ColumnIndex::ColumnIndex() noexcept + : boundary_order(static_cast(0)) { +} void ColumnIndex::__set_null_pages(const std::vector & val) { this->null_pages = val; @@ -4040,11 +4897,6 @@ void ColumnIndex::__set_definition_level_histograms(const std::vector & this->definition_level_histograms = val; __isset.definition_level_histograms = true; } - -void ColumnIndex::__set_geometry_stats(const std::vector & val) { - this->geometry_stats = val; -__isset.geometry_stats = true; -} std::ostream& operator<<(std::ostream& out, const ColumnIndex& obj) { obj.printTo(out); @@ -4061,54 +4913,74 @@ void swap(ColumnIndex &a, ColumnIndex &b) { swap(a.null_counts, b.null_counts); swap(a.repetition_level_histograms, b.repetition_level_histograms); swap(a.definition_level_histograms, b.definition_level_histograms); - swap(a.geometry_stats, b.geometry_stats); swap(a.__isset, b.__isset); } -ColumnIndex::ColumnIndex(const ColumnIndex& other331) { - null_pages = other331.null_pages; - min_values = other331.min_values; - max_values = other331.max_values; - boundary_order = other331.boundary_order; - null_counts = other331.null_counts; - repetition_level_histograms = other331.repetition_level_histograms; - definition_level_histograms = other331.definition_level_histograms; - geometry_stats = other331.geometry_stats; - __isset = other331.__isset; -} -ColumnIndex::ColumnIndex(ColumnIndex&& other332) noexcept { - null_pages = std::move(other332.null_pages); - min_values = std::move(other332.min_values); - max_values = 
std::move(other332.max_values); - boundary_order = other332.boundary_order; - null_counts = std::move(other332.null_counts); - repetition_level_histograms = std::move(other332.repetition_level_histograms); - definition_level_histograms = std::move(other332.definition_level_histograms); - geometry_stats = std::move(other332.geometry_stats); - __isset = other332.__isset; -} -ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other333) { - null_pages = other333.null_pages; - min_values = other333.min_values; - max_values = other333.max_values; - boundary_order = other333.boundary_order; - null_counts = other333.null_counts; - repetition_level_histograms = other333.repetition_level_histograms; - definition_level_histograms = other333.definition_level_histograms; - geometry_stats = other333.geometry_stats; - __isset = other333.__isset; +bool ColumnIndex::operator==(const ColumnIndex & rhs) const +{ + if (!(null_pages == rhs.null_pages)) + return false; + if (!(min_values == rhs.min_values)) + return false; + if (!(max_values == rhs.max_values)) + return false; + if (!(boundary_order == rhs.boundary_order)) + return false; + if (__isset.null_counts != rhs.__isset.null_counts) + return false; + else if (__isset.null_counts && !(null_counts == rhs.null_counts)) + return false; + if (__isset.repetition_level_histograms != rhs.__isset.repetition_level_histograms) + return false; + else if (__isset.repetition_level_histograms && !(repetition_level_histograms == rhs.repetition_level_histograms)) + return false; + if (__isset.definition_level_histograms != rhs.__isset.definition_level_histograms) + return false; + else if (__isset.definition_level_histograms && !(definition_level_histograms == rhs.definition_level_histograms)) + return false; + return true; +} + +ColumnIndex::ColumnIndex(const ColumnIndex& other325) { + null_pages = other325.null_pages; + min_values = other325.min_values; + max_values = other325.max_values; + boundary_order = other325.boundary_order; + 
null_counts = other325.null_counts; + repetition_level_histograms = other325.repetition_level_histograms; + definition_level_histograms = other325.definition_level_histograms; + __isset = other325.__isset; +} +ColumnIndex::ColumnIndex(ColumnIndex&& other326) noexcept { + null_pages = std::move(other326.null_pages); + min_values = std::move(other326.min_values); + max_values = std::move(other326.max_values); + boundary_order = other326.boundary_order; + null_counts = std::move(other326.null_counts); + repetition_level_histograms = std::move(other326.repetition_level_histograms); + definition_level_histograms = std::move(other326.definition_level_histograms); + __isset = other326.__isset; +} +ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other327) { + null_pages = other327.null_pages; + min_values = other327.min_values; + max_values = other327.max_values; + boundary_order = other327.boundary_order; + null_counts = other327.null_counts; + repetition_level_histograms = other327.repetition_level_histograms; + definition_level_histograms = other327.definition_level_histograms; + __isset = other327.__isset; return *this; } -ColumnIndex& ColumnIndex::operator=(ColumnIndex&& other334) noexcept { - null_pages = std::move(other334.null_pages); - min_values = std::move(other334.min_values); - max_values = std::move(other334.max_values); - boundary_order = other334.boundary_order; - null_counts = std::move(other334.null_counts); - repetition_level_histograms = std::move(other334.repetition_level_histograms); - definition_level_histograms = std::move(other334.definition_level_histograms); - geometry_stats = std::move(other334.geometry_stats); - __isset = other334.__isset; +ColumnIndex& ColumnIndex::operator=(ColumnIndex&& other328) noexcept { + null_pages = std::move(other328.null_pages); + min_values = std::move(other328.min_values); + max_values = std::move(other328.max_values); + boundary_order = other328.boundary_order; + null_counts = 
std::move(other328.null_counts); + repetition_level_histograms = std::move(other328.repetition_level_histograms); + definition_level_histograms = std::move(other328.definition_level_histograms); + __isset = other328.__isset; return *this; } void ColumnIndex::printTo(std::ostream& out) const { @@ -4121,7 +4993,6 @@ void ColumnIndex::printTo(std::ostream& out) const { out << ", " << "null_counts="; (__isset.null_counts ? (out << to_string(null_counts)) : (out << "")); out << ", " << "repetition_level_histograms="; (__isset.repetition_level_histograms ? (out << to_string(repetition_level_histograms)) : (out << "")); out << ", " << "definition_level_histograms="; (__isset.definition_level_histograms ? (out << to_string(definition_level_histograms)) : (out << "")); - out << ", " << "geometry_stats="; (__isset.geometry_stats ? (out << to_string(geometry_stats)) : (out << "")); out << ")"; } @@ -4129,6 +5000,11 @@ void ColumnIndex::printTo(std::ostream& out) const { AesGcmV1::~AesGcmV1() noexcept { } +AesGcmV1::AesGcmV1() noexcept + : aad_prefix(), + aad_file_unique(), + supply_aad_prefix(0) { +} void AesGcmV1::__set_aad_prefix(const std::string& val) { this->aad_prefix = val; @@ -4159,30 +5035,47 @@ void swap(AesGcmV1 &a, AesGcmV1 &b) { swap(a.__isset, b.__isset); } -AesGcmV1::AesGcmV1(const AesGcmV1& other335) { - aad_prefix = other335.aad_prefix; - aad_file_unique = other335.aad_file_unique; - supply_aad_prefix = other335.supply_aad_prefix; - __isset = other335.__isset; -} -AesGcmV1::AesGcmV1(AesGcmV1&& other336) noexcept { - aad_prefix = std::move(other336.aad_prefix); - aad_file_unique = std::move(other336.aad_file_unique); - supply_aad_prefix = other336.supply_aad_prefix; - __isset = other336.__isset; -} -AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other337) { - aad_prefix = other337.aad_prefix; - aad_file_unique = other337.aad_file_unique; - supply_aad_prefix = other337.supply_aad_prefix; - __isset = other337.__isset; +bool AesGcmV1::operator==(const AesGcmV1 & 
rhs) const +{ + if (__isset.aad_prefix != rhs.__isset.aad_prefix) + return false; + else if (__isset.aad_prefix && !(aad_prefix == rhs.aad_prefix)) + return false; + if (__isset.aad_file_unique != rhs.__isset.aad_file_unique) + return false; + else if (__isset.aad_file_unique && !(aad_file_unique == rhs.aad_file_unique)) + return false; + if (__isset.supply_aad_prefix != rhs.__isset.supply_aad_prefix) + return false; + else if (__isset.supply_aad_prefix && !(supply_aad_prefix == rhs.supply_aad_prefix)) + return false; + return true; +} + +AesGcmV1::AesGcmV1(const AesGcmV1& other329) { + aad_prefix = other329.aad_prefix; + aad_file_unique = other329.aad_file_unique; + supply_aad_prefix = other329.supply_aad_prefix; + __isset = other329.__isset; +} +AesGcmV1::AesGcmV1(AesGcmV1&& other330) noexcept { + aad_prefix = std::move(other330.aad_prefix); + aad_file_unique = std::move(other330.aad_file_unique); + supply_aad_prefix = other330.supply_aad_prefix; + __isset = other330.__isset; +} +AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other331) { + aad_prefix = other331.aad_prefix; + aad_file_unique = other331.aad_file_unique; + supply_aad_prefix = other331.supply_aad_prefix; + __isset = other331.__isset; return *this; } -AesGcmV1& AesGcmV1::operator=(AesGcmV1&& other338) noexcept { - aad_prefix = std::move(other338.aad_prefix); - aad_file_unique = std::move(other338.aad_file_unique); - supply_aad_prefix = other338.supply_aad_prefix; - __isset = other338.__isset; +AesGcmV1& AesGcmV1::operator=(AesGcmV1&& other332) noexcept { + aad_prefix = std::move(other332.aad_prefix); + aad_file_unique = std::move(other332.aad_file_unique); + supply_aad_prefix = other332.supply_aad_prefix; + __isset = other332.__isset; return *this; } void AesGcmV1::printTo(std::ostream& out) const { @@ -4198,6 +5091,11 @@ void AesGcmV1::printTo(std::ostream& out) const { AesGcmCtrV1::~AesGcmCtrV1() noexcept { } +AesGcmCtrV1::AesGcmCtrV1() noexcept + : aad_prefix(), + aad_file_unique(), + 
supply_aad_prefix(0) { +} void AesGcmCtrV1::__set_aad_prefix(const std::string& val) { this->aad_prefix = val; @@ -4228,30 +5126,47 @@ void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b) { swap(a.__isset, b.__isset); } -AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other339) { - aad_prefix = other339.aad_prefix; - aad_file_unique = other339.aad_file_unique; - supply_aad_prefix = other339.supply_aad_prefix; - __isset = other339.__isset; +bool AesGcmCtrV1::operator==(const AesGcmCtrV1 & rhs) const +{ + if (__isset.aad_prefix != rhs.__isset.aad_prefix) + return false; + else if (__isset.aad_prefix && !(aad_prefix == rhs.aad_prefix)) + return false; + if (__isset.aad_file_unique != rhs.__isset.aad_file_unique) + return false; + else if (__isset.aad_file_unique && !(aad_file_unique == rhs.aad_file_unique)) + return false; + if (__isset.supply_aad_prefix != rhs.__isset.supply_aad_prefix) + return false; + else if (__isset.supply_aad_prefix && !(supply_aad_prefix == rhs.supply_aad_prefix)) + return false; + return true; +} + +AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other333) { + aad_prefix = other333.aad_prefix; + aad_file_unique = other333.aad_file_unique; + supply_aad_prefix = other333.supply_aad_prefix; + __isset = other333.__isset; } -AesGcmCtrV1::AesGcmCtrV1(AesGcmCtrV1&& other340) noexcept { - aad_prefix = std::move(other340.aad_prefix); - aad_file_unique = std::move(other340.aad_file_unique); - supply_aad_prefix = other340.supply_aad_prefix; - __isset = other340.__isset; +AesGcmCtrV1::AesGcmCtrV1(AesGcmCtrV1&& other334) noexcept { + aad_prefix = std::move(other334.aad_prefix); + aad_file_unique = std::move(other334.aad_file_unique); + supply_aad_prefix = other334.supply_aad_prefix; + __isset = other334.__isset; } -AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other341) { - aad_prefix = other341.aad_prefix; - aad_file_unique = other341.aad_file_unique; - supply_aad_prefix = other341.supply_aad_prefix; - __isset = other341.__isset; +AesGcmCtrV1& 
AesGcmCtrV1::operator=(const AesGcmCtrV1& other335) { + aad_prefix = other335.aad_prefix; + aad_file_unique = other335.aad_file_unique; + supply_aad_prefix = other335.supply_aad_prefix; + __isset = other335.__isset; return *this; } -AesGcmCtrV1& AesGcmCtrV1::operator=(AesGcmCtrV1&& other342) noexcept { - aad_prefix = std::move(other342.aad_prefix); - aad_file_unique = std::move(other342.aad_file_unique); - supply_aad_prefix = other342.supply_aad_prefix; - __isset = other342.__isset; +AesGcmCtrV1& AesGcmCtrV1::operator=(AesGcmCtrV1&& other336) noexcept { + aad_prefix = std::move(other336.aad_prefix); + aad_file_unique = std::move(other336.aad_file_unique); + supply_aad_prefix = other336.supply_aad_prefix; + __isset = other336.__isset; return *this; } void AesGcmCtrV1::printTo(std::ostream& out) const { @@ -4267,6 +5182,8 @@ void AesGcmCtrV1::printTo(std::ostream& out) const { EncryptionAlgorithm::~EncryptionAlgorithm() noexcept { } +EncryptionAlgorithm::EncryptionAlgorithm() noexcept { +} void EncryptionAlgorithm::__set_AES_GCM_V1(const AesGcmV1& val) { this->AES_GCM_V1 = val; @@ -4291,26 +5208,39 @@ void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b) { swap(a.__isset, b.__isset); } -EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other343) { - AES_GCM_V1 = other343.AES_GCM_V1; - AES_GCM_CTR_V1 = other343.AES_GCM_CTR_V1; - __isset = other343.__isset; +bool EncryptionAlgorithm::operator==(const EncryptionAlgorithm & rhs) const +{ + if (__isset.AES_GCM_V1 != rhs.__isset.AES_GCM_V1) + return false; + else if (__isset.AES_GCM_V1 && !(AES_GCM_V1 == rhs.AES_GCM_V1)) + return false; + if (__isset.AES_GCM_CTR_V1 != rhs.__isset.AES_GCM_CTR_V1) + return false; + else if (__isset.AES_GCM_CTR_V1 && !(AES_GCM_CTR_V1 == rhs.AES_GCM_CTR_V1)) + return false; + return true; +} + +EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other337) { + AES_GCM_V1 = other337.AES_GCM_V1; + AES_GCM_CTR_V1 = other337.AES_GCM_CTR_V1; + __isset = 
other337.__isset; } -EncryptionAlgorithm::EncryptionAlgorithm(EncryptionAlgorithm&& other344) noexcept { - AES_GCM_V1 = std::move(other344.AES_GCM_V1); - AES_GCM_CTR_V1 = std::move(other344.AES_GCM_CTR_V1); - __isset = other344.__isset; +EncryptionAlgorithm::EncryptionAlgorithm(EncryptionAlgorithm&& other338) noexcept { + AES_GCM_V1 = std::move(other338.AES_GCM_V1); + AES_GCM_CTR_V1 = std::move(other338.AES_GCM_CTR_V1); + __isset = other338.__isset; } -EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other345) { - AES_GCM_V1 = other345.AES_GCM_V1; - AES_GCM_CTR_V1 = other345.AES_GCM_CTR_V1; - __isset = other345.__isset; +EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other339) { + AES_GCM_V1 = other339.AES_GCM_V1; + AES_GCM_CTR_V1 = other339.AES_GCM_CTR_V1; + __isset = other339.__isset; return *this; } -EncryptionAlgorithm& EncryptionAlgorithm::operator=(EncryptionAlgorithm&& other346) noexcept { - AES_GCM_V1 = std::move(other346.AES_GCM_V1); - AES_GCM_CTR_V1 = std::move(other346.AES_GCM_CTR_V1); - __isset = other346.__isset; +EncryptionAlgorithm& EncryptionAlgorithm::operator=(EncryptionAlgorithm&& other340) noexcept { + AES_GCM_V1 = std::move(other340.AES_GCM_V1); + AES_GCM_CTR_V1 = std::move(other340.AES_GCM_CTR_V1); + __isset = other340.__isset; return *this; } void EncryptionAlgorithm::printTo(std::ostream& out) const { @@ -4325,6 +5255,12 @@ void EncryptionAlgorithm::printTo(std::ostream& out) const { FileMetaData::~FileMetaData() noexcept { } +FileMetaData::FileMetaData() noexcept + : version(0), + num_rows(0), + created_by(), + footer_signing_key_metadata() { +} void FileMetaData::__set_version(const int32_t val) { this->version = val; @@ -4387,54 +5323,87 @@ void swap(FileMetaData &a, FileMetaData &b) { swap(a.__isset, b.__isset); } -FileMetaData::FileMetaData(const FileMetaData& other371) { - version = other371.version; - schema = other371.schema; - num_rows = other371.num_rows; - row_groups = 
other371.row_groups; - key_value_metadata = other371.key_value_metadata; - created_by = other371.created_by; - column_orders = other371.column_orders; - encryption_algorithm = other371.encryption_algorithm; - footer_signing_key_metadata = other371.footer_signing_key_metadata; - __isset = other371.__isset; -} -FileMetaData::FileMetaData(FileMetaData&& other372) noexcept { - version = other372.version; - schema = std::move(other372.schema); - num_rows = other372.num_rows; - row_groups = std::move(other372.row_groups); - key_value_metadata = std::move(other372.key_value_metadata); - created_by = std::move(other372.created_by); - column_orders = std::move(other372.column_orders); - encryption_algorithm = std::move(other372.encryption_algorithm); - footer_signing_key_metadata = std::move(other372.footer_signing_key_metadata); - __isset = other372.__isset; -} -FileMetaData& FileMetaData::operator=(const FileMetaData& other373) { - version = other373.version; - schema = other373.schema; - num_rows = other373.num_rows; - row_groups = other373.row_groups; - key_value_metadata = other373.key_value_metadata; - created_by = other373.created_by; - column_orders = other373.column_orders; - encryption_algorithm = other373.encryption_algorithm; - footer_signing_key_metadata = other373.footer_signing_key_metadata; - __isset = other373.__isset; +bool FileMetaData::operator==(const FileMetaData & rhs) const +{ + if (!(version == rhs.version)) + return false; + if (!(schema == rhs.schema)) + return false; + if (!(num_rows == rhs.num_rows)) + return false; + if (!(row_groups == rhs.row_groups)) + return false; + if (__isset.key_value_metadata != rhs.__isset.key_value_metadata) + return false; + else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata)) + return false; + if (__isset.created_by != rhs.__isset.created_by) + return false; + else if (__isset.created_by && !(created_by == rhs.created_by)) + return false; + if (__isset.column_orders != 
rhs.__isset.column_orders) + return false; + else if (__isset.column_orders && !(column_orders == rhs.column_orders)) + return false; + if (__isset.encryption_algorithm != rhs.__isset.encryption_algorithm) + return false; + else if (__isset.encryption_algorithm && !(encryption_algorithm == rhs.encryption_algorithm)) + return false; + if (__isset.footer_signing_key_metadata != rhs.__isset.footer_signing_key_metadata) + return false; + else if (__isset.footer_signing_key_metadata && !(footer_signing_key_metadata == rhs.footer_signing_key_metadata)) + return false; + return true; +} + +FileMetaData::FileMetaData(const FileMetaData& other365) { + version = other365.version; + schema = other365.schema; + num_rows = other365.num_rows; + row_groups = other365.row_groups; + key_value_metadata = other365.key_value_metadata; + created_by = other365.created_by; + column_orders = other365.column_orders; + encryption_algorithm = other365.encryption_algorithm; + footer_signing_key_metadata = other365.footer_signing_key_metadata; + __isset = other365.__isset; +} +FileMetaData::FileMetaData(FileMetaData&& other366) noexcept { + version = other366.version; + schema = std::move(other366.schema); + num_rows = other366.num_rows; + row_groups = std::move(other366.row_groups); + key_value_metadata = std::move(other366.key_value_metadata); + created_by = std::move(other366.created_by); + column_orders = std::move(other366.column_orders); + encryption_algorithm = std::move(other366.encryption_algorithm); + footer_signing_key_metadata = std::move(other366.footer_signing_key_metadata); + __isset = other366.__isset; +} +FileMetaData& FileMetaData::operator=(const FileMetaData& other367) { + version = other367.version; + schema = other367.schema; + num_rows = other367.num_rows; + row_groups = other367.row_groups; + key_value_metadata = other367.key_value_metadata; + created_by = other367.created_by; + column_orders = other367.column_orders; + encryption_algorithm = 
other367.encryption_algorithm; + footer_signing_key_metadata = other367.footer_signing_key_metadata; + __isset = other367.__isset; return *this; } -FileMetaData& FileMetaData::operator=(FileMetaData&& other374) noexcept { - version = other374.version; - schema = std::move(other374.schema); - num_rows = other374.num_rows; - row_groups = std::move(other374.row_groups); - key_value_metadata = std::move(other374.key_value_metadata); - created_by = std::move(other374.created_by); - column_orders = std::move(other374.column_orders); - encryption_algorithm = std::move(other374.encryption_algorithm); - footer_signing_key_metadata = std::move(other374.footer_signing_key_metadata); - __isset = other374.__isset; +FileMetaData& FileMetaData::operator=(FileMetaData&& other368) noexcept { + version = other368.version; + schema = std::move(other368.schema); + num_rows = other368.num_rows; + row_groups = std::move(other368.row_groups); + key_value_metadata = std::move(other368.key_value_metadata); + created_by = std::move(other368.created_by); + column_orders = std::move(other368.column_orders); + encryption_algorithm = std::move(other368.encryption_algorithm); + footer_signing_key_metadata = std::move(other368.footer_signing_key_metadata); + __isset = other368.__isset; return *this; } void FileMetaData::printTo(std::ostream& out) const { @@ -4456,6 +5425,9 @@ void FileMetaData::printTo(std::ostream& out) const { FileCryptoMetaData::~FileCryptoMetaData() noexcept { } +FileCryptoMetaData::FileCryptoMetaData() noexcept + : key_metadata() { +} void FileCryptoMetaData::__set_encryption_algorithm(const EncryptionAlgorithm& val) { this->encryption_algorithm = val; @@ -4479,26 +5451,37 @@ void swap(FileCryptoMetaData &a, FileCryptoMetaData &b) { swap(a.__isset, b.__isset); } -FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other375) { - encryption_algorithm = other375.encryption_algorithm; - key_metadata = other375.key_metadata; - __isset = other375.__isset; -} 
-FileCryptoMetaData::FileCryptoMetaData(FileCryptoMetaData&& other376) noexcept { - encryption_algorithm = std::move(other376.encryption_algorithm); - key_metadata = std::move(other376.key_metadata); - __isset = other376.__isset; -} -FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other377) { - encryption_algorithm = other377.encryption_algorithm; - key_metadata = other377.key_metadata; - __isset = other377.__isset; +bool FileCryptoMetaData::operator==(const FileCryptoMetaData & rhs) const +{ + if (!(encryption_algorithm == rhs.encryption_algorithm)) + return false; + if (__isset.key_metadata != rhs.__isset.key_metadata) + return false; + else if (__isset.key_metadata && !(key_metadata == rhs.key_metadata)) + return false; + return true; +} + +FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other369) { + encryption_algorithm = other369.encryption_algorithm; + key_metadata = other369.key_metadata; + __isset = other369.__isset; +} +FileCryptoMetaData::FileCryptoMetaData(FileCryptoMetaData&& other370) noexcept { + encryption_algorithm = std::move(other370.encryption_algorithm); + key_metadata = std::move(other370.key_metadata); + __isset = other370.__isset; +} +FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other371) { + encryption_algorithm = other371.encryption_algorithm; + key_metadata = other371.key_metadata; + __isset = other371.__isset; return *this; } -FileCryptoMetaData& FileCryptoMetaData::operator=(FileCryptoMetaData&& other378) noexcept { - encryption_algorithm = std::move(other378.encryption_algorithm); - key_metadata = std::move(other378.key_metadata); - __isset = other378.__isset; +FileCryptoMetaData& FileCryptoMetaData::operator=(FileCryptoMetaData&& other372) noexcept { + encryption_algorithm = std::move(other372.encryption_algorithm); + key_metadata = std::move(other372.key_metadata); + __isset = other372.__isset; return *this; } void FileCryptoMetaData::printTo(std::ostream& 
out) const { diff --git a/cpp/src/generated/parquet_types.h b/cpp/src/generated/parquet_types.h index fce29472addb8..2e03948a96b45 100644 --- a/cpp/src/generated/parquet_types.h +++ b/cpp/src/generated/parquet_types.h @@ -1,5 +1,5 @@ /** - * Autogenerated by Thrift Compiler (0.20.0) + * Autogenerated by Thrift Compiler (0.21.0) * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * @generated @@ -73,14 +73,14 @@ struct ConvertedType { */ LIST = 3, /** - * an enum is converted into a binary field + * an enum is converted into a BYTE_ARRAY field */ ENUM = 4, /** * A decimal value. * - * This may be used to annotate binary or fixed primitive types. The - * underlying byte array stores the unscaled value encoded as two's + * This may be used to annotate BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY primitive + * types. The underlying byte array stores the unscaled value encoded as two's * complement using big-endian byte order (the most significant byte is the * zeroth element). The value of the decimal is the value * 10^{-scale}. * @@ -161,7 +161,7 @@ struct ConvertedType { /** * An embedded BSON document * - * A BSON document embedded within a single BINARY column. + * A BSON document embedded within a single BYTE_ARRAY column. */ BSON = 20, /** @@ -192,11 +192,11 @@ std::string to_string(const ConvertedType::type& val); struct FieldRepetitionType { enum type { /** - * This field is required (can not be null) and each record has exactly 1 value. + * This field is required (can not be null) and each row has exactly 1 value. */ REQUIRED = 0, /** - * The field is optional (can be null) and each record has 0 or 1 values. + * The field is optional (can be null) and each row has 0 or 1 values. */ OPTIONAL = 1, /** @@ -213,7 +213,7 @@ std::ostream& operator<<(std::ostream& out, const FieldRepetitionType::type& val std::string to_string(const FieldRepetitionType::type& val); /** - * Physical type and encoding for the geometry type. 
+ * Physical type and encoding for the geometry type */ struct GeometryEncoding { enum type { @@ -221,24 +221,6 @@ struct GeometryEncoding { * Allowed for physical type: BYTE_ARRAY. * * Well-known binary (WKB) representations of geometries. - * - * To be clear, we follow the same rule of WKB and coordinate axis order from - * GeoParquet [1][2]. Geometries SHOULD be encoded as ISO WKB [3][4] - * supporting XY, XYZ, XYM, XYZM and the standard geometry types - * Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon, - * and GeometryCollection). Coordinate order is always (x, y) where x is - * easting or longitude and y is northing or latitude. This ordering explicitly - * overrides the axis order as specified in the CRS following the GeoPackage - * specification [5]. - * - * This is the preferred encoding for maximum portability. It also supports - * GeometryStatistics to be set in the column chunk and page index. - * - * [1] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L92 - * [2] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L155 - * [3] https://portal.ogc.org/files/?artifact_id=18241 - * [4] https://www.iso.org/standard/60343.html - * [5] https://www.geopackage.org/spec130/#gpb_spec */ WKB = 0 }; @@ -251,16 +233,7 @@ std::ostream& operator<<(std::ostream& out, const GeometryEncoding::type& val); std::string to_string(const GeometryEncoding::type& val); /** - * Interpretation for edges of elements of a GEOMETRY logical type. In other - * words, whether a point between two vertices should be interpolated in - * its XY dimensions as if it were a Cartesian line connecting the two - * vertices (planar) or the shortest spherical arc between the longitude - * and latitude represented by the two vertices (spherical). This value - * applies to all non-point geometry objects and is independent of the - * coordinate reference system. 
- * - * Because most systems currently assume planar edges and do not support - * spherical edges, planar should be used as the default value. + * Interpretation for edges of elements of a GEOMETRY type */ struct Edges { enum type { @@ -545,9 +518,7 @@ class SizeStatistics { SizeStatistics(SizeStatistics&&) noexcept; SizeStatistics& operator=(const SizeStatistics&); SizeStatistics& operator=(SizeStatistics&&) noexcept; - SizeStatistics() noexcept - : unencoded_byte_array_data_bytes(0) { - } + SizeStatistics() noexcept; virtual ~SizeStatistics() noexcept; /** @@ -596,22 +567,7 @@ class SizeStatistics { void __set_definition_level_histogram(const std::vector & val); - bool operator == (const SizeStatistics & rhs) const - { - if (__isset.unencoded_byte_array_data_bytes != rhs.__isset.unencoded_byte_array_data_bytes) - return false; - else if (__isset.unencoded_byte_array_data_bytes && !(unencoded_byte_array_data_bytes == rhs.unencoded_byte_array_data_bytes)) - return false; - if (__isset.repetition_level_histogram != rhs.__isset.repetition_level_histogram) - return false; - else if (__isset.repetition_level_histogram && !(repetition_level_histogram == rhs.repetition_level_histogram)) - return false; - if (__isset.definition_level_histogram != rhs.__isset.definition_level_histogram) - return false; - else if (__isset.definition_level_histogram && !(definition_level_histogram == rhs.definition_level_histogram)) - return false; - return true; - } + bool operator == (const SizeStatistics & rhs) const; bool operator != (const SizeStatistics &rhs) const { return !(*this == rhs); } @@ -640,10 +596,7 @@ typedef struct _BoundingBox__isset { /** * Bounding box of geometries in the representation of min/max value pair of - * coordinates from each axis when Edges is planar. Values of Z and M are omitted - * for 2D geometries. 
When Edges is spherical, the bounding box is in the form of - * [westmost, eastmost, southmost, northmost], with necessary min/max values for - * Z and M if needed. + * coordinates from each axis. */ class BoundingBox { public: @@ -652,37 +605,40 @@ class BoundingBox { BoundingBox(BoundingBox&&) noexcept; BoundingBox& operator=(const BoundingBox&) noexcept; BoundingBox& operator=(BoundingBox&&) noexcept; - BoundingBox() noexcept - : xmin(0), - xmax(0), - ymin(0), - ymax(0), - zmin(0), - zmax(0), - mmin(0), - mmax(0) { - } + BoundingBox() noexcept; virtual ~BoundingBox() noexcept; /** - * Westmost value if edges = spherical * + * Min X value when edges = PLANAR, westmost value if edges = SPHERICAL */ double xmin; /** - * Eastmost value if edges = spherical * + * Max X value when edges = PLANAR, eastmost value if edges = SPHERICAL */ double xmax; /** - * Southmost value if edges = spherical * + * Min Y value when edges = PLANAR, southmost value if edges = SPHERICAL */ double ymin; /** - * Northmost value if edges = spherical * + * Max Y value when edges = PLANAR, northmost value if edges = SPHERICAL */ double ymax; + /** + * Min Z value if the axis exists + */ double zmin; + /** + * Max Z value if the axis exists + */ double zmax; + /** + * Min M value if the axis exists + */ double mmin; + /** + * Max M value if the axis exists + */ double mmax; _BoundingBox__isset __isset; @@ -703,34 +659,7 @@ class BoundingBox { void __set_mmax(const double val); - bool operator == (const BoundingBox & rhs) const - { - if (!(xmin == rhs.xmin)) - return false; - if (!(xmax == rhs.xmax)) - return false; - if (!(ymin == rhs.ymin)) - return false; - if (!(ymax == rhs.ymax)) - return false; - if (__isset.zmin != rhs.__isset.zmin) - return false; - else if (__isset.zmin && !(zmin == rhs.zmin)) - return false; - if (__isset.zmax != rhs.__isset.zmax) - return false; - else if (__isset.zmax && !(zmax == rhs.zmax)) - return false; - if (__isset.mmin != rhs.__isset.mmin) - return false; - 
else if (__isset.mmin && !(mmin == rhs.mmin)) - return false; - if (__isset.mmax != rhs.__isset.mmax) - return false; - else if (__isset.mmax && !(mmax == rhs.mmax)) - return false; - return true; - } + bool operator == (const BoundingBox & rhs) const; bool operator != (const BoundingBox &rhs) const { return !(*this == rhs); } @@ -765,8 +694,7 @@ class GeometryStatistics { GeometryStatistics(GeometryStatistics&&) noexcept; GeometryStatistics& operator=(const GeometryStatistics&); GeometryStatistics& operator=(GeometryStatistics&&) noexcept; - GeometryStatistics() noexcept { - } + GeometryStatistics() noexcept; virtual ~GeometryStatistics() noexcept; /** @@ -774,31 +702,7 @@ class GeometryStatistics { */ BoundingBox bbox; /** - * The geometry types of all geometries, or an empty array if they are not - * known. This is borrowed from `geometry_types` column metadata of GeoParquet [1] - * except that values in the list are WKB (ISO variant) integer codes [2]. Table - * below shows the most common geometry types and their codes: - * - * | Type | XY | XYZ | XYM | XYZM | - * | :----------------- | :--- | :--- | :--- | :--: | - * | Point | 0001 | 1001 | 2001 | 3001 | - * | LineString | 0002 | 1002 | 2002 | 3002 | - * | Polygon | 0003 | 1003 | 2003 | 3003 | - * | MultiPoint | 0004 | 1004 | 2004 | 3004 | - * | MultiLineString | 0005 | 1005 | 2005 | 3005 | - * | MultiPolygon | 0006 | 1006 | 2006 | 3006 | - * | GeometryCollection | 0007 | 1007 | 2007 | 3007 | - * - * In addition, the following rules are used: - * - A list of multiple values indicates that multiple geometry types are - * present (e.g. `[0003, 0006]`). - * - An empty array explicitly signals that the geometry types are not known. - * - The geometry types in the list must be unique (e.g. `[0001, 0001]` - * is not valid). 
- * - * Please refer to links below for more detail: - * [1] https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary - * [2] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L159 + * Geometry type codes of all geometries, or an empty list if not known */ std::vector geometry_types; @@ -808,18 +712,7 @@ class GeometryStatistics { void __set_geometry_types(const std::vector & val); - bool operator == (const GeometryStatistics & rhs) const - { - if (__isset.bbox != rhs.__isset.bbox) - return false; - else if (__isset.bbox && !(bbox == rhs.bbox)) - return false; - if (__isset.geometry_types != rhs.__isset.geometry_types) - return false; - else if (__isset.geometry_types && !(geometry_types == rhs.geometry_types)) - return false; - return true; - } + bool operator == (const GeometryStatistics & rhs) const; bool operator != (const GeometryStatistics &rhs) const { return !(*this == rhs); } @@ -839,7 +732,7 @@ void swap(GeometryStatistics &a, GeometryStatistics &b); std::ostream& operator<<(std::ostream& out, const GeometryStatistics& obj); typedef struct _Statistics__isset { - _Statistics__isset() : max(false), min(false), null_count(false), distinct_count(false), max_value(false), min_value(false), is_max_value_exact(false), is_min_value_exact(false), geometry_stats(false) {} + _Statistics__isset() : max(false), min(false), null_count(false), distinct_count(false), max_value(false), min_value(false), is_max_value_exact(false), is_min_value_exact(false) {} bool max :1; bool min :1; bool null_count :1; @@ -848,7 +741,6 @@ typedef struct _Statistics__isset { bool min_value :1; bool is_max_value_exact :1; bool is_min_value_exact :1; - bool geometry_stats :1; } _Statistics__isset; /** @@ -862,16 +754,7 @@ class Statistics { Statistics(Statistics&&) noexcept; Statistics& operator=(const Statistics&); Statistics& operator=(Statistics&&) noexcept; - Statistics() noexcept - : max(), - min(), - 
null_count(0), - distinct_count(0), - max_value(), - min_value(), - is_max_value_exact(0), - is_min_value_exact(0) { - } + Statistics() noexcept; virtual ~Statistics() noexcept; /** @@ -890,7 +773,12 @@ class Statistics { std::string max; std::string min; /** - * count of null value in the column + * Count of null values in the column. + * + * Writers SHOULD always write this field even if it is zero (i.e. no null value) + * or the column is not nullable. + * Readers MUST distinguish between null_count not being present and null_count == 0. + * If null_count is not present, readers MUST NOT assume null_count == 0. */ int64_t null_count; /** @@ -919,10 +807,6 @@ class Statistics { * If true, min_value is the actual minimum value for a column */ bool is_min_value_exact; - /** - * statistics specific to geometry logical type - */ - GeometryStatistics geometry_stats; _Statistics__isset __isset; @@ -942,48 +826,7 @@ class Statistics { void __set_is_min_value_exact(const bool val); - void __set_geometry_stats(const GeometryStatistics& val); - - bool operator == (const Statistics & rhs) const - { - if (__isset.max != rhs.__isset.max) - return false; - else if (__isset.max && !(max == rhs.max)) - return false; - if (__isset.min != rhs.__isset.min) - return false; - else if (__isset.min && !(min == rhs.min)) - return false; - if (__isset.null_count != rhs.__isset.null_count) - return false; - else if (__isset.null_count && !(null_count == rhs.null_count)) - return false; - if (__isset.distinct_count != rhs.__isset.distinct_count) - return false; - else if (__isset.distinct_count && !(distinct_count == rhs.distinct_count)) - return false; - if (__isset.max_value != rhs.__isset.max_value) - return false; - else if (__isset.max_value && !(max_value == rhs.max_value)) - return false; - if (__isset.min_value != rhs.__isset.min_value) - return false; - else if (__isset.min_value && !(min_value == rhs.min_value)) - return false; - if (__isset.is_max_value_exact != 
rhs.__isset.is_max_value_exact) - return false; - else if (__isset.is_max_value_exact && !(is_max_value_exact == rhs.is_max_value_exact)) - return false; - if (__isset.is_min_value_exact != rhs.__isset.is_min_value_exact) - return false; - else if (__isset.is_min_value_exact && !(is_min_value_exact == rhs.is_min_value_exact)) - return false; - if (__isset.geometry_stats != rhs.__isset.geometry_stats) - return false; - else if (__isset.geometry_stats && !(geometry_stats == rhs.geometry_stats)) - return false; - return true; - } + bool operator == (const Statistics & rhs) const; bool operator != (const Statistics &rhs) const { return !(*this == rhs); } @@ -1013,15 +856,11 @@ class StringType { StringType(StringType&&) noexcept; StringType& operator=(const StringType&) noexcept; StringType& operator=(StringType&&) noexcept; - StringType() noexcept { - } + StringType() noexcept; virtual ~StringType() noexcept; - bool operator == (const StringType & /* rhs */) const - { - return true; - } + bool operator == (const StringType & /* rhs */) const; bool operator != (const StringType &rhs) const { return !(*this == rhs); } @@ -1048,15 +887,11 @@ class UUIDType { UUIDType(UUIDType&&) noexcept; UUIDType& operator=(const UUIDType&) noexcept; UUIDType& operator=(UUIDType&&) noexcept; - UUIDType() noexcept { - } + UUIDType() noexcept; virtual ~UUIDType() noexcept; - bool operator == (const UUIDType & /* rhs */) const - { - return true; - } + bool operator == (const UUIDType & /* rhs */) const; bool operator != (const UUIDType &rhs) const { return !(*this == rhs); } @@ -1083,15 +918,11 @@ class MapType { MapType(MapType&&) noexcept; MapType& operator=(const MapType&) noexcept; MapType& operator=(MapType&&) noexcept; - MapType() noexcept { - } + MapType() noexcept; virtual ~MapType() noexcept; - bool operator == (const MapType & /* rhs */) const - { - return true; - } + bool operator == (const MapType & /* rhs */) const; bool operator != (const MapType &rhs) const { return !(*this 
== rhs); } @@ -1118,15 +949,11 @@ class ListType { ListType(ListType&&) noexcept; ListType& operator=(const ListType&) noexcept; ListType& operator=(ListType&&) noexcept; - ListType() noexcept { - } + ListType() noexcept; virtual ~ListType() noexcept; - bool operator == (const ListType & /* rhs */) const - { - return true; - } + bool operator == (const ListType & /* rhs */) const; bool operator != (const ListType &rhs) const { return !(*this == rhs); } @@ -1153,15 +980,11 @@ class EnumType { EnumType(EnumType&&) noexcept; EnumType& operator=(const EnumType&) noexcept; EnumType& operator=(EnumType&&) noexcept; - EnumType() noexcept { - } + EnumType() noexcept; virtual ~EnumType() noexcept; - bool operator == (const EnumType & /* rhs */) const - { - return true; - } + bool operator == (const EnumType & /* rhs */) const; bool operator != (const EnumType &rhs) const { return !(*this == rhs); } @@ -1188,15 +1011,11 @@ class DateType { DateType(DateType&&) noexcept; DateType& operator=(const DateType&) noexcept; DateType& operator=(DateType&&) noexcept; - DateType() noexcept { - } + DateType() noexcept; virtual ~DateType() noexcept; - bool operator == (const DateType & /* rhs */) const - { - return true; - } + bool operator == (const DateType & /* rhs */) const; bool operator != (const DateType &rhs) const { return !(*this == rhs); } @@ -1223,15 +1042,11 @@ class Float16Type { Float16Type(Float16Type&&) noexcept; Float16Type& operator=(const Float16Type&) noexcept; Float16Type& operator=(Float16Type&&) noexcept; - Float16Type() noexcept { - } + Float16Type() noexcept; virtual ~Float16Type() noexcept; - bool operator == (const Float16Type & /* rhs */) const - { - return true; - } + bool operator == (const Float16Type & /* rhs */) const; bool operator != (const Float16Type &rhs) const { return !(*this == rhs); } @@ -1265,15 +1080,11 @@ class NullType { NullType(NullType&&) noexcept; NullType& operator=(const NullType&) noexcept; NullType& operator=(NullType&&) noexcept; - 
NullType() noexcept { - } + NullType() noexcept; virtual ~NullType() noexcept; - bool operator == (const NullType & /* rhs */) const - { - return true; - } + bool operator == (const NullType & /* rhs */) const; bool operator != (const NullType &rhs) const { return !(*this == rhs); } @@ -1302,7 +1113,7 @@ std::ostream& operator<<(std::ostream& out, const NullType& obj); * To maintain forward-compatibility in v1, implementations using this logical * type must also set scale and precision on the annotated SchemaElement. * - * Allowed for physical types: INT32, INT64, FIXED, and BINARY + * Allowed for physical types: INT32, INT64, FIXED_LEN_BYTE_ARRAY, and BYTE_ARRAY. */ class DecimalType { public: @@ -1311,10 +1122,7 @@ class DecimalType { DecimalType(DecimalType&&) noexcept; DecimalType& operator=(const DecimalType&) noexcept; DecimalType& operator=(DecimalType&&) noexcept; - DecimalType() noexcept - : scale(0), - precision(0) { - } + DecimalType() noexcept; virtual ~DecimalType() noexcept; int32_t scale; @@ -1324,14 +1132,7 @@ class DecimalType { void __set_precision(const int32_t val); - bool operator == (const DecimalType & rhs) const - { - if (!(scale == rhs.scale)) - return false; - if (!(precision == rhs.precision)) - return false; - return true; - } + bool operator == (const DecimalType & rhs) const; bool operator != (const DecimalType &rhs) const { return !(*this == rhs); } @@ -1361,15 +1162,11 @@ class MilliSeconds { MilliSeconds(MilliSeconds&&) noexcept; MilliSeconds& operator=(const MilliSeconds&) noexcept; MilliSeconds& operator=(MilliSeconds&&) noexcept; - MilliSeconds() noexcept { - } + MilliSeconds() noexcept; virtual ~MilliSeconds() noexcept; - bool operator == (const MilliSeconds & /* rhs */) const - { - return true; - } + bool operator == (const MilliSeconds & /* rhs */) const; bool operator != (const MilliSeconds &rhs) const { return !(*this == rhs); } @@ -1396,15 +1193,11 @@ class MicroSeconds { MicroSeconds(MicroSeconds&&) noexcept; MicroSeconds& 
operator=(const MicroSeconds&) noexcept; MicroSeconds& operator=(MicroSeconds&&) noexcept; - MicroSeconds() noexcept { - } + MicroSeconds() noexcept; virtual ~MicroSeconds() noexcept; - bool operator == (const MicroSeconds & /* rhs */) const - { - return true; - } + bool operator == (const MicroSeconds & /* rhs */) const; bool operator != (const MicroSeconds &rhs) const { return !(*this == rhs); } @@ -1431,15 +1224,11 @@ class NanoSeconds { NanoSeconds(NanoSeconds&&) noexcept; NanoSeconds& operator=(const NanoSeconds&) noexcept; NanoSeconds& operator=(NanoSeconds&&) noexcept; - NanoSeconds() noexcept { - } + NanoSeconds() noexcept; virtual ~NanoSeconds() noexcept; - bool operator == (const NanoSeconds & /* rhs */) const - { - return true; - } + bool operator == (const NanoSeconds & /* rhs */) const; bool operator != (const NanoSeconds &rhs) const { return !(*this == rhs); } @@ -1472,8 +1261,7 @@ class TimeUnit { TimeUnit(TimeUnit&&) noexcept; TimeUnit& operator=(const TimeUnit&) noexcept; TimeUnit& operator=(TimeUnit&&) noexcept; - TimeUnit() noexcept { - } + TimeUnit() noexcept; virtual ~TimeUnit() noexcept; MilliSeconds MILLIS; @@ -1488,22 +1276,7 @@ class TimeUnit { void __set_NANOS(const NanoSeconds& val); - bool operator == (const TimeUnit & rhs) const - { - if (__isset.MILLIS != rhs.__isset.MILLIS) - return false; - else if (__isset.MILLIS && !(MILLIS == rhs.MILLIS)) - return false; - if (__isset.MICROS != rhs.__isset.MICROS) - return false; - else if (__isset.MICROS && !(MICROS == rhs.MICROS)) - return false; - if (__isset.NANOS != rhs.__isset.NANOS) - return false; - else if (__isset.NANOS && !(NANOS == rhs.NANOS)) - return false; - return true; - } + bool operator == (const TimeUnit & rhs) const; bool operator != (const TimeUnit &rhs) const { return !(*this == rhs); } @@ -1535,9 +1308,7 @@ class TimestampType { TimestampType(TimestampType&&) noexcept; TimestampType& operator=(const TimestampType&) noexcept; TimestampType& operator=(TimestampType&&) 
noexcept; - TimestampType() noexcept - : isAdjustedToUTC(0) { - } + TimestampType() noexcept; virtual ~TimestampType() noexcept; bool isAdjustedToUTC; @@ -1547,14 +1318,7 @@ class TimestampType { void __set_unit(const TimeUnit& val); - bool operator == (const TimestampType & rhs) const - { - if (!(isAdjustedToUTC == rhs.isAdjustedToUTC)) - return false; - if (!(unit == rhs.unit)) - return false; - return true; - } + bool operator == (const TimestampType & rhs) const; bool operator != (const TimestampType &rhs) const { return !(*this == rhs); } @@ -1586,9 +1350,7 @@ class TimeType { TimeType(TimeType&&) noexcept; TimeType& operator=(const TimeType&) noexcept; TimeType& operator=(TimeType&&) noexcept; - TimeType() noexcept - : isAdjustedToUTC(0) { - } + TimeType() noexcept; virtual ~TimeType() noexcept; bool isAdjustedToUTC; @@ -1598,14 +1360,7 @@ class TimeType { void __set_unit(const TimeUnit& val); - bool operator == (const TimeType & rhs) const - { - if (!(isAdjustedToUTC == rhs.isAdjustedToUTC)) - return false; - if (!(unit == rhs.unit)) - return false; - return true; - } + bool operator == (const TimeType & rhs) const; bool operator != (const TimeType &rhs) const { return !(*this == rhs); } @@ -1639,10 +1394,7 @@ class IntType { IntType(IntType&&) noexcept; IntType& operator=(const IntType&) noexcept; IntType& operator=(IntType&&) noexcept; - IntType() noexcept - : bitWidth(0), - isSigned(0) { - } + IntType() noexcept; virtual ~IntType() noexcept; int8_t bitWidth; @@ -1652,14 +1404,7 @@ class IntType { void __set_isSigned(const bool val); - bool operator == (const IntType & rhs) const - { - if (!(bitWidth == rhs.bitWidth)) - return false; - if (!(isSigned == rhs.isSigned)) - return false; - return true; - } + bool operator == (const IntType & rhs) const; bool operator != (const IntType &rhs) const { return !(*this == rhs); } @@ -1682,7 +1427,7 @@ std::ostream& operator<<(std::ostream& out, const IntType& obj); /** * Embedded JSON logical type annotation * - * 
Allowed for physical types: BINARY + * Allowed for physical types: BYTE_ARRAY */ class JsonType { public: @@ -1691,15 +1436,11 @@ class JsonType { JsonType(JsonType&&) noexcept; JsonType& operator=(const JsonType&) noexcept; JsonType& operator=(JsonType&&) noexcept; - JsonType() noexcept { - } + JsonType() noexcept; virtual ~JsonType() noexcept; - bool operator == (const JsonType & /* rhs */) const - { - return true; - } + bool operator == (const JsonType & /* rhs */) const; bool operator != (const JsonType &rhs) const { return !(*this == rhs); } @@ -1722,7 +1463,7 @@ std::ostream& operator<<(std::ostream& out, const JsonType& obj); /** * Embedded BSON logical type annotation * - * Allowed for physical types: BINARY + * Allowed for physical types: BYTE_ARRAY */ class BsonType { public: @@ -1731,15 +1472,11 @@ class BsonType { BsonType(BsonType&&) noexcept; BsonType& operator=(const BsonType&) noexcept; BsonType& operator=(BsonType&&) noexcept; - BsonType() noexcept { - } + BsonType() noexcept; virtual ~BsonType() noexcept; - bool operator == (const BsonType & /* rhs */) const - { - return true; - } + bool operator == (const BsonType & /* rhs */) const; bool operator != (const BsonType &rhs) const { return !(*this == rhs); } @@ -1764,7 +1501,16 @@ typedef struct _GeometryType__isset { } _GeometryType__isset; /** - * Geometry logical type annotation (added in 2.11.0) + * GEOMETRY logical type annotation (added in 2.11.0) + * + * GeometryEncoding and Edges are required. In order to correctly interpret + * geometry data, writer implementations SHOULD always them, and reader + * implementations SHOULD fail for unknown values. + * + * CRS is optional. Once CRS is set, it MUST be a key to an entry in the + * `key_value_metadata` field of `FileMetaData`. + * + * See LogicalTypes.md for detail. 
*/ class GeometryType { public: @@ -1773,45 +1519,19 @@ class GeometryType { GeometryType(GeometryType&&) noexcept; GeometryType& operator=(const GeometryType&); GeometryType& operator=(GeometryType&&) noexcept; - GeometryType() noexcept - : encoding(static_cast(0)), - edges(static_cast(0)), - crs() { - } + GeometryType() noexcept; virtual ~GeometryType() noexcept; /** - * Physical type and encoding for the geometry type. - * Please refer to the definition of GeometryEncoding for more detail. * * @see GeometryEncoding */ GeometryEncoding::type encoding; /** - * Interpretation for edges of elements of a GEOMETRY logical type, i.e. whether - * the interpolation between points along an edge represents a straight cartesian - * line or the shortest line on the sphere. - * Please refer to the definition of Edges for more detail. * * @see Edges */ Edges::type edges; - /** - * CRS (coordinate reference system) is a mapping of how coordinates refer to - * precise locations on earth. A crs is specified by a string, which is a Parquet - * file metadata field whose value is the crs representation. An additional field - * with the suffix '.type' describes the encoding of this CRS representation. - * - * For example, if a geometry column (e.g., 'geom1') uses the CRS 'OGC:CRS84', the - * writer may create 2 file metadata fields: 'geom1_crs' and 'geom1_crs.type', and - * set the 'crs' field to 'geom1_crs'. The 'geom1_crs' field will contain the - * PROJJSON representation of OGC:CRS84 - * (https://github.com/opengeospatial/geoparquet/blob/main/format-specs/geoparquet.md#ogccrs84-details), - * and the 'geom1_crs.type' field will contain the string 'PROJJSON'. - * - * Multiple geometry columns can refer to the same CRS metadata field - * (e.g., 'geom1_crs') if they share the same CRS. 
- */ std::string crs; _GeometryType__isset __isset; @@ -1822,18 +1542,7 @@ class GeometryType { void __set_crs(const std::string& val); - bool operator == (const GeometryType & rhs) const - { - if (!(encoding == rhs.encoding)) - return false; - if (!(edges == rhs.edges)) - return false; - if (__isset.crs != rhs.__isset.crs) - return false; - else if (__isset.crs && !(crs == rhs.crs)) - return false; - return true; - } + bool operator == (const GeometryType & rhs) const; bool operator != (const GeometryType &rhs) const { return !(*this == rhs); } @@ -1885,8 +1594,7 @@ class LogicalType { LogicalType(LogicalType&&) noexcept; LogicalType& operator=(const LogicalType&); LogicalType& operator=(LogicalType&&) noexcept; - LogicalType() noexcept { - } + LogicalType() noexcept; virtual ~LogicalType() noexcept; StringType STRING; @@ -1937,70 +1645,7 @@ class LogicalType { void __set_GEOMETRY(const GeometryType& val); - bool operator == (const LogicalType & rhs) const - { - if (__isset.STRING != rhs.__isset.STRING) - return false; - else if (__isset.STRING && !(STRING == rhs.STRING)) - return false; - if (__isset.MAP != rhs.__isset.MAP) - return false; - else if (__isset.MAP && !(MAP == rhs.MAP)) - return false; - if (__isset.LIST != rhs.__isset.LIST) - return false; - else if (__isset.LIST && !(LIST == rhs.LIST)) - return false; - if (__isset.ENUM != rhs.__isset.ENUM) - return false; - else if (__isset.ENUM && !(ENUM == rhs.ENUM)) - return false; - if (__isset.DECIMAL != rhs.__isset.DECIMAL) - return false; - else if (__isset.DECIMAL && !(DECIMAL == rhs.DECIMAL)) - return false; - if (__isset.DATE != rhs.__isset.DATE) - return false; - else if (__isset.DATE && !(DATE == rhs.DATE)) - return false; - if (__isset.TIME != rhs.__isset.TIME) - return false; - else if (__isset.TIME && !(TIME == rhs.TIME)) - return false; - if (__isset.TIMESTAMP != rhs.__isset.TIMESTAMP) - return false; - else if (__isset.TIMESTAMP && !(TIMESTAMP == rhs.TIMESTAMP)) - return false; - if 
(__isset.INTEGER != rhs.__isset.INTEGER) - return false; - else if (__isset.INTEGER && !(INTEGER == rhs.INTEGER)) - return false; - if (__isset.UNKNOWN != rhs.__isset.UNKNOWN) - return false; - else if (__isset.UNKNOWN && !(UNKNOWN == rhs.UNKNOWN)) - return false; - if (__isset.JSON != rhs.__isset.JSON) - return false; - else if (__isset.JSON && !(JSON == rhs.JSON)) - return false; - if (__isset.BSON != rhs.__isset.BSON) - return false; - else if (__isset.BSON && !(BSON == rhs.BSON)) - return false; - if (__isset.UUID != rhs.__isset.UUID) - return false; - else if (__isset.UUID && !(UUID == rhs.UUID)) - return false; - if (__isset.FLOAT16 != rhs.__isset.FLOAT16) - return false; - else if (__isset.FLOAT16 && !(FLOAT16 == rhs.FLOAT16)) - return false; - if (__isset.GEOMETRY != rhs.__isset.GEOMETRY) - return false; - else if (__isset.GEOMETRY && !(GEOMETRY == rhs.GEOMETRY)) - return false; - return true; - } + bool operator == (const LogicalType & rhs) const; bool operator != (const LogicalType &rhs) const { return !(*this == rhs); } @@ -2045,17 +1690,7 @@ class SchemaElement { SchemaElement(SchemaElement&&) noexcept; SchemaElement& operator=(const SchemaElement&); SchemaElement& operator=(SchemaElement&&) noexcept; - SchemaElement() noexcept - : type(static_cast(0)), - type_length(0), - repetition_type(static_cast(0)), - name(), - num_children(0), - converted_type(static_cast(0)), - scale(0), - precision(0), - field_id(0) { - } + SchemaElement() noexcept; virtual ~SchemaElement() noexcept; /** @@ -2141,48 +1776,7 @@ class SchemaElement { void __set_logicalType(const LogicalType& val); - bool operator == (const SchemaElement & rhs) const - { - if (__isset.type != rhs.__isset.type) - return false; - else if (__isset.type && !(type == rhs.type)) - return false; - if (__isset.type_length != rhs.__isset.type_length) - return false; - else if (__isset.type_length && !(type_length == rhs.type_length)) - return false; - if (__isset.repetition_type != 
rhs.__isset.repetition_type) - return false; - else if (__isset.repetition_type && !(repetition_type == rhs.repetition_type)) - return false; - if (!(name == rhs.name)) - return false; - if (__isset.num_children != rhs.__isset.num_children) - return false; - else if (__isset.num_children && !(num_children == rhs.num_children)) - return false; - if (__isset.converted_type != rhs.__isset.converted_type) - return false; - else if (__isset.converted_type && !(converted_type == rhs.converted_type)) - return false; - if (__isset.scale != rhs.__isset.scale) - return false; - else if (__isset.scale && !(scale == rhs.scale)) - return false; - if (__isset.precision != rhs.__isset.precision) - return false; - else if (__isset.precision && !(precision == rhs.precision)) - return false; - if (__isset.field_id != rhs.__isset.field_id) - return false; - else if (__isset.field_id && !(field_id == rhs.field_id)) - return false; - if (__isset.logicalType != rhs.__isset.logicalType) - return false; - else if (__isset.logicalType && !(logicalType == rhs.logicalType)) - return false; - return true; - } + bool operator == (const SchemaElement & rhs) const; bool operator != (const SchemaElement &rhs) const { return !(*this == rhs); } @@ -2216,16 +1810,16 @@ class DataPageHeader { DataPageHeader(DataPageHeader&&) noexcept; DataPageHeader& operator=(const DataPageHeader&); DataPageHeader& operator=(DataPageHeader&&) noexcept; - DataPageHeader() noexcept - : num_values(0), - encoding(static_cast(0)), - definition_level_encoding(static_cast(0)), - repetition_level_encoding(static_cast(0)) { - } + DataPageHeader() noexcept; virtual ~DataPageHeader() noexcept; /** - * Number of values, including NULLs, in this data page. * + * Number of values, including NULLs, in this data page. + * + * If a OffsetIndex is present, a page must begin at a row + * boundary (repetition_level = 0). Otherwise, pages may begin + * within a row (repetition_level > 0). 
+ * */ int32_t num_values; /** @@ -2263,22 +1857,7 @@ class DataPageHeader { void __set_statistics(const Statistics& val); - bool operator == (const DataPageHeader & rhs) const - { - if (!(num_values == rhs.num_values)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (!(definition_level_encoding == rhs.definition_level_encoding)) - return false; - if (!(repetition_level_encoding == rhs.repetition_level_encoding)) - return false; - if (__isset.statistics != rhs.__isset.statistics) - return false; - else if (__isset.statistics && !(statistics == rhs.statistics)) - return false; - return true; - } + bool operator == (const DataPageHeader & rhs) const; bool operator != (const DataPageHeader &rhs) const { return !(*this == rhs); } @@ -2305,15 +1884,11 @@ class IndexPageHeader { IndexPageHeader(IndexPageHeader&&) noexcept; IndexPageHeader& operator=(const IndexPageHeader&) noexcept; IndexPageHeader& operator=(IndexPageHeader&&) noexcept; - IndexPageHeader() noexcept { - } + IndexPageHeader() noexcept; virtual ~IndexPageHeader() noexcept; - bool operator == (const IndexPageHeader & /* rhs */) const - { - return true; - } + bool operator == (const IndexPageHeader & /* rhs */) const; bool operator != (const IndexPageHeader &rhs) const { return !(*this == rhs); } @@ -2350,11 +1925,7 @@ class DictionaryPageHeader { DictionaryPageHeader(DictionaryPageHeader&&) noexcept; DictionaryPageHeader& operator=(const DictionaryPageHeader&) noexcept; DictionaryPageHeader& operator=(DictionaryPageHeader&&) noexcept; - DictionaryPageHeader() noexcept - : num_values(0), - encoding(static_cast(0)), - is_sorted(0) { - } + DictionaryPageHeader() noexcept; virtual ~DictionaryPageHeader() noexcept; /** @@ -2380,18 +1951,7 @@ class DictionaryPageHeader { void __set_is_sorted(const bool val); - bool operator == (const DictionaryPageHeader & rhs) const - { - if (!(num_values == rhs.num_values)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if 
(__isset.is_sorted != rhs.__isset.is_sorted) - return false; - else if (__isset.is_sorted && !(is_sorted == rhs.is_sorted)) - return false; - return true; - } + bool operator == (const DictionaryPageHeader & rhs) const; bool operator != (const DictionaryPageHeader &rhs) const { return !(*this == rhs); } @@ -2429,15 +1989,7 @@ class DataPageHeaderV2 { DataPageHeaderV2(DataPageHeaderV2&&) noexcept; DataPageHeaderV2& operator=(const DataPageHeaderV2&); DataPageHeaderV2& operator=(DataPageHeaderV2&&) noexcept; - DataPageHeaderV2() noexcept - : num_values(0), - num_nulls(0), - num_rows(0), - encoding(static_cast(0)), - definition_levels_byte_length(0), - repetition_levels_byte_length(0), - is_compressed(true) { - } + DataPageHeaderV2() noexcept; virtual ~DataPageHeaderV2() noexcept; /** @@ -2450,7 +2002,10 @@ class DataPageHeaderV2 { */ int32_t num_nulls; /** - * Number of rows in this data page. which means pages change on record boundaries (r = 0) * + * Number of rows in this data page. Every page must begin at a + * row boundary (repetition_level = 0): rows must **not** be + * split across page boundaries when using V2 data pages. 
+ * */ int32_t num_rows; /** @@ -2498,30 +2053,7 @@ class DataPageHeaderV2 { void __set_statistics(const Statistics& val); - bool operator == (const DataPageHeaderV2 & rhs) const - { - if (!(num_values == rhs.num_values)) - return false; - if (!(num_nulls == rhs.num_nulls)) - return false; - if (!(num_rows == rhs.num_rows)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (!(definition_levels_byte_length == rhs.definition_levels_byte_length)) - return false; - if (!(repetition_levels_byte_length == rhs.repetition_levels_byte_length)) - return false; - if (__isset.is_compressed != rhs.__isset.is_compressed) - return false; - else if (__isset.is_compressed && !(is_compressed == rhs.is_compressed)) - return false; - if (__isset.statistics != rhs.__isset.statistics) - return false; - else if (__isset.statistics && !(statistics == rhs.statistics)) - return false; - return true; - } + bool operator == (const DataPageHeaderV2 & rhs) const; bool operator != (const DataPageHeaderV2 &rhs) const { return !(*this == rhs); } @@ -2551,15 +2083,11 @@ class SplitBlockAlgorithm { SplitBlockAlgorithm(SplitBlockAlgorithm&&) noexcept; SplitBlockAlgorithm& operator=(const SplitBlockAlgorithm&) noexcept; SplitBlockAlgorithm& operator=(SplitBlockAlgorithm&&) noexcept; - SplitBlockAlgorithm() noexcept { - } + SplitBlockAlgorithm() noexcept; virtual ~SplitBlockAlgorithm() noexcept; - bool operator == (const SplitBlockAlgorithm & /* rhs */) const - { - return true; - } + bool operator == (const SplitBlockAlgorithm & /* rhs */) const; bool operator != (const SplitBlockAlgorithm &rhs) const { return !(*this == rhs); } @@ -2593,8 +2121,7 @@ class BloomFilterAlgorithm { BloomFilterAlgorithm(BloomFilterAlgorithm&&) noexcept; BloomFilterAlgorithm& operator=(const BloomFilterAlgorithm&) noexcept; BloomFilterAlgorithm& operator=(BloomFilterAlgorithm&&) noexcept; - BloomFilterAlgorithm() noexcept { - } + BloomFilterAlgorithm() noexcept; virtual ~BloomFilterAlgorithm() 
noexcept; /** @@ -2606,14 +2133,7 @@ class BloomFilterAlgorithm { void __set_BLOCK(const SplitBlockAlgorithm& val); - bool operator == (const BloomFilterAlgorithm & rhs) const - { - if (__isset.BLOCK != rhs.__isset.BLOCK) - return false; - else if (__isset.BLOCK && !(BLOCK == rhs.BLOCK)) - return false; - return true; - } + bool operator == (const BloomFilterAlgorithm & rhs) const; bool operator != (const BloomFilterAlgorithm &rhs) const { return !(*this == rhs); } @@ -2645,15 +2165,11 @@ class XxHash { XxHash(XxHash&&) noexcept; XxHash& operator=(const XxHash&) noexcept; XxHash& operator=(XxHash&&) noexcept; - XxHash() noexcept { - } + XxHash() noexcept; virtual ~XxHash() noexcept; - bool operator == (const XxHash & /* rhs */) const - { - return true; - } + bool operator == (const XxHash & /* rhs */) const; bool operator != (const XxHash &rhs) const { return !(*this == rhs); } @@ -2689,8 +2205,7 @@ class BloomFilterHash { BloomFilterHash(BloomFilterHash&&) noexcept; BloomFilterHash& operator=(const BloomFilterHash&) noexcept; BloomFilterHash& operator=(BloomFilterHash&&) noexcept; - BloomFilterHash() noexcept { - } + BloomFilterHash() noexcept; virtual ~BloomFilterHash() noexcept; /** @@ -2702,14 +2217,7 @@ class BloomFilterHash { void __set_XXHASH(const XxHash& val); - bool operator == (const BloomFilterHash & rhs) const - { - if (__isset.XXHASH != rhs.__isset.XXHASH) - return false; - else if (__isset.XXHASH && !(XXHASH == rhs.XXHASH)) - return false; - return true; - } + bool operator == (const BloomFilterHash & rhs) const; bool operator != (const BloomFilterHash &rhs) const { return !(*this == rhs); } @@ -2740,15 +2248,11 @@ class Uncompressed { Uncompressed(Uncompressed&&) noexcept; Uncompressed& operator=(const Uncompressed&) noexcept; Uncompressed& operator=(Uncompressed&&) noexcept; - Uncompressed() noexcept { - } + Uncompressed() noexcept; virtual ~Uncompressed() noexcept; - bool operator == (const Uncompressed & /* rhs */) const - { - return true; - } + 
bool operator == (const Uncompressed & /* rhs */) const; bool operator != (const Uncompressed &rhs) const { return !(*this == rhs); } @@ -2779,8 +2283,7 @@ class BloomFilterCompression { BloomFilterCompression(BloomFilterCompression&&) noexcept; BloomFilterCompression& operator=(const BloomFilterCompression&) noexcept; BloomFilterCompression& operator=(BloomFilterCompression&&) noexcept; - BloomFilterCompression() noexcept { - } + BloomFilterCompression() noexcept; virtual ~BloomFilterCompression() noexcept; Uncompressed UNCOMPRESSED; @@ -2789,14 +2292,7 @@ class BloomFilterCompression { void __set_UNCOMPRESSED(const Uncompressed& val); - bool operator == (const BloomFilterCompression & rhs) const - { - if (__isset.UNCOMPRESSED != rhs.__isset.UNCOMPRESSED) - return false; - else if (__isset.UNCOMPRESSED && !(UNCOMPRESSED == rhs.UNCOMPRESSED)) - return false; - return true; - } + bool operator == (const BloomFilterCompression & rhs) const; bool operator != (const BloomFilterCompression &rhs) const { return !(*this == rhs); } @@ -2828,9 +2324,7 @@ class BloomFilterHeader { BloomFilterHeader(BloomFilterHeader&&) noexcept; BloomFilterHeader& operator=(const BloomFilterHeader&) noexcept; BloomFilterHeader& operator=(BloomFilterHeader&&) noexcept; - BloomFilterHeader() noexcept - : numBytes(0) { - } + BloomFilterHeader() noexcept; virtual ~BloomFilterHeader() noexcept; /** @@ -2858,18 +2352,7 @@ class BloomFilterHeader { void __set_compression(const BloomFilterCompression& val); - bool operator == (const BloomFilterHeader & rhs) const - { - if (!(numBytes == rhs.numBytes)) - return false; - if (!(algorithm == rhs.algorithm)) - return false; - if (!(hash == rhs.hash)) - return false; - if (!(compression == rhs.compression)) - return false; - return true; - } + bool operator == (const BloomFilterHeader & rhs) const; bool operator != (const BloomFilterHeader &rhs) const { return !(*this == rhs); } @@ -2904,12 +2387,7 @@ class PageHeader { PageHeader(PageHeader&&) noexcept; 
PageHeader& operator=(const PageHeader&); PageHeader& operator=(PageHeader&&) noexcept; - PageHeader() noexcept - : type(static_cast(0)), - uncompressed_page_size(0), - compressed_page_size(0), - crc(0) { - } + PageHeader() noexcept; virtual ~PageHeader() noexcept; /** @@ -2968,36 +2446,7 @@ class PageHeader { void __set_data_page_header_v2(const DataPageHeaderV2& val); - bool operator == (const PageHeader & rhs) const - { - if (!(type == rhs.type)) - return false; - if (!(uncompressed_page_size == rhs.uncompressed_page_size)) - return false; - if (!(compressed_page_size == rhs.compressed_page_size)) - return false; - if (__isset.crc != rhs.__isset.crc) - return false; - else if (__isset.crc && !(crc == rhs.crc)) - return false; - if (__isset.data_page_header != rhs.__isset.data_page_header) - return false; - else if (__isset.data_page_header && !(data_page_header == rhs.data_page_header)) - return false; - if (__isset.index_page_header != rhs.__isset.index_page_header) - return false; - else if (__isset.index_page_header && !(index_page_header == rhs.index_page_header)) - return false; - if (__isset.dictionary_page_header != rhs.__isset.dictionary_page_header) - return false; - else if (__isset.dictionary_page_header && !(dictionary_page_header == rhs.dictionary_page_header)) - return false; - if (__isset.data_page_header_v2 != rhs.__isset.data_page_header_v2) - return false; - else if (__isset.data_page_header_v2 && !(data_page_header_v2 == rhs.data_page_header_v2)) - return false; - return true; - } + bool operator == (const PageHeader & rhs) const; bool operator != (const PageHeader &rhs) const { return !(*this == rhs); } @@ -3031,10 +2480,7 @@ class KeyValue { KeyValue(KeyValue&&) noexcept; KeyValue& operator=(const KeyValue&); KeyValue& operator=(KeyValue&&) noexcept; - KeyValue() noexcept - : key(), - value() { - } + KeyValue() noexcept; virtual ~KeyValue() noexcept; std::string key; @@ -3046,16 +2492,7 @@ class KeyValue { void __set_value(const std::string& 
val); - bool operator == (const KeyValue & rhs) const - { - if (!(key == rhs.key)) - return false; - if (__isset.value != rhs.__isset.value) - return false; - else if (__isset.value && !(value == rhs.value)) - return false; - return true; - } + bool operator == (const KeyValue & rhs) const; bool operator != (const KeyValue &rhs) const { return !(*this == rhs); } @@ -3085,11 +2522,7 @@ class SortingColumn { SortingColumn(SortingColumn&&) noexcept; SortingColumn& operator=(const SortingColumn&) noexcept; SortingColumn& operator=(SortingColumn&&) noexcept; - SortingColumn() noexcept - : column_idx(0), - descending(0), - nulls_first(0) { - } + SortingColumn() noexcept; virtual ~SortingColumn() noexcept; /** @@ -3112,16 +2545,7 @@ class SortingColumn { void __set_nulls_first(const bool val); - bool operator == (const SortingColumn & rhs) const - { - if (!(column_idx == rhs.column_idx)) - return false; - if (!(descending == rhs.descending)) - return false; - if (!(nulls_first == rhs.nulls_first)) - return false; - return true; - } + bool operator == (const SortingColumn & rhs) const; bool operator != (const SortingColumn &rhs) const { return !(*this == rhs); } @@ -3151,11 +2575,7 @@ class PageEncodingStats { PageEncodingStats(PageEncodingStats&&) noexcept; PageEncodingStats& operator=(const PageEncodingStats&) noexcept; PageEncodingStats& operator=(PageEncodingStats&&) noexcept; - PageEncodingStats() noexcept - : page_type(static_cast(0)), - encoding(static_cast(0)), - count(0) { - } + PageEncodingStats() noexcept; virtual ~PageEncodingStats() noexcept; /** @@ -3181,16 +2601,7 @@ class PageEncodingStats { void __set_count(const int32_t val); - bool operator == (const PageEncodingStats & rhs) const - { - if (!(page_type == rhs.page_type)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (!(count == rhs.count)) - return false; - return true; - } + bool operator == (const PageEncodingStats & rhs) const; bool operator != (const PageEncodingStats &rhs) 
const { return !(*this == rhs); } @@ -3210,7 +2621,7 @@ void swap(PageEncodingStats &a, PageEncodingStats &b); std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj); typedef struct _ColumnMetaData__isset { - _ColumnMetaData__isset() : key_value_metadata(false), index_page_offset(false), dictionary_page_offset(false), statistics(false), encoding_stats(false), bloom_filter_offset(false), bloom_filter_length(false), size_statistics(false) {} + _ColumnMetaData__isset() : key_value_metadata(false), index_page_offset(false), dictionary_page_offset(false), statistics(false), encoding_stats(false), bloom_filter_offset(false), bloom_filter_length(false), size_statistics(false), geometry_stats(false) {} bool key_value_metadata :1; bool index_page_offset :1; bool dictionary_page_offset :1; @@ -3219,6 +2630,7 @@ typedef struct _ColumnMetaData__isset { bool bloom_filter_offset :1; bool bloom_filter_length :1; bool size_statistics :1; + bool geometry_stats :1; } _ColumnMetaData__isset; /** @@ -3231,18 +2643,7 @@ class ColumnMetaData { ColumnMetaData(ColumnMetaData&&) noexcept; ColumnMetaData& operator=(const ColumnMetaData&); ColumnMetaData& operator=(ColumnMetaData&&) noexcept; - ColumnMetaData() noexcept - : type(static_cast(0)), - codec(static_cast(0)), - num_values(0), - total_uncompressed_size(0), - total_compressed_size(0), - data_page_offset(0), - index_page_offset(0), - dictionary_page_offset(0), - bloom_filter_offset(0), - bloom_filter_length(0) { - } + ColumnMetaData() noexcept; virtual ~ColumnMetaData() noexcept; /** @@ -3324,6 +2725,10 @@ class ColumnMetaData { * filter pushdown. 
*/ SizeStatistics size_statistics; + /** + * Optional statistics specific to GEOMETRY logical type + */ + GeometryStatistics geometry_stats; _ColumnMetaData__isset __isset; @@ -3359,58 +2764,9 @@ class ColumnMetaData { void __set_size_statistics(const SizeStatistics& val); - bool operator == (const ColumnMetaData & rhs) const - { - if (!(type == rhs.type)) - return false; - if (!(encodings == rhs.encodings)) - return false; - if (!(path_in_schema == rhs.path_in_schema)) - return false; - if (!(codec == rhs.codec)) - return false; - if (!(num_values == rhs.num_values)) - return false; - if (!(total_uncompressed_size == rhs.total_uncompressed_size)) - return false; - if (!(total_compressed_size == rhs.total_compressed_size)) - return false; - if (__isset.key_value_metadata != rhs.__isset.key_value_metadata) - return false; - else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata)) - return false; - if (!(data_page_offset == rhs.data_page_offset)) - return false; - if (__isset.index_page_offset != rhs.__isset.index_page_offset) - return false; - else if (__isset.index_page_offset && !(index_page_offset == rhs.index_page_offset)) - return false; - if (__isset.dictionary_page_offset != rhs.__isset.dictionary_page_offset) - return false; - else if (__isset.dictionary_page_offset && !(dictionary_page_offset == rhs.dictionary_page_offset)) - return false; - if (__isset.statistics != rhs.__isset.statistics) - return false; - else if (__isset.statistics && !(statistics == rhs.statistics)) - return false; - if (__isset.encoding_stats != rhs.__isset.encoding_stats) - return false; - else if (__isset.encoding_stats && !(encoding_stats == rhs.encoding_stats)) - return false; - if (__isset.bloom_filter_offset != rhs.__isset.bloom_filter_offset) - return false; - else if (__isset.bloom_filter_offset && !(bloom_filter_offset == rhs.bloom_filter_offset)) - return false; - if (__isset.bloom_filter_length != rhs.__isset.bloom_filter_length) - return 
false; - else if (__isset.bloom_filter_length && !(bloom_filter_length == rhs.bloom_filter_length)) - return false; - if (__isset.size_statistics != rhs.__isset.size_statistics) - return false; - else if (__isset.size_statistics && !(size_statistics == rhs.size_statistics)) - return false; - return true; - } + void __set_geometry_stats(const GeometryStatistics& val); + + bool operator == (const ColumnMetaData & rhs) const; bool operator != (const ColumnMetaData &rhs) const { return !(*this == rhs); } @@ -3437,15 +2793,11 @@ class EncryptionWithFooterKey { EncryptionWithFooterKey(EncryptionWithFooterKey&&) noexcept; EncryptionWithFooterKey& operator=(const EncryptionWithFooterKey&) noexcept; EncryptionWithFooterKey& operator=(EncryptionWithFooterKey&&) noexcept; - EncryptionWithFooterKey() noexcept { - } + EncryptionWithFooterKey() noexcept; virtual ~EncryptionWithFooterKey() noexcept; - bool operator == (const EncryptionWithFooterKey & /* rhs */) const - { - return true; - } + bool operator == (const EncryptionWithFooterKey & /* rhs */) const; bool operator != (const EncryptionWithFooterKey &rhs) const { return !(*this == rhs); } @@ -3476,9 +2828,7 @@ class EncryptionWithColumnKey { EncryptionWithColumnKey(EncryptionWithColumnKey&&) noexcept; EncryptionWithColumnKey& operator=(const EncryptionWithColumnKey&); EncryptionWithColumnKey& operator=(EncryptionWithColumnKey&&) noexcept; - EncryptionWithColumnKey() noexcept - : key_metadata() { - } + EncryptionWithColumnKey() noexcept; virtual ~EncryptionWithColumnKey() noexcept; /** @@ -3496,16 +2846,7 @@ class EncryptionWithColumnKey { void __set_key_metadata(const std::string& val); - bool operator == (const EncryptionWithColumnKey & rhs) const - { - if (!(path_in_schema == rhs.path_in_schema)) - return false; - if (__isset.key_metadata != rhs.__isset.key_metadata) - return false; - else if (__isset.key_metadata && !(key_metadata == rhs.key_metadata)) - return false; - return true; - } + bool operator == (const 
EncryptionWithColumnKey & rhs) const; bool operator != (const EncryptionWithColumnKey &rhs) const { return !(*this == rhs); } @@ -3537,8 +2878,7 @@ class ColumnCryptoMetaData { ColumnCryptoMetaData(ColumnCryptoMetaData&&) noexcept; ColumnCryptoMetaData& operator=(const ColumnCryptoMetaData&); ColumnCryptoMetaData& operator=(ColumnCryptoMetaData&&) noexcept; - ColumnCryptoMetaData() noexcept { - } + ColumnCryptoMetaData() noexcept; virtual ~ColumnCryptoMetaData() noexcept; EncryptionWithFooterKey ENCRYPTION_WITH_FOOTER_KEY; @@ -3550,18 +2890,7 @@ class ColumnCryptoMetaData { void __set_ENCRYPTION_WITH_COLUMN_KEY(const EncryptionWithColumnKey& val); - bool operator == (const ColumnCryptoMetaData & rhs) const - { - if (__isset.ENCRYPTION_WITH_FOOTER_KEY != rhs.__isset.ENCRYPTION_WITH_FOOTER_KEY) - return false; - else if (__isset.ENCRYPTION_WITH_FOOTER_KEY && !(ENCRYPTION_WITH_FOOTER_KEY == rhs.ENCRYPTION_WITH_FOOTER_KEY)) - return false; - if (__isset.ENCRYPTION_WITH_COLUMN_KEY != rhs.__isset.ENCRYPTION_WITH_COLUMN_KEY) - return false; - else if (__isset.ENCRYPTION_WITH_COLUMN_KEY && !(ENCRYPTION_WITH_COLUMN_KEY == rhs.ENCRYPTION_WITH_COLUMN_KEY)) - return false; - return true; - } + bool operator == (const ColumnCryptoMetaData & rhs) const; bool operator != (const ColumnCryptoMetaData &rhs) const { return !(*this == rhs); } @@ -3599,15 +2928,7 @@ class ColumnChunk { ColumnChunk(ColumnChunk&&) noexcept; ColumnChunk& operator=(const ColumnChunk&); ColumnChunk& operator=(ColumnChunk&&) noexcept; - ColumnChunk() noexcept - : file_path(), - file_offset(0), - offset_index_offset(0), - offset_index_length(0), - column_index_offset(0), - column_index_length(0), - encrypted_column_metadata() { - } + ColumnChunk() noexcept; virtual ~ColumnChunk() noexcept; /** @@ -3617,13 +2938,21 @@ class ColumnChunk { */ std::string file_path; /** - * Byte offset in file_path to the ColumnMetaData * + * Deprecated: Byte offset in file_path to the ColumnMetaData + * + * Past use of this 
field has been inconsistent, with some implementations + * using it to point to the ColumnMetaData and some using it to point to + * the first page in the column chunk. In many cases, the ColumnMetaData at this + * location is wrong. This field is now deprecated and should not be used. + * Writers should set this field to 0 if no ColumnMetaData has been written outside + * the footer. */ int64_t file_offset; /** - * Column metadata for this chunk. This is the same content as what is at - * file_path/file_offset. Having it here has it replicated in the file - * metadata. + * Column metadata for this chunk. Some writers may also replicate this at the + * location pointed to by file_path/file_offset. + * Note: while marked as optional, this field is in fact required by most major + * Parquet implementations. As such, writers MUST populate this field. * */ ColumnMetaData meta_data; @@ -3672,44 +3001,7 @@ class ColumnChunk { void __set_encrypted_column_metadata(const std::string& val); - bool operator == (const ColumnChunk & rhs) const - { - if (__isset.file_path != rhs.__isset.file_path) - return false; - else if (__isset.file_path && !(file_path == rhs.file_path)) - return false; - if (!(file_offset == rhs.file_offset)) - return false; - if (__isset.meta_data != rhs.__isset.meta_data) - return false; - else if (__isset.meta_data && !(meta_data == rhs.meta_data)) - return false; - if (__isset.offset_index_offset != rhs.__isset.offset_index_offset) - return false; - else if (__isset.offset_index_offset && !(offset_index_offset == rhs.offset_index_offset)) - return false; - if (__isset.offset_index_length != rhs.__isset.offset_index_length) - return false; - else if (__isset.offset_index_length && !(offset_index_length == rhs.offset_index_length)) - return false; - if (__isset.column_index_offset != rhs.__isset.column_index_offset) - return false; - else if (__isset.column_index_offset && !(column_index_offset == rhs.column_index_offset)) - return false; - if 
(__isset.column_index_length != rhs.__isset.column_index_length) - return false; - else if (__isset.column_index_length && !(column_index_length == rhs.column_index_length)) - return false; - if (__isset.crypto_metadata != rhs.__isset.crypto_metadata) - return false; - else if (__isset.crypto_metadata && !(crypto_metadata == rhs.crypto_metadata)) - return false; - if (__isset.encrypted_column_metadata != rhs.__isset.encrypted_column_metadata) - return false; - else if (__isset.encrypted_column_metadata && !(encrypted_column_metadata == rhs.encrypted_column_metadata)) - return false; - return true; - } + bool operator == (const ColumnChunk & rhs) const; bool operator != (const ColumnChunk &rhs) const { return !(*this == rhs); } @@ -3743,13 +3035,7 @@ class RowGroup { RowGroup(RowGroup&&) noexcept; RowGroup& operator=(const RowGroup&); RowGroup& operator=(RowGroup&&) noexcept; - RowGroup() noexcept - : total_byte_size(0), - num_rows(0), - file_offset(0), - total_compressed_size(0), - ordinal(0) { - } + RowGroup() noexcept; virtual ~RowGroup() noexcept; /** @@ -3802,32 +3088,7 @@ class RowGroup { void __set_ordinal(const int16_t val); - bool operator == (const RowGroup & rhs) const - { - if (!(columns == rhs.columns)) - return false; - if (!(total_byte_size == rhs.total_byte_size)) - return false; - if (!(num_rows == rhs.num_rows)) - return false; - if (__isset.sorting_columns != rhs.__isset.sorting_columns) - return false; - else if (__isset.sorting_columns && !(sorting_columns == rhs.sorting_columns)) - return false; - if (__isset.file_offset != rhs.__isset.file_offset) - return false; - else if (__isset.file_offset && !(file_offset == rhs.file_offset)) - return false; - if (__isset.total_compressed_size != rhs.__isset.total_compressed_size) - return false; - else if (__isset.total_compressed_size && !(total_compressed_size == rhs.total_compressed_size)) - return false; - if (__isset.ordinal != rhs.__isset.ordinal) - return false; - else if (__isset.ordinal && 
!(ordinal == rhs.ordinal)) - return false; - return true; - } + bool operator == (const RowGroup & rhs) const; bool operator != (const RowGroup &rhs) const { return !(*this == rhs); } @@ -3857,15 +3118,11 @@ class TypeDefinedOrder { TypeDefinedOrder(TypeDefinedOrder&&) noexcept; TypeDefinedOrder& operator=(const TypeDefinedOrder&) noexcept; TypeDefinedOrder& operator=(TypeDefinedOrder&&) noexcept; - TypeDefinedOrder() noexcept { - } + TypeDefinedOrder() noexcept; virtual ~TypeDefinedOrder() noexcept; - bool operator == (const TypeDefinedOrder & /* rhs */) const - { - return true; - } + bool operator == (const TypeDefinedOrder & /* rhs */) const; bool operator != (const TypeDefinedOrder &rhs) const { return !(*this == rhs); } @@ -3908,8 +3165,7 @@ class ColumnOrder { ColumnOrder(ColumnOrder&&) noexcept; ColumnOrder& operator=(const ColumnOrder&) noexcept; ColumnOrder& operator=(ColumnOrder&&) noexcept; - ColumnOrder() noexcept { - } + ColumnOrder() noexcept; virtual ~ColumnOrder() noexcept; /** @@ -3935,7 +3191,7 @@ class ColumnOrder { * ENUM - unsigned byte-wise comparison * LIST - undefined * MAP - undefined - * GEOMETRY - undefined, use GeometryStatistics instead. 
+ * GEOMETRY - undefined * * In the absence of logical types, the sort order is determined by the physical type: * BOOLEAN - false, true @@ -3969,14 +3225,7 @@ class ColumnOrder { void __set_TYPE_ORDER(const TypeDefinedOrder& val); - bool operator == (const ColumnOrder & rhs) const - { - if (__isset.TYPE_ORDER != rhs.__isset.TYPE_ORDER) - return false; - else if (__isset.TYPE_ORDER && !(TYPE_ORDER == rhs.TYPE_ORDER)) - return false; - return true; - } + bool operator == (const ColumnOrder & rhs) const; bool operator != (const ColumnOrder &rhs) const { return !(*this == rhs); } @@ -4003,11 +3252,7 @@ class PageLocation { PageLocation(PageLocation&&) noexcept; PageLocation& operator=(const PageLocation&) noexcept; PageLocation& operator=(PageLocation&&) noexcept; - PageLocation() noexcept - : offset(0), - compressed_page_size(0), - first_row_index(0) { - } + PageLocation() noexcept; virtual ~PageLocation() noexcept; /** @@ -4020,8 +3265,9 @@ class PageLocation { */ int32_t compressed_page_size; /** - * Index within the RowGroup of the first row of the page; this means pages - * change on record boundaries (r = 0). + * Index within the RowGroup of the first row of the page. When an + * OffsetIndex is present, pages must begin on row boundaries + * (repetition_level = 0). 
*/ int64_t first_row_index; @@ -4031,16 +3277,7 @@ class PageLocation { void __set_first_row_index(const int64_t val); - bool operator == (const PageLocation & rhs) const - { - if (!(offset == rhs.offset)) - return false; - if (!(compressed_page_size == rhs.compressed_page_size)) - return false; - if (!(first_row_index == rhs.first_row_index)) - return false; - return true; - } + bool operator == (const PageLocation & rhs) const; bool operator != (const PageLocation &rhs) const { return !(*this == rhs); } @@ -4078,8 +3315,7 @@ class OffsetIndex { OffsetIndex(OffsetIndex&&) noexcept; OffsetIndex& operator=(const OffsetIndex&); OffsetIndex& operator=(OffsetIndex&&) noexcept; - OffsetIndex() noexcept { - } + OffsetIndex() noexcept; virtual ~OffsetIndex() noexcept; /** @@ -4101,16 +3337,7 @@ class OffsetIndex { void __set_unencoded_byte_array_data_bytes(const std::vector & val); - bool operator == (const OffsetIndex & rhs) const - { - if (!(page_locations == rhs.page_locations)) - return false; - if (__isset.unencoded_byte_array_data_bytes != rhs.__isset.unencoded_byte_array_data_bytes) - return false; - else if (__isset.unencoded_byte_array_data_bytes && !(unencoded_byte_array_data_bytes == rhs.unencoded_byte_array_data_bytes)) - return false; - return true; - } + bool operator == (const OffsetIndex & rhs) const; bool operator != (const OffsetIndex &rhs) const { return !(*this == rhs); } @@ -4130,11 +3357,10 @@ void swap(OffsetIndex &a, OffsetIndex &b); std::ostream& operator<<(std::ostream& out, const OffsetIndex& obj); typedef struct _ColumnIndex__isset { - _ColumnIndex__isset() : null_counts(false), repetition_level_histograms(false), definition_level_histograms(false), geometry_stats(false) {} + _ColumnIndex__isset() : null_counts(false), repetition_level_histograms(false), definition_level_histograms(false) {} bool null_counts :1; bool repetition_level_histograms :1; bool definition_level_histograms :1; - bool geometry_stats :1; } _ColumnIndex__isset; /** @@ 
-4154,9 +3380,7 @@ class ColumnIndex { ColumnIndex(ColumnIndex&&) noexcept; ColumnIndex& operator=(const ColumnIndex&); ColumnIndex& operator=(ColumnIndex&&) noexcept; - ColumnIndex() noexcept - : boundary_order(static_cast(0)) { - } + ColumnIndex() noexcept; virtual ~ColumnIndex() noexcept; /** @@ -4189,7 +3413,14 @@ class ColumnIndex { */ BoundaryOrder::type boundary_order; /** - * A list containing the number of null values for each page * + * A list containing the number of null values for each page + * + * Writers SHOULD always write this field even if no null values + * are present or the column is not nullable. + * Readers MUST distinguish between null_counts not being present + * and null_count being 0. + * If null_counts are not present, readers MUST NOT assume all + * null counts are 0. */ std::vector null_counts; /** @@ -4211,10 +3442,6 @@ class ColumnIndex { * */ std::vector definition_level_histograms; - /** - * A list containing statistics of GEOMETRY logical type for each page - */ - std::vector geometry_stats; _ColumnIndex__isset __isset; @@ -4232,36 +3459,7 @@ class ColumnIndex { void __set_definition_level_histograms(const std::vector & val); - void __set_geometry_stats(const std::vector & val); - - bool operator == (const ColumnIndex & rhs) const - { - if (!(null_pages == rhs.null_pages)) - return false; - if (!(min_values == rhs.min_values)) - return false; - if (!(max_values == rhs.max_values)) - return false; - if (!(boundary_order == rhs.boundary_order)) - return false; - if (__isset.null_counts != rhs.__isset.null_counts) - return false; - else if (__isset.null_counts && !(null_counts == rhs.null_counts)) - return false; - if (__isset.repetition_level_histograms != rhs.__isset.repetition_level_histograms) - return false; - else if (__isset.repetition_level_histograms && !(repetition_level_histograms == rhs.repetition_level_histograms)) - return false; - if (__isset.definition_level_histograms != rhs.__isset.definition_level_histograms) - 
return false; - else if (__isset.definition_level_histograms && !(definition_level_histograms == rhs.definition_level_histograms)) - return false; - if (__isset.geometry_stats != rhs.__isset.geometry_stats) - return false; - else if (__isset.geometry_stats && !(geometry_stats == rhs.geometry_stats)) - return false; - return true; - } + bool operator == (const ColumnIndex & rhs) const; bool operator != (const ColumnIndex &rhs) const { return !(*this == rhs); } @@ -4294,11 +3492,7 @@ class AesGcmV1 { AesGcmV1(AesGcmV1&&) noexcept; AesGcmV1& operator=(const AesGcmV1&); AesGcmV1& operator=(AesGcmV1&&) noexcept; - AesGcmV1() noexcept - : aad_prefix(), - aad_file_unique(), - supply_aad_prefix(0) { - } + AesGcmV1() noexcept; virtual ~AesGcmV1() noexcept; /** @@ -4323,22 +3517,7 @@ class AesGcmV1 { void __set_supply_aad_prefix(const bool val); - bool operator == (const AesGcmV1 & rhs) const - { - if (__isset.aad_prefix != rhs.__isset.aad_prefix) - return false; - else if (__isset.aad_prefix && !(aad_prefix == rhs.aad_prefix)) - return false; - if (__isset.aad_file_unique != rhs.__isset.aad_file_unique) - return false; - else if (__isset.aad_file_unique && !(aad_file_unique == rhs.aad_file_unique)) - return false; - if (__isset.supply_aad_prefix != rhs.__isset.supply_aad_prefix) - return false; - else if (__isset.supply_aad_prefix && !(supply_aad_prefix == rhs.supply_aad_prefix)) - return false; - return true; - } + bool operator == (const AesGcmV1 & rhs) const; bool operator != (const AesGcmV1 &rhs) const { return !(*this == rhs); } @@ -4371,11 +3550,7 @@ class AesGcmCtrV1 { AesGcmCtrV1(AesGcmCtrV1&&) noexcept; AesGcmCtrV1& operator=(const AesGcmCtrV1&); AesGcmCtrV1& operator=(AesGcmCtrV1&&) noexcept; - AesGcmCtrV1() noexcept - : aad_prefix(), - aad_file_unique(), - supply_aad_prefix(0) { - } + AesGcmCtrV1() noexcept; virtual ~AesGcmCtrV1() noexcept; /** @@ -4400,22 +3575,7 @@ class AesGcmCtrV1 { void __set_supply_aad_prefix(const bool val); - bool operator == (const 
AesGcmCtrV1 & rhs) const - { - if (__isset.aad_prefix != rhs.__isset.aad_prefix) - return false; - else if (__isset.aad_prefix && !(aad_prefix == rhs.aad_prefix)) - return false; - if (__isset.aad_file_unique != rhs.__isset.aad_file_unique) - return false; - else if (__isset.aad_file_unique && !(aad_file_unique == rhs.aad_file_unique)) - return false; - if (__isset.supply_aad_prefix != rhs.__isset.supply_aad_prefix) - return false; - else if (__isset.supply_aad_prefix && !(supply_aad_prefix == rhs.supply_aad_prefix)) - return false; - return true; - } + bool operator == (const AesGcmCtrV1 & rhs) const; bool operator != (const AesGcmCtrV1 &rhs) const { return !(*this == rhs); } @@ -4447,8 +3607,7 @@ class EncryptionAlgorithm { EncryptionAlgorithm(EncryptionAlgorithm&&) noexcept; EncryptionAlgorithm& operator=(const EncryptionAlgorithm&); EncryptionAlgorithm& operator=(EncryptionAlgorithm&&) noexcept; - EncryptionAlgorithm() noexcept { - } + EncryptionAlgorithm() noexcept; virtual ~EncryptionAlgorithm() noexcept; AesGcmV1 AES_GCM_V1; @@ -4460,18 +3619,7 @@ class EncryptionAlgorithm { void __set_AES_GCM_CTR_V1(const AesGcmCtrV1& val); - bool operator == (const EncryptionAlgorithm & rhs) const - { - if (__isset.AES_GCM_V1 != rhs.__isset.AES_GCM_V1) - return false; - else if (__isset.AES_GCM_V1 && !(AES_GCM_V1 == rhs.AES_GCM_V1)) - return false; - if (__isset.AES_GCM_CTR_V1 != rhs.__isset.AES_GCM_CTR_V1) - return false; - else if (__isset.AES_GCM_CTR_V1 && !(AES_GCM_CTR_V1 == rhs.AES_GCM_CTR_V1)) - return false; - return true; - } + bool operator == (const EncryptionAlgorithm & rhs) const; bool operator != (const EncryptionAlgorithm &rhs) const { return !(*this == rhs); } @@ -4509,12 +3657,7 @@ class FileMetaData { FileMetaData(FileMetaData&&) noexcept; FileMetaData& operator=(const FileMetaData&); FileMetaData& operator=(FileMetaData&&) noexcept; - FileMetaData() noexcept - : version(0), - num_rows(0), - created_by(), - footer_signing_key_metadata() { - } + 
FileMetaData() noexcept; virtual ~FileMetaData() noexcept; /** @@ -4598,38 +3741,7 @@ class FileMetaData { void __set_footer_signing_key_metadata(const std::string& val); - bool operator == (const FileMetaData & rhs) const - { - if (!(version == rhs.version)) - return false; - if (!(schema == rhs.schema)) - return false; - if (!(num_rows == rhs.num_rows)) - return false; - if (!(row_groups == rhs.row_groups)) - return false; - if (__isset.key_value_metadata != rhs.__isset.key_value_metadata) - return false; - else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata)) - return false; - if (__isset.created_by != rhs.__isset.created_by) - return false; - else if (__isset.created_by && !(created_by == rhs.created_by)) - return false; - if (__isset.column_orders != rhs.__isset.column_orders) - return false; - else if (__isset.column_orders && !(column_orders == rhs.column_orders)) - return false; - if (__isset.encryption_algorithm != rhs.__isset.encryption_algorithm) - return false; - else if (__isset.encryption_algorithm && !(encryption_algorithm == rhs.encryption_algorithm)) - return false; - if (__isset.footer_signing_key_metadata != rhs.__isset.footer_signing_key_metadata) - return false; - else if (__isset.footer_signing_key_metadata && !(footer_signing_key_metadata == rhs.footer_signing_key_metadata)) - return false; - return true; - } + bool operator == (const FileMetaData & rhs) const; bool operator != (const FileMetaData &rhs) const { return !(*this == rhs); } @@ -4663,9 +3775,7 @@ class FileCryptoMetaData { FileCryptoMetaData(FileCryptoMetaData&&) noexcept; FileCryptoMetaData& operator=(const FileCryptoMetaData&); FileCryptoMetaData& operator=(FileCryptoMetaData&&) noexcept; - FileCryptoMetaData() noexcept - : key_metadata() { - } + FileCryptoMetaData() noexcept; virtual ~FileCryptoMetaData() noexcept; /** @@ -4686,16 +3796,7 @@ class FileCryptoMetaData { void __set_key_metadata(const std::string& val); - bool operator == (const 
FileCryptoMetaData & rhs) const - { - if (!(encryption_algorithm == rhs.encryption_algorithm)) - return false; - if (__isset.key_metadata != rhs.__isset.key_metadata) - return false; - else if (__isset.key_metadata && !(key_metadata == rhs.key_metadata)) - return false; - return true; - } + bool operator == (const FileCryptoMetaData & rhs) const; bool operator != (const FileCryptoMetaData &rhs) const { return !(*this == rhs); } diff --git a/cpp/src/generated/parquet_types.tcc b/cpp/src/generated/parquet_types.tcc index 6caf040c7c874..a552fdf381954 100644 --- a/cpp/src/generated/parquet_types.tcc +++ b/cpp/src/generated/parquet_types.tcc @@ -1,5 +1,5 @@ /** - * Autogenerated by Thrift Compiler (0.20.0) + * Autogenerated by Thrift Compiler (0.21.0) * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * @generated @@ -469,14 +469,6 @@ uint32_t Statistics::read(Protocol_* iprot) { xfer += iprot->skip(ftype); } break; - case 9: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->geometry_stats.read(iprot); - this->__isset.geometry_stats = true; - } else { - xfer += iprot->skip(ftype); - } - break; default: xfer += iprot->skip(ftype); break; @@ -535,11 +527,6 @@ uint32_t Statistics::write(Protocol_* oprot) const { xfer += oprot->writeBool(this->is_min_value_exact); xfer += oprot->writeFieldEnd(); } - if (this->__isset.geometry_stats) { - xfer += oprot->writeFieldBegin("geometry_stats", ::apache::thrift::protocol::T_STRUCT, 9); - xfer += this->geometry_stats.write(oprot); - xfer += oprot->writeFieldEnd(); - } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -3406,6 +3393,14 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { xfer += iprot->skip(ftype); } break; + case 17: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->geometry_stats.read(iprot); + this->__isset.geometry_stats = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; 
@@ -3544,6 +3539,11 @@ uint32_t ColumnMetaData::write(Protocol_* oprot) const { xfer += this->size_statistics.write(oprot); xfer += oprot->writeFieldEnd(); } + if (this->__isset.geometry_stats) { + xfer += oprot->writeFieldBegin("geometry_stats", ::apache::thrift::protocol::T_STRUCT, 17); + xfer += this->geometry_stats.write(oprot); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -4556,26 +4556,6 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { xfer += iprot->skip(ftype); } break; - case 8: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->geometry_stats.clear(); - uint32_t _size319; - ::apache::thrift::protocol::TType _etype322; - xfer += iprot->readListBegin(_etype322, _size319); - this->geometry_stats.resize(_size319); - uint32_t _i323; - for (_i323 = 0; _i323 < _size319; ++_i323) - { - xfer += this->geometry_stats[_i323].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.geometry_stats = true; - } else { - xfer += iprot->skip(ftype); - } - break; default: xfer += iprot->skip(ftype); break; @@ -4605,10 +4585,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("null_pages", ::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_BOOL, static_cast(this->null_pages.size())); - std::vector ::const_iterator _iter324; - for (_iter324 = this->null_pages.begin(); _iter324 != this->null_pages.end(); ++_iter324) + std::vector ::const_iterator _iter319; + for (_iter319 = this->null_pages.begin(); _iter319 != this->null_pages.end(); ++_iter319) { - xfer += oprot->writeBool((*_iter324)); + xfer += oprot->writeBool((*_iter319)); } xfer += oprot->writeListEnd(); } @@ -4617,10 +4597,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("min_values", ::apache::thrift::protocol::T_LIST, 2); { xfer += 
oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->min_values.size())); - std::vector ::const_iterator _iter325; - for (_iter325 = this->min_values.begin(); _iter325 != this->min_values.end(); ++_iter325) + std::vector ::const_iterator _iter320; + for (_iter320 = this->min_values.begin(); _iter320 != this->min_values.end(); ++_iter320) { - xfer += oprot->writeBinary((*_iter325)); + xfer += oprot->writeBinary((*_iter320)); } xfer += oprot->writeListEnd(); } @@ -4629,10 +4609,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("max_values", ::apache::thrift::protocol::T_LIST, 3); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->max_values.size())); - std::vector ::const_iterator _iter326; - for (_iter326 = this->max_values.begin(); _iter326 != this->max_values.end(); ++_iter326) + std::vector ::const_iterator _iter321; + for (_iter321 = this->max_values.begin(); _iter321 != this->max_values.end(); ++_iter321) { - xfer += oprot->writeBinary((*_iter326)); + xfer += oprot->writeBinary((*_iter321)); } xfer += oprot->writeListEnd(); } @@ -4646,10 +4626,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("null_counts", ::apache::thrift::protocol::T_LIST, 5); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->null_counts.size())); - std::vector ::const_iterator _iter327; - for (_iter327 = this->null_counts.begin(); _iter327 != this->null_counts.end(); ++_iter327) + std::vector ::const_iterator _iter322; + for (_iter322 = this->null_counts.begin(); _iter322 != this->null_counts.end(); ++_iter322) { - xfer += oprot->writeI64((*_iter327)); + xfer += oprot->writeI64((*_iter322)); } xfer += oprot->writeListEnd(); } @@ -4659,10 +4639,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("repetition_level_histograms", ::apache::thrift::protocol::T_LIST, 6); { xfer += 
oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->repetition_level_histograms.size())); - std::vector ::const_iterator _iter328; - for (_iter328 = this->repetition_level_histograms.begin(); _iter328 != this->repetition_level_histograms.end(); ++_iter328) + std::vector ::const_iterator _iter323; + for (_iter323 = this->repetition_level_histograms.begin(); _iter323 != this->repetition_level_histograms.end(); ++_iter323) { - xfer += oprot->writeI64((*_iter328)); + xfer += oprot->writeI64((*_iter323)); } xfer += oprot->writeListEnd(); } @@ -4672,23 +4652,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("definition_level_histograms", ::apache::thrift::protocol::T_LIST, 7); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->definition_level_histograms.size())); - std::vector ::const_iterator _iter329; - for (_iter329 = this->definition_level_histograms.begin(); _iter329 != this->definition_level_histograms.end(); ++_iter329) - { - xfer += oprot->writeI64((*_iter329)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.geometry_stats) { - xfer += oprot->writeFieldBegin("geometry_stats", ::apache::thrift::protocol::T_LIST, 8); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->geometry_stats.size())); - std::vector ::const_iterator _iter330; - for (_iter330 = this->geometry_stats.begin(); _iter330 != this->geometry_stats.end(); ++_iter330) + std::vector ::const_iterator _iter324; + for (_iter324 = this->definition_level_histograms.begin(); _iter324 != this->definition_level_histograms.end(); ++_iter324) { - xfer += (*_iter330).write(oprot); + xfer += oprot->writeI64((*_iter324)); } xfer += oprot->writeListEnd(); } @@ -4976,14 +4943,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->schema.clear(); - uint32_t _size347; - 
::apache::thrift::protocol::TType _etype350; - xfer += iprot->readListBegin(_etype350, _size347); - this->schema.resize(_size347); - uint32_t _i351; - for (_i351 = 0; _i351 < _size347; ++_i351) + uint32_t _size341; + ::apache::thrift::protocol::TType _etype344; + xfer += iprot->readListBegin(_etype344, _size341); + this->schema.resize(_size341); + uint32_t _i345; + for (_i345 = 0; _i345 < _size341; ++_i345) { - xfer += this->schema[_i351].read(iprot); + xfer += this->schema[_i345].read(iprot); } xfer += iprot->readListEnd(); } @@ -5004,14 +4971,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->row_groups.clear(); - uint32_t _size352; - ::apache::thrift::protocol::TType _etype355; - xfer += iprot->readListBegin(_etype355, _size352); - this->row_groups.resize(_size352); - uint32_t _i356; - for (_i356 = 0; _i356 < _size352; ++_i356) + uint32_t _size346; + ::apache::thrift::protocol::TType _etype349; + xfer += iprot->readListBegin(_etype349, _size346); + this->row_groups.resize(_size346); + uint32_t _i350; + for (_i350 = 0; _i350 < _size346; ++_i350) { - xfer += this->row_groups[_i356].read(iprot); + xfer += this->row_groups[_i350].read(iprot); } xfer += iprot->readListEnd(); } @@ -5024,14 +4991,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->key_value_metadata.clear(); - uint32_t _size357; - ::apache::thrift::protocol::TType _etype360; - xfer += iprot->readListBegin(_etype360, _size357); - this->key_value_metadata.resize(_size357); - uint32_t _i361; - for (_i361 = 0; _i361 < _size357; ++_i361) + uint32_t _size351; + ::apache::thrift::protocol::TType _etype354; + xfer += iprot->readListBegin(_etype354, _size351); + this->key_value_metadata.resize(_size351); + uint32_t _i355; + for (_i355 = 0; _i355 < _size351; ++_i355) { - xfer += this->key_value_metadata[_i361].read(iprot); + xfer += this->key_value_metadata[_i355].read(iprot); } xfer += 
iprot->readListEnd(); } @@ -5052,14 +5019,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->column_orders.clear(); - uint32_t _size362; - ::apache::thrift::protocol::TType _etype365; - xfer += iprot->readListBegin(_etype365, _size362); - this->column_orders.resize(_size362); - uint32_t _i366; - for (_i366 = 0; _i366 < _size362; ++_i366) + uint32_t _size356; + ::apache::thrift::protocol::TType _etype359; + xfer += iprot->readListBegin(_etype359, _size356); + this->column_orders.resize(_size356); + uint32_t _i360; + for (_i360 = 0; _i360 < _size356; ++_i360) { - xfer += this->column_orders[_i366].read(iprot); + xfer += this->column_orders[_i360].read(iprot); } xfer += iprot->readListEnd(); } @@ -5117,10 +5084,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->schema.size())); - std::vector ::const_iterator _iter367; - for (_iter367 = this->schema.begin(); _iter367 != this->schema.end(); ++_iter367) + std::vector ::const_iterator _iter361; + for (_iter361 = this->schema.begin(); _iter361 != this->schema.end(); ++_iter361) { - xfer += (*_iter367).write(oprot); + xfer += (*_iter361).write(oprot); } xfer += oprot->writeListEnd(); } @@ -5133,10 +5100,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->row_groups.size())); - std::vector ::const_iterator _iter368; - for (_iter368 = this->row_groups.begin(); _iter368 != this->row_groups.end(); ++_iter368) + std::vector ::const_iterator _iter362; + for (_iter362 = this->row_groups.begin(); _iter362 != this->row_groups.end(); ++_iter362) { - xfer += (*_iter368).write(oprot); + xfer += (*_iter362).write(oprot); 
} xfer += oprot->writeListEnd(); } @@ -5146,10 +5113,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter369; - for (_iter369 = this->key_value_metadata.begin(); _iter369 != this->key_value_metadata.end(); ++_iter369) + std::vector ::const_iterator _iter363; + for (_iter363 = this->key_value_metadata.begin(); _iter363 != this->key_value_metadata.end(); ++_iter363) { - xfer += (*_iter369).write(oprot); + xfer += (*_iter363).write(oprot); } xfer += oprot->writeListEnd(); } @@ -5164,10 +5131,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("column_orders", ::apache::thrift::protocol::T_LIST, 7); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->column_orders.size())); - std::vector ::const_iterator _iter370; - for (_iter370 = this->column_orders.begin(); _iter370 != this->column_orders.end(); ++_iter370) + std::vector ::const_iterator _iter364; + for (_iter364 = this->column_orders.begin(); _iter364 != this->column_orders.end(); ++_iter364) { - xfer += (*_iter370).write(oprot); + xfer += (*_iter364).write(oprot); } xfer += oprot->writeListEnd(); } diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index f5d9a77dfb54f..231fe649b99b8 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -173,6 +173,7 @@ set(PARQUET_SRCS exception.cc file_reader.cc file_writer.cc + geometry_statistics.cc level_comparison.cc level_conversion.cc metadata.cc diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index 6b71338477af4..60a8a2176b0a8 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -243,9 +243,6 @@ EncodedStatistics 
ExtractStatsFromHeader(const H& header) { if (stats.__isset.distinct_count) { page_statistics.set_distinct_count(stats.distinct_count); } - if (stats.__isset.geometry_stats) { - page_statistics.set_geometry(FromThrift(stats.geometry_stats)); - } return page_statistics; } diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index 40d19d38e10ab..e5aba94033c10 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -792,6 +792,9 @@ class ColumnWriterImpl { // Plain-encoded statistics of the whole chunk virtual EncodedStatistics GetChunkStatistics() = 0; + // Plain-encoded geometry statistics of the whole chunk + virtual EncodedGeometryStatistics GetChunkGeometryStatistics() = 0; + // Merges page statistics into chunk statistics, then resets the values virtual void ResetPageStatistics() = 0; @@ -1104,6 +1107,14 @@ int64_t ColumnWriterImpl::Close() { if (rows_written_ > 0 && chunk_statistics.is_set()) { metadata_->SetStatistics(chunk_statistics); } + + if (descr_->logical_type() != nullptr && descr_->logical_type()->is_geometry()) { + EncodedGeometryStatistics geometry_stats = GetChunkGeometryStatistics(); + if (geometry_stats.is_set()) { + metadata_->SetGeometryStatistics(geometry_stats); + } + } + metadata_->SetKeyValueMetadata(key_value_metadata_); pager_->Close(has_dictionary_, fallback_); } @@ -1224,10 +1235,14 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< // Will be null if not using dictionary, but that's ok current_dict_encoder_ = dynamic_cast*>(current_encoder_.get()); - if (properties->statistics_enabled(descr_->path()) && - (SortOrder::UNKNOWN != descr_->sort_order())) { - page_statistics_ = MakeStatistics(descr_, allocator_); - chunk_statistics_ = MakeStatistics(descr_, allocator_); + if (properties->statistics_enabled(descr_->path())) { + if (SortOrder::UNKNOWN != descr_->sort_order()) { + page_statistics_ = MakeStatistics(descr_, allocator_); + 
chunk_statistics_ = MakeStatistics(descr_, allocator_); + } + if (descr_->logical_type() != nullptr && descr_->logical_type()->is_geometry()) { + chunk_geometry_statistics_ = std::make_shared(); + } } pages_change_on_record_boundaries_ = properties->data_page_version() == ParquetDataPageVersion::V2 || @@ -1375,6 +1390,12 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< return result; } + EncodedGeometryStatistics GetChunkGeometryStatistics() override { + EncodedGeometryStatistics result; + if (chunk_geometry_statistics_) result = chunk_geometry_statistics_->Encode(); + return result; + } + void ResetPageStatistics() override { if (chunk_statistics_ != nullptr) { chunk_statistics_->Merge(*page_statistics_); @@ -1433,6 +1454,7 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< DictEncoder* current_dict_encoder_; std::shared_ptr page_statistics_; std::shared_ptr chunk_statistics_; + std::shared_ptr chunk_geometry_statistics_; bool pages_change_on_record_boundaries_; // If writing a sequence of ::arrow::DictionaryArray to the writer, we keep the @@ -1619,6 +1641,11 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< if (page_statistics_ != nullptr) { page_statistics_->Update(values, num_values, num_nulls); } + if constexpr (std::is_same::value) { + if (chunk_geometry_statistics_ != nullptr) { + chunk_geometry_statistics_->Update(values, num_values, num_nulls); + } + } } /// \brief Write values with spaces and update page statistics accordingly. 
@@ -1647,6 +1674,13 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< page_statistics_->UpdateSpaced(values, valid_bits, valid_bits_offset, num_spaced_values, num_values, num_nulls); } + if constexpr (std::is_same::value) { + if (chunk_geometry_statistics_ != nullptr) { + chunk_geometry_statistics_->UpdateSpaced(values, valid_bits, valid_bits_offset, + num_spaced_values, num_values, + num_nulls); + } + } } }; @@ -1724,6 +1758,12 @@ Status TypedColumnWriterImpl::WriteArrowDictionary( page_statistics_->IncrementNullCount(num_chunk_levels - non_null_count); page_statistics_->IncrementNumValues(non_null_count); page_statistics_->Update(*referenced_dictionary, /*update_counts=*/false); + + if constexpr (std::is_same::value) { + if (chunk_geometry_statistics_ != nullptr) { + chunk_geometry_statistics_->Update(*referenced_dictionary); + } + } }; int64_t value_offset = 0; @@ -2227,6 +2267,9 @@ Status TypedColumnWriterImpl::WriteArrowDense( page_statistics_->IncrementNullCount(batch_size - non_null); page_statistics_->IncrementNumValues(non_null); } + if (chunk_geometry_statistics_ != nullptr) { + chunk_geometry_statistics_->Update(*data_slice); + } CommitWriteAndCheckPageLimit(batch_size, batch_num_values, batch_size - non_null, check_page); CheckDictionarySizeLimit(); diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index 8c40d0b63669f..77fab468bd8df 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -400,11 +400,11 @@ class TestPrimitiveWriter : public PrimitiveTypedTest { return metadata_accessor->key_value_metadata(); } - std::shared_ptr metadata_stats() { + std::shared_ptr metadata_geometry_stats() { ApplicationVersion app_version(this->writer_properties_->created_by()); auto metadata_accessor = ColumnChunkMetaData::Make( metadata_->contents(), this->descr_, default_reader_properties(), &app_version); - return metadata_accessor->statistics(); + 
return metadata_accessor->geometry_statistics(); } protected: @@ -1838,10 +1838,8 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { EXPECT_DOUBLE_EQ(expected_y, y); } - std::shared_ptr statistics = metadata_stats(); - EXPECT_TRUE(statistics->HasMinMax()); - EXPECT_TRUE(statistics->HasGeometryStatistics()); - const GeometryStatistics* geometry_statistics = statistics->geometry_statistics(); + std::shared_ptr geometry_statistics = metadata_geometry_stats(); + ASSERT_TRUE(geometry_statistics != nullptr); std::vector geometry_types = geometry_statistics->GetGeometryTypes(); EXPECT_EQ(1, geometry_types.size()); EXPECT_EQ(1, geometry_types[0]); @@ -1851,19 +1849,6 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { EXPECT_DOUBLE_EQ(100, geometry_statistics->GetYMax()); EXPECT_FALSE(geometry_statistics->HasZ()); EXPECT_FALSE(geometry_statistics->HasM()); - - auto byte_array_statistics = - std::static_pointer_cast(statistics); - double min_x = 0; - double min_y = 0; - double max_x = 0; - double max_y = 0; - GetWKBPointCoordinate(byte_array_statistics->min(), &min_x, &min_y); - GetWKBPointCoordinate(byte_array_statistics->max(), &max_x, &max_y); - EXPECT_DOUBLE_EQ(0, min_x); - EXPECT_DOUBLE_EQ(1, min_y); - EXPECT_DOUBLE_EQ(99, max_x); - EXPECT_DOUBLE_EQ(100, max_y); } void TestWriteAndReadSpaced(ParquetVersion::type version, @@ -1915,10 +1900,8 @@ class TestGeometryValuesWriter : public TestPrimitiveWriter { EXPECT_DOUBLE_EQ(expected_y, y); } - std::shared_ptr statistics = metadata_stats(); - EXPECT_TRUE(statistics->HasMinMax()); - EXPECT_TRUE(statistics->HasGeometryStatistics()); - const GeometryStatistics* geometry_statistics = statistics->geometry_statistics(); + std::shared_ptr geometry_statistics = metadata_geometry_stats(); + ASSERT_TRUE(geometry_statistics != nullptr); std::vector geometry_types = geometry_statistics->GetGeometryTypes(); EXPECT_EQ(1, geometry_types.size()); EXPECT_EQ(1, geometry_types[0]); diff --git 
a/cpp/src/parquet/geometry_statistics.cc b/cpp/src/parquet/geometry_statistics.cc new file mode 100644 index 0000000000000..e17302aef40e7 --- /dev/null +++ b/cpp/src/parquet/geometry_statistics.cc @@ -0,0 +1,310 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "parquet/geometry_statistics.h" +#include + +#include "arrow/array.h" +#include "arrow/type.h" +#include "arrow/util/bit_run_reader.h" +#include "parquet/geometry_util_internal.h" + +using arrow::util::SafeLoad; + +namespace parquet { + +class GeometryStatisticsImpl { + public: + GeometryStatisticsImpl() = default; + GeometryStatisticsImpl(const GeometryStatisticsImpl&) = default; + + bool Equals(const GeometryStatisticsImpl& other) const { + if (is_valid_ != other.is_valid_) { + return false; + } + + if (!is_valid_ && !other.is_valid_) { + return true; + } + + auto geometry_types = bounder_.GeometryTypes(); + auto other_geometry_types = other.bounder_.GeometryTypes(); + if (geometry_types.size() != other_geometry_types.size()) { + return false; + } + + for (size_t i = 0; i < geometry_types.size(); i++) { + if (geometry_types[i] != other_geometry_types[i]) { + return false; + } + } + + return bounder_.Bounds() == other.bounder_.Bounds(); + } + + void Merge(const GeometryStatisticsImpl& other) { + if (!is_valid_ || !other.is_valid_) { + is_valid_ = false; + return; + } + + bounder_.ReadBox(other.bounder_.Bounds()); + bounder_.ReadGeometryTypes(other.bounder_.GeometryTypes()); + } + + void Update(const ByteArray* values, int64_t num_values, int64_t null_count) { + if (!is_valid_) { + return; + } + + geometry::WKBBuffer buf; + try { + for (int64_t i = 0; i < num_values; i++) { + const ByteArray& item = values[i]; + buf.Init(item.ptr, item.len); + bounder_.ReadGeometry(&buf); + } + + bounder_.Flush(); + } catch (ParquetException&) { + is_valid_ = false; + } + } + + void UpdateSpaced(const ByteArray* values, const uint8_t* valid_bits, + int64_t valid_bits_offset, int64_t num_spaced_values, + int64_t num_values, int64_t null_count) { + DCHECK_GT(num_spaced_values, 0); + + geometry::WKBBuffer buf; + try { + ::arrow::internal::VisitSetBitRunsVoid( + valid_bits, valid_bits_offset, num_spaced_values, + [&](int64_t position, int64_t length) { + for (int64_t i = 0; 
i < length; i++) { + ByteArray item = SafeLoad(values + i + position); + buf.Init(item.ptr, item.len); + bounder_.ReadGeometry(&buf); + } + }); + bounder_.Flush(); + } catch (ParquetException&) { + is_valid_ = false; + } + } + + void Update(const ::arrow::Array& values) { + const auto& binary_array = static_cast(values); + geometry::WKBBuffer buf; + try { + for (int64_t i = 0; i < binary_array.length(); ++i) { + if (!binary_array.IsNull(i)) { + std::string_view byte_array = binary_array.GetView(i); + buf.Init(reinterpret_cast(byte_array.data()), + byte_array.length()); + bounder_.ReadGeometry(&buf); + bounder_.Flush(); + } + } + } catch (ParquetException&) { + is_valid_ = false; + } + } + + void Reset() { + bounder_.Reset(); + is_valid_ = true; + } + + EncodedGeometryStatistics Encode() const { + const double* mins = bounder_.Bounds().min; + const double* maxes = bounder_.Bounds().max; + + EncodedGeometryStatistics out; + out.geometry_types = bounder_.GeometryTypes(); + + out.xmin = mins[0]; + out.xmax = maxes[0]; + out.ymin = mins[1]; + out.ymax = maxes[1]; + out.zmin = mins[2]; + out.zmax = maxes[2]; + out.mmin = mins[3]; + out.mmax = maxes[3]; + + return out; + } + + std::string EncodeMin() const { + const double* mins = bounder_.Bounds().min; + bool has_z = !std::isinf(mins[2]); + bool has_m = !std::isinf(mins[3]); + return geometry::MakeWKBPoint(mins, has_z, has_m); + } + + std::string EncodeMax() const { + const double* maxes = bounder_.Bounds().max; + bool has_z = !std::isinf(maxes[2]); + bool has_m = !std::isinf(maxes[3]); + return geometry::MakeWKBPoint(maxes, has_z, has_m); + } + + void Update(const EncodedGeometryStatistics& encoded) { + if (!is_valid_) { + return; + } + + geometry::BoundingBox box; + box.min[0] = encoded.xmin; + box.max[0] = encoded.xmax; + box.min[1] = encoded.ymin; + box.max[1] = encoded.ymax; + + if (encoded.has_z()) { + box.min[2] = encoded.zmin; + box.max[2] = encoded.zmax; + } + + if (encoded.has_m()) { + box.min[3] = 
encoded.mmin; + box.max[3] = encoded.mmax; + } + + bounder_.ReadBox(box); + bounder_.ReadGeometryTypes(encoded.geometry_types); + } + + bool is_valid() const { return is_valid_; } + + const double* GetMinBounds() { return bounder_.Bounds().min; } + + const double* GetMaxBounds() { return bounder_.Bounds().max; } + + std::vector GetGeometryTypes() const { return bounder_.GeometryTypes(); } + + private: + geometry::WKBGeometryBounder bounder_; + bool is_valid_ = true; +}; + +GeometryStatistics::GeometryStatistics() + : impl_(std::make_unique()) {} + +GeometryStatistics::GeometryStatistics(std::unique_ptr impl) + : impl_(std::move(impl)) {} + +GeometryStatistics::GeometryStatistics(const EncodedGeometryStatistics& encoded) + : GeometryStatistics() { + Decode(encoded); +} + +GeometryStatistics::GeometryStatistics(GeometryStatistics&&) = default; + +GeometryStatistics::~GeometryStatistics() = default; + +bool GeometryStatistics::Equals(const GeometryStatistics& other) const { + return impl_->Equals(*other.impl_); +} + +void GeometryStatistics::Merge(const GeometryStatistics& other) { + impl_->Merge(*other.impl_); +} + +void GeometryStatistics::Update(const ByteArray* values, int64_t num_values, + int64_t null_count) { + impl_->Update(values, num_values, null_count); +} + +void GeometryStatistics::UpdateSpaced(const ByteArray* values, const uint8_t* valid_bits, + int64_t valid_bits_offset, + int64_t num_spaced_values, int64_t num_values, + int64_t null_count) { + impl_->UpdateSpaced(values, valid_bits, valid_bits_offset, num_spaced_values, + num_values, null_count); +} + +void GeometryStatistics::Update(const ::arrow::Array& values) { impl_->Update(values); } + +void GeometryStatistics::Reset() { impl_->Reset(); } + +bool GeometryStatistics::is_valid() const { return impl_->is_valid(); } + +EncodedGeometryStatistics GeometryStatistics::Encode() const { return impl_->Encode(); } + +std::string GeometryStatistics::EncodeMin() const { return impl_->EncodeMin(); } + 
+std::string GeometryStatistics::EncodeMax() const { return impl_->EncodeMax(); } + +void GeometryStatistics::Decode(const EncodedGeometryStatistics& encoded) { + impl_->Update(encoded); +} + +std::shared_ptr GeometryStatistics::clone() const { + std::unique_ptr impl = + std::make_unique(*impl_); + return std::make_shared(std::move(impl)); +} + +double GeometryStatistics::GetXMin() const { + const double* mins = impl_->GetMinBounds(); + return mins[0]; +} + +double GeometryStatistics::GetXMax() const { + const double* maxes = impl_->GetMaxBounds(); + return maxes[0]; +} + +double GeometryStatistics::GetYMin() const { + const double* mins = impl_->GetMinBounds(); + return mins[1]; +} + +double GeometryStatistics::GetYMax() const { + const double* maxes = impl_->GetMaxBounds(); + return maxes[1]; +} + +double GeometryStatistics::GetZMin() const { + const double* mins = impl_->GetMinBounds(); + return mins[2]; +} + +double GeometryStatistics::GetZMax() const { + const double* maxes = impl_->GetMaxBounds(); + return maxes[2]; +} + +double GeometryStatistics::GetMMin() const { + const double* mins = impl_->GetMinBounds(); + return mins[3]; +} + +double GeometryStatistics::GetMMax() const { + const double* maxes = impl_->GetMaxBounds(); + return maxes[3]; +} + +bool GeometryStatistics::HasZ() const { return (GetZMax() - GetZMin()) > 0; } + +bool GeometryStatistics::HasM() const { return (GetMMax() - GetMMin()) > 0; } + +std::vector GeometryStatistics::GetGeometryTypes() const { + return impl_->GetGeometryTypes(); +} + +} // namespace parquet diff --git a/cpp/src/parquet/geometry_statistics.h b/cpp/src/parquet/geometry_statistics.h new file mode 100644 index 0000000000000..7cd21350e1bc1 --- /dev/null +++ b/cpp/src/parquet/geometry_statistics.h @@ -0,0 +1,111 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "parquet/platform.h" +#include "parquet/types.h" + +namespace parquet { + +class PARQUET_EXPORT EncodedGeometryStatistics { + public: + static constexpr double kInf = std::numeric_limits::infinity(); + + EncodedGeometryStatistics() = default; + EncodedGeometryStatistics(const EncodedGeometryStatistics&) = default; + EncodedGeometryStatistics(EncodedGeometryStatistics&&) = default; + EncodedGeometryStatistics& operator=(const EncodedGeometryStatistics&) = default; + + double xmin{kInf}; + double xmax{-kInf}; + double ymin{kInf}; + double ymax{-kInf}; + double zmin{kInf}; + double zmax{-kInf}; + double mmin{kInf}; + double mmax{-kInf}; + std::vector geometry_types; + + bool has_z() const { return (zmax - zmin) >= 0; } + + bool has_m() const { return (mmax - mmin) >= 0; } + + bool is_set() const { return !geometry_types.empty(); } +}; + +class GeometryStatisticsImpl; + +class PARQUET_EXPORT GeometryStatistics { + public: + GeometryStatistics(); + explicit GeometryStatistics(std::unique_ptr impl); + explicit GeometryStatistics(const EncodedGeometryStatistics& encoded); + GeometryStatistics(GeometryStatistics&&); + + ~GeometryStatistics(); + + bool Equals(const GeometryStatistics& other) 
const; + + void Merge(const GeometryStatistics& other); + + void Update(const ByteArray* values, int64_t num_values, int64_t null_count); + + void UpdateSpaced(const ByteArray* values, const uint8_t* valid_bits, + int64_t valid_bits_offset, int64_t num_spaced_values, + int64_t num_values, int64_t null_count); + + void Update(const ::arrow::Array& values); + + void Reset(); + + EncodedGeometryStatistics Encode() const; + std::string EncodeMin() const; + std::string EncodeMax() const; + + bool is_valid() const; + + std::shared_ptr clone() const; + + void Decode(const EncodedGeometryStatistics& encoded); + + double GetXMin() const; + double GetXMax() const; + double GetYMin() const; + double GetYMax() const; + double GetZMin() const; + double GetZMax() const; + double GetMMin() const; + double GetMMax() const; + + bool HasZ() const; + bool HasM() const; + + std::vector GetGeometryTypes() const; + + private: + std::unique_ptr impl_; +}; + +} // namespace parquet diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index 0aa04e6f912ab..b1fccb6f777ab 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -93,12 +93,6 @@ template static std::shared_ptr MakeTypedColumnStats( const format::ColumnMetaData& metadata, const ColumnDescriptor* descr) { // If ColumnOrder is defined, return max_value and min_value - EncodedGeometryStatistics encoded_geometry_stats; - const EncodedGeometryStatistics* geometry_statistics = nullptr; - if (metadata.statistics.__isset.geometry_stats) { - encoded_geometry_stats = FromThrift(metadata.statistics.geometry_stats); - geometry_statistics = &encoded_geometry_stats; - } if (descr->column_order().get_order() == ColumnOrder::TYPE_DEFINED_ORDER) { return MakeStatistics( descr, metadata.statistics.min_value, metadata.statistics.max_value, @@ -106,8 +100,7 @@ static std::shared_ptr MakeTypedColumnStats( metadata.statistics.null_count, metadata.statistics.distinct_count, metadata.statistics.__isset.max_value 
&& metadata.statistics.__isset.min_value, metadata.statistics.__isset.null_count, - metadata.statistics.__isset.distinct_count, ::arrow::default_memory_pool(), - geometry_statistics); + metadata.statistics.__isset.distinct_count, ::arrow::default_memory_pool()); } // Default behavior return MakeStatistics( @@ -116,7 +109,18 @@ static std::shared_ptr MakeTypedColumnStats( metadata.statistics.null_count, metadata.statistics.distinct_count, metadata.statistics.__isset.max && metadata.statistics.__isset.min, metadata.statistics.__isset.null_count, metadata.statistics.__isset.distinct_count, - ::arrow::default_memory_pool(), geometry_statistics); + ::arrow::default_memory_pool()); +} + +static std::shared_ptr MakeColumnGeometryStats( + const format::ColumnMetaData& metadata, const ColumnDescriptor* descr) { + if (metadata.__isset.geometry_stats) { + EncodedGeometryStatistics encoded_geometry_stats = + FromThrift(metadata.geometry_stats); + return std::make_shared(encoded_geometry_stats); + } else { + return nullptr; + } } std::shared_ptr MakeColumnStats(const format::ColumnMetaData& meta_data, @@ -275,6 +279,7 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl { encoding_stats.count}); } possible_stats_ = nullptr; + possible_geometry_stats_ = nullptr; InitKeyValueMetadata(); } @@ -313,10 +318,22 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl { descr_->sort_order()); } + inline bool is_geometry_stats_set() const { + DCHECK(writer_version_ != nullptr); + if (possible_geometry_stats_ == nullptr && column_metadata_->__isset.geometry_stats) { + possible_geometry_stats_ = MakeColumnGeometryStats(*column_metadata_, descr_); + } + return possible_geometry_stats_ != nullptr && possible_geometry_stats_->is_valid(); + } + inline std::shared_ptr statistics() const { return is_stats_set() ? possible_stats_ : nullptr; } + inline std::shared_ptr geometry_statistics() const { + return is_geometry_stats_set() ? 
possible_geometry_stats_ : nullptr; + } + inline Compression::type compression() const { return LoadEnumSafe(&column_metadata_->codec); } @@ -396,6 +413,7 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl { } mutable std::shared_ptr possible_stats_; + mutable std::shared_ptr possible_geometry_stats_; std::vector encodings_; std::vector encoding_stats_; const format::ColumnChunk* column_; @@ -455,8 +473,16 @@ std::shared_ptr ColumnChunkMetaData::statistics() const { return impl_->statistics(); } +std::shared_ptr ColumnChunkMetaData::geometry_statistics() const { + return impl_->geometry_statistics(); +} + bool ColumnChunkMetaData::is_stats_set() const { return impl_->is_stats_set(); } +bool ColumnChunkMetaData::is_geometry_stats_set() const { + return impl_->is_geometry_stats_set(); +} + std::optional ColumnChunkMetaData::bloom_filter_offset() const { return impl_->bloom_filter_offset(); } @@ -1577,6 +1603,10 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl { column_chunk_->meta_data.__set_statistics(ToThrift(val)); } + void SetGeometryStatistics(const EncodedGeometryStatistics& val) { + column_chunk_->meta_data.__set_geometry_stats(ToThrift(val)); + } + void Finish(int64_t num_values, int64_t dictionary_page_offset, int64_t index_page_offset, int64_t data_page_offset, int64_t compressed_size, int64_t uncompressed_size, bool has_dictionary, @@ -1786,6 +1816,11 @@ void ColumnChunkMetaDataBuilder::SetStatistics(const EncodedStatistics& result) impl_->SetStatistics(result); } +void ColumnChunkMetaDataBuilder::SetGeometryStatistics( + const EncodedGeometryStatistics& result) { + impl_->SetGeometryStatistics(result); +} + void ColumnChunkMetaDataBuilder::SetKeyValueMetadata( std::shared_ptr key_value_metadata) { impl_->SetKeyValueMetadata(std::move(key_value_metadata)); diff --git a/cpp/src/parquet/metadata.h b/cpp/src/parquet/metadata.h index d1e2d1904a694..96ebfa2009c12 100644 --- a/cpp/src/parquet/metadata.h +++ b/cpp/src/parquet/metadata.h @@ 
-35,8 +35,10 @@ namespace parquet { class ColumnDescriptor; class EncodedStatistics; +class EncodedGeometryStatistics; class FileCryptoMetaData; class Statistics; +class GeometryStatistics; class SchemaDescriptor; namespace schema { @@ -163,7 +165,9 @@ class PARQUET_EXPORT ColumnChunkMetaData { int64_t num_values() const; std::shared_ptr path_in_schema() const; bool is_stats_set() const; + bool is_geometry_stats_set() const; std::shared_ptr statistics() const; + std::shared_ptr geometry_statistics() const; Compression::type compression() const; // Indicate if the ColumnChunk compression is supported by the current @@ -471,6 +475,9 @@ class PARQUET_EXPORT ColumnChunkMetaDataBuilder { // column metadata void SetStatistics(const EncodedStatistics& stats); + // column geometry statistics + void SetGeometryStatistics(const EncodedGeometryStatistics& geometry_stats); + void SetKeyValueMetadata(std::shared_ptr key_value_metadata); // get the column descriptor diff --git a/cpp/src/parquet/page_index.cc b/cpp/src/parquet/page_index.cc index 7ee9761d35635..afda4c6064b36 100644 --- a/cpp/src/parquet/page_index.cc +++ b/cpp/src/parquet/page_index.cc @@ -99,9 +99,7 @@ class TypedColumnIndexImpl : public TypedColumnIndex { column_index_.min_values.size() != num_pages || column_index_.max_values.size() != num_pages || (column_index_.__isset.null_counts && - column_index_.null_counts.size() != num_pages) || - (column_index_.__isset.geometry_stats && - column_index_.geometry_stats.size() != num_pages)) { + column_index_.null_counts.size() != num_pages)) { throw ParquetException("Invalid column index"); } @@ -129,20 +127,6 @@ class TypedColumnIndexImpl : public TypedColumnIndex { } } DCHECK_EQ(num_non_null_pages, non_null_page_indices_.size()); - - // Decode geometry statistics. - // Note that null pages are skipped. 
- if (column_index_.__isset.geometry_stats) { - geometry_statistics_.reserve(num_pages); - for (size_t i = 0; i < num_pages; ++i) { - if (!column_index_.null_pages[i]) { - auto encoded_geom_stat = FromThrift(column_index_.geometry_stats[i]); - GeometryStatistics geom_stat; - geom_stat.Decode(encoded_geom_stat); - geometry_statistics_.push_back(std::move(geom_stat)); - } - } - } } const std::vector& null_pages() const override { @@ -175,10 +159,6 @@ class TypedColumnIndexImpl : public TypedColumnIndex { const std::vector& max_values() const override { return max_values_; } - const std::vector& geometry_statistics() const override { - return geometry_statistics_; - } - private: /// Wrapped thrift column index. const format::ColumnIndex column_index_; @@ -187,8 +167,6 @@ class TypedColumnIndexImpl : public TypedColumnIndex { std::vector max_values_; /// A list of page indices for non-null pages. std::vector non_null_page_indices_; - /// A list of geometry statistics - std::vector geometry_statistics_; }; class OffsetIndexImpl : public OffsetIndex { @@ -515,11 +493,6 @@ class ColumnIndexBuilderImpl final : public ColumnIndexBuilder { column_index_.__isset.null_counts = false; column_index_.null_counts.clear(); } - - if (stats.has_geometry_statistics) { - column_index_.__isset.geometry_stats = true; - column_index_.geometry_stats.emplace_back(ToThrift(stats.geometry_statistics())); - } } void Finish() override { diff --git a/cpp/src/parquet/page_index.h b/cpp/src/parquet/page_index.h index 3b0174ed0403a..1c45d6ffd8836 100644 --- a/cpp/src/parquet/page_index.h +++ b/cpp/src/parquet/page_index.h @@ -98,12 +98,6 @@ class PARQUET_EXPORT TypedColumnIndex : public ColumnIndex { /// /// Just like `min_values`, but for upper bounds instead of lower bounds. virtual const std::vector& max_values() const = 0; - - /// \brief A vector of geometry statistics for each data page in this column. 
- /// - /// This is like `min_values` and `max_values`, but for geometry statistics - /// instead of lower/upper bounds - virtual const std::vector& geometry_statistics() const = 0; }; using BoolColumnIndex = TypedColumnIndex; diff --git a/cpp/src/parquet/page_index_test.cc b/cpp/src/parquet/page_index_test.cc index 2e780e17981e0..4db49b4267415 100644 --- a/cpp/src/parquet/page_index_test.cc +++ b/cpp/src/parquet/page_index_test.cc @@ -23,13 +23,10 @@ #include "arrow/io/file.h" #include "arrow/util/float16.h" #include "parquet/file_reader.h" -#include "parquet/geometry_util_internal.h" #include "parquet/metadata.h" #include "parquet/schema.h" -#include "parquet/statistics.h" #include "parquet/test_util.h" #include "parquet/thrift_internal.h" -#include "parquet/types.h" namespace parquet { @@ -492,21 +489,6 @@ void TestWriteTypedColumnIndex(schema::NodePtr node, if (has_null_counts) { ASSERT_EQ(page_stats[i].null_count, column_index->null_counts()[i]); } - if (page_stats[i].has_geometry_statistics) { - const auto& expected_stats = page_stats[i].geometry_statistics(); - const auto* byte_array_column_index = - static_cast(column_index.get()); - const auto& actual_stats = byte_array_column_index->geometry_statistics()[i]; - ASSERT_EQ(expected_stats.geometry_types, actual_stats.GetGeometryTypes()); - ASSERT_DOUBLE_EQ(expected_stats.xmin, actual_stats.GetXMin()); - ASSERT_DOUBLE_EQ(expected_stats.xmax, actual_stats.GetXMax()); - ASSERT_DOUBLE_EQ(expected_stats.ymin, actual_stats.GetYMin()); - ASSERT_DOUBLE_EQ(expected_stats.ymax, actual_stats.GetYMax()); - ASSERT_DOUBLE_EQ(expected_stats.zmin, actual_stats.GetZMin()); - ASSERT_DOUBLE_EQ(expected_stats.zmax, actual_stats.GetZMax()); - ASSERT_DOUBLE_EQ(expected_stats.mmin, actual_stats.GetMMin()); - ASSERT_DOUBLE_EQ(expected_stats.mmax, actual_stats.GetMMax()); - } } } } @@ -619,36 +601,6 @@ TEST(PageIndex, WriteFloat16ColumnIndex) { /*has_null_counts=*/false); } -TEST(PageIndex, WriteGeometryColumnIndex) { - std::vector 
page_stats(3); - - EncodedGeometryStatistics geom_stats[3]; - std::string dummy_min = "dummy_min"; - std::string dummy_max = "dummy_max"; - for (int i = 0; i < 3; i++) { - geom_stats[i].xmin = i + 1; - geom_stats[i].xmax = i + 2; - geom_stats[i].ymin = i + 3; - geom_stats[i].ymax = i + 4; - geom_stats[i].zmin = i + 5; - geom_stats[i].zmax = i + 6; - geom_stats[i].mmin = i + 7; - geom_stats[i].mmax = i + 8; - geom_stats[i].geometry_types = {i + 1}; - page_stats.at(i).set_min(dummy_min).set_max(dummy_max); - page_stats.at(i).set_geometry(geom_stats[i]); - } - - schema::NodePtr node = schema::PrimitiveNode::Make( - "c1", Repetition::OPTIONAL, - GeometryLogicalType::Make("OGC:CRS84", LogicalType::GeometryEdges::PLANAR, - LogicalType::GeometryEncoding::WKB), - Type::BYTE_ARRAY); - - TestWriteTypedColumnIndex(node, page_stats, BoundaryOrder::Ascending, - /*has_null_counts=*/false); -} - TEST(PageIndex, WriteColumnIndexWithAllNullPages) { // All values are null. std::vector page_stats(3); diff --git a/cpp/src/parquet/parquet.thrift b/cpp/src/parquet/parquet.thrift index f0ed1483fde13..074f13be6000f 100644 --- a/cpp/src/parquet/parquet.thrift +++ b/cpp/src/parquet/parquet.thrift @@ -61,14 +61,14 @@ enum ConvertedType { * values */ LIST = 3; - /** an enum is converted into a binary field */ + /** an enum is converted into a BYTE_ARRAY field */ ENUM = 4; /** * A decimal value. * - * This may be used to annotate binary or fixed primitive types. The - * underlying byte array stores the unscaled value encoded as two's + * This may be used to annotate BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY primitive + * types. The underlying byte array stores the unscaled value encoded as two's * complement using big-endian byte order (the most significant byte is the * zeroth element). The value of the decimal is the value * 10^{-scale}. * @@ -159,7 +159,7 @@ enum ConvertedType { /** * An embedded BSON document * - * A BSON document embedded within a single BINARY column. 
+ * A BSON document embedded within a single BYTE_ARRAY column. */ BSON = 20; @@ -182,10 +182,10 @@ enum ConvertedType { * Representation of Schemas */ enum FieldRepetitionType { - /** This field is required (can not be null) and each record has exactly 1 value. */ + /** This field is required (can not be null) and each row has exactly 1 value. */ REQUIRED = 0; - /** The field is optional (can be null) and each record has 0 or 1 values. */ + /** The field is optional (can be null) and each row has 0 or 1 values. */ OPTIONAL = 1; /** The field is repeated and can contain 0 or more values */ @@ -238,72 +238,26 @@ struct SizeStatistics { 3: optional list definition_level_histogram; } -/** - * Physical type and encoding for the geometry type. - */ -enum GeometryEncoding { - /** - * Allowed for physical type: BYTE_ARRAY. - * - * Well-known binary (WKB) representations of geometries. - * - * To be clear, we follow the same rule of WKB and coordinate axis order from - * GeoParquet [1][2]. Geometries SHOULD be encoded as ISO WKB [3][4] - * supporting XY, XYZ, XYM, XYZM and the standard geometry types - * Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon, - * and GeometryCollection). Coordinate order is always (x, y) where x is - * easting or longitude and y is northing or latitude. This ordering explicitly - * overrides the axis order as specified in the CRS following the GeoPackage - * specification [5]. - * - * This is the preferred encoding for maximum portability. It also supports - * GeometryStatistics to be set in the column chunk and page index. 
- * - * [1] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L92 - * [2] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L155 - * [3] https://portal.ogc.org/files/?artifact_id=18241 - * [4] https://www.iso.org/standard/60343.html - * [5] https://www.geopackage.org/spec130/#gpb_spec - */ - WKB = 0; -} - -/** - * Interpretation for edges of elements of a GEOMETRY logical type. In other - * words, whether a point between two vertices should be interpolated in - * its XY dimensions as if it were a Cartesian line connecting the two - * vertices (planar) or the shortest spherical arc between the longitude - * and latitude represented by the two vertices (spherical). This value - * applies to all non-point geometry objects and is independent of the - * coordinate reference system. - * - * Because most systems currently assume planar edges and do not support - * spherical edges, planar should be used as the default value. - */ -enum Edges { - PLANAR = 0; - SPHERICAL = 1; -} - /** * Bounding box of geometries in the representation of min/max value pair of - * coordinates from each axis when Edges is planar. Values of Z and M are omitted - * for 2D geometries. When Edges is spherical, the bounding box is in the form of - * [westmost, eastmost, southmost, northmost], with necessary min/max values for - * Z and M if needed. + * coordinates from each axis. 
*/ struct BoundingBox { - /** Westmost value if edges = spherical **/ + /** Min X value when edges = PLANAR, westmost value if edges = SPHERICAL */ 1: required double xmin; - /** Eastmost value if edges = spherical **/ + /** Max X value when edges = PLANAR, eastmost value if edges = SPHERICAL */ 2: required double xmax; - /** Southmost value if edges = spherical **/ + /** Min Y value when edges = PLANAR, southmost value if edges = SPHERICAL */ 3: required double ymin; - /** Northmost value if edges = spherical **/ + /** Max Y value when edges = PLANAR, northmost value if edges = SPHERICAL */ 4: required double ymax; + /** Min Z value if the axis exists */ 5: optional double zmin; + /** Max Z value if the axis exists */ 6: optional double zmax; + /** Min M value if the axis exists */ 7: optional double mmin; + /** Max M value if the axis exists */ 8: optional double mmax; } @@ -311,34 +265,7 @@ struct BoundingBox { struct GeometryStatistics { /** A bounding box of geometries */ 1: optional BoundingBox bbox; - - /** - * The geometry types of all geometries, or an empty array if they are not - * known. This is borrowed from `geometry_types` column metadata of GeoParquet [1] - * except that values in the list are WKB (ISO variant) integer codes [2]. Table - * below shows the most common geometry types and their codes: - * - * | Type | XY | XYZ | XYM | XYZM | - * | :----------------- | :--- | :--- | :--- | :--: | - * | Point | 0001 | 1001 | 2001 | 3001 | - * | LineString | 0002 | 1002 | 2002 | 3002 | - * | Polygon | 0003 | 1003 | 2003 | 3003 | - * | MultiPoint | 0004 | 1004 | 2004 | 3004 | - * | MultiLineString | 0005 | 1005 | 2005 | 3005 | - * | MultiPolygon | 0006 | 1006 | 2006 | 3006 | - * | GeometryCollection | 0007 | 1007 | 2007 | 3007 | - * - * In addition, the following rules are used: - * - A list of multiple values indicates that multiple geometry types are - * present (e.g. `[0003, 0006]`). 
- * - An empty array explicitly signals that the geometry types are not known. - * - The geometry types in the list must be unique (e.g. `[0001, 0001]` - * is not valid). - * - * Please refer to links below for more detail: - * [1] https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary - * [2] https://github.com/opengeospatial/geoparquet/blob/v1.1.0/format-specs/geoparquet.md?plain=1#L159 - */ + /** Geometry type codes of all geometries, or an empty list if not known */ 2: optional list geometry_types; } @@ -362,7 +289,14 @@ struct Statistics { */ 1: optional binary max; 2: optional binary min; - /** count of null value in the column */ + /** + * Count of null values in the column. + * + * Writers SHOULD always write this field even if it is zero (i.e. no null value) + * or the column is not nullable. + * Readers MUST distinguish between null_count not being present and null_count == 0. + * If null_count is not present, readers MUST NOT assume null_count == 0. 
+ */ 3: optional i64 null_count; /** count of distinct values occurring */ 4: optional i64 distinct_count; @@ -384,17 +318,14 @@ struct Statistics { 7: optional bool is_max_value_exact; /** If true, min_value is the actual minimum value for a column */ 8: optional bool is_min_value_exact; - - /** statistics specific to geometry logical type */ - 9: optional GeometryStatistics geometry_stats; } /** Empty structs to use as logical type annotations */ -struct StringType {} // allowed for BINARY, must be encoded with UTF-8 +struct StringType {} // allowed for BYTE_ARRAY, must be encoded with UTF-8 struct UUIDType {} // allowed for FIXED[16], must encoded raw UUID bytes struct MapType {} // see LogicalTypes.md struct ListType {} // see LogicalTypes.md -struct EnumType {} // allowed for BINARY, must be encoded with UTF-8 +struct EnumType {} // allowed for BYTE_ARRAY, must be encoded with UTF-8 struct DateType {} // allowed for INT32 struct Float16Type {} // allowed for FIXED[2], must encoded raw FLOAT16 bytes @@ -416,7 +347,7 @@ struct NullType {} // allowed for any physical type, only null values stored * To maintain forward-compatibility in v1, implementations using this logical * type must also set scale and precision on the annotated SchemaElement. * - * Allowed for physical types: INT32, INT64, FIXED, and BINARY + * Allowed for physical types: INT32, INT64, FIXED_LEN_BYTE_ARRAY, and BYTE_ARRAY. */ struct DecimalType { 1: required i32 scale @@ -468,7 +399,7 @@ struct IntType { /** * Embedded JSON logical type annotation * - * Allowed for physical types: BINARY + * Allowed for physical types: BYTE_ARRAY */ struct JsonType { } @@ -476,43 +407,42 @@ struct JsonType { /** * Embedded BSON logical type annotation * - * Allowed for physical types: BINARY + * Allowed for physical types: BYTE_ARRAY */ struct BsonType { } +/** Physical type and encoding for the geometry type */ +enum GeometryEncoding { + /** + * Allowed for physical type: BYTE_ARRAY. 
+ * + * Well-known binary (WKB) representations of geometries. + */ + WKB = 0; +} + +/** Interpretation for edges of elements of a GEOMETRY type */ +enum Edges { + PLANAR = 0; + SPHERICAL = 1; +} + /** - * Geometry logical type annotation (added in 2.11.0) + * GEOMETRY logical type annotation (added in 2.11.0) + * + * GeometryEncoding and Edges are required. In order to correctly interpret + * geometry data, writer implementations SHOULD always set them, and reader + * implementations SHOULD fail for unknown values. + * + * CRS is optional. Once CRS is set, it MUST be a key to an entry in the + * `key_value_metadata` field of `FileMetaData`. + * + * See LogicalTypes.md for detail. */ struct GeometryType { - /** - * Physical type and encoding for the geometry type. - * Please refer to the definition of GeometryEncoding for more detail. - */ 1: required GeometryEncoding encoding; - /** - * Interpretation for edges of elements of a GEOMETRY logical type, i.e. whether - * the interpolation between points along an edge represents a straight cartesian - * line or the shortest line on the sphere. - * Please refer to the definition of Edges for more detail. - */ 2: required Edges edges; - /** - * CRS (coordinate reference system) is a mapping of how coordinates refer to - * precise locations on earth. A crs is specified by a string, which is a Parquet - * file metadata field whose value is the crs representation. An additional field - * with the suffix '.type' describes the encoding of this CRS representation. - * - * For example, if a geometry column (e.g., 'geom1') uses the CRS 'OGC:CRS84', the - * writer may create 2 file metadata fields: 'geom1_crs' and 'geom1_crs.type', and - * set the 'crs' field to 'geom1_crs'. The 'geom1_crs' field will contain the - * PROJJSON representation of OGC:CRS84 - * (https://github.com/opengeospatial/geoparquet/blob/main/format-specs/geoparquet.md#ogccrs84-details), - * and the 'geom1_crs.type' field will contain the string 'PROJJSON'. 
- * - * Multiple geometry columns can refer to the same CRS metadata field - * (e.g., 'geom1_crs') if they share the same CRS. - */ 3: optional string crs; } @@ -722,7 +652,13 @@ enum BoundaryOrder { /** Data page header */ struct DataPageHeader { - /** Number of values, including NULLs, in this data page. **/ + /** + * Number of values, including NULLs, in this data page. + * + * If an OffsetIndex is present, a page must begin at a row + * boundary (repetition_level = 0). Otherwise, pages may begin + * within a row (repetition_level > 0). + **/ 1: required i32 num_values /** Encoding used for this data page **/ @@ -769,7 +705,11 @@ struct DataPageHeaderV2 { /** Number of NULL values, in this data page. Number of non-null = num_values - num_nulls which is also the number of values in the data section **/ 2: required i32 num_nulls - /** Number of rows in this data page. which means pages change on record boundaries (r = 0) **/ + /** + * Number of rows in this data page. Every page must begin at a + * row boundary (repetition_level = 0): rows must **not** be + * split across page boundaries when using V2 data pages. + **/ 3: required i32 num_rows /** Encoding used for data in this page **/ 4: required Encoding encoding @@ -977,6 +917,9 @@ struct ColumnMetaData { * filter pushdown. */ 16: optional SizeStatistics size_statistics; + + /** Optional statistics specific to GEOMETRY logical type */ + 17: optional GeometryStatistics geometry_stats; } struct EncryptionWithFooterKey { @@ -1001,12 +944,21 @@ struct ColumnChunk { **/ 1: optional string file_path - /** Byte offset in file_path to the ColumnMetaData **/ - 2: required i64 file_offset + /** Deprecated: Byte offset in file_path to the ColumnMetaData + * + * Past use of this field has been inconsistent, with some implementations + * using it to point to the ColumnMetaData and some using it to point to + * the first page in the column chunk. In many cases, the ColumnMetaData at this + * location is wrong. 
This field is now deprecated and should not be used. + * Writers should set this field to 0 if no ColumnMetaData has been written outside + * the footer. + */ + 2: required i64 file_offset = 0 - /** Column metadata for this chunk. This is the same content as what is at - * file_path/file_offset. Having it here has it replicated in the file - * metadata. + /** Column metadata for this chunk. Some writers may also replicate this at the + * location pointed to by file_path/file_offset. + * Note: while marked as optional, this field is in fact required by most major + * Parquet implementations. As such, writers MUST populate this field. **/ 3: optional ColumnMetaData meta_data @@ -1098,7 +1050,7 @@ union ColumnOrder { * ENUM - unsigned byte-wise comparison * LIST - undefined * MAP - undefined - * GEOMETRY - undefined, use GeometryStatistics instead. + * GEOMETRY - undefined * * In the absence of logical types, the sort order is determined by the physical type: * BOOLEAN - false, true @@ -1140,8 +1092,9 @@ struct PageLocation { 2: required i32 compressed_page_size /** - * Index within the RowGroup of the first row of the page; this means pages - * change on record boundaries (r = 0). + * Index within the RowGroup of the first row of the page. When an + * OffsetIndex is present, pages must begin on row boundaries + * (repetition_level = 0). */ 3: required i64 first_row_index } @@ -1209,7 +1162,16 @@ struct ColumnIndex { */ 4: required BoundaryOrder boundary_order - /** A list containing the number of null values for each page **/ + /** + * A list containing the number of null values for each page + * + * Writers SHOULD always write this field even if no null values + * are present or the column is not nullable. + * Readers MUST distinguish between null_counts not being present + * and null_count being 0. + * If null_counts are not present, readers MUST NOT assume all + * null counts are 0. 
+ */ 5: optional list null_counts /** @@ -1229,9 +1191,6 @@ struct ColumnIndex { * Same as repetition_level_histograms except for definitions levels. **/ 7: optional list definition_level_histograms; - - /** A list containing statistics of GEOMETRY logical type for each page */ - 8: optional list geometry_stats; } struct AesGcmV1 { diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index 9f15c8670a3a6..f17f383d72e03 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -44,6 +44,7 @@ #include "parquet/column_writer.h" #include "parquet/file_reader.h" #include "parquet/file_writer.h" +#include "parquet/geometry_statistics.h" #include "parquet/metadata.h" #include "parquet/page_index.h" #include "parquet/platform.h" @@ -1912,22 +1913,8 @@ class TestGeometryLogicalType : public ::testing::Test { for (int i = 0; i < num_row_groups; i++) { auto row_group_metadata = metadata->RowGroup(i); auto column_chunk_metadata = row_group_metadata->ColumnChunk(0); - auto statistics = column_chunk_metadata->statistics(); - CheckStatistics(statistics); - - if (enable_write_page_index) { - // Check column index - auto row_group_index_reader = page_index_reader->RowGroup(i); - auto column_index = row_group_index_reader->GetColumnIndex(0); - auto geometry_column_index = - std::static_pointer_cast(column_index); - CheckColumnIndex(geometry_column_index); - } else { - // Check per-page statistics - auto row_group_reader = file_reader->RowGroup(i); - auto page_reader = row_group_reader->GetColumnPageReader(0); - CheckPageStatistics(page_reader.get()); - } + auto geometry_stats = column_chunk_metadata->geometry_statistics(); + CheckGeometryStatistics(geometry_stats); } // Check the geometry values @@ -1962,10 +1949,8 @@ class TestGeometryLogicalType : public ::testing::Test { EXPECT_EQ(kNumRows, total_values_read); } - void CheckStatistics(std::shared_ptr statistics) { - EXPECT_TRUE(statistics->HasMinMax()); - 
EXPECT_TRUE(statistics->HasGeometryStatistics()); - const GeometryStatistics* geom_stats = statistics->geometry_statistics(); + void CheckGeometryStatistics(std::shared_ptr geom_stats) { + ASSERT_TRUE(geom_stats != nullptr); std::vector geometry_types = geom_stats->GetGeometryTypes(); EXPECT_EQ(1, geometry_types.size()); EXPECT_EQ(1, geometry_types[0]); @@ -1977,65 +1962,6 @@ class TestGeometryLogicalType : public ::testing::Test { EXPECT_FALSE(geom_stats->HasM()); } - void CheckColumnIndex(std::shared_ptr geometry_column_index) { - EXPECT_FALSE(geometry_column_index->geometry_statistics().empty()); - double last_xmin = -geometry::kInf; - double last_ymin = -geometry::kInf; - - size_t num_pages = geometry_column_index->geometry_statistics().size(); - EXPECT_GT(num_pages, 0); - for (size_t i = 0; i < num_pages; i++) { - const auto& geom_stats = geometry_column_index->geometry_statistics()[i]; - std::vector geometry_types = geom_stats.GetGeometryTypes(); - EXPECT_EQ(1, geometry_types.size()); - EXPECT_EQ(1, geometry_types[0]); - EXPECT_GE(geom_stats.GetXMin(), last_xmin); - EXPECT_GT(geom_stats.GetXMax(), geom_stats.GetXMin()); - EXPECT_GT(geom_stats.GetYMin(), last_ymin); - EXPECT_GT(geom_stats.GetYMax(), geom_stats.GetYMin()); - EXPECT_FALSE(geom_stats.HasZ()); - EXPECT_FALSE(geom_stats.HasM()); - last_xmin = geom_stats.GetXMin(); - last_ymin = geom_stats.GetYMin(); - - const auto& min = geometry_column_index->min_values()[i]; - const auto& max = geometry_column_index->max_values()[i]; - double min_x = 0; - double min_y = 0; - double max_x = 0; - double max_y = 0; - test::GetWKBPointCoordinate(min, &min_x, &min_y); - test::GetWKBPointCoordinate(max, &max_x, &max_y); - EXPECT_DOUBLE_EQ(geom_stats.GetXMin(), min_x); - EXPECT_DOUBLE_EQ(geom_stats.GetYMin(), min_y); - EXPECT_DOUBLE_EQ(geom_stats.GetXMax(), max_x); - EXPECT_DOUBLE_EQ(geom_stats.GetYMax(), max_y); - } - } - - void CheckPageStatistics(PageReader* page_reader) { - while (true) { - auto page = 
page_reader->NextPage(); - if (!page) { - break; // No more pages - } - // Check if the page has statistics - if (page->type() == parquet::PageType::DATA_PAGE || - page->type() == parquet::PageType::DATA_PAGE_V2) { - std::shared_ptr data_page = - std::static_pointer_cast(page); - const EncodedStatistics& statistics = data_page->statistics(); - EXPECT_TRUE(statistics.has_geometry_statistics); - EncodedGeometryStatistics geom_stats = statistics.geometry_statistics(); - EXPECT_EQ(1, geom_stats.geometry_types.size()); - EXPECT_GE(geom_stats.xmin, 0); - EXPECT_GT(geom_stats.xmax, geom_stats.xmin); - EXPECT_GT(geom_stats.ymin, 0); - EXPECT_GT(geom_stats.ymax, geom_stats.ymin); - } - } - } - protected: std::shared_ptr file_buf; }; diff --git a/cpp/src/parquet/schema_test.cc b/cpp/src/parquet/schema_test.cc index 31268c118d895..c888279a63df8 100644 --- a/cpp/src/parquet/schema_test.cc +++ b/cpp/src/parquet/schema_test.cc @@ -1599,7 +1599,7 @@ TEST(TestLogicalTypeOperation, LogicalTypeSortOrder) { {LogicalType::BSON(), SortOrder::UNSIGNED}, {LogicalType::UUID(), SortOrder::UNSIGNED}, {LogicalType::Float16(), SortOrder::SIGNED}, - {LogicalType::Geometry(), SortOrder::UNSIGNED}, + {LogicalType::Geometry(), SortOrder::UNKNOWN}, {LogicalType::None(), SortOrder::UNKNOWN}}; for (const ExpectedSortOrder& c : cases) { diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index 5306c1c9a0ba3..6ba04153e853d 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -36,7 +36,6 @@ #include "arrow/visit_data_inline.h" #include "parquet/encoding.h" #include "parquet/exception.h" -#include "parquet/geometry_util_internal.h" #include "parquet/platform.h" #include "parquet/schema.h" @@ -49,282 +48,6 @@ using arrow::util::SafeLoad; namespace parquet { -class GeometryStatisticsImpl { - public: - GeometryStatisticsImpl() = default; - GeometryStatisticsImpl(const GeometryStatisticsImpl&) = default; - - bool Equals(const GeometryStatisticsImpl& other) 
const { - if (is_valid_ != other.is_valid_) { - return false; - } - - if (!is_valid_ && !other.is_valid_) { - return true; - } - - auto geometry_types = bounder_.GeometryTypes(); - auto other_geometry_types = other.bounder_.GeometryTypes(); - if (geometry_types.size() != other_geometry_types.size()) { - return false; - } - - for (size_t i = 0; i < geometry_types.size(); i++) { - if (geometry_types[i] != other_geometry_types[i]) { - return false; - } - } - - return bounder_.Bounds() == other.bounder_.Bounds(); - } - - void Merge(const GeometryStatisticsImpl& other) { - if (!is_valid_ || !other.is_valid_) { - is_valid_ = false; - return; - } - - bounder_.ReadBox(other.bounder_.Bounds()); - bounder_.ReadGeometryTypes(other.bounder_.GeometryTypes()); - } - - void Update(const ByteArray* values, int64_t num_values, int64_t null_count) { - if (!is_valid_) { - return; - } - - geometry::WKBBuffer buf; - try { - for (int64_t i = 0; i < num_values; i++) { - const ByteArray& item = values[i]; - buf.Init(item.ptr, item.len); - bounder_.ReadGeometry(&buf); - } - - bounder_.Flush(); - } catch (ParquetException&) { - is_valid_ = false; - } - } - - void UpdateSpaced(const ByteArray* values, const uint8_t* valid_bits, - int64_t valid_bits_offset, int64_t num_spaced_values, - int64_t num_values, int64_t null_count) { - DCHECK_GT(num_spaced_values, 0); - - geometry::WKBBuffer buf; - try { - ::arrow::internal::VisitSetBitRunsVoid( - valid_bits, valid_bits_offset, num_spaced_values, - [&](int64_t position, int64_t length) { - for (int64_t i = 0; i < length; i++) { - ByteArray item = SafeLoad(values + i + position); - buf.Init(item.ptr, item.len); - bounder_.ReadGeometry(&buf); - } - }); - bounder_.Flush(); - } catch (ParquetException&) { - is_valid_ = false; - } - } - - void Update(const ::arrow::Array& values) { - const auto& binary_array = static_cast(values); - geometry::WKBBuffer buf; - try { - for (int64_t i = 0; i < binary_array.length(); ++i) { - if (!binary_array.IsNull(i)) { - 
std::string_view byte_array = binary_array.GetView(i); - buf.Init(reinterpret_cast(byte_array.data()), - byte_array.length()); - bounder_.ReadGeometry(&buf); - bounder_.Flush(); - } - } - } catch (ParquetException&) { - is_valid_ = false; - } - } - - void Reset() { - bounder_.Reset(); - is_valid_ = true; - } - - EncodedGeometryStatistics Encode() const { - const double* mins = bounder_.Bounds().min; - const double* maxes = bounder_.Bounds().max; - - EncodedGeometryStatistics out; - out.geometry_types = bounder_.GeometryTypes(); - - out.xmin = mins[0]; - out.xmax = maxes[0]; - out.ymin = mins[1]; - out.ymax = maxes[1]; - out.zmin = mins[2]; - out.zmax = maxes[2]; - out.mmin = mins[3]; - out.mmax = maxes[3]; - - return out; - } - - std::string EncodeMin() const { - const double* mins = bounder_.Bounds().min; - bool has_z = !std::isinf(mins[2]); - bool has_m = !std::isinf(mins[3]); - return geometry::MakeWKBPoint(mins, has_z, has_m); - } - - std::string EncodeMax() const { - const double* maxes = bounder_.Bounds().max; - bool has_z = !std::isinf(maxes[2]); - bool has_m = !std::isinf(maxes[3]); - return geometry::MakeWKBPoint(maxes, has_z, has_m); - } - - void Update(const EncodedGeometryStatistics& encoded) { - if (!is_valid_) { - return; - } - - geometry::BoundingBox box; - box.min[0] = encoded.xmin; - box.max[0] = encoded.xmax; - box.min[1] = encoded.ymin; - box.max[1] = encoded.ymax; - - if (encoded.has_z()) { - box.min[2] = encoded.zmin; - box.max[2] = encoded.zmax; - } - - if (encoded.has_m()) { - box.min[3] = encoded.mmin; - box.max[3] = encoded.mmax; - } - - bounder_.ReadBox(box); - bounder_.ReadGeometryTypes(encoded.geometry_types); - } - - bool is_valid() const { return is_valid_; } - - const double* GetMinBounds() { return bounder_.Bounds().min; } - - const double* GetMaxBounds() { return bounder_.Bounds().max; } - - std::vector GetGeometryTypes() const { return bounder_.GeometryTypes(); } - - private: - geometry::WKBGeometryBounder bounder_; - bool 
is_valid_ = true; -}; - -GeometryStatistics::GeometryStatistics() { - impl_ = std::make_unique(); -} - -GeometryStatistics::GeometryStatistics(std::unique_ptr impl) - : impl_(std::move(impl)) {} - -GeometryStatistics::GeometryStatistics(GeometryStatistics&&) = default; - -GeometryStatistics::~GeometryStatistics() = default; - -bool GeometryStatistics::Equals(const GeometryStatistics& other) const { - return impl_->Equals(*other.impl_); -} - -void GeometryStatistics::Merge(const GeometryStatistics& other) { - impl_->Merge(*other.impl_); -} - -void GeometryStatistics::Update(const ByteArray* values, int64_t num_values, - int64_t null_count) { - impl_->Update(values, num_values, null_count); -} - -void GeometryStatistics::UpdateSpaced(const ByteArray* values, const uint8_t* valid_bits, - int64_t valid_bits_offset, - int64_t num_spaced_values, int64_t num_values, - int64_t null_count) { - impl_->UpdateSpaced(values, valid_bits, valid_bits_offset, num_spaced_values, - num_values, null_count); -} - -void GeometryStatistics::Update(const ::arrow::Array& values) { impl_->Update(values); } - -void GeometryStatistics::Reset() { impl_->Reset(); } - -bool GeometryStatistics::is_valid() const { return impl_->is_valid(); } - -EncodedGeometryStatistics GeometryStatistics::Encode() const { return impl_->Encode(); } - -std::string GeometryStatistics::EncodeMin() const { return impl_->EncodeMin(); } - -std::string GeometryStatistics::EncodeMax() const { return impl_->EncodeMax(); } - -void GeometryStatistics::Decode(const EncodedGeometryStatistics& encoded) { - impl_->Update(encoded); -} - -std::shared_ptr GeometryStatistics::clone() const { - std::unique_ptr impl = - std::make_unique(*impl_); - return std::make_shared(std::move(impl)); -} - -double GeometryStatistics::GetXMin() const { - const double* mins = impl_->GetMinBounds(); - return mins[0]; -} - -double GeometryStatistics::GetXMax() const { - const double* maxes = impl_->GetMaxBounds(); - return maxes[0]; -} - -double 
GeometryStatistics::GetYMin() const { - const double* mins = impl_->GetMinBounds(); - return mins[1]; -} - -double GeometryStatistics::GetYMax() const { - const double* maxes = impl_->GetMaxBounds(); - return maxes[1]; -} - -double GeometryStatistics::GetZMin() const { - const double* mins = impl_->GetMinBounds(); - return mins[2]; -} - -double GeometryStatistics::GetZMax() const { - const double* maxes = impl_->GetMaxBounds(); - return maxes[2]; -} - -double GeometryStatistics::GetMMin() const { - const double* mins = impl_->GetMinBounds(); - return mins[3]; -} - -double GeometryStatistics::GetMMax() const { - const double* maxes = impl_->GetMaxBounds(); - return maxes[3]; -} - -bool GeometryStatistics::HasZ() const { return (GetZMax() - GetZMin()) > 0; } - -bool GeometryStatistics::HasM() const { return (GetMMax() - GetMMin()) > 0; } - -std::vector GeometryStatistics::GetGeometryTypes() const { - return impl_->GetGeometryTypes(); -} - namespace { // ---------------------------------------------------------------------- @@ -870,8 +593,7 @@ class TypedStatisticsImpl : public TypedStatistics { TypedStatisticsImpl(const ColumnDescriptor* descr, const std::string& encoded_min, const std::string& encoded_max, int64_t num_values, int64_t null_count, int64_t distinct_count, bool has_min_max, - bool has_null_count, bool has_distinct_count, MemoryPool* pool, - const EncodedGeometryStatistics* geometry_statistics) + bool has_null_count, bool has_distinct_count, MemoryPool* pool) : TypedStatisticsImpl(descr, pool) { TypedStatisticsImpl::IncrementNumValues(num_values); if (has_null_count) { @@ -892,20 +614,11 @@ class TypedStatisticsImpl : public TypedStatistics { PlainDecode(encoded_max, &max_); } has_min_max_ = has_min_max; - - if (geometry_statistics != nullptr) { - geometry_statistics_ = std::make_shared(); - geometry_statistics_->Decode(*geometry_statistics); - } } bool HasDistinctCount() const override { return has_distinct_count_; }; bool HasMinMax() const override { 
return has_min_max_; } bool HasNullCount() const override { return has_null_count_; }; - bool HasGeometryStatistics() const override { return geometry_statistics_ != nullptr; } - const GeometryStatistics* geometry_statistics() const override { - return geometry_statistics_.get(); - } void IncrementNullCount(int64_t n) override { statistics_.null_count += n; @@ -944,15 +657,6 @@ class TypedStatisticsImpl : public TypedStatistics { if (!MinMaxEqual(other)) return false; } - if (HasGeometryStatistics() != other.HasGeometryStatistics()) { - return false; - } - - if (HasGeometryStatistics() && - !geometry_statistics_->Equals(*other.geometry_statistics())) { - return false; - } - return null_count() == other.null_count() && distinct_count() == other.distinct_count() && num_values() == other.num_values(); @@ -963,9 +667,6 @@ class TypedStatisticsImpl : public TypedStatistics { void Reset() override { ResetCounts(); ResetHasFlags(); - if (HasGeometryStatistics()) { - geometry_statistics_->Reset(); - } } void SetMinMax(const T& arg_min, const T& arg_max) override { @@ -996,12 +697,6 @@ class TypedStatisticsImpl : public TypedStatistics { if (other.HasMinMax()) { SetMinMax(other.min(), other.max()); } - - if (this->HasGeometryStatistics() && other.HasGeometryStatistics()) { - this->geometry_statistics_->Merge(*other.geometry_statistics()); - } else if (other.HasGeometryStatistics()) { - this->geometry_statistics_ = other.geometry_statistics()->clone(); - } } void Update(const T* values, int64_t num_values, int64_t null_count) override; @@ -1019,19 +714,7 @@ class TypedStatisticsImpl : public TypedStatistics { return; } - if constexpr (std::is_same::value) { - if (logical_type_ == LogicalType::Type::GEOMETRY) { - if (geometry_statistics_ == nullptr) { - geometry_statistics_ = std::make_unique(); - } - geometry_statistics_->Update(values); - SetGeometryMinMax(); - } else { - SetMinMaxPair(comparator_->GetMinMax(values)); - } - } else { - 
SetMinMaxPair(comparator_->GetMinMax(values)); - } + SetMinMaxPair(comparator_->GetMinMax(values)); } const T& min() const override { return min_; } @@ -1068,9 +751,6 @@ class TypedStatisticsImpl : public TypedStatistics { if (HasDistinctCount()) { s.set_distinct_count(this->distinct_count()); } - if (HasGeometryStatistics() && geometry_statistics_->is_valid()) { - s.set_geometry(geometry_statistics_->Encode()); - } return s; } @@ -1096,7 +776,6 @@ class TypedStatisticsImpl : public TypedStatistics { std::shared_ptr> comparator_; std::shared_ptr min_buffer_, max_buffer_; LogicalType::Type::type logical_type_ = LogicalType::Type::NONE; - std::shared_ptr geometry_statistics_; void PlainEncode(const T& src, std::string* dst) const; void PlainDecode(const std::string& src, T* dst) const; @@ -1143,9 +822,6 @@ class TypedStatisticsImpl : public TypedStatistics { Copy(comparator_->Compare(max_, max) ? max : max_, &max_, max_buffer_.get()); } } - - // Set the minimum and maximum values for geometry columns. 
- void SetGeometryMinMax(); }; template <> @@ -1181,24 +857,6 @@ inline void TypedStatisticsImpl::Copy(const ByteArray& src, ByteA *dst = ByteArray(src.len, buffer->data()); } -template -void TypedStatisticsImpl::SetGeometryMinMax() {} - -template <> -void TypedStatisticsImpl::SetGeometryMinMax() { - DCHECK_EQ(logical_type_, LogicalType::Type::GEOMETRY); - - if (HasGeometryStatistics() && geometry_statistics_->is_valid()) { - std::string min = geometry_statistics_->EncodeMin(); - std::string max = geometry_statistics_->EncodeMax(); - Copy(ByteArray(min), &min_, min_buffer_.get()); - Copy(ByteArray(max), &max_, max_buffer_.get()); - has_min_max_ = true; - } else { - has_min_max_ = false; - } -} - template void TypedStatisticsImpl::Update(const T* values, int64_t num_values, int64_t null_count) { @@ -1210,19 +868,7 @@ void TypedStatisticsImpl::Update(const T* values, int64_t num_values, if (num_values == 0) return; - if constexpr (std::is_same::value) { - if (logical_type_ == LogicalType::Type::GEOMETRY) { - if (geometry_statistics_ == nullptr) { - geometry_statistics_ = std::make_unique(); - } - geometry_statistics_->Update(values, num_values, null_count); - SetGeometryMinMax(); - } else { - SetMinMaxPair(comparator_->GetMinMax(values, num_values)); - } - } else { - SetMinMaxPair(comparator_->GetMinMax(values, num_values)); - } + SetMinMaxPair(comparator_->GetMinMax(values, num_values)); } template @@ -1238,22 +884,8 @@ void TypedStatisticsImpl::UpdateSpaced(const T* values, const uint8_t* va if (num_values == 0) return; - if constexpr (std::is_same::value) { - if (logical_type_ == LogicalType::Type::GEOMETRY) { - if (geometry_statistics_ == nullptr) { - geometry_statistics_ = std::make_unique(); - } - geometry_statistics_->UpdateSpaced(values, valid_bits, valid_bits_offset, - num_spaced_values, num_values, null_count); - SetGeometryMinMax(); - } else { - SetMinMaxPair(comparator_->GetMinMaxSpaced(values, num_spaced_values, valid_bits, - valid_bits_offset)); - } - } 
else { - SetMinMaxPair(comparator_->GetMinMaxSpaced(values, num_spaced_values, valid_bits, - valid_bits_offset)); - } + SetMinMaxPair(comparator_->GetMinMaxSpaced(values, num_spaced_values, valid_bits, + valid_bits_offset)); } template @@ -1407,28 +1039,24 @@ std::shared_ptr Statistics::Make(const ColumnDescriptor* descr, ::arrow::MemoryPool* pool) { DCHECK(encoded_stats != nullptr); - const EncodedGeometryStatistics* geometry_statistics = nullptr; - if (encoded_stats->has_geometry_statistics) { - geometry_statistics = &encoded_stats->geometry_statistics(); - } return Make(descr, encoded_stats->min(), encoded_stats->max(), num_values, encoded_stats->null_count, encoded_stats->distinct_count, encoded_stats->has_min && encoded_stats->has_max, - encoded_stats->has_null_count, encoded_stats->has_distinct_count, pool, - geometry_statistics); + encoded_stats->has_null_count, encoded_stats->has_distinct_count, pool); } -std::shared_ptr Statistics::Make( - const ColumnDescriptor* descr, const std::string& encoded_min, - const std::string& encoded_max, int64_t num_values, int64_t null_count, - int64_t distinct_count, bool has_min_max, bool has_null_count, - bool has_distinct_count, ::arrow::MemoryPool* pool, - const EncodedGeometryStatistics* geometry_statistics) { +std::shared_ptr Statistics::Make(const ColumnDescriptor* descr, + const std::string& encoded_min, + const std::string& encoded_max, + int64_t num_values, int64_t null_count, + int64_t distinct_count, bool has_min_max, + bool has_null_count, bool has_distinct_count, + ::arrow::MemoryPool* pool) { #define MAKE_STATS(CAP_TYPE, KLASS) \ case Type::CAP_TYPE: \ return std::make_shared>( \ descr, encoded_min, encoded_max, num_values, null_count, distinct_count, \ - has_min_max, has_null_count, has_distinct_count, pool, geometry_statistics) + has_min_max, has_null_count, has_distinct_count, pool) switch (descr->physical_type()) { MAKE_STATS(BOOLEAN, BooleanType); diff --git a/cpp/src/parquet/statistics.h 
b/cpp/src/parquet/statistics.h index 4900b46c85cd3..c5da44a7b6665 100644 --- a/cpp/src/parquet/statistics.h +++ b/cpp/src/parquet/statistics.h @@ -114,82 +114,6 @@ std::shared_ptr> MakeComparator(const ColumnDescriptor* d return std::static_pointer_cast>(Comparator::Make(descr)); } -class PARQUET_EXPORT EncodedGeometryStatistics { - public: - static constexpr double kInf = std::numeric_limits::infinity(); - - EncodedGeometryStatistics() = default; - EncodedGeometryStatistics(const EncodedGeometryStatistics&) = default; - EncodedGeometryStatistics(EncodedGeometryStatistics&&) = default; - EncodedGeometryStatistics& operator=(const EncodedGeometryStatistics&) = default; - - double xmin{kInf}; - double xmax{-kInf}; - double ymin{kInf}; - double ymax{-kInf}; - double zmin{kInf}; - double zmax{-kInf}; - double mmin{kInf}; - double mmax{-kInf}; - std::vector geometry_types; - - bool has_z() const { return (zmax - zmin) >= 0; } - - bool has_m() const { return (mmax - mmin) >= 0; } -}; - -class GeometryStatisticsImpl; - -class PARQUET_EXPORT GeometryStatistics { - public: - GeometryStatistics(); - explicit GeometryStatistics(std::unique_ptr impl); - GeometryStatistics(GeometryStatistics&&); - - ~GeometryStatistics(); - - bool Equals(const GeometryStatistics& other) const; - - void Merge(const GeometryStatistics& other); - - void Update(const ByteArray* values, int64_t num_values, int64_t null_count); - - void UpdateSpaced(const ByteArray* values, const uint8_t* valid_bits, - int64_t valid_bits_offset, int64_t num_spaced_values, - int64_t num_values, int64_t null_count); - - void Update(const ::arrow::Array& values); - - void Reset(); - - EncodedGeometryStatistics Encode() const; - std::string EncodeMin() const; - std::string EncodeMax() const; - - bool is_valid() const; - - std::shared_ptr clone() const; - - void Decode(const EncodedGeometryStatistics& encoded); - - double GetXMin() const; - double GetXMax() const; - double GetYMin() const; - double GetYMax() const; - 
double GetZMin() const; - double GetZMax() const; - double GetMMin() const; - double GetMMax() const; - - bool HasZ() const; - bool HasM() const; - - std::vector GetGeometryTypes() const; - - private: - std::unique_ptr impl_; -}; - // ---------------------------------------------------------------------- /// \brief Structure represented encoded statistics to be written to @@ -203,9 +127,6 @@ class PARQUET_EXPORT EncodedStatistics { const std::string& max() const { return max_; } const std::string& min() const { return min_; } - const EncodedGeometryStatistics& geometry_statistics() const { - return geometry_statistics_; - } int64_t null_count = 0; int64_t distinct_count = 0; @@ -221,11 +142,6 @@ class PARQUET_EXPORT EncodedStatistics { // is a null page or not. bool all_null_value = false; - // Statistics for geometry column. geometry_statistics_ is only valid when - // has_geometry_statistics is true. - EncodedGeometryStatistics geometry_statistics_; - bool has_geometry_statistics = false; - // From parquet-mr // Don't write stats larger than the max size rather than truncating. 
The // rationale is that some engines may use the minimum value in the page as @@ -273,12 +189,6 @@ class PARQUET_EXPORT EncodedStatistics { has_distinct_count = true; return *this; } - - EncodedStatistics& set_geometry(EncodedGeometryStatistics geometry_statistics) { - geometry_statistics_ = std::move(geometry_statistics); - has_geometry_statistics = true; - return *this; - } }; /// \brief Base type for computing column statistics while writing a file @@ -306,13 +216,12 @@ class PARQUET_EXPORT Statistics { /// \param[in] has_null_count whether the null_count statistics are set /// \param[in] has_distinct_count whether the distinct_count statistics are set /// \param[in] pool a memory pool to use for any memory allocations, optional - /// \param[in] geometry_statistics the geometry statistics static std::shared_ptr Make( const ColumnDescriptor* descr, const std::string& encoded_min, const std::string& encoded_max, int64_t num_values, int64_t null_count, int64_t distinct_count, bool has_min_max, bool has_null_count, - bool has_distinct_count, ::arrow::MemoryPool* pool = ::arrow::default_memory_pool(), - const EncodedGeometryStatistics* geometry_statistics = NULLPTR); + bool has_distinct_count, + ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()); // Helper function to convert EncodedStatistics to Statistics. 
// EncodedStatistics does not contain number of non-null values, and it can be @@ -341,10 +250,6 @@ class PARQUET_EXPORT Statistics { /// with TypedStatistics::min and max virtual bool HasMinMax() const = 0; - virtual bool HasGeometryStatistics() const { return false; } - - virtual const GeometryStatistics* geometry_statistics() const { return NULLPTR; } - /// \brief Reset state of object to initial (no data observed) state virtual void Reset() = 0; @@ -468,11 +373,10 @@ std::shared_ptr> MakeStatistics( const ColumnDescriptor* descr, const std::string& encoded_min, const std::string& encoded_max, int64_t num_values, int64_t null_count, int64_t distinct_count, bool has_min_max, bool has_null_count, - bool has_distinct_count, ::arrow::MemoryPool* pool = ::arrow::default_memory_pool(), - const EncodedGeometryStatistics* geometry_statistics = NULLPTR) { + bool has_distinct_count, ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) { return std::static_pointer_cast>(Statistics::Make( descr, encoded_min, encoded_max, num_values, null_count, distinct_count, - has_min_max, has_null_count, has_distinct_count, pool, geometry_statistics)); + has_min_max, has_null_count, has_distinct_count, pool)); } } // namespace parquet diff --git a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h index c2f79d19ba993..5917d47687557 100644 --- a/cpp/src/parquet/thrift_internal.h +++ b/cpp/src/parquet/thrift_internal.h @@ -41,6 +41,7 @@ #include "parquet/encryption/internal_file_decryptor.h" #include "parquet/encryption/internal_file_encryptor.h" #include "parquet/exception.h" +#include "parquet/geometry_statistics.h" #include "parquet/platform.h" #include "parquet/properties.h" #include "parquet/statistics.h" @@ -392,10 +393,6 @@ static inline format::Statistics ToThrift(const EncodedStatistics& stats) { statistics.__set_distinct_count(stats.distinct_count); } - if (stats.has_geometry_statistics) { - 
statistics.__set_geometry_stats(ToThrift(stats.geometry_statistics())); - } - return statistics; } diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index db1b61f00f369..b34dd3c0ef984 100644 --- a/cpp/src/parquet/types.cc +++ b/cpp/src/parquet/types.cc @@ -1675,7 +1675,7 @@ class LogicalType::Impl::Geometry final : public LogicalType::Impl::Incompatible private: Geometry(std::string crs, LogicalType::GeometryEdges::edges edges, LogicalType::GeometryEncoding::geometry_encoding encoding) - : LogicalType::Impl(LogicalType::Type::GEOMETRY, SortOrder::UNSIGNED), + : LogicalType::Impl(LogicalType::Type::GEOMETRY, SortOrder::UNKNOWN), LogicalType::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY), crs_(std::move(crs)), edges_(edges), From da55a5564e5d5a6b9ecc0fa50d1970e747022092 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Wed, 30 Oct 2024 22:51:17 +0800 Subject: [PATCH 61/61] Revert some unnecessary changes --- cpp/src/parquet/metadata.cc | 6 ++---- cpp/src/parquet/page_index.h | 2 -- cpp/src/parquet/reader_test.cc | 10 +++++----- cpp/src/parquet/statistics.cc | 6 ------ 4 files changed, 7 insertions(+), 17 deletions(-) diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index 5fd06a3da2c5f..da769b3c6702f 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -37,7 +37,6 @@ #include "parquet/exception.h" #include "parquet/schema.h" #include "parquet/schema_internal.h" -#include "parquet/statistics.h" #include "parquet/thrift_internal.h" namespace parquet { @@ -100,7 +99,7 @@ static std::shared_ptr MakeTypedColumnStats( metadata.statistics.null_count, metadata.statistics.distinct_count, metadata.statistics.__isset.max_value && metadata.statistics.__isset.min_value, metadata.statistics.__isset.null_count, - metadata.statistics.__isset.distinct_count, ::arrow::default_memory_pool()); + metadata.statistics.__isset.distinct_count); } // Default behavior return MakeStatistics( @@ -108,8 +107,7 @@ static 
std::shared_ptr MakeTypedColumnStats( metadata.num_values - metadata.statistics.null_count, metadata.statistics.null_count, metadata.statistics.distinct_count, metadata.statistics.__isset.max && metadata.statistics.__isset.min, - metadata.statistics.__isset.null_count, metadata.statistics.__isset.distinct_count, - ::arrow::default_memory_pool()); + metadata.statistics.__isset.null_count, metadata.statistics.__isset.distinct_count); } static std::shared_ptr MakeColumnGeometryStats( diff --git a/cpp/src/parquet/page_index.h b/cpp/src/parquet/page_index.h index 1c45d6ffd8836..d45c59cab223f 100644 --- a/cpp/src/parquet/page_index.h +++ b/cpp/src/parquet/page_index.h @@ -27,8 +27,6 @@ namespace parquet { class EncodedStatistics; -class EncodedGeometryStatistics; -class GeometryStatistics; struct PageIndexLocation; /// \brief ColumnIndex is a proxy around format::ColumnIndex. diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index f17f383d72e03..6a2fe4ca2201d 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -628,7 +628,7 @@ TEST(TestFileReader, GetRecordReader) { } TEST(TestFileReader, RecordReaderWithExposingDictionary) { - const int kNumRows = 1000; + const int num_rows = 1000; // Make schema schema::NodeVector fields; @@ -655,11 +655,11 @@ TEST(TestFileReader, RecordReaderWithExposingDictionary) { ByteArrayWriter* writer = static_cast(rg_writer->NextColumn()); std::vector raw_unique_data = {"a", "bc", "defg"}; std::vector col_typed; - for (int i = 0; i < kNumRows; i++) { + for (int i = 0; i < num_rows; i++) { std::string_view chosed_data = raw_unique_data[i % raw_unique_data.size()]; col_typed.emplace_back(chosed_data); } - writer->WriteBatch(kNumRows, nullptr, nullptr, col_typed.data()); + writer->WriteBatch(num_rows, nullptr, nullptr, col_typed.data()); rg_writer->Close(); file_writer->Close(); @@ -684,7 +684,7 @@ TEST(TestFileReader, RecordReaderWithExposingDictionary) { 
reinterpret_cast(record_reader->ReadDictionary(&dict_len)); ASSERT_NE(dict, nullptr); ASSERT_EQ(dict_len, raw_unique_data.size()); - ASSERT_EQ(record_reader->ReadRecords(kNumRows), kNumRows); + ASSERT_EQ(record_reader->ReadRecords(num_rows), num_rows); std::shared_ptr<::arrow::ChunkedArray> result_array = record_reader->GetResult(); ASSERT_EQ(result_array->num_chunks(), 1); const std::shared_ptr<::arrow::Array> chunk = result_array->chunk(0); @@ -695,7 +695,7 @@ TEST(TestFileReader, RecordReaderWithExposingDictionary) { // Verify values based on the dictionary from ReadDictionary(). int64_t indices_read = chunk->length(); - ASSERT_EQ(indices_read, kNumRows); + ASSERT_EQ(indices_read, num_rows); for (int i = 0; i < indices_read; ++i) { ASSERT_LT(indices[i], dict_len); ASSERT_EQ(std::string_view(reinterpret_cast(dict[indices[i]].ptr), diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc index 6ba04153e853d..e54b94f1a861a 100644 --- a/cpp/src/parquet/statistics.cc +++ b/cpp/src/parquet/statistics.cc @@ -47,7 +47,6 @@ using arrow::util::SafeCopy; using arrow::util::SafeLoad; namespace parquet { - namespace { // ---------------------------------------------------------------------- @@ -631,8 +630,6 @@ class TypedStatisticsImpl : public TypedStatistics { switch (type) { case LogicalType::Type::FLOAT16: return true; - case LogicalType::Type::GEOMETRY: - return true; default: return false; } @@ -867,7 +864,6 @@ void TypedStatisticsImpl::Update(const T* values, int64_t num_values, IncrementNumValues(num_values); if (num_values == 0) return; - SetMinMaxPair(comparator_->GetMinMax(values, num_values)); } @@ -883,7 +879,6 @@ void TypedStatisticsImpl::UpdateSpaced(const T* values, const uint8_t* va IncrementNumValues(num_values); if (num_values == 0) return; - SetMinMaxPair(comparator_->GetMinMaxSpaced(values, num_spaced_values, valid_bits, valid_bits_offset)); } @@ -1038,7 +1033,6 @@ std::shared_ptr Statistics::Make(const ColumnDescriptor* descr, int64_t 
num_values, ::arrow::MemoryPool* pool) { DCHECK(encoded_stats != nullptr); - return Make(descr, encoded_stats->min(), encoded_stats->max(), num_values, encoded_stats->null_count, encoded_stats->distinct_count, encoded_stats->has_min && encoded_stats->has_max,