From ebdb556b30d6fd1a94346892fec47aa01464ad40 Mon Sep 17 00:00:00 2001 From: Runkang He Date: Sun, 22 Oct 2023 09:02:37 +0800 Subject: [PATCH] [CALCITE-6065] Add HEX and UNHEX functions (enabled in Hive and Spark libraries) --- .../adapter/enumerable/RexImpTable.java | 4 + .../apache/calcite/runtime/SqlFunctions.java | 15 ++++ .../calcite/sql/fun/SqlLibraryOperators.java | 18 ++++- .../apache/calcite/util/BuiltInMethod.java | 1 + site/_docs/reference.md | 6 +- .../apache/calcite/test/SqlOperatorTest.java | 78 +++++++++++++++++-- 6 files changed, 113 insertions(+), 9 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java index b3a5565eb5de..dc5352a38ee5 100644 --- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java +++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java @@ -187,6 +187,7 @@ import static org.apache.calcite.sql.fun.SqlLibraryOperators.FROM_BASE64; import static org.apache.calcite.sql.fun.SqlLibraryOperators.FROM_HEX; import static org.apache.calcite.sql.fun.SqlLibraryOperators.GETBIT; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.HEX; import static org.apache.calcite.sql.fun.SqlLibraryOperators.ILIKE; import static org.apache.calcite.sql.fun.SqlLibraryOperators.IS_INF; import static org.apache.calcite.sql.fun.SqlLibraryOperators.IS_NAN; @@ -272,6 +273,7 @@ import static org.apache.calcite.sql.fun.SqlLibraryOperators.TRANSLATE3; import static org.apache.calcite.sql.fun.SqlLibraryOperators.TRUNC; import static org.apache.calcite.sql.fun.SqlLibraryOperators.TRY_CAST; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNHEX; import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNIX_DATE; import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNIX_MICROS; import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNIX_MILLIS; @@ -523,7 +525,9 @@ 
Builder populate() { defineMethod(FROM_BASE64, BuiltInMethod.FROM_BASE64.method, NullPolicy.STRICT); defineMethod(TO_BASE32, BuiltInMethod.TO_BASE32.method, NullPolicy.STRICT); defineMethod(FROM_BASE32, BuiltInMethod.FROM_BASE32.method, NullPolicy.STRICT); + defineMethod(HEX, BuiltInMethod.HEX.method, NullPolicy.STRICT); defineMethod(TO_HEX, BuiltInMethod.TO_HEX.method, NullPolicy.STRICT); + defineMethod(UNHEX, BuiltInMethod.FROM_HEX.method, NullPolicy.STRICT); defineMethod(FROM_HEX, BuiltInMethod.FROM_HEX.method, NullPolicy.STRICT); defineMethod(MD5, BuiltInMethod.MD5.method, NullPolicy.STRICT); defineMethod(SHA1, BuiltInMethod.SHA1.method, NullPolicy.STRICT); diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java index 8a0e04c6d1ce..5d2cd60f1cb1 100644 --- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java +++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java @@ -325,6 +325,21 @@ public static String toHex(ByteString byteString) { return Hex.encodeHexString(byteString.getBytes()); } + /** SQL HEX(binary) function; returns the {@link #toHex} encoding of {@code value} in upper case. */ + public static String hex(ByteString value) { + return toHex(value).toUpperCase(Locale.ROOT); + } + + /** SQL HEX(bigint) function; returns {@link Long#toHexString(long)} of {@code value} in upper case. */ + public static String hex(long value) { + return Long.toHexString(value).toUpperCase(Locale.ROOT); + } + + /** SQL HEX(varchar) function; hex-encodes the UTF-8 bytes of {@code value} in upper case. */ + public static String hex(String value) { + return Hex.encodeHexString(value.getBytes(UTF_8)).toUpperCase(Locale.ROOT); + } + /** SQL MD5(string) function. 
*/ public static String md5(String string) { return DigestUtils.md5Hex(string.getBytes(UTF_8)); diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java index c123c449cd52..d5400853c03b 100644 --- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java +++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java @@ -1454,12 +1454,18 @@ private static RelDataType deriveTypeMapFromEntries(SqlOperatorBinding opBinding * The "FROM_HEX(varchar)" function; converts a hexadecimal-encoded {@code varchar} into bytes. */ @LibraryOperator(libraries = {BIG_QUERY}) - public static final SqlFunction FROM_HEX = + public static final SqlBasicFunction FROM_HEX = SqlBasicFunction.create("FROM_HEX", ReturnTypes.VARBINARY_NULLABLE, OperandTypes.CHARACTER, SqlFunctionCategory.STRING); + /** The "UNHEX(varchar)" function, Hive and Spark's + * equivalent to {@link #FROM_HEX}. */ + @LibraryOperator(libraries = {HIVE, SPARK}) + public static final SqlFunction UNHEX = + FROM_HEX.withName("UNHEX"); + /** * The "TO_HEX(binary)" function; converts {@code binary} into a hexadecimal varchar. */ @@ -1470,6 +1476,16 @@ private static RelDataType deriveTypeMapFromEntries(SqlOperatorBinding opBinding OperandTypes.BINARY, SqlFunctionCategory.STRING); + /** + * The "HEX(binary or bigint or varchar)" function; converts the operand into a hexadecimal varchar. + */ + @LibraryOperator(libraries = {HIVE, SPARK}) + public static final SqlFunction HEX = + SqlBasicFunction.create("HEX", + ReturnTypes.VARCHAR_NULLABLE, + OperandTypes.BINARY.or(OperandTypes.INTEGER).or(OperandTypes.CHARACTER), + SqlFunctionCategory.STRING); + /** The "FORMAT_NUMBER(value, decimalOrFormat)" function. 
*/ @LibraryOperator(libraries = {HIVE, SPARK}) public static final SqlFunction FORMAT_NUMBER = diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java index f0df06185201..6842d4babe9b 100644 --- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java +++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java @@ -383,6 +383,7 @@ public enum BuiltInMethod { FROM_BASE64(SqlFunctions.class, "fromBase64", String.class), TO_BASE32(SqlFunctions.class, "toBase32", String.class), FROM_BASE32(SqlFunctions.class, "fromBase32", String.class), + HEX(SqlFunctions.class, "hex", ByteString.class), TO_HEX(SqlFunctions.class, "toHex", ByteString.class), FROM_HEX(SqlFunctions.class, "fromHex", String.class), MD5(SqlFunctions.class, "md5", String.class), diff --git a/site/_docs/reference.md b/site/_docs/reference.md index b2a2483f88de..aff64ea02e69 100644 --- a/site/_docs/reference.md +++ b/site/_docs/reference.md @@ -2649,7 +2649,7 @@ BigQuery's type system uses confusingly different names for types and functions: * Similarly, `DATETIME(string)` returns a Calcite `TIMESTAMP`. 
| C | Operator syntax | Description -|:- |:-----------------------------------------------|:----------- +|: |:-----------------------------------------------|:----------- | p | expr :: type | Casts *expr* to *type* | m | expr1 <=> expr2 | Whether two values are equal, treating null values as the same, and it's similar to `IS NOT DISTINCT FROM` | * | ACOSH(numeric) | Returns the inverse hyperbolic cosine of *numeric* @@ -2739,6 +2739,9 @@ BigQuery's type system uses confusingly different names for types and functions: | b | FORMAT_TIMESTAMP(string timestamp) | Formats *timestamp* according to the specified format *string* | s | GETBIT(value, position) | Equivalent to `BIT_GET(value, position)` | b o | GREATEST(expr [, expr ]*) | Returns the greatest of the expressions +| h s | HEX(binary) | Converts *binary* into a hexadecimal varchar +| h s | HEX(bigint) | Converts *bigint* into a shortened hexadecimal varchar +| h s | HEX(varchar) | Converts *varchar* into a hexadecimal varchar | b h s | IF(condition, value1, value2) | Returns *value1* if *condition* is TRUE, *value2* otherwise | b | IFNULL(value1, value2) | Equivalent to `NVL(value1, value2)` | p | string1 ILIKE string2 [ ESCAPE string3 ] | Whether *string1* matches pattern *string2*, ignoring case (similar to `LIKE`) @@ -2852,6 +2855,7 @@ BigQuery's type system uses confusingly different names for types and functions: | b o p | TRANSLATE(expr, fromString, toString) | Returns *expr* with all occurrences of each character in *fromString* replaced by its corresponding character in *toString*. 
Characters in *expr* that are not in *fromString* are not replaced | b | TRUNC(numeric1 [, numeric2 ]) | Truncates *numeric1* to optionally *numeric2* (if not specified 0) places right to the decimal point | q | TRY_CAST(value AS type) | Converts *value* to *type*, returning NULL if conversion fails +| h s | UNHEX(varchar) | Converts a hexadecimal-encoded *varchar* into bytes | b | UNIX_MICROS(timestamp) | Returns the number of microseconds since 1970-01-01 00:00:00 | b | UNIX_MILLIS(timestamp) | Returns the number of milliseconds since 1970-01-01 00:00:00 | b | UNIX_SECONDS(timestamp) | Returns the number of seconds since 1970-01-01 00:00:00 diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java index bfc14daab707..3bf59a109995 100644 --- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java +++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java @@ -4520,22 +4520,86 @@ void testBitGetFunc(SqlOperatorFixture f, String functionName) { f.checkNull("to_hex(cast(null as varbinary))"); } + @Test void testHex() { + final SqlOperatorFixture f0 = fixture().setFor(SqlLibraryOperators.HEX); + f0.checkFails("^hex(x'')^", + "No match found for function signature HEX\\(\\)", + false); + final Consumer<SqlOperatorFixture> consumer = f -> { + // test with binary + f.checkString("hex(x'00010203AAEEEFFF')", + "00010203AAEEEFFF", + "VARCHAR NOT NULL"); + f.checkString("hex(x'')", "", "VARCHAR NOT NULL"); + f.checkNull("hex(cast(null as varbinary))"); + + // test with bigint + f.checkString("hex(0)", "0", "VARCHAR NOT NULL"); + f.checkString("hex(17)", + "11", + "VARCHAR NOT NULL"); + f.checkString("hex(1234567)", "12D687", "VARCHAR NOT NULL"); + f.checkNull("hex(cast(null as bigint))"); + + // test with varchar + f.checkString("hex('abcDEF123')", + "616263444546313233", + "VARCHAR NOT NULL"); + f.checkString("hex(_UTF8'\u4F60\u597D')", + "E4BDA0E5A5BD", + "VARCHAR NOT 
NULL"); + f.checkString("hex('')", "", "VARCHAR NOT NULL"); + f.checkNull("hex(cast(null as varchar))"); + }; + f0.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), consumer); + } + + /** Generates parameters to test both FROM_HEX and UNHEX functions. */ + private Stream<Arguments> fromHexParameters() { + SqlOperatorFixture f0 = fixture() + .withLibrary(SqlLibrary.BIG_QUERY) + .setFor(SqlLibraryOperators.FROM_HEX, VmName.EXPAND); + SqlOperatorFixture f1 = fixture() + .withLibrary(SqlLibrary.HIVE) + .setFor(SqlLibraryOperators.UNHEX, VmName.EXPAND); + SqlOperatorFixture f2 = f1.withLibrary(SqlLibrary.SPARK); + return Stream.of( + () -> new Object[] {f0, "FROM_HEX"}, + () -> new Object[] {f1, "UNHEX"}, + () -> new Object[] {f2, "UNHEX"}); + } + + /** Tests that FROM_HEX and UNHEX are not available by default. FROM_HEX is + * only available in the BigQuery library and UNHEX is only available in the Hive/Spark libraries. */ @Test void testFromHex() { final SqlOperatorFixture f0 = fixture().setFor(SqlLibraryOperators.FROM_HEX); - f0.checkFails("^from_hex('')^", + f0.checkFails("^FROM_HEX('')^", "No match found for function signature FROM_HEX\\(\\)", false); - final SqlOperatorFixture f = f0.withLibrary(SqlLibrary.BIG_QUERY); - f.checkString("from_hex('00010203aaeeefff')", + final SqlOperatorFixture f1 = fixture().setFor(SqlLibraryOperators.UNHEX); + f1.checkFails("^UNHEX('')^", + "No match found for function signature UNHEX\\(\\)", + false); + } + + @ParameterizedTest + @MethodSource("fromHexParameters") + void testFromHexFunc(SqlOperatorFixture f, String functionName) { + f.checkString(functionName + "('00010203aaeeefff')", "00010203aaeeefff", "VARBINARY NOT NULL"); - - f.checkString("from_hex('666f6f626172')", + f.checkString(functionName + "('00010203AAEEEFFF')", + "00010203aaeeefff", + "VARBINARY NOT NULL"); + f.checkString(functionName + "('666f6f626172')", + "666f6f626172", + "VARBINARY NOT NULL"); + f.checkString(functionName + "('666F6F626172')", "666f6f626172", 
"VARBINARY NOT NULL"); - f.checkString("from_hex('')", "", "VARBINARY NOT NULL"); - f.checkNull("from_hex(cast(null as varchar))"); + f.checkString(functionName + "('')", "", "VARBINARY NOT NULL"); + f.checkNull(functionName + "(cast(null as varchar))"); } @Test void testRepeatFunc() {