Skip to content

Commit

Permalink
[CALCITE-6065] Add HEX and UNHEX functions (enabled in Hive and Spark…
Browse files Browse the repository at this point in the history
… libraries)
  • Loading branch information
herunkang2018 committed Oct 22, 2023
1 parent c83ac69 commit ebdb556
Show file tree
Hide file tree
Showing 6 changed files with 113 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FROM_BASE64;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FROM_HEX;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.GETBIT;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.HEX;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ILIKE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.IS_INF;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.IS_NAN;
Expand Down Expand Up @@ -272,6 +273,7 @@
import static org.apache.calcite.sql.fun.SqlLibraryOperators.TRANSLATE3;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.TRUNC;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.TRY_CAST;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNHEX;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNIX_DATE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNIX_MICROS;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNIX_MILLIS;
Expand Down Expand Up @@ -523,7 +525,9 @@ Builder populate() {
defineMethod(FROM_BASE64, BuiltInMethod.FROM_BASE64.method, NullPolicy.STRICT);
defineMethod(TO_BASE32, BuiltInMethod.TO_BASE32.method, NullPolicy.STRICT);
defineMethod(FROM_BASE32, BuiltInMethod.FROM_BASE32.method, NullPolicy.STRICT);
defineMethod(HEX, BuiltInMethod.HEX.method, NullPolicy.STRICT);
defineMethod(TO_HEX, BuiltInMethod.TO_HEX.method, NullPolicy.STRICT);
defineMethod(UNHEX, BuiltInMethod.FROM_HEX.method, NullPolicy.STRICT);
defineMethod(FROM_HEX, BuiltInMethod.FROM_HEX.method, NullPolicy.STRICT);
defineMethod(MD5, BuiltInMethod.MD5.method, NullPolicy.STRICT);
defineMethod(SHA1, BuiltInMethod.SHA1.method, NullPolicy.STRICT);
Expand Down
15 changes: 15 additions & 0 deletions core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,21 @@ public static String toHex(ByteString byteString) {
return Hex.encodeHexString(byteString.getBytes());
}

/** Implements the SQL {@code HEX(binary)} function: hex-encodes the bytes of
 * {@code value} via {@link #toHex(ByteString)} and upper-cases the result
 * using the root locale. */
public static String hex(ByteString value) {
  final String encoded = toHex(value);
  return encoded.toUpperCase(Locale.ROOT);
}

/** Implements the SQL {@code HEX(bigint)} function: formats {@code value} as
 * an unsigned base-16 number without leading zeros, using upper-case digits.
 * Negative inputs therefore appear in their 64-bit two's-complement form. */
public static String hex(long value) {
  final String digits = Long.toUnsignedString(value, 16);
  return digits.toUpperCase(Locale.ROOT);
}

/** Implements the SQL {@code HEX(varchar)} function: encodes {@code value} as
 * UTF-8, hex-encodes the resulting bytes, and upper-cases the result using
 * the root locale. */
public static String hex(String value) {
  final byte[] utf8Bytes = value.getBytes(UTF_8);
  final String encoded = Hex.encodeHexString(utf8Bytes);
  return encoded.toUpperCase(Locale.ROOT);
}

/** SQL MD5(string) function. */
public static String md5(String string) {
return DigestUtils.md5Hex(string.getBytes(UTF_8));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1454,12 +1454,18 @@ private static RelDataType deriveTypeMapFromEntries(SqlOperatorBinding opBinding
* The "FROM_HEX(varchar)" function; converts a hexadecimal-encoded {@code varchar} into bytes.
*/
@LibraryOperator(libraries = {BIG_QUERY})
public static final SqlFunction FROM_HEX =
public static final SqlBasicFunction FROM_HEX =
SqlBasicFunction.create("FROM_HEX",
ReturnTypes.VARBINARY_NULLABLE,
OperandTypes.CHARACTER,
SqlFunctionCategory.STRING);

/** The "UNHEX(varchar)" function, the Hive and Spark
* equivalent of {@link #FROM_HEX}.
*
* <p>It is created by renaming {@link #FROM_HEX} to "UNHEX" via
* {@code withName}, rather than by a separate definition. */
@LibraryOperator(libraries = {HIVE, SPARK})
public static final SqlFunction UNHEX =
FROM_HEX.withName("UNHEX");

/**
* The "TO_HEX(binary)" function; converts {@code binary} into a hexadecimal varchar.
*/
Expand All @@ -1470,6 +1476,16 @@ private static RelDataType deriveTypeMapFromEntries(SqlOperatorBinding opBinding
OperandTypes.BINARY,
SqlFunctionCategory.STRING);

/**
* The "HEX(binary or bigint or varchar)" function; converts its argument
* into a hexadecimal varchar.
*
* <p>Enabled in the Hive and Spark libraries. The operand checker accepts a
* binary, integer or character argument, and the declared return type is a
* nullable varchar.
*/
@LibraryOperator(libraries = {HIVE, SPARK})
public static final SqlFunction HEX =
SqlBasicFunction.create("HEX",
ReturnTypes.VARCHAR_NULLABLE,
OperandTypes.BINARY.or(OperandTypes.INTEGER).or(OperandTypes.CHARACTER),
SqlFunctionCategory.STRING);

/** The "FORMAT_NUMBER(value, decimalOrFormat)" function. */
@LibraryOperator(libraries = {HIVE, SPARK})
public static final SqlFunction FORMAT_NUMBER =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ public enum BuiltInMethod {
FROM_BASE64(SqlFunctions.class, "fromBase64", String.class),
TO_BASE32(SqlFunctions.class, "toBase32", String.class),
FROM_BASE32(SqlFunctions.class, "fromBase32", String.class),
HEX(SqlFunctions.class, "hex", ByteString.class),
TO_HEX(SqlFunctions.class, "toHex", ByteString.class),
FROM_HEX(SqlFunctions.class, "fromHex", String.class),
MD5(SqlFunctions.class, "md5", String.class),
Expand Down
6 changes: 5 additions & 1 deletion site/_docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -2649,7 +2649,7 @@ BigQuery's type system uses confusingly different names for types and functions:
* Similarly, `DATETIME(string)` returns a Calcite `TIMESTAMP`.

| C | Operator syntax | Description
|:- |:-----------------------------------------------|:-----------
|: |:-----------------------------------------------|:-----------
| p | expr :: type | Casts *expr* to *type*
| m | expr1 <=> expr2 | Whether two values are equal, treating null values as the same, and it's similar to `IS NOT DISTINCT FROM`
| * | ACOSH(numeric) | Returns the inverse hyperbolic cosine of *numeric*
Expand Down Expand Up @@ -2739,6 +2739,9 @@ BigQuery's type system uses confusingly different names for types and functions:
| b | FORMAT_TIMESTAMP(string timestamp) | Formats *timestamp* according to the specified format *string*
| s | GETBIT(value, position) | Equivalent to `BIT_GET(value, position)`
| b o | GREATEST(expr [, expr ]*) | Returns the greatest of the expressions
| h s | HEX(binary) | Converts *binary* into a hexadecimal varchar
| h s | HEX(bigint) | Converts *bigint* into a shortened hexadecimal varchar
| h s | HEX(varchar) | Converts *varchar* into a hexadecimal varchar
| b h s | IF(condition, value1, value2) | Returns *value1* if *condition* is TRUE, *value2* otherwise
| b | IFNULL(value1, value2) | Equivalent to `NVL(value1, value2)`
| p | string1 ILIKE string2 [ ESCAPE string3 ] | Whether *string1* matches pattern *string2*, ignoring case (similar to `LIKE`)
Expand Down Expand Up @@ -2852,6 +2855,7 @@ BigQuery's type system uses confusingly different names for types and functions:
| b o p | TRANSLATE(expr, fromString, toString) | Returns *expr* with all occurrences of each character in *fromString* replaced by its corresponding character in *toString*. Characters in *expr* that are not in *fromString* are not replaced
| b | TRUNC(numeric1 [, numeric2 ]) | Truncates *numeric1* to optionally *numeric2* (if not specified 0) places right to the decimal point
| q | TRY_CAST(value AS type) | Converts *value* to *type*, returning NULL if conversion fails
| h s | UNHEX(varchar) | Converts a hexadecimal-encoded *varchar* into bytes
| b | UNIX_MICROS(timestamp) | Returns the number of microseconds since 1970-01-01 00:00:00
| b | UNIX_MILLIS(timestamp) | Returns the number of milliseconds since 1970-01-01 00:00:00
| b | UNIX_SECONDS(timestamp) | Returns the number of seconds since 1970-01-01 00:00:00
Expand Down
78 changes: 71 additions & 7 deletions testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -4520,22 +4520,86 @@ void testBitGetFunc(SqlOperatorFixture f, String functionName) {
f.checkNull("to_hex(cast(null as varbinary))");
}

/** Tests the HEX function: it must be unavailable when no library is enabled,
 * and in the Hive and Spark libraries it must accept binary, bigint and
 * varchar arguments, returning upper-case hexadecimal text, or NULL for a
 * NULL argument of each of those types. */
@Test void testHex() {
  final SqlOperatorFixture f0 = fixture().setFor(SqlLibraryOperators.HEX);
  // Without a library enabled, the function must not resolve.
  f0.checkFails("^hex(x'')^",
      "No match found for function signature HEX\\(<BINARY>\\)",
      false);
  final Consumer<SqlOperatorFixture> consumer = f -> {
    // test with binary
    f.checkString("hex(x'00010203AAEEEFFF')",
        "00010203AAEEEFFF",
        "VARCHAR NOT NULL");
    f.checkString("hex(x'')", "", "VARCHAR NOT NULL");
    f.checkNull("hex(cast(null as varbinary))");

    // test with bigint
    f.checkString("hex(0)", "0", "VARCHAR NOT NULL");
    f.checkString("hex(17)",
        "11",
        "VARCHAR NOT NULL");
    f.checkString("hex(1234567)", "12D687", "VARCHAR NOT NULL");
    // The NULL check must use the operand type of this section; a varbinary
    // NULL here would only repeat the binary case above.
    f.checkNull("hex(cast(null as bigint))");

    // test with varchar
    f.checkString("hex('abcDEF123')",
        "616263444546313233",
        "VARCHAR NOT NULL");
    f.checkString("hex(_UTF8'\u4F60\u597D')",
        "E4BDA0E5A5BD",
        "VARCHAR NOT NULL");
    f.checkString("hex('')", "", "VARCHAR NOT NULL");
    f.checkNull("hex(cast(null as varchar))");
  };
  f0.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), consumer);
}

/** Supplies the (fixture, function name) pairs that let one parameterized
 * test cover FROM_HEX in the BigQuery library and UNHEX in the Hive and Spark
 * libraries. */
private Stream<Arguments> fromHexParameters() {
  final SqlOperatorFixture bigQueryFixture =
      fixture()
          .withLibrary(SqlLibrary.BIG_QUERY)
          .setFor(SqlLibraryOperators.FROM_HEX, VmName.EXPAND);
  final SqlOperatorFixture hiveFixture =
      fixture()
          .withLibrary(SqlLibrary.HIVE)
          .setFor(SqlLibraryOperators.UNHEX, VmName.EXPAND);
  // The Spark fixture is derived from the Hive one, so it keeps the same
  // operator under test and only switches the library.
  final SqlOperatorFixture sparkFixture =
      hiveFixture.withLibrary(SqlLibrary.SPARK);
  return Stream.of(
      Arguments.of(bigQueryFixture, "FROM_HEX"),
      Arguments.of(hiveFixture, "UNHEX"),
      Arguments.of(sparkFixture, "UNHEX"));
}

/** Tests that FROM_HEX and UNHEX are not available by default. FROM_HEX is
* only available in the BigQuery library, and UNHEX is only available in the Hive and Spark libraries. */
@Test void testFromHex() {
final SqlOperatorFixture f0 = fixture().setFor(SqlLibraryOperators.FROM_HEX);
f0.checkFails("^from_hex('')^",
f0.checkFails("^FROM_HEX('')^",
"No match found for function signature FROM_HEX\\(<CHARACTER>\\)",
false);
final SqlOperatorFixture f = f0.withLibrary(SqlLibrary.BIG_QUERY);
f.checkString("from_hex('00010203aaeeefff')",
final SqlOperatorFixture f1 = fixture().setFor(SqlLibraryOperators.UNHEX);
f1.checkFails("^UNHEX('')^",
"No match found for function signature UNHEX\\(<CHARACTER>\\)",
false);
}

@ParameterizedTest
@MethodSource("fromHexParameters")
void testFromHexFunc(SqlOperatorFixture f, String functionName) {
f.checkString(functionName + "('00010203aaeeefff')",
"00010203aaeeefff",
"VARBINARY NOT NULL");

f.checkString("from_hex('666f6f626172')",
f.checkString(functionName + "('00010203AAEEEFFF')",
"00010203aaeeefff",
"VARBINARY NOT NULL");
f.checkString(functionName + "('666f6f626172')",
"666f6f626172",
"VARBINARY NOT NULL");
f.checkString(functionName + "('666F6F626172')",
"666f6f626172",
"VARBINARY NOT NULL");

f.checkString("from_hex('')", "", "VARBINARY NOT NULL");
f.checkNull("from_hex(cast(null as varchar))");
f.checkString(functionName + "('')", "", "VARBINARY NOT NULL");
f.checkNull(functionName + "(cast(null as varchar))");
}

@Test void testRepeatFunc() {
Expand Down

0 comments on commit ebdb556

Please sign in to comment.