diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Predicate.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Predicate.java
index 0966ccc3a7..0da9479c55 100644
--- a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Predicate.java
+++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Predicate.java
@@ -104,7 +104,7 @@
*
Name: STARTS WITH
*
* - SQL semantic:
expr STARTS WITH expr
- * - Since version: 3.3.0
+ *
- Since version: 3.4.0
*
*
*
diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluator.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluator.java
index d8ec0e63a0..38fb59e714 100644
--- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluator.java
+++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluator.java
@@ -307,12 +307,12 @@ ExpressionTransformResult visitStartsWith(final Predicate startsWith) {
"Invalid number of inputs to STARTS_WITH expression. "
+ "Example usage: STARTS_WITH(column, 'test')");
}
- ExpressionTransformResult leftResult = visit(startsWith.getChildren().get(0));
- ExpressionTransformResult rightResult = visit(startsWith.getChildren().get(1));
+ ExpressionTransformResult leftResult = visit(childAt(startsWith, 0));
+ ExpressionTransformResult rightResult = visit(childAt(startsWith, 1));
if (!(StringType.STRING.equivalent(leftResult.outputType)
&& StringType.STRING.equivalent(rightResult.outputType))) {
throw unsupportedExpressionException(
- startsWith, "'starts with' is only supported for string type expressions");
+ startsWith, "'STARTS_WITH' is expects STRING type inputs");
}
// TODO: support non literal as the second input of starts with.
if (!(rightResult.expression instanceof Literal)) {
@@ -329,7 +329,7 @@ ExpressionTransformResult visitStartsWith(final Predicate startsWith) {
right.getValue() == null
? right
: Literal.ofString(
- DefaultExpressionUtils.escape(
+ LikeExpressionEvaluator.escape(
String.valueOf(right.getValue()), /*escapeChar=*/ '%')
.concat("%")))),
BooleanType.BOOLEAN);
diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionUtils.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionUtils.java
index 4cbeacd7f6..b59db8689a 100644
--- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionUtils.java
+++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionUtils.java
@@ -383,17 +383,4 @@ private ColumnVector getVector(int rowId) {
}
};
}
- /** Escapes characters escapeChar in the input String */
- static String escape(String input, char escapeChar) {
- final int len = input.length();
- final StringBuilder escapedString = new StringBuilder(len + len);
- for (int i = 0; i < len; i++) {
- char c = input.charAt(i);
- if (c == escapeChar) {
- escapedString.append('\\');
- }
- escapedString.append(c);
- }
- return escapedString.toString();
- }
}
diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/LikeExpressionEvaluator.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/LikeExpressionEvaluator.java
index 1dc80d2b88..6584caf3df 100644
--- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/LikeExpressionEvaluator.java
+++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/LikeExpressionEvaluator.java
@@ -183,4 +183,18 @@ private static String escapeLikeRegex(String pattern, char escape) {
}
return "(?s)" + javaPattern;
}
+
+ /** Escapes characters escapeChar in the input String */
+ static String escape(String input, char escapeChar) {
+ final int len = input.length();
+ final StringBuilder escapedString = new StringBuilder(len + len);
+ for (int i = 0; i < len; i++) {
+ char c = input.charAt(i);
+ if (c == escapeChar) {
+ escapedString.append('\\');
+ }
+ escapedString.append(c);
+ }
+ return escapedString.toString();
+ }
}
diff --git a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluatorSuite.scala b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluatorSuite.scala
index ed87f20480..bb164bc89c 100644
--- a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluatorSuite.scala
+++ b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluatorSuite.scala
@@ -615,6 +615,16 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa
checkBooleanVectors(new DefaultExpressionEvaluator(
schema, startsWithExpressionAlwaysFalse, BooleanType.BOOLEAN).eval(input), allFalseVector)
+ val colUnicode = stringVector(Seq[String]("中文", "中", "文"))
+ val schemaUnicode = new StructType().add("col", StringType.STRING)
+ val inputUnicode = new DefaultColumnarBatch(colUnicode.getSize,
+ schemaUnicode, Array(colUnicode))
+ val startsWithExpressionUnicode = startsWith(new Column("col"), Literal.ofString("中"))
+ val expOutputVectorLiteralUnicode = booleanVector(Seq[BooleanJ](true, true, false))
+ checkBooleanVectors(new DefaultExpressionEvaluator(schemaUnicode,
+ startsWithExpressionUnicode,
+ BooleanType.BOOLEAN).eval(inputUnicode), expOutputVectorLiteralUnicode)
+
val startsWithExpressionExpression = startsWith(new Column("col1"), new Column("col2"))
val e = intercept[UnsupportedOperationException] {
new DefaultExpressionEvaluator(
@@ -634,7 +644,7 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa
new DefaultExpressionEvaluator(
schema, expr, BooleanType.BOOLEAN).eval(input)
}
- assert(e.getMessage.contains("'starts with' is only supported for string type expressions"))
+ assert(e.getMessage.contains("'STARTS_WITH' is expects STRING type inputs"))
}
checkUnsupportedTypes(BooleanType.BOOLEAN, BooleanType.BOOLEAN)