diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Predicate.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Predicate.java
index 0966ccc3a7..0da9479c55 100644
--- a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Predicate.java
+++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Predicate.java
@@ -104,7 +104,7 @@ *
  • Name: STARTS WITH * * *
diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluator.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluator.java
index d8ec0e63a0..38fb59e714 100644
--- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluator.java
+++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluator.java
@@ -307,12 +307,12 @@ ExpressionTransformResult visitStartsWith(final Predicate startsWith) {
             "Invalid number of inputs to STARTS_WITH expression. "
                 + "Example usage: STARTS_WITH(column, 'test')");
       }
-      ExpressionTransformResult leftResult = visit(startsWith.getChildren().get(0));
-      ExpressionTransformResult rightResult = visit(startsWith.getChildren().get(1));
+      ExpressionTransformResult leftResult = visit(childAt(startsWith, 0));
+      ExpressionTransformResult rightResult = visit(childAt(startsWith, 1));
       if (!(StringType.STRING.equivalent(leftResult.outputType)
           && StringType.STRING.equivalent(rightResult.outputType))) {
         throw unsupportedExpressionException(
-            startsWith, "'starts with' is only supported for string type expressions");
+            startsWith, "'STARTS_WITH' expects STRING type inputs");
       }
       // TODO: support non literal as the second input of starts with.
       if (!(rightResult.expression instanceof Literal)) {
@@ -329,7 +329,7 @@ ExpressionTransformResult visitStartsWith(final Predicate startsWith) {
               right.getValue() == null
                   ? right
                   : Literal.ofString(
-                      DefaultExpressionUtils.escape(
+                      LikeExpressionEvaluator.escape(
                               String.valueOf(right.getValue()), /*escapeChar=*/ '%')
                           .concat("%")))),
           BooleanType.BOOLEAN);
diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionUtils.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionUtils.java
index 4cbeacd7f6..b59db8689a 100644
--- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionUtils.java
+++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionUtils.java
@@ -383,17 +383,4 @@ private ColumnVector getVector(int rowId) {
       }
     };
   }
-  /** Escapes characters escapeChar in the input String */
-  static String escape(String input, char escapeChar) {
-    final int len = input.length();
-    final StringBuilder escapedString = new StringBuilder(len + len);
-    for (int i = 0; i < len; i++) {
-      char c = input.charAt(i);
-      if (c == escapeChar) {
-        escapedString.append('\\');
-      }
-      escapedString.append(c);
-    }
-    return escapedString.toString();
-  }
 }
diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/LikeExpressionEvaluator.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/LikeExpressionEvaluator.java
index 1dc80d2b88..6584caf3df 100644
--- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/LikeExpressionEvaluator.java
+++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/LikeExpressionEvaluator.java
@@ -183,4 +183,18 @@ private static String escapeLikeRegex(String pattern, char escape) {
     }
     return "(?s)" + javaPattern;
   }
+
+  /** Prefixes every occurrence of {@code escapeChar} in {@code input} with a backslash. */
+  static String escape(String input, char escapeChar) {
+    final int len = input.length();
+    final StringBuilder escapedString = new StringBuilder(len + len);
+    for (int i = 0; i < len; i++) {
+      char c = input.charAt(i);
+      if (c == escapeChar) {
+        escapedString.append('\\');
+      }
+      escapedString.append(c);
+    }
+    return escapedString.toString();
+  }
 }
diff --git a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluatorSuite.scala b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluatorSuite.scala
index ed87f20480..bb164bc89c 100644
--- a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluatorSuite.scala
+++ b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluatorSuite.scala
@@ -615,6 +615,16 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa
     checkBooleanVectors(new DefaultExpressionEvaluator(
       schema, startsWithExpressionAlwaysFalse, BooleanType.BOOLEAN).eval(input), allFalseVector)
 
+    val colUnicode = stringVector(Seq[String]("中文", "中", "文"))
+    val schemaUnicode = new StructType().add("col", StringType.STRING)
+    val inputUnicode = new DefaultColumnarBatch(colUnicode.getSize,
+      schemaUnicode, Array(colUnicode))
+    val startsWithExpressionUnicode = startsWith(new Column("col"), Literal.ofString("中"))
+    val expOutputVectorLiteralUnicode = booleanVector(Seq[BooleanJ](true, true, false))
+    checkBooleanVectors(new DefaultExpressionEvaluator(schemaUnicode,
+      startsWithExpressionUnicode,
+      BooleanType.BOOLEAN).eval(inputUnicode), expOutputVectorLiteralUnicode)
+
     val startsWithExpressionExpression = startsWith(new Column("col1"), new Column("col2"))
     val e = intercept[UnsupportedOperationException] {
       new DefaultExpressionEvaluator(
@@ -634,7 +644,7 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa
         new DefaultExpressionEvaluator(
           schema, expr, BooleanType.BOOLEAN).eval(input)
       }
-      assert(e.getMessage.contains("'starts with' is only supported for string type expressions"))
+      assert(e.getMessage.contains("'STARTS_WITH' expects STRING type inputs"))
     }
 
     checkUnsupportedTypes(BooleanType.BOOLEAN, BooleanType.BOOLEAN)