Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -354,4 +354,19 @@ public static long crc32(byte[] bytes) {
checksum.update(bytes, 0, bytes.length);
return checksum.getValue();
}

/**
 * Computes the code point of the leading character of {@code str}.
 *
 * <p>The Ascii expression routes both its interpreted evaluation and its generated code
 * through this helper, which keeps the generated Java down to one static call.
 *
 * @param str the input string; callers are expected to pass a non-null value
 * @return the code point of the first character, or 0 when {@code str} has no characters
 */
public static int ascii(UTF8String str) {
  // Slice off the first character before converting, so `toString` never sees the whole input.
  UTF8String head = str.substring(0, 1);
  return head.numChars() > 0 ? head.toString().codePointAt(0) : 0;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2774,27 +2774,11 @@ case class Ascii(child: Expression)
/** Declares the single expected input type: a collated string type with trim collation enabled. */
override def inputTypes: Seq[AbstractDataType] =
  StringTypeWithCollation(supportsTrimCollation = true) :: Nil

/**
 * Evaluates to the code point of the first character of `string`, or 0 when it is empty.
 *
 * @param string the input value, cast to `UTF8String`
 */
protected override def nullSafeEval(string: Any): Any = {
  // Convert only the leading character to a JVM string; the rest of the input is never needed.
  val head = string.asInstanceOf[UTF8String].substring(0, 1)
  if (head.numChars > 0) head.toString.codePointAt(0) else 0
}
/**
 * Evaluates this expression by delegating to `ExpressionImplUtils.ascii`.
 *
 * @param string the input value, cast to `UTF8String` before the call
 */
protected override def nullSafeEval(string: Any): Any = {
  val input = string.asInstanceOf[UTF8String]
  ExpressionImplUtils.ascii(input)
}

// Produces the generated-Java form of this expression.
// NOTE(review): the body below makes two code-generation calls. The result of the first
// (nullSafeCodeGen with an inline template) is discarded; only the trailing defineCodeGen
// call is the method's value. The inline template looks like leftover pre-refactor code that
// duplicates what ExpressionImplUtils.ascii does - confirm and remove it.
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
// Inline variant: take the first character, then assign its code point (or 0 when the
// substring is empty) to ev.value.
nullSafeCodeGen(ctx, ev, (child) => {
// Name for the generated local variable, obtained from ctx.freshName.
val firstCharStr = ctx.freshName("firstCharStr")
s"""
UTF8String $firstCharStr = $child.substring(0, 1);
if ($firstCharStr.numChars() > 0) {
${ev.value} = $firstCharStr.toString().codePointAt(0);
} else {
${ev.value} = 0;
}
"""})
// Returned value: a single static call to ExpressionImplUtils.ascii, referenced by its
// fully qualified class name; `c` is presumably the child's generated value - verify.
defineCodeGen(ctx, ev, c => s"${classOf[ExpressionImplUtils].getName}.ascii($c)")
}

/** Returns a copy of this expression whose child is replaced by `newChild`. */
override protected def withNewChildInternal(newChild: Expression): Ascii = {
  copy(child = newChild)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,11 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
val a = $"a".string.at(0)
checkEvaluation(Ascii(Literal("efg")), 101, create_row("abdef"))
checkEvaluation(Ascii(a), 97, create_row("abdef"))
// U+1F600 is a supplementary-plane code point; ascii must return the full code point
// (128512 via codePointAt), not the leading UTF-16 surrogate (55357 via charAt).
// scalastyle:off
checkEvaluation(Ascii(Literal("😀")), 128512, create_row("😀"))
// scalastyle:on
checkEvaluation(Ascii(a), 0, create_row(""))
checkEvaluation(Ascii(a), null, create_row(null))
checkEvaluation(Ascii(Literal.create(null, StringType)), null, create_row("abdef"))
Expand Down