Skip to content

Commit

Permalink
More chars support (#513)
Browse files Browse the repository at this point in the history
* More chars support

Now that the compiler can support Char literals, more features can be
added including a chars iterator for strings.

* UTF-8 encoding/decoding

The implementation of `CharsIterator` now also supports multi-byte UTF-8
decoding. I also added a `Char#bytes` method to convert the u32
representation of the character codepoint into the required number of
bytes for proper UTF-8 encoding.

* Renaming 'CharIterator' to 'CharsIterator'
  • Loading branch information
kengorab authored Nov 29, 2024
1 parent 1601beb commit 5043b18
Show file tree
Hide file tree
Showing 8 changed files with 148 additions and 45 deletions.
20 changes: 11 additions & 9 deletions projects/compiler/example.abra
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
val x = 'a'
println(x)
val chars = "a£→😀".chars()

val t = x == 'a'
val f = x == 'A'
println(t, f)
// One character per UTF-8 width (1 to 4 bytes): the char, its codepoint, then its encoded bytes.
// The arrow in the string literal is U+2192 (8594), which encodes as E2 86 92.
/// Expect: a 97 [0b1100001]
/// Expect: £ 163 [0b11000010, 0b10100011]
/// Expect: → 8594 [0b11100010, 0b10000110, 0b10010010]
/// Expect: 😀 128512 [0b11110000, 0b10011111, 0b10011000, 0b10000000]
for ch in chars {
  println(ch, ch.asInt(), ch.bytes().map(b => b.binary()))
}

val h = 'a'.hash()
println(h)

println(x.asInt())
val ch = Char.fromInt(0xD800)
/// Expect: �
println(ch)
41 changes: 11 additions & 30 deletions projects/compiler/src/compiler.abra
Original file line number Diff line number Diff line change
Expand Up @@ -3092,6 +3092,17 @@ export type Compiler {

Ok(argVal)
}
"int_as_char" => {
self._currentFn.block.addComment("begin int_as_char...")

val _arg = if arguments[0] |arg| arg else unreachable("'int_as_char' has 1 required argument")
val arg = if _arg |arg| arg else unreachable("'int_as_char' has 1 required argument")
val argVal = try self._compileExpression(arg)

self._currentFn.block.addComment("...int_as_char end")

Ok(argVal)
}
"int_as_float" => {
self._currentFn.block.addComment("begin int_as_float...")

Expand Down Expand Up @@ -3402,7 +3413,6 @@ export type Compiler {
if struct == self._project.preludeFloatStruct return self._getOrCompileFloatToStringMethod()
if struct == self._project.preludeStringStruct return self._getOrCompileStringToStringMethod()
if struct == self._project.preludeBoolStruct return self._getOrCompileBoolToStringMethod()
if struct == self._project.preludeCharStruct return self._getOrCompileCharToStringMethod()

val _fn = struct.instanceMethods.find(m => m.label.name == "toString")
val fn = if _fn |f| f else unreachable("every struct has a toString method defined")
Expand Down Expand Up @@ -3713,35 +3723,6 @@ export type Compiler {
Ok(fnVal)
}

// Returns the QBE function implementing `Char#toString`. If the builder already has a
// function registered under the method's name it is returned as-is; otherwise the function
// is built here. The emitted body masks `self` down to its lowest byte, stores that into a
// freshly malloc'd buffer and constructs a String of length 1 from it, so only the low
// 8 bits of the character survive.
func _getOrCompileCharToStringMethod(self): Result<QbeFunction, CompileError> {
// NOTE(review): `charTypeQbe` is never referenced again in this function, while the `self`
// parameter below is declared with `stringTypeQbe` - confirm whether that is intentional.
val charTypeQbe = try self._getQbeTypeForTypeExpect(Type(kind: TypeKind.PrimitiveChar), "char qbe type should exist")
val stringTypeQbe = try self._getQbeTypeForTypeExpect(Type(kind: TypeKind.PrimitiveString), "string qbe type should exist")

val typeName = try self._structTypeName(self._project.preludeCharStruct)
val methodName = "$typeName..toString"
// Already compiled: reuse it
if self._builder.getFunction(methodName) |fn| return Ok(fn)

val fnVal = self._builder.buildFunction(name: methodName, returnType: Some(stringTypeQbe))
// Switch the compiler's current function to the new one, remembering the previous one so it
// can be restored at the end.
// NOTE(review): the early error returns below leave `_currentFn` pointing at `fnVal`.
val prevFn = self._currentFn
self._currentFn = fnVal

fnVal.addComment("Char#toString(self): String")
val selfParam = fnVal.addParameter("self", stringTypeQbe)

// Keep only the lowest 8 bits of the character value
val lowestByteVal = try self._currentFn.block.buildAnd(selfParam, Value.Int(0xff)) else |e| return qbeError(e)
val strData = try self._callMalloc(Value.Int(1))
// NOTE(review): presumably `buildStoreL` emits a long-sized store, which would be wider than
// the 1 byte requested from malloc above - confirm whether a byte-sized store is needed.
self._currentFn.block.buildStoreL(lowestByteVal, strData)

val strVal = try self._constructString(strData, Value.Int(1))
fnVal.block.buildReturn(Some(strVal))

try fnVal.block.verify() else |e| return qbeError(e)

self._currentFn = prevFn

Ok(fnVal)
}

func _getOrCompileStringToStringMethod(self): Result<QbeFunction, CompileError> {
val stringTypeQbe = try self._getQbeTypeForTypeExpect(Type(kind: TypeKind.PrimitiveString), "string qbe type should exist")

Expand Down
2 changes: 1 addition & 1 deletion projects/compiler/src/lexer.abra
Original file line number Diff line number Diff line change
Expand Up @@ -610,7 +610,7 @@ export type Lexer {
s
} else if value.isBetween(0x0080, 0x07FF, true) {
val s = String.withLength(2)
val b1 = 0b11000000 || (value && 0b11111000000)
val b1 = 0b11000000 || ((value && 0b11111000000) >> 6)
val b2 = 0b10000000 || (value && 0b00000111111)
s._buffer.offset(0).store(b1.asByte())
s._buffer.offset(1).store(b2.asByte())
Expand Down
14 changes: 14 additions & 0 deletions projects/compiler/test/compiler/chars.abra
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,17 @@ println(b != b, b != 'a')
val m = { 'a': 1, 'b': 2 }
/// Expect: { a: 1, b: 2 } Option.None Option.Some(value: 1)
println(m, m['c'], m['a'])

val chars = "a£→😀".chars()

// One character per UTF-8 width (1 to 4 bytes): the char, its codepoint, then its encoded bytes.
// The arrow in the string literal is U+2192 (8594), which encodes as E2 86 92.
/// Expect: a 97 [0b1100001]
/// Expect: £ 163 [0b11000010, 0b10100011]
/// Expect: → 8594 [0b11100010, 0b10000110, 0b10010010]
/// Expect: 😀 128512 [0b11110000, 0b10011111, 0b10011000, 0b10000000]
for ch in chars {
  println(ch, ch.asInt(), ch.bytes().map(b => b.binary()))
}

// 0xD800 is a surrogate and has no UTF-8 encoding, so it prints as the replacement character
val ch = Char.fromInt(0xD800)
/// Expect: �
println(ch)
2 changes: 1 addition & 1 deletion projects/compiler/test/compiler/process_callstack.abra
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ val arr = [1].map((i, _) => {
/// Expect: at baz (%TEST_DIR%/compiler/process_callstack.abra:10)
/// Expect: at bar (%TEST_DIR%/compiler/process_callstack.abra:5)
/// Expect: at foo (%TEST_DIR%/compiler/process_callstack.abra:19)
/// Expect: at <expression> (%STD_DIR%/prelude.abra:601)
/// Expect: at <expression> (%STD_DIR%/prelude.abra:688)
/// Expect: at Array.map (%TEST_DIR%/compiler/process_callstack.abra:18)

type OneTwoThreeIterator {
Expand Down
16 changes: 16 additions & 0 deletions projects/compiler/test/compiler/strings.abra
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,22 @@ println("hello")
// println("\"hello\"".replaceAll("\"", "\\\""))
})()

// String#chars
(() => {
  // Iterating yields each character together with its position in the iteration
  /// Expect: h 0
  /// Expect: e 1
  /// Expect: l 2
  /// Expect: l 3
  /// Expect: o 4
  for c, i in "hello".chars() {
    println(c, i)
  }

  // The loop terminates once the iterator is exhausted
  /// Expect: done
  println("done")
})()

// Indexing (also String#get(index: Int))
(() => {
val s1 = "abc"
Expand Down
7 changes: 5 additions & 2 deletions projects/std/src/_intrinsics.abra
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@ export func functionNames(): Pointer<Byte>
@Intrinsic("u64_to_string")
export func u64ToString(i: Int): String

// @Intrinsic("char_as_int")
// export func charAsInt(c: Char): Int
// Reinterprets an Int as a Char.
// NOTE(review): the compiler's "int_as_char" arm returns the compiled argument unchanged, so
// no range or validity check appears to happen at this level - confirm.
@Intrinsic("int_as_char")
export func intAsChar(i: Int): Char

// Counterpart of `intAsChar`: exposes a Char as an Int.
// Presumably the result is the character's codepoint; the compiler side is not shown here - TODO confirm.
@Intrinsic("char_as_int")
export func charAsInt(c: Char): Int

@Intrinsic("int_as_float")
export func intAsFloat(i: Int): Float
Expand Down
91 changes: 89 additions & 2 deletions projects/std/src/prelude.abra
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ type Int {
Some(str)
}

// Base-2 representation of this Int prefixed with "0b", without zero-padding (e.g. 97 -> "0b1100001").
// Falls back to an empty digit string (i.e. just "0b") when `asBase(2)` yields no value.
func binary(self): String = "0b" + (self.asBase(2) ?: "")

func hex(self): String = "0x" + (self.asBase(16) ?: "")

func isEven(self): Bool = self % 2 == 0
Expand Down Expand Up @@ -152,8 +154,91 @@ type Bool {
}

// A single character, represented by its integer codepoint.
type Char {
  // Reinterprets `value` as a Char. No validation happens here: values that are not encodable
  // (see `bytes`) are accepted and later render as "�".
  func fromInt(value: Int): Char = intrinsics.intAsChar(value)

  // UTF-8 encodes this character into a new String. If the codepoint has no UTF-8 encoding
  // (`bytes` returns an empty array), the replacement character "�" is returned instead.
  func toString(self): String {
    val byteVals = self.bytes()
    if byteVals.isEmpty() return "�"

    val str = String.withLength(byteVals.length)

    for b, idx in byteVals {
      str._buffer.offset(idx).store(b.asByte())
    }

    str
  }

  // The character's codepoint as an Int.
  func asInt(self): Int = intrinsics.charAsInt(self)

  // Returns the UTF-8 encoding of this character as 1 to 4 byte values (each held in an Int).
  // Returns an empty array for values that cannot be encoded: the surrogate range
  // 0xD800..0xDFFF and anything outside 0..0x10FFFF.
  func bytes(self): Int[] {
    val value = self.asInt()

    // Reject surrogates and out-of-range values up front
    if !(value.isBetween(0, 0xD7FF, true) || value.isBetween(0xE000, 0x10FFFF, true)) return []

    // Every shift below is parenthesized together with its mask so that the result does not
    // depend on the relative precedence of `>>` and `||`.
    if value.isBetween(0, 0x007F, true) {
      // 1 byte: 0xxxxxxx
      [value]
    } else if value.isBetween(0x0080, 0x07FF, true) {
      // 2 bytes: 110xxxxx 10xxxxxx
      val b1 = 0b11000000 || ((value && 0b11111000000) >> 6)
      val b2 = 0b10000000 || (value && 0b00000111111)
      [b1, b2]
    } else if value.isBetween(0x0800, 0xFFFF, true) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      val b1 = 0b11100000 || ((value && 0b1111000000000000) >> 12)
      val b2 = 0b10000000 || ((value && 0b0000111111000000) >> 6)
      val b3 = 0b10000000 || (value && 0b0000000000111111)
      [b1, b2, b3]
    } else if value.isBetween(0x10000, 0x10FFFF, true) {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      val b1 = 0b11110000 || ((value && 0b111000000000000000000) >> 18)
      val b2 = 0b10000000 || ((value && 0b000111111000000000000) >> 12)
      val b3 = 0b10000000 || ((value && 0b000000000111111000000) >> 6)
      val b4 = 0b10000000 || (value && 0b000000000000000111111)
      [b1, b2, b3, b4]
    } else {
      // Not reachable given the range check above; kept so every branch yields a value
      []
    }
  }
}

// Iterator that decodes a buffer of UTF-8 bytes into Chars, one character per call to `next`.
type CharsIterator {
  _bytes: Pointer<Byte>
  _numBytes: Int
  _i: Int = 0

  // Decodes and returns the next character, or None once all `_numBytes` bytes are consumed.
  // The first byte alone decides how many further bytes are read: none below 128, one below
  // 0b11100000, two below 0b11110000, and three otherwise.
  func next(self): Char? {
    if self._i >= self._numBytes return None

    val lead = self._bytes.offset(self._i).load().asInt()
    self._i += 1

    // 1-byte case
    if lead < 128 return Some(Char.fromInt(lead))

    // Every multi-byte case has at least one continuation byte
    val c1 = self._continuation(lead)
    if lead < 0b11100000 {
      // 2-byte case
      val twoByte = ((lead && 0b00011111) << 6) || (c1 && 0b00111111)
      return Some(Char.fromInt(twoByte))
    }

    val c2 = self._continuation(lead)
    if lead < 0b11110000 {
      // 3-byte case
      val threeByte = ((lead && 0b00001111) << 12) || ((c1 && 0b00111111) << 6) || (c2 && 0b00111111)
      return Some(Char.fromInt(threeByte))
    }

    // 4-byte case
    val c3 = self._continuation(lead)
    val fourByte = ((lead && 0b00000111) << 18) || ((c1 && 0b00111111) << 12) || ((c2 && 0b00111111) << 6) || (c3 && 0b00111111)
    Some(Char.fromInt(fourByte))
  }

  // Reads the byte at the cursor and advances past it. Hitting the end of the buffer here means
  // the sequence started by `lead` was cut short, which is reported via `unreachable`.
  func _continuation(self, lead: Int): Int {
    if self._i >= self._numBytes unreachable("invalid utf-8 encoding: continuation byte expected given first byte ${lead.hex()}")
    val byte = self._bytes.offset(self._i).load().asInt()
    self._i += 1

    byte
  }
}

type String {
Expand Down Expand Up @@ -419,6 +504,8 @@ type String {
newString
}

// Returns an iterator over this string's characters. The iterator reads directly from this
// string's buffer (no copy is made here).
// `self.length` is passed as the iterator's byte count - presumably `length` counts bytes; TODO confirm.
func chars(self): CharsIterator = CharsIterator(_bytes: self._buffer, _numBytes: self.length)

func get(self, index: Int): String {
var idx = if index < 0 index + self.length else index
if idx >= self.length || idx < 0 { return "" }
Expand Down

0 comments on commit 5043b18

Please sign in to comment.