Add uleb128 byte-at-a-time decoder.

This commit is contained in:
Jeroen van Rijn
2022-04-16 02:07:57 +02:00
parent de72754d7a
commit 44316401c9
3 changed files with 34 additions and 22 deletions
+33 -21
View File
@@ -10,8 +10,6 @@
// the LEB128 format as used by DWARF debug info, Android .dex and other file formats.
package varint
import "core:fmt"
// In theory we should use the bigint package. In practice, varints bigger than this indicate a corrupted file.
// Instead we'll set limits on the values we'll encode/decode
// 18 * 7 bits = 126, which means that a possible 19th byte may at most be `0b0000_0011`.
@@ -25,32 +23,47 @@ Error :: enum {
// Decode a slice of bytes encoding an unsigned LEB128 integer into value and number of bytes used.
// Returns `size` == 0 for an invalid value, empty slice, or a varint > 18 bytes.
decode_uleb128 :: proc(buf: []u8) -> (val: u128, size: int, err: Error) {
more := true
decode_uleb128_buffer :: proc(buf: []u8) -> (val: u128, size: int, err: Error) {
if len(buf) == 0 {
return 0, 0, .Buffer_Too_Small
}
for v, i in buf {
size = i + 1
// 18 * 7 bits = 126, which means that a possible 19th byte may at most be 0b0000_0011.
if size > LEB128_MAX_BYTES || size == LEB128_MAX_BYTES && v > 0b0000_0011 {
return 0, 0, .Value_Too_Large
}
val |= u128(v & 0x7f) << uint(i * 7)
if v < 128 {
more = false
break
for v in buf {
val, size, err = decode_uleb128_byte(v, size, val)
if err != .Buffer_Too_Small {
return
}
}
// If the buffer runs out before the number ends, return an error.
if more {
return 0, 0, .Buffer_Too_Small
if err == .Buffer_Too_Small {
val, size = 0, 0
}
return
}
// Decodes one byte of an unsigned LEB128 integer, so a value can be fed in a byte at a time.
//
// Inputs:
// - `input`:       the next encoded byte.
// - `offset`:      zero-based index of `input` within the encoded value (bytes consumed so far).
// - `accumulator`: the value decoded from the preceding bytes.
//
// Returns the updated value and the number of bytes consumed so far, along with:
// - `.None` when this byte completed the value,
// - `.Buffer_Too_Small` when the value is not yet fully decoded and another byte is needed,
// - `.Value_Too_Large` (with `val` and `size` zeroed) when the value exceeds the limits of a u128.
decode_uleb128_byte :: proc(input: u8, offset: int, accumulator: u128) -> (val: u128, size: int, err: Error) {
	consumed := offset + 1

	// 18 * 7 bits = 126, which means that a possible 19th byte may at most be 0b0000_0011.
	if consumed > LEB128_MAX_BYTES {
		return 0, 0, .Value_Too_Large
	}
	if consumed == LEB128_MAX_BYTES && input > 0b0000_0011 {
		return 0, 0, .Value_Too_Large
	}

	// The low 7 bits carry payload; each successive byte is 7 bits more significant.
	payload := u128(input & 0x7f)
	shift   := uint(offset * 7)

	val  = accumulator | (payload << shift)
	size = consumed

	if input >= 128 {
		// Continuation bit is set: the caller has to supply at least one more byte.
		err = .Buffer_Too_Small
	}
	return
}
// Explicit procedure group: a call to `decode_uleb128` resolves to `decode_uleb128_buffer`
// when given a byte slice, or to `decode_uleb128_byte` when given a single byte, an offset and an accumulator.
decode_uleb128 :: proc {decode_uleb128_buffer, decode_uleb128_byte}
// Decode a slice of bytes encoding a signed LEB128 integer into value and number of bytes used.
// Returns `size` == 0 for an invalid value, empty slice, or a varint > 18 bytes.
decode_ileb128 :: proc(buf: []u8) -> (val: i128, size: int, err: Error) {
@@ -89,7 +102,6 @@ encode_uleb128 :: proc(buf: []u8, val: u128) -> (size: int, err: Error) {
size += 1
if size > len(buf) {
fmt.println(val, buf[:size - 1])
return 0, .Buffer_Too_Small
}
@@ -51,7 +51,7 @@ test_leb128 :: proc(t: ^testing.T) {
msg := fmt.tprintf("Expected %02x to decode to %v consuming %v bytes, got %v and %v", vector.encoded, vector.value, vector.size, val, size)
expect(t, size == vector.size && val == vector.value, msg)
msg = fmt.tprintf("Expected decoder to return error %v, got %v", vector.error, err)
msg = fmt.tprintf("Expected decoder to return error %v, got %v for vector %v", vector.error, err, vector)
expect(t, err == vector.error, msg)
if err == .None { // Try to roundtrip
BIN
View File
Binary file not shown.