Merge remote-tracking branch 'offical/master'

This commit is contained in:
2024-09-09 13:15:00 -04:00
492 changed files with 44673 additions and 6618 deletions
+67 -26
View File
@@ -18,7 +18,7 @@ jobs:
usesh: true
copyback: false
prepare: |
PKG_PATH="https://cdn.NetBSD.org/pub/pkgsrc/packages/NetBSD/$(uname -p)/10.0_2024Q2/All" /usr/sbin/pkg_add pkgin
PKG_PATH="https://cdn.NetBSD.org/pub/pkgsrc/packages/NetBSD/amd64/$(uname -r | cut -d_ -f1)_${PKGSRC_BRANCH}/All" /usr/sbin/pkg_add pkgin
pkgin -y in gmake git bash python311 llvm clang
ln -s /usr/pkg/bin/python3.11 /usr/bin/python3
run: |
@@ -32,10 +32,9 @@ jobs:
gmake -C vendor/miniaudio/src
./odin check examples/all -vet -strict-style -disallow-do -target:netbsd_amd64
./odin check examples/all -vet -strict-style -disallow-do -target:netbsd_arm64
./odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false
./odin test tests/core/speed.odin -file -all-packages -o:speed -define:ODIN_TEST_FANCY=false
./odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false
./odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false
./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
./odin test tests/core/speed.odin -file -all-packages -vet -strict-style -disallow-do -o:speed -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
(cd tests/issues; ./run.sh)
build_freebsd:
name: FreeBSD Build, Check, and Test
@@ -61,10 +60,9 @@ jobs:
gmake -C vendor/cgltf/src
gmake -C vendor/miniaudio/src
./odin check examples/all -vet -strict-style -disallow-do -target:freebsd_amd64
./odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false
./odin test tests/core/speed.odin -file -all-packages -o:speed -define:ODIN_TEST_FANCY=false
./odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false
./odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false
./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
./odin test tests/core/speed.odin -file -all-packages -vet -strict-style -disallow-do -o:speed -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
(cd tests/issues; ./run.sh)
ci:
strategy:
@@ -118,15 +116,13 @@ jobs:
- name: Odin check examples/all
run: ./odin check examples/all -strict-style
- name: Normal Core library tests
run: ./odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false
run: ./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
- name: Optimized Core library tests
run: ./odin test tests/core/speed.odin -o:speed -file -all-packages -define:ODIN_TEST_FANCY=false
run: ./odin test tests/core/speed.odin -o:speed -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
- name: Vendor library tests
run: ./odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false
run: ./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
- name: Internals tests
run: ./odin test tests/internal -all-packages -define:ODIN_TEST_FANCY=false
- name: Core library benchmarks
run: ./odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false
run: ./odin test tests/internal -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
- name: GitHub Issue tests
run: |
cd tests/issues
@@ -180,38 +176,33 @@ jobs:
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
odin run examples/demo -debug
odin run examples/demo -debug -vet -strict-style -disallow-do
- name: Odin check examples/all
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
odin check examples/all -strict-style
odin check examples/all -vet -strict-style -disallow-do
- name: Core library tests
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false
odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
- name: Optimized core library tests
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
odin test tests/core/speed.odin -o:speed -file -all-packages -define:ODIN_TEST_FANCY=false
- name: Core library benchmarks
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false
odin test tests/core/speed.odin -o:speed -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
- name: Vendor library tests
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
copy vendor\lua\5.4\windows\*.dll .
odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false
odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
- name: Odin internals tests
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
odin test tests/internal -all-packages -define:ODIN_TEST_FANCY=false
odin test tests/internal -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
- name: Odin documentation tests
shell: cmd
run: |
@@ -229,3 +220,53 @@ jobs:
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
odin check examples/all -strict-style -target:windows_i386
build_linux_riscv64:
runs-on: ubuntu-latest
name: Linux riscv64 (emulated) Build, Check and Test
timeout-minutes: 15
steps:
- uses: actions/checkout@v4
- name: Download LLVM (Linux)
run: |
wget https://apt.llvm.org/llvm.sh
chmod +x llvm.sh
sudo ./llvm.sh 18
echo "/usr/lib/llvm-18/bin" >> $GITHUB_PATH
- name: Build Odin
run: ./build_odin.sh release
- name: Odin version
run: ./odin version
- name: Odin report
run: ./odin report
- name: Compile needed Vendor
run: |
make -C vendor/stb/src
make -C vendor/cgltf/src
make -C vendor/miniaudio/src
- name: Odin check
run: ./odin check examples/all -target:linux_riscv64 -vet -strict-style -disallow-do
- name: Install riscv64 toolchain and qemu
run: sudo apt-get install -y qemu-user qemu-user-static gcc-12-riscv64-linux-gnu libc6-riscv64-cross
- name: Odin run
run: ./odin run examples/demo -vet -strict-style -disallow-do -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static"
- name: Odin run -debug
run: ./odin run examples/demo -debug -vet -strict-style -disallow-do -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static"
- name: Normal Core library tests
run: ./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static"
- name: Optimized Core library tests
run: ./odin test tests/core/speed.odin -o:speed -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static"
- name: Internals tests
run: ./odin test tests/internal -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static"
-1
View File
@@ -61,7 +61,6 @@ jobs:
mkdir dist
cp odin dist
cp LICENSE dist
cp libLLVM* dist
cp -r shared dist
cp -r base dist
cp -r core dist
+1 -3
View File
@@ -17,13 +17,12 @@
[Rr]eleases/
x64/
x86/
!/core/simd/x86
bld/
[Bb]in/
[Oo]bj/
[Ll]og/
![Cc]ore/[Ll]og/
tests/documentation/verify/
tests/documentation/all.odin-doc
# Visual Studio 2015 cache/options directory
.vs/
# Visual Studio Code options directory
@@ -31,7 +30,6 @@ tests/documentation/all.odin-doc
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
demo
benchmark
# MSTest test Results
[Tt]est[Rr]esult*/
+7 -1
View File
@@ -1,4 +1,4 @@
all: debug
all: default
demo:
./odin run examples/demo/demo.odin -file
@@ -6,12 +6,18 @@ demo:
report:
./odin report
default:
PROGRAM=make ./build_odin.sh # debug
debug:
./build_odin.sh debug
release:
./build_odin.sh release
release-native:
./build_odin.sh release-native
release_native:
./build_odin.sh release-native
+2 -2
View File
@@ -76,9 +76,9 @@ Answers to common questions about Odin.
Documentation for all the official packages that are part of the [core](https://pkg.odin-lang.org/core/) and [vendor](https://pkg.odin-lang.org/vendor/) library collections.
#### [The Odin Wiki](https://github.com/odin-lang/Odin/wiki)
#### [Odin Documentation](https://odin-lang.org/docs/)
A wiki maintained by the Odin community.
Documentation for the Odin language itself.
#### [Odin Discord](https://discord.gg/sVBPHEv)
+35 -16
View File
@@ -42,8 +42,8 @@ overflow_add :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) #option
overflow_sub :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) #optional_ok ---
overflow_mul :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) #optional_ok ---
add_sat :: proc(lhs, rhs: $T) -> T where type_is_integer(T) ---
sub_sat :: proc(lhs, rhs: $T) -> T where type_is_integer(T) ---
saturating_add :: proc(lhs, rhs: $T) -> T where type_is_integer(T) ---
saturating_sub :: proc(lhs, rhs: $T) -> T where type_is_integer(T) ---
sqrt :: proc(x: $T) -> T where type_is_float(T) || (type_is_simd_vector(T) && type_is_float(type_elem_type(T))) ---
@@ -219,14 +219,21 @@ type_map_cell_info :: proc($T: typeid) -> ^runtime.Map_Cell_Info ---
type_convert_variants_to_pointers :: proc($T: typeid) -> typeid where type_is_union(T) ---
type_merge :: proc($U, $V: typeid) -> typeid where type_is_union(U), type_is_union(V) ---
// Compile-time query over two struct types (both constrained by `type_is_struct`); yields a `bool`.
// NOTE(review): the precise meaning of "shared fields" is decided by the compiler, not by this declaration.
type_has_shared_fields :: proc($U, $V: typeid) -> bool where type_is_struct(U), type_is_struct(V) ---
constant_utf16_cstring :: proc($literal: string) -> [^]u16 ---
constant_log2 :: proc($v: $T) -> T where type_is_integer(T) ---
// SIMD related
simd_add :: proc(a, b: #simd[N]T) -> #simd[N]T ---
simd_sub :: proc(a, b: #simd[N]T) -> #simd[N]T ---
simd_mul :: proc(a, b: #simd[N]T) -> #simd[N]T ---
simd_div :: proc(a, b: #simd[N]T) -> #simd[N]T where type_is_float(T) ---
simd_saturating_add :: proc(a, b: #simd[N]T) -> #simd[N]T where type_is_integer(T) ---
simd_saturating_sub :: proc(a, b: #simd[N]T) -> #simd[N]T where type_is_integer(T) ---
// Keeps Odin's Behaviour
// (x << y) if y <= mask else 0
simd_shl :: proc(a: #simd[N]T, b: #simd[N]Unsigned_Integer) -> #simd[N]T ---
@@ -237,9 +244,6 @@ simd_shr :: proc(a: #simd[N]T, b: #simd[N]Unsigned_Integer) -> #simd[N]T ---
simd_shl_masked :: proc(a: #simd[N]T, b: #simd[N]Unsigned_Integer) -> #simd[N]T ---
simd_shr_masked :: proc(a: #simd[N]T, b: #simd[N]Unsigned_Integer) -> #simd[N]T ---
simd_add_sat :: proc(a, b: #simd[N]T) -> #simd[N]T ---
simd_sub_sat :: proc(a, b: #simd[N]T) -> #simd[N]T ---
simd_bit_and :: proc(a, b: #simd[N]T) -> #simd[N]T ---
simd_bit_or :: proc(a, b: #simd[N]T) -> #simd[N]T ---
simd_bit_xor :: proc(a, b: #simd[N]T) -> #simd[N]T ---
@@ -268,13 +272,28 @@ simd_lanes_ge :: proc(a, b: #simd[N]T) -> #simd[N]Integer ---
simd_extract :: proc(a: #simd[N]T, idx: uint) -> T ---
simd_replace :: proc(a: #simd[N]T, idx: uint, elem: T) -> #simd[N]T ---
simd_reduce_add_ordered :: proc(a: #simd[N]T) -> T ---
simd_reduce_mul_ordered :: proc(a: #simd[N]T) -> T ---
simd_reduce_min :: proc(a: #simd[N]T) -> T ---
simd_reduce_max :: proc(a: #simd[N]T) -> T ---
simd_reduce_and :: proc(a: #simd[N]T) -> T ---
simd_reduce_or :: proc(a: #simd[N]T) -> T ---
simd_reduce_xor :: proc(a: #simd[N]T) -> T ---
simd_reduce_add_ordered :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
simd_reduce_mul_ordered :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
simd_reduce_min :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
simd_reduce_max :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
simd_reduce_and :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
simd_reduce_or :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
simd_reduce_xor :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
simd_reduce_any :: proc(a: #simd[N]T) -> T where type_is_boolean(T) ---
simd_reduce_all :: proc(a: #simd[N]T) -> T where type_is_boolean(T) ---
simd_gather :: proc(ptr: #simd[N]rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) ---
simd_scatter :: proc(ptr: #simd[N]rawptr, val: #simd[N]T, mask: #simd[N]U) where type_is_integer(U) || type_is_boolean(U) ---
simd_masked_load :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) ---
simd_masked_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) where type_is_integer(U) || type_is_boolean(U) ---
simd_masked_expand_load :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) ---
simd_masked_compress_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) where type_is_integer(U) || type_is_boolean(U) ---
simd_shuffle :: proc(a, b: #simd[N]T, indices: ..int) -> #simd[len(indices)]T ---
simd_select :: proc(cond: #simd[N]boolean_or_integer, true, false: #simd[N]T) -> #simd[N]T ---
@@ -288,11 +307,11 @@ simd_nearest :: proc(a: #simd[N]any_float) -> #simd[N]any_float ---
simd_to_bits :: proc(v: #simd[N]T) -> #simd[N]Integer where size_of(T) == size_of(Integer), type_is_unsigned(Integer) ---
// equivalent a swizzle with descending indices, e.g. reserve(a, 3, 2, 1, 0)
simd_reverse :: proc(a: #simd[N]T) -> #simd[N]T ---
// equivalent to a swizzle with descending indices, e.g. swizzle(a, 3, 2, 1, 0)
simd_lanes_reverse :: proc(a: #simd[N]T) -> #simd[N]T ---
simd_rotate_left :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T ---
simd_rotate_right :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T ---
simd_lanes_rotate_left :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T ---
simd_lanes_rotate_right :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T ---
// Checks if the current target supports the given target features.
//
+33
View File
@@ -546,10 +546,23 @@ Odin_OS_Type :: type_of(ODIN_OS)
arm64,
wasm32,
wasm64p32,
riscv64,
}
*/
Odin_Arch_Type :: type_of(ODIN_ARCH)
// Bit set whose elements are `Odin_Arch_Type` values.
Odin_Arch_Types :: bit_set[Odin_Arch_Type]
// Constant `Odin_Arch_Types` set containing the seven architecture values listed here.
// NOTE(review): presumably meant to cover every `Odin_Arch_Type` variant — keep in sync
// with the compiler-defined enum when a new architecture is added.
ALL_ODIN_ARCH_TYPES :: Odin_Arch_Types{
.amd64,
.i386,
.arm32,
.arm64,
.wasm32,
.wasm64p32,
.riscv64,
}
/*
// Defined internally by the compiler
Odin_Build_Mode_Type :: enum int {
@@ -573,6 +586,22 @@ Odin_Build_Mode_Type :: type_of(ODIN_BUILD_MODE)
*/
Odin_Endian_Type :: type_of(ODIN_ENDIAN)
// Bit set whose elements are `Odin_OS_Type` values.
Odin_OS_Types :: bit_set[Odin_OS_Type]
// Constant `Odin_OS_Types` set containing the twelve operating-system values listed here.
// NOTE(review): presumably meant to enumerate every concrete OS target — verify against
// the compiler-defined `Odin_OS_Type` enum before relying on it being exhaustive.
ALL_ODIN_OS_TYPES :: Odin_OS_Types{
.Windows,
.Darwin,
.Linux,
.Essence,
.FreeBSD,
.OpenBSD,
.NetBSD,
.Haiku,
.WASI,
.JS,
.Orca,
.Freestanding,
}
/*
// Defined internally by the compiler
@@ -750,6 +779,10 @@ __init_context :: proc "contextless" (c: ^Context) {
}
// Default assertion-failure handler with the regular (context-carrying) calling convention.
// It forwards `prefix`, `message`, and `loc` unchanged to
// `default_assertion_contextless_failure_proc`; the `-> !` result marks it as never returning.
default_assertion_failure_proc :: proc(prefix, message: string, loc: Source_Code_Location) -> ! {
default_assertion_contextless_failure_proc(prefix, message, loc)
}
default_assertion_contextless_failure_proc :: proc "contextless" (prefix, message: string, loc: Source_Code_Location) -> ! {
when ODIN_OS == .Freestanding {
// Do nothing
} else {
+39 -12
View File
@@ -68,7 +68,7 @@ copy :: proc{copy_slice, copy_from_string}
// Note: If you want the elements to remain in their order, use `ordered_remove`.
// Note: If the index is out of bounds, this procedure will panic.
@builtin
unordered_remove :: proc(array: ^$D/[dynamic]$T, index: int, loc := #caller_location) #no_bounds_check {
unordered_remove :: proc(array: ^$D/[dynamic]$T, #any_int index: int, loc := #caller_location) #no_bounds_check {
bounds_check_error_loc(loc, index, len(array))
n := len(array)-1
if index != n {
@@ -82,7 +82,7 @@ unordered_remove :: proc(array: ^$D/[dynamic]$T, index: int, loc := #caller_loca
// Note: If the elements do not have to remain in their order, prefer `unordered_remove`.
// Note: If the index is out of bounds, this procedure will panic.
@builtin
ordered_remove :: proc(array: ^$D/[dynamic]$T, index: int, loc := #caller_location) #no_bounds_check {
ordered_remove :: proc(array: ^$D/[dynamic]$T, #any_int index: int, loc := #caller_location) #no_bounds_check {
bounds_check_error_loc(loc, index, len(array))
if index+1 < len(array) {
copy(array[index:], array[index+1:])
@@ -95,7 +95,7 @@ ordered_remove :: proc(array: ^$D/[dynamic]$T, index: int, loc := #caller_locati
// Note: This is an O(N) operation.
// Note: If the range is out of bounds, this procedure will panic.
@builtin
remove_range :: proc(array: ^$D/[dynamic]$T, lo, hi: int, loc := #caller_location) #no_bounds_check {
remove_range :: proc(array: ^$D/[dynamic]$T, #any_int lo, hi: int, loc := #caller_location) #no_bounds_check {
slice_expr_error_lo_hi_loc(loc, lo, hi, len(array))
n := max(hi-lo, 0)
if n > 0 {
@@ -350,7 +350,7 @@ _make_dynamic_array_len_cap :: proc(array: ^Raw_Dynamic_Array, size_of_elem, ali
return
}
// `make_map` allocates and initializes a dynamic array. Like `new`, the first argument is a type, not a value.
// `make_map` allocates and initializes a map. Like `new`, the first argument is a type, not a value.
// Unlike `new`, `make`'s return value is the same as the type of its argument, not a pointer to it.
//
// Note: Prefer using the procedure group `make`.
@@ -362,7 +362,7 @@ make_map :: proc($T: typeid/map[$K]$E, #any_int capacity: int = 1<<MAP_MIN_LOG2_
err = reserve_map(&m, capacity, loc)
return
}
// `make_multi_pointer` allocates and initializes a dynamic array. Like `new`, the first argument is a type, not a value.
// `make_multi_pointer` allocates and initializes a multi-pointer. Like `new`, the first argument is a type, not a value.
// Unlike `new`, `make`'s return value is the same as the type of its argument, not a pointer to it.
//
// This is "similar" to doing `raw_data(make([]E, len, allocator))`.
@@ -602,7 +602,7 @@ append_nothing :: proc(array: ^$T/[dynamic]$E, loc := #caller_location) -> (n: i
@builtin
inject_at_elem :: proc(array: ^$T/[dynamic]$E, index: int, #no_broadcast arg: E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
inject_at_elem :: proc(array: ^$T/[dynamic]$E, #any_int index: int, #no_broadcast arg: E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
if array == nil {
return
}
@@ -620,7 +620,7 @@ inject_at_elem :: proc(array: ^$T/[dynamic]$E, index: int, #no_broadcast arg: E,
}
@builtin
inject_at_elems :: proc(array: ^$T/[dynamic]$E, index: int, #no_broadcast args: ..E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
inject_at_elems :: proc(array: ^$T/[dynamic]$E, #any_int index: int, #no_broadcast args: ..E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
if array == nil {
return
}
@@ -643,7 +643,7 @@ inject_at_elems :: proc(array: ^$T/[dynamic]$E, index: int, #no_broadcast args:
}
@builtin
inject_at_elem_string :: proc(array: ^$T/[dynamic]$E/u8, index: int, arg: string, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
inject_at_elem_string :: proc(array: ^$T/[dynamic]$E/u8, #any_int index: int, arg: string, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
if array == nil {
return
}
@@ -668,7 +668,7 @@ inject_at_elem_string :: proc(array: ^$T/[dynamic]$E/u8, index: int, arg: string
@builtin
assign_at_elem :: proc(array: ^$T/[dynamic]$E, index: int, arg: E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
assign_at_elem :: proc(array: ^$T/[dynamic]$E, #any_int index: int, arg: E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
if index < len(array) {
array[index] = arg
ok = true
@@ -682,7 +682,7 @@ assign_at_elem :: proc(array: ^$T/[dynamic]$E, index: int, arg: E, loc := #calle
@builtin
assign_at_elems :: proc(array: ^$T/[dynamic]$E, index: int, #no_broadcast args: ..E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
assign_at_elems :: proc(array: ^$T/[dynamic]$E, #any_int index: int, #no_broadcast args: ..E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
new_size := index + len(args)
if len(args) == 0 {
ok = true
@@ -699,7 +699,7 @@ assign_at_elems :: proc(array: ^$T/[dynamic]$E, index: int, #no_broadcast args:
@builtin
assign_at_elem_string :: proc(array: ^$T/[dynamic]$E/u8, index: int, arg: string, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
assign_at_elem_string :: proc(array: ^$T/[dynamic]$E/u8, #any_int index: int, arg: string, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
new_size := index + len(arg)
if len(arg) == 0 {
ok = true
@@ -838,7 +838,7 @@ non_zero_resize_dynamic_array :: proc(array: ^$T/[dynamic]$E, #any_int length: i
Note: Prefer the procedure group `shrink`
*/
shrink_dynamic_array :: proc(array: ^$T/[dynamic]$E, new_cap := -1, loc := #caller_location) -> (did_shrink: bool, err: Allocator_Error) {
shrink_dynamic_array :: proc(array: ^$T/[dynamic]$E, #any_int new_cap := -1, loc := #caller_location) -> (did_shrink: bool, err: Allocator_Error) {
return _shrink_dynamic_array((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), new_cap, loc)
}
@@ -948,3 +948,30 @@ unimplemented :: proc(message := "", loc := #caller_location) -> ! {
}
p("not yet implemented", message, loc)
}
// `assert_contextless` is the "contextless" form of an assertion: when `condition`
// is false it reports `message` and `loc` through
// `default_assertion_contextless_failure_proc` using the "runtime assertion" prefix.
// Calls are disabled entirely when `ODIN_DISABLE_ASSERT` is set.
@builtin
@(disabled=ODIN_DISABLE_ASSERT)
assert_contextless :: proc "contextless" (condition: bool, message := "", loc := #caller_location) {
	// NOTE(bill): The failure path is wrapped in its own `@(cold)` procedure
	// call so that the CPU does not execute it speculatively; this was
	// measured to be about an order of magnitude faster.
	@(cold)
	report_failure :: proc "contextless" (message: string, loc: Source_Code_Location) {
		default_assertion_contextless_failure_proc("runtime assertion", message, loc)
	}

	// Fast path: nothing to do when the assertion holds.
	if condition {
		return
	}
	report_failure(message, loc)
}
// `panic_contextless` is callable from "contextless" procedures: it hands `message`
// and the caller's location to `default_assertion_contextless_failure_proc` with the
// "panic" prefix. Declared `-> !`, so control never returns to the caller.
@builtin
panic_contextless :: proc "contextless" (message: string, loc := #caller_location) -> ! {
default_assertion_contextless_failure_proc("panic", message, loc)
}
// `unimplemented_contextless` is callable from "contextless" procedures: it hands the
// optional `message` (default "") and the caller's location to
// `default_assertion_contextless_failure_proc` with the "not yet implemented" prefix.
// Declared `-> !`, so control never returns to the caller.
@builtin
unimplemented_contextless :: proc "contextless" (message := "", loc := #caller_location) -> ! {
default_assertion_contextless_failure_proc("not yet implemented", message, loc)
}
+8 -8
View File
@@ -76,7 +76,7 @@ raw_soa_footer :: proc{
@(builtin, require_results)
make_soa_aligned :: proc($T: typeid/#soa[]$E, length: int, alignment: int, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
make_soa_aligned :: proc($T: typeid/#soa[]$E, #any_int length, alignment: int, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
if length <= 0 {
return
}
@@ -135,7 +135,7 @@ make_soa_aligned :: proc($T: typeid/#soa[]$E, length: int, alignment: int, alloc
}
@(builtin, require_results)
make_soa_slice :: proc($T: typeid/#soa[]$E, length: int, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
make_soa_slice :: proc($T: typeid/#soa[]$E, #any_int length: int, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
return make_soa_aligned(T, length, align_of(E), allocator, loc)
}
@@ -172,7 +172,7 @@ make_soa :: proc{
@builtin
resize_soa :: proc(array: ^$T/#soa[dynamic]$E, length: int, loc := #caller_location) -> Allocator_Error {
resize_soa :: proc(array: ^$T/#soa[dynamic]$E, #any_int length: int, loc := #caller_location) -> Allocator_Error {
if array == nil {
return nil
}
@@ -183,7 +183,7 @@ resize_soa :: proc(array: ^$T/#soa[dynamic]$E, length: int, loc := #caller_locat
}
@builtin
non_zero_resize_soa :: proc(array: ^$T/#soa[dynamic]$E, length: int, loc := #caller_location) -> Allocator_Error {
non_zero_resize_soa :: proc(array: ^$T/#soa[dynamic]$E, #any_int length: int, loc := #caller_location) -> Allocator_Error {
if array == nil {
return nil
}
@@ -194,12 +194,12 @@ non_zero_resize_soa :: proc(array: ^$T/#soa[dynamic]$E, length: int, loc := #cal
}
@builtin
reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, loc := #caller_location) -> Allocator_Error {
reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, #any_int capacity: int, loc := #caller_location) -> Allocator_Error {
return _reserve_soa(array, capacity, true, loc)
}
@builtin
non_zero_reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, loc := #caller_location) -> Allocator_Error {
non_zero_reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, #any_int capacity: int, loc := #caller_location) -> Allocator_Error {
return _reserve_soa(array, capacity, false, loc)
}
@@ -484,7 +484,7 @@ into_dynamic_soa :: proc(array: $T/#soa[]$E) -> #soa[dynamic]E {
// Note: If you want the elements to remain in their order, use `ordered_remove_soa`.
// Note: If the index is out of bounds, this procedure will panic.
@builtin
unordered_remove_soa :: proc(array: ^$T/#soa[dynamic]$E, index: int, loc := #caller_location) #no_bounds_check {
unordered_remove_soa :: proc(array: ^$T/#soa[dynamic]$E, #any_int index: int, loc := #caller_location) #no_bounds_check {
bounds_check_error_loc(loc, index, len(array))
if index+1 < len(array) {
ti := type_info_of(typeid_of(T))
@@ -512,7 +512,7 @@ unordered_remove_soa :: proc(array: ^$T/#soa[dynamic]$E, index: int, loc := #cal
// Note: If the elements do not have to remain in their order, prefer `unordered_remove_soa`.
// Note: If the index is out of bounds, this procedure will panic.
@builtin
ordered_remove_soa :: proc(array: ^$T/#soa[dynamic]$E, index: int, loc := #caller_location) #no_bounds_check {
ordered_remove_soa :: proc(array: ^$T/#soa[dynamic]$E, #any_int index: int, loc := #caller_location) #no_bounds_check {
bounds_check_error_loc(loc, index, len(array))
if index+1 < len(array) {
ti := type_info_of(typeid_of(T))
+2 -2
View File
@@ -1,8 +1,8 @@
package runtime
nil_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
size, alignment: int,
old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) {
size, alignment: int,
old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) {
switch mode {
case .Alloc, .Alloc_Non_Zeroed:
return nil, .Out_Of_Memory
@@ -129,7 +129,7 @@ arena_alloc :: proc(arena: ^Arena, size, alignment: uint, loc := #caller_locatio
return
}
// `arena_init` will initialize the arena with a usuable block.
// `arena_init` will initialize the arena with a usable block.
// This procedure is not necessary to use the Arena, as its default zero value works: `arena_alloc` will set things up if necessary
@(require_results)
arena_init :: proc(arena: ^Arena, size: uint, backing_allocator: Allocator, loc := #caller_location) -> Allocator_Error {
+3
View File
@@ -34,6 +34,9 @@ when ODIN_BUILD_MODE == .Dynamic {
} else when ODIN_OS == .Darwin && ODIN_ARCH == .arm64 {
@require foreign import entry "entry_unix_no_crt_darwin_arm64.asm"
SYS_exit :: 1
} else when ODIN_ARCH == .riscv64 {
@require foreign import entry "entry_unix_no_crt_riscv64.asm"
SYS_exit :: 93
}
@(link_name="_start_odin", linkage="strong", require)
_start_odin :: proc "c" (argc: i32, argv: [^]cstring) -> ! {
@@ -0,0 +1,10 @@
/*
 * Process entry point for riscv64 builds that link without a C runtime.
 * Reads argc/argv from the initial stack, aligns the stack, and calls
 * `_start_odin(argc, argv)`, which is not expected to return.
 */
.text
.globl _start
_start:
	ld a0, 0(sp)        /* a0 = argc: first word on the initial stack */
	addi a1, sp, 8      /* a1 = argv: pointer array starts right after argc */
	andi sp, sp, ~15    /* round sp down to a 16-byte boundary (was `addi`, which only subtracted 16) */
	call _start_odin
	ebreak              /* trap if _start_odin ever returns */
+15 -8
View File
@@ -19,12 +19,15 @@ heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
// the pointer we return to the user.
//
aligned_alloc :: proc(size, alignment: int, old_ptr: rawptr = nil, zero_memory := true) -> ([]byte, Allocator_Error) {
aligned_alloc :: proc(size, alignment: int, old_ptr: rawptr, old_size: int, zero_memory := true) -> ([]byte, Allocator_Error) {
a := max(alignment, align_of(rawptr))
space := size + a - 1
allocated_mem: rawptr
if old_ptr != nil {
force_copy := old_ptr != nil && a > align_of(rawptr)
if !force_copy && old_ptr != nil {
original_old_ptr := ([^]rawptr)(old_ptr)[-1]
allocated_mem = heap_resize(original_old_ptr, space+size_of(rawptr))
} else {
@@ -36,12 +39,19 @@ heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
aligned_ptr := (ptr - 1 + uintptr(a)) & -uintptr(a)
diff := int(aligned_ptr - ptr)
if (size + diff) > space || allocated_mem == nil {
aligned_free(old_ptr)
aligned_free(allocated_mem)
return nil, .Out_Of_Memory
}
aligned_mem = rawptr(aligned_ptr)
([^]rawptr)(aligned_mem)[-1] = allocated_mem
if force_copy {
mem_copy_non_overlapping(aligned_mem, old_ptr, old_size)
aligned_free(old_ptr)
}
return byte_slice(aligned_mem, size), nil
}
@@ -53,10 +63,10 @@ heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
aligned_resize :: proc(p: rawptr, old_size: int, new_size: int, new_alignment: int, zero_memory := true) -> (new_memory: []byte, err: Allocator_Error) {
if p == nil {
return nil, nil
return aligned_alloc(new_size, new_alignment, nil, old_size, zero_memory)
}
new_memory = aligned_alloc(new_size, new_alignment, p, zero_memory) or_return
new_memory = aligned_alloc(new_size, new_alignment, p, old_size, zero_memory) or_return
// NOTE: heap_resize does not zero the new memory, so we do it
if zero_memory && new_size > old_size {
@@ -68,7 +78,7 @@ heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
switch mode {
case .Alloc, .Alloc_Non_Zeroed:
return aligned_alloc(size, alignment, nil, mode == .Alloc)
return aligned_alloc(size, alignment, nil, 0, mode == .Alloc)
case .Free:
aligned_free(old_memory)
@@ -77,9 +87,6 @@ heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
return nil, .Mode_Not_Implemented
case .Resize, .Resize_Non_Zeroed:
if old_memory == nil {
return aligned_alloc(size, alignment, nil, mode == .Resize)
}
return aligned_resize(old_memory, old_size, size, alignment, mode == .Resize)
case .Query_Features:
+24 -15
View File
@@ -8,10 +8,9 @@ IS_WASM :: ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
@(private)
RUNTIME_LINKAGE :: "strong" when (
(ODIN_USE_SEPARATE_MODULES ||
ODIN_USE_SEPARATE_MODULES ||
ODIN_BUILD_MODE == .Dynamic ||
!ODIN_NO_CRT) &&
!IS_WASM) else "internal"
!ODIN_NO_CRT) else "internal"
RUNTIME_REQUIRE :: false // !ODIN_TILDE
@(private)
@@ -879,9 +878,6 @@ extendhfsf2 :: proc "c" (value: __float16) -> f32 {
@(link_name="__floattidf", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
floattidf :: proc "c" (a: i128) -> f64 {
when IS_WASM {
return 0
} else {
DBL_MANT_DIG :: 53
if a == 0 {
return 0.0
@@ -921,14 +917,10 @@ when IS_WASM {
fb[0] = u32(a) // mantissa-low
return transmute(f64)fb
}
}
@(link_name="__floattidf_unsigned", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
floattidf_unsigned :: proc "c" (a: u128) -> f64 {
when IS_WASM {
return 0
} else {
DBL_MANT_DIG :: 53
if a == 0 {
return 0.0
@@ -966,7 +958,6 @@ when IS_WASM {
fb[0] = u32(a) // mantissa-low
return transmute(f64)fb
}
}
@@ -1023,14 +1014,32 @@ modti3 :: proc "c" (a, b: i128) -> i128 {
@(link_name="__divmodti4", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
divmodti4 :: proc "c" (a, b: i128, rem: ^i128) -> i128 {
u := udivmod128(u128(a), u128(b), (^u128)(rem))
return i128(u)
s_a := a >> (128 - 1) // -1 if negative or 0
s_b := b >> (128 - 1)
an := (a ~ s_a) - s_a // absolute
bn := (b ~ s_b) - s_b
s_b ~= s_a // quotient sign
u_s_b := u128(s_b)
u_s_a := u128(s_a)
r: u128 = ---
u := i128((udivmodti4(u128(an), u128(bn), &r) ~ u_s_b) - u_s_b) // negate if negative
rem^ = i128((r ~ u_s_a) - u_s_a)
return u
}
@(link_name="__divti3", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
divti3 :: proc "c" (a, b: i128) -> i128 {
u := udivmodti4(u128(a), u128(b), nil)
return i128(u)
s_a := a >> (128 - 1) // -1 if negative or 0
s_b := b >> (128 - 1)
an := (a ~ s_a) - s_a // absolute
bn := (b ~ s_b) - s_b
s_a ~= s_b // quotient sign
u_s_a := u128(s_a)
return i128((udivmodti4(u128(an), u128(bn), nil) ~ u_s_a) - u_s_a) // negate if negative
}
+2
View File
@@ -12,6 +12,8 @@ _stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
SYS_write :: uintptr(4)
} else when ODIN_ARCH == .arm32 {
SYS_write :: uintptr(4)
} else when ODIN_ARCH == .riscv64 {
SYS_write :: uintptr(64)
}
stderr :: 2
+21
View File
@@ -52,3 +52,24 @@ udivti3 :: proc "c" (la, ha, lb, hb: u64) -> u128 {
b.lo, b.hi = lb, hb
return udivmodti4(a.all, b.all, nil)
}
// Logical (zero-filling) right shift of a 128-bit value that is passed as two
// 64-bit halves. Exported under the symbol name `__lshrti3`.
//
// Inputs:
// - la: Low 64 bits of the value.
// - ha: High 64 bits of the value.
// - b:  Shift amount in bits.
//
// Returns:
// - The value shifted right by `b` bits, with zeroes shifted in from the top.
@(link_name="__lshrti3", linkage="strong")
__lshrti3 :: proc "c" (la, ha: u64, b: u32) -> i128 {
	// Width in bits of ONE half of the 128-bit value. The halves (`lo`/`hi`)
	// are `u64`, so this must be 64; using the width of `u32` here made every
	// non-zero shift combine the halves at the wrong bit position.
	bits :: size_of(u64)*8

	input, result: ti_int
	input.lo = la
	input.hi = ha

	if b & bits != 0 {
		// Shift of at least one full half: the high half becomes zero and the
		// low half is taken entirely from the old high half.
		result.hi = 0
		result.lo = input.hi >> (b - bits)
	} else if b == 0 {
		// Nothing to do; also avoids the `bits - b` full-width shift below.
		return input.all
	} else {
		// Shift of less than one half: the bits leaving the high half are
		// carried into the top of the low half.
		result.hi = input.hi >> b
		result.lo = (input.hi << (bits - b)) | (input.lo >> b)
	}

	return result.all
}
+34
View File
@@ -0,0 +1,34 @@
package runtime
// Signature of a cleanup procedure: takes no parameters, returns nothing, and
// uses the "odin" calling convention.
Thread_Local_Cleaner :: #type proc "odin" ()
// File-private table with room for 8 registered cleaners. A `nil` entry is an
// unused slot; `add_thread_local_cleaner` fills the first `nil` slot it finds.
@(private="file")
thread_local_cleaners: [8]Thread_Local_Cleaner
// Register `p` to be run when a thread finishes, so that it can release
// state kept in `thread_local` variables.
//
// The procedure is stored in the first unused slot of the cleaner table. If
// every slot is already taken, this panics.
//
// Meant to be called from the `init` procedure of a package that stores
// dynamically-allocated memory in `thread_local` variables.
add_thread_local_cleaner :: proc "contextless" (p: Thread_Local_Cleaner) {
	for &slot in thread_local_cleaners {
		if slot != nil {
			// Slot already in use, keep looking.
			continue
		}
		slot = p
		return
	}
	panic_contextless("There are no more thread-local cleaner slots available.")
}
// Invoke every registered thread-local cleaner, in table order, stopping at
// the first unused (`nil`) slot.
//
// Meant to be called by the internals of a threading API when a thread's
// lifetime ends.
run_thread_local_cleaners :: proc "odin" () {
	for cleaner in thread_local_cleaners {
		if cleaner != nil {
			cleaner()
		} else {
			// The first empty slot ends the run.
			break
		}
	}
}
+3
View File
@@ -116,6 +116,9 @@ if %errorlevel% neq 0 goto end_of_build
rem If the demo doesn't run for you and your CPU is more than a decade old, try -microarch:native
if %release_mode% EQU 0 odin run examples/demo -vet -strict-style -- Hellope World
rem Many non-compiler devs seem to run debug build but don't realize.
if %release_mode% EQU 0 echo: & echo Debug compiler built. Note: run "build.bat release" if you want a faster, release mode compiler.
del *.obj > NUL 2> NUL
:end_of_build
+15 -2
View File
@@ -23,6 +23,14 @@ error() {
exit 1
}
# Brew advises people not to add llvm to their $PATH, so try and use brew to find it.
if [ -z "$LLVM_CONFIG" ] && [ -n "$(command -v brew)" ]; then
if [ -n "$(command -v $(brew --prefix llvm@18)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@18)/bin/llvm-config"
elif [ -n "$(command -v $(brew --prefix llvm@17)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@17)/bin/llvm-config"
elif [ -n "$(command -v $(brew --prefix llvm@14)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@14)/bin/llvm-config"
fi
fi
if [ -z "$LLVM_CONFIG" ]; then
# darwin, linux, openbsd
if [ -n "$(command -v llvm-config-18)" ]; then LLVM_CONFIG="llvm-config-18"
@@ -95,7 +103,7 @@ Linux)
LDFLAGS="$LDFLAGS -ldl $($LLVM_CONFIG --libs core native --system-libs --libfiles)"
# Copy libLLVM*.so into current directory for linking
# NOTE: This is needed by the Linux release pipeline!
cp $(readlink -f $($LLVM_CONFIG --libfiles)) ./
# cp $(readlink -f $($LLVM_CONFIG --libfiles)) ./
LDFLAGS="$LDFLAGS -Wl,-rpath=\$ORIGIN"
;;
OpenBSD)
@@ -144,12 +152,17 @@ build_odin() {
}
run_demo() {
./odin run examples/demo -vet -strict-style -- Hellope World
if [ $# -eq 0 ] || [ "$1" = "debug" ]; then
./odin run examples/demo -vet -strict-style -- Hellope World
fi
}
if [ $# -eq 0 ]; then
build_odin debug
run_demo
: ${PROGRAM:=$0}
printf "\nDebug compiler built. Note: run \"$PROGRAM release\" or \"$PROGRAM release-native\" if you want a faster, release mode compiler.\n"
elif [ $# -eq 1 ]; then
case $1 in
report)
+33 -3
View File
@@ -144,6 +144,9 @@ buffer_grow :: proc(b: ^Buffer, n: int, loc := #caller_location) {
}
buffer_write_at :: proc(b: ^Buffer, p: []byte, offset: int, loc := #caller_location) -> (n: int, err: io.Error) {
if len(p) == 0 {
return 0, nil
}
b.last_read = .Invalid
if offset < 0 {
err = .Invalid_Offset
@@ -246,10 +249,13 @@ buffer_read_ptr :: proc(b: ^Buffer, ptr: rawptr, size: int) -> (n: int, err: io.
}
buffer_read_at :: proc(b: ^Buffer, p: []byte, offset: int) -> (n: int, err: io.Error) {
if len(p) == 0 {
return 0, nil
}
b.last_read = .Invalid
if uint(offset) >= len(b.buf) {
err = .Invalid_Offset
err = .EOF
return
}
n = copy(p, b.buf[offset:])
@@ -310,6 +316,27 @@ buffer_unread_rune :: proc(b: ^Buffer) -> io.Error {
return nil
}
/*
Move the read offset of a buffer.

Inputs:
- b: The buffer whose read offset (`b.off`) is changed.
- offset: Displacement in bytes, interpreted according to `whence`.
- whence: `.Start` treats `offset` as absolute, `.Current` is relative to `b.off`, `.End` is relative to `len(b.buf)`.

Returns:
- The new absolute offset on success.
- `.Invalid_Whence` if `whence` is not one of the values above.
- `.Invalid_Offset` if the resulting offset is negative or does not fit in an `int`.

On error the buffer is left unchanged.
*/
buffer_seek :: proc(b: ^Buffer, offset: i64, whence: io.Seek_From) -> (i64, io.Error) {
	abs: i64
	switch whence {
	case .Start:
		abs = offset
	case .Current:
		abs = i64(b.off) + offset
	case .End:
		abs = i64(len(b.buf)) + offset
	case:
		return 0, .Invalid_Whence
	}

	// Validate the full 64-bit value BEFORE narrowing it to `int`. Where `int`
	// is narrower than `i64`, checking after the conversion lets an
	// out-of-range offset wrap to an unrelated value.
	if abs < 0 || abs > i64(max(int)) {
		return 0, .Invalid_Offset
	}

	// Any seek invalidates a pending unread operation.
	b.last_read = .Invalid
	b.off = int(abs)
	return abs, nil
}
buffer_read_bytes :: proc(b: ^Buffer, delim: byte) -> (line: []byte, err: io.Error) {
i := index_byte(b.buf[b.off:], delim)
@@ -395,14 +422,17 @@ _buffer_proc :: proc(stream_data: rawptr, mode: io.Stream_Mode, p: []byte, offse
return io._i64_err(buffer_write(b, p))
case .Write_At:
return io._i64_err(buffer_write_at(b, p, int(offset)))
case .Seek:
n, err = buffer_seek(b, offset, whence)
return
case .Size:
n = i64(buffer_capacity(b))
n = i64(buffer_length(b))
return
case .Destroy:
buffer_destroy(b)
return
case .Query:
return io.query_utility({.Read, .Read_At, .Write, .Write_At, .Size, .Destroy})
return io.query_utility({.Read, .Read_At, .Write, .Write_At, .Seek, .Size, .Destroy, .Query})
}
return 0, .Empty
}
+284 -6
View File
@@ -1,9 +1,38 @@
package bytes
import "base:intrinsics"
import "core:mem"
import "core:simd"
import "core:unicode"
import "core:unicode/utf8"
// Constants for the 256-bit scanning paths of `index_byte` and
// `last_index_byte`; only declared when targeting amd64 with AVX2 enabled.
when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
// Lane i holds the value i, so selecting from this vector with a match mask
// yields the byte offset of each matching lane.
@(private)
SCANNER_INDICES_256 : simd.u8x32 : {
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
}
// Filler for non-matching lanes when the result is reduced with `reduce_max`
// (used by `last_index_byte`).
@(private)
SCANNER_SENTINEL_MAX_256: simd.u8x32 : u8(0x00)
// Filler for non-matching lanes when the result is reduced with `reduce_min`
// (used by `index_byte`).
@(private)
SCANNER_SENTINEL_MIN_256: simd.u8x32 : u8(0xff)
// Number of bytes covered by one `simd.u8x32` load.
@(private)
SIMD_REG_SIZE_256 :: 32
}
// 128-bit counterparts of the constants above; always declared.
@(private)
SCANNER_INDICES_128 : simd.u8x16 : {
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
}
// Filler for non-matching lanes ahead of a `reduce_max`.
@(private)
SCANNER_SENTINEL_MAX_128: simd.u8x16 : u8(0x00)
// Filler for non-matching lanes ahead of a `reduce_min`.
@(private)
SCANNER_SENTINEL_MIN_128: simd.u8x16 : u8(0xff)
// Number of bytes covered by one `simd.u8x16` load; also the length below
// which the scanners fall back to a plain byte loop.
@(private)
SIMD_REG_SIZE_128 :: 16
clone :: proc(s: []byte, allocator := context.allocator, loc := #caller_location) -> []byte {
c := make([]byte, len(s), allocator, loc)
copy(c, s)
@@ -293,28 +322,277 @@ split_after_iterator :: proc(s: ^[]byte, sep: []byte) -> ([]byte, bool) {
return _split_iterator(s, sep, len(sep))
}
/*
Scan a slice of bytes for a specific byte.
index_byte :: proc(s: []byte, c: byte) -> int {
for i := 0; i < len(s); i += 1 {
This procedure safely handles slices of any length, including empty slices.
Inputs:
- s: A slice of bytes.
- c: The byte to search for.
Returns:
- index: The index of the byte `c`, or -1 if it was not found.
*/
index_byte :: proc(s: []byte, c: byte) -> (index: int) #no_bounds_check {
// `i` is the scan cursor: every byte before it has already been checked.
// `l` is the total number of bytes to scan.
i, l := 0, len(s)
// Guard against small strings. On modern systems, it is ALWAYS
// worth vectorizing assuming there is a hardware vector unit, and
// the data size is large enough.
if l < SIMD_REG_SIZE_128 {
for /**/; i < l; i += 1 {
if s[i] == c {
return i
}
}
return -1
}
// The needle as a 128-bit vector, compared lane-wise against loaded data.
c_vec: simd.u8x16 = c
// The wide, unrolled paths are only compiled when SIMD is not emulated.
when !simd.IS_EMULATED {
// Note: While this is something that could also logically take
// advantage of AVX512, the various downclocking and power
// consumption related woes make it premature to have a dedicated
// code path.
when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
c_vec_256: simd.u8x32 = c
// Per-iteration scratch: loaded data, lane-wise comparison results, and the
// OR-reduction of each comparison (non-zero when that vector has a match).
s_vecs: [4]simd.u8x32 = ---
c_vecs: [4]simd.u8x32 = ---
m_vec: [4]u8 = ---
// Scan 128-byte chunks, using 256-bit SIMD.
for nr_blocks := l / (4 * SIMD_REG_SIZE_256); nr_blocks > 0; nr_blocks -= 1 {
#unroll for j in 0..<4 {
s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x32)raw_data(s[i+j*SIMD_REG_SIZE_256:]))
c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec_256)
m_vec[j] = simd.reduce_or(c_vecs[j])
}
// Only locate the exact position when at least one vector matched.
if m_vec[0] | m_vec[1] | m_vec[2] | m_vec[3] > 0 {
// Vectors are checked in ascending order so the earliest match wins.
#unroll for j in 0..<4 {
if m_vec[j] > 0 {
// Matching lanes keep their lane index, the rest become 0xff; the
// minimum is therefore the first matching lane.
sel := simd.select(c_vecs[j], SCANNER_INDICES_256, SCANNER_SENTINEL_MIN_256)
off := simd.reduce_min(sel)
return i + j * SIMD_REG_SIZE_256 + int(off)
}
}
}
i += 4 * SIMD_REG_SIZE_256
}
// Scan 64-byte chunks, using 256-bit SIMD.
for nr_blocks := (l - i) / (2 * SIMD_REG_SIZE_256); nr_blocks > 0; nr_blocks -= 1 {
#unroll for j in 0..<2 {
s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x32)raw_data(s[i+j*SIMD_REG_SIZE_256:]))
c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec_256)
m_vec[j] = simd.reduce_or(c_vecs[j])
}
if m_vec[0] | m_vec[1] > 0 {
#unroll for j in 0..<2 {
if m_vec[j] > 0 {
sel := simd.select(c_vecs[j], SCANNER_INDICES_256, SCANNER_SENTINEL_MIN_256)
off := simd.reduce_min(sel)
return i + j * SIMD_REG_SIZE_256 + int(off)
}
}
}
i += 2 * SIMD_REG_SIZE_256
}
} else {
// Same scheme as the AVX2 branch, with 128-bit vectors.
s_vecs: [4]simd.u8x16 = ---
c_vecs: [4]simd.u8x16 = ---
m_vecs: [4]u8 = ---
// Scan 64-byte chunks, using 128-bit SIMD.
for nr_blocks := l / (4 * SIMD_REG_SIZE_128); nr_blocks > 0; nr_blocks -= 1 {
#unroll for j in 0..<4 {
s_vecs[j]= intrinsics.unaligned_load(cast(^simd.u8x16)raw_data(s[i+j*SIMD_REG_SIZE_128:]))
c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec)
m_vecs[j] = simd.reduce_or(c_vecs[j])
}
if m_vecs[0] | m_vecs[1] | m_vecs[2] | m_vecs[3] > 0 {
#unroll for j in 0..<4 {
if m_vecs[j] > 0 {
sel := simd.select(c_vecs[j], SCANNER_INDICES_128, SCANNER_SENTINEL_MIN_128)
off := simd.reduce_min(sel)
return i + j * SIMD_REG_SIZE_128 + int(off)
}
}
}
i += 4 * SIMD_REG_SIZE_128
}
}
}
// Scan the remaining SIMD register sized chunks.
//
// Apparently LLVM does ok with 128-bit SWAR, so this path is also taken
// on potato targets. Scanning more at a time when LLVM is emulating SIMD
// likely does not buy much, as all that does is increase GP register
// pressure.
for nr_blocks := (l - i) / SIMD_REG_SIZE_128; nr_blocks > 0; nr_blocks -= 1 {
s0 := intrinsics.unaligned_load(cast(^simd.u8x16)raw_data(s[i:]))
c0 := simd.lanes_eq(s0, c_vec)
if simd.reduce_or(c0) > 0 {
sel := simd.select(c0, SCANNER_INDICES_128, SCANNER_SENTINEL_MIN_128)
off := simd.reduce_min(sel)
return i + int(off)
}
i += SIMD_REG_SIZE_128
}
// Scan serially for the remainder.
for /**/; i < l; i += 1 {
if s[i] == c {
return i
}
}
// No byte equal to `c` anywhere in `s`.
return -1
}
// Returns -1 if c is not present
last_index_byte :: proc(s: []byte, c: byte) -> int {
for i := len(s)-1; i >= 0; i -= 1 {
/*
Scan a slice of bytes for a specific byte, starting from the end and working
backwards to the start.
This procedure safely handles slices of any length, including empty slices.
Inputs:
- s: A slice of bytes.
- c: The byte to search for.
Returns:
- index: The index of the byte `c`, or -1 if it was not found.
*/
last_index_byte :: proc(s: []byte, c: byte) -> int #no_bounds_check {
// `i` is the exclusive upper bound of the part of `s` not yet scanned: every
// byte at index >= i has already been checked. Each loop below first moves
// `i` down by one chunk and then examines the chunk starting at `i`.
i := len(s)
// Guard against small strings. On modern systems, it is ALWAYS
// worth vectorizing assuming there is a hardware vector unit, and
// the data size is large enough.
if i < SIMD_REG_SIZE_128 {
#reverse for ch, j in s {
if ch == c {
return j
}
}
return -1
}
// The needle as a 128-bit vector, compared lane-wise against loaded data.
c_vec: simd.u8x16 = c
// The wide, unrolled paths are only compiled when SIMD is not emulated.
when !simd.IS_EMULATED {
// Note: While this is something that could also logically take
// advantage of AVX512, the various downclocking and power
// consumption related woes make it premature to have a dedicated
// code path.
when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
c_vec_256: simd.u8x32 = c
// Per-iteration scratch: loaded data, lane-wise comparison results, and the
// OR-reduction of each comparison (non-zero when that vector has a match).
s_vecs: [4]simd.u8x32 = ---
c_vecs: [4]simd.u8x32 = ---
m_vec: [4]u8 = ---
// Scan 128-byte chunks, using 256-bit SIMD.
for i >= 4 * SIMD_REG_SIZE_256 {
i -= 4 * SIMD_REG_SIZE_256
#unroll for j in 0..<4 {
s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x32)raw_data(s[i+j*SIMD_REG_SIZE_256:]))
c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec_256)
m_vec[j] = simd.reduce_or(c_vecs[j])
}
// Only locate the exact position when at least one vector matched.
if m_vec[0] | m_vec[1] | m_vec[2] | m_vec[3] > 0 {
// Vectors are checked from the highest address down (3-j) so the match
// closest to the end of the slice wins.
#unroll for j in 0..<4 {
if m_vec[3-j] > 0 {
// Matching lanes keep their lane index, the rest become 0x00; the
// maximum is therefore the last matching lane. A lone match in lane 0
// also yields 0, which is correct because a match is known to exist.
sel := simd.select(c_vecs[3-j], SCANNER_INDICES_256, SCANNER_SENTINEL_MAX_256)
off := simd.reduce_max(sel)
return i + (3-j) * SIMD_REG_SIZE_256 + int(off)
}
}
}
}
// Scan 64-byte chunks, using 256-bit SIMD.
for i >= 2 * SIMD_REG_SIZE_256 {
i -= 2 * SIMD_REG_SIZE_256
#unroll for j in 0..<2 {
s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x32)raw_data(s[i+j*SIMD_REG_SIZE_256:]))
c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec_256)
m_vec[j] = simd.reduce_or(c_vecs[j])
}
if m_vec[0] | m_vec[1] > 0 {
#unroll for j in 0..<2 {
if m_vec[1-j] > 0 {
sel := simd.select(c_vecs[1-j], SCANNER_INDICES_256, SCANNER_SENTINEL_MAX_256)
off := simd.reduce_max(sel)
return i + (1-j) * SIMD_REG_SIZE_256 + int(off)
}
}
}
}
} else {
// Same scheme as the AVX2 branch, with 128-bit vectors.
s_vecs: [4]simd.u8x16 = ---
c_vecs: [4]simd.u8x16 = ---
m_vecs: [4]u8 = ---
// Scan 64-byte chunks, using 128-bit SIMD.
for i >= 4 * SIMD_REG_SIZE_128 {
i -= 4 * SIMD_REG_SIZE_128
#unroll for j in 0..<4 {
s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x16)raw_data(s[i+j*SIMD_REG_SIZE_128:]))
c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec)
m_vecs[j] = simd.reduce_or(c_vecs[j])
}
if m_vecs[0] | m_vecs[1] | m_vecs[2] | m_vecs[3] > 0 {
#unroll for j in 0..<4 {
if m_vecs[3-j] > 0 {
sel := simd.select(c_vecs[3-j], SCANNER_INDICES_128, SCANNER_SENTINEL_MAX_128)
off := simd.reduce_max(sel)
return i + (3-j) * SIMD_REG_SIZE_128 + int(off)
}
}
}
}
}
}
// Scan the remaining SIMD register sized chunks.
//
// Apparently LLVM does ok with 128-bit SWAR, so this path is also taken
// on potato targets. Scanning more at a time when LLVM is emulating SIMD
// likely does not buy much, as all that does is increase GP register
// pressure.
for i >= SIMD_REG_SIZE_128 {
i -= SIMD_REG_SIZE_128
s0 := intrinsics.unaligned_load(cast(^simd.u8x16)raw_data(s[i:]))
c0 := simd.lanes_eq(s0, c_vec)
if simd.reduce_or(c0) > 0 {
sel := simd.select(c0, SCANNER_INDICES_128, SCANNER_SENTINEL_MAX_128)
off := simd.reduce_max(sel)
return i + int(off)
}
}
// Scan serially for the remainder.
for i > 0 {
i -= 1
if s[i] == c {
return i
}
}
// No byte equal to `c` anywhere in `s`.
return -1
}
@private PRIME_RABIN_KARP :: 16777619
index :: proc(s, substr: []byte) -> int {
+9 -2
View File
@@ -9,10 +9,11 @@ Reader :: struct {
prev_rune: int, // previous reading index of rune or < 0
}
reader_init :: proc(r: ^Reader, s: []byte) {
reader_init :: proc(r: ^Reader, s: []byte) -> io.Stream {
r.s = s
r.i = 0
r.prev_rune = -1
return reader_to_stream(r)
}
reader_to_stream :: proc(r: ^Reader) -> (s: io.Stream) {
@@ -33,6 +34,9 @@ reader_size :: proc(r: ^Reader) -> i64 {
}
reader_read :: proc(r: ^Reader, p: []byte) -> (n: int, err: io.Error) {
if len(p) == 0 {
return 0, nil
}
if r.i >= i64(len(r.s)) {
return 0, .EOF
}
@@ -42,6 +46,9 @@ reader_read :: proc(r: ^Reader, p: []byte) -> (n: int, err: io.Error) {
return
}
reader_read_at :: proc(r: ^Reader, p: []byte, off: i64) -> (n: int, err: io.Error) {
if len(p) == 0 {
return 0, nil
}
if off < 0 {
return 0, .Invalid_Offset
}
@@ -97,7 +104,6 @@ reader_unread_rune :: proc(r: ^Reader) -> io.Error {
return nil
}
reader_seek :: proc(r: ^Reader, offset: i64, whence: io.Seek_From) -> (i64, io.Error) {
r.prev_rune = -1
abs: i64
switch whence {
case .Start:
@@ -114,6 +120,7 @@ reader_seek :: proc(r: ^Reader, offset: i64, whence: io.Seek_From) -> (i64, io.E
return 0, .Invalid_Offset
}
r.i = abs
r.prev_rune = -1
return abs, nil
}
reader_write_to :: proc(r: ^Reader, w: io.Writer) -> (n: i64, err: io.Error) {
+2 -2
View File
@@ -47,8 +47,8 @@ foreign libc {
clogf :: proc(z: complex_float) -> complex_float ---
// 7.3.8 Power and absolute-value functions
cabs :: proc(z: complex_double) -> complex_double ---
cabsf :: proc(z: complex_float) -> complex_float ---
cabs :: proc(z: complex_double) -> double ---
cabsf :: proc(z: complex_float) -> float ---
cpow :: proc(x, y: complex_double) -> complex_double ---
cpowf :: proc(x, y: complex_float) -> complex_float ---
csqrt :: proc(z: complex_double) -> complex_double ---
+1 -1
View File
@@ -102,6 +102,6 @@ when ODIN_OS == .Haiku {
// read the value, or to produce an lvalue such that you can assign a different
// error value to errno. To work around this, just expose it as a function like
// it actually is.
errno :: #force_inline proc() -> ^int {
errno :: #force_inline proc "contextless" () -> ^int {
return _get_errno()
}
+12 -7
View File
@@ -32,24 +32,21 @@ when ODIN_OS == .Windows {
// the RDX register will contain zero and correctly set the flag to disable
// stack unwinding.
@(link_name="_setjmp")
setjmp :: proc(env: ^jmp_buf, hack: rawptr = nil) -> int ---
setjmp :: proc(env: ^jmp_buf, hack: rawptr = nil) -> int ---
}
} else {
@(default_calling_convention="c")
foreign libc {
// 7.13.1 Save calling environment
//
// NOTE(dweiler): C11 requires setjmp be a macro, which means it won't
// necessarily export a symbol named setjmp but rather _setjmp in the case
// of musl, glibc, BSD libc, and msvcrt.
@(link_name="_setjmp")
setjmp :: proc(env: ^jmp_buf) -> int ---
@(link_name=LSETJMP)
setjmp :: proc(env: ^jmp_buf) -> int ---
}
}
@(default_calling_convention="c")
foreign libc {
// 7.13.2 Restore calling environment
@(link_name=LLONGJMP)
longjmp :: proc(env: ^jmp_buf, val: int) -> ! ---
}
@@ -64,3 +61,11 @@ foreign libc {
// The choice of 4096 bytes for storage of this type is more than enough on all
// relevant platforms.
jmp_buf :: struct #align(16) { _: [4096]char, }
// Symbol names that `setjmp` and `longjmp` are linked against, selected at
// compile time from the target OS. They are consumed by the `link_name`
// attributes on the foreign declarations of those procedures.
when ODIN_OS == .NetBSD {
@(private) LSETJMP :: "__setjmp14"
@(private) LLONGJMP :: "__longjmp14"
} else {
@(private) LSETJMP :: "setjmp"
@(private) LLONGJMP :: "longjmp"
}
+36 -9
View File
@@ -17,6 +17,12 @@ when ODIN_OS == .Windows {
FILE :: struct {}
// Typed form of the `whence` argument taken by `fseek`. Each member has the
// value of the corresponding `SEEK_*` constant declared for the target OS.
Whence :: enum int {
SET = SEEK_SET,
CUR = SEEK_CUR,
END = SEEK_END,
}
// MSVCRT compatible.
when ODIN_OS == .Windows {
_IOFBF :: 0x0000
@@ -101,6 +107,8 @@ when ODIN_OS == .OpenBSD || ODIN_OS == .NetBSD {
SEEK_CUR :: 1
SEEK_END :: 2
TMP_MAX :: 308915776
foreign libc {
__sF: [3]FILE
}
@@ -128,6 +136,8 @@ when ODIN_OS == .FreeBSD {
SEEK_CUR :: 1
SEEK_END :: 2
TMP_MAX :: 308915776
foreign libc {
@(link_name="__stderrp") stderr: ^FILE
@(link_name="__stdinp") stdin: ^FILE
@@ -195,10 +205,21 @@ when ODIN_OS == .Haiku {
}
}
// Symbol names that `rename`, `fgetpos` and `fsetpos` are linked against,
// selected at compile time from the target OS. They are consumed by the
// `link_name` attributes on the foreign declarations of those procedures.
when ODIN_OS == .NetBSD {
@(private) LRENAME :: "__posix_rename"
@(private) LFGETPOS :: "__fgetpos50"
@(private) LFSETPOS :: "__fsetpos50"
} else {
@(private) LRENAME :: "rename"
@(private) LFGETPOS :: "fgetpos"
@(private) LFSETPOS :: "fsetpos"
}
@(default_calling_convention="c")
foreign libc {
// 7.21.4 Operations on files
remove :: proc(filename: cstring) -> int ---
@(link_name=LRENAME)
rename :: proc(old, new: cstring) -> int ---
tmpfile :: proc() -> ^FILE ---
tmpnam :: proc(s: [^]char) -> [^]char ---
@@ -240,8 +261,10 @@ foreign libc {
fwrite :: proc(ptr: rawptr, size: size_t, nmemb: size_t, stream: ^FILE) -> size_t ---
// 7.21.9 File positioning functions
@(link_name=LFGETPOS)
fgetpos :: proc(stream: ^FILE, pos: ^fpos_t) -> int ---
fseek :: proc(stream: ^FILE, offset: long, whence: int) -> int ---
fseek :: proc(stream: ^FILE, offset: long, whence: Whence) -> int ---
@(link_name=LFSETPOS)
fsetpos :: proc(stream: ^FILE, pos: ^fpos_t) -> int ---
ftell :: proc(stream: ^FILE) -> long ---
rewind :: proc(stream: ^FILE) ---
@@ -288,11 +311,11 @@ to_stream :: proc(file: ^FILE) -> io.Stream {
return 0, unknown_or_eof(file)
}
if fseek(file, long(offset), SEEK_SET) != 0 {
if fseek(file, long(offset), .SET) != 0 {
return 0, unknown_or_eof(file)
}
defer fseek(file, long(curr), SEEK_SET)
defer fseek(file, long(curr), .SET)
n = i64(fread(raw_data(p), size_of(byte), len(p), file))
if n == 0 { err = unknown_or_eof(file) }
@@ -307,17 +330,21 @@ to_stream :: proc(file: ^FILE) -> io.Stream {
return 0, unknown_or_eof(file)
}
if fseek(file, long(offset), SEEK_SET) != 0 {
if fseek(file, long(offset), .SET) != 0 {
return 0, unknown_or_eof(file)
}
defer fseek(file, long(curr), SEEK_SET)
defer fseek(file, long(curr), .SET)
n = i64(fwrite(raw_data(p), size_of(byte), len(p), file))
if n == 0 { err = unknown_or_eof(file) }
case .Seek:
if fseek(file, long(offset), int(whence)) != 0 {
#assert(int(Whence.SET) == int(io.Seek_From.Start))
#assert(int(Whence.CUR) == int(io.Seek_From.Current))
#assert(int(Whence.END) == int(io.Seek_From.End))
if fseek(file, long(offset), Whence(whence)) != 0 {
return 0, unknown_or_eof(file)
}
@@ -326,9 +353,9 @@ to_stream :: proc(file: ^FILE) -> io.Stream {
if curr == -1 {
return 0, unknown_or_eof(file)
}
defer fseek(file, curr, SEEK_SET)
defer fseek(file, curr, .SET)
if fseek(file, 0, SEEK_END) != 0 {
if fseek(file, 0, .END) != 0 {
return 0, unknown_or_eof(file)
}
@@ -341,7 +368,7 @@ to_stream :: proc(file: ^FILE) -> io.Stream {
return 0, .Empty
case .Query:
return io.query_utility({ .Close, .Flush, .Read, .Read_At, .Write, .Write_At, .Seek, .Size })
return io.query_utility({ .Close, .Flush, .Read, .Read_At, .Write, .Write_At, .Seek, .Size, .Query })
}
return
}
+17 -4
View File
@@ -40,10 +40,9 @@ when ODIN_OS == .Linux {
}
when ODIN_OS == .Darwin {
when ODIN_OS == .Darwin || ODIN_OS == .FreeBSD || ODIN_OS == .OpenBSD {
RAND_MAX :: 0x7fffffff
// GLIBC and MUSL only
@(private="file")
@(default_calling_convention="c")
foreign libc {
@@ -55,6 +54,20 @@ when ODIN_OS == .Darwin {
}
}
// NetBSD definitions of `RAND_MAX` and `MB_CUR_MAX`.
when ODIN_OS == .NetBSD {
RAND_MAX :: 0x7fffffff
// `__mb_cur_max` is a variable imported from libc. It is private to this file
// and exposed to users through `MB_CUR_MAX` below.
@(private="file")
@(default_calling_convention="c")
foreign libc {
__mb_cur_max: size_t
}
// Returns the current value of libc's `__mb_cur_max`. It is a procedure rather
// than a constant because the value is read from a libc variable at run time.
MB_CUR_MAX :: #force_inline proc() -> size_t {
return __mb_cur_max
}
}
// C does not declare what these values should be, as an implementation is free
// to use any two distinct values it wants to indicate success or failure.
// However, nobody actually does and everyone appears to have agreed upon these
@@ -99,7 +112,7 @@ foreign libc {
at_quick_exit :: proc(func: proc "c" ()) -> int ---
exit :: proc(status: int) -> ! ---
_Exit :: proc(status: int) -> ! ---
getenv :: proc(name: cstring) -> [^]char ---
getenv :: proc(name: cstring) -> cstring ---
quick_exit :: proc(status: int) -> ! ---
system :: proc(cmd: cstring) -> int ---
@@ -150,4 +163,4 @@ aligned_free :: #force_inline proc "c" (ptr: rawptr) {
} else {
free(ptr)
}
}
}
+1 -1
View File
@@ -40,7 +40,7 @@ foreign libc {
strtok :: proc(s1: [^]char, s2: cstring) -> [^]char ---
// 7.24.6 Miscellaneous functions
strerror :: proc(errnum: int) -> [^]char ---
strerror :: proc(errnum: int) -> cstring ---
strlen :: proc(s: cstring) -> size_t ---
}
memset :: proc "c" (s: rawptr, c: int, n: size_t) -> rawptr {
+29 -3
View File
@@ -50,30 +50,56 @@ when ODIN_OS == .Linux || ODIN_OS == .FreeBSD || ODIN_OS == .Darwin || ODIN_OS =
foreign libc {
// 7.27.2 Time manipulation functions
clock :: proc() -> clock_t ---
@(link_name=LDIFFTIME)
difftime :: proc(time1, time2: time_t) -> double ---
@(link_name=LMKTIME)
mktime :: proc(timeptr: ^tm) -> time_t ---
@(link_name=LTIME)
time :: proc(timer: ^time_t) -> time_t ---
timespec_get :: proc(ts: ^timespec, base: int) -> int ---
// 7.27.3 Time conversion functions
asctime :: proc(timeptr: ^tm) -> [^]char ---
@(link_name=LCTIME)
ctime :: proc(timer: ^time_t) -> [^]char ---
@(link_name=LGMTIME)
gmtime :: proc(timer: ^time_t) -> ^tm ---
@(link_name=LLOCALTIME)
localtime :: proc(timer: ^time_t) -> ^tm ---
strftime :: proc(s: [^]char, maxsize: size_t, format: cstring, timeptr: ^tm) -> size_t ---
}
// Symbol names that the time procedures (`difftime`, `mktime`, `time`,
// `ctime`, `gmtime`, `localtime`) are linked against, selected at compile time
// from the target OS. They are consumed by the `link_name` attributes on the
// foreign declarations of those procedures.
when ODIN_OS == .NetBSD {
@(private) LDIFFTIME :: "__difftime50"
@(private) LMKTIME :: "__mktime50"
@(private) LTIME :: "__time50"
@(private) LCTIME :: "__ctime50"
@(private) LGMTIME :: "__gmtime50"
@(private) LLOCALTIME :: "__localtime50"
} else {
@(private) LDIFFTIME :: "difftime"
@(private) LMKTIME :: "mktime"
@(private) LTIME :: "time"
@(private) LCTIME :: "ctime"
@(private) LGMTIME :: "gmtime"
@(private) LLOCALTIME :: "localtime"
}
when ODIN_OS == .OpenBSD {
CLOCKS_PER_SEC :: 100
} else {
CLOCKS_PER_SEC :: 1000000
}
TIME_UTC :: 1
TIME_UTC :: 1
time_t :: distinct i64
time_t :: distinct i64
clock_t :: long
when ODIN_OS == .FreeBSD || ODIN_OS == .NetBSD {
clock_t :: distinct int32_t
} else {
clock_t :: distinct long
}
timespec :: struct {
tv_sec: time_t,
+90
View File
@@ -0,0 +1,90 @@
/*
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-3 license.
List of contributors:
Jeroen van Rijn: Initial implementation.
Ginger Bill: Cosmetic changes.
A small GZIP implementation as an example.
*/
/*
Example:
import "core:bytes"
import "core:os"
import "core:compress"
import "core:fmt"
// Small GZIP file with fextra, fname and fcomment present.
@private
TEST: []u8 = {
0x1f, 0x8b, 0x08, 0x1c, 0xcb, 0x3b, 0x3a, 0x5a,
0x02, 0x03, 0x07, 0x00, 0x61, 0x62, 0x03, 0x00,
0x63, 0x64, 0x65, 0x66, 0x69, 0x6c, 0x65, 0x6e,
0x61, 0x6d, 0x65, 0x00, 0x54, 0x68, 0x69, 0x73,
0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x6f,
0x6d, 0x6d, 0x65, 0x6e, 0x74, 0x00, 0x2b, 0x48,
0xac, 0xcc, 0xc9, 0x4f, 0x4c, 0x01, 0x00, 0x15,
0x6a, 0x2c, 0x42, 0x07, 0x00, 0x00, 0x00,
}
main :: proc() {
// Set up output buffer.
buf := bytes.Buffer{}
stdout :: proc(s: string) {
os.write_string(os.stdout, s)
}
stderr :: proc(s: string) {
os.write_string(os.stderr, s)
}
args := os.args
if len(args) < 2 {
stderr("No input file specified.\n")
err := load(data=TEST, buf=&buf, known_gzip_size=len(TEST))
if err == nil {
stdout("Displaying test vector: ")
stdout(bytes.buffer_to_string(&buf))
stdout("\n")
} else {
fmt.printf("gzip.load returned %v\n", err)
}
bytes.buffer_destroy(&buf)
os.exit(0)
}
// The rest are all files.
args = args[1:]
err: Error
for file in args {
if file == "-" {
// Read from stdin
s := os.stream_from_handle(os.stdin)
ctx := &compress.Context_Stream_Input{
input = s,
}
err = load(ctx, &buf)
} else {
err = load(file, &buf)
}
if err != nil {
if err == E_General.File_Not_Found {
stderr("File not found: ")
stderr(file)
stderr("\n")
os.exit(1)
}
stderr("GZIP returned an error.\n")
bytes.buffer_destroy(&buf)
os.exit(2)
}
stdout(bytes.buffer_to_string(&buf))
}
bytes.buffer_destroy(&buf)
}
*/
package compress_gzip
-89
View File
@@ -1,89 +0,0 @@
//+build ignore
package compress_gzip
/*
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-3 license.
List of contributors:
Jeroen van Rijn: Initial implementation.
Ginger Bill: Cosmetic changes.
A small GZIP implementation as an example.
*/
import "core:bytes"
import "core:os"
import "core:compress"
import "core:fmt"
// Small GZIP file with fextra, fname and fcomment present.
@private
TEST: []u8 = {
0x1f, 0x8b, 0x08, 0x1c, 0xcb, 0x3b, 0x3a, 0x5a,
0x02, 0x03, 0x07, 0x00, 0x61, 0x62, 0x03, 0x00,
0x63, 0x64, 0x65, 0x66, 0x69, 0x6c, 0x65, 0x6e,
0x61, 0x6d, 0x65, 0x00, 0x54, 0x68, 0x69, 0x73,
0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x6f,
0x6d, 0x6d, 0x65, 0x6e, 0x74, 0x00, 0x2b, 0x48,
0xac, 0xcc, 0xc9, 0x4f, 0x4c, 0x01, 0x00, 0x15,
0x6a, 0x2c, 0x42, 0x07, 0x00, 0x00, 0x00,
}
main :: proc() {
// Set up output buffer.
buf := bytes.Buffer{}
stdout :: proc(s: string) {
os.write_string(os.stdout, s)
}
stderr :: proc(s: string) {
os.write_string(os.stderr, s)
}
args := os.args
if len(args) < 2 {
stderr("No input file specified.\n")
err := load(data=TEST, buf=&buf, known_gzip_size=len(TEST))
if err == nil {
stdout("Displaying test vector: ")
stdout(bytes.buffer_to_string(&buf))
stdout("\n")
} else {
fmt.printf("gzip.load returned %v\n", err)
}
bytes.buffer_destroy(&buf)
os.exit(0)
}
// The rest are all files.
args = args[1:]
err: Error
for file in args {
if file == "-" {
// Read from stdin
s := os.stream_from_handle(os.stdin)
ctx := &compress.Context_Stream_Input{
input = s,
}
err = load(ctx, &buf)
} else {
err = load(file, &buf)
}
if err != nil {
if err != E_General.File_Not_Found {
stderr("File not found: ")
stderr(file)
stderr("\n")
os.exit(1)
}
stderr("GZIP returned an error.\n")
bytes.buffer_destroy(&buf)
os.exit(2)
}
stdout(bytes.buffer_to_string(&buf))
}
bytes.buffer_destroy(&buf)
}
+1 -2
View File
@@ -4,7 +4,6 @@
which is an English word model.
*/
// package shoco is an implementation of the shoco short string compressor
package compress_shoco
DEFAULT_MODEL :: Shoco_Model {
@@ -145,4 +144,4 @@ DEFAULT_MODEL :: Shoco_Model {
{ 0xc0000000, 2, 4, { 25, 22, 19, 16, 16, 16, 16, 16 }, { 15, 7, 7, 7, 0, 0, 0, 0 }, 0xe0, 0xc0 },
{ 0xe0000000, 4, 8, { 23, 19, 15, 11, 8, 5, 2, 0 }, { 31, 15, 15, 15, 7, 7, 7, 3 }, 0xf0, 0xe0 },
},
}
}
+2 -2
View File
@@ -8,7 +8,7 @@
An implementation of [shoco](https://github.com/Ed-von-Schleck/shoco) by Christian Schramm.
*/
// package shoco is an implementation of the shoco short string compressor
// package shoco is an implementation of the shoco short string compressor.
package compress_shoco
import "base:intrinsics"
@@ -308,4 +308,4 @@ compress_string :: proc(input: string, model := DEFAULT_MODEL, allocator := cont
resize(&buf, length) or_return
return buf[:length], result
}
compress :: proc{compress_string_to_buffer, compress_string}
compress :: proc{compress_string_to_buffer, compress_string}
+50
View File
@@ -0,0 +1,50 @@
/*
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-3 license.
List of contributors:
Jeroen van Rijn: Initial implementation.
An example of how to use `zlib.inflate`.
*/
/*
Example:
package main
import "core:bytes"
import "core:fmt"
main :: proc() {
ODIN_DEMO := []u8{
120, 218, 101, 144, 65, 110, 131, 48, 16, 69, 215, 246, 41, 190, 44, 69, 73, 32, 148, 182,
75, 75, 28, 32, 251, 46, 217, 88, 238, 0, 86, 192, 32, 219, 36, 170, 170, 172, 122, 137,
238, 122, 197, 30, 161, 70, 162, 20, 81, 203, 139, 25, 191, 255, 191, 60, 51, 40, 125, 81,
53, 33, 144, 15, 156, 155, 110, 232, 93, 128, 208, 189, 35, 89, 117, 65, 112, 222, 41, 99,
33, 37, 6, 215, 235, 195, 17, 239, 156, 197, 170, 118, 170, 131, 44, 32, 82, 164, 72, 240,
253, 245, 249, 129, 12, 185, 224, 76, 105, 61, 118, 99, 171, 66, 239, 38, 193, 35, 103, 85,
172, 66, 127, 33, 139, 24, 244, 235, 141, 49, 204, 223, 76, 208, 205, 204, 166, 7, 173, 60,
97, 159, 238, 37, 214, 41, 105, 129, 167, 5, 102, 27, 152, 173, 97, 178, 129, 73, 129, 231,
5, 230, 27, 152, 175, 225, 52, 192, 127, 243, 170, 157, 149, 18, 121, 142, 115, 109, 227, 122,
64, 87, 114, 111, 161, 49, 182, 6, 181, 158, 162, 226, 206, 167, 27, 215, 246, 48, 56, 99,
67, 117, 16, 47, 13, 45, 35, 151, 98, 231, 75, 1, 173, 90, 61, 101, 146, 71, 136, 244,
170, 218, 145, 176, 123, 45, 173, 56, 113, 134, 191, 51, 219, 78, 235, 95, 28, 249, 253, 7,
159, 150, 133, 125,
}
OUTPUT_SIZE :: 432
buf: bytes.Buffer
// We can pass ", true" to inflate a raw DEFLATE stream instead of a ZLIB wrapped one.
err := inflate(input=ODIN_DEMO, buf=&buf, expected_output_size=OUTPUT_SIZE)
defer bytes.buffer_destroy(&buf)
if err != nil {
fmt.printf("\nError: %v\n", err)
}
s := bytes.buffer_to_string(&buf)
fmt.printf("Input: %v bytes, output (%v bytes):\n%v\n", len(ODIN_DEMO), len(s), s)
assert(len(s) == OUTPUT_SIZE)
}
*/
package compress_zlib
-47
View File
@@ -1,47 +0,0 @@
//+build ignore
package compress_zlib
/*
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-3 license.
List of contributors:
Jeroen van Rijn: Initial implementation.
An example of how to use `zlib.inflate`.
*/
import "core:bytes"
import "core:fmt"
main :: proc() {
ODIN_DEMO := []u8{
120, 218, 101, 144, 65, 110, 131, 48, 16, 69, 215, 246, 41, 190, 44, 69, 73, 32, 148, 182,
75, 75, 28, 32, 251, 46, 217, 88, 238, 0, 86, 192, 32, 219, 36, 170, 170, 172, 122, 137,
238, 122, 197, 30, 161, 70, 162, 20, 81, 203, 139, 25, 191, 255, 191, 60, 51, 40, 125, 81,
53, 33, 144, 15, 156, 155, 110, 232, 93, 128, 208, 189, 35, 89, 117, 65, 112, 222, 41, 99,
33, 37, 6, 215, 235, 195, 17, 239, 156, 197, 170, 118, 170, 131, 44, 32, 82, 164, 72, 240,
253, 245, 249, 129, 12, 185, 224, 76, 105, 61, 118, 99, 171, 66, 239, 38, 193, 35, 103, 85,
172, 66, 127, 33, 139, 24, 244, 235, 141, 49, 204, 223, 76, 208, 205, 204, 166, 7, 173, 60,
97, 159, 238, 37, 214, 41, 105, 129, 167, 5, 102, 27, 152, 173, 97, 178, 129, 73, 129, 231,
5, 230, 27, 152, 175, 225, 52, 192, 127, 243, 170, 157, 149, 18, 121, 142, 115, 109, 227, 122,
64, 87, 114, 111, 161, 49, 182, 6, 181, 158, 162, 226, 206, 167, 27, 215, 246, 48, 56, 99,
67, 117, 16, 47, 13, 45, 35, 151, 98, 231, 75, 1, 173, 90, 61, 101, 146, 71, 136, 244,
170, 218, 145, 176, 123, 45, 173, 56, 113, 134, 191, 51, 219, 78, 235, 95, 28, 249, 253, 7,
159, 150, 133, 125,
}
OUTPUT_SIZE :: 432
buf: bytes.Buffer
// We can pass ", true" to inflate a raw DEFLATE stream instead of a ZLIB wrapped one.
err := inflate(input=ODIN_DEMO, buf=&buf, expected_output_size=OUTPUT_SIZE)
defer bytes.buffer_destroy(&buf)
if err != nil {
fmt.printf("\nError: %v\n", err)
}
s := bytes.buffer_to_string(&buf)
fmt.printf("Input: %v bytes, output (%v bytes):\n%v\n", len(ODIN_DEMO), len(s), s)
assert(len(s) == OUTPUT_SIZE)
}
+2 -7
View File
@@ -12,6 +12,7 @@ package compress_zlib
import "core:compress"
import "base:intrinsics"
import "core:mem"
import "core:io"
import "core:hash"
@@ -123,13 +124,7 @@ Huffman_Table :: struct {
@(optimization_mode="favor_size")
z_bit_reverse :: #force_inline proc(n: u16, bits: u8) -> (r: u16) {
assert(bits <= 16)
// NOTE: Can optimize with llvm.bitreverse.i64 or some bit twiddling
// by reversing all of the bits and masking out the unneeded ones.
r = n
r = ((r & 0xAAAA) >> 1) | ((r & 0x5555) << 1)
r = ((r & 0xCCCC) >> 2) | ((r & 0x3333) << 2)
r = ((r & 0xF0F0) >> 4) | ((r & 0x0F0F) << 4)
r = ((r & 0xFF00) >> 8) | ((r & 0x00FF) << 8)
r = intrinsics.reverse_bits(n)
r >>= (16 - bits)
return
+60 -14
View File
@@ -1,5 +1,6 @@
package container_dynamic_bit_array
import "base:builtin"
import "base:intrinsics"
import "core:mem"
@@ -18,7 +19,7 @@ NUM_BITS :: 64
Bit_Array :: struct {
bits: [dynamic]u64,
bias: int,
max_index: int,
length: int,
free_pointer: bool,
}
@@ -52,9 +53,9 @@ Returns:
*/
iterate_by_all :: proc (it: ^Bit_Array_Iterator) -> (set: bool, index: int, ok: bool) {
index = it.word_idx * NUM_BITS + int(it.bit_idx) + it.array.bias
if index > it.array.max_index { return false, 0, false }
if index >= it.array.length + it.array.bias { return false, 0, false }
word := it.array.bits[it.word_idx] if len(it.array.bits) > it.word_idx else 0
word := it.array.bits[it.word_idx] if builtin.len(it.array.bits) > it.word_idx else 0
set = (word >> it.bit_idx & 1) == 1
it.bit_idx += 1
@@ -106,22 +107,22 @@ Returns:
*/
@(private="file")
iterate_internal_ :: proc (it: ^Bit_Array_Iterator, $ITERATE_SET_BITS: bool) -> (index: int, ok: bool) {
word := it.array.bits[it.word_idx] if len(it.array.bits) > it.word_idx else 0
word := it.array.bits[it.word_idx] if builtin.len(it.array.bits) > it.word_idx else 0
when ! ITERATE_SET_BITS { word = ~word }
// If the word is empty or we have already gone over all the bits in it,
// b.bit_idx is greater than the index of any set bit in the word,
// meaning that word >> b.bit_idx == 0.
for it.word_idx < len(it.array.bits) && word >> it.bit_idx == 0 {
for it.word_idx < builtin.len(it.array.bits) && word >> it.bit_idx == 0 {
it.word_idx += 1
it.bit_idx = 0
word = it.array.bits[it.word_idx] if len(it.array.bits) > it.word_idx else 0
word = it.array.bits[it.word_idx] if builtin.len(it.array.bits) > it.word_idx else 0
when ! ITERATE_SET_BITS { word = ~word }
}
// If we are iterating the set bits, reaching the end of the array means we have no more bits to check
when ITERATE_SET_BITS {
if it.word_idx >= len(it.array.bits) {
if it.word_idx >= builtin.len(it.array.bits) {
return 0, false
}
}
@@ -135,7 +136,7 @@ iterate_internal_ :: proc (it: ^Bit_Array_Iterator, $ITERATE_SET_BITS: bool) ->
it.bit_idx = 0
it.word_idx += 1
}
return index, index <= it.array.max_index
return index, index < it.array.length + it.array.bias
}
/*
Gets the state of a bit in the bit-array
@@ -160,7 +161,7 @@ get :: proc(ba: ^Bit_Array, #any_int index: uint) -> (res: bool, ok: bool) #opti
If we `get` a bit that doesn't fit in the Bit Array, it's naturally `false`.
This early-out prevents unnecessary resizing.
*/
if leg_index + 1 > len(ba.bits) { return false, true }
if leg_index + 1 > builtin.len(ba.bits) { return false, true }
val := u64(1 << uint(bit_index))
res = ba.bits[leg_index] & val == val
@@ -208,7 +209,7 @@ set :: proc(ba: ^Bit_Array, #any_int index: uint, set_to: bool = true, allocator
resize_if_needed(ba, leg_index) or_return
ba.max_index = max(idx, ba.max_index)
ba.length = max(1 + idx, ba.length)
if set_to {
ba.bits[leg_index] |= 1 << uint(bit_index)
@@ -261,6 +262,9 @@ unsafe_unset :: proc(b: ^Bit_Array, bit: int) #no_bounds_check {
/*
A helper function to create a Bit Array with optional bias, in case your smallest index is non-zero (including negative).
The range of bits created by this procedure is `min_index..<max_index`, and the
array will be able to expand beyond `max_index` if needed.
*Allocates (`new(Bit_Array) & make(ba.bits)`)*
Inputs:
@@ -275,7 +279,7 @@ create :: proc(max_index: int, min_index: int = 0, allocator := context.allocato
context.allocator = allocator
size_in_bits := max_index - min_index
if size_in_bits < 1 { return {}, false }
if size_in_bits < 0 { return {}, false }
legs := size_in_bits >> INDEX_SHIFT
if size_in_bits & INDEX_MASK > 0 {legs+=1}
@@ -284,7 +288,7 @@ create :: proc(max_index: int, min_index: int = 0, allocator := context.allocato
res = new(Bit_Array)
res.bits = bits
res.bias = min_index
res.max_index = max_index
res.length = max_index - min_index
res.free_pointer = true
return
}
@@ -299,6 +303,48 @@ clear :: proc(ba: ^Bit_Array) {
mem.zero_slice(ba.bits[:])
}
/*
Gets the length of set and unset valid bits in the Bit_Array.
Inputs:
- ba: The target Bit_Array
Returns:
- length: The length of valid bits.
*/
len :: proc(ba: ^Bit_Array) -> (length: int) {
	// A nil Bit_Array reports the zero value of the named result.
	if ba != nil {
		length = ba.length
	}
	return
}
/*
Shrinks the Bit_Array's backing storage to the smallest possible size.
Inputs:
- ba: The target Bit_Array
*/
shrink :: proc(ba: ^Bit_Array) #no_bounds_check {
	if ba == nil { return }

	// Count how many legs remain once trailing all-zero legs are dropped.
	total_legs := builtin.len(ba.bits)
	used_legs  := total_legs
	for used_legs > 0 && ba.bits[used_legs - 1] == 0 {
		used_legs -= 1
	}

	// Nothing to trim: leave both the length and the storage untouched.
	if used_legs == total_legs {
		return
	}

	if used_legs == 0 {
		ba.length = 0
	} else {
		// All lower legs count in full; the top leg counts up to and
		// including its highest set bit.
		top_leg := ba.bits[used_legs - 1]
		ba.length = (used_legs - 1) * NUM_BITS + NUM_BITS - int(intrinsics.count_leading_zeros(top_leg))
	}

	resize(&ba.bits, used_legs)
	builtin.shrink(&ba.bits)
}
/*
Deallocates the Bit_Array and its backing storage
Inputs:
@@ -321,8 +367,8 @@ resize_if_needed :: proc(ba: ^Bit_Array, legs: int, allocator := context.allocat
context.allocator = allocator
if legs + 1 > len(ba.bits) {
if legs + 1 > builtin.len(ba.bits) {
resize(&ba.bits, legs + 1)
}
return len(ba.bits) > legs
return builtin.len(ba.bits) > legs
}
+4 -4
View File
@@ -1,8 +1,8 @@
/*
The Bit Array can be used in several ways:
- By default you don't need to instantiate a Bit Array:
By default you don't need to instantiate a Bit Array.
Example:
package test
import "core:fmt"
@@ -22,8 +22,8 @@ The Bit Array can be used in several ways:
destroy(&bits)
}
- A Bit Array can optionally allow for negative indices, if the minimum value was given during creation:
A Bit Array can optionally allow for negative indices, if the minimum value was given during creation.
Example:
package test
import "core:fmt"
+9 -6
View File
@@ -1,22 +1,22 @@
/*
Package list implements an intrusive doubly-linked list.
An intrusive container requires a `Node` to be embedded in your own structure, like this:
An intrusive container requires a `Node` to be embedded in your own structure, like this.
Example:
My_String :: struct {
node: list.Node,
value: string,
}
Embedding the members of a `list.Node` in your structure with the `using` keyword is also allowed:
Embedding the members of a `list.Node` in your structure with the `using` keyword is also allowed.
Example:
My_String :: struct {
using node: list.Node,
value: string,
}
Here is a full example:
Here is a full example.
Example:
package test
import "core:fmt"
@@ -42,5 +42,8 @@ Here is a full example:
value: string,
}
Output:
Hello
World
*/
package container_intrusive_list
+7 -3
View File
@@ -139,9 +139,13 @@ clear :: proc "contextless" (a: ^$A/Small_Array($N, $T)) {
resize(a, 0)
}
push_back_elems :: proc "contextless" (a: ^$A/Small_Array($N, $T), items: ..T) {
n := copy(a.data[a.len:], items[:])
a.len += n
push_back_elems :: proc "contextless" (a: ^$A/Small_Array($N, $T), items: ..T) -> bool {
if a.len + builtin.len(items) <= cap(a^) {
n := copy(a.data[a.len:], items[:])
a.len += n
return true
}
return false
}
inject_at :: proc "contextless" (a: ^$A/Small_Array($N, $T), item: T, index: int) -> bool #no_bounds_check {
+1 -1
View File
@@ -80,8 +80,8 @@ ghash :: proc "contextless" (dst, key, data: []byte) {
h2 := h0 ~ h1
h2r := h0r ~ h1r
src: []byte
for l > 0 {
src: []byte = ---
if l >= _aes.GHASH_BLOCK_SIZE {
src = buf
buf = buf[_aes.GHASH_BLOCK_SIZE:]
+1 -1
View File
@@ -3,7 +3,7 @@ package aes_hw_intel
import "core:sys/info"
// is_supporte returns true iff hardware accelerated AES
// is_supported returns true iff hardware accelerated AES
// is supported.
is_supported :: proc "contextless" () -> bool {
features, ok := info.cpu_features.?
+3 -7
View File
@@ -25,7 +25,6 @@ package aes_hw_intel
import "base:intrinsics"
import "core:crypto/_aes"
import "core:simd"
import "core:simd/x86"
@(private = "file")
@@ -58,14 +57,11 @@ GHASH_STRIDE_BYTES_HW :: GHASH_STRIDE_HW * _aes.GHASH_BLOCK_SIZE
// chunks. We number chunks from 0 to 3 in left to right order.
@(private = "file")
byteswap_index := transmute(x86.__m128i)simd.i8x16{
// Note: simd.i8x16 is reverse order from x86._mm_set_epi8.
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
}
_BYTESWAP_INDEX: x86.__m128i : { 0x08090a0b0c0d0e0f, 0x0001020304050607 }
@(private = "file", require_results, enable_target_feature = "sse2,ssse3")
byteswap :: #force_inline proc "contextless" (x: x86.__m128i) -> x86.__m128i {
return x86._mm_shuffle_epi8(x, byteswap_index)
return x86._mm_shuffle_epi8(x, _BYTESWAP_INDEX)
}
// From a 128-bit value kw, compute kx as the XOR of the two 64-bit
@@ -244,8 +240,8 @@ ghash :: proc "contextless" (dst, key, data: []byte) #no_bounds_check {
}
// Process 1 block at a time
src: []byte
for l > 0 {
src: []byte = ---
if l >= _aes.GHASH_BLOCK_SIZE {
src = buf
buf = buf[_aes.GHASH_BLOCK_SIZE:]
+123
View File
@@ -0,0 +1,123 @@
package _chacha20
import "base:intrinsics"
import "core:encoding/endian"
import "core:math/bits"
import "core:mem"
// KEY_SIZE is the (X)ChaCha20 key size in bytes.
KEY_SIZE :: 32
// IV_SIZE is the ChaCha20 IV size in bytes.
IV_SIZE :: 12
// XIV_SIZE is the XChaCha20 IV size in bytes.
XIV_SIZE :: 24
// MAX_CTR_IETF is the maximum counter value for the IETF flavor ChaCha20.
MAX_CTR_IETF :: 0xffffffff
// BLOCK_SIZE is the (X)ChaCha20 block size in bytes.
BLOCK_SIZE :: 64
// STATE_SIZE_U32 is the (X)ChaCha20 state size in u32s.
STATE_SIZE_U32 :: 16
// ROUNDS is the (X)ChaCha20 round count.
ROUNDS :: 20
// SIGMA_0 is sigma[0:4].
SIGMA_0: u32 : 0x61707865
// SIGMA_1 is sigma[4:8].
SIGMA_1: u32 : 0x3320646e
// SIGMA_2 is sigma[8:12].
SIGMA_2: u32 : 0x79622d32
// SIGMA_3 is sigma[12:16].
SIGMA_3: u32 : 0x6b206574
// Context is a ChaCha20 or XChaCha20 instance.
//
// A Context is set up by `init`, repositioned by `seek`, and sanitized by
// `reset`.
Context :: struct {
// The 16-word state as laid out by `init`: words 0..3 are the SIGMA
// constants, words 4..11 the key, word 12 the low counter word, and
// words 13..15 the IV (for the non-IETF flavor `seek` stores the high
// 32 bits of the block number in word 13).
_s: [STATE_SIZE_U32]u32,
// One block of scratch space; zeroed by `reset`.
// NOTE(review): presumably buffered keystream - confirm against the users of this type.
_buffer: [BLOCK_SIZE]byte,
// Set to BLOCK_SIZE by `init` and `seek`.
// NOTE(review): presumably the read offset into `_buffer`, with BLOCK_SIZE meaning "empty" - confirm.
_off: int,
// True unless `init` was called with `is_xchacha`; selects the 32-bit
// counter limit in `seek` and `check_counter_limit`.
_is_ietf_flavor: bool,
// Set by `init`, cleared by `reset`, asserted by `seek`.
_is_initialized: bool,
}
// init initializes a Context for ChaCha20 with the provided key and
// iv.  The key must be KEY_SIZE bytes and the iv IV_SIZE bytes, otherwise
// the procedure traps.
//
// WARNING: This ONLY handles ChaCha20.  XChaCha20 sub-key and IV
// derivation is expected to be handled by the caller, so that the
// HChaCha call can be suitably accelerated.
init :: proc "contextless" (ctx: ^Context, key, iv: []byte, is_xchacha: bool) {
	if len(key) != KEY_SIZE || len(iv) != IV_SIZE {
		intrinsics.trap()
	}

	state := &ctx._s

	// Words 0..3: the SIGMA constants.
	state[0], state[1], state[2], state[3] = SIGMA_0, SIGMA_1, SIGMA_2, SIGMA_3

	// Words 4..11: the key, read as little-endian u32s.
	for i in 0 ..< KEY_SIZE / 4 {
		state[4 + i] = endian.unchecked_get_u32le(key[i * 4:][:4])
	}

	// Word 12: the block counter, starting at zero.
	state[12] = 0

	// Words 13..15: the IV, read as little-endian u32s.
	for i in 0 ..< IV_SIZE / 4 {
		state[13 + i] = endian.unchecked_get_u32le(iv[i * 4:][:4])
	}

	ctx._off = BLOCK_SIZE
	ctx._is_ietf_flavor = !is_xchacha
	ctx._is_initialized = true
}
// seek seeks the (X)ChaCha20 stream counter to the specified block.
//
// For the IETF flavor only the 32-bit counter word is available, so a
// block number above MAX_CTR_IETF panics.  Otherwise the block number is
// split across state words 12 (low) and 13 (high).
seek :: proc(ctx: ^Context, block_nr: u64) {
	assert(ctx._is_initialized)

	ctr_lo := u32(block_nr)
	ctr_hi := u32(block_nr >> 32)

	if !ctx._is_ietf_flavor {
		ctx._s[13] = ctr_hi
	} else if block_nr > MAX_CTR_IETF {
		panic("crypto/chacha20: attempted to seek past maximum counter")
	}

	ctx._s[12] = ctr_lo
	ctx._off = BLOCK_SIZE
}
// reset sanitizes the Context by explicitly zeroing the state words and
// the block buffer.  The Context must be re-initialized to be used again.
reset :: proc(ctx: ^Context) {
	ctx._is_initialized = false

	mem.zero_explicit(&ctx._buffer, size_of(ctx._buffer))
	mem.zero_explicit(&ctx._s, size_of(ctx._s))
}
// check_counter_limit panics if consuming `nr_blocks` more blocks would
// exceed the maximum keystream allowed per IV for the Context's flavor.
check_counter_limit :: proc(ctx: ^Context, nr_blocks: int) {
	// While all modern "standard" definitions of ChaCha20 use
	// the IETF 32-bit counter, for XChaCha20 most common
	// implementations allow for a 64-bit counter.
	//
	// Honestly, the answer here is "use a MRAE primitive", but
	// go with "common" practice in the case of XChaCha20.
	ERR_CTR_EXHAUSTED :: "crypto/chacha20: maximum (X)ChaCha20 keystream per IV reached"

	requested := u64(nr_blocks)

	if ctx._is_ietf_flavor {
		// 32-bit counter held entirely in word 12.
		if u64(ctx._s[12]) + requested > MAX_CTR_IETF {
			panic(ERR_CTR_EXHAUSTED)
		}
		return
	}

	// 64-bit counter split across words 12 (low) and 13 (high); any
	// carry out of the addition means the counter would wrap.
	lo, hi := u64(ctx._s[12]), u64(ctx._s[13])
	_, carry := bits.add_u64((hi << 32) | lo, requested, 0)
	if carry != 0 {
		panic(ERR_CTR_EXHAUSTED)
	}
}
+360
View File
@@ -0,0 +1,360 @@
package chacha20_ref
import "core:crypto/_chacha20"
import "core:encoding/endian"
import "core:math/bits"
// stream_blocks generates `nr_blocks` blocks of keystream from `ctx` and
// writes them to `dst`.  When `src` is non-nil each keystream block is
// XORed with the corresponding block of `src` first; when `src` is nil the
// raw keystream is written.  The block counter in `ctx` is advanced by one
// per block.
//
// The caller is responsible for `dst` (and `src`, if given) covering
// `nr_blocks` whole blocks; the stores below are not bounds checked.
stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int) {
// Enforce the maximum consumed keystream per IV.
_chacha20.check_counter_limit(ctx, nr_blocks)
// Shadow the parameters so the slices can be advanced block by block.
dst, src := dst, src
x := &ctx._s
for n := 0; n < nr_blocks; n = n + 1 {
// Working copy of the state; words 0..3 are always the SIGMA constants.
x0, x1, x2, x3 :=
_chacha20.SIGMA_0, _chacha20.SIGMA_1, _chacha20.SIGMA_2, _chacha20.SIGMA_3
x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 :=
x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]
// Each iteration is one double round (4 column + 4 diagonal quarter
// rounds), hence the step of 2.
for i := _chacha20.ROUNDS; i > 0; i = i - 2 {
// Even when forcing inlining manually inlining all of
// these is decently faster.
// quarterround(x, 0, 4, 8, 12)
x0 += x4
x12 ~= x0
x12 = bits.rotate_left32(x12, 16)
x8 += x12
x4 ~= x8
x4 = bits.rotate_left32(x4, 12)
x0 += x4
x12 ~= x0
x12 = bits.rotate_left32(x12, 8)
x8 += x12
x4 ~= x8
x4 = bits.rotate_left32(x4, 7)
// quarterround(x, 1, 5, 9, 13)
x1 += x5
x13 ~= x1
x13 = bits.rotate_left32(x13, 16)
x9 += x13
x5 ~= x9
x5 = bits.rotate_left32(x5, 12)
x1 += x5
x13 ~= x1
x13 = bits.rotate_left32(x13, 8)
x9 += x13
x5 ~= x9
x5 = bits.rotate_left32(x5, 7)
// quarterround(x, 2, 6, 10, 14)
x2 += x6
x14 ~= x2
x14 = bits.rotate_left32(x14, 16)
x10 += x14
x6 ~= x10
x6 = bits.rotate_left32(x6, 12)
x2 += x6
x14 ~= x2
x14 = bits.rotate_left32(x14, 8)
x10 += x14
x6 ~= x10
x6 = bits.rotate_left32(x6, 7)
// quarterround(x, 3, 7, 11, 15)
x3 += x7
x15 ~= x3
x15 = bits.rotate_left32(x15, 16)
x11 += x15
x7 ~= x11
x7 = bits.rotate_left32(x7, 12)
x3 += x7
x15 ~= x3
x15 = bits.rotate_left32(x15, 8)
x11 += x15
x7 ~= x11
x7 = bits.rotate_left32(x7, 7)
// quarterround(x, 0, 5, 10, 15)
x0 += x5
x15 ~= x0
x15 = bits.rotate_left32(x15, 16)
x10 += x15
x5 ~= x10
x5 = bits.rotate_left32(x5, 12)
x0 += x5
x15 ~= x0
x15 = bits.rotate_left32(x15, 8)
x10 += x15
x5 ~= x10
x5 = bits.rotate_left32(x5, 7)
// quarterround(x, 1, 6, 11, 12)
x1 += x6
x12 ~= x1
x12 = bits.rotate_left32(x12, 16)
x11 += x12
x6 ~= x11
x6 = bits.rotate_left32(x6, 12)
x1 += x6
x12 ~= x1
x12 = bits.rotate_left32(x12, 8)
x11 += x12
x6 ~= x11
x6 = bits.rotate_left32(x6, 7)
// quarterround(x, 2, 7, 8, 13)
x2 += x7
x13 ~= x2
x13 = bits.rotate_left32(x13, 16)
x8 += x13
x7 ~= x8
x7 = bits.rotate_left32(x7, 12)
x2 += x7
x13 ~= x2
x13 = bits.rotate_left32(x13, 8)
x8 += x13
x7 ~= x8
x7 = bits.rotate_left32(x7, 7)
// quarterround(x, 3, 4, 9, 14)
x3 += x4
x14 ~= x3
x14 = bits.rotate_left32(x14, 16)
x9 += x14
x4 ~= x9
x4 = bits.rotate_left32(x4, 12)
x3 += x4
x14 ~= x3
x14 = bits.rotate_left32(x14, 8)
x9 += x14
x4 ~= x9
x4 = bits.rotate_left32(x4, 7)
}
// Feed-forward: add the input state back into the permuted words.
x0 += _chacha20.SIGMA_0
x1 += _chacha20.SIGMA_1
x2 += _chacha20.SIGMA_2
x3 += _chacha20.SIGMA_3
x4 += x[4]
x5 += x[5]
x6 += x[6]
x7 += x[7]
x8 += x[8]
x9 += x[9]
x10 += x[10]
x11 += x[11]
x12 += x[12]
x13 += x[13]
x14 += x[14]
x15 += x[15]
// - The caller(s) ensure that src/dst are valid.
// - The compiler knows if the target is picky about alignment.
#no_bounds_check {
if src != nil {
// dst = src XOR keystream, one little-endian u32 at a time.
endian.unchecked_put_u32le(dst[0:4], endian.unchecked_get_u32le(src[0:4]) ~ x0)
endian.unchecked_put_u32le(dst[4:8], endian.unchecked_get_u32le(src[4:8]) ~ x1)
endian.unchecked_put_u32le(dst[8:12], endian.unchecked_get_u32le(src[8:12]) ~ x2)
endian.unchecked_put_u32le(dst[12:16], endian.unchecked_get_u32le(src[12:16]) ~ x3)
endian.unchecked_put_u32le(dst[16:20], endian.unchecked_get_u32le(src[16:20]) ~ x4)
endian.unchecked_put_u32le(dst[20:24], endian.unchecked_get_u32le(src[20:24]) ~ x5)
endian.unchecked_put_u32le(dst[24:28], endian.unchecked_get_u32le(src[24:28]) ~ x6)
endian.unchecked_put_u32le(dst[28:32], endian.unchecked_get_u32le(src[28:32]) ~ x7)
endian.unchecked_put_u32le(dst[32:36], endian.unchecked_get_u32le(src[32:36]) ~ x8)
endian.unchecked_put_u32le(dst[36:40], endian.unchecked_get_u32le(src[36:40]) ~ x9)
endian.unchecked_put_u32le(
dst[40:44],
endian.unchecked_get_u32le(src[40:44]) ~ x10,
)
endian.unchecked_put_u32le(
dst[44:48],
endian.unchecked_get_u32le(src[44:48]) ~ x11,
)
endian.unchecked_put_u32le(
dst[48:52],
endian.unchecked_get_u32le(src[48:52]) ~ x12,
)
endian.unchecked_put_u32le(
dst[52:56],
endian.unchecked_get_u32le(src[52:56]) ~ x13,
)
endian.unchecked_put_u32le(
dst[56:60],
endian.unchecked_get_u32le(src[56:60]) ~ x14,
)
endian.unchecked_put_u32le(
dst[60:64],
endian.unchecked_get_u32le(src[60:64]) ~ x15,
)
src = src[_chacha20.BLOCK_SIZE:]
} else {
// No input: emit the raw keystream block.
endian.unchecked_put_u32le(dst[0:4], x0)
endian.unchecked_put_u32le(dst[4:8], x1)
endian.unchecked_put_u32le(dst[8:12], x2)
endian.unchecked_put_u32le(dst[12:16], x3)
endian.unchecked_put_u32le(dst[16:20], x4)
endian.unchecked_put_u32le(dst[20:24], x5)
endian.unchecked_put_u32le(dst[24:28], x6)
endian.unchecked_put_u32le(dst[28:32], x7)
endian.unchecked_put_u32le(dst[32:36], x8)
endian.unchecked_put_u32le(dst[36:40], x9)
endian.unchecked_put_u32le(dst[40:44], x10)
endian.unchecked_put_u32le(dst[44:48], x11)
endian.unchecked_put_u32le(dst[48:52], x12)
endian.unchecked_put_u32le(dst[52:56], x13)
endian.unchecked_put_u32le(dst[56:60], x14)
endian.unchecked_put_u32le(dst[60:64], x15)
}
dst = dst[_chacha20.BLOCK_SIZE:]
}
// Increment the counter.  Overflow checking is done upon
// entry into the routine, so a 64-bit increment safely
// covers both cases.
new_ctr := ((u64(ctx._s[13]) << 32) | u64(ctx._s[12])) + 1
x[12] = u32(new_ctr)
x[13] = u32(new_ctr >> 32)
}
}
// _hchacha20_quarter_round applies one ChaCha quarter round to the state
// words (a, b, c, d) and returns the updated words in the same order.
@(private = "file")
_hchacha20_quarter_round :: #force_inline proc "contextless" (a, b, c, d: u32) -> (u32, u32, u32, u32) {
	a, b, c, d := a, b, c, d

	a += b
	d = bits.rotate_left32(d ~ a, 16)
	c += d
	b = bits.rotate_left32(b ~ c, 12)
	a += b
	d = bits.rotate_left32(d ~ a, 8)
	c += d
	b = bits.rotate_left32(b ~ c, 7)

	return a, b, c, d
}

// hchacha20 runs the ChaCha20 rounds over a state built from the SIGMA
// constants, a 32-byte `key` and a 16-byte `iv`, and writes state words
// 0..3 followed by words 12..15 (32 bytes, little-endian) to `dst`.
// Unlike a keystream block, the input state is not added back in.
hchacha20 :: proc "contextless" (dst, key, iv: []byte) {
	s: [_chacha20.STATE_SIZE_U32]u32

	s[0], s[1], s[2], s[3] = _chacha20.SIGMA_0, _chacha20.SIGMA_1, _chacha20.SIGMA_2, _chacha20.SIGMA_3

	// Words 4..11 come from the key, words 12..15 from the iv.
	for i in 0 ..< 8 {
		s[4 + i] = endian.unchecked_get_u32le(key[i * 4:][:4])
	}
	for i in 0 ..< 4 {
		s[12 + i] = endian.unchecked_get_u32le(iv[i * 4:][:4])
	}

	// One double round per iteration.
	for _ in 0 ..< _chacha20.ROUNDS / 2 {
		// Column round.
		s[0], s[4], s[8], s[12] = _hchacha20_quarter_round(s[0], s[4], s[8], s[12])
		s[1], s[5], s[9], s[13] = _hchacha20_quarter_round(s[1], s[5], s[9], s[13])
		s[2], s[6], s[10], s[14] = _hchacha20_quarter_round(s[2], s[6], s[10], s[14])
		s[3], s[7], s[11], s[15] = _hchacha20_quarter_round(s[3], s[7], s[11], s[15])

		// Diagonal round.
		s[0], s[5], s[10], s[15] = _hchacha20_quarter_round(s[0], s[5], s[10], s[15])
		s[1], s[6], s[11], s[12] = _hchacha20_quarter_round(s[1], s[6], s[11], s[12])
		s[2], s[7], s[8], s[13] = _hchacha20_quarter_round(s[2], s[7], s[8], s[13])
		s[3], s[4], s[9], s[14] = _hchacha20_quarter_round(s[3], s[4], s[9], s[14])
	}

	// Output: first row, then last row.
	for i in 0 ..< 4 {
		endian.unchecked_put_u32le(dst[i * 4:][:4], s[i])
	}
	for i in 0 ..< 4 {
		endian.unchecked_put_u32le(dst[16 + i * 4:][:4], s[12 + i])
	}
}
@@ -0,0 +1,481 @@
package chacha20_simd128
import "base:intrinsics"
import "core:crypto/_chacha20"
import "core:simd"
@(require) import "core:sys/info"
// Portable 128-bit `core:simd` implementation.
//
// This is loosely based on Ted Krovetz's public domain C intrinsic
// implementation.
//
// This is written to perform adequately on any target that has "enough"
// 128-bit vector registers, the current thought is that 4 blocks at a
// time is reasonable for amd64, though Ted's code is more conservative.
//
// See:
// supercop-20230530/crypto_stream/chacha20/krovetz/vec128
// Ensure the compiler emits SIMD instructions. This is a minimum, and
// setting the microarchitecture at compile time will allow for better
// code gen when applicable (eg: AVX). This is somewhat redundant with
// the default microarchitecture configurations.
// TARGET_SIMD_FEATURES is the feature list handed to
// `enable_target_feature` on this file's `stream_blocks`.
when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 {
@(private = "file")
TARGET_SIMD_FEATURES :: "neon"
} else when ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 {
// Note: LLVM appears to be smart enough to use PSHUFB despite not
// explicitly using simd.u8x16 shuffles.
@(private = "file")
TARGET_SIMD_FEATURES :: "sse2,ssse3"
} else {
// Any other architecture: request no additional target features.
@(private = "file")
TARGET_SIMD_FEATURES :: ""
}
// Per-lane shift counts used by `_dq_round_simd128` to build 32-bit
// rotations out of shifts: a left-rotate by n is `shl(v, n) ^ shr(v, 32 - n)`,
// so each `_ROT_nL` constant is paired with a `_ROT_nR` constant holding
// `32 - n`.
@(private = "file")
_ROT_7L: simd.u32x4 : {7, 7, 7, 7}
@(private = "file")
_ROT_7R: simd.u32x4 : {25, 25, 25, 25}
@(private = "file")
_ROT_12L: simd.u32x4 : {12, 12, 12, 12}
@(private = "file")
_ROT_12R: simd.u32x4 : {20, 20, 20, 20}
@(private = "file")
_ROT_8L: simd.u32x4 : {8, 8, 8, 8}
@(private = "file")
_ROT_8R: simd.u32x4 : {24, 24, 24, 24}
// A rotation by 16 uses the same count for both shifts.
@(private = "file")
_ROT_16: simd.u32x4 : {16, 16, 16, 16}
when ODIN_ENDIAN == .Big {
	// _increment_counter bumps the 64-bit block counter stored in state
	// words 12 (low) and 13 (high) of `ctx`, and returns the updated last
	// row of the state (words 12..15) as a vector.
	@(private = "file")
	_increment_counter :: #force_inline proc "contextless" (ctx: ^_chacha20.Context) -> simd.u32x4 {
		// In the Big Endian case, the low and high portions in the vector
		// are flipped, so the 64-bit addition can't be done with a simple
		// vector add.
		x := &ctx._s

		new_ctr := ((u64(ctx._s[13]) << 32) | u64(ctx._s[12])) + 1
		x[12] = u32(new_ctr)
		x[13] = u32(new_ctr >> 32)

		return intrinsics.unaligned_load(transmute(^simd.u32x4)&x[12])
	}

	// Convert the endian-ness of the components of a u32x4 vector, for
	// the purposes of output.
	@(private = "file")
	_byteswap_u32x4 :: #force_inline proc "contextless" (v: simd.u32x4) -> simd.u32x4 {
		// Reverse the four bytes within each 32-bit lane.
		return(
			transmute(simd.u32x4)simd.shuffle(
				transmute(simd.u8x16)v,
				transmute(simd.u8x16)v,
				3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12,
			)
		)
	}
} else {
	// Added to the last state row, viewed as two u64 lanes, to increment
	// the 64-bit counter held in the low lane.
	@(private = "file")
	_VEC_ONE: simd.u64x2 : {1, 0}
}
// _qround_simd128 performs four ChaCha quarter rounds in parallel, one per
// lane, on the rows (a, b, c, d).
@(private = "file")
_qround_simd128 :: #force_inline proc "contextless" (
	a, b, c, d: simd.u32x4,
) -> (
	simd.u32x4,
	simd.u32x4,
	simd.u32x4,
	simd.u32x4,
) {
	a, b, c, d := a, b, c, d

	// a += b; d ^= a; d = ROTW16(d);
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_16), simd.shr(d, _ROT_16))

	// c += d; b ^= c; b = ROTW12(b);
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_12L), simd.shr(b, _ROT_12R))

	// a += b; d ^= a; d = ROTW8(d);
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_8L), simd.shr(d, _ROT_8R))

	// c += d; b ^= c; b = ROTW7(b);
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_7L), simd.shr(b, _ROT_7R))

	return a, b, c, d
}

// _dq_round_simd128 performs one ChaCha double round on a block held as
// four row vectors: a quarter round on the columns, a lane rotation that
// lines the diagonals up as columns, a second quarter round, and the
// inverse lane rotation.
@(private = "file")
_dq_round_simd128 :: #force_inline proc "contextless" (
	v0, v1, v2, v3: simd.u32x4,
) -> (
	simd.u32x4,
	simd.u32x4,
	simd.u32x4,
	simd.u32x4,
) {
	a, b, c, d := _qround_simd128(v0, v1, v2, v3)

	// b = ROTV1(b); c = ROTV2(c); d = ROTV3(d);
	b = simd.shuffle(b, b, 1, 2, 3, 0)
	c = simd.shuffle(c, c, 2, 3, 0, 1)
	d = simd.shuffle(d, d, 3, 0, 1, 2)

	a, b, c, d = _qround_simd128(a, b, c, d)

	// b = ROTV3(b); c = ROTV2(c); d = ROTV1(d);
	b = simd.shuffle(b, b, 3, 0, 1, 2)
	c = simd.shuffle(c, c, 2, 3, 0, 1)
	d = simd.shuffle(d, d, 1, 2, 3, 0)

	return a, b, c, d
}
// _add_state_simd128 adds the input state rows (s0..s3) to the permuted
// rows (v0..v3).  On big-endian targets each 32-bit lane is additionally
// byte-swapped for output.
@(private = "file")
_add_state_simd128 :: #force_inline proc "contextless" (
	v0, v1, v2, v3, s0, s1, s2, s3: simd.u32x4,
) -> (
	simd.u32x4,
	simd.u32x4,
	simd.u32x4,
	simd.u32x4,
) {
	r0 := simd.add(v0, s0)
	r1 := simd.add(v1, s1)
	r2 := simd.add(v2, s2)
	r3 := simd.add(v3, s3)

	when ODIN_ENDIAN == .Big {
		r0 = _byteswap_u32x4(r0)
		r1 = _byteswap_u32x4(r1)
		r2 = _byteswap_u32x4(r2)
		r3 = _byteswap_u32x4(r3)
	}

	return r0, r1, r2, r3
}
// _xor_simd128 XORs the four vectors (v0..v3) with the four consecutive
// vectors read, without alignment requirements, starting at `src`.
@(private = "file")
_xor_simd128 :: #force_inline proc "contextless" (
	src: [^]simd.u32x4,
	v0, v1, v2, v3: simd.u32x4,
) -> (
	simd.u32x4,
	simd.u32x4,
	simd.u32x4,
	simd.u32x4,
) {
	m0 := intrinsics.unaligned_load((^simd.u32x4)(src[0:]))
	m1 := intrinsics.unaligned_load((^simd.u32x4)(src[1:]))
	m2 := intrinsics.unaligned_load((^simd.u32x4)(src[2:]))
	m3 := intrinsics.unaligned_load((^simd.u32x4)(src[3:]))

	return simd.bit_xor(v0, m0), simd.bit_xor(v1, m1), simd.bit_xor(v2, m2), simd.bit_xor(v3, m3)
}
// _store_simd128 writes the four vectors (v0..v3) to four consecutive
// vector-sized slots starting at `dst`, without alignment requirements.
@(private = "file")
_store_simd128 :: #force_inline proc "contextless" (
	dst: [^]simd.u32x4,
	v0, v1, v2, v3: simd.u32x4,
) {
	p0 := (^simd.u32x4)(dst[0:])
	p1 := (^simd.u32x4)(dst[1:])
	p2 := (^simd.u32x4)(dst[2:])
	p3 := (^simd.u32x4)(dst[3:])

	intrinsics.unaligned_store(p0, v0)
	intrinsics.unaligned_store(p1, v1)
	intrinsics.unaligned_store(p2, v2)
	intrinsics.unaligned_store(p3, v3)
}
// is_performant returns true iff the target and current host both support
// "enough" 128-bit SIMD to make this implementation performant.
//
// On ARM and x86 this checks the detected CPU features at runtime (and
// reports false when detection is unavailable); on WebAssembly it checks
// the compile-time `simd128` target feature; everywhere else it is false.
is_performant :: proc "contextless" () -> bool {
	when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 || ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 {
		when ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 {
			required :: info.CPU_Features{.sse2, .ssse3}
		} else {
			required :: info.CPU_Features{.asimd}
		}

		if detected, ok := info.cpu_features.?; ok {
			// Superset test: every required feature must be present.
			return detected >= required
		}
		return false
	} else when ODIN_ARCH == .wasm64p32 || ODIN_ARCH == .wasm32 {
		return intrinsics.has_target_feature("simd128")
	} else {
		return false
	}
}
@(enable_target_feature = TARGET_SIMD_FEATURES)
stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int) {
// Enforce the maximum consumed keystream per IV.
_chacha20.check_counter_limit(ctx, nr_blocks)
dst_v := ([^]simd.u32x4)(raw_data(dst))
src_v := ([^]simd.u32x4)(raw_data(src))
x := &ctx._s
n := nr_blocks
// The state vector is an array of uint32s in native byte-order.
x_v := ([^]simd.u32x4)(raw_data(x))
s0 := intrinsics.unaligned_load((^simd.u32x4)(x_v[0:]))
s1 := intrinsics.unaligned_load((^simd.u32x4)(x_v[1:]))
s2 := intrinsics.unaligned_load((^simd.u32x4)(x_v[2:]))
s3 := intrinsics.unaligned_load((^simd.u32x4)(x_v[3:]))
// 8 blocks at a time.
//
// Note: This is only worth it on Aarch64.
when ODIN_ARCH == .arm64 {
for ; n >= 8; n = n - 8 {
v0, v1, v2, v3 := s0, s1, s2, s3
when ODIN_ENDIAN == .Little {
s7 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s3, _VEC_ONE)
} else {
s7 := _increment_counter(ctx)
}
v4, v5, v6, v7 := s0, s1, s2, s7
when ODIN_ENDIAN == .Little {
s11 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s7, _VEC_ONE)
} else {
s11 := _increment_counter(ctx)
}
v8, v9, v10, v11 := s0, s1, s2, s11
when ODIN_ENDIAN == .Little {
s15 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s11, _VEC_ONE)
} else {
s15 := _increment_counter(ctx)
}
v12, v13, v14, v15 := s0, s1, s2, s15
when ODIN_ENDIAN == .Little {
s19 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s15, _VEC_ONE)
} else {
s19 := _increment_counter(ctx)
}
v16, v17, v18, v19 := s0, s1, s2, s19
when ODIN_ENDIAN == .Little {
s23 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s19, _VEC_ONE)
} else {
s23 := _increment_counter(ctx)
}
v20, v21, v22, v23 := s0, s1, s2, s23
when ODIN_ENDIAN == .Little {
s27 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s23, _VEC_ONE)
} else {
s27 := _increment_counter(ctx)
}
v24, v25, v26, v27 := s0, s1, s2, s27
when ODIN_ENDIAN == .Little {
s31 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s27, _VEC_ONE)
} else {
s31 := _increment_counter(ctx)
}
v28, v29, v30, v31 := s0, s1, s2, s31
for i := _chacha20.ROUNDS; i > 0; i = i - 2 {
v0, v1, v2, v3 = _dq_round_simd128(v0, v1, v2, v3)
v4, v5, v6, v7 = _dq_round_simd128(v4, v5, v6, v7)
v8, v9, v10, v11 = _dq_round_simd128(v8, v9, v10, v11)
v12, v13, v14, v15 = _dq_round_simd128(v12, v13, v14, v15)
v16, v17, v18, v19 = _dq_round_simd128(v16, v17, v18, v19)
v20, v21, v22, v23 = _dq_round_simd128(v20, v21, v22, v23)
v24, v25, v26, v27 = _dq_round_simd128(v24, v25, v26, v27)
v28, v29, v30, v31 = _dq_round_simd128(v28, v29, v30, v31)
}
v0, v1, v2, v3 = _add_state_simd128(v0, v1, v2, v3, s0, s1, s2, s3)
v4, v5, v6, v7 = _add_state_simd128(v4, v5, v6, v7, s0, s1, s2, s7)
v8, v9, v10, v11 = _add_state_simd128(v8, v9, v10, v11, s0, s1, s2, s11)
v12, v13, v14, v15 = _add_state_simd128(v12, v13, v14, v15, s0, s1, s2, s15)
v16, v17, v18, v19 = _add_state_simd128(v16, v17, v18, v19, s0, s1, s2, s19)
v20, v21, v22, v23 = _add_state_simd128(v20, v21, v22, v23, s0, s1, s2, s23)
v24, v25, v26, v27 = _add_state_simd128(v24, v25, v26, v27, s0, s1, s2, s27)
v28, v29, v30, v31 = _add_state_simd128(v28, v29, v30, v31, s0, s1, s2, s31)
#no_bounds_check {
if src != nil {
v0, v1, v2, v3 = _xor_simd128(src_v, v0, v1, v2, v3)
v4, v5, v6, v7 = _xor_simd128(src_v[4:], v4, v5, v6, v7)
v8, v9, v10, v11 = _xor_simd128(src_v[8:], v8, v9, v10, v11)
v12, v13, v14, v15 = _xor_simd128(src_v[12:], v12, v13, v14, v15)
v16, v17, v18, v19 = _xor_simd128(src_v[16:], v16, v17, v18, v19)
v20, v21, v22, v23 = _xor_simd128(src_v[20:], v20, v21, v22, v23)
v24, v25, v26, v27 = _xor_simd128(src_v[24:], v24, v25, v26, v27)
v28, v29, v30, v31 = _xor_simd128(src_v[28:], v28, v29, v30, v31)
src_v = src_v[32:]
}
_store_simd128(dst_v, v0, v1, v2, v3)
_store_simd128(dst_v[4:], v4, v5, v6, v7)
_store_simd128(dst_v[8:], v8, v9, v10, v11)
_store_simd128(dst_v[12:], v12, v13, v14, v15)
_store_simd128(dst_v[16:], v16, v17, v18, v19)
_store_simd128(dst_v[20:], v20, v21, v22, v23)
_store_simd128(dst_v[24:], v24, v25, v26, v27)
_store_simd128(dst_v[28:], v28, v29, v30, v31)
dst_v = dst_v[32:]
}
when ODIN_ENDIAN == .Little {
// s31 holds the most current counter, so `s3 = s31 + 1`.
s3 = transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s31, _VEC_ONE)
} else {
s3 = _increment_counter(ctx)
}
}
}
// 4 blocks at a time.
//
// Note: The i386 target lacks the required number of registers
// for this to be performant, so it is skipped.
when ODIN_ARCH != .i386 {
for ; n >= 4; n = n - 4 {
v0, v1, v2, v3 := s0, s1, s2, s3
when ODIN_ENDIAN == .Little {
s7 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s3, _VEC_ONE)
} else {
s7 := _increment_counter(ctx)
}
v4, v5, v6, v7 := s0, s1, s2, s7
when ODIN_ENDIAN == .Little {
s11 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s7, _VEC_ONE)
} else {
s11 := _increment_counter(ctx)
}
v8, v9, v10, v11 := s0, s1, s2, s11
when ODIN_ENDIAN == .Little {
s15 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s11, _VEC_ONE)
} else {
s15 := _increment_counter(ctx)
}
v12, v13, v14, v15 := s0, s1, s2, s15
for i := _chacha20.ROUNDS; i > 0; i = i - 2 {
v0, v1, v2, v3 = _dq_round_simd128(v0, v1, v2, v3)
v4, v5, v6, v7 = _dq_round_simd128(v4, v5, v6, v7)
v8, v9, v10, v11 = _dq_round_simd128(v8, v9, v10, v11)
v12, v13, v14, v15 = _dq_round_simd128(v12, v13, v14, v15)
}
v0, v1, v2, v3 = _add_state_simd128(v0, v1, v2, v3, s0, s1, s2, s3)
v4, v5, v6, v7 = _add_state_simd128(v4, v5, v6, v7, s0, s1, s2, s7)
v8, v9, v10, v11 = _add_state_simd128(v8, v9, v10, v11, s0, s1, s2, s11)
v12, v13, v14, v15 = _add_state_simd128(v12, v13, v14, v15, s0, s1, s2, s15)
#no_bounds_check {
if src != nil {
v0, v1, v2, v3 = _xor_simd128(src_v, v0, v1, v2, v3)
v4, v5, v6, v7 = _xor_simd128(src_v[4:], v4, v5, v6, v7)
v8, v9, v10, v11 = _xor_simd128(src_v[8:], v8, v9, v10, v11)
v12, v13, v14, v15 = _xor_simd128(src_v[12:], v12, v13, v14, v15)
src_v = src_v[16:]
}
_store_simd128(dst_v, v0, v1, v2, v3)
_store_simd128(dst_v[4:], v4, v5, v6, v7)
_store_simd128(dst_v[8:], v8, v9, v10, v11)
_store_simd128(dst_v[12:], v12, v13, v14, v15)
dst_v = dst_v[16:]
}
when ODIN_ENDIAN == .Little {
// s15 holds the most current counter, so `s3 = s15 + 1`.
s3 = transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s15, _VEC_ONE)
} else {
s3 = _increment_counter(ctx)
}
}
}
// 1 block at a time.
for ; n > 0; n = n - 1 {
v0, v1, v2, v3 := s0, s1, s2, s3
for i := _chacha20.ROUNDS; i > 0; i = i - 2 {
v0, v1, v2, v3 = _dq_round_simd128(v0, v1, v2, v3)
}
v0, v1, v2, v3 = _add_state_simd128(v0, v1, v2, v3, s0, s1, s2, s3)
#no_bounds_check {
if src != nil {
v0, v1, v2, v3 = _xor_simd128(src_v, v0, v1, v2, v3)
src_v = src_v[4:]
}
_store_simd128(dst_v, v0, v1, v2, v3)
dst_v = dst_v[4:]
}
// Increment the counter. Overflow checking is done upon
// entry into the routine, so a 64-bit increment safely
// covers both cases.
when ODIN_ENDIAN == .Little {
s3 = transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s3, _VEC_ONE)
} else {
s3 = _increment_counter(ctx)
}
}
when ODIN_ENDIAN == .Little {
// Write back the counter to the state.
intrinsics.unaligned_store((^simd.u32x4)(x_v[3:]), s3)
}
}
// hchacha20 runs the ChaCha20 rounds over a state built from the sigma
// constants, 32 bytes of key, and 16 bytes of iv, and writes the first
// and last rows of the resulting state (32 bytes total) to dst.
//
// Only the first index of each load is bounds checked, and dst is
// written through a raw pointer, so callers must pass full-length slices.
@(enable_target_feature = TARGET_SIMD_FEATURES)
hchacha20 :: proc "contextless" (dst, key, iv: []byte) {
	row_a := simd.u32x4{_chacha20.SIGMA_0, _chacha20.SIGMA_1, _chacha20.SIGMA_2, _chacha20.SIGMA_3}
	row_b := intrinsics.unaligned_load((^simd.u32x4)(&key[0]))
	row_c := intrinsics.unaligned_load((^simd.u32x4)(&key[16]))
	row_d := intrinsics.unaligned_load((^simd.u32x4)(&iv[0]))

	// The loaded rows are raw bytes; convert them on big-endian targets.
	when ODIN_ENDIAN == .Big {
		row_b = _byteswap_u32x4(row_b)
		row_c = _byteswap_u32x4(row_c)
		row_d = _byteswap_u32x4(row_d)
	}

	// Each iteration is a double round, hence the step of two.
	for remaining := _chacha20.ROUNDS; remaining > 0; remaining -= 2 {
		row_a, row_b, row_c, row_d = _dq_round_simd128(row_a, row_b, row_c, row_d)
	}

	// Only the rows that get written out need converting back.
	when ODIN_ENDIAN == .Big {
		row_a = _byteswap_u32x4(row_a)
		row_d = _byteswap_u32x4(row_d)
	}

	// Note that the initial state is not added back in before output.
	out := ([^]simd.u32x4)(raw_data(dst))
	intrinsics.unaligned_store((^simd.u32x4)(out), row_a)
	intrinsics.unaligned_store((^simd.u32x4)(out[1:]), row_d)
}
@@ -0,0 +1,319 @@
//+build amd64
package chacha20_simd256
import "base:intrinsics"
import "core:crypto/_chacha20"
import chacha_simd128 "core:crypto/_chacha20/simd128"
import "core:simd"
import "core:sys/info"
// This is loosely based on Ted Krovetz's public domain C intrinsic
// implementations. While written using `core:simd`, this is currently
// amd64 specific because we do not have a way to detect ARM SVE.
//
// See:
// supercop-20230530/crypto_stream/chacha20/krovetz/vec128
// supercop-20230530/crypto_stream/chacha20/krovetz/avx2
#assert(ODIN_ENDIAN == .Little)
// Per-lane shift counts used by _dq_round_simd256 to build 32-bit
// rotations out of shifts: a rotate-left by k combines a left shift by
// k (the `L` constant) with a right shift by 32-k (the `R` constant).
// A rotate by 16 uses the same count in both directions.
@(private = "file")
_ROT_7L: simd.u32x8 : {7, 7, 7, 7, 7, 7, 7, 7}
@(private = "file")
_ROT_7R: simd.u32x8 : {25, 25, 25, 25, 25, 25, 25, 25}
@(private = "file")
_ROT_12L: simd.u32x8 : {12, 12, 12, 12, 12, 12, 12, 12}
@(private = "file")
_ROT_12R: simd.u32x8 : {20, 20, 20, 20, 20, 20, 20, 20}
@(private = "file")
_ROT_8L: simd.u32x8 : {8, 8, 8, 8, 8, 8, 8, 8}
@(private = "file")
_ROT_8R: simd.u32x8 : {24, 24, 24, 24, 24, 24, 24, 24}
@(private = "file")
_ROT_16: simd.u32x8 : {16, 16, 16, 16, 16, 16, 16, 16}
// Added (viewed as u64x4) to the duplicated 4th state row in
// stream_blocks: increments the 64-bit counter of the second copy only.
@(private = "file")
_VEC_ZERO_ONE: simd.u64x4 : {0, 0, 1, 0}
// Added (viewed as u64x4) to the duplicated 4th state row in
// stream_blocks: advances the 64-bit counter of both copies by two.
@(private = "file")
_VEC_TWO: simd.u64x4 : {2, 0, 2, 0}
// is_performant reports whether the running CPU advertises both AVX and
// AVX2.  If the CPU feature set could not be determined, the answer is
// conservatively false.
is_performant :: proc "contextless" () -> bool {
	REQUIRED :: info.CPU_Features{.avx, .avx2}

	if detected, known := info.cpu_features.?; known {
		// Superset test: every required feature must be present.
		return detected >= REQUIRED
	}
	return false
}
// _dq_round_simd256 performs one double round on four 8-lane rows: a
// pass of add/xor/rotate steps, a lane rotation of rows b, c and d
// within each 128-bit half, a second identical pass, and finally the
// inverse lane rotation.  Both 128-bit halves are processed alike.
@(private = "file")
_dq_round_simd256 :: #force_inline proc "contextless" (
	v0, v1, v2, v3: simd.u32x8,
) -> (
	simd.u32x8,
	simd.u32x8,
	simd.u32x8,
	simd.u32x8,
) {
	a, b, c, d := v0, v1, v2, v3

	// ---- First pass ----

	// a += b; d ^= a; d <<<= 16
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_16), simd.shr(d, _ROT_16))

	// c += d; b ^= c; b <<<= 12
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_12L), simd.shr(b, _ROT_12R))

	// a += b; d ^= a; d <<<= 8
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_8L), simd.shr(d, _ROT_8R))

	// c += d; b ^= c; b <<<= 7
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_7L), simd.shr(b, _ROT_7R))

	// Rotate the lanes of b, c, d by 1, 2, 3 within each 128-bit half.
	b = simd.shuffle(b, b, 1, 2, 3, 0, 5, 6, 7, 4)
	c = simd.shuffle(c, c, 2, 3, 0, 1, 6, 7, 4, 5)
	d = simd.shuffle(d, d, 3, 0, 1, 2, 7, 4, 5, 6)

	// ---- Second pass ----

	// a += b; d ^= a; d <<<= 16
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_16), simd.shr(d, _ROT_16))

	// c += d; b ^= c; b <<<= 12
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_12L), simd.shr(b, _ROT_12R))

	// a += b; d ^= a; d <<<= 8
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_8L), simd.shr(d, _ROT_8R))

	// c += d; b ^= c; b <<<= 7
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_7L), simd.shr(b, _ROT_7R))

	// Undo the lane rotation: b, c, d by 3, 2, 1.
	b = simd.shuffle(b, b, 3, 0, 1, 2, 7, 4, 5, 6)
	c = simd.shuffle(c, c, 2, 3, 0, 1, 6, 7, 4, 5)
	d = simd.shuffle(d, d, 1, 2, 3, 0, 5, 6, 7, 4)

	return a, b, c, d
}
// _add_and_permute_state_simd256 adds the input state rows (s0 .. s3)
// to the working rows (v0 .. v3) lane-wise, then regroups the result
// per block.
//
// On entry each row carries one 128-bit row for two separate blocks
// (low half: first block, high half: second block).  On return the
// first two values hold the first block's four rows in order, and the
// last two values hold the second block's.
@(private = "file")
_add_and_permute_state_simd256 :: #force_inline proc "contextless" (
	v0, v1, v2, v3, s0, s1, s2, s3: simd.u32x8,
) -> (
	simd.u32x8,
	simd.u32x8,
	simd.u32x8,
	simd.u32x8,
) {
	row0 := simd.add(v0, s0)
	row1 := simd.add(v1, s1)
	row2 := simd.add(v2, s2)
	row3 := simd.add(v3, s3)

	// A big-endian port would byteswap the rows at this point.

	// Low halves -> first block, high halves -> second block.
	blk0_lo := simd.shuffle(row0, row1, 0, 1, 2, 3, 8, 9, 10, 11)
	blk0_hi := simd.shuffle(row2, row3, 0, 1, 2, 3, 8, 9, 10, 11)
	blk1_lo := simd.shuffle(row0, row1, 4, 5, 6, 7, 12, 13, 14, 15)
	blk1_hi := simd.shuffle(row2, row3, 4, 5, 6, 7, 12, 13, 14, 15)

	return blk0_lo, blk0_hi, blk1_lo, blk1_hi
}
// _xor_simd256 XORs v0 .. v3 with four consecutive 256-bit vectors read
// (without any alignment requirement) from src, and returns the results
// in the same order.  src must reference at least 128 readable bytes.
@(private = "file")
_xor_simd256 :: #force_inline proc "contextless" (
	src: [^]simd.u32x8,
	v0, v1, v2, v3: simd.u32x8,
) -> (
	simd.u32x8,
	simd.u32x8,
	simd.u32x8,
	simd.u32x8,
) {
	in0 := intrinsics.unaligned_load((^simd.u32x8)(src[0:]))
	in1 := intrinsics.unaligned_load((^simd.u32x8)(src[1:]))
	in2 := intrinsics.unaligned_load((^simd.u32x8)(src[2:]))
	in3 := intrinsics.unaligned_load((^simd.u32x8)(src[3:]))

	out0 := simd.bit_xor(v0, in0)
	out1 := simd.bit_xor(v1, in1)
	out2 := simd.bit_xor(v2, in2)
	out3 := simd.bit_xor(v3, in3)

	return out0, out1, out2, out3
}
// _xor_simd256_x1 is the two-vector variant of _xor_simd256: it XORs v0
// and v1 with two consecutive 256-bit vectors read (unaligned) from
// src.  src must reference at least 64 readable bytes.
@(private = "file")
_xor_simd256_x1 :: #force_inline proc "contextless" (
	src: [^]simd.u32x8,
	v0, v1: simd.u32x8,
) -> (
	simd.u32x8,
	simd.u32x8,
) {
	in0 := intrinsics.unaligned_load((^simd.u32x8)(src[0:]))
	in1 := intrinsics.unaligned_load((^simd.u32x8)(src[1:]))

	return simd.bit_xor(v0, in0), simd.bit_xor(v1, in1)
}
// _store_simd256 writes v0 .. v3 to four consecutive 256-bit slots
// starting at dst, with no alignment requirement.  dst must reference
// at least 128 writable bytes.
@(private = "file")
_store_simd256 :: #force_inline proc "contextless" (
	dst: [^]simd.u32x8,
	v0, v1, v2, v3: simd.u32x8,
) {
	slot0 := (^simd.u32x8)(dst[0:])
	slot1 := (^simd.u32x8)(dst[1:])
	slot2 := (^simd.u32x8)(dst[2:])
	slot3 := (^simd.u32x8)(dst[3:])

	intrinsics.unaligned_store(slot0, v0)
	intrinsics.unaligned_store(slot1, v1)
	intrinsics.unaligned_store(slot2, v2)
	intrinsics.unaligned_store(slot3, v3)
}
// _store_simd256_x1 is the two-vector variant of _store_simd256: it
// writes v0 and v1 to two consecutive 256-bit slots starting at dst,
// with no alignment requirement.  dst must reference at least 64
// writable bytes.
@(private = "file")
_store_simd256_x1 :: #force_inline proc "contextless" (
	dst: [^]simd.u32x8,
	v0, v1: simd.u32x8,
) {
	slot0 := (^simd.u32x8)(dst[0:])
	slot1 := (^simd.u32x8)(dst[1:])

	intrinsics.unaligned_store(slot0, v0)
	intrinsics.unaligned_store(slot1, v1)
}
// stream_blocks produces nr_blocks 64-byte blocks of keystream from ctx
// and writes them to dst.  If src is non-nil, the keystream is XORed
// with src first; otherwise the raw keystream is stored.
//
// dst (and src, when given) are accessed through raw pointers with
// bounds checking disabled, so the caller must provide at least
// nr_blocks * 64 bytes in each.  On return the 64-bit block counter held
// in ctx._s[12] (low) and ctx._s[13] (high) has advanced by nr_blocks.
@(enable_target_feature = "sse2,ssse3,avx,avx2")
stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int) {
// Enforce the maximum consumed keystream per IV.
_chacha20.check_counter_limit(ctx, nr_blocks)
dst_v := ([^]simd.u32x8)(raw_data(dst))
src_v := ([^]simd.u32x8)(raw_data(src))
x := &ctx._s
n := nr_blocks
// The state vector is an array of uint32s in native byte-order.
// Setup s0 .. s3 such that each register stores 2 copies of the
// state.
x_v := ([^]simd.u32x4)(raw_data(x))
t0 := intrinsics.unaligned_load((^simd.u32x4)(x_v[0:]))
t1 := intrinsics.unaligned_load((^simd.u32x4)(x_v[1:]))
t2 := intrinsics.unaligned_load((^simd.u32x4)(x_v[2:]))
t3 := intrinsics.unaligned_load((^simd.u32x4)(x_v[3:]))
s0 := simd.swizzle(t0, 0, 1, 2, 3, 0, 1, 2, 3)
s1 := simd.swizzle(t1, 0, 1, 2, 3, 0, 1, 2, 3)
s2 := simd.swizzle(t2, 0, 1, 2, 3, 0, 1, 2, 3)
s3 := simd.swizzle(t3, 0, 1, 2, 3, 0, 1, 2, 3)
// Advance the counter in the 2nd copy of the state by one.
s3 = transmute(simd.u32x8)simd.add(transmute(simd.u64x4)s3, _VEC_ZERO_ONE)
// 8 blocks at a time.
for ; n >= 8; n = n - 8 {
// Four register sets of two blocks each.  Rows 0 .. 2 are shared; only
// the 4th row differs, with its counters stepping by two per set.
v0, v1, v2, v3 := s0, s1, s2, s3
s7 := transmute(simd.u32x8)simd.add(transmute(simd.u64x4)s3, _VEC_TWO)
v4, v5, v6, v7 := s0, s1, s2, s7
s11 := transmute(simd.u32x8)simd.add(transmute(simd.u64x4)s7, _VEC_TWO)
v8, v9, v10, v11 := s0, s1, s2, s11
s15 := transmute(simd.u32x8)simd.add(transmute(simd.u64x4)s11, _VEC_TWO)
v12, v13, v14, v15 := s0, s1, s2, s15
// Each iteration is a double round, hence the step of two.
for i := _chacha20.ROUNDS; i > 0; i = i - 2 {
v0, v1, v2, v3 = _dq_round_simd256(v0, v1, v2, v3)
v4, v5, v6, v7 = _dq_round_simd256(v4, v5, v6, v7)
v8, v9, v10, v11 = _dq_round_simd256(v8, v9, v10, v11)
v12, v13, v14, v15 = _dq_round_simd256(v12, v13, v14, v15)
}
// Add the input state back in and regroup the rows block-by-block so
// that they can be stored contiguously.
v0, v1, v2, v3 = _add_and_permute_state_simd256(v0, v1, v2, v3, s0, s1, s2, s3)
v4, v5, v6, v7 = _add_and_permute_state_simd256(v4, v5, v6, v7, s0, s1, s2, s7)
v8, v9, v10, v11 = _add_and_permute_state_simd256(v8, v9, v10, v11, s0, s1, s2, s11)
v12, v13, v14, v15 = _add_and_permute_state_simd256(v12, v13, v14, v15, s0, s1, s2, s15)
#no_bounds_check {
if src != nil {
v0, v1, v2, v3 = _xor_simd256(src_v, v0, v1, v2, v3)
v4, v5, v6, v7 = _xor_simd256(src_v[4:], v4, v5, v6, v7)
v8, v9, v10, v11 = _xor_simd256(src_v[8:], v8, v9, v10, v11)
v12, v13, v14, v15 = _xor_simd256(src_v[12:], v12, v13, v14, v15)
src_v = src_v[16:]
}
_store_simd256(dst_v, v0, v1, v2, v3)
_store_simd256(dst_v[4:], v4, v5, v6, v7)
_store_simd256(dst_v[8:], v8, v9, v10, v11)
_store_simd256(dst_v[12:], v12, v13, v14, v15)
dst_v = dst_v[16:]
}
// s15 holds the most recently used counters, so the next pair is s15 + 2.
s3 = transmute(simd.u32x8)simd.add(transmute(simd.u64x4)s15, _VEC_TWO)
}
// 2 (or 1) block at a time.
for ; n > 0; n = n - 2 {
v0, v1, v2, v3 := s0, s1, s2, s3
for i := _chacha20.ROUNDS; i > 0; i = i - 2 {
v0, v1, v2, v3 = _dq_round_simd256(v0, v1, v2, v3)
}
v0, v1, v2, v3 = _add_and_permute_state_simd256(v0, v1, v2, v3, s0, s1, s2, s3)
if n == 1 {
// Two blocks were computed, but only the first (v0, v1) is wanted.
// Note: No need to advance src_v, dst_v, or increment the counter
// since this is guaranteed to be the final block.
#no_bounds_check {
if src != nil {
v0, v1 = _xor_simd256_x1(src_v, v0, v1)
}
_store_simd256_x1(dst_v, v0, v1)
}
break
}
#no_bounds_check {
if src != nil {
v0, v1, v2, v3 = _xor_simd256(src_v, v0, v1, v2, v3)
src_v = src_v[4:]
}
_store_simd256(dst_v, v0, v1, v2, v3)
dst_v = dst_v[4:]
}
s3 = transmute(simd.u32x8)simd.add(transmute(simd.u64x4)s3, _VEC_TWO)
}
// Write back the counter. Doing it this way, saves having to
// pull out the correct counter value from s3.
new_ctr := ((u64(ctx._s[13]) << 32) | u64(ctx._s[12])) + u64(nr_blocks)
ctx._s[12] = u32(new_ctr)
ctx._s[13] = u32(new_ctr >> 32)
}
// hchacha20 forwards to the simd128 implementation, force-inlined into
// this procedure, which is compiled with AVX enabled.
@(enable_target_feature = "sse2,ssse3,avx")
hchacha20 :: proc "contextless" (dst, key, iv: []byte) {
// We can just enable AVX and call the simd128 code, as going
// wider has 0 performance benefit, but VEX encoded instructions
// are nice.
#force_inline chacha_simd128.hchacha20(dst, key, iv)
}
@@ -0,0 +1,17 @@
//+build !amd64
package chacha20_simd256
import "base:intrinsics"
import "core:crypto/_chacha20"
// is_performant always reports false in this build: this file is only
// compiled for non-amd64 targets, where the procedures below are stubs.
is_performant :: proc "contextless" () -> bool {
return false
}
// stream_blocks is a stub for non-amd64 targets; it unconditionally
// panics.  Callers are expected to consult is_performant first.
stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int) {
panic("crypto/chacha20: simd256 implementation unsupported")
}
// hchacha20 is a stub for non-amd64 targets; it unconditionally traps.
hchacha20 :: proc "contextless" (dst, key, iv: []byte) {
intrinsics.trap()
}
+36
View File
@@ -0,0 +1,36 @@
package aead
// seal_oneshot is the single-call form of seal_ctx: it initializes a
// temporary Context for algo with key (and, optionally, impl), seals
// plaintext and aad under iv into dst and tag, then sanitizes the
// temporary Context before returning.
//
// dst and plaintext MUST alias exactly or not at all.
seal_oneshot :: proc(algo: Algorithm, dst, tag, key, iv, aad, plaintext: []byte, impl: Implementation = nil) {
	tmp_ctx: Context

	init(&tmp_ctx, algo, key, impl)
	// Ensure key material does not linger on the stack.
	defer reset(&tmp_ctx)

	seal_ctx(&tmp_ctx, dst, tag, iv, aad, plaintext)
}
// open_oneshot is the single-call form of open_ctx: it initializes a
// temporary Context for algo with key (and, optionally, impl),
// authenticates aad and ciphertext against tag under iv, decrypts the
// ciphertext into dst, then sanitizes the temporary Context.
//
// Returns true iff the authentication was successful.  If
// authentication fails, the destination buffer will be zeroed.
//
// dst and ciphertext MUST alias exactly or not at all.
@(require_results)
open_oneshot :: proc(algo: Algorithm, dst, key, iv, aad, ciphertext, tag: []byte, impl: Implementation = nil) -> bool {
	tmp_ctx: Context

	init(&tmp_ctx, algo, key, impl)
	// Ensure key material does not linger on the stack.
	defer reset(&tmp_ctx)

	authenticated := open_ctx(&tmp_ctx, dst, iv, aad, ciphertext, tag)
	return authenticated
}
// seal is the overload set for sealing.  A call whose first argument is
// a ^Context resolves to seal_ctx; one whose first argument is an
// Algorithm resolves to seal_oneshot.
seal :: proc {
seal_ctx,
seal_oneshot,
}
// open is the overload set for opening.  A call whose first argument is
// a ^Context resolves to open_ctx; one whose first argument is an
// Algorithm resolves to open_oneshot.
open :: proc {
open_ctx,
open_oneshot,
}
+57
View File
@@ -0,0 +1,57 @@
/*
package aead provides a generic interface to the supported Authenticated
Encryption with Associated Data algorithms.
Both a one-shot and context based interface are provided, with similar
usage. If multiple messages are to be sealed/opened via the same key,
the context based interface may be more efficient, depending on the
algorithm.
WARNING: Reusing the same key + iv to seal (encrypt) multiple messages
results in catastrophic loss of security for most algorithms.
Example:
package aead_example
import "core:bytes"
import "core:crypto"
import "core:crypto/aead"
main :: proc() {
algo := aead.Algorithm.XCHACHA20POLY1305
// Example additional authenticated data (AAD) and plaintext.
aad_str := "Get your ass in gear boys."
pt_str := "They're immanentizing the Eschaton."
aad := transmute([]byte)aad_str
plaintext := transmute([]byte)pt_str
pt_len := len(plaintext)
// Generate a random key for the purposes of illustration.
key := make([]byte, aead.KEY_SIZES[algo])
defer delete(key)
crypto.rand_bytes(key)
// `ciphertext || tag`, is a common way data is transmitted, so
// demonstrate that.
buf := make([]byte, pt_len + aead.TAG_SIZES[algo])
defer delete(buf)
ciphertext, tag := buf[:pt_len], buf[pt_len:]
// Seal the AAD + Plaintext.
iv := make([]byte, aead.IV_SIZES[algo])
defer delete(iv)
crypto.rand_bytes(iv) // Random IVs are safe with XChaCha20-Poly1305.
aead.seal(algo, ciphertext, tag, key, iv, aad, plaintext)
// Open the AAD + Ciphertext.
opened_pt := buf[:pt_len]
if ok := aead.open(algo, opened_pt, key, iv, aad, ciphertext, tag); !ok {
panic("aead example: failed to open")
}
assert(bytes.equal(opened_pt, plaintext))
}
*/
package aead
+187
View File
@@ -0,0 +1,187 @@
package aead
import "core:crypto/aes"
import "core:crypto/chacha20"
import "core:crypto/chacha20poly1305"
import "core:reflect"
// Implementation is an AEAD implementation. Most callers will not need
// to use this as the package will automatically select the most performant
// implementation available.
//
// A nil Implementation (the default for init and the one-shot procedures)
// selects the underlying package's DEFAULT_IMPLEMENTATION.  A non-nil
// value must hold the variant that matches the chosen Algorithm.
Implementation :: union {
aes.Implementation,
chacha20.Implementation,
}
// MAX_TAG_SIZE is the maximum size tag that can be returned by any of the
// Algorithms supported via this package.
MAX_TAG_SIZE :: 16
// Algorithm is the algorithm identifier associated with a given Context.
//
// Invalid is the zero value; it is what a zero-initialized or reset
// Context reports.
Algorithm :: enum {
Invalid,
AES_GCM_128,
AES_GCM_192,
AES_GCM_256,
CHACHA20POLY1305,
XCHACHA20POLY1305,
}
// ALGORITHM_NAMES is the Algorithm to algorithm name string.
ALGORITHM_NAMES := [Algorithm]string {
.Invalid = "Invalid",
.AES_GCM_128 = "AES-GCM-128",
.AES_GCM_192 = "AES-GCM-192",
.AES_GCM_256 = "AES-GCM-256",
.CHACHA20POLY1305 = "chacha20poly1305",
.XCHACHA20POLY1305 = "xchacha20poly1305",
}
// TAG_SIZES is the Algorithm to tag size in bytes.
TAG_SIZES := [Algorithm]int {
.Invalid = 0,
.AES_GCM_128 = aes.GCM_TAG_SIZE,
.AES_GCM_192 = aes.GCM_TAG_SIZE,
.AES_GCM_256 = aes.GCM_TAG_SIZE,
.CHACHA20POLY1305 = chacha20poly1305.TAG_SIZE,
.XCHACHA20POLY1305 = chacha20poly1305.TAG_SIZE,
}
// KEY_SIZES is the Algorithm to key size in bytes.  init panics unless
// the supplied key is exactly this long.
KEY_SIZES := [Algorithm]int {
.Invalid = 0,
.AES_GCM_128 = aes.KEY_SIZE_128,
.AES_GCM_192 = aes.KEY_SIZE_192,
.AES_GCM_256 = aes.KEY_SIZE_256,
.CHACHA20POLY1305 = chacha20poly1305.KEY_SIZE,
.XCHACHA20POLY1305 = chacha20poly1305.KEY_SIZE,
}
// IV_SIZES is the Algorithm to initialization vector size in bytes.
//
// Note: Some algorithms (such as AES-GCM) support variable IV sizes.
IV_SIZES := [Algorithm]int {
.Invalid = 0,
.AES_GCM_128 = aes.GCM_IV_SIZE,
.AES_GCM_192 = aes.GCM_IV_SIZE,
.AES_GCM_256 = aes.GCM_IV_SIZE,
.CHACHA20POLY1305 = chacha20poly1305.IV_SIZE,
.XCHACHA20POLY1305 = chacha20poly1305.XIV_SIZE,
}
// Context is a concrete instantiation of a specific AEAD algorithm.
//
// Both fields are set by init and cleared by reset; a Context whose
// _impl is nil is uninitialized.
Context :: struct {
// The algorithm selected at init time (.Invalid when uninitialized).
_algo: Algorithm,
// The algorithm-specific state; the active variant is chosen by init.
_impl: union {
aes.Context_GCM,
chacha20poly1305.Context,
},
}
// _IMPL_IDS maps each Algorithm to the typeid of the Context._impl
// variant that init selects for it (nil for .Invalid).
@(private)
_IMPL_IDS := [Algorithm]typeid {
.Invalid = nil,
.AES_GCM_128 = typeid_of(aes.Context_GCM),
.AES_GCM_192 = typeid_of(aes.Context_GCM),
.AES_GCM_256 = typeid_of(aes.Context_GCM),
.CHACHA20POLY1305 = typeid_of(chacha20poly1305.Context),
.XCHACHA20POLY1305 = typeid_of(chacha20poly1305.Context),
}
// init initializes ctx for the given AEAD algorithm and key.
//
// A Context that is already initialized is reset first.  The key must
// be exactly KEY_SIZES[algorithm] bytes long.  impl may be left nil to
// use the underlying package's default implementation; when provided,
// it must hold the variant matching the algorithm family.
//
// Panics on a wrong-sized key, on .Invalid or an unknown algorithm, and
// (via the type assertion) on an impl of the wrong variant.
init :: proc(ctx: ^Context, algorithm: Algorithm, key: []byte, impl: Implementation = nil) {
	// Sanitize any previous state before repurposing the Context.
	if ctx._impl != nil {
		reset(ctx)
	}

	expected_key_len := KEY_SIZES[algorithm]
	if len(key) != expected_key_len {
		panic("crypto/aead: invalid key size")
	}

	// Select the union variant in place by type ID, so that the
	// algorithm state is initialized directly inside ctx instead of
	// being built elsewhere and copied in.
	variant_id := _IMPL_IDS[algorithm]
	reflect.set_union_variant_typeid(ctx._impl, variant_id)

	switch algorithm {
	case .AES_GCM_128, .AES_GCM_192, .AES_GCM_256:
		aes_impl: aes.Implementation = aes.DEFAULT_IMPLEMENTATION
		if impl != nil {
			aes_impl = impl.(aes.Implementation)
		}
		aes.init_gcm(&ctx._impl.(aes.Context_GCM), key, aes_impl)
	case .CHACHA20POLY1305:
		chacha_impl: chacha20.Implementation = chacha20.DEFAULT_IMPLEMENTATION
		if impl != nil {
			chacha_impl = impl.(chacha20.Implementation)
		}
		chacha20poly1305.init(&ctx._impl.(chacha20poly1305.Context), key, chacha_impl)
	case .XCHACHA20POLY1305:
		chacha_impl: chacha20.Implementation = chacha20.DEFAULT_IMPLEMENTATION
		if impl != nil {
			chacha_impl = impl.(chacha20.Implementation)
		}
		chacha20poly1305.init_xchacha(&ctx._impl.(chacha20poly1305.Context), key, chacha_impl)
	case .Invalid:
		panic("crypto/aead: uninitialized algorithm")
	case:
		panic("crypto/aead: invalid algorithm")
	}

	ctx._algo = algorithm
}
// seal_ctx encrypts plaintext and authenticates aad together with the
// resulting ciphertext, using the algorithm and key held by ctx and the
// supplied iv.  The ciphertext is written to dst and the authentication
// tag to tag.  Panics if ctx has not been initialized.
//
// dst and plaintext MUST alias exactly or not at all.
seal_ctx :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
	// Dispatch on whichever algorithm state init placed in the Context.
	switch &state in ctx._impl {
	case aes.Context_GCM:
		aes.seal_gcm(&state, dst, tag, iv, aad, plaintext)
	case chacha20poly1305.Context:
		chacha20poly1305.seal(&state, dst, tag, iv, aad, plaintext)
	case:
		panic("crypto/aead: uninitialized algorithm")
	}
}
// open_ctx authenticates aad and ciphertext against tag, and decrypts
// ciphertext into dst, using the algorithm and key held by ctx and the
// supplied iv.  Panics if ctx has not been initialized.
//
// Returns true iff the authentication was successful.  If
// authentication fails, the destination buffer will be zeroed.
//
// dst and ciphertext MUST alias exactly or not at all.
@(require_results)
open_ctx :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
	// Dispatch on whichever algorithm state init placed in the Context.
	switch &state in ctx._impl {
	case aes.Context_GCM:
		return aes.open_gcm(&state, dst, iv, aad, ciphertext, tag)
	case chacha20poly1305.Context:
		return chacha20poly1305.open(&state, dst, iv, aad, ciphertext, tag)
	case:
		panic("crypto/aead: uninitialized algorithm")
	}
}
// reset sanitizes ctx by resetting the algorithm-specific state and
// then marking the Context as uninitialized.  It is safe to call on a
// Context that was never initialized or has already been reset.  The
// Context must be re-initialized with init before further use.
reset :: proc(ctx: ^Context) {
	switch &state in ctx._impl {
	case aes.Context_GCM:
		aes.reset_gcm(&state)
	case chacha20poly1305.Context:
		chacha20poly1305.reset(&state)
	case:
		// Nothing is held, so there is nothing to sanitize.
	}

	ctx._impl = nil
	ctx._algo = .Invalid
}
// algorithm returns the Algorithm used by a Context instance.  A
// Context that has been reset reports .Invalid.
algorithm :: proc(ctx: ^Context) -> Algorithm {
return ctx._algo
}
// iv_size returns the IV size of a Context instance in bytes, as listed
// in IV_SIZES (0 for a Context that has been reset).
iv_size :: proc(ctx: ^Context) -> int {
return IV_SIZES[ctx._algo]
}
// tag_size returns the tag size of a Context instance in bytes, as
// listed in TAG_SIZES (0 for a Context that has been reset).
tag_size :: proc(ctx: ^Context) -> int {
return TAG_SIZES[ctx._algo]
}
+3 -3
View File
@@ -2,9 +2,9 @@
package aes implements the AES block cipher and some common modes.
See:
- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.197-upd1.pdf
- https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a.pdf
- https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf
- [[ https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.197-upd1.pdf ]]
- [[ https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a.pdf ]]
- [[ https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf ]]
*/
package aes
+3 -3
View File
@@ -20,7 +20,7 @@ Context_CTR :: struct {
}
// init_ctr initializes a Context_CTR with the provided key and IV.
init_ctr :: proc(ctx: ^Context_CTR, key, iv: []byte, impl := Implementation.Hardware) {
init_ctr :: proc(ctx: ^Context_CTR, key, iv: []byte, impl := DEFAULT_IMPLEMENTATION) {
if len(iv) != CTR_IV_SIZE {
panic("crypto/aes: invalid CTR IV size")
}
@@ -47,7 +47,7 @@ xor_bytes_ctr :: proc(ctx: ^Context_CTR, dst, src: []byte) {
panic("crypto/aes: dst and src alias inexactly")
}
for remaining := len(src); remaining > 0; {
#no_bounds_check for remaining := len(src); remaining > 0; {
// Process multiple blocks at once
if ctx._off == BLOCK_SIZE {
if nr_blocks := remaining / BLOCK_SIZE; nr_blocks > 0 {
@@ -85,7 +85,7 @@ keystream_bytes_ctr :: proc(ctx: ^Context_CTR, dst: []byte) {
assert(ctx._is_initialized)
dst := dst
for remaining := len(dst); remaining > 0; {
#no_bounds_check for remaining := len(dst); remaining > 0; {
// Process multiple blocks at once
if ctx._off == BLOCK_SIZE {
if nr_blocks := remaining / BLOCK_SIZE; nr_blocks > 0 {
+1 -1
View File
@@ -12,7 +12,7 @@ Context_ECB :: struct {
}
// init_ecb initializes a Context_ECB with the provided key.
init_ecb :: proc(ctx: ^Context_ECB, key: []byte, impl := Implementation.Hardware) {
init_ecb :: proc(ctx: ^Context_ECB, key: []byte, impl := DEFAULT_IMPLEMENTATION) {
init_impl(&ctx._impl, key, impl)
ctx._is_initialized = true
}
+28 -27
View File
@@ -7,10 +7,10 @@ import "core:crypto/_aes/ct64"
import "core:encoding/endian"
import "core:mem"
// GCM_NONCE_SIZE is the default size of the GCM nonce in bytes.
GCM_NONCE_SIZE :: 12
// GCM_NONCE_SIZE_MAX is the maximum size of the GCM nonce in bytes.
GCM_NONCE_SIZE_MAX :: 0x2000000000000000 // floor((2^64 - 1) / 8) bits
// GCM_IV_SIZE is the default size of the GCM IV in bytes.
GCM_IV_SIZE :: 12
// GCM_IV_SIZE_MAX is the maximum size of the GCM IV in bytes.
GCM_IV_SIZE_MAX :: 0x2000000000000000 // floor((2^64 - 1) / 8) bits
// GCM_TAG_SIZE is the size of a GCM tag in bytes.
GCM_TAG_SIZE :: _aes.GHASH_TAG_SIZE
@@ -26,19 +26,19 @@ Context_GCM :: struct {
}
// init_gcm initializes a Context_GCM with the provided key.
init_gcm :: proc(ctx: ^Context_GCM, key: []byte, impl := Implementation.Hardware) {
init_gcm :: proc(ctx: ^Context_GCM, key: []byte, impl := DEFAULT_IMPLEMENTATION) {
init_impl(&ctx._impl, key, impl)
ctx._is_initialized = true
}
// seal_gcm encrypts the plaintext and authenticates the aad and ciphertext,
// with the provided Context_GCM and nonce, stores the output in dst and tag.
// with the provided Context_GCM and iv, stores the output in dst and tag.
//
// dst and plaintext MUST alias exactly or not at all.
seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, nonce, aad, plaintext: []byte) {
seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, iv, aad, plaintext: []byte) {
assert(ctx._is_initialized)
gcm_validate_common_slice_sizes(tag, nonce, aad, plaintext)
gcm_validate_common_slice_sizes(tag, iv, aad, plaintext)
if len(dst) != len(plaintext) {
panic("crypto/aes: invalid destination ciphertext size")
}
@@ -47,7 +47,7 @@ seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, nonce, aad, plaintext: []byte) {
}
if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
gcm_seal_hw(&impl, dst, tag, nonce, aad, plaintext)
gcm_seal_hw(&impl, dst, tag, iv, aad, plaintext)
return
}
@@ -55,7 +55,7 @@ seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, nonce, aad, plaintext: []byte) {
j0: [_aes.GHASH_BLOCK_SIZE]byte
j0_enc: [_aes.GHASH_BLOCK_SIZE]byte
s: [_aes.GHASH_TAG_SIZE]byte
init_ghash_ct64(ctx, &h, &j0, &j0_enc, nonce)
init_ghash_ct64(ctx, &h, &j0, &j0_enc, iv)
// Note: Our GHASH implementation handles appending padding.
ct64.ghash(s[:], h[:], aad)
@@ -69,15 +69,16 @@ seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, nonce, aad, plaintext: []byte) {
}
// open_gcm authenticates the aad and ciphertext, and decrypts the ciphertext,
// with the provided Context_GCM, nonce, and tag, and stores the output in dst,
// with the provided Context_GCM, iv, and tag, and stores the output in dst,
// returning true iff the authentication was successful. If authentication
// fails, the destination buffer will be zeroed.
//
// dst and plaintext MUST alias exactly or not at all.
open_gcm :: proc(ctx: ^Context_GCM, dst, nonce, aad, ciphertext, tag: []byte) -> bool {
@(require_results)
open_gcm :: proc(ctx: ^Context_GCM, dst, iv, aad, ciphertext, tag: []byte) -> bool {
assert(ctx._is_initialized)
gcm_validate_common_slice_sizes(tag, nonce, aad, ciphertext)
gcm_validate_common_slice_sizes(tag, iv, aad, ciphertext)
if len(dst) != len(ciphertext) {
panic("crypto/aes: invalid destination plaintext size")
}
@@ -86,14 +87,14 @@ open_gcm :: proc(ctx: ^Context_GCM, dst, nonce, aad, ciphertext, tag: []byte) ->
}
if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
return gcm_open_hw(&impl, dst, nonce, aad, ciphertext, tag)
return gcm_open_hw(&impl, dst, iv, aad, ciphertext, tag)
}
h: [_aes.GHASH_KEY_SIZE]byte
j0: [_aes.GHASH_BLOCK_SIZE]byte
j0_enc: [_aes.GHASH_BLOCK_SIZE]byte
s: [_aes.GHASH_TAG_SIZE]byte
init_ghash_ct64(ctx, &h, &j0, &j0_enc, nonce)
init_ghash_ct64(ctx, &h, &j0, &j0_enc, iv)
ct64.ghash(s[:], h[:], aad)
gctr_ct64(ctx, dst, &s, ciphertext, &h, &j0, false)
@@ -112,7 +113,7 @@ open_gcm :: proc(ctx: ^Context_GCM, dst, nonce, aad, ciphertext, tag: []byte) ->
return ok
}
// reset_ctr sanitizes the Context_GCM. The Context_GCM must be
// reset_gcm sanitizes the Context_GCM. The Context_GCM must be
// re-initialized to be used again.
reset_gcm :: proc "contextless" (ctx: ^Context_GCM) {
reset_impl(&ctx._impl)
@@ -120,14 +121,14 @@ reset_gcm :: proc "contextless" (ctx: ^Context_GCM) {
}
@(private = "file")
gcm_validate_common_slice_sizes :: proc(tag, nonce, aad, text: []byte) {
gcm_validate_common_slice_sizes :: proc(tag, iv, aad, text: []byte) {
if len(tag) != GCM_TAG_SIZE {
panic("crypto/aes: invalid GCM tag size")
}
// The specification supports nonces in the range [1, 2^64) bits.
if l := len(nonce); l == 0 || u64(l) >= GCM_NONCE_SIZE_MAX {
panic("crypto/aes: invalid GCM nonce size")
// The specification supports IVs in the range [1, 2^64) bits.
if l := len(iv); l == 0 || u64(l) >= GCM_IV_SIZE_MAX {
panic("crypto/aes: invalid GCM IV size")
}
if aad_len := u64(len(aad)); aad_len > GCM_A_MAX {
@@ -144,7 +145,7 @@ init_ghash_ct64 :: proc(
h: ^[_aes.GHASH_KEY_SIZE]byte,
j0: ^[_aes.GHASH_BLOCK_SIZE]byte,
j0_enc: ^[_aes.GHASH_BLOCK_SIZE]byte,
nonce: []byte,
iv: []byte,
) {
impl := &ctx._impl.(ct64.Context)
@@ -152,14 +153,14 @@ init_ghash_ct64 :: proc(
ct64.encrypt_block(impl, h[:], h[:])
// Define a block, J0, as follows:
if l := len(nonce); l == GCM_NONCE_SIZE {
if l := len(iv); l == GCM_IV_SIZE {
// if len(IV) = 96, then let J0 = IV || 0^31 || 1
copy(j0[:], nonce)
copy(j0[:], iv)
j0[_aes.GHASH_BLOCK_SIZE - 1] = 1
} else {
// If len(IV) != 96, then let s = 128 ceil(len(IV)/128) - len(IV),
// and let J0 = GHASHH(IV || 0^(s+64) || ceil(len(IV))^64).
ct64.ghash(j0[:], h[:], nonce)
ct64.ghash(j0[:], h[:], iv)
tmp: [_aes.GHASH_BLOCK_SIZE]byte
endian.unchecked_put_u64be(tmp[8:], u64(l) * 8)
@@ -197,7 +198,7 @@ gctr_ct64 :: proc(
s: ^[_aes.GHASH_BLOCK_SIZE]byte,
src: []byte,
h: ^[_aes.GHASH_KEY_SIZE]byte,
nonce: ^[_aes.GHASH_BLOCK_SIZE]byte,
iv: ^[_aes.GHASH_BLOCK_SIZE]byte,
is_seal: bool,
) #no_bounds_check {
ct64_inc_ctr32 := #force_inline proc "contextless" (dst: []byte, ctr: u32) -> u32 {
@@ -208,14 +209,14 @@ gctr_ct64 :: proc(
// Setup the counter blocks.
tmp, tmp2: [ct64.STRIDE][BLOCK_SIZE]byte = ---, ---
ctrs, blks: [ct64.STRIDE][]byte = ---, ---
ctr := endian.unchecked_get_u32be(nonce[GCM_NONCE_SIZE:]) + 1
ctr := endian.unchecked_get_u32be(iv[GCM_IV_SIZE:]) + 1
for i in 0 ..< ct64.STRIDE {
// Setup scratch space for the keystream.
blks[i] = tmp2[i][:]
// Pre-copy the IV to all the counter blocks.
ctrs[i] = tmp[i][:]
copy(ctrs[i], nonce[:GCM_NONCE_SIZE])
copy(ctrs[i], iv[:GCM_IV_SIZE])
}
impl := &ctx._impl.(ct64.Context)
+11 -11
View File
@@ -10,12 +10,12 @@ import "core:mem"
import "core:simd/x86"
@(private)
gcm_seal_hw :: proc(ctx: ^Context_Impl_Hardware, dst, tag, nonce, aad, plaintext: []byte) {
gcm_seal_hw :: proc(ctx: ^Context_Impl_Hardware, dst, tag, iv, aad, plaintext: []byte) {
h: [_aes.GHASH_KEY_SIZE]byte
j0: [_aes.GHASH_BLOCK_SIZE]byte
j0_enc: [_aes.GHASH_BLOCK_SIZE]byte
s: [_aes.GHASH_TAG_SIZE]byte
init_ghash_hw(ctx, &h, &j0, &j0_enc, nonce)
init_ghash_hw(ctx, &h, &j0, &j0_enc, iv)
// Note: Our GHASH implementation handles appending padding.
hw_intel.ghash(s[:], h[:], aad)
@@ -29,12 +29,12 @@ gcm_seal_hw :: proc(ctx: ^Context_Impl_Hardware, dst, tag, nonce, aad, plaintext
}
@(private)
gcm_open_hw :: proc(ctx: ^Context_Impl_Hardware, dst, nonce, aad, ciphertext, tag: []byte) -> bool {
gcm_open_hw :: proc(ctx: ^Context_Impl_Hardware, dst, iv, aad, ciphertext, tag: []byte) -> bool {
h: [_aes.GHASH_KEY_SIZE]byte
j0: [_aes.GHASH_BLOCK_SIZE]byte
j0_enc: [_aes.GHASH_BLOCK_SIZE]byte
s: [_aes.GHASH_TAG_SIZE]byte
init_ghash_hw(ctx, &h, &j0, &j0_enc, nonce)
init_ghash_hw(ctx, &h, &j0, &j0_enc, iv)
hw_intel.ghash(s[:], h[:], aad)
gctr_hw(ctx, dst, &s, ciphertext, &h, &j0, false)
@@ -59,20 +59,20 @@ init_ghash_hw :: proc(
h: ^[_aes.GHASH_KEY_SIZE]byte,
j0: ^[_aes.GHASH_BLOCK_SIZE]byte,
j0_enc: ^[_aes.GHASH_BLOCK_SIZE]byte,
nonce: []byte,
iv: []byte,
) {
// 1. Let H = CIPH(k, 0^128)
encrypt_block_hw(ctx, h[:], h[:])
// Define a block, J0, as follows:
if l := len(nonce); l == GCM_NONCE_SIZE {
if l := len(iv); l == GCM_IV_SIZE {
// if len(IV) = 96, then let J0 = IV || 0^31 || 1
copy(j0[:], nonce)
copy(j0[:], iv)
j0[_aes.GHASH_BLOCK_SIZE - 1] = 1
} else {
// If len(IV) != 96, then let s = 128 ceil(len(IV)/128) - len(IV),
// and let J0 = GHASH_H(IV || 0^(s+64) || [len(IV)]_64).
hw_intel.ghash(j0[:], h[:], nonce)
hw_intel.ghash(j0[:], h[:], iv)
tmp: [_aes.GHASH_BLOCK_SIZE]byte
endian.unchecked_put_u64be(tmp[8:], u64(l) * 8)
@@ -109,7 +109,7 @@ gctr_hw :: proc(
s: ^[_aes.GHASH_BLOCK_SIZE]byte,
src: []byte,
h: ^[_aes.GHASH_KEY_SIZE]byte,
nonce: ^[_aes.GHASH_BLOCK_SIZE]byte,
iv: ^[_aes.GHASH_BLOCK_SIZE]byte,
is_seal: bool,
) #no_bounds_check {
sks: [15]x86.__m128i = ---
@@ -118,8 +118,8 @@ gctr_hw :: proc(
}
// Setup the counter block
ctr_blk := intrinsics.unaligned_load((^x86.__m128i)(nonce))
ctr := endian.unchecked_get_u32be(nonce[GCM_NONCE_SIZE:]) + 1
ctr_blk := intrinsics.unaligned_load((^x86.__m128i)(iv))
ctr := endian.unchecked_get_u32be(iv[GCM_IV_SIZE:]) + 1
src, dst := src, dst
+4
View File
@@ -10,6 +10,10 @@ Context_Impl :: union {
Context_Impl_Hardware,
}
// DEFAULT_IMPLEMENTATION is the implementation that will be used by
// default if possible.
DEFAULT_IMPLEMENTATION :: Implementation.Hardware
// Implementation is an AES implementation. Most callers will not need
// to use this as the package will automatically select the most performant
// implementation available (See `is_hardware_accelerated()`).
+2 -2
View File
@@ -34,11 +34,11 @@ ctr_blocks_hw :: proc(ctx: ^Context_CTR, dst, src: []byte, nr_blocks: int) {
}
@(private)
gcm_seal_hw :: proc(ctx: ^Context_Impl_Hardware, dst, tag, nonce, aad, plaintext: []byte) {
gcm_seal_hw :: proc(ctx: ^Context_Impl_Hardware, dst, tag, iv, aad, plaintext: []byte) {
panic(ERR_HW_NOT_SUPPORTED)
}
@(private)
gcm_open_hw :: proc(ctx: ^Context_Impl_Hardware, dst, nonce, aad, ciphertext, tag: []byte) -> bool {
gcm_open_hw :: proc(ctx: ^Context_Impl_Hardware, dst, iv, aad, ciphertext, tag: []byte) -> bool {
panic(ERR_HW_NOT_SUPPORTED)
}
+2 -2
View File
@@ -2,8 +2,8 @@
package blake2b implements the BLAKE2b hash algorithm.
See:
- https://datatracker.ietf.org/doc/html/rfc7693
- https://www.blake2.net
- [[ https://datatracker.ietf.org/doc/html/rfc7693 ]]
- [[ https://www.blake2.net ]]
*/
package blake2b
+2 -2
View File
@@ -2,8 +2,8 @@
package blake2s implements the BLAKE2s hash algorithm.
See:
- https://datatracker.ietf.org/doc/html/rfc7693
- https://www.blake2.net/
- [[ https://datatracker.ietf.org/doc/html/rfc7693 ]]
- [[ https://www.blake2.net/ ]]
*/
package blake2s
+53 -466
View File
@@ -2,125 +2,72 @@
package chacha20 implements the ChaCha20 and XChaCha20 stream ciphers.
See:
- https://datatracker.ietf.org/doc/html/rfc8439
- https://datatracker.ietf.org/doc/draft-irtf-cfrg-xchacha/03/
- [[ https://datatracker.ietf.org/doc/html/rfc8439 ]]
- [[ https://datatracker.ietf.org/doc/draft-irtf-cfrg-xchacha/03/ ]]
*/
package chacha20
import "core:bytes"
import "core:encoding/endian"
import "core:math/bits"
import "core:crypto/_chacha20"
import "core:mem"
// KEY_SIZE is the (X)ChaCha20 key size in bytes.
KEY_SIZE :: 32
// NONCE_SIZE is the ChaCha20 nonce size in bytes.
NONCE_SIZE :: 12
// XNONCE_SIZE is the XChaCha20 nonce size in bytes.
XNONCE_SIZE :: 24
@(private)
_MAX_CTR_IETF :: 0xffffffff
@(private)
_BLOCK_SIZE :: 64
@(private)
_STATE_SIZE_U32 :: 16
@(private)
_ROUNDS :: 20
@(private)
_SIGMA_0: u32 : 0x61707865
@(private)
_SIGMA_1: u32 : 0x3320646e
@(private)
_SIGMA_2: u32 : 0x79622d32
@(private)
_SIGMA_3: u32 : 0x6b206574
KEY_SIZE :: _chacha20.KEY_SIZE
// IV_SIZE is the ChaCha20 IV size in bytes.
IV_SIZE :: _chacha20.IV_SIZE
// XIV_SIZE is the XChaCha20 IV size in bytes.
XIV_SIZE :: _chacha20.XIV_SIZE
// Context is a ChaCha20 or XChaCha20 instance.
Context :: struct {
_s: [_STATE_SIZE_U32]u32,
_buffer: [_BLOCK_SIZE]byte,
_off: int,
_is_ietf_flavor: bool,
_is_initialized: bool,
_state: _chacha20.Context,
_impl: Implementation,
}
// init initializes a Context for ChaCha20 or XChaCha20 with the provided
// key and nonce.
init :: proc(ctx: ^Context, key, nonce: []byte) {
// key and iv.
init :: proc(ctx: ^Context, key, iv: []byte, impl := DEFAULT_IMPLEMENTATION) {
if len(key) != KEY_SIZE {
panic("crypto/chacha20: invalid ChaCha20 key size")
panic("crypto/chacha20: invalid (X)ChaCha20 key size")
}
if n_len := len(nonce); n_len != NONCE_SIZE && n_len != XNONCE_SIZE {
panic("crypto/chacha20: invalid (X)ChaCha20 nonce size")
if l := len(iv); l != IV_SIZE && l != XIV_SIZE {
panic("crypto/chacha20: invalid (X)ChaCha20 IV size")
}
k, n := key, nonce
k, n := key, iv
// Derive the XChaCha20 subkey and sub-nonce via HChaCha20.
is_xchacha := len(nonce) == XNONCE_SIZE
init_impl(ctx, impl)
is_xchacha := len(iv) == XIV_SIZE
if is_xchacha {
sub_key := ctx._buffer[:KEY_SIZE]
_hchacha20(sub_key, k, n)
sub_iv: [IV_SIZE]byte
sub_key := ctx._state._buffer[:KEY_SIZE]
hchacha20(sub_key, k, n, ctx._impl)
k = sub_key
n = n[16:24]
copy(sub_iv[4:], n[16:])
n = sub_iv[:]
}
ctx._s[0] = _SIGMA_0
ctx._s[1] = _SIGMA_1
ctx._s[2] = _SIGMA_2
ctx._s[3] = _SIGMA_3
ctx._s[4] = endian.unchecked_get_u32le(k[0:4])
ctx._s[5] = endian.unchecked_get_u32le(k[4:8])
ctx._s[6] = endian.unchecked_get_u32le(k[8:12])
ctx._s[7] = endian.unchecked_get_u32le(k[12:16])
ctx._s[8] = endian.unchecked_get_u32le(k[16:20])
ctx._s[9] = endian.unchecked_get_u32le(k[20:24])
ctx._s[10] = endian.unchecked_get_u32le(k[24:28])
ctx._s[11] = endian.unchecked_get_u32le(k[28:32])
ctx._s[12] = 0
if !is_xchacha {
ctx._s[13] = endian.unchecked_get_u32le(n[0:4])
ctx._s[14] = endian.unchecked_get_u32le(n[4:8])
ctx._s[15] = endian.unchecked_get_u32le(n[8:12])
} else {
ctx._s[13] = 0
ctx._s[14] = endian.unchecked_get_u32le(n[0:4])
ctx._s[15] = endian.unchecked_get_u32le(n[4:8])
_chacha20.init(&ctx._state, k, n, is_xchacha)
if is_xchacha {
// The sub-key is stored in the keystream buffer. While
// this will be overwritten in most circumstances, explicitly
// clear it out early.
mem.zero_explicit(&ctx._buffer, KEY_SIZE)
mem.zero_explicit(&ctx._state._buffer, KEY_SIZE)
}
ctx._off = _BLOCK_SIZE
ctx._is_ietf_flavor = !is_xchacha
ctx._is_initialized = true
}
// seek seeks the (X)ChaCha20 stream counter to the specified block.
seek :: proc(ctx: ^Context, block_nr: u64) {
assert(ctx._is_initialized)
if ctx._is_ietf_flavor {
if block_nr > _MAX_CTR_IETF {
panic("crypto/chacha20: attempted to seek past maximum counter")
}
} else {
ctx._s[13] = u32(block_nr >> 32)
}
ctx._s[12] = u32(block_nr)
ctx._off = _BLOCK_SIZE
_chacha20.seek(&ctx._state, block_nr)
}
// xor_bytes XORs each byte in src with bytes taken from the (X)ChaCha20
// keystream, and writes the resulting output to dst. Dst and src MUST
// alias exactly or not at all.
xor_bytes :: proc(ctx: ^Context, dst, src: []byte) {
assert(ctx._is_initialized)
assert(ctx._state._is_initialized)
src, dst := src, dst
if dst_len := len(dst); dst_len < len(src) {
@@ -131,12 +78,13 @@ xor_bytes :: proc(ctx: ^Context, dst, src: []byte) {
panic("crypto/chacha20: dst and src alias inexactly")
}
for remaining := len(src); remaining > 0; {
st := &ctx._state
#no_bounds_check for remaining := len(src); remaining > 0; {
// Process multiple blocks at once
if ctx._off == _BLOCK_SIZE {
if nr_blocks := remaining / _BLOCK_SIZE; nr_blocks > 0 {
direct_bytes := nr_blocks * _BLOCK_SIZE
_do_blocks(ctx, dst, src, nr_blocks)
if st._off == _chacha20.BLOCK_SIZE {
if nr_blocks := remaining / _chacha20.BLOCK_SIZE; nr_blocks > 0 {
direct_bytes := nr_blocks * _chacha20.BLOCK_SIZE
stream_blocks(ctx, dst, src, nr_blocks)
remaining -= direct_bytes
if remaining == 0 {
return
@@ -147,17 +95,17 @@ xor_bytes :: proc(ctx: ^Context, dst, src: []byte) {
// If there is a partial block, generate and buffer 1 block
// worth of keystream.
_do_blocks(ctx, ctx._buffer[:], nil, 1)
ctx._off = 0
stream_blocks(ctx, st._buffer[:], nil, 1)
st._off = 0
}
// Process partial blocks from the buffered keystream.
to_xor := min(_BLOCK_SIZE - ctx._off, remaining)
buffered_keystream := ctx._buffer[ctx._off:]
to_xor := min(_chacha20.BLOCK_SIZE - st._off, remaining)
buffered_keystream := st._buffer[st._off:]
for i := 0; i < to_xor; i = i + 1 {
dst[i] = buffered_keystream[i] ~ src[i]
}
ctx._off += to_xor
st._off += to_xor
dst = dst[to_xor:]
src = src[to_xor:]
remaining -= to_xor
@@ -166,15 +114,15 @@ xor_bytes :: proc(ctx: ^Context, dst, src: []byte) {
// keystream_bytes fills dst with the raw (X)ChaCha20 keystream output.
keystream_bytes :: proc(ctx: ^Context, dst: []byte) {
assert(ctx._is_initialized)
assert(ctx._state._is_initialized)
dst := dst
for remaining := len(dst); remaining > 0; {
dst, st := dst, &ctx._state
#no_bounds_check for remaining := len(dst); remaining > 0; {
// Process multiple blocks at once
if ctx._off == _BLOCK_SIZE {
if nr_blocks := remaining / _BLOCK_SIZE; nr_blocks > 0 {
direct_bytes := nr_blocks * _BLOCK_SIZE
_do_blocks(ctx, dst, nil, nr_blocks)
if st._off == _chacha20.BLOCK_SIZE {
if nr_blocks := remaining / _chacha20.BLOCK_SIZE; nr_blocks > 0 {
direct_bytes := nr_blocks * _chacha20.BLOCK_SIZE
stream_blocks(ctx, dst, nil, nr_blocks)
remaining -= direct_bytes
if remaining == 0 {
return
@@ -184,15 +132,15 @@ keystream_bytes :: proc(ctx: ^Context, dst: []byte) {
// If there is a partial block, generate and buffer 1 block
// worth of keystream.
_do_blocks(ctx, ctx._buffer[:], nil, 1)
ctx._off = 0
stream_blocks(ctx, st._buffer[:], nil, 1)
st._off = 0
}
// Process partial blocks from the buffered keystream.
to_copy := min(_BLOCK_SIZE - ctx._off, remaining)
buffered_keystream := ctx._buffer[ctx._off:]
to_copy := min(_chacha20.BLOCK_SIZE - st._off, remaining)
buffered_keystream := st._buffer[st._off:]
copy(dst[:to_copy], buffered_keystream[:to_copy])
ctx._off += to_copy
st._off += to_copy
dst = dst[to_copy:]
remaining -= to_copy
}
@@ -201,366 +149,5 @@ keystream_bytes :: proc(ctx: ^Context, dst: []byte) {
// reset sanitizes the Context. The Context must be re-initialized to
// be used again.
reset :: proc(ctx: ^Context) {
mem.zero_explicit(&ctx._s, size_of(ctx._s))
mem.zero_explicit(&ctx._buffer, size_of(ctx._buffer))
ctx._is_initialized = false
}
@(private)
_do_blocks :: proc(ctx: ^Context, dst, src: []byte, nr_blocks: int) {
// Enforce the maximum consumed keystream per nonce.
//
// While all modern "standard" definitions of ChaCha20 use
// the IETF 32-bit counter, for XChaCha20 most common
// implementations allow for a 64-bit counter.
//
// Honestly, the answer here is "use a MRAE primitive", but
// go with common practice in the case of XChaCha20.
if ctx._is_ietf_flavor {
if u64(ctx._s[12]) + u64(nr_blocks) > 0xffffffff {
panic("crypto/chacha20: maximum ChaCha20 keystream per nonce reached")
}
} else {
ctr := (u64(ctx._s[13]) << 32) | u64(ctx._s[12])
if _, carry := bits.add_u64(ctr, u64(nr_blocks), 0); carry != 0 {
panic("crypto/chacha20: maximum XChaCha20 keystream per nonce reached")
}
}
dst, src := dst, src
x := &ctx._s
for n := 0; n < nr_blocks; n = n + 1 {
x0, x1, x2, x3 := _SIGMA_0, _SIGMA_1, _SIGMA_2, _SIGMA_3
x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 := x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]
for i := _ROUNDS; i > 0; i = i - 2 {
// Even when forcing inlining manually inlining all of
// these is decently faster.
// quarterround(x, 0, 4, 8, 12)
x0 += x4
x12 ~= x0
x12 = bits.rotate_left32(x12, 16)
x8 += x12
x4 ~= x8
x4 = bits.rotate_left32(x4, 12)
x0 += x4
x12 ~= x0
x12 = bits.rotate_left32(x12, 8)
x8 += x12
x4 ~= x8
x4 = bits.rotate_left32(x4, 7)
// quarterround(x, 1, 5, 9, 13)
x1 += x5
x13 ~= x1
x13 = bits.rotate_left32(x13, 16)
x9 += x13
x5 ~= x9
x5 = bits.rotate_left32(x5, 12)
x1 += x5
x13 ~= x1
x13 = bits.rotate_left32(x13, 8)
x9 += x13
x5 ~= x9
x5 = bits.rotate_left32(x5, 7)
// quarterround(x, 2, 6, 10, 14)
x2 += x6
x14 ~= x2
x14 = bits.rotate_left32(x14, 16)
x10 += x14
x6 ~= x10
x6 = bits.rotate_left32(x6, 12)
x2 += x6
x14 ~= x2
x14 = bits.rotate_left32(x14, 8)
x10 += x14
x6 ~= x10
x6 = bits.rotate_left32(x6, 7)
// quarterround(x, 3, 7, 11, 15)
x3 += x7
x15 ~= x3
x15 = bits.rotate_left32(x15, 16)
x11 += x15
x7 ~= x11
x7 = bits.rotate_left32(x7, 12)
x3 += x7
x15 ~= x3
x15 = bits.rotate_left32(x15, 8)
x11 += x15
x7 ~= x11
x7 = bits.rotate_left32(x7, 7)
// quarterround(x, 0, 5, 10, 15)
x0 += x5
x15 ~= x0
x15 = bits.rotate_left32(x15, 16)
x10 += x15
x5 ~= x10
x5 = bits.rotate_left32(x5, 12)
x0 += x5
x15 ~= x0
x15 = bits.rotate_left32(x15, 8)
x10 += x15
x5 ~= x10
x5 = bits.rotate_left32(x5, 7)
// quarterround(x, 1, 6, 11, 12)
x1 += x6
x12 ~= x1
x12 = bits.rotate_left32(x12, 16)
x11 += x12
x6 ~= x11
x6 = bits.rotate_left32(x6, 12)
x1 += x6
x12 ~= x1
x12 = bits.rotate_left32(x12, 8)
x11 += x12
x6 ~= x11
x6 = bits.rotate_left32(x6, 7)
// quarterround(x, 2, 7, 8, 13)
x2 += x7
x13 ~= x2
x13 = bits.rotate_left32(x13, 16)
x8 += x13
x7 ~= x8
x7 = bits.rotate_left32(x7, 12)
x2 += x7
x13 ~= x2
x13 = bits.rotate_left32(x13, 8)
x8 += x13
x7 ~= x8
x7 = bits.rotate_left32(x7, 7)
// quarterround(x, 3, 4, 9, 14)
x3 += x4
x14 ~= x3
x14 = bits.rotate_left32(x14, 16)
x9 += x14
x4 ~= x9
x4 = bits.rotate_left32(x4, 12)
x3 += x4
x14 ~= x3
x14 = bits.rotate_left32(x14, 8)
x9 += x14
x4 ~= x9
x4 = bits.rotate_left32(x4, 7)
}
x0 += _SIGMA_0
x1 += _SIGMA_1
x2 += _SIGMA_2
x3 += _SIGMA_3
x4 += x[4]
x5 += x[5]
x6 += x[6]
x7 += x[7]
x8 += x[8]
x9 += x[9]
x10 += x[10]
x11 += x[11]
x12 += x[12]
x13 += x[13]
x14 += x[14]
x15 += x[15]
// While the "correct" answer to getting more performance out of
// this is "use vector operations", support for that is currently
// a work in progress/to be designed.
//
// In the meantime:
// - The caller(s) ensure that src/dst are valid.
// - The compiler knows if the target is picky about alignment.
#no_bounds_check {
if src != nil {
endian.unchecked_put_u32le(dst[0:4], endian.unchecked_get_u32le(src[0:4]) ~ x0)
endian.unchecked_put_u32le(dst[4:8], endian.unchecked_get_u32le(src[4:8]) ~ x1)
endian.unchecked_put_u32le(dst[8:12], endian.unchecked_get_u32le(src[8:12]) ~ x2)
endian.unchecked_put_u32le(dst[12:16], endian.unchecked_get_u32le(src[12:16]) ~ x3)
endian.unchecked_put_u32le(dst[16:20], endian.unchecked_get_u32le(src[16:20]) ~ x4)
endian.unchecked_put_u32le(dst[20:24], endian.unchecked_get_u32le(src[20:24]) ~ x5)
endian.unchecked_put_u32le(dst[24:28], endian.unchecked_get_u32le(src[24:28]) ~ x6)
endian.unchecked_put_u32le(dst[28:32], endian.unchecked_get_u32le(src[28:32]) ~ x7)
endian.unchecked_put_u32le(dst[32:36], endian.unchecked_get_u32le(src[32:36]) ~ x8)
endian.unchecked_put_u32le(dst[36:40], endian.unchecked_get_u32le(src[36:40]) ~ x9)
endian.unchecked_put_u32le(dst[40:44], endian.unchecked_get_u32le(src[40:44]) ~ x10)
endian.unchecked_put_u32le(dst[44:48], endian.unchecked_get_u32le(src[44:48]) ~ x11)
endian.unchecked_put_u32le(dst[48:52], endian.unchecked_get_u32le(src[48:52]) ~ x12)
endian.unchecked_put_u32le(dst[52:56], endian.unchecked_get_u32le(src[52:56]) ~ x13)
endian.unchecked_put_u32le(dst[56:60], endian.unchecked_get_u32le(src[56:60]) ~ x14)
endian.unchecked_put_u32le(dst[60:64], endian.unchecked_get_u32le(src[60:64]) ~ x15)
src = src[_BLOCK_SIZE:]
} else {
endian.unchecked_put_u32le(dst[0:4], x0)
endian.unchecked_put_u32le(dst[4:8], x1)
endian.unchecked_put_u32le(dst[8:12], x2)
endian.unchecked_put_u32le(dst[12:16], x3)
endian.unchecked_put_u32le(dst[16:20], x4)
endian.unchecked_put_u32le(dst[20:24], x5)
endian.unchecked_put_u32le(dst[24:28], x6)
endian.unchecked_put_u32le(dst[28:32], x7)
endian.unchecked_put_u32le(dst[32:36], x8)
endian.unchecked_put_u32le(dst[36:40], x9)
endian.unchecked_put_u32le(dst[40:44], x10)
endian.unchecked_put_u32le(dst[44:48], x11)
endian.unchecked_put_u32le(dst[48:52], x12)
endian.unchecked_put_u32le(dst[52:56], x13)
endian.unchecked_put_u32le(dst[56:60], x14)
endian.unchecked_put_u32le(dst[60:64], x15)
}
dst = dst[_BLOCK_SIZE:]
}
// Increment the counter. Overflow checking is done upon
// entry into the routine, so a 64-bit increment safely
// covers both cases.
new_ctr := ((u64(ctx._s[13]) << 32) | u64(ctx._s[12])) + 1
x[12] = u32(new_ctr)
x[13] = u32(new_ctr >> 32)
}
}
@(private)
_hchacha20 :: proc "contextless" (dst, key, nonce: []byte) {
x0, x1, x2, x3 := _SIGMA_0, _SIGMA_1, _SIGMA_2, _SIGMA_3
x4 := endian.unchecked_get_u32le(key[0:4])
x5 := endian.unchecked_get_u32le(key[4:8])
x6 := endian.unchecked_get_u32le(key[8:12])
x7 := endian.unchecked_get_u32le(key[12:16])
x8 := endian.unchecked_get_u32le(key[16:20])
x9 := endian.unchecked_get_u32le(key[20:24])
x10 := endian.unchecked_get_u32le(key[24:28])
x11 := endian.unchecked_get_u32le(key[28:32])
x12 := endian.unchecked_get_u32le(nonce[0:4])
x13 := endian.unchecked_get_u32le(nonce[4:8])
x14 := endian.unchecked_get_u32le(nonce[8:12])
x15 := endian.unchecked_get_u32le(nonce[12:16])
for i := _ROUNDS; i > 0; i = i - 2 {
// quarterround(x, 0, 4, 8, 12)
x0 += x4
x12 ~= x0
x12 = bits.rotate_left32(x12, 16)
x8 += x12
x4 ~= x8
x4 = bits.rotate_left32(x4, 12)
x0 += x4
x12 ~= x0
x12 = bits.rotate_left32(x12, 8)
x8 += x12
x4 ~= x8
x4 = bits.rotate_left32(x4, 7)
// quarterround(x, 1, 5, 9, 13)
x1 += x5
x13 ~= x1
x13 = bits.rotate_left32(x13, 16)
x9 += x13
x5 ~= x9
x5 = bits.rotate_left32(x5, 12)
x1 += x5
x13 ~= x1
x13 = bits.rotate_left32(x13, 8)
x9 += x13
x5 ~= x9
x5 = bits.rotate_left32(x5, 7)
// quarterround(x, 2, 6, 10, 14)
x2 += x6
x14 ~= x2
x14 = bits.rotate_left32(x14, 16)
x10 += x14
x6 ~= x10
x6 = bits.rotate_left32(x6, 12)
x2 += x6
x14 ~= x2
x14 = bits.rotate_left32(x14, 8)
x10 += x14
x6 ~= x10
x6 = bits.rotate_left32(x6, 7)
// quarterround(x, 3, 7, 11, 15)
x3 += x7
x15 ~= x3
x15 = bits.rotate_left32(x15, 16)
x11 += x15
x7 ~= x11
x7 = bits.rotate_left32(x7, 12)
x3 += x7
x15 ~= x3
x15 = bits.rotate_left32(x15, 8)
x11 += x15
x7 ~= x11
x7 = bits.rotate_left32(x7, 7)
// quarterround(x, 0, 5, 10, 15)
x0 += x5
x15 ~= x0
x15 = bits.rotate_left32(x15, 16)
x10 += x15
x5 ~= x10
x5 = bits.rotate_left32(x5, 12)
x0 += x5
x15 ~= x0
x15 = bits.rotate_left32(x15, 8)
x10 += x15
x5 ~= x10
x5 = bits.rotate_left32(x5, 7)
// quarterround(x, 1, 6, 11, 12)
x1 += x6
x12 ~= x1
x12 = bits.rotate_left32(x12, 16)
x11 += x12
x6 ~= x11
x6 = bits.rotate_left32(x6, 12)
x1 += x6
x12 ~= x1
x12 = bits.rotate_left32(x12, 8)
x11 += x12
x6 ~= x11
x6 = bits.rotate_left32(x6, 7)
// quarterround(x, 2, 7, 8, 13)
x2 += x7
x13 ~= x2
x13 = bits.rotate_left32(x13, 16)
x8 += x13
x7 ~= x8
x7 = bits.rotate_left32(x7, 12)
x2 += x7
x13 ~= x2
x13 = bits.rotate_left32(x13, 8)
x8 += x13
x7 ~= x8
x7 = bits.rotate_left32(x7, 7)
// quarterround(x, 3, 4, 9, 14)
x3 += x4
x14 ~= x3
x14 = bits.rotate_left32(x14, 16)
x9 += x14
x4 ~= x9
x4 = bits.rotate_left32(x4, 12)
x3 += x4
x14 ~= x3
x14 = bits.rotate_left32(x14, 8)
x9 += x14
x4 ~= x9
x4 = bits.rotate_left32(x4, 7)
}
endian.unchecked_put_u32le(dst[0:4], x0)
endian.unchecked_put_u32le(dst[4:8], x1)
endian.unchecked_put_u32le(dst[8:12], x2)
endian.unchecked_put_u32le(dst[12:16], x3)
endian.unchecked_put_u32le(dst[16:20], x12)
endian.unchecked_put_u32le(dst[20:24], x13)
endian.unchecked_put_u32le(dst[24:28], x14)
endian.unchecked_put_u32le(dst[28:32], x15)
_chacha20.reset(&ctx._state)
}
+56
View File
@@ -0,0 +1,56 @@
package chacha20
import "base:intrinsics"
import "core:crypto/_chacha20/ref"
import "core:crypto/_chacha20/simd128"
import "core:crypto/_chacha20/simd256"
// DEFAULT_IMPLEMENTATION is the implementation that will be used by
// default if possible.
DEFAULT_IMPLEMENTATION :: Implementation.Simd256
// Implementation identifies a ChaCha20 implementation. Most callers will
// not need to use this as the package will automatically select the most
// performant implementation available.
Implementation :: enum {
// Portable dispatches to the `ref` package.
Portable,
// Simd128 dispatches to the `simd128` package.
Simd128,
// Simd256 dispatches to the `simd256` package.
Simd256,
}
// init_impl records the implementation that ctx will use. It starts from
// the requested implementation and steps down one level at a time
// (Simd256 -> Simd128 -> Portable) whenever the corresponding package
// reports that it is not performant.
@(private)
init_impl :: proc(ctx: ^Context, impl: Implementation) {
	selected := impl

	if selected == .Simd256 {
		if !simd256.is_performant() {
			selected = .Simd128
		}
	}
	// Also reached when the 256-bit path was just rejected above.
	if selected == .Simd128 {
		if !simd128.is_performant() {
			selected = .Portable
		}
	}

	ctx._impl = selected
}
// stream_blocks forwards to the stream_blocks routine of the package
// matching the implementation stored in ctx, handing it the underlying
// state together with dst, src and nr_blocks unchanged.
@(private)
stream_blocks :: proc(ctx: ^Context, dst, src: []byte, nr_blocks: int) {
	st := &ctx._state

	switch ctx._impl {
	case .Portable:
		ref.stream_blocks(st, dst, src, nr_blocks)
	case .Simd128:
		simd128.stream_blocks(st, dst, src, nr_blocks)
	case .Simd256:
		simd256.stream_blocks(st, dst, src, nr_blocks)
	}
}
// hchacha20 forwards to the hchacha20 routine of the package that
// corresponds to impl, passing dst, key and iv through unchanged.
@(private)
hchacha20 :: proc "contextless" (dst, key, iv: []byte, impl: Implementation) {
	switch impl {
	case .Portable:
		ref.hchacha20(dst, key, iv)
	case .Simd128:
		simd128.hchacha20(dst, key, iv)
	case .Simd256:
		simd256.hchacha20(dst, key, iv)
	}
}
@@ -1,9 +1,11 @@
/*
package chacha20poly1305 implements the AEAD_CHACHA20_POLY1305 Authenticated
Encryption with Additional Data algorithm.
package chacha20poly1305 implements the AEAD_CHACHA20_POLY1305 and
AEAD_XChaCha20_Poly1305 Authenticated Encryption with Additional Data
algorithms.
See:
- https://www.rfc-editor.org/rfc/rfc8439
- [[ https://www.rfc-editor.org/rfc/rfc8439 ]]
- [[ https://datatracker.ietf.org/doc/html/draft-arciszewski-xchacha-03 ]]
*/
package chacha20poly1305
@@ -15,8 +17,10 @@ import "core:mem"
// KEY_SIZE is the chacha20poly1305 key size in bytes.
KEY_SIZE :: chacha20.KEY_SIZE
// NONCE_SIZE is the chacha20poly1305 nonce size in bytes.
NONCE_SIZE :: chacha20.NONCE_SIZE
// IV_SIZE is the chacha20poly1305 IV size in bytes.
IV_SIZE :: chacha20.IV_SIZE
// XIV_SIZE is the xchacha20poly1305 IV size in bytes.
XIV_SIZE :: chacha20.XIV_SIZE
// TAG_SIZE is the chacha20poly1305 tag size in bytes.
TAG_SIZE :: poly1305.TAG_SIZE
@@ -24,15 +28,13 @@ TAG_SIZE :: poly1305.TAG_SIZE
_P_MAX :: 64 * 0xffffffff // 64 * (2^32-1)
@(private)
_validate_common_slice_sizes :: proc (tag, key, nonce, aad, text: []byte) {
_validate_common_slice_sizes :: proc (tag, iv, aad, text: []byte, is_xchacha: bool) {
if len(tag) != TAG_SIZE {
panic("crypto/chacha20poly1305: invalid destination tag size")
}
if len(key) != KEY_SIZE {
panic("crypto/chacha20poly1305: invalid key size")
}
if len(nonce) != NONCE_SIZE {
panic("crypto/chacha20poly1305: invalid nonce size")
expected_iv_len := is_xchacha ? XIV_SIZE : IV_SIZE
if len(iv) != expected_iv_len {
panic("crypto/chacha20poly1305: invalid IV size")
}
#assert(size_of(int) == 8 || size_of(int) <= 4)
@@ -59,18 +61,52 @@ _update_mac_pad16 :: #force_inline proc (ctx: ^poly1305.Context, x_len: int) {
}
}
// encrypt encrypts the plaintext and authenticates the aad and ciphertext,
// with the provided key and nonce, stores the output in ciphertext and tag.
encrypt :: proc (ciphertext, tag, key, nonce, aad, plaintext: []byte) {
_validate_common_slice_sizes(tag, key, nonce, aad, plaintext)
// Context is a keyed (X)Chacha20Poly1305 instance.
//
// It is set up with `init` or `init_xchacha` and wiped with `reset`.
Context :: struct {
// Copy of the key supplied to `init`.
_key: [KEY_SIZE]byte,
// ChaCha20 implementation handed to `chacha20.init` by seal/open.
_impl: chacha20.Implementation,
// Set by `init_xchacha`; selects XIV_SIZE instead of IV_SIZE as the
// expected IV length.
_is_xchacha: bool,
// Set by `init`, cleared by `reset`.
_is_initialized: bool,
}
// init prepares ctx for AEAD_CHACHA20_POLY1305 using the given key and
// ChaCha20 implementation. It panics if key is not KEY_SIZE bytes long.
init :: proc(ctx: ^Context, key: []byte, impl := chacha20.DEFAULT_IMPLEMENTATION) {
	if key_len := len(key); key_len != KEY_SIZE {
		panic("crypto/chacha20poly1305: invalid key size")
	}

	ctx._impl = impl
	ctx._is_xchacha = false

	// Keep a private copy of the key for later seal/open calls.
	copy(ctx._key[:], key)
	ctx._is_initialized = true
}
// init_xchacha initializes a Context with the provided key, for
// AEAD_XChaCha20_Poly1305. Like `init`, it panics if the key is not
// KEY_SIZE bytes long.
//
// Note: While there are multiple definitions of XChaCha20-Poly1305
// this sticks to the IETF draft and uses a 32-bit counter.
init_xchacha :: proc(ctx: ^Context, key: []byte, impl := chacha20.DEFAULT_IMPLEMENTATION) {
init(ctx, key, impl)
// `init` clears _is_xchacha, so the flag has to be set after it returns.
ctx._is_xchacha = true
}
// seal encrypts the plaintext and authenticates the aad and ciphertext,
// with the provided Context and iv, stores the output in dst and tag.
//
// dst and plaintext MUST alias exactly or not at all.
seal :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
ciphertext := dst
_validate_common_slice_sizes(tag, iv, aad, plaintext, ctx._is_xchacha)
if len(ciphertext) != len(plaintext) {
panic("crypto/chacha20poly1305: invalid destination ciphertext size")
}
stream_ctx: chacha20.Context = ---
chacha20.init(&stream_ctx, key, nonce)
chacha20.init(&stream_ctx, ctx._key[:],iv, ctx._impl)
stream_ctx._state._is_ietf_flavor = true
// otk = poly1305_key_gen(key, nonce)
// otk = poly1305_key_gen(key, iv)
otk: [poly1305.KEY_SIZE]byte = ---
chacha20.keystream_bytes(&stream_ctx, otk[:])
mac_ctx: poly1305.Context = ---
@@ -87,7 +123,7 @@ encrypt :: proc (ciphertext, tag, key, nonce, aad, plaintext: []byte) {
poly1305.update(&mac_ctx, aad)
_update_mac_pad16(&mac_ctx, aad_len)
// ciphertext = chacha20_encrypt(key, 1, nonce, plaintext)
// ciphertext = chacha20_encrypt(key, 1, iv, plaintext)
chacha20.seek(&stream_ctx, 1)
chacha20.xor_bytes(&stream_ctx, ciphertext, plaintext)
chacha20.reset(&stream_ctx) // Don't need the stream context anymore.
@@ -107,13 +143,16 @@ encrypt :: proc (ciphertext, tag, key, nonce, aad, plaintext: []byte) {
poly1305.final(&mac_ctx, tag) // Implicitly sanitizes context.
}
// decrypt authenticates the aad and ciphertext, and decrypts the ciphertext,
// with the provided key, nonce, and tag, and stores the output in plaintext,
// returning true iff the authentication was successful.
// open authenticates the aad and ciphertext, and decrypts the ciphertext,
// with the provided Context, iv, and tag, and stores the output in dst,
// returning true iff the authentication was successful. If authentication
// fails, the destination buffer will be zeroed.
//
// If authentication fails, the destination plaintext buffer will be zeroed.
decrypt :: proc (plaintext, tag, key, nonce, aad, ciphertext: []byte) -> bool {
_validate_common_slice_sizes(tag, key, nonce, aad, ciphertext)
// dst and ciphertext MUST alias exactly or not at all.
@(require_results)
open :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
plaintext := dst
_validate_common_slice_sizes(tag, iv, aad, ciphertext, ctx._is_xchacha)
if len(ciphertext) != len(plaintext) {
panic("crypto/chacha20poly1305: invalid destination plaintext size")
}
@@ -123,9 +162,10 @@ decrypt :: proc (plaintext, tag, key, nonce, aad, ciphertext: []byte) -> bool {
// points where needed.
stream_ctx: chacha20.Context = ---
chacha20.init(&stream_ctx, key, nonce)
chacha20.init(&stream_ctx, ctx._key[:], iv, ctx._impl)
stream_ctx._state._is_ietf_flavor = true
// otk = poly1305_key_gen(key, nonce)
// otk = poly1305_key_gen(key, iv)
otk: [poly1305.KEY_SIZE]byte = ---
chacha20.keystream_bytes(&stream_ctx, otk[:])
defer chacha20.reset(&stream_ctx)
@@ -160,9 +200,17 @@ decrypt :: proc (plaintext, tag, key, nonce, aad, ciphertext: []byte) -> bool {
return false
}
// plaintext = chacha20_decrypt(key, 1, nonce, ciphertext)
// plaintext = chacha20_decrypt(key, 1, iv, ciphertext)
chacha20.seek(&stream_ctx, 1)
chacha20.xor_bytes(&stream_ctx, plaintext, ciphertext)
return true
}
// reset wipes the key held by the Context and clears its state flags.
// The Context must be re-initialized before it can be used again.
reset :: proc "contextless" (ctx: ^Context) {
	ctx._is_initialized = false
	ctx._is_xchacha = false

	// Explicit zeroing so the key material does not linger in memory.
	mem.zero_explicit(&ctx._key, len(ctx._key))
}
+7 -7
View File
@@ -2,9 +2,9 @@
package ed25519 implements the Ed25519 EdDSA signature algorithm.
See:
- https://datatracker.ietf.org/doc/html/rfc8032
- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.186-5.pdf
- https://eprint.iacr.org/2020/1244.pdf
- [[ https://datatracker.ietf.org/doc/html/rfc8032 ]]
- [[ https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.186-5.pdf ]]
- [[ https://eprint.iacr.org/2020/1244.pdf ]]
*/
package ed25519
@@ -21,7 +21,7 @@ PUBLIC_KEY_SIZE :: 32
SIGNATURE_SIZE :: 64
@(private)
NONCE_SIZE :: 32
HDIGEST2_SIZE :: 32
// Private_Key is an Ed25519 private key.
Private_Key :: struct {
@@ -33,7 +33,7 @@ Private_Key :: struct {
// See: https://github.com/MystenLabs/ed25519-unsafe-libs
_b: [PRIVATE_KEY_SIZE]byte,
_s: grp.Scalar,
_nonce: [NONCE_SIZE]byte,
_hdigest2: [HDIGEST2_SIZE]byte,
_pub_key: Public_Key,
_is_initialized: bool,
}
@@ -63,7 +63,7 @@ private_key_set_bytes :: proc(priv_key: ^Private_Key, b: []byte) -> bool {
sha2.final(&ctx, h_bytes[:])
copy(priv_key._b[:], b)
copy(priv_key._nonce[:], h_bytes[32:])
copy(priv_key._hdigest2[:], h_bytes[32:])
grp.sc_set_bytes_rfc8032(&priv_key._s, h_bytes[:32])
// Derive the corresponding public key.
@@ -116,7 +116,7 @@ sign :: proc(priv_key: ^Private_Key, msg, sig: []byte) {
ctx: sha2.Context_512 = ---
digest_bytes: [sha2.DIGEST_SIZE_512]byte = ---
sha2.init_512(&ctx)
sha2.update(&ctx, priv_key._nonce[:])
sha2.update(&ctx, priv_key._hdigest2[:])
sha2.update(&ctx, msg)
sha2.final(&ctx, digest_bytes[:])
+28 -30
View File
@@ -17,46 +17,44 @@ accomplish common tasks.
A third optional boolean parameter controls if the file is streamed
(default), or read at once.
```odin
package hash_example
Example:
package hash_example
import "core:crypto/hash"
import "core:crypto/hash"
main :: proc() {
input := "Feed the fire."
main :: proc() {
input := "Feed the fire."
// Compute the digest, using the high level API.
returned_digest := hash.hash(hash.Algorithm.SHA512_256, input)
defer delete(returned_digest)
// Compute the digest, using the high level API.
returned_digest := hash.hash(hash.Algorithm.SHA512_256, input)
defer delete(returned_digest)
// Variant that takes a destination buffer, instead of returning
// the digest.
digest := make([]byte, hash.DIGEST_SIZES[hash.Algorithm.BLAKE2B]) // @note: Destination buffer has to be at least as big as the digest size of the hash.
defer delete(digest)
hash.hash(hash.Algorithm.BLAKE2B, input, digest)
}
```
// Variant that takes a destination buffer, instead of returning
// the digest.
digest := make([]byte, hash.DIGEST_SIZES[hash.Algorithm.BLAKE2B]) // @note: Destination buffer has to be at least as big as the digest size of the hash.
defer delete(digest)
hash.hash(hash.Algorithm.BLAKE2B, input, digest)
}
A generic low level API is provided supporting the init/update/final interface
that is typical with cryptographic hash function implementations.
```odin
package hash_example
Example:
package hash_example
import "core:crypto/hash"
import "core:crypto/hash"
main :: proc() {
input := "Let the cinders burn."
main :: proc() {
input := "Let the cinders burn."
// Compute the digest, using the low level API.
ctx: hash.Context
digest := make([]byte, hash.DIGEST_SIZES[hash.Algorithm.SHA3_512])
defer delete(digest)
// Compute the digest, using the low level API.
ctx: hash.Context
digest := make([]byte, hash.DIGEST_SIZES[hash.Algorithm.SHA3_512])
defer delete(digest)
hash.init(&ctx, hash.Algorithm.SHA3_512)
hash.update(&ctx, transmute([]byte)input)
hash.final(&ctx, digest)
}
```
hash.init(&ctx, hash.Algorithm.SHA3_512)
hash.update(&ctx, transmute([]byte)input)
hash.final(&ctx, digest)
}
*/
package crypto_hash
package crypto_hash
+11 -5
View File
@@ -28,20 +28,26 @@ hash_bytes :: proc(algorithm: Algorithm, data: []byte, allocator := context.allo
// hash_string_to_buffer will hash the given input and assign the
// computed digest to the third parameter. It requires that the
// destination buffer is at least as big as the digest size.
hash_string_to_buffer :: proc(algorithm: Algorithm, data: string, hash: []byte) {
hash_bytes_to_buffer(algorithm, transmute([]byte)(data), hash)
// destination buffer is at least as big as the digest size. The
// provided destination buffer is returned to match the behavior of
// `hash_string`.
hash_string_to_buffer :: proc(algorithm: Algorithm, data: string, hash: []byte) -> []byte {
	// Reinterpret the string's bytes in place (no copy) and defer to the
	// byte-slice variant, which returns the destination buffer it was given.
	input := transmute([]byte)data
	return hash_bytes_to_buffer(algorithm, input, hash)
}
// hash_bytes_to_buffer will hash the given input and write the
// computed digest into the third parameter. It requires that the
// destination buffer is at least as big as the digest size.
hash_bytes_to_buffer :: proc(algorithm: Algorithm, data, hash: []byte) {
// destination buffer is at least as big as the digest size. The
// provided destination buffer is returned to match the behavior of
// `hash_bytes`.
hash_bytes_to_buffer :: proc(algorithm: Algorithm, data, hash: []byte) -> []byte {
	// One-shot use of the low level interface: init, a single update over
	// all of `data`, then final writing the digest into `hash`.
	state: Context
	init(&state, algorithm)
	update(&state, data)
	final(&state, hash)

	// Hand the caller's own buffer back to them.
	return hash
}
// hash_stream will incrementally fully consume a stream, and return the
+1 -1
View File
@@ -2,7 +2,7 @@
package hkdf implements the HKDF HMAC-based Extract-and-Expand Key
Derivation Function.
See: https://www.rfc-editor.org/rfc/rfc5869
See: [[ https://www.rfc-editor.org/rfc/rfc5869 ]]
*/
package hkdf
+1 -1
View File
@@ -2,7 +2,7 @@
package hmac implements the HMAC MAC algorithm.
See:
- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.198-1.pdf
- [[ https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.198-1.pdf ]]
*/
package hmac
+1 -1
View File
@@ -2,7 +2,7 @@
package kmac implements the KMAC MAC algorithm.
See:
- https://nvlpubs.nist.gov/nistpubs/specialpublications/nist.sp.800-185.pdf
- [[ https://nvlpubs.nist.gov/nistpubs/specialpublications/nist.sp.800-185.pdf ]]
*/
package kmac
+2 -2
View File
@@ -5,8 +5,8 @@ WARNING: The MD5 algorithm is known to be insecure and should only be
used for interoperating with legacy applications.
See:
- https://eprint.iacr.org/2005/075
- https://datatracker.ietf.org/doc/html/rfc1321
- [[ https://eprint.iacr.org/2005/075 ]]
- [[ https://datatracker.ietf.org/doc/html/rfc1321 ]]
*/
package md5
+3 -3
View File
@@ -5,9 +5,9 @@ WARNING: The SHA1 algorithm is known to be insecure and should only be
used for interoperating with legacy applications.
See:
- https://eprint.iacr.org/2017/190
- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf
- https://datatracker.ietf.org/doc/html/rfc3174
- [[ https://eprint.iacr.org/2017/190 ]]
- [[ https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf ]]
- [[ https://datatracker.ietf.org/doc/html/rfc3174 ]]
*/
package sha1
+1 -1
View File
@@ -1,7 +1,7 @@
/*
package pbkdf2 implements the PBKDF2 password-based key derivation function.
See: https://www.rfc-editor.org/rfc/rfc2898
See: [[ https://www.rfc-editor.org/rfc/rfc2898 ]]
*/
package pbkdf2
+1 -1
View File
@@ -2,7 +2,7 @@
package poly1305 implements the Poly1305 one-time MAC algorithm.
See:
- https://datatracker.ietf.org/doc/html/rfc8439
- [[ https://datatracker.ietf.org/doc/html/rfc8439 ]]
*/
package poly1305
+1 -1
View File
@@ -2,7 +2,7 @@
package ristretto255 implements the ristretto255 prime-order group.
See:
- https://www.rfc-editor.org/rfc/rfc9496
- [[ https://www.rfc-editor.org/rfc/rfc9496 ]]
*/
package ristretto255
+2 -2
View File
@@ -2,8 +2,8 @@
package sha2 implements the SHA2 hash algorithm family.
See:
- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf
- https://datatracker.ietf.org/doc/html/rfc3874
- [[ https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf ]]
- [[ https://datatracker.ietf.org/doc/html/rfc3874 ]]
*/
package sha2
+1 -1
View File
@@ -6,7 +6,7 @@ pre-standardization Keccak algorithm is required, it can be found in
crypto/legacy/keccak.
See:
- https://nvlpubs.nist.gov/nistpubs/fips/nist.fips.202.pdf
- [[ https://nvlpubs.nist.gov/nistpubs/fips/nist.fips.202.pdf ]]
*/
package sha3
+2 -2
View File
@@ -4,8 +4,8 @@ package shake implements the SHAKE and cSHAKE XOF algorithm families.
The SHA3 hash algorithm can be found in the crypto/sha3.
See:
- https://nvlpubs.nist.gov/nistpubs/fips/nist.fips.202.pdf
- https://nvlpubs.nist.gov/nistpubs/specialpublications/nist.sp.800-185.pdf
- [[ https://nvlpubs.nist.gov/nistpubs/fips/nist.fips.202.pdf ]]
- [[ https://nvlpubs.nist.gov/nistpubs/specialpublications/nist.sp.800-185.pdf ]]
*/
package shake
+9 -4
View File
@@ -1,3 +1,12 @@
/*
package siphash implements the SipHash hashing algorithm.
Use the specific procedures for a certain setup. The generic procedures will default to Siphash 2-4.
See:
- [[ https://github.com/veorq/SipHash ]]
- [[ https://www.aumasson.jp/siphash/siphash.pdf ]]
*/
package siphash
/*
@@ -6,10 +15,6 @@ package siphash
List of contributors:
zhibog: Initial implementation.
Implementation of the SipHash hashing algorithm, as defined at <https://github.com/veorq/SipHash> and <https://www.aumasson.jp/siphash/siphash.pdf>
Use the specific procedures for a certain setup. The generic procdedures will default to Siphash 2-4
*/
import "core:crypto"
+1 -1
View File
@@ -2,7 +2,7 @@
package sm3 implements the SM3 hash algorithm.
See:
- https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02
- [[ https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02 ]]
*/
package sm3
+1 -1
View File
@@ -2,7 +2,7 @@
package tuplehash implements the TupleHash and TupleHashXOF algorithms.
See:
- https://nvlpubs.nist.gov/nistpubs/specialpublications/nist.sp.800-185.pdf
- [[ https://nvlpubs.nist.gov/nistpubs/specialpublications/nist.sp.800-185.pdf ]]
*/
package tuplehash
+1 -1
View File
@@ -3,7 +3,7 @@ package x25519 implements the X25519 (aka curve25519) Elliptic-Curve
Diffie-Hellman key exchange protocol.
See:
- https://www.rfc-editor.org/rfc/rfc7748
- [[ https://www.rfc-editor.org/rfc/rfc7748 ]]
*/
package x25519
+3 -1
View File
@@ -1,4 +1,6 @@
//+build !windows !linux !darwin
//+build !windows
//+build !linux
//+build !darwin
package debug_trace
import "base:runtime"
+1 -2
View File
@@ -4,7 +4,6 @@ Package `core:dynlib` implements loading of shared libraries/DLLs and their symb
The behaviour of dynamically loaded libraries is specific to the target platform of the program.
For in depth detail on the underlying behaviour please refer to your target platform's documentation.
See `example` directory for an example library exporting 3 symbols and a host program loading them automatically
by defining a symbol table struct.
For a full example, see: [[ core/dynlib/example; https://github.com/odin-lang/Odin/tree/master/core/dynlib/example ]]
*/
package dynlib
+3 -3
View File
@@ -13,8 +13,8 @@ If your terminal supports 24-bit true color mode, you can also do this:
fmt.println(ansi.CSI + ansi.FG_COLOR_24_BIT + ";0;255;255" + ansi.SGR + "Hellope!" + ansi.CSI + ansi.RESET + ansi.SGR)
For more information, see:
1. https://en.wikipedia.org/wiki/ANSI_escape_code
2. https://www.vt100.net/docs/vt102-ug/chapter5.html
3. https://invisible-island.net/xterm/ctlseqs/ctlseqs.html
- [[ https://en.wikipedia.org/wiki/ANSI_escape_code ]]
- [[ https://www.vt100.net/docs/vt102-ug/chapter5.html ]]
- [[ https://invisible-island.net/xterm/ctlseqs/ctlseqs.html ]]
*/
package ansi
+3 -2
View File
@@ -3,6 +3,7 @@ package encoding_cbor
import "base:intrinsics"
import "core:encoding/json"
import "core:encoding/hex"
import "core:io"
import "core:mem"
import "core:strconv"
@@ -399,11 +400,11 @@ to_diagnostic_format_writer :: proc(w: io.Writer, val: Value, padding := 0) -> i
io.write_string(w, str) or_return
case bool: io.write_string(w, "true" if v else "false") or_return
case Nil: io.write_string(w, "nil") or_return
case Nil: io.write_string(w, "null") or_return
case Undefined: io.write_string(w, "undefined") or_return
case ^Bytes:
io.write_string(w, "h'") or_return
for b in v { io.write_int(w, int(b), 16) or_return }
hex.encode_into_writer(w, v^) or_return
io.write_string(w, "'") or_return
case ^Text:
io.write_string(w, `"`) or_return
+10 -8
View File
@@ -481,9 +481,7 @@ _marshal_into_encoder :: proc(e: Encoder, v: any, ti: ^runtime.Type_Info) -> (er
}
}
marshal_entry :: #force_inline proc(e: Encoder, info: runtime.Type_Info_Struct, v: any, name: string, i: int) -> Marshal_Error {
err_conv(_encode_text(e, name)) or_return
marshal_entry :: #force_inline proc(e: Encoder, info: runtime.Type_Info_Struct, v: any, i: int) -> Marshal_Error {
id := info.types[i].id
data := rawptr(uintptr(v.data) + info.offsets[i])
field_any := any{data, id}
@@ -517,7 +515,7 @@ _marshal_into_encoder :: proc(e: Encoder, v: any, ti: ^runtime.Type_Info) -> (er
if .Deterministic_Map_Sorting in e.flags {
Name :: struct {
name: string,
name: []byte,
field: int,
}
entries := make([dynamic]Name, 0, n, e.temp_allocator) or_return
@@ -529,16 +527,19 @@ _marshal_into_encoder :: proc(e: Encoder, v: any, ti: ^runtime.Type_Info) -> (er
continue
}
append(&entries, Name{fname, i}) or_return
key_builder := strings.builder_make(e.temp_allocator) or_return
err_conv(_encode_text(Encoder{e.flags, strings.to_stream(&key_builder), e.temp_allocator}, fname)) or_return
append(&entries, Name{key_builder.buf[:], i}) or_return
}
// Sort lexicographic on the bytes of the key.
slice.sort_by_cmp(entries[:], proc(a, b: Name) -> slice.Ordering {
return slice.Ordering(bytes.compare(transmute([]byte)a.name, transmute([]byte)b.name))
return slice.Ordering(bytes.compare(a.name, b.name))
})
for entry in entries {
marshal_entry(e, info, v, entry.name, entry.field) or_return
io.write_full(e.writer, entry.name) or_return
marshal_entry(e, info, v, entry.field) or_return
}
} else {
for _, i in info.names[:info.field_count] {
@@ -547,7 +548,8 @@ _marshal_into_encoder :: proc(e: Encoder, v: any, ti: ^runtime.Type_Info) -> (er
continue
}
marshal_entry(e, info, v, fname, i) or_return
err_conv(_encode_text(e, fname)) or_return
marshal_entry(e, info, v, i) or_return
}
}
return
+96
View File
@@ -0,0 +1,96 @@
/*
package csv reads and writes comma-separated values (CSV) files.
This package supports the format described in [[ RFC 4180; https://tools.ietf.org/html/rfc4180.html ]]
Example:
package main
import "core:fmt"
import "core:encoding/csv"
import "core:os"
// Requires keeping the entire CSV file in memory at once
iterate_csv_from_string :: proc(filename: string) {
r: csv.Reader
r.trim_leading_space = true
r.reuse_record = true // Without it you have to delete(record)
r.reuse_record_buffer = true // Without it you have to delete each of the fields within it
defer csv.reader_destroy(&r)
csv_data, ok := os.read_entire_file(filename)
if ok {
csv.reader_init_with_string(&r, string(csv_data))
} else {
fmt.printfln("Unable to open file: %v", filename)
return
}
defer delete(csv_data)
for r, i, err in csv.iterator_next(&r) {
if err != nil { /* Do something with error */ }
for f, j in r {
fmt.printfln("Record %v, field %v: %q", i, j, f)
}
}
}
// Reads the CSV as it's processed (with a small buffer)
iterate_csv_from_stream :: proc(filename: string) {
fmt.printfln("Hellope from %v", filename)
r: csv.Reader
r.trim_leading_space = true
r.reuse_record = true // Without it you have to delete(record)
r.reuse_record_buffer = true // Without it you have to delete each of the fields within it
defer csv.reader_destroy(&r)
handle, err := os.open(filename)
if err != nil {
fmt.eprintfln("Error opening file: %v", filename)
return
}
defer os.close(handle)
csv.reader_init(&r, os.stream_from_handle(handle))
for r, i in csv.iterator_next(&r) {
for f, j in r {
fmt.printfln("Record %v, field %v: %q", i, j, f)
}
}
fmt.printfln("Error: %v", csv.iterator_last_error(r))
}
// Read all records at once
read_csv_from_string :: proc(filename: string) {
r: csv.Reader
r.trim_leading_space = true
r.reuse_record = true // Without it you have to delete(record)
r.reuse_record_buffer = true // Without it you have to delete each of the fields within it
defer csv.reader_destroy(&r)
csv_data, ok := os.read_entire_file(filename)
if ok {
csv.reader_init_with_string(&r, string(csv_data))
} else {
fmt.printfln("Unable to open file: %v", filename)
return
}
defer delete(csv_data)
records, err := csv.read_all(&r)
if err != nil { /* Do something with CSV parse error */ }
defer {
for rec in records {
delete(rec)
}
delete(records)
}
for r, i in records {
for f, j in r {
fmt.printfln("Record %v, field %v: %q", i, j, f)
}
}
}
*/
package encoding_csv
-88
View File
@@ -1,88 +0,0 @@
//+build ignore
package encoding_csv
import "core:fmt"
import "core:encoding/csv"
import "core:os"
// Requires keeping the entire CSV file in memory at once.
//
// Reads `filename` fully, then iterates over its records one at a time,
// printing every field. Prints a message and returns if the file cannot be read.
iterate_csv_from_string :: proc(filename: string) {
	r: csv.Reader
	r.trim_leading_space  = true
	r.reuse_record        = true // Without it you have to delete(record)
	r.reuse_record_buffer = true // Without it you have to delete each of the fields within it
	defer csv.reader_destroy(&r)

	// `csv_data` is declared at procedure scope on purpose: a `defer` runs at the
	// end of its enclosing block, so deferring the delete inside an `if` body
	// would free the data before the reader below gets to iterate over it.
	csv_data, ok := os.read_entire_file(filename)
	if !ok {
		fmt.printfln("Unable to open file: %v", filename)
		return
	}
	defer delete(csv_data)

	csv.reader_init_with_string(&r, string(csv_data))

	for record, i, err in csv.iterator_next(&r) {
		if err != nil { /* Do something with error */ }
		for f, j in record {
			fmt.printfln("Record %v, field %v: %q", i, j, f)
		}
	}
}
// Reads the CSV as it's processed (with a small buffer).
//
// Opens `filename`, feeds the reader from the file's stream, prints every
// field of every record, and finally prints the iterator's last error.
iterate_csv_from_stream :: proc(filename: string) {
	fmt.printfln("Hellope from %v", filename)

	reader: csv.Reader
	reader.trim_leading_space  = true
	reader.reuse_record        = true // Without it you have to delete(record)
	reader.reuse_record_buffer = true // Without it you have to delete each of the fields within it
	defer csv.reader_destroy(&reader)

	fd, open_err := os.open(filename)
	if open_err != nil {
		fmt.eprintfln("Error opening file: %v", filename)
		return
	}
	defer os.close(fd)

	csv.reader_init(&reader, os.stream_from_handle(fd))

	for record, row in csv.iterator_next(&reader) {
		for field, col in record {
			fmt.printfln("Record %v, field %v: %q", row, col, field)
		}
	}
	fmt.printfln("Error: %v", csv.iterator_last_error(reader))
}
// Read all records at once.
//
// Reads `filename` fully, parses every record with `csv.read_all`, prints
// every field, and frees the records before returning. Prints a message and
// returns if the file cannot be read.
read_csv_from_string :: proc(filename: string) {
	r: csv.Reader
	r.trim_leading_space  = true
	r.reuse_record        = true // Without it you have to delete(record)
	r.reuse_record_buffer = true // Without it you have to delete each of the fields within it
	defer csv.reader_destroy(&r)

	// `csv_data` is declared at procedure scope on purpose: a `defer` runs at the
	// end of its enclosing block, so deferring the delete inside an `if` body
	// would free the data before `read_all` below gets to parse it.
	csv_data, ok := os.read_entire_file(filename)
	if !ok {
		fmt.printfln("Unable to open file: %v", filename)
		return
	}
	defer delete(csv_data)

	csv.reader_init_with_string(&r, string(csv_data))

	records, err := csv.read_all(&r)
	if err != nil { /* Do something with CSV parse error */ }

	defer {
		for rec in records {
			delete(rec)
		}
		delete(records)
	}

	for record, i in records {
		for f, j in record {
			fmt.printfln("Record %v, field %v: %q", i, j, f)
		}
	}
}
+2 -2
View File
@@ -1,5 +1,5 @@
// package csv reads and writes comma-separated values (CSV) files.
// This package supports the format described in RFC 4180 <https://tools.ietf.org/html/rfc4180.html>
// This package supports the format described in [[ RFC 4180; https://tools.ietf.org/html/rfc4180.html ]]
package encoding_csv
import "core:bufio"
@@ -484,4 +484,4 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all
r.fields_per_record = len(dst)
}
return dst[:], err
}
}
+13 -12
View File
@@ -2,22 +2,23 @@
Package endian implements a simple translation between bytes and numbers with
specific endian encodings.
buf: [100]u8
put_u16(buf[:], .Little, 16) or_return
Example:
buf: [100]u8
put_u16(buf[:], .Little, 16) or_return
You may ask yourself, why isn't `byte_order` platform Endianness by default, so we can write:
put_u16(buf[:], 16) or_return
// You may ask yourself, why isn't `byte_order` platform Endianness by default, so we can write:
put_u16(buf[:], 16) or_return
The answer is that very few file formats are written in native/platform endianness. Most of them specify the endianness of
each of their fields, or use a header field which specifies it for the entire file.
// The answer is that very few file formats are written in native/platform endianness. Most of them specify the endianness of
// each of their fields, or use a header field which specifies it for the entire file.
e.g. a file which specifies it at the top for all fields could do this:
file_order := .Little if buf[0] == 0 else .Big
field := get_u16(buf[1:], file_order) or_return
// e.g. a file which specifies it at the top for all fields could do this:
file_order := .Little if buf[0] == 0 else .Big
field := get_u16(buf[1:], file_order) or_return
If on the other hand a field is *always* Big-Endian, you're wise to explicitly state it for the benefit of the reader,
be that your future self or someone else.
// If on the other hand a field is *always* Big-Endian, you're wise to explicitly state it for the benefit of the reader,
// be that your future self or someone else.
field := get_u16(buf[:], .Big) or_return
field := get_u16(buf[:], .Big) or_return
*/
package encoding_endian
+16 -14
View File
@@ -1,24 +1,26 @@
package encoding_unicode_entity
/*
A unicode entity encoder/decoder
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-3 license.
This code has several procedures to map unicode runes to/from different textual encodings.
- SGML/XML/HTML entity
-- &#<decimal>;
-- &#x<hexadecimal>;
-- &<entity name>; (If the lookup tables are compiled in).
Reference: https://www.w3.org/2003/entities/2007xml/unicode.xml
- URL encode / decode %hex entity
Reference: https://datatracker.ietf.org/doc/html/rfc3986/#section-2.1
List of contributors:
Jeroen van Rijn: Initial implementation.
*/
/*
A unicode entity encoder/decoder.
This code has several procedures to map unicode runes to/from different textual encodings.
- SGML/XML/HTML entity
- &#<decimal>;
- &#x<hexadecimal>;
- &<entity name>; (If the lookup tables are compiled in).
Reference: [[ https://www.w3.org/2003/entities/2007xml/unicode.xml ]]
- URL encode / decode %hex entity
Reference: [[ https://datatracker.ietf.org/doc/html/rfc3986/#section-2.1 ]]
*/
package encoding_unicode_entity
import "core:unicode/utf8"
import "core:unicode"
import "core:strings"
@@ -353,4 +355,4 @@ _handle_xml_special :: proc(t: ^Tokenizer, builder: ^strings.Builder, options: X
}
return false, .None
}
}
+1 -1
View File
@@ -42,7 +42,7 @@ XML_NAME_TO_RUNE_MAX_LENGTH :: 31
Input:
entity_name - a string, like "copy" that describes a user-encoded Unicode entity as used in XML.
Output:
Returns:
"decoded" - The decoded rune if found by name, or -1 otherwise.
"ok" - true if found, false if not.
+7
View File
@@ -1,5 +1,6 @@
package encoding_hex
import "core:io"
import "core:strings"
encode :: proc(src: []byte, allocator := context.allocator, loc := #caller_location) -> []byte #no_bounds_check {
@@ -14,6 +15,12 @@ encode :: proc(src: []byte, allocator := context.allocator, loc := #caller_locat
return dst
}
// encode_into_writer writes the hexadecimal form of `src` to `dst`, emitting
// two `HEXTABLE` characters per input byte (high nibble first). It stops at,
// and returns, the first write error; otherwise it returns `nil`.
encode_into_writer :: proc(dst: io.Writer, src: []byte) -> io.Error {
	for b in src {
		pair := [2]byte{HEXTABLE[b>>4], HEXTABLE[b&0x0f]}
		io.write(dst, pair[:]) or_return
	}
	return nil
}
decode :: proc(src: []byte, allocator := context.allocator, loc := #caller_location) -> (dst: []byte, ok: bool) #no_bounds_check {
if len(src) % 2 == 1 {
+89 -83
View File
@@ -1,83 +1,89 @@
// Implementation of the HxA 3D asset format
// HxA is a interchangeable graphics asset format.
// Designed by Eskil Steenberg. @quelsolaar / eskil 'at' obsession 'dot' se / www.quelsolaar.com
//
// Author of this Odin package: Ginger Bill
//
// Following comment is copied from the original C-implementation
// ---------
// -Does the world need another Graphics file format?
// Unfortunately, Yes. All existing formats are either too large and complicated to be implemented from
// scratch, or don't have some basic features needed in modern computer graphics.
// -Who is this format for?
// For people who want a capable open Graphics format that can be implemented from scratch in
// a few hours. It is ideal for graphics researchers, game developers or other people who
// wants to build custom graphics pipelines. Given how easy it is to parse and write, it
// should be easy to write utilities that process assets to preform tasks like: generating
// normals, light-maps, tangent spaces, Error detection, GPU optimization, LOD generation,
// and UV mapping.
// -Why store images in the format when there are so many good image formats already?
// Yes there are, but only for 2D RGB/RGBA images. A lot of computer graphics rendering rely
// on 1D, 3D, cube, multilayer, multi channel, floating point bitmap buffers. There almost no
// formats for this kind of data. Also 3D files that reference separate image files rely on
// file paths, and this often creates issues when the assets are moved. By including the
// texture data in the files directly the assets become self contained.
// -Why doesn't the format support <insert whatever>?
// Because the entire point is to make a format that can be implemented. Features like NURBSs,
// Construction history, or BSP trees would make the format too large to serve its purpose.
// The facilities of the formats to store meta data should make the format flexible enough
// for most uses. Adding HxA support should be something anyone can do in a days work.
//
// Structure:
// ----------
// HxA is designed to be extremely simple to parse, and is therefore based around conventions. It has
// a few basic structures, and depending on how they are used they mean different things. This means
// that you can implement a tool that loads the entire file, modifies the parts it cares about and
// leaves the rest intact. It is also possible to write a tool that makes all data in the file
// editable without the need to understand its use. It is also possible for anyone to use the format
// to store data axillary data. Anyone who wants to store data not covered by a convention can submit
// a convention to extend the format. There should never be a convention for storing the same data in
// two differed ways.
// The data is story in a number of nodes that are stored in an array. Each node stores an array of
// meta data. Meta data can describe anything you want, and a lot of conventions will use meta data
// to store additional information, for things like transforms, lights, shaders and animation.
// Data for Vertices, Corners, Faces, and Pixels are stored in named layer stacks. Each stack consists
// of a number of named layers. All layers in the stack have the same number of elements. Each layer
// describes one property of the primitive. Each layer can have multiple channels and each layer can
// store data of a different type.
//
// HaX stores 3 kinds of nodes
// - Pixel data.
// - Polygon geometry data.
// - Meta data only.
//
// Pixel Nodes stores pixels in a layer stack. A layer may store things like Albedo, Roughness,
// Reflectance, Light maps, Masks, Normal maps, and Displacement. Layers use the channels of the
// layers to store things like color. The length of the layer stack is determined by the type and
// dimensions stored in the
//
// Geometry data is stored in 3 separate layer stacks for: vertex data, corner data and face data. The
// vertex data stores things like verities, blend shapes, weight maps, and vertex colors. The first
// layer in a vertex stack has to be a 3 channel layer named "position" describing the base position
// of the vertices. The corner stack describes data per corner or edge of the polygons. It can be used
// for things like UV, normals, and adjacency. The first layer in a corner stack has to be a 1 channel
// integer layer named "index" describing the vertices used to form polygons. The last value in each
// polygon has a negative - 1 index to indicate the end of the polygon.
//
// Example:
// A quad and a tri with the vertex index:
// [0, 1, 2, 3] [1, 4, 2]
// is stored:
// [0, 1, 2, -4, 1, 4, -3]
// The face stack stores values per face. the length of the face stack has to match the number of
// negative values in the index layer in the corner stack. The face stack can be used to store things
// like material index.
//
// Storage
// -------
// All data is stored in little endian byte order with no padding. The layout mirrors the structs
// defined below with a few exceptions. All names are stored as a 8-bit unsigned integer indicating
// the length of the name followed by that many characters. Termination is not stored in the file.
// Text strings stored in meta data are stored the same way as names, but instead of a 8-bit unsigned
// integer a 32-bit unsigned integer is used.
package encoding_hxa
/*
Implementation of the HxA 3D asset format
HxA is an interchangeable graphics asset format.
Designed by Eskil Steenberg. @quelsolaar / eskil 'at' obsession 'dot' se / www.quelsolaar.com
Author of this Odin package: Ginger Bill
Following comment is copied from the original C-implementation
---------
- Does the world need another Graphics file format?
Unfortunately, Yes. All existing formats are either too large and complicated to be implemented from
scratch, or don't have some basic features needed in modern computer graphics.
- Who is this format for?
For people who want a capable open Graphics format that can be implemented from scratch in
a few hours. It is ideal for graphics researchers, game developers or other people who
want to build custom graphics pipelines. Given how easy it is to parse and write, it
should be easy to write utilities that process assets to perform tasks like: generating
normals, light-maps, tangent spaces, Error detection, GPU optimization, LOD generation,
and UV mapping.
- Why store images in the format when there are so many good image formats already?
Yes there are, but only for 2D RGB/RGBA images. A lot of computer graphics rendering relies
on 1D, 3D, cube, multilayer, multi channel, floating point bitmap buffers. There are almost no
formats for this kind of data. Also 3D files that reference separate image files rely on
file paths, and this often creates issues when the assets are moved. By including the
texture data in the files directly the assets become self-contained.
- Why doesn't the format support <insert whatever>?
Because the entire point is to make a format that can be implemented. Features like NURBSs,
Construction history, or BSP trees would make the format too large to serve its purpose.
The facilities of the formats to store meta data should make the format flexible enough
for most uses. Adding HxA support should be something anyone can do in a day's work.
Structure:
----------
HxA is designed to be extremely simple to parse, and is therefore based around conventions. It has
a few basic structures, and depending on how they are used they mean different things. This means
that you can implement a tool that loads the entire file, modifies the parts it cares about and
leaves the rest intact. It is also possible to write a tool that makes all data in the file
editable without the need to understand its use. It is also possible for anyone to use the format
to store auxiliary data. Anyone who wants to store data not covered by a convention can submit
a convention to extend the format. There should never be a convention for storing the same data in
two different ways.
The data is stored in a number of nodes that are stored in an array. Each node stores an array of
meta data. Meta data can describe anything you want, and a lot of conventions will use meta data
to store additional information, for things like transforms, lights, shaders and animation.
Data for Vertices, Corners, Faces, and Pixels are stored in named layer stacks. Each stack consists
of a number of named layers. All layers in the stack have the same number of elements. Each layer
describes one property of the primitive. Each layer can have multiple channels and each layer can
store data of a different type.
HxA stores 3 kinds of nodes:
- Pixel data.
- Polygon geometry data.
- Meta data only.
Pixel Nodes stores pixels in a layer stack. A layer may store things like Albedo, Roughness,
Reflectance, Light maps, Masks, Normal maps, and Displacement. Layers use the channels of the
layers to store things like color.
The length of the layer stack is determined by the type and dimensions stored in the node.
Geometry data is stored in 3 separate layer stacks for: vertex data, corner data and face data. The
vertex data stores things like vertices, blend shapes, weight maps, and vertex colors. The first
layer in a vertex stack has to be a 3 channel layer named "position" describing the base position
of the vertices. The corner stack describes data per corner or edge of the polygons. It can be used
for things like UV, normals, and adjacency. The first layer in a corner stack has to be a 1 channel
integer layer named "index" describing the vertices used to form polygons. The last value in each
polygon has a negative - 1 index to indicate the end of the polygon.
For Example:
A quad and a tri with the vertex index:
[0, 1, 2, 3] [1, 4, 2]
is stored:
[0, 1, 2, -4, 1, 4, -3]
The face stack stores values per face. The length of the face stack has to match the number of
negative values in the index layer in the corner stack. The face stack can be used to store things
like material index.
Storage:
-------
All data is stored in little endian byte order with no padding. The layout mirrors the structs
defined below with a few exceptions. All names are stored as a 8-bit unsigned integer indicating
the length of the name followed by that many characters. Termination is not stored in the file.
Text strings stored in meta data are stored the same way as names, but instead of a 8-bit unsigned
integer a 32-bit unsigned integer is used.
*/
package encoding_hxa
+24 -1
View File
@@ -116,7 +116,30 @@ assign_int :: proc(val: any, i: $T) -> bool {
case int: dst = int (i)
case uint: dst = uint (i)
case uintptr: dst = uintptr(i)
case: return false
case:
ti := type_info_of(v.id)
if _, ok := ti.variant.(runtime.Type_Info_Bit_Set); ok {
do_byte_swap := !reflect.bit_set_is_big_endian(v)
switch ti.size * 8 {
case 0: // no-op.
case 8:
x := (^u8)(v.data)
x^ = u8(i)
case 16:
x := (^u16)(v.data)
x^ = do_byte_swap ? intrinsics.byte_swap(u16(i)) : u16(i)
case 32:
x := (^u32)(v.data)
x^ = do_byte_swap ? intrinsics.byte_swap(u32(i)) : u32(i)
case 64:
x := (^u64)(v.data)
x^ = do_byte_swap ? intrinsics.byte_swap(u64(i)) : u64(i)
case:
panic("unknown bit_size size")
}
return true
}
return false
}
return true
}
+5 -4
View File
@@ -21,8 +21,9 @@ cryptographically-secure, per RFC 9562's suggestion.
- Version 6 without either a clock or node argument.
- Version 7 in all cases.
Here's an example of how to set up one:
Example:
package main
import "core:crypto"
import "core:encoding/uuid"
@@ -40,7 +41,7 @@ Here's an example of how to set up one:
For more information on the specifications, see here:
- https://www.rfc-editor.org/rfc/rfc4122.html
- https://www.rfc-editor.org/rfc/rfc9562.html
- [[ https://www.rfc-editor.org/rfc/rfc4122.html ]]
- [[ https://www.rfc-editor.org/rfc/rfc9562.html ]]
*/
package uuid
+20 -15
View File
@@ -11,7 +11,7 @@ Write a UUID in the 8-4-4-4-12 format.
This procedure performs error checking with every byte written.
If you can guarantee beforehand that your stream has enough space to hold the
UUID (32 bytes), then it is better to use `unsafe_write` instead as that will
UUID (36 bytes), then it is better to use `unsafe_write` instead as that will
be faster.
Inputs:
@@ -22,7 +22,7 @@ Returns:
- error: An `io` error, if one occurred, otherwise `nil`.
*/
write :: proc(w: io.Writer, id: Identifier) -> (error: io.Error) #no_bounds_check {
write_octet :: proc (w: io.Writer, octet: u8) -> io.Error #no_bounds_check {
write_octet :: proc(w: io.Writer, octet: u8) -> io.Error #no_bounds_check {
high_nibble := octet >> 4
low_nibble := octet & 0xF
@@ -31,15 +31,15 @@ write :: proc(w: io.Writer, id: Identifier) -> (error: io.Error) #no_bounds_chec
return nil
}
for index in 0 ..< 4 { write_octet(w, id[index]) or_return }
for index in 0 ..< 4 {write_octet(w, id[index]) or_return}
io.write_byte(w, '-') or_return
for index in 4 ..< 6 { write_octet(w, id[index]) or_return }
for index in 4 ..< 6 {write_octet(w, id[index]) or_return}
io.write_byte(w, '-') or_return
for index in 6 ..< 8 { write_octet(w, id[index]) or_return }
for index in 6 ..< 8 {write_octet(w, id[index]) or_return}
io.write_byte(w, '-') or_return
for index in 8 ..< 10 { write_octet(w, id[index]) or_return }
for index in 8 ..< 10 {write_octet(w, id[index]) or_return}
io.write_byte(w, '-') or_return
for index in 10 ..< 16 { write_octet(w, id[index]) or_return }
for index in 10 ..< 16 {write_octet(w, id[index]) or_return}
return nil
}
@@ -54,7 +54,7 @@ Inputs:
- id: The identifier to convert.
*/
unsafe_write :: proc(w: io.Writer, id: Identifier) #no_bounds_check {
write_octet :: proc (w: io.Writer, octet: u8) #no_bounds_check {
write_octet :: proc(w: io.Writer, octet: u8) #no_bounds_check {
high_nibble := octet >> 4
low_nibble := octet & 0xF
@@ -62,15 +62,15 @@ unsafe_write :: proc(w: io.Writer, id: Identifier) #no_bounds_check {
io.write_byte(w, strconv.digits[low_nibble])
}
for index in 0 ..< 4 { write_octet(w, id[index]) }
for index in 0 ..< 4 {write_octet(w, id[index])}
io.write_byte(w, '-')
for index in 4 ..< 6 { write_octet(w, id[index]) }
for index in 4 ..< 6 {write_octet(w, id[index])}
io.write_byte(w, '-')
for index in 6 ..< 8 { write_octet(w, id[index]) }
for index in 6 ..< 8 {write_octet(w, id[index])}
io.write_byte(w, '-')
for index in 8 ..< 10 { write_octet(w, id[index]) }
for index in 8 ..< 10 {write_octet(w, id[index])}
io.write_byte(w, '-')
for index in 10 ..< 16 { write_octet(w, id[index]) }
for index in 10 ..< 16 {write_octet(w, id[index])}
}
/*
@@ -106,7 +106,7 @@ Convert a UUID to a string in the 8-4-4-4-12 format.
Inputs:
- id: The identifier to convert.
- buffer: A byte buffer to store the result. Must be at least 32 bytes large.
- buffer: A byte buffer to store the result. Must be at least 36 bytes large.
- loc: The caller location for debugging purposes (default: #caller_location)
Returns:
@@ -119,7 +119,11 @@ to_string_buffer :: proc(
) -> (
str: string,
) {
assert(len(buffer) >= EXPECTED_LENGTH, "The buffer provided is not at least 32 bytes large.", loc)
assert(
len(buffer) >= EXPECTED_LENGTH,
"The buffer provided is not at least 36 bytes large.",
loc,
)
builder := strings.builder_from_bytes(buffer)
unsafe_write(strings.to_writer(&builder), id)
return strings.to_string(builder)
@@ -129,3 +133,4 @@ to_string :: proc {
to_string_allocated,
to_string_buffer,
}
+6 -7
View File
@@ -1,10 +1,11 @@
/*
Implementation of the LEB128 variable integer encoding as used by DWARF encoding and DEX files, among others.
Implementation of the LEB128 variable integer encoding as used by DWARF encoding and DEX files, among others.
Author of this Odin package: Jeroen van Rijn
Author of this Odin package: Jeroen van Rijn
Example:
package main
Example:
```odin
import "core:encoding/varint"
import "core:fmt"
@@ -22,7 +23,5 @@
assert(decoded_val == value && decode_size == encode_size && decode_err == .None)
fmt.printf("Decoded as %v, using %v byte%v\n", decoded_val, decode_size, "" if decode_size == 1 else "s")
}
```
*/
package encoding_varint
package encoding_varint
+1 -3
View File
@@ -6,8 +6,6 @@
Jeroen van Rijn: Initial implementation.
*/
// package varint implements variable length integer encoding and decoding using
// the LEB128 format as used by DWARF debug info, Android .dex and other file formats.
package encoding_varint
// In theory we should use the bigint package. In practice, varints bigger than this indicate a corrupted file.
@@ -160,4 +158,4 @@ encode_ileb128 :: proc(buf: []u8, val: i128) -> (size: int, err: Error) {
buf[size - 1] = u8(low)
}
return
}
}

Some files were not shown because too many files have changed in this diff Show More