mirror of
https://github.com/Ed94/Odin.git
synced 2026-06-13 01:21:38 -07:00
Merge remote-tracking branch 'offical/master'
This commit is contained in:
+67
-26
@@ -18,7 +18,7 @@ jobs:
|
||||
usesh: true
|
||||
copyback: false
|
||||
prepare: |
|
||||
PKG_PATH="https://cdn.NetBSD.org/pub/pkgsrc/packages/NetBSD/$(uname -p)/10.0_2024Q2/All" /usr/sbin/pkg_add pkgin
|
||||
PKG_PATH="https://cdn.NetBSD.org/pub/pkgsrc/packages/NetBSD/amd64/$(uname -r | cut -d_ -f1)_${PKGSRC_BRANCH}/All" /usr/sbin/pkg_add pkgin
|
||||
pkgin -y in gmake git bash python311 llvm clang
|
||||
ln -s /usr/pkg/bin/python3.11 /usr/bin/python3
|
||||
run: |
|
||||
@@ -32,10 +32,9 @@ jobs:
|
||||
gmake -C vendor/miniaudio/src
|
||||
./odin check examples/all -vet -strict-style -disallow-do -target:netbsd_amd64
|
||||
./odin check examples/all -vet -strict-style -disallow-do -target:netbsd_arm64
|
||||
./odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false
|
||||
./odin test tests/core/speed.odin -file -all-packages -o:speed -define:ODIN_TEST_FANCY=false
|
||||
./odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false
|
||||
./odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false
|
||||
./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
./odin test tests/core/speed.odin -file -all-packages -vet -strict-style -disallow-do -o:speed -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
(cd tests/issues; ./run.sh)
|
||||
build_freebsd:
|
||||
name: FreeBSD Build, Check, and Test
|
||||
@@ -61,10 +60,9 @@ jobs:
|
||||
gmake -C vendor/cgltf/src
|
||||
gmake -C vendor/miniaudio/src
|
||||
./odin check examples/all -vet -strict-style -disallow-do -target:freebsd_amd64
|
||||
./odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false
|
||||
./odin test tests/core/speed.odin -file -all-packages -o:speed -define:ODIN_TEST_FANCY=false
|
||||
./odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false
|
||||
./odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false
|
||||
./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
./odin test tests/core/speed.odin -file -all-packages -vet -strict-style -disallow-do -o:speed -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
(cd tests/issues; ./run.sh)
|
||||
ci:
|
||||
strategy:
|
||||
@@ -118,15 +116,13 @@ jobs:
|
||||
- name: Odin check examples/all
|
||||
run: ./odin check examples/all -strict-style
|
||||
- name: Normal Core library tests
|
||||
run: ./odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false
|
||||
run: ./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
- name: Optimized Core library tests
|
||||
run: ./odin test tests/core/speed.odin -o:speed -file -all-packages -define:ODIN_TEST_FANCY=false
|
||||
run: ./odin test tests/core/speed.odin -o:speed -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
- name: Vendor library tests
|
||||
run: ./odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false
|
||||
run: ./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
- name: Internals tests
|
||||
run: ./odin test tests/internal -all-packages -define:ODIN_TEST_FANCY=false
|
||||
- name: Core library benchmarks
|
||||
run: ./odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false
|
||||
run: ./odin test tests/internal -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
- name: GitHub Issue tests
|
||||
run: |
|
||||
cd tests/issues
|
||||
@@ -180,38 +176,33 @@ jobs:
|
||||
shell: cmd
|
||||
run: |
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
|
||||
odin run examples/demo -debug
|
||||
odin run examples/demo -debug -vet -strict-style -disallow-do
|
||||
- name: Odin check examples/all
|
||||
shell: cmd
|
||||
run: |
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
|
||||
odin check examples/all -strict-style
|
||||
odin check examples/all -vet -strict-style -disallow-do
|
||||
- name: Core library tests
|
||||
shell: cmd
|
||||
run: |
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
|
||||
odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false
|
||||
odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
- name: Optimized core library tests
|
||||
shell: cmd
|
||||
run: |
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
|
||||
odin test tests/core/speed.odin -o:speed -file -all-packages -define:ODIN_TEST_FANCY=false
|
||||
- name: Core library benchmarks
|
||||
shell: cmd
|
||||
run: |
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
|
||||
odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false
|
||||
odin test tests/core/speed.odin -o:speed -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
- name: Vendor library tests
|
||||
shell: cmd
|
||||
run: |
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
|
||||
copy vendor\lua\5.4\windows\*.dll .
|
||||
odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false
|
||||
odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
- name: Odin internals tests
|
||||
shell: cmd
|
||||
run: |
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
|
||||
odin test tests/internal -all-packages -define:ODIN_TEST_FANCY=false
|
||||
odin test tests/internal -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
- name: Odin documentation tests
|
||||
shell: cmd
|
||||
run: |
|
||||
@@ -229,3 +220,53 @@ jobs:
|
||||
run: |
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
|
||||
odin check examples/all -strict-style -target:windows_i386
|
||||
|
||||
build_linux_riscv64:
|
||||
runs-on: ubuntu-latest
|
||||
name: Linux riscv64 (emulated) Build, Check and Test
|
||||
timeout-minutes: 15
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Download LLVM (Linux)
|
||||
run: |
|
||||
wget https://apt.llvm.org/llvm.sh
|
||||
chmod +x llvm.sh
|
||||
sudo ./llvm.sh 18
|
||||
echo "/usr/lib/llvm-18/bin" >> $GITHUB_PATH
|
||||
|
||||
- name: Build Odin
|
||||
run: ./build_odin.sh release
|
||||
|
||||
- name: Odin version
|
||||
run: ./odin version
|
||||
|
||||
- name: Odin report
|
||||
run: ./odin report
|
||||
|
||||
- name: Compile needed Vendor
|
||||
run: |
|
||||
make -C vendor/stb/src
|
||||
make -C vendor/cgltf/src
|
||||
make -C vendor/miniaudio/src
|
||||
|
||||
- name: Odin check
|
||||
run: ./odin check examples/all -target:linux_riscv64 -vet -strict-style -disallow-do
|
||||
|
||||
- name: Install riscv64 toolchain and qemu
|
||||
run: sudo apt-get install -y qemu-user qemu-user-static gcc-12-riscv64-linux-gnu libc6-riscv64-cross
|
||||
|
||||
- name: Odin run
|
||||
run: ./odin run examples/demo -vet -strict-style -disallow-do -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static"
|
||||
|
||||
- name: Odin run -debug
|
||||
run: ./odin run examples/demo -debug -vet -strict-style -disallow-do -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static"
|
||||
|
||||
- name: Normal Core library tests
|
||||
run: ./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static"
|
||||
|
||||
- name: Optimized Core library tests
|
||||
run: ./odin test tests/core/speed.odin -o:speed -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static"
|
||||
|
||||
- name: Internals tests
|
||||
run: ./odin test tests/internal -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static"
|
||||
|
||||
@@ -61,7 +61,6 @@ jobs:
|
||||
mkdir dist
|
||||
cp odin dist
|
||||
cp LICENSE dist
|
||||
cp libLLVM* dist
|
||||
cp -r shared dist
|
||||
cp -r base dist
|
||||
cp -r core dist
|
||||
|
||||
+1
-3
@@ -17,13 +17,12 @@
|
||||
[Rr]eleases/
|
||||
x64/
|
||||
x86/
|
||||
!/core/simd/x86
|
||||
bld/
|
||||
[Bb]in/
|
||||
[Oo]bj/
|
||||
[Ll]og/
|
||||
![Cc]ore/[Ll]og/
|
||||
tests/documentation/verify/
|
||||
tests/documentation/all.odin-doc
|
||||
# Visual Studio 2015 cache/options directory
|
||||
.vs/
|
||||
# Visual Studio Code options directory
|
||||
@@ -31,7 +30,6 @@ tests/documentation/all.odin-doc
|
||||
# Uncomment if you have tasks that create the project's static files in wwwroot
|
||||
#wwwroot/
|
||||
demo
|
||||
benchmark
|
||||
|
||||
# MSTest test Results
|
||||
[Tt]est[Rr]esult*/
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
all: debug
|
||||
all: default
|
||||
|
||||
demo:
|
||||
./odin run examples/demo/demo.odin -file
|
||||
@@ -6,12 +6,18 @@ demo:
|
||||
report:
|
||||
./odin report
|
||||
|
||||
default:
|
||||
PROGRAM=make ./build_odin.sh # debug
|
||||
|
||||
debug:
|
||||
./build_odin.sh debug
|
||||
|
||||
release:
|
||||
./build_odin.sh release
|
||||
|
||||
release-native:
|
||||
./build_odin.sh release-native
|
||||
|
||||
release_native:
|
||||
./build_odin.sh release-native
|
||||
|
||||
|
||||
@@ -76,9 +76,9 @@ Answers to common questions about Odin.
|
||||
|
||||
Documentation for all the official packages that are part of the [core](https://pkg.odin-lang.org/core/) and [vendor](https://pkg.odin-lang.org/vendor/) library collections.
|
||||
|
||||
#### [The Odin Wiki](https://github.com/odin-lang/Odin/wiki)
|
||||
#### [Odin Documentation](https://odin-lang.org/docs/)
|
||||
|
||||
A wiki maintained by the Odin community.
|
||||
Documentation for the Odin language itself.
|
||||
|
||||
#### [Odin Discord](https://discord.gg/sVBPHEv)
|
||||
|
||||
|
||||
@@ -42,8 +42,8 @@ overflow_add :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) #option
|
||||
overflow_sub :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) #optional_ok ---
|
||||
overflow_mul :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) #optional_ok ---
|
||||
|
||||
add_sat :: proc(lhs, rhs: $T) -> T where type_is_integer(T) ---
|
||||
sub_sat :: proc(lhs, rhs: $T) -> T where type_is_integer(T) ---
|
||||
saturating_add :: proc(lhs, rhs: $T) -> T where type_is_integer(T) ---
|
||||
saturating_sub :: proc(lhs, rhs: $T) -> T where type_is_integer(T) ---
|
||||
|
||||
sqrt :: proc(x: $T) -> T where type_is_float(T) || (type_is_simd_vector(T) && type_is_float(type_elem_type(T))) ---
|
||||
|
||||
@@ -219,14 +219,21 @@ type_map_cell_info :: proc($T: typeid) -> ^runtime.Map_Cell_Info ---
|
||||
type_convert_variants_to_pointers :: proc($T: typeid) -> typeid where type_is_union(T) ---
|
||||
type_merge :: proc($U, $V: typeid) -> typeid where type_is_union(U), type_is_union(V) ---
|
||||
|
||||
// NOTE(review): presumably reports whether the struct types U and V have fields in common; confirm against the compiler's implementation.
type_has_shared_fields :: proc($U, $V: typeid) -> bool where type_is_struct(U), type_is_struct(V) ---
|
||||
|
||||
constant_utf16_cstring :: proc($literal: string) -> [^]u16 ---
|
||||
|
||||
constant_log2 :: proc($v: $T) -> T where type_is_integer(T) ---
|
||||
|
||||
// SIMD related
|
||||
simd_add :: proc(a, b: #simd[N]T) -> #simd[N]T ---
|
||||
simd_sub :: proc(a, b: #simd[N]T) -> #simd[N]T ---
|
||||
simd_mul :: proc(a, b: #simd[N]T) -> #simd[N]T ---
|
||||
simd_div :: proc(a, b: #simd[N]T) -> #simd[N]T where type_is_float(T) ---
|
||||
|
||||
simd_saturating_add :: proc(a, b: #simd[N]T) -> #simd[N]T where type_is_integer(T) ---
|
||||
simd_saturating_sub :: proc(a, b: #simd[N]T) -> #simd[N]T where type_is_integer(T) ---
|
||||
|
||||
// Keeps Odin's Behaviour
|
||||
// (x << y) if y <= mask else 0
|
||||
simd_shl :: proc(a: #simd[N]T, b: #simd[N]Unsigned_Integer) -> #simd[N]T ---
|
||||
@@ -237,9 +244,6 @@ simd_shr :: proc(a: #simd[N]T, b: #simd[N]Unsigned_Integer) -> #simd[N]T ---
|
||||
simd_shl_masked :: proc(a: #simd[N]T, b: #simd[N]Unsigned_Integer) -> #simd[N]T ---
|
||||
simd_shr_masked :: proc(a: #simd[N]T, b: #simd[N]Unsigned_Integer) -> #simd[N]T ---
|
||||
|
||||
simd_add_sat :: proc(a, b: #simd[N]T) -> #simd[N]T ---
|
||||
simd_sub_sat :: proc(a, b: #simd[N]T) -> #simd[N]T ---
|
||||
|
||||
simd_bit_and :: proc(a, b: #simd[N]T) -> #simd[N]T ---
|
||||
simd_bit_or :: proc(a, b: #simd[N]T) -> #simd[N]T ---
|
||||
simd_bit_xor :: proc(a, b: #simd[N]T) -> #simd[N]T ---
|
||||
@@ -268,13 +272,28 @@ simd_lanes_ge :: proc(a, b: #simd[N]T) -> #simd[N]Integer ---
|
||||
simd_extract :: proc(a: #simd[N]T, idx: uint) -> T ---
|
||||
simd_replace :: proc(a: #simd[N]T, idx: uint, elem: T) -> #simd[N]T ---
|
||||
|
||||
simd_reduce_add_ordered :: proc(a: #simd[N]T) -> T ---
|
||||
simd_reduce_mul_ordered :: proc(a: #simd[N]T) -> T ---
|
||||
simd_reduce_min :: proc(a: #simd[N]T) -> T ---
|
||||
simd_reduce_max :: proc(a: #simd[N]T) -> T ---
|
||||
simd_reduce_and :: proc(a: #simd[N]T) -> T ---
|
||||
simd_reduce_or :: proc(a: #simd[N]T) -> T ---
|
||||
simd_reduce_xor :: proc(a: #simd[N]T) -> T ---
|
||||
simd_reduce_add_ordered :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
|
||||
simd_reduce_mul_ordered :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
|
||||
simd_reduce_min :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
|
||||
simd_reduce_max :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
|
||||
simd_reduce_and :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
|
||||
simd_reduce_or :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
|
||||
simd_reduce_xor :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
|
||||
|
||||
simd_reduce_any :: proc(a: #simd[N]T) -> T where type_is_boolean(T) ---
|
||||
simd_reduce_all :: proc(a: #simd[N]T) -> T where type_is_boolean(T) ---
|
||||
|
||||
|
||||
simd_gather :: proc(ptr: #simd[N]rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) ---
|
||||
simd_scatter :: proc(ptr: #simd[N]rawptr, val: #simd[N]T, mask: #simd[N]U) where type_is_integer(U) || type_is_boolean(U) ---
|
||||
|
||||
simd_masked_load :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) ---
|
||||
simd_masked_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) where type_is_integer(U) || type_is_boolean(U) ---
|
||||
|
||||
simd_masked_expand_load :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) ---
|
||||
simd_masked_compress_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) where type_is_integer(U) || type_is_boolean(U) ---
|
||||
|
||||
|
||||
|
||||
simd_shuffle :: proc(a, b: #simd[N]T, indices: ..int) -> #simd[len(indices)]T ---
|
||||
simd_select :: proc(cond: #simd[N]boolean_or_integer, true, false: #simd[N]T) -> #simd[N]T ---
|
||||
@@ -288,11 +307,11 @@ simd_nearest :: proc(a: #simd[N]any_float) -> #simd[N]any_float ---
|
||||
|
||||
simd_to_bits :: proc(v: #simd[N]T) -> #simd[N]Integer where size_of(T) == size_of(Integer), type_is_unsigned(Integer) ---
|
||||
|
||||
// equivalent a swizzle with descending indices, e.g. reserve(a, 3, 2, 1, 0)
|
||||
simd_reverse :: proc(a: #simd[N]T) -> #simd[N]T ---
|
||||
// equivalent to a swizzle with descending indices, e.g. swizzle(a, 3, 2, 1, 0)
|
||||
simd_lanes_reverse :: proc(a: #simd[N]T) -> #simd[N]T ---
|
||||
|
||||
simd_rotate_left :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T ---
|
||||
simd_rotate_right :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T ---
|
||||
simd_lanes_rotate_left :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T ---
|
||||
simd_lanes_rotate_right :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T ---
|
||||
|
||||
// Checks if the current target supports the given target features.
|
||||
//
|
||||
|
||||
@@ -546,10 +546,23 @@ Odin_OS_Type :: type_of(ODIN_OS)
|
||||
arm64,
|
||||
wasm32,
|
||||
wasm64p32,
|
||||
riscv64,
|
||||
}
|
||||
*/
|
||||
Odin_Arch_Type :: type_of(ODIN_ARCH)
|
||||
|
||||
Odin_Arch_Types :: bit_set[Odin_Arch_Type]
|
||||
|
||||
ALL_ODIN_ARCH_TYPES :: Odin_Arch_Types{
|
||||
.amd64,
|
||||
.i386,
|
||||
.arm32,
|
||||
.arm64,
|
||||
.wasm32,
|
||||
.wasm64p32,
|
||||
.riscv64,
|
||||
}
|
||||
|
||||
/*
|
||||
// Defined internally by the compiler
|
||||
Odin_Build_Mode_Type :: enum int {
|
||||
@@ -573,6 +586,22 @@ Odin_Build_Mode_Type :: type_of(ODIN_BUILD_MODE)
|
||||
*/
|
||||
Odin_Endian_Type :: type_of(ODIN_ENDIAN)
|
||||
|
||||
Odin_OS_Types :: bit_set[Odin_OS_Type]
|
||||
|
||||
ALL_ODIN_OS_TYPES :: Odin_OS_Types{
|
||||
.Windows,
|
||||
.Darwin,
|
||||
.Linux,
|
||||
.Essence,
|
||||
.FreeBSD,
|
||||
.OpenBSD,
|
||||
.NetBSD,
|
||||
.Haiku,
|
||||
.WASI,
|
||||
.JS,
|
||||
.Orca,
|
||||
.Freestanding,
|
||||
}
|
||||
|
||||
/*
|
||||
// Defined internally by the compiler
|
||||
@@ -750,6 +779,10 @@ __init_context :: proc "contextless" (c: ^Context) {
|
||||
}
|
||||
|
||||
default_assertion_failure_proc :: proc(prefix, message: string, loc: Source_Code_Location) -> ! {
|
||||
default_assertion_contextless_failure_proc(prefix, message, loc)
|
||||
}
|
||||
|
||||
default_assertion_contextless_failure_proc :: proc "contextless" (prefix, message: string, loc: Source_Code_Location) -> ! {
|
||||
when ODIN_OS == .Freestanding {
|
||||
// Do nothing
|
||||
} else {
|
||||
|
||||
@@ -68,7 +68,7 @@ copy :: proc{copy_slice, copy_from_string}
|
||||
// Note: If you want the elements to remain in their order, use `ordered_remove`.
|
||||
// Note: If the index is out of bounds, this procedure will panic.
|
||||
@builtin
|
||||
unordered_remove :: proc(array: ^$D/[dynamic]$T, index: int, loc := #caller_location) #no_bounds_check {
|
||||
unordered_remove :: proc(array: ^$D/[dynamic]$T, #any_int index: int, loc := #caller_location) #no_bounds_check {
|
||||
bounds_check_error_loc(loc, index, len(array))
|
||||
n := len(array)-1
|
||||
if index != n {
|
||||
@@ -82,7 +82,7 @@ unordered_remove :: proc(array: ^$D/[dynamic]$T, index: int, loc := #caller_loca
|
||||
// Note: If the elements do not have to remain in their order, prefer `unordered_remove`.
|
||||
// Note: If the index is out of bounds, this procedure will panic.
|
||||
@builtin
|
||||
ordered_remove :: proc(array: ^$D/[dynamic]$T, index: int, loc := #caller_location) #no_bounds_check {
|
||||
ordered_remove :: proc(array: ^$D/[dynamic]$T, #any_int index: int, loc := #caller_location) #no_bounds_check {
|
||||
bounds_check_error_loc(loc, index, len(array))
|
||||
if index+1 < len(array) {
|
||||
copy(array[index:], array[index+1:])
|
||||
@@ -95,7 +95,7 @@ ordered_remove :: proc(array: ^$D/[dynamic]$T, index: int, loc := #caller_locati
|
||||
// Note: This is an O(N) operation.
|
||||
// Note: If the range is out of bounds, this procedure will panic.
|
||||
@builtin
|
||||
remove_range :: proc(array: ^$D/[dynamic]$T, lo, hi: int, loc := #caller_location) #no_bounds_check {
|
||||
remove_range :: proc(array: ^$D/[dynamic]$T, #any_int lo, hi: int, loc := #caller_location) #no_bounds_check {
|
||||
slice_expr_error_lo_hi_loc(loc, lo, hi, len(array))
|
||||
n := max(hi-lo, 0)
|
||||
if n > 0 {
|
||||
@@ -350,7 +350,7 @@ _make_dynamic_array_len_cap :: proc(array: ^Raw_Dynamic_Array, size_of_elem, ali
|
||||
return
|
||||
}
|
||||
|
||||
// `make_map` allocates and initializes a dynamic array. Like `new`, the first argument is a type, not a value.
|
||||
// `make_map` allocates and initializes a map. Like `new`, the first argument is a type, not a value.
|
||||
// Unlike `new`, `make`'s return value is the same as the type of its argument, not a pointer to it.
|
||||
//
|
||||
// Note: Prefer using the procedure group `make`.
|
||||
@@ -362,7 +362,7 @@ make_map :: proc($T: typeid/map[$K]$E, #any_int capacity: int = 1<<MAP_MIN_LOG2_
|
||||
err = reserve_map(&m, capacity, loc)
|
||||
return
|
||||
}
|
||||
// `make_multi_pointer` allocates and initializes a dynamic array. Like `new`, the first argument is a type, not a value.
|
||||
// `make_multi_pointer` allocates and initializes a multi-pointer. Like `new`, the first argument is a type, not a value.
|
||||
// Unlike `new`, `make`'s return value is the same as the type of its argument, not a pointer to it.
|
||||
//
|
||||
// This is "similar" to doing `raw_data(make([]E, len, allocator))`.
|
||||
@@ -602,7 +602,7 @@ append_nothing :: proc(array: ^$T/[dynamic]$E, loc := #caller_location) -> (n: i
|
||||
|
||||
|
||||
@builtin
|
||||
inject_at_elem :: proc(array: ^$T/[dynamic]$E, index: int, #no_broadcast arg: E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
|
||||
inject_at_elem :: proc(array: ^$T/[dynamic]$E, #any_int index: int, #no_broadcast arg: E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
|
||||
if array == nil {
|
||||
return
|
||||
}
|
||||
@@ -620,7 +620,7 @@ inject_at_elem :: proc(array: ^$T/[dynamic]$E, index: int, #no_broadcast arg: E,
|
||||
}
|
||||
|
||||
@builtin
|
||||
inject_at_elems :: proc(array: ^$T/[dynamic]$E, index: int, #no_broadcast args: ..E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
|
||||
inject_at_elems :: proc(array: ^$T/[dynamic]$E, #any_int index: int, #no_broadcast args: ..E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
|
||||
if array == nil {
|
||||
return
|
||||
}
|
||||
@@ -643,7 +643,7 @@ inject_at_elems :: proc(array: ^$T/[dynamic]$E, index: int, #no_broadcast args:
|
||||
}
|
||||
|
||||
@builtin
|
||||
inject_at_elem_string :: proc(array: ^$T/[dynamic]$E/u8, index: int, arg: string, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
|
||||
inject_at_elem_string :: proc(array: ^$T/[dynamic]$E/u8, #any_int index: int, arg: string, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
|
||||
if array == nil {
|
||||
return
|
||||
}
|
||||
@@ -668,7 +668,7 @@ inject_at_elem_string :: proc(array: ^$T/[dynamic]$E/u8, index: int, arg: string
|
||||
|
||||
|
||||
@builtin
|
||||
assign_at_elem :: proc(array: ^$T/[dynamic]$E, index: int, arg: E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
|
||||
assign_at_elem :: proc(array: ^$T/[dynamic]$E, #any_int index: int, arg: E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
|
||||
if index < len(array) {
|
||||
array[index] = arg
|
||||
ok = true
|
||||
@@ -682,7 +682,7 @@ assign_at_elem :: proc(array: ^$T/[dynamic]$E, index: int, arg: E, loc := #calle
|
||||
|
||||
|
||||
@builtin
|
||||
assign_at_elems :: proc(array: ^$T/[dynamic]$E, index: int, #no_broadcast args: ..E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
|
||||
assign_at_elems :: proc(array: ^$T/[dynamic]$E, #any_int index: int, #no_broadcast args: ..E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
|
||||
new_size := index + len(args)
|
||||
if len(args) == 0 {
|
||||
ok = true
|
||||
@@ -699,7 +699,7 @@ assign_at_elems :: proc(array: ^$T/[dynamic]$E, index: int, #no_broadcast args:
|
||||
|
||||
|
||||
@builtin
|
||||
assign_at_elem_string :: proc(array: ^$T/[dynamic]$E/u8, index: int, arg: string, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
|
||||
assign_at_elem_string :: proc(array: ^$T/[dynamic]$E/u8, #any_int index: int, arg: string, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
|
||||
new_size := index + len(arg)
|
||||
if len(arg) == 0 {
|
||||
ok = true
|
||||
@@ -838,7 +838,7 @@ non_zero_resize_dynamic_array :: proc(array: ^$T/[dynamic]$E, #any_int length: i
|
||||
|
||||
Note: Prefer the procedure group `shrink`
|
||||
*/
|
||||
shrink_dynamic_array :: proc(array: ^$T/[dynamic]$E, new_cap := -1, loc := #caller_location) -> (did_shrink: bool, err: Allocator_Error) {
|
||||
shrink_dynamic_array :: proc(array: ^$T/[dynamic]$E, #any_int new_cap := -1, loc := #caller_location) -> (did_shrink: bool, err: Allocator_Error) {
|
||||
return _shrink_dynamic_array((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), new_cap, loc)
|
||||
}
|
||||
|
||||
@@ -948,3 +948,30 @@ unimplemented :: proc(message := "", loc := #caller_location) -> ! {
|
||||
}
|
||||
p("not yet implemented", message, loc)
|
||||
}
|
||||
|
||||
|
||||
@builtin
|
||||
@(disabled=ODIN_DISABLE_ASSERT)
|
||||
assert_contextless :: proc "contextless" (condition: bool, message := "", loc := #caller_location) {
|
||||
if !condition {
|
||||
// NOTE(bill): This is wrapped in a procedure call
|
||||
// to improve performance to make the CPU not
|
||||
// execute speculatively, making it about an order of
|
||||
// magnitude faster
|
||||
@(cold)
|
||||
internal :: proc "contextless" (message: string, loc: Source_Code_Location) {
|
||||
default_assertion_contextless_failure_proc("runtime assertion", message, loc)
|
||||
}
|
||||
internal(message, loc)
|
||||
}
|
||||
}
|
||||
|
||||
@builtin
|
||||
panic_contextless :: proc "contextless" (message: string, loc := #caller_location) -> ! {
|
||||
default_assertion_contextless_failure_proc("panic", message, loc)
|
||||
}
|
||||
|
||||
@builtin
|
||||
unimplemented_contextless :: proc "contextless" (message := "", loc := #caller_location) -> ! {
|
||||
default_assertion_contextless_failure_proc("not yet implemented", message, loc)
|
||||
}
|
||||
|
||||
@@ -76,7 +76,7 @@ raw_soa_footer :: proc{
|
||||
|
||||
|
||||
@(builtin, require_results)
|
||||
make_soa_aligned :: proc($T: typeid/#soa[]$E, length: int, alignment: int, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
|
||||
make_soa_aligned :: proc($T: typeid/#soa[]$E, #any_int length, alignment: int, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
|
||||
if length <= 0 {
|
||||
return
|
||||
}
|
||||
@@ -135,7 +135,7 @@ make_soa_aligned :: proc($T: typeid/#soa[]$E, length: int, alignment: int, alloc
|
||||
}
|
||||
|
||||
@(builtin, require_results)
|
||||
make_soa_slice :: proc($T: typeid/#soa[]$E, length: int, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
|
||||
make_soa_slice :: proc($T: typeid/#soa[]$E, #any_int length: int, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
|
||||
return make_soa_aligned(T, length, align_of(E), allocator, loc)
|
||||
}
|
||||
|
||||
@@ -172,7 +172,7 @@ make_soa :: proc{
|
||||
|
||||
|
||||
@builtin
|
||||
resize_soa :: proc(array: ^$T/#soa[dynamic]$E, length: int, loc := #caller_location) -> Allocator_Error {
|
||||
resize_soa :: proc(array: ^$T/#soa[dynamic]$E, #any_int length: int, loc := #caller_location) -> Allocator_Error {
|
||||
if array == nil {
|
||||
return nil
|
||||
}
|
||||
@@ -183,7 +183,7 @@ resize_soa :: proc(array: ^$T/#soa[dynamic]$E, length: int, loc := #caller_locat
|
||||
}
|
||||
|
||||
@builtin
|
||||
non_zero_resize_soa :: proc(array: ^$T/#soa[dynamic]$E, length: int, loc := #caller_location) -> Allocator_Error {
|
||||
non_zero_resize_soa :: proc(array: ^$T/#soa[dynamic]$E, #any_int length: int, loc := #caller_location) -> Allocator_Error {
|
||||
if array == nil {
|
||||
return nil
|
||||
}
|
||||
@@ -194,12 +194,12 @@ non_zero_resize_soa :: proc(array: ^$T/#soa[dynamic]$E, length: int, loc := #cal
|
||||
}
|
||||
|
||||
@builtin
|
||||
reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, loc := #caller_location) -> Allocator_Error {
|
||||
reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, #any_int capacity: int, loc := #caller_location) -> Allocator_Error {
|
||||
return _reserve_soa(array, capacity, true, loc)
|
||||
}
|
||||
|
||||
@builtin
|
||||
non_zero_reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, loc := #caller_location) -> Allocator_Error {
|
||||
non_zero_reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, #any_int capacity: int, loc := #caller_location) -> Allocator_Error {
|
||||
return _reserve_soa(array, capacity, false, loc)
|
||||
}
|
||||
|
||||
@@ -484,7 +484,7 @@ into_dynamic_soa :: proc(array: $T/#soa[]$E) -> #soa[dynamic]E {
|
||||
// Note: If you want the elements to remain in their order, use `ordered_remove_soa`.
|
||||
// Note: If the index is out of bounds, this procedure will panic.
|
||||
@builtin
|
||||
unordered_remove_soa :: proc(array: ^$T/#soa[dynamic]$E, index: int, loc := #caller_location) #no_bounds_check {
|
||||
unordered_remove_soa :: proc(array: ^$T/#soa[dynamic]$E, #any_int index: int, loc := #caller_location) #no_bounds_check {
|
||||
bounds_check_error_loc(loc, index, len(array))
|
||||
if index+1 < len(array) {
|
||||
ti := type_info_of(typeid_of(T))
|
||||
@@ -512,7 +512,7 @@ unordered_remove_soa :: proc(array: ^$T/#soa[dynamic]$E, index: int, loc := #cal
|
||||
// Note: If the elements do not have to remain in their order, prefer `unordered_remove_soa`.
|
||||
// Note: If the index is out of bounds, this procedure will panic.
|
||||
@builtin
|
||||
ordered_remove_soa :: proc(array: ^$T/#soa[dynamic]$E, index: int, loc := #caller_location) #no_bounds_check {
|
||||
ordered_remove_soa :: proc(array: ^$T/#soa[dynamic]$E, #any_int index: int, loc := #caller_location) #no_bounds_check {
|
||||
bounds_check_error_loc(loc, index, len(array))
|
||||
if index+1 < len(array) {
|
||||
ti := type_info_of(typeid_of(T))
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
package runtime
|
||||
|
||||
nil_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
|
||||
size, alignment: int,
|
||||
old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) {
|
||||
size, alignment: int,
|
||||
old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) {
|
||||
switch mode {
|
||||
case .Alloc, .Alloc_Non_Zeroed:
|
||||
return nil, .Out_Of_Memory
|
||||
|
||||
@@ -129,7 +129,7 @@ arena_alloc :: proc(arena: ^Arena, size, alignment: uint, loc := #caller_locatio
|
||||
return
|
||||
}
|
||||
|
||||
// `arena_init` will initialize the arena with a usuable block.
|
||||
// `arena_init` will initialize the arena with a usable block.
|
||||
// Calling this procedure is not required: a zero-valued Arena is usable as-is, because `arena_alloc` will set things up if necessary
|
||||
@(require_results)
|
||||
arena_init :: proc(arena: ^Arena, size: uint, backing_allocator: Allocator, loc := #caller_location) -> Allocator_Error {
|
||||
|
||||
@@ -34,6 +34,9 @@ when ODIN_BUILD_MODE == .Dynamic {
|
||||
} else when ODIN_OS == .Darwin && ODIN_ARCH == .arm64 {
|
||||
@require foreign import entry "entry_unix_no_crt_darwin_arm64.asm"
|
||||
SYS_exit :: 1
|
||||
} else when ODIN_ARCH == .riscv64 {
|
||||
@require foreign import entry "entry_unix_no_crt_riscv64.asm"
|
||||
SYS_exit :: 93
|
||||
}
|
||||
@(link_name="_start_odin", linkage="strong", require)
|
||||
_start_odin :: proc "c" (argc: i32, argv: [^]cstring) -> ! {
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
.text

.globl _start

# Process entry point for riscv64 builds that link without the C runtime.
#
# Passes the two arguments of `_start_odin(argc, argv)`:
#   a0 = the word stored at the initial stack pointer (argc)
#   a1 = the address just above it (argv)
# and then hands control to `_start_odin`, which does not return.
_start:
	ld a0, 0(sp)
	addi a1, sp, 8
	# Align the stack pointer down to a 16-byte boundary. This must be a
	# bitwise AND with the mask ~15; an `addi` with the same immediate would
	# merely subtract 16 and leave an unaligned sp unaligned.
	andi sp, sp, ~15
	call _start_odin
	# Trap if `_start_odin` ever returns.
	ebreak
|
||||
@@ -19,12 +19,15 @@ heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
|
||||
// the pointer we return to the user.
|
||||
//
|
||||
|
||||
aligned_alloc :: proc(size, alignment: int, old_ptr: rawptr = nil, zero_memory := true) -> ([]byte, Allocator_Error) {
|
||||
aligned_alloc :: proc(size, alignment: int, old_ptr: rawptr, old_size: int, zero_memory := true) -> ([]byte, Allocator_Error) {
|
||||
a := max(alignment, align_of(rawptr))
|
||||
space := size + a - 1
|
||||
|
||||
allocated_mem: rawptr
|
||||
if old_ptr != nil {
|
||||
|
||||
force_copy := old_ptr != nil && a > align_of(rawptr)
|
||||
|
||||
if !force_copy && old_ptr != nil {
|
||||
original_old_ptr := ([^]rawptr)(old_ptr)[-1]
|
||||
allocated_mem = heap_resize(original_old_ptr, space+size_of(rawptr))
|
||||
} else {
|
||||
@@ -36,12 +39,19 @@ heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
|
||||
aligned_ptr := (ptr - 1 + uintptr(a)) & -uintptr(a)
|
||||
diff := int(aligned_ptr - ptr)
|
||||
if (size + diff) > space || allocated_mem == nil {
|
||||
aligned_free(old_ptr)
|
||||
aligned_free(allocated_mem)
|
||||
return nil, .Out_Of_Memory
|
||||
}
|
||||
|
||||
aligned_mem = rawptr(aligned_ptr)
|
||||
([^]rawptr)(aligned_mem)[-1] = allocated_mem
|
||||
|
||||
if force_copy {
|
||||
mem_copy_non_overlapping(aligned_mem, old_ptr, old_size)
|
||||
aligned_free(old_ptr)
|
||||
}
|
||||
|
||||
return byte_slice(aligned_mem, size), nil
|
||||
}
|
||||
|
||||
@@ -53,10 +63,10 @@ heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
|
||||
|
||||
aligned_resize :: proc(p: rawptr, old_size: int, new_size: int, new_alignment: int, zero_memory := true) -> (new_memory: []byte, err: Allocator_Error) {
|
||||
if p == nil {
|
||||
return nil, nil
|
||||
return aligned_alloc(new_size, new_alignment, nil, old_size, zero_memory)
|
||||
}
|
||||
|
||||
new_memory = aligned_alloc(new_size, new_alignment, p, zero_memory) or_return
|
||||
new_memory = aligned_alloc(new_size, new_alignment, p, old_size, zero_memory) or_return
|
||||
|
||||
// NOTE: heap_resize does not zero the new memory, so we do it
|
||||
if zero_memory && new_size > old_size {
|
||||
@@ -68,7 +78,7 @@ heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
|
||||
|
||||
switch mode {
|
||||
case .Alloc, .Alloc_Non_Zeroed:
|
||||
return aligned_alloc(size, alignment, nil, mode == .Alloc)
|
||||
return aligned_alloc(size, alignment, nil, 0, mode == .Alloc)
|
||||
|
||||
case .Free:
|
||||
aligned_free(old_memory)
|
||||
@@ -77,9 +87,6 @@ heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
|
||||
return nil, .Mode_Not_Implemented
|
||||
|
||||
case .Resize, .Resize_Non_Zeroed:
|
||||
if old_memory == nil {
|
||||
return aligned_alloc(size, alignment, nil, mode == .Resize)
|
||||
}
|
||||
return aligned_resize(old_memory, old_size, size, alignment, mode == .Resize)
|
||||
|
||||
case .Query_Features:
|
||||
|
||||
+24
-15
@@ -8,10 +8,9 @@ IS_WASM :: ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
|
||||
|
||||
@(private)
|
||||
RUNTIME_LINKAGE :: "strong" when (
|
||||
(ODIN_USE_SEPARATE_MODULES ||
|
||||
ODIN_USE_SEPARATE_MODULES ||
|
||||
ODIN_BUILD_MODE == .Dynamic ||
|
||||
!ODIN_NO_CRT) &&
|
||||
!IS_WASM) else "internal"
|
||||
!ODIN_NO_CRT) else "internal"
|
||||
RUNTIME_REQUIRE :: false // !ODIN_TILDE
|
||||
|
||||
@(private)
|
||||
@@ -879,9 +878,6 @@ extendhfsf2 :: proc "c" (value: __float16) -> f32 {
|
||||
|
||||
@(link_name="__floattidf", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
|
||||
floattidf :: proc "c" (a: i128) -> f64 {
|
||||
when IS_WASM {
|
||||
return 0
|
||||
} else {
|
||||
DBL_MANT_DIG :: 53
|
||||
if a == 0 {
|
||||
return 0.0
|
||||
@@ -921,14 +917,10 @@ when IS_WASM {
|
||||
fb[0] = u32(a) // mantissa-low
|
||||
return transmute(f64)fb
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@(link_name="__floattidf_unsigned", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
|
||||
floattidf_unsigned :: proc "c" (a: u128) -> f64 {
|
||||
when IS_WASM {
|
||||
return 0
|
||||
} else {
|
||||
DBL_MANT_DIG :: 53
|
||||
if a == 0 {
|
||||
return 0.0
|
||||
@@ -966,7 +958,6 @@ when IS_WASM {
|
||||
fb[0] = u32(a) // mantissa-low
|
||||
return transmute(f64)fb
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1023,14 +1014,32 @@ modti3 :: proc "c" (a, b: i128) -> i128 {
|
||||
|
||||
@(link_name="__divmodti4", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
|
||||
divmodti4 :: proc "c" (a, b: i128, rem: ^i128) -> i128 {
|
||||
u := udivmod128(u128(a), u128(b), (^u128)(rem))
|
||||
return i128(u)
|
||||
s_a := a >> (128 - 1) // -1 if negative or 0
|
||||
s_b := b >> (128 - 1)
|
||||
an := (a ~ s_a) - s_a // absolute
|
||||
bn := (b ~ s_b) - s_b
|
||||
|
||||
s_b ~= s_a // quotient sign
|
||||
u_s_b := u128(s_b)
|
||||
u_s_a := u128(s_a)
|
||||
|
||||
r: u128 = ---
|
||||
u := i128((udivmodti4(u128(an), u128(bn), &r) ~ u_s_b) - u_s_b) // negate if negative
|
||||
rem^ = i128((r ~ u_s_a) - u_s_a)
|
||||
return u
|
||||
}
|
||||
|
||||
@(link_name="__divti3", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
|
||||
divti3 :: proc "c" (a, b: i128) -> i128 {
|
||||
u := udivmodti4(u128(a), u128(b), nil)
|
||||
return i128(u)
|
||||
s_a := a >> (128 - 1) // -1 if negative or 0
|
||||
s_b := b >> (128 - 1)
|
||||
an := (a ~ s_a) - s_a // absolute
|
||||
bn := (b ~ s_b) - s_b
|
||||
|
||||
s_a ~= s_b // quotient sign
|
||||
u_s_a := u128(s_a)
|
||||
|
||||
return i128((udivmodti4(u128(an), u128(bn), nil) ~ u_s_a) - u_s_a) // negate if negative
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -12,6 +12,8 @@ _stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
|
||||
SYS_write :: uintptr(4)
|
||||
} else when ODIN_ARCH == .arm32 {
|
||||
SYS_write :: uintptr(4)
|
||||
} else when ODIN_ARCH == .riscv64 {
|
||||
SYS_write :: uintptr(64)
|
||||
}
|
||||
|
||||
stderr :: 2
|
||||
|
||||
@@ -52,3 +52,24 @@ udivti3 :: proc "c" (la, ha, lb, hb: u64) -> u128 {
|
||||
b.lo, b.hi = lb, hb
|
||||
return udivmodti4(a.all, b.all, nil)
|
||||
}
|
||||
|
||||
// Logical (zero-filling) right shift of a 128-bit value, exported under the
// compiler-rt builtin name `__lshrti3`.
//
// Inputs:
// - la: The low 64 bits of the value.
// - ha: The high 64 bits of the value.
// - b:  The number of bits to shift by (presumably 0..<128 - the caller is
//       expected to guarantee this, it is not checked here).
//
// Returns:
// - The shifted value, reassembled through `ti_int`.
@(link_name="__lshrti3", linkage="strong")
__lshrti3 :: proc "c" (la, ha: u64, b: u32) -> i128 {
	// Width of ONE half of the 128-bit value. The halves are u64, so this
	// has to be 64; deriving it from u32 would send shifts of 32..63 down
	// the "whole upper half is shifted out" branch and corrupt the result.
	bits :: size_of(u64)*8

	input, result: ti_int
	input.lo = la
	input.hi = ha

	if b & bits != 0 {
		// bits <= b: the low half is shifted out entirely and only the
		// high half contributes to the result.
		result.hi = 0
		result.lo = input.hi >> (b - bits)
	} else if b == 0 {
		// Handled separately: `bits - b` below would be a full-width shift.
		return input.all
	} else {
		// 0 < b < bits: the bits leaving the high half enter the top of
		// the low half.
		result.hi = input.hi >> b
		result.lo = (input.hi << (bits - b)) | (input.lo >> b)
	}

	return result.all
}
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
package runtime
|
||||
|
||||
Thread_Local_Cleaner :: #type proc "odin" ()
|
||||
|
||||
@(private="file")
|
||||
thread_local_cleaners: [8]Thread_Local_Cleaner
|
||||
|
||||
// Register `p` to be run when a thread ends, so that it can release state
// held in `thread_local` variables.
//
// Meant to be called from the `init` procedure of a package that keeps
// dynamically-allocated memory in `thread_local` variables.
//
// The procedure is stored in the first unused slot of the fixed-size cleaner
// table; if every slot is already taken, this panics.
add_thread_local_cleaner :: proc "contextless" (p: Thread_Local_Cleaner) {
	for i in 0..<len(thread_local_cleaners) {
		slot := &thread_local_cleaners[i]
		if slot^ != nil {
			continue
		}
		slot^ = p
		return
	}
	panic_contextless("There are no more thread-local cleaner slots available.")
}
|
||||
|
||||
// Invoke every registered thread-local cleaner, in registration order.
//
// Meant to be called by the internals of a threading API when a thread
// reaches the end of its lifetime.
//
// Slots are filled front to back, so the first empty slot marks the end of
// the registered cleaners.
run_thread_local_cleaners :: proc "odin" () {
	for i := 0; i < len(thread_local_cleaners); i += 1 {
		cleaner := thread_local_cleaners[i]
		if cleaner == nil {
			return
		}
		cleaner()
	}
}
|
||||
@@ -116,6 +116,9 @@ if %errorlevel% neq 0 goto end_of_build
|
||||
rem If the demo doesn't run for you and your CPU is more than a decade old, try -microarch:native
|
||||
if %release_mode% EQU 0 odin run examples/demo -vet -strict-style -- Hellope World
|
||||
|
||||
rem Many non-compiler devs seem to run a debug build without realizing it.
|
||||
if %release_mode% EQU 0 echo: & echo Debug compiler built. Note: run "build.bat release" if you want a faster, release mode compiler.
|
||||
|
||||
del *.obj > NUL 2> NUL
|
||||
|
||||
:end_of_build
|
||||
+15
-2
@@ -23,6 +23,14 @@ error() {
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Brew advises people not to add llvm to their $PATH, so try and use brew to find it.
|
||||
if [ -z "$LLVM_CONFIG" ] && [ -n "$(command -v brew)" ]; then
|
||||
if [ -n "$(command -v $(brew --prefix llvm@18)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@18)/bin/llvm-config"
|
||||
elif [ -n "$(command -v $(brew --prefix llvm@17)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@17)/bin/llvm-config"
|
||||
elif [ -n "$(command -v $(brew --prefix llvm@14)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@14)/bin/llvm-config"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -z "$LLVM_CONFIG" ]; then
|
||||
# darwin, linux, openbsd
|
||||
if [ -n "$(command -v llvm-config-18)" ]; then LLVM_CONFIG="llvm-config-18"
|
||||
@@ -95,7 +103,7 @@ Linux)
|
||||
LDFLAGS="$LDFLAGS -ldl $($LLVM_CONFIG --libs core native --system-libs --libfiles)"
|
||||
# Copy libLLVM*.so into current directory for linking
|
||||
# NOTE: This is needed by the Linux release pipeline!
|
||||
cp $(readlink -f $($LLVM_CONFIG --libfiles)) ./
|
||||
# cp $(readlink -f $($LLVM_CONFIG --libfiles)) ./
|
||||
LDFLAGS="$LDFLAGS -Wl,-rpath=\$ORIGIN"
|
||||
;;
|
||||
OpenBSD)
|
||||
@@ -144,12 +152,17 @@ build_odin() {
|
||||
}
|
||||
|
||||
run_demo() {
|
||||
./odin run examples/demo -vet -strict-style -- Hellope World
|
||||
if [ $# -eq 0 ] || [ "$1" = "debug" ]; then
|
||||
./odin run examples/demo -vet -strict-style -- Hellope World
|
||||
fi
|
||||
}
|
||||
|
||||
if [ $# -eq 0 ]; then
|
||||
build_odin debug
|
||||
run_demo
|
||||
|
||||
: ${PROGRAM:=$0}
|
||||
printf "\nDebug compiler built. Note: run \"$PROGRAM release\" or \"$PROGRAM release-native\" if you want a faster, release mode compiler.\n"
|
||||
elif [ $# -eq 1 ]; then
|
||||
case $1 in
|
||||
report)
|
||||
|
||||
+33
-3
@@ -144,6 +144,9 @@ buffer_grow :: proc(b: ^Buffer, n: int, loc := #caller_location) {
|
||||
}
|
||||
|
||||
buffer_write_at :: proc(b: ^Buffer, p: []byte, offset: int, loc := #caller_location) -> (n: int, err: io.Error) {
|
||||
if len(p) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
b.last_read = .Invalid
|
||||
if offset < 0 {
|
||||
err = .Invalid_Offset
|
||||
@@ -246,10 +249,13 @@ buffer_read_ptr :: proc(b: ^Buffer, ptr: rawptr, size: int) -> (n: int, err: io.
|
||||
}
|
||||
|
||||
buffer_read_at :: proc(b: ^Buffer, p: []byte, offset: int) -> (n: int, err: io.Error) {
|
||||
if len(p) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
b.last_read = .Invalid
|
||||
|
||||
if uint(offset) >= len(b.buf) {
|
||||
err = .Invalid_Offset
|
||||
err = .EOF
|
||||
return
|
||||
}
|
||||
n = copy(p, b.buf[offset:])
|
||||
@@ -310,6 +316,27 @@ buffer_unread_rune :: proc(b: ^Buffer) -> io.Error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Move the read offset of the buffer.
//
// Inputs:
// - b:      The buffer to seek in.
// - offset: The offset, interpreted relative to `whence`.
// - whence: `.Start` (absolute), `.Current` (relative to the current read
//           offset) or `.End` (relative to the length of the buffered data).
//
// Returns:
// - The new absolute offset, or 0 together with `.Invalid_Whence` for an
//   unknown `whence`, or `.Invalid_Offset` if the resulting position is
//   negative or cannot be represented as an `int`.
//
// On success the last-read state is invalidated.
buffer_seek :: proc(b: ^Buffer, offset: i64, whence: io.Seek_From) -> (i64, io.Error) {
	abs: i64
	switch whence {
	case .Start:
		abs = offset
	case .Current:
		abs = i64(b.off) + offset
	case .End:
		abs = i64(len(b.buf)) + offset
	case:
		return 0, .Invalid_Whence
	}

	// Validate on the full 64-bit value. Checking only after the conversion
	// to `int` would let a large offset wrap to a small non-negative value on
	// targets where `int` is narrower than 64 bits.
	abs_int := int(abs)
	if abs < 0 || i64(abs_int) != abs {
		return 0, .Invalid_Offset
	}
	b.last_read = .Invalid
	b.off = abs_int
	return abs, nil
}
|
||||
|
||||
buffer_read_bytes :: proc(b: ^Buffer, delim: byte) -> (line: []byte, err: io.Error) {
|
||||
i := index_byte(b.buf[b.off:], delim)
|
||||
@@ -395,14 +422,17 @@ _buffer_proc :: proc(stream_data: rawptr, mode: io.Stream_Mode, p: []byte, offse
|
||||
return io._i64_err(buffer_write(b, p))
|
||||
case .Write_At:
|
||||
return io._i64_err(buffer_write_at(b, p, int(offset)))
|
||||
case .Seek:
|
||||
n, err = buffer_seek(b, offset, whence)
|
||||
return
|
||||
case .Size:
|
||||
n = i64(buffer_capacity(b))
|
||||
n = i64(buffer_length(b))
|
||||
return
|
||||
case .Destroy:
|
||||
buffer_destroy(b)
|
||||
return
|
||||
case .Query:
|
||||
return io.query_utility({.Read, .Read_At, .Write, .Write_At, .Size, .Destroy})
|
||||
return io.query_utility({.Read, .Read_At, .Write, .Write_At, .Seek, .Size, .Destroy, .Query})
|
||||
}
|
||||
return 0, .Empty
|
||||
}
|
||||
|
||||
+284
-6
@@ -1,9 +1,38 @@
|
||||
package bytes
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:mem"
|
||||
import "core:simd"
|
||||
import "core:unicode"
|
||||
import "core:unicode/utf8"
|
||||
|
||||
when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
|
||||
@(private)
|
||||
SCANNER_INDICES_256 : simd.u8x32 : {
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
}
|
||||
@(private)
|
||||
SCANNER_SENTINEL_MAX_256: simd.u8x32 : u8(0x00)
|
||||
@(private)
|
||||
SCANNER_SENTINEL_MIN_256: simd.u8x32 : u8(0xff)
|
||||
@(private)
|
||||
SIMD_REG_SIZE_256 :: 32
|
||||
}
|
||||
@(private)
|
||||
SCANNER_INDICES_128 : simd.u8x16 : {
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
}
|
||||
@(private)
|
||||
SCANNER_SENTINEL_MAX_128: simd.u8x16 : u8(0x00)
|
||||
@(private)
|
||||
SCANNER_SENTINEL_MIN_128: simd.u8x16 : u8(0xff)
|
||||
@(private)
|
||||
SIMD_REG_SIZE_128 :: 16
|
||||
|
||||
clone :: proc(s: []byte, allocator := context.allocator, loc := #caller_location) -> []byte {
|
||||
c := make([]byte, len(s), allocator, loc)
|
||||
copy(c, s)
|
||||
@@ -293,28 +322,277 @@ split_after_iterator :: proc(s: ^[]byte, sep: []byte) -> ([]byte, bool) {
|
||||
return _split_iterator(s, sep, len(sep))
|
||||
}
|
||||
|
||||
/*
|
||||
Scan a slice of bytes for a specific byte.
|
||||
|
||||
index_byte :: proc(s: []byte, c: byte) -> int {
|
||||
for i := 0; i < len(s); i += 1 {
|
||||
This procedure safely handles slices of any length, including empty slices.
|
||||
|
||||
Inputs:
|
||||
- s: A slice of bytes.
|
||||
- c: The byte to search for.
|
||||
|
||||
Returns:
|
||||
- index: The index of the byte `c`, or -1 if it was not found.
|
||||
*/
|
||||
index_byte :: proc(s: []byte, c: byte) -> (index: int) #no_bounds_check {
|
||||
i, l := 0, len(s)
|
||||
|
||||
// Guard against small strings. On modern systems, it is ALWAYS
|
||||
// worth vectorizing assuming there is a hardware vector unit, and
|
||||
// the data size is large enough.
|
||||
if l < SIMD_REG_SIZE_128 {
|
||||
for /**/; i < l; i += 1 {
|
||||
if s[i] == c {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
c_vec: simd.u8x16 = c
|
||||
when !simd.IS_EMULATED {
|
||||
// Note: While this is something that could also logically take
|
||||
// advantage of AVX512, the various downclocking and power
|
||||
// consumption related woes make it premature to have a dedicated
|
||||
// code path.
|
||||
when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
|
||||
c_vec_256: simd.u8x32 = c
|
||||
|
||||
s_vecs: [4]simd.u8x32 = ---
|
||||
c_vecs: [4]simd.u8x32 = ---
|
||||
m_vec: [4]u8 = ---
|
||||
|
||||
// Scan 128-byte chunks, using 256-bit SIMD.
|
||||
for nr_blocks := l / (4 * SIMD_REG_SIZE_256); nr_blocks > 0; nr_blocks -= 1 {
|
||||
#unroll for j in 0..<4 {
|
||||
s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x32)raw_data(s[i+j*SIMD_REG_SIZE_256:]))
|
||||
c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec_256)
|
||||
m_vec[j] = simd.reduce_or(c_vecs[j])
|
||||
}
|
||||
if m_vec[0] | m_vec[1] | m_vec[2] | m_vec[3] > 0 {
|
||||
#unroll for j in 0..<4 {
|
||||
if m_vec[j] > 0 {
|
||||
sel := simd.select(c_vecs[j], SCANNER_INDICES_256, SCANNER_SENTINEL_MIN_256)
|
||||
off := simd.reduce_min(sel)
|
||||
return i + j * SIMD_REG_SIZE_256 + int(off)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
i += 4 * SIMD_REG_SIZE_256
|
||||
}
|
||||
|
||||
// Scan 64-byte chunks, using 256-bit SIMD.
|
||||
for nr_blocks := (l - i) / (2 * SIMD_REG_SIZE_256); nr_blocks > 0; nr_blocks -= 1 {
|
||||
#unroll for j in 0..<2 {
|
||||
s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x32)raw_data(s[i+j*SIMD_REG_SIZE_256:]))
|
||||
c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec_256)
|
||||
m_vec[j] = simd.reduce_or(c_vecs[j])
|
||||
}
|
||||
if m_vec[0] | m_vec[1] > 0 {
|
||||
#unroll for j in 0..<2 {
|
||||
if m_vec[j] > 0 {
|
||||
sel := simd.select(c_vecs[j], SCANNER_INDICES_256, SCANNER_SENTINEL_MIN_256)
|
||||
off := simd.reduce_min(sel)
|
||||
return i + j * SIMD_REG_SIZE_256 + int(off)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
i += 2 * SIMD_REG_SIZE_256
|
||||
}
|
||||
} else {
|
||||
s_vecs: [4]simd.u8x16 = ---
|
||||
c_vecs: [4]simd.u8x16 = ---
|
||||
m_vecs: [4]u8 = ---
|
||||
|
||||
// Scan 64-byte chunks, using 128-bit SIMD.
|
||||
for nr_blocks := l / (4 * SIMD_REG_SIZE_128); nr_blocks > 0; nr_blocks -= 1 {
|
||||
#unroll for j in 0..<4 {
|
||||
s_vecs[j]= intrinsics.unaligned_load(cast(^simd.u8x16)raw_data(s[i+j*SIMD_REG_SIZE_128:]))
|
||||
c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec)
|
||||
m_vecs[j] = simd.reduce_or(c_vecs[j])
|
||||
}
|
||||
if m_vecs[0] | m_vecs[1] | m_vecs[2] | m_vecs[3] > 0 {
|
||||
#unroll for j in 0..<4 {
|
||||
if m_vecs[j] > 0 {
|
||||
sel := simd.select(c_vecs[j], SCANNER_INDICES_128, SCANNER_SENTINEL_MIN_128)
|
||||
off := simd.reduce_min(sel)
|
||||
return i + j * SIMD_REG_SIZE_128 + int(off)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
i += 4 * SIMD_REG_SIZE_128
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Scan the remaining SIMD register sized chunks.
|
||||
//
|
||||
// Apparently LLVM does ok with 128-bit SWAR, so this path is also taken
|
||||
// on potato targets. Scanning more at a time when LLVM is emulating SIMD
|
||||
// likely does not buy much, as all that does is increase GP register
|
||||
// pressure.
|
||||
for nr_blocks := (l - i) / SIMD_REG_SIZE_128; nr_blocks > 0; nr_blocks -= 1 {
|
||||
s0 := intrinsics.unaligned_load(cast(^simd.u8x16)raw_data(s[i:]))
|
||||
c0 := simd.lanes_eq(s0, c_vec)
|
||||
if simd.reduce_or(c0) > 0 {
|
||||
sel := simd.select(c0, SCANNER_INDICES_128, SCANNER_SENTINEL_MIN_128)
|
||||
off := simd.reduce_min(sel)
|
||||
return i + int(off)
|
||||
}
|
||||
|
||||
i += SIMD_REG_SIZE_128
|
||||
}
|
||||
|
||||
// Scan serially for the remainder.
|
||||
for /**/; i < l; i += 1 {
|
||||
if s[i] == c {
|
||||
return i
|
||||
}
|
||||
}
|
||||
|
||||
return -1
|
||||
}
|
||||
|
||||
// Returns -1 if c is not present
|
||||
last_index_byte :: proc(s: []byte, c: byte) -> int {
|
||||
for i := len(s)-1; i >= 0; i -= 1 {
|
||||
/*
|
||||
Scan a slice of bytes for a specific byte, starting from the end and working
|
||||
backwards to the start.
|
||||
|
||||
This procedure safely handles slices of any length, including empty slices.
|
||||
|
||||
Inputs:
|
||||
- s: A slice of bytes.
|
||||
- c: The byte to search for.
|
||||
|
||||
Returns:
|
||||
- index: The index of the byte `c`, or -1 if it was not found.
|
||||
*/
|
||||
last_index_byte :: proc(s: []byte, c: byte) -> int #no_bounds_check {
|
||||
i := len(s)
|
||||
|
||||
// Guard against small strings. On modern systems, it is ALWAYS
|
||||
// worth vectorizing assuming there is a hardware vector unit, and
|
||||
// the data size is large enough.
|
||||
if i < SIMD_REG_SIZE_128 {
|
||||
#reverse for ch, j in s {
|
||||
if ch == c {
|
||||
return j
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
c_vec: simd.u8x16 = c
|
||||
when !simd.IS_EMULATED {
|
||||
// Note: While this is something that could also logically take
|
||||
// advantage of AVX512, the various downclocking and power
|
||||
// consumption related woes make it premature to have a dedicated
|
||||
// code path.
|
||||
when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
|
||||
c_vec_256: simd.u8x32 = c
|
||||
|
||||
s_vecs: [4]simd.u8x32 = ---
|
||||
c_vecs: [4]simd.u8x32 = ---
|
||||
m_vec: [4]u8 = ---
|
||||
|
||||
// Scan 128-byte chunks, using 256-bit SIMD.
|
||||
for i >= 4 * SIMD_REG_SIZE_256 {
|
||||
i -= 4 * SIMD_REG_SIZE_256
|
||||
|
||||
#unroll for j in 0..<4 {
|
||||
s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x32)raw_data(s[i+j*SIMD_REG_SIZE_256:]))
|
||||
c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec_256)
|
||||
m_vec[j] = simd.reduce_or(c_vecs[j])
|
||||
}
|
||||
if m_vec[0] | m_vec[1] | m_vec[2] | m_vec[3] > 0 {
|
||||
#unroll for j in 0..<4 {
|
||||
if m_vec[3-j] > 0 {
|
||||
sel := simd.select(c_vecs[3-j], SCANNER_INDICES_256, SCANNER_SENTINEL_MAX_256)
|
||||
off := simd.reduce_max(sel)
|
||||
return i + (3-j) * SIMD_REG_SIZE_256 + int(off)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Scan 64-byte chunks, using 256-bit SIMD.
|
||||
for i >= 2 * SIMD_REG_SIZE_256 {
|
||||
i -= 2 * SIMD_REG_SIZE_256
|
||||
|
||||
#unroll for j in 0..<2 {
|
||||
s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x32)raw_data(s[i+j*SIMD_REG_SIZE_256:]))
|
||||
c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec_256)
|
||||
m_vec[j] = simd.reduce_or(c_vecs[j])
|
||||
}
|
||||
if m_vec[0] | m_vec[1] > 0 {
|
||||
#unroll for j in 0..<2 {
|
||||
if m_vec[1-j] > 0 {
|
||||
sel := simd.select(c_vecs[1-j], SCANNER_INDICES_256, SCANNER_SENTINEL_MAX_256)
|
||||
off := simd.reduce_max(sel)
|
||||
return i + (1-j) * SIMD_REG_SIZE_256 + int(off)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
s_vecs: [4]simd.u8x16 = ---
|
||||
c_vecs: [4]simd.u8x16 = ---
|
||||
m_vecs: [4]u8 = ---
|
||||
|
||||
// Scan 64-byte chunks, using 128-bit SIMD.
|
||||
for i >= 4 * SIMD_REG_SIZE_128 {
|
||||
i -= 4 * SIMD_REG_SIZE_128
|
||||
|
||||
#unroll for j in 0..<4 {
|
||||
s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x16)raw_data(s[i+j*SIMD_REG_SIZE_128:]))
|
||||
c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec)
|
||||
m_vecs[j] = simd.reduce_or(c_vecs[j])
|
||||
}
|
||||
if m_vecs[0] | m_vecs[1] | m_vecs[2] | m_vecs[3] > 0 {
|
||||
#unroll for j in 0..<4 {
|
||||
if m_vecs[3-j] > 0 {
|
||||
sel := simd.select(c_vecs[3-j], SCANNER_INDICES_128, SCANNER_SENTINEL_MAX_128)
|
||||
off := simd.reduce_max(sel)
|
||||
return i + (3-j) * SIMD_REG_SIZE_128 + int(off)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Scan the remaining SIMD register sized chunks.
|
||||
//
|
||||
// Apparently LLVM does ok with 128-bit SWAR, so this path is also taken
|
||||
// on potato targets. Scanning more at a time when LLVM is emulating SIMD
|
||||
// likely does not buy much, as all that does is increase GP register
|
||||
// pressure.
|
||||
for i >= SIMD_REG_SIZE_128 {
|
||||
i -= SIMD_REG_SIZE_128
|
||||
|
||||
s0 := intrinsics.unaligned_load(cast(^simd.u8x16)raw_data(s[i:]))
|
||||
c0 := simd.lanes_eq(s0, c_vec)
|
||||
if simd.reduce_or(c0) > 0 {
|
||||
sel := simd.select(c0, SCANNER_INDICES_128, SCANNER_SENTINEL_MAX_128)
|
||||
off := simd.reduce_max(sel)
|
||||
return i + int(off)
|
||||
}
|
||||
}
|
||||
|
||||
// Scan serially for the remainder.
|
||||
for i > 0 {
|
||||
i -= 1
|
||||
if s[i] == c {
|
||||
return i
|
||||
}
|
||||
}
|
||||
|
||||
return -1
|
||||
}
|
||||
|
||||
|
||||
|
||||
@private PRIME_RABIN_KARP :: 16777619
|
||||
|
||||
index :: proc(s, substr: []byte) -> int {
|
||||
|
||||
@@ -9,10 +9,11 @@ Reader :: struct {
|
||||
prev_rune: int, // previous reading index of rune or < 0
|
||||
}
|
||||
|
||||
reader_init :: proc(r: ^Reader, s: []byte) {
|
||||
reader_init :: proc(r: ^Reader, s: []byte) -> io.Stream {
|
||||
r.s = s
|
||||
r.i = 0
|
||||
r.prev_rune = -1
|
||||
return reader_to_stream(r)
|
||||
}
|
||||
|
||||
reader_to_stream :: proc(r: ^Reader) -> (s: io.Stream) {
|
||||
@@ -33,6 +34,9 @@ reader_size :: proc(r: ^Reader) -> i64 {
|
||||
}
|
||||
|
||||
reader_read :: proc(r: ^Reader, p: []byte) -> (n: int, err: io.Error) {
|
||||
if len(p) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
if r.i >= i64(len(r.s)) {
|
||||
return 0, .EOF
|
||||
}
|
||||
@@ -42,6 +46,9 @@ reader_read :: proc(r: ^Reader, p: []byte) -> (n: int, err: io.Error) {
|
||||
return
|
||||
}
|
||||
reader_read_at :: proc(r: ^Reader, p: []byte, off: i64) -> (n: int, err: io.Error) {
|
||||
if len(p) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
if off < 0 {
|
||||
return 0, .Invalid_Offset
|
||||
}
|
||||
@@ -97,7 +104,6 @@ reader_unread_rune :: proc(r: ^Reader) -> io.Error {
|
||||
return nil
|
||||
}
|
||||
reader_seek :: proc(r: ^Reader, offset: i64, whence: io.Seek_From) -> (i64, io.Error) {
|
||||
r.prev_rune = -1
|
||||
abs: i64
|
||||
switch whence {
|
||||
case .Start:
|
||||
@@ -114,6 +120,7 @@ reader_seek :: proc(r: ^Reader, offset: i64, whence: io.Seek_From) -> (i64, io.E
|
||||
return 0, .Invalid_Offset
|
||||
}
|
||||
r.i = abs
|
||||
r.prev_rune = -1
|
||||
return abs, nil
|
||||
}
|
||||
reader_write_to :: proc(r: ^Reader, w: io.Writer) -> (n: i64, err: io.Error) {
|
||||
|
||||
@@ -47,8 +47,8 @@ foreign libc {
|
||||
clogf :: proc(z: complex_float) -> complex_float ---
|
||||
|
||||
// 7.3.8 Power and absolute-value functions
|
||||
cabs :: proc(z: complex_double) -> complex_double ---
|
||||
cabsf :: proc(z: complex_float) -> complex_float ---
|
||||
cabs :: proc(z: complex_double) -> double ---
|
||||
cabsf :: proc(z: complex_float) -> float ---
|
||||
cpow :: proc(x, y: complex_double) -> complex_double ---
|
||||
cpowf :: proc(x, y: complex_float) -> complex_float ---
|
||||
csqrt :: proc(z: complex_double) -> complex_double ---
|
||||
|
||||
@@ -102,6 +102,6 @@ when ODIN_OS == .Haiku {
|
||||
// read the value, or to produce an lvalue such that you can assign a different
|
||||
// error value to errno. To work around this, just expose it as a function like
|
||||
// it actually is.
|
||||
errno :: #force_inline proc() -> ^int {
|
||||
errno :: #force_inline proc "contextless" () -> ^int {
|
||||
return _get_errno()
|
||||
}
|
||||
|
||||
+12
-7
@@ -32,24 +32,21 @@ when ODIN_OS == .Windows {
|
||||
// the RDX register will contain zero and correctly set the flag to disable
|
||||
// stack unwinding.
|
||||
@(link_name="_setjmp")
|
||||
setjmp :: proc(env: ^jmp_buf, hack: rawptr = nil) -> int ---
|
||||
setjmp :: proc(env: ^jmp_buf, hack: rawptr = nil) -> int ---
|
||||
}
|
||||
} else {
|
||||
@(default_calling_convention="c")
|
||||
foreign libc {
|
||||
// 7.13.1 Save calling environment
|
||||
//
|
||||
// NOTE(dweiler): C11 requires setjmp be a macro, which means it won't
|
||||
// necessarily export a symbol named setjmp but rather _setjmp in the case
|
||||
// of musl, glibc, BSD libc, and msvcrt.
|
||||
@(link_name="_setjmp")
|
||||
setjmp :: proc(env: ^jmp_buf) -> int ---
|
||||
@(link_name=LSETJMP)
|
||||
setjmp :: proc(env: ^jmp_buf) -> int ---
|
||||
}
|
||||
}
|
||||
|
||||
@(default_calling_convention="c")
|
||||
foreign libc {
|
||||
// 7.13.2 Restore calling environment
|
||||
@(link_name=LLONGJMP)
|
||||
longjmp :: proc(env: ^jmp_buf, val: int) -> ! ---
|
||||
}
|
||||
|
||||
@@ -64,3 +61,11 @@ foreign libc {
|
||||
// The choice of 4096 bytes for storage of this type is more than enough on all
|
||||
// relevant platforms.
|
||||
jmp_buf :: struct #align(16) { _: [4096]char, }
|
||||
|
||||
when ODIN_OS == .NetBSD {
|
||||
@(private) LSETJMP :: "__setjmp14"
|
||||
@(private) LLONGJMP :: "__longjmp14"
|
||||
} else {
|
||||
@(private) LSETJMP :: "setjmp"
|
||||
@(private) LLONGJMP :: "longjmp"
|
||||
}
|
||||
|
||||
+36
-9
@@ -17,6 +17,12 @@ when ODIN_OS == .Windows {
|
||||
|
||||
FILE :: struct {}
|
||||
|
||||
Whence :: enum int {
|
||||
SET = SEEK_SET,
|
||||
CUR = SEEK_CUR,
|
||||
END = SEEK_END,
|
||||
}
|
||||
|
||||
// MSVCRT compatible.
|
||||
when ODIN_OS == .Windows {
|
||||
_IOFBF :: 0x0000
|
||||
@@ -101,6 +107,8 @@ when ODIN_OS == .OpenBSD || ODIN_OS == .NetBSD {
|
||||
SEEK_CUR :: 1
|
||||
SEEK_END :: 2
|
||||
|
||||
TMP_MAX :: 308915776
|
||||
|
||||
foreign libc {
|
||||
__sF: [3]FILE
|
||||
}
|
||||
@@ -128,6 +136,8 @@ when ODIN_OS == .FreeBSD {
|
||||
SEEK_CUR :: 1
|
||||
SEEK_END :: 2
|
||||
|
||||
TMP_MAX :: 308915776
|
||||
|
||||
foreign libc {
|
||||
@(link_name="__stderrp") stderr: ^FILE
|
||||
@(link_name="__stdinp") stdin: ^FILE
|
||||
@@ -195,10 +205,21 @@ when ODIN_OS == .Haiku {
|
||||
}
|
||||
}
|
||||
|
||||
when ODIN_OS == .NetBSD {
|
||||
@(private) LRENAME :: "__posix_rename"
|
||||
@(private) LFGETPOS :: "__fgetpos50"
|
||||
@(private) LFSETPOS :: "__fsetpos50"
|
||||
} else {
|
||||
@(private) LRENAME :: "rename"
|
||||
@(private) LFGETPOS :: "fgetpos"
|
||||
@(private) LFSETPOS :: "fsetpos"
|
||||
}
|
||||
|
||||
@(default_calling_convention="c")
|
||||
foreign libc {
|
||||
// 7.21.4 Operations on files
|
||||
remove :: proc(filename: cstring) -> int ---
|
||||
@(link_name=LRENAME)
|
||||
rename :: proc(old, new: cstring) -> int ---
|
||||
tmpfile :: proc() -> ^FILE ---
|
||||
tmpnam :: proc(s: [^]char) -> [^]char ---
|
||||
@@ -240,8 +261,10 @@ foreign libc {
|
||||
fwrite :: proc(ptr: rawptr, size: size_t, nmemb: size_t, stream: ^FILE) -> size_t ---
|
||||
|
||||
// 7.21.9 File positioning functions
|
||||
@(link_name=LFGETPOS)
|
||||
fgetpos :: proc(stream: ^FILE, pos: ^fpos_t) -> int ---
|
||||
fseek :: proc(stream: ^FILE, offset: long, whence: int) -> int ---
|
||||
fseek :: proc(stream: ^FILE, offset: long, whence: Whence) -> int ---
|
||||
@(link_name=LFSETPOS)
|
||||
fsetpos :: proc(stream: ^FILE, pos: ^fpos_t) -> int ---
|
||||
ftell :: proc(stream: ^FILE) -> long ---
|
||||
rewind :: proc(stream: ^FILE) ---
|
||||
@@ -288,11 +311,11 @@ to_stream :: proc(file: ^FILE) -> io.Stream {
|
||||
return 0, unknown_or_eof(file)
|
||||
}
|
||||
|
||||
if fseek(file, long(offset), SEEK_SET) != 0 {
|
||||
if fseek(file, long(offset), .SET) != 0 {
|
||||
return 0, unknown_or_eof(file)
|
||||
}
|
||||
|
||||
defer fseek(file, long(curr), SEEK_SET)
|
||||
defer fseek(file, long(curr), .SET)
|
||||
|
||||
n = i64(fread(raw_data(p), size_of(byte), len(p), file))
|
||||
if n == 0 { err = unknown_or_eof(file) }
|
||||
@@ -307,17 +330,21 @@ to_stream :: proc(file: ^FILE) -> io.Stream {
|
||||
return 0, unknown_or_eof(file)
|
||||
}
|
||||
|
||||
if fseek(file, long(offset), SEEK_SET) != 0 {
|
||||
if fseek(file, long(offset), .SET) != 0 {
|
||||
return 0, unknown_or_eof(file)
|
||||
}
|
||||
|
||||
defer fseek(file, long(curr), SEEK_SET)
|
||||
defer fseek(file, long(curr), .SET)
|
||||
|
||||
n = i64(fwrite(raw_data(p), size_of(byte), len(p), file))
|
||||
if n == 0 { err = unknown_or_eof(file) }
|
||||
|
||||
case .Seek:
|
||||
if fseek(file, long(offset), int(whence)) != 0 {
|
||||
#assert(int(Whence.SET) == int(io.Seek_From.Start))
|
||||
#assert(int(Whence.CUR) == int(io.Seek_From.Current))
|
||||
#assert(int(Whence.END) == int(io.Seek_From.End))
|
||||
|
||||
if fseek(file, long(offset), Whence(whence)) != 0 {
|
||||
return 0, unknown_or_eof(file)
|
||||
}
|
||||
|
||||
@@ -326,9 +353,9 @@ to_stream :: proc(file: ^FILE) -> io.Stream {
|
||||
if curr == -1 {
|
||||
return 0, unknown_or_eof(file)
|
||||
}
|
||||
defer fseek(file, curr, SEEK_SET)
|
||||
defer fseek(file, curr, .SET)
|
||||
|
||||
if fseek(file, 0, SEEK_END) != 0 {
|
||||
if fseek(file, 0, .END) != 0 {
|
||||
return 0, unknown_or_eof(file)
|
||||
}
|
||||
|
||||
@@ -341,7 +368,7 @@ to_stream :: proc(file: ^FILE) -> io.Stream {
|
||||
return 0, .Empty
|
||||
|
||||
case .Query:
|
||||
return io.query_utility({ .Close, .Flush, .Read, .Read_At, .Write, .Write_At, .Seek, .Size })
|
||||
return io.query_utility({ .Close, .Flush, .Read, .Read_At, .Write, .Write_At, .Seek, .Size, .Query })
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
+17
-4
@@ -40,10 +40,9 @@ when ODIN_OS == .Linux {
|
||||
}
|
||||
|
||||
|
||||
when ODIN_OS == .Darwin {
|
||||
when ODIN_OS == .Darwin || ODIN_OS == .FreeBSD || ODIN_OS == .OpenBSD {
|
||||
RAND_MAX :: 0x7fffffff
|
||||
|
||||
// GLIBC and MUSL only
|
||||
@(private="file")
|
||||
@(default_calling_convention="c")
|
||||
foreign libc {
|
||||
@@ -55,6 +54,20 @@ when ODIN_OS == .Darwin {
|
||||
}
|
||||
}
|
||||
|
||||
when ODIN_OS == .NetBSD {
|
||||
RAND_MAX :: 0x7fffffff
|
||||
|
||||
@(private="file")
|
||||
@(default_calling_convention="c")
|
||||
foreign libc {
|
||||
__mb_cur_max: size_t
|
||||
}
|
||||
|
||||
MB_CUR_MAX :: #force_inline proc() -> size_t {
|
||||
return __mb_cur_max
|
||||
}
|
||||
}
|
||||
|
||||
// C does not declare what these values should be, as an implementation is free
|
||||
// to use any two distinct values it wants to indicate success or failure.
|
||||
// However, nobody actually does and everyone appears to have agreed upon these
|
||||
@@ -99,7 +112,7 @@ foreign libc {
|
||||
at_quick_exit :: proc(func: proc "c" ()) -> int ---
|
||||
exit :: proc(status: int) -> ! ---
|
||||
_Exit :: proc(status: int) -> ! ---
|
||||
getenv :: proc(name: cstring) -> [^]char ---
|
||||
getenv :: proc(name: cstring) -> cstring ---
|
||||
quick_exit :: proc(status: int) -> ! ---
|
||||
system :: proc(cmd: cstring) -> int ---
|
||||
|
||||
@@ -150,4 +163,4 @@ aligned_free :: #force_inline proc "c" (ptr: rawptr) {
|
||||
} else {
|
||||
free(ptr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,7 +40,7 @@ foreign libc {
|
||||
strtok :: proc(s1: [^]char, s2: cstring) -> [^]char ---
|
||||
|
||||
// 7.24.6 Miscellaneous functions
|
||||
strerror :: proc(errnum: int) -> [^]char ---
|
||||
strerror :: proc(errnum: int) -> cstring ---
|
||||
strlen :: proc(s: cstring) -> size_t ---
|
||||
}
|
||||
memset :: proc "c" (s: rawptr, c: int, n: size_t) -> rawptr {
|
||||
|
||||
+29
-3
@@ -50,30 +50,56 @@ when ODIN_OS == .Linux || ODIN_OS == .FreeBSD || ODIN_OS == .Darwin || ODIN_OS =
|
||||
foreign libc {
|
||||
// 7.27.2 Time manipulation functions
|
||||
clock :: proc() -> clock_t ---
|
||||
@(link_name=LDIFFTIME)
|
||||
difftime :: proc(time1, time2: time_t) -> double ---
|
||||
@(link_name=LMKTIME)
|
||||
mktime :: proc(timeptr: ^tm) -> time_t ---
|
||||
@(link_name=LTIME)
|
||||
time :: proc(timer: ^time_t) -> time_t ---
|
||||
timespec_get :: proc(ts: ^timespec, base: int) -> int ---
|
||||
|
||||
// 7.27.3 Time conversion functions
|
||||
asctime :: proc(timeptr: ^tm) -> [^]char ---
|
||||
@(link_name=LCTIME)
|
||||
ctime :: proc(timer: ^time_t) -> [^]char ---
|
||||
@(link_name=LGMTIME)
|
||||
gmtime :: proc(timer: ^time_t) -> ^tm ---
|
||||
@(link_name=LLOCALTIME)
|
||||
localtime :: proc(timer: ^time_t) -> ^tm ---
|
||||
strftime :: proc(s: [^]char, maxsize: size_t, format: cstring, timeptr: ^tm) -> size_t ---
|
||||
}
|
||||
|
||||
when ODIN_OS == .NetBSD {
|
||||
@(private) LDIFFTIME :: "__difftime50"
|
||||
@(private) LMKTIME :: "__mktime50"
|
||||
@(private) LTIME :: "__time50"
|
||||
@(private) LCTIME :: "__ctime50"
|
||||
@(private) LGMTIME :: "__gmtime50"
|
||||
@(private) LLOCALTIME :: "__localtime50"
|
||||
} else {
|
||||
@(private) LDIFFTIME :: "difftime"
|
||||
@(private) LMKTIME :: "mktime"
|
||||
@(private) LTIME :: "time"
|
||||
@(private) LCTIME :: "ctime"
|
||||
@(private) LGMTIME :: "gmtime"
|
||||
@(private) LLOCALTIME :: "localtime"
|
||||
}
|
||||
|
||||
when ODIN_OS == .OpenBSD {
|
||||
CLOCKS_PER_SEC :: 100
|
||||
} else {
|
||||
CLOCKS_PER_SEC :: 1000000
|
||||
}
|
||||
|
||||
TIME_UTC :: 1
|
||||
TIME_UTC :: 1
|
||||
|
||||
time_t :: distinct i64
|
||||
time_t :: distinct i64
|
||||
|
||||
clock_t :: long
|
||||
when ODIN_OS == .FreeBSD || ODIN_OS == .NetBSD {
|
||||
clock_t :: distinct int32_t
|
||||
} else {
|
||||
clock_t :: distinct long
|
||||
}
|
||||
|
||||
timespec :: struct {
|
||||
tv_sec: time_t,
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
|
||||
Made available under Odin's BSD-3 license.
|
||||
|
||||
List of contributors:
|
||||
Jeroen van Rijn: Initial implementation.
|
||||
Ginger Bill: Cosmetic changes.
|
||||
|
||||
A small GZIP implementation as an example.
|
||||
*/
|
||||
|
||||
/*
|
||||
Example:
|
||||
import "core:bytes"
|
||||
import "core:os"
|
||||
import "core:compress"
|
||||
import "core:fmt"
|
||||
|
||||
// Small GZIP file with fextra, fname and fcomment present.
|
||||
@private
|
||||
TEST: []u8 = {
|
||||
0x1f, 0x8b, 0x08, 0x1c, 0xcb, 0x3b, 0x3a, 0x5a,
|
||||
0x02, 0x03, 0x07, 0x00, 0x61, 0x62, 0x03, 0x00,
|
||||
0x63, 0x64, 0x65, 0x66, 0x69, 0x6c, 0x65, 0x6e,
|
||||
0x61, 0x6d, 0x65, 0x00, 0x54, 0x68, 0x69, 0x73,
|
||||
0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x6f,
|
||||
0x6d, 0x6d, 0x65, 0x6e, 0x74, 0x00, 0x2b, 0x48,
|
||||
0xac, 0xcc, 0xc9, 0x4f, 0x4c, 0x01, 0x00, 0x15,
|
||||
0x6a, 0x2c, 0x42, 0x07, 0x00, 0x00, 0x00,
|
||||
}
|
||||
|
||||
main :: proc() {
|
||||
// Set up output buffer.
|
||||
buf := bytes.Buffer{}
|
||||
|
||||
stdout :: proc(s: string) {
|
||||
os.write_string(os.stdout, s)
|
||||
}
|
||||
stderr :: proc(s: string) {
|
||||
os.write_string(os.stderr, s)
|
||||
}
|
||||
|
||||
args := os.args
|
||||
|
||||
if len(args) < 2 {
|
||||
stderr("No input file specified.\n")
|
||||
err := load(data=TEST, buf=&buf, known_gzip_size=len(TEST))
|
||||
if err == nil {
|
||||
stdout("Displaying test vector: ")
|
||||
stdout(bytes.buffer_to_string(&buf))
|
||||
stdout("\n")
|
||||
} else {
|
||||
fmt.printf("gzip.load returned %v\n", err)
|
||||
}
|
||||
bytes.buffer_destroy(&buf)
|
||||
os.exit(0)
|
||||
}
|
||||
|
||||
// The rest are all files.
|
||||
args = args[1:]
|
||||
err: Error
|
||||
|
||||
for file in args {
|
||||
if file == "-" {
|
||||
// Read from stdin
|
||||
s := os.stream_from_handle(os.stdin)
|
||||
ctx := &compress.Context_Stream_Input{
|
||||
input = s,
|
||||
}
|
||||
err = load(ctx, &buf)
|
||||
} else {
|
||||
err = load(file, &buf)
|
||||
}
|
||||
if err != nil {
|
||||
if err == E_General.File_Not_Found {
|
||||
stderr("File not found: ")
|
||||
stderr(file)
|
||||
stderr("\n")
|
||||
os.exit(1)
|
||||
}
|
||||
stderr("GZIP returned an error.\n")
|
||||
bytes.buffer_destroy(&buf)
|
||||
os.exit(2)
|
||||
}
|
||||
stdout(bytes.buffer_to_string(&buf))
|
||||
}
|
||||
bytes.buffer_destroy(&buf)
|
||||
}
|
||||
*/
|
||||
package compress_gzip
|
||||
@@ -1,89 +0,0 @@
|
||||
//+build ignore
|
||||
package compress_gzip
|
||||
|
||||
/*
|
||||
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
|
||||
Made available under Odin's BSD-3 license.
|
||||
|
||||
List of contributors:
|
||||
Jeroen van Rijn: Initial implementation.
|
||||
Ginger Bill: Cosmetic changes.
|
||||
|
||||
A small GZIP implementation as an example.
|
||||
*/
|
||||
|
||||
import "core:bytes"
|
||||
import "core:os"
|
||||
import "core:compress"
|
||||
import "core:fmt"
|
||||
|
||||
// Small GZIP file with fextra, fname and fcomment present.
|
||||
@private
|
||||
TEST: []u8 = {
|
||||
0x1f, 0x8b, 0x08, 0x1c, 0xcb, 0x3b, 0x3a, 0x5a,
|
||||
0x02, 0x03, 0x07, 0x00, 0x61, 0x62, 0x03, 0x00,
|
||||
0x63, 0x64, 0x65, 0x66, 0x69, 0x6c, 0x65, 0x6e,
|
||||
0x61, 0x6d, 0x65, 0x00, 0x54, 0x68, 0x69, 0x73,
|
||||
0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x6f,
|
||||
0x6d, 0x6d, 0x65, 0x6e, 0x74, 0x00, 0x2b, 0x48,
|
||||
0xac, 0xcc, 0xc9, 0x4f, 0x4c, 0x01, 0x00, 0x15,
|
||||
0x6a, 0x2c, 0x42, 0x07, 0x00, 0x00, 0x00,
|
||||
}
|
||||
|
||||
main :: proc() {
|
||||
// Set up output buffer.
|
||||
buf := bytes.Buffer{}
|
||||
|
||||
stdout :: proc(s: string) {
|
||||
os.write_string(os.stdout, s)
|
||||
}
|
||||
stderr :: proc(s: string) {
|
||||
os.write_string(os.stderr, s)
|
||||
}
|
||||
|
||||
args := os.args
|
||||
|
||||
if len(args) < 2 {
|
||||
stderr("No input file specified.\n")
|
||||
err := load(data=TEST, buf=&buf, known_gzip_size=len(TEST))
|
||||
if err == nil {
|
||||
stdout("Displaying test vector: ")
|
||||
stdout(bytes.buffer_to_string(&buf))
|
||||
stdout("\n")
|
||||
} else {
|
||||
fmt.printf("gzip.load returned %v\n", err)
|
||||
}
|
||||
bytes.buffer_destroy(&buf)
|
||||
os.exit(0)
|
||||
}
|
||||
|
||||
// The rest are all files.
|
||||
args = args[1:]
|
||||
err: Error
|
||||
|
||||
for file in args {
|
||||
if file == "-" {
|
||||
// Read from stdin
|
||||
s := os.stream_from_handle(os.stdin)
|
||||
ctx := &compress.Context_Stream_Input{
|
||||
input = s,
|
||||
}
|
||||
err = load(ctx, &buf)
|
||||
} else {
|
||||
err = load(file, &buf)
|
||||
}
|
||||
if err != nil {
|
||||
if err != E_General.File_Not_Found {
|
||||
stderr("File not found: ")
|
||||
stderr(file)
|
||||
stderr("\n")
|
||||
os.exit(1)
|
||||
}
|
||||
stderr("GZIP returned an error.\n")
|
||||
bytes.buffer_destroy(&buf)
|
||||
os.exit(2)
|
||||
}
|
||||
stdout(bytes.buffer_to_string(&buf))
|
||||
}
|
||||
bytes.buffer_destroy(&buf)
|
||||
}
|
||||
@@ -4,7 +4,6 @@
|
||||
which is an English word model.
|
||||
*/
|
||||
|
||||
// package shoco is an implementation of the shoco short string compressor
|
||||
package compress_shoco
|
||||
|
||||
DEFAULT_MODEL :: Shoco_Model {
|
||||
@@ -145,4 +144,4 @@ DEFAULT_MODEL :: Shoco_Model {
|
||||
{ 0xc0000000, 2, 4, { 25, 22, 19, 16, 16, 16, 16, 16 }, { 15, 7, 7, 7, 0, 0, 0, 0 }, 0xe0, 0xc0 },
|
||||
{ 0xe0000000, 4, 8, { 23, 19, 15, 11, 8, 5, 2, 0 }, { 31, 15, 15, 15, 7, 7, 7, 3 }, 0xf0, 0xe0 },
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
An implementation of [shoco](https://github.com/Ed-von-Schleck/shoco) by Christian Schramm.
|
||||
*/
|
||||
|
||||
// package shoco is an implementation of the shoco short string compressor
|
||||
// package shoco is an implementation of the shoco short string compressor.
|
||||
package compress_shoco
|
||||
|
||||
import "base:intrinsics"
|
||||
@@ -308,4 +308,4 @@ compress_string :: proc(input: string, model := DEFAULT_MODEL, allocator := cont
|
||||
resize(&buf, length) or_return
|
||||
return buf[:length], result
|
||||
}
|
||||
compress :: proc{compress_string_to_buffer, compress_string}
|
||||
compress :: proc{compress_string_to_buffer, compress_string}
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
|
||||
Made available under Odin's BSD-3 license.
|
||||
|
||||
List of contributors:
|
||||
Jeroen van Rijn: Initial implementation.
|
||||
|
||||
An example of how to use `zlib.inflate`.
|
||||
*/
|
||||
|
||||
/*
|
||||
Example:
|
||||
package main
|
||||
|
||||
import "core:bytes"
|
||||
import "core:fmt"
|
||||
|
||||
main :: proc() {
|
||||
ODIN_DEMO := []u8{
|
||||
120, 218, 101, 144, 65, 110, 131, 48, 16, 69, 215, 246, 41, 190, 44, 69, 73, 32, 148, 182,
|
||||
75, 75, 28, 32, 251, 46, 217, 88, 238, 0, 86, 192, 32, 219, 36, 170, 170, 172, 122, 137,
|
||||
238, 122, 197, 30, 161, 70, 162, 20, 81, 203, 139, 25, 191, 255, 191, 60, 51, 40, 125, 81,
|
||||
53, 33, 144, 15, 156, 155, 110, 232, 93, 128, 208, 189, 35, 89, 117, 65, 112, 222, 41, 99,
|
||||
33, 37, 6, 215, 235, 195, 17, 239, 156, 197, 170, 118, 170, 131, 44, 32, 82, 164, 72, 240,
|
||||
253, 245, 249, 129, 12, 185, 224, 76, 105, 61, 118, 99, 171, 66, 239, 38, 193, 35, 103, 85,
|
||||
172, 66, 127, 33, 139, 24, 244, 235, 141, 49, 204, 223, 76, 208, 205, 204, 166, 7, 173, 60,
|
||||
97, 159, 238, 37, 214, 41, 105, 129, 167, 5, 102, 27, 152, 173, 97, 178, 129, 73, 129, 231,
|
||||
5, 230, 27, 152, 175, 225, 52, 192, 127, 243, 170, 157, 149, 18, 121, 142, 115, 109, 227, 122,
|
||||
64, 87, 114, 111, 161, 49, 182, 6, 181, 158, 162, 226, 206, 167, 27, 215, 246, 48, 56, 99,
|
||||
67, 117, 16, 47, 13, 45, 35, 151, 98, 231, 75, 1, 173, 90, 61, 101, 146, 71, 136, 244,
|
||||
170, 218, 145, 176, 123, 45, 173, 56, 113, 134, 191, 51, 219, 78, 235, 95, 28, 249, 253, 7,
|
||||
159, 150, 133, 125,
|
||||
}
|
||||
OUTPUT_SIZE :: 432
|
||||
|
||||
buf: bytes.Buffer
|
||||
|
||||
// We can pass ", true" to inflate a raw DEFLATE stream instead of a ZLIB wrapped one.
|
||||
err := inflate(input=ODIN_DEMO, buf=&buf, expected_output_size=OUTPUT_SIZE)
|
||||
defer bytes.buffer_destroy(&buf)
|
||||
|
||||
if err != nil {
|
||||
fmt.printf("\nError: %v\n", err)
|
||||
}
|
||||
s := bytes.buffer_to_string(&buf)
|
||||
fmt.printf("Input: %v bytes, output (%v bytes):\n%v\n", len(ODIN_DEMO), len(s), s)
|
||||
assert(len(s) == OUTPUT_SIZE)
|
||||
}
|
||||
*/
|
||||
package compress_zlib
|
||||
@@ -1,47 +0,0 @@
|
||||
//+build ignore
|
||||
package compress_zlib
|
||||
|
||||
/*
|
||||
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
|
||||
Made available under Odin's BSD-3 license.
|
||||
|
||||
List of contributors:
|
||||
Jeroen van Rijn: Initial implementation.
|
||||
|
||||
An example of how to use `zlib.inflate`.
|
||||
*/
|
||||
|
||||
import "core:bytes"
|
||||
import "core:fmt"
|
||||
|
||||
main :: proc() {
|
||||
ODIN_DEMO := []u8{
|
||||
120, 218, 101, 144, 65, 110, 131, 48, 16, 69, 215, 246, 41, 190, 44, 69, 73, 32, 148, 182,
|
||||
75, 75, 28, 32, 251, 46, 217, 88, 238, 0, 86, 192, 32, 219, 36, 170, 170, 172, 122, 137,
|
||||
238, 122, 197, 30, 161, 70, 162, 20, 81, 203, 139, 25, 191, 255, 191, 60, 51, 40, 125, 81,
|
||||
53, 33, 144, 15, 156, 155, 110, 232, 93, 128, 208, 189, 35, 89, 117, 65, 112, 222, 41, 99,
|
||||
33, 37, 6, 215, 235, 195, 17, 239, 156, 197, 170, 118, 170, 131, 44, 32, 82, 164, 72, 240,
|
||||
253, 245, 249, 129, 12, 185, 224, 76, 105, 61, 118, 99, 171, 66, 239, 38, 193, 35, 103, 85,
|
||||
172, 66, 127, 33, 139, 24, 244, 235, 141, 49, 204, 223, 76, 208, 205, 204, 166, 7, 173, 60,
|
||||
97, 159, 238, 37, 214, 41, 105, 129, 167, 5, 102, 27, 152, 173, 97, 178, 129, 73, 129, 231,
|
||||
5, 230, 27, 152, 175, 225, 52, 192, 127, 243, 170, 157, 149, 18, 121, 142, 115, 109, 227, 122,
|
||||
64, 87, 114, 111, 161, 49, 182, 6, 181, 158, 162, 226, 206, 167, 27, 215, 246, 48, 56, 99,
|
||||
67, 117, 16, 47, 13, 45, 35, 151, 98, 231, 75, 1, 173, 90, 61, 101, 146, 71, 136, 244,
|
||||
170, 218, 145, 176, 123, 45, 173, 56, 113, 134, 191, 51, 219, 78, 235, 95, 28, 249, 253, 7,
|
||||
159, 150, 133, 125,
|
||||
}
|
||||
OUTPUT_SIZE :: 432
|
||||
|
||||
buf: bytes.Buffer
|
||||
|
||||
// We can pass ", true" to inflate a raw DEFLATE stream instead of a ZLIB wrapped one.
|
||||
err := inflate(input=ODIN_DEMO, buf=&buf, expected_output_size=OUTPUT_SIZE)
|
||||
defer bytes.buffer_destroy(&buf)
|
||||
|
||||
if err != nil {
|
||||
fmt.printf("\nError: %v\n", err)
|
||||
}
|
||||
s := bytes.buffer_to_string(&buf)
|
||||
fmt.printf("Input: %v bytes, output (%v bytes):\n%v\n", len(ODIN_DEMO), len(s), s)
|
||||
assert(len(s) == OUTPUT_SIZE)
|
||||
}
|
||||
@@ -12,6 +12,7 @@ package compress_zlib
|
||||
|
||||
import "core:compress"
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:mem"
|
||||
import "core:io"
|
||||
import "core:hash"
|
||||
@@ -123,13 +124,7 @@ Huffman_Table :: struct {
|
||||
@(optimization_mode="favor_size")
|
||||
z_bit_reverse :: #force_inline proc(n: u16, bits: u8) -> (r: u16) {
|
||||
assert(bits <= 16)
|
||||
// NOTE: Can optimize with llvm.bitreverse.i64 or some bit twiddling
|
||||
// by reversing all of the bits and masking out the unneeded ones.
|
||||
r = n
|
||||
r = ((r & 0xAAAA) >> 1) | ((r & 0x5555) << 1)
|
||||
r = ((r & 0xCCCC) >> 2) | ((r & 0x3333) << 2)
|
||||
r = ((r & 0xF0F0) >> 4) | ((r & 0x0F0F) << 4)
|
||||
r = ((r & 0xFF00) >> 8) | ((r & 0x00FF) << 8)
|
||||
r = intrinsics.reverse_bits(n)
|
||||
|
||||
r >>= (16 - bits)
|
||||
return
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package container_dynamic_bit_array
|
||||
|
||||
import "base:builtin"
|
||||
import "base:intrinsics"
|
||||
import "core:mem"
|
||||
|
||||
@@ -18,7 +19,7 @@ NUM_BITS :: 64
|
||||
Bit_Array :: struct {
|
||||
bits: [dynamic]u64,
|
||||
bias: int,
|
||||
max_index: int,
|
||||
length: int,
|
||||
free_pointer: bool,
|
||||
}
|
||||
|
||||
@@ -52,9 +53,9 @@ Returns:
|
||||
*/
|
||||
iterate_by_all :: proc (it: ^Bit_Array_Iterator) -> (set: bool, index: int, ok: bool) {
|
||||
index = it.word_idx * NUM_BITS + int(it.bit_idx) + it.array.bias
|
||||
if index > it.array.max_index { return false, 0, false }
|
||||
if index >= it.array.length + it.array.bias { return false, 0, false }
|
||||
|
||||
word := it.array.bits[it.word_idx] if len(it.array.bits) > it.word_idx else 0
|
||||
word := it.array.bits[it.word_idx] if builtin.len(it.array.bits) > it.word_idx else 0
|
||||
set = (word >> it.bit_idx & 1) == 1
|
||||
|
||||
it.bit_idx += 1
|
||||
@@ -106,22 +107,22 @@ Returns:
|
||||
*/
|
||||
@(private="file")
|
||||
iterate_internal_ :: proc (it: ^Bit_Array_Iterator, $ITERATE_SET_BITS: bool) -> (index: int, ok: bool) {
|
||||
word := it.array.bits[it.word_idx] if len(it.array.bits) > it.word_idx else 0
|
||||
word := it.array.bits[it.word_idx] if builtin.len(it.array.bits) > it.word_idx else 0
|
||||
when ! ITERATE_SET_BITS { word = ~word }
|
||||
|
||||
// If the word is empty or we have already gone over all the bits in it,
|
||||
// it.bit_idx is greater than the index of any set bit in the word,
|
||||
// meaning that word >> it.bit_idx == 0.
|
||||
for it.word_idx < len(it.array.bits) && word >> it.bit_idx == 0 {
|
||||
for it.word_idx < builtin.len(it.array.bits) && word >> it.bit_idx == 0 {
|
||||
it.word_idx += 1
|
||||
it.bit_idx = 0
|
||||
word = it.array.bits[it.word_idx] if len(it.array.bits) > it.word_idx else 0
|
||||
word = it.array.bits[it.word_idx] if builtin.len(it.array.bits) > it.word_idx else 0
|
||||
when ! ITERATE_SET_BITS { word = ~word }
|
||||
}
|
||||
|
||||
// If we are iterating the set bits, reaching the end of the array means we have no more bits to check
|
||||
when ITERATE_SET_BITS {
|
||||
if it.word_idx >= len(it.array.bits) {
|
||||
if it.word_idx >= builtin.len(it.array.bits) {
|
||||
return 0, false
|
||||
}
|
||||
}
|
||||
@@ -135,7 +136,7 @@ iterate_internal_ :: proc (it: ^Bit_Array_Iterator, $ITERATE_SET_BITS: bool) ->
|
||||
it.bit_idx = 0
|
||||
it.word_idx += 1
|
||||
}
|
||||
return index, index <= it.array.max_index
|
||||
return index, index < it.array.length + it.array.bias
|
||||
}
|
||||
/*
|
||||
Gets the state of a bit in the bit-array
|
||||
@@ -160,7 +161,7 @@ get :: proc(ba: ^Bit_Array, #any_int index: uint) -> (res: bool, ok: bool) #opti
|
||||
If we `get` a bit that doesn't fit in the Bit Array, it's naturally `false`.
|
||||
This early-out prevents unnecessary resizing.
|
||||
*/
|
||||
if leg_index + 1 > len(ba.bits) { return false, true }
|
||||
if leg_index + 1 > builtin.len(ba.bits) { return false, true }
|
||||
|
||||
val := u64(1 << uint(bit_index))
|
||||
res = ba.bits[leg_index] & val == val
|
||||
@@ -208,7 +209,7 @@ set :: proc(ba: ^Bit_Array, #any_int index: uint, set_to: bool = true, allocator
|
||||
|
||||
resize_if_needed(ba, leg_index) or_return
|
||||
|
||||
ba.max_index = max(idx, ba.max_index)
|
||||
ba.length = max(1 + idx, ba.length)
|
||||
|
||||
if set_to {
|
||||
ba.bits[leg_index] |= 1 << uint(bit_index)
|
||||
@@ -261,6 +262,9 @@ unsafe_unset :: proc(b: ^Bit_Array, bit: int) #no_bounds_check {
|
||||
/*
|
||||
A helper function to create a Bit Array with optional bias, in case your smallest index is non-zero (including negative).
|
||||
|
||||
The range of bits created by this procedure is `min_index..<max_index`, and the
|
||||
array will be able to expand beyond `max_index` if needed.
|
||||
|
||||
*Allocates (`new(Bit_Array) & make(ba.bits)`)*
|
||||
|
||||
Inputs:
|
||||
@@ -275,7 +279,7 @@ create :: proc(max_index: int, min_index: int = 0, allocator := context.allocato
|
||||
context.allocator = allocator
|
||||
size_in_bits := max_index - min_index
|
||||
|
||||
if size_in_bits < 1 { return {}, false }
|
||||
if size_in_bits < 0 { return {}, false }
|
||||
|
||||
legs := size_in_bits >> INDEX_SHIFT
|
||||
if size_in_bits & INDEX_MASK > 0 {legs+=1}
|
||||
@@ -284,7 +288,7 @@ create :: proc(max_index: int, min_index: int = 0, allocator := context.allocato
|
||||
res = new(Bit_Array)
|
||||
res.bits = bits
|
||||
res.bias = min_index
|
||||
res.max_index = max_index
|
||||
res.length = max_index - min_index
|
||||
res.free_pointer = true
|
||||
return
|
||||
}
|
||||
@@ -299,6 +303,48 @@ clear :: proc(ba: ^Bit_Array) {
|
||||
mem.zero_slice(ba.bits[:])
|
||||
}
|
||||
/*
|
||||
Gets the length of set and unset valid bits in the Bit_Array.
|
||||
|
||||
Inputs:
|
||||
- ba: The target Bit_Array
|
||||
|
||||
Returns:
|
||||
- length: The length of valid bits.
|
||||
*/
|
||||
len :: proc(ba: ^Bit_Array) -> (length: int) {
|
||||
if ba == nil { return }
|
||||
return ba.length
|
||||
}
|
||||
/*
|
||||
Shrinks the Bit_Array's backing storage to the smallest possible size.
|
||||
|
||||
Inputs:
|
||||
- ba: The target Bit_Array
|
||||
*/
|
||||
shrink :: proc(ba: ^Bit_Array) #no_bounds_check {
|
||||
if ba == nil { return }
|
||||
legs_needed := builtin.len(ba.bits)
|
||||
for i := legs_needed - 1; i >= 0; i -= 1 {
|
||||
if ba.bits[i] == 0 {
|
||||
legs_needed -= 1
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
if legs_needed == builtin.len(ba.bits) {
|
||||
return
|
||||
}
|
||||
ba.length = 0
|
||||
if legs_needed > 0 {
|
||||
if legs_needed > 1 {
|
||||
ba.length = (legs_needed - 1) * NUM_BITS
|
||||
}
|
||||
ba.length += NUM_BITS - int(intrinsics.count_leading_zeros(ba.bits[legs_needed - 1]))
|
||||
}
|
||||
resize(&ba.bits, legs_needed)
|
||||
builtin.shrink(&ba.bits)
|
||||
}
|
||||
/*
|
||||
Deallocates the Bit_Array and its backing storage
|
||||
|
||||
Inputs:
|
||||
@@ -321,8 +367,8 @@ resize_if_needed :: proc(ba: ^Bit_Array, legs: int, allocator := context.allocat
|
||||
|
||||
context.allocator = allocator
|
||||
|
||||
if legs + 1 > len(ba.bits) {
|
||||
if legs + 1 > builtin.len(ba.bits) {
|
||||
resize(&ba.bits, legs + 1)
|
||||
}
|
||||
return len(ba.bits) > legs
|
||||
return builtin.len(ba.bits) > legs
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
/*
|
||||
The Bit Array can be used in several ways:
|
||||
|
||||
- By default you don't need to instantiate a Bit Array:
|
||||
|
||||
By default you don't need to instantiate a Bit Array.
|
||||
Example:
|
||||
package test
|
||||
|
||||
import "core:fmt"
|
||||
@@ -22,8 +22,8 @@ The Bit Array can be used in several ways:
|
||||
destroy(&bits)
|
||||
}
|
||||
|
||||
- A Bit Array can optionally allow for negative indices, if the minimum value was given during creation:
|
||||
|
||||
A Bit Array can optionally allow for negative indices, if the minimum value was given during creation.
|
||||
Example:
|
||||
package test
|
||||
|
||||
import "core:fmt"
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
/*
|
||||
Package list implements an intrusive doubly-linked list.
|
||||
|
||||
An intrusive container requires a `Node` to be embedded in your own structure, like this:
|
||||
|
||||
An intrusive container requires a `Node` to be embedded in your own structure, like this.
|
||||
Example:
|
||||
My_String :: struct {
|
||||
node: list.Node,
|
||||
value: string,
|
||||
}
|
||||
|
||||
Embedding the members of a `list.Node` in your structure with the `using` keyword is also allowed:
|
||||
|
||||
Embedding the members of a `list.Node` in your structure with the `using` keyword is also allowed.
|
||||
Example:
|
||||
My_String :: struct {
|
||||
using node: list.Node,
|
||||
value: string,
|
||||
}
|
||||
|
||||
Here is a full example:
|
||||
|
||||
Here is a full example.
|
||||
Example:
|
||||
package test
|
||||
|
||||
import "core:fmt"
|
||||
@@ -42,5 +42,8 @@ Here is a full example:
|
||||
value: string,
|
||||
}
|
||||
|
||||
Output:
|
||||
Hello
|
||||
World
|
||||
*/
|
||||
package container_intrusive_list
|
||||
|
||||
@@ -139,9 +139,13 @@ clear :: proc "contextless" (a: ^$A/Small_Array($N, $T)) {
|
||||
resize(a, 0)
|
||||
}
|
||||
|
||||
push_back_elems :: proc "contextless" (a: ^$A/Small_Array($N, $T), items: ..T) {
|
||||
n := copy(a.data[a.len:], items[:])
|
||||
a.len += n
|
||||
push_back_elems :: proc "contextless" (a: ^$A/Small_Array($N, $T), items: ..T) -> bool {
|
||||
if a.len + builtin.len(items) <= cap(a^) {
|
||||
n := copy(a.data[a.len:], items[:])
|
||||
a.len += n
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
inject_at :: proc "contextless" (a: ^$A/Small_Array($N, $T), item: T, index: int) -> bool #no_bounds_check {
|
||||
|
||||
@@ -80,8 +80,8 @@ ghash :: proc "contextless" (dst, key, data: []byte) {
|
||||
h2 := h0 ~ h1
|
||||
h2r := h0r ~ h1r
|
||||
|
||||
src: []byte
|
||||
for l > 0 {
|
||||
src: []byte = ---
|
||||
if l >= _aes.GHASH_BLOCK_SIZE {
|
||||
src = buf
|
||||
buf = buf[_aes.GHASH_BLOCK_SIZE:]
|
||||
|
||||
@@ -3,7 +3,7 @@ package aes_hw_intel
|
||||
|
||||
import "core:sys/info"
|
||||
|
||||
// is_supporte returns true iff hardware accelerated AES
|
||||
// is_supported returns true iff hardware accelerated AES
|
||||
// is supported.
|
||||
is_supported :: proc "contextless" () -> bool {
|
||||
features, ok := info.cpu_features.?
|
||||
|
||||
@@ -25,7 +25,6 @@ package aes_hw_intel
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto/_aes"
|
||||
import "core:simd"
|
||||
import "core:simd/x86"
|
||||
|
||||
@(private = "file")
|
||||
@@ -58,14 +57,11 @@ GHASH_STRIDE_BYTES_HW :: GHASH_STRIDE_HW * _aes.GHASH_BLOCK_SIZE
|
||||
// chunks. We number chunks from 0 to 3 in left to right order.
|
||||
|
||||
@(private = "file")
|
||||
byteswap_index := transmute(x86.__m128i)simd.i8x16{
|
||||
// Note: simd.i8x16 is reverse order from x86._mm_set_epi8.
|
||||
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
|
||||
}
|
||||
_BYTESWAP_INDEX: x86.__m128i : { 0x08090a0b0c0d0e0f, 0x0001020304050607 }
|
||||
|
||||
@(private = "file", require_results, enable_target_feature = "sse2,ssse3")
|
||||
byteswap :: #force_inline proc "contextless" (x: x86.__m128i) -> x86.__m128i {
|
||||
return x86._mm_shuffle_epi8(x, byteswap_index)
|
||||
return x86._mm_shuffle_epi8(x, _BYTESWAP_INDEX)
|
||||
}
|
||||
|
||||
// From a 128-bit value kw, compute kx as the XOR of the two 64-bit
|
||||
@@ -244,8 +240,8 @@ ghash :: proc "contextless" (dst, key, data: []byte) #no_bounds_check {
|
||||
}
|
||||
|
||||
// Process 1 block at a time
|
||||
src: []byte
|
||||
for l > 0 {
|
||||
src: []byte = ---
|
||||
if l >= _aes.GHASH_BLOCK_SIZE {
|
||||
src = buf
|
||||
buf = buf[_aes.GHASH_BLOCK_SIZE:]
|
||||
|
||||
@@ -0,0 +1,123 @@
|
||||
package _chacha20
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:encoding/endian"
|
||||
import "core:math/bits"
|
||||
import "core:mem"
|
||||
|
||||
// KEY_SIZE is the (X)ChaCha20 key size in bytes.
|
||||
KEY_SIZE :: 32
|
||||
// IV_SIZE is the ChaCha20 IV size in bytes.
|
||||
IV_SIZE :: 12
|
||||
// XIV_SIZE is the XChaCha20 IV size in bytes.
|
||||
XIV_SIZE :: 24
|
||||
|
||||
// MAX_CTR_IETF is the maximum counter value for the IETF flavor ChaCha20.
|
||||
MAX_CTR_IETF :: 0xffffffff
|
||||
// BLOCK_SIZE is the (X)ChaCha20 block size in bytes.
|
||||
BLOCK_SIZE :: 64
|
||||
// STATE_SIZE_U32 is the (X)ChaCha20 state size in u32s.
|
||||
STATE_SIZE_U32 :: 16
|
||||
// ROUNDS is the (X)ChaCha20 round count.
|
||||
ROUNDS :: 20
|
||||
|
||||
// SIGMA_0 is sigma[0:4].
|
||||
SIGMA_0: u32 : 0x61707865
|
||||
// SIGMA_1 is sigma[4:8].
|
||||
SIGMA_1: u32 : 0x3320646e
|
||||
// SIGMA_2 is sigma[8:12].
|
||||
SIGMA_2: u32 : 0x79622d32
|
||||
// SIGMA_3 is sigma[12:16].
|
||||
SIGMA_3: u32 : 0x6b206574
|
||||
|
||||
// Context is a ChaCha20 or XChaCha20 instance.
|
||||
Context :: struct {
|
||||
_s: [STATE_SIZE_U32]u32,
|
||||
_buffer: [BLOCK_SIZE]byte,
|
||||
_off: int,
|
||||
_is_ietf_flavor: bool,
|
||||
_is_initialized: bool,
|
||||
}
|
||||
|
||||
// init initializes a Context for ChaCha20 with the provided key and
|
||||
// iv.
|
||||
//
|
||||
// WARNING: This ONLY handles ChaCha20. XChaCha20 sub-key and IV
|
||||
// derivation is expected to be handled by the caller, so that the
|
||||
// HChaCha call can be suitably accelerated.
|
||||
init :: proc "contextless" (ctx: ^Context, key, iv: []byte, is_xchacha: bool) {
|
||||
if len(key) != KEY_SIZE || len(iv) != IV_SIZE {
|
||||
intrinsics.trap()
|
||||
}
|
||||
|
||||
k, n := key, iv
|
||||
|
||||
ctx._s[0] = SIGMA_0
|
||||
ctx._s[1] = SIGMA_1
|
||||
ctx._s[2] = SIGMA_2
|
||||
ctx._s[3] = SIGMA_3
|
||||
ctx._s[4] = endian.unchecked_get_u32le(k[0:4])
|
||||
ctx._s[5] = endian.unchecked_get_u32le(k[4:8])
|
||||
ctx._s[6] = endian.unchecked_get_u32le(k[8:12])
|
||||
ctx._s[7] = endian.unchecked_get_u32le(k[12:16])
|
||||
ctx._s[8] = endian.unchecked_get_u32le(k[16:20])
|
||||
ctx._s[9] = endian.unchecked_get_u32le(k[20:24])
|
||||
ctx._s[10] = endian.unchecked_get_u32le(k[24:28])
|
||||
ctx._s[11] = endian.unchecked_get_u32le(k[28:32])
|
||||
ctx._s[12] = 0
|
||||
ctx._s[13] = endian.unchecked_get_u32le(n[0:4])
|
||||
ctx._s[14] = endian.unchecked_get_u32le(n[4:8])
|
||||
ctx._s[15] = endian.unchecked_get_u32le(n[8:12])
|
||||
|
||||
ctx._off = BLOCK_SIZE
|
||||
ctx._is_ietf_flavor = !is_xchacha
|
||||
ctx._is_initialized = true
|
||||
}
|
||||
|
||||
// seek seeks the (X)ChaCha20 stream counter to the specified block.
|
||||
seek :: proc(ctx: ^Context, block_nr: u64) {
|
||||
assert(ctx._is_initialized)
|
||||
|
||||
if ctx._is_ietf_flavor {
|
||||
if block_nr > MAX_CTR_IETF {
|
||||
panic("crypto/chacha20: attempted to seek past maximum counter")
|
||||
}
|
||||
} else {
|
||||
ctx._s[13] = u32(block_nr >> 32)
|
||||
}
|
||||
ctx._s[12] = u32(block_nr)
|
||||
ctx._off = BLOCK_SIZE
|
||||
}
|
||||
|
||||
// reset sanitizes the Context. The Context must be re-initialized to
|
||||
// be used again.
|
||||
reset :: proc(ctx: ^Context) {
|
||||
mem.zero_explicit(&ctx._s, size_of(ctx._s))
|
||||
mem.zero_explicit(&ctx._buffer, size_of(ctx._buffer))
|
||||
|
||||
ctx._is_initialized = false
|
||||
}
|
||||
|
||||
check_counter_limit :: proc(ctx: ^Context, nr_blocks: int) {
|
||||
// Enforce the maximum consumed keystream per IV.
|
||||
//
|
||||
// While all modern "standard" definitions of ChaCha20 use
|
||||
// the IETF 32-bit counter, for XChaCha20 most common
|
||||
// implementations allow for a 64-bit counter.
|
||||
//
|
||||
// Honestly, the answer here is "use a MRAE primitive", but
|
||||
// go with "common" practice in the case of XChaCha20.
|
||||
|
||||
ERR_CTR_EXHAUSTED :: "crypto/chacha20: maximum (X)ChaCha20 keystream per IV reached"
|
||||
|
||||
if ctx._is_ietf_flavor {
|
||||
if u64(ctx._s[12]) + u64(nr_blocks) > MAX_CTR_IETF {
|
||||
panic(ERR_CTR_EXHAUSTED)
|
||||
}
|
||||
} else {
|
||||
ctr := (u64(ctx._s[13]) << 32) | u64(ctx._s[12])
|
||||
if _, carry := bits.add_u64(ctr, u64(nr_blocks), 0); carry != 0 {
|
||||
panic(ERR_CTR_EXHAUSTED)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,360 @@
|
||||
package chacha20_ref
|
||||
|
||||
import "core:crypto/_chacha20"
|
||||
import "core:encoding/endian"
|
||||
import "core:math/bits"
|
||||
|
||||
// stream_blocks generates `nr_blocks` keystream blocks from the state in
// `ctx`, one block at a time.  When `src` is non-nil each block is XORed
// with the corresponding bytes of `src` and written to `dst`, otherwise
// the raw keystream is written to `dst`.  The block counter (state words
// 12 and 13) is advanced by one per block.
//
// The counter limit is checked once on entry; `dst`/`src` sizes are not
// checked here.
stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int) {
	// Enforce the maximum consumed keystream per IV.
	_chacha20.check_counter_limit(ctx, nr_blocks)

	dst, src := dst, src
	st := &ctx._s
	for _ in 0 ..< nr_blocks {
		x0, x1, x2, x3 :=
			_chacha20.SIGMA_0, _chacha20.SIGMA_1, _chacha20.SIGMA_2, _chacha20.SIGMA_3
		x4, x5, x6, x7 := st[4], st[5], st[6], st[7]
		x8, x9, x10, x11 := st[8], st[9], st[10], st[11]
		x12, x13, x14, x15 := st[12], st[13], st[14], st[15]

		for i := _chacha20.ROUNDS; i > 0; i -= 2 {
			// Even when forcing inlining, manually inlining all of
			// these is decently faster.

			// quarterround(x, 0, 4, 8, 12)
			x0 += x4
			x12 = bits.rotate_left32(x12 ~ x0, 16)
			x8 += x12
			x4 = bits.rotate_left32(x4 ~ x8, 12)
			x0 += x4
			x12 = bits.rotate_left32(x12 ~ x0, 8)
			x8 += x12
			x4 = bits.rotate_left32(x4 ~ x8, 7)

			// quarterround(x, 1, 5, 9, 13)
			x1 += x5
			x13 = bits.rotate_left32(x13 ~ x1, 16)
			x9 += x13
			x5 = bits.rotate_left32(x5 ~ x9, 12)
			x1 += x5
			x13 = bits.rotate_left32(x13 ~ x1, 8)
			x9 += x13
			x5 = bits.rotate_left32(x5 ~ x9, 7)

			// quarterround(x, 2, 6, 10, 14)
			x2 += x6
			x14 = bits.rotate_left32(x14 ~ x2, 16)
			x10 += x14
			x6 = bits.rotate_left32(x6 ~ x10, 12)
			x2 += x6
			x14 = bits.rotate_left32(x14 ~ x2, 8)
			x10 += x14
			x6 = bits.rotate_left32(x6 ~ x10, 7)

			// quarterround(x, 3, 7, 11, 15)
			x3 += x7
			x15 = bits.rotate_left32(x15 ~ x3, 16)
			x11 += x15
			x7 = bits.rotate_left32(x7 ~ x11, 12)
			x3 += x7
			x15 = bits.rotate_left32(x15 ~ x3, 8)
			x11 += x15
			x7 = bits.rotate_left32(x7 ~ x11, 7)

			// quarterround(x, 0, 5, 10, 15)
			x0 += x5
			x15 = bits.rotate_left32(x15 ~ x0, 16)
			x10 += x15
			x5 = bits.rotate_left32(x5 ~ x10, 12)
			x0 += x5
			x15 = bits.rotate_left32(x15 ~ x0, 8)
			x10 += x15
			x5 = bits.rotate_left32(x5 ~ x10, 7)

			// quarterround(x, 1, 6, 11, 12)
			x1 += x6
			x12 = bits.rotate_left32(x12 ~ x1, 16)
			x11 += x12
			x6 = bits.rotate_left32(x6 ~ x11, 12)
			x1 += x6
			x12 = bits.rotate_left32(x12 ~ x1, 8)
			x11 += x12
			x6 = bits.rotate_left32(x6 ~ x11, 7)

			// quarterround(x, 2, 7, 8, 13)
			x2 += x7
			x13 = bits.rotate_left32(x13 ~ x2, 16)
			x8 += x13
			x7 = bits.rotate_left32(x7 ~ x8, 12)
			x2 += x7
			x13 = bits.rotate_left32(x13 ~ x2, 8)
			x8 += x13
			x7 = bits.rotate_left32(x7 ~ x8, 7)

			// quarterround(x, 3, 4, 9, 14)
			x3 += x4
			x14 = bits.rotate_left32(x14 ~ x3, 16)
			x9 += x14
			x4 = bits.rotate_left32(x4 ~ x9, 12)
			x3 += x4
			x14 = bits.rotate_left32(x14 ~ x3, 8)
			x9 += x14
			x4 = bits.rotate_left32(x4 ~ x9, 7)
		}

		// Feed-forward: add the input state to the permuted state.
		x0 += _chacha20.SIGMA_0
		x1 += _chacha20.SIGMA_1
		x2 += _chacha20.SIGMA_2
		x3 += _chacha20.SIGMA_3
		x4 += st[4]
		x5 += st[5]
		x6 += st[6]
		x7 += st[7]
		x8 += st[8]
		x9 += st[9]
		x10 += st[10]
		x11 += st[11]
		x12 += st[12]
		x13 += st[13]
		x14 += st[14]
		x15 += st[15]

		// - The caller(s) ensure that src/dst are valid.
		// - The compiler knows if the target is picky about alignment.

		#no_bounds_check {
			if src != nil {
				endian.unchecked_put_u32le(dst[0:4], endian.unchecked_get_u32le(src[0:4]) ~ x0)
				endian.unchecked_put_u32le(dst[4:8], endian.unchecked_get_u32le(src[4:8]) ~ x1)
				endian.unchecked_put_u32le(dst[8:12], endian.unchecked_get_u32le(src[8:12]) ~ x2)
				endian.unchecked_put_u32le(dst[12:16], endian.unchecked_get_u32le(src[12:16]) ~ x3)
				endian.unchecked_put_u32le(dst[16:20], endian.unchecked_get_u32le(src[16:20]) ~ x4)
				endian.unchecked_put_u32le(dst[20:24], endian.unchecked_get_u32le(src[20:24]) ~ x5)
				endian.unchecked_put_u32le(dst[24:28], endian.unchecked_get_u32le(src[24:28]) ~ x6)
				endian.unchecked_put_u32le(dst[28:32], endian.unchecked_get_u32le(src[28:32]) ~ x7)
				endian.unchecked_put_u32le(dst[32:36], endian.unchecked_get_u32le(src[32:36]) ~ x8)
				endian.unchecked_put_u32le(dst[36:40], endian.unchecked_get_u32le(src[36:40]) ~ x9)
				endian.unchecked_put_u32le(dst[40:44], endian.unchecked_get_u32le(src[40:44]) ~ x10)
				endian.unchecked_put_u32le(dst[44:48], endian.unchecked_get_u32le(src[44:48]) ~ x11)
				endian.unchecked_put_u32le(dst[48:52], endian.unchecked_get_u32le(src[48:52]) ~ x12)
				endian.unchecked_put_u32le(dst[52:56], endian.unchecked_get_u32le(src[52:56]) ~ x13)
				endian.unchecked_put_u32le(dst[56:60], endian.unchecked_get_u32le(src[56:60]) ~ x14)
				endian.unchecked_put_u32le(dst[60:64], endian.unchecked_get_u32le(src[60:64]) ~ x15)
				src = src[_chacha20.BLOCK_SIZE:]
			} else {
				endian.unchecked_put_u32le(dst[0:4], x0)
				endian.unchecked_put_u32le(dst[4:8], x1)
				endian.unchecked_put_u32le(dst[8:12], x2)
				endian.unchecked_put_u32le(dst[12:16], x3)
				endian.unchecked_put_u32le(dst[16:20], x4)
				endian.unchecked_put_u32le(dst[20:24], x5)
				endian.unchecked_put_u32le(dst[24:28], x6)
				endian.unchecked_put_u32le(dst[28:32], x7)
				endian.unchecked_put_u32le(dst[32:36], x8)
				endian.unchecked_put_u32le(dst[36:40], x9)
				endian.unchecked_put_u32le(dst[40:44], x10)
				endian.unchecked_put_u32le(dst[44:48], x11)
				endian.unchecked_put_u32le(dst[48:52], x12)
				endian.unchecked_put_u32le(dst[52:56], x13)
				endian.unchecked_put_u32le(dst[56:60], x14)
				endian.unchecked_put_u32le(dst[60:64], x15)
			}
			dst = dst[_chacha20.BLOCK_SIZE:]
		}

		// Increment the counter.  Overflow checking is done upon
		// entry into the routine, so a 64-bit increment safely
		// covers both cases.
		next_ctr := ((u64(st[13]) << 32) | u64(st[12])) + 1
		st[12] = u32(next_ctr)
		st[13] = u32(next_ctr >> 32)
	}
}
|
||||
|
||||
// hchacha20 runs the ChaCha permutation (ROUNDS rounds, without the final
// feed-forward addition) over the state formed from the SIGMA constants,
// the first 32 bytes of `key` and the first 16 bytes of `iv`, and writes
// state words 0..3 followed by words 12..15 to the first 32 bytes of
// `dst`, each as a little-endian u32.
hchacha20 :: proc "contextless" (dst, key, iv: []byte) {
	x0, x1, x2, x3 := _chacha20.SIGMA_0, _chacha20.SIGMA_1, _chacha20.SIGMA_2, _chacha20.SIGMA_3

	// Words 4..11 come from the key.
	x4 := endian.unchecked_get_u32le(key[0:4])
	x5 := endian.unchecked_get_u32le(key[4:8])
	x6 := endian.unchecked_get_u32le(key[8:12])
	x7 := endian.unchecked_get_u32le(key[12:16])
	x8 := endian.unchecked_get_u32le(key[16:20])
	x9 := endian.unchecked_get_u32le(key[20:24])
	x10 := endian.unchecked_get_u32le(key[24:28])
	x11 := endian.unchecked_get_u32le(key[28:32])

	// Words 12..15 come from the iv.
	x12 := endian.unchecked_get_u32le(iv[0:4])
	x13 := endian.unchecked_get_u32le(iv[4:8])
	x14 := endian.unchecked_get_u32le(iv[8:12])
	x15 := endian.unchecked_get_u32le(iv[12:16])

	for i := _chacha20.ROUNDS; i > 0; i -= 2 {
		// quarterround(x, 0, 4, 8, 12)
		x0 += x4
		x12 = bits.rotate_left32(x12 ~ x0, 16)
		x8 += x12
		x4 = bits.rotate_left32(x4 ~ x8, 12)
		x0 += x4
		x12 = bits.rotate_left32(x12 ~ x0, 8)
		x8 += x12
		x4 = bits.rotate_left32(x4 ~ x8, 7)

		// quarterround(x, 1, 5, 9, 13)
		x1 += x5
		x13 = bits.rotate_left32(x13 ~ x1, 16)
		x9 += x13
		x5 = bits.rotate_left32(x5 ~ x9, 12)
		x1 += x5
		x13 = bits.rotate_left32(x13 ~ x1, 8)
		x9 += x13
		x5 = bits.rotate_left32(x5 ~ x9, 7)

		// quarterround(x, 2, 6, 10, 14)
		x2 += x6
		x14 = bits.rotate_left32(x14 ~ x2, 16)
		x10 += x14
		x6 = bits.rotate_left32(x6 ~ x10, 12)
		x2 += x6
		x14 = bits.rotate_left32(x14 ~ x2, 8)
		x10 += x14
		x6 = bits.rotate_left32(x6 ~ x10, 7)

		// quarterround(x, 3, 7, 11, 15)
		x3 += x7
		x15 = bits.rotate_left32(x15 ~ x3, 16)
		x11 += x15
		x7 = bits.rotate_left32(x7 ~ x11, 12)
		x3 += x7
		x15 = bits.rotate_left32(x15 ~ x3, 8)
		x11 += x15
		x7 = bits.rotate_left32(x7 ~ x11, 7)

		// quarterround(x, 0, 5, 10, 15)
		x0 += x5
		x15 = bits.rotate_left32(x15 ~ x0, 16)
		x10 += x15
		x5 = bits.rotate_left32(x5 ~ x10, 12)
		x0 += x5
		x15 = bits.rotate_left32(x15 ~ x0, 8)
		x10 += x15
		x5 = bits.rotate_left32(x5 ~ x10, 7)

		// quarterround(x, 1, 6, 11, 12)
		x1 += x6
		x12 = bits.rotate_left32(x12 ~ x1, 16)
		x11 += x12
		x6 = bits.rotate_left32(x6 ~ x11, 12)
		x1 += x6
		x12 = bits.rotate_left32(x12 ~ x1, 8)
		x11 += x12
		x6 = bits.rotate_left32(x6 ~ x11, 7)

		// quarterround(x, 2, 7, 8, 13)
		x2 += x7
		x13 = bits.rotate_left32(x13 ~ x2, 16)
		x8 += x13
		x7 = bits.rotate_left32(x7 ~ x8, 12)
		x2 += x7
		x13 = bits.rotate_left32(x13 ~ x2, 8)
		x8 += x13
		x7 = bits.rotate_left32(x7 ~ x8, 7)

		// quarterround(x, 3, 4, 9, 14)
		x3 += x4
		x14 = bits.rotate_left32(x14 ~ x3, 16)
		x9 += x14
		x4 = bits.rotate_left32(x4 ~ x9, 12)
		x3 += x4
		x14 = bits.rotate_left32(x14 ~ x3, 8)
		x9 += x14
		x4 = bits.rotate_left32(x4 ~ x9, 7)
	}

	// Output is the first and last rows of the permuted state.
	endian.unchecked_put_u32le(dst[0:4], x0)
	endian.unchecked_put_u32le(dst[4:8], x1)
	endian.unchecked_put_u32le(dst[8:12], x2)
	endian.unchecked_put_u32le(dst[12:16], x3)
	endian.unchecked_put_u32le(dst[16:20], x12)
	endian.unchecked_put_u32le(dst[20:24], x13)
	endian.unchecked_put_u32le(dst[24:28], x14)
	endian.unchecked_put_u32le(dst[28:32], x15)
}
|
||||
@@ -0,0 +1,481 @@
|
||||
package chacha20_simd128
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto/_chacha20"
|
||||
import "core:simd"
|
||||
@(require) import "core:sys/info"
|
||||
|
||||
// Portable 128-bit `core:simd` implementation.
|
||||
//
|
||||
// This is loosely based on Ted Krovetz's public domain C intrinsic
|
||||
// implementation.
|
||||
//
|
||||
// This is written to perform adequately on any target that has "enough"
|
||||
// 128-bit vector registers, the current thought is that 4 blocks at a
|
||||
// time is reasonable for amd64, though Ted's code is more conservative.
|
||||
//
|
||||
// See:
|
||||
// supercop-20230530/crypto_stream/chacha20/krovetz/vec128
|
||||
|
||||
// Ensure the compiler emits SIMD instructions. This is a minimum, and
|
||||
// setting the microarchitecture at compile time will allow for better
|
||||
// code gen when applicable (eg: AVX). This is somewhat redundant with
|
||||
// the default microarchitecture configurations.
|
||||
// TARGET_SIMD_FEATURES is the per-architecture feature string handed to
// `@(enable_target_feature = ...)` on the procedures in this file.
when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 {
	@(private = "file")
	TARGET_SIMD_FEATURES :: "neon"
} else when ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 {
	// Note: LLVM appears to be smart enough to use PSHUFB despite not
	// explicitly using simd.u8x16 shuffles.
	@(private = "file")
	TARGET_SIMD_FEATURES :: "sse2,ssse3"
} else {
	// No specific features are requested on any other architecture.
	@(private = "file")
	TARGET_SIMD_FEATURES :: ""
}
|
||||
|
||||
// Per-lane shift counts for the 32-bit rotations in the quarter round.
// A left rotation by n is built from a left shift by n (`_ROT_nL`)
// combined with a right shift by 32-n (`_ROT_nR`); for n = 16 both
// shifts use the same count (`_ROT_16`).
@(private = "file")
_ROT_7L: simd.u32x4 : {7, 7, 7, 7}
@(private = "file")
_ROT_7R: simd.u32x4 : {25, 25, 25, 25}
@(private = "file")
_ROT_12L: simd.u32x4 : {12, 12, 12, 12}
@(private = "file")
_ROT_12R: simd.u32x4 : {20, 20, 20, 20}
@(private = "file")
_ROT_8L: simd.u32x4 : {8, 8, 8, 8}
@(private = "file")
_ROT_8R: simd.u32x4 : {24, 24, 24, 24}
@(private = "file")
_ROT_16: simd.u32x4 : {16, 16, 16, 16}
|
||||
|
||||
when ODIN_ENDIAN == .Big {
	// _increment_counter adds one to the 64-bit block counter kept in
	// state words 12 (low half) and 13 (high half) of `ctx`, stores the
	// result back into the Context, and returns the updated last state
	// row (words 12..15) as a vector.
	//
	// In the Big Endian case, the low and high portions in the vector
	// are flipped, so the 64-bit addition can't be done with a simple
	// vector add.
	@(private = "file")
	_increment_counter :: #force_inline proc "contextless" (ctx: ^_chacha20.Context) -> simd.u32x4 {
		x := &ctx._s

		new_ctr := ((u64(x[13]) << 32) | u64(x[12])) + 1
		x[12] = u32(new_ctr)
		x[13] = u32(new_ctr >> 32)

		return intrinsics.unaligned_load(transmute(^simd.u32x4)&x[12])
	}

	// Convert the endian-ness of the components of a u32x4 vector, for
	// the purposes of output.  The bytes of each 32-bit lane are
	// reversed; the order of the lanes is unchanged.
	@(private = "file")
	_byteswap_u32x4 :: #force_inline proc "contextless" (v: simd.u32x4) -> simd.u32x4 {
		return(
			transmute(simd.u32x4)simd.shuffle(
				transmute(simd.u8x16)v,
				transmute(simd.u8x16)v,
				3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12,
			)
		)
	}
} else {
	// Added to the last state row viewed as two u64 lanes: increments
	// the 64-bit counter in the low lane (state words 12 and 13) and
	// leaves the high lane (words 14 and 15) untouched.
	@(private = "file")
	_VEC_ONE: simd.u64x2 : {1, 0}
}
|
||||
|
||||
// _dq_round_simd128 applies one double round to a ChaCha state held as
// four row vectors: a quarter round across all four columns, a lane
// rotation of rows 1..3, the same quarter round again (now acting on the
// diagonals), and the inverse lane rotation.  The updated rows are
// returned in the same order as the arguments.
@(private = "file")
_dq_round_simd128 :: #force_inline proc "contextless" (
	v0, v1, v2, v3: simd.u32x4,
) -> (
	simd.u32x4,
	simd.u32x4,
	simd.u32x4,
	simd.u32x4,
) {
	a, b, c, d := v0, v1, v2, v3

	// a += b; d ^= a; d = ROTW16(d);
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_16), simd.shr(d, _ROT_16))

	// c += d; b ^= c; b = ROTW12(b);
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_12L), simd.shr(b, _ROT_12R))

	// a += b; d ^= a; d = ROTW8(d);
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_8L), simd.shr(d, _ROT_8R))

	// c += d; b ^= c; b = ROTW7(b);
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_7L), simd.shr(b, _ROT_7R))

	// b = ROTV1(b); c = ROTV2(c); d = ROTV3(d);
	b = simd.shuffle(b, b, 1, 2, 3, 0)
	c = simd.shuffle(c, c, 2, 3, 0, 1)
	d = simd.shuffle(d, d, 3, 0, 1, 2)

	// a += b; d ^= a; d = ROTW16(d);
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_16), simd.shr(d, _ROT_16))

	// c += d; b ^= c; b = ROTW12(b);
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_12L), simd.shr(b, _ROT_12R))

	// a += b; d ^= a; d = ROTW8(d);
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_8L), simd.shr(d, _ROT_8R))

	// c += d; b ^= c; b = ROTW7(b);
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_7L), simd.shr(b, _ROT_7R))

	// b = ROTV3(b); c = ROTV2(c); d = ROTV1(d);
	b = simd.shuffle(b, b, 3, 0, 1, 2)
	c = simd.shuffle(c, c, 2, 3, 0, 1)
	d = simd.shuffle(d, d, 1, 2, 3, 0)

	return a, b, c, d
}
|
||||
|
||||
// _add_state_simd128 adds the input state rows `s0..s3` to the permuted
// rows `v0..v3` lane-wise.  On Big Endian targets each 32-bit lane of the
// sums is additionally byte-swapped.
@(private = "file")
_add_state_simd128 :: #force_inline proc "contextless" (
	v0, v1, v2, v3, s0, s1, s2, s3: simd.u32x4,
) -> (
	simd.u32x4,
	simd.u32x4,
	simd.u32x4,
	simd.u32x4,
) {
	r0 := simd.add(v0, s0)
	r1 := simd.add(v1, s1)
	r2 := simd.add(v2, s2)
	r3 := simd.add(v3, s3)

	when ODIN_ENDIAN == .Big {
		r0 = _byteswap_u32x4(r0)
		r1 = _byteswap_u32x4(r1)
		r2 = _byteswap_u32x4(r2)
		r3 = _byteswap_u32x4(r3)
	}

	return r0, r1, r2, r3
}
|
||||
|
||||
// _xor_simd128 XORs the four vectors `v0..v3` with the four consecutive
// 128-bit vectors starting at `src` (read with unaligned loads) and
// returns the results.
@(private = "file")
_xor_simd128 :: #force_inline proc "contextless" (
	src: [^]simd.u32x4,
	v0, v1, v2, v3: simd.u32x4,
) -> (
	simd.u32x4,
	simd.u32x4,
	simd.u32x4,
	simd.u32x4,
) {
	in0 := intrinsics.unaligned_load(&src[0])
	in1 := intrinsics.unaligned_load(&src[1])
	in2 := intrinsics.unaligned_load(&src[2])
	in3 := intrinsics.unaligned_load(&src[3])

	return simd.bit_xor(v0, in0), simd.bit_xor(v1, in1), simd.bit_xor(v2, in2), simd.bit_xor(v3, in3)
}
|
||||
|
||||
// _store_simd128 writes `v0..v3` to the four consecutive 128-bit slots
// starting at `dst`, using unaligned stores.
@(private = "file")
_store_simd128 :: #force_inline proc "contextless" (
	dst: [^]simd.u32x4,
	v0, v1, v2, v3: simd.u32x4,
) {
	intrinsics.unaligned_store(&dst[0], v0)
	intrinsics.unaligned_store(&dst[1], v1)
	intrinsics.unaligned_store(&dst[2], v2)
	intrinsics.unaligned_store(&dst[3], v3)
}
|
||||
|
||||
// is_performant reports whether the target and the current host both
// support "enough" 128-bit SIMD for this implementation to be worthwhile.
//
// On ARM and x86 targets the host CPU features are queried at runtime
// (false if they are unavailable); on WebAssembly the compile-time
// `simd128` target feature decides; all other targets return false.
is_performant :: proc "contextless" () -> bool {
	when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 || ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 {
		when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 {
			req_features :: info.CPU_Features{.asimd}
		} else when ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 {
			req_features :: info.CPU_Features{.sse2, .ssse3}
		}

		// `>=` on bit sets: every required feature must be present.
		features, ok := info.cpu_features.?
		return ok && features >= req_features
	} else when ODIN_ARCH == .wasm64p32 || ODIN_ARCH == .wasm32 {
		return intrinsics.has_target_feature("simd128")
	} else {
		return false
	}
}
|
||||
|
||||
// stream_blocks generates `nr_blocks` keystream blocks from the state in
// `ctx` using 128-bit vectors.  When `src` is non-nil the keystream is
// XORed with `src` before being written to `dst`, otherwise the raw
// keystream is written.  Blocks are processed 8 at a time (arm64 only),
// then 4 at a time (everywhere except i386), then singly.
//
// The counter limit is checked once on entry.  `dst`/`src` are accessed
// through raw pointers, so their sizes are not checked here.
@(enable_target_feature = TARGET_SIMD_FEATURES)
stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int) {
	// Enforce the maximum consumed keystream per IV.
	_chacha20.check_counter_limit(ctx, nr_blocks)

	out_v := ([^]simd.u32x4)(raw_data(dst))
	in_v := ([^]simd.u32x4)(raw_data(src))
	remaining := nr_blocks

	// The state vector is an array of uint32s in native byte-order;
	// load it as four rows of four words.
	state := &ctx._s
	state_v := ([^]simd.u32x4)(raw_data(state))
	s0 := intrinsics.unaligned_load((^simd.u32x4)(state_v[0:]))
	s1 := intrinsics.unaligned_load((^simd.u32x4)(state_v[1:]))
	s2 := intrinsics.unaligned_load((^simd.u32x4)(state_v[2:]))
	s3 := intrinsics.unaligned_load((^simd.u32x4)(state_v[3:]))

	// 8 blocks at a time.
	//
	// Note: This is only worth it on Aarch64.
	when ODIN_ARCH == .arm64 {
		for ; remaining >= 8; remaining -= 8 {
			// Counter rows for blocks 1..7; block 0 uses s3 as-is.
			when ODIN_ENDIAN == .Little {
				s7 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s3, _VEC_ONE)
				s11 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s7, _VEC_ONE)
				s15 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s11, _VEC_ONE)
				s19 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s15, _VEC_ONE)
				s23 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s19, _VEC_ONE)
				s27 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s23, _VEC_ONE)
				s31 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s27, _VEC_ONE)
			} else {
				s7 := _increment_counter(ctx)
				s11 := _increment_counter(ctx)
				s15 := _increment_counter(ctx)
				s19 := _increment_counter(ctx)
				s23 := _increment_counter(ctx)
				s27 := _increment_counter(ctx)
				s31 := _increment_counter(ctx)
			}

			// Working copies: rows 0..2 are shared, row 3 differs
			// per block in its counter.
			v0, v1, v2, v3 := s0, s1, s2, s3
			v4, v5, v6, v7 := s0, s1, s2, s7
			v8, v9, v10, v11 := s0, s1, s2, s11
			v12, v13, v14, v15 := s0, s1, s2, s15
			v16, v17, v18, v19 := s0, s1, s2, s19
			v20, v21, v22, v23 := s0, s1, s2, s23
			v24, v25, v26, v27 := s0, s1, s2, s27
			v28, v29, v30, v31 := s0, s1, s2, s31

			for i := _chacha20.ROUNDS; i > 0; i -= 2 {
				v0, v1, v2, v3 = _dq_round_simd128(v0, v1, v2, v3)
				v4, v5, v6, v7 = _dq_round_simd128(v4, v5, v6, v7)
				v8, v9, v10, v11 = _dq_round_simd128(v8, v9, v10, v11)
				v12, v13, v14, v15 = _dq_round_simd128(v12, v13, v14, v15)
				v16, v17, v18, v19 = _dq_round_simd128(v16, v17, v18, v19)
				v20, v21, v22, v23 = _dq_round_simd128(v20, v21, v22, v23)
				v24, v25, v26, v27 = _dq_round_simd128(v24, v25, v26, v27)
				v28, v29, v30, v31 = _dq_round_simd128(v28, v29, v30, v31)
			}

			v0, v1, v2, v3 = _add_state_simd128(v0, v1, v2, v3, s0, s1, s2, s3)
			v4, v5, v6, v7 = _add_state_simd128(v4, v5, v6, v7, s0, s1, s2, s7)
			v8, v9, v10, v11 = _add_state_simd128(v8, v9, v10, v11, s0, s1, s2, s11)
			v12, v13, v14, v15 = _add_state_simd128(v12, v13, v14, v15, s0, s1, s2, s15)
			v16, v17, v18, v19 = _add_state_simd128(v16, v17, v18, v19, s0, s1, s2, s19)
			v20, v21, v22, v23 = _add_state_simd128(v20, v21, v22, v23, s0, s1, s2, s23)
			v24, v25, v26, v27 = _add_state_simd128(v24, v25, v26, v27, s0, s1, s2, s27)
			v28, v29, v30, v31 = _add_state_simd128(v28, v29, v30, v31, s0, s1, s2, s31)

			#no_bounds_check {
				if src != nil {
					v0, v1, v2, v3 = _xor_simd128(in_v, v0, v1, v2, v3)
					v4, v5, v6, v7 = _xor_simd128(in_v[4:], v4, v5, v6, v7)
					v8, v9, v10, v11 = _xor_simd128(in_v[8:], v8, v9, v10, v11)
					v12, v13, v14, v15 = _xor_simd128(in_v[12:], v12, v13, v14, v15)
					v16, v17, v18, v19 = _xor_simd128(in_v[16:], v16, v17, v18, v19)
					v20, v21, v22, v23 = _xor_simd128(in_v[20:], v20, v21, v22, v23)
					v24, v25, v26, v27 = _xor_simd128(in_v[24:], v24, v25, v26, v27)
					v28, v29, v30, v31 = _xor_simd128(in_v[28:], v28, v29, v30, v31)
					in_v = in_v[32:]
				}

				_store_simd128(out_v, v0, v1, v2, v3)
				_store_simd128(out_v[4:], v4, v5, v6, v7)
				_store_simd128(out_v[8:], v8, v9, v10, v11)
				_store_simd128(out_v[12:], v12, v13, v14, v15)
				_store_simd128(out_v[16:], v16, v17, v18, v19)
				_store_simd128(out_v[20:], v20, v21, v22, v23)
				_store_simd128(out_v[24:], v24, v25, v26, v27)
				_store_simd128(out_v[28:], v28, v29, v30, v31)
				out_v = out_v[32:]
			}

			when ODIN_ENDIAN == .Little {
				// s31 holds the most current counter, so `s3 = s31 + 1`.
				s3 = transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s31, _VEC_ONE)
			} else {
				s3 = _increment_counter(ctx)
			}
		}
	}

	// 4 blocks at a time.
	//
	// Note: The i386 target lacks the required number of registers
	// for this to be performant, so it is skipped.
	when ODIN_ARCH != .i386 {
		for ; remaining >= 4; remaining -= 4 {
			// Counter rows for blocks 1..3; block 0 uses s3 as-is.
			when ODIN_ENDIAN == .Little {
				s7 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s3, _VEC_ONE)
				s11 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s7, _VEC_ONE)
				s15 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s11, _VEC_ONE)
			} else {
				s7 := _increment_counter(ctx)
				s11 := _increment_counter(ctx)
				s15 := _increment_counter(ctx)
			}

			v0, v1, v2, v3 := s0, s1, s2, s3
			v4, v5, v6, v7 := s0, s1, s2, s7
			v8, v9, v10, v11 := s0, s1, s2, s11
			v12, v13, v14, v15 := s0, s1, s2, s15

			for i := _chacha20.ROUNDS; i > 0; i -= 2 {
				v0, v1, v2, v3 = _dq_round_simd128(v0, v1, v2, v3)
				v4, v5, v6, v7 = _dq_round_simd128(v4, v5, v6, v7)
				v8, v9, v10, v11 = _dq_round_simd128(v8, v9, v10, v11)
				v12, v13, v14, v15 = _dq_round_simd128(v12, v13, v14, v15)
			}

			v0, v1, v2, v3 = _add_state_simd128(v0, v1, v2, v3, s0, s1, s2, s3)
			v4, v5, v6, v7 = _add_state_simd128(v4, v5, v6, v7, s0, s1, s2, s7)
			v8, v9, v10, v11 = _add_state_simd128(v8, v9, v10, v11, s0, s1, s2, s11)
			v12, v13, v14, v15 = _add_state_simd128(v12, v13, v14, v15, s0, s1, s2, s15)

			#no_bounds_check {
				if src != nil {
					v0, v1, v2, v3 = _xor_simd128(in_v, v0, v1, v2, v3)
					v4, v5, v6, v7 = _xor_simd128(in_v[4:], v4, v5, v6, v7)
					v8, v9, v10, v11 = _xor_simd128(in_v[8:], v8, v9, v10, v11)
					v12, v13, v14, v15 = _xor_simd128(in_v[12:], v12, v13, v14, v15)
					in_v = in_v[16:]
				}

				_store_simd128(out_v, v0, v1, v2, v3)
				_store_simd128(out_v[4:], v4, v5, v6, v7)
				_store_simd128(out_v[8:], v8, v9, v10, v11)
				_store_simd128(out_v[12:], v12, v13, v14, v15)
				out_v = out_v[16:]
			}

			when ODIN_ENDIAN == .Little {
				// s15 holds the most current counter, so `s3 = s15 + 1`.
				s3 = transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s15, _VEC_ONE)
			} else {
				s3 = _increment_counter(ctx)
			}
		}
	}

	// 1 block at a time.
	for ; remaining > 0; remaining -= 1 {
		v0, v1, v2, v3 := s0, s1, s2, s3

		for i := _chacha20.ROUNDS; i > 0; i -= 2 {
			v0, v1, v2, v3 = _dq_round_simd128(v0, v1, v2, v3)
		}
		v0, v1, v2, v3 = _add_state_simd128(v0, v1, v2, v3, s0, s1, s2, s3)

		#no_bounds_check {
			if src != nil {
				v0, v1, v2, v3 = _xor_simd128(in_v, v0, v1, v2, v3)
				in_v = in_v[4:]
			}

			_store_simd128(out_v, v0, v1, v2, v3)
			out_v = out_v[4:]
		}

		// Increment the counter.  Overflow checking is done upon
		// entry into the routine, so a 64-bit increment safely
		// covers both cases.
		when ODIN_ENDIAN == .Little {
			s3 = transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s3, _VEC_ONE)
		} else {
			s3 = _increment_counter(ctx)
		}
	}

	when ODIN_ENDIAN == .Little {
		// Write back the counter to the state.
		intrinsics.unaligned_store((^simd.u32x4)(state_v[3:]), s3)
	}
}
|
||||
|
||||
// hchacha20 runs the ChaCha permutation (ROUNDS rounds, no feed-forward
// addition) over the state formed from the SIGMA constants, 32 bytes of
// `key` and 16 bytes of `iv`, and writes the first and last state rows
// (32 bytes in total) to `dst`.
@(enable_target_feature = TARGET_SIMD_FEATURES)
hchacha20 :: proc "contextless" (dst, key, iv: []byte) {
	row0 := simd.u32x4{_chacha20.SIGMA_0, _chacha20.SIGMA_1, _chacha20.SIGMA_2, _chacha20.SIGMA_3}
	row1 := intrinsics.unaligned_load((^simd.u32x4)(&key[0]))
	row2 := intrinsics.unaligned_load((^simd.u32x4)(&key[16]))
	row3 := intrinsics.unaligned_load((^simd.u32x4)(&iv[0]))

	when ODIN_ENDIAN == .Big {
		// Byte-swap each 32-bit lane of the rows loaded from memory.
		row1 = _byteswap_u32x4(row1)
		row2 = _byteswap_u32x4(row2)
		row3 = _byteswap_u32x4(row3)
	}

	for i := _chacha20.ROUNDS; i > 0; i -= 2 {
		row0, row1, row2, row3 = _dq_round_simd128(row0, row1, row2, row3)
	}

	when ODIN_ENDIAN == .Big {
		// Only the two output rows need swapping back.
		row0 = _byteswap_u32x4(row0)
		row3 = _byteswap_u32x4(row3)
	}

	out_v := ([^]simd.u32x4)(raw_data(dst))
	intrinsics.unaligned_store(&out_v[0], row0)
	intrinsics.unaligned_store(&out_v[1], row3)
}
|
||||
@@ -0,0 +1,319 @@
|
||||
//+build amd64
|
||||
package chacha20_simd256
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto/_chacha20"
|
||||
import chacha_simd128 "core:crypto/_chacha20/simd128"
|
||||
import "core:simd"
|
||||
import "core:sys/info"
|
||||
|
||||
// This is loosely based on Ted Krovetz's public domain C intrinsic
|
||||
// implementations. While written using `core:simd`, this is currently
|
||||
// amd64 specific because we do not have a way to detect ARM SVE.
|
||||
//
|
||||
// See:
|
||||
// supercop-20230530/crypto_stream/chacha20/krovetz/vec128
|
||||
// supercop-20230530/crypto_stream/chacha20/krovetz/avx2
|
||||
|
||||
#assert(ODIN_ENDIAN == .Little)
|
||||
|
||||
// Per-lane shift counts for the 32-bit rotations in the quarter round,
// widened to 8 lanes.  A left rotation by n is built from a left shift by
// n (`_ROT_nL`) combined with a right shift by 32-n (`_ROT_nR`); for
// n = 16 both shifts use the same count (`_ROT_16`).
@(private = "file")
_ROT_7L: simd.u32x8 : {7, 7, 7, 7, 7, 7, 7, 7}
@(private = "file")
_ROT_7R: simd.u32x8 : {25, 25, 25, 25, 25, 25, 25, 25}
@(private = "file")
_ROT_12L: simd.u32x8 : {12, 12, 12, 12, 12, 12, 12, 12}
@(private = "file")
_ROT_12R: simd.u32x8 : {20, 20, 20, 20, 20, 20, 20, 20}
@(private = "file")
_ROT_8L: simd.u32x8 : {8, 8, 8, 8, 8, 8, 8, 8}
@(private = "file")
_ROT_8R: simd.u32x8 : {24, 24, 24, 24, 24, 24, 24, 24}
@(private = "file")
_ROT_16: simd.u32x8 : {16, 16, 16, 16, 16, 16, 16, 16}
// NOTE(review): the two constants below look like 64-bit counter
// increments for a row holding two blocks (one per 128-bit half):
// +0/+1 and +2/+2 respectively.  Confirm against their uses further
// down in the file.
@(private = "file")
_VEC_ZERO_ONE: simd.u64x4 : {0, 0, 1, 0}
@(private = "file")
_VEC_TWO: simd.u64x4 : {2, 0, 2, 0}
|
||||
|
||||
// is_performant reports whether the current host supports "enough" SIMD
// (AVX and AVX2) for this implementation to be worthwhile.  It returns
// false when the host CPU features are unavailable.
is_performant :: proc "contextless" () -> bool {
	req_features :: info.CPU_Features{.avx, .avx2}

	// `>=` on bit sets: every required feature must be present.
	features, ok := info.cpu_features.?
	return ok && features >= req_features
}
|
||||
|
||||
// _dq_round_simd256 applies one double round to four 8-lane row vectors.
// Every step acts independently on the low and high 4-lane halves of
// each vector: a quarter round across the columns, a lane rotation of
// rows 1..3 within each half, the same quarter round again (now acting
// on the diagonals), and the inverse lane rotation.  The updated rows
// are returned in the same order as the arguments.
@(private = "file")
_dq_round_simd256 :: #force_inline proc "contextless" (
	v0, v1, v2, v3: simd.u32x8,
) -> (
	simd.u32x8,
	simd.u32x8,
	simd.u32x8,
	simd.u32x8,
) {
	a, b, c, d := v0, v1, v2, v3

	// a += b; d ^= a; d = ROTW16(d);
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_16), simd.shr(d, _ROT_16))

	// c += d; b ^= c; b = ROTW12(b);
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_12L), simd.shr(b, _ROT_12R))

	// a += b; d ^= a; d = ROTW8(d);
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_8L), simd.shr(d, _ROT_8R))

	// c += d; b ^= c; b = ROTW7(b);
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_7L), simd.shr(b, _ROT_7R))

	// b = ROTV1(b); c = ROTV2(c); d = ROTV3(d);
	b = simd.shuffle(b, b, 1, 2, 3, 0, 5, 6, 7, 4)
	c = simd.shuffle(c, c, 2, 3, 0, 1, 6, 7, 4, 5)
	d = simd.shuffle(d, d, 3, 0, 1, 2, 7, 4, 5, 6)

	// a += b; d ^= a; d = ROTW16(d);
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_16), simd.shr(d, _ROT_16))

	// c += d; b ^= c; b = ROTW12(b);
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_12L), simd.shr(b, _ROT_12R))

	// a += b; d ^= a; d = ROTW8(d);
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_8L), simd.shr(d, _ROT_8R))

	// c += d; b ^= c; b = ROTW7(b);
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_7L), simd.shr(b, _ROT_7R))

	// b = ROTV3(b); c = ROTV2(c); d = ROTV1(d);
	b = simd.shuffle(b, b, 3, 0, 1, 2, 7, 4, 5, 6)
	c = simd.shuffle(c, c, 2, 3, 0, 1, 6, 7, 4, 5)
	d = simd.shuffle(d, d, 1, 2, 3, 0, 5, 6, 7, 4)

	return a, b, c, d
}
|
||||
|
||||
@(private = "file")
_add_and_permute_state_simd256 :: #force_inline proc "contextless" (
	v0, v1, v2, v3, s0, s1, s2, s3: simd.u32x8,
) -> (
	simd.u32x8,
	simd.u32x8,
	simd.u32x8,
	simd.u32x8,
) {
	// Add the input state (s0 .. s3) to the working state (v0 .. v3).
	sum0 := simd.add(v0, s0)
	sum1 := simd.add(v1, s1)
	sum2 := simd.add(v2, s2)
	sum3 := simd.add(v3, s3)

	// Big Endian would byteswap here.

	// Every sum holds 128-bits of keystream for each of 2 separate
	// blocks (low lanes, high lanes).  Regroup the halves so that the
	// first two results are all of block 0, and the last two results
	// are all of block 1.
	blk0_lo := simd.shuffle(sum0, sum1, 0, 1, 2, 3, 8, 9, 10, 11)
	blk1_lo := simd.shuffle(sum0, sum1, 4, 5, 6, 7, 12, 13, 14, 15)
	blk0_hi := simd.shuffle(sum2, sum3, 0, 1, 2, 3, 8, 9, 10, 11)
	blk1_hi := simd.shuffle(sum2, sum3, 4, 5, 6, 7, 12, 13, 14, 15)

	return blk0_lo, blk0_hi, blk1_lo, blk1_hi
}
|
||||
|
||||
@(private = "file")
_xor_simd256 :: #force_inline proc "contextless" (
	src: [^]simd.u32x8,
	v0, v1, v2, v3: simd.u32x8,
) -> (
	simd.u32x8,
	simd.u32x8,
	simd.u32x8,
	simd.u32x8,
) {
	// XOR four consecutive 256-bit vectors read from `src` into
	// v0 .. v3.  The reads are unaligned loads, so `src` does not
	// need any particular alignment.
	in0 := intrinsics.unaligned_load((^simd.u32x8)(src[0:]))
	in1 := intrinsics.unaligned_load((^simd.u32x8)(src[1:]))
	in2 := intrinsics.unaligned_load((^simd.u32x8)(src[2:]))
	in3 := intrinsics.unaligned_load((^simd.u32x8)(src[3:]))

	return simd.bit_xor(v0, in0), simd.bit_xor(v1, in1), simd.bit_xor(v2, in2), simd.bit_xor(v3, in3)
}
|
||||
|
||||
@(private = "file")
_xor_simd256_x1 :: #force_inline proc "contextless" (
	src: [^]simd.u32x8,
	v0, v1: simd.u32x8,
) -> (
	simd.u32x8,
	simd.u32x8,
) {
	// Two-vector variant of the XOR helper: only src[0] and src[1]
	// are read (unaligned), and XORed into v0 and v1 respectively.
	in0 := intrinsics.unaligned_load((^simd.u32x8)(src[0:]))
	in1 := intrinsics.unaligned_load((^simd.u32x8)(src[1:]))

	return simd.bit_xor(v0, in0), simd.bit_xor(v1, in1)
}
|
||||
|
||||
@(private = "file")
_store_simd256 :: #force_inline proc "contextless" (
	dst: [^]simd.u32x8,
	v0, v1, v2, v3: simd.u32x8,
) {
	// Write v0 .. v3 to four consecutive 256-bit slots starting at
	// `dst`, using unaligned stores.
	slot0 := (^simd.u32x8)(dst[0:])
	slot1 := (^simd.u32x8)(dst[1:])
	slot2 := (^simd.u32x8)(dst[2:])
	slot3 := (^simd.u32x8)(dst[3:])

	intrinsics.unaligned_store(slot0, v0)
	intrinsics.unaligned_store(slot1, v1)
	intrinsics.unaligned_store(slot2, v2)
	intrinsics.unaligned_store(slot3, v3)
}
|
||||
|
||||
@(private = "file")
_store_simd256_x1 :: #force_inline proc "contextless" (
	dst: [^]simd.u32x8,
	v0, v1: simd.u32x8,
) {
	// Two-vector variant of the store helper: only dst[0] and dst[1]
	// are written, using unaligned stores.
	slot0 := (^simd.u32x8)(dst[0:])
	slot1 := (^simd.u32x8)(dst[1:])

	intrinsics.unaligned_store(slot0, v0)
	intrinsics.unaligned_store(slot1, v1)
}
|
||||
|
||||
@(enable_target_feature = "sse2,ssse3,avx,avx2")
stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int) {
	// Enforce the maximum consumed keystream per IV.
	_chacha20.check_counter_limit(ctx, nr_blocks)

	// View the output/input buffers as runs of 256-bit vectors.  If
	// `src` is nil, the raw keystream is written to `dst`.
	out := ([^]simd.u32x8)(raw_data(dst))
	inp := ([^]simd.u32x8)(raw_data(src))

	remaining := nr_blocks

	// The state vector is an array of uint32s in native byte-order.
	// Load it as 4 x 128-bit rows, then duplicate each row into both
	// halves of a 256-bit register so that s0 .. s3 each store 2
	// copies of the state.
	state := ([^]simd.u32x4)(raw_data(&ctx._s))
	row0 := intrinsics.unaligned_load((^simd.u32x4)(state[0:]))
	row1 := intrinsics.unaligned_load((^simd.u32x4)(state[1:]))
	row2 := intrinsics.unaligned_load((^simd.u32x4)(state[2:]))
	row3 := intrinsics.unaligned_load((^simd.u32x4)(state[3:]))
	s0 := simd.swizzle(row0, 0, 1, 2, 3, 0, 1, 2, 3)
	s1 := simd.swizzle(row1, 0, 1, 2, 3, 0, 1, 2, 3)
	s2 := simd.swizzle(row2, 0, 1, 2, 3, 0, 1, 2, 3)
	s3 := simd.swizzle(row3, 0, 1, 2, 3, 0, 1, 2, 3)

	// Advance the counter in the 2nd copy of the state by one.
	s3 = transmute(simd.u32x8)simd.add(transmute(simd.u64x4)s3, _VEC_ZERO_ONE)

	// 8 blocks at a time (4 register groups x 2 blocks per group).
	for ; remaining >= 8; remaining -= 8 {
		v0, v1, v2, v3 := s0, s1, s2, s3

		// Each following group uses a counter row that is 2 blocks
		// further along than the previous group's.
		s7 := transmute(simd.u32x8)simd.add(transmute(simd.u64x4)s3, _VEC_TWO)
		v4, v5, v6, v7 := s0, s1, s2, s7

		s11 := transmute(simd.u32x8)simd.add(transmute(simd.u64x4)s7, _VEC_TWO)
		v8, v9, v10, v11 := s0, s1, s2, s11

		s15 := transmute(simd.u32x8)simd.add(transmute(simd.u64x4)s11, _VEC_TWO)
		v12, v13, v14, v15 := s0, s1, s2, s15

		// Each call is a double round, hence the step of 2.
		for i := _chacha20.ROUNDS; i > 0; i -= 2 {
			v0, v1, v2, v3 = _dq_round_simd256(v0, v1, v2, v3)
			v4, v5, v6, v7 = _dq_round_simd256(v4, v5, v6, v7)
			v8, v9, v10, v11 = _dq_round_simd256(v8, v9, v10, v11)
			v12, v13, v14, v15 = _dq_round_simd256(v12, v13, v14, v15)
		}

		v0, v1, v2, v3 = _add_and_permute_state_simd256(v0, v1, v2, v3, s0, s1, s2, s3)
		v4, v5, v6, v7 = _add_and_permute_state_simd256(v4, v5, v6, v7, s0, s1, s2, s7)
		v8, v9, v10, v11 = _add_and_permute_state_simd256(v8, v9, v10, v11, s0, s1, s2, s11)
		v12, v13, v14, v15 = _add_and_permute_state_simd256(v12, v13, v14, v15, s0, s1, s2, s15)

		#no_bounds_check {
			if src != nil {
				v0, v1, v2, v3 = _xor_simd256(inp, v0, v1, v2, v3)
				v4, v5, v6, v7 = _xor_simd256(inp[4:], v4, v5, v6, v7)
				v8, v9, v10, v11 = _xor_simd256(inp[8:], v8, v9, v10, v11)
				v12, v13, v14, v15 = _xor_simd256(inp[12:], v12, v13, v14, v15)
				inp = inp[16:]
			}

			_store_simd256(out, v0, v1, v2, v3)
			_store_simd256(out[4:], v4, v5, v6, v7)
			_store_simd256(out[8:], v8, v9, v10, v11)
			_store_simd256(out[12:], v12, v13, v14, v15)
			out = out[16:]
		}

		// The next iteration continues from the last group's counter.
		s3 = transmute(simd.u32x8)simd.add(transmute(simd.u64x4)s15, _VEC_TWO)
	}

	// 2 (or 1) block at a time.
	for ; remaining > 0; remaining -= 2 {
		v0, v1, v2, v3 := s0, s1, s2, s3

		for i := _chacha20.ROUNDS; i > 0; i -= 2 {
			v0, v1, v2, v3 = _dq_round_simd256(v0, v1, v2, v3)
		}
		v0, v1, v2, v3 = _add_and_permute_state_simd256(v0, v1, v2, v3, s0, s1, s2, s3)

		if remaining == 1 {
			// Note: No need to advance inp, out, or increment the
			// counter since this is guaranteed to be the final block.
			// Only (v0, v1), which hold block 0, are consumed.
			#no_bounds_check {
				if src != nil {
					v0, v1 = _xor_simd256_x1(inp, v0, v1)
				}

				_store_simd256_x1(out, v0, v1)
			}
			break
		}

		#no_bounds_check {
			if src != nil {
				v0, v1, v2, v3 = _xor_simd256(inp, v0, v1, v2, v3)
				inp = inp[4:]
			}

			_store_simd256(out, v0, v1, v2, v3)
			out = out[4:]
		}

		s3 = transmute(simd.u32x8)simd.add(transmute(simd.u64x4)s3, _VEC_TWO)
	}

	// Write back the counter.  Doing it this way, saves having to
	// pull out the correct counter value from s3.
	ctr_lo := u64(ctx._s[12])
	ctr_hi := u64(ctx._s[13])
	new_ctr := ((ctr_hi << 32) | ctr_lo) + u64(nr_blocks)
	ctx._s[12] = u32(new_ctr)
	ctx._s[13] = u32(new_ctr >> 32)
}
|
||||
|
||||
@(enable_target_feature = "sse2,ssse3,avx")
hchacha20 :: proc "contextless" (dst, key, iv: []byte) {
	// Going wider than 128-bits has 0 performance benefit here, so
	// simply enable AVX and inline the simd128 implementation; getting
	// VEX encoded instructions out of it is a nice side effect.
	#force_inline chacha_simd128.hchacha20(dst, key, iv)
}
|
||||
@@ -0,0 +1,17 @@
|
||||
//+build !amd64
|
||||
package chacha20_simd256
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto/_chacha20"
|
||||
|
||||
// is_performant always returns false in this build of the package (see
// the build tag at the top of the file): the simd256 implementation is
// not provided here.
is_performant :: proc "contextless" () -> bool {
	return false
}
|
||||
|
||||
// stream_blocks is a stub for builds without the simd256 implementation.
// It unconditionally panics; callers are expected to check
// `is_performant` (which returns false in this build) first.
stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int) {
	panic("crypto/chacha20: simd256 implementation unsupported")
}
|
||||
|
||||
// hchacha20 is a stub for builds without the simd256 implementation.
// The procedure is "contextless", so it traps instead of calling panic.
hchacha20 :: proc "contextless" (dst, key, iv: []byte) {
	intrinsics.trap()
}
|
||||
@@ -0,0 +1,36 @@
|
||||
package aead
|
||||
|
||||
// seal_oneshot is the one-shot form of seal: it sets up a temporary
// Context for the provided algorithm and key, encrypts the plaintext and
// authenticates the aad and ciphertext with the provided iv, and stores
// the output in dst and tag.  The temporary Context is sanitized before
// returning.
//
// dst and plaintext MUST alias exactly or not at all.
seal_oneshot :: proc(algo: Algorithm, dst, tag, key, iv, aad, plaintext: []byte, impl: Implementation = nil) {
	oneshot_ctx: Context

	init(&oneshot_ctx, algo, key, impl)
	defer reset(&oneshot_ctx)

	seal_ctx(&oneshot_ctx, dst, tag, iv, aad, plaintext)
}
|
||||
|
||||
// open_oneshot authenticates the aad and ciphertext, and decrypts the
// ciphertext, with the provided algorithm, key, iv, and tag, and stores
// the output in dst, returning true iff the authentication was
// successful.  If authentication fails, the destination buffer will be
// zeroed.
//
// A temporary Context is set up for the call and sanitized before
// returning.
//
// dst and ciphertext MUST alias exactly or not at all.
@(require_results)
open_oneshot :: proc(algo: Algorithm, dst, key, iv, aad, ciphertext, tag: []byte, impl: Implementation = nil) -> bool {
	ctx: Context
	init(&ctx, algo, key, impl)
	defer reset(&ctx)
	return open_ctx(&ctx, dst, iv, aad, ciphertext, tag)
}
|
||||
|
||||
// seal is the overloaded entry point for sealing: it resolves to
// seal_ctx when called with a ^Context, and to seal_oneshot when called
// with an Algorithm and key.
seal :: proc {
	seal_ctx,
	seal_oneshot,
}
|
||||
|
||||
// open is the overloaded entry point for opening: it resolves to
// open_ctx when called with a ^Context, and to open_oneshot when called
// with an Algorithm and key.
open :: proc {
	open_ctx,
	open_oneshot,
}
|
||||
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
package aead provides a generic interface to the supported Authenticated
|
||||
Encryption with Associated Data algorithms.
|
||||
|
||||
Both a one-shot and context based interface are provided, with similar
|
||||
usage. If multiple messages are to be sealed/opened via the same key,
|
||||
the context based interface may be more efficient, depending on the
|
||||
algorithm.
|
||||
|
||||
WARNING: Reusing the same key + iv to seal (encrypt) multiple messages
|
||||
results in catastrophic loss of security for most algorithms.
|
||||
|
||||
Example:
|
||||
package aead_example
|
||||
|
||||
import "core:bytes"
|
||||
import "core:crypto"
|
||||
import "core:crypto/aead"
|
||||
|
||||
main :: proc() {
|
||||
algo := aead.Algorithm.XCHACHA20POLY1305
|
||||
|
||||
// The example associated data (AAD), and plaintext.
|
||||
aad_str := "Get your ass in gear boys."
|
||||
pt_str := "They're immanetizing the Eschaton."
|
||||
|
||||
aad := transmute([]byte)aad_str
|
||||
plaintext := transmute([]byte)pt_str
|
||||
pt_len := len(plaintext)
|
||||
|
||||
// Generate a random key for the purposes of illustration.
|
||||
key := make([]byte, aead.KEY_SIZES[algo])
|
||||
defer delete(key)
|
||||
crypto.rand_bytes(key)
|
||||
|
||||
// `ciphertext || tag`, is a common way data is transmitted, so
|
||||
// demonstrate that.
|
||||
buf := make([]byte, pt_len + aead.TAG_SIZES[algo])
|
||||
defer delete(buf)
|
||||
ciphertext, tag := buf[:pt_len], buf[pt_len:]
|
||||
|
||||
// Seal the AAD + Plaintext.
|
||||
iv := make([]byte, aead.IV_SIZES[algo])
|
||||
defer delete(iv)
|
||||
crypto.rand_bytes(iv) // Random IVs are safe with XChaCha20-Poly1305.
|
||||
aead.seal(algo, ciphertext, tag, key, iv, aad, plaintext)
|
||||
|
||||
// Open the AAD + Ciphertext.
|
||||
opened_pt := buf[:pt_len]
|
||||
if ok := aead.open(algo, opened_pt, key, iv, aad, ciphertext, tag); !ok {
|
||||
panic("aead example: failed to open")
|
||||
}
|
||||
|
||||
assert(bytes.equal(opened_pt, plaintext))
|
||||
}
|
||||
*/
|
||||
package aead
|
||||
@@ -0,0 +1,187 @@
|
||||
package aead
|
||||
|
||||
import "core:crypto/aes"
|
||||
import "core:crypto/chacha20"
|
||||
import "core:crypto/chacha20poly1305"
|
||||
import "core:reflect"
|
||||
|
||||
// Implementation is an AEAD implementation. Most callers will not need
|
||||
// to use this as the package will automatically select the most performant
|
||||
// implementation available.
|
||||
Implementation :: union {
|
||||
aes.Implementation,
|
||||
chacha20.Implementation,
|
||||
}
|
||||
|
||||
// MAX_TAG_SIZE is the maximum size tag that can be returned by any of the
|
||||
// Algorithms supported via this package.
|
||||
MAX_TAG_SIZE :: 16
|
||||
|
||||
// Algorithm is the algorithm identifier associated with a given Context.
|
||||
Algorithm :: enum {
|
||||
Invalid,
|
||||
AES_GCM_128,
|
||||
AES_GCM_192,
|
||||
AES_GCM_256,
|
||||
CHACHA20POLY1305,
|
||||
XCHACHA20POLY1305,
|
||||
}
|
||||
|
||||
// ALGORITHM_NAMES is the Algorithm to algorithm name string.
|
||||
ALGORITHM_NAMES := [Algorithm]string {
|
||||
.Invalid = "Invalid",
|
||||
.AES_GCM_128 = "AES-GCM-128",
|
||||
.AES_GCM_192 = "AES-GCM-192",
|
||||
.AES_GCM_256 = "AES-GCM-256",
|
||||
.CHACHA20POLY1305 = "chacha20poly1305",
|
||||
.XCHACHA20POLY1305 = "xchacha20poly1305",
|
||||
}
|
||||
|
||||
// TAG_SIZES is the Algorithm to tag size in bytes.
|
||||
TAG_SIZES := [Algorithm]int {
|
||||
.Invalid = 0,
|
||||
.AES_GCM_128 = aes.GCM_TAG_SIZE,
|
||||
.AES_GCM_192 = aes.GCM_TAG_SIZE,
|
||||
.AES_GCM_256 = aes.GCM_TAG_SIZE,
|
||||
.CHACHA20POLY1305 = chacha20poly1305.TAG_SIZE,
|
||||
.XCHACHA20POLY1305 = chacha20poly1305.TAG_SIZE,
|
||||
}
|
||||
|
||||
// KEY_SIZES is the Algorithm to key size in bytes.
|
||||
KEY_SIZES := [Algorithm]int {
|
||||
.Invalid = 0,
|
||||
.AES_GCM_128 = aes.KEY_SIZE_128,
|
||||
.AES_GCM_192 = aes.KEY_SIZE_192,
|
||||
.AES_GCM_256 = aes.KEY_SIZE_256,
|
||||
.CHACHA20POLY1305 = chacha20poly1305.KEY_SIZE,
|
||||
.XCHACHA20POLY1305 = chacha20poly1305.KEY_SIZE,
|
||||
}
|
||||
|
||||
// IV_SIZES is the Algorithm to initialization vector size in bytes.
|
||||
//
|
||||
// Note: Some algorithms (such as AES-GCM) support variable IV sizes.
|
||||
IV_SIZES := [Algorithm]int {
|
||||
.Invalid = 0,
|
||||
.AES_GCM_128 = aes.GCM_IV_SIZE,
|
||||
.AES_GCM_192 = aes.GCM_IV_SIZE,
|
||||
.AES_GCM_256 = aes.GCM_IV_SIZE,
|
||||
.CHACHA20POLY1305 = chacha20poly1305.IV_SIZE,
|
||||
.XCHACHA20POLY1305 = chacha20poly1305.XIV_SIZE,
|
||||
}
|
||||
|
||||
// Context is a concrete instantiation of a specific AEAD algorithm.
|
||||
Context :: struct {
|
||||
_algo: Algorithm,
|
||||
_impl: union {
|
||||
aes.Context_GCM,
|
||||
chacha20poly1305.Context,
|
||||
},
|
||||
}
|
||||
|
||||
@(private)
|
||||
_IMPL_IDS := [Algorithm]typeid {
|
||||
.Invalid = nil,
|
||||
.AES_GCM_128 = typeid_of(aes.Context_GCM),
|
||||
.AES_GCM_192 = typeid_of(aes.Context_GCM),
|
||||
.AES_GCM_256 = typeid_of(aes.Context_GCM),
|
||||
.CHACHA20POLY1305 = typeid_of(chacha20poly1305.Context),
|
||||
.XCHACHA20POLY1305 = typeid_of(chacha20poly1305.Context),
|
||||
}
|
||||
|
||||
// init initializes a Context with a specific AEAD Algorithm and key.
// A Context that is already initialized is reset first.  If impl is nil,
// the default implementation of the underlying algorithm is used.
//
// Panics if the key size does not match the algorithm, or if the
// algorithm is not a valid, supported one.
init :: proc(ctx: ^Context, algorithm: Algorithm, key: []byte, impl: Implementation = nil) {
	// Sanitize any state left over from a previous use of the Context.
	if ctx._impl != nil {
		reset(ctx)
	}

	if KEY_SIZES[algorithm] != len(key) {
		panic("crypto/aead: invalid key size")
	}

	// Directly specialize the union by setting the type ID (save a copy).
	reflect.set_union_variant_typeid(ctx._impl, _IMPL_IDS[algorithm])

	use_default := impl == nil
	switch algorithm {
	case .AES_GCM_128, .AES_GCM_192, .AES_GCM_256:
		aes_impl := use_default ? aes.DEFAULT_IMPLEMENTATION : impl.(aes.Implementation)
		aes.init_gcm(&ctx._impl.(aes.Context_GCM), key, aes_impl)
	case .CHACHA20POLY1305:
		chacha_impl := use_default ? chacha20.DEFAULT_IMPLEMENTATION : impl.(chacha20.Implementation)
		chacha20poly1305.init(&ctx._impl.(chacha20poly1305.Context), key, chacha_impl)
	case .XCHACHA20POLY1305:
		chacha_impl := use_default ? chacha20.DEFAULT_IMPLEMENTATION : impl.(chacha20.Implementation)
		chacha20poly1305.init_xchacha(&ctx._impl.(chacha20poly1305.Context), key, chacha_impl)
	case .Invalid:
		panic("crypto/aead: uninitialized algorithm")
	case:
		panic("crypto/aead: invalid algorithm")
	}

	// Only record the algorithm once the implementation is set up.
	ctx._algo = algorithm
}
|
||||
|
||||
// seal_ctx encrypts the plaintext and authenticates the aad and
// ciphertext using an initialized Context and the provided iv, writing
// the ciphertext to dst and the authentication tag to tag.
//
// Panics if the Context has not been initialized.
//
// dst and plaintext MUST alias exactly or not at all.
seal_ctx :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
	// Dispatch on whichever algorithm-specific context is active.
	switch &inner in ctx._impl {
	case aes.Context_GCM:
		aes.seal_gcm(&inner, dst, tag, iv, aad, plaintext)
	case chacha20poly1305.Context:
		chacha20poly1305.seal(&inner, dst, tag, iv, aad, plaintext)
	case:
		panic("crypto/aead: uninitialized algorithm")
	}
}
|
||||
|
||||
// open_ctx authenticates the aad and ciphertext, and decrypts the ciphertext,
// with the provided Context, iv, and tag, and stores the output in dst,
// returning true iff the authentication was successful.  If authentication
// fails, the destination buffer will be zeroed.
//
// Panics if the Context has not been initialized.
//
// dst and ciphertext MUST alias exactly or not at all.
@(require_results)
open_ctx :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
	// Dispatch on whichever algorithm-specific context is active.
	switch &impl in ctx._impl {
	case aes.Context_GCM:
		return aes.open_gcm(&impl, dst, iv, aad, ciphertext, tag)
	case chacha20poly1305.Context:
		return chacha20poly1305.open(&impl, dst, iv, aad, ciphertext, tag)
	case:
		panic("crypto/aead: uninitialized algorithm")
	}
}
|
||||
|
||||
// reset sanitizes the Context by resetting the active algorithm-specific
// context (if any) and clearing the Context's own fields.  The Context
// must be re-initialized to be used again.
reset :: proc(ctx: ^Context) {
	switch &inner in ctx._impl {
	case aes.Context_GCM:
		aes.reset_gcm(&inner)
	case chacha20poly1305.Context:
		chacha20poly1305.reset(&inner)
	case:
		// Nothing is active; calling reset repeatedly is fine.
	}

	ctx._impl = nil
	ctx._algo = .Invalid
}
|
||||
|
||||
// algorithm returns the Algorithm a Context instance was initialized
// with (`.Invalid` after the Context has been reset).
algorithm :: proc(ctx: ^Context) -> Algorithm {
	return ctx._algo
}
|
||||
|
||||
// iv_size returns the IV size in bytes for the Algorithm of a Context
// instance, as listed in IV_SIZES.
iv_size :: proc(ctx: ^Context) -> int {
	return IV_SIZES[ctx._algo]
}
|
||||
|
||||
// tag_size returns the tag size in bytes for the Algorithm of a Context
// instance, as listed in TAG_SIZES.
tag_size :: proc(ctx: ^Context) -> int {
	return TAG_SIZES[ctx._algo]
}
|
||||
@@ -2,9 +2,9 @@
|
||||
package aes implements the AES block cipher and some common modes.
|
||||
|
||||
See:
|
||||
- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.197-upd1.pdf
|
||||
- https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a.pdf
|
||||
- https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf
|
||||
- [[ https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.197-upd1.pdf ]]
|
||||
- [[ https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a.pdf ]]
|
||||
- [[ https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf ]]
|
||||
*/
|
||||
package aes
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ Context_CTR :: struct {
|
||||
}
|
||||
|
||||
// init_ctr initializes a Context_CTR with the provided key and IV.
|
||||
init_ctr :: proc(ctx: ^Context_CTR, key, iv: []byte, impl := Implementation.Hardware) {
|
||||
init_ctr :: proc(ctx: ^Context_CTR, key, iv: []byte, impl := DEFAULT_IMPLEMENTATION) {
|
||||
if len(iv) != CTR_IV_SIZE {
|
||||
panic("crypto/aes: invalid CTR IV size")
|
||||
}
|
||||
@@ -47,7 +47,7 @@ xor_bytes_ctr :: proc(ctx: ^Context_CTR, dst, src: []byte) {
|
||||
panic("crypto/aes: dst and src alias inexactly")
|
||||
}
|
||||
|
||||
for remaining := len(src); remaining > 0; {
|
||||
#no_bounds_check for remaining := len(src); remaining > 0; {
|
||||
// Process multiple blocks at once
|
||||
if ctx._off == BLOCK_SIZE {
|
||||
if nr_blocks := remaining / BLOCK_SIZE; nr_blocks > 0 {
|
||||
@@ -85,7 +85,7 @@ keystream_bytes_ctr :: proc(ctx: ^Context_CTR, dst: []byte) {
|
||||
assert(ctx._is_initialized)
|
||||
|
||||
dst := dst
|
||||
for remaining := len(dst); remaining > 0; {
|
||||
#no_bounds_check for remaining := len(dst); remaining > 0; {
|
||||
// Process multiple blocks at once
|
||||
if ctx._off == BLOCK_SIZE {
|
||||
if nr_blocks := remaining / BLOCK_SIZE; nr_blocks > 0 {
|
||||
|
||||
@@ -12,7 +12,7 @@ Context_ECB :: struct {
|
||||
}
|
||||
|
||||
// init_ecb initializes a Context_ECB with the provided key.
|
||||
init_ecb :: proc(ctx: ^Context_ECB, key: []byte, impl := Implementation.Hardware) {
|
||||
init_ecb :: proc(ctx: ^Context_ECB, key: []byte, impl := DEFAULT_IMPLEMENTATION) {
|
||||
init_impl(&ctx._impl, key, impl)
|
||||
ctx._is_initialized = true
|
||||
}
|
||||
|
||||
@@ -7,10 +7,10 @@ import "core:crypto/_aes/ct64"
|
||||
import "core:encoding/endian"
|
||||
import "core:mem"
|
||||
|
||||
// GCM_NONCE_SIZE is the default size of the GCM nonce in bytes.
|
||||
GCM_NONCE_SIZE :: 12
|
||||
// GCM_NONCE_SIZE_MAX is the maximum size of the GCM nonce in bytes.
|
||||
GCM_NONCE_SIZE_MAX :: 0x2000000000000000 // floor((2^64 - 1) / 8) bits
|
||||
// GCM_IV_SIZE is the default size of the GCM IV in bytes.
|
||||
GCM_IV_SIZE :: 12
|
||||
// GCM_IV_SIZE_MAX is the maximum size of the GCM IV in bytes.
|
||||
GCM_IV_SIZE_MAX :: 0x2000000000000000 // floor((2^64 - 1) / 8) bits
|
||||
// GCM_TAG_SIZE is the size of a GCM tag in bytes.
|
||||
GCM_TAG_SIZE :: _aes.GHASH_TAG_SIZE
|
||||
|
||||
@@ -26,19 +26,19 @@ Context_GCM :: struct {
|
||||
}
|
||||
|
||||
// init_gcm initializes a Context_GCM with the provided key.
|
||||
init_gcm :: proc(ctx: ^Context_GCM, key: []byte, impl := Implementation.Hardware) {
|
||||
init_gcm :: proc(ctx: ^Context_GCM, key: []byte, impl := DEFAULT_IMPLEMENTATION) {
|
||||
init_impl(&ctx._impl, key, impl)
|
||||
ctx._is_initialized = true
|
||||
}
|
||||
|
||||
// seal_gcm encrypts the plaintext and authenticates the aad and ciphertext,
|
||||
// with the provided Context_GCM and nonce, stores the output in dst and tag.
|
||||
// with the provided Context_GCM and iv, stores the output in dst and tag.
|
||||
//
|
||||
// dst and plaintext MUST alias exactly or not at all.
|
||||
seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, nonce, aad, plaintext: []byte) {
|
||||
seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, iv, aad, plaintext: []byte) {
|
||||
assert(ctx._is_initialized)
|
||||
|
||||
gcm_validate_common_slice_sizes(tag, nonce, aad, plaintext)
|
||||
gcm_validate_common_slice_sizes(tag, iv, aad, plaintext)
|
||||
if len(dst) != len(plaintext) {
|
||||
panic("crypto/aes: invalid destination ciphertext size")
|
||||
}
|
||||
@@ -47,7 +47,7 @@ seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, nonce, aad, plaintext: []byte) {
|
||||
}
|
||||
|
||||
if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
|
||||
gcm_seal_hw(&impl, dst, tag, nonce, aad, plaintext)
|
||||
gcm_seal_hw(&impl, dst, tag, iv, aad, plaintext)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -55,7 +55,7 @@ seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, nonce, aad, plaintext: []byte) {
|
||||
j0: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
j0_enc: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
s: [_aes.GHASH_TAG_SIZE]byte
|
||||
init_ghash_ct64(ctx, &h, &j0, &j0_enc, nonce)
|
||||
init_ghash_ct64(ctx, &h, &j0, &j0_enc, iv)
|
||||
|
||||
// Note: Our GHASH implementation handles appending padding.
|
||||
ct64.ghash(s[:], h[:], aad)
|
||||
@@ -69,15 +69,16 @@ seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, nonce, aad, plaintext: []byte) {
|
||||
}
|
||||
|
||||
// open_gcm authenticates the aad and ciphertext, and decrypts the ciphertext,
|
||||
// with the provided Context_GCM, nonce, and tag, and stores the output in dst,
|
||||
// with the provided Context_GCM, iv, and tag, and stores the output in dst,
|
||||
// returning true iff the authentication was successful. If authentication
|
||||
// fails, the destination buffer will be zeroed.
|
||||
//
|
||||
// dst and plaintext MUST alias exactly or not at all.
|
||||
open_gcm :: proc(ctx: ^Context_GCM, dst, nonce, aad, ciphertext, tag: []byte) -> bool {
|
||||
@(require_results)
|
||||
open_gcm :: proc(ctx: ^Context_GCM, dst, iv, aad, ciphertext, tag: []byte) -> bool {
|
||||
assert(ctx._is_initialized)
|
||||
|
||||
gcm_validate_common_slice_sizes(tag, nonce, aad, ciphertext)
|
||||
gcm_validate_common_slice_sizes(tag, iv, aad, ciphertext)
|
||||
if len(dst) != len(ciphertext) {
|
||||
panic("crypto/aes: invalid destination plaintext size")
|
||||
}
|
||||
@@ -86,14 +87,14 @@ open_gcm :: proc(ctx: ^Context_GCM, dst, nonce, aad, ciphertext, tag: []byte) ->
|
||||
}
|
||||
|
||||
if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
|
||||
return gcm_open_hw(&impl, dst, nonce, aad, ciphertext, tag)
|
||||
return gcm_open_hw(&impl, dst, iv, aad, ciphertext, tag)
|
||||
}
|
||||
|
||||
h: [_aes.GHASH_KEY_SIZE]byte
|
||||
j0: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
j0_enc: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
s: [_aes.GHASH_TAG_SIZE]byte
|
||||
init_ghash_ct64(ctx, &h, &j0, &j0_enc, nonce)
|
||||
init_ghash_ct64(ctx, &h, &j0, &j0_enc, iv)
|
||||
|
||||
ct64.ghash(s[:], h[:], aad)
|
||||
gctr_ct64(ctx, dst, &s, ciphertext, &h, &j0, false)
|
||||
@@ -112,7 +113,7 @@ open_gcm :: proc(ctx: ^Context_GCM, dst, nonce, aad, ciphertext, tag: []byte) ->
|
||||
return ok
|
||||
}
|
||||
|
||||
// reset_ctr sanitizes the Context_GCM. The Context_GCM must be
|
||||
// reset_gcm sanitizes the Context_GCM. The Context_GCM must be
|
||||
// re-initialized to be used again.
|
||||
reset_gcm :: proc "contextless" (ctx: ^Context_GCM) {
|
||||
reset_impl(&ctx._impl)
|
||||
@@ -120,14 +121,14 @@ reset_gcm :: proc "contextless" (ctx: ^Context_GCM) {
|
||||
}
|
||||
|
||||
@(private = "file")
|
||||
gcm_validate_common_slice_sizes :: proc(tag, nonce, aad, text: []byte) {
|
||||
gcm_validate_common_slice_sizes :: proc(tag, iv, aad, text: []byte) {
|
||||
if len(tag) != GCM_TAG_SIZE {
|
||||
panic("crypto/aes: invalid GCM tag size")
|
||||
}
|
||||
|
||||
// The specification supports nonces in the range [1, 2^64) bits.
|
||||
if l := len(nonce); l == 0 || u64(l) >= GCM_NONCE_SIZE_MAX {
|
||||
panic("crypto/aes: invalid GCM nonce size")
|
||||
// The specification supports IVs in the range [1, 2^64) bits.
|
||||
if l := len(iv); l == 0 || u64(l) >= GCM_IV_SIZE_MAX {
|
||||
panic("crypto/aes: invalid GCM IV size")
|
||||
}
|
||||
|
||||
if aad_len := u64(len(aad)); aad_len > GCM_A_MAX {
|
||||
@@ -144,7 +145,7 @@ init_ghash_ct64 :: proc(
|
||||
h: ^[_aes.GHASH_KEY_SIZE]byte,
|
||||
j0: ^[_aes.GHASH_BLOCK_SIZE]byte,
|
||||
j0_enc: ^[_aes.GHASH_BLOCK_SIZE]byte,
|
||||
nonce: []byte,
|
||||
iv: []byte,
|
||||
) {
|
||||
impl := &ctx._impl.(ct64.Context)
|
||||
|
||||
@@ -152,14 +153,14 @@ init_ghash_ct64 :: proc(
|
||||
ct64.encrypt_block(impl, h[:], h[:])
|
||||
|
||||
// Define a block, J0, as follows:
|
||||
if l := len(nonce); l == GCM_NONCE_SIZE {
|
||||
if l := len(iv); l == GCM_IV_SIZE {
|
||||
// if len(IV) = 96, then let J0 = IV || 0^31 || 1
|
||||
copy(j0[:], nonce)
|
||||
copy(j0[:], iv)
|
||||
j0[_aes.GHASH_BLOCK_SIZE - 1] = 1
|
||||
} else {
|
||||
// If len(IV) != 96, then let s = 128 ceil(len(IV)/128) - len(IV),
|
||||
// and let J0 = GHASHH(IV || 0^(s+64) || ceil(len(IV))^64).
|
||||
ct64.ghash(j0[:], h[:], nonce)
|
||||
ct64.ghash(j0[:], h[:], iv)
|
||||
|
||||
tmp: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
endian.unchecked_put_u64be(tmp[8:], u64(l) * 8)
|
||||
@@ -197,7 +198,7 @@ gctr_ct64 :: proc(
|
||||
s: ^[_aes.GHASH_BLOCK_SIZE]byte,
|
||||
src: []byte,
|
||||
h: ^[_aes.GHASH_KEY_SIZE]byte,
|
||||
nonce: ^[_aes.GHASH_BLOCK_SIZE]byte,
|
||||
iv: ^[_aes.GHASH_BLOCK_SIZE]byte,
|
||||
is_seal: bool,
|
||||
) #no_bounds_check {
|
||||
ct64_inc_ctr32 := #force_inline proc "contextless" (dst: []byte, ctr: u32) -> u32 {
|
||||
@@ -208,14 +209,14 @@ gctr_ct64 :: proc(
|
||||
// Setup the counter blocks.
|
||||
tmp, tmp2: [ct64.STRIDE][BLOCK_SIZE]byte = ---, ---
|
||||
ctrs, blks: [ct64.STRIDE][]byte = ---, ---
|
||||
ctr := endian.unchecked_get_u32be(nonce[GCM_NONCE_SIZE:]) + 1
|
||||
ctr := endian.unchecked_get_u32be(iv[GCM_IV_SIZE:]) + 1
|
||||
for i in 0 ..< ct64.STRIDE {
|
||||
// Setup scratch space for the keystream.
|
||||
blks[i] = tmp2[i][:]
|
||||
|
||||
// Pre-copy the IV to all the counter blocks.
|
||||
ctrs[i] = tmp[i][:]
|
||||
copy(ctrs[i], nonce[:GCM_NONCE_SIZE])
|
||||
copy(ctrs[i], iv[:GCM_IV_SIZE])
|
||||
}
|
||||
|
||||
impl := &ctx._impl.(ct64.Context)
|
||||
|
||||
@@ -10,12 +10,12 @@ import "core:mem"
|
||||
import "core:simd/x86"
|
||||
|
||||
@(private)
|
||||
gcm_seal_hw :: proc(ctx: ^Context_Impl_Hardware, dst, tag, nonce, aad, plaintext: []byte) {
|
||||
gcm_seal_hw :: proc(ctx: ^Context_Impl_Hardware, dst, tag, iv, aad, plaintext: []byte) {
|
||||
h: [_aes.GHASH_KEY_SIZE]byte
|
||||
j0: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
j0_enc: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
s: [_aes.GHASH_TAG_SIZE]byte
|
||||
init_ghash_hw(ctx, &h, &j0, &j0_enc, nonce)
|
||||
init_ghash_hw(ctx, &h, &j0, &j0_enc, iv)
|
||||
|
||||
// Note: Our GHASH implementation handles appending padding.
|
||||
hw_intel.ghash(s[:], h[:], aad)
|
||||
@@ -29,12 +29,12 @@ gcm_seal_hw :: proc(ctx: ^Context_Impl_Hardware, dst, tag, nonce, aad, plaintext
|
||||
}
|
||||
|
||||
@(private)
|
||||
gcm_open_hw :: proc(ctx: ^Context_Impl_Hardware, dst, nonce, aad, ciphertext, tag: []byte) -> bool {
|
||||
gcm_open_hw :: proc(ctx: ^Context_Impl_Hardware, dst, iv, aad, ciphertext, tag: []byte) -> bool {
|
||||
h: [_aes.GHASH_KEY_SIZE]byte
|
||||
j0: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
j0_enc: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
s: [_aes.GHASH_TAG_SIZE]byte
|
||||
init_ghash_hw(ctx, &h, &j0, &j0_enc, nonce)
|
||||
init_ghash_hw(ctx, &h, &j0, &j0_enc, iv)
|
||||
|
||||
hw_intel.ghash(s[:], h[:], aad)
|
||||
gctr_hw(ctx, dst, &s, ciphertext, &h, &j0, false)
|
||||
@@ -59,20 +59,20 @@ init_ghash_hw :: proc(
|
||||
h: ^[_aes.GHASH_KEY_SIZE]byte,
|
||||
j0: ^[_aes.GHASH_BLOCK_SIZE]byte,
|
||||
j0_enc: ^[_aes.GHASH_BLOCK_SIZE]byte,
|
||||
nonce: []byte,
|
||||
iv: []byte,
|
||||
) {
|
||||
// 1. Let H = CIPH(k, 0^128)
|
||||
encrypt_block_hw(ctx, h[:], h[:])
|
||||
|
||||
// Define a block, J0, as follows:
|
||||
if l := len(nonce); l == GCM_NONCE_SIZE {
|
||||
if l := len(iv); l == GCM_IV_SIZE {
|
||||
// if len(IV) = 96, then let J0 = IV || 0^31 || 1
|
||||
copy(j0[:], nonce)
|
||||
copy(j0[:], iv)
|
||||
j0[_aes.GHASH_BLOCK_SIZE - 1] = 1
|
||||
} else {
|
||||
// If len(IV) != 96, then let s = 128 ceil(len(IV)/128) - len(IV),
|
||||
// and let J0 = GHASHH(IV || 0^(s+64) || ceil(len(IV))^64).
|
||||
hw_intel.ghash(j0[:], h[:], nonce)
|
||||
hw_intel.ghash(j0[:], h[:], iv)
|
||||
|
||||
tmp: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
endian.unchecked_put_u64be(tmp[8:], u64(l) * 8)
|
||||
@@ -109,7 +109,7 @@ gctr_hw :: proc(
|
||||
s: ^[_aes.GHASH_BLOCK_SIZE]byte,
|
||||
src: []byte,
|
||||
h: ^[_aes.GHASH_KEY_SIZE]byte,
|
||||
nonce: ^[_aes.GHASH_BLOCK_SIZE]byte,
|
||||
iv: ^[_aes.GHASH_BLOCK_SIZE]byte,
|
||||
is_seal: bool,
|
||||
) #no_bounds_check {
|
||||
sks: [15]x86.__m128i = ---
|
||||
@@ -118,8 +118,8 @@ gctr_hw :: proc(
|
||||
}
|
||||
|
||||
// Setup the counter block
|
||||
ctr_blk := intrinsics.unaligned_load((^x86.__m128i)(nonce))
|
||||
ctr := endian.unchecked_get_u32be(nonce[GCM_NONCE_SIZE:]) + 1
|
||||
ctr_blk := intrinsics.unaligned_load((^x86.__m128i)(iv))
|
||||
ctr := endian.unchecked_get_u32be(iv[GCM_IV_SIZE:]) + 1
|
||||
|
||||
src, dst := src, dst
|
||||
|
||||
|
||||
@@ -10,6 +10,10 @@ Context_Impl :: union {
|
||||
Context_Impl_Hardware,
|
||||
}
|
||||
|
||||
// DEFAULT_IMPLEMENTATION is the implementation that will be used by
|
||||
// default if possible.
|
||||
DEFAULT_IMPLEMENTATION :: Implementation.Hardware
|
||||
|
||||
// Implementation is an AES implementation. Most callers will not need
|
||||
// to use this as the package will automatically select the most performant
|
||||
// implementation available (See `is_hardware_accelerated()`).
|
||||
|
||||
@@ -34,11 +34,11 @@ ctr_blocks_hw :: proc(ctx: ^Context_CTR, dst, src: []byte, nr_blocks: int) {
|
||||
}
|
||||
|
||||
@(private)
|
||||
gcm_seal_hw :: proc(ctx: ^Context_Impl_Hardware, dst, tag, nonce, aad, plaintext: []byte) {
|
||||
gcm_seal_hw :: proc(ctx: ^Context_Impl_Hardware, dst, tag, iv, aad, plaintext: []byte) {
|
||||
panic(ERR_HW_NOT_SUPPORTED)
|
||||
}
|
||||
|
||||
@(private)
|
||||
gcm_open_hw :: proc(ctx: ^Context_Impl_Hardware, dst, nonce, aad, ciphertext, tag: []byte) -> bool {
|
||||
gcm_open_hw :: proc(ctx: ^Context_Impl_Hardware, dst, iv, aad, ciphertext, tag: []byte) -> bool {
|
||||
panic(ERR_HW_NOT_SUPPORTED)
|
||||
}
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
package blake2b implements the BLAKE2b hash algorithm.
|
||||
|
||||
See:
|
||||
- https://datatracker.ietf.org/doc/html/rfc7693
|
||||
- https://www.blake2.net
|
||||
- [[ https://datatracker.ietf.org/doc/html/rfc7693 ]]
|
||||
- [[ https://www.blake2.net ]]
|
||||
*/
|
||||
package blake2b
|
||||
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
package blake2s implements the BLAKE2s hash algorithm.
|
||||
|
||||
See:
|
||||
- https://datatracker.ietf.org/doc/html/rfc7693
|
||||
- https://www.blake2.net/
|
||||
- [[ https://datatracker.ietf.org/doc/html/rfc7693 ]]
|
||||
- [[ https://www.blake2.net/ ]]
|
||||
*/
|
||||
package blake2s
|
||||
|
||||
|
||||
@@ -2,125 +2,72 @@
|
||||
package chacha20 implements the ChaCha20 and XChaCha20 stream ciphers.
|
||||
|
||||
See:
|
||||
- https://datatracker.ietf.org/doc/html/rfc8439
|
||||
- https://datatracker.ietf.org/doc/draft-irtf-cfrg-xchacha/03/
|
||||
- [[ https://datatracker.ietf.org/doc/html/rfc8439 ]]
|
||||
- [[ https://datatracker.ietf.org/doc/draft-irtf-cfrg-xchacha/03/ ]]
|
||||
*/
|
||||
package chacha20
|
||||
|
||||
import "core:bytes"
|
||||
import "core:encoding/endian"
|
||||
import "core:math/bits"
|
||||
import "core:crypto/_chacha20"
|
||||
import "core:mem"
|
||||
|
||||
// KEY_SIZE is the (X)ChaCha20 key size in bytes.
|
||||
KEY_SIZE :: 32
|
||||
// NONCE_SIZE is the ChaCha20 nonce size in bytes.
|
||||
NONCE_SIZE :: 12
|
||||
// XNONCE_SIZE is the XChaCha20 nonce size in bytes.
|
||||
XNONCE_SIZE :: 24
|
||||
|
||||
@(private)
|
||||
_MAX_CTR_IETF :: 0xffffffff
|
||||
|
||||
@(private)
|
||||
_BLOCK_SIZE :: 64
|
||||
@(private)
|
||||
_STATE_SIZE_U32 :: 16
|
||||
@(private)
|
||||
_ROUNDS :: 20
|
||||
|
||||
@(private)
|
||||
_SIGMA_0: u32 : 0x61707865
|
||||
@(private)
|
||||
_SIGMA_1: u32 : 0x3320646e
|
||||
@(private)
|
||||
_SIGMA_2: u32 : 0x79622d32
|
||||
@(private)
|
||||
_SIGMA_3: u32 : 0x6b206574
|
||||
KEY_SIZE :: _chacha20.KEY_SIZE
|
||||
// IV_SIZE is the ChaCha20 IV size in bytes.
|
||||
IV_SIZE :: _chacha20.IV_SIZE
|
||||
// XIV_SIZE is the XChaCha20 IV size in bytes.
|
||||
XIV_SIZE :: _chacha20.XIV_SIZE
|
||||
|
||||
// Context is a ChaCha20 or XChaCha20 instance.
|
||||
Context :: struct {
|
||||
_s: [_STATE_SIZE_U32]u32,
|
||||
_buffer: [_BLOCK_SIZE]byte,
|
||||
_off: int,
|
||||
_is_ietf_flavor: bool,
|
||||
_is_initialized: bool,
|
||||
_state: _chacha20.Context,
|
||||
_impl: Implementation,
|
||||
}
|
||||
|
||||
// init initializes a Context for ChaCha20 or XChaCha20 with the provided
|
||||
// key and nonce.
|
||||
init :: proc(ctx: ^Context, key, nonce: []byte) {
|
||||
// key and iv.
|
||||
init :: proc(ctx: ^Context, key, iv: []byte, impl := DEFAULT_IMPLEMENTATION) {
|
||||
if len(key) != KEY_SIZE {
|
||||
panic("crypto/chacha20: invalid ChaCha20 key size")
|
||||
panic("crypto/chacha20: invalid (X)ChaCha20 key size")
|
||||
}
|
||||
if n_len := len(nonce); n_len != NONCE_SIZE && n_len != XNONCE_SIZE {
|
||||
panic("crypto/chacha20: invalid (X)ChaCha20 nonce size")
|
||||
if l := len(iv); l != IV_SIZE && l != XIV_SIZE {
|
||||
panic("crypto/chacha20: invalid (X)ChaCha20 IV size")
|
||||
}
|
||||
|
||||
k, n := key, nonce
|
||||
k, n := key, iv
|
||||
|
||||
// Derive the XChaCha20 subkey and sub-nonce via HChaCha20.
|
||||
is_xchacha := len(nonce) == XNONCE_SIZE
|
||||
init_impl(ctx, impl)
|
||||
|
||||
is_xchacha := len(iv) == XIV_SIZE
|
||||
if is_xchacha {
|
||||
sub_key := ctx._buffer[:KEY_SIZE]
|
||||
_hchacha20(sub_key, k, n)
|
||||
sub_iv: [IV_SIZE]byte
|
||||
sub_key := ctx._state._buffer[:KEY_SIZE]
|
||||
hchacha20(sub_key, k, n, ctx._impl)
|
||||
k = sub_key
|
||||
n = n[16:24]
|
||||
copy(sub_iv[4:], n[16:])
|
||||
n = sub_iv[:]
|
||||
}
|
||||
|
||||
ctx._s[0] = _SIGMA_0
|
||||
ctx._s[1] = _SIGMA_1
|
||||
ctx._s[2] = _SIGMA_2
|
||||
ctx._s[3] = _SIGMA_3
|
||||
ctx._s[4] = endian.unchecked_get_u32le(k[0:4])
|
||||
ctx._s[5] = endian.unchecked_get_u32le(k[4:8])
|
||||
ctx._s[6] = endian.unchecked_get_u32le(k[8:12])
|
||||
ctx._s[7] = endian.unchecked_get_u32le(k[12:16])
|
||||
ctx._s[8] = endian.unchecked_get_u32le(k[16:20])
|
||||
ctx._s[9] = endian.unchecked_get_u32le(k[20:24])
|
||||
ctx._s[10] = endian.unchecked_get_u32le(k[24:28])
|
||||
ctx._s[11] = endian.unchecked_get_u32le(k[28:32])
|
||||
ctx._s[12] = 0
|
||||
if !is_xchacha {
|
||||
ctx._s[13] = endian.unchecked_get_u32le(n[0:4])
|
||||
ctx._s[14] = endian.unchecked_get_u32le(n[4:8])
|
||||
ctx._s[15] = endian.unchecked_get_u32le(n[8:12])
|
||||
} else {
|
||||
ctx._s[13] = 0
|
||||
ctx._s[14] = endian.unchecked_get_u32le(n[0:4])
|
||||
ctx._s[15] = endian.unchecked_get_u32le(n[4:8])
|
||||
_chacha20.init(&ctx._state, k, n, is_xchacha)
|
||||
|
||||
if is_xchacha {
|
||||
// The sub-key is stored in the keystream buffer. While
|
||||
// this will be overwritten in most circumstances, explicitly
|
||||
// clear it out early.
|
||||
mem.zero_explicit(&ctx._buffer, KEY_SIZE)
|
||||
mem.zero_explicit(&ctx._state._buffer, KEY_SIZE)
|
||||
}
|
||||
|
||||
ctx._off = _BLOCK_SIZE
|
||||
ctx._is_ietf_flavor = !is_xchacha
|
||||
ctx._is_initialized = true
|
||||
}
|
||||
|
||||
// seek seeks the (X)ChaCha20 stream counter to the specified block.
|
||||
seek :: proc(ctx: ^Context, block_nr: u64) {
|
||||
assert(ctx._is_initialized)
|
||||
|
||||
if ctx._is_ietf_flavor {
|
||||
if block_nr > _MAX_CTR_IETF {
|
||||
panic("crypto/chacha20: attempted to seek past maximum counter")
|
||||
}
|
||||
} else {
|
||||
ctx._s[13] = u32(block_nr >> 32)
|
||||
}
|
||||
ctx._s[12] = u32(block_nr)
|
||||
ctx._off = _BLOCK_SIZE
|
||||
_chacha20.seek(&ctx._state, block_nr)
|
||||
}
|
||||
|
||||
// xor_bytes XORs each byte in src with bytes taken from the (X)ChaCha20
|
||||
// keystream, and writes the resulting output to dst. Dst and src MUST
|
||||
// alias exactly or not at all.
|
||||
xor_bytes :: proc(ctx: ^Context, dst, src: []byte) {
|
||||
assert(ctx._is_initialized)
|
||||
assert(ctx._state._is_initialized)
|
||||
|
||||
src, dst := src, dst
|
||||
if dst_len := len(dst); dst_len < len(src) {
|
||||
@@ -131,12 +78,13 @@ xor_bytes :: proc(ctx: ^Context, dst, src: []byte) {
|
||||
panic("crypto/chacha20: dst and src alias inexactly")
|
||||
}
|
||||
|
||||
for remaining := len(src); remaining > 0; {
|
||||
st := &ctx._state
|
||||
#no_bounds_check for remaining := len(src); remaining > 0; {
|
||||
// Process multiple blocks at once
|
||||
if ctx._off == _BLOCK_SIZE {
|
||||
if nr_blocks := remaining / _BLOCK_SIZE; nr_blocks > 0 {
|
||||
direct_bytes := nr_blocks * _BLOCK_SIZE
|
||||
_do_blocks(ctx, dst, src, nr_blocks)
|
||||
if st._off == _chacha20.BLOCK_SIZE {
|
||||
if nr_blocks := remaining / _chacha20.BLOCK_SIZE; nr_blocks > 0 {
|
||||
direct_bytes := nr_blocks * _chacha20.BLOCK_SIZE
|
||||
stream_blocks(ctx, dst, src, nr_blocks)
|
||||
remaining -= direct_bytes
|
||||
if remaining == 0 {
|
||||
return
|
||||
@@ -147,17 +95,17 @@ xor_bytes :: proc(ctx: ^Context, dst, src: []byte) {
|
||||
|
||||
// If there is a partial block, generate and buffer 1 block
|
||||
// worth of keystream.
|
||||
_do_blocks(ctx, ctx._buffer[:], nil, 1)
|
||||
ctx._off = 0
|
||||
stream_blocks(ctx, st._buffer[:], nil, 1)
|
||||
st._off = 0
|
||||
}
|
||||
|
||||
// Process partial blocks from the buffered keystream.
|
||||
to_xor := min(_BLOCK_SIZE - ctx._off, remaining)
|
||||
buffered_keystream := ctx._buffer[ctx._off:]
|
||||
to_xor := min(_chacha20.BLOCK_SIZE - st._off, remaining)
|
||||
buffered_keystream := st._buffer[st._off:]
|
||||
for i := 0; i < to_xor; i = i + 1 {
|
||||
dst[i] = buffered_keystream[i] ~ src[i]
|
||||
}
|
||||
ctx._off += to_xor
|
||||
st._off += to_xor
|
||||
dst = dst[to_xor:]
|
||||
src = src[to_xor:]
|
||||
remaining -= to_xor
|
||||
@@ -166,15 +114,15 @@ xor_bytes :: proc(ctx: ^Context, dst, src: []byte) {
|
||||
|
||||
// keystream_bytes fills dst with the raw (X)ChaCha20 keystream output.
|
||||
keystream_bytes :: proc(ctx: ^Context, dst: []byte) {
|
||||
assert(ctx._is_initialized)
|
||||
assert(ctx._state._is_initialized)
|
||||
|
||||
dst := dst
|
||||
for remaining := len(dst); remaining > 0; {
|
||||
dst, st := dst, &ctx._state
|
||||
#no_bounds_check for remaining := len(dst); remaining > 0; {
|
||||
// Process multiple blocks at once
|
||||
if ctx._off == _BLOCK_SIZE {
|
||||
if nr_blocks := remaining / _BLOCK_SIZE; nr_blocks > 0 {
|
||||
direct_bytes := nr_blocks * _BLOCK_SIZE
|
||||
_do_blocks(ctx, dst, nil, nr_blocks)
|
||||
if st._off == _chacha20.BLOCK_SIZE {
|
||||
if nr_blocks := remaining / _chacha20.BLOCK_SIZE; nr_blocks > 0 {
|
||||
direct_bytes := nr_blocks * _chacha20.BLOCK_SIZE
|
||||
stream_blocks(ctx, dst, nil, nr_blocks)
|
||||
remaining -= direct_bytes
|
||||
if remaining == 0 {
|
||||
return
|
||||
@@ -184,15 +132,15 @@ keystream_bytes :: proc(ctx: ^Context, dst: []byte) {
|
||||
|
||||
// If there is a partial block, generate and buffer 1 block
|
||||
// worth of keystream.
|
||||
_do_blocks(ctx, ctx._buffer[:], nil, 1)
|
||||
ctx._off = 0
|
||||
stream_blocks(ctx, st._buffer[:], nil, 1)
|
||||
st._off = 0
|
||||
}
|
||||
|
||||
// Process partial blocks from the buffered keystream.
|
||||
to_copy := min(_BLOCK_SIZE - ctx._off, remaining)
|
||||
buffered_keystream := ctx._buffer[ctx._off:]
|
||||
to_copy := min(_chacha20.BLOCK_SIZE - st._off, remaining)
|
||||
buffered_keystream := st._buffer[st._off:]
|
||||
copy(dst[:to_copy], buffered_keystream[:to_copy])
|
||||
ctx._off += to_copy
|
||||
st._off += to_copy
|
||||
dst = dst[to_copy:]
|
||||
remaining -= to_copy
|
||||
}
|
||||
@@ -201,366 +149,5 @@ keystream_bytes :: proc(ctx: ^Context, dst: []byte) {
|
||||
// reset sanitizes the Context. The Context must be re-initialized to
|
||||
// be used again.
|
||||
reset :: proc(ctx: ^Context) {
|
||||
mem.zero_explicit(&ctx._s, size_of(ctx._s))
|
||||
mem.zero_explicit(&ctx._buffer, size_of(ctx._buffer))
|
||||
|
||||
ctx._is_initialized = false
|
||||
}
|
||||
|
||||
@(private)
|
||||
_do_blocks :: proc(ctx: ^Context, dst, src: []byte, nr_blocks: int) {
|
||||
// Enforce the maximum consumed keystream per nonce.
|
||||
//
|
||||
// While all modern "standard" definitions of ChaCha20 use
|
||||
// the IETF 32-bit counter, for XChaCha20 most common
|
||||
// implementations allow for a 64-bit counter.
|
||||
//
|
||||
// Honestly, the answer here is "use a MRAE primitive", but
|
||||
// go with common practice in the case of XChaCha20.
|
||||
if ctx._is_ietf_flavor {
|
||||
if u64(ctx._s[12]) + u64(nr_blocks) > 0xffffffff {
|
||||
panic("crypto/chacha20: maximum ChaCha20 keystream per nonce reached")
|
||||
}
|
||||
} else {
|
||||
ctr := (u64(ctx._s[13]) << 32) | u64(ctx._s[12])
|
||||
if _, carry := bits.add_u64(ctr, u64(nr_blocks), 0); carry != 0 {
|
||||
panic("crypto/chacha20: maximum XChaCha20 keystream per nonce reached")
|
||||
}
|
||||
}
|
||||
|
||||
dst, src := dst, src
|
||||
x := &ctx._s
|
||||
for n := 0; n < nr_blocks; n = n + 1 {
|
||||
x0, x1, x2, x3 := _SIGMA_0, _SIGMA_1, _SIGMA_2, _SIGMA_3
|
||||
x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 := x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]
|
||||
|
||||
for i := _ROUNDS; i > 0; i = i - 2 {
|
||||
// Even when forcing inlining manually inlining all of
|
||||
// these is decently faster.
|
||||
|
||||
// quarterround(x, 0, 4, 8, 12)
|
||||
x0 += x4
|
||||
x12 ~= x0
|
||||
x12 = bits.rotate_left32(x12, 16)
|
||||
x8 += x12
|
||||
x4 ~= x8
|
||||
x4 = bits.rotate_left32(x4, 12)
|
||||
x0 += x4
|
||||
x12 ~= x0
|
||||
x12 = bits.rotate_left32(x12, 8)
|
||||
x8 += x12
|
||||
x4 ~= x8
|
||||
x4 = bits.rotate_left32(x4, 7)
|
||||
|
||||
// quarterround(x, 1, 5, 9, 13)
|
||||
x1 += x5
|
||||
x13 ~= x1
|
||||
x13 = bits.rotate_left32(x13, 16)
|
||||
x9 += x13
|
||||
x5 ~= x9
|
||||
x5 = bits.rotate_left32(x5, 12)
|
||||
x1 += x5
|
||||
x13 ~= x1
|
||||
x13 = bits.rotate_left32(x13, 8)
|
||||
x9 += x13
|
||||
x5 ~= x9
|
||||
x5 = bits.rotate_left32(x5, 7)
|
||||
|
||||
// quarterround(x, 2, 6, 10, 14)
|
||||
x2 += x6
|
||||
x14 ~= x2
|
||||
x14 = bits.rotate_left32(x14, 16)
|
||||
x10 += x14
|
||||
x6 ~= x10
|
||||
x6 = bits.rotate_left32(x6, 12)
|
||||
x2 += x6
|
||||
x14 ~= x2
|
||||
x14 = bits.rotate_left32(x14, 8)
|
||||
x10 += x14
|
||||
x6 ~= x10
|
||||
x6 = bits.rotate_left32(x6, 7)
|
||||
|
||||
// quarterround(x, 3, 7, 11, 15)
|
||||
x3 += x7
|
||||
x15 ~= x3
|
||||
x15 = bits.rotate_left32(x15, 16)
|
||||
x11 += x15
|
||||
x7 ~= x11
|
||||
x7 = bits.rotate_left32(x7, 12)
|
||||
x3 += x7
|
||||
x15 ~= x3
|
||||
x15 = bits.rotate_left32(x15, 8)
|
||||
x11 += x15
|
||||
x7 ~= x11
|
||||
x7 = bits.rotate_left32(x7, 7)
|
||||
|
||||
// quarterround(x, 0, 5, 10, 15)
|
||||
x0 += x5
|
||||
x15 ~= x0
|
||||
x15 = bits.rotate_left32(x15, 16)
|
||||
x10 += x15
|
||||
x5 ~= x10
|
||||
x5 = bits.rotate_left32(x5, 12)
|
||||
x0 += x5
|
||||
x15 ~= x0
|
||||
x15 = bits.rotate_left32(x15, 8)
|
||||
x10 += x15
|
||||
x5 ~= x10
|
||||
x5 = bits.rotate_left32(x5, 7)
|
||||
|
||||
// quarterround(x, 1, 6, 11, 12)
|
||||
x1 += x6
|
||||
x12 ~= x1
|
||||
x12 = bits.rotate_left32(x12, 16)
|
||||
x11 += x12
|
||||
x6 ~= x11
|
||||
x6 = bits.rotate_left32(x6, 12)
|
||||
x1 += x6
|
||||
x12 ~= x1
|
||||
x12 = bits.rotate_left32(x12, 8)
|
||||
x11 += x12
|
||||
x6 ~= x11
|
||||
x6 = bits.rotate_left32(x6, 7)
|
||||
|
||||
// quarterround(x, 2, 7, 8, 13)
|
||||
x2 += x7
|
||||
x13 ~= x2
|
||||
x13 = bits.rotate_left32(x13, 16)
|
||||
x8 += x13
|
||||
x7 ~= x8
|
||||
x7 = bits.rotate_left32(x7, 12)
|
||||
x2 += x7
|
||||
x13 ~= x2
|
||||
x13 = bits.rotate_left32(x13, 8)
|
||||
x8 += x13
|
||||
x7 ~= x8
|
||||
x7 = bits.rotate_left32(x7, 7)
|
||||
|
||||
// quarterround(x, 3, 4, 9, 14)
|
||||
x3 += x4
|
||||
x14 ~= x3
|
||||
x14 = bits.rotate_left32(x14, 16)
|
||||
x9 += x14
|
||||
x4 ~= x9
|
||||
x4 = bits.rotate_left32(x4, 12)
|
||||
x3 += x4
|
||||
x14 ~= x3
|
||||
x14 = bits.rotate_left32(x14, 8)
|
||||
x9 += x14
|
||||
x4 ~= x9
|
||||
x4 = bits.rotate_left32(x4, 7)
|
||||
}
|
||||
|
||||
x0 += _SIGMA_0
|
||||
x1 += _SIGMA_1
|
||||
x2 += _SIGMA_2
|
||||
x3 += _SIGMA_3
|
||||
x4 += x[4]
|
||||
x5 += x[5]
|
||||
x6 += x[6]
|
||||
x7 += x[7]
|
||||
x8 += x[8]
|
||||
x9 += x[9]
|
||||
x10 += x[10]
|
||||
x11 += x[11]
|
||||
x12 += x[12]
|
||||
x13 += x[13]
|
||||
x14 += x[14]
|
||||
x15 += x[15]
|
||||
|
||||
// While the "correct" answer to getting more performance out of
|
||||
// this is "use vector operations", support for that is currently
|
||||
// a work in progress/to be designed.
|
||||
//
|
||||
// In the meantime:
|
||||
// - The caller(s) ensure that src/dst are valid.
|
||||
// - The compiler knows if the target is picky about alignment.
|
||||
|
||||
#no_bounds_check {
|
||||
if src != nil {
|
||||
endian.unchecked_put_u32le(dst[0:4], endian.unchecked_get_u32le(src[0:4]) ~ x0)
|
||||
endian.unchecked_put_u32le(dst[4:8], endian.unchecked_get_u32le(src[4:8]) ~ x1)
|
||||
endian.unchecked_put_u32le(dst[8:12], endian.unchecked_get_u32le(src[8:12]) ~ x2)
|
||||
endian.unchecked_put_u32le(dst[12:16], endian.unchecked_get_u32le(src[12:16]) ~ x3)
|
||||
endian.unchecked_put_u32le(dst[16:20], endian.unchecked_get_u32le(src[16:20]) ~ x4)
|
||||
endian.unchecked_put_u32le(dst[20:24], endian.unchecked_get_u32le(src[20:24]) ~ x5)
|
||||
endian.unchecked_put_u32le(dst[24:28], endian.unchecked_get_u32le(src[24:28]) ~ x6)
|
||||
endian.unchecked_put_u32le(dst[28:32], endian.unchecked_get_u32le(src[28:32]) ~ x7)
|
||||
endian.unchecked_put_u32le(dst[32:36], endian.unchecked_get_u32le(src[32:36]) ~ x8)
|
||||
endian.unchecked_put_u32le(dst[36:40], endian.unchecked_get_u32le(src[36:40]) ~ x9)
|
||||
endian.unchecked_put_u32le(dst[40:44], endian.unchecked_get_u32le(src[40:44]) ~ x10)
|
||||
endian.unchecked_put_u32le(dst[44:48], endian.unchecked_get_u32le(src[44:48]) ~ x11)
|
||||
endian.unchecked_put_u32le(dst[48:52], endian.unchecked_get_u32le(src[48:52]) ~ x12)
|
||||
endian.unchecked_put_u32le(dst[52:56], endian.unchecked_get_u32le(src[52:56]) ~ x13)
|
||||
endian.unchecked_put_u32le(dst[56:60], endian.unchecked_get_u32le(src[56:60]) ~ x14)
|
||||
endian.unchecked_put_u32le(dst[60:64], endian.unchecked_get_u32le(src[60:64]) ~ x15)
|
||||
src = src[_BLOCK_SIZE:]
|
||||
} else {
|
||||
endian.unchecked_put_u32le(dst[0:4], x0)
|
||||
endian.unchecked_put_u32le(dst[4:8], x1)
|
||||
endian.unchecked_put_u32le(dst[8:12], x2)
|
||||
endian.unchecked_put_u32le(dst[12:16], x3)
|
||||
endian.unchecked_put_u32le(dst[16:20], x4)
|
||||
endian.unchecked_put_u32le(dst[20:24], x5)
|
||||
endian.unchecked_put_u32le(dst[24:28], x6)
|
||||
endian.unchecked_put_u32le(dst[28:32], x7)
|
||||
endian.unchecked_put_u32le(dst[32:36], x8)
|
||||
endian.unchecked_put_u32le(dst[36:40], x9)
|
||||
endian.unchecked_put_u32le(dst[40:44], x10)
|
||||
endian.unchecked_put_u32le(dst[44:48], x11)
|
||||
endian.unchecked_put_u32le(dst[48:52], x12)
|
||||
endian.unchecked_put_u32le(dst[52:56], x13)
|
||||
endian.unchecked_put_u32le(dst[56:60], x14)
|
||||
endian.unchecked_put_u32le(dst[60:64], x15)
|
||||
}
|
||||
dst = dst[_BLOCK_SIZE:]
|
||||
}
|
||||
|
||||
// Increment the counter. Overflow checking is done upon
|
||||
// entry into the routine, so a 64-bit increment safely
|
||||
// covers both cases.
|
||||
new_ctr := ((u64(ctx._s[13]) << 32) | u64(ctx._s[12])) + 1
|
||||
x[12] = u32(new_ctr)
|
||||
x[13] = u32(new_ctr >> 32)
|
||||
}
|
||||
}
|
||||
|
||||
@(private)
|
||||
_hchacha20 :: proc "contextless" (dst, key, nonce: []byte) {
|
||||
x0, x1, x2, x3 := _SIGMA_0, _SIGMA_1, _SIGMA_2, _SIGMA_3
|
||||
x4 := endian.unchecked_get_u32le(key[0:4])
|
||||
x5 := endian.unchecked_get_u32le(key[4:8])
|
||||
x6 := endian.unchecked_get_u32le(key[8:12])
|
||||
x7 := endian.unchecked_get_u32le(key[12:16])
|
||||
x8 := endian.unchecked_get_u32le(key[16:20])
|
||||
x9 := endian.unchecked_get_u32le(key[20:24])
|
||||
x10 := endian.unchecked_get_u32le(key[24:28])
|
||||
x11 := endian.unchecked_get_u32le(key[28:32])
|
||||
x12 := endian.unchecked_get_u32le(nonce[0:4])
|
||||
x13 := endian.unchecked_get_u32le(nonce[4:8])
|
||||
x14 := endian.unchecked_get_u32le(nonce[8:12])
|
||||
x15 := endian.unchecked_get_u32le(nonce[12:16])
|
||||
|
||||
for i := _ROUNDS; i > 0; i = i - 2 {
|
||||
// quarterround(x, 0, 4, 8, 12)
|
||||
x0 += x4
|
||||
x12 ~= x0
|
||||
x12 = bits.rotate_left32(x12, 16)
|
||||
x8 += x12
|
||||
x4 ~= x8
|
||||
x4 = bits.rotate_left32(x4, 12)
|
||||
x0 += x4
|
||||
x12 ~= x0
|
||||
x12 = bits.rotate_left32(x12, 8)
|
||||
x8 += x12
|
||||
x4 ~= x8
|
||||
x4 = bits.rotate_left32(x4, 7)
|
||||
|
||||
// quarterround(x, 1, 5, 9, 13)
|
||||
x1 += x5
|
||||
x13 ~= x1
|
||||
x13 = bits.rotate_left32(x13, 16)
|
||||
x9 += x13
|
||||
x5 ~= x9
|
||||
x5 = bits.rotate_left32(x5, 12)
|
||||
x1 += x5
|
||||
x13 ~= x1
|
||||
x13 = bits.rotate_left32(x13, 8)
|
||||
x9 += x13
|
||||
x5 ~= x9
|
||||
x5 = bits.rotate_left32(x5, 7)
|
||||
|
||||
// quarterround(x, 2, 6, 10, 14)
|
||||
x2 += x6
|
||||
x14 ~= x2
|
||||
x14 = bits.rotate_left32(x14, 16)
|
||||
x10 += x14
|
||||
x6 ~= x10
|
||||
x6 = bits.rotate_left32(x6, 12)
|
||||
x2 += x6
|
||||
x14 ~= x2
|
||||
x14 = bits.rotate_left32(x14, 8)
|
||||
x10 += x14
|
||||
x6 ~= x10
|
||||
x6 = bits.rotate_left32(x6, 7)
|
||||
|
||||
// quarterround(x, 3, 7, 11, 15)
|
||||
x3 += x7
|
||||
x15 ~= x3
|
||||
x15 = bits.rotate_left32(x15, 16)
|
||||
x11 += x15
|
||||
x7 ~= x11
|
||||
x7 = bits.rotate_left32(x7, 12)
|
||||
x3 += x7
|
||||
x15 ~= x3
|
||||
x15 = bits.rotate_left32(x15, 8)
|
||||
x11 += x15
|
||||
x7 ~= x11
|
||||
x7 = bits.rotate_left32(x7, 7)
|
||||
|
||||
// quarterround(x, 0, 5, 10, 15)
|
||||
x0 += x5
|
||||
x15 ~= x0
|
||||
x15 = bits.rotate_left32(x15, 16)
|
||||
x10 += x15
|
||||
x5 ~= x10
|
||||
x5 = bits.rotate_left32(x5, 12)
|
||||
x0 += x5
|
||||
x15 ~= x0
|
||||
x15 = bits.rotate_left32(x15, 8)
|
||||
x10 += x15
|
||||
x5 ~= x10
|
||||
x5 = bits.rotate_left32(x5, 7)
|
||||
|
||||
// quarterround(x, 1, 6, 11, 12)
|
||||
x1 += x6
|
||||
x12 ~= x1
|
||||
x12 = bits.rotate_left32(x12, 16)
|
||||
x11 += x12
|
||||
x6 ~= x11
|
||||
x6 = bits.rotate_left32(x6, 12)
|
||||
x1 += x6
|
||||
x12 ~= x1
|
||||
x12 = bits.rotate_left32(x12, 8)
|
||||
x11 += x12
|
||||
x6 ~= x11
|
||||
x6 = bits.rotate_left32(x6, 7)
|
||||
|
||||
// quarterround(x, 2, 7, 8, 13)
|
||||
x2 += x7
|
||||
x13 ~= x2
|
||||
x13 = bits.rotate_left32(x13, 16)
|
||||
x8 += x13
|
||||
x7 ~= x8
|
||||
x7 = bits.rotate_left32(x7, 12)
|
||||
x2 += x7
|
||||
x13 ~= x2
|
||||
x13 = bits.rotate_left32(x13, 8)
|
||||
x8 += x13
|
||||
x7 ~= x8
|
||||
x7 = bits.rotate_left32(x7, 7)
|
||||
|
||||
// quarterround(x, 3, 4, 9, 14)
|
||||
x3 += x4
|
||||
x14 ~= x3
|
||||
x14 = bits.rotate_left32(x14, 16)
|
||||
x9 += x14
|
||||
x4 ~= x9
|
||||
x4 = bits.rotate_left32(x4, 12)
|
||||
x3 += x4
|
||||
x14 ~= x3
|
||||
x14 = bits.rotate_left32(x14, 8)
|
||||
x9 += x14
|
||||
x4 ~= x9
|
||||
x4 = bits.rotate_left32(x4, 7)
|
||||
}
|
||||
|
||||
endian.unchecked_put_u32le(dst[0:4], x0)
|
||||
endian.unchecked_put_u32le(dst[4:8], x1)
|
||||
endian.unchecked_put_u32le(dst[8:12], x2)
|
||||
endian.unchecked_put_u32le(dst[12:16], x3)
|
||||
endian.unchecked_put_u32le(dst[16:20], x12)
|
||||
endian.unchecked_put_u32le(dst[20:24], x13)
|
||||
endian.unchecked_put_u32le(dst[24:28], x14)
|
||||
endian.unchecked_put_u32le(dst[28:32], x15)
|
||||
_chacha20.reset(&ctx._state)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
package chacha20
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto/_chacha20/ref"
|
||||
import "core:crypto/_chacha20/simd128"
|
||||
import "core:crypto/_chacha20/simd256"
|
||||
|
||||
// DEFAULT_IMPLEMENTATION is the backend requested when the caller does
// not pick one explicitly.  The request is only a preference: `init_impl`
// downgrades it when the corresponding SIMD package reports that it is
// not performant.
DEFAULT_IMPLEMENTATION :: Implementation.Simd256

// Implementation identifies a ChaCha20 backend.  Most callers will not
// need to use this, as the package automatically selects the most
// performant implementation available.
Implementation :: enum {
	// Portable dispatches to the `ref` package.
	Portable,
	// Simd128 dispatches to the `simd128` package.
	Simd128,
	// Simd256 dispatches to the `simd256` package.
	Simd256,
}
|
||||
|
||||
// init_impl records the backend that `ctx` will use, starting from the
// requested `impl` and stepping down one level at a time
// (Simd256 -> Simd128 -> Portable) whenever the requested SIMD package
// reports that it is not performant.
@(private)
init_impl :: proc(ctx: ^Context, impl: Implementation) {
	selected := impl

	// Only query a SIMD package when it is actually the current candidate.
	if selected == .Simd256 {
		if !simd256.is_performant() {
			selected = .Simd128
		}
	}
	// Reached either by explicit request or by the downgrade above.
	if selected == .Simd128 {
		if !simd128.is_performant() {
			selected = .Portable
		}
	}

	ctx._impl = selected
}
|
||||
|
||||
// stream_blocks forwards a request for `nr_blocks` blocks to the backend
// selected for `ctx`, passing along the shared `_chacha20` state.
//
// Callers in this package pass `src = nil` when they want raw keystream
// written to `dst` instead of `src` XORed with the keystream.
@(private)
stream_blocks :: proc(ctx: ^Context, dst, src: []byte, nr_blocks: int) {
	st := &ctx._state

	switch ctx._impl {
	case .Portable:
		ref.stream_blocks(st, dst, src, nr_blocks)
	case .Simd128:
		simd128.stream_blocks(st, dst, src, nr_blocks)
	case .Simd256:
		simd256.stream_blocks(st, dst, src, nr_blocks)
	}
}
|
||||
|
||||
// hchacha20 forwards an HChaCha20 computation to the backend named by
// `impl`, writing the result to `dst`.  `init` uses it to derive the
// XChaCha20 sub-key from `key` and `iv`.
@(private)
hchacha20 :: proc "contextless" (dst, key, iv: []byte, impl: Implementation) {
	switch impl {
	case .Portable:
		ref.hchacha20(dst, key, iv)
	case .Simd128:
		simd128.hchacha20(dst, key, iv)
	case .Simd256:
		simd256.hchacha20(dst, key, iv)
	}
}
|
||||
@@ -1,9 +1,11 @@
|
||||
/*
|
||||
package chacha20poly1305 implements the AEAD_CHACHA20_POLY1305 Authenticated
|
||||
Encryption with Additional Data algorithm.
|
||||
package chacha20poly1305 implements the AEAD_CHACHA20_POLY1305 and
|
||||
AEAD_XChaCha20_Poly1305 Authenticated Encryption with Additional Data
|
||||
algorithms.
|
||||
|
||||
See:
|
||||
- https://www.rfc-editor.org/rfc/rfc8439
|
||||
- [[ https://www.rfc-editor.org/rfc/rfc8439 ]]
|
||||
- [[ https://datatracker.ietf.org/doc/html/draft-arciszewski-xchacha-03 ]]
|
||||
*/
|
||||
package chacha20poly1305
|
||||
|
||||
@@ -15,8 +17,10 @@ import "core:mem"
|
||||
|
||||
// KEY_SIZE is the chacha20poly1305 key size in bytes.
|
||||
KEY_SIZE :: chacha20.KEY_SIZE
|
||||
// NONCE_SIZE is the chacha20poly1305 nonce size in bytes.
|
||||
NONCE_SIZE :: chacha20.NONCE_SIZE
|
||||
// IV_SIZE is the chacha20poly1305 IV size in bytes.
|
||||
IV_SIZE :: chacha20.IV_SIZE
|
||||
// XIV_SIZE is the xchacha20poly1305 IV size in bytes.
|
||||
XIV_SIZE :: chacha20.XIV_SIZE
|
||||
// TAG_SIZE is the chacha20poly1305 tag size in bytes.
|
||||
TAG_SIZE :: poly1305.TAG_SIZE
|
||||
|
||||
@@ -24,15 +28,13 @@ TAG_SIZE :: poly1305.TAG_SIZE
|
||||
_P_MAX :: 64 * 0xffffffff // 64 * (2^32-1)
|
||||
|
||||
@(private)
|
||||
_validate_common_slice_sizes :: proc (tag, key, nonce, aad, text: []byte) {
|
||||
_validate_common_slice_sizes :: proc (tag, iv, aad, text: []byte, is_xchacha: bool) {
|
||||
if len(tag) != TAG_SIZE {
|
||||
panic("crypto/chacha20poly1305: invalid destination tag size")
|
||||
}
|
||||
if len(key) != KEY_SIZE {
|
||||
panic("crypto/chacha20poly1305: invalid key size")
|
||||
}
|
||||
if len(nonce) != NONCE_SIZE {
|
||||
panic("crypto/chacha20poly1305: invalid nonce size")
|
||||
expected_iv_len := is_xchacha ? XIV_SIZE : IV_SIZE
|
||||
if len(iv) != expected_iv_len {
|
||||
panic("crypto/chacha20poly1305: invalid IV size")
|
||||
}
|
||||
|
||||
#assert(size_of(int) == 8 || size_of(int) <= 4)
|
||||
@@ -59,18 +61,52 @@ _update_mac_pad16 :: #force_inline proc (ctx: ^poly1305.Context, x_len: int) {
|
||||
}
|
||||
}
|
||||
|
||||
// encrypt encrypts the plaintext and authenticates the aad and ciphertext,
|
||||
// with the provided key and nonce, stores the output in ciphertext and tag.
|
||||
encrypt :: proc (ciphertext, tag, key, nonce, aad, plaintext: []byte) {
|
||||
_validate_common_slice_sizes(tag, key, nonce, aad, plaintext)
|
||||
// Context is a keyed (X)ChaCha20-Poly1305 instance.
Context :: struct {
	// Copy of the key supplied to `init`; wiped by `reset`.
	_key:            [KEY_SIZE]byte,
	// ChaCha20 backend handed to `chacha20.init` for every operation.
	_impl:           chacha20.Implementation,
	// Selects the expected IV length: XIV_SIZE when true, IV_SIZE otherwise.
	_is_xchacha:     bool,
	// Set by `init`, cleared by `reset`.
	_is_initialized: bool,
}
|
||||
|
||||
// init keys a Context for AEAD_CHACHA20_POLY1305.
//
// Inputs:
// - ctx: the Context to initialize.
// - key: the key, which must be exactly KEY_SIZE bytes; it is copied
//   into the Context.
// - impl: the preferred ChaCha20 implementation.
//
// Panics if the key length is not KEY_SIZE.
init :: proc(ctx: ^Context, key: []byte, impl := chacha20.DEFAULT_IMPLEMENTATION) {
	key_len := len(key)
	if key_len != KEY_SIZE {
		panic("crypto/chacha20poly1305: invalid key size")
	}

	ctx._impl = impl
	ctx._is_xchacha = false
	copy(ctx._key[:], key)

	// Mark the instance usable only once every other field is populated.
	ctx._is_initialized = true
}
|
||||
|
||||
// init_xchacha keys a Context for AEAD_XChaCha20_Poly1305.  It takes the
// same arguments as `init` and panics under the same condition (a key
// that is not KEY_SIZE bytes long).
//
// Note: While there are multiple definitions of XChaCha20-Poly1305,
// this sticks to the IETF draft and uses a 32-bit counter.
init_xchacha :: proc(ctx: ^Context, key: []byte, impl := chacha20.DEFAULT_IMPLEMENTATION) {
	// Key the Context exactly as for the regular construction ...
	init(ctx, key, impl)

	// ... then switch it over to the extended-IV variant.
	ctx._is_xchacha = true
}
|
||||
|
||||
// seal encrypts the plaintext and authenticates the aad and ciphertext,
|
||||
// with the provided Context and iv, stores the output in dst and tag.
|
||||
//
|
||||
// dst and plaintext MUST alias exactly or not at all.
|
||||
seal :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
|
||||
ciphertext := dst
|
||||
_validate_common_slice_sizes(tag, iv, aad, plaintext, ctx._is_xchacha)
|
||||
if len(ciphertext) != len(plaintext) {
|
||||
panic("crypto/chacha20poly1305: invalid destination ciphertext size")
|
||||
}
|
||||
|
||||
stream_ctx: chacha20.Context = ---
|
||||
chacha20.init(&stream_ctx, key, nonce)
|
||||
chacha20.init(&stream_ctx, ctx._key[:],iv, ctx._impl)
|
||||
stream_ctx._state._is_ietf_flavor = true
|
||||
|
||||
// otk = poly1305_key_gen(key, nonce)
|
||||
// otk = poly1305_key_gen(key, iv)
|
||||
otk: [poly1305.KEY_SIZE]byte = ---
|
||||
chacha20.keystream_bytes(&stream_ctx, otk[:])
|
||||
mac_ctx: poly1305.Context = ---
|
||||
@@ -87,7 +123,7 @@ encrypt :: proc (ciphertext, tag, key, nonce, aad, plaintext: []byte) {
|
||||
poly1305.update(&mac_ctx, aad)
|
||||
_update_mac_pad16(&mac_ctx, aad_len)
|
||||
|
||||
// ciphertext = chacha20_encrypt(key, 1, nonce, plaintext)
|
||||
// ciphertext = chacha20_encrypt(key, 1, iv, plaintext)
|
||||
chacha20.seek(&stream_ctx, 1)
|
||||
chacha20.xor_bytes(&stream_ctx, ciphertext, plaintext)
|
||||
chacha20.reset(&stream_ctx) // Don't need the stream context anymore.
|
||||
@@ -107,13 +143,16 @@ encrypt :: proc (ciphertext, tag, key, nonce, aad, plaintext: []byte) {
|
||||
poly1305.final(&mac_ctx, tag) // Implicitly sanitizes context.
|
||||
}
|
||||
|
||||
// decrypt authenticates the aad and ciphertext, and decrypts the ciphertext,
|
||||
// with the provided key, nonce, and tag, and stores the output in plaintext,
|
||||
// returning true iff the authentication was successful.
|
||||
// open authenticates the aad and ciphertext, and decrypts the ciphertext,
|
||||
// with the provided Context, iv, and tag, and stores the output in dst,
|
||||
// returning true iff the authentication was successful. If authentication
|
||||
// fails, the destination buffer will be zeroed.
|
||||
//
|
||||
// If authentication fails, the destination plaintext buffer will be zeroed.
|
||||
decrypt :: proc (plaintext, tag, key, nonce, aad, ciphertext: []byte) -> bool {
|
||||
_validate_common_slice_sizes(tag, key, nonce, aad, ciphertext)
|
||||
// dst and ciphertext MUST alias exactly or not at all.
|
||||
@(require_results)
|
||||
open :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
|
||||
plaintext := dst
|
||||
_validate_common_slice_sizes(tag, iv, aad, ciphertext, ctx._is_xchacha)
|
||||
if len(ciphertext) != len(plaintext) {
|
||||
panic("crypto/chacha20poly1305: invalid destination plaintext size")
|
||||
}
|
||||
@@ -123,9 +162,10 @@ decrypt :: proc (plaintext, tag, key, nonce, aad, ciphertext: []byte) -> bool {
|
||||
// points where needed.
|
||||
|
||||
stream_ctx: chacha20.Context = ---
|
||||
chacha20.init(&stream_ctx, key, nonce)
|
||||
chacha20.init(&stream_ctx, ctx._key[:], iv, ctx._impl)
|
||||
stream_ctx._state._is_ietf_flavor = true
|
||||
|
||||
// otk = poly1305_key_gen(key, nonce)
|
||||
// otk = poly1305_key_gen(key, iv)
|
||||
otk: [poly1305.KEY_SIZE]byte = ---
|
||||
chacha20.keystream_bytes(&stream_ctx, otk[:])
|
||||
defer chacha20.reset(&stream_ctx)
|
||||
@@ -160,9 +200,17 @@ decrypt :: proc (plaintext, tag, key, nonce, aad, ciphertext: []byte) -> bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// plaintext = chacha20_decrypt(key, 1, nonce, ciphertext)
|
||||
// plaintext = chacha20_decrypt(key, 1, iv, ciphertext)
|
||||
chacha20.seek(&stream_ctx, 1)
|
||||
chacha20.xor_bytes(&stream_ctx, plaintext, ciphertext)
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// reset sanitizes the Context by wiping the stored key and clearing its
// flags.  The Context must be re-initialized before it is used again.
reset :: proc "contextless" (ctx: ^Context) {
	// Clear the flags first so the instance is no longer marked as keyed.
	ctx._is_initialized = false
	ctx._is_xchacha = false

	// Explicit zeroing, so the wipe of the key material is not elided.
	mem.zero_explicit(&ctx._key, len(ctx._key))
}
|
||||
|
||||
@@ -2,9 +2,9 @@
|
||||
package ed25519 implements the Ed25519 EdDSA signature algorithm.
|
||||
|
||||
See:
|
||||
- https://datatracker.ietf.org/doc/html/rfc8032
|
||||
- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.186-5.pdf
|
||||
- https://eprint.iacr.org/2020/1244.pdf
|
||||
- [[ https://datatracker.ietf.org/doc/html/rfc8032 ]]
|
||||
- [[ https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.186-5.pdf ]]
|
||||
- [[ https://eprint.iacr.org/2020/1244.pdf ]]
|
||||
*/
|
||||
package ed25519
|
||||
|
||||
@@ -21,7 +21,7 @@ PUBLIC_KEY_SIZE :: 32
|
||||
SIGNATURE_SIZE :: 64
|
||||
|
||||
@(private)
|
||||
NONCE_SIZE :: 32
|
||||
HDIGEST2_SIZE :: 32
|
||||
|
||||
// Private_Key is an Ed25519 private key.
|
||||
Private_Key :: struct {
|
||||
@@ -33,7 +33,7 @@ Private_Key :: struct {
|
||||
// See: https://github.com/MystenLabs/ed25519-unsafe-libs
|
||||
_b: [PRIVATE_KEY_SIZE]byte,
|
||||
_s: grp.Scalar,
|
||||
_nonce: [NONCE_SIZE]byte,
|
||||
_hdigest2: [HDIGEST2_SIZE]byte,
|
||||
_pub_key: Public_Key,
|
||||
_is_initialized: bool,
|
||||
}
|
||||
@@ -63,7 +63,7 @@ private_key_set_bytes :: proc(priv_key: ^Private_Key, b: []byte) -> bool {
|
||||
sha2.final(&ctx, h_bytes[:])
|
||||
|
||||
copy(priv_key._b[:], b)
|
||||
copy(priv_key._nonce[:], h_bytes[32:])
|
||||
copy(priv_key._hdigest2[:], h_bytes[32:])
|
||||
grp.sc_set_bytes_rfc8032(&priv_key._s, h_bytes[:32])
|
||||
|
||||
// Derive the corresponding public key.
|
||||
@@ -116,7 +116,7 @@ sign :: proc(priv_key: ^Private_Key, msg, sig: []byte) {
|
||||
ctx: sha2.Context_512 = ---
|
||||
digest_bytes: [sha2.DIGEST_SIZE_512]byte = ---
|
||||
sha2.init_512(&ctx)
|
||||
sha2.update(&ctx, priv_key._nonce[:])
|
||||
sha2.update(&ctx, priv_key._hdigest2[:])
|
||||
sha2.update(&ctx, msg)
|
||||
sha2.final(&ctx, digest_bytes[:])
|
||||
|
||||
|
||||
+28
-30
@@ -17,46 +17,44 @@ accomplish common tasks.
|
||||
A third optional boolean parameter controls if the file is streamed
|
||||
(default), or read at once.
|
||||
|
||||
```odin
|
||||
package hash_example
|
||||
Example:
|
||||
package hash_example
|
||||
|
||||
import "core:crypto/hash"
|
||||
import "core:crypto/hash"
|
||||
|
||||
main :: proc() {
|
||||
input := "Feed the fire."
|
||||
main :: proc() {
|
||||
input := "Feed the fire."
|
||||
|
||||
// Compute the digest, using the high level API.
|
||||
returned_digest := hash.hash(hash.Algorithm.SHA512_256, input)
|
||||
defer delete(returned_digest)
|
||||
// Compute the digest, using the high level API.
|
||||
returned_digest := hash.hash(hash.Algorithm.SHA512_256, input)
|
||||
defer delete(returned_digest)
|
||||
|
||||
// Variant that takes a destination buffer, instead of returning
|
||||
// the digest.
|
||||
digest := make([]byte, hash.DIGEST_SIZES[hash.Algorithm.BLAKE2B]) // @note: Destination buffer has to be at least as big as the digest size of the hash.
|
||||
defer delete(digest)
|
||||
hash.hash(hash.Algorithm.BLAKE2B, input, digest)
|
||||
}
|
||||
```
|
||||
// Variant that takes a destination buffer, instead of returning
|
||||
// the digest.
|
||||
digest := make([]byte, hash.DIGEST_SIZES[hash.Algorithm.BLAKE2B]) // @note: Destination buffer has to be at least as big as the digest size of the hash.
|
||||
defer delete(digest)
|
||||
hash.hash(hash.Algorithm.BLAKE2B, input, digest)
|
||||
}
|
||||
|
||||
A generic low level API is provided supporting the init/update/final interface
|
||||
that is typical with cryptographic hash function implementations.
|
||||
|
||||
```odin
|
||||
package hash_example
|
||||
Example:
|
||||
package hash_example
|
||||
|
||||
import "core:crypto/hash"
|
||||
import "core:crypto/hash"
|
||||
|
||||
main :: proc() {
|
||||
input := "Let the cinders burn."
|
||||
main :: proc() {
|
||||
input := "Let the cinders burn."
|
||||
|
||||
// Compute the digest, using the low level API.
|
||||
ctx: hash.Context
|
||||
digest := make([]byte, hash.DIGEST_SIZES[hash.Algorithm.SHA3_512])
|
||||
defer delete(digest)
|
||||
// Compute the digest, using the low level API.
|
||||
ctx: hash.Context
|
||||
digest := make([]byte, hash.DIGEST_SIZES[hash.Algorithm.SHA3_512])
|
||||
defer delete(digest)
|
||||
|
||||
hash.init(&ctx, hash.Algorithm.SHA3_512)
|
||||
hash.update(&ctx, transmute([]byte)input)
|
||||
hash.final(&ctx, digest)
|
||||
}
|
||||
```
|
||||
hash.init(&ctx, hash.Algorithm.SHA3_512)
|
||||
hash.update(&ctx, transmute([]byte)input)
|
||||
hash.final(&ctx, digest)
|
||||
}
|
||||
*/
|
||||
package crypto_hash
|
||||
package crypto_hash
|
||||
|
||||
@@ -28,20 +28,26 @@ hash_bytes :: proc(algorithm: Algorithm, data: []byte, allocator := context.allo
|
||||
|
||||
// hash_string_to_buffer will hash the given input and assign the
|
||||
// computed digest to the third parameter. It requires that the
|
||||
// destination buffer is at least as big as the digest size.
|
||||
hash_string_to_buffer :: proc(algorithm: Algorithm, data: string, hash: []byte) {
|
||||
hash_bytes_to_buffer(algorithm, transmute([]byte)(data), hash)
|
||||
// destination buffer is at least as big as the digest size. The
|
||||
// provided destination buffer is returned to match the behavior of
|
||||
// `hash_string`.
|
||||
hash_string_to_buffer :: proc(algorithm: Algorithm, data: string, hash: []byte) -> []byte {
|
||||
return hash_bytes_to_buffer(algorithm, transmute([]byte)(data), hash)
|
||||
}
|
||||
|
||||
// hash_bytes_to_buffer will hash the given input and write the
|
||||
// computed digest into the third parameter. It requires that the
|
||||
// destination buffer is at least as big as the digest size.
|
||||
hash_bytes_to_buffer :: proc(algorithm: Algorithm, data, hash: []byte) {
|
||||
// destination buffer is at least as big as the digest size. The
|
||||
// provided destination buffer is returned to match the behavior of
|
||||
// `hash_bytes`.
|
||||
hash_bytes_to_buffer :: proc(algorithm: Algorithm, data, hash: []byte) -> []byte {
|
||||
ctx: Context
|
||||
|
||||
init(&ctx, algorithm)
|
||||
update(&ctx, data)
|
||||
final(&ctx, hash)
|
||||
|
||||
return hash
|
||||
}
|
||||
|
||||
// hash_stream will incrementally fully consume a stream, and return the
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
package hkdf implements the HKDF HMAC-based Extract-and-Expand Key
|
||||
Derivation Function.
|
||||
|
||||
See: https://www.rfc-editor.org/rfc/rfc5869
|
||||
See: [[ https://www.rfc-editor.org/rfc/rfc5869 ]]
|
||||
*/
|
||||
package hkdf
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
package hmac implements the HMAC MAC algorithm.
|
||||
|
||||
See:
|
||||
- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.198-1.pdf
|
||||
- [[ https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.198-1.pdf ]]
|
||||
*/
|
||||
package hmac
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
package kmac implements the KMAC MAC algorithm.
|
||||
|
||||
See:
|
||||
- https://nvlpubs.nist.gov/nistpubs/specialpublications/nist.sp.800-185.pdf
|
||||
- [[ https://nvlpubs.nist.gov/nistpubs/specialpublications/nist.sp.800-185.pdf ]]
|
||||
*/
|
||||
package kmac
|
||||
|
||||
|
||||
@@ -5,8 +5,8 @@ WARNING: The MD5 algorithm is known to be insecure and should only be
|
||||
used for interoperating with legacy applications.
|
||||
|
||||
See:
|
||||
- https://eprint.iacr.org/2005/075
|
||||
- https://datatracker.ietf.org/doc/html/rfc1321
|
||||
- [[ https://eprint.iacr.org/2005/075 ]]
|
||||
- [[ https://datatracker.ietf.org/doc/html/rfc1321 ]]
|
||||
*/
|
||||
package md5
|
||||
|
||||
|
||||
@@ -5,9 +5,9 @@ WARNING: The SHA1 algorithm is known to be insecure and should only be
|
||||
used for interoperating with legacy applications.
|
||||
|
||||
See:
|
||||
- https://eprint.iacr.org/2017/190
|
||||
- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf
|
||||
- https://datatracker.ietf.org/doc/html/rfc3174
|
||||
- [[ https://eprint.iacr.org/2017/190 ]]
|
||||
- [[ https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf ]]
|
||||
- [[ https://datatracker.ietf.org/doc/html/rfc3174 ]]
|
||||
*/
|
||||
package sha1
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
package pbkdf2 implements the PBKDF2 password-based key derivation function.
|
||||
|
||||
See: https://www.rfc-editor.org/rfc/rfc2898
|
||||
See: [[ https://www.rfc-editor.org/rfc/rfc2898 ]]
|
||||
*/
|
||||
package pbkdf2
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
package poly1305 implements the Poly1305 one-time MAC algorithm.
|
||||
|
||||
See:
|
||||
- https://datatracker.ietf.org/doc/html/rfc8439
|
||||
- [[ https://datatracker.ietf.org/doc/html/rfc8439 ]]
|
||||
*/
|
||||
package poly1305
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
package ristretto255 implements the ristretto255 prime-order group.
|
||||
|
||||
See:
|
||||
- https://www.rfc-editor.org/rfc/rfc9496
|
||||
- [[ https://www.rfc-editor.org/rfc/rfc9496 ]]
|
||||
*/
|
||||
package ristretto255
|
||||
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
package sha2 implements the SHA2 hash algorithm family.
|
||||
|
||||
See:
|
||||
- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf
|
||||
- https://datatracker.ietf.org/doc/html/rfc3874
|
||||
- [[ https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf ]]
|
||||
- [[ https://datatracker.ietf.org/doc/html/rfc3874 ]]
|
||||
*/
|
||||
package sha2
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ pre-standardization Keccak algorithm is required, it can be found in
|
||||
crypto/legacy/keccak.
|
||||
|
||||
See:
|
||||
- https://nvlpubs.nist.gov/nistpubs/fips/nist.fips.202.pdf
|
||||
- [[ https://nvlpubs.nist.gov/nistpubs/fips/nist.fips.202.pdf ]]
|
||||
*/
|
||||
package sha3
|
||||
|
||||
|
||||
@@ -4,8 +4,8 @@ package shake implements the SHAKE and cSHAKE XOF algorithm families.
|
||||
The SHA3 hash algorithm can be found in crypto/sha3.
|
||||
|
||||
See:
|
||||
- https://nvlpubs.nist.gov/nistpubs/fips/nist.fips.202.pdf
|
||||
- https://nvlpubs.nist.gov/nistpubs/specialpublications/nist.sp.800-185.pdf
|
||||
- [[ https://nvlpubs.nist.gov/nistpubs/fips/nist.fips.202.pdf ]]
|
||||
- [[ https://nvlpubs.nist.gov/nistpubs/specialpublications/nist.sp.800-185.pdf ]]
|
||||
*/
|
||||
package shake
|
||||
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
/*
|
||||
package siphash implements the SipHash hashing algorithm.
|
||||
|
||||
Use the specific procedures for a certain setup. The generic procedures will default to Siphash 2-4.
|
||||
|
||||
See:
|
||||
- [[ https://github.com/veorq/SipHash ]]
|
||||
- [[ https://www.aumasson.jp/siphash/siphash.pdf ]]
|
||||
*/
|
||||
package siphash
|
||||
|
||||
/*
|
||||
@@ -6,10 +15,6 @@ package siphash
|
||||
|
||||
List of contributors:
|
||||
zhibog: Initial implementation.
|
||||
|
||||
Implementation of the SipHash hashing algorithm, as defined at <https://github.com/veorq/SipHash> and <https://www.aumasson.jp/siphash/siphash.pdf>
|
||||
|
||||
Use the specific procedures for a certain setup. The generic procdedures will default to Siphash 2-4
|
||||
*/
|
||||
|
||||
import "core:crypto"
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
package sm3 implements the SM3 hash algorithm.
|
||||
|
||||
See:
|
||||
- https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02
|
||||
- [[ https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02 ]]
|
||||
*/
|
||||
package sm3
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
package tuplehash implements the TupleHash and TupleHashXOF algorithms.
|
||||
|
||||
See:
|
||||
- https://nvlpubs.nist.gov/nistpubs/specialpublications/nist.sp.800-185.pdf
|
||||
- [[ https://nvlpubs.nist.gov/nistpubs/specialpublications/nist.sp.800-185.pdf ]]
|
||||
*/
|
||||
package tuplehash
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ package x25519 implements the X25519 (aka curve25519) Elliptic-Curve
|
||||
Diffie-Hellman key exchange protocol.
|
||||
|
||||
See:
|
||||
- https://www.rfc-editor.org/rfc/rfc7748
|
||||
- [[ https://www.rfc-editor.org/rfc/rfc7748 ]]
|
||||
*/
|
||||
package x25519
|
||||
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
//+build !windows !linux !darwin
|
||||
//+build !windows
|
||||
//+build !linux
|
||||
//+build !darwin
|
||||
package debug_trace
|
||||
|
||||
import "base:runtime"
|
||||
|
||||
@@ -4,7 +4,6 @@ Package `core:dynlib` implements loading of shared libraries/DLLs and their symb
|
||||
The behaviour of dynamically loaded libraries is specific to the target platform of the program.
|
||||
For in depth detail on the underlying behaviour please refer to your target platform's documentation.
|
||||
|
||||
See `example` directory for an example library exporting 3 symbols and a host program loading them automatically
|
||||
by defining a symbol table struct.
|
||||
For a full example, see: [[ core/dynlib/example; https://github.com/odin-lang/Odin/tree/master/core/dynlib/example ]]
|
||||
*/
|
||||
package dynlib
|
||||
|
||||
@@ -13,8 +13,8 @@ If your terminal supports 24-bit true color mode, you can also do this:
|
||||
fmt.println(ansi.CSI + ansi.FG_COLOR_24_BIT + ";0;255;255" + ansi.SGR + "Hellope!" + ansi.CSI + ansi.RESET + ansi.SGR)
|
||||
|
||||
For more information, see:
|
||||
1. https://en.wikipedia.org/wiki/ANSI_escape_code
|
||||
2. https://www.vt100.net/docs/vt102-ug/chapter5.html
|
||||
3. https://invisible-island.net/xterm/ctlseqs/ctlseqs.html
|
||||
- [[ https://en.wikipedia.org/wiki/ANSI_escape_code ]]
|
||||
- [[ https://www.vt100.net/docs/vt102-ug/chapter5.html ]]
|
||||
- [[ https://invisible-island.net/xterm/ctlseqs/ctlseqs.html ]]
|
||||
*/
|
||||
package ansi
|
||||
|
||||
@@ -3,6 +3,7 @@ package encoding_cbor
|
||||
import "base:intrinsics"
|
||||
|
||||
import "core:encoding/json"
|
||||
import "core:encoding/hex"
|
||||
import "core:io"
|
||||
import "core:mem"
|
||||
import "core:strconv"
|
||||
@@ -399,11 +400,11 @@ to_diagnostic_format_writer :: proc(w: io.Writer, val: Value, padding := 0) -> i
|
||||
io.write_string(w, str) or_return
|
||||
|
||||
case bool: io.write_string(w, "true" if v else "false") or_return
|
||||
case Nil: io.write_string(w, "nil") or_return
|
||||
case Nil: io.write_string(w, "null") or_return
|
||||
case Undefined: io.write_string(w, "undefined") or_return
|
||||
case ^Bytes:
|
||||
io.write_string(w, "h'") or_return
|
||||
for b in v { io.write_int(w, int(b), 16) or_return }
|
||||
hex.encode_into_writer(w, v^) or_return
|
||||
io.write_string(w, "'") or_return
|
||||
case ^Text:
|
||||
io.write_string(w, `"`) or_return
|
||||
|
||||
@@ -481,9 +481,7 @@ _marshal_into_encoder :: proc(e: Encoder, v: any, ti: ^runtime.Type_Info) -> (er
|
||||
}
|
||||
}
|
||||
|
||||
marshal_entry :: #force_inline proc(e: Encoder, info: runtime.Type_Info_Struct, v: any, name: string, i: int) -> Marshal_Error {
|
||||
err_conv(_encode_text(e, name)) or_return
|
||||
|
||||
marshal_entry :: #force_inline proc(e: Encoder, info: runtime.Type_Info_Struct, v: any, i: int) -> Marshal_Error {
|
||||
id := info.types[i].id
|
||||
data := rawptr(uintptr(v.data) + info.offsets[i])
|
||||
field_any := any{data, id}
|
||||
@@ -517,7 +515,7 @@ _marshal_into_encoder :: proc(e: Encoder, v: any, ti: ^runtime.Type_Info) -> (er
|
||||
|
||||
if .Deterministic_Map_Sorting in e.flags {
|
||||
Name :: struct {
|
||||
name: string,
|
||||
name: []byte,
|
||||
field: int,
|
||||
}
|
||||
entries := make([dynamic]Name, 0, n, e.temp_allocator) or_return
|
||||
@@ -529,16 +527,19 @@ _marshal_into_encoder :: proc(e: Encoder, v: any, ti: ^runtime.Type_Info) -> (er
|
||||
continue
|
||||
}
|
||||
|
||||
append(&entries, Name{fname, i}) or_return
|
||||
key_builder := strings.builder_make(e.temp_allocator) or_return
|
||||
err_conv(_encode_text(Encoder{e.flags, strings.to_stream(&key_builder), e.temp_allocator}, fname)) or_return
|
||||
append(&entries, Name{key_builder.buf[:], i}) or_return
|
||||
}
|
||||
|
||||
// Sort lexicographic on the bytes of the key.
|
||||
slice.sort_by_cmp(entries[:], proc(a, b: Name) -> slice.Ordering {
|
||||
return slice.Ordering(bytes.compare(transmute([]byte)a.name, transmute([]byte)b.name))
|
||||
return slice.Ordering(bytes.compare(a.name, b.name))
|
||||
})
|
||||
|
||||
for entry in entries {
|
||||
marshal_entry(e, info, v, entry.name, entry.field) or_return
|
||||
io.write_full(e.writer, entry.name) or_return
|
||||
marshal_entry(e, info, v, entry.field) or_return
|
||||
}
|
||||
} else {
|
||||
for _, i in info.names[:info.field_count] {
|
||||
@@ -547,7 +548,8 @@ _marshal_into_encoder :: proc(e: Encoder, v: any, ti: ^runtime.Type_Info) -> (er
|
||||
continue
|
||||
}
|
||||
|
||||
marshal_entry(e, info, v, fname, i) or_return
|
||||
err_conv(_encode_text(e, fname)) or_return
|
||||
marshal_entry(e, info, v, i) or_return
|
||||
}
|
||||
}
|
||||
return
|
||||
|
||||
@@ -0,0 +1,96 @@
|
||||
/*
|
||||
package csv reads and writes comma-separated values (CSV) files.
|
||||
This package supports the format described in [[ RFC 4180; https://tools.ietf.org/html/rfc4180.html ]]
|
||||
|
||||
Example:
|
||||
package main
|
||||
|
||||
import "core:fmt"
|
||||
import "core:encoding/csv"
|
||||
import "core:os"
|
||||
|
||||
// Requires keeping the entire CSV file in memory at once
|
||||
iterate_csv_from_string :: proc(filename: string) {
|
||||
r: csv.Reader
|
||||
r.trim_leading_space = true
|
||||
r.reuse_record = true // Without it you have to delete(record)
|
||||
r.reuse_record_buffer = true // Without it you have to delete each of the fields within it
|
||||
defer csv.reader_destroy(&r)
|
||||
|
||||
csv_data, ok := os.read_entire_file(filename)
|
||||
if ok {
|
||||
csv.reader_init_with_string(&r, string(csv_data))
|
||||
} else {
|
||||
fmt.printfln("Unable to open file: %v", filename)
|
||||
return
|
||||
}
|
||||
defer delete(csv_data)
|
||||
|
||||
for r, i, err in csv.iterator_next(&r) {
|
||||
if err != nil { /* Do something with error */ }
|
||||
for f, j in r {
|
||||
fmt.printfln("Record %v, field %v: %q", i, j, f)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reads the CSV as it's processed (with a small buffer)
|
||||
iterate_csv_from_stream :: proc(filename: string) {
|
||||
fmt.printfln("Hellope from %v", filename)
|
||||
r: csv.Reader
|
||||
r.trim_leading_space = true
|
||||
r.reuse_record = true // Without it you have to delete(record)
|
||||
r.reuse_record_buffer = true // Without it you have to delete each of the fields within it
|
||||
defer csv.reader_destroy(&r)
|
||||
|
||||
handle, err := os.open(filename)
|
||||
if err != nil {
|
||||
fmt.eprintfln("Error opening file: %v", filename)
|
||||
return
|
||||
}
|
||||
defer os.close(handle)
|
||||
csv.reader_init(&r, os.stream_from_handle(handle))
|
||||
|
||||
for r, i in csv.iterator_next(&r) {
|
||||
for f, j in r {
|
||||
fmt.printfln("Record %v, field %v: %q", i, j, f)
|
||||
}
|
||||
}
|
||||
fmt.printfln("Error: %v", csv.iterator_last_error(r))
|
||||
}
|
||||
|
||||
// Read all records at once
|
||||
read_csv_from_string :: proc(filename: string) {
|
||||
r: csv.Reader
|
||||
r.trim_leading_space = true
|
||||
r.reuse_record = true // Without it you have to delete(record)
|
||||
r.reuse_record_buffer = true // Without it you have to delete each of the fields within it
|
||||
defer csv.reader_destroy(&r)
|
||||
|
||||
csv_data, ok := os.read_entire_file(filename)
|
||||
if ok {
|
||||
csv.reader_init_with_string(&r, string(csv_data))
|
||||
} else {
|
||||
fmt.printfln("Unable to open file: %v", filename)
|
||||
return
|
||||
}
|
||||
defer delete(csv_data)
|
||||
|
||||
records, err := csv.read_all(&r)
|
||||
if err != nil { /* Do something with CSV parse error */ }
|
||||
|
||||
defer {
|
||||
for rec in records {
|
||||
delete(rec)
|
||||
}
|
||||
delete(records)
|
||||
}
|
||||
|
||||
for r, i in records {
|
||||
for f, j in r {
|
||||
fmt.printfln("Record %v, field %v: %q", i, j, f)
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
package encoding_csv
|
||||
@@ -1,88 +0,0 @@
|
||||
//+build ignore
|
||||
package encoding_csv
|
||||
|
||||
import "core:fmt"
|
||||
import "core:encoding/csv"
|
||||
import "core:os"
|
||||
|
||||
// Requires keeping the entire CSV file in memory at once
|
||||
iterate_csv_from_string :: proc(filename: string) {
|
||||
r: csv.Reader
|
||||
r.trim_leading_space = true
|
||||
r.reuse_record = true // Without it you have to delete(record)
|
||||
r.reuse_record_buffer = true // Without it you have to each of the fields within it
|
||||
defer csv.reader_destroy(&r)
|
||||
|
||||
if csv_data, ok := os.read_entire_file(filename); ok {
|
||||
csv.reader_init_with_string(&r, string(csv_data))
|
||||
defer delete(csv_data)
|
||||
} else {
|
||||
fmt.printfln("Unable to open file: %v", filename)
|
||||
return
|
||||
}
|
||||
|
||||
for r, i, err in csv.iterator_next(&r) {
|
||||
if err != nil { /* Do something with error */ }
|
||||
for f, j in r {
|
||||
fmt.printfln("Record %v, field %v: %q", i, j, f)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reads the CSV as it's processed (with a small buffer)
|
||||
iterate_csv_from_stream :: proc(filename: string) {
|
||||
fmt.printfln("Hellope from %v", filename)
|
||||
r: csv.Reader
|
||||
r.trim_leading_space = true
|
||||
r.reuse_record = true // Without it you have to delete(record)
|
||||
r.reuse_record_buffer = true // Without it you have to each of the fields within it
|
||||
defer csv.reader_destroy(&r)
|
||||
|
||||
handle, err := os.open(filename)
|
||||
if err != nil {
|
||||
fmt.eprintfln("Error opening file: %v", filename)
|
||||
return
|
||||
}
|
||||
defer os.close(handle)
|
||||
csv.reader_init(&r, os.stream_from_handle(handle))
|
||||
|
||||
for r, i in csv.iterator_next(&r) {
|
||||
for f, j in r {
|
||||
fmt.printfln("Record %v, field %v: %q", i, j, f)
|
||||
}
|
||||
}
|
||||
fmt.printfln("Error: %v", csv.iterator_last_error(r))
|
||||
}
|
||||
|
||||
// Read all records at once
|
||||
read_csv_from_string :: proc(filename: string) {
|
||||
r: csv.Reader
|
||||
r.trim_leading_space = true
|
||||
r.reuse_record = true // Without it you have to delete(record)
|
||||
r.reuse_record_buffer = true // Without it you have to each of the fields within it
|
||||
defer csv.reader_destroy(&r)
|
||||
|
||||
if csv_data, ok := os.read_entire_file(filename); ok {
|
||||
csv.reader_init_with_string(&r, string(csv_data))
|
||||
defer delete(csv_data)
|
||||
} else {
|
||||
fmt.printfln("Unable to open file: %v", filename)
|
||||
return
|
||||
}
|
||||
|
||||
records, err := csv.read_all(&r)
|
||||
if err != nil { /* Do something with CSV parse error */ }
|
||||
|
||||
defer {
|
||||
for rec in records {
|
||||
delete(rec)
|
||||
}
|
||||
delete(records)
|
||||
}
|
||||
|
||||
for r, i in records {
|
||||
for f, j in r {
|
||||
fmt.printfln("Record %v, field %v: %q", i, j, f)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
// package csv reads and writes comma-separated values (CSV) files.
|
||||
// This package supports the format described in RFC 4180 <https://tools.ietf.org/html/rfc4180.html>
|
||||
// This package supports the format described in [[ RFC 4180; https://tools.ietf.org/html/rfc4180.html ]]
|
||||
package encoding_csv
|
||||
|
||||
import "core:bufio"
|
||||
@@ -484,4 +484,4 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all
|
||||
r.fields_per_record = len(dst)
|
||||
}
|
||||
return dst[:], err
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,22 +2,23 @@
|
||||
Package endian implements a simple translation between bytes and numbers with
|
||||
specific endian encodings.
|
||||
|
||||
buf: [100]u8
|
||||
put_u16(buf[:], .Little, 16) or_return
|
||||
Example:
|
||||
buf: [100]u8
|
||||
put_u16(buf[:], .Little, 16) or_return
|
||||
|
||||
You may ask yourself, why isn't `byte_order` platform Endianness by default, so we can write:
|
||||
put_u16(buf[:], 16) or_return
|
||||
// You may ask yourself, why isn't `byte_order` platform Endianness by default, so we can write:
|
||||
put_u16(buf[:], 16) or_return
|
||||
|
||||
The answer is that very few file formats are written in native/platform endianness. Most of them specify the endianness of
|
||||
each of their fields, or use a header field which specifies it for the entire file.
|
||||
// The answer is that very few file formats are written in native/platform endianness. Most of them specify the endianness of
|
||||
// each of their fields, or use a header field which specifies it for the entire file.
|
||||
|
||||
e.g. a file which specifies it at the top for all fields could do this:
|
||||
file_order := .Little if buf[0] == 0 else .Big
|
||||
field := get_u16(buf[1:], file_order) or_return
|
||||
// e.g. a file which specifies it at the top for all fields could do this:
|
||||
file_order := .Little if buf[0] == 0 else .Big
|
||||
field := get_u16(buf[1:], file_order) or_return
|
||||
|
||||
If on the other hand a field is *always* Big-Endian, you're wise to explicitly state it for the benefit of the reader,
|
||||
be that your future self or someone else.
|
||||
// If on the other hand a field is *always* Big-Endian, you're wise to explicitly state it for the benefit of the reader,
|
||||
// be that your future self or someone else.
|
||||
|
||||
field := get_u16(buf[:], .Big) or_return
|
||||
field := get_u16(buf[:], .Big) or_return
|
||||
*/
|
||||
package encoding_endian
|
||||
|
||||
@@ -1,24 +1,26 @@
|
||||
package encoding_unicode_entity
|
||||
/*
|
||||
A unicode entity encoder/decoder
|
||||
|
||||
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
|
||||
Made available under Odin's BSD-3 license.
|
||||
|
||||
This code has several procedures to map unicode runes to/from different textual encodings.
|
||||
- SGML/XML/HTML entity
|
||||
-- &#<decimal>;
|
||||
-- &#x<hexadecimal>;
|
||||
-- &<entity name>; (If the lookup tables are compiled in).
|
||||
Reference: https://www.w3.org/2003/entities/2007xml/unicode.xml
|
||||
|
||||
- URL encode / decode %hex entity
|
||||
Reference: https://datatracker.ietf.org/doc/html/rfc3986/#section-2.1
|
||||
|
||||
List of contributors:
|
||||
Jeroen van Rijn: Initial implementation.
|
||||
*/
|
||||
|
||||
/*
|
||||
A unicode entity encoder/decoder.
|
||||
|
||||
This code has several procedures to map unicode runes to/from different textual encodings.
|
||||
- SGML/XML/HTML entity
|
||||
- &#<decimal>;
|
||||
- &#x<hexadecimal>;
|
||||
- &<entity name>; (If the lookup tables are compiled in).
|
||||
Reference: [[ https://www.w3.org/2003/entities/2007xml/unicode.xml ]]
|
||||
|
||||
- URL encode / decode %hex entity
|
||||
Reference: [[ https://datatracker.ietf.org/doc/html/rfc3986/#section-2.1 ]]
|
||||
*/
|
||||
package encoding_unicode_entity
|
||||
|
||||
import "core:unicode/utf8"
|
||||
import "core:unicode"
|
||||
import "core:strings"
|
||||
@@ -353,4 +355,4 @@ _handle_xml_special :: proc(t: ^Tokenizer, builder: ^strings.Builder, options: X
|
||||
|
||||
}
|
||||
return false, .None
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,7 +42,7 @@ XML_NAME_TO_RUNE_MAX_LENGTH :: 31
|
||||
Input:
|
||||
entity_name - a string, like "copy" that describes a user-encoded Unicode entity as used in XML.
|
||||
|
||||
Output:
|
||||
Returns:
|
||||
"decoded" - The decoded rune if found by name, or -1 otherwise.
|
||||
"ok" - true if found, false if not.
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package encoding_hex
|
||||
|
||||
import "core:io"
|
||||
import "core:strings"
|
||||
|
||||
encode :: proc(src: []byte, allocator := context.allocator, loc := #caller_location) -> []byte #no_bounds_check {
|
||||
@@ -14,6 +15,12 @@ encode :: proc(src: []byte, allocator := context.allocator, loc := #caller_locat
|
||||
return dst
|
||||
}
|
||||
|
||||
// encode_into_writer writes the hexadecimal encoding of `src` to `dst`.
//
// Each input byte is emitted as two characters looked up in HEXTABLE, the
// high nibble first and the low nibble second. Writing stops at the first
// failed write and that io.Error is returned; nil is returned once every
// byte of `src` has been written.
encode_into_writer :: proc(dst: io.Writer, src: []byte) -> io.Error {
|
||||
for v in src {
|
||||
// One two-byte write per input byte: {high nibble, low nibble}.
io.write(dst, {HEXTABLE[v>>4], HEXTABLE[v&0x0f]}) or_return
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
decode :: proc(src: []byte, allocator := context.allocator, loc := #caller_location) -> (dst: []byte, ok: bool) #no_bounds_check {
|
||||
if len(src) % 2 == 1 {
|
||||
|
||||
+89
-83
@@ -1,83 +1,89 @@
|
||||
// Implementation of the HxA 3D asset format
|
||||
// HxA is a interchangeable graphics asset format.
|
||||
// Designed by Eskil Steenberg. @quelsolaar / eskil 'at' obsession 'dot' se / www.quelsolaar.com
|
||||
//
|
||||
// Author of this Odin package: Ginger Bill
|
||||
//
|
||||
// Following comment is copied from the original C-implementation
|
||||
// ---------
|
||||
// -Does the world need another Graphics file format?
|
||||
// Unfortunately, Yes. All existing formats are either too large and complicated to be implemented from
|
||||
// scratch, or don't have some basic features needed in modern computer graphics.
|
||||
// -Who is this format for?
|
||||
// For people who want a capable open Graphics format that can be implemented from scratch in
|
||||
// a few hours. It is ideal for graphics researchers, game developers or other people who
|
||||
// wants to build custom graphics pipelines. Given how easy it is to parse and write, it
|
||||
// should be easy to write utilities that process assets to preform tasks like: generating
|
||||
// normals, light-maps, tangent spaces, Error detection, GPU optimization, LOD generation,
|
||||
// and UV mapping.
|
||||
// -Why store images in the format when there are so many good image formats already?
|
||||
// Yes there are, but only for 2D RGB/RGBA images. A lot of computer graphics rendering rely
|
||||
// on 1D, 3D, cube, multilayer, multi channel, floating point bitmap buffers. There almost no
|
||||
// formats for this kind of data. Also 3D files that reference separate image files rely on
|
||||
// file paths, and this often creates issues when the assets are moved. By including the
|
||||
// texture data in the files directly the assets become self contained.
|
||||
// -Why doesn't the format support <insert whatever>?
|
||||
// Because the entire point is to make a format that can be implemented. Features like NURBSs,
|
||||
// Construction history, or BSP trees would make the format too large to serve its purpose.
|
||||
// The facilities of the formats to store meta data should make the format flexible enough
|
||||
// for most uses. Adding HxA support should be something anyone can do in a days work.
|
||||
//
|
||||
// Structure:
|
||||
// ----------
|
||||
// HxA is designed to be extremely simple to parse, and is therefore based around conventions. It has
|
||||
// a few basic structures, and depending on how they are used they mean different things. This means
|
||||
// that you can implement a tool that loads the entire file, modifies the parts it cares about and
|
||||
// leaves the rest intact. It is also possible to write a tool that makes all data in the file
|
||||
// editable without the need to understand its use. It is also possible for anyone to use the format
|
||||
// to store data axillary data. Anyone who wants to store data not covered by a convention can submit
|
||||
// a convention to extend the format. There should never be a convention for storing the same data in
|
||||
// two differed ways.
|
||||
// The data is story in a number of nodes that are stored in an array. Each node stores an array of
|
||||
// meta data. Meta data can describe anything you want, and a lot of conventions will use meta data
|
||||
// to store additional information, for things like transforms, lights, shaders and animation.
|
||||
// Data for Vertices, Corners, Faces, and Pixels are stored in named layer stacks. Each stack consists
|
||||
// of a number of named layers. All layers in the stack have the same number of elements. Each layer
|
||||
// describes one property of the primitive. Each layer can have multiple channels and each layer can
|
||||
// store data of a different type.
|
||||
//
|
||||
// HaX stores 3 kinds of nodes
|
||||
// - Pixel data.
|
||||
// - Polygon geometry data.
|
||||
// - Meta data only.
|
||||
//
|
||||
// Pixel Nodes stores pixels in a layer stack. A layer may store things like Albedo, Roughness,
|
||||
// Reflectance, Light maps, Masks, Normal maps, and Displacement. Layers use the channels of the
|
||||
// layers to store things like color. The length of the layer stack is determined by the type and
|
||||
// dimensions stored in the
|
||||
//
|
||||
// Geometry data is stored in 3 separate layer stacks for: vertex data, corner data and face data. The
|
||||
// vertex data stores things like verities, blend shapes, weight maps, and vertex colors. The first
|
||||
// layer in a vertex stack has to be a 3 channel layer named "position" describing the base position
|
||||
// of the vertices. The corner stack describes data per corner or edge of the polygons. It can be used
|
||||
// for things like UV, normals, and adjacency. The first layer in a corner stack has to be a 1 channel
|
||||
// integer layer named "index" describing the vertices used to form polygons. The last value in each
|
||||
// polygon has a negative - 1 index to indicate the end of the polygon.
|
||||
//
|
||||
// Example:
|
||||
// A quad and a tri with the vertex index:
|
||||
// [0, 1, 2, 3] [1, 4, 2]
|
||||
// is stored:
|
||||
// [0, 1, 2, -4, 1, 4, -3]
|
||||
// The face stack stores values per face. the length of the face stack has to match the number of
|
||||
// negative values in the index layer in the corner stack. The face stack can be used to store things
|
||||
// like material index.
|
||||
//
|
||||
// Storage
|
||||
// -------
|
||||
// All data is stored in little endian byte order with no padding. The layout mirrors the structs
|
||||
// defined below with a few exceptions. All names are stored as a 8-bit unsigned integer indicating
|
||||
// the length of the name followed by that many characters. Termination is not stored in the file.
|
||||
// Text strings stored in meta data are stored the same way as names, but instead of a 8-bit unsigned
|
||||
// integer a 32-bit unsigned integer is used.
|
||||
package encoding_hxa
|
||||
/*
|
||||
Implementation of the HxA 3D asset format
|
||||
HxA is an interchangeable graphics asset format.
|
||||
Designed by Eskil Steenberg. @quelsolaar / eskil 'at' obsession 'dot' se / www.quelsolaar.com
|
||||
|
||||
Author of this Odin package: Ginger Bill
|
||||
|
||||
Following comment is copied from the original C-implementation
|
||||
---------
|
||||
- Does the world need another Graphics file format?
|
||||
Unfortunately, Yes. All existing formats are either too large and complicated to be implemented from
|
||||
scratch, or don't have some basic features needed in modern computer graphics.
|
||||
|
||||
- Who is this format for?
|
||||
For people who want a capable open Graphics format that can be implemented from scratch in
|
||||
a few hours. It is ideal for graphics researchers, game developers or other people who
|
||||
want to build custom graphics pipelines. Given how easy it is to parse and write, it
|
||||
should be easy to write utilities that process assets to perform tasks like: generating
|
||||
normals, light-maps, tangent spaces, Error detection, GPU optimization, LOD generation,
|
||||
and UV mapping.
|
||||
|
||||
- Why store images in the format when there are so many good image formats already?
|
||||
Yes there are, but only for 2D RGB/RGBA images. A lot of computer graphics rendering relies
|
||||
on 1D, 3D, cube, multilayer, multi channel, floating point bitmap buffers. There are almost no
|
||||
formats for this kind of data. Also 3D files that reference separate image files rely on
|
||||
file paths, and this often creates issues when the assets are moved. By including the
|
||||
texture data in the files directly the assets become self contained.
|
||||
|
||||
- Why doesn't the format support <insert whatever>?
|
||||
Because the entire point is to make a format that can be implemented. Features like NURBSs,
|
||||
Construction history, or BSP trees would make the format too large to serve its purpose.
|
||||
The facilities of the formats to store meta data should make the format flexible enough
|
||||
for most uses. Adding HxA support should be something anyone can do in a day's work.
|
||||
|
||||
Structure:
|
||||
----------
|
||||
HxA is designed to be extremely simple to parse, and is therefore based around conventions. It has
|
||||
a few basic structures, and depending on how they are used they mean different things. This means
|
||||
that you can implement a tool that loads the entire file, modifies the parts it cares about and
|
||||
leaves the rest intact. It is also possible to write a tool that makes all data in the file
|
||||
editable without the need to understand its use. It is also possible for anyone to use the format
|
||||
to store auxiliary data. Anyone who wants to store data not covered by a convention can submit
|
||||
a convention to extend the format. There should never be a convention for storing the same data in
|
||||
two different ways.
|
||||
|
||||
The data is stored in a number of nodes that are stored in an array. Each node stores an array of
|
||||
meta data. Meta data can describe anything you want, and a lot of conventions will use meta data
|
||||
to store additional information, for things like transforms, lights, shaders and animation.
|
||||
Data for Vertices, Corners, Faces, and Pixels are stored in named layer stacks. Each stack consists
|
||||
of a number of named layers. All layers in the stack have the same number of elements. Each layer
|
||||
describes one property of the primitive. Each layer can have multiple channels and each layer can
|
||||
store data of a different type.
|
||||
|
||||
HxA stores 3 kinds of nodes:
|
||||
- Pixel data.
|
||||
- Polygon geometry data.
|
||||
- Meta data only.
|
||||
|
||||
Pixel nodes store pixels in a layer stack. A layer may store things like Albedo, Roughness,
|
||||
Reflectance, Light maps, Masks, Normal maps, and Displacement. Layers use the channels of the
|
||||
layers to store things like color.
|
||||
The length of the layer stack is determined by the type and dimensions stored in the node.
|
||||
Geometry data is stored in 3 separate layer stacks for: vertex data, corner data and face data. The
|
||||
vertex data stores things like vertices, blend shapes, weight maps, and vertex colors. The first
|
||||
layer in a vertex stack has to be a 3 channel layer named "position" describing the base position
|
||||
of the vertices. The corner stack describes data per corner or edge of the polygons. It can be used
|
||||
for things like UV, normals, and adjacency. The first layer in a corner stack has to be a 1 channel
|
||||
integer layer named "index" describing the vertices used to form polygons. The last value in each
|
||||
polygon is stored negated and decremented by 1 (-index - 1) to indicate the end of the polygon.
|
||||
|
||||
For Example:
|
||||
A quad and a tri with the vertex index:
|
||||
[0, 1, 2, 3] [1, 4, 2]
|
||||
is stored:
|
||||
[0, 1, 2, -4, 1, 4, -3]
|
||||
|
||||
The face stack stores values per face. The length of the face stack has to match the number of
|
||||
negative values in the index layer in the corner stack. The face stack can be used to store things
|
||||
like material index.
|
||||
|
||||
Storage:
|
||||
-------
|
||||
All data is stored in little endian byte order with no padding. The layout mirrors the structs
|
||||
defined below with a few exceptions. All names are stored as an 8-bit unsigned integer indicating
|
||||
the length of the name followed by that many characters. Termination is not stored in the file.
|
||||
Text strings stored in meta data are stored the same way as names, but instead of an 8-bit unsigned
|
||||
integer a 32-bit unsigned integer is used.
|
||||
*/
|
||||
package encoding_hxa
|
||||
|
||||
@@ -116,7 +116,30 @@ assign_int :: proc(val: any, i: $T) -> bool {
|
||||
case int: dst = int (i)
|
||||
case uint: dst = uint (i)
|
||||
case uintptr: dst = uintptr(i)
|
||||
case: return false
|
||||
case:
|
||||
ti := type_info_of(v.id)
|
||||
if _, ok := ti.variant.(runtime.Type_Info_Bit_Set); ok {
|
||||
do_byte_swap := !reflect.bit_set_is_big_endian(v)
|
||||
switch ti.size * 8 {
|
||||
case 0: // no-op.
|
||||
case 8:
|
||||
x := (^u8)(v.data)
|
||||
x^ = u8(i)
|
||||
case 16:
|
||||
x := (^u16)(v.data)
|
||||
x^ = do_byte_swap ? intrinsics.byte_swap(u16(i)) : u16(i)
|
||||
case 32:
|
||||
x := (^u32)(v.data)
|
||||
x^ = do_byte_swap ? intrinsics.byte_swap(u32(i)) : u32(i)
|
||||
case 64:
|
||||
x := (^u64)(v.data)
|
||||
x^ = do_byte_swap ? intrinsics.byte_swap(u64(i)) : u64(i)
|
||||
case:
|
||||
panic("unknown bit_size size")
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -21,8 +21,9 @@ cryptographically-secure, per RFC 9562's suggestion.
|
||||
- Version 6 without either a clock or node argument.
|
||||
- Version 7 in all cases.
|
||||
|
||||
Here's an example of how to set up one:
|
||||
|
||||
Example:
|
||||
package main
|
||||
|
||||
import "core:crypto"
|
||||
import "core:encoding/uuid"
|
||||
|
||||
@@ -40,7 +41,7 @@ Here's an example of how to set up one:
|
||||
|
||||
|
||||
For more information on the specifications, see here:
|
||||
- https://www.rfc-editor.org/rfc/rfc4122.html
|
||||
- https://www.rfc-editor.org/rfc/rfc9562.html
|
||||
- [[ https://www.rfc-editor.org/rfc/rfc4122.html ]]
|
||||
- [[ https://www.rfc-editor.org/rfc/rfc9562.html ]]
|
||||
*/
|
||||
package uuid
|
||||
|
||||
@@ -11,7 +11,7 @@ Write a UUID in the 8-4-4-4-12 format.
|
||||
This procedure performs error checking with every byte written.
|
||||
|
||||
If you can guarantee beforehand that your stream has enough space to hold the
|
||||
UUID (32 bytes), then it is better to use `unsafe_write` instead as that will
|
||||
UUID (36 bytes), then it is better to use `unsafe_write` instead as that will
|
||||
be faster.
|
||||
|
||||
Inputs:
|
||||
@@ -22,7 +22,7 @@ Returns:
|
||||
- error: An `io` error, if one occurred, otherwise `nil`.
|
||||
*/
|
||||
write :: proc(w: io.Writer, id: Identifier) -> (error: io.Error) #no_bounds_check {
|
||||
write_octet :: proc (w: io.Writer, octet: u8) -> io.Error #no_bounds_check {
|
||||
write_octet :: proc(w: io.Writer, octet: u8) -> io.Error #no_bounds_check {
|
||||
high_nibble := octet >> 4
|
||||
low_nibble := octet & 0xF
|
||||
|
||||
@@ -31,15 +31,15 @@ write :: proc(w: io.Writer, id: Identifier) -> (error: io.Error) #no_bounds_chec
|
||||
return nil
|
||||
}
|
||||
|
||||
for index in 0 ..< 4 { write_octet(w, id[index]) or_return }
|
||||
for index in 0 ..< 4 {write_octet(w, id[index]) or_return}
|
||||
io.write_byte(w, '-') or_return
|
||||
for index in 4 ..< 6 { write_octet(w, id[index]) or_return }
|
||||
for index in 4 ..< 6 {write_octet(w, id[index]) or_return}
|
||||
io.write_byte(w, '-') or_return
|
||||
for index in 6 ..< 8 { write_octet(w, id[index]) or_return }
|
||||
for index in 6 ..< 8 {write_octet(w, id[index]) or_return}
|
||||
io.write_byte(w, '-') or_return
|
||||
for index in 8 ..< 10 { write_octet(w, id[index]) or_return }
|
||||
for index in 8 ..< 10 {write_octet(w, id[index]) or_return}
|
||||
io.write_byte(w, '-') or_return
|
||||
for index in 10 ..< 16 { write_octet(w, id[index]) or_return }
|
||||
for index in 10 ..< 16 {write_octet(w, id[index]) or_return}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -54,7 +54,7 @@ Inputs:
|
||||
- id: The identifier to convert.
|
||||
*/
|
||||
unsafe_write :: proc(w: io.Writer, id: Identifier) #no_bounds_check {
|
||||
write_octet :: proc (w: io.Writer, octet: u8) #no_bounds_check {
|
||||
write_octet :: proc(w: io.Writer, octet: u8) #no_bounds_check {
|
||||
high_nibble := octet >> 4
|
||||
low_nibble := octet & 0xF
|
||||
|
||||
@@ -62,15 +62,15 @@ unsafe_write :: proc(w: io.Writer, id: Identifier) #no_bounds_check {
|
||||
io.write_byte(w, strconv.digits[low_nibble])
|
||||
}
|
||||
|
||||
for index in 0 ..< 4 { write_octet(w, id[index]) }
|
||||
for index in 0 ..< 4 {write_octet(w, id[index])}
|
||||
io.write_byte(w, '-')
|
||||
for index in 4 ..< 6 { write_octet(w, id[index]) }
|
||||
for index in 4 ..< 6 {write_octet(w, id[index])}
|
||||
io.write_byte(w, '-')
|
||||
for index in 6 ..< 8 { write_octet(w, id[index]) }
|
||||
for index in 6 ..< 8 {write_octet(w, id[index])}
|
||||
io.write_byte(w, '-')
|
||||
for index in 8 ..< 10 { write_octet(w, id[index]) }
|
||||
for index in 8 ..< 10 {write_octet(w, id[index])}
|
||||
io.write_byte(w, '-')
|
||||
for index in 10 ..< 16 { write_octet(w, id[index]) }
|
||||
for index in 10 ..< 16 {write_octet(w, id[index])}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -106,7 +106,7 @@ Convert a UUID to a string in the 8-4-4-4-12 format.
|
||||
|
||||
Inputs:
|
||||
- id: The identifier to convert.
|
||||
- buffer: A byte buffer to store the result. Must be at least 32 bytes large.
|
||||
- buffer: A byte buffer to store the result. Must be at least 36 bytes large.
|
||||
- loc: The caller location for debugging purposes (default: #caller_location)
|
||||
|
||||
Returns:
|
||||
@@ -119,7 +119,11 @@ to_string_buffer :: proc(
|
||||
) -> (
|
||||
str: string,
|
||||
) {
|
||||
assert(len(buffer) >= EXPECTED_LENGTH, "The buffer provided is not at least 32 bytes large.", loc)
|
||||
assert(
|
||||
len(buffer) >= EXPECTED_LENGTH,
|
||||
"The buffer provided is not at least 36 bytes large.",
|
||||
loc,
|
||||
)
|
||||
builder := strings.builder_from_bytes(buffer)
|
||||
unsafe_write(strings.to_writer(&builder), id)
|
||||
return strings.to_string(builder)
|
||||
@@ -129,3 +133,4 @@ to_string :: proc {
|
||||
to_string_allocated,
|
||||
to_string_buffer,
|
||||
}
|
||||
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
/*
|
||||
Implementation of the LEB128 variable integer encoding as used by DWARF encoding and DEX files, among others.
|
||||
Implementation of the LEB128 variable integer encoding as used by DWARF encoding and DEX files, among others.
|
||||
|
||||
Author of this Odin package: Jeroen van Rijn
|
||||
Author of this Odin package: Jeroen van Rijn
|
||||
|
||||
Example:
|
||||
package main
|
||||
|
||||
Example:
|
||||
```odin
|
||||
import "core:encoding/varint"
|
||||
import "core:fmt"
|
||||
|
||||
@@ -22,7 +23,5 @@
|
||||
assert(decoded_val == value && decode_size == encode_size && decode_err == .None)
|
||||
fmt.printf("Decoded as %v, using %v byte%v\n", decoded_val, decode_size, "" if decode_size == 1 else "s")
|
||||
}
|
||||
```
|
||||
|
||||
*/
|
||||
package encoding_varint
|
||||
package encoding_varint
|
||||
|
||||
@@ -6,8 +6,6 @@
|
||||
Jeroen van Rijn: Initial implementation.
|
||||
*/
|
||||
|
||||
// package varint implements variable length integer encoding and decoding using
|
||||
// the LEB128 format as used by DWARF debug info, Android .dex and other file formats.
|
||||
package encoding_varint
|
||||
|
||||
// In theory we should use the bigint package. In practice, varints bigger than this indicate a corrupted file.
|
||||
@@ -160,4 +158,4 @@ encode_ileb128 :: proc(buf: []u8, val: i128) -> (size: int, err: Error) {
|
||||
buf[size - 1] = u8(low)
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user