mirror of
https://github.com/Ed94/Odin.git
synced 2026-06-25 23:14:59 -07:00
Merge branch 'master' into core-simd-indices-redadd-redmul
This commit is contained in:
@@ -298,7 +298,7 @@ simd_masked_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U)
|
||||
simd_masked_expand_load :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) ---
|
||||
simd_masked_compress_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) where type_is_integer(U) || type_is_boolean(U) ---
|
||||
|
||||
|
||||
// Returns a vector of type `T` in which lane `i` holds the value `i`. The element type `E` of the vector must be numeric.
simd_indices :: proc($T: typeid/#simd[$N]$E) -> T where type_is_numeric(E) ---
|
||||
|
||||
simd_shuffle :: proc(a, b: #simd[N]T, indices: ..int) -> #simd[len(indices)]T ---
|
||||
simd_select :: proc(cond: #simd[N]boolean_or_integer, true, false: #simd[N]T) -> #simd[N]T ---
|
||||
|
||||
@@ -257,7 +257,7 @@ reader_read_rune :: proc(b: ^Reader) -> (r: rune, size: int, err: io.Error) {
|
||||
for b.r+utf8.UTF_MAX > b.w &&
|
||||
!utf8.full_rune(b.buf[b.r:b.w]) &&
|
||||
b.err == nil &&
|
||||
b.w-b.w < len(b.buf) {
|
||||
b.w-b.r < len(b.buf) {
|
||||
_reader_read_new_chunk(b) or_return
|
||||
}
|
||||
|
||||
|
||||
@@ -12,7 +12,30 @@ import win32 "core:sys/windows"
|
||||
|
||||
INVALID_HANDLE :: ~uintptr(0)
|
||||
|
||||
S_IWRITE :: 0o200
|
||||
// NOTE(Jeroen): We don't translate mode flags for Linux when given to `chmod`.
|
||||
// Let's not do so for Windows for `chmod` or `read_directory_iterator` either.
|
||||
// They're *not* portable between Windows and non-Windows platforms.
|
||||
//
|
||||
// It also leads to information loss as flags like Archive, Hidden and System have no equivalent there.
|
||||
// We can of course parse them so we can set the `.Symlink` and `.Directory` type, but we shouldn't pretend
|
||||
// that 0o644 is meaningful when returned as a mode.
|
||||
// `C:\bootmgr` as an example has attributes read only, hidden, system, archive. In no way is it sensible to replace that with 0o444.
|
||||
FILE_ATTRIBUTE_READONLY :: win32.FILE_ATTRIBUTE_READONLY // 0x00000001
|
||||
FILE_ATTRIBUTE_HIDDEN :: win32.FILE_ATTRIBUTE_HIDDEN // 0x00000002
|
||||
FILE_ATTRIBUTE_SYSTEM :: win32.FILE_ATTRIBUTE_SYSTEM // 0x00000004
|
||||
FILE_ATTRIBUTE_DIRECTORY :: win32.FILE_ATTRIBUTE_DIRECTORY // 0x00000010
|
||||
FILE_ATTRIBUTE_ARCHIVE :: win32.FILE_ATTRIBUTE_ARCHIVE // 0x00000020
|
||||
FILE_ATTRIBUTE_DEVICE :: win32.FILE_ATTRIBUTE_DEVICE // 0x00000040
|
||||
FILE_ATTRIBUTE_NORMAL :: win32.FILE_ATTRIBUTE_NORMAL // 0x00000080
|
||||
FILE_ATTRIBUTE_TEMPORARY :: win32.FILE_ATTRIBUTE_TEMPORARY // 0x00000100
|
||||
FILE_ATTRIBUTE_SPARSE_FILE :: win32.FILE_ATTRIBUTE_SPARSE_FILE // 0x00000200
|
||||
FILE_ATTRIBUTE_REPARSE_Point :: win32.FILE_ATTRIBUTE_REPARSE_Point // 0x00000400
|
||||
FILE_ATTRIBUTE_REPARSE_POINT :: win32.FILE_ATTRIBUTE_REPARSE_POINT // 0x00000400
|
||||
FILE_ATTRIBUTE_COMPRESSED :: win32.FILE_ATTRIBUTE_COMPRESSED // 0x00000800
|
||||
FILE_ATTRIBUTE_OFFLINE :: win32.FILE_ATTRIBUTE_OFFLINE // 0x00001000
|
||||
FILE_ATTRIBUTE_NOT_CONTENT_INDEXED :: win32.FILE_ATTRIBUTE_NOT_CONTENT_INDEXED // 0x00002000
|
||||
FILE_ATTRIBUTE_ENCRYPTED :: win32.FILE_ATTRIBUTE_ENCRYPTED // 0x00004000
|
||||
|
||||
_ERROR_BAD_NETPATH :: 53
|
||||
MAX_RW :: 1<<30
|
||||
|
||||
@@ -122,7 +145,7 @@ _open_internal :: proc(name: string, flags: File_Flags, perm: int) -> (handle: u
|
||||
}
|
||||
|
||||
attrs: u32 = win32.FILE_ATTRIBUTE_NORMAL|win32.FILE_FLAG_BACKUP_SEMANTICS
|
||||
if perm & S_IWRITE == 0 {
|
||||
if u32(perm) & FILE_ATTRIBUTE_NORMAL == 0 {
|
||||
attrs = win32.FILE_ATTRIBUTE_READONLY
|
||||
if create_mode == win32.CREATE_ALWAYS {
|
||||
// NOTE(bill): Open has just asked to create a file in read-only mode.
|
||||
@@ -748,20 +771,10 @@ _fchmod :: proc(f: ^File, mode: int) -> Error {
|
||||
if f == nil || f.impl == nil {
|
||||
return nil
|
||||
}
|
||||
d: win32.BY_HANDLE_FILE_INFORMATION
|
||||
if !win32.GetFileInformationByHandle(_handle(f), &d) {
|
||||
return _get_platform_error()
|
||||
}
|
||||
attrs := d.dwFileAttributes
|
||||
if mode & S_IWRITE != 0 {
|
||||
attrs &~= win32.FILE_ATTRIBUTE_READONLY
|
||||
} else {
|
||||
attrs |= win32.FILE_ATTRIBUTE_READONLY
|
||||
}
|
||||
|
||||
info: win32.FILE_BASIC_INFO
|
||||
info.FileAttributes = attrs
|
||||
if !win32.SetFileInformationByHandle(_handle(f), .FileBasicInfo, &info, size_of(d)) {
|
||||
info.FileAttributes = win32.DWORD(mode)
|
||||
if !win32.SetFileInformationByHandle(_handle(f), .FileBasicInfo, &info, size_of(info)) {
|
||||
return _get_platform_error()
|
||||
}
|
||||
return nil
|
||||
@@ -800,19 +813,11 @@ _chtimes :: proc(name: string, atime, mtime: time.Time) -> Error {
|
||||
defer close(f)
|
||||
return _fchtimes(f, atime, mtime)
|
||||
}
|
||||
|
||||
_fchtimes :: proc(f: ^File, atime, mtime: time.Time) -> Error {
|
||||
if f == nil || f.impl == nil {
|
||||
return nil
|
||||
}
|
||||
d: win32.BY_HANDLE_FILE_INFORMATION
|
||||
if !win32.GetFileInformationByHandle(_handle(f), &d) {
|
||||
return _get_platform_error()
|
||||
}
|
||||
|
||||
to_windows_time :: #force_inline proc(t: time.Time) -> win32.LARGE_INTEGER {
|
||||
// a 64-bit value representing the number of 100-nanosecond intervals since January 1, 1601 (UTC)
|
||||
return win32.LARGE_INTEGER(time.time_to_unix_nano(t) * 100 + 116444736000000000)
|
||||
}
|
||||
|
||||
atime, mtime := atime, mtime
|
||||
if time.time_to_unix_nano(atime) < time.time_to_unix_nano(mtime) {
|
||||
@@ -820,9 +825,9 @@ _fchtimes :: proc(f: ^File, atime, mtime: time.Time) -> Error {
|
||||
}
|
||||
|
||||
info: win32.FILE_BASIC_INFO
|
||||
info.LastAccessTime = to_windows_time(atime)
|
||||
info.LastWriteTime = to_windows_time(mtime)
|
||||
if !win32.SetFileInformationByHandle(_handle(f), .FileBasicInfo, &info, size_of(d)) {
|
||||
info.LastAccessTime = time_as_filetime(atime)
|
||||
info.LastWriteTime = time_as_filetime(mtime)
|
||||
if !win32.SetFileInformationByHandle(_handle(f), .FileBasicInfo, &info, size_of(info)) {
|
||||
return _get_platform_error()
|
||||
}
|
||||
return nil
|
||||
|
||||
@@ -162,7 +162,7 @@ _process_info_by_pid :: proc(pid: int, selection: Process_Info_Fields, allocator
|
||||
}
|
||||
}
|
||||
|
||||
cmdline_if: if selection & {.Working_Dir, .Command_Line, .Command_Args, .Executable_Path} != {} {
|
||||
cmdline_if: if selection & {.Working_Dir, .Command_Line, .Command_Args} != {} {
|
||||
strings.builder_reset(&path_builder)
|
||||
strings.write_string(&path_builder, "/proc/")
|
||||
strings.write_int(&path_builder, pid)
|
||||
@@ -178,12 +178,12 @@ _process_info_by_pid :: proc(pid: int, selection: Process_Info_Fields, allocator
|
||||
terminator := strings.index_byte(cmdline, 0)
|
||||
assert(terminator > 0)
|
||||
|
||||
command_line_exec := cmdline[:terminator]
|
||||
// command_line_exec := cmdline[:terminator]
|
||||
|
||||
// Still need cwd if the execution on the command line is relative.
|
||||
cwd: string
|
||||
cwd_err: Error
|
||||
if .Working_Dir in selection || (.Executable_Path in selection && command_line_exec[0] != '/') {
|
||||
if .Working_Dir in selection {
|
||||
strings.builder_reset(&path_builder)
|
||||
strings.write_string(&path_builder, "/proc/")
|
||||
strings.write_int(&path_builder, pid)
|
||||
@@ -199,18 +199,6 @@ _process_info_by_pid :: proc(pid: int, selection: Process_Info_Fields, allocator
|
||||
}
|
||||
}
|
||||
|
||||
if .Executable_Path in selection {
|
||||
if cmdline[0] == '/' {
|
||||
info.executable_path = strings.clone(cmdline[:terminator], allocator) or_return
|
||||
info.fields += {.Executable_Path}
|
||||
} else if cwd_err == nil {
|
||||
info.executable_path = join_path({ cwd, cmdline[:terminator] }, allocator) or_return
|
||||
info.fields += {.Executable_Path}
|
||||
} else {
|
||||
break cmdline_if
|
||||
}
|
||||
}
|
||||
|
||||
if selection & {.Command_Line, .Command_Args} != {} {
|
||||
// skip to first arg
|
||||
//cmdline = cmdline[terminator + 1:]
|
||||
@@ -323,6 +311,30 @@ _process_info_by_pid :: proc(pid: int, selection: Process_Info_Fields, allocator
|
||||
}
|
||||
}
|
||||
|
||||
if .Executable_Path in selection {
|
||||
/*
|
||||
NOTE(Jeroen):
|
||||
|
||||
The old version returned the wrong executable path for things like `bash` or `sh`,
|
||||
for which `/proc/<pid>/cmdline` will just report "bash" or "sh",
|
||||
resulting in misleading paths like `$PWD/sh`, even though that executable doesn't exist there.
|
||||
|
||||
Thanks to Yawning for suggesting `/proc/self/exe`.
|
||||
*/
|
||||
|
||||
strings.builder_reset(&path_builder)
|
||||
strings.write_string(&path_builder, "/proc/")
|
||||
strings.write_int(&path_builder, pid)
|
||||
strings.write_string(&path_builder, "/exe")
|
||||
|
||||
if exe_bytes, exe_err := _read_link(strings.to_string(path_builder), temp_allocator()); exe_err == nil {
|
||||
info.executable_path = strings.clone(string(exe_bytes), allocator) or_return
|
||||
info.fields += {.Executable_Path}
|
||||
} else {
|
||||
err = exe_err
|
||||
}
|
||||
}
|
||||
|
||||
if .Environment in selection {
|
||||
strings.builder_reset(&path_builder)
|
||||
strings.write_string(&path_builder, "/proc/")
|
||||
|
||||
@@ -212,11 +212,15 @@ _file_type_from_create_file :: proc(wname: win32.wstring, create_file_attributes
|
||||
}
|
||||
|
||||
_file_type_mode_from_file_attributes :: proc(file_attributes: win32.DWORD, h: win32.HANDLE, ReparseTag: win32.DWORD) -> (type: File_Type, mode: int) {
|
||||
if file_attributes & win32.FILE_ATTRIBUTE_READONLY != 0 {
|
||||
mode |= 0o444
|
||||
} else {
|
||||
mode |= 0o666
|
||||
}
|
||||
// NOTE(Jeroen): We don't translate mode flags for Linux when given to `chmod`.
|
||||
// Let's not do so for Windows for `chmod` or `read_directory_iterator` either.
|
||||
// They're *not* portable between Windows and non-Windows platforms.
|
||||
//
|
||||
// It also leads to information loss as flags like Archive, Hidden and System have no equivalent there.
|
||||
// We can of course parse them so we can set the `.Symlink` and `.Directory` type, but we shouldn't pretend
|
||||
// that 0o644 is meaningful when returned as a mode.
|
||||
// `C:\bootmgr` as an example has attributes read only, hidden, system, archive. In no way is it sensible to replace that with 0o444.
|
||||
mode = int(file_attributes)
|
||||
|
||||
is_sym := false
|
||||
if file_attributes & win32.FILE_ATTRIBUTE_REPARSE_POINT == 0 {
|
||||
@@ -229,21 +233,36 @@ _file_type_mode_from_file_attributes :: proc(file_attributes: win32.DWORD, h: wi
|
||||
type = .Symlink
|
||||
} else if file_attributes & win32.FILE_ATTRIBUTE_DIRECTORY != 0 {
|
||||
type = .Directory
|
||||
mode |= 0o111
|
||||
} else if h != nil {
|
||||
type = file_type(h)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// a 64-bit value representing the number of 100-nanosecond intervals since January 1, 1601 (UTC)
|
||||
time_as_filetime :: #force_inline proc(t: time.Time) -> (ft: win32.LARGE_INTEGER) {
|
||||
win := u64(t._nsec / 100) + 116444736000000000
|
||||
return win32.LARGE_INTEGER(win)
|
||||
}
|
||||
|
||||
filetime_as_time_li :: #force_inline proc(ft: win32.LARGE_INTEGER) -> (t: time.Time) {
|
||||
return {_nsec=(i64(ft) - 116444736000000000) * 100}
|
||||
}
|
||||
|
||||
filetime_as_time_ft :: #force_inline proc(ft: win32.FILETIME) -> (t: time.Time) {
|
||||
return filetime_as_time_li(win32.LARGE_INTEGER(ft.dwLowDateTime) + win32.LARGE_INTEGER(ft.dwHighDateTime) << 32)
|
||||
}
|
||||
|
||||
filetime_as_time :: proc{filetime_as_time_ft, filetime_as_time_li}
|
||||
|
||||
_file_info_from_win32_file_attribute_data :: proc(d: ^win32.WIN32_FILE_ATTRIBUTE_DATA, name: string, allocator: runtime.Allocator) -> (fi: File_Info, e: Error) {
|
||||
fi.size = i64(d.nFileSizeHigh)<<32 + i64(d.nFileSizeLow)
|
||||
type, mode := _file_type_mode_from_file_attributes(d.dwFileAttributes, nil, 0)
|
||||
fi.type = type
|
||||
fi.mode |= mode
|
||||
fi.creation_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftCreationTime))
|
||||
fi.modification_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastWriteTime))
|
||||
fi.access_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastAccessTime))
|
||||
fi.creation_time = filetime_as_time(d.ftCreationTime)
|
||||
fi.modification_time = filetime_as_time(d.ftLastWriteTime)
|
||||
fi.access_time = filetime_as_time(d.ftLastAccessTime)
|
||||
fi.fullpath, e = full_path_from_name(name, allocator)
|
||||
fi.name = basename(fi.fullpath)
|
||||
return
|
||||
@@ -254,9 +273,9 @@ _file_info_from_win32_find_data :: proc(d: ^win32.WIN32_FIND_DATAW, name: string
|
||||
type, mode := _file_type_mode_from_file_attributes(d.dwFileAttributes, nil, 0)
|
||||
fi.type = type
|
||||
fi.mode |= mode
|
||||
fi.creation_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftCreationTime))
|
||||
fi.modification_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastWriteTime))
|
||||
fi.access_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastAccessTime))
|
||||
fi.creation_time = filetime_as_time(d.ftCreationTime)
|
||||
fi.modification_time = filetime_as_time(d.ftLastWriteTime)
|
||||
fi.access_time = filetime_as_time(d.ftLastAccessTime)
|
||||
fi.fullpath, e = full_path_from_name(name, allocator)
|
||||
fi.name = basename(fi.fullpath)
|
||||
return
|
||||
@@ -286,9 +305,9 @@ _file_info_from_get_file_information_by_handle :: proc(path: string, h: win32.HA
|
||||
type, mode := _file_type_mode_from_file_attributes(d.dwFileAttributes, h, 0)
|
||||
fi.type = type
|
||||
fi.mode |= mode
|
||||
fi.creation_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftCreationTime))
|
||||
fi.modification_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastWriteTime))
|
||||
fi.access_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastAccessTime))
|
||||
fi.creation_time = filetime_as_time(d.ftCreationTime)
|
||||
fi.modification_time = filetime_as_time(d.ftLastWriteTime)
|
||||
fi.access_time = filetime_as_time(d.ftLastAccessTime)
|
||||
return fi, nil
|
||||
}
|
||||
|
||||
|
||||
+3
-447
@@ -1759,7 +1759,7 @@ Returns:
|
||||
replace :: intrinsics.simd_replace
|
||||
|
||||
/*
|
||||
Reduce a vector to a scalar by adding up all the lanes in an ordered fashion.
|
||||
Reduce a vector to a scalar by adding up all the lanes.
|
||||
|
||||
This procedure returns a scalar that is the ordered sum of all lanes. The
|
||||
ordered sum may be important for accounting for precision errors in
|
||||
@@ -2511,460 +2511,16 @@ recip :: #force_inline proc "contextless" (v: $T/#simd[$LANES]$E) -> T where int
|
||||
return T(1) / v
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Create a vector where each lane contains the index of that lane.
|
||||
|
||||
Inputs:
|
||||
- `V`: The type of the vector to create.
|
||||
|
||||
Result:
|
||||
- A vector of the given type, where each lane contains the index of that lane.
|
||||
|
||||
**Operation**:
|
||||
|
||||
for i in 0 ..< N {
|
||||
res[i] = i
|
||||
}
|
||||
*/
|
||||
indices :: #force_inline proc "contextless" ($V: typeid/#simd[$N]$E) -> V where intrinsics.type_is_numeric(E) {
|
||||
when N == 1 {
|
||||
return {0}
|
||||
} else when N == 2 {
|
||||
return {0, 1}
|
||||
} else when N == 4 {
|
||||
return {0, 1, 2, 3}
|
||||
} else when N == 8 {
|
||||
return {0, 1, 2, 3, 4, 5, 6, 7}
|
||||
} else when N == 16 {
|
||||
return {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
|
||||
} else when N == 32 {
|
||||
return {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
}
|
||||
} else when N == 64 {
|
||||
return {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
||||
}
|
||||
} else {
|
||||
#panic("Unsupported vector size!")
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Reduce a vector to a scalar by adding up all the lanes in a pairwise fashion.
|
||||
|
||||
This procedure returns a scalar that is the sum of all lanes, calculated by
|
||||
adding each even-indexed element with the following odd-indexed element to
|
||||
produce N/2 values. This is repeated until only a single element remains. This
|
||||
order is supported by hardware instructions for some types/architectures (e.g.
|
||||
i16/i32/f32/f64 on x86 SSE, i8/i16/i32/f32 on ARM NEON).
|
||||
|
||||
The order of the sum may be important for accounting for precision errors in
|
||||
floating-point computation, as floating-point addition is not associative, that
|
||||
is `(a+b)+c` may not be equal to `a+(b+c)`.
|
||||
|
||||
Inputs:
|
||||
- `v`: The vector to reduce.
|
||||
|
||||
Result:
|
||||
- Sum of all lanes, as a scalar.
|
||||
|
||||
**Operation**:
|
||||
|
||||
for n > 1 {
|
||||
n = n / 2
|
||||
for i in 0 ..< n {
|
||||
a[i] = a[2*i+0] + a[2*i+1]
|
||||
}
|
||||
}
|
||||
res := a[0]
|
||||
|
||||
Graphical representation of the operation for N=4:
|
||||
|
||||
+-----------------------+
|
||||
v: | v0 | v1 | v2 | v3 |
|
||||
+-----------------------+
|
||||
| | | |
|
||||
`>[+]<' `>[+]<'
|
||||
| |
|
||||
`--->[+]<--'
|
||||
|
|
||||
v
|
||||
+-----+
|
||||
result: | y0 |
|
||||
+-----+
|
||||
*/
|
||||
reduce_add_pairs :: #force_inline proc "contextless" (v: #simd[$N]$E) -> E
|
||||
where intrinsics.type_is_numeric(E) {
|
||||
when N == 64 { v64 := v }
|
||||
when N == 32 { v32 := v }
|
||||
when N == 16 { v16 := v }
|
||||
when N == 8 { v8 := v }
|
||||
when N == 4 { v4 := v }
|
||||
when N == 2 { v2 := v }
|
||||
|
||||
when N >= 64 {
|
||||
x32 := swizzle(v64,
|
||||
0, 2, 4, 6, 8, 10, 12, 14,
|
||||
16, 18, 20, 22, 24, 26, 28, 30,
|
||||
32, 34, 36, 38, 40, 42, 44, 46,
|
||||
48, 50, 52, 54, 56, 58, 60, 62)
|
||||
y32 := swizzle(v64,
|
||||
1, 3, 5, 7, 9, 11, 13, 15,
|
||||
17, 19, 21, 23, 25, 27, 29, 31,
|
||||
33, 35, 37, 39, 41, 43, 45, 47,
|
||||
49, 51, 53, 55, 57, 59, 61, 63)
|
||||
v32 := x32 + y32
|
||||
}
|
||||
|
||||
when N >= 32 {
|
||||
x16 := swizzle(v32,
|
||||
0, 2, 4, 6, 8, 10, 12, 14,
|
||||
16, 18, 20, 22, 24, 26, 28, 30)
|
||||
y16 := swizzle(v32,
|
||||
1, 3, 5, 7, 9, 11, 13, 15,
|
||||
17, 19, 21, 23, 25, 27, 29, 31)
|
||||
v16 := x16 + y16
|
||||
}
|
||||
|
||||
when N >= 16 {
|
||||
x8 := swizzle(v16, 0, 2, 4, 6, 8, 10, 12, 14)
|
||||
y8 := swizzle(v16, 1, 3, 5, 7, 9, 11, 13, 15)
|
||||
v8 := x8 + y8
|
||||
}
|
||||
|
||||
when N >= 8 {
|
||||
x4 := swizzle(v8, 0, 2, 4, 6)
|
||||
y4 := swizzle(v8, 1, 3, 5, 7)
|
||||
v4 := x4 + y4
|
||||
}
|
||||
|
||||
when N >= 4 {
|
||||
x2 := swizzle(v4, 0, 2)
|
||||
y2 := swizzle(v4, 1, 3)
|
||||
v2 := x2 + y2
|
||||
}
|
||||
|
||||
when N >= 2 {
|
||||
return extract(v2, 0) + extract(v2, 1)
|
||||
} else {
|
||||
return extract(v, 0)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Reduce a vector to a scalar by adding up all the lanes in a bisecting fashion.
|
||||
|
||||
This procedure returns a scalar that is the sum of all lanes, calculated by
|
||||
bisecting the vector into two parts, where the first contains lanes [0, N/2)
|
||||
and the second contains lanes [N/2, N), and adding the two halves element-wise
|
||||
to produce N/2 values. This is repeated until only a single element remains.
|
||||
This order may be faster to compute than the ordered sum for floats, as it can
|
||||
often be better parallelized.
|
||||
|
||||
The order of the sum may be important for accounting for precision errors in
|
||||
floating-point computation, as floating-point addition is not associative, that
|
||||
is `(a+b)+c` may not be equal to `a+(b+c)`.
|
||||
|
||||
Inputs:
|
||||
- `v`: The vector to reduce.
|
||||
|
||||
Result:
|
||||
- Sum of all lanes, as a scalar.
|
||||
|
||||
**Operation**:
|
||||
|
||||
for n > 1 {
|
||||
n = n / 2
|
||||
for i in 0 ..< n {
|
||||
a[i] += a[i+n]
|
||||
}
|
||||
}
|
||||
res := a[0]
|
||||
|
||||
Graphical representation of the operation for N=4:
|
||||
|
||||
+-----------------------+
|
||||
| v0 | v1 | v2 | v3 |
|
||||
+-----------------------+
|
||||
| | | |
|
||||
[+]<-- | ---' |
|
||||
| [+]<--------'
|
||||
| |
|
||||
`>[+]<'
|
||||
|
|
||||
v
|
||||
+-----+
|
||||
result: | y0 |
|
||||
+-----+
|
||||
*/
|
||||
reduce_add_bisect :: #force_inline proc "contextless" (v: #simd[$N]$E) -> E
|
||||
where intrinsics.type_is_numeric(E) {
|
||||
when N == 64 { v64 := v }
|
||||
when N == 32 { v32 := v }
|
||||
when N == 16 { v16 := v }
|
||||
when N == 8 { v8 := v }
|
||||
when N == 4 { v4 := v }
|
||||
when N == 2 { v2 := v }
|
||||
|
||||
when N >= 64 {
|
||||
x32 := swizzle(v64,
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31)
|
||||
y32 := swizzle(v64,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63)
|
||||
v32 := x32 + y32
|
||||
}
|
||||
|
||||
when N >= 32 {
|
||||
x16 := swizzle(v32,
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15)
|
||||
y16 := swizzle(v32,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31)
|
||||
v16 := x16 + y16
|
||||
}
|
||||
|
||||
when N >= 16 {
|
||||
x8 := swizzle(v16, 0, 1, 2, 3, 4, 5, 6, 7)
|
||||
y8 := swizzle(v16, 8, 9, 10, 11, 12, 13, 14, 15)
|
||||
v8 := x8 + y8
|
||||
}
|
||||
|
||||
when N >= 8 {
|
||||
x4 := swizzle(v8, 0, 1, 2, 3)
|
||||
y4 := swizzle(v8, 4, 5, 6, 7)
|
||||
v4 := x4 + y4
|
||||
}
|
||||
|
||||
when N >= 4 {
|
||||
x2 := swizzle(v4, 0, 1)
|
||||
y2 := swizzle(v4, 2, 3)
|
||||
v2 := x2 + y2
|
||||
}
|
||||
|
||||
when N >= 2 {
|
||||
return extract(v2, 0) + extract(v2, 1)
|
||||
} else {
|
||||
return extract(v, 0)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Reduce a vector to a scalar by multiplying all the lanes in a pairwise fashion.
|
||||
|
||||
This procedure returns a scalar that is the product of all lanes, calculated by
|
||||
multiplying each even-indexed element with the following odd-indexed element to
|
||||
produce N/2 values. This is repeated until only a single element remains. This
|
||||
order may be faster to compute than the ordered product for floats, as it can
|
||||
often be better parallelized.
|
||||
|
||||
The order of the product may be important for accounting for precision errors
|
||||
in floating-point computation, as floating-point multiplication is not
|
||||
associative, that is `(a*b)*c` may not be equal to `a*(b*c)`.
|
||||
|
||||
Inputs:
|
||||
- `v`: The vector to reduce.
|
||||
|
||||
Result:
|
||||
- Product of all lanes, as a scalar.
|
||||
|
||||
**Operation**:
|
||||
|
||||
for n > 1 {
|
||||
n = n / 2
|
||||
for i in 0 ..< n {
|
||||
a[i] = a[2*i+0] * a[2*i+1]
|
||||
}
|
||||
}
|
||||
res := a[0]
|
||||
|
||||
Graphical representation of the operation for N=4:
|
||||
|
||||
+-----------------------+
|
||||
v: | v0 | v1 | v2 | v3 |
|
||||
+-----------------------+
|
||||
| | | |
|
||||
`>[x]<' `>[x]<'
|
||||
| |
|
||||
`--->[x]<--'
|
||||
|
|
||||
v
|
||||
+-----+
|
||||
result: | y0 |
|
||||
+-----+
|
||||
*/
|
||||
reduce_mul_pairs :: #force_inline proc "contextless" (v: #simd[$N]$E) -> E
|
||||
where intrinsics.type_is_numeric(E) {
|
||||
when N == 64 { v64 := v }
|
||||
when N == 32 { v32 := v }
|
||||
when N == 16 { v16 := v }
|
||||
when N == 8 { v8 := v }
|
||||
when N == 4 { v4 := v }
|
||||
when N == 2 { v2 := v }
|
||||
|
||||
when N >= 64 {
|
||||
x32 := swizzle(v64,
|
||||
0, 2, 4, 6, 8, 10, 12, 14,
|
||||
16, 18, 20, 22, 24, 26, 28, 30,
|
||||
32, 34, 36, 38, 40, 42, 44, 46,
|
||||
48, 50, 52, 54, 56, 58, 60, 62)
|
||||
y32 := swizzle(v64,
|
||||
1, 3, 5, 7, 9, 11, 13, 15,
|
||||
17, 19, 21, 23, 25, 27, 29, 31,
|
||||
33, 35, 37, 39, 41, 43, 45, 47,
|
||||
49, 51, 53, 55, 57, 59, 61, 63)
|
||||
v32 := x32 * y32
|
||||
}
|
||||
|
||||
when N >= 32 {
|
||||
x16 := swizzle(v32,
|
||||
0, 2, 4, 6, 8, 10, 12, 14,
|
||||
16, 18, 20, 22, 24, 26, 28, 30)
|
||||
y16 := swizzle(v32,
|
||||
1, 3, 5, 7, 9, 11, 13, 15,
|
||||
17, 19, 21, 23, 25, 27, 29, 31)
|
||||
v16 := x16 * y16
|
||||
}
|
||||
|
||||
when N >= 16 {
|
||||
x8 := swizzle(v16, 0, 2, 4, 6, 8, 10, 12, 14)
|
||||
y8 := swizzle(v16, 1, 3, 5, 7, 9, 11, 13, 15)
|
||||
v8 := x8 * y8
|
||||
}
|
||||
|
||||
when N >= 8 {
|
||||
x4 := swizzle(v8, 0, 2, 4, 6)
|
||||
y4 := swizzle(v8, 1, 3, 5, 7)
|
||||
v4 := x4 * y4
|
||||
}
|
||||
|
||||
when N >= 4 {
|
||||
x2 := swizzle(v4, 0, 2)
|
||||
y2 := swizzle(v4, 1, 3)
|
||||
v2 := x2 * y2
|
||||
}
|
||||
|
||||
when N >= 2 {
|
||||
return extract(v2, 0) * extract(v2, 1)
|
||||
} else {
|
||||
return extract(v, 0)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Reduce a vector to a scalar by multiplying all the lanes in a bisecting fashion.
|
||||
|
||||
This procedure returns a scalar that is the product of all lanes, calculated by
|
||||
bisecting the vector into two parts, where the first contains indices [0, N/2)
|
||||
and the second contains indices [N/2, N), and multiplying the two halves
|
||||
together element-wise to produce N/2 values. This is repeated until only a
|
||||
single element remains. This order may be faster to compute than the ordered
|
||||
product for floats, as it can often be better parallelized.
|
||||
|
||||
The order of the product may be important for accounting for precision errors
|
||||
in floating-point computation, as floating-point multiplication is not
|
||||
associative, that is `(a*b)*c` may not be equal to `a*(b*c)`.
|
||||
|
||||
Inputs:
|
||||
- `v`: The vector to reduce.
|
||||
|
||||
Result:
|
||||
- Product of all lanes, as a scalar.
|
||||
|
||||
**Operation**:
|
||||
|
||||
for n > 1 {
|
||||
n = n / 2
|
||||
for i in 0 ..< n {
|
||||
a[i] *= a[i+n]
|
||||
}
|
||||
}
|
||||
res := a[0]
|
||||
|
||||
Graphical representation of the operation for N=4:
|
||||
|
||||
+-----------------------+
|
||||
| v0 | v1 | v2 | v3 |
|
||||
+-----------------------+
|
||||
| | | |
|
||||
[x]<-- | ---' |
|
||||
| [x]<--------'
|
||||
| |
|
||||
`>[x]<'
|
||||
|
|
||||
v
|
||||
+-----+
|
||||
result: | y0 |
|
||||
+-----+
|
||||
*/
|
||||
reduce_mul_bisect :: #force_inline proc "contextless" (v: #simd[$N]$E) -> E
|
||||
where intrinsics.type_is_numeric(E) {
|
||||
when N == 64 { v64 := v }
|
||||
when N == 32 { v32 := v }
|
||||
when N == 16 { v16 := v }
|
||||
when N == 8 { v8 := v }
|
||||
when N == 4 { v4 := v }
|
||||
when N == 2 { v2 := v }
|
||||
|
||||
when N >= 64 {
|
||||
x32 := swizzle(v64,
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31)
|
||||
y32 := swizzle(v64,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63)
|
||||
v32 := x32 * y32
|
||||
}
|
||||
|
||||
when N >= 32 {
|
||||
x16 := swizzle(v32,
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15)
|
||||
y16 := swizzle(v32,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31)
|
||||
v16 := x16 * y16
|
||||
}
|
||||
|
||||
when N >= 16 {
|
||||
x8 := swizzle(v16, 0, 1, 2, 3, 4, 5, 6, 7)
|
||||
y8 := swizzle(v16, 8, 9, 10, 11, 12, 13, 14, 15)
|
||||
v8 := x8 * y8
|
||||
}
|
||||
|
||||
when N >= 8 {
|
||||
x4 := swizzle(v8, 0, 1, 2, 3)
|
||||
y4 := swizzle(v8, 4, 5, 6, 7)
|
||||
v4 := x4 * y4
|
||||
}
|
||||
|
||||
when N >= 4 {
|
||||
x2 := swizzle(v4, 0, 1)
|
||||
y2 := swizzle(v4, 2, 3)
|
||||
v2 := x2 * y2
|
||||
}
|
||||
|
||||
when N >= 2 {
|
||||
return extract(v2, 0) * extract(v2, 1)
|
||||
} else {
|
||||
return extract(v, 0)
|
||||
}
|
||||
}
|
||||
|
||||
indices :: intrinsics.simd_indices
|
||||
@@ -47,6 +47,8 @@ foreign user32 {
|
||||
lpParam: LPVOID,
|
||||
) -> HWND ---
|
||||
|
||||
GetWindowThreadProcessId :: proc(hwnd: HWND, lpdwProcessId: LPDWORD) -> DWORD ---
|
||||
|
||||
DestroyWindow :: proc(hWnd: HWND) -> BOOL ---
|
||||
|
||||
ShowWindow :: proc(hWnd: HWND, nCmdShow: INT) -> BOOL ---
|
||||
|
||||
+35
-3
@@ -760,6 +760,36 @@ gb_internal bool check_builtin_simd_operation(CheckerContext *c, Operand *operan
|
||||
return true;
|
||||
}
|
||||
|
||||
case BuiltinProc_simd_indices:
	{
		// `simd_indices(T)`: the single argument must be a type (not a value), that
		// type must be a simd vector, and its element type must be numeric. On
		// success the operand becomes a value of the given vector type.
		Operand x = {};
		check_expr_or_type(c, &x, ce->args[0], nullptr);
		if (x.mode == Addressing_Invalid) return false;
		if (x.mode != Addressing_Type) {
			// The argument is an expression rather than a type.
			gbString s = expr_to_string(x.expr);
			error(x.expr, "'%.*s' expected a simd vector type, got '%s'", LIT(builtin_name), s);
			gb_string_free(s);
			return false;
		}
		if (!is_type_simd_vector(x.type)) {
			gbString s = type_to_string(x.type);
			error(x.expr, "'%.*s' expected a simd vector type, got '%s'", LIT(builtin_name), s);
			gb_string_free(s);
			return false;
		}

		Type *elem = base_array_type(x.type);
		if (!is_type_numeric(elem)) {
			gbString s = type_to_string(x.type);
			error(x.expr, "'%.*s' expected a simd vector type with a numeric element type, got '%s'", LIT(builtin_name), s);
			gb_string_free(s);
			// Reject the call like the other argument errors above, rather than
			// reporting success for a vector type whose lanes cannot hold indices.
			return false;
		}

		operand->mode = Addressing_Value;
		operand->type = x.type;
		return true;
	}
|
||||
|
||||
case BuiltinProc_simd_extract:
|
||||
{
|
||||
Operand x = {};
|
||||
@@ -2059,6 +2089,7 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As
|
||||
case BuiltinProc_atomic_type_is_lock_free:
|
||||
case BuiltinProc_has_target_feature:
|
||||
case BuiltinProc_procedure_of:
|
||||
case BuiltinProc_simd_indices:
|
||||
// NOTE(bill): The first arg may be a Type, this will be checked case by case
|
||||
break;
|
||||
|
||||
@@ -6001,12 +6032,13 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As
|
||||
|
||||
// NOTE(jakubtomsu): forces calculation of variant_block_size
|
||||
type_size_of(u);
|
||||
i64 tag_offset = u->Union.variant_block_size;
|
||||
GB_ASSERT(tag_offset > 0);
|
||||
// NOTE(Jeroen): A tag offset of zero is perfectly fine if all members of the union are empty structs.
|
||||
// What matters is that the tag size is > 0.
|
||||
GB_ASSERT(u->Union.tag_size > 0);
|
||||
|
||||
operand->mode = Addressing_Constant;
|
||||
operand->type = t_untyped_integer;
|
||||
operand->value = exact_value_i64(tag_offset);
|
||||
operand->value = exact_value_i64(u->Union.variant_block_size);
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
+16
-5
@@ -2910,9 +2910,20 @@ gb_internal void check_comparison(CheckerContext *c, Ast *node, Operand *x, Oper
|
||||
if (!defined) {
|
||||
gbString xs = type_to_string(x->type, temporary_allocator());
|
||||
gbString ys = type_to_string(y->type, temporary_allocator());
|
||||
err_str = gb_string_make(temporary_allocator(),
|
||||
gb_bprintf("operator '%.*s' not defined between the types '%s' and '%s'", LIT(token_strings[op]), xs, ys)
|
||||
);
|
||||
|
||||
if (!is_type_comparable(x->type)) {
|
||||
err_str = gb_string_make(temporary_allocator(),
|
||||
gb_bprintf("Type '%s' is not simply comparable, so operator '%.*s' is not defined for it", xs, LIT(token_strings[op]))
|
||||
);
|
||||
} else if (!is_type_comparable(y->type)) {
|
||||
err_str = gb_string_make(temporary_allocator(),
|
||||
gb_bprintf("Type '%s' is not simply comparable, so operator '%.*s' is not defined for it", ys, LIT(token_strings[op]))
|
||||
);
|
||||
} else {
|
||||
err_str = gb_string_make(temporary_allocator(),
|
||||
gb_bprintf("Operator '%.*s' not defined between the types '%s' and '%s'", LIT(token_strings[op]), xs, ys)
|
||||
);
|
||||
}
|
||||
} else {
|
||||
Type *comparison_type = x->type;
|
||||
if (x->type == err_type && is_operand_nil(*x)) {
|
||||
@@ -2933,11 +2944,11 @@ gb_internal void check_comparison(CheckerContext *c, Ast *node, Operand *x, Oper
|
||||
} else {
|
||||
yt = type_to_string(y->type);
|
||||
}
|
||||
err_str = gb_string_make(temporary_allocator(), gb_bprintf("mismatched types '%s' and '%s'", xt, yt));
|
||||
err_str = gb_string_make(temporary_allocator(), gb_bprintf("Mismatched types '%s' and '%s'", xt, yt));
|
||||
}
|
||||
|
||||
if (err_str != nullptr) {
|
||||
error(node, "Cannot compare expression, %s", err_str);
|
||||
error(node, "Cannot compare expression. %s.", err_str);
|
||||
x->type = t_untyped_bool;
|
||||
} else {
|
||||
if (x->mode == Addressing_Constant &&
|
||||
|
||||
@@ -205,6 +205,9 @@ BuiltinProc__simd_begin,
|
||||
BuiltinProc_simd_masked_expand_load,
|
||||
BuiltinProc_simd_masked_compress_store,
|
||||
|
||||
BuiltinProc_simd_indices,
|
||||
|
||||
|
||||
// Platform specific SIMD intrinsics
|
||||
BuiltinProc_simd_x86__MM_SHUFFLE,
|
||||
BuiltinProc__simd_end,
|
||||
@@ -551,6 +554,8 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = {
|
||||
{STR_LIT("simd_masked_expand_load"), 3, false, Expr_Expr, BuiltinProcPkg_intrinsics},
|
||||
{STR_LIT("simd_masked_compress_store"), 3, false, Expr_Stmt, BuiltinProcPkg_intrinsics},
|
||||
|
||||
{STR_LIT("simd_indices"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics},
|
||||
|
||||
{STR_LIT("simd_x86__MM_SHUFFLE"), 4, false, Expr_Expr, BuiltinProcPkg_intrinsics},
|
||||
|
||||
{STR_LIT(""), 0, false, Expr_Stmt, BuiltinProcPkg_intrinsics},
|
||||
|
||||
@@ -1293,6 +1293,23 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn
|
||||
lbValue res = {};
|
||||
res.type = tv.type;
|
||||
|
||||
switch (builtin_id) {
|
||||
case BuiltinProc_simd_indices: {
|
||||
Type *type = base_type(res.type);
|
||||
GB_ASSERT(type->kind == Type_SimdVector);
|
||||
Type *elem = type->SimdVector.elem;
|
||||
|
||||
i64 count = type->SimdVector.count;
|
||||
LLVMValueRef *scalars = gb_alloc_array(temporary_allocator(), LLVMValueRef, count);
|
||||
for (i64 i = 0; i < count; i++) {
|
||||
scalars[i] = lb_const_value(m, elem, exact_value_i64(i)).value;
|
||||
}
|
||||
|
||||
res.value = LLVMConstVector(scalars, cast(unsigned)count);
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
lbValue arg0 = {}; if (ce->args.count > 0) arg0 = lb_build_expr(p, ce->args[0]);
|
||||
lbValue arg1 = {}; if (ce->args.count > 1) arg1 = lb_build_expr(p, ce->args[1]);
|
||||
lbValue arg2 = {}; if (ce->args.count > 2) arg2 = lb_build_expr(p, ce->args[2]);
|
||||
|
||||
+2
-2
@@ -4108,10 +4108,10 @@ gb_internal i64 type_size_of_internal(Type *t, TypePath *path) {
|
||||
}
|
||||
|
||||
i64 max = 0;
|
||||
i64 field_size = 0;
|
||||
|
||||
for_array(i, t->Union.variants) {
|
||||
Type *variant_type = t->Union.variants[i];
|
||||
|
||||
i64 size = type_size_of_internal(variant_type, path);
|
||||
if (max < size) {
|
||||
max = size;
|
||||
@@ -4130,7 +4130,7 @@ gb_internal i64 type_size_of_internal(Type *t, TypePath *path) {
|
||||
size = align_formula(max, tag_size);
|
||||
// NOTE(bill): Calculate the padding between the common fields and the tag
|
||||
t->Union.tag_size = cast(i16)tag_size;
|
||||
t->Union.variant_block_size = size - field_size;
|
||||
t->Union.variant_block_size = size;
|
||||
|
||||
size += tag_size;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user