diff --git a/base/intrinsics/intrinsics.odin b/base/intrinsics/intrinsics.odin index bec452007..c275dedaf 100644 --- a/base/intrinsics/intrinsics.odin +++ b/base/intrinsics/intrinsics.odin @@ -298,7 +298,7 @@ simd_masked_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) simd_masked_expand_load :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) --- simd_masked_compress_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) where type_is_integer(U) || type_is_boolean(U) --- - +simd_indices :: proc($T: typeid/#simd[$N]$E) -> T where type_is_numeric(T) --- simd_shuffle :: proc(a, b: #simd[N]T, indices: ..int) -> #simd[len(indices)]T --- simd_select :: proc(cond: #simd[N]boolean_or_integer, true, false: #simd[N]T) -> #simd[N]T --- diff --git a/core/bufio/reader.odin b/core/bufio/reader.odin index a875c732d..b78cac6e1 100644 --- a/core/bufio/reader.odin +++ b/core/bufio/reader.odin @@ -257,7 +257,7 @@ reader_read_rune :: proc(b: ^Reader) -> (r: rune, size: int, err: io.Error) { for b.r+utf8.UTF_MAX > b.w && !utf8.full_rune(b.buf[b.r:b.w]) && b.err == nil && - b.w-b.w < len(b.buf) { + b.w-b.r < len(b.buf) { _reader_read_new_chunk(b) or_return } diff --git a/core/os/os2/file_windows.odin b/core/os/os2/file_windows.odin index 6caf84a64..94e51a14c 100644 --- a/core/os/os2/file_windows.odin +++ b/core/os/os2/file_windows.odin @@ -12,7 +12,30 @@ import win32 "core:sys/windows" INVALID_HANDLE :: ~uintptr(0) -S_IWRITE :: 0o200 +// NOTE(Jeroen): We don't translate mode flags for Linux when given to `chmod`. +// Let's not do so for Windows for `chmod` or `read_directory_iterator` either. +// They're *not* portable between Windows and non-Windows platforms. +// +// It also leads to information loss as flags like Archive, Hidden and System have no equivalent there. 
+// We can of course parse them so we can set the `.Symlink` and `.Directory` type, but we shouldn't pretend +// that 0o644 is meaningful when returned as a mode. +// `C:\bootmgr` as an example has attributes read only, hidden, system, archive. In no way is it sensible to replace that with 0o444. +FILE_ATTRIBUTE_READONLY :: win32.FILE_ATTRIBUTE_READONLY // 0x00000001 +FILE_ATTRIBUTE_HIDDEN :: win32.FILE_ATTRIBUTE_HIDDEN // 0x00000002 +FILE_ATTRIBUTE_SYSTEM :: win32.FILE_ATTRIBUTE_SYSTEM // 0x00000004 +FILE_ATTRIBUTE_DIRECTORY :: win32.FILE_ATTRIBUTE_DIRECTORY // 0x00000010 +FILE_ATTRIBUTE_ARCHIVE :: win32.FILE_ATTRIBUTE_ARCHIVE // 0x00000020 +FILE_ATTRIBUTE_DEVICE :: win32.FILE_ATTRIBUTE_DEVICE // 0x00000040 +FILE_ATTRIBUTE_NORMAL :: win32.FILE_ATTRIBUTE_NORMAL // 0x00000080 +FILE_ATTRIBUTE_TEMPORARY :: win32.FILE_ATTRIBUTE_TEMPORARY // 0x00000100 +FILE_ATTRIBUTE_SPARSE_FILE :: win32.FILE_ATTRIBUTE_SPARSE_FILE // 0x00000200 +FILE_ATTRIBUTE_REPARSE_Point :: win32.FILE_ATTRIBUTE_REPARSE_Point // 0x00000400 +FILE_ATTRIBUTE_REPARSE_POINT :: win32.FILE_ATTRIBUTE_REPARSE_POINT // 0x00000400 +FILE_ATTRIBUTE_COMPRESSED :: win32.FILE_ATTRIBUTE_COMPRESSED // 0x00000800 +FILE_ATTRIBUTE_OFFLINE :: win32.FILE_ATTRIBUTE_OFFLINE // 0x00001000 +FILE_ATTRIBUTE_NOT_CONTENT_INDEXED :: win32.FILE_ATTRIBUTE_NOT_CONTENT_INDEXED // 0x00002000 +FILE_ATTRIBUTE_ENCRYPTED :: win32.FILE_ATTRIBUTE_ENCRYPTED // 0x00004000 + _ERROR_BAD_NETPATH :: 53 MAX_RW :: 1<<30 @@ -122,7 +145,7 @@ _open_internal :: proc(name: string, flags: File_Flags, perm: int) -> (handle: u } attrs: u32 = win32.FILE_ATTRIBUTE_NORMAL|win32.FILE_FLAG_BACKUP_SEMANTICS - if perm & S_IWRITE == 0 { + if u32(perm) & FILE_ATTRIBUTE_NORMAL == 0 { attrs = win32.FILE_ATTRIBUTE_READONLY if create_mode == win32.CREATE_ALWAYS { // NOTE(bill): Open has just asked to create a file in read-only mode. 
@@ -748,20 +771,10 @@ _fchmod :: proc(f: ^File, mode: int) -> Error { if f == nil || f.impl == nil { return nil } - d: win32.BY_HANDLE_FILE_INFORMATION - if !win32.GetFileInformationByHandle(_handle(f), &d) { - return _get_platform_error() - } - attrs := d.dwFileAttributes - if mode & S_IWRITE != 0 { - attrs &~= win32.FILE_ATTRIBUTE_READONLY - } else { - attrs |= win32.FILE_ATTRIBUTE_READONLY - } info: win32.FILE_BASIC_INFO - info.FileAttributes = attrs - if !win32.SetFileInformationByHandle(_handle(f), .FileBasicInfo, &info, size_of(d)) { + info.FileAttributes = win32.DWORD(mode) + if !win32.SetFileInformationByHandle(_handle(f), .FileBasicInfo, &info, size_of(info)) { return _get_platform_error() } return nil @@ -800,19 +813,11 @@ _chtimes :: proc(name: string, atime, mtime: time.Time) -> Error { defer close(f) return _fchtimes(f, atime, mtime) } + _fchtimes :: proc(f: ^File, atime, mtime: time.Time) -> Error { if f == nil || f.impl == nil { return nil } - d: win32.BY_HANDLE_FILE_INFORMATION - if !win32.GetFileInformationByHandle(_handle(f), &d) { - return _get_platform_error() - } - - to_windows_time :: #force_inline proc(t: time.Time) -> win32.LARGE_INTEGER { - // a 64-bit value representing the number of 100-nanosecond intervals since January 1, 1601 (UTC) - return win32.LARGE_INTEGER(time.time_to_unix_nano(t) * 100 + 116444736000000000) - } atime, mtime := atime, mtime if time.time_to_unix_nano(atime) < time.time_to_unix_nano(mtime) { @@ -820,9 +825,9 @@ _fchtimes :: proc(f: ^File, atime, mtime: time.Time) -> Error { } info: win32.FILE_BASIC_INFO - info.LastAccessTime = to_windows_time(atime) - info.LastWriteTime = to_windows_time(mtime) - if !win32.SetFileInformationByHandle(_handle(f), .FileBasicInfo, &info, size_of(d)) { + info.LastAccessTime = time_as_filetime(atime) + info.LastWriteTime = time_as_filetime(mtime) + if !win32.SetFileInformationByHandle(_handle(f), .FileBasicInfo, &info, size_of(info)) { return _get_platform_error() } return nil diff --git 
a/core/os/os2/process_linux.odin b/core/os/os2/process_linux.odin index 6d654008b..1480e66b5 100644 --- a/core/os/os2/process_linux.odin +++ b/core/os/os2/process_linux.odin @@ -162,7 +162,7 @@ _process_info_by_pid :: proc(pid: int, selection: Process_Info_Fields, allocator } } - cmdline_if: if selection & {.Working_Dir, .Command_Line, .Command_Args, .Executable_Path} != {} { + cmdline_if: if selection & {.Working_Dir, .Command_Line, .Command_Args} != {} { strings.builder_reset(&path_builder) strings.write_string(&path_builder, "/proc/") strings.write_int(&path_builder, pid) @@ -178,12 +178,12 @@ _process_info_by_pid :: proc(pid: int, selection: Process_Info_Fields, allocator terminator := strings.index_byte(cmdline, 0) assert(terminator > 0) - command_line_exec := cmdline[:terminator] + // command_line_exec := cmdline[:terminator] // Still need cwd if the execution on the command line is relative. cwd: string cwd_err: Error - if .Working_Dir in selection || (.Executable_Path in selection && command_line_exec[0] != '/') { + if .Working_Dir in selection { strings.builder_reset(&path_builder) strings.write_string(&path_builder, "/proc/") strings.write_int(&path_builder, pid) @@ -199,18 +199,6 @@ _process_info_by_pid :: proc(pid: int, selection: Process_Info_Fields, allocator } } - if .Executable_Path in selection { - if cmdline[0] == '/' { - info.executable_path = strings.clone(cmdline[:terminator], allocator) or_return - info.fields += {.Executable_Path} - } else if cwd_err == nil { - info.executable_path = join_path({ cwd, cmdline[:terminator] }, allocator) or_return - info.fields += {.Executable_Path} - } else { - break cmdline_if - } - } - if selection & {.Command_Line, .Command_Args} != {} { // skip to first arg //cmdline = cmdline[terminator + 1:] @@ -323,6 +311,30 @@ _process_info_by_pid :: proc(pid: int, selection: Process_Info_Fields, allocator } } + if .Executable_Path in selection { + /* + NOTE(Jeroen): + + The old version returned the wrong executable 
path for things like `bash` or `sh`, + for whom `/proc/<pid>/cmdline` will just report "bash" or "sh", + resulting in misleading paths like `$PWD/sh`, even though that executable doesn't exist there. + + Thanks to Yawning for suggesting `/proc/self/exe`. + */ + + strings.builder_reset(&path_builder) + strings.write_string(&path_builder, "/proc/") + strings.write_int(&path_builder, pid) + strings.write_string(&path_builder, "/exe") + + if exe_bytes, exe_err := _read_link(strings.to_string(path_builder), temp_allocator()); exe_err == nil { + info.executable_path = strings.clone(string(exe_bytes), allocator) or_return + info.fields += {.Executable_Path} + } else { + err = exe_err + } + } + if .Environment in selection { strings.builder_reset(&path_builder) strings.write_string(&path_builder, "/proc/") diff --git a/core/os/os2/stat_windows.odin b/core/os/os2/stat_windows.odin index 7d8dd3843..8c3d4a610 100644 --- a/core/os/os2/stat_windows.odin +++ b/core/os/os2/stat_windows.odin @@ -212,11 +212,15 @@ _file_type_from_create_file :: proc(wname: win32.wstring, create_file_attributes } _file_type_mode_from_file_attributes :: proc(file_attributes: win32.DWORD, h: win32.HANDLE, ReparseTag: win32.DWORD) -> (type: File_Type, mode: int) { - if file_attributes & win32.FILE_ATTRIBUTE_READONLY != 0 { - mode |= 0o444 - } else { - mode |= 0o666 - } + // NOTE(Jeroen): We don't translate mode flags for Linux when given to `chmod`. + // Let's not do so for Windows for `chmod` or `read_directory_iterator` either. + // They're *not* portable between Windows and non-Windows platforms. + // + // It also leads to information loss as flags like Archive, Hidden and System have no equivalent there. + // We can of course parse them so we can set the `.Symlink` and `.Directory` type, but we shouldn't pretend + // that 0o644 is meaningful when returned as a mode. + // `C:\bootmgr` as an example has attributes read only, hidden, system, archive. In no way is it sensible to replace that with 0o444.
+ mode = int(file_attributes) is_sym := false if file_attributes & win32.FILE_ATTRIBUTE_REPARSE_POINT == 0 { @@ -229,21 +233,36 @@ _file_type_mode_from_file_attributes :: proc(file_attributes: win32.DWORD, h: wi type = .Symlink } else if file_attributes & win32.FILE_ATTRIBUTE_DIRECTORY != 0 { type = .Directory - mode |= 0o111 } else if h != nil { type = file_type(h) } return } +// a 64-bit value representing the number of 100-nanosecond intervals since January 1, 1601 (UTC) +time_as_filetime :: #force_inline proc(t: time.Time) -> (ft: win32.LARGE_INTEGER) { + win := u64(t._nsec / 100) + 116444736000000000 + return win32.LARGE_INTEGER(win) +} + +filetime_as_time_li :: #force_inline proc(ft: win32.LARGE_INTEGER) -> (t: time.Time) { + return {_nsec=(i64(ft) - 116444736000000000) * 100} +} + +filetime_as_time_ft :: #force_inline proc(ft: win32.FILETIME) -> (t: time.Time) { + return filetime_as_time_li(win32.LARGE_INTEGER(ft.dwLowDateTime) + win32.LARGE_INTEGER(ft.dwHighDateTime) << 32) +} + +filetime_as_time :: proc{filetime_as_time_ft, filetime_as_time_li} + _file_info_from_win32_file_attribute_data :: proc(d: ^win32.WIN32_FILE_ATTRIBUTE_DATA, name: string, allocator: runtime.Allocator) -> (fi: File_Info, e: Error) { fi.size = i64(d.nFileSizeHigh)<<32 + i64(d.nFileSizeLow) type, mode := _file_type_mode_from_file_attributes(d.dwFileAttributes, nil, 0) fi.type = type fi.mode |= mode - fi.creation_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftCreationTime)) - fi.modification_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastWriteTime)) - fi.access_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastAccessTime)) + fi.creation_time = filetime_as_time(d.ftCreationTime) + fi.modification_time = filetime_as_time(d.ftLastWriteTime) + fi.access_time = filetime_as_time(d.ftLastAccessTime) fi.fullpath, e = full_path_from_name(name, allocator) fi.name = basename(fi.fullpath) return @@ -254,9 +273,9 @@ _file_info_from_win32_find_data :: proc(d: 
^win32.WIN32_FIND_DATAW, name: string type, mode := _file_type_mode_from_file_attributes(d.dwFileAttributes, nil, 0) fi.type = type fi.mode |= mode - fi.creation_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftCreationTime)) - fi.modification_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastWriteTime)) - fi.access_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastAccessTime)) + fi.creation_time = filetime_as_time(d.ftCreationTime) + fi.modification_time = filetime_as_time(d.ftLastWriteTime) + fi.access_time = filetime_as_time(d.ftLastAccessTime) fi.fullpath, e = full_path_from_name(name, allocator) fi.name = basename(fi.fullpath) return @@ -286,9 +305,9 @@ _file_info_from_get_file_information_by_handle :: proc(path: string, h: win32.HA type, mode := _file_type_mode_from_file_attributes(d.dwFileAttributes, h, 0) fi.type = type fi.mode |= mode - fi.creation_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftCreationTime)) - fi.modification_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastWriteTime)) - fi.access_time = time.unix(0, win32.FILETIME_as_unix_nanoseconds(d.ftLastAccessTime)) + fi.creation_time = filetime_as_time(d.ftCreationTime) + fi.modification_time = filetime_as_time(d.ftLastWriteTime) + fi.access_time = filetime_as_time(d.ftLastAccessTime) return fi, nil } diff --git a/core/simd/simd.odin b/core/simd/simd.odin index a2fe22b4b..0e69304c3 100644 --- a/core/simd/simd.odin +++ b/core/simd/simd.odin @@ -1759,7 +1759,7 @@ Returns: replace :: intrinsics.simd_replace /* -Reduce a vector to a scalar by adding up all the lanes in an ordered fashion. +Reduce a vector to a scalar by adding up all the lanes. This procedure returns a scalar that is the ordered sum of all lanes. 
The ordered sum may be important for accounting for precision errors in @@ -2511,460 +2511,16 @@ recip :: #force_inline proc "contextless" (v: $T/#simd[$LANES]$E) -> T where int return T(1) / v } + /* Create a vector where each lane contains the index of that lane. - Inputs: - `V`: The type of the vector to create. - Result: - A vector of the given type, where each lane contains the index of that lane. - **Operation**: - for i in 0 ..< N { res[i] = i } */ -indices :: #force_inline proc "contextless" ($V: typeid/#simd[$N]$E) -> V where intrinsics.type_is_numeric(E) { - when N == 1 { - return {0} - } else when N == 2 { - return {0, 1} - } else when N == 4 { - return {0, 1, 2, 3} - } else when N == 8 { - return {0, 1, 2, 3, 4, 5, 6, 7} - } else when N == 16 { - return {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15} - } else when N == 32 { - return { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - } - } else when N == 64 { - return { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - } - } else { - #panic("Unsupported vector size!") - } -} - -/* -Reduce a vector to a scalar by adding up all the lanes in a pairwise fashion. - -This procedure returns a scalar that is the sum of all lanes, calculated by -adding each even-indexed element with the following odd-indexed element to -produce N/2 values. This is repeated until only a single element remains. This -order is supported by hardware instructions for some types/architectures (e.g. -i16/i32/f32/f64 on x86 SSE, i8/i16/i32/f32 on ARM NEON). 
- -The order of the sum may be important for accounting for precision errors in -floating-point computation, as floating-point addition is not associative, that -is `(a+b)+c` may not be equal to `a+(b+c)`. - -Inputs: -- `v`: The vector to reduce. - -Result: -- Sum of all lanes, as a scalar. - -**Operation**: - - for n > 1 { - n = n / 2 - for i in 0 ..< n { - a[i] = a[2*i+0] + a[2*i+1] - } - } - res := a[0] - -Graphical representation of the operation for N=4: - - +-----------------------+ - v: | v0 | v1 | v2 | v3 | - +-----------------------+ - | | | | - `>[+]<' `>[+]<' - | | - `--->[+]<--' - | - v - +-----+ - result: | y0 | - +-----+ -*/ -reduce_add_pairs :: #force_inline proc "contextless" (v: #simd[$N]$E) -> E - where intrinsics.type_is_numeric(E) { - when N == 64 { v64 := v } - when N == 32 { v32 := v } - when N == 16 { v16 := v } - when N == 8 { v8 := v } - when N == 4 { v4 := v } - when N == 2 { v2 := v } - - when N >= 64 { - x32 := swizzle(v64, - 0, 2, 4, 6, 8, 10, 12, 14, - 16, 18, 20, 22, 24, 26, 28, 30, - 32, 34, 36, 38, 40, 42, 44, 46, - 48, 50, 52, 54, 56, 58, 60, 62) - y32 := swizzle(v64, - 1, 3, 5, 7, 9, 11, 13, 15, - 17, 19, 21, 23, 25, 27, 29, 31, - 33, 35, 37, 39, 41, 43, 45, 47, - 49, 51, 53, 55, 57, 59, 61, 63) - v32 := x32 + y32 - } - - when N >= 32 { - x16 := swizzle(v32, - 0, 2, 4, 6, 8, 10, 12, 14, - 16, 18, 20, 22, 24, 26, 28, 30) - y16 := swizzle(v32, - 1, 3, 5, 7, 9, 11, 13, 15, - 17, 19, 21, 23, 25, 27, 29, 31) - v16 := x16 + y16 - } - - when N >= 16 { - x8 := swizzle(v16, 0, 2, 4, 6, 8, 10, 12, 14) - y8 := swizzle(v16, 1, 3, 5, 7, 9, 11, 13, 15) - v8 := x8 + y8 - } - - when N >= 8 { - x4 := swizzle(v8, 0, 2, 4, 6) - y4 := swizzle(v8, 1, 3, 5, 7) - v4 := x4 + y4 - } - - when N >= 4 { - x2 := swizzle(v4, 0, 2) - y2 := swizzle(v4, 1, 3) - v2 := x2 + y2 - } - - when N >= 2 { - return extract(v2, 0) + extract(v2, 1) - } else { - return extract(v, 0) - } -} - -/* -Reduce a vector to a scalar by adding up all the lanes in a bisecting fashion. 
- -This procedure returns a scalar that is the sum of all lanes, calculated by -bisecting the vector into two parts, where the first contains lanes [0, N/2) -and the second contains lanes [N/2, N), and adding the two halves element-wise -to produce N/2 values. This is repeated until only a single element remains. -This order may be faster to compute than the ordered sum for floats, as it can -often be better parallelized. - -The order of the sum may be important for accounting for precision errors in -floating-point computation, as floating-point addition is not associative, that -is `(a+b)+c` may not be equal to `a+(b+c)`. - -Inputs: -- `v`: The vector to reduce. - -Result: -- Sum of all lanes, as a scalar. - -**Operation**: - - for n > 1 { - n = n / 2 - for i in 0 ..< n { - a[i] += a[i+n] - } - } - res := a[0] - -Graphical representation of the operation for N=4: - - +-----------------------+ - | v0 | v1 | v2 | v3 | - +-----------------------+ - | | | | - [+]<-- | ---' | - | [+]<--------' - | | - `>[+]<' - | - v - +-----+ - result: | y0 | - +-----+ -*/ -reduce_add_bisect :: #force_inline proc "contextless" (v: #simd[$N]$E) -> E - where intrinsics.type_is_numeric(E) { - when N == 64 { v64 := v } - when N == 32 { v32 := v } - when N == 16 { v16 := v } - when N == 8 { v8 := v } - when N == 4 { v4 := v } - when N == 2 { v2 := v } - - when N >= 64 { - x32 := swizzle(v64, - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31) - y32 := swizzle(v64, - 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63) - v32 := x32 + y32 - } - - when N >= 32 { - x16 := swizzle(v32, - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15) - y16 := swizzle(v32, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31) - v16 := x16 + y16 - } - - when N >= 16 { - x8 := swizzle(v16, 0, 1, 2, 3, 4, 5, 6, 7) - y8 := swizzle(v16, 8, 
9, 10, 11, 12, 13, 14, 15) - v8 := x8 + y8 - } - - when N >= 8 { - x4 := swizzle(v8, 0, 1, 2, 3) - y4 := swizzle(v8, 4, 5, 6, 7) - v4 := x4 + y4 - } - - when N >= 4 { - x2 := swizzle(v4, 0, 1) - y2 := swizzle(v4, 2, 3) - v2 := x2 + y2 - } - - when N >= 2 { - return extract(v2, 0) + extract(v2, 1) - } else { - return extract(v, 0) - } -} - -/* -Reduce a vector to a scalar by multiplying all the lanes in a pairwise fashion. - -This procedure returns a scalar that is the product of all lanes, calculated by -bisecting the vector into two parts, where the first contains lanes [0, N/2) -and the second contains lanes [N/2, N), and multiplying the two halves together -multiplying each even-indexed element with the following odd-indexed element to -produce N/2 values. This is repeated until only a single element remains. This -order may be faster to compute than the ordered product for floats, as it can -often be better parallelized. - -The order of the product may be important for accounting for precision errors -in floating-point computation, as floating-point multiplication is not -associative, that is `(a*b)*c` may not be equal to `a*(b*c)`. - -Inputs: -- `v`: The vector to reduce. - -Result: -- Product of all lanes, as a scalar. 
- -**Operation**: - - for n > 1 { - n = n / 2 - for i in 0 ..< n { - a[i] = a[2*i+0] * a[2*i+1] - } - } - res := a[0] - -Graphical representation of the operation for N=4: - - +-----------------------+ - v: | v0 | v1 | v2 | v3 | - +-----------------------+ - | | | | - `>[x]<' `>[x]<' - | | - `--->[x]<--' - | - v - +-----+ - result: | y0 | - +-----+ -*/ -reduce_mul_pairs :: #force_inline proc "contextless" (v: #simd[$N]$E) -> E - where intrinsics.type_is_numeric(E) { - when N == 64 { v64 := v } - when N == 32 { v32 := v } - when N == 16 { v16 := v } - when N == 8 { v8 := v } - when N == 4 { v4 := v } - when N == 2 { v2 := v } - - when N >= 64 { - x32 := swizzle(v64, - 0, 2, 4, 6, 8, 10, 12, 14, - 16, 18, 20, 22, 24, 26, 28, 30, - 32, 34, 36, 38, 40, 42, 44, 46, - 48, 50, 52, 54, 56, 58, 60, 62) - y32 := swizzle(v64, - 1, 3, 5, 7, 9, 11, 13, 15, - 17, 19, 21, 23, 25, 27, 29, 31, - 33, 35, 37, 39, 41, 43, 45, 47, - 49, 51, 53, 55, 57, 59, 61, 63) - v32 := x32 * y32 - } - - when N >= 32 { - x16 := swizzle(v32, - 0, 2, 4, 6, 8, 10, 12, 14, - 16, 18, 20, 22, 24, 26, 28, 30) - y16 := swizzle(v32, - 1, 3, 5, 7, 9, 11, 13, 15, - 17, 19, 21, 23, 25, 27, 29, 31) - v16 := x16 * y16 - } - - when N >= 16 { - x8 := swizzle(v16, 0, 2, 4, 6, 8, 10, 12, 14) - y8 := swizzle(v16, 1, 3, 5, 7, 9, 11, 13, 15) - v8 := x8 * y8 - } - - when N >= 8 { - x4 := swizzle(v8, 0, 2, 4, 6) - y4 := swizzle(v8, 1, 3, 5, 7) - v4 := x4 * y4 - } - - when N >= 4 { - x2 := swizzle(v4, 0, 2) - y2 := swizzle(v4, 1, 3) - v2 := x2 * y2 - } - - when N >= 2 { - return extract(v2, 0) * extract(v2, 1) - } else { - return extract(v, 0) - } -} - -/* -Reduce a vector to a scalar by multiplying up all the lanes in a bisecting fashion. - -This procedure returns a scalar that is the product of all lanes, calculated by -bisecting the vector into two parts, where the first contains indices [0, N/2) -and the second contains indices [N/2, N), and multiplying the two halves -together element-wise to produce N/2 values. 
This is repeated until only a -single element remains. This order may be faster to compute than the ordered -product for floats, as it can often be better parallelized. - -The order of the product may be important for accounting for precision errors -in floating-point computation, as floating-point multiplication is not -associative, that is `(a*b)*c` may not be equal to `a*(b*c)`. - -Inputs: -- `v`: The vector to reduce. - -Result: -- Product of all lanes, as a scalar. - -**Operation**: - - for n > 1 { - n = n / 2 - for i in 0 ..< n { - a[i] *= a[i+n] - } - } - res := a[0] - -Graphical representation of the operation for N=4: - - +-----------------------+ - | v0 | v1 | v2 | v3 | - +-----------------------+ - | | | | - [x]<-- | ---' | - | [x]<--------' - | | - `>[x]<' - | - v - +-----+ - result: | y0 | - +-----+ -*/ -reduce_mul_bisect :: #force_inline proc "contextless" (v: #simd[$N]$E) -> E - where intrinsics.type_is_numeric(E) { - when N == 64 { v64 := v } - when N == 32 { v32 := v } - when N == 16 { v16 := v } - when N == 8 { v8 := v } - when N == 4 { v4 := v } - when N == 2 { v2 := v } - - when N >= 64 { - x32 := swizzle(v64, - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31) - y32 := swizzle(v64, - 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63) - v32 := x32 * y32 - } - - when N >= 32 { - x16 := swizzle(v32, - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15) - y16 := swizzle(v32, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31) - v16 := x16 * y16 - } - - when N >= 16 { - x8 := swizzle(v16, 0, 1, 2, 3, 4, 5, 6, 7) - y8 := swizzle(v16, 8, 9, 10, 11, 12, 13, 14, 15) - v8 := x8 * y8 - } - - when N >= 8 { - x4 := swizzle(v8, 0, 1, 2, 3) - y4 := swizzle(v8, 4, 5, 6, 7) - v4 := x4 * y4 - } - - when N >= 4 { - x2 := swizzle(v4, 0, 1) - y2 := swizzle(v4, 2, 3) - v2 := x2 * y2 - } - - 
when N >= 2 { - return extract(v2, 0) * extract(v2, 1) - } else { - return extract(v, 0) - } -} - +indices :: intrinsics.simd_indices \ No newline at end of file diff --git a/core/sys/windows/user32.odin b/core/sys/windows/user32.odin index 94cd57811..49ebb49cb 100644 --- a/core/sys/windows/user32.odin +++ b/core/sys/windows/user32.odin @@ -47,6 +47,8 @@ foreign user32 { lpParam: LPVOID, ) -> HWND --- + GetWindowThreadProcessId :: proc(hwnd: HWND, lpdwProcessId: LPDWORD) -> DWORD --- + DestroyWindow :: proc(hWnd: HWND) -> BOOL --- ShowWindow :: proc(hWnd: HWND, nCmdShow: INT) -> BOOL --- diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index f66a8605c..a315d1880 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -760,6 +760,36 @@ gb_internal bool check_builtin_simd_operation(CheckerContext *c, Operand *operan return true; } + case BuiltinProc_simd_indices: + { + Operand x = {}; + check_expr_or_type(c, &x, ce->args[0], nullptr); + if (x.mode == Addressing_Invalid) return false; + if (x.mode != Addressing_Type) { + gbString s = expr_to_string(x.expr); + error(x.expr, "'%.*s' expected a simd vector type, got '%s'", LIT(builtin_name), s); + gb_string_free(s); + return false; + } + if (!is_type_simd_vector(x.type)) { + gbString s = type_to_string(x.type); + error(x.expr, "'%.*s' expected a simd vector type, got '%s'", LIT(builtin_name), s); + gb_string_free(s); + return false; + } + + Type *elem = base_array_type(x.type); + if (!is_type_numeric(elem)) { + gbString s = type_to_string(x.type); + error(x.expr, "'%.*s' expected a simd vector type with a numeric element type, got '%s'", LIT(builtin_name), s); + gb_string_free(s); + } + + operand->mode = Addressing_Value; + operand->type = x.type; + return true; + } + case BuiltinProc_simd_extract: { Operand x = {}; @@ -2059,6 +2089,7 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As case BuiltinProc_atomic_type_is_lock_free: case BuiltinProc_has_target_feature: case 
BuiltinProc_procedure_of: + case BuiltinProc_simd_indices: // NOTE(bill): The first arg may be a Type, this will be checked case by case break; @@ -6001,12 +6032,13 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As // NOTE(jakubtomsu): forces calculation of variant_block_size type_size_of(u); - i64 tag_offset = u->Union.variant_block_size; - GB_ASSERT(tag_offset > 0); + // NOTE(Jeroen): A tag offset of zero is perfectly fine if all members of the union are empty structs. + // What matters is that the tag size is > 0. + GB_ASSERT(u->Union.tag_size > 0); operand->mode = Addressing_Constant; operand->type = t_untyped_integer; - operand->value = exact_value_i64(tag_offset); + operand->value = exact_value_i64(u->Union.variant_block_size); } break; diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 6f585fe73..10b37bbf3 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -2910,9 +2910,20 @@ gb_internal void check_comparison(CheckerContext *c, Ast *node, Operand *x, Oper if (!defined) { gbString xs = type_to_string(x->type, temporary_allocator()); gbString ys = type_to_string(y->type, temporary_allocator()); - err_str = gb_string_make(temporary_allocator(), - gb_bprintf("operator '%.*s' not defined between the types '%s' and '%s'", LIT(token_strings[op]), xs, ys) - ); + + if (!is_type_comparable(x->type)) { + err_str = gb_string_make(temporary_allocator(), + gb_bprintf("Type '%s' is not simply comparable, so operator '%.*s' is not defined for it", xs, LIT(token_strings[op])) + ); + } else if (!is_type_comparable(y->type)) { + err_str = gb_string_make(temporary_allocator(), + gb_bprintf("Type '%s' is not simply comparable, so operator '%.*s' is not defined for it", ys, LIT(token_strings[op])) + ); + } else { + err_str = gb_string_make(temporary_allocator(), + gb_bprintf("Operator '%.*s' not defined between the types '%s' and '%s'", LIT(token_strings[op]), xs, ys) + ); + } } else { Type *comparison_type = x->type; if (x->type 
== err_type && is_operand_nil(*x)) { @@ -2933,11 +2944,11 @@ gb_internal void check_comparison(CheckerContext *c, Ast *node, Operand *x, Oper } else { yt = type_to_string(y->type); } - err_str = gb_string_make(temporary_allocator(), gb_bprintf("mismatched types '%s' and '%s'", xt, yt)); + err_str = gb_string_make(temporary_allocator(), gb_bprintf("Mismatched types '%s' and '%s'", xt, yt)); } if (err_str != nullptr) { - error(node, "Cannot compare expression, %s", err_str); + error(node, "Cannot compare expression. %s.", err_str); x->type = t_untyped_bool; } else { if (x->mode == Addressing_Constant && diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index 40dde8240..d8ac10b11 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -205,6 +205,9 @@ BuiltinProc__simd_begin, BuiltinProc_simd_masked_expand_load, BuiltinProc_simd_masked_compress_store, + BuiltinProc_simd_indices, + + // Platform specific SIMD intrinsics BuiltinProc_simd_x86__MM_SHUFFLE, BuiltinProc__simd_end, @@ -551,6 +554,8 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("simd_masked_expand_load"), 3, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("simd_masked_compress_store"), 3, false, Expr_Stmt, BuiltinProcPkg_intrinsics}, + {STR_LIT("simd_indices"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("simd_x86__MM_SHUFFLE"), 4, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT(""), 0, false, Expr_Stmt, BuiltinProcPkg_intrinsics}, diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index be51f529d..7bd8dea59 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1293,6 +1293,23 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn lbValue res = {}; res.type = tv.type; + switch (builtin_id) { + case BuiltinProc_simd_indices: { + Type *type = base_type(res.type); + GB_ASSERT(type->kind == Type_SimdVector); + Type *elem = 
type->SimdVector.elem; + + i64 count = type->SimdVector.count; + LLVMValueRef *scalars = gb_alloc_array(temporary_allocator(), LLVMValueRef, count); + for (i64 i = 0; i < count; i++) { + scalars[i] = lb_const_value(m, elem, exact_value_i64(i)).value; + } + + res.value = LLVMConstVector(scalars, cast(unsigned)count); + return res; + } + } + lbValue arg0 = {}; if (ce->args.count > 0) arg0 = lb_build_expr(p, ce->args[0]); lbValue arg1 = {}; if (ce->args.count > 1) arg1 = lb_build_expr(p, ce->args[1]); lbValue arg2 = {}; if (ce->args.count > 2) arg2 = lb_build_expr(p, ce->args[2]); diff --git a/src/types.cpp b/src/types.cpp index 9c9472a28..393e35ca1 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -4108,10 +4108,10 @@ gb_internal i64 type_size_of_internal(Type *t, TypePath *path) { } i64 max = 0; - i64 field_size = 0; for_array(i, t->Union.variants) { Type *variant_type = t->Union.variants[i]; + i64 size = type_size_of_internal(variant_type, path); if (max < size) { max = size; @@ -4130,7 +4130,7 @@ gb_internal i64 type_size_of_internal(Type *t, TypePath *path) { size = align_formula(max, tag_size); // NOTE(bill): Calculate the padding between the common fields and the tag t->Union.tag_size = cast(i16)tag_size; - t->Union.variant_block_size = size - field_size; + t->Union.variant_block_size = size; size += tag_size; }