From 0056cdffa713f6d2a1b87421d80b27c49cce5a0b Mon Sep 17 00:00:00 2001 From: vassvik Date: Sat, 13 Jun 2020 15:19:41 +0200 Subject: [PATCH 1/3] Reworked win32.utf16_to_utf8 and win32.wstring_to_utf8 to scan for zeros from the start of the string instead of from the end. This is useful to correctly convert strings from a buffer possibly containing multiple substrings. The resulting utf8 string is not null terminated, even if the backing memory might be null terminated. --- core/sys/win32/general.odin | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/core/sys/win32/general.odin b/core/sys/win32/general.odin index 7cce2a92d..992fb5514 100644 --- a/core/sys/win32/general.odin +++ b/core/sys/win32/general.odin @@ -813,9 +813,11 @@ wstring_to_utf8 :: proc(s: Wstring, N: int, allocator := context.temp_allocator) return ""; } - // NOTE: If N == -1 the call to wide_char_to_multi_byte assumes the wide string is null terminated, - // and will scan it for the first null terminated character. The resulting string is also null terminated. - // If N != -1 it assumes the wide string is not null terminated and the resulting string is not null terminated. + // If N == -1 the call to wide_char_to_multi_byte assumes the wide string is null terminated + // and will scan it to find the first null character. The resulting string will + // also be null terminated. + // If N != -1 it assumes the wide string is not null terminated and the resulting string + // will not be null terminated; we therefore have to force it to be null terminated manually. 
text := make([]byte, n+1 if N != -1 else n, allocator); if n1 := wide_char_to_multi_byte(CP_UTF8, WC_ERR_INVALID_CHARS, s, i32(N), cstring(&text[0]), n, nil, nil); n1 == 0 { @@ -823,14 +825,13 @@ wstring_to_utf8 :: proc(s: Wstring, N: int, allocator := context.temp_allocator) return ""; } - if N > 0 { - // NOTE: The input string is not expected to be null terminated, so we strip excess zeros at the end. - text[n] = 0; - - for n >= 1 && text[n-1] == 0 { - n -= 1; + for i in 0.. Date: Sat, 13 Jun 2020 15:20:39 +0200 Subject: [PATCH 2/3] Make os.get_current_directory no longer strip the zero at the end of the resulting string, as it no longer should occur. --- core/os/os_windows.odin | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/os/os_windows.odin b/core/os/os_windows.odin index 4eb9a3ec8..a925c7a88 100644 --- a/core/os/os_windows.odin +++ b/core/os/os_windows.odin @@ -282,8 +282,7 @@ get_current_directory :: proc() -> string { intrinsics.atomic_store(&cwd_gate, false); - dir_utf8 := win32.utf16_to_utf8(dir_buf_wstr); - return dir_utf8[:len(dir_utf8)-1]; // NOTE(tetra): Remove the NUL. 
+ return win32.utf16_to_utf8(dir_buf_wstr); } set_current_directory :: proc(path: string) -> (err: Errno) { From 9cccb20f49ff885ed5ea16f326011467ca6cf263 Mon Sep 17 00:00:00 2001 From: vassvik Date: Sat, 13 Jun 2020 18:04:19 +0200 Subject: [PATCH 3/3] Add some tests to test utf16_to_utf8 and wstring_to_utf8 --- core/sys/win32/tests/general.odin | 44 +++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 core/sys/win32/tests/general.odin diff --git a/core/sys/win32/tests/general.odin b/core/sys/win32/tests/general.odin new file mode 100644 index 000000000..80cf33a9c --- /dev/null +++ b/core/sys/win32/tests/general.odin @@ -0,0 +1,44 @@ +package win32_tests + +import "core:fmt" +import "core:sys/win32" + +main :: proc(){ + test_utf16_to_utf8 :: proc(str: []u16, comparison: string, expected_result: bool, loc := #caller_location) { + result := win32.utf16_to_utf8(str[:]); + fmt.assertf((result == comparison) == expected_result, + "Incorrect utf16_to_utf8 conversion: %q %s %q\nloc = %#v\n", + result, "!=" if expected_result else "==", comparison, loc); + } + + test_utf16_to_utf8([]u16{}, "", true); + test_utf16_to_utf8([]u16{0}, "", true); + test_utf16_to_utf8([]u16{0, 't', 'e', 's', 't'}, "", true); + test_utf16_to_utf8([]u16{0, 't', 'e', 's', 't', 0}, "", true); + test_utf16_to_utf8([]u16{'t', 'e', 's', 't'}, "test", true); + test_utf16_to_utf8([]u16{'t', 'e', 's', 't', 0}, "test", true); + test_utf16_to_utf8([]u16{'t', 'e', 0, 's', 't'}, "te", true); + test_utf16_to_utf8([]u16{'t', 'e', 0, 's', 't', 0}, "te", true); + + test_wstring_to_utf8 :: proc(str: []u16, comparison: string, expected_result: bool, loc := #caller_location) { + result := win32.wstring_to_utf8(nil if len(str) == 0 else cast(win32.Wstring)&str[0], -1); + fmt.assertf((result == comparison) == expected_result, + "Incorrect wstring_to_utf8 conversion: %q %s %q\nloc = %#v\n", + result, "!=" if expected_result else "==", comparison, loc); + } + + test_wstring_to_utf8([]u16{}, 
"", true); + test_wstring_to_utf8([]u16{0}, "", true); + test_wstring_to_utf8([]u16{0, 't', 'e', 's', 't'}, "", true); + test_wstring_to_utf8([]u16{0, 't', 'e', 's', 't', 0}, "", true); + test_wstring_to_utf8([]u16{'t', 'e', 's', 't', 0}, "test", true); + test_wstring_to_utf8([]u16{'t', 'e', 0, 's', 't'}, "te", true); + test_wstring_to_utf8([]u16{'t', 'e', 0, 's', 't', 0}, "te", true); + + // WARNING: Passing a non-zero-terminated string to wstring_to_utf8 is dangerous, + // as it will go out of bounds looking for a zero. + // It will "fail" or "succeed" by having a zero just after the end of the input string or not. + test_wstring_to_utf8([]u16{'t', 'e', 's', 't'}, "test", false); + test_wstring_to_utf8([]u16{'t', 'e', 's', 't', 0}[:4], "test", true); + test_wstring_to_utf8([]u16{'t', 'e', 's', 't', 'q'}[:4], "test", false); +}