From 2fae6eda2321881ccf8d942e2c27e6a7c29aebfd Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Thu, 28 Apr 2022 18:58:49 +0200 Subject: [PATCH 1/6] [i18n] Initial i18n support. - Add initial GetText .MO parser - Add translation struct and helpers - Pluralized lookup TODO: - Support for more translation catalog file formats. --- core/i18n/example/i18n_example.odin | 64 +++++++++++ core/i18n/example/messages.pot | 30 +++++ core/i18n/example/nl_NL.mo | Bin 0 -> 672 bytes core/i18n/example/nl_NL.po | 33 ++++++ core/i18n/gettext.odin | 163 ++++++++++++++++++++++++++++ core/i18n/i18n.odin | 116 ++++++++++++++++++++ 6 files changed, 406 insertions(+) create mode 100644 core/i18n/example/i18n_example.odin create mode 100644 core/i18n/example/messages.pot create mode 100644 core/i18n/example/nl_NL.mo create mode 100644 core/i18n/example/nl_NL.po create mode 100644 core/i18n/gettext.odin create mode 100644 core/i18n/i18n.odin diff --git a/core/i18n/example/i18n_example.odin b/core/i18n/example/i18n_example.odin new file mode 100644 index 000000000..f9fb2a353 --- /dev/null +++ b/core/i18n/example/i18n_example.odin @@ -0,0 +1,64 @@ +package i18n_example + +import "core:mem" +import "core:fmt" +import "core:i18n" + +LOC :: i18n.get + +_main :: proc() { + using fmt + + err: i18n.Error + + /* + Parse MO file and set it as the active translation so we can omit `get`'s "catalog" parameter. + */ + i18n.ACTIVE, err = i18n.parse_mo(#load("nl_NL.mo")) + defer i18n.destroy() + + if err != .None { return } + + /* + These are in the .MO catalog. + */ + println("-----") + println(LOC("")) + println("-----") + println(LOC("There are 69,105 leaves here.")) + println("-----") + println(LOC("Hellope, World!")) + + /* + For ease of use, pluralized lookup can use both singular and plural form as key for the same translation. 
+ */ + println("-----") + printf(LOC("There is %d leaf.\n", 1), 1) + printf(LOC("There is %d leaf.\n", 42), 42) + + printf(LOC("There are %d leaves.\n", 1), 1) + printf(LOC("There are %d leaves.\n", 42), 42) + + /* + This isn't. + */ + println("-----") + println(LOC("Come visit us on Discord!")) +} + +main :: proc() { + using fmt + + track: mem.Tracking_Allocator + mem.tracking_allocator_init(&track, context.allocator) + context.allocator = mem.tracking_allocator(&track) + + _main() + + if len(track.allocation_map) > 0 { + println() + for _, v in track.allocation_map { + printf("%v Leaked %v bytes.\n", v.location, v.size) + } + } +} \ No newline at end of file diff --git a/core/i18n/example/messages.pot b/core/i18n/example/messages.pot new file mode 100644 index 000000000..53d521b6b --- /dev/null +++ b/core/i18n/example/messages.pot @@ -0,0 +1,30 @@ +# Odin i18n Example +# Copyright (C) 2021 Jeroen van Rijn +# This file is distributed under the same license as the PACKAGE package. +# Jeroen van Rijn , 2021. +# +#, fuzzy +msgid "" +msgstr "Project-Id-Version: Example 0.0.1\n" + "Report-Msgid-Bugs-To: Jeroen van Rijn \n" + "POT-Creation-Date: 2021-11-27 19:23+0100\n" + "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" + "Last-Translator: FULL NAME \n" + "Language: en-GB\n" + "MIME-Version: 1.0\n" + "Content-Type: text/plain; charset=UTF-8\n" + "Content-Transfer-Encoding: 8bit\n" + +#: i18n_example.odin:28 +msgid "There are 69,105 leaves here." +msgstr "Er zijn hier 69.105 bladeren." + +#: i18n_example.odin:30 +msgid "Hellope, World!" +msgstr "Hallo, Wereld!" 
+ +#: i18n_example.odin:36 +msgid "There is %d leaf.\n" +msgid_plural "There are %d leaves.\n" +msgstr[0] "Er is %d blad.\n" +msgstr[1] "Er zijn %d bladeren.\n" \ No newline at end of file diff --git a/core/i18n/example/nl_NL.mo b/core/i18n/example/nl_NL.mo new file mode 100644 index 0000000000000000000000000000000000000000..0b1a668f4d225e8695e479d6135779288870ac39 GIT binary patch literal 672 zcmZ9J&u$Yj5XKiM7au_44AS&asfgO_2DJ*Crb0kU3ki*)<+lpj3*#GPkbdF5#NYS;wSO_ zR+9Xp^&T;~-RZ})ZqWIHR+5uDL<~A!zL6ZD*0#Yd_+*_f*OOw24nYz-&$qI48#Kxl zdWdQ9S5+Q1$|#%&_D@5XqhiWVojt`x;D;suh|Vi(au}V-y3q)!NJYkG*jN|%#8;~1 zFWbuV!shT6okasHX<(*K4eY){t!iq2(R#W^80)an=b}f6x^+_Xed}tUgJ~|0e%Kp8w;gU8SRX?6cyK)hmFX|<$1FLJ r#*sB9{CC6VbdO literal 0 HcmV?d00001 diff --git a/core/i18n/example/nl_NL.po b/core/i18n/example/nl_NL.po new file mode 100644 index 000000000..1b8acbcc1 --- /dev/null +++ b/core/i18n/example/nl_NL.po @@ -0,0 +1,33 @@ +# Odin i18n Example +# Copyright (C) 2021 Jeroen van Rijn +# This file is distributed under the same license as the PACKAGE package. +# Jeroen van Rijn , 2021. +# +msgid "" +msgstr "" +"Project-Id-Version: Example 0.0.1\n" +"Report-Msgid-Bugs-To: Jeroen van Rijn \n" +"POT-Creation-Date: 2021-11-27 19:23+0100\n" +"PO-Revision-Date: 2021-11-28 02:56+0100\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Language-Team: Odin Language Team\n" +"X-Generator: Poedit 3.0\n" +"Last-Translator: Jeroen van Rijn\n" +"Plural-Forms: nplurals=2; plural=(n != 1);\n" +"Language: nl_NL\n" + +#: i18n_example.odin:28 +msgid "There are 69,105 leaves here." +msgstr "Er zijn hier 69.105 bladeren." + +#: i18n_example.odin:30 +msgid "Hellope, World!" +msgstr "Hallo, Wereld!" 
+ +#: i18n_example.odin:36 +msgid "There is %d leaf.\n" +msgid_plural "There are %d leaves.\n" +msgstr[0] "Er is %d blad.\n" +msgstr[1] "Er zijn %d bladeren.\n" diff --git a/core/i18n/gettext.odin b/core/i18n/gettext.odin new file mode 100644 index 000000000..7918e217e --- /dev/null +++ b/core/i18n/gettext.odin @@ -0,0 +1,163 @@ +package i18n +/* + A parser for GNU GetText .MO files. + + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-3 license. + + A from-scratch implementation based after the specification found here: + https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html + + List of contributors: + Jeroen van Rijn: Initial implementation. +*/ +import "core:os" +import "core:strings" +import "core:bytes" + +parse_mo_from_slice :: proc(data: []u8, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) { + context.allocator = allocator + /* + An MO file should have at least a 4-byte magic, 2 x 2 byte version info, + a 4-byte number of strings value, and 2 x 4-byte offsets. + */ + if len(data) < 20 { + return {}, .MO_File_Invalid + } + + /* + Check magic. Should be 0x950412de in native Endianness. + */ + native := true + magic := read_u32(data, native) or_return + + if magic != 0x950412de { + native = false + magic = read_u32(data, native) or_return + + if magic != 0x950412de { return {}, .MO_File_Invalid_Signature } + } + + /* + We can ignore version_minor at offset 6. + */ + version_major := read_u16(data[4:]) or_return + if version_major > 1 { return {}, .MO_File_Unsupported_Version } + + count := read_u32(data[ 8:]) or_return + original_offset := read_u32(data[12:]) or_return + translated_offset := read_u32(data[16:]) or_return + + if count == 0 { return {}, .Empty_Translation_Catalog } + + /* + Initalize Translation, interner and optional pluralizer. 
+ */ + translation = new(Translation) + translation.pluralize = pluralizer + strings.intern_init(&translation.intern, allocator, allocator) + + for n := u32(0); n < count; n += 1 { + /* + Grab string's original length and offset. + */ + offset := original_offset + 8 * n + if len(data) < int(offset + 8) { return translation, .MO_File_Invalid } + + o_length := read_u32(data[offset :], native) or_return + o_offset := read_u32(data[offset + 4:], native) or_return + + offset = translated_offset + 8 * n + if len(data) < int(offset + 8) { return translation, .MO_File_Invalid } + + t_length := read_u32(data[offset :], native) or_return + t_offset := read_u32(data[offset + 4:], native) or_return + + max_offset := int(max(o_offset + o_length + 1, t_offset + t_length + 1)) + if len(data) < max_offset { return translation, .Premature_EOF } + + key := data[o_offset:][:o_length] + val := data[t_offset:][:t_length] + + /* + Could be a pluralized string. + */ + zero := []byte{0} + + keys := bytes.split(key, zero) + vals := bytes.split(val, zero) + + if len(keys) != len(vals) || max(len(keys), len(vals)) > MAX_PLURALS { + return translation, .MO_File_Incorrect_Plural_Count + } + + for k in keys { + interned_key := strings.intern_get(&translation.intern, string(k)) + + interned_vals: [MAX_PLURALS]string = {} + last_val: string + + i := 0 + for v in vals { + interned_vals[i] = strings.intern_get(&translation.intern, string(v)) + last_val = interned_vals[i] + i += 1 + } + for ; i < MAX_PLURALS; i += 1 { + interned_vals[i] = last_val + } + translation.k_v[interned_key] = interned_vals + } + delete(vals) + delete(keys) + } + return +} + +parse_mo_file :: proc(filename: string, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) { + context.allocator = allocator + + data, data_ok := os.read_entire_file(filename) + defer delete(data) + + if !data_ok { return {}, .File_Error } + + return parse_mo_from_slice(data, pluralizer) +} + 
+parse_mo :: proc { parse_mo_file, parse_mo_from_slice } + +/* + Helpers. +*/ +read_u32 :: proc(data: []u8, native_endian := true) -> (res: u32, err: Error) { + if len(data) < size_of(u32) { return 0, .Premature_EOF } + + val := (^u32)(raw_data(data))^ + + if native_endian { + return val, .None + } else { + when ODIN_ENDIAN == .Little { + return u32(transmute(u32be)val), .None + } else { + return u32(transmute(u32le)val), .None + } + } +} + +read_u16 :: proc(data: []u8, native_endian := true) -> (res: u16, err: Error) { + if len(data) < size_of(u16) { return 0, .Premature_EOF } + + val := (^u16)(raw_data(data))^ + + if native_endian { + return val, .None + } else { + when ODIN_ENDIAN == .Little { + return u16(transmute(u16be)val), .None + } else { + return u16(transmute(u16le)val), .None + } + } +} \ No newline at end of file diff --git a/core/i18n/i18n.odin b/core/i18n/i18n.odin new file mode 100644 index 000000000..7c72f9858 --- /dev/null +++ b/core/i18n/i18n.odin @@ -0,0 +1,116 @@ +package i18n +/* + Internationalization helpers. + + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-3 license. + + List of contributors: + Jeroen van Rijn: Initial implementation. +*/ +import "core:strings" + +/* + TODO: + - Support for more translation catalog file formats. +*/ + +MAX_PLURALS :: 10 + +/* + Currently active catalog. +*/ +ACTIVE: ^Translation + +/* + The main data structure. This can be generated from various different file formats, as long as we have a parser for them. +*/ +Translation :: struct { + k_v: map[string][MAX_PLURALS]string, + intern: strings.Intern, + + pluralize: proc(number: int) -> int, +} + +Error :: enum { + /* + General return values. + */ + None = 0, + Empty_Translation_Catalog, + + /* + Couldn't find, open or read file. + */ + File_Error, + + /* + File too short. + */ + Premature_EOF, + + /* + GNU Gettext *.MO file errors. 
+ */ + MO_File_Invalid_Signature, + MO_File_Unsupported_Version, + MO_File_Invalid, + MO_File_Incorrect_Plural_Count, +} + +/* + Several ways to use: + - get(key), which defaults to the singular form and i18n.ACTIVE catalog, or + - get(key, number), which returns the appropriate plural from the active catalog, or + - get(key, number, catalog) to grab text from a specific one. +*/ +get :: proc(key: string, number := 0, catalog: ^Translation = ACTIVE) -> (value: string) { + /* + A lot of languages use singular for 1 item and plural for 0 or more than 1 items. This is our default pluralize rule. + */ + plural := 1 if number != 1 else 0 + + if catalog.pluralize != nil { + plural = catalog.pluralize(number) + } + return get_by_slot(key, plural, catalog) +} + +/* + Several ways to use: + - get_by_slot(key), which defaults to the singular form and i18n.ACTIVE catalog, or + - get_by_slot(key, slot), which returns the requested plural from the active catalog, or + - get_by_slot(key, slot, catalog) to grab text from a specific one. + + If a file format parser doesn't (yet) support plural slots, each of the slots will point at the same string. +*/ +get_by_slot :: proc(key: string, slot := 0, catalog: ^Translation = ACTIVE) -> (value: string) { + if catalog == nil { + /* + Return the key if the catalog catalog hasn't been initialized yet. + */ + return key + } + + /* + Return the translation from the requested slot if this key is known, else return the key. + */ + if translations, ok := catalog.k_v[key]; ok { + plural := min(max(0, slot), MAX_PLURALS - 1) + return translations[plural] + } + return key +} + +/* + Same for destroy: + - destroy(), to clean up the currently active catalog catalog i18n.ACTIVE + - destroy(catalog), to clean up a specific catalog. 
+*/ +destroy :: proc(catalog: ^Translation = ACTIVE) { + if catalog != nil { + strings.intern_destroy(&catalog.intern) + delete(catalog.k_v) + free(catalog) + } +} \ No newline at end of file From ba23bfb7b9eb32eb9bcf22b0364a25b6ae32203e Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Thu, 28 Apr 2022 20:12:32 +0200 Subject: [PATCH 2/6] [i18n] Allow multiple sections. --- core/i18n/gettext.odin | 11 ++++---- core/i18n/i18n.odin | 57 +++++++++++++++++++++++++++++++++++------- 2 files changed, 54 insertions(+), 14 deletions(-) diff --git a/core/i18n/gettext.odin b/core/i18n/gettext.odin index 7918e217e..70c922cfb 100644 --- a/core/i18n/gettext.odin +++ b/core/i18n/gettext.odin @@ -57,6 +57,10 @@ parse_mo_from_slice :: proc(data: []u8, pluralizer: proc(int) -> int = nil, allo translation.pluralize = pluralizer strings.intern_init(&translation.intern, allocator, allocator) + // Gettext MO files only have one section. + translation.k_v[""] = {} + section := &translation.k_v[""] + for n := u32(0); n < count; n += 1 { /* Grab string's original length and offset. @@ -94,7 +98,7 @@ parse_mo_from_slice :: proc(data: []u8, pluralizer: proc(int) -> int = nil, allo for k in keys { interned_key := strings.intern_get(&translation.intern, string(k)) - interned_vals: [MAX_PLURALS]string = {} + interned_vals := make([]string, len(keys)) last_val: string i := 0 @@ -103,10 +107,7 @@ parse_mo_from_slice :: proc(data: []u8, pluralizer: proc(int) -> int = nil, allo last_val = interned_vals[i] i += 1 } - for ; i < MAX_PLURALS; i += 1 { - interned_vals[i] = last_val - } - translation.k_v[interned_key] = interned_vals + section[interned_key] = interned_vals } delete(vals) delete(keys) diff --git a/core/i18n/i18n.odin b/core/i18n/i18n.odin index 7c72f9858..1ee19c2b4 100644 --- a/core/i18n/i18n.odin +++ b/core/i18n/i18n.odin @@ -15,18 +15,19 @@ import "core:strings" - Support for more translation catalog file formats. */ -MAX_PLURALS :: 10 - /* Currently active catalog. 
*/ ACTIVE: ^Translation +// Allow between 1 and 255 plural forms. Default: 10. +MAX_PLURALS :: min(max(#config(ODIN_i18N_MAX_PLURAL_FORMS, 10), 1), 255) + /* The main data structure. This can be generated from various different file formats, as long as we have a parser for them. */ Translation :: struct { - k_v: map[string][MAX_PLURALS]string, + k_v: map[string]map[string][]string, intern: strings.Intern, pluralize: proc(number: int) -> int, @@ -64,7 +65,7 @@ Error :: enum { - get(key, number), which returns the appropriate plural from the active catalog, or - get(key, number, catalog) to grab text from a specific one. */ -get :: proc(key: string, number := 0, catalog: ^Translation = ACTIVE) -> (value: string) { +get_single_section :: proc(key: string, number := 0, catalog: ^Translation = ACTIVE) -> (value: string) { /* A lot of languages use singular for 1 item and plural for 0 or more than 1 items. This is our default pluralize rule. */ @@ -76,6 +77,25 @@ get :: proc(key: string, number := 0, catalog: ^Translation = ACTIVE) -> (value: return get_by_slot(key, plural, catalog) } +/* + Several ways to use: + - get(section, key), which defaults to the singular form and i18n.ACTIVE catalog, or + - get(section, key, number), which returns the appropriate plural from the active catalog, or + - get(section, key, number, catalog) to grab text from a specific one. +*/ +get_by_section :: proc(section, key: string, number := 0, catalog: ^Translation = ACTIVE) -> (value: string) { + /* + A lot of languages use singular for 1 item and plural for 0 or more than 1 items. This is our default pluralize rule. 
+ */ + plural := 1 if number != 1 else 0 + + if catalog.pluralize != nil { + plural = catalog.pluralize(number) + } + return get_by_slot(key, plural, catalog) +} +get :: proc{get_single_section, get_by_section} + /* Several ways to use: - get_by_slot(key), which defaults to the singular form and i18n.ACTIVE catalog, or @@ -84,10 +104,22 @@ get :: proc(key: string, number := 0, catalog: ^Translation = ACTIVE) -> (value: If a file format parser doesn't (yet) support plural slots, each of the slots will point at the same string. */ -get_by_slot :: proc(key: string, slot := 0, catalog: ^Translation = ACTIVE) -> (value: string) { - if catalog == nil { +get_by_slot_single_section :: proc(key: string, slot := 0, catalog: ^Translation = ACTIVE) -> (value: string) { + return get_by_slot_by_section("", key, slot, catalog) +} + +/* + Several ways to use: + - get_by_slot(key), which defaults to the singular form and i18n.ACTIVE catalog, or + - get_by_slot(key, slot), which returns the requested plural from the active catalog, or + - get_by_slot(key, slot, catalog) to grab text from a specific one. + + If a file format parser doesn't (yet) support plural slots, each of the slots will point at the same string. +*/ +get_by_slot_by_section :: proc(section, key: string, slot := 0, catalog: ^Translation = ACTIVE) -> (value: string) { + if catalog == nil || section not_in catalog.k_v { /* - Return the key if the catalog catalog hasn't been initialized yet. + Return the key if the catalog catalog hasn't been initialized yet, or the section is not present. */ return key } @@ -95,12 +127,13 @@ get_by_slot :: proc(key: string, slot := 0, catalog: ^Translation = ACTIVE) -> ( /* Return the translation from the requested slot if this key is known, else return the key. 
*/ - if translations, ok := catalog.k_v[key]; ok { - plural := min(max(0, slot), MAX_PLURALS - 1) + if translations, ok := catalog.k_v[section][key]; ok { + plural := min(max(0, slot), len(catalog.k_v[section][key]) - 1) return translations[plural] } return key } +get_by_slot :: proc{get_by_slot_single_section, get_by_slot_by_section} /* Same for destroy: @@ -110,6 +143,12 @@ get_by_slot :: proc(key: string, slot := 0, catalog: ^Translation = ACTIVE) -> ( destroy :: proc(catalog: ^Translation = ACTIVE) { if catalog != nil { strings.intern_destroy(&catalog.intern) + for section in &catalog.k_v { + for key in &catalog.k_v[section] { + delete(catalog.k_v[section][key]) + } + delete(catalog.k_v[section]) + } delete(catalog.k_v) free(catalog) } From 1289c96e2cf9fdcdb9c4fb4988f73c0e319fb329 Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Fri, 29 Apr 2022 00:29:55 +0200 Subject: [PATCH 3/6] [i18n] QT Linguist TS reader. --- core/encoding/xml/xml_reader.odin | 1 - core/i18n/example/i18n_example.odin | 56 ++++++++-- core/i18n/gettext.odin | 2 +- core/i18n/i18n.odin | 21 +++- core/i18n/qt_linguist.odin | 153 +++++++++++++++++++++++++++ tests/core/assets/XML/nl_NL-qt-ts.ts | 52 ++++----- 6 files changed, 243 insertions(+), 42 deletions(-) create mode 100644 core/i18n/qt_linguist.odin diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin index 636dd0ae4..6d0d4e1aa 100644 --- a/core/encoding/xml/xml_reader.odin +++ b/core/encoding/xml/xml_reader.odin @@ -87,7 +87,6 @@ Option_Flag :: enum { If a tag body has a comment, it will be stripped unless this option is given. 
*/ Keep_Tag_Body_Comments, - } Option_Flags :: bit_set[Option_Flag; u16] diff --git a/core/i18n/example/i18n_example.odin b/core/i18n/example/i18n_example.odin index f9fb2a353..8c173ee4a 100644 --- a/core/i18n/example/i18n_example.odin +++ b/core/i18n/example/i18n_example.odin @@ -4,9 +4,9 @@ import "core:mem" import "core:fmt" import "core:i18n" -LOC :: i18n.get +_T :: i18n.get -_main :: proc() { +mo :: proc() { using fmt err: i18n.Error @@ -23,27 +23,60 @@ _main :: proc() { These are in the .MO catalog. */ println("-----") - println(LOC("")) + println(_T("")) println("-----") - println(LOC("There are 69,105 leaves here.")) + println(_T("There are 69,105 leaves here.")) println("-----") - println(LOC("Hellope, World!")) + println(_T("Hellope, World!")) /* For ease of use, pluralized lookup can use both singular and plural form as key for the same translation. */ println("-----") - printf(LOC("There is %d leaf.\n", 1), 1) - printf(LOC("There is %d leaf.\n", 42), 42) + printf(_T("There is %d leaf.\n", 1), 1) + printf(_T("There is %d leaf.\n", 42), 42) - printf(LOC("There are %d leaves.\n", 1), 1) - printf(LOC("There are %d leaves.\n", 42), 42) + printf(_T("There are %d leaves.\n", 1), 1) + printf(_T("There are %d leaves.\n", 42), 42) /* This isn't. */ println("-----") - println(LOC("Come visit us on Discord!")) + println(_T("Come visit us on Discord!")) +} + +qt :: proc() { + using fmt + + err: i18n.Error + + /* + Parse QT file and set it as the active translation so we can omit `get`'s "catalog" parameter. + */ + i18n.ACTIVE, err = i18n.parse_qt(#load("../../../tests/core/assets/XML/nl_NL-qt-ts.ts")) + defer i18n.destroy() + + fmt.printf("parse_qt returned %v\n", err) + if err != .None { + return + } + + /* + These are in the .TS catalog. 
+ */ + println("--- Page section ---") + println("Page:Text for translation =", _T("Page", "Text for translation")) + println("-----") + println("Page:Also text to translate =", _T("Page", "Also text to translate")) + println("-----") + println("--- installscript section ---") + println("installscript:99 bottles of beer on the wall =", _T("installscript", "99 bottles of beer on the wall")) + println("-----") + println("--- apple_count section ---") + println("apple_count:%d apple(s) =") + println("\t 1 =", _T("apple_count", "%d apple(s)", 1)) + println("\t 42 =", _T("apple_count", "%d apple(s)", 42)) } main :: proc() { @@ -53,7 +86,8 @@ main :: proc() { mem.tracking_allocator_init(&track, context.allocator) context.allocator = mem.tracking_allocator(&track) - _main() + // mo() + qt() if len(track.allocation_map) > 0 { println() diff --git a/core/i18n/gettext.odin b/core/i18n/gettext.odin index 70c922cfb..54c5a1111 100644 --- a/core/i18n/gettext.odin +++ b/core/i18n/gettext.odin @@ -2,7 +2,7 @@ package i18n /* A parser for GNU GetText .MO files. - Copyright 2021 Jeroen van Rijn . + Copyright 2021-2022 Jeroen van Rijn . Made available under Odin's BSD-3 license. A from-scratch implementation based after the specification found here: diff --git a/core/i18n/i18n.odin b/core/i18n/i18n.odin index 1ee19c2b4..36204efd9 100644 --- a/core/i18n/i18n.odin +++ b/core/i18n/i18n.odin @@ -2,7 +2,7 @@ package i18n /* Internationalization helpers. - Copyright 2021 Jeroen van Rijn . + Copyright 2021-2022 Jeroen van Rijn . Made available under Odin's BSD-3 license. List of contributors: @@ -26,8 +26,11 @@ MAX_PLURALS :: min(max(#config(ODIN_i18N_MAX_PLURAL_FORMS, 10), 1), 255) /* The main data structure. This can be generated from various different file formats, as long as we have a parser for them. */ + +Section :: map[string][]string + Translation :: struct { - k_v: map[string]map[string][]string, + k_v: map[string]Section, // k_v[section][key][plural_form] = ... 
intern: strings.Intern, pluralize: proc(number: int) -> int, @@ -39,6 +42,7 @@ Error :: enum { */ None = 0, Empty_Translation_Catalog, + Duplicate_Key, /* Couldn't find, open or read file. @@ -57,6 +61,17 @@ Error :: enum { MO_File_Unsupported_Version, MO_File_Invalid, MO_File_Incorrect_Plural_Count, + + /* + Qt Linguist *.TS file errors. + */ + TS_File_Parse_Error, + TS_File_Expected_Context, + TS_File_Expected_Context_Name, + TS_File_Expected_Source, + TS_File_Expected_Translation, + TS_File_Expected_NumerusForm, + } /* @@ -92,7 +107,7 @@ get_by_section :: proc(section, key: string, number := 0, catalog: ^Translation if catalog.pluralize != nil { plural = catalog.pluralize(number) } - return get_by_slot(key, plural, catalog) + return get_by_slot(section, key, plural, catalog) } get :: proc{get_single_section, get_by_section} diff --git a/core/i18n/qt_linguist.odin b/core/i18n/qt_linguist.odin new file mode 100644 index 000000000..65d51444e --- /dev/null +++ b/core/i18n/qt_linguist.odin @@ -0,0 +1,153 @@ +package i18n +/* + A parser for Qt Linguist TS files. + + Copyright 2022 Jeroen van Rijn . + Made available under Odin's BSD-3 license. + + A from-scratch implementation based after the specification found here: + https://doc.qt.io/qt-5/linguist-ts-file-format.html + + List of contributors: + Jeroen van Rijn: Initial implementation. 
+*/ +import "core:os" +import "core:encoding/xml" +import "core:strings" + +TS_XML_Options := xml.Options{ + flags = { + .Input_May_Be_Modified, + .Must_Have_Prolog, + .Must_Have_DocType, + .Ignore_Unsupported, + .Unbox_CDATA, + .Decode_SGML_Entities, + }, + expected_doctype = "TS", +} + +parse_qt_linguist_from_slice :: proc(data: []u8, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) { + context.allocator = allocator + + ts, xml_err := xml.parse(data, TS_XML_Options) + defer xml.destroy(ts) + + if xml_err != .None || ts.element_count < 1 || ts.elements[0].ident != "TS" || len(ts.elements[0].children) == 0 { + return nil, .TS_File_Parse_Error + } + + /* + Initalize Translation, interner and optional pluralizer. + */ + translation = new(Translation) + translation.pluralize = pluralizer + strings.intern_init(&translation.intern, allocator, allocator) + + section: ^Section + + for child_id in ts.elements[0].children { + // These should be s. + child := ts.elements[child_id] + if child.ident != "context" { + return translation, .TS_File_Expected_Context + } + + // Find section name. + section_name_id, section_name_found := xml.find_child_by_ident(ts, child_id, "name") + if !section_name_found { + return translation, .TS_File_Expected_Context_Name, + } + + section_name := ts.elements[section_name_id].value + + if section_name not_in translation.k_v { + translation.k_v[section_name] = {} + } + section = &translation.k_v[section_name] + + // Find messages in section. 
+ nth: int + for { + message_id, message_found := xml.find_child_by_ident(ts, child_id, "message", nth) + if !message_found { + break + } + + numerus_tag, _ := xml.find_attribute_val_by_key(ts, message_id, "numerus") + has_plurals := numerus_tag == "yes" + + // We must have a = key + source_id, source_found := xml.find_child_by_ident(ts, message_id, "source") + if !source_found { + return translation, .TS_File_Expected_Source + } + + // We must have a + translation_id, translation_found := xml.find_child_by_ident(ts, message_id, "translation") + if !translation_found { + return translation, .TS_File_Expected_Translation + } + + source := ts.elements[source_id] + xlat := ts.elements[translation_id] + + if source.value in section { + return translation, .Duplicate_Key + } + + if has_plurals { + if xlat.value != "" { + return translation, .TS_File_Expected_NumerusForm + } + + num_plurals: int + for { + numerus_id, numerus_found := xml.find_child_by_ident(ts, translation_id, "numerusform", num_plurals) + if !numerus_found { + break + } + num_plurals += 1 + } + + if num_plurals < 2 { + return translation, .TS_File_Expected_NumerusForm + } + section[source.value] = make([]string, num_plurals) + + num_plurals = 0 + for { + numerus_id, numerus_found := xml.find_child_by_ident(ts, translation_id, "numerusform", num_plurals) + if !numerus_found { + break + } + numerus := ts.elements[numerus_id] + section[source.value][num_plurals] = strings.intern_get(&translation.intern, numerus.value) + + num_plurals += 1 + } + } else { + // Single translation + section[source.value] = make([]string, 1) + section[source.value][0] = strings.intern_get(&translation.intern, xlat.value) + } + + nth += 1 + } + } + + return +} + +parse_qt_linguist_file :: proc(filename: string, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) { + context.allocator = allocator + + data, data_ok := os.read_entire_file(filename) + defer delete(data) + + 
if !data_ok { return {}, .File_Error } + + return parse_qt_linguist_from_slice(data, pluralizer) +} + +parse_qt :: proc { parse_qt_linguist_file, parse_qt_linguist_from_slice } \ No newline at end of file diff --git a/tests/core/assets/XML/nl_NL-qt-ts.ts b/tests/core/assets/XML/nl_NL-qt-ts.ts index 6ec3f2f47..36c95ce2e 100644 --- a/tests/core/assets/XML/nl_NL-qt-ts.ts +++ b/tests/core/assets/XML/nl_NL-qt-ts.ts @@ -2,34 +2,34 @@ - Page - - Text for translation - commenting - Tekst om te vertalen - - - Also text to translate - some text - Ook tekst om te vertalen - + Page + + Text for translation + commenting + Tekst om te vertalen + + + Also text to translate + some text + Ook tekst om te vertalen + - installscript - - 99 bottles of beer on the wall - some new comments here - 99 flessen bier op de muur - + installscript + + 99 bottles of beer on the wall + some new comments here + 99 flessen bier op de muur + - apple_count - - %d apple(s) - - %d appel - %d appels - - - + apple_count + + %d apple(s) + + %d appel + %d appels + + + From 2e11a8da5b3d0031f99a7534b029f08dc0fe5a36 Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Fri, 29 Apr 2022 13:02:40 +0200 Subject: [PATCH 4/6] [i18n] Move to `core:text/i18n`. 
--- core/{ => text}/i18n/example/i18n_example.odin | 5 +++-- core/{ => text}/i18n/example/messages.pot | 0 core/{ => text}/i18n/example/nl_NL.mo | Bin core/{ => text}/i18n/example/nl_NL.po | 0 core/{ => text}/i18n/gettext.odin | 0 core/{ => text}/i18n/i18n.odin | 0 core/{ => text}/i18n/qt_linguist.odin | 0 7 files changed, 3 insertions(+), 2 deletions(-) rename core/{ => text}/i18n/example/i18n_example.odin (91%) rename core/{ => text}/i18n/example/messages.pot (100%) rename core/{ => text}/i18n/example/nl_NL.mo (100%) rename core/{ => text}/i18n/example/nl_NL.po (100%) rename core/{ => text}/i18n/gettext.odin (100%) rename core/{ => text}/i18n/i18n.odin (100%) rename core/{ => text}/i18n/qt_linguist.odin (100%) diff --git a/core/i18n/example/i18n_example.odin b/core/text/i18n/example/i18n_example.odin similarity index 91% rename from core/i18n/example/i18n_example.odin rename to core/text/i18n/example/i18n_example.odin index 8c173ee4a..32eb38a7d 100644 --- a/core/i18n/example/i18n_example.odin +++ b/core/text/i18n/example/i18n_example.odin @@ -2,7 +2,7 @@ package i18n_example import "core:mem" import "core:fmt" -import "core:i18n" +import "core:text/i18n" _T :: i18n.get @@ -31,6 +31,7 @@ mo :: proc() { /* For ease of use, pluralized lookup can use both singular and plural form as key for the same translation. + This is a quirk of the GetText format which has separate keys for their different plurals. */ println("-----") printf(_T("There is %d leaf.\n", 1), 1) @@ -54,7 +55,7 @@ qt :: proc() { /* Parse QT file and set it as the active translation so we can omit `get`'s "catalog" parameter. 
*/ - i18n.ACTIVE, err = i18n.parse_qt(#load("../../../tests/core/assets/XML/nl_NL-qt-ts.ts")) + i18n.ACTIVE, err = i18n.parse_qt(#load("../../../../tests/core/assets/XML/nl_NL-qt-ts.ts")) defer i18n.destroy() fmt.printf("parse_qt returned %v\n", err) diff --git a/core/i18n/example/messages.pot b/core/text/i18n/example/messages.pot similarity index 100% rename from core/i18n/example/messages.pot rename to core/text/i18n/example/messages.pot diff --git a/core/i18n/example/nl_NL.mo b/core/text/i18n/example/nl_NL.mo similarity index 100% rename from core/i18n/example/nl_NL.mo rename to core/text/i18n/example/nl_NL.mo diff --git a/core/i18n/example/nl_NL.po b/core/text/i18n/example/nl_NL.po similarity index 100% rename from core/i18n/example/nl_NL.po rename to core/text/i18n/example/nl_NL.po diff --git a/core/i18n/gettext.odin b/core/text/i18n/gettext.odin similarity index 100% rename from core/i18n/gettext.odin rename to core/text/i18n/gettext.odin diff --git a/core/i18n/i18n.odin b/core/text/i18n/i18n.odin similarity index 100% rename from core/i18n/i18n.odin rename to core/text/i18n/i18n.odin diff --git a/core/i18n/qt_linguist.odin b/core/text/i18n/qt_linguist.odin similarity index 100% rename from core/i18n/qt_linguist.odin rename to core/text/i18n/qt_linguist.odin From 957ef8e8fe885fed32b62d532e642be5e756ea67 Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Fri, 29 Apr 2022 13:16:30 +0200 Subject: [PATCH 5/6] [i18n/xml] Move I18N XML files to their own assets directory. 
--- .../core/assets/I18N}/messages.pot | 0 .../core/assets/{XML => I18N}/nl_NL-qt-ts.ts | 0 .../{XML => I18N}/nl_NL-xliff-1.2.xliff | 0 .../{XML => I18N}/nl_NL-xliff-2.0.xliff | 0 .../core/assets/I18N}/nl_NL.mo | Bin .../core/assets/I18N}/nl_NL.po | 0 tests/core/encoding/xml/test_core_xml.odin | 33 ++++++------------ 7 files changed, 11 insertions(+), 22 deletions(-) rename {core/text/i18n/example => tests/core/assets/I18N}/messages.pot (100%) rename tests/core/assets/{XML => I18N}/nl_NL-qt-ts.ts (100%) rename tests/core/assets/{XML => I18N}/nl_NL-xliff-1.2.xliff (100%) rename tests/core/assets/{XML => I18N}/nl_NL-xliff-2.0.xliff (100%) rename {core/text/i18n/example => tests/core/assets/I18N}/nl_NL.mo (100%) rename {core/text/i18n/example => tests/core/assets/I18N}/nl_NL.po (100%) diff --git a/core/text/i18n/example/messages.pot b/tests/core/assets/I18N/messages.pot similarity index 100% rename from core/text/i18n/example/messages.pot rename to tests/core/assets/I18N/messages.pot diff --git a/tests/core/assets/XML/nl_NL-qt-ts.ts b/tests/core/assets/I18N/nl_NL-qt-ts.ts similarity index 100% rename from tests/core/assets/XML/nl_NL-qt-ts.ts rename to tests/core/assets/I18N/nl_NL-qt-ts.ts diff --git a/tests/core/assets/XML/nl_NL-xliff-1.2.xliff b/tests/core/assets/I18N/nl_NL-xliff-1.2.xliff similarity index 100% rename from tests/core/assets/XML/nl_NL-xliff-1.2.xliff rename to tests/core/assets/I18N/nl_NL-xliff-1.2.xliff diff --git a/tests/core/assets/XML/nl_NL-xliff-2.0.xliff b/tests/core/assets/I18N/nl_NL-xliff-2.0.xliff similarity index 100% rename from tests/core/assets/XML/nl_NL-xliff-2.0.xliff rename to tests/core/assets/I18N/nl_NL-xliff-2.0.xliff diff --git a/core/text/i18n/example/nl_NL.mo b/tests/core/assets/I18N/nl_NL.mo similarity index 100% rename from core/text/i18n/example/nl_NL.mo rename to tests/core/assets/I18N/nl_NL.mo diff --git a/core/text/i18n/example/nl_NL.po b/tests/core/assets/I18N/nl_NL.po similarity index 100% rename from 
core/text/i18n/example/nl_NL.po rename to tests/core/assets/I18N/nl_NL.po diff --git a/tests/core/encoding/xml/test_core_xml.odin b/tests/core/encoding/xml/test_core_xml.odin index a79c939c8..07cbc1779 100644 --- a/tests/core/encoding/xml/test_core_xml.odin +++ b/tests/core/encoding/xml/test_core_xml.odin @@ -27,7 +27,7 @@ TEST :: struct { /* Relative to ODIN_ROOT */ -TEST_FILE_PATH_PREFIX :: "tests/core/assets/XML" +TEST_FILE_PATH_PREFIX :: "tests/core/assets" TESTS :: []TEST{ /* @@ -35,23 +35,12 @@ TESTS :: []TEST{ */ { - /* - - - <恥ずべきフクロウ 올빼미_id="Foozle Hello, world!"]]>Barzle"> - <부끄러운:barzle> - ရှက်စရာ ဇီးကွက် - Owl of Shame - More CDATA Hello, world! Nonsense. - - */ - /* Tests UTF-8 idents and values. Test namespaced ident. Tests that nested partial CDATA start doesn't trip up parser. */ - filename = "utf8.xml", + filename = "XML/utf8.xml", options = { flags = { .Ignore_Unsupported, .Intern_Comments, @@ -66,7 +55,7 @@ TESTS :: []TEST{ Same as above. Unbox CDATA in data tag. */ - filename = "utf8.xml", + filename = "XML/utf8.xml", options = { flags = { .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, @@ -81,7 +70,7 @@ TESTS :: []TEST{ Simple Qt TS translation file. `core:i18n` requires it to be parsed properly. */ - filename = "nl_NL-qt-ts.ts", + filename = "I18N/nl_NL-qt-ts.ts", options = { flags = { .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities, @@ -96,7 +85,7 @@ TESTS :: []TEST{ Simple XLiff 1.2 file. `core:i18n` requires it to be parsed properly. */ - filename = "nl_NL-xliff-1.2.xliff", + filename = "I18N/nl_NL-xliff-1.2.xliff", options = { flags = { .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities, @@ -111,7 +100,7 @@ TESTS :: []TEST{ Simple XLiff 2.0 file. `core:i18n` requires it to be parsed properly. 
*/ - filename = "nl_NL-xliff-2.0.xliff", + filename = "I18N/nl_NL-xliff-2.0.xliff", options = { flags = { .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities, @@ -122,7 +111,7 @@ TESTS :: []TEST{ }, { - filename = "entities.html", + filename = "XML/entities.html", options = { flags = { .Ignore_Unsupported, .Intern_Comments, @@ -133,7 +122,7 @@ TESTS :: []TEST{ }, { - filename = "entities.html", + filename = "XML/entities.html", options = { flags = { .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, @@ -144,7 +133,7 @@ TESTS :: []TEST{ }, { - filename = "entities.html", + filename = "XML/entities.html", options = { flags = { .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities, @@ -158,7 +147,7 @@ TESTS :: []TEST{ Then we test that certain errors are returned as expected. */ { - filename = "utf8.xml", + filename = "XML/utf8.xml", options = { flags = { .Ignore_Unsupported, .Intern_Comments, @@ -173,7 +162,7 @@ TESTS :: []TEST{ Parse the 8.2 MiB unicode.xml for good measure. */ { - filename = "unicode.xml", + filename = "XML/unicode.xml", options = { flags = { .Ignore_Unsupported, From 09e1c0fa27a262d7fbfaa5b3e305054304847e75 Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Fri, 29 Apr 2022 16:19:13 +0200 Subject: [PATCH 6/6] [i18n] Add tests. 
--- core/text/i18n/doc.odin | 111 ++++++++++++ core/text/i18n/example/i18n_example.odin | 99 ----------- core/text/i18n/gettext.odin | 9 +- core/text/i18n/i18n.odin | 8 + core/text/i18n/qt_linguist.odin | 8 +- examples/all/all_main.odin | 4 + tests/core/Makefile | 5 +- tests/core/assets/I18N/duplicate-key.ts | 22 +++ tests/core/build.bat | 7 +- tests/core/text/i18n/test_core_text_i18n.odin | 165 ++++++++++++++++++ 10 files changed, 329 insertions(+), 109 deletions(-) create mode 100644 core/text/i18n/doc.odin delete mode 100644 core/text/i18n/example/i18n_example.odin create mode 100644 tests/core/assets/I18N/duplicate-key.ts create mode 100644 tests/core/text/i18n/test_core_text_i18n.odin diff --git a/core/text/i18n/doc.odin b/core/text/i18n/doc.odin new file mode 100644 index 000000000..cff1ce11f --- /dev/null +++ b/core/text/i18n/doc.odin @@ -0,0 +1,111 @@ +//+ignore +package i18n + +/* + The i18n package is flexible and easy to use. + + It has one call to get a translation: `get`, which the user can alias into something like `T`. + + `get`, referred to as `T` here, has a few different signatures. + All of them will return the key if the entry can't be found in the active translation catalog. + + - `T(key)` returns the translation of `key`. + - `T(key, n)` returns a pluralized translation of `key` according to value `n`. + + - `T(section, key)` returns the translation of `key` in `section`. + - `T(section, key, n)` returns a pluralized translation of `key` in `section` according to value `n`. + + By default, lookups take place in the global `i18n.ACTIVE` catalog for ease of use. + If you want to override which translation to use, for example in a language preview dialog, you can use the following: + + - `T(key, n, catalog)` returns the pluralized version of `key` from explicitly supplied catalog. + - `T(section, key, n, catalog)` returns the pluralized version of `key` in `section` from explicitly supplied catalog. 
+ + If a catalog has translation contexts or sections, then omitting it in the above calls looks up in section "". + + The default pluralization rule is n != 1, which is to say that passing n == 1 (or not passing n) returns the singular form. + Passing n != 1 returns plural form 1. + + Should a language not conform to this rule, you can pass a pluralizer procedure to the catalog parser. + This is a procedure that maps an integer to an integer, taking a value and returning which plural slot should be used. + + You can also assign it to a loaded catalog after parsing, of course. + + Some code examples follow. +*/ + +/* +```cpp +import "core:fmt" +import "core:text/i18n" + +T :: i18n.get + +mo :: proc() { + using fmt + + err: i18n.Error + + /* + Parse MO file and set it as the active translation so we can omit `get`'s "catalog" parameter. + */ + i18n.ACTIVE, err = i18n.parse_mo(#load("translations/nl_NL.mo")) + defer i18n.destroy() + + if err != .None { return } + + /* + These are in the .MO catalog. + */ + println("-----") + println(T("")) + println("-----") + println(T("There are 69,105 leaves here.")) + println("-----") + println(T("Hellope, World!")) + println("-----") + // We pass 1 into `T` to get the singular format string, then 1 again into printf. + printf(T("There is %d leaf.\n", 1), 1) + // We pass 42 into `T` to get the plural format string, then 42 again into printf. + printf(T("There is %d leaf.\n", 42), 42) + + /* + This isn't in the translation catalog, so the key is passed back untranslated. + */ + println("-----") + println(T("Come visit us on Discord!")) +} + +qt :: proc() { + using fmt + + err: i18n.Error + + /* + Parse QT file and set it as the active translation so we can omit `get`'s "catalog" parameter. + */ + i18n.ACTIVE, err = i18n.parse_qt(#load("translations/nl_NL-qt-ts.ts")) + defer i18n.destroy() + + if err != .None { + return + } + + /* + These are in the .TS catalog. As you can see they have sections. 
+ */ + println("--- Page section ---") + println("Page:Text for translation =", T("Page", "Text for translation")) + println("-----") + println("Page:Also text to translate =", T("Page", "Also text to translate")) + println("-----") + println("--- installscript section ---") + println("installscript:99 bottles of beer on the wall =", T("installscript", "99 bottles of beer on the wall")) + println("-----") + println("--- apple_count section ---") + println("apple_count:%d apple(s) =") + println("\t 1 =", T("apple_count", "%d apple(s)", 1)) + println("\t 42 =", T("apple_count", "%d apple(s)", 42)) +} +``` +*/ \ No newline at end of file diff --git a/core/text/i18n/example/i18n_example.odin b/core/text/i18n/example/i18n_example.odin deleted file mode 100644 index 32eb38a7d..000000000 --- a/core/text/i18n/example/i18n_example.odin +++ /dev/null @@ -1,99 +0,0 @@ -package i18n_example - -import "core:mem" -import "core:fmt" -import "core:text/i18n" - -_T :: i18n.get - -mo :: proc() { - using fmt - - err: i18n.Error - - /* - Parse MO file and set it as the active translation so we can omit `get`'s "catalog" parameter. - */ - i18n.ACTIVE, err = i18n.parse_mo(#load("nl_NL.mo")) - defer i18n.destroy() - - if err != .None { return } - - /* - These are in the .MO catalog. - */ - println("-----") - println(_T("")) - println("-----") - println(_T("There are 69,105 leaves here.")) - println("-----") - println(_T("Hellope, World!")) - - /* - For ease of use, pluralized lookup can use both singular and plural form as key for the same translation. - This is a quirk of the GetText format which has separate keys for their different plurals. - */ - println("-----") - printf(_T("There is %d leaf.\n", 1), 1) - printf(_T("There is %d leaf.\n", 42), 42) - - printf(_T("There are %d leaves.\n", 1), 1) - printf(_T("There are %d leaves.\n", 42), 42) - - /* - This isn't. 
- */ - println("-----") - println(_T("Come visit us on Discord!")) -} - -qt :: proc() { - using fmt - - err: i18n.Error - - /* - Parse QT file and set it as the active translation so we can omit `get`'s "catalog" parameter. - */ - i18n.ACTIVE, err = i18n.parse_qt(#load("../../../../tests/core/assets/XML/nl_NL-qt-ts.ts")) - defer i18n.destroy() - - fmt.printf("parse_qt returned %v\n", err) - if err != .None { - return - } - - /* - These are in the .TS catalog. - */ - println("--- Page section ---") - println("Page:Text for translation =", _T("Page", "Text for translation")) - println("-----") - println("Page:Also text to translate =", _T("Page", "Also text to translate")) - println("-----") - println("--- installscript section ---") - println("installscript:99 bottles of beer on the wall =", _T("installscript", "99 bottles of beer on the wall")) - println("-----") - println("--- apple_count section ---") - println("apple_count:%d apple(s) =") - println("\t 1 =", _T("apple_count", "%d apple(s)", 1)) - println("\t 42 =", _T("apple_count", "%d apple(s)", 42)) -} - -main :: proc() { - using fmt - - track: mem.Tracking_Allocator - mem.tracking_allocator_init(&track, context.allocator) - context.allocator = mem.tracking_allocator(&track) - - // mo() - qt() - - if len(track.allocation_map) > 0 { - println() - for _, v in track.allocation_map { - printf("%v Leaked %v bytes.\n", v.location, v.size) - } - } -} \ No newline at end of file diff --git a/core/text/i18n/gettext.odin b/core/text/i18n/gettext.odin index 54c5a1111..eed73855b 100644 --- a/core/text/i18n/gettext.odin +++ b/core/text/i18n/gettext.odin @@ -8,6 +8,9 @@ package i18n A from-scratch implementation based after the specification found here: https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html + Options are ignored as they're not applicable to this format. + They're part of the signature for consistency with other catalog formats. + List of contributors: Jeroen van Rijn: Initial implementation. 
*/ @@ -15,7 +18,7 @@ import "core:os" import "core:strings" import "core:bytes" -parse_mo_from_slice :: proc(data: []u8, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) { +parse_mo_from_slice :: proc(data: []u8, options := DEFAULT_PARSE_OPTIONS, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) { context.allocator = allocator /* An MO file should have at least a 4-byte magic, 2 x 2 byte version info, @@ -115,7 +118,7 @@ parse_mo_from_slice :: proc(data: []u8, pluralizer: proc(int) -> int = nil, allo return } -parse_mo_file :: proc(filename: string, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) { +parse_mo_file :: proc(filename: string, options := DEFAULT_PARSE_OPTIONS, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) { context.allocator = allocator data, data_ok := os.read_entire_file(filename) @@ -123,7 +126,7 @@ parse_mo_file :: proc(filename: string, pluralizer: proc(int) -> int = nil, allo if !data_ok { return {}, .File_Error } - return parse_mo_from_slice(data, pluralizer) + return parse_mo_from_slice(data, options, pluralizer, allocator) } parse_mo :: proc { parse_mo_file, parse_mo_from_slice } diff --git a/core/text/i18n/i18n.odin b/core/text/i18n/i18n.odin index 36204efd9..e007401af 100644 --- a/core/text/i18n/i18n.odin +++ b/core/text/i18n/i18n.odin @@ -74,6 +74,14 @@ Error :: enum { } +Parse_Options :: struct { + merge_sections: bool, +} + +DEFAULT_PARSE_OPTIONS :: Parse_Options{ + merge_sections = false, +} + /* Several ways to use: - get(key), which defaults to the singular form and i18n.ACTIVE catalog, or diff --git a/core/text/i18n/qt_linguist.odin b/core/text/i18n/qt_linguist.odin index 65d51444e..0a241c1aa 100644 --- a/core/text/i18n/qt_linguist.odin +++ b/core/text/i18n/qt_linguist.odin @@ -27,7 +27,7 
@@ TS_XML_Options := xml.Options{ expected_doctype = "TS", } -parse_qt_linguist_from_slice :: proc(data: []u8, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) { +parse_qt_linguist_from_slice :: proc(data: []u8, options := DEFAULT_PARSE_OPTIONS, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) { context.allocator = allocator ts, xml_err := xml.parse(data, TS_XML_Options) @@ -59,7 +59,7 @@ parse_qt_linguist_from_slice :: proc(data: []u8, pluralizer: proc(int) -> int = return translation, .TS_File_Expected_Context_Name, } - section_name := ts.elements[section_name_id].value + section_name := "" if options.merge_sections else ts.elements[section_name_id].value if section_name not_in translation.k_v { translation.k_v[section_name] = {} @@ -139,7 +139,7 @@ parse_qt_linguist_from_slice :: proc(data: []u8, pluralizer: proc(int) -> int = return } -parse_qt_linguist_file :: proc(filename: string, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) { +parse_qt_linguist_file :: proc(filename: string, options := DEFAULT_PARSE_OPTIONS, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) { context.allocator = allocator data, data_ok := os.read_entire_file(filename) @@ -147,7 +147,7 @@ parse_qt_linguist_file :: proc(filename: string, pluralizer: proc(int) -> int = if !data_ok { return {}, .File_Error } - return parse_qt_linguist_from_slice(data, pluralizer) + return parse_qt_linguist_from_slice(data, options, pluralizer, allocator) } parse_qt :: proc { parse_qt_linguist_file, parse_qt_linguist_from_slice } \ No newline at end of file diff --git a/examples/all/all_main.odin b/examples/all/all_main.odin index 27f199062..36acf7714 100644 --- a/examples/all/all_main.odin +++ b/examples/all/all_main.odin @@ -56,6 +56,7 @@ import csv 
"core:encoding/csv" import hxa "core:encoding/hxa" import json "core:encoding/json" import varint "core:encoding/varint" +import xml "core:encoding/xml" import fmt "core:fmt" import hash "core:hash" @@ -100,6 +101,7 @@ import strings "core:strings" import sync "core:sync" import testing "core:testing" import scanner "core:text/scanner" +import i18n "core:text/i18n" import thread "core:thread" import time "core:time" @@ -158,6 +160,7 @@ _ :: csv _ :: hxa _ :: json _ :: varint +_ :: xml _ :: fmt _ :: hash _ :: image @@ -192,6 +195,7 @@ _ :: strings _ :: sync _ :: testing _ :: scanner +_ :: i18n _ :: thread _ :: time _ :: unicode diff --git a/tests/core/Makefile b/tests/core/Makefile index 2c24fef75..1405ae5c6 100644 --- a/tests/core/Makefile +++ b/tests/core/Makefile @@ -26,9 +26,10 @@ noise_test: $(ODIN) run math/noise -out:test_noise encoding_test: - $(ODIN) run encoding/hxa -collection:tests=.. -out:test_hxa - $(ODIN) run encoding/json -out:test_json + $(ODIN) run encoding/hxa -out:test_hxa -collection:tests=.. + $(ODIN) run encoding/json -out:test_json $(ODIN) run encoding/varint -out:test_varint + $(ODIN) run encoding/xml -out:test_xml math_test: $(ODIN) run math/test_core_math.odin -file -collection:tests=.. 
-out:test_core_math diff --git a/tests/core/assets/I18N/duplicate-key.ts b/tests/core/assets/I18N/duplicate-key.ts new file mode 100644 index 000000000..a38824d01 --- /dev/null +++ b/tests/core/assets/I18N/duplicate-key.ts @@ -0,0 +1,22 @@ + + + + + Page + + %d apple(s) + commenting + Tekst om te vertalen + + + + apple_count + + %d apple(s) + + %d appel + %d appels + + + + diff --git a/tests/core/build.bat b/tests/core/build.bat index 8e4ba1d15..77ff38038 100644 --- a/tests/core/build.bat +++ b/tests/core/build.bat @@ -64,4 +64,9 @@ echo --- echo --- echo Running core:reflect tests echo --- -%PATH_TO_ODIN% run reflect %COMMON% %COLLECTION% -out:test_core_reflect.exe \ No newline at end of file +%PATH_TO_ODIN% run reflect %COMMON% %COLLECTION% -out:test_core_reflect.exe + +echo --- +echo Running core:text/i18n tests +echo --- +%PATH_TO_ODIN% run text\i18n %COMMON% -out:test_core_i18n.exe \ No newline at end of file diff --git a/tests/core/text/i18n/test_core_text_i18n.odin b/tests/core/text/i18n/test_core_text_i18n.odin new file mode 100644 index 000000000..ba668c4fd --- /dev/null +++ b/tests/core/text/i18n/test_core_text_i18n.odin @@ -0,0 +1,165 @@ +package test_core_text_i18n + +import "core:mem" +import "core:fmt" +import "core:os" +import "core:testing" +import "core:text/i18n" + +TEST_count := 0 +TEST_fail := 0 + +when ODIN_TEST { + expect :: testing.expect + log :: testing.log +} else { + expect :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) { + TEST_count += 1 + if !condition { + TEST_fail += 1 + fmt.printf("[%v] %v\n", loc, message) + return + } + } + log :: proc(t: ^testing.T, v: any, loc := #caller_location) { + fmt.printf("[%v] ", loc) + fmt.printf("log: %v\n", v) + } +} +T :: i18n.get + +Test :: struct { + section: string, + key: string, + val: string, + n: int, +} + +Test_Suite :: struct { + file: string, + loader: proc(string, i18n.Parse_Options, proc(int) -> int, mem.Allocator) -> (^i18n.Translation, i18n.Error), + 
err: i18n.Error, + options: i18n.Parse_Options, + tests: []Test, +} + +TESTS := []Test_Suite{ + { + file = "assets/I18N/nl_NL.mo", + loader = i18n.parse_mo_file, + tests = { + // These are in the catalog. + { "", "There are 69,105 leaves here.", "Er zijn hier 69.105 bladeren.", 1 }, + { "", "Hellope, World!", "Hallo, Wereld!", 1 }, + { "", "There is %d leaf.\n", "Er is %d blad.\n", 1 }, + { "", "There are %d leaves.\n", "Er is %d blad.\n", 1 }, + { "", "There is %d leaf.\n", "Er zijn %d bladeren.\n", 42 }, + { "", "There are %d leaves.\n", "Er zijn %d bladeren.\n", 42 }, + + // This isn't in the catalog, so should return the key. + { "", "Come visit us on Discord!", "Come visit us on Discord!", 1 }, + }, + }, + + // QT Linguist with default loader options. + { + file = "assets/I18N/nl_NL-qt-ts.ts", + loader = i18n.parse_qt_linguist_file, + tests = { + // These are in the catalog. + { "Page", "Text for translation", "Tekst om te vertalen", 1}, + { "Page", "Also text to translate", "Ook tekst om te vertalen", 1}, + { "installscript", "99 bottles of beer on the wall", "99 flessen bier op de muur", 1}, + { "apple_count", "%d apple(s)", "%d appel", 1}, + { "apple_count", "%d apple(s)", "%d appels", 42}, + + // These aren't in the catalog, so should return the key. + { "", "Come visit us on Discord!", "Come visit us on Discord!", 1 }, + { "Fake_Section", "Come visit us on Discord!", "Come visit us on Discord!", 1 }, + }, + }, + + // QT Linguist, merging sections. + { + file = "assets/I18N/nl_NL-qt-ts.ts", + loader = i18n.parse_qt_linguist_file, + options = {merge_sections = true}, + tests = { + // All of them are now in section "", so lookup in section "" should return the translation. 
+ { "", "Text for translation", "Tekst om te vertalen", 1}, + { "", "Also text to translate", "Ook tekst om te vertalen", 1}, + { "", "99 bottles of beer on the wall", "99 flessen bier op de muur", 1}, + { "", "%d apple(s)", "%d appel", 1}, + { "", "%d apple(s)", "%d appels", 42}, + + // All of them are now in section "", lookup with original section should return the key. + { "Page", "Text for translation", "Text for translation", 1}, + { "Page", "Also text to translate", "Also text to translate", 1}, + { "installscript", "99 bottles of beer on the wall", "99 bottles of beer on the wall", 1}, + { "apple_count", "%d apple(s)", "%d apple(s)", 1}, + { "apple_count", "%d apple(s)", "%d apple(s)", 42}, + }, + }, + + // QT Linguist, merging sections. Expecting .Duplicate_Key error because same key exists in more than 1 section. + { + file = "assets/I18N/duplicate-key.ts", + loader = i18n.parse_qt_linguist_file, + options = {merge_sections = true}, + err = .Duplicate_Key, + }, + + // QT Linguist, not merging sections. Shouldn't return error despite same key existing in more than 1 section. 
+ { + file = "assets/I18N/duplicate-key.ts", + loader = i18n.parse_qt_linguist_file, + }, +} + +@test +tests :: proc(t: ^testing.T) { + using fmt + + cat: ^i18n.Translation + err: i18n.Error + + for suite in TESTS { + cat, err = suite.loader(suite.file, suite.options, nil, context.allocator) + + msg := fmt.tprintf("Expected loading %v to return %v, got %v", suite.file, suite.err, err) + expect(t, err == suite.err, msg) + + if err == .None { + for test in suite.tests { + val := T(test.section, test.key, test.n, cat) + + msg = fmt.tprintf("Expected key `%v` from section `%v`'s form for value `%v` to equal `%v`, got `%v`", test.key, test.section, test.n, test.val, val) + expect(t, val == test.val, msg) + } + } + i18n.destroy(cat) + } +} + +main :: proc() { + using fmt + + track: mem.Tracking_Allocator + mem.tracking_allocator_init(&track, context.allocator) + context.allocator = mem.tracking_allocator(&track) + + t := testing.T{} + tests(&t) + + fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count) + if TEST_fail > 0 { + os.exit(1) + } + + if len(track.allocation_map) > 0 { + println() + for _, v in track.allocation_map { + printf("%v Leaked %v bytes.\n", v.location, v.size) + } + } +} \ No newline at end of file