mirror of
https://github.com/Ed94/Odin.git
synced 2026-06-13 09:22:22 -07:00
842cfee0f3
This change was made in order to allow things produced with Odin and using Odin's core library, to not require the LICENSE to also be distributed alongside the binary form.
186 lines
5.0 KiB
Odin
186 lines
5.0 KiB
Odin
package i18n
|
|
/*
|
|
A parser for GNU GetText .MO files.
|
|
|
|
Copyright 2021-2022 Jeroen van Rijn <nom@duclavier.com>.
|
|
Made available under Odin's license.
|
|
|
|
A from-scratch implementation based after the specification found here:
|
|
https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
|
|
|
|
Options are ignored as they're not applicable to this format.
|
|
They're part of the signature for consistency with other catalog formats.
|
|
|
|
List of contributors:
|
|
Jeroen van Rijn: Initial implementation.
|
|
*/
|
|
import "core:os"
|
|
import "core:strings"
|
|
import "core:bytes"
|
|
|
|
parse_mo_from_bytes :: proc(data: []byte, options := DEFAULT_PARSE_OPTIONS, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
|
|
context.allocator = allocator
|
|
/*
|
|
An MO file should have at least a 4-byte magic, 2 x 2 byte version info,
|
|
a 4-byte number of strings value, and 2 x 4-byte offsets.
|
|
*/
|
|
if len(data) < 20 {
|
|
return {}, .MO_File_Invalid
|
|
}
|
|
|
|
/*
|
|
Check magic. Should be 0x950412de in native Endianness.
|
|
*/
|
|
native := true
|
|
magic := read_u32(data, native) or_return
|
|
|
|
if magic != 0x950412de {
|
|
native = false
|
|
magic = read_u32(data, native) or_return
|
|
|
|
if magic != 0x950412de { return {}, .MO_File_Invalid_Signature }
|
|
}
|
|
|
|
/*
|
|
We can ignore version_minor at offset 6.
|
|
*/
|
|
version_major := read_u16(data[4:]) or_return
|
|
if version_major > 1 { return {}, .MO_File_Unsupported_Version }
|
|
|
|
count := read_u32(data[ 8:]) or_return
|
|
original_offset := read_u32(data[12:]) or_return
|
|
translated_offset := read_u32(data[16:]) or_return
|
|
|
|
if count == 0 { return {}, .Empty_Translation_Catalog }
|
|
|
|
/*
|
|
Initalize Translation, interner and optional pluralizer.
|
|
*/
|
|
translation = new(Translation)
|
|
translation.pluralize = pluralizer
|
|
strings.intern_init(&translation.intern, allocator, allocator)
|
|
|
|
for n := u32(0); n < count; n += 1 {
|
|
/*
|
|
Grab string's original length and offset.
|
|
*/
|
|
offset := original_offset + 8 * n
|
|
if len(data) < int(offset + 8) { return translation, .MO_File_Invalid }
|
|
|
|
o_length := read_u32(data[offset :], native) or_return
|
|
o_offset := read_u32(data[offset + 4:], native) or_return
|
|
|
|
offset = translated_offset + 8 * n
|
|
if len(data) < int(offset + 8) { return translation, .MO_File_Invalid }
|
|
|
|
t_length := read_u32(data[offset :], native) or_return
|
|
t_offset := read_u32(data[offset + 4:], native) or_return
|
|
|
|
max_offset := int(max(o_offset + o_length + 1, t_offset + t_length + 1))
|
|
if len(data) < max_offset { return translation, .Premature_EOF }
|
|
|
|
key_data := data[o_offset:][:o_length]
|
|
val_data := data[t_offset:][:t_length]
|
|
|
|
/*
|
|
Could be a pluralized string.
|
|
*/
|
|
zero := []byte{0}
|
|
keys := bytes.split(key_data, zero); defer delete(keys)
|
|
vals := bytes.split(val_data, zero); defer delete(vals)
|
|
|
|
if (len(keys) != 1 && len(keys) != 2) || len(vals) > MAX_PLURALS {
|
|
return translation, .MO_File_Incorrect_Plural_Count
|
|
}
|
|
|
|
for k in keys {
|
|
section_name := ""
|
|
key := string(k)
|
|
|
|
// Scan for <context>EOT<key>
|
|
for ch, i in k {
|
|
if ch == 0x04 {
|
|
section_name = string(k[:i])
|
|
key = string(k[i+1:])
|
|
break
|
|
}
|
|
}
|
|
|
|
// If we merge sections, then all entries end in the "" context.
|
|
if options.merge_sections {
|
|
section_name = ""
|
|
}
|
|
|
|
section_name, _ = strings.intern_get(&translation.intern, section_name)
|
|
if section_name not_in translation.k_v {
|
|
translation.k_v[section_name] = {}
|
|
}
|
|
|
|
section := &translation.k_v[section_name]
|
|
interned_key, _ := strings.intern_get(&translation.intern, string(key))
|
|
|
|
// Duplicate key should not be allowed.
|
|
if interned_key in section {
|
|
return translation, .Duplicate_Key
|
|
}
|
|
|
|
interned_vals := make([]string, len(vals))
|
|
last_val: string
|
|
|
|
for v, i in vals {
|
|
interned_vals[i], _ = strings.intern_get(&translation.intern, string(v))
|
|
last_val = interned_vals[i]
|
|
}
|
|
section[interned_key] = interned_vals
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
parse_mo_file :: proc(filename: string, options := DEFAULT_PARSE_OPTIONS, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
|
|
context.allocator = allocator
|
|
|
|
data, data_ok := os.read_entire_file(filename)
|
|
defer delete(data)
|
|
|
|
if !data_ok { return {}, .File_Error }
|
|
|
|
return parse_mo_from_bytes(data, options, pluralizer, allocator)
|
|
}
|
|
|
|
parse_mo :: proc { parse_mo_file, parse_mo_from_bytes }
|
|
|
|
/*
|
|
Helpers.
|
|
*/
|
|
read_u32 :: proc(data: []u8, native_endian := true) -> (res: u32, err: Error) {
|
|
if len(data) < size_of(u32) { return 0, .Premature_EOF }
|
|
|
|
val := (^u32)(raw_data(data))^
|
|
|
|
if native_endian {
|
|
return val, .None
|
|
} else {
|
|
when ODIN_ENDIAN == .Little {
|
|
return u32(transmute(u32be)val), .None
|
|
} else {
|
|
return u32(transmute(u32le)val), .None
|
|
}
|
|
}
|
|
}
|
|
|
|
read_u16 :: proc(data: []u8, native_endian := true) -> (res: u16, err: Error) {
|
|
if len(data) < size_of(u16) { return 0, .Premature_EOF }
|
|
|
|
val := (^u16)(raw_data(data))^
|
|
|
|
if native_endian {
|
|
return val, .None
|
|
} else {
|
|
when ODIN_ENDIAN == .Little {
|
|
return u16(transmute(u16be)val), .None
|
|
} else {
|
|
return u16(transmute(u16le)val), .None
|
|
}
|
|
}
|
|
} |