[xml] Improvements.

This commit is contained in:
Jeroen van Rijn
2021-12-02 18:00:29 +01:00
parent 23baf56c87
commit 5807214406
11 changed files with 137 additions and 46 deletions
+41 -30
View File
@@ -1,45 +1,55 @@
package xml_example
import "core:encoding/xml"
import "core:os"
import "core:path"
import "core:mem"
import "core:strings"
import "core:fmt"
Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) {
/*
Silent error handler for the parser.
*/
Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) {}
}
OPTIONS :: xml.Options{ flags = { .Ignore_Unsupported, }, expected_doctype = "unicode", }
FILENAME :: "../../../../tests/core/assets/xml/nl_NL-xliff-1.0.xliff"
DOC :: #load(FILENAME)
OPTIONS :: xml.Options{
flags = {
.Ignore_Unsupported, .Intern_Comments,
},
expected_doctype = "",
}
_main :: proc() {
example :: proc() {
using fmt
println("--- DOCUMENT TO PARSE ---")
println(string(DOC))
println("--- /DOCUMENT TO PARSE ---\n")
filename := path.join(ODIN_ROOT, "tests", "core", "assets", "XML", "unicode.xml")
defer delete(filename)
doc, err := xml.parse(DOC, OPTIONS, FILENAME, Error_Handler)
doc, err := xml.parse(filename, OPTIONS, Error_Handler)
defer xml.destroy(doc)
buf: strings.Builder
defer strings.destroy_builder(&buf)
w := strings.to_writer(&buf)
xml.print(w, doc)
println(strings.to_string(buf))
if err != .None {
printf("Parse error: %v\n", err)
} else {
println("DONE!")
printf("Load/Parse error: %v\n", err)
if err == .File_Error {
printf("\"%v\" not found. Did you run \"tests\\download_assets.py\"?", filename)
}
os.exit(1)
}
printf("\"%v\" loaded and parsed.\n", filename)
charlist, charlist_ok := xml.find_child_by_ident(doc.root, "charlist")
if !charlist_ok {
eprintln("Could not locate top-level `<charlist>` tag.")
os.exit(1)
}
printf("Found `<charlist>` with %v children.\n", len(charlist.children))
for char in charlist.children {
if char.ident != "character" {
eprintf("Expected `<character>`, got `<%v>`\n", char.ident)
os.exit(1)
}
if _, ok := xml.find_attribute_val_by_key(char, "dec"); !ok {
eprintln("`<character dec=\"...\">` attribute not found.")
os.exit(1)
}
}
}
@@ -50,12 +60,13 @@ main :: proc() {
mem.tracking_allocator_init(&track, context.allocator)
context.allocator = mem.tracking_allocator(&track)
_main()
example()
if len(track.allocation_map) > 0 {
println()
for _, v in track.allocation_map {
printf("%v Leaked %v bytes.\n", v.location, v.size)
}
}
}
println("Done and cleaned up!")
}
+49
View File
@@ -0,0 +1,49 @@
package xml
/*
An XML 1.0 / 1.1 parser
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-3 license.
This file contains helper functions.
*/
/*
	Find `tag`'s nth child element with a given ident.

	`nth` is zero-based: the default of 0 returns the first matching child.
	Children whose kind is not `.Element` (e.g. comments) are never considered.

	Returns `nil, false` when `tag` is nil or when fewer than `nth + 1` children match.
*/
find_child_by_ident :: proc(tag: ^Element, ident: string, nth := 0) -> (res: ^Element, found: bool) {
	if tag == nil { return nil, false }

	matches_seen := 0
	for candidate in tag.children {
		/*
			Only real elements carry a name, so only they can match `ident`.
		*/
		if candidate.kind == .Element && candidate.ident == ident {
			/*
				Hand back the match once we have skipped `nth` earlier ones.
			*/
			if matches_seen == nth { return candidate, true }
			matches_seen += 1
		}
	}
	return nil, false
}
/*
	Find the value of `tag`'s attribute with the given key.

	Returns `"", false` when `tag` is nil or when no attribute has that key.
*/
find_attribute_val_by_key :: proc(tag: ^Element, key: string) -> (val: string, found: bool) {
	if tag == nil { return "", false }

	for attr in tag.attribs {
		if attr.key != key { continue }
		/*
			The key matches, so we're done. There can only ever be one attribute with the same name.
		*/
		return attr.val, true
	}
	return "", false
}
+3 -3
View File
@@ -403,11 +403,11 @@ scan :: proc(t: ^Tokenizer) -> Token {
case ':': kind = .Colon
case '"', '\'':
kind = .Invalid
lit, err = scan_string(t, t.offset, ch, true, false)
if err == .None {
kind = .String
} else {
kind = .Invalid
}
case '\n':
@@ -418,7 +418,7 @@ scan :: proc(t: ^Tokenizer) -> Token {
}
}
if lit == "" {
if kind != .String && lit == "" {
lit = string(t.src[offset : t.offset])
}
return Token{kind, lit, pos}
+2
View File
@@ -519,6 +519,8 @@ parse_attribute :: proc(doc: ^Document) -> (attr: Attr, offset: int, err: Error)
_ = expect(t, .Eq) or_return
value := expect(t, .String) or_return
error(t, t.offset, "String: %v\n", value)
attr.key = strings.intern_get(&doc.intern, key.text)
attr.val = strings.intern_get(&doc.intern, value.text)
+2
View File
@@ -0,0 +1,2 @@
# This file will be downloaded by download_assets.py
unicode.xml
+35 -8
View File
@@ -50,10 +50,7 @@ def try_download_file(url, out_file):
print("Could not download", url)
return 1
def try_download_and_unpack_zip(suite):
url = ASSETS_BASE_URL.format(suite, "{}.zip".format(suite))
out_file = DOWNLOAD_BASE_PATH.format(suite) + "/{}.zip".format(suite)
def try_download_and_unpack_zip(url, out_file, extract_path):
print("\tDownloading {} to {}.".format(url, out_file))
if try_download_file(url, out_file) is not None:
@@ -65,7 +62,6 @@ def try_download_and_unpack_zip(suite):
with zipfile.ZipFile(out_file) as z:
for file in z.filelist:
filename = file.filename
extract_path = DOWNLOAD_BASE_PATH.format(suite)
print("\t\tExtracting: {}".format(filename))
z.extract(file, extract_path)
@@ -73,25 +69,56 @@ def try_download_and_unpack_zip(suite):
print("Could not extract ZIP file")
return 2
def download_png_assets():
suite = "PNG"
url = ASSETS_BASE_URL.format(suite, "{}.zip".format(suite))
out_file = DOWNLOAD_BASE_PATH.format(suite) + "/{}.zip".format(suite)
extract_path = DOWNLOAD_BASE_PATH.format(suite)
def main():
print("Downloading PNG assets")
# Make PNG assets path
try:
path = DOWNLOAD_BASE_PATH.format("PNG")
path = DOWNLOAD_BASE_PATH.format(suite)
os.makedirs(path)
except FileExistsError:
pass
# Try downloading and unpacking the PNG assets
r = try_download_and_unpack_zip("PNG")
r = try_download_and_unpack_zip(url, out_file, extract_path)
if r is not None:
return r
# We could fall back on downloading the PNG files individually, but it's slow
print("Done downloading PNG assets")
def download_unicode_assets():
    """
    Download the W3C `unicode.xml` ZIP archive and unpack it into the XML suite directory.

    Returns None on success, or the non-None status returned by
    `try_download_and_unpack_zip` when downloading or unpacking fails.
    """
    suite = "XML"
    url = "https://www.w3.org/2003/entities/2007xml/unicode.xml.zip"

    # The archive is stored in, and extracted to, the suite's own directory.
    suite_dir = DOWNLOAD_BASE_PATH.format(suite)
    archive = "{}/{}.zip".format(suite_dir, suite)

    print("Downloading {}.".format(url))

    # Make XML assets path; an already existing one is fine.
    try:
        os.makedirs(suite_dir)
    except FileExistsError:
        pass

    # Try downloading and unpacking the assets
    status = try_download_and_unpack_zip(url, archive, suite_dir)
    if status is not None:
        return status

    print("Done downloading Unicode/XML assets")
def main():
    """
    Download every asset suite needed by the tests.

    Both suites are always attempted, so a failure in one does not prevent
    the other from being fetched.

    Returns 0 when every download succeeded, otherwise the first non-None
    status reported by a download function.
    """
    status = 0
    for download in (download_png_assets, download_unicode_assets):
        result = download()
        # The download functions return None on success and a status on failure.
        if result is not None and status == 0:
            status = result
    return status
if __name__ == '__main__':
+5 -5
View File
@@ -35,7 +35,7 @@ TESTS :: []TEST{
First we test that certain files parse without error.
*/
{
filename = "assets/xml/utf8.xml",
filename = "assets/XML/utf8.xml",
options = OPTIONS,
expected = {
error = .None,
@@ -45,7 +45,7 @@ TESTS :: []TEST{
},
},
{
filename = "assets/xml/nl_NL-qt-ts.ts",
filename = "assets/XML/nl_NL-qt-ts.ts",
options = OPTIONS,
expected = {
error = .None,
@@ -55,7 +55,7 @@ TESTS :: []TEST{
},
},
{
filename = "assets/xml/nl_NL-xliff-1.0.xliff",
filename = "assets/XML/nl_NL-xliff-1.0.xliff",
options = OPTIONS,
expected = {
error = .None,
@@ -65,7 +65,7 @@ TESTS :: []TEST{
},
},
{
filename = "assets/xml/nl_NL-xliff-2.0.xliff",
filename = "assets/XML/nl_NL-xliff-2.0.xliff",
options = OPTIONS,
expected = {
error = .None,
@@ -79,7 +79,7 @@ TESTS :: []TEST{
Then we test that certain errors are returned as expected.
*/
{
filename = "assets/xml/utf8.xml",
filename = "assets/XML/utf8.xml",
options = {
flags = {
.Ignore_Unsupported, .Intern_Comments,