diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin index b169bd57a..0315b0e05 100644 --- a/core/encoding/xml/xml_reader.odin +++ b/core/encoding/xml/xml_reader.odin @@ -26,6 +26,7 @@ package xml Jeroen van Rijn: Initial implementation. */ +import "core:bytes" import "core:strings" import "core:encoding/entity" import "core:mem" @@ -39,6 +40,12 @@ DEFAULT_Options :: Options{ } Option_Flag :: enum { + /* + If the caller says that input may be modified, we can perform in-situ parsing. + If this flag isn't provided, the XML parser first duplicates the input so that it can. + */ + Input_May_Be_Modified, + /* Document MUST start with ` (doc: ^Document, err: Error) { + data := data context.allocator = allocator opts := validate_options(options) or_return + /* + If `.Input_May_Be_Modified` is not specified, we duplicate the input so that we can modify it in-place. + */ + if .Input_May_Be_Modified not_in opts.flags { + data = bytes.clone(data) + } + t := &Tokenizer{} init(t, string(data), path, error_handler) doc = new(Document) doc.allocator = allocator doc.tokenizer = t + doc.input = data - strings.intern_init(&doc.intern, allocator, allocator) + // strings.intern_init(&doc.intern, allocator, allocator) - err = .Unexpected_Token - element, parent: ^Element + err = .Unexpected_Token + element, parent: ^Element tag_is_open := false @@ -292,8 +313,7 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err case: if .Error_on_Unsupported in opts.flags { error(t, t.offset, "Unhandled: (doc: ^Document, err: Error) { context.allocator = allocator + options := options data, data_ok := os.read_entire_file(filename) - defer delete(data) - if !data_ok { return {}, .File_Error } + options.flags += { .Input_May_Be_Modified } + return parse_from_slice(data, options, filename, error_handler, allocator) } @@ -499,10 +517,16 @@ destroy :: proc(doc: ^Document) { if doc == nil { return } free_element(doc.root) - strings.intern_destroy(&doc.intern) delete(doc.prolog) delete(doc.comments) + delete(doc.input) + + for s in doc.strings_to_free { + delete(s) + } + delete(doc.strings_to_free) + free(doc) } @@ -538,8 +562,8 @@ parse_attribute :: proc(doc: ^Document) -> (attr: Attr, offset: int, err: Error) _ = expect(t, .Eq) or_return value := expect(t, .String) or_return - attr.key = strings.intern_get(&doc.intern, key.text) - attr.val = strings.intern_get(&doc.intern, value.text) + attr.key = key.text + attr.val = value.text err = .None return @@ -651,7 +675,7 @@ parse_doctype :: proc(doc: ^Document) -> (err: Error) { t := doc.tokenizer tok := expect(t, .Ident) or_return - doc.doctype.ident = strings.intern_get(&doc.intern, tok.text) + doc.doctype.ident = tok.text skip_whitespace(t) offset := t.offset @@ -660,6 +684,6 @@ parse_doctype :: proc(doc: ^Document) -> (err: Error) { /* -1 because the current offset is that of the closing tag, so the rest of the DOCTYPE tag ends just before it. */ - doc.doctype.rest = strings.intern_get(&doc.intern, string(t.src[offset : t.offset - 1])) + doc.doctype.rest = string(t.src[offset : t.offset - 1]) return .None } \ No newline at end of file