mirror of
https://github.com/Ed94/Odin.git
synced 2026-06-13 01:21:38 -07:00
[xml] Implement optional unboxing of CDATA and decoding of tag values.
This commit is contained in:
@@ -60,16 +60,22 @@ COMMENT_END :: "-->"
|
||||
Default: CDATA and comments are passed through unchanged.
|
||||
*/
|
||||
XML_Decode_Option :: enum u8 {
|
||||
/*
|
||||
Do not decode `&...;` entities. Entities are decoded by default.
|
||||
If given, overrides `Decode_CDATA`.
|
||||
*/
|
||||
No_Entity_Decode,
|
||||
|
||||
/*
|
||||
CDATA is unboxed.
|
||||
*/
|
||||
CDATA_Unbox,
|
||||
Unbox_CDATA,
|
||||
|
||||
/*
|
||||
Unboxed CDATA is decoded as well.
|
||||
Ignored if `.CDATA_Unbox` is not given.
|
||||
Ignored if `.Unbox_CDATA` is not given.
|
||||
*/
|
||||
CDATA_Decode,
|
||||
Decode_CDATA,
|
||||
|
||||
/*
|
||||
Comments are stripped.
|
||||
@@ -129,7 +135,7 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator :=
|
||||
}
|
||||
|
||||
case:
|
||||
if in_data && .CDATA_Decode not_in options {
|
||||
if in_data && .Decode_CDATA not_in options {
|
||||
/*
|
||||
Unboxed, but undecoded.
|
||||
*/
|
||||
@@ -145,17 +151,20 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator :=
|
||||
*/
|
||||
write_string(&builder, entity)
|
||||
} else {
|
||||
if decoded, ok := xml_decode_entity(entity); ok {
|
||||
write_rune(&builder, decoded)
|
||||
} else {
|
||||
/*
|
||||
Decode failed. Pass through original.
|
||||
*/
|
||||
write_string(&builder, "&")
|
||||
write_string(&builder, entity)
|
||||
write_string(&builder, ";")
|
||||
|
||||
if .No_Entity_Decode not_in options {
|
||||
if decoded, ok := xml_decode_entity(entity); ok {
|
||||
write_rune(&builder, decoded)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Literal passthrough: either the decode failed or entity decoding is disabled.
|
||||
*/
|
||||
write_string(&builder, "&")
|
||||
write_string(&builder, entity)
|
||||
write_string(&builder, ";")
|
||||
}
|
||||
} else {
|
||||
write_rune(&builder, t.r)
|
||||
@@ -290,7 +299,7 @@ _handle_xml_special :: proc(t: ^Tokenizer, builder: ^strings.Builder, options: X
|
||||
if string(t.src[t.offset:][:len(CDATA_START)]) == CDATA_START {
|
||||
t.read_offset += len(CDATA_START) - 1
|
||||
|
||||
if .CDATA_Unbox in options && .CDATA_Decode in options {
|
||||
if .Unbox_CDATA in options && .Decode_CDATA in options {
|
||||
/*
|
||||
We're unboxing _and_ decoding CDATA
|
||||
*/
|
||||
@@ -315,7 +324,7 @@ _handle_xml_special :: proc(t: ^Tokenizer, builder: ^strings.Builder, options: X
|
||||
|
||||
cdata := string(t.src[offset : t.read_offset])
|
||||
|
||||
if .CDATA_Unbox in options {
|
||||
if .Unbox_CDATA in options {
|
||||
cdata = cdata[len(CDATA_START):]
|
||||
cdata = cdata[:len(cdata) - len(CDATA_END)]
|
||||
}
|
||||
|
||||
@@ -1,19 +1,11 @@
|
||||
package unicode_entity_example
|
||||
|
||||
import "core:encoding/xml"
|
||||
import "core:encoding/entity"
|
||||
import "core:strings"
|
||||
import "core:mem"
|
||||
import "core:fmt"
|
||||
import "core:time"
|
||||
|
||||
OPTIONS :: xml.Options{
|
||||
flags = {
|
||||
.Ignore_Unsupported, .Intern_Comments,
|
||||
},
|
||||
expected_doctype = "",
|
||||
}
|
||||
|
||||
doc_print :: proc(doc: ^xml.Document) {
|
||||
buf: strings.Builder
|
||||
defer strings.destroy_builder(&buf)
|
||||
@@ -29,6 +21,13 @@ _entities :: proc() {
|
||||
|
||||
DOC :: #load("../../../../tests/core/assets/XML/unicode.xml")
|
||||
|
||||
OPTIONS :: xml.Options{
|
||||
flags = {
|
||||
.Ignore_Unsupported, .Intern_Comments,
|
||||
},
|
||||
expected_doctype = "",
|
||||
}
|
||||
|
||||
parse_duration: time.Duration
|
||||
|
||||
{
|
||||
@@ -50,57 +49,11 @@ _entities :: proc() {
|
||||
_main :: proc() {
|
||||
using fmt
|
||||
|
||||
doc, err := xml.parse(#load("test.html"))
|
||||
options := xml.Options{ flags = { .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities }}
|
||||
doc, _ := xml.parse(#load("test.html"), options)
|
||||
|
||||
defer xml.destroy(doc)
|
||||
doc_print(doc)
|
||||
|
||||
if false {
|
||||
val := doc.root.children[1].children[2].value
|
||||
|
||||
println()
|
||||
replaced, ok := entity.decode_xml(val)
|
||||
defer delete(replaced)
|
||||
|
||||
printf("Before: '%v', Err: %v\n", val, err)
|
||||
printf("Passthrough: '%v'\nOK: %v\n", replaced, ok)
|
||||
println()
|
||||
}
|
||||
|
||||
if false {
|
||||
val := doc.root.children[1].children[2].value
|
||||
|
||||
println()
|
||||
replaced, ok := entity.decode_xml(val, { .CDATA_Unbox })
|
||||
defer delete(replaced)
|
||||
|
||||
printf("Before: '%v', Err: %v\n", val, err)
|
||||
printf("CDATA_Unbox: '%v'\nOK: %v\n", replaced, ok)
|
||||
println()
|
||||
}
|
||||
|
||||
if true {
|
||||
val := doc.root.children[1].children[2].value
|
||||
|
||||
println()
|
||||
replaced, ok := entity.decode_xml(val, { .CDATA_Unbox, .CDATA_Decode })
|
||||
defer delete(replaced)
|
||||
|
||||
printf("Before: '%v', Err: %v\n", val, err)
|
||||
printf("CDATA_Decode: '%v'\nOK: %v\n", replaced, ok)
|
||||
println()
|
||||
}
|
||||
|
||||
if true {
|
||||
val := doc.root.children[1].children[1].value
|
||||
|
||||
println()
|
||||
replaced, ok := entity.decode_xml(val, { .Comment_Strip })
|
||||
defer delete(replaced)
|
||||
|
||||
printf("Before: '%v', Err: %v\n", val, err)
|
||||
printf("Comment_Strip: '%v'\nOK: %v\n", replaced, ok)
|
||||
println()
|
||||
}
|
||||
}
|
||||
|
||||
main :: proc() {
|
||||
|
||||
@@ -16,9 +16,11 @@
|
||||
<div id="test_cdata_in_comment" foo="">
|
||||
Foozle]! © <!-- <![CDATA[ ® ]]> -->42&;1234&
|
||||
</div>
|
||||
<!-- EXPECTED: Foozle]! © 42&;1234& -->
|
||||
<div id="test_cdata_unwrap_and_passthrough">
|
||||
Foozle]! © <![CDATA[BOX ® /BOX]]>42&;1234&
|
||||
</div>
|
||||
<!-- EXPECTED: Foozle]! © BOX ® /BOX42&;1234& -->
|
||||
<div>
|
||||
| | | fj ` \ ® ϱ ∳
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user