From 084d55d55d59393e83169bb5acd0bc845e2c6f79 Mon Sep 17 00:00:00 2001 From: Miguel Lechon Date: Sun, 14 Mar 2021 12:12:30 +0100 Subject: [PATCH] Report and skip non-ASCII characters. --- source/md.h | 4 ++++ source/md_impl.c | 12 ++++++++++++ tests/grammar.md | 2 +- tests/sanity_tests.c | 9 +++++++-- 4 files changed, 24 insertions(+), 3 deletions(-) diff --git a/source/md.h b/source/md.h index 588d3e0..eed92e5 100644 --- a/source/md.h +++ b/source/md.h @@ -451,6 +451,10 @@ typedef enum MD_TokenKind MD_TokenKind_Whitespace, MD_TokenKind_Newline, MD_TokenKind_WhitespaceMax, + + MD_TokenKind_NonASCII, + // Provisional category to deal with single characters > 127 + // while we don't support encodings other than ASCII MD_TokenKind_MAX, } diff --git a/source/md_impl.c b/source/md_impl.c index 9cea4b1..0c2589c 100644 --- a/source/md_impl.c +++ b/source/md_impl.c @@ -1292,6 +1292,12 @@ MD_Parse_LexNext(MD_ParseCtx *ctx) token.kind = MD_TokenKind_Symbol; at += 1; } + + else + { + token.kind = MD_TokenKind_NonASCII; + at += 1; + } }break; } @@ -1689,6 +1695,12 @@ _MD_ParseOneNode(MD_ParseCtx *ctx) } goto end_parse; } + + else if(MD_Parse_RequireKind(ctx, MD_TokenKind_NonASCII, &token)) + { + result.node = MD_MakeNodeFromToken(MD_NodeKind_Label, ctx->filename, ctx->file_contents.str, ctx->at, token); + _MD_Error(ctx, result.node, ctx->at-1, 0, "Non-ASCII character %d", token.string.str[0]); + } end_parse:; diff --git a/tests/grammar.md b/tests/grammar.md index 185f056..1e857ff 100644 --- a/tests/grammar.md +++ b/tests/grammar.md @@ -34,7 +34,7 @@ symbol_no_backslash_no_quotes_2 : ';'|'<'|'='|'>'|'?'|'@'|'['|']'|'^'|'_'|'{'|'| string_literal : @markup '"' [string_literal_items] @markup '"' | @markup '`' [string_literal_items] @markup '`' string_literal_items : string_literal_item [string_literal_items] string_literal_item : ascii_no_backslash_no_quotes | '\'' | '\\' ascii -symbol_label : '~'|'!'|'%'|'^'|'&'|'*'|'+'|'-'|'/'|'|'|'<'|'>'|'$'|'='|'.'|'?'|'$' +symbol_label : '~'|'!'|'%'|'^'|'&'|'*'|'+'|'-'|'/'|'|'|'<'|'>'|'$'|'='|'.'|'?'|'_' /* What follows is a range of annotated grammars that can be used to generate * tests of increasing complexity and completeness to check against MetaDesk. diff --git a/tests/sanity_tests.c b/tests/sanity_tests.c index 3300755..b8435fd 100644 --- a/tests/sanity_tests.c +++ b/tests/sanity_tests.c @@ -455,9 +455,9 @@ int main(void) {"a:'''\nmulti-line text literal", {3}}, {"/* foo", {1}}, {"label:@tag {1, 2, 3} /* /* unterminated comment */", {8, 22}}, - {"#include ", {2}}, {"@\"tag\" node", {2}}, - {"{a,,#b}", {4, 5}}, + {"{a,,#b,}", {4, 5}}, + {"foo""\x80""bar", {4}}, }; int max_error_count = MD_ArrayCount(tests[0].columns); @@ -478,6 +478,11 @@ int main(void) } e = e->next; } + + if(e && e->next) + { + columns_match = 0; + } } TestResult(columns_match); }