mirror of
https://github.com/Ed94/metadesk.git
synced 2026-07-01 07:41:48 -07:00
117 lines
6.5 KiB
Markdown
117 lines
6.5 KiB
Markdown
/* MetaDesk grammar with semantic annotations
|
|
*
|
|
* Each line represents a BNF-esque production:
|
|
* symbol : rule_1 | ... | rule_n
|
|
* - Pipe signs indicate mutually exclusive alternatives
|
|
* - Square brackets denote optional rules
|
|
* - Character literals are terminal productions
|
|
* - Tags indicate which way the productions attach to the generated tree (@child, @sibling, @tag)
|
|
* and miscellaneous semantics (@fill, @markup)
|
|
*/
|
|
|
|
//// A bunch of common definitions
|
|
separator : ' ' | unscoped_separator
|
|
unscoped_separator : '\n'| ',' | ';'
|
|
alt_scope_beg : '(' | '['
|
|
alt_scope_end : ')' | ']'
|
|
id : alpha [alphanumeric] | '_' [alphanumeric]
|
|
alphanumeric : alpha [alphanumeric] | digit [alphanumeric] | '_' [alphanumeric]
|
|
alpha : lowercase | uppercase
|
|
lowercase : 'a'|'b'|'c'|'d'|'e'|'f'|'g'|'h'|'i'|'j'|'k'|'l'|'m'|'n'|'o'|'p'|'q'|'r'|'s'|'t'|'u'|'v'|'w'|'x'|'y'|'z'
|
|
uppercase : 'A'|'B'|'C'|'D'|'E'|'F'|'G'|'H'|'I'|'J'|'K'|'L'|'M'|'N'|'O'|'P'|'Q'|'R'|'S'|'T'|'U'|'V'|'W'|'X'|'Y'|'Z'
|
|
digit : '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
|
|
label : id | integer_literal | char_literal | string_literal | symbol_label
|
|
integer_literal : { ['-'] natural_literal }
|
|
natural_literal : digit [natural_literal]
|
|
char_literal : @markup '\'' [char_literal_items] @markup '\''
|
|
char_literal_items : char_literal_item [char_literal_items]
|
|
char_literal_item : ascii_no_backslash_no_quotes | '"' | '\\' ascii
|
|
ascii : ascii_no_backslash_no_quotes | '\'' | '"' | '`' | '\\'
|
|
ascii_no_backslash_no_quotes : digit | alpha | symbol_no_backslash_no_quotes | ' '
|
|
symbol_no_backslash_no_quotes : symbol_no_backslash_no_quotes_1 | symbol_no_backslash_no_quotes_2
|
|
symbol_no_backslash_no_quotes_1 : '!'|'#'|'$'|'%'|'&'|'('|')'|'*'|'+'|','|'-'|'.'|'/'|':'
|
|
symbol_no_backslash_no_quotes_2 : ';'|'<'|'='|'>'|'?'|'@'|'['|']'|'^'|'_'|'{'|'|'|'}'|'~'
|
|
string_literal : @markup '"' [string_literal_items] @markup '"' | @markup '`' [string_literal_items] @markup '`'
|
|
string_literal_items : string_literal_item [string_literal_items]
|
|
string_literal_item : ascii_no_backslash_no_quotes | '\'' | '\\' ascii
|
|
symbol_label : '~'|'!'|'%'|'^'|'&'|'*'|'+'|'-'|'/'|'|'|'<'|'>'|'$'|'='|'.'|'?'|'_'
|
|
|
|
/* What follows is a range of annotated grammars that can be used to generate
|
|
* tests of increasing complexity and completeness to check against MetaDesk.
|
|
* To run them, uncomment them one by one and run the build/grammar test.
|
|
*/
|
|
|
|
//// Arbitrarily deep tree, possibly empty
|
|
// file : [@child set_list]
|
|
// set_list : '{' [@child set_list] '}' [separator @sibling set_list]
|
|
|
|
//// Labeled leaves
|
|
// file : [@child set_list]
|
|
// set_list : set [separator @sibling set_list]
|
|
// set : @fill 'A' | '{' [@child set_list] '}'
|
|
|
|
//// Labeled internal nodes
|
|
// file : [@child set_list]
|
|
// set_list : set [separator @sibling set_list]
|
|
// set : @fill 'A' | [@fill 'A' ':'] '{' [@child set_list] '}'
|
|
|
|
//// Unscoped sets
|
|
// file : [@child set_list]
|
|
// set_list : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list]
|
|
// scoped_set : '{' [@child set_list] '}'
|
|
// unscoped_set : @fill 'A' [unscoped_set_tail]
|
|
// unscoped_set_tail : ':' @child unscoped_set | ' ' @sibling unscoped_set | ':' scoped_set |' ' @sibling scoped_set
|
|
|
|
//// Tags
|
|
// file : [@child set_list]
|
|
// set_list : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list]
|
|
// scoped_set : {[tag_list] untagged_scoped_set}
|
|
// untagged_scoped_set : '{' [@child set_list] '}'
|
|
// unscoped_set : {[tag_list] @fill 'A' [unscoped_set_tail]}
|
|
// tag_list : '@' @tag tag ' ' [tag_list]
|
|
// tag : @fill 'T'['(' [@child set_list] ')']
|
|
// unscoped_set_tail : {':' @child unscoped_set | ' ' @sibling unscoped_set | ':' untagged_scoped_set | ' ' @sibling scoped_set}
|
|
|
|
//// Alternative scope markers
|
|
// file : [@child set_list]
|
|
// set_list : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list]
|
|
// scoped_set : {[tag_list] untagged_scoped_set}
|
|
// untagged_scoped_set : '{' [@child set_list] '}' | alt_scope_beg [@child set_list] alt_scope_end
|
|
// unscoped_set : {[tag_list] @fill 'A' [unscoped_set_tail]}
|
|
// tag_list : '@' @tag tag ' ' [tag_list]
|
|
// tag : @fill 'T'['(' [@child set_list] ')']
|
|
// unscoped_set_tail : {':' @child unscoped_set | ' ' @sibling unscoped_set | ':' untagged_scoped_set | ' ' @sibling scoped_set}
|
|
|
|
//// General tags and labels
|
|
file : [@child set_list]
|
|
set_list : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list]
|
|
scoped_set : {[tag_list] untagged_scoped_set}
|
|
untagged_scoped_set : '{' [@child set_list] '}' | alt_scope_beg [@child set_list] alt_scope_end
|
|
unscoped_set : {[tag_list] @fill label [unscoped_set_tail]}
|
|
tag_list : '@' @tag tag ' ' [tag_list]
|
|
tag : @fill id['(' [@child set_list] ')']
|
|
unscoped_set_tail : {':' @child unscoped_set | ' ' @sibling unscoped_set | ':' untagged_scoped_set | ' ' @sibling scoped_set}
|
|
|
|
/* Comments
|
|
* Comments around nodes are accessible to the user. Here's how they behave:
|
|
* - The text inside a comment immediately following a node is stored as the
|
|
* comment_after member of that node. No newlines can happen between a
|
|
* node and its comment_after.
|
|
* - The text inside a comment preceding a node is stored as the
|
|
* comment_before of that node _unless_ it is already the comment_after
|
|
* of another node. One newline between the comment and the node is
|
|
* obviously necessary in the case of C++-style comments and it's also
|
|
* allowed for C-style comments.
|
|
* - If the first character inside a C++-style comment is a space, it's
|
|
* omitted from the stored string
|
|
*
|
|
* The semantically annotated Backus-Naur form that we're using is not a
|
|
* good fit to describe the grammar of comments.
|
|
* To prevent the comment in "a /* comment */ b" from being interpreted as
|
|
* a comment_before of "b", instead of what it is (a comment_after of "a"),
|
|
* we would have to complicate the grammar by introducing several extra
|
|
* productions with this specific purpose in mind.
|
|
* The sensitivity of whitespace in the attachment of comments to nodes is
|
|
* also cumbersome to express in BNF.
|
|
*/
|