6.5 KiB
/* MetaDesk grammar with semantic annotations
 *
 * - Each line represents a BNF-esque production:
 *       symbol : rule_1 | ... | rule_n
 * - Pipe signs indicate mutually exclusive alternatives
 * - Square brackets denote optional rules
 * - Character literals are terminal productions
 * - Tags indicate which way the productions attach to the generated tree (@child, @sibling, @tag)
 *   and miscellaneous semantics (@fill, @markup) */
//// A bunch of common definitions
// Separators: `separator` admits a single space in addition to everything
// accepted by `unscoped_separator` (newline, comma, semicolon).
separator          : ' ' | unscoped_separator
unscoped_separator : '\n' | ',' | ';'
// Alternative scope delimiters: round or square brackets.
alt_scope_beg      : '(' | '['
alt_scope_end      : ')' | ']'
// Identifiers: the first character is a letter or an underscore; the optional
// tail is any run of letters, digits and underscores.
id           : alpha [alphanumeric] | '_' [alphanumeric]
alphanumeric : alpha [alphanumeric] | digit [alphanumeric] | '_' [alphanumeric]
// Character classes. `lowercase` and `uppercase` each enumerate all 26 ASCII
// letters of their case; `digit` enumerates the ten decimal digits.
alpha     : lowercase | uppercase
lowercase : 'a'|'b'|'c'|'d'|'e'|'f'|'g'|'h'|'i'|'j'|'k'|'l'|'m'|'n'|'o'|'p'|'q'|'r'|'s'|'t'|'u'|'v'|'w'|'x'|'y'|'z'
uppercase : 'A'|'B'|'C'|'D'|'E'|'F'|'G'|'H'|'I'|'J'|'K'|'L'|'M'|'N'|'O'|'P'|'Q'|'R'|'S'|'T'|'U'|'V'|'W'|'X'|'Y'|'Z'
digit     : '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
// A label is exactly one of: identifier, integer literal, character literal,
// string literal, or single-symbol label.
label           : id | integer_literal | char_literal | string_literal | symbol_label
// Optional leading minus sign followed by a natural literal.
// NOTE(review): the curly braces are not described in the header comment;
// presumably they group the enclosed rule into one unit - confirm against the
// grammar test generator.
integer_literal : { ['-'] natural_literal }
// One or more decimal digits (right-recursive).
natural_literal : digit [natural_literal]
// Character literal: delimited by single quotes. An item is any plain ASCII
// character (no backslash, no quote), a double quote, or a backslash followed
// by any ASCII character.
char_literal                    : @markup '\'' [char_literal_items] @markup '\''
char_literal_items              : char_literal_item [char_literal_items]
char_literal_item               : ascii_no_backslash_no_quotes | '"' | '\\' ascii
// ASCII classes. `ascii` adds the three quote characters (single quote,
// double quote, backtick) and the backslash to the plain set.
ascii                           : ascii_no_backslash_no_quotes | '\'' | '"' | '`' | '\\'
ascii_no_backslash_no_quotes    : digit | alpha | symbol_no_backslash_no_quotes | ' '
symbol_no_backslash_no_quotes   : symbol_no_backslash_no_quotes_1 | symbol_no_backslash_no_quotes_2
symbol_no_backslash_no_quotes_1 : '!'|'#'|'$'|'%'|'&'|'('|')'|'*'|'+'|','|'-'|'.'|'/'|':'
symbol_no_backslash_no_quotes_2 : ';'|'<'|'='|'>'|'?'|'@'|'['|']'|'^'|'_'|'{'|'|'|'}'|'~'
// String literal: delimited either by double quotes or by backticks. An item
// is any plain ASCII character, a single quote, or a backslash followed by
// any ASCII character.
string_literal                  : @markup '"' [string_literal_items] @markup '"' | @markup '`' [string_literal_items] @markup '`'
string_literal_items            : string_literal_item [string_literal_items]
string_literal_item             : ascii_no_backslash_no_quotes | '\'' | '\\' ascii
// Single-character symbols that may stand alone as a label.
symbol_label : '~' | '!' | '%' | '^' | '&' | '*' | '+' | '-' | '/' | '|' | '<' | '>' | '$' | '=' | '.' | '?' | '_'
/* What follows is a range of annotated grammars that can be used to generate
- tests of increasing complexity and completeness to check against MetaDesk.
- To run them, uncomment them one by one and run the build/grammar test. */
//// Arbitrarily deep tree, possibly empty
// file : [@child set_list]
// set_list : '{' [@child set_list] '}' [separator @sibling set_list]
//// Labeled leaves
// file : [@child set_list]
// set_list : set [separator @sibling set_list]
// set : @fill 'A' | '{' [@child set_list] '}'
//// Labeled internal nodes
// file : [@child set_list]
// set_list : set [separator @sibling set_list]
// set : @fill 'A' | [@fill 'A' ':'] '{' [@child set_list] '}'
//// Unscoped sets
// file : [@child set_list]
// set_list : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list]
// scoped_set : '{' [@child set_list] '}'
// unscoped_set : @fill 'A' [unscoped_set_tail]
// unscoped_set_tail : ':' @child unscoped_set | ' ' @sibling unscoped_set | ':' scoped_set |' ' @sibling scoped_set
//// Tags
// file : [@child set_list]
// set_list : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list]
// scoped_set : {[tag_list] untagged_scoped_set}
// untagged_scoped_set : '{' [@child set_list] '}'
// unscoped_set : {[tag_list] @fill 'A' [unscoped_set_tail]}
// tag_list : '@' @tag tag ' ' [tag_list]
// tag : @fill 'T'['(' [@child set_list] ')']
// unscoped_set_tail : {':' @child unscoped_set | ' ' @sibling unscoped_set | ':' untagged_scoped_set | ' ' @sibling scoped_set}
//// Alternative scope markers
// file : [@child set_list]
// set_list : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list]
// scoped_set : {[tag_list] untagged_scoped_set}
// untagged_scoped_set : '{' [@child set_list] '}' | alt_scope_beg [@child set_list] alt_scope_end
// unscoped_set : {[tag_list] @fill 'A' [unscoped_set_tail]}
// tag_list : '@' @tag tag ' ' [tag_list]
// tag : @fill 'T'['(' [@child set_list] ')']
// unscoped_set_tail : {':' @child unscoped_set | ' ' @sibling unscoped_set | ':' untagged_scoped_set | ' ' @sibling scoped_set}
//// General tags and labels
// Active grammar: `file` is the start symbol. A file is an optional list of
// sets; each set is either scoped (delimited by braces or by the alternative
// scope markers) or unscoped (a label with an optional tail), and either kind
// may be preceded by a list of '@'-introduced tags.
file                : [@child set_list]
set_list            : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list]
scoped_set          : {[tag_list] untagged_scoped_set}
untagged_scoped_set : '{' [@child set_list] '}' | alt_scope_beg [@child set_list] alt_scope_end
unscoped_set        : {[tag_list] @fill label [unscoped_set_tail]}
tag_list            : '@' @tag tag ' ' [tag_list]
tag                 : @fill id['(' [@child set_list] ')']
unscoped_set_tail   : {':' @child unscoped_set | ' ' @sibling unscoped_set | ':' untagged_scoped_set | ' ' @sibling scoped_set}
/* Comments
- Comments around nodes are accessible to the user. Here's how they behave:
-
- The text inside a comment immediately following a node is stored as the
- comment_after member of that node. No newlines can happen between a
- node and its comment_after.
-
- The text inside a comment preceding a node is stored as the
- comment_before of that node unless it is already the comment_after
- of another node. One newline between the comment and the node is
- obviously necessary in the case of C++-style comments and it's also
- allowed for C-style comments.
-
- If the first character inside a C++-style comment is a space, it's
- omitted from the stored string.
- The semantically annotated Backus-Naur form that we're using is not a
- good fit to describe the grammar of comments.
- To prevent the comment in "a /* comment */ b" from being interpreted as
- a comment_before of "b", instead of what it is (a comment_after of "a"),
- we would have to complicate the grammar by introducing several extra
- productions with this specific purpose in mind.
- The sensitivity of whitespace in the attachment of comments to nodes is
- also cumbersome to express in BNF. */