Files
metadesk/tests/grammar.md
T
2021-03-14 12:12:30 +01:00

6.5 KiB

/* MetaDesk grammar with semantic annotations
 *
 * Each line represents a BNF-esque production:
 *   symbol : rule_1 | ... | rule_n
 *
 *   - Pipe signs indicate mutually exclusive alternatives
 *   - Square brackets denote optional rules
 *   - Character literals are terminal productions
 *   - Tags indicate which way the productions attach to the generated tree
 *     (@child, @sibling, @tag) and miscellaneous semantics (@fill, @markup)
 */

//// A bunch of common definitions
// Shared terminals and literals referenced by the test grammars below.
// One production per line. Quote and backslash terminals are written with
// C-style escapes ('\'' and '\\').
separator                       : ' ' | unscoped_separator
unscoped_separator              : '\n'| ',' | ';'
alt_scope_beg                   : '(' | '['
alt_scope_end                   : ')' | ']'
id                              : alpha [alphanumeric] | '_' [alphanumeric]
alphanumeric                    : alpha [alphanumeric] | digit [alphanumeric] | '_' [alphanumeric]
alpha                           : lowercase | uppercase
lowercase                       : 'a'|'b'|'c'|'d'|'e'|'f'|'g'|'h'|'i'|'j'|'k'|'l'|'m'|'n'|'o'|'p'|'q'|'r'|'s'|'t'|'u'|'v'|'w'|'x'|'y'|'z'
uppercase                       : 'A'|'B'|'C'|'D'|'E'|'F'|'G'|'H'|'I'|'J'|'K'|'L'|'M'|'N'|'O'|'P'|'Q'|'R'|'S'|'T'|'U'|'V'|'W'|'X'|'Y'|'Z'
digit                           : '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
label                           : id | integer_literal | char_literal | string_literal | symbol_label
integer_literal                 : { ['-'] natural_literal }
natural_literal                 : digit [natural_literal]
char_literal                    : @markup '\'' [char_literal_items] @markup '\''
char_literal_items              : char_literal_item [char_literal_items]
char_literal_item               : ascii_no_backslash_no_quotes | '"' | '\\' ascii
ascii                           : ascii_no_backslash_no_quotes | '\'' | '"' | '`' | '\\'
ascii_no_backslash_no_quotes    : digit | alpha | symbol_no_backslash_no_quotes | ' '
symbol_no_backslash_no_quotes   : symbol_no_backslash_no_quotes_1 | symbol_no_backslash_no_quotes_2
symbol_no_backslash_no_quotes_1 : '!'|'#'|'$'|'%'|'&'|'('|')'|'*'|'+'|','|'-'|'.'|'/'|':'
symbol_no_backslash_no_quotes_2 : ';'|'<'|'='|'>'|'?'|'@'|'['|']'|'^'|'_'|'{'|'|'|'}'|'~'
string_literal                  : @markup '"' [string_literal_items] @markup '"' | @markup '`' [string_literal_items] @markup '`'
string_literal_items            : string_literal_item [string_literal_items]
string_literal_item             : ascii_no_backslash_no_quotes | '\'' | '\\' ascii
symbol_label                    : '~'|'!'|'%'|'^'|'&'|'*'|'+'|'-'|'/'|'|'|'<'|'>'|'$'|'='|'.'|'?'|'_'

/* What follows is a range of annotated grammars that can be used to generate
 * tests of increasing complexity and completeness to check against MetaDesk.
 * To run them, uncomment them one by one and run the build/grammar test. */

//// Arbitrarily deep tree, possibly empty
// file     : [@child set_list]
// set_list : '{' [@child set_list] '}' [separator @sibling set_list]

//// Labeled leaves
// file     : [@child set_list]
// set_list : set [separator @sibling set_list]
// set      : @fill 'A' | '{' [@child set_list] '}'

//// Labeled internal nodes
// file     : [@child set_list]
// set_list : set [separator @sibling set_list]
// set      : @fill 'A' | [@fill 'A' ':'] '{' [@child set_list] '}'

//// Unscoped sets
// file              : [@child set_list]
// set_list          : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list]
// scoped_set        : '{' [@child set_list] '}'
// unscoped_set      : @fill 'A' [unscoped_set_tail]
// unscoped_set_tail : ':' @child unscoped_set | ' ' @sibling unscoped_set | ':' scoped_set |' ' @sibling scoped_set

//// Tags
// file                : [@child set_list]
// set_list            : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list]
// scoped_set          : {[tag_list] untagged_scoped_set}
// untagged_scoped_set : '{' [@child set_list] '}'
// unscoped_set        : {[tag_list] @fill 'A' [unscoped_set_tail]}
// tag_list            : '@' @tag tag ' ' [tag_list]
// tag                 : @fill 'T'['(' [@child set_list] ')']
// unscoped_set_tail   : {':' @child unscoped_set | ' ' @sibling unscoped_set | ':' untagged_scoped_set | ' ' @sibling scoped_set}

//// Alternative scope markers
// file                : [@child set_list]
// set_list            : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list]
// scoped_set          : {[tag_list] untagged_scoped_set}
// untagged_scoped_set : '{' [@child set_list] '}' | alt_scope_beg [@child set_list] alt_scope_end
// unscoped_set        : {[tag_list] @fill 'A' [unscoped_set_tail]}
// tag_list            : '@' @tag tag ' ' [tag_list]
// tag                 : @fill 'T'['(' [@child set_list] ')']
// unscoped_set_tail   : {':' @child unscoped_set | ' ' @sibling unscoped_set | ':' untagged_scoped_set | ' ' @sibling scoped_set}

//// General tags and labels
// Unlike the simpler grammars, unscoped sets are filled with any `label`
// and tags with any `id`, instead of fixed placeholder characters.
file                : [@child set_list]
set_list            : scoped_set [separator @sibling set_list] | unscoped_set [unscoped_separator @sibling set_list]
scoped_set          : {[tag_list] untagged_scoped_set}
untagged_scoped_set : '{' [@child set_list] '}' | alt_scope_beg [@child set_list] alt_scope_end
unscoped_set        : {[tag_list] @fill label [unscoped_set_tail]}
tag_list            : '@' @tag tag ' ' [tag_list]
tag                 : @fill id['(' [@child set_list] ')']
unscoped_set_tail   : {':' @child unscoped_set | ' ' @sibling unscoped_set | ':' untagged_scoped_set | ' ' @sibling scoped_set}

/* Comments
 *
 * Comments around nodes are accessible to the user. Here's how they behave:
 *   - The text inside a comment immediately following a node is stored as the
 *     comment_after member of that node. No newlines can happen between a
 *     node and its comment_after.
 *   - The text inside a comment preceding a node is stored as the
 *     comment_before of that node unless it is already the comment_after
 *     of another node. One newline between the comment and the node is
 *     obviously necessary in the case of C++-style comments and it's also
 *     allowed for C-style comments.
 *   - If the first character inside a C++-style comment is a space, it's
 *     omitted from the stored string.
 *
 * The semantically annotated Backus-Naur form that we're using is not a
 * good fit to describe the grammar of comments.
 * To prevent the comment in "a /* comment */ b" from being interpreted as
 * a comment_before of "b", instead of what it is (a comment_after of "a"),
 * we would have to complicate the grammar by introducing several extra
 * productions with this specific purpose in mind.
 * The sensitivity of whitespace in the attachment of comments to nodes is
 * also cumbersome to express in BNF. */