Made the initial whitespace parser
This commit is contained in:
parent
035c726a71
commit
b0adfbf5f5
@ -4,6 +4,7 @@ package sectr
|
||||
import "base:builtin"
|
||||
copy :: builtin.copy
|
||||
import "base:intrinsics"
|
||||
ptr_sub :: intrinsics.ptr_sub
|
||||
type_has_field :: intrinsics.type_has_field
|
||||
type_elem_type :: intrinsics.type_elem_type
|
||||
import "base:runtime"
|
||||
@ -60,8 +61,9 @@ import "core:time"
|
||||
import "core:unicode"
|
||||
is_white_space :: unicode.is_white_space
|
||||
import "core:unicode/utf8"
|
||||
str_rune_count :: utf8.rune_count_in_string
|
||||
runes_to_string :: utf8.runes_to_string
|
||||
string_to_runes :: utf8.string_to_runes
|
||||
// string_to_runes :: utf8.string_to_runes
|
||||
|
||||
OS_Type :: type_of(ODIN_OS)
|
||||
|
||||
@ -84,3 +86,7 @@ to_string :: proc {
|
||||
runes_to_string,
|
||||
str_builder_to_string,
|
||||
}
|
||||
|
||||
context_ext :: proc( $ Type : typeid ) -> (^Type) {
|
||||
return cast(^Type) context.user_ptr
|
||||
}
|
||||
|
@ -15,8 +15,18 @@ Array :: struct ( $ Type : typeid ) {
|
||||
data : [^]Type,
|
||||
}
|
||||
|
||||
array_to_slice :: proc( using self : Array( $ Type) ) -> []Type {
|
||||
return slice_ptr( data, num )
|
||||
array_underlying_slice :: proc(slice: []($ Type)) -> Array(Type) {
|
||||
if len(slice) == 0 {
|
||||
return nil
|
||||
}
|
||||
array_size := size_of( Array(Type))
|
||||
raw_data := & slice[0]
|
||||
array_ptr := cast( ^Array(Type)) ( uintptr(first_element_ptr) - uintptr(array_size))
|
||||
return array_ptr ^
|
||||
}
|
||||
|
||||
array_to_slice :: proc( using self : Array($ Type) ) -> []Type {
|
||||
return slice_ptr( data, int(num) )
|
||||
}
|
||||
|
||||
array_grow_formula :: proc( value : u64 ) -> u64 {
|
||||
@ -29,12 +39,12 @@ array_init :: proc( $ Type : typeid, allocator : Allocator ) -> ( Array(Type), A
|
||||
|
||||
array_init_reserve :: proc( $ Type : typeid, allocator : Allocator, capacity : u64 ) -> ( Array(Type), AllocatorError )
|
||||
{
|
||||
raw_data, result_code := alloc( int(capacity) * size_of(Type), allocator = allocator )
|
||||
result : Array( Type);
|
||||
result.data = cast( [^] Type ) raw_data
|
||||
raw_data, result_code := alloc( size_of(Array) + int(capacity) * size_of(Type), allocator = allocator )
|
||||
result := cast(^Array(Type)) raw_data;
|
||||
result.data = cast( [^]Type ) ptr_offset( result, 1 )
|
||||
result.allocator = allocator
|
||||
result.capacity = capacity
|
||||
return result, result_code
|
||||
return (result ^), result_code
|
||||
}
|
||||
|
||||
array_append :: proc( using self : ^ Array( $ Type), value : Type ) -> AllocatorError
|
||||
@ -231,7 +241,7 @@ array_set_capacity :: proc( using self : ^ Array( $ Type ), new_capacity : u64 )
|
||||
ensure( false, "Failed to allocate for new array capacity" )
|
||||
return result_code
|
||||
}
|
||||
free( raw_data(data) )
|
||||
free( data )
|
||||
data = cast( [^] Type ) new_data
|
||||
capacity = new_capacity
|
||||
return result_code
|
||||
|
21
code/grime_unicode.odin
Normal file
21
code/grime_unicode.odin
Normal file
@ -0,0 +1,21 @@
|
||||
package sectr
|
||||
|
||||
string_to_runes :: proc( content : string, allocator := context.allocator ) -> ( []rune, AllocatorError )
|
||||
{
|
||||
num := cast(u64) str_rune_count(content)
|
||||
|
||||
runes_array, alloc_error := array_init_reserve( rune, allocator, num )
|
||||
if alloc_error != AllocatorError.None {
|
||||
ensure( false, "Failed to allocate runes array" )
|
||||
return nil, alloc_error
|
||||
}
|
||||
|
||||
runes := array_to_slice(runes_array)
|
||||
|
||||
idx := 0
|
||||
for codepoint in content {
|
||||
runes[idx] = codepoint
|
||||
idx += 1
|
||||
}
|
||||
return runes, alloc_error
|
||||
}
|
15
code/parser_code.odin
Normal file
15
code/parser_code.odin
Normal file
@ -0,0 +1,15 @@
|
||||
/* Code Agnostic Parser
|
||||
This is a 'coding langauge agnostic' parser.
|
||||
Its not meant to parse regular textual formats used in natural langauges (paragraphs, sentences, etc).
|
||||
It instead is meant to encode constructs significant to most programming languages.
|
||||
|
||||
AST Types:
|
||||
* Word
|
||||
* Operator
|
||||
* BracketsScope
|
||||
|
||||
This parser supports parsing whitepsace asts or raw text content.
|
||||
*/
|
||||
package sectr
|
||||
|
||||
|
14
code/parser_code_formatting.odin
Normal file
14
code/parser_code_formatting.odin
Normal file
@ -0,0 +1,14 @@
|
||||
/* Parser : Code Formatting
|
||||
This is a prototype parser meant to parse whitespace formatting constructs used in text based languages.
|
||||
These include indentation of a block, spacial alignment of similar statement components, etc.
|
||||
|
||||
This would be used to have awareness of constructs having associating with each other via formatting.
|
||||
|
||||
AST Types:
|
||||
|
||||
* Statement
|
||||
* Block-Indent Group
|
||||
* Aligned-Statements
|
||||
|
||||
*/
|
||||
package sectr
|
358
code/parser_whitespace.odin
Normal file
358
code/parser_whitespace.odin
Normal file
@ -0,0 +1,358 @@
|
||||
/* Parser: Whitespace
|
||||
This is a prototype parser meant to only parse whitespace from visible blocks of code.
|
||||
Its meant to be the most minimal useful AST for boostrapping an AST Editor.
|
||||
|
||||
All symbols related directly to the parser are prefixed with the WS_ namespace.
|
||||
|
||||
The AST is composed of the following node types:
|
||||
* Visible
|
||||
* Spaces
|
||||
* Tabs
|
||||
* Line
|
||||
|
||||
AST_Visible tracks a slice of visible codepoints.
|
||||
It tracks a neighboring ASTs (left or right) which should always be Spaces, or Tabs.
|
||||
|
||||
AST_Spaces tracks a slice of singluar or consecutive Spaces.
|
||||
Neighboring ASTS should either be Visible, Tabs.
|
||||
|
||||
AST_Tabs tracks a slice of singlar or consectuive Tabs.
|
||||
Neighboring ASTS should be either Visible or Spaces.
|
||||
|
||||
AST_Line tracks a slice of AST nodes of Visible, Spaces, or Tabs that terminate with a New-Line token.
|
||||
Neighboring ASTS are only Lines.
|
||||
|
||||
The ParseData struct will contain an Array of AST_Line. This represents the entire AST where the root is the first entry.
|
||||
ASTs keep track of neighboring ASTs in double-linked list pattern for ease of use.
|
||||
This may be removed in the future for perforamance reasons,
|
||||
since this is a prototype it will only be removed if there is a performance issue.
|
||||
|
||||
Because this parser is so primtive, it can only be
|
||||
manually constructed via an AST editor or from parsed text.
|
||||
So there is only a parser directly dealing with text.
|
||||
|
||||
If its constructed from an AST-Editor. There will not be a content string referencable or runes derived fromt hat content string.
|
||||
Instead the AST's content will directly contain the runes associated.
|
||||
*/
|
||||
package sectr
|
||||
|
||||
import "core:os"
|
||||
|
||||
Rune_Space :: ' '
|
||||
Rune_Tab :: '\t'
|
||||
Rune_Carriage_Return :: 'r'
|
||||
Rune_New_Line :: '\n'
|
||||
// Rune_Tab_Vertical :: '\v'
|
||||
|
||||
WS_TokenType :: enum u32 {
|
||||
Invalid,
|
||||
Visible,
|
||||
Space,
|
||||
Tab,
|
||||
New_Line,
|
||||
Count,
|
||||
}
|
||||
|
||||
// TODO(Ed) : The runes and token arrays should be handled by a slab allocator dedicated to ASTs
|
||||
// This can grow in undeterministic ways, persistent will get very polluted otherwise.
|
||||
WS_LexResult :: struct {
|
||||
allocator : Allocator,
|
||||
content : string,
|
||||
runes : []rune,
|
||||
tokens : Array(WS_Token),
|
||||
}
|
||||
|
||||
WS_Token :: struct {
|
||||
type : WS_TokenType,
|
||||
line, column : u32,
|
||||
ptr : ^rune,
|
||||
}
|
||||
|
||||
WS_AST_Content :: union #no_nil {
|
||||
[] WS_Token,
|
||||
[] rune,
|
||||
}
|
||||
|
||||
WS_AST_Spaces :: struct {
|
||||
content : WS_AST_Content,
|
||||
|
||||
using links : DLL_NodePN(WS_AST),
|
||||
}
|
||||
|
||||
WS_AST_Tabs :: struct {
|
||||
content : WS_AST_Content,
|
||||
|
||||
using links : DLL_NodePN(WS_AST),
|
||||
}
|
||||
|
||||
WS_AST_Visible :: struct {
|
||||
content : WS_AST_Content,
|
||||
|
||||
using links : DLL_NodePN(WS_AST),
|
||||
}
|
||||
|
||||
WS_AST_Line :: struct {
|
||||
using content : DLL_NodeFL(WS_AST),
|
||||
end_token : ^ WS_Token,
|
||||
|
||||
using links : DLL_NodePN(WS_AST),
|
||||
}
|
||||
|
||||
WS_AST :: union #no_nil {
|
||||
WS_AST_Visible,
|
||||
WS_AST_Spaces,
|
||||
WS_AST_Tabs,
|
||||
WS_AST_Line,
|
||||
}
|
||||
|
||||
WS_ParseError :: struct {
|
||||
token : ^WS_Token,
|
||||
msg : string,
|
||||
}
|
||||
|
||||
WS_ParseError_Max :: 32
|
||||
WS_NodeArray_ReserveSize :: Kilobyte * 4
|
||||
WS_LineArray_RserveSize :: Kilobyte
|
||||
|
||||
// TODO(Ed) : The ast arrays should be handled by a slab allocator dedicated to ASTs
|
||||
// This can grow in undeterministic ways, persistent will get very polluted otherwise.
|
||||
WS_ParseResult :: struct {
|
||||
content : string,
|
||||
runes : []rune,
|
||||
tokens : Array(WS_Token),
|
||||
nodes : Array(WS_AST),
|
||||
lines : Array( ^WS_AST_Line),
|
||||
errors : [WS_ParseError_Max] WS_ParseError,
|
||||
}
|
||||
|
||||
// @(private="file")
|
||||
// AST :: WS_AST
|
||||
|
||||
ws_parser_lex :: proc ( content : string, allocator : Allocator ) -> ( WS_LexResult, AllocatorError )
|
||||
{
|
||||
LexerData :: struct {
|
||||
using result : WS_LexResult,
|
||||
|
||||
head : [^] rune,
|
||||
left : i32,
|
||||
line : u32,
|
||||
column : u32,
|
||||
}
|
||||
using lexer : LexerData
|
||||
context.user_ptr = & lexer
|
||||
|
||||
rune_type :: proc() -> WS_TokenType
|
||||
{
|
||||
using self := context_ext( LexerData)
|
||||
|
||||
switch (head[0])
|
||||
{
|
||||
case Rune_Space:
|
||||
return WS_TokenType.Space
|
||||
|
||||
case Rune_Tab:
|
||||
return WS_TokenType.Tab
|
||||
|
||||
case Rune_New_Line:
|
||||
return WS_TokenType.New_Line
|
||||
|
||||
// Support for CRLF format
|
||||
case Rune_Carriage_Return:
|
||||
{
|
||||
previous := cast( ^ rune) (uintptr(head) - 1)
|
||||
if (previous ^) == Rune_New_Line {
|
||||
return WS_TokenType.New_Line
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Everything that isn't the supported whitespace code points is considered 'visible'
|
||||
// Eventually we should support other types of whitespace
|
||||
return WS_TokenType.Visible
|
||||
}
|
||||
|
||||
advance :: proc() -> WS_TokenType {
|
||||
using self := context_ext( LexerData)
|
||||
|
||||
head = head[1:]
|
||||
left -= 1
|
||||
column += 1
|
||||
type := rune_type()
|
||||
line += u32(type == WS_TokenType.New_Line)
|
||||
return type
|
||||
}
|
||||
|
||||
alloc_error : AllocatorError
|
||||
runes, alloc_error = to_runes( content, allocator )
|
||||
if alloc_error != AllocatorError.None {
|
||||
return result, alloc_error
|
||||
}
|
||||
|
||||
left = cast(i32) len(runes)
|
||||
head = & runes[0]
|
||||
|
||||
tokens, alloc_error = array_init_reserve( WS_Token, allocator, u64(left / 2) )
|
||||
if alloc_error != AllocatorError.None {
|
||||
ensure(false, "Failed to allocate token's array")
|
||||
return result, alloc_error
|
||||
}
|
||||
|
||||
line = 0
|
||||
column = 0
|
||||
|
||||
for ; left > 0;
|
||||
{
|
||||
current : WS_Token
|
||||
current.type = rune_type()
|
||||
current.line = line
|
||||
current.column = column
|
||||
|
||||
for ; advance() == current.type; {
|
||||
}
|
||||
|
||||
alloc_error = array_append( & tokens, current )
|
||||
if alloc_error != AllocatorError.None {
|
||||
ensure(false, "Failed to append token to token array")
|
||||
return lexer, alloc_error
|
||||
}
|
||||
}
|
||||
|
||||
return result, alloc_error
|
||||
}
|
||||
|
||||
ws_parser_parse :: proc( content : string, allocator : Allocator ) -> ( WS_ParseResult, AllocatorError )
|
||||
{
|
||||
ParseData :: struct {
|
||||
using result : WS_ParseResult,
|
||||
|
||||
left : u32,
|
||||
head : [^]WS_Token,
|
||||
line : WS_AST_Line,
|
||||
}
|
||||
|
||||
using parser : ParseData
|
||||
context.user_ptr = & result
|
||||
|
||||
//region Helper procs
|
||||
peek_next :: proc() -> ( ^WS_Token)
|
||||
{
|
||||
using self := context_ext( ParseData)
|
||||
if left - 1 == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
return head[ 1: ]
|
||||
}
|
||||
|
||||
check_next :: proc( expected : WS_TokenType ) -> b32 {
|
||||
using self := context_ext( ParseData)
|
||||
|
||||
next := peek_next()
|
||||
return next != nil && next.type == expected
|
||||
}
|
||||
|
||||
advance :: proc( expected : WS_TokenType ) -> (^WS_Token)
|
||||
{
|
||||
using self := context_ext( ParseData)
|
||||
next := peek_next()
|
||||
if next == nil {
|
||||
return nil
|
||||
}
|
||||
if next.type != expected {
|
||||
ensure( false, "Didn't get expected token type from next in lexed" )
|
||||
return nil
|
||||
}
|
||||
head = next
|
||||
return head
|
||||
}
|
||||
//endregion Helper procs
|
||||
|
||||
lex, alloc_error := ws_parser_lex( content, allocator )
|
||||
if alloc_error != AllocatorError.None {
|
||||
|
||||
}
|
||||
|
||||
runes = lex.runes
|
||||
tokens = lex.tokens
|
||||
|
||||
nodes, alloc_error = array_init_reserve( WS_AST, allocator, WS_NodeArray_ReserveSize )
|
||||
if alloc_error != AllocatorError.None {
|
||||
|
||||
}
|
||||
|
||||
lines, alloc_error = array_init_reserve( ^WS_AST_Line, allocator, WS_LineArray_RserveSize )
|
||||
if alloc_error != AllocatorError.None {
|
||||
|
||||
}
|
||||
|
||||
head = & tokens.data[0]
|
||||
|
||||
// Parse Line
|
||||
for ; left > 0;
|
||||
{
|
||||
parse_content :: proc( $ Type : typeid, tok_type : WS_TokenType ) -> Type
|
||||
{
|
||||
using self := context_ext( ParseData)
|
||||
|
||||
ast : Type
|
||||
start := head
|
||||
end : [^]WS_Token
|
||||
|
||||
for ; check_next( WS_TokenType.Visible ); {
|
||||
end = advance( tok_type )
|
||||
}
|
||||
ast.content = slice_ptr( start, ptr_sub( end, start ))
|
||||
return ast
|
||||
}
|
||||
|
||||
add_node :: proc( ast : WS_AST ) //-> ( should_return : b32 )
|
||||
{
|
||||
using self := context_ext( ParseData)
|
||||
|
||||
// TODO(Ed) : Harden this
|
||||
array_append( & nodes, ast )
|
||||
|
||||
if line.first == nil {
|
||||
line.first = array_back( & nodes )
|
||||
}
|
||||
else
|
||||
{
|
||||
line.last = array_back( & nodes)
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(Ed) : Harden this
|
||||
#partial switch head[0].type
|
||||
{
|
||||
case WS_TokenType.Visible:
|
||||
{
|
||||
ast := parse_content( WS_AST_Visible, WS_TokenType.Visible )
|
||||
add_node( ast )
|
||||
}
|
||||
case WS_TokenType.Space:
|
||||
{
|
||||
ast := parse_content( WS_AST_Visible, WS_TokenType.Space )
|
||||
add_node( ast )
|
||||
}
|
||||
case WS_TokenType.Tab:
|
||||
{
|
||||
ast := parse_content( WS_AST_Tabs, WS_TokenType.Tab )
|
||||
add_node( ast )
|
||||
}
|
||||
case WS_TokenType.New_Line:
|
||||
{
|
||||
line.end_token = head
|
||||
|
||||
ast : WS_AST
|
||||
ast = line
|
||||
|
||||
// TODO(Ed) : Harden This
|
||||
array_append( & nodes, ast )
|
||||
array_append( & lines, & array_back( & nodes).(WS_AST_Line) )
|
||||
line = {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result, alloc_error
|
||||
}
|
@ -11,7 +11,8 @@ debug_draw_text :: proc( content : string, pos : Vec2, size : f32, color : rl.Co
|
||||
if len( content ) == 0 {
|
||||
return
|
||||
}
|
||||
runes := to_runes( content, context.temp_allocator )
|
||||
runes, alloc_error := to_runes( content, context.temp_allocator )
|
||||
verify( alloc_error != AllocatorError.None, "Failed to temp allocate runes" )
|
||||
|
||||
font := font
|
||||
if font.key == Font_Default.key {
|
||||
@ -38,7 +39,8 @@ debug_draw_text_world :: proc( content : string, pos : Vec2, size : f32, color :
|
||||
if len( content ) == 0 {
|
||||
return
|
||||
}
|
||||
runes := to_runes( content, context.temp_allocator )
|
||||
runes, alloc_error := to_runes( content, context.temp_allocator )
|
||||
verify( alloc_error != AllocatorError.None, "Failed to temp allocate runes" )
|
||||
|
||||
font := font
|
||||
if font.key == Font_Default.key {
|
||||
|
@ -192,11 +192,12 @@ push-location $path_root
|
||||
return
|
||||
}
|
||||
|
||||
$dependencies_built = $sectr_build_code -gt $module_build_failed
|
||||
if ( -not $dependencies_built ) {
|
||||
write-host 'Skipping sectr_host build, dependencies failed to build'
|
||||
return
|
||||
}
|
||||
# TODO(Ed): FIX THIS
|
||||
# $dependencies_built = $sectr_build_code -eq $module_build_failed
|
||||
# if ( -not $dependencies_built ) {
|
||||
# write-host 'Skipping sectr_host build, dependencies failed to build'
|
||||
# return
|
||||
# }
|
||||
|
||||
$should_build = (check-ModuleForChanges $module_host) || ( $sectr_build_code == $module_built )
|
||||
if ( -not( $should_build)) {
|
||||
|
Loading…
Reference in New Issue
Block a user