Made the initial whitespace parser

This commit is contained in:
Edward R. Gonzalez 2024-03-02 18:19:27 -05:00
parent 035c726a71
commit b0adfbf5f5
11 changed files with 442 additions and 15 deletions

View File

View File

@ -4,6 +4,7 @@ package sectr
import "base:builtin"
copy :: builtin.copy
import "base:intrinsics"
ptr_sub :: intrinsics.ptr_sub
type_has_field :: intrinsics.type_has_field
type_elem_type :: intrinsics.type_elem_type
import "base:runtime"
@ -60,8 +61,9 @@ import "core:time"
import "core:unicode"
is_white_space :: unicode.is_white_space
import "core:unicode/utf8"
str_rune_count :: utf8.rune_count_in_string
runes_to_string :: utf8.runes_to_string
string_to_runes :: utf8.string_to_runes
// string_to_runes :: utf8.string_to_runes
OS_Type :: type_of(ODIN_OS)
@ -84,3 +86,7 @@ to_string :: proc {
runes_to_string,
str_builder_to_string,
}
// Reinterprets `context.user_ptr` as a pointer to the requested type.
// No checking is performed: the caller is responsible for having stored
// a value of `Type` behind `context.user_ptr` beforehand.
context_ext :: proc( $ Type : typeid ) -> (^Type) {
	typed_ptr := (^Type)(context.user_ptr)
	return typed_ptr
}

View File

@ -15,8 +15,18 @@ Array :: struct ( $ Type : typeid ) {
data : [^]Type,
}
array_to_slice :: proc( using self : Array( $ Type) ) -> []Type {
return slice_ptr( data, num )
// Recovers the Array header that sits directly in front of a slice's elements.
//
// array_init_reserve allocates the header and the element storage as a single
// block, with `data` pointing just past the header. Given a slice whose first
// element is the first element of such an array, this steps back by
// size_of(Array(Type)) and returns a copy of the header found there.
//
// Returns a zero-initialized Array for an empty slice (there is no first element to step back from).
// NOTE(review): the result is only meaningful for slices that begin at element 0
// of an Array produced by array_init_reserve; any other slice reads unrelated memory.
array_underlying_slice :: proc(slice: []($ Type)) -> Array(Type) {
	if len(slice) == 0 {
		return {}
	}
	header_size   := size_of( Array(Type))
	first_element := & slice[0]
	header        := cast( ^Array(Type)) ( uintptr(first_element) - uintptr(header_size) )
	return header ^
}
// Views the array's first `num` elements as a slice.
// The slice aliases the array's storage; nothing is copied.
array_to_slice :: proc( using self : Array($ Type) ) -> []Type {
	count := int(num)
	return data[ :count ]
}
array_grow_formula :: proc( value : u64 ) -> u64 {
@ -29,12 +39,12 @@ array_init :: proc( $ Type : typeid, allocator : Allocator ) -> ( Array(Type), A
array_init_reserve :: proc( $ Type : typeid, allocator : Allocator, capacity : u64 ) -> ( Array(Type), AllocatorError )
{
raw_data, result_code := alloc( int(capacity) * size_of(Type), allocator = allocator )
result : Array( Type);
result.data = cast( [^] Type ) raw_data
raw_data, result_code := alloc( size_of(Array) + int(capacity) * size_of(Type), allocator = allocator )
result := cast(^Array(Type)) raw_data;
result.data = cast( [^]Type ) ptr_offset( result, 1 )
result.allocator = allocator
result.capacity = capacity
return result, result_code
return (result ^), result_code
}
array_append :: proc( using self : ^ Array( $ Type), value : Type ) -> AllocatorError
@ -231,7 +241,7 @@ array_set_capacity :: proc( using self : ^ Array( $ Type ), new_capacity : u64 )
ensure( false, "Failed to allocate for new array capacity" )
return result_code
}
free( raw_data(data) )
free( data )
data = cast( [^] Type ) new_data
capacity = new_capacity
return result_code

21
code/grime_unicode.odin Normal file
View File

@ -0,0 +1,21 @@
package sectr
// Decodes a UTF-8 string into a freshly allocated slice of runes.
//
// content   : the text to decode.
// allocator : backs the rune storage (defaults to context.allocator).
//
// Returns the runes and the allocator error. On allocation failure the slice is nil.
string_to_runes :: proc( content : string, allocator := context.allocator ) -> ( []rune, AllocatorError )
{
	num := cast(u64) str_rune_count(content)

	runes_array, alloc_error := array_init_reserve( rune, allocator, num )
	if alloc_error != AllocatorError.None {
		ensure( false, "Failed to allocate runes array" )
		return nil, alloc_error
	}

	// array_init_reserve sets data/allocator/capacity but never `num`, so
	// array_to_slice( runes_array ) would not span the reserved elements and
	// the writes below would be out of bounds. Slice the reserved storage directly.
	runes := slice_ptr( runes_array.data, int(num) )

	// Iterating a string yields runes; the implicit index would be a byte offset,
	// so the rune index is tracked by hand.
	idx := 0
	for codepoint in content {
		runes[idx] = codepoint
		idx += 1
	}
	return runes, alloc_error
}

15
code/parser_code.odin Normal file
View File

@ -0,0 +1,15 @@
/* Code Agnostic Parser
This is a 'coding language agnostic' parser.
It's not meant to parse regular textual formats used in natural languages (paragraphs, sentences, etc).
It instead is meant to encode constructs significant to most programming languages.
AST Types:
* Word
* Operator
* BracketsScope
This parser supports parsing whitespace ASTs or raw text content.
*/
package sectr

View File

@ -0,0 +1,14 @@
/* Parser : Code Formatting
This is a prototype parser meant to parse whitespace formatting constructs used in text based languages.
These include indentation of a block, spacial alignment of similar statement components, etc.
This would be used to have awareness of constructs being associated with each other via formatting.
AST Types:
* Statement
* Block-Indent Group
* Aligned-Statements
*/
package sectr

358
code/parser_whitespace.odin Normal file
View File

@ -0,0 +1,358 @@
/* Parser: Whitespace
This is a prototype parser meant to only parse whitespace from visible blocks of code.
It's meant to be the most minimal useful AST for bootstrapping an AST Editor.
All symbols related directly to the parser are prefixed with the WS_ namespace.
The AST is composed of the following node types:
* Visible
* Spaces
* Tabs
* Line
AST_Visible tracks a slice of visible codepoints.
It tracks its neighboring ASTs (left or right), which should always be Spaces or Tabs.
AST_Spaces tracks a slice of singular or consecutive Spaces.
Neighboring ASTs should be either Visible or Tabs.
AST_Tabs tracks a slice of singular or consecutive Tabs.
Neighboring ASTs should be either Visible or Spaces.
AST_Line tracks a slice of AST nodes of Visible, Spaces, or Tabs that terminate with a New-Line token.
Neighboring ASTS are only Lines.
The ParseData struct will contain an Array of AST_Line. This represents the entire AST where the root is the first entry.
ASTs keep track of neighboring ASTs in double-linked list pattern for ease of use.
This may be removed in the future for performance reasons,
since this is a prototype it will only be removed if there is a performance issue.
Because this parser is so primitive, it can only be
manually constructed via an AST editor or from parsed text.
So there is only a parser directly dealing with text.
If it's constructed from an AST-Editor, there will not be a referenceable content string or runes derived from that content string.
Instead the AST's content will directly contain the runes associated.
*/
package sectr
import "core:os"
// Whitespace code points recognized by the whitespace lexer.
Rune_Space           :: ' '
Rune_Tab             :: '\t'
// Was 'r' (the letter), which made every 'r' in the text look like a carriage return.
Rune_Carriage_Return :: '\r'
Rune_New_Line        :: '\n'
// Rune_Tab_Vertical :: '\v'
// Classification the whitespace lexer assigns to each token.
// Backed by u32; Invalid is the zero value.
WS_TokenType :: enum u32 {
Invalid,
// Any rune that is not one of the whitespace runes the lexer recognizes.
Visible,
Space,
Tab,
New_Line,
// NOTE(review): presumably a sentinel equal to the number of token types — confirm.
Count,
}
// TODO(Ed) : The runes and token arrays should be handled by a slab allocator dedicated to ASTs
// This can grow in undeterministic ways, persistent will get very polluted otherwise.
// Output of ws_parser_lex.
WS_LexResult :: struct {
// NOTE(review): presumably the allocator and source text handed to ws_parser_lex — confirm the lexer assigns them.
allocator : Allocator,
content : string,
// Runes produced from the lexed content (via to_runes).
runes : []rune,
// Tokens appended by ws_parser_lex, one per run of same-typed runes.
tokens : Array(WS_Token),
}
// A lexed token: its classification and the position at which it starts.
WS_Token :: struct {
type : WS_TokenType,
// Lexer line/column counters captured when the token was started.
line, column : u32,
// NOTE(review): presumably points at the token's first rune — confirm that the lexer assigns it.
ptr : ^rune,
}
// Payload of a content-carrying node: either a slice of lexed tokens or a slice of raw runes.
// Per the file header, the rune form is meant for ASTs built by an AST editor rather than from parsed text.
WS_AST_Content :: union #no_nil {
[] WS_Token,
[] rune,
}
// A run of spaces.
WS_AST_Spaces :: struct {
content : WS_AST_Content,
// Links to neighboring nodes (doubly-linked list pattern, see file header).
using links : DLL_NodePN(WS_AST),
}
// A run of tabs.
WS_AST_Tabs :: struct {
content : WS_AST_Content,
using links : DLL_NodePN(WS_AST),
}
// A run of visible (non-whitespace) code points.
WS_AST_Visible :: struct {
content : WS_AST_Content,
using links : DLL_NodePN(WS_AST),
}
// A line: the list of Visible/Spaces/Tabs nodes it contains plus the token that terminated it.
WS_AST_Line :: struct {
// First/last node of the line (the parser reads and writes `first` and `last`).
using content : DLL_NodeFL(WS_AST),
// The New_Line token that ended this line.
end_token : ^ WS_Token,
using links : DLL_NodePN(WS_AST),
}
// Any node of the whitespace AST.
WS_AST :: union #no_nil {
WS_AST_Visible,
WS_AST_Spaces,
WS_AST_Tabs,
WS_AST_Line,
}
// A parse error: a message and the token it refers to.
WS_ParseError :: struct {
token : ^WS_Token,
msg : string,
}
// Fixed size of the `errors` array in WS_ParseResult.
WS_ParseError_Max :: 32
// Capacities passed to array_init_reserve for the node and line arrays in ws_parser_parse.
WS_NodeArray_ReserveSize :: Kilobyte * 4
WS_LineArray_RserveSize :: Kilobyte
// TODO(Ed) : The ast arrays should be handled by a slab allocator dedicated to ASTs
// This can grow in undeterministic ways, persistent will get very polluted otherwise.
// Output of ws_parser_parse.
WS_ParseResult :: struct {
content : string,
// Runes and tokens taken over from the lexer result.
runes : []rune,
tokens : Array(WS_Token),
// Storage for every AST node, including the line nodes.
nodes : Array(WS_AST),
// Pointers to the line nodes stored in `nodes`.
lines : Array( ^WS_AST_Line),
// NOTE(review): not written by the parser code visible here — confirm intended usage.
errors : [WS_ParseError_Max] WS_ParseError,
}
// @(private="file")
// AST :: WS_AST
// Lexes `content` into whitespace tokens.
//
// The content is decoded to runes, then split into tokens where each token covers a
// run of consecutive runes sharing the same WS_TokenType (Visible, Space, Tab, New_Line).
// A carriage return counts as New_Line only when it is immediately followed by a line feed (CRLF).
//
// content   : text to lex.
// allocator : backs the rune slice and the token array.
//
// Returns the lex result and the first allocator error encountered (None on success).
// Empty content yields a result with no tokens.
//
// NOTE(review): consecutive new-lines are still merged into a single New_Line token,
// as before; decide whether blank lines need their own tokens.
ws_parser_lex :: proc ( content : string, allocator : Allocator ) -> ( WS_LexResult, AllocatorError )
{
	LexerData :: struct {
		using result : WS_LexResult,

		head   : [^] rune,
		left   : i32,
		line   : u32,
		column : u32,
	}

	// Deliberately not `using lexer`: the result's `content` and `allocator` fields
	// would collide with this procedure's parameters of the same names.
	lexer : LexerData
	context.user_ptr = & lexer

	// Classifies the rune at the head of the stream. Invalid once the stream is exhausted.
	rune_type :: proc() -> WS_TokenType
	{
		using self := context_ext( LexerData)

		// Never read past the end of the rune buffer.
		if left <= 0 {
			return WS_TokenType.Invalid
		}

		switch head[0]
		{
			case Rune_Space:
				return WS_TokenType.Space

			case Rune_Tab:
				return WS_TokenType.Tab

			case Rune_New_Line:
				return WS_TokenType.New_Line

			// Support for CRLF format: the CR belongs to the new-line only if an LF follows it.
			case Rune_Carriage_Return:
				if left > 1 && head[1] == Rune_New_Line {
					return WS_TokenType.New_Line
				}
		}

		// Everything that isn't the supported whitespace code points is considered 'visible'
		// Eventually we should support other types of whitespace
		return WS_TokenType.Visible
	}

	// Consumes the head rune, updates line/column, and returns the type of the new head.
	advance :: proc() -> WS_TokenType
	{
		using self := context_ext( LexerData)

		consumed := head[0]
		head  = head[1:]
		left -= 1

		// Only the line feed ends a line, so a CRLF pair is counted once.
		if consumed == Rune_New_Line {
			line  += 1
			column = 0
		} else {
			column += 1
		}
		return rune_type()
	}

	lexer.content   = content
	lexer.allocator = allocator

	alloc_error : AllocatorError
	lexer.runes, alloc_error = to_runes( content, allocator )
	if alloc_error != AllocatorError.None {
		return lexer.result, alloc_error
	}

	lexer.left = cast(i32) len(lexer.runes)

	lexer.tokens, alloc_error = array_init_reserve( WS_Token, allocator, u64(lexer.left / 2) )
	if alloc_error != AllocatorError.None {
		ensure(false, "Failed to allocate token's array")
		return lexer.result, alloc_error
	}

	// Nothing to lex; indexing the first rune of an empty slice would be out of bounds.
	if lexer.left == 0 {
		return lexer.result, alloc_error
	}

	lexer.head   = raw_data( lexer.runes )
	lexer.line   = 0
	lexer.column = 0

	for lexer.left > 0
	{
		current : WS_Token
		current.type   = rune_type()
		current.line   = lexer.line
		current.column = lexer.column
		current.ptr    = & lexer.head[0]

		// Swallow the rest of the run; stops on a type change or at the end of the stream.
		for advance() == current.type {
		}

		alloc_error = array_append( & lexer.tokens, current )
		if alloc_error != AllocatorError.None {
			ensure(false, "Failed to append token to token array")
			return lexer.result, alloc_error
		}
	}

	return lexer.result, alloc_error
}
// Parses `content` into the whitespace AST.
//
// The text is lexed with ws_parser_lex, then every token run becomes a Visible, Spaces or
// Tabs node. Nodes accumulate on the current line until a New_Line token closes it, at
// which point the line node is stored and referenced from `lines`. Content after the last
// New_Line token is closed as a final line with a nil end_token.
//
// content   : text to parse.
// allocator : backs the lexer output and the node/line arrays.
//
// Returns the parse result and the first allocator error encountered (None on success).
//
// NOTE(review): `line.first`/`line.last` and the entries of `lines` point into `nodes`;
// presumably these dangle if `nodes` ever has to grow past its reserved capacity — confirm
// against array_append. Neighbor (prev/next) links are not populated yet.
ws_parser_parse :: proc( content : string, allocator : Allocator ) -> ( WS_ParseResult, AllocatorError )
{
	ParseData :: struct {
		using result : WS_ParseResult,

		left : u32,          // Tokens remaining, including the one at head
		head : [^]WS_Token,
		line : WS_AST_Line,  // Line currently being assembled
	}

	// Deliberately not `using parser`: the result's `content` field would collide
	// with this procedure's `content` parameter.
	parser : ParseData
	context.user_ptr = & parser

	//region Helper procs

	// Token after head, or nil when head is the last token.
	peek_next :: proc() -> ( ^WS_Token)
	{
		using self := context_ext( ParseData)
		if left <= 1 {
			return nil
		}
		return & head[1]
	}

	check_next :: proc( expected : WS_TokenType ) -> b32 {
		next := peek_next()
		return next != nil && next.type == expected
	}

	// Moves head to the next token if it has the expected type; returns nil otherwise.
	advance :: proc( expected : WS_TokenType ) -> (^WS_Token)
	{
		using self := context_ext( ParseData)
		next := peek_next()
		if next == nil {
			return nil
		}
		if next.type != expected {
			ensure( false, "Didn't get expected token type from next in lexed" )
			return nil
		}
		head  = head[1:]
		left -= 1
		return next
	}

	// Builds a content node from the token at head plus any directly following tokens of the same type.
	// Leaves head on the last token of the run.
	parse_content :: proc( $ Type : typeid, tok_type : WS_TokenType ) -> Type
	{
		using self := context_ext( ParseData)

		ast   : Type
		start := head
		count := 1
		for check_next( tok_type ) {
			advance( tok_type )
			count += 1
		}
		ast.content = start[ :count ]
		return ast
	}

	// Stores a node and records it on the line being assembled.
	add_node :: proc( ast : WS_AST ) //-> ( should_return : b32 )
	{
		using self := context_ext( ParseData)

		// TODO(Ed) : Harden this
		array_append( & nodes, ast )

		node := array_back( & nodes )
		if line.first == nil {
			line.first = node
		}
		// The newest node is always the tail, including when it is the only node.
		line.last = node
	}

	// Closes the line being assembled, stores it, and starts a fresh one.
	finish_line :: proc( end_token : ^WS_Token )
	{
		using self := context_ext( ParseData)
		line.end_token = end_token

		ast : WS_AST = line

		// TODO(Ed) : Harden This
		array_append( & nodes, ast )
		array_append( & lines, & array_back( & nodes).(WS_AST_Line) )
		line = {}
	}
	//endregion Helper procs

	lex, alloc_error := ws_parser_lex( content, allocator )
	if alloc_error != AllocatorError.None {
		return parser.result, alloc_error
	}

	parser.content = content
	parser.runes   = lex.runes
	parser.tokens  = lex.tokens

	parser.nodes, alloc_error = array_init_reserve( WS_AST, allocator, WS_NodeArray_ReserveSize )
	if alloc_error != AllocatorError.None {
		ensure( false, "Failed to allocate the AST node array" )
		return parser.result, alloc_error
	}

	parser.lines, alloc_error = array_init_reserve( ^WS_AST_Line, allocator, WS_LineArray_RserveSize )
	if alloc_error != AllocatorError.None {
		ensure( false, "Failed to allocate the AST line array" )
		return parser.result, alloc_error
	}

	parser.left = u32( parser.tokens.num )
	if parser.left == 0 {
		// Nothing was lexed; there is no first token to point head at.
		return parser.result, alloc_error
	}
	parser.head = parser.tokens.data

	// Parse Line
	for parser.left > 0
	{
		// TODO(Ed) : Harden this
		#partial switch parser.head[0].type
		{
			case WS_TokenType.Visible:
			{
				ast := parse_content( WS_AST_Visible, WS_TokenType.Visible )
				add_node( ast )
			}
			case WS_TokenType.Space:
			{
				ast := parse_content( WS_AST_Spaces, WS_TokenType.Space )
				add_node( ast )
			}
			case WS_TokenType.Tab:
			{
				ast := parse_content( WS_AST_Tabs, WS_TokenType.Tab )
				add_node( ast )
			}
			case WS_TokenType.New_Line:
			{
				finish_line( & parser.head[0] )
			}
		}

		// Step past the token (or the last token of the run) that was just handled.
		// Without this the loop never terminates.
		parser.head  = parser.head[1:]
		parser.left -= 1
	}

	// Text that does not end with a new-line still forms a line.
	if parser.line.first != nil {
		finish_line( nil )
	}

	return parser.result, alloc_error
}

View File

@ -11,7 +11,8 @@ debug_draw_text :: proc( content : string, pos : Vec2, size : f32, color : rl.Co
if len( content ) == 0 {
return
}
runes := to_runes( content, context.temp_allocator )
runes, alloc_error := to_runes( content, context.temp_allocator )
verify( alloc_error != AllocatorError.None, "Failed to temp allocate runes" )
font := font
if font.key == Font_Default.key {
@ -38,7 +39,8 @@ debug_draw_text_world :: proc( content : string, pos : Vec2, size : f32, color :
if len( content ) == 0 {
return
}
runes := to_runes( content, context.temp_allocator )
runes, alloc_error := to_runes( content, context.temp_allocator )
verify( alloc_error != AllocatorError.None, "Failed to temp allocate runes" )
font := font
if font.key == Font_Default.key {

View File

@ -192,11 +192,12 @@ push-location $path_root
return
}
$dependencies_built = $sectr_build_code -gt $module_build_failed
if ( -not $dependencies_built ) {
write-host 'Skipping sectr_host build, dependencies failed to build'
return
}
# TODO(Ed): FIX THIS
# $dependencies_built = $sectr_build_code -eq $module_build_failed
# if ( -not $dependencies_built ) {
# write-host 'Skipping sectr_host build, dependencies failed to build'
# return
# }
# Rebuild the host when its module changed or when the sectr module was just built.
# PowerShell compares with -eq and combines booleans with -or; '==' and '||' are not its comparison/logical operators.
$should_build = (check-ModuleForChanges $module_host) -or ( $sectr_build_code -eq $module_built )
if ( -not( $should_build)) {