Made the initial whitespace parser
This commit is contained in:
		| @@ -4,6 +4,7 @@ package sectr | ||||
| import "base:builtin" | ||||
| 	copy :: builtin.copy | ||||
| import "base:intrinsics" | ||||
| 	ptr_sub        :: intrinsics.ptr_sub | ||||
| 	type_has_field :: intrinsics.type_has_field | ||||
| 	type_elem_type :: intrinsics.type_elem_type | ||||
| import "base:runtime" | ||||
| @@ -60,8 +61,9 @@ import "core:time" | ||||
| import "core:unicode" | ||||
| 	is_white_space  :: unicode.is_white_space | ||||
| import "core:unicode/utf8" | ||||
| 	str_rune_count  :: utf8.rune_count_in_string | ||||
| 	runes_to_string :: utf8.runes_to_string | ||||
| 	string_to_runes :: utf8.string_to_runes | ||||
| 	// string_to_runes :: utf8.string_to_runes | ||||
|  | ||||
| OS_Type :: type_of(ODIN_OS) | ||||
|  | ||||
| @@ -84,3 +86,7 @@ to_string :: proc { | ||||
| 	runes_to_string, | ||||
| 	str_builder_to_string, | ||||
| } | ||||
|  | ||||
| // Reinterprets the current context's user_ptr as a pointer to Type. | ||||
| // No validation is performed; the caller must have stored a matching pointer in context.user_ptr beforehand. | ||||
| context_ext :: proc( $ Type : typeid ) -> (^Type) { | ||||
| 	user_data := context.user_ptr | ||||
| 	return cast(^Type) user_data | ||||
| } | ||||
|   | ||||
| @@ -15,8 +15,18 @@ Array :: struct ( $ Type : typeid ) { | ||||
| 	data      : [^]Type, | ||||
| } | ||||
|  | ||||
| array_to_slice :: proc( using self : Array( $ Type) ) -> []Type { | ||||
| 	return slice_ptr( data, num ) | ||||
| // Recovers the Array header that owns `slice`. | ||||
| // Assumes `slice` starts at the first element of an Array whose header is stored immediately | ||||
| // before its element storage (the layout array_init_reserve appears to produce) - TODO confirm. | ||||
| // Passing any other slice reads unrelated memory. | ||||
| // Returns a zero-valued Array when the slice is empty, since there is no element to anchor on. | ||||
| array_underlying_slice :: proc(slice: []($ Type)) -> Array(Type) { | ||||
| 	if len(slice) == 0 { | ||||
| 		return {} | ||||
| 	} | ||||
| 	array_size        := size_of( Array(Type)) | ||||
| 	first_element_ptr := & slice[0] | ||||
| 	// Step back over the header that precedes the first element. | ||||
| 	array_ptr := cast( ^Array(Type)) ( uintptr(first_element_ptr) - uintptr(array_size)) | ||||
| 	return array_ptr ^ | ||||
| } | ||||
|  | ||||
| // Produces a slice over the array's element storage, spanning `num` elements. | ||||
| array_to_slice :: proc( using self : Array($ Type) ) -> []Type { | ||||
| 	count := int(num) | ||||
| 	return slice_ptr( data, count ) | ||||
| } | ||||
|  | ||||
| array_grow_formula :: proc( value : u64 ) -> u64 { | ||||
| @@ -29,12 +39,12 @@ array_init :: proc( $ Type : typeid, allocator : Allocator ) -> ( Array(Type), A | ||||
|  | ||||
| array_init_reserve :: proc( $ Type : typeid, allocator : Allocator, capacity : u64 ) -> ( Array(Type), AllocatorError ) | ||||
| { | ||||
| 	raw_data, result_code := alloc( int(capacity) * size_of(Type), allocator = allocator ) | ||||
| 	result : Array( Type); | ||||
| 	result.data      = cast( [^] Type ) raw_data | ||||
| 	raw_data, result_code := alloc( size_of(Array) + int(capacity) * size_of(Type), allocator = allocator ) | ||||
| 	result          := cast(^Array(Type)) raw_data; | ||||
| 	result.data      = cast( [^]Type ) ptr_offset( result, 1 ) | ||||
| 	result.allocator = allocator | ||||
| 	result.capacity  = capacity | ||||
| 	return result, result_code | ||||
| 	return (result ^), result_code | ||||
| } | ||||
|  | ||||
| array_append :: proc( using self : ^ Array( $ Type), value : Type ) -> AllocatorError | ||||
| @@ -231,7 +241,7 @@ array_set_capacity :: proc( using self : ^ Array( $ Type ), new_capacity : u64 ) | ||||
| 		ensure( false, "Failed to allocate for new array capacity" ) | ||||
| 		return result_code | ||||
| 	} | ||||
| 	free( raw_data(data) ) | ||||
| 	free( data ) | ||||
| 	data     = cast( [^] Type ) new_data | ||||
| 	capacity = new_capacity | ||||
| 	return result_code | ||||
|   | ||||
							
								
								
									
										21
									
								
								code/grime_unicode.odin
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								code/grime_unicode.odin
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,21 @@ | ||||
| package sectr | ||||
|  | ||||
| // Decodes `content` into a newly allocated slice of runes. | ||||
| // The storage is obtained from `allocator` through array_init_reserve. | ||||
| // Returns (nil, error) when the allocation fails. | ||||
| string_to_runes :: proc( content : string, allocator := context.allocator ) -> ( []rune, AllocatorError ) | ||||
| { | ||||
| 	num := cast(u64) str_rune_count(content) | ||||
|  | ||||
| 	runes_array, alloc_error := array_init_reserve( rune, allocator, num ) | ||||
| 	if alloc_error != AllocatorError.None { | ||||
| 		ensure( false, "Failed to allocate runes array" ) | ||||
| 		return nil, alloc_error | ||||
| 	} | ||||
|  | ||||
| 	// array_init_reserve only records the capacity; the element count is left untouched. | ||||
| 	// Going through array_to_slice here would therefore give a zero-length slice and the writes | ||||
| 	// below would be out of bounds, so the slice is built over the reserved storage directly. | ||||
| 	runes := slice_ptr( runes_array.data, int(num) ) | ||||
|  | ||||
| 	idx := 0 | ||||
| 	for codepoint in content { | ||||
| 		runes[idx] = codepoint | ||||
| 		idx        += 1 | ||||
| 	} | ||||
| 	return runes, alloc_error | ||||
| } | ||||
							
								
								
									
										15
									
								
								code/parser_code.odin
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								code/parser_code.odin
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,15 @@ | ||||
| /* Code Agnostic Parser | ||||
| This is a 'coding language agnostic' parser. | ||||
| It's not meant to parse regular textual formats used in natural languages (paragraphs, sentences, etc). | ||||
| It instead is meant to encode constructs significant to most programming languages. | ||||
|  | ||||
| AST Types: | ||||
| * Word | ||||
| * Operator | ||||
| * BracketsScope | ||||
|  | ||||
| This parser supports parsing whitespace ASTs or raw text content. | ||||
| */ | ||||
| package sectr | ||||
|  | ||||
|  | ||||
							
								
								
									
										14
									
								
								code/parser_code_formatting.odin
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								code/parser_code_formatting.odin
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,14 @@ | ||||
| /* Parser : Code Formatting | ||||
| This is a prototype parser meant to parse whitespace formatting constructs used in text based languages. | ||||
| These include indentation of a block, spatial alignment of similar statement components, etc. | ||||
|  | ||||
| This would be used to have awareness of constructs being associated with each other via formatting. | ||||
|  | ||||
| AST Types: | ||||
|  | ||||
| * Statement | ||||
| * Block-Indent Group | ||||
| * Aligned-Statements | ||||
|  | ||||
| */ | ||||
| package sectr | ||||
							
								
								
									
										358
									
								
								code/parser_whitespace.odin
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										358
									
								
								code/parser_whitespace.odin
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,358 @@ | ||||
| /* Parser: Whitespace | ||||
| This is a prototype parser meant to only parse whitespace from visible blocks of code. | ||||
| It's meant to be the most minimal useful AST for bootstrapping an AST Editor. | ||||
|  | ||||
| All symbols related directly to the parser are prefixed with the WS_ namespace. | ||||
|  | ||||
| The AST is composed of the following node types: | ||||
| * Visible | ||||
| * Spaces | ||||
| * Tabs | ||||
| * Line | ||||
|  | ||||
| AST_Visible tracks a slice of visible codepoints. | ||||
| It tracks neighboring ASTs (left or right), which should always be Spaces or Tabs. | ||||
|  | ||||
| AST_Spaces tracks a slice of singular or consecutive Spaces. | ||||
| Neighboring ASTs should be either Visible or Tabs. | ||||
|  | ||||
| AST_Tabs tracks a slice of singular or consecutive Tabs. | ||||
| Neighboring ASTs should be either Visible or Spaces. | ||||
|  | ||||
| AST_Line tracks a slice of AST nodes of Visible, Spaces, or Tabs that terminate with a New-Line token. | ||||
| Neighboring ASTS are only Lines. | ||||
|  | ||||
| The ParseData struct will contain an Array of AST_Line. This represents the entire AST where the root is the first entry. | ||||
| ASTs keep track of neighboring ASTs in a doubly-linked list pattern for ease of use. | ||||
| This may be removed in the future for performance reasons; | ||||
| since this is a prototype, it will only be removed if there is a performance issue. | ||||
|  | ||||
| Because this parser is so primitive, it can only be | ||||
| manually constructed via an AST editor or from parsed text. | ||||
| So there is only a parser directly dealing with text. | ||||
|  | ||||
| If it's constructed from an AST editor, there will not be a referenceable content string or runes derived from that content string. | ||||
| Instead the AST's content will directly contain the runes associated. | ||||
| */ | ||||
| package sectr | ||||
|  | ||||
| import "core:os" | ||||
|  | ||||
| // Code points the whitespace lexer treats specially. | ||||
| // Rune_Carriage_Return must be the escape '\r'; a plain 'r' would match the letter r. | ||||
| Rune_Space           :: ' ' | ||||
| Rune_Tab             :: '\t' | ||||
| Rune_Carriage_Return :: '\r' | ||||
| Rune_New_Line        :: '\n' | ||||
| // Rune_Tab_Vertical :: '\v' | ||||
|  | ||||
| // Classification the whitespace lexer assigns to a token. | ||||
| // Visible is the fallback for every code point that is not a recognized space, tab, or new-line rune. | ||||
| // NOTE(review): Count looks like an entry-count sentinel rather than a real token type - confirm. | ||||
| WS_TokenType :: enum u32 { | ||||
| 	Invalid, | ||||
| 	Visible, | ||||
| 	Space, | ||||
| 	Tab, | ||||
| 	New_Line, | ||||
| 	Count, | ||||
| } | ||||
|  | ||||
| // TODO(Ed) : The runes and token arrays should be handled by a slab allocator dedicated to ASTs | ||||
| // This can grow in undeterministic ways, persistent will get very polluted otherwise. | ||||
| // | ||||
| // Output of ws_parser_lex. | ||||
| // runes  : the code points decoded from the lexed text. | ||||
| // tokens : the tokens produced from those runes. | ||||
| // NOTE(review): allocator and content are presumably the allocator used and the source text - confirm they are filled by the lexer. | ||||
| WS_LexResult :: struct { | ||||
| 	allocator : Allocator, | ||||
| 	content   : string, | ||||
| 	runes     : []rune, | ||||
| 	tokens    : Array(WS_Token), | ||||
| } | ||||
|  | ||||
| // A single lexed token. | ||||
| // type         : classification of the token. | ||||
| // line, column : lexer position counters captured when the token was started. | ||||
| // NOTE(review): ptr presumably refers to the token's first rune - confirm against the lexer. | ||||
| WS_Token :: struct { | ||||
| 	type         : WS_TokenType, | ||||
| 	line, column : u32, | ||||
| 	ptr          : ^rune, | ||||
| } | ||||
|  | ||||
| // Payload of a leaf AST node: either a slice of lexed tokens or a slice of runes. | ||||
| // Per the file header, the rune form is for ASTs built by an AST editor with no backing content string. | ||||
| WS_AST_Content :: union #no_nil { | ||||
| 	[] WS_Token, | ||||
| 	[] rune, | ||||
| } | ||||
|  | ||||
| // Leaf node for a run of spaces. | ||||
| // NOTE(review): links is presumably the previous/next sibling pair of the doubly-linked list described in the file header - confirm against DLL_NodePN. | ||||
| WS_AST_Spaces :: struct { | ||||
| 	content : WS_AST_Content, | ||||
|  | ||||
| 	using links : DLL_NodePN(WS_AST), | ||||
| } | ||||
|  | ||||
| // Leaf node for a run of tabs; same layout as WS_AST_Spaces. | ||||
| WS_AST_Tabs :: struct { | ||||
| 	content : WS_AST_Content, | ||||
|  | ||||
| 	using links : DLL_NodePN(WS_AST), | ||||
| } | ||||
|  | ||||
| // Leaf node for a run of visible (non-whitespace) code points; same layout as WS_AST_Spaces. | ||||
| WS_AST_Visible :: struct { | ||||
| 	content : WS_AST_Content, | ||||
|  | ||||
| 	using links : DLL_NodePN(WS_AST), | ||||
| } | ||||
|  | ||||
| // Node for one line of Visible, Spaces, and Tabs nodes. | ||||
| // content   : the line's child nodes; the parser reads and writes its `first` and `last` members. | ||||
| // end_token : the New_Line token that terminated the line. | ||||
| // NOTE(review): links presumably connects neighboring lines - confirm against DLL_NodePN. | ||||
| WS_AST_Line :: struct { | ||||
| 	using content : DLL_NodeFL(WS_AST), | ||||
| 	end_token     : ^ WS_Token, | ||||
|  | ||||
| 	using links : DLL_NodePN(WS_AST), | ||||
| } | ||||
|  | ||||
| // Any node of the whitespace AST: one of the three leaf kinds or a line. | ||||
| WS_AST :: union #no_nil { | ||||
| 	WS_AST_Visible, | ||||
| 	WS_AST_Spaces, | ||||
| 	WS_AST_Tabs, | ||||
| 	WS_AST_Line, | ||||
| } | ||||
|  | ||||
| // A parse diagnostic: a message paired with a token pointer. | ||||
| // NOTE(review): token is presumably the token at which the problem was detected - nothing visible populates it yet. | ||||
| WS_ParseError :: struct { | ||||
| 	token : ^WS_Token, | ||||
| 	msg   : string, | ||||
| } | ||||
|  | ||||
| // WS_ParseError_Max        : length of the fixed errors array in WS_ParseResult. | ||||
| // WS_NodeArray_ReserveSize : capacity requested for the parser's node array. | ||||
| // WS_LineArray_RserveSize  : capacity requested for the parser's line array. | ||||
| WS_ParseError_Max        :: 32 | ||||
| WS_NodeArray_ReserveSize :: Kilobyte * 4 | ||||
| WS_LineArray_RserveSize  :: Kilobyte | ||||
|  | ||||
| // TODO(Ed) : The ast arrays should be handled by a slab allocator dedicated to ASTs | ||||
| // This can grow in undeterministic ways, persistent will get very polluted otherwise. | ||||
| // | ||||
| // Output of ws_parser_parse. | ||||
| // runes, tokens : carried over from the lex result. | ||||
| // nodes         : storage for every AST node, line nodes included. | ||||
| // lines         : pointers to the line nodes stored in `nodes`. | ||||
| // errors        : fixed-size buffer of parse diagnostics. | ||||
| // NOTE(review): content is presumably the parsed source text - confirm it is filled by the parser. | ||||
| WS_ParseResult :: struct { | ||||
| 	content   : string, | ||||
| 	runes     : []rune, | ||||
| 	tokens    : Array(WS_Token), | ||||
| 	nodes     : Array(WS_AST), | ||||
| 	lines     : Array( ^WS_AST_Line), | ||||
| 	errors    : [WS_ParseError_Max] WS_ParseError, | ||||
| } | ||||
|  | ||||
| // @(private="file") | ||||
| // AST :: WS_AST | ||||
|  | ||||
| // Lexes `content` into whitespace tokens. | ||||
| // Each token covers a run of consecutive code points sharing the same WS_TokenType. | ||||
| // | ||||
| // content   : text to lex; empty text yields a result with no tokens. | ||||
| // allocator : used for the rune slice and the token array. | ||||
| // | ||||
| // Returns the lex result and the first allocation error hit (AllocatorError.None on success). | ||||
| // | ||||
| // The nested helpers cannot capture locals, so the lexer state is shared with them through context.user_ptr. | ||||
| ws_parser_lex :: proc ( content : string, allocator : Allocator ) -> ( WS_LexResult, AllocatorError ) | ||||
| { | ||||
| 	LexerData :: struct { | ||||
| 		using result : WS_LexResult, | ||||
|  | ||||
| 		head   : [^] rune, | ||||
| 		left   : i32, | ||||
| 		line   : u32, | ||||
| 		column : u32, | ||||
| 	} | ||||
| 	// Fields are accessed through `lexer.` explicitly so they can never be confused with the | ||||
| 	// `content` and `allocator` parameters, which share names with WS_LexResult fields. | ||||
| 	lexer : LexerData | ||||
| 	context.user_ptr = & lexer | ||||
|  | ||||
| 	// Classifies the rune at the head. Only valid while at least one rune is left. | ||||
| 	rune_type :: proc() -> WS_TokenType | ||||
| 	{ | ||||
| 		using self := context_ext( LexerData) | ||||
|  | ||||
| 		switch (head[0]) | ||||
| 		{ | ||||
| 			case Rune_Space: | ||||
| 				return WS_TokenType.Space | ||||
|  | ||||
| 			case Rune_Tab: | ||||
| 				return WS_TokenType.Tab | ||||
|  | ||||
| 			case Rune_New_Line: | ||||
| 				return WS_TokenType.New_Line | ||||
|  | ||||
| 			// Support for CRLF format | ||||
| 			case Rune_Carriage_Return: | ||||
| 			{ | ||||
| 				// A carriage return is part of the line terminator only when a line feed directly follows it. | ||||
| 				// The look-ahead is guarded so the last rune never reads past the array. | ||||
| 				if left > 1 && head[1] == Rune_New_Line { | ||||
| 					return WS_TokenType.New_Line | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		// Everything that isn't the supported whitespace code points is considered 'visible' | ||||
| 		// Eventually we should support other types of whitespace | ||||
| 		return WS_TokenType.Visible | ||||
| 	} | ||||
|  | ||||
| 	// Consumes the rune at the head and returns the type of the rune that follows. | ||||
| 	// Returns Invalid once the input is exhausted; rune_type never yields Invalid, so that value ends any run. | ||||
| 	advance :: proc() -> WS_TokenType { | ||||
| 		using self := context_ext( LexerData) | ||||
|  | ||||
| 		// Position bookkeeping is based on the rune being consumed: | ||||
| 		// a line feed starts a new line (CRLF counts once), anything else moves one column over. | ||||
| 		if head[0] == Rune_New_Line { | ||||
| 			line  += 1 | ||||
| 			column = 0 | ||||
| 		} | ||||
| 		else { | ||||
| 			column += 1 | ||||
| 		} | ||||
|  | ||||
| 		head  = head[1:] | ||||
| 		left -= 1 | ||||
| 		if left <= 0 { | ||||
| 			return WS_TokenType.Invalid | ||||
| 		} | ||||
| 		return rune_type() | ||||
| 	} | ||||
|  | ||||
| 	lexer.allocator = allocator | ||||
| 	lexer.content   = content | ||||
|  | ||||
| 	alloc_error : AllocatorError | ||||
| 	lexer.runes, alloc_error = to_runes( content, allocator ) | ||||
| 	if alloc_error != AllocatorError.None { | ||||
| 		return lexer.result, alloc_error | ||||
| 	} | ||||
|  | ||||
| 	lexer.left = cast(i32) len(lexer.runes) | ||||
|  | ||||
| 	lexer.tokens, alloc_error = array_init_reserve( WS_Token, allocator, u64(lexer.left / 2) ) | ||||
| 	if alloc_error != AllocatorError.None { | ||||
| 		ensure(false, "Failed to allocate token's array") | ||||
| 		return lexer.result, alloc_error | ||||
| 	} | ||||
|  | ||||
| 	// Nothing to lex; taking the address of the first rune below would be out of bounds. | ||||
| 	if lexer.left == 0 { | ||||
| 		return lexer.result, alloc_error | ||||
| 	} | ||||
|  | ||||
| 	lexer.head   = & lexer.runes[0] | ||||
| 	lexer.line   = 0 | ||||
| 	lexer.column = 0 | ||||
|  | ||||
| 	for ; lexer.left > 0; | ||||
| 	{ | ||||
| 		current       : WS_Token | ||||
| 		current.type   = rune_type() | ||||
| 		current.line   = lexer.line | ||||
| 		current.column = lexer.column | ||||
| 		current.ptr    = & lexer.head[0] | ||||
|  | ||||
| 		// Swallow the rest of the run that shares this token's type. | ||||
| 		for ; advance() == current.type; { | ||||
| 		} | ||||
|  | ||||
| 		alloc_error = array_append( & lexer.tokens, current ) | ||||
| 		if alloc_error != AllocatorError.None { | ||||
| 			ensure(false, "Failed to append token to token array") | ||||
| 			return lexer.result, alloc_error | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return lexer.result, alloc_error | ||||
| } | ||||
|  | ||||
| // Lexes `content` and builds the whitespace AST from the resulting tokens. | ||||
| // | ||||
| // content   : text to parse. | ||||
| // allocator : used by the lexer and for the node and line arrays. | ||||
| // | ||||
| // Returns the parse result and the first allocation error hit while setting up (AllocatorError.None otherwise). | ||||
| // | ||||
| // Visible, Space, and Tab tokens become leaf nodes of the line being built; | ||||
| // a New_Line token closes that line and records it in `lines`. | ||||
| // NOTE(review): nodes after the final New_Line token are stored in `nodes` but never emitted as a line - confirm intent. | ||||
| // NOTE(review): pointers into `nodes` are held by lines; they go stale if the node array ever reallocates. | ||||
| // | ||||
| // The nested helpers cannot capture locals, so the parser state is shared with them through context.user_ptr. | ||||
| ws_parser_parse :: proc( content : string, allocator : Allocator ) -> ( WS_ParseResult, AllocatorError ) | ||||
| { | ||||
| 	ParseData :: struct { | ||||
| 		using result :  WS_ParseResult, | ||||
|  | ||||
| 		left  : u32, | ||||
| 		head  : [^]WS_Token, | ||||
| 		line  : WS_AST_Line, | ||||
| 	} | ||||
|  | ||||
| 	// Fields are accessed through `parser.` explicitly so they can never be confused with the | ||||
| 	// `content` parameter, which shares its name with a WS_ParseResult field. | ||||
| 	parser : ParseData | ||||
| 	// The helpers cast user_ptr to ParseData, so it has to refer to the whole parser state. | ||||
| 	context.user_ptr = & parser | ||||
|  | ||||
| 	//region Helper procs | ||||
|  | ||||
| 	// Returns the token after the head, or nil when the head is the last token. | ||||
| 	peek_next :: proc() -> ( ^WS_Token) | ||||
| 	{ | ||||
| 		using self := context_ext( ParseData) | ||||
| 		if left <= 1 { | ||||
| 			return nil | ||||
| 		} | ||||
|  | ||||
| 		return & head[1] | ||||
| 	} | ||||
|  | ||||
| 	// True when a next token exists and has the expected type. | ||||
| 	check_next :: proc(  expected : WS_TokenType ) -> b32 { | ||||
| 		next := peek_next() | ||||
| 		return next != nil && next.type == expected | ||||
| 	} | ||||
|  | ||||
| 	// Moves the head onto the next token when it has the expected type. | ||||
| 	// Returns the new head, or nil when there is no next token or its type differs. | ||||
| 	advance :: proc( expected : WS_TokenType ) -> (^WS_Token) | ||||
| 	{ | ||||
| 		using self := context_ext( ParseData) | ||||
| 		next := peek_next() | ||||
| 		if next == nil { | ||||
| 			return nil | ||||
| 		} | ||||
| 		if next.type != expected { | ||||
| 			ensure( false, "Didn't get expected token type from next in lexed" ) | ||||
| 			return nil | ||||
| 		} | ||||
| 		head  = next | ||||
| 		left -= 1 | ||||
| 		return next | ||||
| 	} | ||||
|  | ||||
| 	// Builds a leaf node of `Type` from the head token plus any directly following tokens of `tok_type`. | ||||
| 	// On return the head rests on the last token that belongs to the node. | ||||
| 	parse_content :: proc( $ Type : typeid, tok_type : WS_TokenType ) -> Type | ||||
| 	{ | ||||
| 		using self := context_ext( ParseData) | ||||
|  | ||||
| 		ast   : Type | ||||
| 		start := head | ||||
| 		count := 1 | ||||
|  | ||||
| 		for ; check_next( tok_type ); { | ||||
| 			advance( tok_type ) | ||||
| 			count += 1 | ||||
| 		} | ||||
| 		ast.content = slice_ptr( start, count ) | ||||
| 		return ast | ||||
| 	} | ||||
|  | ||||
| 	// Stores the node and records it as the first or last entry of the line being built. | ||||
| 	add_node :: proc( ast : WS_AST ) //-> ( should_return : b32 ) | ||||
| 	{ | ||||
| 		using self := context_ext( ParseData) | ||||
|  | ||||
| 		// TODO(Ed) : Harden this | ||||
| 		array_append( & nodes, ast ) | ||||
|  | ||||
| 		if line.first == nil { | ||||
| 			line.first = array_back( & nodes ) | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| 			line.last = array_back( & nodes) | ||||
| 		} | ||||
| 	} | ||||
| 	//endregion Helper procs | ||||
|  | ||||
| 	lex, alloc_error := ws_parser_lex( content, allocator ) | ||||
| 	if alloc_error != AllocatorError.None { | ||||
| 		return parser.result, alloc_error | ||||
| 	} | ||||
|  | ||||
| 	parser.content = content | ||||
| 	parser.runes   = lex.runes | ||||
| 	parser.tokens  = lex.tokens | ||||
|  | ||||
| 	parser.nodes, alloc_error = array_init_reserve( WS_AST, allocator, WS_NodeArray_ReserveSize ) | ||||
| 	if alloc_error != AllocatorError.None { | ||||
| 		ensure( false, "Failed to allocate nodes array" ) | ||||
| 		return parser.result, alloc_error | ||||
| 	} | ||||
|  | ||||
| 	parser.lines, alloc_error = array_init_reserve( ^WS_AST_Line, allocator, WS_LineArray_RserveSize ) | ||||
| 	if alloc_error != AllocatorError.None { | ||||
| 		ensure( false, "Failed to allocate lines array" ) | ||||
| 		return parser.result, alloc_error | ||||
| 	} | ||||
|  | ||||
| 	// Without a token count the loop below would never run. | ||||
| 	parser.left = u32( parser.tokens.num ) | ||||
| 	if parser.left == 0 { | ||||
| 		return parser.result, alloc_error | ||||
| 	} | ||||
| 	parser.head = & parser.tokens.data[0] | ||||
|  | ||||
| 	// Parse Line | ||||
| 	for ; parser.left > 0; | ||||
| 	{ | ||||
| 		// TODO(Ed) : Harden this | ||||
| 		#partial switch parser.head[0].type | ||||
| 		{ | ||||
| 			case WS_TokenType.Visible: | ||||
| 			{ | ||||
| 				ast := parse_content( WS_AST_Visible, WS_TokenType.Visible ) | ||||
| 				add_node( ast ) | ||||
| 			} | ||||
| 			case WS_TokenType.Space: | ||||
| 			{ | ||||
| 				ast := parse_content( WS_AST_Spaces, WS_TokenType.Space ) | ||||
| 				add_node( ast ) | ||||
| 			} | ||||
| 			case WS_TokenType.Tab: | ||||
| 			{ | ||||
| 				ast := parse_content( WS_AST_Tabs, WS_TokenType.Tab ) | ||||
| 				add_node( ast ) | ||||
| 			} | ||||
| 			case WS_TokenType.New_Line: | ||||
| 			{ | ||||
| 				parser.line.end_token = & parser.head[0] | ||||
|  | ||||
| 				ast : WS_AST | ||||
| 				ast = parser.line | ||||
|  | ||||
| 				// TODO(Ed) : Harden This | ||||
| 				array_append( & parser.nodes, ast ) | ||||
| 				array_append( & parser.lines, & array_back( & parser.nodes).(WS_AST_Line) ) | ||||
| 				parser.line = {} | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		// Step past the token the head rests on; without this the loop would never terminate. | ||||
| 		parser.head  = parser.head[1:] | ||||
| 		parser.left -= 1 | ||||
| 	} | ||||
|  | ||||
| 	return parser.result, alloc_error | ||||
| } | ||||
| @@ -11,7 +11,8 @@ debug_draw_text :: proc( content : string, pos : Vec2, size : f32, color : rl.Co | ||||
| 	if len( content ) == 0 { | ||||
| 		return | ||||
| 	} | ||||
| 	runes := to_runes( content, context.temp_allocator ) | ||||
| 	runes, alloc_error := to_runes( content, context.temp_allocator ) | ||||
| 	verify( alloc_error != AllocatorError.None, "Failed to temp allocate runes" ) | ||||
|  | ||||
| 	font := font | ||||
| 	if font.key == Font_Default.key { | ||||
| @@ -38,7 +39,8 @@ debug_draw_text_world :: proc( content : string, pos : Vec2, size : f32, color : | ||||
| 	if len( content ) == 0 { | ||||
| 		return | ||||
| 	} | ||||
| 	runes := to_runes( content, context.temp_allocator ) | ||||
| 	runes, alloc_error := to_runes( content, context.temp_allocator ) | ||||
| 	verify( alloc_error != AllocatorError.None, "Failed to temp allocate runes" ) | ||||
|  | ||||
| 	font := font | ||||
| 	if  font.key == Font_Default.key { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user