Finished V1

parent 5a1ae41716
commit 6411af15a6
@@ -9,6 +9,7 @@ $flag_all_c = '/TC'
$flag_c11 = '/std:c11'
$flag_all_cpp = '/TP'
$flag_compile = '/c'
$flag_charset_utf8 = '/utf-8'
$flag_debug = '/Zi'
$flag_define = '/D'
$flag_exceptions_disabled = '/EHsc-'
@@ -77,13 +78,15 @@ $compiler_args += $flag_exceptions_disabled
$compiler_args += $flag_RTTI_disabled
$compiler_args += $flag_preprocess_conform

# Set charset encoding for both execution and source to UTF-8

# Dump preprocess file
if ($false) {
	$compiler_args += $flag_preprocess_to_file
	$compiler_args += $flag_preprocess_preserve_comments
}

# Diagnostic loggign
# Diagnostic logging
$compiler_args += $flag_full_src_path

# Specifing output pathing
@@ -105,7 +108,7 @@ $unit = join-path $PSScriptRoot 'demo.str_cache.c'
$compiler_args += $flag_compile, $unit

# Diagnoistc print for the args
$compiler_args | ForEach-Object { Write-Host $_ }
# $compiler_args | ForEach-Object { Write-Host $_ }
write-host

# $compiler_args += ( $flag_define + 'DEMO_STR_SLICE' )
@@ -134,7 +137,7 @@ if ($true) {
$linker_args += $object

# Diagnoistc print for the args
$linker_args | ForEach-Object { Write-Host $_ }
# $linker_args | ForEach-Object { Write-Host $_ }
write-host

& $linker $linker_args
demo.str_cache.c (198 changed lines)
@@ -19,20 +19,26 @@ to see definitions related to a sepecific kind of data or operation (strings, me
#if 0
int main()
{
	VArena cache_arena; varena_init(cache_arena);
	StrCache cache = strcache_init(varena_ainfo(cache));
	// This will limit for our V1 read to 64kb at most.
	FMem_128KB read_mem = {0};
	FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );

	VArena file_arena; varena_init(file_arena);
	Str8 path_text = lit("../demo.strcache.c");
	FileContent text_file = file_read_contents(varena_ainfo(file_arena), path_text);
	// This will limit our V1 lex to only 8 megs worth of token tracking on a file.
	SliceByte mem_toks = slicemem_alloc(MEGABYTES(16));
	WATL_LexInfo lex_res = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);

	Arena ast_arena; arena_init(ast_arena);
	SliceByte mem_cache_strs = slicemem_alloc(MEGABYTES(64));
	SliceByte mem_cache_slots = slicemem_alloc(1024 * 1024 * 16 * size_of(Str8Cache_SliceSlot));
	SliceByte mem_cache_table = slicemem_alloc(1024 * 16 * size_of(Str8Cache_SliceSlot));
	Str8Cache str_cache = str8cache_init(mem_cache_strs, mem_cache_slots, mem_cache_table);

	WATL_ParseOps ops = { .str_cache = &cache, .node_backing = arena_ainfo(ast_arena) }
	WATL_ParsedInfo parsed = watl_parse(text_file.content, ops);
	SliceByte mem_parse_nodes = slicemem_alloc(MEGABYTES(4));
	SliceByte mem_parse_lines = slicemem_alloc(MEGABYTES(4));
	WATL_ParseInfo parse_res = watl_parse(lex_res.tokens, .backing_nodes = mem_parse_nodes, .backing_lines = mem_parse_lines, .str_cache = & str_cache);

	watl_dbg_dump(parsed.root);
	strcache_dbg_listing(cache);
	SliceByte scratch_dmp = slicemem_alloc(MEGABYTES(16));
	Str8 listing = watl_dump_listing(scratch_dmp, parse_res.lines);
	file_write_str8(lit("demo.str_cache.listing.txt"), listing);
	return 0;
}
#endif
@@ -57,8 +63,6 @@ Secondarily for the purposes of using the above sufficiently the following are a
* Read/Write Files
* Lexing & Parsing
* Debug printing

TODO(Ed): Do we introduce gencpp in this?
*/

/*
@@ -108,7 +112,7 @@ typedef S32 B32;

// Functional style cast
#define cast(type, data) ((type)(data))
#define pcast(type, data) * cast(type*, & data)
#define pcast(type, data) * cast(type*, & (data))

#define nullptr cast(void*, 0)
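The only change in this hunk is the added parentheses around data in pcast. A small illustration of why that matters, using a hypothetical example() and value that are not in the demo: without the parentheses an expression argument binds to the address-of operator first and compiles into a stray memory read, while the parenthesized form rejects the same misuse at compile time.

typedef unsigned int U32;
#define cast(type, data)      ((type)(data))
#define pcast_old(type, data) * cast(type*, & data)
#define pcast_new(type, data) * cast(type*, & (data))

U32 example(U32 value)
{
	/* pcast_old(U32, value + 4) expands to *((U32*)((& value + 4))):
	   '& value + 4' parses as (&value) + 4, so it compiles and silently
	   reads memory four U32s past the local. */

	/* pcast_new(U32, value + 4) expands to *((U32*)((& (value + 4)))):
	   taking the address of an rvalue is a compile error, so the same
	   mistake is caught at build time. */

	return pcast_new(U32, value); /* punning an actual lvalue still works */
}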
@@ -306,7 +310,11 @@ void slice__zero(SliceByte mem, SSIZE typewidth) {

// Now for our "Version 1"

#if defined(DEMO__FILE_READ_CONTENTS_V1) || defined(DEMO__WATL_LEX_V1) || defined(DEMO__WATL_PARSE_V1) || defined(DEMO__WATL_DUMP_PREREQ_V1) || defined(DEMO__WATL_DUMP_V1)
#if defined(DEMO__FILE_READ_CONTENTS_V1) || \
	defined(DEMO__WATL_LEX_V1) || \
	defined(DEMO__WATL_PARSE_V1) || \
	defined(DEMO__WATL_DUMP_PREREQ_V1) || \
	defined(DEMO__WATL_DUMP_V1)

struct FileOpResult
{
@@ -406,8 +414,6 @@ FileOpResult file__read_contents(Str8 path, Opts__read_file_contents* opts) {
#ifdef DEMO__FILE_READ_CONTENTS_V1
int main()
{
	set_utf8_codepage();

	// This will limit for our V1 read to 64kb at most.
	FMem_64KB read_mem = {0};
	FileOpResult res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
@@ -534,7 +540,10 @@ inline void farena_reset(FArena* arena) { arena->used = 0; }
inline ArenaSP farena_save (FArena arena) { ArenaSP savepoint; savepoint.ptr = arena.start; return savepoint; }
#pragma endregion FArena

#if defined(DEMO__WATL_LEX_V1) || defined(DEMO__WATL_PARSE_V1) || defined(DEMO__WATL_DUMP_PREREQ_V1) || defined(DEMO__WATL_DUMP_V1)
#if defined(DEMO__WATL_LEX_V1) || \
	defined(DEMO__WATL_PARSE_V1) || \
	defined(DEMO__WATL_DUMP_PREREQ_V1) || \
	defined(DEMO__WATL_DUMP_V1)

struct WATL_LexInfo {
	// For now just the tokens
@@ -544,8 +553,10 @@ struct WATL_LexInfo {
struct Opts__watl_lex {
/*
	For this operation we'll enforce that the arena must linearly allocate each token, forming a strictly adjacent sent of elements in an array.
	This is not necessary and an equivalent process could be done where the tokens instead are semi-contigously organized into linked list with a chained arena, or the tokens are sparely cached.
	Where their position in their originating string is not preserved. In this case we're keeping it simple. Tokens are in the same block of memory and they don't use a string cache.
	This is not necessary and an equivalent process could be done where the tokens instead are semi-contigously organized into linked list with a chained arena,
	or the tokens are sparely cached.
	Where their position in their originating string is not preserved. In this case we're keeping it simple.
	Tokens are in the same block of memory and they don't use a string cache.
*/
	SliceByte pool_toks;
};
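A minimal sketch of what the linear-allocation requirement above amounts to, with hypothetical TokView and FlatPool types standing in for the demo's token type and the pool_toks slice: every token is pushed onto one flat pool with nothing else allocated in between, so the lexer can return the whole run as a single contiguous array.

#include <stddef.h>

typedef struct { char const* ptr; ptrdiff_t len; } TokView;               /* stand-in for the demo's token   */
typedef struct { unsigned char* ptr; ptrdiff_t used, cap; } FlatPool;     /* stand-in for pool_toks          */

static TokView* pool_push_tok(FlatPool* pool) {
	if (pool->used + (ptrdiff_t)sizeof(TokView) > pool->cap) return 0;
	/* Each token lands immediately after the previous one ... */
	TokView* tok = (TokView*)(pool->ptr + pool->used);
	pool->used += (ptrdiff_t)sizeof(TokView);
	return tok;
}

/* ... so after lexing, the whole run is viewable as one array:
     TokView*  toks  = (TokView*)pool.ptr;
     ptrdiff_t count = pool.used / (ptrdiff_t)sizeof(TokView);            */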
@@ -649,8 +660,6 @@ void slicemem_free(SliceByte mem) {
#ifdef DEMO__WATL_LEX_V1
int main()
{
	set_utf8_codepage();

	// This will limit for our V1 read to 64kb at most.
	FMem_64KB read_mem = {0};
	FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
@@ -930,8 +939,6 @@ WATL_ParseInfo watl__parse(WATL_SliceTok tokens, Opts__watl_parse* opts) { WATL_
#ifdef DEMO__WATL_PARSE_V1
int main()
{
	set_utf8_codepage();

	// This will limit for our V1 read to 64kb at most.
	FMem_64KB read_mem = {0};
	FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
@@ -1123,11 +1130,11 @@ Str8 fmt__vtoken(SliceByte backing_tbl, SliceByte backing_buf, Str8 fmt_template

// Expected to take a Str8 array of entries formatted as a 2D array of key-value pairs (Str8[length][2])
// The array will be tracked using a SliceStr8 structure.
#define fmt_vtoken(backing_tbl, backing_buf, fmt_template, tokens) \
#define fmt_vtoken(backing_tbl, backing_buf, fmt_template, ...) \
fmt__vtoken(backing_tbl, backing_buf, lit(fmt_template), \
&(SliceStr8){ \
	.ptr = cast(Str8*, ((Str8[])tokens)), \
	.len = size_of( ((Str8[])tokens)) / size_of(Str8) \
	.ptr = (Str8[]){ __VA_ARGS__ }, \
	.len = (SSIZE)sizeof( (Str8[]){ __VA_ARGS__ } ) / size_of(Str8) \
} \
)
#pragma endregion fmt_vtoken
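The change above replaces the single tokens parameter with __VA_ARGS__ fed into an (Str8[]){...} compound literal, whose sizeof yields the entry count; a single macro parameter could never receive a braced list cleanly, because braces, unlike parentheses, do not protect commas from the preprocessor. A standalone sketch of the same idiom, not part of the commit, using a hypothetical Pair type and print_pairs macro rather than Str8 and SliceStr8:

#include <stdio.h>

typedef struct { char const* key; char const* val; } Pair;

/* Count the entries by building a throwaway compound-literal array. */
#define pair_count(...) \
	(sizeof((Pair[]){ __VA_ARGS__ }) / sizeof(Pair))

/* Hand the same compound literal plus its length to a plain function,
   mirroring how fmt_vtoken now builds its SliceStr8 argument. */
#define print_pairs(...) \
	print_pairs_fn((Pair[]){ __VA_ARGS__ }, pair_count(__VA_ARGS__))

void print_pairs_fn(Pair const* pairs, size_t len)
{
	for (size_t idx = 0; idx < len; ++idx)
		printf("%s = %s\n", pairs[idx].key, pairs[idx].val);
}

int main(void)
{
	/* Call sites pass entries directly; no extra brace wrapper needed. */
	print_pairs({ "line_num", "1" }, { "chunk_num", "3" });
	return 0;
}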
@@ -1137,14 +1144,15 @@ void str8gen__append_fmt(Str8Gen* gen, Str8 fmt_template, SliceStr8* tokens)
{
	local_persist FMem_64KB tbl_backing = {0};
	SliceByte fmt_backing = {gen->ptr + gen->len, gen->backing.len - gen->len};
	fmt__vtoken(fmem_slice(tbl_backing), fmt_backing, fmt_template, tokens);
	Str8 appended = fmt__vtoken(fmem_slice(tbl_backing), fmt_backing, fmt_template, tokens);
	gen->len += appended.len;
	return;
}
#define str8gen_append_fmt(gen, fmt_template, tokens) \
#define str8gen_append_fmt(gen, fmt_template, ...) \
str8gen__append_fmt(& gen, lit(fmt_template), \
&(SliceStr8){ \
	.ptr = cast(Str8*, ((Str8[])tokens)), \
	.len = size_of( ((Str8[])tokens)) / size_of(Str8) \
	.ptr = (Str8[]){ __VA_ARGS__ }, \
	.len = (SSIZE)sizeof( (Str8[]){ __VA_ARGS__ } ) / size_of(Str8) \
} \
)

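The fix above captures fmt__vtoken's return value and advances gen->len, since fmt__vtoken formats straight into the unused tail of the generator's backing buffer. A minimal sketch of that append-in-place pattern, not the demo's API, with a hypothetical Gen type and snprintf standing in for fmt__vtoken:

#include <stdio.h>

typedef struct { char* ptr; size_t len, cap; } Gen;

static void gen_append_fmt_u32(Gen* gen, char const* label, unsigned value)
{
	char*  tail     = gen->ptr + gen->len;            /* unused region starts here */
	size_t tail_cap = gen->cap - gen->len;
	int written = snprintf(tail, tail_cap, "%s: %u\n", label, value);
	if (written > 0 && (size_t)written < tail_cap)
		gen->len += (size_t)written;                  /* commit only what actually fit */
}

int main(void)
{
	char backing[256];
	Gen gen = { backing, 0, sizeof backing };
	gen_append_fmt_u32(&gen, "line_num", 1);
	gen_append_fmt_u32(&gen, "chunk_num", 3);
	fwrite(gen.ptr, 1, gen.len, stdout);
	return 0;
}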
@@ -1155,24 +1163,22 @@ Str8 mappings [][2] = {
^^ Add entries as above ^^
}
*/
#define fmt_entry(key, value) lit(key), lit(value)
#define fmt_entry(key, value) lit(key), value

#ifdef DEMO__WATL_DUMP_PREREQ_V1
int main()
{
	set_utf8_codepage();

	local_persist FMem_64KB tbl_scratch;
	SliceByte fmt_scratch = slicemem_alloc(MEGABYTES(8));
	Str8 test_str = fmt_vtoken(fmem_slice(tbl_scratch), fmt_scratch, "Will this work? <maybe_sub>", {
		fmt_entry("maybe_sub", "IT SUBST!!!")
	});
	Str8 test_str = fmt_vtoken(fmem_slice(tbl_scratch), fmt_scratch, "Will this work? <maybe_sub>",
		fmt_entry("maybe_sub", lit("IT SUBST!!!"))
	);

	SliceByte scratchgen = slicemem_alloc(MEGABYTES(16));
	Str8Gen gen = str8gen_make(scratchgen);
	str8gen_append_fmt(gen, "Testing now with Str8Gen!! <maybe_sub>!", {
		fmt_entry("maybe_sub", "lets fucking go!!!")
	});
	str8gen_append_fmt(gen, "Testing now with Str8Gen!! <maybe_sub>!",
		fmt_entry("maybe_sub", lit("lets fucking go!!!"))
	);
	return 0;
}
#endif
@@ -1180,6 +1186,19 @@ int main()
/*
We'll need to do some integer serialization for our dump listing's metrics.
*/

inline B32 char_is_upper(U8 c) { return('A' <= c && c <= 'Z'); }
inline U8 char_to_lower(U8 c) { if (char_is_upper(c)) { c += ('a' - 'A'); } return(c); }

inline
U8 integer_symbols(U8 value) {
	local_persist
	U8 lookup_table[16] = {
		'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F',
	};
	return lookup_table[value];
}

Str8 str8_from_u32(SliceByte mem, U32 num, U32 radix, U8 min_digits, U8 digit_group_separator)
{
	Str8 result = {.ptr = mem.ptr, .len = 0};
@@ -1191,6 +1210,18 @@ Str8 str8_from_u32(SliceByte mem, U32 num, U32 radix, U8 min_digits, U8 digit_gr
	case 2: { prefix = lit("0b"); } break;
	}

	U8 digit_group_size = 3;
	switch (radix)
	{
		default: break;
		case 2:
		case 8:
		case 16: {
			digit_group_size = 4;
		}
		break;
	}

	U32 needed_leading_zeros = 0;
	{
		U32 needed_digits = 1;
@@ -1210,12 +1241,57 @@ Str8 str8_from_u32(SliceByte mem, U32 num, U32 radix, U8 min_digits, U8 digit_gr
		U32 needed_separators = 0;
		if (digit_group_separator != 0)
		{

			needed_separators = (needed_digits + needed_leading_zeros) / digit_group_size;
			if (needed_separators > 0 && (needed_digits + needed_leading_zeros) % digit_group_size == 0) {
				needed_separators -= 1;
			}
		}

		result.len = prefix.len + needed_leading_zeros + needed_separators + needed_digits;
		assert(result.len <= mem.len);
	}

	// Fill Content
	{
		U32 num_reduce = num;
		U32 digits_until_separator = digit_group_size;
		for (U32 idx = 0; idx < result.len; idx += 1)
		{
			SSIZE separator_pos = result.len - idx - 1;
			if (digits_until_separator == 0 && digit_group_separator != 0) {
				result.ptr[separator_pos] = digit_group_separator;
				digits_until_separator = digit_group_size + 1;
			}
			else {
				result.ptr[separator_pos] = char_to_lower(integer_symbols(num_reduce % radix));
				num_reduce /= radix;
			}
			digits_until_separator -= 1;
			if (num_reduce == 0) {
				break;
			}
		}
		for (U32 leading_0_idx = 0; leading_0_idx < needed_leading_zeros; leading_0_idx += 1) {
			result.ptr[prefix.len + leading_0_idx] = '0';
		}
	}
	// Fill Prefix
	if (prefix.len > 0) {
		slice_copy(result, prefix);
	}
	return result;
}

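As a quick check of the separator math in str8_from_u32 above: for num = 1234567, radix 10, and a ',' separator, needed_digits is 7 and needed_separators is 7 / 3 = 2 (7 % 3 != 0, so no adjustment), giving result.len = 9 and the shape 1,234,567; for 123456 the 6 % 3 == 0 case kicks in and trims the count to 1, giving 123,456. The standalone sketch below reproduces the same fill-from-the-back scheme with a plain char buffer (decimal only, no prefix or min_digits handling); it is an illustration, not the demo's API.

#include <stdio.h>

static size_t u32_to_grouped(char* out, size_t cap, unsigned num, char sep)
{
	char   tmp[16];
	size_t len = 0, digits = 0;
	/* Emit digits least-significant first, inserting a separator every 3 digits. */
	do {
		if (digits != 0 && digits % 3 == 0 && sep != 0)
			tmp[len++] = sep;
		tmp[len++] = (char)('0' + num % 10);
		num /= 10;
		digits += 1;
	} while (num != 0 && len < sizeof tmp);
	if (len > cap) return 0;
	/* Reverse into the caller's buffer, mirroring result.ptr[result.len - idx - 1]. */
	for (size_t idx = 0; idx < len; ++idx)
		out[idx] = tmp[len - idx - 1];
	return len;
}

int main(void)
{
	char   buf[32];
	size_t len = u32_to_grouped(buf, sizeof buf, 1234567, ',');
	printf("%.*s\n", (int)len, buf); /* expected: 1,234,567 */
	return 0;
}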
// Utilized for file_write_str8:
#if 0
BOOL WriteFile(
	[in] HANDLE hFile,
	[in] LPCVOID lpBuffer,
	[in] DWORD nNumberOfBytesToWrite,
	[out, optional] LPDWORD lpNumberOfBytesWritten,
	[in, out, optional] LPOVERLAPPED lpOverlapped
);
#endif
/*
Lastly: Writting to file using the Win API.
*/
@@ -1262,44 +1338,36 @@ We now have what we need to create the structural listing dump for WATL's data s
Str8 watl_dump_listing(SliceByte buffer, WATL_SliceLine lines)
{
	local_persist FMem_64KB scratch = {0};
	FArena sarena = farena_init(fmem_slice(scratch));

	Str8Gen result = str8gen_make(buffer);
	U32 line_num = 0;
	for (slice_iter(lines, line))
	{
	#define fmt_entry_u32(label, num) lit(label), str8_from_u32(fmem_slice(scratch), num, 10, 0, 0)
	#define push_str8_u32() (SliceByte){ farena__push(& sarena, size_of(UTF8), 64, lit("UTF8")), 64 }
	#define fmt_entry_u32(label, num) lit(label), str8_from_u32(push_str8_u32(), num, 10, 0, 0)
		++ line_num;
		str8gen_append_fmt(result, "Line <line_num> - Chunks <chunk_num>:\n", {(
			fmt_entry_u32("line_num", line_num)
			, fmt_entry_u32("chunk_num", line->len)
		)});
		str8gen_append_fmt(result, "Line <line_num> - Chunks <chunk_num>:\n"
			, fmt_entry("line_num", str8_from_u32(push_str8_u32(), line_num, 10, 0, 0))
			, fmt_entry("chunk_num", str8_from_u32(push_str8_u32(), line->len, 10, 0, 0))
		);
		for (slice_iter(* line, chunk))
		{
			str8gen_append_str8(& result, lit("\t"));
			Str8 id;
			switch (* chunk->ptr)
			{
				case WATL_Tok_Space:
					id = lit("Space");
				break;

				case WATL_Tok_Tab:
					id = lit("Tab");
				break;

				default:
					id = lit("Visible");
				break;
				case WATL_Tok_Space: id = lit("Space"); break;
				case WATL_Tok_Tab: id = lit("Tab"); break;
				default: id = lit("Visible"); break;
			}

			str8gen_append_fmt(result, "<id>(<size>): '<spaces>'", {(
				lit("id"), id
			str8gen_append_fmt(result, "\t<id>(<size>): '<chunk>'\n"
				, fmt_entry("id", id)
				, fmt_entry_u32("size", chunk->len)
				, lit("spaces"), * chunk
			)});

			str8gen_append_str8(& result, lit("\n"));
				, fmt_entry("chunk", * chunk)
			);
		}
		farena_reset(& sarena);
	#undef push_str8_u32
	#undef fmt_entry_u32
	}
	return (Str8){ result.ptr, result.len };
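Given the format strings above ("Line <line_num> - Chunks <chunk_num>:\n" and "\t<id>(<size>): '<chunk>'\n"), a source line such as "Hello World!" should dump roughly as follows; the shape is inferred from the code, not captured from a run:

Line 1 - Chunks 3:
	Visible(5): 'Hello'
	Space(1): ' '
	Visible(6): 'World!'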
@@ -1308,8 +1376,6 @@ Str8 watl_dump_listing(SliceByte buffer, WATL_SliceLine lines)
#ifdef DEMO__WATL_DUMP_V1
int main()
{
	set_utf8_codepage();

	// This will limit for our V1 read to 64kb at most.
	FMem_128KB read_mem = {0};
	FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
demo.str_cache.listing.txt (new file, 12019 lines; diff suppressed because it is too large)