Finished V1

Edward R. Gonzalez 2025-05-13 23:17:25 -04:00
parent 5a1ae41716
commit 6411af15a6
3 changed files with 12189 additions and 101 deletions

View File

@ -9,6 +9,7 @@ $flag_all_c = '/TC'
$flag_c11 = '/std:c11'
$flag_all_cpp = '/TP'
$flag_compile = '/c'
$flag_charset_utf8 = '/utf-8'
$flag_debug = '/Zi'
$flag_define = '/D'
$flag_exceptions_disabled = '/EHsc-'
@ -77,13 +78,15 @@ $compiler_args += $flag_exceptions_disabled
$compiler_args += $flag_RTTI_disabled
$compiler_args += $flag_preprocess_conform
# Set charset encoding for both execution and source to UTF-8
# Dump preprocess file
if ($false) {
$compiler_args += $flag_preprocess_to_file
$compiler_args += $flag_preprocess_preserve_comments
}
# Diagnostic loggign
# Diagnostic logging
$compiler_args += $flag_full_src_path
# Specifying output pathing
@ -105,7 +108,7 @@ $unit = join-path $PSScriptRoot 'demo.str_cache.c'
$compiler_args += $flag_compile, $unit
# Diagnostic print for the args
$compiler_args | ForEach-Object { Write-Host $_ }
# $compiler_args | ForEach-Object { Write-Host $_ }
write-host
# $compiler_args += ( $flag_define + 'DEMO_STR_SLICE' )
@ -134,7 +137,7 @@ if ($true) {
$linker_args += $object
# Diagnostic print for the args
$linker_args | ForEach-Object { Write-Host $_ }
# $linker_args | ForEach-Object { Write-Host $_ }
write-host
& $linker $linker_args

View File

@ -19,20 +19,26 @@ to see definitions related to a specific kind of data or operation (strings, me
#if 0
int main()
{
VArena cache_arena; varena_init(cache_arena);
StrCache cache = strcache_init(varena_ainfo(cache));
// This will limit our V1 read to 128kb at most.
FMem_128KB read_mem = {0};
FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
VArena file_arena; varena_init(file_arena);
Str8 path_text = lit("../demo.strcache.c");
FileContent text_file = file_read_contents(varena_ainfo(file_arena), path_text);
// This will limit our V1 lex to only 16 megs worth of token tracking on a file.
SliceByte mem_toks = slicemem_alloc(MEGABYTES(16));
WATL_LexInfo lex_res = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);
Arena ast_arena; arena_init(ast_arena);
SliceByte mem_cache_strs = slicemem_alloc(MEGABYTES(64));
SliceByte mem_cache_slots = slicemem_alloc(1024 * 1024 * 16 * size_of(Str8Cache_SliceSlot));
SliceByte mem_cache_table = slicemem_alloc(1024 * 16 * size_of(Str8Cache_SliceSlot));
Str8Cache str_cache = str8cache_init(mem_cache_strs, mem_cache_slots, mem_cache_table);
WATL_ParseOps ops = { .str_cache = &cache, .node_backing = arena_ainfo(ast_arena) }
WATL_ParsedInfo parsed = watl_parse(text_file.content, ops);
SliceByte mem_parse_nodes = slicemem_alloc(MEGABYTES(4));
SliceByte mem_parse_lines = slicemem_alloc(MEGABYTES(4));
WATL_ParseInfo parse_res = watl_parse(lex_res.tokens, .backing_nodes = mem_parse_nodes, .backing_lines = mem_parse_lines, .str_cache = & str_cache);
watl_dbg_dump(parsed.root);
strcache_dbg_listing(cache);
SliceByte scratch_dmp = slicemem_alloc(MEGABYTES(16));
Str8 listing = watl_dump_listing(scratch_dmp, parse_res.lines);
file_write_str8(lit("demo.str_cache.listing.txt"), listing);
return 0;
}
#endif
@ -57,8 +63,6 @@ Secondarily for the purposes of using the above sufficiently the following are a
* Read/Write Files
* Lexing & Parsing
* Debug printing
TODO(Ed): Do we introduce gencpp in this?
*/
/*
@ -108,7 +112,7 @@ typedef S32 B32;
// Functional style cast
#define cast(type, data) ((type)(data))
#define pcast(type, data) * cast(type*, & data)
#define pcast(type, data) * cast(type*, & (data))
#define nullptr cast(void*, 0)
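// Illustrative usage note (not part of this commit): cast(SSIZE, sizeof(U32)) performs a plain
// functional-style conversion, while pcast(U32, some_f32) reinterprets an lvalue's bits by
// casting its address and dereferencing.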
@ -116,7 +120,7 @@ typedef S32 B32;
#define glue(A, B) glue_(A,B)
// Enforces size querying uses SSIZE type.
#define size_of(data) cast(SSIZE, sizeof(data))
#define size_of(data) cast(SSIZE, sizeof(data) )
#define stringify_(S) #S
#define stringify(S) stringify_(S)
@ -306,7 +310,11 @@ void slice__zero(SliceByte mem, SSIZE typewidth) {
// Now for our "Version 1"
#if defined(DEMO__FILE_READ_CONTENTS_V1) || defined(DEMO__WATL_LEX_V1) || defined(DEMO__WATL_PARSE_V1) || defined(DEMO__WATL_DUMP_PREREQ_V1) || defined(DEMO__WATL_DUMP_V1)
#if defined(DEMO__FILE_READ_CONTENTS_V1) || \
defined(DEMO__WATL_LEX_V1) || \
defined(DEMO__WATL_PARSE_V1) || \
defined(DEMO__WATL_DUMP_PREREQ_V1) || \
defined(DEMO__WATL_DUMP_V1)
struct FileOpResult
{
@ -406,8 +414,6 @@ FileOpResult file__read_contents(Str8 path, Opts__read_file_contents* opts) {
#ifdef DEMO__FILE_READ_CONTENTS_V1
int main()
{
set_utf8_codepage();
// This will limit our V1 read to 64kb at most.
FMem_64KB read_mem = {0};
FileOpResult res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
@ -534,7 +540,10 @@ inline void farena_reset(FArena* arena) { arena->used = 0; }
inline ArenaSP farena_save (FArena arena) { ArenaSP savepoint; savepoint.ptr = arena.start; return savepoint; }
#pragma endregion FArena
#if defined(DEMO__WATL_LEX_V1) || defined(DEMO__WATL_PARSE_V1) || defined(DEMO__WATL_DUMP_PREREQ_V1) || defined(DEMO__WATL_DUMP_V1)
#if defined(DEMO__WATL_LEX_V1) || \
defined(DEMO__WATL_PARSE_V1) || \
defined(DEMO__WATL_DUMP_PREREQ_V1) || \
defined(DEMO__WATL_DUMP_V1)
struct WATL_LexInfo {
// For now just the tokens
@ -544,8 +553,10 @@ struct WATL_LexInfo {
struct Opts__watl_lex {
/*
For this operation we'll enforce that the arena must linearly allocate each token, forming a strictly adjacent set of elements in an array.
This is not necessary, and an equivalent process could be done where the tokens are instead semi-contiguously organized into a linked list with a chained arena, or the tokens are sparsely cached,
in which case their position in their originating string is not preserved. In this case we're keeping it simple: tokens are in the same block of memory and they don't use a string cache.
This is not necessary, and an equivalent process could be done where the tokens are instead semi-contiguously organized into a linked list with a chained arena,
or the tokens are sparsely cached,
in which case their position in their originating string is not preserved. In this case we're keeping it simple:
tokens are in the same block of memory and they don't use a string cache.
*/
SliceByte pool_toks;
};
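/*
Illustrative sketch of the alternative organization mentioned above (hypothetical names such as
TokNode and chained_arena_push; not part of this commit): tokens are linked into a list whose
nodes are pushed from a chained arena, so the pool can grow instead of being pre-sized.

	typedef struct TokNode TokNode;
	struct TokNode { Str8 text; TokNode* next; };

	TokNode* node = chained_arena_push(arena, TokNode); // assumed chained-arena push helper
	node->text    = tok_text;
	node->next    = nullptr;
	if (last != nullptr) { last->next = node; }
	else                 { first      = node; }
	last = node;
*/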
@ -649,8 +660,6 @@ void slicemem_free(SliceByte mem) {
#ifdef DEMO__WATL_LEX_V1
int main()
{
set_utf8_codepage();
// This will limit our V1 read to 64kb at most.
FMem_64KB read_mem = {0};
FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
@ -930,8 +939,6 @@ WATL_ParseInfo watl__parse(WATL_SliceTok tokens, Opts__watl_parse* opts) { WATL_
#ifdef DEMO__WATL_PARSE_V1
int main()
{
set_utf8_codepage();
// This will limit our V1 read to 64kb at most.
FMem_64KB read_mem = {0};
FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
@ -1123,11 +1130,11 @@ Str8 fmt__vtoken(SliceByte backing_tbl, SliceByte backing_buf, Str8 fmt_template
// Expected to take a flat, variadic list of Str8 key-value pairs (key, value, key, value, ...)
// The array will be tracked using a SliceStr8 structure.
#define fmt_vtoken(backing_tbl, backing_buf, fmt_template, tokens) \
#define fmt_vtoken(backing_tbl, backing_buf, fmt_template, ...) \
fmt__vtoken(backing_tbl, backing_buf, lit(fmt_template), \
&(SliceStr8){ \
.ptr = cast(Str8*, ((Str8[])tokens)), \
.len = size_of( ((Str8[])tokens)) / size_of(Str8) \
.ptr = (Str8[]){ __VA_ARGS__ }, \
.len = (SSIZE)sizeof( (Str8[]){ __VA_ARGS__ } ) / size_of(Str8) \
} \
)
#pragma endregion fmt_vtoken
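// Example call with the variadic form (illustrative, not from this commit):
// Str8 greeting = fmt_vtoken(fmem_slice(tbl_scratch), fmt_scratch, "Hello <name>!", lit("name"), lit("world"));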
@ -1137,14 +1144,15 @@ void str8gen__append_fmt(Str8Gen* gen, Str8 fmt_template, SliceStr8* tokens)
{
local_persist FMem_64KB tbl_backing = {0};
SliceByte fmt_backing = {gen->ptr + gen->len, gen->backing.len - gen->len};
fmt__vtoken(fmem_slice(tbl_backing), fmt_backing, fmt_template, tokens);
Str8 appended = fmt__vtoken(fmem_slice(tbl_backing), fmt_backing, fmt_template, tokens);
gen->len += appended.len;
return;
}
#define str8gen_append_fmt(gen, fmt_template, tokens) \
#define str8gen_append_fmt(gen, fmt_template, ...) \
str8gen__append_fmt(& gen, lit(fmt_template), \
&(SliceStr8){ \
.ptr = cast(Str8*, ((Str8[])tokens)), \
.len = size_of( ((Str8[])tokens)) / size_of(Str8) \
.ptr = (Str8[]){ __VA_ARGS__ }, \
.len = (SSIZE)sizeof( (Str8[]){ __VA_ARGS__ } ) / size_of(Str8) \
} \
)
@ -1155,24 +1163,22 @@ Str8 mappings [][2] = {
^^ Add entries as above ^^
}
*/
#define fmt_entry(key, value) lit(key), lit(value)
#define fmt_entry(key, value) lit(key), value
#ifdef DEMO__WATL_DUMP_PREREQ_V1
int main()
{
set_utf8_codepage();
local_persist FMem_64KB tbl_scratch;
SliceByte fmt_scratch = slicemem_alloc(MEGABYTES(8));
Str8 test_str = fmt_vtoken(fmem_slice(tbl_scratch), fmt_scratch, "Will this work? <maybe_sub>", {
fmt_entry("maybe_sub", "IT SUBST!!!")
});
Str8 test_str = fmt_vtoken(fmem_slice(tbl_scratch), fmt_scratch, "Will this work? <maybe_sub>",
fmt_entry("maybe_sub", lit("IT SUBST!!!"))
);
SliceByte scratchgen = slicemem_alloc(MEGABYTES(16));
Str8Gen gen = str8gen_make(scratchgen);
str8gen_append_fmt(gen, "Testing now with Str8Gen!! <maybe_sub>!", {
fmt_entry("maybe_sub", "lets fucking go!!!")
});
str8gen_append_fmt(gen, "Testing now with Str8Gen!! <maybe_sub>!",
fmt_entry("maybe_sub", lit("lets fucking go!!!"))
);
return 0;
}
#endif
@ -1180,15 +1186,40 @@ int main()
/*
We'll need to do some integer serialization for our dump listing's metrics.
*/
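// For instance (illustrative): with radix 10, no minimum digit count, and ',' as the separator,
// 1234567 serializes as "1,234,567"; radix 16 additionally prepends "0x", lowercases the digits,
// and groups them four at a time.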
inline B32 char_is_upper(U8 c) { return('A' <= c && c <= 'Z'); }
inline U8 char_to_lower(U8 c) { if (char_is_upper(c)) { c += ('a' - 'A'); } return(c); }
inline
U8 integer_symbols(U8 value) {
local_persist
U8 lookup_table[16] = {
'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F',
};
return lookup_table[value];
}
Str8 str8_from_u32(SliceByte mem, U32 num, U32 radix, U8 min_digits, U8 digit_group_separator)
{
Str8 result = {.ptr = mem.ptr, .len = 0};
Str8 prefix = {0};
switch (radix)
{
case 16:{prefix = lit("0x");}break;
case 8: {prefix = lit("0o");}break;
case 2: {prefix = lit("0b");}break;
case 16: { prefix = lit("0x"); } break;
case 8: { prefix = lit("0o"); } break;
case 2: { prefix = lit("0b"); } break;
}
U8 digit_group_size = 3;
switch (radix)
{
default: break;
case 2:
case 8:
case 16: {
digit_group_size = 4;
}
break;
}
U32 needed_leading_zeros = 0;
@ -1210,12 +1241,57 @@ Str8 str8_from_u32(SliceByte mem, U32 num, U32 radix, U8 min_digits, U8 digit_gr
U32 needed_separators = 0;
if (digit_group_separator != 0)
{
needed_separators = (needed_digits + needed_leading_zeros) / digit_group_size;
if (needed_separators > 0 && (needed_digits + needed_leading_zeros) % digit_group_size == 0) {
needed_separators -= 1;
}
}
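// e.g. 7 digits grouped by 3 need 2 separators ("1,234,567"), while 6 digits need only 1
// ("123,456") since the count divides evenly, which is what the adjustment above accounts for.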
result.len = prefix.len + needed_leading_zeros + needed_separators + needed_digits;
assert(result.len <= mem.len);
}
// Fill Content
{
U32 num_reduce = num;
U32 digits_until_separator = digit_group_size;
for (U32 idx = 0; idx < result.len; idx += 1)
{
SSIZE separator_pos = result.len - idx - 1;
if (digits_until_separator == 0 && digit_group_separator != 0) {
result.ptr[separator_pos] = digit_group_separator;
digits_until_separator = digit_group_size + 1;
}
else {
result.ptr[separator_pos] = char_to_lower(integer_symbols(num_reduce % radix));
num_reduce /= radix;
}
digits_until_separator -= 1;
if (num_reduce == 0) {
break;
}
}
for (U32 leading_0_idx = 0; leading_0_idx < needed_leading_zeros; leading_0_idx += 1) {
result.ptr[prefix.len + leading_0_idx] = '0';
}
}
// Fill Prefix
if (prefix.len > 0) {
slice_copy(result, prefix);
}
return result;
}
// Utilized for file_write_str8:
#if 0
BOOL WriteFile(
[in] HANDLE hFile,
[in] LPCVOID lpBuffer,
[in] DWORD nNumberOfBytesToWrite,
[out, optional] LPDWORD lpNumberOfBytesWritten,
[in, out, optional] LPOVERLAPPED lpOverlapped
);
#endif
/*
Lastly: Writing to file using the Win API.
*/
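/*
A minimal sketch (assumed signature and error handling; the actual file_write_str8 in this commit
may differ) of how the write wrapper could look. For brevity it assumes the path string is
null-terminated.
*/
#if 0
void file_write_str8__sketch(Str8 path, Str8 content)
{
	HANDLE file = CreateFileA(cast(char const*, path.ptr),
		GENERIC_WRITE, 0, nullptr, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr);
	if (file == INVALID_HANDLE_VALUE) {
		return;
	}
	DWORD written = 0;
	WriteFile(file, content.ptr, cast(DWORD, content.len), & written, nullptr);
	CloseHandle(file);
}
#endif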
@ -1262,44 +1338,36 @@ We now have what we need to create the structural listing dump for WATL's data s
Str8 watl_dump_listing(SliceByte buffer, WATL_SliceLine lines)
{
local_persist FMem_64KB scratch = {0};
FArena sarena = farena_init(fmem_slice(scratch));
Str8Gen result = str8gen_make(buffer);
U32 line_num = 0;
for (slice_iter(lines, line))
{
#define fmt_entry_u32(label, num) lit(label), str8_from_u32(fmem_slice(scratch), num, 10, 0, 0)
#define push_str8_u32() (SliceByte){ farena__push(& sarena, size_of(UTF8), 64, lit("UTF8")), 64 }
#define fmt_entry_u32(label, num) lit(label), str8_from_u32(push_str8_u32(), num, 10, 0, 0)
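// Each serialized number draws its own 64-byte slice from the per-call scratch arena; the arena
// is reset at the end of every line so the 64 KB scratch gets reused.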
++ line_num;
str8gen_append_fmt(result, "Line <line_num> - Chunks <chunk_num>:\n", {(
fmt_entry_u32("line_num", line_num)
, fmt_entry_u32("chunk_num", line->len)
)});
str8gen_append_fmt(result, "Line <line_num> - Chunks <chunk_num>:\n"
, fmt_entry("line_num", str8_from_u32(push_str8_u32(), line_num, 10, 0, 0))
, fmt_entry("chunk_num", str8_from_u32(push_str8_u32(), line->len, 10, 0, 0))
);
for (slice_iter(* line, chunk))
{
str8gen_append_str8(& result, lit("\t"));
Str8 id;
switch (* chunk->ptr)
{
case WATL_Tok_Space:
id = lit("Space");
break;
case WATL_Tok_Tab:
id = lit("Tab");
break;
default:
id = lit("Visible");
break;
case WATL_Tok_Space: id = lit("Space"); break;
case WATL_Tok_Tab: id = lit("Tab"); break;
default: id = lit("Visible"); break;
}
str8gen_append_fmt(result, "<id>(<size>): '<spaces>'", {(
lit("id"), id
str8gen_append_fmt(result, "\t<id>(<size>): '<chunk>'\n"
, fmt_entry("id", id)
, fmt_entry_u32("size", chunk->len)
, lit("spaces"), * chunk
)});
str8gen_append_str8(& result, lit("\n"));
, fmt_entry("chunk", * chunk)
);
}
farena_reset(& sarena);
#undef push_str8_u32
#undef fmt_entry_u32
}
return (Str8){ result.ptr, result.len };
@ -1308,8 +1376,6 @@ Str8 watl_dump_listing(SliceByte buffer, WATL_SliceLine lines)
#ifdef DEMO__WATL_DUMP_V1
int main()
{
set_utf8_codepage();
// This will limit our V1 read to 128kb at most.
FMem_128KB read_mem = {0};
FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );

demo.str_cache.listing.txt (Normal file, 12019 lines)

File diff suppressed because it is too large.