From f87a098fe40cd8afc7d505cd1fa09d8e864cdec7 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Tue, 6 May 2025 04:21:03 -0400 Subject: [PATCH] massive --- demo.str_cache.c | 363 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 282 insertions(+), 81 deletions(-) diff --git a/demo.str_cache.c b/demo.str_cache.c index 430d57c..e7b2090 100644 --- a/demo.str_cache.c +++ b/demo.str_cache.c @@ -41,7 +41,8 @@ int main() // #define DEMO__STR_SLICE // #define DEMO__FILE_READ_CONTENTS_V1 // #define DEMO__WATL_LEX_V1 -#define DEMO__WATL_PARSE_V1 +// #define DEMO__WATL_PARSE_V1 +#define DEMO__WATL_DUMP_V1 /* The above makes use of the following core concepts to achieve its net result: @@ -94,7 +95,6 @@ enum { true, true_overflow, }; - typedef S8 B8; typedef S16 B16; typedef S32 B32; @@ -102,9 +102,9 @@ typedef S32 B32; // Common macros we'll use throughout this. #define assert_bounds(point, start, end) do { \ - SSIZE pos_point = cast(SSIZE, point); \ - SSIZE pos_start = cast(SSIZE, start); \ - SSIZE pos_end = cast(SSIZE, end); \ + USIZE pos_point = cast(USIZE, point); \ + USIZE pos_start = cast(USIZE, start); \ + USIZE pos_end = cast(USIZE, end); \ assert(pos_start <= pos_point); \ assert(pos_point <= pos_end); \ } while(0) @@ -140,7 +140,7 @@ struct Str8 { #define lit(string_literal) (Str8){ string_literal, size_of(string_literal) - 1 } // For now this string can visualized using a debugger. -#ifdef DEMO__STR_SLICE +#ifdef DEMO__STR_SLICE int main() { Str8 first = lit("Our first string as a slice"); @@ -238,10 +238,10 @@ HANDLE CreateFileA( // We need to covert our string slice to a c-string for CreateFileA's path input. -#define KILOBTYES(n) (cast(SSIZE, n) << 10) -#define MEGABYTES(n) (cast(SSIZE, n) << 20) -#define GIGABYTES(n) (cast(SSIZE, n) << 30) -#define TERABYTES(n) (cast(SSIZE, n) << 40) +#define KILOBTYES(n) (cast(USIZE, n) << 10) +#define MEGABYTES(n) (cast(USIZE, n) << 20) +#define GIGABYTES(n) (cast(USIZE, n) << 30) +#define TERABYTES(n) (cast(USIZE, n) << 40) /* We'll be defining here Fixed-sized memory blocks using typedefs on-demand @@ -257,7 +257,7 @@ typedef U8 FMem_64KB [ KILOBTYES(64) ]; #define fmem_slice(mem) (SliceMem) { mem, size_of(mem) } // We'll be using an intrinsic for copying memory: -void* memory_copy(void* dest, void const* src, SSIZE length) +void* memory_copy(void* dest, void const* src, USIZE length) { if (dest == nullptr || src == nullptr || length == 0) { return nullptr; @@ -273,12 +273,16 @@ void* memory_copy(void* dest, void const* src, SSIZE length) assert(slice.len > 0); \ } while(0) -void slice_copy(SliceMem dest, SliceMem src) { +void slice__copy(SliceMem dest, SSIZE const dest_typewidth, SliceMem const src, SSIZE const src_typewidth) { assert(dest.len >= src.len); slice_assert(dest); slice_assert(src); memory_copy(dest.ptr, src.ptr, src.len); } +#define slice_copy(dest,src) slice__copy( \ + (SliceMem ){(dest).ptr, (dest).len * size_of(*(dest).ptr)}, size_of(*(dest).ptr) \ +, (SliceMem const){(src ).ptr, (src ).len * size_of(*(src ).ptr)}, size_of(*(src ).ptr) \ +) // Assumes memory is zeroed. char const* str8_to_cstr_capped(Str8 content, SliceMem mem) { @@ -288,22 +292,23 @@ char const* str8_to_cstr_capped(Str8 content, SliceMem mem) { } // To support zeroing slices we'll utilize an intrinisc. -B32 memory_zero(void* dest, SSIZE length) { +B32 memory_zero(void* dest, USIZE const length) { if (dest == nullptr || length <= 0) { return false; } - __stosd((unsigned long*)dest, 0, length); + __stosb((unsigned char*)dest, 0, length); return true; } -void slice_zero(SliceMem mem) { +void slice__zero(SliceMem mem, SSIZE typewidth) { slice_assert(mem); memory_zero(mem.ptr, mem.len); } +#define slice_zero(slice) slice__zero((SliceMem){(slice).ptr, (slice).len * size_of(*(slice).ptr)}, size_of(*(slice).ptr)) // Now for our "Version 1" -#if defined(DEMO__FILE_READ_CONTENTS_V1) || defined(DEMO__WATL_LEX_V1) || defined(DEMO__WATL_PARSE_V1) +#if defined(DEMO__FILE_READ_CONTENTS_V1) || defined(DEMO__WATL_LEX_V1) || defined(DEMO__WATL_PARSE_V1) || defined(DEMO__WATL_DUMP_V1) struct FileOpResult { @@ -364,10 +369,10 @@ void api_file_read_contents(FileOpResult* result, Str8 path, Opts__read_file_con } if (opts->zero_backing) { - slice_zero(opts->backing); + slice_zero(pcast(SliceByte, opts->backing)); } - DWORD amount_read = 0; + DWORD amount_read = 0; BOOL read_result = ReadFile( id_file, opts->backing.ptr, @@ -455,17 +460,18 @@ struct WATL_SliceTok { }; Str8 watl_tok_str8(WATL_SliceTok toks, WATL_Tok* tok) { - SSIZE start = cast(SSIZE, toks.ptr); - SSIZE curr = cast(SSIZE, tok->code); - SSIZE offset = curr - start; + WATL_Tok* next = tok + 1; + USIZE start = cast(USIZE, toks.ptr->code); + USIZE curr = cast(USIZE, tok->code); + USIZE offset = curr - start; SSIZE left = toks.len - offset; B32 last_tok = (start + toks.len) == (curr + left); Str8 text = {0}; text.ptr = tok->code; - text.len = last_tok ? - left + text.len = next > (toks.ptr + toks.len) ? + left // Othwerise its the last minus the curr. - : cast(SSIZE, (tok + 1) - curr); + : cast(SSIZE, next->code - tok->code); return text; } @@ -486,18 +492,18 @@ struct ArenaSP { void* ptr; }; typedef struct FArena FArena; struct FArena { void* start; - SSIZE capacity; - SSIZE used; + USIZE capacity; + USIZE used; }; void api_farena_init(FArena* arena, SliceMem mem); FArena farena_init (SliceMem mem); -SliceMem farena__push (FArena* arena, SSIZE type_size, SSIZE amount, Str8 dbg_typename); +void* farena__push (FArena* arena, USIZE type_size, USIZE amount, Str8 dbg_typename); void farena_reset (FArena* arena); void farena_rewind (FArena* arena, ArenaSP savepoint); ArenaSP farena_save (FArena arena); -#define farena_push(arena, type) cast(type*, farena__push(& arena, size_of(type), 1, lit(stringify(type))).ptr) -#define farena_push_array(arena, type, amount) pcast(Slice ## type, farena__push(& arena, size_of(type), amount, lit(stringify(type))) ) +#define farena_push(arena, type) cast(type*, farena__push(& arena, size_of(type), 1, lit(stringify(type))) ) +#define farena_push_array(arena, type, amount) (Slice ## type){ farena__push(& arena, size_of(type), amount, lit(stringify(type))), amount } inline void api_farena_init(FArena* arena, SliceMem mem) { @@ -508,30 +514,27 @@ void api_farena_init(FArena* arena, SliceMem mem) { inline FArena farena_init(SliceMem mem) { FArena arena; api_farena_init(& arena, mem); return arena; } inline -SliceMem farena__push(FArena* arena, SSIZE type_size, SSIZE amount, Str8 dbg_typename) { - SSIZE to_commit = type_size * amount; - SSIZE unused = arena->capacity - arena->used; +void* farena__push(FArena* arena, USIZE type_size, USIZE amount, Str8 dbg_typename) { + USIZE to_commit = type_size * amount; + USIZE unused = arena->capacity - arena->used; assert(to_commit <= unused); - - SliceMem result = {0}; - result.ptr = cast(void*, cast(SSIZE, arena->start) + arena->used); - result.len = to_commit; + void* ptr = cast(void*, cast(USIZE, arena->start) + arena->used); arena->used += to_commit; - return result; + return ptr; } inline void farena_rewind(FArena* arena, ArenaSP savepoint) { - void* end = cast(void*, cast(SSIZE, arena->start) + arena->used); + void* end = cast(void*, cast(USIZE, arena->start) + arena->used); assert_bounds(savepoint.ptr, arena->start, end); - arena->used -= cast(SSIZE, savepoint.ptr) - cast(SSIZE, arena->start); + arena->used -= cast(USIZE, savepoint.ptr) - cast(USIZE, arena->start); } inline void farena_reset(FArena* arena) { arena->used = 0; } inline ArenaSP farena_save (FArena arena) { ArenaSP savepoint; savepoint.ptr = arena.start; return savepoint; } #pragma endregion FArena -#if defined(DEMO__WATL_LEX_V1) || defined(DEMO__WATL_PARSE_V1) +#if defined(DEMO__WATL_LEX_V1) || defined(DEMO__WATL_PARSE_V1) || defined(DEMO__WATL_DUMP_V1) struct WATL_LexInfo { // For now just the tokens @@ -561,8 +564,8 @@ void api_watl_lex(WATL_LexInfo* info, Str8 source, Opts__watl_lex* opts) char const* prev = source.ptr; char code = * cursor; - B32 was_text = false; - WATL_Tok* tok = nullptr; + B32 was_formatting = true; + WATL_Tok* tok = nullptr; for (; cursor < end;) { switch (code) @@ -571,35 +574,43 @@ void api_watl_lex(WATL_LexInfo* info, Str8 source, Opts__watl_lex* opts) case WATL_Tok_Tab: { if (* prev != * cursor) { - tok = farena_push(arena, WATL_Tok); - tok->code = cursor; - was_text = false; + tok = farena_push(arena, WATL_Tok); + tok->code = cursor; + was_formatting = true; } - cursor += 1; - code = * cursor; + cursor += 1; } - continue; + break; - case WATL_Tok_CarriageReturn: { - // Assumes next is line feed. - cursor += 1; - } case WATL_Tok_LineFeed: { - cursor += 1; - code = * cursor; + tok = farena_push(arena, WATL_Tok); + tok->code = cursor; + cursor += 1; + was_formatting = true; } - continue; + break; + + // Assuming what comes after is line feed. + case WATL_Tok_CarriageReturn: { + tok = farena_push(arena, WATL_Tok); + tok->code = cursor; + cursor += 2; + was_formatting = true; + } + break; default: + { + if (was_formatting) { + tok = farena_push(arena, WATL_Tok); + tok->code = cursor; + was_formatting = false; + } + cursor += 1; + } break; } - if (! was_text) { - tok = farena_push(arena, WATL_Tok); - tok->code = cursor; - was_text = true; - } - prev = cursor; - cursor += 1; + prev = cursor - 1; code = * cursor; } info->tokens.ptr = arena.start; @@ -620,7 +631,7 @@ To allocate onto the heap we'll make a basic slicemem_malloc to allocate, we'll However we don't need to use it for the V1 example. The OS will cleanup the pages used by the process during its termination. */ -SliceMem slicemem_alloc(SSIZE amount) +SliceMem slicemem_alloc(USIZE amount) { assert(amount > KILOBTYES(4)); void* result = malloc(amount); @@ -675,11 +686,11 @@ Str8Cache str8cache_init ( SliceMem mem_strs, SliceMem mem_s // For these strings we'll be using a hash called djb8: // Introducing a slice iterator: -#define slice_iter(container, iter) typeof(container.ptr) iter = container.ptr; iter != (container.ptr + container.len); ++ iter +#define slice_iter(container, iter) typeof((container).ptr) iter = (container).ptr; iter != ((container).ptr + (container).len); ++ iter inline -void hash64_djb8(U64* hash, SliceByte bytes) { - for (slice_iter(bytes, elem)) { +void hash64_djb8(U64* hash, SliceByte const bytes) { + for (U8 const* elem = bytes.ptr; elem != (bytes.ptr + bytes.len); ++ elem) { *hash = (((*hash) << 8) + (*hash)) + (*elem); } } @@ -722,8 +733,10 @@ void api_str8cache_init(Str8Cache* cache, SliceMem mem_strs, SliceMem mem_slots, slice_assert(mem_slots); slice_assert(mem_table); cache->a_str = farena_init(mem_strs); - cache->pool = pcast(Str8Cache_SliceSlot, mem_slots); - cache->table = pcast(Str8Cache_SliceSlot, mem_table); + cache->pool = (Str8Cache_SliceSlot){mem_slots.ptr, mem_slots.len / size_of(Str8Cache_Slot)}; + cache->table = (Str8Cache_SliceSlot){mem_table.ptr, mem_table.len / size_of(Str8Cache_Slot)}; + slice_zero(cache->pool); + slice_zero(cache->table); } void str8cache_clear(Str8Cache* cache) @@ -779,8 +792,8 @@ Str8* str8cache_set(Str8Cache* cache, U64 key, Str8 value) if (! surface_slot->occupied || surface_slot->key == key) { if (value.ptr != surface_slot->value.ptr) { - SliceMem mem = farena__push(& cache->a_str, size_of(U8), value.len, lit("Str8")); - slice_copy(mem, pcast(SliceMem, value)); + SliceMem mem = { farena__push(& cache->a_str, size_of(U8), value.len, lit("Str8")), value.len }; + slice_copy(pcast(SliceByte, mem), value); surface_slot->value = pcast(Str8, mem); } surface_slot->key = key; @@ -805,9 +818,9 @@ Str8* str8cache_set(Str8Cache* cache, U64 key, Str8 value) if ( ! slot->next->occupied || slot->next->key == key) { if (value.ptr != slot->next->value.ptr) { - SliceMem mem = farena__push(& cache->a_str, size_of(U8), value.len, lit("Str8")); - slice_copy(mem, pcast(SliceMem, value)); - slot->next->value = pcast(Str8, mem); + SliceMem mem = { farena__push(& cache->a_str, size_of(U8), value.len, lit("Str8")), value.len }; + slice_copy(pcast(SliceByte, mem), value); + slot->next->value = (Str8){mem.ptr, mem.len / size_of(char)}; } slot->next->value = value; slot->next->key = key; @@ -832,10 +845,14 @@ Str8 cache_str8(Str8Cache* cache, Str8 str) return * result; } +typedef Str8 WATL_Node; +#if 0 typedef struct WATL_Node WATL_Node; struct WATL_Node { + WATL_NodeKind kind; Str8 entry; }; +#endif typedef struct WATL_Line WATL_Line; struct WATL_Line { @@ -849,7 +866,7 @@ struct WATL_SliceLine { SSIZE len; }; -#ifdef DEMO__WATL_PARSE_V1 +#if defined(DEMO__WATL_PARSE_V1) || defined(DEMO__WATL_DUMP_V1) struct Opts__watl_parse { SliceMem backing_nodes; @@ -872,6 +889,7 @@ void api_watl_parse(WATL_ParseInfo* info, WATL_SliceTok tokens, Opts__watl_parse WATL_Line* line = farena_push(a_lines, WATL_Line); WATL_Node* curr = farena_push(a_nodes, WATL_Node); // Preemtively allocate a node for the line (may not be used) + * curr = (WATL_Node){0}; line->ptr = curr; line->len = 0; info->lines.ptr = line; @@ -880,21 +898,25 @@ void api_watl_parse(WATL_ParseInfo* info, WATL_SliceTok tokens, Opts__watl_parse { switch (* token->code) { + case WATL_Tok_CarriageReturn: case WATL_Tok_LineFeed: { WATL_Line* new_line = farena_push(a_lines, WATL_Line); line = new_line; line->ptr = curr; + line->len = 0; info->lines.len += 1; - continue; } + continue; default: break; } - curr->entry = watl_tok_str8(tokens, token); - curr = farena_push(a_nodes, WATL_Node); - line->len += 1; + Str8 tok_str = watl_tok_str8(tokens, token); + * curr = cache_str8( opts->str_cache, tok_str ); + curr = farena_push(a_nodes, WATL_Node); + * curr = (WATL_Node){0}; + line->len += 1; continue; } } @@ -914,9 +936,9 @@ int main() SliceMem mem_toks = slicemem_alloc(MEGABYTES(8)); WATL_LexInfo lex_res = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks); - SliceMem mem_cache_strs = slicemem_alloc(MEGABYTES(16)); - SliceMem mem_cache_slots = slicemem_alloc(KILOBTYES(512)); - SliceMem mem_cache_table = slicemem_alloc(KILOBTYES(64)); + SliceMem mem_cache_strs = slicemem_alloc(MEGABYTES(64)); + SliceMem mem_cache_slots = slicemem_alloc(1024 * 1024 * 16 * size_of(Str8Cache_SliceSlot)); + SliceMem mem_cache_table = slicemem_alloc(1024 * 16 * size_of(Str8Cache_SliceSlot)); Str8Cache str_cache = str8cache_init(mem_cache_strs, mem_cache_slots, mem_cache_table); SliceMem mem_parse_nodes = slicemem_alloc(MEGABYTES(4)); @@ -928,3 +950,182 @@ int main() return 0; } #endif + +/* +Now we'll like to dump this WATL structure into a file. +To do so we'll need to generate the content string of the file dynamically based on the WATL's content. + +We'll be utilizing a new construct called a string generator which be tied to all functionality for constructing strings. +*/ + +typedef struct Str8Gen Str8Gen; +struct Str8Gen { + SliceMem backing; // For V1 the backing buffer is fixed size. + char* ptr; + SSIZE len; +}; + +void str8gen_init(Str8Gen* gen, SliceMem backing); +Str8Gen str8gen_make( SliceMem backing); + +void str8gen_append_str8(Str8Gen* gen, Str8 str); +// void str8gen_append_fmt (Str8Gen* gen, Str8 fmt, ...); + +void str8gen_init(Str8Gen* gen, SliceMem backing) { + assert(gen != nullptr); + gen->backing = backing; + gen->ptr = backing.ptr; + gen->len = 0; +} +Str8Gen str8gen_make(SliceMem backing) { Str8Gen gen; str8gen_init(& gen, backing); return gen; } + +void str8gen_append_str8(Str8Gen* gen, Str8 str) { + SSIZE left = gen->backing.len - gen->len; + assert(left >= str.len); + SliceByte dest = {gen->ptr + gen->len, str.len}; + slice_copy(dest, str); + return; +} + +/* +In order to support appending formatted content via str8gen_apppend_fmt, we'll be using a substiution formatter utilizing string identifiation token pattern. + +Where a format template string is provided with a 'id' wrapped in delimiters which will be the angle brackets: +Example: This formatted string will have subsituted into it. +*/ + +typedef struct FmtTokEntry FmtTokEntry; +struct FmtTokEntry { + U64 key; + Str8 value; +}; +typedef struct SliceFmtTokEntry SliceFmtTokEntry; +struct SliceFmtTokEntry { + FmtTokEntry* ptr; + SSIZE len; +}; + +#define slice_end(slice) (slice.ptr + slice.len) + +/* +This is a token substiuting formatter using a array table lookup for tokens to substitute. +*/ +Str8 fmt_vtoken_slice(SliceMem buffer, SliceFmtTokEntry tokens, Str8 fmt_template) +{ + slice_assert(buffer); + slice_assert(tokens); + slice_assert(fmt_template); + + char* cursor_buffer = buffer.ptr; + SSIZE buffer_remaining = buffer.len; + + char curr_code = * fmt_template.ptr; + + char const* cursor_fmt = fmt_template.ptr; + SSIZE left_fmt = fmt_template.len; + while (left_fmt && buffer_remaining) + { + // Forward until we hit the delimiter '<' or the template's contents are exhausted. + while (curr_code && curr_code != '<' && cursor_fmt != slice_end(fmt_template)) + { + * cursor_buffer = * cursor_fmt; + ++ cursor_buffer; + ++ cursor_fmt; + -- buffer_remaining; + -- left_fmt; + + curr_code = * cursor_fmt; + } + + if (curr_code == '<') + { + char const* cursor_potential_token = cursor_fmt + 1; + SSIZE potential_token_length = 0; + + while (* (cursor_potential_token + potential_token_length) != '>') { + ++ potential_token_length; + } + + // Hashing the potential token and cross checking it with our token table + U64 key = 0; hash64_djb8(& key, (SliceByte){cursor_fmt + 1, potential_token_length}); + Str8* value = nullptr; + for (slice_iter(tokens, token)) + { + // We do a linear iteration instead of a hash table lookup because the user should be never substiuting with more than 100 unqiue tokens.. + if (token->key == key) { + value = & token->value; + break; + } + } + + if (value) + { + SSIZE left = value->len; + char const* cursor_value = value->ptr; + + while (left --) + { + * cursor_buffer = * cursor_value; + ++ cursor_buffer; + ++ cursor_value; + -- buffer_remaining; + } + + // Sync cursor format to after the processed token + cursor_fmt = cursor_potential_token + potential_token_length + 1; + curr_code = * cursor_fmt; + left_fmt -= potential_token_length + 2; // The 2 here ar ethe '<' & '>' delimiters being omitted. + continue; + } + + * cursor_buffer = * cursor_fmt; + ++ cursor_buffer; + ++ cursor_fmt; + -- buffer_remaining; + -- left_fmt; + + curr_code = * cursor_fmt; + } + } + Str8 result = {buffer.ptr, buffer.len - buffer_remaining}; + return result; +} + +typedef struct SliceStr8 SliceStr8; +struct SliceStr8 { + Str8* ptr; + SSIZE len; +}; + +Str8 fmt__vtoken(SliceMem backing, Str8 fmt_template, SliceStr8* tokens) +{ + FArena a_backing = farena_init(backing); + SliceFmtTokEntry table = {a_backing.start, 0}; + + S32 left = tokens->len; + for (slice_iter(*tokens, token)) { + FmtTokEntry* entry = farena_push(a_backing, FmtTokEntry); + * entry = (FmtTokEntry){0}; + hash64_djb8(& entry->key, (SliceByte){token->ptr, token->len}); + ++ token; + entry->value = * token; + ++ table.len; + } + SliceMem buffer = { .ptr = cast(U8*, a_backing.start) + a_backing.used, .len = a_backing.capacity - a_backing.used }; + Str8 result = fmt_vtoken_slice(buffer, table, fmt_template); + return result; +} + +#define fmt_vtoken(backing, fmt_template, tokens) fmt__vtoken(backing, fmt_template, &(SliceStr8){.ptr = tokens, .len = size_of(tokens) / size_of(Str8) }) + +#ifdef DEMO__WATL_DUMP_V1 +int main() +{ + SliceMem scratch = slicemem_alloc(MEGABYTES(64)); + Str8 mappings [] = { + lit("maybe_sub"), lit("IT SUBST!!!!"), + }; + Str8 test_str = fmt_vtoken(scratch, lit("Will this work? "), mappings); + return 0; +} +#endif