This commit is contained in:
Edward R. Gonzalez 2025-05-06 04:21:03 -04:00
parent 6bef824dce
commit f87a098fe4

View File

@ -41,7 +41,8 @@ int main()
// #define DEMO__STR_SLICE
// #define DEMO__FILE_READ_CONTENTS_V1
// #define DEMO__WATL_LEX_V1
#define DEMO__WATL_PARSE_V1
// #define DEMO__WATL_PARSE_V1
#define DEMO__WATL_DUMP_V1
/*
The above makes use of the following core concepts to achieve its net result:
@ -94,7 +95,6 @@ enum {
true,
true_overflow,
};
typedef S8 B8;
typedef S16 B16;
typedef S32 B32;
@ -102,9 +102,9 @@ typedef S32 B32;
// Common macros we'll use throughout this.
#define assert_bounds(point, start, end) do { \
SSIZE pos_point = cast(SSIZE, point); \
SSIZE pos_start = cast(SSIZE, start); \
SSIZE pos_end = cast(SSIZE, end); \
USIZE pos_point = cast(USIZE, point); \
USIZE pos_start = cast(USIZE, start); \
USIZE pos_end = cast(USIZE, end); \
assert(pos_start <= pos_point); \
assert(pos_point <= pos_end); \
} while(0)
@ -140,7 +140,7 @@ struct Str8 {
#define lit(string_literal) (Str8){ string_literal, size_of(string_literal) - 1 }
// For now this string can be visualized using a debugger.
#ifdef DEMO__STR_SLICE
#ifdef DEMO__STR_SLICE
int main()
{
Str8 first = lit("Our first string as a slice");
@ -238,10 +238,10 @@ HANDLE CreateFileA(
// We need to convert our string slice to a c-string for CreateFileA's path input.
#define KILOBTYES(n) (cast(SSIZE, n) << 10)
#define MEGABYTES(n) (cast(SSIZE, n) << 20)
#define GIGABYTES(n) (cast(SSIZE, n) << 30)
#define TERABYTES(n) (cast(SSIZE, n) << 40)
#define KILOBTYES(n) (cast(USIZE, n) << 10)
#define MEGABYTES(n) (cast(USIZE, n) << 20)
#define GIGABYTES(n) (cast(USIZE, n) << 30)
#define TERABYTES(n) (cast(USIZE, n) << 40)
/*
We'll be defining here Fixed-sized memory blocks using typedefs on-demand
@ -257,7 +257,7 @@ typedef U8 FMem_64KB [ KILOBTYES(64) ];
#define fmem_slice(mem) (SliceMem) { mem, size_of(mem) }
// We'll be using an intrinsic for copying memory:
void* memory_copy(void* dest, void const* src, SSIZE length)
void* memory_copy(void* dest, void const* src, USIZE length)
{
if (dest == nullptr || src == nullptr || length == 0) {
return nullptr;
@ -273,12 +273,16 @@ void* memory_copy(void* dest, void const* src, SSIZE length)
assert(slice.len > 0); \
} while(0)
void slice_copy(SliceMem dest, SliceMem src) {
void slice__copy(SliceMem dest, SSIZE const dest_typewidth, SliceMem const src, SSIZE const src_typewidth) {
assert(dest.len >= src.len);
slice_assert(dest);
slice_assert(src);
memory_copy(dest.ptr, src.ptr, src.len);
}
#define slice_copy(dest,src) slice__copy( \
(SliceMem ){(dest).ptr, (dest).len * size_of(*(dest).ptr)}, size_of(*(dest).ptr) \
, (SliceMem const){(src ).ptr, (src ).len * size_of(*(src ).ptr)}, size_of(*(src ).ptr) \
)
// Assumes memory is zeroed.
char const* str8_to_cstr_capped(Str8 content, SliceMem mem) {
@ -288,22 +292,23 @@ char const* str8_to_cstr_capped(Str8 content, SliceMem mem) {
}
// To support zeroing slices we'll utilize an intrinsic.
B32 memory_zero(void* dest, SSIZE length) {
B32 memory_zero(void* dest, USIZE const length) {
if (dest == nullptr || length <= 0) {
return false;
}
__stosd((unsigned long*)dest, 0, length);
__stosb((unsigned char*)dest, 0, length);
return true;
}
void slice_zero(SliceMem mem) {
void slice__zero(SliceMem mem, SSIZE typewidth) {
slice_assert(mem);
memory_zero(mem.ptr, mem.len);
}
#define slice_zero(slice) slice__zero((SliceMem){(slice).ptr, (slice).len * size_of(*(slice).ptr)}, size_of(*(slice).ptr))
// Now for our "Version 1"
#if defined(DEMO__FILE_READ_CONTENTS_V1) || defined(DEMO__WATL_LEX_V1) || defined(DEMO__WATL_PARSE_V1)
#if defined(DEMO__FILE_READ_CONTENTS_V1) || defined(DEMO__WATL_LEX_V1) || defined(DEMO__WATL_PARSE_V1) || defined(DEMO__WATL_DUMP_V1)
struct FileOpResult
{
@ -364,10 +369,10 @@ void api_file_read_contents(FileOpResult* result, Str8 path, Opts__read_file_con
}
if (opts->zero_backing) {
slice_zero(opts->backing);
slice_zero(pcast(SliceByte, opts->backing));
}
DWORD amount_read = 0;
DWORD amount_read = 0;
BOOL read_result = ReadFile(
id_file,
opts->backing.ptr,
@ -455,17 +460,18 @@ struct WATL_SliceTok {
};
Str8 watl_tok_str8(WATL_SliceTok toks, WATL_Tok* tok) {
SSIZE start = cast(SSIZE, toks.ptr);
SSIZE curr = cast(SSIZE, tok->code);
SSIZE offset = curr - start;
WATL_Tok* next = tok + 1;
USIZE start = cast(USIZE, toks.ptr->code);
USIZE curr = cast(USIZE, tok->code);
USIZE offset = curr - start;
SSIZE left = toks.len - offset;
B32 last_tok = (start + toks.len) == (curr + left);
Str8 text = {0};
text.ptr = tok->code;
text.len = last_tok ?
left
text.len = next > (toks.ptr + toks.len) ?
left
// Otherwise it's the last minus the curr.
: cast(SSIZE, (tok + 1) - curr);
: cast(SSIZE, next->code - tok->code);
return text;
}
@ -486,18 +492,18 @@ struct ArenaSP { void* ptr; };
typedef struct FArena FArena;
struct FArena {
void* start;
SSIZE capacity;
SSIZE used;
USIZE capacity;
USIZE used;
};
void api_farena_init(FArena* arena, SliceMem mem);
FArena farena_init (SliceMem mem);
SliceMem farena__push (FArena* arena, SSIZE type_size, SSIZE amount, Str8 dbg_typename);
void* farena__push (FArena* arena, USIZE type_size, USIZE amount, Str8 dbg_typename);
void farena_reset (FArena* arena);
void farena_rewind (FArena* arena, ArenaSP savepoint);
ArenaSP farena_save (FArena arena);
#define farena_push(arena, type) cast(type*, farena__push(& arena, size_of(type), 1, lit(stringify(type))).ptr)
#define farena_push_array(arena, type, amount) pcast(Slice ## type, farena__push(& arena, size_of(type), amount, lit(stringify(type))) )
#define farena_push(arena, type) cast(type*, farena__push(& arena, size_of(type), 1, lit(stringify(type))) )
#define farena_push_array(arena, type, amount) (Slice ## type){ farena__push(& arena, size_of(type), amount, lit(stringify(type))), amount }
inline
void api_farena_init(FArena* arena, SliceMem mem) {
@ -508,30 +514,27 @@ void api_farena_init(FArena* arena, SliceMem mem) {
inline FArena farena_init(SliceMem mem) { FArena arena; api_farena_init(& arena, mem); return arena; }
inline
SliceMem farena__push(FArena* arena, SSIZE type_size, SSIZE amount, Str8 dbg_typename) {
SSIZE to_commit = type_size * amount;
SSIZE unused = arena->capacity - arena->used;
void* farena__push(FArena* arena, USIZE type_size, USIZE amount, Str8 dbg_typename) {
USIZE to_commit = type_size * amount;
USIZE unused = arena->capacity - arena->used;
assert(to_commit <= unused);
SliceMem result = {0};
result.ptr = cast(void*, cast(SSIZE, arena->start) + arena->used);
result.len = to_commit;
void* ptr = cast(void*, cast(USIZE, arena->start) + arena->used);
arena->used += to_commit;
return result;
return ptr;
}
inline
void farena_rewind(FArena* arena, ArenaSP savepoint) {
void* end = cast(void*, cast(SSIZE, arena->start) + arena->used);
void* end = cast(void*, cast(USIZE, arena->start) + arena->used);
assert_bounds(savepoint.ptr, arena->start, end);
arena->used -= cast(SSIZE, savepoint.ptr) - cast(SSIZE, arena->start);
arena->used -= cast(USIZE, savepoint.ptr) - cast(USIZE, arena->start);
}
inline void farena_reset(FArena* arena) { arena->used = 0; }
inline ArenaSP farena_save (FArena arena) { ArenaSP savepoint; savepoint.ptr = arena.start; return savepoint; }
#pragma endregion FArena
#if defined(DEMO__WATL_LEX_V1) || defined(DEMO__WATL_PARSE_V1)
#if defined(DEMO__WATL_LEX_V1) || defined(DEMO__WATL_PARSE_V1) || defined(DEMO__WATL_DUMP_V1)
struct WATL_LexInfo {
// For now just the tokens
@ -561,8 +564,8 @@ void api_watl_lex(WATL_LexInfo* info, Str8 source, Opts__watl_lex* opts)
char const* prev = source.ptr;
char code = * cursor;
B32 was_text = false;
WATL_Tok* tok = nullptr;
B32 was_formatting = true;
WATL_Tok* tok = nullptr;
for (; cursor < end;)
{
switch (code)
@ -571,35 +574,43 @@ void api_watl_lex(WATL_LexInfo* info, Str8 source, Opts__watl_lex* opts)
case WATL_Tok_Tab:
{
if (* prev != * cursor) {
tok = farena_push(arena, WATL_Tok);
tok->code = cursor;
was_text = false;
tok = farena_push(arena, WATL_Tok);
tok->code = cursor;
was_formatting = true;
}
cursor += 1;
code = * cursor;
cursor += 1;
}
continue;
break;
case WATL_Tok_CarriageReturn: {
// Assumes next is line feed.
cursor += 1;
}
case WATL_Tok_LineFeed: {
cursor += 1;
code = * cursor;
tok = farena_push(arena, WATL_Tok);
tok->code = cursor;
cursor += 1;
was_formatting = true;
}
continue;
break;
// Assuming what comes after is line feed.
case WATL_Tok_CarriageReturn: {
tok = farena_push(arena, WATL_Tok);
tok->code = cursor;
cursor += 2;
was_formatting = true;
}
break;
default:
{
if (was_formatting) {
tok = farena_push(arena, WATL_Tok);
tok->code = cursor;
was_formatting = false;
}
cursor += 1;
}
break;
}
if (! was_text) {
tok = farena_push(arena, WATL_Tok);
tok->code = cursor;
was_text = true;
}
prev = cursor;
cursor += 1;
prev = cursor - 1;
code = * cursor;
}
info->tokens.ptr = arena.start;
@ -620,7 +631,7 @@ To allocate onto the heap we'll make a basic slicemem_malloc to allocate, we'll
However we don't need to use it for the V1 example. The OS will cleanup the pages used by the process during its termination.
*/
SliceMem slicemem_alloc(SSIZE amount)
SliceMem slicemem_alloc(USIZE amount)
{
assert(amount > KILOBTYES(4));
void* result = malloc(amount);
@ -675,11 +686,11 @@ Str8Cache str8cache_init ( SliceMem mem_strs, SliceMem mem_s
// For these strings we'll be using a hash called djb8:
// Introducing a slice iterator:
#define slice_iter(container, iter) typeof(container.ptr) iter = container.ptr; iter != (container.ptr + container.len); ++ iter
#define slice_iter(container, iter) typeof((container).ptr) iter = (container).ptr; iter != ((container).ptr + (container).len); ++ iter
inline
void hash64_djb8(U64* hash, SliceByte bytes) {
for (slice_iter(bytes, elem)) {
void hash64_djb8(U64* hash, SliceByte const bytes) {
for (U8 const* elem = bytes.ptr; elem != (bytes.ptr + bytes.len); ++ elem) {
*hash = (((*hash) << 8) + (*hash)) + (*elem);
}
}
@ -722,8 +733,10 @@ void api_str8cache_init(Str8Cache* cache, SliceMem mem_strs, SliceMem mem_slots,
slice_assert(mem_slots);
slice_assert(mem_table);
cache->a_str = farena_init(mem_strs);
cache->pool = pcast(Str8Cache_SliceSlot, mem_slots);
cache->table = pcast(Str8Cache_SliceSlot, mem_table);
cache->pool = (Str8Cache_SliceSlot){mem_slots.ptr, mem_slots.len / size_of(Str8Cache_Slot)};
cache->table = (Str8Cache_SliceSlot){mem_table.ptr, mem_table.len / size_of(Str8Cache_Slot)};
slice_zero(cache->pool);
slice_zero(cache->table);
}
void str8cache_clear(Str8Cache* cache)
@ -779,8 +792,8 @@ Str8* str8cache_set(Str8Cache* cache, U64 key, Str8 value)
if (! surface_slot->occupied || surface_slot->key == key)
{
if (value.ptr != surface_slot->value.ptr) {
SliceMem mem = farena__push(& cache->a_str, size_of(U8), value.len, lit("Str8"));
slice_copy(mem, pcast(SliceMem, value));
SliceMem mem = { farena__push(& cache->a_str, size_of(U8), value.len, lit("Str8")), value.len };
slice_copy(pcast(SliceByte, mem), value);
surface_slot->value = pcast(Str8, mem);
}
surface_slot->key = key;
@ -805,9 +818,9 @@ Str8* str8cache_set(Str8Cache* cache, U64 key, Str8 value)
if ( ! slot->next->occupied || slot->next->key == key)
{
if (value.ptr != slot->next->value.ptr) {
SliceMem mem = farena__push(& cache->a_str, size_of(U8), value.len, lit("Str8"));
slice_copy(mem, pcast(SliceMem, value));
slot->next->value = pcast(Str8, mem);
SliceMem mem = { farena__push(& cache->a_str, size_of(U8), value.len, lit("Str8")), value.len };
slice_copy(pcast(SliceByte, mem), value);
slot->next->value = (Str8){mem.ptr, mem.len / size_of(char)};
}
slot->next->value = value;
slot->next->key = key;
@ -832,10 +845,14 @@ Str8 cache_str8(Str8Cache* cache, Str8 str)
return * result;
}
typedef Str8 WATL_Node;
#if 0
typedef struct WATL_Node WATL_Node;
struct WATL_Node {
WATL_NodeKind kind;
Str8 entry;
};
#endif
typedef struct WATL_Line WATL_Line;
struct WATL_Line {
@ -849,7 +866,7 @@ struct WATL_SliceLine {
SSIZE len;
};
#ifdef DEMO__WATL_PARSE_V1
#if defined(DEMO__WATL_PARSE_V1) || defined(DEMO__WATL_DUMP_V1)
struct Opts__watl_parse {
SliceMem backing_nodes;
@ -872,6 +889,7 @@ void api_watl_parse(WATL_ParseInfo* info, WATL_SliceTok tokens, Opts__watl_parse
WATL_Line* line = farena_push(a_lines, WATL_Line);
WATL_Node* curr = farena_push(a_nodes, WATL_Node); // Preemtively allocate a node for the line (may not be used)
* curr = (WATL_Node){0};
line->ptr = curr;
line->len = 0;
info->lines.ptr = line;
@ -880,21 +898,25 @@ void api_watl_parse(WATL_ParseInfo* info, WATL_SliceTok tokens, Opts__watl_parse
{
switch (* token->code)
{
case WATL_Tok_CarriageReturn:
case WATL_Tok_LineFeed: {
WATL_Line* new_line = farena_push(a_lines, WATL_Line);
line = new_line;
line->ptr = curr;
line->len = 0;
info->lines.len += 1;
continue;
}
continue;
default:
break;
}
curr->entry = watl_tok_str8(tokens, token);
curr = farena_push(a_nodes, WATL_Node);
line->len += 1;
Str8 tok_str = watl_tok_str8(tokens, token);
* curr = cache_str8( opts->str_cache, tok_str );
curr = farena_push(a_nodes, WATL_Node);
* curr = (WATL_Node){0};
line->len += 1;
continue;
}
}
@ -914,9 +936,9 @@ int main()
SliceMem mem_toks = slicemem_alloc(MEGABYTES(8));
WATL_LexInfo lex_res = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);
SliceMem mem_cache_strs = slicemem_alloc(MEGABYTES(16));
SliceMem mem_cache_slots = slicemem_alloc(KILOBTYES(512));
SliceMem mem_cache_table = slicemem_alloc(KILOBTYES(64));
SliceMem mem_cache_strs = slicemem_alloc(MEGABYTES(64));
SliceMem mem_cache_slots = slicemem_alloc(1024 * 1024 * 16 * size_of(Str8Cache_SliceSlot));
SliceMem mem_cache_table = slicemem_alloc(1024 * 16 * size_of(Str8Cache_SliceSlot));
Str8Cache str_cache = str8cache_init(mem_cache_strs, mem_cache_slots, mem_cache_table);
SliceMem mem_parse_nodes = slicemem_alloc(MEGABYTES(4));
@ -928,3 +950,182 @@ int main()
return 0;
}
#endif
/*
Now we'd like to dump this WATL structure into a file.
To do so we'll need to generate the content string of the file dynamically based on the WATL's content.
We'll be utilizing a new construct called a string generator which will be tied to all functionality for constructing strings.
*/
// String generator: builds up text inside a caller-provided backing buffer.
typedef struct Str8Gen Str8Gen;
struct Str8Gen {
SliceMem backing; // For V1 the backing buffer is fixed size.
char* ptr; // Start of the generated text; str8gen_init sets it to backing.ptr.
SSIZE len; // str8gen_init sets it to 0; str8gen_append_str8 uses it as the write offset from ptr.
};
// Initializes `gen` in place so that it writes into `backing`.
void str8gen_init(Str8Gen* gen, SliceMem backing);
// By-value variant of str8gen_init: returns a generator bound to `backing`.
Str8Gen str8gen_make( SliceMem backing);
// Copies the bytes of `str` into the generator's buffer (asserts that they fit).
void str8gen_append_str8(Str8Gen* gen, Str8 str);
// Disabled declaration; NOTE(review): presumably enabled once the token formatter is wired into Str8Gen.
// void str8gen_append_fmt (Str8Gen* gen, Str8 fmt, ...);
// Binds `gen` to `backing` and resets it to an empty string.
// `gen` must not be null (asserted).
void str8gen_init(Str8Gen* gen, SliceMem backing) {
	assert(gen != nullptr);
	// Populate every field in one assignment: text starts at the head of the backing with zero length.
	* gen = (Str8Gen){
		.backing = backing,
		.ptr     = backing.ptr,
		.len     = 0,
	};
}
// Convenience constructor: returns a generator initialized over `backing` via str8gen_init.
Str8Gen str8gen_make(SliceMem backing) {
	Str8Gen result;
	str8gen_init(& result, backing);
	return result;
}
// Appends the bytes of `str` to the end of the generated text.
//   gen: generator to append to; its `len` grows by str.len.
//   str: text to append. An empty string is a no-op.
// Asserts that the remaining space in the backing buffer can hold `str`.
void str8gen_append_str8(Str8Gen* gen, Str8 str) {
	// Nothing to copy; also avoids handing a zero-length slice to slice_copy,
	// whose slice_assert rejects empty slices.
	if (str.len == 0) {
		return;
	}
	SSIZE left = gen->backing.len - gen->len;
	assert(left >= str.len);
	SliceByte dest = {gen->ptr + gen->len, str.len};
	slice_copy(dest, str);
	// Advance the write offset so the next append lands after this one instead of overwriting it.
	gen->len += str.len;
}
/*
In order to support appending formatted content via str8gen_append_fmt, we'll be using a substitution formatter utilizing a string identification token pattern.
A format template string is provided with an 'id' wrapped in delimiters, which will be the angle brackets: <id>
Example: This formatted string will have <id> substituted into it.
*/
// One substitution entry for the token formatter.
typedef struct FmtTokEntry FmtTokEntry;
struct FmtTokEntry {
U64 key; // Hash of the token id; fmt__vtoken fills it with hash64_djb8 and fmt_vtoken_slice compares against it.
Str8 value; // Text written to the output in place of <id>.
};
// Slice (pointer + element count) of FmtTokEntry; the lookup table scanned by fmt_vtoken_slice.
typedef struct SliceFmtTokEntry SliceFmtTokEntry;
struct SliceFmtTokEntry {
FmtTokEntry* ptr; // First entry.
SSIZE len; // Number of entries (not bytes).
};
// One-past-the-last element of a slice (any struct with `ptr` and `len` members).
// The argument is parenthesized so expressions such as `*slice_ptr` expand correctly.
#define slice_end(slice) ((slice).ptr + (slice).len)
/*
This is a token-substituting formatter using an array table lookup for tokens to substitute.
*/
// Expands `fmt_template` into `buffer`, replacing every `<id>` whose hashed id matches an entry in `tokens`.
//   buffer:       destination memory; output is truncated once it is full.
//   tokens:       lookup table of (hash of id, replacement text) entries.
//   fmt_template: template text; its `len` is authoritative, and an embedded NUL ends formatting early.
// Returns a Str8 starting at buffer.ptr whose length is the number of bytes written (no terminator is written).
// A '<' that has no closing '>' inside the template, or whose id is not in the table, is copied literally.
Str8 fmt_vtoken_slice(SliceMem buffer, SliceFmtTokEntry tokens, Str8 fmt_template)
{
	slice_assert(buffer);
	slice_assert(tokens);
	slice_assert(fmt_template);
	char*       cursor_buffer    = buffer.ptr;
	SSIZE       buffer_remaining = buffer.len;
	char const* cursor_fmt       = fmt_template.ptr;
	SSIZE       left_fmt         = fmt_template.len;
	// Every read of the template is guarded by left_fmt and every write by buffer_remaining.
	while (left_fmt > 0 && buffer_remaining > 0)
	{
		char const curr_code = * cursor_fmt;
		if (curr_code == '\0') {
			// Treat an embedded terminator as the end of the template instead of spinning on it.
			break;
		}
		if (curr_code == '<')
		{
			char const* cursor_potential_token = cursor_fmt + 1;
			// Only the bytes after '<' that still belong to the template may be scanned for '>'.
			SSIZE const scan_limit             = left_fmt - 1;
			SSIZE       potential_token_length = 0;
			while (potential_token_length < scan_limit && cursor_potential_token[potential_token_length] != '>') {
				++ potential_token_length;
			}
			// The scan stopped before the limit only if it found the closing '>'.
			if (potential_token_length < scan_limit)
			{
				// Hashing the potential token and cross checking it with our token table
				U64   key   = 0; hash64_djb8(& key, (SliceByte){cursor_fmt + 1, potential_token_length});
				Str8* value = nullptr;
				for (slice_iter(tokens, token))
				{
					// We do a linear iteration instead of a hash table lookup because the user should never be substituting with more than 100 unique tokens.
					if (token->key == key) {
						value = & token->value;
						break;
					}
				}
				if (value)
				{
					// Clamp the substitution to the space left so the output buffer is never overrun.
					SSIZE       left         = value->len < buffer_remaining ? value->len : buffer_remaining;
					char const* cursor_value = value->ptr;
					while (left > 0)
					{
						* cursor_buffer = * cursor_value;
						++ cursor_buffer;
						++ cursor_value;
						-- buffer_remaining;
						-- left;
					}
					// Sync cursor format to after the processed token
					cursor_fmt  = cursor_potential_token + potential_token_length + 1;
					left_fmt   -= potential_token_length + 2; // The 2 here are the '<' & '>' delimiters being omitted.
					continue;
				}
			}
			// Unterminated or unknown token: fall through and emit the '<' as ordinary text.
		}
		* cursor_buffer = curr_code;
		++ cursor_buffer;
		++ cursor_fmt;
		-- buffer_remaining;
		-- left_fmt;
	}
	Str8 result = {buffer.ptr, buffer.len - buffer_remaining};
	return result;
}
// Slice (pointer + element count) of Str8 values.
typedef struct SliceStr8 SliceStr8;
struct SliceStr8 {
Str8* ptr; // First string.
SSIZE len; // Number of strings (not bytes).
};
// Formats `fmt_template` using a flat list of (id, value) string pairs.
//   backing:      scratch memory; the hashed lookup table is pushed at its start and the rest becomes the output buffer.
//   fmt_template: template containing <id> markers.
//   tokens:       flat slice laid out as id0, value0, id1, value1, ...; its length must be even (asserted).
// Returns the formatted text produced by fmt_vtoken_slice; it points into `backing`.
Str8 fmt__vtoken(SliceMem backing, Str8 fmt_template, SliceStr8* tokens)
{
	// Every id needs a matching value.
	assert((tokens->len % 2) == 0);
	FArena           a_backing = farena_init(backing);
	SliceFmtTokEntry table     = {a_backing.start, 0};
	Str8* const      end       = tokens->ptr + tokens->len;
	// Step one (id, value) pair at a time; requiring two remaining elements keeps a stray trailing id from being read past the end.
	for (Str8* token = tokens->ptr; (end - token) >= 2; token += 2) {
		FmtTokEntry* entry = farena_push(a_backing, FmtTokEntry);
		* entry = (FmtTokEntry){0};
		hash64_djb8(& entry->key, (SliceByte){token->ptr, token->len});
		entry->value = token[1];
		++ table.len;
	}
	// Whatever the table did not consume is handed to the formatter as its output buffer.
	SliceMem buffer = { .ptr = cast(U8*, a_backing.start) + a_backing.used, .len = a_backing.capacity - a_backing.used };
	Str8     result = fmt_vtoken_slice(buffer, table, fmt_template);
	return result;
}
// Convenience wrapper over fmt__vtoken.
// `tokens` must be an actual array of Str8 laid out as id, value, id, value, ... — the element count comes from
// size_of, so passing a pointer would yield the wrong length.
#define fmt_vtoken(backing, fmt_template, tokens) fmt__vtoken((backing), (fmt_template), &(SliceStr8){.ptr = (tokens), .len = size_of(tokens) / size_of(Str8) })
#ifdef DEMO__WATL_DUMP_V1
// Demo entry point: runs the token formatter once over a template with a single substitution.
// The result is not printed or otherwise used by this function.
int main()
{
	// Flat (id, value) pairs handed to fmt_vtoken.
	Str8 subst_pairs [] = {
		lit("maybe_sub"), lit("IT SUBST!!!!"),
	};
	// Backing memory passed to the formatter.
	SliceMem fmt_backing = slicemem_alloc(MEGABYTES(64));
	Str8     formatted   = fmt_vtoken(fmt_backing, lit("Will this work? <maybe_sub>"), subst_pairs);
	return 0;
}
#endif