C_Intro/demo.str_cache.c
2025-05-06 09:29:21 -04:00

1143 lines
36 KiB
C

/*
An introduction to C11 with a str cache demo.
Attempting to showcase better conventions and constructs in C; Discovered to me as of 2025 from scouring the internet.
*/
/*
The below will be implemented within this single file.
Because of this, definitions will be kept on a need-to-have basis to target only one vendor target and toolchain.
We will not use nearly any libraries and will be targeting only Windows 11 x64 using MSVC.
Even so the constructs defined and their dependencies can be properly abstracted into a ergonomic library for multiple targets with enough time and pain.
The difference is just more preprocess conditionals, and how far a library is trying to support a larger range of targets and their age discrepancy.
The more minimal the less cruft.
Definitions are defined linearly on the file on-demand as needed. Since the file is to be read linearly.
This will cause non-categorical organization so it will be more difficult to sift through if you wanted
to see definitions related to a specific kind of data or operation (strings, memory, etc).
*/
#if 0
int main()
{
VArena cache_arena; varena_init(cache_arena);
StrCache cache = strcache_init(varena_ainfo(cache));
VArena file_arena; varena_init(file_arena);
Str8 path_text = lit("../demo.strcache.c");
FileContent text_file = file_read_contents(varena_ainfo(file_arena), path_text);
Arena ast_arena; arena_init(ast_arena);
WATL_ParseOps ops = { .str_cache = &cache, .node_backing = arena_ainfo(ast_arena) }
WATL_ParsedInfo parsed = watl_parse(text_file.content, ops);
watl_dbg_dump(parsed.root);
strcache_dbg_listing(cache);
return 0;
}
#endif
// Demo selection
// #define DEMO__STR_SLICE
// #define DEMO__FILE_READ_CONTENTS_V1
// #define DEMO__WATL_LEX_V1
// #define DEMO__WATL_PARSE_V1
#define DEMO__WATL_DUMP_V1
/*
The above makes use of the following core concepts to achieve its net result:
* Slices
* Arenas
* Generic Runtime Allocator Interface
* Hashing
Secondarily for the purposes of using the above sufficiently the following are also utilized:
* Virtual Address Space
* Read/Write Files
* Lexing & Parsing
* Debug printing
TODO(Ed): Do we introduce gencpp in this?
*/
/*
First thing we'll probably want is a way to deal with text effectively.
So we'll setup the minimum for that when dealing with immutable constructs.
*/
// We'll need some minimum set of dependencies to adequately define the constructs.
// ASSUMING MODERN MSVC TOOLCHAIN.
#include <assert.h>
#include <intrin.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdlib.h>
#include <tmmintrin.h>
#include <wmmintrin.h>
// #include <stdbool.h>
// Fixed-width integer aliases over MSVC's __intN extension types.
// (This file explicitly targets the MSVC toolchain only; see the header comment.)
typedef unsigned __int8 U8;
typedef signed __int8 S8;
typedef unsigned __int16 U16;
typedef signed __int16 S16;
typedef unsigned __int32 U32;
typedef signed __int32 S32;
typedef unsigned __int64 U64;
typedef signed __int64 S64;
// Size and pointer-difference aliases from <stddef.h>.
typedef size_t USIZE;
typedef ptrdiff_t SSIZE;
// Hand-rolled booleans instead of <stdbool.h>:
// false = 0, true = 1, plus a third state `true_overflow` (= 2) for operations
// that want to report "succeeded, but overflowed/clipped".
enum {
false,
true,
true_overflow,
};
// Boolean typedefs sized to whatever storage width the use site prefers.
typedef S8 B8;
typedef S16 B16;
typedef S32 B32;
// Common macros we'll use throughout this.
// Asserts that `point` lies within [start, end] (inclusive on both sides);
// all three are converted to integer addresses before comparison.
#define assert_bounds(point, start, end) do { \
USIZE pos_point = cast(USIZE, point); \
USIZE pos_start = cast(USIZE, start); \
USIZE pos_end = cast(USIZE, end); \
assert(pos_start <= pos_point); \
assert(pos_point <= pos_end); \
} while(0)
// Functional style cast
#define cast(type, data) ((type)(data))
// Pointer-pun `data` as `type` (reinterprets the bytes in place; no conversion).
#define pcast(type, data) * cast(type*, & data)
// Pre-C23 null pointer constant spelled like the C++/C23 keyword.
#define nullptr cast(void*, 0)
// Token-pasting helper pair; the indirection lets arguments expand first.
#define glue_(A, B) A ## B
#define glue(A, B) glue_(A,B)
// Enforces size querying uses SSIZE type.
#define size_of(data) cast(SSIZE, sizeof(data))
// Stringification helper pair; the indirection lets arguments expand first.
#define stringify_(S) #S
#define stringify(S) stringify_(S)
/*
The first construct we'll utilize is a String Slice.
In modern programming with the memory sizes utilized, it is more ergonomic to track the length of strings with their pointer.
Most strings are not stored in some immutable table tracked statically; the performance loss in not doing so is negligible under modern hardware constraints.
*/
// A length-tracked, immutable view into character data. Not null-terminated.
typedef struct Str8 Str8;
struct Str8 {
char const* ptr;
SSIZE len;
};
// String literals in C include null-terminators, we aren't interested in preserving that.
// Wraps a string literal as a Str8, excluding the trailing '\0' from the length.
#define lit(string_literal) (Str8){ string_literal, size_of(string_literal) - 1 }
// For now this string can visualized using a debugger.
#ifdef DEMO__STR_SLICE
int main()
{
	// A slice view over the literal's static storage; inspect `first` in a debugger.
	Str8 first = lit("Our first string as a slice");
	return 0;
}
// Fix: a bare token after #endif is not standard C (undefined behavior per the
// grammar; MSVC merely warns). The guard name now lives in a comment.
#endif // DEMO__STR_SLICE
/*
We now want to be able to read a file. This will be a heavy rabbit-hole as we'll need to setup a basic file interface
and related definitions for handling the memory.
For the purposes of the initial definition we'll introduce fixed-sized memory handling statically allocated onto the stack.
*/
/*
First off we need to find out how to aquire the contents of a file on Windows.
We'll be wrapping the operation in a procedure called file_read_contents. We'll have it take a path and optional arguments (Opts__read_file_contents).
It will return a result in a composite struct: FileOpResult; which may be expanded as needed in the future.
*/
// Forward declarations for the file-read interface; the struct definitions are
// provided further down, gated per demo.
typedef struct FileOpResult FileOpResult;
typedef struct Opts__read_file_contents Opts__read_file_contents;
// Pointer-based variant: writes into caller-provided result storage.
void api_file_read_contents(FileOpResult* result, Str8 path, Opts__read_file_contents* opts);
// Value-returning variant wrapping the above.
FileOpResult file__read_contents ( Str8 path, Opts__read_file_contents* opts);
// Lets callers pass options as designated initializers (or omit them entirely).
#define file_read_contents(path, ...) file__read_contents(path, & (Opts__read_file_contents){__VA_ARGS__} )
/*
The file contents will be returned in bytes.
To view or manage any slice of bytes we'll be utilizing a byte slice.
*/
typedef struct SliceByte SliceByte;
struct SliceByte {
U8* ptr;
SSIZE len;
};
/*
To address memory we'll use a memory slice.
*/
// Same layout as SliceByte/Str8; the slices pun between each other via pcast.
typedef struct SliceMem SliceMem;
struct SliceMem {
void* ptr;
SSIZE len;
};
/*
The above is a pattern that can be provided so that whether or not the result is formatted and provided to the user via the stack is entirely optional.
It also allows for default parameters to be defined conviently.
*/
// We'll utilize the ReadFile procedure within the WinAPI: https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-readfile
#define NOMINMAX
#define WIN32_LEAN_AND_MEAN
#define WIN32_MEAN_AND_LEAN
#define VC_EXTRALEAN
#include <windows.h>
#include <windowsx.h>
#include <timeapi.h>
#include <tlhelp32.h>
#include <Shlobj.h>
#include <processthreadsapi.h>
#pragma comment(lib, "user32")
#pragma comment(lib, "winmm")
#pragma comment(lib, "shell32")
#pragma comment(lib, "advapi32")
#pragma comment(lib, "rpcrt4")
#pragma comment(lib, "shlwapi")
#pragma comment(lib, "comctl32")
#pragma comment(linker,"\"/manifestdependency:type='win32' name='Microsoft.Windows.Common-Controls' version='6.0.0.0' processorArchitecture='*' publicKeyToken='6595b64144ccf1df' language='*'\"") // this is required for loading correct comctl32 dll file
#undef NOMINMAX
#undef WIN32_LEAN_AND_MEAN
#undef WIN32_MEAN_AND_LEAN
#undef VC_EXTRALEAN
#if 0
BOOL ReadFile(
[in] HANDLE hFile,
[out] LPVOID lpBuffer,
[in] DWORD nNumberOfBytesToRead,
[out, optional] LPDWORD lpNumberOfBytesRead,
[in, out, optional] LPOVERLAPPED lpOverlapped
);
// In order to read a file we need a handle to a valid filesystem entity to read from: https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createfilea
HANDLE CreateFileA(
[in] LPCSTR lpFileName,
[in] DWORD dwDesiredAccess,
[in] DWORD dwShareMode,
[in, optional] LPSECURITY_ATTRIBUTES lpSecurityAttributes,
[in] DWORD dwCreationDisposition,
[in] DWORD dwFlagsAndAttributes,
[in, optional] HANDLE hTemplateFile
);
#endif
// We need to convert our string slice to a c-string for CreateFileA's path input.
// Byte-size helpers via bit shifts.
// NOTE(review): "KILOBTYES" is a typo of "KILOBYTES", but it is used consistently
// at every call site in this file; renaming would touch all of them, so it is
// only flagged here.
#define KILOBTYES(n) (cast(USIZE, n) << 10)
#define MEGABYTES(n) (cast(USIZE, n) << 20)
#define GIGABYTES(n) (cast(USIZE, n) << 30)
#define TERABYTES(n) (cast(USIZE, n) << 40)
/*
We'll be defining here Fixed-sized memory blocks using typedefs on-demand
They will have the following format:
typedef U8 FMem_<size>KB [ <U8 amount> ];
*/
typedef U8 FMem_16KB [ KILOBTYES(16) ];
typedef U8 FMem_64KB [ KILOBTYES(64) ];
// Compiler-extension keyword alias; used by the slice_iter macro below.
#define typeof __typeof__
// Wraps a fixed-size stack buffer as a SliceMem (pointer + byte length).
#define fmem_slice(mem) (SliceMem) { mem, size_of(mem) }
// We'll be using an intrinsic for copying memory:
// We'll be using an intrinsic for copying memory:
// Copies `length` bytes from src to dest via the x86 rep-movsb intrinsic.
// Returns dest on success; nullptr when either pointer is null or length is 0
// (note: this differs from memcpy, which returns dest unconditionally).
// Forward byte copy — regions must not overlap.
void* memory_copy(void* dest, void const* src, USIZE length)
{
if (dest == nullptr || src == nullptr || length == 0) {
return nullptr;
}
// https://learn.microsoft.com/en-us/cpp/intrinsics/movsb?view=msvc-170
__movsb((unsigned char*)dest, (const unsigned char*)src, length);
return dest;
}
// Often we'll want to check validity of a slice:
// Often we'll want to check validity of a slice:
#define slice_assert(slice) do { \
assert(slice.ptr != nullptr); \
assert(slice.len > 0); \
} while(0)
// Byte-wise copy of src into dest; dest must be at least as large as src.
// NOTE(review): both *_typewidth parameters are currently unused — presumably
// retained for symmetry/debug purposes; confirm before removing.
void slice__copy(SliceMem dest, SSIZE const dest_typewidth, SliceMem const src, SSIZE const src_typewidth) {
assert(dest.len >= src.len);
slice_assert(dest);
slice_assert(src);
memory_copy(dest.ptr, src.ptr, src.len);
}
// Typed front-end: converts element counts to byte lengths before delegating.
#define slice_copy(dest,src) slice__copy( \
(SliceMem ){(dest).ptr, (dest).len * size_of(*(dest).ptr)}, size_of(*(dest).ptr) \
, (SliceMem const){(src ).ptr, (src ).len * size_of(*(src ).ptr)}, size_of(*(src ).ptr) \
)
// Assumes memory is zeroed.
// Copies `content` into `mem` and returns it as a C string.
// Assumes the backing memory is zeroed: the terminator is the zeroed byte
// immediately after the copied content.
char const* str8_to_cstr_capped(Str8 content, SliceMem mem) {
	// Fix: was `>=`, which permitted mem.len == content.len and left no room
	// for a null terminator within the backing memory.
	assert(mem.len > content.len);
	memory_copy(mem.ptr, content.ptr, content.len);
	return mem.ptr;
}
// To support zeroing slices we'll utilize an intrinisc.
// Zero-fills `length` bytes at dest via the x86 rep-stosb intrinsic.
// Returns false (no-op) for a null dest or zero length; true otherwise.
B32 memory_zero(void* dest, USIZE const length) {
	// Fix: was `length <= 0` — USIZE is unsigned, so `<= 0` silently means
	// `== 0`; spell the actual condition.
	if (dest == nullptr || length == 0) {
		return false;
	}
	// https://learn.microsoft.com/en-us/cpp/intrinsics/stosb
	__stosb((unsigned char*)dest, 0, length);
	return true;
}
// Zeroes every byte of the slice; asserts the slice is valid first.
// NOTE(review): `typewidth` is unused, mirroring slice__copy — confirm intent.
void slice__zero(SliceMem mem, SSIZE typewidth) {
slice_assert(mem);
memory_zero(mem.ptr, mem.len);
}
// Typed front-end: converts the element count to a byte length before delegating.
#define slice_zero(slice) slice__zero((SliceMem){(slice).ptr, (slice).len * size_of(*(slice).ptr)}, size_of(*(slice).ptr))
// Now for our "Version 1"
#if defined(DEMO__FILE_READ_CONTENTS_V1) || defined(DEMO__WATL_LEX_V1) || defined(DEMO__WATL_PARSE_V1) || defined(DEMO__WATL_DUMP_V1)
struct FileOpResult
{
// For now we'll just have the content
// Bytes read from the file; zeroed/empty when the operation failed.
SliceByte content;
};
struct Opts__read_file_contents
{
// For now we'll just have the backing memory provided as a slice.
// Required: destination storage for the file's bytes.
SliceMem backing;
// And whether we should zero the backing.
B32 zero_backing;
};
// Reads the whole file at `path` into opts->backing. On any failure the result
// content is left zeroed/empty. Paths are capped at 16 KiB (V1 limitation).
void api_file_read_contents(FileOpResult* result, Str8 path, Opts__read_file_contents* opts)
{
	assert(result != nullptr);
	assert(opts != nullptr);
	slice_assert(path);
	// Backing is required at this point
	slice_assert(opts->backing);
	// This will limit a path for V1 to be 16kb worth of codepoints.
	FMem_16KB scratch = {0};
	char const* path_cstr = str8_to_cstr_capped(path, fmem_slice(scratch) );
	HANDLE id_file = CreateFileA(
		path_cstr,
		GENERIC_READ,
		FILE_SHARE_READ,
		NULL,
		OPEN_EXISTING,
		FILE_ATTRIBUTE_NORMAL,
		NULL
	);
	B32 open_failed = id_file == INVALID_HANDLE_VALUE;
	if (open_failed) {
		DWORD error_code = GetLastError();
		assert(error_code != 0);
		result->content = (SliceByte){0};
		return;
	}
	LARGE_INTEGER file_size = {0};
	B32 get_size_failed = ! GetFileSizeEx(id_file, & file_size);
	if (get_size_failed) {
		// Fix: the handle leaked on this path; also the old assert compared the
		// boolean flag against INVALID_FILE_SIZE, which never held.
		CloseHandle(id_file);
		result->content = (SliceByte){0};
		return;
	}
	// Because we are currently using fixed size memory, we need to confirm that we can hold this content.
	B32 not_enough_backing = opts->backing.len < file_size.QuadPart;
	if (not_enough_backing) {
		assert(not_enough_backing);
		// Fix: the handle leaked on this path too.
		CloseHandle(id_file);
		// Otherwise we don't provide a result.
		result->content = (SliceByte){0};
		return;
	}
	if (opts->zero_backing) {
		slice_zero(pcast(SliceByte, opts->backing));
	}
	DWORD amount_read = 0;
	BOOL read_result = ReadFile(
		id_file,
		opts->backing.ptr,
		// ReadFile takes a DWORD; V1 does not support files >= 4 GiB
		// (the not_enough_backing check above already bounds this in practice).
		cast(DWORD, file_size.QuadPart),
		& amount_read,
		nullptr
	);
	CloseHandle(id_file);
	B32 read_failed = ! read_result;
	read_failed |= amount_read != file_size.QuadPart;
	if (read_failed) {
		assert(read_failed);
		result->content = (SliceByte){0};
		return;
	}
	result->content.ptr = opts->backing.ptr;
	result->content.len = file_size.QuadPart;
	return;
}
#endif // DEMO__FILE_READ_CONTENTS_V1
// Version agnostic code:
// Value-returning wrapper over api_file_read_contents.
inline
FileOpResult file__read_contents(Str8 path, Opts__read_file_contents* opts) {
	// Fix: zero-initialize — the api variant returns early on failure without
	// writing the result, which previously returned indeterminate stack bytes.
	FileOpResult result = {0};
	api_file_read_contents(& result, path, opts);
	return result;
}
// And now to put it all together into a test run in the debugger. Content should be properly formatted if the code is correct.
#ifdef DEMO__FILE_READ_CONTENTS_V1
int main()
{
	// This will limit for our V1 read to 64kb at most.
	FMem_64KB read_mem = {0};
	FileOpResult res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
	return 0;
}
// Fix: a bare token after #endif is not standard C; keep the guard name in a comment.
#endif // DEMO__FILE_READ_CONTENTS_V1
/*
Now that we have file reading done we need to be able to process the content.
First we want to do lexical analysis. So we'll create a token listing delimiting aspects of the text file relevant to us.
For our data structure, we are going for a Whitespace-Aware Text Layout; where we'll track text and the formatting around them.
Just like with the read file contents operation, we'll define an interface to performing this analysis.
It will be called watl_lex and take the SliceMem from the file as a Str8 slice and some Opts__watl_lex;
returning a WATL_LexInfo for providing user info on how the operation went.
*/
// Forward declarations for the WATL lexer interface (definitions gated per demo).
typedef struct WATL_LexInfo WATL_LexInfo;
typedef struct Opts__watl_lex Opts__watl_lex;
// Pointer-based variant: writes into caller-provided info storage.
void api_watl_lex(WATL_LexInfo* info, Str8 source, Opts__watl_lex* opts);
// Value-returning variant wrapping the above.
WATL_LexInfo watl__lex ( Str8 source, Opts__watl_lex* opts);
// Lets callers pass options as designated initializers (or omit them entirely).
#define watl_lex(source, ...) watl__lex(source, &(Opts__watl_lex){__VA_ARGS__})
/*
Token identification will be done using a WATL_TokKind enumeration.
The token type itself will be the id along with a ptr to its start of the slice. We can resolve the width of the token by its delta to the next token.
If its the last token, then its delta is determined by its offset to the end of the Str8 slice.
*/
// Formatting kinds reuse their ASCII codepoint as the enum value; text is a sentinel.
typedef U32 WATL_TokKind;
enum WATL_TokKind {
WATL_Tok_Space = ' ',
WATL_Tok_Tab = '\t',
WATL_Tok_CarriageReturn = '\r',
WATL_Tok_LineFeed = '\n',
WATL_Tok_Text = 0xFFFFFFFF,
};
// A token records only where it starts in the source; see the comment above
// for how widths are derived.
typedef struct WATL_Tok WATL_Tok;
struct WATL_Tok {
char const* code;
};
typedef struct WATL_SliceTok WATL_SliceTok;
struct WATL_SliceTok {
WATL_Tok* ptr;
SSIZE len;
};
// Resolves the source text spanned by `tok`: a token's width is the distance
// from its start pointer to the next token's start pointer.
// NOTE(review): the last-token math mixes units — `toks.len` is a token COUNT,
// while `offset` is a BYTE offset into the source, so `left` does not look like
// a valid byte length. Also `last_tok` is algebraically always true and unused,
// and `next > (toks.ptr + toks.len)` is false when next is exactly one past the
// end (the genuine last token), so `next->code` appears to be dereferenced one
// element out of bounds there. Verify against the "offset to the end of the
// Str8 slice" rule stated above before relying on last-token widths.
Str8 watl_tok_str8(WATL_SliceTok toks, WATL_Tok* tok) {
WATL_Tok* next = tok + 1;
USIZE start = cast(USIZE, toks.ptr->code);
USIZE curr = cast(USIZE, tok->code);
USIZE offset = curr - start;
SSIZE left = toks.len - offset;
B32 last_tok = (start + toks.len) == (curr + left);
Str8 text = {0};
text.ptr = tok->code;
text.len = next > (toks.ptr + toks.len) ?
left
// Othwerise its the last minus the curr.
: cast(SSIZE, next->code - tok->code);
return text;
}
/*
Tokens are allocated to a backing slice of memory defined by the user. This pool of memory will ideally not be constrained to a fixed size on the stack.
So for V1 we'll allocate 10 megs of heap memory to act as a pool for the tokens. We'll keep track of how much for the pool we used via a new memory tracking construct:
The fixed-sized arena.
A basic fixed size arena only has three components which can vary depending on the convention the user prefers.
In our case we'll track its capacity, its starting address, and how much has been committed.
*/
// We use this in conjunction with Arenas to save a point that's safe to rewind to by the user.
// We use this in-conjunction with Areans to save a point thats safe to rewind to by the user.
typedef struct ArenaSP ArenaSP;
struct ArenaSP { void* ptr; };
#pragma region FArena
// Fixed-size bump arena: a base pointer, a byte capacity, and a used-bytes offset.
typedef struct FArena FArena;
struct FArena {
void* start;
USIZE capacity;
USIZE used;
};
void api_farena_init(FArena* arena, SliceMem mem);
FArena farena_init (SliceMem mem);
void* farena__push (FArena* arena, USIZE type_size, USIZE amount, Str8 dbg_typename);
void farena_reset (FArena* arena);
void farena_rewind (FArena* arena, ArenaSP savepoint);
ArenaSP farena_save (FArena arena);
// Typed single-element push; passes the type name along for debug purposes.
#define farena_push(arena, type) cast(type*, farena__push(& arena, size_of(type), 1, lit(stringify(type))) )
// Typed array push. Relies on a `Slice<type>` struct existing by naming convention
// (e.g. SliceByte for U8) — the token paste constructs that name.
#define farena_push_array(arena, type, amount) (Slice ## type){ farena__push(& arena, size_of(type), amount, lit(stringify(type))), amount }
// Binds the arena to the given memory block; allocation begins at offset zero.
inline
void api_farena_init(FArena* arena, SliceMem mem) {
	arena->used     = 0;
	arena->capacity = mem.len;
	arena->start    = mem.ptr;
}
// Value-returning convenience wrapper over api_farena_init.
inline FArena farena_init(SliceMem mem) { FArena result; api_farena_init(& result, mem); return result; }
// Bump-allocates `type_size * amount` bytes from the arena; asserts when the
// remaining capacity is exhausted. `dbg_typename` is accepted for debug tooling
// but currently unused.
// NOTE(review): no alignment handling — the returned pointer is only as aligned
// as `start + used` happens to be. Fine while pushes are size-uniform; confirm
// before interleaving types of differing alignment.
inline
void* farena__push(FArena* arena, USIZE type_size, USIZE amount, Str8 dbg_typename) {
USIZE to_commit = type_size * amount;
USIZE unused = arena->capacity - arena->used;
assert(to_commit <= unused);
void* ptr = cast(void*, cast(USIZE, arena->start) + arena->used);
arena->used += to_commit;
return ptr;
}
// Rewinds the arena so the next push happens at the saved position.
inline
void farena_rewind(FArena* arena, ArenaSP savepoint) {
	void* end = cast(void*, cast(USIZE, arena->start) + arena->used);
	assert_bounds(savepoint.ptr, arena->start, end);
	// Fix: `used` must be SET to the savepoint's offset from start. The original
	// subtracted the offset from `used`, leaving the arena at a meaningless size.
	arena->used = cast(USIZE, savepoint.ptr) - cast(USIZE, arena->start);
}
// Discards everything allocated so far; capacity and base are untouched.
inline void farena_reset(FArena* arena) { arena->used = 0; }
// Captures the arena's current high-water position (start + used).
// Fix: the original saved `start`, which made rewinding to a savepoint
// indistinguishable from a full reset.
inline ArenaSP farena_save (FArena arena) { ArenaSP savepoint; savepoint.ptr = cast(void*, cast(USIZE, arena.start) + arena.used); return savepoint; }
#pragma endregion FArena
#if defined(DEMO__WATL_LEX_V1) || defined(DEMO__WATL_PARSE_V1) || defined(DEMO__WATL_DUMP_V1)
struct WATL_LexInfo {
// For now just the tokens
// Contiguous array of tokens allocated from opts->pool_toks.
WATL_SliceTok tokens;
};
struct Opts__watl_lex {
/*
For this operation we'll enforce that the arena must linearly allocate each token, forming a strictly adjacent set of elements in an array.
This is not necessary and an equivalent process could be done where the tokens instead are semi-contiguously organized into a linked list with a chained arena, or the tokens are sparsely cached.
Where their position in their originating string is not preserved. In this case we're keeping it simple. Tokens are in the same block of memory and they don't use a string cache.
*/
SliceMem pool_toks;
};
// We are assuming everything is utf8-ascii.
void api_watl_lex(WATL_LexInfo* info, Str8 source, Opts__watl_lex* opts)
{
assert(info != nullptr);
slice_assert(source);
assert(opts != nullptr);
FArena arena = farena_init(opts->pool_toks);
char const* end = source.ptr + source.len;
char const* cursor = source.ptr;
char const* prev = source.ptr;
char code = * cursor;
B32 was_formatting = true;
WATL_Tok* tok = nullptr;
for (; cursor < end;)
{
switch (code)
{
case WATL_Tok_Space:
case WATL_Tok_Tab:
{
if (* prev != * cursor) {
tok = farena_push(arena, WATL_Tok);
tok->code = cursor;
was_formatting = true;
}
cursor += 1;
}
break;
case WATL_Tok_LineFeed: {
tok = farena_push(arena, WATL_Tok);
tok->code = cursor;
cursor += 1;
was_formatting = true;
}
break;
// Assuming what comes after is line feed.
case WATL_Tok_CarriageReturn: {
tok = farena_push(arena, WATL_Tok);
tok->code = cursor;
cursor += 2;
was_formatting = true;
}
break;
default:
{
if (was_formatting) {
tok = farena_push(arena, WATL_Tok);
tok->code = cursor;
was_formatting = false;
}
cursor += 1;
}
break;
}
prev = cursor - 1;
code = * cursor;
}
info->tokens.ptr = arena.start;
info->tokens.len = arena.used / size_of(WATL_Tok*);
}
#endif DEMO__WATL_LEX_V1
// Value-returning wrapper over api_watl_lex; the info struct starts zeroed.
inline
WATL_LexInfo watl__lex(Str8 source, Opts__watl_lex* opts) {
	WATL_LexInfo info = {0};
	api_watl_lex(& info, source, opts);
	return info;
}
/*
To allocate onto the heap we'll make a basic slicemem_malloc to allocate, we'll make a corresponding slicemem_free aswell.
However we don't need to use it for the V1 example. The OS will cleanup the pages used by the process during its termination.
*/
// Heap-allocates `amount` bytes via the CRT allocator and wraps the block as a
// SliceMem. Asserts (debug-only) on allocation failure and on trivially small
// requests. Requires <stdlib.h> for malloc — added to the include block at the
// top of the file; the original relied on an implicit declaration.
SliceMem slicemem_alloc(USIZE amount)
{
	assert(amount > KILOBTYES(4));
	void* result = malloc(amount);
	assert(result != nullptr);
	SliceMem mem = {
		.ptr = result,
		.len = amount
	};
	return mem;
}
// Returns a heap block acquired by slicemem_alloc to the CRT allocator.
void slicemem_free(SliceMem mem) {
free(mem.ptr);
}
#ifdef DEMO__WATL_LEX_V1
int main()
{
// This will limit for our V1 read to 64kb at most.
FMem_64KB read_mem = {0};
FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
// This will limit our V1 lex to only 8 megs worth of token tracking on a file.
SliceMem mem_toks = slicemem_alloc(MEGABYTES(8));
// pcast reinterprets SliceByte as Str8 — relies on both having identical layout (ptr, len).
WATL_LexInfo lex_res = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);
// unnecessary in this case but if you want to explicitly:
slicemem_free(mem_toks);
return 0;
}
#endif
/*
Next we'll parse these tokens into a rudimentary WATL Abstract Syntax Tree.
* The tree will be top-level organized by lines consisting of linked slices of visble and non-visible tokens.
* Just as with the the lexical analysis, lines and nodes will be linearly allocated adjacent to each other. This allows us to utilize array operations.
*/
// Forward declarations for the WATL parser interface (definitions gated per demo).
typedef struct WATL_ParseInfo WATL_ParseInfo;
typedef struct Opts__watl_parse Opts__watl_parse;
// Pointer-based variant: writes into caller-provided info storage.
void api_watl_parse(WATL_ParseInfo* info, WATL_SliceTok tokens, Opts__watl_parse* opts);
// Value-returning variant wrapping the above.
WATL_ParseInfo watl__parse ( WATL_SliceTok tokens, Opts__watl_parse* opts);
// Lets callers pass options as designated initializers (or omit them entirely).
#define watl_parse(tokens, ...) watl__parse(tokens, & (Opts__watl_parse) {__VA_ARGS__})
/*
For the sake of the exercise, we'll be eliminating the association with the file's strings and we'll need to instead cache them.
*/
#pragma region Str8Cache
typedef struct Str8Cache Str8Cache;
void api_str8cache_init(Str8Cache* cache, SliceMem mem_strs, SliceMem mem_slots, SliceMem mem_table);
Str8Cache str8cache_init ( SliceMem mem_strs, SliceMem mem_slots, SliceMem mem_table);
// A cache like this relies on tabling string entries utilizing an index derived from a hashed ID.
// For these strings we'll be using a hash called djb8:
// Introducing a slice iterator:
// Expands to the three clauses of a for-loop header over every element of a
// slice (relies on the `typeof` alias). Usage: for (slice_iter(container, it)) { ... }
#define slice_iter(container, iter) typeof((container).ptr) iter = (container).ptr; iter != ((container).ptr + (container).len); ++ iter
// djb-style 64-bit accumulation: for each byte, hash = hash * 257 + byte.
// The caller seeds *hash (typically with 0) and may fold multiple slices.
inline
void hash64_djb8(U64* hash, SliceByte const bytes) {
	U8 const* end = bytes.ptr + bytes.len;
	for (U8 const* cursor = bytes.ptr; cursor != end; ++ cursor) {
		* hash = ((* hash) << 8) + (* hash) + (* cursor);
	}
}
// For a library or codebase its recommended to setup a metaprogram to generate hash utilizing containers
// Or other containers that cannot be sufficiently lifted to general runtime paths without losing ergonomic debug type info or type-constraint enforcements.
// Unlike with the template markup C++ uses, you can strike a balance between how many definitions are redundantly made or optimized for collapsing to a general path
// based on target optimization and debugability.
// For this V1 example, we'll be hand-rolling a fixed sized table with excess slot chaining for colliding slots.
// Its a relatively simple implementation to hand-roll. These things tend to become unyeilding with more advanced variants.
// One table/chain entry: a doubly-linked node so collision chains can be walked.
typedef struct Str8Cache_Slot Str8Cache_Slot;
struct Str8Cache_Slot {
Str8Cache_Slot* prev;
Str8Cache_Slot* next;
Str8 value;
U64 key;
B32 occupied;
};
typedef struct Str8Cache_SliceSlot Str8Cache_SliceSlot;
struct Str8Cache_SliceSlot {
Str8Cache_Slot* ptr;
SSIZE len;
};
struct Str8Cache {
// Arena backing the interned string bytes.
FArena a_str;
// Overflow slot pool for collision chains; `vacant` is the next unclaimed slot.
Str8Cache_SliceSlot pool;
Str8Cache_Slot* vacant;
// The hash table proper, indexed by key % table.len.
Str8Cache_SliceSlot table;
};
// Value-returning convenience wrapper over api_str8cache_init.
Str8Cache str8cache_init(SliceMem mem_strs, SliceMem mem_slots, SliceMem mem_table) {
	Str8Cache result;
	api_str8cache_init(& result, mem_strs, mem_slots, mem_table);
	return result;
}
// Carves the cache's arena, overflow pool, and table out of the three provided
// memory blocks; pool and table start fully zeroed (unoccupied).
inline
void api_str8cache_init(Str8Cache* cache, SliceMem mem_strs, SliceMem mem_slots, SliceMem mem_table) {
	assert(cache != nullptr);
	slice_assert(mem_strs);
	slice_assert(mem_slots);
	slice_assert(mem_table);
	cache->a_str = farena_init(mem_strs);
	cache->pool  = (Str8Cache_SliceSlot){mem_slots.ptr, mem_slots.len / size_of(Str8Cache_Slot)};
	cache->table = (Str8Cache_SliceSlot){mem_table.ptr, mem_table.len / size_of(Str8Cache_Slot)};
	// Fix: `vacant` was never initialized; str8cache_set dereferences it when the
	// first collision occurs, which was undefined behavior. It must start at the
	// first slot of the overflow pool.
	cache->vacant = cache->pool.ptr;
	slice_zero(cache->pool);
	slice_zero(cache->table);
}
// Marks every table slot and every chained overflow slot as unoccupied.
// (Strings are not freed individually — see the note below; the backing arena
// can only be wiped wholesale.)
void str8cache_clear(Str8Cache* cache)
{
	for (slice_iter(cache->table, slot))
	{
		// Fix: the original assigned `slot->occupied = false` inside this loop,
		// re-clearing the surface slot and leaving every chained slot occupied.
		for (Str8Cache_Slot* probe_slot = slot->next; probe_slot != nullptr; probe_slot = probe_slot->next) {
			probe_slot->occupied = false;
		}
		slot->occupied = false;
	}
}
// We don't introduce a remove option because we're not tracking fixed sized entities.
// Strings take up non-determistic sizes of their backing arena. So the only thing that can be done with the cache is wiping it and recaching all strings.
/*
When storing a hash of a slot, we can almost never utilize the full width of a key,
so we must truncate the key via module to get a "good enough" unique ID to place in the table.
*/
// Truncates a 64-bit key into the table's index space via modulo.
inline
U64 str8cache_slot_id(Str8Cache cache, U64 key) {
	return key % cast(U64, cache.table.len);
}
// Looks up `key`: checks the table's surface slot, then walks its collision
// chain. Returns a pointer to the cached string, or nullptr when absent.
Str8* str8cache_get(Str8Cache cache, U64 key)
{
	U64 hash_index = str8cache_slot_id(cache, key);
	Str8Cache_Slot* surface_slot = & cache.table.ptr[hash_index];
	// (Removed the original's null-check on surface_slot: the address of an
	// array element is never null, so the branch was dead code.)
	if (surface_slot->occupied && surface_slot->key == key) {
		return & surface_slot->value;
	}
	for (Str8Cache_Slot* slot = surface_slot->next; slot != nullptr; slot = slot->next)
	{
		if (slot->occupied && slot->key == key) {
			return & slot->value;
		}
	}
	return nullptr;
}
// Interns `value` under `key`: the string's bytes are copied into the cache's
// arena the first time the key is seen, and a pointer to the cached copy is
// returned. Collisions extend the surface slot's chain with slots drawn from
// the overflow pool.
Str8* str8cache_set(Str8Cache* cache, U64 key, Str8 value)
{
	U64 hash_index = str8cache_slot_id(*cache, key);
	Str8Cache_Slot* surface_slot = & cache->table.ptr[hash_index];
	if (! surface_slot->occupied || surface_slot->key == key)
	{
		if (value.ptr != surface_slot->value.ptr) {
			SliceMem mem = { farena__push(& cache->a_str, size_of(U8), value.len, lit("Str8")), value.len };
			slice_copy(pcast(SliceByte, mem), value);
			surface_slot->value = pcast(Str8, mem);
		}
		surface_slot->key = key;
		surface_slot->occupied = true;
		return & surface_slot->value;
	}
	Str8Cache_Slot* slot = surface_slot;
	for (;; slot = slot->next)
	{
		if (slot->next == nullptr)
		{
			// We had a collision, we need to grab a vacant slot from the pool and utilize it instead.
			slot->next = cache->vacant;
			* slot->next = (Str8Cache_Slot){0};
			slot->next->prev = slot;
			Str8Cache_Slot* next_vacant = cache->vacant + 1;
			assert(next_vacant < cache->pool.ptr + cache->pool.len );
			// If the above fails we ran out of extra slots.
			cache->vacant = next_vacant;
		}
		if ( ! slot->next->occupied || slot->next->key == key)
		{
			if (value.ptr != slot->next->value.ptr) {
				SliceMem mem = { farena__push(& cache->a_str, size_of(U8), value.len, lit("Str8")), value.len };
				slice_copy(pcast(SliceByte, mem), value);
				slot->next->value = (Str8){mem.ptr, mem.len / size_of(char)};
			}
			// Fix: the original unconditionally re-assigned `value` (the caller's
			// uncached pointer) to the slot here, overwriting the arena-backed
			// copy made just above and defeating the purpose of the cache.
			slot->next->key = key;
			slot->next->occupied = true;
			return & slot->next->value;
		}
		// We keep traversing till we find a match or we find a vacancy for this list in the table.
		// Make sure to tune the size of the table so it does this less!
		// Note: Tables sized by prime values collide less aswell.
		// You can use a closest prime number lookup table to derive what length to expose to the cache's table for hash ID resolution.
	}
	return nullptr;
}
#pragma endregion Str8Cache
// Finally our abstracted cache interface:
// Interns `str` into the cache: hashes its bytes for the key, stores it via
// str8cache_set, and hands back the cache-owned copy.
Str8 cache_str8(Str8Cache* cache, Str8 str)
{
	U64 key = 0;
	hash64_djb8(& key, pcast(SliceByte, str));
	Str8* cached = str8cache_set(cache, key, str);
	assert(cached != nullptr);
	return * cached;
}
// A parse node is (for now) just the interned string of one token run.
typedef Str8 WATL_Node;
#if 0
typedef struct WATL_Node WATL_Node;
struct WATL_Node {
WATL_NodeKind kind;
Str8 entry;
};
#endif
// A line is a contiguous run of nodes (the parser allocates nodes adjacently).
typedef struct WATL_Line WATL_Line;
struct WATL_Line {
WATL_Node* ptr;
SSIZE len;
};
typedef struct WATL_SliceLine WATL_SliceLine;
struct WATL_SliceLine {
WATL_Line* ptr;
SSIZE len;
};
#if defined(DEMO__WATL_PARSE_V1) || defined(DEMO__WATL_DUMP_V1)
struct Opts__watl_parse {
// Arena memory for nodes and for lines respectively; both are required.
SliceMem backing_nodes;
SliceMem backing_lines;
// Required: every token's text is interned through this cache.
Str8Cache* str_cache;
};
struct WATL_ParseInfo {
WATL_SliceLine lines;
};
// Parses the token stream into lines of interned nodes. Lines and nodes are
// each allocated linearly from their own backing arena, so both behave as arrays.
void api_watl_parse(WATL_ParseInfo* info, WATL_SliceTok tokens, Opts__watl_parse* opts)
{
	assert(info != nullptr);
	slice_assert(tokens);
	assert(opts != nullptr);
	FArena a_lines = farena_init(opts->backing_lines);
	FArena a_nodes = farena_init(opts->backing_nodes);
	WATL_Line* line = farena_push(a_lines, WATL_Line);
	WATL_Node* curr = farena_push(a_nodes, WATL_Node); // Preemptively allocate a node for the line (may not be used)
	* curr = (WATL_Node){0};
	line->ptr = curr;
	line->len = 0;
	info->lines.ptr = line;
	// Fix: the first line exists as soon as it is allocated. Starting the count
	// at 0 made `lines.len` equal the newline count, leaving the final line
	// (everything after the last newline) uncounted.
	info->lines.len = 1;
	for (slice_iter(tokens, token))
	{
		switch (* token->code)
		{
			// The lexer emits CRLF as a single carriage-return token, so both
			// kinds terminate the current line and begin a new one.
			case WATL_Tok_CarriageReturn:
			case WATL_Tok_LineFeed: {
				WATL_Line* new_line = farena_push(a_lines, WATL_Line);
				line = new_line;
				line->ptr = curr;
				line->len = 0;
				info->lines.len += 1;
			}
			continue;
			default:
			break;
		}
		// Text or whitespace run: intern its string and append a node to the line.
		Str8 tok_str = watl_tok_str8(tokens, token);
		* curr = cache_str8( opts->str_cache, tok_str );
		curr = farena_push(a_nodes, WATL_Node);
		* curr = (WATL_Node){0};
		line->len += 1;
		continue;
	}
}
#endif // DEMO__WATL_PARSE_V1
// Value-returning wrapper over api_watl_parse. Zero-initialized for parity with
// watl__lex/file__read_contents, guarding against partially-written results.
WATL_ParseInfo watl__parse(WATL_SliceTok tokens, Opts__watl_parse* opts) { WATL_ParseInfo info = {0}; api_watl_parse(& info, tokens, opts); return info; }
#ifdef DEMO__WATL_PARSE_V1
int main()
{
// This will limit for our V1 read to 64kb at most.
FMem_64KB read_mem = {0};
FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
// This will limit our V1 lex to only 8 megs worth of token tracking on a file.
SliceMem mem_toks = slicemem_alloc(MEGABYTES(8));
WATL_LexInfo lex_res = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);
SliceMem mem_cache_strs = slicemem_alloc(MEGABYTES(64));
// NOTE(review): these sizes multiply by size_of(Str8Cache_SliceSlot) (the slice
// header) rather than size_of(Str8Cache_Slot) (the entry). Looks like a typo —
// it only changes how much memory is allocated, since the cache init divides by
// the slot size itself — but confirm the intended slot counts.
SliceMem mem_cache_slots = slicemem_alloc(1024 * 1024 * 16 * size_of(Str8Cache_SliceSlot));
SliceMem mem_cache_table = slicemem_alloc(1024 * 16 * size_of(Str8Cache_SliceSlot));
Str8Cache str_cache = str8cache_init(mem_cache_strs, mem_cache_slots, mem_cache_table);
SliceMem mem_parse_nodes = slicemem_alloc(MEGABYTES(4));
SliceMem mem_parse_lines = slicemem_alloc(MEGABYTES(4));
WATL_ParseInfo parse_res = watl_parse(lex_res.tokens, .backing_nodes = mem_parse_nodes, .backing_lines = mem_parse_lines, .str_cache = & str_cache);
// unnecessary in this case but if you want to explicitly:
slicemem_free(mem_toks);
return 0;
}
#endif
/*
Now we'll like to dump this WATL structure into a file.
To do so we'll need to generate the content string of the file dynamically based on the WATL's content.
We'll be utilizing a new construct called a string generator which be tied to all functionality for constructing strings.
*/
// A growing string builder over a caller-provided, fixed-size backing buffer.
typedef struct Str8Gen Str8Gen;
struct Str8Gen {
SliceMem backing; // For V1 the backing buffer is fixed size.
// Write head: ptr is the buffer base, len is the number of bytes generated so far.
char* ptr;
SSIZE len;
};
void str8gen_init(Str8Gen* gen, SliceMem backing);
Str8Gen str8gen_make( SliceMem backing);
void str8gen_append_str8(Str8Gen* gen, Str8 str);
// void str8gen_append_fmt (Str8Gen* gen, Str8 fmt, ...);
// Binds the generator to its fixed backing buffer; generation starts empty.
void str8gen_init(Str8Gen* gen, SliceMem backing) {
	assert(gen != nullptr);
	gen->len     = 0;
	gen->ptr     = backing.ptr;
	gen->backing = backing;
}
// Value-returning convenience wrapper over str8gen_init.
Str8Gen str8gen_make(SliceMem backing) { Str8Gen result; str8gen_init(& result, backing); return result; }
// Appends `str`'s bytes at the generator's current write position; asserts the
// backing buffer has room.
void str8gen_append_str8(Str8Gen* gen, Str8 str) {
	SSIZE left = gen->backing.len - gen->len;
	assert(left >= str.len);
	SliceByte dest = {cast(U8*, gen->ptr + gen->len), str.len};
	slice_copy(dest, str);
	// Fix: the original never advanced `len`, so every append overwrote the
	// previous one at offset 0 of the remaining window.
	gen->len += str.len;
	return;
}
/*
In order to support appending formatted content via str8gen_append_fmt, we'll be using a substitution formatter utilizing a string identification token pattern.
Where a format template string is provided with an 'id' wrapped in delimiters which will be the angle brackets: <id>
Example: This formatted string will have <id> substituted into it.
*/
// One substitution mapping: the hashed token id and the replacement text.
typedef struct FmtTokEntry FmtTokEntry;
struct FmtTokEntry {
U64 key;
Str8 value;
};
typedef struct SliceFmtTokEntry SliceFmtTokEntry;
struct SliceFmtTokEntry {
FmtTokEntry* ptr;
SSIZE len;
};
// One-past-the-end pointer of any slice (argument is expanded twice; avoid side effects).
#define slice_end(slice) (slice.ptr + slice.len)
/*
This is a token substiuting formatter using a array table lookup for tokens to substitute.
*/
// Token-substituting formatter: copies fmt_template into buffer, replacing each
// <id> occurrence whose hashed id matches an entry in `tokens` with that entry's
// value. Unmatched <...> spans are copied through verbatim. Returns a Str8 view
// over the written prefix of `buffer`.
// NOTE(review): the '>' scan below has no bound — an unterminated '<' in the
// template walks past its end; and the value-copy loop decrements
// buffer_remaining without checking it, so a too-small buffer can be overrun.
// Both need template/buffer bound checks before untrusted input is formatted.
Str8 fmt_vtoken_slice(SliceMem buffer, SliceFmtTokEntry tokens, Str8 fmt_template)
{
slice_assert(buffer);
slice_assert(tokens);
slice_assert(fmt_template);
char* cursor_buffer = buffer.ptr;
SSIZE buffer_remaining = buffer.len;
char curr_code = * fmt_template.ptr;
char const* cursor_fmt = fmt_template.ptr;
SSIZE left_fmt = fmt_template.len;
while (left_fmt && buffer_remaining)
{
// Forward until we hit the delimiter '<' or the template's contents are exhausted.
while (curr_code && curr_code != '<' && cursor_fmt != slice_end(fmt_template))
{
* cursor_buffer = * cursor_fmt;
++ cursor_buffer;
++ cursor_fmt;
-- buffer_remaining;
-- left_fmt;
curr_code = * cursor_fmt;
}
if (curr_code == '<')
{
// Measure the candidate id between '<' and the next '>' (see bound note above).
char const* cursor_potential_token = cursor_fmt + 1;
SSIZE potential_token_length = 0;
while (* (cursor_potential_token + potential_token_length) != '>') {
++ potential_token_length;
}
// Hashing the potential token and cross checking it with our token table
U64 key = 0; hash64_djb8(& key, (SliceByte){cursor_fmt + 1, potential_token_length});
Str8* value = nullptr;
for (slice_iter(tokens, token))
{
// We do a linear iteration instead of a hash table lookup because the user should be never substiuting with more than 100 unqiue tokens..
if (token->key == key) {
value = & token->value;
break;
}
}
if (value)
{
// Copy the substitution value into the output buffer.
SSIZE left = value->len;
char const* cursor_value = value->ptr;
while (left --)
{
* cursor_buffer = * cursor_value;
++ cursor_buffer;
++ cursor_value;
-- buffer_remaining;
}
// Sync cursor format to after the processed token
cursor_fmt = cursor_potential_token + potential_token_length + 1;
curr_code = * cursor_fmt;
left_fmt -= potential_token_length + 2; // The 2 here are the '<' & '>' delimiters being omitted.
continue;
}
// No match: emit the '<' itself and resume scanning after it.
* cursor_buffer = * cursor_fmt;
++ cursor_buffer;
++ cursor_fmt;
-- buffer_remaining;
-- left_fmt;
curr_code = * cursor_fmt;
}
}
Str8 result = {buffer.ptr, buffer.len - buffer_remaining};
return result;
}
// A slice of Str8s; fmt__vtoken treats it as flat [key0, value0, key1, value1, ...] pairs.
typedef struct SliceStr8 SliceStr8;
struct SliceStr8 {
Str8* ptr;
SSIZE len;
};
// Builds the FmtTokEntry table at the front of `backing` from flat key/value
// Str8 pairs, then formats fmt_template into the remainder of `backing`.
Str8 fmt__vtoken(SliceMem backing, Str8 fmt_template, SliceStr8* tokens)
{
	FArena a_backing = farena_init(backing);
	SliceFmtTokEntry table = {a_backing.start, 0};
	// (Removed the original's unused `S32 left = tokens->len;` — it also silently
	// truncated an SSIZE to 32 bits.)
	for (slice_iter(*tokens, token)) {
		FmtTokEntry* entry = farena_push(a_backing, FmtTokEntry);
		* entry = (FmtTokEntry){0};
		// Current element is the key half of the pair; hash it for lookup.
		hash64_djb8(& entry->key, (SliceByte){token->ptr, token->len});
		// Advance to the value half (the loop's own increment then skips to the next pair).
		++ token;
		entry->value = * token;
		++ table.len;
	}
	// Whatever the table didn't consume becomes the output buffer.
	SliceMem buffer = { .ptr = cast(U8*, a_backing.start) + a_backing.used, .len = a_backing.capacity - a_backing.used };
	Str8 result = fmt_vtoken_slice(buffer, table, fmt_template);
	return result;
}
// Expected to take a Str8 array of entries formatted as a 2D array of key-value pairs (Str8[length][2])
// The array will be tracked using a SliceStr8 structure.
// Note: size_of(tokens)/size_of(Str8) counts TOTAL Str8s (2 per pair), which is
// exactly what fmt__vtoken's stride-2 pair walk expects.
#define fmt_vtoken(backing, fmt_template, tokens) fmt__vtoken(backing, fmt_template, &(SliceStr8){.ptr = cast(Str8*, tokens), .len = size_of(tokens) / size_of(Str8) })
/*
Define a mapping array:
Str8 mappings [][2] = {
fmt_vtoken_entry("key", "value"),
^^ Add entries as above ^^
}
*/
#define fmt_vtoken_entry(key, value) { lit(key), lit(value) }
#ifdef DEMO__WATL_DUMP_V1
int main()
{
// Single scratch block: fmt__vtoken carves its token table and output buffer out of it.
SliceMem scratch = slicemem_alloc(MEGABYTES(64));
Str8 subst_table [][2] = {
fmt_vtoken_entry("maybe_sub", "IT SUBST!!!"),
};
// Inspect `test_str` in a debugger to verify the <maybe_sub> substitution.
Str8 test_str = fmt_vtoken(scratch, lit("Will this work? <maybe_sub>"), subst_table);
return 0;
}
#endif