/*
A introduction to C11 with a str cache demo.
Attempting to showcase better conventions and constructs in C; Discovered to me as of 2025 from scouring the internet.
*/

/*
The below will be implemented within this single file.
Because of this, definitions will be kept on a need-to-have basis to target only one vendor target and toolchain.
We will not use nearly any libraries and will be targeting only Windows 11 x64 using MSVC.

Even so the constructs defined and their dependencies can be properly abstracted into a ergonomic library for multiple targets with enough time and pain.
The difference is just more preprocess conditionals, and how far a library is trying to support a larger range of targets and their age discrpancy.
The more minimal the less cruft.

Definitions are defined linearly on the file on-demand as needed. Since the file is to be read linearly.
This will cause non-categorical organization so it will be more difficult to sift through if you wanted
to see definitions related to a sepecific kind of data or operation (strings, memory, etc).
*/
#if 0
int main()
{
	// This will limit for our V1 read to 64kb at most.
	FMem_128KB   read_mem = {0};
	FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );

	// This will limit our V1 lex to only 8 megs worth of token tracking on a file.
	SliceByte    mem_toks = slicemem_alloc(MEGABYTES(16));
	WATL_LexInfo lex_res  = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);

	SliceByte mem_cache_strs  = slicemem_alloc(MEGABYTES(64));
	SliceByte mem_cache_slots = slicemem_alloc(1024 * 1024 * 16 * size_of(Str8Cache_SliceSlot));
	SliceByte mem_cache_table = slicemem_alloc(1024        * 16 * size_of(Str8Cache_SliceSlot));
	Str8Cache str_cache       = str8cache_init(mem_cache_strs, mem_cache_slots, mem_cache_table);

	SliceByte mem_parse_nodes = slicemem_alloc(MEGABYTES(4));
	SliceByte mem_parse_lines = slicemem_alloc(MEGABYTES(4));
	WATL_ParseInfo parse_res  = watl_parse(lex_res.tokens, .backing_nodes = mem_parse_nodes, .backing_lines = mem_parse_lines, .str_cache = & str_cache);

	SliceByte scratch_dmp = slicemem_alloc(MEGABYTES(16));
	Str8      listing     = watl_dump_listing(scratch_dmp, parse_res.lines);
	file_write_str8(lit("demo.str_cache.listing.txt"), listing);
	return 0;
}
#endif

// Demo selection
// #define DEMO__STR_SLICE
// #define DEMO__FILE_READ_CONTENTS_V1
// #define DEMO__WATL_LEX_V1
// #define DEMO__WATL_PARSE_V1
// #define DEMO__WATL_DUMP_PREREQ_V1
#define DEMO__WATL_DUMP_V1

/*
The above makes use of the following core concepts to achieve its net result:
* Slices
* Arenas
* Generic Runtime Allocator Interface
* Hashing

Secondarily for the purposes of using the above sufficiently the following are also utilized:
* Virtual Address Space
* Read/Write Files
* Lexing & Parsing
* Debug printing
*/

/*
First thing we'll problably want is a way to deal with text effectively.
So we'll setup the the minimum for that when dealing with immutable constructs.
*/

// We'll need some minimum set of dependencies to adequately define the constructs.
// ASSUMING MODERN MSVC TOOLCHAIN.

#include <intrin.h>
#include <tmmintrin.h>
#include <wmmintrin.h>

#include <assert.h>

typedef unsigned __int8  U8;
typedef signed   __int8  S8;
typedef unsigned __int16 U16;
typedef signed   __int16 S16;
typedef unsigned __int32 U32;
typedef signed   __int32 S32;
typedef unsigned __int64 U64;
typedef signed   __int64 S64;

typedef size_t    USIZE;
typedef ptrdiff_t SSIZE;

enum {
	false,
	true,
	true_overflow,
};
typedef S8  B8;
typedef S16 B16;
typedef S32 B32;

// Common macros we'll use throughout this.

#define assert_bounds(point, start, end) do { \
	USIZE pos_point = cast(USIZE, point);     \
	USIZE pos_start = cast(USIZE, start);     \
	USIZE pos_end   = cast(USIZE, end);       \
	assert(pos_start <= pos_point);           \
	assert(pos_point <= pos_end);             \
} while(0)

// Functional style cast
#define cast(type, data)  ((type)(data))
#define pcast(type, data) * cast(type*, & (data))

#define nullptr cast(void*, 0)

#define glue_(A, B) A ## B
#define glue(A, B)  glue_(A,B)

// Enforces size querying uses SSIZE type.
#define size_of(data) cast(SSIZE, sizeof(data) )

#define stringify_(S) #S
#define stringify(S)  stringify_(S)

/*
The first construct we'll utilize is a String Slice.
In modern programming with the memory sizes utilized, it is more ergonomic to track the length of strings with their pointer.
Most strings are not stored in some immutable table tracked statically, performance loss in doing so is negligble on modern hardware constraints.
*/

// UTF-8 character encoding unit
typedef unsigned char UTF8;

typedef struct Str8 Str8;
struct Str8 {
	UTF8* ptr;
	SSIZE len;
};

// String iterals in C include null-terminators, we aren't interested in preserving that.
#define lit(string_literal) (Str8){ string_literal, size_of(string_literal) - 1 }

/*
We'll want all of our textual process to operate with UTF-8 code pages:
*/
#include <locale.h>
inline void set_utf8_codepage() { setlocale(LC_ALL, ".UTF-8"); }

// For now this string can visualized using a debugger.
#ifdef DEMO__STR_SLICE
int main()
{
	Str8 first = lit("Our first string as a slice");
	return 0;
}
#endif DEMO__STR_SLICE

/*
We now want to be able to read a file. This will be a heavy rabbit-hole as we'll need to setup a basic file interface
and related definitions for handling the memory.

For the purposes of the initial definition we'll introduced fixed-sized memory handling statically allocated onto the stack.
*/

/*
First off we need to find out how to aquire the contents of a file on Windows.

We'll be wrapping the operation in a procedure called file_read_contents. We'll have it take a path and optional arguments (Opts__read_file_contents).
It will return a result in a composite struct: FileOpResult; which may be expanded as needed in the future.
*/

typedef struct FileOpResult             FileOpResult;
typedef struct Opts__read_file_contents Opts__read_file_contents;
void         file_read_contents_api(FileOpResult* result, Str8 path, Opts__read_file_contents* opts);
FileOpResult file__read_contents   (                      Str8 path, Opts__read_file_contents* opts);
#define file_read_contents(path, ...) file__read_contents(path, & (Opts__read_file_contents){__VA_ARGS__} )

/*
The file contents will be returned in bytes.
To view or manage any slice of bytes we'll be utilizing a byte slice.
*/
typedef struct SliceByte SliceByte;
struct SliceByte {
	U8*   ptr;
	SSIZE len;
};

/*
The above is a pattern that can be provided so that whether or not the result is formatted and provided to the user via the stack is entirely optional.
It also allows for default parameters to be defined conviently.
*/

// We'll utilize the ReadFile procedure within the WinAPI: https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-readfile
#define NOMINMAX
#define WIN32_LEAN_AND_MEAN
#define WIN32_MEAN_AND_LEAN
#define VC_EXTRALEAN
#include <windows.h>
#include <windowsx.h>
#include <timeapi.h>
#include <tlhelp32.h>
#include <Shlobj.h>
#include <processthreadsapi.h>
#pragma comment(lib, "user32")
#pragma comment(lib, "winmm")
#pragma comment(lib, "shell32")
#pragma comment(lib, "advapi32")
#pragma comment(lib, "rpcrt4")
#pragma comment(lib, "shlwapi")
#pragma comment(lib, "comctl32")
#pragma comment(linker,"\"/manifestdependency:type='win32' name='Microsoft.Windows.Common-Controls' version='6.0.0.0' processorArchitecture='*' publicKeyToken='6595b64144ccf1df' language='*'\"") // this is required for loading correct comctl32 dll file
#undef NOMINMAX
#undef WIN32_LEAN_AND_MEAN
#undef WIN32_MEAN_AND_LEAN
#undef VC_EXTRALEAN
#if 0
BOOL ReadFile(
  [in]                HANDLE       hFile,
  [out]               LPVOID       lpBuffer,
  [in]                DWORD        nNumberOfBytesToRead,
  [out, optional]     LPDWORD      lpNumberOfBytesRead,
  [in, out, optional] LPOVERLAPPED lpOverlapped
);

// In order to read a file we need a handle to a valid filesystem entity to read from: https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createfilea
HANDLE CreateFileA(
  [in]           LPCSTR                lpFileName,
  [in]           DWORD                 dwDesiredAccess,
  [in]           DWORD                 dwShareMode,
  [in, optional] LPSECURITY_ATTRIBUTES lpSecurityAttributes,
  [in]           DWORD                 dwCreationDisposition,
  [in]           DWORD                 dwFlagsAndAttributes,
  [in, optional] HANDLE                hTemplateFile
);
#endif

// We need to covert our string slice to a c-string for CreateFileA's path input.

#define KILOBTYES(n) (cast(USIZE, n) << 10)
#define MEGABYTES(n) (cast(USIZE, n) << 20)
#define GIGABYTES(n) (cast(USIZE, n) << 30)
#define TERABYTES(n) (cast(USIZE, n) << 40)

/*
We'll be defining here Fixed-sized memory blocks using typedefs on-demand

They will having the following format:
typedef U8 FMem_<size>KB [ <U8 amount> ];
*/

typedef U8 FMem_16KB  [ KILOBTYES(16)  ];
typedef U8 FMem_64KB  [ KILOBTYES(64)  ];
typedef U8 FMem_128KB [ KILOBTYES(128) ];

#define typeof          __typeof__
#define fmem_slice(mem) (SliceByte) { mem, size_of(mem) }

// We'll be using an intrinsic for copying memory:
void* memory_copy(void* restrict dest, void const* restrict src, USIZE length)
{
	if (dest == nullptr || src == nullptr || length == 0) {
		return nullptr;
	}
	// https://learn.microsoft.com/en-us/cpp/intrinsics/movsb?view=msvc-170
	__movsb((unsigned char*)dest, (const unsigned char*)src, length);
	return dest;
}

// Often we'll want to check validity of a slice:
#define slice_assert(slice) do {  \
	assert(slice.ptr != nullptr); \
	assert(slice.len > 0);        \
} while(0)

void slice__copy(SliceByte dest, SSIZE dest_typewidth, SliceByte src, SSIZE src_typewidth) {
	assert(dest.len >= src.len);
	slice_assert(dest);
	slice_assert(src);
	memory_copy(dest.ptr, src.ptr, src.len);
}
#define slice_copy(dest,src) slice__copy(                                            \
	(SliceByte){(dest).ptr, (dest).len * size_of(*(dest).ptr)}, size_of(*(dest).ptr) \
,	(SliceByte){(src ).ptr, (src ).len * size_of(*(src ).ptr)}, size_of(*(src ).ptr) \
)

// Assumes memory is zeroed.
char* str8_to_cstr_capped(Str8 content, SliceByte mem) {
	assert(mem.len >= content.len);
	memory_copy(mem.ptr, content.ptr, content.len);
	return mem.ptr;
}

// To support zeroing slices we'll utilize an intrinisc.
B32 memory_zero(void* dest, USIZE length) {
	if (dest == nullptr || length <= 0) {
		return false;
	}
	__stosb((unsigned char*)dest, 0, length);
	return true;
}

void slice__zero(SliceByte mem, SSIZE typewidth) {
	slice_assert(mem);
	memory_zero(mem.ptr, mem.len);
}
#define slice_zero(slice) slice__zero((SliceByte){ cast(void*, (slice).ptr), (slice).len * size_of(*(slice).ptr)}, size_of(*(slice).ptr))

// Now for our "Version 1"

#if	defined(DEMO__FILE_READ_CONTENTS_V1) || \
	defined(DEMO__WATL_LEX_V1)           || \
	defined(DEMO__WATL_PARSE_V1)         || \
	defined(DEMO__WATL_DUMP_PREREQ_V1)   || \
	defined(DEMO__WATL_DUMP_V1)

struct FileOpResult
{
	// For now we'll just have the content
	SliceByte content;
};

struct Opts__read_file_contents
{
	// For now we'll just have the backing memory provided as a slice.
	SliceByte backing;
	// And whether we should zero the backing.
	B32 zero_backing;
};

void file_read_contents_api(FileOpResult* result, Str8 path, Opts__read_file_contents* opts)
{
	assert(result != nullptr);
	assert(opts   != nullptr);
	slice_assert(path);
	// Backing is required at this point
	slice_assert(opts->backing);

	// This will limit a path for V1 to be 16kb worth of codepoints.
	FMem_16KB   scratch   = {0};
	char const* path_cstr = str8_to_cstr_capped(path, fmem_slice(scratch) );

	HANDLE id_file = CreateFileA(
		path_cstr,
		GENERIC_READ,
		FILE_SHARE_READ,
		NULL,
		OPEN_EXISTING,
		FILE_ATTRIBUTE_NORMAL,
		NULL
	);
	B32 open_failed = id_file == INVALID_HANDLE_VALUE;
	if (open_failed) {
		DWORD  error_code = GetLastError();
		assert(error_code != 0);
		return;
	}

	LARGE_INTEGER file_size = {0};
	DWORD get_size_failed = ! GetFileSizeEx(id_file, & file_size);
	if   (get_size_failed) {
		assert(get_size_failed == INVALID_FILE_SIZE);
		return;
	}

	// Because we are currently using fixed size memory, we need to confirm that we can hold this content.
	B32 not_enough_backing = opts->backing.len < file_size.QuadPart;
	if (not_enough_backing) {
		assert(not_enough_backing);
		// Otherwise we don't provide a result.
		result->content = (SliceByte){0};
		return;
	}

	if (opts->zero_backing) {
          slice_zero(pcast(SliceByte, opts->backing));
	}

	DWORD amount_read = 0;
	BOOL  read_result = ReadFile(
		id_file,
		opts->backing.ptr,
		file_size.QuadPart,
		& amount_read,
		nullptr
	);
	CloseHandle(id_file);
	
	B32 read_failed  = ! read_result;
	    read_failed |= amount_read != file_size.QuadPart;
	if (read_failed) {
		assert(read_failed);
		return;
	}

	result->content.ptr = opts->backing.ptr;
	result->content.len = file_size.QuadPart;
	return;
}

#endif DEMO__FILE_READ_CONTENTS_V1

// Version agnostic code:
inline
FileOpResult file__read_contents(Str8 path, Opts__read_file_contents* opts) {
	FileOpResult result;
	file_read_contents_api(& result, path, opts);
	return result;
}

// And now to put it all together into a test run in the debugger. Content should be properly formatted if the code is correct.
#ifdef DEMO__FILE_READ_CONTENTS_V1
int main()
{
	// This will limit for our V1 read to 64kb at most.
	FMem_64KB    read_mem = {0};
	FileOpResult res      = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
	return 0;
}
#endif DEMO__FILE_READ_CONTENTS_V1

/*
Now that we have file reading done we need to be able to process the content.

First we want to do lexical analysis. So we'll create a token listing delimiting aspects of the text file relevant to us.
For our data structure, we are going for a Whitespace-Aware Text Layout; where we'll track text and the formatting around them.

Just like with the read file contents operation, we'll define an interface to performing this analysis.
It will be called watl_lex and take the SliceByte from the file as a Str8 slice and some Opts__watl_lex; 
returning a WATL_LexInfo for providing user info on how the operation went.
*/

typedef struct WATL_LexInfo WATL_LexInfo;
typedef struct Opts__watl_lex Opts__watl_lex;

void         watl_lex_api(WATL_LexInfo* info, Str8 source, Opts__watl_lex* opts);
WATL_LexInfo watl__lex   (                    Str8 source, Opts__watl_lex* opts);
#define watl_lex(source, ...) watl__lex(source, &(Opts__watl_lex){__VA_ARGS__})

/*
Token identification will be done using a WATL_TokKind enumeration.
The token type itself will be the id along with a ptr to its start of the slice. We can resolve the width of the token by its delta to the next token.
If its the last token, then its delta is determined by its offset to the end of the Str8 slice.
*/

typedef U32 WATL_TokKind;
enum WATL_TokKind {
	WATL_Tok_Space          = ' ',
	WATL_Tok_Tab            = '\t',
	WATL_Tok_CarriageReturn = '\r',
	WATL_Tok_LineFeed       = '\n',
	WATL_Tok_Text           = 0xFFFFFFFF,
};

typedef struct WATL_Tok WATL_Tok;
struct WATL_Tok {
	UTF8* code;
};

typedef struct WATL_SliceTok WATL_SliceTok;
struct WATL_SliceTok {
	WATL_Tok* ptr;
	SSIZE     len;
};

Str8 watl_tok_str8(WATL_SliceTok toks, WATL_Tok* tok) {
	WATL_Tok* next = tok + 1;
	USIZE start    = cast(USIZE, toks.ptr->code);
	USIZE curr     = cast(USIZE, tok->code);
	USIZE offset   = curr - start;
	SSIZE left     = toks.len - offset;
	B32   last_tok = (start + toks.len) == (curr + left);
	Str8  text     = {0};
	text.ptr = tok->code;
	text.len = next > (toks.ptr + toks.len) ? 
		left
	// Othwerise its the last minus the curr.
	:	cast(SSIZE, next->code - tok->code);
	return text;
}

/*
Tokens are allocated to a backing slice of memory defined by the user. This pool of memory will ideally not be constrained to a fixed size on the stack.
So for V1 we'll allocate 10 megs of heap memory to act as a pool for the tokens. We'll keep track of how much for the pool we used via a new memory tracking construct:
The fixed-sized arena.

A basic fixed size arena only has three components which can vary depending on the convention the user perfers.
In our case we'll track its capacity, its starting address, and how much has been comitted..
*/

// We use this in-conjunction with Areans to save a point thats safe to rewind to by the user.
typedef struct ArenaSP ArenaSP;
struct ArenaSP { void* ptr; };

#pragma region FArena
typedef struct FArena FArena;
struct FArena {
	void* start;
	USIZE capacity;
	USIZE used;
};
void     api_farena_init(FArena* arena, SliceByte mem);
FArena   farena_init    (SliceByte mem);
void*    farena__push   (FArena* arena, USIZE type_size, USIZE amount, Str8 dbg_typename);
void     farena_reset   (FArena* arena);
void     farena_rewind  (FArena* arena, ArenaSP savepoint);
ArenaSP  farena_save    (FArena  arena);

#define  farena_push(arena, type)               cast(type*,      farena__push(& arena, size_of(type), 1,      lit(stringify(type)))         )
#define  farena_push_array(arena, type, amount) (Slice ## type){ farena__push(& arena, size_of(type), amount, lit(stringify(type))), amount }

inline
void api_farena_init(FArena* arena, SliceByte mem) {
	arena->start    = mem.ptr;
	arena->capacity = mem.len;
	arena->used     = 0;
}
inline FArena farena_init(SliceByte mem) { FArena arena; api_farena_init(& arena, mem); return arena; }

inline
void* farena__push(FArena* arena, USIZE type_size, USIZE amount, Str8 dbg_typename) {
	USIZE to_commit = type_size * amount;
	USIZE unused    = arena->capacity - arena->used;
	assert(to_commit <= unused);
	void*  ptr   = cast(void*, cast(USIZE, arena->start) + arena->used);
	arena->used += to_commit;
	return ptr;
}

inline
void farena_rewind(FArena* arena, ArenaSP savepoint) {
	void* end = cast(void*, cast(USIZE, arena->start) + arena->used);
	assert_bounds(savepoint.ptr, arena->start, end);
	arena->used -= cast(USIZE, savepoint.ptr) - cast(USIZE, arena->start);
}

inline void    farena_reset(FArena* arena) { arena->used = 0; }
inline ArenaSP farena_save (FArena  arena) { ArenaSP savepoint; savepoint.ptr = arena.start; return savepoint; } 
#pragma endregion FArena

#if defined(DEMO__WATL_LEX_V1)         || \
	defined(DEMO__WATL_PARSE_V1)       || \
	defined(DEMO__WATL_DUMP_PREREQ_V1) || \
	defined(DEMO__WATL_DUMP_V1)

struct WATL_LexInfo {
	// For now just the tokens
	WATL_SliceTok tokens;
};

struct Opts__watl_lex {
/*
	For this operation we'll enforce that the arena must linearly allocate each token, forming a strictly adjacent sent of elements in an array.
	This is not necessary and an equivalent process could be done where the tokens instead are semi-contigously organized into linked list with a chained arena, 
	or the tokens are sparely cached.
	Where their position in their originating string is not preserved. In this case we're keeping it simple.
	Tokens are in the same block of memory and they don't use a string cache.
*/
	SliceByte pool_toks;
};

// We are assuming everything is utf8-ascii.
void watl_lex_api(WATL_LexInfo* info, Str8 source, Opts__watl_lex* opts)
{
	assert(info != nullptr);
	slice_assert(source);
	assert(opts != nullptr);

	FArena arena = farena_init(opts->pool_toks);

	UTF8* end    = source.ptr + source.len;
	UTF8* cursor = source.ptr;
	UTF8* prev   = source.ptr;
	UTF8  code   = * cursor;

	B32       was_formatting = true;
	WATL_Tok* tok            = nullptr;
	for (; cursor < end;)
	{
		switch (code)
		{
			case WATL_Tok_Space:
			case WATL_Tok_Tab:
			{
				if (* prev != * cursor) {
					tok            = farena_push(arena, WATL_Tok);
					tok->code      = cursor;
					was_formatting = true;
				}
				cursor += 1;
			}
			break;

			case WATL_Tok_LineFeed: {
					tok            = farena_push(arena, WATL_Tok);
					tok->code      = cursor;
					cursor        += 1;
					was_formatting = true; 
			}
			break;

			// Assuming what comes after is line feed.
			case WATL_Tok_CarriageReturn: {
					tok            = farena_push(arena, WATL_Tok);
					tok->code      = cursor;
					cursor        += 2;
					was_formatting = true; 
			}
			break;

			default:
			{
				if (was_formatting) {
					tok            = farena_push(arena, WATL_Tok);
					tok->code      = cursor;
					was_formatting = false;
				}
				cursor += 1;
			}
			break;
		}
		prev =  cursor - 1;
		code = * cursor;
	}
	info->tokens.ptr = arena.start;
	info->tokens.len = arena.used / size_of(WATL_Tok*);
}

#endif DEMO__WATL_LEX_V1

inline
WATL_LexInfo watl__lex(Str8 source, Opts__watl_lex* opts) {
	WATL_LexInfo result = {0};
	watl_lex_api(& result, source, opts);
	return result;
}

/*
To allocate onto the heap we'll make a basic slicemem_malloc to allocate, we'll make a corresponding slicemem_free aswell.
However we don't need to use it for the V1 example. The OS will cleanup the pages used by the process during its termination.
*/

SliceByte slicemem_alloc(USIZE amount)
{
	assert(amount > KILOBTYES(4));
	void* result = malloc(amount);
	assert(result != nullptr);
	SliceByte mem = {
		.ptr = result,
		.len = amount
	};
	return mem;
}
void slicemem_free(SliceByte mem) {
	free(mem.ptr);
}

#ifdef DEMO__WATL_LEX_V1
int main()
{
	// This will limit for our V1 read to 64kb at most.
	FMem_64KB    read_mem = {0};
	FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );

	// This will limit our V1 lex to only 8 megs worth of token tracking on a file.
	SliceByte     mem_toks = slicemem_alloc(MEGABYTES(8));
	WATL_LexInfo lex_res  = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);
	// unnecessary in this case but if you want to explicitly:
	slicemem_free(mem_toks);
	return 0;
}
#endif

/*
Next we'll parse these tokens into a rudimentary WATL Abstract Syntax Tree.
* The tree will be top-level organized by lines consisting of linked slices of visble and non-visible tokens.
* Just as with the the lexical analysis, lines and nodes will be linearly allocated adjacent to each other. This allows us to utilize array operations.
*/

typedef struct WATL_ParseInfo WATL_ParseInfo;
typedef struct Opts__watl_parse Opts__watl_parse;
void           watl_parse_api(WATL_ParseInfo* info, WATL_SliceTok tokens, Opts__watl_parse* opts);
WATL_ParseInfo watl__parse   (                      WATL_SliceTok tokens, Opts__watl_parse* opts);
#define watl_parse(tokens, ...) watl__parse(tokens, & (Opts__watl_parse) {__VA_ARGS__})

/*
For the sake of the exercise, we'll be eliminating the association with the file's strings and we'll need to instead cache them.
*/
#pragma region Str8Cache
typedef struct Str8Cache Str8Cache;
void      str8cache_init_api(Str8Cache* cache, SliceByte mem_strs, SliceByte mem_slots, SliceByte mem_table);
Str8Cache str8cache_init    (                  SliceByte mem_strs, SliceByte mem_slots, SliceByte mem_table);

// A cache like this relies on tabling string entires utiliszing an index derived from a hashed ID.
// For these strings we'll be using a hash called djb8:

// Introducing a slice iterator:
#define slice_iter(container, iter) typeof((container).ptr) iter = (container).ptr; iter != ((container).ptr + (container).len); ++ iter

inline
void hash64_djb8(U64* hash, SliceByte bytes) {
	for (U8 const* elem = bytes.ptr; elem != (bytes.ptr + bytes.len); ++ elem) {
		*hash = (((*hash) << 8) + (*hash)) + (*elem);
	}
}

// For a library or codebase its recommended to setup a metaprogram to generate hash utilizing containers
// Or other containers that cannot be sufficiently lifted to general runtime paths without losing ergonomic debug type info or type-constraint enforcements.
// Unlike with the template markup C++ uses, you can strike a balance between how many definitions are redundantly made or optimized for collapsing to a general path
// based on target optimization and debugability.

// For this V1 example, we'll be hand-rolling a fixed sized table with excess slot chaining for colliding slots.
// Its a relatively simple implementation to hand-roll. These things tend to become unyeilding with more advanced variants.

typedef struct Str8Cache_Slot Str8Cache_Slot;
struct Str8Cache_Slot {
	Str8Cache_Slot* prev;
	Str8Cache_Slot* next;
	Str8 value;
	U64  key;
	B32  occupied;
};

typedef struct Str8Cache_SliceSlot Str8Cache_SliceSlot;
struct Str8Cache_SliceSlot {
	Str8Cache_Slot* ptr;
	SSIZE           len;
};

struct Str8Cache {
	FArena               a_str;
	Str8Cache_SliceSlot  pool;
	Str8Cache_Slot*      vacant;
	Str8Cache_SliceSlot  table;
};

Str8Cache str8cache_init(SliceByte mem_strs, SliceByte mem_slots, SliceByte mem_table) { Str8Cache cache; str8cache_init_api(& cache, mem_strs, mem_slots, mem_table); return cache; }
inline
void str8cache_init_api(Str8Cache* cache, SliceByte mem_strs, SliceByte mem_slots, SliceByte mem_table) {
	assert(cache != nullptr);
	slice_assert(mem_strs);
	slice_assert(mem_slots);
	slice_assert(mem_table);
	cache->a_str = farena_init(mem_strs);
	cache->pool  = (Str8Cache_SliceSlot){ cast(void*, mem_slots.ptr), mem_slots.len / size_of(Str8Cache_Slot)};
	cache->table = (Str8Cache_SliceSlot){ cast(void*, mem_table.ptr), mem_table.len / size_of(Str8Cache_Slot)};
	slice_zero(cache->pool);
	slice_zero(cache->table);
}

void str8cache_clear(Str8Cache* cache)
{
	for (slice_iter(cache->table, slot))
	{
		if (slot == nullptr) {
			continue;
		}
		for (Str8Cache_Slot* probe_slot = slot->next; probe_slot != nullptr; probe_slot = probe_slot->next) {
			slot->occupied = false;
		}
		slot->occupied = false;
	}
}

// We don't introduce a remove option because we're not tracking fixed sized entities.
// Strings take up non-determistic sizes of their backing arena. So the only thing that can be done with the cache is wiping it and recaching all strings.

/*
When storing a hash of a slot, we can almost never utilize the full width of a key,
so we must truncate the key via module to get a "good enough" unique ID to place in the table.
*/
inline
U64 str8cache_slot_id(Str8Cache cache, U64 key) {
	U64 hash_index = key % cast(U64, cache.table.len);
	return hash_index;
}

Str8* str8cache_get(Str8Cache cache, U64 key)
{
	U64             hash_index   = str8cache_slot_id(cache, key);
	Str8Cache_Slot* surface_slot = & cache.table.ptr[hash_index];
	if (surface_slot == nullptr) { 
		return nullptr;
	}
	if (surface_slot->occupied && surface_slot->key == key) {
		return & surface_slot->value;
	}
	for (Str8Cache_Slot* slot = surface_slot->next; slot != nullptr; slot = slot->next)
	{
		if (slot->occupied && slot->key == key) {
			return & slot->value;
		}
	}
	return nullptr;
}

Str8* str8cache_set(Str8Cache* cache, U64 key, Str8 value)
{
	U64             hash_index   = str8cache_slot_id(*cache, key);
	Str8Cache_Slot* surface_slot = & cache->table.ptr[hash_index];
	if (! surface_slot->occupied || surface_slot->key == key)
	{
		if (value.ptr != surface_slot->value.ptr) {
			SliceByte mem = { farena__push(& cache->a_str, size_of(U8), value.len, lit("Str8")), value.len };
			slice_copy(mem, value);
			surface_slot->value = pcast(Str8, mem);
		}
		surface_slot->key      = key;
		surface_slot->occupied = true;
		return & surface_slot->value;
	}
	Str8Cache_Slot* slot = surface_slot;
	for (;; slot = slot->next)
	{
		if (slot->next == nullptr)
		{
			// We had a collision, we need to grab a vacant slot from the pool and utilize it instead.
			slot->next       = cache->vacant;
			* slot->next     = (Str8Cache_Slot){0};
			slot->next->prev = slot;

			Str8Cache_Slot* next_vacant = cache->vacant + 1;
			assert(next_vacant < cache->pool.ptr + cache->pool.len );
			// If the above fails we ran out of extra slots.
			cache->vacant = cache->vacant + 1;
		}
		if ( ! slot->next->occupied || slot->next->key == key)
		{
			if (value.ptr != slot->next->value.ptr) {
				SliceByte mem = { farena__push(& cache->a_str, size_of(U8), value.len, lit("Str8")), value.len };
				slice_copy(mem, value);
				slot->next->value = (Str8){mem.ptr, mem.len / size_of(char)};
			}
			slot->next->value    = value;
			slot->next->key      = key;
			slot->next->occupied = true;
			return & slot->next->value;
		}
		// We keep traversing till we find a match or we find a vacancy for this list in the table.
		// Make sure to tune the size of the table so it does this less! 
		// Note: Tables sized by prime values collide less aswell. 
		// You can use a closest prime number lookup table to derive what length to expose to the cache's table for hash ID resolution.
	}
	return nullptr;
}
#pragma endregion Str8Cache

// Finally our abstracted cache interface:
Str8 cache_str8(Str8Cache* cache, Str8 str)
{
	U64 key = 0; hash64_djb8(& key, pcast(SliceByte, str));
	Str8* result = str8cache_set(cache, key, str);
	assert(result != nullptr);
	return * result;
}

typedef Str8 WATL_Node;
#if 0
typedef struct WATL_Node WATL_Node;
struct WATL_Node {
	WATL_NodeKind kind;
	Str8 entry;
};
#endif

typedef struct WATL_Line WATL_Line;
struct WATL_Line {
	WATL_Node* ptr;
	SSIZE      len;
};

typedef struct WATL_SliceLine WATL_SliceLine;
struct WATL_SliceLine {
	WATL_Line* ptr;
	SSIZE      len;
};

#if defined(DEMO__WATL_PARSE_V1) || defined(DEMO__WATL_DUMP_PREREQ_V1) || defined(DEMO__WATL_DUMP_V1)

struct Opts__watl_parse {
	SliceByte  backing_nodes;
	SliceByte  backing_lines;
	Str8Cache* str_cache;
};

struct WATL_ParseInfo {
	WATL_SliceLine lines;
};

void watl_parse_api(WATL_ParseInfo* info, WATL_SliceTok tokens, Opts__watl_parse* opts)
{
	assert(info != nullptr);
	slice_assert(tokens);
	assert(opts != nullptr);

	FArena a_lines = farena_init(opts->backing_lines);
	FArena a_nodes = farena_init(opts->backing_nodes);

	WATL_Line* line = farena_push(a_lines, WATL_Line);
	WATL_Node* curr = farena_push(a_nodes, WATL_Node); // Preemtively allocate a node for the line (may not be used)
	* curr    = (WATL_Node){0};
	line->ptr = curr;
	line->len = 0;
	info->lines.ptr = line;
	info->lines.len = 0;
	for (slice_iter(tokens, token))
	{
		switch (* token->code)
		{
			case WATL_Tok_CarriageReturn:
			case WATL_Tok_LineFeed: {
				WATL_Line* new_line = farena_push(a_lines, WATL_Line);
				line             = new_line;
				line->ptr        = curr;
				line->len        = 0;
				info->lines.len += 1;
			}
			continue;

			default:
			break;
		}

		Str8 tok_str = watl_tok_str8(tokens, token);
		* curr       = cache_str8( opts->str_cache, tok_str );
		curr         = farena_push(a_nodes, WATL_Node);
		* curr       = (WATL_Node){0};
		line->len   += 1;
		continue;
	}
}

#endif DEMO__WATL_PARSE_V1

WATL_ParseInfo watl__parse(WATL_SliceTok tokens, Opts__watl_parse* opts) { WATL_ParseInfo info; watl_parse_api(& info, tokens, opts); return info; }

#ifdef DEMO__WATL_PARSE_V1
int main()
{
	// This will limit for our V1 read to 64kb at most.
	FMem_64KB    read_mem = {0};
	FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );

	// This will limit our V1 lex to only 8 megs worth of token tracking on a file.
	SliceByte    mem_toks = slicemem_alloc(MEGABYTES(8));
	WATL_LexInfo lex_res  = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);

	SliceByte mem_cache_strs  = slicemem_alloc(MEGABYTES(64));
	SliceByte mem_cache_slots = slicemem_alloc(1024 * 1024 * 16 * size_of(Str8Cache_SliceSlot));
	SliceByte mem_cache_table = slicemem_alloc(1024        * 16 * size_of(Str8Cache_SliceSlot));
	Str8Cache str_cache       = str8cache_init(mem_cache_strs, mem_cache_slots, mem_cache_table);

	SliceByte mem_parse_nodes = slicemem_alloc(MEGABYTES(4));
	SliceByte mem_parse_lines = slicemem_alloc(MEGABYTES(4));
	WATL_ParseInfo parse_res  = watl_parse(lex_res.tokens, .backing_nodes = mem_parse_nodes, .backing_lines = mem_parse_lines, .str_cache = & str_cache);

	// unnecessary in this case but if you want to explicitly:
	slicemem_free(mem_toks);
	return 0;
}
#endif

/*
Now we'll like to dump this WATL structure into a file.
To do so we'll need to generate the content string of the file dynamically based on the WATL's content.

We'll be utilizing a new construct called a string generator which be tied to all functionality for constructing strings.
*/

typedef struct Str8Gen Str8Gen;
struct Str8Gen {
	SliceByte backing; // For V1 the backing buffer is fixed size.
	UTF8* ptr;
	SSIZE len;
};

void    str8gen_init(Str8Gen* gen, SliceByte backing);
Str8Gen str8gen_make(              SliceByte backing);

void str8gen_append_str8(Str8Gen* gen, Str8 str);

void str8gen_init(Str8Gen* gen, SliceByte backing) {
	assert(gen != nullptr);
	gen->backing = backing;
	gen->ptr     = backing.ptr;
	gen->len     = 0;
}
Str8Gen str8gen_make(SliceByte backing) { Str8Gen gen; str8gen_init(& gen, backing); return gen; }

void str8gen_append_str8(Str8Gen* gen, Str8 str) {
	SSIZE  left = gen->backing.len - gen->len;
	assert(left >= str.len);
	SliceByte dest = {gen->ptr + gen->len, str.len};
	slice_copy(dest, str);
	return;
}

/*
In order to support appending formatted content via str8gen_apppend_fmt, we'll be using a substiution formatter utilizing string identifiation token pattern.

Where a format template string is provided with a 'id' wrapped in delimiters which will be the angle brackets: <id>
Example: This formatted string will have <id> subsituted into it.
*/
#pragma region fmt_vtoken

typedef struct FmtTokEntry FmtTokEntry;
struct FmtTokEntry {
	U64  key;
	Str8 value;
};
typedef struct SliceFmtTokEntry SliceFmtTokEntry;
struct SliceFmtTokEntry {
	FmtTokEntry* ptr;
	SSIZE        len;
};

#define slice_end(slice) (slice.ptr + slice.len)

/*
This is a token substiuting formatter using a array table lookup for tokens to substitute.
*/
Str8 fmt_vtoken_slice(SliceByte buffer, SliceFmtTokEntry tokens, Str8 fmt_template)
{
	slice_assert(buffer);
	slice_assert(tokens);
	slice_assert(fmt_template);

	UTF8* cursor_buffer    = buffer.ptr;
	SSIZE buffer_remaining = buffer.len;

	char curr_code = * fmt_template.ptr;

	UTF8* cursor_fmt = fmt_template.ptr;
	SSIZE left_fmt   = fmt_template.len;
	while (left_fmt && buffer_remaining)
	{
		// Forward until we hit the delimiter '<' or the template's contents are exhausted.
		while (curr_code && curr_code != '<' && cursor_fmt != slice_end(fmt_template))
		{
			* cursor_buffer = * cursor_fmt;
			++ cursor_buffer;
			++ cursor_fmt;
			-- buffer_remaining;
			-- left_fmt;

			curr_code = * cursor_fmt;
		}

		if (curr_code == '<')
		{
			UTF8* cursor_potential_token = cursor_fmt + 1;
			SSIZE potential_token_length = 0;

			while (* (cursor_potential_token + potential_token_length) != '>') {
				++ potential_token_length;
			}

			// Hashing the potential token and cross checking it with our token table
			U64   key   = 0; hash64_djb8(& key, (SliceByte){ cast(void*, cursor_fmt + 1), potential_token_length});
			Str8* value = nullptr;
			for (slice_iter(tokens, token))
			{
				// We do a linear iteration instead of a hash table lookup because the user should be never substiuting with more than 100 unqiue tokens..
				if (token->key == key) {
					value = & token->value;
					break;
				}
			}

			if (value)
			{
				SSIZE       left         = value->len;
				char const* cursor_value = value->ptr;

				while (left --)
				{
					* cursor_buffer = * cursor_value;
					++ cursor_buffer;
					++ cursor_value;
					-- buffer_remaining;
				}

				// Sync cursor format to after the processed token
				cursor_fmt = cursor_potential_token + potential_token_length + 1;
				curr_code  = * cursor_fmt;
				left_fmt  -= potential_token_length + 2; // The 2 here are the '<' & '>' delimiters being omitted.
				continue;
			}

			* cursor_buffer = * cursor_fmt;
			++ cursor_buffer;
			++ cursor_fmt;
			-- buffer_remaining;
			-- left_fmt;

			curr_code = * cursor_fmt;
		}
	}
	Str8   result = {buffer.ptr, buffer.len - buffer_remaining};
	return result;
}

typedef struct SliceStr8 SliceStr8;
struct SliceStr8 {
	Str8* ptr;
	SSIZE len;
};

#define local_persist static

Str8 fmt__vtoken(SliceByte backing_tbl, SliceByte backing_buf, Str8 fmt_template, SliceStr8* tokens)
{
	assert(tokens != nullptr);
	FArena           a_backing = farena_init(backing_tbl);
	SliceFmtTokEntry table     = {a_backing.start, 0};
	S32 left = tokens->len;
	for (slice_iter(*tokens, token)) {
		FmtTokEntry* entry = farena_push(a_backing, FmtTokEntry); 
		* entry = (FmtTokEntry){0};
		hash64_djb8(& entry->key, (SliceByte){cast(void*, token->ptr), token->len});
		++ token;
		entry->value = * token;
		++ table.len;
	}
	Str8   result = fmt_vtoken_slice(backing_buf, table, fmt_template);
	return result;
}

// Expected to take a Str8 array of entries formatted as a 2D array of key-value pairs (Str8[length][2])
// The array will be tracked using a SliceStr8 structure.
#define fmt_vtoken(backing_tbl, backing_buf, fmt_template, ...)         \
fmt__vtoken(backing_tbl, backing_buf, lit(fmt_template),                \
	&(SliceStr8){                                                       \
		.ptr = (Str8[]){ __VA_ARGS__ },                                 \
		.len = (SSIZE)sizeof( (Str8[]){ __VA_ARGS__ } ) / size_of(Str8) \
	}                                                                   \
)
#pragma endregion fmt_vtoken

inline
void str8gen__append_fmt(Str8Gen* gen, Str8 fmt_template, SliceStr8* tokens)
{
	local_persist FMem_64KB tbl_backing = {0};
	SliceByte fmt_backing = {gen->ptr + gen->len, gen->backing.len - gen->len};
	Str8 appended = fmt__vtoken(fmem_slice(tbl_backing), fmt_backing, fmt_template, tokens);
	gen->len += appended.len;
	return;
}
#define str8gen_append_fmt(gen, fmt_template, ...)                      \
str8gen__append_fmt(& gen, lit(fmt_template),                           \
	&(SliceStr8){                                                       \
		.ptr = (Str8[]){ __VA_ARGS__ },                                 \
		.len = (SSIZE)sizeof( (Str8[]){ __VA_ARGS__ } ) / size_of(Str8) \
	}                                                                   \
)

/*
Define a mapping array:
Str8 mappings [][2] = {
	fmt_vtoken_entry("key", "value"),
	^^ Add entries as above ^^
}
*/
#define fmt_entry(key, value) lit(key), value

#ifdef DEMO__WATL_DUMP_PREREQ_V1
int main()
{
	local_persist FMem_64KB tbl_scratch;
	SliceByte fmt_scratch = slicemem_alloc(MEGABYTES(8));
	Str8 test_str = fmt_vtoken(fmem_slice(tbl_scratch), fmt_scratch, "Will this work? <maybe_sub>",
		fmt_entry("maybe_sub", lit("IT SUBST!!!"))
	);

	SliceByte scratchgen = slicemem_alloc(MEGABYTES(16));
	Str8Gen gen = str8gen_make(scratchgen);
	str8gen_append_fmt(gen, "Testing now with Str8Gen!! <maybe_sub>!",
		fmt_entry("maybe_sub", lit("lets fucking go!!!"))
	);
	return 0;
}
#endif

/*
We'll need to do some integer serialization for our dump listing's metrics.
*/

inline B32 char_is_upper(U8 c) { return('A' <= c && c <= 'Z'); }
inline U8  char_to_lower(U8 c) { if (char_is_upper(c)) { c += ('a' - 'A'); } return(c); }

inline
U8 integer_symbols(U8 value) {
	local_persist
	U8 lookup_table[16] = {
		'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F',
	};
	return lookup_table[value];
}

Str8 str8_from_u32(SliceByte mem, U32 num, U32 radix, U8 min_digits, U8 digit_group_separator)
{
	Str8 result = {.ptr = mem.ptr, .len = 0};
	Str8 prefix = {0};
	switch (radix)
	{
		case 16: { prefix = lit("0x"); } break;
		case 8:  { prefix = lit("0o"); } break;
		case 2:  { prefix = lit("0b"); } break;
	}
	
	U8 digit_group_size = 3;
	switch (radix)
	{
		default: break;
		case 2:
		case 8:
		case 16: { 
			digit_group_size = 4;
		}
		break;
	}

	U32 needed_leading_zeros = 0;
	{
		U32 needed_digits = 1;
		{
			U32 u32_reduce = num;
			for(;;)
			{
				u32_reduce /= radix;
				if (u32_reduce == 0) {
					break;
				}
				needed_digits += 1;
			}
		}

		    needed_leading_zeros = (min_digits > needed_digits) ? min_digits - needed_digits : 0;
		U32 needed_separators    = 0;
		if (digit_group_separator != 0)
		{
			needed_separators = (needed_digits + needed_leading_zeros) / digit_group_size;
			if (needed_separators > 0 && (needed_digits + needed_leading_zeros) % digit_group_size == 0) {
				needed_separators -= 1;
			}
		}
		
		result.len = prefix.len + needed_leading_zeros + needed_separators + needed_digits;
		assert(result.len <= mem.len);
	}
	
	// Fill Content
	{
		U32 num_reduce             = num;
		U32 digits_until_separator = digit_group_size;
		for (U32 idx = 0; idx < result.len; idx += 1)
		{
			SSIZE separator_pos = result.len - idx - 1;
			if (digits_until_separator == 0 && digit_group_separator != 0) {
				result.ptr[separator_pos] = digit_group_separator;
				digits_until_separator = digit_group_size + 1;
			}
			else {
				result.ptr[separator_pos] = char_to_lower(integer_symbols(num_reduce % radix));
				num_reduce /= radix;
			}
			digits_until_separator -= 1;
			if (num_reduce == 0) {
				break;
			}
		}
		for (U32 leading_0_idx = 0; leading_0_idx < needed_leading_zeros; leading_0_idx += 1) {
			result.ptr[prefix.len + leading_0_idx] = '0';
		}
	}
	// Fill Prefix
	if (prefix.len > 0) {
		slice_copy(result, prefix);
	}
	return result;
}

// Utilized for file_write_str8:
#if 0
BOOL WriteFile(
  [in]                HANDLE       hFile,
  [in]                LPCVOID      lpBuffer,
  [in]                DWORD        nNumberOfBytesToWrite,
  [out, optional]     LPDWORD      lpNumberOfBytesWritten,
  [in, out, optional] LPOVERLAPPED lpOverlapped
);
#endif
/*
Lastly: Writting to file using the Win API.
*/
void file_write_str8(Str8 path, Str8 content)
{
	slice_assert(path);
	slice_assert(content);

	FMem_16KB scratch = {0};
	char const* path_cstr = str8_to_cstr_capped(path, fmem_slice(scratch));

	HANDLE id_file = CreateFileA(
		path_cstr,
		GENERIC_WRITE,
		FILE_SHARE_READ,
		NULL,
		CREATE_ALWAYS,
		FILE_ATTRIBUTE_NORMAL,
		NULL
	);

	B32 open_failed = id_file == INVALID_HANDLE_VALUE;
	if (open_failed) {
		DWORD  error_code = GetLastError();
		assert(error_code != 0);
		return;
	}

	DWORD bytes_written = 0;
	B32 status = WriteFile(id_file
		, cast(void*, content.ptr)
		, cast(USIZE, content.len)
		, & bytes_written
		, NULL
	);
	assert(status != 0);
	assert(bytes_written == content.len);
}

/*
We now have what we need to create the structural listing dump for WATL's data structures.
*/

Str8 watl_dump_listing(SliceByte buffer, WATL_SliceLine lines)
{
	local_persist FMem_64KB scratch = {0};
	FArena sarena = farena_init(fmem_slice(scratch));

	Str8Gen result = str8gen_make(buffer);
	U32 line_num = 0;
	for (slice_iter(lines, line))
	{
	#define push_str8_u32()           (SliceByte){ farena__push(& sarena, size_of(UTF8), 64, lit("UTF8")), 64 }
	#define fmt_entry_u32(label, num) lit(label), str8_from_u32(push_str8_u32(), num, 10, 0, 0)
		++ line_num;
		str8gen_append_fmt(result, "Line <line_num> - Chunks <chunk_num>:\n"
		,	fmt_entry("line_num",  str8_from_u32(push_str8_u32(), line_num, 10, 0, 0))
		,	fmt_entry("chunk_num", str8_from_u32(push_str8_u32(), line->len, 10, 0, 0))
		);
		for (slice_iter(* line, chunk)) 
		{
			Str8 id;
			switch (* chunk->ptr)
			{
				case WATL_Tok_Space: id = lit("Space");   break;
				case WATL_Tok_Tab:   id = lit("Tab");     break;
				default:             id = lit("Visible"); break;
			}
			str8gen_append_fmt(result, "\t<id>(<size>): '<chunk>'\n"
			,	fmt_entry("id", id)
			,	fmt_entry_u32("size", chunk->len)
			,	fmt_entry("chunk", * chunk)
			);
		}
		farena_reset(& sarena);
	#undef push_str8_u32
	#undef fmt_entry_u32
	}
	return (Str8){ result.ptr, result.len };
}

#ifdef DEMO__WATL_DUMP_V1
int main()
{
	// This will limit for our V1 read to 64kb at most.
	FMem_128KB   read_mem = {0};
	FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );

	// This will limit our V1 lex to only 8 megs worth of token tracking on a file.
	SliceByte    mem_toks = slicemem_alloc(MEGABYTES(16));
	WATL_LexInfo lex_res  = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);

	SliceByte mem_cache_strs  = slicemem_alloc(MEGABYTES(64));
	SliceByte mem_cache_slots = slicemem_alloc(1024 * 1024 * 16 * size_of(Str8Cache_SliceSlot));
	SliceByte mem_cache_table = slicemem_alloc(1024        * 16 * size_of(Str8Cache_SliceSlot));
	Str8Cache str_cache       = str8cache_init(mem_cache_strs, mem_cache_slots, mem_cache_table);

	SliceByte mem_parse_nodes = slicemem_alloc(MEGABYTES(4));
	SliceByte mem_parse_lines = slicemem_alloc(MEGABYTES(4));
	WATL_ParseInfo parse_res  = watl_parse(lex_res.tokens, .backing_nodes = mem_parse_nodes, .backing_lines = mem_parse_lines, .str_cache = & str_cache);

	SliceByte scratch_dmp = slicemem_alloc(MEGABYTES(16));
	Str8      listing     = watl_dump_listing(scratch_dmp, parse_res.lines);
	file_write_str8(lit("demo.str_cache.listing.txt"), listing);
	return 0;
}
#endif