1402 lines
		
	
	
		
			44 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			1402 lines
		
	
	
		
			44 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
An introduction to C11 with a str cache demo.
Attempting to showcase better conventions and constructs in C; discovered by me as of 2025 from scouring the internet.
*/
 | |
| 
 | |
/*
The below will be implemented within this single file.
Because of this, definitions will be kept on a need-to-have basis to target only one vendor target and toolchain.
We will not use nearly any libraries and will be targeting only Windows 11 x64 using MSVC.

Even so, the constructs defined and their dependencies can be properly abstracted into an ergonomic library for multiple targets with enough time and pain.
The difference is just more preprocessor conditionals, and how far a library is trying to support a larger range of targets and their age discrepancy.
The more minimal, the less cruft.

Definitions are defined linearly in the file, on demand, since the file is meant to be read linearly.
This will cause non-categorical organization, so it will be more difficult to sift through if you wanted
to see definitions related to a specific kind of data or operation (strings, memory, etc).
*/
 | |
| #if 0
 | |
| int main()
 | |
| {
 | |
| 	// This will limit for our V1 read to 64kb at most.
 | |
| 	FMem_128KB   read_mem = {0};
 | |
| 	FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
 | |
| 
 | |
| 	// This will limit our V1 lex to only 8 megs worth of token tracking on a file.
 | |
| 	SliceByte    mem_toks = slicemem_alloc(MEGABYTES(16));
 | |
| 	WATL_LexInfo lex_res  = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);
 | |
| 
 | |
| 	SliceByte mem_cache_strs  = slicemem_alloc(MEGABYTES(64));
 | |
| 	SliceByte mem_cache_slots = slicemem_alloc(1024 * 1024 * 16 * size_of(Str8Cache_SliceSlot));
 | |
| 	SliceByte mem_cache_table = slicemem_alloc(1024        * 16 * size_of(Str8Cache_SliceSlot));
 | |
| 	Str8Cache str_cache       = str8cache_init(mem_cache_strs, mem_cache_slots, mem_cache_table);
 | |
| 
 | |
| 	SliceByte mem_parse_nodes = slicemem_alloc(MEGABYTES(4));
 | |
| 	SliceByte mem_parse_lines = slicemem_alloc(MEGABYTES(4));
 | |
| 	WATL_ParseInfo parse_res  = watl_parse(lex_res.tokens, .backing_nodes = mem_parse_nodes, .backing_lines = mem_parse_lines, .str_cache = & str_cache);
 | |
| 
 | |
| 	SliceByte scratch_dmp = slicemem_alloc(MEGABYTES(16));
 | |
| 	Str8      listing     = watl_dump_listing(scratch_dmp, parse_res.lines);
 | |
| 	file_write_str8(lit("demo.str_cache.listing.txt"), listing);
 | |
| 	return 0;
 | |
| }
 | |
| #endif
 | |
| 
 | |
| // Demo selection
 | |
| // #define DEMO__STR_SLICE
 | |
| // #define DEMO__FILE_READ_CONTENTS_V1
 | |
| // #define DEMO__WATL_LEX_V1
 | |
| // #define DEMO__WATL_PARSE_V1
 | |
| // #define DEMO__WATL_DUMP_PREREQ_V1
 | |
| #define DEMO__WATL_DUMP_V1
 | |
| 
 | |
| /*
 | |
| The above makes use of the following core concepts to achieve its net result:
 | |
| * Slices
 | |
| * Arenas
 | |
| * Generic Runtime Allocator Interface
 | |
| * Hashing
 | |
| 
 | |
| Secondarily for the purposes of using the above sufficiently the following are also utilized:
 | |
| * Virtual Address Space
 | |
| * Read/Write Files
 | |
| * Lexing & Parsing
 | |
| * Debug printing
 | |
| */
 | |
| 
 | |
/*
The first thing we'll probably want is a way to deal with text effectively.
So we'll set up the minimum for that when dealing with immutable constructs.
*/
 | |
| 
 | |
| // We'll need some minimum set of dependencies to adequately define the constructs.
 | |
| // ASSUMING MODERN MSVC TOOLCHAIN.
 | |
| 
 | |
#include <intrin.h>
#include <tmmintrin.h>
#include <wmmintrin.h>

#include <assert.h>
#include <stddef.h> // size_t, ptrdiff_t (the USIZE/SSIZE typedefs below)
#include <stdlib.h> // malloc/free (used by slicemem_alloc/slicemem_free)
 | |
| 
 | |
| typedef unsigned __int8  U8;
 | |
| typedef signed   __int8  S8;
 | |
| typedef unsigned __int16 U16;
 | |
| typedef signed   __int16 S16;
 | |
| typedef unsigned __int32 U32;
 | |
| typedef signed   __int32 S32;
 | |
| typedef unsigned __int64 U64;
 | |
| typedef signed   __int64 S64;
 | |
| 
 | |
| typedef size_t    USIZE;
 | |
| typedef ptrdiff_t SSIZE;
 | |
| 
 | |
| enum {
 | |
| 	false,
 | |
| 	true,
 | |
| 	true_overflow,
 | |
| };
 | |
| typedef S8  B8;
 | |
| typedef S16 B16;
 | |
| typedef S32 B32;
 | |
| 
 | |
| // Common macros we'll use throughout this.
 | |
| 
 | |
| #define assert_bounds(point, start, end) do { \
 | |
| 	USIZE pos_point = cast(USIZE, point);     \
 | |
| 	USIZE pos_start = cast(USIZE, start);     \
 | |
| 	USIZE pos_end   = cast(USIZE, end);       \
 | |
| 	assert(pos_start <= pos_point);           \
 | |
| 	assert(pos_point <= pos_end);             \
 | |
| } while(0)
 | |
| 
 | |
| // Functional style cast
 | |
| #define cast(type, data)  ((type)(data))
 | |
| #define pcast(type, data) * cast(type*, & (data))
 | |
| 
 | |
| #define nullptr cast(void*, 0)
 | |
| 
 | |
| #define glue_(A, B) A ## B
 | |
| #define glue(A, B)  glue_(A,B)
 | |
| 
 | |
| // Enforces size querying uses SSIZE type.
 | |
| #define size_of(data) cast(SSIZE, sizeof(data) )
 | |
| 
 | |
| #define stringify_(S) #S
 | |
| #define stringify(S)  stringify_(S)
 | |
| 
 | |
/*
The first construct we'll utilize is a String Slice.
In modern programming, with the memory sizes utilized, it is more ergonomic to track the length of strings with their pointer.
Most strings are not stored in some immutable table tracked statically; the performance loss in doing so is negligible on modern hardware constraints.
*/
 | |
| 
 | |
| // UTF-8 character encoding unit
 | |
| typedef unsigned char UTF8;
 | |
| 
 | |
| typedef struct Str8 Str8;
 | |
| struct Str8 {
 | |
| 	UTF8* ptr;
 | |
| 	SSIZE len;
 | |
| };
 | |
| 
 | |
// String literals in C include null-terminators; we aren't interested in preserving that.
 | |
| #define lit(string_literal) (Str8){ string_literal, size_of(string_literal) - 1 }
 | |
| 
 | |
| /*
 | |
| We'll want all of our textual process to operate with UTF-8 code pages:
 | |
| */
 | |
| #include <locale.h>
 | |
| inline void set_utf8_codepage() { setlocale(LC_ALL, ".UTF-8"); }
 | |
| 
 | |
// For now this string can be visualized using a debugger.
 | |
#ifdef DEMO__STR_SLICE
int main()
{
	Str8 first = lit("Our first string as a slice");
	(void) first; // Only inspected in the debugger.
	return 0;
}
// Tokens after #endif are ill-formed C; keep the label in a comment instead.
#endif // DEMO__STR_SLICE
 | |
| 
 | |
| /*
 | |
| We now want to be able to read a file. This will be a heavy rabbit-hole as we'll need to setup a basic file interface
 | |
| and related definitions for handling the memory.
 | |
| 
 | |
| For the purposes of the initial definition we'll introduced fixed-sized memory handling statically allocated onto the stack.
 | |
| */
 | |
| 
 | |
| /*
 | |
| First off we need to find out how to aquire the contents of a file on Windows.
 | |
| 
 | |
| We'll be wrapping the operation in a procedure called file_read_contents. We'll have it take a path and optional arguments (Opts__read_file_contents).
 | |
| It will return a result in a composite struct: FileOpResult; which may be expanded as needed in the future.
 | |
| */
 | |
| 
 | |
| typedef struct FileOpResult             FileOpResult;
 | |
| typedef struct Opts__read_file_contents Opts__read_file_contents;
 | |
| void         file_read_contents_api(FileOpResult* result, Str8 path, Opts__read_file_contents* opts);
 | |
| FileOpResult file__read_contents   (                      Str8 path, Opts__read_file_contents* opts);
 | |
| #define file_read_contents(path, ...) file__read_contents(path, & (Opts__read_file_contents){__VA_ARGS__} )
 | |
| 
 | |
| /*
 | |
| The file contents will be returned in bytes.
 | |
| To view or manage any slice of bytes we'll be utilizing a byte slice.
 | |
| */
 | |
| typedef struct SliceByte SliceByte;
 | |
| struct SliceByte {
 | |
| 	U8*   ptr;
 | |
| 	SSIZE len;
 | |
| };
 | |
| 
 | |
| /*
 | |
| The above is a pattern that can be provided so that whether or not the result is formatted and provided to the user via the stack is entirely optional.
 | |
| It also allows for default parameters to be defined conviently.
 | |
| */
 | |
| 
 | |
| // We'll utilize the ReadFile procedure within the WinAPI: https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-readfile
 | |
| #define NOMINMAX
 | |
| #define WIN32_LEAN_AND_MEAN
 | |
| #define WIN32_MEAN_AND_LEAN
 | |
| #define VC_EXTRALEAN
 | |
| #include <windows.h>
 | |
| #include <windowsx.h>
 | |
| #include <timeapi.h>
 | |
| #include <tlhelp32.h>
 | |
| #include <Shlobj.h>
 | |
| #include <processthreadsapi.h>
 | |
| #pragma comment(lib, "user32")
 | |
| #pragma comment(lib, "winmm")
 | |
| #pragma comment(lib, "shell32")
 | |
| #pragma comment(lib, "advapi32")
 | |
| #pragma comment(lib, "rpcrt4")
 | |
| #pragma comment(lib, "shlwapi")
 | |
| #pragma comment(lib, "comctl32")
 | |
| #pragma comment(linker,"\"/manifestdependency:type='win32' name='Microsoft.Windows.Common-Controls' version='6.0.0.0' processorArchitecture='*' publicKeyToken='6595b64144ccf1df' language='*'\"") // this is required for loading correct comctl32 dll file
 | |
| #undef NOMINMAX
 | |
| #undef WIN32_LEAN_AND_MEAN
 | |
| #undef WIN32_MEAN_AND_LEAN
 | |
| #undef VC_EXTRALEAN
 | |
| #if 0
 | |
| BOOL ReadFile(
 | |
|   [in]                HANDLE       hFile,
 | |
|   [out]               LPVOID       lpBuffer,
 | |
|   [in]                DWORD        nNumberOfBytesToRead,
 | |
|   [out, optional]     LPDWORD      lpNumberOfBytesRead,
 | |
|   [in, out, optional] LPOVERLAPPED lpOverlapped
 | |
| );
 | |
| 
 | |
| // In order to read a file we need a handle to a valid filesystem entity to read from: https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createfilea
 | |
| HANDLE CreateFileA(
 | |
|   [in]           LPCSTR                lpFileName,
 | |
|   [in]           DWORD                 dwDesiredAccess,
 | |
|   [in]           DWORD                 dwShareMode,
 | |
|   [in, optional] LPSECURITY_ATTRIBUTES lpSecurityAttributes,
 | |
|   [in]           DWORD                 dwCreationDisposition,
 | |
|   [in]           DWORD                 dwFlagsAndAttributes,
 | |
|   [in, optional] HANDLE                hTemplateFile
 | |
| );
 | |
| #endif
 | |
| 
 | |
| // We need to covert our string slice to a c-string for CreateFileA's path input.
 | |
| 
 | |
// Memory size helpers (power-of-two units).
// NOTE: "KILOBTYES" is a long-standing typo kept so existing call sites keep
// compiling; a correctly spelled alias is provided for new code.
#define KILOBTYES(n) (cast(USIZE, n) << 10)
#define MEGABYTES(n) (cast(USIZE, n) << 20)
#define GIGABYTES(n) (cast(USIZE, n) << 30)
#define TERABYTES(n) (cast(USIZE, n) << 40)
#define KILOBYTES(n) KILOBTYES(n)
 | |
| 
 | |
| /*
 | |
| We'll be defining here Fixed-sized memory blocks using typedefs on-demand
 | |
| 
 | |
| They will having the following format:
 | |
| typedef U8 FMem_<size>KB [ <U8 amount> ];
 | |
| */
 | |
| 
 | |
| typedef U8 FMem_16KB  [ KILOBTYES(16)  ];
 | |
| typedef U8 FMem_64KB  [ KILOBTYES(64)  ];
 | |
| typedef U8 FMem_128KB [ KILOBTYES(128) ];
 | |
| 
 | |
| #define typeof          __typeof__
 | |
| #define fmem_slice(mem) (SliceByte) { mem, size_of(mem) }
 | |
| 
 | |
| // We'll be using an intrinsic for copying memory:
 | |
// Copy `length` bytes from src to dest using the MSVC rep-movsb intrinsic.
// Returns dest on success; nullptr when either pointer is null or length is 0.
// https://learn.microsoft.com/en-us/cpp/intrinsics/movsb?view=msvc-170
void* memory_copy(void* restrict dest, void const* restrict src, USIZE length)
{
	B32 invalid_args = (dest == nullptr) || (src == nullptr) || (length == 0);
	if (invalid_args) {
		return nullptr;
	}
	__movsb(cast(unsigned char*, dest), cast(const unsigned char*, src), length);
	return dest;
}
 | |
| 
 | |
| // Often we'll want to check validity of a slice:
 | |
// Validate a slice: non-null base pointer and positive length.
// The macro argument is parenthesized so expressions such as `*p` or `a[i]`
// work as `slice` (the unparenthesized form mis-bound `slice.ptr`).
#define slice_assert(slice) do {     \
	assert((slice).ptr != nullptr);  \
	assert((slice).len > 0);         \
} while(0)
 | |
| 
 | |
// Byte-level copy between two slices; dest must be at least as large as src.
// The *_typewidth parameters are carried for debug/type inspection and are
// currently unused by the implementation.
void slice__copy(SliceByte dest, SSIZE dest_typewidth, SliceByte src, SSIZE src_typewidth) {
	assert(dest.len >= src.len);
	slice_assert(dest);
	slice_assert(src);
	memory_copy(dest.ptr, src.ptr, src.len);
}
// Generic front-end: converts typed slices to byte slices, scaling the element
// count by the element width so lengths are in bytes.
#define slice_copy(dest,src) slice__copy(                                            \
	(SliceByte){(dest).ptr, (dest).len * size_of(*(dest).ptr)}, size_of(*(dest).ptr) \
,	(SliceByte){(src ).ptr, (src ).len * size_of(*(src ).ptr)}, size_of(*(src ).ptr) \
)
 | |
| 
 | |
| // Assumes memory is zeroed.
 | |
| char* str8_to_cstr_capped(Str8 content, SliceByte mem) {
 | |
| 	assert(mem.len >= content.len);
 | |
| 	memory_copy(mem.ptr, content.ptr, content.len);
 | |
| 	return mem.ptr;
 | |
| }
 | |
| 
 | |
// To support zeroing slices we'll utilize an intrinsic.
// Zero `length` bytes at dest via the MSVC rep-stosb intrinsic.
// Returns false (no-op) for a null dest or zero length.
B32 memory_zero(void* dest, USIZE length) {
	// USIZE is unsigned, so `length <= 0` could only ever mean `== 0`;
	// spell it that way to avoid a misleading signed-style comparison.
	if (dest == nullptr || length == 0) {
		return false;
	}
	__stosb(cast(unsigned char*, dest), 0, length);
	return true;
}
 | |
| 
 | |
// Zero an entire byte slice. `typewidth` mirrors slice__copy's signature and
// is currently unused.
void slice__zero(SliceByte mem, SSIZE typewidth) {
	slice_assert(mem);
	memory_zero(mem.ptr, mem.len);
}
// Generic front-end: scales the element count to a byte length before zeroing.
#define slice_zero(slice) slice__zero((SliceByte){ cast(void*, (slice).ptr), (slice).len * size_of(*(slice).ptr)}, size_of(*(slice).ptr))
 | |
| 
 | |
| // Now for our "Version 1"
 | |
| 
 | |
| #if	defined(DEMO__FILE_READ_CONTENTS_V1) || \
 | |
| 	defined(DEMO__WATL_LEX_V1)           || \
 | |
| 	defined(DEMO__WATL_PARSE_V1)         || \
 | |
| 	defined(DEMO__WATL_DUMP_PREREQ_V1)   || \
 | |
| 	defined(DEMO__WATL_DUMP_V1)
 | |
| 
 | |
| struct FileOpResult
 | |
| {
 | |
| 	// For now we'll just have the content
 | |
| 	SliceByte content;
 | |
| };
 | |
| 
 | |
| struct Opts__read_file_contents
 | |
| {
 | |
| 	// For now we'll just have the backing memory provided as a slice.
 | |
| 	SliceByte backing;
 | |
| 	// And whether we should zero the backing.
 | |
| 	B32 zero_backing;
 | |
| };
 | |
| 
 | |
| void file_read_contents_api(FileOpResult* result, Str8 path, Opts__read_file_contents* opts)
 | |
| {
 | |
| 	assert(result != nullptr);
 | |
| 	assert(opts   != nullptr);
 | |
| 	slice_assert(path);
 | |
| 	// Backing is required at this point
 | |
| 	slice_assert(opts->backing);
 | |
| 
 | |
| 	// This will limit a path for V1 to be 16kb worth of codepoints.
 | |
| 	FMem_16KB   scratch   = {0};
 | |
| 	char const* path_cstr = str8_to_cstr_capped(path, fmem_slice(scratch) );
 | |
| 
 | |
| 	HANDLE id_file = CreateFileA(
 | |
| 		path_cstr,
 | |
| 		GENERIC_READ,
 | |
| 		FILE_SHARE_READ,
 | |
| 		NULL,
 | |
| 		OPEN_EXISTING,
 | |
| 		FILE_ATTRIBUTE_NORMAL,
 | |
| 		NULL
 | |
| 	);
 | |
| 	B32 open_failed = id_file == INVALID_HANDLE_VALUE;
 | |
| 	if (open_failed) {
 | |
| 		DWORD  error_code = GetLastError();
 | |
| 		assert(error_code != 0);
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	LARGE_INTEGER file_size = {0};
 | |
| 	DWORD get_size_failed = ! GetFileSizeEx(id_file, & file_size);
 | |
| 	if   (get_size_failed) {
 | |
| 		assert(get_size_failed == INVALID_FILE_SIZE);
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	// Because we are currently using fixed size memory, we need to confirm that we can hold this content.
 | |
| 	B32 not_enough_backing = opts->backing.len < file_size.QuadPart;
 | |
| 	if (not_enough_backing) {
 | |
| 		assert(not_enough_backing);
 | |
| 		// Otherwise we don't provide a result.
 | |
| 		result->content = (SliceByte){0};
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	if (opts->zero_backing) {
 | |
|           slice_zero(pcast(SliceByte, opts->backing));
 | |
| 	}
 | |
| 
 | |
| 	DWORD amount_read = 0;
 | |
| 	BOOL  read_result = ReadFile(
 | |
| 		id_file,
 | |
| 		opts->backing.ptr,
 | |
| 		file_size.QuadPart,
 | |
| 		& amount_read,
 | |
| 		nullptr
 | |
| 	);
 | |
| 	CloseHandle(id_file);
 | |
| 	
 | |
| 	B32 read_failed  = ! read_result;
 | |
| 	    read_failed |= amount_read != file_size.QuadPart;
 | |
| 	if (read_failed) {
 | |
| 		assert(read_failed);
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	result->content.ptr = opts->backing.ptr;
 | |
| 	result->content.len = file_size.QuadPart;
 | |
| 	return;
 | |
| }
 | |
| 
 | |
| #endif DEMO__FILE_READ_CONTENTS_V1
 | |
| 
 | |
| // Version agnostic code:
 | |
| inline
 | |
| FileOpResult file__read_contents(Str8 path, Opts__read_file_contents* opts) {
 | |
| 	FileOpResult result;
 | |
| 	file_read_contents_api(& result, path, opts);
 | |
| 	return result;
 | |
| }
 | |
| 
 | |
// And now to put it all together into a test run in the debugger. Content should be properly formatted if the code is correct.
#ifdef DEMO__FILE_READ_CONTENTS_V1
int main()
{
	// This will limit for our V1 read to 64kb at most.
	FMem_64KB    read_mem = {0};
	FileOpResult res      = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
	(void) res; // Only inspected in the debugger.
	return 0;
}
// Tokens after #endif are ill-formed C; keep the label in a comment instead.
#endif // DEMO__FILE_READ_CONTENTS_V1
 | |
| 
 | |
| /*
 | |
| Now that we have file reading done we need to be able to process the content.
 | |
| 
 | |
| First we want to do lexical analysis. So we'll create a token listing delimiting aspects of the text file relevant to us.
 | |
| For our data structure, we are going for a Whitespace-Aware Text Layout; where we'll track text and the formatting around them.
 | |
| 
 | |
| Just like with the read file contents operation, we'll define an interface to performing this analysis.
 | |
| It will be called watl_lex and take the SliceByte from the file as a Str8 slice and some Opts__watl_lex; 
 | |
| returning a WATL_LexInfo for providing user info on how the operation went.
 | |
| */
 | |
| 
 | |
| typedef struct WATL_LexInfo WATL_LexInfo;
 | |
| typedef struct Opts__watl_lex Opts__watl_lex;
 | |
| 
 | |
| void         watl_lex_api(WATL_LexInfo* info, Str8 source, Opts__watl_lex* opts);
 | |
| WATL_LexInfo watl__lex   (                    Str8 source, Opts__watl_lex* opts);
 | |
| #define watl_lex(source, ...) watl__lex(source, &(Opts__watl_lex){__VA_ARGS__})
 | |
| 
 | |
| /*
 | |
| Token identification will be done using a WATL_TokKind enumeration.
 | |
| The token type itself will be the id along with a ptr to its start of the slice. We can resolve the width of the token by its delta to the next token.
 | |
| If its the last token, then its delta is determined by its offset to the end of the Str8 slice.
 | |
| */
 | |
| 
 | |
| typedef U32 WATL_TokKind;
 | |
| enum WATL_TokKind {
 | |
| 	WATL_Tok_Space          = ' ',
 | |
| 	WATL_Tok_Tab            = '\t',
 | |
| 	WATL_Tok_CarriageReturn = '\r',
 | |
| 	WATL_Tok_LineFeed       = '\n',
 | |
| 	WATL_Tok_Text           = 0xFFFFFFFF,
 | |
| };
 | |
| 
 | |
| typedef struct WATL_Tok WATL_Tok;
 | |
| struct WATL_Tok {
 | |
| 	UTF8* code;
 | |
| };
 | |
| 
 | |
| typedef struct WATL_SliceTok WATL_SliceTok;
 | |
| struct WATL_SliceTok {
 | |
| 	WATL_Tok* ptr;
 | |
| 	SSIZE     len;
 | |
| };
 | |
| 
 | |
| Str8 watl_tok_str8(WATL_SliceTok toks, WATL_Tok* tok) {
 | |
| 	WATL_Tok* next = tok + 1;
 | |
| 	USIZE start    = cast(USIZE, toks.ptr->code);
 | |
| 	USIZE curr     = cast(USIZE, tok->code);
 | |
| 	USIZE offset   = curr - start;
 | |
| 	SSIZE left     = toks.len - offset;
 | |
| 	B32   last_tok = (start + toks.len) == (curr + left);
 | |
| 	Str8  text     = {0};
 | |
| 	text.ptr = tok->code;
 | |
| 	text.len = next > (toks.ptr + toks.len) ? 
 | |
| 		left
 | |
| 	// Othwerise its the last minus the curr.
 | |
| 	:	cast(SSIZE, next->code - tok->code);
 | |
| 	return text;
 | |
| }
 | |
| 
 | |
| /*
 | |
| Tokens are allocated to a backing slice of memory defined by the user. This pool of memory will ideally not be constrained to a fixed size on the stack.
 | |
| So for V1 we'll allocate 10 megs of heap memory to act as a pool for the tokens. We'll keep track of how much for the pool we used via a new memory tracking construct:
 | |
| The fixed-sized arena.
 | |
| 
 | |
| A basic fixed size arena only has three components which can vary depending on the convention the user perfers.
 | |
| In our case we'll track its capacity, its starting address, and how much has been comitted..
 | |
| */
 | |
| 
 | |
| // We use this in-conjunction with Areans to save a point thats safe to rewind to by the user.
 | |
| typedef struct ArenaSP ArenaSP;
 | |
| struct ArenaSP { void* ptr; };
 | |
| 
 | |
| #pragma region FArena
 | |
| typedef struct FArena FArena;
 | |
| struct FArena {
 | |
| 	void* start;
 | |
| 	USIZE capacity;
 | |
| 	USIZE used;
 | |
| };
 | |
| void     api_farena_init(FArena* arena, SliceByte mem);
 | |
| FArena   farena_init    (SliceByte mem);
 | |
| void*    farena__push   (FArena* arena, USIZE type_size, USIZE amount, Str8 dbg_typename);
 | |
| void     farena_reset   (FArena* arena);
 | |
| void     farena_rewind  (FArena* arena, ArenaSP savepoint);
 | |
| ArenaSP  farena_save    (FArena  arena);
 | |
| 
 | |
| #define  farena_push(arena, type)               cast(type*,      farena__push(& arena, size_of(type), 1,      lit(stringify(type)))         )
 | |
| #define  farena_push_array(arena, type, amount) (Slice ## type){ farena__push(& arena, size_of(type), amount, lit(stringify(type))), amount }
 | |
| 
 | |
// Bind a fixed arena to caller-provided backing memory; nothing is allocated,
// the arena simply doles out bytes from mem until capacity is hit.
inline
void api_farena_init(FArena* arena, SliceByte mem) {
	arena->start    = mem.ptr;
	arena->capacity = mem.len;
	arena->used     = 0;
}
// By-value convenience constructor wrapping api_farena_init.
inline FArena farena_init(SliceByte mem) { FArena arena; api_farena_init(& arena, mem); return arena; }
 | |
| 
 | |
// Bump-allocate `type_size * amount` bytes from the arena.
// Exceeding capacity is a debug-only assert (no alignment is performed —
// callers receive the next free byte as-is). `dbg_typename` is carried purely
// for debugger inspection and is otherwise unused.
inline
void* farena__push(FArena* arena, USIZE type_size, USIZE amount, Str8 dbg_typename) {
	USIZE to_commit = type_size * amount;
	USIZE unused    = arena->capacity - arena->used;
	assert(to_commit <= unused);
	void*  ptr   = cast(void*, cast(USIZE, arena->start) + arena->used);
	arena->used += to_commit;
	return ptr;
}
 | |
| 
 | |
| inline
 | |
| void farena_rewind(FArena* arena, ArenaSP savepoint) {
 | |
| 	void* end = cast(void*, cast(USIZE, arena->start) + arena->used);
 | |
| 	assert_bounds(savepoint.ptr, arena->start, end);
 | |
| 	arena->used -= cast(USIZE, savepoint.ptr) - cast(USIZE, arena->start);
 | |
| }
 | |
| 
 | |
| inline void    farena_reset(FArena* arena) { arena->used = 0; }
 | |
| inline ArenaSP farena_save (FArena  arena) { ArenaSP savepoint; savepoint.ptr = arena.start; return savepoint; } 
 | |
| #pragma endregion FArena
 | |
| 
 | |
| #if defined(DEMO__WATL_LEX_V1)         || \
 | |
| 	defined(DEMO__WATL_PARSE_V1)       || \
 | |
| 	defined(DEMO__WATL_DUMP_PREREQ_V1) || \
 | |
| 	defined(DEMO__WATL_DUMP_V1)
 | |
| 
 | |
| struct WATL_LexInfo {
 | |
| 	// For now just the tokens
 | |
| 	WATL_SliceTok tokens;
 | |
| };
 | |
| 
 | |
| struct Opts__watl_lex {
 | |
| /*
 | |
| 	For this operation we'll enforce that the arena must linearly allocate each token, forming a strictly adjacent sent of elements in an array.
 | |
| 	This is not necessary and an equivalent process could be done where the tokens instead are semi-contigously organized into linked list with a chained arena, 
 | |
| 	or the tokens are sparely cached.
 | |
| 	Where their position in their originating string is not preserved. In this case we're keeping it simple.
 | |
| 	Tokens are in the same block of memory and they don't use a string cache.
 | |
| */
 | |
| 	SliceByte pool_toks;
 | |
| };
 | |
| 
 | |
| // We are assuming everything is utf8-ascii.
 | |
| void watl_lex_api(WATL_LexInfo* info, Str8 source, Opts__watl_lex* opts)
 | |
| {
 | |
| 	assert(info != nullptr);
 | |
| 	slice_assert(source);
 | |
| 	assert(opts != nullptr);
 | |
| 
 | |
| 	FArena arena = farena_init(opts->pool_toks);
 | |
| 
 | |
| 	UTF8* end    = source.ptr + source.len;
 | |
| 	UTF8* cursor = source.ptr;
 | |
| 	UTF8* prev   = source.ptr;
 | |
| 	UTF8  code   = * cursor;
 | |
| 
 | |
| 	B32       was_formatting = true;
 | |
| 	WATL_Tok* tok            = nullptr;
 | |
| 	for (; cursor < end;)
 | |
| 	{
 | |
| 		switch (code)
 | |
| 		{
 | |
| 			case WATL_Tok_Space:
 | |
| 			case WATL_Tok_Tab:
 | |
| 			{
 | |
| 				if (* prev != * cursor) {
 | |
| 					tok            = farena_push(arena, WATL_Tok);
 | |
| 					tok->code      = cursor;
 | |
| 					was_formatting = true;
 | |
| 				}
 | |
| 				cursor += 1;
 | |
| 			}
 | |
| 			break;
 | |
| 
 | |
| 			case WATL_Tok_LineFeed: {
 | |
| 					tok            = farena_push(arena, WATL_Tok);
 | |
| 					tok->code      = cursor;
 | |
| 					cursor        += 1;
 | |
| 					was_formatting = true; 
 | |
| 			}
 | |
| 			break;
 | |
| 
 | |
| 			// Assuming what comes after is line feed.
 | |
| 			case WATL_Tok_CarriageReturn: {
 | |
| 					tok            = farena_push(arena, WATL_Tok);
 | |
| 					tok->code      = cursor;
 | |
| 					cursor        += 2;
 | |
| 					was_formatting = true; 
 | |
| 			}
 | |
| 			break;
 | |
| 
 | |
| 			default:
 | |
| 			{
 | |
| 				if (was_formatting) {
 | |
| 					tok            = farena_push(arena, WATL_Tok);
 | |
| 					tok->code      = cursor;
 | |
| 					was_formatting = false;
 | |
| 				}
 | |
| 				cursor += 1;
 | |
| 			}
 | |
| 			break;
 | |
| 		}
 | |
| 		prev =  cursor - 1;
 | |
| 		code = * cursor;
 | |
| 	}
 | |
| 	info->tokens.ptr = arena.start;
 | |
| 	info->tokens.len = arena.used / size_of(WATL_Tok*);
 | |
| }
 | |
| 
 | |
| #endif DEMO__WATL_LEX_V1
 | |
| 
 | |
| inline
 | |
| WATL_LexInfo watl__lex(Str8 source, Opts__watl_lex* opts) {
 | |
| 	WATL_LexInfo result = {0};
 | |
| 	watl_lex_api(& result, source, opts);
 | |
| 	return result;
 | |
| }
 | |
| 
 | |
| /*
 | |
| To allocate onto the heap we'll make a basic slicemem_malloc to allocate, we'll make a corresponding slicemem_free aswell.
 | |
| However we don't need to use it for the V1 example. The OS will cleanup the pages used by the process during its termination.
 | |
| */
 | |
| 
 | |
// Heap-allocate a byte slice. Enforces a floor of one 4 KiB page (debug-only
// assert); allocation failure is likewise only caught by assert.
// malloc/free come from <stdlib.h> — see the include block at the top.
SliceByte slicemem_alloc(USIZE amount)
{
	assert(amount > KILOBTYES(4));
	void* result = malloc(amount);
	assert(result != nullptr);
	SliceByte mem = {
		.ptr = result,
		.len = amount
	};
	return mem;
}
// Release a slice previously obtained from slicemem_alloc.
void slicemem_free(SliceByte mem) {
	free(mem.ptr);
}
 | |
| 
 | |
| #ifdef DEMO__WATL_LEX_V1
 | |
| int main()
 | |
| {
 | |
| 	// This will limit for our V1 read to 64kb at most.
 | |
| 	FMem_64KB    read_mem = {0};
 | |
| 	FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
 | |
| 
 | |
| 	// This will limit our V1 lex to only 8 megs worth of token tracking on a file.
 | |
| 	SliceByte     mem_toks = slicemem_alloc(MEGABYTES(8));
 | |
| 	WATL_LexInfo lex_res  = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);
 | |
| 	// unnecessary in this case but if you want to explicitly:
 | |
| 	slicemem_free(mem_toks);
 | |
| 	return 0;
 | |
| }
 | |
| #endif
 | |
| 
 | |
| /*
 | |
| Next we'll parse these tokens into a rudimentary WATL Abstract Syntax Tree.
 | |
| * The tree will be top-level organized by lines consisting of linked slices of visble and non-visible tokens.
 | |
| * Just as with the the lexical analysis, lines and nodes will be linearly allocated adjacent to each other. This allows us to utilize array operations.
 | |
| */
 | |
| 
 | |
| typedef struct WATL_ParseInfo WATL_ParseInfo;
 | |
| typedef struct Opts__watl_parse Opts__watl_parse;
 | |
| void           watl_parse_api(WATL_ParseInfo* info, WATL_SliceTok tokens, Opts__watl_parse* opts);
 | |
| WATL_ParseInfo watl__parse   (                      WATL_SliceTok tokens, Opts__watl_parse* opts);
 | |
| #define watl_parse(tokens, ...) watl__parse(tokens, & (Opts__watl_parse) {__VA_ARGS__})
 | |
| 
 | |
| /*
 | |
| For the sake of the exercise, we'll be eliminating the association with the file's strings and we'll need to instead cache them.
 | |
| */
 | |
| #pragma region Str8Cache
 | |
| typedef struct Str8Cache Str8Cache;
 | |
| void      str8cache_init_api(Str8Cache* cache, SliceByte mem_strs, SliceByte mem_slots, SliceByte mem_table);
 | |
| Str8Cache str8cache_init    (                  SliceByte mem_strs, SliceByte mem_slots, SliceByte mem_table);
 | |
| 
 | |
// A cache like this relies on tabling string entries utilizing an index derived from a hashed ID.
// For these strings we'll be using a hash called djb8:
 | |
| 
 | |
| // Introducing a slice iterator:
 | |
| #define slice_iter(container, iter) typeof((container).ptr) iter = (container).ptr; iter != ((container).ptr + (container).len); ++ iter
 | |
| 
 | |
// Accumulating DJB-style hash: for each byte, hash = hash * 257 + byte
// ((h << 8) + h == h * 257). The caller supplies and seeds `hash`, which lets
// multiple byte slices be folded into one running value.
inline
void hash64_djb8(U64* hash, SliceByte bytes) {
	for (U8 const* elem = bytes.ptr; elem != (bytes.ptr + bytes.len); ++ elem) {
		*hash = (((*hash) << 8) + (*hash)) + (*elem);
	}
}
 | |
| 
 | |
| // For a library or codebase its recommended to setup a metaprogram to generate hash utilizing containers
 | |
| // Or other containers that cannot be sufficiently lifted to general runtime paths without losing ergonomic debug type info or type-constraint enforcements.
 | |
| // Unlike with the template markup C++ uses, you can strike a balance between how many definitions are redundantly made or optimized for collapsing to a general path
 | |
| // based on target optimization and debugability.
 | |
| 
 | |
| // For this V1 example, we'll be hand-rolling a fixed sized table with excess slot chaining for colliding slots.
 | |
// It's a relatively simple implementation to hand-roll. These things tend to become unyielding with more advanced variants.
 | |
| 
 | |
| typedef struct Str8Cache_Slot Str8Cache_Slot;
 | |
| struct Str8Cache_Slot {
 | |
| 	Str8Cache_Slot* prev;
 | |
| 	Str8Cache_Slot* next;
 | |
| 	Str8 value;
 | |
| 	U64  key;
 | |
| 	B32  occupied;
 | |
| };
 | |
| 
 | |
| typedef struct Str8Cache_SliceSlot Str8Cache_SliceSlot;
 | |
| struct Str8Cache_SliceSlot {
 | |
| 	Str8Cache_Slot* ptr;
 | |
| 	SSIZE           len;
 | |
| };
 | |
| 
 | |
| struct Str8Cache {
 | |
| 	FArena               a_str;
 | |
| 	Str8Cache_SliceSlot  pool;
 | |
| 	Str8Cache_Slot*      vacant;
 | |
| 	Str8Cache_SliceSlot  table;
 | |
| };
 | |
| 
 | |
| Str8Cache str8cache_init(SliceByte mem_strs, SliceByte mem_slots, SliceByte mem_table) { Str8Cache cache; str8cache_init_api(& cache, mem_strs, mem_slots, mem_table); return cache; }
 | |
| inline
 | |
| void str8cache_init_api(Str8Cache* cache, SliceByte mem_strs, SliceByte mem_slots, SliceByte mem_table) {
 | |
| 	assert(cache != nullptr);
 | |
| 	slice_assert(mem_strs);
 | |
| 	slice_assert(mem_slots);
 | |
| 	slice_assert(mem_table);
 | |
| 	cache->a_str = farena_init(mem_strs);
 | |
| 	cache->pool  = (Str8Cache_SliceSlot){ cast(void*, mem_slots.ptr), mem_slots.len / size_of(Str8Cache_Slot)};
 | |
| 	cache->table = (Str8Cache_SliceSlot){ cast(void*, mem_table.ptr), mem_table.len / size_of(Str8Cache_Slot)};
 | |
| 	slice_zero(cache->pool);
 | |
| 	slice_zero(cache->table);
 | |
| }
 | |
| 
 | |
| void str8cache_clear(Str8Cache* cache)
 | |
| {
 | |
| 	for (slice_iter(cache->table, slot))
 | |
| 	{
 | |
| 		if (slot == nullptr) {
 | |
| 			continue;
 | |
| 		}
 | |
| 		for (Str8Cache_Slot* probe_slot = slot->next; probe_slot != nullptr; probe_slot = probe_slot->next) {
 | |
| 			slot->occupied = false;
 | |
| 		}
 | |
| 		slot->occupied = false;
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // We don't introduce a remove option because we're not tracking fixed sized entities.
 | |
// Strings take up non-deterministic sizes of their backing arena. So the only thing that can be done with the cache is wiping it and recaching all strings.
 | |
| 
 | |
| /*
 | |
| When storing a hash of a slot, we can almost never utilize the full width of a key,
 | |
| so we must truncate the key via module to get a "good enough" unique ID to place in the table.
 | |
| */
 | |
| inline
 | |
| U64 str8cache_slot_id(Str8Cache cache, U64 key) {
 | |
| 	U64 hash_index = key % cast(U64, cache.table.len);
 | |
| 	return hash_index;
 | |
| }
 | |
| 
 | |
| Str8* str8cache_get(Str8Cache cache, U64 key)
 | |
| {
 | |
| 	U64             hash_index   = str8cache_slot_id(cache, key);
 | |
| 	Str8Cache_Slot* surface_slot = & cache.table.ptr[hash_index];
 | |
| 	if (surface_slot == nullptr) { 
 | |
| 		return nullptr;
 | |
| 	}
 | |
| 	if (surface_slot->occupied && surface_slot->key == key) {
 | |
| 		return & surface_slot->value;
 | |
| 	}
 | |
| 	for (Str8Cache_Slot* slot = surface_slot->next; slot != nullptr; slot = slot->next)
 | |
| 	{
 | |
| 		if (slot->occupied && slot->key == key) {
 | |
| 			return & slot->value;
 | |
| 		}
 | |
| 	}
 | |
| 	return nullptr;
 | |
| }
 | |
| 
 | |
| Str8* str8cache_set(Str8Cache* cache, U64 key, Str8 value)
 | |
| {
 | |
| 	U64             hash_index   = str8cache_slot_id(*cache, key);
 | |
| 	Str8Cache_Slot* surface_slot = & cache->table.ptr[hash_index];
 | |
| 	if (! surface_slot->occupied || surface_slot->key == key)
 | |
| 	{
 | |
| 		if (value.ptr != surface_slot->value.ptr) {
 | |
| 			SliceByte mem = { farena__push(& cache->a_str, size_of(U8), value.len, lit("Str8")), value.len };
 | |
| 			slice_copy(mem, value);
 | |
| 			surface_slot->value = pcast(Str8, mem);
 | |
| 		}
 | |
| 		surface_slot->key      = key;
 | |
| 		surface_slot->occupied = true;
 | |
| 		return & surface_slot->value;
 | |
| 	}
 | |
| 	Str8Cache_Slot* slot = surface_slot;
 | |
| 	for (;; slot = slot->next)
 | |
| 	{
 | |
| 		if (slot->next == nullptr)
 | |
| 		{
 | |
| 			// We had a collision, we need to grab a vacant slot from the pool and utilize it instead.
 | |
| 			slot->next       = cache->vacant;
 | |
| 			* slot->next     = (Str8Cache_Slot){0};
 | |
| 			slot->next->prev = slot;
 | |
| 
 | |
| 			Str8Cache_Slot* next_vacant = cache->vacant + 1;
 | |
| 			assert(next_vacant < cache->pool.ptr + cache->pool.len );
 | |
| 			// If the above fails we ran out of extra slots.
 | |
| 			cache->vacant = cache->vacant + 1;
 | |
| 		}
 | |
| 		if ( ! slot->next->occupied || slot->next->key == key)
 | |
| 		{
 | |
| 			if (value.ptr != slot->next->value.ptr) {
 | |
| 				SliceByte mem = { farena__push(& cache->a_str, size_of(U8), value.len, lit("Str8")), value.len };
 | |
| 				slice_copy(mem, value);
 | |
| 				slot->next->value = (Str8){mem.ptr, mem.len / size_of(char)};
 | |
| 			}
 | |
| 			slot->next->value    = value;
 | |
| 			slot->next->key      = key;
 | |
| 			slot->next->occupied = true;
 | |
| 			return & slot->next->value;
 | |
| 		}
 | |
| 		// We keep traversing till we find a match or we find a vacancy for this list in the table.
 | |
| 		// Make sure to tune the size of the table so it does this less! 
 | |
| 		// Note: Tables sized by prime values collide less aswell. 
 | |
| 		// You can use a closest prime number lookup table to derive what length to expose to the cache's table for hash ID resolution.
 | |
| 	}
 | |
| 	return nullptr;
 | |
| }
 | |
| #pragma endregion Str8Cache
 | |
| 
 | |
| // Finally our abstracted cache interface:
 | |
| Str8 cache_str8(Str8Cache* cache, Str8 str)
 | |
| {
 | |
| 	U64 key = 0; hash64_djb8(& key, pcast(SliceByte, str));
 | |
| 	Str8* result = str8cache_set(cache, key, str);
 | |
| 	assert(result != nullptr);
 | |
| 	return * result;
 | |
| }
 | |
| 
 | |
// A WATL node is just an interned chunk string in V1 (no kind tag carried).
typedef Str8 WATL_Node;
#if 0
// Kept for reference: a future node shape carrying an explicit kind.
typedef struct WATL_Node WATL_Node;
struct WATL_Node {
	WATL_NodeKind kind;
	Str8 entry;
};
#endif
 | |
| 
 | |
typedef struct WATL_Line WATL_Line;
// A line is a contiguous run of nodes (chunks) belonging to it.
struct WATL_Line {
	WATL_Node* ptr; // First chunk of the line.
	SSIZE      len; // Number of chunks on the line.
};
 | |
| 
 | |
typedef struct WATL_SliceLine WATL_SliceLine;
// View over an array of WATL_Line.
struct WATL_SliceLine {
	WATL_Line* ptr;
	SSIZE      len;
};
 | |
| 
 | |
| #if defined(DEMO__WATL_PARSE_V1) || defined(DEMO__WATL_DUMP_PREREQ_V1) || defined(DEMO__WATL_DUMP_V1)
 | |
| 
 | |
// Optional/named parameters for watl_parse.
struct Opts__watl_parse {
	SliceByte  backing_nodes; // Arena memory for WATL_Node allocations.
	SliceByte  backing_lines; // Arena memory for WATL_Line allocations.
	Str8Cache* str_cache;     // Interning cache for chunk strings.
};
 | |
| 
 | |
// Result of watl_parse: the parsed lines.
struct WATL_ParseInfo {
	WATL_SliceLine lines;
};
 | |
| 
 | |
/*
Builds the WATL line/node structure from the lexed token stream.
Lines are allocated from opts->backing_lines, nodes from opts->backing_nodes,
and each chunk's text is interned via opts->str_cache.
*/
void watl_parse_api(WATL_ParseInfo* info, WATL_SliceTok tokens, Opts__watl_parse* opts)
{
	assert(info != nullptr);
	slice_assert(tokens);
	assert(opts != nullptr);

	FArena a_lines = farena_init(opts->backing_lines);
	FArena a_nodes = farena_init(opts->backing_nodes);

	WATL_Line* line = farena_push(a_lines, WATL_Line);
	WATL_Node* curr = farena_push(a_nodes, WATL_Node); // Preemptively allocate a node for the line (may not be used)
	* curr    = (WATL_Node){0};
	line->ptr = curr;
	line->len = 0;
	info->lines.ptr = line;
	info->lines.len = 0;
	for (slice_iter(tokens, token))
	{
		switch (* token->code)
		{
			// A line terminator closes the current line and opens the next one.
			case WATL_Tok_CarriageReturn:
			case WATL_Tok_LineFeed: {
				WATL_Line* new_line = farena_push(a_lines, WATL_Line);
				line             = new_line;
				line->ptr        = curr;
				line->len        = 0;
				// NOTE(review): len only increments per terminator, so a final line
				// without a trailing newline appears uncounted — confirm intended.
				info->lines.len += 1;
			}
			continue;

			default:
			break;
		}

		// Visible/space/tab chunk: intern it and append to the current line,
		// then pre-allocate the next node slot.
		Str8 tok_str = watl_tok_str8(tokens, token);
		* curr       = cache_str8( opts->str_cache, tok_str );
		curr         = farena_push(a_nodes, WATL_Node);
		* curr       = (WATL_Node){0};
		line->len   += 1;
		continue;
	}
}
 | |
| 
 | |
#endif // DEMO__WATL_PARSE_V1
 | |
| 
 | |
#if defined(DEMO__WATL_PARSE_V1) || defined(DEMO__WATL_DUMP_PREREQ_V1) || defined(DEMO__WATL_DUMP_V1)
// Value-returning wrapper over watl_parse_api.
// Fix: guarded to match Opts__watl_parse / WATL_ParseInfo / watl_parse_api, which are only
// defined for the demo builds; previously this definition was unguarded and failed to
// compile when no demo macro was set.
WATL_ParseInfo watl__parse(WATL_SliceTok tokens, Opts__watl_parse* opts) { WATL_ParseInfo info; watl_parse_api(& info, tokens, opts); return info; }
#endif
 | |
| 
 | |
| #ifdef DEMO__WATL_PARSE_V1
 | |
// Demo driver: read this source file, lex it, and parse it into WATL lines,
// interning every chunk through the string cache.
int main()
{
	// This will limit for our V1 read to 64kb at most.
	FMem_64KB    read_mem = {0};
	FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );

	// This will limit our V1 lex to only 8 megs worth of token tracking on a file.
	SliceByte    mem_toks = slicemem_alloc(MEGABYTES(8));
	WATL_LexInfo lex_res  = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);

	// Backing memory for the cache: interned bytes, collision-slot pool, and hash table.
	SliceByte mem_cache_strs  = slicemem_alloc(MEGABYTES(64));
	SliceByte mem_cache_slots = slicemem_alloc(1024 * 1024 * 16 * size_of(Str8Cache_SliceSlot));
	SliceByte mem_cache_table = slicemem_alloc(1024        * 16 * size_of(Str8Cache_SliceSlot));
	Str8Cache str_cache       = str8cache_init(mem_cache_strs, mem_cache_slots, mem_cache_table);

	SliceByte mem_parse_nodes = slicemem_alloc(MEGABYTES(4));
	SliceByte mem_parse_lines = slicemem_alloc(MEGABYTES(4));
	WATL_ParseInfo parse_res  = watl_parse(lex_res.tokens, .backing_nodes = mem_parse_nodes, .backing_lines = mem_parse_lines, .str_cache = & str_cache);

	// unnecessary in this case but if you want to explicitly:
	slicemem_free(mem_toks);
	return 0;
}
 | |
| #endif
 | |
| 
 | |
| /*
 | |
| Now we'll like to dump this WATL structure into a file.
 | |
| To do so we'll need to generate the content string of the file dynamically based on the WATL's content.
 | |
| 
 | |
| We'll be utilizing a new construct called a string generator which be tied to all functionality for constructing strings.
 | |
| */
 | |
| 
 | |
typedef struct Str8Gen Str8Gen;
// String builder over a caller-provided buffer.
struct Str8Gen {
	SliceByte backing; // For V1 the backing buffer is fixed size.
	UTF8* ptr;         // Start of the generated string (set to backing.ptr on init).
	SSIZE len;         // Bytes generated so far.
};
 | |
| 
 | |
// Initialize a generator in place over a fixed backing buffer.
void    str8gen_init(Str8Gen* gen, SliceByte backing);
// Value-returning convenience over str8gen_init.
Str8Gen str8gen_make(              SliceByte backing);

// Append str verbatim to the generator's buffer.
void str8gen_append_str8(Str8Gen* gen, Str8 str);
 | |
| 
 | |
| void str8gen_init(Str8Gen* gen, SliceByte backing) {
 | |
| 	assert(gen != nullptr);
 | |
| 	gen->backing = backing;
 | |
| 	gen->ptr     = backing.ptr;
 | |
| 	gen->len     = 0;
 | |
| }
 | |
| Str8Gen str8gen_make(SliceByte backing) { Str8Gen gen; str8gen_init(& gen, backing); return gen; }
 | |
| 
 | |
| void str8gen_append_str8(Str8Gen* gen, Str8 str) {
 | |
| 	SSIZE  left = gen->backing.len - gen->len;
 | |
| 	assert(left >= str.len);
 | |
| 	SliceByte dest = {gen->ptr + gen->len, str.len};
 | |
| 	slice_copy(dest, str);
 | |
| 	return;
 | |
| }
 | |
| 
 | |
/*
In order to support appending formatted content via str8gen_append_fmt, we'll be using a substitution formatter utilizing a string-identification token pattern.

Where a format template string is provided with an 'id' wrapped in delimiters, which will be the angle brackets: <id>
Example: This formatted string will have <id> substituted into it.
*/
 | |
| #pragma region fmt_vtoken
 | |
| 
 | |
typedef struct FmtTokEntry FmtTokEntry;
// One substitution entry: djb8 hash of the token id -> replacement text.
struct FmtTokEntry {
	U64  key;
	Str8 value;
};
typedef struct SliceFmtTokEntry SliceFmtTokEntry;
// View over an array of FmtTokEntry.
struct SliceFmtTokEntry {
	FmtTokEntry* ptr;
	SSIZE        len;
};
 | |
| 
 | |
// One-past-the-end pointer of a slice.
// Fix: the argument is now parenthesized so expressions such as slice_end(*s) expand correctly.
#define slice_end(slice) ((slice).ptr + (slice).len)
 | |
| 
 | |
/*
This is a token-substituting formatter using an array-table lookup for the tokens to substitute.
*/
 | |
/*
Copies fmt_template into buffer, replacing each <id> whose djb8 hash matches a
table entry with that entry's value. Unmatched <id> spans are copied literally.
Returns the slice of buffer that was written.
*/
Str8 fmt_vtoken_slice(SliceByte buffer, SliceFmtTokEntry tokens, Str8 fmt_template)
{
	slice_assert(buffer);
	slice_assert(tokens);
	slice_assert(fmt_template);

	UTF8* cursor_buffer    = buffer.ptr;
	SSIZE buffer_remaining = buffer.len;

	char curr_code = * fmt_template.ptr;

	UTF8* cursor_fmt = fmt_template.ptr;
	SSIZE left_fmt   = fmt_template.len;
	while (left_fmt && buffer_remaining)
	{
		// Forward until we hit the delimiter '<' or the template's contents are exhausted.
		while (curr_code && curr_code != '<' && cursor_fmt != slice_end(fmt_template))
		{
			* cursor_buffer = * cursor_fmt;
			++ cursor_buffer;
			++ cursor_fmt;
			-- buffer_remaining;
			-- left_fmt;

			curr_code = * cursor_fmt;
		}

		if (curr_code == '<')
		{
			UTF8* cursor_potential_token = cursor_fmt + 1;
			SSIZE potential_token_length = 0;

			// NOTE(review): assumes a closing '>' exists; a template ending with an
			// unclosed '<' would scan past the end — confirm templates are well formed.
			while (* (cursor_potential_token + potential_token_length) != '>') {
				++ potential_token_length;
			}

			// Hashing the potential token and cross checking it with our token table
			U64   key   = 0; hash64_djb8(& key, (SliceByte){ cast(void*, cursor_fmt + 1), potential_token_length});
			Str8* value = nullptr;
			for (slice_iter(tokens, token))
			{
				// We do a linear iteration instead of a hash table lookup because the user should never be substituting with more than ~100 unique tokens.
				if (token->key == key) {
					value = & token->value;
					break;
				}
			}

			if (value)
			{
				// Splice the substituted value into the output.
				// NOTE(review): this copy does not re-check buffer_remaining against
				// value->len — confirm callers size the buffer generously.
				SSIZE       left         = value->len;
				char const* cursor_value = value->ptr;

				while (left --)
				{
					* cursor_buffer = * cursor_value;
					++ cursor_buffer;
					++ cursor_value;
					-- buffer_remaining;
				}

				// Sync cursor format to after the processed token
				cursor_fmt = cursor_potential_token + potential_token_length + 1;
				curr_code  = * cursor_fmt;
				left_fmt  -= potential_token_length + 2; // The 2 here are the '<' & '>' delimiters being omitted.
				continue;
			}

			// No table match: emit the '<' literally and resume scanning after it.
			* cursor_buffer = * cursor_fmt;
			++ cursor_buffer;
			++ cursor_fmt;
			-- buffer_remaining;
			-- left_fmt;

			curr_code = * cursor_fmt;
		}
	}
	Str8   result = {buffer.ptr, buffer.len - buffer_remaining};
	return result;
}
 | |
| 
 | |
typedef struct SliceStr8 SliceStr8;
// View over an array of Str8 (used to pass key/value pairs back to back).
struct SliceStr8 {
	Str8* ptr;
	SSIZE len;
};
 | |
| 
 | |
// Function-local static: the value persists across calls.
#define local_persist static
 | |
| 
 | |
| Str8 fmt__vtoken(SliceByte backing_tbl, SliceByte backing_buf, Str8 fmt_template, SliceStr8* tokens)
 | |
| {
 | |
| 	assert(tokens != nullptr);
 | |
| 	FArena           a_backing = farena_init(backing_tbl);
 | |
| 	SliceFmtTokEntry table     = {a_backing.start, 0};
 | |
| 	S32 left = tokens->len;
 | |
| 	for (slice_iter(*tokens, token)) {
 | |
| 		FmtTokEntry* entry = farena_push(a_backing, FmtTokEntry); 
 | |
| 		* entry = (FmtTokEntry){0};
 | |
| 		hash64_djb8(& entry->key, (SliceByte){cast(void*, token->ptr), token->len});
 | |
| 		++ token;
 | |
| 		entry->value = * token;
 | |
| 		++ table.len;
 | |
| 	}
 | |
| 	Str8   result = fmt_vtoken_slice(backing_buf, table, fmt_template);
 | |
| 	return result;
 | |
| }
 | |
| 
 | |
// Expected to take a Str8 array of entries formatted as a 2D array of key-value pairs (Str8[length][2])
// The array will be tracked using a SliceStr8 structure.
// The varargs are materialized into an unnamed Str8 array and its length derived with sizeof.
#define fmt_vtoken(backing_tbl, backing_buf, fmt_template, ...)         \
fmt__vtoken(backing_tbl, backing_buf, lit(fmt_template),                \
	&(SliceStr8){                                                       \
		.ptr = (Str8[]){ __VA_ARGS__ },                                 \
		.len = (SSIZE)sizeof( (Str8[]){ __VA_ARGS__ } ) / size_of(Str8) \
	}                                                                   \
)
 | |
| #pragma endregion fmt_vtoken
 | |
| 
 | |
inline
// Formats fmt_template (with token substitution) directly into the generator's
// remaining space, then advances its length by what was written.
void str8gen__append_fmt(Str8Gen* gen, Str8 fmt_template, SliceStr8* tokens)
{
	// NOTE(review): tbl_backing is function-local static, so this function is not
	// reentrant or thread-safe — confirm acceptable for this demo.
	local_persist FMem_64KB tbl_backing = {0};
	SliceByte fmt_backing = {gen->ptr + gen->len, gen->backing.len - gen->len};
	Str8 appended = fmt__vtoken(fmem_slice(tbl_backing), fmt_backing, fmt_template, tokens);
	gen->len += appended.len;
	return;
}
 | |
// Ergonomic wrapper: pass key/value Str8 pairs as varargs; they are packed into an
// unnamed Str8 array whose length is computed with sizeof.
#define str8gen_append_fmt(gen, fmt_template, ...)                      \
str8gen__append_fmt(& gen, lit(fmt_template),                           \
	&(SliceStr8){                                                       \
		.ptr = (Str8[]){ __VA_ARGS__ },                                 \
		.len = (SSIZE)sizeof( (Str8[]){ __VA_ARGS__ } ) / size_of(Str8) \
	}                                                                   \
)
 | |
| 
 | |
| /*
 | |
| Define a mapping array:
 | |
| Str8 mappings [][2] = {
 | |
| 	fmt_vtoken_entry("key", "value"),
 | |
| 	^^ Add entries as above ^^
 | |
| }
 | |
| */
 | |
// Expands to a key/value pair for the fmt_vtoken token list: the key string becomes a Str8 literal.
#define fmt_entry(key, value) lit(key), value
 | |
| 
 | |
| #ifdef DEMO__WATL_DUMP_PREREQ_V1
 | |
// Demo driver: exercise the token formatter directly, then through Str8Gen.
int main()
{
	// Scratch table for the hashed token entries (zero-initialized by static storage).
	local_persist FMem_64KB tbl_scratch;
	SliceByte fmt_scratch = slicemem_alloc(MEGABYTES(8));
	Str8 test_str = fmt_vtoken(fmem_slice(tbl_scratch), fmt_scratch, "Will this work? <maybe_sub>",
		fmt_entry("maybe_sub", lit("IT SUBST!!!"))
	);

	SliceByte scratchgen = slicemem_alloc(MEGABYTES(16));
	Str8Gen gen = str8gen_make(scratchgen);
	str8gen_append_fmt(gen, "Testing now with Str8Gen!! <maybe_sub>!",
		fmt_entry("maybe_sub", lit("lets fucking go!!!"))
	);
	return 0;
}
 | |
| #endif
 | |
| 
 | |
| /*
 | |
| We'll need to do some integer serialization for our dump listing's metrics.
 | |
| */
 | |
| 
 | |
| inline B32 char_is_upper(U8 c) { return('A' <= c && c <= 'Z'); }
 | |
| inline U8  char_to_lower(U8 c) { if (char_is_upper(c)) { c += ('a' - 'A'); } return(c); }
 | |
| 
 | |
| inline
 | |
| U8 integer_symbols(U8 value) {
 | |
| 	local_persist
 | |
| 	U8 lookup_table[16] = {
 | |
| 		'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F',
 | |
| 	};
 | |
| 	return lookup_table[value];
 | |
| }
 | |
| 
 | |
/*
Serializes num into mem in the given radix and returns the written slice.
Radix 16/8/2 get a "0x"/"0o"/"0b" prefix; min_digits pads with leading zeros;
digit_group_separator (0 to disable) is inserted every 3 digits (4 for
radix 2/8/16). Asserts that mem is large enough for the measured length.
*/
Str8 str8_from_u32(SliceByte mem, U32 num, U32 radix, U8 min_digits, U8 digit_group_separator)
{
	Str8 result = {.ptr = mem.ptr, .len = 0};
	Str8 prefix = {0};
	switch (radix)
	{
		case 16: { prefix = lit("0x"); } break;
		case 8:  { prefix = lit("0o"); } break;
		case 2:  { prefix = lit("0b"); } break;
	}

	// Thousands-style grouping for decimal; nibble/word-style for power-of-two radixes.
	U8 digit_group_size = 3;
	switch (radix)
	{
		default: break;
		case 2:
		case 8:
		case 16: { 
			digit_group_size = 4;
		}
		break;
	}

	// Measure: count the digits num needs, then derive padding and separator counts.
	U32 needed_leading_zeros = 0;
	{
		U32 needed_digits = 1;
		{
			U32 u32_reduce = num;
			for(;;)
			{
				u32_reduce /= radix;
				if (u32_reduce == 0) {
					break;
				}
				needed_digits += 1;
			}
		}

		    needed_leading_zeros = (min_digits > needed_digits) ? min_digits - needed_digits : 0;
		U32 needed_separators    = 0;
		if (digit_group_separator != 0)
		{
			needed_separators = (needed_digits + needed_leading_zeros) / digit_group_size;
			// An exact multiple of the group size would otherwise produce a leading separator.
			if (needed_separators > 0 && (needed_digits + needed_leading_zeros) % digit_group_size == 0) {
				needed_separators -= 1;
			}
		}
		
		result.len = prefix.len + needed_leading_zeros + needed_separators + needed_digits;
		assert(result.len <= mem.len);
	}
	
	// Fill Content: digits are emitted back-to-front, interleaving separators between groups.
	{
		U32 num_reduce             = num;
		U32 digits_until_separator = digit_group_size;
		for (U32 idx = 0; idx < result.len; idx += 1)
		{
			SSIZE separator_pos = result.len - idx - 1;
			if (digits_until_separator == 0 && digit_group_separator != 0) {
				result.ptr[separator_pos] = digit_group_separator;
				digits_until_separator = digit_group_size + 1; // +1 compensates the unconditional decrement below.
			}
			else {
				result.ptr[separator_pos] = char_to_lower(integer_symbols(num_reduce % radix));
				num_reduce /= radix;
			}
			digits_until_separator -= 1;
			if (num_reduce == 0) {
				break;
			}
		}
		// NOTE(review): leading zeros are written without separators interleaved —
		// confirm min_digits combined with a separator renders as intended.
		for (U32 leading_0_idx = 0; leading_0_idx < needed_leading_zeros; leading_0_idx += 1) {
			result.ptr[prefix.len + leading_0_idx] = '0';
		}
	}
	// Fill Prefix
	if (prefix.len > 0) {
		slice_copy(result, prefix);
	}
	return result;
}
 | |
| 
 | |
| // Utilized for file_write_str8:
 | |
| #if 0
 | |
| BOOL WriteFile(
 | |
|   [in]                HANDLE       hFile,
 | |
|   [in]                LPCVOID      lpBuffer,
 | |
|   [in]                DWORD        nNumberOfBytesToWrite,
 | |
|   [out, optional]     LPDWORD      lpNumberOfBytesWritten,
 | |
|   [in, out, optional] LPOVERLAPPED lpOverlapped
 | |
| );
 | |
| #endif
 | |
/*
Lastly: writing to a file using the Win API.
*/
 | |
| void file_write_str8(Str8 path, Str8 content)
 | |
| {
 | |
| 	slice_assert(path);
 | |
| 	slice_assert(content);
 | |
| 
 | |
| 	FMem_16KB scratch = {0};
 | |
| 	char const* path_cstr = str8_to_cstr_capped(path, fmem_slice(scratch));
 | |
| 
 | |
| 	HANDLE id_file = CreateFileA(
 | |
| 		path_cstr,
 | |
| 		GENERIC_WRITE,
 | |
| 		FILE_SHARE_READ,
 | |
| 		NULL,
 | |
| 		CREATE_ALWAYS,
 | |
| 		FILE_ATTRIBUTE_NORMAL,
 | |
| 		NULL
 | |
| 	);
 | |
| 
 | |
| 	B32 open_failed = id_file == INVALID_HANDLE_VALUE;
 | |
| 	if (open_failed) {
 | |
| 		DWORD  error_code = GetLastError();
 | |
| 		assert(error_code != 0);
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	DWORD bytes_written = 0;
 | |
| 	B32 status = WriteFile(id_file
 | |
| 		, cast(void*, content.ptr)
 | |
| 		, cast(USIZE, content.len)
 | |
| 		, & bytes_written
 | |
| 		, NULL
 | |
| 	);
 | |
| 	assert(status != 0);
 | |
| 	assert(bytes_written == content.len);
 | |
| }
 | |
| 
 | |
| /*
 | |
| We now have what we need to create the structural listing dump for WATL's data structures.
 | |
| */
 | |
| 
 | |
/*
Renders the parsed WATL line structure into buffer as a human-readable listing:
one header per line with its chunk count, then one indented row per chunk.
Number serialization uses a static 64KB scratch arena, reset after each line.
*/
Str8 watl_dump_listing(SliceByte buffer, WATL_SliceLine lines)
{
	local_persist FMem_64KB scratch = {0};
	FArena sarena = farena_init(fmem_slice(scratch));

	Str8Gen result = str8gen_make(buffer);
	U32 line_num = 0;
	for (slice_iter(lines, line))
	{
	// Scratch helpers: 64 bytes per serialized number, reclaimed by farena_reset below.
	#define push_str8_u32()           (SliceByte){ farena__push(& sarena, size_of(UTF8), 64, lit("UTF8")), 64 }
	#define fmt_entry_u32(label, num) lit(label), str8_from_u32(push_str8_u32(), num, 10, 0, 0)
		++ line_num;
		// NOTE(review): line->len is SSIZE narrowed to the U32 parameter of
		// str8_from_u32 — confirm line lengths stay within range.
		str8gen_append_fmt(result, "Line <line_num> - Chunks <chunk_num>:\n"
		,	fmt_entry("line_num",  str8_from_u32(push_str8_u32(), line_num, 10, 0, 0))
		,	fmt_entry("chunk_num", str8_from_u32(push_str8_u32(), line->len, 10, 0, 0))
		);
		for (slice_iter(* line, chunk)) 
		{
			// Classify the chunk by its first code unit.
			Str8 id;
			switch (* chunk->ptr)
			{
				case WATL_Tok_Space: id = lit("Space");   break;
				case WATL_Tok_Tab:   id = lit("Tab");     break;
				default:             id = lit("Visible"); break;
			}
			str8gen_append_fmt(result, "\t<id>(<size>): '<chunk>'\n"
			,	fmt_entry("id", id)
			,	fmt_entry_u32("size", chunk->len)
			,	fmt_entry("chunk", * chunk)
			);
		}
		farena_reset(& sarena);
	#undef push_str8_u32
	#undef fmt_entry_u32
	}
	return (Str8){ result.ptr, result.len };
}
 | |
| 
 | |
| #ifdef DEMO__WATL_DUMP_V1
 | |
// Demo driver: read this source, lex, parse, then dump the WATL structure
// to a listing file.
int main()
{
	// This will limit our read to 128kb at most.
	FMem_128KB   read_mem = {0};
	FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );

	// This will limit our lex to 16 megs worth of token tracking on a file.
	SliceByte    mem_toks = slicemem_alloc(MEGABYTES(16));
	WATL_LexInfo lex_res  = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);

	// Backing memory for the cache: interned bytes, collision-slot pool, and hash table.
	SliceByte mem_cache_strs  = slicemem_alloc(MEGABYTES(64));
	SliceByte mem_cache_slots = slicemem_alloc(1024 * 1024 * 16 * size_of(Str8Cache_SliceSlot));
	SliceByte mem_cache_table = slicemem_alloc(1024        * 16 * size_of(Str8Cache_SliceSlot));
	Str8Cache str_cache       = str8cache_init(mem_cache_strs, mem_cache_slots, mem_cache_table);

	SliceByte mem_parse_nodes = slicemem_alloc(MEGABYTES(4));
	SliceByte mem_parse_lines = slicemem_alloc(MEGABYTES(4));
	WATL_ParseInfo parse_res  = watl_parse(lex_res.tokens, .backing_nodes = mem_parse_nodes, .backing_lines = mem_parse_lines, .str_cache = & str_cache);

	SliceByte scratch_dmp = slicemem_alloc(MEGABYTES(16));
	Str8      listing     = watl_dump_listing(scratch_dmp, parse_res.lines);
	file_write_str8(lit("demo.str_cache.listing.txt"), listing);
	return 0;
}
 | |
| #endif
 |