/*
An introduction to C11 with a string cache demo.
Attempts to showcase better conventions and constructs in C, discovered as of 2025 from scouring the internet.
*/

/*
Everything below is implemented within this single file.
Because of this, definitions are kept on a need-to-have basis, targeting a single vendor and toolchain.
We will use almost no libraries and target only Windows 11 x64 using MSVC.

Even so, the constructs defined here and their dependencies can be properly abstracted into an ergonomic library for multiple targets with enough time and pain.
The difference is just more preprocessor conditionals, and how wide a range of targets (and their age discrepancy) a library tries to support.
The more minimal, the less cruft.

Definitions appear linearly in the file, on demand, since the file is meant to be read linearly.
This causes non-categorical organization, so it will be harder to sift through if you want to see
definitions related to a specific kind of data or operation (strings, memory, etc.).
*/
#if 0
int main()
{
	// This will limit our V1 read to 128 KiB at most.
	FMem_128KB   read_mem = {0};
	FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );

	// This will limit our V1 lex to only 16 megs worth of token tracking on a file.
	SliceByte    mem_toks = slicemem_alloc(MEGABYTES(16));
	WATL_LexInfo lex_res  = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);

	SliceByte mem_cache_strs  = slicemem_alloc(MEGABYTES(64));
	SliceByte mem_cache_slots = slicemem_alloc(1024 * 1024 * 16 * size_of(Str8Cache_Slot));
	SliceByte mem_cache_table = slicemem_alloc(1024 * 16 * size_of(Str8Cache_Slot));
	Str8Cache str_cache       = str8cache_init(mem_cache_strs, mem_cache_slots, mem_cache_table);

	SliceByte mem_parse_nodes = slicemem_alloc(MEGABYTES(4));
	SliceByte mem_parse_lines = slicemem_alloc(MEGABYTES(4));
	WATL_ParseInfo parse_res  = watl_parse(lex_res.tokens, .backing_nodes = mem_parse_nodes, .backing_lines = mem_parse_lines, .str_cache = & str_cache);

	SliceByte scratch_dmp = slicemem_alloc(MEGABYTES(16));
	Str8 listing = watl_dump_listing(scratch_dmp, parse_res.lines);
	file_write_str8(lit("demo.str_cache.listing.txt"), listing);
	return 0;
}
#endif
// Demo selection
// #define DEMO__STR_SLICE
// #define DEMO__FILE_READ_CONTENTS_V1
// #define DEMO__WATL_LEX_V1
// #define DEMO__WATL_PARSE_V1
// #define DEMO__WATL_DUMP_PREREQ_V1
#define DEMO__WATL_DUMP_V1

/*
The above makes use of the following core concepts to achieve its net result:
* Slices
* Arenas
* Generic Runtime Allocator Interface
* Hashing

Secondarily, for the purposes of using the above sufficiently, the following are also utilized:
* Virtual Address Space
* Read/Write Files
* Lexing & Parsing
* Debug printing
*/
/*
The first thing we'll probably want is a way to deal with text effectively.
So we'll set up the minimum for that when dealing with immutable constructs.
*/

// We'll need some minimum set of dependencies to adequately define the constructs.
// ASSUMING MODERN MSVC TOOLCHAIN.

#include <intrin.h>
#include <tmmintrin.h>
#include <wmmintrin.h>

#include <assert.h>
typedef unsigned __int8  U8;
typedef signed   __int8  S8;
typedef unsigned __int16 U16;
typedef signed   __int16 S16;
typedef unsigned __int32 U32;
typedef signed   __int32 S32;
typedef unsigned __int64 U64;
typedef signed   __int64 S64;

typedef size_t    USIZE;
typedef ptrdiff_t SSIZE;

enum {
	false,
	true,
	true_overflow,
};
typedef S8  B8;
typedef S16 B16;
typedef S32 B32;

// Common macros we'll use throughout this.
#define assert_bounds(point, start, end) do { \
	USIZE pos_point = cast(USIZE, point);     \
	USIZE pos_start = cast(USIZE, start);     \
	USIZE pos_end   = cast(USIZE, end);       \
	assert(pos_start <= pos_point);           \
	assert(pos_point <= pos_end);             \
} while(0)

// Functional style cast
#define cast(type, data)  ((type)(data))
#define pcast(type, data) * cast(type*, & (data))

#define nullptr cast(void*, 0)

#define glue_(A, B) A ## B
#define glue(A, B)  glue_(A,B)

// Enforces size querying uses the SSIZE type.
#define size_of(data) cast(SSIZE, sizeof(data))

#define stringify_(S) #S
#define stringify(S)  stringify_(S)
/*
The first construct we'll utilize is a string slice.
In modern programming, with the memory sizes involved, it is more ergonomic to track the length of a string alongside its pointer.
Most strings are not stored in some statically tracked immutable table, and the performance loss in carrying the length is negligible under modern hardware constraints.
*/

// UTF-8 character encoding unit
typedef unsigned char UTF8;

typedef struct Str8 Str8;
struct Str8 {
	UTF8* ptr;
	SSIZE len;
};

// String literals in C include null-terminators; we aren't interested in preserving that.
#define lit(string_literal) (Str8){ cast(UTF8*, string_literal), size_of(string_literal) - 1 }
/*
We'll want all of our text processing to operate on UTF-8 code pages:
*/
#include <locale.h>
inline void set_utf8_codepage() { setlocale(LC_ALL, ".UTF-8"); }

// For now this string can be visualized using a debugger.
#ifdef DEMO__STR_SLICE
int main()
{
	Str8 first = lit("Our first string as a slice");
	return 0;
}
#endif // DEMO__STR_SLICE
/*
We now want to be able to read a file. This will be a deep rabbit hole, as we'll need to set up a basic file interface
and related definitions for handling the memory.

For the purposes of the initial definition we'll introduce fixed-size memory blocks statically allocated on the stack.
*/

/*
First off, we need to find out how to acquire the contents of a file on Windows.

We'll wrap the operation in a procedure called file_read_contents. It takes a path and optional arguments (Opts__read_file_contents).
It returns a result in a composite struct, FileOpResult, which may be expanded as needed in the future.
*/

typedef struct FileOpResult FileOpResult;
typedef struct Opts__read_file_contents Opts__read_file_contents;
void         file_read_contents_api(FileOpResult* result, Str8 path, Opts__read_file_contents* opts);
FileOpResult file__read_contents   (                      Str8 path, Opts__read_file_contents* opts);
#define file_read_contents(path, ...) file__read_contents(path, & (Opts__read_file_contents){__VA_ARGS__} )
/*
The file contents will be returned in bytes.
To view or manage any slice of bytes we'll utilize a byte slice.
*/
typedef struct SliceByte SliceByte;
struct SliceByte {
	U8*   ptr;
	SSIZE len;
};
/*
The interface above is a pattern that makes it entirely optional whether the result is formatted and returned to the user on the stack.
It also allows default parameters to be defined conveniently.
*/
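
// As a sketch of how the optional-arguments pattern expands (the designated initializer here is
// illustrative; any subset of Opts__read_file_contents fields may be provided):
#if 0
FileOpResult res = file_read_contents(lit("demo.str_cache.c"), .zero_backing = true);
// ...is equivalent to:
FileOpResult res = file__read_contents(lit("demo.str_cache.c"), & (Opts__read_file_contents){ .zero_backing = true });
#endif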

// We'll utilize the ReadFile procedure within the WinAPI: https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-readfile
#define NOMINMAX
#define WIN32_LEAN_AND_MEAN
#define WIN32_MEAN_AND_LEAN
#define VC_EXTRALEAN
#include <windows.h>
#include <windowsx.h>
#include <timeapi.h>
#include <tlhelp32.h>
#include <Shlobj.h>
#include <processthreadsapi.h>
#pragma comment(lib, "user32")
#pragma comment(lib, "winmm")
#pragma comment(lib, "shell32")
#pragma comment(lib, "advapi32")
#pragma comment(lib, "rpcrt4")
#pragma comment(lib, "shlwapi")
#pragma comment(lib, "comctl32")
#pragma comment(linker,"\"/manifestdependency:type='win32' name='Microsoft.Windows.Common-Controls' version='6.0.0.0' processorArchitecture='*' publicKeyToken='6595b64144ccf1df' language='*'\"") // Required for loading the correct comctl32 dll.
#undef NOMINMAX
#undef WIN32_LEAN_AND_MEAN
#undef WIN32_MEAN_AND_LEAN
#undef VC_EXTRALEAN
#if 0
BOOL ReadFile(
	[in]                HANDLE       hFile,
	[out]               LPVOID       lpBuffer,
	[in]                DWORD        nNumberOfBytesToRead,
	[out, optional]     LPDWORD      lpNumberOfBytesRead,
	[in, out, optional] LPOVERLAPPED lpOverlapped
);

// In order to read a file we need a handle to a valid filesystem entity to read from: https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createfilea
HANDLE CreateFileA(
	[in]           LPCSTR                lpFileName,
	[in]           DWORD                 dwDesiredAccess,
	[in]           DWORD                 dwShareMode,
	[in, optional] LPSECURITY_ATTRIBUTES lpSecurityAttributes,
	[in]           DWORD                 dwCreationDisposition,
	[in]           DWORD                 dwFlagsAndAttributes,
	[in, optional] HANDLE                hTemplateFile
);
#endif

// We need to convert our string slice to a C string for CreateFileA's path input.
#define KILOBYTES(n) (cast(USIZE, n) << 10)
#define MEGABYTES(n) (cast(USIZE, n) << 20)
#define GIGABYTES(n) (cast(USIZE, n) << 30)
#define TERABYTES(n) (cast(USIZE, n) << 40)

/*
We'll define fixed-size memory blocks here using typedefs, on demand.

They will have the following format:
typedef U8 FMem_<size>KB [ <U8 amount> ];
*/

typedef U8 FMem_16KB  [ KILOBYTES(16)  ];
typedef U8 FMem_64KB  [ KILOBYTES(64)  ];
typedef U8 FMem_128KB [ KILOBYTES(128) ];

#define typeof __typeof__
#define fmem_slice(mem) (SliceByte) { mem, size_of(mem) }
// We'll be using an intrinsic for copying memory:
void* memory_copy(void* restrict dest, void const* restrict src, USIZE length)
{
	if (dest == nullptr || src == nullptr || length == 0) {
		return nullptr;
	}
	// https://learn.microsoft.com/en-us/cpp/intrinsics/movsb?view=msvc-170
	__movsb((unsigned char*)dest, (const unsigned char*)src, length);
	return dest;
}
// Often we'll want to check the validity of a slice:
#define slice_assert(slice) do { \
	assert(slice.ptr != nullptr); \
	assert(slice.len > 0);        \
} while(0)

void slice__copy(SliceByte dest, SSIZE dest_typewidth, SliceByte src, SSIZE src_typewidth) {
	assert(dest.len >= src.len);
	slice_assert(dest);
	slice_assert(src);
	memory_copy(dest.ptr, src.ptr, src.len);
}
#define slice_copy(dest,src) slice__copy( \
	(SliceByte){ cast(void*, (dest).ptr), (dest).len * size_of(*(dest).ptr)}, size_of(*(dest).ptr) \
,	(SliceByte){ cast(void*, (src ).ptr), (src ).len * size_of(*(src ).ptr)}, size_of(*(src ).ptr) \
)
// Assumes the backing memory is zeroed (so a null terminator follows the copied bytes).
char* str8_to_cstr_capped(Str8 content, SliceByte mem) {
	assert(mem.len > content.len);
	memory_copy(mem.ptr, content.ptr, content.len);
	return cast(char*, mem.ptr);
}
// To support zeroing slices we'll utilize another intrinsic.
B32 memory_zero(void* dest, USIZE length) {
	if (dest == nullptr || length == 0) {
		return false;
	}
	__stosb((unsigned char*)dest, 0, length);
	return true;
}

void slice__zero(SliceByte mem, SSIZE typewidth) {
	slice_assert(mem);
	memory_zero(mem.ptr, mem.len);
}
#define slice_zero(slice) slice__zero((SliceByte){ cast(void*, (slice).ptr), (slice).len * size_of(*(slice).ptr)}, size_of(*(slice).ptr))
// Now for our "Version 1":

#if defined(DEMO__FILE_READ_CONTENTS_V1) || \
	defined(DEMO__WATL_LEX_V1)           || \
	defined(DEMO__WATL_PARSE_V1)         || \
	defined(DEMO__WATL_DUMP_PREREQ_V1)   || \
	defined(DEMO__WATL_DUMP_V1)

struct FileOpResult
{
	// For now we'll just have the content.
	SliceByte content;
};

struct Opts__read_file_contents
{
	// For now we'll just have the backing memory provided as a slice.
	SliceByte backing;
	// And whether we should zero the backing.
	B32 zero_backing;
};
void file_read_contents_api(FileOpResult* result, Str8 path, Opts__read_file_contents* opts)
{
	assert(result != nullptr);
	assert(opts   != nullptr);
	slice_assert(path);
	// Backing is required at this point.
	slice_assert(opts->backing);

	// This will limit a path for V1 to 16 KiB worth of codepoints.
	FMem_16KB   scratch   = {0};
	char const* path_cstr = str8_to_cstr_capped(path, fmem_slice(scratch) );

	HANDLE id_file = CreateFileA(
		path_cstr,
		GENERIC_READ,
		FILE_SHARE_READ,
		NULL,
		OPEN_EXISTING,
		FILE_ATTRIBUTE_NORMAL,
		NULL
	);
	B32 open_failed = id_file == INVALID_HANDLE_VALUE;
	if (open_failed) {
		DWORD error_code = GetLastError();
		assert(error_code != 0);
		return;
	}

	LARGE_INTEGER file_size = {0};
	B32 get_size_failed = ! GetFileSizeEx(id_file, & file_size);
	if (get_size_failed) {
		assert(! get_size_failed); // Debug trap; GetLastError() can be inspected here.
		return;
	}

	// Because we are currently using fixed-size memory, we need to confirm that we can hold this content.
	B32 not_enough_backing = opts->backing.len < file_size.QuadPart;
	if (not_enough_backing) {
		assert(! not_enough_backing); // Debug trap.
		// Otherwise we don't provide a result.
		result->content = (SliceByte){0};
		return;
	}

	if (opts->zero_backing) {
		slice_zero(pcast(SliceByte, opts->backing));
	}

	DWORD amount_read = 0;
	BOOL  read_result = ReadFile(
		id_file,
		opts->backing.ptr,
		cast(DWORD, file_size.QuadPart),
		& amount_read,
		nullptr
	);
	CloseHandle(id_file);

	B32 read_failed  = ! read_result;
	    read_failed |= amount_read != file_size.QuadPart;
	if (read_failed) {
		assert(! read_failed); // Debug trap.
		return;
	}

	result->content.ptr = opts->backing.ptr;
	result->content.len = file_size.QuadPart;
	return;
}
#endif // DEMO__FILE_READ_CONTENTS_V1 (and friends)

// Version agnostic code:
inline
FileOpResult file__read_contents(Str8 path, Opts__read_file_contents* opts) {
	FileOpResult result = {0};
	file_read_contents_api(& result, path, opts);
	return result;
}

// And now to put it all together into a test run in the debugger. Content should be properly formatted if the code is correct.
#ifdef DEMO__FILE_READ_CONTENTS_V1
int main()
{
	// This will limit our V1 read to 64 KiB at most.
	FMem_64KB    read_mem = {0};
	FileOpResult res      = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
	return 0;
}
#endif // DEMO__FILE_READ_CONTENTS_V1

/*
Now that we have file reading done, we need to be able to process the content.

First we want to do lexical analysis. So we'll create a token listing delimiting the aspects of the text file relevant to us.
For our data structure, we are going for a Whitespace-Aware Text Layout (WATL), where we track text and the formatting around it.

Just as with the read-file-contents operation, we'll define an interface for performing this analysis.
It will be called watl_lex and take the file's SliceByte reinterpreted as a Str8 slice, along with some Opts__watl_lex,
returning a WATL_LexInfo that informs the user how the operation went.
*/

typedef struct WATL_LexInfo WATL_LexInfo;
typedef struct Opts__watl_lex Opts__watl_lex;

void         watl_lex_api(WATL_LexInfo* info, Str8 source, Opts__watl_lex* opts);
WATL_LexInfo watl__lex   (                    Str8 source, Opts__watl_lex* opts);
#define watl_lex(source, ...) watl__lex(source, &(Opts__watl_lex){__VA_ARGS__})
/*
Token identification will be done using a WATL_TokKind enumeration.
The token itself is just the id along with a pointer to its start within the source slice. We can resolve the width of a token by its delta to the next token.
If it's the last token, its width is determined by its offset to the end of the Str8 slice.
*/
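
// For example (a hypothetical layout): lexing "hi  world", the tokens begin at offsets 0 ("hi"),
// 2 (the two-space run), and 4 ("world"). The widths fall out as 2 - 0 = 2, 4 - 2 = 2, and for the
// final token, source end (9) minus its start (4) = 5.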

typedef U32 WATL_TokKind;
enum WATL_TokKind {
	WATL_Tok_Space          = ' ',
	WATL_Tok_Tab            = '\t',
	WATL_Tok_CarriageReturn = '\r',
	WATL_Tok_LineFeed       = '\n',
	WATL_Tok_Text           = 0xFFFFFFFF,
};

typedef struct WATL_Tok WATL_Tok;
struct WATL_Tok {
	UTF8* code;
};

typedef struct WATL_SliceTok WATL_SliceTok;
struct WATL_SliceTok {
	WATL_Tok* ptr;
	SSIZE     len;
};
Str8 watl_tok_str8(WATL_SliceTok toks, WATL_Tok* tok) {
	WATL_Tok* next = tok + 1;
	Str8 text = {0};
	text.ptr = tok->code;
	text.len = next >= (toks.ptr + toks.len) ?
		// The last token has no successor to diff against. V1 caveat: the token slice does not
		// carry the source's end, so we clamp the final token to a single code unit.
		1
		// Otherwise its width is the next token's start minus the current token's start.
		: cast(SSIZE, next->code - tok->code);
	return text;
}

/*
Tokens are allocated from a backing slice of memory defined by the user. Ideally this pool of memory is not constrained to a fixed size on the stack.
So for V1 we'll allocate 8 megs of heap memory to act as a pool for the tokens. We'll keep track of how much of the pool we used via a new memory-tracking construct:
the fixed-size arena.

A basic fixed-size arena has only three components, which can vary depending on the conventions the user prefers.
In our case we'll track its capacity, its starting address, and how much has been committed.
*/

// We use this in conjunction with arenas to save a point that's safe for the user to rewind to.
typedef struct ArenaSP ArenaSP;
struct ArenaSP { void* ptr; };
#pragma region FArena
typedef struct FArena FArena;
struct FArena {
	void* start;
	USIZE capacity;
	USIZE used;
};
void    api_farena_init(FArena* arena, SliceByte mem);
FArena  farena_init    (SliceByte mem);
void*   farena__push   (FArena* arena, USIZE type_size, USIZE amount, Str8 dbg_typename);
void    farena_reset   (FArena* arena);
void    farena_rewind  (FArena* arena, ArenaSP savepoint);
ArenaSP farena_save    (FArena  arena);

#define farena_push(arena, type)               cast(type*, farena__push(& arena, size_of(type), 1, lit(stringify(type))) )
#define farena_push_array(arena, type, amount) (Slice ## type){ farena__push(& arena, size_of(type), amount, lit(stringify(type))), amount }

inline
void api_farena_init(FArena* arena, SliceByte mem) {
	arena->start    = mem.ptr;
	arena->capacity = mem.len;
	arena->used     = 0;
}
inline FArena farena_init(SliceByte mem) { FArena arena; api_farena_init(& arena, mem); return arena; }

inline
void* farena__push(FArena* arena, USIZE type_size, USIZE amount, Str8 dbg_typename) {
	USIZE to_commit = type_size * amount;
	USIZE unused    = arena->capacity - arena->used;
	assert(to_commit <= unused);
	void* ptr    = cast(void*, cast(USIZE, arena->start) + arena->used);
	arena->used += to_commit;
	return ptr;
}

inline
void farena_rewind(FArena* arena, ArenaSP savepoint) {
	void* end = cast(void*, cast(USIZE, arena->start) + arena->used);
	assert_bounds(savepoint.ptr, arena->start, end);
	// Rewinding sets the used amount back to the savepoint's offset from the start.
	arena->used = cast(USIZE, savepoint.ptr) - cast(USIZE, arena->start);
}

inline void    farena_reset(FArena* arena) { arena->used = 0; }
// A savepoint records the arena's current top (start + used), not its start.
inline ArenaSP farena_save (FArena  arena) { ArenaSP savepoint; savepoint.ptr = cast(void*, cast(USIZE, arena.start) + arena.used); return savepoint; }
#pragma endregion FArena
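
// A minimal sketch of the arena in use (buffer name and sizes are illustrative):
#if 0
FMem_16KB buffer = {0};
FArena    arena  = farena_init(fmem_slice(buffer));
U32*      a      = farena_push(arena, U32); // Bump-allocates size_of(U32) bytes.
ArenaSP   sp     = farena_save(arena);      // Remember the current top.
U32*      b      = farena_push(arena, U32);
farena_rewind(& arena, sp);                 // b's memory is reclaimed; a survives.
farena_reset (& arena);                     // Everything is reclaimed.
#endif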

#if defined(DEMO__WATL_LEX_V1)          || \
	defined(DEMO__WATL_PARSE_V1)        || \
	defined(DEMO__WATL_DUMP_PREREQ_V1)  || \
	defined(DEMO__WATL_DUMP_V1)

struct WATL_LexInfo {
	// For now, just the tokens.
	WATL_SliceTok tokens;
};

struct Opts__watl_lex {
	/*
	For this operation we'll enforce that the arena must linearly allocate each token, forming a strictly adjacent set of elements in an array.
	This is not necessary: an equivalent process could organize the tokens semi-contiguously into a linked list backed by a chained arena,
	or cache the tokens sparsely, in which case their position within the originating string would not be preserved.
	Here we're keeping it simple: tokens live in the same block of memory and don't use a string cache.
	*/
	SliceByte pool_toks;
};

// We are assuming the source is UTF-8 within the ASCII range.
void watl_lex_api(WATL_LexInfo* info, Str8 source, Opts__watl_lex* opts)
{
	assert(info != nullptr);
	slice_assert(source);
	assert(opts != nullptr);

	FArena arena = farena_init(opts->pool_toks);

	UTF8* end    = source.ptr + source.len;
	UTF8* cursor = source.ptr;
	UTF8* prev   = source.ptr;
	UTF8  code   = * cursor;

	B32       was_formatting = true;
	WATL_Tok* tok            = nullptr;
	for (; cursor < end;)
	{
		switch (code)
		{
			case WATL_Tok_Space:
			case WATL_Tok_Tab:
			{
				// Start a new token when the formatting codepoint changes (or at the very first codepoint).
				if (tok == nullptr || * prev != * cursor) {
					tok = farena_push(arena, WATL_Tok);
					tok->code      = cursor;
					was_formatting = true;
				}
				cursor += 1;
			}
			break;

			case WATL_Tok_LineFeed: {
				tok = farena_push(arena, WATL_Tok);
				tok->code = cursor;
				cursor   += 1;
				was_formatting = true;
			}
			break;

			// Assuming what comes after is a line feed.
			case WATL_Tok_CarriageReturn: {
				tok = farena_push(arena, WATL_Tok);
				tok->code = cursor;
				cursor   += 2;
				was_formatting = true;
			}
			break;

			default:
			{
				if (was_formatting) {
					tok = farena_push(arena, WATL_Tok);
					tok->code      = cursor;
					was_formatting = false;
				}
				cursor += 1;
			}
			break;
		}
		prev = cursor - 1;
		if (cursor < end) {
			// Guard against reading one byte past the source slice on the final iteration.
			code = * cursor;
		}
	}
	info->tokens.ptr = arena.start;
	info->tokens.len = arena.used / size_of(WATL_Tok);
}

#endif // DEMO__WATL_LEX_V1 (and friends)

inline
WATL_LexInfo watl__lex(Str8 source, Opts__watl_lex* opts) {
	WATL_LexInfo result = {0};
	watl_lex_api(& result, source, opts);
	return result;
}

/*
To allocate on the heap we'll make a basic slicemem_alloc, with a corresponding slicemem_free as well.
However we don't strictly need to free in the V1 examples: the OS will clean up the pages used by the process during its termination.
*/

#include <stdlib.h> // malloc & free

SliceByte slicemem_alloc(USIZE amount)
{
	assert(amount > KILOBYTES(4));
	void* result = malloc(amount);
	assert(result != nullptr);
	SliceByte mem = {
		.ptr = result,
		.len = amount
	};
	return mem;
}
void slicemem_free(SliceByte mem) {
	free(mem.ptr);
}

#ifdef DEMO__WATL_LEX_V1
int main()
{
	// This will limit our V1 read to 64 KiB at most.
	FMem_64KB    read_mem = {0};
	FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );

	// This will limit our V1 lex to only 8 megs worth of token tracking on a file.
	SliceByte    mem_toks = slicemem_alloc(MEGABYTES(8));
	WATL_LexInfo lex_res  = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);

	// Unnecessary in this case, but if you want to be explicit:
	slicemem_free(mem_toks);
	return 0;
}
#endif

/*
Next we'll parse these tokens into a rudimentary WATL abstract syntax tree.
* The tree will be organized at the top level by lines, consisting of linked slices of visible and non-visible tokens.
* Just as with the lexical analysis, lines and nodes will be linearly allocated adjacent to each other. This allows us to utilize array operations.
A tiny sketch of the resulting shape follows this comment.
*/
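
// As a sketch of the shape we're after (hypothetical input): parsing the source "hello  world\n"
// yields one line whose nodes are the chunks "hello", "  " (a two-space run), and "world";
// the line-break token itself terminates the line rather than becoming a node.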

typedef struct WATL_ParseInfo WATL_ParseInfo;
typedef struct Opts__watl_parse Opts__watl_parse;
void           watl_parse_api(WATL_ParseInfo* info, WATL_SliceTok tokens, Opts__watl_parse* opts);
WATL_ParseInfo watl__parse   (                      WATL_SliceTok tokens, Opts__watl_parse* opts);
#define watl_parse(tokens, ...) watl__parse(tokens, & (Opts__watl_parse) {__VA_ARGS__})

/*
For the sake of the exercise, we'll eliminate the association with the file's strings, and cache them instead.
*/
#pragma region Str8Cache
typedef struct Str8Cache Str8Cache;
void      str8cache_init_api(Str8Cache* cache, SliceByte mem_strs, SliceByte mem_slots, SliceByte mem_table);
Str8Cache str8cache_init    (                  SliceByte mem_strs, SliceByte mem_slots, SliceByte mem_table);

// A cache like this relies on tabling string entries, utilizing an index derived from a hashed ID.
// For these strings we'll be using a hash we call djb8:

// Introducing a slice iterator:
#define slice_iter(container, iter) typeof((container).ptr) iter = (container).ptr; iter != ((container).ptr + (container).len); ++ iter
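
// The macro provides all three clauses of a for statement; e.g. (sketch, names illustrative):
#if 0
for (slice_iter(some_slice, it)) { /* 'it' points at each element of some_slice in turn */ }
#endif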

inline
void hash64_djb8(U64* hash, SliceByte bytes) {
	for (U8 const* elem = bytes.ptr; elem != (bytes.ptr + bytes.len); ++ elem) {
		*hash = (((*hash) << 8) + (*hash)) + (*elem);
	}
}
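
// A minimal sketch of hashing a string slice (seed of zero, matching how it's used below):
#if 0
Str8 example = lit("example");
U64  key     = 0;
hash64_djb8(& key, pcast(SliceByte, example));
#endif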

// For a library or codebase, it's recommended to set up a metaprogram to generate hash-utilizing containers,
// or other containers that cannot be sufficiently lifted to general runtime paths without losing ergonomic debug type info or type-constraint enforcement.
// Unlike with the template markup C++ uses, you can strike a balance between how many definitions are redundantly made versus collapsed to a general path,
// based on target optimization and debuggability.

// For this V1 example, we'll be hand-rolling a fixed-size table with excess-slot chaining for colliding slots.
// It's a relatively simple implementation to hand-roll. These things tend to become unwieldy with more advanced variants.

typedef struct Str8Cache_Slot Str8Cache_Slot;
struct Str8Cache_Slot {
	Str8Cache_Slot* prev;
	Str8Cache_Slot* next;
	Str8 value;
	U64  key;
	B32  occupied;
};

typedef struct Str8Cache_SliceSlot Str8Cache_SliceSlot;
struct Str8Cache_SliceSlot {
	Str8Cache_Slot* ptr;
	SSIZE           len;
};

struct Str8Cache {
	FArena              a_str;
	Str8Cache_SliceSlot pool;
	Str8Cache_Slot*     vacant;
	Str8Cache_SliceSlot table;
};

Str8Cache str8cache_init(SliceByte mem_strs, SliceByte mem_slots, SliceByte mem_table) { Str8Cache cache; str8cache_init_api(& cache, mem_strs, mem_slots, mem_table); return cache; }
inline
void str8cache_init_api(Str8Cache* cache, SliceByte mem_strs, SliceByte mem_slots, SliceByte mem_table) {
	assert(cache != nullptr);
	slice_assert(mem_strs);
	slice_assert(mem_slots);
	slice_assert(mem_table);
	cache->a_str = farena_init(mem_strs);
	cache->pool  = (Str8Cache_SliceSlot){ cast(void*, mem_slots.ptr), mem_slots.len / size_of(Str8Cache_Slot)};
	// The first vacant slot for collision chaining is the start of the pool.
	cache->vacant = cache->pool.ptr;
	cache->table  = (Str8Cache_SliceSlot){ cast(void*, mem_table.ptr), mem_table.len / size_of(Str8Cache_Slot)};
	slice_zero(cache->pool);
	slice_zero(cache->table);
}

void str8cache_clear(Str8Cache* cache)
{
	for (slice_iter(cache->table, slot))
	{
		for (Str8Cache_Slot* probe_slot = slot->next; probe_slot != nullptr; probe_slot = probe_slot->next) {
			probe_slot->occupied = false;
		}
		slot->occupied = false;
	}
}

// We don't introduce a remove option because we're not tracking fixed-size entities.
// Strings take up non-deterministic sizes within their backing arena, so the only thing that can be done with the cache is wiping it and re-caching all strings.

/*
When storing the hash of a slot, we can almost never utilize the full width of a key,
so we truncate the key via modulo to get a "good enough" unique ID to place in the table.
*/
inline
U64 str8cache_slot_id(Str8Cache cache, U64 key) {
	U64 hash_index = key % cast(U64, cache.table.len);
	return hash_index;
}
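
// E.g. with a table of length 1024, a key of 1000003 lands in slot 1000003 % 1024 = 579;
// any 64-bit key maps somewhere in [0, table.len).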

Str8* str8cache_get(Str8Cache cache, U64 key)
{
	U64 hash_index = str8cache_slot_id(cache, key);
	Str8Cache_Slot* surface_slot = & cache.table.ptr[hash_index];
	if (surface_slot->occupied && surface_slot->key == key) {
		return & surface_slot->value;
	}
	for (Str8Cache_Slot* slot = surface_slot->next; slot != nullptr; slot = slot->next)
	{
		if (slot->occupied && slot->key == key) {
			return & slot->value;
		}
	}
	return nullptr;
}
Str8* str8cache_set(Str8Cache* cache, U64 key, Str8 value)
{
	U64 hash_index = str8cache_slot_id(*cache, key);
	Str8Cache_Slot* surface_slot = & cache->table.ptr[hash_index];
	if (! surface_slot->occupied || surface_slot->key == key)
	{
		// Only copy the string into the cache's arena on first occupation;
		// an occupied slot with a matching key is treated as already holding this string.
		if (! surface_slot->occupied) {
			SliceByte mem = { farena__push(& cache->a_str, size_of(U8), value.len, lit("Str8")), value.len };
			slice_copy(mem, value);
			surface_slot->value = pcast(Str8, mem);
		}
		surface_slot->key      = key;
		surface_slot->occupied = true;
		return & surface_slot->value;
	}
	Str8Cache_Slot* slot = surface_slot;
	for (;; slot = slot->next)
	{
		if (slot->next == nullptr)
		{
			// We had a collision; we need to grab a vacant slot from the pool and utilize it instead.
			slot->next = cache->vacant;
			* slot->next = (Str8Cache_Slot){0};
			slot->next->prev = slot;

			Str8Cache_Slot* next_vacant = cache->vacant + 1;
			assert(next_vacant < cache->pool.ptr + cache->pool.len );
			// If the above fails we ran out of extra slots.
			cache->vacant = next_vacant;
		}
		if ( ! slot->next->occupied || slot->next->key == key)
		{
			if (! slot->next->occupied) {
				SliceByte mem = { farena__push(& cache->a_str, size_of(U8), value.len, lit("Str8")), value.len };
				slice_copy(mem, value);
				slot->next->value = pcast(Str8, mem);
			}
			slot->next->key      = key;
			slot->next->occupied = true;
			return & slot->next->value;
		}
		// We keep traversing until we find a match or a vacancy for this list in the table.
		// Make sure to tune the size of the table so this happens less often!
		// Note: Tables sized by prime values collide less as well.
		// You can use a closest-prime lookup table to derive what length to expose to the cache's table for hash ID resolution.
	}
	return nullptr;
}
#pragma endregion Str8Cache

// Finally, our abstracted cache interface:
Str8 cache_str8(Str8Cache* cache, Str8 str)
{
	U64 key = 0; hash64_djb8(& key, pcast(SliceByte, str));
	Str8* result = str8cache_set(cache, key, str);
	assert(result != nullptr);
	return * result;
}
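
// A small sketch of the interning property this gives us (assuming an initialized cache named str_cache):
#if 0
Str8 a = cache_str8(& str_cache, lit("watl"));
Str8 b = cache_str8(& str_cache, lit("watl"));
assert(a.ptr == b.ptr); // Both hits resolve to the same backing copy inside the cache's arena.
#endif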

typedef Str8 WATL_Node;
#if 0
typedef struct WATL_Node WATL_Node;
struct WATL_Node {
	WATL_NodeKind kind;
	Str8          entry;
};
#endif

typedef struct WATL_Line WATL_Line;
struct WATL_Line {
	WATL_Node* ptr;
	SSIZE      len;
};

typedef struct WATL_SliceLine WATL_SliceLine;
struct WATL_SliceLine {
	WATL_Line* ptr;
	SSIZE      len;
};
#if defined(DEMO__WATL_PARSE_V1) || defined(DEMO__WATL_DUMP_PREREQ_V1) || defined(DEMO__WATL_DUMP_V1)

struct Opts__watl_parse {
	SliceByte  backing_nodes;
	SliceByte  backing_lines;
	Str8Cache* str_cache;
};

struct WATL_ParseInfo {
	WATL_SliceLine lines;
};

void watl_parse_api(WATL_ParseInfo* info, WATL_SliceTok tokens, Opts__watl_parse* opts)
{
	assert(info != nullptr);
	slice_assert(tokens);
	assert(opts != nullptr);

	FArena a_lines = farena_init(opts->backing_lines);
	FArena a_nodes = farena_init(opts->backing_nodes);

	WATL_Line* line = farena_push(a_lines, WATL_Line);
	WATL_Node* curr = farena_push(a_nodes, WATL_Node); // Preemptively allocate a node for the line (it may not be used).
	* curr          = (WATL_Node){0};
	line->ptr       = curr;
	line->len       = 0;
	info->lines.ptr = line;
	info->lines.len = 0;
	for (slice_iter(tokens, token))
	{
		switch (* token->code)
		{
			case WATL_Tok_CarriageReturn:
			case WATL_Tok_LineFeed: {
				WATL_Line* new_line = farena_push(a_lines, WATL_Line);
				line             = new_line;
				line->ptr        = curr;
				line->len        = 0;
				info->lines.len += 1;
			}
			continue;

			default:
			break;
		}

		Str8 tok_str = watl_tok_str8(tokens, token);
		* curr       = cache_str8( opts->str_cache, tok_str );
		curr         = farena_push(a_nodes, WATL_Node);
		* curr       = (WATL_Node){0};
		line->len   += 1;
		continue;
	}
}

#endif // DEMO__WATL_PARSE_V1 (and friends)

WATL_ParseInfo watl__parse(WATL_SliceTok tokens, Opts__watl_parse* opts) { WATL_ParseInfo info; watl_parse_api(& info, tokens, opts); return info; }

#ifdef DEMO__WATL_PARSE_V1
int main()
{
	// This will limit our V1 read to 64 KiB at most.
	FMem_64KB    read_mem = {0};
	FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );

	// This will limit our V1 lex to only 8 megs worth of token tracking on a file.
	SliceByte    mem_toks = slicemem_alloc(MEGABYTES(8));
	WATL_LexInfo lex_res  = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);

	SliceByte mem_cache_strs  = slicemem_alloc(MEGABYTES(64));
	SliceByte mem_cache_slots = slicemem_alloc(1024 * 1024 * 16 * size_of(Str8Cache_Slot));
	SliceByte mem_cache_table = slicemem_alloc(1024 * 16 * size_of(Str8Cache_Slot));
	Str8Cache str_cache       = str8cache_init(mem_cache_strs, mem_cache_slots, mem_cache_table);

	SliceByte mem_parse_nodes = slicemem_alloc(MEGABYTES(4));
	SliceByte mem_parse_lines = slicemem_alloc(MEGABYTES(4));
	WATL_ParseInfo parse_res  = watl_parse(lex_res.tokens, .backing_nodes = mem_parse_nodes, .backing_lines = mem_parse_lines, .str_cache = & str_cache);

	// Unnecessary in this case, but if you want to be explicit:
	slicemem_free(mem_toks);
	return 0;
}
#endif

/*
Now we'd like to dump this WATL structure into a file.
To do so, we'll need to generate the file's content string dynamically based on the WATL's content.

We'll utilize a new construct called a string generator, which will be tied to all functionality for constructing strings.
*/

typedef struct Str8Gen Str8Gen;
struct Str8Gen {
	SliceByte backing; // For V1 the backing buffer is fixed-size.
	UTF8* ptr;
	SSIZE len;
};

void    str8gen_init(Str8Gen* gen, SliceByte backing);
Str8Gen str8gen_make(              SliceByte backing);

void str8gen_append_str8(Str8Gen* gen, Str8 str);

void str8gen_init(Str8Gen* gen, SliceByte backing) {
	assert(gen != nullptr);
	gen->backing = backing;
	gen->ptr     = backing.ptr;
	gen->len     = 0;
}
Str8Gen str8gen_make(SliceByte backing) { Str8Gen gen; str8gen_init(& gen, backing); return gen; }

void str8gen_append_str8(Str8Gen* gen, Str8 str) {
	SSIZE left = gen->backing.len - gen->len;
	assert(left >= str.len);
	SliceByte dest = {gen->ptr + gen->len, str.len};
	slice_copy(dest, str);
	gen->len += str.len; // Advance the write position past what we just appended.
	return;
}
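
// A minimal sketch of appending (backing size illustrative):
#if 0
FMem_16KB buf = {0};
Str8Gen   gen = str8gen_make(fmem_slice(buf));
str8gen_append_str8(& gen, lit("Hello"));
str8gen_append_str8(& gen, lit(", world"));
// gen now views "Hello, world": (Str8){ gen.ptr, gen.len }
#endif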

/*
In order to support appending formatted content via str8gen_append_fmt, we'll use a substitution formatter built on a string-identification token pattern.

A format template string is provided with an 'id' wrapped in delimiters, which will be the angle brackets: <id>
Example: This formatted string will have <id> substituted into it.
*/
#pragma region fmt_vtoken

typedef struct FmtTokEntry FmtTokEntry;
struct FmtTokEntry {
	U64  key;
	Str8 value;
};
typedef struct SliceFmtTokEntry SliceFmtTokEntry;
struct SliceFmtTokEntry {
	FmtTokEntry* ptr;
	SSIZE len;
};

#define slice_end(slice) (slice.ptr + slice.len)

/*
This is a token-substituting formatter using an array-table lookup for the tokens to substitute.
*/
Str8 fmt_vtoken_slice(SliceByte buffer, SliceFmtTokEntry tokens, Str8 fmt_template)
{
	slice_assert(buffer);
	slice_assert(tokens);
	slice_assert(fmt_template);

	UTF8* cursor_buffer    = buffer.ptr;
	SSIZE buffer_remaining = buffer.len;

	UTF8 curr_code = * fmt_template.ptr;

	UTF8* cursor_fmt = fmt_template.ptr;
	SSIZE left_fmt   = fmt_template.len;
	while (left_fmt && buffer_remaining)
	{
		// Forward until we hit the delimiter '<' or the template's contents are exhausted.
		while (curr_code && curr_code != '<' && cursor_fmt != slice_end(fmt_template))
		{
			* cursor_buffer = * cursor_fmt;
			++ cursor_buffer;
			++ cursor_fmt;
			-- buffer_remaining;
			-- left_fmt;

			curr_code = * cursor_fmt;
		}

		if (curr_code == '<')
		{
			UTF8* cursor_potential_token = cursor_fmt + 1;
			SSIZE potential_token_length = 0;

			while (* (cursor_potential_token + potential_token_length) != '>') {
				++ potential_token_length;
			}

			// Hash the potential token and cross-check it with our token table.
			U64 key = 0; hash64_djb8(& key, (SliceByte){ cast(void*, cursor_fmt + 1), potential_token_length});
			Str8* value = nullptr;
			for (slice_iter(tokens, token))
			{
				// We do a linear iteration instead of a hash-table lookup because the user should never be substituting with more than ~100 unique tokens.
				if (token->key == key) {
					value = & token->value;
					break;
				}
			}

			if (value)
			{
				SSIZE left = value->len;
				UTF8 const* cursor_value = value->ptr;

				while (left --)
				{
					* cursor_buffer = * cursor_value;
					++ cursor_buffer;
					++ cursor_value;
					-- buffer_remaining;
				}

				// Sync the format cursor to just after the processed token.
				cursor_fmt = cursor_potential_token + potential_token_length + 1;
				curr_code  = * cursor_fmt;
				left_fmt  -= potential_token_length + 2; // The 2 here accounts for the '<' & '>' delimiters being omitted.
				continue;
			}

			* cursor_buffer = * cursor_fmt;
			++ cursor_buffer;
			++ cursor_fmt;
			-- buffer_remaining;
			-- left_fmt;

			curr_code = * cursor_fmt;
		}
	}
	Str8 result = {buffer.ptr, buffer.len - buffer_remaining};
	return result;
}
typedef struct SliceStr8 SliceStr8;
struct SliceStr8 {
	Str8* ptr;
	SSIZE len;
};

#define local_persist static

Str8 fmt__vtoken(SliceByte backing_tbl, SliceByte backing_buf, Str8 fmt_template, SliceStr8* tokens)
{
	assert(tokens != nullptr);
	FArena a_backing = farena_init(backing_tbl);
	SliceFmtTokEntry table = {a_backing.start, 0};
	// The token slice is interpreted as consecutive key-value pairs, so we consume two entries per iteration.
	for (slice_iter(*tokens, token)) {
		FmtTokEntry* entry = farena_push(a_backing, FmtTokEntry);
		* entry = (FmtTokEntry){0};
		hash64_djb8(& entry->key, (SliceByte){cast(void*, token->ptr), token->len});
		++ token;
		entry->value = * token;
		++ table.len;
	}
	Str8 result = fmt_vtoken_slice(backing_buf, table, fmt_template);
	return result;
}

// Expected to take a Str8 array of entries formatted as a 2D array of key-value pairs (Str8[length][2]).
// The array will be tracked using a SliceStr8 structure.
#define fmt_vtoken(backing_tbl, backing_buf, fmt_template, ...)         \
fmt__vtoken(backing_tbl, backing_buf, lit(fmt_template),                \
	&(SliceStr8){                                                       \
		.ptr = (Str8[]){ __VA_ARGS__ },                                 \
		.len = (SSIZE)sizeof( (Str8[]){ __VA_ARGS__ } ) / size_of(Str8) \
	}                                                                   \
)
#pragma endregion fmt_vtoken

inline
void str8gen__append_fmt(Str8Gen* gen, Str8 fmt_template, SliceStr8* tokens)
{
	local_persist FMem_64KB tbl_backing = {0};
	SliceByte fmt_backing = {gen->ptr + gen->len, gen->backing.len - gen->len};
	Str8 appended = fmt__vtoken(fmem_slice(tbl_backing), fmt_backing, fmt_template, tokens);
	gen->len += appended.len;
	return;
}
#define str8gen_append_fmt(gen, fmt_template, ...)                      \
str8gen__append_fmt(& gen, lit(fmt_template),                           \
	&(SliceStr8){                                                       \
		.ptr = (Str8[]){ __VA_ARGS__ },                                 \
		.len = (SSIZE)sizeof( (Str8[]){ __VA_ARGS__ } ) / size_of(Str8) \
	}                                                                   \
)

/*
Define a mapping array:
Str8 mappings [][2] = {
	fmt_entry("key", lit("value")),
	^^ Add entries as above ^^
}
*/
#define fmt_entry(key, value) lit(key), value

#ifdef DEMO__WATL_DUMP_PREREQ_V1
int main()
{
	local_persist FMem_64KB tbl_scratch;
	SliceByte fmt_scratch = slicemem_alloc(MEGABYTES(8));
	Str8 test_str = fmt_vtoken(fmem_slice(tbl_scratch), fmt_scratch, "Will this work? <maybe_sub>",
		fmt_entry("maybe_sub", lit("IT SUBST!!!"))
	);

	SliceByte scratchgen = slicemem_alloc(MEGABYTES(16));
	Str8Gen gen = str8gen_make(scratchgen);
	str8gen_append_fmt(gen, "Testing now with Str8Gen!! <maybe_sub>!",
		fmt_entry("maybe_sub", lit("lets fucking go!!!"))
	);
	return 0;
}
#endif

/*
We'll need to do some integer serialization for our dump listing's metrics.
*/

inline B32 char_is_upper(U8 c) { return('A' <= c && c <= 'Z'); }
inline U8  char_to_lower(U8 c) { if (char_is_upper(c)) { c += ('a' - 'A'); } return(c); }

inline
U8 integer_symbols(U8 value) {
	local_persist
	U8 lookup_table[16] = {
		'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F',
	};
	return lookup_table[value];
}
Str8 str8_from_u32(SliceByte mem, U32 num, U32 radix, U8 min_digits, U8 digit_group_separator)
{
	Str8 result = {.ptr = mem.ptr, .len = 0};
	Str8 prefix = {0};
	switch (radix)
	{
		case 16: { prefix = lit("0x"); } break;
		case 8:  { prefix = lit("0o"); } break;
		case 2:  { prefix = lit("0b"); } break;
	}

	U8 digit_group_size = 3;
	switch (radix)
	{
		default: break;
		case 2:
		case 8:
		case 16: {
			digit_group_size = 4;
		}
		break;
	}

	U32 needed_leading_zeros = 0;
	{
		U32 needed_digits = 1;
		{
			U32 u32_reduce = num;
			for(;;)
			{
				u32_reduce /= radix;
				if (u32_reduce == 0) {
					break;
				}
				needed_digits += 1;
			}
		}

		needed_leading_zeros = (min_digits > needed_digits) ? min_digits - needed_digits : 0;
		U32 needed_separators = 0;
		if (digit_group_separator != 0)
		{
			needed_separators = (needed_digits + needed_leading_zeros) / digit_group_size;
			if (needed_separators > 0 && (needed_digits + needed_leading_zeros) % digit_group_size == 0) {
				needed_separators -= 1;
			}
		}

		result.len = prefix.len + needed_leading_zeros + needed_separators + needed_digits;
		assert(result.len <= mem.len);
	}

	// Fill content
	{
		U32 num_reduce             = num;
		U32 digits_until_separator = digit_group_size;
		for (U32 idx = 0; idx < result.len; idx += 1)
		{
			SSIZE separator_pos = result.len - idx - 1;
			if (digits_until_separator == 0 && digit_group_separator != 0) {
				result.ptr[separator_pos] = digit_group_separator;
				digits_until_separator = digit_group_size + 1;
			}
			else {
				result.ptr[separator_pos] = char_to_lower(integer_symbols(num_reduce % radix));
				num_reduce /= radix;
			}
			digits_until_separator -= 1;
			if (num_reduce == 0) {
				break;
			}
		}
		for (U32 leading_0_idx = 0; leading_0_idx < needed_leading_zeros; leading_0_idx += 1) {
			result.ptr[prefix.len + leading_0_idx] = '0';
		}
	}
	// Fill prefix
	if (prefix.len > 0) {
		slice_copy(result, prefix);
	}
	return result;
}
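
// For example: str8_from_u32(mem, 1234567, 10, 0, ',') yields "1,234,567" (9 codepoints),
// while str8_from_u32(mem, 255, 16, 0, 0) yields "0xff".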

// Utilized for file_write_str8:
#if 0
BOOL WriteFile(
	[in]                HANDLE       hFile,
	[in]                LPCVOID      lpBuffer,
	[in]                DWORD        nNumberOfBytesToWrite,
	[out, optional]     LPDWORD      lpNumberOfBytesWritten,
	[in, out, optional] LPOVERLAPPED lpOverlapped
);
#endif
/*
Lastly: writing to a file using the WinAPI.
*/
void file_write_str8(Str8 path, Str8 content)
{
	slice_assert(path);
	slice_assert(content);

	FMem_16KB   scratch   = {0};
	char const* path_cstr = str8_to_cstr_capped(path, fmem_slice(scratch));

	HANDLE id_file = CreateFileA(
		path_cstr,
		GENERIC_WRITE,
		FILE_SHARE_READ,
		NULL,
		CREATE_ALWAYS,
		FILE_ATTRIBUTE_NORMAL,
		NULL
	);

	B32 open_failed = id_file == INVALID_HANDLE_VALUE;
	if (open_failed) {
		DWORD error_code = GetLastError();
		assert(error_code != 0);
		return;
	}

	DWORD bytes_written = 0;
	B32   status = WriteFile(id_file
		, cast(void*, content.ptr)
		, cast(DWORD, content.len)
		, & bytes_written
		, NULL
	);
	assert(status != 0);
	assert(bytes_written == content.len);
	CloseHandle(id_file);
}

/*
We now have what we need to create the structural listing dump for WATL's data structures.
*/

Str8 watl_dump_listing(SliceByte buffer, WATL_SliceLine lines)
{
	local_persist FMem_64KB scratch = {0};
	FArena sarena = farena_init(fmem_slice(scratch));

	Str8Gen result = str8gen_make(buffer);
	U32 line_num = 0;
	for (slice_iter(lines, line))
	{
#define push_str8_u32()           (SliceByte){ farena__push(& sarena, size_of(UTF8), 64, lit("UTF8")), 64 }
#define fmt_entry_u32(label, num) lit(label), str8_from_u32(push_str8_u32(), num, 10, 0, 0)
		++ line_num;
		str8gen_append_fmt(result, "Line <line_num> - Chunks <chunk_num>:\n"
		,	fmt_entry("line_num",  str8_from_u32(push_str8_u32(), line_num,  10, 0, 0))
		,	fmt_entry("chunk_num", str8_from_u32(push_str8_u32(), line->len, 10, 0, 0))
		);
		for (slice_iter(* line, chunk))
		{
			Str8 id;
			switch (* chunk->ptr)
			{
				case WATL_Tok_Space: id = lit("Space");   break;
				case WATL_Tok_Tab:   id = lit("Tab");     break;
				default:             id = lit("Visible"); break;
			}
			str8gen_append_fmt(result, "\t<id>(<size>): '<chunk>'\n"
			,	fmt_entry    ("id",    id)
			,	fmt_entry_u32("size",  chunk->len)
			,	fmt_entry    ("chunk", * chunk)
			);
		}
		farena_reset(& sarena);
#undef push_str8_u32
#undef fmt_entry_u32
	}
	return (Str8){ result.ptr, result.len };
}

#ifdef DEMO__WATL_DUMP_V1
int main()
{
	// This will limit our V1 read to 128 KiB at most.
	FMem_128KB   read_mem = {0};
	FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );

	// This will limit our V1 lex to only 16 megs worth of token tracking on a file.
	SliceByte    mem_toks = slicemem_alloc(MEGABYTES(16));
	WATL_LexInfo lex_res  = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);

	SliceByte mem_cache_strs  = slicemem_alloc(MEGABYTES(64));
	SliceByte mem_cache_slots = slicemem_alloc(1024 * 1024 * 16 * size_of(Str8Cache_Slot));
	SliceByte mem_cache_table = slicemem_alloc(1024 * 16 * size_of(Str8Cache_Slot));
	Str8Cache str_cache       = str8cache_init(mem_cache_strs, mem_cache_slots, mem_cache_table);

	SliceByte mem_parse_nodes = slicemem_alloc(MEGABYTES(4));
	SliceByte mem_parse_lines = slicemem_alloc(MEGABYTES(4));
	WATL_ParseInfo parse_res  = watl_parse(lex_res.tokens, .backing_nodes = mem_parse_nodes, .backing_lines = mem_parse_lines, .str_cache = & str_cache);

	SliceByte scratch_dmp = slicemem_alloc(MEGABYTES(16));
	Str8 listing = watl_dump_listing(scratch_dmp, parse_res.lines);
	file_write_str8(lit("demo.str_cache.listing.txt"), listing);
	return 0;
}
#endif