not using char const*
This commit is contained in:
parent
62beed20a9
commit
3094a51872
143
demo.str_cache.c
143
demo.str_cache.c
@ -76,7 +76,6 @@ So we'll set up the minimum for that when dealing with immutable constructs.
|
|||||||
#include <wmmintrin.h>
|
#include <wmmintrin.h>
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
// #include <stdbool.h>
|
|
||||||
|
|
||||||
typedef unsigned __int8 U8;
|
typedef unsigned __int8 U8;
|
||||||
typedef signed __int8 S8;
|
typedef signed __int8 S8;
|
||||||
@ -130,15 +129,24 @@ In modern programming with the memory sizes utilized, it is more ergonomic to tr
|
|||||||
Most strings are not stored in some immutable table tracked statically, performance loss in doing so is negligible on modern hardware constraints.
|
Most strings are not stored in some immutable table tracked statically, performance loss in doing so is negligible on modern hardware constraints.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// UTF-8 character encoding unit
|
||||||
|
typedef unsigned char UTF8;
|
||||||
|
|
||||||
typedef struct Str8 Str8;
|
typedef struct Str8 Str8;
|
||||||
struct Str8 {
|
struct Str8 {
|
||||||
char const* ptr;
|
UTF8* ptr;
|
||||||
SSIZE len;
|
SSIZE len;
|
||||||
};
|
};
|
||||||
|
|
||||||
// String literals in C include null-terminators, we aren't interested in preserving that.
|
// String literals in C include null-terminators, we aren't interested in preserving that.
|
||||||
#define lit(string_literal) (Str8){ string_literal, size_of(string_literal) - 1 }
|
#define lit(string_literal) (Str8){ string_literal, size_of(string_literal) - 1 }
|
||||||
|
|
||||||
|
/*
|
||||||
|
We'll want all of our text processing to operate with UTF-8 code pages:
|
||||||
|
*/
|
||||||
|
#include <locale.h>
|
||||||
|
inline void set_utf8_codepage() { setlocale(LC_ALL, ".UTF-8"); }
|
||||||
|
|
||||||
// For now this string can be visualized using a debugger.
|
// For now this string can be visualized using a debugger.
|
||||||
#ifdef DEMO__STR_SLICE
|
#ifdef DEMO__STR_SLICE
|
||||||
int main()
|
int main()
|
||||||
@ -178,15 +186,6 @@ struct SliceByte {
|
|||||||
SSIZE len;
|
SSIZE len;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
|
||||||
To address memory we'll use a memory slice.
|
|
||||||
*/
|
|
||||||
typedef struct SliceMem SliceMem;
|
|
||||||
struct SliceMem {
|
|
||||||
void* ptr;
|
|
||||||
SSIZE len;
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
The above is a pattern that can be provided so that whether or not the result is formatted and provided to the user via the stack is entirely optional.
|
The above is a pattern that can be provided so that whether or not the result is formatted and provided to the user via the stack is entirely optional.
|
||||||
It also allows for default parameters to be defined conveniently.
|
It also allows for default parameters to be defined conveniently.
|
||||||
@ -254,7 +253,7 @@ typedef U8 FMem_16KB [ KILOBTYES(16) ];
|
|||||||
typedef U8 FMem_64KB [ KILOBTYES(64) ];
|
typedef U8 FMem_64KB [ KILOBTYES(64) ];
|
||||||
|
|
||||||
#define typeof __typeof__
|
#define typeof __typeof__
|
||||||
#define fmem_slice(mem) (SliceMem) { mem, size_of(mem) }
|
#define fmem_slice(mem) (SliceByte) { mem, size_of(mem) }
|
||||||
|
|
||||||
// We'll be using an intrinsic for copying memory:
|
// We'll be using an intrinsic for copying memory:
|
||||||
void* memory_copy(void* dest, void const* src, USIZE length)
|
void* memory_copy(void* dest, void const* src, USIZE length)
|
||||||
@ -273,19 +272,19 @@ void* memory_copy(void* dest, void const* src, USIZE length)
|
|||||||
assert(slice.len > 0); \
|
assert(slice.len > 0); \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
void slice__copy(SliceMem dest, SSIZE const dest_typewidth, SliceMem const src, SSIZE const src_typewidth) {
|
void slice__copy(SliceByte dest, SSIZE dest_typewidth, SliceByte src, SSIZE src_typewidth) {
|
||||||
assert(dest.len >= src.len);
|
assert(dest.len >= src.len);
|
||||||
slice_assert(dest);
|
slice_assert(dest);
|
||||||
slice_assert(src);
|
slice_assert(src);
|
||||||
memory_copy(dest.ptr, src.ptr, src.len);
|
memory_copy(dest.ptr, src.ptr, src.len);
|
||||||
}
|
}
|
||||||
#define slice_copy(dest,src) slice__copy( \
|
#define slice_copy(dest,src) slice__copy( \
|
||||||
(SliceMem ){(dest).ptr, (dest).len * size_of(*(dest).ptr)}, size_of(*(dest).ptr) \
|
(SliceByte){(dest).ptr, (dest).len * size_of(*(dest).ptr)}, size_of(*(dest).ptr) \
|
||||||
, (SliceMem const){(src ).ptr, (src ).len * size_of(*(src ).ptr)}, size_of(*(src ).ptr) \
|
, (SliceByte){(src ).ptr, (src ).len * size_of(*(src ).ptr)}, size_of(*(src ).ptr) \
|
||||||
)
|
)
|
||||||
|
|
||||||
// Assumes memory is zeroed.
|
// Assumes memory is zeroed.
|
||||||
char const* str8_to_cstr_capped(Str8 content, SliceMem mem) {
|
char* str8_to_cstr_capped(Str8 content, SliceByte mem) {
|
||||||
assert(mem.len >= content.len);
|
assert(mem.len >= content.len);
|
||||||
memory_copy(mem.ptr, content.ptr, content.len);
|
memory_copy(mem.ptr, content.ptr, content.len);
|
||||||
return mem.ptr;
|
return mem.ptr;
|
||||||
@ -300,11 +299,11 @@ B32 memory_zero(void* dest, USIZE const length) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void slice__zero(SliceMem mem, SSIZE typewidth) {
|
void slice__zero(SliceByte mem, SSIZE typewidth) {
|
||||||
slice_assert(mem);
|
slice_assert(mem);
|
||||||
memory_zero(mem.ptr, mem.len);
|
memory_zero(mem.ptr, mem.len);
|
||||||
}
|
}
|
||||||
#define slice_zero(slice) slice__zero((SliceMem){(slice).ptr, (slice).len * size_of(*(slice).ptr)}, size_of(*(slice).ptr))
|
#define slice_zero(slice) slice__zero((SliceByte){ cast(void*, (slice).ptr), (slice).len * size_of(*(slice).ptr)}, size_of(*(slice).ptr))
|
||||||
|
|
||||||
// Now for our "Version 1"
|
// Now for our "Version 1"
|
||||||
|
|
||||||
@ -319,7 +318,7 @@ struct FileOpResult
|
|||||||
struct Opts__read_file_contents
|
struct Opts__read_file_contents
|
||||||
{
|
{
|
||||||
// For now we'll just have the backing memory provided as a slice.
|
// For now we'll just have the backing memory provided as a slice.
|
||||||
SliceMem backing;
|
SliceByte backing;
|
||||||
// And whether we should zero the backing.
|
// And whether we should zero the backing.
|
||||||
B32 zero_backing;
|
B32 zero_backing;
|
||||||
};
|
};
|
||||||
@ -372,8 +371,8 @@ void api_file_read_contents(FileOpResult* result, Str8 path, Opts__read_file_con
|
|||||||
slice_zero(pcast(SliceByte, opts->backing));
|
slice_zero(pcast(SliceByte, opts->backing));
|
||||||
}
|
}
|
||||||
|
|
||||||
DWORD amount_read = 0;
|
DWORD amount_read = 0;
|
||||||
BOOL read_result = ReadFile(
|
BOOL read_result = ReadFile(
|
||||||
id_file,
|
id_file,
|
||||||
opts->backing.ptr,
|
opts->backing.ptr,
|
||||||
file_size.QuadPart,
|
file_size.QuadPart,
|
||||||
@ -408,6 +407,8 @@ FileOpResult file__read_contents(Str8 path, Opts__read_file_contents* opts) {
|
|||||||
#ifdef DEMO__FILE_READ_CONTENTS_V1
|
#ifdef DEMO__FILE_READ_CONTENTS_V1
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
|
set_utf8_codepage();
|
||||||
|
|
||||||
// This will limit our V1 read to 64kb at most.
|
// This will limit our V1 read to 64kb at most.
|
||||||
FMem_64KB read_mem = {0};
|
FMem_64KB read_mem = {0};
|
||||||
FileOpResult res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
|
FileOpResult res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
|
||||||
@ -422,7 +423,7 @@ First we want to do lexical analysis. So we'll create a token listing delimiting
|
|||||||
For our data structure, we are going for a Whitespace-Aware Text Layout; where we'll track text and the formatting around them.
|
For our data structure, we are going for a Whitespace-Aware Text Layout; where we'll track text and the formatting around them.
|
||||||
|
|
||||||
Just like with the read file contents operation, we'll define an interface for performing this analysis.
|
Just like with the read file contents operation, we'll define an interface for performing this analysis.
|
||||||
It will be called watl_lex and take the SliceMem from the file as a Str8 slice and some Opts__watl_lex;
|
It will be called watl_lex and take the SliceByte from the file as a Str8 slice and some Opts__watl_lex;
|
||||||
returning a WATL_LexInfo for providing user info on how the operation went.
|
returning a WATL_LexInfo for providing user info on how the operation went.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -450,7 +451,7 @@ enum WATL_TokKind {
|
|||||||
|
|
||||||
typedef struct WATL_Tok WATL_Tok;
|
typedef struct WATL_Tok WATL_Tok;
|
||||||
struct WATL_Tok {
|
struct WATL_Tok {
|
||||||
char const* code;
|
UTF8* code;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct WATL_SliceTok WATL_SliceTok;
|
typedef struct WATL_SliceTok WATL_SliceTok;
|
||||||
@ -495,8 +496,8 @@ struct FArena {
|
|||||||
USIZE capacity;
|
USIZE capacity;
|
||||||
USIZE used;
|
USIZE used;
|
||||||
};
|
};
|
||||||
void api_farena_init(FArena* arena, SliceMem mem);
|
void api_farena_init(FArena* arena, SliceByte mem);
|
||||||
FArena farena_init (SliceMem mem);
|
FArena farena_init (SliceByte mem);
|
||||||
void* farena__push (FArena* arena, USIZE type_size, USIZE amount, Str8 dbg_typename);
|
void* farena__push (FArena* arena, USIZE type_size, USIZE amount, Str8 dbg_typename);
|
||||||
void farena_reset (FArena* arena);
|
void farena_reset (FArena* arena);
|
||||||
void farena_rewind (FArena* arena, ArenaSP savepoint);
|
void farena_rewind (FArena* arena, ArenaSP savepoint);
|
||||||
@ -506,12 +507,12 @@ ArenaSP farena_save (FArena arena);
|
|||||||
#define farena_push_array(arena, type, amount) (Slice ## type){ farena__push(& arena, size_of(type), amount, lit(stringify(type))), amount }
|
#define farena_push_array(arena, type, amount) (Slice ## type){ farena__push(& arena, size_of(type), amount, lit(stringify(type))), amount }
|
||||||
|
|
||||||
inline
|
inline
|
||||||
void api_farena_init(FArena* arena, SliceMem mem) {
|
void api_farena_init(FArena* arena, SliceByte mem) {
|
||||||
arena->start = mem.ptr;
|
arena->start = mem.ptr;
|
||||||
arena->capacity = mem.len;
|
arena->capacity = mem.len;
|
||||||
arena->used = 0;
|
arena->used = 0;
|
||||||
}
|
}
|
||||||
inline FArena farena_init(SliceMem mem) { FArena arena; api_farena_init(& arena, mem); return arena; }
|
inline FArena farena_init(SliceByte mem) { FArena arena; api_farena_init(& arena, mem); return arena; }
|
||||||
|
|
||||||
inline
|
inline
|
||||||
void* farena__push(FArena* arena, USIZE type_size, USIZE amount, Str8 dbg_typename) {
|
void* farena__push(FArena* arena, USIZE type_size, USIZE amount, Str8 dbg_typename) {
|
||||||
@ -547,7 +548,7 @@ struct Opts__watl_lex {
|
|||||||
This is not necessary and an equivalent process could be done where the tokens instead are semi-contiguously organized into a linked list with a chained arena, or the tokens are sparsely cached.
|
This is not necessary and an equivalent process could be done where the tokens instead are semi-contiguously organized into a linked list with a chained arena, or the tokens are sparsely cached.
|
||||||
Where their position in their originating string is not preserved. In this case we're keeping it simple. Tokens are in the same block of memory and they don't use a string cache.
|
Where their position in their originating string is not preserved. In this case we're keeping it simple. Tokens are in the same block of memory and they don't use a string cache.
|
||||||
*/
|
*/
|
||||||
SliceMem pool_toks;
|
SliceByte pool_toks;
|
||||||
};
|
};
|
||||||
|
|
||||||
// We are assuming everything is utf8-ascii.
|
// We are assuming everything is utf8-ascii.
|
||||||
@ -559,10 +560,10 @@ void api_watl_lex(WATL_LexInfo* info, Str8 source, Opts__watl_lex* opts)
|
|||||||
|
|
||||||
FArena arena = farena_init(opts->pool_toks);
|
FArena arena = farena_init(opts->pool_toks);
|
||||||
|
|
||||||
char const* end = source.ptr + source.len;
|
UTF8* end = source.ptr + source.len;
|
||||||
char const* cursor = source.ptr;
|
UTF8* cursor = source.ptr;
|
||||||
char const* prev = source.ptr;
|
UTF8* prev = source.ptr;
|
||||||
char code = * cursor;
|
UTF8 code = * cursor;
|
||||||
|
|
||||||
B32 was_formatting = true;
|
B32 was_formatting = true;
|
||||||
WATL_Tok* tok = nullptr;
|
WATL_Tok* tok = nullptr;
|
||||||
@ -631,30 +632,32 @@ To allocate onto the heap we'll make a basic slicemem_malloc to allocate, we'll
|
|||||||
However we don't need to use it for the V1 example. The OS will cleanup the pages used by the process during its termination.
|
However we don't need to use it for the V1 example. The OS will cleanup the pages used by the process during its termination.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
SliceMem slicemem_alloc(USIZE amount)
|
SliceByte slicemem_alloc(USIZE amount)
|
||||||
{
|
{
|
||||||
assert(amount > KILOBTYES(4));
|
assert(amount > KILOBTYES(4));
|
||||||
void* result = malloc(amount);
|
void* result = malloc(amount);
|
||||||
assert(result != nullptr);
|
assert(result != nullptr);
|
||||||
SliceMem mem = {
|
SliceByte mem = {
|
||||||
.ptr = result,
|
.ptr = result,
|
||||||
.len = amount
|
.len = amount
|
||||||
};
|
};
|
||||||
return mem;
|
return mem;
|
||||||
}
|
}
|
||||||
void slicemem_free(SliceMem mem) {
|
void slicemem_free(SliceByte mem) {
|
||||||
free(mem.ptr);
|
free(mem.ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DEMO__WATL_LEX_V1
|
#ifdef DEMO__WATL_LEX_V1
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
|
set_utf8_codepage();
|
||||||
|
|
||||||
// This will limit our V1 read to 64kb at most.
|
// This will limit our V1 read to 64kb at most.
|
||||||
FMem_64KB read_mem = {0};
|
FMem_64KB read_mem = {0};
|
||||||
FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
|
FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
|
||||||
|
|
||||||
// This will limit our V1 lex to only 8 megs worth of token tracking on a file.
|
// This will limit our V1 lex to only 8 megs worth of token tracking on a file.
|
||||||
SliceMem mem_toks = slicemem_alloc(MEGABYTES(8));
|
SliceByte mem_toks = slicemem_alloc(MEGABYTES(8));
|
||||||
WATL_LexInfo lex_res = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);
|
WATL_LexInfo lex_res = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);
|
||||||
// unnecessary in this case but if you want to explicitly:
|
// unnecessary in this case but if you want to explicitly:
|
||||||
slicemem_free(mem_toks);
|
slicemem_free(mem_toks);
|
||||||
@ -679,8 +682,8 @@ For the sake of the exercise, we'll be eliminating the association with the file
|
|||||||
*/
|
*/
|
||||||
#pragma region Str8Cache
|
#pragma region Str8Cache
|
||||||
typedef struct Str8Cache Str8Cache;
|
typedef struct Str8Cache Str8Cache;
|
||||||
void api_str8cache_init(Str8Cache* cache, SliceMem mem_strs, SliceMem mem_slots, SliceMem mem_table);
|
void api_str8cache_init(Str8Cache* cache, SliceByte mem_strs, SliceByte mem_slots, SliceByte mem_table);
|
||||||
Str8Cache str8cache_init ( SliceMem mem_strs, SliceMem mem_slots, SliceMem mem_table);
|
Str8Cache str8cache_init ( SliceByte mem_strs, SliceByte mem_slots, SliceByte mem_table);
|
||||||
|
|
||||||
// A cache like this relies on tabling string entries utilizing an index derived from a hashed ID.
|
// A cache like this relies on tabling string entries utilizing an index derived from a hashed ID.
|
||||||
// For these strings we'll be using a hash called djb8:
|
// For these strings we'll be using a hash called djb8:
|
||||||
@ -689,7 +692,7 @@ Str8Cache str8cache_init ( SliceMem mem_strs, SliceMem mem_s
|
|||||||
#define slice_iter(container, iter) typeof((container).ptr) iter = (container).ptr; iter != ((container).ptr + (container).len); ++ iter
|
#define slice_iter(container, iter) typeof((container).ptr) iter = (container).ptr; iter != ((container).ptr + (container).len); ++ iter
|
||||||
|
|
||||||
inline
|
inline
|
||||||
void hash64_djb8(U64* hash, SliceByte const bytes) {
|
void hash64_djb8(U64* hash, SliceByte bytes) {
|
||||||
for (U8 const* elem = bytes.ptr; elem != (bytes.ptr + bytes.len); ++ elem) {
|
for (U8 const* elem = bytes.ptr; elem != (bytes.ptr + bytes.len); ++ elem) {
|
||||||
*hash = (((*hash) << 8) + (*hash)) + (*elem);
|
*hash = (((*hash) << 8) + (*hash)) + (*elem);
|
||||||
}
|
}
|
||||||
@ -725,16 +728,16 @@ struct Str8Cache {
|
|||||||
Str8Cache_SliceSlot table;
|
Str8Cache_SliceSlot table;
|
||||||
};
|
};
|
||||||
|
|
||||||
Str8Cache str8cache_init(SliceMem mem_strs, SliceMem mem_slots, SliceMem mem_table) { Str8Cache cache; api_str8cache_init(& cache, mem_strs, mem_slots, mem_table); return cache; }
|
Str8Cache str8cache_init(SliceByte mem_strs, SliceByte mem_slots, SliceByte mem_table) { Str8Cache cache; api_str8cache_init(& cache, mem_strs, mem_slots, mem_table); return cache; }
|
||||||
inline
|
inline
|
||||||
void api_str8cache_init(Str8Cache* cache, SliceMem mem_strs, SliceMem mem_slots, SliceMem mem_table) {
|
void api_str8cache_init(Str8Cache* cache, SliceByte mem_strs, SliceByte mem_slots, SliceByte mem_table) {
|
||||||
assert(cache != nullptr);
|
assert(cache != nullptr);
|
||||||
slice_assert(mem_strs);
|
slice_assert(mem_strs);
|
||||||
slice_assert(mem_slots);
|
slice_assert(mem_slots);
|
||||||
slice_assert(mem_table);
|
slice_assert(mem_table);
|
||||||
cache->a_str = farena_init(mem_strs);
|
cache->a_str = farena_init(mem_strs);
|
||||||
cache->pool = (Str8Cache_SliceSlot){mem_slots.ptr, mem_slots.len / size_of(Str8Cache_Slot)};
|
cache->pool = (Str8Cache_SliceSlot){ cast(void*, mem_slots.ptr), mem_slots.len / size_of(Str8Cache_Slot)};
|
||||||
cache->table = (Str8Cache_SliceSlot){mem_table.ptr, mem_table.len / size_of(Str8Cache_Slot)};
|
cache->table = (Str8Cache_SliceSlot){ cast(void*, mem_table.ptr), mem_table.len / size_of(Str8Cache_Slot)};
|
||||||
slice_zero(cache->pool);
|
slice_zero(cache->pool);
|
||||||
slice_zero(cache->table);
|
slice_zero(cache->table);
|
||||||
}
|
}
|
||||||
@ -792,8 +795,8 @@ Str8* str8cache_set(Str8Cache* cache, U64 key, Str8 value)
|
|||||||
if (! surface_slot->occupied || surface_slot->key == key)
|
if (! surface_slot->occupied || surface_slot->key == key)
|
||||||
{
|
{
|
||||||
if (value.ptr != surface_slot->value.ptr) {
|
if (value.ptr != surface_slot->value.ptr) {
|
||||||
SliceMem mem = { farena__push(& cache->a_str, size_of(U8), value.len, lit("Str8")), value.len };
|
SliceByte mem = { farena__push(& cache->a_str, size_of(U8), value.len, lit("Str8")), value.len };
|
||||||
slice_copy(pcast(SliceByte, mem), value);
|
slice_copy(mem, value);
|
||||||
surface_slot->value = pcast(Str8, mem);
|
surface_slot->value = pcast(Str8, mem);
|
||||||
}
|
}
|
||||||
surface_slot->key = key;
|
surface_slot->key = key;
|
||||||
@ -818,8 +821,8 @@ Str8* str8cache_set(Str8Cache* cache, U64 key, Str8 value)
|
|||||||
if ( ! slot->next->occupied || slot->next->key == key)
|
if ( ! slot->next->occupied || slot->next->key == key)
|
||||||
{
|
{
|
||||||
if (value.ptr != slot->next->value.ptr) {
|
if (value.ptr != slot->next->value.ptr) {
|
||||||
SliceMem mem = { farena__push(& cache->a_str, size_of(U8), value.len, lit("Str8")), value.len };
|
SliceByte mem = { farena__push(& cache->a_str, size_of(U8), value.len, lit("Str8")), value.len };
|
||||||
slice_copy(pcast(SliceByte, mem), value);
|
slice_copy(mem, value);
|
||||||
slot->next->value = (Str8){mem.ptr, mem.len / size_of(char)};
|
slot->next->value = (Str8){mem.ptr, mem.len / size_of(char)};
|
||||||
}
|
}
|
||||||
slot->next->value = value;
|
slot->next->value = value;
|
||||||
@ -869,8 +872,8 @@ struct WATL_SliceLine {
|
|||||||
#if defined(DEMO__WATL_PARSE_V1) || defined(DEMO__WATL_DUMP_V1)
|
#if defined(DEMO__WATL_PARSE_V1) || defined(DEMO__WATL_DUMP_V1)
|
||||||
|
|
||||||
struct Opts__watl_parse {
|
struct Opts__watl_parse {
|
||||||
SliceMem backing_nodes;
|
SliceByte backing_nodes;
|
||||||
SliceMem backing_lines;
|
SliceByte backing_lines;
|
||||||
Str8Cache* str_cache;
|
Str8Cache* str_cache;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -928,21 +931,23 @@ WATL_ParseInfo watl__parse(WATL_SliceTok tokens, Opts__watl_parse* opts) { WATL_
|
|||||||
#ifdef DEMO__WATL_PARSE_V1
|
#ifdef DEMO__WATL_PARSE_V1
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
|
set_utf8_codepage();
|
||||||
|
|
||||||
// This will limit our V1 read to 64kb at most.
|
// This will limit our V1 read to 64kb at most.
|
||||||
FMem_64KB read_mem = {0};
|
FMem_64KB read_mem = {0};
|
||||||
FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
|
FileOpResult read_res = file_read_contents(lit("demo.str_cache.c"), .backing = fmem_slice(read_mem) );
|
||||||
|
|
||||||
// This will limit our V1 lex to only 8 megs worth of token tracking on a file.
|
// This will limit our V1 lex to only 8 megs worth of token tracking on a file.
|
||||||
SliceMem mem_toks = slicemem_alloc(MEGABYTES(8));
|
SliceByte mem_toks = slicemem_alloc(MEGABYTES(8));
|
||||||
WATL_LexInfo lex_res = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);
|
WATL_LexInfo lex_res = watl_lex(pcast(Str8, read_res.content), .pool_toks = mem_toks);
|
||||||
|
|
||||||
SliceMem mem_cache_strs = slicemem_alloc(MEGABYTES(64));
|
SliceByte mem_cache_strs = slicemem_alloc(MEGABYTES(64));
|
||||||
SliceMem mem_cache_slots = slicemem_alloc(1024 * 1024 * 16 * size_of(Str8Cache_SliceSlot));
|
SliceByte mem_cache_slots = slicemem_alloc(1024 * 1024 * 16 * size_of(Str8Cache_SliceSlot));
|
||||||
SliceMem mem_cache_table = slicemem_alloc(1024 * 16 * size_of(Str8Cache_SliceSlot));
|
SliceByte mem_cache_table = slicemem_alloc(1024 * 16 * size_of(Str8Cache_SliceSlot));
|
||||||
Str8Cache str_cache = str8cache_init(mem_cache_strs, mem_cache_slots, mem_cache_table);
|
Str8Cache str_cache = str8cache_init(mem_cache_strs, mem_cache_slots, mem_cache_table);
|
||||||
|
|
||||||
SliceMem mem_parse_nodes = slicemem_alloc(MEGABYTES(4));
|
SliceByte mem_parse_nodes = slicemem_alloc(MEGABYTES(4));
|
||||||
SliceMem mem_parse_lines = slicemem_alloc(MEGABYTES(4));
|
SliceByte mem_parse_lines = slicemem_alloc(MEGABYTES(4));
|
||||||
WATL_ParseInfo parse_res = watl_parse(lex_res.tokens, .backing_nodes = mem_parse_nodes, .backing_lines = mem_parse_lines, .str_cache = & str_cache);
|
WATL_ParseInfo parse_res = watl_parse(lex_res.tokens, .backing_nodes = mem_parse_nodes, .backing_lines = mem_parse_lines, .str_cache = & str_cache);
|
||||||
|
|
||||||
// unnecessary in this case but if you want to explicitly:
|
// unnecessary in this case but if you want to explicitly:
|
||||||
@ -960,24 +965,24 @@ We'll be utilizing a new construct called a string generator which be tied to al
|
|||||||
|
|
||||||
typedef struct Str8Gen Str8Gen;
|
typedef struct Str8Gen Str8Gen;
|
||||||
struct Str8Gen {
|
struct Str8Gen {
|
||||||
SliceMem backing; // For V1 the backing buffer is fixed size.
|
SliceByte backing; // For V1 the backing buffer is fixed size.
|
||||||
char* ptr;
|
char* ptr;
|
||||||
SSIZE len;
|
SSIZE len;
|
||||||
};
|
};
|
||||||
|
|
||||||
void str8gen_init(Str8Gen* gen, SliceMem backing);
|
void str8gen_init(Str8Gen* gen, SliceByte backing);
|
||||||
Str8Gen str8gen_make( SliceMem backing);
|
Str8Gen str8gen_make( SliceByte backing);
|
||||||
|
|
||||||
void str8gen_append_str8(Str8Gen* gen, Str8 str);
|
void str8gen_append_str8(Str8Gen* gen, Str8 str);
|
||||||
// void str8gen_append_fmt (Str8Gen* gen, Str8 fmt, ...);
|
// void str8gen_append_fmt (Str8Gen* gen, Str8 fmt, ...);
|
||||||
|
|
||||||
void str8gen_init(Str8Gen* gen, SliceMem backing) {
|
void str8gen_init(Str8Gen* gen, SliceByte backing) {
|
||||||
assert(gen != nullptr);
|
assert(gen != nullptr);
|
||||||
gen->backing = backing;
|
gen->backing = backing;
|
||||||
gen->ptr = backing.ptr;
|
gen->ptr = backing.ptr;
|
||||||
gen->len = 0;
|
gen->len = 0;
|
||||||
}
|
}
|
||||||
Str8Gen str8gen_make(SliceMem backing) { Str8Gen gen; str8gen_init(& gen, backing); return gen; }
|
Str8Gen str8gen_make(SliceByte backing) { Str8Gen gen; str8gen_init(& gen, backing); return gen; }
|
||||||
|
|
||||||
void str8gen_append_str8(Str8Gen* gen, Str8 str) {
|
void str8gen_append_str8(Str8Gen* gen, Str8 str) {
|
||||||
SSIZE left = gen->backing.len - gen->len;
|
SSIZE left = gen->backing.len - gen->len;
|
||||||
@ -1010,7 +1015,7 @@ struct SliceFmtTokEntry {
|
|||||||
/*
|
/*
|
||||||
This is a token-substituting formatter using an array table lookup for tokens to substitute.
|
This is a token-substituting formatter using an array table lookup for tokens to substitute.
|
||||||
*/
|
*/
|
||||||
Str8 fmt_vtoken_slice(SliceMem buffer, SliceFmtTokEntry tokens, Str8 fmt_template)
|
Str8 fmt_vtoken_slice(SliceByte buffer, SliceFmtTokEntry tokens, Str8 fmt_template)
|
||||||
{
|
{
|
||||||
slice_assert(buffer);
|
slice_assert(buffer);
|
||||||
slice_assert(tokens);
|
slice_assert(tokens);
|
||||||
@ -1047,7 +1052,7 @@ Str8 fmt_vtoken_slice(SliceMem buffer, SliceFmtTokEntry tokens, Str8 fmt_templat
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Hashing the potential token and cross checking it with our token table
|
// Hashing the potential token and cross checking it with our token table
|
||||||
U64 key = 0; hash64_djb8(& key, (SliceByte){cursor_fmt + 1, potential_token_length});
|
U64 key = 0; hash64_djb8(& key, (SliceByte){ cast(void*, cursor_fmt + 1), potential_token_length});
|
||||||
Str8* value = nullptr;
|
Str8* value = nullptr;
|
||||||
for (slice_iter(tokens, token))
|
for (slice_iter(tokens, token))
|
||||||
{
|
{
|
||||||
@ -1097,7 +1102,7 @@ struct SliceStr8 {
|
|||||||
SSIZE len;
|
SSIZE len;
|
||||||
};
|
};
|
||||||
|
|
||||||
Str8 fmt__vtoken(SliceMem backing, Str8 fmt_template, SliceStr8* tokens)
|
Str8 fmt__vtoken(SliceByte backing, Str8 fmt_template, SliceStr8* tokens)
|
||||||
{
|
{
|
||||||
FArena a_backing = farena_init(backing);
|
FArena a_backing = farena_init(backing);
|
||||||
SliceFmtTokEntry table = {a_backing.start, 0};
|
SliceFmtTokEntry table = {a_backing.start, 0};
|
||||||
@ -1106,12 +1111,12 @@ Str8 fmt__vtoken(SliceMem backing, Str8 fmt_template, SliceStr8* tokens)
|
|||||||
for (slice_iter(*tokens, token)) {
|
for (slice_iter(*tokens, token)) {
|
||||||
FmtTokEntry* entry = farena_push(a_backing, FmtTokEntry);
|
FmtTokEntry* entry = farena_push(a_backing, FmtTokEntry);
|
||||||
* entry = (FmtTokEntry){0};
|
* entry = (FmtTokEntry){0};
|
||||||
hash64_djb8(& entry->key, (SliceByte){token->ptr, token->len});
|
hash64_djb8(& entry->key, (SliceByte){cast(void*, token->ptr), token->len});
|
||||||
++ token;
|
++ token;
|
||||||
entry->value = * token;
|
entry->value = * token;
|
||||||
++ table.len;
|
++ table.len;
|
||||||
}
|
}
|
||||||
SliceMem buffer = { .ptr = cast(U8*, a_backing.start) + a_backing.used, .len = a_backing.capacity - a_backing.used };
|
SliceByte buffer = { .ptr = cast(U8*, a_backing.start) + a_backing.used, .len = a_backing.capacity - a_backing.used };
|
||||||
Str8 result = fmt_vtoken_slice(buffer, table, fmt_template);
|
Str8 result = fmt_vtoken_slice(buffer, table, fmt_template);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -1132,7 +1137,9 @@ Str8 mappings [][2] = {
|
|||||||
#ifdef DEMO__WATL_DUMP_V1
|
#ifdef DEMO__WATL_DUMP_V1
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
SliceMem scratch = slicemem_alloc(MEGABYTES(64));
|
set_utf8_codepage();
|
||||||
|
|
||||||
|
SliceByte scratch = slicemem_alloc(MEGABYTES(64));
|
||||||
Str8 subst_table [][2] = {
|
Str8 subst_table [][2] = {
|
||||||
fmt_vtoken_entry("maybe_sub", "IT SUBST!!!"),
|
fmt_vtoken_entry("maybe_sub", "IT SUBST!!!"),
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user