Update vefontcache to latest

2025-01-12 14:00:58 -05:00
parent a869ebab69
commit bc47b37a46
16 changed files with 1280 additions and 942 deletions
--- a/code/font/vefontcache/LICENSE.md
+++ b/code/font/vefontcache/LICENSE.md
@@ -1,8 +1,9 @@
-VE Text Rendering Library
+VEFontCache Odin
 Copyright 2024 Edward R. Gonzalez

 This project is based on Vertex Engine GPU Font Cache
-by Xi Chen (https://github.com/hypernewbie/VEFontCache). It has been substantially overhauled from its original implementation.
+by Xi Chen (https://github.com/hypernewbie/VEFontCache). It has been substantially
+rewritten and redesigned for the Odin programming language.

 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
 associated documentation files (the "Software"), to deal in the Software without restriction,
--- a/code/font/vefontcache/LRU.odin
+++ b/code/font/vefontcache/LRU.odin
@@ -12,8 +12,6 @@ package vefontcache
 	are marginal changes at best.
 */

-import "base:runtime"
-
 // 16-bit hashing was attempted, however it seems to get collisions with djb8_hash_16

 LRU_Fail_Mask_16 :: 0xFFFF
@@ -43,11 +41,11 @@ pool_list_init :: proc( pool : ^Pool_List($V_Type), capacity : i32, dbg_name : s
 {
 	error : Allocator_Error
 	pool.items, error = make( [dynamic]Pool_List_Item(V_Type), int(capacity) )
-	assert( error == .None, "VEFontCache.pool_list_init : Failed to allocate items array")
+	assert( error == .None, "VEFontCache.pool_list_init: Failed to allocate items array")
 	resize( & pool.items, capacity )

 	pool.free_list, error = make( [dynamic]Pool_ListIter, len = 0, cap = int(capacity) )
-	assert( error == .None, "VEFontCache.pool_list_init : Failed to allocate free_list array")
+	assert( error == .None, "VEFontCache.pool_list_init: Failed to allocate free_list array")
 	resize( & pool.free_list, capacity )

 	pool.capacity = capacity
@@ -106,16 +104,16 @@ pool_list_push_front :: proc( pool : ^Pool_List($V_Type), value : V_Type ) #no_b
 	assert( length == int(pool.capacity - pool.size) )

 	id := pool.free_list[ len(pool.free_list) - 1 ]
-	if pool.dbg_name != "" {
-		logf("pool_list: back %v", id)
-	}
+	// if pool.dbg_name != "" {
+	// 	logf("pool_list: back %v", id)
+	// }
 	pop( & pool.free_list )
 	pool.items[ id ].prev  = -1
 	pool.items[ id ].next  = pool.front
 	pool.items[ id ].value = value
-	if pool.dbg_name != "" {
-		logf("pool_list: pushed %v into id %v", value, id)
-	}
+	// if pool.dbg_name != "" {
+	// 	logf("pool_list: pushed %v into id %v", value, id)
+	// }

 	if pool.front != -1 do pool.items[ pool.front ].prev = id
 	if pool.back  == -1 do pool.back = id
--- a/code/font/vefontcache/Readme.md
+++ b/code/font/vefontcache/Readme.md
@@ -1,54 +0,0 @@
-# VE Text Rendering Library
-
-> Vertex Engine GPU Text Rendering Library
-
-https://github.com/user-attachments/assets/b74f1ec1-f980-45df-b604-d6b7d87d40ff
-
-This started off as a port of the [VEFontCache](https://github.com/hypernewbie/VEFontCache) library to the Odin programming language.
-Its original purpose was for use in game engines, however its rendeirng quality and performance is more than adequate for many other applications.
-
-Since then the library has been overhauled to offer higher performance, improved visual fidelity, additional features, and quality of life improvements.
-
-Features:
-
-* Simple and well documented.
-* Load and unload fonts at anytime
-* Almost entirely configurabe and tunable at runtime!
-* Full support for hot-reload
-  * Clear the caches at any-time!
-* Robust quality of life features:
-  * Tracks text layers!
-  * Push and pop stack for font, font_size, colour, view, position, scale and zoom!
-  * Enforce even only font-sizing (useful for linear-zoom) [TODO]
-  * Snap-positining to view for better hinting
-* Basic or advanced text shaping via Harfbuzz
-* All rendering is real-time, triangulation done on the CPU, vertex rendering and texture blitting on the gpu.
-  * Can hand thousands of draw text calls with very large or small shapes.
-* 4-Level Regioned Texture Atlas for caching rendered glyphs
-* Text shape caching
-* Glyph texture buffer for rendering the text with super-sampling to downsample to the atlas or direct to target screen.
-* Super-sample by a font size scalar for sharper glyphs
-* All caching backed by an optimized 32-bit LRU indexing cache
-* Provides a draw list that is backend agnostic (see [backend](./backend) for usage example).
-
-Upcoming:
-
-* Support for ear-clipping triangulation
-  * Support for which triangulation method used on a by font basis?
-* Multi-threading supported job queue.
-  * Lift heavy-lifting portion of the library's context into a thread context.
-  * Synchronize threads by merging their generated layered drawlist into a file draw-list for processing on the user's render thread.
-  * User defines how context's are distributed for drawing (a basic quandrant basic selector procedure will be provided.)
-
-See: [docs/Readme.md](docs/Readme.md) for the library's interface.
-
-## Building
-
-See [scripts/Readme.md](scripts/Readme.md) for building examples or utilizing the provided backends.
-
-Currently the scripts provided & the library itself were developed & tested on Windows. There are bash scripts for building on linux (they build on WSL but need additional testing).
-
-The library depends on harfbuzz, & stb_truetype to build.  
-Note: harfbuzz could technically be gutted if the user removes their definitions, however they have not been made into a conditional compilation option (yet).
-
-![image](https://github.com/user-attachments/assets/2f6c0b36-179c-42fe-8903-7640ae3c209e)
--- a/code/font/vefontcache/atlas.odin
+++ b/code/font/vefontcache/atlas.odin
@@ -12,7 +12,6 @@ Atlas_Region_Kind :: enum u8 {
 	Ignore = 0xFF, // ve_fontcache_cache_glyph_to_atlas uses a -1 value in clear draw call
 }

-// Note(Ed): Using 16 bit hash had collision failures and no observable performance improvement (tried several 16-bit hashers)
 Atlas_Key :: u32

 // TODO(Ed) It might perform better with a tailored made hashtable implementation for the LRU_Cache or dedicated array struct/procs for the Atlas.
--- a/code/font/vefontcache/docs/Readme.md
+++ b/code/font/vefontcache/docs/Readme.md
@@ -1,23 +1,6 @@
 # Interface

-Notes
---
-
-The freetype setup is not finished. Specifically due to cache_glyph_freetype not parsing the glyph outline data structure properly. 
-
-Freetype supports specifying a FT_Memory handle which is a pointer to a FT_MemoryRect. This can be used to define an allocator for the parser. Currently this library does not wrap this interface (yet). If using freetype its recommend to update `parser_init` with the necessary changes to wrap the context's backing allocator for freetype to utilize.
-
-```c
-  struct  FT_MemoryRec_
-  {
-    void*            user;
-    FT_Alloc_Func    alloc;
-    FT_Free_Func     free;
-    FT_Realloc_Func  realloc;
-  };
-  ```
-
-This library (seems) to perform best if the text commands are fed in 'whitespace aware chunks', where instead of feeding it entire blobs of text, the user identfies "words" in the text and feeding the visible and whitespce chunks derived from this to draw_text as separate calls. It improves the caching of the text shapes. The downside is there has to be a time where the text is parsed into tokens beforehand so that the this iteration does not have to occur continously.
+## Lifetime

 ### startup

@@ -31,30 +14,68 @@ Much of the data structures within the context struct are not fixed-capacity all

 The library supports being used in a dynamically loaded module. If its hot-reloaded simply make sure to call this procedure with a reference to the backing allocator provided during startup as all dynamic containers tend to lose a proper reference to the allocator's procedure.

+Call `clear_atlas_region_caches` & `clear_shape_cache` to reset the library's shape and glyph cache state to force a re-render.
+
 ### shutdown

 Release resources from the context.

-### configure_snap
+### clear_atlas_region_caches

-You'll find this used immediately in draw_text it acts as a way to snap the position of the text to the nearest pixel for the width and height specified.
+Clears the LRU caches of regions A-D of the Atlas & sets their next_idx to 0. Effectively will force a re-cache of all previously rendered glyphs. Shape configuration for the glyph will remain unchanged unless clear_shape_cache is also called.

-If snapping is not desired, set the snap_width and height before calling draw_text to 0.
+### clear_shape_cache

-### get_cursor_pos
+Clears the LRU cache of the shaper along with clearing all existing storage entries. Effectively will force a re-cache of previously cached text shapes (Does not recache their rendered glyphs).

-Will provide the current cursor_pos for the resulting text drawn.
+### load_font

-### set_color
+Will load an instance of a font. The user needs to load the file's bytes themselves, the font entry (Entry :: struct) will be tracked by the library. The user will be given a font_id which is a direct index for the entry in the tracked array.

-Sets the color to utilize on `Draw_Call`s for FrameBuffer.Target or .Target_Uncached passes
+### unload_font
+
+Will free an entry, (parser and shaper resources also freed)
+
+## Shaping
+
+Ideally the user should track the shapes themselves in a time-scale beyond the per-frame draw call. This avoids having to do caching/lookups of the shape.
+
+### shape_text
+
+Will shape the text using the `shaper_proc` argument (user overloadable). Shape will be cached by the library.
+
+### shape_text_uncached
+
+Will shape the text using the `shaper_proc` argument (user overloadable).
+Shape will NOT be cached by the library. Use this if you want to roll your own solution for tracking shapes.
+
+## Draw list generation
+
+### draw_text procedures
+
+There are a total of six procedures, 3 for shapes, 3 for text:
+
+* `draw_shape_normalized_space`
+* `draw_shape_view_space`
+* `draw_shape`
+* `draw_text_normalized_space`
+* `draw_text_view_space`
+* `draw_text`
+
+The normalized space procedures are the `baseline` interface draw procedures. They expect the position and scale provided to operate within an unsigned normalized space where the bottom left is 0.0, 0.0 and the top right is 1.0, 1.0.
+
+The view space will normalize the position and scale for the user based on the provided view and zoom. The coordinate system is still unsigned just scaled to the view's size.
+
+The non-suffix named procedures use the scope stack to derive the position and scale: the user provides a relative position and scale for the text, which will be adjusted to the scope's view, position, scale, & zoom.
+
+See the comment above each of the procedures for diagrams.

 ### get_draw_list

 Get the enqueded draw_list (vertices, indices, and draw call arrays) in its entirety.
 By default, if get_draw_list is called, it will first call `optimize_draw_list` to optimize the draw list's calls for the user. If this is undesired, make sure to pass `optimize_before_returning = false` in the arguments.

-###  get_draw_list_layer
+### get_draw_list_layer

 Get the enqueued draw_list for the current "layer".
 A layer is considered the slice of the `Draw_List`'s content from the last call to `flush_draw_list_layer` onward.
@@ -70,6 +91,12 @@ Will clear the draw list and draw layer offsets.

 Will update the draw list layer with the latest offset based on the current lenght of the draw list vertices, indices, and calls arrays.

+## Metrics
+
+### measure_shape_size
+
+This provides the shape size scaled down by the ctx.px_scale to get the intended usage size. Size is equivalent to `measure_text_size`.
+
 ### measure_text_size

 Provides a Vec2 the width and height occupied by the provided text string. The y is measured to be the the largest glyph box bounds height of the text. The width is derived from the `end_cursor_pos` field from a `Shaped_Text` entry.
@@ -77,3 +104,85 @@ Provides a Vec2 the width and height occupied by the provided text string. The y
 ### get_font_vertical_metrics

 A wrapper for `parser_get_font_vertical_metrics`. Will provide the ascent, descent, and line_gap for a font entry.
+
+## Miscellaneous
+
+Stuff used by the draw list generation interface or just getters and setters.
+
+### get_cursor_pos
+
+Will provide the current cursor_pos for the resulting text drawn.
+
+### get_normalized_position_scale
+
+Will normalize the value of the position and scale based on the provided view.  
+Position will also be snapped to the nearest pixel via ceil.  
+Does nothing if view is 1 or 0
+
+This is used by draw via view relative space procedures to normalize it to the intended space for the render pass.
+
+### resolve_draw_px_size
+
+Used to constrain the px_size used in `resolve_zoom_size_scale`.
+
+The view relative space and scoping stack-based procedures support zoom. When utilizing zoom there is a nasty jitter that will occur if the user smoothly goes across different font sizes because the spacing can drastically change between even and odd font-sizes. This is applied to enforce the font sticks to a specific interval.
+
+The library uses the context's zoom_px_interval as the reference interval in the draw procedures. It can be set with `set_zoom_px_interval` and the default value is 2.
+
+### resolve_zoom_size_scale
+
+Provides a way to get a "zoom" on the font size and scale, similar conceptually to a canvas UX zoom.  
+Does nothing when zoom is 1.0.
+
+Uses `resolve_draw_px_size` to constrain which font size is used for the zoom.
+
+### set_alpha_scalar
+
+This is an artifact feature of the current shader, it *may* be removed in the future... Increasing the alpha of the colour drawn with above 1.0 increases the edge contrast of the glyph shape.
+
+For the value to be added to the colour, the alpha of the text must already be at 1.0 or greater.
+
+### set_px_scalar
+
+This is another "super-scalar" applied to rendering glyphs. In each draw procedure the following is computed before passing the values to the shaper and draw list generation passes:
+
+```go
+target_px_size    := px_size * ctx.px_scalar
+target_scale      := scale   * (1 / ctx.px_scalar)
+target_font_scale := parser_scale( entry.parser_info, target_px_size )
+```
+
+Essentially, `ctx.px_scalar` is used to upscale the px_size by its value and then downscale the render target scale back to the intended size. Doing so provides better shape positioning and further improves text hinting. The downside is that small text tends to become more jagged (as it's really hitting the limits of how well the shader can blend those edges at that resolution).
+
+This will most likely be preserved with future shader upgrades, however it will most likely not be as necessary as it is right now to achieve crisp text.
+
+### set_zoom_px_interval
+
+Used by draw procedures with `resolve_draw_px_size` & `resolve_zoom_size_scale`. Provides the interval to use when constraining the px_size to a specific set of values when using zoom scaling.
+
+### set_snap_glyph_shape_position
+
+During the shaping pass, the position of each glyph can be rounded up to the integer to (usually) allow better hinting.
+
+### set_snap_glyph_render_height
+
+During the draw list generation pass, the position of each glyph when blitting to atlas can have the quad size rounded up to the integer.
+Can yield better hinting but may significantly stretch the glyphs at small scales.
+
+## Scope Stack
+
+These are a set of push & pop pairs of functions that operate on the context's stack containers. They are used with the draw_shape and draw_text procedures. This is mainly for quick scratch usage where the user wants to directly compose a large amount of text without having a UI framework directly handle the text backend.
+
+* font
+* font_size
+* colour: Linear colour.
+* view: Width and height of the 2D area the text will be drawn within.
+* position: Uses relative positioning, will offset the incoming position by the given amount.
+* scale: Uses relative scaling, will scale the procedures incoming scale by the given amount.
+* zoom: Affects scaling, will scale the procedure's incoming font size & scale based on an *UX canvas camera's* notion of it.
+
+Procedure types:
+
+* `scope_<stack_option>`: push with a defer pop
+* `push_<stack_option>`
+* `pop_<stack_option>`
--- a/code/font/vefontcache/docs/guide_architecture.md
+++ b/code/font/vefontcache/docs/guide_architecture.md
@@ -0,0 +1,234 @@
+# Guide: Architecture
+
+Overview of the package design and code-path layout.
+
+---
+
+The purpose of this library is to alleviate four key challenges with one encapsulating package:
+
+* Font parsing
+* Text codepoint shaping
+* Glyph shape triangulation
+* Glyph draw-list generation
+
+Shaping text, getting metrics for glyphs, triangulating glyphs, and anti-aliasing their render are expensive operations to perform per frame. Therefore, any compute operations that can be cached, will be.
+
+There are two cache types used:
+
+* Shape cache (`Shaped_Text_Cache.state`)
+* Atlas region cache (`Atlas_Region.state`)
+
+The shape cache stores all data for a piece of text that will be utilized in a draw call that is not dependent on a specific position & scale (and is faster to lookup vs compute per draw call).  
+The atlas region cache tracks what slots have glyphs rendered to the texture atlas. This essentially caches triangulation and super-sampling computations.
+
+All caching uses [LRU.odin](../LRU.odin).
+
+## Code Paths
+
+### Lifetime
+
+The library lifetime is straightforward: you have a startup procedure that should be called during your usual app initialization. From there you may either choose to manually shut it down or let the OS clean it up.
+
+If hot-reload is desired, you just need to call hot_reload with the context's backing allocator to refresh the procedure references. After the DLL has been reloaded, these should be the only aspects that have been scrambled.  
+Usually when hot-reloading the library for tuning or major changes, you'd also want to clear the caches. Simply call `clear_atlas_region_caches` & `clear_shape_cache` right after.
+
+Ideally, there should be zero dynamic allocation on a per-frame basis as long as the reserves for the dynamic containers are never exceeded. It's acceptable if they do exceed as their memory locality is so large their distance in the pages to load into CPU cache won't matter - it just needs to be a low incidence.
+
+### Shaping Pass
+
+If using the library's cache, `shaper_shape_text_cached` handles the hashing and lookup. As long as a shape is found, it will not enter the uncached code path. By default, this library uses `shaper_shape_harfbuzz` as the `shape_text_uncached` procedure.
+
+Shapes are cached using the following parameters to hash a key:
+
+* font: Font_ID
+* font_size: f32
+* the text itself: string
+
+All shapers fulfill the following interface:
+
+```odin
+Shaper_Shape_Text_Uncached_Proc :: #type proc( ctx : ^Shaper_Context,
+    atlas             : Atlas, 
+    glyph_buffer_size : Vec2,
+    font              : Font_ID,
+    entry             : Entry, 
+    font_px_Size      : f32, 
+    font_scale        : f32, 
+    text_utf8         : string, 
+    output            : ^Shaped_Text 
+)
+```
+
+Which will resolve the output `Shaped_Text`. It has the following definition:
+
+```odin
+Shaped_Text :: struct #packed {
+    glyph          : [dynamic]Glyph,
+    position       : [dynamic]Vec2,
+    visible        : [dynamic]i16,
+    atlas_lru_code : [dynamic]Atlas_Key,
+    region_kind    : [dynamic]Atlas_Region_Kind,
+    bounds         : [dynamic]Range2,
+    end_cursor_pos : Vec2,
+    size           : Vec2,
+    font           : Font_ID, 
+    px_size        : f32,
+}
+```
+
+The result of the shaping process is the glyphs and their positions for the shape; historically resembling what's known as a *Slug* of prepared text for printing. The end position of where the user's "cursor" would be is also recorded, which provides the end position of the shape. The size of the shape is also resolved here, which if using px_scalar must be downscaled. `measure_shape_size` does the downscaling for the user.
+
+`visible` tracks which of the glyphs will actually be relevant for the draw_list pass. This is to avoid a conditional jump during the draw list gen pass. When accessing glyph or position during the draw_list gen, they will use visible's relative index.
+
+The font and px_size are tracked here as well so the user does not need to provide them to the library's interface and related.
+
+As stated under the main heading of this guide, the following are within shaped text so that they may be resolved outside of the draw list generation (see: `generate_shape_draw_list`):
+
+* atlas_lru_code
+* region_kind
+* bounds
+
+These are the same length as the `visible` array, so indexing those will not need to use visible's relative index.
+
+`shaper_shape_text_latin` does naive shaping by utilizing the codepoint's kern_advance and detecting newlines.  
+`shaper_shape_harfbuzz` is an actual shaping *engine*. Here is the general idea of how the library utilizes it for shaping:
+
+1. Reset the state of the hb_buffer
+2. Determine the line height
+3. Go through the codepoints: (for each)
+    1. Determine the codepoint's script
+    2. If the script is neutral (Unknown, Inherited, or of Common type), the script has not changed, or this is the first codepoint of the shape we can add the codepoint to the buffer.
+    3. Otherwise we will have to start a shaping run if we do encounter a significant script change. After, we can add the codepoint to the post-run-cleared hb_buffer.
+    4. This continues until all codepoints have been processed.
+4. We do a final shape run after iterating to make sure all codepoints have been processed.
+5. Set the size of the shape: X is max line width, Y is line height multiplied by the line count.
+6. Resolve the atlas_lru_code, region_kind, and bounds for all visible glyphs
+7. Store the font and px_size information.
+
+The `shape_run` procedure within does the following:
+
+1. Setup the buffer for the batch
+2. Have harfbuzz shape the buffer
+3. Extract glyph infos and positions from the buffer.
+4. Iterate through all glyphs
+    1. If the hb_glyph cluster is > 0, we need to treat it as the indication of a newline glyph. ***(We update position and skip)***
+    2. Update positioning and other metrics and append output shape's glyph and position.
+    3. If the glyph is visible we append it to shape's visible (harfbuzz must specify it as not .notdef, and parser must identify it as non-empty)
+5. We update the output.end_cursor_pos with the last position processed by the iteration
+6. Clear the hb_buffer's contents to prepare for a possible upcoming shape run.
+
+**Note on shape_run.4: The iteration doesn't preserve tracking the clusters, so that information is lost.**  
+*In the future cluster tracking may be added if its found to be important for high level text features beyond rendering.*
+
+**Note on shape_run.4.1: Don't know if the glyph signifying newline should be preserved**  
+
+See [Harfbuzz documentation](https://harfbuzz.github.io) for additional information.
+
+There are other shapers out there:
+
+* [hamza](https://github.com/saidwho12/hamza): A notable C library that could be setup with bindings.
+
+***Note: Monospace fonts may have a much more trivial shaper (however for fonts with ligatures this may not be the case)***  
+***They should only need the kern advance of a single glyph as they're all the same. ligatures (I believe) should preserve this kern advance.***
+
+### Draw List Generation
+
+All interface draw text procedures will ultimately call `generate_shape_draw_list`. If the draw procedure is given text, it will call `shaper_shape_text_cached` on the text immediately before calling it.
+
+Its implementation uses a batched-pipeline approach where its goal is to populate three arrays behaving as queues:  
+
+* oversized: For drawing oversized glyphs
+* to_cache: For glyphs that need triangulation & rendering to glyph buffer then blitting to atlas.
+* cached: For glyphs that are already cached in the atlas and just need to be blit to the render target.
+
+And then send those off to `batch_generate_glyphs_draw_list` for further actual generation to be done. The size of a batch is determined by the capacity of the glyph_buffer's `batch_cache`. This can be set in `glyph_draw_params` for startup.
+
+`glyph_buffer.glyph_pack` is utilized by both `generate_shape_draw_list` and `batch_generate_glyphs_draw_list` to store various computed data in an SOA data structure for the glyphs.
+
+generate_shape_draw_list outline:
+
+1. Prepare glyph_pack, oversized, to_cache, cached, and reset the batch cache
+    * `glyph_pack` is resized to the length of `shape.visible`
+    * The other arrays populated have their reserved set to that length as well (they will not bounds check capacity on append)
+2. Iterate through the shape.visible and resolve glyph_pack's positions.
+3. Iterate through shape.visible this time for final region resolution and segregation of glyphs to their appropriate queue.
+    1. If the glyph's assigned region is `.E` it's oversized. The `oversample` used for rendering to render target will either be 2x or 1x depending on how huge it is.
+    2. The following glyphs are checked to see if their assigned region has the glyph `cached`.
+        1. If it does, it's just appended to cached and marked as seen in the `batch_cache`.
+        2. If it doesn't, then a slot is reserved for it within the atlas's region and the glyph is appended to `to_cache`.
+        3. For either case the atlas_region_bbox is computed.
+    3. After a batch has been resolved, `batch_generate_glyphs_draw_list` is called.
+4. If there is a partially filled batch (the usual case), batch_generate_glyphs_draw_list will be called for it.
+5. The cursor_pos is updated with the shape's end cursor position adjusted for the target space.
+
+batch_generate_glyphs_draw_list outline:
+
+The batch is organized into three major stages:
+
+1. glyph transform & draw quads compute
+2. glyph_buffer draw list generation (`oversized` & `to_cache`)
+3. blit-from-atlas to render target draw list generation (`to_cache` & `cached`)
+
+Glyph transform & draw quads compute does an iteration for each of the 3 arrays.  
+Nearly all the math for all three is done there *except* for `to_cache`, which does its blitting compute in its glyph_buffer draw-list gen pass.
+
+glyph_buffer draw list generation paths for `oversized` and `to_cache` are unique to each.
+
+For `oversized`:
+
+1. Allocate glyph shapes
+2. Iterate oversized:
+    1. Flush the glyph buffer if flagged to do so (reached glyph allocation limit)
+    2. Call `generate_glyph_pass_draw_list` for triangulation and rendering to buffer.
+    3. blit quad.
+3. flush the glyph buffer's draw list.
+4. free glyph shapes
+
+For `to_cache`:
+
+1. Allocate glyph shapes
+2. Iterate to_cache:
+    1. Flush the glyph buffer if flagged to do so (reached glyph allocation limit)
+    2. Compute & blit quads for clearing the atlas region and blitting from the buffer to the atlas.
+    3. Call `generate_glyph_pass_draw_list` for triangulation and rendering to buffer.
+3. flush the glyph buffer's draw list.
+4. free glyph shapes
+5. Do blits from atlas to draw list.
+
+`cached` only needs to blit from the atlas to the render target.
+
+`generate_glyph_pass_draw_list`: sets up the draw call for glyph to the glyph buffer. Currently it also handles triangulation as well. For now the shape triangulation is rudimentary and uses triangle fanning. Eventually it would be nice to offer alternative modes that can be specified on a per-font basis.
+
+`flush_glyph_buffer_draw_list`: Will merge the draw_list's contents of the glyph buffer over to the library's general draw_list, then clear the buffer's draw lists.
+
+### On Layering
+
+The base draw list generation pipeline provided by the library allows the user to batch whatever they want into a single "layer".
+However, the user most likely would want to take into consideration: font instances, font size, colors; these are things that may benefit from having shared locality during a layer batch. Overlapping text benefits from the user handling the ordering via layers.
+
+Layers (so far) are just a set of offsets tracked by the library's `Context.draw_layer` struct. When `flush_draw_list_layer` is called, the offsets are set to the current length of the draw list. This allows the rendering backend to retrieve the latest set of vertices, indices, and calls to render on a per-layer basis with: `get_draw_list_layer`.
+
+Importantly, this leads to the following pattern when enqueuing a layer to render:
+
+1. Begin render pass
+2. For codepath that will deal with text layers
+    1. Process user-level code-path that calls the draw text interface, populating the draw list layer (usually a for loop)
+    2. After iteration on the layer is complete, render the text layer
+        1. grab the draw list layer
+        2. flush the layer so the draw list offsets are reset
+    3. Repeat until all layers for the codepath are exhausted.
+
+There is consideration to instead explicitly have a draw list with more contextual information of the start and end of each layer. So that batching can be orchestrated in an isolated section of their pipeline.
+
+This would involve just tracking *slices* of their draw-list that represents layers:
+
+```odin
+Draw_List_Layer :: struct {
+    vertices : []Vertex,
+    indices  : []u32,
+    calls    : []Draw_Call,
+}
+```
+
+Eventually the library may provide this since adding that feature is relatively cheap and a low line-count addition to the interface.
+There should be little to no performance loss from doing so as the iteration size is too large of a surface area to matter (so its just pipeline ergonomics).
--- a/code/font/vefontcache/docs/guide_backend.md
+++ b/code/font/vefontcache/docs/guide_backend.md
@@ -0,0 +1,68 @@
+# Guide: Backend
+
+The end-user needs to adapt this library to hook into their own codebase. For reference, they can check the [examples](../examples/) and [backend](../backend/) directories for working code that demonstrates what this guide covers.
+
+When rendering text, users need to handle two main aspects: the text to draw and its "layering". Similar to UIs, text should be drawn in layer batches, where each layer can represent a pass with arbitrary distinctions from other layers.
+
+The following components are required:
+
+* Vertex and Index Buffers for glyph meshes
+* Glyph shader for rendering glyphs to the glyph buffer
+* Atlas shader for blitting upscaled glyph quads from the glyph buffer to an atlas region slot (downsampled)
+* "Screen or Target" shader for blitting glyph quads from the atlas to a render target or swapchain
+* The glyph, atlas, and target image buffers
+
+Currently, the library doesn't support sub-pixel AA, so we're only rendering to R8 images.
+
+## Rendering Passes
+
+There are four passes that need to be handled when rendering a draw list:
+
+* Glyph: Rendering a glyph mesh to the glyph buffer
+* Atlas: Blitting a glyph quad from the glyph buffer to an atlas slot
+* Target: Blitting from the atlas image to the target image
+* Target_Uncached: Blitting from the glyph buffer image to the target image
+
+The Target & Target_Uncached passes can technically be handled in the same case. The user just needs to swap between using the atlas image and the glyph buffer image. This is how the backend_sokol.odin's `render_text_layer` has these passes set up.
+
+## Vertex Buffer Layout
+
+The vertex buffer has the following layout for all passes:
+
+* `[2]f32` for positions
+* `[2]f32` for texture coords (Offset is naturally `[2]f32`)
+* Total stride: `[4]f32`
+
+---
+
+The index buffer is a simple u32 stream.
+
+For quad mesh layout details, see `blit_quad` in [draw.odin](../vefontcache/draw.odin).
+
+For glyph shape triangulation meshes, the library currently only uses a triangle fanning technique, implemented in `fill_path_via_fan_triangulation` within [draw.odin](../vefontcache/draw.odin). Eventually, the library will support other modes on a per-font basis.
+
+## UV Coordinate Conventions (GLSL vs HLSL)
+
+DirectX, Metal, and Vulkan consider the top-left corner as (0, 0), where the Y axis increases downward (traditional screenspace). This library follows OpenGL's convention, where (0, 0) is at the bottom-left (Y goes up).
+
+Adjust the UV coordinates in your shader accordingly:
+
+```c
+#if !OpenGL
+uv = vec2(v_texture.x, 1.0 - v_texture.y);
+#else
+uv = vec2(v_texture.x, v_texture.y);
+#endif
+```
+
+Eventually, the library will support both conventions as a comp-time conditional.
+
+## Retrieving & Processing the layer
+
+`get_draw_list_layer` will provide the layer's vertex, index, and draw call slices. Unless the default is overwritten, it will call `optimize_draw_list` before returning the slices (profile to see what's better for your use case).  
+Once those are retrieved, call `flush_draw_list_layer` to update the layer offsets tracked by the library's `Context`.
+
+The vertex and index slices just need to be appended to your backend's vertex and index buffers.  
+The draw calls need to be iterated with a switch statement for the aforementioned pass types. Within each case you can construct and enqueue the passes.
+
+---
--- a/code/font/vefontcache/docs/original/README.md
+++ b/code/font/vefontcache/docs/original/README.md
@@ -1,114 +0,0 @@
-# Notice
-
-This is the original readme fo the C++ implementation by Xi Chen.
-
-# VE Font Cache is a single header-only GPU font rendering library designed for game engines.
-
-It aims to:
- * Be fast and simple to integrate.
- * Take advantage of modern GPU power.
- * Be backend agnostic and easy to port to any API such as Vulkan, DirectX, OpenGL.
- * Load TTF & OTF file formats directly.
- * Use only runtime cache with no offline calculation.
- * Render glyphs at reasonable quality at a wide range of font sizes.
- * Support a good amount of internationalisation. そうですね!
- * Support cached text shaping with HarfBuzz with simple Latin-style fallback.
- * Load and unload fonts at any time.
-
-# How it works
-
-Glyphs are GPU rasterised with 16x supersampling. This method is a simplification of "Easy Scalable Text Rendering on the GPU",
-by Evan Wallace, making use of XOR blending. Bézier curves are handled via brute force triangle tessellation; even 6 triangles per
-curve only generates < 300 triangles, which is nothing for modern GPUs! This avoids complex frag shader for reasonable quality.
-
-![Wireframe with GPU XOR blending](images/wireframe.png)
-
-Texture atlas caching uses naïve grid placement; this wastes a lot of space but ensures interchangeable cache slots allowing for
-straight up LRU ( Least Recently Used ) caching scheme to be employed.
-
-The font atlas is a single 4k x 2k R8 texture divided into 4 regions:
-
-```
-     2k
-     --------------------
-     |         |        |
-     |    A    |        |
-     |         |        | 2
-     |---------|    C   | k  
-     |         |        |
-  1k |    B    |        |
-     |         |        |
-     --------------------
-     |                  |
-     |                  |
-     |                  | 2
-     |        D         | k  
-     |                  |
-     |                  |
-     |                  |
-     --------------------        
-               
-     Region A = 32x32 caches, 1024 glyphs
-     Region B = 32x64 caches, 512 glyphs
-     Region C = 64x64 caches, 512 glyphs
-     Region D = 128x128 caches, 256 glyphs
-```
-
-Region A is designed for small glyphs, Region B is for tall glyphs, Region C is for large glyphs, and Region D for huge glyphs.
-Glyphs are first rendered to an intermediate 2k x 512px R8 texture. This allows for minimum 4 Region D glyphs supersampled at
-4 x 4 = 16x supersampling, and 8 Region C glyphs similarly. A simple 16-tap box downsample shader is then used to blit from this
-intermediate texture to the final atlas location.
-
-The atlas texture looks something like this:
-![Wireframe with GPU XOR blending](images/atlas_small.png)
-
-# Usage
-
-Pseudo-code demonstrating simple usage:
-```cpp
-#define VE_FONTCACHE_IMPL
-#include "../ve_fontcache.h"
-
-static std::vector< uint8_t > buffer;
-ve_fontcache_init( &cache );
-ve_fontcache_configure_snap( &cache, width, height );
-print_font = ve_fontcache_loadfile( &cache, "fonts/NotoSansJP-Light.otf", buffer, 19.0f );
-ve_fontcache_draw_text( &cache, print_font, u8"hello world", 0, 0, 1.0f / width,  1.0f / height );
-```
-
-These header files need to be copied to your project:
-```
-ve_fontcache.h
-utf8.h
-stb_truetype.h
-```
-
-Except HarfBuzz, that's all the required dependencies. That said it's strongly recommended
-to use HarfBuzz ( TODO: HarfBuzz not supported yet, coming soon!! ) over the default utf8.h latin
-fallback text shaper.
-
-## Integration with rendering backend
-
-VEFontCache is largely backend agnostic. Currently the demo project uses OpenGL 3.3 for Windows.
-That said it's designed to be integrated with VE, a Vulkan engine.
-Please read the "How to plug into rendering API" section in ve_fontcache.h for more documentation
-on how to implement your own backend to plumb this directly into your engine!
-
-# Screenshots
-
-![Screenshot 1](images/ve_fontcache_demo1.png)
-
-![Screenshot 2](images/ve_fontcache_demo2.png)
-
-![Screenshot 3](images/raincode.png)
-
-![Screenshot 4](images/ve_fontcache_pressure_test.gif)
-
-# Similar projects and links
-
-Here are links to some awesome similar and related projects:
-* fontstash - https://github.com/memononen/fontstash
-* stb_truetype ( has font rasterisation itself ) - https://github.com/nothings/stb/blob/master/stb_truetype.h
-* slug - http://sluglibrary.com/
-* pathfinder - https://github.com/pcwalton/pathfinder
-* https://medium.com/@evanwallace/easy-scalable-text-rendering-on-the-gpu-c3f4d782c5ac
--- a/code/font/vefontcache/draw.odin
+++ b/code/font/vefontcache/draw.odin
@@ -4,10 +4,7 @@ package vefontcache
 	Note(Ed): This may be seperated in the future into another file dedending on how much is involved with supportin ear-clipping triangulation.
 */

-import "base:runtime"
-import "base:intrinsics"
-import "core:slice"
-import "thirdparty:freetype"
+// import "thirdparty:freetype"

 Glyph_Trianglation_Method :: enum(i32) {
 	Ear_Clipping,
@@ -32,21 +29,13 @@ Glyph_Draw_Quad :: struct {
 // to track relevant glyph data in soa format for pipelined processing
 Glyph_Pack_Entry :: struct #packed {
 	position           : Vec2,
-
 	atlas_index        : i32,
-	in_atlas           : b8,
-	should_cache       : b8,
 	region_pos         : Vec2,
 	region_size        : Vec2,
-
 	over_sample        : Vec2, // Only used for oversized glyphs
-
-	shape             : Parser_Glyph_Shape,
-	draw_transform    : Transform,
-
+	shape              : Parser_Glyph_Shape,
+	draw_transform     : Transform,
 	draw_quad          : Glyph_Draw_Quad,
-	draw_atlas_quad    : Glyph_Draw_Quad,
-	draw_quad_clear    : Glyph_Draw_Quad,
 	buffer_x           : f32,
 	flush_glyph_buffer : b8,
 }
@@ -109,7 +98,7 @@ Glyph_Draw_Buffer :: struct{
 	cached     : [dynamic]i32,
 }

-// Contructs a quad mesh for bliting a texture from one render target (src uv0 & 1) to the destination rendertarget (p0, p1)
+// Constructs a quad mesh for blitting a texture from the source render target (src uv0 & 1) to the destination render target (p0, p1)
@(optimization_mode="favor_size")
 blit_quad :: #force_inline proc ( draw_list : ^Draw_List, 
 	p0  : Vec2 = {0, 0}, 
@@ -279,34 +268,25 @@ generate_shapes_draw_list :: #force_inline proc ( ctx : ^Context,
 }

 /* Generator pipeline for shapes
-
-	If you'd like to make a custom draw procedure, this can either be used directly or 
-    modified to create an augmented derivative for a specific code path.
-
 	This procedure has no awareness of layers. That should be handled by a higher-order codepath. 
-	For this level of codepaths what matters is maximizing memory locality for:
-	  * Dealing with shaping (essentially minimizing having to ever deal with it in a hot path if possible)
-		* Dealing with atlas regioning (the expensive region resolution & parser calls are done on the shape pass)

 	Pipleine order:
 	* Resolve the glyph's position offset from the target position
 	* Segregate the glyphs into three slices: oversized, to_cache, cached. 
-	  * If oversized is not necessary for your use case and your hitting a bottleneck, omit it with setting ENABLE_OVERSIZED_GLYPHS to false.
-		  * You have to to be drawing a px font size > ~140 px for it to trigger.
-			* The atlas can be scaled with the size_multiplier parameter of startup so that it becomes more irrelevant if processing a larger atlas is a non-issue.
+		* If oversized is not necessary for your use case and you're hitting a bottleneck, omit it by setting ENABLE_OVERSIZED_GLYPHS to false.
 		* The segregation will not allow slices to exceed the batch_cache capacity of the glyph_buffer (configurable within startup params)
-		  * When The capacity is reached batch_generate_glyphs_draw_list will be called which will do futher compute and then finally draw_list generation.
-	* This may perform better with smaller shapes vs larger shapes, but having more shapes has a cache lookup penatly so keep that in mind.
+		* When the capacity is reached, batch_generate_glyphs_draw_list will be called, which will do further compute and then finally draw_list generation.
+	* This may perform better with smaller shapes vs larger shapes, but having more shapes has a cache lookup penalty (if done per frame) so keep that in mind.
 */
 generate_shape_draw_list :: proc( draw_list : ^Draw_List, shape : Shaped_Text,
 	atlas        : ^Atlas,
 	glyph_buffer : ^Glyph_Draw_Buffer,
 	px_scalar    : f32,

-	colour           : RGBAN,
-	entry            : Entry,
-	px_size          : f32,
-	font_scale       : f32,
+	colour     : RGBAN,
+	entry      : Entry,
+	px_size    : f32,
+	font_scale : f32,

 	target_position : Vec2,
 	target_scale    : Vec2,
@@ -332,11 +312,21 @@ generate_shape_draw_list :: proc( draw_list : ^Draw_List, shape : Shaped_Text,
 	oversized  := & glyph_buffer.oversized
 	to_cache   := & glyph_buffer.to_cache
 	cached     := & glyph_buffer.cached
-	resize_soa_non_zero(glyph_pack, len(shape.glyph))
+	resize_soa_non_zero(glyph_pack, len(shape.visible))
+	
+	profile_begin("batching & segregating glyphs")
+	// We do any reservation up front as appending to the arrays will not check capacity.
+	reserve(oversized, len(shape.visible))
+	reserve(to_cache,  len(shape.visible))
+	reserve(cached,    len(shape.visible))
+	clear(oversized)
+	clear(to_cache)
+	clear(cached)
+	reset_batch( & glyph_buffer.batch_cache)

 	append_sub_pack :: #force_inline proc ( pack : ^[dynamic]i32, entry : i32 )
 	{
-		raw := cast(^runtime.Raw_Dynamic_Array) pack
+		raw := cast(^Raw_Dynamic_Array) pack
 		raw.len            += 1
 		pack[len(pack) - 1] = entry
 	}
@@ -344,27 +334,22 @@ generate_shape_draw_list :: proc( draw_list : ^Draw_List, shape : Shaped_Text,

 	profile_begin("translate")
 	for & glyph, index in glyph_pack {
-		glyph.position = target_position + (shape.position[index]) * target_scale
+		// Throughout the draw list generation vis_id will need to be used over index as 
+		// not all glyphs or positions for the shape are visibly rendered.
+		vis_id        := shape.visible[index]
+		glyph.position = target_position + (shape.position[vis_id]) * target_scale
 	}
 	profile_end()

-	profile_begin("batching & segregating glyphs")
-	clear(oversized)
-	clear(to_cache)
-	clear(cached)
-	reset_batch( & glyph_buffer.batch_cache)
-
 	for & glyph, index in glyph_pack
 	{
+		// atlas_lru_code, region_kind, and bounds are all 1:1 with shape.visible
 		atlas_key          := shape.atlas_lru_code[index]
 		region_kind        := shape.region_kind[index]
 		bounds             := shape.bounds[index]
 		bounds_size_scaled := size(bounds) * font_scale

-		if region_kind == .None { 
-			assert(false, "FAILED TO ASSGIN REGION")
-			continue
-	 	}
+		assert(region_kind != .None, "FAILED TO ASSGIN REGION")
 		when ENABLE_OVERSIZED_GLYPHS
 		{
 			if region_kind == .E
@@ -463,16 +448,20 @@ generate_shape_draw_list :: proc( draw_list : ^Draw_List, shape : Shaped_Text,

 /*
 	The glyphs types have been segregated by this point into a batch slice of indices to the glyph_pack
-	The transform and draw quads are computed first (getting the math done in one spot as possible...)
-	Some of the math from to_cache pass for glyph generation was not moved over (it could be but I'm not sure its worth it...)
+	The transform and draw quads are computed first (getting as much of the math done in one spot as possible)
+	Some of the math from to_cache pass for glyph generation was not moved over (it could be but I'm not sure it's worth it)

-	Order: Oversized first, then to_cache, then cached.
+	Order    : Oversized first, then to_cache, then cached.
+	Important: These slices store ids for glyph_pack which matches shape.visible in index. 
+	shape.position and shape.glyph DO NOT.
+
+	There are only two places this matters for: getting glyph shapes when doing glyph pass generation for oversized and to_cache iterations.

 	Oversized and to_cache will both enqueue operations for rendering glyphs to the glyph buffer render target.
-	The compute section will have operations reguarding how many glyphs they may alloate before a flush must occur.
+	The compute section will have operations regarding how many glyphs they may allocate before a flush must occur.
 	A flush will force one of the following:
 	  * Oversized will have a draw call setup to blit directly from the glyph buffer to the target.
-		* to_cache will blit the glyphs rendered to the buffer to the atlas.
+		* to_cache will blit the glyphs rendered from the buffer to the atlas.
 */
@(optimization_mode = "favor_size")
 batch_generate_glyphs_draw_list :: proc ( draw_list : ^Draw_List,
@@ -487,10 +476,10 @@ batch_generate_glyphs_draw_list :: proc ( draw_list : ^Draw_List,
 	atlas_size        : Vec2,
 	glyph_buffer_size : Vec2,

-	entry                 : Entry,
-	colour                : RGBAN,
-	font_scale            : Vec2,
-	target_scale          : Vec2,
+	entry        : Entry,
+	colour       : RGBAN,
+	font_scale   : Vec2,
+	target_scale : Vec2,
 ) #no_bounds_check
 {
 	profile(#procedure)
@@ -602,9 +591,11 @@ batch_generate_glyphs_draw_list :: proc ( draw_list : ^Draw_List,
 			colour.b = 0.0
 		}
 		for pack_id, index in oversized {
+			vis_id := shape.visible[pack_id]
 			error : Allocator_Error
-			glyph_pack[pack_id].shape, error = parser_get_glyph_shape(entry.parser_info, shape.glyph[pack_id])
+			glyph_pack[pack_id].shape, error = parser_get_glyph_shape(entry.parser_info, shape.glyph[vis_id])
 			assert(error == .None)
+			assert(glyph_pack[pack_id].shape != nil)
 		}
 		for id, index in oversized
 		{
@@ -615,7 +606,7 @@ batch_generate_glyphs_draw_list :: proc ( draw_list : ^Draw_List,
 				& glyph_buffer.clear_draw_list, 
 				& glyph_buffer.allocated_x
 			)
-			
+
 			generate_glyph_pass_draw_list( draw_list, & glyph_buffer.shape_gen_scratch,
 				glyph_pack[id].shape, 
 				entry.curve_quality, 
@@ -642,7 +633,10 @@ batch_generate_glyphs_draw_list :: proc ( draw_list : ^Draw_List,
 		}

 		flush_glyph_buffer_draw_list(draw_list, & glyph_buffer.draw_list, & glyph_buffer.clear_draw_list, & glyph_buffer.allocated_x)
-		for id, index in oversized do parser_free_shape(entry.parser_info, glyph_pack[id].shape)
+		for pack_id, index in oversized {
+			assert(glyph_pack[pack_id].shape != nil)
+			parser_free_shape(entry.parser_info, glyph_pack[pack_id].shape)
+		}
 	}
 	profile_end()

@@ -672,9 +666,11 @@ batch_generate_glyphs_draw_list :: proc ( draw_list : ^Draw_List,
 	if len(to_cache) > 0
 	{
 		for pack_id, index in to_cache {
+			vis_id := shape.visible[pack_id]
 			error : Allocator_Error
-			glyph_pack[pack_id].shape, error = parser_get_glyph_shape(entry.parser_info, shape.glyph[pack_id])
+			glyph_pack[pack_id].shape, error = parser_get_glyph_shape(entry.parser_info, shape.glyph[vis_id])
 			assert(error == .None)
+			assert(glyph_pack[pack_id].shape != nil)
 		}

 		for id, index in to_cache
@@ -728,7 +724,7 @@ batch_generate_glyphs_draw_list :: proc ( draw_list : ^Draw_List,
 	
 			append( & glyph_buffer.clear_draw_list.calls, clear_target_region )
 			append( & glyph_buffer.draw_list.calls,       blit_to_atlas )
-	
+
 			// Render glyph to glyph render target (FBO)
 			generate_glyph_pass_draw_list( draw_list, & glyph_buffer.shape_gen_scratch, 
 				glyph.shape, 
@@ -740,7 +736,10 @@ batch_generate_glyphs_draw_list :: proc ( draw_list : ^Draw_List,
 		}

 		flush_glyph_buffer_draw_list(draw_list, & glyph_buffer.draw_list, & glyph_buffer.clear_draw_list, & glyph_buffer.allocated_x)
-		for id, index in to_cache do parser_free_shape(entry.parser_info, glyph_pack[id].shape)
+		for pack_id, index in to_cache {
+			assert(glyph_pack[pack_id].shape != nil)
+			parser_free_shape(entry.parser_info, glyph_pack[pack_id].shape)
+		} 

 		profile_begin("gen_cached_draw_list: to_cache")
 		when ENABLE_DRAW_TYPE_VISUALIZATION {
--- a/code/font/vefontcache/freetype_wip.odin
+++ b/code/font/vefontcache/freetype_wip.odin
@@ -1,163 +0,0 @@
-package vefontcache
-
-when false {
-// TODO(Ed): Freetype support
-
-// TODO(Ed): glyph triangulation cannot be handled in a 'font parser' abstraction. Just going to have explicit procedures to grab info neatly...
-cache_glyph_freetype :: proc(ctx: ^Context, font: Font_ID, glyph_index: Glyph, entry: ^Entry, bounds_0, bounds_1: Vec2, scale, translate: Vec2) -> b32
-{
-	draw_filled_path_freetype :: proc( draw_list : ^Draw_List, outside_point : Vec2, path : []Vertex,
-		scale     := Vec2 { 1, 1 },
-		translate := Vec2 { 0, 0 },
-		debug_print_verbose : b32 = false
-	)
-	{
-		if debug_print_verbose {
-			log("outline_path:")
-			for point in path {
-				vec := point.pos * scale + translate
-				logf(" %0.2f %0.2f", vec.x, vec.y )
-			}
-		}
-
-		v_offset := cast(u32) len(draw_list.vertices)
-		for point in path
-		{
-			transformed_point := Vertex {
-				pos = point.pos * scale + translate,
-				u = 0,
-				v = 0
-			}
-			append( & draw_list.vertices, transformed_point )
-		}
-
-		if len(path) > 2
-		{
-			indices := & draw_list.indices
-			for index : u32 = 1; index < cast(u32) len(path) - 1; index += 1 {
-				to_add := [3]u32 {
-					v_offset,
-					v_offset + index,
-					v_offset + index + 1
-				}
-				append( indices, ..to_add[:] )
-			}
-
-			// Close the path by connecting the last vertex to the first two
-			to_add := [3]u32 {
-				v_offset,
-				v_offset + cast(u32)(len(path) - 1),
-				v_offset + 1
-			}
-			append( indices, ..to_add[:] )
-		}
-	}
-
-	if glyph_index == Glyph(0) {
-		return false
-	}
-
-	face := entry.parser_info.freetype_info
-	error := freetype.load_glyph(face, u32(glyph_index), {.No_Bitmap, .No_Scale})
-	if error != .Ok {
-		return false
-	}
-
-	glyph := face.glyph
-	if glyph.format != .Outline {
-		return false
-	}
-
-	outline := &glyph.outline
-	if outline.n_points == 0 {
-		return false
-	}
-
-	draw            := Draw_Call_Default
-	draw.pass        = Frame_Buffer_Pass.Glyph
-	draw.start_index = cast(u32) len(ctx.draw_list.indices)
-
-	contours := slice.from_ptr(cast( [^]i16)             outline.contours, int(outline.n_contours))
-	points   := slice.from_ptr(cast( [^]freetype.Vector) outline.points,   int(outline.n_points))
-	tags     := slice.from_ptr(cast( [^]u8)              outline.tags,     int(outline.n_points))
-
-	path := &ctx.temp_path
-	clear(path)
-
-	outside := Vec2{ bounds_0.x - 21, bounds_0.y - 33 }
-
-	start_index: int = 0
-	for contour_index in 0 ..< int(outline.n_contours)
-	{
-		end_index   := int(contours[contour_index]) + 1
-		prev_point  : Vec2
-		first_point : Vec2
-
-		for idx := start_index; idx < end_index; idx += 1
-		{
-			current_pos := Vec2 { f32( points[idx].x ), f32( points[idx].y ) }
-			if ( tags[idx] & 1 ) == 0
-			{
-				// If current point is off-curve
-				if (idx == start_index || (tags[ idx - 1 ] & 1) != 0)
-				{
-					// current is the first or following an on-curve point
-					prev_point = current_pos
-				}
-				else
-				{
-					// current and previous are off-curve, calculate midpoint
-					midpoint := (prev_point + current_pos) * 0.5
-					append( path, Vertex { pos = midpoint } )  // Add midpoint as on-curve point
-					if idx < end_index - 1
-					{
-						// perform interp from prev_point to current_pos via midpoint
-						step := 1.0 / entry.curve_quality
-						for alpha : f32 = 0.0; alpha <= 1.0; alpha += step
-						{
-							bezier_point := eval_point_on_bezier3( prev_point, midpoint, current_pos, alpha )
-							append( path, Vertex{ pos = bezier_point } )
-						}
-					}
-
-					prev_point = current_pos
-				}
-			}
-			else
-			{
-				if idx == start_index {
-					first_point = current_pos
-				}
-				if prev_point != (Vec2{}) {
-					// there was an off-curve point before this
-					append(path, Vertex{ pos = prev_point}) // Ensure previous off-curve is handled
-				}
-				append(path, Vertex{ pos = current_pos})
-				prev_point = {}
-			}
-		}
-
-		// ensure the contour is closed
-		if path[0].pos != path[ len(path) - 1 ].pos {
-			append(path, Vertex{pos = path[0].pos})
-		}
-		draw_filled_path(&ctx.draw_list, bounds_0, path[:], scale, translate)
-		// draw_filled_path(&ctx.draw_list, bounds_0, path[:], scale, translate, ctx.debug_print_verbose)
-		clear(path)
-		start_index = end_index
-	}
-
-	if len(path) > 0 {
-		// draw_filled_path(&ctx.draw_list, outside, path[:], scale, translate, ctx.debug_print_verbose)
-		draw_filled_path(&ctx.draw_list, outside, path[:], scale, translate)
-	}
-
-	draw.end_index = cast(u32) len(ctx.draw_list.indices)
-	if draw.end_index > draw.start_index {
-		append( & ctx.draw_list.calls, draw)
-	}
-
-	return true
-}
-
-}
--- a/code/font/vefontcache/misc.odin
+++ b/code/font/vefontcache/misc.odin
@@ -5,9 +5,7 @@ package vefontcache
 	Just a bunch of utilities.
 */

-import "base:runtime"
 import "core:simd"
-import "core:math"

 import core_log "core:log"

@@ -16,17 +14,17 @@ peek_array :: #force_inline proc "contextless" ( self : [dynamic]$Type ) -> Type
 }

 reload_array :: #force_inline proc( self : ^[dynamic]$Type, allocator : Allocator ) {
-	raw          := transmute( ^runtime.Raw_Dynamic_Array) self
+	raw          := transmute( ^Raw_Dynamic_Array) self
 	raw.allocator = allocator
 }

 reload_array_soa :: #force_inline proc( self : ^#soa[dynamic]$Type, allocator : Allocator ) {
-	raw          := runtime.raw_soa_footer(self)
+	raw          := raw_soa_footer(self)
 	raw.allocator = allocator
 }

 reload_map :: #force_inline proc( self : ^map [$KeyType] $EntryType, allocator : Allocator ) {
-	raw          := transmute( ^runtime.Raw_Map) self
+	raw          := transmute( ^Raw_Map) self
 	raw.allocator = allocator
 }

@@ -61,7 +59,7 @@ vec2i_from_vec2   :: #force_inline proc "contextless" ( v2     : Vec2  ) -> Vec2
@(require_results) ceil_vec2  :: proc "contextless" ( v : Vec2 ) -> Vec2 { return { ceil_f32(v.x), ceil_f32(v.y) } }
@(require_results) floor_vec2 :: proc "contextless" ( v : Vec2 ) -> Vec2 { return { floor_f32(v.x), floor_f32(v.y) } }

-// This buffer is used below excluisvely to prevent any allocator recusion when verbose logging from allocators.
+// This buffer is used below exclusively to prevent any allocator recursion when verbose logging from allocators.
 // This means a single line is limited to 4k buffer
 // Logger_Allocator_Buffer : [4 * Kilobyte]u8

--- a/code/font/vefontcache/parser.odin
+++ b/code/font/vefontcache/parser.odin
@@ -2,10 +2,10 @@ package vefontcache

 /*
 Notes:
-This is a minimal wrapper I originally did incase something than stb_truetype is introduced in the future.
+This is a minimal wrapper I originally did in case a font parser other than stb_truetype is introduced in the future.
 Otherwise, its essentially 1:1 with it.

-Freetype isn't really supported and its not a high priority (pretty sure its too slow).
+Freetype isn't really supported and it's not a high priority.
 ~~Freetype will do memory allocations and has an interface the user can implement.~~
 ~~That interface is not exposed from this parser but could be added to parser_init.~~

@@ -15,11 +15,8 @@ TODO(Ed): Just keep a local version of stb_truetype and modify it to support a s
 Already wanted to do so anyway to evaluate the shape generation implementation.
 */

-import "base:runtime"
 import "core:c"
-import "core:math"
-import "core:slice"
-import stbtt    "vendor:stb/truetype"
+import stbtt    "thirdparty:stb/truetype"
 // import freetype "thirdparty:freetype"

 Parser_Kind :: enum u32 {
@@ -31,7 +28,7 @@ Parser_Font_Info :: struct {
 	label : string,
 	kind  : Parser_Kind,
 	using _ : struct #raw_union {
-		stbtt_info    : stbtt.fontinfo,
+		stbtt_info : stbtt.fontinfo,
 		// freetype_info : freetype.Face
 	},
 	data : []byte,
@@ -57,13 +54,46 @@ Parser_Glyph_Vertex :: struct {
 Parser_Glyph_Shape :: [dynamic]Parser_Glyph_Vertex

 Parser_Context :: struct {
-	kind       : Parser_Kind,
+	lib_backing : Allocator,
+	kind        : Parser_Kind,
 	// ft_library : freetype.Library,
 }

-parser_init :: proc( ctx : ^Parser_Context, kind : Parser_Kind )
+parser_stbtt_allocator_proc :: proc(
+	allocator_data : rawptr, 
+	type           : stbtt.gbAllocationType, 
+	size           : c.ssize_t, 
+	alignment      : c.ssize_t, 
+	old_memory     : rawptr, 
+	old_size       : c.ssize_t, 
+	flags          : c.ulonglong
+) -> rawptr
 {
-	ctx.kind = kind
+	allocator := transmute(^Allocator) allocator_data
+	result, error := allocator.procedure( allocator.data, cast(Allocator_Mode) type, cast(int) size, cast(int) alignment, old_memory, cast(int) old_size )
+	assert(error == .None)
+
+	if type == .Alloc || type == .Resize {
+		raw := transmute(Raw_Slice) result
+		// assert(raw.len > 0, "Allocation is 0 bytes?")
+		return transmute(rawptr) raw.data
+	}
+	else do return nil
+}
+
+parser_init :: proc( ctx : ^Parser_Context, kind : Parser_Kind, allocator := context.allocator )
+{
+	ctx.kind        = kind
+	ctx.lib_backing = allocator
+
+	stbtt_allocator := stbtt.gbAllocator { parser_stbtt_allocator_proc, & ctx.lib_backing }
+	stbtt.SetAllocator( stbtt_allocator )
+}
+
+parser_reload :: proc( ctx : ^Parser_Context, allocator := context.allocator) {
+	ctx.lib_backing = allocator
+	stbtt_allocator := stbtt.gbAllocator { parser_stbtt_allocator_proc, & ctx.lib_backing }
+	stbtt.SetAllocator( stbtt_allocator )
 }

 parser_shutdown :: proc( ctx : ^Parser_Context ) {
@@ -94,7 +124,9 @@ parser_find_glyph_index :: #force_inline proc "contextless" ( font : Parser_Font

 parser_free_shape :: #force_inline proc( font : Parser_Font_Info, shape : Parser_Glyph_Shape )
 {
-	stbtt.FreeShape( font.stbtt_info, transmute( [^]stbtt.vertex) raw_data(shape) )
+	shape     := shape
+	shape_raw := transmute( ^Raw_Dynamic_Array) & shape
+	stbtt.FreeShape( font.stbtt_info, transmute( [^]stbtt.vertex) shape_raw.data )
 }

 parser_get_codepoint_horizontal_metrics :: #force_inline proc "contextless" ( font : Parser_Font_Info, codepoint : rune ) -> ( advance, to_left_side_glyph : i32 )
@@ -134,11 +166,11 @@ parser_get_glyph_shape :: #force_inline proc ( font : Parser_Font_Info, glyph_in
 	stb_shape : [^]stbtt.vertex
 	nverts    := stbtt.GetGlyphShape( font.stbtt_info, cast(i32) glyph_index, & stb_shape )

-	shape_raw          := transmute( ^runtime.Raw_Dynamic_Array) & shape
+	shape_raw          := transmute( ^Raw_Dynamic_Array) & shape
 	shape_raw.data      = stb_shape
 	shape_raw.len       = int(nverts)
 	shape_raw.cap       = int(nverts)
-	shape_raw.allocator = runtime.nil_allocator()
+	shape_raw.allocator = nil_allocator()
 	error = Allocator_Error.None
 	return
 }
@@ -151,7 +183,7 @@ parser_is_glyph_empty :: #force_inline proc "contextless" ( font : Parser_Font_I
 parser_scale :: #force_inline proc "contextless" ( font : Parser_Font_Info, size : f32 ) -> f32
 {
 	// profile(#procedure)
-	size_scale := size > 0.0 ? parser_scale_for_pixel_height( font, size ) : parser_scale_for_mapping_em_to_pixels( font, -size )
+	size_scale := size > 0.0 ? parser_scale_for_mapping_em_to_pixels( font, size ) :  parser_scale_for_pixel_height( font, -size )
 	return size_scale
 }

--- a/code/font/vefontcache/pkg_mapping.odin
+++ b/code/font/vefontcache/pkg_mapping.odin
@@ -3,6 +3,11 @@ package vefontcache
 import "base:builtin"
 	resize_soa_non_zero :: non_zero_resize_soa
 import "base:runtime"
+	Raw_Dynamic_Array :: runtime.Raw_Dynamic_Array
+	Raw_Map           :: runtime.Raw_Map
+	Raw_Slice         :: runtime.Raw_Slice
+	raw_soa_footer    :: runtime.raw_soa_footer
+	nil_allocator     :: runtime.nil_allocator
 import "core:hash"
 	ginger16 :: hash.ginger16
 import "core:math"
@@ -32,6 +37,7 @@ import "core:mem"

 	Allocator       :: mem.Allocator
 	Allocator_Error :: mem.Allocator_Error
+	Allocator_Mode  :: mem.Allocator_Mode

 	Arena           :: mem.Arena
 	arena_allocator :: mem.arena_allocator
@@ -48,7 +54,7 @@ append :: proc {
 }

 append_soa :: proc {
-	append_soa_elem
+	append_soa_elem,
 }

 ceil :: proc {
@@ -119,6 +125,10 @@ resize :: proc {
 	builtin.resize_dynamic_array,
 }

+round :: proc {
+	math.round_f32,
+}
+
 size :: proc {
 	size_range2,
 }
@@ -136,22 +146,4 @@ vec2_64 :: proc {
 	vec2_64_from_vec2,
 }

-import "../../grime"
-
-@(deferred_none = profile_end, disabled = DISABLE_PROFILING)
-profile :: #force_inline proc "contextless" ( name : string, loc := #caller_location ) {
-	grime.profile_begin(name, loc)
-}
-
-@(disabled = DISABLE_PROFILING)
-profile_begin :: #force_inline proc "contextless" ( name : string, loc := #caller_location ) {
-	grime.profile_begin(name, loc)
-}
-
-@(disabled = DISABLE_PROFILING)
-profile_end :: #force_inline proc "contextless" () {
-	grime.profile_end()
-}
-
 //#endregion("Proc overload mappings")
-
--- a/code/font/vefontcache/profiling.odin
+++ b/code/font/vefontcache/profiling.odin
@@ -0,0 +1,17 @@
+package vefontcache
+
+// Add profiling hookup here
+
+// import ""
+
+@(deferred_none = profile_end, disabled = DISABLE_PROFILING)
+profile :: #force_inline proc "contextless" ( name : string, loc := #caller_location ) {
+}
+
+@(disabled = DISABLE_PROFILING)
+profile_begin :: #force_inline proc "contextless" ( name : string, loc := #caller_location ) {
+}
+
+@(disabled = DISABLE_PROFILING)
+profile_end :: #force_inline proc "contextless" () {
+}
--- a/code/font/vefontcache/shaper.odin
+++ b/code/font/vefontcache/shaper.odin
@@ -1,6 +1,8 @@
 package vefontcache
 /*
-Note(Ed): The only reason I didn't directly use harfbuzz is because hamza exists and seems to be under active development as an alternative.
+Note(Ed): The only reason I didn't directly use harfbuzz is that hamza exists:
+https://github.com/saidwho12/hamza 
+and it seems to be under active development as an alternative.
 */

 import "core:c"
@@ -13,26 +15,26 @@ Shape_Key :: u32
 	Traditionally a shape only refers to resolving which glyph and 
 	its position should be used for rendering.

-	For this library's case it also involes keeping any content 
-	that does not have to be resolved once again in the later stage of processing:
-		* Resolve atlas lru codes
-		* Resolve glyph bounds and scale
-		* Resolve atlas region the glyph is associated with.
+	For this library's case it also resolves any content that does not have to be done 
+	on a per-frame basis for draw list generation:
+		* atlas lru codes
+		* glyph bounds and scale
+		* atlas region the glyph is associated with.

 	Ideally the user should resolve this shape once and cache/store it on their side.
-	They have the best ability to avoid costly lookups to streamline 
-	a hot path to only focusing on draw list generation that must be computed every frame.
+	They have the best ability to avoid costly lookups.
 */
 Shaped_Text :: struct #packed {
-	glyph              : [dynamic]Glyph,
-	position           : [dynamic]Vec2,
-	atlas_lru_code     : [dynamic]Atlas_Key,
-	region_kind        : [dynamic]Atlas_Region_Kind,
-	bounds             : [dynamic]Range2,
-	end_cursor_pos     : Vec2,
-	size               : Vec2,
-	font_id            : Font_ID, 
-	// TODO(Ed): We need to track the font here for usage in user interface when directly drawing the shape.
+	glyph          : [dynamic]Glyph,
+	position       : [dynamic]Vec2,
+	visible        : [dynamic]i32,
+	atlas_lru_code : [dynamic]Atlas_Key,
+	region_kind    : [dynamic]Atlas_Region_Kind,
+	bounds         : [dynamic]Range2,
+	end_cursor_pos : Vec2,
+	size           : Vec2,
+	font           : Font_ID, 
+	px_size        : f32,
 }

 // Ease of use cache, can handle thousands of lookups per frame with ease.
@@ -47,6 +49,7 @@ Shaped_Text_Cache :: struct {
 Shaper_Shape_Text_Uncached_Proc :: #type proc( ctx : ^Shaper_Context,
 	atlas             : Atlas, 
 	glyph_buffer_size : Vec2,
+	font              : Font_ID,
 	entry             : Entry, 
 	font_px_Size      : f32, 
 	font_scale        : f32, 
@@ -81,7 +84,7 @@ shaper_init :: proc( ctx : ^Shaper_Context )
 	assert( ctx.hb_buffer != nil, "VEFontCache.shaper_init: Failed to create harfbuzz buffer")
 }

-shaper_shutdown :: proc( ctx : ^Shaper_Context )
+shaper_shutdown :: proc( ctx : ^Shaper_Context ) 
 {
 	if ctx.hb_buffer != nil {
 		harfbuzz.buffer_destroy( ctx.hb_buffer )
@@ -98,17 +101,33 @@ shaper_load_font :: #force_inline proc( ctx : ^Shaper_Context, label : string, d

 shaper_unload_font :: #force_inline proc( info : ^Shaper_Info )
 {
-	if info.blob != nil do harfbuzz.font_destroy( info.font )
+	if info.font != nil do harfbuzz.font_destroy( info.font )
 	if info.face != nil do harfbuzz.face_destroy( info.face )
 	if info.blob != nil do harfbuzz.blob_destroy( info.blob )
 }

+// TODO(Ed): Allow the user to override snap_glyph_position of the shaper context on a per-call basis (as a param)
 // Recommended shaper. Very performant.
 // TODO(Ed): Would be nice to properly support vertical shaping, right now its strictly just horizontal...
@(optimization_mode="favor_size")
-shaper_shape_harfbuzz :: proc( ctx : ^Shaper_Context, text_utf8 : string, entry : Entry, font_px_Size, font_scale : f32, output :^Shaped_Text )
+shaper_shape_harfbuzz :: proc( ctx : ^Shaper_Context, 
+	atlas             : Atlas, 
+	glyph_buffer_size : Vec2,
+	font              : Font_ID,
+	entry             : Entry, 
+	font_px_size      : f32, 
+	font_scale        : f32, 
+	text_utf8         : string, 
+	output            : ^Shaped_Text
+)
 {
 	profile(#procedure)
+	assert( ctx != nil )
+
+	clear( & output.glyph )
+	clear( & output.position )
+	clear( & output.visible )
+
 	current_script := harfbuzz.Script.UNKNOWN
 	hb_ucfunc      := harfbuzz.unicode_funcs_get_default()
 	harfbuzz.buffer_clear_contents( ctx.hb_buffer )
@@ -141,11 +160,9 @@ shaper_shape_harfbuzz :: proc( ctx : ^Shaper_Context, text_utf8 : string, entry
 	)
 	{
 		profile(#procedure)
-		// Set script and direction. We use the system's default langauge.
-		// script = HB_SCRIPT_LATIN
-		harfbuzz.buffer_set_script( buffer, script )
+		harfbuzz.buffer_set_script   ( buffer, script )
 		harfbuzz.buffer_set_direction( buffer, harfbuzz.script_get_horizontal_direction( script ))
-		harfbuzz.buffer_set_language( buffer, harfbuzz.language_get_default() )
+		harfbuzz.buffer_set_language ( buffer, harfbuzz.language_get_default() )

 		// Perform the actual shaping of this run using HarfBuzz.
 		harfbuzz.buffer_set_content_type( buffer, harfbuzz.Buffer_Content_Type.UNICODE )
@@ -158,24 +175,26 @@ shaper_shape_harfbuzz :: proc( ctx : ^Shaper_Context, text_utf8 : string, entry

 		line_height := (entry.ascent - entry.descent + entry.line_gap) * font_scale

+		last_cluster := u32(0)
 		for index : i32; index < i32(glyph_count); index += 1
 		{
-			hb_glyph     := glyph_infos[ index ]
+			hb_glyph     := glyph_infos    [ index ]
 			hb_gposition := glyph_positions[ index ]
-			glyph     := cast(Glyph) hb_glyph.codepoint
+			glyph        := cast(Glyph) hb_glyph.codepoint

 			if hb_glyph.cluster > 0
 			{
-				(max_line_width^)     = max( max_line_width^, position.x )
-				position.x            = 0.0
-				position.y           -= line_height
-				position.y            = floor(position.y)
-				(line_count^)         += 1
+				(max_line_width^) = max( max_line_width^, position.x )
+				position.x        = 0.0
+				position.y       -= line_height
+				position.y        = floor(position.y)
+				(line_count^)    += 1
+
+				last_cluster = hb_glyph.cluster
 				continue
 			}
-			if abs( font_px_size ) <= adv_snap_small_font_threshold
-			{
-				(position^) =  ceil( position^ )
+			if abs( font_px_size ) <= adv_snap_small_font_threshold {
+				(position^) = ceil( position^ )
 			}

 			glyph_pos := position^
@@ -193,10 +212,15 @@ shaper_shape_harfbuzz :: proc( ctx : ^Shaper_Context, text_utf8 : string, entry
 			(position^)      += advance
 			(max_line_width^) = max(max_line_width^, position.x)

-			is_empty := parser_is_glyph_empty(entry.parser_info, glyph)
-			if ! is_empty {
-				append( & output.glyph, glyph )
-				append( & output.position, glyph_pos)
+			// We track all glyphs so that the user can use the shape for navigation purposes.
+			append( & output.glyph, glyph )
+			append( & output.position, glyph_pos)
+
+			// We don't accept all glyphs for rendering: harfbuzz preserves positions of non-visible codepoints (as .notdef glyphs).
+			// We also double check with the parser that the glyph is not empty before marking it visible.
+			visible_glyph := glyph != 0 && ! parser_is_glyph_empty(entry.parser_info, glyph)
+			if visible_glyph {
+				append( & output.visible, cast(i32) len(output.glyph) - 1 )
 			}
 		}

@@ -217,14 +241,23 @@ shaper_shape_harfbuzz :: proc( ctx : ^Shaper_Context, text_utf8 : string, entry
 		// Can we continue the current run?
 		ScriptKind :: harfbuzz.Script

-		special_script : b32 = script == ScriptKind.UNKNOWN || script == ScriptKind.INHERITED || script == ScriptKind.COMMON
-		if special_script || script == current_script || byte_offset == 0 {
+		// These scripts don't break runs because they don't represent script transitions - they adapt to their context. 
+		// Maintaining the current shaping run for these scripts ensures correct processing of marks, numbers, 
+		// and punctuation within the primary text flow.
+		is_neutral_script := script == ScriptKind.UNKNOWN || script == ScriptKind.INHERITED || script == ScriptKind.COMMON
+
+		// Essentially if the script is neutral, or the same as current, 
+		// or this is the first codepoint: add it to the buffer and continue the loop.
+		if is_neutral_script             \
+		|| script      == current_script \
+		|| byte_offset == 0 
+		{
 			harfbuzz.buffer_add( ctx.hb_buffer, hb_codepoint, codepoint == '\n' ? 1 : 0 )
-			current_script = special_script ? current_script : script
+			current_script = is_neutral_script ? current_script : script
 			continue
 		}

-		// End current run since we've encountered a script change.
+		// End current run since we've encountered a significant script change.
 		shape_run( output,
 			entry, 
 			ctx.hb_buffer, 
@@ -232,7 +265,7 @@ shaper_shape_harfbuzz :: proc( ctx : ^Shaper_Context, text_utf8 : string, entry
 			& position, 
 			& max_line_width, 
 			& line_count, 
-			font_px_Size, 
+			font_px_size, 
 			font_scale, 
 			ctx.snap_glyph_position, 
 			ctx.adv_snap_small_font_threshold
@@ -249,7 +282,7 @@ shaper_shape_harfbuzz :: proc( ctx : ^Shaper_Context, text_utf8 : string, entry
 		& position, 
 		& max_line_width, 
 		& line_count, 
-		font_px_Size, 
+		font_px_size, 
 		font_scale, 
 		ctx.snap_glyph_position, 
 		ctx.adv_snap_small_font_threshold
@@ -258,55 +291,43 @@ shaper_shape_harfbuzz :: proc( ctx : ^Shaper_Context, text_utf8 : string, entry
 	// Set the final size
 	output.size.x = max_line_width
 	output.size.y = f32(line_count) * line_height
-	return
-}

-shaper_shape_text_uncached_advanced :: #force_inline proc( ctx : ^Shaper_Context, 
-	atlas             : Atlas, 
-	glyph_buffer_size : Vec2,
-	entry             : Entry, 
-	font_px_size      : f32, 
-	font_scale        : f32, 
-	text_utf8         : string, 
-	output            : ^Shaped_Text
-)
-{
-	profile(#procedure)
-	assert( ctx != nil )
-
-	clear( & output.glyph )
-	clear( & output.position )
-
-	shaper_shape_harfbuzz( ctx, text_utf8, entry, font_px_size, font_scale, output )
-	
 	// Resolve each glyphs: bounds, atlas lru, and the atlas region as we have everything we need now.

-	resize( & output.atlas_lru_code, len(output.glyph) )
-	resize( & output.region_kind,    len(output.glyph) )
-	resize( & output.bounds,         len(output.glyph) )
+	resize( & output.atlas_lru_code, len(output.visible) )
+	resize( & output.region_kind,    len(output.visible) )
+	resize( & output.bounds,         len(output.visible) )

 	profile_begin("atlas_lru_code")
-	for id, index in output.glyph
-	{
-		output.atlas_lru_code[index] = atlas_glyph_lru_code(entry.id, font_px_size, id)
+	for vis_id, index in output.visible {
+		glyph_id                    := output.glyph[vis_id]
+		output.atlas_lru_code[index] = atlas_glyph_lru_code(entry.id, font_px_size, glyph_id)
+		// atlas_lru_code is 1:1 with visible index
 	}
 	profile_end()

 	profile_begin("bounds & region")
-	for id, index in output.glyph
-	{
+	for vis_id, index in output.visible {
+		glyph_id                 := output.glyph[vis_id]
 		bounds                   := & output.bounds[index]
-		(bounds ^)                = parser_get_bounds( entry.parser_info, id )
+		(bounds ^)                = parser_get_bounds( entry.parser_info, glyph_id )
 		bounds_size_scaled       := (bounds.p1 - bounds.p0) * font_scale
 		output.region_kind[index] = atlas_decide_region( atlas, glyph_buffer_size, bounds_size_scaled )
+		// bounds & region_kind are 1:1 with visible index
 	}
 	profile_end()
+
+	output.font    = font
+	output.px_size = font_px_size
+	return
 }

+// TODO(Ed): Allow the user to override snap_glyph_position of the shaper context on a per-call basis (as a param)
 // Basic western alphabet based shaping. Not that much faster than harfbuzz if at all.
 shaper_shape_text_latin :: proc( ctx : ^Shaper_Context,
 	atlas             : Atlas, 
 	glyph_buffer_size : Vec2,
+	font              : Font_ID,
 	entry             : Entry, 
 	font_px_size      : f32, 
 	font_scale        : f32, 
@@ -319,6 +340,7 @@ shaper_shape_text_latin :: proc( ctx : ^Shaper_Context,

 	clear( & output.glyph )
 	clear( & output.position )
+	clear( & output.visible )

 	line_height := (entry.ascent - entry.descent + entry.line_gap) * font_scale

@@ -349,13 +371,16 @@ shaper_shape_text_latin :: proc( ctx : ^Shaper_Context,

 		glyph_index    := parser_find_glyph_index( entry.parser_info, codepoint )
 		is_glyph_empty := parser_is_glyph_empty( entry.parser_info, glyph_index )
-		if ! is_glyph_empty
-		{
-			append( & output.glyph, glyph_index)
-			append( & output.position, Vec2 {
-				ceil(position.x),
-				ceil(position.y)
-			})
+
+		if ctx.snap_glyph_position {
+			position.x = ceil(position.x)
+			position.y = ceil(position.y)
+		}
+		append( & output.glyph, glyph_index)
+		append( & output.position, position)
+
+		if ! is_glyph_empty {
+			append( & output.visible, cast(i32) len(output.glyph) - 1 )
 		}

 		advance, _ := parser_get_codepoint_horizontal_metrics( entry.parser_info, codepoint )
@@ -376,27 +401,32 @@ shaper_shape_text_latin :: proc( ctx : ^Shaper_Context,
 	resize( & output.bounds,         len(output.glyph) )

 	profile_begin("atlas_lru_code")
-	for id, index in output.glyph
-	{
-		output.atlas_lru_code[index] = atlas_glyph_lru_code(entry.id, font_px_size, id)
+	for vis_id, index in output.visible {
+		glyph_id                    := output.glyph[vis_id]
+		output.atlas_lru_code[index] = atlas_glyph_lru_code(entry.id, font_px_size, glyph_id)
+		// atlas_lru_code is 1:1 with visible index
 	}
 	profile_end()

 	profile_begin("bounds & region")
-	for id, index in output.glyph
-	{
+	for vis_id, index in output.visible {
+		glyph_id                 := output.glyph[vis_id]
 		bounds                   := & output.bounds[index]
-		(bounds ^)                = parser_get_bounds( entry.parser_info, id )
+		(bounds ^)                = parser_get_bounds( entry.parser_info, glyph_id )
 		bounds_size_scaled       := (bounds.p1 - bounds.p0) * font_scale
 		output.region_kind[index] = atlas_decide_region( atlas, glyph_buffer_size, bounds_size_scaled )
+		// bounds & region_kind are 1:1 with visible index
 	}
 	profile_end()
+
+	output.font    = font
+	output.px_size = font_px_size
 }

 // Shapes are tracked by the library's context using the shape cache 
 // and the key is resolved using the font, the desired pixel size, and the text bytes to be shaped.
-// Thus this procedures cost will be proporitonal to how muh text it has to sift through.
-// djb8_hash is used as its been pretty good for thousands of hashed lines that around 6-120 charactes long
+// Thus this procedure's cost will be proportional to how much text it has to sift through.
+// djb8_hash is used as it's been pretty good for thousands of hashed lines that are around 6-250 characters long
 // (and its very fast).
@(optimization_mode="favor_size")
 shaper_shape_text_cached :: proc( text_utf8 : string, 
@@ -428,12 +458,12 @@ shaper_shape_text_cached :: proc( text_utf8 : string,
 	shape_cache_idx := lru_get( state, lru_code )
 	if shape_cache_idx == -1
 	{
-		if shape_cache.next_cache_id < i32(state.capacity) {
+		if shape_cache.next_cache_id < i32(state.capacity){
 			shape_cache_idx            = shape_cache.next_cache_id
 			shape_cache.next_cache_id += 1
 			evicted := lru_put( state, lru_code, shape_cache_idx )
 		}
-		else
+		else 
 		{
 			next_evict_idx := lru_get_next_evicted( state ^ )
 			assert( next_evict_idx != LRU_Fail_Mask_32 )
@@ -445,7 +475,7 @@ shaper_shape_text_cached :: proc( text_utf8 : string,
 		}

 		storage_entry := & shape_cache.storage[ shape_cache_idx ]
-		shape_text_uncached( ctx, atlas, glyph_buffer_size, entry, font_px_size, font_scale, text_utf8, storage_entry )
+		shape_text_uncached( ctx, atlas, glyph_buffer_size, font, entry, font_px_size, font_scale, text_utf8, storage_entry )

 		shaped_text = storage_entry ^
 		return
--- a/code/font/vefontcache/vefontcache.odin
+++ b/code/font/vefontcache/vefontcache.odin