From d454778fd6a2c6ba6c10bef076fda1ab60a55cff Mon Sep 17 00:00:00 2001
From: Ed_ <edwardgz@gmail.com>
Date: Fri, 28 Jun 2024 07:52:48 -0400
Subject: [PATCH] improvements while working Sectr Prototype

---
 LRU.odin                     |  57 +++--
 Readme.md                    |  32 ++-
 VEFontCache.odin             |  71 +++---
 atlas.odin                   | 152 +++++++-----
 docs/draw_text_codepaths.pur | Bin 0 -> 58941 bytes
 draw.odin                    | 432 +++++++++++++++++++----------------
 mappings.odin                |   8 +-
 misc.odin                    | 275 ++++++++++++++++------
 shaped_text.odin             |  75 +++---
 9 files changed, 668 insertions(+), 434 deletions(-)
 create mode 100644 docs/draw_text_codepaths.pur

diff --git a/LRU.odin b/LRU.odin
index db5d440..ea97a6b 100644
--- a/LRU.odin
+++ b/LRU.odin
@@ -28,11 +28,11 @@ PoolList :: struct {
 pool_list_init :: proc( pool : ^PoolList, capacity : u32, dbg_name : string = "" )
 {
 	error : AllocatorError
-	pool.items, error = make( [dynamic]PoolListItem, u64(capacity) )
+	pool.items, error = make( [dynamic]PoolListItem, int(capacity) )
 	assert( error == .None, "VEFontCache.pool_list_init : Failed to allocate items array")
 	resize( & pool.items, capacity )
 
-	pool.free_list, error = make( [dynamic]PoolListIter, u64(capacity) )
+	pool.free_list, error = make( [dynamic]PoolListIter, len = 0, cap = int(capacity) )
 	assert( error == .None, "VEFontCache.pool_list_init : Failed to allocate free_list array")
 	resize( & pool.free_list, capacity )
 
@@ -55,7 +55,7 @@ pool_list_init :: proc( pool : ^PoolList, capacity : u32, dbg_name : string = ""
 
 pool_list_free :: proc( pool : ^PoolList )
 {
- // TODO(Ed): Implement
+	// TODO(Ed): Implement
 }
 
 pool_list_reload :: proc( pool : ^PoolList, allocator : Allocator )
@@ -120,6 +120,23 @@ pool_list_erase :: proc( pool : ^PoolList, iter : PoolListIter )
 	}
 }
 
+pool_list_move_to_front :: #force_inline proc( pool : ^PoolList, iter : PoolListIter )
+{
+	using pool
+	// Moves the node at `iter` to the head of the pool's index-linked list by relinking prev/next in place.
+	if front == iter do return // already the head; nothing to relink
+	// Unlink: bridge the node's neighbours (each guarded against -1 before indexing) and retreat `back` if it was the tail.
+	item := & items[iter]
+	if item.prev != -1   do items[ item.prev ].next = item.next
+	if item.next != -1   do items[ item.next ].prev = item.prev
+	if back      == iter do back = item.prev
+	// Re-insert as the new head; the previous head gets `iter` as its prev link.
+	item.prev           = -1 // the head has no predecessor
+	item.next           = front
+	items[ front ].prev = iter // NOTE(review): assumes front != -1, i.e. `iter` is already linked into a non-empty list - confirm at call sites
+	front               = iter
+}
+
 pool_list_peek_back :: #force_inline proc ( pool : ^PoolList ) -> PoolListValue {
 	assert( pool.back != - 1 )
 	value := pool.items[ pool.back ].value
@@ -160,7 +177,7 @@ LRU_init :: proc( cache : ^LRU_Cache, capacity : u32, dbg_name : string = "" ) {
 
 LRU_free :: proc( cache : ^LRU_Cache )
 {
- // TODO(Ed): Implement
+	// TODO(Ed): Implement
 }
 
 LRU_reload :: #force_inline proc( cache : ^LRU_Cache, allocator : Allocator )
@@ -180,13 +197,12 @@ LRU_find :: #force_inline proc "contextless" ( cache : ^LRU_Cache, key : u64, mu
 	return link, success
 }
 
-LRU_get :: #force_inline proc( cache : ^LRU_Cache, key : u64 ) -> i32 {
-	iter, success := LRU_find( cache, key )
-	if success == false {
-		return -1
+LRU_get :: #force_inline proc( cache: ^LRU_Cache, key : u64 ) -> i32 {
+	if link, ok := &cache.table[ key ]; ok {
+			pool_list_move_to_front(&cache.key_queue, link.ptr)
+			return link.value
 	}
-	LRU_refresh( cache, key )
-	return iter.value
+	return -1
 }
 
 LRU_get_next_evicted :: #force_inline proc ( cache : ^LRU_Cache ) -> u64
@@ -206,26 +222,25 @@ LRU_peek :: #force_inline proc ( cache : ^LRU_Cache, key : u64, must_find := fal
 	return iter.value
 }
 
-LRU_put :: #force_inline proc ( cache : ^LRU_Cache, key : u64,  value : i32 ) -> u64
+LRU_put :: #force_inline proc( cache : ^LRU_Cache, key : u64, value : i32 ) -> u64
 {
-	iter, success := cache.table[key]
-	if success {
-		LRU_refresh( cache, key )
-		iter.value = value
+	if link, ok := & cache.table[ key ]; ok {
+		pool_list_move_to_front( & cache.key_queue, link.ptr )
+		link.value = value
 		return key
 	}
 
 	evict := key
 	if cache.key_queue.size >= cache.capacity {
-		evict = pool_list_pop_back( & cache.key_queue )
-		delete_key( & cache.table, evict )
+		evict = pool_list_pop_back(&cache.key_queue)
+		delete_key(&cache.table, evict)
 		cache.num -= 1
 	}
 
-	pool_list_push_front( & cache.key_queue, key )
-	cache.table[key] = LRU_Link {
-		value = value,
-		ptr   = cache.key_queue.front
+	pool_list_push_front(&cache.key_queue, key)
+	cache.table[key] = LRU_Link{
+			value = value,
+			ptr   = cache.key_queue.front,
 	}
 	cache.num += 1
 	return evict
diff --git a/Readme.md b/Readme.md
index 0e9f370..f6690a9 100644
--- a/Readme.md
+++ b/Readme.md
@@ -1,29 +1,51 @@
 # VE Font Cache : Odin Port
 
-This is a port of the library base on the [original](https://github.com/hypernewbie/VEFontCache).
+This is a port of the library based on this [fork](https://github.com/hypernewbie/VEFontCache).
 
 Its original purpose was for use in game engines, however its rendeirng quality and performance is more than adequate for many other applications.
 
 See: [docs/Readme.md](docs/Readme.md) for the library's interface
 
-TODO (Making it a more idiomatic library):
+## TODOs
+
+### (Making it a more idiomatic library):
 
 * Setup freetype, harfbuzz, depedency management within the library
 
-TODO Documentation:
+### Documentation:
 
 * Pureref outline of draw_text exectuion
 * Markdown general documentation
 
-TODO Content:
+### Content:
 
 * Port over the original demo utilizing sokol libraries instead
 * Provide a sokol_gfx backend package
 
-TODO Additional Features:
+### Additional Features:
 
 * Support for freetype
 * Support for harfbuzz
 * Ability to set a draw transform, viewport and projection
   * By default the library's position is in unsigned normalized render space
 * Allow curve_quality to be set on a per-font basis
+
+### Optimization:
+
+* Look into setting up multi-threading by giving each thread a context
+  * There is a heavy performance bottleneck in iterating the text/shape/glyphs on the cpu (single-thread) vs the actual rendering
+  * draw_text can provide in the context a job list per thread for the user to then hook up to their own threading solution.
+  * Context would need to be segregated into staged data structures for each thread to utilize
+    * Each should have their own?
+      * draw_list
+      * draw_layer
+      * atlas.next_idx
+      * glyph_draw_buffer
+      * shape_cache
+    * This would need to converge to the singular draw_list on a per-layer basis (when the user requests a draw_list layer there could be a yield to wait for the jobs to finish), if the interface expects the user to issue the commands single-threaded; otherwise we just assume the user is going to feed the gpu the commands & data through separate threads as well (not ideal ux).
+
+Failed Attempts:
+
+* Attempted to chunk the text to more granular 'shapes' from `draw_list` before doing the actual call to `draw_text_shape`. This led to a larger performance cost due to the additional iteration across the text string.
+* Attempted to cache the shape draw_list for future calls. This led to a larger performance cost due to the additional iteration in `merge_draw_list`.
+  * The shape's glyphs must still be traversed to identify if the glyph is cached. This arguably could be handled in `shape_text_uncached`, however that would require a significant amount of refactoring to identify... (and would be more unergonomic when shaper libs are processing the text)
diff --git a/VEFontCache.odin b/VEFontCache.odin
index 2a8c710..7dba58f 100644
--- a/VEFontCache.odin
+++ b/VEFontCache.odin
@@ -44,7 +44,7 @@ Context :: struct {
 
 	entries : [dynamic]Entry,
 
-	temp_path               : [dynamic]Vec2,
+	temp_path               : [dynamic]Vertex,
 	temp_codepoint_seen     : map[u64]bool,
 	temp_codepoint_seen_num : u32,
 
@@ -133,8 +133,8 @@ InitShapeCacheParams :: struct {
 }
 
 InitShapeCacheParams_Default :: InitShapeCacheParams {
-	capacity       = 1024,
-	reserve_length = 1024,
+	capacity       = 2048,
+	reserve_length = 2048,
 }
 
 // ve_fontcache_init
@@ -145,8 +145,8 @@ startup :: proc( ctx : ^Context, parser_kind : ParserKind,
 	shape_cache_params          := InitShapeCacheParams_Default,
 	curve_quality               : u32 = 3,
 	entires_reserve             : u32 = 512,
-	temp_path_reserve           : u32 = 512,
-	temp_codepoint_seen_reserve : u32 = 512,
+	temp_path_reserve           : u32 = 1024,
+	temp_codepoint_seen_reserve : u32 = 2048,
 )
 {
 	assert( ctx != nil, "Must provide a valid context" )
@@ -161,25 +161,26 @@ startup :: proc( ctx : ^Context, parser_kind : ParserKind,
 	ctx.curve_quality = curve_quality
 
 	error : AllocatorError
-	entries, error = make( [dynamic]Entry, entires_reserve )
+	entries, error = make( [dynamic]Entry, len = 0, cap = entires_reserve )
 	assert(error == .None, "VEFontCache.init : Failed to allocate entries")
 
-	temp_path, error = make( [dynamic]Vec2, temp_path_reserve )
+	temp_path, error = make( [dynamic]Vertex, len = 0, cap = temp_path_reserve )
 	assert(error == .None, "VEFontCache.init : Failed to allocate temp_path")
 
 	temp_codepoint_seen, error = make( map[u64]bool, uint(temp_codepoint_seen_reserve) )
 	assert(error == .None, "VEFontCache.init : Failed to allocate temp_path")
 
-	draw_list.vertices, error = make( [dynamic]Vertex, 4 * Kilobyte )
+	draw_list.vertices, error = make( [dynamic]Vertex, len = 0, cap = 4 * Kilobyte )
 	assert(error == .None, "VEFontCache.init : Failed to allocate draw_list.vertices")
 
-	draw_list.indices, error = make( [dynamic]u32, 8 * Kilobyte )
+	draw_list.indices, error = make( [dynamic]u32, len = 0, cap = 8 * Kilobyte )
 	assert(error == .None, "VEFontCache.init : Failed to allocate draw_list.indices")
 
-	draw_list.calls, error = make( [dynamic]DrawCall, 512 )
+	draw_list.calls, error = make( [dynamic]DrawCall, len = 0, cap = 512 )
 	assert(error == .None, "VEFontCache.init : Failed to allocate draw_list.calls")
 
-	init_atlas_region :: proc( region : ^AtlasRegion, params : InitAtlasParams, region_params : InitAtlasRegionParams, factor : Vec2i, expected_cap : i32 ) {
+	init_atlas_region :: proc( region : ^AtlasRegion, params : InitAtlasParams, region_params : InitAtlasRegionParams, factor : Vec2i, expected_cap : i32 )
+	{
 		using region
 
 		next_idx = 0;
@@ -225,11 +226,20 @@ startup :: proc( ctx : ^Context, parser_kind : ParserKind,
 	for idx : u32 = 0; idx < shape_cache_params.capacity; idx += 1 {
 		stroage_entry := & shape_cache.storage[idx]
 		using stroage_entry
-		glyphs, error = make( [dynamic]Glyph, shape_cache_params.reserve_length )
+		glyphs, error = make( [dynamic]Glyph, len = 0, cap = shape_cache_params.reserve_length )
 		assert( error == .None, "VEFontCache.init : Failed to allocate glyphs array for shape cache storage" )
 
-		positions, error = make( [dynamic]Vec2, shape_cache_params.reserve_length )
+		positions, error = make( [dynamic]Vec2, len = 0, cap = shape_cache_params.reserve_length )
 		assert( error == .None, "VEFontCache.init : Failed to allocate positions array for shape cache storage" )
+
+		draw_list.calls, error = make( [dynamic]DrawCall, len = 0, cap = glyph_draw_params.buffer_batch * 2 )
+		assert( error == .None, "VEFontCache.init : Failed to allocate calls for draw_list" )
+
+		draw_list.indices, error = make( [dynamic]u32, len = 0, cap = glyph_draw_params.buffer_batch * 2 * 6 )
+		assert( error == .None, "VEFontCache.init : Failed to allocate indices array for draw_list" )
+
+		draw_list.vertices, error = make( [dynamic]Vertex, len = 0, cap = glyph_draw_params.buffer_batch * 2 * 4 )
+		assert( error == .None, "VEFontCache.init : Failed to allocate vertices array for draw_list" )
 	}
 
 	// Note(From original author): We can actually go over VE_FONTCACHE_GLYPHDRAW_BUFFER_BATCH batches due to smart packing!
@@ -241,22 +251,22 @@ startup :: proc( ctx : ^Context, parser_kind : ParserKind,
 		height        = atlas.region_d.height * u32(over_sample.y)
 		draw_padding  = glyph_draw_params.draw_padding
 
-		draw_list.calls, error = make( [dynamic]DrawCall, cast(u64) glyph_draw_params.buffer_batch * 2 )
+		draw_list.calls, error = make( [dynamic]DrawCall, len = 0, cap = glyph_draw_params.buffer_batch * 2 )
 		assert( error == .None, "VEFontCache.init : Failed to allocate calls for draw_list" )
 
-		draw_list.indices, error = make( [dynamic]u32, cast(u64) glyph_draw_params.buffer_batch * 2 * 6 )
+		draw_list.indices, error = make( [dynamic]u32, len = 0, cap = glyph_draw_params.buffer_batch * 2 * 6 )
 		assert( error == .None, "VEFontCache.init : Failed to allocate indices array for draw_list" )
 
-		draw_list.vertices, error = make( [dynamic]Vertex, glyph_draw_params.buffer_batch * 2 * 4 )
+		draw_list.vertices, error = make( [dynamic]Vertex, len = 0, cap = glyph_draw_params.buffer_batch * 2 * 4 )
 		assert( error == .None, "VEFontCache.init : Failed to allocate vertices array for draw_list" )
 
-		clear_draw_list.calls, error = make( [dynamic]DrawCall, cast(u64) glyph_draw_params.buffer_batch * 2 )
+		clear_draw_list.calls, error = make( [dynamic]DrawCall, len = 0, cap = glyph_draw_params.buffer_batch * 2 )
 		assert( error == .None, "VEFontCache.init : Failed to allocate calls for calls for clear_draw_list" )
 
-		clear_draw_list.indices, error = make( [dynamic]u32, cast(u64) glyph_draw_params.buffer_batch * 2 * 4 )
+		clear_draw_list.indices, error = make( [dynamic]u32, len = 0, cap = glyph_draw_params.buffer_batch * 2 * 4 )
 		assert( error == .None, "VEFontCache.init : Failed to allocate calls for indices array for clear_draw_list" )
 
-		clear_draw_list.vertices, error = make( [dynamic]Vertex, glyph_draw_params.buffer_batch * 2 * 4 )
+		clear_draw_list.vertices, error = make( [dynamic]Vertex, len = 0, cap = glyph_draw_params.buffer_batch * 2 * 4 )
 		assert( error == .None, "VEFontCache.init : Failed to allocate vertices array for clear_draw_list" )
 	}
 
@@ -395,7 +405,7 @@ configure_snap :: #force_inline proc( ctx : ^Context, snap_width, snap_height :
 get_cursor_pos :: #force_inline proc "contextless" ( ctx : ^Context                  ) -> Vec2 { return ctx.cursor_pos }
 set_colour     :: #force_inline proc "contextless" ( ctx : ^Context, colour : Colour )         { ctx.colour = colour }
 
-draw_text :: proc( ctx : ^Context, font : FontID, text_utf8 : string, position : Vec2, scale : Vec2 ) -> b32
+draw_text :: proc( ctx : ^Context, font : FontID, text_utf8 : string, position, scale : Vec2 ) -> b32
 {
 	// profile(#procedure)
 	assert( ctx != nil )
@@ -471,24 +481,9 @@ measure_text_size :: proc( ctx : ^Context, font : FontID, text_utf8 : string ) -
 	assert( ctx != nil )
 	assert( font >= 0 && int(font) < len(ctx.entries) )
 
-	atlas   := ctx.atlas
-	entry   := & ctx.entries[ font ]
-	shaped  := shape_text_cached( ctx, font, text_utf8, entry )
-	padding := cast(f32) atlas.glyph_padding
-
-	for index : i32 = 0; index < i32(len(shaped.glyphs)); index += 1
-	{
-		glyph_index := shaped.glyphs[ index ]
-		if is_empty( ctx, entry, glyph_index ) do continue
-
-		bounds_0, bounds_1 := parser_get_glyph_box( & entry.parser_info, glyph_index )
-		bounds_size := bounds_1 - bounds_0
-
-		glyph_size := Vec2 { f32(bounds_size.x), f32(bounds_size.y) } * entry.size_scale
-		measured.y = max(measured.y, glyph_size.y)
-	}
-	measured.x = shaped.end_cursor_pos.x
-	return measured
+	entry  := &ctx.entries[font]
+	shaped := shape_text_cached(ctx, font, text_utf8, entry)
+	return shaped.size
 }
 
 get_font_vertical_metrics :: #force_inline proc ( ctx : ^Context, font : FontID ) -> ( ascent, descent, line_gap : i32 )
diff --git a/atlas.odin b/atlas.odin
index 180d5f2..c20a0fb 100644
--- a/atlas.odin
+++ b/atlas.odin
@@ -86,68 +86,110 @@ atlas_bbox :: proc( atlas : ^Atlas, region : AtlasRegionKind, local_idx : i32 )
 	return
 }
 
-decide_codepoint_region :: proc( ctx : ^Context, entry : ^Entry, glyph_index : Glyph
+// decide_codepoint_region :: proc( ctx : ^Context, entry : ^Entry, glyph_index : Glyph
+// ) -> (region_kind : AtlasRegionKind, region : ^AtlasRegion, over_sample : Vec2)
+// {
+// 	if parser_is_glyph_empty( & entry.parser_info, glyph_index ) {
+// 		region_kind = .None
+// 	}
+
+// 	bounds_0, bounds_1 := parser_get_glyph_box( & entry.parser_info, glyph_index )
+// 	bounds_width  := f32(bounds_1.x - bounds_0.x)
+// 	bounds_height := f32(bounds_1.y - bounds_0.y)
+
+// 	atlas        := & ctx.atlas
+// 	glyph_buffer := & ctx.glyph_buffer
+
+// 	glyph_padding := f32(atlas.glyph_padding) * 2
+
+// 	bounds_width_scaled  := cast(u32) (bounds_width  * entry.size_scale + glyph_padding)
+// 	bounds_height_scaled := cast(u32) (bounds_height * entry.size_scale + glyph_padding)
+
+// 	if bounds_width_scaled <= atlas.region_a.width && bounds_height_scaled <= atlas.region_a.height
+// 	{
+// 		// Region A for small glyphs. These are good for things such as punctuation.
+// 		region_kind = .A
+// 		region      = & atlas.region_a
+// 	}
+// 	else if bounds_width_scaled <= atlas.region_b.width && bounds_height_scaled <= atlas.region_b.height
+// 	{
+// 		// Region B for tall glyphs. These are good for things such as european alphabets.
+// 		region_kind = .B
+// 		region      = & atlas.region_b
+// 	}
+// 	else if bounds_width_scaled <= atlas.region_c.width && bounds_height_scaled <= atlas.region_c.height
+// 	{
+// 		// Region C for big glyphs. These are good for things such as asian typography.
+// 		region_kind = .C
+// 		region      = & atlas.region_c
+// 	}
+// 	else if bounds_width_scaled <= atlas.region_d.width && bounds_height_scaled <= atlas.region_d.height
+// 	{
+// 		// Region D for huge glyphs. These are good for things such as titles and 4k.
+// 		region_kind = .D
+// 		region      = & atlas.region_d
+// 	}
+// 	else if bounds_width_scaled <= glyph_buffer.width && bounds_height_scaled <= glyph_buffer.height
+// 	{
+// 		// Region 'E' for massive glyphs. These are rendered uncached and un-oversampled.
+// 		region_kind = .E
+// 		region      = nil
+// 		if bounds_width_scaled <= glyph_buffer.width / 2 && bounds_height_scaled <= glyph_buffer.height / 2 {
+// 			over_sample = { 2.0, 2.0 }
+// 		}
+// 		else {
+// 			over_sample = { 1.0, 1.0 }
+// 		}
+// 		return
+// 	}
+// 	else {
+// 		region_kind = .None
+// 		return
+// 	}
+
+// 	over_sample = glyph_buffer.over_sample
+// 	assert(region != nil)
+// 	return
+// }
+
+decide_codepoint_region :: proc(ctx : ^Context, entry : ^Entry, glyph_index : Glyph
 ) -> (region_kind : AtlasRegionKind, region : ^AtlasRegion, over_sample : Vec2)
 {
-	if parser_is_glyph_empty( & entry.parser_info, glyph_index ) {
-		region_kind = .None
+	if parser_is_glyph_empty(&entry.parser_info, glyph_index) {
+		return .None, nil, {}
 	}
 
-	bounds_0, bounds_1 := parser_get_glyph_box( & entry.parser_info, glyph_index )
-	bounds_width  := f32(bounds_1.x - bounds_0.x)
-	bounds_height := f32(bounds_1.y - bounds_0.y)
+	bounds_0, bounds_1 := parser_get_glyph_box(&entry.parser_info, glyph_index)
+	bounds_width       := f32(bounds_1.x - bounds_0.x)
+	bounds_height      := f32(bounds_1.y - bounds_0.y)
 
-	atlas        := & ctx.atlas
-	glyph_buffer := & ctx.glyph_buffer
+	atlas         := & ctx.atlas
+	glyph_buffer  := & ctx.glyph_buffer
+	glyph_padding := f32( atlas.glyph_padding ) * 2
 
-	glyph_padding := f32(atlas.glyph_padding) * 2
+	bounds_width_scaled  := u32(bounds_width  * entry.size_scale + glyph_padding)
+	bounds_height_scaled := u32(bounds_height * entry.size_scale + glyph_padding)
 
-	bounds_width_scaled  := cast(u32) (bounds_width  * entry.size_scale + glyph_padding)
-	bounds_height_scaled := cast(u32) (bounds_height * entry.size_scale + glyph_padding)
-
-	if bounds_width_scaled <= atlas.region_a.width && bounds_height_scaled <= atlas.region_a.height
-	{
-		// Region A for small glyphs. These are good for things such as punctuation.
-		region_kind = .A
-		region      = & atlas.region_a
-	}
-	else if bounds_width_scaled <= atlas.region_b.width && bounds_height_scaled <= atlas.region_b.height
-	{
-		// Region B for tall glyphs. These are good for things such as european alphabets.
-		region_kind = .B
-		region      = & atlas.region_b
-	}
-	else if bounds_width_scaled <= atlas.region_c.width && bounds_height_scaled <= atlas.region_c.height
-	{
-		// Region C for big glyphs. These are good for things such as asian typography.
-		region_kind = .C
-		region      = & atlas.region_c
-	}
-	else if bounds_width_scaled <= atlas.region_d.width && bounds_height_scaled <= atlas.region_d.height
-	{
-		// Region D for huge glyphs. These are good for things such as titles and 4k.
-		region_kind = .D
-		region      = & atlas.region_d
-	}
-	else if bounds_width_scaled <= glyph_buffer.width && bounds_height_scaled <= glyph_buffer.height
-	{
-		// Region 'E' for massive glyphs. These are rendered uncached and un-oversampled.
-		region_kind = .E
-		region      = nil
-		if bounds_width_scaled <= glyph_buffer.width / 2 && bounds_height_scaled <= glyph_buffer.height / 2 {
-			over_sample = { 2.0, 2.0 }
-		}
-		else {
-			over_sample = { 1.0, 1.0 }
-		}
-		return
-	}
-	else {
-		region_kind = .None
-		return
+	// Use a lookup table for faster region selection
+	region_lookup := [4]struct { kind: AtlasRegionKind, region: ^AtlasRegion } {
+			{ .A, & atlas.region_a },
+			{ .B, & atlas.region_b },
+			{ .C, & atlas.region_c },
+			{ .D, & atlas.region_d },
 	}
 
-	over_sample = glyph_buffer.over_sample
-	assert(region != nil)
-	return
+	for region in region_lookup do if bounds_width_scaled <= region.region.width && bounds_height_scaled <= region.region.height {
+		return region.kind, region.region, glyph_buffer.over_sample
+	}
+
+	if bounds_width_scaled  <= glyph_buffer.width \
+	&& bounds_height_scaled <= glyph_buffer.height {
+		over_sample = \
+			bounds_width_scaled  <= glyph_buffer.width  / 2 &&
+			bounds_height_scaled <= glyph_buffer.height / 2 ? \
+			  {2.0, 2.0} \
+			: {1.0, 1.0}
+		return .E, nil, over_sample
+	}
+	return .None, nil, {}
 }
diff --git a/docs/draw_text_codepaths.pur b/docs/draw_text_codepaths.pur
new file mode 100644
index 0000000000000000000000000000000000000000..801029cb8c7067835a2ac67f482eb4a0bf6a1761
GIT binary patch
literal 58941
zcmeHw2_RH!|Nj|d-<738CQBmwzKx--l!}riTusA_ov{pKDNI^~az&|#N+tUiWs7EP
zS=z3hQX~-)MOnuG%t-gT)qCIW``-8W`@R43Fms;ste@xee9!kR^ZlL&003q{1yBH#
zfiK|^EyhkyRRVLAz&!&%9sI+BaYry+6>tJS%77-I4aP9wp9UCn0P~%|TnF%j0n4d@
zC1`mnV7fX02s~cwTzn4jnrz##4S>M_00#a6i#@<r0M0-Qv~M^V7?~Mqft8t=iJ6U+
zot=%9jg6g?hm)Oyn}dywi;s(&hnJU+mz{If>Q%g}!8k9i5g4r|oRJl*$jiaT0jB<7
zSgZ#4SZSExFg}2R4+iIhE!F@c^aL0fzR&|_WMY6Zv#^1=db|J#8U|-zgflU*Fo8v2
z3;>wV$GmDa{{|N2z2@R-h5{14afN~^`!EHPsun(^GmlxXT)p=Kti2X&6#l!0!RicP
zWm>mNd@z6k24iGl1B)#63}fJfuiBvO!^o^^xHqmqO&#M)x>9)W@oE+G7mM8hCmgK7
z2j>G&0M*~%zdSgF6=f!fOa{)7_P^{-KN5vPdmkjaJMFdC=!<H*u|-(Mc?(nI+JwUR
zDbdEJh6Rh*N7YHIlPI59n9M|21(C?GeX!v-v*&rt(5=<M!PFb@Upqc1txu`S2!H=!
zVHZn!-pIkfvk*&;lb6OejexRO>C%F2Wm$aXHlk*-)zqWBoLpSe*8Z_(!Dd}HT4!e^
zT=MQmobo9ww~$ZhdAV(BkHI3~=j?x|DCL>`N$%i?jNpjFUKLVm?mp^kc(OWPywHt$
zSKv|j#^a`H`Tl#O_ZtG<2B0c&BAYB>7fx8VC9Qi7^Luz(Y{8(2Cehys49~Kr?$0YO
zY{U&$#w;K)&qg{bh8q`wW0weFCjI{DHV*H{!>rtb{lug?TJpQ?`>$Rkt=~5E(q>;J
z`ONh>UH(L@RqON8n%9~5-lv&OZ*ni*yBf7^tA|2kNwcF3kbxG!I~H)BsJrQLpkLJD
zItgx;t}l3Myg*e$Y*m?Vx}V%_h3oBJg_FvYC(^Hb%H4Cp=KQQ>JNNWZY;V^6ebt8s
z4{M+ARLn2O<<|Y=b}x6ncxR;dXtpe>Le}cG*Op&UfS#}$x6pf}Oh!*heTV;alJ)C+
zKb7HKaY2V}-#AwwlUK~nQqXEu`2hJ)B9+p+w)4QpjEA2Vfo!(ti0=fuw$LEdu|?oo
zbERfMYI?TrQ`?zK+USWQji-l+=PECqxWfpi8ZQE?P1_d|1=Vg4hOEmrN<2i>WIQQ%
zztx2_-WH7yNaKFIZChQ1v4?H;Jc^V9FSdJ^UZ6*nnmzgRn_+8nwHuF%^(2&26%-{*
zr|j>XtdI3YjgC{=2i(IPJu45mTixlTo~=kMj*8uHG;mI?#EKGo-e02g<l8yd(U$2n
zzB)gO$e{v#GY$FkxtfhXQ_nngn`YTpH2n10M(_GVMbV+FJMbidOQWMWmI6!BXrF5K
znmM?HX<S-{ec6GHsxdWnsxKaf^tc`<DsoA#tR5D9^{d(0R@(<j&u}C8J2|sQlv0|5
z*~4p)QP-`*4fWzie?nh#8Gfdg7Z`2EdLicFrp?zwtndQK#-XAmdG4fZb<fagU2U27
z1G2<)nyqaWyv<}w__}Mn50e%o)9g$;E^WHyX}spaA|USSlG|JNmcNI~bjb8$@%XJI
zQFmvD8(y25Yl9DO(_JOlJs19Vi)A>bLCUP^mg^hWH_r28t54SS`<<LfjlDOR;pXV3
zd}-uK&34vr2Eh;L6tfp$!>WqAN1cb?aL-s(4w+id%aOtq7J<5|*H4DWBoe5}jUiH0
zw^@7Bi!;JIy;|Ls{Zs`D0{k5J$mKI--U#v9RE0PzQKTt$OyDE}E>*O)yKegD2TE7G
zEE9{6FFPMJkJ@*hseffA{VHr9GBWb{uI6{Popz<Q4>Ua8o3m7A-EWl`7I+pWxpAIq
zT5nQmdQB`|f?Jz8h(CK<^R|9ZRqYFgRutK&i%|+CWxXneQrQ9#KE13gz3bbC-gN87
zsbxQKV!c)OI?&J8ju(?8r`5R7pC=JWiJKZ5Ivu#+=v3NWH>DaQNhz*<)ADnWA+kQj
z>|tNud`)~^4E=2S^%qBQ-b3k$s`nQXZ%nJc|8OEC>%dg{W~!h;Rbtox`n@Y*I<7Iy
z6o)*s2&f-cD7Z56tZ10;R@#l}hta2Q*Ov<^TrKXhFYbvA&`s%J-2Bnkce5ZMYW0R8
ze_XG#q4-H3pHAO39el{+i^&yXql<t=&k3g~dz||8P_BB4NtU~Hbe8E(2H65OPD3S`
z57O2j&=yWM@u>;L=Kk;9WNWXKyQVQ5IUa*!Jhv%D=cAm>$pf#P9QGfpOt0b2d-%HP
zuCRm1Zv6JHa_XbpL;<*^RaiQIkX6slXJG+aW9}w%@%2hd!w-7~#!O!wtJ#&RTtzx%
zc2s$2&>A13SUQiatQv&x%haJ{>r4@2@4nfcHzqft9M3ywWAE-6S=p<e9WLx4b<$m|
zv@^vfh8@{7!!rcyt@5UNBf~!jQd3#`vEB)m2aNhv!f#Ox)xq=libX&I_U7H)sreW-
z*}LWU)Kd(m^&d$t*T7qTdPx<c#;d%4Ru`I*|JL9A8DTJt+#odhcHW(ON+IQmw%KrV
zdLjB%-u1T0&6?(I8NluK{SB$^UK{JtIb_Q4rER&R>rN2|`)v~1hYAMX^(vG;QhL4!
zl$mt&PV(I5A0D<nSDX`c%ZxSILEAz6(5rpL{g3$sBI}X!Q~KxavYaz>=c7|E2M4s?
z8Q{;z>kN3ZIbZeYfTXfu_};l_?`t3DJt?fF$FJ2G&-PSN%csN?iEHY^MCx9qnU^jC
zM~Q)pK*$`m(>$|#_=#PHy<kS6La3cX|Ev3K4ag{tj|sl8aaOTcx_2anrA?g86^ped
zp2qH4csJ3g92hm)uRXH}=s%F-*Ef}alh)pTo~kG%`$_ZyGBWDv%g+781ZOXDRaD2=
z*q*4nweoz*#kx~3%44~n_DX2KSlj%{x#ecFDKaeM{iijyP5GWJ!@D)QeB?`-vYKzp
zTg%&9$OnA%%09N`dScz)(tvR}U4zrPXFrHL+`c#@qCq_5sH_(<b$)_uQDNn%esZdB
zP<h(2HaGjQmlIc6GG|iZ#Ud+QUSK)DBGEfrA_t3-D6bTu^7Zt*ZO++|WOVgcKGB{6
zy%}dB*>NDj;W3|b?0iUa!XOoGmPcL~%$qw=F{kUBHP<BE#qt8(5&Kq9|DB0#g{Laz
zG-|I%2(?q*C510m(=x-Pji(uqiY!|e2|*LpN3SE!80V7|%|?9)?a><cd^h_x6{G9(
zZ_D%F%x#mQwj4ITRUA3gS6&uXy<vCM3&XyJ)5(SzwL3ElI}W7`Y1rmQX2x(Nz}Smq
zEV)`v_3me4U#H9UKrrBh@NTo8T$Kt7#*B4$g{V|o-YlNJ*oz$7Vddo^X(e`NBKxLb
zKJTZIDn&(w3Qo_%HbRxf{g1}m_FOx&d8?!9**$@x%`Me4l9LDhpQQ#b0$S^jTYXYO
zO)4!N1Ee1j=f_QJzt+vdivY(g>dI_i_N3aSu#SN=|7XYNGQ0I<u@<H5X?5phiq}Z2
zu2+v1PGLD{-fktJVK4PqXO0;c##?7qe7cl>UmjO#ORHtT&UH+8l@f$X?(XibjEpGW
zs%x=sVaLSOi<DIByAN9DF?sIs5ho(zGi!G5F>Wg9$idfD9vdyT*%KnN&UCkj%!ZW0
zhBXR!eUU>O?)8lp&!$Fyy3o0%tK)HWg1S~(t7FIOM*FuHwI+jOE{9J9)te!%A=B6B
ztNQ0X8VOEE+5akB9x&UeWuod?&o5k=c;_g0-%(9(mX!TKjSTX#1bwEz$5rE%hAk<*
zt9T}d76Db$2Mg(f>P~7yJIkD;n@}C`ZP>50Hme-vYQ4fp5DFE@iyd%uY!|Dsi?0{$
zHOP8dzX%*h%~S^|F9I)O7bYaLWHZ-P2F|b2qRz90xrt4nK81aPmj$eMx2h5n?aL!@
z`~uz}A&rB3O4nrZw65ozY>VC|zt-k@Rz{j<<>uYab<(S=h-vpn3TgLde+Wz-v(Fm0
zTs?=Zo<O~Uj~3jO*(mt%=5$~fOE4SvB5<H=c2l{1+*#AK<JMD{oS7=m>v3TMDW*aR
zfP~5~pWe*0Vx3DKN?-NY7V10e<g^`I7?tfzaLSOa3(mR`*FBP9n_*d8jdyFol$zDF
z=UvWtEgZ`K9+_b!|M66*&N*RsrVIK$nEQfZ72DV&UmVUt3lk-D@|`<cFS54?_Dl?|
zRvF3PRELQ_Gi;gImXNC;b6S=<Ez9XFGFdxXerJF6O?`y_-Hv|r&*=6LBzKnS%)pEO
zjodd*r1Pvx_ON}IX%hFe@n&wq^tk@~yXxma)90i0TN_f#(WuUdx7`P^(b;3jj&9-i
z@Ry-za5_;7m$%D_q&ry-Mjffn12l<EDQ|u1t59e!)c$oe>i;}!Mw=cmHAv&D#_Uey
z^<>rEyYqG_!mm4>G$kFbEy=o0?Y*Ww*1`VLsT<8DQQl%MqOAHJY7QmnJwr+6g}HK@
zgzGyO7AkY94J(E(EdpkvLN)~@S3=zLHNq84Qe2wrZ_EbH3?>zc-?d;c?jP#fK_XtI
ziXJlh82L$^U~D?7snNg3*E3J(omPfN?rrai$$RTYqTfAqxE<nsIiEA;(t)7^T`ZbW
z3+wk&N=>&23Qo?4UnxDP=h?Ig;ad^cTXF3C*N6Oaiom1~^Ulpeo2v%~VJBka^P197
za7?I%m3p+l=95~v%v!yy&G5#21h4TOakxrM&S~5n*KzWEc6QdfH*ZU$yF6opoSmf4
zIqZ;3_1(o*GCf9|;|Q1=d*;=oL+w+rvTyM(3AkxF9w3>*4``zhjI8oCMG{hzlfI*6
zZ$!ErOUIoZf+XX|cc8L4lvYlJs_!H|Z^L6b1h<yhdy-K0{Y79+^=owUy~ZTgY$A$d
z;c<gVYI-T0iZvK4ENq=<SJH!zqqf3ECc|o71a*tv74>kaduEPgtJ_&dcQ;f|SVWAo
zN}NWZza>3O%Hcwy{g62MY^l|AU!jyx^hXk2)4!_I(e6Z|BT`eu{PiYF#(~zheUp3Z
z-E?{Isd_19%*?Oi`@?DuBsVlS{_2^uuI2a;$4wQpE7v22`fYIMrzaW=MnkfZ5l4}|
zD!%}*WAS(TaIYt3@!~yV_N23MNwH?fjBy>-Mgh0IMe7slE_?5Q7ySa0Z#W#9IN)6K
zAo)<VnoY)0{4;5D;QW=d8ZvRaKiGFaKk=>-5bqQ<D12N$;IMT_(|&L3f^~du=+&k}
zsa<F7Vq<o*p--2q<Td(y<o9do(yWp`qx(^0(;j%_^0a?rD=?`%-h987T#psR+RB<q
zW!Vyy0irH%CX$K`*T+VRI=ru&QAA%T+JzTg!?Cm1E8psAiAtt`6`~3`5_rJORleS>
z0G8D%#t}So9sgwD1M>y%>=980fBk%$B*V4q+B3%NNZbZevZ0&3MB=)I?u2h0E<w%u
zd`!yf9`+9k%ddWBLo%|?eY-Y({b&Uy&fbPy?EcQ9<I`Bt+A!iv9vkoZMZo&kp0Rd?
z4o%CvD%ndLrSm$Gq105d7JL0|i-4k3o8s-K4;xl7jOSeqPQ%9*m#?-t`!u~{WBaUa
zlHRGPL9GG{nZXb_!I||er))}}8$3u8lNR2Xsv{s|A)h_fK#=G>f{eX`SYSoW9r?bT
zkeIEM@3X_Fr>AE97^+5#s7i}K3G=yfTx;23uh7)Jtvw<lTzN-_`UkLGmfE2k$Fmxv
z?s<$l*o2VAq~q?9Z0a5~Z>uiyJk$AL*wNg>(N1S(y5K;dWRT}`YKrk(>;|`mp?!A`
zq@$#>N<8DQ=4Sd^>*J0$vyV7@xXx*J@$8y&dy)Y&i3o<{b-g)vDbk)LPrH&XCHcL?
zzI<uxQS#>Jb3~%UPG!k6J6^^&bNl%0l^5K&r3(HMVINMazgSfUZ$UqWy~-O8pR%4Y
z2+Eqftk<^iXuTWtHII52f#u&@;Kn@rluN_I4$<RC`_WFS-}ZuXvx83X@}lxv`R3Uc
zyldIG;lnCA=`S8mUm@;CGr#Mwu@W?Lz0uHzvoX40W#cYY(s^CMZN!dG@K*4Behd5F
zBreiaSI4kNmrZE2<=Gsv`qiAZ+R<m7y#ZBW!CNe6v<GehI|h2f;jy1b!?gb!!v5cs
zB(~V~<&h{9ff#@ofEa)nfEa)nfEa)n_`kvc7Y!)y{}r?lPKW`B0f+&J0f+&J0f+&J
z0f+&J0f+&J0f+&Jfxp54?fxIe`V0X7|0e=Z1mFh%Jcj{4A(C<_Xk|hqN$+L_k?;eQ
zaB_q{C4MV8l3cKf67IbO&ww0pcsbGF5aqNVIhm5KO*yTiprk-^RM-QcD0j#g8T_TT
zaei+=|4fPerOJPw%R)84LH9O*f!+q7!M`ZY9fAq8$o>qP+l@vHH<m_>{QIB&8Dh>+
zN+}n}XDE3Z<f{hc<0d+kbAcPwe^1QPpq9J%iXIf@pDY0UeYE_2o*XoWL2nCMSROg(
zY{AR`V_~Jaav*sD7$b~<fti_=8AkKu02lxmt-vb2)hztx`{D`(HmL6<oM9DLR#nsR
zdm$)c=;K?UA|$y5;{aA)x76})Z36%kBb<Sm=FFi9I&;9_pfd*xBg0bDpqmJskCADE
zuOIU&Wi`WsLKRhYjlD(pR*Pdu{61F%Bp$z*Sn1QT2<ZH>(op3;zGpjV+==D)YV_G%
zsL$CeJd4O{cN)`dY!G$w`t?vY>AO1-CU-c0A^lQRemjRT`Q{uasWCRcLF8|ZM09gj
z#P*}-ZCGCG*j*`Kdr2axRa-B|ZTk7I6GO`TKfZFfxc_BSTYXHtM#q|a&w?%;yMKN5
za!3$X(r{bVR@;<u_dUbChv#w4#}lK13JnkJz(#GzUA4pg=zy41H}7$C-@cOF72-km
zUCD}W57Vkl)=6+V>t^`)%Ua!7E0O=ySLKX!T3oc~!R<^>J1;iB<Itq;vP)|1-LvOc
zB(Jl2=!F_*B^z14r!7tj4zm59{PbRaICv!JMsM}ITNu&f;>3*s2c?}tMR;u*2t0Yp
z#Z#IdOoCbIFQVE_h?nkVcBB;T&yqPAc|K-zKHphzW75%W2(0*vmN!o&b%ZU^PoAZu
zCB}=%7^=M|V@_RL<%4*!3FhOBFTT*z-YdFrh@sMqVocp6^&J->!^F7xor^#;V&vxg
z((HGgVlPI1edK}`TQ4;f%v*URUGz~IZ$de7wR&Xrg{QqG(JZ1z#P8|r-neu)rp~^m
zy>i#(N(BespiN}wwS$|v+#jyhddG5x7xp2n9^ckebLVJ@j$`Y^GqsHih@Q&xwYb5I
zjkdCWTTP$d$P>KRbMQ){;{~N1o9_r|Y_Dlau00ts@IGVXrE@v@r!o%I^l$1uvtZH{
ztv=~j>)>)+7}c`w(Me;qz4dEr_TIS@%Q8^=@z}acn@8roG!>85nwg(d)mK<+qSVH1
zbnoS4n(I$@-VVN#ksyAk-#o?C*>y64$J|ZtbXMd2q$YJ@xvr-E{wr%f)h}Q*#P(e{
zoI8*7I%1M~Ja&GM$#*Qbj#Ruq;`fR#9>u->8sDsn!o)AgwL*d0*T+OxNb+nmdbWxs
zP5dnz{Ba!d=#kcgkAb=e<K$kX3-UWYCWwMqnC^%0+DMaw&d&IKC9i%~->24@>v((h
z%dGsI$02U6IlVEzYHm7ta`gPgG-a6&j6Rg)1|wcp!1nA>qdTe^LmyNXgjLG29@}JQ
zG`FaI+LCT6XZz%RlFR;rUHgnQb7?w_bu|zqMAGiz{qYk@BzRsu>BdSvN51;|b4n&9
zmK?WBn4H5%YyVpTMfqpS{4FQkTp@smKHv_8|33x{IYmNFk&sg)<P-@xMM6%If9ezo
zo~+rqm~Q|)jO#ggT(}iE)^mPhCva4-J>dMAHHyWU$$=Tpc#lnkas9VW>rfiR0K~w5
zEd!ulxCtX61UbN`C8Qlh7#l0F2o9F^d@bY4W4GAv4gZ^k42bHKybwwX<u<v>K%R9A
zFZn2fa&`Hs6WG+!Ed=KlS7&n!!4>a?@u0Q8{J4g;r9IR8{XUF`w4R?1(P#z4B1$4V
z(wm${Nu;&#t$?C@UB)1EBjqaj0wq_5oMJ#UmZ22+l2bK*2p~Jy*9w5#r1h)50q`0n
zmJ&%$G+V#!Ti|GED9YDm49Fx(>{iMNa-kex;6^^;fkcs`n0^Q#?cwSQfE*;T)pUTy
zD;WH`^FIRpHo#-#Xmv^=xyYUzVL%SIMhW>*3Icx!A=@7ja>=p`1D8S2q^&C;em#L-
zL;Sjofg9x%<(MiZc^}dsnB}j`@mBy91W4=ozA?!K<a2VA0yZDrKjK>6M^U~iVn9AZ
zi3Sg`D}m%JN{PXt?RfI>RpdguAEJ=uj}&rBvdI9cGztw@Q21A8ITE!jg$@k=4k1(m
zk%{oqDO>``0FHpBYd${~SSIX`+ph{3kWW)`DTS1118|maQo^y6e98q%l%f*z?~4Ap
zo}nEfD~=L2jx_)mNmzgmk)O0`DI&ju%pb;j$b~*0gM9|{?GGX3_#;Ag0JfSW!9@pX
zwgT5#N&>l<94kmpB44I=^o=hR<y)n|-(X0}<n&O=83l7nxGkO>Pf0R5NcbV3%zp&R
zMB2bYN4awa%FCcWyg)v(o%bI+Ncy@AB|MOFlblP=R5l_PQ^Kvtmkh}{7<Y2S4`HOg
zx#=@2^r>g&v;vr607;mEjuf>5>5tAcCH&AnO5}D?P4Wo?KXM5uq3PswN(}kN4`HOe
z25N;!vXX?@IY3=R(x#&Wv;WcovlE=;VsfUp9p$`%Ev1}%K~T;9;141D>R^F$CDGYi
z$4zGsNykUiNq-3-W;6Ygp^}nEFmN>mFCM@ccop%lL1ZC`!Rg>up7s9fr47j85iRo3
z4Xj)EDB(^R`n8St4<V#U{fg;j<Zc8Kz!Z}1zbJemUf}%ZxZ6?^Dd7%o=HvwOb(x(%
zgpH<*m#~r6!04<kiQMu9;a@l))>HB*rCZIyKuF|dayCW|7V;M%EkOg{y9NMBG`g0c
zt@?}7MhRE7z*3UM$yo*lK`!Fz<Xe;+a>)-tqcOF_)@Nu4F;+&B06-^BoyhjPE9oC6
zjvP+@1#BMxe;)Th&?EmEHpu>;P9pRLF#s_DF#s_DF#s_DF#s_DF#s_DF#s_DG4Nl_
z0PX&tf%^!+^Aq?51&9HN0f+&J0f+&J0f+&J0f+&J0f+&J0f>RWjR6vhixG$bty`At
zeh_<ix%v_GjD0cQF0PJ#Klx&aI|w*W9i4pyU%Wd`pN0d5g;A=O{+3`F@PC2#J?kNw
zeE>ax9^{nEpX1AGtAPvL{dwwgov*eeZ8#%vg!rEuK@cwu1I@<Z^V)DA{(qeI73Spx
zn&lG%&oph9i)W5!8ZHXhEa&}8+hc~@ps6W}D~j&UF9?vUmTmcdi|K9n$!x2I%^oAf
zc8lG+5PK}Q?AoyvAug||Xsx<cQSm2>pO!Mz6qFPdjZDQ6;w}V&w~nGBkw{b^sw&`p
zofR$270r#d%DWIeJ=ByG{RqCUjsyiP0V}T0u18DM*K@&Pu=;wQI06RYh4I88#eKjs
zE(Ba4K^)<T_aflD2uN{dafIUMLXIvNUq2j}=1*{v*P^HU5rRB$2ttrISesT+(b3Nj
z?7&+N;o*u1K@eSB2{?H_Z;T^O2jT6DlP7}GtBW|y?%}H7@8#%%b9BdHb#y#2zV0|*
zu>9v<br9k|t7s@IiGMEoX9ZR01@*u|f@t*h6qg3(fX4<Q=t)R%gcCR(c_)mgt4ENI
zw3#=~3$YL5<tMGXl;P(ZjMGt3@ecgFPsF)8yAX8Ll$4eW&|22<!uxt+JalOcdVgsd
z1n=zXB~QS6>nPD0_`L6cClK(SE0R2LPJ|T+zVu$`wd8#W@(vz&M|XKwFDyvB4%jn|
z&@a1S)8hJiAeCNUdR{rEt>y?rgi#<kYgil>9GD~C1MjON;pBu>)>H-SI(pzRz6fWJ
zAa55x#72xKI1k=<KUbPLxgW@aFT&3S<Bh}8dqv>=y$SvVkWsKdkWg>%L!*<%ElqTe
zak0~7_#}>IH4VuyB*Qex|CeRh6i>i^J}$-e>Eei{9Rp}kRbW6Jfc?HaDzPl-)Xk7Q
zLOK=H-2Wk+igP8nfF~|r9C&uc1>g`E#FC&c=~4u!f4?uXYz#{()E*L9NM!$QBD+F`
zV!?ZaB@Iefrr(!E4%m_`Vjx+BWbyBj#owLI|LkNA2bQF9FC>kSH2yu(_(e1RK_`ND
zGn@k87Y8?F|Nk#N<$x+f3_uJ(3_uJ(3_uJ(3_uJ(3_uJ(3_uJ(4E&#A!0biXu&Uzj
zQRm?|+%r~{L#EdAa-=YYMWC+g^^@T-i3DnLV~7;hZPwoO;*9W4uU2<uKUKkk06)h)
za`{Y|H$uENRUytw6lsbb6F7;0OBJo{uABb(fzlN(%fuq&%gzVQqxPL=>R*{jzY5!j
zjEsD~tNER6r(J370}W62<}8(2_gf`~1)haTZk(r@)|*tCUK5L#;MQgi;?LgJysh6;
zRr`XW6-74cVw6HjS+7c=RJK5bPcJJ=@A|f(H{JSiYS|B*SZ~$64)pW2<HaP&X*DkN
z=Sc)o;-<!iP6sYHI+b?UO{vC6Qi^NewEP@oh^$XBd)U`EUlU&!!}#sM5uEo>dZOz6
zg~S`vs_#FX2+2AymA;uOXi$|HHh_NbN|=so3^T<c&nyD!hZPF0j65qE=DU@4WBOtA
zsoVACLJC)lyX=d5VgqzjIv6*9^!42=2#8v}VaOlX>ue}~(#NOMcTEQ$^7vwMMcC*f
zV9|5JY04g_K0TDHo??>aZXKOvx|2b+fQ{2oN#=vJ^#`<tlTCbTLb193dpFtIE9I_f
z3`dT~;26(sO40c!XLItvD<_Bj$12lnxbq&qZn`V%AhH|3y{nx1C^u06ZfO;k&L3pe
zv-4S4fYzA1$y|KBlG5<Qo`EsbSI260r7BmEPMIB59vZa9$0(N0BP**0;rlXmDA_tw
z#MrxUcIS=BjVQ<S4%*nedq!6Fs%M7_dq|yh7c1>dv58?vHqG!1!FsE_sou!tH)WHW
z%G!_hPOv;+)UOhLi)yG2p2t@#0urz{@9s{`$FRxXEx)IpVlb`$Nb++P65jIDOR5kx
zUgiC>y3myTxBl+W2!mnd2BFEf^X}AB3Mo&t&4!!P3(>FguD4BY)--R+0B*PMZ%B3b
z+E|ayAybAgZOa{9cZxXJZ<EkIR50+aSE2Ng((^^2%%r1tlIJ%6@UZQ<;+&vcW~|8$
z+79A}UhOOHf6ONkS&y8b(m!vP<(!#2ADwzRIH2{;0DneaXTX!q`Km_;B$Wlj_s&Io
zU;8-kNntfTeyzrMwx^0(J|(6|TvHz=Qui{=ymS#bN(@{CLguKQ=9%5YPwX=61v3g2
zLhT&-U)^VGKt^$VOz?$`vx>day(1|sZQ^XMSgbYiG<Mg*yNO2Sz^KuF?U_YD|A8F8
zzN!41wD$J%R7EM-Pofu)kx@@ycJ3!8ID3(+qB_RL_C(#SmFH6~)}4A$9?SK#S3>*6
z+U8fzEjOD@kzpC{KdrHC%J*y;-mTH)BVW>#)qGptTHfA5KH#HQ_OUJ36YKVt28_$;
z8l28O`$62{_QfF)4dNk3WxbH8^Alu?3M)tTlT&?z%F~v$x!H%koVd!8Ig<)67Fpr)
z0?YXoiQd@~IariLd8G)Iuczm2bIy(=qpQF2iS`ue%{UXujspn}kNK2i=R=Yc2B~PX
zJo3U|-rR|bIbGkZxhCN*mKW%b*tdfE?@VkfJXI;DQF}!~sGagIDSWY-mKi2(Jk5Yq
zWZAMv2%4xqdL41bIG?0wHtIuYkJhl~yV<v?7+s%#Tb}=BZkr6X<*@Os;>e-C^0KJv
z4ZEXW81^lkPBzS_-I-a~aVTX-!!|cEGlnAp#$F_2$<=bIcRv&RI$f>@f&nLlcbom>
zs#I7oW~{p_M5WU5X7TjJUgX#gD=!a8E3rEh**69Ac|VO*DJm*daC#oL5vnZie>C2<
z=h~UgTOC!;?g<oaZmFJ;oIL3NEH!u$&{}`o>XQ;`QfcWJ_#ttA+_aYUt024xaLl5v
z%=TqZs$B}}7)bMfc6=_gTVEDyQOcfHcTT2wjl}AD^=RP~mV@T)RstIKQjc}!m~mme
zbymfvOZoTZaizAjS_bS~$8=XIL8#>J?%vACh~llf7V8#vOiaB<NwvQFpmiRT=N=z%
zA|gJsX7?WBrjm{vd|l<S(PEoDAtLKccYDZeNGWVsqkz{JIke$k-)Qk{YV@ZIool)}
z9!DprYo)b1cD!!1e|u4DGC1aP_(V{>8R8l;eT}}Vf8L{!;B=JzufpX4vyECNs-E@y
z!j*}4j&k=M)%0db*$>pnATLYMXZm|wHC}1hlG3}1XL4u}P&Iw9kS?h1q&Bp(%t^Wl
z)e+x@{YrDQ%2BS?D~tr8P=UPI0Y}Gnu^PMhdeL5kte5qRz;V<}b&&ER@FI3$LNZG>
zb4_L7{3<Q#JX@HX*aYfR*e7^dz<PJ9Dk0InJOT%8u??hga8K!)ES}c&oRe+Q+vL~U
zT+hl#^Q_#w+qq79brmt~{zxIXWX&G~lgI3{#w}ORA*&})Z{VW^cV#vTKD;>{7{(IJ
z#=Qs}D4X3>ZXb8nH0`+c6eee;%JX_$m_UlDPy!&K^2?_;Gp$(Xl84e){k4Vq&N?}5
z#}-CqI}@BTWb1;nZp3wuWY}g{7FXllS}>(%HSKwqGhPda^1nxBSjm4pRjPAN*q!Nu
zz7OWUU|7XA_Q)59v(Ums37vfBj@FCpErLB0L#tIr@;B9C;?E3QCblKyD#)CcrB2Io
zI*Ux!j+WoqUwu;_;eWTIAN@1BJp{>}WjZtPqJJazjT7lS>ykZe-({M_J#D<1n=n1D
zKmV@!IncDM^S`krwH%G=jCk995F4F6hV1ATeh+^ciUy|>wQza6oJhKp<zUp2>O4S`
z*p%|tr@jh>_CoDnN2C7F!)CPU0aJrCzG}?wL|#u;-Mu?+mm>VS(@9g(;o6d{>(t(B
z+G8E;FP*y4ToUCi<|4|f@1f>Ug5EQfWL}snw@J9Zb77$}r`oV$_|hU^HY#LOP;w>2
zJzpbS!6e0{x&Fp%;LKoBk@#H;2IKyrt{o)eRjTMAqmPlF)CtC>qnaB1dwf0fgx+ao
zc;w#pu9&>HZY28MLx<ZT-k0+^V=f&SI?%<U8MUx}Kc&=ki=g1-eE5~pgL<A#n-IPg
zalIAC&VO~pe@PIS^kLq)S!i?hz#!~IY<yl*8VZgH)v!{J_Sbw;E0<ZTm$ez*n2+E!
zz9SA-iOD&Qo8vl8p3lzCTKDE{X>^xoOpvpa^f`welBvGC*h;3yh;tkPb7RlEnslgr
z3Rd<l{v`o7Eyn{SQ}_XG6oQdezNSb*YI4$dwCs&Ymt*O;vqO+%{P+%3Hiy#6iBR>O
z#OG~zEQjFM5_?Y)%D%q{tf_vDPQKTe#F|Y+aV$J;5J^ohg;TKxgN22y6YWZR@Nv{u
z*vMp9t&5;;(YvA^4t3AWk!*E4%joWg>IsX8aaM`b2=q6EXDNwvA<=$FoP4&_>bb8_
zN+|jx39sp2)#+$=BGD14DPsP5lO^LoYumobz4dOoJor?-6f<V#SMmK}H3yO#nj3%h
zOj_4+e2C+wirJOx5kvhpxbxE!4F;nj*~o~a$X=CS0NAnkJAJs<6SH{n9x;2;*|?-w
zvt!1%4r`-;+uow}33ZpfcfgB&fyp-<4ow_zu6dArC|b=X<0$@_v^jA8%2^GWxZNM@
zyPuzUR|$xBiW(F?t{-sNI;3g8w{^ifJ~#Ag)1lO^vv#pDyV=mE%T@9keLnL0wRCA#
zNuSaED6(k}Jo3xDe;pB+R32}>UrVmX3Sw<#&7`tyiOK*`mp2nh#fIx+BSjtF*Uc!R
zFBI*<i>~3=+3S^W^|VAKQ@{#Qg&YYyVCE`c?^XcIY8B%Mp1F>HGVp=<f_L_aD1*O#
zzD<(h+I8(2V|FBN11Z_i&0ZpL-9mT5w+@$}W_>;;Wpxkx2ZiNVKeHhjS?9i88^3<E
z0uyI%!!CAz=h5+LtY~c*@g<Lq_xvJY{cF!yyF!PiWnPu+rH#^goybsXs#uG?{<cLx
zQL0Vx_S1(As~E=ft_G*!V~fjI+njxx-m$TL);3A+RMenWfrZRqh@9ZedX`f*rOyo>
zq=`ujZ%ow@5VDZZo@yXSbRI#*-a#y|BIb^KUrtEOR?7F;;nUMovwjR!qeWDuMWBTF
zTsf|_?66m8>fY8K5fQGuqeJ}z*e*-$(2e6+jZybJMjdQINMq7*_eeH%51O}C7kQrP
zd@$^2?%`;sGc#RqAW$;M^EoxecrJE>+rrSky9d%y(pe>*@mF&*{jK$J$D7$l96ns<
zw7YnA&AC0vfSE)D!|}S_oVyfh&yuHINtcrRUSeOqwDl-?^Yb|((P5{u<e42W<D0pC
zeD=x<ZroA@e~GXUC)Ho9DucJ6pTb_{jfYQJ&lm({&0W@OTX?kIjry8LJ&eHe?=5g+
zo_)%t;bDj9aisldC)ICzLAlvMCwO^L`K^5OYzyACY~1i+6`k}K52vpX_oJELb=X)5
z8oAzR=)>6<U9hromn!MJuHZIe$0v9z_&&ddeQy#MX{xJZ*rUrPG}`iP4q5$b&RXs0
zv(Dats<7ZKmNVJ|w}2f3J>l@!&!b`5|Fx|DSEYz8b}gz=E4S(>di&ztaE=5&#eLwn
z?;c;!_LF8uuIPxz;uLA7&5Bk=+rZ6Mn(3z^7VqfymntcK`KcdGYuOt^aPd>{_V-nF
zRCdtN($c`FsXM7UXsW8IsbbX}G_-L}4r*8jEiKyb|IDDtKUXmODC<F{9LWA30ongU
z_WzLmKjg#mxB0NFFkbzg!60P+5BXHle60T0`BXvn|BxFX%`*^mF#KQQ2Dn0n{&wjL
z+5bZ>ssC~Hxk46ycRGjc{~-tI|NO~(g*1NAjDN8I|INjD>HfbA;PPdE&T7N-1ilR@
z`@dnhaEwi0$$tj&EQDS#KzohBzsDK&+s6;u0Yh+f`R-A4jEx_XMd)Q0w41ko=gTfu
z$f6_0Ye@>x^p{(FUltix(_|4ixexLyg!~Hs9=(Vmcwqd{4i5Ohe;|gdmQ*4nhS2Hx
zZ#q5yGh)cYw<LzHkT!(0;oqYT9ckBwxTVuFnt(^s#q#@#k%M<hBB8fXK#K8iQjGNb
z{-x(ne{!`*yZ?uA5diQX3J?Pj0}ulc0}ulc0}ulc0}ulc0}ulc1OH|Qj<E?YJpq7z
zxrCko{GCq#=x>$&$2L*>z~vSQINFQRJuw7?D!>cCV1NMv0gAjM_@TcZ@U!@{fZOHO
z71+Qy41jZ8r{!s_qEKnuZv;m`!A_ml2nz=<z_XKEnTy1El~aQw4%~$T!~nzq!~nzq
z!~nzq!~nzq#K8YJ17R#ILQ+yNk|6=(-~sx*;5_}%&c5I}LEsWXzs1?Mnj0Bf7$GbS
zx9l=P{5}V<$rX#(VQOJyY-EnuW4>dzp}7rWr;&}E1IE#v){qw#bkGHtEJ9cq?YEGl
zEro>k1RZvl>&TGAnOTG+Bw$B4euuydPr!W!K##J1qknn^ViWzE|93rn2ac0#AP$SB
zEeN#)<lFFQjV<Lbt$_7;U@~q@EJ89eFcQ!2Fw%Z=boFxn40Cyp<r_$s^S;X!xQZ9J
z)SDk-%PzAmGNhjw=~Hqf<aeF9dSaZH8Ci-mf1{(N9IzW&>1BrHyge{MwBe%tym8<{
zP~c4NlKT!zzZE55z`LZ+!<VDChbH)fOKZ_?oai_|Cpyq(cnSG$9Xa`e>yCY??c{-R
z_FLYQA<4s8goK1(N3?$%9$lcAz8(-_lQ+f}Ts3RC^7mlVmN{FRwm)F>!}}7@cwa2I
zm<x?tx!|SI<H427TnRx-J2a65X;~htBi_^VcU{UH5n^Bw5*39Vh0*cSmQ}-I2w;ax
zTgDZrms7t#(~dYV9NHIjjbFlx@%Hv`1??%o)w<9DIA1?-yOa^&iUZg9_`*2-;32s9
zdpdYwTs@Ws<AV2f4F)~wF&=0?M_;^$$CojHEfK(0ze)#<*a&DpOaKl|^Os-hh@R$w
z$6&viMqi@M&);*YEUoGnresJeFczV;Yhg!FOEbA7qV#Y03Rb>|<(IlEG=(4D-`5d`
zroAQL%Lu@sfySkud!^?u#XsM-)9F}=jW#JuybxWn;NX_$(}lj~+w!gqZ4p8GN`l}2
hqOCE=NLwe6^|vp28m%f6AO;`?AO;`?{y${k{{eBPv%CNR

literal 0
HcmV?d00001

diff --git a/draw.odin b/draw.odin
index 04f96a2..845311e 100644
--- a/draw.odin
+++ b/draw.odin
@@ -56,151 +56,109 @@ blit_quad :: proc( draw_list : ^DrawList, p0 : Vec2 = {0, 0}, p1 : Vec2 = {1, 1}
 		// p0.x, p0.y, p1.x, p1.y, uv0.x, uv0.y, uv1.x, uv1.y);
 	v_offset := cast(u32) len(draw_list.vertices)
 
-	vertex := Vertex {
-		{p0.x, p0.y},
-		uv0.x, uv0.y
+	quadv : [4]Vertex = {
+		{
+			{p0.x, p0.y},
+			uv0.x, uv0.y
+		},
+		{
+			{p0.x, p1.y},
+			uv0.x, uv1.y
+		},
+		{
+			{p1.x, p0.y},
+			uv1.x, uv0.y
+		},
+		{
+			{p1.x, p1.y},
+			uv1.x, uv1.y
+		}
 	}
-	append_elem( & draw_list.vertices, vertex )
-
-	vertex = Vertex {
-		{p0.x, p1.y},
-		uv0.x, uv1.y
-	}
-	append_elem( & draw_list.vertices, vertex )
-
-	vertex = Vertex {
-		{p1.x, p0.y},
-		uv1.x, uv0.y
-	}
-	append_elem( & draw_list.vertices, vertex )
-
-	vertex = Vertex {
-		{p1.x, p1.y},
-		uv1.x, uv1.y
-	}
-	append_elem( & draw_list.vertices, vertex )
+	append( & draw_list.vertices, ..quadv[:] )
 
 	quad_indices : []u32 = {
-		0, 1, 2,
-		2, 1, 3
-	}
-	for index : i32 = 0; index < 6; index += 1 {
-		append( & draw_list.indices, v_offset + quad_indices[ index ] )
+		0 + v_offset, 1 + v_offset, 2 + v_offset,
+		2 + v_offset, 1 + v_offset, 3 + v_offset
 	}
+	append( & draw_list.indices, ..quad_indices[:] )
 	return
 }
 
-cache_glyph :: proc( ctx : ^Context, font : FontID, glyph_index : Glyph, entry : ^Entry, bounds_0, bounds_1 : Vec2, scale, translate : Vec2  ) -> b32
+cache_glyph :: proc(ctx : ^Context, font : FontID, glyph_index : Glyph, entry : ^Entry, bounds_0, bounds_1 : Vec2, scale, translate : Vec2) -> b32
 {
 	// profile(#procedure)
 	if glyph_index == Glyph(0) {
-		// Note(Original Author): Glyph not in current hb_font
 		return false
 	}
 
-	// Retrieve the shape definition from the parser.
-	shape, error := parser_get_glyph_shape( & entry.parser_info, glyph_index )
-	assert( error == .None )
+	shape, error := parser_get_glyph_shape(&entry.parser_info, glyph_index)
+	assert(error == .None)
 	if len(shape) == 0 {
 		return false
 	}
 
-	if ctx.debug_print_verbose
-	{
-		log( "shape:")
-		for vertex in shape
-		{
-			if vertex.type == .Move {
-				logf("move_to %d %d", vertex.x, vertex.y )
-			}
-			else if vertex.type == .Line {
-				logf("line_to %d %d", vertex.x, vertex.y )
-			}
-			else if vertex.type == .Curve {
-				logf("curve_to %d %d through %d %d", vertex.x, vertex.y, vertex.contour_x0, vertex.contour_y0 )
-			}
-			else if vertex.type == .Cubic {
-				logf("cubic_to %d %d through %d %d and %d %d",
-					vertex.x, vertex.y,
-					vertex.contour_x0, vertex.contour_y0,
-					vertex.contour_x1, vertex.contour_y1 )
-			}
-		}
-	}
+	outside := Vec2{bounds_0.x - 21, bounds_0.y - 33}
 
-	/*
-	Note(Original Author):
-	We need a random point that is outside our shape. We simply pick something diagonally across from top-left bound corner.
-	Note that this outside point is scaled alongside the glyph in ve_fontcache_draw_filled_path, so we don't need to handle that here.
-	*/
-	outside := Vec2 {
-		bounds_0.x - 21,
-		bounds_0.y - 33,
-	}
-
-	// Note(Original Author): Figure out scaling so it fits within our box.
-	draw := DrawCall_Default
+	draw            := DrawCall_Default
 	draw.pass        = FrameBufferPass.Glyph
 	draw.start_index = u32(len(ctx.draw_list.indices))
 
-	// Note(Original Author);
-	// Draw the path using simplified version of https://medium.com/@evanwallace/easy-scalable-text-rendering-on-the-gpu-c3f4d782c5ac.
-	// Instead of involving fragment shader code we simply make use of modern GPU ability to crunch triangles and brute force curve definitions.
-	path := ctx.temp_path
-	clear( & path)
-	for edge in shape	do switch edge.type
-	{
+	path := &ctx.temp_path
+	clear(path)
+
+	append_bezier_curve :: #force_inline proc(path: ^[dynamic]Vertex, p0, p1, p2: Vec2, quality: u32) {
+		step := 1.0 / f32(quality)
+		for index := u32(1); index <= quality; index += 1 {
+			alpha := f32(index) * step
+			append( path, Vertex { pos = eval_point_on_bezier3(p0, p1, p2, alpha) } )
+		}
+	}
+
+	append_bezier_curve_cubic :: #force_inline proc(path: ^[dynamic]Vertex, p0, p1, p2, p3: Vec2, quality: u32) {
+		step := 1.0 / f32(quality)
+		for index := u32(1); index <= quality; index += 1 {
+			alpha := f32(index) * step
+			append( path, Vertex { pos = eval_point_on_bezier4(p0, p1, p2, p3, alpha) } )
+		}
+	}
+
+	for edge in shape do #partial switch edge.type {
 		case .Move:
 			if len(path) > 0 {
-				draw_filled_path( & ctx.draw_list, outside, path[:], scale, translate, ctx.debug_print_verbose )
+					draw_filled_path(&ctx.draw_list, outside, path[:], scale, translate, ctx.debug_print_verbose)
+					clear(path)
 			}
-			clear( & path)
 			fallthrough
 
 		case .Line:
-			append( & path, Vec2{ f32(edge.x), f32(edge.y) })
+			append( path, Vertex { pos = Vec2 { f32(edge.x), f32(edge.y)} } )
 
 		case .Curve:
-			assert( len(path) > 0 )
-			p0 := path[ len(path) - 1 ]
+			assert(len(path) > 0)
+			p0 := path[ len(path) - 1].pos
 			p1 := Vec2{ f32(edge.contour_x0), f32(edge.contour_y0) }
 			p2 := Vec2{ f32(edge.x), f32(edge.y) }
-
-			step  := 1.0 / f32(ctx.curve_quality)
-			alpha := step
-			for index := i32(0); index < i32(ctx.curve_quality); index += 1 {
-				append( & path, eval_point_on_bezier3( p0, p1, p2, alpha ))
-				alpha += step
-			}
+			append_bezier_curve( path, p0, p1, p2, ctx.curve_quality )
 
 		case .Cubic:
-			assert( len(path) > 0 )
-			p0 := path[ len(path) - 1]
+			assert( len(path) > 0)
+			p0 := path[ len(path) - 1].pos
 			p1 := Vec2{ f32(edge.contour_x0), f32(edge.contour_y0) }
 			p2 := Vec2{ f32(edge.contour_x1), f32(edge.contour_y1) }
 			p3 := Vec2{ f32(edge.x), f32(edge.y) }
-
-			step  := 1.0 / f32(ctx.curve_quality)
-			alpha := step
-			for index := i32(0); index < i32(ctx.curve_quality); index += 1 {
-				append( & path, eval_point_on_bezier4( p0, p1, p2, p3, alpha ))
-				alpha += step
-			}
-
-		case .None:
-			assert(false, "Unknown edge type or invalid")
+			append_bezier_curve_cubic( path, p0, p1, p2, p3, ctx.curve_quality )
 	}
+
 	if len(path) > 0 {
-		draw_filled_path( & ctx.draw_list, outside, path[:], scale, translate, ctx.debug_print_verbose )
+		draw_filled_path(&ctx.draw_list, outside, path[:], scale, translate, ctx.debug_print_verbose)
 	}
 
-	// Note(Original Author): Apend the draw call
-	draw.end_index = cast(u32) len(ctx.draw_list.indices)
+	draw.end_index = u32(len(ctx.draw_list.indices))
 	if draw.end_index > draw.start_index {
-		append(& ctx.draw_list.calls, draw)
+		append(&ctx.draw_list.calls, draw)
 	}
 
-	parser_free_shape( & entry.parser_info, shape )
+	parser_free_shape(&entry.parser_info, shape)
 	return true
 }
 
@@ -301,10 +259,9 @@ cache_glyph_to_atlas :: proc( ctx : ^Context,
 	glyph_buffer.batch_x   += i32(gwidth_scaled_px)
 	screenspace_x_form( & glyph_draw_translate, & glyph_draw_scale, glyph_buffer_size )
 
-	call : DrawCall
+	clear_target_region : DrawCall
 	{
-		// Queue up clear on target region on atlas
-		using call
+		using clear_target_region
 		pass        = .Atlas
 		region      = .Ignore
 		start_index = cast(u32) len(glyph_buffer.clear_draw_list.indices)
@@ -314,9 +271,12 @@ cache_glyph_to_atlas :: proc( ctx : ^Context,
 			{ 1.0, 1.0 },  { 1.0, 1.0 } )
 
 		end_index = cast(u32) len(glyph_buffer.clear_draw_list.indices)
-		append( & glyph_buffer.clear_draw_list.calls, call )
+	}
 
-		// Queue up a blit from glyph_update_FBO to the atlas
+	blit_to_atlas : DrawCall
+	{
+		using blit_to_atlas
+		pass        = .Atlas
 		region      = .None
 		start_index = cast(u32) len(glyph_buffer.draw_list.indices)
 
@@ -325,14 +285,17 @@ cache_glyph_to_atlas :: proc( ctx : ^Context,
 			src_position,       src_position  + src_size )
 
 		end_index = cast(u32) len(glyph_buffer.draw_list.indices)
-		append( & glyph_buffer.draw_list.calls, call )
 	}
 
+	append( & glyph_buffer.clear_draw_list.calls, clear_target_region )
+	append( & glyph_buffer.draw_list.calls, blit_to_atlas )
+
 	// Render glyph to glyph_update_FBO
 	cache_glyph( ctx, font, glyph_index, entry, vec2(bounds_0), vec2(bounds_1), glyph_draw_scale, glyph_draw_translate )
 }
 
-can_batch_glyph :: #force_inline proc( ctx : ^Context, font : FontID, entry : ^Entry, glyph_index : Glyph,
+// If the glyph is found in the atlas, nothing occurs; otherwise, the glyph call is set up to cache it to the atlas
+check_glyph_in_atlas :: #force_inline proc( ctx : ^Context, font : FontID, entry : ^Entry, glyph_index : Glyph,
 	lru_code    : u64,
 	atlas_index : i32,
 	region_kind : AtlasRegionKind,
@@ -402,7 +365,7 @@ directly_draw_massive_glyph :: proc( ctx : ^Context,
 	// Figure out the source rect.
 	glyph_position := Vec2 {}
 	glyph_size     := vec2(glyph_padding_dbl)
-	glyph_dst_size := glyph_size + bounds_scaled
+	glyph_dst_size := glyph_size    + bounds_scaled
 	glyph_size     += bounds_scaled * over_sample
 
 	// Figure out the destination rect.
@@ -415,9 +378,11 @@ directly_draw_massive_glyph :: proc( ctx : ^Context,
 	textspace_x_form( & glyph_position, & glyph_size, glyph_buffer_size )
 
 	// Add the glyph drawcall.
-	call : DrawCall
+	calls : [2]DrawCall
+
+	draw_to_target := & calls[0]
 	{
-		using call
+		using draw_to_target
 		pass        = .Target_Uncached
 		colour      = ctx.colour
 		start_index = u32(len(ctx.draw_list.indices))
@@ -427,18 +392,20 @@ directly_draw_massive_glyph :: proc( ctx : ^Context,
 				glyph_position, glyph_position + glyph_size )
 
 		end_index = u32(len(ctx.draw_list.indices))
-		append( & ctx.draw_list.calls, call )
 	}
 
-	// Clear glyph_update_FBO.
-	call.pass              = .Glyph
-	call.start_index       = 0
-	call.end_index         = 0
-	call.clear_before_draw = true
-	append( & ctx.draw_list.calls, call )
+	clear_glyph_update := & calls[1]
+	{
+		// Clear glyph_update_FBO.
+		clear_glyph_update.pass              = .Glyph
+		clear_glyph_update.start_index       = 0
+		clear_glyph_update.end_index         = 0
+		clear_glyph_update.clear_before_draw = true
+	}
+	append( & ctx.draw_list.calls, ..calls[:] )
 }
 
-draw_cached_glyph :: proc( ctx : ^Context,
+draw_cached_glyph :: proc( ctx : ^Context, shaped : ^ShapedText,
 	entry              : ^Entry,
 	glyph_index        : Glyph,
 	lru_code           : u64,
@@ -480,26 +447,45 @@ draw_cached_glyph :: proc( ctx : ^Context,
 	bounds_0_scaled := bounds_0 * entry.size_scale //- { 0.5, 0.5 }
 	bounds_0_scaled  = ceil(bounds_0_scaled)
 
-	dst       := position + bounds_0_scaled * scale
-	dst       -= glyph_padding * scale
-	dst_scale := glyph_scale   * scale
+	dst       := position + (bounds_0_scaled - glyph_padding) * scale
+	dst_scale := glyph_scale * scale
 
 	textspace_x_form( & slot_position, & glyph_scale, atlas_size )
 
-	// Add the glyph drawcall
-	call := DrawCall_Default
+	// Shape call setup
+	when false
 	{
-		using call
-		pass        = .Target
-		colour      = ctx.colour
-		start_index = cast(u32) len(ctx.draw_list.indices)
+		call := DrawCall_Default
+		{
+			using call
+			pass        = .Target
+			colour      = ctx.colour
+			start_index = cast(u32) len(shaped.draw_list.indices)
 
-		blit_quad( & ctx.draw_list,
-			dst,           dst           + dst_scale,
-			slot_position, slot_position + glyph_scale )
-		end_index   = cast(u32) len(ctx.draw_list.indices)
+			blit_quad( & shaped.draw_list,
+				dst,           dst           + dst_scale,
+				slot_position, slot_position + glyph_scale )
+			end_index   = cast(u32) len(shaped.draw_list.indices)
+		}
+		append( & shaped.draw_list.calls, call )
+	}
+	else
+	{
+		// Add the glyph drawcall
+		call := DrawCall_Default
+		{
+			using call
+			pass        = .Target
+			colour      = ctx.colour
+			start_index = cast(u32) len(ctx.draw_list.indices)
+
+			blit_quad( & ctx.draw_list,
+				dst,           dst           + dst_scale,
+				slot_position, slot_position + glyph_scale )
+			end_index   = cast(u32) len(ctx.draw_list.indices)
+		}
+		append( & ctx.draw_list.calls, call )
 	}
-	append( & ctx.draw_list.calls, call )
 	return true
 }
 
@@ -509,7 +495,7 @@ draw_cached_glyph :: proc( ctx : ^Context,
 // Note(Original Author):
 // WARNING: doesn't actually append drawcall; caller is responsible for actually appending the drawcall.
 // ve_fontcache_draw_filled_path
-draw_filled_path :: proc( draw_list : ^DrawList, outside_point : Vec2, path : []Vec2,
+draw_filled_path :: proc( draw_list : ^DrawList, outside_point : Vec2, path : []Vertex,
 	scale     := Vec2 { 1, 1 },
 	translate := Vec2 { 0, 0 },
 	debug_print_verbose : b32 = false
@@ -519,19 +505,16 @@ draw_filled_path :: proc( draw_list : ^DrawList, outside_point : Vec2, path : []
 	{
 		log("outline_path:")
 		for point in path {
-			vec := point * scale + translate
+			vec := point.pos * scale + translate
 			logf(" %0.2f %0.2f", vec.x, vec.y )
 		}
 	}
 
 	v_offset := cast(u32) len(draw_list.vertices)
 	for point in path {
-		vertex := Vertex {
-			pos = point * scale + translate,
-			u = 0,
-			v = 0,
-		}
-		append( & draw_list.vertices, vertex )
+		point := point
+		point.pos = point.pos * scale + translate
+		append( & draw_list.vertices, point )
 	}
 
 	outside_vertex := cast(u32) len(draw_list.vertices)
@@ -546,42 +529,71 @@ draw_filled_path :: proc( draw_list : ^DrawList, outside_point : Vec2, path : []
 
 	for index : u32 = 1; index < cast(u32) len(path); index += 1 {
 		indices := & draw_list.indices
-		append( indices, outside_vertex )
-		append( indices, v_offset + index - 1 )
-		append( indices, v_offset + index )
+		to_add := [3]u32 {
+			outside_vertex,
+			v_offset + index - 1,
+			v_offset + index
+		}
+		append( indices, ..to_add[:] )
 	}
 }
 
-draw_text_batch :: proc( ctx : ^Context, entry : ^Entry, shaped : ^ShapedText,
+draw_text_batch :: proc(ctx: ^Context, entry: ^Entry, shaped: ^ShapedText,
 	batch_start_idx, batch_end_idx : i32,
-	position,        scale         : Vec2,
-	snap_width,      snap_height   : f32 )
+	position, scale                : Vec2,
+	snap_width, snap_height        : f32 )
 {
-	flush_glyph_buffer_to_atlas( ctx )
+	flush_glyph_buffer_to_atlas(ctx)
+
+	atlas         := & ctx.atlas
+	atlas_size    := Vec2{ f32(atlas.width), f32(atlas.height) }
+	glyph_padding := f32(atlas.glyph_padding)
+
 	for index := batch_start_idx; index < batch_end_idx; index += 1
 	{
-		glyph_index := shaped.glyphs[ index ]
+			glyph_index := shaped.glyphs[index]
 
-		if glyph_index == 0                                          do continue
-		if parser_is_glyph_empty( & entry.parser_info, glyph_index ) do continue
+			if glyph_index == 0 || parser_is_glyph_empty( & entry.parser_info, glyph_index) do continue
 
-		region_kind, region, over_sample := decide_codepoint_region( ctx, entry, glyph_index )
-		lru_code                         := font_glyph_lru_code(entry.id, glyph_index)
-		atlas_index                      := cast(i32) -1
+			region_kind, region, over_sample := decide_codepoint_region( ctx, entry, glyph_index )
+			lru_code                         := font_glyph_lru_code( entry.id, glyph_index )
+			atlas_index                      := region_kind != .E ? LRU_get( & region.state, lru_code ) : -1
+			bounds_0, bounds_1               := parser_get_glyph_box( & entry.parser_info, glyph_index )
+			vbounds_0   := vec2(bounds_0)
+			vbounds_1   := vec2(bounds_1)
+			bounds_size := Vec2 { vbounds_1.x - vbounds_0.x, vbounds_1.y - vbounds_0.y }
 
-		if region_kind != .E do atlas_index = LRU_get( & region.state, lru_code )
-		bounds_0, bounds_1 := parser_get_glyph_box( & entry.parser_info, glyph_index )
+			shaped_position := shaped.positions[index]
+			glyph_translate := position + shaped_position * scale
 
-		shaped_position := shaped.positions[index]
-		glyph_translate := position + shaped_position * scale
+			if region_kind == .E
+			{
+					directly_draw_massive_glyph(ctx, entry, glyph_index,
+						vbounds_0, vbounds_1,
+						bounds_size,
+						over_sample, glyph_translate, scale )
+			}
+			else if atlas_index != -1
+			{
+					slot_position, _ := atlas_bbox( atlas, region_kind, atlas_index )
+					glyph_scale      := bounds_size * entry.size_scale + glyph_padding
+					bounds_0_scaled  := ceil( vbounds_0 * entry.size_scale )
+					dst              := glyph_translate + (bounds_0_scaled - glyph_padding) * scale
+					dst_scale        := glyph_scale * scale
+					textspace_x_form( & slot_position, & glyph_scale, atlas_size )
 
-		glyph_cached := draw_cached_glyph( ctx,
-			entry,       glyph_index,
-			lru_code,    atlas_index,
-			vec2(bounds_0), vec2(bounds_1),
-			region_kind, region, over_sample,
-			glyph_translate, scale)
-		assert( glyph_cached == true )
+					call             := DrawCall_Default
+					call.pass         = .Target
+					call.colour       = ctx.colour
+					call.start_index  = u32(len(ctx.draw_list.indices))
+
+					blit_quad(&ctx.draw_list, 
+						dst,           dst           + dst_scale,
+						slot_position, slot_position + glyph_scale )
+
+					call.end_index = u32(len(ctx.draw_list.indices))
+					append(&ctx.draw_list.calls, call)
+			}
 	}
 }
 
@@ -594,7 +606,6 @@ draw_text_shape :: proc( ctx : ^Context,
 	snap_width, snap_height : f32
 ) -> (cursor_pos : Vec2)
 {
-	// position := position //+ ctx.cursor_pos * scale
 	// profile(#procedure)
 	batch_start_idx : i32 = 0
 	for index : i32 = 0; index < cast(i32) len(shaped.glyphs); index += 1
@@ -607,9 +618,9 @@ draw_text_shape :: proc( ctx : ^Context,
 		atlas_index                      := cast(i32) -1
 
 		if region_kind != .E do atlas_index = LRU_get( & region.state, lru_code )
-		if can_batch_glyph( ctx, font, entry, glyph_index, lru_code, atlas_index, region_kind, region, over_sample ) do continue
+		if check_glyph_in_atlas( ctx, font, entry, glyph_index, lru_code, atlas_index, region_kind, region, over_sample ) do continue
 
-		// Glyph has not been catched, needs to be directly drawn.
+		// We can no longer directly append the shape as it has missing glyphs in the atlas
 
 		// First batch the other cached glyphs
 		// flush_glyph_buffer_to_atlas(ctx)
@@ -621,10 +632,10 @@ draw_text_shape :: proc( ctx : ^Context,
 		batch_start_idx = index
 	}
 
-	// flush_glyph_buffer_to_atlas(ctx)
 	draw_text_batch( ctx, entry, shaped, batch_start_idx, cast(i32) len(shaped.glyphs), position, scale, snap_width , snap_height )
 	reset_batch_codepoint_state( ctx )
-	cursor_pos = shaped.end_cursor_pos
+
+	cursor_pos = position + shaped.end_cursor_pos * scale
 	return
 }
 
@@ -650,6 +661,34 @@ flush_glyph_buffer_to_atlas :: proc( ctx : ^Context )
 	}
 }
 
+// flush_glyph_buffer_to_atlas :: proc( ctx : ^Context )
+// {
+// 	// profile(#procedure)
+// 	// Flush drawcalls to draw list
+// 	if len(ctx.glyph_buffer.clear_draw_list.calls) > 0 {
+// 		merge_draw_list( & ctx.draw_list, & ctx.glyph_buffer.clear_draw_list)
+// 		clear_draw_list( & ctx.glyph_buffer.clear_draw_list)
+// 	}
+
+// 	if len(ctx.glyph_buffer.draw_list.calls) > 0 {
+// 		merge_draw_list( & ctx.draw_list, & ctx.glyph_buffer.draw_list)
+// 		clear_draw_list( & ctx.glyph_buffer.draw_list)
+// 	}
+
+// 	// Clear glyph_update_FBO
+// 	if ctx.glyph_buffer.batch_x != 0
+// 	{
+// 			call := DrawCall {
+// 				pass              = .Glyph,
+// 				start_index       = 0,
+// 				end_index         = 0,
+// 				clear_before_draw = true,
+// 			}
+// 			append( & ctx.draw_list.calls, call)
+// 			ctx.glyph_buffer.batch_x = 0
+// 	}
+// }
+
 // ve_fontcache_merge_drawlist
 merge_draw_list :: proc( dst, src : ^DrawList )
 {
@@ -677,42 +716,37 @@ merge_draw_list :: proc( dst, src : ^DrawList )
 	}
 }
 
-optimize_draw_list :: proc( draw_list : ^DrawList, call_offset : int )
-{
+optimize_draw_list :: proc(draw_list: ^DrawList, call_offset: int) {
 	// profile(#procedure)
-	assert( draw_list != nil )
+	assert(draw_list != nil)
 
-	write_index : int = call_offset
-	for index : int = 1 + call_offset; index < len(draw_list.calls); index += 1
+	can_merge_draw_calls :: #force_inline proc "contextless" ( a, b : ^DrawCall ) -> bool {
+		result := \
+		a.pass      == b.pass        &&
+		a.end_index == b.start_index &&
+		a.region    == b.region      &&
+		a.colour    == b.colour      &&
+		! b.clear_before_draw
+		return result
+	}
+
+	write_index := call_offset
+	for read_index := call_offset + 1; read_index < len(draw_list.calls); read_index += 1
 	{
-		assert( write_index <= index )
-		draw_0 := & draw_list.calls[ write_index ]
-		draw_1 := & draw_list.calls[ index ]
+		draw_current := & draw_list.calls[write_index]
+		draw_next    := & draw_list.calls[read_index]
 
-		merge : b32 = true
-		if draw_0.pass      != draw_1.pass        do merge = false
-		if draw_0.end_index != draw_1.start_index do merge = false
-		if draw_0.region    != draw_1.region      do merge = false
-		if draw_1.clear_before_draw               do merge = false
-		if draw_0.colour    != draw_1.colour      do merge = false
-
-		if merge
-		{
-			// logf("merging %v : %v %v", draw_0.pass, write_index, index )
-			draw_0.end_index   = draw_1.end_index
-			draw_1.start_index = 0
-			draw_1.end_index   = 0
+		if can_merge_draw_calls(draw_current, draw_next) {
+			draw_current.end_index = draw_next.end_index
 		}
-		else
-		{
-			// logf("can't merge %v : %v %v", draw_0.pass, write_index, index )
+		else {
+			// Move to the next write position and copy the draw call
 			write_index += 1
-			if write_index != index {
-				draw_2 := & draw_list.calls[ write_index ]
-				draw_2^ = draw_1^
+			if write_index != read_index {
+				draw_list.calls[write_index] = (draw_next^)
 			}
 		}
 	}
 
-	resize( & draw_list.calls, write_index + 1 )
+	resize( & draw_list.calls, write_index + 1)
 }
diff --git a/mappings.odin b/mappings.odin
index db538fa..e575c15 100644
--- a/mappings.odin
+++ b/mappings.odin
@@ -23,10 +23,10 @@ import "core:mem"
 	Arena           :: mem.Arena
 	arena_allocator :: mem.arena_allocator
 	arena_init      :: mem.arena_init
-// import "codebase:grime"
-	// log                :: grime.log
-	// logf               :: grime.logf
-	// profile            :: grime.profile
+import "codebase:grime"
+	log                :: grime.log
+	logf               :: grime.logf
+	profile            :: grime.profile
 
 //#region("Proc overload mappings")
 
diff --git a/misc.odin b/misc.odin
index a27b04c..cba3de4 100644
--- a/misc.odin
+++ b/misc.odin
@@ -1,7 +1,10 @@
 package VEFontCache
 
 import "base:runtime"
-import core_log "core:log"
+import "core:simd"
+import "core:math"
+
+// import core_log "core:log"
 
 Colour  :: [4]f32
 Vec2    :: [2]f32
@@ -17,23 +20,23 @@ vec2i_from_vec2   :: #force_inline proc "contextless" ( v2     : Vec2  ) -> Vec2
 
 // This buffer is used below excluisvely to prevent any allocator recusion when verbose logging from allocators.
 // This means a single line is limited to 32k buffer (increase naturally if this SOMEHOW becomes a bottleneck...)
-Logger_Allocator_Buffer : [32 * Kilobyte]u8
+// Logger_Allocator_Buffer : [32 * Kilobyte]u8
 
-log :: proc( msg : string, level := core_log.Level.Info, loc := #caller_location ) {
-	temp_arena : Arena; arena_init(& temp_arena, Logger_Allocator_Buffer[:])
-	context.allocator      = arena_allocator(& temp_arena)
-	context.temp_allocator = arena_allocator(& temp_arena)
+// log :: proc( msg : string, level := core_log.Level.Info, loc := #caller_location ) {
+// 	temp_arena : Arena; arena_init(& temp_arena, Logger_Allocator_Buffer[:])
+// 	context.allocator      = arena_allocator(& temp_arena)
+// 	context.temp_allocator = arena_allocator(& temp_arena)
 
-	core_log.log( level, msg, location = loc )
-}
+// 	core_log.log( level, msg, location = loc )
+// }
 
-logf :: proc( fmt : string, args : ..any,  level := core_log.Level.Info, loc := #caller_location  ) {
-	temp_arena : Arena; arena_init(& temp_arena, Logger_Allocator_Buffer[:])
-	context.allocator      = arena_allocator(& temp_arena)
-	context.temp_allocator = arena_allocator(& temp_arena)
+// logf :: proc( fmt : string, args : ..any,  level := core_log.Level.Info, loc := #caller_location  ) {
+// 	temp_arena : Arena; arena_init(& temp_arena, Logger_Allocator_Buffer[:])
+// 	context.allocator      = arena_allocator(& temp_arena)
+// 	context.temp_allocator = arena_allocator(& temp_arena)
 
-	core_log.logf( level, fmt, ..args, location = loc )
-}
+// 	core_log.logf( level, fmt, ..args, location = loc )
+// }
 
 reload_array :: proc( self : ^[dynamic]$Type, allocator : Allocator ) {
 	raw          := transmute( ^runtime.Raw_Dynamic_Array) self
@@ -50,61 +53,6 @@ font_glyph_lru_code :: #force_inline proc "contextless" ( font : FontID, glyph_i
 	return
 }
 
-shape_lru_hash :: #force_inline proc "contextless" ( label : string ) -> u64 {
-	hash : u64
-	for str_byte in transmute([]byte) label {
-		hash = ((hash << 8) + hash) + u64(str_byte)
-	}
-	return hash
-}
-
-// For a provided alpha value,
-// allows the function to calculate the position of a point along the curve at any given fraction of its total length
-// ve_fontcache_eval_bezier (quadratic)
-eval_point_on_bezier3 :: #force_inline proc "contextless" ( p0, p1, p2 : Vec2, alpha : f32 ) -> Vec2
-{
-	p0    := vec2_64(p0)
-	p1    := vec2_64(p1)
-	p2    := vec2_64(p2)
-	alpha := f64(alpha)
-
-	weight_start   := (1 - alpha) * (1 - alpha)
-	weight_control := 2.0 * (1 - alpha) * alpha
-	weight_end     := alpha * alpha
-
-	starting_point := p0 * weight_start
-	control_point  := p1 * weight_control
-	end_point      := p2 * weight_end
-
-	point := starting_point + control_point + end_point
-	return { f32(point.x), f32(point.y) }
-}
-
-// For a provided alpha value,
-// allows the function to calculate the position of a point along the curve at any given fraction of its total length
-// ve_fontcache_eval_bezier (cubic)
-eval_point_on_bezier4 :: #force_inline proc "contextless" ( p0, p1, p2, p3 : Vec2, alpha : f32 ) -> Vec2
-{
-	p0    := vec2_64(p0)
-	p1    := vec2_64(p1)
-	p2    := vec2_64(p2)
-	p3    := vec2_64(p3)
-	alpha := f64(alpha)
-
-	weight_start := (1 - alpha) * (1 - alpha) * (1 - alpha)
-	weight_c_a   := 3 * (1 - alpha) * (1 - alpha) * alpha
-	weight_c_b   := 3 * (1 - alpha) * alpha * alpha
-	weight_end   := alpha * alpha * alpha
-
-	start_point := p0 * weight_start
-	control_a   := p1 * weight_c_a
-	control_b   := p2 * weight_c_b
-	end_point   := p3 * weight_end
-
-	point := start_point + control_a + control_b + end_point
-	return { f32(point.x), f32(point.y) }
-}
-
 is_empty :: #force_inline proc ( ctx : ^Context, entry : ^Entry, glyph_index : Glyph ) -> b32
 {
 	if glyph_index == 0 do return true
@@ -122,8 +70,9 @@ reset_batch_codepoint_state :: #force_inline proc( ctx : ^Context ) {
 	ctx.temp_codepoint_seen_num = 0
 }
 
-screenspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2, size : Vec2 ) {
-	when true
+screenspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2, size : Vec2 )
+{
+	if true
 	{
 		pos_64   := vec2_64_from_vec2(position^)
 		scale_64 := vec2_64_from_vec2(scale^)
@@ -137,14 +86,21 @@ screenspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2
 	}
 	else
 	{
+		pos      := position^
+		scale_32 := scale^
+
 		quotient : Vec2 = 1.0 / size
-		(position^) *= quotient * 2.0 - 1.0
-		(scale^)    *= quotient * 2.0
+		pos       = pos   * quotient * 2.0 - 1.0
+		scale_32  = scale_32 * quotient * 2.0
+
+		(position^) = pos
+		(scale^)    = scale_32
 	}
 }
 
-textspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2, size : Vec2 ) {
-	when true
+textspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2, size : Vec2 )
+{
+	if true
 	{
 		pos_64   := vec2_64_from_vec2(position^)
 		scale_64 := vec2_64_from_vec2(scale^)
@@ -158,8 +114,175 @@ textspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2,
 	}
 	else
 	{
-		quotient    : Vec2 = 1.0 / size
+		quotient : Vec2 = 1.0 / size
 		(position^) *= quotient
 		(scale^)    *= quotient
 	}
 }
+
+Use_SIMD_For_Bezier_Ops :: true
+
+when ! Use_SIMD_For_Bezier_Ops
+{
+	// For a provided alpha value,
+	// allows the function to calculate the position of a point along the curve at any given fraction of its total length
+	// ve_fontcache_eval_bezier (quadratic)
+	eval_point_on_bezier3 :: #force_inline proc "contextless" ( p0, p1, p2 : Vec2, alpha : f32 ) -> Vec2
+	{
+		p0    := vec2_64(p0)
+		p1    := vec2_64(p1)
+		p2    := vec2_64(p2)
+		alpha := f64(alpha)
+
+		weight_start   := (1 - alpha) * (1 - alpha)
+		weight_control := 2.0 * (1 - alpha) * alpha
+		weight_end     := alpha * alpha
+
+		starting_point := p0 * weight_start
+		control_point  := p1 * weight_control
+		end_point      := p2 * weight_end
+
+		point := starting_point + control_point + end_point
+		return { f32(point.x), f32(point.y) }
+	}
+
+	// For a provided alpha value,
+	// allows the function to calculate the position of a point along the curve at any given fraction of its total length
+	// ve_fontcache_eval_bezier (cubic)
+	eval_point_on_bezier4 :: #force_inline proc "contextless" ( p0, p1, p2, p3 : Vec2, alpha : f32 ) -> Vec2
+	{
+		p0    := vec2_64(p0)
+		p1    := vec2_64(p1)
+		p2    := vec2_64(p2)
+		p3    := vec2_64(p3)
+		alpha := f64(alpha)
+
+		weight_start := (1 - alpha) * (1 - alpha) * (1 - alpha)
+		weight_c_a   := 3 * (1 - alpha) * (1 - alpha) * alpha
+		weight_c_b   := 3 * (1 - alpha) * alpha * alpha
+		weight_end   := alpha * alpha * alpha
+
+		start_point := p0 * weight_start
+		control_a   := p1 * weight_c_a
+		control_b   := p2 * weight_c_b
+		end_point   := p3 * weight_end
+
+		point := start_point + control_a + control_b + end_point
+		return { f32(point.x), f32(point.y) }
+	}
+}
+else
+{
+	Vec2_SIMD :: simd.f32x4
+
+	vec2_to_simd :: #force_inline proc "contextless" (v: Vec2) -> Vec2_SIMD {
+		return Vec2_SIMD{v.x, v.y, 0, 0}
+	}
+
+	simd_to_vec2 :: #force_inline proc "contextless" (v: Vec2_SIMD) -> Vec2 {
+		return Vec2{ simd.extract(v, 0), simd.extract(v, 1) }
+	}
+
+	vec2_add_simd :: #force_inline proc "contextless" (a, b: Vec2) -> Vec2 {
+		simd_a := vec2_to_simd(a)
+		simd_b := vec2_to_simd(b)
+		result := simd.add(simd_a, simd_b)
+		return simd_to_vec2(result)
+	}
+
+	vec2_sub_simd :: #force_inline proc "contextless" (a, b: Vec2) -> Vec2 {
+		simd_a := vec2_to_simd(a)
+		simd_b := vec2_to_simd(b)
+		result := simd.sub(simd_a, simd_b)
+		return simd_to_vec2(result)
+	}
+
+	vec2_mul_simd :: #force_inline proc "contextless" (a: Vec2, s: f32) -> Vec2 {
+		simd_a := vec2_to_simd(a)
+		simd_s := Vec2_SIMD{s, s, s, s}
+		result := simd.mul(simd_a, simd_s)
+		return simd_to_vec2(result)
+	}
+
+	vec2_div_simd :: #force_inline proc "contextless" (a: Vec2, s: f32) -> Vec2 {
+		simd_a := vec2_to_simd(a)
+		simd_s := Vec2_SIMD{s, s, s, s}
+		result := simd.div(simd_a, simd_s)
+		return simd_to_vec2(result)
+	}
+
+	vec2_dot_simd :: #force_inline proc "contextless" (a, b: Vec2) -> f32 { // Dot product of a and b.
+		simd_a := vec2_to_simd(a)
+		simd_b := vec2_to_simd(b)
+		result := simd.mul(simd_a, simd_b) // Lane-wise products; lanes 2-3 are 0 * 0 because vec2_to_simd zero-fills them.
+		return simd.reduce_add_ordered(result) // Summing all four lanes therefore yields a.x*b.x + a.y*b.y.
+	}
+
+	vec2_length_sqr_simd :: #force_inline proc "contextless" (a: Vec2) -> f32 {
+		return vec2_dot_simd(a, a)
+	}
+
+	vec2_length_simd :: #force_inline proc "contextless" (a: Vec2) -> f32 {
+		return math.sqrt(vec2_length_sqr_simd(a))
+	}
+
+	vec2_normalize_simd :: #force_inline proc "contextless" (a: Vec2) -> Vec2 {
+		// Anything whose length is not strictly positive is handed back unchanged.
+		magnitude := vec2_length_simd(a)
+		if !(magnitude > 0) {
+			return a
+		}
+		return vec2_mul_simd(a, 1.0 / magnitude)
+	}
+
+	// SIMD version of the quadratic bezier evaluation: (1-a)^2 * p0 + 2(1-a)a * p1 + a^2 * p2, computed in f32 lanes (the non-SIMD variant above works in f64).
+	eval_point_on_bezier3 :: #force_inline proc "contextless" (p0, p1, p2: Vec2, alpha: f32) -> Vec2
+	{
+		simd_p0 := vec2_to_simd(p0)
+		simd_p1 := vec2_to_simd(p1)
+		simd_p2 := vec2_to_simd(p2)
+
+		one_minus_alpha := 1.0 - alpha
+		weight_start    := one_minus_alpha * one_minus_alpha
+		weight_control  := 2.0 * one_minus_alpha * alpha
+		weight_end      := alpha * alpha
+
+		simd_weights := Vec2_SIMD{weight_start, weight_control, weight_end, 0} // Lane 3 is padding; only lanes 0-2 are read below.
+		result := simd.add(
+			simd.add(
+				simd.mul( simd_p0, simd.swizzle( simd_weights, 0, 0, 0, 0) ), // swizzle with one repeated index broadcasts that weight to every lane
+				simd.mul( simd_p1, simd.swizzle( simd_weights, 1, 1, 1, 1) )
+			),
+			simd.mul( simd_p2, simd.swizzle(simd_weights, 2, 2, 2, 2) )
+		)
+
+		return simd_to_vec2(result) // Only lanes 0-1 (x, y) are kept.
+	}
+
+	eval_point_on_bezier4 :: #force_inline proc "contextless" (p0, p1, p2, p3: Vec2, alpha: f32) -> Vec2 // SIMD cubic bezier: weighted sum of p0..p3 at parameter alpha.
+	{
+		simd_p0 := vec2_to_simd(p0)
+		simd_p1 := vec2_to_simd(p1)
+		simd_p2 := vec2_to_simd(p2)
+		simd_p3 := vec2_to_simd(p3)
+
+		one_minus_alpha := 1.0 - alpha
+		weight_start    := one_minus_alpha * one_minus_alpha * one_minus_alpha // (1-a)^3
+		weight_c_a      := 3 * one_minus_alpha * one_minus_alpha * alpha // 3(1-a)^2 * a
+		weight_c_b      := 3 * one_minus_alpha * alpha * alpha // 3(1-a) * a^2
+		weight_end      := alpha * alpha * alpha // a^3
+
+		simd_weights := Vec2_SIMD { weight_start, weight_c_a, weight_c_b, weight_end } // One weight per lane, all four lanes used.
+		result      := simd.add(
+			simd.add(
+				simd.mul( simd_p0, simd.swizzle(simd_weights, 0, 0, 0, 0) ), // swizzle with one repeated index broadcasts that weight to every lane
+				simd.mul( simd_p1, simd.swizzle(simd_weights, 1, 1, 1, 1) )
+			),
+			simd.add(
+				simd.mul( simd_p2, simd.swizzle(simd_weights, 2, 2, 2, 2) ),
+				simd.mul( simd_p3, simd.swizzle(simd_weights, 3, 3, 3, 3) )
+			)
+		)
+		return simd_to_vec2(result) // Only lanes 0-1 (x, y) are kept.
+	}
+}
diff --git a/shaped_text.odin b/shaped_text.odin
index 510ed30..1ac55c2 100644
--- a/shaped_text.odin
+++ b/shaped_text.odin
@@ -1,11 +1,10 @@
 package VEFontCache
 
-import "core:math"
-
 ShapedText :: struct {
 	glyphs         : [dynamic]Glyph,
 	positions      : [dynamic]Vec2,
 	end_cursor_pos : Vec2,
+	size           : Vec2,
 }
 
 ShapedTextCache :: struct {
@@ -14,36 +13,33 @@ ShapedTextCache :: struct {
 	next_cache_id : i32,
 }
 
+shape_lru_hash :: #force_inline proc "contextless" ( hash : ^u64, bytes : []byte ) { // Folds `bytes` into the running value at hash^; the caller seeds it and may call repeatedly to chain inputs.
+	for value in bytes {
+		(hash^) = (( (hash^) << 8) + (hash^) ) + u64(value) // Equivalent to hash = hash * 257 + byte, in u64 arithmetic.
+	}
+}
+
 shape_text_cached :: proc( ctx : ^Context, font : FontID, text_utf8 : string, entry : ^Entry ) -> ^ShapedText
 {
 	// profile(#procedure)
-	@static buffer : [64 * Kilobyte]byte
+	font        := font
+	font_bytes  := slice_ptr( transmute(^byte) & font,  size_of(FontID) )
+	text_bytes  := transmute( []byte) text_utf8
 
-	font            := font
-	text_size       := len(text_utf8)
-	sice_end_offset := size_of(FontID) + len(text_utf8)
-
-	buffer_slice := buffer[:]
-	font_bytes   := slice_ptr( transmute(^byte) & font, size_of(FontID) )
-	copy( buffer_slice, font_bytes )
-
-	text_bytes             := transmute( []byte) text_utf8
-	buffer_slice_post_font := buffer[ size_of(FontID) : sice_end_offset ]
-	copy( buffer_slice_post_font, text_bytes )
-
-	hash := shape_lru_hash( transmute(string) buffer[: sice_end_offset ] )
+	lru_code : u64
+	shape_lru_hash( & lru_code, font_bytes )
+	shape_lru_hash( & lru_code, text_bytes )
 
 	shape_cache := & ctx.shape_cache
 	state       := & ctx.shape_cache.state
 
-	shape_cache_idx := LRU_get( state, hash )
+	shape_cache_idx := LRU_get( state, lru_code )
 	if shape_cache_idx == -1
 	{
 		if shape_cache.next_cache_id < i32(state.capacity) {
 			shape_cache_idx            = shape_cache.next_cache_id
 			shape_cache.next_cache_id += 1
-			evicted := LRU_put( state, hash, shape_cache_idx )
-			assert( evicted == hash )
+			LRU_put( state, lru_code, shape_cache_idx ) // The returned evicted key is no longer asserted on, so it is not bound to a local.
 		}
 		else
 		{
@@ -53,16 +49,16 @@ shape_text_cached :: proc( ctx : ^Context, font : FontID, text_utf8 : string, en
 			shape_cache_idx = LRU_peek( state, next_evict_idx, must_find = true )
 			assert( shape_cache_idx != - 1 )
 
-			LRU_put( state, hash, shape_cache_idx )
+			LRU_put( state, lru_code, shape_cache_idx )
 		}
 
-		shape_text_uncached( ctx, font, text_utf8, entry, & shape_cache.storage[ shape_cache_idx ] )
+		shape_entry := & shape_cache.storage[ shape_cache_idx ]
+		shape_text_uncached( ctx, font, text_utf8, entry, shape_entry )
 	}
 
 	return & shape_cache.storage[ shape_cache_idx ]
 }
 
-// TODO(Ed): Make position rounding an option
 shape_text_uncached :: proc( ctx : ^Context, font : FontID, text_utf8 : string, entry : ^Entry, output : ^ShapedText )
 {
 	// profile(#procedure)
@@ -74,12 +70,17 @@ shape_text_uncached :: proc( ctx : ^Context, font : FontID, text_utf8 : string,
 	clear( & output.glyphs )
 	clear( & output.positions )
 
-	ascent, descent, line_gap := parser_get_font_vertical_metrics( & entry.parser_info )
+	ascent_i32, descent_i32, line_gap_i32 := parser_get_font_vertical_metrics( & entry.parser_info )
+	ascent      := f32(ascent_i32)
+	descent     := f32(descent_i32)
+	line_gap    := f32(line_gap_i32)
+	line_height := (ascent - descent + line_gap) * entry.size_scale
 
 	if use_full_text_shape
 	{
 		// assert( entry.shaper_info != nil )
-		shaper_shape_from_text( & ctx.shaper_ctx, & entry.shaper_info, output, text_utf8, ascent, descent, line_gap, entry.size, entry.size_scale )
+		shaper_shape_from_text( & ctx.shaper_ctx, & entry.shaper_info, output, text_utf8, ascent_i32, descent_i32, line_gap_i32, entry.size, entry.size_scale )
+		// TODO(Ed): Need to be able to provide the text height as well
 		return
 	}
 	else
@@ -87,13 +88,10 @@ shape_text_uncached :: proc( ctx : ^Context, font : FontID, text_utf8 : string,
 		// Note(Original Author):
 		// We use our own fallback dumbass text shaping.
 		// WARNING: PLEASE USE HARFBUZZ. GOOD TEXT SHAPING IS IMPORTANT FOR INTERNATIONALISATION.
-		ascent   := f32(ascent)
-		descent  := f32(descent)
-		line_gap := f32(line_gap)
 
-		position           : Vec2
-		advance            : i32 = 0
-		to_left_side_glyph : i32 = 0
+		line_count     : int = 1
+		max_line_width : f32 = 0
+		position       : Vec2
 
 		prev_codepoint : rune
 		for codepoint in text_utf8
@@ -104,29 +102,34 @@ shape_text_uncached :: proc( ctx : ^Context, font : FontID, text_utf8 : string,
 			}
 			if codepoint == '\n'
 			{
-				position.x  = 0.0
-				position.y -= (ascent - descent + line_gap) * entry.size_scale
-				position.y  = ceil(position.y)
+				line_count    += 1
+				max_line_width = max(max_line_width, position.x)
+				position.x     = 0.0
+				position.y    -= line_height
+				position.y     = ceil(position.y)
 				prev_codepoint = rune(0)
 				continue
 			}
 			if abs( entry.size ) <= Advance_Snap_Smallfont_Size {
-				position.x = math.ceil( position.x )
+				position.x = ceil( position.x )
 			}
 
 			append( & output.glyphs, parser_find_glyph_index( & entry.parser_info, codepoint ))
-			advance, to_left_side_glyph = parser_get_codepoint_horizontal_metrics( & entry.parser_info, codepoint )
+			advance, _ := parser_get_codepoint_horizontal_metrics( & entry.parser_info, codepoint )
 
 			append( & output.positions, Vec2 {
 				ceil(position.x),
 				position.y
 			})
-			// append( & output.positions, position )
 
 			position.x    += f32(advance) * entry.size_scale
 			prev_codepoint = codepoint
 		}
 
 		output.end_cursor_pos = position
+		max_line_width        = max(max_line_width, position.x)
+
+		output.size.x = max_line_width
+		output.size.y = f32(line_count) * line_height
 	}
 }