From 6fac2a97ff83819a780858f7c451331c09e5cdf0 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Tue, 25 Jun 2024 17:10:18 -0400 Subject: [PATCH] Swapped LRU and temp_codepoint_seen to use odin's vendor hash map Starting to get serious about profiling this procedure and optimizing performance. I also want to make it more idiomatic now... --- code/font/VEFontCache/LRU.odin | 41 +++++++++++++------------- code/font/VEFontCache/VEFontCache.odin | 11 ++++--- code/font/VEFontCache/atlas.odin | 9 +++--- code/font/VEFontCache/draw.odin | 23 ++++++++++----- code/font/VEFontCache/mappings.odin | 5 ++++ code/font/VEFontCache/misc.odin | 21 +++++++------ code/font/VEFontCache/parser.odin | 3 -- code/font/VEFontCache/shaped_text.odin | 2 ++ code/font/VEFontCache/shaper.odin | 1 + 9 files changed, 67 insertions(+), 49 deletions(-) diff --git a/code/font/VEFontCache/LRU.odin b/code/font/VEFontCache/LRU.odin index b631779..e9a014a 100644 --- a/code/font/VEFontCache/LRU.odin +++ b/code/font/VEFontCache/LRU.odin @@ -146,14 +146,14 @@ LRU_Link :: struct { LRU_Cache :: struct { capacity : u32, num : u32, - table : HMapChained(LRU_Link), + table : map[u64]LRU_Link, key_queue : PoolList, } LRU_init :: proc( cache : ^LRU_Cache, capacity : u32, dbg_name : string = "" ) { error : AllocatorError cache.capacity = capacity - cache.table, error = make( HMapChained(LRU_Link), hmap_closest_prime( uint(capacity)) ) + cache.table, error = make( map[u64]LRU_Link, hmap_closest_prime( uint(capacity)) ) assert( error == .None, "VEFontCache.LRU_init : Failed to allocate cache's table") pool_list_init( & cache.key_queue, capacity, dbg_name = dbg_name ) @@ -166,7 +166,7 @@ LRU_free :: proc( cache : ^LRU_Cache ) LRU_reload :: proc( cache : ^LRU_Cache, allocator : Allocator ) { - hmap_chained_reload( cache.table, allocator ) + reload_map( & cache.table, allocator ) pool_list_reload( & cache.key_queue, allocator ) } @@ -177,19 +177,21 @@ LRU_hash_key :: #force_inline proc( key : u64 ) -> ( hash : u64 ) { return } 
-LRU_find :: proc( cache : ^LRU_Cache, key : u64, must_find := false ) -> ^LRU_Link { - hash := LRU_hash_key( key ) - link := get( cache.table, hash ) +LRU_find :: proc( cache : ^LRU_Cache, key : u64, must_find := false ) -> (LRU_Link, bool) { + // hash := LRU_hash_key( key ) + // link := get( cache.table, hash ) // if link == nil && must_find { // runtime.debug_trap() // link = get( cache.table, hash ) // } - return link + + link, success := cache.table[key] + return link, success } LRU_get :: proc( cache : ^LRU_Cache, key : u64 ) -> i32 { - iter := LRU_find( cache, key ) - if iter == nil { + iter, success := LRU_find( cache, key ) + if success == false { return -1 } LRU_refresh( cache, key ) @@ -206,8 +208,8 @@ LRU_get_next_evicted :: proc( cache : ^LRU_Cache ) -> u64 } LRU_peek :: proc( cache : ^LRU_Cache, key : u64, must_find := false ) -> i32 { - iter := LRU_find( cache, key, must_find ) - if iter == nil { + iter, success := LRU_find( cache, key, must_find ) + if success == false { return -1 } return iter.value @@ -215,9 +217,9 @@ LRU_peek :: proc( cache : ^LRU_Cache, key : u64, must_find := false ) -> i32 { LRU_put :: proc( cache : ^LRU_Cache, key : u64, value : i32 ) -> u64 { - hash_key := LRU_hash_key( key ) - iter := get( cache.table, hash_key ) - if iter != nil { + // hash_key := LRU_hash_key( key ) + iter, success := cache.table[key] + if success { LRU_refresh( cache, key ) iter.value = value return key @@ -227,11 +229,11 @@ LRU_put :: proc( cache : ^LRU_Cache, key : u64, value : i32 ) -> u64 if cache.key_queue.size >= cache.capacity { evict = pool_list_pop_back( & cache.key_queue ) - evict_hash := LRU_hash_key( evict ) + // evict_hash := LRU_hash_key( evict ) // if cache.table.dbg_name != "" { // logf("%v: Evicted %v with hash: %v", cache.table.dbg_name, evict, evict_hash) // } - hmap_chained_remove( cache.table, evict_hash ) + delete_key( & cache.table, evict ) cache.num -= 1 } @@ -240,17 +242,16 @@ LRU_put :: proc( cache : ^LRU_Cache, key : u64, value 
: i32 ) -> u64 // logf("%v: Pushed %v with hash: %v", cache.table.dbg_name, key, hash_key ) // } - set( cache.table, hash_key, LRU_Link { + cache.table[key] = LRU_Link { value = value, ptr = cache.key_queue.front - }) - + } cache.num += 1 return evict } LRU_refresh :: proc( cache : ^LRU_Cache, key : u64 ) { - link := LRU_find( cache, key ) + link, success := LRU_find( cache, key ) // if cache.table.dbg_name != "" { // logf("%v: Refreshed %v", cache.table.dbg_name, key) // } diff --git a/code/font/VEFontCache/VEFontCache.odin b/code/font/VEFontCache/VEFontCache.odin index 7e8b08c..8506922 100644 --- a/code/font/VEFontCache/VEFontCache.odin +++ b/code/font/VEFontCache/VEFontCache.odin @@ -59,7 +59,7 @@ Context :: struct { entries : Array(Entry), temp_path : Array(Vec2), - temp_codepoint_seen : HMapZPL(bool), + temp_codepoint_seen : map[u64]bool, temp_codepoint_seen_num : u32, snap_width : u32, @@ -152,7 +152,7 @@ init :: proc( ctx : ^Context, parser_kind : ParserKind, curve_quality : u32 = 12, entires_reserve : u32 = Kilobyte, temp_path_reserve : u32 = Kilobyte, - temp_codepoint_seen_reserve : u32 = 512, + temp_codepoint_seen_reserve : u32 = 1024, ) { assert( ctx != nil, "Must provide a valid context" ) @@ -173,7 +173,7 @@ init :: proc( ctx : ^Context, parser_kind : ParserKind, temp_path, error = make( Array(Vec2), u64(temp_path_reserve) ) assert(error == .None, "VEFontCache.init : Failed to allocate temp_path") - temp_codepoint_seen, error = make( HMapZPL(bool), u64(hmap_closest_prime( uint(temp_codepoint_seen_reserve))) ) + temp_codepoint_seen, error = make( map[u64]bool )//, hmap_closest_prime( uint(temp_codepoint_seen_reserve)) ) assert(error == .None, "VEFontCache.init : Failed to allocate temp_path") draw_list.vertices, error = make( Array(Vertex), 4 * Kilobyte ) @@ -279,7 +279,7 @@ hot_reload :: proc( ctx : ^Context, allocator : Allocator ) entries.backing = allocator temp_path.backing = allocator - hmap_zpl_reload( & ctx.temp_codepoint_seen, allocator ) + 
reload_map( & ctx.temp_codepoint_seen, allocator ) draw_list.vertices.backing = allocator draw_list.indices.backing = allocator @@ -397,6 +397,7 @@ unload_font :: proc( ctx : ^Context, font : FontID ) cache_glyph :: proc( ctx : ^Context, font : FontID, glyph_index : Glyph, scale, translate : Vec2 ) -> b32 { + profile(#procedure) assert( ctx != nil ) assert( font >= 0 && u64(font) < ctx.entries.num ) entry := & ctx.entries.data[ font ] @@ -519,6 +520,7 @@ cache_glyph :: proc( ctx : ^Context, font : FontID, glyph_index : Glyph, scale, cache_glyph_to_atlas :: proc( ctx : ^Context, font : FontID, glyph_index : Glyph ) { + profile(#procedure) assert( ctx != nil ) assert( font >= 0 && font < FontID(ctx.entries.num) ) entry := & ctx.entries.data[ font ] @@ -650,6 +652,7 @@ is_empty :: proc( ctx : ^Context, entry : ^Entry, glyph_index : Glyph ) -> b32 measure_text_size :: proc( ctx : ^Context, font : FontID, text_utf8 : string ) -> (measured : Vec2) { + profile(#procedure) assert( ctx != nil ) assert( font >= 0 && font < FontID(ctx.entries.num) ) diff --git a/code/font/VEFontCache/atlas.odin b/code/font/VEFontCache/atlas.odin index 6adf8f5..0d6cc4b 100644 --- a/code/font/VEFontCache/atlas.odin +++ b/code/font/VEFontCache/atlas.odin @@ -90,6 +90,7 @@ atlas_bbox :: proc( atlas : ^Atlas, region : AtlasRegionKind, local_idx : i32 ) can_batch_glyph :: proc( ctx : ^Context, font : FontID, entry : ^Entry, glyph_index : Glyph ) -> b32 { + profile(#procedure) assert( ctx != nil ) assert( entry.id == font ) @@ -111,10 +112,10 @@ can_batch_glyph :: proc( ctx : ^Context, font : FontID, entry : ^Entry, glyph_in if region.next_idx > u32( region.state.capacity) { // We will evict LRU. We must predict which LRU will get evicted, and if it's something we've seen then we need to take slowpath and flush batch. 
next_evict_codepoint := LRU_get_next_evicted( & region.state ) - seen := get( & ctx.temp_codepoint_seen, next_evict_codepoint ) - assert(seen != nil) + seen, success := ctx.temp_codepoint_seen[next_evict_codepoint] + assert(success != false) - if (seen^) { + if (seen) { return false } } @@ -123,7 +124,7 @@ can_batch_glyph :: proc( ctx : ^Context, font : FontID, entry : ^Entry, glyph_in } assert( LRU_get( & region.state, lru_code ) != -1 ) - set( & ctx.temp_codepoint_seen, lru_code, true ) + ctx.temp_codepoint_seen[lru_code] = true ctx.temp_codepoint_seen_num += 1 return true } diff --git a/code/font/VEFontCache/draw.odin b/code/font/VEFontCache/draw.odin index b1a613c..0b1bc34 100644 --- a/code/font/VEFontCache/draw.odin +++ b/code/font/VEFontCache/draw.odin @@ -48,6 +48,7 @@ GlyphDrawBuffer :: struct { blit_quad :: proc( draw_list : ^DrawList, p0 : Vec2 = {0, 0}, p1 : Vec2 = {1, 1}, uv0 : Vec2 = {0, 0}, uv1 : Vec2 = {1, 1} ) { + profile(#procedure) // logf("Blitting: xy0: %0.2f, %0.2f xy1: %0.2f, %0.2f uv0: %0.2f, %0.2f uv1: %0.2f, %0.2f", // p0.x, p0.y, p1.x, p1.y, uv0.x, uv0.y, uv1.x, uv1.y); v_offset := cast(u32) draw_list.vertices.num @@ -97,6 +98,7 @@ clear_draw_list :: proc( draw_list : ^DrawList ) { directly_draw_massive_glyph :: proc( ctx : ^Context, entry : ^Entry, glyph : Glyph, bounds_0 : Vec2i, bounds_width, bounds_height : i32, over_sample, position, scale : Vec2 ) { + profile(#procedure) flush_glyph_buffer_to_atlas( ctx ) // Draw un-antialiased glyph to update FBO. 
@@ -154,6 +156,7 @@ directly_draw_massive_glyph :: proc( ctx : ^Context, entry : ^Entry, glyph : Gly draw_cached_glyph :: proc( ctx : ^Context, entry : ^Entry, glyph_index : Glyph, position, scale : Vec2 ) -> b32 { + profile(#procedure) // Glyph not in current font if glyph_index == 0 do return true if parser_is_glyph_empty( & entry.parser_info, glyph_index ) do return true @@ -283,6 +286,7 @@ draw_filled_path :: proc( draw_list : ^DrawList, outside_point : Vec2, path : [] // From there we should maek a 'draw text shape' that breaks up the batch text draws for each of the shapes. draw_text :: proc( ctx : ^Context, font : FontID, text_utf8 : string, position : Vec2, scale : Vec2 ) -> b32 { + profile(#procedure) assert( ctx != nil ) assert( font >= 0 && font < FontID(ctx.entries.num) ) @@ -411,6 +415,7 @@ draw_text_batch :: proc( ctx : ^Context, entry : ^Entry, shaped : ^ShapedText, b flush_glyph_buffer_to_atlas( ctx ) for index := batch_start_idx; index < batch_end_idx; index += 1 { + profile(#procedure) glyph_index := shaped.glyphs.data[ index ] shaped_position := shaped.positions.data[index] glyph_translate := position + shaped_position * scale @@ -423,6 +428,7 @@ draw_text_batch :: proc( ctx : ^Context, entry : ^Entry, shaped : ^ShapedText, b // Helper for draw_text, all raw text content should be confirmed to be either formatting or visible shapes before getting cached. 
draw_text_shape :: proc( ctx : ^Context, font : FontID, entry : ^Entry, shaped : ^ShapedText, position, scale : Vec2, snap_width, snap_height : f32 ) -> (cursor_pos : Vec2) { + profile(#procedure) batch_start_idx : i32 = 0 for index : i32 = 0; index < i32(shaped.glyphs.num); index += 1 { @@ -437,7 +443,7 @@ draw_text_shape :: proc( ctx : ^Context, font : FontID, entry : ^Entry, shaped : cache_glyph_to_atlas( ctx, font, glyph_index ) lru_code := font_glyph_lru_code(font, glyph_index) - set( & ctx.temp_codepoint_seen, lru_code, true ) + ctx.temp_codepoint_seen[lru_code] = true ctx.temp_codepoint_seen_num += 1 batch_start_idx = index @@ -457,6 +463,7 @@ flush_draw_list :: proc( ctx : ^Context ) { flush_glyph_buffer_to_atlas :: proc( ctx : ^Context ) { + profile(#procedure) // Flush drawcalls to draw list merge_draw_list( & ctx.draw_list, & ctx.atlas.clear_draw_list ) merge_draw_list( & ctx.draw_list, & ctx.atlas.draw_list) @@ -491,15 +498,16 @@ flush_layer :: proc( draw_list : ^DrawList ) {} // ve_fontcache_merge_drawlist merge_draw_list :: proc( dst, src : ^DrawList ) { + profile(#procedure) error : AllocatorError v_offset := cast(u32) dst.vertices.num - for index : u32 = 0; index < cast(u32) src.vertices.num; index += 1 { - error = append( & dst.vertices, src.vertices.data[index] ) - assert( error == .None ) - } - // error = append( & dst.vertices, src.vertices ) - // assert( error == .None ) + // for index : u32 = 0; index < cast(u32) src.vertices.num; index += 1 { + // error = append( & dst.vertices, src.vertices.data[index] ) + // assert( error == .None ) + // } + error = append( & dst.vertices, src.vertices ) + assert( error == .None ) i_offset := cast(u32) dst.indices.num for index : u32 = 0; index < cast(u32) src.indices.num; index += 1 { @@ -518,6 +526,7 @@ merge_draw_list :: proc( dst, src : ^DrawList ) optimize_draw_list :: proc( draw_list : ^DrawList, call_offset : u64 ) { + profile(#procedure) assert( draw_list != nil ) calls := 
array_to_slice(draw_list.calls) diff --git a/code/font/VEFontCache/mappings.odin b/code/font/VEFontCache/mappings.odin index cefcfa5..c7fb414 100644 --- a/code/font/VEFontCache/mappings.odin +++ b/code/font/VEFontCache/mappings.odin @@ -62,6 +62,8 @@ hmap_zpl_reload :: grime.hmap_zpl_reload hmap_zpl_remove :: grime.hmap_zpl_remove hmap_zpl_set :: grime.hmap_zpl_set +reload_map :: grime.reload_map + // Pool :: grime.Pool StackFixed :: grime.StackFixed @@ -77,6 +79,8 @@ stack_push_contextless :: grime.stack_push_contextless log :: grime.log logf :: grime.logf +profile :: grime.profile + //#region("Proc overload mappings") append :: proc { @@ -110,6 +114,7 @@ make :: proc { array_init, hmap_chained_init, hmap_zpl_init, + make_map, } // reload :: proc { diff --git a/code/font/VEFontCache/misc.odin b/code/font/VEFontCache/misc.odin index 585eac5..a9b7477 100644 --- a/code/font/VEFontCache/misc.odin +++ b/code/font/VEFontCache/misc.odin @@ -13,7 +13,6 @@ font_glyph_lru_code :: #force_inline proc( font : FontID, glyph_index : Glyph ) // copy( buffer[ len(font_bytes) :], glyph_bytes ) // hash := fnv64a( transmute([]byte) buffer[: size_of(FontID) + size_of(Glyph) ] ) // lru_code = hash - lru_code = u64(glyph_index) + ( ( 0x100000000 * u64(font) ) & 0xFFFFFFFF00000000 ) return } @@ -31,10 +30,10 @@ shape_lru_hash :: #force_inline proc( label : string ) -> u64 { // ve_fontcache_eval_bezier (quadratic) eval_point_on_bezier3 :: proc( p0, p1, p2 : Vec2, alpha : f32 ) -> Vec2 { - p0 := vec2_64_from_vec2(p0) - p1 := vec2_64_from_vec2(p1) - p2 := vec2_64_from_vec2(p2) - alpha := f64(alpha) + // p0 := vec2_64_from_vec2(p0) + // p1 := vec2_64_from_vec2(p1) + // p2 := vec2_64_from_vec2(p2) + // alpha := f64(alpha) weight_start := (1 - alpha) * (1 - alpha) weight_control := 2.0 * (1 - alpha) * alpha @@ -53,11 +52,11 @@ eval_point_on_bezier3 :: proc( p0, p1, p2 : Vec2, alpha : f32 ) -> Vec2 // ve_fontcache_eval_bezier (cubic) eval_point_on_bezier4 :: proc( p0, p1, p2, p3 : Vec2, alpha : 
f32 ) -> Vec2 { - p0 := vec2_64_from_vec2(p0) - p1 := vec2_64_from_vec2(p1) - p2 := vec2_64_from_vec2(p2) - p3 := vec2_64_from_vec2(p3) - alpha := f64(alpha) + // p0 := vec2_64_from_vec2(p0) + // p1 := vec2_64_from_vec2(p1) + // p2 := vec2_64_from_vec2(p2) + // p3 := vec2_64_from_vec2(p3) + // alpha := f64(alpha) weight_start := (1 - alpha) * (1 - alpha) * (1 - alpha) weight_c_a := 3 * (1 - alpha) * (1 - alpha) * alpha @@ -74,7 +73,7 @@ eval_point_on_bezier4 :: proc( p0, p1, p2, p3 : Vec2, alpha : f32 ) -> Vec2 } reset_batch_codepoint_state :: proc( ctx : ^Context ) { - clear( & ctx.temp_codepoint_seen ) + clear_map( & ctx.temp_codepoint_seen ) ctx.temp_codepoint_seen_num = 0 } diff --git a/code/font/VEFontCache/parser.odin b/code/font/VEFontCache/parser.odin index 3c0ee44..b4c9b78 100644 --- a/code/font/VEFontCache/parser.odin +++ b/code/font/VEFontCache/parser.odin @@ -210,9 +210,6 @@ parser_get_glyph_box :: proc( font : ^ParserFontInfo, glyph_index : Glyph ) -> ( case .STB_TrueType: x0, y0, x1, y1 : i32 - // { - // success := stbtt.InitFont( & font.stbtt_info, raw_data(font.data), 0 ) - // } success := cast(bool) stbtt.GetGlyphBox( & font.stbtt_info, i32(glyph_index), & x0, & y0, & x1, & y1 ) assert( success ) diff --git a/code/font/VEFontCache/shaped_text.odin b/code/font/VEFontCache/shaped_text.odin index 9ba1607..d085c22 100644 --- a/code/font/VEFontCache/shaped_text.odin +++ b/code/font/VEFontCache/shaped_text.odin @@ -16,6 +16,7 @@ ShapedTextCache :: struct { shape_text_cached :: proc( ctx : ^Context, font : FontID, text_utf8 : string ) -> ^ShapedText { + profile(#procedure) @static buffer : [64 * Kilobyte]byte font := font @@ -61,6 +62,7 @@ shape_text_cached :: proc( ctx : ^Context, font : FontID, text_utf8 : string ) - shape_text_uncached :: proc( ctx : ^Context, font : FontID, output : ^ShapedText, text_utf8 : string ) { + profile(#procedure) assert( ctx != nil ) assert( font >= 0 && font < FontID(ctx.entries.num) ) diff --git 
a/code/font/VEFontCache/shaper.odin b/code/font/VEFontCache/shaper.odin index 230314f..e4232be 100644 --- a/code/font/VEFontCache/shaper.odin +++ b/code/font/VEFontCache/shaper.odin @@ -70,6 +70,7 @@ shaper_unload_font :: proc( ctx : ^ShaperInfo ) shaper_shape_from_text :: proc( ctx : ^ShaperContext, info : ^ShaperInfo, output :^ShapedText, text_utf8 : string, ascent, descent, line_gap : i32, size, size_scale : f32 ) { + profile(#procedure) current_script := harfbuzz.Script.UNKNOWN hb_ucfunc := harfbuzz.unicode_funcs_get_default() harfbuzz.buffer_clear_contents( ctx.hb_buffer )