From dd1752f84b5015296ed34bc883293ad18e79cb88 Mon Sep 17 00:00:00 2001
From: Ed_
Date: Wed, 1 Jan 2025 17:01:59 -0500
Subject: [PATCH] WIP - VeFontcache: Testing performance of using an SOA setup for processing shape's glyphs

---
 code/font/vefontcache/atlas.odin       |  26 +++++
 code/font/vefontcache/draw.odin        | 144 +++++++++++++++++--------
 code/font/vefontcache/mappings.odin    |   6 ++
 code/font/vefontcache/misc.odin        |   2 +-
 code/font/vefontcache/shaped_text.odin |   6 +-
 code/font/vefontcache/shaper.odin      |  17 +--
 code/font/vefontcache/vefontcache.odin |  18 ++--
 scripts/build.ps1                      |   4 +-
 8 files changed, 155 insertions(+), 68 deletions(-)

diff --git a/code/font/vefontcache/atlas.odin b/code/font/vefontcache/atlas.odin
index e63ca91..6290b59 100644
--- a/code/font/vefontcache/atlas.odin
+++ b/code/font/vefontcache/atlas.odin
@@ -125,3 +125,29 @@ decide_codepoint_region :: #force_inline proc (ctx : ^Context, entry : ^Entry, g
 	}
 	return .None, nil, {}
 }
+
+// Grab an atlas LRU cache slot for lru_code: uses region.next_idx while it is below region.state.capacity, otherwise reuses the slot of the entry lru_get_next_evicted reports.
+atlas_reserve_slot :: #force_inline proc ( region : ^Atlas_Region, lru_code : u64 ) -> (atlas_index : i32)
+{
+	if region.next_idx < region.state.capacity
+	{
+		evicted := lru_put( & region.state, lru_code, region.next_idx )
+		atlas_index = region.next_idx
+		region.next_idx += 1
+		assert( evicted == lru_code )
+	}
+	else
+	{
+		next_evict_codepoint := lru_get_next_evicted( & region.state )
+		assert( next_evict_codepoint != 0xFFFFFFFFFFFFFFFF )
+
+		atlas_index = lru_peek( & region.state, next_evict_codepoint, must_find = true )
+		assert( atlas_index != -1 )
+
+		evicted := lru_put( & region.state, lru_code, atlas_index )
+		assert( evicted == next_evict_codepoint )
+	}
+
+	assert( lru_get( & region.state, lru_code ) != - 1 )
+	return
+}
diff --git a/code/font/vefontcache/draw.odin b/code/font/vefontcache/draw.odin
index 919e2e0..52df64d 100644
--- a/code/font/vefontcache/draw.odin
+++ b/code/font/vefontcache/draw.odin
@@ -352,31 +352,9 @@ cache_glyph_to_atlas :: proc( ctx : ^Context,
 	// E region is special case and not cached to atlas.
 	if region_kind == .None || region_kind == .E do return
 
-	// Grab an atlas LRU cache slot.
 	atlas_index := atlas_index
-	if atlas_index == -1
-	{
-		if region.next_idx < region.state.capacity
-		{
-			evicted := lru_put( & region.state, lru_code, region.next_idx )
-			atlas_index = region.next_idx
-			region.next_idx += 1
-			assert( evicted == lru_code )
-		}
-		else
-		{
-			next_evict_codepoint := lru_get_next_evicted( & region.state )
-			assert( next_evict_codepoint != 0xFFFFFFFFFFFFFFFF )
-
-			atlas_index = lru_peek( & region.state, next_evict_codepoint, must_find = true )
-			assert( atlas_index != -1 )
-
-			evicted := lru_put( & region.state, lru_code, atlas_index )
-			assert( evicted == next_evict_codepoint )
-		}
-
-		assert( lru_get( & region.state, lru_code ) != - 1 )
-	}
+	// TODO(Ed): Try to make sure this is always resolved
+	if atlas_index == -1 do atlas_index = atlas_reserve_slot( region, lru_code )
 
 	atlas := & ctx.atlas
 	glyph_buffer := & ctx.glyph_buffer
@@ -462,34 +440,39 @@ check_glyph_in_atlas :: #force_inline proc( ctx : ^Context, font : Font_ID, entr
 	region_kind : Atlas_Region_Kind,
 	region : ^Atlas_Region,
 	over_sample : Vec2
-) -> b32
+) -> (seen, should_cache : b8)
 {
 	profile(#procedure)
 	assert( glyph_index != -1 )
 
 	// E region can't batch
-	if region_kind == .E || region_kind == .None do return false
-	if ctx.temp_codepoint_seen_num > i32(cap(ctx.temp_codepoint_seen)) do return false
+	if region_kind == .E || region_kind == .None do return
+	if ctx.temp_codepoint_seen_num > i32(cap(ctx.temp_codepoint_seen)) do return
 
 	if atlas_index == - 1
 	{
-		if region.next_idx > region.state.capacity {
+		// Check to see if we reached capacity for the atlas
+		if region.next_idx > region.state.capacity
+		{
 			// We will evict LRU. We must predict which LRU will get evicted, and if it's something we've seen then we need to take slowpath and flush batch.
 			next_evict_codepoint := lru_get_next_evicted( & region.state )
-			seen, success := ctx.temp_codepoint_seen[next_evict_codepoint]
+			success : bool
+			seen, success = ctx.temp_codepoint_seen[next_evict_codepoint]
 			assert(success != false)
 
 			if (seen) {
-				return false
+				return false, false // explicit values: `seen` holds the evicted codepoint's map entry here, a bare return would report this glyph as in the atlas
 			}
 		}
 
+		should_cache = true
 		cache_glyph_to_atlas( ctx, font, glyph_index, lru_code, atlas_index, entry, region_kind, region, over_sample )
 	}
 
 	assert( lru_get( & region.state, lru_code ) != -1 )
 	mark_batch_codepoint_seen( ctx, lru_code)
-	return true
+	seen = true
+	return
 }
 
 // ve_fontcache_clear_Draw_List
@@ -675,6 +658,18 @@ draw_text_batch :: proc(ctx: ^Context, entry: ^Entry, shaped: ^Shaped_Text,
 	}
 }
 
+GlyphPackEntry :: struct {
+	lru_code     : u64,               // font_glyph_lru_code(entry.id, index)
+	region       : ^Atlas_Region,     // region returned by decide_codepoint_region
+	over_sample  : Vec2,              // over-sample returned by decide_codepoint_region
+	atlas_index  : i32,               // lru_get result for lru_code; stays -1 when region_kind is .E
+	index        : Glyph,             // glyph copied from shaped.glyphs[shape_id]
+	shape_id     : i32,               // position of this glyph within shaped.glyphs
+	region_kind  : Atlas_Region_Kind, // region kind returned by decide_codepoint_region
+	in_atlas     : b8,                // first result (`seen`) of check_glyph_in_atlas
+	should_cache : b8,                // second result of check_glyph_in_atlas
+}
+
 // Helper for draw_text, all raw text content should be confirmed to be either formatting or visible shapes before getting cached.
 draw_text_shape :: #force_inline proc( ctx : ^Context,
 	font : Font_ID,
@@ -685,29 +680,83 @@ draw_text_shape :: #force_inline proc( ctx : ^Context,
 ) -> (cursor_pos : Vec2) #no_bounds_check
 {
 	profile(#procedure)
-	batch_start_idx : i32 = 0
-	for index : i32 = 0; index < cast(i32) len(shaped.glyphs); index += 1
+
+	glyph_pack, pack_alloc_error := make_soa(#soa[]GlyphPackEntry, len(shaped.glyphs), allocator = context.temp_allocator)
+
+	profile_begin("SOA glyph pack processing")
+	for & glyph, index in glyph_pack
 	{
-		glyph_index := shaped.glyphs[ index ]
-		if is_empty( ctx, entry, glyph_index ) do continue
+		glyph.shape_id = cast(i32) index
+		glyph.index = shaped.glyphs[ index ]
+	}
+	// for & glyph, index in glyph_pack
+	// {
+	// 	glyph.region_kind,
+	// 	glyph.region,
+	// 	glyph.over_sample = decide_codepoint_region( ctx, entry, glyph.index )
+	// }
+	// for & glyph, index in glyph_pack
+	// {
+	// 	glyph.lru_code = font_glyph_lru_code(entry.id, glyph.index)
+	// }
+	// for & glyph, index in glyph_pack
+	// {
+	// 	glyph.atlas_index = -1
+	// 	if glyph.region_kind != .E do glyph.atlas_index = lru_get( & glyph.region.state, glyph.lru_code )
+	// }
+	// for & glyph, index in glyph_pack
+	// {
+	// 	glyph.in_atlas, glyph.should_cache = check_glyph_in_atlas( ctx, font, entry, glyph.index, glyph.lru_code, glyph.atlas_index, glyph.region_kind, glyph.region, glyph.over_sample )
+	// }
+	// for & glyph, index in glyph_pack
+	// {
+	// 	if ! glyph.should_cache do continue
+	// 	cache_glyph_to_atlas(ctx, font, glyph.index, glyph.lru_code, glyph.atlas_index, entry, glyph.region_kind, glyph.region, glyph.over_sample)
+	// }
+	// for & glyph, index in glyph_pack
+	// {
+	// 	if ! glyph.in_atlas do continue
 
-		region_kind, region, over_sample := decide_codepoint_region( ctx, entry, glyph_index )
-		lru_code := font_glyph_lru_code(entry.id, glyph_index)
-		atlas_index := cast(i32) -1
+	// 	assert( lru_get( & glyph.region.state, glyph.lru_code ) != -1 )
+	// 	mark_batch_codepoint_seen( ctx, glyph.lru_code)
+	// }
+	profile_end()
 
-		if region_kind != .E do atlas_index = lru_get( & region.state, lru_code )
-		if check_glyph_in_atlas( ctx, font, entry, glyph_index, lru_code, atlas_index, region_kind, region, over_sample ) do continue
+	// Prepare uncached glyphs for caching
+	batch_start_idx : i32 = 0
+	for & glyph, index in glyph_pack
+	{
+		// if is_glyph_empty( ctx, entry, glyph.index ) do continue
+
+		glyph.region_kind, glyph.region, glyph.over_sample = decide_codepoint_region( ctx, entry, glyph.index )
+
+		glyph.lru_code = font_glyph_lru_code(entry.id, glyph.index)
+
+		glyph.atlas_index = -1
+		if glyph.region_kind != .E do glyph.atlas_index = lru_get( & glyph.region.state, glyph.lru_code )
+
+		glyph.in_atlas, glyph.should_cache = check_glyph_in_atlas( ctx, font, entry, glyph.index, glyph.lru_code, glyph.atlas_index, glyph.region_kind, glyph.region, glyph.over_sample )
+		// if glyph.should_cache {
+		// 	cache_glyph_to_atlas(ctx, font, glyph.index, glyph.lru_code, glyph.atlas_index, entry, glyph.region_kind, glyph.region, glyph.over_sample)
+		// 	glyph.atlas_index = atlas_reserve_slot(glyph.region, glyph.lru_code)
+		// }
+		if glyph.in_atlas {
+			// assert( lru_get( & glyph.region.state, glyph.lru_code ) != -1 )
+			// mark_batch_codepoint_seen( ctx, glyph.lru_code)
+			continue
+		}
 
 		// We can no longer directly append the shape as it has missing glyphs in the atlas
 
 		// First batch the other cached glyphs
 		// flush_glyph_buffer_to_atlas(ctx)
-		draw_text_batch( ctx, entry, shaped, batch_start_idx, index, position, scale, snap_width, snap_height )
+		draw_text_batch( ctx, entry, shaped, batch_start_idx, glyph.shape_id, position, scale, snap_width, snap_height )
 		reset_batch_codepoint_state( ctx )
 
-		cache_glyph_to_atlas( ctx, font, glyph_index, lru_code, atlas_index, entry, region_kind, region, over_sample )
-		mark_batch_codepoint_seen( ctx, lru_code)
-		batch_start_idx = index
+		cache_glyph_to_atlas( ctx, font, glyph.index, glyph.lru_code, glyph.atlas_index, entry, glyph.region_kind, glyph.region, glyph.over_sample )
+		mark_batch_codepoint_seen( ctx, glyph.lru_code)
+
+		batch_start_idx = glyph.shape_id // the next batch starts at the glyph that was just cached
 	}
 
 	draw_text_batch( ctx, entry, shaped, batch_start_idx, cast(i32) len(shaped.glyphs), position, scale, snap_width , snap_height )
@@ -731,14 +780,17 @@ draw_text_mono_latin_batch :: #force_inline proc( ctx : ^Context,
 	for index : i32 = 0; index < cast(i32) len(shaped.glyphs); index += 1
 	{
 		glyph_index := shaped.glyphs[ index ]
-		if is_empty( ctx, entry, glyph_index ) do continue
+		if is_glyph_empty( ctx, entry, glyph_index ) do continue
 
 		region_kind, region, over_sample := decide_codepoint_region( ctx, entry, glyph_index )
 		lru_code := font_glyph_lru_code(entry.id, glyph_index)
 		atlas_index := cast(i32) -1
 
 		if region_kind != .E do atlas_index = lru_get( & region.state, lru_code )
-		if check_glyph_in_atlas( ctx, font, entry, glyph_index, lru_code, atlas_index, region_kind, region, over_sample ) do continue
+
+		in_atlas, should_cache := check_glyph_in_atlas( ctx, font, entry, glyph_index, lru_code, atlas_index, region_kind, region, over_sample )
+		if in_atlas do continue
+		if should_cache do cache_glyph_to_atlas(ctx, font, glyph_index, lru_code, atlas_index, entry, region_kind, region, over_sample )
 
 		// We can no longer directly append the shape as it has missing glyphs in the atlas
 
diff --git a/code/font/vefontcache/mappings.odin b/code/font/vefontcache/mappings.odin
index 0f4f220..50262e6 100644
--- a/code/font/vefontcache/mappings.odin
+++ b/code/font/vefontcache/mappings.odin
@@ -35,6 +35,8 @@ import "core:mem"
 	arena_init :: mem.arena_init
 import "core:slice"
 
+
+
 //#region("Proc overload mappings")
 
 append :: proc {
@@ -88,6 +90,10 @@ make :: proc {
 	make_map_cap,
 }
 
+make_soa :: proc {
+	make_soa_slice,
+}
+
 resize :: proc {
 	resize_dynamic_array,
 }
diff --git a/code/font/vefontcache/misc.odin b/code/font/vefontcache/misc.odin
index ba889c0..ccf174a 100644
--- a/code/font/vefontcache/misc.odin
+++ b/code/font/vefontcache/misc.odin
@@ -54,7 +54,7 @@ font_glyph_lru_code :: #force_inline proc "contextless" ( font : Font_ID, glyph_
 	return
 }
 
-is_empty :: #force_inline proc ( ctx : ^Context, entry : ^Entry, glyph_index : Glyph ) -> b32
+is_glyph_empty :: #force_inline proc ( ctx : ^Context, entry : ^Entry, glyph_index : Glyph ) -> b32
 {
 	if glyph_index == 0 do return true
 	if parser_is_glyph_empty( & entry.parser_info, glyph_index ) do return true
diff --git a/code/font/vefontcache/shaped_text.odin b/code/font/vefontcache/shaped_text.odin
index 3ffc110..d267eb6 100644
--- a/code/font/vefontcache/shaped_text.odin
+++ b/code/font/vefontcache/shaped_text.odin
@@ -76,7 +76,7 @@ shape_text_uncached_advanced :: #force_inline proc( ctx : ^Context, font : Font_
 	line_gap := f32(line_gap_i32)
 	line_height := (ascent - descent + line_gap) * entry.size_scale
 
-	shaper_shape_from_text( & ctx.shaper_ctx, & entry.shaper_info, output, text_utf8, ascent_i32, descent_i32, line_gap_i32, entry.size, entry.size_scale )
+	shaper_shape_from_text( & ctx.shaper_ctx, & entry.parser_info, & entry.shaper_info, output, text_utf8, ascent_i32, descent_i32, line_gap_i32, entry.size, entry.size_scale )
 }
 
 shape_text_uncached_latin :: proc( ctx : ^Context, font : Font_ID, text_utf8 : string, entry : ^Entry, output : ^Shaped_Text )
@@ -120,8 +120,8 @@ shape_text_uncached_latin :: proc( ctx : ^Context, font : Font_ID, text_utf8 : s
 		}
 
 		glyph_index := parser_find_glyph_index( & entry.parser_info, codepoint )
-		is_empty := parser_is_glyph_empty( & entry.parser_info,glyph_index )
-		if ! is_empty
+		is_glyph_empty := parser_is_glyph_empty( & entry.parser_info,glyph_index )
+		if ! is_glyph_empty
 		{
 			append( & output.glyphs, glyph_index)
 			append( & output.positions, Vec2 {
diff --git a/code/font/vefontcache/shaper.odin b/code/font/vefontcache/shaper.odin
index 8e3826a..0df1f25 100644
--- a/code/font/vefontcache/shaper.odin
+++ b/code/font/vefontcache/shaper.odin
@@ -54,7 +54,7 @@ shaper_unload_font :: proc( ctx : ^Shaper_Info )
 	if blob != nil do harfbuzz.blob_destroy( blob )
 }
 
-shaper_shape_from_text :: #force_inline proc( ctx : ^Shaper_Context, info : ^Shaper_Info, output :^Shaped_Text, text_utf8 : string,
+shaper_shape_from_text :: #force_inline proc( ctx : ^Shaper_Context, parser_info : ^Parser_Font_Info, info : ^Shaper_Info, output :^Shaped_Text, text_utf8 : string,
 	ascent, descent, line_gap : i32, size, size_scale : f32 )
 {
 	profile(#procedure)
@@ -72,7 +72,7 @@ shaper_shape_from_text :: #force_inline proc( ctx : ^Shaper_Context, info : ^Sha
 	line_height := ((ascent - descent + line_gap) * size_scale)
 
 	position : Vec2
-	shape_run :: #force_inline proc( buffer : harfbuzz.Buffer, script : harfbuzz.Script, font : harfbuzz.Font, output : ^Shaped_Text,
+	shape_run :: #force_inline proc( parser_info : ^Parser_Font_Info, buffer : harfbuzz.Buffer, script : harfbuzz.Script, font : harfbuzz.Font, output : ^Shaped_Text,
 		position : ^Vec2, max_line_width: ^f32, line_count: ^int,
 		ascent, descent, line_gap, size, size_scale: f32,
 		snap_shape_pos : b32, adv_snap_small_font_threshold : f32 )
@@ -115,8 +115,6 @@ shaper_shape_from_text :: #force_inline proc( ctx : ^Shaper_Context, info : ^Sha
 				(position^) = ceil( position^ )
 			}
 
-			append( & output.glyphs, glyph_id )
-
 			glyph_pos := position^
 			offset := Vec2 { f32(hb_gposition.x_offset), f32(hb_gposition.y_offset) } * size_scale
 			glyph_pos += offset
@@ -124,7 +122,6 @@ shaper_shape_from_text :: #force_inline proc( ctx : ^Shaper_Context, info : ^Sha
 			if snap_shape_pos {
 				glyph_pos = ceil(glyph_pos)
 			}
-			append( & output.positions, glyph_pos)
 
 			advance := Vec2 {
 				f32(hb_gposition.x_advance) * size_scale,
@@ -132,6 +129,12 @@ shaper_shape_from_text :: #force_inline proc( ctx : ^Shaper_Context, info : ^Sha
 			}
 			(position^) += advance
 			(max_line_width^) = max(max_line_width^, position.x)
+
+			is_empty := parser_is_glyph_empty(parser_info, glyph_id)
+			if ! is_empty {
+				append( & output.glyphs, glyph_id )
+				append( & output.positions, glyph_pos)
+			}
 		}
 
 		output.end_cursor_pos = position^
@@ -159,7 +162,7 @@ shaper_shape_from_text :: #force_inline proc( ctx : ^Shaper_Context, info : ^Sha
 		}
 
 		// End current run since we've encountered a script change.
-		shape_run(
+		shape_run( parser_info,
 			ctx.hb_buffer, current_script, info.font, output,
 			& position, & max_line_width, & line_count,
 			ascent, descent, line_gap, size, size_scale,
@@ -170,7 +173,7 @@ shaper_shape_from_text :: #force_inline proc( ctx : ^Shaper_Context, info : ^Sha
 	}
 
 	// End the last run if needed
-	shape_run(
+	shape_run( parser_info,
 		ctx.hb_buffer, current_script, info.font, output,
 		& position, & max_line_width, & line_count,
 		ascent, descent, line_gap, size, size_scale,
diff --git a/code/font/vefontcache/vefontcache.odin b/code/font/vefontcache/vefontcache.odin
index d7e35fd..c5ac171 100644
--- a/code/font/vefontcache/vefontcache.odin
+++ b/code/font/vefontcache/vefontcache.odin
@@ -36,7 +36,7 @@ Context :: struct {
 	entries : [dynamic]Entry,
 
 	temp_path : [dynamic]Vertex,
-	temp_codepoint_seen : map[u64]bool,
+	temp_codepoint_seen : map[u64]b8,
 	temp_codepoint_seen_num : i32,
 
 	snap_width : f32,
@@ -147,10 +147,10 @@ startup :: proc( ctx : ^Context, parser_kind : Parser_Kind = .STB_TrueType,
 	glyph_draw_params := Init_Glyph_Draw_Params_Default,
 	shape_cache_params := Init_Shape_Cache_Params_Default,
 	shaper_params := Init_Shaper_Params_Default,
-	default_curve_quality : u32 = 2,
-	entires_reserve : u32 = 256,
-	temp_path_reserve : u32 = 1024,
-	temp_codepoint_seen_reserve : u32 = 1024,
+	default_curve_quality : u32 = 8 * 2,
+	entires_reserve : u32 = 8 * 256,
+	temp_path_reserve : u32 = 8 * 1024,
+	temp_codepoint_seen_reserve : u32 = 8 * 1024,
 )
 {
 	assert( ctx != nil, "Must provide a valid context" )
@@ -175,16 +175,16 @@ startup :: proc( ctx : ^Context, parser_kind : Parser_Kind = .STB_TrueType,
 	temp_path, error = make( [dynamic]Vertex, len = 0, cap = temp_path_reserve )
 	assert(error == .None, "VEFontCache.init : Failed to allocate temp_path")
 
-	temp_codepoint_seen, error = make( map[u64]bool, uint(temp_codepoint_seen_reserve) )
+	temp_codepoint_seen, error = make( map[u64]b8, uint(temp_codepoint_seen_reserve) )
 	assert(error == .None, "VEFontCache.init : Failed to allocate temp_path")
 
-	draw_list.vertices, error = make( [dynamic]Vertex, len = 0, cap = 1 * Kilobyte )
+	draw_list.vertices, error = make( [dynamic]Vertex, len = 0, cap = 8 * Kilobyte )
 	assert(error == .None, "VEFontCache.init : Failed to allocate draw_list.vertices")
 
-	draw_list.indices, error = make( [dynamic]u32, len = 0, cap = 2 * Kilobyte )
+	draw_list.indices, error = make( [dynamic]u32, len = 0, cap = 16 * Kilobyte )
 	assert(error == .None, "VEFontCache.init : Failed to allocate draw_list.indices")
 
-	draw_list.calls, error = make( [dynamic]Draw_Call, len = 0, cap = 128 )
+	draw_list.calls, error = make( [dynamic]Draw_Call, len = 0, cap = 1024 )
 	assert(error == .None, "VEFontCache.init : Failed to allocate draw_list.calls")
 
 	init_atlas_region :: proc( region : ^Atlas_Region, params : Init_Atlas_Params, region_params : Init_Atlas_Region_Params, factor : Vec2i, expected_cap : i32 )
diff --git a/scripts/build.ps1 b/scripts/build.ps1
index ec895c5..99726a2 100644
--- a/scripts/build.ps1
+++ b/scripts/build.ps1
@@ -201,10 +201,10 @@ push-location $path_root
 		# $build_args += $flag_micro_architecture_native
 		$build_args += $flag_use_separate_modules
 		$build_args += $flag_thread_count + $CoreCount_Physical
-		# $build_args += $flag_optimize_none
+		$build_args += $flag_optimize_none
 		# $build_args += $flag_optimize_minimal
 		# $build_args += $flag_optimize_speed
-		$build_args += $falg_optimize_aggressive
+		# $build_args += $falg_optimize_aggressive
 		$build_args += $flag_debug
 		$build_args += $flag_pdb_name + $pdb
 		$build_args += $flag_subsystem + 'windows'