From c114624eee7e5b2740ce7cc0946475bc0b5b6bf2 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Thu, 9 Jan 2025 23:36:39 -0500 Subject: [PATCH] fixes for: Atlas-Region B clear-region on caching glyph, batching, & tuning default values for performance Need to fix the debug vis for the library --- code/font/vefontcache/atlas.odin | 2 +- code/font/vefontcache/draw.odin | 40 +++++++++++++++----------- code/font/vefontcache/shaper.odin | 14 ++++----- code/font/vefontcache/vefontcache.odin | 39 +++++++++++-------------- code/grime/profiler.odin | 2 +- code/sectr/font/provider.odin | 4 +++ code/sectr/font/render_sokol.odin | 2 +- 7 files changed, 53 insertions(+), 50 deletions(-) diff --git a/code/font/vefontcache/atlas.odin b/code/font/vefontcache/atlas.odin index 06ab744..ad987f4 100644 --- a/code/font/vefontcache/atlas.odin +++ b/code/font/vefontcache/atlas.odin @@ -69,7 +69,7 @@ atlas_glyph_lru_code :: #force_inline proc "contextless" ( font : Font_ID, px_si @(optimization_mode="favor_size") atlas_region_bbox :: #force_inline proc( region : Atlas_Region, local_idx : i32 ) -> (position, size: Vec2) { - size = vec2(region.slot_size.x) + size = vec2(region.slot_size) position.x = cast(f32) (( local_idx % region.capacity.x ) * region.slot_size.x) position.y = cast(f32) (( local_idx / region.capacity.x ) * region.slot_size.y) diff --git a/code/font/vefontcache/draw.odin b/code/font/vefontcache/draw.odin index 9501a0e..a0fd218 100644 --- a/code/font/vefontcache/draw.odin +++ b/code/font/vefontcache/draw.odin @@ -281,11 +281,9 @@ generate_shapes_draw_list :: #force_inline proc ( ctx : ^Context, font : Font_ID * Dealing with shaping (essentially minimizing having to ever deal with it in a hot path if possible) * Dealing with atlas regioning (the expensive region resolution & parser calls are done on the shape pass) + Pipeline order: - * Resolve atlas lru codes and track shape indexes * Resolve the glyph's position offset from the target position - * Resolve glyph bounds and scale - * 
Resolve atlas region the glyph is associated with * Segregate the glyphs into three slices: oversized, to_cache, cached. * If oversized is not necessary for your use case and your hitting a bottleneck, remove it in a derivative procedure. * You have to to be drawing a px font size > ~140 px for it to trigger. @@ -389,8 +387,10 @@ generate_shape_draw_list :: proc( draw_list : ^Draw_List, shape : Shaped_Text, { pack := cached + found_take_slow_path : b8 + success : bool + // Determine if we hit the limit for this batch. - if glyph_buffer.batch_cache.num >= glyph_buffer.batch_cache.cap do break Prepare_For_Batch if glyph.atlas_index == - 1 { // Check to see if we reached capacity for the atlas @@ -398,8 +398,9 @@ generate_shape_draw_list :: proc( draw_list : ^Draw_List, shape : Shaped_Text, { // We will evict LRU. We must predict which LRU will get evicted, and if it's something we've seen then we need to take slowpath and flush batch. next_evict_glyph := lru_get_next_evicted( region.state ) - found_take_slow_path, success := glyph_buffer.batch_cache.table[next_evict_glyph] + found_take_slow_path, success = glyph_buffer.batch_cache.table[next_evict_glyph] assert(success != false) + // TODO(Ed): This might not be needed with the new pipeline/batching if (found_take_slow_path) { break Prepare_For_Batch } @@ -407,12 +408,17 @@ generate_shape_draw_list :: proc( draw_list : ^Draw_List, shape : Shaped_Text, // profile_begin("glyph needs caching") glyph.atlas_index = atlas_reserve_slot(region, atlas_key) pack = to_cache - profile_end() + // profile_end() } // profile("append cached") glyph.region_pos, glyph.region_size = atlas_region_bbox(region ^, glyph.atlas_index) mark_glyph_seen(& glyph_buffer.batch_cache, atlas_key) append_sub_pack(pack, cast(i32) index) + // TODO(Ed): This might not be needed with the new pipeline/batching + // if (found_take_slow_path) { + // break Prepare_For_Batch + // } + if glyph_buffer.batch_cache.num >= glyph_buffer.batch_cache.cap do break 
Prepare_For_Batch continue } @@ -513,14 +519,14 @@ batch_generate_glyphs_draw_list :: proc ( draw_list : ^Draw_List, glyph := & glyph_pack[id] bounds := shape.bounds[id] bounds_scaled := mul(bounds, font_scale) - glyph_scale := ceil(size(bounds_scaled) + glyph_buffer.draw_padding) + glyph_scale := size(bounds_scaled) + glyph_buffer.draw_padding f32_allocated_x := cast(f32) glyph_buffer.allocated_x // Resolve how much space this glyph will allocate in the buffer buffer_size := glyph_scale * glyph_buffer.over_sample // Allocate a glyph glyph render target region (FBO) - to_allocate_x := buffer_size.x + 2.0 + to_allocate_x := buffer_size.x + 4.0 // If allocation would exceed buffer's bounds the buffer must be flush before this glyph can be rendered. glyph.flush_glyph_buffer = i32(f32_allocated_x + to_allocate_x) >= i32(glyph_buffer_size.x) @@ -593,9 +599,9 @@ batch_generate_glyphs_draw_list :: proc ( draw_list : ^Draw_List, profile_begin("gen oversized glyphs draw_list") when ENABLE_OVERSIZED_GLYPHS do if len(oversized) > 0 { - // colour.r = max(colour.a, enable_debug_vis_type) - // colour.g = max(colour.g, enable_debug_vis_type) - // colour.b = colour.b * f32(cast(i32) ! b32(cast(i32) enable_debug_vis_type)) + colour.r = max(colour.r, 1.0 * enable_debug_vis_type) + colour.g = max(colour.g, 1.0 * enable_debug_vis_type) + colour.b = colour.b * cast(f32) cast(i32) ! 
b32( cast(i32) enable_debug_vis_type) for pack_id, index in oversized { error : Allocator_Error glyph_pack[pack_id].shape, error = parser_get_glyph_shape(entry.parser_info, shape.glyph[pack_id]) @@ -738,18 +744,18 @@ batch_generate_glyphs_draw_list :: proc ( draw_list : ^Draw_List, for id, index in to_cache do parser_free_shape(entry.parser_info, glyph_pack[id].shape) profile_begin("gen_cached_draw_list: to_cache") - colour.r = max(colour.r, 1.0 * enable_debug_vis_type) - colour.g = max(colour.g, 1.0 * enable_debug_vis_type) - colour.b = max(colour.b, 1.0 * enable_debug_vis_type) + // colour.r = max(colour.r, 1.0 * enable_debug_vis_type) + // colour.g = colour.g * cast(f32) cast(i32) ! cast(b32) cast(i32) enable_debug_vis_type + // colour.b = colour.b * cast(f32) cast(i32) ! cast(b32) cast(i32) enable_debug_vis_type generate_blit_from_atlas_draw_list( draw_list, glyph_pack[:], to_cache, colour ) profile_end() } profile_end() profile_begin("gen_cached_draw_list: cached") - colour.r = max(colour.r, 0.80 * enable_debug_vis_type) - colour.g = max(colour.g, 0.25 * enable_debug_vis_type) - colour.b = max(colour.b, 0.25 * enable_debug_vis_type) + // colour.r = max(colour.r, 0.4 * enable_debug_vis_type) + // colour.g = max(colour.g, 0.4 * enable_debug_vis_type) + // colour.b = max(colour.b, 0.4 * enable_debug_vis_type) generate_blit_from_atlas_draw_list( draw_list, glyph_pack[:], cached, colour ) profile_end() } diff --git a/code/font/vefontcache/shaper.odin b/code/font/vefontcache/shaper.odin index 06e91bc..9144a74 100644 --- a/code/font/vefontcache/shaper.odin +++ b/code/font/vefontcache/shaper.odin @@ -14,7 +14,10 @@ Shape_Key :: u32 its position should be used for rendering. For this library's case it also involes keeping any content - that does not have to be resolved once again in the later stage of processing. 
+ that does not have to be resolved once again in the later stage of processing: + * Resolve atlas lru codes + * Resolve glyph bounds and scale + * Resolve atlas region the glyph is associated with. Ideally the user should resolve this shape once and cache/store it on their side. They have the best ability to avoid costly lookups to streamline @@ -22,8 +25,8 @@ Shape_Key :: u32 For ease of use the cache does a relatively good job and only adds a few hundred nano-seconds to resolve a shape's lookup from its source specification. - If your doing something heavy though (where there is thousands, or tens-of thousands) - your not going to be satisfied with keeping that in the iteration). + If you're doing something very heavy though (tens-of thousands +) you're not + going to be satisfied with keeping that in the iteration. */ Shaped_Text :: struct #packed { glyph : [dynamic]Glyph, @@ -31,11 +34,6 @@ Shaped_Text :: struct #packed { atlas_lru_code : [dynamic]Atlas_Key, region_kind : [dynamic]Atlas_Region_Kind, bounds : [dynamic]Range2, - // TODO(Ed): Profile if its worth not doing compute for these per frame. - // bounds_scaled : [dynamic]Range2, - // bounds_size : [dynamic]Vec2, - // bounds_size_Scaled : [dynamic]Vec2, - atlas_bbox : [dynamic]Transform, end_cursor_pos : Vec2, size : Vec2, } diff --git a/code/font/vefontcache/vefontcache.odin b/code/font/vefontcache/vefontcache.odin index 688ad85..4c81a71 100644 --- a/code/font/vefontcache/vefontcache.odin +++ b/code/font/vefontcache/vefontcache.odin @@ -1,6 +1,4 @@ /* -A port of (https://github.com/hypernewbie/VEFontCache) to Odin. 
- See: https://github.com/Ed94/VEFontCache-Odin */ package vetext @@ -8,7 +6,7 @@ package vetext import "base:runtime" // See: mappings.odin for profiling hookup -DISABLE_PROFILING :: false +DISABLE_PROFILING :: true ENABLE_OVERSIZED_GLYPHS :: true Font_ID :: distinct i16 @@ -139,8 +137,8 @@ Init_Glyph_Draw_Params_Default :: Init_Glyph_Draw_Params { over_sample = 4, draw_padding = Init_Atlas_Params_Default.glyph_padding, shape_gen_scratch_reserve = 512, - buffer_glyph_limit = 4, - batch_glyph_limit = 32, + buffer_glyph_limit = 16, + batch_glyph_limit = 256, } Init_Shaper_Params :: struct { @@ -223,8 +221,8 @@ startup :: proc( ctx : ^Context, parser_kind : Parser_Kind = .STB_TrueType, // N atlas_size := Vec2i { 4096, 2048 } * i32(atlas.size_multiplier) slot_region_a := Vec2i { 32, 32 } * i32(atlas.size_multiplier) - slot_region_c := Vec2i { 64, 64 } * i32(atlas.size_multiplier) slot_region_b := Vec2i { 32, 64 } * i32(atlas.size_multiplier) + slot_region_c := Vec2i { 64, 64 } * i32(atlas.size_multiplier) slot_region_d := Vec2i { 128, 128 } * i32(atlas.size_multiplier) init_atlas_region :: proc( region : ^Atlas_Region, atlas_size, slot_size : Vec2i, factor : Vec2i ) @@ -303,9 +301,6 @@ startup :: proc( ctx : ^Context, parser_kind : Parser_Kind = .STB_TrueType, // N glyph_buffer.size.y = atlas.region_d.slot_size.y * i32(glyph_buffer.over_sample.y) glyph_buffer.draw_padding = cast(f32) glyph_draw_params.draw_padding - buffer_limit := glyph_draw_params.buffer_glyph_limit - batch_limit := glyph_draw_params.batch_glyph_limit - glyph_buffer.draw_list.vertices, error = make( [dynamic]Vertex, len = 0, cap = 8 * Kilobyte ) assert( error == .None, "VEFontCache.init : Failed to allocate vertices array for glyph_buffer.draw_list" ) @@ -787,11 +782,11 @@ draw_text_layer :: #force_inline proc( ctx : ^Context, layer : []Text_Layer_Elem assert( ctx != nil ) assert( len(layer) > 0 ) + shapes := make( []Shaped_Text, len(layer) ) for elem in layer { assert( elem.font >= 0 && 
int(elem.font) < len(ctx.entries) ) - shapes := make( []Shaped_Text, len(layer) ) for elem, id in layer { entry := ctx.entries[ elem.font ] @@ -823,25 +818,25 @@ draw_text_layer :: #force_inline proc( ctx : ^Context, layer : []Text_Layer_Elem ) shapes[id] = shape } + } - for elem, id in layer { - entry := ctx.entries[ elem.font ] + for elem, id in layer { + entry := ctx.entries[ elem.font ] - ctx.cursor_pos = {} + ctx.cursor_pos = {} - colour := ctx.colour - colour.a = 1.0 + ctx.alpha_sharpen + colour := ctx.colour + colour.a = 1.0 + ctx.alpha_sharpen - adjusted_position := get_snapped_position( ctx^, elem.position ) + adjusted_position := get_snapped_position( ctx^, elem.position ) - // font_scale := parser_scale( entry.parser_info, elem.px_size ) + // font_scale := parser_scale( entry.parser_info, elem.px_size ) - target_px_size := elem.px_size * ctx.px_scalar - target_scale := elem.scale * (1 / ctx.px_scalar) - target_font_scale := parser_scale( entry.parser_info, target_px_size ) + target_px_size := elem.px_size * ctx.px_scalar + target_scale := elem.scale * (1 / ctx.px_scalar) + target_font_scale := parser_scale( entry.parser_info, target_px_size ) - generate_shapes_draw_list(ctx, elem.font, elem.colour, entry, target_px_size, target_font_scale, adjusted_position, target_scale, shapes ) - } + generate_shapes_draw_list(ctx, elem.font, elem.colour, entry, target_px_size, target_font_scale, adjusted_position, target_scale, shapes ) } } diff --git a/code/grime/profiler.odin b/code/grime/profiler.odin index 3a3a27c..2fcc5f7 100644 --- a/code/grime/profiler.odin +++ b/code/grime/profiler.odin @@ -15,7 +15,7 @@ set_profiler_module_context :: #force_inline proc "contextless" ( ctx : ^SpallPr Module_Context = ctx } -DISABLE_PROFILING :: false +DISABLE_PROFILING :: true @(deferred_none = profile_end, disabled = DISABLE_PROFILING) profile :: #force_inline proc "contextless" ( name : string, loc := #caller_location ) { diff --git a/code/sectr/font/provider.odin 
b/code/sectr/font/provider.odin index 56c85cc..0876dc5 100644 --- a/code/sectr/font/provider.odin +++ b/code/sectr/font/provider.odin @@ -119,6 +119,10 @@ font_load :: proc(path_file : string, return fid } +font_provider_set_draw_type_visualization :: #force_inline proc( should_enable : b32 ) { + ve.set_draw_type_visualization( & get_state().font_provider_ctx.ve_ctx, should_enable ) +} + font_provider_set_alpha_sharpen :: #force_inline proc( scalar : f32 ) { ve.set_alpha_scalar( & get_state().font_provider_ctx.ve_ctx, scalar ) } diff --git a/code/sectr/font/render_sokol.odin b/code/sectr/font/render_sokol.odin index 4965a58..8be637a 100644 --- a/code/sectr/font/render_sokol.odin +++ b/code/sectr/font/render_sokol.odin @@ -74,7 +74,7 @@ font_provider_setup_sokol_gfx_objects :: proc( ctx : ^VE_RenderData, ve_ctx : ve verify( sokol_gfx.query_buffer_state( draw_list_vbuf) < ResourceState.FAILED, "Failed to make draw_list_vbuf" ) draw_list_ibuf = sokol_gfx.make_buffer( BufferDesciption { - size = size_of(u32) * 1 * Mega, + size = size_of(u32) * 3 * Mega, usage = BufferUsage.STREAM, type = BufferType.INDEXBUFFER, })