From b066b0de3a8c10b209b842be836f2cd54fa95ec4 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Fri, 3 Jan 2025 01:25:05 -0500 Subject: [PATCH] more perf improves for VEFontCache --- code/font/vefontcache/atlas.odin | 4 +- code/font/vefontcache/draw.odin | 240 +++++++++++++++---------- code/font/vefontcache/parser.odin | 2 +- code/font/vefontcache/vefontcache.odin | 8 +- code/sectr/engine/render.odin | 4 +- code/sectr/font/provider.odin | 7 + code/sectr/ui/core/base.odin | 10 +- code/sectr/ui/core/layout_compute.odin | 2 - scripts/build.ps1 | 4 +- toolchain/Odin | 1 + 10 files changed, 171 insertions(+), 111 deletions(-) create mode 160000 toolchain/Odin diff --git a/code/font/vefontcache/atlas.odin b/code/font/vefontcache/atlas.odin index 015890a..d8c3067 100644 --- a/code/font/vefontcache/atlas.odin +++ b/code/font/vefontcache/atlas.odin @@ -100,7 +100,7 @@ check_and_reserve_slot_in_atlas :: #force_inline proc( ctx : Context, glyph_inde ) -> (found, should_cache : b8 ) { profile(#procedure) - // assert( glyph_index != -1 ) + assert( glyph_index != -1 ) if ctx.temp_codepoint_seen_num > i32(cap(ctx.temp_codepoint_seen)) do return @@ -113,7 +113,7 @@ check_and_reserve_slot_in_atlas :: #force_inline proc( ctx : Context, glyph_inde next_evict_codepoint := lru_get_next_evicted( region.state ) success : bool found, success = ctx.temp_codepoint_seen[next_evict_codepoint] - // assert(success != false) + assert(success != false) if (found) { return } diff --git a/code/font/vefontcache/draw.odin b/code/font/vefontcache/draw.odin index e2038d8..44c4e86 100644 --- a/code/font/vefontcache/draw.odin +++ b/code/font/vefontcache/draw.odin @@ -11,18 +11,25 @@ Vertex :: struct { } Transform :: struct { - translate : Vec2, - scale : Vec2, + pos : Vec2, + scale : Vec2, } -Glyph_Bounds :: struct { +Range2 :: struct { p0, p1 : Vec2, } Glyph_Bounds_Mat :: matrix[2, 2] f32 -Glyph_Pack_Entry :: struct #packed { - translate : Vec2, +Glyph_Draw_Quad :: struct { + dst_pos : Vec2, + dst_scale : Vec2, + src_pos : Vec2, + src_scale : Vec2, +} + +Glyph_Pack_Entry :: struct { + position : Vec2, index : Glyph, lru_code : u64, @@ -35,15 +42,16 @@ Glyph_Pack_Entry :: struct #packed { shape : Parser_Glyph_Shape, - bounds : Glyph_Bounds, + bounds : Range2, + bounds_scaled : Range2, bounds_size : Vec2, bounds_size_scaled : Vec2, over_sample : Vec2, scale : Vec2, - draw_transform : Transform, - // cache_draw_scale : Vec2, - // cache_draw_translate : Vec2, + draw_transform : Transform, + + cached_draw_quad : Glyph_Draw_Quad, // shape_id : i32, } @@ -169,14 +177,18 @@ construct_filled_path :: #force_inline proc( draw_list : ^Draw_List, outside_poi } generate_glyph_pass_draw_list :: #force_inline proc(ctx : ^Context, + // glyph_id : Glyph, + // parser_info : Parser_Font_Info, glyph_shape : Parser_Glyph_Shape, curve_quality : f32, - bounds : Glyph_Bounds, + bounds : Range2, scale, translate : Vec2 ) -> b32 { profile(#procedure) + // glyph_shape, error := parser_get_glyph_shape( parser_info, glyph_id ) + outside := Vec2{bounds.p0.x - 21, bounds.p0.y - 33} draw := Draw_Call_Default @@ -187,7 +199,7 @@ generate_glyph_pass_draw_list :: #force_inline proc(ctx : ^Context, clear(path) step := 1.0 / curve_quality - for edge in glyph_shape do #partial switch edge.type + for edge, index in glyph_shape do #partial switch edge.type { case .Move: if len(path) > 0 { @@ -221,6 +233,9 @@ generate_glyph_pass_draw_list :: #force_inline proc(ctx : ^Context, alpha := index * step append( path, Vertex { pos = eval_point_on_bezier4(p0, p1, p2, p3, alpha) } ) } + + case: + // assert(false, "WTF") } if len(path) > 0 { @@ -231,6 +246,8 @@ generate_glyph_pass_draw_list :: #force_inline proc(ctx : ^Context, if draw.end_index > draw.start_index { append( & ctx.draw_list.calls, draw) } + + // parser_free_shape(parser_info, glyph_shape) return true } @@ -248,7 +265,7 @@ cache_glyph_to_atlas :: #force_no_inline proc ( ctx : ^Context, glyph_shape : Parser_Glyph_Shape, - bounds : Glyph_Bounds, // -> generate_glyph_pass_draw_list + bounds : Range2, // -> generate_glyph_pass_draw_list bounds_size : Vec2, @@ -259,7 +276,11 @@ cache_glyph_to_atlas :: #force_no_inline proc ( ctx : ^Context, entry : Entry, // region_kind : Atlas_Region_Kind, // region : ^Atlas_Region, - over_sample : Vec2 + over_sample : Vec2, + + + // glyph_id : Glyph, + // parser_info : Parser_Font_Info, ) { profile(#procedure) @@ -322,27 +343,35 @@ cache_glyph_to_atlas :: #force_no_inline proc ( ctx : ^Context, - screen_space_translate := buf_transform.translate + screen_space_translate := buf_transform.pos screen_space_scale := buf_transform.scale - screen_space_translate.x = (buf_transform.translate.x + batch_x) + screen_space_translate.x = (buf_transform.pos.x + batch_x) glyph_buf_Batch_x^ += i32(buffer_x_allocation) to_screen_space( & screen_space_translate, & screen_space_scale, glyph_buffer_size ) // Render glyph to glyph render target (FBO) - generate_glyph_pass_draw_list( ctx, glyph_shape, entry.curve_quality, bounds, screen_space_scale, screen_space_translate ) + generate_glyph_pass_draw_list( ctx, + // glyph_id, + // parser_info, + glyph_shape, + entry.curve_quality, bounds, screen_space_scale, screen_space_translate ) } generate_oversized_draw_list :: #force_no_inline proc( ctx : ^Context, glyph_padding : f32, glyph_buffer_size : Vec2, entry : Entry, - glyph : Glyph, + + // glyph_id : Glyph, + // parser_info : Parser_Font_Info, glyph_shape : Parser_Glyph_Shape, - bounds : Glyph_Bounds, // -> generate_glyph_pass_draw_list + + bounds : Range2, // -> generate_glyph_pass_draw_list bounds_size : Vec2, - over_sample, position, scale : Vec2 ) + over_sample, position, scale : Vec2 +) { profile(#procedure) // Draw un-antialiased glyph to draw_buffer @@ -350,7 +379,12 @@ generate_oversized_draw_list :: #force_no_inline proc( ctx : ^Context, glyph_draw_translate := -1 * bounds.p0 * glyph_draw_scale + glyph_padding to_screen_space( & glyph_draw_translate, & glyph_draw_scale, glyph_buffer_size ) - generate_glyph_pass_draw_list( ctx, glyph_shape, entry.curve_quality, bounds, glyph_draw_scale, glyph_draw_translate ) + generate_glyph_pass_draw_list( ctx, + // glyph_id, + // parser_info, + glyph_shape, + + entry.curve_quality, bounds, glyph_draw_scale, glyph_draw_translate ) bounds_scaled := bounds_size * entry.size_scale @@ -393,60 +427,11 @@ generate_oversized_draw_list :: #force_no_inline proc( ctx : ^Context, append( & ctx.draw_list.calls, ..calls[:] ) } -generate_cached_draw_list :: proc (draw_list : ^Draw_List, glyph_pack : #soa[]Glyph_Pack_Entry, sub_pack : []i32, - atlas_size : Vec2, - glyph_size_scale : f32, - colour : Colour, - position : Vec2, - scale : Vec2 -) -{ - profile(#procedure) - - call := Draw_Call_Default - call.pass = .Target - call.colour = colour - - for id, index in sub_pack - { - glyph := glyph_pack[id] - profile("cached") - - bounds_0_scaled := ceil(glyph.bounds.p0 * glyph_size_scale - 0.5 ) - dst_pos := glyph.translate + bounds_0_scaled * scale - dst_scale := glyph.scale * scale - src_pos := glyph.region_pos - - to_text_space( & src_pos, & glyph.scale, atlas_size ) - - call.start_index = u32(len(draw_list.indices)) - - blit_quad(draw_list, - dst_pos, dst_pos + dst_scale, - src_pos, src_pos + glyph.scale ) - - call.end_index = u32(len(draw_list.indices)) - - append(& draw_list.calls, call) - } -} - -// @(require_results) -append_no_bounds_check :: proc "contextless" (array: ^[dynamic]i32, value: i32) -> (n: int) { - raw := transmute(^runtime.Raw_Dynamic_Array)array - if raw.len >= raw.cap { - return 0 - } - array[raw.len] = value - raw.len += 1 - return raw.len -} - generate_shape_draw_list :: #force_no_inline proc( ctx : ^Context, - entry : Entry, - shaped : Shaped_Text, - position, scale : Vec2, - snap_width, snap_height : f32 + entry : Entry, + shaped : Shaped_Text, + position, target_scale : Vec2, + snap_width, snap_height : f32 ) -> (cursor_pos : Vec2) #no_bounds_check { profile(#procedure) @@ -490,7 +475,7 @@ generate_shape_draw_list :: #force_no_inline proc( ctx : ^Context, profile_begin("translate") for & glyph, index in glyph_pack { - glyph.translate = position + (shaped.positions[index]) * scale + glyph.position = position + (shaped.positions[index]) * target_scale } profile_end() @@ -501,8 +486,9 @@ generate_shape_draw_list :: #force_no_inline proc( ctx : ^Context, } for & glyph, index in glyph_pack { - glyph.bounds = parser_get_bounds( entry.parser_info, glyph.index ) - glyph.bounds_size = glyph.bounds.p1 - glyph.bounds.p0 + glyph.bounds = parser_get_bounds( entry.parser_info, glyph.index ) + glyph.bounds_scaled = { glyph.bounds.p0 * entry.size_scale, glyph.bounds.p1 * entry.size_scale } + glyph.bounds_size = glyph.bounds.p1 - glyph.bounds.p0 } for & glyph, index in glyph_pack { @@ -535,7 +521,7 @@ generate_shape_draw_list :: #force_no_inline proc( ctx : ^Context, continue } - region := atlas.regions[glyph.region_kind] + region := atlas.regions[glyph.region_kind] glyph.atlas_index = lru_get( & region.state, glyph.lru_code ) if ctx.temp_codepoint_seen_num <= i32(cap(ctx.temp_codepoint_seen)) @@ -569,30 +555,54 @@ generate_shape_draw_list :: #force_no_inline proc( ctx : ^Context, } profile_end() - profile_begin("to_cache: font parser shape generation") + profile_begin("font parser shape generation") for id, index in sub_slice(to_cache) { error : Allocator_Error glyph_pack[id].shape, error = parser_get_glyph_shape(entry.parser_info, glyph_pack[id].index) - // assert(error == .None) + assert(error == .None) + } + for id, index in sub_slice(oversized) { + error : Allocator_Error + glyph_pack[id].shape, error = parser_get_glyph_shape(entry.parser_info, glyph_pack[id].index) + assert(error == .None) } profile_end() profile_begin("transform math") + for id, index in sub_slice(cached) + { + glyph := & glyph_pack[id] + quad := & glyph.cached_draw_quad + quad.dst_pos = glyph.position + glyph.bounds_scaled.p0 * target_scale + quad.dst_scale = glyph.scale * target_scale + quad.src_scale = glyph.scale + quad.src_pos = glyph.region_pos + to_text_space( & quad.src_pos, & quad.src_scale, atlas_size ) + } for id, index in sub_slice(to_cache) { - transform := & glyph_pack[id].draw_transform - transform.scale = glyph_buffer.over_sample * entry.size_scale - transform.translate = -1 * glyph_pack[id].bounds.p0 * transform.scale + atlas.glyph_padding + glyph := & glyph_pack[id] + quad := & glyph.cached_draw_quad + quad.dst_pos = glyph.position + glyph.bounds_scaled.p0 * target_scale + quad.dst_scale = glyph.scale * target_scale + quad.src_scale = glyph.scale + quad.src_pos = glyph.region_pos + to_text_space( & quad.src_pos, & quad.src_scale, atlas_size ) + + transform := & glyph.draw_transform + transform.scale = glyph_buffer.over_sample * entry.size_scale + transform.pos = -1 * glyph_pack[id].bounds.p0 * transform.scale + atlas.glyph_padding } for id, index in sub_slice(oversized) { - transform := & glyph_pack[id].draw_transform - transform.scale = glyph_buffer.over_sample * entry.size_scale - transform.translate = -1 * glyph_pack[id].bounds.p0 * transform.scale + atlas.glyph_padding + transform := & glyph_pack[id].draw_transform + transform.scale = glyph_buffer.over_sample * entry.size_scale + transform.pos = -1 * glyph_pack[id].bounds.p0 * transform.scale + atlas.glyph_padding } profile_end() profile_begin("to_cache: caching to atlas") + for id, index in sub_slice(to_cache) { glyph := glyph_pack[id] @@ -616,39 +626,77 @@ generate_shape_draw_list :: #force_no_inline proc( ctx : ^Context, glyph.lru_code, glyph.atlas_index, entry, - glyph.over_sample + glyph.over_sample, + + // glyph.index, + // entry.parser_info, ) mark_batch_codepoint_seen(ctx, glyph.lru_code) } + reset_batch_codepoint_state( ctx ) flush_glyph_buffer_draw_list(draw_list, & glyph_buffer.draw_list, & glyph_buffer.clear_draw_list, & glyph_buffer.batch_x) + profile_end() - for id, index in sub_slice(to_cache) - { - parser_free_shape(entry.parser_info, glyph_pack[id].shape) - } - generate_cached_draw_list( draw_list, glyph_pack[:], sub_slice(to_cache), atlas_size, entry.size_scale, ctx.colour, position, scale ) - generate_cached_draw_list( draw_list, glyph_pack[:], sub_slice(cached), atlas_size, entry.size_scale, ctx.colour, position, scale ) + generate_cached_draw_list :: #force_inline proc (draw_list : ^Draw_List, glyph_pack : #soa[]Glyph_Pack_Entry, sub_pack : []i32, colour : Colour ) + { + profile(#procedure) + call := Draw_Call_Default + call.pass = .Target + call.colour = colour + for id, index in sub_pack + { + profile("glyph") + call.start_index = u32(len(draw_list.indices)) + + quad := glyph_pack[id].cached_draw_quad + blit_quad(draw_list, + quad.dst_pos, quad.dst_pos + quad.dst_scale, + quad.src_pos, quad.src_pos + quad.src_scale + ) + call.end_index = u32(len(draw_list.indices)) + append(& draw_list.calls, call) + } + } + generate_cached_draw_list( draw_list, glyph_pack[:], sub_slice(to_cache), ctx.colour ) + generate_cached_draw_list( draw_list, glyph_pack[:], sub_slice(cached), ctx.colour ) + + reset_batch_codepoint_state( ctx ) + flush_glyph_buffer_draw_list(draw_list, & glyph_buffer.draw_list, & glyph_buffer.clear_draw_list, & glyph_buffer.batch_x) profile_begin("generate oversized glyphs draw_list") for id, index in sub_slice(oversized) { glyph := glyph_pack[id] generate_oversized_draw_list(ctx, + glyph_buffer.draw_padding, glyph_buffer_size, - entry, glyph.index, glyph.shape, + + entry, + + // glyph.index, + // entry.parser_info, + + glyph.shape, glyph.bounds, glyph.bounds_size, - glyph.over_sample, glyph.translate, scale + glyph.over_sample, glyph.position, target_scale ) } - reset_batch_codepoint_state( ctx ) profile_end() - cursor_pos = position + shaped.end_cursor_pos * scale + reset_batch_codepoint_state( ctx ) + flush_glyph_buffer_draw_list(draw_list, & glyph_buffer.draw_list, & glyph_buffer.clear_draw_list, & glyph_buffer.batch_x) + + profile_begin("font parser shape cleanup") + for id, index in sub_slice(oversized) do parser_free_shape(entry.parser_info, glyph_pack[id].shape) + for id, index in sub_slice(to_cache) do parser_free_shape(entry.parser_info, glyph_pack[id].shape) + profile_end() + + cursor_pos = position + shaped.end_cursor_pos * target_scale return } diff --git a/code/font/vefontcache/parser.odin b/code/font/vefontcache/parser.odin index 3fcd135..6961131 100644 --- a/code/font/vefontcache/parser.odin +++ b/code/font/vefontcache/parser.odin @@ -220,7 +220,7 @@ parser_get_font_vertical_metrics :: #force_inline proc "contextless" ( font : Pa return } -parser_get_bounds :: #force_inline proc "contextless" ( font : Parser_Font_Info, glyph_index : Glyph ) -> (bounds : Glyph_Bounds) +parser_get_bounds :: #force_inline proc "contextless" ( font : Parser_Font_Info, glyph_index : Glyph ) -> (bounds : Range2) { profile(#procedure) diff --git a/code/font/vefontcache/vefontcache.odin b/code/font/vefontcache/vefontcache.odin index e7174a2..227d4e7 100644 --- a/code/font/vefontcache/vefontcache.odin +++ b/code/font/vefontcache/vefontcache.odin @@ -473,6 +473,7 @@ draw_text :: #force_inline proc( ctx : ^Context, font : Font_ID, text_utf8 : str profile(#procedure) assert( ctx != nil ) assert( font >= 0 && int(font) < len(ctx.entries) ) + assert( len(text_utf8) > 0 ) ctx.cursor_pos = {} @@ -482,8 +483,6 @@ draw_text :: #force_inline proc( ctx : ^Context, font : Font_ID, text_utf8 : str entry := ctx.entries[ font ] - - shape := shaper_shape_text_cached( ctx, font, text_utf8, entry, shaper_shape_text_uncached_advanced ) ctx.cursor_pos = generate_shape_draw_list( ctx, entry, shape, position, scale, ctx.snap_width, ctx.snap_height ) return true @@ -494,6 +493,7 @@ draw_text_no_snap :: #force_inline proc( ctx : ^Context, font : Font_ID, text_ut profile(#procedure) assert( ctx != nil ) assert( font >= 0 && int(font) < len(ctx.entries) ) + assert( len(text_utf8) > 0 ) ctx.cursor_pos = {} @@ -599,12 +599,16 @@ get_font_vertical_metrics :: #force_inline proc ( ctx : ^Context, font : Font_ID shape_text_latin :: #force_inline proc( ctx : ^Context, font : Font_ID, text_utf8 : string, allocator := context.allocator ) -> Shaped_Text { + profile(#procedure) + assert( len(text_utf8) > 0 ) entry := ctx.entries[ font ] return shaper_shape_text_cached( ctx, font, text_utf8, entry, shaper_shape_from_text_latin ) } shape_text_advanced :: #force_inline proc( ctx : ^Context, font : Font_ID, text_utf8 : string ) -> Shaped_Text { + profile(#procedure) + assert( len(text_utf8) > 0 ) entry := ctx.entries[ font ] return shaper_shape_text_cached( ctx, font, text_utf8, entry, shaper_shape_text_uncached_advanced ) } diff --git a/code/sectr/engine/render.odin b/code/sectr/engine/render.odin index 7b7444a..d058ebb 100644 --- a/code/sectr/engine/render.odin +++ b/code/sectr/engine/render.odin @@ -642,10 +642,12 @@ render_ui_via_box_list :: proc( box_list : []UI_RenderBoxInfo, text_list : []UI_ entry := text_list[text_id] font := entry.font.key != 0 ? entry.font : default_font - text_enqueued = true text_layer_done = b32(text_id > 0) && text_list[ text_id - 1 ].layer_signal text_id += 1 + if len(entry.text) == 0 do continue + text_enqueued = true + if cam != nil { // draw_text_shape_pos_extent_zoomed( entry.shape, font, entry.font_size, entry.position, cam_offset, screen_size, screen_size_norm, cam.zoom, entry.color ) draw_text_string_pos_extent_zoomed( entry.text, font, entry.font_size, entry.position, cam_offset, screen_size, screen_size_norm, cam.zoom, entry.color ) diff --git a/code/sectr/font/provider.odin b/code/sectr/font/provider.odin index 36d17fb..47e317f 100644 --- a/code/sectr/font/provider.odin +++ b/code/sectr/font/provider.odin @@ -146,6 +146,13 @@ get_font_vertical_metrics :: #force_inline proc ( font : FontID, font_size := Fo return } +shape_text_cached_latin :: #force_inline proc( text : string, font : FontID, font_size := Font_Use_Default_Size, scalar : f32 ) -> ShapedText +{ + ve_id, size := font_provider_resolve_draw_id( font, font_size * scalar ) + shape := ve.shape_text_latin( & get_state().font_provider_ctx.ve_ctx, ve_id, text ) + return shape +} + shape_text_cached :: #force_inline proc( text : string, font : FontID, font_size := Font_Use_Default_Size, scalar : f32 ) -> ShapedText { ve_id, size := font_provider_resolve_draw_id( font, font_size * scalar ) diff --git a/code/sectr/ui/core/base.odin b/code/sectr/ui/core/base.odin index 6dcf6d9..dad6323 100644 --- a/code/sectr/ui/core/base.odin +++ b/code/sectr/ui/core/base.odin @@ -277,16 +277,16 @@ ui_graph_build_end :: proc( ui : ^UI_State ) if ! current.computed.fresh { if len(current.text.str) > 0 { - app_window := get_state().app_window - screen_extent := app_window.extent - screen_size := screen_extent * 2 - screen_size_norm := 1 / screen_size + // app_window := get_state().app_window + // screen_extent := app_window.extent + // screen_size := screen_extent * 2 + // screen_size_norm := 1 / screen_size font_size_screen_scalar := app_config().font_size_screen_scalar // over_sample : f32 = f32(get_state().config.font_size_canvas_scalar) - current.computed.text_shape = shape_text_cached( current.text.str, current.style.font, current.layout.font_size, 1.0 ) + current.computed.text_shape = shape_text_cached_latin( current.text.str, current.style.font, current.layout.font_size, 1.0 ) } ui_box_compute_layout( current ) } diff --git a/code/sectr/ui/core/layout_compute.odin b/code/sectr/ui/core/layout_compute.odin index cd2ed72..d8e3570 100644 --- a/code/sectr/ui/core/layout_compute.odin +++ b/code/sectr/ui/core/layout_compute.odin @@ -73,8 +73,6 @@ ui_box_compute_layout :: proc( box : ^UI_Box, text_size : Vec2 if len(box.text.str) > 0 { - - text_size = computed.text_shape.size // if layout.font_size == computed.text_size.y { // text_size = computed.text_size diff --git a/scripts/build.ps1 b/scripts/build.ps1 index 99726a2..ec895c5 100644 --- a/scripts/build.ps1 +++ b/scripts/build.ps1 @@ -201,10 +201,10 @@ push-location $path_root # $build_args += $flag_micro_architecture_native $build_args += $flag_use_separate_modules $build_args += $flag_thread_count + $CoreCount_Physical - $build_args += $flag_optimize_none + # $build_args += $flag_optimize_none # $build_args += $flag_optimize_minimal # $build_args += $flag_optimize_speed - # $build_args += $falg_optimize_aggressive + $build_args += $falg_optimize_aggressive $build_args += $flag_debug $build_args += $flag_pdb_name + $pdb $build_args += $flag_subsystem + 'windows' diff --git a/toolchain/Odin b/toolchain/Odin new file mode 160000 index 0000000..aa8bc79 --- /dev/null +++ b/toolchain/Odin @@ -0,0 +1 @@ +Subproject commit aa8bc79d342b5de5ad597577db8bb65a46119114