More perf improvements for VEFontCache

This commit is contained in:
Edward R. Gonzalez 2025-01-03 01:25:05 -05:00
parent 5e0afd5b7b
commit b066b0de3a
10 changed files with 171 additions and 111 deletions

View File

@ -100,7 +100,7 @@ check_and_reserve_slot_in_atlas :: #force_inline proc( ctx : Context, glyph_inde
) -> (found, should_cache : b8 )
{
profile(#procedure)
// assert( glyph_index != -1 )
assert( glyph_index != -1 )
if ctx.temp_codepoint_seen_num > i32(cap(ctx.temp_codepoint_seen)) do return
@ -113,7 +113,7 @@ check_and_reserve_slot_in_atlas :: #force_inline proc( ctx : Context, glyph_inde
next_evict_codepoint := lru_get_next_evicted( region.state )
success : bool
found, success = ctx.temp_codepoint_seen[next_evict_codepoint]
// assert(success != false)
assert(success != false)
if (found) {
return
}

View File

@ -11,18 +11,25 @@ Vertex :: struct {
}
Transform :: struct {
translate : Vec2,
scale : Vec2,
pos : Vec2,
scale : Vec2,
}
Glyph_Bounds :: struct {
Range2 :: struct {
p0, p1 : Vec2,
}
Glyph_Bounds_Mat :: matrix[2, 2] f32
Glyph_Pack_Entry :: struct #packed {
translate : Vec2,
// Destination/source rectangle pair describing one glyph quad.
// Each rectangle is stored as a position plus a scale (extent); consumers form
// the opposite corner as `pos + scale`.
Glyph_Draw_Quad :: struct {
// Destination position of the quad.
dst_pos : Vec2,
// Destination extent; added to dst_pos to get the far corner.
dst_scale : Vec2,
// Source position of the quad.
// NOTE(review): presumably in atlas texture space after to_text_space - confirm against the producer.
src_pos : Vec2,
// Source extent; added to src_pos to get the far corner.
src_scale : Vec2,
}
Glyph_Pack_Entry :: struct {
position : Vec2,
index : Glyph,
lru_code : u64,
@ -35,15 +42,16 @@ Glyph_Pack_Entry :: struct #packed {
shape : Parser_Glyph_Shape,
bounds : Glyph_Bounds,
bounds : Range2,
bounds_scaled : Range2,
bounds_size : Vec2,
bounds_size_scaled : Vec2,
over_sample : Vec2,
scale : Vec2,
draw_transform : Transform,
// cache_draw_scale : Vec2,
// cache_draw_translate : Vec2,
draw_transform : Transform,
cached_draw_quad : Glyph_Draw_Quad,
// shape_id : i32,
}
@ -169,14 +177,18 @@ construct_filled_path :: #force_inline proc( draw_list : ^Draw_List, outside_poi
}
generate_glyph_pass_draw_list :: #force_inline proc(ctx : ^Context,
// glyph_id : Glyph,
// parser_info : Parser_Font_Info,
glyph_shape : Parser_Glyph_Shape,
curve_quality : f32,
bounds : Glyph_Bounds,
bounds : Range2,
scale, translate : Vec2
) -> b32
{
profile(#procedure)
// glyph_shape, error := parser_get_glyph_shape( parser_info, glyph_id )
outside := Vec2{bounds.p0.x - 21, bounds.p0.y - 33}
draw := Draw_Call_Default
@ -187,7 +199,7 @@ generate_glyph_pass_draw_list :: #force_inline proc(ctx : ^Context,
clear(path)
step := 1.0 / curve_quality
for edge in glyph_shape do #partial switch edge.type
for edge, index in glyph_shape do #partial switch edge.type
{
case .Move:
if len(path) > 0 {
@ -221,6 +233,9 @@ generate_glyph_pass_draw_list :: #force_inline proc(ctx : ^Context,
alpha := index * step
append( path, Vertex { pos = eval_point_on_bezier4(p0, p1, p2, p3, alpha) } )
}
case:
// assert(false, "WTF")
}
if len(path) > 0 {
@ -231,6 +246,8 @@ generate_glyph_pass_draw_list :: #force_inline proc(ctx : ^Context,
if draw.end_index > draw.start_index {
append( & ctx.draw_list.calls, draw)
}
// parser_free_shape(parser_info, glyph_shape)
return true
}
@ -248,7 +265,7 @@ cache_glyph_to_atlas :: #force_no_inline proc ( ctx : ^Context,
glyph_shape : Parser_Glyph_Shape,
bounds : Glyph_Bounds, // -> generate_glyph_pass_draw_list
bounds : Range2, // -> generate_glyph_pass_draw_list
bounds_size : Vec2,
@ -259,7 +276,11 @@ cache_glyph_to_atlas :: #force_no_inline proc ( ctx : ^Context,
entry : Entry,
// region_kind : Atlas_Region_Kind,
// region : ^Atlas_Region,
over_sample : Vec2
over_sample : Vec2,
// glyph_id : Glyph,
// parser_info : Parser_Font_Info,
)
{
profile(#procedure)
@ -322,27 +343,35 @@ cache_glyph_to_atlas :: #force_no_inline proc ( ctx : ^Context,
screen_space_translate := buf_transform.translate
screen_space_translate := buf_transform.pos
screen_space_scale := buf_transform.scale
screen_space_translate.x = (buf_transform.translate.x + batch_x)
screen_space_translate.x = (buf_transform.pos.x + batch_x)
glyph_buf_Batch_x^ += i32(buffer_x_allocation)
to_screen_space( & screen_space_translate, & screen_space_scale, glyph_buffer_size )
// Render glyph to glyph render target (FBO)
generate_glyph_pass_draw_list( ctx, glyph_shape, entry.curve_quality, bounds, screen_space_scale, screen_space_translate )
generate_glyph_pass_draw_list( ctx,
// glyph_id,
// parser_info,
glyph_shape,
entry.curve_quality, bounds, screen_space_scale, screen_space_translate )
}
generate_oversized_draw_list :: #force_no_inline proc( ctx : ^Context,
glyph_padding : f32,
glyph_buffer_size : Vec2,
entry : Entry,
glyph : Glyph,
// glyph_id : Glyph,
// parser_info : Parser_Font_Info,
glyph_shape : Parser_Glyph_Shape,
bounds : Glyph_Bounds, // -> generate_glyph_pass_draw_list
bounds : Range2, // -> generate_glyph_pass_draw_list
bounds_size : Vec2,
over_sample, position, scale : Vec2 )
over_sample, position, scale : Vec2
)
{
profile(#procedure)
// Draw un-antialiased glyph to draw_buffer
@ -350,7 +379,12 @@ generate_oversized_draw_list :: #force_no_inline proc( ctx : ^Context,
glyph_draw_translate := -1 * bounds.p0 * glyph_draw_scale + glyph_padding
to_screen_space( & glyph_draw_translate, & glyph_draw_scale, glyph_buffer_size )
generate_glyph_pass_draw_list( ctx, glyph_shape, entry.curve_quality, bounds, glyph_draw_scale, glyph_draw_translate )
generate_glyph_pass_draw_list( ctx,
// glyph_id,
// parser_info,
glyph_shape,
entry.curve_quality, bounds, glyph_draw_scale, glyph_draw_translate )
bounds_scaled := bounds_size * entry.size_scale
@ -393,60 +427,11 @@ generate_oversized_draw_list :: #force_no_inline proc( ctx : ^Context,
append( & ctx.draw_list.calls, ..calls[:] )
}
// Appends one blit quad and one draw call to `draw_list` for every glyph id in `sub_pack`.
//
// Inputs:
// - draw_list        : receives the quads (via blit_quad) and the draw calls.
// - glyph_pack       : SOA glyph entries; indexed by the ids stored in sub_pack.
// - sub_pack         : ids (indices into glyph_pack) of the glyphs to emit.
// - atlas_size       : forwarded to to_text_space for the source rectangle.
// - glyph_size_scale : multiplies the glyph's unscaled bounds origin before rounding.
// - colour           : colour written into every emitted draw call.
// - position         : not referenced in this body.
// - scale            : multiplies the rounded bounds offset and the glyph scale for the destination rectangle.
generate_cached_draw_list :: proc (draw_list : ^Draw_List, glyph_pack : #soa[]Glyph_Pack_Entry, sub_pack : []i32,
atlas_size : Vec2,
glyph_size_scale : f32,
colour : Colour,
position : Vec2,
scale : Vec2
)
{
profile(#procedure)
// One call template is reused for every glyph; only the index range changes per iteration.
call := Draw_Call_Default
call.pass = .Target
call.colour = colour
for id, index in sub_pack
{
// `glyph` is a local copy of the pack entry, so the address-of below refers to this copy.
glyph := glyph_pack[id]
profile("cached")
// Scale the bounds origin, then round via ceil(x - 0.5).
bounds_0_scaled := ceil(glyph.bounds.p0 * glyph_size_scale - 0.5 )
dst_pos := glyph.translate + bounds_0_scaled * scale
// dst_scale is taken from glyph.scale before to_text_space is handed a pointer to it.
dst_scale := glyph.scale * scale
src_pos := glyph.region_pos
// NOTE(review): presumably rewrites src_pos and glyph.scale in place into atlas texture space - confirm in to_text_space.
to_text_space( & src_pos, & glyph.scale, atlas_size )
// The draw call covers exactly the indices appended by blit_quad.
call.start_index = u32(len(draw_list.indices))
blit_quad(draw_list,
dst_pos, dst_pos + dst_scale,
src_pos, src_pos + glyph.scale )
call.end_index = u32(len(draw_list.indices))
append(& draw_list.calls, call)
}
}
// Appends `value` to `array` only if spare capacity already exists; never grows the array.
// Returns the new length on success, or 0 when len >= cap (nothing is written in that case).
// NOTE(review): the store below indexes at the current length (one past the last valid element)
// before the length is bumped; unless bounds checking is disabled where this is compiled,
// that index would be out of range - confirm.
// @(require_results)
append_no_bounds_check :: proc "contextless" (array: ^[dynamic]i32, value: i32) -> (n: int) {
// View the dynamic array header directly so len can be modified by hand.
raw := transmute(^runtime.Raw_Dynamic_Array)array
if raw.len >= raw.cap {
return 0
}
array[raw.len] = value
raw.len += 1
return raw.len
}
generate_shape_draw_list :: #force_no_inline proc( ctx : ^Context,
entry : Entry,
shaped : Shaped_Text,
position, scale : Vec2,
snap_width, snap_height : f32
entry : Entry,
shaped : Shaped_Text,
position, target_scale : Vec2,
snap_width, snap_height : f32
) -> (cursor_pos : Vec2) #no_bounds_check
{
profile(#procedure)
@ -490,7 +475,7 @@ generate_shape_draw_list :: #force_no_inline proc( ctx : ^Context,
profile_begin("translate")
for & glyph, index in glyph_pack
{
glyph.translate = position + (shaped.positions[index]) * scale
glyph.position = position + (shaped.positions[index]) * target_scale
}
profile_end()
@ -501,8 +486,9 @@ generate_shape_draw_list :: #force_no_inline proc( ctx : ^Context,
}
for & glyph, index in glyph_pack
{
glyph.bounds = parser_get_bounds( entry.parser_info, glyph.index )
glyph.bounds_size = glyph.bounds.p1 - glyph.bounds.p0
glyph.bounds = parser_get_bounds( entry.parser_info, glyph.index )
glyph.bounds_scaled = { glyph.bounds.p0 * entry.size_scale, glyph.bounds.p1 * entry.size_scale }
glyph.bounds_size = glyph.bounds.p1 - glyph.bounds.p0
}
for & glyph, index in glyph_pack
{
@ -535,7 +521,7 @@ generate_shape_draw_list :: #force_no_inline proc( ctx : ^Context,
continue
}
region := atlas.regions[glyph.region_kind]
region := atlas.regions[glyph.region_kind]
glyph.atlas_index = lru_get( & region.state, glyph.lru_code )
if ctx.temp_codepoint_seen_num <= i32(cap(ctx.temp_codepoint_seen))
@ -569,30 +555,54 @@ generate_shape_draw_list :: #force_no_inline proc( ctx : ^Context,
}
profile_end()
profile_begin("to_cache: font parser shape generation")
profile_begin("font parser shape generation")
for id, index in sub_slice(to_cache) {
error : Allocator_Error
glyph_pack[id].shape, error = parser_get_glyph_shape(entry.parser_info, glyph_pack[id].index)
// assert(error == .None)
assert(error == .None)
}
for id, index in sub_slice(oversized) {
error : Allocator_Error
glyph_pack[id].shape, error = parser_get_glyph_shape(entry.parser_info, glyph_pack[id].index)
assert(error == .None)
}
profile_end()
profile_begin("transform math")
for id, index in sub_slice(cached)
{
glyph := & glyph_pack[id]
quad := & glyph.cached_draw_quad
quad.dst_pos = glyph.position + glyph.bounds_scaled.p0 * target_scale
quad.dst_scale = glyph.scale * target_scale
quad.src_scale = glyph.scale
quad.src_pos = glyph.region_pos
to_text_space( & quad.src_pos, & quad.src_scale, atlas_size )
}
for id, index in sub_slice(to_cache)
{
transform := & glyph_pack[id].draw_transform
transform.scale = glyph_buffer.over_sample * entry.size_scale
transform.translate = -1 * glyph_pack[id].bounds.p0 * transform.scale + atlas.glyph_padding
glyph := & glyph_pack[id]
quad := & glyph.cached_draw_quad
quad.dst_pos = glyph.position + glyph.bounds_scaled.p0 * target_scale
quad.dst_scale = glyph.scale * target_scale
quad.src_scale = glyph.scale
quad.src_pos = glyph.region_pos
to_text_space( & quad.src_pos, & quad.src_scale, atlas_size )
transform := & glyph.draw_transform
transform.scale = glyph_buffer.over_sample * entry.size_scale
transform.pos = -1 * glyph_pack[id].bounds.p0 * transform.scale + atlas.glyph_padding
}
for id, index in sub_slice(oversized)
{
transform := & glyph_pack[id].draw_transform
transform.scale = glyph_buffer.over_sample * entry.size_scale
transform.translate = -1 * glyph_pack[id].bounds.p0 * transform.scale + atlas.glyph_padding
transform := & glyph_pack[id].draw_transform
transform.scale = glyph_buffer.over_sample * entry.size_scale
transform.pos = -1 * glyph_pack[id].bounds.p0 * transform.scale + atlas.glyph_padding
}
profile_end()
profile_begin("to_cache: caching to atlas")
for id, index in sub_slice(to_cache)
{
glyph := glyph_pack[id]
@ -616,39 +626,77 @@ generate_shape_draw_list :: #force_no_inline proc( ctx : ^Context,
glyph.lru_code,
glyph.atlas_index,
entry,
glyph.over_sample
glyph.over_sample,
// glyph.index,
// entry.parser_info,
)
mark_batch_codepoint_seen(ctx, glyph.lru_code)
}
reset_batch_codepoint_state( ctx )
flush_glyph_buffer_draw_list(draw_list, & glyph_buffer.draw_list, & glyph_buffer.clear_draw_list, & glyph_buffer.batch_x)
profile_end()
for id, index in sub_slice(to_cache)
{
parser_free_shape(entry.parser_info, glyph_pack[id].shape)
}
generate_cached_draw_list( draw_list, glyph_pack[:], sub_slice(to_cache), atlas_size, entry.size_scale, ctx.colour, position, scale )
generate_cached_draw_list( draw_list, glyph_pack[:], sub_slice(cached), atlas_size, entry.size_scale, ctx.colour, position, scale )
// Appends one blit quad and one draw call to `draw_list` for every glyph id in `sub_pack`,
// reading the rectangle from each entry's `cached_draw_quad` (no transform math is done here).
//
// Inputs:
// - draw_list  : receives the quads (via blit_quad) and the draw calls.
// - glyph_pack : SOA glyph entries; indexed by the ids stored in sub_pack.
// - sub_pack   : ids (indices into glyph_pack) of the glyphs to emit.
// - colour     : colour written into every emitted draw call.
generate_cached_draw_list :: #force_inline proc (draw_list : ^Draw_List, glyph_pack : #soa[]Glyph_Pack_Entry, sub_pack : []i32, colour : Colour )
{
profile(#procedure)
// One call template is reused for every glyph; only the index range changes per iteration.
call := Draw_Call_Default
call.pass = .Target
call.colour = colour
for id, index in sub_pack
{
profile("glyph")
// The draw call covers exactly the indices appended by blit_quad.
call.start_index = u32(len(draw_list.indices))
quad := glyph_pack[id].cached_draw_quad
// Far corners are formed as position + scale for both destination and source.
blit_quad(draw_list,
quad.dst_pos, quad.dst_pos + quad.dst_scale,
quad.src_pos, quad.src_pos + quad.src_scale
)
call.end_index = u32(len(draw_list.indices))
append(& draw_list.calls, call)
}
}
generate_cached_draw_list( draw_list, glyph_pack[:], sub_slice(to_cache), ctx.colour )
generate_cached_draw_list( draw_list, glyph_pack[:], sub_slice(cached), ctx.colour )
reset_batch_codepoint_state( ctx )
flush_glyph_buffer_draw_list(draw_list, & glyph_buffer.draw_list, & glyph_buffer.clear_draw_list, & glyph_buffer.batch_x)
profile_begin("generate oversized glyphs draw_list")
for id, index in sub_slice(oversized)
{
glyph := glyph_pack[id]
generate_oversized_draw_list(ctx,
glyph_buffer.draw_padding,
glyph_buffer_size,
entry, glyph.index, glyph.shape,
entry,
// glyph.index,
// entry.parser_info,
glyph.shape,
glyph.bounds,
glyph.bounds_size,
glyph.over_sample, glyph.translate, scale
glyph.over_sample, glyph.position, target_scale
)
}
reset_batch_codepoint_state( ctx )
profile_end()
cursor_pos = position + shaped.end_cursor_pos * scale
reset_batch_codepoint_state( ctx )
flush_glyph_buffer_draw_list(draw_list, & glyph_buffer.draw_list, & glyph_buffer.clear_draw_list, & glyph_buffer.batch_x)
profile_begin("font parser shape cleanup")
for id, index in sub_slice(oversized) do parser_free_shape(entry.parser_info, glyph_pack[id].shape)
for id, index in sub_slice(to_cache) do parser_free_shape(entry.parser_info, glyph_pack[id].shape)
profile_end()
cursor_pos = position + shaped.end_cursor_pos * target_scale
return
}

View File

@ -220,7 +220,7 @@ parser_get_font_vertical_metrics :: #force_inline proc "contextless" ( font : Pa
return
}
parser_get_bounds :: #force_inline proc "contextless" ( font : Parser_Font_Info, glyph_index : Glyph ) -> (bounds : Glyph_Bounds)
parser_get_bounds :: #force_inline proc "contextless" ( font : Parser_Font_Info, glyph_index : Glyph ) -> (bounds : Range2)
{
profile(#procedure)

View File

@ -473,6 +473,7 @@ draw_text :: #force_inline proc( ctx : ^Context, font : Font_ID, text_utf8 : str
profile(#procedure)
assert( ctx != nil )
assert( font >= 0 && int(font) < len(ctx.entries) )
assert( len(text_utf8) > 0 )
ctx.cursor_pos = {}
@ -482,8 +483,6 @@ draw_text :: #force_inline proc( ctx : ^Context, font : Font_ID, text_utf8 : str
entry := ctx.entries[ font ]
shape := shaper_shape_text_cached( ctx, font, text_utf8, entry, shaper_shape_text_uncached_advanced )
ctx.cursor_pos = generate_shape_draw_list( ctx, entry, shape, position, scale, ctx.snap_width, ctx.snap_height )
return true
@ -494,6 +493,7 @@ draw_text_no_snap :: #force_inline proc( ctx : ^Context, font : Font_ID, text_ut
profile(#procedure)
assert( ctx != nil )
assert( font >= 0 && int(font) < len(ctx.entries) )
assert( len(text_utf8) > 0 )
ctx.cursor_pos = {}
@ -599,12 +599,16 @@ get_font_vertical_metrics :: #force_inline proc ( ctx : ^Context, font : Font_ID
// Shapes `text_utf8` for `font` through shaper_shape_text_cached, passing
// shaper_shape_from_text_latin as the shaping procedure.
// Asserts that the text is non-empty.
// The `allocator` parameter is not referenced in this body.
shape_text_latin :: #force_inline proc( ctx : ^Context, font : Font_ID, text_utf8 : string, allocator := context.allocator ) -> Shaped_Text
{
profile(#procedure)
assert( len(text_utf8) > 0 )
// `font` is used directly as an index into the context's entries.
entry := ctx.entries[ font ]
return shaper_shape_text_cached( ctx, font, text_utf8, entry, shaper_shape_from_text_latin )
}
// Shapes `text_utf8` for `font` through shaper_shape_text_cached, passing
// shaper_shape_text_uncached_advanced as the shaping procedure.
// Asserts that the text is non-empty.
shape_text_advanced :: #force_inline proc( ctx : ^Context, font : Font_ID, text_utf8 : string ) -> Shaped_Text
{
profile(#procedure)
assert( len(text_utf8) > 0 )
// `font` is used directly as an index into the context's entries.
entry := ctx.entries[ font ]
return shaper_shape_text_cached( ctx, font, text_utf8, entry, shaper_shape_text_uncached_advanced )
}

View File

@ -642,10 +642,12 @@ render_ui_via_box_list :: proc( box_list : []UI_RenderBoxInfo, text_list : []UI_
entry := text_list[text_id]
font := entry.font.key != 0 ? entry.font : default_font
text_enqueued = true
text_layer_done = b32(text_id > 0) && text_list[ text_id - 1 ].layer_signal
text_id += 1
if len(entry.text) == 0 do continue
text_enqueued = true
if cam != nil {
// draw_text_shape_pos_extent_zoomed( entry.shape, font, entry.font_size, entry.position, cam_offset, screen_size, screen_size_norm, cam.zoom, entry.color )
draw_text_string_pos_extent_zoomed( entry.text, font, entry.font_size, entry.position, cam_offset, screen_size, screen_size_norm, cam.zoom, entry.color )

View File

@ -146,6 +146,13 @@ get_font_vertical_metrics :: #force_inline proc ( font : FontID, font_size := Fo
return
}
// Resolves (font, font_size * scalar) to a VEFontCache font id and returns the shape
// produced by ve.shape_text_latin for `text`, using the font provider context held in
// the global state.
shape_text_cached_latin :: #force_inline proc( text : string, font : FontID, font_size := Font_Use_Default_Size, scalar : f32 ) -> ShapedText
{
// Only ve_id is used below; the second result (`size`) is not referenced.
ve_id, size := font_provider_resolve_draw_id( font, font_size * scalar )
shape := ve.shape_text_latin( & get_state().font_provider_ctx.ve_ctx, ve_id, text )
return shape
}
shape_text_cached :: #force_inline proc( text : string, font : FontID, font_size := Font_Use_Default_Size, scalar : f32 ) -> ShapedText
{
ve_id, size := font_provider_resolve_draw_id( font, font_size * scalar )

View File

@ -277,16 +277,16 @@ ui_graph_build_end :: proc( ui : ^UI_State )
if ! current.computed.fresh
{
if len(current.text.str) > 0 {
app_window := get_state().app_window
screen_extent := app_window.extent
screen_size := screen_extent * 2
screen_size_norm := 1 / screen_size
// app_window := get_state().app_window
// screen_extent := app_window.extent
// screen_size := screen_extent * 2
// screen_size_norm := 1 / screen_size
font_size_screen_scalar := app_config().font_size_screen_scalar
// over_sample : f32 = f32(get_state().config.font_size_canvas_scalar)
current.computed.text_shape = shape_text_cached( current.text.str, current.style.font, current.layout.font_size, 1.0 )
current.computed.text_shape = shape_text_cached_latin( current.text.str, current.style.font, current.layout.font_size, 1.0 )
}
ui_box_compute_layout( current )
}

View File

@ -73,8 +73,6 @@ ui_box_compute_layout :: proc( box : ^UI_Box,
text_size : Vec2
if len(box.text.str) > 0
{
text_size = computed.text_shape.size
// if layout.font_size == computed.text_size.y {
// text_size = computed.text_size

View File

@ -201,10 +201,10 @@ push-location $path_root
# $build_args += $flag_micro_architecture_native
$build_args += $flag_use_separate_modules
$build_args += $flag_thread_count + $CoreCount_Physical
$build_args += $flag_optimize_none
# $build_args += $flag_optimize_none
# $build_args += $flag_optimize_minimal
# $build_args += $flag_optimize_speed
# $build_args += $falg_optimize_aggressive
$build_args += $falg_optimize_aggressive
$build_args += $flag_debug
$build_args += $flag_pdb_name + $pdb
$build_args += $flag_subsystem + 'windows'

1
toolchain/Odin Submodule

@ -0,0 +1 @@
Subproject commit aa8bc79d342b5de5ad597577db8bb65a46119114