WIP - VEFontCache: Testing the performance of an SOA setup for processing a shape's glyphs
This commit is contained in:
@ -125,3 +125,29 @@ decide_codepoint_region :: #force_inline proc (ctx : ^Context, entry : ^Entry, g
return .None, nil, {}
// Grab an atlas LRU cache slot.
// Reserves a slot in `region` for `lru_code` and returns it as `atlas_index`.
// While region.next_idx is still below region.state.capacity the next unused index is handed out
// and next_idx is advanced; the remaining statements reuse the slot of the entry the LRU reports
// as next to be evicted.
// NOTE(review): the brace / `else` lines are not visible in this view; the statements starting at
// `next_evict_codepoint := ...` are presumably the else branch of the capacity check - confirm against the full file.
atlas_reserve_slot :: #force_inline proc ( region : ^Atlas_Region, lru_code : u64 ) -> (atlas_index : i32)
if region.next_idx < region.state.capacity
evicted := lru_put( & region.state, lru_code, region.next_idx )
atlas_index = region.next_idx
region.next_idx += 1
// Putting into an unused slot is expected to hand back our own code (presumably meaning nothing else was evicted - verify lru_put).
assert( evicted == lru_code )
// Ask the LRU which code it would evict next.
next_evict_codepoint := lru_get_next_evicted( & region.state )
// NOTE(review): 0xFFFFFFFFFFFFFFFF looks like a "nothing to evict" sentinel - confirm against lru_get_next_evicted.
assert( next_evict_codepoint != 0xFFFFFFFFFFFFFFFF )
// Take over the atlas index currently held by the entry about to be evicted.
atlas_index = lru_peek( & region.state, next_evict_codepoint, must_find = true )
assert( atlas_index != -1 )
evicted := lru_put( & region.state, lru_code, atlas_index )
// The entry pushed out must be exactly the one predicted above.
assert( evicted == next_evict_codepoint )
// Post-condition: lru_code must now resolve to a slot in this region's LRU.
assert( lru_get( & region.state, lru_code ) != - 1 )
@ -352,31 +352,9 @@ cache_glyph_to_atlas :: proc( ctx : ^Context,
// E region is special case and not cached to atlas.
if region_kind == .None || region_kind == .E do return
// Grab an atlas LRU cache slot.
atlas_index := atlas_index
if atlas_index == -1
if region.next_idx < region.state.capacity
evicted := lru_put( & region.state, lru_code, region.next_idx )
atlas_index = region.next_idx
region.next_idx += 1
assert( evicted == lru_code )
next_evict_codepoint := lru_get_next_evicted( & region.state )
assert( next_evict_codepoint != 0xFFFFFFFFFFFFFFFF )
atlas_index = lru_peek( & region.state, next_evict_codepoint, must_find = true )
assert( atlas_index != -1 )
evicted := lru_put( & region.state, lru_code, atlas_index )
assert( evicted == next_evict_codepoint )
assert( lru_get( & region.state, lru_code ) != - 1 )
// TODO(Ed): Try to make sure this is always resolved
if atlas_index == -1 do atlas_index = atlas_reserve_slot( region, lru_code )
atlas := & ctx.atlas
glyph_buffer := & ctx.glyph_buffer
@ -462,34 +440,39 @@ check_glyph_in_atlas :: #force_inline proc( ctx : ^Context, font : Font_ID, entr
region_kind : Atlas_Region_Kind,
region : ^Atlas_Region,
over_sample : Vec2
) -> b32
) -> (seen, should_cache : b8)
assert( glyph_index != -1 )
// E region can't batch
if region_kind == .E || region_kind == .None do return false
if ctx.temp_codepoint_seen_num > i32(cap(ctx.temp_codepoint_seen)) do return false
if region_kind == .E || region_kind == .None do return
if ctx.temp_codepoint_seen_num > i32(cap(ctx.temp_codepoint_seen)) do return
if atlas_index == - 1
if region.next_idx > region.state.capacity {
// Check to see if we reached capacity for the atlas
if region.next_idx > region.state.capacity
// We will evict from the LRU. We must predict which entry will get evicted; if it is one we have already seen in this batch, we need to take the slow path and flush the batch.
next_evict_codepoint := lru_get_next_evicted( & region.state )
seen, success := ctx.temp_codepoint_seen[next_evict_codepoint]
success : bool
seen, success = ctx.temp_codepoint_seen[next_evict_codepoint]
assert(success != false)
if (seen) {
return false
should_cache = true
cache_glyph_to_atlas( ctx, font, glyph_index, lru_code, atlas_index, entry, region_kind, region, over_sample )
assert( lru_get( & region.state, lru_code ) != -1 )
mark_batch_codepoint_seen( ctx, lru_code)
return true
seen = true
// ve_fontcache_clear_Draw_List
@ -675,6 +658,18 @@ draw_text_batch :: proc(ctx: ^Context, entry: ^Entry, shaped: ^Shaped_Text,
// Per-glyph working record used by draw_text_shape, which allocates one entry per shaped glyph
// as an #soa slice (via make_soa) so each field can be filled in its own pass.
GlyphPackEntry :: struct {
// Cache key for the glyph, produced by font_glyph_lru_code(entry.id, index).
lru_code : u64,
// Atlas region chosen for the glyph by decide_codepoint_region.
region : ^Atlas_Region,
// Over-sample factor returned by decide_codepoint_region alongside the region.
over_sample : Vec2,
// Slot index from lru_get on the region's LRU; left at -1 when region_kind is .E.
atlas_index : i32,
// Glyph id copied from shaped.glyphs.
index : Glyph,
// Position of this glyph within shaped.glyphs.
shape_id : i32,
// Region kind returned by decide_codepoint_region.
region_kind : Atlas_Region_Kind,
// First result of check_glyph_in_atlas (named `seen` there).
in_atlas : b8,
// Second result of check_glyph_in_atlas: the glyph still needs cache_glyph_to_atlas.
should_cache : b8,
// Helper for draw_text. All raw text content should be confirmed to be either formatting or visible shapes before getting cached.
draw_text_shape :: #force_inline proc( ctx : ^Context,
font : Font_ID,
@ -685,29 +680,83 @@ draw_text_shape :: #force_inline proc( ctx : ^Context,
) -> (cursor_pos : Vec2) #no_bounds_check
batch_start_idx : i32 = 0
for index : i32 = 0; index < cast(i32) len(shaped.glyphs); index += 1
glyph_pack, pack_alloc_eror := make_soa(#soa[]GlyphPackEntry, len(shaped.glyphs), allocator = context.temp_allocator)
profile_begin("SOA glyph pack processing")
for & glyph, index in glyph_pack
glyph_index := shaped.glyphs[ index ]
if is_empty( ctx, entry, glyph_index ) do continue
glyph.shape_id = cast(i32) index
glyph.index = shaped.glyphs[ index ]
// for & glyph, index in glyph_pack
// {
// glyph.region_kind,
// glyph.region,
// glyph.over_sample = decide_codepoint_region( ctx, entry, glyph.index )
// }
// for & glyph, index in glyph_pack
// {
// glyph.lru_code = font_glyph_lru_code(entry.id, glyph.index)
// }
// for & glyph, index in glyph_pack
// {
// glyph.atlas_index = -1
// if glyph.region_kind != .E do glyph.atlas_index = lru_get( & glyph.region.state, glyph.lru_code )
// }
// for & glyph, index in glyph_pack
// {
// glyph.in_atlas, glyph.should_cache = check_glyph_in_atlas( ctx, font, entry, glyph.index, glyph.lru_code, glyph.atlas_index, glyph.region_kind, glyph.region, glyph.over_sample )
// }
// for & glyph, index in glyph_pack
// {
// if ! glyph.should_cache do continue
// cache_glyph_to_atlas(ctx, font, glyph.index, glyph.lru_code, glyph.atlas_index, entry, glyph.region_kind, glyph.region, glyph.over_sample)
// }
// for & glyph, index in glyph_pack
// {
// if ! glyph.in_atlas do continue
region_kind, region, over_sample := decide_codepoint_region( ctx, entry, glyph_index )
lru_code := font_glyph_lru_code(entry.id, glyph_index)
atlas_index := cast(i32) -1
// assert( lru_get( & glyph.region.state, glyph.lru_code ) != -1 )
// mark_batch_codepoint_seen( ctx, glyph.lru_code)
// }
if region_kind != .E do atlas_index = lru_get( & region.state, lru_code )
if check_glyph_in_atlas( ctx, font, entry, glyph_index, lru_code, atlas_index, region_kind, region, over_sample ) do continue
// Prepare uncached glyphs for caching
batch_start_idx : i32 = 0
for & glyph, index in glyph_pack
// if is_glyph_empty( ctx, entry, glyph.index ) do continue
glyph.region_kind, glyph.region, glyph.over_sample = decide_codepoint_region( ctx, entry, glyph.index )
glyph.lru_code = font_glyph_lru_code(entry.id, glyph.index)
glyph.atlas_index = -1
if glyph.region_kind != .E do glyph.atlas_index = lru_get( & glyph.region.state, glyph.lru_code )
glyph.in_atlas, glyph.should_cache = check_glyph_in_atlas( ctx, font, entry, glyph.index, glyph.lru_code, glyph.atlas_index, glyph.region_kind, glyph.region, glyph.over_sample )
// if glyph.should_cache {
// cache_glyph_to_atlas(ctx, font, glyph.index, glyph.lru_code, glyph.atlas_index, entry, glyph.region_kind, glyph.region, glyph.over_sample)
// glyph.atlas_index = atlas_reserve_slot(glyph.region, glyph.lru_code)
// }
if glyph.in_atlas {
// assert( lru_get( & glyph.region.state, glyph.lru_code ) != -1 )
// mark_batch_codepoint_seen( ctx, glyph.lru_code)
// We can no longer directly append the shape as it has missing glyphs in the atlas
// First batch the other cached glyphs
// flush_glyph_buffer_to_atlas(ctx)
draw_text_batch( ctx, entry, shaped, batch_start_idx, index, position, scale, snap_width, snap_height )
draw_text_batch( ctx, entry, shaped, batch_start_idx, glyph.shape_id, position, scale, snap_width, snap_height )
reset_batch_codepoint_state( ctx )
cache_glyph_to_atlas( ctx, font, glyph_index, lru_code, atlas_index, entry, region_kind, region, over_sample )
mark_batch_codepoint_seen( ctx, lru_code)
batch_start_idx = index
cache_glyph_to_atlas( ctx, font, glyph.index, glyph.lru_code, glyph.atlas_index, entry, glyph.region_kind, glyph.region, glyph.over_sample )
mark_batch_codepoint_seen( ctx, glyph.lru_code)
batch_start_idx = 1
draw_text_batch( ctx, entry, shaped, batch_start_idx, cast(i32) len(shaped.glyphs), position, scale, snap_width , snap_height )
@ -731,14 +780,17 @@ draw_text_mono_latin_batch :: #force_inline proc( ctx : ^Context,
for index : i32 = 0; index < cast(i32) len(shaped.glyphs); index += 1
glyph_index := shaped.glyphs[ index ]
if is_empty( ctx, entry, glyph_index ) do continue
if is_glyph_empty( ctx, entry, glyph_index ) do continue
region_kind, region, over_sample := decide_codepoint_region( ctx, entry, glyph_index )
lru_code := font_glyph_lru_code(entry.id, glyph_index)
atlas_index := cast(i32) -1
if region_kind != .E do atlas_index = lru_get( & region.state, lru_code )
if check_glyph_in_atlas( ctx, font, entry, glyph_index, lru_code, atlas_index, region_kind, region, over_sample ) do continue
in_atlas, should_cache := check_glyph_in_atlas( ctx, font, entry, glyph_index, lru_code, atlas_index, region_kind, region, over_sample )
if in_atlas do continue
if should_cache do cache_glyph_to_atlas(ctx, font, glyph_index, lru_code, atlas_index, entry, region_kind, region, over_sample )
// We can no longer directly append the shape as it has missing glyphs in the atlas
@ -35,6 +35,8 @@ import "core:mem"
arena_init :: mem.arena_init
import "core:slice"
//#region("Proc overload mappings")
append :: proc {
@ -88,6 +90,10 @@ make :: proc {
make_soa :: proc {
resize :: proc {
@ -54,7 +54,7 @@ font_glyph_lru_code :: #force_inline proc "contextless" ( font : Font_ID, glyph_
is_empty :: #force_inline proc ( ctx : ^Context, entry : ^Entry, glyph_index : Glyph ) -> b32
is_glyph_empty :: #force_inline proc ( ctx : ^Context, entry : ^Entry, glyph_index : Glyph ) -> b32
if glyph_index == 0 do return true
if parser_is_glyph_empty( & entry.parser_info, glyph_index ) do return true
@ -76,7 +76,7 @@ shape_text_uncached_advanced :: #force_inline proc( ctx : ^Context, font : Font_
line_gap := f32(line_gap_i32)
line_height := (ascent - descent + line_gap) * entry.size_scale
shaper_shape_from_text( & ctx.shaper_ctx, & entry.shaper_info, output, text_utf8, ascent_i32, descent_i32, line_gap_i32, entry.size, entry.size_scale )
shaper_shape_from_text( & ctx.shaper_ctx, & entry.parser_info, & entry.shaper_info, output, text_utf8, ascent_i32, descent_i32, line_gap_i32, entry.size, entry.size_scale )
shape_text_uncached_latin :: proc( ctx : ^Context, font : Font_ID, text_utf8 : string, entry : ^Entry, output : ^Shaped_Text )
@ -120,8 +120,8 @@ shape_text_uncached_latin :: proc( ctx : ^Context, font : Font_ID, text_utf8 : s
glyph_index := parser_find_glyph_index( & entry.parser_info, codepoint )
is_empty := parser_is_glyph_empty( & entry.parser_info,glyph_index )
if ! is_empty
is_glyph_empty := parser_is_glyph_empty( & entry.parser_info,glyph_index )
if ! is_glyph_empty
append( & output.glyphs, glyph_index)
append( & output.positions, Vec2 {
@ -54,7 +54,7 @@ shaper_unload_font :: proc( ctx : ^Shaper_Info )
if blob != nil do harfbuzz.blob_destroy( blob )
shaper_shape_from_text :: #force_inline proc( ctx : ^Shaper_Context, info : ^Shaper_Info, output :^Shaped_Text, text_utf8 : string,
shaper_shape_from_text :: #force_inline proc( ctx : ^Shaper_Context, parser_info : ^Parser_Font_Info, info : ^Shaper_Info, output :^Shaped_Text, text_utf8 : string,
ascent, descent, line_gap : i32, size, size_scale : f32 )
@ -72,7 +72,7 @@ shaper_shape_from_text :: #force_inline proc( ctx : ^Shaper_Context, info : ^Sha
line_height := ((ascent - descent + line_gap) * size_scale)
position : Vec2
shape_run :: #force_inline proc( buffer : harfbuzz.Buffer, script : harfbuzz.Script, font : harfbuzz.Font, output : ^Shaped_Text,
shape_run :: #force_inline proc( parser_info : ^Parser_Font_Info, buffer : harfbuzz.Buffer, script : harfbuzz.Script, font : harfbuzz.Font, output : ^Shaped_Text,
position : ^Vec2, max_line_width: ^f32, line_count: ^int,
ascent, descent, line_gap, size, size_scale: f32,
snap_shape_pos : b32, adv_snap_small_font_threshold : f32 )
@ -115,8 +115,6 @@ shaper_shape_from_text :: #force_inline proc( ctx : ^Shaper_Context, info : ^Sha
(position^) = ceil( position^ )
append( & output.glyphs, glyph_id )
glyph_pos := position^
offset := Vec2 { f32(hb_gposition.x_offset), f32(hb_gposition.y_offset) } * size_scale
glyph_pos += offset
@ -124,7 +122,6 @@ shaper_shape_from_text :: #force_inline proc( ctx : ^Shaper_Context, info : ^Sha
if snap_shape_pos {
glyph_pos = ceil(glyph_pos)
append( & output.positions, glyph_pos)
advance := Vec2 {
f32(hb_gposition.x_advance) * size_scale,
@ -132,6 +129,12 @@ shaper_shape_from_text :: #force_inline proc( ctx : ^Shaper_Context, info : ^Sha
(position^) += advance
(max_line_width^) = max(max_line_width^, position.x)
is_empty := parser_is_glyph_empty(parser_info, glyph_id)
if ! is_empty {
append( & output.glyphs, glyph_id )
append( & output.positions, glyph_pos)
output.end_cursor_pos = position^
@ -159,7 +162,7 @@ shaper_shape_from_text :: #force_inline proc( ctx : ^Shaper_Context, info : ^Sha
// End current run since we've encountered a script change.
shape_run( parser_info,
ctx.hb_buffer, current_script, info.font, output,
& position, & max_line_width, & line_count,
ascent, descent, line_gap, size, size_scale,
@ -170,7 +173,7 @@ shaper_shape_from_text :: #force_inline proc( ctx : ^Shaper_Context, info : ^Sha
// End the last run if needed
shape_run( parser_info,
ctx.hb_buffer, current_script, info.font, output,
& position, & max_line_width, & line_count,
ascent, descent, line_gap, size, size_scale,
@ -36,7 +36,7 @@ Context :: struct {
entries : [dynamic]Entry,
temp_path : [dynamic]Vertex,
temp_codepoint_seen : map[u64]bool,
temp_codepoint_seen : map[u64]b8,
temp_codepoint_seen_num : i32,
snap_width : f32,
@ -147,10 +147,10 @@ startup :: proc( ctx : ^Context, parser_kind : Parser_Kind = .STB_TrueType,
glyph_draw_params := Init_Glyph_Draw_Params_Default,
shape_cache_params := Init_Shape_Cache_Params_Default,
shaper_params := Init_Shaper_Params_Default,
default_curve_quality : u32 = 2,
entires_reserve : u32 = 256,
temp_path_reserve : u32 = 1024,
temp_codepoint_seen_reserve : u32 = 1024,
default_curve_quality : u32 = 8 * 2,
entires_reserve : u32 = 8 * 256,
temp_path_reserve : u32 = 8 * 1024,
temp_codepoint_seen_reserve : u32 = 8 * 1024,
assert( ctx != nil, "Must provide a valid context" )
@ -175,16 +175,16 @@ startup :: proc( ctx : ^Context, parser_kind : Parser_Kind = .STB_TrueType,
temp_path, error = make( [dynamic]Vertex, len = 0, cap = temp_path_reserve )
assert(error == .None, "VEFontCache.init : Failed to allocate temp_path")
temp_codepoint_seen, error = make( map[u64]bool, uint(temp_codepoint_seen_reserve) )
temp_codepoint_seen, error = make( map[u64]b8, uint(temp_codepoint_seen_reserve) )
assert(error == .None, "VEFontCache.init : Failed to allocate temp_path")
draw_list.vertices, error = make( [dynamic]Vertex, len = 0, cap = 1 * Kilobyte )
draw_list.vertices, error = make( [dynamic]Vertex, len = 0, cap = 8 * Kilobyte )
assert(error == .None, "VEFontCache.init : Failed to allocate draw_list.vertices")
draw_list.indices, error = make( [dynamic]u32, len = 0, cap = 2 * Kilobyte )
draw_list.indices, error = make( [dynamic]u32, len = 0, cap = 16 * Kilobyte )
assert(error == .None, "VEFontCache.init : Failed to allocate draw_list.indices")
draw_list.calls, error = make( [dynamic]Draw_Call, len = 0, cap = 128 )
draw_list.calls, error = make( [dynamic]Draw_Call, len = 0, cap = 1024 )
assert(error == .None, "VEFontCache.init : Failed to allocate draw_list.calls")
init_atlas_region :: proc( region : ^Atlas_Region, params : Init_Atlas_Params, region_params : Init_Atlas_Region_Params, factor : Vec2i, expected_cap : i32 )
@ -201,10 +201,10 @@ push-location $path_root
# $build_args += $flag_micro_architecture_native
$build_args += $flag_use_separate_modules
$build_args += $flag_thread_count + $CoreCount_Physical
# $build_args += $flag_optimize_none
$build_args += $flag_optimize_none
# $build_args += $flag_optimize_minimal
# $build_args += $flag_optimize_speed
$build_args += $falg_optimize_aggressive
# $build_args += $falg_optimize_aggressive
$build_args += $flag_debug
$build_args += $flag_pdb_name + $pdb
$build_args += $flag_subsystem + 'windows'
Reference in New Issue
Block a user