partially restoring old order to try to identify the regression with blitting to atlas...

This commit is contained in:
Edward R. Gonzalez 2025-01-07 17:52:42 -05:00
parent 6e01c39899
commit 7dee697103
8 changed files with 80 additions and 79 deletions

View File

@ -23,7 +23,8 @@ LRU_Fail_Mask_64 :: 0xFFFFFFFFFFFFFFFF
Pool_ListIter :: i32
// Pool_ListValue :: LRU_Key
Pool_List_Item :: struct( $V_Type : typeid ) #packed {
// Pool_List_Item :: struct( $V_Type : typeid ) #packed {
Pool_List_Item :: struct( $V_Type : typeid ) {
prev : Pool_ListIter,
next : Pool_ListIter,
value : V_Type,

View File

@ -302,7 +302,7 @@ generate_shapes_draw_list :: #force_inline proc ( ctx : ^Context, font : Font_ID
* Resolve glyph bounds and scale
* Resolve atlas region the glyph is associated with
* Segregate the glyphs into three slices: oversized, to_cache, cached.
* If oversized is not necessary for your use case and your hitting a bottle neck, remove it in a derivative procedure.
* If oversized is not necessary for your use case and you're hitting a bottleneck, remove it in a derivative procedure.
* You have to be drawing a px font size > ~140 px for it to trigger.
* The atlas can be scaled with the size_multiplier parameter of startup so that it becomes more irrelevant if processing a larger atlas is a non-issue.
* The segregation will not allow slices to exceed the batch_cache capacity of the glyph_buffer (configurable within startup params)
@ -503,7 +503,7 @@ generate_shape_draw_list :: proc( draw_list : ^Draw_List, shape : Shaped_Text,
Order: Oversized first, then to_cache, then cached.
Oversized and to_cache will both enqueue operations for rendering glyphs to the glyph buffer render target.
The compute section will have operations reguarding how many glyphs they made alloate before a flush must occur.
The compute section will have operations regarding how many glyphs they may allocate before a flush must occur.
A flush will force one of the following:
* Oversized will have a draw call setup to blit directly from the glyph buffer to the target.
* to_cache will blit the glyphs rendered to the buffer to the atlas.
@ -530,42 +530,16 @@ batch_generate_glyphs_draw_list :: proc ( draw_list : ^Draw_List,
colour := colour
profile_begin("glyph buffer transform & draw quads compute")
for id, index in oversized
for id, index in cached
{
glyph := & glyph_pack[id]
f32_allocated_x := cast(f32) glyph_buffer.allocated_x
// Resolve how much space this glyph will allocate in the buffer
buffer_size := (glyph.bounds_size_scaled + glyph_buffer.draw_padding) * glyph.over_sample
// Allocate a glyph glyph render target region (FBO)
to_allocate_x := buffer_size.x + 2.0
glyph_buffer.allocated_x += i32(to_allocate_x)
// If allocation would exceed buffer's bounds the buffer must be flush before this glyph can be rendered.
glyph.flush_glyph_buffer = i32(f32_allocated_x + to_allocate_x) >= i32(glyph_buffer_size.x)
glyph.buffer_x = f32_allocated_x * f32( i32( ! glyph.flush_glyph_buffer ) )
// Quad to for drawing atlas slot to target
draw_quad := & glyph.draw_quad
glyph_padding := vec2(glyph_buffer.draw_padding)
// Target position (draw_list's target image)
draw_quad.dst_pos = glyph.position + (glyph.bounds_scaled.p0 - glyph_padding) * target_scale
draw_quad.dst_scale = (glyph.bounds_size_scaled + glyph_padding) * target_scale
// The glyph buffer space transform for generate_glyph_pass_draw_list
draw_transform := & glyph.draw_transform
draw_transform.scale = font_scale * glyph.over_sample
draw_transform.pos = -1 * glyph.bounds.p0 * draw_transform.scale + vec2(atlas.glyph_padding)
draw_transform.pos.x += glyph.buffer_x
to_glyph_buffer_space( & draw_transform.pos, & draw_transform.scale, glyph_buffer_size )
draw_quad.src_pos = Vec2 { glyph.buffer_x, 0 }
draw_quad.src_scale = glyph.bounds_size_scaled * glyph.over_sample + glyph_padding
to_target_space( & draw_quad.src_pos, & draw_quad.src_scale, glyph_buffer_size )
glyph := & glyph_pack[id]
quad := & glyph.draw_quad
quad.dst_pos = glyph.position + (glyph.bounds_scaled.p0) * target_scale
quad.dst_scale = (glyph.scale) * target_scale
quad.src_scale = (glyph.scale)
quad.src_pos = (glyph.region_pos)
to_target_space( & quad.src_pos, & quad.src_scale, atlas_size )
}
for id, index in to_cache
{
@ -604,25 +578,51 @@ batch_generate_glyphs_draw_list :: proc ( draw_list : ^Draw_List,
draw_quad.src_pos = (glyph.region_pos)
to_target_space( & draw_quad.src_pos, & draw_quad.src_scale, atlas_size )
}
for id, index in cached
for id, index in oversized
{
// Quad to for drawing atlas slot to target
glyph := & glyph_pack[id]
quad := & glyph.draw_quad
quad.dst_pos = glyph.position + (glyph.bounds_scaled.p0) * target_scale
quad.dst_scale = (glyph.scale) * target_scale
quad.src_scale = (glyph.scale)
quad.src_pos = (glyph.region_pos)
to_target_space( & quad.src_pos, & quad.src_scale, atlas_size )
f32_allocated_x := cast(f32) glyph_buffer.allocated_x
// Resolve how much space this glyph will allocate in the buffer
buffer_size := (glyph.bounds_size_scaled + glyph_buffer.draw_padding) * glyph.over_sample
// Allocate a glyph glyph render target region (FBO)
to_allocate_x := buffer_size.x + 2.0
glyph_buffer.allocated_x += i32(to_allocate_x)
// If allocation would exceed buffer's bounds the buffer must be flush before this glyph can be rendered.
glyph.flush_glyph_buffer = i32(f32_allocated_x + to_allocate_x) >= i32(glyph_buffer_size.x)
glyph.buffer_x = f32_allocated_x * f32( i32( ! glyph.flush_glyph_buffer ) )
// Quad to for drawing atlas slot to target
draw_quad := & glyph.draw_quad
glyph_padding := vec2(glyph_buffer.draw_padding)
// Target position (draw_list's target image)
draw_quad.dst_pos = glyph.position + (glyph.bounds_scaled.p0 - glyph_padding) * target_scale
draw_quad.dst_scale = (glyph.bounds_size_scaled + glyph_padding) * target_scale
// The glyph buffer space transform for generate_glyph_pass_draw_list
draw_transform := & glyph.draw_transform
draw_transform.scale = font_scale * glyph.over_sample
draw_transform.pos = -1 * glyph.bounds.p0 * draw_transform.scale + vec2(atlas.glyph_padding)
draw_transform.pos.x += glyph.buffer_x
to_glyph_buffer_space( & draw_transform.pos, & draw_transform.scale, glyph_buffer_size )
draw_quad.src_pos = Vec2 { glyph.buffer_x, 0 }
draw_quad.src_scale = glyph.bounds_size_scaled * glyph.over_sample + glyph_padding
to_target_space( & draw_quad.src_pos, & draw_quad.src_scale, glyph_buffer_size )
}
profile_end()
profile_begin("generate oversized glyphs draw_list")
if len(oversized) > 0
{
colour.r = max(colour.a, enable_debug_vis_type)
colour.g = max(colour.g, enable_debug_vis_type)
colour.b = colour.b * f32(cast(i32) ! b32(cast(i32) enable_debug_vis_type))
// colour.r = max(colour.a, enable_debug_vis_type)
// colour.g = max(colour.g, enable_debug_vis_type)
// colour.b = colour.b * f32(cast(i32) ! b32(cast(i32) enable_debug_vis_type))
for id, index in oversized {
error : Allocator_Error
glyph_pack[id].shape, error = parser_get_glyph_shape(entry.parser_info, glyph_pack[id].index)
@ -754,22 +754,22 @@ batch_generate_glyphs_draw_list :: proc ( draw_list : ^Draw_List,
)
}
profile_begin("generate_cached_draw_list: cached")
colour.r = max(colour.r, 1.0 * enable_debug_vis_type)
colour.g = max(colour.g, 1.0 * enable_debug_vis_type)
colour.b = max(colour.b, 1.0 * enable_debug_vis_type)
generate_cached_draw_list( draw_list, glyph_pack[:], cached, colour )
profile_end()
flush_glyph_buffer_draw_list(draw_list, & glyph_buffer.draw_list, & glyph_buffer.clear_draw_list, & glyph_buffer.allocated_x)
for id, index in to_cache do parser_free_shape(entry.parser_info, glyph_pack[id].shape)
profile_begin("generate_cached_draw_list: cached")
// colour.r = max(colour.r, 1.0 * enable_debug_vis_type)
// colour.g = max(colour.g, 1.0 * enable_debug_vis_type)
// colour.b = max(colour.b, 1.0 * enable_debug_vis_type)
generate_cached_draw_list( draw_list, glyph_pack[:], cached, colour )
profile_end()
}
profile_end()
profile_begin("generate_cached_draw_list: to_cache")
colour.r = max(colour.r, 0.80 * enable_debug_vis_type)
colour.g = max(colour.g, 0.25 * enable_debug_vis_type)
colour.b = max(colour.b, 0.25 * enable_debug_vis_type)
// colour.r = max(colour.r, 0.80 * enable_debug_vis_type)
// colour.g = max(colour.g, 0.25 * enable_debug_vis_type)
// colour.b = max(colour.b, 0.25 * enable_debug_vis_type)
generate_cached_draw_list( draw_list, glyph_pack[:], to_cache, colour )
profile_end()
}

View File

@ -52,7 +52,7 @@ to_bytes :: #force_inline proc "contextless" ( typed_data : ^$Type ) -> []byte {
// Folds `bytes` into the accumulator pointed to by `hash`, updating it in place.
// Per byte the accumulator becomes: (h << 8) + h + byte.
// `Type` is inferred from the pointer argument; each byte is converted to `Type` before being added.
@(optimization_mode="favor_size")
djb8_hash :: #force_inline proc "contextless" ( hash : ^$Type, bytes : []byte )
{
	for b in bytes {
		hash^ = ((hash^ << 8) + hash^) + Type(b)
	}
}
RGBA8 :: [4]f32
RGBA8 :: [4]u8
RGBAN :: [4]f32
Vec2 :: [2]f32
Vec2i :: [2]i32

View File

@ -14,8 +14,7 @@ Shape_Key :: u32
its position should be used for rendering.
For this library's case it also involves keeping any content
that does not have to be resolved up once again in a later stage of
preparing it for rendering.
that does not have to be resolved once again in the later stage of processing.
Ideally the user should resolve this shape once and cache/store it on their side.
They have the best ability to avoid costly lookups to streamline

View File

@ -584,19 +584,19 @@ pop_zoom :: #force_inline proc( ctx : ^Context )
// Pops the top entry off the context's zoom stack.
// `zoom` is not read here. NOTE(review): presumably it exists only so the signature matches a scoped push
// that registers this proc via `deferred_in` (same pattern as `scope_vpz` / `auto_pop_vpz`) - confirm at the call site.
auto_pop_zoom :: #force_inline proc( ctx : ^Context, zoom : f32 )
{
	pop( & ctx.stack.zoom )
}
@(deferred_in = auto_pop_vpz)
scope_vpz :: #force_inline proc( ctx : ^Context, camera : VPZ_Transform ) {
scope_vpz :: #force_inline proc( ctx : ^Context, camera : VPZ_Transform ) {
assert(ctx != nil)
append(& ctx.stack.view, camera.view )
append(& ctx.stack.position, camera.position )
append(& ctx.stack.zoom, camera.zoom )
}
push_vpz :: #force_inline proc( ctx : ^Context, camera : VPZ_Transform ) {
push_vpz :: #force_inline proc( ctx : ^Context, camera : VPZ_Transform ) {
assert(ctx != nil)
append(& ctx.stack.view, camera.view )
append(& ctx.stack.position, camera.position )
append(& ctx.stack.zoom, camera.zoom )
}
pop_vpz :: #force_inline proc( ctx : ^Context ) {
pop_vpz :: #force_inline proc( ctx : ^Context ) {
assert(ctx != nil)
pop(& ctx.stack.view )
pop(& ctx.stack.position)
@ -664,18 +664,19 @@ draw_text_shape_normalized_space :: #force_inline proc( ctx : ^Context,
font_scale := parser_scale( entry.parser_info, px_size )
px_upscale := px_size * ctx.px_scalar
downscale := scale * (1 / ctx.px_scalar)
font_scale_upscale := parser_scale( entry.parser_info, px_upscale )
target_px_size := px_size * ctx.px_scalar
target_scale := scale * (1 / ctx.px_scalar)
target_font_scale := parser_scale( entry.parser_info, target_px_size )
ctx.cursor_pos = generate_shape_draw_list( & ctx.draw_list, shape, & ctx.atlas, & ctx.glyph_buffer, ctx.px_scalar,
ctx.cursor_pos = generate_shape_draw_list( & ctx.draw_list, shape, & ctx.atlas, & ctx.glyph_buffer,
ctx.px_scalar,
ctx.enable_draw_type_visualization,
adjusted_colour,
entry,
px_upscale,
font_scale_upscale,
target_px_size,
target_font_scale,
position,
downscale,
target_scale,
)
}
@ -719,7 +720,8 @@ draw_text_normalized_space :: #force_inline proc( ctx : ^Context,
target_font_scale,
shaper_shape_text_uncached_advanced
)
ctx.cursor_pos = generate_shape_draw_list( & ctx.draw_list, shape, & ctx.atlas, & ctx.glyph_buffer, ctx.px_scalar,
ctx.cursor_pos = generate_shape_draw_list( & ctx.draw_list, shape, & ctx.atlas, & ctx.glyph_buffer,
ctx.px_scalar,
ctx.enable_draw_type_visualization,
colour,
entry,
@ -834,7 +836,6 @@ flush_draw_list :: #force_inline proc( ctx : ^Context ) {
ctx.draw_layer.calls_offset = 0
}
flush_draw_list_layer :: #force_inline proc( ctx : ^Context ) {
assert( ctx != nil )
ctx.draw_layer.vertices_offset = len(ctx.draw_list.vertices)

View File

@ -15,7 +15,7 @@ set_profiler_module_context :: #force_inline proc "contextless" ( ctx : ^SpallPr
Module_Context = ctx
}
DISABLE_PROFILING :: false
DISABLE_PROFILING :: true
@(deferred_none = profile_end, disabled = DISABLE_PROFILING)
profile :: #force_inline proc "contextless" ( name : string, loc := #caller_location ) {

View File

@ -153,8 +153,8 @@ startup :: proc( prof : ^SpallProfiler, persistent_mem, frame_mem, transient_mem
color_theme = App_Thm_Dusk
text_snap_glyph_positions = true
text_size_screen_scalar = 2.0
text_size_canvas_scalar = 2.0
text_size_screen_scalar = 1.0
text_size_canvas_scalar = 1.0
text_alpha_sharpen = 0.25
}
@ -526,7 +526,7 @@ tick_work_frame :: #force_inline proc( host_delta_time_ms : f64 ) -> b32
config := & get_state().config
debug := & get_state().debug
debug.draw_ui_box_bounds_points = false
debug.draw_ui_box_bounds_points = true
debug.draw_ui_padding_bounds = false
debug.draw_ui_content_bounds = false

View File

@ -205,9 +205,9 @@ push-location $path_root
# $build_args += $flag_micro_architecture_native
$build_args += $flag_use_separate_modules
$build_args += $flag_thread_count + $CoreCount_Physical
# $build_args += $flag_optimize_none
$build_args += $flag_optimize_none
# $build_args += $flag_optimize_minimal
$build_args += $flag_optimize_speed
# $build_args += $flag_optimize_speed
# $build_args += $falg_optimize_aggressive
$build_args += $flag_debug
$build_args += $flag_pdb_name + $pdb