From 7dee697103a2ee5f4a77e48246547f8ad94068d7 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Tue, 7 Jan 2025 17:52:42 -0500 Subject: [PATCH] partially restoring old order to try to identify the regression with blitting to atlas... --- code/font/vefontcache/LRU.odin | 3 +- code/font/vefontcache/draw.odin | 114 ++++++++++++------------- code/font/vefontcache/misc.odin | 2 +- code/font/vefontcache/shaper.odin | 3 +- code/font/vefontcache/vefontcache.odin | 25 +++--- code/grime/profiler.odin | 2 +- code/sectr/engine/client_api.odin | 6 +- scripts/build.ps1 | 4 +- 8 files changed, 80 insertions(+), 79 deletions(-) diff --git a/code/font/vefontcache/LRU.odin b/code/font/vefontcache/LRU.odin index 0f8f1b3..6761d50 100644 --- a/code/font/vefontcache/LRU.odin +++ b/code/font/vefontcache/LRU.odin @@ -23,7 +23,8 @@ LRU_Fail_Mask_64 :: 0xFFFFFFFFFFFFFFFF Pool_ListIter :: i32 // Pool_ListValue :: LRU_Key -Pool_List_Item :: struct( $V_Type : typeid ) #packed { +// Pool_List_Item :: struct( $V_Type : typeid ) #packed { +Pool_List_Item :: struct( $V_Type : typeid ) { prev : Pool_ListIter, next : Pool_ListIter, value : V_Type, diff --git a/code/font/vefontcache/draw.odin b/code/font/vefontcache/draw.odin index 360588f..7fd8bb7 100644 --- a/code/font/vefontcache/draw.odin +++ b/code/font/vefontcache/draw.odin @@ -302,7 +302,7 @@ generate_shapes_draw_list :: #force_inline proc ( ctx : ^Context, font : Font_ID * Resolve glyph bounds and scale * Resolve atlas region the glyph is associated with * Segregate the glyphs into three slices: oversized, to_cache, cached. - * If oversized is not necessary for your use case and your hitting a bottle neck, remove it in a derivative procedure. + * If oversized is not necessary for your use case and you're hitting a bottleneck, remove it in a derivative procedure. * You have to to be drawing a px font size > ~140 px for it to trigger. 
* The atlas can be scaled with the size_multiplier parameter of startup so that it becomes more irrelevant if processing a larger atlas is a non-issue. * The segregation will not allow slices to exceed the batch_cache capacity of the glyph_buffer (configurable within startup params) @@ -503,7 +503,7 @@ generate_shape_draw_list :: proc( draw_list : ^Draw_List, shape : Shaped_Text, Order: Oversized first, then to_cache, then cached. Oversized and to_cache will both enqueue operations for rendering glyphs to the glyph buffer render target. - The compute section will have operations reguarding how many glyphs they made alloate before a flush must occur. + The compute section will have operations regarding how many glyphs they may allocate before a flush must occur. A flush will force one of the following: * Oversized will have a draw call setup to blit directly from the glyph buffer to the target. * to_cache will blit the glyphs rendered to the buffer to the atlas. @@ -530,42 +530,16 @@ batch_generate_glyphs_draw_list :: proc ( draw_list : ^Draw_List, colour := colour profile_begin("glyph buffer transform & draw quads compute") - for id, index in oversized + for id, index in cached { - glyph := & glyph_pack[id] - - f32_allocated_x := cast(f32) glyph_buffer.allocated_x - // Resolve how much space this glyph will allocate in the buffer - buffer_size := (glyph.bounds_size_scaled + glyph_buffer.draw_padding) * glyph.over_sample - - // Allocate a glyph glyph render target region (FBO) - to_allocate_x := buffer_size.x + 2.0 - glyph_buffer.allocated_x += i32(to_allocate_x) - - // If allocation would exceed buffer's bounds the buffer must be flush before this glyph can be rendered. - glyph.flush_glyph_buffer = i32(f32_allocated_x + to_allocate_x) >= i32(glyph_buffer_size.x) - glyph.buffer_x = f32_allocated_x * f32( i32( ! 
glyph.flush_glyph_buffer ) ) - // Quad to for drawing atlas slot to target - draw_quad := & glyph.draw_quad - - glyph_padding := vec2(glyph_buffer.draw_padding) - - // Target position (draw_list's target image) - draw_quad.dst_pos = glyph.position + (glyph.bounds_scaled.p0 - glyph_padding) * target_scale - draw_quad.dst_scale = (glyph.bounds_size_scaled + glyph_padding) * target_scale - - // The glyph buffer space transform for generate_glyph_pass_draw_list - draw_transform := & glyph.draw_transform - draw_transform.scale = font_scale * glyph.over_sample - draw_transform.pos = -1 * glyph.bounds.p0 * draw_transform.scale + vec2(atlas.glyph_padding) - draw_transform.pos.x += glyph.buffer_x - to_glyph_buffer_space( & draw_transform.pos, & draw_transform.scale, glyph_buffer_size ) - - - draw_quad.src_pos = Vec2 { glyph.buffer_x, 0 } - draw_quad.src_scale = glyph.bounds_size_scaled * glyph.over_sample + glyph_padding - to_target_space( & draw_quad.src_pos, & draw_quad.src_scale, glyph_buffer_size ) + glyph := & glyph_pack[id] + quad := & glyph.draw_quad + quad.dst_pos = glyph.position + (glyph.bounds_scaled.p0) * target_scale + quad.dst_scale = (glyph.scale) * target_scale + quad.src_scale = (glyph.scale) + quad.src_pos = (glyph.region_pos) + to_target_space( & quad.src_pos, & quad.src_scale, atlas_size ) } for id, index in to_cache { @@ -604,25 +578,51 @@ batch_generate_glyphs_draw_list :: proc ( draw_list : ^Draw_List, draw_quad.src_pos = (glyph.region_pos) to_target_space( & draw_quad.src_pos, & draw_quad.src_scale, atlas_size ) } - for id, index in cached + for id, index in oversized { - // Quad to for drawing atlas slot to target glyph := & glyph_pack[id] - quad := & glyph.draw_quad - quad.dst_pos = glyph.position + (glyph.bounds_scaled.p0) * target_scale - quad.dst_scale = (glyph.scale) * target_scale - quad.src_scale = (glyph.scale) - quad.src_pos = (glyph.region_pos) - to_target_space( & quad.src_pos, & quad.src_scale, atlas_size ) + + f32_allocated_x := 
cast(f32) glyph_buffer.allocated_x + // Resolve how much space this glyph will allocate in the buffer + buffer_size := (glyph.bounds_size_scaled + glyph_buffer.draw_padding) * glyph.over_sample + + // Allocate a glyph glyph render target region (FBO) + to_allocate_x := buffer_size.x + 2.0 + glyph_buffer.allocated_x += i32(to_allocate_x) + + // If allocation would exceed buffer's bounds the buffer must be flush before this glyph can be rendered. + glyph.flush_glyph_buffer = i32(f32_allocated_x + to_allocate_x) >= i32(glyph_buffer_size.x) + glyph.buffer_x = f32_allocated_x * f32( i32( ! glyph.flush_glyph_buffer ) ) + + // Quad to for drawing atlas slot to target + draw_quad := & glyph.draw_quad + + glyph_padding := vec2(glyph_buffer.draw_padding) + + // Target position (draw_list's target image) + draw_quad.dst_pos = glyph.position + (glyph.bounds_scaled.p0 - glyph_padding) * target_scale + draw_quad.dst_scale = (glyph.bounds_size_scaled + glyph_padding) * target_scale + + // The glyph buffer space transform for generate_glyph_pass_draw_list + draw_transform := & glyph.draw_transform + draw_transform.scale = font_scale * glyph.over_sample + draw_transform.pos = -1 * glyph.bounds.p0 * draw_transform.scale + vec2(atlas.glyph_padding) + draw_transform.pos.x += glyph.buffer_x + to_glyph_buffer_space( & draw_transform.pos, & draw_transform.scale, glyph_buffer_size ) + + + draw_quad.src_pos = Vec2 { glyph.buffer_x, 0 } + draw_quad.src_scale = glyph.bounds_size_scaled * glyph.over_sample + glyph_padding + to_target_space( & draw_quad.src_pos, & draw_quad.src_scale, glyph_buffer_size ) } profile_end() profile_begin("generate oversized glyphs draw_list") if len(oversized) > 0 { - colour.r = max(colour.a, enable_debug_vis_type) - colour.g = max(colour.g, enable_debug_vis_type) - colour.b = colour.b * f32(cast(i32) ! 
b32(cast(i32) enable_debug_vis_type)) + // colour.r = max(colour.a, enable_debug_vis_type) + // colour.g = max(colour.g, enable_debug_vis_type) + // colour.b = colour.b * f32(cast(i32) ! b32(cast(i32) enable_debug_vis_type)) for id, index in oversized { error : Allocator_Error glyph_pack[id].shape, error = parser_get_glyph_shape(entry.parser_info, glyph_pack[id].index) @@ -754,22 +754,22 @@ batch_generate_glyphs_draw_list :: proc ( draw_list : ^Draw_List, ) } - profile_begin("generate_cached_draw_list: cached") - colour.r = max(colour.r, 1.0 * enable_debug_vis_type) - colour.g = max(colour.g, 1.0 * enable_debug_vis_type) - colour.b = max(colour.b, 1.0 * enable_debug_vis_type) - generate_cached_draw_list( draw_list, glyph_pack[:], cached, colour ) - profile_end() - flush_glyph_buffer_draw_list(draw_list, & glyph_buffer.draw_list, & glyph_buffer.clear_draw_list, & glyph_buffer.allocated_x) for id, index in to_cache do parser_free_shape(entry.parser_info, glyph_pack[id].shape) + + profile_begin("generate_cached_draw_list: cached") + // colour.r = max(colour.r, 1.0 * enable_debug_vis_type) + // colour.g = max(colour.g, 1.0 * enable_debug_vis_type) + // colour.b = max(colour.b, 1.0 * enable_debug_vis_type) + generate_cached_draw_list( draw_list, glyph_pack[:], cached, colour ) + profile_end() } profile_end() profile_begin("generate_cached_draw_list: to_cache") - colour.r = max(colour.r, 0.80 * enable_debug_vis_type) - colour.g = max(colour.g, 0.25 * enable_debug_vis_type) - colour.b = max(colour.b, 0.25 * enable_debug_vis_type) + // colour.r = max(colour.r, 0.80 * enable_debug_vis_type) + // colour.g = max(colour.g, 0.25 * enable_debug_vis_type) + // colour.b = max(colour.b, 0.25 * enable_debug_vis_type) generate_cached_draw_list( draw_list, glyph_pack[:], to_cache, colour ) profile_end() } diff --git a/code/font/vefontcache/misc.odin b/code/font/vefontcache/misc.odin index d14d1ee..5240228 100644 --- a/code/font/vefontcache/misc.odin +++ 
b/code/font/vefontcache/misc.odin @@ -52,7 +52,7 @@ to_bytes :: #force_inline proc "contextless" ( typed_data : ^$Type ) -> []byte { @(optimization_mode="favor_size") djb8_hash :: #force_inline proc "contextless" ( hash : ^$Type, bytes : []byte ) { for value in bytes do (hash^) = (( (hash^) << 8) + (hash^) ) + Type(value) } -RGBA8 :: [4]f32 +RGBA8 :: [4]u8 RGBAN :: [4]f32 Vec2 :: [2]f32 Vec2i :: [2]i32 diff --git a/code/font/vefontcache/shaper.odin b/code/font/vefontcache/shaper.odin index b069b53..128326f 100644 --- a/code/font/vefontcache/shaper.odin +++ b/code/font/vefontcache/shaper.odin @@ -14,8 +14,7 @@ Shape_Key :: u32 its position should be used for rendering. For this library's case it also involes keeping any content - that does not have to be resolved up once again in a later stage of - preparing it for rendering. + that does not have to be resolved once again in the later stage of processing. Ideally the user should resolve this shape once and cache/store it on their side. They have the best ability to avoid costly lookups to streamline diff --git a/code/font/vefontcache/vefontcache.odin b/code/font/vefontcache/vefontcache.odin index bbe423f..22d494d 100644 --- a/code/font/vefontcache/vefontcache.odin +++ b/code/font/vefontcache/vefontcache.odin @@ -584,19 +584,19 @@ pop_zoom :: #force_inline proc( ctx : ^Context ) auto_pop_zoom :: #force_inline proc( ctx : ^Context, zoom : f32 ) { pop(& ctx.stack.zoom) } @(deferred_in = auto_pop_vpz) -scope_vpz :: #force_inline proc( ctx : ^Context, camera : VPZ_Transform ) { +scope_vpz :: #force_inline proc( ctx : ^Context, camera : VPZ_Transform ) { assert(ctx != nil) append(& ctx.stack.view, camera.view ) append(& ctx.stack.position, camera.position ) append(& ctx.stack.zoom, camera.zoom ) } -push_vpz :: #force_inline proc( ctx : ^Context, camera : VPZ_Transform ) { +push_vpz :: #force_inline proc( ctx : ^Context, camera : VPZ_Transform ) { assert(ctx != nil) append(& ctx.stack.view, camera.view ) append(& 
ctx.stack.position, camera.position ) append(& ctx.stack.zoom, camera.zoom ) } -pop_vpz :: #force_inline proc( ctx : ^Context ) { +pop_vpz :: #force_inline proc( ctx : ^Context ) { assert(ctx != nil) pop(& ctx.stack.view ) pop(& ctx.stack.position) @@ -664,18 +664,19 @@ draw_text_shape_normalized_space :: #force_inline proc( ctx : ^Context, font_scale := parser_scale( entry.parser_info, px_size ) - px_upscale := px_size * ctx.px_scalar - downscale := scale * (1 / ctx.px_scalar) - font_scale_upscale := parser_scale( entry.parser_info, px_upscale ) + target_px_size := px_size * ctx.px_scalar + target_scale := scale * (1 / ctx.px_scalar) + target_font_scale := parser_scale( entry.parser_info, target_px_size ) - ctx.cursor_pos = generate_shape_draw_list( & ctx.draw_list, shape, & ctx.atlas, & ctx.glyph_buffer, ctx.px_scalar, + ctx.cursor_pos = generate_shape_draw_list( & ctx.draw_list, shape, & ctx.atlas, & ctx.glyph_buffer, + ctx.px_scalar, ctx.enable_draw_type_visualization, adjusted_colour, entry, - px_upscale, - font_scale_upscale, + target_px_size, + target_font_scale, position, - downscale, + target_scale, ) } @@ -719,7 +720,8 @@ draw_text_normalized_space :: #force_inline proc( ctx : ^Context, target_font_scale, shaper_shape_text_uncached_advanced ) - ctx.cursor_pos = generate_shape_draw_list( & ctx.draw_list, shape, & ctx.atlas, & ctx.glyph_buffer, ctx.px_scalar, + ctx.cursor_pos = generate_shape_draw_list( & ctx.draw_list, shape, & ctx.atlas, & ctx.glyph_buffer, + ctx.px_scalar, ctx.enable_draw_type_visualization, colour, entry, @@ -834,7 +836,6 @@ flush_draw_list :: #force_inline proc( ctx : ^Context ) { ctx.draw_layer.calls_offset = 0 } - flush_draw_list_layer :: #force_inline proc( ctx : ^Context ) { assert( ctx != nil ) ctx.draw_layer.vertices_offset = len(ctx.draw_list.vertices) diff --git a/code/grime/profiler.odin b/code/grime/profiler.odin index 3a3a27c..2fcc5f7 100644 --- a/code/grime/profiler.odin +++ b/code/grime/profiler.odin @@ -15,7 +15,7 @@ 
set_profiler_module_context :: #force_inline proc "contextless" ( ctx : ^SpallPr Module_Context = ctx } -DISABLE_PROFILING :: false +DISABLE_PROFILING :: true @(deferred_none = profile_end, disabled = DISABLE_PROFILING) profile :: #force_inline proc "contextless" ( name : string, loc := #caller_location ) { diff --git a/code/sectr/engine/client_api.odin b/code/sectr/engine/client_api.odin index c691df3..18037d1 100644 --- a/code/sectr/engine/client_api.odin +++ b/code/sectr/engine/client_api.odin @@ -153,8 +153,8 @@ startup :: proc( prof : ^SpallProfiler, persistent_mem, frame_mem, transient_mem color_theme = App_Thm_Dusk text_snap_glyph_positions = true - text_size_screen_scalar = 2.0 - text_size_canvas_scalar = 2.0 + text_size_screen_scalar = 1.0 + text_size_canvas_scalar = 1.0 text_alpha_sharpen = 0.25 } @@ -526,7 +526,7 @@ tick_work_frame :: #force_inline proc( host_delta_time_ms : f64 ) -> b32 config := & get_state().config debug := & get_state().debug - debug.draw_ui_box_bounds_points = false + debug.draw_ui_box_bounds_points = true debug.draw_ui_padding_bounds = false debug.draw_ui_content_bounds = false diff --git a/scripts/build.ps1 b/scripts/build.ps1 index c84e564..5d7a531 100644 --- a/scripts/build.ps1 +++ b/scripts/build.ps1 @@ -205,9 +205,9 @@ push-location $path_root # $build_args += $flag_micro_architecture_native $build_args += $flag_use_separate_modules $build_args += $flag_thread_count + $CoreCount_Physical - # $build_args += $flag_optimize_none + $build_args += $flag_optimize_none # $build_args += $flag_optimize_minimal - $build_args += $flag_optimize_speed + # $build_args += $flag_optimize_speed # $build_args += $falg_optimize_aggressive $build_args += $flag_debug $build_args += $flag_pdb_name + $pdb