From 3b5cc3cc806d6002463f859d01b48ab01b731f47 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Fri, 28 Jun 2024 05:29:53 -0400 Subject: [PATCH] Some minor performance improvments, draw_list caching for shapes failed. --- code/font/VEFontCache/Readme.md | 33 +++-- code/font/VEFontCache/VEFontCache.odin | 12 +- code/font/VEFontCache/draw.odin | 173 +++++++++++++------------ code/font/VEFontCache/shaped_text.odin | 31 +++-- 4 files changed, 137 insertions(+), 112 deletions(-) diff --git a/code/font/VEFontCache/Readme.md b/code/font/VEFontCache/Readme.md index a9dd600..72cef34 100644 --- a/code/font/VEFontCache/Readme.md +++ b/code/font/VEFontCache/Readme.md @@ -6,21 +6,23 @@ Its original purpose was for use in game engines, however its rendeirng quality See: [docs/Readme.md](docs/Readme.md) for the library's interface -TODO (Making it a more idiomatic library): +## TODOs + +### (Making it a more idiomatic library): * Setup freetype, harfbuzz, depedency management within the library -TODO Documentation: +### Documentation: * Pureref outline of draw_text exectuion * Markdown general documentation -TODO Content: +### Content: * Port over the original demo utilizing sokol libraries instead * Provide a sokol_gfx backend package -TODO Additional Features: +### Additional Features: * Support for freetype * Support for harfbuzz @@ -28,9 +30,22 @@ TODO Additional Features: * By default the library's position is in unsigned normalized render space * Allow curve_quality to be set on a per-font basis -TODO Optimization: +### Optimization: -* Look into caching the draw_list for each shape instead of the glyphs/positions - * Each shape is already constrained to a Entry which is restricted to already a size-class for the glyphs - * Caching a glyph to atlas or generating the draw command for a glyph quad to screen is expensive for large batches. 
-* Attempt to look into chunking shapes again if caching the draw_list for a shape is found to be optimal +* Look into setting up multi-threading by giving each thread a context + * There is a heavy performance bottleneck in iterating the text/shape/glyphs on the cpu (single-thread) vs the actual rendering + * draw_text can provide in the context a job list per thread for the user to then hook up to their own threading solution to handle. + * Context would need to be segregated into staged data structures for each thread to utilize + * Each should have their own? + * draw_list + * draw_layer + * atlas.next_idx + * glyph_draw_buffer + * shape_cache + * This would need to converge to the singular draw_list on a per layer basis (when the user requests a draw_list layer there could be a yield to wait for the jobs to finish); if the interface expects the user to issue the commands single-threaded, unless we just assume the user is going to feed the gpu the commands & data through separate threads as well (not ideal ux). + +Failed Attempts: + +* Attempted to chunk the text to more granular 'shapes' from `draw_list` before doing the actual call to `draw_text_shape`. This led to a larger performance cost due to the additional iteration across the text string. +* Attempted to cache the shape draw_list for future calls. Led to larger performance cost due to additional iteration in the `merge_draw_list`. + * The shape's glyphs must still be traversed to identify if the glyph is cached. This arguably could be handled in `shape_text_uncached`, however that would require a significant amount of refactoring to identify... 
(and would be more unergonomic when shapers libs are processing the text) diff --git a/code/font/VEFontCache/VEFontCache.odin b/code/font/VEFontCache/VEFontCache.odin index 87d5cae..7dba58f 100644 --- a/code/font/VEFontCache/VEFontCache.odin +++ b/code/font/VEFontCache/VEFontCache.odin @@ -44,7 +44,7 @@ Context :: struct { entries : [dynamic]Entry, - temp_path : [dynamic]Vec2, + temp_path : [dynamic]Vertex, temp_codepoint_seen : map[u64]bool, temp_codepoint_seen_num : u32, @@ -133,8 +133,8 @@ InitShapeCacheParams :: struct { } InitShapeCacheParams_Default :: InitShapeCacheParams { - capacity = 1024, - reserve_length = 1024, + capacity = 2048, + reserve_length = 2048, } // ve_fontcache_init @@ -145,8 +145,8 @@ startup :: proc( ctx : ^Context, parser_kind : ParserKind, shape_cache_params := InitShapeCacheParams_Default, curve_quality : u32 = 3, entires_reserve : u32 = 512, - temp_path_reserve : u32 = 512, - temp_codepoint_seen_reserve : u32 = 512, + temp_path_reserve : u32 = 1024, + temp_codepoint_seen_reserve : u32 = 2048, ) { assert( ctx != nil, "Must provide a valid context" ) @@ -164,7 +164,7 @@ startup :: proc( ctx : ^Context, parser_kind : ParserKind, entries, error = make( [dynamic]Entry, len = 0, cap = entires_reserve ) assert(error == .None, "VEFontCache.init : Failed to allocate entries") - temp_path, error = make( [dynamic]Vec2, len = 0, cap = temp_path_reserve ) + temp_path, error = make( [dynamic]Vertex, len = 0, cap = temp_path_reserve ) assert(error == .None, "VEFontCache.init : Failed to allocate temp_path") temp_codepoint_seen, error = make( map[u64]bool, uint(temp_codepoint_seen_reserve) ) diff --git a/code/font/VEFontCache/draw.odin b/code/font/VEFontCache/draw.odin index 2957943..43eec00 100644 --- a/code/font/VEFontCache/draw.odin +++ b/code/font/VEFontCache/draw.odin @@ -56,37 +56,31 @@ blit_quad :: proc( draw_list : ^DrawList, p0 : Vec2 = {0, 0}, p1 : Vec2 = {1, 1} // p0.x, p0.y, p1.x, p1.y, uv0.x, uv0.y, uv1.x, uv1.y); v_offset := cast(u32) 
len(draw_list.vertices) - vertex := Vertex { + quadv : [4]Vertex + + quadv[0] = Vertex { {p0.x, p0.y}, uv0.x, uv0.y } - append( & draw_list.vertices, vertex ) - - vertex = Vertex { + quadv[1] = Vertex { {p0.x, p1.y}, uv0.x, uv1.y } - append( & draw_list.vertices, vertex ) - - vertex = Vertex { + quadv[2] = Vertex { {p1.x, p0.y}, uv1.x, uv0.y } - append( & draw_list.vertices, vertex ) - - vertex = Vertex { + quadv[3] = Vertex { {p1.x, p1.y}, uv1.x, uv1.y } - append( & draw_list.vertices, vertex ) + append( & draw_list.vertices, ..quadv[:] ) quad_indices : []u32 = { - 0, 1, 2, - 2, 1, 3 - } - for index : i32 = 0; index < 6; index += 1 { - append( & draw_list.indices, v_offset + quad_indices[ index ] ) + 0 + v_offset, 1 + v_offset, 2 + v_offset, + 2 + v_offset, 1 + v_offset, 3 + v_offset } + append( & draw_list.indices, ..quad_indices[:] ) return } @@ -146,36 +140,37 @@ cache_glyph :: proc( ctx : ^Context, font : FontID, glyph_index : Glyph, entry : // Note(Original Author); // Draw the path using simplified version of https://medium.com/@evanwallace/easy-scalable-text-rendering-on-the-gpu-c3f4d782c5ac. // Instead of involving fragment shader code we simply make use of modern GPU ability to crunch triangles and brute force curve definitions. 
- path := ctx.temp_path - clear( & path) + path := & ctx.temp_path + clear( path) for edge in shape do switch edge.type { case .Move: if len(path) > 0 { draw_filled_path( & ctx.draw_list, outside, path[:], scale, translate, ctx.debug_print_verbose ) } - clear( & path) + clear( path) fallthrough case .Line: - append( & path, Vec2{ f32(edge.x), f32(edge.y) }) + vertex := Vertex { pos = Vec2{ f32(edge.x), f32(edge.y) } } + append( path, vertex) case .Curve: assert( len(path) > 0 ) - p0 := path[ len(path) - 1 ] + p0 := path[ len(path) - 1 ].pos p1 := Vec2{ f32(edge.contour_x0), f32(edge.contour_y0) } p2 := Vec2{ f32(edge.x), f32(edge.y) } step := 1.0 / f32(ctx.curve_quality) alpha := step for index := i32(0); index < i32(ctx.curve_quality); index += 1 { - append( & path, eval_point_on_bezier3( p0, p1, p2, alpha )) + append( path, Vertex { pos = eval_point_on_bezier3( p0, p1, p2, alpha ) }) alpha += step } case .Cubic: assert( len(path) > 0 ) - p0 := path[ len(path) - 1] + p0 := path[ len(path) - 1].pos p1 := Vec2{ f32(edge.contour_x0), f32(edge.contour_y0) } p2 := Vec2{ f32(edge.contour_x1), f32(edge.contour_y1) } p3 := Vec2{ f32(edge.x), f32(edge.y) } @@ -183,7 +178,7 @@ cache_glyph :: proc( ctx : ^Context, font : FontID, glyph_index : Glyph, entry : step := 1.0 / f32(ctx.curve_quality) alpha := step for index := i32(0); index < i32(ctx.curve_quality); index += 1 { - append( & path, eval_point_on_bezier4( p0, p1, p2, p3, alpha )) + append( path, Vertex { pos = eval_point_on_bezier4( p0, p1, p2, p3, alpha ) }) alpha += step } @@ -197,7 +192,7 @@ cache_glyph :: proc( ctx : ^Context, font : FontID, glyph_index : Glyph, entry : // Note(Original Author): Apend the draw call draw.end_index = cast(u32) len(ctx.draw_list.indices) if draw.end_index > draw.start_index { - append(& ctx.draw_list.calls, draw) + append( & ctx.draw_list.calls, draw) } parser_free_shape( & entry.parser_info, shape ) @@ -301,10 +296,9 @@ cache_glyph_to_atlas :: proc( ctx : ^Context, 
glyph_buffer.batch_x += i32(gwidth_scaled_px) screenspace_x_form( & glyph_draw_translate, & glyph_draw_scale, glyph_buffer_size ) - call : DrawCall + clear_target_region : DrawCall { - // Queue up clear on target region on atlas - using call + using clear_target_region pass = .Atlas region = .Ignore start_index = cast(u32) len(glyph_buffer.clear_draw_list.indices) @@ -314,9 +308,12 @@ cache_glyph_to_atlas :: proc( ctx : ^Context, { 1.0, 1.0 }, { 1.0, 1.0 } ) end_index = cast(u32) len(glyph_buffer.clear_draw_list.indices) - append( & glyph_buffer.clear_draw_list.calls, call ) + } - // Queue up a blit from glyph_update_FBO to the atlas + blit_to_atlas : DrawCall + { + using blit_to_atlas + pass = .Atlas region = .None start_index = cast(u32) len(glyph_buffer.draw_list.indices) @@ -325,9 +322,11 @@ cache_glyph_to_atlas :: proc( ctx : ^Context, src_position, src_position + src_size ) end_index = cast(u32) len(glyph_buffer.draw_list.indices) - append( & glyph_buffer.draw_list.calls, call ) } + append( & glyph_buffer.clear_draw_list.calls, clear_target_region ) + append( & glyph_buffer.draw_list.calls, blit_to_atlas ) + // Render glyph to glyph_update_FBO cache_glyph( ctx, font, glyph_index, entry, vec2(bounds_0), vec2(bounds_1), glyph_draw_scale, glyph_draw_translate ) } @@ -403,7 +402,7 @@ directly_draw_massive_glyph :: proc( ctx : ^Context, // Figure out the source rect. glyph_position := Vec2 {} glyph_size := vec2(glyph_padding_dbl) - glyph_dst_size := glyph_size + bounds_scaled + glyph_dst_size := glyph_size + bounds_scaled glyph_size += bounds_scaled * over_sample // Figure out the destination rect. @@ -416,9 +415,11 @@ directly_draw_massive_glyph :: proc( ctx : ^Context, textspace_x_form( & glyph_position, & glyph_size, glyph_buffer_size ) // Add the glyph drawcall. 
- call : DrawCall + calls : [2]DrawCall + + draw_to_target := & calls[0] { - using call + using draw_to_target pass = .Target_Uncached colour = ctx.colour start_index = u32(len(ctx.draw_list.indices)) @@ -428,15 +429,17 @@ directly_draw_massive_glyph :: proc( ctx : ^Context, glyph_position, glyph_position + glyph_size ) end_index = u32(len(ctx.draw_list.indices)) - append( & ctx.draw_list.calls, call ) } - // Clear glyph_update_FBO. - call.pass = .Glyph - call.start_index = 0 - call.end_index = 0 - call.clear_before_draw = true - append( & ctx.draw_list.calls, call ) + clear_glyph_update := & calls[1] + { + // Clear glyph_update_FBO. + clear_glyph_update.pass = .Glyph + clear_glyph_update.start_index = 0 + clear_glyph_update.end_index = 0 + clear_glyph_update.clear_before_draw = true + } + append( & ctx.draw_list.calls, ..calls[:] ) } draw_cached_glyph :: proc( ctx : ^Context, shaped : ^ShapedText, @@ -481,14 +484,13 @@ draw_cached_glyph :: proc( ctx : ^Context, shaped : ^ShapedText, bounds_0_scaled := bounds_0 * entry.size_scale //- { 0.5, 0.5 } bounds_0_scaled = ceil(bounds_0_scaled) - dst := position + bounds_0_scaled * scale - dst -= glyph_padding * scale - dst_scale := glyph_scale * scale + dst := position + (bounds_0_scaled - glyph_padding) * scale + dst_scale := glyph_scale * scale textspace_x_form( & slot_position, & glyph_scale, atlas_size ) // Shape call setup - if false + when false { call := DrawCall_Default { @@ -504,22 +506,23 @@ draw_cached_glyph :: proc( ctx : ^Context, shaped : ^ShapedText, } append( & shaped.draw_list.calls, call ) } - - // Add the glyph drawcall - call := DrawCall_Default + else { - using call - pass = .Target - colour = ctx.colour - start_index = cast(u32) len(ctx.draw_list.indices) + // Add the glyph drawcall + call := DrawCall_Default + { + using call + pass = .Target + colour = ctx.colour + start_index = cast(u32) len(ctx.draw_list.indices) - blit_quad( & ctx.draw_list, - dst, dst + dst_scale, - slot_position, slot_position + 
glyph_scale ) - end_index = cast(u32) len(ctx.draw_list.indices) + blit_quad( & ctx.draw_list, + dst, dst + dst_scale, + slot_position, slot_position + glyph_scale ) + end_index = cast(u32) len(ctx.draw_list.indices) + } + append( & ctx.draw_list.calls, call ) } - append( & ctx.draw_list.calls, call ) - return true } @@ -529,7 +532,7 @@ draw_cached_glyph :: proc( ctx : ^Context, shaped : ^ShapedText, // Note(Original Author): // WARNING: doesn't actually append drawcall; caller is responsible for actually appending the drawcall. // ve_fontcache_draw_filled_path -draw_filled_path :: proc( draw_list : ^DrawList, outside_point : Vec2, path : []Vec2, +draw_filled_path :: proc( draw_list : ^DrawList, outside_point : Vec2, path : []Vertex, scale := Vec2 { 1, 1 }, translate := Vec2 { 0, 0 }, debug_print_verbose : b32 = false @@ -539,19 +542,16 @@ draw_filled_path :: proc( draw_list : ^DrawList, outside_point : Vec2, path : [] { log("outline_path:") for point in path { - vec := point * scale + translate + vec := point.pos * scale + translate logf(" %0.2f %0.2f", vec.x, vec.y ) } } v_offset := cast(u32) len(draw_list.vertices) for point in path { - vertex := Vertex { - pos = point * scale + translate, - u = 0, - v = 0, - } - append( & draw_list.vertices, vertex ) + point := point + point.pos = point.pos * scale + translate + append( & draw_list.vertices, point ) } outside_vertex := cast(u32) len(draw_list.vertices) @@ -566,9 +566,12 @@ draw_filled_path :: proc( draw_list : ^DrawList, outside_point : Vec2, path : [] for index : u32 = 1; index < cast(u32) len(path); index += 1 { indices := & draw_list.indices - append( indices, outside_vertex ) - append( indices, v_offset + index - 1 ) - append( indices, v_offset + index ) + to_add := [3]u32 { + outside_vertex, + v_offset + index - 1, + v_offset + index + } + append( indices, ..to_add[:] ) } } @@ -600,7 +603,7 @@ draw_text_batch :: proc( ctx : ^Context, entry : ^Entry, shaped : ^ShapedText, lru_code, atlas_index, 
vec2(bounds_0), vec2(bounds_1), region_kind, region, over_sample, - glyph_translate, scale) + glyph_translate, scale ) assert( glyph_cached == true ) } } @@ -614,8 +617,17 @@ draw_text_shape :: proc( ctx : ^Context, snap_width, snap_height : f32 ) -> (cursor_pos : Vec2) { - draw_hash := shape_draw_hash( shaped, position, scale ) - dirty_shape := ! (len(shaped.draw_list.calls) > 0) || draw_hash != shaped.draw_hash + // draw_hash := shape_draw_hash( shaped, position, scale ) + // dirty_shape := len(shaped.draw_list.calls) == 0 || draw_hash != shaped.draw_hash + // if ! dirty_shape { + // merge_draw_list( & ctx.draw_list, & shaped.draw_list ) + // reset_batch_codepoint_state( ctx ) + // cursor_pos = position + shaped.end_cursor_pos * scale + // return + // } + // if dirty_shape { + // clear_draw_list( & shaped.draw_list ) + // } // position := position //+ ctx.cursor_pos * scale // profile(#procedure) @@ -633,9 +645,10 @@ draw_text_shape :: proc( ctx : ^Context, if check_glyph_in_atlas( ctx, font, entry, glyph_index, lru_code, atlas_index, region_kind, region, over_sample ) do continue // We can no longer directly append the shape as it has missing glyphs in the atlas - dirty_shape = true - - // Glyph has not been catched, needs to be directly drawn. 
+ // if !dirty_shape { + // clear_draw_list( & shaped.draw_list ) + // } + // dirty_shape = true // First batch the other cached glyphs // flush_glyph_buffer_to_atlas(ctx) @@ -650,13 +663,11 @@ draw_text_shape :: proc( ctx : ^Context, // if dirty_shape { flush_glyph_buffer_to_atlas(ctx) draw_text_batch( ctx, entry, shaped, batch_start_idx, cast(i32) len(shaped.glyphs), position, scale, snap_width , snap_height ) - // shaped.draw_hash = draw_hash + // shaped.draw_hash = draw_hash // } - // else { - // flush_glyph_buffer_to_atlas( ctx ) - // merge_draw_list( & ctx.draw_list, & shaped.draw_list ) - // } - reset_batch_codepoint_state( ctx ) + + // merge_draw_list( & ctx.draw_list, & shaped.draw_list ) + // reset_batch_codepoint_state( ctx ) cursor_pos = position + shaped.end_cursor_pos * scale return diff --git a/code/font/VEFontCache/shaped_text.odin b/code/font/VEFontCache/shaped_text.odin index 50768ef..75a2db9 100644 --- a/code/font/VEFontCache/shaped_text.odin +++ b/code/font/VEFontCache/shaped_text.odin @@ -1,13 +1,13 @@ package VEFontCache ShapedText :: struct { - draw_list : DrawList, + // draw_list : DrawList, glyphs : [dynamic]Glyph, positions : [dynamic]Vec2, end_cursor_pos : Vec2, size : Vec2, - storage_hash : u64, - draw_hash : u64, + // storage_hash : u64, + // draw_hash : u64, } ShapedTextCache :: struct { @@ -17,18 +17,18 @@ ShapedTextCache :: struct { } -shape_draw_hash :: #force_inline proc "contextless" ( shaped : ^ShapedText, pos, scale : Vec2 ) -> (draw_hash : u64) -{ - pos := pos - scale := scale - pos_bytes := slice_ptr( transmute(^byte) & pos, size_of(Vec2)) - scale_bytes := slice_ptr( transmute(^byte) & scale, size_of(Vec2)) +// shape_draw_hash :: #force_inline proc "contextless" ( shaped : ^ShapedText, pos, scale : Vec2 ) -> (draw_hash : u64) +// { +// pos := pos +// scale := scale +// pos_bytes := slice_ptr( transmute(^byte) & pos, size_of(Vec2)) +// scale_bytes := slice_ptr( transmute(^byte) & scale, size_of(Vec2)) - draw_hash = 
shaped.storage_hash - shape_lru_hash( & shaped.draw_hash, pos_bytes ) - shape_lru_hash( & shaped.draw_hash, scale_bytes ) - return -} +// draw_hash = shaped.storage_hash +// shape_lru_hash( & shaped.draw_hash, pos_bytes ) +// shape_lru_hash( & shaped.draw_hash, scale_bytes ) +// return +// } // shape_lru_hash_og :: #force_inline proc "contextless" ( label : string ) -> u64 { // hash : u64 @@ -91,7 +91,7 @@ shape_text_cached :: proc( ctx : ^Context, font : FontID, text_utf8 : string, en } shape_entry := & shape_cache.storage[ shape_cache_idx ] - shape_entry.storage_hash = lru_code + // shape_entry.storage_hash = lru_code shape_text_uncached( ctx, font, text_utf8, entry, shape_entry ) } @@ -106,7 +106,6 @@ shape_text_uncached :: proc( ctx : ^Context, font : FontID, text_utf8 : string, use_full_text_shape := ctx.text_shape_adv - clear_draw_list( & output.draw_list ) clear( & output.glyphs ) clear( & output.positions )