diff --git a/LRU.odin b/LRU.odin index db5d440..ea97a6b 100644 --- a/LRU.odin +++ b/LRU.odin @@ -28,11 +28,11 @@ PoolList :: struct { pool_list_init :: proc( pool : ^PoolList, capacity : u32, dbg_name : string = "" ) { error : AllocatorError - pool.items, error = make( [dynamic]PoolListItem, u64(capacity) ) + pool.items, error = make( [dynamic]PoolListItem, int(capacity) ) assert( error == .None, "VEFontCache.pool_list_init : Failed to allocate items array") resize( & pool.items, capacity ) - pool.free_list, error = make( [dynamic]PoolListIter, u64(capacity) ) + pool.free_list, error = make( [dynamic]PoolListIter, len = 0, cap = int(capacity) ) assert( error == .None, "VEFontCache.pool_list_init : Failed to allocate free_list array") resize( & pool.free_list, capacity ) @@ -55,7 +55,7 @@ pool_list_init :: proc( pool : ^PoolList, capacity : u32, dbg_name : string = "" pool_list_free :: proc( pool : ^PoolList ) { - // TODO(Ed): Implement + // TODO(Ed): Implement } pool_list_reload :: proc( pool : ^PoolList, allocator : Allocator ) @@ -120,6 +120,23 @@ pool_list_erase :: proc( pool : ^PoolList, iter : PoolListIter ) } } +pool_list_move_to_front :: #force_inline proc( pool : ^PoolList, iter : PoolListIter ) +{ + using pool + + if front == iter do return + + item := & items[iter] + if item.prev != -1 do items[ item.prev ].next = item.next + if item.next != -1 do items[ item.next ].prev = item.prev + if back == iter do back = item.prev + + item.prev = -1 + item.next = front + items[ front ].prev = iter + front = iter +} + pool_list_peek_back :: #force_inline proc ( pool : ^PoolList ) -> PoolListValue { assert( pool.back != - 1 ) value := pool.items[ pool.back ].value @@ -160,7 +177,7 @@ LRU_init :: proc( cache : ^LRU_Cache, capacity : u32, dbg_name : string = "" ) { LRU_free :: proc( cache : ^LRU_Cache ) { - // TODO(Ed): Implement + // TODO(Ed): Implement } LRU_reload :: #force_inline proc( cache : ^LRU_Cache, allocator : Allocator ) @@ -180,13 +197,12 @@ LRU_find :: 
#force_inline proc "contextless" ( cache : ^LRU_Cache, key : u64, mu return link, success } -LRU_get :: #force_inline proc( cache : ^LRU_Cache, key : u64 ) -> i32 { - iter, success := LRU_find( cache, key ) - if success == false { - return -1 +LRU_get :: #force_inline proc( cache: ^LRU_Cache, key : u64 ) -> i32 { + if link, ok := &cache.table[ key ]; ok { + pool_list_move_to_front(&cache.key_queue, link.ptr) + return link.value } - LRU_refresh( cache, key ) - return iter.value + return -1 } LRU_get_next_evicted :: #force_inline proc ( cache : ^LRU_Cache ) -> u64 @@ -206,26 +222,25 @@ LRU_peek :: #force_inline proc ( cache : ^LRU_Cache, key : u64, must_find := fal return iter.value } -LRU_put :: #force_inline proc ( cache : ^LRU_Cache, key : u64, value : i32 ) -> u64 +LRU_put :: #force_inline proc( cache : ^LRU_Cache, key : u64, value : i32 ) -> u64 { - iter, success := cache.table[key] - if success { - LRU_refresh( cache, key ) - iter.value = value + if link, ok := & cache.table[ key ]; ok { + pool_list_move_to_front( & cache.key_queue, link.ptr ) + link.value = value return key } evict := key if cache.key_queue.size >= cache.capacity { - evict = pool_list_pop_back( & cache.key_queue ) - delete_key( & cache.table, evict ) + evict = pool_list_pop_back(&cache.key_queue) + delete_key(&cache.table, evict) cache.num -= 1 } - pool_list_push_front( & cache.key_queue, key ) - cache.table[key] = LRU_Link { - value = value, - ptr = cache.key_queue.front + pool_list_push_front(&cache.key_queue, key) + cache.table[key] = LRU_Link{ + value = value, + ptr = cache.key_queue.front, } cache.num += 1 return evict diff --git a/Readme.md b/Readme.md index 0e9f370..f6690a9 100644 --- a/Readme.md +++ b/Readme.md @@ -1,29 +1,51 @@ # VE Font Cache : Odin Port -This is a port of the library base on the [original](https://github.com/hypernewbie/VEFontCache). 
+This is a port of the library based on a [fork](https://github.com/hypernewbie/VEFontCache) Its original purpose was for use in game engines, however its rendeirng quality and performance is more than adequate for many other applications. See: [docs/Readme.md](docs/Readme.md) for the library's interface -TODO (Making it a more idiomatic library): +## TODOs + +### (Making it a more idiomatic library): * Setup freetype, harfbuzz, depedency management within the library -TODO Documentation: +### Documentation: * Pureref outline of draw_text exectuion * Markdown general documentation -TODO Content: +### Content: * Port over the original demo utilizing sokol libraries instead * Provide a sokol_gfx backend package -TODO Additional Features: +### Additional Features: * Support for freetype * Support for harfbuzz * Ability to set a draw transform, viewport and projection * By default the library's position is in unsigned normalized render space * Allow curve_quality to be set on a per-font basis + +### Optimization: + +* Look into setting up multi-threading by giving each thread a context + * There is a heavy performance bottleneck in iterating the text/shape/glyphs on the cpu (single-thread) vs the actual rendering + * draw_text can provide in the context a job list per thread for the user to then hook up to their own threading solution to handle. + * Context would need to be segregated into staged data structures for each thread to utilize + * Each should have their own? + * draw_list + * draw_layer + * atlas.next_idx + * glyph_draw_buffer + * shape_cache + * This would need to converge to the singular draw_list on a per-layer basis (when the user requests a draw_list layer there could be a yield to wait for the jobs to finish); if the interface expects the user to issue the commands single-threaded, unless we just assume the user is going to feed the gpu the commands & data through separate threads as well (not ideal ux).
+ +Failed Attempts: + +* Attempted to chunk the text to more granular 'shapes' from `draw_list` before doing the actual call to `draw_text_shape`. This led to a larger performance cost due to the additional iteration across the text string. +* Attempted to cache the shape draw_list for future calls. Led to larger performance cost due to additional iteration in the `merge_draw_list`. + * The shape's glyphs must still be traversed to identify if the glyph is cached. This arguably could be handled in `shape_text_uncached`, however that would require a significant amount of refactoring to identify... (and would be less ergonomic when shaper libs are processing the text) diff --git a/VEFontCache.odin b/VEFontCache.odin index 2a8c710..7dba58f 100644 --- a/VEFontCache.odin +++ b/VEFontCache.odin @@ -44,7 +44,7 @@ Context :: struct { entries : [dynamic]Entry, - temp_path : [dynamic]Vec2, + temp_path : [dynamic]Vertex, temp_codepoint_seen : map[u64]bool, temp_codepoint_seen_num : u32, @@ -133,8 +133,8 @@ InitShapeCacheParams :: struct { } InitShapeCacheParams_Default :: InitShapeCacheParams { - capacity = 1024, - reserve_length = 1024, + capacity = 2048, + reserve_length = 2048, } // ve_fontcache_init @@ -145,8 +145,8 @@ startup :: proc( ctx : ^Context, parser_kind : ParserKind, shape_cache_params := InitShapeCacheParams_Default, curve_quality : u32 = 3, entires_reserve : u32 = 512, - temp_path_reserve : u32 = 512, - temp_codepoint_seen_reserve : u32 = 512, + temp_path_reserve : u32 = 1024, + temp_codepoint_seen_reserve : u32 = 2048, ) { assert( ctx != nil, "Must provide a valid context" ) @@ -161,25 +161,26 @@ startup :: proc( ctx : ^Context, parser_kind : ParserKind, ctx.curve_quality = curve_quality error : AllocatorError - entries, error = make( [dynamic]Entry, entires_reserve ) + entries, error = make( [dynamic]Entry, len = 0, cap = entires_reserve ) assert(error == .None, "VEFontCache.init : Failed to allocate entries") - temp_path, error = make( [dynamic]Vec2, 
temp_path_reserve ) + temp_path, error = make( [dynamic]Vertex, len = 0, cap = temp_path_reserve ) assert(error == .None, "VEFontCache.init : Failed to allocate temp_path") temp_codepoint_seen, error = make( map[u64]bool, uint(temp_codepoint_seen_reserve) ) assert(error == .None, "VEFontCache.init : Failed to allocate temp_path") - draw_list.vertices, error = make( [dynamic]Vertex, 4 * Kilobyte ) + draw_list.vertices, error = make( [dynamic]Vertex, len = 0, cap = 4 * Kilobyte ) assert(error == .None, "VEFontCache.init : Failed to allocate draw_list.vertices") - draw_list.indices, error = make( [dynamic]u32, 8 * Kilobyte ) + draw_list.indices, error = make( [dynamic]u32, len = 0, cap = 8 * Kilobyte ) assert(error == .None, "VEFontCache.init : Failed to allocate draw_list.indices") - draw_list.calls, error = make( [dynamic]DrawCall, 512 ) + draw_list.calls, error = make( [dynamic]DrawCall, len = 0, cap = 512 ) assert(error == .None, "VEFontCache.init : Failed to allocate draw_list.calls") - init_atlas_region :: proc( region : ^AtlasRegion, params : InitAtlasParams, region_params : InitAtlasRegionParams, factor : Vec2i, expected_cap : i32 ) { + init_atlas_region :: proc( region : ^AtlasRegion, params : InitAtlasParams, region_params : InitAtlasRegionParams, factor : Vec2i, expected_cap : i32 ) + { using region next_idx = 0; @@ -225,11 +226,20 @@ startup :: proc( ctx : ^Context, parser_kind : ParserKind, for idx : u32 = 0; idx < shape_cache_params.capacity; idx += 1 { stroage_entry := & shape_cache.storage[idx] using stroage_entry - glyphs, error = make( [dynamic]Glyph, shape_cache_params.reserve_length ) + glyphs, error = make( [dynamic]Glyph, len = 0, cap = shape_cache_params.reserve_length ) assert( error == .None, "VEFontCache.init : Failed to allocate glyphs array for shape cache storage" ) - positions, error = make( [dynamic]Vec2, shape_cache_params.reserve_length ) + positions, error = make( [dynamic]Vec2, len = 0, cap = shape_cache_params.reserve_length ) 
assert( error == .None, "VEFontCache.init : Failed to allocate positions array for shape cache storage" ) + + draw_list.calls, error = make( [dynamic]DrawCall, len = 0, cap = glyph_draw_params.buffer_batch * 2 ) + assert( error == .None, "VEFontCache.init : Failed to allocate calls for draw_list" ) + + draw_list.indices, error = make( [dynamic]u32, len = 0, cap = glyph_draw_params.buffer_batch * 2 * 6 ) + assert( error == .None, "VEFontCache.init : Failed to allocate indices array for draw_list" ) + + draw_list.vertices, error = make( [dynamic]Vertex, len = 0, cap = glyph_draw_params.buffer_batch * 2 * 4 ) + assert( error == .None, "VEFontCache.init : Failed to allocate vertices array for draw_list" ) } // Note(From original author): We can actually go over VE_FONTCACHE_GLYPHDRAW_BUFFER_BATCH batches due to smart packing! @@ -241,22 +251,22 @@ startup :: proc( ctx : ^Context, parser_kind : ParserKind, height = atlas.region_d.height * u32(over_sample.y) draw_padding = glyph_draw_params.draw_padding - draw_list.calls, error = make( [dynamic]DrawCall, cast(u64) glyph_draw_params.buffer_batch * 2 ) + draw_list.calls, error = make( [dynamic]DrawCall, len = 0, cap = glyph_draw_params.buffer_batch * 2 ) assert( error == .None, "VEFontCache.init : Failed to allocate calls for draw_list" ) - draw_list.indices, error = make( [dynamic]u32, cast(u64) glyph_draw_params.buffer_batch * 2 * 6 ) + draw_list.indices, error = make( [dynamic]u32, len = 0, cap = glyph_draw_params.buffer_batch * 2 * 6 ) assert( error == .None, "VEFontCache.init : Failed to allocate indices array for draw_list" ) - draw_list.vertices, error = make( [dynamic]Vertex, glyph_draw_params.buffer_batch * 2 * 4 ) + draw_list.vertices, error = make( [dynamic]Vertex, len = 0, cap = glyph_draw_params.buffer_batch * 2 * 4 ) assert( error == .None, "VEFontCache.init : Failed to allocate vertices array for draw_list" ) - clear_draw_list.calls, error = make( [dynamic]DrawCall, cast(u64) glyph_draw_params.buffer_batch * 
2 ) + clear_draw_list.calls, error = make( [dynamic]DrawCall, len = 0, cap = glyph_draw_params.buffer_batch * 2 ) assert( error == .None, "VEFontCache.init : Failed to allocate calls for calls for clear_draw_list" ) - clear_draw_list.indices, error = make( [dynamic]u32, cast(u64) glyph_draw_params.buffer_batch * 2 * 4 ) + clear_draw_list.indices, error = make( [dynamic]u32, len = 0, cap = glyph_draw_params.buffer_batch * 2 * 4 ) assert( error == .None, "VEFontCache.init : Failed to allocate calls for indices array for clear_draw_list" ) - clear_draw_list.vertices, error = make( [dynamic]Vertex, glyph_draw_params.buffer_batch * 2 * 4 ) + clear_draw_list.vertices, error = make( [dynamic]Vertex, len = 0, cap = glyph_draw_params.buffer_batch * 2 * 4 ) assert( error == .None, "VEFontCache.init : Failed to allocate vertices array for clear_draw_list" ) } @@ -395,7 +405,7 @@ configure_snap :: #force_inline proc( ctx : ^Context, snap_width, snap_height : get_cursor_pos :: #force_inline proc "contextless" ( ctx : ^Context ) -> Vec2 { return ctx.cursor_pos } set_colour :: #force_inline proc "contextless" ( ctx : ^Context, colour : Colour ) { ctx.colour = colour } -draw_text :: proc( ctx : ^Context, font : FontID, text_utf8 : string, position : Vec2, scale : Vec2 ) -> b32 +draw_text :: proc( ctx : ^Context, font : FontID, text_utf8 : string, position, scale : Vec2 ) -> b32 { // profile(#procedure) assert( ctx != nil ) @@ -471,24 +481,9 @@ measure_text_size :: proc( ctx : ^Context, font : FontID, text_utf8 : string ) - assert( ctx != nil ) assert( font >= 0 && int(font) < len(ctx.entries) ) - atlas := ctx.atlas - entry := & ctx.entries[ font ] - shaped := shape_text_cached( ctx, font, text_utf8, entry ) - padding := cast(f32) atlas.glyph_padding - - for index : i32 = 0; index < i32(len(shaped.glyphs)); index += 1 - { - glyph_index := shaped.glyphs[ index ] - if is_empty( ctx, entry, glyph_index ) do continue - - bounds_0, bounds_1 := parser_get_glyph_box( & entry.parser_info, 
glyph_index ) - bounds_size := bounds_1 - bounds_0 - - glyph_size := Vec2 { f32(bounds_size.x), f32(bounds_size.y) } * entry.size_scale - measured.y = max(measured.y, glyph_size.y) - } - measured.x = shaped.end_cursor_pos.x - return measured + entry := &ctx.entries[font] + shaped := shape_text_cached(ctx, font, text_utf8, entry) + return shaped.size } get_font_vertical_metrics :: #force_inline proc ( ctx : ^Context, font : FontID ) -> ( ascent, descent, line_gap : i32 ) diff --git a/atlas.odin b/atlas.odin index 180d5f2..c20a0fb 100644 --- a/atlas.odin +++ b/atlas.odin @@ -86,68 +86,110 @@ atlas_bbox :: proc( atlas : ^Atlas, region : AtlasRegionKind, local_idx : i32 ) return } -decide_codepoint_region :: proc( ctx : ^Context, entry : ^Entry, glyph_index : Glyph +// decide_codepoint_region :: proc( ctx : ^Context, entry : ^Entry, glyph_index : Glyph +// ) -> (region_kind : AtlasRegionKind, region : ^AtlasRegion, over_sample : Vec2) +// { +// if parser_is_glyph_empty( & entry.parser_info, glyph_index ) { +// region_kind = .None +// } + +// bounds_0, bounds_1 := parser_get_glyph_box( & entry.parser_info, glyph_index ) +// bounds_width := f32(bounds_1.x - bounds_0.x) +// bounds_height := f32(bounds_1.y - bounds_0.y) + +// atlas := & ctx.atlas +// glyph_buffer := & ctx.glyph_buffer + +// glyph_padding := f32(atlas.glyph_padding) * 2 + +// bounds_width_scaled := cast(u32) (bounds_width * entry.size_scale + glyph_padding) +// bounds_height_scaled := cast(u32) (bounds_height * entry.size_scale + glyph_padding) + +// if bounds_width_scaled <= atlas.region_a.width && bounds_height_scaled <= atlas.region_a.height +// { +// // Region A for small glyphs. These are good for things such as punctuation. +// region_kind = .A +// region = & atlas.region_a +// } +// else if bounds_width_scaled <= atlas.region_b.width && bounds_height_scaled <= atlas.region_b.height +// { +// // Region B for tall glyphs. These are good for things such as european alphabets. 
+// region_kind = .B +// region = & atlas.region_b +// } +// else if bounds_width_scaled <= atlas.region_c.width && bounds_height_scaled <= atlas.region_c.height +// { +// // Region C for big glyphs. These are good for things such as asian typography. +// region_kind = .C +// region = & atlas.region_c +// } +// else if bounds_width_scaled <= atlas.region_d.width && bounds_height_scaled <= atlas.region_d.height +// { +// // Region D for huge glyphs. These are good for things such as titles and 4k. +// region_kind = .D +// region = & atlas.region_d +// } +// else if bounds_width_scaled <= glyph_buffer.width && bounds_height_scaled <= glyph_buffer.height +// { +// // Region 'E' for massive glyphs. These are rendered uncached and un-oversampled. +// region_kind = .E +// region = nil +// if bounds_width_scaled <= glyph_buffer.width / 2 && bounds_height_scaled <= glyph_buffer.height / 2 { +// over_sample = { 2.0, 2.0 } +// } +// else { +// over_sample = { 1.0, 1.0 } +// } +// return +// } +// else { +// region_kind = .None +// return +// } + +// over_sample = glyph_buffer.over_sample +// assert(region != nil) +// return +// } + +decide_codepoint_region :: proc(ctx : ^Context, entry : ^Entry, glyph_index : Glyph ) -> (region_kind : AtlasRegionKind, region : ^AtlasRegion, over_sample : Vec2) { - if parser_is_glyph_empty( & entry.parser_info, glyph_index ) { - region_kind = .None + if parser_is_glyph_empty(&entry.parser_info, glyph_index) { + return .None, nil, {} } - bounds_0, bounds_1 := parser_get_glyph_box( & entry.parser_info, glyph_index ) - bounds_width := f32(bounds_1.x - bounds_0.x) - bounds_height := f32(bounds_1.y - bounds_0.y) + bounds_0, bounds_1 := parser_get_glyph_box(&entry.parser_info, glyph_index) + bounds_width := f32(bounds_1.x - bounds_0.x) + bounds_height := f32(bounds_1.y - bounds_0.y) - atlas := & ctx.atlas - glyph_buffer := & ctx.glyph_buffer + atlas := & ctx.atlas + glyph_buffer := & ctx.glyph_buffer + glyph_padding := f32( atlas.glyph_padding ) * 
2 - glyph_padding := f32(atlas.glyph_padding) * 2 + bounds_width_scaled := u32(bounds_width * entry.size_scale + glyph_padding) + bounds_height_scaled := u32(bounds_height * entry.size_scale + glyph_padding) - bounds_width_scaled := cast(u32) (bounds_width * entry.size_scale + glyph_padding) - bounds_height_scaled := cast(u32) (bounds_height * entry.size_scale + glyph_padding) - - if bounds_width_scaled <= atlas.region_a.width && bounds_height_scaled <= atlas.region_a.height - { - // Region A for small glyphs. These are good for things such as punctuation. - region_kind = .A - region = & atlas.region_a - } - else if bounds_width_scaled <= atlas.region_b.width && bounds_height_scaled <= atlas.region_b.height - { - // Region B for tall glyphs. These are good for things such as european alphabets. - region_kind = .B - region = & atlas.region_b - } - else if bounds_width_scaled <= atlas.region_c.width && bounds_height_scaled <= atlas.region_c.height - { - // Region C for big glyphs. These are good for things such as asian typography. - region_kind = .C - region = & atlas.region_c - } - else if bounds_width_scaled <= atlas.region_d.width && bounds_height_scaled <= atlas.region_d.height - { - // Region D for huge glyphs. These are good for things such as titles and 4k. - region_kind = .D - region = & atlas.region_d - } - else if bounds_width_scaled <= glyph_buffer.width && bounds_height_scaled <= glyph_buffer.height - { - // Region 'E' for massive glyphs. These are rendered uncached and un-oversampled. 
- region_kind = .E - region = nil - if bounds_width_scaled <= glyph_buffer.width / 2 && bounds_height_scaled <= glyph_buffer.height / 2 { - over_sample = { 2.0, 2.0 } - } - else { - over_sample = { 1.0, 1.0 } - } - return - } - else { - region_kind = .None - return + // Use a lookup table for faster region selection + region_lookup := [4]struct { kind: AtlasRegionKind, region: ^AtlasRegion } { + { .A, & atlas.region_a }, + { .B, & atlas.region_b }, + { .C, & atlas.region_c }, + { .D, & atlas.region_d }, } - over_sample = glyph_buffer.over_sample - assert(region != nil) - return + for region in region_lookup do if bounds_width_scaled <= region.region.width && bounds_height_scaled <= region.region.height { + return region.kind, region.region, glyph_buffer.over_sample + } + + if bounds_width_scaled <= glyph_buffer.width \ + && bounds_height_scaled <= glyph_buffer.height { + over_sample = \ + bounds_width_scaled <= glyph_buffer.width / 2 && + bounds_height_scaled <= glyph_buffer.height / 2 ? 
\ + {2.0, 2.0} \ + : {1.0, 1.0} + return .E, nil, over_sample + } + return .None, nil, {} } diff --git a/docs/draw_text_codepaths.pur b/docs/draw_text_codepaths.pur new file mode 100644 index 0000000..801029c Binary files /dev/null and b/docs/draw_text_codepaths.pur differ diff --git a/draw.odin b/draw.odin index 04f96a2..845311e 100644 --- a/draw.odin +++ b/draw.odin @@ -56,151 +56,109 @@ blit_quad :: proc( draw_list : ^DrawList, p0 : Vec2 = {0, 0}, p1 : Vec2 = {1, 1} // p0.x, p0.y, p1.x, p1.y, uv0.x, uv0.y, uv1.x, uv1.y); v_offset := cast(u32) len(draw_list.vertices) - vertex := Vertex { - {p0.x, p0.y}, - uv0.x, uv0.y + quadv : [4]Vertex = { + { + {p0.x, p0.y}, + uv0.x, uv0.y + }, + { + {p0.x, p1.y}, + uv0.x, uv1.y + }, + { + {p1.x, p0.y}, + uv1.x, uv0.y + }, + { + {p1.x, p1.y}, + uv1.x, uv1.y + } } - append_elem( & draw_list.vertices, vertex ) - - vertex = Vertex { - {p0.x, p1.y}, - uv0.x, uv1.y - } - append_elem( & draw_list.vertices, vertex ) - - vertex = Vertex { - {p1.x, p0.y}, - uv1.x, uv0.y - } - append_elem( & draw_list.vertices, vertex ) - - vertex = Vertex { - {p1.x, p1.y}, - uv1.x, uv1.y - } - append_elem( & draw_list.vertices, vertex ) + append( & draw_list.vertices, ..quadv[:] ) quad_indices : []u32 = { - 0, 1, 2, - 2, 1, 3 - } - for index : i32 = 0; index < 6; index += 1 { - append( & draw_list.indices, v_offset + quad_indices[ index ] ) + 0 + v_offset, 1 + v_offset, 2 + v_offset, + 2 + v_offset, 1 + v_offset, 3 + v_offset } + append( & draw_list.indices, ..quad_indices[:] ) return } -cache_glyph :: proc( ctx : ^Context, font : FontID, glyph_index : Glyph, entry : ^Entry, bounds_0, bounds_1 : Vec2, scale, translate : Vec2 ) -> b32 +cache_glyph :: proc(ctx : ^Context, font : FontID, glyph_index : Glyph, entry : ^Entry, bounds_0, bounds_1 : Vec2, scale, translate : Vec2) -> b32 { // profile(#procedure) if glyph_index == Glyph(0) { - // Note(Original Author): Glyph not in current hb_font return false } - // Retrieve the shape definition from the 
parser. - shape, error := parser_get_glyph_shape( & entry.parser_info, glyph_index ) - assert( error == .None ) + shape, error := parser_get_glyph_shape(&entry.parser_info, glyph_index) + assert(error == .None) if len(shape) == 0 { return false } - if ctx.debug_print_verbose - { - log( "shape:") - for vertex in shape - { - if vertex.type == .Move { - logf("move_to %d %d", vertex.x, vertex.y ) - } - else if vertex.type == .Line { - logf("line_to %d %d", vertex.x, vertex.y ) - } - else if vertex.type == .Curve { - logf("curve_to %d %d through %d %d", vertex.x, vertex.y, vertex.contour_x0, vertex.contour_y0 ) - } - else if vertex.type == .Cubic { - logf("cubic_to %d %d through %d %d and %d %d", - vertex.x, vertex.y, - vertex.contour_x0, vertex.contour_y0, - vertex.contour_x1, vertex.contour_y1 ) - } - } - } + outside := Vec2{bounds_0.x - 21, bounds_0.y - 33} - /* - Note(Original Author): - We need a random point that is outside our shape. We simply pick something diagonally across from top-left bound corner. - Note that this outside point is scaled alongside the glyph in ve_fontcache_draw_filled_path, so we don't need to handle that here. - */ - outside := Vec2 { - bounds_0.x - 21, - bounds_0.y - 33, - } - - // Note(Original Author): Figure out scaling so it fits within our box. - draw := DrawCall_Default + draw := DrawCall_Default draw.pass = FrameBufferPass.Glyph draw.start_index = u32(len(ctx.draw_list.indices)) - // Note(Original Author); - // Draw the path using simplified version of https://medium.com/@evanwallace/easy-scalable-text-rendering-on-the-gpu-c3f4d782c5ac. - // Instead of involving fragment shader code we simply make use of modern GPU ability to crunch triangles and brute force curve definitions. 
- path := ctx.temp_path - clear( & path) - for edge in shape do switch edge.type - { + path := &ctx.temp_path + clear(path) + + append_bezier_curve :: #force_inline proc(path: ^[dynamic]Vertex, p0, p1, p2: Vec2, quality: u32) { + step := 1.0 / f32(quality) + for index := u32(1); index <= quality; index += 1 { + alpha := f32(index) * step + append( path, Vertex { pos = eval_point_on_bezier3(p0, p1, p2, alpha) } ) + } + } + + append_bezier_curve_cubic :: #force_inline proc(path: ^[dynamic]Vertex, p0, p1, p2, p3: Vec2, quality: u32) { + step := 1.0 / f32(quality) + for index := u32(1); index <= quality; index += 1 { + alpha := f32(index) * step + append( path, Vertex { pos = eval_point_on_bezier4(p0, p1, p2, p3, alpha) } ) + } + } + + for edge in shape do #partial switch edge.type { case .Move: if len(path) > 0 { - draw_filled_path( & ctx.draw_list, outside, path[:], scale, translate, ctx.debug_print_verbose ) + draw_filled_path(&ctx.draw_list, outside, path[:], scale, translate, ctx.debug_print_verbose) + clear(path) } - clear( & path) fallthrough case .Line: - append( & path, Vec2{ f32(edge.x), f32(edge.y) }) + append( path, Vertex { pos = Vec2 { f32(edge.x), f32(edge.y)} } ) case .Curve: - assert( len(path) > 0 ) - p0 := path[ len(path) - 1 ] + assert(len(path) > 0) + p0 := path[ len(path) - 1].pos p1 := Vec2{ f32(edge.contour_x0), f32(edge.contour_y0) } p2 := Vec2{ f32(edge.x), f32(edge.y) } - - step := 1.0 / f32(ctx.curve_quality) - alpha := step - for index := i32(0); index < i32(ctx.curve_quality); index += 1 { - append( & path, eval_point_on_bezier3( p0, p1, p2, alpha )) - alpha += step - } + append_bezier_curve( path, p0, p1, p2, ctx.curve_quality ) case .Cubic: - assert( len(path) > 0 ) - p0 := path[ len(path) - 1] + assert( len(path) > 0) + p0 := path[ len(path) - 1].pos p1 := Vec2{ f32(edge.contour_x0), f32(edge.contour_y0) } p2 := Vec2{ f32(edge.contour_x1), f32(edge.contour_y1) } p3 := Vec2{ f32(edge.x), f32(edge.y) } - - step := 1.0 / 
f32(ctx.curve_quality) - alpha := step - for index := i32(0); index < i32(ctx.curve_quality); index += 1 { - append( & path, eval_point_on_bezier4( p0, p1, p2, p3, alpha )) - alpha += step - } - - case .None: - assert(false, "Unknown edge type or invalid") + append_bezier_curve_cubic( path, p0, p1, p2, p3, ctx.curve_quality ) } + if len(path) > 0 { - draw_filled_path( & ctx.draw_list, outside, path[:], scale, translate, ctx.debug_print_verbose ) + draw_filled_path(&ctx.draw_list, outside, path[:], scale, translate, ctx.debug_print_verbose) } - // Note(Original Author): Apend the draw call - draw.end_index = cast(u32) len(ctx.draw_list.indices) + draw.end_index = u32(len(ctx.draw_list.indices)) if draw.end_index > draw.start_index { - append(& ctx.draw_list.calls, draw) + append(&ctx.draw_list.calls, draw) } - parser_free_shape( & entry.parser_info, shape ) + parser_free_shape(&entry.parser_info, shape) return true } @@ -301,10 +259,9 @@ cache_glyph_to_atlas :: proc( ctx : ^Context, glyph_buffer.batch_x += i32(gwidth_scaled_px) screenspace_x_form( & glyph_draw_translate, & glyph_draw_scale, glyph_buffer_size ) - call : DrawCall + clear_target_region : DrawCall { - // Queue up clear on target region on atlas - using call + using clear_target_region pass = .Atlas region = .Ignore start_index = cast(u32) len(glyph_buffer.clear_draw_list.indices) @@ -314,9 +271,12 @@ cache_glyph_to_atlas :: proc( ctx : ^Context, { 1.0, 1.0 }, { 1.0, 1.0 } ) end_index = cast(u32) len(glyph_buffer.clear_draw_list.indices) - append( & glyph_buffer.clear_draw_list.calls, call ) + } - // Queue up a blit from glyph_update_FBO to the atlas + blit_to_atlas : DrawCall + { + using blit_to_atlas + pass = .Atlas region = .None start_index = cast(u32) len(glyph_buffer.draw_list.indices) @@ -325,14 +285,17 @@ cache_glyph_to_atlas :: proc( ctx : ^Context, src_position, src_position + src_size ) end_index = cast(u32) len(glyph_buffer.draw_list.indices) - append( & glyph_buffer.draw_list.calls, call ) } 
+ append( & glyph_buffer.clear_draw_list.calls, clear_target_region ) + append( & glyph_buffer.draw_list.calls, blit_to_atlas ) + // Render glyph to glyph_update_FBO cache_glyph( ctx, font, glyph_index, entry, vec2(bounds_0), vec2(bounds_1), glyph_draw_scale, glyph_draw_translate ) } -can_batch_glyph :: #force_inline proc( ctx : ^Context, font : FontID, entry : ^Entry, glyph_index : Glyph, +// If the glyph is found in the atlas, nothing occurs; otherwise, the glyph call is set up to cache it to the atlas +check_glyph_in_atlas :: #force_inline proc( ctx : ^Context, font : FontID, entry : ^Entry, glyph_index : Glyph, lru_code : u64, atlas_index : i32, region_kind : AtlasRegionKind, @@ -402,7 +365,7 @@ directly_draw_massive_glyph :: proc( ctx : ^Context, // Figure out the source rect. glyph_position := Vec2 {} glyph_size := vec2(glyph_padding_dbl) - glyph_dst_size := glyph_size + bounds_scaled + glyph_dst_size := glyph_size + bounds_scaled glyph_size += bounds_scaled * over_sample // Figure out the destination rect. @@ -415,9 +378,11 @@ directly_draw_massive_glyph :: proc( ctx : ^Context, textspace_x_form( & glyph_position, & glyph_size, glyph_buffer_size ) // Add the glyph drawcall. - call : DrawCall + calls : [2]DrawCall + + draw_to_target := & calls[0] { - using call + using draw_to_target pass = .Target_Uncached colour = ctx.colour start_index = u32(len(ctx.draw_list.indices)) @@ -427,18 +392,20 @@ directly_draw_massive_glyph :: proc( ctx : ^Context, glyph_position, glyph_position + glyph_size ) end_index = u32(len(ctx.draw_list.indices)) - append( & ctx.draw_list.calls, call ) } - // Clear glyph_update_FBO. - call.pass = .Glyph - call.start_index = 0 - call.end_index = 0 - call.clear_before_draw = true - append( & ctx.draw_list.calls, call ) + clear_glyph_update := & calls[1] + { + // Clear glyph_update_FBO. 
+ clear_glyph_update.pass = .Glyph + clear_glyph_update.start_index = 0 + clear_glyph_update.end_index = 0 + clear_glyph_update.clear_before_draw = true + } + append( & ctx.draw_list.calls, ..calls[:] ) } -draw_cached_glyph :: proc( ctx : ^Context, +draw_cached_glyph :: proc( ctx : ^Context, shaped : ^ShapedText, entry : ^Entry, glyph_index : Glyph, lru_code : u64, @@ -480,26 +447,45 @@ draw_cached_glyph :: proc( ctx : ^Context, bounds_0_scaled := bounds_0 * entry.size_scale //- { 0.5, 0.5 } bounds_0_scaled = ceil(bounds_0_scaled) - dst := position + bounds_0_scaled * scale - dst -= glyph_padding * scale - dst_scale := glyph_scale * scale + dst := position + (bounds_0_scaled - glyph_padding) * scale + dst_scale := glyph_scale * scale textspace_x_form( & slot_position, & glyph_scale, atlas_size ) - // Add the glyph drawcall - call := DrawCall_Default + // Shape call setup + when false { - using call - pass = .Target - colour = ctx.colour - start_index = cast(u32) len(ctx.draw_list.indices) + call := DrawCall_Default + { + using call + pass = .Target + colour = ctx.colour + start_index = cast(u32) len(shaped.draw_list.indices) - blit_quad( & ctx.draw_list, - dst, dst + dst_scale, - slot_position, slot_position + glyph_scale ) - end_index = cast(u32) len(ctx.draw_list.indices) + blit_quad( & shaped.draw_list, + dst, dst + dst_scale, + slot_position, slot_position + glyph_scale ) + end_index = cast(u32) len(shaped.draw_list.indices) + } + append( & shaped.draw_list.calls, call ) + } + else + { + // Add the glyph drawcall + call := DrawCall_Default + { + using call + pass = .Target + colour = ctx.colour + start_index = cast(u32) len(ctx.draw_list.indices) + + blit_quad( & ctx.draw_list, + dst, dst + dst_scale, + slot_position, slot_position + glyph_scale ) + end_index = cast(u32) len(ctx.draw_list.indices) + } + append( & ctx.draw_list.calls, call ) } - append( & ctx.draw_list.calls, call ) return true } @@ -509,7 +495,7 @@ draw_cached_glyph :: proc( ctx : ^Context, // 
Note(Original Author):
 // WARNING: doesn't actually append drawcall; caller is responsible for actually appending the drawcall.
 // ve_fontcache_draw_filled_path
-draw_filled_path :: proc( draw_list : ^DrawList, outside_point : Vec2, path : []Vec2,
+draw_filled_path :: proc( draw_list : ^DrawList, outside_point : Vec2, path : []Vertex, // path elements are Vertex values; only their pos is scaled/translated below
 	scale := Vec2 { 1, 1 },
 	translate := Vec2 { 0, 0 },
 	debug_print_verbose : b32 = false
@@ -519,19 +505,16 @@ draw_filled_path :: proc( draw_list : ^DrawList, outside_point : Vec2, path : []
 	{
 		log("outline_path:")
 		for point in path {
-			vec := point * scale + translate
+			vec := point.pos * scale + translate
 			logf(" %0.2f %0.2f", vec.x, vec.y )
 		}
 	}
 
 	v_offset := cast(u32) len(draw_list.vertices)
 	for point in path {
-		vertex := Vertex {
-			pos = point * scale + translate,
-			u = 0,
-			v = 0,
-		}
-		append( & draw_list.vertices, vertex )
+		point := point // local copy so pos can be rewritten
+		point.pos = point.pos * scale + translate
+		append( & draw_list.vertices, point ) // the remaining Vertex fields are appended as the caller supplied them
 	}
 
 	outside_vertex := cast(u32) len(draw_list.vertices)
@@ -546,42 +529,71 @@ draw_filled_path :: proc( draw_list : ^DrawList, outside_point : Vec2, path : []
 	for index : u32 = 1; index < cast(u32) len(path); index += 1
 	{
 		indices := & draw_list.indices
-		append( indices, outside_vertex )
-		append( indices, v_offset + index - 1 )
-		append( indices, v_offset + index )
+		to_add := [3]u32 { // three indices per path segment: the outside vertex and two consecutive path vertices
+			outside_vertex,
+			v_offset + index - 1,
+			v_offset + index
+		}
+		append( indices, ..to_add[:] )
 	}
 }
 
-draw_text_batch :: proc( ctx : ^Context, entry : ^Entry, shaped : ^ShapedText,
+draw_text_batch :: proc(ctx: ^Context, entry: ^Entry, shaped: ^ShapedText, // emits draw data for the shaped glyphs in [batch_start_idx, batch_end_idx)
 	batch_start_idx, batch_end_idx : i32,
-	position, scale : Vec2,
-	snap_width, snap_height : f32 )
+	position, scale : Vec2,
+	snap_width, snap_height : f32 ) // NOTE(review): snap_width / snap_height are not read in the body below
 {
-	flush_glyph_buffer_to_atlas( ctx )
+	flush_glyph_buffer_to_atlas(ctx)
+
+	atlas := & ctx.atlas // atlas values are read once here and reused for every glyph in the batch
+	atlas_size := Vec2{ f32(atlas.width), f32(atlas.height) }
+	glyph_padding := f32(atlas.glyph_padding)
+
 	for index := batch_start_idx; index < batch_end_idx; index += 1
 	{
-		glyph_index := shaped.glyphs[ index ]
+		glyph_index := shaped.glyphs[index]
 
-		if glyph_index == 0 do continue
-		if parser_is_glyph_empty( & entry.parser_info, glyph_index ) do continue
+		if glyph_index == 0 || parser_is_glyph_empty( & entry.parser_info, glyph_index) do continue
 
-		region_kind, region, over_sample := decide_codepoint_region( ctx, entry, glyph_index )
-		lru_code := font_glyph_lru_code(entry.id, glyph_index)
-		atlas_index := cast(i32) -1
+		region_kind, region, over_sample := decide_codepoint_region( ctx, entry, glyph_index )
+		lru_code := font_glyph_lru_code( entry.id, glyph_index )
+		atlas_index := region_kind != .E ? LRU_get( & region.state, lru_code ) : -1 // region E skips the lookup; LRU_get yields -1 when lru_code has no entry
+		bounds_0, bounds_1 := parser_get_glyph_box( & entry.parser_info, glyph_index )
+		vbounds_0 := vec2(bounds_0)
+		vbounds_1 := vec2(bounds_1)
+		bounds_size := Vec2 { vbounds_1.x - vbounds_0.x, vbounds_1.y - vbounds_0.y }
 
-		if region_kind != .E do atlas_index = LRU_get( & region.state, lru_code )
-		bounds_0, bounds_1 := parser_get_glyph_box( & entry.parser_info, glyph_index )
+		shaped_position := shaped.positions[index]
+		glyph_translate := position + shaped_position * scale
 
-		shaped_position := shaped.positions[index]
-		glyph_translate := position + shaped_position * scale
+		if region_kind == .E // region E glyphs are handed to directly_draw_massive_glyph instead of being blitted from the atlas
+		{
+			directly_draw_massive_glyph(ctx, entry, glyph_index,
+				vbounds_0, vbounds_1,
+				bounds_size,
+				over_sample, glyph_translate, scale )
+		}
+		else if atlas_index != -1 // NOTE(review): an atlas miss is skipped silently here; the removed code asserted that draw_cached_glyph returned true
+		{
+			slot_position, _ := atlas_bbox( atlas, region_kind, atlas_index )
+			glyph_scale := bounds_size * entry.size_scale + glyph_padding
+			bounds_0_scaled := ceil( vbounds_0 * entry.size_scale )
+			dst := glyph_translate + (bounds_0_scaled - glyph_padding) * scale
+			dst_scale := glyph_scale * scale // computed before textspace_x_form rewrites glyph_scale
+			textspace_x_form( & slot_position, & glyph_scale, atlas_size ) // rewrites slot_position and glyph_scale in place relative to atlas_size
 
-		glyph_cached := draw_cached_glyph( ctx,
-			entry, glyph_index,
-			lru_code, atlas_index,
-			vec2(bounds_0), vec2(bounds_1),
-			region_kind, region, over_sample,
-			glyph_translate, scale)
-		assert( glyph_cached == true )
+			call := DrawCall_Default
+			call.pass = .Target
+			call.colour = ctx.colour
+			call.start_index = u32(len(ctx.draw_list.indices)) // index count before blit_quad runs
+
+			blit_quad(&ctx.draw_list,
+				dst, dst + dst_scale,
+				slot_position, slot_position + glyph_scale )
+
+			call.end_index = u32(len(ctx.draw_list.indices)) // index count after blit_quad runs
+			append(&ctx.draw_list.calls, call)
+		}
 	}
 }
 
@@ -594,7 +606,6 @@ draw_text_shape :: proc( ctx : ^Context,
 	snap_width, snap_height : f32 ) -> (cursor_pos : Vec2)
 {
-	// position := position //+ ctx.cursor_pos * scale
 	// profile(#procedure)
 	batch_start_idx : i32 = 0
 	for index : i32 = 0; index < cast(i32) len(shaped.glyphs); index += 1
@@ -607,9 +618,9 @@ draw_text_shape :: proc( ctx : ^Context,
 		atlas_index := cast(i32) -1
 
 		if region_kind != .E do atlas_index = LRU_get( & region.state, lru_code )
-		if can_batch_glyph( ctx, font, entry, glyph_index, lru_code, atlas_index, region_kind, region, over_sample ) do continue
+		if check_glyph_in_atlas( ctx, font, entry, glyph_index, lru_code, atlas_index, region_kind, region, over_sample ) do continue
 
-		// Glyph has not been catched, needs to be directly drawn.
+		// We can no longer directly append the shape as it has missing glyphs in the atlas
 		// First batch the other cached glyphs
 		// flush_glyph_buffer_to_atlas(ctx)
@@ -621,10 +632,10 @@ draw_text_shape :: proc( ctx : ^Context,
 		batch_start_idx = index
 	}
 
-	// flush_glyph_buffer_to_atlas(ctx)
 	draw_text_batch( ctx, entry, shaped, batch_start_idx, cast(i32) len(shaped.glyphs), position, scale, snap_width , snap_height )
 	reset_batch_codepoint_state( ctx )
-	cursor_pos = shaped.end_cursor_pos
+
+	cursor_pos = position + shaped.end_cursor_pos * scale
 	return
 }
 
@@ -650,6 +661,34 @@ flush_glyph_buffer_to_atlas :: proc( ctx : ^Context )
 	}
 }
 
+// flush_glyph_buffer_to_atlas :: proc( ctx : ^Context )
+// {
+// 	// profile(#procedure)
+// 	// Flush drawcalls to draw list
+// 	if len(ctx.glyph_buffer.clear_draw_list.calls) > 0 {
+// 		merge_draw_list( & ctx.draw_list, & ctx.glyph_buffer.clear_draw_list)
+// 		clear_draw_list( & ctx.glyph_buffer.clear_draw_list)
+// 	}
+
+// 	if len(ctx.glyph_buffer.draw_list.calls) > 0 {
+// 		merge_draw_list( & ctx.draw_list, & ctx.glyph_buffer.draw_list)
+// 		clear_draw_list( & ctx.glyph_buffer.draw_list)
+// 	}
+
+// 	// Clear glyph_update_FBO
+// 	if ctx.glyph_buffer.batch_x != 0
+// 	{
+// 		call := DrawCall {
+// 			pass = .Glyph,
+// 			start_index = 0,
+// 			end_index = 0,
+// 			clear_before_draw = true,
+// 		}
+// 		append( & ctx.draw_list.calls, call)
+// 		ctx.glyph_buffer.batch_x = 0
+// 	}
+// }
+
 // ve_fontcache_merge_drawlist
 merge_draw_list :: proc( dst, src : ^DrawList )
 {
@@ -677,42 +716,47 @@ merge_draw_list :: proc( dst, src : ^DrawList )
 	}
 }
 
-optimize_draw_list :: proc( draw_list : ^DrawList, call_offset : int )
-{
+// Merges runs of compatible, index-contiguous draw calls in draw_list.calls[call_offset:]
+// in place, then truncates the array to the calls that remain.
+optimize_draw_list :: proc(draw_list: ^DrawList, call_offset: int) {
 	// profile(#procedure)
-	assert( draw_list != nil )
+	assert(draw_list != nil)
 
-	write_index : int = call_offset
-	for index : int = 1 + call_offset; index < len(draw_list.calls); index += 1
+	// Nothing to merge when call_offset is at or past the end of the list. Returning here also
+	// stops the final resize( write_index + 1 ) from growing the array with a zeroed draw call.
+	if call_offset >= len(draw_list.calls) do return
+
+	// b can be folded into a when both share pass, region and colour, b's indices start
+	// where a's end, and b does not request a clear before drawing.
+	can_merge_draw_calls :: #force_inline proc "contextless" ( a, b : ^DrawCall ) -> bool {
+		result := \
+			a.pass == b.pass &&
+			a.end_index == b.start_index &&
+			a.region == b.region &&
+			a.colour == b.colour &&
+			! b.clear_before_draw
+		return result
+	}
+
+	// write_index is the last call kept so far; read_index scans the calls after it.
+	write_index := call_offset
+	for read_index := call_offset + 1; read_index < len(draw_list.calls); read_index += 1
 	{
-		assert( write_index <= index )
-		draw_0 := & draw_list.calls[ write_index ]
-		draw_1 := & draw_list.calls[ index ]
+		draw_current := & draw_list.calls[write_index]
+		draw_next := & draw_list.calls[read_index]
 
-		merge : b32 = true
-		if draw_0.pass != draw_1.pass do merge = false
-		if draw_0.end_index != draw_1.start_index do merge = false
-		if draw_0.region != draw_1.region do merge = false
-		if draw_1.clear_before_draw do merge = false
-		if draw_0.colour != draw_1.colour do merge = false
-
-		if merge
-		{
-			// logf("merging %v : %v %v", draw_0.pass, write_index, index )
-			draw_0.end_index = draw_1.end_index
-			draw_1.start_index = 0
-			draw_1.end_index = 0
+		if can_merge_draw_calls(draw_current, draw_next) {
+			// Extend the kept call over the merged call's index range.
+			draw_current.end_index = draw_next.end_index
 		}
-		else
-		{
-			// logf("can't merge %v : %v %v", draw_0.pass, write_index, index )
+		else {
+			// Move to the next write position and copy the draw call
 			write_index += 1
-			if write_index != index {
-				draw_2 := & draw_list.calls[ write_index ]
-				draw_2^ = draw_1^
+			if write_index != read_index {
+				draw_list.calls[write_index] = (draw_next^)
 			}
 		}
 	}
 
-	resize( & draw_list.calls, write_index + 1 )
+	resize( & draw_list.calls, write_index + 1)
 }
diff --git a/mappings.odin b/mappings.odin
index db538fa..e575c15 100644
--- a/mappings.odin
+++ b/mappings.odin
@@ -23,10 +23,10 @@ import "core:mem"
 	Arena :: mem.Arena
 	arena_allocator :: mem.arena_allocator
 	arena_init :: mem.arena_init
-// import "codebase:grime"
-	// log :: grime.log
-	// logf :: grime.logf
-	// profile :: grime.profile
+import "codebase:grime"
+	log :: grime.log
+	logf :: grime.logf
+	profile :: grime.profile
 
 //#region("Proc overload mappings")
 
diff --git a/misc.odin b/misc.odin
index a27b04c..cba3de4 100644 --- a/misc.odin +++ b/misc.odin @@ -1,7 +1,10 @@ package VEFontCache import "base:runtime" -import core_log "core:log" +import "core:simd" +import "core:math" + +// import core_log "core:log" Colour :: [4]f32 Vec2 :: [2]f32 @@ -17,23 +20,23 @@ vec2i_from_vec2 :: #force_inline proc "contextless" ( v2 : Vec2 ) -> Vec2 // This buffer is used below excluisvely to prevent any allocator recusion when verbose logging from allocators. // This means a single line is limited to 32k buffer (increase naturally if this SOMEHOW becomes a bottleneck...) -Logger_Allocator_Buffer : [32 * Kilobyte]u8 +// Logger_Allocator_Buffer : [32 * Kilobyte]u8 -log :: proc( msg : string, level := core_log.Level.Info, loc := #caller_location ) { - temp_arena : Arena; arena_init(& temp_arena, Logger_Allocator_Buffer[:]) - context.allocator = arena_allocator(& temp_arena) - context.temp_allocator = arena_allocator(& temp_arena) +// log :: proc( msg : string, level := core_log.Level.Info, loc := #caller_location ) { +// temp_arena : Arena; arena_init(& temp_arena, Logger_Allocator_Buffer[:]) +// context.allocator = arena_allocator(& temp_arena) +// context.temp_allocator = arena_allocator(& temp_arena) - core_log.log( level, msg, location = loc ) -} +// core_log.log( level, msg, location = loc ) +// } -logf :: proc( fmt : string, args : ..any, level := core_log.Level.Info, loc := #caller_location ) { - temp_arena : Arena; arena_init(& temp_arena, Logger_Allocator_Buffer[:]) - context.allocator = arena_allocator(& temp_arena) - context.temp_allocator = arena_allocator(& temp_arena) +// logf :: proc( fmt : string, args : ..any, level := core_log.Level.Info, loc := #caller_location ) { +// temp_arena : Arena; arena_init(& temp_arena, Logger_Allocator_Buffer[:]) +// context.allocator = arena_allocator(& temp_arena) +// context.temp_allocator = arena_allocator(& temp_arena) - core_log.logf( level, fmt, ..args, location = loc ) -} +// core_log.logf( level, fmt, ..args, 
location = loc ) +// } reload_array :: proc( self : ^[dynamic]$Type, allocator : Allocator ) { raw := transmute( ^runtime.Raw_Dynamic_Array) self @@ -50,61 +53,6 @@ font_glyph_lru_code :: #force_inline proc "contextless" ( font : FontID, glyph_i return } -shape_lru_hash :: #force_inline proc "contextless" ( label : string ) -> u64 { - hash : u64 - for str_byte in transmute([]byte) label { - hash = ((hash << 8) + hash) + u64(str_byte) - } - return hash -} - -// For a provided alpha value, -// allows the function to calculate the position of a point along the curve at any given fraction of its total length -// ve_fontcache_eval_bezier (quadratic) -eval_point_on_bezier3 :: #force_inline proc "contextless" ( p0, p1, p2 : Vec2, alpha : f32 ) -> Vec2 -{ - p0 := vec2_64(p0) - p1 := vec2_64(p1) - p2 := vec2_64(p2) - alpha := f64(alpha) - - weight_start := (1 - alpha) * (1 - alpha) - weight_control := 2.0 * (1 - alpha) * alpha - weight_end := alpha * alpha - - starting_point := p0 * weight_start - control_point := p1 * weight_control - end_point := p2 * weight_end - - point := starting_point + control_point + end_point - return { f32(point.x), f32(point.y) } -} - -// For a provided alpha value, -// allows the function to calculate the position of a point along the curve at any given fraction of its total length -// ve_fontcache_eval_bezier (cubic) -eval_point_on_bezier4 :: #force_inline proc "contextless" ( p0, p1, p2, p3 : Vec2, alpha : f32 ) -> Vec2 -{ - p0 := vec2_64(p0) - p1 := vec2_64(p1) - p2 := vec2_64(p2) - p3 := vec2_64(p3) - alpha := f64(alpha) - - weight_start := (1 - alpha) * (1 - alpha) * (1 - alpha) - weight_c_a := 3 * (1 - alpha) * (1 - alpha) * alpha - weight_c_b := 3 * (1 - alpha) * alpha * alpha - weight_end := alpha * alpha * alpha - - start_point := p0 * weight_start - control_a := p1 * weight_c_a - control_b := p2 * weight_c_b - end_point := p3 * weight_end - - point := start_point + control_a + control_b + end_point - return { f32(point.x), 
f32(point.y) }
-}
-
 is_empty :: #force_inline proc ( ctx : ^Context, entry : ^Entry, glyph_index : Glyph ) -> b32 {
 	if glyph_index == 0 do return true
@@ -122,8 +70,9 @@ reset_batch_codepoint_state :: #force_inline proc( ctx : ^Context ) {
 	ctx.temp_codepoint_seen_num = 0
 }
 
-screenspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2, size : Vec2 ) {
-	when true
+screenspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2, size : Vec2 )
+{
+	if true
 	{
 		pos_64 := vec2_64_from_vec2(position^)
 		scale_64 := vec2_64_from_vec2(scale^)
@@ -137,14 +86,21 @@ screenspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2
 	}
 	else
 	{
+		pos := position^
+		scale_32 := scale^
+
 		quotient : Vec2 = 1.0 / size
-		(position^) *= quotient * 2.0 - 1.0
-		(scale^) *= quotient * 2.0
+		pos = pos * quotient * 2.0 - 1.0
+		scale_32 = scale_32 * quotient * 2.0
+
+		(position^) = pos
+		(scale^) = scale_32
 	}
 }
 
-textspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2, size : Vec2 ) {
-	when true
+textspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2, size : Vec2 )
+{
+	if true
 	{
 		pos_64 := vec2_64_from_vec2(position^)
 		scale_64 := vec2_64_from_vec2(scale^)
@@ -158,8 +114,136 @@ textspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2,
 	}
 	else
 	{
-		quotient : Vec2 = 1.0 / size
+		quotient : Vec2 = 1.0 / size
 		(position^) *= quotient
 		(scale^) *= quotient
 	}
 }
+
+Use_SIMD_For_Bezier_Ops :: true
+
+when ! Use_SIMD_For_Bezier_Ops
+{
+	// Scalar path. Evaluates the quadratic bezier with end points p0, p2 and control point p1 at alpha.
+	// alpha is widened to f64 and the points go through vec2_64 before weighting; the sum is narrowed to f32 on return.
+	// ve_fontcache_eval_bezier (quadratic)
+	eval_point_on_bezier3 :: #force_inline proc "contextless" ( p0, p1, p2 : Vec2, alpha : f32 ) -> Vec2
+	{
+		t := f64(alpha)
+		inv_t := 1 - t
+
+		term_start := vec2_64(p0) * (inv_t * inv_t)
+		term_control := vec2_64(p1) * (2.0 * inv_t * t)
+		term_end := vec2_64(p2) * (t * t)
+
+		sum := term_start + term_control + term_end
+		return { f32(sum.x), f32(sum.y) }
+	}
+
+	// Scalar path. Evaluates the cubic bezier with end points p0, p3 and control points p1, p2 at alpha.
+	// Uses the same f64 intermediates as the quadratic version above.
+	// ve_fontcache_eval_bezier (cubic)
+	eval_point_on_bezier4 :: #force_inline proc "contextless" ( p0, p1, p2, p3 : Vec2, alpha : f32 ) -> Vec2
+	{
+		t := f64(alpha)
+		inv_t := 1 - t
+
+		term_start := vec2_64(p0) * (inv_t * inv_t * inv_t)
+		term_c_a := vec2_64(p1) * (3 * inv_t * inv_t * t)
+		term_c_b := vec2_64(p2) * (3 * inv_t * t * t)
+		term_end := vec2_64(p3) * (t * t * t)
+
+		sum := term_start + term_c_a + term_c_b + term_end
+		return { f32(sum.x), f32(sum.y) }
+	}
+}
+else
+{
+	// A Vec2 travels in lanes 0 and 1 of a 4-lane f32 vector; vec2_to_simd zero-fills lanes 2 and 3.
+	Vec2_SIMD :: simd.f32x4
+
+	// Widens v into a Vec2_SIMD.
+	vec2_to_simd :: #force_inline proc "contextless" (v: Vec2) -> Vec2_SIMD {
+		return Vec2_SIMD{ v.x, v.y, 0, 0 }
+	}
+
+	// Reads lanes 0 and 1 back out as a Vec2; lanes 2 and 3 are ignored.
+	simd_to_vec2 :: #force_inline proc "contextless" (v: Vec2_SIMD) -> Vec2 {
+		return { simd.extract(v, 0), simd.extract(v, 1) }
+	}
+
+	// Component-wise a + b.
+	vec2_add_simd :: #force_inline proc "contextless" (a, b: Vec2) -> Vec2 {
+		return simd_to_vec2( simd.add( vec2_to_simd(a), vec2_to_simd(b) ) )
+	}
+
+	// Component-wise a - b.
+	vec2_sub_simd :: #force_inline proc "contextless" (a, b: Vec2) -> Vec2 {
+		return simd_to_vec2( simd.sub( vec2_to_simd(a), vec2_to_simd(b) ) )
+	}
+
+	// a with both components multiplied by s.
+	vec2_mul_simd :: #force_inline proc "contextless" (a: Vec2, s: f32) -> Vec2 {
+		return simd_to_vec2( simd.mul( vec2_to_simd(a), Vec2_SIMD{ s, s, s, s } ) )
+	}
+
+	// a with both components divided by s; s is not checked against zero.
+	vec2_div_simd :: #force_inline proc "contextless" (a: Vec2, s: f32) -> Vec2 {
+		return simd_to_vec2( simd.div( vec2_to_simd(a), Vec2_SIMD{ s, s, s, s } ) )
+	}
+
+	// Dot product. All four lanes are summed; lanes 2 and 3 contribute 0 because vec2_to_simd zeroes them.
+	vec2_dot_simd :: #force_inline proc "contextless" (a, b: Vec2) -> f32 {
+		return simd.reduce_add_ordered( simd.mul( vec2_to_simd(a), vec2_to_simd(b) ) )
+	}
+
+	// Squared length of a.
+	vec2_length_sqr_simd :: #force_inline proc "contextless" (a: Vec2) -> f32 {
+		return vec2_dot_simd(a, a)
+	}
+
+	// Length of a.
+	vec2_length_simd :: #force_inline proc "contextless" (a: Vec2) -> f32 {
+		return math.sqrt( vec2_dot_simd(a, a) )
+	}
+
+	// Unit-length copy of a. a is returned unchanged when its length is not greater than zero
+	// (which also covers a NaN length).
+	vec2_normalize_simd :: #force_inline proc "contextless" (a: Vec2) -> Vec2 {
+		length := vec2_length_simd(a)
+		if ! (length > 0) do return a
+		return vec2_mul_simd( a, 1.0 / length )
+	}
+
+	// SIMD path. Evaluates the quadratic bezier with end points p0, p2 and control point p1 at alpha.
+	// Unlike the scalar path, all arithmetic here stays in f32.
+	eval_point_on_bezier3 :: #force_inline proc "contextless" (p0, p1, p2: Vec2, alpha: f32) -> Vec2
+	{
+		inv_alpha := 1.0 - alpha
+		// Lane i holds the weight for point i; swizzle broadcasts it across all lanes below.
+		weights := Vec2_SIMD{ inv_alpha * inv_alpha, 2.0 * inv_alpha * alpha, alpha * alpha, 0 }
+
+		term_0 := simd.mul( vec2_to_simd(p0), simd.swizzle(weights, 0, 0, 0, 0) )
+		term_1 := simd.mul( vec2_to_simd(p1), simd.swizzle(weights, 1, 1, 1, 1) )
+		term_2 := simd.mul( vec2_to_simd(p2), simd.swizzle(weights, 2, 2, 2, 2) )
+
+		// The grouping (term_0 + term_1) + term_2 is deliberate: f32 addition is order sensitive.
+		return simd_to_vec2( simd.add( simd.add(term_0, term_1), term_2 ) )
+	}
+
+	// SIMD path. Evaluates the cubic bezier with end points p0, p3 and control points p1, p2 at alpha.
+	// All arithmetic stays in f32.
+	eval_point_on_bezier4 :: #force_inline proc "contextless" (p0, p1, p2, p3: Vec2, alpha: f32) -> Vec2
+	{
+		inv_alpha := 1.0 - alpha
+		weights := Vec2_SIMD{ inv_alpha * inv_alpha * inv_alpha, 3 * inv_alpha * inv_alpha * alpha, 3 * inv_alpha * alpha * alpha, alpha * alpha * alpha }
+
+		term_0 := simd.mul( vec2_to_simd(p0), simd.swizzle(weights, 0, 0, 0, 0) )
+		term_1 := simd.mul( vec2_to_simd(p1), simd.swizzle(weights, 1, 1, 1, 1) )
+		term_2 := simd.mul( vec2_to_simd(p2), simd.swizzle(weights, 2, 2, 2, 2) )
+		term_3 := simd.mul( vec2_to_simd(p3), simd.swizzle(weights, 3, 3, 3, 3) )
+
+		// The grouping (term_0 + term_1) + (term_2 + term_3) is deliberate: f32 addition is order sensitive.
+		return simd_to_vec2( simd.add( simd.add(term_0, term_1), simd.add(term_2, term_3) ) )
+	}
+}
diff --git a/shaped_text.odin b/shaped_text.odin
index 510ed30..1ac55c2 100644
--- a/shaped_text.odin
+++ b/shaped_text.odin
@@ -1,11 +1,10 @@
 package VEFontCache
 
-import "core:math"
-
 ShapedText :: struct {
 	glyphs : [dynamic]Glyph,
 	positions : [dynamic]Vec2,
 	end_cursor_pos : Vec2,
+	size : Vec2,
 }
 
 ShapedTextCache :: struct {
@@ -14,36 +13,33 @@ ShapedTextCache :: struct {
 	next_cache_id : i32,
 }
 
+shape_lru_hash :: #force_inline proc "contextless" ( hash : ^u64, bytes : []byte ) {
+	for value in bytes {
+		(hash^) = (( (hash^) << 8) + (hash^) ) + u64(value)
+	}
+}
+
 shape_text_cached :: proc( ctx : ^Context, font : FontID, text_utf8 : string, entry : ^Entry ) -> ^ShapedText
 {
 	// profile(#procedure)
-	@static buffer : [64 * Kilobyte]byte
+	font := font
+	font_bytes := slice_ptr( transmute(^byte) & font, size_of(FontID) )
+	text_bytes := transmute( []byte) text_utf8
 
-	font := font
-	text_size := len(text_utf8)
-	sice_end_offset :=
size_of(FontID) + len(text_utf8) - - buffer_slice := buffer[:] - font_bytes := slice_ptr( transmute(^byte) & font, size_of(FontID) ) - copy( buffer_slice, font_bytes ) - - text_bytes := transmute( []byte) text_utf8 - buffer_slice_post_font := buffer[ size_of(FontID) : sice_end_offset ] - copy( buffer_slice_post_font, text_bytes ) - - hash := shape_lru_hash( transmute(string) buffer[: sice_end_offset ] ) + lru_code : u64 + shape_lru_hash( & lru_code, font_bytes ) + shape_lru_hash( & lru_code, text_bytes ) shape_cache := & ctx.shape_cache state := & ctx.shape_cache.state - shape_cache_idx := LRU_get( state, hash ) + shape_cache_idx := LRU_get( state, lru_code ) if shape_cache_idx == -1 { if shape_cache.next_cache_id < i32(state.capacity) { shape_cache_idx = shape_cache.next_cache_id shape_cache.next_cache_id += 1 - evicted := LRU_put( state, hash, shape_cache_idx ) - assert( evicted == hash ) + evicted := LRU_put( state, lru_code, shape_cache_idx ) } else { @@ -53,16 +49,16 @@ shape_text_cached :: proc( ctx : ^Context, font : FontID, text_utf8 : string, en shape_cache_idx = LRU_peek( state, next_evict_idx, must_find = true ) assert( shape_cache_idx != - 1 ) - LRU_put( state, hash, shape_cache_idx ) + LRU_put( state, lru_code, shape_cache_idx ) } - shape_text_uncached( ctx, font, text_utf8, entry, & shape_cache.storage[ shape_cache_idx ] ) + shape_entry := & shape_cache.storage[ shape_cache_idx ] + shape_text_uncached( ctx, font, text_utf8, entry, shape_entry ) } return & shape_cache.storage[ shape_cache_idx ] } -// TODO(Ed): Make position rounding an option shape_text_uncached :: proc( ctx : ^Context, font : FontID, text_utf8 : string, entry : ^Entry, output : ^ShapedText ) { // profile(#procedure) @@ -74,12 +70,17 @@ shape_text_uncached :: proc( ctx : ^Context, font : FontID, text_utf8 : string, clear( & output.glyphs ) clear( & output.positions ) - ascent, descent, line_gap := parser_get_font_vertical_metrics( & entry.parser_info ) + ascent_i32, descent_i32, 
line_gap_i32 := parser_get_font_vertical_metrics( & entry.parser_info ) + ascent := f32(ascent_i32) + descent := f32(descent_i32) + line_gap := f32(line_gap_i32) + line_height := (ascent - descent + line_gap) * entry.size_scale if use_full_text_shape { // assert( entry.shaper_info != nil ) - shaper_shape_from_text( & ctx.shaper_ctx, & entry.shaper_info, output, text_utf8, ascent, descent, line_gap, entry.size, entry.size_scale ) + shaper_shape_from_text( & ctx.shaper_ctx, & entry.shaper_info, output, text_utf8, ascent_i32, descent_i32, line_gap_i32, entry.size, entry.size_scale ) + // TODO(Ed): Need to be able to provide the text height as well return } else @@ -87,13 +88,10 @@ shape_text_uncached :: proc( ctx : ^Context, font : FontID, text_utf8 : string, // Note(Original Author): // We use our own fallback dumbass text shaping. // WARNING: PLEASE USE HARFBUZZ. GOOD TEXT SHAPING IS IMPORTANT FOR INTERNATIONALISATION. - ascent := f32(ascent) - descent := f32(descent) - line_gap := f32(line_gap) - position : Vec2 - advance : i32 = 0 - to_left_side_glyph : i32 = 0 + line_count : int = 1 + max_line_width : f32 = 0 + position : Vec2 prev_codepoint : rune for codepoint in text_utf8 @@ -104,29 +102,34 @@ shape_text_uncached :: proc( ctx : ^Context, font : FontID, text_utf8 : string, } if codepoint == '\n' { - position.x = 0.0 - position.y -= (ascent - descent + line_gap) * entry.size_scale - position.y = ceil(position.y) + line_count += 1 + max_line_width = max(max_line_width, position.x) + position.x = 0.0 + position.y -= line_height + position.y = ceil(position.y) prev_codepoint = rune(0) continue } if abs( entry.size ) <= Advance_Snap_Smallfont_Size { - position.x = math.ceil( position.x ) + position.x = ceil( position.x ) } append( & output.glyphs, parser_find_glyph_index( & entry.parser_info, codepoint )) - advance, to_left_side_glyph = parser_get_codepoint_horizontal_metrics( & entry.parser_info, codepoint ) + advance, _ := parser_get_codepoint_horizontal_metrics( 
& entry.parser_info, codepoint ) append( & output.positions, Vec2 { ceil(position.x), position.y }) - // append( & output.positions, position ) position.x += f32(advance) * entry.size_scale prev_codepoint = codepoint } output.end_cursor_pos = position + max_line_width = max(max_line_width, position.x) + + output.size.x = max_line_width + output.size.y = f32(line_count) * line_height } }