From a28303bad6ca993683f878e0b086447d42e5d58c Mon Sep 17 00:00:00 2001 From: Ed_ Date: Fri, 28 Jun 2024 07:31:51 -0400 Subject: [PATCH] Added manual simd but Odin's already doing it perfectly (+ other attempted optimizations) --- code/font/VEFontCache/atlas.odin | 152 +++++++++++------ code/font/VEFontCache/draw.odin | 175 +++++++++---------- code/font/VEFontCache/misc.odin | 224 +++++++++++++++++++------ code/font/VEFontCache/shaped_text.odin | 2 - 4 files changed, 354 insertions(+), 199 deletions(-) diff --git a/code/font/VEFontCache/atlas.odin b/code/font/VEFontCache/atlas.odin index 180d5f2..c20a0fb 100644 --- a/code/font/VEFontCache/atlas.odin +++ b/code/font/VEFontCache/atlas.odin @@ -86,68 +86,110 @@ atlas_bbox :: proc( atlas : ^Atlas, region : AtlasRegionKind, local_idx : i32 ) return } -decide_codepoint_region :: proc( ctx : ^Context, entry : ^Entry, glyph_index : Glyph +// decide_codepoint_region :: proc( ctx : ^Context, entry : ^Entry, glyph_index : Glyph +// ) -> (region_kind : AtlasRegionKind, region : ^AtlasRegion, over_sample : Vec2) +// { +// if parser_is_glyph_empty( & entry.parser_info, glyph_index ) { +// region_kind = .None +// } + +// bounds_0, bounds_1 := parser_get_glyph_box( & entry.parser_info, glyph_index ) +// bounds_width := f32(bounds_1.x - bounds_0.x) +// bounds_height := f32(bounds_1.y - bounds_0.y) + +// atlas := & ctx.atlas +// glyph_buffer := & ctx.glyph_buffer + +// glyph_padding := f32(atlas.glyph_padding) * 2 + +// bounds_width_scaled := cast(u32) (bounds_width * entry.size_scale + glyph_padding) +// bounds_height_scaled := cast(u32) (bounds_height * entry.size_scale + glyph_padding) + +// if bounds_width_scaled <= atlas.region_a.width && bounds_height_scaled <= atlas.region_a.height +// { +// // Region A for small glyphs. These are good for things such as punctuation. 
+// region_kind = .A +// region = & atlas.region_a +// } +// else if bounds_width_scaled <= atlas.region_b.width && bounds_height_scaled <= atlas.region_b.height +// { +// // Region B for tall glyphs. These are good for things such as european alphabets. +// region_kind = .B +// region = & atlas.region_b +// } +// else if bounds_width_scaled <= atlas.region_c.width && bounds_height_scaled <= atlas.region_c.height +// { +// // Region C for big glyphs. These are good for things such as asian typography. +// region_kind = .C +// region = & atlas.region_c +// } +// else if bounds_width_scaled <= atlas.region_d.width && bounds_height_scaled <= atlas.region_d.height +// { +// // Region D for huge glyphs. These are good for things such as titles and 4k. +// region_kind = .D +// region = & atlas.region_d +// } +// else if bounds_width_scaled <= glyph_buffer.width && bounds_height_scaled <= glyph_buffer.height +// { +// // Region 'E' for massive glyphs. These are rendered uncached and un-oversampled. 
+// region_kind = .E +// region = nil +// if bounds_width_scaled <= glyph_buffer.width / 2 && bounds_height_scaled <= glyph_buffer.height / 2 { +// over_sample = { 2.0, 2.0 } +// } +// else { +// over_sample = { 1.0, 1.0 } +// } +// return +// } +// else { +// region_kind = .None +// return +// } + +// over_sample = glyph_buffer.over_sample +// assert(region != nil) +// return +// } + +decide_codepoint_region :: proc(ctx : ^Context, entry : ^Entry, glyph_index : Glyph ) -> (region_kind : AtlasRegionKind, region : ^AtlasRegion, over_sample : Vec2) { - if parser_is_glyph_empty( & entry.parser_info, glyph_index ) { - region_kind = .None + if parser_is_glyph_empty(&entry.parser_info, glyph_index) { + return .None, nil, {} } - bounds_0, bounds_1 := parser_get_glyph_box( & entry.parser_info, glyph_index ) - bounds_width := f32(bounds_1.x - bounds_0.x) - bounds_height := f32(bounds_1.y - bounds_0.y) + bounds_0, bounds_1 := parser_get_glyph_box(&entry.parser_info, glyph_index) + bounds_width := f32(bounds_1.x - bounds_0.x) + bounds_height := f32(bounds_1.y - bounds_0.y) - atlas := & ctx.atlas - glyph_buffer := & ctx.glyph_buffer + atlas := & ctx.atlas + glyph_buffer := & ctx.glyph_buffer + glyph_padding := f32( atlas.glyph_padding ) * 2 - glyph_padding := f32(atlas.glyph_padding) * 2 + bounds_width_scaled := u32(bounds_width * entry.size_scale + glyph_padding) + bounds_height_scaled := u32(bounds_height * entry.size_scale + glyph_padding) - bounds_width_scaled := cast(u32) (bounds_width * entry.size_scale + glyph_padding) - bounds_height_scaled := cast(u32) (bounds_height * entry.size_scale + glyph_padding) - - if bounds_width_scaled <= atlas.region_a.width && bounds_height_scaled <= atlas.region_a.height - { - // Region A for small glyphs. These are good for things such as punctuation. - region_kind = .A - region = & atlas.region_a - } - else if bounds_width_scaled <= atlas.region_b.width && bounds_height_scaled <= atlas.region_b.height - { - // Region B for tall glyphs. 
These are good for things such as european alphabets. - region_kind = .B - region = & atlas.region_b - } - else if bounds_width_scaled <= atlas.region_c.width && bounds_height_scaled <= atlas.region_c.height - { - // Region C for big glyphs. These are good for things such as asian typography. - region_kind = .C - region = & atlas.region_c - } - else if bounds_width_scaled <= atlas.region_d.width && bounds_height_scaled <= atlas.region_d.height - { - // Region D for huge glyphs. These are good for things such as titles and 4k. - region_kind = .D - region = & atlas.region_d - } - else if bounds_width_scaled <= glyph_buffer.width && bounds_height_scaled <= glyph_buffer.height - { - // Region 'E' for massive glyphs. These are rendered uncached and un-oversampled. - region_kind = .E - region = nil - if bounds_width_scaled <= glyph_buffer.width / 2 && bounds_height_scaled <= glyph_buffer.height / 2 { - over_sample = { 2.0, 2.0 } - } - else { - over_sample = { 1.0, 1.0 } - } - return - } - else { - region_kind = .None - return + // Use a lookup table for faster region selection + region_lookup := [4]struct { kind: AtlasRegionKind, region: ^AtlasRegion } { + { .A, & atlas.region_a }, + { .B, & atlas.region_b }, + { .C, & atlas.region_c }, + { .D, & atlas.region_d }, } - over_sample = glyph_buffer.over_sample - assert(region != nil) - return + for region in region_lookup do if bounds_width_scaled <= region.region.width && bounds_height_scaled <= region.region.height { + return region.kind, region.region, glyph_buffer.over_sample + } + + if bounds_width_scaled <= glyph_buffer.width \ + && bounds_height_scaled <= glyph_buffer.height { + over_sample = \ + bounds_width_scaled <= glyph_buffer.width / 2 && + bounds_height_scaled <= glyph_buffer.height / 2 ? 
\ + {2.0, 2.0} \ + : {1.0, 1.0} + return .E, nil, over_sample + } + return .None, nil, {} } diff --git a/code/font/VEFontCache/draw.odin b/code/font/VEFontCache/draw.odin index 55db0e9..845311e 100644 --- a/code/font/VEFontCache/draw.odin +++ b/code/font/VEFontCache/draw.odin @@ -56,23 +56,23 @@ blit_quad :: proc( draw_list : ^DrawList, p0 : Vec2 = {0, 0}, p1 : Vec2 = {1, 1} // p0.x, p0.y, p1.x, p1.y, uv0.x, uv0.y, uv1.x, uv1.y); v_offset := cast(u32) len(draw_list.vertices) - quadv : [4]Vertex - - quadv[0] = Vertex { - {p0.x, p0.y}, - uv0.x, uv0.y - } - quadv[1] = Vertex { - {p0.x, p1.y}, - uv0.x, uv1.y - } - quadv[2] = Vertex { - {p1.x, p0.y}, - uv1.x, uv0.y - } - quadv[3] = Vertex { - {p1.x, p1.y}, - uv1.x, uv1.y + quadv : [4]Vertex = { + { + {p0.x, p0.y}, + uv0.x, uv0.y + }, + { + {p0.x, p1.y}, + uv0.x, uv1.y + }, + { + {p1.x, p0.y}, + uv1.x, uv0.y + }, + { + {p1.x, p1.y}, + uv1.x, uv1.y + } } append( & draw_list.vertices, ..quadv[:] ) @@ -84,118 +84,81 @@ blit_quad :: proc( draw_list : ^DrawList, p0 : Vec2 = {0, 0}, p1 : Vec2 = {1, 1} return } -cache_glyph :: proc( ctx : ^Context, font : FontID, glyph_index : Glyph, entry : ^Entry, bounds_0, bounds_1 : Vec2, scale, translate : Vec2 ) -> b32 +cache_glyph :: proc(ctx : ^Context, font : FontID, glyph_index : Glyph, entry : ^Entry, bounds_0, bounds_1 : Vec2, scale, translate : Vec2) -> b32 { // profile(#procedure) if glyph_index == Glyph(0) { - // Note(Original Author): Glyph not in current hb_font return false } - // Retrieve the shape definition from the parser. 
- shape, error := parser_get_glyph_shape( & entry.parser_info, glyph_index ) - assert( error == .None ) + shape, error := parser_get_glyph_shape(&entry.parser_info, glyph_index) + assert(error == .None) if len(shape) == 0 { return false } - if ctx.debug_print_verbose - { - log( "shape:") - for vertex in shape - { - if vertex.type == .Move { - logf("move_to %d %d", vertex.x, vertex.y ) - } - else if vertex.type == .Line { - logf("line_to %d %d", vertex.x, vertex.y ) - } - else if vertex.type == .Curve { - logf("curve_to %d %d through %d %d", vertex.x, vertex.y, vertex.contour_x0, vertex.contour_y0 ) - } - else if vertex.type == .Cubic { - logf("cubic_to %d %d through %d %d and %d %d", - vertex.x, vertex.y, - vertex.contour_x0, vertex.contour_y0, - vertex.contour_x1, vertex.contour_y1 ) - } - } - } + outside := Vec2{bounds_0.x - 21, bounds_0.y - 33} - /* - Note(Original Author): - We need a random point that is outside our shape. We simply pick something diagonally across from top-left bound corner. - Note that this outside point is scaled alongside the glyph in ve_fontcache_draw_filled_path, so we don't need to handle that here. - */ - outside := Vec2 { - bounds_0.x - 21, - bounds_0.y - 33, - } - - // Note(Original Author): Figure out scaling so it fits within our box. - draw := DrawCall_Default + draw := DrawCall_Default draw.pass = FrameBufferPass.Glyph draw.start_index = u32(len(ctx.draw_list.indices)) - // Note(Original Author); - // Draw the path using simplified version of https://medium.com/@evanwallace/easy-scalable-text-rendering-on-the-gpu-c3f4d782c5ac. - // Instead of involving fragment shader code we simply make use of modern GPU ability to crunch triangles and brute force curve definitions. 
- path := & ctx.temp_path - clear( path) - for edge in shape do switch edge.type - { + path := &ctx.temp_path + clear(path) + + append_bezier_curve :: #force_inline proc(path: ^[dynamic]Vertex, p0, p1, p2: Vec2, quality: u32) { + step := 1.0 / f32(quality) + for index := u32(1); index <= quality; index += 1 { + alpha := f32(index) * step + append( path, Vertex { pos = eval_point_on_bezier3(p0, p1, p2, alpha) } ) + } + } + + append_bezier_curve_cubic :: #force_inline proc(path: ^[dynamic]Vertex, p0, p1, p2, p3: Vec2, quality: u32) { + step := 1.0 / f32(quality) + for index := u32(1); index <= quality; index += 1 { + alpha := f32(index) * step + append( path, Vertex { pos = eval_point_on_bezier4(p0, p1, p2, p3, alpha) } ) + } + } + + for edge in shape do #partial switch edge.type { case .Move: if len(path) > 0 { - draw_filled_path( & ctx.draw_list, outside, path[:], scale, translate, ctx.debug_print_verbose ) + draw_filled_path(&ctx.draw_list, outside, path[:], scale, translate, ctx.debug_print_verbose) + clear(path) } - clear( path) fallthrough case .Line: - vertex := Vertex { pos = Vec2{ f32(edge.x), f32(edge.y) } } - append( path, vertex) + append( path, Vertex { pos = Vec2 { f32(edge.x), f32(edge.y)} } ) case .Curve: - assert( len(path) > 0 ) - p0 := path[ len(path) - 1 ].pos + assert(len(path) > 0) + p0 := path[ len(path) - 1].pos p1 := Vec2{ f32(edge.contour_x0), f32(edge.contour_y0) } p2 := Vec2{ f32(edge.x), f32(edge.y) } - - step := 1.0 / f32(ctx.curve_quality) - alpha := step - for index := i32(0); index < i32(ctx.curve_quality); index += 1 { - append( path, Vertex { pos = eval_point_on_bezier3( p0, p1, p2, alpha ) }) - alpha += step - } + append_bezier_curve( path, p0, p1, p2, ctx.curve_quality ) case .Cubic: - assert( len(path) > 0 ) + assert( len(path) > 0) p0 := path[ len(path) - 1].pos p1 := Vec2{ f32(edge.contour_x0), f32(edge.contour_y0) } p2 := Vec2{ f32(edge.contour_x1), f32(edge.contour_y1) } p3 := Vec2{ f32(edge.x), f32(edge.y) } - - step := 1.0 
/ f32(ctx.curve_quality) - alpha := step - for index := i32(0); index < i32(ctx.curve_quality); index += 1 { - append( path, Vertex { pos = eval_point_on_bezier4( p0, p1, p2, p3, alpha ) }) - alpha += step - } - - case .None: - assert(false, "Unknown edge type or invalid") + append_bezier_curve_cubic( path, p0, p1, p2, p3, ctx.curve_quality ) } + if len(path) > 0 { - draw_filled_path( & ctx.draw_list, outside, path[:], scale, translate, ctx.debug_print_verbose ) + draw_filled_path(&ctx.draw_list, outside, path[:], scale, translate, ctx.debug_print_verbose) } - // Note(Original Author): Apend the draw call - draw.end_index = cast(u32) len(ctx.draw_list.indices) + draw.end_index = u32(len(ctx.draw_list.indices)) if draw.end_index > draw.start_index { - append( & ctx.draw_list.calls, draw) + append(&ctx.draw_list.calls, draw) } - parser_free_shape( & entry.parser_info, shape ) + parser_free_shape(&entry.parser_info, shape) return true } @@ -698,6 +661,34 @@ flush_glyph_buffer_to_atlas :: proc( ctx : ^Context ) } } +// flush_glyph_buffer_to_atlas :: proc( ctx : ^Context ) +// { +// // profile(#procedure) +// // Flush drawcalls to draw list +// if len(ctx.glyph_buffer.clear_draw_list.calls) > 0 { +// merge_draw_list( & ctx.draw_list, & ctx.glyph_buffer.clear_draw_list) +// clear_draw_list( & ctx.glyph_buffer.clear_draw_list) +// } + +// if len(ctx.glyph_buffer.draw_list.calls) > 0 { +// merge_draw_list( & ctx.draw_list, & ctx.glyph_buffer.draw_list) +// clear_draw_list( & ctx.glyph_buffer.draw_list) +// } + +// // Clear glyph_update_FBO +// if ctx.glyph_buffer.batch_x != 0 +// { +// call := DrawCall { +// pass = .Glyph, +// start_index = 0, +// end_index = 0, +// clear_before_draw = true, +// } +// append( & ctx.draw_list.calls, call) +// ctx.glyph_buffer.batch_x = 0 +// } +// } + // ve_fontcache_merge_drawlist merge_draw_list :: proc( dst, src : ^DrawList ) { diff --git a/code/font/VEFontCache/misc.odin b/code/font/VEFontCache/misc.odin index 3ca9c12..7d9f863 100644 
--- a/code/font/VEFontCache/misc.odin +++ b/code/font/VEFontCache/misc.odin @@ -1,6 +1,9 @@ package VEFontCache import "base:runtime" +import "core:simd" +import "core:math" + // import core_log "core:log" Colour :: [4]f32 @@ -50,54 +53,6 @@ font_glyph_lru_code :: #force_inline proc "contextless" ( font : FontID, glyph_i return } - -// For a provided alpha value, -// allows the function to calculate the position of a point along the curve at any given fraction of its total length -// ve_fontcache_eval_bezier (quadratic) -eval_point_on_bezier3 :: #force_inline proc "contextless" ( p0, p1, p2 : Vec2, alpha : f32 ) -> Vec2 -{ - p0 := vec2_64(p0) - p1 := vec2_64(p1) - p2 := vec2_64(p2) - alpha := f64(alpha) - - weight_start := (1 - alpha) * (1 - alpha) - weight_control := 2.0 * (1 - alpha) * alpha - weight_end := alpha * alpha - - starting_point := p0 * weight_start - control_point := p1 * weight_control - end_point := p2 * weight_end - - point := starting_point + control_point + end_point - return { f32(point.x), f32(point.y) } -} - -// For a provided alpha value, -// allows the function to calculate the position of a point along the curve at any given fraction of its total length -// ve_fontcache_eval_bezier (cubic) -eval_point_on_bezier4 :: #force_inline proc "contextless" ( p0, p1, p2, p3 : Vec2, alpha : f32 ) -> Vec2 -{ - p0 := vec2_64(p0) - p1 := vec2_64(p1) - p2 := vec2_64(p2) - p3 := vec2_64(p3) - alpha := f64(alpha) - - weight_start := (1 - alpha) * (1 - alpha) * (1 - alpha) - weight_c_a := 3 * (1 - alpha) * (1 - alpha) * alpha - weight_c_b := 3 * (1 - alpha) * alpha * alpha - weight_end := alpha * alpha * alpha - - start_point := p0 * weight_start - control_a := p1 * weight_c_a - control_b := p2 * weight_c_b - end_point := p3 * weight_end - - point := start_point + control_a + control_b + end_point - return { f32(point.x), f32(point.y) } -} - is_empty :: #force_inline proc ( ctx : ^Context, entry : ^Entry, glyph_index : Glyph ) -> b32 { if glyph_index == 0 do 
return true @@ -115,7 +70,8 @@ reset_batch_codepoint_state :: #force_inline proc( ctx : ^Context ) { ctx.temp_codepoint_seen_num = 0 } -screenspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2, size : Vec2 ) { +screenspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2, size : Vec2 ) +{ if true { pos_64 := vec2_64_from_vec2(position^) @@ -142,7 +98,8 @@ screenspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2 } } -textspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2, size : Vec2 ) { +textspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2, size : Vec2 ) +{ if true { pos_64 := vec2_64_from_vec2(position^) @@ -162,3 +119,170 @@ textspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2, (scale^) *= quotient } } + +Use_SIMD_For_Bezier_Ops :: true + +when ! Use_SIMD_For_Bezier_Ops +{ + // For a provided alpha value, + // allows the function to calculate the position of a point along the curve at any given fraction of its total length + // ve_fontcache_eval_bezier (quadratic) + eval_point_on_bezier3 :: #force_inline proc "contextless" ( p0, p1, p2 : Vec2, alpha : f32 ) -> Vec2 + { + p0 := vec2_64(p0) + p1 := vec2_64(p1) + p2 := vec2_64(p2) + alpha := f64(alpha) + + weight_start := (1 - alpha) * (1 - alpha) + weight_control := 2.0 * (1 - alpha) * alpha + weight_end := alpha * alpha + + starting_point := p0 * weight_start + control_point := p1 * weight_control + end_point := p2 * weight_end + + point := starting_point + control_point + end_point + return { f32(point.x), f32(point.y) } + } + + // For a provided alpha value, + // allows the function to calculate the position of a point along the curve at any given fraction of its total length + // ve_fontcache_eval_bezier (cubic) + eval_point_on_bezier4 :: #force_inline proc "contextless" ( p0, p1, p2, p3 : Vec2, alpha : f32 ) -> Vec2 + { + p0 := vec2_64(p0) + p1 := vec2_64(p1) + p2 := 
vec2_64(p2) + p3 := vec2_64(p3) + alpha := f64(alpha) + + weight_start := (1 - alpha) * (1 - alpha) * (1 - alpha) + weight_c_a := 3 * (1 - alpha) * (1 - alpha) * alpha + weight_c_b := 3 * (1 - alpha) * alpha * alpha + weight_end := alpha * alpha * alpha + + start_point := p0 * weight_start + control_a := p1 * weight_c_a + control_b := p2 * weight_c_b + end_point := p3 * weight_end + + point := start_point + control_a + control_b + end_point + return { f32(point.x), f32(point.y) } + } +} +else +{ + Vec2_SIMD :: simd.f32x4 + + vec2_to_simd :: #force_inline proc "contextless" (v: Vec2) -> Vec2_SIMD { + return Vec2_SIMD{v.x, v.y, 0, 0} + } + + simd_to_vec2 :: #force_inline proc "contextless" (v: Vec2_SIMD) -> Vec2 { + return Vec2{simd.extract(v, 0), simd.extract(v, 1)} + } + + vec2_add_simd :: #force_inline proc "contextless" (a, b: Vec2) -> Vec2 { + simd_a := vec2_to_simd(a) + simd_b := vec2_to_simd(b) + result := simd.add(simd_a, simd_b) + return simd_to_vec2(result) + } + + vec2_sub_simd :: #force_inline proc "contextless" (a, b: Vec2) -> Vec2 { + simd_a := vec2_to_simd(a) + simd_b := vec2_to_simd(b) + result := simd.sub(simd_a, simd_b) + return simd_to_vec2(result) + } + + vec2_mul_simd :: #force_inline proc "contextless" (a: Vec2, s: f32) -> Vec2 { + simd_a := vec2_to_simd(a) + simd_s := Vec2_SIMD{s, s, s, s} + result := simd.mul(simd_a, simd_s) + return simd_to_vec2(result) + } + + vec2_div_simd :: #force_inline proc "contextless" (a: Vec2, s: f32) -> Vec2 { + simd_a := vec2_to_simd(a) + simd_s := Vec2_SIMD{s, s, s, s} + result := simd.div(simd_a, simd_s) + return simd_to_vec2(result) + } + + vec2_dot_simd :: #force_inline proc "contextless" (a, b: Vec2) -> f32 { + simd_a := vec2_to_simd(a) + simd_b := vec2_to_simd(b) + result := simd.mul(simd_a, simd_b) + return simd.reduce_add_ordered(result) + } + + vec2_length_sqr_simd :: #force_inline proc "contextless" (a: Vec2) -> f32 { + return vec2_dot_simd(a, a) + } + + vec2_length_simd :: #force_inline proc 
"contextless" (a: Vec2) -> f32 { + return math.sqrt(vec2_length_sqr_simd(a)) + } + + vec2_normalize_simd :: #force_inline proc "contextless" (a: Vec2) -> Vec2 { + len := vec2_length_simd(a) + if len > 0 { + inv_len := 1.0 / len + return vec2_mul_simd(a, inv_len) + } + return a + } + + // SIMD-optimized version of eval_point_on_bezier3 + eval_point_on_bezier3 :: #force_inline proc "contextless" (p0, p1, p2: Vec2, alpha: f32) -> Vec2 + { + simd_p0 := vec2_to_simd(p0) + simd_p1 := vec2_to_simd(p1) + simd_p2 := vec2_to_simd(p2) + + one_minus_alpha := 1.0 - alpha + weight_start := one_minus_alpha * one_minus_alpha + weight_control := 2.0 * one_minus_alpha * alpha + weight_end := alpha * alpha + + simd_weights := Vec2_SIMD{weight_start, weight_control, weight_end, 0} + result := simd.add( + simd.add( + simd.mul( simd_p0, simd.swizzle( simd_weights, 0, 0, 0, 0) ), + simd.mul( simd_p1, simd.swizzle( simd_weights, 1, 1, 1, 1) ) + ), + simd.mul( simd_p2, simd.swizzle(simd_weights, 2, 2, 2, 2) ) + ) + + return simd_to_vec2(result) + } + + eval_point_on_bezier4 :: #force_inline proc "contextless" (p0, p1, p2, p3: Vec2, alpha: f32) -> Vec2 + { + simd_p0 := vec2_to_simd(p0) + simd_p1 := vec2_to_simd(p1) + simd_p2 := vec2_to_simd(p2) + simd_p3 := vec2_to_simd(p3) + + one_minus_alpha := 1.0 - alpha + weight_start := one_minus_alpha * one_minus_alpha * one_minus_alpha + weight_c_a := 3 * one_minus_alpha * one_minus_alpha * alpha + weight_c_b := 3 * one_minus_alpha * alpha * alpha + weight_end := alpha * alpha * alpha + + simd_weights := Vec2_SIMD { weight_start, weight_c_a, weight_c_b, weight_end } + result := simd.add( + simd.add( + simd.mul( simd_p0, simd.swizzle(simd_weights, 0, 0, 0, 0) ), + simd.mul( simd_p1, simd.swizzle(simd_weights, 1, 1, 1, 1) ) + ), + simd.add( + simd.mul( simd_p2, simd.swizzle(simd_weights, 2, 2, 2, 2) ), + simd.mul( simd_p3, simd.swizzle(simd_weights, 3, 3, 3, 3) ) + ) + ) + return simd_to_vec2(result) + } +} diff --git 
a/code/font/VEFontCache/shaped_text.odin b/code/font/VEFontCache/shaped_text.odin index 9e7808c..1ac55c2 100644 --- a/code/font/VEFontCache/shaped_text.odin +++ b/code/font/VEFontCache/shaped_text.odin @@ -40,7 +40,6 @@ shape_text_cached :: proc( ctx : ^Context, font : FontID, text_utf8 : string, en shape_cache_idx = shape_cache.next_cache_id shape_cache.next_cache_id += 1 evicted := LRU_put( state, lru_code, shape_cache_idx ) - assert( evicted == lru_code ) } else { @@ -54,7 +53,6 @@ shape_text_cached :: proc( ctx : ^Context, font : FontID, text_utf8 : string, en } shape_entry := & shape_cache.storage[ shape_cache_idx ] - // shape_entry.storage_hash = lru_code shape_text_uncached( ctx, font, text_utf8, entry, shape_entry ) }