Added manual SIMD, but Odin's already doing it perfectly (+ other attempted optimizations)
This commit is contained in:
parent
b8665d0bc2
commit
a28303bad6
@ -86,68 +86,110 @@ atlas_bbox :: proc( atlas : ^Atlas, region : AtlasRegionKind, local_idx : i32 )
|
||||
return
|
||||
}
|
||||
|
||||
decide_codepoint_region :: proc( ctx : ^Context, entry : ^Entry, glyph_index : Glyph
|
||||
// decide_codepoint_region :: proc( ctx : ^Context, entry : ^Entry, glyph_index : Glyph
|
||||
// ) -> (region_kind : AtlasRegionKind, region : ^AtlasRegion, over_sample : Vec2)
|
||||
// {
|
||||
// if parser_is_glyph_empty( & entry.parser_info, glyph_index ) {
|
||||
// region_kind = .None
|
||||
// }
|
||||
|
||||
// bounds_0, bounds_1 := parser_get_glyph_box( & entry.parser_info, glyph_index )
|
||||
// bounds_width := f32(bounds_1.x - bounds_0.x)
|
||||
// bounds_height := f32(bounds_1.y - bounds_0.y)
|
||||
|
||||
// atlas := & ctx.atlas
|
||||
// glyph_buffer := & ctx.glyph_buffer
|
||||
|
||||
// glyph_padding := f32(atlas.glyph_padding) * 2
|
||||
|
||||
// bounds_width_scaled := cast(u32) (bounds_width * entry.size_scale + glyph_padding)
|
||||
// bounds_height_scaled := cast(u32) (bounds_height * entry.size_scale + glyph_padding)
|
||||
|
||||
// if bounds_width_scaled <= atlas.region_a.width && bounds_height_scaled <= atlas.region_a.height
|
||||
// {
|
||||
// // Region A for small glyphs. These are good for things such as punctuation.
|
||||
// region_kind = .A
|
||||
// region = & atlas.region_a
|
||||
// }
|
||||
// else if bounds_width_scaled <= atlas.region_b.width && bounds_height_scaled <= atlas.region_b.height
|
||||
// {
|
||||
// // Region B for tall glyphs. These are good for things such as european alphabets.
|
||||
// region_kind = .B
|
||||
// region = & atlas.region_b
|
||||
// }
|
||||
// else if bounds_width_scaled <= atlas.region_c.width && bounds_height_scaled <= atlas.region_c.height
|
||||
// {
|
||||
// // Region C for big glyphs. These are good for things such as asian typography.
|
||||
// region_kind = .C
|
||||
// region = & atlas.region_c
|
||||
// }
|
||||
// else if bounds_width_scaled <= atlas.region_d.width && bounds_height_scaled <= atlas.region_d.height
|
||||
// {
|
||||
// // Region D for huge glyphs. These are good for things such as titles and 4k.
|
||||
// region_kind = .D
|
||||
// region = & atlas.region_d
|
||||
// }
|
||||
// else if bounds_width_scaled <= glyph_buffer.width && bounds_height_scaled <= glyph_buffer.height
|
||||
// {
|
||||
// // Region 'E' for massive glyphs. These are rendered uncached and un-oversampled.
|
||||
// region_kind = .E
|
||||
// region = nil
|
||||
// if bounds_width_scaled <= glyph_buffer.width / 2 && bounds_height_scaled <= glyph_buffer.height / 2 {
|
||||
// over_sample = { 2.0, 2.0 }
|
||||
// }
|
||||
// else {
|
||||
// over_sample = { 1.0, 1.0 }
|
||||
// }
|
||||
// return
|
||||
// }
|
||||
// else {
|
||||
// region_kind = .None
|
||||
// return
|
||||
// }
|
||||
|
||||
// over_sample = glyph_buffer.over_sample
|
||||
// assert(region != nil)
|
||||
// return
|
||||
// }
|
||||
|
||||
decide_codepoint_region :: proc(ctx : ^Context, entry : ^Entry, glyph_index : Glyph
|
||||
) -> (region_kind : AtlasRegionKind, region : ^AtlasRegion, over_sample : Vec2)
|
||||
{
|
||||
if parser_is_glyph_empty( & entry.parser_info, glyph_index ) {
|
||||
region_kind = .None
|
||||
if parser_is_glyph_empty(&entry.parser_info, glyph_index) {
|
||||
return .None, nil, {}
|
||||
}
|
||||
|
||||
bounds_0, bounds_1 := parser_get_glyph_box( & entry.parser_info, glyph_index )
|
||||
bounds_width := f32(bounds_1.x - bounds_0.x)
|
||||
bounds_height := f32(bounds_1.y - bounds_0.y)
|
||||
bounds_0, bounds_1 := parser_get_glyph_box(&entry.parser_info, glyph_index)
|
||||
bounds_width := f32(bounds_1.x - bounds_0.x)
|
||||
bounds_height := f32(bounds_1.y - bounds_0.y)
|
||||
|
||||
atlas := & ctx.atlas
|
||||
glyph_buffer := & ctx.glyph_buffer
|
||||
atlas := & ctx.atlas
|
||||
glyph_buffer := & ctx.glyph_buffer
|
||||
glyph_padding := f32( atlas.glyph_padding ) * 2
|
||||
|
||||
glyph_padding := f32(atlas.glyph_padding) * 2
|
||||
bounds_width_scaled := u32(bounds_width * entry.size_scale + glyph_padding)
|
||||
bounds_height_scaled := u32(bounds_height * entry.size_scale + glyph_padding)
|
||||
|
||||
bounds_width_scaled := cast(u32) (bounds_width * entry.size_scale + glyph_padding)
|
||||
bounds_height_scaled := cast(u32) (bounds_height * entry.size_scale + glyph_padding)
|
||||
|
||||
if bounds_width_scaled <= atlas.region_a.width && bounds_height_scaled <= atlas.region_a.height
|
||||
{
|
||||
// Region A for small glyphs. These are good for things such as punctuation.
|
||||
region_kind = .A
|
||||
region = & atlas.region_a
|
||||
}
|
||||
else if bounds_width_scaled <= atlas.region_b.width && bounds_height_scaled <= atlas.region_b.height
|
||||
{
|
||||
// Region B for tall glyphs. These are good for things such as european alphabets.
|
||||
region_kind = .B
|
||||
region = & atlas.region_b
|
||||
}
|
||||
else if bounds_width_scaled <= atlas.region_c.width && bounds_height_scaled <= atlas.region_c.height
|
||||
{
|
||||
// Region C for big glyphs. These are good for things such as asian typography.
|
||||
region_kind = .C
|
||||
region = & atlas.region_c
|
||||
}
|
||||
else if bounds_width_scaled <= atlas.region_d.width && bounds_height_scaled <= atlas.region_d.height
|
||||
{
|
||||
// Region D for huge glyphs. These are good for things such as titles and 4k.
|
||||
region_kind = .D
|
||||
region = & atlas.region_d
|
||||
}
|
||||
else if bounds_width_scaled <= glyph_buffer.width && bounds_height_scaled <= glyph_buffer.height
|
||||
{
|
||||
// Region 'E' for massive glyphs. These are rendered uncached and un-oversampled.
|
||||
region_kind = .E
|
||||
region = nil
|
||||
if bounds_width_scaled <= glyph_buffer.width / 2 && bounds_height_scaled <= glyph_buffer.height / 2 {
|
||||
over_sample = { 2.0, 2.0 }
|
||||
}
|
||||
else {
|
||||
over_sample = { 1.0, 1.0 }
|
||||
}
|
||||
return
|
||||
}
|
||||
else {
|
||||
region_kind = .None
|
||||
return
|
||||
// Use a lookup table for faster region selection
|
||||
region_lookup := [4]struct { kind: AtlasRegionKind, region: ^AtlasRegion } {
|
||||
{ .A, & atlas.region_a },
|
||||
{ .B, & atlas.region_b },
|
||||
{ .C, & atlas.region_c },
|
||||
{ .D, & atlas.region_d },
|
||||
}
|
||||
|
||||
over_sample = glyph_buffer.over_sample
|
||||
assert(region != nil)
|
||||
return
|
||||
for region in region_lookup do if bounds_width_scaled <= region.region.width && bounds_height_scaled <= region.region.height {
|
||||
return region.kind, region.region, glyph_buffer.over_sample
|
||||
}
|
||||
|
||||
if bounds_width_scaled <= glyph_buffer.width \
|
||||
&& bounds_height_scaled <= glyph_buffer.height {
|
||||
over_sample = \
|
||||
bounds_width_scaled <= glyph_buffer.width / 2 &&
|
||||
bounds_height_scaled <= glyph_buffer.height / 2 ? \
|
||||
{2.0, 2.0} \
|
||||
: {1.0, 1.0}
|
||||
return .E, nil, over_sample
|
||||
}
|
||||
return .None, nil, {}
|
||||
}
|
||||
|
@ -56,23 +56,23 @@ blit_quad :: proc( draw_list : ^DrawList, p0 : Vec2 = {0, 0}, p1 : Vec2 = {1, 1}
|
||||
// p0.x, p0.y, p1.x, p1.y, uv0.x, uv0.y, uv1.x, uv1.y);
|
||||
v_offset := cast(u32) len(draw_list.vertices)
|
||||
|
||||
quadv : [4]Vertex
|
||||
|
||||
quadv[0] = Vertex {
|
||||
{p0.x, p0.y},
|
||||
uv0.x, uv0.y
|
||||
}
|
||||
quadv[1] = Vertex {
|
||||
{p0.x, p1.y},
|
||||
uv0.x, uv1.y
|
||||
}
|
||||
quadv[2] = Vertex {
|
||||
{p1.x, p0.y},
|
||||
uv1.x, uv0.y
|
||||
}
|
||||
quadv[3] = Vertex {
|
||||
{p1.x, p1.y},
|
||||
uv1.x, uv1.y
|
||||
quadv : [4]Vertex = {
|
||||
{
|
||||
{p0.x, p0.y},
|
||||
uv0.x, uv0.y
|
||||
},
|
||||
{
|
||||
{p0.x, p1.y},
|
||||
uv0.x, uv1.y
|
||||
},
|
||||
{
|
||||
{p1.x, p0.y},
|
||||
uv1.x, uv0.y
|
||||
},
|
||||
{
|
||||
{p1.x, p1.y},
|
||||
uv1.x, uv1.y
|
||||
}
|
||||
}
|
||||
append( & draw_list.vertices, ..quadv[:] )
|
||||
|
||||
@ -84,118 +84,81 @@ blit_quad :: proc( draw_list : ^DrawList, p0 : Vec2 = {0, 0}, p1 : Vec2 = {1, 1}
|
||||
return
|
||||
}
|
||||
|
||||
cache_glyph :: proc( ctx : ^Context, font : FontID, glyph_index : Glyph, entry : ^Entry, bounds_0, bounds_1 : Vec2, scale, translate : Vec2 ) -> b32
|
||||
cache_glyph :: proc(ctx : ^Context, font : FontID, glyph_index : Glyph, entry : ^Entry, bounds_0, bounds_1 : Vec2, scale, translate : Vec2) -> b32
|
||||
{
|
||||
// profile(#procedure)
|
||||
if glyph_index == Glyph(0) {
|
||||
// Note(Original Author): Glyph not in current hb_font
|
||||
return false
|
||||
}
|
||||
|
||||
// Retrieve the shape definition from the parser.
|
||||
shape, error := parser_get_glyph_shape( & entry.parser_info, glyph_index )
|
||||
assert( error == .None )
|
||||
shape, error := parser_get_glyph_shape(&entry.parser_info, glyph_index)
|
||||
assert(error == .None)
|
||||
if len(shape) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
if ctx.debug_print_verbose
|
||||
{
|
||||
log( "shape:")
|
||||
for vertex in shape
|
||||
{
|
||||
if vertex.type == .Move {
|
||||
logf("move_to %d %d", vertex.x, vertex.y )
|
||||
}
|
||||
else if vertex.type == .Line {
|
||||
logf("line_to %d %d", vertex.x, vertex.y )
|
||||
}
|
||||
else if vertex.type == .Curve {
|
||||
logf("curve_to %d %d through %d %d", vertex.x, vertex.y, vertex.contour_x0, vertex.contour_y0 )
|
||||
}
|
||||
else if vertex.type == .Cubic {
|
||||
logf("cubic_to %d %d through %d %d and %d %d",
|
||||
vertex.x, vertex.y,
|
||||
vertex.contour_x0, vertex.contour_y0,
|
||||
vertex.contour_x1, vertex.contour_y1 )
|
||||
}
|
||||
}
|
||||
}
|
||||
outside := Vec2{bounds_0.x - 21, bounds_0.y - 33}
|
||||
|
||||
/*
|
||||
Note(Original Author):
|
||||
We need a random point that is outside our shape. We simply pick something diagonally across from top-left bound corner.
|
||||
Note that this outside point is scaled alongside the glyph in ve_fontcache_draw_filled_path, so we don't need to handle that here.
|
||||
*/
|
||||
outside := Vec2 {
|
||||
bounds_0.x - 21,
|
||||
bounds_0.y - 33,
|
||||
}
|
||||
|
||||
// Note(Original Author): Figure out scaling so it fits within our box.
|
||||
draw := DrawCall_Default
|
||||
draw := DrawCall_Default
|
||||
draw.pass = FrameBufferPass.Glyph
|
||||
draw.start_index = u32(len(ctx.draw_list.indices))
|
||||
|
||||
// Note(Original Author):
|
||||
// Draw the path using simplified version of https://medium.com/@evanwallace/easy-scalable-text-rendering-on-the-gpu-c3f4d782c5ac.
|
||||
// Instead of involving fragment shader code we simply make use of modern GPU ability to crunch triangles and brute force curve definitions.
|
||||
path := & ctx.temp_path
|
||||
clear( path)
|
||||
for edge in shape do switch edge.type
|
||||
{
|
||||
path := &ctx.temp_path
|
||||
clear(path)
|
||||
|
||||
append_bezier_curve :: #force_inline proc(path: ^[dynamic]Vertex, p0, p1, p2: Vec2, quality: u32) {
|
||||
step := 1.0 / f32(quality)
|
||||
for index := u32(1); index <= quality; index += 1 {
|
||||
alpha := f32(index) * step
|
||||
append( path, Vertex { pos = eval_point_on_bezier3(p0, p1, p2, alpha) } )
|
||||
}
|
||||
}
|
||||
|
||||
append_bezier_curve_cubic :: #force_inline proc(path: ^[dynamic]Vertex, p0, p1, p2, p3: Vec2, quality: u32) {
|
||||
step := 1.0 / f32(quality)
|
||||
for index := u32(1); index <= quality; index += 1 {
|
||||
alpha := f32(index) * step
|
||||
append( path, Vertex { pos = eval_point_on_bezier4(p0, p1, p2, p3, alpha) } )
|
||||
}
|
||||
}
|
||||
|
||||
for edge in shape do #partial switch edge.type {
|
||||
case .Move:
|
||||
if len(path) > 0 {
|
||||
draw_filled_path( & ctx.draw_list, outside, path[:], scale, translate, ctx.debug_print_verbose )
|
||||
draw_filled_path(&ctx.draw_list, outside, path[:], scale, translate, ctx.debug_print_verbose)
|
||||
clear(path)
|
||||
}
|
||||
clear( path)
|
||||
fallthrough
|
||||
|
||||
case .Line:
|
||||
vertex := Vertex { pos = Vec2{ f32(edge.x), f32(edge.y) } }
|
||||
append( path, vertex)
|
||||
append( path, Vertex { pos = Vec2 { f32(edge.x), f32(edge.y)} } )
|
||||
|
||||
case .Curve:
|
||||
assert( len(path) > 0 )
|
||||
p0 := path[ len(path) - 1 ].pos
|
||||
assert(len(path) > 0)
|
||||
p0 := path[ len(path) - 1].pos
|
||||
p1 := Vec2{ f32(edge.contour_x0), f32(edge.contour_y0) }
|
||||
p2 := Vec2{ f32(edge.x), f32(edge.y) }
|
||||
|
||||
step := 1.0 / f32(ctx.curve_quality)
|
||||
alpha := step
|
||||
for index := i32(0); index < i32(ctx.curve_quality); index += 1 {
|
||||
append( path, Vertex { pos = eval_point_on_bezier3( p0, p1, p2, alpha ) })
|
||||
alpha += step
|
||||
}
|
||||
append_bezier_curve( path, p0, p1, p2, ctx.curve_quality )
|
||||
|
||||
case .Cubic:
|
||||
assert( len(path) > 0 )
|
||||
assert( len(path) > 0)
|
||||
p0 := path[ len(path) - 1].pos
|
||||
p1 := Vec2{ f32(edge.contour_x0), f32(edge.contour_y0) }
|
||||
p2 := Vec2{ f32(edge.contour_x1), f32(edge.contour_y1) }
|
||||
p3 := Vec2{ f32(edge.x), f32(edge.y) }
|
||||
|
||||
step := 1.0 / f32(ctx.curve_quality)
|
||||
alpha := step
|
||||
for index := i32(0); index < i32(ctx.curve_quality); index += 1 {
|
||||
append( path, Vertex { pos = eval_point_on_bezier4( p0, p1, p2, p3, alpha ) })
|
||||
alpha += step
|
||||
}
|
||||
|
||||
case .None:
|
||||
assert(false, "Unknown edge type or invalid")
|
||||
append_bezier_curve_cubic( path, p0, p1, p2, p3, ctx.curve_quality )
|
||||
}
|
||||
|
||||
if len(path) > 0 {
|
||||
draw_filled_path( & ctx.draw_list, outside, path[:], scale, translate, ctx.debug_print_verbose )
|
||||
draw_filled_path(&ctx.draw_list, outside, path[:], scale, translate, ctx.debug_print_verbose)
|
||||
}
|
||||
|
||||
// Note(Original Author): Append the draw call
|
||||
draw.end_index = cast(u32) len(ctx.draw_list.indices)
|
||||
draw.end_index = u32(len(ctx.draw_list.indices))
|
||||
if draw.end_index > draw.start_index {
|
||||
append( & ctx.draw_list.calls, draw)
|
||||
append(&ctx.draw_list.calls, draw)
|
||||
}
|
||||
|
||||
parser_free_shape( & entry.parser_info, shape )
|
||||
parser_free_shape(&entry.parser_info, shape)
|
||||
return true
|
||||
}
|
||||
|
||||
@ -698,6 +661,34 @@ flush_glyph_buffer_to_atlas :: proc( ctx : ^Context )
|
||||
}
|
||||
}
|
||||
|
||||
// flush_glyph_buffer_to_atlas :: proc( ctx : ^Context )
|
||||
// {
|
||||
// // profile(#procedure)
|
||||
// // Flush drawcalls to draw list
|
||||
// if len(ctx.glyph_buffer.clear_draw_list.calls) > 0 {
|
||||
// merge_draw_list( & ctx.draw_list, & ctx.glyph_buffer.clear_draw_list)
|
||||
// clear_draw_list( & ctx.glyph_buffer.clear_draw_list)
|
||||
// }
|
||||
|
||||
// if len(ctx.glyph_buffer.draw_list.calls) > 0 {
|
||||
// merge_draw_list( & ctx.draw_list, & ctx.glyph_buffer.draw_list)
|
||||
// clear_draw_list( & ctx.glyph_buffer.draw_list)
|
||||
// }
|
||||
|
||||
// // Clear glyph_update_FBO
|
||||
// if ctx.glyph_buffer.batch_x != 0
|
||||
// {
|
||||
// call := DrawCall {
|
||||
// pass = .Glyph,
|
||||
// start_index = 0,
|
||||
// end_index = 0,
|
||||
// clear_before_draw = true,
|
||||
// }
|
||||
// append( & ctx.draw_list.calls, call)
|
||||
// ctx.glyph_buffer.batch_x = 0
|
||||
// }
|
||||
// }
|
||||
|
||||
// ve_fontcache_merge_drawlist
|
||||
merge_draw_list :: proc( dst, src : ^DrawList )
|
||||
{
|
||||
|
@ -1,6 +1,9 @@
|
||||
package VEFontCache
|
||||
|
||||
import "base:runtime"
|
||||
import "core:simd"
|
||||
import "core:math"
|
||||
|
||||
// import core_log "core:log"
|
||||
|
||||
Colour :: [4]f32
|
||||
@ -50,54 +53,6 @@ font_glyph_lru_code :: #force_inline proc "contextless" ( font : FontID, glyph_i
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// ve_fontcache_eval_bezier (quadratic)
// Evaluates the quadratic Bezier curve defined by p0 (start), p1 (control) and p2 (end)
// at parameter `alpha`, returning the resulting point.
// The blend is carried out on the widened values produced by vec2_64 / f64 and narrowed
// back to f32 for the return value.
eval_point_on_bezier3 :: #force_inline proc "contextless" ( p0, p1, p2 : Vec2, alpha : f32 ) -> Vec2
{
	t     := f64(alpha)
	inv_t := 1 - t

	// Bernstein weights for the start, control and end points.
	w_start   := inv_t * inv_t
	w_control := 2.0 * inv_t * t
	w_end     := t * t

	// Weighted sum, accumulated left to right.
	blended := vec2_64(p0) * w_start + vec2_64(p1) * w_control + vec2_64(p2) * w_end
	return { f32(blended.x), f32(blended.y) }
}
|
||||
|
||||
// ve_fontcache_eval_bezier (cubic)
// Evaluates the cubic Bezier curve defined by p0 (start), p1 / p2 (controls) and p3 (end)
// at parameter `alpha`, returning the resulting point.
// The blend is carried out on the widened values produced by vec2_64 / f64 and narrowed
// back to f32 for the return value.
eval_point_on_bezier4 :: #force_inline proc "contextless" ( p0, p1, p2, p3 : Vec2, alpha : f32 ) -> Vec2
{
	t     := f64(alpha)
	inv_t := 1 - t

	// Bernstein weights for the start point, both control points and the end point.
	w_start := inv_t * inv_t * inv_t
	w_c_a   := 3 * inv_t * inv_t * t
	w_c_b   := 3 * inv_t * t * t
	w_end   := t * t * t

	// Weighted sum, accumulated left to right.
	blended := vec2_64(p0) * w_start + vec2_64(p1) * w_c_a + vec2_64(p2) * w_c_b + vec2_64(p3) * w_end
	return { f32(blended.x), f32(blended.y) }
}
|
||||
|
||||
is_empty :: #force_inline proc ( ctx : ^Context, entry : ^Entry, glyph_index : Glyph ) -> b32
|
||||
{
|
||||
if glyph_index == 0 do return true
|
||||
@ -115,7 +70,8 @@ reset_batch_codepoint_state :: #force_inline proc( ctx : ^Context ) {
|
||||
ctx.temp_codepoint_seen_num = 0
|
||||
}
|
||||
|
||||
screenspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2, size : Vec2 ) {
|
||||
screenspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2, size : Vec2 )
|
||||
{
|
||||
if true
|
||||
{
|
||||
pos_64 := vec2_64_from_vec2(position^)
|
||||
@ -142,7 +98,8 @@ screenspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2
|
||||
}
|
||||
}
|
||||
|
||||
textspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2, size : Vec2 ) {
|
||||
textspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2, size : Vec2 )
|
||||
{
|
||||
if true
|
||||
{
|
||||
pos_64 := vec2_64_from_vec2(position^)
|
||||
@ -162,3 +119,170 @@ textspace_x_form :: #force_inline proc "contextless" ( position, scale : ^Vec2,
|
||||
(scale^) *= quotient
|
||||
}
|
||||
}
|
||||
|
||||
Use_SIMD_For_Bezier_Ops :: true
|
||||
|
||||
when ! Use_SIMD_For_Bezier_Ops
|
||||
{
|
||||
// ve_fontcache_eval_bezier (quadratic)
// Evaluates the quadratic Bezier curve defined by p0 (start), p1 (control) and p2 (end)
// at parameter `alpha`, returning the resulting point.
// The blend is carried out on the widened values produced by vec2_64 / f64 and narrowed
// back to f32 for the return value.
eval_point_on_bezier3 :: #force_inline proc "contextless" ( p0, p1, p2 : Vec2, alpha : f32 ) -> Vec2
{
	t     := f64(alpha)
	inv_t := 1 - t

	// Bernstein weights for the start, control and end points.
	w_start   := inv_t * inv_t
	w_control := 2.0 * inv_t * t
	w_end     := t * t

	// Weighted sum, accumulated left to right.
	blended := vec2_64(p0) * w_start + vec2_64(p1) * w_control + vec2_64(p2) * w_end
	return { f32(blended.x), f32(blended.y) }
}
|
||||
|
||||
// ve_fontcache_eval_bezier (cubic)
// Evaluates the cubic Bezier curve defined by p0 (start), p1 / p2 (controls) and p3 (end)
// at parameter `alpha`, returning the resulting point.
// The blend is carried out on the widened values produced by vec2_64 / f64 and narrowed
// back to f32 for the return value.
eval_point_on_bezier4 :: #force_inline proc "contextless" ( p0, p1, p2, p3 : Vec2, alpha : f32 ) -> Vec2
{
	t     := f64(alpha)
	inv_t := 1 - t

	// Bernstein weights for the start point, both control points and the end point.
	w_start := inv_t * inv_t * inv_t
	w_c_a   := 3 * inv_t * inv_t * t
	w_c_b   := 3 * inv_t * t * t
	w_end   := t * t * t

	// Weighted sum, accumulated left to right.
	blended := vec2_64(p0) * w_start + vec2_64(p1) * w_c_a + vec2_64(p2) * w_c_b + vec2_64(p3) * w_end
	return { f32(blended.x), f32(blended.y) }
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Vec2_SIMD :: simd.f32x4
|
||||
|
||||
// Packs a Vec2 into the low two lanes of a Vec2_SIMD; the upper two lanes are zero.
vec2_to_simd :: #force_inline proc "contextless" (v: Vec2) -> Vec2_SIMD {
	lanes := Vec2_SIMD{ v.x, v.y, 0, 0 }
	return lanes
}
|
||||
|
||||
// Unpacks lanes 0 and 1 of a Vec2_SIMD into a Vec2; lanes 2 and 3 are ignored.
simd_to_vec2 :: #force_inline proc "contextless" (v: Vec2_SIMD) -> Vec2 {
	x := simd.extract(v, 0)
	y := simd.extract(v, 1)
	return Vec2{ x, y }
}
|
||||
|
||||
// Component-wise a + b, computed through the SIMD lane representation.
vec2_add_simd :: #force_inline proc "contextless" (a, b: Vec2) -> Vec2 {
	sum := simd.add( vec2_to_simd(a), vec2_to_simd(b) )
	return simd_to_vec2(sum)
}
|
||||
|
||||
// Component-wise a - b, computed through the SIMD lane representation.
vec2_sub_simd :: #force_inline proc "contextless" (a, b: Vec2) -> Vec2 {
	difference := simd.sub( vec2_to_simd(a), vec2_to_simd(b) )
	return simd_to_vec2(difference)
}
|
||||
|
||||
// Scales both components of `a` by the scalar `s`.
vec2_mul_simd :: #force_inline proc "contextless" (a: Vec2, s: f32) -> Vec2 {
	// Broadcast the scalar across every lane so a single multiply covers x and y.
	factor := Vec2_SIMD{ s, s, s, s }
	scaled := simd.mul( vec2_to_simd(a), factor )
	return simd_to_vec2(scaled)
}
|
||||
|
||||
// Divides both components of `a` by the scalar `s`.
// No check is made for s == 0; the result then follows normal floating-point division rules.
vec2_div_simd :: #force_inline proc "contextless" (a: Vec2, s: f32) -> Vec2 {
	// Broadcast the scalar across every lane so a single divide covers x and y.
	divisor  := Vec2_SIMD{ s, s, s, s }
	quotient := simd.div( vec2_to_simd(a), divisor )
	return simd_to_vec2(quotient)
}
|
||||
|
||||
// Dot product of a and b.
// The lane-wise products are summed across all four lanes; the two upper lanes are
// zero-padded by vec2_to_simd.
vec2_dot_simd :: #force_inline proc "contextless" (a, b: Vec2) -> f32 {
	products := simd.mul( vec2_to_simd(a), vec2_to_simd(b) )
	return simd.reduce_add_ordered(products)
}
|
||||
|
||||
// Squared length of `a`, i.e. the dot product of `a` with itself.
vec2_length_sqr_simd :: #force_inline proc "contextless" (a: Vec2) -> f32 {
	length_sqr := vec2_dot_simd(a, a)
	return length_sqr
}
|
||||
|
||||
// Length of `a`: the square root of its squared length.
vec2_length_simd :: #force_inline proc "contextless" (a: Vec2) -> f32 {
	length_sqr := vec2_length_sqr_simd(a)
	return math.sqrt(length_sqr)
}
|
||||
|
||||
// Returns `a` scaled to unit length.
// When the computed length is not greater than zero, `a` is returned unchanged.
vec2_normalize_simd :: #force_inline proc "contextless" (a: Vec2) -> Vec2 {
	magnitude := vec2_length_simd(a)

	// Nothing sensible to scale by: hand the input back untouched.
	if !(magnitude > 0) {
		return a
	}

	return vec2_mul_simd(a, 1.0 / magnitude)
}
|
||||
|
||||
// SIMD variant of eval_point_on_bezier3 (quadratic Bezier).
// Evaluates the curve defined by p0 (start), p1 (control) and p2 (end) at parameter `alpha`.
// All arithmetic here is done in f32.
eval_point_on_bezier3 :: #force_inline proc "contextless" (p0, p1, p2: Vec2, alpha: f32) -> Vec2
{
	inv_alpha := 1.0 - alpha

	// Bernstein weights for the start, control and end points.
	w_start   := inv_alpha * inv_alpha
	w_control := 2.0 * inv_alpha * alpha
	w_end     := alpha * alpha

	// Each point is multiplied by its weight broadcast across every lane.
	term_start   := simd.mul( vec2_to_simd(p0), Vec2_SIMD{ w_start,   w_start,   w_start,   w_start   } )
	term_control := simd.mul( vec2_to_simd(p1), Vec2_SIMD{ w_control, w_control, w_control, w_control } )
	term_end     := simd.mul( vec2_to_simd(p2), Vec2_SIMD{ w_end,     w_end,     w_end,     w_end     } )

	// (start + control) + end
	blended := simd.add( simd.add(term_start, term_control), term_end )
	return simd_to_vec2(blended)
}
|
||||
|
||||
// SIMD variant of eval_point_on_bezier4 (cubic Bezier).
// Evaluates the curve defined by p0 (start), p1 / p2 (controls) and p3 (end) at parameter `alpha`.
// All arithmetic here is done in f32.
eval_point_on_bezier4 :: #force_inline proc "contextless" (p0, p1, p2, p3: Vec2, alpha: f32) -> Vec2
{
	inv_alpha := 1.0 - alpha

	// Bernstein weights for the start point, both control points and the end point.
	w_start := inv_alpha * inv_alpha * inv_alpha
	w_c_a   := 3 * inv_alpha * inv_alpha * alpha
	w_c_b   := 3 * inv_alpha * alpha * alpha
	w_end   := alpha * alpha * alpha

	// Each point is multiplied by its weight broadcast across every lane.
	term_start := simd.mul( vec2_to_simd(p0), Vec2_SIMD{ w_start, w_start, w_start, w_start } )
	term_c_a   := simd.mul( vec2_to_simd(p1), Vec2_SIMD{ w_c_a,   w_c_a,   w_c_a,   w_c_a   } )
	term_c_b   := simd.mul( vec2_to_simd(p2), Vec2_SIMD{ w_c_b,   w_c_b,   w_c_b,   w_c_b   } )
	term_end   := simd.mul( vec2_to_simd(p3), Vec2_SIMD{ w_end,   w_end,   w_end,   w_end   } )

	// Pairwise sum: (start + control_a) + (control_b + end)
	blended := simd.add(
		simd.add(term_start, term_c_a),
		simd.add(term_c_b, term_end),
	)
	return simd_to_vec2(blended)
}
|
||||
}
|
||||
|
@ -40,7 +40,6 @@ shape_text_cached :: proc( ctx : ^Context, font : FontID, text_utf8 : string, en
|
||||
shape_cache_idx = shape_cache.next_cache_id
|
||||
shape_cache.next_cache_id += 1
|
||||
evicted := LRU_put( state, lru_code, shape_cache_idx )
|
||||
assert( evicted == lru_code )
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -54,7 +53,6 @@ shape_text_cached :: proc( ctx : ^Context, font : FontID, text_utf8 : string, en
|
||||
}
|
||||
|
||||
shape_entry := & shape_cache.storage[ shape_cache_idx ]
|
||||
// shape_entry.storage_hash = lru_code
|
||||
shape_text_uncached( ctx, font, text_utf8, entry, shape_entry )
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user